############################
# Dataframe joining example
############################
### Author: Eliza Argyridou
#
# Builds two small student lists (one per course) and demonstrates the four
# dplyr mutating joins on them. Students are identified by the pair of
# columns `name` and `program`; every join below states that key explicitly
# so the matching columns are visible and no "Joining with `by = ...`"
# message is emitted.

## Load necessary packages
library(dplyr)

# Data 1: Student list for R course
program <- c("BSc", "BSc", "MSc")
name <- c("Mary", "John", "Andreas")
R_grade <- c(1.5, 2, 3)
df_R <- data.frame(program, name, R_grade)
df_R
# program    name R_grade
#     BSc    Mary     1.5
#     BSc    John     2.0
#     MSc Andreas     3.0

# Data 2: Student list for Statistics
# NOTE: `program` and `name` are deliberately reused here; df_R was already
# built above, so overwriting the vectors does not affect it.
program <- c("BSc", "BSc", "MSc", "MSc")
name <- c("Mary", "John", "Chris", "Jane")
Stats_grade <- c(2, 1.5, 3, 2.5)
df_Stats <- data.frame(program, name, Stats_grade)
df_Stats
# program  name Stats_grade
#     BSc  Mary         2.0
#     BSc  John         1.5
#     MSc Chris         3.0
#     MSc  Jane         2.5

# Imagine that you work at the uni administration and have to prepare
# transcripts of records for all students.
# You receive a student list for each course.
# full_join: keep every student that appears in either list; a grade is NA
# when the student is missing from that course's list.
all_courses <- full_join(df_R, df_Stats, by = c("name", "program"))
all_courses
# program    name R_grade Stats_grade
#     BSc    Mary     1.5         2.0
#     BSc    John     2.0         1.5
#     MSc Andreas     3.0          NA
#     MSc   Chris      NA         3.0
#     MSc    Jane      NA         2.5

# Both the R course and Statistics are prerequisites for Multivariate
# Statistics: you need to find students who have attended both courses.
# inner_join: keep only students present in both lists.
df_RandStats <- inner_join(df_R, df_Stats, by = c("name", "program"))
df_RandStats
# program name R_grade Stats_grade
#     BSc Mary     1.5         2.0
#     BSc John     2.0         1.5

# Interested to know whether (or the grades of) students who attended
# Statistics also attended the R course.
# left_join: keep every row of df_Stats (the left table); R_grade is NA when
# the student is not in df_R.
df_Stats_R <- left_join(df_Stats, df_R, by = c("name", "program"))
df_Stats_R
# program  name Stats_grade R_grade
#     BSc  Mary         2.0     1.5
#     BSc  John         1.5     2.0
#     MSc Chris         3.0      NA
#     MSc  Jane         2.5      NA

# Interested to know whether (or the grades of) students who attended the
# R course also attended Statistics.
# right_join: keep every row of df_R (the right table); Stats_grade is NA
# when the student is not in df_Stats.
df_R_Stats <- right_join(df_Stats, df_R, by = c("name", "program"))
df_R_Stats
# program    name Stats_grade R_grade
#     BSc    Mary         2.0     1.5
#     BSc    John         1.5     2.0
#     MSc Andreas          NA     3.0