merge/combine columns with same name but incomplete data

后端未结

关注

 7  956

臣服心动 2020-12-14 17:57

I have two data frames that have some columns with the same names and others with different names. The data frames look something like this:

df1
      ID hel


      
      
        
          7条回答        

        
                    
            
            
                         
                
              
              
                
                   攒了一身酷
                                             
                
                
                (楼主)
            
              
              
                2020-12-14 18:17
              

            
            
                        
Nobody's posted a dplyr solution, so here's a succinct option in dplyr. The approach is simply to do a full_join that combines all rows, then group and summarise to remove the redundant missing cells.


library(tidyverse)

# Example inputs: both tables share ID, hello and world, but each table only
# holds part of the hello/world values (NA where the other table has the
# value). The remaining columns are unique to each table.
df1 <- tibble(
  ID = 1:5,
  hello = c(NA, NA, 10L, 4L, NA),
  world = c(NA, NA, 8L, 17L, NA),
  hockey = c(7L, 2L, 8L, 5L, 3L),
  soccer = c(4L, 5L, 23L, 12L, 43L)
)
df2 <- tibble(
  ID = 1:5,
  hello = c(2L, 5L, NA, NA, 9L),
  world = c(3L, 1L, NA, NA, 7L),
  football = c(43L, 24L, 2L, 5L, 12L),
  baseball = c(6L, 32L, 23L, 15L, 2L)
)

# Return the first non-missing value of `x`, or NA (of the same type) when
# every value is missing. Unlike a bare `na.omit()`, this always yields exactly
# one value, so the summary stays at one row per group.
first_non_na <- function(x) {
  x[!is.na(x)][1L]
}

# Join on every column the two tables have in common; rows whose shared
# columns disagree (NA on one side, a value on the other) are kept as
# separate rows by the full join.
shared_cols <- intersect(colnames(df1), colnames(df2))

# Collapse the duplicated IDs back to one row each, keeping the non-missing
# value of every column.
df1 %>%
  full_join(df2, by = shared_cols) %>%
  group_by(ID) %>%
  summarise(across(everything(), first_non_na))
#> # A tibble: 5 x 7
#>      ID hello world hockey soccer football baseball
#>   <int> <int> <int>  <int>  <int>    <int>    <int>
#> 1     1     2     3      7      4       43        6
#> 2     2     5     1      2      5       24       32
#> 3     3    10     8      8     23        2       23
#> 4     4     4    17      5     12        5       15
#> 5     5     9     7      3     43       12        2


Created on 2018-07-13 by the reprex package (v0.2.0).
    
             
                                                        
            
            
              
                
                0
              
                   
                
               讨论(0)
              
                                                  
              
              
                          
             
       
          
              
                                       
     查看其它7个回答


            
                         
                    


               
            
    发布评论:
    
         
                        
    
    提交评论 
  
  

                    
                    
                    
                        
                        
                         加载中...
                        
                    
                
          
                              			
        
        
        
          
            
            
              
              
            
    


                                 
              
            
                          
    

        
         
                验证码
                
                  
                
                
                   看不清?
                
              
                                  
                    
   
                 
             
              提交回复