Combining pivoted rows in R by common value

后端未结

关注

 4  1645

I have a data frame that looks like this

Name    Visit     Arrival      Departure

Jack    week 1     8:00         NA
Jack    week 1      NA          8:30
Sa


                      
              相关标签:


      
      
        
          4条回答        

        
                         				            
            
           
            
                              
                
              
              
                
                  借酒劲吻你        
                
              
                            
                2020-12-04 03:25
              
            
            
                                                                       
Here's one approach, assuming that each person who visited has exactly two rows of data:

library(dplyr)

# Sample data: each visit is split across two rows, one holding the arrival
# time and the other the departure time. The table is built column by column
# with tibble() (re-exported by dplyr) instead of parsing a whitespace-aligned
# string: readr >= 2.0 tokenises read_table() input on any run of whitespace,
# which would tear "week 1" into two fields.
df <- tibble(
  Name      = c("Jack", "Jack", "Sally", "Sally", "Adam", "Adam"),
  Visit     = c("week 1", "week 1", "week 5", "week 5", "week 2", "week 2"),
  Arrival   = c("8:00", NA, "9:00", NA, "2:00", NA),
  Departure = c(NA, "8:30", NA, "9:30", NA, "3:00")
)

# Within every Name/Visit group, overwrite both time columns with the group's
# first non-missing value (NA if the group has none), then keep one row per
# group. Unlike lag()/lead(), this does not depend on the arrival row coming
# before the departure row.
df %>%
  group_by(Name, Visit) %>%
  mutate(
    Arrival   = Arrival[!is.na(Arrival)][1],
    Departure = Departure[!is.na(Departure)][1]
  ) %>%
  ungroup() %>%
  distinct(Name, Visit, .keep_all = TRUE)

# A tibble: 3 × 4
   Name  Visit Arrival Departure
  <chr>  <chr>   <chr>     <chr>
1  Jack week 1    8:00      8:30
2 Sally week 5    9:00      9:30
3  Adam week 2    2:00      3:00

                                                                        
                                                        
            
            
              
                
                0
              
                 
                
               讨论(0)
              
              
                                                   
              
                                                            
            
                      
                    


               
            
    发布评论:
    
         
                        
    
    提交评论 
  
  

                    
                    
                    
                        
                        
                         加载中...
                        
                    
                
          
          	          
            
           
            
                              
                
              
              
                
                  后悔当初        
                
              
                            
                2020-12-04 03:32
              
            
            
                                                                       
I'm sure there might be a prettier way of doing this, but this is what worked for me:

 library(data.table)
library(reshape2)

# Example data: every visit occupies two rows, one carrying the arrival time
# and one carrying the departure time.
test <- data.table(
  Name      = c("Jack", "Jack", "Sally", "Sally", "Adam", "Adam"),
  Visit     = c("week 1", "week 1", "week 5", "week 5", "week 2", "week 2"),
  Arrival   = c("8:00", NA, "9:00", NA, "2:00", NA),
  Departure = c(NA, "8:30", NA, "9:30", NA, "3:00")
)

# Go long (one row per Name/Visit/variable), discard the rows whose value is
# missing, then go wide again so each Name/Visit pair ends up on a single row.
test_m <- melt(test, id.vars = c("Name", "Visit"))
test_m <- test_m[!is.na(value), ]
test_c <- dcast(test_m, Name + Visit ~ variable)

> test_c
   Name  Visit Arrival Departure
1  Adam week 2    2:00      3:00
2  Jack week 1    8:00      8:30
3 Sally week 5    9:00      9:30


Hope that helps
                                                                        
                                                        
            
            
              
                
                0
              
                 
                
               讨论(0)
              
              
                                                   
              
                                                            
            
                      
                    


               
            
    发布评论:
    
         
                        
    
    提交评论 
  
  

                    
                    
                    
                        
                        
                         加载中...
                        
                    
                
          
          	          
            
           
            
                              
                
              
              
                
                  暗喜        
                
              
                            
                2020-12-04 03:34
              
            
            
                                                                       
Actually, if you are able to get back to the data before the pivot, tidyr::spread will do a beautiful job.

# Long-format sample data: one row per person, visit and event, with the
# event label in Itenary and its clock time in Time.
Name <- rep(c("Jack", "Sally", "Adam"), each = 2)
Visit <- rep(c("week1", "week5", "week2"), each = 2)
Itenary <- rep(c("Arrival", "Departure"), times = 3)
Time <- c("8:00", "8:30", "9:00", "9:30", "2:00", "2:30")

# Column names are spelled out; they match the vector names used above.
df <- data.frame(Name = Name, Visit = Visit, Itenary = Itenary, Time = Time)

df

   Name Visit   Itenary Time
1  Jack week1   Arrival 8:00
2  Jack week1 Departure 8:30
3 Sally week5   Arrival 9:00
4 Sally week5 Departure 9:30
5  Adam week2   Arrival 2:00
6  Adam week2 Departure 2:30

# Widen the long table: each distinct Itenary value ("Arrival", "Departure")
# becomes a column filled from Time, leaving one row per Name/Visit pair.
# The call is namespace-qualified and takes df directly, so it runs without
# first attaching tidyr (for spread) or a package that provides %>%.
tidyr::spread(df, key = Itenary, value = Time)

   Name Visit Arrival Departure
1  Adam week2    2:00      2:30
2  Jack week1    8:00      8:30
3 Sally week5    9:00      9:30

                                                                        
                                                        
            
            
              
                
                0
              
                 
                
               讨论(0)
              
              
                                                   
              
                                                            
            
                      
                    


               
            
    发布评论:
    
         
                        
    
    提交评论 
  
  

                    
                    
                    
                        
                        
                         加载中...
                        
                    
                
          
          	          
            
           
            
                              
                
              
              
                
                  春和景丽        
                
              
                            
                2020-12-04 03:43
              
            
            
                                                                       
Just aggregate it with na.omit as the aggregation function:

# Collapse the rows of each Name/Visit group; na.omit drops the missing entry
# from each of the two time columns.
aggregate(
  dat[c("Arrival", "Departure")],
  by = dat[c("Name", "Visit")],
  FUN = na.omit
)
# or, via the formula interface (na.action = na.pass hands rows containing NA
# through to FUN instead of filtering them out beforehand)
aggregate(
  cbind(Arrival, Departure) ~ .,
  data = dat,
  FUN = na.omit,
  na.action = na.pass
)
#   Name Visit Arrival Departure
#1  Jack week1    8:00      8:30
#2  Adam week2    2:00      3:00
#3 Sally week5    9:00      9:30


Same logic works in data.table:

# For each Name/Visit group, apply na.omit to every non-grouping column (.SD).
dat[, lapply(.SD, na.omit), by = .(Name, Visit)]


...or dplyr:

# Group by Name and Visit, then reduce every other column with na.omit.
dat %>%
  group_by(Name, Visit) %>%
  summarise_all(na.omit)

                                                                        
                                                        
            
            
              
                
                0
              
                 
                
               讨论(0)
              
              
                                                   
              
                                                            
            
                      
                    


               
            
    发布评论:
    
         
                        
    
    提交评论 
  
  

                    
                    
                    
                        
                        
                         加载中...
                        
                    
                
          
          	          
                             
        
        
          
            
            
              
              
            
    


                                 
              
            
                          
    

        
         
                验证码
                
                  
                
                
                   看不清?
                
              
                                  
                    
   
                 
             
              提交回复