replace duplicate values with NA in time series data using dplyr

前端未结

关注

 2  1321

My data seems a bit different from the data in other similar posts.

box_num      date       x        y
1-Q      2018-11-18   20.2      8
1-Q      2018-11-25   21


                      
              相关标签:


      
      
        
          2条回答        

        
                         				            
            
           
            
                              
                
              
              
                
                  梦谈多话        
                
              
                            
                2020-12-19 14:20
              
            
            
                                                                       
Here is an option with data.table. Convert the 'data.frame' to a 'data.table' (setDT(df1)), specify the columns of interest in .SDcols, replace the duplicated elements in those columns with NA, and update the columns by assigning (:=) the output back to them.

library(data.table)

# Convert df1 to a data.table, then, within each box_num group, set every
# repeated value of x and y to NA and write the result back with `:=`.
# duplicated() flags *all* repeats after the first occurrence; anyDuplicated()
# only returns the index of the first repeat, so a group with several repeats
# would keep all but one of them.
setDT(df1)[, c("x", "y") := lapply(.SD, function(v) {
  replace(v, duplicated(v), NA)
}), by = box_num, .SDcols = c("x", "y")]
df1
#   box_num       date      x      y
#1:     1-Q 2018-11-18 20.200  8.000
#2:     1-Q 2018-11-25 21.230  7.200
#3:     1-Q  2018-12-2     NA 23.000
#4:    98-L 2018-11-25  0.134  9.300
#5:    98-L  2018-12-2     NA  4.000
#6:   76-GI  2018-12-2 22.734  4.562
#7:   76-GI  2018-12-9 28.000     NA


data

# Example input: seven rows across three boxes; some x / y values repeat
# within a box. Strings are kept as character (not factor) columns.
df1 <- data.frame(
  box_num = c("1-Q", "1-Q", "1-Q", "98-L", "98-L", "76-GI", "76-GI"),
  date = c(
    "2018-11-18", "2018-11-25", "2018-12-2",
    "2018-11-25", "2018-12-2",
    "2018-12-2", "2018-12-9"
  ),
  x = c(20.2, 21.23, 20.2, 0.134, 0.134, 22.734, 28),
  y = c(8, 7.2, 23, 9.3, 4, 4.562, 4.562),
  stringsAsFactors = FALSE
)

                                                                        
                                                        
            
            
              
                
                0
              
                 
                
               讨论(0)
              
              
                                                   
              
                                                            
            
                      
                    


               
            
    发布评论:
    
         
                        
    
    提交评论 
  
  

                    
                    
                    
                        
                        
                         加载中...
                        
                    
                
          
          	          
            
           
            
                              
                
              
              
                
                  一向        
                
              
                            
                2020-12-19 14:37
              
            
            
                                                                       
Using dplyr, we can group_by box_num, mutate the x and y columns, and replace the duplicated values within each group with NA.

library(dplyr)

# Within each box_num group, set every repeated value of x and y to NA.
# mutate(across(...)) replaces the superseded mutate_at() and the deprecated
# funs() helper; ungroup() stops the grouping from leaking into later steps.
df %>%
  group_by(box_num) %>%
  mutate(across(x:y, function(v) replace(v, duplicated(v), NA))) %>%
  ungroup()


# box_num date          x     y
#  <fct>   <fct>      <dbl> <dbl>
#1 1-Q     2018-11-18 20.2    8   
#2 1-Q     2018-11-25 21.2    7.2 
#3 1-Q     2018-12-2  NA     23   
#4 98-L    2018-11-25  0.134  9.3 
#5 98-L    2018-12-2  NA      4   
#6 76-GI   2018-12-2  22.7    4.56
#7 76-GI   2018-12-9  28     NA  




A base R option (which might not be the best in this case) would be:

cols <- c("x", "y")
# For each selected column, ave() applies the replacement within each box_num
# group and returns a vector aligned with the original rows.
# lapply() keeps every column as its own vector; sapply() would bind the
# results into a matrix, forcing a common type across columns.
df[cols] <- lapply(df[cols], function(column) {
  ave(column, df$box_num, FUN = function(v) replace(v, duplicated(v), NA))
})

                                                                        
                                                        
            
            
              
                
                0
              
                 
                
               讨论(0)
              
              
                                                   
              
                                                            
            
                      
                    


               
            
    发布评论:
    
         
                        
    
    提交评论 
  
  

                    
                    
                    
                        
                        
                         加载中...
                        
                    
                
          
          	          
                             
        
        
          
            
            
              
              
            
    


                                 
              
            
                          
    

        
         
                验证码
                
                  
                
                
                   看不清?
                
              
                                  
                    
   
                 
             
              提交回复