suppose I have a dataset like this
df <- data.frame(group = c(rep(1, 3), rep(2, 2), rep(3, 2), rep(4, 3), rep(5, 2)),
                 score = c(30, 10, 22, 44, 6, 5, 20, 35, 2, 60, 50, 5))
# the trailing scores 60 and 5 are taken from the outputs shown below;
# 50 is an arbitrary placeholder for the one value that is not shown anywhere
How can I remove the first row of each group?
An option with dplyr is to select, within each group, all rows except the first:
library(dplyr)
df %>%
  group_by(group) %>%
  slice(2:n())
#  group score
#  <dbl> <dbl>
#1  1.00  10.0
#2  1.00  22.0
#3  2.00  6.00
#4  3.00  20.0
#5  4.00  2.00
#6  4.00  60.0
#7  5.00  5.00
Another way, shown by @Rich Scriven in a now-deleted answer, is to use slice(-1):
df %>%
  group_by(group) %>%
  slice(-1)
This is quite simple with duplicated():
df[duplicated(df$group),]
   group score
2      1    10
3      1    22
5      2     6
7      3    20
9      4     2
10     4    60
12     5     5
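This works because duplicated() returns FALSE for the first occurrence of a value and TRUE for every repeat, so the logical index keeps all but the first row of each group. A small illustration (the vector here is just an example, not part of the original data):
duplicated(c(1, 1, 1, 2, 2))
# [1] FALSE  TRUE  TRUE FALSE  TRUE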
Another dplyr option is to keep only the rows whose group value equals that of the previous row:
dplyr::filter(df, group == lag(group))
  group score
1     1    10
2     1    22
3     2     6
4     3    20
5     4     2
6     4    60
7     5     5
See lead() and lag() in the dplyr package for more information:
https://dplyr.tidyverse.org/reference/lead-lag.html
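To see why the filter() call keeps the right rows: lag() shifts the vector down by one, so the comparison is NA for the first row and FALSE wherever a new group starts, and filter() drops both. A small sketch (assuming, as in df above, that each group is stored as one contiguous block):
library(dplyr)
x <- c(1, 1, 1, 2, 2)
lag(x)
# [1] NA  1  1  1  2
x == lag(x)
# [1]    NA  TRUE  TRUE FALSE  TRUE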
Another base R option would be to compare adjacent elements of 'group':
df[c(FALSE,df$group[-1]==df$group[-nrow(df)]),]
# group score
#2 1 10
#3 1 22
#5 2 6
#7 3 20
#9 4 2
#10 4 60
#12 5 5
Here I removed the first observation of 'group' (df$group[-1]) and compared it (==) with the vector from which the last observation is removed (df$group[-nrow(df)]). As the length of this comparison is one less than the number of rows of the dataset, we pad it with FALSE at the top and use the result as a logical index to subset the dataset.
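Spelled out step by step (a sketch; the intermediate names are only for illustration):
same_as_previous <- df$group[-1] == df$group[-nrow(df)]  # length nrow(df) - 1
keep <- c(FALSE, same_as_previous)                       # pad FALSE for the first row
df[keep, ]                                               # same result as above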