select group before certain observations separated by grouping var in R with NA control

前端 未结 1 394
情歌与酒
情歌与酒 2021-01-16 18:43

My sample.

 data=structure(list(add = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L,          


        
相关标签:
1条回答
  • 2021-01-16 18:57

    I've added a piece of code that would solve your issue and a brief explanation of the error.

    Updated code

    # Example data: two `add` blocks ("x" and "y") of 26 rows each. Inside each
    # block `group` runs male (11 rows), then female (7 rows), then male (8 rows).
    # `x1` is an integer measurement; the zeros in the first block are the values
    # that get recoded to NA later on.
    data <- data.frame(
      add = factor(rep(c("x", "y"), each = 26L)),
      x1 = c(
        14L, 15L, 36L, 0L, 0L, 0L, 53L, 10L, 39L, 27L, 67L, 25L, 19L,
        49L, 53L, 64L, 61L, 12L, 75L, 34L, 88L, 43L, 85L, 93L, 44L, 31L,
        37L, 90L, 66L, 39L, 59L, 96L, 41L, 23L, 20L, 26L, 69L, 28L, 35L,
        96L, 87L, 82L, 70L, 68L, 26L, 12L, 58L, 18L, 76L, 93L, 3L, 31L
      ),
      group = factor(
        rep(rep(c("male", "female", "male"), times = c(11L, 7L, 8L)), times = 2L),
        levels = c("female", "male")
      )
    )
    
    library(tidyverse)
    library(data.table)
    
    # Within each `add`, number the consecutive runs of `group` (run 1 is the
    # leading male stretch, run 3 the male stretch that follows the females).
    # MEAN and Q25 are the mean and first quartile of x1 over the run-1 males;
    # run-3 male values above Q25 are overwritten with MEAN. Zeros are turned
    # into NA only as the last step, after the statistics have been computed.
    data %>%
      group_by(add) %>%
      mutate(group2 = rleid(group)) %>%
      group_by(add, group, group2) %>%
      mutate(
        # Outside the run-1 male rows the subset is empty, giving NaN / NA.
        MEAN = mean(x1[group2 == 1 & group == "male"]),
        Q25 = quantile(x1[group2 == 1 & group == "male"], probs = 0.25)
      ) %>%
      group_by(add) %>%
      mutate(
        # The non-missing MEAN / Q25 values are the run-1 male statistics.
        x1 = ifelse(
          group == "male" & group2 == 3 & x1 > unique(Q25[!is.na(Q25)]),
          unique(MEAN[!is.na(MEAN)]),
          x1
        )
      ) %>%
      mutate(x1 = ifelse(x1 == 0, NA, x1)) %>%
      ungroup() %>%
      select(-group2) %>%
      data.frame()
    

    Error explanation

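    Run the earlier steps of the pipeline first and only recode the x1 column (0 to NA) at the very end. You get that error because NA values break the mean and quantile calculations: with the default na.rm = FALSE, mean() returns NA and quantile() stops with an error as soon as x1 contains a missing value.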

    Alternatively, you could recode x1 at the beginning and then pass na.rm = TRUE to mean() and quantile().

    For the new case you mentioned, where x1 already contains NA values at the start, try this:

    # Variant for input whose x1 already contains NA values.
    # Within each `add`, number the consecutive runs of `group` (run 1 is the
    # leading male stretch, run 3 the male stretch that follows the females).
    # MEAN and Q25 are the mean and first quartile of x1 over the run-1 males,
    # computed with missing values removed; run-3 male values above Q25 are
    # then overwritten with MEAN.
    data %>%
      group_by(add) %>%
      mutate(group2 = rleid(group)) %>%
      group_by(add, group, group2) %>%
      mutate(
        # na.rm = TRUE (spelled out: `T` is an ordinary, reassignable variable)
        # keeps NA values in x1 from breaking mean() and quantile().
        MEAN = mean(x1[group == "male" & group2 == 1], na.rm = TRUE),
        Q25 = quantile(x1[group == "male" & group2 == 1], 0.25, na.rm = TRUE)
      ) %>%
      group_by(add) %>%
      mutate(
        # The non-missing MEAN / Q25 values are the run-1 male statistics.
        # Rows where x1 is NA stay NA.
        x1 = ifelse(
          group == "male" & group2 == 3 & x1 > unique(Q25[!is.na(Q25)]),
          unique(MEAN[!is.na(MEAN)]),
          x1
        )
      ) %>%
      ungroup() %>%
      select(-group2) %>%
      data.frame()
    

    For the new case (edit 2) you mentioned, first save the output of the previous code as data2:

    data2 = data %>% ...
    

    And then run this:

    # For each `add`, sum x1 over the first four rows of the male run that
    # follows the females (run 3 of `group`).
    data2 %>%
      group_by(add) %>%
      mutate(group2 = rleid(group)) %>%        # run number of `group` within `add`
      filter(group == "male", group2 == 3) %>% # keep only the males after the females
      summarise(SUM = sum(head(x1, 4)))        # total of the first 4 remaining rows
    
    # # A tibble: 2 x 2
    #   add     SUM
    #   <fct> <dbl>
    # 1 x      94.9
    # 2 y     107.
    
    0 讨论(0)
提交回复
热议问题