My sample.
data=structure(list(add = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L,
I've added a piece of code that would solve your issue and a brief explanation of the error.
Updated code
# Example data: 52 rows and three columns.
#   add   - factor with levels "x" and "y", 26 consecutive rows each
#   x1    - integer measurements (some are 0)
#   group - factor with levels "female" and "male"; inside each `add` level the
#           rows run male (11 rows), female (7 rows), male (8 rows)
data <- structure(
  list(
    add = structure(
      rep(c(1L, 2L), each = 26L),
      levels = c("x", "y"),
      class = "factor"
    ),
    x1 = c(
      14L, 15L, 36L, 0L, 0L, 0L, 53L, 10L, 39L, 27L, 67L, 25L, 19L,
      49L, 53L, 64L, 61L, 12L, 75L, 34L, 88L, 43L, 85L, 93L, 44L, 31L,
      37L, 90L, 66L, 39L, 59L, 96L, 41L, 23L, 20L, 26L, 69L, 28L, 35L,
      96L, 87L, 82L, 70L, 68L, 26L, 12L, 58L, 18L, 76L, 93L, 3L, 31L
    ),
    group = structure(
      # codes: 1 = "female", 2 = "male"; the same 11/7/8 pattern occurs twice
      rep(rep(c(2L, 1L, 2L), times = c(11L, 7L, 8L)), times = 2L),
      levels = c("female", "male"),
      class = "factor"
    )
  ),
  names = c("add", "x1", "group"),
  class = "data.frame",
  row.names = c(NA, -52L)
)
library(tidyverse)
library(data.table)
# For each `add` level: in the male rows of run 3, replace every x1 that is
# above the 25% quantile of the first male run with the mean of that first run.
# Zeros in x1 are turned into NA only afterwards, so they still count in the
# mean and the quantile.
data %>%
  # group2 numbers the consecutive runs of `group` within each `add` level
  group_by(add) %>%
  mutate(group2 = rleid(group)) %>%
  # Per run: MEAN and Q25 of x1 taken over rows that are male AND in run 1.
  # In every other run that subset is empty, so the result is missing there.
  group_by(add, group, group2) %>%
  mutate(
    MEAN = mean(x1[group == "male" & group2 == 1]),
    Q25 = quantile(x1[group == "male" & group2 == 1], 0.25)
  ) %>%
  # Back at `add` level, pick the non-missing MEAN / Q25 and apply them to the
  # male rows of run 3.
  group_by(add) %>%
  mutate(
    x1 = ifelse(
      group == "male" & group2 == 3 & x1 > unique(Q25[!is.na(Q25)]),
      unique(MEAN[!is.na(MEAN)]),
      x1
    )
  ) %>%
  # new step: convert the zeros to NA as the very last update of x1
  mutate(x1 = ifelse(x1 == 0, NA, x1)) %>%
  ungroup() %>%
  select(-group2) %>%
  data.frame()
Error explanation
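Run the earlier part of the code unchanged and convert the zeros in x1 to NA only at the very end. You get that error because NA values break the mean and quantile calculations you need to do: with the default na.rm = FALSE, mean() returns NA and quantile() stops with an error when its input contains NA.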
An alternative would be to update x1 at the beginning and then use na.rm = TRUE in your calculations.
For the new case you mentioned, where you start with NA values in x1, try this:
# Variant for data whose x1 already contains NA: same replacement rule as
# before, but the mean and the 25% quantile of the first male run are computed
# with the missing values removed.
data %>%
  # group2 numbers the consecutive runs of `group` within each `add` level
  group_by(add) %>%
  mutate(group2 = rleid(group)) %>%
  # Per run: MEAN and Q25 of x1 taken over rows that are male AND in run 1.
  # In every other run that subset is empty, so the result is missing there.
  group_by(add, group, group2) %>%
  mutate(
    # na.rm = TRUE is the extra code here; spelled out as TRUE because `T` is
    # an ordinary variable that can be reassigned
    MEAN = mean(x1[group == "male" & group2 == 1], na.rm = TRUE),
    Q25 = quantile(x1[group == "male" & group2 == 1], 0.25, na.rm = TRUE)
  ) %>%
  # Back at `add` level, pick the non-missing MEAN / Q25 and apply them to the
  # male rows of run 3; rows where x1 is NA stay NA.
  group_by(add) %>%
  mutate(
    x1 = ifelse(
      group == "male" & group2 == 3 & x1 > unique(Q25[!is.na(Q25)]),
      unique(MEAN[!is.na(MEAN)]),
      x1
    )
  ) %>%
  ungroup() %>%
  select(-group2) %>%
  data.frame()
For the new case (edit 2) you mentioned, first save the output of the previous code as data2:
# `...` is a placeholder for the remaining steps of the previous pipeline; this
# line is not meant to be run literally.
data2 = data %>% ...
And then run this:
# For each `add` level, sum x1 over the first four male rows of run 3.
data2 %>%
  group_by(add) %>%                          # work within each add value
  mutate(group2 = rleid(group)) %>%          # number the runs of `group`
  filter(group == "male", group2 == 3) %>%   # keep only the male rows of run 3
  summarise(SUM = sum(head(x1, 4)))          # sum x1 over the first 4 rows kept
# # A tibble: 2 x 2
# add SUM
# <fct> <dbl>
# 1 x 94.9
# 2 y 107.