问题
How would you merge values between rows that have identical values in id_3
? I'm sure there's a better name for the question title but I'm struggling to find the appropriate operation/function name(s) for this procedure.
library(tidyverse)
id_1 <- c("x12", NA, "a_bc", NA)
id_2 <- c(NA, "gye", NA, "ab_c")
id_3 <- c("qwe", "ert", "abc", "abc")
param_1 <- c(0.21, 1.5, 0.23, NA)
param_12 <- c(0.05, 4.4, NA, 6.3)
df <- data.frame(id_1, id_2, id_3, param_1, param_12)
as_tibble(df)
# id_1 id_2 id_3 param_1 param_12
# <fct> <fct> <fct> <dbl> <dbl>
# 1 x12 NA qwe 0.21 0.05
# 2 NA gye ert 1.5 4.4
# 3 a_bc NA abc 0.23 NA
# 4 NA ab_c abc NA 6.3
desired df
:
# id_1 id_2 id_3 param_1 param_12
# <fct> <fct> <fct> <dbl> <dbl>
# 1 x12 NA qwe 0.21 0.05
# 2 NA gye ert 1.5 4.4
# 3 a_bc ab_c abc 0.23 6.3
update - with additional column formats such as character:
id_1 <- c("x12", NA, "a_bc", NA)
id_2 <- c(NA, "gye", NA, "ab_c")
id_3 <- c("qwe", "ert", "abc", "abc")
param_1 <- c(0.21, 1.5, 0.23, NA)
param_12 <- c(0.05, 4.4, NA, 6.3)
desc_1 <- c("st", NA, "ko", NA)
desc_2 <- c(NA, "lo", NA, "vf")
df <- data.frame(id_1, id_2, id_3, param_1, param_12, desc_1, desc_2)
df <- df %>% mutate(desc_1 = as.character(desc_1),
desc_2 = as.character(desc_2))
# A tibble: 4 x 7
# id_1 id_2 id_3 param_1 param_12 desc_1 desc_2
# <fct> <fct> <fct> <dbl> <dbl> <chr> <chr>
#1 x12 NA qwe 0.21 0.05 st NA
#2 NA gye ert 1.5 4.4 NA lo
#3 a_bc NA abc 0.23 NA ko NA
#4 NA ab_c abc NA 6.3 NA vf
df <- df %>% group_by(id_3) %>%
summarise_all(list(~ if(all(is.na(.))) NA else .[!is.na(.)]
`Error: Column `desc_1` can't promote group 1 to character`
回答1:
We can group by 'id_3' and summarise
all the columns to return NA
if all the values in that particular column is NA
for a group or else
remove the NA
and return the first non-NA element
library(tidyverse)
df %>%
group_by(id_3) %>%
summarise_all(list(~ if(all(is.na(.))) NA else .[!is.na(.)][1]))
# A tibble: 3 x 5
# id_3 id_1 id_2 param_1 param_12
# <fct> <fct> <fct> <dbl> <dbl>
#1 abc a_bc ab_c 0.23 6.3
#2 ert <NA> gye 1.5 4.4
#3 qwe x12 <NA> 0.21 0.05
-Rstudio
Update
For the second dataset, after the columns were converted to character
class, OP encountered error with if(all(is.na(.))) NA
l. If the columns are of same type, we can specify NA_character
, NA_real_
, NA_integer_
to dispatch the correct NA for each type. Here, we cannot do that, but there is one hacky option to return the 1st NA element in that column which will also have the correct type
df %>%
group_by(id_3) %>%
summarise_all(list(~ if(all(is.na(.))) .[!is.na(.)][1]
else .[!is.na(.)]))
# A tibble: 3 x 7
# id_3 id_1 id_2 param_1 param_12 desc_1 desc_2
# <fct> <fct> <fct> <dbl> <dbl> <chr> <chr>
#1 abc a_bc ab_c 0.23 6.3 ko vf
#2 ert <NA> gye 1.5 4.4 <NA> lo
#3 qwe x12 <NA> 0.21 0.05 st <NA>
来源:https://stackoverflow.com/questions/56366734/how-to-merge-data-across-data-frame-rows-based-on-identical-column-values