问题
I have data in long format as seen below:
Data:
id code
1 EP
2 EP
3 EP
4 UM
5 UM
1 UM
2 UM
10 UM
6 BZ
7 BZ
14 BZ
2 BZ
8 TVOL
9 TVOL
16 TVOL
10 NW
11 NW
7 NW
12 SM
13 SM
3 SM
14 GS
15 GS
1 GS
2 GS
9 GS
I would like to create a wide dataframe with each "code" as its own column marked TRUE/FALSE depending on whether there's an associated "id" as seen in the minimal example below:
id code.EP code.UM code.BZ code.TVOL code.NW code.SM code.GS
1 TRUE TRUE FALSE FALSE FALSE FALSE TRUE
2 TRUE TRUE TRUE FALSE FALSE FALSE TRUE
3 TRUE FALSE FALSE FALSE FALSE TRUE FALSE
4 FALSE TRUE FALSE FALSE FALSE FALSE FALSE
5 FALSE TRUE FALSE FALSE FALSE FALSE FALSE
Apologies if this has been answered before (I'm positive it has, in various forms); I just can't seem to understand the similar examples I've found.
回答1:
We can use table() to count each id/code combination and then convert the counts to logical values:
# Tabulate every (id, code) pair, then flag the pairs that occur at least once.
pair_counts <- table(df1)
pair_counts > 0
# code
#id BZ EP GS NW SM TVOL UM
# 1 FALSE TRUE TRUE FALSE FALSE FALSE TRUE
# 2 TRUE TRUE TRUE FALSE FALSE FALSE TRUE
# 3 FALSE TRUE FALSE FALSE TRUE FALSE FALSE
# 4 FALSE FALSE FALSE FALSE FALSE FALSE TRUE
# 5 FALSE FALSE FALSE FALSE FALSE FALSE TRUE
# 6 TRUE FALSE FALSE FALSE FALSE FALSE FALSE
# 7 TRUE FALSE FALSE TRUE FALSE FALSE FALSE
# 8 FALSE FALSE FALSE FALSE FALSE TRUE FALSE
# 9 FALSE FALSE TRUE FALSE FALSE TRUE FALSE
# 10 FALSE FALSE FALSE TRUE FALSE FALSE TRUE
# 11 FALSE FALSE FALSE TRUE FALSE FALSE FALSE
# 12 FALSE FALSE FALSE FALSE TRUE FALSE FALSE
# 13 FALSE FALSE FALSE FALSE TRUE FALSE FALSE
# 14 TRUE FALSE TRUE FALSE FALSE FALSE FALSE
# 15 FALSE FALSE TRUE FALSE FALSE FALSE FALSE
# 16 FALSE FALSE FALSE FALSE FALSE TRUE FALSE
data
# Sample data in long format: one row per (id, code) pairing.
df1 <- data.frame(
  # ids are grouped below by the code run they belong to.
  id = c(
    1L, 2L, 3L,
    4L, 5L, 1L, 2L, 10L,
    6L, 7L, 14L, 2L,
    8L, 9L, 16L,
    10L, 11L, 7L,
    12L, 13L, 3L,
    14L, 15L, 1L, 2L, 9L
  ),
  # Codes appear in contiguous runs, so list each code once with its run length.
  code = rep(
    c("EP", "UM", "BZ", "TVOL", "NW", "SM", "GS"),
    times = c(3L, 5L, 4L, 3L, 3L, 3L, 5L)
  ),
  stringsAsFactors = FALSE
)
回答2:
For a tidyverse approach try this:
library(dplyr)
library(tidyr)
# Sample data as a tibble-classed data frame: character ids paired with codes,
# one row per (id, code) pairing.
df <- structure(
  list(
    # ids are grouped below by the code run they belong to.
    id = c(
      "1", "2", "3",
      "4", "5", "1", "2", "10",
      "6", "7", "14", "2",
      "8", "9", "16",
      "10", "11", "7",
      "12", "13", "3",
      "14", "15", "1", "2", "9"
    ),
    # Codes appear in contiguous runs, so list each code once with its run length.
    code = rep(
      c("EP", "UM", "BZ", "TVOL", "NW", "SM", "GS"),
      times = c(3L, 5L, 4L, 3L, 3L, 3L, 5L)
    )
  ),
  class = c("tbl_df", "tbl", "data.frame"),
  row.names = c(NA, -26L)
)
# Spread each code into its own "code."-prefixed column (one row per id), then
# turn the cells into presence flags: a non-missing cell means that id/code
# pair existed in the long data.
df %>%
  pivot_wider(
    id_cols = "id",
    names_prefix = "code.",
    names_from = "code",
    values_from = "code"
  ) %>%
  # across() supersedes mutate_at(); !is.na() is already logical, so wrapping it
  # in ifelse(..., TRUE, FALSE) adds nothing.
  mutate(across(starts_with("code"), ~ !is.na(.x)))
#> # A tibble: 16 x 8
#> id code.EP code.UM code.BZ code.TVOL code.NW code.SM code.GS
#> <chr> <lgl> <lgl> <lgl> <lgl> <lgl> <lgl> <lgl>
#> 1 1 TRUE TRUE FALSE FALSE FALSE FALSE TRUE
#> 2 2 TRUE TRUE TRUE FALSE FALSE FALSE TRUE
#> 3 3 TRUE FALSE FALSE FALSE FALSE TRUE FALSE
#> 4 4 FALSE TRUE FALSE FALSE FALSE FALSE FALSE
#> 5 5 FALSE TRUE FALSE FALSE FALSE FALSE FALSE
#> 6 10 FALSE TRUE FALSE FALSE TRUE FALSE FALSE
#> 7 6 FALSE FALSE TRUE FALSE FALSE FALSE FALSE
#> 8 7 FALSE FALSE TRUE FALSE TRUE FALSE FALSE
#> 9 14 FALSE FALSE TRUE FALSE FALSE FALSE TRUE
#> 10 8 FALSE FALSE FALSE TRUE FALSE FALSE FALSE
#> 11 9 FALSE FALSE FALSE TRUE FALSE FALSE TRUE
#> 12 16 FALSE FALSE FALSE TRUE FALSE FALSE FALSE
#> 13 11 FALSE FALSE FALSE FALSE TRUE FALSE FALSE
#> 14 12 FALSE FALSE FALSE FALSE FALSE TRUE FALSE
#> 15 13 FALSE FALSE FALSE FALSE FALSE TRUE FALSE
#> 16 15 FALSE FALSE FALSE FALSE FALSE FALSE TRUE
回答3:
An additional option: add a TRUE flag to every row before pivoting, then fill the missing combinations with FALSE.
library(tidyverse)
# Flag every observed (id, code) pair as TRUE, spread the codes into
# "code."-prefixed columns, and fill the combinations that never occurred
# with FALSE.
df %>%
  # Spell out TRUE/FALSE: the T and F shorthands are ordinary, reassignable variables.
  mutate(index = TRUE) %>%
  pivot_wider(
    # Name id_cols explicitly rather than relying on argument position.
    id_cols = id,
    names_from = code,
    values_from = index,
    names_prefix = "code.",
    # Filling during the pivot touches only the new flag columns, so the id
    # column is never asked to accept a logical replacement value.
    values_fill = FALSE
  )
来源:https://stackoverflow.com/questions/60744395/r-help-converting-factor-data-from-long-to-wide-and-assigning-logical-value