问题
I understand how to use map to iterate over arguments in a df and create a new list column.
For example,
# Parameter grid: one row for every combination of the four settings
# (3 * 3 * 2 * 2 = 36 rows).
params <- expand.grid(
  param_a = c(2, 4, 6),
  param_b = c(3, 6, 9),
  param_c = c(50, 100),
  param_d = c(1, 0)
)
# Build a list column with purrr::map(): each row's `test_var` is the value 5
# repeated `param_a` times. as_tibble() replaces as.tbl(), which has been
# deprecated since dplyr 1.0.0.
df.preprocessed <- dplyr::as_tibble(params) %>%
  dplyr::mutate(
    test_var = purrr::map(param_a, function(x) {
      rep(5, x)
    })
  )
However, how do I use the analogous syntax with pmap in the event that I want to specify more than 2 parameters?
# Attempt with purrr::pmap(): the list passed to pmap() has four named
# elements (x, y, z, u), but the function below only accepts `x` and `y`, so
# the call fails with the "unused arguments" error quoted after this snippet.
df.preprocessed <- dplyr::as.tbl(params) %>%
dplyr::mutate(test_var = purrr::pmap(list(x = param_a
,y = param_b
,z = param_c
,u = param_d), function(x, y){
rep(5,x)*y
}
)
)
Error output:
Error in mutate_impl(.data, dots) : Evaluation error: unused arguments (z = .l[[c(3, i)]], u = .l[[c(4, i)]]).
回答1:
With `pmap()`, the first argument is a list, so you can pass it your data frame directly, and then name the arguments of your function with the same names as the columns in your data frame. You'll need `unnest()` to unpack the list elements returned by `pmap()`:
# Pass the whole tibble (`.`) to pmap(): every column becomes a named argument,
# so the function names the columns it needs and absorbs the rest with `...`.
# as_tibble() replaces as.tbl(), which has been deprecated since dplyr 1.0.0.
df.preprocessed <- dplyr::as_tibble(params) %>%
  dplyr::mutate(
    test_var = purrr::pmap(., function(param_a, param_b, ...) {
      rep(5, param_a) * param_b
    })
  ) %>%
  # Name the list column explicitly: calling unnest() without a column is
  # deprecated in tidyr >= 1.0.0, and the positional form also works in
  # older tidyr releases.
  tidyr::unnest(test_var)
> df.preprocessed
# A tibble: 144 x 5
param_a param_b param_c param_d test_var
<dbl> <dbl> <dbl> <dbl> <dbl>
1 2 3 50 1 15
2 2 3 50 1 15
3 4 3 50 1 15
4 4 3 50 1 15
5 4 3 50 1 15
6 4 3 50 1 15
7 6 3 50 1 15
8 6 3 50 1 15
9 6 3 50 1 15
10 6 3 50 1 15
# ... with 134 more rows
回答2:
How about using `rowwise()` and `mutate()` directly, without `map()`:
# Return a vector holding `param_a` copies of 5, each multiplied by `param_b`.
my_fun <- function(param_a, param_b) {
  fives <- rep(5, times = param_a)
  fives * param_b
}
# Row-wise alternative: evaluate my_fun() once per row and wrap the result in
# list() so it is stored as a list column, then expand that column.
# - rowwise() is namespaced like the other verbs, so the snippet does not
#   depend on dplyr being attached for that call.
# - as_tibble() replaces as.tbl(), deprecated since dplyr 1.0.0.
# - ungroup() drops the row-wise grouping before the result is reshaped.
# - unnest() names its column; the bare call is deprecated in tidyr >= 1.0.0.
df.preprocessed <- dplyr::as_tibble(params) %>%
  dplyr::rowwise() %>%
  dplyr::mutate(test_var = list(my_fun(param_a, param_b))) %>%
  dplyr::ungroup() %>%
  tidyr::unnest(test_var)
回答3:
We could try
# Multiply a vector of `x` fives by `y`; `...` absorbs any extra arguments
# (such as `z` and `u`) that the caller passes along.
f1 <- function(x, y, ...) {
  rep(5, times = x) * y
}
# Supply all four columns to pmap(); f1() uses `x` and `y` and ignores the
# others through `...`, so `test_var` stays a list column (one vector per row).
# as_tibble() replaces as.tbl(), which has been deprecated since dplyr 1.0.0.
df.preprocessed <- dplyr::as_tibble(params) %>%
  dplyr::mutate(
    test_var = purrr::pmap(
      list(x = param_a, y = param_b, z = param_c, u = param_d),
      f1
    )
  )
df.preprocessed
# A tibble: 36 x 5
# param_a param_b param_c param_d test_var
# <dbl> <dbl> <dbl> <dbl> <list>
# 1 2 3 50 1 <dbl [2]>
# 2 4 3 50 1 <dbl [4]>
# 3 6 3 50 1 <dbl [6]>
# 4 2 6 50 1 <dbl [2]>
# 5 4 6 50 1 <dbl [4]>
# 6 6 6 50 1 <dbl [6]>
# 7 2 9 50 1 <dbl [2]>
# 8 4 9 50 1 <dbl [4]>
# 9 6 9 50 1 <dbl [6]>
#10 2 3 100 1 <dbl [2]>
# ... with 26 more rows
回答4:
You can do this:
# Formula shorthand: inside the lambda, `.x` and `.y` refer to the first and
# second elements of the list given to pmap(); the remaining elements (z, u)
# are simply not used.
# as_tibble() replaces as.tbl(), which has been deprecated since dplyr 1.0.0.
df.preprocessed <- dplyr::as_tibble(params) %>%
  dplyr::mutate(
    test_var = purrr::pmap(
      list(x = param_a, y = param_b, z = param_c, u = param_d),
      ~ rep(5, .x) * .y
    )
  )
or
# Formula shorthand with positional placeholders: `..1` and `..2` refer to the
# first and second elements of the list given to pmap(), in list order.
# as_tibble() replaces as.tbl(), which has been deprecated since dplyr 1.0.0.
df.preprocessed <- dplyr::as_tibble(params) %>%
  dplyr::mutate(
    test_var = purrr::pmap(
      list(x = param_a, y = param_b, z = param_c, u = param_d),
      ~ rep(5, ..1) * ..2
    )
  )
The second way is more general, as you can also use `..3`, `..4`, etc. to refer to further elements of the list.
来源:https://stackoverflow.com/questions/46531582/using-purrrpmap-within-mutate-to-create-list-column