问题
This question links to this SO answer, except that here I want to use the variable passed as a function argument inside mutate_(). It works as long as I don't do any "calculations" in the mutate_() call:
# Two-row example data frame with numeric columns v1 and v2.
data <- data.frame(
  v1 = c(1, 2),
  v2 = c(3, 4)
)
# Add a column `v3` to `df` that is a copy of the column named by `varname`.
#
# df:      a data frame.
# varname: column name given as a character string, e.g. "v1"; mutate_()
#          takes the string as the expression for the new column.
# Returns `df` with the extra column `v3`.
func1 <- function(df, varname) {
  df %>% mutate_(v3 = varname)
}
# Pass the column name as a string; v3 becomes a copy of v1.
func1(data, "v1")
This gives the expected result:
v1 v2 v3
1 1 3 1
2 2 4 2
But if I do anything like this, it seems that I have not specified "v3" correctly:
# Attempt to add a column `v3` holding the sum of the column named by
# `varname` (a character string such as "v1").
#
# This is the variant that does not work: sum() is applied to the character
# string stored in `varname`, not to the column that string names.
func2 <- function(df, varname) {
  df %>% mutate_(v3 = sum(varname))
}
# Same call pattern as before: the column name is passed as a string.
func2(data, "v1")
This does not work. Why is it not equivalent to the following call made outside a function?
# Outside a function: v1 is resolved as a column of `data`, so v3 holds the
# column total in every row.
mutate(data, v3 = sum(v1))
Gives:
v1 v2 v3
1 1 3 3
2 2 4 3
UPDATE (after @docendo discimus 's solution):
The solution using lazyeval::interp() works. But it seems that I end up with a lot of typing once the function gets a little more complex. E.g., I wanted a function that returns a score and the p-value of Fisher's exact test on a 2x2 table for all N-P combinations in a data frame of counts, c.
require(plyr)
require(dplyr)
require(lazyeval)
# Reproducible example data: 20 random (N, P) label pairs with an integer-valued
# count column `c` in 0..10; distinct() then drops any fully duplicated rows.
# The three random draws are kept in their original order (N, P, c) so that
# set.seed(8) yields the same data as before.
set.seed(8)
df <-
  data.frame(
    # TRUE rather than T: `T` is an ordinary variable that can be reassigned.
    N = sample(c("n1", "n2", "n3", "n4"), 20, replace = TRUE),
    P = sample(c("p1", "p2", "p3", "p4"), 20, replace = TRUE),
    c = round(runif(20, 0, 10), 0)
  ) %>%
  distinct()
So I started writing a function test.df using a lot of lines with group_by and mutate. Without lazyeval it does NOT work (of course), but it would look something like this:
# Sketch of the enrichment / Fisher-test pipeline written with plain dplyr
# verbs.
#
# df:    data frame of counts.
# N, P:  names of the two grouping columns.
# count: name of the count column.
# ...:   accepted but not used in the body.
#
# NOTE: inside mutate()/group_by() the names `count`, `N` and `P` are used
# exactly as written, so the values passed for the `N`, `P` and `count`
# arguments are never turned into column references. That is why this version
# does not work when column names are passed in as strings.
test.df <- function(df=NULL, N=NULL, P=NULL, count=NULL, ...){
  require(plyr)
  require(dplyr)

  # Fisher's exact test on the 2x2 table built from the four cell counts;
  # returns c(p-value, odds-ratio estimate).
  test <- function(a, b, c, d) {
    data <- matrix(c(a, b, c, d), ncol = 2)
    c(p = fisher.test(data)$p.value,
      OR = fisher.test(data)$estimate)
  }

  df %>%
    ungroup() %>%
    mutate(n.total = sum(count)) %>%   # grand total of counts
    group_by(N) %>%
    mutate(n.N = sum(count)) %>%       # total per N
    group_by(P) %>%
    mutate(n.P = sum(count)) %>%       # total per P
    rowwise() %>%
    # NOTE(review): for a 2x2 table whose cells add up to n.total, the fourth
    # cell would be n.total - n.N - n.P + count; `2 * count` is kept as
    # posted -- confirm intent.
    mutate(
      score = (count / n.N) / (n.P / n.total),  # simple enrichment score
      p = test(count, n.N - count, n.P - count,
               n.total - n.N - n.P + 2 * count)[[1]],  # p values
      OR = test(count, n.N - count, n.P - count,
                n.total - n.N - n.P + 2 * count)[[2]]  # odds ratio
    ) %>%
    ungroup() %>%
    mutate(p_adj = p.adjust(p, method = "BH"))
}
Then I turned to the lazyeval way, and it works:
# Enrichment score and Fisher's exact test for every row of a count table,
# with the column names supplied as strings.
#
# df:    data frame of counts.
# N, P:  character strings naming the two grouping columns.
# count: character string naming the count column.
# ...:   accepted but not used in the body.
#
# Returns `df` (ungrouped) with the added columns n.total, n.N, n.P, score,
# p, OR and p_adj (Benjamini-Hochberg adjusted p).
test.df <- function(df=NULL, N=NULL, P=NULL, count=NULL, ...){
  require(plyr)
  require(dplyr)
  require(lazyeval)

  # Fisher's exact test on the 2x2 table built from the four cell counts;
  # returns c(p-value, odds-ratio estimate). The test is run once and both
  # results are read from the same fit.
  test <- function(a, b, c, d) {
    data <- matrix(c(a, b, c, d), ncol = 2)
    fit <- fisher.test(data)
    c(p = fit$p.value,
      OR = fit$estimate)
  }

  # Only `count` has to be interpolated: n.N, n.P, n.total and p are columns
  # created inside this pipeline and can be written literally in the formulas.
  count_sym <- as.name(count)
  with_count <- function(f) interp(f, count = count_sym)
  sum_count <- with_count(~sum(count))

  df %>%
    ungroup() %>%
    mutate_(n.total = sum_count) %>%          # grand total of counts
    group_by_(interp(~N, N = as.name(N))) %>%
    mutate_(n.N = sum_count) %>%              # total per N
    group_by_(interp(~P, P = as.name(P))) %>%
    mutate_(n.P = sum_count) %>%              # total per P
    rowwise() %>%
    # NOTE(review): for a 2x2 table whose cells add up to n.total, the fourth
    # cell would be n.total - n.N - n.P + count; `2 * count` is kept as
    # posted so results stay unchanged -- confirm intent.
    mutate_(
      score = with_count(~(count / n.N) / (n.P / n.total)),
      p = with_count(
        ~(test(count, n.N - count, n.P - count,
               n.total - n.N - n.P + 2 * count)[[1]])
      ),
      OR = with_count(
        ~(test(count, n.N - count, n.P - count,
               n.total - n.N - n.P + 2 * count)[[2]])
      )
    ) %>%
    ungroup() %>%
    mutate_(p_adj = ~p.adjust(p, method = "BH"))
}
Gives:
N P c n.total n.N n.P score p OR p_adj
1 n2 p1 9 89 23 27 1.2898551 1.856249e-01 2.0197105 0.309374904
2 n1 p2 3 89 21 16 0.7946429 1.000000e+00 0.7458441 1.000000000
3 n4 p3 5 89 20 30 0.7416667 5.917559e-01 0.6561651 0.724442095
4 n3 p1 9 89 25 27 1.1866667 3.053538e-01 1.7087545 0.469775140
5 n2 p3 3 89 23 30 0.3869565 2.237379e-02 0.2365142 0.074579284
6 n3 p4 3 89 25 16 0.6675000 5.428536e-01 0.5696359 0.723804744
7 n2 p1 5 89 23 27 0.7165862 4.412042e-01 0.6216888 0.630291707
8 n4 p3 2 89 20 30 0.2966667 1.503170e-02 0.1733288 0.060126805
9 n4 p3 10 89 20 30 1.4833333 5.406588e-02 2.9136831 0.108131750
10 n3 p4 1 89 25 16 0.2225000 3.524192e-02 0.1410289 0.091433058
11 n2 p1 1 89 23 27 0.1433172 1.312078e-03 0.0731707 0.008747184
12 n1 p3 1 89 21 30 0.1412698 1.168232e-03 0.0704372 0.008747184
13 n2 p4 1 89 23 16 0.2418478 6.108872e-02 0.1598541 0.111070394
14 n3 p1 3 89 25 27 0.3955556 3.793658e-02 0.2475844 0.091433058
15 n1 p2 10 89 21 16 2.6488095 8.710747e-05 10.5125558 0.001742149
16 n4 p2 3 89 20 16 0.8343750 1.000000e+00 0.8027796 1.000000000
17 n1 p4 7 89 21 16 1.8541667 4.114488e-02 3.6049777 0.091433058
18 n2 p4 4 89 23 16 0.9673913 1.000000e+00 1.0173534 1.000000000
19 n2 p2 0 89 23 16 0.0000000 9.115366e-03 0.0000000 0.045576831
20 n3 p3 9 89 25 30 1.0680000 6.157758e-01 1.3880504 0.724442095
Am I not using lazyeval appropriately, or am I perhaps building the function in a clumsy way? Any input is much appreciated.
回答1:
You have to use lazy evaluation (with the package lazyeval), for example like this:
library(lazyeval)
# Add a column `v3` holding the sum of the column named by `varname`.
#
# df:      a data frame.
# varname: column name as a character string, e.g. "v1".
# The string is converted to a symbol and spliced into the formula ~sum(x),
# so mutate_() receives sum(<column>) instead of sum("<string>").
func2 <- function(df, varname) {
  col_sym <- as.name(varname)
  sum_expr <- interp(~sum(x), x = col_sym)
  df %>% mutate_(v3 = sum_expr)
}
# Column name passed as a string; every row of v3 holds the total of v1.
func2(data, "v1")
# v1 v2 v3
#1 1 3 3
#2 2 4 3
回答2:
With the devel version of dplyr (0.5.0) or in the new version (0.6.0 - awaiting release in April 2017), this can be done using a slightly different syntax:
library(dplyr)
# Add a column `v3` holding the sum of the column given by `varname`.
#
# dat:     a data frame.
# varname: bare (unquoted) column name, e.g. v1.
funcN <- function(dat, varname) {
  # Capture the argument as a quosure instead of evaluating it.
  col_quo <- enquo(varname)
  # `!!` unquotes the quosure inside mutate().
  # Alternative spelling: mutate(v3 = sum(UQ(col_quo)))
  dat %>% mutate(v3 = sum(!!col_quo))
}
# Column passed as a bare name (no quotes); every row of v3 holds the total of v1.
funcN(data, v1)
# v1 v2 v3
#1 1 3 3
#2 2 4 3
Here, enquo takes the argument and returns it as a quosure (similar to substitute in base R) by capturing the function argument lazily, and inside mutate we ask for it to be unquoted (!! or UQ) so that it gets evaluated.
来源:https://stackoverflow.com/questions/28973056/in-r-pass-column-name-as-argument-and-use-it-in-function-with-dplyrmutate-a