In R: pass column name as argument and use it in function with dplyr::mutate() and lazyeval::interp()

我们两清 提交于 2019-11-28 05:32:52

问题


This question links to this SO answer except that here I want to use the variable specified as function arg in a mutate_(). It works if I don't make any "calculations" in the mutate_():

data <- 
  data.frame(v1=c(1,2),
             v2=c(3,4))



func1 <- function(df, varname){
    res <-
      df %>%
      mutate_(v3=varname)
    return(res)
  }
func1(data, "v1")

This give the expected:

  v1 v2 v3
1  1  3  1
2  2  4  2

But if I do anything like this, it seems that I have not specified "v3" correctly:

func2 <- function(df, varname){
  res <-
    df %>%
    mutate_(v3=sum(varname))
  return(res)
}
func2(data, "v1") 

Does not work; how come it is not equivalent to this outside a function ?:

data %>%
  mutate(v3=sum(v1))

Gives:

  v1 v2 v3
1  1  3  3
2  2  4  3

UPDATE (after @docendo discimus 's solution): The solution about using lazyeval::interp() works. But it seems that Im getting a lot of typing if one have a little more complex function. Eg. I wanted a function that could return score and fisher's 2x2 pvalue for all combinations of N-P in a data frame of counts, c.

require(plyr)
require(dplyr)
require(lazyeval)
set.seed(8)
df <- 
  data.frame(
    N = sample(c("n1","n2","n3","n4"),20, replace=T),
    P = sample(c("p1","p2","p3","p4"),20, replace=T),
    c = round(runif(20,0,10),0)) %>%
  distinct()

So I started making a function test.df using a lot of lines with group_byand mutate. Without lazyeval it does NOT work (of cause), but would look something like this:

test.df <- function(df=NULL, N=NULL, P=NULL, count=NULL, ...){
  require(plyr)
  require(dplyr)


  test <- function(a,b,c,d){
    data <- matrix(c(a,b,c,d),ncol=2)
    c(p = fisher.test(data)$p.value,
      OR = fisher.test(data)$estimate)
  }

  df %>%
    ungroup() %>%
    mutate(n.total = sum(count)) %>% 
    group_by(N) %>%
    mutate(n.N=sum(count)) %>%
    group_by(P) %>%
    mutate(n.P = sum(count)) %>%
    rowwise() %>%
    mutate(score(count/n.N)/(n.P/n.total), #simple enrichment score
           p=test(count,n.N-count,n.P-count,n.total-n.N-n.P+2*count)[[1]], #p values
           OR=test(count,n.N-count,n.P-count,n.total-n.N-n.P+2*count)[[2]]) #Odds ratio
    ungroup() %>%
    mutate(p_adj=p.adjust(p, method="BH"))

} 

Then I turned to the lazyval-way, and it works!:

test.df <- function(df=NULL, N=NULL, P=NULL, count=NULL, ...){
  require(plyr)
  require(dplyr)
  require(lazyeval)

  test <- function(a,b,c,d){
    data <- matrix(c(a,b,c,d),ncol=2)
    c(p = fisher.test(data)$p.value,
      OR = fisher.test(data)$estimate)
  }

  df %>%
    ungroup() %>%
    mutate_(n.total = interp(~sum(count), count=as.name(count))) %>% 
    group_by_(interp(~N, N=as.name(N))) %>%
    mutate_(n.N = interp(~sum(count), count=as.name(count))) %>%
    group_by_(interp(~P, P=as.name(P))) %>%
    mutate_(n.P = interp(~sum(count), count=as.name(count))) %>%
    rowwise() %>%
    mutate_(score=interp(~(count/n.N)/(n.P/n.total), 
                       .values=list(count=as.name(count),
                                    n.N=quote(n.N),
                                    n.P=quote(n.P),
                                    n.total=quote(n.total))),
            p=interp(~(test(count,n.N-count,n.P-count,n.total-n.N-n.P+2*count)[[1]]),
                     .values=list(fisher=quote(fisher),
                                  count=as.name(count),
                                  n.N=quote(n.N),
                                  n.P=quote(n.P),
                                  n.total=quote(n.total))),
            OR=interp(~(test(count,n.N-count,n.P-count,n.total-n.N-n.P+2*count)[[2]]),
                      .values=list(fisher=quote(fisher),
                                   count=as.name(count),
                                   n.N=quote(n.N),
                                   n.P=quote(n.P),
                                   n.total=quote(n.total)))) %>% 
    ungroup() %>%
    mutate_(p_adj=interp(~p.adjust(p, method="BH"),
                         .values=list(p.adjust=quote(p.adjust),
                                      p=quote(p))))

} 

Gives:

    N  P  c n.total n.N n.P     score            p         OR       p_adj
1  n2 p1  9      89  23  27 1.2898551 1.856249e-01  2.0197105 0.309374904
2  n1 p2  3      89  21  16 0.7946429 1.000000e+00  0.7458441 1.000000000
3  n4 p3  5      89  20  30 0.7416667 5.917559e-01  0.6561651 0.724442095
4  n3 p1  9      89  25  27 1.1866667 3.053538e-01  1.7087545 0.469775140
5  n2 p3  3      89  23  30 0.3869565 2.237379e-02  0.2365142 0.074579284
6  n3 p4  3      89  25  16 0.6675000 5.428536e-01  0.5696359 0.723804744
7  n2 p1  5      89  23  27 0.7165862 4.412042e-01  0.6216888 0.630291707
8  n4 p3  2      89  20  30 0.2966667 1.503170e-02  0.1733288 0.060126805
9  n4 p3 10      89  20  30 1.4833333 5.406588e-02  2.9136831 0.108131750
10 n3 p4  1      89  25  16 0.2225000 3.524192e-02  0.1410289 0.091433058
11 n2 p1  1      89  23  27 0.1433172 1.312078e-03  0.0731707 0.008747184
12 n1 p3  1      89  21  30 0.1412698 1.168232e-03  0.0704372 0.008747184
13 n2 p4  1      89  23  16 0.2418478 6.108872e-02  0.1598541 0.111070394
14 n3 p1  3      89  25  27 0.3955556 3.793658e-02  0.2475844 0.091433058
15 n1 p2 10      89  21  16 2.6488095 8.710747e-05 10.5125558 0.001742149
16 n4 p2  3      89  20  16 0.8343750 1.000000e+00  0.8027796 1.000000000
17 n1 p4  7      89  21  16 1.8541667 4.114488e-02  3.6049777 0.091433058
18 n2 p4  4      89  23  16 0.9673913 1.000000e+00  1.0173534 1.000000000
19 n2 p2  0      89  23  16 0.0000000 9.115366e-03  0.0000000 0.045576831
20 n3 p3  9      89  25  30 1.0680000 6.157758e-01  1.3880504 0.724442095

Am I not using lazyeval appropriately, or maybe building the function in a stupid way ? Some input is really appreciated here.


回答1:


You have to use lazy evaluation (with the package lazyeval), for example like this:

library(lazyeval)
func2 <- function(df, varname){
     df %>%
       mutate_(v3=interp(~sum(x), x = as.name(varname)))
}
func2(data, "v1")
#  v1 v2 v3
#1  1  3  3
#2  2  4  3



回答2:


With the devel version of dplyr (0.5.0) or in the new version (0.6.0 - awaiting release in April 2017), this can be done using slightly different syntax

library(dplyr)
funcN <- function(dat, varname){
 expr <- enquo(varname)
 dat %>%
     mutate(v3 = sum(!!expr))
     #or
     #mutate(v3 = sum(UQ(expr)))

} 

funcN(data, v1)
#  v1 v2 v3
#1  1  3  3
#2  2  4  3

Here, enquo takes the arguments and returns the value as a quosure (similar to substitute in base R) by evaluating the function arguments lazily and inside the summarise, we ask it to unquote (!! or UQ) so that it gets evaluated.



来源:https://stackoverflow.com/questions/28973056/in-r-pass-column-name-as-argument-and-use-it-in-function-with-dplyrmutate-a

易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!