R - iteratively apply a function of a list of variables

前端 未结 2 2064
南旧
南旧 2021-01-07 08:58

My goal is to create a function that, when looped over multiple variables of a data frame, will return a new data frame containing the percents and 95% confidence intervals

2条回答
  •  情话喂你
    2021-01-07 09:22

    You can also keep the function mainly intact and use lapply over it:

    vars <- c("cyl", "am")
    lapply(vars, t1.props, data=mtcars)
    [[1]]
      variable level                ci.95
    1      cyl     4 34.38 (19.50, 53.11)
    2      cyl     6 21.88 (10.35, 40.45)
    3      cyl     8 43.75 (27.10, 61.94)
    
    [[2]]
      variable level                ci.95
    1       am     0  59.38 (40.94, 75.5)
    2       am     1 40.62 (24.50, 59.06)
    

    And combine them all into one data frame with:

    lst <- lapply(vars, t1.props, data=mtcars)
    do.call(rbind,lst)
    

    Data

    You must simplify the var and var.name assignments to:

    t1.props <- function(x, data = NULL) {
    
      # Grab dataframe and/or variable name
      if(!missing(data)){
        var <- data[,x]
      } else {
        var <- x
      }
    
      # Grab variable name for use in ouput
      var.name <- x
    
      # Omit observations with missing data
      var.clean <- na.omit(var)
    
      # Number of nonmissing observations
      n <- length(var.clean)
    
      # Grab levels of variable
      levels <- sort(unique(var.clean))
    
      # Create an empty data frame to store values
      out <- data.frame(variable = NA,
                        level = NA,
                        ci.95 = NA)
    
      # Estimate prop, se, and ci for each level of the variable
      for(i in seq_along(levels)) {
        prop <- paste0("prop", i)
        se <- paste0("se", i)
        log.prop <- paste0("log.trans", i)
        log.se <- paste0("log.se", i)
        log.l <- paste0("log.l", i)
        log.u <- paste0("log.u", i)
        lcl <- paste0("lcl", i)
        ucl <- paste0("ucl", i)
    
        # Find the proportion for each level of the variable
        assign(prop, sum(var.clean == levels[i]) / n)
    
        # Find the standard error for each level of the variable
        assign(se, sd(var.clean == levels[i]) /
                 sqrt(length(var.clean == levels[i])))
    
        # Perform a logit transformation of the original percentage estimate
        assign(log.prop, log(get(prop)) - log(1 - get(prop)))
    
        # Transform the standard error of the percentage to a standard error of its
        # logit transformation
        assign(log.se, get(se) / (get(prop) * (1 - get(prop))))
    
        # Calculate the lower and upper confidence bounds of the logit
        # transformation
        assign(log.l,
               get(log.prop) -
                 qt(.975, (length(var.clean == levels[i]) - 1)) * get(log.se))
        assign(log.u,
               get(log.prop) +
                 qt(.975, (length(var.clean == levels[i]) - 1)) * get(log.se))
    
        # Finally, perform inverse logit transformations to get the confidence bounds
        assign(lcl, exp(get(log.l)) / (1 + exp(get(log.l))))
        assign(ucl, exp(get(log.u)) / (1 + exp(get(log.u))))
    
        # Create a combined 95% CI variable for easy copy/paste into Word tables
        ci.95 <- paste0(round(get(prop) * 100, 2), " ",
                        "(", sprintf("%.2f", round(get(lcl) * 100, 2)), ",", " ",
                        round(get(ucl) * 100, 2), ")")
    
        # Populate the "out" data frame with values
        out <- rbind(out, c(as.character(var.name), levels[i], ci.95))
      }
    
      # Remove first (empty) row from out
      # But only in the first iteration
      if (is.na(out[1,1])) {
        out <- out[-1, ]
        rownames(out) <- 1:nrow(out)
      }
      out
    }
    

提交回复
热议问题