Correlation between NA columns

人盡茶涼 提交于 2019-12-02 14:03:56
This is a correct, working solution that you can refer to:

corr <- function(directory, threshold = 0) {
  ## Compute the sulfate/nitrate correlation for every monitor file whose
  ## number of completely observed rows is strictly greater than 'threshold'.
  ##
  ## 'directory' is a character vector of length 1 indicating the location of
  ## the CSV files
  ##
  ## 'threshold' is a numeric vector of length 1 indicating the number of
  ## completely observed observations (on all variables) required to compute
  ## the correlation between nitrate and sulfate; the default is 0
  ##
  ## Returns a numeric vector of correlations, one per qualifying monitor, in
  ## the order the ids come back from complete(); numeric(0) if none qualify.
  counts <- complete(directory)
  ids <- counts$id[counts$nobs > threshold]

  ## vapply preallocates the result (no repeated c() copies) and guarantees a
  ## plain numeric vector even when 'ids' is empty.
  vapply(ids, function(i) {
    ## Monitor files are named with a zero-padded 3-digit id, e.g. "007.csv".
    path <- file.path(
      directory,
      paste0(formatC(i, width = 3, flag = "0"), ".csv")
    )
    monitor <- read.csv(path)
    ## Keep only rows with no missing value in any column before correlating.
    observed <- monitor[complete.cases(monitor), ]
    cor(observed$sulfate, observed$nitrate)
  }, numeric(1))
}
complete <- function(directory, id = 1:332) {
  ## Count the completely observed rows in each requested monitor file.
  ##
  ## 'directory' is a character vector of length 1 indicating the location of
  ## the CSV files
  ##
  ## 'id' is a vector of monitor ids; each id is mapped to the file
  ## "<directory>/<id zero-padded to 3 digits>.csv". The default is 1:332.
  ##
  ## Returns a data frame with columns 'id' and 'nobs' (number of rows with no
  ## missing value in any column), one row per requested id.
  count_complete <- function(i) {
    path <- file.path(
      directory,
      paste0(formatC(i, width = 3, flag = "0"), ".csv")
    )
    sum(complete.cases(read.csv(path)))
  }

  ## vapply (rather than sapply) keeps 'nobs' an integer vector even when 'id'
  ## is empty; sapply would return list() and the 'nobs' column would vanish.
  nobs <- vapply(id, count_complete, integer(1))
  data.frame(id = id, nobs = nobs)
}
## Example: correlations for the monitors in "specdata" that have more than
## 150 completely observed rows, then a peek at the first few values.
cr <- corr(directory = "specdata", threshold = 150)
head(cr)

This problem would probably best be broken up into two steps -- computing the value for each file and collecting the results for all your files.

corr.file <- function(filename, directory = "specdata", threshold = 0) {
  ## Correlation between the 2nd and 3rd columns of a single CSV file, using
  ## only the rows that have no missing value in any column.
  ##
  ## 'filename' is the name of the CSV file inside 'directory'
  ##
  ## 'directory' is the folder that contains the file; the default is
  ## "specdata"
  ##
  ## 'threshold' is the number of completely observed rows that must be
  ## exceeded for a correlation to be computed; the default is 0
  ##
  ## Returns a numeric vector of length 1 holding the correlation, or
  ## numeric(0) when the file has 'threshold' or fewer complete rows.
  data <- read.csv(file.path(directory, filename))
  observed <- complete.cases(data)

  ## Too few complete rows: contribute nothing to the collected results.
  if (sum(observed) <= threshold) {
    return(numeric())
  }

  ## Columns are addressed by position (2 and 3); rows with missing values
  ## are dropped so cor() does not return NA.
  cor(data[observed, 2], data[observed, 3])
}

## Compute the per-file result for every file in "specdata" and flatten it
## into one numeric vector. Files that yield numeric(0) simply drop out;
## sapply() would instead return a ragged list in that case.
a <- list.files("specdata")
correlations <- unlist(lapply(a, corr.file), use.names = FALSE)
易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!