问题
I tried to create an easier way to refer to columns with the function below, by allowing both indexes and names. See also link.
So this one works:
# Build a one-row example table: the first line of the text supplies the
# column names a..j (header = TRUE), the second line supplies the values 1..10.
df <- data.table::fread("a b c d e f g h i j
1 2 3 4 5 6 7 8 9 10",
header = TRUE)
# Mixing numbers and strings inside c() coerces every element to character,
# so the positions arrive as "1", "2", ... next to the names "i" and "j".
columns <- c(1:8, "i", 9, "j")
# Map a mixed vector of column positions and column names onto positions.
#
# df:      object whose names() are the column names.
# columns: vector mixing positions and names (character after c() coercion).
# Returns a numeric vector of positions.
#
# Caveat: the names are resolved with a single elementwise `==`, which
# recycles the unresolved entries along names(df). It therefore only finds a
# name when that alignment happens to line up.
col2num <- function(df, columns) {
  # Anything that is not numeric text turns into NA (with a coercion warning).
  positions <- as.numeric(columns)
  unresolved <- is.na(positions)
  positions[unresolved] <- which(names(df) == columns[unresolved])
  positions
}
col2num(df, columns)
#> Warning in col2num(df, columns): NAs introduced by coercion
#> [1] 1 2 3 4 5 6 7 8 9 9 10
And this one works too:
# Translate a mixed vector of column positions and column names into the
# matching column names of `df`.
#
# df:      object whose names() are the column names.
# columns: vector mixing positions and names (character after c() coercion).
# Returns a character vector of column names.
#
# Caveat: names are resolved with one elementwise `==` (with recycling), not
# with a per-name lookup, so the result depends on how the entries line up
# with names(df).
col2name <- function(df, columns) {
  col_names <- names(df)
  # Non-numeric entries become NA here; those are the ones given by name.
  positions <- as.numeric(columns)
  is_label <- is.na(positions)
  positions[is_label] <- which(col_names == columns[is_label])
  col_names[positions]
}
col2name(df, columns)
[1] "a" "b" "c" "d" "e" "f" "g" "h" "i" "i" "j"
Warning message:
In col2name(df, columns) : NAs introduced by coercion
But when I do the following, it no longer works:
columns <- c(1:7, "j", 8, "i")
# Translate a mixed vector of column positions and column names into the
# corresponding column names of `df`.
col2name <- function(df, columns){
# Entries that are not numeric text become NA (with a coercion warning).
nums <- as.numeric(columns)
# `==` compares names(df) with the leftover names position by position,
# recycling the shorter vector; it is not a per-name lookup. When no position
# happens to line up, which() returns integer(0) and the assignment fails
# with "replacement has length zero".
nums[is.na(nums)] <- which(names(df)==columns[is.na(nums)])
return(names(df)[nums])
}
col2name(df, columns)
Error in nums[is.na(nums)] <- which(names(df) == columns[is.na(nums)]) :
replacement has length zero
Also, this one does not work:
columns <- c("a", "j", 8, "i")
# Resolve a mixed vector of column positions and names to column names.
#
# df:      object whose names() are the column names.
# columns: vector mixing positions and names (character after c() coercion).
# Returns a character vector of column names.
#
# Caveat: the by-name entries are compared against names(df) elementwise with
# recycling, and the hits are recycled again on assignment, so several names
# can silently resolve to the wrong columns.
col2name <- function(df, columns) {
  idx <- as.numeric(columns)
  by_name <- is.na(idx)
  hits <- which(names(df) == columns[by_name])
  idx[by_name] <- hits
  names(df)[idx]
}
col2name(df, columns)
[1] "a" "i" "h" "a"
How can I fix this?
回答1:
We just need to look up each column name individually instead of comparing all of them against names(df) at once:
# Convert a mixed vector of column positions and column names to positions.
#
# df:      object whose names() are the column names (data.frame, data.table).
# columns: vector mixing positions and names; numeric-looking entries are
#          taken as positions, everything else is looked up in names(df).
# Returns a numeric vector of positions, same length and order as `columns`.
# Stops with an error if a name is not a column of `df`.
#
# as.numeric() emits "NAs introduced by coercion" when `columns` contains
# names; wrap the call in suppressWarnings() to silence it.
col2num <- function(df, columns) {
  nums <- as.numeric(columns)
  by_name <- is.na(nums)
  # match() looks every name up on its own and always returns one integer per
  # name (NA when absent), so the result type cannot degrade to a list.
  positions <- match(columns[by_name], names(df))
  if (anyNA(positions)) {
    stop(
      "Unknown column name(s): ",
      paste(columns[by_name][is.na(positions)], collapse = ", "),
      call. = FALSE
    )
  }
  nums[by_name] <- positions
  nums
}
# Convert a mixed vector of column positions and column names to names.
#
# df:      object whose names() are the column names (data.frame, data.table).
# columns: vector mixing positions and names; numeric-looking entries are
#          taken as positions, everything else is looked up in names(df).
# Returns a character vector of column names, same length and order as
# `columns`. Stops with an error if a name is not a column of `df`.
#
# as.numeric() emits "NAs introduced by coercion" when `columns` contains
# names; wrap the call in suppressWarnings() to silence it.
col2name <- function(df, columns) {
  col_names <- names(df)
  nums <- as.numeric(columns)
  by_name <- is.na(nums)
  # match() looks every name up on its own and always returns one integer per
  # name (NA when absent), so the result type cannot degrade to a list.
  positions <- match(columns[by_name], col_names)
  if (anyNA(positions)) {
    stop(
      "Unknown column name(s): ",
      paste(columns[by_name][is.na(positions)], collapse = ", "),
      call. = FALSE
    )
  }
  nums[by_name] <- positions
  col_names[nums]
}
# Two requests that mix positions and names; the second places a name between
# positions, which is the ordering that broke the original function.
columns1 <- c(1:8, "i", 9, "j")
columns2 <- c(1:7, "j", 8, "i")
# suppressWarnings() hides the "NAs introduced by coercion" warning that
# as.numeric() raises for the entries that are column names.
suppressWarnings(col2name(df, columns1))
#> [1] "a" "b" "c" "d" "e" "f" "g" "h" "i" "i" "j"
suppressWarnings(col2num(df, columns1))
#> [1] 1 2 3 4 5 6 7 8 9 9 10
suppressWarnings(col2num(df, columns2))
#> [1] 1 2 3 4 5 6 7 10 8 9
suppressWarnings(col2name(df, columns2))
#> [1] "a" "b" "c" "d" "e" "f" "g" "j" "h" "i"
I am using suppressWarnings() to avoid getting the following warnings each time I run the function:
Warning messages: 1: In col2name(df, columns) : NAs introduced by coercion 2: In lapply(X = X, FUN = FUN, ...) : NAs introduced by coercion
回答2:
An alternative, with the disadvantage that the data must be a data.frame object:
# Select columns of a data.frame by a mix of positions and names.
#
# df:   a data.frame (single-argument `[` is used for column selection).
# cols: vector mixing column positions and column names; numeric-looking
#       entries are taken as positions, everything else is matched against
#       names(df).
# Returns `df` restricted to the requested columns, in the order requested;
# a column requested twice appears twice.
# Stops with an error if a name is not a column of `df`.
indexr <- function(df, cols) {
  cols <- as.character(cols)
  # Numeric text becomes a position; anything else becomes NA and is treated
  # as a name. The coercion warning is expected here, so it is silenced.
  positions <- suppressWarnings(as.numeric(cols))
  by_name <- is.na(positions)
  # Fill the name slots in place so the caller's ordering is preserved.
  positions[by_name] <- match(cols[by_name], names(df))
  if (anyNA(positions)) {
    stop(
      "Unknown column name(s): ",
      paste(cols[is.na(positions)], collapse = ", "),
      call. = FALSE
    )
  }
  df[positions]
}
indexr(iris,c(1,"Sepal.Width"))
Sepal.Length Sepal.Width
1 5.1 3.5
2 4.9 3.0
3 4.7 3.2
With your data (the drawback is that we have to convert back to a data.frame; one might define a method for that):
data.table::setDF(df)
indexr(df,columns)
a b c d e f g h i i.1 j
1 1 2 3 4 5 6 7 8 9 9 10
Edit: To return names instead:
# Return the names of the columns of a data.frame selected by a mix of
# positions and names.
#
# df:   a data.frame (single-argument `[` is used for column selection).
# cols: vector mixing column positions and column names; numeric-looking
#       entries are taken as positions, everything else is matched against
#       names(df).
# Returns a character vector with the names of the selected columns, in the
# order requested. A column requested twice is reported twice, the repeat
# carrying the suffix that data.frame subsetting adds (e.g. "i.1").
# Stops with an error if a name is not a column of `df`.
indexr <- function(df, cols) {
  cols <- as.character(cols)
  # Numeric text becomes a position; anything else becomes NA and is treated
  # as a name. The coercion warning is expected here, so it is silenced.
  positions <- suppressWarnings(as.numeric(cols))
  by_name <- is.na(positions)
  # Fill the name slots in place so the caller's ordering is preserved.
  positions[by_name] <- match(cols[by_name], names(df))
  if (anyNA(positions)) {
    stop(
      "Unknown column name(s): ",
      paste(cols[is.na(positions)], collapse = ", "),
      call. = FALSE
    )
  }
  names(df[positions])
}
indexr(mtcars, c("mpg", 5))
#> [1] "mpg"  "drat"
indexr(df,columns)
[1] "a" "b" "c" "d" "e" "f" "g" "h" "i" "i.1"
[11] "j"
来源:https://stackoverflow.com/questions/57293986/a-function-that-allows-referral-to-columns-with-both-index-and-name