I am trying to convert a factor variable into binary / boolean (0 or 1).
Sample data:
df <-data.frame(a = c(1,2,3), b = c(1,1,2), c = c(\"Rose\",
This uses dplyr and works inside a pipe. @bramtayl's answer was cleaner, but I couldn't find a way to use a custom variable name with it. This version is less clean but more DRY:
#' Append 0/1 indicator columns for one categorical column
#'
#' Expands the column named by `variable` into one integer indicator column
#' per level (named `is_<level>`) and binds those columns to the right of `df`.
#' Rows where the column is `NA` are kept; their indicator values are `NA`.
#'
#' @param df A data frame or tibble.
#' @param variable A single string naming the column of `df` to expand.
#' @return `df` with the indicator columns appended via `dplyr::bind_cols()`.
expand_factor <- function(df, variable) {
  if (is.name(variable)) {
    variable <- as.character(variable)
  }
  if (!is.character(variable) || length(variable) != 1L || is.na(variable) ||
      !variable %in% names(df)) {
    stop("`variable` must be a single string naming a column of `df`.",
         call. = FALSE)
  }

  # Build `~ <variable> - 1` from a symbol rather than pasted text so that
  # non-syntactic column names are handled too.
  formulae <- eval(bquote(~ .(as.name(variable)) - 1))

  # Keep NA rows by passing na.pass to the model frame directly; this avoids
  # changing (and having to restore) the global `na.action` option.
  frame <- stats::model.frame(formulae, data = df, na.action = stats::na.pass)
  expanded <- stats::model.matrix(formulae, data = frame)

  # Replace only the leading variable-name prefix with "is_". A regex
  # replacement over the whole name would also rewrite level names that
  # happen to contain the variable name.
  prefixes <- unique(c(attr(stats::terms(frame), "term.labels"), variable))
  cols <- colnames(expanded)
  renamed <- rep(FALSE, length(cols))
  for (prefix in prefixes) {
    hit <- !renamed & startsWith(cols, prefix)
    cols[hit] <- paste0("is_", substring(cols[hit], nchar(prefix) + 1L))
    renamed <- renamed | hit
  }
  colnames(expanded) <- cols

  # Indicators are stored as integers (0L / 1L / NA).
  storage.mode(expanded) <- "integer"
  rownames(expanded) <- NULL

  dplyr::bind_cols(df, as.data.frame(expanded))
}
library(dplyr)
# Example: build a tibble from iris, prepend an all-NA row, and expand the
# factor column `x` into is_* indicator columns.
# tibble() replaces the deprecated data_frame() constructor.
df <- tibble(x = iris$Species, y = iris$Petal.Width)
df <- rbind(tibble(x = NA, y = NA), df)
df %>%
  expand_factor("x")
> df %>%
+ expand_factor('x')
# A tibble: 151 x 5
x y is_setosa is_versicolor is_virginica
1 <NA> NA NA NA NA
2 setosa 0.2 1 0 0
3 setosa 0.2 1 0 0
4 setosa 0.2 1 0 0
5 setosa 0.2 1 0 0
6 setosa 0.2 1 0 0
7 setosa 0.4 1 0 0
8 setosa 0.3 1 0 0
9 setosa 0.2 1 0 0
10 setosa 0.2 1 0 0
# ... with 141 more rows