Converting factors to binary in R

前端未结

关注

 4  1503

眼角桃花 2020-12-01 19:08

I am trying to convert a factor variable into binary / boolean (0 or 1).

Sample data:

df  <-data.frame(a = c(1,2,3), b = c(1,1,2), c = c(\"Rose\",


      
      
        
          4条回答        

        
                    
            
            
                         
                
              
              
                
                   暗喜
                                             
                
                
                (楼主)
            
              
              
                2020-12-01 19:43
              

            
            
                        
Using dplyr and putting it in a pipe. @bramtayl's answer was cleaner, but I couldn't find a way to use a custom variable name with it. This is less clean but more DRY.

#' Append integer indicator columns for one column of a data frame
#'
#' Builds a no-intercept design matrix for `variable` with
#' `stats::model.matrix()`, so a factor column yields one 0/1 column per
#' level. The new columns are named `is_<level>` and are bound to the right
#' of `df`. Rows where `variable` is `NA` are kept and get `NA` in every
#' indicator column.
#'
#' @param df A data frame (or tibble) containing the column to expand.
#' @param variable A single string naming the column of `df` to expand.
#' @return `df` with the integer indicator columns appended, as returned by
#'   `dplyr::bind_cols()`.
expand_factor <- function(df, variable) {
  variable <- as.character(variable)
  stopifnot(
    is.data.frame(df),
    length(variable) == 1L,
    !is.na(variable),
    variable %in% names(df)
  )

  # `~ variable - 1`: dropping the intercept gives one column per level
  # instead of treatment contrasts against a reference level.
  formulae <- stats::reformulate(variable, intercept = FALSE)

  # Pass `na.pass` straight to model.frame() rather than flipping the global
  # `na.action` option: nothing has to be restored afterwards, and an error
  # in here cannot leave the session's options in a modified state.
  model_data <- stats::model.frame(
    formulae,
    data = df,
    na.action = stats::na.pass
  )
  expanded <- stats::model.matrix(formulae, data = model_data)

  # model.matrix() names columns "<variable><level>". Strip the prefix by
  # position so that regex metacharacters in `variable`, or level names that
  # happen to contain `variable`, are left untouched.
  level_names <- substring(colnames(expanded), nchar(variable) + 1L)
  colnames(expanded) <- paste0("is_", level_names)

  # Convert in place; NA stays NA and the column names are preserved.
  storage.mode(expanded) <- "integer"

  dplyr::bind_cols(df, dplyr::as_tibble(expanded))
}

# Demo: expand the iris species factor into one indicator column per level.
library(dplyr)

df <- tibble(x = iris$Species, y = iris$Petal.Width)

# Prepend an all-NA row to show that missing values are carried through.
# The NA row is typed to match `df` so the factor levels of `x` are kept.
df <- bind_rows(
  tibble(x = factor(NA, levels = levels(iris$Species)), y = NA_real_),
  df
)

df %>%
  expand_factor("x")

> df %>% 
+   expand_factor('x')
# A tibble: 151 × 5
        x     y is_setosa is_versicolor is_virginica
                           
1        NA        NA            NA           NA
2  setosa   0.2         1             0            0
3  setosa   0.2         1             0            0
4  setosa   0.2         1             0            0
5  setosa   0.2         1             0            0
6  setosa   0.2         1             0            0
7  setosa   0.4         1             0            0
8  setosa   0.3         1             0            0
9  setosa   0.2         1             0            0
10 setosa   0.2         1             0            0
# ... with 141 more rows

    
             
                                                        
            
            
              
                
                0
              
                   
                
               讨论(0)
              
                                                  
              
              
                          
             
       
          
              
                                       
     查看其它4个回答


            
                         
                    


               
            
    发布评论:
    
         
                        
    
    提交评论 
  
  

                    
                    
                    
                        
                        
                         加载中...
                        
                    
                
          
                              			
        
        
        
          
            
            
              
              
            
    


                                 
              
            
                          
    

        
         
                验证码
                
                  
                
                
                   看不清?
                
              
                                  
                    
   
                 
             
              提交回复