Function to count NA values at each level of a factor

后端未结

关注

 3  1267

I have this dataframe:

set.seed(50)
data <- data.frame(age=c(rep(\"juv\", 10), rep(\"ad\", 10)),
                   sex=c(rep(\"m\", 10), rep(\"f\", 10)),


                      
              相关标签:


      
      
        
          3条回答        

        
                         				            
            
           
            
                              
                
              
              
                
                  借酒劲吻你        
                
              
                            
                2020-12-18 13:16
              
            
            
                                                                       
Use aggregate:

# Count the NA entries of `var` within each group defined by `factor`.
#
# var:    values to inspect (may contain NA).
# factor: grouping values, one per element of `var`.
#
# Returns whatever aggregate() builds for these inputs; for a plain vector
# that is a data frame with the group label in `Group.1` and the NA count
# in `x`.
nacheck <- function(var, factor) {
  count_missing <- function(values) sum(is.na(values))
  aggregate(var, list(factor), count_missing)
}

# NA counts of the `length` column, grouped by each factor column in turn.
nacheck(data$length, data$age)
nacheck(data$length, data$sex)
nacheck(data$length, data$size)


You could also apply this to your data frame, one factor at a time, to get the NA counts of every dimension measure for each level of that factor.

# Run nacheck() over every dimension column, once per grouping factor.
apply(data[, c("length", "width", "height")], 2, nacheck, factor = data$age)
apply(data[, c("length", "width", "height")], 2, nacheck, factor = data$sex)
apply(data[, c("length", "width", "height")], 2, nacheck, factor = data$size)


To do all of this with a single function, nest nacheck inside a wrapper and lapply over the factors:

# Count NA values in every column of `df`, separately for each grouping
# vector in `factors`.
#
# df:      data frame (or matrix) whose columns are the measurements.
# factors: list of grouping vectors, each with one entry per row of `df`.
#
# Returns a list with one element per entry of `factors`; each element is a
# list holding, for every column of `df`, the per-group NA counts produced
# by aggregate().
exploreNA <- function(df, factors) {
  # Per-column worker: NA count of `var` within each group of `factor`.
  nacheck <- function(var, factor) {
    aggregate(var, list(factor), function(x) sum(is.na(x)))
  }
  lapply(factors, function(grouping) {
    if (is.data.frame(df)) {
      # Walk the columns directly; apply() would first coerce the whole
      # data frame to a matrix via as.matrix().
      lapply(df, nacheck, factor = grouping)
    } else {
      # Matrix input: keep the column-wise apply().
      apply(df, 2, nacheck, factor = grouping)
    }
  })
}

# Check the three dimension columns against all three grouping factors.
exploreNA(
  data[, c("length", "width", "height")],
  list(data$age, data$sex, data$size)
)

                                                                        
                                                        
            
            
              
                
                0
              
                 
                
               讨论(0)
              
              
                                                   
              
                                                            
            
                      
                    


               
            
    发布评论:
    
         
                        
    
    提交评论 
  
  

                    
                    
                    
                        
                        
                         加载中...
                        
                    
                
          
          	          
            
           
            
                              
                
              
              
                
                  无人及你        
                
              
                            
                2020-12-18 13:19
              
            
            
                                                                       
Are you looking for something like this?

library(doBy)
# NA count of each measurement for every age/sex/size combination.
summaryBy(
  length + width + height ~ age + sex + size,
  data = data,
  FUN = function(values) sum(is.na(values)),
  keep.names = TRUE
)
  age sex  size length width height
1  ad   f small      3     4      4
2 juv   m large      5     4      4

                                                                        
                                                        
            
            
              
                
                0
              
                 
                
               讨论(0)
              
              
                                                   
              
                                                            
            
                      
                    


               
            
    发布评论:
    
         
                        
    
    提交评论 
  
  

                    
                    
                    
                        
                        
                         加载中...
                        
                    
                
          
          	          
            
           
            
                              
                
              
              
                
                  旧巷少年郎        
                
              
                            
                2020-12-18 13:35
              
            
            
                                                                       
A data.table approach:

library(data.table)
DT <- data.table(data)
# Count the NAs of every non-grouping column within each age/sex/size group.
DT[, lapply(.SD, function(col) sum(is.na(col))), by = .(age, sex, size)]
##    age sex  size length width height
## 1: juv   m large      5     4      4
## 2:  ad   f small      3     4      4


And the plyr equivalent, using colwise and ddply:

library(plyr)  # provides ddply(), colwise() and .()
# colwise() lifts the NA counter so ddply() applies it to every column
# within each age/sex/size group.
ddply(data, .(age, sex, size), colwise(.fun = function(col) sum(is.na(col))))
##   age sex  size length width height
## 1  ad   f small      3     4      4
## 2 juv   m large      5     4      4


You could also pass the grouping columns as a character vector of column names:

# Grouping columns given as a character vector instead of unquoted names.
by.cols <- c("age", "sex", "size")
# Both the data.table and the plyr call accept that vector directly.
DT[, lapply(.SD, function(col) sum(is.na(col))), by = by.cols]
ddply(data, by.cols, colwise(.fun = function(col) sum(is.na(col))))

                                                                        
                                                        
            
            
              
                
                0
              
                 
                
               讨论(0)
              
              
                                                   
              
                                                            
            
                      
                    


               
            
    发布评论:
    
         
                        
    
    提交评论 
  
  

                    
                    
                    
                        
                        
                         加载中...
                        
                    
                
          
          	          
                             
        
        
          
            
            
              
              
            
    


                                 
              
            
                          
    

        
         
                验证码
                
                  
                
                
                   看不清?
                
              
                                  
                    
   
                 
             
              提交回复