Pandas groupby two columns and plot

后端未结

关注

 3  1287

I have a dataframe like this:

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

df = pd.DataFrame({'category': lis…


                      
              相关标签:


      
      
        
          3条回答        

        
                         				            
            
           
            
                              
                
              
              
                
                  说谎        
                
              
                            
                2020-12-29 09:21
              
            
            
                                                                       
Data
import numpy as np
import pandas as pd
# Five-row sample frame: two categorical columns ('category', 'sex') and an
# integer column ('NotUsed') running from 5 to 9.
df = pd.DataFrame(
    {
        "category": list("XYZXY"),
        "NotUsed": range(5, 10),
        "sex": list("mfmff"),
    }
)

  category  NotUsed sex
0        X        5   m
1        Y        6   f
2        Z        7   m
3        X        8   f
4        Y        9   f

Using crosstab
# Tabulate how often each sex occurs within each category, then draw the
# table as a grouped bar chart (one bar group per category).
pd.crosstab(index=df["category"], columns=df["sex"]).plot.bar()

Using groupby+unstack:
# Count rows per (sex, category) pair, move 'sex' into the columns, and plot
# one bar group per category. The counted column must exist in df: the sample
# frame defined above has 'NotUsed' as its only non-key column.
(df.groupby(['sex', 'category'])['NotUsed']
   .count().unstack('sex').plot.bar())

Using pivot_table:
# Build a category-by-sex table of counts with pivot_table and plot it.
# `values` must name a column present in df; the sample frame defined above
# provides 'NotUsed' for that purpose.
pd.pivot_table(df, values='NotUsed', index='category',
               columns='sex', aggfunc='count').plot.bar()

Using seaborn:
import seaborn as sns
# Let seaborn do the counting: one bar per category, split by sex via hue.
sns.countplot(x="category", hue="sex", data=df)

or,
# Figure-level equivalent: catplot with kind="count" draws the same count bars.
sns.catplot(x="category", hue="sex", kind="count", data=df)

output

                                                                        
                                                        
            
            
              
                
                0
              
                 
                
               讨论(0)
              
              
                                                   
              
                                                            
            
                      
                    


               
            
    发布评论:
    
         
                        
    
    提交评论 
  
  

                    
                    
                    
                        
                        
                         加载中...
                        
                    
                
          
          	          
            
           
            
                              
                
              
              
                
                  闹比i        
                
              
                            
                2020-12-29 09:36
              
            
            
                                                                       
If I understand correctly (IIUC),

# Count rows per (category, sex), spread 'sex' across the columns, turn the
# index back into a regular 'category' column, and plot the 'f' and 'm' counts.
# NOTE(review): assumes df has a 'B' column, as in the question's frame - confirm.
(
    df.groupby(['category', 'sex'])['B']
    .count()
    .unstack()
    .reset_index()
    .plot.bar(x='category', y=['f', 'm'])
)




Edit: If you have multiple columns, you can use groupby, count and droplevel.

# Count every remaining column per (category, sex) and pivot 'sex' into the
# columns; the outer column level is then dropped so that only the sex labels
# remain as column names.
new_df = (
    df.groupby(['category', 'sex'])
    .count()
    .unstack()
    .droplevel(0, axis=1)
)
new_df.reset_index().plot.bar()

                                                                        
                                                        
            
            
              
                
                0
              
                 
                
               讨论(0)
              
              
                                                   
              
                                                            
            
                      
                    


               
            
    发布评论:
    
         
                        
    
    提交评论 
  
  

                    
                    
                    
                        
                        
                         加载中...
                        
                    
                
          
          	          
            
           
            
                              
                
              
              
                
                  挽巷        
                
              
                            
                2020-12-29 09:39
              
            
            
                                                                       
You can also use this:

# Pivot to a category-by-sex table whose cells hold the number of rows in
# each group (len of the grouped values), then plot it as bars.
# NOTE(review): assumes df has a 'B' column, as in the question's frame - confirm.
(
    df.pivot_table(
        values='B',
        index='category',
        columns='sex',
        aggfunc=lambda group: len(group),
    )
    .plot.bar()
)


which results in exactly the same plot.


                                                                        
                                                        
            
            
              
                
                0
              
                 
                
               讨论(0)
              
              
                                                   
              
                                                            
            
                      
                    


               
            
    发布评论:
    
         
                        
    
    提交评论 
  
  

                    
                    
                    
                        
                        
                         加载中...
                        
                    
                
          
          	          
                             
        
        
          
            
            
              
              
            
    


                                 
              
            
                          
    

        
         
                验证码
                
                  
                
                
                   看不清?
                
              
                                  
                    
   
                 
             
              提交回复