How to plot a scikit-learn classification report?

后端 未结 10 1993
广开言路
广开言路 2020-12-04 18:20

Is it possible to plot a scikit-learn classification report with matplotlib? Let's assume I print the classification report like this:

print \'\\n*Classifica         


        
相关标签:
10条回答
  • 2020-12-04 18:38

    If you just want to plot the classification report as a bar chart in a Jupyter notebook, you can do the following.

    # Assuming that classification_report, y_test and predictions are in scope...
    import pandas as pd

    # Build a DataFrame from the classification_report output_dict.
    # Only dict-valued entries describe a row of metrics; a scalar entry
    # (e.g. an overall accuracy float) cannot take a 'label' key and is skipped.
    # Each row is a copy, so the dict returned by classification_report is
    # left untouched.
    report = classification_report(y_test, predictions, output_dict=True)
    report_data = [
        dict(metrics, label=label)
        for label, metrics in report.items()
        if isinstance(metrics, dict)
    ]

    report_df = pd.DataFrame(
        report_data,
        columns=['label', 'precision', 'recall', 'f1-score', 'support'],
    )

    # Plot as a bar chart.
    report_df.plot(y=['precision', 'recall', 'f1-score'], x='label', kind='bar')
    

    One issue with this visualisation is that imbalanced classes are not obvious, but are important in interpreting the results. One way to represent this is to add a version of the label that includes the number of samples (i.e. the support):

    # Build one x-axis caption per row that also shows the row's support.
    report_df['labelsupport'] = [
        f'{name} (n={count})'
        for name, count in zip(report_df['label'], report_df['support'])
    ]

    # Draw the same three metrics as bars, with `labelsupport` on the x-axis.
    report_df.plot(x='labelsupport', y=['precision', 'recall', 'f1-score'], kind='bar')
    
    0 讨论(0)
  • 2020-12-04 18:42
    This works for me; I pieced it together from the top answer above. Also, I cannot comment, but thanks to everyone for this thread, it helped a lot!
    def plot_classification_report(cr, title='Classification report ', with_avg_total=False, cmap=plt.cm.Blues):
        """
        Draw a text classification report as an annotated seaborn heatmap.

        cr             -- report text; rows are split on whitespace, so class
                          labels must not contain spaces.
        title          -- title shown above the heatmap.
        with_avg_total -- if true, add the last non-blank line of the report
                          as an extra 'avg/total' row.
        cmap           -- colormap used for the heatmap cells.
        """
        import seaborn as sns

        lines = cr.split('\n')
        classes = []
        plotMat = []
        # Rows are read from the third line up to, but excluding, the last
        # six lines of the report.
        for line in lines[2 : (len(lines) - 6)]:
            t = line.split()
            if not t:
                # Blank separator line: nothing to parse.
                continue
            classes.append(t[0])
            # Everything between the label and the trailing support count.
            plotMat.append([float(x) for x in t[1: len(t) - 1]])

        if with_avg_total:
            non_blank = [line for line in lines if line.strip()]
            if non_blank:
                aveTotal = non_blank[-1].split()
                classes.append('avg/total')
                # The summary label may span several tokens ('avg / total'),
                # so take the three metrics counting from the end of the row.
                plotMat.append([float(x) for x in aveTotal[-4:-1]])

        plt.figure(figsize=(12,48))
        # Tick labels are handed to seaborn directly: heatmap() sets its own
        # tick labels, so labels assigned beforehand would be replaced.
        sns.heatmap(plotMat, annot=True, cmap=cmap,
                    xticklabels=['precision', 'recall', 'f1-score'],
                    yticklabels=classes)

        plt.title(title)
        plt.xticks(rotation=45)
        plt.yticks(rotation=0)
        plt.ylabel('Classes')
        plt.xlabel('Measures')
        # Called last so the layout accounts for the title and axis labels.
        plt.tight_layout()
    
    After this, make sure the class labels do not contain any spaces, because the report is parsed by splitting each line on whitespace:
    # Build the text report, naming the classes with space-free labels.
    reportstr = classification_report(
        true_classes,
        y_pred,
        target_names=class_labels_no_spaces,
    )

    # Render the text report as a heatmap.
    plot_classification_report(reportstr)
    
    0 讨论(0)
  • 2020-12-04 18:46

    Expanding on Bin's answer:

    import matplotlib.pyplot as plt
    import numpy as np
    
    def show_values(pc, fmt="%.2f", **kw):
        '''
        Heatmap with text in each cell with matplotlib's pyplot
        Source: https://stackoverflow.com/a/25074150/395857 
        By HYRY

        Writes the value of every cell of the collection `pc` at the centre
        of that cell, on the axes the collection belongs to.

        pc   -- collection providing update_scalarmappable(), get_paths(),
                get_facecolors(), get_array() and an `axes` attribute.
        fmt  -- %-style format applied to each cell value.
        **kw -- extra keyword arguments forwarded to ax.text().
        '''
        pc.update_scalarmappable()
        ax = pc.axes
        # Flattened so that a collection exposing a 2-D value array is
        # iterated cell by cell as well.
        values = np.ravel(pc.get_array())
        for p, color, value in zip(pc.get_paths(), pc.get_facecolors(), values):
            # Centre of the cell: mean of its vertices, leaving out the last two.
            x, y = p.vertices[:-2, :].mean(0)
            # Black text on light cells, white text on dark cells.
            if np.all(color[:3] > 0.5):
                color = (0.0, 0.0, 0.0)
            else:
                color = (1.0, 1.0, 1.0)
            ax.text(x, y, fmt % value, ha="center", va="center", color=color, **kw)
    
    
    def cm2inch(*tupl):
        '''
        Specify figure size in centimeter in matplotlib
        Source: https://stackoverflow.com/a/22787457/395857
        By gns-ank

        Accepts either separate lengths, cm2inch(w, h), or one tuple of
        lengths, cm2inch((w, h)), and returns a tuple with every length
        divided by 2.54. Called without arguments it returns an empty tuple.
        '''
        inch = 2.54
        # A tuple passed as the first argument is unpacked; isinstance also
        # accepts tuple subclasses such as namedtuples.
        if tupl and isinstance(tupl[0], tuple):
            tupl = tupl[0]
        return tuple(i/inch for i in tupl)
    
    
    def heatmap(AUC, title, xlabel, ylabel, xticklabels, yticklabels, figure_width=40, figure_height=20, correct_orientation=False, cmap='RdBu'):
        '''
        Inspired by:
        - https://stackoverflow.com/a/16124677/395857 
        - https://stackoverflow.com/a/25074150/395857

        Draw the 2-D array `AUC` as a colour-coded grid with the value
        written in every cell, on a newly created figure.

        AUC                  -- 2-D numpy array (its .shape is used to place ticks).
        title, xlabel, ylabel -- figure title and axis labels.
        xticklabels          -- one label per column of AUC.
        yticklabels          -- one label per row of AUC.
        figure_width, figure_height -- figure size in centimetres.
        correct_orientation  -- if true, put the origin at the top left and
                                move the x ticks to the top.
        cmap                 -- colormap passed to pcolor.
        '''

        # Plot it out
        fig, ax = plt.subplots()
        c = ax.pcolor(AUC, edgecolors='k', linestyle= 'dashed', linewidths=0.2, cmap=cmap)

        # put the major ticks at the middle of each cell
        ax.set_yticks(np.arange(AUC.shape[0]) + 0.5, minor=False)
        ax.set_xticks(np.arange(AUC.shape[1]) + 0.5, minor=False)

        # set tick labels
        ax.set_xticklabels(xticklabels, minor=False)
        ax.set_yticklabels(yticklabels, minor=False)

        # set title and x/y labels
        plt.title(title)
        plt.xlabel(xlabel)
        plt.ylabel(ylabel)

        # Remove last blank column
        plt.xlim( (0, AUC.shape[1]) )

        # Turn off all the tick marks on both sides of both axes; the tick
        # labels stay visible.
        ax.tick_params(axis='both', which='major',
                       bottom=False, top=False, left=False, right=False)

        # Add color bar
        plt.colorbar(c)

        # Add text in each cell 
        show_values(c)

        # Proper orientation (origin at the top left instead of bottom left)
        if correct_orientation:
            ax.invert_yaxis()
            ax.xaxis.tick_top()

        # resize (cm2inch converts the centimetre sizes to inches)
        fig.set_size_inches(cm2inch(figure_width, figure_height))
    
    
    
    def plot_classification_report(classification_report, title='Classification report ', cmap='RdBu'):
        '''
        Plot scikit-learn classification report.
        Extension based on https://stackoverflow.com/a/31689645/395857 

        classification_report -- report text: a header line, then one row per
                                 class of the form
                                 "<name> <precision> <recall> <f1-score> <support>".
        title                 -- title of the figure.
        cmap                  -- colormap forwarded to heatmap().

        Only the first group of rows after the header is plotted; rows that
        follow the next blank line (summary rows) are left out. Class names
        may contain spaces.
        '''
        plotMat = []
        support = []
        class_names = []
        header_seen = False
        for line in classification_report.split('\n'):
            line = line.strip()
            if not line:
                # A blank line after the class rows ends the per-class section.
                if class_names:
                    break
                continue
            if not header_seen:
                # First non-blank line holds the column titles.
                header_seen = True
                continue
            # Split from the right so a name containing spaces stays in one piece.
            fields = line.rsplit(None, 4)
            if len(fields) < 5:
                continue
            class_names.append(fields[0])
            plotMat.append([float(x) for x in fields[1:4]])
            support.append(int(fields[4]))

        xlabel = 'Metrics'
        ylabel = 'Classes'
        xticklabels = ['Precision', 'Recall', 'F1-score']
        # Row labels show the class name followed by its support in parentheses.
        yticklabels = ['{0} ({1})'.format(name, sup) for name, sup in zip(class_names, support)]
        figure_width = 25
        # Figure height grows with the number of classes.
        figure_height = len(class_names) + 7
        correct_orientation = False
        heatmap(np.array(plotMat), title, xlabel, ylabel, xticklabels, yticklabels, figure_width, figure_height, correct_orientation, cmap=cmap)
    
    
    def main():
        '''
        Plot a hard-coded sample classification report and save it as
        'test_plot_classif_report.png'.
        '''
        # Sample report text: a header line, one row per class and a final
        # 'avg / total' row.
        sampleClassificationReport = """             precision    recall  f1-score   support
    
              Acacia       0.62      1.00      0.76        66
              Blossom       0.93      0.93      0.93        40
              Camellia       0.59      0.97      0.73        67
              Daisy       0.47      0.92      0.62       272
              Echium       1.00      0.16      0.28       413
    
            avg / total       0.77      0.57      0.49       858"""
    
    
        # Draw the report, write the current figure to disk, then close it.
        plot_classification_report(sampleClassificationReport)
        plt.savefig('test_plot_classif_report.png', dpi=200, format='png', bbox_inches='tight')
        plt.close()
    
    # Run the demo only when this file is executed as a script.
    if __name__ == "__main__":
        main()
        #cProfile.run('main()') # if you want to do some profiling
    

    outputs:

    Example with more classes (~40):

    0 讨论(0)
  • 2020-12-04 18:46

    This is my simple solution, using seaborn heatmap

    import seaborn as sns
    import numpy as np
    from sklearn.metrics import precision_recall_fscore_support
    import matplotlib.pyplot as plt
    
    # Two independent draws of 100 random integer labels in [0, 10):
    # the first plays the role of the true labels, the second of the predictions.
    y, y_p = (np.random.randint(low=0, high=10, size=100) for _ in range(2))
    
    def plot_classification_report(y_tru, y_prd, figsize=(10, 10), ax=None):
        """
        Draw per-class precision, recall, f1-score and support as a heatmap.

        y_tru   -- true labels.
        y_prd   -- predicted labels.
        figsize -- size of the figure created when `ax` is None.
        ax      -- axes to draw on; when None a new figure is opened and the
                   heatmap is drawn on its current axes.

        A final 'avg' row holds the unweighted mean of each metric and the
        total support.
        """
        # Only open a new figure when the caller did not supply axes.
        if ax is None:
            plt.figure(figsize=figsize)

        xticks = ['precision', 'recall', 'f1-score', 'support']
        # By default the metrics cover every label found in either array, so
        # the row names must come from both, not from the true labels alone.
        yticks = list(np.union1d(y_tru, y_prd))
        yticks += ['avg']

        # One row per class, columns in the order of `xticks`.
        rep = np.array(precision_recall_fscore_support(y_tru, y_prd)).T
        avg = np.mean(rep, axis=0)
        # Support is summed rather than averaged.
        avg[-1] = np.sum(rep[:, -1])
        rep = np.insert(rep, rep.shape[0], avg, axis=0)

        sns.heatmap(rep,
                    annot=True, 
                    cbar=False, 
                    xticklabels=xticks, 
                    yticklabels=yticks,
                    ax=ax)
    
    # Draw the report for the random labels y (true) and y_p (predicted).
    plot_classification_report(y, y_p)
    

    This is what the plot will look like:

    0 讨论(0)
  • 2020-12-04 18:46

    You can do:

    import matplotlib.pyplot as plt

    cm =  [[0.50, 1.00, 0.67],
           [0.00, 0.00, 0.00],
           [1.00, 0.67, 0.80]]
    labels = ['class 0', 'class 1', 'class 2']
    fig, ax = plt.subplots()
    h = ax.matshow(cm)
    fig.colorbar(h)
    # Fix the tick positions (one per row/column) before naming them, so each
    # label is attached to the tick it belongs to.
    ax.set_xticks(range(len(labels)))
    ax.set_yticks(range(len(labels)))
    ax.set_xticklabels(labels)
    ax.set_yticklabels(labels)
    # NOTE(review): the values in `cm` look like per-class precision / recall /
    # f1-score rather than a confusion matrix -- confirm that the column labels
    # and the 'Predicted' / 'Ground truth' axis titles are what is intended.
    ax.set_xlabel('Predicted')
    ax.set_ylabel('Ground truth')
    

    corr_matrix

    0 讨论(0)
  • 2020-12-04 18:48

    No string processing + sns.heatmap

    The following solution uses the output_dict=True option of classification_report to get a dictionary; a DataFrame is then created from that dictionary and drawn as a heat map with seaborn.


    import numpy as np
    import seaborn as sns
    from sklearn.metrics import classification_report
    import pandas as pd
    

    Generating data. Classes: A,B,C,D,E,F,G,H,I

    # One display name per class; everything else is derived from this list so
    # that the label ids and the names always have the same length.
    target_names = list("ABCDEFGHI")
    labels = np.arange(len(target_names))
    # Random true and predicted label ids, each drawn from `labels`.
    true = np.random.randint(0, len(target_names), size=100)
    pred = np.random.randint(0, len(target_names), size=100)
    

    Call classification_report with output_dict=True

    # Request the report as a nested dictionary instead of formatted text.
    clf_report = classification_report(
        y_true=true, y_pred=pred,
        labels=labels, target_names=target_names,
        output_dict=True,
    )
    

    Create a dataframe from the dictionary and plot a heatmap of it.

    # Transpose so each report entry becomes a row, then .iloc[:, :-1] drops
    # the last column (support) before plotting.
    sns.heatmap(pd.DataFrame(clf_report).T.iloc[:, :-1], annot=True)
    

    0 讨论(0)
提交回复
热议问题