How to plot scikit learn classification report?

后端 未结 10 1994
广开言路
广开言路 2020-12-04 18:20

Is it possible to plot a scikit-learn classification report with matplotlib? Let's assume I print the classification report like this:

print \'\\n*Classifica         


        
相关标签:
10条回答
  • 2020-12-04 18:54

    Here you can get the plot same as Franck Dernoncourt's, but with much shorter code (can fit into a single function).

    import matplotlib.pyplot as plt
    import numpy as np
    import itertools
    
    
    def plot_classification_report(classificationReport,
                                   title='Classification report',
                                   cmap='RdBu'):
        """Draw a text classification report as an annotated heatmap.

        Every report line that ends with three metric values and a support
        count becomes one heatmap row, labelled ``"<name> (<support>)"``.
        Aggregate rows such as ``avg / total`` are plotted too.  The heatmap is
        drawn through ``pyplot`` and is not shown here; the caller is expected
        to call ``plt.show()``.

        Args:
            classificationReport: Report text with one row per class laid out
                as ``name  precision  recall  f1-score  support``.
            title: Title placed above the heatmap.
            cmap: Colormap passed to ``plt.imshow``.

        Raises:
            ValueError: If the text contains no plottable row, or a row's
                metric/support fields are not numeric.
        """
        class_names, support, rows = [], [], []
        for line in classificationReport.split('\n'):
            tokens = line.split()
            # A plottable row has at least "<name> p r f1 support".  This also
            # skips blank lines, the 4-token header and 3-token rows such as
            # "accuracy  0.57  858" that would make the matrix ragged.
            if len(tokens) < 5:
                continue
            # Everything before the last four tokens is the row name, so names
            # containing spaces ("macro avg", "No hit") are kept intact.
            name = ' '.join(tokens[:-4]).replace(' / ', '/')
            rows.append([float(x) for x in tokens[-4:-1]])
            support.append(int(tokens[-1]))
            class_names.append(name)

        if not rows:
            raise ValueError('no metric rows found in classification report')

        plotMat = np.array(rows)
        xticklabels = ['Precision', 'Recall', 'F1-score']
        yticklabels = ['{0} ({1})'.format(name, sup)
                       for name, sup in zip(class_names, support)]

        plt.imshow(plotMat, interpolation='nearest', cmap=cmap, aspect='auto')
        plt.title(title)
        plt.colorbar()
        plt.xticks(np.arange(len(xticklabels)), xticklabels, rotation=45)
        plt.yticks(np.arange(len(yticklabels)), yticklabels)

        # Cells in the top or bottom 20% of the value range get white text,
        # the rest get black text.
        lowest, highest = plotMat.min(), plotMat.max()
        upper_thresh = lowest + (highest - lowest) / 10 * 8
        lower_thresh = lowest + (highest - lowest) / 10 * 2
        for (i, j), value in np.ndenumerate(plotMat):
            is_extreme = value > upper_thresh or value < lower_thresh
            plt.text(j, i, format(value, '.2f'),
                     horizontalalignment="center",
                     color="white" if is_extreme else "black")

        # Rows are classes and columns are metrics; the original labels were
        # swapped.
        plt.ylabel('Classes')
        plt.xlabel('Metrics')
        plt.tight_layout()
    
    
    def main():
        """Plot a hard-coded sample report, show the figure, then close it."""
        sample_report = """             precision    recall  f1-score   support
    
              Acacia       0.62      1.00      0.76        66
              Blossom       0.93      0.93      0.93        40
              Camellia       0.59      0.97      0.73        67
              Daisy       0.47      0.92      0.62       272
              Echium       1.00      0.16      0.28       413
    
            avg / total       0.77      0.57      0.49       858"""

        # Draw the heatmap, display it, and release the figure afterwards.
        plot_classification_report(sample_report)
        plt.show()
        plt.close()
    
    
    # Run the demo only when this file is executed as a script, not on import.
    if __name__ == '__main__':
        main()
    

    0 讨论(0)
  • 2020-12-04 18:56

    I just wrote a function plot_classification_report() for this purpose. Hope it helps. This function takes the output of the classification_report function as an argument and plots the scores. Here is the function.

    def plot_classification_report(cr, title='Classification report ', with_avg_total=False, cmap=plt.cm.Blues):
        """Plot the per-class scores of a text classification report as a heatmap.

        Each class row of the report becomes one heatmap row with the columns
        precision, recall and f1-score.  Aggregate rows are left out unless
        ``with_avg_total`` is true, in which case they are appended below the
        class rows.

        Args:
            cr: Report text with rows laid out as
                ``name  precision  recall  f1-score  support``.
            title: Title placed above the heatmap.
            with_avg_total: When true, also plot aggregate rows
                (``avg / total``, shown as ``avg/total``, or the
                ``micro/macro/weighted/samples avg`` rows).
            cmap: Colormap passed to ``plt.imshow``.

        Raises:
            ValueError: If no row can be plotted, or a metric field is not
                numeric.
        """
        # Row names treated as aggregates rather than as classes.
        summary_names = ('avg / total', 'micro avg', 'macro avg',
                         'weighted avg', 'samples avg')

        classes = []
        plotMat = []
        summary_rows = []
        for line in cr.split('\n'):
            t = line.split()
            # Rows are recognised by content, not by position, so the result
            # does not depend on a trailing newline.  Blank lines, the 4-token
            # header and the 3-token "accuracy" row are skipped.
            if len(t) < 5:
                continue
            # The last four tokens are precision, recall, f1-score and support;
            # whatever precedes them is the (possibly multi-word) row name.
            name = ' '.join(t[:-4])
            v = [float(x) for x in t[-4:-1]]
            if name in summary_names:
                summary_rows.append((name.replace(' / ', '/'), v))
            else:
                classes.append(name)
                plotMat.append(v)

        if with_avg_total:
            # The original converted the last class row here instead of the
            # aggregate row.
            for name, v in summary_rows:
                classes.append(name)
                plotMat.append(v)

        if not plotMat:
            raise ValueError('no metric rows found in classification report')

        plt.imshow(plotMat, interpolation='nearest', cmap=cmap)
        plt.title(title)
        plt.colorbar()
        x_tick_marks = np.arange(3)
        y_tick_marks = np.arange(len(classes))
        plt.xticks(x_tick_marks, ['precision', 'recall', 'f1-score'], rotation=45)
        plt.yticks(y_tick_marks, classes)
        plt.ylabel('Classes')
        plt.xlabel('Measures')
        # Lay out after the labels exist so they are included in the layout.
        plt.tight_layout()
    

    For the example classification_report you provided, here are the code and output.

    # Hard-coded report text: a header, five class rows and an "avg / total" row.
    sampleClassificationReport = """             precision    recall  f1-score   support
    
              1       0.62      1.00      0.76        66
              2       0.93      0.93      0.93        40
              3       0.59      0.97      0.73        67
              4       0.47      0.92      0.62       272
              5       1.00      0.16      0.28       413
    
    avg / total       0.77      0.57      0.49       858"""
    
    
    # Plot the sample with the default title, colormap and no aggregate row.
    plot_classification_report(sampleClassificationReport)
    

    Here is how to use it with sklearn classification_report output:

    from sklearn.metrics import classification_report
    # NOTE: y_true, y_pred and target_names are not defined in this snippet;
    # they must already exist in the caller's scope.
    classificationReport = classification_report(y_true, y_pred, target_names=target_names)
    
    # Pass the generated report text straight to the plotting helper.
    plot_classification_report(classificationReport)
    

    With this function, you can also add the "avg / total" result to the plot. To use it just add an argument with_avg_total like this:

    # Same call as before, additionally requesting the "avg / total" row.
    plot_classification_report(classificationReport, with_avg_total=True)
    
    0 讨论(0)
  • 2020-12-04 18:57

    Franck Dernoncourt's and Bin's answers were really useful to me, but I had two problems.

    First, when I tried to use it with classes like "No hit" or any name containing a space, the plot failed.
    The other problem was using these functions with Matplotlib 3.* and scikit-learn 0.22.*. So I made a few small changes:

    import matplotlib.pyplot as plt
    import numpy as np
    
    def show_values(pc, fmt="%.2f", **kw):
        '''
        Heatmap with text in each cell with matplotlib's pyplot
        Source: https://stackoverflow.com/a/25074150/395857 
        By HYRY

        Writes the value of every cell of the collection pc (as returned by
        ax.pcolor) as text inside that cell, on the axes pc belongs to.

        pc:  collection providing get_paths(), get_facecolors(), get_array()
        fmt: %-style format applied to each cell value
        kw:  extra keyword arguments forwarded to ax.text
        '''
        # Make sure the face colors reflect the current data and colormap.
        pc.update_scalarmappable()
        ax = pc.axes
        # Flatten the data to one value per drawn cell.  Older Matplotlib gives
        # a flat array here; newer releases (3.8+, per the pcolor API change)
        # give a 2-D, possibly masked array, and iterating that directly would
        # hand whole rows to the format string.
        values = np.ma.compressed(pc.get_array())
        for p, color, value in zip(pc.get_paths(), pc.get_facecolors(), values):
            # Mean of the path vertices, leaving out the last two rows, is used
            # as the text position.
            x, y = p.vertices[:-2, :].mean(0)
            # Black text when all RGB channels are above 0.5, white otherwise.
            if np.all(color[:3] > 0.5):
                text_color = (0.0, 0.0, 0.0)
            else:
                text_color = (1.0, 1.0, 1.0)
            ax.text(x, y, fmt % value, ha="center", va="center", color=text_color, **kw)
    
    
    def cm2inch(*tupl):
        '''
        Specify figure size in centimeter in matplotlib
        Source: https://stackoverflow.com/a/22787457/395857
        By gns-ank

        Converts lengths from centimeters to inches.  Accepts either separate
        numbers, cm2inch(w, h), or one tuple, cm2inch((w, h)), and returns a
        tuple of the converted values.  Called without arguments it returns an
        empty tuple.
        '''
        inch = 2.54  # centimeters per inch
        # isinstance also accepts tuple subclasses (e.g. namedtuples), which
        # the exact type comparison rejected; the truthiness check avoids
        # indexing into an empty argument list.
        if tupl and isinstance(tupl[0], tuple):
            lengths = tupl[0]
        else:
            lengths = tupl
        return tuple(i/inch for i in lengths)
    
    
    def heatmap(AUC, title, xlabel, ylabel, xticklabels, yticklabels, figure_width=40, figure_height=20, correct_orientation=False, cmap='RdBu'):
        '''
        Draw the 2-D array AUC as a colored grid with the value written in
        every cell, a color bar, and the given title, axis labels and tick
        labels.  figure_width and figure_height are in centimeters.

        Inspired by:
        - https://stackoverflow.com/a/16124677/395857 
        - https://stackoverflow.com/a/25074150/395857
        '''
        fig, ax = plt.subplots()
        # Color scale is fixed to the range 0.0 .. 1.0.
        mesh = ax.pcolor(AUC, edgecolors='k', linestyle= 'dashed', linewidths=0.2, cmap=cmap, vmin=0.0, vmax=1.0)

        n_rows = AUC.shape[0]
        n_cols = AUC.shape[1]

        # Major ticks sit at the middle of each cell and carry the labels.
        ax.set_yticks(np.arange(n_rows) + 0.5, minor=False)
        ax.set_xticks(np.arange(n_cols) + 0.5, minor=False)
        ax.set_xticklabels(xticklabels, minor=False)
        ax.set_yticklabels(yticklabels, minor=False)

        # Title and axis labels.
        plt.title(title, y=1.25)
        plt.xlabel(xlabel)
        plt.ylabel(ylabel)

        # Clip the x range to the data so no blank trailing column is shown.
        plt.xlim( (0, n_cols) )

        # Hide the tick marks themselves on both axes; labels stay visible.
        for axis in (ax.xaxis, ax.yaxis):
            for tick in axis.get_major_ticks():
                tick.tick1line.set_visible(False)
                tick.tick2line.set_visible(False)

        plt.colorbar(mesh)

        # Write the numeric value into every cell.
        show_values(mesh)

        # Optionally put the origin at the top left instead of the bottom left.
        if correct_orientation:
            ax.invert_yaxis()
            ax.xaxis.tick_top()

        # Resize the figure; the requested size is converted from cm to inches.
        size_in_inches = cm2inch(figure_width, figure_height)
        fig.set_size_inches(size_in_inches)
    
    
    
    def plot_classification_report(classification_report, number_of_classes=2, title='Classification report ', cmap='RdYlGn'):
        '''
        Plot scikit-learn classification report.
        Extension based on https://stackoverflow.com/a/31689645/395857 

        Reads up to number_of_classes lines after the first two lines of the
        report text, draws their precision/recall/F1 values with heatmap(),
        and shows the figure.  Fields are separated on runs of two spaces, so
        class names containing single spaces stay in one piece.
        '''
        # The first two lines of the report are dropped before parsing.
        body = classification_report.split('\n')[2:]

        class_names = []
        support = []
        scores = []
        for raw in body[:number_of_classes]:
            fields = [field for field in raw.strip().split('  ') if field]
            # Only lines with a name, at least two values and a support count
            # are plotted.
            if len(fields) >= 4:
                scores.append([float(x) for x in fields[1:-1]])
                support.append(int(fields[-1]))
                class_names.append(fields[0])

        # Each row label is the class name followed by its support count.
        row_labels = ['{0} ({1})'.format(name, sup) for name, sup in zip(class_names, support)]
        column_labels = ['Precision', 'Recall', 'F1-score']

        # Figure height grows with the number of plotted classes.
        width = 10
        height = len(class_names) + 3
        heatmap(np.array(scores), title, 'Metrics', 'Classes', column_labels, row_labels, width, height, True, cmap=cmap)
        plt.show()
    
    
    
    0 讨论(0)
  • 2020-12-04 18:59

    My solution is to use the python package, Yellowbrick. Yellowbrick in a nutshell combines scikit-learn with matplotlib to produce visualizations for your models. In a few lines you can do what was suggested above. http://www.scikit-yb.org/en/latest/api/classifier/classification_report.html

    from sklearn.naive_bayes import GaussianNB
    from yellowbrick.classifier import ClassificationReport
    
    # Instantiate the classification model and visualizer
    # NOTE: classes, X_train, y_train, X_test and y_test are not defined in
    # this snippet; they must be provided before running it.
    bayes = GaussianNB()
    visualizer = ClassificationReport(bayes, classes=classes, support=True)
    
    visualizer.fit(X_train, y_train)  # Fit the visualizer and the model
    visualizer.score(X_test, y_test)  # Evaluate the model on the test data
    visualizer.show()             # Draw/show the data
    
    0 讨论(0)
提交回复
热议问题