How do I create a confusion matrix of predicted and ground truth labels with TensorFlow?

天命终不由人 2020-12-15 01:58

I have implemented a neural network model for classification using TensorFlow, but I don't know how to draw a confusion matrix from the predicted and ground truth labels.

3 Answers
  • 2020-12-15 02:24

    If you want to produce a confusion matrix, and then later precision and recall, you first need to get your counts of true positives, true negatives, false positives and false negatives. Here is how:

    For better readability, I wrote the code very verbosely.

    import tensorflow as tf

    def evaluation(logits, labels):
        """Returns correct predictions, and the 4 counts needed for precision, recall and F1 score."""

        # Step 1:
        # Create 2 vectors of boolean values that describe our labels
        is_label_one = tf.cast(labels, dtype=tf.bool)
        is_label_zero = tf.logical_not(is_label_one)
        # Imagine that labels = [0,1]
        # Then
        # is_label_one = [False,True]
        # is_label_zero = [True,False]

        # Step 2:
        # Get the correct and false prediction vectors. correct_prediction is something that you choose within your model.
        correct_prediction = tf.nn.in_top_k(logits, labels, 1, name="correct_answers")
        false_prediction = tf.logical_not(correct_prediction)

        # Step 3:
        # Get the 4 counts by comparing the boolean vectors
        # TRUE POSITIVES
        true_positives = tf.reduce_sum(tf.to_int32(tf.logical_and(correct_prediction, is_label_one)))

        # FALSE POSITIVES
        false_positives = tf.reduce_sum(tf.to_int32(tf.logical_and(false_prediction, is_label_zero)))

        # TRUE NEGATIVES
        true_negatives = tf.reduce_sum(tf.to_int32(tf.logical_and(correct_prediction, is_label_zero)))

        # FALSE NEGATIVES
        false_negatives = tf.reduce_sum(tf.to_int32(tf.logical_and(false_prediction, is_label_one)))

        return true_positives, false_positives, true_negatives, false_negatives
    
    # Now you can do something like this in your session:
    
    true_positives, \
    false_positives, \
    true_negatives, \
    false_negatives = sess.run(evaluation(logits,labels), feed_dict=feed_dict)
    
    # you can print the confusion matrix using the 4 values from above, or get precision and recall:
    precision = float(true_positives) / float(true_positives+false_positives)
    recall = float(true_positives) / float(true_positives+false_negatives)
    
    # or F1 score:
    F1_score = 2 * ( precision * recall ) / ( precision+recall )
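
    A quick way to lay the four counts out as the confusion matrix itself is a plain NumPy array; a minimal sketch, assuming the binary case, with rows = actual and columns = predicted:

    import numpy as np

    # row 0 = actual negatives, row 1 = actual positives
    # column 0 = predicted negatives, column 1 = predicted positives
    confusion = np.array([[true_negatives, false_positives],
                          [false_negatives, true_positives]])
    print(confusion)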
    
  • 2020-12-15 02:29

    For the moment, I use this solution to obtain the confusion matrix:

    # load the data
    (train_x, train_y), (dev_x, dev_y), (test_x, test_y) = dataLoader.load()
    
    # build the classifier
    classifier = tf.estimator.DNNClassifier(...)
    
    # train the classifier
    classifier.train(input_fn=lambda:train_input_fn(), steps=1000)
    
    # evaluate and prediction on the test set
    test_evaluate = classifier.evaluate(input_fn=lambda:eval_input_fn())
    test_predict = classifier.predict(input_fn = lambda:eval_input_fn())
    
    # parse the prediction to retrieve the predicted labels
    predictions = []
    
    for i in list(test_predict):
        predictions.append(i['class_ids'][0])
    
    # build the confusion matrix
    matrix = tf.confusion_matrix(test_y, predictions)
    
    # display the confusion matrix
    with tf.Session():
        print(matrix.eval())
    

    But I am not convinced by my loop to retrieve the predicted labels... there should be a better Python way to do this (or a TensorFlow way...).
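
    For what it is worth, a list comprehension is the usual Python idiom for collapsing that loop:

    # same as the for loop above, in a single expression
    predictions = [p['class_ids'][0] for p in test_predict]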

  • 2020-12-15 02:37

    This code worked for me. I sorted it out myself :)

    import numpy as np
    import pandas as pd
    import tensorflow as tf
    from sklearn.metrics import precision_recall_fscore_support as score
    from sklearn.metrics import classification_report

    def print_confusion_matrix(plabels, tlabels):
        """
        Prints the confusion matrix for the different classes
        to find the errors...

        Input:
        -----------
        plabels: predicted labels for the classes...
        tlabels: true labels for the classes

        code from: http://stackoverflow.com/questions/2148543/how-to-write-a-confusion-matrix-in-python
        """
        plabels = pd.Series(plabels)
        tlabels = pd.Series(tlabels)

        # draw a cross tabulation...
        df_confusion = pd.crosstab(tlabels, plabels, rownames=['Actual'], colnames=['Predicted'], margins=True)

        return df_confusion

    def confusionMatrix(text, Labels, y_pred, not_partial):
        # recover integer class ids from the one-hot encoded labels
        y_actu = np.where(Labels[:] == 1)[1]
        df = print_confusion_matrix(y_pred, y_actu)
        print("\n", df)
        # print(plt.imshow(df.as_matrix()))
        if not_partial:
            print("\n", classification_report(y_actu, y_pred))
        print("\n\t------------------------------------------------------\n")

    def do_eval(message, sess, correct_prediction, accuracy, pred, X_, y_, x, y):
        # boolean correctness vector for the partial matrix, argmax class ids for the complete one
        predictions = sess.run([correct_prediction], feed_dict={x: X_, y: y_})
        prediction = tf.argmax(pred, 1)
        labels = prediction.eval(feed_dict={x: X_, y: y_}, session=sess)
        print(message, accuracy.eval({x: X_, y: y_}), "\n")
        confusionMatrix("Partial Confusion matrix", y_, predictions[0], False)  # partial confusion matrix
        confusionMatrix("Complete Confusion matrix", y_, labels, True)  # complete confusion matrix
    
    # Launch the graph
    with tf.Session() as sess:
        sess.run(init)
        data = list(zip(X_train, y_train))  # list() so it can be indexed and turned into an array
        data = np.array(data)
        data_size = len(data)
        num_batches_per_epoch = int((data_size - 1) / batch_size) + 1  # ceiling division, avoids an empty final batch
        for epoch in range(training_epochs):
            avg_cost = 0.
            # Shuffle the data at each epoch
            shuffle_indices = np.random.permutation(np.arange(data_size))
            shuffled_data = data[shuffle_indices]
            for batch_num in range(num_batches_per_epoch):
                start_index = batch_num * batch_size
                end_index = min((batch_num + 1) * batch_size, data_size)
                # picking up consecutive batches of a specific size from the shuffled training set
                sample = list(zip(*shuffled_data[start_index:end_index]))
                batch_xs, batch_ys = sample[0], sample[1]
                # Fit training using batch data
                sess.run(optimizer, feed_dict={x: batch_xs, y: batch_ys})
                # Compute average loss
                avg_cost += sess.run(cost, feed_dict={x: batch_xs, y: batch_ys}) / num_batches_per_epoch
            # append loss
            loss_history.append(avg_cost)

            # Display logs per epoch step
            if epoch % display_step == 0:
                correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
                # Calculate training accuracy
                accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
                trainAccuracy = accuracy.eval({x: X_train, y: y_train})
                train_acc_history.append(trainAccuracy)
                # Calculate validation accuracy
                valAccuracy = accuracy.eval({x: X_val, y: y_val})
                val_acc_history.append(valAccuracy)
                print("Epoch:", '%04d' % (epoch + 1), "cost=", "{:.9f}".format(avg_cost), "train=", trainAccuracy, "val=", valAccuracy)

        print("Optimization Finished!\n")

        # Evaluation of the model
        correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
        # Calculate accuracy
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))

        do_eval("Accuracy of Gold Test set Results: ", sess, correct_prediction, accuracy, pred, X_gold, y_gold, x, y)
    

    And here is the sample output:

    Accuracy of Gold Test set Results:  0.642608 
    
    
    Predicted  False  True  All
    Actual                     
    0             20    46   66
    1              3     1    4
    2             21     1   22
    3              8     4   12
    4             16     7   23
    5             54   259  313
    6             41    14   55
    7             11     2   13
    8             48    94  142
    9             29     4   33
    10            17     4   21
    11            39   116  155
    All          307   552  859
    
    Predicted   0  1  2   3   4    5   6   7    8   9  10   11  All
    Actual                                                         
    0          46  0  0   0   0    8   0   2    2   2   0    6   66
    1           0  1  0   1   0    2   0   0    0   0   0    0    4
    2           3  0  1   3   0   12   0   0    1   0   0    2   22
    3           2  0  0   4   1    3   1   1    0   0   0    0   12
    4           1  0  0   0   7   12   0   0    1   0   0    2   23
    5           8  0  0   1   5  259   9   0    9   3   1   18  313
    6           1  0  0   1   6   30  14   1    2   0   0    0   55
    7           3  0  0   0   0    2   0   2    4   0   1    1   13
    8           6  0  0   1   1   18   0   3   94   8   1   10  142
    9           9  0  0   0   0    1   1   1    9   4   0    8   33
    10          1  0  0   0   3    6   0   1    1   0   4    5   21
    11          5  1  0   1   0   18   1   0    6   5   2  116  155
    All        85  2  1  12  23  371  26  11  129  22   9  168  859
    
             precision    recall  f1-score   support
    
          0       0.54      0.70      0.61        66
          1       0.50      0.25      0.33         4
          2       1.00      0.05      0.09        22
          3       0.33      0.33      0.33        12
          4       0.30      0.30      0.30        23
          5       0.70      0.83      0.76       313
          6       0.54      0.25      0.35        55
          7       0.18      0.15      0.17        13
          8       0.73      0.66      0.69       142
          9       0.18      0.12      0.15        33
         10       0.44      0.19      0.27        21
         11       0.69      0.75      0.72       155
    
         avg / total       0.64      0.64      0.62       859
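
    As a side note, since sklearn.metrics is already imported above, its confusion_matrix helper could replace the pd.crosstab call; a minimal sketch (it returns a plain NumPy array, without the All margin totals that crosstab adds):

    from sklearn.metrics import confusion_matrix

    # rows = actual class ids, columns = predicted class ids
    cm = confusion_matrix(y_actu, y_pred)
    print(cm)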
    