ggplot2 Heatmap with 2 Different Color Schemes - Confusion Matrix: Matches in a Different Color Scheme than Misclassifications

自作多情 提交于 2021-02-11 05:54:18

问题


I adapted a heatmap plot for a confusion matrix from this answer.
However, I would like to tweak it. The diagonal (from top left to bottom right) holds the matches (correct classifications). My aim is to plot this diagonal in a yellow color palette, and the mismatches (all tiles except those on the diagonal) in a red color palette.

In my plot.cm function I can get the diagonal with

  cm_d$diag <- cm_d$Prediction == cm_d$Reference # Get the Diagonal
  cm_d$ndiag <- cm_d$Prediction != cm_d$Reference # Not the Diagonal

And with the correct geom_tile aesthetics I can get only the diagonal (in the desired yellow-ish) color scheme

geom_tile( data = cm_d[!is.na(cm_d$diag), ],aes(color = Freq)) +
scale_fill_gradient(guide = FALSE,low=alpha("lightyellow",0.75), high="yellow",na.value = 'white') 

However, I am not able to get the second color scheme onto the elements of cm_d$ndiag. I found the package ggnewscale, which offers new_scale() as well as new_scale_fill().
I tried to implement it with the help of this blog. However, the result is only dark-gray filled tiles for the rest of the heatmap.

# adapted from https://stackoverflow.com/a/60150826/7318488
library(ggplot2)     # to plot
library(gridExtra)   # to put more
library(grid)        # plot together
library(likert)      # for reversing the factor order
library(ggnewscale)

# Draw a confusion matrix as a ggplot2 heatmap (question's version: only the
# diagonal tiles are drawn; the second palette is still commented out).
#
# cm: an object whose `$table` element converts via as.data.frame() to a data
#     frame with the columns `Prediction`, `Reference` and `Freq` used below
#     (e.g. the result of caret::confusionMatrix()).
# Returns the ggplot object without printing it.
plot.cm <- function(cm){
  # extract the confusion matrix values as data.frame
  cm_d <- as.data.frame(cm$table)
  cm_d$diag <- cm_d$Prediction == cm_d$Reference # TRUE on the diagonal (matches)
  cm_d$ndiag <- cm_d$Prediction != cm_d$Reference # TRUE off the diagonal (mismatches)
  # Replace 0 with NA for white tiles. The comparison runs over EVERY column,
  # and FALSE == 0 is TRUE, so the FALSE entries of `diag`/`ndiag` become NA
  # as well. The `!is.na(...)` row filters below depend on that side effect.
  cm_d[cm_d == 0] <- NA
  cm_d$Reference <-  reverse.levels(cm_d$Reference) # diagonal starts at top left

  # plotting the matrix
  cm_d_p <-  ggplot(data = cm_d, aes(x = Prediction , y =  Reference, fill = Freq))+
    scale_x_discrete(position = "top") +
    # Diagonal rows only. This layer maps `color`; its `fill` is inherited
    # from the ggplot() call above.
    geom_tile( data = cm_d[!is.na(cm_d$diag), ],aes(color = Freq)) +
    scale_fill_gradient(guide = FALSE,low=alpha("lightyellow",0.75), high="yellow",na.value = 'white') +
    # THIS DOESNT WORK
    # new_scale("fill") +
    # geom_tile( data = cm_d[!is.na(cm_d$ndiag), ],aes(color = Freq)) +
    # scale_fill_gradient(guide = FALSE,low=alpha("red",0.75), high="darkred",na.value = 'white') +

    # Cell counts; uses the full data frame, so NA counts produce no label.
    geom_text(aes(label = Freq), color = 'black', size = 6) +
    theme_light() +
    # Strip grid lines, legend, border and axis lines.
    theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
      legend.position = "none",
      panel.border = element_blank(),
      plot.background = element_blank(),
      axis.line = element_blank())

  return(cm_d_p)
}

Sample Data:
Simulated Caret Confusion Matrix

library(caret)
# Simulated data: 100 predicted and 100 reference labels drawn from 7 classes.
set.seed(23)
classes <- 1:7
# Pin the levels so both factors share one level set even if a class happens
# not to be drawn; spell out TRUE because `T` is an ordinary, reassignable
# variable.
pred <- factor(sample(classes, 100, replace = TRUE), levels = classes)
ref <- factor(sample(classes, 100, replace = TRUE), levels = classes)
cm <- caret::confusionMatrix(pred, ref)
g <- plot.cm(cm)
g

回答1:


I believe the issue is simply that you're specifying aes(color = Freq) instead of aes(fill = Freq). Is the plot below what you were aiming for? You could also simplify all of this by using a divergent color scale and creating a new variable that marks Freq as negative if it's off the diagonal. See the second example below.

# adapted from https://stackoverflow.com/a/60150826/7318488
library(ggplot2)     # to plot
library(gridExtra)   # to put more
library(grid)        # plot together
library(likert)      # for reversing the factor order
#> Loading required package: xtable
library(ggnewscale)

# Plot a confusion matrix as a heatmap with two independent fill palettes:
# yellow for the diagonal (correct classifications) and red for the
# off-diagonal cells (misclassifications).
#
# cm: an object whose `$table` element converts via as.data.frame() to a data
#     frame with the columns `Prediction`, `Reference` and `Freq`
#     (e.g. the result of caret::confusionMatrix()).
# Returns the ggplot object. Cells with a count of 0 get the scales'
# `na.value` (white) and no text label.
plot.cm <- function(cm) {
  # extract the confusion matrix values as data.frame
  cm_d <- as.data.frame(cm$table)

  # Flag the diagonal explicitly. Comparing the labels as character keeps the
  # test independent of how the two factors order their levels.
  on_diag <- as.character(cm_d$Prediction) == as.character(cm_d$Reference)
  cm_d$diag <- on_diag
  cm_d$ndiag <- !on_diag

  # Turn empty cells into NA in the count column only. Running `== 0` over the
  # whole data frame would also blank out class labels that are named "0".
  cm_d$Freq[which(cm_d$Freq == 0)] <- NA
  cm_d$Reference <- reverse.levels(cm_d$Reference) # diagonal starts at top left

  # plotting the matrix
  ggplot(data = cm_d, aes(x = Prediction, y = Reference, fill = Freq)) +
    scale_x_discrete(position = "top") +
    # First fill scale: matches, light yellow -> yellow.
    geom_tile(data = cm_d[cm_d$diag, ], aes(fill = Freq)) +
    scale_fill_gradient(
      guide = "none", # `guide = FALSE` is deprecated in ggplot2
      low = alpha("lightyellow", 0.75), high = "yellow", na.value = "white"
    ) +
    # ggnewscale: everything below gets its own, separate fill scale.
    new_scale("fill") +
    # Second fill scale: mismatches, in red.
    geom_tile(data = cm_d[cm_d$ndiag, ], aes(fill = Freq)) +
    scale_fill_gradient(
      guide = "none",
      low = alpha("red", 0.75), high = "red", na.value = "white"
    ) +
    # Cell counts; NA counts are dropped by geom_text (with a warning).
    geom_text(aes(label = Freq), color = "black", size = 6) +
    theme_light() +
    theme(
      panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
      legend.position = "none",
      panel.border = element_blank(),
      plot.background = element_blank(),
      axis.line = element_blank()
    )
}

library(caret)
#> Loading required package: lattice
# Simulated data: 100 predicted and 100 reference labels drawn from 7 classes.
set.seed(23)
classes <- 1:7
# Pin the levels so both factors share one level set even if a class happens
# not to be drawn; spell out TRUE because `T` is an ordinary, reassignable
# variable.
pred <- factor(sample(classes, 100, replace = TRUE), levels = classes)
ref <- factor(sample(classes, 100, replace = TRUE), levels = classes)
cm <- caret::confusionMatrix(pred, ref)
g <- plot.cm(cm)
g
#> Warning: Removed 8 rows containing missing values (geom_text).

Created on 2020-04-29 by the reprex package (v0.3.0)

# adapted from https://stackoverflow.com/a/60150826/7318488
library(ggplot2)     # to plot
library(gridExtra)   # to put more
library(grid)        # plot together
library(likert)      # for reversing the factor order
#> Loading required package: xtable
library(ggnewscale)

# Plot a confusion matrix as a heatmap using a single diverging fill scale:
# counts on the diagonal (matches) stay positive and shade towards yellow,
# counts off the diagonal (mismatches) are negated and shade towards red.
#
# cm: an object whose `$table` element converts via as.data.frame() to a data
#     frame with the columns `Prediction`, `Reference` and `Freq`
#     (e.g. the result of caret::confusionMatrix()).
# Returns the ggplot object. Cells with a count of 0 get the scale's
# `na.value` (white) and no text label.
plot.cm <- function(cm) {
  # extract the confusion matrix values as data.frame
  cm_d <- as.data.frame(cm$table)

  # Flag the diagonal explicitly. Comparing the labels as character keeps the
  # test independent of how the two factors order their levels.
  on_diag <- as.character(cm_d$Prediction) == as.character(cm_d$Reference)
  cm_d$diag <- on_diag
  cm_d$ndiag <- !on_diag

  # Turn empty cells into NA in the count column only. Running `== 0` over the
  # whole data frame would also blank out class labels that are named "0".
  cm_d$Freq[which(cm_d$Freq == 0)] <- NA
  cm_d$Reference <- reverse.levels(cm_d$Reference) # diagonal starts at top left

  # Signed count that drives the fill: +Freq on the diagonal, -Freq elsewhere.
  cm_d$ref_freq <- cm_d$Freq * ifelse(cm_d$diag, 1, -1)

  # plotting the matrix
  ggplot(data = cm_d, aes(x = Prediction, y = Reference, fill = Freq)) +
    scale_x_discrete(position = "top") +
    geom_tile(data = cm_d, aes(fill = ref_freq)) +
    scale_fill_gradient2(
      guide = "none", # `guide = FALSE` is deprecated in ggplot2
      low = "red", high = "yellow", midpoint = 0, na.value = "white"
    ) +
    # Label with the unsigned count; NA counts are dropped (with a warning).
    geom_text(aes(label = Freq), color = "black", size = 6) +
    theme_light() +
    theme(
      panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
      legend.position = "none",
      panel.border = element_blank(),
      plot.background = element_blank(),
      axis.line = element_blank()
    )
}

library(caret)
#> Loading required package: lattice
# Simulated data: 100 predicted and 100 reference labels drawn from 7 classes.
set.seed(23)
classes <- 1:7
# Pin the levels so both factors share one level set even if a class happens
# not to be drawn; spell out TRUE because `T` is an ordinary, reassignable
# variable.
pred <- factor(sample(classes, 100, replace = TRUE), levels = classes)
ref <- factor(sample(classes, 100, replace = TRUE), levels = classes)
cm <- caret::confusionMatrix(pred, ref)
g <- plot.cm(cm)
g
#> Warning: Removed 8 rows containing missing values (geom_text).

Created on 2020-04-29 by the reprex package (v0.3.0)



来源:https://stackoverflow.com/questions/61504970/ggplot2-heatmap-2-different-color-schemes-confusion-matrix-matches-in-differe

易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!