Variation on “How to plot decision boundary of a k-nearest neighbor classifier from Elements of Statistical Learning?”

后端 未结 1 1472
梦如初夏
梦如初夏 2020-12-05 16:33

This is a question related to https://stats.stackexchange.com/questions/21572/how-to-plot-decision-boundary-of-a-k-nearest-neighbor-classifier-from-elements-o

For co

相关标签:
1条回答
  • 2020-12-05 17:12

    Separating the main parts of the code will help outline how to achieve this:

    Training data with 3 classes

     # Training set: rows 1-25 of each of the three iris3 slices, restricted to
     # the first two measurement columns, stacked slice after slice.
     train <- do.call(
       rbind,
       lapply(1:3, function(species) iris3[1:25, 1:2, species])
     )
     # Class labels in the same row order as `train`: 25 "s", 25 "c", 25 "v".
     cl <- factor(rep(c("s", "c", "v"), each = 25))
    

    Test data covering a grid

     # library() fails loudly if the package is missing, whereas require() only
     # returns FALSE. Note that expand.grid() below is base R.
     library(MASS)

     # Regular lattice of query points covering the bounding box of the two
     # training columns, padded by `grid_margin` on every side and sampled
     # every `grid_step` units along each axis.
     grid_margin <- 1
     grid_step <- 0.1
     x_range <- range(train[, 1]) + c(-grid_margin, grid_margin)
     y_range <- range(train[, 2]) + c(-grid_margin, grid_margin)
     test <- expand.grid(x = seq(x_range[1], x_range[2], by = grid_step),
                         y = seq(y_range[1], y_range[2], by = grid_step))
    

    Classification for that grid

    3 classes obviously

     # library() stops with an error if the package is unavailable; require()
     # would only return FALSE and let the knn() call fail later.
     library(class)

     # Classify every grid point by a vote among its 3 nearest training points.
     # With prob = TRUE the result carries a "prob" attribute holding the
     # proportion of votes received by the winning class at each point.
     classif <- knn(train = train, test = test, cl = cl, k = 3, prob = TRUE)
     prob <- attr(classif, "prob")
    

    Data structure for plotting

     library(dplyr)

     # Long-format plotting table: one full copy of the grid per class.
     #   prob     - vote share of the predicted class at that grid point
     #   cls      - the class this copy of the grid stands for
     #   prob_cls - 1 where the grid point was predicted as `cls`, 0 otherwise
     # The classes are taken from the classifier output instead of being
     # written out by hand, so the same code works for any number of classes.
     dataf <- bind_rows(lapply(levels(classif), function(class_label) {
       mutate(test,
              prob = prob,
              cls = class_label,
              prob_cls = as.numeric(classif == class_label))
     }))
    

    Plot

     library(ggplot2)

     # Grid points tagged with their predicted class, and the training points
     # tagged with their true class.
     grid_pred <- mutate(test, cls = classif)
     train_pts <- data.frame(x = train[, 1], y = train[, 2], cls = cl)

     ggplot(dataf) +
       # Background: small dots coloured by the predicted class.
       geom_point(aes(x = x, y = y, colour = cls),
                  data = grid_pred,
                  size = 1.2) +
       # Decision boundary: prob_cls is a 0/1 indicator per class, so the
       # boundary is its 0.5 level. The level is given explicitly instead of
       # relying on how `bins` is converted into break values.
       geom_contour(aes(x = x, y = y, z = prob_cls, group = cls, colour = cls),
                    breaks = 0.5,
                    data = dataf) +
       # Foreground: the training observations.
       geom_point(aes(x = x, y = y, colour = cls),
                  size = 3,
                  data = train_pts)
    

    plot

    We can also be a little fancier and plot the probability of class membership as an indication of the "confidence".

     # Built once and reused by both training-point layers below.
     train_pts <- data.frame(x = train[, 1], y = train[, 2], cls = cl)

     ggplot(dataf) +
       # Background: grid dots coloured by predicted class and sized by the
       # winning class's vote share, so low-confidence regions look thinner.
       geom_point(aes(x = x, y = y, colour = cls, size = prob),
                  data = mutate(test, cls = classif)) +
       scale_size(range = c(0.8, 2)) +
       # Decision boundary: the 0.5 level of the per-class 0/1 indicator,
       # given explicitly instead of through `bins`.
       geom_contour(aes(x = x, y = y, z = prob_cls, group = cls, colour = cls),
                    breaks = 0.5,
                    data = dataf) +
       # Training observations, filled with their class colour ...
       geom_point(aes(x = x, y = y, colour = cls),
                  size = 3,
                  data = train_pts) +
       # ... and outlined with an open circle (shape 1) so they stand out
       # against the background dots.
       geom_point(aes(x = x, y = y),
                  size = 3, shape = 1,
                  data = train_pts)
    

    enter image description here

    0 讨论(0)
提交回复
热议问题