问题
I am trying to create a biplot for a linear discriminant analysis (LDA). I am using a modified version of the code obtained from here: https://stats.stackexchange.com/questions/82497/can-the-scaling-values-in-a-linear-discriminant-analysis-lda-be-used-to-plot-e
However, I have 80 variables, making the biplot extremely difficult to read. This is worsened by highly contributing variables, since their arrow lengths are very long and the remaining labels are scrunched up in the middle.
So what I am trying to achieve is a biplot where all variable arrows are of equal length, and their relative contributions (scalings) are distinguished by graded colours.
So far I have managed to get the graded colours, but I can't find a way to make the arrow lengths the same. From what I understand, `geom_text` and `geom_segment` use the LD1 and LD2 values to determine both the length and direction of the arrows. How can I override the length?
CODE:
library(ggplot2)
library(grid)  # provides arrow() and unit()
library(MASS)

data(iris)

# Fit the LDA with Species as the grouping factor and every other column as
# a predictor.
iris.lda <- lda(as.factor(Species) ~ ., data = iris)

# Project data on linear discriminants
iris.lda.values <- predict(iris.lda, iris[, -5])

# Extract the scaling for each predictor: one row per variable holding its
# name and its coefficient on each discriminant.
# coef(iris.lda) is equivalent to iris.lda$scaling
data.lda <- data.frame(varnames = rownames(coef(iris.lda)), coef(iris.lda))

# Euclidean length of each scaling vector; mapped to alpha below so that
# variables with larger scalings are drawn more opaque.
data.lda$length <- with(data.lda, sqrt(LD1^2 + LD2^2))

# Plot the results.
# ggplot() + geom_point() + ggtitle() replaces the deprecated qplot() call and
# builds the same scatter of the discriminant scores.
p <- ggplot(data.frame(iris.lda.values$x),
            aes(x = LD1, y = LD2, colour = iris$Species)) +
  geom_point() +
  ggtitle("LDA") +
  stat_ellipse(aes(fill = iris$Species), geom = "polygon", alpha = .3)

# Reference lines through the origin; the intercepts are constants, so they
# are passed as parameters rather than mapped through aes().
p <- p + geom_hline(yintercept = 0, size = .2) +
  geom_vline(xintercept = 0, size = .2)
p <- p + theme(legend.position = "right")

# Variable labels at the tip of each arrow. `position` is a layer argument,
# not an aesthetic, so it sits outside aes().
p <- p + geom_text(data = data.lda,
                   aes(x = LD1, y = LD2, label = varnames, alpha = length),
                   position = "identity",
                   size = 4, vjust = .5, hjust = 0, color = "red")

# One arrow per variable from the origin to its (LD1, LD2) scaling, so both
# the direction and the length of the arrow come from the scaling values.
p <- p + geom_segment(data = data.lda,
                      aes(x = 0, y = 0, xend = LD1, yend = LD2,
                          alpha = length),
                      arrow = arrow(length = unit(0.1, "mm")),
                      color = "red")

# Swap the axes so LD1 is drawn vertically and LD2 horizontally.
p <- p + coord_flip()
print(p)
回答1:
How about something like this? We have to do some trigonometry to get the lengths to be equal. Note that the equality is in plot coordinates, so if you want the lines to actually appear equal in size, you'll need to add `coord_equal()`.
(I cleaned up your plotting code, since a lot of it was quite a mess.)
# Common length, in plot coordinates, given to every variable line.
rad <- 3

# Magnitude of each variable's scaling vector; mapped to alpha in the plot.
data.lda[["length"]] <- sqrt(data.lda[["LD1"]]^2 + data.lda[["LD2"]]^2)
# Direction of each scaling vector. LD1 is the first ("y") argument of
# atan2() because LD1 is drawn on the y axis below.
data.lda[["angle"]] <- atan2(data.lda[["LD1"]], data.lda[["LD2"]])
# Every line starts at the origin.
data.lda[["y_start"]] <- 0
data.lda[["x_start"]] <- 0
# Label anchors: the point at distance `rad` from the origin along `angle`.
data.lda[["x_end"]] <- rad * cos(data.lda[["angle"]])
data.lda[["y_end"]] <- rad * sin(data.lda[["angle"]])

# Draw the discriminant scores, then overlay the equal-length variable lines.
ggplot(
  data = cbind(iris, iris.lda.values$x),
  mapping = aes(x = LD2, y = LD1, colour = Species)
) +
  stat_ellipse(mapping = aes(fill = Species), geom = "polygon", alpha = 0.3) +
  geom_point() +
  geom_hline(yintercept = 0, size = 0.2) +
  geom_vline(xintercept = 0, size = 0.2) +
  geom_text(
    data = data.lda,
    mapping = aes(x = x_end, y = y_end, label = varnames, alpha = length),
    colour = "red", size = 4, hjust = 0, vjust = 0.5
  ) +
  geom_spoke(
    data = data.lda,
    mapping = aes(x = x_start, y = y_start, angle = angle, alpha = length),
    colour = "red", radius = rad, size = 1
  ) +
  ggtitle("LDA") +
  theme(legend.position = "right")
回答2:
I think I have simpler code for producing the biplot. I hope the code below helps. I have used the Iris dataset for the analysis.
library(readr)
library(MASS)

# Load the Iris data from a local CSV file.
IR <- read_csv("D:/Keerthesh/R Folder/DataSet/Iris.csv")

# --- data partition -- #
# Seeded draw, without replacement, of the row indices for a 60% training
# split; the remaining rows form the test set.
set.seed(555)
IRSam <- sample.int(nrow(IR), size = floor(0.60 * nrow(IR)))
IRTrain <- IR[IRSam, ]
IRTest <- IR[-IRSam, ]

# Fit the LDA on the training rows, using every column except Id as a
# predictor of Species, and show the fitted model.
IR.lda <- lda(Species ~ . - Id, data = IRTrain)
print(IR.lda)
To plot the biplot, you will need to install the ggord package from GitHub.
# devtools supplies install_github(), which is used to install ggord from
# GitHub.
library(devtools)
install_github('fawda123/ggord')
library(ggord)

# Biplot of the fitted LDA, with observations grouped by the training-set
# Species and fixed axis limits.
ggord(IR.lda, IRTrain$Species, ylim = c(-5, 5), xlim = c(-10, 10))
来源:https://stackoverflow.com/questions/40121516/lda-contribution-biplot