Add P values to comparisons within groups boxplot

[亡魂溺海] 提交于 2021-02-05 07:56:50

问题


I'm trying to create a boxplot which shows only the significant p values for comparisons made within each group on the x-axis. For example, here it would compare I1 and SI2 within "fair", within "good", within "very good", etc.

I've tried using the following code to achieve the above plot

library(ggplot2)
library(dplyr)
library(ggpubr) # provides stat_compare_means()
data("diamonds")

# Text labels for every cut x clarity cell: the number of diamonds and the
# number of distinct colors, stacked on two lines.
# .groups = "drop" (dplyr >= 1.0.0) returns an ungrouped table and silences
# the "summarise() has grouped output" message.
labeldat <- diamonds %>%
  group_by(cut, clarity) %>%
  dplyr::summarise(
    labels = paste(n(), n_distinct(color), sep = "\n"),
    .groups = "drop"
  )

# All 28 pairwise combinations of the eight clarity levels, generated in
# level order (I1, SI2, SI1, VS2, VS1, VVS2, VVS1, IF) instead of typed by hand.
Comparisons <- combn(levels(diamonds$clarity), 2, simplify = FALSE)

# NOTE: this is the call the question is about. The comparisons name clarity
# levels while the x-axis holds cut, and the plot reports
# "Computation failed in stat_signif()".
ggplot(diamonds, aes(x = cut, y = price)) +
  geom_boxplot(aes(fill = clarity), position = position_dodge2(width = 0.75)) +
  theme_bw() +
  geom_text(
    data = labeldat,
    aes(x = cut, y = -250, label = labels),
    hjust = 0.5,
    position = position_dodge2(width = .75)
  ) +
  stat_compare_means(
    aes(group = clarity),
    label = "p.signif",
    method = "t.test",
    comparisons = Comparisons
  )

Unfortunately, using the comparisons argument seems to throw a computation error which I can't work out how to solve: Warning message: Computation failed in stat_signif(): missing value where TRUE/FALSE needed

I have tried running this without the comparisons, but it seems to just give me an overall score


回答1:


I'll preface this by saying that this example makes too many comparisons, so the result is cluttered: to fit the extra annotations, the y-axis is greatly expanded and the boxplots are squashed. But for the sake of providing an answer, and imagining you might have a dataset with fewer comparisons: the issue is that stat_compare_means() compares groups on the x-axis. To compare by clarity, you need to put clarity on the x-axis and then facet by cut.

library(ggplot2)
library(ggpubr)
library(dplyr)

# Text labels for every cut x clarity cell: the number of diamonds and the
# number of distinct colors, stacked on two lines.
# .groups = "drop" (dplyr >= 1.0.0) returns an ungrouped table and silences
# the "summarise() has grouped output" message.
labeldat <- diamonds %>%
  group_by(cut, clarity) %>%
  dplyr::summarise(
    labels = paste(n(), n_distinct(color), sep = "\n"),
    .groups = "drop"
  )

# Every pair of clarity levels, given as x-axis positions. The number of
# levels is read from the data rather than hard-coded, and simplify = FALSE
# returns a plain list of length-2 integer vectors.
clarity_pairs <- combn(seq_len(nlevels(diamonds$clarity)), 2, simplify = FALSE)

# clarity goes on the x-axis because stat_compare_means() compares x-axis
# groups; cut is moved to the facets.
ggplot(diamonds, aes(x = clarity, y = price)) +
  geom_boxplot(aes(fill = clarity), position = position_dodge2(width = 0.75)) +
  stat_compare_means(
    aes(group = clarity),
    label = "p.signif",
    method = "t.test",
    comparisons = clarity_pairs
  ) +
  facet_grid(cols = vars(cut)) +
  theme_bw() +
  geom_text(
    data = labeldat,
    aes(x = clarity, y = -2000, label = labels),
    hjust = 0.5,
    position = position_dodge2(width = .75)
  ) +
  # the fill legend already identifies clarity, so the x tick labels are dropped
  theme(axis.text.x = element_blank())




回答2:


You could use ggsignif for that. It allows for manual annotation, so you could calculate p-values separately, and create an annotation data.frame with filtered comparisons. Example:

library(ggplot2)
library(ggsignif)
library(dplyr)
library(data.table)

# Split the data by cut so the pairwise tests are run within each cut.
dm <- split(diamonds, diamonds$cut)

# Significant pairwise clarity comparisons for one subset of the data.
#
# y:    data frame with `price` and `clarity` columns.
# pval: significance cutoff; only pairs with a p-value below it are kept.
#
# Returns a data.table with one row per retained pair: the two clarity
# levels being compared and the unadjusted p-value (column `N`).
getp <- function(y, pval = .05) {
  tests <- stats::pairwise.wilcox.test(
    x = y$price, g = y$clarity,
    p.adjust.method = "none", paired = FALSE
  )
  # Reshape the p-value matrix to long form (one row per pair), then drop the
  # cells that hold no test result (NA) and the non-significant pairs.
  p_long <- as.data.table(as.table(tests$p.value))
  p_long[!is.na(N) & N < pval]
}

# One table for all cuts; the list names of dm become the `cut` column.
dmp <- data.table::rbindlist(lapply(dm, getp), idcol = "cut")
data.table::setnames(dmp, c("cut", "start", "end", "label"))

# rbindlist() stores the list names as character. Restore the ordered factor
# so the facet variable has the same type and level order here as in diamonds.
dmp[, cut := factor(cut, levels = levels(diamonds$cut), ordered = TRUE)]

# Show the p-values with three significant digits.
dmp[, label := formatC(
  signif(label, digits = 3),
  digits = 3,
  format = "g",
  flag = "#"
)]

# Spread the brackets of each cut evenly from 2e4 upwards (below 4e4) so
# they do not overlap.
dmp[, y := (seq_len(.N) - 1) * (2e4 / .N) + 2e4, by = cut]

# Convert to a plain data.frame (by reference) for use as layer data.
data.table::setDF(dmp)

# Boxplots of price by clarity, one panel per cut, with the precomputed
# significant comparisons in dmp drawn as brackets.
ggplot(diamonds, aes(x = clarity, y = price)) +
  geom_boxplot(
    aes(fill = clarity),
    position = position_dodge2(width = 0.75)
  ) +
  # manual = TRUE: bracket ends, labels and heights are read from dmp's columns
  ggsignif::geom_signif(
    data = dmp,
    mapping = aes(xmin = start, xmax = end, annotations = label, y_position = y),
    manual = TRUE,
    textsize = 2,
    vjust = -0.2
  ) +
  facet_wrap(vars(cut)) +
  # leave the lower limit automatic; raise the upper limit to fit the brackets
  scale_y_continuous(limits = c(NA, 4e4)) +
  theme_bw() +
  theme(axis.text.x = element_blank())



来源:https://stackoverflow.com/questions/60198635/add-p-values-to-comparisons-within-groups-boxplot

易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!