I'm trying to make a plot similar to Fig. 2d-f in an article published in Nature this year. It's basically a half boxplot with the points on the other half.
Can anyone give me some hints? Thank you very much!
These are my data and my code, which produces full boxes with the points inside:
# Attach the packages used throughout. library() stops with an error if a
# package is missing, whereas require() only warns and returns FALSE, which
# would let the script continue and fail later with a confusing message.
library(magrittr)
library(tidyverse)
# Example data (dput() output): a 40-row tibble with six numeric columns
# p1..p6 and two character grouping columns, `type` ("small"/"big") and
# `loc` ("abro"/"dome"), with 10 rows per type/loc combination.
dat <- structure(list(p1 = c(0.0854261831077604, 0.408418657218253,
0.577793646477315, 0.578028229977424, 0.48933166218204, 0.53117814324334,
0.526653494462464, 0.00687616283435221, 0.444300425796509, 0.00287319455358522,
0.949821402532831, 0.96832469523368, 0.953281969982759, 0.360125244759434,
0.407921095422844, 0.885776732104954, 0.159882184516691, 0.911094990767761,
0.0444367172734037, 0.144888951725151, 0.508858686640707, 0.694913731085945,
0.117270366119258, 0.78227546070467, 0.980457304886186, 0.711464034564424,
0.753944466390685, 0.0474210438747038, 0.00344183466223558, 0.0290017465534545,
0.75092385236303, 0.868873921257987, 0.744396990487425, 0.0140007244233847,
0.0332266395043963, 0.482897084793009, 0.0535516646483004, 0.452926358923891,
0.0144057727301603, 0.171918034525543), p2 = c(0.101262675229211,
0.196913109208586, 0.37814311161382, 0.0677625689405156, 0.12517090579686,
0.409083554335168, 0.158886941347288, 0.847394861862651, 0.180560031076741,
0.967122694294885, 0.000901627067665116, 0.00039495110143705,
9.70707318411806e-05, 0.546200038486894, 0.435475454787648, 5.95555269800323e-06,
0.0178837768834925, 8.42690065415846e-06, 0.00777059697751842,
0.0020397073541544, 0.486699073016371, 0.283679673247571, 0.857183359146641,
0.200712003853458, 0.0164911141652784, 0.0542250670734297, 0.232340206984506,
0.948523714169708, 0.169881661474024, 0.968983592882272, 0.00250367590158291,
0.000792323746977033, 0.000185068166140097, 0.0193600071757997,
0.114775271592724, 4.65931778380389e-05, 0.000754760900847164,
2.07521623816406e-05, 0.00782764273312856, 0.00276993826117348
), p3 = c(0.0118642223785376, 0.0267362912322735, 6.60753171741111e-08,
0.053576051466652, 0.00375873110094442, 9.85095078844696e-08,
0.0525436528683484, 0.0193735809639814, 8.44717454802822e-07,
0.00608007737576027, 0.0205563904131287, 0.0104638062130591,
0.0249997053664864, 0.0587924727726031, 0.0443600964770995, 0.067125687916273,
0.758612877724648, 0.0618158334848203, 0.0251025592849138, 0.790905778949543,
0.00126904829915329, 0.00760772364901772, 0.00119821088328392,
0.0115117347754715, 0.000863676435448072, 0.000996891439583434,
0.0115279148630096, 0.00249122388568909, 5.21508620418823e-05,
0.00144050407848742, 0.120373444447631, 0.0534773096149069, 0.110284261289338,
0.571243879053544, 0.438152084363961, 0.364887514202121, 0.696293189762153,
0.414870716968937, 0.0557358576822093, 0.783929426716999), p4 = c(0.000107231042599948,
0.000379648762557529, 8.25102162601208e-06, 0.000343829024899591,
0.000140680688077216, 1.90076798696051e-06, 0.000214507212681323,
1.38587688080716e-05, 3.48104084092359e-06, 6.50782599216903e-07,
0.0114584884733498, 0.00652170746426181, 0.0143309604192116,
0.0275718029789144, 0.0352327288308957, 0.022950800779703, 0.0569939247302654,
0.0190248244391564, 0.0305921420687752, 0.00589871320676732,
0.000805515847378872, 1.97674357551495e-05, 8.30853708305541e-06,
1.32462751169762e-06, 4.8731965929686e-05, 0.0057411315642433,
4.82406700397824e-05, 0.000204633566379066, 0.0552263911781015,
0.000181994007177494, 0.0585729576787707, 0.0273685460128338,
0.0568746134466117, 0.299309335625926, 0.278980446497419, 0.105600715225359,
0.176549247514501, 0.101420411455169, 0.01003894550707, 0.0010803018725911
), p5 = c(0.786823338804824, 0.151956168584644, 0.0433468890359269,
0.19556481029922, 0.380808150243027, 0.0389798680141623, 0.260481184897901,
0.101147673996922, 0.0184624278061585, 0.0222416874775066, 0.000113517761014704,
0.00329593083795693, 0.000476682365422989, 0.00571997662739322,
0.0697473913851358, 0.0216803412883361, 0.00631472476841249,
0.00628215584877364, 0.540944692186543, 0.0135127011440213, 0.00235752761214414,
3.10282042735927e-06, 0.0239147204208516, 4.97334784773176e-05,
0.00213837866453402, 0.000212207014031345, 0.00180443364400107,
8.15954685083038e-05, 0.00445169398173509, 0.000391265642772285,
0.0676128522356959, 0.0494864355994384, 0.0882575475549674, 0.0960799089263987,
0.134853114895623, 0.0465661014986807, 0.0728456746626632, 0.0307607877988244,
0.476388236185883, 0.00831263646470973), p6 = c(0.0145163494370677,
0.215596124993685, 0.00070803577599434, 0.104724510291289, 0.000789869989050939,
0.0207564351298348, 0.00122021921131791, 0.0251938615732845,
0.356672789562296, 0.00168169551566413, 0.0171485737520108, 0.0109989091496048,
0.00681361113427885, 0.00159046437476052, 0.00726323309637717,
0.00246048235803604, 0.000312511376490686, 0.00177376855883463,
0.351153292208846, 0.0427541476203625, 1.01485842454486e-05,
0.0137760017612841, 0.000425034892882118, 0.0054497425604112,
7.93882623673471e-07, 0.227360668344289, 0.000334737447758259,
0.0012777890350116, 0.766946267841861, 8.96835836820999e-07,
1.32173732897771e-05, 1.46376785664669e-06, 1.51905551715105e-06,
6.14479494697213e-06, 1.24431458762028e-05, 1.99110299298599e-06,
5.46251153509928e-06, 9.72690797485877e-07, 0.435603545161549,
0.0319896621589845), type = c("small", "small", "small", "small",
"small", "small", "small", "small", "small", "small", "small",
"small", "small", "small", "small", "small", "small", "small",
"small", "small", "big", "big", "big", "big", "big", "big", "big",
"big", "big", "big", "big", "big", "big", "big", "big", "big",
"big", "big", "big", "big"), loc = c("abro", "abro", "abro",
"abro", "abro", "abro", "abro", "abro", "abro", "abro", "dome",
"dome", "dome", "dome", "dome", "dome", "dome", "dome", "dome",
"dome", "abro", "abro", "abro", "abro", "abro", "abro", "abro",
"abro", "abro", "abro", "dome", "dome", "dome", "dome", "dome",
"dome", "dome", "dome", "dome", "dome")), .Names = c("p1", "p2",
"p3", "p4", "p5", "p6", "type", "loc"), class = c("tbl_df", "tbl",
"data.frame"), row.names = c(NA, -40L))
# Print a compact overview of the columns; the `#>` lines below are the
# output recorded when this was originally run.
glimpse(dat)
#> Observations: 40
#> Variables: 8
#> $ p1 <dbl> 0.085426183, 0.408418657, 0.577793646, 0.578028230, 0.489...
#> $ p2 <dbl> 1.012627e-01, 1.969131e-01, 3.781431e-01, 6.776257e-02, 1...
#> $ p3 <dbl> 1.186422e-02, 2.673629e-02, 6.607532e-08, 5.357605e-02, 3...
#> $ p4 <dbl> 1.072310e-04, 3.796488e-04, 8.251022e-06, 3.438290e-04, 1...
#> $ p5 <dbl> 7.868233e-01, 1.519562e-01, 4.334689e-02, 1.955648e-01, 3...
#> $ p6 <dbl> 1.451635e-02, 2.155961e-01, 7.080358e-04, 1.047245e-01, 7...
#> $ type <chr> "small", "small", "small", "small", "small", "small", "sm...
#> $ loc <chr> "abro", "abro", "abro", "abro", "abro", "abro", "abro", "...
Convert data to long format
# Stack the first six columns (p1..p6) into `key`/`value` pairs, then turn
# the grouping columns into factors; `loc` gets an explicit level order.
dat_long <- gather(dat, key, value, 1:6)
dat_long <- mutate(
  dat_long,
  loc = factor(loc, levels = c("abro", "dome")),
  type = factor(type),
  key = factor(key)
)
Plot boxplot with points
# Full-width boxplots drawn on top of jittered, semi-transparent points.
# One panel per loc (rows) x key (columns); single-row legend at the bottom.
dat_long %>%
  ggplot(mapping = aes(x = type, y = value, colour = key)) +
  geom_jitter(width = 0.3, alpha = 0.3, size = 2) +
  # Outliers are hidden because every observation is already shown as a point.
  geom_boxplot(outlier.colour = NA) +
  facet_grid(loc ~ key) +
  theme_light() +
  theme(legend.position = "bottom") +
  guides(colour = guide_legend(nrow = 1))
A very fast solution would be to add some nudge using position_nudge.
# Boxplots stay at the category position; the raw points are drawn
# 0.5 x-units to the right of it via position_nudge().
ggplot(data = dat_long, mapping = aes(x = type, y = value, fill = key)) +
  facet_grid(loc ~ key) +
  geom_boxplot(outlier.colour = NA) +
  geom_point(shape = 21, size = 2, position = position_nudge(x = 0.5))
Or transform the x axis factor to numeric and add some value
# Same layout as the nudge version, but the shift is done in the mapping:
# the factor is converted to its integer code and 0.5 is added for the points.
ggplot(data = dat_long, mapping = aes(x = type, y = value, fill = key)) +
  facet_grid(loc ~ key) +
  geom_boxplot(outlier.colour = NA) +
  geom_point(mapping = aes(x = as.numeric(type) + 0.5), shape = 21, size = 2)
A more general method for positioning along the x axis is the following. In brief, the idea is to add a second data layer containing the same boxes. The second set of boxes is hidden using a suitable linetype and alpha (see the scale_ calls), so that the space it occupies can be overplotted by the points.
# Long-format data tagged with gr = 1: the visible box layer.
dat_long <- dat %>%
  gather(key, value, 1:6) %>%
  mutate(
    loc = factor(loc, levels = c("abro", "dome")),
    type = factor(type),
    key = factor(key),
    gr = 1
  )

# Stack a gr = 2 copy on top of the original rows. Both copies are dodged as
# boxes, but the gr = 2 box is made invisible (alpha 0, blank outline), and
# the gr = 1 points are nudged to the right.
ggplot(
  data = bind_rows(mutate(dat_long, gr = 2), dat_long),
  mapping = aes(
    x = type,
    y = value,
    fill = key,
    alpha = factor(gr),
    linetype = factor(gr)
  )
) +
  geom_boxplot(outlier.colour = NA) +
  # Only the original rows are drawn as points, otherwise each would appear twice.
  geom_point(
    data = function(d) filter(d, gr == 1),
    position = position_nudge(x = 0.2, y = 0),
    shape = 21,
    size = 2
  ) +
  facet_grid(loc ~ key) +
  scale_alpha_discrete(range = c(1, 0)) +
  scale_linetype_manual(values = c("solid", "blank")) +
  guides(alpha = "none", linetype = "none")
Taking the code zankuralt posted below and adapting it for faceting, you can try:
# Half boxplot on the left of each category, jittered raw points on the
# right, and hand-drawn whiskers; one panel per loc (rows) x key (columns).
dat %>%
  gather(key, value, 1:6) %>%
  mutate(loc = factor(loc, levels = c("abro", "dome")),
         type = factor(type),
         key = factor(key)) %>%
  # Numeric copy of the x variable so the layers can be shifted left/right.
  mutate(type2 = as.numeric(type)) %>%
  # Per-box summary statistics, repeated on every row of the group so the
  # raw values stay available for the jitter layer.
  group_by(type, loc, key) %>%
  mutate(d_ymin = min(value),
         d_ymax = max(value),
         d_lower = quantile(value, 0.25),
         d_middle = median(value),
         d_upper = quantile(value, 0.75)) %>%
  ungroup() %>%
  ggplot() +
  # Box only: ymin/ymax are pinned to the hinges so geom_boxplot() draws no
  # whiskers of its own. With stat = "identity" ggplot2 accepts exactly one
  # row per group, so the repeated statistics are collapsed to one row per
  # box and every box gets its own group.
  geom_boxplot(data = function(d) {
                 distinct(d, type2, loc, key, d_lower, d_middle, d_upper)
               },
               aes(x = type2 - 0.2,
                   ymin = d_lower,
                   ymax = d_upper,
                   lower = d_lower,
                   middle = d_middle,
                   upper = d_upper,
                   width = 2 * 0.2,
                   fill = key,
                   group = type2),
               stat = "identity") +
  # Raw points, jittered horizontally only, to the right of the box.
  geom_jitter(aes(x = type2 + 0.2,
                  y = value,
                  color = key),
              width = 0.2 - 0.25 * 0.2,
              height = 0) +
  # vertical whisker line from the minimum to the maximum
  geom_segment(aes(x = type2,
                   y = d_ymin,
                   xend = type2,
                   yend = d_ymax)) +
  # top horizontal cap
  geom_segment(aes(x = type2 - 0.1,
                   y = d_ymax,
                   xend = type2,
                   yend = d_ymax)) +
  # bottom horizontal cap
  geom_segment(aes(x = type2 - 0.1,
                   y = d_ymin,
                   xend = type2,
                   yend = d_ymin)) +
  # have to manually add in the x scale because we made everything numeric
  # to do the shifting; factor(type) sorts its levels alphabetically, so
  # 1 = "big" and 2 = "small"
  scale_x_continuous(breaks = c(1, 2),
                     labels = c("big", "small")) +
  facet_grid(loc ~ key)
I find this hybrid boxplot very, very lovely so I wanted to recreate it too.
I wrote a geom_boxjitter that inherits from geom_boxplot and only adds minor changes:
- It draws the `geom_rect` only on the left half.
- It jitters the points with default width of the right half, default height 0.4*resolution and can also take a seed argument.
- It adds additional whiskers (the horizontal ones) if `errorbar.draw` is set to `TRUE`. Their length can also be adjusted.
You can check the code here. I think it is great how easy it has become to alter existing geoms with slight changes. Using part of your data:
library(tidyverse)
library(cowplot)
library(ggparl)
# Hybrid box/jitter plot for the p1 and p2 keys only, using
# geom_boxjitter() with horizontal whisker caps switched on.
P <- dat_long %>%
  filter(key %in% c("p1", "p2")) %>%
  ggplot(mapping = aes(x = type, y = value, fill = key)) +
  geom_boxjitter(
    outlier.color = NA,
    jitter.shape = 21,
    jitter.color = NA,
    jitter.height = 0.05,
    jitter.width = 0.075,
    errorbar.draw = TRUE
  ) +
  scale_fill_manual(values = c("#ecb21e", "#812e91")) +
  # Limits extend slightly beyond [0, 1].
  ylim(-0.05, 1.05) +
  theme(legend.position = "none")
P
来源:https://stackoverflow.com/questions/49003863/how-to-plot-a-hybrid-boxplot-half-boxplot-with-jitter-points-on-the-other-half



