I've just started using the `geom_map` function in ggplot2. After having read the 29 posts I could find on `geom_map` on here, I'm still…
The countries missing from the `ARCTP53_SOExample` data frame show up as missing regions on the map. This can be compensated for with a base layer made from the `world_map` data frame:
# ggplot2 provides ggplot(), geom_map() and map_data(); the maps package
# supplies the "world" polygons that map_data() turns into a data frame.
library(ggplot2)
library(maps)

world_map <- map_data("world")

gg <- ggplot(ARCTP53_SOExample) +
  # Base layer: draw every region found in world_map so that countries
  # absent from the data still appear (white fill, black outline).
  geom_map(
    data = world_map, map = world_map,
    aes(map_id = region),
    fill = "white", colour = "black"
  ) +
  # Data layer: only the countries present in the data, one fill per country.
  geom_map(
    map = world_map,
    aes(map_id = Country, fill = Country),
    colour = "black"
  ) +
  # geom_map() does not set the axis ranges itself, so stretch the limits
  # to the full extent of the map polygons.
  expand_limits(x = world_map$long, y = world_map$lat) +
  theme(legend.position = "none")

gg
I removed the legend since using a choropleth kinda assumes folks know geography.
NOTE: Using a distinct color per-region (country) is really not a good idea. Since you're really trying to only highlight where the mutation has been studied, a single color should suffice:
# Highlight every country present in the data with one fixed colour.
gg <- ggplot(ARCTP53_SOExample) +
  # Base layer: all regions of world_map, white with black outlines.
  geom_map(
    data = world_map, map = world_map,
    aes(map_id = region),
    fill = "white", colour = "black"
  ) +
  # Data layer: fill is a constant, not an aesthetic, so every matched
  # country gets the same colour.
  geom_map(
    map = world_map,
    aes(map_id = Country),
    fill = "steelblue", colour = "black"
  ) +
  # Stretch the axes to the full extent of the map polygons.
  expand_limits(x = world_map$long, y = world_map$lat) +
  theme(legend.position = "none")

gg
Since you eventually want to tell the story of `ExonIntron`, you might want to consider using that as the color of the choropleth. I know nothing of genes, so I don't know if a gradient makes sense or if distinct colors are the way to go. I will posit that the plethora of distinct colors created by the following code makes me think that you might want to do one gradient scale for intron and one for exon. Again, I'm not a gene person.
# Colour each country by its ExonIntron value; the legend is left visible.
gg <- ggplot(ARCTP53_SOExample) +
  # Base layer: all regions of world_map, white with black outlines.
  geom_map(
    data = world_map, map = world_map,
    aes(map_id = region),
    fill = "white", colour = "black"
  ) +
  # Data layer: fill mapped to ExonIntron for the countries in the data.
  geom_map(
    map = world_map,
    aes(map_id = Country, fill = ExonIntron),
    colour = "black"
  ) +
  # Stretch the axes to the full extent of the map polygons.
  expand_limits(x = world_map$long, y = world_map$lat)

gg
Some of the colors are either in really tiny regions, or they are in regions whose names don't match the names in `world_map$region`. You'll probably want to take a look at that. This:
# Compare the distinct country names in the data with the distinct region
# names in the map data.
wm.reg <- unique(as.character(world_map$region))
arc.reg <- unique(as.character(ARCTP53_SOExample$Country))
# One logical per data country: FALSE means the name is not a map region.
arc.reg %in% wm.reg
## [1] TRUE TRUE FALSE TRUE TRUE TRUE TRUE TRUE FALSE TRUE TRUE TRUE TRUE
## [14] TRUE TRUE TRUE TRUE TRUE TRUE FALSE TRUE FALSE FALSE TRUE TRUE
# List the unmatched names themselves so they can be recoded to the
# spelling used in world_map$region.
setdiff(arc.reg, wm.reg)
kinda shows some are missing.
You might also want to consider doing the legend differently (i.e. put it on the bottom) if you go with a legend vs build your own table of results.
UPDATE
I almost forgot. Since you (most likely) have no need for Antarctica, you should get rid of it since it's eating up quite a bit of valuable space:
# Remove Antarctica's polygons so they no longer take up plot space.
world_map <- subset(world_map, region != "Antarctica")

gg <- ggplot(ARCTP53_SOExample) +
  # Base layer: all remaining regions, white with black outlines.
  geom_map(
    data = world_map, map = world_map,
    aes(map_id = region),
    fill = "white", colour = "black"
  ) +
  # Data layer: fill mapped to ExonIntron for the countries in the data.
  geom_map(
    map = world_map,
    aes(map_id = Country, fill = ExonIntron),
    colour = "black"
  ) +
  # Limits now come from the trimmed map, so the Antarctic band is gone.
  expand_limits(x = world_map$long, y = world_map$lat) +
  theme(legend.position = "none")

gg
(NOTE: I got rid of the legend since I really think you should re-think how you want the colors on the map and then use an extra table or plot to act as the legend)
FINAL UPDATE (per OP request in the comments below)
library(ggplot2)
library(maps)
library(plyr)
library(gridExtra)
ARCTP53_SOExample <- read.csv("dat.csv")

# Reduce all the distinct exon/intron labels to just "exon" or "intron".
# NOTE(review): every value that does not contain "exon" (case-insensitive)
# is labelled "intron", including NA or any other label - confirm that
# this suits the data.
ARCTP53_SOExample$EorI <- factor(
  ifelse(
    grepl("exon", ARCTP53_SOExample$ExonIntron, ignore.case = TRUE),
    "exon", "intron"
  )
)

# Summary for the two variables we care about for the map: number of rows
# per (Country, EorI) pair. plyr::count() names the count column "freq".
arc.combined <- count(ARCTP53_SOExample, .(Country, EorI))
colnames(arc.combined) <- c("region", "EorI", "ei.ct")

# Total per country (region): sum ei.ct within each region, then attach the
# total to every row of that region. The join key is stated explicitly and
# only the count column is renamed, so the result does not depend on
# merge()'s column ordering.
region.totals <- count(arc.combined, .(region), wt_var = .(ei.ct))
colnames(region.totals) <- c("region", "region.total")
arc.combined <- merge(arc.combined, region.totals, by = "region")

# it wasn't specified if the "EorI" is going to be used on the map so
# we won't use it below (but we could, now)

# get the world map polygons and drop Antarctica
world_map <- map_data("world")
world_map <- subset(world_map, region != "Antarctica")
# Counts by country with the "chart junk" removed, the counts scaled as a
# gradient, and the legend at the top.
gg <- ggplot(arc.combined) +
  # Base layer: all regions, white with thin grey outlines.
  geom_map(
    data = world_map, map = world_map,
    aes(map_id = region),
    fill = "white", colour = "#7f7f7f", size = 0.25
  ) +
  # Data layer: fill by the per-country total. The outline colour matches
  # the base layer; without a colour the size setting has no visible effect.
  geom_map(
    map = world_map,
    aes(map_id = region, fill = region.total),
    colour = "#7f7f7f", size = 0.25
  ) +
  scale_fill_gradient(low = "#fff7bc", high = "#cc4c02", name = "Tumor counts") +
  # Stretch the axes to the full extent of the map polygons.
  expand_limits(x = world_map$long, y = world_map$lat) +
  labs(x = "", y = "", title = "Tumor contribution by country") +
  # Strip grid lines, panel border, axis ticks and axis text.
  theme(panel.grid = element_blank(), panel.border = element_blank()) +
  theme(axis.ticks = element_blank(), axis.text = element_blank()) +
  theme(legend.position = "top")

gg
# BUT you might want to show the counts by intron/exon by country
# SO we do a separate map for each factor and combine them
# with some grid magic. This provides more granular control over
# each choropleth (in the event one wanted to tweak one or the other)

# exon: keep only the "exon" rows and fill by their per-country count
gg.exon <- ggplot(arc.combined[arc.combined$EorI == "exon", ]) +
  # Base layer: all regions, white with thin grey outlines.
  geom_map(
    data = world_map, map = world_map,
    aes(map_id = region),
    fill = "white", colour = "#7f7f7f", size = 0.25
  ) +
  # Data layer: outline colour matches the intron panel so the two stacked
  # maps look alike; without a colour the size setting has no visible effect.
  geom_map(
    map = world_map,
    aes(map_id = region, fill = ei.ct),
    colour = "#7f7f7f", size = 0.25
  ) +
  scale_fill_gradient(low = "#f7fcb9", high = "#238443", name = "Tumor counts") +
  # Stretch the axes to the full extent of the map polygons.
  expand_limits(x = world_map$long, y = world_map$lat) +
  labs(x = "", y = "", title = "Tumor contribution by 'exon' & country") +
  # Strip grid lines, panel border, axis ticks and axis text.
  theme(panel.grid = element_blank(), panel.border = element_blank()) +
  theme(axis.ticks = element_blank(), axis.text = element_blank()) +
  theme(legend.position = "top")
# Keep gg pointing at the most recent plot, as the step-by-step code did.
gg <- gg.exon
# intron: keep only the "intron" rows and fill by their per-country count
gg.intron <- ggplot(arc.combined[arc.combined$EorI == "intron", ]) +
  # Base layer: all regions, white with thin grey outlines.
  geom_map(
    data = world_map, map = world_map,
    aes(map_id = region),
    fill = "white", colour = "#7f7f7f", size = 0.25
  ) +
  # Data layer: fill by the intron count, same outline as the base layer.
  geom_map(
    map = world_map,
    aes(map_id = region, fill = ei.ct),
    colour = "#7f7f7f", size = 0.25
  ) +
  scale_fill_gradient(low = "#ece7f2", high = "#0570b0", name = "Tumor counts") +
  # Stretch the axes to the full extent of the map polygons.
  expand_limits(x = world_map$long, y = world_map$lat) +
  labs(x = "", y = "", title = "Tumor contribution by 'intron' & country") +
  # Strip grid lines, panel border, axis ticks and axis text.
  theme(panel.grid = element_blank(), panel.border = element_blank()) +
  theme(axis.ticks = element_blank(), axis.text = element_blank()) +
  theme(legend.position = "top")
# Keep gg pointing at the most recent plot, as the step-by-step code did.
gg <- gg.intron

# use some grid magic to stack the exon and intron maps in one column
grid.arrange(gg.exon, gg.intron, ncol = 1)