I am trying to reproduce the simple population pyramid from the post "Simpler population pyramid in ggplot2"
using ggplot2 and dplyr (instead of plyr).
You can avoid both dplyr and plyr when making population pyramids with recent versions of ggplot2.
If you already have counts of the sizes of the age-sex groups, use the answer linked here.
If your data is at the individual level (as yours is), use the following:
# Simulate individual-level data: one row per person, with an age-like
# value `v` (1-20) and a sex label `g`.
set.seed(321)  # fixed seed so the sampled values are reproducible
test <- data.frame(
  v = sample(1:20, 1000, replace = TRUE),
  # The length-2 vector is recycled down the 1000 rows: M, F, M, F, ...
  g = c("M", "F")
)
head(test)
#    v g
# 1 20 M
# 2 19 F
# 3  5 M
# 4  6 F
# 5  8 M
# 6  7 F
library("ggplot2")
# Population pyramid from individual-level rows: each layer counts its own
# subset, and the male counts are negated so the two sexes point in opposite
# directions from zero.
ggplot(data = test, aes(x = as.factor(v), fill = g)) +
  geom_bar(data = subset(test, g == "F")) +
  geom_bar(
    data = subset(test, g == "M"),
    # after_stat(count) replaces the deprecated ..count.. notation
    # (requires ggplot2 >= 3.3.0).
    mapping = aes(y = -after_stat(count)),
    position = "identity"
  ) +
  # Label the count axis with magnitudes rather than signed values.
  scale_y_continuous(labels = abs) +
  # Put the values of v on the vertical axis and the counts horizontally.
  coord_flip()
You can avoid the error by passing the `data` argument explicitly to each `geom_bar()` layer:
# Same pyramid, but each geom_bar() layer receives its own filtered data
# frame through the `data` argument.
ggplot(data = test, aes(x = as.factor(v), fill = g)) +
  geom_bar(data = dplyr::filter(test, g == "F")) +
  geom_bar(
    data = dplyr::filter(test, g == "M"),
    # Negate the computed counts so the male bars point the opposite way.
    # after_stat(count) replaces the deprecated ..count.. notation
    # (requires ggplot2 >= 3.3.0).
    mapping = aes(y = -after_stat(count))
  ) +
  # Fixed breaks every 10 units; `abs` labels them without the minus sign.
  scale_y_continuous(breaks = seq(-40, 40, 10), labels = abs) +
  coord_flip()
To build an Age Pyramid with individual data or microdata you can use:
# Individual-level data (microdata): one row per person, with age `v` drawn
# from 1-100 and sex `g` alternating M, F through recycling.
test <- data.frame(
  v = sample(1:100, 1000, replace = TRUE),
  g = c("M", "F")
)

# Age pyramid: one histogram layer per sex, in bins 5 units wide.
ggplot(data = test, aes(x = v, fill = g)) +
  geom_histogram(
    data = subset(test, g == "F"),
    binwidth = 5,
    color = "white",
    position = "identity"
  ) +
  geom_histogram(
    data = subset(test, g == "M"),
    binwidth = 5,
    color = "white",
    position = "identity",
    # Negate the computed counts so the male bars point the opposite way.
    # after_stat(count) replaces the deprecated ..count.. notation
    # (requires ggplot2 >= 3.3.0).
    mapping = aes(y = -after_stat(count))
  ) +
  scale_x_continuous("Age", breaks = seq(0, 100, by = 5)) +
  # `abs` labels the count axis with magnitudes instead of signed values.
  scale_y_continuous("Population", breaks = seq(-30, 30, 10), labels = abs) +
  scale_fill_discrete(name = "Sex") +
  coord_flip() +
  theme_bw()
Changing the binwidth in geom_histogram() groups your data into wider (or narrower) categories.
For example, with binwidth set to 10 and the axis breaks adjusted accordingly:
# Age pyramid with wider age groups: bins 10 units wide, with the axis breaks
# widened to match.
ggplot(data = test, aes(x = v, fill = g)) +
  geom_histogram(
    data = subset(test, g == "F"),
    binwidth = 10,
    color = "white",
    position = "identity"
  ) +
  geom_histogram(
    data = subset(test, g == "M"),
    binwidth = 10,
    color = "white",
    position = "identity",
    # Negate the computed counts so the male bars point the opposite way.
    # after_stat(count) replaces the deprecated ..count.. notation
    # (requires ggplot2 >= 3.3.0).
    mapping = aes(y = -after_stat(count))
  ) +
  scale_x_continuous("Age", breaks = seq(0, 100, by = 10)) +
  # `abs` labels the count axis with magnitudes instead of signed values.
  scale_y_continuous("Population", breaks = seq(-100, 100, 10), labels = abs) +
  scale_fill_discrete(name = "Sex") +
  coord_flip() +
  theme_bw()