I have a sample of 1 million records obtained from my original data. (For reference, you may use the dummy data below, which generates an approximately similar distribution.)
Here's a slightly different approach which uses geom_polygon(...) instead of multiple calls to stat_function(...). One problem with stat_function(...) is that the secondary arguments (mu, sigma, and lambda in this example), which are passed using the args=list(...) parameter, cannot be included in an aesthetic mapping, so you need a separate call to stat_function(...) for each component, as in @Spacedman's solution.
This approach builds the probability density functions (PDFs) outside of ggplot and uses a single call to geom_polygon(...). As a result, it works without modification for an arbitrary number of distributions in the mixture.
#' Plot a fitted normal mixture on top of a density histogram of the data
#'
#' Each component's weighted normal density, `lambda * dnorm(x, mu, sigma)`,
#' is evaluated on a common grid outside of ggplot and all components are
#' drawn with a single `geom_polygon()` layer, so the function works
#' unchanged for any number of mixture components.
#'
#' @param EM A fitted mixture, e.g. the result of `mixtools::normalmixEM()`.
#'   The elements `x`, `mu`, `sigma`, `lambda` and `posterior` are used.
#' @param n_grid Number of grid points at which the densities are evaluated.
#' @param bins Number of histogram bins (30 is the `geom_histogram()`
#'   default).
#' @return A `ggplot` object.
gg.mixEM <- function(EM, n_grid = 1000, bins = 30) {
  # library() instead of require(): a missing package now fails loudly
  # rather than returning FALSE. ggplot2 is still attached, as before, so
  # callers can keep adding layers to the returned plot.
  library(ggplot2)

  comp_names <- colnames(EM$posterior)
  if (is.null(comp_names)) {
    comp_names <- paste0("comp.", seq_along(EM$lambda))
  }

  # One row per component. `comp` is a factor whose levels follow the
  # component order, so the legend labels below line up with the components
  # even when the names do not sort alphabetically (comp.10 vs comp.2).
  pars <- data.frame(
    comp = factor(comp_names, levels = comp_names),
    mu = EM$mu,
    sigma = EM$sigma,
    lambda = EM$lambda
  )

  grid <- seq(min(EM$x), max(EM$x), length.out = n_grid)

  # Build one closed outline per component. The two zero-height anchor
  # points make the polygon close along the y = 0 baseline instead of along
  # a straight line between the first and last curve points.
  outlines <- lapply(seq_len(nrow(pars)), function(i) {
    dens <- pars$lambda[i] * dnorm(grid, mean = pars$mu[i], sd = pars$sigma[i])
    data.frame(
      comp = pars$comp[i],
      x = c(grid[1], grid, grid[length(grid)]),
      y = c(0, dens, 0)
    )
  })
  em.df <- do.call(rbind, outlines)

  ggplot(data.frame(x = EM$x), aes(x, y = after_stat(density))) +
    geom_histogram(fill = NA, color = "black", bins = bins) +
    geom_polygon(
      data = em.df,
      aes(x, y, fill = comp),
      color = "grey50",
      alpha = 0.5
    ) +
    # Exactly one label per component (not one per grid row).
    scale_fill_discrete(
      "Component\nMeans",
      labels = format(pars$mu, digits = 3)
    ) +
    theme_bw()
}
library(mixtools)
# Example 1: mixture of two normals.
# rnorm() recycles the mean vector, so draws alternate between mean 8 and
# mean 17, both with sd 2.
set.seed(1)  # fixed seed so the example is reproducible
b <- rnorm(2e6, mean = c(8, 17), sd = 2)
# Keep a random subsample of one million of the two million draws.
c <- sample(b, size = 1e6)
# Fit with the starting values left unspecified (NULL).
c2 <- normalmixEM(c, lambda = NULL, mu = NULL, sigma = NULL)
gg.mixEM(c2)
# Example 2: mixture of three normals.
# Means (8, 17, 30) and sds (2, 3, 5) are recycled together across the draws.
library(mixtools)
set.seed(1)
b <- rnorm(2e6, mean = c(8, 17, 30), sd = c(2, 3, 5))
# Keep a random subsample of one million of the two million draws.
c <- sample(b, size = 1e6)
# k = 3 requests a three-component fit; starting values are left as NULL.
c3 <- normalmixEM(c, k = 3, lambda = NULL, mu = NULL, sigma = NULL)
gg.mixEM(c3)