LC50 / LD50 confidence intervals from multiple regression glm with interaction

混江龙づ霸主 提交于 2019-12-03 16:34:46

Replicate data (update: new version of ggplot2 might not like weird data frames with matrices in them??)

mydata <- data.frame(
        LogPesticide = rep(log(c(0, 0.1, 0.2, 0.4, 0.8, 1.6) + 0.05), 4),
        LogFood = rep(log(c(1, 2, 4, 8)), each = 6)
      )
set.seed(seed=16) 

growth <- function(x, a = 1, K = 1, r = 1) {
    ## Logistic growth function. a = position of turning point
    ## K = carrying capacity
    ## r = growth rate (larger r -> narrower curve)
    return((K * exp(r * (x - a))) / (1 + exp(r * (x - a))))
}

rlf <- data.frame(LogFood=log(c(1,2,4,8)),
                              a=log(c(0.1,0.2,0.4,0.8)),
                              r=6,4,3,1)
mydata <- merge(mydata,rlf)
mydata <- plyr::mutate(mydata,
               y=growth(LogPesticide,a,K=1,r),
               Dead=rbinom(n=nrow(mydata),size=20,prob=y),
               N=20,
               Alive=N-Dead,
               pmort=Dead/N)


model <- glm(pmort ~ LogPesticide * LogFood, family = quasibinomial,
          data = mydata, weights=N)

For convenience:

cc <- setNames(coef(model),c("b_int","b_P","b_F","b_PF"))
vv <- vcov(model)
dimnames(vv) <- list(names(cc),names(cc))

Basic prediction data frame:

pframe <- with(mydata,
         expand.grid(LogPesticide=seq(min(LogPesticide),max(LogPesticide),
                      length=51),
                     LogFood=unique(LogFood)))
pframe$pmort <- predict(model,newdata=pframe,type="response")

Now let's break this down. The predicted value at a given level of (log) food F and (log) pesticide P is

logit(surv) = b_int + b_P*P + b_F*F + b_PF*F*P

Thus the logistic curve with respect to pesticide at level F is

logit(surv) = (b_int+b_F*F) + (b_P+b_PF*F)*P

We want to know the value of P for which logit(surv) is 0 (the LC50), so we need

0 = (b_int+b_F*F) + (b_P+b_PF*F)*P50
P50 = -(b_int+b_F*F)/(b_P+b_PF*F)

Translating to code:

P50mean <- function(logF) {
    with(as.list(cc), -(b_int+b_F*logF)/(b_P+b_PF*logF))
}
with(mydata,P50mean(c(min=min(LogFood),max=max(LogFood))))


pLC50 <- data.frame(LogFood=unique(mydata$LogFood))
pLC50 <- transform(pLC50,
               pmort=0.5,
               LogPesticide=P50mean(LogFood))

To get the confidence intervals, the two easiest methods are (1) delta method and (2) posterior prediction intervals (also called 'parametric Bayes' in some contexts). (You could also do nonparametric bootstrapping.)

Delta method

I tried to do this by hand but realized it was getting too hairy (all four coefficients are strongly correlated, and all of these correlations have to be kept track of in the computation -- it's not as easy as the usual formulas where the numerator and denominator are independent values ...)

library("emdbook")
deltavar(-(b_int+b_F*2)/(b_P+b_PF*2),meanval=cc,Sigma=vv)
## have to be a bit fancy here with eval/substitute ...
pLC50$var1 <- sapply(pLC50$LogFood,
            function(logF)
                 eval(substitute(
                     deltavar(-(b_int+b_F*logF)/(b_P+b_PF*logF),
                               meanval=cc,Sigma=vv),
                     list(logF=logF))))

Population prediction intervals

This assumes (slightly more weakly) that the sampling distribution of the parameters is multivariate Normal.

PP <- function(logF,n=1000) {
    b <- MASS::mvrnorm(n,mu=cc,Sigma=vv)
    pred <- with(as.data.frame(b),
         -(b_int+b_F*logF)/(b_P+b_PF*logF))
    return(var(pred))
}
set.seed(101)
pLC50$var2 <- sapply(pLC50$LogFood,PP)

The PPI would actually allow us to relax the assumptions a bit, by getting the quantiles of the distribution of predicted LC50s ... as it turns out (see below) the PPI-based confidence intervals are a bit wider than the Delta method ones, but they're not horribly far apart.

Now plot the whole mess:

library(ggplot2); theme_set(theme_bw())
gg0 <- ggplot(mydata,aes(LogPesticide,pmort,
              colour=factor(LogFood),
              fill = factor(LogFood))) + geom_point() +
       ## individual fits -- a bit ugly
       ##       geom_smooth(method="glm",aes(weight=N),
       ##           method.args=list(family=binomial),alpha=0.1)+
       geom_line(data=pframe,linetype=2)+
       geom_point(data=pLC50,pch=5,size=4)+
       geom_hline(yintercept=0.5,col="gray")

 gg0 + geom_errorbarh(data=pLC50,lwd=2,alpha=0.5,
                       aes(xmin=LogPesticide-1.96*sqrt(var1),
                           xmax=LogPesticide+1.96*sqrt(var1)),
                       height=0)+
       geom_errorbarh(data=pLC50,
                       aes(xmin=LogPesticide-1.96*sqrt(var2),
                           xmax=LogPesticide+1.96*sqrt(var2)),
                      height=0.02)

易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!