How do I extract hazards from survfit in R?

时光总嘲笑我的痴心妄想 提交于 2019-12-04 04:07:47

It is a bit tricky since the hazard is an estimate of an instantaneous probability (and this is discrete data), but the basehaz function might be of some help, but it only returns the cumulative hazard. So you would have still have to perform an extra step.

I have also had luck with the muhaz function. From its documentation:

library(muhaz)
?muhaz
data(ovarian, package="survival")
attach(ovarian)
fit1 <- muhaz(futime, fustat)
plot(fit1)

I am not sure the best way to get at the 95% confidence interval, but bootstrapping might be one approach.

#Function to bootstrap hazard estimates
haz.bootstrap <- function(data,trial,min.time,max.time){
  library(data.table)
  data <- as.data.table(data)
  data <- data[sample(1:nrow(data),nrow(data),replace=T)]
  fit1 <- muhaz(data$futime, data$fustat,min.time=min.time,max.time=max.time)
  result <- data.table(est.grid=fit1$est.grid,trial,haz.est=fit1$haz.est)
  return(result)
}

#Re-run function to get 1000 estimates
haz.list <- lapply(1:1000,function(x) haz.bootstrap(data=ovarian,trial=x,min.time=0,max.time=744))
haz.table <- rbindlist(haz.list,fill=T)

#Calculate Mean,SD,upper and lower 95% confidence bands
plot.table <- haz.table[, .(Mean=mean(haz.est),SD=sd(haz.est)), by=est.grid]
plot.table[, u95 := Mean+1.96*SD]
plot.table[, l95 := Mean-1.96*SD]

#Plot graph
library(ggplot2)
p <- ggplot(data=plot.table)+geom_smooth(aes(x=est.grid,y=Mean))
p <- p+geom_smooth(aes(x=est.grid,y=u95),linetype="dashed")
p <- p+geom_smooth(aes(x=est.grid,y=l95),linetype="dashed")
p

Jacco

As a supplement to Mike's answer, one could model the number of events by a Poisson distribution instead of a Normal distribution. The hazard rate can then be calculated via a gamma distribution. The code would become:

library(muhaz)
library(data.table)
library(rGammaGamma)
data(ovarian, package="survival")
attach(ovarian)
fit1 <- muhaz(futime, fustat)
plot(fit1)

#Function to bootstrap hazard estimates
haz.bootstrap <- function(data,trial,min.time,max.time){
  library(data.table)
  data <- as.data.table(data)
  data <- data[sample(1:nrow(data),nrow(data),replace=T)]
  fit1 <- muhaz(data$futime, data$fustat,min.time=min.time,max.time=max.time)
  result <- data.table(est.grid=fit1$est.grid,trial,haz.est=fit1$haz.est)
  return(result)
}

#Re-run function to get 1000 estimates
haz.list <- lapply(1:1000,function(x) haz.bootstrap(data=ovarian,trial=x,min.time=0,max.time=744))
haz.table <- rbindlist(haz.list,fill=T)

#Calculate Mean, gamma parameters, upper and lower 95% confidence bands
plot.table <- haz.table[, .(Mean=mean(haz.est),
                            Shape = gammaMME(haz.est)["shape"],
                            Scale = gammaMME(haz.est)["scale"]), by=est.grid]
plot.table[, u95 := qgamma(0.95,shape = Shape + 1, scale = Scale)]
# The + 1 is due to the discrete character of the poisson distribution.
plot.table[, l95 := qgamma(0.05,shape = Shape, scale = Scale)]

#Plot graph
ggplot(data=plot.table) + 
  geom_line(aes(x=est.grid, y=Mean),col="blue") + 
  geom_ribbon(aes(x=est.grid, y=Mean, ymin=l95, ymax=u95),alpha=0.5, fill= "lightblue")

As can be seen the negative estimates for the lower bound of the hazard rate are now gone.

As an additional supplement, we can improve the performance of the bootstrap function by slimming down the parameters inside.

data(ovarian, package="survival")

library(muhaz)

haz.bootstrap.v2 <- function(x) {
  x <- x[sample(1:nrow(x), nrow(x), replace=TRUE), ]
  muhaz(x$futime, x$fustat, min.time=0, max.time=744)
}

# bootstrap
boot <- replicate(1e3, haz.bootstrap.v2(ovarian))

There is a slight performance burst of approx. 15% (1e3 reps. as above).

Unit: seconds
     expr      min       lq     mean   median       uq      max neval cld
 version1 6.376984 6.501841 6.700725 6.552303 6.677898 8.027701   100   b
 version2 5.443420 5.516658 5.686726 5.565615 5.674727 6.953493   100  a 

We have thrown the aggregation out of the function and do it now after boostrapping.

# aggregate data.table
p <- cbind(est.grid=unlist(boot[2, ]), haz.est=unlist(boot[3, ]))

Then we'll go on like before.

# calculate Mean, gamma parameters
library(data.table)
p <- data.table(p)

library(rGammaGamma)
p <- p[, .(mean=mean(haz.est),
           shape=gammaMME(haz.est)["shape"],
           scale=gammaMME(haz.est)["scale"]), by=est.grid]

Note that two-sided CIs are calculated with 1 - alpha/2!

# calculate CIs
# note: the + 1 is due to the discrete character of the poisson distribution:
p[, u95 := qgamma(0.975, shape=shape + 1, scale=scale)]
p[, l95 := qgamma(0.025, shape=shape, scale=scale)]

And eventually the plot:

library(ggplot2)
ggplot(data=p) + 
  geom_line(aes(x=est.grid, y=mean), col="blue") + 
  geom_line(aes(x=est.grid, y=u95), lty=2, col="blue") + 
  geom_line(aes(x=est.grid, y=l95), lty=2, col="blue") + 
  labs(title="Hazard Estimates", x="Analysis Time", y="Hazard Rate",
       caption=expression(paste(italic("Note: "),
                                alpha, "=0.05"))) +
  theme_bw()

易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!