Parallelize a rolling window regression in R

眼角桃花 2021-02-02 01:33

I'm running a rolling regression very similar to the following code:

library(PerformanceAnalytics)
library(quantmod)
data(managers)

FL <- as.formula(Next(HA         
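
The snippet above is cut off. As a hedged sketch of the general pattern being described (the formula, the 30-row fitting window, and the MyRegression name below are illustrative assumptions, not a reconstruction of the missing code), a rolling one-step-ahead lm() fit with rollapply looks roughly like this:

library(PerformanceAnalytics)
library(quantmod)
library(zoo)
data(managers)

# illustrative stand-in for the truncated code: fit on the first 30 rows of
# each 31-row window, then predict the response for the 31st row
# (columns chosen arbitrarily; the original formula is not shown above)
MyRegression <- function(w) {
  w <- as.data.frame(w)
  fit <- lm(HAM1 ~ HAM3 + HAM4, data = w[1:30, ])
  predict(fit, newdata = w[31, ])
}

system.time(
  Result <- rollapply(managers, 31, FUN = MyRegression,
                      by.column = FALSE, align = "right", fill = NA)
)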


        
2 Answers
  •  我在风中等你
    2021-02-02 02:18

    New answer

    I wrote a package, rollRegres, that does this much faster. It is ~58 times faster than Gavin Simpson's answer. Here is an example:

    # simulate data
    set.seed(101)
    n <- 10000
    wdth <- 50
    X <- matrix(rnorm(10 * n), n, 10)
    y <- drop(X %*% runif(10)) + rnorm(n)
    Z <- cbind(y, X)
    
    # assign other function
    lm_version <- function(Z, width = wdth) {
      pred <- Z[width + 1, -1, drop = FALSE]
      ## fit the model
      Z <- Z[-nrow(Z), ]
      fit <- lm.fit(Z[, -1, drop = FALSE], Z[,1])
      ## get things we need to predict, in case pivoting turned on in lm.fit
      p <- fit$rank
      p1 <- seq_len(p)
      piv <- fit$qr$pivot[p1]
      ## model coefficients
      beta <- fit$coefficients
      ## this gives the predicted value for next obs
      drop(pred[, piv, drop = FALSE] %*% beta[piv])
    }
    
    # show that they yield the same
    library(rollRegres) # the new package
    library(zoo)
    all.equal(
      rollapply(Z, wdth + 1, FUN = lm_version,
                by.column = FALSE,  align = "right", fill = NA_real_),
      roll_regres.fit(
        x = X, y = y, width = wdth, do_compute = "1_step_forecasts"
        )$one_step_forecasts)
    #R [1] TRUE
    
    # benchmark
    library(compiler)
    lm_version <- cmpfun(lm_version)
    microbenchmark::microbenchmark(
      newnew = roll_regres.fit(
        x = X, y = y, width = wdth, do_compute = "1_step_forecasts"),
      prev   = rollapply(Z, wdth + 1, FUN = lm_version,
                         by.column = FALSE,  align = "right", fill = NA_real_),
      times = 10)
    #R Unit: milliseconds
    #R   expr       min        lq      mean    median        uq       max neval
    #R newnew  10.27279  10.48929  10.91631  11.04139  11.13877  11.87121    10
    #R   prev 555.45898 565.02067 582.60309 582.22285 602.73091 605.39481    10
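
    The package also has a formula interface, roll_regres; below is a minimal, hedged sketch on made-up data (the data, formula, and window width are only for illustration and are not part of the benchmark above).

    library(rollRegres)

    # simulate a small data set for the formula interface
    set.seed(101)
    dat <- data.frame(x1 = rnorm(250), x2 = rnorm(250))
    dat$y <- .5 * dat$x1 - .25 * dat$x2 + rnorm(250)

    # rolling fits over a 50-observation window, again requesting the
    # one-step-ahead forecasts
    out <- roll_regres(y ~ x1 + x2, data = dat, width = 50L,
                       do_compute = "1_step_forecasts")
    str(out$coefs)               # rolling coefficient estimates
    str(out$one_step_forecasts)  # one-step-ahead predictions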
    

    Old answer

    You can reduce the run time by updating a matrix decomposition instead of refitting from scratch in every window. This yields an O(p^2) cost per iteration instead of O(np^2), where n is your window width and p is the number of predictors. Below is code to compare the two approaches. It would likely be much faster in C++, but the LINPACK routines dchud and dchdd are not included with R, so you would have to write a package to call them from compiled code. Further, I recall reading that implementations other than the LINPACK dchud and dchdd may be faster for this kind of rank-one update and downdate.
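
    The saving comes from the fact that sliding the window one observation changes the cross-product matrix X^T X by only two rank-one terms, X^T X (new window) = X^T X (old window) + x_new x_new^T - x_old x_old^T, and dchud/dchdd apply exactly those two changes to its Cholesky factor R. A small check of that identity (the variable names are only illustrative):

    # check the rank-one update/downdate identity exploited by dchud/dchdd
    set.seed(1)
    p <- 4
    width <- 20
    X <- matrix(rnorm((width + 1) * p), width + 1, p)

    old_xtx <- crossprod(X[1:width, ])          # X^T X over rows 1:width
    new_xtx <- crossprod(X[2:(width + 1), ])    # X^T X after sliding one step
    x_new <- X[width + 1, ]
    x_old <- X[1, ]

    all.equal(new_xtx, old_xtx + tcrossprod(x_new) - tcrossprod(x_old))
    #R> [1] TRUE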

    library(SamplerCompare) # for LINPACK `dchud` and `dchdd`
    roll_forcast <- function(X, y, width){
      n <- nrow(X)
      p <- ncol(X)
      out <- rep(NA_real_, n)
    
      is_first <- TRUE
      i <- width 
      while(i < n){
        if(is_first){
          is_first <- FALSE
          qr. <- qr(X[1:width, ])
          R <- qr.R(qr.)
    
          # Use X^T for the rest
          X <- t(X)
    
          XtY <- drop(tcrossprod(y[1:width], X[, 1:width]))
        } else {
          x_new <- X[, i]
          x_old <- X[, i - width]
    
          # update R 
          R <- .Fortran(
            "dchud", R, p, p, x_new, 0., 0L, 0L, 
            0., 0., numeric(p), numeric(p), 
            PACKAGE = "SamplerCompare")[[1]]
    
          # downdate R
          R <- .Fortran(
            "dchdd", R, p, p, x_old, 0., 0L, 0L, 
            0., 0., numeric(p), numeric(p), integer(1),
            PACKAGE = "SamplerCompare")[[1]]
    
          # update XtY
          XtY <- XtY + y[i] * x_new - y[i - width] * x_old
        }
    
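        # solve the normal equations (R^T R) b = X^T y for the coefficients via
        # two triangular solves; .Internal skips backsolve's argument checks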
        coef. <- .Internal(backsolve(R, XtY, p, TRUE, TRUE))
        coef. <- .Internal(backsolve(R, coef., p, TRUE, FALSE))
    
        i <- i + 1
        out[i] <- X[, i] %*% coef.
      }
    
      out
    }
    
    # simulate data
    set.seed(101)
    n <- 10000
    wdth <- 50
    X <- matrix(rnorm(10 * n), n, 10)
    y <- drop(X %*% runif(10)) + rnorm(n)
    Z <- cbind(y, X)
    
    # assign other function
    lm_version <- function(Z, width = wdth) {
      pred <- Z[width + 1, -1, drop = FALSE]
      ## fit the model
      Z <- Z[-nrow(Z), ]
      fit <- lm.fit(Z[, -1, drop = FALSE], Z[,1])
      ## get things we need to predict, in case pivoting turned on in lm.fit
      p <- fit$rank
      p1 <- seq_len(p)
      piv <- fit$qr$pivot[p1]
      ## model coefficients
      beta <- fit$coefficients
      ## this gives the predicted value for the next obs (row width + 1 of the data passed in)
      drop(pred[, piv, drop = FALSE] %*% beta[piv])
    }
    
    # show that they yield the same
    library(zoo)
    all.equal(
      rollapply(Z, wdth + 1, FUN = lm_version,  
                by.column = FALSE,  align = "right", fill = NA_real_),
      roll_forcast(X, y, wdth))
    #R> [1] TRUE
    
    # benchmark
    library(compiler)
    roll_forcast <- cmpfun(roll_forcast)
    lm_version <- cmpfun(lm_version)
    microbenchmark::microbenchmark(
      new =  roll_forcast(X, y, wdth),
      prev = rollapply(Z, wdth + 1, FUN = lm_version,  
                       by.column = FALSE,  align = "right", fill = NA_real_), 
      times = 10)
    #R> Unit: milliseconds
    #R> expr      min       lq     mean   median       uq      max neval cld
    #R>  new 113.7637 115.4498 129.6562 118.6540 122.4930 230.3414    10  a 
    #R> prev 639.6499 674.1677 682.1996 678.6195 686.8816 763.8034    10   b
    
