Extracting orthogonal polynomial coefficients from R's poly() function?

前端 未结 2 1009
深忆病人
深忆病人 2020-12-09 16:47

R\'s poly() function produces orthogonal polynomials for data fitting. However, I would like to use the results of the regression outside of R (say in C++), and

2条回答
  •  执念已碎
    2020-12-09 17:38

    Note 1: I don't mean the regression coefficients, but the coefficients of the orthogonal polynomials themselves), e.g. the p_i(x) in

    y = a0 + a1*p_1(x) + a2*p_2(x) + ...
    

    As josliber mentions, the functions are constructed recursively and the most compact way of representing them is with the norms and "alpha" coefficients used by R. A C++ version using (Rcpp)Armadillo can look something like

    // [[Rcpp::depends(RcppArmadillo)]]
    #include 
    
    namespace polycpp {
    class poly_basis {
      void scale_basis(arma::mat &X) const {
        if(X.n_cols > 0)
          X.each_row() /= 
            arma::sqrt(norm2.subvec(1L, norm2.n_elem - 1L)).t();
      }
    
    public:
      arma::vec const alpha, norm2;
    
      poly_basis(arma::vec const &alpha, arma::vec const &norm2):
        alpha(alpha), norm2(norm2) { 
        for(size_t i = 0; i < norm2.size(); ++i)
          if(norm2[i] <= 0.)
            throw std::invalid_argument("invalid norm2");
        if(alpha.n_elem + 2L != norm2.n_elem)
          throw std::invalid_argument("invalid alpha");
      }
    
      /**
       behaves like poly(x, degree). The orthogonal polynomial is returned by 
       reference.
       */
      static poly_basis get_poly_basis
        (arma::vec x, size_t const degree, arma::mat &X){
        size_t const n = x.n_elem, 
                    nc = degree + 1L;
        double const x_bar = arma::mean(x); 
        x -= x_bar;
        arma::mat XX(n, nc);
        XX.col(0).ones();
        for(size_t d = 1L; d < nc; d++){
          double       * xx_new = XX.colptr(d);
          double const * xx_old = XX.colptr(d - 1);
          for(size_t i = 0; i < n; ++i, ++xx_new, ++xx_old)
            *xx_new = *xx_old * x[i];
        }
    
        arma::mat R;
        /* TODO: can be done smarter by calling LAPACK or LINPACK directly */
        if(!arma::qr_econ(X, R, XX))
          throw std::runtime_error("QR decomposition failed");
    
        for(size_t c = 0; c < nc; ++c)
          X.col(c) *= R.at(c, c);
    
        arma::vec norm2(nc + 1L), 
                  alpha(nc - 1L);
        norm2[0] = 1.;
        for(size_t c = 0; c < nc; ++c){
          double z_sq(0), 
               x_z_sq(0);
          double const *X_i = X.colptr(c);
          for(size_t i = 0; i < n; ++i, ++X_i){
            double const z_sq_i = *X_i * *X_i;
            z_sq += z_sq_i;
            if(c < degree)
              x_z_sq += x[i] * z_sq_i;
          }
          norm2[c + 1] = z_sq;
          if(c < degree)
            alpha[c] = x_z_sq / z_sq + x_bar;
        }
    
        poly_basis out(alpha, norm2);
        out.scale_basis(X);
        return out;
      }
    
      /** behaves like predict(, newdata). */
      arma::mat operator()(arma::vec const &x) const {
        size_t const n = x.n_elem;
        arma::mat out(n, alpha.n_elem + 1L);
        out.col(0).ones();
        if(alpha.n_elem > 0L){
          out.col(1) = x;
          out.col(1) -= alpha[0];
          for(size_t c = 1; c < alpha.n_elem; c++){
            double       * x_new  = out.colptr(c + 1L);
            double const * x_prev = out.colptr(c), 
                         * x_old  = out.colptr(c - 1L), 
                         * x_i    = x.memptr();
            double const fac = norm2[c + 1L] / norm2[c];
            for(size_t i = 0; i < n; ++i, ++x_new, ++x_prev, ++x_old, ++x_i)
              *x_new = (*x_i - alpha[c]) * *x_prev - fac * *x_old;
          } 
        }
    
        scale_basis(out);
        return out;
      }
    };
    } // namespace polycpp
    
    // export the functions to R to show that we get the same
    using namespace polycpp;
    using namespace Rcpp;
    
    // [[Rcpp::export(rng = false)]]
    List my_poly(arma::vec const &x, unsigned const degree){
      arma::mat out;
      auto basis = poly_basis::get_poly_basis(x, degree, out);
      return List::create(
        Named("X") = out, 
        Named("norm2") = basis.norm2, 
        Named("alpha") = basis.alpha);
    }
    
    // [[Rcpp::export(rng = false)]]
    arma::mat my_poly_predict(arma::vec const &x, arma::vec const &alpha, 
                              arma::vec const &norm2){
      poly_basis basis(alpha, norm2);
      return basis(x);
    }
    

    We can easily get rid of the Armadillo dependence if needed. I verify below that we get the same as the R function

    set.seed(1L)
    x <- rnorm(100)
    Rp <- poly(x, degree = 4L)
    Cp <- my_poly(x, 4L)
    
    all.equal(unclass(Rp), Cp$X[, -1L], check.attributes = FALSE)
    #R> [1] TRUE
    all.equal(attr(Rp, "coefs"), 
              lapply(Cp[c("alpha", "norm2")], drop))
    #R> [1] TRUE
    
    z <- rnorm(20)
    Rpred <- predict(Rp, z)
    Cpred <- my_poly_predict(z, Cp$alpha, Cp$norm2)
    all.equal(Rpred, Cpred[, -1], check.attributes = FALSE)
    #R> [1] TRUE
    all.equal(Cp$X, my_poly_predict(x, Cp$alpha, Cp$norm2))
    #R> [1] TRUE
    

    A nice bonus, although it likely does not matter in practice, is that the new functions are faster

    options(digits = 3)
    microbenchmark::microbenchmark(
      R = poly(x, degree = 4L), cpp = my_poly(x, 4L))
    #R> Unit: microseconds
    #R> expr    min     lq  mean median    uq   max neval
    #R>    R 118.93 123.63 135.4  126.1 129.0 469.1   100
    #R>  cpp   7.22   7.97  11.3   10.9  11.7  89.4   100
    microbenchmark::microbenchmark(
      R = predict(Rp, z), cpp = my_poly_predict(z, Cp$alpha, Cp$norm2))
    #R> Unit: microseconds
    #R> expr  min    lq  mean median    uq   max neval
    #R>    R 18.6 19.20 20.50  19.43 19.89 92.86   100
    #R>  cpp  1.2  1.39  1.92   1.98  2.23  8.85   100
    

提交回复
热议问题