Is there a predict function for PLM in R?

后端 未结 5 552
心在旅途
心在旅途 2020-12-08 22:36

I have a small N large T panel which I am estimating via plm (panel linear regression model), with fixed effects.

Is there any way to get predicted values for a new

5条回答
  •  粉色の甜心
    2020-12-08 23:27

    I wrote a function called predict.out.plm that can create predictions for the original data and for a manipulated data set (with equal column names).

    The predict.out.plm calculates a) the predicted (fitted) outcome of the transformed data and b) constructs the according to level outcome. The function works for First Difference (FD) estimations and Fixed Effects (FE) estimations using plm. For FD it creates the differenced outcome over time and for FE it creates the time-demeaned outcome.

    The function is largely untested, and probably only works with strongly balanced data frames.

    Any suggestions and corrections are very welcome. Help to develop a small R package would be very appreciated.

    The function predict.out.plm

    predict.out.plm<-function(
      estimate,
      formula,
      data,
      model="fd",
      pname="y",
      pindex=NULL,
      levelconstr=T
    ){
      # estimate=e.fe
      # formula=f
      # data=d
      # model="within"
      # pname="y"
      # pindex=NULL
      # levelconstr=T
      #get index of panel data
      if (is.null(pindex) && class(data)[1]=="pdata.frame") {
        pindex<-names(attributes(data)$index)
      } else {
        pindex<-names(data)[1:2]
      }
      if (class(data)[1]!="pdata.frame") { 
        data<-pdata.frame(data)
      }
      #model frame
      mf<-model.frame(formula,data=data)
      #model matrix - transformed data
      mn<-model.matrix(formula,mf,model)
    
      #define variable names
      y.t.hat<-paste0(pname,".t.hat")
      y.l.hat<-paste0(pname,".l.hat")
      y.l<-names(mf)[1]
    
      #transformed data of explanatory variables 
      #exclude variables that were droped in estimation
      n<-names(estimate$aliased[estimate$aliased==F])
      i<-match(n,colnames(mn))
      X<-mn[,i]
    
      #predict transformed outcome with X * beta
      # p<- X %*% coef(estimate)
      p<-crossprod(t(X),coef(estimate))
      colnames(p)<-y.t.hat
    
      if (levelconstr==T){
        #old dataset with original outcome
        od<-data.frame(
          attributes(mf)$index,
          data.frame(mf)[,1]
        )
        rownames(od)<-rownames(mf) #preserve row names from model.frame
        names(od)[3]<-y.l
    
        #merge old dataset with prediciton
        nd<-merge(
          od,
          p,
          by="row.names",
          all.x=T,
          sort=F
        )
        nd$Row.names<-as.integer(nd$Row.names)
        nd<-nd[order(nd$Row.names),]
    
        #construct predicted level outcome for FD estiamtions
        if (model=="fd"){
          #first observation from real data
          i<-which(is.na(nd[,y.t.hat]))
          nd[i,y.l.hat]<-NA
          nd[i,y.l.hat]<-nd[i,y.l]
          #fill values over all years
          ylist<-unique(nd[,pindex[2]])[-1]
          ylist<-as.integer(as.character(ylist))
          for (y in ylist){
            nd[nd[,pindex[2]]==y,y.l.hat]<-
              nd[nd[,pindex[2]]==(y-1),y.l.hat] + 
              nd[nd[,pindex[2]]==y,y.t.hat]
          }
        } 
        if (model=="within"){
          #group means of outcome
          gm<-aggregate(nd[, pname], list(nd[,pindex[1]]), mean)
          gl<-aggregate(nd[, pname], list(nd[,pindex[1]]), length)
          nd<-cbind(nd,groupmeans=rep(gm$x,gl$x))
          #predicted values + group means
          nd[,y.l.hat]<-nd[,y.t.hat] + nd[,"groupmeans"]
        } 
        if (model!="fd" && model!="within") {
          stop('funciton works only for FD and FE estimations')
        }
      }
      #results
      results<-p
      if (levelconstr==T){
        results<-list(results,nd)
        names(results)<-c("p","df")
      }
      return(results)
    }
    

    Testing the the function:

    ##packages
    library(plm)
    
    ##test dataframe
    #data structure
    N<-4
    G<-2
    M<-5
    d<-data.frame(
      id=rep(1:N,each=M),
      year=rep(1:M,N)+2000,
      gid=rep(1:G,each=M*2)
    )
    #explanatory variable
    d[,"x"]=runif(N*M,0,1)
    #outcome
    d[,"y"] = 2 * d[,"x"] + runif(N*M,0,1)
    #panel data frame
    d<-pdata.frame(d,index=c("id","year"))
    
    ##new data frame for out of sample prediction
    dn<-d
    dn$x<-rnorm(nrow(dn),0,2)
    
    ##estimate
    #formula
    f<- pFormula(y ~ x + factor(year))
    #fixed effects or first difffernce estimation
    e<-plm(f,data=d,model="within",index=c("id","year"))
    e<-plm(f,data=d,model="fd",index=c("id","year"))
    summary(e)
    
    ##fitted values of estimation
    #transformed outcome prediction 
    predict(e)
    c(pmodel.response(e)-residuals(e))
    predict.out.plm(e,f,d,"fd")$p
    # "level" outcome prediciton 
    predict.out.plm(e,f,d,"fd")$df$y.l.hat
    #both
    predict.out.plm(e,f,d,"fd")
    
    ##out of sampel prediciton 
    predict(e,newdata=d) 
    predict(e,newdata=dn) 
    # Error in crossprod(beta, t(X)) : non-conformable arguments
    # if plm omits variables specified in the formula (e.g. one year in factor(year))
    # it tries to multiply two matrices with different length of columns than regressors
    # the new funciton avoids this and therefore is able to do out of sample predicitons
    predict.out.plm(e,f,dn,"fd")
    

提交回复
热议问题