How to recode variables in table 1 using info from table 2 (in SAS)

后端 未结 3 469
太阳男子
太阳男子 2021-01-26 12:18

The overall goal is to stratify quantitative variables based on their percentiles. I would like to break each variable up into 10 levels (e.g. 10th, 20th, ..., 100th percentile).

3条回答
  •  死守一世寂寞
    2021-01-26 12:36

    No need for all of that code generation. Just use an array. Basically, load the percentiles from the dataset generated by PROC UNIVARIATE into a two-dimensional array and then find the decile rank for each of your actual values.

    %macro stratify(varlist,in=,out=,pcts=pcts);
    /*------------------------------------------------------------------
      Recode each numeric variable in VARLIST into its decile rank (1-10).

      Parameters:
        varlist  space-delimited list of numeric variables to stratify
        in=      input dataset holding the variables
        out=     output dataset: all of IN plus one RECODE_<var> per variable
        pcts=    work dataset that receives the percentile cut points
                 (default PCTS)

      For every variable the 10th, 20th, ..., 100th percentiles are
      computed with PROC UNIVARIATE.  RECODE_<var> is the index of the
      first cut point that is greater than or equal to the value.
      Missing input values yield a missing recode.
    ------------------------------------------------------------------*/
    %local nvars pctls recodes pctlvars i j var ;
    %let varlist=%sysfunc(compbl(&varlist));
    %let nvars=%sysfunc(countw(&varlist,%str( )));
    %let pctls=pctl_%sysfunc(tranwrd(&varlist,%str( ),%str( pctl_)));
    %let recodes=recode_%sysfunc(tranwrd(&varlist,%str( ),%str( recode_)));

    /* Build the explicit list of percentile variable names, grouped by   */
    /* analysis variable, so the array layout below does not depend on    */
    /* the order in which PROC UNIVARIATE writes its output variables.    */
    %let pctlvars=;
    %do i=1 %to &nvars;
      %let var=%scan(&varlist,&i,%str( ));
      %do j=10 %to 100 %by 10;
        %let pctlvars=&pctlvars pctl_&var&j;
      %end;
    %end;

    proc univariate data=&in noprint ;
      var &varlist;
      output out=&pcts pctlpre=&pctls
             pctlpts = 10 20 30 40 50 60 70 80 90 100
      ;
    run;

    data &out ;
      /* Read the single row of cut points once; SET retains its values. */
      if _n_=1 then set &pcts ;
      /* Row = analysis variable, column = decile.  SAS fills arrays with */
      /* the rightmost subscript varying fastest, matching PCTLVARS.      */
      array _pcts (&nvars,10) &pctlvars ;
      set &in;
      array _in &varlist ;
      array _out &recodes ;
      do __i=1 to dim(_in);
        /* A missing value sorts below every number; do not call it decile 1. */
        if missing(_in(__i)) then _out(__i)=.;
        else do;
          /* Advance until the value no longer exceeds the cut point. */
          do __j=1 to 10 while(_in(__i) > _pcts(__i,__j));
          end;
          /* The index is 11 if the value exceeds every cut point; cap at 10. */
          _out(__i)=min(__j,10);
        end;
      end;
      drop __i __j &pctlvars;
    run;
    %mend stratify;
    

    So if I use your generated sample here is what the log looks like with the MPRINT option turned on.

    1093  %stratify(a b c,in=test,out=want);
    MPRINT(STRATIFY):   proc univariate data=test noprint ;
    MPRINT(STRATIFY):   var a b c;
    MPRINT(STRATIFY):   output out=pcts pctlpre=pctl_a pctl_b pctl_c pctlpts = 10 20 30 40 50 
    60 70 80 90 100 ;
    MPRINT(STRATIFY):   run;
    
    NOTE: The data set WORK.PCTS has 1 observations and 30 variables.
    NOTE: PROCEDURE UNIVARIATE used (Total process time):
          real time           0.01 seconds
          cpu time            0.01 seconds
    
    
    MPRINT(STRATIFY):   data want ;
    MPRINT(STRATIFY):   if _n_=1 then set pcts ;
    MPRINT(STRATIFY):   array _pcts (10,3) _numeric_;
    MPRINT(STRATIFY):   set test;
    MPRINT(STRATIFY):   array _in a b c ;
    MPRINT(STRATIFY):   array out recode_a recode_b recode_c ;
    MPRINT(STRATIFY):   do i=1 to dim(_in);
    MPRINT(STRATIFY):   do j=1 to 10 while(_in(i) > _pcts(j,i));
    MPRINT(STRATIFY):   end;
    MPRINT(STRATIFY):   out(i)=j;
    MPRINT(STRATIFY):   end;
    MPRINT(STRATIFY):   drop i j pctl_a: pctl_b: pctl_c:;
    MPRINT(STRATIFY):   run;
    
    NOTE: There were 1 observations read from the data set WORK.PCTS.
    NOTE: There were 1000 observations read from the data set WORK.TEST.
    NOTE: The data set WORK.WANT has 1000 observations and 6 variables
    

    And the first five observations are:

提交回复
热议问题