 %**********************************************************************
 * RMAIN03 creates RXHCC and score variables for each person who is
 * present in a person file.
 * If a person has at least one diagnosis in DIAG file then RXHCCs are
 * created, otherwise RXHCCs are set to 0 .
 * Score variables are created using coefficients from 2 final
 * models: risk adjustable and new enrollee.
 *
 * Assumptions about input files:
 *   - both files are sorted by person ID
 *   - person level file has the following variables:
 *     :HICNO   - person ID variable
 *     :DOB     - date of birth
 *     :SEX     - sex
 *     :OREC    - original reason for entitlement
 *     :SP      - optional, Special Population identifier 
 *                (see 'RXHCC R0109C1 software description.doc' and 
 *                'Special populations.txt' documents included in 
 *                this package)
 *     
 *   - diagnosis level file has the following vars:
 *     :HICNO   - person ID variable
 *     :DIAG    - diagnosis
 *
 * Parameters:
 *      INP       - input person dataset
 *      IND       - input diagnosis dataset
 *      OUTDATA   - output dataset
 *      IDVAR     - name of person id variable (HICNO for medicare data)
 *      KEEPVAR   - variables to keep in the output dataset
 *      SEDITS    - set to 1 to perform edits on diagnoses (macro REDIT07)
 *      DATE_ASOF - sas date for age calculation
 *      FMT       - format to go from ICD9 to RXCC
 *      DF        - factor set by CMS to scale coefficients if required
 *                  (1 = no adjustment made)
 *
 * External macros:
 *      %AGESEX01    - create age/sex, originally disabled, disabled vars
 *      %REDIT07     - perform edits to diagnosis
 *      %R01A84M     - assign one ICD9 to multiple RXCCs
 *      %R01H84L     - assign labels to RXHCCs
 *      %R01H84H     - set RXHCC=0 according to hierarchies
 *      %SCOREVR     - calculate a score variable
 *
 * Format:
 *      $R01091U     - to go from ICD9 to RXCC
 *
 ***********************************************************************;

 %MACRO RMAIN03(INP=, IND=, OUTDATA=, IDVAR=, KEEPVAR=, SEDITS=,
                DATE_ASOF=, FMT= , DF= );

 ***********************************************************************;
 * step1: include external macros;
 ***********************************************************************;
 %*Each member is read through the IN0 fileref. IN0 is not assigned
   anywhere in this macro, so it presumably has to be set up by the
   calling program before RMAIN03 is invoked. SOURCE2 writes the
   included source lines to the log;
 %INCLUDE IN0(AGESEX01) /SOURCE2; %*age/sex, originally disabled, disabled vars;
 %INCLUDE IN0(REDIT07)  /SOURCE2; %*edits to diagnoses, used when SEDITS=1;
 %INCLUDE IN0(R01H84L)  /SOURCE2; %*labels for RXHCCs;
 %INCLUDE IN0(R01H84H)  /SOURCE2; %*hierarchies that set RXHCCs to 0;
 %INCLUDE IN0(R01A84M)  /SOURCE2; %*one ICD9 assigned to multiple RXCCs;
 %INCLUDE IN0(SCOREVR)  /SOURCE2; %*calculate a score variable;

 ***********************************************************************;
 * step2: define internal macro variables;
 ***********************************************************************;

 %*highest RXHCC number, sizes the RXCC and RXHCC arrays;
 %LET N_RX=197;

 %*number of regressors in the continuing enrollee model:
   24 age/sex cells + 3 disabled interactions
   + 2 originally disabled terms + 84 RXHCCs;
 %LET NVARS_RISK = 113;

  %*age/sex cells of the risk regression, females first, then males;
 %LET AGESEXV=  F0_34  F35_44 F45_54 F55_59
                F60_64 F65_69 F70_74 F75_79
                F80_84 F85_89 F90_94 F95_GT
                M0_34  M35_44 M45_54 M55_59
                M60_64 M65_69 M70_74 M75_79
                M80_84 M85_89 M90_94 M95_GT;

  %*RXHCCs that enter the models, 84 in total, 7 per line;
 %LET RXlistY07 =%STR(
 RXHCC1   RXHCC2   RXHCC3   RXHCC8   RXHCC9   RXHCC10  RXHCC17
 RXHCC18  RXHCC19  RXHCC20  RXHCC21  RXHCC24  RXHCC31  RXHCC33
 RXHCC34  RXHCC37  RXHCC39  RXHCC40  RXHCC41  RXHCC42  RXHCC43
 RXHCC44  RXHCC45  RXHCC47  RXHCC48  RXHCC51  RXHCC52  RXHCC54
 RXHCC55  RXHCC57  RXHCC59  RXHCC60  RXHCC65  RXHCC66  RXHCC67
 RXHCC75  RXHCC76  RXHCC77  RXHCC78  RXHCC79  RXHCC80  RXHCC81
 RXHCC82  RXHCC83  RXHCC85  RXHCC86  RXHCC87  RXHCC91  RXHCC92
 RXHCC98  RXHCC99  RXHCC102 RXHCC105 RXHCC106 RXHCC108 RXHCC109
 RXHCC110 RXHCC111 RXHCC112 RXHCC113 RXHCC120 RXHCC121 RXHCC122
 RXHCC123 RXHCC126 RXHCC129 RXHCC130 RXHCC132 RXHCC134 RXHCC135
 RXHCC137 RXHCC138 RXHCC139 RXHCC140 RXHCC144 RXHCC145 RXHCC157
 RXHCC158 RXHCC159 RXHCC160 RXHCC165 RXHCC166 RXHCC186 RXHCC187
 );

   %*interactions of the disabled indicator with selected RXHCCs;
  %LET INTERRAC_VARS = %STR(D_RXHCC65 D_RXHCC66 D_RXHCC108);

   %*full regressor list of the continuing enrollee model. The order
     matters: the data step pairs these names by position with
     RCOEF1 to RCOEF113;
  %LET MODRISK= %STR(&AGESEXV
                     &INTERRAC_VARS
                     OriginallyDisabled_Female OriginallyDisabled_Male
                     &RXlistY07);

   %*check if Special Population multipliers should be used:
     output all variable names from person level input file into
     dataset CHK, then count the variables whose name is exactly SP
     (compared case-insensitively).
     An exact comparison is required here. A substring search over the
     list of names would also fire on any variable that merely contains
     the letters SP, e.g. HOSPICE or DISP, and the data step would then
     apply the SP logic to a file that has no SP variable at all, which
     turns every score into a missing value;
  Proc Contents NOPRINT data=&INP out=CHK(keep= name);  Run;

   %*SPpresent is preset to 0 so that it is defined even if the query
     below cannot run, e.g. when CHK could not be created;
  %LET SPpresent=0;
  Proc SQL NOPRINT;
       Select count(*) into :SPpresent
       from CHK
       where upcase(name) = 'SP';
  Quit;
   %*reassigning strips the leading blanks left by INTO.
     SPpresent is 1 if SP is in the file and 0 if it is not;
  %LET SPpresent=&SPpresent;
 

 ***********************************************************************;
 * step3: merge person and diagnosis files outputting one record
 *        per person with score and RXHCC variables for each input person
 *        level record
 ***********************************************************************;

 %*Macro variables read by this step: INP, IND, OUTDATA, IDVAR, KEEPVAR,
   SEDITS, DATE_ASOF, FMT, DF, N_RX, NVARS_RISK, INTERRAC_VARS, MODRISK
   and SPpresent. Regression coefficients come from INCOEF.RXCOEFF.
   A record is output only for IDs present in the person file;

 %*INVSP receives the number of invalid SP codes at the end of the data
   step. It is preset to 0 because that CALL SYMPUT is never reached when
   both input files are empty (the step stops at the MERGE statement),
   which would leave the INVSP reference after the step unresolved;
 %LET INVSP=0;

 DATA &OUTDATA(KEEP=&IDVAR &KEEPVAR );
    ****************************************************;
    * step3.1: declaration section;
    ****************************************************;

    %R01H84L;  %*RXHCC labels;

    %*length of new variables;
    LENGTH RXCC $4. AGEF 3.
           OriginallyDisabled_Female OriginallyDisabled_Male
           RXCC1-RXCC&N_RX
           RXHCC1-RXHCC&N_RX
           &INTERRAC_VARS       3.;

    %*retain cc & age vars;
    RETAIN RXCC1-RXCC&N_RX 0  AGEF
           ;
    %*arrays;
    ARRAY R(&N_RX)  RXCC1-RXCC&N_RX;
    ARRAY RX(&N_RX) RXHCC1-RXHCC&N_RX;
    %*interaction vars;
    ARRAY RV &INTERRAC_VARS;
    %*array of coefficients for new enrollee model:
      32 age/sex rows by 2 columns (not / originally disabled);
    ARRAY COEF(32,2) NECOEF1_1 NECOEF1_2 NECOEF2_1 NECOEF2_2 NECOEF3_1 
                     NECOEF3_2 NECOEF4_1 NECOEF4_2 NECOEF5_1 NECOEF5_2        
                     NECOEF6_1 NECOEF6_2 NECOEF7_1 NECOEF7_2 NECOEF8_1 
                     NECOEF8_2 NECOEF9_1 NECOEF9_2 NECOEF10_1 NECOEF10_2 
                     NECOEF11_1 NECOEF11_2 NECOEF12_1 NECOEF12_2 
                     NECOEF13_1 NECOEF13_2 NECOEF14_1 NECOEF14_2  
                     NECOEF15_1 NECOEF15_2 NECOEF16_1 NECOEF16_2 
                     NECOEF17_1 NECOEF17_2 NECOEF18_1 NECOEF18_2 
                     NECOEF19_1 NECOEF19_2 NECOEF20_1 NECOEF20_2  
                     NECOEF21_1 NECOEF21_2 NECOEF22_1 NECOEF22_2 
                     NECOEF23_1 NECOEF23_2 NECOEF24_1 NECOEF24_2 
                     NECOEF25_1 NECOEF25_2 NECOEF26_1 NECOEF26_2  
                     NECOEF27_1 NECOEF27_2 NECOEF28_1 NECOEF28_2 
                     NECOEF29_1 NECOEF29_2 NECOEF30_1 NECOEF30_2 
                     NECOEF31_1 NECOEF31_2 NECOEF32_1 NECOEF32_2  
                     ;                                                                  
    %*array of age/sex cells for new enrollee model;
    ARRAY NEAS(32) NEF0_34  NEF35_44 NEF45_54 NEF55_59 NEF60_64
                   NEF65    NEF66    NEF67    NEF68    NEF69
                   NEF70_74 NEF75_79 NEF80_84 NEF85_89 NEF90_94
                   NEF95_GT
                   NEM0_34  NEM35_44 NEM45_54 NEM55_59 NEM60_64
                   NEM65    NEM66    NEM67    NEM68    NEM69
                   NEM70_74 NEM75_79 NEM80_84 NEM85_89 NEM90_94
                   NEM95_GT
                 ;

    ****************************************************;
    * step3.2: to bring in regression coefficients;
    ****************************************************;
    %*read once, the values are retained because they come from a SET;
    IF _N_ = 1 THEN SET INCOEF.RXCOEFF;
   
    ****************************************************;
    * step3.3: merge
    ****************************************************;
    MERGE &INP(IN=IN1)
          &IND(IN=IN2) END=EOF;
    BY &IDVAR;

    %*CNT counts invalid SP codes, NOCELL counts persons for whom no
      new enrollee age/sex cell is set;
    RETAIN CNT 0;
    RETAIN NOCELL 0;
    IF IN1 THEN DO;

    ********************************************************;
    * step3.4: for the first record for a person set RXCC to 0
    *          and calculate age
    ********************************************************;

       IF FIRST.&IDVAR THEN DO;
           %*set rxccs to 0;
           DO I=1 TO &N_RX;
            R(I)=0;
           END;
           %*sas calculates age as 50.9999 instead of 51 in this case;
           IF DAY(DOB)=DAY(&DATE_ASOF) & MONTH(DOB)=MONTH(&DATE_ASOF)
           THEN _BIRTH=DOB - 1 ;
           ELSE _BIRTH=DOB;

           AGEF  =INT((&DATE_ASOF - _BIRTH)/ 365.25);
       END;

    ****************************************************;
    * step3.5 if there are any diagnoses for a person
    *         then do the following:
    *         - create RXCCs using format $R01091U
    *         - perform ICD9 edits using macro REDIT07
    *           if SEDITS=1
    *         - assign additional RXCC using R01A84M macro
    ****************************************************;
       IF IN1 & IN2 THEN DO;

           RXCC = LEFT(PUT(DIAG,$&FMT..));

           %*the value -1.0 is skipped, no RXCC is assigned for it;
           IF RXCC NE "-1.0" THEN DO;
              %IF &SEDITS = 1 %THEN
                %REDIT07(ICD9=DIAG, AGE=AGEF); %*perform edits;
              IND=INPUT(RXCC,4.);
              IF 1<= IND <= &N_RX THEN DO;
                R(IND)=1;
                %R01A84M(ICD9=DIAG); %*multiple rxs;
              END;
           END;
       END; %*RXCC creation;

    **************************************************************;
    * step3.6 for the last record for a person do the
    *         following:
    *         - create demographic variables needed (macro AGESEX01)
    *         - create RXHCC using hierarchies (macro R01H84H)
    *         - create RXHCC and DISABL interaction variables
    *         - set RXHCCs and interaction vars to zero if there
    *           are no diagnoses for a person
    *         - create score for continuing enrollee model
    *         - create score for new enrollee model
    **************************************************************;
       IF LAST.&IDVAR THEN DO;

           *****************************;
           * demographic vars           ;
           *****************************;
           %*create age/sex cells, originally disabled, disabled vars;
           %AGESEX01(AGEF=AGEF, SEX=SEX, OREC=OREC);
           %*age/sex interactions;
           OriginallyDisabled_Female= ORIGDS*(SEX='2');
           OriginallyDisabled_Male  = ORIGDS*(SEX='1');

           IF IN1 & IN2 THEN DO;
               **********************;
               * hierarchies;
               **********************;
               %R01H84H;
               *****************************;
               * other interaction variables;
               *****************************;
   
           %*interactions with disabled ;
               D_RXHCC65 =   DISABL*RXHCC65 ; %*Schizophrenia;
               D_RXHCC66 =   DISABL*RXHCC66; %*Significant Psychiatric Symptoms/Syndromes;
               D_RXHCC108 =  DISABL*RXHCC108; %*Cystic Fibrosis;
               
           END; %*there are some diagnoses for a person;
           ELSE DO;
              DO I=1 TO &N_RX;
                 RX(I)=0;
              END;
              DO OVER RV;
                 RV=0;
              END;
           END;

           %*score calculation;

           /***************************/
           /*    continuing enrollee model           */
           /***************************/;
           %*regressors and coefficients are paired by position;
           ARRAY RCOEF(&NVARS_RISK) RCOEF1-RCOEF&NVARS_RISK;
           ARRAY RREGLIST(&NVARS_RISK) &MODRISK;
           *predicted & score calculations;
           %SCOREVR(PVAR=SCORE_RISK, RLIST=RREGLIST,
                    CLIST=RCOEF, N=&NVARS_RISK);

          
           /***************************/
           /*   new enrollees model   */
           /***************************/;

           %*to find a row in the array of coefficients that corresponds
           to a person age/sex cell. TROW stays 0 if no cell equals 1;
           TROW=0;
           DO _I=1 TO 32 WHILE(TROW=0);
               TROW =_I *(NEAS(_I) = 1 );
           END;

           %*to find a column in the array of coefficients that
           corresponds to a person originally disabled status:
           1 if not originally disabled, 2 if originally disabled;
           TCOLUMN =(NOT ORIGDS)* 1 +
                    (ORIGDS)* 2 ;

           %*COEF must not be indexed with row 0: that raises an array
           subscript error and stops the whole data step. When no cell is
           set (presumably invalid or missing SEX or DOB, to be confirmed
           against AGESEX01) the score is left missing and the log is
           flagged at most 3 times, as is done for invalid SP codes;
           IF TROW > 0 THEN SCORE_NEW_ENROLLEE = COEF(TROW, TCOLUMN);
           ELSE DO;
             SCORE_NEW_ENROLLEE = .;
             IF NOCELL<=2 THEN
             PUT "******* No age/sex cell set for a person, which resulted in missing SCORE_NEW_ENROLLEE!!!";
             NOCELL=NOCELL+1;
           END;

           %*use Special Population multipliers if required;
           %IF &SPpresent > 0 %THEN %DO;
             IF 0<=SP<=3 THEN
               MULTIPLIER=
                   &DF*((SP=0) +
                        (SP=1) * LIS1_multiplier +
                        (SP=2) * LIS2_multiplier +
                        (SP=3) * (AGEF >= 65) * LTI_aged_multiplier +
                        (SP=3) * (AGEF <  65) * LTI_lt65_multiplier );
             ELSE DO;                                                                                                                   
               MULTIPLIER=.;  
               IF CNT<=2 THEN                                                                                                           
               PUT "******* Invalid SP found in the input file, which resulted in missing SCORE!!!";                                                                                                   
               CNT=CNT+1;
             END;             
           %END;
           %ELSE MULTIPLIER=&DF;;

           SCORE_RISK         =  SCORE_RISK * MULTIPLIER;        
           SCORE_NEW_ENROLLEE =  SCORE_NEW_ENROLLEE *MULTIPLIER;
           

           OUTPUT &OUTDATA;
       END; %*last record for a person;
     END; %*there is a person record;
     %*pass the number of invalid SP codes to the macro processor;
     IF EOF THEN CALL SYMPUT('INVSP',TRIM(LEFT(PUT(CNT,8.))));
 RUN;

 %IF &INVSP > 0 %THEN %DO;                                                                                                             
    %PUT ******* Input file contains &INVSP case(s) of invalid SP -- please see 'RXHCC R0109C1 software description.doc' for valid SP codes!!!;;                                                                                                    
 %END;    


 ***********************************************************************;
 * step4: data checks and proc contents
 ***********************************************************************;
 %*List the first 46 output records, then describe the output file.
   TITLE is a global statement, so it heads both listings and stays in
   effect after the macro has finished;
 TITLE '*** file outputted by the software ***';

 PROC PRINT DATA=&OUTDATA(OBS=46) U;
 RUN;

 PROC CONTENTS DATA=&OUTDATA;
 RUN;

 %MEND RMAIN03;
