* Program survmacs.v5.sas ; * Created by Nancy Cook (BWH, HMS, HSPH) ; * Reclassification macros for SURVIVAL data; * Updated 12-06-2018 to control html output; * Macro to compute net reclssification improvement (NRI) - SURV_NRI; * Uses non-null variance; * Macro for Infinite category version of NRI - SURV_CONTNRI; * Macro to compute integrated discrimination improvement (IDI) - SURV_IDI; * Note: Should bootstrap for better estimate of SE; * Macro to compute H-L Calibration event for Reclassification - SURV_RECLASS; * Note: the last is best for low censoring situations by time T; * DETAIL = 2 for detailed printout, 1 for limited, 0 for none; * Usage examples: * %surv_nri(probs,1,outxy,pyrs,10,pdx,pdxy,4,0.05,0.10,0.20); * %surv_contnri(probs,1,outxy,pyrs,10,pdx,pdxy); * %surv_idi(probs,1,outxy,pyrs,10,pdx,pdxy); * %surv_reclass(probs,1,outxy,pyrs,10,pdx,pdxy, 4, 0.05, 0.10, 0.20); ****************************************************************; %macro SURV_NRI(DSNAME,DETAIL,EVENT,PYRS,T,PROB1,PROB2,NCAT,C1,C2,C3,C4,C5,C6,C7,C8,C9); * Macro to compute Net Reclassification Index (NRI) of Pencina, Stat Med 2007; * Uses up to 10 categories with cutpoints c1-c9; * Variables: * DSNAME = dataset name; * DETAIL = 2 for detailed printout, 1 for limited, 0 for none; * EVENT = outcome variable (coded 0,1); * PYS = follow-up time; * T = time of predicted risk (eg., 10 years) = scalar; * PROB1 = probability for model 1; * PROB2 = probability for model 2; * NCAT = number of categories in classification; * C1-C9 = category cutpoints (should have ncat-1 cutpoints); *** NOTE: data need to be bootstrapped to get correct SE; title2 Survival NRI for &NCAT Categories for Event &EVENT at Time &T : &PROB2 vs &PROB1 ; title3 "Need to Bootstrap for SE"; data nri1; set &dsname; _id_=_N_; * compute diffs in probs; event=&EVENT; prob1=&PROB1; prob2=&PROB2; diffp=prob2-prob1; pyrs=&pyrs; if prob1>. and prob2>.; keep _id_ event pyrs prob1 prob2 ; run; data nri1; set nri1; * compute categories - set to above cutpoints; %do i=1 %to &ncat-1; cut&i=&&c&i; %end; cut&ncat=1.01; array cut {10} cut1-cut10; array pcat {10} pcat1-pcat10; if .pcat1 then disc=1; else if pcat2=pcat1 then disc=0; else if pcat20 %then %do; proc print data=_all_; var ncat cut1-cut9 totn ecase econt n_up n_same n_down inc_up inc_same inc_down pup_case pdown_case pup_cont pdown_cont ri_case ri_contr nri evri_case evri_contr evnri esenri enricil enriciu ezri_case ezri_contr eznri ep2ri_case ep2ri_contr ep2nri ; run; %end; title2; run; * Usage: * %surv_nri(probs,1,outxy,pyrs,10,pdx,pdxy,4,0.05,0.10,0.20); %exit: %mend SURV_NRI; ****************************************************************; %macro SURV_CONTNRI(DSNAME,DETAIL,EVENT,PYRS,T,PROB1,PROB2); * Macro to compute NRI with any up or down (no categories); * Variables: * DSNAME = dataset name; * DETAIL = 2 for detailed printout, 1 for limited, 0 for none; * EVENT = outcome variable (coded 0,1); * PYS = follow-up time; * T = time of predicted risk (eg., 10 years) = scalar; * PROB1 = probability for model 1; * PROB2 = probability for model 2; *** NOTE: data need to be bootstrapped to get correct SE; title2 Continuous NRI for Event &EVENT at Time &T : &PROB2 vs &PROB1 ; title3 "Need to Bootstrap for SE"; data nri1; set &dsname; _id_=_N_; * compute diffs in probs; event=&EVENT; pyrs=&pyrs; prob1=&PROB1; prob2=&PROB2; diffp=prob2-prob1; if prob1>. and prob2>.; keep _id_ event pyrs prob1 prob2 diffp; run; data nri1; set nri1; if prob2>prob1 then disc=1; else if prob2=prob1 then disc=0; else if prob20 %then %do; proc print data=_all_; var totn ecase econt inc_all n_up n_same n_down inc_up inc_down pup_case pdown_case pup_cont pdown_cont ri_case ri_contr nri evri_case evri_contr evnri esenri enricil enriciu ezri_case ezri_contr eznri ep2ri_case ep2ri_contr ep2nri ; run; %end; title2; run; * Usage: * %surv_contnri(probs,1,outxy,pyrs,10,pdx,pdxy); %exit: %mend SURV_CONTNRI; ****************************************************************; %macro SURV_IDI(DSNAME,DETAIL,EVENT,PYRS,T,PROB1,PROB2); * Macro to compute difference in Yates slopes or integrated discrimination improvement (IDI) from Pencina, 2007; * Variables: * DSNAME = dataset name; * DETAIL = 1 or 2 for limited printout, 0 for none; * EVENT = outcome variable (coded 0,1) (if 1,2 alter signs); * PYS = follow-up time; * T = time of predicted risk (eg., 10 years) = scalar; * PROB1 = probability for model 1; * PROB2 = probability for model 2; *** NOTE: data need to be bootstrapped to get correct SE; title2 Difference in Yates Slope (IDI) for Event &EVENT at Time &T : &PROB2 vs &PROB1 ; title3 "Need to Bootstrap for SE"; data _ididat_; set &dsname; _id_=_N_; diffprob=&prob2-&prob1; event=&EVENT; pyrs=&PYRS; if &prob1>. and &prob2>.; run; %if &detail<2 %then %do; ods listing exclude all; ods html exclude all; %end; proc ttest data=_ididat_; class event; var &prob1 &prob2 diffprob; title4 "Crude Difference in Probs by Any Event - Ignoring Time"; run; title4; run; proc means data=_ididat_; var &prob1 &prob2 ; output out=idistat mean=meanp1 meanp2 var=varp1 varp2 n=np1 np2; title4 "Crude Difference in Predicted Probs"; run; title4; run; * Compute KM estimator for survival for those moving up or down; data two; _id_=0; event=.; pyrs=&t; run; data _all_; set _ididat_ two; run; proc phreg data=_all_ noprint; model pyrs*event(0) = ; id _id_; output out=survdat survival=_survest_ ; run; data base; set survdat; if _id_=0; * survival estimate for referent person at time &t; _basesurv_=_survest_; inc=1-_basesurv_; keep _basesurv_ inc; run; data _idi_; merge idistat base; yates1=varp1/(inc*(1-inc)); yates2=varp2/(inc*(1-inc)); idi=yates2-yates1; rel_idi=yates2/yates1 - 1; drop _TYPE_ _FREQ_; run; ods listing exclude none; ods html exclude none; %if &detail>0 %then %do; proc print data=_idi_; run; %end; title2; run; * Usage: * %survidi(probs,1,outxy,pyrs,10,pdx,pdxy); %mend SURV_IDI; ****************************************************************; %macro SURV_RECLASS(DSNAME,DETAIL,EVENT,PYRS,T,PROB1,PROB2,NCAT,C1,C2,C3,C4,C5,C6,C7,C8,C9); * Macro to compute calibration eventistics for SURVIVAL DATA; * KxK (K=NCAT) table formed using categs of predicted probs (eg 0, 5, 10, 20%); * Counts categs for DF and computes for n>=20; * Allows up to 10 categories (usually 3 or 4); * Variables: * DSNAME = dataset name; * DETAIL = 2 for detailed printout, 1 for limited, 0 for none; * EVENT = outcome variable (coded 0,1); * PYRS = person-time for Cox model; * T = desired follow-up time for probability estimate; * PROB1 = probability for model 1 (at time t); * PROB2 = probability for model 2 (at time t); * NCAT = number of categories in classification; * C1-C9 = category cutpoints (should have ncat-1 cutpoints); * Reclassification calibration test for cells with n>=20; title2 Survival Reclassification for &NCAT Categories with CutPoints = %do j = 1 %to &ncat; &&c&j %end;; title3 Event &EVENT at Time &T : &PROB2 vs &PROB1 ; data calc; set &DSNAME; _id_=_N_; event=&EVENT; prob1=&PROB1; prob2=&PROB2; if prob1>. and prob2>.; * compute product terms for J eventistic; var1=prob1*(1-prob1); var2=prob2*(1-prob2); k=&ncat; * compute categories - set to above cutpoints; %do i=1 %to &ncat-1; cut&i=&&c&i; %end; cut&ncat=1.01; array cut {10} cut1-cut10; if .2 then diffcat2=2; else if .0 %then %do; proc print data=reclass; run; %end; %if &detail=2 %then %do; proc freq data=calc; tables pcat1*pcat2*obsp/ list; title3 "Calibration table for &PROB1 and &PROB2"; run; %end; ods listing exclude all; ods html exclude all; ods output Summary=calibs; proc means data=calc n sum mean median min max; * Eliminate missing from all variables; where obsp ne . and prob1 ne . and prob2 ne .; class pcat1 pcat2; types pcat1*pcat2; var prob1 prob2 var1 var2 obsp k; run; ods listing exclude none; ods html exclude none; run; %if &detail>0 %then %do; proc print data=calibs; var pcat1 pcat2 obsp_N obsp_Sum prob1_Mean prob2_Mean obsp_Mean var1_Sum var2_Sum; title3 "Estimated Risks"; run; %end; data caltermsb calstatsb(keep= ncat maxcat dfmax nccat df chisq1 pcalib1 chisq_adj1 pcal_adj1 chisq2 pcalib2 chisq_adj2 pcal_adj2 dfj jsq1 pj1 jsq_adj1 pj_adj1 jsq2 pj2 jsq_adj2 pj_adj2 nreclass ncorr pctcorr) ; set calibs end=eof; retain chisq1 chisq_adj1 chisq2 chisq_adj2 jsq1 jsq_adj1 jsq2 jsq_adj2 nccat nreclass ncorr 0; * only save cells with count at least 20; if obsp_N >=20 then do; num1=(obsp_Sum - prob1_Sum)**2; denom1=prob1_Sum*(1-prob1_Mean); denom12=(prob1_Sum+1)*(1-prob1_Mean+1/prob1_N); chisq1=chisq1 + num1/denom1; chisq_adj1=chisq_adj1 + num1/denom12; phi1=var1_Sum/denom1; jsq1=jsq1 + num1/(phi1*denom1); * Note: evaluate adjusted Jsq, but do not print; jsq_adj1=jsq_adj1 + num1/(phi1*denom12); num2=(obsp_Sum - prob2_Sum)**2; denom2=prob2_Sum*(1-prob2_Mean); denom22=(prob2_Sum+1)*(1-prob2_Mean+1/prob2_N); chisq2=chisq2 + num2/denom2; chisq_adj2=chisq_adj2 + num2/denom22; phi2=var2_Sum/denom2; jsq2=jsq2 + num2/(phi2*denom2); jsq_adj2=jsq_adj2 + num2/(phi2*denom22); nccat=nccat+1; OEratio1=obsp_Sum/prob1_Sum; OEratio2=obsp_Sum/prob2_Sum; if obsp_Sum>0 then do; sdo=sqrt(1/obsp_Sum); OEcil1=OEratio1*exp(-1.96*sdo); OEciu1=OEratio1*exp(1.96*sdo); OEcil2=OEratio2*exp(-1.96*sdo); OEciu2=OEratio2*exp(1.96*sdo); end; * compute those reclass correctly; if pcat2 ne pcat1 then nreclass=nreclass+obsp_N; %do i=1 %to &ncat-1; cut&i=&&c&i; if pcat2>pcat1 and pcat2=&i+1 then do; * put pcat2= pcat1= cut&i= obsp_Sum= obsp_Mean= obsp_N=; if obsp_Mean>=cut&i then ncorr=ncorr+obsp_N; end; else if pcat20 then pctcorr=ncorr/nreclass; output calstatsb; end; %if &detail=2 %then %do; proc print data=caltermsb; var pcat1 pcat2 num1 denom1 denom12 phi1 num2 denom2 denom22 phi2 nreclass ncorr OEratio1 OEcil1 OEciu1 OEratio2 OEcil2 OEciu2; title3 "Calibration Terms for &PROB1 and &PROB2 in Predicting &EVENT"; title4 "Using KM Incidence Estimates and Cross-Classified Cells with N >= 20"; run; %end; %if &detail>0 %then %do; proc print data=calstatsb; var ncat maxcat dfmax nccat df chisq1 pcalib1 chisq_adj1 pcal_adj1 chisq2 pcalib2 chisq_adj2 pcal_adj2 nreclass ncorr pctcorr; title3 "Calibration Statistics for &PROB1 and &PROB2 in Predicting &EVENT"; title4 "Using KM Incidence Estimates and Cross-Classified Cells with N >= 20"; run; %end; title2; run; * Usage: * %surv_reclass(probs,1,outxy,pyrs,10,pdx,pdxy, 4, 0.05, 0.10, 0.20); %mend SURV_RECLASS; ****************************************************************;