* Program survwtmacs.v3.sas ; * Created by Nancy Cook (BWH, HMS, HSPH) ; * Reclassification macros for WEIGHTED SURVIVAL data; * Macro to compute net reclssification improvement (NRI) - SURV_NRIWT; * Uses non-null variance; * Macro for Infinite category version of NRI - SURV_CONTNRIWT; * Macro to compute integrated discrimination improvement (IDI) - SURV_IDIWT; * Note: Should bootstrap for better estimate of SE; * Macro to compute H-L Calibration event for Reclassification - SURV_RECLASSWT; * Note: the last is best for low censoring situations by time T; * DETAIL = 2 for detailed printout, 1 for limited, 0 for none; * Usage examples: * %surv_nriwt(probs,1,wt,outxy,pyrs,10,pdx,pdxy,4,0.05,0.10,0.20); * %surv_contnriwt(probs,1,wt,outxy,pyrs,10,pdx,pdxy); * %surv_idiwt(probs,1,wt,outxy,pyrs,10,pdx,pdxy); * %surv_reclasswt(probs,1,wt,outxy,pyrs,10,pdx,pdxy, 4, 0.05, 0.10, 0.20); ****************************************************************; %macro SURV_NRIWT(DSNAME,DETAIL,WT,EVENT,PYRS,T,PROB1,PROB2,NCAT,C1,C2,C3,C4,C5,C6,C7,C8,C9); * Macro to compute Net Reclassification Index (NRI) of Pencina, Stat Med 2007; * Uses up to 10 categories with cutpoints c1-c9; * Variables: * DSNAME = dataset name; * DETAIL = 2 for detailed printout, 1 for limited, 0 for none; * WT = sample weight for case-cohort analysis (indiv, not risk set); * EVENT = outcome variable (coded 0,1); * PYS = follow-up time; * T = time of predicted risk (eg., 10 years) = scalar; * PROB1 = probability for model 1; * PROB2 = probability for model 2; * NCAT = number of categories in classification; * C1-C9 = category cutpoints (should have ncat-1 cutpoints); *** NOTE: data need to be bootstrapped to get correct SE; title2 Weighted Survival NRI for &NCAT Categories for Event &EVENT at Time &T : &PROB2 vs &PROB1 ; title3 "Need to Bootstrap for SE"; data nri1; set &dsname; _id_=_N_; * compute diffs in probs; event=&EVENT; prob1=&PROB1; prob2=&PROB2; diffp=prob2-prob1; pyrs=&pyrs; wt=&wt; if prob1>. and prob2>.; keep _id_ wt event pyrs prob1 prob2 ; run; data nri1; set nri1; * compute categories - set to above cutpoints; %do i=1 %to &ncat-1; cut&i=&&c&i; %end; cut&ncat=1.01; array cut {10} cut1-cut10; array pcat {10} pcat1-pcat10; if .pcat1 then disc=1; else if pcat2=pcat1 then disc=0; else if pcat20 %then %do; proc print data=_all_; var ncat cut1-cut9 totn ecase econt n_up n_same n_down inc_up inc_same inc_down pup_case pdown_case pup_cont pdown_cont ri_case ri_contr nri evri_case evri_contr evnri esenri enricil enriciu ezri_case ezri_contr eznri ep2ri_case ep2ri_contr ep2nri ; run; %end; title2; run; * Usage: * %surv_nriwt(probs,1,wt,outxy,pyrs,10,pdx,pdxy,4,0.05,0.10,0.20); %exit: %mend SURV_NRIWT; ****************************************************************; %macro SURV_CONTNRIWT(DSNAME,DETAIL,WT,EVENT,PYRS,T,PROB1,PROB2); * Macro to compute NRI with any up or down (no categories); * Variables: * DSNAME = dataset name; * DETAIL = 2 for detailed printout, 1 for limited, 0 for none; * WT = sample weight for case-cohort analysis (indiv, not risk set); * EVENT = outcome variable (coded 0,1); * PYS = follow-up time; * T = time of predicted risk (eg., 10 years) = scalar; * PROB1 = probability for model 1; * PROB2 = probability for model 2; *** NOTE: data need to be bootstrapped to get correct SE; title2 Weighted Continuous NRI for Event &EVENT at Time &T : &PROB2 vs &PROB1 ; title3 "Need to Bootstrap for SE"; data nri1; set &dsname; _id_=_N_; * compute diffs in probs; event=&EVENT; pyrs=&pyrs; prob1=&PROB1; prob2=&PROB2; diffp=prob2-prob1; wt=&wt; if prob1>. and prob2>.; keep _id_ wt event pyrs prob1 prob2 diffp; run; data nri1; set nri1; if prob2>prob1 then disc=1; else if prob2=prob1 then disc=0; else if prob20 %then %do; proc print data=_all_; var totn ecase econt inc_all n_up n_same n_down inc_up inc_down pup_case pdown_case pup_cont pdown_cont ri_case ri_contr nri evri_case evri_contr evnri esenri enricil enriciu ezri_case ezri_contr eznri ep2ri_case ep2ri_contr ep2nri ; run; %end; title2; run; * Usage: * %surv_contnriwt(probs,1,wt,outxy,pyrs,10,pdx,pdxy); %exit: %mend SURV_CONTNRIWT; ****************************************************************; %macro SURV_IDIWT(DSNAME,DETAIL,WT,EVENT,PYRS,T,PROB1,PROB2); * Macro to compute difference in Yates slopes or integrated discrimination improvement (IDI) from Pencina, 2007; * Variables: * DSNAME = dataset name; * DETAIL = 1 or 2 for limited printout, 0 for none; * WT = sample weight for case-cohort analysis (indiv, not risk set); * EVENT = outcome variable (coded 0,1) (if 1,2 alter signs); * PYS = follow-up time; * T = time of predicted risk (eg., 10 years) = scalar; * PROB1 = probability for model 1; * PROB2 = probability for model 2; *** NOTE: data need to be bootstrapped to get correct SE; title2 Weighted Difference in Yates Slope (IDI) for Event &EVENT at Time &T : &PROB2 vs &PROB1 ; title3 "Need to Bootstrap for SE"; data _ididat_; set &dsname; _id_=_N_; diffprob=&prob2-&prob1; prob1=&prob1; prob2=&prob2; prob1sq=prob1*prob1; prob2sq=prob2*prob2; event=&EVENT; pyrs=&PYRS; wt=&wt; if prob1>. and prob2>.; run; %if &detail<2 %then %do; ods listing exclude all; ods html exclude all; %end; proc ttest data=_ididat_; class event; var prob1 prob2 diffprob; weight wt; title4 "Crude Difference in Probs by Any Event - Ignoring Time"; run; title4; run; proc means data=_ididat_; var prob1 prob2 prob1sq prob2sq; weight wt; output out=idistat mean=meanp1 meanp2 meansqp1 meansqp2 ; title4 "Mean Predicted Probs"; run; title4; run; * Compute KM estimator for survival for those moving up or down; data two; _id_=0; wt=1; event=.; pyrs=&t; run; data _all_; set _ididat_ two; run; proc phreg data=_all_ noprint; model pyrs*event(0) = ; id _id_; weight wt; output out=survdat survival=_survest_ ; run; data base; set survdat; if _id_=0; * survival estimate for referent person at time &t; _basesurv_=_survest_; inc=1-_basesurv_; keep _basesurv_ inc; run; data _idi_; merge idistat base; * Note: compute population var from mean and squared probs; varp1=meansqp1-(meanp1)**2; varp2=meansqp2-(meanp2)**2; yates1=varp1/(inc*(1-inc)); yates2=varp2/(inc*(1-inc)); idi=yates2-yates1; rel_idi=yates2/yates1 - 1; drop _TYPE_ _FREQ_; run; ods listing exclude none; ods html exclude none; %if &detail>0 %then %do; proc print data=_idi_; run; %end; title2; run; * Usage: * %survidiwt(probs,1,wt,outxy,pyrs,10,pdx,pdxy); %mend SURV_IDIWT; ****************************************************************; %macro SURV_RECLASSWT(DSNAME,DETAIL,WT,EVENT,PYRS,T,PROB1,PROB2,NCAT,C1,C2,C3,C4,C5,C6,C7,C8,C9); * Macro to compute WEIGHTED calibration eventistics for SURVIVAL DATA; * KxK (K=NCAT) table formed using categs of predicted probs (eg 0, 5, 10, 20%); * Counts categs for DF and computes for n>=20; * Allows up to 10 categories (usually 3 or 4); * Variables: * DSNAME = dataset name; * DETAIL = 2 for detailed printout, 1 for limited, 0 for none; * WT = sampling weights; * EVENT = outcome variable (coded 0,1); * PYRS = person-time for Cox model; * T = desired follow-up time for probability estimate; * PROB1 = probability for model 1 (at time t); * PROB2 = probability for model 2 (at time t); * NCAT = number of categories in classification; * C1-C9 = category cutpoints (should have ncat-1 cutpoints); * Reclassification calibration test for cells with n>=20; * Also computes the percent reclassified correctly; title2 Weighted Survival Reclassification for &NCAT Categories with CutPoints = %do j = 1 %to &ncat; &&c&j %end;; title3 Event &EVENT at Time &T : &PROB2 vs &PROB1 ; data calc; set &DSNAME; _id_=_N_; event=&EVENT; prob1=&PROB1; prob2=&PROB2; wt=&wt; if prob1>. and prob2>.; * compute product terms for J eventistic; var1=prob1*(1-prob1); var2=prob2*(1-prob2); k=&ncat; * compute categories - set to above cutpoints; %do i=1 %to &ncat-1; cut&i=&&c&i; %end; cut&ncat=1.01; array cut {10} cut1-cut10; if .2 then diffcat2=2; else if .0 %then %do; proc print data=reclass; run; %end; %if &detail=2 %then %do; proc freq data=calc; tables pcat1*pcat2*obsp/ list; weight wt; title3 "Calibration table for &PROB1 and &PROB2"; run; %end; ods listing exclude all; ods html exclude all; ods output Summary=calibs; proc means data=calc n sumwgt sum mean median min max; * Eliminate missing from all variables; where obsp ne . and prob1 ne . and prob2 ne .; class pcat1 pcat2; types pcat1*pcat2; var prob1 prob2 var1 var2 obsp k; weight wt; run; ods listing exclude none; ods html exclude none; run; %if &detail>0 %then %do; proc print data=calibs; var pcat1 pcat2 obsp_N obsp_SumWgt obsp_Sum prob1_Mean prob2_Mean obsp_Mean var1_Sum var2_Sum; title3 "Estimated Risks"; run; %end; data caltermsb calstatsb(keep= ncat maxcat dfmax nccat df chisq1 pcalib1 chisq_adj1 pcal_adj1 chisq2 pcalib2 chisq_adj2 pcal_adj2 dfj jsq1 pj1 jsq_adj1 pj_adj1 jsq2 pj2 jsq_adj2 pj_adj2 nreclass ncorr pctcorr) ; set calibs end=eof; retain chisq1 chisq_adj1 chisq2 chisq_adj2 jsq1 jsq_adj1 jsq2 jsq_adj2 nccat nreclass ncorr 0; * only save cells with count at least 20; if obsp_N >=20 then do; num1=(obsp_Sum - prob1_Sum)**2; denom1=prob1_Sum*(1-prob1_Mean); denom12=(prob1_Sum+1)*(1-prob1_Mean+1/prob1_SumWgt); chisq1=chisq1 + num1/denom1; chisq_adj1=chisq_adj1 + num1/denom12; phi1=var1_Sum/denom1; jsq1=jsq1 + num1/(phi1*denom1); * Note: evaluate adjusted Jsq, but do not print; jsq_adj1=jsq_adj1 + num1/(phi1*denom12); num2=(obsp_Sum - prob2_Sum)**2; denom2=prob2_Sum*(1-prob2_Mean); denom22=(prob2_Sum+1)*(1-prob2_Mean+1/prob2_SumWgt); chisq2=chisq2 + num2/denom2; chisq_adj2=chisq_adj2 + num2/denom22; phi2=var2_Sum/denom2; jsq2=jsq2 + num2/(phi2*denom2); jsq_adj2=jsq_adj2 + num2/(phi2*denom22); nccat=nccat+1; OEratio1=obsp_Sum/prob1_Sum; OEratio2=obsp_Sum/prob2_Sum; if obsp_Sum>0 then do; sdo=sqrt(1/obsp_Sum); OEcil1=OEratio1*exp(-1.96*sdo); OEciu1=OEratio1*exp(1.96*sdo); OEcil2=OEratio2*exp(-1.96*sdo); OEciu2=OEratio2*exp(1.96*sdo); end; * compute those reclass correctly; if pcat2 ne pcat1 then nreclass=nreclass+obsp_SumWgt; %do i=1 %to &ncat-1; cut&i=&&c&i; if pcat2>pcat1 and pcat2=&i+1 then do; * put pcat2= pcat1= cut&i= obsp_Sum= obsp_Mean= obsp_N= obsp_SumWgt= ; if obsp_Mean>=cut&i then ncorr=ncorr+obsp_SumWgt; end; else if pcat2= 20"; run; %end; %if &detail>0 %then %do; proc print data=calstatsb; var ncat maxcat dfmax nccat df chisq1 pcalib1 chisq_adj1 pcal_adj1 chisq2 pcalib2 chisq_adj2 pcal_adj2 dfj jsq1 pj1 jsq2 pj2 nreclass ncorr pctcorr; title3 "Weighted Calibration Statistics for &PROB1 and &PROB2 in Predicting &EVENT"; title4 "Using KM Incidence Estimates and Cross-Classified Cells with N >= 20"; run; %end; title2; run; * Usage: * %surv_reclasswt(probs,1,wt,outxy,pyrs,10,pdx,pdxy, 4, 0.05, 0.10, 0.20); %mend SURV_RECLASSWT; ****************************************************************;