* Program surv_calibmacs.v3.sas ; * Created by Nancy Cook (BWH, HMS, HSPH) ; * Macros for tests of calibration using Hosmer-Lemeshow test; * Modified by D'Agostino-Nam (Handbook of Statistics, 2004); * For a single model, eg., with deciles or 10 risk categories; * For survival data ONLY; * Note: appropiate for low censoring situations by time T only; * EXAMPLE USAGE OF MACROS; * %surv_calibhl(probs,1,pdx,outxy,pyrs,8); * %surv_calibhl(probs,1,pdx,outxy,pyrs,8,test=1); * %surv_calibr2(probs,1,pdx,outxy,pyrs,8, 10,0.02,0.04,0.06,0.08,0.10,0.12,0.14,0.16,0.18); ****************************************************************; %macro SURV_CALIBHL(DSNAME,DETAIL,PROB,OUTC,PYRS,T,TEST=0); * Calibration macro for Hosmer-Lemeshow statistic with deciles; * For SURVIVAL data; * Default k-2, adds 1 df if test set; * For test set specify TEST=1 in macro call; * OUTC = outcome variable (coded 0,1); * PYRS = person-time for Cox model; * T = desired follow-up time for probability estimate; data calc; set &DSNAME; _id_=_N_; prob=&PROB; stat=&OUTC; * compute product terms for J statistic; varp=prob*(1-prob); run; proc univariate noprint data=calc; id _id_; where stat ne .; var prob; output out=decp pctlpts=10 20 30 40 50 60 70 80 90 pctlpre=pred; title2 "Predicted Survival Event Probabilities &PROB - Deciles"; run; data calc; if _n_=1 then set decp; set calc; if .0 then do; sdo=sqrt(1/obsp_Sum); OEcil=OEratio*exp(-1.96*sdo); OEciu=OEratio*exp(1.96*sdo); EOratio=prob_Sum/obsp_Sum; EOcil=EOratio*exp(-1.96*sdo); EOciu=EOratio*exp(1.96*sdo); end; output calterms; if eof then do; df=8; %if &test=1 %then %do; df=df+1; %end; pcalib=1-probchi(chisq,df); pcal_adj=1-probchi(chisq_adj,df); dfj=9; %if &test=1 %then %do; dfj=dfj+1; %end; pj=1-probchi(jsq,dfj); pj_adj=1-probchi(jsq_adj,dfj); output calstats; end; %if &detail>0 %then %do; proc print data=calterms; var prob_N obsp_Mean prob_Mean OEratio OEcil OEciu EOratio EOcil EOciu num denom denom2 phi; title2 "Survival OE Ratios for Decile Categories of &PROB and &OUTC"; %end; %if &detail>0 %then %do; proc print data=calstats; var df chisq pcalib chisq_adj pcal_adj ; title2 "Survival Calibration Statistics for Decile Categories of &PROB and &OUTC"; %if &test=1 %then %do; title3 "For TEST Set"; %end; run; %end; %mend SURV_CALIBHL; ***************************************************************; %macro SURV_CALIBR2(DSNAME,DETAIL,PROB,OUTC,PYRS,T, NCAT,C1,C2,C3,C4,C5,C6,C7,C8,C9,TEST=0); * Macro to compute calibration statistics for NCAT (K) categories; * For SURVIVAL data; * Default df = k-2, adds 1 df if test set; * For test set specify TEST=1 in macro call; * Formed using categs of predicted probs (eg 0, 5, 10, 20%); * OUTC = outcome variable (coded 0,1); * PYRS = person-time for Cox model; * T = desired follow-up time for probability estimate; * NCAT = number of categories (max 10); * C1-C9 = category cut points (max 9); * E.g., format for 10 categories of 2% increased risk; * e.g., value probf 0-<0.02='0-<2%' .02-<0.04='2-<4%' 0.04-<0.06='4-<6%' 0.06-<0.08='6-<8%' 0.08-<0.10='8-<10%' 0.10-<0.12='10-<12%' 0.12-<0.14='12-<14%' 0.14-<0.16='14-<16%' 0.16-0.18='16-<18%' 0.18-high='18%+'; data calc; set &DSNAME; _id_=_N_; prob=&prob; stat=&OUTC; k=&ncat; * compute product terms for J statistic; varp=prob*(1-prob); * compute categories pcat; * compute categories - set to above cutpoints; %do i=1 %to &ncat-1; cut&i=&&c&i; %end; cut&ncat=1.01; min=0; max=1; array chigh {10} cut1-cut9 max; array clow {10} min cut1-cut9; do i=1 to &ncat; if clow(i) <= prob < chigh(i) then pcat=i; end; run; * Compute KM estimator for survival within strata; data two; _id_=0; stat=.; &pyrs=&t; k=&ncat; * Set strata variables in categories; do i=1 to k; pcat=i; output; end; run; data _all_; set calc two; proc phreg data=_all_ noprint; model &pyrs*stat(0) = ; id _id_; strata pcat; output out=survdat survival=_survest_ ; run; data base; set survdat; if _id_=0; * survival estimate for referent person; _basesurv_=_survest_; keep pcat _basesurv_ ; run; proc sort data=base; by pcat; proc sort data=calc; by pcat; data calc ; merge calc base; by pcat; if _id_ ne 0; surv=_basesurv_; obsp=1-surv; run; ods listing exclude all; ods html exclude all; ods output Summary=calibr; proc means data=calc n sum mean median min max; * Eliminate missing from both variables; where obsp ne . and prob ne .; class pcat; var prob obsp varp k; run; ods listing exclude none; ods html exclude none; run; %if &detail=2 %then %do; proc print data=calibr; title2 "Survival Calibration Statistics for &NCAT Risk Categories of &PROB and &OUTC"; %end; data calterms calstats (keep=df chisq pcalib chisq_adj pcal_adj dfj jsq pj jsq_adj pj_adj); set calibr end=eof; retain chisq chisq_adj jsq jsq_adj 0; num=(obsp_Sum - prob_Sum)**2; denom=prob_Sum*(1-prob_Mean); denom2=(prob_Sum+1)*(1-prob_Mean+1/prob_N); chisq=chisq + num/denom; chisq_adj=chisq_adj + num/denom2; phi=varp_Sum/denom; jsq=jsq + num/(phi*denom); jsq_adj=jsq_adj + num/(phi*denom2); OEratio=obsp_Sum/prob_Sum; if obsp_Sum>0 then do; sdo=sqrt(1/obsp_Sum); OEcil=OEratio*exp(-1.96*sdo); OEciu=OEratio*exp(1.96*sdo); EOratio=prob_Sum/obsp_Sum; EOcil=EOratio*exp(-1.96*sdo); EOciu=EOratio*exp(1.96*sdo); end; output calterms; if eof then do; df=k_Mean-2; %if &test=1 %then %do; df=df+1; %end; pcalib=1-probchi(chisq,df); pcal_adj=1-probchi(chisq_adj,df); dfj=k_Mean-1; %if &test=1 %then %do; dfj=dfj+1; %end; pj=1-probchi(jsq,dfj); pj_adj=1-probchi(jsq_adj,dfj); output calstats; end; %if &detail>0 %then %do; proc print data=calterms; var pcat prob_N obsp_Mean prob_Mean OEratio OEcil OEciu EOratio EOcil EOciu num denom denom2 phi; title2 "Survival OE Ratios for &NCAT Risk Categories of &PROB and &OUTC"; %end; %if &detail>0 %then %do; proc print data=calstats; var df chisq pcalib chisq_adj pcal_adj ; title2 "Survival Calibration Statistics for &NCAT Risk Categories of &PROB and &OUTC"; %if &test=1 %then %do; title3 "For TEST Set"; %end; run; %end; %mend SURV_CALIBR2; ***************************************************************;