* Program bootbinopt.v2.sas; * Created by Nancy Cook (BWH, HMS, HSPH); * Macro BOOTBINOPT for Bootstrap sampling for optimism adjustment; * For BINARY data with logistic model; * Refits model in each bootstrap sample with logistic regression; * For two NESTED models; * Computes predicted probs ; * Calls macros to compute reclassification statistics with Bootstrap sampling ; * RC stat, NRI, contin NRI, and IDI for BINARY data; * (Use bootoptmacro.sas for Cox model at Time T); * (Use bootsurvoptmac.sas for full survival data); * Macros: * BOOTBINOPT takes overall bootstrap with no weights; * Includes seed for surveyselect; * Adjusts for optimism using score function in proc logistic in full data; * Read in reclassification macros - Use correct path; %include '/home/swift/nancyc/macros/shared/binary/reclass.macros.v4.sas' ; /**************** * Example code to include at end to run and save results in file; * Change datafile names as necessary; %BOOTBINOPT(DSNAME,PROBDAT,ID,DETAIL,NBOOT,OUTC,BASEVARS,NEWVARS, PROB1,PROB2,NCAT,C1,C2,C3,C4,C5,C6,C7,C8,C9,SEED=0); * Examples; %bootbinopt(data,probdata,id,1,100,outxy,age hxdiab currsmok lnsbp lnchol, hdl, pdx,pdxy,4,0.05,0.10,0.20,seed=923); run; * To save in files; * Bootstrap samples; data out.rcbootb; set rcbootb; by strat; data out.nribootb; set nribootb; by strat; data out.contbootb; set contbootb; by strat; data out.idibootb; set idibootb; by strat; * Original data (especially useful for simulations); data out.rcorigb; set rcorigb; by strat; data out.nriorigb; set nriorigb; by strat; data out.contorigb; set contorigb; by strat; data out.idiorigb; set idiorigb; by strat; run; * delete to stop appending if rerun; proc datasets; delete rcbootb nribootb contbootb idibootb rcorigb nriorigb contorigb idiorigb; run; ***************/ *******************************************************************************; %macro BOOTBINOPT(DSNAME,PROBDAT,ID,DETAIL,NBOOT,OUTC,BASEVARS,NEWVARS, PROB1,PROB2,NCAT,C1,C2,C3,C4,C5,C6,C7,C8,C9,SEED=0); * Macro to compute RC statistic and NRI in bootstrap samples; * And save results in file; * Bootstrap of overall sample - revise if stratify; * Variables: * DSNAME = dataset name; * PROBDAT = New dataset with all model variables plus computed probabilities; * ID = identification variable; * DETAIL = 2 for detailed printout, 1 for limited, 0 for none; * Applies to print out - 0 or >0 only (1 or 2); * Individual results for reclass not printed; * NBOOT = number of bootstrap samples; * OUTC = BINARY outcome variable (coded 0,1); * BASEVARS = variables in original model; * NEWVARS = new variables to add; * PROB1 = probability for model 1; * PROB2 = probability for model 2; * NCAT = number of categories in classification; * C1-C9 = category cutpoints (should have ncat-1 cutpoints); * SEED = seed for random number generator (default=0); %put SEED= &seed ; %let nstrat=1; * note: leaving nstrat and istrat in although not used here (placeholder); data _one_; set &DSNAME ; _id_=_n_; keep _id_ &ID &OUTC &BASEVARS &NEWVARS ; run; proc sort data=_one_; by _id_; run; * Run programs in original data; * Fit models in original data; title2 "Model Fit in Original Data"; %PREDBIN (_one_, &OUTC, &BASEVARS, &PROB1); data _temp_; merge _one_ preddat (keep=_id_ &PROB1); by _id_; run; %PREDBIN (_one_, &OUTC, &BASEVARS &NEWVARS, &PROB2); data &PROBDAT; merge _temp_ preddat (keep=_id_ &PROB2); by _id_; * Dataset _one_ = input data (with var _id_ added); * Dataset &PROBDAT = input data with vars _id_ &PROB1 and &PROB2 added ; run; %reclass(&PROBDAT,&DETAIL,&OUTC,&PROB1,&PROB2,&NCAT,&C1,&C2,&C3,&C4,&C5,&C6,&C7,&C8,&C9); data reclassdat; set calstatsb; strat=1; iter=0; run; %nricat(&PROBDAT,&DETAIL,&OUTC,&PROB1,&PROB2,&NCAT,&C1,&C2,&C3,&C4,&C5,&C6,&C7,&C8,&C9); data nridat; set _all_; strat=1; iter=0; run; %contnri(&PROBDAT,&DETAIL,&OUTC,&PROB1,&PROB2); data contnridat; set _all_; strat=1; iter=0; run; %idimacro(&PROBDAT,&DETAIL,&OUTC,&PROB1,&PROB2); data _ididat_; set _idi_; strat=1; iter=0; run; *** Start Bootstraps ***; options nonotes; %do istrat= 1 %to &nstrat; %do iiter= 1 %to &nboot; %let seed=%eval(&seed+1); proc surveyselect method=urs data=_one_ out=boot rate=1 seed=&seed outhits noprint; proc sort data=boot; by _id_; run; * Refit models in bootstrap data; * Score in full data set; %SCOREBIN (boot, _one_, &OUTC, &BASEVARS, prboot1, prorig1); data tempboot; set preddat; by _id_; run; data temporig; set scoredat; by _id_; run; %SCOREBIN (tempboot, temporig, &OUTC, &BASEVARS &NEWVARS, prboot2, prorig2); run; data tempboot; merge tempboot preddat (keep=_id_ &NEWVARS prboot2); by _id_; run; data temporig; merge temporig scoredat (keep=_id_ &NEWVARS prorig2); by _id_; run; *** Compute measures in BOOTSTRAP samples ***; * Reclass calibration with NCAT categories; %reclass(tempboot,0,&OUTC,prboot1,prboot2,&NCAT,&C1,&C2,&C3,&C4,&C5,&C6,&C7,&C8,&C9); data calstatsb; set calstatsb; strat=&istrat; iter=&iiter; if .0 %then %do; *** Output parameter and sample stats ***; *** Adjust for Optimism by Subtracting ***; *** Reclassification calibration ***; title2 "Bootstrap Reclassification Calibration Statistics for &NCAT Categories"; proc means n mean stddev median min max data=rcbootb; by strat; output out=outrcb mean(chisq1 chisq2)=mb_chi1 mb_chi2 ; title3 "Bootstrap data"; * format strat stratf.; run; proc means n mean stddev median min max data=rcorigb; by strat; output out=outrco mean(chisq1 chisq2 df)=mo_chi1 mo_chi2 mo_df; title3 "Original data"; * format strat stratf.; run; data rcstats; merge reclassdat outrcb outrco; by strat; opt_chi1=mo_chi1-mb_chi1; adj_chi1=chisq1 + opt_chi1; opt_chi2=mo_chi2-mb_chi2; adj_chi2=chisq2 + opt_chi2; adj_diffchi=adj_chi1-adj_chi2; * compute p-values using average df in original data; df=round(mo_df); padj1=1-probchi(adj_chi1,df); padj2=1-probchi(adj_chi2,df); run; proc print data=rcstats; title2 "Reclassification Bootstrap Statistics based on &NBOOT bootstrap samples"; title3 "Adjusted for Optimism (estimated)"; var chisq1 opt_chi1 adj_chi1 padj1 chisq2 opt_chi2 adj_chi2 padj2 adj_diffchi; run; *** NRI ***; title2 "Bootstrap NRI Statistics for &NCAT Categories"; proc means n mean stddev median min max data=nribootb; by strat; output out=outnrib mean(ri_case ri_contr nri znri)=mbri_case mbri_contr mb_nri mb_znri; title3 "Bootstrap data"; * format strat stratf.; run; proc means n mean stddev median min max data=nriorigb; by strat; output out=outnrio mean(ri_case ri_contr nri znri)=mori_case mori_contr mo_nri mo_znri; title3 "Original data"; * format strat stratf.; run; * Compute CI and p for NRI; * Use bootstrap data; proc univariate data=nribootb noprint; by strat; var nri ri_case ri_contr; output out=nridist mean=avenri averi_case averi_contr std=sdnri sdri_case sdri_contr pctlpts=2.5 5 10 50 90 95 97.5 pctlpre=nri_q ricase_q ricontr_q; run; data nristats; merge nridat outnrib outnrio nridist; by strat; * assumes same number (no missing) bootstraps; opt_case=mbri_case-mori_case; adj_case=ri_case-opt_case; opt_contr=mbri_contr-mori_contr; adj_contr=ri_contr-opt_contr; opt_nri=mb_nri-mo_nri; adj_nri=nri-opt_nri; opt_znri=mb_znri-mo_znri; adj_znri=znri-opt_znri; * use SE ests from bootstrap data; * Check; znri=adj_nri/sdnri; pnri=2*(1-probnorm(abs(znri))); adj_nricil=adj_nri-1.96*sdnri; adj_nriciu=adj_nri+1.96*sdnri; run; proc print data=nristats; title2 "NRI Bootstrap Statistics based on &NBOOT bootstrap samples"; title3 "Adjusted for Optimism (SD from Bootstrap Samples)"; var nri opt_nri adj_nri sdnri pnri adj_nricil adj_nriciu ri_case opt_case adj_case ri_contr opt_contr adj_contr ; run; *** Continuous NRI ***; title2 "Bootstrap NRI Statistics for Infinite Categories (Continuous NRI)"; proc means n mean stddev median min max data=contbootb; by strat; output out=outcontb mean(ri_case ri_contr nri znri)=mbri_case mbri_contr mb_nri mb_znri; title3 "Bootstrap data"; * format strat stratf.; run; proc means n mean stddev median min max data=contorigb; by strat; output out=outconto mean(ri_case ri_contr nri znri)=mori_case mori_contr mo_nri mo_znri; title3 "Original data"; * format strat stratf.; run; * Compute CI and p for NRI; * Use bootstrap data; proc univariate data=contbootb noprint; by strat; var nri ri_case ri_contr; output out=contnridist mean=avenri averi_case averi_contr std=sdnri sdri_case sdri_contr pctlpts=2.5 5 10 50 90 95 97.5 pctlpre=nri_q ricase_q ricontr_q; run; data contnristats; merge contnridat outcontb outconto contnridist; by strat; * assumes same number (no missing) bootstraps; opt_case=mbri_case-mori_case; adj_case=ri_case-opt_case; opt_contr=mbri_contr-mori_contr; adj_contr=ri_contr-opt_contr; opt_nri=mb_nri-mo_nri; adj_nri=nri-opt_nri; opt_znri=mb_znri-mo_znri; adj_znri=znri-opt_znri; * use SE ests from bootstrap data; * Check; znri=adj_nri/sdnri; pnri=2*(1-probnorm(abs(znri))); adj_nricil=adj_nri-1.96*sdnri; adj_nriciu=adj_nri+1.96*sdnri; run; proc print data=contnristats; title2 "Continuous NRI Bootstrap Statistics based on &NBOOT bootstrap samples"; title3 "Adjusted for Optimism (SD from Bootstrap Samples)"; var nri opt_nri adj_nri sdnri pnri adj_nricil adj_nriciu ri_case opt_case adj_case ri_contr opt_contr adj_contr ; run; *** IDI ***; title2 "Bootstrap IDI Statistics"; proc means n mean stddev median min max data=idibootb; by strat; output out=outidib mean(yates1 yates2 idi rel_idi idit)=mb_yates1 mb_yates2 mb_idi mb_relidi mb_idit; title3 "Bootstrap data"; * format strat stratf.; run; proc means n mean stddev median min max data=idiorigb; by strat; output out=outidio mean(yates1 yates2 idi rel_idi idit)=mo_yates1 mo_yates2 mo_idi mo_relidi mo_idit; title3 "Original data"; * format strat stratf.; run; * Compute CI and p for IDI; * Use bootstrap data; proc univariate data=idibootb noprint; by strat; var idi rel_idi; output out=ididist mean=aveidi averel_idi std=sdidi sdrel_idi pctlpts=2.5 5 10 50 90 95 97.5 pctlpre=idiq rel_idiq; run; data idistats; merge _ididat_ outidib outidio ididist; by strat; * assumes same number (no missing) bootstraps; opt_yates1=mb_yates1-mo_yates1; adj_yates1=yates1-opt_yates1; opt_yates2=mb_yates2-mo_yates2; adj_yates2=yates2-opt_yates2; opt_idi=mb_idi-mo_idi; adj_idi=idi-opt_idi; opt_relidi=mb_relidi-mo_relidi; adj_relidi=rel_idi-opt_relidi; opt_idit=mb_idit-mo_idit; adj_idit=idit-opt_idit; * use SE ests from bootstrap data; * Check; zidi=adj_idi/sdidi; pidi=2*(1-probnorm(abs(zidi))); adj_idicil=adj_idi-1.96*sdidi; adj_idiciu=adj_idi+1.96*sdidi; adj_relidicil=adj_relidi-1.96*sdrel_idi; adj_relidiciu=adj_relidi+1.96*sdrel_idi; run; proc print data=idistats; title2 "IDI Bootstrap Statistics based on &NBOOT bootstrap samples"; title3 "Adjusted for Optimism (SD from Bootstrap Samples)"; var idi opt_idi adj_idi sdidi pidi adj_idicil adj_idiciu rel_idi opt_relidi adj_relidi sdrel_idi adj_relidicil adj_relidiciu yates1 opt_yates1 adj_yates1 yates2 opt_yates2 adj_yates2 ; run; %end; %mend bootbinopt; *********************************************************************************; %macro PREDBIN (DSNAME, OUTC, MODVARS, PROB); proc logistic data=&DSNAME ; model &OUTC (event='1') = &MODVARS / rl; output out=preddat pred=&prob ; run; %mend PREDBIN; * EXAMPLE USAGE OF MACRO; * %predbin(one, cvd, X1-X20, prcvd10); *********************************************************************************; %macro SCOREBIN (DSORIG, DSNSCORE, OUTC, MODVARS, PROB1, PROB2); proc logistic data=&DSORIG outest=_betas_(drop=_type_ _name_) noprint; model &OUTC (event='1') = &MODVARS / rl; output out=preddat pred=&PROB1 xbeta=bx; score data=&DSNSCORE out=scoredat; run; data scoredat; set scoredat ; &PROB2=P_1; drop P_0 P_1 F_&OUTC I_&OUTC ; run; %mend SCOREBIN; * EXAMPLE USAGE OF MACRO; * %scorebin(boot, full, cvd8, X1-X20, prboot, prfull); *********************************************************************************;