############### # reclass # returns reclassification statistics # inputs: # pred.orig = predicted probablity from original model # pred.new = predicted probablity from new model # out=outcome # cutvect = desired breaks for intervals # cutdir = "left"(default) or "right", on which side of interval division to cut # print.res = TRUE (default) or FALSE, if TRUE prints reclassification table, and the # distribution of reclassification cetegories for cases and controls # bootstrap= TRUE or FALSE (default), if TRUE returns bootstrap estimates 95% lower and upper limits, standard errors and p values # outputs: # nri = Net reclassification improvement (NRI) for the whole sample # pnri= p value for the NRI based on the formulas in Pencina et al 2008 Statistics in Medicine # cnri= naive or observed cNRI calculated using the NRI formula applied to the table excluding the highest and lowest categories of risk under the old model (intermediate risk) # cpnri= p value for the naive or observed cNRI based on the formulas in Paynter and Cook 2012 Medical Decision Making # exp.cnri= value for the expected cNRI under symmetry # cnri.adj= adjusted or bias corrected cNRI (= cnri-exp.cnri) # cpnri.adj= p value for the naive or observed cNRI based on the formulas in Paynter and Cook 2012 Medical Decision Making # additional outputs if bootstrap=TRUE: # cnri.adj.boot.95ul = upper limit of the bootstrap 95% confidence interval # cnri.adj.boot.95ll = upper limit of the bootstrap 95% confidence interval # cnri.adj.boot.p = p value using the full sample cNRI and the bootstrap SE ########### reclass = function(pred.old, pred.new, out, cutvect=NULL, cutdir="left", print.res=TRUE, bootstrap=FALSE){ datause=data.frame(pred.old,pred.new,out,count=1) #adding 1 to cutpoints if not included if (max(cutvect)<1) {cutvect=c(cutvect,1)} #categories based on old model datause$estcut.old=as.numeric(cut2(datause$pred.old,cuts=cutvect)) #categories based on new model datause$estcut.new=as.numeric(cut2(datause$pred.new,cuts=cutvect)) #reclassification of 0 if no change, 1 if new higher than old, and -1 if new lower than old datause$reclass=0 + 1*(as.numeric(datause$estcut.new) > as.numeric(datause$estcut.old)) + - 1*(as.numeric(datause$estcut.new) < as.numeric(datause$estcut.old)) datause$reclass.abs=abs(datause$reclass) fabtable=aggregate.data.frame(datause[c("count","out","pred.new","pred.old")], by=list(estcut.new=datause$estcut.new, estcut.old=datause$estcut.old), FUN=sum) fabtable$observed.per=fabtable$out/fabtable$count fabtable$pred.new.per=fabtable$pred.new/fabtable$count fabtable$pred.old.per=fabtable$pred.old/fabtable$count fabtable$pred.expect=(fabtable$pred.new + fabtable$pred.old)/2 fabtable$lowerlim.new=cutvect[as.numeric(fabtable$estcut.new)] fabtable$upperlim.new=cutvect[as.numeric(fabtable$estcut.new)+1] if (print.res==TRUE) { print("Reclassification table: total, percent of old model row, observed event rate, mew model predicted event rate, old model predicted event rate") #reclassification table table.rold.cnew=cbind(table(datause$estcut.old,datause$estcut.new),PerRecl=0) for (i in 1:dim(table.rold.cnew)[1]){ sumrowt=sum(fabtable$count[as.numeric(fabtable$estcut.old)==i]) sumreclass=sum(fabtable$count[as.numeric(fabtable$estcut.old)==i & as.numeric(fabtable$estcut.new)!=i]) table.rold.cnew[i,dim(table.rold.cnew)[2]]=round(100*sumreclass/sumrowt,1) for (j in 1:dim(table.rold.cnew)[1]){ row=fabtable[as.numeric(fabtable$estcut.old)==i & as.numeric(fabtable$estcut.new)==j,] table.rold.cnew[i,j]=paste(row$count,paste(round(100*c(row$count/sumrowt,row$observed.per, row$pred.new.per,row$pred.old.per),2),"%",sep="",collapse=" "),sep=" ") } } print(table.rold.cnew) t.cases=table(datause$estcut.old[datause$out==1],datause$estcut.new[datause$out==1]) t.controls=table(datause$estcut.old[datause$out==0],datause$estcut.new[datause$out==0]) print("distribution of cases in reclassification categories") print(t.cases) print("distribution of controls in reclassification categories") print(t.controls) } # framingham NRI for full table nri=sum(datause$reclass[datause$out==1])/sum(datause$out) - sum(datause$reclass[datause$out==0])/sum(abs(1-datause$out)) znri.denom=sqrt( (sum(datause$reclass.abs[datause$out==1])/sum(datause$out))^2 + (sum(datause$reclass.abs[datause$out==0])/sum(1-datause$out))^2) znri=ifelse(znri.denom!=0,nri/znri.denom,NA) pnri=ifelse(is.na(znri)==TRUE,NA,2*(1-pnorm(abs(znri)))) #base.cnri num.cases=0 denom.cases=0 num.controls=0 denom.controls=0 for (r in 2:(length(cutvect)-2)){ for (c in 1:(length(cutvect)-1)){ n.case=(length(datause$estcut.old[datause$estcut.old==r & datause$estcut.new==c & datause$out==1]) + length(datause$estcut.old[datause$estcut.old==c & datause$estcut.new==r & datause$out==1]))/2 n.control=(length(datause$estcut.old[datause$estcut.old==r & datause$estcut.new==c & datause$out==0]) + length(datause$estcut.old[datause$estcut.old==c & datause$estcut.new==r & datause$out==0]))/2 denom.cases=denom.cases + n.case denom.controls=denom.controls + n.control if (r != c) { num.cases=num.cases + ifelse(r>c,-1,1)*n.case num.controls=num.controls + ifelse(r>c,1,-1)*n.control } #print(c(r,c)) #print(c(denom.cases, n.case, denom.controls, n.control, num.cases, num.controls)) } } base.cnri=(num.cases/denom.cases) + (num.controls/denom.controls) #cnri for middle categories only cdat=datause[as.numeric(datause$estcut.old)>min(as.numeric(datause$estcut.old)) & as.numeric(datause$estcut.old)min(as.numeric(all$estcut.old)) & as.numeric(all$estcut.old)c,-1,1)*n.case num.controls=num.controls + ifelse(r>c,1,-1)*n.control } } } #calculate expected cnri for boot strap sample boot.base.cnri=(num.cases/denom.cases) + (num.controls/denom.controls) #calculate observed cnri for bootstrap sample cnri=sum(boot.cdat$reclass[boot.cdat$out==1])/sum(boot.cdat$out) - sum(boot.cdat$reclass[boot.cdat$out==0])/sum(abs(1-boot.cdat$out)) #simple se for cnri for bootstrap sample se.simple=sqrt( (sum(boot.cdat$reclass.abs[boot.cdat$out==1])/sum(boot.cdat$out)^2) + (sum(boot.cdat$reclass.abs[boot.cdat$out==0])/sum(1-boot.cdat$out)^2)) return(c(cnri=cnri, base=boot.base.cnri, calc.se=se.simple)) }#end strat boot if (bootstrap==TRUE){ cat("stratified bootstrap\n") b = boot(data=datause, statistic=cnri.boot.strata, strata=datause$estcut.old, R=reps) print(b) bout=data.frame(b$t) names(bout)=c("cnri","exp", "calc.se") bout$adj.cnri=bout$cnri-bout$exp strat.ul=quantile(bout$adj.cnri, probs=c(.975), na.rm=TRUE) strat.ll=quantile(bout$adj.cnri, probs=c(.025), na.rm=TRUE) se.adj.cnri.boot=sd(bout$adj.cnri) bias.exp=mean(bout$exp) - mean(bout$cnri) zcnri.strat.boot=ifelse(se.adj.cnri.boot!=0, cnri.adj/se.adj.cnri.boot,NA) pcnri.strat.boot=ifelse(is.na(zcnri.strat.boot)==TRUE,NA,2*(1-pnorm(abs(zcnri.strat.boot)))) oldnames=names(results) results=c(results, cnri.adj.boot.95ul=strat.ul, cnri.adj.boot.95ll=strat.ll, cnri.adj.boot.p=pcnri.strat.boot) } # Table of all results for compare results }