Mar 11, 2018

Allison's (2014) book on event history and survival analysis

This replicates the analyses in Allison (2014). There is another approach here, but I like mine better.

// Table 2.1
use https://statisticalhorizons.com/wp-content/uploads/rank.dta, clear

ltable dur promo, failure hazard noadjust

// Table 2.2
use https://statisticalhorizons.com/wp-content/uploads/rank.dta, clear
generate id = _n 
reshape long art cit, i(id) j(year) 
drop if year > dur                    // Remove empty observation created during reshape 
replace promo = 0 if year < dur       // Create time-varying failure variable
generate jobpres = prest1             // Create-time varying prestige varianle
replace jobpres = prest2 if year >= jobtime 

eststo clear
eststo: logit promo undgrad phdmed phdprest jobpres art cit 
estadd expb
eststo: logit promo undgrad phdmed phdprest jobpres art cit year c.year#c.year 
estadd expb

esttab, cell("b(fmt(3)) z(fmt(2) star) expb(fmt(2))") varwidth(17) label nonumber ///
        mtitle("Model 1" "Model 2") interaction( X ) ///
  stats(ll N, label("Log-likelihood") fmt(2 %9.0gc)) ///
        title("Logistic Models Predicting the Probability of Promotion") ///
  legend

// Likelihood ratio test on p. 13
lrtest est1 est2

// Additional model on p. 13
logit promo undgrad phdmed phdprest jobpres art cit year c.year#c.year c.phdprest#c.year 

// Table 2.3
  // Scenario A: All censored cases are in fact promoted
use https://statisticalhorizons.com/wp-content/uploads/rank.dta, clear
generate id = _n 
reshape long art cit, i(id) j(year) 
drop if year > dur                          // Remove empty observation created during reshape 
replace promo = 0 if year  < dur            // Create time-varying failure variable
replace promo = 1 if year == dur & dur < 10 // All censored cases are promoted
generate jobpres = prest1                   // Create-time varying prestige varianle
replace jobpres = prest2 if year >= jobtime 
eststo: logistic promo undgrad phdmed phdprest jobpres art cit year c.year#c.year
  
  // Scenario B: All censored cases did not experience event until the end of the observation period
use https://statisticalhorizons.com/wp-content/uploads/rank.dta, clear
generate id = _n 
reshape long art cit, i(id) j(year) 
drop if year > dur & promo == 1       // Remove empty observation created during reshape 
                                      // if they are promoted
replace promo = 0 if year < dur       // Create time-varying failure variable
replace art = art[_n-1] if art == .   // Carry observations forward
replace cit = cit[_n-1] if cit == .   // Carry observations forward
generate jobpres = prest1             // Create-time varying prestige varianle
replace jobpres = prest2 if year >= jobtime 

eststo: logistic promo undgrad phdmed phdprest jobpres art cit year c.year#c.year

esttab est2 est3 est4, eform b(2) z wide staraux varwidth(17) label nonumber ///
        mtitle("Standard analysis" "Scenario A" "Scenario B") interaction( X ) ///
  stats(ll N, label("Log-likelihood") fmt(2 %9.0gc)) ///
        title("Extreme Case Scenarios for Informative Censoring") ///
  legend modelwidth(18)

// Table 3.1
use https://statisticalhorizons.com/wp-content/uploads/recid.dta, clear
stset week, failure(arrest==1) 

eststo clear
eststo: streg fin age race wexp mar paro prio, dist(exponential)  
estadd expb

eststo: streg fin age race wexp mar paro prio, dist(weibull) 
estadd expb
eststo: streg fin age race wexp mar paro prio, dist(ggamma) 
estadd expb

esttab, cell("b(fmt(3)) z(fmt(2) star) expb(fmt(3))") varwidth(17) label nonumber ///
        mtitle("Exponential" "Weibull" "Gamma") ///
  stats(ll N, label("Log-likelihood") fmt(2 %9.0gc)) ///
        title("Estimates for Three Models of Recidivism") ///
  legend keep(_t:*)  eqlabels("", none) /// // Removes equation label
        coeflabel(fin "Financial aid" age "Age at release" race "Black" ///
                  wexp "Work experience" mar "Married" paro "Paroled" ///
                  prio "Prior convictions")

// Table 3.2
use https://statisticalhorizons.com/wp-content/uploads/recid.dta, clear
stset week, failure(arrest==1) 

eststo clear
qui eststo: streg fin age race wexp mar paro prio, dist(ggamma) 
qui estadd scalar dev = -2*e(ll)
qui eststo: streg fin age race wexp mar paro prio, dist(lognormal) 
qui estadd scalar dev = -2*e(ll)
qui eststo: streg fin age race wexp mar paro prio, dist(llogistic) 
qui estadd scalar dev = -2*e(ll)
qui eststo: streg fin age race wexp mar paro prio, dist(weibull) 
qui estadd scalar dev = -2*e(ll)
qui eststo: streg fin age race wexp mar paro prio, dist(gompertz) 
qui estadd scalar dev = -2*e(ll)
qui eststo: streg fin age race wexp mar paro prio, dist(exponential) 
qui estadd scalar dev = -2*e(ll)


qui esttab, cells(none) scalars("dev Deviance" "aic AIC" "bic BIC") sfmt(3) nomtitles noobs 
esttab r(stats, transpose), coeflabel(est1 "Gamma" est2 "Log-normal" ///
                                      est3 "Log-logistic" est4 "Weibull" ///
                                      est5 "Gompertz" est6 "Exponential") ///
                            title("Goodness of Fit for Recidivism Models") ///
                            nomtitle collabels("Deviance" "AIC" "BIC") 

// Figure 3.1: Hazard Function for Weibull Regression Model
qui streg fin age race wexp mar paro prio, dist(weibull) 
stcurve, hazard xtitle("Weeks since release")

// Figure 3.2: Hazard Function for Log-Logistic Regression Model
qui streg fin age race wexp mar paro prio, dist(llogistic) 
stcurve, hazard xtitle("Weeks since release")

// Table 4.1
use https://statisticalhorizons.com/wp-content/uploads/recid.dta, clear
stset week, failure(arrest==1) 
eststo clear
eststo: stcox fin age race wexp mar paro prio 
estadd expb

generate id = _n 
reshape long work, i(id) j(stop) 
generate start = stop - 1                             
drop if stop > week                                  // Drop empty observations
replace arrest = 0 if week != stop                   // Create time-varying failure variable
stset stop, failure(arrest == 1) id(id) origin(start) 
eststo: stcox fin age race wexp mar paro prio work 
estadd expb

generate worklag = work[_n-1] if start > 0
eststo: stcox fin age race wexp mar paro prio worklag
estadd expb

esttab, cell("b(fmt(3)) z(fmt(2) star) expb(fmt(3))") varwidth(18) label nonumber ///
        mtitle("Basic" "Time-varying X" "Lagged X") modelwidth(15) ///
  stats(ll N, label("Log-likelihood") fmt(2 %9.0gc)) ///
        title("Cox Regression Estimates for Recidivism Data") legend ///
        coeflabel(fin "Financial aid" age "Age at release" race "Black" ///
                  wexp "Work experience" mar "Married" paro "Paroled" ///
                  prio "Prior convictions" work "Employment") rename(worklag work)
// Table 4.2
list id stop start arrest work fin age if inlist(id, 339, 417), noobs sepby(id)

// Table 4.3
estimates restore est2
estat phtest, detail 

// Table 4.4
eststo clear
eststo: stcox fin age race wexp mar paro prio work, tvc(age wexp)
estadd expb

esttab, cell("b(fmt(3)) z(fmt(2) star) expb(fmt(3))") varwidth(22) label nonumber ///
        mtitle("Interactions with time") modelwidth(22) ///
  stats(ll N, label("Log-likelihood") fmt(2 %9.0gc)) ///
        title("Cox Regression Estimates with Time Interactions") legend ///
  eqlabels("Main effects" "Interactions with time") ///
  coeflabel(fin "Financial aid" age "Age at release" race "Black" ///
                  wexp "Work experience" mar "Married" paro "Paroled" ///
                  prio "Prior convictions" work "Employment") 
  
eststo clear
eststo: stcox fin age race wexp mar paro prio work, strata(wexp)
estadd expb

esttab, cell("b(fmt(3)) z(fmt(2) star) expb(fmt(3))") varwidth(22) label nonumber ///
        mtitle("Stratification") modelwidth(22) ///
  stats(ll N, label("Log-likelihood") fmt(2 %9.0gc)) ///
        title("Cox Regression Estimates with Stratification") legend ///
  coeflabel(fin "Financial aid" age "Age at release" race "Black" ///
                  wexp "Work experience" mar "Married" paro "Paroled" ///
                  prio "Prior convictions" work "Employment") dropped(--)
// Table 4.5
qui stcox fin age race wexp mar paro prio work, tvc(age wexp)

foreach x of numlist 0 10 20 30 40 50 {
 local b1    = _b[age] + `x' * _b[tvc:age]
 local expb1 = exp(`b1')
 local b2    = _b[wexp] + `x' * _b[tvc:wexp]
 local expb2 = exp(`b2')
 matrix results = (`x' , `b1' , `expb1' , `b2', `expb2')
 if `x' == 0 matrix table45 = results
 else matrix table45 = (table45\results)
}

esttab matrix(table45, fmt(0 3 3 3 3)), nomtitle ///
                 collabel("Weeks" "b" "Exp(b)" "b" "Exp(b)") varwidth(0) ///
     title("Effects of Age and Work Experience at Different Times") 

// Table 4.6
use https://statisticalhorizons.com/wp-content/uploads/recid.dta, clear
stset week, failure(arrest==1) 
stcox fin age race wexp mar paro prio 
stcurve, survival at(fin = 1 age = 21 race = 1 wexp = 1 mar = 0 paro = 1 prio = 4) ///
         outfile(surv, replace) 
use surv, clear

egen pickone = tag(_t)
list _t surv1 if inlist(_t, 0, 5, 10, 15, 20, 25, 30, 35, 40, 45, 50, 52) & pickone, noobs sep(0)

// Table 5.1
use https://statisticalhorizons.com/wp-content/uploads/tarp.dta, clear

eststo clear
stset arrstday, failure(type == 1 2) 
eststo: stcox fin age white male married paro numprop crimprop numarst edcomb 
estadd expb

stset arrstday, failure(type == 1) 
eststo: stcox fin age white male married paro numprop crimprop numarst edcomb 
estadd expb
 
stset arrstday, failure(type == 2) 
eststo: stcox fin age white male married paro numprop crimprop numarst edcomb 
estadd expb 

esttab, cell("b(fmt(3)) z(fmt(2) star) expb(fmt(3))") varwidth(22) label nonumber ///
        mtitle("All arrests" "Property arrests" "Non-property arrests") modelwidth(18) ///
  stats(ll N, label("Log-likelihood") fmt(2 %9.0gc)) ///
        title("Estimates of Proportional Hazards Models for Different Arrest Types") legend ///
        coeflabel(fin "Financial aid" age "Age at release" white "White" ///
                  male "Male" married "Married" paro "Paroled" ///
                  numprop "No. of property convictions" ///
                  crimprop "Imprisoned for property crime" ///
                  numarst "No. of arrests" ///
                  edcomb "Education")

// Figure 5.1
stset arrstday, failure(type == 1)
stcompet cumin = ci, compet1(2) 
sort _t
twoway (line cumin _t if type == 1) ///
       (line cumin _t if type == 2) ///
    (scatteri .2    375 "Property", msymbol(none) mlabpos(0)) ///
    (scatteri .15   370 "Non-property", msymbol(none) mlabpos(0)) ///
    , legend(off) ///
    xtitle("Days since release") ytitle("Probability of arrest") ///
       title("Cumulative incidence")

// Table 5.2 
eststo clear
stset arrstday, failure(type == 1 2) 
eststo: stcox fin age white male married paro numprop crimprop numarst edcomb 
estadd expb
stset arrstday, failure(type == 2)
eststo: stcrreg fin age white male married paro numprop crimprop numarst edcomb, compete(type == 1) 
estadd expb
stset arrstday, failure(type==1) 
eststo: stcrreg fin age white male married paro numprop crimprop numarst edcomb, compete(type==2) 
estadd expb

esttab, cell("b(fmt(3)) z(fmt(2) star) expb(fmt(3))") varwidth(22) label nonumber ///
        mtitle("All arrests" "Property arrests" "Non-property arrests") modelwidth(18) ///
  stats(ll N, label("Log-likelihood") fmt(2 %9.0gc)) ///
        title("Estimates of Subdistribution Hazards Models for Different Arrest Types") legend ///
        coeflabel(fin "Financial aid" age "Age at release" white "White" ///
                  male "Male" married "Married" paro "Paroled" ///
                  numprop "No. of property convictions" ///
                  crimprop "Imprisoned for property crime" ///
                  numarst "No. of arrests" ///
                  edcomb "Education")

// Figure 5.2
qui stcrreg fin age white male married paro numprop crimprop numarst edcomb, compete(type==2) 
stcurve, cif xtitle("Days since release") ///
         title("Cumulative incidence of non-property arrests") ///
   ylab(, format(%6.2f))
   
// Table 6.1
use https://statisticalhorizons.com/wp-content/uploads/tarp.dta, clear
eststo clear
estpost tabulate arrstcount
esttab, cell(b) nonumber collabel("Number of persons", lhs("Number of arrests")) nomtitle noobs ///
        modelwidth(20) varwidth(20) varlabels(, blist(Total "{hline @width}{break}")) ///
  title(Frequency Distribution for Number of Arrests)

// Table 6.2
eststo clear
eststo: nbreg arrstcount fin age white male married paro numprop crimprop numarst edcomb 
estadd expb

use https://statisticalhorizons.com/wp-content/uploads/arrests.dta, clear
stset length, failure(arrind == 1) 
eststo: stcox fin age white male married paro numprop crimprop numarst edcomb 
estadd expb 

stcox fin age white male married paro numprop crimprop numarst edcomb, cluster(id) 

set matsize 942
eststo: stcox fin age white male married  paro  numprop crimprop numarst edcomb, shared(id) 
estadd expb 

esttab, cell("b(fmt(3)) z(fmt(2) star) expb(fmt(3))") varwidth(22) label nonumber ///
        mtitle("Negstive binomial count model" "Cox regression, gap time" "Cox regression, shared frailty") modelwidth(18) ///
  stats(ll N, label("Log-likelihood") fmt(2 %9.0gc)) ///
        title("Regression Models for Repeated Arrests") legend ///
        coeflabel(fin "Financial aid" age "Age at release" white "White" ///
                  male "Male" married "Married" paro "Paroled" ///
                  numprop "No. of property convictions" ///
                  crimprop "Imprisoned for property crime" ///
                  numarst "No. of arrests" ///
                  edcomb "Education") keep(main:*) drop(_cons)
// Table 6.3
use https://statisticalhorizons.com/wp-content/uploads/arrests.dta, clear
stset length, failure(arrind == 1)

eststo clear 
eststo: streg spellnum fin age white male married  paro  numprop crimprop numarst edcomb, cluster(id) dist(weibull)  
estadd expb

eststo: streg spellnum fin age white male married  paro  numprop crimprop numarst edcomb, shared(id) dist(weibull)  
estadd expb

stset end, failure(arrind==1) origin(begin)  
eststo: stcox fin age white male married paro numprop crimprop numarst edcomb, cluster(id) 
estadd expb

esttab, cell("b(fmt(3)) z(fmt(2) star) expb(fmt(3))") varwidth(22) label nonumber ///
        mtitle("Weibull, robust z" "Weibull, shared frailty" "Cox regression, origin times") modelwidth(18) ///
  stats(ll N, label("Log-likelihood") fmt(2 %9.0gc)) ///
        title("Regression Models for Repeated Arrests") legend ///
        coeflabel(fin "Financial aid" age "Age at release" white "White" ///
                  male "Male" married "Married" paro "Paroled" ///
                  numprop "No. of property convictions" ///
                  crimprop "Imprisoned for property crime" ///
                  numarst "No. of arrests" ///
                  edcomb "Education") keep(main:)

// Analyses on p. 74
stcox fin age white male married  paro  numprop crimprop numarst edcomb, cluster(id) tvc(numarst) texp(_t/30.4) 


Reference

Allison, Paul D. 2014. Event History and Survival Analysis, 2nd ed. Sage. doi: 10.4135/9781452270029