Oct 26, 2019

Specification curve analysis

use "ZA4612_v1-0-1.dta", clear
do "ZA4612_patch_v1-0-1.do" // Some patch from data provider

keep v399-v404 v933 v827 v301 v298 

// Outcomes
mvdecode v399-v404, mv(9 = .a )

generate stress  = 5 - v399
generate depress = 5 - v400
generate calm    = v401 - 1
generate energy  = v402 - 1
generate pain    = 5 - v403
generate lonely  = 5 - v404

// Predictors
  // Topbot
mvdecode v933 v827, mv(96 = .a \ 99 = .b) 
generate topbot = v933
replace  topbot = v827 if missing(topbot)
  // Age
mvdecode v301, mv(999 = .a)
rename v301 age
  // Female
generate female = (v298 == 2)

drop v399-v404 v933 v827 v298 // Drop unnecessary variables

// Post definition
tempname foo
postfile `foo' str50 spec k1 k2 k3 b se using deleteme.dta, replace

// Loop
forvalues k1 = 1/6 {
  if `k1' == 1 local y "stress"
  if `k1' == 2 local y "depress"
  if `k1' == 3 local y "pain"
  if `k1' == 4 local y "calm"
  if `k1' == 5 local y "energy"
  if `k1' == 6 local y "lonely"
    forvalues k2 = 1/4 {
      if `k2' == 1 local agecontrol "age"
      if `k2' == 2 local agecontrol "c.age##c.age"
      if `k2' == 3 local agecontrol "c.age##c.age##c.age"    
      if `k2' == 4 local agecontrol "c.age##c.age##c.age##c.age"
        forvalues k3 = 1/3 {
          if `k3' == 1 local ifs " "
          if `k3' == 2 local ifs "if female == 1"
          if `k3' == 3 local ifs "if female == 0"
 
        local spec regress `y' topbot `agecontrol' female `ifs'

        qui `spec'
        local  b =  _b[topbot]
        local se = _se[topbot]

        post `foo' ("`spec'") (`k1') (`k2') (`k3') (`b') (`se')
        }
    }
}
postclose `foo'

// Plot specification curve
use deleteme, clear

// Generate ranked analytical choice variable
sort b
generate sk = _n
label var sk "Specification (sorted by coefficient size)"

// Calculate CI's
generate ub = b + 1.96 * se
generate lb = b - 1.96 * se

// Remind yourself what the variables mean
label var k1 "Outcome"
label var k2 "Age control"
label var k3 "Subsample"

// Stack indicators
generate k3c = k3
generate k2c = k2 + 1 + 3 // 3 because K3 has 3 categories, 1 for title
generate k1c = k1 + 1 + 3 + 4 + 1  // K2 has 4 categories

// Calculate some things for size of second axis
qui summarize b
global brange = r(max) - r(min)
global bmin = r(min)
global bmax = r(max)
global from_y = $bmin - (4.5 * $brange)

// Plot
twoway (scatter k1c k2c k3c sk, msymbol(o o o) msize(vsmall vsmall vsmall) ///
        yscale(range(1 22)) ///        
        ylabel( 1 "Full sample" 2 "Females only" 3 "Males only" 4 "{bf:Subsample}" ///
                5 "Age" 6 "Age squared" 7 "Age cubed" 8 "Age quartic" 9 "{bf:Age control}" ///
               10 "Stress"   11 "Depression" 12 "Pain" ///
               13 "Calmness" 14 "Energy"     15 "Loneliness" 16 "{bf:Outcome}", tstyle(notick) axis(1)))  ///
       (rcap b b sk, yaxis(2) yscale(range($from_y $bmax) axis(2)) ///
                     ylab(, format(%2.1g) axis(2)) ///
                     ytitle("{bf:Coefficient size}", axis(2) placement(north)) ///
                     yline(0, axis(2))) ///
       (rspike ub lb sk, yaxis(2)), ///
        xlabel(none)  ///
        legend(off) name(curve, replace) 
 

Reference

Simonsohn, Uri, Joseph P. Simmons, and Leif D. Nelson. 2015. Specification Curve. Descriptive and Inferential Statistics on All Reasonable Specifications. University of Pennsylvania. doi: 10.2139/ssrn.2694998