May 20, 2016

Merging the Demographic and Health Surveys in Stata

// Unzip files
// Extracts every .zip archive found in the working directory.
clear
cd "C:\dhs"

// Names of all files in the current directory ending with ".zip"
local filelist : dir . files "*.zip"
display `filelist'

// Name of the first archive (shown for inspection only)
local first : word 1 of `filelist'
display "`first'"

// Number of archives found
local total_ : list sizeof filelist
display `total_'

// Walk the list, echoing each archive's position before extracting it
local x = 0
foreach y of local filelist {
    local ++x
    display `x'
    unzipfile "`y'", replace
}

// Append all files
// Stacks a fixed set of variables from every women's-data directory into
// testfile.dta. Each directory <name> is expected to hold a dataset named
// after the upper-cased directory name with "DT" replaced by "FL".
clear
cd "C:\dhs"
local directorylist : dir . dirs  "*ir*"  // Create local with all directory names that contain women's data

di `directorylist'

local total_ : word count `directorylist' // Identify total number of directories
di `total_'

// Stop with a clear message instead of failing later on an empty path
if `total_' == 0 {
    display as error "no directories matching *ir* found in the current directory"
    exit 601
}

// Variables wanted from every file; a file may lack some of them
local masterlist "caseid v000 v005 v007 v012 v106 v107 v155 v191 v201 v212 v525 v531"

// Becomes 1 once testfile holds at least one dataset
local started = 0

forvalues x = 1/`total_' {
    local y : word `x' of `directorylist'
    local filename = strupper("`y'")
    local filename = subinstr("`filename'", "DT", "FL", .)
    di "`y'/`filename'"

    // Read the variable names from the file header without loading the data,
    // then keep those that are on the master list (in master-list order).
    quietly describe using "`y'/`filename'", varlist
    local filevars `r(varlist)'
    local keeplist : list masterlist & filevars

    if "`keeplist'" == "" {
        display as text "note: `y'/`filename' contains none of the requested variables; skipped"
        continue
    }

    // Load only the needed variables; the first file is treated like all others,
    // so a missing variable no longer aborts the run.
    use `keeplist' using "`y'/`filename'", clear

    // Replace v106 by its value-label text (v106s) when it can be decoded;
    // presumably so that differing numeric codings do not clash on append.
    // NOTE(review): if decode fails, v106 is still dropped and v106s is absent.
    capture decode v106, gen(v106s)
    capture drop v106

    // Append the new data below what has been collected so far
    if `started' {
        tempfile new
        save `new', replace
        use testfile, clear
        append using `new', force
    }
    save testfile, replace
    local started = 1
}

// Prepare variables
// Derives country and wave identifiers from v000 and restricts the data
// to a single DHS phase.
use testfile, clear

// Country and wave identifiers
// Characters 1-2 of v000 are used as country code, character 3 as wave.
replace v000 = "VN3" if v000 == "VNT"
generate cntry = substr(v000,1,2)
generate wavex  = substr(v000,3,1)
replace wavex = "1" if wavex == ""   // No third character is treated as wave 1

// Convert the wave digit to its numeric value. encode would instead number
// the distinct strings 1, 2, 3, ... in sort order, so "6" would only map
// to 6 if every one of "1" to "5" also occurred in the data.
generate byte wave = real(wavex)
drop wavex

  // Fix unusual country abbreviations
replace cntry = "BI" if cntry == "BU"
replace cntry = "IN" if cntry == "IA"
replace cntry = "KZ" if cntry == "KK"
replace cntry = "MD" if cntry == "MB"
replace cntry = "NA" if cntry == "NM"
replace cntry = "DO" if cntry == "DR"

// kountry is a user-written command; it creates NAMES_STD with
// standardized country names from the two-letter codes.
kountry cntry, from(iso2c)
encode NAMES_STD, gen(country)
drop NAMES_STD

// Keep DHS phase 6 only
keep if wave == 6

// Prepare variables
  // Age at first intercourse
  // Start from v531 where it lies in 1-63 (missing otherwise) ...
generate age1stintercourse = v531 if inrange(v531, 1, 63)

  // ... then overwrite with an extended missing value for each special
  // code of v525 (code k in the first list maps to mark k in the second).
local special_codes 0 95 97 98 99
local missing_marks .a .b .c .d .e
forvalues k = 1/5 {
    local code : word `k' of `special_codes'
    local mark : word `k' of `missing_marks'
    replace age1stintercourse = `mark' if v525 == `code'
}

  // Value labels for the extended missing values
label define age1stintercourse .a "Not had intercourse"
label define age1stintercourse .b "95?", add
label define age1stintercourse .c "inconsistent", add
label define age1stintercourse .d "don't know", add
label define age1stintercourse .e "99?", add
label values age1stintercourse age1stintercourse
label variable age1stintercourse "Age at first intercourse"

  // Age at first birth
rename v212 afb
label variable afb "Age at first birth"

// Calculate association
// Regresses age at first birth on age at first intercourse separately for
// each country and for the pooled sample (total), storing the slope and
// bounds at +/- 1.96 standard errors. The data in memory are replaced by
// these results until restore.
preserve
statsby mean_ = _b[age1stintercourse] ///
        loci  = (_b[age1stintercourse] - 1.96 * _se[age1stintercourse]) ///
        hici  = (_b[age1stintercourse] + 1.96 * _se[age1stintercourse]) ///
      , by(country) total clear: ///
        regress afb age1stintercourse

// Label total value
// The pooled row has country missing; give it its own code and a bold label.
replace country = 1000 if country == .
label define country 1000 "{bf: Total}", modify

// Order rows by slope and carry the country names over as value labels
// of the ordering variable (labmask is a user-written command).
egen order_ = rank(mean_), unique
labmask order_, value(country) decode

// Number of rows to label on the y axis, taken from the data rather than
// hard-coded so the plot adapts to the number of countries available.
summarize order_, meanonly
local nrows = r(max)

// Plot
// rcap with identical bounds draws the point estimate as a tick;
// rspike draws the interval. The x-axis range is tuned to these data.
twoway (rcap mean_ mean_ order_, horizontal) ///
       (rspike loci hici order_, horizontal) ///
      , legend(off) ylabel(1/`nrows', valuelabels ang(h) labsize(*.8)) ///
        xlabel(.7 (.1) 1.0, grid format(%6.1f)) name(ols, replace)  ///
        xmtick(.7 (.05) 1.0) ///
        ytitle("") xtitle("Association between" ///
                          "age at first intercourse" ///
                          "and age at first birth") xscale(alt) ysize(8) ///
        note(" " ///
             "{it:Source:} DHS VI, own calculations" , span size(*.8))
restore