Mar 14, 2013

Random graphs (6): Line plot

version 12

// User-written commands used in this do-file (install once before running):
//   ssc install sxpose    -xpose- has problems with string variables
//   ssc install kountry
//   -scores- is user-written as well; locate it with -findit scores-

// Data downloaded from the UNESCO Institute for Statistics Data Center:
// stats.uis.unesco.org/unesco/TableViewer/
// document.aspx?ReportId=136&IF_Language=eng&BR_Topic=0

// nonames: always read the first CSV row as data, so the columns are
// guaranteed to be called v1, v2, ... (the rest of the script relies on
// these names) instead of depending on insheet's header auto-detection.
insheet using "X:\Data\Desktop\download.csv", nonames clear

ren v1 country

drop if country == "Country"  // Drop the header row that was read in as data

drop v2 v44                   // Drop empty columns

// Create variable names and change the missing value indicator
//
// Columns v3..v43 hold the years 1971..2011 in order, so the year belonging
// to a column is its index plus 1968 (v3 -> y1971, v43 -> y2011).
forvalues col = 3/43 {
    local yr = `col' + 1968
    ren v`col' y`yr'

    // The download marks missing values with "...". Only string columns can
    // contain that marker; comparing a numeric column with "..." would stop
    // the do-file with a type mismatch, so numeric columns are skipped.
    capture confirm string variable y`yr'
    if !_rc {
        replace y`yr' = "" if y`yr' == "..."
    }
}

// Create country variable
// Goal: replace the long country names in -country- by two-letter codes
// stored in -cntry-, which later become the variable names after transposing.

// Teach -kountry- the long UN spelling of the United Kingdom
kountryadd "United Kingdom of Great Britain and Northern Ireland" ///
  to "United Kingdom" add

// Step 1: country name -> numeric ISO code (created as _ISO3N_)
kountry country, from(other) stuck
// Rename the result before the second call
// NOTE(review): presumably needed so the second -kountry- call does not
// clash with the existing _ISO3N_ variable -- confirm
ren _ISO3N_ ctry
// Step 2: numeric ISO code -> two-letter ISO code (created as _ISO2C_)
kountry ctry, from(iso3n) to(iso2c)
// The numeric code was only an intermediate step
drop ctry
ren _ISO2C_ cntry

// Transpose data set: afterwards there is one row per year and one
// variable per country, named after the two-letter code in cntry
drop country                  // Long country name is no longer needed
order cntry                   // Country code must be the first column,
                              // because firstnames takes the names from it
sxpose, clear firstnames

// Put the non-EU countries last, so that BG-GB spans the EU members only
order CH NO, last

// Convert by hand: the destring option of sxpose doesn't work together
// with the firstnames option
destring BG-NO, replace

// Recreate year variable (first row is 1971)
generate year = _n + 1970

// Minor changes
scores avg = mean(BG-GB)      // Calculate EU-27
label variable avg "EU-27 average (unweighted)"
keep if year != 2011          // Leave out the near-empty year 2011

// Create graph: one thin line per EU country plus a thick line for the
// unweighted average, with a reference line at 50 percent.
//
// - cmissing(n) allows gaps in the country lines for missing data.
// - The note is written as a single-line string so that no source-code
//   indentation ends up inside the displayed text.
// - order() also allows suppressing legend items: only plot 17 is shown.
//   NOTE(review): 17 is presumably the position of the avg line, which
//   holds only if BG-GB covers 16 country series -- confirm.
twoway line BG-GB year, cmissing(n) sort                                   ///
    || line avg year, lwidth(vthick) sort                                  ///
       yline(50) xtitle("")                                                ///
       ytitle("Percentage of female students" "in tertiary education")     ///
       note("Source: UNESCO Institute for Statistics, date of extraction: 2013-03-14", span) ///
       title("Percentage of female students in tertiary education, EU-27", span) ///
       legend(order(17) ring(0))