STATA FUN: Random graphs (47): Complex scatterplot

May 21, 2015
Random graphs (47): Complex scatterplot


// Load cells A1:I27 of Sheet2, replacing any data in memory; the first row
// of that range supplies the variable names
import excel "C:\table+graph field match countries.xlsx", sheet("Sheet2") cellrange(A1:I27) clear firstrow

// Switch all variable names to lower case. Uses the built-in -rename- group
// syntax rather than the user-written -renvars- package, so the script runs
// on a clean Stata installation with nothing extra installed.
rename *, lower

list // Check data

// Give every field-of-study column the common prefix var_ so that the
// columns share one stub (the later -reshape long- uses var_ as its stub)
foreach x of varlist healthwelfare education socialsciencesbusinesslaw ///
                     artshumanities services sciencemathcomputing ///
                     agricultureveterinary engineeringmanufacturing {
  rename `x' var_`x'
}

list // Check data

// Wide -> long: one row per country-field pair. The part of each column
// name after the var_ stub is stored as text in field_str, the cell value
// in var_.
reshape long var_, i(country) j(field_str) string

// Rescale var_ by a factor of 100 (proportion -> percentage)
replace var_ = 100 * var_

// Numeric, value-labelled copy of field_str
encode field_str, generate(field1)

// Overwrite the raw names that -encode- stored in value label field1 with
// readable text. NOTE(review): the code numbers below assume -encode-
// numbered the eight field names in alphabetical order - confirm with
// -label list field1- if the set of fields ever changes.
label define field1 1 "Agriculture and Veterinary", modify
label define field1 2 "Arts and Humanities", modify
label define field1 3 "Education", modify
label define field1 4 "Engineering and Manufacturing", modify
label define field1 5 "Health and Welfare", modify
label define field1 6 "Science, Math, and Computing", modify
label define field1 7 "Services", modify
label define field1 8 "Social Sciences, Business, and Law", modify

// Sort by average segregation per field
// Show the EU-average value of every field. The variable is written out in
// full as var_ so the command does not depend on variable-name abbreviation
// (it would fail under -set varabbrev off-).
list field1 var_ if country == "EU average"

// Renumber the fields into the display order used on the plot axis and
// attach matching value labels to the new variable field.
// NOTE(review): this order is hard-coded, presumably read off the listing
// above - re-check it whenever the underlying data change.
recode field1 (5 = 1 "Health and Welfare") ///
              (3 = 2 "Education") ///
              (8 = 3 "Social Sciences, Business, and Law") ///
              (2 = 4 "Arts and Humanities") ///
              (7 = 5 "Services") ///
              (6 = 6 "Science, Math, and Computing") ///
              (1 = 7 "Agriculture and Veterinary") ///
              (4 = 8 "Engineering and Manufacturing") ///
              , gen(field)
       
// Plot
// Order the rows by field so that the connected EU-average series is joined
// from one field to the next.
sort field var_

// Plot 1: one marker per country and field (EU average excluded).
// Plot 2: the EU-average markers, joined by a thick line; it is the only
//         series listed in the legend.
// The y axis shows the value labels of field for ticks 1 to 8; the x axis
// is moved to the alternate side of the plot.
twoway ///
    (scatter field var_ if country != "EU average") ///
    (scatter field var_ if country == "EU average", connect(l) lwidth(thick)) ///
    , ///
    ylabel(1/8, valuelabel) ///
    xscale(alt) ///
    xtitle("Percentage women by field of study" "in European countries") ///
    ytitle("") ///
    note(" " "{it:Source:} European Labor Force Survey 2011, own calculations.", span) ///
    legend(order(2) label(2 "Average" "across" "countries") ring(0) pos(1))