Aug 11, 2014

Random graphs (27): Between and within regression

// Case A
clear
set seed 1
set obs 300

generate  e = 0 + (200 - 0) * runiform()  // To generate random variates over the
generate  x = 0 + (600 - 0) * runiform()  // interval [a,b), a+(b-a)*runiform()

generate  y = e // Relationship between X and Y random
generate id = 1
replace   y = (e + 200) if x > 200 & x <= 400
replace  id = 2         if x > 200 & x <= 400
replace   y = (e + 400) if x > 400
replace  id = 3         if x > 400

bys id: egen ym = mean(y)  // Generate cluster means for Y and X
bys id: egen xm = mean(x)

twoway (scatter y x if id == 1, msymbol(oh)) ///
       (scatter y x if id == 2, msymbol(dh)) ///
       (scatter y x if id == 3, msymbol(sh)) ///
       (scatter ym xm,          msymbol(S)) ///
       (lfit ym xm,             lpattern(solid)) ///
       (lfit y x if id == 1,    lpattern(dash)) ///
       (lfit y x if id == 2,    lpattern(dash)) ///
       (lfit y x if id == 3,    lpattern(dash)) ///
      , /*legend(order(1 "A" 2 "B" 3 "C") title(Region))*/  legend(off) ///
        xtitle("Component 1") ytitle("Component 2") ///
        title("Case A:" "Regional correlation," "individual orthogonality") ///
        xlabel(none) ylabel(none) name(casea, replace)

// Case B
clear
set seed 1

set obs 4 // Generate four clusters

generate id = _n
generate u_i = 300 if id == 1  // Generate cluster-specific intercepts
replace u_i = 0 if id == 2
replace u_i = 0 if id == 3
replace u_i = -300 if id == 4

expand 100  // Generate units in clusters

bysort id: generate x = -100 + (600 + 100) * runiform()
generate e_ij = rnormal(0,70)                // Generate level-1 error term

generate y = x + u_i + e_ij
drop if y < 0 | x < 0
drop if id == 2 & x >= 300
drop if id == 3 & x  < 300

bys id: egen ym = mean(y)
bys id: egen xm = mean(x)

twoway (scatter ym xm, msymbol(S)) ///
       (scatter y  x  if id == 1, msymbol(oh)) ///
       (scatter y  x  if id == 2, msymbol(dh)) ///
       (scatter y  x  if id == 3, msymbol(sh)) ///
       (scatter y  x  if id == 4, msymbol(th)) ///
       (lfit y x if id == 1,    lpattern(dash)) ///
       (lfit y x if id == 2,    lpattern(dash)) ///
       (lfit y x if id == 3,    lpattern(dash)) ///
       (lfit y x if id == 4,    lpattern(dash)) ///
       (lfit ym xm, lpattern(solid)) ///
      , legend(order(2 "A" 3 "B" 4 "C" 5 "D" 1 "Means") size(small) title(Region:, size(small)) row(1)) ///
        xtitle("Component 1") ytitle("Component 2") ///
        title("Case B:" "Regional orthogonality," "individual correlation") ///
        xlabel(none) ylabel(none) name(caseb, replace)


// Case C
clear
set seed 1
set obs 3

generate  id = _n
generate u_i = 300 if id == 1
replace u_i = 0 if id == 2
replace u_i = -300 if id == 3

expand 150

bysort id: generate x = -100 + (600 + 100) * runiform()
generate e_ij = rnormal(0,70)                // Generate level-1 error term

generate y = x + u_i + e_ij
drop if y < 0 | x < 0

bys id: egen ym = mean(y)
bys id: egen xm = mean(x)

twoway (scatter y  x  if id == 1, msymbol(oh)) ///
       (scatter y  x  if id == 2, msymbol(dh)) ///
       (scatter y  x  if id == 3, msymbol(sh)) ///
       (scatter ym xm, msymbol(S)) ///
       (lfit y x if id == 1,    lpattern(dash)) ///
       (lfit y x if id == 2,    lpattern(dash)) ///
       (lfit y x if id == 3,    lpattern(dash)) ///
       (lfit ym xm, lpattern(solid)) ///
       , /*legend(order(1 "A" 2 "B" 3 "C") title(Region))*/ legend(off) ///
       xtitle("Component 1") ytitle("Component 2") ///
       title("Case C:" "Negative regional correlation," "positive individual correlation") ///
       xlabel(none) ylabel(none) ///
       name(casec, replace)

grc1leg casea caseb casec, legendfrom(caseb) span pos(6) row(1) name(combined, replace)