Jul 24, 2018

Random graphs (136): Linear probability model

clear

// Simulate a small class of 50 students; the fixed seed makes the draws
// reproducible. The two rnormal() calls must stay in this order, otherwise
// the random-number stream (and hence the data) changes.
set seed 1
set obs 50
generate hours = rnormal(3, 1)            // hours studied: mean 3, sd 1
generate e = rnormal(1, 1)                // noise term: mean 1, sd 1
generate questions = 2 + 2*hours + 1*e    // questions answered correctly

// A student passes when the score is at or above the 75th percentile of
// all scores; summarize, detail leaves that percentile in r(p75).
quietly summarize questions, detail
generate pass = (questions >= r(p75))     // 1 = passed, 0 = failed

// 1) Distribution of the binary outcome: percent of students per value of pass
twoway histogram pass, discrete percent ///
    ytitle("Percent of students") ///
    xtitle("") xlabel(0 "[0] Failed" 1 "[1] Passed") ///
    name(figure5, replace) xsize(4) ysize(4)

// 2) Binary outcome plotted against hours studied
twoway scatter pass hours, ///
    ytitle("Exam success") ylabel(0 "[0] Failed" 1 "[1] Passed") ///
    xtitle("Hours studied for exam") xlabel(0 (1) 5) ///
    legend(off) name(figure6, replace) xsize(4) ysize(4)

// 3) Scatterplot with regression line
// Fit the linear probability model (OLS of the 0/1 outcome on hours) and
// annotate the plot with the estimated equation.
regress pass hours

// Format the coefficients with a display format rather than round():
// multiples of .01 generally have no exact binary representation, so a macro
// assigned from round(b, .01) may expand to a long digit string (for example
// .3300000000000001) inside the annotation. string() with %9.2f always
// yields exactly two decimals.
local intercept = string(_b[_cons], "%9.2f")
local x = string(_b[hours], "%9.2f")

// Put the slope's sign into the operator so that a negative slope is printed
// as "a - b x" instead of "a + -b x".
local sign = cond(_b[hours] < 0, "-", "+")
local x_abs = string(abs(_b[hours]), "%9.2f")

twoway (scatter pass hours) ///
       (lfit pass hours, lpattern(solid) range(1 5)), ///
        xlabel(0 (1) 5) xtitle("Hours studied for exam") ///
        text(.8 2 "y = `intercept' `sign' `x_abs' x + e", size(large)) ///
        ytitle("Exam success") ///
        ylabel(0 "[0] Failed" 1 "[1] Passed") legend(off) ///
        xsize(4) ysize(4) name(figure7, replace)

// 4) Residuals of the linear probability model plotted against hours,
//    to show their heteroskedastic pattern
regress pass hours
predict resid, residuals

twoway scatter resid hours, ///
    yline(0) ///
    xtitle("Hours studied for exam") xlabel(0 (1) 5) ///
    name(figure7a, replace)

// Assemble the four named panels into one two-column figure
graph combine figure5 figure6 figure7 figure7a, ///
    name(figures57, replace) cols(2) altshrink xsize(8) ysize(8)