// Simulate exam data for 50 students; the seed is fixed so reruns give the same draws
clear
set seed 1
set obs 50

// Draw order is part of the result: hours is drawn first, then the noise term
generate hours = rnormal(3, 1)          // Number of hours studied
generate e = rnormal(1, 1)              // Noise term (mean 1, sd 1)
generate questions = 2 + 2*hours + e    // Questions answered correctly

// A student passes when questions is at or above its 75th percentile
quietly summarize questions, detail
generate pass = questions >= r(p75)     // Passing the exam (1 = passed, 0 = failed)
// 1) Histogram of the binary outcome: percent of students who failed vs. passed
twoway histogram pass, discrete percent ///
    ytitle("Percent of students") xtitle("") ///
    xlabel(0 "[0] Failed" 1 "[1] Passed") ///
    xsize(4) ysize(4) name(figure5, replace)
// 2) Scatterplot of the pass/fail outcome against hours studied
twoway scatter pass hours, ///
    xtitle("Hours studied for exam") xlabel(0(1)5) ///
    ytitle("Exam success") ylabel(0 "[0] Failed" 1 "[1] Passed") ///
    legend(off) xsize(4) ysize(4) name(figure6, replace)
// 3) Scatterplot with regression line (linear regression of pass on hours)
regress pass hours
// Format the coefficients with a display format instead of round(): a value
// rounded to .01 is not always exactly representable in binary, so a local
// built from round() can expand to text such as .5700000000000001 in the
// annotation. %4.2f always yields two decimals and a leading zero.
local intercept : display %4.2f _b[_cons]
local slope : display %4.2f _b[hours]
twoway (scatter pass hours) ///
    (lfit pass hours, lpattern(solid) range(1 5)), ///
    xlabel(0 (1) 5) xtitle("Hours studied for exam") ///
    text(.8 2 "y = `intercept' + `slope' x + e", size(large)) ///
    ytitle("Exam success") ///
    ylabel(0 "[0] Failed" 1 "[1] Passed") legend(off) ///
    xsize(4) ysize(4) name(figure7, replace)
// 4) Scatterplot with the fitted curve from a logistic regression of pass on hours
logit pass hours
// With no option given, predict after logit stores the predicted probability
predict yhat
twoway ///
    (scatter pass hours) ///
    (line yhat hours, sort lpattern(solid)), ///
    xtitle("Hours studied for exam") xlabel(0(1)5) ///
    ytitle("Exam success") ylabel(0 "[0] Failed" 1 "[1] Passed") ///
    legend(off) xsize(4) ysize(4) name(figure8, replace)
// Arrange the four named graphs in a two-column grid
graph combine figure5 figure6 figure7 figure8, ///
    cols(2) altshrink xsize(8) ysize(8) name(figures58, replace)
Jul 24, 2018
Random graphs (137): Logistic regression
Labels: logit, predict, Random graphs, Simulation, twoway histogram, twoway lfit, twoway line, twoway scatter
