# delimit;

* Session setup, log file, and the state lookup table junk0.dta;

clear;

set scheme s1color;
set more 1;
set mem 800m;

* Close any log left open by an earlier aborted run, otherwise log using stops
  with a log file already open error. capture makes this a no-op when no log is open;
capture log close;
log using crowd.log, replace;

* Read the state-level file and keep only the identifiers, sorted by fips so that
  junk0.dta can later serve as the using file of a merge on fips;
insheet using bhl_dat.csv;

keep fips statename stateabrev;
sort fips;

save junk0.dta, replace;
clear;

* Write one copy of the state-level file per survey year 2003 to 2008.
  Each copy is saved as junkYYYY.dta, sorted by dataset and fips.
  Every copy carries the same measures taken from the _2004 columns, and the
  loop value only sets the dataset variable and the output file name;
forvalues yr = 2003/2008 { ;
  insheet using bhl_dat.csv;

  generate state = statename;
  generate dataset = `yr';

  generate index = index2004;
  generate psat_part = psat_part_2004;
  generate ap_part = ap_part_2004;
  generate tenplus_hw = tenplus_hw_hours_2004;
  generate fiveplus_apps = fiveplus_apps_2004;
  generate privtestprep = privtestprep_2004;

  generate indexxprep = index - privtestprep;

  sort dataset fips;
  save junk`yr'.dta, replace;
  clear;
} ;


* Attach the state-level measures in junk2003.dta to junk2008.dta to the
  individual-level file, matching on dataset and fips.
  The update option is needed here. Without it, the first merge creates the
  state-level variables in the master data, and every later merge keeps the
  master values, which are missing for the other years. With update, missing
  master values are filled from the using file and nonmissing ones are kept;
use merged_datasets;

sort dataset fips;

forvalues yr = 2003/2008 { ;
  merge dataset fips using junk`yr'.dta, update;
  drop _merge;
  sort dataset fips;
} ;

*drop if dataset==.;

* Sample restrictions. Each keep mirrors a drop of the complementary condition,
  so missing values are treated as follows: a missing dataset or student value
  is retained, a missing age is removed because missing compares as larger than
  any number, and only the system missing value of fips is removed;
keep if dataset>=2003;
keep if age>=18 & age<65;
drop if fips==. | fips<0;
keep if student!=1;

* Ten-year age bands, coded 1 to 5, with the first band covering 18 to 24;
gen agecat = .;
replace agecat = 1 if inrange(age,18,24);
replace agecat = 2 if inrange(age,25,34);
replace agecat = 3 if inrange(age,35,44);
replace agecat = 4 if inrange(age,45,54);
replace agecat = 5 if inrange(age,55,64);

* Indicators for the bracket that ageyngst falls in;
gen infant      = (ageyngst<=1);
gen toddler     = (ageyngst==2);
gen preschool   = (ageyngst>2  & ageyngst<6);
gen elementary1 = (ageyngst>=6  & ageyngst<10);
gen elementary2 = (ageyngst>=10 & ageyngst<14);
gen teen        = (ageyngst>=14 & ageyngst<18);

* Squared count of under18;
gen under182 = under18^2;

* haveyoung is defined from a different variable depending on dataset;
gen haveyoung = (under5>0);
replace haveyoung = (under7>0) if dataset==1998;
replace haveyoung = (ageyngst<5) if dataset>=2003;

* Weighted summary of childtot for college mothers;
summarize childtot [aw=recwght] if mother==1 & college==1;

* For each parent type and each college group, regress childtot on the controls
  with analytic weights, store the fitted values in fchild_GROUP_PARENT, and store
  the residual, childtot minus fitted, in child_GROUP_PARENT.
  GROUP is cl for college==1 and nc for college==0.
  Run order is cl mother, nc mother, cl father, nc father;
local controls "i.agecat married infant toddler preschool elementary1 elementary2 under18 under182";

foreach parent in mother father { ;
  foreach grp in cl nc { ;
    local iscoll = ("`grp'" == "cl");

    xi: reg childtot `controls'
      [aw=recwght] if `parent'==1 & college==`iscoll';

    predict fchild_`grp'_`parent' if `parent'==1 & college==`iscoll';

    gen child_`grp'_`parent' = childtot - fchild_`grp'_`parent'
      if `parent'==1 & college==`iscoll';
  } ;
} ;

* Keep observations whose under18 is neither zero nor system missing, then
  reduce the data to one row per fips holding the sum of recwght and the
  means of the residuals and of the state-level measures.
  NOTE(review): the means are unweighted although the regressions that produce
  the residuals use analytic weights. Confirm that this is intended;
keep if under18!=0 & under18!=.;

collapse
  (sum) recwght
  (mean) child_cl_mother child_nc_mother child_cl_father child_nc_father
         index indexxprep psat_part ap_part tenplus_hw fiveplus_apps privtestprep,
  by(fips);

* Bring in statename and stateabrev from the lookup table. _merge is left in the data;
sort fips;
merge fips using junk0.dta;


* Bivariate regressions of each of the four residual series on each measure in turn;
foreach measure in index indexxprep psat_part ap_part tenplus_hw fiveplus_apps privtestprep { ;
  foreach resid in child_cl_mother child_nc_mother child_cl_father child_nc_father { ;
    regress `resid' `measure';
  } ;
} ;

* Shift the college-mother residual by a constant before plotting.
  NOTE(review): 16.76 is presumably the weighted mean of childtot for college
  mothers reported by the summarize earlier in this file. Confirm against the log;
replace child_cl_mother = child_cl_mother + 16.76;

label variable child_cl_mother "Actual";
regress child_cl_mother index;
predict fitted;

* Scatter of the shifted series and its fitted values against index, with the
  fitted values drawn as a connected line and points labelled by stateabrev;
twoway scatter child_cl_mother fitted index, mlabel(stateabrev)
   ytitle("hours per week") xtitle("college competitiveness index")
   connect(. l) msymbol(c i);


log close;

