* Pin the interpreter version. This file uses legacy syntax (old-style
* -merge varlist using-, -table ..., c()-, -set memory-) that newer Stata
* releases only accept under version control.
version 9
clear
set memory 500000
set matsize 5000
set more off
/* ************************************************************************************************************** */
cd "C:\Documents and Settings\Rafael.LaPorta\My Documents\InformalEconomy\Nick\Brookings Submission\Stata"
/* ************************************************************************************************************** */
* Load the pooled data and keep only records typed "Informal".
use nick10, clear
keep if type=="Informal"
* Sample exclusions.
drop if country=="Cameroon"
drop if country=="Guatemala" & year==2006
* Recode the year to 2003 for these two country-years.
replace year=2003 if country=="Senegal"  & year==2004
replace year=2003 if country=="Pakistan" & year==2002

* Old-style -merge- requires the master data to be sorted on the key variables.
sort idstd code year enterprise_survey

* NOTE: fix_informal IS GENERATED BY CALIBRATE_COMPETITION_INFORMAL03.DO

* -update- fills values that are missing in memory with values from fix_informal.
merge idstd code year enterprise_survey using fix_informal, update
* Repeat the exclusion: the merge can add observations that exist only in fix_informal.
drop if country=="Cameroon"
/* ************************************************************************************************************** */
* Fill in the buyer flags where they are still missing.
* A flag is 1 when the corresponding sales variable equals the row maximum
* of its breakdown, 0 otherwise; rows where either term is missing are skipped.

* Pass 1: breakdown stored in the sales_* variables.
egen big = rowmax(sales_customers sales_traders sales_smallbus sales_largebus sales_other)
replace buyer_largefirm  = (big==sales_largebus)  if (sales_largebus+big!=.)  & (buyer_largefirm==.)
replace buyer_individual = (big==sales_customers) if (sales_customers+big!=.) & (buyer_individual==.)
drop big

* Pass 2: breakdown stored in the sales* variables (no underscore).
* The helper variable big is left in the data after this pass.
egen big = rowmax(salesgovt salesstateowned salesmnc salesparent saleslarge salesother)
replace buyer_largefirm  = (big==saleslarge) if (saleslarge+big!=.) & (buyer_largefirm==.)
replace buyer_individual = (big==salesother) if (salesother+big!=.) & (buyer_individual==.)

* Derived variables scaled by income per capita.
* gdp_pc_usd is exponentiated here, so it is presumably stored in logs - TODO confirm.
generate income_pc = exp(gdp_pc_usd)
generate s         = ln((s_l*(1+employees))/income_pc)
generate lab_emp   = (labor_cost/100)*sales/employees
generate w         = (lab_emp/income_pc)

* Divide these variables by 100 (presumably percentages turned into fractions - TODO confirm).
foreach v of varlist capacity buil_own                                   ///
		mgr_primary mgr_secondary mgr_vocational mgr_college     ///
		wkr_primary wkr_secondary wkr_college                    ///
		finance_int fin_trade fin_family finance_bnk {
	replace `v' = `v'/100
}

* Growth variables: g becomes sqrt(1+g/100)-1.
* NOTE(review): looks like a two-period percentage growth converted to a per-period rate - confirm.
foreach v of varlist emp_growth sales_growth {
	replace `v' = (1+`v'/100)^0.5-1
}
/* ************************************************************************************************************** */
* Fall back on salesother when sales_customers is missing.
replace sales_customers=salesother if sales_customers==.
/* ************************************************************************************************************** */
* Comparison groups:
*   0 = informal survey, registered_ctral==0
*   1 = informal survey, registered_ctral==1
*   size_new+1 for non-informal-survey firms (presumably 2, 3, 4 - TODO confirm the coding of size_new)
* When size_new is missing, formal firms are classified from employees:
*   2 = fewer than 20, 3 = 20 to 99, 4 = 100 or more.
generate group=0 if informalsurvey==1 & registered_ctral==0
replace  group=1 if informalsurvey==1 & registered_ctral==1
replace  group=size_new+1 if informalsurvey==0
replace  group=2 if size_new==. & formal==1 & employees<20
replace  group=3 if size_new==. & formal==1 & employees>=20 & employees<100
* Fix: the upper bracket now includes exactly 100 employees. It used to be >100,
* so such firms matched no bracket and were dropped below as group==.
* The missing check is needed because missing compares as larger than any number.
replace  group=4 if size_new==. & formal==1 & employees>=100 & employees~=.
/* ************************************************************************************************************** */
drop if group==.
* file: 1 for groups 0-1, 0 for groups 2-4.
* generate file=(group<2)
generate file=1 if (group==0 | group==1)
replace  file=0 if (group==2 | group==3 | group==4)
/* ************************************************************************************************************** */
* Summary of the obstacle variables by country for file==0.
bysort country: summarize obst* if file==0
/* ************************************************************************************************************** */
* Top-code sales at the 95th percentile within each code-year cell.
bysort code year: egen cap = pctile(sales), p(95)
replace sales = cap if sales>=cap & sales!=.
drop cap

* Compensation: comp_full when formal==0. When formal==1 use comp_perm,
* falling back on comp_emp if that is missing. Then scale by income per capita.
generate comp = comp_full if formal==0
replace  comp = comp_perm if formal==1
replace  comp = comp_emp  if formal==1 & comp==.
replace  comp = comp/income_pc

* Apply the same 95th-percentile top-coding to the pay variables.
sort code year
foreach v of varlist comp w lab_emp {
	by code year: egen cap = pctile(`v'), p(95)
	replace `v' = cap if `v'>cap & `v'!=.
	drop cap
}

* Country means for inspection.
table country, contents(mean w mean comp mean lab_emp mean labor_cost mean income_pc)

table country if formal==1, contents(mean emp_growth mean sales_growth)
table country if formal==0, contents(mean emp_growth mean sales_growth)

* Overwrite competition_informal with obst16 wherever obst16 is available.
replace competition_informal = obst16 if obst16!=.
/* ************************************************************************************************************** */
* compet_inf: 1 for ratings 3 or 4, 0 for ratings below 3, missing otherwise.
generate compet_inf = 1 if inlist(competition_informal, 3, 4)
replace  compet_inf = 0 if competition_informal<3
/* ************************************************************************************************************** */
* Recode each maj_obst dummy from its obst rating using the same cut-off.
* The three conditions are mutually exclusive, so their order is irrelevant.
* Ratings outside these cases leave the existing value untouched.
forvalues i = 1/20 {
	replace maj_obst`i' = . if obst`i'==.
	replace maj_obst`i' = 0 if obst`i'<3
	replace maj_obst`i' = 1 if inlist(obst`i', 3, 4)
}
/* ************************************************************************************************************** */
bysort country: summarize maj_obst16

#delimit ;

/* Variables whose means are reported. In this delimiter mode line breaks are plain whitespace. */
/* NOTE(review): compet_inf is generated above but is not in this list, whereas the Micro section lists it - confirm this is intended. */
local X
	age registered_ctral registered_loc registered_agcy
	inownershouse permstructure land_own buil_own lackoftitle
	generator own_transp capacity hours electricalconnection
	buyer_largefirm buyer_individual xport email website
	fearsinformal competition_informal obst* maj_obst*
	sales s
	employees mgr_educ mgr_primary mgr_secondary mgr_vocational mgr_college
	educ_most wkr_primary wkr_secondary wkr_college
	loan_ever finance_int fin_trade fin_family finance_bnk term
	emp_growth;

/* Keep a copy of the uncollapsed data. It is reloaded later for the by-file comparison. */
save tmp, replace;

/* One row per country-year-group: means of the X variables plus median sales and employees. */
collapse
	(mean)   `X'
	(median) median_s=sales median_e=employees,
	by(country year group);

/* Variables tested for a difference in means between group 4 and group 2. */
local Tstat
	age registered_ctral registered_loc registered_agcy land_own buil_own
	generator capacity buyer_largefirm buyer_individual
	competition_informal employees mgr_educ mgr_primary mgr_secondary
	mgr_vocational mgr_college educ_most
	wkr_primary wkr_secondary wkr_college loan_ever finance_int fin_trade
	fin_family finance_bnk term emp_growth;

log using informal_t_stat, replace;

/* Indicators for the two groups being compared. */
generate group1=(group==4);
generate group2=(group==2);

foreach l of local Tstat {;
	display "`l'";
	/* Run the test only when both groups have at least one non-missing value. */
	quietly: count if (`l'~=. & group1==1);
	local n_first = r(N);
	quietly: count if (`l'~=. & group2==1);
	local n_second = r(N);
	if (`n_first'>0 & `n_second'>0) {;
		ttest `l' if (group1==1 | group2==1), by(group2);
	};
};

/* Rank-sum test on median sales for the same two groups. */
ranksum median_s if (group1==1 | group2==1), by(group2);

log close;
/* Collapse the country-year-group rows to one row per group, transpose so that each variable becomes a row (the name is kept in _varname), and store the table in output.dta. */
collapse (mean) `X' (median) median_s=sales median_e=employees, by(group);
xpose, clear varname;
save output, replace;
/* Reload the pre-collapse data saved in tmp.dta and aggregate it by country, year and file. */
use tmp, clear;
collapse (mean) `X' (median) median_s=sales median_e=employees, by(country year file);
/* ************************************************************************************************************** */
/* ************************************************************************************************************** */
/* Variables tested for a difference in means between file==0 and file==1. */
local Tstat 
age registered_ctral registered_loc registered_agcy land_own buil_own generator capacity buyer_largefirm buyer_individual
competition_informal employees mgr_educ mgr_primary mgr_secondary mgr_vocational mgr_college educ_most
wkr_primary wkr_secondary wkr_college loan_ever finance_int fin_trade fin_family finance_bnk term emp_growth;

/* Fix: open the log with append, not replace. The same log file is created earlier in this file for the group-level tests, and replace here would discard them. */
log using informal_t_stat, append;

foreach l in `Tstat'  {;
	display "`l'";
	/* Skip variables with no non-missing value in either file category, where ttest would fail. */
	quietly: count if (`l'~=. & file==0);
	if r(N)==0 {;
		continue;
	};
	quietly: count if (`l'~=. & file==1);
	if r(N)==0 {;
			continue;
	};
	ttest `l', by(file);
};

/* Rank-sum test on median sales across the two file categories. */
ranksum median_s, by(file);

log close;
/* ************************************************************************************************************** */
/* ************************************************************************************************************** */
/* Collapse to one row per file value and transpose. collapse sorts on the by-variable, so after xpose v1 holds file==0 and v2 holds file==1. */
collapse (mean) `X' (median) median_s=sales median_e=employees, by(file);
xpose, clear varname;
/* The renames must come before joinby because output.dta has its own v1, v2, ... columns. */
rename v1 formal;
rename v2 informal;

/* Attach the per-group columns from output.dta, matching rows on the variable name. */
joinby _varname using output;

/* Open the Data Editor on the combined table. This is interactive and assumes output.dta supplied v1-v5. */
edit _varname v1 v2 informal v3 v4 v5 formal;

/* ************************************************************************************************************** */
/* ************************************************************************************************************** */
/* ************************************************************************************************************** */
/* ************************************************************************************************************** */
#delimit cr
* Micro pipeline: start again from the pooled file, loading only records typed "Micro".
use if type=="Micro" using nick10, clear
* Old-style -merge- requires the master data to be sorted on the key variables.
sort idstd code year enterprise_survey
* NOTE: fix_micro IS GENERATED BY CALIBRATE_COMPETITION_INFORMAL03.DO
merge idstd code year enterprise_survey using fix_micro, update
/* ************************************************************************************************************** */
* Fill in missing buyer flags: 1 when the corresponding sales variable equals
* the row maximum of the breakdown, 0 otherwise.
egen big = rowmax(salesgovt salesstateowned salesmnc salesparent saleslarge salesother)
replace buyer_largefirm  = (big==saleslarge) if (saleslarge+big!=.) & (buyer_largefirm==.)
replace buyer_individual = (big==salesother) if (salesother+big!=.) & (buyer_individual==.)
drop big

* Comparison groups:
*   0 = microinformal==1, 1 = microformal==1
*   size_new+1 for enterprise-survey firms (presumably 2, 3, 4 - TODO confirm the coding of size_new)
* When size_new is missing, enterprise-survey firms are classified from employees:
*   2 = fewer than 20, 3 = 20 to 99, 4 = 100 or more.
generate group=0 if microinformal==1
replace  group=1 if microformal==1
replace  group=size_new+1 if enterprise_survey==1
replace  group=2 if size_new==. & (enterprise_survey==1) & employees<20
replace  group=3 if size_new==. & (enterprise_survey==1) & employees>=20 & employees<100
* Fix: the upper bracket now includes exactly 100 employees. It used to be >100,
* so such firms matched no bracket and were dropped below as group==.
* The missing check is needed because missing compares as larger than any number.
replace  group=4 if size_new==. & (enterprise_survey==1) & employees>=100 & employees~=.

drop if group==.
* file: 1 for groups 0-1, 0 for groups 2 and above.
generate file=(group<2)
table country if file==0, c(mean obst16 mean maj_obst16)

* Derived variables scaled by income per capita.
* gdp_pc_usd is exponentiated here, so it is presumably stored in logs - TODO confirm.
generate income_pc = exp(gdp_pc_usd)
generate s         = ln((s_l*(1+employees))/income_pc)
generate lab_emp   = (labor_cost/100)*sales/employees
generate w         = (lab_emp/income_pc)

* Divide these variables by 100 (presumably percentages turned into fractions - TODO confirm).
foreach v of varlist capacity buil_own                                   ///
		mgr_primary mgr_secondary mgr_vocational mgr_college     ///
		wkr_primary wkr_secondary wkr_college                    ///
		finance_int fin_trade fin_family finance_bnk {
	replace `v' = `v'/100
}

* Growth variables: g becomes sqrt(1+g/100)-1.
* NOTE(review): looks like a two-period percentage growth converted to a per-period rate - confirm.
foreach v of varlist emp_growth sales_growth {
	replace `v' = (1+`v'/100)^0.5-1
}

* Top-code sales at the 95th percentile within each code-year cell.
bysort code year: egen cap = pctile(sales), p(95)
replace sales = cap if sales>=cap & sales!=.
drop cap

* Compensation: comp_full when formal==0. When formal==1 use comp_perm,
* falling back on comp_emp if that is missing. Then scale by income per capita.
generate comp = comp_full if formal==0
replace  comp = comp_perm if formal==1
replace  comp = comp_emp  if formal==1 & comp==.
replace  comp = comp/income_pc

* Apply the same 95th-percentile top-coding to the pay variables.
sort code year
foreach v of varlist comp w lab_emp {
	by code year: egen cap = pctile(`v'), p(95)
	replace `v' = cap if `v'>cap & `v'!=.
	drop cap
}

* Country means for inspection.
table country, contents(mean w mean comp mean lab_emp mean labor_cost mean income_pc)

* Overwrite competition_informal with obst17 wherever obst17 is available.
* NOTE(review): the Informal section uses obst16 here and the tables below report obst16 - confirm obst17 is the intended item.
replace competition_informal = obst17 if obst17!=.
/* ************************************************************************************************************** */
* compet_inf: 1 for ratings 3 or 4, 0 for ratings below 3, missing otherwise.
generate compet_inf = 1 if inlist(competition_informal, 3, 4)
replace  compet_inf = 0 if competition_informal<3
/* ************************************************************************************************************** */
* Recode each maj_obst dummy from its obst rating using the same cut-off.
* The three conditions are mutually exclusive, so their order is irrelevant.
* Ratings outside these cases leave the existing value untouched.
forvalues i = 1/20 {
	replace maj_obst`i' = . if obst`i'==.
	replace maj_obst`i' = 0 if obst`i'<3
	replace maj_obst`i' = 1 if inlist(obst`i', 3, 4)
}
/* ************************************************************************************************************** */
table country if file==0, contents(mean obst16 mean maj_obst16)
table country if file==1, contents(mean obst16 mean maj_obst16)
bysort country: summarize maj_obst16 if file==0

#delimit ;

/* Variables whose means are reported. In this delimiter mode line breaks are plain whitespace. */
local X
	age registered_ctral registered_loc registered_agcy
	inownershouse permstructure land_own buil_own lackoftitle
	generator own_transp capacity hours electricalconnection
	buyer_largefirm buyer_individual xport email website
	fearsinformal competition_informal obst* maj_obst* compet_inf
	sales s
	employees mgr_educ mgr_primary mgr_secondary mgr_vocational mgr_college
	educ_most wkr_primary wkr_secondary wkr_college
	loan_ever finance_int fin_trade fin_family finance_bnk term
	emp_growth;

/* Keep a copy of the uncollapsed data. It is reloaded later for the by-file comparison. */
save tmp, replace;

/* One row per country-year-group-file: means of the X variables plus median sales and employees. */
collapse
	(mean)   `X'
	(median) median_s=sales median_e=employees,
	by(country year group file);

/* formal: 0 for group 0, 1 for every other group. */
generate formal=(group>0);
/* ************************************************************************************************************** */
/* ************************************************************************************************************** */
/* Variables tested for a difference in means between group 0 and group 1. */
local Tstat
	age registered_ctral registered_loc registered_agcy inownershouse
	permstructure land_own lackoftitle generator own_transp
	hours electricalconnection buyer_largefirm buyer_individual email website
	competition_informal xport employees mgr_educ mgr_primary mgr_secondary
	mgr_vocational mgr_college educ_most wkr_primary wkr_secondary wkr_college
	loan_ever finance_int fin_trade fin_family finance_bnk
	term emp_growth;

log using micro_t_stat, replace;

/* Indicators for the two groups being compared. */
generate group1=(group==0);
generate group2=(group==1);

foreach l of local Tstat {;
	display "`l'";
	/* Run the test only when both groups have at least one non-missing value. */
	quietly: count if (`l'~=. & group1==1);
	local n_first = r(N);
	quietly: count if (`l'~=. & group2==1);
	local n_second = r(N);
	if (`n_first'>0 & `n_second'>0) {;
		ttest `l' if (group1==1 | group2==1), by(group2);
	};
};

/* Rank-sum test on median sales for the same two groups. */
ranksum median_s if (group1==1 | group2==1), by(group2);

log close;

/* Collapse the country-year-group-file rows to one row per group, transpose so that each variable becomes a row (the name is kept in _varname), and store the table in output.dta. */
collapse (mean) `X' (median) median_s=sales median_e=employees, by(group);
xpose, clear varname;
save output, replace;

/* Reload the pre-collapse data saved in tmp.dta and aggregate it by country, year and file. */
use tmp, clear;
collapse (mean) `X' (median) median_s=sales median_e=employees, by(country year file);
/* ************************************************************************************************************** */
/* ************************************************************************************************************** */
/* Variables tested for a difference in means between file==0 and file==1. */
/* NOTE(review): xport is in the group-level list earlier in this section but not here - confirm this is intended. */
local Tstat 
age registered_ctral registered_loc registered_agcy inownershouse permstructure land_own lackoftitle generator own_transp
hours  electricalconnection buyer_largefirm buyer_individual email website competition_informal employees mgr_educ mgr_primary mgr_secondary
mgr_vocational mgr_college educ_most wkr_primary wkr_secondary wkr_college loan_ever finance_int fin_trade fin_family finance_bnk
term emp_growth;

/* Fix: open the log with append, not replace. The same log file is created earlier in this section for the group-level tests, and replace here would discard them. */
log using micro_t_stat, append;

foreach l in `Tstat'  {;
	display "`l'";
	/* Skip variables with no non-missing value in either file category, where ttest would fail. */
	quietly: count if (`l'~=. & file==0);
	if r(N)==0 {;
		continue;
	};
	quietly: count if (`l'~=. & file==1);
	if r(N)==0 {;
			continue;
	};
	ttest `l', by(file);
};

/* Rank-sum test on median sales across the two file categories. */
ranksum median_s, by(file);

log close;
/* ************************************************************************************************************** */
/* Collapse to one row per file value and transpose. collapse sorts on the by-variable, so after xpose v1 holds file==0 and v2 holds file==1. */
collapse (mean) `X' (median) median_s=sales median_e=employees, by(file);
xpose, clear varname;
/* The renames must come before joinby because output.dta has its own v1, v2, ... columns. */
rename v1 formal;
rename v2 informal;
/* Attach the per-group columns from output.dta, matching rows on the variable name. */
joinby _varname using output;
/* Open the Data Editor on the combined table. This is interactive and assumes output.dta supplied v1-v5. */
edit _varname v1 v2 informal v3 v4 v5 formal;
