#delimit;

/*************************************************************************************************************
This do-file cleans the MMF data for analysis and generates additional variables used in the analysis.

*************************************************************************************************************/

/*************************************************************************************************************
1. Load data
*************************************************************************************************************/
/*Build the zipped data set first if it is not yet present in the data directory. The check is made on the exact
path that unzipfile opens below, so it does not depend on how the operating system reports file-name case in a
directory listing. NOTE(review): MMF-analyzer-data.do is presumably what creates the zip file - confirm.*/
capture confirm file `"$DataPath/MMF-analyzer-data.zip"';
if _rc {;
	do MMF-analyzer-data;
};
/*Extract into the working directory, load observations from 2005 onwards, then remove the extracted copy*/
qui unzipfile `"$DataPath/MMF-analyzer-data.zip"', replace;
qui use MMF-analyzer-data if year(daten)>=2005;
qui erase MMF-analyzer-data.dta;

/*Sample filters*/
describe, short;
/*Flag every id whose id_complex is not constant across its observations. The within-id mean is stored as a
double so that the exact equality test is not affected by float rounding of id_complex. Scratch variables are
tempvars so they cannot collide with variables already in the data.*/
tempvar meanComplex FundComplexChange;
qui egen double `meanComplex' = mean(id_complex), by(id);
qui egen byte `FundComplexChange' = max(id_complex != `meanComplex'), by(id);
drop if `FundComplexChange'; /*Drops funds with a change in their ultimate sponsor*/
qui drop `meanComplex' `FundComplexChange';
drop if regexm(FundComplexHistorical,"Lehman Brothers *"); /*Drops Lehman sponsored funds*/
drop if inlist(UniqueIdentifier,"46637K844","4812C2262","4812C2270"); /*Three JPMorgan Funds that only appear on
one date and with 0.1m in Share Class Assets*/

qui gen year = year(daten);
/*Initial fund identifier: one group per combination of fund complex and master class fund name*/
qui egen id_fund = group(id_complex MasterClassFundName_n);
/*Manual adjustments to separate funds with different holdings as indicated by portfolio assets, but same fund complex and master class name*/
/*Store the numeric group ids of two share classes before id_fund is converted to a string. They are reused below
to build the ids of the Lehman-Neuberger share classes. The full variable name UniqueIdentifier is used throughout
so that the code does not rely on variable abbreviation being enabled and unambiguous.*/
sum id_fund if UniqueIdentifier=="64127K109", meanonly;
local Neuberger1 = r(mean);
sum id_fund if UniqueIdentifier=="64127K208", meanonly;
local Neuberger2 = r(mean);
/*Switch to a string id so that suffixes can be appended to split groups*/
qui tostring id_fund, replace;
/*Deutsche begins reporting two funds as one on July 31, 2007*/
list UniqueIdentifier daten FundName MasterClassFundName PortfolioAssetswkmils if MasterClassFundName=="Deutsche Cash Mgmt Portfolio" & inlist(daten,td(24jul2007),td(31jul2007));
qui replace id_fund = id_fund + "-2" if inlist(UniqueIdentifier,`"52470G791"',`"23336Y722"',`"23337T110"',`"23337T128"',`"23339E533"');
qui replace id_fund = id_fund + "-3" if inlist(UniqueIdentifier,`"014470405"',`"014470108"');
qui replace id_fund = "`Neuberger1'-2" if UniqueIdentifier=="52520H104"; /*Lehman-Neuberger*/
qui replace id_fund = "`Neuberger2'-2" if inlist(UniqueIdentifier,`"52520H401"',`"52520H500"');/*Lehman-Neuberger*/
/*For each weekly date from 21nov2006 to 20feb2007, subtract the share class assets of 808515688 from those of
808515795. The date condition uses daten, the numeric date variable used everywhere else in this file.*/
forvalues d = `=td(21nov2006)'(7)`=td(20feb2007)' {;
	sum ShareClassAssetswkmils if daten==`d' & UniqueIdentifier==`"808515688"', meanonly;
	qui replace ShareClassAssetswkmils = ShareClassAssetswkmils - r(mean) if daten==`d' & UniqueIdentifier==`"808515795"';
}; /*Seems to be a mistake*/

/*Keep the string id as id_fundstring and make id_fund a labelled numeric id again*/
rename id_fund id_fundstring;
qui encode id_fundstring, gen(id_fund);

/*Excess returns over the 1 month Treasury bill: download DGS1MO with freduse into a temporary file, merge it on
daten keeping matched observations only, then subtract it from every _7* and _1Mo* variable*/
tempfile TBill;
preserve;
qui freduse DGS1MO, clear;
qui save `TBill';
restore;
merge m:1 daten using `TBill', keep(matched) nogenerate;
foreach yld of varlist _7* _1Mo* {;
	/*The new variable carries the suffix E and the original label extended by the words excess return*/
	local varlabel: variable label `yld';
	qui gen `yld'E = `yld' - DGS1MO;
	label variable `yld'E `"`varlabel', excess return"';
};

/*************************************************************************************************************
2. Generate additional master fund level variables
*************************************************************************************************************/
/*Average expense ratio, weighted by share class assets*/
/*Total share class assets per fund and date*/
egen PortfolioAssetswkmils2 = total(ShareClassAssetswkmils), by(id_fund daten); 
/*Should be same as PortfolioAssets but there are some discrepancies due to the adjustments above*/
/*Weight of each share class in its fund on that date*/
qui gen sharewgt = ShareClassAssetswkmils/PortfolioAssetswkmils2;
qui gen WvdExpenseratiomo = IncdExpenseRatiomo - ChgdExpenseRatiomo; /*Waived fees*/
/*Asset-weighted fund level value of every listed variable. total() is the documented name of the egen function
formerly called sum() and treats missing values as zero, so a share class with a missing value contributes
nothing to the weighted sum.*/
foreach var of varlist ChgdExpenseRatiomowk ChgdExpenseRatiomo-ChgdShrSvcFeemo IncdExpenseRatiomo-IncdShrSvcFeemo WvdExpenseratiomo _7* _1Mo* {;
	 qui egen double Folio`var' = total(sharewgt * `var'), by(id_fund daten); /*Using abbreviation Folio for Portfolio to avoid names getting too long*/
};
/*Category indicators derived from the sub-category text*/
qui gen TaxFree = regexm(SubCategoryHistorical,`"T-F"');
qui gen Institutional = regexm(SubCategoryHistorical,`"Inst(it)?$"');
/*Alternation, not a character class: only the words First or Second directly before Tier should match*/
qui gen Prime = regexm(SubCategoryHistorical,`"(First|Second) Tier"');
/*Any* equals 1 when at least one share class of the fund on that date has the indicator set*/
foreach cat in TaxFree Prime Institutional {;
	qui egen Any`cat' = max(`cat'), by(id_fund daten);
};
assert AnyTaxFree == TaxFree; /*Tax free refers to asset holdings and should be constant at the fund level.*/
assert AnyPrime == Prime; /*Prime refers to asset holdings and should be constant at the fund level.*/

/*Bank obligations net of Treasury, other US and repo holdings. NOTE(review): the KS prefix presumably refers to
the holdings risk measure of Kacperczyk and Schnabl - confirm.*/
qui gen KS_holdingrisk = DomesticBankObligations + ForeignBankObligations - USTreasury - USOther - Repos;

