clear all
set more off
#delimit;

/*************************************************************************************************************
This do-file builds a Stata dataset from the MMF Analyzer data. The raw data are downloaded from iMoneyNet,
http://www.moneyfundanalyzer.com, (username=PW=greich).

NOTE(review): login credentials are recorded in this comment. Consider moving them out of the source file.

All statements below are terminated by a semicolon (see the delimit command above).
*************************************************************************************************************/
/*Paths.do is not shown here. Presumably it defines the LogPath and DataPath globals used below - confirm.*/
do Paths;

/*Close any log left open by a previous (possibly aborted) run, then start a fresh one*/
capture log close;
log using "$LogPath/MMF-analyzer-data.log", replace;

/*************************************************************************************************************
1. Load data
*************************************************************************************************************/
/*Loop over the three downloaded archives. For each archive: unzip it into the working directory, import every
Excel workbook it contains, stack the workbooks, build date variables, resolve duplicate identifiers, and
merge the result into the running MMF-analyzer-data file. The order of the list matters because each later
archive is merged onto the file saved by the earlier ones.*/
foreach variables in `"monthly fund names and expenses"' `"monthly fund data"' `"weekly fund data"' {;
	qui unzipfile "$DataPath/imoneynet/`variables'", replace;
	/*Archive name with blanks replaced by underscores, cut to 20 characters so it can be used in macro names*/
	local variables_name = substr(subinstr(`"`variables'"',`" "',`"_"',.),1,20);
	local files: dir . files `"`variables'*.xls*"'; /*Had to upconvert some files by hand because stata wouldn't read the 2003 version*/
	foreach file of local files {;
	
		di `"`file'"';
		qui import excel using `"`file'"', clear;
		/*The extracted workbook is no longer needed once it is in memory*/
		qui erase `"`file'"';
		
		/*Rename variables using variable name. GCR written ado program (not shown here).
		The row passed to StataName is 4 for .xlsx workbooks and 3 for all other workbooks.*/
		if regexm(`"`file'"',`"\.xlsx$"') {;
			local row 4;
		};
		else {;
			local row 3;
		}; 
		StataName *, row(`row') droprows dropmissing label;
		
		/*Correct my mistake - I downloaded June in H2 for some years*/
		qui drop if regexm(`"`file'"',`"-[Hh]2"') & month(date(Date,`"DMY"'))==6;
		
		di `"drop if UniqueIdentifier=="N/A""';
		drop if UniqueIdentifier=="N/A"; /*Not sure why these exist, but I would have to match
		longitudinally on string variables*/
		
		/*Append years together. The first workbook of an archive creates the tempfile, every later workbook
		is appended to it. The firstNAME macro is empty until that first save has happened.*/
		if `"`first`variables_name''"' == `""' {;
			tempfile `variables_name';
			qui save ``variables_name'';
			local first`variables_name' `"Not"';
		};
		else {;
			qui append using ``variables_name'';
			qui save ``variables_name'', replace;
		};		
	};
	/*NOTE(review): ignore(-) removes every dash before conversion, so a negative number loses its sign.
	Presumably the intent is to turn cells holding a lone dash into missing - confirm that no column can
	hold negative values.*/
	qui destring *, replace ignore(-);
	qui gen daten = date(Date,`"DMY"');
	qui gen month = mofd(daten);
	qui gen week = wofd(daten);
	qui format daten %td;
	di `"drop if missing(UniqueIdentifier,Date)"';
	drop if missing(UniqueIdentifier,Date);
	
	/*Keep a per-archive copy in the working directory*/
	qui save `"`variables'"', replace;
	
	/*Merge variables*/
	if `"`variables'"' == `"monthly fund names and expenses"' {; 
		/*Handle small number of duplicate observations. duplicate_flag counts the surplus copies of each
		month-identifier pair, maxdups is the largest such count within an identifier.*/
		qui duplicates tag month UniqueIdentifier, gen(duplicate_flag);
		tab FundName duplicate_flag if duplicate_flag>0; /*Problem stops after around 2004*/	
		qui egen maxdups = max(duplicate_flag), by(UniqueIdentifier);
		/*Disambiguate by appending the second word of ShareClass when that word is a single character*/
		qui replace UniqueIdentifier = UniqueIdentifier + word(ShareClass,2) if maxdups==1 & length(word(ShareClass,2))==1;
		qui replace UniqueIdentifier = UniqueIdentifier + "-2" if inlist(UniqueIdentifier,"628263600") & inlist(FundName,"Chase Vista Global MMF/Vista *");
		qui drop maxdups;
		duplicates report UniqueIdentifier month;
		
		/*First archive: this becomes the base file that later archives merge onto*/
		qui save MMF-analyzer-data, replace;
	};
	else if `"`variables'"'==`"monthly fund data"' {;
		/*Handle small number of duplicate observations*/
		qui duplicates tag month UniqueIdentifier, gen(duplicate_flag);
		tab FundName duplicate_flag if duplicate_flag>0; /*Problem stops after around 2004*/	
		qui egen maxdups = max(duplicate_flag), by(UniqueIdentifier);
		qui levelsof FundName if maxdups==1, local(duplicates);
		foreach duplicate of local duplicates {;
			/*Use the second-to-last word of the fund name as a share class suffix when it is a single
			character. Names with fewer than two words have no such word, and without the guard the
			expression would read macro 0 (the do-file argument line) instead of a token.*/
			tokenize `duplicate';
			local words: word count `duplicate';
			local shareclass;
			if `words' >= 2 {;
				local shareclass `"``=`words'-1''"';
			};
			if length(`"`shareclass'"')==1 {;
				qui replace UniqueIdentifier = UniqueIdentifier + `"`shareclass'"' if maxdups==1 & FundName==`"`duplicate'"';
			};
		};
		qui replace UniqueIdentifier = UniqueIdentifier + "-2" if inlist(UniqueIdentifier,"628263600") & inlist(FundName,"Chase Vista Global MMF/Vista *");
		qui drop maxdups;
		duplicates report UniqueIdentifier month;

		merge 1:1 month UniqueIdentifier using MMF-analyzer-data, nogenerate;
		/*I bizarrely don't have _m==3 for all observations. I tried redoing the query for ING UniqueIdentifier==449797372 and found including 7-DSY in the variables led to dropping
		observations. This seems like a bug, but since it affects only 0.25% of observations I'm not pursuing it.*/
		qui save MMF-analyzer-data, replace;
	};
	else if `"`variables'"'==`"weekly fund names"' {; 
		/*NOTE(review): this branch cannot run as written because the loop list above does not contain
		weekly fund names. It is kept unchanged in case that archive is added back to the list.*/
		merge 1:1 month UniqueIdentifier using MMF-analyzer-data;
		tab daten _m;
		qui drop if _m==2; /*Handful of observations in monthly file not in weekly file*/
		qui drop _m;
		qui save MMF-analyzer-data, replace;
	};
	else if `"`variables'"'==`"weekly fund data"' {; 
		/*Handle small number of duplicate observations. Weekly rows are identified by date and identifier.
		The identifier is spelled out in full so the command does not depend on variable abbreviation.*/
		qui duplicates tag daten UniqueIdentifier, gen(duplicate_flag);
		tab FundName duplicate_flag if duplicate_flag>0;
		qui egen maxdups = max(duplicate_flag), by(UniqueIdentifier);
		qui levelsof FundName if maxdups==1, local(duplicates);
		foreach duplicate of local duplicates {;
			/*Same share class suffix rule as for the monthly fund data, including the guard for names
			with fewer than two words*/
			tokenize `duplicate';
			local words: word count `duplicate';
			local shareclass;
			if `words' >= 2 {;
				local shareclass `"``=`words'-1''"';
			};
			if length(`"`shareclass'"')==1 {;
				qui replace UniqueIdentifier = UniqueIdentifier + `"`shareclass'"' if maxdups==1 & FundName==`"`duplicate'"';
			};
		};
		qui replace UniqueIdentifier = UniqueIdentifier + "-2" if inlist(UniqueIdentifier,"628263600") & inlist(FundName,"Chase Vista Global MMF/Vista *");
		qui drop maxdups;
		duplicates report UniqueIdentifier daten;
	
		/*Many weekly rows map to one monthly row. Weekly rows without a monthly match (_m==1) are kept
		and flagged in FundVariablesMissing.*/
		merge m:1 month UniqueIdentifier using MMF-analyzer-data;
		tab daten _m;
		qui gen FundVariablesMissing = _m==1;
		qui drop _m;
		qui save MMF-analyzer-data, replace;
	};
};

/*************************************************************************************************************
2. Clean longitudinal linkages and save
*************************************************************************************************************/
#delimit;
/*Display formats for the period variables created in section 1*/
qui format week %tw;
qui format month %tm;

/*Numeric versions of the string identifiers, then declare the panel: one series per fund on a daily calendar*/
qui encode UniqueIdentifier, gen(id);
qui encode FundComplexHistorical, gen(id_complex); /*Fund complex refers to the ultimate sponsor of the fund*/
qui encode MasterClassFundName, gen(MasterClassFundName_n); /*Master class fund name refers to the name of the fund*/
qui tsset id daten, daily;

/*The two linkage variables that get imputed below*/
local linkvars id_complex MasterClassFundName_n;

/*Most data items with missing months are because they are the first or last month of the fund in the data.
Flag each fund's first (min) and last (max) month and tabulate the flags against the missing indicator.*/
foreach stat in min max {;
	qui egen `stat'month = `stat'(month), by(id);
	qui gen `stat'month_flag = month==`stat'month;
	qui drop `stat'month;
	tab FundVariablesMissing `stat'month_flag;
};

/*Impute values for missing months. Start with beginning and end months: in a fund's last month take the
value from 28 days earlier, in its first month take the value from 28 days later.*/
foreach v of local linkvars {;
	replace `v' = L28.`v' if FundVariablesMissing & maxmonth_flag;
	replace `v' = F28.`v' if FundVariablesMissing & minmonth_flag;
};
qui gen stillmissing = FundVariablesMissing & missing(id_complex,MasterClassFundName_n);
tab stillmissing;
qui egen maxstillmissing = max(stillmissing), by(id);

/*Impute values for other missing months if no change in fund status, that is, when the variable takes a
single value across all of the fund's non-missing observations*/
qui levelsof id if maxstillmissing, local(gapfunds);
foreach fund of local gapfunds {;
	foreach v of local linkvars {;
		sum `v' if id==`fund', meanonly;
		if r(min)==r(max) {;
			qui replace `v' = r(min) if id==`fund' & missing(`v');
		};
	};
};
/*One fund is handled by hand: carry the value forward from 7 days earlier*/
foreach v of local linkvars {;
	qui replace `v' = L7.`v' if UniqueIdentifier=="561717703" & missing(`v');
};
/*Report what is still unresolved, then drop the two helper variables*/
qui replace stillmissing = FundVariablesMissing & missing(id_complex,MasterClassFundName_n);
tab stillmissing;
qui drop *stillmissing;

/*Save, zip into the data directory, and remove the unzipped copy from the working directory*/
qui save MMF-analyzer-data, replace;
qui zipfile MMF-analyzer-data.dta, saving("$DataPath/MMF-analyzer-data", replace);
erase MMF-analyzer-data.dta;

capture log close;
