* Set the working directory to the root of the data package.
* NOTE: this is a hard-coded, machine-specific path; edit it before running on
* another machine. The paths used below are relative (Data\...) and resolve
* against this folder.
cd "C:\Users\chausman\Desktop\Data package to post"

**READING IN DATA AND MILD CLEANING FOR BPEA

///////////////
// CONSUMPTION DATA
///////////////
* Pass 1: convert sheet "Data 1" of each EIA consumption workbook to a .dta file.
* The header row supplies the variable names, the first observation is discarded,
* and the date column is standardised to the name datestring (its capitalisation
* differs between files, hence the two captured renames).
foreach series in vcs vdv veu vgl vgp vgt vin vrs {
	import excel using Data\EIA\Demand\ng_cons_sum_a_EPG0_`series'_mmcf_m.xls, ///
		sheet("Data 1") cellrange(A2) firstrow clear
	drop in 1
	destring, replace
	compress
	capture rename sourcekey datestring
	capture rename Sourcekey datestring
	saveold Data\EIA\Demand\ng_cons_sum_a_EPG0_`series'_mmcf_m, replace
}

* Pass 2: place the eight series side by side, matching on datestring.
clear all
use Data\EIA\Demand\ng_cons_sum_a_EPG0_vcs_mmcf_m, clear
foreach series in vdv veu vgl vgp vgt vin vrs {
	merge 1:1 datestring using Data\EIA\Demand\ng_cons_sum_a_EPG0_`series'_mmcf_m, nogen
}
* NA1570* is state-level vehicle usage; it needs to be handled separately.
drop BB C NA1570*

* Derive year and month. The first "15" in the string is removed before parsing
* with mask "MY" (presumably the day-of-month stamp -- confirm against the raw file).
replace datestring=subinstr(datestring,"15","",1)
drop if datestring==""
gen year=year(date(datestring,"MY"))
gen month=month(date(datestring,"MY"))

* Go long: whatever follows the series stub in each variable name becomes the
* state identifier, and the "2" characters are then stripped from it.
reshape long N3020 N3025 N3045 N9160 N9170 N3060 N3035 N3010, ///
	i(year month datestring) j(state) string
replace state=subinstr(state,"2","",.)

* Readable names for the series codes.
* deliv_commercial: before 1989, national only.
* deliv_vehicle:    before 2010, national only.
rename (N3020 N3025 N3045 N9160 N9170 N3060 N3035 N3010) ///
	(deliv_commercial_mmcf deliv_vehicle_mmcf deliv_electricpower_mmcf ///
	deliv_leaseplantfuel_mmcf deliv_pipelineuse_mmcf deliv_consumers_mmcf ///
	deliv_industrial_mmcf deliv_residential_mmcf)
compress
saveold Data\EIA\Demand\EIA_consumption, replace


//////////////
//PRODUCTION DATA
/////////////

//MARKETED PRODUCTION, NATIONAL
* Reads sheet "Data 1" of the national production workbook, gives the series
* codes readable names and labels, derives year/month from the date string,
* and saves the result as production_summary.
import excel using Data\EIA\Supply\NG_PROD_SUM_DCU_NUS_M.xls, ///
	sheet("Data 1") cellrange(A2) firstrow clear
drop in 1
rename (N9010US2 N9011US2 N9012US2 NGM_EPG0_FGS_NUS_MMCF NGM_EPG0_FGC_NUS_MMCF) ///
	(withdrawals_gross_mmcf withdrawals_gaswells_mmcf withdrawals_oilwells_mmcf ///
	withdrawals_shale_mmcf withdrawals_coalbed_mmcf)
rename (N9020US2 N9040US2 N9030US2 N9050US2 N9070US2) ///
	(repressuring_mmcf ventedflared_mmcf nonhydrocarbon_mmcf ///
	production_marketed_mmcf production_dry_mmcf)
foreach kind in gross gaswells oilwells shale coalbed {
	label variable withdrawals_`kind'_mmcf "withdraw `kind'"
}
foreach stem in repressuring ventedflared nonhydrocarbon production_marketed production_dry {
	label variable `stem'_mmcf "`stem'"
}
* Source abbreviates the date-string column; the first "15" is removed before
* parsing with mask "MY".
replace Source=subinstr(Source,"15","",1)
drop if Source==""
gen year=year(date(Source,"MY"))
gen month=month(date(Source,"MY"))
drop Source L
* The data columns are converted to numeric only after the string date is used.
destring, replace
compress
saveold Data\EIA\Supply\production_summary, replace

//MARKETED PRODUCTION, STATE
* Sheet "Data 1": each listed column header (possibly abbreviated) is renamed to
* p + state code so that a single reshape on the stub p yields a long panel.
import excel using Data\EIA\Supply\NG_PROD_SUM_A_EPG0_VGM_MMCF_M.xls, ///
	sheet("Data 1") cellrange(A3) firstrow clear
local pairs Alaska AK Federal fedoffshore Louisiana LA NewMex NM Ok OK Tex TX Wy WY US US
while "`pairs'"!="" {
	* consume one (header, code) pair per pass, in the listed order
	gettoken header pairs : pairs
	gettoken code pairs : pairs
	rename `header' p`code'
}
drop J
* Drop undated rows and everything before 1989 (no state-level data).
drop if Date==. | year(Date)<1989
gen year=year(Date)
gen month=month(Date)
drop Date
reshape long p, i(year month) j(state) string
rename p production_marketed_mmcf
compress
saveold Data\EIA\Supply\production_marketed_state, replace

//MARKETED PRODUCTION, OTHER STATES
* Sheet "Data 2": same treatment for the remaining states.
import excel using Data\EIA\Supply\NG_PROD_SUM_A_EPG0_VGM_MMCF_M.xls, ///
	sheet("Data 2") cellrange(A3) firstrow clear
drop Other AD
local pairs Alabama AL Arizona AZ Arkansas AR California CA Colorado CO ///
	Florida FL Illinois IL Indiana IN Kansas KS Kentucky KY Maryland MD ///
	Michigan MI Mississippi MS Missouri MO Montana MT Nebraska NE Nevada NV ///
	NewYork NY NorthDakota ND Ohio OH Oregon OR Pennsylvania PA ///
	SouthDakota SD Tennessee TN Utah UT Virginia VA WestVirginia WV
while "`pairs'"!="" {
	gettoken header pairs : pairs
	gettoken code pairs : pairs
	rename `header' p`code'
}
* Drop undated rows and everything before 1989 (no state-level data).
drop if Date==. | year(Date)<1989
gen year=year(Date)
gen month=month(Date)
drop Date
reshape long p, i(year month) j(state) string
rename p production_marketed_mmcf
compress
saveold Data\EIA\Supply\production_marketed_stateother, replace

* Stack the "Data 1" states underneath to obtain the full state panel.
append using Data\EIA\Supply\production_marketed_state
saveold Data\EIA\Supply\production_marketed_stateall, replace
	
//WITHDRAWALS
* For each withdrawal series, convert sheets "Data 1" and "Data 2" of its
* workbook into ..._sheet1.dta and ..._sheet2.dta. Each sheet gets the same
* treatment: header row as names, first observation dropped, numeric conversion,
* and the date column renamed to datestring whatever its capitalisation.
foreach v in FGC FGG FGO FGS FGW {
	forvalues k = 1/2 {
		import excel using Data\EIA\Supply\ng_prod_sum_a_EPG0_`v'_mmcf_m.xls, ///
			sheet("Data `k'") cellrange(A2) firstrow clear
		drop in 1
		destring, replace
		compress
		capture rename sourcekey datestring
		capture rename Sourcekey datestring
		saveold Data\EIA\Supply\ng_prod_sum_a_EPG0_`v'_mmcf_m_sheet`k', replace
	}
}

//we do the following in two separate chunks because of the way we process state names
* Chunk 1: coalbed (FGC) and shale (FGS) withdrawals.
clear all
use Data\EIA\Supply\ng_prod_sum_a_EPG0_FGC_mmcf_m_sheet1, clear
foreach piece in FGC_mmcf_m_sheet2 FGS_mmcf_m_sheet1 FGS_mmcf_m_sheet2 {
	merge 1:1 datestring using Data\EIA\Supply\ng_prod_sum_a_EPG0_`piece', nogen
}
drop J AD I

* year/month from the date string (first "15" removed, then parsed as "MY")
replace datestring=subinstr(datestring,"15","",1)
drop if datestring==""
gen year=year(date(datestring,"MY"))
gen month=month(date(datestring,"MY"))

reshape long NGM_EPG0_FGC_ NGM_EPG0_FGS_, ///
	i(year month datestring) j(state) string

* Reduce the variable-name suffix to a state code: strip underscores and the
* MMCF unit tag, map the national/offshore/other codes, and finally remove the
* leading "S" from the remaining three-character codes.
replace state=subinstr(subinstr(state,"_","",.),"MMCF","",.)
replace state="US" if state=="NUS"
replace state="offshore" if inlist(state,"R3FM","FX")
replace state="other_aggreg" if inlist(state,"98","R98")
* EIA note on the "Other States" aggregate:
*   "Beginning with monthly data for January 2006, "Other States" volumes include
*   all of the natural gas producing states except: Alaska, Louisiana, New Mexico,
*   Oklahoma, Texas, Wyoming, and the Gulf of Mexico. "Other States" volumes prior to
*   January 2006 include: Arkansas, Illinois, Kentucky, Maryland, Missouri, Nebraska,
*   Nevada, New York, Ohio, Pennsylvania, South Dakota, Tennessee, Virginia, and West
*   Virginia."
replace state=subinstr(state,"S","",1) if length(state)==3

* Rename and rescale by 1/1000 (the new names end in _bcf).
rename (NGM_EPG0_FGC_ NGM_EPG0_FGS_) (withdraw_coalbed_bcf withdraw_shale_bcf)
replace withdraw_coalbed_bcf=withdraw_coalbed_bcf/10^3
replace withdraw_shale_bcf=withdraw_shale_bcf/10^3
compress
saveold Data\EIA\Supply\EIA_production_coalshale, replace
	
* Chunk 2: gross (FGG), oil-well (FGO) and gas-well (FGW) withdrawals.
use Data\EIA\Supply\ng_prod_sum_a_EPG0_FGG_mmcf_m_sheet1, clear
foreach piece in FGG_mmcf_m_sheet2 FGO_mmcf_m_sheet1 FGO_mmcf_m_sheet2 ///
	FGW_mmcf_m_sheet1 FGW_mmcf_m_sheet2 {
	merge 1:1 datestring using Data\EIA\Supply\ng_prod_sum_a_EPG0_`piece', nogen
}
drop J AD

* year/month from the date string (first "15" removed, then parsed as "MY")
replace datestring=subinstr(datestring,"15","",1)
drop if datestring==""
gen year=year(date(datestring,"MY"))
gen month=month(date(datestring,"MY"))

reshape long N9011 N9012 N9010 , ///
	i(year month datestring) j(state) string

* State code: strip the "2" characters, then map the offshore/other codes.
replace state=subinstr(state,"2","",.)
replace state="offshore" if inlist(state,"R3FM","FX")
replace state="other_aggreg" if inlist(state,"98","R98")
* EIA note on the "Other States" aggregate:
*   "Beginning with monthly data for January 2006, "Other States" volumes include
*   all of the natural gas producing states except: Alaska, Louisiana, New Mexico,
*   Oklahoma, Texas, Wyoming, and the Gulf of Mexico. "Other States" volumes prior to
*   January 2006 include: Arkansas, Illinois, Kentucky, Maryland, Missouri, Nebraska,
*   Nevada, New York, Ohio, Pennsylvania, South Dakota, Tennessee, Virginia, and West
*   Virginia."

* Rename and rescale by 1/1000 (the new names end in _bcf).
rename (N9011 N9012 N9010) (withdraw_gaswell_bcf withdraw_oilwell_bcf withdraw_gross_bcf)
replace withdraw_gaswell_bcf=withdraw_gaswell_bcf/10^3
replace withdraw_oilwell_bcf=withdraw_oilwell_bcf/10^3
replace withdraw_gross_bcf=withdraw_gross_bcf/10^3
compress
saveold Data\EIA\Supply\EIA_production_welltype, replace

* Combine the coalbed/shale file with the well-type file into one state-month
* file. Rows are matched on year, month and state; nogen suppresses _merge and
* no keep() is given, so unmatched rows from either side are retained.
use Data\EIA\Supply\EIA_production_coalshale, clear
merge 1:1 year month state using Data\EIA\Supply\EIA_production_welltype, nogen
sort state year month
compress
saveold Data\EIA\Supply\EIA_production, replace 


/////////////
//DRILLING DATA
/////////////////
* rigs.dta is built up in four steps; each step after the first merges the
* file saved so far back in on (year, month) and overwrites it.

* Step 1 -- sheet "Data 1": total rig count.
import excel using Data\EIA\Supply\NG_ENR_DRILL_S1_M.xls, ///
	sheet("Data 1") cellrange(A3) firstrow clear
destring, replace
rename USC rigs_all
gen year=year(Date)
gen month=month(Date)
drop Date C
drop if rigs_all==.
compress
saveold Data\EIA\Supply\rigs, replace

* Step 2 -- sheet "Data 3": oil and gas rig counts.
import excel using Data\EIA\Supply\NG_ENR_DRILL_S1_M.xls, ///
	sheet("Data 3") cellrange(A3) firstrow clear
destring, replace
rename USC rigs_oil
rename USN rigs_gas
gen year=year(Date)
gen month=month(Date)
* Date is removed before D so that D cannot be read as an abbreviation of Date.
drop Date
drop D
drop if rigs_gas==. & rigs_oil==.
compress
merge 1:1 year month using Data\EIA\Supply\rigs, nogen
saveold Data\EIA\Supply\rigs, replace

* Step 3 -- sheet "Data 4": service rig count.
import excel using Data\EIA\Supply\NG_ENR_DRILL_S1_M.xls, ///
	sheet("Data 4") cellrange(A3) firstrow clear
destring, replace
rename USC rigs_service
gen year=year(Date)
gen month=month(Date)
drop Date
drop C
drop if rigs_service==.
compress
merge 1:1 year month using Data\EIA\Supply\rigs, nogen
saveold Data\EIA\Supply\rigs, replace

* Step 4 -- wells drilled, from sheet "Data 1" of the well-end workbook.
* The renames rely on header abbreviations and must stay in this order: the
* longer "...Exploratoryand" headers are renamed first so that the shorter
* abbreviations that follow are unambiguous.
import excel using Data\EIA\Supply\NG_ENR_WELLEND_S1_M.xls, ///
	sheet("Data 1") cellrange(A3) firstrow clear
destring, replace
gen year=year(Date)
gen month=month(Date)
drop Date
rename USNaturalGasExploratoryand wells_all_gas_drilled
rename USCrudeOilExploratoryand wells_all_oil_drilled
rename USDryExploratoryand wells_all_dry_drilled
rename USNaturalGasExplor wells_explor_gas_drilled
rename USNaturalGasDevel wells_develop_gas_drilled
keep wells* year month
drop if wells_explor_gas_drilled==. | wells_develop_gas_drilled==.
compress
merge 1:1 year month using Data\EIA\Supply\rigs, nogen
saveold Data\EIA\Supply\rigs, replace
//also available: onshore vs offshore; oil vs gas; service rigs
//also available: well depth; exploratory wells; footage drilled


/////////////
//INVENTORIES DATA
/////////////////
* Weekly storage file: keep one inventory series, tag each observation with
* year, month, week and day, and save. Then average the weekly observations
* within each calendar month and save the monthly file.
import excel using Data\EIA\Other\NG_STOR_WKLY_S1_W.xls, ///
	sheet("Data 1") cellrange(A3) firstrow clear
gen year=year(Date)
gen month=month(Date)
gen week=week(Date)
gen day=day(Date)
drop Date
* WeeklyL abbreviates the header of the series that is kept; the rename has to
* precede the drop of the remaining Week* columns.
rename WeeklyL inventories_bcf
drop if inventories_bcf==.
drop Week* H
compress
saveold Data\EIA\Other\inventories, replace

* monthly mean of the weekly observations
collapse (mean) inventories_bcf, by(year month)
saveold Data\EIA\Other\inventories_monthly, replace


/////////////
//TRADE - IMPORTS/EXPORTS DATA
/////////////////
* Two parallel extracts (imports, exports). Each keeps one monthly series,
* checks that (year, month) uniquely identifies the rows, and saves it.

* imports
import excel using Data\EIA\Other\NG_MOVE_IMPC_S1_M.xls, ///
	sheet("Data 1") cellrange(A3) firstrow clear
rename USNaturalGasIm imports_mmcf
gen year=year(Date)
gen month=month(Date)
drop if imports_mmcf==.
keep imports_mmcf year month
isid year month
compress
saveold Data\EIA\Other\imports, replace

* exports
import excel using Data\EIA\Other\NG_MOVE_EXPC_S1_M.xls, ///
	sheet("Data 1") cellrange(A3) firstrow clear
rename USNaturalGasEx exports_mmcf
gen year=year(Date)
gen month=month(Date)
drop if exports_mmcf==.
keep exports_mmcf year month
isid year month
compress
saveold Data\EIA\Other\exports, replace


/////////////
//PRICE DATA
////////////////

//BLOOMBERG, UK NBP (DATA NOT PUBLICLY POSTED)
* Daily series first (saved as uknbp), then its calendar-month mean (saved as
* uknbp_monthly).
import excel using "Data\Bloomberg\ngspot_uk_nbp.xlsx", ///
	sheet("original_bloomberg") firstrow clear
drop in 1
destring, replace
* diagnostic only: correlation between columns B and E
corr B E
* day-ahead, last trading price
rename NBPGDAH datestring
rename B uknbp_gbptherm
gen date=date(datestring,"MDY")
format %td date
keep datestring uknbp_gbptherm date
compress
saveold Data\Bloomberg\uknbp, replace

gen year=year(date)
gen month=month(date)
collapse (mean) uknbp_gbptherm, by(year month)
compress
saveold Data\Bloomberg\uknbp_monthly, replace
	
//worldbank "pink sheet"
* Monthly natural gas prices from the "Monthly Prices" sheet. Column A holds the
* period with an "M" separator; the "M" is replaced by a comma so the string can
* be parsed with mask "YM".
import excel using Data\pink_data_m.xlsx, ///
	sheet("Monthly Prices") cellrange(a7) firstrow clear
replace A=subinstr(A,"M",",",.)
gen year=year(date(A,"YM"))
gen month=month(date(A,"YM"))
keep NGAS* year month
rename (NGAS_US NGAS_EU NGAS_JP) ///
	(ngprice_us_dmmbtu ngprice_eu_dmmbtu ngprice_jp_dmmbtu)
* keep 1977 onward; rows whose period could not be parsed are dropped too
drop if year<1977 | year==.
destring, replace
compress
saveold Data\pinksheet, replace
	
//EIA COMMODITY, available monthly or daily
* Sheet "Data 1": the monthly series whose header starts with RNG, saved as
* henryhub_monthly. The date string is parsed with mask "DMY".
import excel using Data\EIA\Prices\ng_pri_fut_s1_m.xls, ///
	sheet("Data 1") cellrange(A2) firstrow clear
drop in 1
destring, replace
rename Sourcekey datestring
rename RNG henryhub_monthly_dmmbtu
drop NGM
gen year=year(date(datestring,"DMY"))
gen month=month(date(datestring,"DMY"))
drop datestring D
drop if henryhub_monthly_dmmbtu==.
compress
saveold Data\EIA\Prices\henryhub_monthly, replace
	//not used: EIA does have daily.

* Sheet "Data 2": keep futures contract 1 only, saved as henryhub_near_monthly.
import excel using Data\EIA\Prices\ng_pri_fut_s1_m.xls, ///
	sheet("Data 2") cellrange(A3) firstrow clear
rename NaturalGasFuturesContract1 henryhub_near_eia
drop Natural* F
gen year=year(Date)
gen month=month(Date)
drop Date
drop if henryhub_near_eia==.
compress
saveold Data\EIA\Prices\henryhub_near_monthly, replace
	//not used: EIA does have daily. also 2-4 months out contracts.
		
//EIA RETAIL
* Pass 1: convert sheet "Data 1" of each price workbook to a .dta file (header
* row as names, first observation dropped, date column renamed to datestring).
foreach series in FWA PCS PEU PG1 PIN PRS {
	import excel using Data\EIA\Prices\ng_pri_sum_a_EPG0_`series'_DMcf_m.xls, ///
		sheet("Data 1") cellrange(A2) firstrow clear
	drop in 1
	destring, replace
	compress
	capture rename sourcekey datestring
	capture rename Sourcekey datestring
	saveold Data\EIA\Prices\ng_pri_sum_a_EPG0_`series'_DMcf_m, replace
}

* Pass 2: join the six series on datestring.
clear all
use Data\EIA\Prices\ng_pri_sum_a_EPG0_FWA_DMcf_m, clear
foreach series in PCS PEU PG1 PIN PRS {
	merge 1:1 datestring using Data\EIA\Prices\ng_pri_sum_a_EPG0_`series'_DMcf_m, nogen
}
drop C BB

* year/month from the date string (first "15" removed, then parsed as "MY")
replace datestring=subinstr(datestring,"15","",1)
drop if datestring==""
gen year=year(date(datestring,"MY"))
gen month=month(date(datestring,"MY"))

* Go long; the suffix after each stub becomes the state code once the "3"
* characters are stripped.
reshape long N9190 N3020 N3045 N3050 N3035 N3010 , ///
	i(year month datestring) j(state) string
replace state=subinstr(state,"3","",.)
rename (N3020 N3045 N3050 N3035 N3010 N9190) ///
	(price_commercial_dmcf price_electricpower_dmcf price_citygate_dmcf ///
	price_industrial_dmcf price_residential_dmcf price_wellhead_dmcf)
compress
saveold Data\EIA\Prices\EIA_prices, replace

//BRENT CRUDE (VIA EIA)
* Monthly Brent price keyed by year and month.
import excel using Data\EIA\Prices\RBRTEm.xls, ///
	sheet("Data 1") cellrange(A3) firstrow clear
rename Eu brent_dbarrel
gen year=year(Date)
gen month=month(Date)
drop Date
compress
saveold Data\EIA\Prices\brent, replace

* WTI: same layout; the kept column is the one whose header starts with "Cushing".
import excel using Data\EIA\Prices\RWTCm.xls, ///
	sheet("Data 1") cellrange(A3) firstrow clear
rename Cushing wti_dbarrel
gen year=year(Date)
gen month=month(Date)
drop Date
compress
saveold Data\EIA\Prices\wti, replace


////////////////
//POPULATION AND OTHER CENSUS
/////////////
// this is 2010 population data.
* Extract 1: column v2 only (the "census" figure rather than the "estimates").
*   The estimates are based on the 2010 Census and reflect changes to the April 1,
*   2010 population due to the Count Question Resolution program and geographic
*   program revisions.
insheet using Data\Census\NST-EST2014-01.csv, comma clear
keep v1 v2
drop in 1/4
drop if v2==""
* keep only rows whose name contains a period, then remove the periods
keep if strmatch(v1,"*.*")
replace v1=subinstr(v1,".","",.)
destring v2, replace ignore(",")
rename (v1 v2) (state_name population_2010)
compress
saveold Data\Census\population, replace

* Extract 2: columns v4-v8 (the annual "estimates", renamed 2010 through 2014).
insheet using Data\Census\NST-EST2014-01.csv, comma clear
keep v1 v4-v8
drop in 1/4
drop if v7==""
keep if strmatch(v1,"*.*")
replace v1=subinstr(v1,".","",.)
destring v4-v8, replace ignore(",")
rename (v1 v4 v5 v6 v7 v8) (state_name population_2010 population_2011 ///
	population_2012 population_2013 population_2014)
compress
saveold Data\Census\population_20102014, replace

* Census region and division codes by state FIPS; rows with FIPS 0 are dropped.
import excel using Data\Census\state_geocodes_v2011.xls, cellrange(A6) firstrow clear
rename Region region
rename Division division
rename StateF fips
drop Name
destring, replace
drop if fips==0
compress
saveold Data\Census\census_regions, replace

* State FIPS lookup, converted from CSV as is.
insheet using Data\Census\state_fips.csv, comma clear
saveold Data\Census\state_fips, replace
	
/////////////////
//WEATHER DATA
///////////////
//NOTE HAWAII AND ALASKA AND DC NOT INCLUDED
//Authors compile annual txt files using the following: (For space purposes, original txt files not posted)
* The commented-out code below is kept for reference only and is not executed.
* It reads the yearly pipe-delimited Cooling and Heating files for 1981-2014,
* reshapes them long, appends/merges them and saves daily_degree_days, which the
* active code further down loads.
/*
clear all
foreach type in Cool Heat{
	foreach y of numlist 1981/2014 {
		import delimited using Data\NOAA\StatesCONUS.`type'ing`y'.txt, ///
			delimiter("|") rowrange(4) stringcols(_all) clear
		rename product region
		reshape long v, i(region) j(temp)
		gen `type'=v if region!="Region"
		gen temp2=v if region=="Region"
		bysort temp: egen temp3=mode(temp2)
		capture rename Cool cdd
		capture rename Heat hdd
		rename temp3 datestring
		drop temp*
		drop if region=="Region"
		drop v
		capture destring cdd, replace
		capture destring hdd, replace
		compress
		saveold Data\NOAA\StatesCONUS.`type'ing`y', replace
	}
}
clear all
foreach y of numlist 1981/2014{
	append using Data\NOAA\StatesCONUS.Cooling`y'
}
foreach y of numlist 1981/2014{
	merge 1:1 region datestring using Data\NOAA\StatesCONUS.Heating`y', nogen
	rename hdd hdd`y'
}
gen hdd=hdd1981
foreach y of numlist 1982/2014{
	replace hdd=hdd`y' if hdd==.
}
drop hdd1* hdd2*
gen date=date(datestring,"YMD")
	format %td date
rename region state
isid state date
compress
saveold Data\NOAA\daily_degree_days, replace //daily count per state
*/

* Active code: average the daily state-level cdd/hdd within each state-month,
* then average those state-month means across states (unweighted) to obtain a
* single series per month.
use Data\NOAA\daily_degree_days, clear //daily count per state
gen year=year(date)
gen month=month(date)
collapse (mean) cdd hdd, by(state month year)
saveold Data\NOAA\monthly_degree_days, replace //mean daily count per state
collapse (mean) cdd hdd, by(month year)
saveold Data\NOAA\monthly_degree_days_US, replace //mean daily count


///////////////
//EXCHANGE RATES, GDP, CPI, AND OTHER MACRO
/////////////

//exchange rate
* Monthly series from EXUSUK.xls, stored as usd_per_gbp.
import excel using Data\Fed\EXUSUK.xls, cellrange(A28) firstrow clear
rename VALUE usd_per_gbp
gen month=month(DATE)
gen year=year(DATE)
drop DATE
compress
saveold Data\Fed\usd_per_gbp_monthly, replace

// GDP annual, real, billions of chained 2009 dollars:
* The date string has its "-01-01" suffix removed so that destring leaves a
* numeric year.
insheet using Data\Fed\GDPC1.csv, comma clear
rename (date value) (year gdp_billions_real)
replace year=subinstr(year,"-01-01","",.)
destring, replace
compress
saveold Data\Fed\GDP_real, replace

//CPI.
// Consumer Price Index for All Urban Consumers: All Items Less Energy , not seasonally adjusted
* NOTE(review): an earlier inline comment on this import described the series as
* "less food and energy", which contradicts the title above -- confirm against
* the source for series CPILEGNS.
import excel using Data\Fed\CPILEGNS.xls, ///
	cellrange(A14) firstrow clear
rename VALUE cpi_leg_ns
gen month=month(DATE)
gen year=year(DATE)
drop DATE
compress
saveold Data\Fed\CPI_LEG_NS, replace

//industrial production
* The date string is parsed with mask "YMD" and reduced to month and year.
insheet using Data\Fed\INDPRO.csv, clear
gen month=month(date(date,"YMD"))
gen year=year(date(date,"YMD"))
drop date
rename value ip_sa
compress
saveold Data\Fed\ip_sa, replace


/////////////
//DATA MERGED - STATE-LEVEL
/////////////
//Consumption: state-level. Wells and rigs no state-level, but marketed production is for a handful.
//NOTE THIS INCLUDES "US" as a state, so that we can look at variables that aren't disaggregated (like wells) without having to create two datasets

* Start from the national series and tag them as state "US". National marketed
* production is parked under a temporary name so it can be copied into the
* state-level variable of the same stem after that file is merged in.
use Data\EIA\Supply\production_summary, clear
	rename production_m production_m_national_temp
	merge 1:1 year month using Data\EIA\Supply\rigs, nogen
	merge 1:1 year month using Data\EIA\Other\imports, nogen
	merge 1:1 year month using Data\EIA\Other\exports, nogen
	merge 1:1 year month using Data\EIA\Other\inventories_monthly, keep(master match) nogen //NOT MERGED: early years
	* (inventories stay in bcf; a throwaway mmcf copy that was generated and then
	*  immediately dropped here has been removed as dead code)
	gen state="US"

* State-level consumption and marketed production.
merge 1:1 state year month using Data\EIA\Demand\EIA_consumption, nogen 
merge 1:1 state year month using Data\EIA\Supply\production_marketed_stateall, nogen 
	replace production_marketed=production_m_national if state=="US"
	drop production_m_national

* Prices: Henry Hub series are national (m:1), retail prices are by state.
merge m:1 year month using Data\EIA\Prices\henryhub_monthly, nogen //NOT MERGED: early years
merge m:1 year month using Data\EIA\Prices\henryhub_near_monthly, nogen //NOT MERGED: early years
merge 1:1 year month state using Data\EIA\Prices\EIA_prices, keep(master match) nogen //NOT MERGED: offshore
merge 1:1 year month state using Data\NOAA\monthly_degree_days, keep(master match) nogen //NOT MERGED: AK, DC, HI, US, offshore; early years

* Geography: FIPS codes (with placeholder codes for the US and offshore rows),
* census regions/divisions, and the time-invariant population figure.
rename state state_abbr
	merge m:1 state_abbr using Data\Census\state_fips, keep(master match) nogen //NOT MERGED: US, offshore
	rename fipscode fips
	replace fips=9999 if state_a=="US"
	replace fips=8888 if state_a=="fedoffshore"
	merge m:1 fips using Data\Census\census_regions, nogen //NOT MERGED: US, offshore
merge m:1 state_name using Data\Census\population, keep(master match) nogen //NOT MERGED: DC, US, offshore
	rename population population_timeinvar

* Macro series: annual GDP, monthly crude prices and CPI.
merge m:1 year using Data\Fed\GDP_real, keep(master match) nogen
merge m:1 year month using Data\EIA\Prices\brent, keep(master match) nogen //NOT MERGED: early
merge m:1 year month using Data\EIA\Prices\wti, keep(master match) nogen //NOT MERGED: early
merge m:1 year month using Data\Fed\CPI_LEG_NS, keep(master match) nogen

//basic coverage:
* Monthly time index, panel declaration, and sample restrictions.
gen ym=ym(year, month)
xtset fips ym, monthly
order ym, before(datestring)
label variable datestring "datestring"
* year>=2014: preliminary data only for some variables.
* year<1990:  no price data - makes it easier to see year effects dummies.
drop if year<1990 | year>=2014
drop if inlist(state_abbr,"AK","HI")

//fixed effects:
* Each xi call uses its own prefix, so the dummy sets coexist.
xi i.year, prefix(YY)
xi i.month, prefix(MM)
* division-by-month (prefix DM) and division-by-year (prefix DY), no omitted group
foreach unit in month year {
	local tag = upper(substr("`unit'",1,1))
	egen division_by_`unit'=group(division `unit')
	xi i.division_by_`unit', prefix(D`tag') noomit
}
egen state_by_month=group(state_abbr month)
xi i.state_by_month, prefix(SM)

//orthogonal polynomial trends:
* Rescale the monthly index to Z on [-1, 1], then build Time1-Time10 with the
* Chebyshev (first kind) recurrence Time(k) = 2*Z*Time(k-1) - Time(k-2).
* DateS, MaxDate and MinDate are left in the dataset; only Z is dropped.
gen DateS = ym / 365		/* scale */
egen MaxDate = max(DateS)
egen MinDate = min(DateS)
gen Z = 2 * (DateS - MinDate) / (MaxDate - MinDate) - 1
gen Time1 = Z
gen Time2 = 2 * Z^2 - 1
gen Time3 = 4 * Z^3 - 3 * Z
forvalues k = 4/10 {
	local k1 = `k' - 1
	local k2 = `k' - 2
	gen Time`k' = 2 * Z * Time`k1' - Time`k2'
}
drop Z

* Henry Hub monthly price multiplied by 1.025 and stored under a per-Mcf name.
* NOTE(review): presumably a $/MMBtu to $/Mcf heat-content conversion -- confirm
* the factor and its source. The new variable is moved to the front of the data.
gen henryhub_dmcf=henryhub_monthly*1.025 //use spot prices for 1997 forwards
	order henryhub_dmcf

//weather:
//ssc inst _gwtmean
* egen wtmean() is user-written; the line above is the install hint.
* National monthly means of state degree days: unweighted, then weighted by the
* time-invariant population.
foreach w in hdd cdd {
	bysort ym: egen `w'_national=mean(`w')
}
foreach w in hdd cdd {
	bysort ym: egen `w'_national_wtd=wtmean(`w'), weight(population_timeinvar)
}

//weather in other divisions
//tab division if state_a=="US"
* For each census division d, the monthly mean (plain and weighted) of degree
* days over every observation NOT in d is assigned to the observations in d.
foreach w in hdd cdd {
	gen `w'_other=.
	gen `w'_other_wtd=.
}
forvalues d=1/9 {
	foreach w in hdd cdd {
		gen temp1=`w' if division!=`d'
		bysort ym: egen temp2=mean(temp1)
		bysort ym: egen temp3=wtmean(temp1), weight(population_timeinvar)
		replace `w'_other=temp2 if division==`d'
		replace `w'_other_wtd=temp3 if division==`d'
		drop temp*
	}
}

//cumulative:
* cumul_<w>_o<k> and cumul_<w>_n<k> are running sums of lags 2 through k
* (k = 2..14) of the weighted other-division and national series. "check"
* recomputes the k=5 sum directly and is summarised as a sanity check.
xtset fips ym, monthly
foreach var in hdd cdd {
	foreach scope in other national {
		local s = substr("`scope'",1,1)
		local base `var'_`scope'_wtd
		gen cumul_`var'_`s'2=l2.`base'
		forval v = 3/14 {
			local prev=`v'-1
			gen cumul_`var'_`s'`v'=cumul_`var'_`s'`prev'+l`v'.`base'
		}
		gen check=l2.`base'+l3.`base'+l4.`base'+l5.`base'-cumul_`var'_`s'5
		sum check
		drop check
	}
}

* Natural logs of the main quantity and price series. The names in the list are
* variable abbreviations; each new variable is "ln" + the abbreviation exactly
* as written, so the list must not be expanded to full names.
local logvars deliv_comm deliv_elec deliv_indu deliv_resi deliv_cons ///
	production_marketed wells_all_gas rigs_gas henryhub_dmcf price_city ///
	price_resi price_comm price_indu price_elec
foreach v of local logvars {
	gen ln`v'=ln(`v')
}
* Diagnostic: count exact zeros (excluding US and DC) from 2001 on.
foreach v of local logvars {
	count if `v'==0 & state_abbr!="US" & state_abbr!="DC" & year>=2001 //deliv_elec only
}
* Zero electric-power deliveries get ln(0.1) rather than a missing log.
replace lndeliv_elec=ln(0.1) if deliv_elec==0
	
//real prices
* Deflate each price by the CPI and rescale by the mean CPI over the year-2013
* observations, then take logs of the deflated series.
quietly sum cpi_leg_ns if year==2013
local cpi_base=r(mean)
foreach p in henryhub_dmcf price_city price_resi price_comm price_indu price_elec {
	gen `p'_real=`p'/cpi_leg_ns*`cpi_base'
	gen ln`p'_real=ln(`p'_real)
}

* Re-declare the panel (earlier bysort calls changed the sort order) and save.
xtset fips ym, monthly
compress
saveold Data\statedata, replace
