* Session setup. On Stata 12 and later the memory setting is ignored.
clear all
set mem 5g
set more off

* Load the hand-coded list of tax phrases and keep one row per phrase so it
* can serve as the unique side of the 1:m merge below.
* A forward slash is used as the directory separator so the path works on
* Windows, macOS and Unix alike.
insheet using "rawdata/tax_phrases.csv"
duplicates drop phrase_name, force
sort phrase_name

* Attach the phrase panel; mergephrases records the match status
* (3 = phrase present in both the tax list and the panel).
*merge 1:m phrase_name using phrase23_panel_oct_27_2012.dta, gen(mergephrases)
merge 1:m phrase_name using phrase23_panel_replication.dta, gen(mergephrases)

* Drop observations with no Google frequency.
drop if googlefreq==.

* NOTE(review): tsfill needs panel/time settings (tsset or xtset) to be in
* effect; they are not set in this file, so they presumably come from the
* panel dataset - confirm.
tsfill, full

* Observations added by tsfill have missing freq; count them as zero.
replace freq=0 if freq==.

* Indicator 1: phrase appears in the hand-coded tax phrase list
* (matched in both master and using data by the merge).
generate taxes = (mergephrases == 3)

* Indicator 2: regular-expression match on the token tax with a literal
* period before it, after it, or on both sides.
generate tax_rx = regexm(phrase_name, "\.tax\.") | regexm(phrase_name, "\.tax") | regexm(phrase_name, "tax\.")

* Indicator 3: regular-expression match on incom, a literal period, then tax.
generate itax_rx = regexm(phrase_name, "incom\.tax")

* The three indicators are processed together later via this global list.
global vars taxes tax_rx itax_rx

* Check how strongly the alternative indicators agree.
corr $vars

* Absolute value of the trimmed party-correlation score.
generate absideology = abs(plscorrparty_trimmed)

* Total Google counts within each congress.
bysort cong: egen sumgoogle = sum(googlecounts)
*bys cong: egen sumfreq=sum(freq)

* For every indicator listed in the global macro vars, build per-congress
* aggregates, once weighted by freq and once weighted by googlecounts:
*   <ind>freqsum, <ind>googlesum      indicator-weighted totals divided by
*                                     the per-congress totals
*   <ind>sum, <ind>sum_google         indicator-weighted totals (denominators)
*   <ind>partisanship[g]              weighted mean of plscorrparty_trimmed
*   <ind>polarization[g]              weighted mean of absideology
foreach j of global vars {
    * A missing indicator is treated as zero.
    replace `j' = 0 if `j' == .

    egen `j'freqsum = sum(`j'*freq), by(cong)
    egen `j'googlesum = sum(`j'*googlecounts), by(cong)

    * NOTE(review): sumf is a variable abbreviation. The line creating
    * sumfreq earlier in this file is commented out, so this presumably
    * resolves to a variable that arrives with the merged panel - confirm.
    replace `j'freqsum = `j'freqsum/sumf
    replace `j'googlesum = `j'googlesum/sumgoogle

    * Denominators for the weighted means below.
    egen `j'sum = sum(freq*`j'), by(cong)
    egen `j'sum_google = sum(googlecounts*`j'), by(cong)

    * Frequency-weighted measures.
    egen `j'weightedpartisanship = sum(freq*plscorrparty_trimmed*`j'), by(cong)
    egen `j'weightedpolarization = sum(freq*absideology*`j'), by(cong)
    g `j'partisanship = `j'weightedpartisanship/`j'sum
    g `j'polarization = `j'weightedpolarization/`j'sum

    * Google-count-weighted measures. The numerators must be the
    * google-weighted sums so that they match the google denominator;
    * previously the frequency-weighted numerators were divided by it.
    egen `j'weightedpartisanshipg = sum(googlecounts*plscorrparty_trimmed*`j'), by(cong)
    egen `j'weightedpolarizationg = sum(googlecounts*absideology*`j'), by(cong)
    g `j'partisanshipg = `j'weightedpartisanshipg/`j'sum_google
    g `j'polarizationg = `j'weightedpolarizationg/`j'sum_google

    * The intermediate numerators are no longer needed.
    drop `j'weighted*
}

* Reduce the data to one observation per congress, keeping the mean (the
* collapse default statistic) of the polarization, partisanship and sum
* variables.
collapse *polar* *partisan* *sum, by(cong)

* Convert congress number to a calendar year: congress 112 maps to 2011
* and each earlier congress is two years further back.
generate year = 2011 - 2*(112 - cong)

* Store the congress-level dataset, then reload it as the master data.
local topicfile "topicsdataset"
save "`topicfile'", replace
use "`topicfile'", clear

* Bring in the reshaped Gallup data by year (old-style merge syntax).
merge year using "rawdata/gallupreshaped.dta", sort

* All graph output below is written relative to this directory.
cd "Pics_Tables"

* Net sentiment: (too high minus too low) as a share of the three response
* categories (too high, too low, no opinion).
local net_share   taxestoohighpercent -taxestoolowpercent
local all_answers taxestoohighpercent +taxestoolowpercent +taxesnoopinion
generate taxsent = (`net_share') / (`all_answers')

* Restrict the sample to years up to and including 2000
* (observations with a missing year are removed as well).
keep if year <= 2000

* Time-series plot of the sentiment measure for years after 1955.
twoway tsline taxsent if year > 1955
graph export taxsent_over_time.png, replace

* Standardize each series with egen std() so they can share one axis;
* the standardized copy is prefixed with z2.
foreach series in taxsent taxespolarization taxespolarizationg tax_rxpolarization tax_rxpolarizationg itax_rxpolarizationg {
    egen z2`series' = std(`series')
}

* Give the plotted series presentation-friendly names.
rename z2taxsent               tax_high_sentiment
rename z2taxespolarizationg    taxes_polarizingg
rename z2tax_rxpolarizationg   tax_rx_polarizingg
rename z2itax_rxpolarizationg  itax_rx_polarizingg

* Legend text for the two plotted lines.
label variable tax_high_sentiment "Anti-tax sentiment (Gallup)"
label variable tax_rx_polarizingg "Polarizing Tax phrases (RegExp) (Google N-grams)"

* Overlay standardized sentiment (dashed) and the standardized
* regular-expression polarization measure (solid) against year.
twoway (line tax_high_sentiment year, lwidth(medium) lpattern(dash) legend(rows(2))) ///
       (line tax_rx_polarizingg year, lwidth(medium)), ///
       xlabel(#12, format(%9.0g) valuelabel) ///
       legend(on size(med) margin(vsmall) linegap(tiny))

* Save the graph (errors from graph save are suppressed by capture),
* then export it as PNG and EPS.
capture graph save nice_polarizationg_sentiment_tax_rx, replace
graph export nice_polarizationg_sentiment_tax_rx.png, replace
graph export nice_polarizationg_sentiment_tax_rx.eps, replace
