#SETUP-----------------------
# Run the setup lines in Master if running as stand-alone
# Trigger garbage collection before starting.
gc()
# Clear the workspace, keeping only the objects whose name contains "Dir"
# (directory definitions) and the graph theme `mytheme`.
rm(list = setdiff(ls(), c(grep("Dir", ls(), value = TRUE), "mytheme")))

# NOTE(review): absolute, machine-specific path. It is assigned unconditionally,
# so it replaces any baseDir that survived the clean-up above.
baseDir <- "/Users/andreamanera/Dropbox (MIT)/Research/Brookings20/replicationKit"

binDir <- paste0(baseDir, "/R/bin")

# setupLines.R is not part of this file; presumably it loads the packages and
# defines the other *Dir paths (taxDir, capTaxDir, outDir) used below.
source(paste0(binDir, "/setupLines.R"))

# PART I: Import and arrange data  -----------

# Step 1. Import IRS_IBD.csv, obtain the share of S-corporations over partnership + corporations Net income-----------
# IRS data on income by type of corporation
tableIRS <- setDT(read_csv(paste0(taxDir, "/IRS_IBD.csv")))

# Keep ten rows: net income less deficit, and net income, for total corp,
# C-corp, RIC/REIT, S-corp and partnerships.
# RIC/REIT are pass-through. Assume all goes to shareholders.
tableIRS <- tableIRS[Line %in% c(12, 13, 19, 20, 26, 27, 33, 34, 40, 41)]

# Labels are assigned by row position, so this relies on the selected rows
# appearing in the CSV in the order listed here.
tableIRS[, Item := c("NI_D_TOT_CORP", "NI_TOT_CORP",
                     "NI_D_C_CORP", "NI_C_CORP",
                     "NI_D_RIC_REIT", "NI_RIC_REIT",
                     "NI_D_S_CORP", "NI_S_CORP",
                     "NI_D_PART", "NI_PART")]

tableIRS[, Line := NULL]

# Wide (one column per year) -> long (Item, Year, Value)
tableIRS <- melt(tableIRS,
                 id.vars = "Item",
                 measure.vars = setdiff(colnames(tableIRS), "Item"),
                 variable.name = "Year",
                 value.name = "Value")
# Force numeric types; entries that cannot be parsed become NA.
set(tableIRS, j = "Value", value = as.numeric(tableIRS[["Value"]]))
set(tableIRS, j = "Year", value = as.numeric(as.character(tableIRS[["Year"]])))

# Long -> wide again, now one row per Year and one column per Item.
tableIRS <- setDT(dcast(tableIRS, Year ~ Item, value.var = "Value"))
# Drop rows whose original column header was not a year.
tableIRS <- tableIRS[!is.na(Year)]

# UNITS are thousands of dollars, translate into billions
# (applies to every column except Year)
colNorm <- setdiff(colnames(tableIRS), "Year")
tableIRS[, (colNorm) := lapply(.SD, function(x) x / 1e6), .SDcols = colNorm]

# GET SHARES----------
# All four shares are computed from the rescaled net-income columns.
tableIRS[, `:=`(
  SHARE_RR_CORP = NI_RIC_REIT / NI_TOT_CORP,
  SHARE_S_CORP  = NI_S_CORP / NI_TOT_CORP,
  SHARE_C_CORP  = NI_C_CORP / NI_TOT_CORP,
  # share of S over S + Partnerships
  SHARE_S_SPART = NI_S_CORP / (NI_S_CORP + NI_PART)
)]


# INTERPOLATE MISSING VALUES FOR 1990
# Each 1990 share is overwritten with the simple average of its 1989 and 1991
# values.
tableIRS[Year == 1990,
         SHARE_RR_CORP := (tableIRS[Year == 1989, SHARE_RR_CORP] +
                             tableIRS[Year == 1991, SHARE_RR_CORP]) / 2]
tableIRS[Year == 1990,
         SHARE_S_CORP := (tableIRS[Year == 1989, SHARE_S_CORP] +
                            tableIRS[Year == 1991, SHARE_S_CORP]) / 2]
tableIRS[Year == 1990,
         SHARE_C_CORP := (tableIRS[Year == 1989, SHARE_C_CORP] +
                            tableIRS[Year == 1991, SHARE_C_CORP]) / 2]
tableIRS[Year == 1990,
         SHARE_S_SPART := (tableIRS[Year == 1989, SHARE_S_SPART] +
                             tableIRS[Year == 1991, SHARE_S_SPART]) / 2]

# COMPLETE::::: FILL LAST VALUE FOR YEARS AFTER 2013---------------
#tableBEA[Year>2013, "SHARE_INC_C_CORP" ] = tableBEA[Year==2013, "SHARE_INC_C_CORP" ]

# Step 2. Import for \tau^{\text{DIV}}: tax series from LT_taxes_77_2014 and NetCGTaxes from office of Tax Analysis --------

# Long-term gains file: the four columns are renamed by position.
tableOTA <- setDT(read_csv(paste0(capTaxDir, "/LT_taxes_All_77_2014.csv")))
setnames(tableOTA, c("Year", "LT_GAIN", "TAX_LT", "TAU_DIV"))
tableOTA[, `:=`(
  TAU_DIV = TAU_DIV / 100,  # percent -> fraction
  Year    = as.numeric(as.character(Year))
)]

tableOTA_TOT <- setDT(read_csv(paste0(capTaxDir, "/NetCG_taxes_53_2014.csv")))

# Keep every row of tableOTA and attach the matching tableOTA_TOT columns.
tableOTA <- tableOTA_TOT[tableOTA, on = "Year"]

# Short-term rate = (total tax - long-term tax) / (total gains - long-term gains).
# TAX_CG and TOT_CG are not created in this script; presumably they are columns
# of NetCG_taxes_53_2014.csv -- TODO confirm.
tableOTA[, TAU_ST := (TAX_CG - TAX_LT) / (TOT_CG - LT_GAIN)]

# Step 3. Get qualified/unqualified dividends-----------

tableDiv <- setDT(read_csv(paste0(capTaxDir, "/17intaba.csv")))

# Keep the two dividend rows; labels are assigned by row position.
tableDiv <- tableDiv[Line %in% c(17, 19)]
tableDiv[, Item := c("ORD_DIV", "QUALIF_DIV")]

# The line number is no longer needed once the rows are labelled.
tableDiv[, Line := NULL]

# Wide (one column per year) -> long (Item, Year, Value)
tableDiv <- melt(tableDiv,
                 id.vars = "Item",
                 measure.vars = setdiff(colnames(tableDiv), "Item"),
                 variable.name = "Year",
                 value.name = "Value")
# Force numeric types; entries that cannot be parsed become NA.
set(tableDiv, j = "Value", value = as.numeric(tableDiv[["Value"]]))
set(tableDiv, j = "Year", value = as.numeric(as.character(tableDiv[["Year"]])))

# Long -> wide again: one row per Year, one column per Item.
tableDiv <- setDT(dcast(tableDiv, Year ~ Item, value.var = "Value"))
tableDiv <- tableDiv[!is.na(Year)]

# Translate into billions: divide the two value columns (positions 2 and 3)
# by 1e6.
tableDiv[, (names(tableDiv)[2:3]) := lapply(.SD, function(x) x / 1e6),
         .SDcols = 2:3]


# Step 4. Get Net Dividends from BEA ----------

tableBEADiv <- setDT(read_csv(
  paste0(capTaxDir, "/BEA_1_12_National_income_by_type.csv")))

# Only line 16 is used; it is labelled NET_DIV_CORP.
tableBEADiv <- tableBEADiv[Line == 16]
tableBEADiv[, Item := "NET_DIV_CORP"]

tableBEADiv[, Line := NULL]

# Wide (one column per year) -> long (Item, Year, Value)
tableBEADiv <- melt(tableBEADiv,
                    id.vars = "Item",
                    measure.vars = setdiff(colnames(tableBEADiv), "Item"),
                    variable.name = "Year",
                    value.name = "Value")
# Force numeric types; entries that cannot be parsed become NA.
set(tableBEADiv, j = "Value", value = as.numeric(tableBEADiv[["Value"]]))
set(tableBEADiv, j = "Year",
    value = as.numeric(as.character(tableBEADiv[["Year"]])))

# Long -> wide again: one row per Year with a single NET_DIV_CORP column.
tableBEADiv <- setDT(dcast(tableBEADiv, Year ~ Item, value.var = "Value"))
tableBEADiv <- tableBEADiv[!is.na(Year)]


# Step 5. get the BEA table containing NOSCORP_IRS (and the other tax rates)-------

# TaxSeries.RData is expected to provide tableBEA (used below) and tableTaxes
# (not needed here, so it is removed straight away).
load(paste0(outDir, "/TaxSeries.RData"))
rm(list = "tableTaxes")

# Keep the net operating surpluses and the two tax-rate series.
# "NOSPUE_IRS" used to be listed twice, which produced two identically named
# columns that were carried through the merges into the saved table; each
# column is now selected once.
tableBEA <- tableBEA[, c("Year", "NOSCORP_IRS", "NOSPUE_IRS",
                         "T_K_corp_net", "T_INC"),
                     with = FALSE]



# Step 6. Get IRS data on short-term capital gains in various forms--------

tableCapGain <- setDT(read_csv(paste0(capTaxDir, "/yearly_cap_gain.csv")))
# Divide every column after the first by 1e6 (the first column is left as is).
tableCapGain[, 2:ncol(tableCapGain)] <-
  tableCapGain[, 2:ncol(tableCapGain)] / 1e6

# Step 7. Import Line Items to get S-corp income ------------

tableLineItems <- setDT(read_csv(paste0(capTaxDir, "/Line_Counts_S_corps.csv")))
# Same rescaling: every column after the first is divided by 1e6.
tableLineItems[, 2:ncol(tableLineItems)] <-
  tableLineItems[, 2:ncol(tableLineItems)] / 1e6

# Merge all data sets
# Full outer joins on Year. Start from the two BEA tables, then fold in the
# remaining tables one at a time; at each step the newly added table is the
# left-hand (x) argument, so its columns come before those already merged.
tableDATA <- Reduce(
  function(merged, nextTable) {
    merge.data.frame(nextTable, merged, by = "Year", all = TRUE)
  },
  list(tableCapGain, tableDiv, tableIRS, tableLineItems, tableOTA),
  merge.data.frame(tableBEA, tableBEADiv, by = "Year", all = TRUE)
)

# PART II: Compute quantities -------------

# Step 1: xi_ST_C_Corp Share of surplus realized as non-qualif. dividends, IRS distributions, cap gain distributions, S.T. cap gain -------
# merge.data.frame returned a data.frame; convert it in place.
setDT(tableDATA)

# Four variants of the share, all over the same denominator
# SHARE_C_CORP * NOSCORP_IRS:
#   xi_ST_C_Corp        most complete measure including NET short-term capital
#                       gains minus losses (assets can change hands multiple
#                       times)
#   xi_ST_C_Corp_noCG   excluding ST CORP capital gain per se, these are
#                       available only for very few years
#   xi_ST_C_Corp_DIV    including only dividends
#   xi_ST_C_Corp_DIV_ST including only dividends and CG
tableDATA[, c("xi_ST_C_Corp", "xi_ST_C_Corp_noCG",
              "xi_ST_C_Corp_DIV", "xi_ST_C_Corp_DIV_ST") := {
  cCorpSurplus <- SHARE_C_CORP * NOSCORP_IRS
  list(
    (ORD_DIV + cap_gain_Dist_TOT + taxable_IRA + Corp_ST_net) / cCorpSurplus,
    (ORD_DIV + cap_gain_Dist_TOT + taxable_IRA) / cCorpSurplus,
    (ORD_DIV) / cCorpSurplus,
    (ORD_DIV + Corp_ST_net) / cCorpSurplus
  )
}]

# Sample means over all years (NAs ignored), stored as constant columns.
tableDATA[, `:=`(
  xi_ST_C_Corp_DIV_mean  = mean(xi_ST_C_Corp_DIV, na.rm = TRUE),
  xi_ST_C_Corp_noCG_mean = mean(xi_ST_C_Corp_noCG, na.rm = TRUE),
  xi_ST_C_Corp_mean      = mean(xi_ST_C_Corp, na.rm = TRUE)
)]

# Step 2: xi_ST_S_Corp, share of S_CORP surplus realized as s.t. gain---------

tableDATA[, share_S_C := SHARE_C_CORP + SHARE_S_CORP]
tableDATA[, xi_ST_S_Corp :=
            SHARE_S_SPART * SP_ST_net / (SHARE_S_CORP * NOSCORP_IRS)]
# Negative shares are treated as missing.
tableDATA[xi_ST_S_Corp < 0, xi_ST_S_Corp := NA_real_]

# Step 3: xi_inc,  share of S_CORP surplus realized as income ---------
tableDATA[, `:=`(
  xi_inc      = S_net_inc / (SHARE_S_CORP * NOSCORP_IRS),
  xi_S_inc_ST = (S_net_inc + SHARE_S_SPART * SP_ST_net) /
    (SHARE_S_CORP * NOSCORP_IRS)
)]

# PART III: take averages and apply formula.
xi_S_inc_ST <- mean(tableDATA[["xi_S_inc_ST"]], na.rm = TRUE)
xi_ST_C_Corp <- tableDATA[["xi_ST_C_Corp_mean"]][1]

# FORMULA:
# one_minus_T_c_div is the sum of three terms:
#   C-corps : SHARE_C_CORP * (1 - T_K_corp_net) * blend of (1 - T_INC) and
#             (1 - TAU_DIV), weighted by xi_ST_C_Corp_mean
#   S-corps : SHARE_S_CORP * blend of (1 - T_INC) and (1 - TAU_DIV), weighted
#             by the sample mean of xi_S_inc_ST
#   rest    : (1 - SHARE_C_CORP - SHARE_S_CORP) * (1 - T_INC)
# T_c_div is one minus that sum.
# Inside the data.table call, xi_S_inc_ST refers to the column of that name,
# not to the scalar defined above (its mean is the same number).
tableDATA[, c("one_minus_T_c_div", "T_c_div") := {
  sCorpIncShare <- mean(xi_S_inc_ST, na.rm = TRUE)
  cCorpTerm <- SHARE_C_CORP * (1 - T_K_corp_net) *
    (xi_ST_C_Corp_mean * (1 - T_INC) +
       (1 - xi_ST_C_Corp_mean) * (1 - TAU_DIV))
  sCorpTerm <- SHARE_S_CORP *
    (sCorpIncShare * (1 - T_INC) + (1 - sCorpIncShare) * (1 - TAU_DIV))
  otherTerm <- (1 - SHARE_C_CORP - SHARE_S_CORP) * (1 - T_INC)
  keptShare <- cCorpTerm + sCorpTerm + otherTerm
  list(keptShare, 1 - keptShare)
}]


# FIX MISSING VALUES AT LATEST AVAILABLE DATE--------------
# Tax rates: years after 2013 take the 2014 value.
# Shares: years after 2012 take the 2012 value.
# NOTE(review): these fills run after one_minus_T_c_div / T_c_div have been
# computed, so those two columns do not use the filled values -- confirm this
# ordering is intended.
tableDATA[Year > 2013, TAU_ST := tableDATA[Year == 2014, TAU_ST]]
tableDATA[Year > 2013, TAU_DIV := tableDATA[Year == 2014, TAU_DIV]]
tableDATA[Year > 2012, SHARE_C_CORP := tableDATA[Year == 2012, SHARE_C_CORP]]
tableDATA[Year > 2012, SHARE_S_CORP := tableDATA[Year == 2012, SHARE_S_CORP]]

# Save the full table under the name tableTaxDiv.
tableTaxDiv <- tableDATA
save(tableTaxDiv, file = paste0(outDir, "/TaxSeriesDiv.RData"))
  
