# %%
import warnings
warnings.filterwarnings('ignore')

import pandas as pd
import numpy as np
import xlrd
from fuzzywuzzy import fuzz

from config import *

# V-Dem column names that load_vdem_data() keeps and that get_vdem_data()
# pivots into its `polar` and `vio` tables respectively.
# NOTE(review): presumably the V-Dem polarization and political-violence
# indicators -- confirm against the V-Dem codebook.
pol_name = 'v2cacamps_mean'
vio_name = 'v2caviol_osp'


def load_vdem_data():
    """Return the V-Dem columns used by this module, caching them as a CSV.

    If ``vdem.csv`` is not listed in ``DATA_DIR``, the full ``V-Dem.csv`` is
    read from ``VDEM_DIR``, reduced to the identifying columns plus
    ``pol_name`` and ``vio_name``, written to ``DATA_DIR / 'vdem.csv'`` and
    returned. Otherwise the cached CSV is read back and returned.
    """
    cache_path = DATA_DIR / 'vdem.csv'

    if 'vdem.csv' in os.listdir(DATA_DIR):
        return pd.read_csv(cache_path)

    # The full V-Dem.csv is not kept in the data/ folder because of its size.
    # It can be downloaded from
    # https://v-dem.net/data/the-v-dem-dataset/country-year-v-dem-fullothers-v14/
    full_table = pd.read_csv(VDEM_DIR / 'V-Dem.csv')
    wanted = [
        'country_name', 'country_text_id',
        'country_id', 'year', 'historical_date',
        pol_name, vio_name,
    ]
    subset = full_table[wanted]
    subset.to_csv(cache_path, index=False)
    return subset


def load_wdi_data():
    """Return the WDI export, caching a cleaned copy as ``wd_data.csv``.

    If ``wd_data.csv`` is not listed in ``DATA_DIR``, the raw export is read
    from ``WD_DIR``, every ``".."`` cell is replaced with NaN, and the result
    is written to ``DATA_DIR / 'wd_data.csv'``. Otherwise the cached CSV is
    read back.
    """
    if 'wd_data.csv' in os.listdir(DATA_DIR):
        return pd.read_csv(DATA_DIR / 'wd_data.csv')

    raw_name = 'cfd2884d-a4aa-478e-b6fd-7e5c89c4627e_Series - Metadata.csv'
    wdi = pd.read_csv(WD_DIR / raw_name)
    wdi = wdi.replace("..", np.nan)
    wdi.to_csv(DATA_DIR / 'wd_data.csv', index=False)
    return wdi


def get_vdem_data():
    """Load V-Dem and reshape its two indicators into year-by-country tables.

    Returns:
        tuple: ``(vdem, code_map, polar, vio)`` where

        * ``vdem`` is the long table with columns ``country``, ``code``,
          ``year`` (parsed with format ``%Y``) and the two indicator columns;
        * ``code_map`` holds the distinct ``country`` names indexed by
          ``code``;
        * ``polar`` and ``vio`` are float tables with one row per year and
          one column per country code, built from ``pol_name`` and
          ``vio_name`` respectively.
    """
    wanted = ['country_name', 'country_text_id', 'year', pol_name, vio_name]
    new_names = {'country_text_id': 'code', 'country_name': 'country'}

    vdem = load_vdem_data()[wanted].rename(columns=new_names)
    vdem['year'] = pd.to_datetime(vdem['year'], format='%Y')

    code_map = vdem[['country', 'code']].drop_duplicates().set_index('code')

    def year_by_code(indicator):
        # Pivot to code x year first, then transpose so that years are rows.
        wide = vdem.pivot_table(values=indicator, index='code', columns='year')
        return wide.T.astype(float)

    polar = year_by_code(pol_name)
    vio = year_by_code(vio_name)

    return vdem, code_map, polar, vio


def resolve_problematic_mapping(imf_mapper):
    """Re-pair WEO and DataMapper country names that do not match row-wise.

    A row is a "problem" when ``fuzz.ratio`` of its ``weo_country`` and
    ``datamapper_country`` is not 100. For every problem row, the DataMapper
    names of all problem rows are searched for one that matches the WEO name
    after normalisation:

    * WEO name: stripped, with ``'St.'`` replaced by ``'Saint'``;
    * DataMapper name: ``'A, B'`` is compared as ``'B A'``; names without a
      comma are only stripped.

    A DataMapper name is consumed once matched, so it is assigned to at most
    one row. Rows without a match keep their original ``datamapper_country``.
    Finally, two hard-coded overrides (South Sudan, Türkiye) are applied.

    Args:
        imf_mapper (pd.DataFrame): table with ``weo_country`` and
            ``datamapper_country`` columns. It is not modified.

    Returns:
        pd.DataFrame: only the problem rows, under their original index
        labels and with the same columns as ``imf_mapper``, with
        ``datamapper_country`` corrected where possible.
    """
    # Score each row with a plain comprehension instead of DataFrame.apply;
    # this also works when the frame has no rows.
    mismatch = pd.Series(
        [fuzz.ratio(weo, dm) != 100
         for weo, dm in zip(imf_mapper['weo_country'],
                            imf_mapper['datamapper_country'])],
        index=imf_mapper.index, dtype=bool)

    # Explicit copy: the rows are edited below, and writing into a filtered
    # slice is a chained assignment (SettingWithCopyWarning in pandas).
    problems = imf_mapper[mismatch].copy()

    # DataMapper names still available for matching.
    unclaimed = problems['datamapper_country'].to_list()

    for row_label, weo_name in zip(problems.index,
                                   problems['weo_country'].to_list()):
        # Normalise the WEO name once per row, not once per candidate.
        wanted = weo_name.strip().replace('St.', 'Saint')

        for candidate in unclaimed:
            if ',' in candidate:
                # "Korea, Republic of" -> "Republic of Korea"
                head, tail = candidate.split(',')[:2]
                comparable = tail.strip() + ' ' + head.strip()
            else:
                comparable = candidate.strip()

            if fuzz.ratio(wanted, comparable) == 100:
                problems.loc[row_label, 'datamapper_country'] = candidate
                # Safe to mutate: the loop is left immediately afterwards.
                unclaimed.remove(candidate)
                break

    # Names the normalisation above cannot reconcile.
    problems.loc[problems['weo_country'] == 'South Sudan',
                 'datamapper_country'] = 'South Sudan, Republic of'
    problems.loc[problems['weo_country'] == 'Türkiye',
                 'datamapper_country'] = 'Türkiye, Republic of'
    return problems


def imf_data_iso_mapping():
    """Build the lookup between ISO codes, WEO names and DataMapper names.

    The DataMapper names are the first column of ``imf-cent_gross_debt.xls``
    (first row skipped), truncated before the ``'Africa (Region)'`` row when
    that row is present. The WEO names and ISO codes come from
    ``WEO_Data.csv`` rows that have a value in ``'Estimates Start After'``.
    The two name lists are joined on row position, and rows whose names
    disagree are then corrected by ``resolve_problematic_mapping``.

    Returns:
        pd.DataFrame: columns ``code``, ``weo_country`` and
        ``datamapper_country``.
    """
    debt_book = xlrd.open_workbook(
        IMF_DIR / 'imf-cent_gross_debt.xls', ignore_workbook_corruption=True)
    debt_sheet = pd.read_excel(debt_book, skiprows=1)
    datamapper_names = debt_sheet.replace('no data', np.nan).iloc[:, 0]

    # Keep only the rows above the 'Africa (Region)' row, when it exists.
    is_region_row = datamapper_names == 'Africa (Region)'
    if is_region_row.any():
        first_region = datamapper_names[is_region_row].index[0]
        datamapper_names = datamapper_names.iloc[:first_region]

    weo = pd.read_csv(IMF_DIR / 'WEO_Data.csv',
                      encoding='latin-1', skipfooter=2)
    weo = weo.dropna(subset='Estimates Start After')
    imf_mapper = weo[['ISO', 'Country']].rename(
        columns={'ISO': 'code', 'Country': 'weo_country'})
    imf_mapper = imf_mapper.reset_index(drop=True)

    # Index-aligned assignment: row i of the WEO table gets DataMapper name i.
    imf_mapper['datamapper_country'] = datamapper_names

    corrected = resolve_problematic_mapping(imf_mapper)
    imf_mapper.loc[corrected.index] = corrected

    return imf_mapper


def parse_imf_data(name):
    """Read an IMF ``.xls`` workbook into a table of year rows by ISO code.

    ``'no data'`` cells become NaN, every column after the first is cast to
    float, and rows that are entirely empty are dropped. Country names in the
    first column are translated to ISO codes through the cached
    ``imf_mapper.csv`` (created first if it is not listed in
    ``DERIVED_DIR``); rows without a code are discarded.

    Args:
        name: file name of the workbook inside ``IMF_DIR``.

    Returns:
        pd.DataFrame: one column per ISO code; the index is the workbook's
        remaining column labels parsed as dates with format ``%Y``.
    """
    book = xlrd.open_workbook(IMF_DIR / name, ignore_workbook_corruption=True)
    table = pd.read_excel(book).replace('no data', np.nan)

    name_col = table.columns[0]
    float_types = {col: np.float64 for col in table.columns[1:]}
    table = table.rename(columns={name_col: 'country'})
    table = table.astype(float_types)
    table = table.dropna(how='all')

    # Build the name/ISO lookup once and reuse the cached CSV afterwards.
    if 'imf_mapper.csv' not in os.listdir(DERIVED_DIR):
        fresh_mapper = imf_data_iso_mapping()
        fresh_mapper.to_csv(DERIVED_DIR / 'imf_mapper.csv', index=False)
    imf_mapper = pd.read_csv(DERIVED_DIR / 'imf_mapper.csv')

    lookup = imf_mapper[['code', 'datamapper_country']]
    coded = lookup.merge(
        table, left_on='datamapper_country', right_on='country', how='right')
    coded = coded.dropna(subset='code')
    coded = coded.drop(columns=['datamapper_country'])

    by_year = coded.set_index('code').drop(columns=['country']).T
    by_year.index = pd.to_datetime(by_year.index, format='%Y')
    return by_year


def get_imf_data():
    """Return IMF debt and balance tables, gap-filled from the WDI export.

    Missing values in the IMF debt table are filled from the WDI series
    ``'Central government debt, total (% of GDP)'``. For every country in the
    debt table, missing values in the IMF balance table are filled from
    ``'Net lending (+) / net borrowing (-) (% of GDP)'``; a country absent
    from the balance table is added first when WDI has it.

    Returns:
        tuple: ``(imf_debt, imf_bal)`` as float tables. Columns of
        ``imf_bal`` that are entirely NaN are dropped.
    """
    imf_debt = parse_imf_data('imf-cent_gross_debt.xls')
    imf_bal = parse_imf_data('imf_pri_balance.xls')

    wdi = load_wdi_data()

    def wdi_by_year(series_name):
        # One WDI series reshaped to year rows x country-code columns.
        rows = wdi[wdi['Series Name'] == series_name]
        rows = rows.drop(columns=['Series Name', 'Series Code', 'Country Name'])
        rows = rows.rename(columns={'Country Code': 'code'}).set_index('code')
        # The last column after transposing is discarded.
        table = rows.T.iloc[:, :-1]
        # Only the first four characters of each label are parsed as the year.
        table.index = pd.to_datetime(
            [label[:4] for label in table.index], format='%Y')
        return table

    debt_sup = wdi_by_year('Central government debt, total (% of GDP)')
    for code in imf_debt:
        if code in debt_sup:
            imf_debt[code] = imf_debt[code].fillna(debt_sup[code])

    bal_sup = wdi_by_year('Net lending (+) / net borrowing (-) (% of GDP)')
    # The loop runs over the debt table's countries, not the balance table's.
    for code in imf_debt:
        if code not in bal_sup:
            continue
        if code not in imf_bal:
            imf_bal[code] = np.nan
        imf_bal[code] = imf_bal[code].fillna(bal_sup[code])

    imf_debt = imf_debt.astype(float)
    imf_bal = imf_bal.astype(float).dropna(how='all', axis=1)
    return imf_debt, imf_bal


def get_frule_data():
    """Load the IMF fiscal-rule sheet and attach ISO codes to each row.

    Reads the ``'Rules'`` sheet of ``imf_fiscal_rule.xlsx``, keeps the
    ``'Type of fiscal rule in place'`` column group (renamed to ``ER``,
    ``RR``, ``BBR``, ``DR``), and treats ``'-'`` and missing cells as 0.
    Country names are translated to ISO codes through the cached
    ``imf_mapper.csv`` (created first if it is not listed in
    ``DERIVED_DIR``): the WEO name is tried first, then the DataMapper name.

    Returns:
        tuple: ``(frule, bbr, dr)`` where ``frule`` is the long table with
        columns ``ind``, ``year``, ``country``, the four rule columns and
        ``code``; ``bbr`` and ``dr`` are year-by-code pivots of the ``BBR``
        and ``DR`` columns.
    """
    frule = pd.read_excel(IMF_DIR / 'imf_fiscal_rule.xlsx',
                          sheet_name='Rules', index_col=[0, 1, 2], header=[0, 1])
    # Drop rows whose index has a missing level.
    frule = frule.loc[frule.index.dropna()]

    frule = frule['Type of fiscal rule in place']

    frule.columns = ['ER', 'RR', 'BBR', 'DR']
    frule = frule.replace('-', np.nan).fillna(0)
    frule = frule.reset_index().rename({
        'level_0': 'ind',
        'level_1': 'year',
        'level_2': 'country'
    }, axis=1)

    # Spellings in the fiscal-rule sheet that differ from the mapper's names.
    frule['country'] = frule['country'].replace({
        'Montenegro, Rep. of': 'Montenegro',
        'Guinea Bissau': 'Guinea-Bissau'
    })

    if 'imf_mapper.csv' not in os.listdir(DERIVED_DIR):
        imf_mapper = imf_data_iso_mapping()
        imf_mapper.to_csv(DERIVED_DIR / 'imf_mapper.csv', index=False)
    imf_mapper = pd.read_csv(DERIVED_DIR / 'imf_mapper.csv')
    imf_mapper['datamapper_country'] = imf_mapper['datamapper_country'].str.strip()

    def codes_by(name_col):
        # name -> code lookup; the first occurrence wins so the index is unique.
        known = imf_mapper.dropna(subset=[name_col])
        known = known.drop_duplicates(subset=[name_col])
        return known.set_index(name_col)['code']

    # Look codes up per row instead of merging and re-attaching by position:
    # the result stays aligned with frule even if the mapper has repeated
    # names or a merge would reorder or expand the rows.
    by_weo_name = frule['country'].map(codes_by('weo_country'))
    by_datamapper_name = frule['country'].map(codes_by('datamapper_country'))
    frule['code'] = by_weo_name.fillna(by_datamapper_name)

    frule['year'] = pd.to_datetime(frule['year'], format='%Y')

    bbr = frule.pivot_table(values='BBR', index='code', columns='year').T
    dr = frule.pivot_table(values='DR', index='code', columns='year').T
    return frule, bbr, dr



if __name__ == '__main__':
    # Rebuild the name/ISO lookup unconditionally and overwrite the cached
    # CSV, so the loaders below read a freshly generated imf_mapper.csv.
    imf_mapper = imf_data_iso_mapping()
    imf_mapper.to_csv(DERIVED_DIR / 'imf_mapper.csv', index=False)

    # Load every dataset once; the results are left in module-level names.
    vdem, code_map, polar, vio = get_vdem_data()
    imf_debt, imf_bal = get_imf_data()
    frule, bbr, dr = get_frule_data()

# %%
