%% reset the workspace and silence warnings
clear all
plotflag = 0;
warning('off','all')

% the relative paths used below are resolved from the folder holding this script
if ~isdeployed
  scriptDir = fileparts(which(mfilename));
  cd(scriptDir);
end

%% load in data
% The first spreadsheet column is a numeric period key; the rest are stocks.
% NOTE(review): the key is presumably YYYYMM (year = floor(key/100)) - confirm against the xlsx.
raw  = table2array(readtable('../../output/u_sexstate_nsa.xlsx'));
key  = raw(:,1);
date = key*100+01;       % period key with a trailing 01 appended
year = floor(key/100);
UU   = raw(:,2:end);     % stock columns only
[T,M] = size(UU);

group = 102; % group size (number of columns in each stock block)

% UU holds four side-by-side blocks of `group` columns each
U_sm = UU(:, 1:group);
U_m  = UU(:, group+(1:group));
E_m  = UU(:, 2*group+(1:group));
N_m  = UU(:, 3*group+(1:group));

% From row 217 of the untrimmed sample onward, rescale the first block:
% odd columns by one factor, even columns by another.
% NOTE(review): presumably a survey-redesign correction that differs by sex - confirm the source of the factors.
late = 217:size(U_sm,1);
U_sm(late,1:2:101) = 1.153209*U_sm(late,1:2:101);
U_sm(late,2:2:102) = 1.158577*U_sm(late,2:2:102);

% working copies used by the rest of the script
U   = U_m;
U_s = U_sm;
E   = E_m;
N   = N_m;

% no observation for some states in 1976 and 1977: drop the first 24 rows
keep = 25:numel(date);
date = date(keep,:);
year = year(keep,:);
U    = U(keep,:);
U_s  = U_s(keep,:);
E    = E(keep,:);
N    = N(keep,:);
T    = size(date,1); % number of periods left in the sample

% a stock of exactly zero is treated as missing
U(U==0)     = NaN;
U_s(U_s==0) = NaN;
E(E==0)     = NaN;
N(N==0)     = NaN;

%% calculate unemployment rates for each group and write them out
% column headers come from the header row of ur_names.xlsx
ur_names = readtable('ur_names.xlsx');

% first column is the date, then U/(U+E) for every group
urate = array2table([date U./(U+E)]);
urate.Properties.VariableNames = ur_names.Properties.VariableNames;
writetable(urate,'csv/urates_sexstate.csv');

%% calculate hazard rates for each group
% F(t) = 1 - (U(t+1) - U_s(t+1)) / U(t), computed for all groups at once
cols = 1:group;
Fall = 1 - ((U(2:end,cols)-U_s(2:end,cols))./U(1:end-1,cols));

% define Fall to be NaN if >=1 - this might be the case with averaging monthly stocks
Fall(Fall >= 1) = NaN;
fall = -log(1 - Fall);

% fsolve below cannot work with NaN inputs, so remember where they are and
% put a placeholder value in their place
fall_nan = isnan(fall);
fall(fall_nan) = 0.5;

U_nan = isnan(U);
U(U_nan) = 0.5;

E_nan = isnan(E);
E(E_nan) = 0.5;

% Solve for s period by period and group by group.
% 'shimerall' is defined outside this file; it receives the current stock,
% next-period stock, f and the labor force (U+E) as extra arguments.
opts = optimset('Display','off');
sall = ones(T-1,group);
s0   = 0.034; % initial guess; afterwards each solve starts from the previous solution (also across groups)
for g = 1:group
    for m = 1:T-1
        U_t  = U(m,g);
        FU   = U(m+1,g);
        f_t  = fall(m,g);
        Lf_t = U(m,g)+E(m,g);
        s0   = fsolve('shimerall',s0,opts,U_t,FU,f_t,Lf_t);
        sall(m,g) = s0;
    end
end

% change entries with NaN stocks back to be NaN
fall(fall_nan) = NaN;

% s is invalid whenever f, the current or next-period U, or the current E was missing
s_invalid = fall_nan | U_nan(1:T-1,:) | U_nan(2:T,:) | E_nan(1:T-1,:);
sall(s_invalid) = NaN;

% undo the placeholders in the stocks themselves
U(U_nan) = NaN;
E(E_nan) = NaN;

%% calculate aggregate hazard rates using total stocks
% totals over all groups in each period, ignoring missing entries (row vectors)
Utotal  = zeros(1,T);
Ustotal = zeros(1,T);
Etotal  = zeros(1,T);
Ntotal  = zeros(1,T);
for m = 1:T
    Utotal(m)  = nansum(U(m,1:group));
    Ustotal(m) = nansum(U_s(m,1:group));
    Etotal(m)  = nansum(E(m,1:group));
    Ntotal(m)  = nansum(N(m,1:group));
end
utotal = Utotal./(Utotal+Etotal);

% same outflow formula as for the individual groups, applied to the totals
Ftotal = 1 - ((Utotal(2:end)-Ustotal(2:end))./Utotal(1:end-1));
ftotal = -log(1 - Ftotal);

% solve for the aggregate s, starting each solve from the previous solution
opts   = optimset('Display','off');
stotal = zeros(1,T-1);
s0     = 0.034;
for m = 1:T-1
    U_t  = Utotal(m);
    FU   = Utotal(m+1);
    f_t  = ftotal(m);
    Lf_t = Utotal(m)+Etotal(m);
    s0   = fsolve('shimerall',s0,opts,U_t,FU,f_t,Lf_t);
    stotal(m) = s0;
end

%% calculate aggregate hazard rates using weighted sum of s and f
% each group's share of the corresponding total, period by period
cols = 1:group;
weights  = bsxfun(@rdivide, E(:,cols)+N(:,cols), Etotal'+Ntotal');                  % weight applied to s
weightf  = bsxfun(@rdivide, U(:,cols), Utotal');                                    % weight applied to f
popshare = bsxfun(@rdivide, E(:,cols)+N(:,cols)+U(:,cols), Etotal'+Ntotal'+Utotal'); % population share
lfshare  = bsxfun(@rdivide, E(:,cols)+U(:,cols), Etotal'+Utotal');                  % labor force share

% weighted sums across groups, skipping missing entries
stotal_ws = zeros(T-1,1);
ftotal_ws = zeros(T-1,1);
for m = 1:T-1
    stotal_ws(m,:) = nansum(sall(m,:).*weights(m,:),2);
    ftotal_ws(m,:) = nansum(fall(m,:).*weightf(m,:),2);
end

weight = [weights weightf lfshare popshare];

% group f, group s, weighted-sum f and s, then f and s from total stocks
hazrate = [fall sall ftotal_ws stotal_ws ftotal' stotal'];

%% get annual averages (mean over the periods of each year) 1978-2018
% cy maps every period to its year; cy1 does the same for the T-1 periods
% on which the hazard rates are defined
[ay,~,cy] = unique(year,'rows');
[ay1,~,cy1] = unique(year(1:T-1),'rows');

% drop the date column and go back to a plain matrix
urate = table2array(urate(:,2:end));

% column-by-column mean within each year, ignoring NaN
yearly = @(X,idx) cell2mat(arrayfun(@(c) accumarray(idx,X(:,c),[],@nanmean), ...
    1:size(X,2),'UniformOutput',false));

urate_avg   = yearly(urate,cy);
weight_avg  = yearly(weight,cy);
lfshare_avg = yearly(lfshare,cy);
hazrate_avg = yearly(hazrate,cy1);
sall_avg    = yearly(sall,cy1);
fall_avg    = yearly(fall,cy1);

% from here on the series are annual
date = ay;

urate   = urate_avg;
weight  = weight_avg;
lfshare = lfshare_avg;
hazrate = hazrate_avg;
sall    = sall_avg;
fall    = fall_avg;

% annual weights: prepend the year, take headers from weight_names.xlsx, save
weight_names = readtable('weight_names.xlsx');
weight = array2table([date weight]);
weight.Properties.VariableNames = weight_names.Properties.VariableNames;
writetable(weight,'csv/weights_sexstate.csv');

% annual hazard rates: same treatment with headers from haz_names.xlsx
haz_names = readtable('haz_names.xlsx');
hazrate = array2table([date hazrate]);
hazrate.Properties.VariableNames = haz_names.Properties.VariableNames;
writetable(hazrate,'csv/hazards_sexstate.csv');

%% calculate gender gap by state for urate lfshare s and f (eg.(u_f-u_m)/u_m)
% Columns come in pairs, one pair per state (51 pairs). Following the gap
% definitions used here, the odd column of a pair is the male series and the
% even column the female series.
male   = 1:2:101;
female = 2:2:102;
urate_gap   = (urate(:,female)-urate(:,male))./urate(:,male);
lfshare_gap = (lfshare(:,male)-lfshare(:,female))./lfshare(:,male); % gap defined as (m-f)/m
sall_gap    = (sall(:,female)-sall(:,male))./sall(:,male);
fall_gap    = (fall(:,female)-fall(:,male))./fall(:,male);

% Rows are years. The "first3"/"last3" statistics average the first and the
% last three rows; the last three rows are end-2:end (end-3:end would be four).

% unemployment rate gap
urate_avggap = mean(urate_gap);
urategap_first3 = mean(urate_gap(1:3,:));
urategap_last3 = mean(urate_gap(end-2:end,:));
urate_gap = [date urate_gap];
urate_gap = array2table(urate_gap);
urgap_names = readtable('urgap_names.xlsx');
urate_gap.Properties.VariableNames = urgap_names.Properties.VariableNames;
writetable(urate_gap,'csv/urgap_sexstate.csv');

% labor force share gap (first3/last3 in percent)
lfshare_avggap = mean(lfshare_gap);
lfsharegap_first3 = mean(lfshare_gap(1:3,:))*100;
lfsharegap_last3 = mean(lfshare_gap(end-2:end,:))*100;
% NOTE(review): this change uses row end-1 while the s and f changes below use
% row end - confirm whether that difference is intended.
lfsharegap_change = lfshare_gap(end-1,:)-lfshare_gap(1,:);
lfshare_gap = [date lfshare_gap];
lfshare_gap = array2table(lfshare_gap);
lfsharegap_names = readtable('lfsharegap_names.xlsx');
lfshare_gap.Properties.VariableNames = lfsharegap_names.Properties.VariableNames;
writetable(lfshare_gap,'csv/lfsharegap_sexstate.csv');

% inflow rate (s) gap (first3/last3 in percent)
sall_avggap = mean(sall_gap);
sallgap_first3 = mean(sall_gap(1:3,:))*100;
sallgap_last3 = mean(sall_gap(end-2:end,:))*100;
sallgap_change = sall_gap(end,:)-sall_gap(1,:);
sall_gap = [date sall_gap];
sall_gap = array2table(sall_gap);
sallgap_names = readtable('sgap_names.xlsx');
sall_gap.Properties.VariableNames = sallgap_names.Properties.VariableNames;
writetable(sall_gap,'csv/sgap_sexstate.csv');

% outflow rate (f) gap (first3/last3 in percent)
fall_avggap = mean(fall_gap);
fallgap_first3 = mean(fall_gap(1:3,:))*100;
fallgap_last3 = mean(fall_gap(end-2:end,:))*100;
fallgap_change = fall_gap(end,:)-fall_gap(1,:);
fall_gap = [date fall_gap];
fall_gap = array2table(fall_gap);
fallgap_names = readtable('fgap_names.xlsx');
fall_gap.Properties.VariableNames = fallgap_names.Properties.VariableNames;
writetable(fall_gap,'csv/fgap_sexstate.csv');

%% scatter plot first 3 vs last 3 years of lfpr and s gaps (s gap defined as(F-M)/M and lfpr gap defined as(M-F)/M)
% state labels: the 51 cells A1:AY1 of state_names.xlsx
[~,state_names] = xlsread('state_names.xlsx','A1:AY1');
dx = 0.3; dy = 0.5; % displacement so the text does not overlay the data points

figure;
% first three years in red
first3=scatter(lfsharegap_first3,sallgap_first3,'filled','r');
text(lfsharegap_first3+dx, sallgap_first3+dy, state_names);
hold on;
% last three years in blue
last3=scatter(lfsharegap_last3,sallgap_last3,'filled','b');
%hline = refline([0 0]);
%line([0,0],ylim);
text(lfsharegap_last3+dx, sallgap_last3+dy, state_names);
xlabel('Participation Gap (lfpr_m-lfpr_f)/lfpr_m (%)');
% the plotted gap is computed relative to the male rate, so the label uses s_m as denominator
ylabel('Inflow Rate Gap (s_f-s_m)/s_m (%)');
ylim([-40,140]);
xlim([-10,50]);
hold off;
legend([first3 last3], {'1978-1980' '2016-2018'},'Location','northeast')
set(gca,'FontSize',14)
saveas(gcf,'../../tex/figure/sexstate/scatter_lfr_s','epsc')

% take out DC AK HI
%lfsharegap_first3_minus=lfsharegap_first3;
%lfsharegap_last3_minus=lfsharegap_last3;
%sallgap_first3_minus=sallgap_first3;
%sallgap_last3_minus=sallgap_last3;
%fallgap_first3_minus=fallgap_first3;
%fallgap_last3_minus=fallgap_last3;
%lfsharegap_first3_minus(:,[DC AK HI])=[];
%lfsharegap_last3_minus(:,[DC AK HI])=[];
%sallgap_first3_minus(:,[DC AK HI])=[];
%sallgap_last3_minus(:,[DC AK HI])=[];

%figure;
%first3=scatter(lfsharegap_first3_minus,sallgap_first3_minus,'filled','r');
%dx = 0.003; dy = 0.006; % displacement so the text does not overlay the data points
%text(lfsharegap_first3_minus+dx, sallgap_first3_minus+dy, state_names_minus);
%hold on;
%last3=scatter(lfsharegap_last3_minus,sallgap_last3_minus,'filled','b');
%hline = refline([0 0]);
%dx = 0.003; dy = 0.006; % displacement so the text does not overlay the data points
%text(lfsharegap_last3_minus+dx, sallgap_last3_minus+dy, state_names_minus);
%xlabel('Participation Gap (lfpr_m-lfpr_f)/lfpr_m');
%ylabel('Inflow Rate Gap (s_f-s_m)/s_f');
%hold off;
%legend([first3 last3], {'1978-1980' '2016-2018'},'Location','northeast')
%saveas(gcf,'../../tex/figure/sexstate/scatter_lfr_s_minus','epsc')

%% scatter plot first 3 vs last 3 years of lfpr and f gaps (f gap defined as(F-M)/M and lfpr gap defined as(M-F)/M)
dx = 0.3; dy = 0.5; % displacement so the text does not overlay the data points

figure;
% first three years in red
first3=scatter(lfsharegap_first3,fallgap_first3,'filled','r');
text(lfsharegap_first3+dx, fallgap_first3+dy, state_names);
hold on;
% last three years in blue
last3=scatter(lfsharegap_last3,fallgap_last3,'filled','b');
%hline = refline([0 0]);
%line([0,0],ylim);
text(lfsharegap_last3+dx, fallgap_last3+dy, state_names);
xlabel('Participation Gap (lfpr_m-lfpr_f)/lfpr_m (%)');
% the plotted gap is computed relative to the male rate, so the label uses f_m as denominator
ylabel('Outflow Rate Gap (f_f-f_m)/f_m (%)');
ylim([-40,140]);
xlim([-10,50]);
hold off;
legend([first3 last3], {'1978-1980' '2016-2018'},'Location','northeast')
set(gca,'FontSize',14)
saveas(gcf,'../../tex/figure/sexstate/scatter_lfr_f','epsc')

%% scatter plot changes in lfpr and s gaps (s gap defined as(F-M)/M and lfpr gap defined as(M-F)/M)
%figure;
%scatter(lfsharegap_change,sallgap_change,'filled');
%xlabel('Participation Gap Change (%)');
%ylabel('Inflow Rate (s) Gap Change (%)');
%[~,state_names] = xlsread('state_names.xlsx','A1:AY1');
%dx = 0.003; dy = 0.005; % displacement so the text does not overlay the data points
%text(lfsharegap_change+dx, sallgap_change+dy, state_names);
%hline = refline([0 0]);
%line(xlim,[0,0]);
%refline;
%saveas(gcf,'../../tex/figure/sexstate/scatter_lfr_s_change','epsc')

%% scatter plot changes in lfpr and f gaps (f gap defined as(F-M)/M and lfpr gap defined as(M-F)/M)
%figure;
%scatter(lfsharegap_change,fallgap_change,'filled');
%xlabel('Participation Gap Change (%)');
%ylabel('Outflow Rate (f) Gap Change (%)');
%[~,state_names] = xlsread('state_names.xlsx','A1:AY1');
%dx = 0.003; dy = 0.005; % displacement so the text does not overlay the data points
%text(lfsharegap_change+dx, fallgap_change+dy, state_names);
%hline = refline([0 0]);
%line(xlim,[0,0]);
%refline;
%saveas(gcf,'../../tex/figure/sexstate/scatter_lfr_f_change','epsc')

% the figures have already been saved to disk above, so close every open figure window
close all