% Start from an empty workspace and read the input data. The variables
% year, cic2, soe, va, age and first used further down are never assigned
% in this script, so they have to come from this file.
clear
load('data_positive')

% Entry aggregation switch: 0 for mean entry, 1 for sum entry.
% NOTE(review): not referenced anywhere else in the code visible here.
dummy = 1;

% Two-digit industry codes (cic2) included in the analysis, as a column
% vector: 6-11, 13-37 and 39-46 (39 codes in total).
cic2_index = [6:11, 13:37, 39:46]';

% Largest value of age that still qualifies an observation as an entrant.
min_age = 1;

%% Concentration, size, firm counts and entry by year and industry
% Loops over the sample years 1998-2007 and over every two-digit industry
% in cic2_index. Each statistic is computed three times: for all
% observations, for SOEs (soe==1) and for non-SOEs (soe==0).
% Outputs (rows = year-1997 unless noted, columns = position of the
% industry in cic2_index):
%   HI, HI_soe, HI_nso       sum of squared value-added shares
%   Y, Y_soe, Y_nso          total value added
%   num, num_soe, num_nso    number of observations
%   inc, inc_soe, inc_nso    number of observations in 1998 (row vectors)
%   ent, ent_soe, ent_nso    number of entrants, rows = year-1998
%   va_share, va_share_soe, va_share_nso
%                            industry share of the year's total value added
n_year=10;                        % sample years 1998..2007
n_industry=numel(cic2_index);     % derived from the list instead of a literal 39

% Preallocate every result so the arrays are not regrown on each pass.
% Results built from va take the class of va, exactly as they would if
% they were created by the first indexed assignment.
va_class=class(va);
HI=zeros(n_year,n_industry,va_class);
HI_soe=zeros(n_year,n_industry,va_class);
HI_nso=zeros(n_year,n_industry,va_class);
Y=zeros(n_year,n_industry,va_class);
Y_soe=zeros(n_year,n_industry,va_class);
Y_nso=zeros(n_year,n_industry,va_class);
va_share=zeros(n_year,n_industry,va_class);
va_share_soe=zeros(n_year,n_industry,va_class);
va_share_nso=zeros(n_year,n_industry,va_class);
num=zeros(n_year,n_industry);
num_soe=zeros(n_year,n_industry);
num_nso=zeros(n_year,n_industry);
inc=zeros(1,n_industry);
inc_soe=zeros(1,n_industry);
inc_nso=zeros(1,n_industry);
ent=zeros(n_year-1,n_industry);
ent_soe=zeros(n_year-1,n_industry);
ent_nso=zeros(n_year-1,n_industry);

for t=1998:2007

    row=t-1997;                   % row of the year-by-industry matrices
    in_year=(year==t);            % loop-invariant over industries

    % by industry
    for i=1:n_industry

        % year and industry index
        in_cell=in_year & cic2==cic2_index(i);
        index=find(in_cell);
        index_soe=find(in_cell & soe==1);
        index_nso=find(in_cell & soe==0);

        % size: total value added
        Y(row,i)=sum(va(index));
        Y_soe(row,i)=sum(va(index_soe));
        Y_nso(row,i)=sum(va(index_nso));

        % HI index: sum of squared shares of value added within the cell.
        % An empty cell yields 0 because the sum of an empty array is 0.
        HI(row,i)=sum((va(index)/Y(row,i)).^2);
        HI_soe(row,i)=sum((va(index_soe)/Y_soe(row,i)).^2);
        HI_nso(row,i)=sum((va(index_nso)/Y_nso(row,i)).^2);

        % number of firms
        num(row,i)=length(index);
        num_soe(row,i)=length(index_soe);
        num_nso(row,i)=length(index_nso);

        if t == 1998

            % incumbents: every observation present in the base year
            inc(i)=length(index);
            inc_soe(i)=length(index_soe);
            inc_nso(i)=length(index_nso);

        else

            % entrants: observations whose first(.) entry points at a row
            % dated t and whose age does not exceed min_age.
            % NOTE(review): first(k) is presumably the row index of the
            % firm's earliest observation - confirm against the code that
            % builds data_positive.

            % all
            index_entry=find(year(first(index)) == t & age(index) <= min_age);
            ent(row-1,i)=length(index_entry);

            % soe
            index_entry_soe=find(year(first(index_soe)) == t & age(index_soe) <= min_age);
            ent_soe(row-1,i)=length(index_entry_soe);

            % nso
            index_entry_nso=find(year(first(index_nso)) == t & age(index_nso) <= min_age);
            ent_nso(row-1,i)=length(index_entry_nso);

        end

    end

    % va share: each industry's share of the year's total value added
    va_share(row,:)=Y(row,:)/sum(Y(row,:));
    va_share_soe(row,:)=Y_soe(row,:)/sum(Y_soe(row,:));
    va_share_nso(row,:)=Y_nso(row,:)/sum(Y_nso(row,:));

end

%% Averages and changes over the sample period
% Rows 1 and 10 of the year-by-industry matrices hold the first and last
% sample year. The annualised growth rate between them is
% (last./first).^(1/9) - 1, computed column by column.
ann_growth = @(x) (x(10,:)./x(1,:)).^(1/9) - 1;

% Herfindahl index: time average, level change and annualised growth
HI_mean     = mean(HI,1);
HI_soe_mean = mean(HI_soe,1);
HI_diff     = HI(10,:) - HI(1,:);
HI_soe_diff = HI_soe(10,:) - HI_soe(1,:);
HI_g        = ann_growth(HI);
HI_g_soe    = ann_growth(HI_soe);
HI_g_nso    = ann_growth(HI_nso);

% Time average of each industry's share of total value added
va_share_mean     = mean(va_share,1);
va_share_soe_mean = mean(va_share_soe,1);
va_share_nso_mean = mean(va_share_nso,1);

% SOE share of value added within each industry: level change and growth
va_soe_share = Y_soe./Y;
va_share_d   = va_soe_share(10,:) - va_soe_share(1,:);
va_share_g   = ann_growth(va_soe_share);

save('data_concentration', ...
    'HI_mean','HI_soe_mean','HI_diff','HI_soe_diff', ...
    'HI_g','HI_g_soe','HI_g_nso', ...
    'va_share_mean','va_share_soe_mean','va_share_nso_mean', ...
    'va_share_d','va_share_g')

% Scatter of the Herfindahl index in the last sample year (row 10) against
% the first (row 1), one marker per industry, with a 45-degree reference
% line. Both axes are cut off at HI_max.
HI_max = 0.05;
HI_x = 0:HI_max/100:HI_max;          % grid used to draw the 45-degree line
marker_area = va_share_mean*1000;    % marker size scales with average va share

scatter(HI(1,:), HI(10,:), marker_area)
hold('on')
plot(HI_x, HI_x, '- k')
hold('off')
axis([0 HI_max 0 HI_max])

%% Entry
% Total entrants over all entry years relative to the base-year count of
% observations, by industry.
entry     = sum(ent)    ./ inc;
entry_soe = sum(ent_soe)./ inc_soe;
entry_nso = sum(ent_nso)./ inc_nso;

% Yearly entry rates by industry.
% NOTE(review): row k of ent* is filled for year 1998+k while row k of num*
% is filled for year 1997+k, so each ratio divides entrants by the previous
% year's count - confirm that this lag is intended.
k = 1:9;
entry_r(k,:)     = ent(k,:)    ./ num(k,:);
entry_soe_r(k,:) = ent_soe(k,:)./ num_soe(k,:);
entry_nso_r(k,:) = ent_nso(k,:)./ num_nso(k,:);

% Annualised growth of the number of observations between rows 1 and 10
count_growth = @(n) (n(10,:)./n(1,:)).^(1/9) - 1;
num_g     = count_growth(num);
num_soe_g = count_growth(num_soe);
num_nso_g = count_growth(num_nso);

% Average yearly entry rate by industry
entry_r_ave     = mean(entry_r);
entry_soe_r_ave = mean(entry_soe_r);
entry_nso_r_ave = mean(entry_nso_r);

save('data_entry', ...
    'entry','entry_soe','entry_nso', ...
    'inc','inc_soe','inc_nso', ...
    'ent','ent_soe','ent_nso', ...
    'num_g','num_soe_g','num_nso_g', ...
    'entry_r_ave','entry_soe_r_ave','entry_nso_r_ave')

% Correlation between average entry rates and annualised growth of the
% Herfindahl index across industries. Industries 7, 10, 16, 43 and 45 are
% left out. Results are displayed, not stored.
index=find(~ismember(cic2_index,[7 10 16 43 45]));

% entry rates against growth of overall concentration
corrcoef(entry_nso_r_ave(index),HI_g(index))
corrcoef(entry_soe_r_ave(index),HI_g(index))

% entry rates against growth of concentration within the same ownership
% group (the non-SOE line previously repeated the HI_g correlation above)
corrcoef(entry_nso_r_ave(index),HI_g_nso(index))
corrcoef(entry_soe_r_ave(index),HI_g_soe(index))

%% Average yearly entry rate over all industries, non-SOE and SOE
% For each year 1999-2007 the entry rate is the number of entrants divided
% by the number of observations of that ownership group in the same year.
% The means over the nine years are displayed, not stored.
entry_rate_nso=zeros(1,9);
entry_rate_soe=zeros(1,9);

for t=1999:2007

    in_year=(year==t);

    % non-SOE
    % NOTE(review): this group is selected with soe~=1, whereas the
    % industry-level loop uses soe==0; the two differ if soe takes any
    % value other than 0 or 1 - confirm which is intended.
    index=find(in_year & soe~=1);
    % index_entry is used as the scratch name so that the industry-level
    % result stored in `entry` is not overwritten.
    index_entry=find(year(first(index)) == t & age(index) <= min_age);
    entry_rate_nso(t-1998)=length(index_entry)/length(index);

    % SOE
    index=find(in_year & soe==1);
    index_entry=find(year(first(index)) == t & age(index) <= min_age);
    entry_rate_soe(t-1998)=length(index_entry)/length(index);

end
mean(entry_rate_nso)
mean(entry_rate_soe)