% ReadCovidData
%
%  Reads Country/State/City death data from the Johns Hopkins spreadsheets
%
%   https://coronavirus.jhu.edu/map.html
%   https://github.com/CSSEGISandData/COVID-19
%   https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_global.csv
%
%   Codes:  https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/UID_ISO_FIPS_LookUp_Table.csv

% Begin with an empty workspace and no open figure windows.
clear
close all
% diarychad is a project helper that is not defined in this file.
% NOTE(review): presumably it opens the diary log that "diary off" closes at
% the end of this script -- confirm.
diarychad('ReadCovidData');

% ===============================================================================
% State / city data: produced by ReadCovidStates
%
% SOURCE of the data (all Johns Hopkins)
%   https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_US.csv
%   Codes:  https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/UID_ISO_FIPS_LookUp_Table.csv
% ===============================================================================
ReadCovidStates   % Saved to CovidStateData.mat


% ===============================================================================
% ReadCountryData
%
% SOURCE of the data (all Johns Hopkins)
%   https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_global.csv
%   Codes:  https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/UID_ISO_FIPS_LookUp_Table.csv
%
% ===============================================================================
% Start the country pass from an empty workspace. The SAVE at the end of this
% section has no variable list, so every variable created below is written to
% CovidCountryData.mat.
clear
fname='time_series_covid19_deaths_global.csv';  % Death data
cname='UID_ISO_FIPS_LookUp_Table.xls';      % Country codes
FirstDataDate=datetime('2020-01-22');       % Date assigned to the first death column

% Per-country lookup columns, rows 2-184 of sheet 'Countries'.
% NOTE(review): the row range is hard-coded -- confirm it still covers the sheet.
%CountryUID=readmatrix(cname,'Sheet','Countries','Range','F2:F181');
CountryNames=readmatrix(cname,'Sheet','Countries','Range','H2:H184','OutputType','char');
CountryCodes=readmatrix(cname,'Sheet','Countries','Range','C2:C184','OutputType','char');
CountryLat =readmatrix(cname,'Sheet','Countries','Range','I2:I184'); % Latitude
CountryLong=readmatrix(cname,'Sheet','Countries','Range','J2:J184'); % Longitude
CountryPop=readmatrix(cname,'Sheet','Countries','Range','L2:L184');
CountryPop=CountryPop';   % transpose the column read from the sheet
N=length(CountryPop);

% Global time series: columns 3 and 4 are latitude/longitude, columns 5..end
% hold the death counts (one column per date).
RawNames=readmatrix(fname,'Range','B2:B267','OutputType','char');
data=readmatrix(fname); % All strings become NaN, but data is stored in appropriate cols/rows!
lat =data(:,3);
long=data(:,4);
deaths=data(:,5:end);
[NumRecs,T]=size(deaths);
CovidDates=FirstDataDate+caldays(0:(T-1))';   % one calendar day per data column
datadate=upper(['Data through ' datestr(CovidDates(end))])   % intentionally unsuppressed: echoes the last date
LASTDATE=CovidDates(end);

% Composite coordinate keys. They are no longer used for matching (see the
% loop below) but are still computed because they are part of the saved data.
CountryLatLong=CountryLat*100+CountryLong;
latlong=lat*100+long;

% Country codes become index variables (original note: AFG=2, ARG=8, BRA=24,
% USA=1, etc.).
% SECURITY NOTE: this EVALs text read from an external spreadsheet. Only
% strings that are valid MATLAB variable names are evaluated; anything else is
% reported and skipped.
for i=1:N
    if isvarname(CountryCodes{i})
        eval([CountryCodes{i} '=i;']);
    else
        disp(['Invalid country code, no index variable created: ' CountryNames{i}]);
    end
end

% Assign country deaths: column s of CountryDeaths is the time-series row
% whose coordinates equal those of country s.
CountryDeaths=zeros(T,N);
for s=1:N
    % Latitude and longitude are compared separately. The composite key
    % lat*100+long is not unique (e.g. lat=10,long=100 and lat=11,long=0 both
    % give 1100), so matching on it could pick up the wrong row.
    i=find(CountryLat(s)==lat & CountryLong(s)==long);    % s=country index
    if length(i)>1
        disp(['More than one latitude match found. Skipping: ' CountryNames{s}]);
    elseif ~isempty(i)
        CountryDeaths(:,s)=deaths(i,:)';
    else
        disp(['Country not found: ' CountryNames{s}]);
    end
end
Ncountries=N;
% div is a project helper that is not defined in this file.
CountryDeathsPerMillion=div(CountryDeaths,CountryPop)*1e6;
save CovidCountryData

% ===============================================================================
% Merge the Country and State data into a single CovidDeaths variable
% ===============================================================================

load('CovidStateData')

% Column (and code/name row) order from here on: countries, states, cities.
CovidDeaths           = horzcat(CountryDeaths, StateDeaths, CityDeaths);
CovidDeathsPerMillion = horzcat(CountryDeathsPerMillion, StateDeathsPerMillion, CityDeathsPerMillion);
CovidPop              = horzcat(CountryPop, StatePop, CityPop);
CovidCodes            = vertcat(CountryCodes, StateCodes, CityCodes);
CovidNames            = vertcat(CountryNames, StateNames, CityNames);

% State codes become index variables, offset by the number of countries so
% that each one points at the state's column in the merged arrays.
% NOTE(review): this EVALs names that come from the loaded data file.
Nstates=length(StateCodes);
for i=1:Nstates
    eval(sprintf('%s=Ncountries+i;',StateCodes{i}));
end

% City codes become index variables, offset by countries plus states.
for i=1:length(CityCodes)
    eval(sprintf('%s=Ncountries+Nstates+i;',CityCodes{i}));
end

% Add a "US excluding NYC" series: the USA column minus the NYR column.
% NOTE(review): NYR is not defined in this file -- confirm it is the entry
% that the "NYC" in the label refers to.
USXdeaths = CovidDeaths(:,USA) - CovidDeaths(:,NYR);
USXpop    = CovidPop(:,USA)    - CovidPop(:,NYR);
CovidDeaths(:,end+1)           = USXdeaths;
CovidDeathsPerMillion(:,end+1) = USXdeaths/USXpop*10^6;
CovidPop(:,end+1)              = USXpop;
CovidCodes(end+1,1)            = {'USX'};
CovidNames(end+1,1)            = {'U.S. excluding NYC'};
USX = find(strcmp(CovidCodes,'USX'));   % index of the series just appended


%%%%% Fix "spikes" in the daily death data by spreading them out over
%%%%% previous dates

% Table of corrections, one per row: {column index, date1, date2}.
% Rows are applied strictly in the order listed; each call to fixdeathburst
% receives CovidDeaths as already corrected by the rows above it.
BurstFixList = { ...
    NYC, '2020-05-17', '2020-05-18'   % New York: May 17 to May 18 they added a bunch of old deaths
    NYR, '2020-05-17', '2020-05-18'
    NY,  '2020-05-17', '2020-05-18'
    NYC, '2020-08-30', '2020-08-31'   % New York: Aug 30-31 they added a bunch of old deaths
    NYR, '2020-08-30', '2020-08-31'
    NY,  '2020-08-30', '2020-08-31'
    HUB, '2020-04-16', '2020-04-17'   % Hubei, China: April 17=4512 adjustment (April 16 = 3222)
    NJ,  '2020-06-24', '2020-06-25'   % New Jersey: June 24 to June 25 they added a bunch of old deaths
    MAD, '2020-05-24', '2020-05-25'   % Madrid: May 24 to May 25 they *shrunk* numbers by 250
    MAD, '2020-06-18', '2020-06-19'   % Madrid: June 18 to June 19 they *shrunk* numbers by 250
    ITA, '2020-08-14', '2020-08-15'   % Italy: Aug 14-15 added 100+ deaths
    PER, '2020-08-13', '2020-08-14'   % Peru: Aug 13-14 ...
    PER, '2020-07-22', '2020-07-23'   % ... and Jul 22-23
    ISR, '2020-08-18', '2020-08-19'   % Israel: Aug 18-19
    BOS, '2020-08-19', '2020-08-20'   % Boston: Aug 19-20
    MA,  '2020-08-19', '2020-08-20'
    ESP, '2020-06-18', '2020-06-19'   % Spain: June 18-19 ...
    ESP, '2020-05-21', '2020-05-22'   % ... and May 21-22 ...
    ESP, '2020-05-24', '2020-05-25'   % ... and May 24-25
    };

for BurstFixRow = 1:size(BurstFixList,1)
    % The three table entries expand to the code, date1 and date2 arguments.
    [newdeaths,newdeathspm] = fixdeathburst(BurstFixList{BurstFixRow,:}, ...
        CovidDates,CovidDeaths,CovidPop,CovidNames);
    CovidDeaths(:,BurstFixList{BurstFixRow,1})           = newdeaths;
    CovidDeathsPerMillion(:,BurstFixList{BurstFixRow,1}) = newdeathspm;
end

% Remove the helper variables so the workspace (which is saved in full later
% in this script) holds exactly what the individual calls would have left.
clear BurstFixList BurstFixRow



%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Show data for high death locations
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Order all locations by the latest deaths-per-million value, largest first
% (ascending sort of the negated row), and print the first 40 with cshow
% (a project helper that is not defined in this file).
[blah,indx] = sort(-CovidDeathsPerMillion(end,:));
cshow(CovidNames(indx(1:40)), ...
      CovidDeathsPerMillion(end,indx(1:40))', ...
      '%12.1f','DeathsP.M.');

% Replace the stored name of the TWN entry.
CovidNames{TWN} = 'Taiwan';

definecolors  % Colors for plotting graphs

% Drop scratch variables so that only the data set itself is written below.
% CLEAR ignores names that do not exist, so leftovers from older versions of
% the script can stay on the list.
clear('apr15','cname','data','deaths','fips','fname','indx','ny', ...
      'nyapr15','nyapr16','nyapr17','oldratio17to15','predictedApr17', ...
      'regions','s','uid','uidraw', ...
      't2','t1','nj2','nj1','njdeaths','hub2','hub1','hubeideaths', ...
      'i','N','T','lat','long','latlong','RawNames','RawCountyPlace', ...
      'datarange','namerange','blah','A', ...
      'lastcolC','lastcolS','Nctys','fnameStates','NumRecs','NYfips', ...
      'NYXDeaths','NYXPop','NYXDeathsPerMillion');
%save('CovidData','CovidDeaths','CovidDeathsPerMillion','CovidPop','CovidCodes','CovidNames','CovidDates','FirstDataDate');

% Write every remaining workspace variable to CovidData.mat.
save('CovidData');


diary('off')


%%% FUNCTION %%%
function [newdeaths,newdeathspermillion]=fixdeathburst(code,date1,date2,CovidDates,CovidDeaths,CovidPop,CovidNames)
% FIXDEATHBURST  Spread a one-day jump in cumulative deaths over earlier dates.
%
%   [newdeaths,newdeathspermillion] = fixdeathburst(code,date1,date2, ...
%                          CovidDates,CovidDeaths,CovidPop,CovidNames)
%
%   Inputs
%     code         column index of the location (e.g. ITA or MAD)
%     date1        date just before the jump, e.g. '2020-08-14'
%     date2        date of the jump, e.g. '2020-08-15'; must be the entry of
%                  CovidDates immediately after date1
%     CovidDates   dates, one per row of CovidDeaths
%     CovidDeaths  cumulative deaths (rows = dates, columns = locations)
%     CovidPop     population, indexed by location
%     CovidNames   location names, used only for the printed report
%
%   Outputs
%     newdeaths            column with rows 1..date1 multiplied by one common
%                          factor and rows date2..end left unchanged
%     newdeathspermillion  newdeaths/CovidPop(code)*10^6
%
%   The factor is dth2/dth1 * (dth1-mean7)/dth1, where dth1 and dth2 are the
%   cumulative counts on date1 and date2 and mean7 is the mean daily increase
%   over the (up to) 7 days ending on date1. The corrected date1 value is
%   therefore dth2*(1-mean7/dth1), which leaves a daily count of
%   mean7*dth2/dth1 on date2 instead of zero.
%
%   Errors if either date is missing from CovidDates or the two dates are not
%   adjacent entries. If the count on date1 is zero there is nothing to
%   rescale; the series is returned unchanged with a warning.

  t2=find(ismember(CovidDates,{date2}));
  t1=find(ismember(CovidDates,{date1}));
  if numel(t1)~=1 || numel(t2)~=1
      error('fixdeathburst:dateNotFound', ...
            'Dates %s and %s must each occur exactly once in CovidDates.',date1,date2);
  end
  if t2~=t1+1
      % Rows between the two dates would be dropped from the result.
      error('fixdeathburst:datesNotAdjacent', ...
            '%s must be the entry of CovidDates immediately after %s.',date2,date1);
  end

  series=CovidDeaths(:,code);
  dth2=series(t2);
  dth1=series(t1);
  if dth1==0
      % The factor divides by dth1 twice; a zero would turn every earlier
      % value into NaN.
      warning('fixdeathburst:zeroBase', ...
              'Cumulative deaths on %s are zero; series returned unchanged.',date1);
      newdeaths=series;
      newdeathspermillion=newdeaths/CovidPop(code)*10^6;
      return
  end

  % Still need to fix the new date1 so daily deaths for date2 is not zero
  % Assign average of past 7 days (fewer if date1 is within the first week).
  dailydeaths=diff(series);
  w0=max(1,t1-7);              % first index of the averaging/report window
  if t1>1
      mean7 = mean(dailydeaths(w0:t1-1))   % unsuppressed on purpose: part of the printed report
  else
      mean7 = 0                            % no earlier day to average over
  end
  dth1adj=dth1-mean7;
  adjfactor=dth2/dth1*dth1adj/dth1;
  newdeaths=[series(1:t1)*adjfactor; series(t2:end)];

  % say is a project helper that is not defined in this file.
  say(CovidNames(code));
  fprintf([' Correction: ' date2 ' = %5.0f   ' date1 ' = %5.0f  AdjDate1 = %5.0f   Factor=%6.3f\n'],[dth2 dth1 dth1adj adjfactor]);
  disp '   Adjusting all earlier data by this factor...';
  newdeathspermillion=newdeaths/CovidPop(code)*10^6;

  % Echo the corrected daily counts around the jump. diff() has one element
  % fewer than the series, so the upper index is capped for the case where
  % date2 is the last date available.
  dailydeathsnew=diff(newdeaths);
  dailydeathsnew(w0:min(t2,numel(dailydeathsnew)))
  %keyboard
end


