Files
covid19-age-stratified-ifr/apply_ifr.py
T
2020-09-27 17:56:13 -07:00

195 lines
6.5 KiB
Python
Executable File
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/python3
#
# Apply various estimates of the age-stratified Infection Fatality Ratio of COVID-19 to
# countries' population pyramids in order to calculate their overall IFR.
# Author: Marc Bevand — @zorinaq
import math

import pandas as pd
# Population pyramids come from the United Nations. The CSV file below is an
# export of the first sheet of "Population by Age Groups - Both Sexes", which is
# linked from:
# https://population.un.org/wpp/Download/Standard/Population/
# Direct link to the workbook:
# https://population.un.org/wpp/Download/Files/1_Indicators%20(Standard)/EXCEL_FILES/1_Population/WPP2019_POP_F07_1_POPULATION_BY_AGE_BOTH_SEXES.xlsx
file_pyramids = 'WPP2019_POP_F07_1_POPULATION_BY_AGE_BOTH_SEXES.csv'

# Upper bound used for every open-ended age band ("N and older")
maxage = 100

# Age groups defined in the CSV file, as inclusive (youngest, oldest) tuples:
# twenty 5-year groups from 0-4 to 95-99, then the open-ended (100, maxage) group
age_groups = [(start, start + 4) for start in range(0, 100, 5)]
age_groups.append((100, maxage))

# Parsed pyramid data ends up here, keyed by region name and then by age group.
# Example to get the number of people in the age group 20-24 in France:
# pyramid['France'][(20,24)]
pyramid = {}
# Various age-stratified IFR estimates.
# Each entry is a tuple (label, table):
#  - label is the short name printed as a column title by show_overall_ifrs()
#  - table maps an inclusive (youngest, oldest) age band to the IFR of that band,
#    expressed in percent (overall_ifr() divides these values by 100)
# Open-ended bands ("N and older") use maxage as their upper bound.
ifrs = [
    # Calculated from Spanish ENE-COVID study
    # (see calc_ifr.py)
    ('ENE-COV', {
        (0,9): 0.003,
        (10,19): 0.004,
        (20,29): 0.015,
        (30,39): 0.030,
        (40,49): 0.064,
        (50,59): 0.213,
        (60,69): 0.718,
        (70,79): 2.384,
        (80,89): 8.466,
        (90,maxage): 12.497,
    }),
    # US CDC estimate as of 10 Sep 2020
    # https://www.cdc.gov/coronavirus/2019-ncov/hcp/planning-scenarios.html
    # (table 1)
    ('US_CDC', {
        (0,19): 0.003,
        (20,49): 0.02,
        (50,69): 0.5,
        (70,maxage): 5.4,
    }),
    # Verity et al.
    # https://www.thelancet.com/journals/laninf/article/PIIS1473-3099(20)30243-7/fulltext
    # (table 1)
    ('Verity', {
        (0,9): 0.00161,
        (10,19): 0.00695,
        (20,29): 0.0309,
        (30,39): 0.0844,
        (40,49): 0.161,
        (50,59): 0.595,
        (60,69): 1.93,
        (70,79): 4.28,
        (80,maxage): 7.80,
    }),
    # Levin et al.
    # https://www.medrxiv.org/content/10.1101/2020.07.23.20160895v5
    # (table 3)
    ('Levin', {
        (0,34): 0.004,
        (35,44): 0.06,
        (45,54): 0.2,
        (55,64): 0.7,
        (65,74): 2.3,
        (75,84): 7.6,
        (85,maxage): 22.3,
    }),
    # Gudbjartsson et al., Humoral Immune Response to SARS-CoV-2 in Iceland
    # https://www.nejm.org/doi/full/10.1056/NEJMoa2026116
    # Supplementary Appendix 1
    # https://www.nejm.org/doi/suppl/10.1056/NEJMoa2026116/suppl_file/nejmoa2026116_appendix_1.pdf
    # (table S7)
    # Note: these bands (0-70, 71-80, 81+) do not line up with the 5-year groups of
    # the pyramid data, which is why the calculation works one year of age at a time.
    ('Gudbj', {
        (0,70): 0.1,
        (71,80): 2.4,
        (81,maxage): 11.2,
    }),
    # O'Driscoll et al., Age-specific mortality and immunity patterns of SARS-CoV-2 infection in 45 countries
    # https://www.medrxiv.org/content/10.1101/2020.08.24.20180851v1
    # (table S4)
    ("O'Drisc", {
        (0,4): 0.002,
        (5,9): 0.000,
        (10,14): 0.000,
        (15,19): 0.002,
        (20,24): 0.004,
        (25,29): 0.009,
        (30,34): 0.017,
        (35,39): 0.029,
        (40,44): 0.053,
        (45,49): 0.086,
        (50,54): 0.154,
        (55,59): 0.241,
        (60,64): 0.359,
        (65,69): 0.642,
        (70,74): 1.076,
        (75,79): 2.276,
        (80,maxage): 7.274,
    }),
]
def ag2str(age_group):
    """Format an age group tuple as a text label.

    A group whose upper bound equals ``maxage`` is open-ended and is rendered
    as ``'<youngest>+'``; any other group is rendered as
    ``'<youngest>-<oldest>'``. parse_pyramids() uses these labels as CSV
    column names.
    """
    youngest = age_group[0]
    oldest = age_group[1]
    return f'{youngest}+' if oldest == maxage else f'{youngest}-{oldest}'
def parse_pyramids():
    """Load the 2020 population pyramids from the CSV file into ``pyramid``.

    Reads ``file_pyramids``, keeps the rows dated 2020 whose type is a
    country/area, the world or a region, and stores one dict per row in the
    module-level ``pyramid``, so that ``pyramid[name][age_group]`` is the number
    of people in ``age_group`` (one key per entry of ``age_groups``).

    ``pyramid`` is updated in place and nothing is returned. If a name appears
    on more than one retained row, the last row wins.
    """
    name_col = 'Region, subregion, country or area *'
    df = pd.read_csv(file_pyramids)
    # ignore labels as they don't contain any data
    df = df[df['Type'] != 'Label/Separator']
    # only take rows with data as of 2020
    df = df[df['Reference date (as of 1 July)'] == 2020]
    # only parse countries, world, and continents; take a copy so the column
    # assignments below write to an independent frame rather than to a slice
    df = df[df['Type'].isin(('Country/Area', 'World', 'Region'))].copy()
    columns = [ag2str(x) for x in age_groups]
    for col in columns:
        # Remove spaces used as thousands separators, and convert cell values to
        # floats. regex=True is explicit because the pattern is a regular
        # expression and pandas 2.0 made literal matching the default. The
        # astype(str) lets columns that pandas already parsed as numbers through.
        df[col] = df[col].astype(str).str.replace(r'\s+', '', regex=True).astype(float)
    #regions = ('France',)
    # One pass over the rows instead of re-filtering the whole frame per region
    for _, row in df.iterrows():
        # values are in thousands
        pyramid[row[name_col]] = {
            ag: 1000 * float(row[col]) for ag, col in zip(age_groups, columns)
        }
def people_of_age(pyramid_region, age):
    """Return the number of people of exact age ``age`` in the given age pyramid.

    The pyramid only holds one head count per age group, so the count of the
    group containing ``age`` is spread evenly over every year of age the group
    spans.

    Args:
        pyramid_region: dict mapping inclusive ``(youngest, oldest)`` age groups
            to the number of people in that group.
        age: age in whole years.

    Returns:
        The head count of the matching group divided by the number of ages in
        the group, as a float.

    Raises:
        ValueError: if no age group of the pyramid contains ``age``. Previously
            the function fell through and returned None, which only surfaced
            later as an unrelated-looking TypeError in the caller.
    """
    for (youngest, oldest), count in pyramid_region.items():
        if age in range(youngest, oldest + 1):
            return count / float(oldest - youngest + 1)
    raise ValueError(f'age {age!r} is not covered by any age group of the pyramid')
def overall_ifr(pyramid_region, ifr_age_stratified):
    """Return the overall IFR, in percent, of a population for one IFR estimate.

    Every age band of the estimate is walked one year of age at a time, so the
    bands do not need to line up with the age groups of the pyramid.

    Args:
        pyramid_region: dict mapping inclusive ``(youngest, oldest)`` age groups
            to the number of people in that group.
        ifr_age_stratified: dict mapping inclusive ``(youngest, oldest)`` age
            bands to the IFR of that band, in percent.

    Returns:
        ``100 * expected deaths / population``, i.e. the population-weighted IFR
        in percent.

    Raises:
        AssertionError: if the ages covered by ``ifr_age_stratified`` do not add
            up to the whole population of the pyramid.
        ZeroDivisionError: if the population is zero.
    """
    pop = 0
    deaths = 0
    for (age_group, ifr) in ifr_age_stratified.items():
        for age in range(age_group[0], age_group[1] + 1):
            # look the head count up once and use it for both totals
            n = people_of_age(pyramid_region, age)
            pop += n
            deaths += n * ifr / 100.0
    # Sanity check that the IFR bands cover every age of the pyramid exactly once.
    # The per-age shares are floats, so adding them back up can differ from the
    # original total by a rounding error (ten shares of 1.0/10 sum to
    # 0.9999999999999999): compare with a tolerance instead of ==.
    total = sum(pyramid_region.values())
    assert math.isclose(pop, total), (
        f'IFR age bands cover {pop} people but the pyramid holds {total}')
    return 100.0 * deaths / pop
def calc_overall_ifrs():
    """Compute the overall IFR of every parsed region under every IFR estimate.

    Returns a list with one tuple per region of ``pyramid``:
    ``(region_name, ifr_for_1st_estimate, ifr_for_2nd_estimate, ...)``, the
    IFR values being in the same order as the entries of ``ifrs``.
    """
    results = []
    for region, region_pyramid in pyramid.items():
        # entry[1] is the age-stratified table of the estimate
        per_estimate = [overall_ifr(region_pyramid, entry[1]) for entry in ifrs]
        results.append((region, *per_estimate))
    return results
def show_overall_ifrs(oifrs):
    """Print the overall IFRs as a text table, one region per line.

    ``oifrs`` is a list of tuples
    ``(<region_name>, <ifr_according_to_1st_estimate>, <ifr_according_to_2nd_estimate>, ...)``.
    The list is sorted in place, highest first-estimate IFR at the top, and a
    header line naming the estimates is printed above and below the rows.
    """
    def header():
        # one right-aligned title per estimate, then the region column
        titles = ''.join(f'| {estimate[0]:>7} ' for estimate in ifrs)
        print(f'{titles}| Region |')

    # Sort on tuple index 1, that is on the IFR according to the 1st estimate.
    # To sort by region name instead, use index 0 (row[0]).
    oifrs.sort(key=lambda row: row[1], reverse=True)
    header()
    for name, *values in oifrs:
        cells = ''.join(f'| {value:7.3f} ' for value in values)
        print(f'{cells}| {name} |')
    header()
def main():
    """Parse the pyramids, compute every overall IFR and print the table."""
    parse_pyramids()
    show_overall_ifrs(calc_overall_ifrs())


# Run only when executed as a script, not when imported
if __name__ == '__main__':
    main()