add covid_vs_flu analysis

2020-09-30 22:54:40 -07:00
parent 0dcf3bd2f9
commit e5c38dd4ae
3 changed files with 438 additions and 16 deletions
@@ -0,0 +1,375 @@
+#!/usr/bin/python3
+#
+# Author: Marc Bevand — @zorinaq
+
+import sys
+import matplotlib.pyplot as plt
+import matplotlib.ticker as ticker
+import numpy as np
+import scipy.stats
+from scipy.optimize import curve_fit
+
+maxage = 100
+
+# Age-stratified IFR estimates for COVID-19
+ifrs_covid = [
+
+        # Calculated from Spanish ENE-COVID study
+        # (see calc_ifr.py)
+        ('ENE-COVID', {
+            (0,9):    0.003,
+            (10,19):  0.004,
+            (20,29):  0.015,
+            (30,39):  0.030,
+            (40,49):  0.064,
+            (50,59):  0.213,
+            (60,69):  0.718,
+            (70,79):  2.384,
+            (80,89):  8.466,
+            (90,maxage): 12.497,
+        }),
+
+        # US CDC estimate as of 10 Sep 2020
+        # https://www.cdc.gov/coronavirus/2019-ncov/hcp/planning-scenarios.html
+        # (table 1)
+        ('US CDC', {
+            (0,19):   0.003,
+            (20,49):  0.02,
+            (50,69):  0.5,
+            (70,maxage): 5.4,
+        }),
+
+        # Verity et al.
+        # https://www.thelancet.com/journals/laninf/article/PIIS1473-3099(20)30243-7/fulltext
+        # (table 1)
+        ('Verity', {
+            (0,9):    0.00161,
+            (10,19):  0.00695,
+            (20,29):  0.0309,
+            (30,39):  0.0844,
+            (40,49):  0.161,
+            (50,59):  0.595,
+            (60,69):  1.93,
+            (70,79):  4.28,
+            (80,maxage): 7.80,
+        }),
+
+        # Levin et al.
+        # https://www.medrxiv.org/content/10.1101/2020.07.23.20160895v5
+        # (table 3)
+        ('Levin', {
+            (0,34):   0.004,
+            (35,44):  0.06,
+            (45,54):  0.2,
+            (55,64):  0.7,
+            (65,74):  2.3,
+            (75,84):  7.6,
+            (85,maxage): 22.3,
+        }),
+
+        # Perez-Saez et al.
+        # https://www.thelancet.com/journals/laninf/article/PIIS1473-3099(20)30584-3/fulltext
+        ('Perez-Saez', {
+            (5,9):    0.0016,
+            (10,19):  0.00032,
+            (20,49):  0.0092,
+            (50,64):  0.14,
+            (65,maxage): 5.6,
+        }),
+
+        # Poletti et al.
+        # https://www.eurosurveillance.org/content/10.2807/1560-7917.ES.2020.25.31.2001383
+        # (table 1, column "Any time")
+        ('Poletti', {
+            (0,19):   0,
+            (20,49):  0,
+            (50,59):  0.46,
+            (60,69):  1.42,
+            (70,79):  6.87,
+            (80,maxage):  18.35,
+        }),
+
+        # Picon et al.
+        # https://www.ncbi.nlm.nih.gov/pmc/articles/PMC7493765/
+        # (table 2)
+        ('Picon', {
+            (20,39):  0.08,
+            (40,59):  0.24,
+            (60,maxage):  4.63,
+        }),
+
+        # Gudbjartsson et al.: Humoral Immune Response to SARS-CoV-2 in Iceland
+        # https://www.nejm.org/doi/full/10.1056/NEJMoa2026116
+        # Supplementary Appendix 1
+        # https://www.nejm.org/doi/suppl/10.1056/NEJMoa2026116/suppl_file/nejmoa2026116_appendix_1.pdf
+        # (table S7)
+        ('Gudbjartsson', {
+            (0,70):   0.1,
+            (71,80):  2.4,
+            (81,maxage): 11.2,
+        }),
+
+        # Public Health Agency of Sweden
+        # https://www.folkhalsomyndigheten.se/contentassets/53c0dc391be54f5d959ead9131edb771/infection-fatality-rate-covid-19-stockholm-technical-report.pdf
+        # (table B.1)
+        ('PHAS', {
+            (0,49):   0.01,
+            (50,59):  0.27,
+            (60,69):  0.45,
+            (70,79):  1.92,
+            (80,89):  7.20,
+            (90,maxage):  16.21,
+        }),
+
+        # O’Driscoll et al.: Age-specific mortality and immunity patterns of SARS-CoV-2 infection in 45 countries
+        # https://www.medrxiv.org/content/10.1101/2020.08.24.20180851v1
+        # (table S4)
+        ('O’Driscoll', {
+            (0,4):   0.002,
+            (5,9):   0,
+            (10,14): 0,
+            (15,19): 0.002,
+            (20,24): 0.004,
+            (25,29): 0.009,
+            (30,34): 0.017,
+            (35,39): 0.029,
+            (40,44): 0.053,
+            (45,49): 0.086,
+            (50,54): 0.154,
+            (55,59): 0.241,
+            (60,64): 0.359,
+            (65,69): 0.642,
+            (70,74): 1.076,
+            (75,79): 2.276,
+            (80,maxage): 7.274,
+        }),
+
+        # Ward et al.: Antibody prevalence for SARS-CoV-2 in England following first peak of the pandemic: REACT2 study in 100,000 adults
+        # https://www.medrxiv.org/content/10.1101/2020.08.12.20173690v2
+        # Supplementary Appendix
+        # https://www.medrxiv.org/highwire/filestream/93745/field_highwire_adjunct_files/0/2020.08.12.20173690-1.docx
+        # (table S2a, column "Based on confirmed COVID-19 deaths")
+        ('REACT2', {
+            (15,44):  0.03,
+            (45,64):  0.52,
+            (65,74):  3.87,
+            (75,maxage): 18.71,
+        }),
+
+        # Yang et al.: Estimating the infection fatality risk of COVID-19 in New York City during the spring 2020 pandemic wave
+        # https://www.medrxiv.org/content/10.1101/2020.06.27.20141689v2
+        # (table 1)
+        ('Yang', {
+            (0,24):   0.0097,
+            (25,44):  0.12,
+            (45,64):  0.94,
+            (65,74):  4.87,
+            (75,maxage): 14.17,
+        }),
+
+        # Molenberghs et al.: Belgian Covid-19 Mortality, Excess Deaths, Number of Deaths per Million, and Infection Fatality Rates
+        # https://www.medrxiv.org/content/10.1101/2020.06.20.20136234v1
+        # (table 6)
+        ('Molenberghs', {
+            (0,24):   0.0005,
+            (25,44):  0.017,
+            (45,64):  0.21,
+            (65,74):  2.24,
+            (75,84):  4.29,
+            (85,maxage): 11.77,
+        }),
+
+]
+
+# In the CDC influenza burden pages (eg. table 1 in
+# https://www.cdc.gov/flu/about/burden/2018-2019.html), only symptomatic
+# illnesses are estimated. We must account for asymptomatic ones as well.
+#
+# In https://www.cdc.gov/flu/about/keyfacts.htm the CDC implies 55-60% of
+# illnesses are symptomatic:
+#
+# «on average, about 8% of the U.S. population gets sick from flu each season,
+# with a range of between 3% and 11%, depending on the season.
+# [...]
+# The commonly cited 5% to 20% estimate was based on a study that examined both
+# symptomatic and asymptomatic influenza illness, which means it also looked at
+# people who may have had the flu but never knew it because they didn’t have
+# any symptoms. The 3% to 11% range is an estimate of the proportion of people
+# who have symptomatic flu illness.»
+#
+# The CDC thus acknowledges that 55-60% of illnesses are symptomatic:
+# 3/5 = 60%
+# 11/20 = 55%
+#
+# We use the mid-point, 57.5%, as an estimate to account for both symptomatic
+# and asymptomatic illnesses.
+cdc_sympt = .575
+
+# Age-stratified IFR estimates for seasonal influenza
+ifrs_flu = [
+
+        # US CDC 2018-2019 influenza burden
+        # https://www.cdc.gov/flu/about/burden/2018-2019.html
+        ('US CDC 2018-2019', {
+            (0,4):              266/3_633_104 * 100 * cdc_sympt,
+            (5,17):             211/7_663_310 * 100 * cdc_sympt,
+            (18,49):            2_450/11_913_203 * 100 * cdc_sympt,
+            (50,64):            5_676/9_238_038 * 100 * cdc_sympt,
+            (65,maxage):        25_555/3_073_227 * 100 * cdc_sympt,
+        }),
+
+        # US CDC 2017-2018 influenza burden
+        # https://www.cdc.gov/flu/about/burden/2017-2018.htm
+        ('US CDC 2017-2018', {
+            (0,4):              115/3_678_342 * 100 * cdc_sympt,
+            (5,17):             528/7_512_601 * 100 * cdc_sympt,
+            (18,49):            2_803/14_428_065 * 100 * cdc_sympt,
+            (50,64):            6_751/13_237_932 * 100 * cdc_sympt,
+            (65,maxage):        50_903/5_945_690 * 100 * cdc_sympt,
+        }),
+
+        # US CDC 2016-2017 influenza burden
+        # https://www.cdc.gov/flu/about/burden/2016-2017.html
+        ('US CDC 2016-2017', {
+            (0,4):              126/2_381_218 * 100 * cdc_sympt,
+            (5,17):             125/6_452_110 * 100 * cdc_sympt,
+            (18,49):            1_365/9_292_804 * 100 * cdc_sympt,
+            (50,64):            3_780/7_448_184 * 100 * cdc_sympt,
+            (65,maxage):        32_833/3_646_206 * 100 * cdc_sympt,
+        }),
+
+        # US CDC 2015-2016 influenza burden
+        # https://www.cdc.gov/flu/about/burden/2015-2016.html
+        ('US CDC 2015-2016', {
+            (0,4):              180/2_195_276 * 100 * cdc_sympt,
+            (5,17):             88/4_140_269 * 100 * cdc_sympt,
+            (18,49):            1_703/9_121_242 * 100 * cdc_sympt,
+            (50,64):            3_277/6_640_358 * 100 * cdc_sympt,
+            (65,maxage):        17_458/1_407_174 * 100 * cdc_sympt,
+        }),
+
+        # US CDC 2014-2015 influenza burden
+        # https://www.cdc.gov/flu/about/burden/2014-2015.html
+        ('US CDC 2014-2015', {
+            (0,4):              396/3_207_314 * 100 * cdc_sympt,
+            (5,17):             407/6_388_401 * 100 * cdc_sympt,
+            (18,49):            985/8_606_083 * 100 * cdc_sympt,
+            (50,64):            4_780/7_283_766 * 100 * cdc_sympt,
+            (65,maxage):        44_808/4_679_888 * 100 * cdc_sympt,
+        }),
+
+]
+
+def col(is_covid, i):
+    if is_covid:
+        return plt.cm.OrRd(255 - i * 8)
+    else:
+        return plt.cm.Blues(255 - i * 30)
+
+def plot(ax, ifrs, is_covid):
+    lstyles = ('solid', 'dashed', 'dashdot', 'dotted')
+    markers = ('o', 's', 'v', '^', '<', '>', 'P', '*', 'X', 'D', 'p')
+    i = 0
+    for ifr in ifrs:
+        name, ifr_by_age = ifr
+        x, y = [], []
+        for age_group, ifr_val in sorted(ifr_by_age.items()):
+            # place the marker at the middle (mean) of the age group
+            x.append(np.mean(age_group))
+            y.append(ifr_val)
+        ax.plot(x, y, color=col(is_covid, i), label=name, lw=1, alpha=.8,
+                marker=markers[i % len(markers)], ms=4,
+                ls=lstyles[i % len(lstyles)])
+        i += 1
+
+def interpolate(age, x1, y1, x2, y2):
+    def func_exp(x, a, b):
+        return a * (b ** x)
+    popt, pcov = curve_fit(func_exp, [x1, x2], [y1, y2])
+    return func_exp(age, *popt)
+
+def ifr_for_model(age, ifr_model):
+    # calculate IFR for age <age>
+    m_prev = ifr_prev = None
+    # iterate over the age groups in order
+    for age_group, ifr in sorted(ifr_model[1].items()):
+        m = np.mean(age_group)
+        if m == age:
+            return ifr
+        if m > age:
+            if ifr_prev == None:
+                sys.stderr.write(f'{ifr_model[0]}: no data, age {age} too young\n')
+                return None
+            if ifr_prev == 0 or ifr == 0:
+                sys.stderr.write(f'{ifr_model[0]}: ignoring IFR zero for age {age}\n')
+                return None
+            return interpolate(age, m_prev, ifr_prev, m, ifr)
+        m_prev, ifr_prev = m, ifr
+    sys.stderr.write(f'{ifr_model[0]}: no data, age {age} too old\n')
+    return None
+
+def mean_ifr(age, ifr_models):
+    # calculate the geometric mean of IFR estimates in <ifr_models> for age <age>
+    values = []
+    for ifr_model in ifr_models:
+        ifr = ifr_for_model(age, ifr_model)
+        if ifr != None:
+            values.append(ifr)
+    return scipy.stats.gmean(values)
+
+def plot_comp(ax):
+    for age in np.arange(30, 90, 10):
+        y1 = mean_ifr(age, ifrs_flu)
+        y2 = mean_ifr(age, ifrs_covid)
+        assert not np.isnan(y1) and not np.isnan(y2)
+        ax.annotate(s='', xy=(age, y1), xytext=(age, y2),
+                arrowprops=dict(arrowstyle='|-|', shrinkA=0, shrinkB=0,
+                    alpha=.7))
+        ax.text(age, y1 * .6, f'{y2/y1:.1f}×', ha='center', va='top',
+                weight='bold', size=12, alpha=.7)
+
+def main():
+    (fig, ax) = plt.subplots(dpi=300, figsize=(8,6))
+    # plot ifrs_covid
+    plot(ax, ifrs_covid, True)
+    ax.text(3, 35, 'COVID-19:')
+    handles, labels = fig.gca().get_legend_handles_labels()
+    first_legend = ax.legend(handles=handles, labels=labels, loc='upper left',
+            frameon=False, fontsize='x-small', handlelength=5)
+    fig.gca().add_artist(first_legend)
+    # plot ifrs_flu
+    plot(ax, ifrs_flu, False)
+    # plot vertical comparison bars
+    plot_comp(ax)
+    ax.semilogy()
+    ax.grid(True, which='minor', linewidth=0.1)
+    ax.grid(True, which='major', linewidth=0.3)
+    ax.spines['top'].set_visible(False)
+    ax.spines['bottom'].set_visible(True)
+    ax.spines['left'].set_visible(True)
+    ax.spines['right'].set_visible(False)
+    ax.set_ylabel('IFR (%)')
+    ax.set_xlabel('Age')
+    ax.set_xlim(left=0)
+    ax.xaxis.set_minor_locator(ticker.MultipleLocator(base=5))
+    ax.xaxis.set_major_locator(ticker.MultipleLocator(base=10))
+    ax.yaxis.set_major_formatter(ticker.FormatStrFormatter('%g'))
+    ax.text(75, .0016, 'Seasonal Influenza:')
+    handles, labels = fig.gca().get_legend_handles_labels()
+    x = len(ifrs_flu)
+    ax.legend(handles=handles[-x:], labels=labels[-x:], loc='lower right',
+            frameon=False, fontsize='x-small', handlelength=5)
+    fig.suptitle('Infection Fatality Ratio of COVID-19 vs. Seasonal Influenza')
+    ax.text(0, -0.11,
+    'Source: https://github.com/mbevand/covid19-age-stratified-ifr\n'
+    'Note: the vertical lines on two COVID-19 IFR curves (Poletti and O’Driscoll) are caused by the IFR being\n'
+    'estimated to be zero by Poletti for age groups 0-19 and 20-49, and by O’Driscoll for 5-9 and 10-14.\n',
+            transform=ax.transAxes, fontsize='small', verticalalignment='top',
+    )
+    ax.text(1, 1, 'Created by: Marc Bevand — @zorinaq',
+            transform=ax.transAxes, fontsize='xx-small', va='top', ha='right')
+    fig.savefig('covid_vs_flu.png', bbox_inches='tight')
+    plt.close()
+
+if __name__ == '__main__':
+    main()