Disparities Analysis: COVID-19 Cases

May 24, 2021

When looking at disparities for COVID-19 cases between different race/ethnicity groups, please keep in mind that some states and counties have data completeness issues, as discussed in the case data completeness analysis.

In [ ]:
#@title
import pandas as pd
import altair as alt
from vega_datasets import data

from google.colab import auth
# Run the Colab authentication flow for the current user.
# NOTE(review): presumably this supplies the credentials used by the
# BigQuery reads later in the notebook — confirm.
auth.authenticate_user()

# Turn off the three-dot menu for Altair/Vega charts.
alt.renderers.set_embed_options(actions=False)
#%load_ext google.colab.data_table
In [ ]:
#@title
# Metric label passed to the chart builders, and the dataset tag.
CASES = 'Cases'
DATASET = 'cdc'
metric = CASES

# BigQuery project and the backtick-quoted CDC table identifier built from it.
project_id = 'msm-secure-data-1b'
cdc_table = f'`{project_id}.ndunlap_secure.cdc_restricted_access_20210430`'

# Reporting date as a SQL expression and as the label shown in chart titles.
date, date_display_name = 'DATE(2021, 4, 15)', 'Apr 15'

# Chart settings.
total_cases_scale_max = 2000000
scatter_height = scatter_width = 350
map_height, map_width = 350, 500

# TopoJSON features for the state and county layers of the maps.
# NOTE(review): the trailing '#' presumably keeps the counties URL distinct
# from the states URL — confirm why it is needed.
us_states = alt.topo_feature(data.us_10m.url, 'states')
us_counties = alt.topo_feature(data.us_10m.url + '#', 'counties')

# State codes removed from the county-level data before charting.
territories = ('PR', 'GU', 'VI', 'MP', 'AS')
In [ ]:
#@title
# All queries count rows of the CDC table (one row per case) at a different
# grouping granularity. The table identifier is interpolated from cdc_table.

# Cases per state.
cdc_states_query = f'''
SELECT
  res_state,
  COUNT(*) as cdc_cases
FROM
  {cdc_table}
GROUP BY
   res_state
'''

# Cases per state, county and race/ethnicity.
cdc_counties_query = f'''
SELECT
  res_state,
  res_county,
  race_ethnicity_combined,
  COUNT(*) as cases
FROM
  {cdc_table}
GROUP BY
   res_county,
   res_state,
   race_ethnicity_combined
'''

# Cases per race/ethnicity across the whole table.
cdc_overall_query = f'''
SELECT
  race_ethnicity_combined,
  COUNT(*) as cases
FROM
  {cdc_table}
GROUP BY
   1
'''

# Cases per race/ethnicity and age group.
cdc_age_query = f'''
SELECT
  race_ethnicity_combined,
  age_group,
  COUNT(*) as cases
FROM
  {cdc_table}
GROUP BY
   1, 2
'''
In [ ]:
#@title
# County-level case counts by race/ethnicity, with the territories removed.
df = pd.io.gbq.read_gbq(cdc_counties_query, project_id=project_id)
df = df[~df['res_state'].isin(territories)]

# Lookup table from CDC state/county names to county FIPS codes.
project_id = 'msm-secure-data-1b'
df_county_fips_map = pd.io.gbq.read_gbq('''
SELECT
*
FROM
  `msm-secure-data-1b.ndunlap_secure.county_fips_mapping`
''', project_id=project_id)

# Index the lookup by "<state>-<lowercased county>", the same key shape that
# is built on the case data before the two tables are joined.
df_county_fips_map['cdc_county'] = df_county_fips_map['cdc_county'].str.lower()
df_county_fips_map['state_county'] = (
    df_county_fips_map['state'] + '-' + df_county_fips_map['cdc_county']
).astype('string').str.strip()
df_county_fips_map = df_county_fips_map.set_index('state_county')
In [ ]:
#@title
# County names are not unique across states, so rows are keyed by
# "<state>-<lowercased county>".
df['res_county'] = df['res_county'].str.lower()
df['state_county'] = (df['res_state'] + '-' + df['res_county']).astype('string').str.strip()
df = df.set_index('state_county')
df['race_ethnicity_combined'] = df['race_ethnicity_combined'].astype('string').str.strip()

# CDC race/ethnicity labels -> short column names used after the crosstab.
# 'Missing' and 'Unknown' share one bucket; 'NA' is kept separate.
race_ethnicity_combined_map = {
    'Asian, Non-Hispanic': 'asian_cases',
    'Black, Non-Hispanic': 'black_cases',
    'White, Non-Hispanic': 'white_cases',
    'American Indian/Alaska Native, Non-Hispanic': 'aian_cases',
    'Hispanic/Latino': 'hispanic_cases',
    'Multiple/Other, Non-Hispanic': 'other_cases',
    'Native Hawaiian/Other Pacific Islander, Non-Hispanic': 'nhpi_cases',
    'Missing': 'unknown_cases',
    'Unknown': 'unknown_cases',
    'NA': 'na_cases',
}
df = df.replace({'race_ethnicity_combined': race_ethnicity_combined_map})
In [ ]:
#@title
# Attach the FIPS lookup columns; the inner join keeps only rows whose
# state_county key exists in the lookup.
merged_df = df.join(
    df_county_fips_map,
    on='state_county',
    how='inner',
    lsuffix='_left',
    rsuffix='_right',
)

# Crosstab with rows = counties and columns = race_ethnicity_combined, plus a
# 'total_cases' margin on both axes.
crosstab_df = pd.crosstab(
    index=merged_df['county_fips'],
    columns=merged_df['race_ethnicity_combined'],
    values=merged_df['cases'],
    aggfunc=sum,
    margins=True,
    margins_name='total_cases',
)
# reset_index() turns county_fips back into a regular column.
crosstab_df = crosstab_df.reset_index()
# The last row is the 'total_cases' margin across all counties, not a county.
crosstab_df = crosstab_df.drop(index=len(crosstab_df) - 1)
crosstab_df['county_fips'] = crosstab_df['county_fips'].astype(int)
# Known = total minus the suppressed ('NA') and unknown buckets; either bucket
# may be NaN for a county, hence the fillna(0).
crosstab_df['total_known_cases'] = (
    crosstab_df['total_cases']
    - crosstab_df['na_cases'].fillna(0)
    - crosstab_df['unknown_cases'].fillna(0)
)
In [ ]:
#@title
# County-level ACS table, used here for display names and the per-group
# population columns read by later cells.
df_acs_name_lookup = pd.io.gbq.read_gbq('''
SELECT
  *
FROM
  `msm-internal-data.ipums_acs.acs_2019_5year_county`
''', project_id=project_id)

# Display label "<county>, <state>" replaces the separate name columns.
df_acs_name_lookup['state_county'] = (
    df_acs_name_lookup['county'].astype('string').str.strip()
    + ', '
    + df_acs_name_lookup['state'].astype('string').str.strip()
)
df_acs_name_lookup = (
    df_acs_name_lookup
    .drop(columns=['state', 'county'])
    .set_index('county_fips')
)

# Inner join: only counties present in both the case crosstab and the ACS table.
county_chart_df = crosstab_df.join(
    df_acs_name_lookup,
    on='county_fips',
    how='inner',
    lsuffix='_left',
    rsuffix='_right',
)
county_chart_df['county_fips'] = county_chart_df['county_fips'].astype(int)
In [ ]:
#@title

# Work on a deep copy so the chart-only columns are not added to county_chart_df.
chart_df = county_chart_df.copy(deep=True)
chart_df.reset_index(inplace=True)
chart_df['county_fips'] = chart_df['county_fips'].astype(int)

# Share of a county's cases whose race/ethnicity is known.
chart_df['percent_known_cases'] = round(
    chart_df['total_known_cases'] / chart_df['total_cases'], 2)

# na_cases is NaN for counties with no suppressed ('NA') cases. Treat that as
# zero, as is done when total_known_cases is computed; otherwise the NaN
# propagates and both columns below are lost for those counties.
chart_df['total_known_or_na_cases'] = (
    chart_df['total_known_cases'] + chart_df['na_cases'].fillna(0))
chart_df['percent_known_or_na_cases'] = round(
    chart_df['total_known_or_na_cases'] / chart_df['total_cases'], 2)
In [ ]:
#@title
race_ethnicity_groups = ['black', 'hispanic', 'aian', 'nhpi', 'asian', 'white', 'other']

def GenerateColNames(group):
  """Return the per-group column names, keyed by what each column holds.

  Args:
    group: Group prefix such as 'black'; each column name is this prefix
      followed by a fixed suffix.

  Returns:
    Dict mapping a logical key (e.g. 'cases_per_100') to the column name for
    that group (e.g. 'black_cases_per_100').
  """
  # (logical key, column suffix). Note 'pop_percent' maps to '<group>_percent'.
  key_and_suffix = (
      ('cases', '_cases'),
      ('pop', '_pop'),
      ('pop_percent', '_percent'),
      ('cases_per_100', '_cases_per_100'),
      ('cases_percent', '_cases_percent'),
      ('cases_percent_with_unknown', '_cases_percent_with_unknown'),
      ('cases_to_pop', '_cases_to_pop'),
      ('cases_to_pop_with_unknown', '_cases_to_pop_with_unknown'),
  )
  return {key: group + suffix for key, suffix in key_and_suffix}

# Column-name lookup for every race/ethnicity group, keyed by group.
group_names = {group: GenerateColNames(group) for group in race_ethnicity_groups}

for group in race_ethnicity_groups:
  names = group_names[group]
  group_cases = chart_df[names['cases']]
  pop_share = chart_df[names['pop_percent']]

  # Fraction of the group's population that appears as a case.
  chart_df[names['cases_per_100']] = round(group_cases / chart_df[names['pop']], 4)

  # Group share of cases: among cases with known race/ethnicity, and among
  # all cases. Kept unrounded for the ratios below.
  share_of_known = group_cases / chart_df.total_known_cases
  share_of_total = group_cases / chart_df.total_cases
  # Four decimals so the '.1%' tooltips show a meaningful tenth of a percent.
  chart_df[names['cases_percent']] = round(share_of_known, 4)
  chart_df[names['cases_percent_with_unknown']] = round(share_of_total, 4)

  # Ratio of case share to population share. Computed from the unrounded
  # shares: rounding a share to whole percents first distorts the ratio for
  # small groups (e.g. a 0.6% share over a 0.4% population share).
  chart_df[names['cases_to_pop']] = round(share_of_known / pop_share, 2)
  chart_df[names['cases_to_pop_with_unknown']] = round(share_of_total / pop_share, 2)

# Renumber the rows without keeping the old index as a column. A plain
# reset_index() inserts a junk 'index'/'level_0' column here and raises once
# that column already exists, i.e. when this cell is re-run.
chart_df.reset_index(drop=True, inplace=True)
In [ ]:
#@title
# When True, GenerateCountyMap drops counties that fail the thresholds below.
filter_data = False
#MIN_POP_PERCENT = 0.001
MIN_POP, MIN_PERCENT_KNOWN, MIN_CASES = 100, 0.5, 5

# Full display label per group key.
group_to_display_name = {
    'black': 'Black',
    'white': 'White',
    'hispanic': 'Hispanic/Latino',
    'asian': 'Asian',
    'nhpi': 'Native Hawaiian/Pacific Islander',
    'aian': 'American Indian/Alaska Native',
    'other': 'Other or multiple race/ethnicity',
    'total': 'Total',
}

# Short label per group key: the display label, abbreviated where it is long.
group_to_short_name = {
    **group_to_display_name,
    'hispanic': 'Hispanic',
    'nhpi': 'NHPI',
    'aian': 'AIAN',
    'other': 'Other',
}

# Color scheme per chart column: one for prevalence, a shared scheme for the
# two ratio columns, and a shared scheme for the two completeness columns.
chart_col_to_color_scheme = {
    'cases_per_100': 'yelloworangebrown',
    **dict.fromkeys(('cases_to_pop', 'cases_to_pop_with_unknown'), 'blueorange'),
    **dict.fromkeys(('percent_known_cases', 'percent_known_or_na_cases'), 'redyellowblue'),
}

# Legend number format per chart column: percentages, except one decimal
# place for the ratio columns.
chart_col_to_legend_format = {
    'cases_per_100': '.0%',
    **dict.fromkeys(('cases_to_pop', 'cases_to_pop_with_unknown'), '.1f'),
    **dict.fromkeys(('percent_known_cases', 'percent_known_or_na_cases'), '.0%'),
}

def GenerateCountyMap(chart_df, chart_col, group, group_names, metric, date):
  """Build a layered county choropleth for one chart column.

  Reads the module-level settings group_to_short_name, filter_data, MIN_POP,
  MIN_PERCENT_KNOWN, MIN_CASES, chart_col_to_color_scheme,
  chart_col_to_legend_format, us_counties and us_states.

  Args:
    chart_df: DataFrame with a 'county_fips' column plus the data columns
      looked up below (state_county, percent_known_cases, ... and, when a
      group is given, that group's columns).
    chart_col: Logical column to color by, e.g. 'cases_per_100'. It must be a
      key of the range, color scheme, legend format and title tables.
    group: Group key such as 'black'. If falsy, chart_col is used as the
      column name directly and no group columns or tooltips are added.
    group_names: Mapping of group key -> dict of logical key -> column name.
    metric: Metric label (e.g. 'Cases') used in titles and tooltips.
    date: Display date appended to the chart title.

  Returns:
    The alt.layer(...) chart: gray state fill, colored counties, and white
    state outlines on top.
  """
  names = group_names[group] if group else None
  color_field = names[chart_col] if group else chart_col
  group_display_name = group_to_short_name[group] if group else ''
  group_short_name = group_display_name

  # Color-scale domain per chart column; values outside it are clamped.
  chart_col_to_range = {
      'cases_per_100': [0, .2],
      'cases_to_pop': [0, 2],
      'cases_to_pop_with_unknown': [0, 2],
      'percent_known_cases': [0, 1],
      'percent_known_or_na_cases': [0, 1],
  }

  prevalence_text = 'that had COVID-19'
  ratio_title = f'Ratio of {group_display_name} {metric} Share to Population Share up to {date}'
  col_to_title = {
      'total_cases': f'{group_display_name} {metric} up to {date}',
      'cases_per_100': f'Percent of {group_display_name} Population {prevalence_text} up to {date}',
      'cases_to_pop': ratio_title,
      'cases_to_pop_with_unknown': ratio_title,
      'percent_known_cases': f'Percent of CDC {metric} with Known Race/Ethnicity up to {date}',
      'percent_known_or_na_cases': f'Percent of CDC {metric} with Known or Suppressed Race/Ethnicity up to {date}',
  }

  # Optional filter: keep only counties above all three thresholds.
  filtered_chart_df = chart_df
  if group and filter_data:
    #(chart_df[names['pop_percent']] > MIN_POP_PERCENT)
    keep = (
        (chart_df[names['pop']] > MIN_POP)
        & (chart_df['percent_known_cases'] > MIN_PERCENT_KNOWN)
        & (chart_df[names['cases']] > MIN_CASES)
    )
    filtered_chart_df = chart_df[keep]

  highlight = alt.selection_single(on='mouseover', fields=['id', 'county_fips'], empty='none')

  # Columns pulled from the DataFrame onto each county shape by the lookup.
  data_cols = [
      'state_county',
      'percent_known_cases',
      'percent_known_or_na_cases',
      'total_cases',
  ]
  if group:
    data_cols += [names[key] for key in (
        'cases',
        'pop',
        'pop_percent',
        'cases_per_100',
        'cases_percent',
        'cases_percent_with_unknown',
        'cases_to_pop',
        'cases_to_pop_with_unknown',
    )]

  # Tooltips: county and completeness always, then chart-specific additions.
  tooltips = [
      alt.Tooltip('state_county:N', title='County'),
      alt.Tooltip('percent_known_cases:Q', format='.0%', title=metric + ' with race/ethnicity'),
  ]
  if chart_col in ('percent_known_cases', 'percent_known_or_na_cases'):
    tooltips.append(alt.Tooltip('total_cases:Q', format=',.0f', title=metric))
  if chart_col == 'percent_known_or_na_cases':
    tooltips.append(
        alt.Tooltip('percent_known_or_na_cases:Q', format='.0%',
                    title=metric + ' with known or suppressed race/ethnicity'))
  if group:
    metric_lower = metric.lower()
    tooltips.append(
        alt.Tooltip(names['cases'] + ':Q', format=',',
                    title=group_short_name + ' ' + metric_lower))
    if chart_col == 'cases_per_100':
      tooltips += [
          alt.Tooltip(names['pop'] + ':Q', format=',',
                      title=group_short_name + ' population'),
          alt.Tooltip(names['cases_per_100'] + ':Q', format='.2%',
                      title='Percent ' + prevalence_text),
      ]
    elif chart_col in ('cases_to_pop', 'cases_to_pop_with_unknown'):
      tooltips += [
          alt.Tooltip(names['cases_percent_with_unknown'] + ':Q', format='.1%',
                      title='Percent of total ' + metric_lower),
          alt.Tooltip(names['cases_percent'] + ':Q', format='.1%',
                      title='Percent of known race/ethnicity ' + metric_lower),
          alt.Tooltip(names['pop_percent'] + ':Q', format='.1%',
                      title=group_short_name + ' percent of population'),
          alt.Tooltip(names['cases_to_pop'] + ':Q', format='.2f',
                      title='Ratio of percent of known race/ethnicity ' + metric_lower + ' to percent of population'),
          alt.Tooltip(names['cases_to_pop_with_unknown'] + ':Q', format='.2f',
                      title='Ratio of percent of total ' + metric_lower + ' to percent of population'),
      ]

  color = alt.Color(
      color_field,
      type='quantitative',
      legend=alt.Legend(format=chart_col_to_legend_format[chart_col]),
      scale=alt.Scale(
          scheme=chart_col_to_color_scheme[chart_col],
          reverse=False,
          domain=chart_col_to_range[chart_col],
          clamp=True,
      ),
      title='',
  )

  # County shapes colored by the chosen column, joined to the data on FIPS id.
  counties = alt.Chart(us_counties).mark_geoshape(
      stroke='white',
      strokeOpacity=.2,
      strokeWidth=1,
  ).project(
      type='albersUsa'
  ).transform_lookup(
      lookup='id',
      from_=alt.LookupData(filtered_chart_df, 'county_fips', data_cols),
  ).encode(
      color,
      tooltip=tooltips,
  ).add_selection(
      highlight,
  )

  # Transparent state shapes with a white border, drawn above the counties.
  states_outline = alt.Chart(us_states).mark_geoshape(
      stroke='white',
      strokeWidth=1.5,
      fillOpacity=0,
      fill='white',
  ).project(
      type='albersUsa'
  )

  # Silver state shapes drawn underneath the counties.
  states_fill = alt.Chart(us_states).mark_geoshape(
      fill='silver',
      stroke='white',
  ).project('albersUsa')

  return alt.layer(states_fill, counties, states_outline).properties(
      title=col_to_title[chart_col],
  )
In [ ]:
#@title
# One map per (chart column, group), stored as group_charts[column][group].
group_charts = {
    'cases_per_100': {},
    'cases_to_pop': {},
    'cases_to_pop_with_unknown': {},
}

for group in race_ethnicity_groups:
  # The dict keys double as the list of chart columns to render.
  for value, charts_by_group in group_charts.items():
    charts_by_group[group] = GenerateCountyMap(
        chart_df, value, group, group_names, metric, date_display_name)
In [ ]:
#@title
# National case counts per race/ethnicity bucket, indexed by the short
# bucket names from race_ethnicity_combined_map.
overall_df = pd.io.gbq.read_gbq(cdc_overall_query, project_id=project_id)
overall_df['race_ethnicity_combined'] = (
    overall_df['race_ethnicity_combined'].astype('string').str.strip())
overall_df = (
    overall_df
    .replace({'race_ethnicity_combined': race_ethnicity_combined_map})
    .set_index('race_ethnicity_combined')
)

# Cases and populations are charted in millions.
chart_denominator = 1000000

# Population data from https://api.census.gov/data/2019/acs/acs1/profile?get=NAME,DP05_0071E,DP05_0078E,DP05_0077E,DP05_0080E,DP05_0081E,DP05_0079E,DP05_0070E&for=us:1
# Each row: (display name, case bucket, population).
overall_groups = [
    ('Hispanic/Latino', 'hispanic_cases', 60481746),
    ('Black', 'black_cases', 40596040),
    ('White', 'white_cases', 196789401),
    ('Asian', 'asian_cases', 18427914),
    ('Native Hawaiian/Pacific Islander', 'nhpi_cases', 565473),
    ('American Indian/Alaska Native', 'aian_cases', 2236348),
]
total_label, total_population = '*Total Including Unknowns*', 328239523

# The final entry of each list is the total over every bucket in the table.
case_counts = overall_df['cases']
cases_list = [case_counts[bucket] / chart_denominator for _, bucket, _ in overall_groups]
cases_list.append(case_counts.sum() / chart_denominator)
pop_list = [population / chart_denominator for _, _, population in overall_groups]
pop_list.append(total_population / chart_denominator)
percent_list = [cases / population for cases, population in zip(cases_list, pop_list)]

prevalence = pd.DataFrame({
    'group': [name for name, _, _ in overall_groups] + [total_label],
    'percent': percent_list,
    'cases': cases_list,
    'population': pop_list,
})

# Horizontal bars, one per group, sorted by descending prevalence.
bars = alt.Chart(prevalence).mark_bar().encode(
    x=alt.X('percent', axis=alt.Axis(format='.1%'), title=''),
    y=alt.Y('group', sort='-x', title=''),
    color=alt.Color(
        'group',
        scale=alt.Scale(scheme='tableau20'),
        title='',
        legend=None,
    ),
    tooltip=[
        alt.Tooltip('group:N', title='Race/Ethnicity Group'),
        alt.Tooltip('percent:Q', format='.2%', title='Prevalence in group'),
        alt.Tooltip('cases:Q', format=',.2f', title='Cases in group (millions)'),
        alt.Tooltip('population:Q', format=',.2f', title='Population of group (millions)'),
    ],
).properties(
    title=f'Percent of Race/Ethnicity Group who had COVID-19 based on Incomplete CDC Data up to {date_display_name}'
)

bars.display()
#alt.concat(bars).properties(
#    title=alt.TitleParams(
#        ['Source: U.S. Census Bureau\'s American Community Survey 2019 5-year estimates for population data.'],
#        baseline='bottom',
#        dy=20,
#        orient='bottom',
#        fontWeight='normal',
#        fontSize=11
#    )
#).display()
In [ ]:
#@title
# The age population numbers come from the ACS 2019 1-year via IPUMS microdata
# which allows us to calculate age buckets not available in the ACS API.
# We had to combine Asian and NHPI due to IPUMS reporting categories.
# https://usa.ipums.org/usa-action/variables/RACE#codes_section
# The age_dict was pre-calculated in another colab because it takes a while to run.
# https://colab.research.google.com/drive/1b2U0SvZq4oxRXDujc8oWg5LbQDJKRawe#scrollTo=GC1CVtok_GwL

age_dict = {'aian': {'0-9': 288641.0,
  '10-19': 341153.0,
  '20-29': 319143.0,
  '30-39': 302075.0,
  '40-49': 270994.0,
  '50-59': 284191.0,
  '60-69': 236906.0,
  '70-79': 122475.0,
  '80+': 46877.0,
  'total': 2212455.0},
 'api': {'0-9': 1857491.0,
  '10-19': 2136643.0,
  '20-29': 2788000.0,
  '30-39': 3235052.0,
  '40-49': 2909062.0,
  '50-59': 2361661.0,
  '60-69': 1893205.0,
  '70-79': 1084520.0,
  '80+': 560923.0,
  'total': 18826557.0},
 'black': {'0-9': 5144709.0,
  '10-19': 5869466.0,
  '20-29': 6302817.0,
  '30-39': 5703178.0,
  '40-49': 5152576.0,
  '50-59': 5060647.0,
  '60-69': 4240796.0,
  '70-79': 2154455.0,
  '80+': 1022325.0,
  'total': 40650969.0},
 'hispanic': {'0-9': 10021829.0,
  '10-19': 10673378.0,
  '20-29': 9714731.0,
  '30-39': 9112371.0,
  '40-49': 7989372.0,
  '50-59': 6076318.0,
  '60-69': 3913694.0,
  '70-79': 1991765.0,
  '80+': 991319.0,
  'total': 60484777.0},
 'other': {'0-9': 2339910.0,
  '10-19': 2027458.0,
  '20-29': 1513225.0,
  '30-39': 1169024.0,
  '40-49': 822915.0,
  '50-59': 626001.0,
  '60-69': 453421.0,
  '70-79': 222395.0,
  '80+': 95165.0,
  'total': 9269514.0},
 'total': {'0-9': 39028311.0,
  '10-19': 42736680.0,
  '20-29': 44726365.0,
  '30-39': 44148037.0,
  '40-49': 40644111.0,
  '50-59': 41899718.0,
  '60-69': 38395555.0,
  '70-79': 23971046.0,
  '80+': 12689700.0,
  'total': 328239523.0},
 'white': {'0-9': 19375731.0,
  '10-19': 21688582.0,
  '20-29': 24088449.0,
  '30-39': 24626337.0,
  '40-49': 23499192.0,
  '50-59': 27490900.0,
  '60-69': 27657533.0,
  '70-79': 18395436.0,
  '80+': 9973091.0,
  'total': 196795251.0}
}

# Rows = age buckets, columns = groups; the per-group 'total' row is removed
# so that only the age buckets remain.
age_pop_df = pd.DataFrame(age_dict).drop(index='total')
In [ ]:
#@title
# Case counts per race/ethnicity and age group, with race/ethnicity labels
# mapped to the short bucket names.
age_df = pd.io.gbq.read_gbq(cdc_age_query, project_id=project_id)
age_df['race_ethnicity_combined'] = (
    age_df['race_ethnicity_combined'].astype('string').str.strip())
age_df = age_df.replace({'race_ethnicity_combined': race_ethnicity_combined_map})

# Rows = age groups, columns = race/ethnicity buckets, plus a 'total_cases'
# margin on both axes.
age_crosstab_df = pd.crosstab(
    index=age_df['age_group'],
    columns=age_df['race_ethnicity_combined'],
    values=age_df['cases'],
    aggfunc=sum,
    margins=True,
    margins_name='total_cases',
)
# Drop the margin row and the rows for the 'Missing' and 'NA' age groups.
# ('Unknown' is intentionally not dropped: #age_crosstab_df.drop('Unknown').)
age_crosstab_df = age_crosstab_df.drop(index=['total_cases', 'Missing', 'NA'])

The CDC data allows us to see the percentage of people within each age and race/ethnicity group who had COVID-19.

In [ ]:
#@title
age_labels = ['0-9', '10-19', '20-29', '30-39', '40-49', '50-59', '60-69', '70-79', '80+']

# Cases and populations are charted in thousands.
chart_denominator = 1000

# Each row: (display name, case columns to add up, population column).
# Asian and NHPI cases are combined to match the 'api' population column.
age_chart_groups = [
    ('Hispanic/Latino', ['hispanic_cases'], 'hispanic'),
    ('Black', ['black_cases'], 'black'),
    ('White', ['white_cases'], 'white'),
    ('Asian/NHPI', ['asian_cases', 'nhpi_cases'], 'api'),
    ('AIAN', ['aian_cases'], 'aian'),
    ('*Total Including Unknowns*', ['total_cases'], 'total'),
]

race_list, cases_list, pop_list, percent_list = [], [], [], []
for display_name, case_cols, pop_col in age_chart_groups:
  # Missing age/group combinations are NaN in the crosstab; count them as
  # zero for EVERY column before adding. Previously nhpi_cases was left
  # unfilled in the percentage, so a missing NHPI bucket made the whole
  # Asian/NHPI percentage NaN.
  group_cases = age_crosstab_df[case_cols].fillna(0).sum(axis=1).values
  # Cases and population are paired by row position, not by label, so both
  # frames must list the same age buckets in the same order.
  group_pop = age_pop_df[pop_col].values
  race_list.extend([display_name] * len(age_labels))
  cases_list.extend(group_cases / chart_denominator)
  pop_list.extend(group_pop / chart_denominator)
  percent_list.extend(group_cases / group_pop)

age_chart_df = pd.DataFrame.from_dict({
    'group': race_list,
    'age': age_labels * len(age_chart_groups),
    'percent': percent_list,
    'cases': cases_list,
    'population': pop_list,
})

# One column of bars per group, one bar per age bucket.
alt.Chart(age_chart_df).mark_bar().encode(
      x=alt.X('percent', sort='y', axis=alt.Axis(format='.0%'), title=''),
      y=alt.Y('age', title='Age'),
      column=alt.Column('group',
                        title='Percent of Age and Race/Ethnicity Group who had COVID-19 based on Incomplete CDC Data up to %s' % date_display_name,
                        header=alt.Header(titleFontSize=13)
                        ),
      color=alt.Color('group',
                      # Set the 6 colors manually to match the 7 groups in the chart above.
                      scale=alt.Scale(range=['#4E79A7', '#A0CBE8', '#F28E2B', '#FFBE7D', '#59A14F', '#B6992D']), title='Race/Ethnicity', legend=None),
      tooltip=[
                  alt.Tooltip('group:N', title='Race/Ethnicity group'),
                  alt.Tooltip('age:N', title='Age'),
                  alt.Tooltip('percent:Q', format='.2%', title='Prevalence in group'),
                  alt.Tooltip('cases:Q', format=',.2f', title='Cases in group (thousands)'),
                  alt.Tooltip('population:Q', format=',.2f', title='Population of group (thousands)'),
      ]
).properties(
  width=110,
).display()

Note:

  • AIAN is American Indian/Alaska Native.
  • NHPI is Native Hawaiian/Pacific Islander.
  • We combined Asian and NHPI due to the limited availability of population data for those groups broken down by age (group descriptions).

Percentage of each population group who had COVID-19 based on incomplete data:

In [ ]:
#@title
# Resize the six per-group prevalence maps to a common size.
per_100_maps = group_charts['cases_per_100']
black, hispanic, white, asian, aian, nhpi = (
    per_100_maps[group_key].properties(width=450, height=325)
    for group_key in ('black', 'hispanic', 'white', 'asian', 'aian', 'nhpi')
)

# Three rows of two maps each.
map_grid = (black | hispanic) & (white | asian) & (aian | nhpi)
map_grid.configure_legend(
    orient='top',
    gradientLength=400,
    titleLimit=0,
).configure_view(
    strokeWidth=0,
).display()

We can also view disparities by comparing the percentage of cases with known race/ethnicity that a race/ethnicity group accounts for in a county (the cases share) vs. the percentage of the total population that the group accounts for in that county (the population share). There is no disparity when the cases share is equal to the population share for all race/ethnicity groups in a county (ratio = 1.0). When the ratio of cases share to population share is above 1.0, then a group has a disproportionate number of cases relative to its share of the population.

In [ ]:
#@title
# Resize the six per-group maps of case share vs. population share
# (share of cases with known race/ethnicity) to a common size.
ratio_maps = group_charts['cases_to_pop']
black, hispanic, white, asian, aian, nhpi = (
    ratio_maps[group_key].properties(width=450, height=325)
    for group_key in ('black', 'hispanic', 'white', 'asian', 'aian', 'nhpi')
)

# Three rows of two maps each.
map_grid = (black | hispanic) & (white | asian) & (aian | nhpi)
map_grid.configure_legend(
    orient='top',
    gradientLength=400,
    titleLimit=0,
).configure_view(
    strokeWidth=0,
).display()

We can also view disparities by comparing the percentage of total cases -- with or without known race/ethnicity -- that a race/ethnicity group accounts for in a county (the cases share) vs. the percentage of the total population that the group accounts for in that county (the population share). Counties with more complete data will change less vs. the charts above, and counties with less complete data will change more.

In [ ]:
#@title
# Resize the six per-group maps of case share vs. population share
# (share of all cases, including unknown race/ethnicity) to a common size.
ratio_with_unknown_maps = group_charts['cases_to_pop_with_unknown']
black, hispanic, white, asian, aian, nhpi = (
    ratio_with_unknown_maps[group_key].properties(width=450, height=325)
    for group_key in ('black', 'hispanic', 'white', 'asian', 'aian', 'nhpi')
)

# Three rows of two maps each.
map_grid = (black | hispanic) & (white | asian) & (aian | nhpi)
map_grid.configure_legend(
    orient='top',
    gradientLength=400,
    titleLimit=0,
).configure_view(
    strokeWidth=0,
).display()
In [ ]:
# Large county-level disparity maps
#@title
#for group in ['black', 'hispanic', 'white', 'asian', 'aian', 'nhpi']:
#  (group_charts['cases_per_100'][group]).properties(
#      width=900,
#      height=650,
#  ).configure_legend(
#      orient='top-right',
#      gradientLength=400,
#      titleLimit=0,
#      padding=0
#  ).configure_view(
#      strokeWidth=0,
#  ).display()
In [ ]:
#@title
#for group in ['black', 'hispanic', 'white', 'asian', 'aian', 'nhpi']:
#  (group_charts['cases_to_pop'][group]).properties(
#      width=900,
#      height=650,
#  ).configure_legend(
#      orient='top-right',
#      gradientLength=400,
#      titleLimit=0,
#      padding=0
#  ).configure_view(
#      strokeWidth=0,
#  ).display()

Data Citations and Disclaimers

  • CDC data full citation: Centers for Disease Control and Prevention, COVID-19 Response. COVID-19 Case Surveillance Data Access, Summary, and Limitations (version date: April 30, 2021).
  • Per the CDC data agreement: The CDC does not take responsibility for the scientific validity or accuracy of methodology, results, statistical analyses, or conclusions presented.
  • Population data: U.S. Census Bureau's American Community Survey 2019 5-year estimates accessed via API; e.g., sample query.
  • Age population data: U.S. Census Bureau's American Community Survey 2019 1-year estimates accessed via microdata from IPUMS USA, University of Minnesota, www.ipums.org to calculate 10-year age and race/ethnicity groups not available from the Census API.

Contact information

Please email us at shli-covid-data-analysis@googlegroups.com with questions or comments.

In [ ]:
#%%shell
#jupyter nbconvert --to html 'cdc_case_disparities.ipynb'