Disparities Analysis: COVID-19 Cases¶

May 24, 2021

When looking at disparities for COVID-19 cases between different race/ethnicity groups, please keep in mind that some states and counties have data completeness issues, as discussed in the case data completeness analysis.

#@title
import pandas as pd
import altair as alt
from vega_datasets import data

from google.colab import auth
# Authenticate the Colab user before any data is read.
# NOTE(review): presumably required so the BigQuery reads further down can
# access the restricted project -- confirm.
auth.authenticate_user()

# Turn off the three-dot menu for Altair/Vega charts.
alt.renderers.set_embed_options(actions=False)
#%load_ext google.colab.data_table

#@title
# Metric and dataset identifiers; `metric` is the display name used in chart
# titles and tooltips.
CASES = 'Cases'
DATASET = 'cdc'
metric = CASES

# BigQuery project and fully qualified (back-quoted) CDC case table.
project_id = 'msm-secure-data-1b'
cdc_table = f'`{project_id}.ndunlap_secure.cdc_restricted_access_20210430`'

# Data cut-off date: a short label for chart titles and a BigQuery DATE
# expression.
date_display_name = 'Apr 15'
date = 'DATE(2021, 4, 15)'

# Chart settings.
# Shared sizing constants for the charts (pixels for the height/width values).
total_cases_scale_max = 2000000
scatter_height = 350
scatter_width = 350
map_height = 350
map_width = 500
# TopoJSON feature handles from the vega_datasets US atlas: state outlines
# and county shapes.
us_states = alt.topo_feature(data.us_10m.url, 'states')
# NOTE(review): the trailing "#" presumably makes the counties URL differ from
# the states URL so the two layers are treated as separate data sources --
# confirm before removing.
us_counties = alt.topo_feature(data.us_10m.url+"#", 'counties')

# State codes of US territories; rows with these res_state values are removed
# from the county-level case data after it is loaded.
territories = ('PR', 'GU', 'VI', 'MP', 'AS')

#@title
# BigQuery SQL against the CDC case table. Every query counts rows (one row
# per case record) under a different grouping.

# Total cases per state.
cdc_states_query = f'''
SELECT
  res_state,
  COUNT(*) as cdc_cases
FROM
  {cdc_table}
GROUP BY
   res_state
'''

# Cases per state, county and race/ethnicity bucket.
cdc_counties_query = f'''
SELECT
  res_state,
  res_county,
  race_ethnicity_combined,
  COUNT(*) as cases
FROM
  {cdc_table}
GROUP BY
   res_county,
   res_state,
   race_ethnicity_combined
'''

# National cases per race/ethnicity bucket.
cdc_overall_query = f'''
SELECT
  race_ethnicity_combined,
  COUNT(*) as cases
FROM
  {cdc_table}
GROUP BY
   1
'''

# National cases per race/ethnicity bucket and age group.
cdc_age_query = f'''
SELECT
  race_ethnicity_combined,
  age_group,
  COUNT(*) as cases
FROM
  {cdc_table}
GROUP BY
   1, 2
'''

#@title
# County-level case counts per race/ethnicity bucket, with the territories
# removed one code at a time.
df = pd.io.gbq.read_gbq(cdc_counties_query, project_id=project_id)
for territory in territories:
  keep_rows = df['res_state'] != territory
  df = df[keep_rows]

# Lookup table that maps CDC state/county names to county FIPS codes.
project_id = 'msm-secure-data-1b'
county_fips_query = '''
SELECT
*
FROM
  `msm-secure-data-1b.ndunlap_secure.county_fips_mapping`
'''
df_county_fips_map = pd.io.gbq.read_gbq(county_fips_query, project_id=project_id)

# Index the lookup by a "<state>-<lower-cased county>" key, stripped of
# surrounding whitespace, so it can be joined against the case data.
df_county_fips_map['cdc_county'] = df_county_fips_map['cdc_county'].str.lower()
fips_join_key = df_county_fips_map['state'] + '-' + df_county_fips_map['cdc_county']
df_county_fips_map['state_county'] = fips_join_key.astype('string').str.strip()
df_county_fips_map = df_county_fips_map.set_index('state_county')

#@title
# Raw CDC race/ethnicity labels -> the case-count column names used in the
# rest of the analysis. 'Missing' and 'Unknown' share one bucket; 'NA' is
# kept separate.
race_ethnicity_combined_map = {
    'Asian, Non-Hispanic': 'asian_cases',
    'Black, Non-Hispanic': 'black_cases',
    'White, Non-Hispanic': 'white_cases',
    'American Indian/Alaska Native, Non-Hispanic': 'aian_cases',
    'Hispanic/Latino': 'hispanic_cases',
    'Multiple/Other, Non-Hispanic': 'other_cases',
    'Native Hawaiian/Other Pacific Islander, Non-Hispanic': 'nhpi_cases',
    'Missing': 'unknown_cases',
    'Unknown': 'unknown_cases',
    'NA': 'na_cases',
}

# County names repeat across states, so key each row by "<state>-<county>"
# (county lower-cased, whitespace stripped).
df['res_county'] = df['res_county'].str.lower()
county_join_key = df['res_state'] + '-' + df['res_county']
df['state_county'] = county_join_key.astype('string').str.strip()
df = df.set_index('state_county')

# Strip the race/ethnicity labels, then translate them to column names.
df['race_ethnicity_combined'] = df['race_ethnicity_combined'].astype('string').str.strip()
df = df.replace(to_replace={'race_ethnicity_combined': race_ethnicity_combined_map})

#@title
# Attach the FIPS lookup to every case row; rows without a match are dropped.
merged_df = df.join(
    df_county_fips_map,
    on="state_county",
    how='inner',
    lsuffix='_left',
    rsuffix='_right',
)

# Pivot to one row per county FIPS code and one column per race/ethnicity
# bucket. The margins add a 'total_cases' column and a trailing totals row.
county_by_race = pd.crosstab(
    index=merged_df['county_fips'],
    columns=merged_df['race_ethnicity_combined'],
    values=merged_df['cases'],
    aggfunc=sum,
    margins=True,
    margins_name='total_cases',
)
# Move county_fips out of the index into an ordinary column.
crosstab_df = county_by_race.reset_index()
# The margins totals row is the last one; remove it so only counties remain.
totals_row_label = len(crosstab_df) - 1
crosstab_df = crosstab_df.drop(index=totals_row_label)
crosstab_df['county_fips'] = crosstab_df['county_fips'].astype(int)

# Known-race cases = all cases minus the 'NA' and unknown buckets (empty
# buckets count as zero).
na_filled = crosstab_df['na_cases'].fillna(0)
unknown_filled = crosstab_df['unknown_cases'].fillna(0)
crosstab_df['total_known_cases'] = crosstab_df['total_cases'] - na_filled - unknown_filled

#@title
# County-level ACS table; it supplies the county display names and the
# columns joined onto the case crosstab below.
acs_county_query = '''
SELECT
  *
FROM
  `msm-internal-data.ipums_acs.acs_2019_5year_county`
'''
df_acs_name_lookup = pd.io.gbq.read_gbq(acs_county_query, project_id=project_id)

# Build a "<county>, <state>" display name, then drop the source columns and
# index the table by county FIPS code for the join.
acs_county_name = df_acs_name_lookup['county'].astype('string').str.strip()
acs_state_name = df_acs_name_lookup['state'].astype('string').str.strip()
df_acs_name_lookup['state_county'] = acs_county_name + ', ' + acs_state_name
df_acs_name_lookup = df_acs_name_lookup.drop(columns=['state', 'county'])
df_acs_name_lookup = df_acs_name_lookup.set_index('county_fips')

# Keep only counties present in both the case crosstab and the ACS table.
county_chart_df = crosstab_df.join(
    df_acs_name_lookup,
    on="county_fips",
    how='inner',
    lsuffix='_left',
    rsuffix='_right',
)
county_chart_df['county_fips'] = county_chart_df['county_fips'].astype(int)

#@title

# Build the chart table from an independent copy of county_chart_df and add
# the data-completeness columns used by the maps and tooltips.
chart_df = county_chart_df.copy(deep=True)
chart_df.reset_index(inplace=True)
chart_df.county_fips = chart_df.county_fips.astype(int)

# Share of a county's cases that carry a known race/ethnicity.
chart_df['percent_known_cases'] = round(chart_df.total_known_cases / chart_df.total_cases, 2)

# na_cases is NaN for counties that have no 'NA' rows in the crosstab. Count
# those as zero (as is done when total_known_cases is computed); otherwise
# the sum, and the percentage derived from it, would become NaN for exactly
# the counties with the most complete data.
chart_df['total_known_or_na_cases'] = chart_df.total_known_cases + chart_df.na_cases.fillna(0)
chart_df['percent_known_or_na_cases'] = round(chart_df.total_known_or_na_cases / chart_df.total_cases, 2)

#@title
race_ethnicity_groups = ['black', 'hispanic', 'aian', 'nhpi', 'asian', 'white', 'other']

def GenerateColNames(group):
  """Return the chart_df column names for one race/ethnicity group.

  Args:
    group: Group key such as 'black'; it is used as the column-name prefix.

  Returns:
    A dict mapping each measure key ('cases', 'pop', 'pop_percent', ...) to
    the column name "<group><suffix>". Note that the 'pop_percent' measure
    maps to the "<group>_percent" column.
  """
  measure_suffixes = (
      ('cases', '_cases'),
      ('pop', '_pop'),
      ('pop_percent', '_percent'),
      ('cases_per_100', '_cases_per_100'),
      ('cases_percent', '_cases_percent'),
      ('cases_percent_with_unknown', '_cases_percent_with_unknown'),
      ('cases_to_pop', '_cases_to_pop'),
      ('cases_to_pop_with_unknown', '_cases_to_pop_with_unknown'),
  )
  return {measure: group + suffix for measure, suffix in measure_suffixes}

# Column names for every race/ethnicity group, keyed by group.
group_names = {group: GenerateColNames(group) for group in race_ethnicity_groups}

# Derive the per-group rate, share and disparity-ratio columns.
for group in race_ethnicity_groups:
  cols = group_names[group]
  group_cases = chart_df[cols['cases']]
  population_share = chart_df[cols['pop_percent']]

  # Group cases divided by group population (a fraction, 4 decimals).
  chart_df[cols['cases_per_100']] = (group_cases / chart_df[cols['pop']]).round(4)
  # Group's share of cases with known race/ethnicity, and of all cases.
  chart_df[cols['cases_percent']] = (group_cases / chart_df.total_known_cases).round(2)
  chart_df[cols['cases_percent_with_unknown']] = (group_cases / chart_df.total_cases).round(2)
  # Ratios of the (already rounded) case shares to the population share.
  chart_df[cols['cases_to_pop']] = (
      chart_df[cols['cases_percent']] / population_share).round(2)
  chart_df[cols['cases_to_pop_with_unknown']] = (
      chart_df[cols['cases_percent_with_unknown']] / population_share).round(2)

chart_df.reset_index(inplace=True)

#@title
# Master switch for the optional county filtering in GenerateCountyMap: when
# True (and a group is selected) counties must exceed every MIN_* threshold
# below to be drawn.
filter_data = False
#MIN_POP_PERCENT = 0.001
# Minimum group population in the county.
MIN_POP = 100
# Minimum share of the county's cases with a known race/ethnicity.
MIN_PERCENT_KNOWN = 0.5
# Minimum number of group cases in the county.
MIN_CASES = 5

# Full display name for each group key.
group_to_display_name = {
    'black': 'Black',
    'white': 'White',
    'hispanic': 'Hispanic/Latino',
    'asian': 'Asian',
    'nhpi': 'Native Hawaiian/Pacific Islander',
    'aian': 'American Indian/Alaska Native',
    'other': 'Other or multiple race/ethnicity',
    'total': 'Total'
}

# Abbreviated name for each group key; this is the table GenerateCountyMap
# reads for chart titles and tooltips.
group_to_short_name = {
    'black': 'Black',
    'white': 'White',
    'hispanic': 'Hispanic',
    'asian': 'Asian',
    'nhpi': 'NHPI',
    'aian': 'AIAN',
    'other': 'Other',
    'total': 'Total'
}

# Color scheme name passed to alt.Scale(scheme=...) for each chart column.
chart_col_to_color_scheme = {
    'cases_per_100': 'yelloworangebrown',
    'cases_to_pop': 'blueorange',
    'cases_to_pop_with_unknown': 'blueorange',
    'percent_known_cases': 'redyellowblue',
    'percent_known_or_na_cases': 'redyellowblue',
}
# Legend number format passed to alt.Legend(format=...) for each chart column:
# percentages for the fraction-valued columns, one decimal for the ratios.
chart_col_to_legend_format = {
    'cases_per_100': '.0%',
    'cases_to_pop': '.1f',
    'cases_to_pop_with_unknown': '.1f',
    'percent_known_cases': '.0%',
    'percent_known_or_na_cases': '.0%',
}

def GenerateCountyMap(chart_df, chart_col, group, group_names, metric, date):
  """Build a layered county-level choropleth for one chart column.

  Args:
    chart_df: DataFrame with one row per county. It must contain
      'county_fips', 'state_county', 'percent_known_cases',
      'percent_known_or_na_cases' and 'total_cases', plus every column named
      in group_names[group] when a group is given.
    chart_col: Measure to color by: 'cases_per_100', 'cases_to_pop',
      'cases_to_pop_with_unknown', 'percent_known_cases' or
      'percent_known_or_na_cases'.
    group: Race/ethnicity group key (e.g. 'black'), or a falsy value for
      measures that are not group-specific.
    group_names: Dict mapping each group key to its column-name dict (the
      shape returned by GenerateColNames).
    metric: Display name of the metric (e.g. 'Cases'), used in the title and
      tooltips.
    date: Display string for the data cut-off date, used in the title.

  Returns:
    An Altair layered chart: a silver state fill, the colored county layer
    and white state outlines on top.

  Raises:
    KeyError: If chart_col (or group) is not present in the lookup tables.
  """
  group_chart_col = chart_col
  group_display_name = ''
  # Bound unconditionally so the name always exists; it is only read by the
  # group-specific tooltips further down.
  group_short_name = ''
  if group:
    group_chart_col = group_names[group][chart_col]
    group_display_name = group_to_short_name[group]
    group_short_name = group_to_short_name[group]

  # Color-scale domain per chart column; values outside it are clamped.
  chart_col_to_range = {
      'cases_per_100': [0, .2],
      'cases_to_pop': [0, 2],
      'cases_to_pop_with_unknown': [0, 2],
      'percent_known_cases': [0, 1],
      'percent_known_or_na_cases': [0, 1],
  }

  prevalence_text = 'that had COVID-19'

  col_to_title = {
      'total_cases': group_display_name + ' ' + metric + ' up to ' + date,
      'cases_per_100': 'Percent of ' + group_display_name + ' Population ' + prevalence_text + ' up to ' + date,
      'cases_to_pop': 'Ratio of ' + group_display_name + ' ' + metric + ' Share to Population Share' + ' up to ' + date,
      'cases_to_pop_with_unknown': 'Ratio of ' + group_display_name + ' ' + metric + ' Share to Population Share' + ' up to ' + date,
      'percent_known_cases': 'Percent of CDC ' + metric + ' with Known Race/Ethnicity' + ' up to ' + date,
      'percent_known_or_na_cases': 'Percent of CDC ' + metric + ' with Known or Suppressed Race/Ethnicity' + ' up to ' + date,
  }

  # Optional filtering (module-level filter_data switch): keep only counties
  # that exceed every MIN_* threshold for the selected group.
  filtered_chart_df = chart_df
  if group and filter_data:
    #filtered_chart_df = filtered_chart_df[filtered_chart_df[group_names[group]['pop_percent']] > MIN_POP_PERCENT]
    filtered_chart_df = filtered_chart_df[filtered_chart_df[group_names[group]['pop']] > MIN_POP]
    filtered_chart_df = filtered_chart_df[filtered_chart_df['percent_known_cases'] > MIN_PERCENT_KNOWN]
    filtered_chart_df = filtered_chart_df[filtered_chart_df[group_names[group]['cases']] > MIN_CASES]

  highlight = alt.selection_single(on='mouseover', fields=['id', 'county_fips'], empty='none')

  # Columns copied from the DataFrame onto each county shape by the lookup
  # transform; everything a tooltip or the color encoding reads must be here.
  data_cols = ['state_county',
               'percent_known_cases',
               'percent_known_or_na_cases',
               'total_cases']
  if group:
    data_cols.extend([
        group_names[group]['cases'],
        group_names[group]['pop'],
        group_names[group]['pop_percent'],
        group_names[group]['cases_per_100'],
        group_names[group]['cases_percent'],
        group_names[group]['cases_percent_with_unknown'],
        group_names[group]['cases_to_pop'],
        group_names[group]['cases_to_pop_with_unknown'],
    ])

  # Tooltips: county name and data completeness always, then the fields
  # relevant to the selected chart column.
  tooltips = [alt.Tooltip('state_county:N', title='County'),
              alt.Tooltip('percent_known_cases:Q', format='.0%', title=metric + ' with race/ethnicity')
  ]
  if chart_col in ('percent_known_cases', 'percent_known_or_na_cases'):
    tooltips.extend([
        alt.Tooltip('total_cases:Q', format=',.0f', title=metric)
    ])
  if chart_col == 'percent_known_or_na_cases':
    tooltips.extend([
        alt.Tooltip('percent_known_or_na_cases:Q', format='.0%',
                    title=metric + ' with known or suppressed race/ethnicity')
    ])
  if group:
    tooltips.extend([
        alt.Tooltip(group_names[group]['cases'] + ':Q', format=',',
                    title=group_short_name + ' ' + metric.lower()),
    ])
    if chart_col == 'cases_per_100':
      tooltips.extend([
          alt.Tooltip(group_names[group]['pop'] + ':Q', format=',',
                      title=group_short_name + ' population'),
          alt.Tooltip(group_names[group]['cases_per_100'] + ':Q', format='.2%',
                      title='Percent ' + prevalence_text)
      ])
    elif chart_col in ('cases_to_pop', 'cases_to_pop_with_unknown'):
      tooltips.extend([
          alt.Tooltip(group_names[group]['cases_percent_with_unknown'] + ':Q', format='.1%',
                      title='Percent of total ' + metric.lower()),
          alt.Tooltip(group_names[group]['cases_percent'] + ':Q', format='.1%',
                      title='Percent of known race/ethnicity ' + metric.lower()),
          alt.Tooltip(group_names[group]['pop_percent'] + ':Q', format='.1%',
                      title=group_short_name + ' percent of population'),
          alt.Tooltip(group_names[group]['cases_to_pop'] + ':Q', format='.2f',
                      title='Ratio of percent of known race/ethnicity ' + metric.lower() + ' to percent of population'),
          alt.Tooltip(group_names[group]['cases_to_pop_with_unknown'] + ':Q', format='.2f',
                      title='Ratio of percent of total ' + metric.lower() + ' to percent of population'),
      ])

  # No chart column uses a reversed color scale. (The previous code had a
  # branch for 'cases_to_pop' that also assigned False, i.e. a no-op.)
  reverse_scale = False

  # County layer: county shapes joined to the data by FIPS code (the topojson
  # 'id') and colored by the selected column.
  plot = alt.Chart(us_counties).mark_geoshape(
      stroke='white',
      strokeOpacity=.2,
      strokeWidth=1
  ).project(
      type='albersUsa'
  ).transform_lookup(
      lookup='id',
      from_=alt.LookupData(filtered_chart_df, 'county_fips', data_cols)
  ).encode(
      alt.Color(group_chart_col,
                type='quantitative',
                legend=alt.Legend(format=chart_col_to_legend_format[chart_col]),
                scale=alt.Scale(scheme=chart_col_to_color_scheme[chart_col],
                                reverse=reverse_scale,
                                domain=chart_col_to_range[chart_col],
                                clamp=True,
                                ),
                title=''),
      tooltip=tooltips
  ).add_selection(
      highlight,
  )

  # Transparent state shapes with white borders, drawn above the counties.
  states_outline = alt.Chart(us_states).mark_geoshape(stroke='white', strokeWidth=1.5, fillOpacity=0, fill='white').project(
      type='albersUsa'
  )

  # Silver background drawn below the county layer.
  states_fill = alt.Chart(us_states).mark_geoshape(
      fill='silver',
      stroke='white'
  ).project('albersUsa')

  layered_map = alt.layer(states_fill, plot, states_outline).properties(
      title=col_to_title[chart_col],
  )
  return layered_map

#@title
# One county map per (measure, group) pair, stored as
# group_charts[measure][group].
map_measures = ('cases_per_100', 'cases_to_pop', 'cases_to_pop_with_unknown')
group_charts = {measure: {} for measure in map_measures}

for group in race_ethnicity_groups:
  for value in map_measures:
    county_map = GenerateCountyMap(
        chart_df, value, group, group_names, metric, date_display_name)
    group_charts[value][group] = county_map

#@title
# National case totals per race/ethnicity bucket, indexed by bucket name.
overall_df = pd.io.gbq.read_gbq(cdc_overall_query, project_id=project_id)
overall_df['race_ethnicity_combined'] = overall_df['race_ethnicity_combined'].astype('string').str.strip()
overall_df = overall_df.replace(to_replace={'race_ethnicity_combined': race_ethnicity_combined_map})
overall_df = overall_df.set_index('race_ethnicity_combined')

# Cases and population are both charted in millions.
chart_denominator = 1000000

# (display name, case bucket, population) for every charted group.
# Population data from https://api.census.gov/data/2019/acs/acs1/profile?get=NAME,DP05_0071E,DP05_0078E,DP05_0077E,DP05_0080E,DP05_0081E,DP05_0079E,DP05_0070E&for=us:1
group_rows = [
    ('Hispanic/Latino', 'hispanic_cases', 60481746),
    ('Black', 'black_cases', 40596040),
    ('White', 'white_cases', 196789401),
    ('Asian', 'asian_cases', 18427914),
    ('Native Hawaiian/Pacific Islander', 'nhpi_cases', 565473),
    ('American Indian/Alaska Native', 'aian_cases', 2236348),
]
total_population = 328239523

# The final entry of each list is the national total over every bucket,
# including the unknown ones.
cases_list = [overall_df.cases[bucket] / chart_denominator for _, bucket, _ in group_rows]
cases_list.append(overall_df.cases.sum() / chart_denominator)

pop_list = [population / chart_denominator for _, _, population in group_rows]
pop_list.append(total_population / chart_denominator)

percent_list = [cases / population for cases, population in zip(cases_list, pop_list)]

group_labels = [label for label, _, _ in group_rows]
group_labels.append('*Total Including Unknowns*')

prevalence = pd.DataFrame.from_dict({
    'group': group_labels,
    'percent': percent_list,
    'cases': cases_list,
    'population': pop_list,
})

# Horizontal bars, sorted by prevalence, one color per group.
prevalence_tooltips = [
    alt.Tooltip('group:N', title='Race/Ethnicity Group'),
    alt.Tooltip('percent:Q', format='.2%', title='Prevalence in group'),
    alt.Tooltip('cases:Q', format=',.2f', title='Cases in group (millions)'),
    alt.Tooltip('population:Q', format=',.2f', title='Population of group (millions)'),
]
bars = alt.Chart(prevalence).mark_bar().encode(
    x=alt.X('percent', axis=alt.Axis(format='.1%'), title=''),
    y=alt.Y('group', sort='-x', title=''),
    color=alt.Color('group',
                    scale=alt.Scale(scheme='tableau20'),
                    title='',
                    legend=None),
    tooltip=prevalence_tooltips,
).properties(
    title=f'Percent of Race/Ethnicity Group who had COVID-19 based on Incomplete CDC Data up to {date_display_name}'
)

bars.display()
#alt.concat(bars).properties(
#    title=alt.TitleParams(
#        ['Source: U.S. Census Bureau\'s American Community Survey 2019 5-year estimates for population data.'],
#        baseline='bottom',
#        dy=20,
#        orient='bottom',
#        fontWeight='normal',
#        fontSize=11
#    )
#).display()

#@title
# The age population numbers come from the ACS 2019 1-year estimates via IPUMS
# microdata, which lets us calculate age buckets not available in the ACS API.
# We had to combine Asian and NHPI (the 'api' key) due to IPUMS reporting
# categories.
# https://usa.ipums.org/usa-action/variables/RACE#codes_section
# The age_dict was pre-calculated in another colab because it takes a while
# to run.
# https://colab.research.google.com/drive/1b2U0SvZq4oxRXDujc8oWg5LbQDJKRawe#scrollTo=GC1CVtok_GwL

# Population counts keyed by group, then by 10-year age bucket; each group
# also carries a 'total' entry.
age_dict = {'aian': {'0-9': 288641.0,
  '10-19': 341153.0,
  '20-29': 319143.0,
  '30-39': 302075.0,
  '40-49': 270994.0,
  '50-59': 284191.0,
  '60-69': 236906.0,
  '70-79': 122475.0,
  '80+': 46877.0,
  'total': 2212455.0},
 'api': {'0-9': 1857491.0,
  '10-19': 2136643.0,
  '20-29': 2788000.0,
  '30-39': 3235052.0,
  '40-49': 2909062.0,
  '50-59': 2361661.0,
  '60-69': 1893205.0,
  '70-79': 1084520.0,
  '80+': 560923.0,
  'total': 18826557.0},
 'black': {'0-9': 5144709.0,
  '10-19': 5869466.0,
  '20-29': 6302817.0,
  '30-39': 5703178.0,
  '40-49': 5152576.0,
  '50-59': 5060647.0,
  '60-69': 4240796.0,
  '70-79': 2154455.0,
  '80+': 1022325.0,
  'total': 40650969.0},
 'hispanic': {'0-9': 10021829.0,
  '10-19': 10673378.0,
  '20-29': 9714731.0,
  '30-39': 9112371.0,
  '40-49': 7989372.0,
  '50-59': 6076318.0,
  '60-69': 3913694.0,
  '70-79': 1991765.0,
  '80+': 991319.0,
  'total': 60484777.0},
 'other': {'0-9': 2339910.0,
  '10-19': 2027458.0,
  '20-29': 1513225.0,
  '30-39': 1169024.0,
  '40-49': 822915.0,
  '50-59': 626001.0,
  '60-69': 453421.0,
  '70-79': 222395.0,
  '80+': 95165.0,
  'total': 9269514.0},
 'total': {'0-9': 39028311.0,
  '10-19': 42736680.0,
  '20-29': 44726365.0,
  '30-39': 44148037.0,
  '40-49': 40644111.0,
  '50-59': 41899718.0,
  '60-69': 38395555.0,
  '70-79': 23971046.0,
  '80+': 12689700.0,
  'total': 328239523.0},
 'white': {'0-9': 19375731.0,
  '10-19': 21688582.0,
  '20-29': 24088449.0,
  '30-39': 24626337.0,
  '40-49': 23499192.0,
  '50-59': 27490900.0,
  '60-69': 27657533.0,
  '70-79': 18395436.0,
  '80+': 9973091.0,
  'total': 196795251.0}
}

# One column per group, one row per age bucket; the per-group 'total' row is
# dropped so only the age buckets remain.
age_pop_df = pd.DataFrame.from_dict(age_dict)
age_pop_df = age_pop_df.drop('total')

#@title
# National case counts per race/ethnicity bucket and age group.
age_df = pd.io.gbq.read_gbq(cdc_age_query, project_id=project_id)
age_df['race_ethnicity_combined'] = age_df['race_ethnicity_combined'].astype('string').str.strip()
age_df = age_df.replace(to_replace={'race_ethnicity_combined': race_ethnicity_combined_map})

# Rows = age groups, columns = race/ethnicity buckets plus a 'total_cases'
# margin column.
age_crosstab_df = pd.crosstab(
    index=age_df['age_group'],
    columns=age_df['race_ethnicity_combined'],
    values=age_df['cases'],
    aggfunc=sum,
    margins=True,
    margins_name='total_cases',
)

# Remove the margin totals row and the 'Missing' and 'NA' age rows, one label
# at a time. An 'Unknown' row is not dropped (that drop was left disabled).
for unwanted_row in ('total_cases', 'Missing', 'NA'):
  age_crosstab_df = age_crosstab_df.drop(unwanted_row)

The CDC data allows us to see the percentage of people within each age and race/ethnicity group who had COVID-19.

#@title
# Prevalence by age bucket and race/ethnicity group, one chart facet per group.
age_buckets = ['0-9', '10-19', '20-29', '30-39', '40-49', '50-59', '60-69', '70-79', '80+']

# Cases and population are both charted in thousands.
chart_denominator = 1000

# Age/group combinations without any reported case are NaN in the crosstab.
# Fill them with zero ONCE and derive every list from the same arrays, so the
# case counts and the prevalence always agree. (Previously the Asian/NHPI
# prevalence added the unfilled NHPI column and turned NaN whenever NHPI had
# no cases in an age bucket, while the case count for that bar was a number.)
age_cases = age_crosstab_df.fillna(0)

# (facet label, cases per age bucket, population per age bucket). Asian and
# NHPI cases are summed to match the combined 'api' population column.
# The rows of age_crosstab_df and age_pop_df are paired by position.
group_series = [
    ('Hispanic/Latino', age_cases.hispanic_cases.values, age_pop_df.hispanic.values),
    ('Black', age_cases.black_cases.values, age_pop_df.black.values),
    ('White', age_cases.white_cases.values, age_pop_df.white.values),
    ('Asian/NHPI', age_cases.asian_cases.values + age_cases.nhpi_cases.values, age_pop_df.api.values),
    ('AIAN', age_cases.aian_cases.values, age_pop_df.aian.values),
    ('*Total Including Unknowns*', age_cases.total_cases.values, age_pop_df.total.values),
]

race_list = []
cases_list = []
pop_list = []
percent_list = []
for group_label, group_cases, group_pop in group_series:
  race_list.extend([group_label] * len(age_buckets))
  cases_list.extend(group_cases / chart_denominator)
  pop_list.extend(group_pop / chart_denominator)
  percent_list.extend(group_cases / group_pop)

age_chart_df = pd.DataFrame.from_dict({'group': race_list,
                               'age': age_buckets * len(group_series),
                               'percent': percent_list,
                               'cases': cases_list,
                               'population': pop_list
                               })
alt.Chart(age_chart_df).mark_bar().encode(
      x=alt.X('percent', sort='y', axis=alt.Axis(format='.0%'), title=''),
      y=alt.Y('age', title='Age'),
      column=alt.Column('group',
                        title='Percent of Age and Race/Ethnicity Group who had COVID-19 based on Incomplete CDC Data up to %s' % date_display_name,
                        header=alt.Header(titleFontSize=13)
                        ),
      color=alt.Color('group',
                      # Six hand-picked colors, one per facet, chosen to line
                      # up with the palette of the seven-group chart above.
                      scale=alt.Scale(range=['#4E79A7', '#A0CBE8', '#F28E2B', '#FFBE7D', '#59A14F', '#B6992D']), title='Race/Ethnicity', legend=None),
      tooltip=[
                  alt.Tooltip('group:N', title='Race/Ethnicity group'),
                  alt.Tooltip('age:N', title='Age'),
                  alt.Tooltip('percent:Q', format='.2%', title='Prevalence in group'),
                  alt.Tooltip('cases:Q', format=',.2f', title='Cases in group (thousands)'),
                  alt.Tooltip('population:Q', format=',.2f', title='Population of group (thousands)'),
      ]
).properties(
  width=110,
).display()

Note:

AIAN is American Indian/Alaska Native.
NHPI is Native Hawaiian/Pacific Islander.
We combined Asian and NHPI due to the limited availability of population data for those groups broken down by age (group descriptions).

Percentage of each population group who had COVID-19 based on incomplete data:

#@title
# Prevalence maps (cases_per_100), one per group, laid out two per row.
map_size = {'width': 450, 'height': 325}
prevalence_maps = group_charts['cases_per_100']
black = prevalence_maps['black'].properties(**map_size)
hispanic = prevalence_maps['hispanic'].properties(**map_size)
white = prevalence_maps['white'].properties(**map_size)
asian = prevalence_maps['asian'].properties(**map_size)
aian = prevalence_maps['aian'].properties(**map_size)
nhpi = prevalence_maps['nhpi'].properties(**map_size)

# `|` places charts side by side, `&` stacks the rows.
top_row = black | hispanic
middle_row = white | asian
bottom_row = aian | nhpi
map_grid = top_row & middle_row & bottom_row

map_grid = map_grid.configure_legend(
    orient='top',
    gradientLength=400,
    titleLimit=0,
)
map_grid.configure_view(strokeWidth=0).display()

We can also view disparities by comparing the percentage of cases with known race/ethnicity that a race/ethnicity group accounts for in a county (the cases share) vs. the percentage of the total population that the group accounts for in that county (the population share). There is no disparity when the cases share is equal to the population share for all race/ethnicity groups in a county (ratio = 1.0). When the ratio of cases share to population share is above 1.0, a group has a disproportionate number of cases relative to its share of the population; when the ratio is below 1.0, the group has fewer cases than its population share would predict.

#@title
# Disparity-ratio maps (cases_to_pop: share of known-race cases vs. share of
# population), one per group, laid out two per row.
map_size = {'width': 450, 'height': 325}
ratio_maps = group_charts['cases_to_pop']
black = ratio_maps['black'].properties(**map_size)
hispanic = ratio_maps['hispanic'].properties(**map_size)
white = ratio_maps['white'].properties(**map_size)
asian = ratio_maps['asian'].properties(**map_size)
aian = ratio_maps['aian'].properties(**map_size)
nhpi = ratio_maps['nhpi'].properties(**map_size)

# `|` places charts side by side, `&` stacks the rows.
top_row = black | hispanic
middle_row = white | asian
bottom_row = aian | nhpi
map_grid = top_row & middle_row & bottom_row

map_grid = map_grid.configure_legend(
    orient='top',
    gradientLength=400,
    titleLimit=0,
)
map_grid.configure_view(strokeWidth=0).display()

We can also view disparities by comparing the percentage of total cases -- with or without known race/ethnicity -- that a race/ethnicity group accounts for in a county (the cases share) vs. the percentage of the total population that the group accounts for in that county (the population share). Counties with more complete data will change less compared with the charts above, and counties with less complete data will change more.

#@title
# Disparity-ratio maps computed against ALL cases, including those without a
# known race/ethnicity (cases_to_pop_with_unknown), two per row.
map_size = {'width': 450, 'height': 325}
ratio_with_unknown_maps = group_charts['cases_to_pop_with_unknown']
black = ratio_with_unknown_maps['black'].properties(**map_size)
hispanic = ratio_with_unknown_maps['hispanic'].properties(**map_size)
white = ratio_with_unknown_maps['white'].properties(**map_size)
asian = ratio_with_unknown_maps['asian'].properties(**map_size)
aian = ratio_with_unknown_maps['aian'].properties(**map_size)
nhpi = ratio_with_unknown_maps['nhpi'].properties(**map_size)

# `|` places charts side by side, `&` stacks the rows.
top_row = black | hispanic
middle_row = white | asian
bottom_row = aian | nhpi
map_grid = top_row & middle_row & bottom_row

map_grid = map_grid.configure_legend(
    orient='top',
    gradientLength=400,
    titleLimit=0,
)
map_grid.configure_view(strokeWidth=0).display()

# Large county-level disparity maps
#@title
#for group in ['black', 'hispanic', 'white', 'asian', 'aian', 'nhpi']:
#  (group_charts['cases_per_100'][group]).properties(
#      width=900,
#      height=650,
#  ).configure_legend(
#      orient='top-right',
#      gradientLength=400,
#      titleLimit=0,
#      padding=0
#  ).configure_view(
#      strokeWidth=0,
#  ).display()

#@title
#for group in ['black', 'hispanic', 'white', 'asian', 'aian', 'nhpi']:
#  (group_charts['cases_to_pop'][group]).properties(
#      width=900,
#      height=650,
#  ).configure_legend(
#      orient='top-right',
#      gradientLength=400,
#      titleLimit=0,
#      padding=0
#  ).configure_view(
#      strokeWidth=0,
#  ).display()

Data Citations and Disclaimers¶

CDC data full citation: Centers for Disease Control and Prevention, COVID-19 Response. COVID-19 Case Surveillance Data Access, Summary, and Limitations (version date: April 30, 2021).
Per the CDC data agreement: The CDC does not take responsibility for the scientific validity or accuracy of methodology, results, statistical analyses, or conclusions presented.
Population data: U.S. Census Bureau's American Community Survey 2019 5-year estimates accessed via API; e.g., sample query.
Age population data: U.S. Census Bureau's American Community Survey 2019 1-year estimates accessed via microdata from IPUMS USA, University of Minnesota, www.ipums.org to calculate 10-year age and race/ethnicity groups not available from the Census API.

Contact information¶

Please email us at shli-covid-data-analysis@googlegroups.com with questions or comments.

#%%shell
#jupyter nbconvert --to html 'cdc_case_disparities.ipynb'