May 24, 2021
When looking at disparities for COVID-19 cases between different race/ethnicity groups, please keep in mind that some states and counties have data completeness issues, as discussed in the case data completeness analysis.
#@title
import pandas as pd
import altair as alt
from vega_datasets import data
from google.colab import auth
auth.authenticate_user()
# Turn off the three-dot menu for Altair/Vega charts.
alt.renderers.set_embed_options(actions=False)
#%load_ext google.colab.data_table
#@title
# Metric label used in chart titles and tooltips. DATASET is not referenced
# by any of the cells visible in this notebook.
CASES = 'Cases'
DATASET = 'cdc'
metric = CASES

# BigQuery project and the CDC restricted-access snapshot table queried below.
project_id = 'msm-secure-data-1b'
cdc_table = f'`{project_id}.ndunlap_secure.cdc_restricted_access_20210430`'

# Cut-off date: `date` is spelled as a BigQuery DATE(...) expression and
# `date_display_name` is the label that ends up in chart titles.
# NOTE(review): none of the queries visible in this notebook interpolate
# `date` -- confirm whether a date filter was intended.
date = 'DATE(2021, 4, 15)'
date_display_name = 'Apr 15'

# Chart settings.
total_cases_scale_max = 2000000
scatter_height = scatter_width = 350
map_height = 350
map_width = 500

# TopoJSON layers for the base maps. The trailing "#" on the county URL
# presumably keeps the two layers from being treated as the same dataset
# -- TODO confirm.
us_states = alt.topo_feature(data.us_10m.url, 'states')
us_counties = alt.topo_feature(data.us_10m.url + "#", 'counties')

# State codes of the territories that are removed from the county-level data.
territories = ('PR', 'GU', 'VI', 'MP', 'AS')
#@title
# SQL text for the aggregations run against `cdc_table`.
# NOTE(review): no query here has a WHERE clause, so the `date` cut-off
# defined in the configuration is not applied in SQL -- confirm that the
# snapshot table itself is already limited to the intended date range.

# Case count per state of residence.
cdc_states_query = f'''
SELECT
res_state,
COUNT(*) as cdc_cases
FROM
{cdc_table}
GROUP BY
res_state
'''

# Case count per (state, county, race/ethnicity).
cdc_counties_query = f'''
SELECT
res_state,
res_county,
race_ethnicity_combined,
COUNT(*) as cases
FROM
{cdc_table}
GROUP BY
res_county,
res_state,
race_ethnicity_combined
'''

# Nationwide case count per race/ethnicity.
cdc_overall_query = f'''
SELECT
race_ethnicity_combined,
COUNT(*) as cases
FROM
{cdc_table}
GROUP BY
1
'''

# Nationwide case count per (race/ethnicity, age group).
cdc_age_query = f'''
SELECT
race_ethnicity_combined,
age_group,
COUNT(*) as cases
FROM
{cdc_table}
GROUP BY
1, 2
'''
#@title
# County-level case counts by race/ethnicity, with the territories removed.
df = pd.io.gbq.read_gbq(cdc_counties_query, project_id=project_id)
df = df[~df.res_state.isin(territories)]

# Same value as in the configuration cell; re-stated before the next query.
project_id = 'msm-secure-data-1b'

# Lookup table that maps the CDC (state, county name) pair to a county FIPS code.
fips_mapping_query = '''
SELECT
*
FROM
`msm-secure-data-1b.ndunlap_secure.county_fips_mapping`
'''
df_county_fips_map = pd.io.gbq.read_gbq(fips_mapping_query, project_id=project_id)

# Index the lookup by a normalised "<state>-<lower-cased county>" key so it can
# be joined against the case data, which builds the same key.
df_county_fips_map['cdc_county'] = df_county_fips_map['cdc_county'].str.lower()
df_county_fips_map['state_county'] = (
    (df_county_fips_map['state'] + '-' + df_county_fips_map['cdc_county'])
    .astype('string')
    .str.strip()
)
df_county_fips_map.set_index('state_county', inplace=True)
#@title
# County names are not unique across states, so rows are keyed by
# "<state>-<lower-cased county>".
df['res_county'] = df['res_county'].str.lower()
df['state_county'] = (
    (df['res_state'] + '-' + df['res_county'])
    .astype('string')
    .str.strip()
)
df.set_index('state_county', inplace=True)

# Translate the CDC race/ethnicity labels into the column names used for the
# rest of the analysis. 'Missing' and 'Unknown' are deliberately merged into
# one bucket; 'NA' stays separate.
df['race_ethnicity_combined'] = df['race_ethnicity_combined'].astype('string').str.strip()
race_ethnicity_combined_map = {
    # Groups with a known race/ethnicity.
    'Asian, Non-Hispanic': 'asian_cases',
    'Black, Non-Hispanic': 'black_cases',
    'White, Non-Hispanic': 'white_cases',
    'American Indian/Alaska Native, Non-Hispanic': 'aian_cases',
    'Hispanic/Latino': 'hispanic_cases',
    'Multiple/Other, Non-Hispanic': 'other_cases',
    'Native Hawaiian/Other Pacific Islander, Non-Hispanic': 'nhpi_cases',
    # Race/ethnicity not available.
    'Missing': 'unknown_cases',
    'Unknown': 'unknown_cases',
    'NA': 'na_cases',
}
df = df.replace(to_replace={'race_ethnicity_combined': race_ethnicity_combined_map})
#@title
# Attach the county FIPS code to every (county, race/ethnicity) row; counties
# without a match in the FIPS lookup are dropped by the inner join.
merged_df = df.join(
    df_county_fips_map,
    on='state_county',
    how='inner',
    lsuffix='_left',
    rsuffix='_right',
)

# Crosstab with rows = counties and columns = race/ethnicity buckets, plus a
# 'total_cases' margin. aggfunc is given as the string 'sum' rather than the
# builtin `sum`: pandas maps both to the same groupby sum today, but warns
# that a builtin callable will be applied directly in a future version.
crosstab_df = pd.crosstab(
    merged_df['county_fips'],
    merged_df['race_ethnicity_combined'],
    values=merged_df['cases'],
    aggfunc='sum',
    margins=True,
    margins_name='total_cases',
)

# Only the per-county margin column is needed. Remove the grand-total row by
# its label (not by position), then turn the county_fips index into a column.
crosstab_df = crosstab_df.drop(index='total_cases').reset_index()
crosstab_df['county_fips'] = crosstab_df['county_fips'].astype(int)

# Cases with a known race/ethnicity = all cases minus the suppressed ('NA')
# and unknown buckets; a county with no rows in a bucket has NaN there.
crosstab_df['total_known_cases'] = (
    crosstab_df['total_cases']
    - crosstab_df['na_cases'].fillna(0)
    - crosstab_df['unknown_cases'].fillna(0)
)
#@title
# County-level ACS 2019 5-year table, keyed by county FIPS code.
acs_county_query = '''
SELECT
*
FROM
`msm-internal-data.ipums_acs.acs_2019_5year_county`
'''
df_acs_name_lookup = pd.io.gbq.read_gbq(acs_county_query, project_id=project_id)

# Human-readable "<county>, <state>" label used in chart tooltips; the two
# source columns are dropped once the label has been built.
county_names = df_acs_name_lookup['county'].astype('string').str.strip()
state_names = df_acs_name_lookup['state'].astype('string').str.strip()
df_acs_name_lookup['state_county'] = county_names + ', ' + state_names
df_acs_name_lookup.drop(columns=['state', 'county'], inplace=True)
df_acs_name_lookup.set_index('county_fips', inplace=True)

# Keep only counties present in both the case crosstab and the ACS table.
county_chart_df = crosstab_df.join(
    df_acs_name_lookup,
    on='county_fips',
    how='inner',
    lsuffix='_left',
    rsuffix='_right',
)
county_chart_df['county_fips'] = county_chart_df['county_fips'].astype(int)
#@title
# Work on a copy so the joined county table stays untouched.
chart_df = county_chart_df.copy(deep=True)
chart_df.reset_index(inplace=True)
chart_df['county_fips'] = chart_df['county_fips'].astype(int)

# Share of a county's cases that carry a known race/ethnicity.
chart_df['percent_known_cases'] = (
    chart_df['total_known_cases'] / chart_df['total_cases']
).round(2)

# Same share, but counting suppressed ('NA') cases as known. na_cases is NaN
# for counties without suppressed cases, so it must be filled with 0 here;
# otherwise both columns below become NaN for exactly those counties.
chart_df['total_known_or_na_cases'] = (
    chart_df['total_known_cases'] + chart_df['na_cases'].fillna(0)
)
chart_df['percent_known_or_na_cases'] = (
    chart_df['total_known_or_na_cases'] / chart_df['total_cases']
).round(2)
#@title
# Group keys for which per-group chart columns are derived.
race_ethnicity_groups = ['black', 'hispanic', 'aian', 'nhpi', 'asian', 'white', 'other']


def GenerateColNames(group):
    """Return the chart_df column names that belong to one group.

    Args:
      group: Group key prefix, e.g. 'black'.

    Returns:
      Dict mapping a logical field name (e.g. 'cases_per_100') to the concrete
      column name, which is `group` followed by a fixed suffix.
    """
    # (logical field, column suffix). Note that 'pop_percent' maps to the
    # '_percent' suffix, not '_pop_percent'.
    suffixes = (
        ('cases', '_cases'),
        ('pop', '_pop'),
        ('pop_percent', '_percent'),
        ('cases_per_100', '_cases_per_100'),
        ('cases_percent', '_cases_percent'),
        ('cases_percent_with_unknown', '_cases_percent_with_unknown'),
        ('cases_to_pop', '_cases_to_pop'),
        ('cases_to_pop_with_unknown', '_cases_to_pop_with_unknown'),
    )
    return {field: group + suffix for field, suffix in suffixes}
# Column-name lookup per group, e.g. group_names['black']['cases'].
group_names = {name: GenerateColNames(name) for name in race_ethnicity_groups}

# Derive the per-group metrics that the county maps plot.
for group in race_ethnicity_groups:
    cols = group_names[group]
    group_cases = chart_df[cols['cases']]
    pop_share = chart_df[cols['pop_percent']]

    # Fraction of the group's population with a reported case (despite the
    # "per_100" name this is a 0-1 fraction; it is formatted as a percentage).
    chart_df[cols['cases_per_100']] = (group_cases / chart_df[cols['pop']]).round(4)

    # Group's share of cases: among known race/ethnicity, and among all cases.
    chart_df[cols['cases_percent']] = (
        group_cases / chart_df['total_known_cases']).round(2)
    chart_df[cols['cases_percent_with_unknown']] = (
        group_cases / chart_df['total_cases']).round(2)

    # Ratio of case share to population share, computed from the already
    # rounded share columns stored above.
    chart_df[cols['cases_to_pop']] = (
        chart_df[cols['cases_percent']] / pop_share).round(2)
    chart_df[cols['cases_to_pop_with_unknown']] = (
        chart_df[cols['cases_percent_with_unknown']] / pop_share).round(2)

chart_df.reset_index(inplace=True)
#@title
# Optional per-group county filter applied by the map builder (off by default).
filter_data = False
#MIN_POP_PERCENT = 0.001
MIN_POP = 100
MIN_PERCENT_KNOWN = 0.5
MIN_CASES = 5

# (group key, long display name, short display name)
_group_labels = (
    ('black', 'Black', 'Black'),
    ('white', 'White', 'White'),
    ('hispanic', 'Hispanic/Latino', 'Hispanic'),
    ('asian', 'Asian', 'Asian'),
    ('nhpi', 'Native Hawaiian/Pacific Islander', 'NHPI'),
    ('aian', 'American Indian/Alaska Native', 'AIAN'),
    ('other', 'Other or multiple race/ethnicity', 'Other'),
    ('total', 'Total', 'Total'),
)
group_to_display_name = {key: long_name for key, long_name, _ in _group_labels}
group_to_short_name = {key: short_name for key, _, short_name in _group_labels}

# (chart column, Vega colour scheme, legend number format)
_chart_col_settings = (
    ('cases_per_100', 'yelloworangebrown', '.0%'),
    ('cases_to_pop', 'blueorange', '.1f'),
    ('cases_to_pop_with_unknown', 'blueorange', '.1f'),
    ('percent_known_cases', 'redyellowblue', '.0%'),
    ('percent_known_or_na_cases', 'redyellowblue', '.0%'),
)
chart_col_to_color_scheme = {col: scheme for col, scheme, _ in _chart_col_settings}
chart_col_to_legend_format = {col: fmt for col, _, fmt in _chart_col_settings}
def GenerateCountyMap(chart_df, chart_col, group, group_names, metric, date):
    """Build a layered county choropleth for one chart column.

    Reads the module-level `us_counties`, `us_states`, `group_to_short_name`,
    `chart_col_to_color_scheme`, `chart_col_to_legend_format`, `filter_data`
    and the MIN_* thresholds.

    Args:
      chart_df: DataFrame with one row per county. Must provide `county_fips`,
        `state_county`, `percent_known_cases`, `percent_known_or_na_cases`,
        `total_cases` and, when `group` is given, that group's columns.
      chart_col: Which metric to colour by, e.g. 'cases_per_100'. Must be a key
        of the colour-scheme, legend-format, range and title tables.
      group: Group key such as 'black', or a falsy value to plot `chart_col`
        directly as a column of `chart_df`.
      group_names: Mapping of group key to the column-name dict produced by
        GenerateColNames.
      metric: Metric label used in the title and tooltips, e.g. 'Cases'.
      date: Display form of the cut-off date appended to the title.

    Returns:
      An Altair layered chart: silver state fill at the bottom, the county
      choropleth in the middle, white state outlines on top.
    """
    # Resolve the concrete column to plot and the group label for titles.
    if group:
        group_cols = group_names[group]
        group_chart_col = group_cols[chart_col]
        group_display_name = group_short_name = group_to_short_name[group]
    else:
        group_cols = None
        group_chart_col = chart_col
        group_display_name = ''

    # Colour-scale domain per chart column; values outside it are clamped.
    chart_col_to_range = {
        'cases_per_100': [0, .2],
        'cases_to_pop': [0, 2],
        'cases_to_pop_with_unknown': [0, 2],
        'percent_known_cases': [0, 1],
        'percent_known_or_na_cases': [0, 1],
    }

    prevalence_text = 'that had COVID-19'
    up_to = ' up to ' + date
    share_ratio_title = (
        'Ratio of ' + group_display_name + ' ' + metric
        + ' Share to Population Share' + up_to)
    col_to_title = {
        'total_cases': group_display_name + ' ' + metric + up_to,
        'cases_per_100': (
            'Percent of ' + group_display_name + ' Population '
            + prevalence_text + up_to),
        'cases_to_pop': share_ratio_title,
        'cases_to_pop_with_unknown': share_ratio_title,
        'percent_known_cases': (
            'Percent of CDC ' + metric + ' with Known Race/Ethnicity' + up_to),
        'percent_known_or_na_cases': (
            'Percent of CDC ' + metric
            + ' with Known or Suppressed Race/Ethnicity' + up_to),
    }

    # Optionally hide counties where the group is too small, too few of the
    # cases have a known race/ethnicity, or the group has too few cases.
    filtered_chart_df = chart_df
    if group and filter_data:
        #pop_percent_ok = chart_df[group_cols['pop_percent']] > MIN_POP_PERCENT
        keep = (
            (chart_df[group_cols['pop']] > MIN_POP)
            & (chart_df['percent_known_cases'] > MIN_PERCENT_KNOWN)
            & (chart_df[group_cols['cases']] > MIN_CASES)
        )
        filtered_chart_df = chart_df[keep]

    highlight = alt.selection_single(
        on='mouseover', fields=['id', 'county_fips'], empty='none')

    # Columns pulled into the map via the lookup, and the tooltip rows.
    data_cols = [
        'state_county',
        'percent_known_cases',
        'percent_known_or_na_cases',
        'total_cases',
    ]
    tooltips = [
        alt.Tooltip('state_county:N', title='County'),
        alt.Tooltip('percent_known_cases:Q', format='.0%',
                    title=metric + ' with race/ethnicity'),
    ]
    if chart_col in ('percent_known_cases', 'percent_known_or_na_cases'):
        tooltips.append(
            alt.Tooltip('total_cases:Q', format=',.0f', title=metric))
    if chart_col == 'percent_known_or_na_cases':
        tooltips.append(
            alt.Tooltip('percent_known_or_na_cases:Q', format='.0%',
                        title=metric + ' with known or suppressed race/ethnicity'))

    if group:
        data_cols += [
            group_cols[field]
            for field in (
                'cases',
                'pop',
                'pop_percent',
                'cases_per_100',
                'cases_percent',
                'cases_percent_with_unknown',
                'cases_to_pop',
                'cases_to_pop_with_unknown',
            )
        ]
        metric_lower = metric.lower()
        tooltips.append(
            alt.Tooltip(group_cols['cases'] + ':Q', format=',',
                        title=group_short_name + ' ' + metric_lower))
        if chart_col == 'cases_per_100':
            tooltips += [
                alt.Tooltip(group_cols['pop'] + ':Q', format=',',
                            title=group_short_name + ' population'),
                alt.Tooltip(group_cols['cases_per_100'] + ':Q', format='.2%',
                            title='Percent ' + prevalence_text),
            ]
        elif chart_col in ('cases_to_pop', 'cases_to_pop_with_unknown'):
            tooltips += [
                alt.Tooltip(group_cols['cases_percent_with_unknown'] + ':Q', format='.1%',
                            title='Percent of total ' + metric_lower),
                alt.Tooltip(group_cols['cases_percent'] + ':Q', format='.1%',
                            title='Percent of known race/ethnicity ' + metric_lower),
                alt.Tooltip(group_cols['pop_percent'] + ':Q', format='.1%',
                            title=group_short_name + ' percent of population'),
                alt.Tooltip(group_cols['cases_to_pop'] + ':Q', format='.2f',
                            title='Ratio of percent of known race/ethnicity '
                            + metric_lower + ' to percent of population'),
                alt.Tooltip(group_cols['cases_to_pop_with_unknown'] + ':Q', format='.2f',
                            title='Ratio of percent of total '
                            + metric_lower + ' to percent of population'),
            ]

    # County choropleth: county shapes are matched to data rows by FIPS id.
    county_layer = (
        alt.Chart(us_counties)
        .mark_geoshape(stroke='white', strokeOpacity=.2, strokeWidth=1)
        .project(type='albersUsa')
        .transform_lookup(
            lookup='id',
            from_=alt.LookupData(filtered_chart_df, 'county_fips', data_cols),
        )
        .encode(
            alt.Color(
                group_chart_col,
                type='quantitative',
                legend=alt.Legend(format=chart_col_to_legend_format[chart_col]),
                scale=alt.Scale(
                    scheme=chart_col_to_color_scheme[chart_col],
                    # The colour scale is never reversed.
                    reverse=False,
                    domain=chart_col_to_range[chart_col],
                    clamp=True,
                ),
                title='',
            ),
            tooltip=tooltips,
        )
        .add_selection(highlight)
    )

    states_outline = (
        alt.Chart(us_states)
        .mark_geoshape(stroke='white', strokeWidth=1.5, fillOpacity=0, fill='white')
        .project(type='albersUsa')
    )
    # Drawn underneath, so counties without a data row show as silver.
    states_fill = (
        alt.Chart(us_states)
        .mark_geoshape(fill='silver', stroke='white')
        .project('albersUsa')
    )

    return alt.layer(states_fill, county_layer, states_outline).properties(
        title=col_to_title[chart_col],
    )
#@title
# One county map per (chart column, group), addressed as
# group_charts[chart_col][group].
map_metrics = ('cases_per_100', 'cases_to_pop', 'cases_to_pop_with_unknown')
group_charts = {chart_metric: {} for chart_metric in map_metrics}
for group in race_ethnicity_groups:
    for value in map_metrics:
        county_map = GenerateCountyMap(
            chart_df, value, group, group_names, metric, date_display_name)
        group_charts[value][group] = county_map
#@title
# Nationwide case counts per race/ethnicity bucket, indexed by bucket name.
overall_df = pd.io.gbq.read_gbq(cdc_overall_query, project_id=project_id)
overall_df['race_ethnicity_combined'] = (
    overall_df['race_ethnicity_combined'].astype('string').str.strip())
overall_df = overall_df.replace(
    to_replace={'race_ethnicity_combined': race_ethnicity_combined_map})
overall_df = overall_df.set_index('race_ethnicity_combined')

# Cases and populations are shown in millions.
chart_denominator = 1000000

# (display name, case bucket, population)
# Population data from https://api.census.gov/data/2019/acs/acs1/profile?get=NAME,DP05_0071E,DP05_0078E,DP05_0077E,DP05_0080E,DP05_0081E,DP05_0079E,DP05_0070E&for=us:1
overall_groups = [
    ('Hispanic/Latino', 'hispanic_cases', 60481746),
    ('Black', 'black_cases', 40596040),
    ('White', 'white_cases', 196789401),
    ('Asian', 'asian_cases', 18427914),
    ('Native Hawaiian/Pacific Islander', 'nhpi_cases', 565473),
    ('American Indian/Alaska Native', 'aian_cases', 2236348),
]
total_label = '*Total Including Unknowns*'
total_population = 328239523

case_counts = overall_df['cases']
cases_list = [case_counts[bucket] / chart_denominator for _, bucket, _ in overall_groups]
# The total row counts every case, including unknown/suppressed race/ethnicity.
cases_list.append(case_counts.sum() / chart_denominator)

pop_list = [population / chart_denominator for _, _, population in overall_groups]
pop_list.append(total_population / chart_denominator)

percent_list = [cases / population for cases, population in zip(cases_list, pop_list)]

prevalence = pd.DataFrame.from_dict({
    'group': [name for name, _, _ in overall_groups] + [total_label],
    'percent': percent_list,
    'cases': cases_list,
    'population': pop_list,
})

# Horizontal bars, longest first, one colour per group.
bar_tooltips = [
    alt.Tooltip('group:N', title='Race/Ethnicity Group'),
    alt.Tooltip('percent:Q', format='.2%', title='Prevalence in group'),
    alt.Tooltip('cases:Q', format=',.2f', title='Cases in group (millions)'),
    alt.Tooltip('population:Q', format=',.2f', title='Population of group (millions)'),
]
bars = alt.Chart(prevalence).mark_bar().encode(
    x=alt.X('percent', axis=alt.Axis(format='.1%'), title=''),
    y=alt.Y('group', sort='-x', title=''),
    color=alt.Color(
        'group',
        scale=alt.Scale(scheme='tableau20'),
        title='',
        legend=None,
    ),
    tooltip=bar_tooltips,
).properties(
    title='Percent of Race/Ethnicity Group who had COVID-19 based on Incomplete CDC Data up to %s' % date_display_name
)
bars.display()
#alt.concat(bars).properties(
# title=alt.TitleParams(
# ['Source: U.S. Census Bureau\'s American Community Survey 2019 5-year estimates for population data.'],
# baseline='bottom',
# dy=20,
# orient='bottom',
# fontWeight='normal',
# fontSize=11
# )
#).display()
#@title
# The age population numbers come from the ACS 2019 1-year via IPUMS microdata
# which allows us to calculate age buckets not available in the ACS API.
# We had to combine Asian and NHPI due to IPUMS reporting categories.
# https://usa.ipums.org/usa-action/variables/RACE#codes_section
# The age_dict was pre-calculated in another colab because it takes a while to run.
# https://colab.research.google.com/drive/1b2U0SvZq4oxRXDujc8oWg5LbQDJKRawe#scrollTo=GC1CVtok_GwL
age_dict = {'aian': {'0-9': 288641.0,
'10-19': 341153.0,
'20-29': 319143.0,
'30-39': 302075.0,
'40-49': 270994.0,
'50-59': 284191.0,
'60-69': 236906.0,
'70-79': 122475.0,
'80+': 46877.0,
'total': 2212455.0},
'api': {'0-9': 1857491.0,
'10-19': 2136643.0,
'20-29': 2788000.0,
'30-39': 3235052.0,
'40-49': 2909062.0,
'50-59': 2361661.0,
'60-69': 1893205.0,
'70-79': 1084520.0,
'80+': 560923.0,
'total': 18826557.0},
'black': {'0-9': 5144709.0,
'10-19': 5869466.0,
'20-29': 6302817.0,
'30-39': 5703178.0,
'40-49': 5152576.0,
'50-59': 5060647.0,
'60-69': 4240796.0,
'70-79': 2154455.0,
'80+': 1022325.0,
'total': 40650969.0},
'hispanic': {'0-9': 10021829.0,
'10-19': 10673378.0,
'20-29': 9714731.0,
'30-39': 9112371.0,
'40-49': 7989372.0,
'50-59': 6076318.0,
'60-69': 3913694.0,
'70-79': 1991765.0,
'80+': 991319.0,
'total': 60484777.0},
'other': {'0-9': 2339910.0,
'10-19': 2027458.0,
'20-29': 1513225.0,
'30-39': 1169024.0,
'40-49': 822915.0,
'50-59': 626001.0,
'60-69': 453421.0,
'70-79': 222395.0,
'80+': 95165.0,
'total': 9269514.0},
'total': {'0-9': 39028311.0,
'10-19': 42736680.0,
'20-29': 44726365.0,
'30-39': 44148037.0,
'40-49': 40644111.0,
'50-59': 41899718.0,
'60-69': 38395555.0,
'70-79': 23971046.0,
'80+': 12689700.0,
'total': 328239523.0},
'white': {'0-9': 19375731.0,
'10-19': 21688582.0,
'20-29': 24088449.0,
'30-39': 24626337.0,
'40-49': 23499192.0,
'50-59': 27490900.0,
'60-69': 27657533.0,
'70-79': 18395436.0,
'80+': 9973091.0,
'total': 196795251.0}
}
# Population by age bucket (rows) and group (columns). The 'total' row is
# removed so that only the nine age buckets remain.
age_pop_df = pd.DataFrame.from_dict(age_dict).drop('total')
#@title
# Nationwide case counts per (race/ethnicity, age group).
age_df = pd.io.gbq.read_gbq(cdc_age_query, project_id=project_id)
age_df['race_ethnicity_combined'] = (
    age_df['race_ethnicity_combined'].astype('string').str.strip())
age_df = age_df.replace(
    to_replace={'race_ethnicity_combined': race_ethnicity_combined_map})

# Rows = age groups, columns = race/ethnicity buckets, plus a 'total_cases'
# margin. aggfunc is given as the string 'sum' rather than the builtin `sum`:
# pandas maps both to the same groupby sum today, but warns that a builtin
# callable will be applied directly in a future version.
age_crosstab_df = pd.crosstab(
    age_df['age_group'],
    age_df['race_ethnicity_combined'],
    values=age_df['cases'],
    aggfunc='sum',
    margins=True,
    margins_name='total_cases',
)

# Drop the grand-total row and the age groups without a usable age. This
# raises KeyError if a label is absent, which flags a change in the source
# data. An 'Unknown' age group is intentionally not dropped here.
age_crosstab_df = age_crosstab_df.drop(['total_cases', 'Missing', 'NA'])
The CDC data allows us to see the percentage of people within each age and race/ethnicity group who had COVID-19.
#@title
# Builds and displays a small-multiples bar chart: the share of each
# (race/ethnicity group, age bucket) population that had a reported case.
age_labels = ['0-9', '10-19', '20-29', '30-39', '40-49', '50-59', '60-69', '70-79', '80+']

# Cases and populations are shown in thousands.
chart_denominator = 1000

# NaN cells (a race/age combination with no rows) count as zero cases. Filling
# the whole table once also covers the NHPI column in the Asian/NHPI share,
# which would otherwise turn that share into NaN.
age_cases = age_crosstab_df.fillna(0)

# (display name, cases per age bucket, population per age bucket).
# Asian and NHPI cases are summed because the population data only has the
# combined 'api' column.
# NOTE(review): cases and population are combined positionally, which assumes
# the crosstab rows are in the same age order as age_pop_df -- TODO confirm.
age_series = [
    ('Hispanic/Latino', age_cases['hispanic_cases'].values, age_pop_df['hispanic'].values),
    ('Black', age_cases['black_cases'].values, age_pop_df['black'].values),
    ('White', age_cases['white_cases'].values, age_pop_df['white'].values),
    ('Asian/NHPI',
     age_cases['asian_cases'].values + age_cases['nhpi_cases'].values,
     age_pop_df['api'].values),
    ('AIAN', age_cases['aian_cases'].values, age_pop_df['aian'].values),
    ('*Total Including Unknowns*', age_cases['total_cases'].values, age_pop_df['total'].values),
]

race_list = []
cases_list = []
pop_list = []
percent_list = []
for series_label, series_cases, series_pop in age_series:
    race_list.extend([series_label] * len(age_labels))
    cases_list.extend(series_cases / chart_denominator)
    pop_list.extend(series_pop / chart_denominator)
    percent_list.extend(series_cases / series_pop)

age_chart_df = pd.DataFrame.from_dict({
    'group': race_list,
    'age': age_labels * len(age_series),
    'percent': percent_list,
    'cases': cases_list,
    'population': pop_list,
})

alt.Chart(age_chart_df).mark_bar().encode(
    x=alt.X('percent', sort='y', axis=alt.Axis(format='.0%'), title=''),
    y=alt.Y('age', title='Age'),
    column=alt.Column(
        'group',
        title='Percent of Age and Race/Ethnicity Group who had COVID-19 based on Incomplete CDC Data up to %s' % date_display_name,
        header=alt.Header(titleFontSize=13),
    ),
    color=alt.Color(
        'group',
        # Set the 6 colors manually to match the 7 groups in the chart above.
        scale=alt.Scale(range=['#4E79A7', '#A0CBE8', '#F28E2B', '#FFBE7D', '#59A14F', '#B6992D']),
        title='Race/Ethnicity',
        legend=None,
    ),
    tooltip=[
        alt.Tooltip('group:N', title='Race/Ethnicity group'),
        alt.Tooltip('age:N', title='Age'),
        alt.Tooltip('percent:Q', format='.2%', title='Prevalence in group'),
        alt.Tooltip('cases:Q', format=',.2f', title='Cases in group (thousands)'),
        alt.Tooltip('population:Q', format=',.2f', title='Population of group (thousands)'),
    ],
).properties(
    width=110,
).display()
Note:
Percentage of each population group who had COVID-19 based on incomplete data:
#@title
# 2x3 grid of county maps: share of each group's population that had COVID-19.
black, hispanic, white, asian, aian, nhpi = (
    group_charts['cases_per_100'][group_key].properties(width=450, height=325)
    for group_key in ('black', 'hispanic', 'white', 'asian', 'aian', 'nhpi')
)
# `|` places maps side by side, `&` stacks the rows.
map_grid = (black | hispanic) & (white | asian) & (aian | nhpi)
map_grid.configure_legend(
    orient='top',
    gradientLength=400,
    titleLimit=0,
).configure_view(
    strokeWidth=0,
).display()
We can also view disparities by comparing the percentage of cases with known race/ethnicity that a race/ethnicity group accounts for in a county (the cases share) vs. the percentage of the total population that a race/ethnicity group accounts for in a county (the population share). There is no disparity when the cases share is equal to the population share for all race/ethnicity groups in a county (ratio = 1.0). When the ratio of cases share to population share is above 1.0, then a group has a disproportionate number of cases relative to its share of the population.
#@title
# 2x3 grid of county maps: ratio of each group's share of known-race/ethnicity
# cases to its share of the population.
black, hispanic, white, asian, aian, nhpi = (
    group_charts['cases_to_pop'][group_key].properties(width=450, height=325)
    for group_key in ('black', 'hispanic', 'white', 'asian', 'aian', 'nhpi')
)
# `|` places maps side by side, `&` stacks the rows.
map_grid = (black | hispanic) & (white | asian) & (aian | nhpi)
map_grid.configure_legend(
    orient='top',
    gradientLength=400,
    titleLimit=0,
).configure_view(
    strokeWidth=0,
).display()
We can also view disparities by comparing the percentage of total cases -- with or without known race/ethnicity -- that a race/ethnicity group accounts for (the cases share) vs. the percentage of the total population that a race/ethnicity group accounts for in a county (the population share). Counties with more complete data will change less vs. the charts above, and counties with less complete data will change more.
#@title
# 2x3 grid of county maps: ratio of each group's share of all cases (known or
# unknown race/ethnicity) to its share of the population.
black, hispanic, white, asian, aian, nhpi = (
    group_charts['cases_to_pop_with_unknown'][group_key].properties(width=450, height=325)
    for group_key in ('black', 'hispanic', 'white', 'asian', 'aian', 'nhpi')
)
# `|` places maps side by side, `&` stacks the rows.
map_grid = (black | hispanic) & (white | asian) & (aian | nhpi)
map_grid.configure_legend(
    orient='top',
    gradientLength=400,
    titleLimit=0,
).configure_view(
    strokeWidth=0,
).display()
# Large county-level disparity maps
#@title
#for group in ['black', 'hispanic', 'white', 'asian', 'aian', 'nhpi']:
# (group_charts['cases_per_100'][group]).properties(
# width=900,
# height=650,
# ).configure_legend(
# orient='top-right',
# gradientLength=400,
# titleLimit=0,
# padding=0
# ).configure_view(
# strokeWidth=0,
# ).display()
#@title
#for group in ['black', 'hispanic', 'white', 'asian', 'aian', 'nhpi']:
# (group_charts['cases_to_pop'][group]).properties(
# width=900,
# height=650,
# ).configure_legend(
# orient='top-right',
# gradientLength=400,
# titleLimit=0,
# padding=0
# ).configure_view(
# strokeWidth=0,
# ).display()
Please email us at shli-covid-data-analysis@googlegroups.com with questions or comments.
#%%shell
#jupyter nbconvert --to html 'cdc_case_disparities.ipynb'