Computational Exercises: Solutions > Analyzing the Contributions Data (g20)

by_place_cand.py

"""
by_place_cand.py
Spring 2022 PJW

Aggregate contributions by location (state and zip) and candidate.
"""

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

#
#  Set up file names: two inputs that are read just below, and one
#  output that receives the aggregated totals later in the script
#

pkl_file = 'contrib_clean.pkl'        # input: pickled contributions
com_cand_file = 'com_cand_info.csv'   # input: candidate information
sum_file = 'by_place_cand.csv'        # output: totals by place and candidate

#
#  Read the contributions pickle file and the file of information about
#  the candidates. Both are joined on CMTE_ID in the next cell.
#
#  NOTE: read_pickle can execute arbitrary code from the file it loads,
#  so pkl_file should only ever be a file produced locally.
#

contrib = pd.read_pickle(pkl_file)
com_cand = pd.read_csv(com_cand_file)

#%%
#
#  Join the candidates onto the contributions
#
#  The join itself is an inner join (the pandas default), so any
#  contribution whose CMTE_ID has no candidate record is dropped, and
#  validate="m:1" raises an error if CMTE_ID is repeated in com_cand.
#
#  An indicator column on an inner join can only ever say "both", so
#  the match report is built from a separate outer join on the key
#  column alone. Its counts show how many contributions lack a
#  candidate record (left_only) and how many candidate records have
#  no contributions (right_only).
#

match_check = contrib[['CMTE_ID']].merge(
    com_cand[['CMTE_ID']],
    on='CMTE_ID',how='outer',validate="m:1",indicator=True)

print( match_check['_merge'].value_counts() )

merged = contrib.merge(com_cand,on='CMTE_ID',validate="m:1")

#%%
#
#  Total the contribution amounts for every combination of state,
#  zip code and candidate, then write the totals to the summary file.
#

place_cand_keys = ['STATE','zip','CAND_NAME']

by_place_cand = merged.groupby(place_cand_keys)['amt'].sum()

by_place_cand.to_csv(sum_file)

#%%
#
#  Collapse the zip-level totals to one total per state and candidate,
#  expressed in millions of dollars. Then rank candidates and states
#  and print the ten largest of each, listed from smallest to largest.
#

state_cand_total = by_place_cand.groupby(['STATE','CAND_NAME']).sum()
mil = state_cand_total.div(1e6)

by_cand = mil.groupby('CAND_NAME').sum()
by_state = mil.groupby('STATE').sum()

#  sort_values() is ascending, so the last ten entries are the largest

top_cand = by_cand.sort_values().tail(10)
top_state = by_state.sort_values().tail(10)

print( '\nCandidates receiving the most funding:\n' )
print( top_cand )

print( '\nStates providing the most funding:\n' )
print( top_state )

#%%
#
#  Draw a two-panel figure and save it as top.png: horizontal bars
#  for the top candidates on the left, vertical bars for the top
#  states on the right. Both panels are in millions of dollars.
#

fig, (cand_ax,state_ax) = plt.subplots(nrows=1,ncols=2,dpi=300)

#  Left panel: the candidate names label the bars, so the axis label
#  is blanked out

top_cand.plot(kind='barh',ax=cand_ax,fontsize=7)
cand_ax.set(ylabel='')

#  Right panel

top_state.plot(kind='bar',ax=state_ax,fontsize=7)
state_ax.set(xlabel='State')

fig.suptitle("Top Candidates and States, Millions of Dollars")
fig.tight_layout()
fig.savefig('top.png')

#%%
#
#  Build a heatmap of the money raised by each top candidate in each
#  of the top states and save it as heatmap.png.
#
#  First keep only the rows of the state-by-candidate totals where
#  the candidate is a top candidate and the state is a top state.
#

reset = mil.reset_index()

in_top_cand = reset['CAND_NAME'].isin(top_cand.index)
in_top_state = reset['STATE'].isin(top_state.index)

sub = reset.loc[ in_top_cand & in_top_state ]

#
#  Total the amounts by state and candidate, then move the state
#  level into the columns: the result has one row per candidate and
#  one column per state.
#

grid = sub.groupby(['STATE','CAND_NAME'])['amt'].sum().unstack('STATE')

#
#  Draw the heatmap, annotating each cell with its value rounded to
#  zero decimal places. The axis labels are set after the heatmap is
#  drawn so that they are the ones that end up on the figure.
#

fig, ax1 = plt.subplots(dpi=300)
sns.heatmap(data=grid,annot=True,fmt=".0f",ax=ax1)
ax1.set(xlabel='State',ylabel='Candidate')
fig.suptitle('Contributions in Millions')
fig.tight_layout()
fig.savefig('heatmap.png')
Site Index | Zoom | Admin
URL: https://wilcoxen.maxwell.insightworks.com/pages/5284.html
Peter J Wilcoxen, The Maxwell School, Syracuse University
Revised 04/26/2022