# The Maxwell School
# Syracuse University
""" by_place_cand.py Spring 2022 PJW Aggregate contributions by location (state and zip) and candidate. """ import pandas as pd import matplotlib.pyplot as plt import seaborn as sns # # Set up file names # pkl_file = 'contrib_clean.pkl' com_cand_file = 'com_cand_info.csv' sum_file = 'by_place_cand.csv' # # Read the contributions pickle file and the file of information about # the candidates # contrib = pd.read_pickle(pkl_file) com_cand = pd.read_csv(com_cand_file) #%% # # Join the candidates onto the contributions # merged = contrib.merge( com_cand,on='CMTE_ID',validate="m:1",indicator=True) print( merged['_merge'].value_counts() ) merged = merged.drop(columns='_merge') #%% # # Aggregate the contributions by candidate and place (state and zip) and save the results. # group_by_place_cand = merged.groupby(['STATE','zip','CAND_NAME']) by_place_cand = group_by_place_cand['amt'].sum() by_place_cand.to_csv(sum_file) #%% # # Now total things up by candidate and print the top few # mil = by_place_cand.groupby(['STATE','CAND_NAME']).sum()/1e6 by_cand = mil.groupby('CAND_NAME').sum() top_cand = by_cand.sort_values()[-10:] print( '\nCandidates receiving the most funding:\n' ) print( top_cand ) by_state = mil.groupby('STATE').sum() top_state = by_state.sort_values()[-10:] print( '\nStates providing the most funding:\n' ) print( top_state ) #%% # # Build a figure showing the amounts raised by the top candidates # and the amounts raised in top states # fig, (ax1,ax2) = plt.subplots(1,2,dpi=300) fig.suptitle("Top Candidates and States, Millions of Dollars") top_cand.plot.barh(ax=ax1,fontsize=7) ax1.set_ylabel('') top_state.plot.bar(ax=ax2,fontsize=7) ax2.set_xlabel('State') fig.tight_layout() fig.savefig('top.png') #%% # # Build a heatmap showing the amounts of money raised by # each top candidate in each of the top states. # # Start by filtering down the data to just those candidates # and states. 
# reset = mil.reset_index() keep_cand = reset['CAND_NAME'].isin(top_cand.index) keep_state = reset['STATE'].isin(top_state.index) keep = keep_cand & keep_state sub = reset[ keep ] # # Now sum the contributions by state and candidate # and then unstack the state level to make a dataframe # with one row per candidate and one column per state. # grouped = sub.groupby(['STATE','CAND_NAME']) summed = grouped['amt'].sum() grid = summed.unstack('STATE') # # Draw the heatmap # fig, ax1 = plt.subplots(dpi=300) fig.suptitle('Contributions in Millions') sns.heatmap(grid,annot=True,fmt=".0f",ax=ax1) ax1.set_xlabel('State') ax1.set_ylabel('Candidate') fig.tight_layout() fig.savefig('heatmap.png')