Aggregating Political Contributions (g17)

contrib_by_zip.py

"""
contrib_by_zip.py
Spring 2022 PJW

Aggregate political contributions by committee, state and zip code.
"""

import pandas as pd

#
#  File names. The input is the pickled dataframe written by
#  contrib_all.py and the output is a CSV file of aggregated totals.
#

pklfile = 'contrib_all_pkl.zip'
outfile = 'contrib_by_zip.csv'

#
#  Load the individual contribution records.
#

contrib = pd.read_pickle(pklfile)

#
#  Build a 5-digit zip code for each record. Codes that are already
#  5 characters long are kept as they are, 9-character codes are cut
#  down to their first 5 characters, and anything else is set to
#  missing. The counts by length are printed first so the mix of
#  raw codes can be inspected.
#

raw_zip = contrib['ZIP_CODE']
raw_len = raw_zip.str.len()

raw_counts = raw_len.value_counts(dropna=False)

print('\nCounts of records by zip code length:\n')
print(raw_counts)

is_long = raw_len == 9
is_unusable = (raw_len != 5) & (raw_len != 9)

zip5 = raw_zip.copy()
zip5[is_long] = raw_zip[is_long].str.slice(stop=5)
zip5[is_unusable] = None

#
#  Print the counts by length again as a check on the cleaned codes,
#  then attach them to the dataframe.
#

print(zip5.str.len().value_counts(dropna=False))

contrib['zip'] = zip5

#
#  Total the contribution amounts for each combination of committee,
#  state and zip code. Note that groupby leaves out rows whose group
#  keys are missing by default, so records without a usable zip code
#  do not appear in the totals.
#

contrib_by_zip = contrib.groupby(['CMTE_ID', 'STATE', 'zip'])['amt'].sum()

#
#  Write the totals to the output file.
#

contrib_by_zip.to_csv(outfile)

Site Index | Zoom | Admin
URL: https://wilcoxen.maxwell.insightworks.com/pages/5278.html
Peter J Wilcoxen, The Maxwell School, Syracuse University
Revised 04/26/2022