# The Maxwell School
# Syracuse University
"""
contrib_all.py
Spring 2022 PJW

Select contribution records from the FEC file, keep only the columns
needed later, and store the result as a pickle for a subsequent
script.  A plot of monthly totals by election type is saved as well.
"""

import pandas as pd
import matplotlib.pyplot as plt

#
#  Names of the input archive and the output pickle
#

zipname = 'contributions.zip'
pklname = 'contrib_all_pkl.zip'

#
#  Load the CSV file from the zip archive, reading every column as text
#

raw = pd.read_csv(zipname, dtype=str)
n_raw = len(raw)
print('raw records:', n_raw)

raw = raw.rename(columns={'TRANSACTION_PGI': 'PGI'})

#
#  Numeric version of the transaction amount
#

raw['amt'] = raw['TRANSACTION_AMT'].astype(float)

#
#  Parse the transaction date, store it at monthly resolution, and
#  discard anything dated before 2019
#

ymd = pd.to_datetime(raw['TRANSACTION_DT'], format="%m%d%Y")
raw['date'] = ymd.dt.to_period('M')

contrib = raw.loc[ymd.dt.year >= 2019]
n_recent = len(contrib)
print('dropped due to year:', n_raw - n_recent)

#
#  Keep only contributions tagged for the 2020 primary or the 2020
#  general election
#

is_2020_election = contrib['PGI'].isin(['P2020', 'G2020'])
contrib = contrib.loc[is_2020_election]
n_final = len(contrib)
print('dropped due to PGI', n_recent - n_final)

print('final records:', n_final)

#
#  Restrict the data to the variables of interest
#

keepvars = ['CMTE_ID', 'STATE', 'ZIP_CODE', 'PGI', 'date', 'amt']
trimmed = contrib[keepvars]

#
#  Save the trimmed data in pickled form
#

trimmed.to_pickle(pklname)

#
#  Report the size and columns of the saved dataframe
#

print('records:', len(trimmed))
print('variables:', list(trimmed.columns))

#
#  Total contributions by month and election type, in millions of
#  dollars, with one column per election type
#

monthly_total = trimmed.groupby(['date', 'PGI'])['amt'].sum()
by_date_wide = (monthly_total / 1e6).unstack('PGI')

#
#  Draw the monthly series and save the figure.  The axis labels are
#  set after plotting so they are not overwritten by the plot call.
#

fig, ax = plt.subplots(dpi=300)
fig.suptitle('Individual Contributions')
by_date_wide.plot(ax=ax)
ax.set(xlabel='Date', ylabel='Million Dollars')
fig.tight_layout()
fig.savefig('by_month.png')