Computational Exercises: Solutions > Analyzing the Contributions Data (g20)

com_cand_info.py

"""
com_cand_info.py
Spring 2022 PJW

Build a file of information about candidates and committees.
"""

import pandas as pd

#
#  Set up file names
#

pkl_file = 'contrib_clean.pkl'
com_file = 'fec_committees.csv'
cand_file = 'fec_candidates.csv'
com_total_file = 'com_total.csv'
out_file = 'com_cand_info.csv'

#
#  Note: the cleaned contributions pickle (pkl_file) is not loaded here.
#  Nothing in this script uses the individual contribution records, so
#  reading it would only cost time and memory. The name is kept so the
#  list of project files above stays complete.
#

#
#  Read the committee total file
#

com_total = pd.read_csv(com_total_file)

#
#  Read the FEC file of information about committees and select a
#  subset of the columns
#

committee_columns = ['CMTE_ID','CMTE_NM','CMTE_PTY_AFFILIATION','CAND_ID']
com_info = pd.read_csv(com_file, dtype=str)[committee_columns]

#
#  Attach the contribution totals to the committee records. No key is
#  given, so pandas joins on every column the two data frames share.
#  The right join keeps exactly the committees listed in the totals file
#  (those with presidential contributions) and discards the rest;
#  validate='m:1' makes pandas raise an error if the keys in the totals
#  data are not unique.
#

com_merged = pd.merge(
    com_info, com_total, how='right', validate='m:1', indicator=True)

#
#  Report how many rows matched on both sides, then discard the indicator
#

merge_counts = com_merged['_merge'].value_counts()
print('\nMerge com_total:', merge_counts, sep='\n')
com_merged = com_merged.drop('_merge', axis='columns')

#
#  Check that each committee is tied to a single candidate: count the
#  rows per committee ID in the FEC committee data and print any ID that
#  appears more than once. An empty listing means there are no repeats.
#  NOTE(review): the printed label reads 'Numcam' (sic); left as is so
#  the script's output is unchanged.
#

numcan = com_info.groupby('CMTE_ID').size()
repeated = numcan > 1
print('\nNumcam', numcan[repeated], sep='\n')

#
#  Read the file of information about candidates
#

pres = pd.read_csv(cand_file,dtype=str)

#  Every column was read as a string, so the election year is compared
#  with the text '2020' rather than the number.
is_pres = pres['CAND_OFFICE'] == 'P'
is_2020 = pres['CAND_ELECTION_YR'] == '2020'

#
#  Keep the people running for president in 2020 and trim down the columns
#

keep = is_pres & is_2020

#  The two selection columns are constant after filtering, so drop them.
#  Using the non-inplace form of drop() builds a new data frame rather
#  than modifying the result of a boolean selection in place, which is
#  what causes pandas to issue a SettingWithCopyWarning. It also matches
#  how the '_merge' columns are dropped elsewhere in this script.
pres = pres[keep].drop(columns=['CAND_OFFICE','CAND_ELECTION_YR'])

#
#  Attach the candidate information to the committees. No key is given,
#  so pandas joins on every column the two data frames share. The left
#  join keeps every committee row and ignores candidates that have no
#  committee; validate='m:1' makes pandas raise an error if the keys in
#  the candidate data are not unique.
#

com_cand = pd.merge(
    com_merged, pres, how='left', validate='m:1', indicator=True)

pres_counts = com_cand['_merge'].value_counts()
print('\nMerge pres:', pres_counts, sep='\n')

#
#  Retain only the committees that found a match among the 2020
#  presidential candidates, then discard the indicator column
#

matched = com_cand['_merge'] == 'both'
com_cand = com_cand[matched].drop(columns='_merge')

#
#  Write out the candidate file
#

com_cand.to_csv(out_file, index=False)

Site Index | Zoom | Admin
URL: https://wilcoxen.maxwell.insightworks.com/pages/5283.html
Peter J Wilcoxen, The Maxwell School, Syracuse University
Revised 04/26/2022