# The Maxwell School
# Syracuse University
""" com_cand_info.py Spring 2022 PJW Build a file of information about candidates and committees. """ import pandas as pd # # Set up file names # pkl_file = 'contrib_clean.pkl' com_file = 'fec_committees.csv' cand_file = 'fec_candidates.csv' com_total_file = 'com_total.csv' out_file = 'com_cand_info.csv' # # Read the cleaned contributions pickle file # contrib = pd.read_pickle(pkl_file) # # Read the committee total file # com_total = pd.read_csv(com_total_file) # # Read the FEC file of information about committees and select a # subset of the columns # com_info = pd.read_csv(com_file,dtype=str) com_info = com_info[['CMTE_ID','CMTE_NM','CMTE_PTY_AFFILIATION','CAND_ID']] # # Join the committee contribution totals onto the other information # about the committees. Use a right join to drop all the committees that # are not in the totals file because they had no presidential contributions. # com_merged = com_info.merge( com_total,how='right',validate="m:1",indicator=True) print( '\nMerge com_total:', com_merged['_merge'].value_counts() , sep='\n' ) com_merged = com_merged.drop(columns='_merge') # # Make sure each committee supported only a single candidate # numcan = com_info.groupby('CMTE_ID').size() print( '\nNumcam', numcan[ numcan>1 ] , sep='\n' ) # # Read the file of information about candidates # pres = pd.read_csv(cand_file,dtype=str) is_pres = pres['CAND_OFFICE'] == 'P' is_2020 = pres['CAND_ELECTION_YR'] == '2020' # # Keep the people running for president in 2020 and trim down the columns # keep = is_pres & is_2020 pres = pres[keep] pres.drop(columns=['CAND_OFFICE','CAND_ELECTION_YR'],inplace=True) # # Merge the candidates onto the committees. 
Use a left join to keep all # the committees but drop any candidates that didn't have a committee # com_cand = com_merged.merge( pres,how='left',validate='m:1',indicator=True) print( '\nMerge pres:', com_cand['_merge'].value_counts() , sep='\n' ) com_cand = com_cand[ com_cand['_merge']=='both' ] com_cand = com_cand.drop(columns='_merge') # # Write out the candidate file # com_cand.to_csv(out_file,index=False)