Joining Datasets (g09)

county_merge.py

"""
county_merge.py
Spring 2022 PJW

Merge state and county data and then calculate each county's share of 
its state's population.
"""

import csv

#
#  Set up a function for reading in a list of records
#

def read_file(filename):
    records = []
    fh = open(filename)
    reader = csv.DictReader(fh)
    for rec in reader:
        records.append(rec)
    fh.close()
    return records

#%%
#
#  Read the geo file and make the geo dictionary
#

geo_list = read_file('county_geo.csv')

geo_data = {}
for geo_rec in geo_list:
    this_geoid = geo_rec['GEOID']
    geo_data[ this_geoid ] = geo_rec


#%%
# 
#  Read the pop file and make the pop dictionary. The key step is creating the 
#  county geoid by concatenating the state and county FIPS codes.
#

pop_list = read_file('county_pop.csv')

pop_data = {}
for pop_rec in pop_list:
    fips_state = pop_rec['state']
    fips_county = pop_rec['county']
    this_geoid = fips_state+fips_county
    pop_data[ this_geoid ] = pop_rec

#%%
#
#  Now join the pop data onto the geo database. Along the way, convert the 
#  population value to a float and save it under a friendlier name.
#

for geoid in geo_data.keys():
    
    geo_rec = geo_data[geoid]
    pop_rec = pop_data[geoid]    

    for k in pop_rec.keys():
        geo_rec[k] = pop_rec[k]
        
    geo_rec['pop'] = float( geo_rec['B01001_001E'] )

#%%
#
#  Next, aggregate the population data by state
#

state_total = {}

for rec in geo_data.values():
    state = rec['STATEFP']
    pop = rec['pop']
    if state in state_total:
        state_total[state] = state_total[state] + pop
    else:
        state_total[state] = pop
        
#%%
#
#  Finally, compute each county's share of the state's population.
#

for rec in geo_data.values():
    state = rec['STATEFP']
    state_pop = state_total[state]
    pct = 100*rec['pop']/state_pop
    rec['percent'] = pct
    
#%%
#
#  Write it out
#

geoids = sorted( geo_data.keys() )

onondaga = geo_data["36067"]
fields = onondaga.keys()

fh = open('county_merge.csv','w',newline='')
writer = csv.DictWriter(fh,fields)

writer.writeheader()
for geoid in geoids:
    writer.writerow( geo_data[geoid] )

fh.close()

Site Index | Zoom | Admin
URL: https://wilcoxen.maxwell.insightworks.com/pages/6178.html
Peter J Wilcoxen, The Maxwell School, Syracuse University
Revised 02/16/2022