The Maxwell School
Syracuse University
Syracuse University
""" earnings.py Spring 2022 PJW Collect median earnings by county for NYS and create a geopackage file for use with QGIS. """ import requests import pandas as pd import geopandas import seaborn as sns import matplotlib.pyplot as plt # # Name of the county shapefile # county_file = "cb_2019_us_county_500k_36.zip" # # Set up the call to the Census API server. This version omits # the key to avoid publishing it on the web. # api = "https://api.census.gov/data/2018/acs/acs5" var_string = 'NAME,B20002_001E' for_clause = 'county:*' in_clause = 'state:36' payload = { 'get':var_string, 'for':for_clause, 'in' :in_clause } # # Get the data and build a dataframe from it # response = requests.get(api,payload) assert response.status_code == 200 row_list = response.json() colnames = row_list[0] datarows = row_list[1:] earnings = pd.DataFrame(columns=colnames,data=datarows) # # Add the GEOID for joining to the county file. Also create a # column of median earnings in thousands of dollars. # earnings['GEOID'] = earnings['state']+earnings['county'] earnings['median'] = earnings['B20002_001E'].astype(float)/1000 earnings.to_csv('earnings.csv',index=False) #%% # # Plot the data # fig, ax1 = plt.subplots(dpi=300) sns.histplot(data=earnings,x='median',stat='density',ax=ax1) sns.kdeplot(data=earnings,x='median',ax=ax1,shade=True) ax1.set_xlabel("Median Income in Thousands") fig.tight_layout() fig.savefig('earnings_hist.png') #%% # # Now trim down the dataframe to the key columns and merge it # onto the county polygons. # trim = earnings[['GEOID','median']] geodata = geopandas.read_file(county_file) geodata = geodata.merge(trim, on='GEOID', how='left', validate='1:1', indicator=True) # # Check that the merge worked correctly # print( geodata['_merge'].value_counts() ) geodata.drop(columns='_merge',inplace=True) # # Save the results as a layer in a geopackage # geodata.to_file("counties.gpkg",layer="earnings",index=False)