# The Maxwell School
# Syracuse University
"""
solar.py
Spring 2022 PJW

Analyze residential solar data for NYS.
"""

import pandas as pd

# Lift pandas' cap on printed rows so full tables are shown.
pd.set_option('display.max_rows', None)


def _show(heading, value):
    """Print a heading line, then the value on the line(s) after it."""
    print(heading)
    print(value)


#
#  Load the source data and index the dataframe by county name.
#

infile = 'res_solar_by_county.csv'
solar = pd.read_csv(infile).set_index('County')

#
#  Report the dataframe's column names and index labels.
#

_show('\nColumns:', list(solar.columns))
_show('\nIndex:', list(solar.index))

#
#  Project counts: the full column first, then a few selected
#  counties, then a single county.
#

count = solar['projects']
_show('\nProjects:', count)

some_cny_counties = ['Onondaga', 'Oswego', 'Wayne']
_show('\nProjects in selected counties:', count.loc[some_cny_counties])

print('\nProjects in Albany county:', count.loc['Albany'])

#
#  Rank the counties from most to fewest projects and keep the
#  first five.
#

high_to_low = count.sort_values(ascending=False)
top_five = high_to_low.head(5)
_show('\nCounties with the most projects:', top_five)

#
#  Per-project means: divide every column of the dataframe by the
#  project count for the matching county.
#

means = solar.div(count, axis='index')
_show('\nMean project by county:', means)

#
#  Repeat the division with the denominator in a different row
#  order to demonstrate that pandas aligns on the index labels
#  automatically.
#

_show('\nMeans via high_to_low', solar.div(high_to_low, axis='index'))

#
#  Express each county's means relative to Onondaga's by dividing
#  every row by the Onondaga row, rounded to 2 decimal places.
#

onondaga_row = means.loc['Onondaga']
relative = (means / onondaga_row).round(2)

#
#  Not required, but print the relative table for clarity.
#

_show('\nMean project relative to Onondaga:', relative)

#
#  Pull out the total_incentive column and print it in ascending
#  order.
#

rel_incent = relative['total_incentive']
_show('\nMean incentive relative to Onondaga:', rel_incent.sort_values())