Residential Solar in NY (g12)

solar.py

"""
solar.py
Spring 2022 PJW

Analyze residential solar data for NYS.
"""

import pandas as pd

# None removes the row limit, so printed Series/DataFrames are not truncated.
pd.options.display.max_rows = None

#
#  Load the county-level CSV and key the resulting dataframe
#  by its 'County' column.
#

infile = 'res_solar_by_county.csv'

solar = pd.read_csv(infile).set_index('County')

#
#  Report the dataframe's column names and index labels, each
#  as a plain list under its own heading.
#

print('\nColumns:', list(solar.columns), sep='\n')

print('\nIndex:', list(solar.index), sep='\n')

#
#  Pull the 'projects' column out as a Series (indexed the same
#  way as the dataframe) and show all of it.
#

count = solar['projects']

print('\nProjects:', count, sep='\n')

#
#  Select several counties at once by passing a list of labels.
#

some_cny_counties = ['Onondaga', 'Oswego', 'Wayne']

print(
    '\nProjects in selected counties:',
    count.loc[some_cny_counties],
    sep='\n',
)

#
#  Select a single county by its label.
#

print('\nProjects in Albany county:', count.loc['Albany'])

#
#  Order the counts from largest to smallest and keep the
#  first five entries.
#

high_to_low = count.sort_values(ascending=False)
top_five = high_to_low.head(5)

print('\nCounties with the most projects:', top_five, sep='\n')

#
#  Per-project means: divide every column of the dataframe,
#  row by row, by that row's project count.
#

means = solar.divide(count, axis=0)

print('\nMean project by county:', means, sep='\n')

#
#  Repeat the division using the sorted copy of the counts as
#  the denominator. Rows are matched by index label rather than
#  by position, so the different ordering does not matter.
#

print(
    '\nMeans via high_to_low',
    solar.divide(high_to_low, axis=0),
    sep='\n',
)

#
#  Express every county's means relative to Onondaga: take the
#  Onondaga row, divide each row of the means by it column by
#  column, and round the ratios to 2 decimal places. The result
#  is a new dataframe.
#

onondaga_row = means.loc['Onondaga']

relative = means.div(onondaga_row, axis='columns').round(2)

#
#  Optional: show the full table of ratios for reference.
#

print('\nMean project relative to Onondaga:', relative, sep='\n')

#
#  Extract the total_incentive ratios and list them from
#  smallest to largest.
#

rel_incent = relative['total_incentive']

print(
    '\nMean incentive relative to Onondaga:',
    rel_incent.sort_values(ascending=True),
    sep='\n',
)
Site Index | Zoom | Admin
URL: https://wilcoxen.maxwell.insightworks.com/pages/5267.html
Peter J Wilcoxen, The Maxwell School, Syracuse University
Revised 02/26/2022