Joining Weather and Electricity Data (g15)

join.py

"""
join.py
Spring 2022 PJW

Join weather and electricity use data and plot some figures.
"""

import pandas as pd
import matplotlib.pyplot as plt

#
#  Read the weather data and improve the name of the key variable
#

weather = pd.read_csv('weather.csv')

fix_name = {'Temperature (F)':'degrees'}
weather = weather.rename( columns=fix_name)

#
#  Look for duplicate hours
#

is_dup = weather.duplicated( subset='Local Hour', keep=False )
dups = weather[is_dup] 

print( '\nRecords with duplicate timestamps:\n')
print( dups )

#
#  Drop duplicates but keep the first record of each set
#

weather = weather.drop_duplicates(subset='Local Hour')

#
#  Verify that we have one of each left
#

fixed = weather['Local Hour'].isin(dups['Local Hour'])

print( '\nRecords after removing duplicates:\n')
print( weather[fixed] )

#%%
#
#  Build a Pandas datetime variable and pull out key components
#

date = pd.to_datetime( weather['Local Hour'] )

print( '\nOriginal "Local Hour" column:\n')
print( weather['Local Hour'] )

print( '\nEquivalent Pandas timestamp in "date":\n')
print( date )

weather['month'] = date.dt.month
weather['day'] = date.dt.day
weather['hour'] = date.dt.hour
weather['dow'] = date.dt.dayofweek

#%%
#
#  Read the electricity data
#

use = pd.read_csv('use.csv')

# 
#  Join the weather data onto it
#

join_keys = ['month','day','hour']

merged = use.merge( weather, on=join_keys,
                    how='outer',validate='1:1',indicator='_merge')

print( '\nResults of merging on weather data:\n')
print( merged['_merge'].value_counts() )

print( '\nRecords not merged:\n')
print( merged[ merged['_merge'] != 'both'] )

#%%
#
#  Build 10-degree temperature bins
#

merged['tbin'] = merged['degrees'].round(-1)

print( '\nCount of records by rounded temperature bin:\n')
print( merged['tbin'].value_counts() )

#
#  Write out the result
#

merged.to_csv('join.csv',index=False)

#%%
#
#  Draw boxplots of electricity usage by temperature bin
#

fig1, ax1 = plt.subplots(dpi=300)
merged.boxplot('usage',by='tbin',ax=ax1,grid=False,showfliers=False)
fig1.suptitle("Usage by Temperature")
ax1.set_title(None)
ax1.set_ylabel("kW")
ax1.set_xlabel("Temperature Bin")
fig1.savefig('by_temp.png')

#%%
#
#  Draw boxplots of electricity usage by month
#

fig2, ax2 = plt.subplots(dpi=300)
merged.boxplot('usage',by='month',ax=ax2,grid=False,showfliers=False)
fig2.suptitle("Usage by Month")
ax2.set_title(None)
ax2.set_ylabel("kW")
ax2.set_xlabel("Month")
fig2.savefig('by_month.png')
Site Index | Zoom | Admin
URL: https://wilcoxen.maxwell.insightworks.com/pages/7437.html
Peter J Wilcoxen, The Maxwell School, Syracuse University
Revised 03/27/2022