The Maxwell School
Syracuse University
Syracuse University
#! /bin/python
#  Mar 21 (PJW)
#
#  Demonstration file introducing Pandas: Series, DataFrames, simple
#  statistics, sorting, quick plots, and reading a CSV file.

import pandas as pd
import json

#
#  Starting point: a plain dictionary mapping each month name to its
#  number of days (February is given 28 days).
#

d = {
    'January': 31,
    'February': 28,
    'March': 31,
    'April': 30,
    'May': 31,
    'June': 30,
    'July': 31,
    'August': 31,
    'September': 30,
    'October': 31,
    'November': 30,
    'December': 31,
}

print('\nDictionary:\n')
print(json.dumps(d, indent=4))

#%%
#
#  Turn the dictionary into a Series: the keys become the index and
#  the values become the data.
#

s = pd.Series(d)

print('\nSeries:\n')
print(s)

#  The index can be shown as an Index object or as an ordinary list

print(s.index)
print(list(s.index))

#%%
#
#  Look up a single element by its label, or several at once by
#  passing a list of labels.
#

print('\nMarch:', s['March'])

this_quarter = ['March', 'April', 'May']
print(s[this_quarter])

#%%
#
#  Quick calculations via built-in Series methods
#

print('\nTotal days:', s.sum())
print('\nMean days per month:', s.mean())

#%%
#
#  describe() produces a whole set of summary statistics in one call
#

stats = s.describe()
print(stats)

#  The result is itself a Series, so individual statistics can be
#  pulled out by label; here the quartiles give the interquartile range.

p25 = stats['25%']
p75 = stats['75%']
print('\nIQR:', p75 - p25)

#%%
#
#  Arithmetic on a Series is applied element by element and yields a
#  new Series: each month's share of the year, in percent.
#

pct = 100 * s / s.sum()

print("\nEach month as a percent of the year:\n")
print(pct.round(2))

#%%
#
#  Two ways to sort: by index label or by value
#

by_name = pct.sort_index()
print("\nSorted by name:\n")
print(by_name)

by_days = pct.sort_values()
print("\nSorted by days:\n")
print(by_days)

#%%
#
#  Some quick plots. Matplotlib is covered in more detail later; for
#  now the key point is to call plt.figure() before each plot so that
#  every plot gets a figure of its own. Without it the plots would be
#  drawn on top of one another.
#

import matplotlib.pyplot as plt

#  Horizontal bar chart
plt.figure()
ax = s.plot.barh()

#  Histogram
plt.figure()
ax = s.plot.hist()

#  Pie chart
fig = plt.figure()
ax = s.plot.pie()

#%%
#
#  Combine Series into a DataFrame. Series assigned as columns are
#  joined automatically by aligning their indexes, so it does not
#  matter that by_days is in a different order than s.
#

print(s)
print(by_days)

f = pd.DataFrame()
f['days'] = s
f['percent'] = by_days
print(f)

#
#  Row labels (index) and column names of the result
#

print(list(f.index))
print(list(f.columns))

#
#  A plain list (not a Series) has no index to align on, so it is
#  added purely by element order.
#

month_numbers = list(range(1, 13))
print(month_numbers)

f['number'] = month_numbers
print(f)

#%%
#
#  Read some CSV data: monthly average electricity use for the
#  household in the previous assignment.
#

kw = pd.read_csv('use_kw.csv')
print('\nkW data:\n')
print(kw)

#
#  Use the month column as the index
#

kw = kw.set_index('mo')
print(kw)

#
#  Sort the rows by the index, then draw the selected columns as lines
#

kw = kw.sort_index()
print(kw)

lines_to_plot = ['p95', 'mean', 'median', 'p05']

plt.figure()
ax = kw[lines_to_plot].plot.line()

#%%
#
#  Add columns two standard deviations above and below the mean and
#  plot them alongside the percentile lines.
#

two_sigma = 2 * kw['std']
kw['m+2s'] = kw['mean'] + two_sigma
kw['m-2s'] = kw['mean'] - two_sigma

lines_to_plot = ['m+2s', 'p95', 'mean', 'p05', 'm-2s']

plt.figure()
ax = kw[lines_to_plot].plot.line()