# The Maxwell School
# Syracuse University

"""
dist.py
Spring 2022 PJW

Compute the detailed distributional impacts of a sales tax.

Reads household attributes and per-household quantities from two CSV
files, computes the revenue and ETR for every household, and prints
the median ETR for each combination of household type and income
quintile.
"""

import csv
from collections import defaultdict

import numpy as np


#
#  Define a function for reading in data from CSV files.
#  Will convert selected fields to floats along the way.
#  Returns a dictionary that uses household ID numbers
#  as keys and has information about each household stored
#  as the value at that key.
#

def read_data(filename, floatlist):
    """Read a CSV file into a dictionary of rows keyed by the 'id' column.

    Parameters
    ----------
    filename
        Name of the CSV file to read. Its header row must include an
        'id' column and every column named in `floatlist`.
    floatlist
        Names of the columns whose values are converted to float.

    Returns
    -------
    dict
        Maps each row's 'id' value to that row's dictionary. If an ID
        appears more than once, the last row with that ID is kept.

    Raises
    ------
    OSError
        If the file cannot be opened.
    KeyError
        If a column in `floatlist`, or the 'id' column, is missing.
    ValueError
        If a value in a `floatlist` column is not a valid number.
    TypeError
        If a row is too short, leaving a `floatlist` column empty (None).
    """
    file_data = {}

    # The with-statement closes the file even if a conversion fails;
    # newline='' lets the csv module handle line endings itself.
    with open(filename, newline='') as fh:
        for hh in csv.DictReader(fh):
            for field in floatlist:
                hh[field] = float(hh[field])
            file_data[hh['id']] = hh

    return file_data


#
#  Set up input file names
#

households = 'households.csv'
quantities = 'quantities.csv'

#
#  Read the file of household attributes and the file of
#  individual quantities with and without the tax.
#

hh_data = read_data(households, ['a', 'b', 'inc'])
qty_data = read_data(quantities, ['qd1', 'qd2'])

#
#  Now merge the quantity information into the list of
#  household attributes. Every household must have a row in
#  the quantities file: a missing ID raises KeyError.
#

for hh_rec in hh_data.values():
    this_id = hh_rec['id']
    qty_rec = qty_data[this_id]
    hh_rec['qd1'] = qty_rec['qd1']
    hh_rec['qd2'] = qty_rec['qd2']

#
#  Make a list of incomes and then compute the quintile
#  breaks. Each break is the minimum income of a quintile.
#

incomes = [hh_rec['inc'] for hh_rec in hh_data.values()]
inc_cuts = np.percentile(incomes, [0, 20, 40, 60, 80])

#
#  Compute revenue, ETR, and quintile for each HH
#
#  NOTE(review): pd1 and pd2 are presumably the prices without and
#  with the tax, so dp is the tax per unit -- confirm.
#

pd1 = 53.35
pd2 = 55.27
dp = pd2 - pd1

for hh_rec in hh_data.values():

    #  Compute the revenue and the ETR, which is revenue as a
    #  percent of income. A household with zero income will raise
    #  ZeroDivisionError here.

    hh_rec['rev'] = dp * hh_rec['qd2']
    hh_rec['etr'] = 100 * hh_rec['rev'] / hh_rec['inc']

    #  Determine where the household's income falls in the list
    #  of quintiles: count the breaks at or below its income. The
    #  result is stored as a string.

    quint = sum(1 for min_inc in inc_cuts if hh_rec['inc'] >= min_inc)
    hh_rec['quint'] = str(quint)

#
#  Set up a defaultdict that will be used to hold lists
#  of ETRs by group.
#

grouped = defaultdict(list)

#
#  Walk through the list of households and add each ETR
#  to several lists: one for the household's type and quintile,
#  one for all households in the household's quintile, and
#  one for all households of the household's type.
#

for hh_rec in hh_data.values():

    key_by_type_quint = (hh_rec['type'], hh_rec['quint'])
    key_by_quint = ('all', hh_rec['quint'])
    key_by_type = (hh_rec['type'], 'all')

    grouped[key_by_type_quint].append(hh_rec['etr'])
    grouped[key_by_quint].append(hh_rec['etr'])
    grouped[key_by_type].append(hh_rec['etr'])

#
#  Calculate medians for every subgroup, rounded to two
#  decimal places.
#

medians = {
    this_key: round(np.median(etr_list), 2)
    for this_key, etr_list in grouped.items()
}

#
#  Print the results. The keys are tuples of strings, so the
#  rows are sorted by type and then by quintile, as text.
#

print("type quint etr")
for group_key in sorted(medians):
    print(group_key[0],group_key[1],medians[group_key])