# dist.py

"""
dist.py
Spring 2022 PJW

Compute the detailed distributional impacts of a sales tax.
"""

import csv
import numpy as np 
from collections import defaultdict 

#
#  Define a function for reading in data from CSV files. 
#  Will convert selected fields to floats along the way.
#  Returns a dictionary that uses household ID numbers 
#  as keys and has information about each household stored
#  as the value at that key.
#

def read_data(filename,floatlist):
    """
    Read a CSV file into a dictionary of records keyed by household ID.

    Parameters:
        filename: path of the CSV file to read. The file must have a
            header row that includes an 'id' column.
        floatlist: names of the columns whose values should be
            converted from strings to floats.

    Returns:
        A dictionary that maps each row's 'id' string to the dict of
        column values for that row. Columns not named in floatlist are
        left as strings. If an ID appears more than once, the last row
        with that ID is the one kept.

    Raises:
        KeyError: if a row has no 'id' column or lacks a column named
            in floatlist.
        ValueError: if a value to be converted is not a valid float.
        TypeError: if a row is too short to supply a value for a
            column named in floatlist.
    """

    file_data = {}

    #  Open the file with a context manager so it is closed even if
    #  a conversion fails part way through. The csv module asks for
    #  newline='' so that quoted fields containing line breaks are
    #  parsed correctly.

    with open(filename, newline='') as fh:
        for hh in csv.DictReader(fh):
            for field in floatlist:
                hh[field] = float(hh[field])
            file_data[hh['id']] = hh

    return file_data

#
#  Set up input file names
#

households, quantities = 'households.csv', 'quantities.csv'

#
#  Load both files into dictionaries keyed by household ID. The
#  household file has attributes 'a', 'b' and 'inc' converted to
#  floats; the quantity file has 'qd1' and 'qd2' converted to floats
#  (the quantities with and without the tax).
#

hh_data = read_data(households, ['a', 'b', 'inc'])
qty_data = read_data(quantities, ['qd1', 'qd2'])

#
#  Copy each household's two quantities into its attribute record
#  so that everything about a household lives in one place. The
#  matching quantity record is found via the household's 'id' field.
#

for household in hh_data.values():
    matching_qty = qty_data[household['id']]
    for qty_field in ('qd1', 'qd2'):
        household[qty_field] = matching_qty[qty_field]

#
#  Make a list of incomes and then compute the quintile 
#  breaks.
#

incomes = [household['inc'] for household in hh_data.values()]
inc_cuts = np.percentile(incomes, [0, 20, 40, 60, 80])

#
#  For each household, work out the revenue it generates, its
#  effective tax rate (ETR) as a percentage of income, and the
#  income quintile it belongs to.
#
#  NOTE(review): pd1 and pd2 are presumably the prices without and
#  with the tax, which would make dp the tax per unit -- confirm.
#

pd1 = 53.35
pd2 = 55.27
dp  = pd2-pd1

for household in hh_data.values():

    #  Revenue is the price difference times the household's qd2;
    #  the ETR expresses that revenue as a percent of income.

    revenue = dp*household['qd2']
    household['rev'] = revenue

    income = household['inc']
    household['etr'] = 100*revenue/income

    #  The quintile is the number of lower cutoffs that the
    #  household's income reaches. The 0th percentile is always
    #  reached, so the result runs from 1 to 5 and is stored as a
    #  string.

    cuts_reached = sum(1 for min_inc in inc_cuts if income >= min_inc)
    household['quint'] = str(cuts_reached)
       
#
#  Set up a defaultdict that will be used to hold lists 
#  of ETRs by group.
#

grouped = defaultdict(list)

#
#  Go through the households and file each ETR under three keys:
#  (type, quintile) for the household's exact subgroup,
#  ('all', quintile) for everyone in its quintile, and
#  (type, 'all') for everyone of its type.
#
for household in hh_data.values():

    hh_type = household['type']
    hh_quint = household['quint']

    memberships = (
        ( hh_type, hh_quint ),
        ( 'all', hh_quint ),
        ( hh_type, 'all' ),
    )

    for membership in memberships:
        grouped[membership].append( household['etr'] )

#
#  Calculate medians for every subgroup.
#

#  Map each group key to the median of its ETRs, rounded to two
#  decimal places.

medians = {
    group: round(np.median(etr_list), 2)
    for group, etr_list in grouped.items()
}

#
#  Print the results
#

#  One header line, then one line per group in sorted key order.
#  Group keys are unique, so sorting the items orders them by key.

print("type quint etr")
for group, median_etr in sorted(medians.items()):
    hh_type, quint = group[0], group[1]
    print(hh_type, quint, median_etr)
#
#  Site Index | Zoom | Admin
#  URL: https://wilcoxen.maxwell.insightworks.com/pages/5269.html
#  Peter J Wilcoxen, The Maxwell School, Syracuse University
#  Revised 02/20/2022
#