#! /usr/bin/env python
import os
import pdb
import pandas as pd
import numpy as np
import shutil
from policy_modules.fpl import Fpl as Fpl
import subprocess
import sys
import utility_modules.import_zip
import zipfile
from utility_modules.calibrate_exante_latents import calibrate_exante_latents as calibrate_exante_latents
[docs]class Output():
def __init__(self, out_path, git_info, general_dict, params_dict, cal_levers_dict, policy_levers_dict, system_dict, is_regional):
"""
IN:
out_path - output path (with suffixes)
is_* - flags for inclusion of certain output tables
NOTE:
output structure is as follows:
output_root/
suffix/
year1/
table_type1/
table_name1
table_name2
...
table_type2/
...
year2/
...
"""
self.file_path = out_path
self.out_tables = {}
self.git_info = git_info
self.out_path = out_path
self.is_regional = is_regional
self.general_dict = general_dict
self.config_path = self.general_dict.config_path
self.params_dict = params_dict
self.cal_levers_dict = cal_levers_dict
self.policy_levers_dict = policy_levers_dict
self.system_dict = system_dict
zipf = zipfile.ZipFile(self.out_path+'/Policy_Input.zip','w',zipfile.ZIP_DEFLATED)
import_zip.zipdir(general_dict['pol_input_path'], zipf)
self.table_types = [
'other', # 0
'individual_data', # 1
'tables', # 2
'aggregate_results' # 3
]
# disable certain output depending on configuration file settings
self.tables_flag = params_dict.tables
if not params_dict.tables:
self.table_types[2] = None
# output tables constants
self.industry_type ={0: "unemployed",
1: "natural resource/mining",
2: "construction",
3: "manufacturing",
4: "trans/util/information",
5: "wholesales/retail",
6: "financial",
7: "services/govt",
8: "healthcare",
9: "federal",
10: "state/local"}
self.firm_size_bins = [3, 10, 50, 200, 1000, 5000000] # right=False in the cut function
self.firm_size_lab = ['3-9', '10-49', '50-199', '200-999', '1000+']
self.age_bins = [0, 19, 30, 45, 65, 200]
self.age_lab = ['0-18', '19-29', '30-44', '45-64', '65+']
self.race_cat = {1: "White",
2: "Latino",
3: "African American",
4: "Asian",
5: "Other"}
self.gender_cat = {1: "Male", \
0: "Female"}
self.fpl_bins = [0, 100, 138, 400, 2000]
self.fpl_lab = ['<=100 FPL', '101-138 FPL', '139-400 FPL', '400+ FPL']
self.coverage = {1: 'esi',
2: 'individual market',
3: 'medicare',
4: 'medicaid',
5: 'other public',
6: 'uninsured'}
self.choice = {1: 'uninsured',
2: 'self_esi',
3: 'spouse_esi',
4: 'parent_1_esi',
5: 'parent_2_esi',
6: 'silver',
7: 'bronze',
8: 'catastrophic',
9: 'medicaid',
10: 'medicare'}
# mapping {1:1, [2,3,4,5]:2, [6,7,8]:3, 9:4, 10:5}
self.choice5_map = {1: 1, 2: 2, 3: 2, 4: 2, 5: 2, 6: 3, 7: 3, 8: 3, 9: 4, 10: 5}
self.choice5 = {1: 'uninsured',
2: 'esi',
3: 'individual market',
4: 'medicaid',
5: 'medicare'}
self.choice_lab = ['uninsured', 'esi', 'im', 'medicaid', 'medicare']
self.lep = {0: 'Speaks English very well',
1: 'Limited English Proficiency'}
self.esi_cov = {0: 'ESI without ACA and employer dropped coverage under ACA',
1: 'ESI without ACA and employer maintained coverage under ACA'}
self.firm_type = {1: 'Typical coverage, typical contribution',
2: 'Typical coverage, large employee contribution',
3: 'Bronze'}
self.xc_sub = {0: 'Number individuals enrolled without subsidies',
1: 'Number individuals enrolled with subsidies'}
#pd.options.display.float_format = '{:,.0f}'.format
[docs] def set_year(self, year):
self.year = year
[docs] def write_config(self):
# creating a config file that contains updated information
config_out = os.path.join(*[self.file_path, str(self.year), "reproduction_config.ini"])
# DO NOT CHANGE FORMATTING BELOW
header = """\
################################################################################
# CONFIGURATION FILE FOR CALSIM
#
# Notes: \"#\" are comments and do not affect this file's function. The summary
# sections at the beginning provide a brief explanation for each variable
#
# This file has been updated with any and all parameters passed in through
# the command line and should be used for result reproduction.
################################################################################"""
general_desc = """\
################################################################################
[General]
# 1. outpath = where to put the output files and tables
# 2. datapath = path to input data
# 3. username = username for output folder naming
# 4. suffix = suffix for output folder naming
# 5. test = flag for running as test (on a subset of firms, specified by
# percentage\n"""
output_desc = """\
################################################################################
[Output]
# 1. debug = Flag for outputting debugging information to console
# 2. tables = Flag for creating folder for storing tables for checking\n"""
calibration_desc = """\
################################################################################
[Calibration]
# 1. calibration = Flag for calibration to generate person specific data
# 2. calibration_file = file selected to replace new_cal columns in choice file
# 3. utility_params = utility esi, xc (unsub and sub), mc, cal, penalty\n"""
policy_desc = """\
################################################################################
[Policy]
# 1. firm_threshold = Tendency to offer, tendency to drop
# 2. regional_specificity = 0 is statewide, 1 is regional\n"""
system_sheet_desc = """\
################################################################################
[System_Sheet]\n"""
# write header
# with open(config_out, 'w') as f:
# f.write(header)
# output configuation file
shutil.copy2(self.config_path, config_out)
# write current branch and commmit hash
with open(config_out, 'a') as f:
f.write('\n# Current Branch: %s# Latest Commit: %s' % (self.git_info['calsim'][0], self.git_info['calsim'][1]))
with open(config_out, 'a') as f:
f.write('\n# Current Policy Input Commit: %s# Policy Input Commit Message: %s' % (self.git_info['policy-input'][0], self.git_info['policy-input'][1]))
with open(config_out, 'a') as f:
f.write('\n# Command line args: %s\n' % ' '.join(sys.argv))
[docs] def initialize_raw_data(self, raw_data):
"""
Available Data from CalSim.py (self refers to CalSim):
'perm_ind_choices' : self.perm_ind_choices,
'long_output' : long_output,
'long_calib_table' : output_calib,
'firm_choices_all' : self.firm_choices_all,
'Hieu' : self.Hieu.get_hieu_table(),
'firm_table' : self.Firm,
'fam_table' : self.Family,
'worker_table' : self.Worker,
'esi_table' : self.Esi,
'exp_table' : self.Expenditures,
'weights' : (self.person_count, self.weight_total),
'dynamic_version' : self.dynamic_version,
'dynamic_exchange_statewide_premiums' : self.statewide_premiums,
'dynamic_exchange_regional_premiums' : self.regional_premiums,
'hieu_table_paired_down' : self.hieu_table_paired_down,
'dropped_firms' : self.dropped_firms,
'dropped_PT_firms' : self.dropped_PT_firms,
'calibrateRun' : self.calibrateRun,
'calibrate_ex_ante_latents' : self.calibrate_ex_ante_latents,
'Latent_vars' : self.Latent_vars
"""
self.raw_data = raw_data
[docs] def build_out_tables(self, firm_threshold):
self.firm_threshold = firm_threshold
def example():
"""
* write functions that builds the necessary df as in this example
* if there are raw_data that you need, make sure you pass it in
on line ~403 in CalSim.py. It will be in the dictionary self.raw_data.
* self.year -> useful if you want to output only if simulating a
certain year. self.year is set after every full iteration
* to add a dataframe to the output df list, you must call "initiate_out_table"
* make sure you call your function at the end of "build_out_tables"
"""
# do something like this if you want only certain year (i.e. 2015 and 2019)
if self.year != 2015 and self.year != 2019:
return
# build dataframe as desired. Make sure you pass in any "raw data" that you need
# from CalSim.py. Instructions for that are in the docstring above
example = DataFrame(np.random.randn(10, 5))
# make sure you call "self.initiate_out_table", check docstring for more
self.initiate_out_table(3, "example.csv", example)
# last but not least, call your function at the very end (this is example, so I have not called this function)
def grouped_weighted_avg(values, weights, by):
return (values * weights).groupby(by).sum() / weights.groupby(by).sum()
def try_percent(num, den):
try:
pcent = (num / den) * 100
except ZeroDivisionError:
pcent = None
return pcent
def build_expenditure_tables():
print('building expenditure tables')
exp_data = self.raw_data['exp_table']
choices = self.raw_data['long_output'][['p_id', 'choice']]
df = pd.merge(exp_data, choices, how='left', left_on='person_id', right_on='p_id')
# agument columns
for choice in range(11):
df.loc[df.choice == choice, 'exp_esi_average'] = df.loc[df.choice == choice, 'exp_average_%i' % choice] if 2 <= choice <= 5 else \
df[df.choice == choice][['exp_average_%i' % i for i in range(2, 6)]].max(axis=1)
df.loc[df.choice == choice, 'exp_esi_actual'] = df.loc[df.choice == choice, 'exp_actual_%i' % choice] if 2 <= choice <= 5 else \
df[df.choice == choice][['exp_actual_%i' % i for i in range(2, 6)]].max(axis=1)
df.loc[df.choice == choice, 'oop_esi_average'] = df.loc[df.choice == choice, 'oop_average_%i' % choice] if 2 <= choice <= 5 else \
df[df.choice == choice][['oop_average_%i' % i for i in range(2, 6)]].max(axis=1)
df.loc[df.choice == choice, 'oop_esi_actual'] = df.loc[df.choice == choice, 'oop_actual_%i' % choice] if 2 <= choice <= 5 else \
df[df.choice == choice][['oop_actual_%i' % i for i in range(2, 6)]].max(axis=1)
# rename columns from number to category
# rename example: 'exp_average_7' -> 'exp_private_average'
columns = ['exp_average', 'exp_actual', 'oop_average', 'oop_actual']
old_names = ['_1', '_7', '_9', '_10']
new_names = ['unins', 'private', 'mcaid', 'mcare']
new_columns = {}
for index, old_name in enumerate(old_names):
for column in columns:
new_columns[column + old_name] = column[:4] + new_names[index] + column[3:]
df.rename(columns=new_columns, inplace=True)
# get columns we want
keep_columns = ['person_id', 'choice', 'esi', 'private', 'mcare', 'mcaid', 'unins']
exp_data = df[[column for keep_column in keep_columns for column in df.columns.values if keep_column in column]]
self.initiate_out_table(1, 'step0_exp.csv', exp_data)
def build_calib_tables():
# initiate person specfic table
# merge more variables from hieu_table for creating databook table
thieu = self.raw_data['pop_hieu'][['family_id', 'person_id', 'male', 'race_ethnicity', 'age', \
'lep', 'doc_status', 'coverage', 'mcaid_elig_exante', \
'predicted_exante_elig', 'esi_cov', \
'u_esi', 'u_priv', 'u_unins', 'u_mcaid', 'u_mcare', \
'c_esi', 'c_priv', 'c_mcaid', 'c_mcare', 'c_unins','threshold', 'full_time', \
'num_chronic', 'health_status', 'sector', 'self_emp', 'cobra_flag', 'marit', 'firm_size','mcaid_elig_flipped','pholder_id_esi' \
,'age_rated_slv_prem','age_rated_bronze_prem','affordability','family_type_for_afford_esi','self_affordability','tax_dependent','prem_type','ind_income','ind_fpl','esi_threshold','ind_threshold','povlev','family_cap','subsidy_amount_total']]
thieu = thieu.dropna(subset=['person_id'])
temp_choice = self.raw_data['long_output']
hieu_choice = pd.merge(thieu, temp_choice, how = "inner", on ='person_id')
#adding fpl information
tfam = self.raw_data['fam_table'][['family_id','fpl','orig_fam_income','fam_income','family_size']]
hieu_choice = pd.merge(hieu_choice, tfam, how = "inner", on ='family_id')
# adding firm coverage information
# subsetting to chosen_choice
tfirm = self.raw_data['firm_choices_all']
tfirm1 = tfirm.loc[tfirm['chosen_choice'] == 1]
tfirm1 = tfirm1[['firm_id', 'offer', 'pt','esi_self_cost','esi_fam_cost','esi_sp_cost','esi_ch_cost']]
### previous choice is choice = 0
tfirm2 = tfirm.loc[tfirm['choice'] == 0]
tfirm2 = tfirm2[['firm_id', 'offer', 'pt']]
tfirm2.rename(columns={'offer': 'prev_offer', 'pt': 'previous_offer_pt'}, inplace = True)
tfirm = pd.merge(tfirm1, tfirm2, how="inner", on = "firm_id")
hieu_choice = pd.merge(hieu_choice, tfirm, how = "left", on ='firm_id')
tworker = self.raw_data['worker_table'][['person_id', 'esi_not_elig']]
hieu_choice = pd.merge(hieu_choice, tworker, how='left', on='person_id')
# add two more columns: prev_having_offer curr_having_offer
# if parent 1 or parent 2 has an offer, then the entire hieu has an offer, otherwise, adult children might still have an offer
# this is the case both for prev_having_offer and curr_having_offer
hieu_choice['prev_offer_p0'] = (hieu_choice['person_type'] == 0) & (hieu_choice['prev_offer'] == 1) & \
((hieu_choice['full_time'] == 1) | ((hieu_choice['full_time'] == 0) & (hieu_choice['previous_offer_pt'] == 1)&(hieu_choice['esi_not_elig'] == 0)))
hieu_choice['prev_offer_p1'] = (hieu_choice['person_type'] == 1) & (hieu_choice['prev_offer'] == 1) & \
((hieu_choice['full_time'] == 1) | ((hieu_choice['full_time'] == 0) & (hieu_choice['previous_offer_pt'] == 1)&(hieu_choice['esi_not_elig'] == 0)))
hieu_choice['prev_offer_parent'] = hieu_choice['prev_offer_p0'] | hieu_choice['prev_offer_p1']
hieu_choice['prev_offer_parent'] = hieu_choice.groupby('hieu_id')['prev_offer_parent'].transform('max')
hieu_choice['prev_having_offer'] = (hieu_choice['prev_offer_parent'] == 1) | ((hieu_choice['prev_offer'] == 1) & \
((hieu_choice['full_time'] == 1) | ((hieu_choice['full_time'] == 0) & (hieu_choice['pt'] == 1)&(hieu_choice['esi_not_elig'] == 0))))
hieu_choice['curr_offer_p0'] = (hieu_choice['person_type'] == 0) & (hieu_choice['offer'] == 1) & \
((hieu_choice['full_time'] == 1) | ((hieu_choice['full_time'] == 0) & (hieu_choice['pt'] == 1)&(hieu_choice['esi_not_elig'] == 0)))
hieu_choice['curr_offer_p1'] = (hieu_choice['person_type'] == 1) & (hieu_choice['offer'] == 1) & \
((hieu_choice['full_time'] == 1) | ((hieu_choice['full_time'] == 0) & (hieu_choice['pt'] == 1)&(hieu_choice['esi_not_elig'] == 0)))
hieu_choice['curr_offer_parent'] = hieu_choice['curr_offer_p0'] | hieu_choice['curr_offer_p1']
hieu_choice['curr_offer_parent'] = hieu_choice.groupby('hieu_id')['curr_offer_parent'].transform('max')
hieu_choice['curr_having_offer'] = (hieu_choice['curr_offer_parent'] == 1) | ((hieu_choice['offer'] == 1) & \
((hieu_choice['full_time'] == 1) | ((hieu_choice['full_time'] == 0) & (hieu_choice['pt'] == 1)&(hieu_choice['esi_not_elig'] == 0))))
hieu_choice['indiv_having_offer']=(hieu_choice['offer'] == 1) & ((hieu_choice['full_time'] == 1) | ((hieu_choice['full_time'] == 0) & (hieu_choice['pt'] == 1)&(hieu_choice['esi_not_elig'] == 0)))
#if don't have esi_offer, then esi_cost should be nan
loc_self = np.where((hieu_choice['family_type_for_afford_esi'] == 1))[0]
loc_sp = np.where((hieu_choice['family_type_for_afford_esi'] == 2))[0]
loc_ch = np.where((hieu_choice['family_type_for_afford_esi'] == 3))[0]
loc_fam = np.where((hieu_choice['family_type_for_afford_esi'] == 4))[0]
loc_ind_adult_child = np.where((hieu_choice['person_type'] > 3) & (hieu_choice['tax_dependent'] == 0))[0]
loc_real_family = hieu_choice.index.difference(loc_ind_adult_child)
loc_not_elig=np.where((hieu_choice['family_type_for_afford_esi'] == 5))[0]
hieu_choice.loc[loc_self,'real_fam_cost']=hieu_choice.loc[loc_self].esi_self_cost
hieu_choice.loc[loc_sp, 'real_fam_cost'] = hieu_choice.loc[loc_sp].esi_sp_cost
hieu_choice.loc[loc_ch, 'real_fam_cost'] = hieu_choice.loc[loc_ch].esi_ch_cost
hieu_choice.loc[loc_fam, 'real_fam_cost'] = hieu_choice.loc[loc_fam].esi_fam_cost
hieu_choice.loc[loc_not_elig, 'real_fam_cost'] =np.nan
hieu_choice['real_self_cost']=hieu_choice.esi_self_cost
hieu_choice.loc[hieu_choice['indiv_having_offer']==0,'real_self_cost']= np.nan
hieu_choice.loc[hieu_choice['indiv_having_offer'] == 0, 'real_fam_cost'] = np.nan
hieu_choice['aff_self'] = (hieu_choice.esi_threshold * hieu_choice.fam_income > hieu_choice.real_self_cost)
hieu_choice.loc[loc_ind_adult_child,'aff_self']=(hieu_choice.loc[loc_ind_adult_child].esi_threshold*hieu_choice.loc[loc_ind_adult_child].ind_income >hieu_choice.loc[loc_ind_adult_child].real_self_cost)
hieu_choice['aff_fam'] = (hieu_choice.esi_threshold * hieu_choice.fam_income > hieu_choice.real_fam_cost)
hieu_choice.loc[loc_ind_adult_child, 'aff_fam']=hieu_choice.loc[loc_ind_adult_child].aff_self
hieu_choice.loc[hieu_choice['indiv_having_offer'] == 0, 'aff_self'] = False
hieu_choice.loc[hieu_choice['indiv_having_offer'] == 0, 'aff_fam'] = False
hieu_choice.loc[loc_real_family, 'fam_aff_self'] = hieu_choice.loc[loc_real_family].groupby('hieu_id')['aff_self'].transform('max')
hieu_choice.loc[loc_real_family, 'fam_aff_fam'] = hieu_choice.loc[loc_real_family].groupby('hieu_id')['aff_fam'].transform('max')
hieu_choice.loc[loc_ind_adult_child, 'fam_aff_self']=hieu_choice.loc[loc_ind_adult_child,'aff_self']
hieu_choice.loc[loc_ind_adult_child, 'fam_aff_fam'] = hieu_choice.loc[loc_ind_adult_child, 'aff_fam']
#if not person type 0 or 1 they can not have esi_cost for whole family
loc_notparents=np.where((hieu_choice['person_type']!=0) &(hieu_choice['person_type']!=1))[0]
hieu_choice.loc[loc_notparents,'esi_fam_cost']=np.nan
#take the minimum_cost
hieu_choice['min_esi_self_cost'] = hieu_choice.real_self_cost
#set min_esi_self_cost of non-offer people to be the lowest self-only cost in their HIU
hieu_choice.loc[loc_notparents, 'esi_self_cost'] = np.nan
hieu_choice['hieu_min_esi_self_cost'] = hieu_choice.groupby('hieu_id')['real_self_cost'].transform('min')
loc_withoffer= np.where(hieu_choice['indiv_having_offer']==1)[0]
hieu_choice.loc[loc_withoffer, 'hieu_min_esi_self_cost']=hieu_choice.loc[loc_withoffer, 'min_esi_self_cost']
loc_0 = np.where(hieu_choice['person_type']<6)[0]
hieu_choice.loc[loc_0, 'min_esi_fam_cost'] =hieu_choice.loc[loc_0].groupby('hieu_id')['real_fam_cost'].transform('min')
#hieu_choice['lostesi_firmnooffer']=False
#hieu_choice['lostesi_firmno_ptoffer']=False
hieu_choice['offer_disappear']=(hieu_choice.offer==0)&(hieu_choice.prev_offer==1)
hieu_choice['pt_offer_disappear']=(hieu_choice.pt==0)&(hieu_choice.previous_offer_pt==1)
#loc_0 is the position where people previously have coverage but lost esi coverage now
loc_0 =np.where((hieu_choice['coverage'] == 1) & ((hieu_choice['choice'] < 2) | (hieu_choice['choice'] > 5)))[0]
#loc_1 is the position where people previously have coverage but lost esi coverage now and working part time
loc_1=np.where((hieu_choice['coverage']==1) & ((hieu_choice['choice']<2) | (hieu_choice['choice']>5))& (hieu_choice['full_time']==0))[0]
#get p_holder of full time worker who lost their esi coverage
pholder_offerstatus=hieu_choice[hieu_choice['person_id'].isin(hieu_choice.loc[loc_0].pholder_id_esi.tolist())]
pholder_offerstatus=pholder_offerstatus.rename(columns={'offer_disappear': 'lost_esi_firmnooffer','person_id':'p_holder'})
hieu_choice=pd.merge(hieu_choice,pholder_offerstatus[['p_holder','lost_esi_firmnooffer']],left_on='pholder_id_esi',right_on='p_holder',how='left')
#set everything not in the target location to nan
hieu_choice[~hieu_choice.index.isin(loc_0)].lost_esi_firmnooffer=np.nan
#get the p_holder of part time worker who lost their esi coverage
pt_pholder_offerstatus=hieu_choice[hieu_choice['person_id'].isin(hieu_choice.loc[loc_1].pholder_id_esi.tolist())]
pt_pholder_offerstatus = pt_pholder_offerstatus.rename(columns={'offer_disappear': 'lost_esi_firmno_ptoffer','person_id':'p_holder_pt'})
hieu_choice = pd.merge(hieu_choice, pt_pholder_offerstatus[['p_holder_pt', 'lost_esi_firmno_ptoffer']],left_on='pholder_id_esi', right_on='p_holder_pt', how='left')
#set everything not in target row to nan
hieu_choice[~hieu_choice.index.isin(loc_1)].lost_esi_firmno_ptoffer = np.nan
hieu_choice = hieu_choice.drop(['prev_offer_p0', 'prev_offer_p1', 'prev_offer_parent', 'curr_offer_p0', 'curr_offer_p1',\
'curr_offer_parent','esi_threshold','offer_disappear','pt_offer_disappear','pholder_id_esi','p_holder','p_holder_pt'], axis=1)
#print 'people count'
#print hieu_choice.loc[hieu_choice['choice'].isin([6,7])]['person_id'].count()
#print hieu_choice.loc[hieu_choice['choice'].isin([6,7])].p_weight.sum()
#print 'xc_prem sum'
#print hieu_choice.loc[hieu_choice['choice'].isin([6,7])].xc_prem.sum()
# merging with firm information
fchoices = self.raw_data['firm_choices_all']
fchoices = fchoices.loc[fchoices.chosen_choice == 1]
prev_offering = fchoices.loc[fchoices.prev_offer == True]
prev_non_offering = fchoices.loc[fchoices.prev_offer == False]
prev2curr_1 = prev_offering.loc[prev_offering.offer == True].shape[0]
prev2curr_2 = prev_offering.loc[prev_offering.offer == False].shape[0]
prev2curr_3 = prev_non_offering.loc[prev_non_offering.offer == True].shape[0]
prev2curr_4 = prev_non_offering.loc[prev_non_offering.offer == False].shape[0]
results = pd.DataFrame({'offer':[prev2curr_3, prev2curr_1], 'nonoffer': [prev2curr_4, prev2curr_2]})
results.index.name = 'offering'
#print results
# add two final columns: policy holder, and policy group
# if adult_1 is person holder, people who can be under policy include adult_2 == 3, children == 4, adult_children == 4
policy_id = 1
hieu_choice['policy_id'] = 0
hieu_choice['policy_holder'] = 0
for hieu in hieu_choice.loc[(hieu_choice['person_type'] == 0) & (hieu_choice['choice'] == 2)]['hieu_id']:
hieu_choice['policy_id'] += ((hieu_choice['hieu_id'] == hieu) & ((hieu_choice['person_type'] == 0) | \
(hieu_choice['choice'].isin([3,4])))) * policy_id
hieu_choice['policy_holder'] += ((hieu_choice['hieu_id'] == hieu) & (hieu_choice['person_type'] == 0)) * 1
policy_id += 1
# if adult_2 is person holder, people who can be under policy include adult_1 == 3, children == 5, adult_children == 5
for hieu in hieu_choice.loc[(hieu_choice['person_type'] == 1) & (hieu_choice['choice'] == 2)]['hieu_id']:
hieu_choice['policy_id'] += ((hieu_choice['hieu_id'] == hieu) & ((hieu_choice['person_type'] == 1) | \
(hieu_choice['choice'].isin([3,5])))) * policy_id
hieu_choice['policy_holder'] += ((hieu_choice['hieu_id'] == hieu) & (hieu_choice['person_type'] == 1)) * 1
policy_id += 1
# for adult_child 1 to 4, if any is a policy holder, place them in a new group
hieu_choice['policy_id'] += ((hieu_choice['person_type'].isin([4,5,6,7])) & (hieu_choice['choice'] == 2)) * policy_id
hieu_choice['policy_holder'] += (hieu_choice['person_type'].isin([4,5,6,7])) & (hieu_choice['choice'] == 2) * 1
ac = hieu_choice.loc[hieu_choice['policy_id'] == policy_id]
seq = np.arange(policy_id, hieu_choice.loc[hieu_choice['policy_id'] == policy_id].shape[0] + policy_id) # increment for every person
ac['policy_id'] = seq
cols = list(hieu_choice.columns)
hieu_choice.loc[hieu_choice['policy_id'] == policy_id, cols] = ac[cols]
if self.raw_data['long_calib_table'] is not None:
# manipulating wide table for calculations
table = pd.pivot_table(self.raw_data['long_calib_table'], values=['utility','calib'], index=['hieu_id', 'person_id'], columns='choice')
indices = np.unique(self.raw_data['long_calib_table'].choice.values).tolist()
table.columns = ['utility_' + str(i) for i in indices]+['calib_' + str(i) for i in indices]
table.reset_index(inplace=True)
temp = table[['utility_' + str(i) for i in indices]]
table['sum_utility'] = temp.max(axis=1)
# finding hieu total utilities based on persons making optimal individual choices
optimal_ind_choices = table[['hieu_id','sum_utility']].groupby(['hieu_id'], as_index=False).sum()
hieu_df = hieu_choice['hieu_id'].value_counts().reset_index()
hieu_df.columns = ['hieu_id','hieu_size']
hieu_df = pd.merge(hieu_df, hieu_choice[['hieu_id','hieu_utility']], on='hieu_id', how='left')
optimal_ind_choices = pd.merge(optimal_ind_choices, hieu_df, on='hieu_id', how='inner')
optimal_ind_choices['match'] = optimal_ind_choices['sum_utility'] == optimal_ind_choices['hieu_utility']
optimal_ind_choices2 = optimal_ind_choices.loc[optimal_ind_choices['hieu_size'] != 1]
### pcent_dispcy legend - 0: total, 1: non-single size families, 2: non-single size, non esi offering
pcent = float(optimal_ind_choices.loc[optimal_ind_choices.match == False].shape[0]) / float(optimal_ind_choices.shape[0])
pcent = round(pcent, 4)
optimal_ind_choices['pcent_dispcy_0'] = np.nan
optimal_ind_choices['pcent_dispcy_0'][0] = pcent
optimal_ind_choices['pcent_dispcy_0'][1] = str(optimal_ind_choices.loc[optimal_ind_choices.match == False].shape[0]) + ' / ' \
+ str(optimal_ind_choices.shape[0])
if optimal_ind_choices2.empty:
pcent = 0
else:
pcent = float(optimal_ind_choices2.loc[optimal_ind_choices2.match == False].shape[0]) / float(optimal_ind_choices2.shape[0])
pcent = round(pcent, 4)
optimal_ind_choices['pcent_dispcy_1'] = np.nan
optimal_ind_choices['pcent_dispcy_1'][0] = pcent
optimal_ind_choices['pcent_dispcy_1'][1] = str(optimal_ind_choices2.loc[optimal_ind_choices2.match == False].shape[0]) + ' / ' \
+ str(optimal_ind_choices2.shape[0])
person_df = table[['utility_' + str(i) for i in indices]]
person_df['opt_choice'] = (person_df.idxmax(axis=1))
person_df['opt_choice'] = (person_df['opt_choice'].apply(lambda x: x[8:])).astype(int)
table['p_opt_choice'] = person_df['opt_choice']
table = pd.merge(table, hieu_choice[['person_id','choice']], on='person_id', how='left')
table = table.rename(columns={'choice': 'hieu_choice'})
table['match'] = table['p_opt_choice'] == table['hieu_choice']
pcent = float(table.loc[table.match == False].shape[0]) / float(table.shape[0])
pcent = round(pcent, 4)
table['pcent_dispcy_0'] = np.nan
table['pcent_dispcy_0'][0] = pcent
table['pcent_dispcy_0'][1] = str(table.loc[table.match == False].shape[0]) + ' / ' + str(table.shape[0])
# searching for hieus that were offered esi
offering_esi = pd.DataFrame()
utility_esi_cols = ['utility_2','utility_3','utility_4','utility_5']
for hieu in optimal_ind_choices.hieu_id.unique():
no_esi = True
for col in utility_esi_cols:
try:
no_esi &= table.loc[table.hieu_id == hieu][col].isnull().values.all()
except:
pass
temp = pd.DataFrame({'hieu_id': [hieu], 'offers_esi': [~no_esi]})
offering_esi = offering_esi.append(temp)
offering_esi.reset_index(drop=True, inplace=True)
optimal_ind_choices = pd.merge(optimal_ind_choices, offering_esi, on='hieu_id', how='left')
temp = optimal_ind_choices.loc[optimal_ind_choices['offers_esi'] == False]
pcent = float(temp.loc[temp.match == False].shape[0]) / float(temp.shape[0])
pcent = round(pcent, 4)
optimal_ind_choices['pcent_dispcy_2'] = np.nan
optimal_ind_choices['pcent_dispcy_2'][0] = pcent
optimal_ind_choices['pcent_dispcy_2'][1] = str(temp.loc[temp.match == False].shape[0]) + ' / ' + str(temp.shape[0])
self.initiate_out_table(1, "person_behavior_discrepancy.csv", table)
self.initiate_out_table(1, "hieu_behavior_discrepancy.csv", optimal_ind_choices)
# create text file that checks for all
directory = os.path.join(self.out_path, str(self.year), self.table_types[1])
if not os.path.exists(directory):
os.makedirs(directory)
sanity_check_output = open(os.path.join(directory, 'sanity_check.txt'), 'w+')
paragraph = ["checking aggregate p_weights of specific conditions: \n"]
categories = ['xc_sub', 'mcaid', 'new_mcaid', 'penalty']
for category in categories:
paragraph.append(category + ": " + str(hieu_choice.loc[hieu_choice[category] == 1].p_weight.sum()) + "\n")
for line in paragraph:
sanity_check_output.write(line)
sanity_check_output.close()
hieu_choice.rename(columns={'xc_sub': 'subsidy_elig','adultkid_xc_sub': 'adultkid_subsidy_elig', 'hieu_subsidy_amount_total':'subsidy_amount_total_long', 'hieu_subsidy_bronze_total': 'subsidy_bronze_total','hieu_subsidy_amount_total_default': 'subsidy_amount_total_default_long'}, inplace=True)
hieu_choice['xc_sub_final']=0
xc_sub_loc = np.where(((hieu_choice['subsidy_elig']==1)&(hieu_choice['subsidy_amount_total']>0)& \
(hieu_choice['choice'].isin([1,2,3,4,5,9,10])))|((hieu_choice['subsidy_elig']==1)&\
(hieu_choice['sub_amount']>0)&(hieu_choice['choice'].isin([6,7,8]))))[0]
hieu_choice.loc[xc_sub_loc,'xc_sub_final']=1
if self.raw_data['dual_parameter']== 0:
hieu_choice['dual']=0
# ken's method
if self.raw_data['use_ken_flag']==True:
self.initiate_out_table(1, "hieu_choice_orig.csv", hieu_choice)
ken_adjuster=self.raw_data['ken_adjuster']
hieu_choice=pd.merge(hieu_choice,ken_adjuster,how='left',on='person_id')
hieu_choice['p_weight_orig']=hieu_choice.p_weight
hieu_choice.p_weight=hieu_choice.p_weight*hieu_choice.weight_adjuster
hieu_choice = hieu_choice.applymap(lambda x: 1 if x == True else x)
hieu_choice = hieu_choice.applymap(lambda x: 0 if x == False else x)
############################### Dave request variables #########################################
'''
The following two variables are used to count the number of people who lose ESI because
the firms of their *policy holders* of the ESI either drop the entire coverage or drop ESI
coverage for PT:
firm_dropped_my_cov
firm_dropped_my_PTcov
'''
#add firm_id_of_exante_pholder, full_time_of_exante_pholder, firm_dropped_my_cov,
#and firm_of_pholder_dropped_PTcov into hieu_choice
#firm_id_of_exante_pholder, full_time_of_exante_pholder are NA for people
#previously not on ESI
#firm_dropped_my_cov is NA for people previously not on ESI or the firm keeps FT/PT ESI
#firm_of_pholder_dropped_PTcov is NA for people previously not on ESI or firm currently drops FT ESI
hieu_choice=pd.merge(hieu_choice,self.raw_data["hieu_table_paired_down"].loc[self.raw_data["hieu_table_paired_down"].coverage==1,
["person_id","firm_id_of_exante_pholder","full_time_of_exante_pholder"]],on="person_id", how="left")
hieu_choice=pd.merge(hieu_choice,self.raw_data["dropped_firms"],on="firm_id_of_exante_pholder",how="left")
hieu_choice=pd.merge(hieu_choice,self.raw_data["dropped_PT_firms"],on="firm_id_of_exante_pholder",how="left")
#mask for persons with a PT ESI policy holder
#False for FT or NA exante pholder
pholder_is_PT=hieu_choice.full_time_of_exante_pholder.fillna(False)
#NA for person not on ESI coverage or firm drops FT ESI
#False for person continues on ESI but policy holder was FT
#True for person previously on ESI with PT policy holder and the firm dropped PTcov
hieu_choice["firm_dropped_my_PTcov"]=(hieu_choice.firm_of_pholder_dropped_PTcov) & pholder_is_PT
# CLEAN UP FOR N/A, USE SAME FORMAT AS ORIGINAL FOR TRUE/FALSE
#False if the person is not previously on ESI or the firm keeps FT/PT ESI cov for that person
#True if the person is previously on ESI but the firm drops FT cov
hieu_choice.firm_dropped_my_cov.fillna(False,inplace=True)
#False if the person is not previously on ESI or the firm keeps FT ESI cov for that person
#True if the person is previously on PT ESI but the firm drops PT cov
hieu_choice.firm_dropped_my_PTcov.fillna(False,inplace=True)
#ex_ante latent variables creation
if self.raw_data["calibrateRun"]==1 and self.raw_data["calibrate_ex_ante_latents"]==True:
self.calibrate_exante_latents=calibrate_exante_latents(self.raw_data["calibrate_ex_ante_latents"],self.raw_data["Latent_vars"],\
self.raw_data["long_calib_table"],self.raw_data["pop_hieu"].person_id)
exante_latents=self.calibrate_exante_latents.create_ex_ante_latent()
self.initiate_out_table(1, "hieu_choice.csv", hieu_choice)
self.initiate_out_table(1, 'firm_choice.csv', self.raw_data['firm_choices_all'])
self.initiate_out_table(1, 'choice_count.csv', results, True)
def build_xc_wgted_unwgted_tables():
# aggregated results by year and by region
dftmp = pd.merge(self.raw_data['long_output'], self.raw_data['hieu_table'][['hieu_id', 'person_type', 'coverage', 'age']],
how='left', on=['hieu_id', 'person_type'])
# subsetting to age <=64
dftmp = dftmp.loc[dftmp['age']<=64]
# unweighted, statewide
dftmp['choice5'] = dftmp['choice'].map(self.choice5_map)
unwgted = pd.crosstab(dftmp['coverage'], dftmp['choice'], margins=True)
unwgted = unwgted.fillna(0)
unwgted['exp'] = np.nan
unwgted['exp'][0] = self.raw_data['weights'][0]
unwgted['actual'] = np.nan
unwgted['actual'][0] = dftmp['p_id'].count()
self.initiate_out_table(3, "unweighted_6_by_10.csv", unwgted, True)
print("Unweighted 6x10")
print(unwgted)
unwgted = pd.crosstab(dftmp['coverage'], dftmp['choice5'], margins=True)
unwgted = unwgted.fillna(0)
self.initiate_out_table(3, "unweighted_6_by_5.csv", unwgted, True)
print("Unweighted 6x5")
print(unwgted)
if self.is_regional == 1:
# regional table, unweighted
table = pd.crosstab([dftmp.region, dftmp.coverage], dftmp.choice)
table['exp'] = np.nan
table['exp'][0] = self.raw_data['weights'][0]
table['actual'] = np.nan
table['actual'][0] = dftmp['p_id'].count()
self.initiate_out_table(3, "unweighted_6_by_10_regional.csv", table, True)
table = pd.crosstab([dftmp.region, dftmp.coverage], dftmp.choice5)
self.initiate_out_table(3, "unweighted_6_by_5_regional.csv", table, True)
# regional table, weighted
table = pd.crosstab([dftmp.region, dftmp.coverage], dftmp.choice, dftmp.p_weight, aggfunc=sum, margins=True)
table = table.fillna(0)
table['exp'] = np.nan
table['exp'][0] = self.raw_data['weights'][1]
table['actual'] = np.nan
table['actual'][0] = dftmp['p_weight'].sum()
self.initiate_out_table(3, "weighted_6_by_10_regional.csv", table, True)
table = pd.crosstab([dftmp.region, dftmp.coverage], dftmp.choice5, dftmp.p_weight, aggfunc=sum, margins=True)
self.initiate_out_table(3, "weighted_6_by_5_regional.csv", table, True)
# weighted, statewide
wgted = pd.crosstab(dftmp['coverage'], dftmp['choice'], dftmp['p_weight'], aggfunc=sum, margins=True)
wgted = wgted.fillna(0)
wgted['exp'] = np.nan
wgted['exp'][0] = self.raw_data['weights'][1]
wgted['actual'] = np.nan
wgted['actual'][0] = dftmp['p_weight'].sum()
self.initiate_out_table(3, "weighted_6_by_10.csv", wgted, True)
wgted = pd.crosstab(dftmp['coverage'], dftmp['choice5'], dftmp['p_weight'], aggfunc=sum, margins=True)
wgted = wgted.fillna(0)
self.initiate_out_table(3, "weighted_6_by_5.csv", wgted, True)
print("Weighted")
print(wgted)
dftmp_xc_sub_0 = dftmp.loc[(dftmp.xc_sub==0) | (dftmp.xc_sub==False)]
dftmp_xc_sub_1 = dftmp.loc[(dftmp.xc_sub==1) & (dftmp.xc_sub==True)]
outtable_xc_sub_0 = pd.crosstab(dftmp_xc_sub_0['coverage'], dftmp_xc_sub_0['choice'], dftmp_xc_sub_0['p_weight'], aggfunc=sum, margins=True)
outtable_xc_sub_1 = pd.crosstab(dftmp_xc_sub_1['coverage'], dftmp_xc_sub_1['choice'], dftmp_xc_sub_1['p_weight'], aggfunc=sum, margins=True)
outtable_xc_sub_0 = outtable_xc_sub_0.fillna(0)
outtable_xc_sub_1 = outtable_xc_sub_1.fillna(0)
self.initiate_out_table(3, "crosstab_tables_xc_sub_0.csv", outtable_xc_sub_0, True)
self.initiate_out_table(3, "crosstab_tables_xc_sub_1.csv", outtable_xc_sub_1, True)
# regional tables
if self.is_regional == 1:
outtable_xc_sub_0 = pd.crosstab([dftmp_xc_sub_0.region, dftmp_xc_sub_0.coverage], dftmp_xc_sub_0.choice,
dftmp_xc_sub_0.p_weight, aggfunc=sum, margins=True)
outtable_xc_sub_1 = pd.crosstab([dftmp_xc_sub_1.region, dftmp_xc_sub_1.coverage], dftmp_xc_sub_1.choice,
dftmp_xc_sub_1.p_weight, aggfunc=sum, margins=True)
outtable_xc_sub_0 = outtable_xc_sub_0.fillna(0)
outtable_xc_sub_1 = outtable_xc_sub_1.fillna(0)
self.initiate_out_table(3, "regional_xc_sub_0.csv", outtable_xc_sub_0, True)
self.initiate_out_table(3, "regional_xc_sub_1.csv", outtable_xc_sub_1, True)
self.initiate_out_table(3, "dynamic_xc_statewide_premiums_version_"+\
str(self.raw_data["dynamic_version"])+".csv", self.raw_data["dynamic_exchange_statewide_premiums"], False)
self.initiate_out_table(3, "dynamic_xc_regional_premiums_version_"+\
str(self.raw_data["dynamic_version"])+".csv", self.raw_data["dynamic_exchange_regional_premiums"], False)
# build all tables
build_calib_tables()
build_xc_wgted_unwgted_tables()
if self.tables_flag == True:
build_expenditure_tables()
[docs] def initiate_out_table(self, table_type, table_name, table, index=False):
"""
effect:
adds table to the output list
arguments:
table_type - an integer as follows:
0 option is 'other',
1 option is 'individual_data',
2 option is 'tables',
3 option is 'aggregate_results'
table_name - desired name of the table
table - the dataframe itself
"""
if not (0 <= table_type <= len(self.table_types)):
raise ValueError("Invalid table type")
elif self.table_types[table_type] is None:
return
# add table to output list
self.out_tables[os.path.join(*[self.out_path, \
str(self.year), self.table_types[table_type], table_name])] = (table, index)
[docs] def output_tables(self):
"""
effect: output all tables in list of output tables to the correct path
"""
# make out directory if it does not exist
for table_type in self.table_types:
if table_type is None:
continue
full_path = os.path.join(*[self.out_path, str(self.year), table_type])
if not os.path.exists(full_path):
os.makedirs(full_path)
# output each csv
for path, table in self.out_tables.items():
table[0].to_csv(path, float_format='%.4f', index=table[1])
# output configuation file
#shutil.copy(self.config_path, self.out_path)
[docs] def force_output(self, table_type, table_name, table):
# make sure table_type is valid
if not (0 <= table_type <= len(self.table_types)):
raise ValueError("Invalid table type")
elif self.table_types[table_type] is None:
return
# generate path
output_path = os.path.join(*[self.out_path, str(self.year), \
self.table_types[table_type]])
if not os.path.exists(output_path):
os.makedirs(output_path)
table.to_csv(os.path.join(output_path, table_name), index=False)