Source code for utility_modules.output

#! /usr/bin/env python

import os
import pdb
import pandas as pd
import numpy as np
import shutil
from policy_modules.fpl import Fpl as Fpl
import subprocess
import sys
import utility_modules.import_zip
import zipfile
from utility_modules.calibrate_exante_latents import calibrate_exante_latents as calibrate_exante_latents

[docs]class Output(): def __init__(self, out_path, git_info, general_dict, params_dict, cal_levers_dict, policy_levers_dict, system_dict, is_regional): """ IN: out_path - output path (with suffixes) is_* - flags for inclusion of certain output tables NOTE: output structure is as follows: output_root/ suffix/ year1/ table_type1/ table_name1 table_name2 ... table_type2/ ... year2/ ... """ self.file_path = out_path self.out_tables = {} self.git_info = git_info self.out_path = out_path self.is_regional = is_regional self.general_dict = general_dict self.config_path = self.general_dict.config_path self.params_dict = params_dict self.cal_levers_dict = cal_levers_dict self.policy_levers_dict = policy_levers_dict self.system_dict = system_dict zipf = zipfile.ZipFile(self.out_path+'/Policy_Input.zip','w',zipfile.ZIP_DEFLATED) import_zip.zipdir(general_dict['pol_input_path'], zipf) self.table_types = [ 'other', # 0 'individual_data', # 1 'tables', # 2 'aggregate_results' # 3 ] # disable certain output depending on configuration file settings self.tables_flag = params_dict.tables if not params_dict.tables: self.table_types[2] = None # output tables constants self.industry_type ={0: "unemployed", 1: "natural resource/mining", 2: "construction", 3: "manufacturing", 4: "trans/util/information", 5: "wholesales/retail", 6: "financial", 7: "services/govt", 8: "healthcare", 9: "federal", 10: "state/local"} self.firm_size_bins = [3, 10, 50, 200, 1000, 5000000] # right=False in the cut function self.firm_size_lab = ['3-9', '10-49', '50-199', '200-999', '1000+'] self.age_bins = [0, 19, 30, 45, 65, 200] self.age_lab = ['0-18', '19-29', '30-44', '45-64', '65+'] self.race_cat = {1: "White", 2: "Latino", 3: "African American", 4: "Asian", 5: "Other"} self.gender_cat = {1: "Male", \ 0: "Female"} self.fpl_bins = [0, 100, 138, 400, 2000] self.fpl_lab = ['<=100 FPL', '101-138 FPL', '139-400 FPL', '400+ FPL'] self.coverage = {1: 'esi', 2: 'individual market', 3: 'medicare', 4: 'medicaid', 5: 'other public', 6: 'uninsured'} self.choice = {1: 'uninsured', 2: 'self_esi', 3: 'spouse_esi', 4: 'parent_1_esi', 5: 'parent_2_esi', 6: 'silver', 7: 'bronze', 8: 'catastrophic', 9: 'medicaid', 10: 'medicare'} # mapping {1:1, [2,3,4,5]:2, [6,7,8]:3, 9:4, 10:5} self.choice5_map = {1: 1, 2: 2, 3: 2, 4: 2, 5: 2, 6: 3, 7: 3, 8: 3, 9: 4, 10: 5} self.choice5 = {1: 'uninsured', 2: 'esi', 3: 'individual market', 4: 'medicaid', 5: 'medicare'} self.choice_lab = ['uninsured', 'esi', 'im', 'medicaid', 'medicare'] self.lep = {0: 'Speaks English very well', 1: 'Limited English Proficiency'} self.esi_cov = {0: 'ESI without ACA and employer dropped coverage under ACA', 1: 'ESI without ACA and employer maintained coverage under ACA'} self.firm_type = {1: 'Typical coverage, typical contribution', 2: 'Typical coverage, large employee contribution', 3: 'Bronze'} self.xc_sub = {0: 'Number individuals enrolled without subsidies', 1: 'Number individuals enrolled with subsidies'} #pd.options.display.float_format = '{:,.0f}'.format
[docs] def set_year(self, year): self.year = year
[docs] def write_config(self): # creating a config file that contains updated information config_out = os.path.join(*[self.file_path, str(self.year), "reproduction_config.ini"]) # DO NOT CHANGE FORMATTING BELOW header = """\ ################################################################################ # CONFIGURATION FILE FOR CALSIM # # Notes: \"#\" are comments and do not affect this file's function. The summary # sections at the beginning provide a brief explanation for each variable # # This file has been updated with any and all parameters passed in through # the command line and should be used for result reproduction. ################################################################################""" general_desc = """\ ################################################################################ [General] # 1. outpath = where to put the output files and tables # 2. datapath = path to input data # 3. username = username for output folder naming # 4. suffix = suffix for output folder naming # 5. test = flag for running as test (on a subset of firms, specified by # percentage\n""" output_desc = """\ ################################################################################ [Output] # 1. debug = Flag for outputting debugging information to console # 2. tables = Flag for creating folder for storing tables for checking\n""" calibration_desc = """\ ################################################################################ [Calibration] # 1. calibration = Flag for calibration to generate person specific data # 2. calibration_file = file selected to replace new_cal columns in choice file # 3. utility_params = utility esi, xc (unsub and sub), mc, cal, penalty\n""" policy_desc = """\ ################################################################################ [Policy] # 1. firm_threshold = Tendency to offer, tendency to drop # 2. regional_specificity = 0 is statewide, 1 is regional\n""" system_sheet_desc = """\ ################################################################################ [System_Sheet]\n""" # write header # with open(config_out, 'w') as f: # f.write(header) # output configuation file shutil.copy2(self.config_path, config_out) # write current branch and commmit hash with open(config_out, 'a') as f: f.write('\n# Current Branch: %s# Latest Commit: %s' % (self.git_info['calsim'][0], self.git_info['calsim'][1])) with open(config_out, 'a') as f: f.write('\n# Current Policy Input Commit: %s# Policy Input Commit Message: %s' % (self.git_info['policy-input'][0], self.git_info['policy-input'][1])) with open(config_out, 'a') as f: f.write('\n# Command line args: %s\n' % ' '.join(sys.argv))
[docs] def initialize_raw_data(self, raw_data): """ Available Data from CalSim.py (self refers to CalSim): 'perm_ind_choices' : self.perm_ind_choices, 'long_output' : long_output, 'long_calib_table' : output_calib, 'firm_choices_all' : self.firm_choices_all, 'Hieu' : self.Hieu.get_hieu_table(), 'firm_table' : self.Firm, 'fam_table' : self.Family, 'worker_table' : self.Worker, 'esi_table' : self.Esi, 'exp_table' : self.Expenditures, 'weights' : (self.person_count, self.weight_total), 'dynamic_version' : self.dynamic_version, 'dynamic_exchange_statewide_premiums' : self.statewide_premiums, 'dynamic_exchange_regional_premiums' : self.regional_premiums, 'hieu_table_paired_down' : self.hieu_table_paired_down, 'dropped_firms' : self.dropped_firms, 'dropped_PT_firms' : self.dropped_PT_firms, 'calibrateRun' : self.calibrateRun, 'calibrate_ex_ante_latents' : self.calibrate_ex_ante_latents, 'Latent_vars' : self.Latent_vars """ self.raw_data = raw_data
[docs] def build_out_tables(self, firm_threshold): self.firm_threshold = firm_threshold def example(): """ * write functions that builds the necessary df as in this example * if there are raw_data that you need, make sure you pass it in on line ~403 in CalSim.py. It will be in the dictionary self.raw_data. * self.year -> useful if you want to output only if simulating a certain year. self.year is set after every full iteration * to add a dataframe to the output df list, you must call "initiate_out_table" * make sure you call your function at the end of "build_out_tables" """ # do something like this if you want only certain year (i.e. 2015 and 2019) if self.year != 2015 and self.year != 2019: return # build dataframe as desired. Make sure you pass in any "raw data" that you need # from CalSim.py. Instructions for that are in the docstring above example = DataFrame(np.random.randn(10, 5)) # make sure you call "self.initiate_out_table", check docstring for more self.initiate_out_table(3, "example.csv", example) # last but not least, call your function at the very end (this is example, so I have not called this function) def grouped_weighted_avg(values, weights, by): return (values * weights).groupby(by).sum() / weights.groupby(by).sum() def try_percent(num, den): try: pcent = (num / den) * 100 except ZeroDivisionError: pcent = None return pcent def build_expenditure_tables(): print('building expenditure tables') exp_data = self.raw_data['exp_table'] choices = self.raw_data['long_output'][['p_id', 'choice']] df = pd.merge(exp_data, choices, how='left', left_on='person_id', right_on='p_id') # agument columns for choice in range(11): df.loc[df.choice == choice, 'exp_esi_average'] = df.loc[df.choice == choice, 'exp_average_%i' % choice] if 2 <= choice <= 5 else \ df[df.choice == choice][['exp_average_%i' % i for i in range(2, 6)]].max(axis=1) df.loc[df.choice == choice, 'exp_esi_actual'] = df.loc[df.choice == choice, 'exp_actual_%i' % choice] if 2 <= choice <= 5 else \ df[df.choice == choice][['exp_actual_%i' % i for i in range(2, 6)]].max(axis=1) df.loc[df.choice == choice, 'oop_esi_average'] = df.loc[df.choice == choice, 'oop_average_%i' % choice] if 2 <= choice <= 5 else \ df[df.choice == choice][['oop_average_%i' % i for i in range(2, 6)]].max(axis=1) df.loc[df.choice == choice, 'oop_esi_actual'] = df.loc[df.choice == choice, 'oop_actual_%i' % choice] if 2 <= choice <= 5 else \ df[df.choice == choice][['oop_actual_%i' % i for i in range(2, 6)]].max(axis=1) # rename columns from number to category # rename example: 'exp_average_7' -> 'exp_private_average' columns = ['exp_average', 'exp_actual', 'oop_average', 'oop_actual'] old_names = ['_1', '_7', '_9', '_10'] new_names = ['unins', 'private', 'mcaid', 'mcare'] new_columns = {} for index, old_name in enumerate(old_names): for column in columns: new_columns[column + old_name] = column[:4] + new_names[index] + column[3:] df.rename(columns=new_columns, inplace=True) # get columns we want keep_columns = ['person_id', 'choice', 'esi', 'private', 'mcare', 'mcaid', 'unins'] exp_data = df[[column for keep_column in keep_columns for column in df.columns.values if keep_column in column]] self.initiate_out_table(1, 'step0_exp.csv', exp_data) def build_calib_tables(): # initiate person specfic table # merge more variables from hieu_table for creating databook table thieu = self.raw_data['pop_hieu'][['family_id', 'person_id', 'male', 'race_ethnicity', 'age', \ 'lep', 'doc_status', 'coverage', 'mcaid_elig_exante', \ 'predicted_exante_elig', 'esi_cov', \ 'u_esi', 'u_priv', 'u_unins', 'u_mcaid', 'u_mcare', \ 'c_esi', 'c_priv', 'c_mcaid', 'c_mcare', 'c_unins','threshold', 'full_time', \ 'num_chronic', 'health_status', 'sector', 'self_emp', 'cobra_flag', 'marit', 'firm_size','mcaid_elig_flipped','pholder_id_esi' \ ,'age_rated_slv_prem','age_rated_bronze_prem','affordability','family_type_for_afford_esi','self_affordability','tax_dependent','prem_type','ind_income','ind_fpl','esi_threshold','ind_threshold','povlev','family_cap','subsidy_amount_total']] thieu = thieu.dropna(subset=['person_id']) temp_choice = self.raw_data['long_output'] hieu_choice = pd.merge(thieu, temp_choice, how = "inner", on ='person_id') #adding fpl information tfam = self.raw_data['fam_table'][['family_id','fpl','orig_fam_income','fam_income','family_size']] hieu_choice = pd.merge(hieu_choice, tfam, how = "inner", on ='family_id') # adding firm coverage information # subsetting to chosen_choice tfirm = self.raw_data['firm_choices_all'] tfirm1 = tfirm.loc[tfirm['chosen_choice'] == 1] tfirm1 = tfirm1[['firm_id', 'offer', 'pt','esi_self_cost','esi_fam_cost','esi_sp_cost','esi_ch_cost']] ### previous choice is choice = 0 tfirm2 = tfirm.loc[tfirm['choice'] == 0] tfirm2 = tfirm2[['firm_id', 'offer', 'pt']] tfirm2.rename(columns={'offer': 'prev_offer', 'pt': 'previous_offer_pt'}, inplace = True) tfirm = pd.merge(tfirm1, tfirm2, how="inner", on = "firm_id") hieu_choice = pd.merge(hieu_choice, tfirm, how = "left", on ='firm_id') tworker = self.raw_data['worker_table'][['person_id', 'esi_not_elig']] hieu_choice = pd.merge(hieu_choice, tworker, how='left', on='person_id') # add two more columns: prev_having_offer curr_having_offer # if parent 1 or parent 2 has an offer, then the entire hieu has an offer, otherwise, adult children might still have an offer # this is the case both for prev_having_offer and curr_having_offer hieu_choice['prev_offer_p0'] = (hieu_choice['person_type'] == 0) & (hieu_choice['prev_offer'] == 1) & \ ((hieu_choice['full_time'] == 1) | ((hieu_choice['full_time'] == 0) & (hieu_choice['previous_offer_pt'] == 1)&(hieu_choice['esi_not_elig'] == 0))) hieu_choice['prev_offer_p1'] = (hieu_choice['person_type'] == 1) & (hieu_choice['prev_offer'] == 1) & \ ((hieu_choice['full_time'] == 1) | ((hieu_choice['full_time'] == 0) & (hieu_choice['previous_offer_pt'] == 1)&(hieu_choice['esi_not_elig'] == 0))) hieu_choice['prev_offer_parent'] = hieu_choice['prev_offer_p0'] | hieu_choice['prev_offer_p1'] hieu_choice['prev_offer_parent'] = hieu_choice.groupby('hieu_id')['prev_offer_parent'].transform('max') hieu_choice['prev_having_offer'] = (hieu_choice['prev_offer_parent'] == 1) | ((hieu_choice['prev_offer'] == 1) & \ ((hieu_choice['full_time'] == 1) | ((hieu_choice['full_time'] == 0) & (hieu_choice['pt'] == 1)&(hieu_choice['esi_not_elig'] == 0)))) hieu_choice['curr_offer_p0'] = (hieu_choice['person_type'] == 0) & (hieu_choice['offer'] == 1) & \ ((hieu_choice['full_time'] == 1) | ((hieu_choice['full_time'] == 0) & (hieu_choice['pt'] == 1)&(hieu_choice['esi_not_elig'] == 0))) hieu_choice['curr_offer_p1'] = (hieu_choice['person_type'] == 1) & (hieu_choice['offer'] == 1) & \ ((hieu_choice['full_time'] == 1) | ((hieu_choice['full_time'] == 0) & (hieu_choice['pt'] == 1)&(hieu_choice['esi_not_elig'] == 0))) hieu_choice['curr_offer_parent'] = hieu_choice['curr_offer_p0'] | hieu_choice['curr_offer_p1'] hieu_choice['curr_offer_parent'] = hieu_choice.groupby('hieu_id')['curr_offer_parent'].transform('max') hieu_choice['curr_having_offer'] = (hieu_choice['curr_offer_parent'] == 1) | ((hieu_choice['offer'] == 1) & \ ((hieu_choice['full_time'] == 1) | ((hieu_choice['full_time'] == 0) & (hieu_choice['pt'] == 1)&(hieu_choice['esi_not_elig'] == 0)))) hieu_choice['indiv_having_offer']=(hieu_choice['offer'] == 1) & ((hieu_choice['full_time'] == 1) | ((hieu_choice['full_time'] == 0) & (hieu_choice['pt'] == 1)&(hieu_choice['esi_not_elig'] == 0))) #if don't have esi_offer, then esi_cost should be nan loc_self = np.where((hieu_choice['family_type_for_afford_esi'] == 1))[0] loc_sp = np.where((hieu_choice['family_type_for_afford_esi'] == 2))[0] loc_ch = np.where((hieu_choice['family_type_for_afford_esi'] == 3))[0] loc_fam = np.where((hieu_choice['family_type_for_afford_esi'] == 4))[0] loc_ind_adult_child = np.where((hieu_choice['person_type'] > 3) & (hieu_choice['tax_dependent'] == 0))[0] loc_real_family = hieu_choice.index.difference(loc_ind_adult_child) loc_not_elig=np.where((hieu_choice['family_type_for_afford_esi'] == 5))[0] hieu_choice.loc[loc_self,'real_fam_cost']=hieu_choice.loc[loc_self].esi_self_cost hieu_choice.loc[loc_sp, 'real_fam_cost'] = hieu_choice.loc[loc_sp].esi_sp_cost hieu_choice.loc[loc_ch, 'real_fam_cost'] = hieu_choice.loc[loc_ch].esi_ch_cost hieu_choice.loc[loc_fam, 'real_fam_cost'] = hieu_choice.loc[loc_fam].esi_fam_cost hieu_choice.loc[loc_not_elig, 'real_fam_cost'] =np.nan hieu_choice['real_self_cost']=hieu_choice.esi_self_cost hieu_choice.loc[hieu_choice['indiv_having_offer']==0,'real_self_cost']= np.nan hieu_choice.loc[hieu_choice['indiv_having_offer'] == 0, 'real_fam_cost'] = np.nan hieu_choice['aff_self'] = (hieu_choice.esi_threshold * hieu_choice.fam_income > hieu_choice.real_self_cost) hieu_choice.loc[loc_ind_adult_child,'aff_self']=(hieu_choice.loc[loc_ind_adult_child].esi_threshold*hieu_choice.loc[loc_ind_adult_child].ind_income >hieu_choice.loc[loc_ind_adult_child].real_self_cost) hieu_choice['aff_fam'] = (hieu_choice.esi_threshold * hieu_choice.fam_income > hieu_choice.real_fam_cost) hieu_choice.loc[loc_ind_adult_child, 'aff_fam']=hieu_choice.loc[loc_ind_adult_child].aff_self hieu_choice.loc[hieu_choice['indiv_having_offer'] == 0, 'aff_self'] = False hieu_choice.loc[hieu_choice['indiv_having_offer'] == 0, 'aff_fam'] = False hieu_choice.loc[loc_real_family, 'fam_aff_self'] = hieu_choice.loc[loc_real_family].groupby('hieu_id')['aff_self'].transform('max') hieu_choice.loc[loc_real_family, 'fam_aff_fam'] = hieu_choice.loc[loc_real_family].groupby('hieu_id')['aff_fam'].transform('max') hieu_choice.loc[loc_ind_adult_child, 'fam_aff_self']=hieu_choice.loc[loc_ind_adult_child,'aff_self'] hieu_choice.loc[loc_ind_adult_child, 'fam_aff_fam'] = hieu_choice.loc[loc_ind_adult_child, 'aff_fam'] #if not person type 0 or 1 they can not have esi_cost for whole family loc_notparents=np.where((hieu_choice['person_type']!=0) &(hieu_choice['person_type']!=1))[0] hieu_choice.loc[loc_notparents,'esi_fam_cost']=np.nan #take the minimum_cost hieu_choice['min_esi_self_cost'] = hieu_choice.real_self_cost #set min_esi_self_cost of non-offer people to be the lowest self-only cost in their HIU hieu_choice.loc[loc_notparents, 'esi_self_cost'] = np.nan hieu_choice['hieu_min_esi_self_cost'] = hieu_choice.groupby('hieu_id')['real_self_cost'].transform('min') loc_withoffer= np.where(hieu_choice['indiv_having_offer']==1)[0] hieu_choice.loc[loc_withoffer, 'hieu_min_esi_self_cost']=hieu_choice.loc[loc_withoffer, 'min_esi_self_cost'] loc_0 = np.where(hieu_choice['person_type']<6)[0] hieu_choice.loc[loc_0, 'min_esi_fam_cost'] =hieu_choice.loc[loc_0].groupby('hieu_id')['real_fam_cost'].transform('min') #hieu_choice['lostesi_firmnooffer']=False #hieu_choice['lostesi_firmno_ptoffer']=False hieu_choice['offer_disappear']=(hieu_choice.offer==0)&(hieu_choice.prev_offer==1) hieu_choice['pt_offer_disappear']=(hieu_choice.pt==0)&(hieu_choice.previous_offer_pt==1) #loc_0 is the position where people previously have coverage but lost esi coverage now loc_0 =np.where((hieu_choice['coverage'] == 1) & ((hieu_choice['choice'] < 2) | (hieu_choice['choice'] > 5)))[0] #loc_1 is the position where people previously have coverage but lost esi coverage now and working part time loc_1=np.where((hieu_choice['coverage']==1) & ((hieu_choice['choice']<2) | (hieu_choice['choice']>5))& (hieu_choice['full_time']==0))[0] #get p_holder of full time worker who lost their esi coverage pholder_offerstatus=hieu_choice[hieu_choice['person_id'].isin(hieu_choice.loc[loc_0].pholder_id_esi.tolist())] pholder_offerstatus=pholder_offerstatus.rename(columns={'offer_disappear': 'lost_esi_firmnooffer','person_id':'p_holder'}) hieu_choice=pd.merge(hieu_choice,pholder_offerstatus[['p_holder','lost_esi_firmnooffer']],left_on='pholder_id_esi',right_on='p_holder',how='left') #set everything not in the target location to nan hieu_choice[~hieu_choice.index.isin(loc_0)].lost_esi_firmnooffer=np.nan #get the p_holder of part time worker who lost their esi coverage pt_pholder_offerstatus=hieu_choice[hieu_choice['person_id'].isin(hieu_choice.loc[loc_1].pholder_id_esi.tolist())] pt_pholder_offerstatus = pt_pholder_offerstatus.rename(columns={'offer_disappear': 'lost_esi_firmno_ptoffer','person_id':'p_holder_pt'}) hieu_choice = pd.merge(hieu_choice, pt_pholder_offerstatus[['p_holder_pt', 'lost_esi_firmno_ptoffer']],left_on='pholder_id_esi', right_on='p_holder_pt', how='left') #set everything not in target row to nan hieu_choice[~hieu_choice.index.isin(loc_1)].lost_esi_firmno_ptoffer = np.nan hieu_choice = hieu_choice.drop(['prev_offer_p0', 'prev_offer_p1', 'prev_offer_parent', 'curr_offer_p0', 'curr_offer_p1',\ 'curr_offer_parent','esi_threshold','offer_disappear','pt_offer_disappear','pholder_id_esi','p_holder','p_holder_pt'], axis=1) #print 'people count' #print hieu_choice.loc[hieu_choice['choice'].isin([6,7])]['person_id'].count() #print hieu_choice.loc[hieu_choice['choice'].isin([6,7])].p_weight.sum() #print 'xc_prem sum' #print hieu_choice.loc[hieu_choice['choice'].isin([6,7])].xc_prem.sum() # merging with firm information fchoices = self.raw_data['firm_choices_all'] fchoices = fchoices.loc[fchoices.chosen_choice == 1] prev_offering = fchoices.loc[fchoices.prev_offer == True] prev_non_offering = fchoices.loc[fchoices.prev_offer == False] prev2curr_1 = prev_offering.loc[prev_offering.offer == True].shape[0] prev2curr_2 = prev_offering.loc[prev_offering.offer == False].shape[0] prev2curr_3 = prev_non_offering.loc[prev_non_offering.offer == True].shape[0] prev2curr_4 = prev_non_offering.loc[prev_non_offering.offer == False].shape[0] results = pd.DataFrame({'offer':[prev2curr_3, prev2curr_1], 'nonoffer': [prev2curr_4, prev2curr_2]}) results.index.name = 'offering' #print results # add two final columns: policy holder, and policy group # if adult_1 is person holder, people who can be under policy include adult_2 == 3, children == 4, adult_children == 4 policy_id = 1 hieu_choice['policy_id'] = 0 hieu_choice['policy_holder'] = 0 for hieu in hieu_choice.loc[(hieu_choice['person_type'] == 0) & (hieu_choice['choice'] == 2)]['hieu_id']: hieu_choice['policy_id'] += ((hieu_choice['hieu_id'] == hieu) & ((hieu_choice['person_type'] == 0) | \ (hieu_choice['choice'].isin([3,4])))) * policy_id hieu_choice['policy_holder'] += ((hieu_choice['hieu_id'] == hieu) & (hieu_choice['person_type'] == 0)) * 1 policy_id += 1 # if adult_2 is person holder, people who can be under policy include adult_1 == 3, children == 5, adult_children == 5 for hieu in hieu_choice.loc[(hieu_choice['person_type'] == 1) & (hieu_choice['choice'] == 2)]['hieu_id']: hieu_choice['policy_id'] += ((hieu_choice['hieu_id'] == hieu) & ((hieu_choice['person_type'] == 1) | \ (hieu_choice['choice'].isin([3,5])))) * policy_id hieu_choice['policy_holder'] += ((hieu_choice['hieu_id'] == hieu) & (hieu_choice['person_type'] == 1)) * 1 policy_id += 1 # for adult_child 1 to 4, if any is a policy holder, place them in a new group hieu_choice['policy_id'] += ((hieu_choice['person_type'].isin([4,5,6,7])) & (hieu_choice['choice'] == 2)) * policy_id hieu_choice['policy_holder'] += (hieu_choice['person_type'].isin([4,5,6,7])) & (hieu_choice['choice'] == 2) * 1 ac = hieu_choice.loc[hieu_choice['policy_id'] == policy_id] seq = np.arange(policy_id, hieu_choice.loc[hieu_choice['policy_id'] == policy_id].shape[0] + policy_id) # increment for every person ac['policy_id'] = seq cols = list(hieu_choice.columns) hieu_choice.loc[hieu_choice['policy_id'] == policy_id, cols] = ac[cols] if self.raw_data['long_calib_table'] is not None: # manipulating wide table for calculations table = pd.pivot_table(self.raw_data['long_calib_table'], values=['utility','calib'], index=['hieu_id', 'person_id'], columns='choice') indices = np.unique(self.raw_data['long_calib_table'].choice.values).tolist() table.columns = ['utility_' + str(i) for i in indices]+['calib_' + str(i) for i in indices] table.reset_index(inplace=True) temp = table[['utility_' + str(i) for i in indices]] table['sum_utility'] = temp.max(axis=1) # finding hieu total utilities based on persons making optimal individual choices optimal_ind_choices = table[['hieu_id','sum_utility']].groupby(['hieu_id'], as_index=False).sum() hieu_df = hieu_choice['hieu_id'].value_counts().reset_index() hieu_df.columns = ['hieu_id','hieu_size'] hieu_df = pd.merge(hieu_df, hieu_choice[['hieu_id','hieu_utility']], on='hieu_id', how='left') optimal_ind_choices = pd.merge(optimal_ind_choices, hieu_df, on='hieu_id', how='inner') optimal_ind_choices['match'] = optimal_ind_choices['sum_utility'] == optimal_ind_choices['hieu_utility'] optimal_ind_choices2 = optimal_ind_choices.loc[optimal_ind_choices['hieu_size'] != 1] ### pcent_dispcy legend - 0: total, 1: non-single size families, 2: non-single size, non esi offering pcent = float(optimal_ind_choices.loc[optimal_ind_choices.match == False].shape[0]) / float(optimal_ind_choices.shape[0]) pcent = round(pcent, 4) optimal_ind_choices['pcent_dispcy_0'] = np.nan optimal_ind_choices['pcent_dispcy_0'][0] = pcent optimal_ind_choices['pcent_dispcy_0'][1] = str(optimal_ind_choices.loc[optimal_ind_choices.match == False].shape[0]) + ' / ' \ + str(optimal_ind_choices.shape[0]) if optimal_ind_choices2.empty: pcent = 0 else: pcent = float(optimal_ind_choices2.loc[optimal_ind_choices2.match == False].shape[0]) / float(optimal_ind_choices2.shape[0]) pcent = round(pcent, 4) optimal_ind_choices['pcent_dispcy_1'] = np.nan optimal_ind_choices['pcent_dispcy_1'][0] = pcent optimal_ind_choices['pcent_dispcy_1'][1] = str(optimal_ind_choices2.loc[optimal_ind_choices2.match == False].shape[0]) + ' / ' \ + str(optimal_ind_choices2.shape[0]) person_df = table[['utility_' + str(i) for i in indices]] person_df['opt_choice'] = (person_df.idxmax(axis=1)) person_df['opt_choice'] = (person_df['opt_choice'].apply(lambda x: x[8:])).astype(int) table['p_opt_choice'] = person_df['opt_choice'] table = pd.merge(table, hieu_choice[['person_id','choice']], on='person_id', how='left') table = table.rename(columns={'choice': 'hieu_choice'}) table['match'] = table['p_opt_choice'] == table['hieu_choice'] pcent = float(table.loc[table.match == False].shape[0]) / float(table.shape[0]) pcent = round(pcent, 4) table['pcent_dispcy_0'] = np.nan table['pcent_dispcy_0'][0] = pcent table['pcent_dispcy_0'][1] = str(table.loc[table.match == False].shape[0]) + ' / ' + str(table.shape[0]) # searching for hieus that were offered esi offering_esi = pd.DataFrame() utility_esi_cols = ['utility_2','utility_3','utility_4','utility_5'] for hieu in optimal_ind_choices.hieu_id.unique(): no_esi = True for col in utility_esi_cols: try: no_esi &= table.loc[table.hieu_id == hieu][col].isnull().values.all() except: pass temp = pd.DataFrame({'hieu_id': [hieu], 'offers_esi': [~no_esi]}) offering_esi = offering_esi.append(temp) offering_esi.reset_index(drop=True, inplace=True) optimal_ind_choices = pd.merge(optimal_ind_choices, offering_esi, on='hieu_id', how='left') temp = optimal_ind_choices.loc[optimal_ind_choices['offers_esi'] == False] pcent = float(temp.loc[temp.match == False].shape[0]) / float(temp.shape[0]) pcent = round(pcent, 4) optimal_ind_choices['pcent_dispcy_2'] = np.nan optimal_ind_choices['pcent_dispcy_2'][0] = pcent optimal_ind_choices['pcent_dispcy_2'][1] = str(temp.loc[temp.match == False].shape[0]) + ' / ' + str(temp.shape[0]) self.initiate_out_table(1, "person_behavior_discrepancy.csv", table) self.initiate_out_table(1, "hieu_behavior_discrepancy.csv", optimal_ind_choices) # create text file that checks for all directory = os.path.join(self.out_path, str(self.year), self.table_types[1]) if not os.path.exists(directory): os.makedirs(directory) sanity_check_output = open(os.path.join(directory, 'sanity_check.txt'), 'w+') paragraph = ["checking aggregate p_weights of specific conditions: \n"] categories = ['xc_sub', 'mcaid', 'new_mcaid', 'penalty'] for category in categories: paragraph.append(category + ": " + str(hieu_choice.loc[hieu_choice[category] == 1].p_weight.sum()) + "\n") for line in paragraph: sanity_check_output.write(line) sanity_check_output.close() hieu_choice.rename(columns={'xc_sub': 'subsidy_elig','adultkid_xc_sub': 'adultkid_subsidy_elig', 'hieu_subsidy_amount_total':'subsidy_amount_total_long', 'hieu_subsidy_bronze_total': 'subsidy_bronze_total','hieu_subsidy_amount_total_default': 'subsidy_amount_total_default_long'}, inplace=True) hieu_choice['xc_sub_final']=0 xc_sub_loc = np.where(((hieu_choice['subsidy_elig']==1)&(hieu_choice['subsidy_amount_total']>0)& \ (hieu_choice['choice'].isin([1,2,3,4,5,9,10])))|((hieu_choice['subsidy_elig']==1)&\ (hieu_choice['sub_amount']>0)&(hieu_choice['choice'].isin([6,7,8]))))[0] hieu_choice.loc[xc_sub_loc,'xc_sub_final']=1 if self.raw_data['dual_parameter']== 0: hieu_choice['dual']=0 # ken's method if self.raw_data['use_ken_flag']==True: self.initiate_out_table(1, "hieu_choice_orig.csv", hieu_choice) ken_adjuster=self.raw_data['ken_adjuster'] hieu_choice=pd.merge(hieu_choice,ken_adjuster,how='left',on='person_id') hieu_choice['p_weight_orig']=hieu_choice.p_weight hieu_choice.p_weight=hieu_choice.p_weight*hieu_choice.weight_adjuster hieu_choice = hieu_choice.applymap(lambda x: 1 if x == True else x) hieu_choice = hieu_choice.applymap(lambda x: 0 if x == False else x) ############################### Dave request variables ######################################### ''' The following two variables are used to count the number of people who lose ESI because the firms of their *policy holders* of the ESI either drop the entire coverage or drop ESI coverage for PT: firm_dropped_my_cov firm_dropped_my_PTcov ''' #add firm_id_of_exante_pholder, full_time_of_exante_pholder, firm_dropped_my_cov, #and firm_of_pholder_dropped_PTcov into hieu_choice #firm_id_of_exante_pholder, full_time_of_exante_pholder are NA for people #previously not on ESI #firm_dropped_my_cov is NA for people previously not on ESI or the firm keeps FT/PT ESI #firm_of_pholder_dropped_PTcov is NA for people previously not on ESI or firm currently drops FT ESI hieu_choice=pd.merge(hieu_choice,self.raw_data["hieu_table_paired_down"].loc[self.raw_data["hieu_table_paired_down"].coverage==1, ["person_id","firm_id_of_exante_pholder","full_time_of_exante_pholder"]],on="person_id", how="left") hieu_choice=pd.merge(hieu_choice,self.raw_data["dropped_firms"],on="firm_id_of_exante_pholder",how="left") hieu_choice=pd.merge(hieu_choice,self.raw_data["dropped_PT_firms"],on="firm_id_of_exante_pholder",how="left") #mask for persons with a PT ESI policy holder #False for FT or NA exante pholder pholder_is_PT=hieu_choice.full_time_of_exante_pholder.fillna(False) #NA for person not on ESI coverage or firm drops FT ESI #False for person continues on ESI but policy holder was FT #True for person previously on ESI with PT policy holder and the firm dropped PTcov hieu_choice["firm_dropped_my_PTcov"]=(hieu_choice.firm_of_pholder_dropped_PTcov) & pholder_is_PT # CLEAN UP FOR N/A, USE SAME FORMAT AS ORIGINAL FOR TRUE/FALSE #False if the person is not previously on ESI or the firm keeps FT/PT ESI cov for that person #True if the person is previously on ESI but the firm drops FT cov hieu_choice.firm_dropped_my_cov.fillna(False,inplace=True) #False if the person is not previously on ESI or the firm keeps FT ESI cov for that person #True if the person is previously on PT ESI but the firm drops PT cov hieu_choice.firm_dropped_my_PTcov.fillna(False,inplace=True) #ex_ante latent variables creation if self.raw_data["calibrateRun"]==1 and self.raw_data["calibrate_ex_ante_latents"]==True: self.calibrate_exante_latents=calibrate_exante_latents(self.raw_data["calibrate_ex_ante_latents"],self.raw_data["Latent_vars"],\ self.raw_data["long_calib_table"],self.raw_data["pop_hieu"].person_id) exante_latents=self.calibrate_exante_latents.create_ex_ante_latent() self.initiate_out_table(1, "hieu_choice.csv", hieu_choice) self.initiate_out_table(1, 'firm_choice.csv', self.raw_data['firm_choices_all']) self.initiate_out_table(1, 'choice_count.csv', results, True) def build_xc_wgted_unwgted_tables(): # aggregated results by year and by region dftmp = pd.merge(self.raw_data['long_output'], self.raw_data['hieu_table'][['hieu_id', 'person_type', 'coverage', 'age']], how='left', on=['hieu_id', 'person_type']) # subsetting to age <=64 dftmp = dftmp.loc[dftmp['age']<=64] # unweighted, statewide dftmp['choice5'] = dftmp['choice'].map(self.choice5_map) unwgted = pd.crosstab(dftmp['coverage'], dftmp['choice'], margins=True) unwgted = unwgted.fillna(0) unwgted['exp'] = np.nan unwgted['exp'][0] = self.raw_data['weights'][0] unwgted['actual'] = np.nan unwgted['actual'][0] = dftmp['p_id'].count() self.initiate_out_table(3, "unweighted_6_by_10.csv", unwgted, True) print("Unweighted 6x10") print(unwgted) unwgted = pd.crosstab(dftmp['coverage'], dftmp['choice5'], margins=True) unwgted = unwgted.fillna(0) self.initiate_out_table(3, "unweighted_6_by_5.csv", unwgted, True) print("Unweighted 6x5") print(unwgted) if self.is_regional == 1: # regional table, unweighted table = pd.crosstab([dftmp.region, dftmp.coverage], dftmp.choice) table['exp'] = np.nan table['exp'][0] = self.raw_data['weights'][0] table['actual'] = np.nan table['actual'][0] = dftmp['p_id'].count() self.initiate_out_table(3, "unweighted_6_by_10_regional.csv", table, True) table = pd.crosstab([dftmp.region, dftmp.coverage], dftmp.choice5) self.initiate_out_table(3, "unweighted_6_by_5_regional.csv", table, True) # regional table, weighted table = pd.crosstab([dftmp.region, dftmp.coverage], dftmp.choice, dftmp.p_weight, aggfunc=sum, margins=True) table = table.fillna(0) table['exp'] = np.nan table['exp'][0] = self.raw_data['weights'][1] table['actual'] = np.nan table['actual'][0] = dftmp['p_weight'].sum() self.initiate_out_table(3, "weighted_6_by_10_regional.csv", table, True) table = pd.crosstab([dftmp.region, dftmp.coverage], dftmp.choice5, dftmp.p_weight, aggfunc=sum, margins=True) self.initiate_out_table(3, "weighted_6_by_5_regional.csv", table, True) # weighted, statewide wgted = pd.crosstab(dftmp['coverage'], dftmp['choice'], dftmp['p_weight'], aggfunc=sum, margins=True) wgted = wgted.fillna(0) wgted['exp'] = np.nan wgted['exp'][0] = self.raw_data['weights'][1] wgted['actual'] = np.nan wgted['actual'][0] = dftmp['p_weight'].sum() self.initiate_out_table(3, "weighted_6_by_10.csv", wgted, True) wgted = pd.crosstab(dftmp['coverage'], dftmp['choice5'], dftmp['p_weight'], aggfunc=sum, margins=True) wgted = wgted.fillna(0) self.initiate_out_table(3, "weighted_6_by_5.csv", wgted, True) print("Weighted") print(wgted) dftmp_xc_sub_0 = dftmp.loc[(dftmp.xc_sub==0) | (dftmp.xc_sub==False)] dftmp_xc_sub_1 = dftmp.loc[(dftmp.xc_sub==1) & (dftmp.xc_sub==True)] outtable_xc_sub_0 = pd.crosstab(dftmp_xc_sub_0['coverage'], dftmp_xc_sub_0['choice'], dftmp_xc_sub_0['p_weight'], aggfunc=sum, margins=True) outtable_xc_sub_1 = pd.crosstab(dftmp_xc_sub_1['coverage'], dftmp_xc_sub_1['choice'], dftmp_xc_sub_1['p_weight'], aggfunc=sum, margins=True) outtable_xc_sub_0 = outtable_xc_sub_0.fillna(0) outtable_xc_sub_1 = outtable_xc_sub_1.fillna(0) self.initiate_out_table(3, "crosstab_tables_xc_sub_0.csv", outtable_xc_sub_0, True) self.initiate_out_table(3, "crosstab_tables_xc_sub_1.csv", outtable_xc_sub_1, True) # regional tables if self.is_regional == 1: outtable_xc_sub_0 = pd.crosstab([dftmp_xc_sub_0.region, dftmp_xc_sub_0.coverage], dftmp_xc_sub_0.choice, dftmp_xc_sub_0.p_weight, aggfunc=sum, margins=True) outtable_xc_sub_1 = pd.crosstab([dftmp_xc_sub_1.region, dftmp_xc_sub_1.coverage], dftmp_xc_sub_1.choice, dftmp_xc_sub_1.p_weight, aggfunc=sum, margins=True) outtable_xc_sub_0 = outtable_xc_sub_0.fillna(0) outtable_xc_sub_1 = outtable_xc_sub_1.fillna(0) self.initiate_out_table(3, "regional_xc_sub_0.csv", outtable_xc_sub_0, True) self.initiate_out_table(3, "regional_xc_sub_1.csv", outtable_xc_sub_1, True) self.initiate_out_table(3, "dynamic_xc_statewide_premiums_version_"+\ str(self.raw_data["dynamic_version"])+".csv", self.raw_data["dynamic_exchange_statewide_premiums"], False) self.initiate_out_table(3, "dynamic_xc_regional_premiums_version_"+\ str(self.raw_data["dynamic_version"])+".csv", self.raw_data["dynamic_exchange_regional_premiums"], False) # build all tables build_calib_tables() build_xc_wgted_unwgted_tables() if self.tables_flag == True: build_expenditure_tables()
[docs] def initiate_out_table(self, table_type, table_name, table, index=False): """ effect: adds table to the output list arguments: table_type - an integer as follows: 0 option is 'other', 1 option is 'individual_data', 2 option is 'tables', 3 option is 'aggregate_results' table_name - desired name of the table table - the dataframe itself """ if not (0 <= table_type <= len(self.table_types)): raise ValueError("Invalid table type") elif self.table_types[table_type] is None: return # add table to output list self.out_tables[os.path.join(*[self.out_path, \ str(self.year), self.table_types[table_type], table_name])] = (table, index)
[docs] def output_tables(self): """ effect: output all tables in list of output tables to the correct path """ # make out directory if it does not exist for table_type in self.table_types: if table_type is None: continue full_path = os.path.join(*[self.out_path, str(self.year), table_type]) if not os.path.exists(full_path): os.makedirs(full_path) # output each csv for path, table in self.out_tables.items(): table[0].to_csv(path, float_format='%.4f', index=table[1])
# output configuation file #shutil.copy(self.config_path, self.out_path)
[docs] def force_output(self, table_type, table_name, table): # make sure table_type is valid if not (0 <= table_type <= len(self.table_types)): raise ValueError("Invalid table type") elif self.table_types[table_type] is None: return # generate path output_path = os.path.join(*[self.out_path, str(self.year), \ self.table_types[table_type]]) if not os.path.exists(output_path): os.makedirs(output_path) table.to_csv(os.path.join(output_path, table_name), index=False)