Source code for utility_modules.output

#! /usr/bin/env python

import os
import pdb
import pandas as pd
import numpy as np
import shutil
from policy_modules.fpl import Fpl as Fpl
import subprocess
import sys
import utility_modules.import_zip
import zipfile
from utility_modules.calibrate_exante_latents import calibrate_exante_latents as calibrate_exante_latents

[docs]class Output():
    def __init__(self, out_path, git_info, general_dict, params_dict, cal_levers_dict, policy_levers_dict, system_dict, is_regional):
        """
        IN:
            out_path - output path (with suffixes)
            is_* - flags for inclusion of certain output tables
        NOTE:
            output structure is as follows:

            output_root/
                suffix/
                    year1/
                        table_type1/
                            table_name1
                            table_name2
                            ...
                        table_type2/
                        ...
                    year2/
                    ...
        """
        self.file_path = out_path
        self.out_tables = {}
        self.git_info = git_info
        self.out_path = out_path
        self.is_regional = is_regional
        self.general_dict = general_dict
        self.config_path = self.general_dict.config_path
        self.params_dict = params_dict
        self.cal_levers_dict = cal_levers_dict
        self.policy_levers_dict = policy_levers_dict

        self.system_dict = system_dict

        zipf = zipfile.ZipFile(self.out_path+'/Policy_Input.zip','w',zipfile.ZIP_DEFLATED)
        import_zip.zipdir(general_dict['pol_input_path'], zipf)

        self.table_types = [
            'other',                # 0
            'individual_data',      # 1
            'tables',               # 2
            'aggregate_results'     # 3
        ]

        # disable certain output depending on configuration file settings
        self.tables_flag = params_dict.tables
        if not params_dict.tables:
            self.table_types[2] = None

        # output tables constants
        self.industry_type ={0: "unemployed",
                             1: "natural resource/mining",
                             2: "construction",
                             3: "manufacturing",
                             4: "trans/util/information",
                             5: "wholesales/retail",
                             6: "financial",
                             7: "services/govt",
                             8: "healthcare",
                             9: "federal",
                             10: "state/local"}

        self.firm_size_bins  = [3, 10, 50, 200, 1000, 5000000] # right=False in the cut function
        self.firm_size_lab =  ['3-9', '10-49', '50-199', '200-999', '1000+']

        self.age_bins = [0, 19, 30, 45, 65, 200]
        self.age_lab = ['0-18', '19-29', '30-44', '45-64', '65+']

        self.race_cat = {1: "White",
                         2: "Latino",
                         3: "African American",
                         4: "Asian",
                         5: "Other"}

        self.gender_cat = {1: "Male", \
                           0: "Female"}

        self.fpl_bins = [0, 100, 138, 400, 2000]
        self.fpl_lab  = ['<=100 FPL', '101-138 FPL', '139-400 FPL', '400+ FPL']

        self.coverage = {1: 'esi',
                         2: 'individual market',
                         3: 'medicare',
                         4: 'medicaid',
                         5: 'other public',
                         6: 'uninsured'}

        self.choice =   {1: 'uninsured',
                         2: 'self_esi',
                         3: 'spouse_esi',
                         4: 'parent_1_esi',
                         5: 'parent_2_esi',
                         6: 'silver',
                         7: 'bronze',
                         8: 'catastrophic',
                         9: 'medicaid',
                        10: 'medicare'}

        # mapping {1:1, [2,3,4,5]:2, [6,7,8]:3, 9:4, 10:5}
        self.choice5_map = {1: 1, 2: 2, 3: 2, 4: 2, 5: 2, 6: 3, 7: 3, 8: 3, 9: 4, 10: 5}

        self.choice5 =   {1: 'uninsured',
                          2: 'esi',
                          3: 'individual market',
                          4: 'medicaid',
                          5: 'medicare'}
        self.choice_lab  = ['uninsured', 'esi', 'im', 'medicaid', 'medicare']

        self.lep  = {0: 'Speaks English very well',
                     1: 'Limited English Proficiency'}

        self.esi_cov = {0: 'ESI without ACA and employer dropped coverage under ACA',
                        1: 'ESI without ACA and employer maintained coverage under ACA'}

        self.firm_type = {1: 'Typical coverage, typical contribution',
                          2: 'Typical coverage, large employee contribution',
                          3: 'Bronze'}

        self.xc_sub  = {0: 'Number individuals enrolled without subsidies',
                        1: 'Number individuals enrolled with subsidies'}

        #pd.options.display.float_format = '{:,.0f}'.format

[docs]    def set_year(self, year):
        self.year = year

[docs]    def write_config(self):

        # creating a config file that contains updated information
        config_out = os.path.join(*[self.file_path, str(self.year), "reproduction_config.ini"])

        # DO NOT CHANGE FORMATTING BELOW
        header = """\
################################################################################
# CONFIGURATION FILE FOR CALSIM
#
# Notes: \"#\" are comments and do not affect this file's function. The summary
#        sections at the beginning provide a brief explanation for each variable
#
#	This file has been updated with any and all parameters passed in through
#	the command line and should be used for result reproduction.
################################################################################"""
        general_desc = """\
################################################################################
[General]
# 1. outpath     = where to put the output files and tables
# 2. datapath    = path to input data
# 3. username    = username for output folder naming
# 4. suffix      = suffix for output folder naming
# 5. test        = flag for running as test (on a subset of firms, specified by
#                  percentage\n"""
        output_desc = """\
################################################################################
[Output]
# 1. debug = Flag for outputting debugging information to console
# 2. tables = Flag for creating folder for storing tables for checking\n"""
        calibration_desc = """\
################################################################################
[Calibration]
# 1. calibration = Flag for calibration to generate person specific data
# 2. calibration_file = file selected to replace new_cal columns in choice file
# 3. utility_params = utility esi, xc (unsub and sub), mc, cal, penalty\n"""
        policy_desc = """\
################################################################################
[Policy]
# 1. firm_threshold = Tendency to offer, tendency to drop
# 2. regional_specificity = 0 is statewide, 1 is regional\n"""
        system_sheet_desc = """\
################################################################################
[System_Sheet]\n"""

        # write header
        # with open(config_out, 'w') as f:
        #    f.write(header)
        # output configuation file
        shutil.copy2(self.config_path, config_out)
        # write current branch and commmit hash

        with open(config_out, 'a') as f:
            f.write('\n# Current Branch: %s# Latest Commit: %s' % (self.git_info['calsim'][0], self.git_info['calsim'][1]))

        with open(config_out, 'a') as f:
            f.write('\n# Current Policy Input Commit: %s# Policy Input Commit Message: %s' % (self.git_info['policy-input'][0], self.git_info['policy-input'][1]))

        with open(config_out, 'a') as f:
            f.write('\n# Command line args: %s\n' % ' '.join(sys.argv))
        
[docs]    def initialize_raw_data(self, raw_data):
        """
        Available Data from CalSim.py (self refers to CalSim):
            'perm_ind_choices'  :   self.perm_ind_choices,
            'long_output'       :   long_output,
            'long_calib_table'  :   output_calib,
            'firm_choices_all'  :   self.firm_choices_all,
            'Hieu'              :   self.Hieu.get_hieu_table(),
            'firm_table'        :   self.Firm,
            'fam_table'         :   self.Family,
            'worker_table'      :   self.Worker,
            'esi_table'         :   self.Esi,
            'exp_table'         :   self.Expenditures,
            'weights'           :   (self.person_count, self.weight_total),
            'dynamic_version'   :   self.dynamic_version,
            'dynamic_exchange_statewide_premiums' : self.statewide_premiums,
            'dynamic_exchange_regional_premiums' : self.regional_premiums,
            'hieu_table_paired_down'                :   self.hieu_table_paired_down,
            'dropped_firms'                         :   self.dropped_firms,
            'dropped_PT_firms'                      :   self.dropped_PT_firms,
            'calibrateRun'                          :   self.calibrateRun,
            'calibrate_ex_ante_latents'             :   self.calibrate_ex_ante_latents,
            'Latent_vars'                           :   self.Latent_vars
        """

        self.raw_data = raw_data

[docs]    def build_out_tables(self, firm_threshold):

        self.firm_threshold = firm_threshold

        def example():
            """
                * write functions that builds the necessary df as in this example
                * if there are raw_data that you need, make sure you pass it in
                on line ~403 in CalSim.py. It will be in the dictionary self.raw_data.
                * self.year -> useful if you want to output only if simulating a
                certain year. self.year is set after every full iteration
                * to add a dataframe to the output df list, you must call "initiate_out_table"
                * make sure you call your function at the end of "build_out_tables"
            """
            # do something like this if you want only certain year (i.e. 2015 and 2019)
            if self.year != 2015 and self.year != 2019:
                return
            # build dataframe as desired. Make sure you pass in any "raw data" that you need
            # from CalSim.py. Instructions for that are in the docstring above
            example = DataFrame(np.random.randn(10, 5))
            # make sure you call "self.initiate_out_table", check docstring for more
            self.initiate_out_table(3, "example.csv", example)
            # last but not least, call your function at the very end (this is example, so I have not called this function)

        def grouped_weighted_avg(values, weights, by):
            return (values * weights).groupby(by).sum() / weights.groupby(by).sum()

        def try_percent(num, den):
            try:
                pcent = (num / den) * 100
            except ZeroDivisionError:
                pcent = None
            return pcent



        def build_expenditure_tables():

            print('building expenditure tables')
            exp_data = self.raw_data['exp_table']
            choices = self.raw_data['long_output'][['p_id', 'choice']]
            df = pd.merge(exp_data, choices, how='left', left_on='person_id', right_on='p_id')

            # agument columns
            for choice in range(11):
                df.loc[df.choice == choice, 'exp_esi_average'] = df.loc[df.choice == choice, 'exp_average_%i' % choice] if 2 <= choice <= 5 else \
                    df[df.choice == choice][['exp_average_%i' % i for i in range(2, 6)]].max(axis=1)
                df.loc[df.choice == choice, 'exp_esi_actual'] = df.loc[df.choice == choice, 'exp_actual_%i' % choice] if 2 <= choice <= 5 else \
                    df[df.choice == choice][['exp_actual_%i' % i for i in range(2, 6)]].max(axis=1)
                df.loc[df.choice == choice, 'oop_esi_average'] = df.loc[df.choice == choice, 'oop_average_%i' % choice] if 2 <= choice <= 5 else \
                    df[df.choice == choice][['oop_average_%i' % i for i in range(2, 6)]].max(axis=1)
                df.loc[df.choice == choice, 'oop_esi_actual'] = df.loc[df.choice == choice, 'oop_actual_%i' % choice] if 2 <= choice <= 5 else \
                    df[df.choice == choice][['oop_actual_%i' % i for i in range(2, 6)]].max(axis=1)

            # rename columns from number to category
            # rename example: 'exp_average_7' -> 'exp_private_average'
            columns = ['exp_average', 'exp_actual', 'oop_average', 'oop_actual']
            old_names = ['_1', '_7', '_9', '_10']
            new_names  = ['unins', 'private', 'mcaid', 'mcare']
            new_columns = {}
            for index, old_name in enumerate(old_names):
                for column in columns:
                    new_columns[column + old_name] = column[:4] + new_names[index] + column[3:]
            df.rename(columns=new_columns, inplace=True)

            # get columns we want
            keep_columns = ['person_id', 'choice', 'esi', 'private', 'mcare', 'mcaid', 'unins']
            exp_data = df[[column for keep_column in keep_columns for column in df.columns.values if keep_column in column]]

            self.initiate_out_table(1, 'step0_exp.csv', exp_data)

        def build_calib_tables():
            # initiate person specfic table
            # merge more variables from hieu_table for creating databook table
            thieu = self.raw_data['pop_hieu'][['family_id', 'person_id', 'male', 'race_ethnicity', 'age', \
                                               'lep', 'doc_status', 'coverage', 'mcaid_elig_exante', \
                                               'predicted_exante_elig', 'esi_cov', \
                                               'u_esi', 'u_priv', 'u_unins', 'u_mcaid', 'u_mcare', \
                                               'c_esi', 'c_priv', 'c_mcaid', 'c_mcare', 'c_unins','threshold', 'full_time', \
                                               'num_chronic',  'health_status',  'sector',  'self_emp', 'cobra_flag',  'marit', 'firm_size','mcaid_elig_flipped','pholder_id_esi' \
                                                ,'age_rated_slv_prem','age_rated_bronze_prem','affordability','family_type_for_afford_esi','self_affordability','tax_dependent','prem_type','ind_income','ind_fpl','esi_threshold','ind_threshold','povlev','family_cap','subsidy_amount_total']]
            thieu = thieu.dropna(subset=['person_id'])
            temp_choice = self.raw_data['long_output']
            hieu_choice = pd.merge(thieu, temp_choice, how = "inner", on ='person_id')
            #adding fpl information
            tfam = self.raw_data['fam_table'][['family_id','fpl','orig_fam_income','fam_income','family_size']]
            hieu_choice = pd.merge(hieu_choice, tfam, how = "inner", on ='family_id')

            # adding firm coverage information
            # subsetting to chosen_choice
            tfirm =  self.raw_data['firm_choices_all']
            tfirm1 = tfirm.loc[tfirm['chosen_choice'] == 1]
            tfirm1 = tfirm1[['firm_id', 'offer', 'pt','esi_self_cost','esi_fam_cost','esi_sp_cost','esi_ch_cost']]

            ### previous choice is choice = 0
            tfirm2 = tfirm.loc[tfirm['choice'] == 0]
            tfirm2 = tfirm2[['firm_id', 'offer', 'pt']]
            tfirm2.rename(columns={'offer': 'prev_offer', 'pt': 'previous_offer_pt'}, inplace = True)

            tfirm = pd.merge(tfirm1, tfirm2, how="inner", on = "firm_id")
            hieu_choice = pd.merge(hieu_choice, tfirm, how = "left", on ='firm_id')
            tworker = self.raw_data['worker_table'][['person_id', 'esi_not_elig']]
            hieu_choice = pd.merge(hieu_choice, tworker, how='left', on='person_id')

            # add two more columns: prev_having_offer curr_having_offer
            # if parent 1 or parent 2 has an offer, then the entire hieu has an offer, otherwise, adult children might still have an offer
            # this is the case both for prev_having_offer and curr_having_offer
            hieu_choice['prev_offer_p0'] = (hieu_choice['person_type'] == 0) & (hieu_choice['prev_offer'] == 1) & \
                            ((hieu_choice['full_time'] == 1) | ((hieu_choice['full_time'] == 0) & (hieu_choice['previous_offer_pt'] == 1)&(hieu_choice['esi_not_elig'] == 0)))
            hieu_choice['prev_offer_p1'] = (hieu_choice['person_type'] == 1) & (hieu_choice['prev_offer'] == 1) & \
                            ((hieu_choice['full_time'] == 1) | ((hieu_choice['full_time'] == 0) & (hieu_choice['previous_offer_pt'] == 1)&(hieu_choice['esi_not_elig'] == 0)))

            hieu_choice['prev_offer_parent'] = hieu_choice['prev_offer_p0'] | hieu_choice['prev_offer_p1']
            hieu_choice['prev_offer_parent'] = hieu_choice.groupby('hieu_id')['prev_offer_parent'].transform('max')

            hieu_choice['prev_having_offer'] = (hieu_choice['prev_offer_parent'] == 1) | ((hieu_choice['prev_offer'] == 1) & \
                                               ((hieu_choice['full_time'] == 1) | ((hieu_choice['full_time'] == 0) & (hieu_choice['pt'] == 1)&(hieu_choice['esi_not_elig'] == 0))))


            hieu_choice['curr_offer_p0'] = (hieu_choice['person_type'] == 0) & (hieu_choice['offer'] == 1) & \
                            ((hieu_choice['full_time'] == 1) | ((hieu_choice['full_time'] == 0) & (hieu_choice['pt'] == 1)&(hieu_choice['esi_not_elig'] == 0)))
            hieu_choice['curr_offer_p1'] = (hieu_choice['person_type'] == 1) & (hieu_choice['offer'] == 1) & \
                            ((hieu_choice['full_time'] == 1) | ((hieu_choice['full_time'] == 0) & (hieu_choice['pt'] == 1)&(hieu_choice['esi_not_elig'] == 0)))

            hieu_choice['curr_offer_parent'] = hieu_choice['curr_offer_p0'] | hieu_choice['curr_offer_p1']
            hieu_choice['curr_offer_parent'] = hieu_choice.groupby('hieu_id')['curr_offer_parent'].transform('max')

            hieu_choice['curr_having_offer'] = (hieu_choice['curr_offer_parent'] == 1) | ((hieu_choice['offer'] == 1) & \
                                               ((hieu_choice['full_time'] == 1) | ((hieu_choice['full_time'] == 0) & (hieu_choice['pt'] == 1)&(hieu_choice['esi_not_elig'] == 0))))
            hieu_choice['indiv_having_offer']=(hieu_choice['offer'] == 1) & ((hieu_choice['full_time'] == 1) | ((hieu_choice['full_time'] == 0) & (hieu_choice['pt'] == 1)&(hieu_choice['esi_not_elig'] == 0)))
            #if don't have esi_offer, then esi_cost should be nan
            loc_self = np.where((hieu_choice['family_type_for_afford_esi'] == 1))[0]
            loc_sp = np.where((hieu_choice['family_type_for_afford_esi'] == 2))[0]
            loc_ch = np.where((hieu_choice['family_type_for_afford_esi'] == 3))[0]
            loc_fam = np.where((hieu_choice['family_type_for_afford_esi'] == 4))[0]
            loc_ind_adult_child = np.where((hieu_choice['person_type'] > 3) & (hieu_choice['tax_dependent'] == 0))[0]
            loc_real_family = hieu_choice.index.difference(loc_ind_adult_child)
            loc_not_elig=np.where((hieu_choice['family_type_for_afford_esi'] == 5))[0]
            hieu_choice.loc[loc_self,'real_fam_cost']=hieu_choice.loc[loc_self].esi_self_cost
            hieu_choice.loc[loc_sp, 'real_fam_cost'] = hieu_choice.loc[loc_sp].esi_sp_cost
            hieu_choice.loc[loc_ch, 'real_fam_cost'] = hieu_choice.loc[loc_ch].esi_ch_cost
            hieu_choice.loc[loc_fam, 'real_fam_cost'] = hieu_choice.loc[loc_fam].esi_fam_cost
            hieu_choice.loc[loc_not_elig, 'real_fam_cost'] =np.nan
            hieu_choice['real_self_cost']=hieu_choice.esi_self_cost
            hieu_choice.loc[hieu_choice['indiv_having_offer']==0,'real_self_cost']= np.nan
            hieu_choice.loc[hieu_choice['indiv_having_offer'] == 0, 'real_fam_cost'] = np.nan
            hieu_choice['aff_self'] = (hieu_choice.esi_threshold * hieu_choice.fam_income > hieu_choice.real_self_cost)
            hieu_choice.loc[loc_ind_adult_child,'aff_self']=(hieu_choice.loc[loc_ind_adult_child].esi_threshold*hieu_choice.loc[loc_ind_adult_child].ind_income >hieu_choice.loc[loc_ind_adult_child].real_self_cost)
            hieu_choice['aff_fam'] = (hieu_choice.esi_threshold * hieu_choice.fam_income > hieu_choice.real_fam_cost)
            hieu_choice.loc[loc_ind_adult_child, 'aff_fam']=hieu_choice.loc[loc_ind_adult_child].aff_self
            hieu_choice.loc[hieu_choice['indiv_having_offer'] == 0, 'aff_self'] = False
            hieu_choice.loc[hieu_choice['indiv_having_offer'] == 0, 'aff_fam'] = False
            hieu_choice.loc[loc_real_family, 'fam_aff_self'] = hieu_choice.loc[loc_real_family].groupby('hieu_id')['aff_self'].transform('max')
            hieu_choice.loc[loc_real_family, 'fam_aff_fam'] = hieu_choice.loc[loc_real_family].groupby('hieu_id')['aff_fam'].transform('max')
            hieu_choice.loc[loc_ind_adult_child, 'fam_aff_self']=hieu_choice.loc[loc_ind_adult_child,'aff_self']
            hieu_choice.loc[loc_ind_adult_child, 'fam_aff_fam'] = hieu_choice.loc[loc_ind_adult_child, 'aff_fam']


            #if not person type 0 or 1 they can not have esi_cost for whole family
            loc_notparents=np.where((hieu_choice['person_type']!=0) &(hieu_choice['person_type']!=1))[0]
            hieu_choice.loc[loc_notparents,'esi_fam_cost']=np.nan

            #take the minimum_cost
            hieu_choice['min_esi_self_cost'] = hieu_choice.real_self_cost
            #set min_esi_self_cost of non-offer people to be the lowest self-only cost in their HIU
            hieu_choice.loc[loc_notparents, 'esi_self_cost'] = np.nan
            hieu_choice['hieu_min_esi_self_cost'] = hieu_choice.groupby('hieu_id')['real_self_cost'].transform('min')
            loc_withoffer= np.where(hieu_choice['indiv_having_offer']==1)[0]
            hieu_choice.loc[loc_withoffer, 'hieu_min_esi_self_cost']=hieu_choice.loc[loc_withoffer, 'min_esi_self_cost']
            loc_0 = np.where(hieu_choice['person_type']<6)[0]
            hieu_choice.loc[loc_0, 'min_esi_fam_cost'] =hieu_choice.loc[loc_0].groupby('hieu_id')['real_fam_cost'].transform('min')

            #hieu_choice['lostesi_firmnooffer']=False
            #hieu_choice['lostesi_firmno_ptoffer']=False

            hieu_choice['offer_disappear']=(hieu_choice.offer==0)&(hieu_choice.prev_offer==1)
            hieu_choice['pt_offer_disappear']=(hieu_choice.pt==0)&(hieu_choice.previous_offer_pt==1)

            #loc_0 is the position where people previously have coverage but lost esi coverage now
            loc_0 =np.where((hieu_choice['coverage'] == 1) & ((hieu_choice['choice'] < 2) | (hieu_choice['choice'] > 5)))[0]
            #loc_1 is the position where people previously have coverage but lost esi coverage now and working part time
            loc_1=np.where((hieu_choice['coverage']==1) & ((hieu_choice['choice']<2) | (hieu_choice['choice']>5))& (hieu_choice['full_time']==0))[0]
            #get p_holder of full time worker who lost their esi coverage
            pholder_offerstatus=hieu_choice[hieu_choice['person_id'].isin(hieu_choice.loc[loc_0].pholder_id_esi.tolist())]
            pholder_offerstatus=pholder_offerstatus.rename(columns={'offer_disappear': 'lost_esi_firmnooffer','person_id':'p_holder'})
            hieu_choice=pd.merge(hieu_choice,pholder_offerstatus[['p_holder','lost_esi_firmnooffer']],left_on='pholder_id_esi',right_on='p_holder',how='left')
            #set everything not in the target location to nan
            hieu_choice[~hieu_choice.index.isin(loc_0)].lost_esi_firmnooffer=np.nan

            #get the p_holder of part time worker who lost their esi coverage
            pt_pholder_offerstatus=hieu_choice[hieu_choice['person_id'].isin(hieu_choice.loc[loc_1].pholder_id_esi.tolist())]
            pt_pholder_offerstatus = pt_pholder_offerstatus.rename(columns={'offer_disappear': 'lost_esi_firmno_ptoffer','person_id':'p_holder_pt'})
            hieu_choice = pd.merge(hieu_choice, pt_pholder_offerstatus[['p_holder_pt', 'lost_esi_firmno_ptoffer']],left_on='pholder_id_esi', right_on='p_holder_pt', how='left')

            #set everything not in target row to nan
            hieu_choice[~hieu_choice.index.isin(loc_1)].lost_esi_firmno_ptoffer = np.nan
            hieu_choice = hieu_choice.drop(['prev_offer_p0', 'prev_offer_p1', 'prev_offer_parent', 'curr_offer_p0', 'curr_offer_p1',\
                                            'curr_offer_parent','esi_threshold','offer_disappear','pt_offer_disappear','pholder_id_esi','p_holder','p_holder_pt'], axis=1)
            #print 'people count'
            #print hieu_choice.loc[hieu_choice['choice'].isin([6,7])]['person_id'].count()
            #print hieu_choice.loc[hieu_choice['choice'].isin([6,7])].p_weight.sum()
            #print 'xc_prem sum'
            #print hieu_choice.loc[hieu_choice['choice'].isin([6,7])].xc_prem.sum()

            # merging with firm information
            fchoices = self.raw_data['firm_choices_all']
            fchoices = fchoices.loc[fchoices.chosen_choice == 1]

            prev_offering = fchoices.loc[fchoices.prev_offer == True]
            prev_non_offering = fchoices.loc[fchoices.prev_offer == False]

            prev2curr_1 = prev_offering.loc[prev_offering.offer == True].shape[0]
            prev2curr_2 = prev_offering.loc[prev_offering.offer == False].shape[0]
            prev2curr_3 = prev_non_offering.loc[prev_non_offering.offer == True].shape[0]
            prev2curr_4 = prev_non_offering.loc[prev_non_offering.offer == False].shape[0]

            results = pd.DataFrame({'offer':[prev2curr_3, prev2curr_1], 'nonoffer': [prev2curr_4, prev2curr_2]})
            results.index.name = 'offering'
            #print results

            # add two final columns: policy holder, and policy group
            # if adult_1 is person holder, people who can be under policy include adult_2 == 3, children == 4, adult_children == 4
            policy_id = 1
            hieu_choice['policy_id'] = 0
            hieu_choice['policy_holder'] = 0
            for hieu in hieu_choice.loc[(hieu_choice['person_type'] == 0) & (hieu_choice['choice'] == 2)]['hieu_id']:
                hieu_choice['policy_id'] += ((hieu_choice['hieu_id'] == hieu) & ((hieu_choice['person_type'] == 0) | \
                                            (hieu_choice['choice'].isin([3,4])))) * policy_id
                hieu_choice['policy_holder'] += ((hieu_choice['hieu_id'] == hieu) & (hieu_choice['person_type'] == 0)) * 1
                policy_id += 1

            # if adult_2 is person holder, people who can be under policy include adult_1 == 3, children == 5, adult_children == 5
            for hieu in hieu_choice.loc[(hieu_choice['person_type'] == 1) & (hieu_choice['choice'] == 2)]['hieu_id']:
                hieu_choice['policy_id'] += ((hieu_choice['hieu_id'] == hieu) & ((hieu_choice['person_type'] == 1) | \
                                            (hieu_choice['choice'].isin([3,5])))) * policy_id
                hieu_choice['policy_holder'] += ((hieu_choice['hieu_id'] == hieu) & (hieu_choice['person_type'] == 1)) * 1
                policy_id += 1

            # for adult_child 1 to 4, if any is a policy holder, place them in a new group
            hieu_choice['policy_id'] += ((hieu_choice['person_type'].isin([4,5,6,7])) & (hieu_choice['choice'] == 2)) * policy_id
            hieu_choice['policy_holder'] += (hieu_choice['person_type'].isin([4,5,6,7])) & (hieu_choice['choice'] == 2) * 1

            ac = hieu_choice.loc[hieu_choice['policy_id'] == policy_id]
            seq = np.arange(policy_id, hieu_choice.loc[hieu_choice['policy_id'] == policy_id].shape[0] + policy_id) # increment for every person
            ac['policy_id'] = seq
            cols = list(hieu_choice.columns)
            hieu_choice.loc[hieu_choice['policy_id'] == policy_id, cols] = ac[cols]

            if self.raw_data['long_calib_table'] is not None:
                # manipulating wide table for calculations
                table = pd.pivot_table(self.raw_data['long_calib_table'], values=['utility','calib'], index=['hieu_id', 'person_id'], columns='choice')
                indices = np.unique(self.raw_data['long_calib_table'].choice.values).tolist()
                table.columns = ['utility_' + str(i) for i in indices]+['calib_' + str(i) for i in indices]
                table.reset_index(inplace=True)

                temp = table[['utility_' + str(i) for i in indices]]
                table['sum_utility'] = temp.max(axis=1)

                # finding hieu total utilities based on persons making optimal individual choices
                optimal_ind_choices = table[['hieu_id','sum_utility']].groupby(['hieu_id'], as_index=False).sum()
                hieu_df = hieu_choice['hieu_id'].value_counts().reset_index()
                hieu_df.columns = ['hieu_id','hieu_size']
                hieu_df = pd.merge(hieu_df, hieu_choice[['hieu_id','hieu_utility']], on='hieu_id', how='left')

                optimal_ind_choices = pd.merge(optimal_ind_choices, hieu_df, on='hieu_id', how='inner')
                optimal_ind_choices['match'] = optimal_ind_choices['sum_utility'] == optimal_ind_choices['hieu_utility']
                optimal_ind_choices2 = optimal_ind_choices.loc[optimal_ind_choices['hieu_size'] != 1]

                ### pcent_dispcy legend - 0: total, 1: non-single size families, 2: non-single size, non esi offering
                pcent = float(optimal_ind_choices.loc[optimal_ind_choices.match == False].shape[0]) / float(optimal_ind_choices.shape[0])
                pcent = round(pcent, 4)
                optimal_ind_choices['pcent_dispcy_0'] = np.nan
                optimal_ind_choices['pcent_dispcy_0'][0] = pcent
                optimal_ind_choices['pcent_dispcy_0'][1] = str(optimal_ind_choices.loc[optimal_ind_choices.match == False].shape[0]) + ' / ' \
                    + str(optimal_ind_choices.shape[0])

                if optimal_ind_choices2.empty:
                    pcent = 0
                else:
                    pcent = float(optimal_ind_choices2.loc[optimal_ind_choices2.match == False].shape[0]) / float(optimal_ind_choices2.shape[0])
                    pcent = round(pcent, 4)

                optimal_ind_choices['pcent_dispcy_1'] = np.nan
                optimal_ind_choices['pcent_dispcy_1'][0] = pcent
                optimal_ind_choices['pcent_dispcy_1'][1] = str(optimal_ind_choices2.loc[optimal_ind_choices2.match == False].shape[0]) + ' / ' \
                    + str(optimal_ind_choices2.shape[0])

                person_df = table[['utility_' + str(i) for i in indices]]
                person_df['opt_choice'] = (person_df.idxmax(axis=1))
                person_df['opt_choice'] = (person_df['opt_choice'].apply(lambda x: x[8:])).astype(int)

                table['p_opt_choice'] = person_df['opt_choice']

                table = pd.merge(table, hieu_choice[['person_id','choice']], on='person_id', how='left')
                table = table.rename(columns={'choice': 'hieu_choice'})

                table['match'] = table['p_opt_choice'] == table['hieu_choice']
                pcent = float(table.loc[table.match == False].shape[0]) / float(table.shape[0])
                pcent = round(pcent, 4)
                table['pcent_dispcy_0'] = np.nan
                table['pcent_dispcy_0'][0] = pcent
                table['pcent_dispcy_0'][1] = str(table.loc[table.match == False].shape[0]) + ' / ' + str(table.shape[0])

                # searching for hieus that were offered esi
                offering_esi = pd.DataFrame()
                utility_esi_cols = ['utility_2','utility_3','utility_4','utility_5']
                for hieu in optimal_ind_choices.hieu_id.unique():
                    no_esi = True
                    for col in utility_esi_cols:
                        try:
                            no_esi &= table.loc[table.hieu_id == hieu][col].isnull().values.all()
                        except:
                            pass
                    temp = pd.DataFrame({'hieu_id': [hieu], 'offers_esi': [~no_esi]})
                    offering_esi = offering_esi.append(temp)
                offering_esi.reset_index(drop=True, inplace=True)

                optimal_ind_choices = pd.merge(optimal_ind_choices, offering_esi, on='hieu_id', how='left')
                temp = optimal_ind_choices.loc[optimal_ind_choices['offers_esi'] == False]
                pcent = float(temp.loc[temp.match == False].shape[0]) / float(temp.shape[0])
                pcent = round(pcent, 4)
                optimal_ind_choices['pcent_dispcy_2'] = np.nan
                optimal_ind_choices['pcent_dispcy_2'][0] = pcent
                optimal_ind_choices['pcent_dispcy_2'][1] = str(temp.loc[temp.match == False].shape[0]) + ' / ' + str(temp.shape[0])

                self.initiate_out_table(1, "person_behavior_discrepancy.csv", table)
                self.initiate_out_table(1, "hieu_behavior_discrepancy.csv", optimal_ind_choices)

            # create text file that checks for all
            directory = os.path.join(self.out_path, str(self.year), self.table_types[1])
            if not os.path.exists(directory):
                os.makedirs(directory)
            sanity_check_output = open(os.path.join(directory, 'sanity_check.txt'), 'w+')
            paragraph = ["checking aggregate p_weights of specific conditions: \n"]
            categories = ['xc_sub', 'mcaid', 'new_mcaid', 'penalty']

            for category in categories:
                paragraph.append(category + ": " + str(hieu_choice.loc[hieu_choice[category] == 1].p_weight.sum()) + "\n")
            for line in paragraph:
                sanity_check_output.write(line)
            sanity_check_output.close()

            hieu_choice.rename(columns={'xc_sub': 'subsidy_elig','adultkid_xc_sub': 'adultkid_subsidy_elig', 'hieu_subsidy_amount_total':'subsidy_amount_total_long', 'hieu_subsidy_bronze_total': 'subsidy_bronze_total','hieu_subsidy_amount_total_default': 'subsidy_amount_total_default_long'}, inplace=True)
            hieu_choice['xc_sub_final']=0
            xc_sub_loc = np.where(((hieu_choice['subsidy_elig']==1)&(hieu_choice['subsidy_amount_total']>0)& \
                (hieu_choice['choice'].isin([1,2,3,4,5,9,10])))|((hieu_choice['subsidy_elig']==1)&\
                (hieu_choice['sub_amount']>0)&(hieu_choice['choice'].isin([6,7,8]))))[0]
            hieu_choice.loc[xc_sub_loc,'xc_sub_final']=1

            if self.raw_data['dual_parameter']== 0:
                hieu_choice['dual']=0
            # ken's method
            if self.raw_data['use_ken_flag']==True:
                self.initiate_out_table(1, "hieu_choice_orig.csv", hieu_choice)
                ken_adjuster=self.raw_data['ken_adjuster']
                hieu_choice=pd.merge(hieu_choice,ken_adjuster,how='left',on='person_id')
                hieu_choice['p_weight_orig']=hieu_choice.p_weight
                hieu_choice.p_weight=hieu_choice.p_weight*hieu_choice.weight_adjuster

            hieu_choice = hieu_choice.applymap(lambda x: 1 if x == True else x)
            hieu_choice = hieu_choice.applymap(lambda x: 0 if x == False else x)


            ###############################    Dave request variables    #########################################
            '''
                The following two variables are used to count the number of people who lose ESI because
                the firms of their *policy holders* of the ESI either drop the entire coverage or drop ESI
                coverage for PT:
                    firm_dropped_my_cov
                    firm_dropped_my_PTcov
            '''
            #add firm_id_of_exante_pholder, full_time_of_exante_pholder, firm_dropped_my_cov,
            #and firm_of_pholder_dropped_PTcov into hieu_choice
            #firm_id_of_exante_pholder, full_time_of_exante_pholder are NA for people
            #previously not on ESI 
            #firm_dropped_my_cov is NA for people previously not on ESI or the firm keeps FT/PT ESI
            #firm_of_pholder_dropped_PTcov is NA for people previously not on ESI or firm currently drops FT ESI
            hieu_choice=pd.merge(hieu_choice,self.raw_data["hieu_table_paired_down"].loc[self.raw_data["hieu_table_paired_down"].coverage==1,
                              ["person_id","firm_id_of_exante_pholder","full_time_of_exante_pholder"]],on="person_id", how="left")
            hieu_choice=pd.merge(hieu_choice,self.raw_data["dropped_firms"],on="firm_id_of_exante_pholder",how="left")
            hieu_choice=pd.merge(hieu_choice,self.raw_data["dropped_PT_firms"],on="firm_id_of_exante_pholder",how="left")
            
            #mask for persons with a PT ESI policy holder
            #False for FT or NA exante pholder
            pholder_is_PT=hieu_choice.full_time_of_exante_pholder.fillna(False)

            #NA for person not on ESI coverage or firm drops FT ESI
            #False for person continues on ESI but policy holder was FT
            #True for person previously on ESI with PT policy holder and the firm dropped PTcov
            hieu_choice["firm_dropped_my_PTcov"]=(hieu_choice.firm_of_pholder_dropped_PTcov) & pholder_is_PT


            # CLEAN UP FOR N/A, USE SAME FORMAT AS ORIGINAL FOR TRUE/FALSE
            #False if the person is not previously on ESI or the firm keeps FT/PT ESI cov for that person
            #True if the person is previously on ESI but the firm drops FT cov
            hieu_choice.firm_dropped_my_cov.fillna(False,inplace=True)
            #False if the person is not previously on ESI or the firm keeps FT ESI cov for that person
            #True if the person is previously on PT ESI but the firm drops PT cov
            hieu_choice.firm_dropped_my_PTcov.fillna(False,inplace=True)
           

            #ex_ante latent variables creation
            if self.raw_data["calibrateRun"]==1 and self.raw_data["calibrate_ex_ante_latents"]==True:
                self.calibrate_exante_latents=calibrate_exante_latents(self.raw_data["calibrate_ex_ante_latents"],self.raw_data["Latent_vars"],\
                    self.raw_data["long_calib_table"],self.raw_data["pop_hieu"].person_id)
                exante_latents=self.calibrate_exante_latents.create_ex_ante_latent()

            self.initiate_out_table(1, "hieu_choice.csv", hieu_choice)
            self.initiate_out_table(1, 'firm_choice.csv', self.raw_data['firm_choices_all'])
            self.initiate_out_table(1, 'choice_count.csv', results, True)

        def build_xc_wgted_unwgted_tables():
            # aggregated results by year and by region

            dftmp = pd.merge(self.raw_data['long_output'], self.raw_data['hieu_table'][['hieu_id', 'person_type', 'coverage', 'age']],
                                                           how='left', on=['hieu_id', 'person_type'])
            # subsetting to age <=64
            dftmp = dftmp.loc[dftmp['age']<=64]
            # unweighted, statewide
            dftmp['choice5'] = dftmp['choice'].map(self.choice5_map)

            unwgted = pd.crosstab(dftmp['coverage'], dftmp['choice'], margins=True)
            unwgted = unwgted.fillna(0)
            unwgted['exp'] = np.nan
            unwgted['exp'][0] = self.raw_data['weights'][0]
            unwgted['actual'] = np.nan
            unwgted['actual'][0] = dftmp['p_id'].count()

            self.initiate_out_table(3, "unweighted_6_by_10.csv", unwgted, True)
            print("Unweighted 6x10")
            print(unwgted)

            unwgted = pd.crosstab(dftmp['coverage'], dftmp['choice5'], margins=True)
            unwgted = unwgted.fillna(0)
            self.initiate_out_table(3, "unweighted_6_by_5.csv", unwgted, True)
            print("Unweighted 6x5")
            print(unwgted)

            if self.is_regional == 1:
                # regional table, unweighted
                table = pd.crosstab([dftmp.region, dftmp.coverage], dftmp.choice)
                table['exp'] = np.nan
                table['exp'][0] = self.raw_data['weights'][0]
                table['actual'] = np.nan
                table['actual'][0] = dftmp['p_id'].count()
                self.initiate_out_table(3, "unweighted_6_by_10_regional.csv", table, True)

                table = pd.crosstab([dftmp.region, dftmp.coverage], dftmp.choice5)
                self.initiate_out_table(3, "unweighted_6_by_5_regional.csv", table, True)

                # regional table, weighted
                table = pd.crosstab([dftmp.region, dftmp.coverage], dftmp.choice, dftmp.p_weight, aggfunc=sum, margins=True)
                table = table.fillna(0)
                table['exp'] = np.nan
                table['exp'][0] = self.raw_data['weights'][1]
                table['actual'] = np.nan
                table['actual'][0] = dftmp['p_weight'].sum()
                self.initiate_out_table(3, "weighted_6_by_10_regional.csv", table, True)

                table = pd.crosstab([dftmp.region, dftmp.coverage], dftmp.choice5, dftmp.p_weight, aggfunc=sum, margins=True)
                self.initiate_out_table(3, "weighted_6_by_5_regional.csv", table, True)

            # weighted, statewide
            wgted = pd.crosstab(dftmp['coverage'], dftmp['choice'], dftmp['p_weight'], aggfunc=sum, margins=True)
            wgted = wgted.fillna(0)
            wgted['exp'] = np.nan
            wgted['exp'][0] = self.raw_data['weights'][1]
            wgted['actual'] = np.nan
            wgted['actual'][0] = dftmp['p_weight'].sum()
            self.initiate_out_table(3, "weighted_6_by_10.csv", wgted, True)

            wgted = pd.crosstab(dftmp['coverage'], dftmp['choice5'], dftmp['p_weight'], aggfunc=sum, margins=True)
            wgted = wgted.fillna(0)
            self.initiate_out_table(3, "weighted_6_by_5.csv", wgted, True)

            print("Weighted")
            print(wgted)

            dftmp_xc_sub_0 = dftmp.loc[(dftmp.xc_sub==0) | (dftmp.xc_sub==False)]
            dftmp_xc_sub_1 = dftmp.loc[(dftmp.xc_sub==1) & (dftmp.xc_sub==True)]

            outtable_xc_sub_0 = pd.crosstab(dftmp_xc_sub_0['coverage'], dftmp_xc_sub_0['choice'], dftmp_xc_sub_0['p_weight'], aggfunc=sum, margins=True)
            outtable_xc_sub_1 = pd.crosstab(dftmp_xc_sub_1['coverage'], dftmp_xc_sub_1['choice'], dftmp_xc_sub_1['p_weight'], aggfunc=sum, margins=True)
            outtable_xc_sub_0 = outtable_xc_sub_0.fillna(0)
            outtable_xc_sub_1 = outtable_xc_sub_1.fillna(0)
            self.initiate_out_table(3, "crosstab_tables_xc_sub_0.csv", outtable_xc_sub_0, True)
            self.initiate_out_table(3, "crosstab_tables_xc_sub_1.csv", outtable_xc_sub_1, True)

            # regional tables
            if self.is_regional == 1:
                outtable_xc_sub_0 = pd.crosstab([dftmp_xc_sub_0.region, dftmp_xc_sub_0.coverage], dftmp_xc_sub_0.choice,
                                             dftmp_xc_sub_0.p_weight, aggfunc=sum, margins=True)
                outtable_xc_sub_1 = pd.crosstab([dftmp_xc_sub_1.region, dftmp_xc_sub_1.coverage], dftmp_xc_sub_1.choice,
                                             dftmp_xc_sub_1.p_weight, aggfunc=sum, margins=True)
                outtable_xc_sub_0 = outtable_xc_sub_0.fillna(0)
                outtable_xc_sub_1 = outtable_xc_sub_1.fillna(0)
                self.initiate_out_table(3, "regional_xc_sub_0.csv", outtable_xc_sub_0, True)
                self.initiate_out_table(3, "regional_xc_sub_1.csv", outtable_xc_sub_1, True)
                self.initiate_out_table(3, "dynamic_xc_statewide_premiums_version_"+\
                    str(self.raw_data["dynamic_version"])+".csv", self.raw_data["dynamic_exchange_statewide_premiums"], False)
                self.initiate_out_table(3, "dynamic_xc_regional_premiums_version_"+\
                    str(self.raw_data["dynamic_version"])+".csv", self.raw_data["dynamic_exchange_regional_premiums"], False)

        # build all tables
        build_calib_tables()
        build_xc_wgted_unwgted_tables()
        if self.tables_flag == True:
            build_expenditure_tables()

[docs]    def initiate_out_table(self, table_type, table_name, table, index=False):
        """
        effect:
            adds table to the output list
        arguments:
            table_type - an integer as follows:
                0 option is 'other',
                1 option is 'individual_data',
                2 option is 'tables',
                3 option is 'aggregate_results'
            table_name - desired name of the table
            table - the dataframe itself
        """
        if not (0 <= table_type <= len(self.table_types)):
            raise ValueError("Invalid table type")
        elif self.table_types[table_type] is None:
            return
        # add table to output list
        self.out_tables[os.path.join(*[self.out_path, \
        str(self.year), self.table_types[table_type], table_name])] = (table, index)

[docs]    def output_tables(self):
        """
            effect: output all tables in list of output tables to the correct path
        """

        # make out directory if it does not exist
        for table_type in self.table_types:
            if table_type is None:
                continue
            full_path = os.path.join(*[self.out_path, str(self.year), table_type])
            if not os.path.exists(full_path):
                os.makedirs(full_path)

        # output each csv
        for path, table in self.out_tables.items():
            table[0].to_csv(path,  float_format='%.4f', index=table[1])

        # output configuation file
        #shutil.copy(self.config_path, self.out_path)

[docs]    def force_output(self, table_type, table_name, table):
        # make sure table_type is valid
        if not (0 <= table_type <= len(self.table_types)):
            raise ValueError("Invalid table type")
        elif self.table_types[table_type] is None:
            return
        # generate path
        output_path = os.path.join(*[self.out_path, str(self.year), \
        self.table_types[table_type]])
        if not os.path.exists(output_path):
            os.makedirs(output_path)

        table.to_csv(os.path.join(output_path, table_name), index=False)
calsim

Navigation

Related Topics

Source code for utility_modules.output