Source code for utility_modules.multi_merge

#!/usr/bin/env python
import pandas as pd
import re

def multi_merge(x, y, diff, common,
                keep_cols=False, cols=None,
                ncols=None):
    """Left-merge a two-column lookup from ``y`` onto several columns of ``x``.

    For every column in ``cols`` the ``[diff, common]`` slice of ``y`` is
    relabelled so that its ``diff`` column carries the name of the ``x``
    column and its ``common`` column is named ``<common>_<suffix>``; the
    result is then left-merged onto the running frame.  No ``on=`` is given,
    so pandas joins on every column name the two frames share.

    Parameters
    ----------
    x, y : pandas.DataFrame
        The frame to extend and the frame supplying the lookup values.
    diff : str
        Column of ``y`` holding the values matched against each ``x`` column.
    common : str
        Column of ``y`` whose values are brought across; also the prefix of
        every generated column name.
    keep_cols : bool, default False
        When false, the columns listed in ``cols`` are dropped from the
        result after merging.
    cols : iterable of str, optional
        Columns of ``x`` to merge on.  Defaults to all columns of ``x``.
    ncols : sequence of str, optional
        Suffixes for the generated columns, position-matched with ``cols``.
        If its length differs from ``cols`` it is ignored and the ``x``
        column names are used as suffixes instead.

    Returns
    -------
    pandas.DataFrame
        ``x`` with one ``<common>_<suffix>`` column added per entry of
        ``cols`` (and ``cols`` removed unless ``keep_cols`` is true).
    """
    cols = list(x.columns) if cols is None else list(cols)

    # Pair each column with its suffix up front instead of threading a
    # counter through module-level globals; this keeps the call re-entrant
    # and leaves nothing to clean up if a merge raises.
    if ncols is not None and len(ncols) == len(cols):
        suffixes = list(ncols)
    else:
        # Mismatched or missing ncols: fall back to the x column names.
        suffixes = cols

    # Explicit copy because the labels are reassigned on every iteration;
    # this replaces the deprecated ``is_copy = False`` flag and avoids
    # writing any attribute onto the caller's frames.
    lookup = y[[diff, common]].copy()

    merged = x
    for key_col, suffix in zip(cols, suffixes):
        lookup.columns = [key_col, common + "_" + suffix]
        merged = pd.merge(merged, lookup, how='left')

    if not keep_cols:
        merged = merged.drop(cols, axis=1)
    return merged
def new_col(col1, col2):
    """Return ``col1`` and ``col2`` joined by a single underscore."""
    return col1 + "_" + col2
def new_base_col(col, *args):
    """Return ``col`` joined by an underscore to the next global suffix.

    Reads the suffix at position ``GLOBAL_COL_INC`` of the module-level
    ``GLOBAL_COLS`` sequence, then advances ``GLOBAL_COL_INC`` by one.
    Both globals must already exist when this is called.  Extra positional
    arguments are accepted and ignored.
    """
    global GLOBAL_COL_INC
    # Build the name before advancing so a failed lookup leaves the
    # counter untouched.
    label = col + "_" + GLOBAL_COLS[GLOBAL_COL_INC]
    GLOBAL_COL_INC += 1
    return label