# Source code for utility_modules.multi_merge
#!/usr/bin/env python
import pandas as pd
import re
def multi_merge(x, y, diff, common,
                keep_cols=False, cols=None,
                ncols=None):
    """
    Append looked-up values from y to x through repeated left merges.

    For every column listed in cols, the two-column subset
    y[[diff, common]] is relabelled and left-merged onto the running
    result: the diff column takes the name of the current column (so it
    is shared with x), and the common column is renamed to
    common + "_" + suffix. pd.merge is called without ``on``, so it joins
    on whatever column names both frames share.

    x and y: two pandas data frames to merge
    diff: the column of y that is renamed to each entry of cols in turn
    common: the column of y whose values are carried into the result
    keep_cols: controls if the cols are kept in the result
               (default = False, i.e. they are dropped)
    cols: specific cols to use in x (default uses all cols in x)
    ncols: suffixes for the new column names; must be in same order and
           length as cols. If the lengths differ, ncols is ignored and
           the names in cols are used as suffixes instead.

    Returns the merged data frame. x and y themselves are left unmodified
    and no module-level state is touched.
    """
    # tests if any additional arguments have been passed
    if cols is None:
        cols = x.columns

    # ncols only takes effect when it pairs up one-to-one with cols
    use_ncols = ncols is not None and len(ncols) == len(cols)

    # explicit copy: relabelling the columns below must never reach y
    lookup = y[[diff, common]].copy()

    merged = x
    for position, p_type in enumerate(cols):
        suffix = ncols[position] if use_ncols else p_type
        lookup.columns = [p_type, common + "_" + suffix]
        merged = pd.merge(merged, lookup, how='left')

    if not keep_cols:
        merged = merged.drop(cols, axis=1)
    return merged
def new_col(col1, col2):
    """Return col1 and col2 joined by an underscore."""
    return col1 + "_" + col2
def new_base_col(col, *args):
    """
    Return col joined by an underscore to the next entry of GLOBAL_COLS.

    Reads the module-level names GLOBAL_COLS (an indexable of suffixes)
    and GLOBAL_COL_INC (the current index into it); both must already
    exist when this is called. GLOBAL_COL_INC is advanced by one after
    the name has been built. Any extra positional arguments are accepted
    and ignored.
    """
    global GLOBAL_COL_INC
    name = col + "_" + GLOBAL_COLS[GLOBAL_COL_INC]
    GLOBAL_COL_INC += 1
    return name