Source code for gen_batch
#!/usr/bin/env python
from itertools import product
#from urllib2 import URLError
import pandas as pd
import requests
import os
import pdb
try:
# For Python 3.0 and later
from urllib.request import urlopen
except ImportError:
# Fall back to Python 2's urllib2
from urllib2 import urlopen
def download_parameters(file_id, gid, output_dir, timeout=30):
    """
    Download one sheet of a Google spreadsheet as CSV and load it with pandas.

    The CSV is saved as ``gen_batch.csv`` inside ``output_dir`` and then read
    back from that file.

    parameters:
        file_id: google drive file id of the spreadsheet
        gid: id of the sheet inside the spreadsheet
        output_dir: directory the downloaded CSV is written to
        timeout: seconds to wait for the server before giving up
    output:
        pandas dataframe read from the downloaded CSV
    raises:
        requests.exceptions.RequestException: the request could not be
            completed (connection failure, timeout, ...)
        Exception: the server answered with a status code other than 200
    """
    spreadsheet_url = (
        "https://docs.google.com/spreadsheets/d/{0}/export"
        "?format=csv&id={0}&gid={1}"
    ).format(file_id, gid)

    # requests signals network problems with its own exception hierarchy, so
    # those errors are simply left to propagate to the caller.
    res = requests.get(spreadsheet_url, timeout=timeout)
    if res.status_code != 200:
        raise Exception("Error fetching csv from online")

    csv_path = os.path.join(output_dir, 'gen_batch.csv')
    # res.content is raw bytes: write in binary mode so this works on both
    # Python 2 and Python 3, and let the context manager close the file.
    with open(csv_path, 'wb') as f:
        f.write(res.content)

    return pd.read_csv(csv_path, index_col=False)
def write_lines(f, batch_init):
    """
    Write every entry of ``batch_init`` to ``f``, one entry per line.

    parameters:
        f: writable object exposing ``write``
        batch_init: iterable of strings; a newline is appended to each
    """
    emit = f.write
    for entry in batch_init:
        emit(entry + '\n')
def parse_row(row):
    """
    Turn one spreadsheet row into a command-line argument string.

    Every non-null cell contributes `` <column label> <value>``; the column
    labelled ``name`` is skipped. The result always ends with a single space
    so that further arguments can be appended directly.

    parameters:
        row: pandas Series whose index holds the column labels
    output:
        string of the form `` label value label value ``; a lone space when
        no cell contributes
    """
    pieces = []
    # Iterate (label, value) pairs instead of indexing the Series with
    # integers: integer keys on a label-indexed Series are deprecated.
    for label, value in row.items():
        if label == 'name' or pd.isnull(value):
            continue
        pieces.append(' ' + str(label) + ' ' + str(value))
    return ''.join(pieces) + ' '
def main():
    '''
    Download the run parameters and write one batch script per row.

    Each script is written into the output directory and consists of the
    fixed header lines followed by a single command line built from the row.
    Rows with a non-null 'name' cell produce '<name>.sh'; the others produce
    '<row position>-run.sh'.

    TODO, command line parameters to modify file_id, gid, output_dir
    # you can specify output directory and sheet number as parameters
    # test if directory exists
    '''
    # batch generator parameters
    FILE_ID = '1TNBfZxb1egJbdK0zQqU3-1aIG4X8M3Xx2XgJWGrmvVM'
    GID = 0
    output_dir = './config'
    if not os.path.isdir(output_dir):
        os.makedirs(output_dir)

    # constants: header lines copied verbatim to the top of every script
    batch_init = ['#!/bin/bash', '#$ -cwd', '#$ -j y', '#$ -o sep27.$JOB_ID', '#$ -N calsim',
                  '#$ -l arch=intel*', '#$ -l h_rt=24:00:00,h_data=24G,highp', '#$ -m n', '\n']
    cmd_prefix = 'python -u main.py'
    config_filename = '-config common_config.ini'

    runs = download_parameters(FILE_ID, GID, output_dir)

    # generate a batch file for every row extracted
    for idx, (_, row) in enumerate(runs.iterrows()):
        if pd.isnull(row['name']):
            script_name = str(idx) + '-run.sh'
        else:
            script_name = str(row['name']) + '.sh'
        batch_filename = output_dir + '/' + script_name

        # the context manager closes the script even if a helper raises
        with open(batch_filename, 'w+') as f:
            write_lines(f, batch_init)
            f.write(cmd_prefix + parse_row(row) + config_filename)
# Generate the batch files only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()