Sleds/PackageNameXlsx/auto-manifest.py

231 lines
6.1 KiB
Python

import os
import sys
import xlrd
import json
import time
import errno
import socket
import argparse
import datetime
import ConsoleUtils
# Program metadata: _name, _descript and _usage feed the argparse parser below; _name, _version, _date and _auth
# are passed to ConsoleUtils.get_header when the script starts.
_name = 'AutoManifest'
_descript = 'Automatically generates a manifest file with the given configuration.'
_auth = 'Chris Diesch <cdiesch@sequencelogic.net>'
_version = '0.1.0'
_date = '2017/11/16'
_usage = 'auto-manifest.py -i,--in-file {IN_FILE} -o,--out-file {OUT_FILE} [OPTIONS...]'
_parser = argparse.ArgumentParser(prog=_name, description=_descript, usage=_usage)
# Parsed command line arguments; stays None until the __main__ block calls _parser.parse_args().
args = None
# Every print() in this module goes through this printer because it replaces sys.stdout.
# NOTE(review): SLPrinter is defined in ConsoleUtils (not visible here) -- presumably it prefixes each line with
# the program name; confirm against ConsoleUtils.
_printer = ConsoleUtils.SLPrinter(_name)
sys.stdout = _printer
# Values copied verbatim into every manifest written by _to_manifest.
_data = {}
# Timestamp taken once, at import time, with a hard-coded '-7:00' suffix appended.
# NOTE(review): the suffix is not derived from the local timezone and ISO 8601 would spell it '-07:00' -- confirm
# what the manifest consumer expects.
_date_created = '%s-7:00' % (datetime.datetime.now().strftime('%Y-%m-%dT%H:%M:%S'))
_job_type = 'loan'
_type = 'SNT_JOB'
_work_order = 'basic_cns'
# Resolved at import time from the local host name.
# NOTE(review): a failed lookup would raise here, before any argument parsing happens -- confirm this is acceptable.
_ip_addr = socket.gethostbyname(socket.gethostname())
_job_id = 'test'
# NOTE(review): _originator, _packages and _MANIFEST_DATA are not referenced by any code visible in this file;
# _to_manifest builds its own 'originator' and 'packages' entries.
_originator = {
'ipAddress': _ip_addr,
'jobId': _job_id
}
_packages = []
_MANIFEST_DATA = []
def _to_manifest(pkg_name, pkg_files):
    """
    Writes the ``manifest.json`` file for a single package into that package's folder.

    The package folder is the part of ``pkg_name`` after the first underscore, located under the module-level
    ``root_directory`` (which is assigned by the ``__main__`` block before ``main()`` runs).
    Args:
        ``pkg_name`` -- ``str`` The package name, in the form ``<loan number>_<package folder>``.
        ``pkg_files`` -- ``list`` The names of the files which make up the package.
    Returns:
        ``None``
    Raises:
        ``IndexError`` -- If ``pkg_name`` does not contain an underscore.
    """
    # Split on the first underscore only, so a package folder whose own name contains '_' is kept whole
    # instead of being truncated at its first underscore.
    pkg_folder = pkg_name.split('_', 1)[1]
    out_path = os.path.join(root_directory, pkg_folder, 'manifest.json')
    result = {
        'data': _data,
        'dateCreated': _date_created,
        'jobType': _job_type,
        'type': _type,
        'workOrderName': _work_order,
        'originator': {
            'ipAddress': _ip_addr,
            'jobId': '%s-test' % pkg_name
        },
        'packages': [
            {
                'data': {
                    'package_name': pkg_name
                },
                'files': pkg_files
            }
        ]
    }
    print('Writing output for package %s to %s' % (pkg_name, out_path))
    try:
        with open(out_path, 'w') as writer:
            json.dump(result, writer, indent=3)
    except Exception as err:
        # Best effort: a package which cannot be written is reported and the remaining packages still run.
        print('Failed to write file:')
        print(' %s' % str(err))
def _load_xl_file(file_path):
    """
    Reads the first sheet of an excel workbook into a list of row dictionaries.

    The first row of the sheet supplies the keys (converted with ``str``); every following row becomes one
    ``dict`` mapping those keys to the cell values of that row.
    Args:
        ``file_path`` -- ``str`` The path to the excel workbook to read.
    Returns:
        ``list`` -- One ``dict`` per data row, in sheet order.
    """
    sheet = xlrd.open_workbook(file_path).sheet_by_index(0)
    headers = [str(cell.value) for cell in sheet.row(0)]
    return [
        {headers[col_idx]: sheet.cell(row_idx, col_idx).value for col_idx in range(sheet.ncols)}
        for row_idx in range(1, sheet.nrows)
    ]
def _get_package_files(root_dir, xl_data):
    """
    Collects the valid files of every package listed in the excel data.

    Each row names a sub folder of ``root_dir`` through its ``'Servicer Loan #'`` value; the valid files found
    directly in that folder are stored under the package name ``<FNMA LOAN NUMBER>_<sub folder>``. Rows whose
    sub folder is missing (or is not a directory) are reported and skipped.
    Args:
        ``root_dir`` -- ``str`` The folder which holds one sub folder per package.
        ``xl_data`` -- ``list`` The row dictionaries loaded from the excel file.
    Returns:
        ``dict`` -- A dict mapping each package name to the list of valid file names in its folder.
    """
    result = {}
    num_files = 0
    start_time = time.time()
    for row in xl_data:
        package_file_dir = str(row['Servicer Loan #'])
        # Drop a trailing '.0' (presumably left over from numeric cells being read as floats).
        if package_file_dir.endswith('.0'):
            package_file_dir = package_file_dir[:-2]
        # NOTE(review): the FNMA loan number is formatted as-is, so a float-valued cell would keep its '.0'
        # in the package name -- confirm whether that is intended.
        package_name = '%s_%s' % (row['FNMA LOAN NUMBER'], package_file_dir)
        sub_folder = os.path.join(root_dir, package_file_dir)
        # isdir (rather than exists) so a regular file with the same name cannot reach os.listdir and crash.
        if not os.path.isdir(sub_folder):
            print('There is no subfolder at %s' % sub_folder)
            continue
        files = [f for f in os.listdir(sub_folder) if _is_valid_file(f)]
        result[package_name] = files
        print('Loaded %d files for package "%s"' % (len(files), package_name))
        num_files += len(files)
    run_time = time.time() - start_time
    # The elapsed time can be 0 on a coarse timer (e.g. no rows at all); avoid dividing by zero.
    rate = num_files / run_time if run_time > 0 else 0.0
    _printer.write_no_prefix('')
    print('Loaded %d files for %d packages in %.4f s (%.0f files/s)' % (num_files, len(result), run_time, rate))
    _printer.write_no_prefix('')
    return result
def _is_valid_file(file_name):
is_valid = file_name.lower().endswith('.tif') or \
file_name.lower().endswith('.pdf') or \
file_name.lower().endswith('.tiff')
if not is_valid:
print(' Found invalid file (not pdf or tif): %s' % file_name)
return is_valid
def _make_pkg_manifest(xl_data):
for pkg_name, pkg_files in xl_data.items():
_to_manifest(pkg_name, pkg_files)
def get_package_data_from_xl(xl_file):
    """
    Builds the package information described by an excel file which sits in the parent folder of the packages.
    Args:
        ``xl_file`` -- ``str`` The path to the excel file to load.
    Returns:
        ``dict`` -- A dict whose keys are the desired package names and whose values are the lists of files
                    which make up each package. An empty dict is returned when the excel file cannot be read.
    Raises:
        ``FileNotFoundError`` -- If ``xl_file`` does not point to an existing file.
    """
    # isfile is already False for a path which does not exist
    if not os.path.isfile(xl_file):
        raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), xl_file)
    try:
        rows = _load_xl_file(xl_file)
    except xlrd.XLRDError as read_err:
        print('Encountered error reading excel file:')
        print(' %s' % str(read_err))
        return {}
    # The package folders live next to the excel file
    return _get_package_files(os.path.dirname(xl_file), rows)
def main():
    """
    Loads the package data for the input file given on the command line and writes a manifest for each package.
    """
    _make_pkg_manifest(get_package_data_from_xl(args.in_file))
def _show_args():
    """
    Prints the command line arguments and the fixed manifest settings this run will use.
    """
    blank_line = _printer.write_no_prefix
    print('Loading package data from: %s' % args.in_file)
    print('Saving manifest result to: %s' % args.out_file)
    blank_line('')
    settings = (
        'Setting work order to: "%s"' % _work_order,
        'Setting manifest type to: "%s"' % _type,
        'Setting job ID to: "%s"' % _job_id,
        'Setting job type to: "%s"' % _job_type,
    )
    for setting in settings:
        print(setting)
    blank_line('')
def _make_args():
    """
    Registers the command line arguments on the module-level argument parser.

    The input file is mandatory (as the usage string states), so a missing ``-i`` is reported by argparse
    itself instead of surfacing later as a ``TypeError`` from ``os.path.exists(None)``.
    """
    _parser.add_argument('-i', '--in-file', required=True,
                         help='The excel file, located in the parent folder of the package folders, to load.')
    # The output path stays optional: in this file it is only echoed at startup, the manifests themselves are
    # written into each package folder.
    _parser.add_argument('-o', '--out-file',
                         help='The output file path (reported at startup).')
def _check_args():
    """
    Validates the parsed command line arguments and exits the program with status -1 if they are unusable.

    The input file must have been given and must exist.
    """
    fatal_error = False
    if args.in_file is None:
        # os.path.exists(None) raises a TypeError, so a missing argument is reported explicitly instead.
        print('Error: No input file was given (use -i,--in-file)')
        fatal_error = True
    elif not os.path.exists(args.in_file):
        print('Error: The given input file does not exist %s' % args.in_file)
        fatal_error = True
    if fatal_error:
        print('Encountered fatal error checking arguments.')
        print('Exiting....')
        # sys.exit rather than the exit() helper, which only exists when the site module has been loaded.
        sys.exit(-1)
if __name__ == '__main__':
    # Script entry point: print the header, parse and validate the arguments, then run main() and report the
    # run time and exit status.
    prog_start = time.time()
    _printer.write_no_prefix(ConsoleUtils.get_header(_name, _version, _date, _auth))
    _make_args()
    args = _parser.parse_args()
    _check_args()
    # Module-level name read by _to_manifest to locate each package folder; it must be set before main() runs.
    root_directory = os.path.split(args.in_file)[0]
    _show_args()
    status = 0
    try:
        main()
    except Exception as prog_err:
        # Top-level boundary: report the failure and turn it into a non-zero exit status.
        print('Encountered fatal error while processing:')
        print(' %s' % str(prog_err))
        status = -1
    finally:
        prog_run = time.time() - prog_start
        print('Finished processing in %.4fs' % prog_run)
        print('Exiting with status %d' % status)
    # sys.exit rather than the exit() helper, which only exists when the site module has been loaded.
    sys.exit(status)