Sleds/scorewalker-utils/LibCreate/SplitOCR.py

118 lines
4.4 KiB
Python

import json
import argparse
import os
# Name and description shown in the argparse usage/help output.
program_name = 'SplitOCR'
program_description = 'Splits the given frt file into frt files for it\'s component pages.'
# add_help=False disables argparse's automatic -h/--help option; the script registers its own help flag
# when it is run as a program.
parser = argparse.ArgumentParser(prog=program_name, description=program_description, add_help=False)
# Constant values for printing: status prefixes wrapped in ANSI colour escape sequences
# ('\033[9Xm' switches the colour on, '\033[0m' resets it).
red_error = '\033[91mError:\033[0m'
yellow_warning = '\033[93mWARNING:\033[0m'
blue_okay = '\033[94mOK\033[0m'
# Constant values for script: the top-level json keys read from (and written back to) '.frt' files.
NUM_PAGES_TAG = 'numPages'
ORIG_SRC_TYPE_TAG = 'originalSourceType'
ORIG_SRC_FILE_TAG = 'originalSourceFile'
FIXED_UP_TAG = 'fixedUp'
PAGES_TAG = 'pages'
# Parses the pages from the given file into a list of dicts.
def parse_pages(file_name):
    """
    Parses the pages from the given '.frt' file into a list of dictionaries, one per page.

    Every returned dictionary repeats the document-level values of the source file (original source
    type, original source file and the fixed-up value), carries a page count of '1', and stores that
    single page's json under the pages tag.

    :param file_name: The source '.frt' file to parse the pages from.
    :return: A list of dictionaries containing the json data for each page.
    :raises KeyError: If the json in the file is missing one of the expected top-level tags.
    """
    with open(file_name) as reader:
        frt_json = json.load(reader)
    num_pages = frt_json[NUM_PAGES_TAG]
    orig_src_type = frt_json[ORIG_SRC_TYPE_TAG]
    orig_src_file = frt_json[ORIG_SRC_FILE_TAG]
    fixed_up = frt_json[FIXED_UP_TAG]
    pages_json = frt_json[PAGES_TAG]
    # print() does not interpolate logging-style arguments, so the message is formatted explicitly.
    print('Parsing %s pages from %s' % (num_pages, file_name))
    # NOTE(review): the page count is written as the string '1' and the pages tag holds the page
    # itself rather than a one-element list; kept as-is because consumers of the split files may
    # rely on that shape -- confirm against the '.frt' format.
    return [{NUM_PAGES_TAG: '1',  # There is only one page!
             ORIG_SRC_TYPE_TAG: orig_src_type,
             ORIG_SRC_FILE_TAG: orig_src_file,
             FIXED_UP_TAG: fixed_up,
             PAGES_TAG: page}
            for page in pages_json]
# Writes each parsed page out to its own '.frt' file, named after the source file.
def write_pages(pages_array, file_name, overwrite):
    """
    Writes the pages in the passed list to files named [file_name without '.frt'].[page_idx].frt

    The page index is zero based and zero padded to four digits, e.g. 'doc.frt' yields
    'doc.0000.frt', 'doc.0001.frt', ...

    :param pages_array: The list of page dictionaries parsed from file_name to write.
    :param file_name: The source file the dictionaries in pages_array were parsed from.
    :param overwrite: Whether or not to overwrite files if they exist.
                      If set to True, they will be overwritten without asking. If set to False, the
                      user is asked for permission first (answering no exits the program).
    :return: nothing.
    """
    extension = '.frt'
    # Only the trailing extension is stripped: str.replace would also rewrite any '.frt' that appears
    # earlier in the path (e.g. in a directory name). A name without the extension is kept whole so
    # that the output path can never be the source file itself.
    base_name = file_name[:-len(extension)] if file_name.endswith(extension) else file_name
    for page_idx, page in enumerate(pages_array):
        new_file = '{}.{:04d}{}'.format(base_name, page_idx, extension)
        # If the file exists and we aren't auto overwriting,
        if os.path.exists(new_file) and not overwrite:
            # Ask for permission. The prompt is formatted here because yes_or_no expects a
            # ready-to-print message.
            yes_or_no('%s File will be overwritten (%s)\n'
                      ' Continue (y/n)? ' % (yellow_warning, new_file))
        with open(new_file, 'w') as writer:
            json.dump(page, writer, indent=1)
def main(in_file, auto_overwrite):
    """
    Splits the given '.frt' file: parses its pages and writes each one out to its own file.

    :param in_file: The source '.frt' file to split.
    :param auto_overwrite: Passed to write_pages as its overwrite argument.
    :return: nothing.
    """
    write_pages(parse_pages(in_file), in_file, auto_overwrite)
def check_args(in_file):
    """
    Validates the input file argument and exits the program if it cannot be used.

    Both checks are always run so that every problem is reported before exiting.

    :param in_file: The path given for the input '.frt' file.
    :return: nothing.
    :raises SystemExit: With status -1, after printing the help message, if the path does not end
                        with '.frt' or does not exist.
    """
    fatal_error = False
    if not in_file.endswith('.frt'):
        # print() does not interpolate logging-style arguments, so the message is formatted explicitly.
        print('%s File is not the correct type (.frt): %s' % (red_error, in_file))
        fatal_error = True
    if not os.path.exists(in_file):
        print('%s File does not exist: %s' % (red_error, in_file))
        fatal_error = True
    if fatal_error:
        parser.print_help()
        print('Fatal error occurred, Exiting...')
        # exit() is a helper installed by the site module for interactive use; raising SystemExit
        # gives the same exit status without depending on it.
        raise SystemExit(-1)
# Will ask the user to input yes or no and if they input yes the program will continue to execute. If however, they
# input no, the program will exit with status 0. 0 status is used here because there was no error, the user just chose
# to exit rather than continue executing.
def yes_or_no(message, *args):
    """
    Prompts the user with a yes/no question and only returns once they have answered yes.

    :param message: The prompt to show. May contain '%s' placeholders when args are given.
    :param args: Optional values interpolated into message with the '%' operator. This lets callers
                 pass the message and its values separately.
    :return: nothing; returning means the user chose to continue.
    :raises SystemExit: With status 0 if the user answers no.
    """
    prompt = message % args if args else message
    # Loop rather than recurse, so repeated invalid answers cannot exhaust the call stack.
    while True:
        decision = input(prompt).strip().lower()
        if decision in ('y', 'yes'):
            # print() does not interpolate logging-style arguments, so format explicitly.
            print('%s continuing...' % blue_okay)
            return
        if decision in ('n', 'no'):
            # Status 0: the user chose to stop, which is not an error.
            raise SystemExit(0)
        prompt = ' Invalid input, enter Y(es) or N(o): '
if __name__ == '__main__':
    # Arguments are registered in two named groups so the help output lists required and optional
    # flags under separate headings.
    required_args = parser.add_argument_group('Required')
    optional_args = parser.add_argument_group('Optional')
    required_args.add_argument('-i', '--input_file', required=True,
                               help='The input frt file to split the pages out of.')
    # 'store_true' is the argparse action for a boolean flag; 'set_true' is not a registered action
    # and makes add_argument raise ValueError as soon as the script starts.
    optional_args.add_argument('-w', '--overwrite', required=False, action='store_true',
                               help='If used, previous files will be overwritten without asking permission.')
    # The parser is created with add_help=False, so the help flag is registered by hand here.
    optional_args.add_argument('-h', '--help', action='help', help='Prints the help message.')