118 lines
4.4 KiB
Python
118 lines
4.4 KiB
Python
|
|
import json
|
||
|
|
import argparse
|
||
|
|
import os
|
||
|
|
|
||
|
|
# Program metadata shown in the command-line usage/help output.
program_name = 'SplitOCR'
program_description = 'Splits the given frt file into frt files for its component pages.'
# add_help=False because the script registers its own '-h/--help' option in
# the entry-point block instead of using argparse's automatic one.
parser = argparse.ArgumentParser(prog=program_name, description=program_description, add_help=False)
|
||
|
|
# Status labels for terminal output. Each one is an ANSI color escape,
# the label text, and the ANSI reset escape ('\033[0m').
red_error = '\033[91m' 'Error:' '\033[0m'
yellow_warning = '\033[93m' 'WARNING:' '\033[0m'
blue_okay = '\033[94m' 'OK' '\033[0m'
|
||
|
|
# Keys of the top-level JSON object stored in an '.frt' file.
NUM_PAGES_TAG = 'numPages'                # page count; reported while parsing
ORIG_SRC_TYPE_TAG = 'originalSourceType'  # copied unchanged into every split file
ORIG_SRC_FILE_TAG = 'originalSourceFile'  # copied unchanged into every split file
FIXED_UP_TAG = 'fixedUp'                  # copied unchanged into every split file
PAGES_TAG = 'pages'                       # collection holding one entry per page
|
||
|
|
|
||
|
|
|
||
|
|
# Parses the pages from the given file into a list of dicts.
|
||
|
|
def parse_pages(file_name):
    """
    Parses the pages from the given '.frt' file into a list of dictionaries, one per page.

    Every returned dictionary repeats the document-level values of the source file
    (original source type, original source file and the fixed-up value) and holds
    exactly one entry of the source file's page collection.

    :param file_name: The source '.frt' file to parse the pages from.
    :return: A list of dictionaries containing the json data for each page.
    :raises OSError: If the file cannot be opened.
    :raises ValueError: If the file content is not valid JSON.
    :raises KeyError: If one of the expected top-level keys is missing.
    """
    # JSON text is UTF-8 by specification; do not depend on the platform's
    # default encoding when reading it.
    with open(file_name, encoding='utf-8') as reader:
        frt_json = json.load(reader)

    num_pages = frt_json[NUM_PAGES_TAG]
    orig_src_type = frt_json[ORIG_SRC_TYPE_TAG]
    orig_src_file = frt_json[ORIG_SRC_FILE_TAG]
    fixed_up = frt_json[FIXED_UP_TAG]
    pages_json = frt_json[PAGES_TAG]

    # print() does not interpolate printf-style arguments by itself, so the
    # message has to be formatted explicitly before it is printed.
    print('Parsing %s pages from %s' % (num_pages, file_name))

    # There is only one page in each result, hence the constant page count.
    # NOTE(review): the count is written as the string '1' and the page entry
    # is stored directly (not wrapped in a list) - confirm that consumers of
    # the split files expect exactly this shape.
    return [{NUM_PAGES_TAG: '1',
             ORIG_SRC_TYPE_TAG: orig_src_type,
             ORIG_SRC_FILE_TAG: orig_src_file,
             FIXED_UP_TAG: fixed_up,
             PAGES_TAG: page}
            for page in pages_json]
|
||
|
|
|
||
|
|
|
||
|
|
# Writes each parsed page to its own '.frt' file.
|
||
|
|
def write_pages(pages_array, file_name, overwrite):
    """
    Writes the pages in the passed list to files named [file_name].[page_idx].frt

    [file_name] is the source path without its trailing '.frt' and [page_idx] is the
    zero-based position of the page in pages_array, zero-padded to four digits.

    :param pages_array: The list of page dictionaries parsed from file_name to write.
    :param file_name: The source file the dictionaries in pages_array were parsed from.
    :param overwrite: Whether or not to overwrite files if they exist.
                      If set to True, they will be overwritten without asking. If set to False,
                      the user is asked for permission before an existing file is overwritten.
    :return: nothing.
    """
    extension = '.frt'
    # Strip the extension only at the very end of the path. Replacing every
    # '.frt' occurrence would also rewrite directory names that contain it, and
    # a path without the extension must never map back onto the source file.
    if file_name.endswith(extension):
        base_name = file_name[:-len(extension)]
    else:
        base_name = file_name

    for i, page in enumerate(pages_array):
        new_file = '{}.{:04d}{}'.format(base_name, i, extension)
        # If the file exists and we aren't auto overwriting,
        if os.path.exists(new_file) and not overwrite:
            # Ask for permission. yes_or_no takes one already formatted message
            # and ends the program if the user declines.
            question = ('%s File will be overwritten (%s)\n'
                        ' Continue (y/n)? ') % (yellow_warning, new_file)
            yes_or_no(question)

        with open(new_file, 'w') as writer:
            json.dump(page, writer, indent=1)
|
||
|
|
|
||
|
|
|
||
|
|
def main(in_file, auto_overwrite):
    """
    Splits in_file into one '.frt' file per page.

    :param in_file: The source '.frt' file to split.
    :param auto_overwrite: Passed on to write_pages as its overwrite argument.
    :return: nothing.
    """
    write_pages(parse_pages(in_file), in_file, auto_overwrite)
|
||
|
|
|
||
|
|
|
||
|
|
def check_args(in_file):
    """
    Validates the input file argument and ends the program if it is unusable.

    Both checks are always performed so that every problem is reported before
    the help text is printed and the program exits.

    :param in_file: The path of the input file to validate.
    :return: nothing. Returns normally only if in_file ends with '.frt' and exists.
    :raises SystemExit: With status -1 if any check fails.
    """
    fatal_error = False

    # print() does not interpolate printf-style arguments by itself, so each
    # message is formatted explicitly before it is printed.
    if not in_file.endswith('.frt'):
        print('%s File is not the correct type (.frt): %s' % (red_error, in_file))
        fatal_error = True
    if not os.path.exists(in_file):
        print('%s File does not exist: %s' % (red_error, in_file))
        fatal_error = True

    if not fatal_error:
        return

    parser.print_help()
    print('Fatal error occurred, Exiting...')
    # Raise SystemExit directly: the exit() builtin is injected by the site
    # module for interactive use and is not guaranteed to exist in programs.
    raise SystemExit(-1)
|
||
|
|
|
||
|
|
|
||
|
|
# Asks the user to input yes or no. If they input yes, the program continues to execute. If, however, they
# input no, the program exits with status 0. Status 0 is used here because there was no error; the user just
# chose to exit rather than continue executing.
|
||
|
|
def yes_or_no(message):
    """
    Prompts the user with message until they answer yes or no.

    The answer is compared case-insensitively and ignoring surrounding
    whitespace. 'y' or 'yes' lets the program continue; 'n' or 'no' ends it.
    Any other answer triggers a new prompt asking for valid input.

    :param message: The prompt shown for the first question.
    :return: nothing. Returns normally only if the user answered yes.
    :raises SystemExit: With status 0 if the user answered no.
    """
    prompt = message
    # Loop instead of recursing so repeated invalid answers cannot exhaust
    # the call stack.
    while True:
        decision = input(prompt).strip().lower()
        if decision in ('y', 'yes'):
            # print() does not interpolate printf-style arguments by itself.
            print('%s continuing...' % blue_okay)
            return
        if decision in ('n', 'no'):
            # Status 0: declining is a user choice, not an error.
            raise SystemExit(0)
        prompt = ' Invalid input, enter Y(es) or N(o): '
|
||
|
|
|
||
|
|
|
||
|
|
if __name__ == '__main__':
|
||
|
|
required_args = parser.add_argument_group('Required')
|
||
|
|
optional_args = parser.add_argument_group('Optional')
|
||
|
|
|
||
|
|
required_args.add_argument('-i', '--input_file', required=True,
|
||
|
|
help='The input frt file to split the pages out of.')
|
||
|
|
optional_args.add_argument('-w', '--overwrite', required=False, action='set_true',
|
||
|
|
help='If used, previous files will be overwritten without asking permission.')
|
||
|
|
optional_args.add_argument('-h', '--help', action='help', help='Prints the help message.')
|