"""SplitOCR: split a '.frt' file into one '.frt' file per page it contains."""
import argparse
import json
import os
import sys

program_name = 'SplitOCR'
program_description = 'Splits the given frt file into frt files for it\'s component pages.'
# add_help=False because the '-h/--help' option is registered by hand in the
# '__main__' section so that it can be listed in the 'Optional' group.
parser = argparse.ArgumentParser(prog=program_name, description=program_description, add_help=False)

# Constant values for printing (labels wrapped in ANSI colour escape codes).
red_error = '\033[91mError:\033[0m'
yellow_warning = '\033[93mWARNING:\033[0m'
blue_okay = '\033[94mOK\033[0m'

# Constant values for script: the top-level JSON keys of a '.frt' file.
NUM_PAGES_TAG = 'numPages'
ORIG_SRC_TYPE_TAG = 'originalSourceType'
ORIG_SRC_FILE_TAG = 'originalSourceFile'
FIXED_UP_TAG = 'fixedUp'
PAGES_TAG = 'pages'

# File extension of the input file and of every generated per-page file.
FRT_EXTENSION = '.frt'


def parse_pages(file_name):
    """
    Parses the pages from the given '.frt' file into a list of dictionaries, one per page.

    Every returned dictionary repeats the file-level metadata of the source
    (source type, source file, fixed-up flag) and carries exactly one page.

    :param file_name: The source '.frt' file to parse the pages from.
    :return: A list of dictionaries containing the json data for each page.
    :raises KeyError: If one of the expected top-level keys is missing from the file.
    """
    with open(file_name, encoding='utf-8') as reader:
        frt_json = json.load(reader)

    num_pages = frt_json[NUM_PAGES_TAG]
    orig_src_type = frt_json[ORIG_SRC_TYPE_TAG]
    orig_src_file = frt_json[ORIG_SRC_FILE_TAG]
    fixed_up = frt_json[FIXED_UP_TAG]
    pages_json = frt_json[PAGES_TAG]

    # print() does not interpolate printf-style arguments, so format explicitly.
    print('Parsing %s pages from %s' % (num_pages, file_name))

    # NOTE(review): the page count is written as the string '1' and the 'pages'
    # entry holds the page object itself rather than a one-element list.
    # Confirm that consumers of the split files expect this shape.
    return [
        {
            NUM_PAGES_TAG: '1',  # There is only one page!
            ORIG_SRC_TYPE_TAG: orig_src_type,
            ORIG_SRC_FILE_TAG: orig_src_file,
            FIXED_UP_TAG: fixed_up,
            PAGES_TAG: page,
        }
        for page in pages_json
    ]


def write_pages(pages_array, file_name, overwrite):
    """
    Writes the pages in the passed list to files named [file_name].[page_idx].frt

    The page index is zero based and zero padded to four digits, so the first
    page of 'doc.frt' is written to 'doc.0000.frt'.

    :param pages_array: The list of page dictionaries parsed from file_name to write.
    :param file_name: The source file the dictionaries in pages_array were parsed from.
    :param overwrite: Whether or not to overwrite files if they exist. If set to True, they will be overwritten
                      silently; if set to False, the user is asked for permission first and the program exits
                      if permission is refused.
    :return: nothing.
    """
    # Strip only a trailing '.frt'. A blanket str.replace would also rewrite
    # '.frt' inside directory names, and would leave a name without '.frt'
    # untouched, which makes the output path equal to the source path.
    if file_name.endswith(FRT_EXTENSION):
        base_name = file_name[:-len(FRT_EXTENSION)]
    else:
        base_name = file_name

    for page_idx, page in enumerate(pages_array):
        new_file = '{}.{:04d}{}'.format(base_name, page_idx, FRT_EXTENSION)

        # If the file exists and we aren't auto overwriting, ask for permission.
        # yes_or_no() exits the program when the user declines.
        if os.path.exists(new_file) and not overwrite:
            yes_or_no('%s File will be overwritten (%s)\n Continue (y/n)? ' % (yellow_warning, new_file))

        with open(new_file, 'w', encoding='utf-8') as writer:
            json.dump(page, writer, indent=1)


def main(in_file, auto_overwrite):
    """
    Splits in_file into one '.frt' file per page.

    :param in_file: The '.frt' file to split.
    :param auto_overwrite: If True, existing output files are overwritten without asking.
    :return: nothing.
    """
    pages = parse_pages(in_file)
    write_pages(pages, in_file, auto_overwrite)


def check_args(in_file):
    """
    Validates the input file argument and exits with status -1 if it is unusable.

    Both checks are always run so that every problem is reported before exiting.

    :param in_file: The path given for the input file.
    :return: nothing; only returns if the path ends in '.frt' and exists.
    """
    fatal_error = False

    if not in_file.endswith(FRT_EXTENSION):
        print('%s File is not the correct type (.frt): %s' % (red_error, in_file))
        fatal_error = True

    if not os.path.exists(in_file):
        print('%s File does not exist: %s' % (red_error, in_file))
        fatal_error = True

    if fatal_error:
        parser.print_help()
        print('Fatal error occurred, Exiting...')
        sys.exit(-1)


def yes_or_no(message):
    """
    Asks the user to input yes or no and keeps asking until the answer is valid.

    If they input yes the program will continue to execute. If however, they input no, the program will exit
    with status 0. 0 status is used here because there was no error, the user just chose to exit rather than
    continue executing.

    :param message: The prompt shown for the first question.
    :return: nothing; only returns if the user answered yes.
    """
    prompt = message
    # Loop instead of recursing so repeated invalid input cannot grow the stack.
    while True:
        decision = input(prompt).strip().lower()
        if decision in ('y', 'yes'):
            print('%s continuing...' % blue_okay)
            return
        if decision in ('n', 'no'):
            sys.exit(0)
        prompt = ' Invalid input, enter Y(es) or N(o): '


if __name__ == '__main__':
    required_args = parser.add_argument_group('Required')
    optional_args = parser.add_argument_group('Optional')
    required_args.add_argument('-i', '--input_file', required=True,
                               help='The input frt file to split the pages out of.')
    # 'store_true' makes '-w' a flag that defaults to False.
    optional_args.add_argument('-w', '--overwrite', required=False, action='store_true',
                               help='If used, previous files will be overwritten without asking permission.')
    optional_args.add_argument('-h', '--help', action='help', help='Prints the help message.')

    args = parser.parse_args()
    check_args(args.input_file)
    main(args.input_file, args.overwrite)