"""Find the doctypes reported by CLUX that are missing from a library folder.

The script reads a CLUX classification result (JSON), checks for every
reported doctype whether the library contains a non-empty folder of that
name, and writes the doctypes without one to a report file.
"""
import argparse
import json
import os
import sys

program_name = 'LibraryMissing'
program_description = 'Finds all missing doctypes in a library given CLUX input'
# Help is disabled here because a custom -h/--help option is registered in the
# "Optional" argument group when the module is run as a script.
parser = argparse.ArgumentParser(prog=program_name,
                                 description=program_description,
                                 add_help=False)

# ANSI-coloured message fragments.
red_error = '\033[91mError:\033[0m'
yellow_warning = '\033[93mWARNING:\033[0m'
blue_okay = '\033[94mOK\033[0m'
program_header = ('\033[95m%s\033[0m\n'
                  '-----------------------' % program_name)
decision_message = ' Is this okay? (Y/N): '

# Doctypes that CLUX reports but that never correspond to a library folder.
_IGNORED_DOCTYPES = ('Unknown Document', 'Blank Page')
_RETRY_MESSAGE = ' Invalid input, enter Y(es) or N(o): '


def load_clux_results(file):
    """Load the doctypes found in a CLUX classification file.

    Args:
        file: Path to a JSON file holding a top-level ``"documents"`` list
            whose entries each carry a ``"doctype"`` value.

    Returns:
        A dict mapping a running index (0, 1, 2, ...) to each doctype, in
        file order, skipping 'Unknown Document' and 'Blank Page' entries.

    Raises:
        OSError: If the file cannot be opened.
        KeyError: If ``"documents"`` or a document's ``"doctype"`` is absent.
        ValueError: If the file is not valid JSON.
    """
    # The context manager guarantees the handle is closed even when parsing
    # fails; JSON interchange text is UTF-8.
    with open(file, encoding='utf-8') as clux_handle:
        documents = json.load(clux_handle)["documents"]

    wanted = (document["doctype"] for document in documents
              if document["doctype"] not in _IGNORED_DOCTYPES)
    return dict(enumerate(wanted))


def search_library(library, doctypes):
    """Collect the doctypes that have no usable folder in the library.

    Args:
        library: Root folder of the library.
        doctypes: Doctypes addressable by the indices ``0 .. len - 1``, such
            as the dict returned by ``load_clux_results`` or a list.

    Returns:
        A dict mapping a running index (0, 1, 2, ...) to each missing
        doctype, in the order they were encountered.
    """
    # Index-based access is deliberate: it works both for lists and for the
    # int-keyed dicts this module produces.
    candidates = (doctypes[i] for i in range(len(doctypes)))
    missing = (doctype for doctype in candidates
               if is_doctype_missing(library, doctype))
    return dict(enumerate(missing))


def is_doctype_missing(library, doctype):
    """Return True when the library has no non-empty folder for the doctype.

    A doctype counts as missing when its folder does not exist, when the
    path exists but is not a directory, or when the directory is empty.
    """
    doctype_folder = os.path.join(library, doctype)
    # isdir (rather than exists) keeps a plain file that happens to share the
    # doctype's name from crashing the listdir call below.
    if not os.path.isdir(doctype_folder):
        return True
    return not os.listdir(doctype_folder)


def write_results(library, missing_doctypes, missing_file):
    """Write the report of missing doctypes.

    Args:
        library: Library root; only used in the report's summary line.
        missing_doctypes: Doctypes addressable by the indices
            ``0 .. len - 1`` (as returned by ``search_library``).
        missing_file: Path of the report; an existing file is overwritten.
    """
    total = len(missing_doctypes)
    with open(missing_file, 'w', encoding='utf-8') as doctypes_file:
        doctypes_file.write(
            "There are %d missing doctypes in the library at %s\n"
            % (total, library))
        doctypes_file.writelines(
            "%d) %s. \n" % (position + 1, missing_doctypes[position])
            for position in range(total))


def check_args(input, library, output):  # noqa: A002 - public parameter name
    """Validate the command-line arguments before any work is done.

    Exits with status 1 (after printing the help text) when the library or
    the input file does not exist. Otherwise, if the output file already
    exists, asks the user to confirm that it may be overwritten.

    Args:
        input: Path of the CLUX classification file.
        library: Root folder of the library.
        output: Path of the report that will be written.
    """
    # Validate first: there is no point asking about overwriting the output
    # when the run is going to abort anyway.
    fatal_errors = False
    if not os.path.exists(library):
        print('%s No library at %s' % (red_error, library))
        fatal_errors = True
    if not os.path.exists(input):
        print('%s No file at %s' % (red_error, input))
        fatal_errors = True

    if fatal_errors:
        parser.print_help()
        print('Exiting...')
        # Non-zero status so shells and calling scripts can detect failure.
        sys.exit(1)

    if os.path.exists(output):
        print('%s The file: %s already exists, it will be overwritten.'
              % (yellow_warning, output))
        yes_or_no(decision_message)


def yes_or_no(message):
    """Prompt until the user answers yes or no.

    Returns normally on 'y'/'yes' and exits the program with status 0 on
    'n'/'no' (case-insensitive). Any other answer triggers a new prompt.

    Args:
        message: The text of the first prompt.
    """
    # The "input" parameter of check_args shadows the builtin only inside
    # that function; here the name refers to the builtin prompt function.
    prompt = message
    while True:
        decision = input(prompt).strip().lower()
        if decision in ('y', 'yes'):
            return
        if decision in ('n', 'no'):
            sys.exit(0)
        prompt = _RETRY_MESSAGE


def main(library_folder, input_file, missing_file):
    """Run the full check: load CLUX results, search, and write the report."""
    clux_results = load_clux_results(input_file)
    missing_doctypes = search_library(library_folder, clux_results)
    write_results(library_folder, missing_doctypes, missing_file)


if __name__ == '__main__':
    required_args = parser.add_argument_group('Required')
    optional_args = parser.add_argument_group('Optional')

    required_args.add_argument(
        '-i', '--input', required=True,
        help='The classification output from CLUX.')
    required_args.add_argument(
        '-l', '--library', required=True,
        help='The root folder of the library to check for missing doctypes in.')
    required_args.add_argument(
        '-o', '--output', required=True,
        help='The output file for the list of missing doctypes.')
    optional_args.add_argument(
        '-h', '--help', action="help",
        help='Prints the help message.')

    args = parser.parse_args()
    clux_file = args.input
    lib_folder = args.library
    missing_file = args.output

    check_args(clux_file, lib_folder, missing_file)
    main(lib_folder, clux_file, missing_file)