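# Source: Sleds/scorewalker-utils/LibraryMissing/LibraryMissing.py
#
# Repository viewer metadata: 120 lines, 4.0 KiB, Python
# (viewer controls: Raw / Normal View / History)
#
# Timestamp shown by the viewer: 2025-03-13 21:28:38 +00:00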
import json
import os
import argparse
# --- Program identity -------------------------------------------------------
program_name = 'LibraryMissing'
program_description = 'Finds all missing doctypes in a library given CLUX input'

# --- Console message fragments ----------------------------------------------
# Status prefixes wrapped in ANSI colour escape sequences.
red_error = '\033[91mError:\033[0m'
yellow_warning = '\033[93mWARNING:\033[0m'
blue_okay = '\033[94mOK\033[0m'
# Coloured program name followed by a dashed underline.
program_header = ('\033[95m%s\033[0m\n'
                  '-----------------------') % program_name
# Prompt passed to yes_or_no() when asking for confirmation.
decision_message = ' Is this okay? (Y/N): '

# --- Command-line parser ----------------------------------------------------
# add_help is disabled here; the script registers its own -h/--help option
# under the __main__ guard.
parser = argparse.ArgumentParser(
    prog=program_name,
    description=program_description,
    add_help=False,
)
def load_clux_results(file):
    """Read a CLUX classification file and collect the doctypes to check.

    Args:
        file: Path to a JSON file with a top-level ``"documents"`` list whose
            entries each provide a ``"doctype"`` value.

    Returns:
        A dict mapping a 0-based running index to the doctype of every
        document whose doctype is neither ``'Unknown Document'`` nor
        ``'Blank Page'``, in file order. Repeated doctypes are kept.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If the file is not valid JSON (``json.JSONDecodeError``).
        KeyError: If ``"documents"`` or a document's ``"doctype"`` is absent.
    """
    # Placeholder classifications that never correspond to a library folder.
    ignored = ('Unknown Document', 'Blank Page')

    # Context manager so the handle is closed even if parsing fails.
    with open(file) as clux_handle:
        documents = json.load(clux_handle)["documents"]

    doctypes = (document["doctype"] for document in documents)
    # Keys stay contiguous (0..n-1) because only kept doctypes are numbered.
    return dict(enumerate(name for name in doctypes if name not in ignored))
def search_library(library, doctypes):
    """Return the doctypes that have no usable folder in the library.

    Args:
        library: Root folder of the library.
        doctypes: Doctype names addressable as ``doctypes[0]`` ..
            ``doctypes[len(doctypes) - 1]`` (the index-keyed dict built by
            ``load_clux_results`` or a plain list).

    Returns:
        A dict mapping a 0-based running index to each missing doctype, in
        order of first appearance. A doctype that occurs several times in
        ``doctypes`` is checked and reported only once, so the size of the
        result is the number of distinct missing doctypes.
    """
    # Index lookup (not plain iteration) so index-keyed dicts and lists both work.
    in_order = (doctypes[index] for index in range(len(doctypes)))
    # dict.fromkeys drops repeats while preserving first-seen order.
    distinct = dict.fromkeys(in_order)
    missing = [name for name in distinct if is_doctype_missing(library, name)]
    return dict(enumerate(missing))
def is_doctype_missing(library, doctype):
    """Tell whether a doctype has no populated folder in the library.

    Args:
        library: Root folder of the library.
        doctype: Doctype name; expected to be the name of a sub-folder of
            ``library``.

    Returns:
        True if ``library/doctype`` does not exist, is not a directory, or is
        an empty directory; False if it is a directory with at least one entry.

    Raises:
        OSError: For other listing failures such as ``PermissionError``.
    """
    doctype_folder = os.path.join(library, doctype)
    try:
        # An empty listing means the folder exists but holds nothing.
        return not os.listdir(doctype_folder)
    except (FileNotFoundError, NotADirectoryError):
        # No such path, or a plain file where the folder should be: either
        # way there is nothing in the library for this doctype.
        return True
def write_results(library, missing_doctypes, missing_file):
    """Write the report of missing doctypes to a text file.

    The report is a summary line with the count and the library path,
    followed by one numbered line (starting at 1) per missing doctype.

    Args:
        library: Library path, echoed in the summary line.
        missing_doctypes: Missing doctype names addressable as
            ``missing_doctypes[0]`` .. ``missing_doctypes[len - 1]`` (an
            index-keyed dict or a list).
        missing_file: Path of the report; created or overwritten.

    Raises:
        OSError: If the report file cannot be opened or written.
    """
    total = len(missing_doctypes)
    report = ["There are %d missing doctypes in the library at %s\n" % (total, library)]
    # Index lookup (not plain iteration) so index-keyed dicts and lists both work.
    report.extend(
        "%d) %s. \n" % (position + 1, missing_doctypes[position])
        for position in range(total)
    )
    # Write-only mode suffices, and the with-block closes the file itself.
    with open(missing_file, 'w') as doctypes_file:
        doctypes_file.writelines(report)
def check_args(input, library, output):
    """Validate the command-line paths before any work is done.

    Fatal problems are checked first so the user is not asked to confirm an
    overwrite for a run that cannot proceed. If the output file already
    exists, the user is asked to confirm overwriting it.

    Args:
        input: Path of the CLUX classification file. The name shadows the
            builtin ``input`` inside this function only; it is kept because it
            is part of the function's signature.
        library: Root folder of the library; must be an existing directory.
        output: Path of the report file that will be written.

    Raises:
        SystemExit: With status 1 if the library or the input file is unusable
            (after printing the errors and the help text), or with the status
            raised by ``yes_or_no`` if the user declines the overwrite.
    """
    fatal_errors = False
    # The library is listed folder by folder, so a plain file is not acceptable.
    if not os.path.isdir(library):
        print('%s No library at %s' % (red_error, library))
        fatal_errors = True
    # The input is opened for reading, so a directory is not acceptable.
    if not os.path.exists(input) or os.path.isdir(input):
        print('%s No file at %s' % (red_error, input))
        fatal_errors = True
    if fatal_errors:
        parser.print_help()
        print('Exiting...')
        # Non-zero status so shells and calling scripts can detect the failure.
        raise SystemExit(1)

    if os.path.exists(output):
        print('%s The file: %s already exists, it will be overwritten.' % (yellow_warning, output))
        yes_or_no(decision_message)
def yes_or_no(message):
    """Ask a yes/no question on the console until a valid answer is given.

    Answers are case-insensitive and surrounding whitespace is ignored.

    Args:
        message: Prompt shown for the first attempt; later attempts show a
            fixed "invalid input" prompt.

    Returns:
        None once the user answers ``y`` or ``yes``.

    Raises:
        SystemExit: With status 0 when the user answers ``n`` or ``no``.
    """
    prompt = message
    # Loop instead of recursing so repeated invalid answers cannot grow the stack.
    while True:
        decision = input(prompt).strip().lower()
        if decision in ('y', 'yes'):
            return
        if decision in ('n', 'no'):
            # Declining is a normal, user-requested stop, hence status 0.
            raise SystemExit(0)
        prompt = ' Invalid input, enter Y(es) or N(o): '
def main(library_folder, input_file, missing_file):
    """Run the whole check: load the CLUX doctypes, find the missing ones, report.

    Args:
        library_folder: Root folder of the library to search.
        input_file: Path of the CLUX classification file.
        missing_file: Path of the report file to write.
    """
    doctypes = load_clux_results(input_file)
    write_results(
        library_folder,
        search_library(library_folder, doctypes),
        missing_file,
    )
if __name__ == '__main__':
    # Options are split into two groups so the help output lists the required
    # ones separately from the optional ones.
    required_args = parser.add_argument_group('Required')
    optional_args = parser.add_argument_group('Optional')

    required_args.add_argument(
        '-i', '--input',
        required=True,
        help='The classification output from CLUX.',
    )
    required_args.add_argument(
        '-l', '--library',
        required=True,
        help='The root folder of the library to check for missing doctypes in.',
    )
    required_args.add_argument(
        '-o', '--output',
        required=True,
        help='The output file for the list of missing doctypes.',
    )
    # The parser is created with add_help=False, so help is registered here.
    optional_args.add_argument(
        '-h', '--help',
        action="help",
        help='Prints the help message.',
    )

    args = parser.parse_args()
    clux_file, lib_folder, missing_file = args.input, args.library, args.output

    # Validate the paths first, then run the check.
    check_args(clux_file, lib_folder, missing_file)
    main(lib_folder, clux_file, missing_file)