120 lines
4.0 KiB
Python
120 lines
4.0 KiB
Python
import json
|
|
import os
|
|
import argparse
|
|
|
|
# Name and one-line summary handed to argparse for the usage/help output.
program_name = 'LibraryMissing'
program_description = 'Finds all missing doctypes in a library given CLUX input'

# Automatic -h/--help is switched off on the shared parser; the script entry
# point registers its own help flag on it.
parser = argparse.ArgumentParser(
    prog=program_name,
    description=program_description,
    add_help=False,
)

# Console status tags wrapped in ANSI colour escape sequences.
red_error = '\033[91mError:\033[0m'
yellow_warning = '\033[93mWARNING:\033[0m'
blue_okay = '\033[94mOK\033[0m'

# Coloured program name followed, on the next line, by a dashed rule.
program_header = (
    '\033[95m%s\033[0m\n'
    '-----------------------'
) % program_name

# Prompt shown when the user has to confirm an action.
decision_message = ' Is this okay? (Y/N): '
|
|
|
|
|
|
def load_clux_results(file):
    """Load the classified doctypes from a CLUX JSON results file.

    The file is parsed as JSON and its top-level "documents" list is read.
    Every entry whose "doctype" is neither 'Unknown Document' nor
    'Blank Page' is kept, in file order. Repeated doctypes are kept once
    per occurrence; no de-duplication is performed.

    Args:
        file: Path of the CLUX output file to read.

    Returns:
        dict: The kept doctype names keyed by consecutive integers
        starting at 0.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the content is not valid JSON.
        KeyError: If "documents", or a document's "doctype", is absent.
    """
    # The context manager releases the file handle as soon as parsing is
    # done instead of leaving it open until garbage collection.
    with open(file) as clux_handle:
        clux_file = json.load(clux_handle)

    ignored_doctypes = ('Unknown Document', 'Blank Page')
    kept_doctypes = (
        document["doctype"]
        for document in clux_file["documents"]
        if document["doctype"] not in ignored_doctypes
    )

    # Only kept entries are numbered, so the keys form a gap-free 0..n-1 run.
    return dict(enumerate(kept_doctypes))
|
|
|
|
|
|
def search_library(library, doctypes):
    """Collect the doctypes that are missing from the library.

    Each doctype is checked with is_doctype_missing(); those reported as
    missing are returned in the order they were checked. A doctype that
    occurs several times in the input is checked and reported once per
    occurrence.

    Args:
        library: Root folder of the library.
        doctypes: Doctype names, either as a dict (visited in ascending
            key order, so the 0..n-1 integer keys are walked in sequence)
            or as any other iterable of names.

    Returns:
        dict: The missing doctype names keyed by consecutive integers
        starting at 0.
    """
    # Walking sorted keys instead of range(len(...)) keeps the original
    # order for 0..n-1 keys without raising KeyError on any other key set.
    if isinstance(doctypes, dict):
        candidates = [doctypes[key] for key in sorted(doctypes)]
    else:
        candidates = doctypes

    # If it doesn't have a usable folder in the library, it is missing.
    missing = (
        doctype
        for doctype in candidates
        if is_doctype_missing(library, doctype)
    )
    return dict(enumerate(missing))
|
|
|
|
|
|
def is_doctype_missing(library, doctype):
    """Tell whether a doctype has no usable folder inside the library.

    A doctype counts as missing when `<library>/<doctype>` is not an
    existing directory, or when that directory contains no entries.

    Args:
        library: Root folder of the library.
        doctype: Doctype name, used as the folder name under the library.

    Returns:
        bool: True if the doctype is missing, False otherwise.
    """
    doctype_folder = os.path.join(library, doctype)

    # isdir() rather than exists(): a regular file with the doctype's name
    # is not a doctype folder, and calling listdir() on it would raise
    # NotADirectoryError.
    if not os.path.isdir(doctype_folder):
        return True

    # An empty folder is treated the same as an absent one.
    return not os.listdir(doctype_folder)
|
|
|
|
|
|
def write_results(library, missing_doctypes, missing_file):
    """Write the report of missing doctypes to a text file.

    The file is created or overwritten. Its first line states how many
    doctypes are missing and names the library; each following line is a
    1-based position and a doctype name.

    Args:
        library: Root folder of the library; only quoted in the header.
        missing_doctypes: Missing doctype names, either as a dict (written
            in ascending key order, so 0..n-1 integer keys come out in
            sequence) or as any other iterable of names.
        missing_file: Path of the report file to write.

    Raises:
        OSError: If the report file cannot be opened for writing.
    """
    if isinstance(missing_doctypes, dict):
        names = [missing_doctypes[key] for key in sorted(missing_doctypes)]
    else:
        names = list(missing_doctypes)

    report = ["There are %d missing doctypes in the library at %s\n" % (len(names), library)]
    report.extend(
        "%d) %s. \n" % (position, name)
        for position, name in enumerate(names, start=1)
    )

    # Plain 'w' is enough because the file is never read back, and the
    # with-statement closes it, so no explicit close() is needed.
    with open(missing_file, 'w') as doctypes_file:
        doctypes_file.writelines(report)
|
|
|
|
|
|
def check_args(input, library, output):
    """Validate the command-line paths before any work is done.

    A missing library or input path is fatal: every such problem is
    printed, followed by the parser's help text, and the process exits
    with status 1. If the paths are valid and the output file already
    exists, the user is warned and asked to confirm the overwrite through
    yes_or_no().

    Args:
        input: Path of the CLUX results file. The name shadows the
            built-in only inside this function and is kept so keyword
            callers continue to work.
        library: Root folder of the library.
        output: Path of the report file that will be written.

    Raises:
        SystemExit: With status 1 when the library or the input is absent.
    """
    # Check for fatal errors first, so the user is never asked to confirm
    # an overwrite for a run that is about to be aborted anyway.
    fatal_errors = False
    if not os.path.exists(library):
        print('%s No library at %s' % (red_error, library))
        fatal_errors = True
    if not os.path.exists(input):
        print('%s No file at %s' % (red_error, input))
        fatal_errors = True

    if fatal_errors:
        parser.print_help()
        print('Exiting...')
        # A non-zero status lets shells and calling scripts detect the
        # failure.
        raise SystemExit(1)

    if os.path.exists(output):
        print('%s The file: %s already exists, it will be overwritten.' % (yellow_warning, output))
        yes_or_no(decision_message)
|
|
|
|
|
|
def yes_or_no(message):
    """Ask a yes/no question on the console until a valid answer is given.

    Accepted answers, ignoring case and surrounding whitespace, are
    'y'/'yes' and 'n'/'no'. Any other answer re-prompts with a hint.

    Args:
        message: Prompt shown for the first attempt.

    Returns:
        None, once the user has answered yes.

    Raises:
        SystemExit: With status 0 when the user answers no.
        EOFError: If standard input ends before an answer is read.
    """
    prompt = message
    # A loop rather than recursion: repeated invalid answers cannot grow
    # the call stack.
    while True:
        decision = input(prompt).strip().lower()
        if decision in ('y', 'yes'):
            return
        if decision in ('n', 'no'):
            # SystemExit is raised directly because the bare exit() helper
            # is installed by the site module and is not always available.
            raise SystemExit(0)
        prompt = ' Invalid input, enter Y(es) or N(o): '
|
|
|
|
|
|
def main(library_folder, input_file, missing_file):
    """Run the full check: load CLUX doctypes, find the gaps, write the report.

    Args:
        library_folder: Root folder of the library to inspect.
        input_file: Path of the CLUX results file.
        missing_file: Path of the report file to write.
    """
    # The calls are nested but still run in pipeline order: load the CLUX
    # results, search the library, then write the report.
    write_results(
        library_folder,
        search_library(library_folder, load_clux_results(input_file)),
        missing_file,
    )
|
|
|
|
|
|
if __name__ == '__main__':
    # Arguments are split into two titled groups in the help output.
    required_args = parser.add_argument_group('Required')
    optional_args = parser.add_argument_group('Optional')

    # (short flag, long flag, help text) for each mandatory option, in the
    # order they are registered on the parser.
    required_flags = (
        ('-i', '--input', 'The classification output from CLUX.'),
        ('-l', '--library', 'The root folder of the library to check for missing doctypes in.'),
        ('-o', '--output', 'The output file for the list of missing doctypes.'),
    )
    for short_flag, long_flag, help_text in required_flags:
        required_args.add_argument(short_flag, long_flag, required=True, help=help_text)

    # The parser was created with add_help=False, so the help flag is
    # registered by hand and lands in the 'Optional' group.
    optional_args.add_argument('-h', '--help', action="help", help='Prints the help message.')

    args = parser.parse_args()
    clux_file, lib_folder, missing_file = args.input, args.library, args.output

    # Validate the paths (this may prompt or exit) before doing any work.
    check_args(clux_file, lib_folder, missing_file)

    main(lib_folder, clux_file, missing_file)
|
|
|