"""LibraryWalker: build a reduced library containing only false-positive doctypes.

The script reads an analysis CSV, collects the doctype names that appear on
rows whose status is ``FalsePos`` and copies the matching folders (``.frt``
files only) from the source library into a new library.
"""
import argparse
import os
import shutil
import sys

import pandas

program_name = 'LibraryWalker'
program_description = 'Creates a subset of the main library with only false positives.'
# Module level because check_args() prints the help text on fatal errors.
parser = argparse.ArgumentParser(prog=program_name, description=program_description, add_help=False)

# ANSI-coloured prefixes used in console messages.
red_error = '\033[91mError:\033[0m'
yellow_warning = '\033[93mWARNING:\033[0m'
blue_okay = '\033[94mOK\033[0m'
program_header = ('\033[95m%s\033[0m\n'
                  '-----------------------' % program_name)
decision_message = ' Is this okay? (Y/N): '


def copy_frts(src, dst):
    """Copy ``src`` to ``dst`` with metadata, but only if it is a ``.frt`` file.

    Intended as the ``copy_function`` of ``shutil.copytree``: every other file
    is silently skipped, so only ``.frt`` files end up in the copied tree.
    """
    if src.endswith('.frt'):
        shutil.copy2(src, dst)


def load_false_positives(file_name):
    """Collect the unique doctype names involved in false positives.

    The CSV is read without a header row, using the fixed column names below.
    For every row whose ``Status`` is ``'FalsePos'`` and whose ``WalkerType``
    is new (and is not the placeholder ``'no results returned'``), the
    ``WalkerType`` name is recorded, followed by the row's ``CluxDocType``
    name if that one has not been recorded yet.

    :param file_name: path of the analysis CSV file.
    :return: dict mapping consecutive integers (0, 1, ...) to doctype names in
        discovery order; empty if ``file_name`` does not exist.
    """
    col_names = ['Doc Index', 'Page Index', 'CluxDocType', 'WalkerType', 'Status', 'Scored',
                 'WalkerPageType', 'Num Char', 'Raw Score', 'Self Score', 'Sum Match Scores',
                 'Top Doc Agree']
    false_pos_fields = {}

    # If there is no file there is nothing to collect.
    if not os.path.exists(file_name):
        return false_pos_fields

    data = pandas.read_csv(file_name, names=col_names)
    print('Opened file %s\n' % file_name)

    # Names already recorded; a set keeps the duplicate check O(1) per row.
    seen = set()
    rows = zip(data['Status'].tolist(), data['WalkerType'].tolist(), data['CluxDocType'].tolist())
    for status, doc_name, clux_name in rows:
        # Compare the same string form that gets stored, so non-string cells
        # (numbers, NaN) are de-duplicated correctly as well.
        doc_name = str(doc_name)
        clux_name = str(clux_name)
        if status != 'FalsePos' or doc_name == 'no results returned' or doc_name in seen:
            continue
        for name in (doc_name, clux_name):
            if name not in seen:
                seen.add(name)
                false_pos_fields[len(false_pos_fields)] = name

    # The count is the number of unique doctype names recorded.
    print('Found %d false positives.' % len(false_pos_fields))
    return false_pos_fields


def create_library(fields: dict, src_library, new_library):
    """Copy every doctype folder named in ``fields`` into the new library.

    Only ``.frt`` files are copied (see ``copy_frts``). Folders that already
    exist in the new library are left untouched. Doctypes that cannot be found
    in ``src_library`` are listed, one per line, in ``missing_doctypes.txt``
    inside ``new_library``.

    :param fields: mapping of arbitrary keys to doctype folder names, as
        returned by ``load_false_positives``.
    :param src_library: directory holding the original doctype folders.
    :param new_library: destination directory; created if it does not exist.
    """
    missing_doctypes = []
    print('Attempting to move false positives to the new library...')

    # We need somewhere to put stuff...
    if not os.path.exists(new_library):
        print('Error directory for new library does not exist:\n '
              'Creating directory %s' % new_library)
        os.makedirs(new_library)

    for doctype in fields.values():
        old_lib_folder = os.path.join(src_library, doctype)
        new_lib_folder = os.path.join(new_library, doctype)
        print('Attempting to move %s to the new library (%s)' % (doctype, new_lib_folder))

        if not os.path.exists(old_lib_folder):
            print('Error, could not find %s in directory:\n %s' % (doctype, src_library))
            missing_doctypes.append(doctype)
            continue

        # copytree refuses to overwrite an existing destination, hence the
        # check; names in `fields` are expected to be unique already.
        if not os.path.exists(new_lib_folder):
            shutil.copytree(old_lib_folder, new_lib_folder, copy_function=copy_frts)
            print(' Success!')

    if missing_doctypes:
        missing_doctypes_file = os.path.join(new_library, "missing_doctypes.txt")
        with open(missing_doctypes_file, 'w') as missing_log:
            missing_log.writelines('%s\n' % doctype for doctype in missing_doctypes)


def yes_or_no(message):
    """Prompt until the user answers yes or no.

    Returns normally on ``y``/``yes`` and terminates the program with exit
    status 0 on ``n``/``no`` (case-insensitive). Any other answer re-prompts.

    :param message: prompt shown on the first attempt.
    """
    prompt = message
    while True:
        decision = input(prompt).strip().lower()
        if decision in ('y', 'yes'):
            return
        if decision in ('n', 'no'):
            sys.exit(0)
        prompt = ' Invalid input, enter Y(es) or N(o): '


def main(analysis_file, old_library, new_library):
    """Load the false positives from ``analysis_file`` and build the new library."""
    fields = load_false_positives(analysis_file)
    create_library(fields, old_library, new_library)


def check_args(source_lib, new_lib, false_pos):
    """Validate the command-line paths before any work is done.

    A missing ``new_lib`` only triggers a warning and a confirmation prompt.
    A missing ``source_lib`` or ``false_pos`` file is fatal: the errors and
    the help text are printed and the program exits with status 1.

    :param source_lib: directory of the original library.
    :param new_lib: directory of the library to create.
    :param false_pos: path of the false-positive CSV file.
    """
    fatal_errors = False

    if not os.path.exists(new_lib):
        print('%s No folder at %s' % (yellow_warning, new_lib))
        print('One will be created.')
        yes_or_no(decision_message)

    # Check for fatal errors.
    if not os.path.exists(source_lib):
        print('%s No library at %s' % (red_error, source_lib))
        fatal_errors = True
    if not os.path.exists(false_pos):
        print('%s File does not exist: %s' % (red_error, false_pos))
        fatal_errors = True

    if fatal_errors:
        parser.print_help()
        print('Exiting...')
        # Non-zero status so shells and calling scripts can detect the failure.
        sys.exit(1)


if __name__ == '__main__':
    required_args = parser.add_argument_group('Required')
    optional_args = parser.add_argument_group('Optional')
    required_args.add_argument('-s', '--source_lib', required=True,
                               help='The original library to search through.')
    required_args.add_argument('-n', '--new_lib', required=True,
                               help='The new library to create.')
    required_args.add_argument('-f', '--false_pos', required=True,
                               help='The csv file of false positives.')
    # add_help=False on the parser lets -h live in the "Optional" group.
    optional_args.add_argument('-h', '--help', action="help", help='Prints the help message.')
    args = parser.parse_args()

    false_pos_file = args.false_pos
    src_lib = args.source_lib
    new_lib = args.new_lib

    check_args(src_lib, new_lib, false_pos_file)
    # Now we can make the call to main
    main(false_pos_file, src_lib, new_lib)