import json import datetime import os import argparse import sys sys.path.insert(0, '../KMeans') import ConsoleUtils """ generate a configuration JSON file for WalkerClassifier that contains the list of centers for all the doc-types relies on a program that calculates the centers inputs are hard coded in the main() method output is hard coded in the main() method """ program_name = 'ConvertCenters' program_description = 'Converts the centers computed by L3Centers to a config file for WalkerClassifier.' parser = argparse.ArgumentParser(prog=program_name, description=program_description, add_help=False) red_error = '\033[91mError:\033[0m' yellow_warning = '\033[93mWARNING:\033[0m' blue_okay = '\033[94mOK\033[0m' doc_id_dict = {} id_doc_dict = {} config = {} centers = {} center_docs = [] doc_centers = {} # load dictionaries for doc id to filename and filename to doc id def load_id_map(filename): with open(filename) as infile: for line in infile: delim_loc = line.rfind(':') doc_path = line[0:delim_loc:] doc_id = line[delim_loc+1:] doc_id.replace('\n', '') doc_id_dict[doc_path] = doc_id.strip() id_doc_dict[doc_id.strip()] = doc_path # load the doc-type centers; translation is made to match id to filename # format of the dictionaries created matches the format of the JSON file to be created def load_dist_map(filename): bad_file_cnt = 0 with open(filename) as infile: for line in infile: delim_loc = line.rfind(':') doc_type = line[0:delim_loc:] doc_id = line[delim_loc+1:].strip().replace(',', '') doc_center = {"doc-type": doc_type, "center-doc": id_doc_dict[doc_id]} center_docs.append(doc_center) doc_centers["center-docs"] = center_docs # write the JSON file that will be passed as config to WalkerClassifier def write_doc_centers(filename, library_version): config["layout"] = library_version config["centers"] = doc_centers with open(filename, 'w+') as outfile: text = json.dumps(config, indent=1) print(text) outfile.write(text) # hard coded input and output files def main(doc_id_file, doc_dist_file, config_file, lib_ver): load_id_map(doc_id_file) load_dist_map(doc_dist_file) print(config_file, lib_ver) write_doc_centers(config_file, lib_ver) def check_args(id_path, dist_path, out_path, lib_ver, overwrite): fatal_errors = False if not (lib_ver == 'V1' or lib_ver == 'V2'): print('%s library version "%s" is invalid, using default (V1).' % (red_error, lib_ver)) if os.path.exists(out_path) and not overwrite: print('%s Out file already exists and will be overwritten' % yellow_warning) ConsoleUtils.yes_or_no('Overwrite %s? ' % out_path) if not os.path.exists(id_path): print('%s ID file does not exist: %s' % (red_error, id_path)) fatal_errors = True if not os.path.exists(dist_path): print('%s Distance file does not exist: %s' % (red_error, dist_path)) fatal_errors = True if fatal_errors: parser.print_help() print('Fatal Error encountered, exiting...') exit(-1) # if run interactively, call the main() method if __name__ == '__main__': required_args = parser.add_argument_group('Required') optional_args = parser.add_argument_group('Optional') required_args.add_argument('-i', '--id_file', required=True, help='The path to a docIDMap file.') required_args.add_argument('-d', '--dist_file', required=True, help='The path to a docDist file.') required_args.add_argument('-o', '--output', required=True, help='The path to save the config file to.') optional_args.add_argument('-v', '--lib_version', required=False, help='Sets the library version (V1 or V2).', default='V1') optional_args.add_argument('-h', '--help', action='help', help='Prints the help message.') optional_args.add_argument('-w', '--overwrite', required=False, action='store_true', help='If this is used, the program will overwrite a file at output if it exists.') args = parser.parse_args() id_map_file = args.id_file dist_map_file = args.dist_file out_file = args.output lib_version = args.lib_version auto_overwrite = args.overwrite check_args(id_map_file, dist_map_file, out_file, lib_version, auto_overwrite) main(id_map_file, dist_map_file, out_file, lib_version)