# We do all our imports at the top of our program.
import argparse
import json
import operator

# Give the program a name.
program_name = 'Classification Output Reader'

# Describe what the program does briefly.
program_description = 'Loads the classification console output to make the data easier to view when testing.'

# The argument parser for the program.
parser = argparse.ArgumentParser(prog=program_name,
                                 description=program_description,
                                 add_help=False)

# Error and warning console values:
red_error = '\033[91mError:\033[0m'
yellow_warning = '\033[93mWARNING:\033[0m'
blue_okay = '\033[94mOK\033[0m'
header = ('\033[95m%s\033[0m\n'
          '-----------------------' % program_name)

# Tags used to recognise lines in the console output.
phrase_count_tag = 'Phrase Count'
analyzer_tag = 'Analyzer'
paginator_tag = 'Paginator'
confidence_tag = 'Confidence'
ignored_chars_tag = 'Ignored characters'
found_phrase_tag = 'Found important phrase'


def map_walker_classifier_data(file_path, out_file):
    # Read the classifier console output and summarise it into a result file.
    with open(file_path) as reader:
        lines = reader.readlines()

    num_phrases = 0
    paginator_class = ''
    conf_class = ''
    analyzer_class = ''
    ignored_chars = ''

    for line in lines:
        line = line.replace('\n', '')
        if found_phrase_tag in line:
            num_phrases += 1
        if paginator_tag in line:
            paginator_class = parse_class_line(line)
        if analyzer_tag in line:
            analyzer_class = parse_class_line(line)
        if confidence_tag in line:
            conf_class = parse_class_line(line)
        if ignored_chars_tag in line:
            chars = parse_class_line(line).split(' ')
            ignored_chars = '[%s]' % ', '.join('"%s"' % c for c in chars)

    cls_result = {analyzer_tag: analyzer_class,
                  paginator_tag: paginator_class,
                  confidence_tag: conf_class,
                  phrase_count_tag: num_phrases,
                  ignored_chars_tag: ignored_chars}
    save_file(cls_result, out_file)


def map_walker_indexer_data(file_path, out_file):
    # Read the indexer console output and summarise it into a result file.
    with open(file_path) as reader:
        lines = reader.readlines()

    num_phrases = 0
    analyzer_class = ''
    ignored_chars = ''

    for line in lines:
        line = line.replace('\n', '')
        if found_phrase_tag in line:
            num_phrases += 1
        if analyzer_tag in line:
            analyzer_class = parse_class_line(line)
        if ignored_chars_tag in line:
            chars = parse_class_line(line).split(' ')
            ignored_chars = '[%s]' % ', '.join("'%s'" % c for c in chars)

    idx_result = {analyzer_tag: analyzer_class,
                  phrase_count_tag: num_phrases,
                  ignored_chars_tag: ignored_chars}
    save_file(idx_result, out_file)


def save_file(data, out_path):
    # Write each key/value pair of the summary on its own line.
    with open(out_path, 'w+', newline='') as writer:
        for key, value in data.items():
            writer.write('%s = %s\n' % (key, value))


def parse_class_line(line):
    # The value follows the first ': ' on the line.
    split_idx = line.find(':')
    result = line[split_idx + 2:]
    return result


def load_phrase_line(line):
    # Drop the fixed-length prefix, then keep everything up to the first '.'.
    line = line[33:]
    split_idx = line.find('.')
    line = line[:split_idx]
    return line
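

# A minimal sketch of how the parser above could drive the two map functions.
# The 'input', 'output', and '--mode' arguments are assumptions added for
# illustration; the real project may invoke these functions differently.
if __name__ == '__main__':
    parser.add_argument('input', help='Path to the classification console output file.')
    parser.add_argument('output', help='Path to write the summarised result to.')
    parser.add_argument('--mode', choices=['classifier', 'indexer'],
                        default='classifier',
                        help='Which kind of console output to read.')
    # add_help was disabled when the parser was created, so expose it here.
    parser.add_argument('-h', '--help', action='help')

    args = parser.parse_args()
    print(header)
    if args.mode == 'classifier':
        map_walker_classifier_data(args.input, args.output)
    else:
        map_walker_indexer_data(args.input, args.output)
    print('%s Wrote summary to %s' % (blue_okay, args.output))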