# Classification Output Reader — parses classifier/indexer console logs
# into simple "key = value" summary files for easier inspection during testing.
# We do all our imports at the top of our program.
|
|
import argparse
|
|
import json
|
|
import operator
|
|
|
|
# Give the program a name.
|
|
program_name = 'Classification Output Reader'
|
|
# Describe what the program does beiefly.
|
|
program_description = 'Loads The classification console output to make the data easier to view when testing.'
|
|
# The argument parser for the program.
|
|
parser = argparse.ArgumentParser(prog=program_name, description=program_description, add_help=False)
|
|
|
|
# Error and Warning console values:
|
|
red_error = '\033[91mError:\033[0m'
|
|
yellow_warning = '\033[93mWARNING:\033[0m'
|
|
blue_okay = '\033[94mOK\033[0m'
|
|
header = format('\033[95m%s\033[0m\n'
|
|
'-----------------------' % program_name)
|
|
|
|
phrase_count_tag = 'Phrase Count'
|
|
analyzer_tag = 'Analyzer'
|
|
paginator_tag = 'Paginator'
|
|
confidence_tag = 'Confidence'
|
|
ignored_chars_tag = 'Ignored characters'
|
|
|
|
found_phrase_tag = 'Found important phrase'
|
|
|
|
|
|
def map_walker_classifier_data(file_path, out_file):
    """Summarise a classifier console log into a key/value file.

    Scans every line of the log for the known tags, counts the
    'Found important phrase' occurrences, and writes the collected
    values to out_file via save_file().

    file_path -- path of the raw console-output file to read.
    out_file  -- path the summary is written to.
    """
    with open(file_path) as reader:
        lines = reader.readlines()

    num_phrases = 0
    paginator_class = ''
    conf_class = ''
    analyzer_class = ''
    # Bug fix: ignored_chars was previously left undefined when the tag
    # never appeared in the log, raising NameError at the dict below.
    ignored_chars = ''

    for line in lines:
        # Strip the trailing newline so parsed values are clean.
        line = line.replace('\n', '')

        if found_phrase_tag in line:
            num_phrases += 1

        if paginator_tag in line:
            paginator_class = parse_class_line(line)

        if analyzer_tag in line:
            analyzer_class = parse_class_line(line)

        if confidence_tag in line:
            # Bug fix: this branch previously assigned analyzer_class,
            # clobbering the analyzer value and leaving conf_class empty.
            conf_class = parse_class_line(line)

        if ignored_chars_tag in line:
            chars = parse_class_line(line).split(' ')
            ignored_chars = ('[%s]' % ', '.join('"%s"' % c for c in chars))

    cls_result = {analyzer_tag: analyzer_class, paginator_tag: paginator_class, confidence_tag: conf_class,
                  phrase_count_tag: num_phrases, ignored_chars_tag: ignored_chars}

    save_file(cls_result, out_file)
|
|
|
|
|
|
def map_walker_indexer_data(file_path, out_file):
    """Summarise an indexer console log into a key/value file.

    Counts 'Found important phrase' occurrences and extracts the analyzer
    class and ignored-character list, then writes the result via save_file().

    file_path -- path of the raw console-output file to read.
    out_file  -- path the summary is written to.
    """
    with open(file_path) as reader:
        lines = reader.readlines()

    num_phrases = 0
    analyzer_class = ''
    ignored_chars = ''

    for line in lines:
        # Strip the trailing newline so parsed values are clean
        # (consistent with map_walker_classifier_data).
        line = line.replace('\n', '')

        if found_phrase_tag in line:
            num_phrases += 1

        if analyzer_tag in line:
            analyzer_class = parse_class_line(line)

        if ignored_chars_tag in line:
            # Bug fix: parse_class_line() was called without its line argument.
            chars = parse_class_line(line).split(' ')
            # Bug fix: the original '%s%s%s' format received a 2-tuple and
            # raised TypeError; build the bracketed list in one substitution.
            ignored_chars = ('[%s]' % ', '.join('\'%s\'' % c for c in chars))

    # Bug fix: the dict previously used the ignored_chars VALUE as its key
    # instead of ignored_chars_tag, producing an unreadable/unstable key.
    idx_result = {analyzer_tag: analyzer_class, phrase_count_tag: num_phrases,
                  ignored_chars_tag: ignored_chars}

    save_file(idx_result, out_file)
|
|
|
|
|
|
def save_file(data, out_path):
    """Write each key/value pair of data to out_path as 'key = value' lines."""
    rendered = ('%s = %s\n' % (key, value) for key, value in data.items())
    with open(out_path, 'w+', newline='') as writer:
        writer.writelines(rendered)
|
|
|
|
|
|
def parse_class_line(line):
    """Return the text that follows the first ': ' separator in line."""
    # str.find returns -1 when ':' is absent, so the slice then starts at 1 —
    # same fallthrough behaviour as the original implementation.
    return line[line.find(':') + 2:]
|
|
|
|
|
|
def load_phrase_line(line):
    """Extract the phrase text: skip a 33-char prefix, keep up to the first '.'."""
    tail = line[33:]
    # find() yields -1 when no '.' exists, so the slice then drops the last
    # character — identical to the original behaviour.
    return tail[:tail.find('.')]
|
|
|