# Classification Output Reader — parses classifier/indexer console logs
# into simple "key = value" summary files for easier inspection during testing.
# We do all our imports at the top of our program.
|
|
import argparse
|
|
import json
|
|
import operator
|
|
|
|
# Give the program a name.
|
|
program_name = 'Classification Output Reader'
|
|
# Describe what the program does beiefly.
|
|
program_description = 'Loads The classification console output to make the data easier to view when testing.'
|
|
# The argument parser for the program.
|
|
parser = argparse.ArgumentParser(prog=program_name, description=program_description, add_help=False)
|
|
|
|
# Error and Warning console values:
|
|
red_error = '\033[91mError:\033[0m'
|
|
yellow_warning = '\033[93mWARNING:\033[0m'
|
|
blue_okay = '\033[94mOK\033[0m'
|
|
header = format('\033[95m%s\033[0m\n'
|
|
'-----------------------' % program_name)
|
|
|
|
phrase_count_tag = 'Phrase Count'
|
|
analyzer_tag = 'Analyzer'
|
|
paginator_tag = 'Paginator'
|
|
confidence_tag = 'Confidence'
|
|
ignored_chars_tag = 'Ignored characters'
|
|
|
|
found_phrase_tag = 'Found important phrase'
|
|
|
|
|
|
def map_walker_classifier_data(file_path, out_file):
    """Summarise a classifier console log into a key/value file.

    Scans every line of the log for the known tags, counts the
    'Found important phrase' occurrences, and writes the collected
    values to out_file via save_file().

    file_path -- path of the raw console-output file to read.
    out_file  -- path the summary is written to.
    """
    with open(file_path) as reader:
        lines = reader.readlines()

    num_phrases = 0
    paginator_class = ''
    conf_class = ''
    analyzer_class = ''
    # Bug fix: ignored_chars was previously left undefined when the tag
    # never appeared in the log, raising NameError at the dict below.
    ignored_chars = ''

    for line in lines:
        # Strip the trailing newline so parsed values are clean.
        line = line.replace('\n', '')

        if found_phrase_tag in line:
            num_phrases += 1

        if paginator_tag in line:
            paginator_class = parse_class_line(line)

        if analyzer_tag in line:
            analyzer_class = parse_class_line(line)

        if confidence_tag in line:
            # Bug fix: this branch previously assigned analyzer_class,
            # clobbering the analyzer value and leaving conf_class empty.
            conf_class = parse_class_line(line)

        if ignored_chars_tag in line:
            chars = parse_class_line(line).split(' ')
            ignored_chars = ('[%s]' % ', '.join('"%s"' % c for c in chars))

    cls_result = {analyzer_tag: analyzer_class, paginator_tag: paginator_class, confidence_tag: conf_class,
                  phrase_count_tag: num_phrases, ignored_chars_tag: ignored_chars}

    save_file(cls_result, out_file)
|
|
|
|
|
|
def map_walker_indexer_data(file_path, out_file):
    """Summarise an indexer console log into a key/value file.

    Counts 'Found important phrase' occurrences and extracts the analyzer
    class and ignored-character list, then writes the result via save_file().

    file_path -- path of the raw console-output file to read.
    out_file  -- path the summary is written to.
    """
    with open(file_path) as reader:
        lines = reader.readlines()

    num_phrases = 0
    analyzer_class = ''
    ignored_chars = ''

    for line in lines:
        # Strip the trailing newline so parsed values are clean
        # (consistent with map_walker_classifier_data).
        line = line.replace('\n', '')

        if found_phrase_tag in line:
            num_phrases += 1

        if analyzer_tag in line:
            analyzer_class = parse_class_line(line)

        if ignored_chars_tag in line:
            # Bug fix: parse_class_line() was called without its line argument.
            chars = parse_class_line(line).split(' ')
            # Bug fix: the original '%s%s%s' format received a 2-tuple and
            # raised TypeError; build the bracketed list in one substitution.
            ignored_chars = ('[%s]' % ', '.join('\'%s\'' % c for c in chars))

    # Bug fix: the dict previously used the ignored_chars VALUE as its key
    # instead of ignored_chars_tag, producing an unreadable/unstable key.
    idx_result = {analyzer_tag: analyzer_class, phrase_count_tag: num_phrases,
                  ignored_chars_tag: ignored_chars}

    save_file(idx_result, out_file)
|
|
|
|
|
|
def save_file(data, out_path):
    """Write each key/value pair of data to out_path as 'key = value' lines."""
    rendered = ('%s = %s\n' % (key, value) for key, value in data.items())
    with open(out_path, 'w+', newline='') as writer:
        writer.writelines(rendered)
|
|
|
|
|
|
def parse_class_line(line):
    """Return the text that follows the first ': ' separator in line."""
    # str.find returns -1 when ':' is absent, so the slice then starts at 1 —
    # same fallthrough behaviour as the original implementation.
    return line[line.find(':') + 2:]
|
|
|
|
|
|
def load_phrase_line(line):
    """Extract the phrase text: skip a 33-char prefix, keep up to the first '.'."""
    tail = line[33:]
    # find() yields -1 when no '.' exists, so the slice then drops the last
    # character — identical to the original behaviour.
    return tail[:tail.find('.')]
|
|
|