ScoreWalker/scorewalker-utils/AccuracyCounter/Accuracy.py

"""
Accuracy.py
=============

This is a tool for producing the number of false positives, false negatives, correct, and incorrect classification
results from a TreeWalker output file. It was created so that AccuracyImprovement can produce a measure of
classification accuracy.

 .. moduleauthor:: Chris Diesch <cdiesch@sequencelogic.net>
"""
import argparse
import csv
import os
import sys

import AccuracyGraph
import ConsoleUtils

# Give the program a name.
program_name = 'Accuracy'
# Describe what the program does briefly.
program_description = 'Gets the counts of False Positive, False Negative, Correct, and Incorrect classification ' \
                      'results from data from TreeWalker.'
# Author, version and build date are passed to ConsoleUtils.get_header when the program is run as a script.
author = 'Chris Diesch'
version = '1.0.0'
build_date = '2017.08.14'
# The argument parser for the program. The automatic -h/--help option is disabled (add_help=False); the
# arguments, including a hand-written help option, are registered in the __main__ section of this file.
parser = argparse.ArgumentParser(prog=program_name, description=program_description, add_help=False)
# Console output: sys.stdout is replaced by the SLPrinter below, so every print() in this module goes through it.
# NOTE(review): presumably SLPrinter prefixes each message with the program name -- confirm in ConsoleUtils.

printer = ConsoleUtils.SLPrinter(program_name)
sys.stdout = printer


def load_csv(file_path):
    """
    Counts the classification and pagination results recorded in the given TreeWalker file.

    Each row is tallied by the value of its ``Status`` column ('False Positive', 'False Negative', 'Correct' or
    'Incorrect'). A row with any other status is counted in the total but in none of the four status counts.
    Pagination is judged for every row by ``pagination_was_correct``.

    The classification score is computed from the raw counts as::

        ((correct pagination + correct + incorrect / 3) - (false positive + false negative)) / 2

    :param file_path: The path to a TreeWalker file to load.
    :type file_path: str

    .. raw:: html <br>

    :return: The counts keyed by their tags: 'Incorrect & High Confidence' (false positives),
        'Correct & Low Confidence' (false negatives), 'Correct & High Confidence' (correct),
        'Incorrect & Low Confidence' (incorrect), 'Correct Pagination', 'total' (number of rows) and
        'Classification Score'.
    :rtype: dict
    :raises KeyError: If a row has no 'Status' column.
    """
    status_counts = {'False Positive': 0, 'False Negative': 0, 'Correct': 0, 'Incorrect': 0}
    paginated_good = 0
    total = 0
    # The csv module documentation asks for file objects opened with newline='' so that the reader does its own
    # line-ending handling (otherwise newlines embedded in quoted fields are not interpreted correctly).
    with open(file_path, newline='') as csv_file:
        for row in csv.DictReader(csv_file):
            total += 1
            # Get the status; unknown statuses are only reflected in the total.
            status = row['Status']
            if status in status_counts:
                status_counts[status] += 1
            # How'd pagination go?
            if pagination_was_correct(row):
                paginated_good += 1

    fp_count = status_counts['False Positive']
    fn_count = status_counts['False Negative']
    correct_count = status_counts['Correct']
    incorrect_count = status_counts['Incorrect']

    # An incorrect result still earns a third of a point; false positives and false negatives cost a full point.
    classify_score = (paginated_good + correct_count + (incorrect_count / 3)) - (fp_count + fn_count)
    classify_score = classify_score / 2

    print('Found %d pages' % total)
    return {'Incorrect & High Confidence': fp_count, 'Correct & Low Confidence': fn_count,
            'Correct & High Confidence': correct_count, 'Incorrect & Low Confidence': incorrect_count,
            'Correct Pagination': paginated_good, 'total': total, 'Classification Score': classify_score}


def pagination_was_correct(row):
    """
    Tells whether the pagination of the given TreeWalker row counts as correct.

    Only rows whose status is 'Correct' or 'False Negative' are actually checked: for those, the
    'Walker Break Point' value must equal the 'CLUX Break Point' value. Every other row counts as correct.

    :param row: A row of the TreeWalker file, keyed by column name.
    :type row: dict

    :return: True if the pagination counts as correct, False otherwise.
    :rtype: bool
    """
    # Guard clause: rows with any other status are never compared.
    if row['Status'] not in ('Correct', 'False Negative'):
        return True

    walker_break = row['Walker Break Point']
    clux_break = row['CLUX Break Point']
    return walker_break == clux_break


def process_data(data_to_process):
    """
    Converts the counts in the given dict to percentages of its 'total' entry.

    The 'total' entry is removed and every remaining value is replaced, in place, by ``value * 100 / total``.
    When the total is zero (e.g. the input file had no rows) there is nothing to take a percentage of, so every
    value becomes 0.0 instead of the conversion failing with a ZeroDivisionError.

    :param data_to_process: The counts to convert; must contain a 'total' key. This dict is modified in place.
    :type data_to_process: dict

    :return: The same dict object, without 'total', now holding percentages.
    :rtype: dict
    :raises KeyError: If there is no 'total' entry.
    """
    total = float(data_to_process.pop('total'))

    # Only values of existing keys are reassigned, so updating the dict while iterating over it is safe.
    for key, value in data_to_process.items():
        if total == 0:
            data_to_process[key] = 0.0
        else:
            data_to_process[key] = (float(value) * 100) / total

    return data_to_process


def write_file(accuracy_percents, file_path):
    """
    Saves the given percentages to the given file path, one entry per line.

    Each line has the form ``<tag>=<value>:<AccuracyGraph.NUM_VAL>``. An existing file is overwritten.

    :param accuracy_percents: The percentages to write out to the file, keyed by their tags for the graph.
    :type accuracy_percents: dict
    :param file_path: The path to the output file.
    :type file_path: str
    :return: none
    """
    with open(file_path, 'w+', newline='') as out_file:
        # Build the lines lazily so they are formatted and written in the dict's own order.
        out_file.writelines('%s=%s:%s\n' % (tag, percent, AccuracyGraph.NUM_VAL)
                            for tag, percent in accuracy_percents.items())


# This is the main function of the program.
def main(in_file, out_file):
    """
    Runs the whole program: loads the counts, converts them to percentages and writes them out.

    :param in_file: The path to the TreeWalker file to load.
    :type in_file: str
    :param out_file: The path to the output file.
    :type out_file: str
    :return: none
    """
    # load_csv -> process_data -> write_file, each step feeding the next.
    write_file(process_data(load_csv(in_file)), out_file)


def check_args(in_file, out_file):
    """
    Validates the command line arguments, exiting the program when they cannot be used.

    An existing output file only produces a warning, since it will be overwritten. A missing input file is fatal:
    the help message is printed and the program exits with status -1.

    :param in_file: The path to the input file; it must exist.
    :type in_file: str
    :param out_file: The path to the output file.
    :type out_file: str
    :return: none
    :raises SystemExit: If the input file does not exist.
    """
    if os.path.exists(out_file):
        print('Warning file will be overwritten %s' % out_file)

    if not os.path.exists(in_file):
        print('Error: Input file does not exist: %s' % in_file)
        parser.print_help()
        print('Encountered fatal error, exiting...')
        # sys.exit is always available, whereas the bare exit() builtin is injected by the site module for
        # interactive use and does not exist when site is not loaded (e.g. python -S).
        sys.exit(-1)


# Script entry point: print the header, parse and validate the command line, then run main.
if __name__ == '__main__':
    printer.write_no_prefix(ConsoleUtils.get_header(program_name, version, build_date, author))

    # The required arguments get their own help group.
    required_args = parser.add_argument_group('Required')
    required_args.add_argument('-i', '--in_file', required=True, help='The file from TreeWalker to load.')
    required_args.add_argument('-o', '--out_file', required=True, help='The path to the output file.')
    # The help option is registered by hand because the parser was created with add_help=False.
    optional_args = parser.add_argument_group('Optional')
    optional_args.add_argument('-h', '--help', action="help", help='Prints the help message.')

    # Parse, validate (exits on a fatal problem), and run.
    args = parser.parse_args()
    input_file, output_file = args.in_file, args.out_file
    check_args(input_file, output_file)
    main(input_file, output_file)