151 lines
4.9 KiB
Python
151 lines
4.9 KiB
Python
"""
|
|
Accuracy.py
|
|
=============
|
|
|
|
This tool counts the false positive, false negative, correct, and incorrect classification results in a TreeWalker
output file. It was created so that AccuracyImprovement can produce a measure of accuracy from those counts.
|
|
|
|
.. moduleauthor:: Chris Diesch <cdiesch@sequencelogic.net>
|
|
"""
|
|
import argparse
|
|
import csv
|
|
import os
|
|
import sys
|
|
|
|
import AccuracyGraph
|
|
import ConsoleUtils
|
|
|
|
# Name under which the program identifies itself.
program_name = 'Accuracy'
# Brief description of what the program does.
program_description = ('Gets the counts of False Positive, False Negative, Correct, and Incorrect classification '
                       'results from data from TreeWalker.')
author = 'Chris Diesch'
version = '1.0.0'
build_date = '2017.08.14'
# Argument parser for the program; the automatic help option is switched off here.
parser = argparse.ArgumentParser(add_help=False, description=program_description, prog=program_name)

# Console printer for the program; it takes the place of sys.stdout, so print() output goes through it.
printer = ConsoleUtils.SLPrinter(program_name)
sys.stdout = printer
|
|
|
|
|
|
def load_csv(file_path):
    """
    Tallies the classification results found in the given TreeWalker file.

    Each row's ``Status`` column is counted as a False Positive, False Negative, Correct, or Incorrect result; a row
    with any other status only adds to the total. ``pagination_was_correct`` decides whether a row also counts as
    correctly paginated.

    :param file_path: The path to a TreeWalker file to load.
    :type file_path: str

    .. raw:: html <br>

    :return: The counts keyed by their labels, plus ``'total'`` (the number of rows read) and
             ``'Classification Score'``.
    :rtype: dict
    """
    tallies = {'False Positive': 0, 'False Negative': 0, 'Correct': 0, 'Incorrect': 0}
    paginated_good = 0
    total = 0

    # newline='' leaves newline handling to the csv module, as its documentation asks for file objects.
    with open(file_path, newline='') as csv_file:
        for row in csv.DictReader(csv_file):
            total += 1
            # Get the status; unrecognised statuses are not tallied.
            status = row['Status']
            if status in tallies:
                tallies[status] += 1
            # How'd pagination go?
            if pagination_was_correct(row):
                paginated_good += 1

    fp_count = tallies['False Positive']
    fn_count = tallies['False Negative']
    correct_count = tallies['Correct']
    incorrect_count = tallies['Incorrect']

    # Good pagination and correct results add to the score, incorrect results add a third each, and false
    # positives/negatives subtract from it; the sum is then halved.
    classify_score = (paginated_good + correct_count + (incorrect_count / 3)) - (fp_count + fn_count)
    classify_score = classify_score / 2

    print('Found %d pages' % total)
    return {'Incorrect & High Confidence': fp_count, 'Correct & Low Confidence': fn_count,
            'Correct & High Confidence': correct_count, 'Incorrect & Low Confidence': incorrect_count,
            'Correct Pagination': paginated_good, 'total': total, 'Classification Score': classify_score}
|
|
|
|
|
|
def pagination_was_correct(row):
    """
    Tells whether the given TreeWalker row counts as correctly paginated.

    Only rows whose status is 'Correct' or 'False Negative' are actually compared: for those, the Walker break point
    must equal the CLUX break point. Rows with any other status always count as correctly paginated.

    :param row: A row of the TreeWalker file, keyed by column name.
    :type row: dict

    :return: True if the row counts as correctly paginated, False otherwise.
    :rtype: bool
    """
    # Guard clause: statuses outside these two are never compared.
    if row['Status'] not in ('Correct', 'False Negative'):
        return True

    return row['Walker Break Point'] == row['CLUX Break Point']
|
|
|
|
|
|
def process_data(data_to_process):
    """
    Converts every value in the given results into a percentage of the total.

    The ``'total'`` entry is removed and the remaining values are overwritten in place, so the dictionary that is
    returned is the same object that was passed in.

    :param data_to_process: The results to convert; must contain a ``'total'`` entry.
    :type data_to_process: dict

    :return: The given dictionary with each remaining value replaced by ``value * 100 / total``, or by ``0.0`` when
             the total is zero.
    :rtype: dict
    """
    total = float(data_to_process.pop('total'))

    # Only existing keys are reassigned, so the dictionary keeps its size while it is being iterated.
    for key, value in data_to_process.items():
        # A total of zero means nothing was read; report 0% rather than dividing by zero.
        data_to_process[key] = (float(value) * 100) / total if total else 0.0

    return data_to_process
|
|
|
|
|
|
def write_file(accuracy_percents, file_path):
    """
    Saves the given percentages to the output file, one entry per line.

    Each line has the form ``<key>=<value>:<AccuracyGraph.NUM_VAL>``.

    :param accuracy_percents: The percentages to write out to the file, keyed by their tags for the graph.
    :type accuracy_percents: dict
    :param file_path: The path to the output file.
    :type file_path: str

    :return: none
    """
    with open(file_path, 'w+', newline='') as out_file:
        # The generator formats one line per entry as it is written.
        out_file.writelines(
            '%s=%s:%s\n' % (tag, percent, AccuracyGraph.NUM_VAL)
            for tag, percent in accuracy_percents.items()
        )
|
|
|
|
|
|
# Main function of the program: load the counts, turn them into percentages, and write them out.
def main(in_file, out_file):
    """
    Runs the whole program for one TreeWalker file.

    :param in_file: The path to the TreeWalker file to load.
    :type in_file: str
    :param out_file: The path to the output file.
    :type out_file: str

    :return: none
    """
    accuracy_percents = process_data(load_csv(in_file))
    write_file(accuracy_percents, out_file)
|
|
|
|
|
|
def check_args(in_file, out_file):
    """
    Validates the command line arguments, exiting the program when they cannot be used.

    An output file that already exists only produces a warning. A missing input file is fatal: the help message is
    printed and the program exits with status -1.

    :param in_file: The path to the input file; it must exist.
    :type in_file: str
    :param out_file: The path to the output file.
    :type out_file: str

    :return: none
    :raises SystemExit: If the input file does not exist.
    """
    if os.path.exists(out_file):
        print('Warning file will be overwritten %s' % out_file)

    if not os.path.exists(in_file):
        print('Error: Input file does not exist: %s' % in_file)
        parser.print_help()
        print('Encountered fatal error, exiting...')
        # sys.exit instead of the bare exit(): the latter is injected by the site module for interactive use and
        # is not guaranteed to exist (for example under ``python -S``).
        sys.exit(-1)
|
|
|
|
|
|
# Script entry point: print the header, parse the command line, validate it, and run main().
if __name__ == '__main__':
    header = ConsoleUtils.get_header(program_name, version, build_date, author)
    printer.write_no_prefix(header)

    # Register the arguments in their help-message groups.
    required_args = parser.add_argument_group('Required')
    optional_args = parser.add_argument_group('Optional')
    required_args.add_argument('-i', '--in_file', help='The file from TreeWalker to load.', required=True)
    required_args.add_argument('-o', '--out_file', help='The path to the output file.', required=True)
    optional_args.add_argument('-h', '--help', help='Prints the help message.', action="help")

    # Read the parsed values.
    args = parser.parse_args()
    input_file, output_file = args.in_file, args.out_file

    # check_args exits the program when the input file is missing.
    check_args(input_file, output_file)
    main(input_file, output_file)
|