ScoreWalker/scorewalker-utils/AccuracyCounter/Accuracy.py
2025-03-13 00:13:53 -06:00

151 lines
4.9 KiB
Python

"""
Accuracy.py
=============
This is a tool for producing the number of false positives, false negatives, correct, and incorrect classification
results from a TreeWalker output file. It was created so that AccuracyImprovement can produce a measure of a
TreeWalker run's classification accuracy.
.. moduleauthor:: Chris Diesch <cdiesch@sequencelogic.net>
"""
import argparse
import csv
import os
import sys
import AccuracyGraph
import ConsoleUtils
# The program's name; it is handed to the argument parser, the console printer and the start-up header.
program_name = 'Accuracy'
# Describe what the program does briefly (used as the argument parser's description).
program_description = 'Gets the counts of False Positive, False Negative, Correct, and Incorrect classification ' \
'results from data from TreeWalker.'
# Metadata shown in the header that is written when the script starts.
author = 'Chris Diesch'
version = '1.0.0'
build_date = '2017.08.14'
# The argument parser for the program. The built-in help option is disabled (add_help=False) because the
# script registers its own -h/--help option when it is run as __main__.
parser = argparse.ArgumentParser(prog=program_name, description=program_description, add_help=False)
# Console printer for the program. NOTE(review): presumably SLPrinter prefixes output with the program name --
# confirm against ConsoleUtils.
printer = ConsoleUtils.SLPrinter(program_name)
# Replace stdout so that everything written to it (including print calls) goes through the printer.
sys.stdout = printer
def load_csv(file_path):
    """
    Counts the classification and pagination results recorded in the given TreeWalker file.

    Every row counts toward the total. A row's ``Status`` column is tallied when it is one of 'False Positive',
    'False Negative', 'Correct' or 'Incorrect'; any other status is only reflected in the total. Pagination is
    judged per row by :func:`pagination_was_correct`.

    :param file_path: The path to a TreeWalker file to load.
    :type file_path: str

    :return: A dict with the following keys (in this order):

        * ``'Incorrect & High Confidence'``: number of 'False Positive' rows
        * ``'Correct & Low Confidence'``: number of 'False Negative' rows
        * ``'Correct & High Confidence'``: number of 'Correct' rows
        * ``'Incorrect & Low Confidence'``: number of 'Incorrect' rows
        * ``'Correct Pagination'``: number of rows whose pagination was judged correct
        * ``'total'``: number of rows read
        * ``'Classification Score'``:
          ``((correct pagination + correct + incorrect / 3) - (false positive + false negative)) / 2``
    :rtype: dict
    :raises KeyError: If a row has no 'Status' column.
    """
    # One tally per recognised status; statuses outside this table are deliberately not tallied.
    status_counts = {'False Positive': 0, 'False Negative': 0, 'Correct': 0, 'Incorrect': 0}
    paginated_good = 0
    total = 0

    # The csv module does its own newline handling and asks for files to be opened with newline='';
    # without it, newlines embedded in quoted fields are not interpreted correctly.
    with open(file_path, newline='') as csv_file:
        for row in csv.DictReader(csv_file):
            total += 1
            # Get the status.
            status = row['Status']
            if status in status_counts:
                status_counts[status] += 1
            # How'd pagination go?
            if pagination_was_correct(row):
                paginated_good += 1

    fp_count = status_counts['False Positive']
    fn_count = status_counts['False Negative']
    correct_count = status_counts['Correct']
    incorrect_count = status_counts['Incorrect']

    # Low-confidence mistakes earn a third of a point; confident mistakes and missed pages are penalised in full.
    classify_score = (paginated_good + correct_count + (incorrect_count / 3)) - (fp_count + fn_count)
    classify_score = classify_score / 2

    print('Found %d pages' % total)

    return {'Incorrect & High Confidence': fp_count, 'Correct & Low Confidence': fn_count,
            'Correct & High Confidence': correct_count, 'Incorrect & Low Confidence': incorrect_count,
            'Correct Pagination': paginated_good, 'total': total, 'Classification Score': classify_score}
def pagination_was_correct(row):
    """
    Decides whether the pagination of a single TreeWalker row counts as correct.

    Rows whose status is neither 'Correct' nor 'False Negative' always count as correctly paginated. For the
    other rows the 'Walker Break Point' and 'CLUX Break Point' columns must be equal.

    :param row: One row of the TreeWalker file, keyed by column name.
    :type row: dict

    :return: True if the row's pagination counts as correct, False otherwise.
    :rtype: bool
    """
    # Break points are only compared for rows that were classified correctly.
    if row['Status'] not in ('Correct', 'False Negative'):
        return True
    walker_break = row['Walker Break Point']
    clux_break = row['CLUX Break Point']
    return walker_break == clux_break
def process_data(data_to_process):
    """
    Converts the counts in the given dict into percentages of its 'total' entry, in place.

    The 'total' entry is removed and every remaining value is replaced by ``value * 100 / total``. When the
    total is 0 (for example an input file without any rows) every value becomes 0.0 instead of raising a
    ZeroDivisionError.

    :param data_to_process: The counts keyed by their tags, plus a 'total' entry.
    :type data_to_process: dict

    :return: The same dict object that was passed in, now holding percentages and no 'total' key.
    :rtype: dict
    :raises KeyError: If the dict has no 'total' entry.
    """
    total = float(data_to_process.pop('total'))
    # Only existing keys are reassigned, so the dict does not change size while it is being iterated.
    for key, value in data_to_process.items():
        # Nothing was counted when total is 0; report 0% rather than dividing by zero.
        data_to_process[key] = (float(value) * 100) / total if total else 0.0
    return data_to_process
def write_file(accuracy_percents, file_path):
    """
    Writes the given percentages to the output file, one ``tag=value:type`` line per entry.

    The type written after the colon is always ``AccuracyGraph.NUM_VAL``.

    :param accuracy_percents: The percentages to write out to the file, keyed by their tags for the graph.
    :type accuracy_percents: dict
    :param file_path: The path to the output file.
    :type file_path: str

    :return: none
    """
    with open(file_path, 'w+', newline='') as out_handle:
        # The generator is consumed lazily, so lines are still written one at a time.
        out_handle.writelines(
            '%s=%s:%s\n' % (tag, percent, AccuracyGraph.NUM_VAL)
            for tag, percent in accuracy_percents.items()
        )
# The program's main routine: load the counts, convert them to percentages, write them out.
def main(in_file, out_file):
    """
    Runs the whole pipeline for one TreeWalker file.

    :param in_file: The path to the TreeWalker file to load.
    :type in_file: str
    :param out_file: The path to the output file.
    :type out_file: str

    :return: none
    """
    write_file(process_data(load_csv(in_file)), out_file)
def check_args(in_file, out_file):
    """
    Validates the command line arguments and terminates the program if they cannot be used.

    An existing output file only produces a warning. A missing input file is fatal: the help text is printed
    and the program exits with status -1.

    :param in_file: The path to the TreeWalker file to load.
    :type in_file: str
    :param out_file: The path to the output file.
    :type out_file: str

    :return: none
    :raises SystemExit: If the input file does not exist.
    """
    if os.path.exists(out_file):
        print('Warning file will be overwritten %s' % out_file)

    if not os.path.exists(in_file):
        print('Error: Input file does not exist: %s' % in_file)
        parser.print_help()
        print('Encountered fatal error, exiting...')
        # sys.exit rather than the bare exit() helper: exit() is injected by the site module for interactive
        # use and is not guaranteed to exist (e.g. when Python runs with -S).
        sys.exit(-1)
# Script entry point: print the header, parse the command line, validate it and run main.
if __name__ == '__main__':
    printer.write_no_prefix(ConsoleUtils.get_header(program_name, version, build_date, author))

    # Required options first so that their group is listed first in the help output.
    required_group = parser.add_argument_group('Required')
    required_group.add_argument('-i', '--in_file', required=True, help='The file from TreeWalker to load.')
    required_group.add_argument('-o', '--out_file', required=True, help='The path to the output file.')

    # The help option is registered by hand because the parser was created with add_help=False.
    optional_group = parser.add_argument_group('Optional')
    optional_group.add_argument('-h', '--help', action="help", help='Prints the help message.')

    cli_args = parser.parse_args()

    # Validate the paths (exits on a fatal problem), then do the work.
    check_args(cli_args.in_file, cli_args.out_file)
    main(cli_args.in_file, cli_args.out_file)