""" Accuracy.py ============= This is a tool for producing the number of false positives, false negatives, correct, and incorrect classification results from a TreeWalker output file. It is created to allow AccuracyImprovement to be able to produce a measure of a .. moduleauthor:: Chris Diesch """ import argparse import csv import os import sys import AccuracyGraph import ConsoleUtils # Give the program a name. program_name = 'Accuracy' # Describe what the program does beiefly. program_description = 'Gets the counts of False Positive, False Negative, Correct, and Incorrect classification ' \ 'results from data from TreeWalker.' author = 'Chris Diesch' version = '1.0.0' build_date = '2017.08.14' # The argument parser for the program. parser = argparse.ArgumentParser(prog=program_name, description=program_description, add_help=False) # Error and Warning console values: printer = ConsoleUtils.SLPrinter(program_name) sys.stdout = printer def load_csv(file_path): """ Computes the false positive, false negative, correct, and incorrect classification results from the given TreeWalker file. :param file_path: The path to a TreeWalker file to load. :type file_path: str .. raw:: html
:return: Correct count, Incorrect count, False Positive count, False Negative count :rtype: tuple(int, int, int, int) """ fp_count = 0 fn_count = 0 correct_count = 0 incorrect_count = 0 paginated_good = 0 total = 0 with open(file_path) as csv_file: reader = csv.DictReader(csv_file) for row in reader: total += 1 # Get the status. status = row['Status'] if status == 'False Positive': fp_count += 1 elif status == 'False Negative': fn_count += 1 elif status == 'Correct': correct_count += 1 elif status == 'Incorrect': incorrect_count += 1 # How'd pagination go? if pagination_was_correct(row): paginated_good += 1 classify_score = (paginated_good + correct_count + (incorrect_count/3)) - (fp_count + fn_count) classify_score = classify_score/2 print('Found %d pages' % total) return {'Incorrect & High Confidence': fp_count, 'Correct & Low Confidence': fn_count, 'Correct & High Confidence': correct_count, 'Incorrect & Low Confidence': incorrect_count, 'Correct Pagination': paginated_good, 'total': total, 'Classification Score': classify_score} def pagination_was_correct(row): if row['Status'] == 'Correct' or row['Status'] == 'False Negative': return row['Walker Break Point'] == row['CLUX Break Point'] return True def process_data(data_to_process): total = data_to_process['total'] del data_to_process['total'] for key, value in data_to_process.items(): data_to_process[key] = (float(value) * 100)/float(total) return data_to_process def write_file(accuracy_percents, file_path): """ Writes a file with the necessary metadata to the given file path. :param file_path: The path to the output file. :type file_path: str :param accuracy_percents: The percentages to write out to the file, keyed by their tags for the graph. :return: none """ with open(file_path, 'w+', newline='') as writer: for key, value in accuracy_percents.items(): writer.write('%s=%s:%s\n' % (key, value, AccuracyGraph.NUM_VAL)) # This is the main function of the program. 
def main(in_file, out_file):
    """
    Loads the TreeWalker results, converts the counts to percentages, and writes them to the output file.

    :param in_file: The path to the TreeWalker file to load.
    :type in_file: str
    :param out_file: The path to the output file.
    :type out_file: str

    :return: none
    """
    accuracy_counts = load_csv(in_file)
    accuracy_percents = process_data(accuracy_counts)
    write_file(accuracy_percents, out_file)


def check_args(in_file, out_file):
    """
    Validates the command line arguments before the program runs.

    Prints a warning if the output file already exists. If the input file does not exist, prints an error and
    the help message, then exits the program with status -1.

    :param in_file: The path to the TreeWalker file to load.
    :type in_file: str
    :param out_file: The path to the output file.
    :type out_file: str

    :return: none
    :raises SystemExit: If the input file does not exist.
    """
    if os.path.exists(out_file):
        print('Warning file will be overwritten %s' % out_file)

    if os.path.exists(in_file):
        return

    print('Error: Input file does not exist: %s' % in_file)
    parser.print_help()
    print('Encountered fatal error, exiting...')
    # sys.exit rather than the site-provided exit(), which is only meant for the interactive shell
    # and is not available when the site module is not loaded.
    sys.exit(-1)


# This is where we call the main method from.
if __name__ == '__main__':
    printer.write_no_prefix(ConsoleUtils.get_header(program_name, version, build_date, author))

    # Set up arguments here. Help is added by hand because the parser was created with add_help=False.
    required_args = parser.add_argument_group('Required')
    optional_args = parser.add_argument_group('Optional')
    required_args.add_argument('-i', '--in_file', required=True, help='The file from TreeWalker to load.')
    required_args.add_argument('-o', '--out_file', required=True, help='The path to the output file.')
    optional_args.add_argument('-h', '--help', action="help", help='Prints the help message.')

    # Get the arguments
    args = parser.parse_args()
    input_file = args.in_file
    output_file = args.out_file

    # Check the args
    check_args(input_file, output_file)

    # Run main
    main(input_file, output_file)