# ScoreWalker/scorewalker-utils/WalkerComparer/WalkerComparer.py

import argparse
import csv
import os
import sys

# Identity of the tool, reused for the parser and the console header.
program_name = 'Walker Compare'
program_description = 'Compares the false positives between runs.'
# add_help=False: the automatic -h/--help option is disabled on this parser.
parser = argparse.ArgumentParser(
    add_help=False,
    description=program_description,
    prog=program_name,
)

# Console labels wrapped in ANSI escape sequences.
red_error = '\033[91mError:\033[0m'
yellow_warning = '\033[93mWARNING:\033[0m'
blue_okay = '\033[94mOK\033[0m'
# Program name followed by a dashed rule on the next line.
program_header = (
    '\033[95m%s\033[0m\n'
    '-----------------------'
) % program_name
# Prompt shown when asking the user to confirm an action.
decision_message = '   Is this okay? (Y/N): '

# Value of the status column that marks a row as a false positive.
false_pos_tag = 'FalsePos'
# Input CSV column names.
status_tag = 'Status'
walker_field_name_tag = 'WalkerType'
clux_field_name_tag = 'Clux DocType'
# Output CSV labels.
field_difference_tag = 'Corrected False Positives'
field_count_tag = 'Number of fields corrected'


def read_false_positives(in_file):
    """Collect the walker field names of every false-positive row in a CSV.

    Args:
        in_file: Path to a CSV file whose header row includes the columns
            named by ``status_tag`` and ``walker_field_name_tag``.

    Returns:
        dict: Maps a zero-based running index to the
        ``walker_field_name_tag`` value of each row whose ``status_tag``
        column equals ``false_pos_tag``, in file order.

    Raises:
        OSError: If the file cannot be opened.
        KeyError: If the header lacks the status column, or lacks the
            walker-type column while a false-positive row is present.
    """
    # newline='' is what the csv module expects for file objects, and the
    # context manager guarantees the handle is closed even on error.
    with open(in_file, newline='') as csv_handle:
        false_pos_fields = [
            row[walker_field_name_tag]
            for row in csv.DictReader(csv_handle)
            # We only care about false positives...
            if row[status_tag] == false_pos_tag
        ]

    # Callers index the result by position, so keep the index -> value shape.
    return dict(enumerate(false_pos_fields))


def compare_false_positives(original_false_pos, new_false_pos):
    """Find the original false positives that no longer appear in the new run.

    Args:
        original_false_pos: Mapping of index -> field value from the
            original run.
        new_false_pos: Mapping of index -> field value from the new run.
            Values are expected to be hashable (e.g. CSV strings).

    Returns:
        tuple: ``(difference_count, difference_fields)`` where
        ``difference_fields`` maps a zero-based running index to each
        original value that is absent from ``new_false_pos``. Duplicate
        original values are each reported.
    """
    # Build the lookup once instead of re-scanning the values per item.
    still_false_pos = set(new_false_pos.values())

    # Walk the keys in sorted order: identical to 0..n-1 for the usual
    # contiguous indices, but does not break if the keys have gaps.
    corrected = [
        original_false_pos[key]
        for key in sorted(original_false_pos)
        if original_false_pos[key] not in still_false_pos
    ]

    return len(corrected), dict(enumerate(corrected))


def write_output(difference_fields, file_name):
    """Write the corrected false positives to a single-column CSV file.

    Args:
        difference_fields: Collection indexable by ``0 .. len - 1`` (such as
            the index -> value dict built by ``compare_false_positives``)
            holding the field values to write.
        file_name: Path of the CSV file to create or overwrite.

    Raises:
        OSError: If the output file cannot be opened for writing.
    """
    # The context manager flushes and closes the file deterministically;
    # newline='' stops the csv module's line endings from being translated.
    with open(file_name, 'w', newline='') as out_handle:
        writer = csv.writer(out_handle)

        # Header row naming the single output column.
        writer.writerow([field_difference_tag])
        # One row per corrected field, in index order.
        writer.writerows(
            [difference_fields[i]] for i in range(len(difference_fields))
        )


def check_args(old_csv, new_csv, output_file):
    """Validate the command-line paths before any work is done.

    Both input files must exist; otherwise an error is printed for each
    missing file, the help text is shown and the program exits with a
    non-zero status. If the output file already exists, the user is asked
    to confirm that it may be overwritten.

    Args:
        old_csv: Path to the original false positive CSV file.
        new_csv: Path to the new false positive CSV file.
        output_file: Path of the CSV file that will be written.

    Raises:
        SystemExit: With status 1 when an input file is missing, or with
            the status chosen by ``yes_or_no`` when the user declines the
            overwrite.
    """
    # Check the inputs first so the user is not asked about overwriting the
    # output of a run that cannot start anyway.
    missing_inputs = [
        path for path in (old_csv, new_csv) if not os.path.exists(path)
    ]
    for path in missing_inputs:
        print('%s No file at %s' % (red_error, path))

    if missing_inputs:
        parser.print_help()
        print('Exiting...')
        # Non-zero status so shells and calling scripts can see the failure.
        sys.exit(1)

    if os.path.exists(output_file):
        print('%s File exists: %s' % (yellow_warning, output_file))
        print('It will be overwritten.')
        yes_or_no(decision_message)


def yes_or_no(message):
    """Prompt on the console until the user answers yes or no.

    Answers are case-insensitive and surrounding whitespace is ignored.
    Returns normally on ``y``/``yes``; exits the program with status 0 on
    ``n``/``no``. Any other answer re-prompts with a hint.

    Args:
        message: Prompt text shown for the first question.

    Raises:
        SystemExit: With status 0 when the user answers no.
    """
    prompt = message
    # Loop rather than recurse so repeated invalid answers cannot exhaust
    # the recursion limit.
    while True:
        decision = input(prompt).strip().lower()
        if decision in ('y', 'yes'):
            return
        if decision in ('n', 'no'):
            sys.exit(0)
        prompt = '   Invalid input, enter Y(es) or N(o): '


def main(original_file, new_file, output):
    """Compare two false positive CSV files and write the difference.

    Reads the false positives of the original and the new file (in that
    order), determines which original ones are absent from the new file,
    and writes those to ``output``.

    Args:
        original_file: Path to the original false positive CSV file.
        new_file: Path to the new false positive CSV file.
        output: Path of the CSV file to write.
    """
    false_pos_by_run = [
        read_false_positives(csv_path)
        for csv_path in (original_file, new_file)
    ]
    # Only the corrected fields are written; the count is not needed here.
    corrected_fields = compare_false_positives(*false_pos_by_run)[1]
    write_output(corrected_fields, output)


if __name__ == '__main__':
    # Script entry point: declare the options, parse, validate, then run.
    required_args = parser.add_argument_group('Required')
    optional_args = parser.add_argument_group('Optional')

    # All three file options are mandatory and differ only in flags and help.
    for flags, help_text in (
        (('-s', '--source'), 'The original false positive CSV file.'),
        (('-n', '--new'), 'The new false positive CSV file.'),
        (('-o', '--output'), 'The output CSV file.'),
    ):
        required_args.add_argument(*flags, required=True, help=help_text)
    # The parser is created with add_help=False, so help is added by hand
    # to place it in the 'Optional' group.
    optional_args.add_argument('-h', '--help', action="help", help='Prints the help message.')

    # Parse the command line and keep the three paths.
    args = parser.parse_args()
    original_csv, new_csv, output_csv = args.source, args.new, args.output

    # Validate first, then run the comparison.
    check_args(original_csv, new_csv, output_csv)
    main(original_csv, new_csv, output_csv)