ScoreWalker/scorewalker-utils/WalkerComparer/WalkerComparer.py

126 lines
4.2 KiB
Python
Raw Normal View History

2025-03-13 06:13:53 +00:00
import csv
import argparse
import os
# Identity of the tool; both values are handed to argparse below.
program_name = 'Walker Compare'
program_description = 'Compares the false positives between runs.'

# The built-in help option is disabled here (add_help=False); the script's
# entry point registers its own -h/--help argument on this parser.
parser = argparse.ArgumentParser(
    prog=program_name,
    description=program_description,
    add_help=False,
)

# Console labels wrapped in ANSI colour escape sequences.
red_error = '\033[91mError:\033[0m'
yellow_warning = '\033[93mWARNING:\033[0m'
blue_okay = '\033[94mOK\033[0m'

# Coloured program name followed by a dashed underline on the next line.
program_header = (
    '\033[95m%s\033[0m\n'
    '-----------------------'
) % program_name

# Prompt shown when the user must confirm an action.
decision_message = ' Is this okay? (Y/N): '

# Value of the status column that marks a row as a false positive.
false_pos_tag = 'FalsePos'

# Column names looked up in the input CSV files.
status_tag = 'Status'
walker_field_name_tag = 'WalkerType'
clux_field_name_tag = 'Clux DocType'

# Header texts for the output CSV.
field_difference_tag = 'Corrected False Positives'
field_count_tag = 'Number of fields corrected'
def read_false_positives(in_file):
    """Collect the walker type of every false-positive row in a CSV file.

    The file is parsed with ``csv.DictReader``, so its first line is used as
    the header. Rows whose ``status_tag`` column equals ``false_pos_tag`` are
    kept; every other row is ignored.

    Args:
        in_file: Path of the CSV file to read.

    Returns:
        A dict mapping a zero-based running index (in file order) to the
        ``walker_field_name_tag`` value of each false-positive row.

    Raises:
        OSError: If the file cannot be opened.
        KeyError: If a row has no status column, or a false-positive row has
            no walker-type column.
    """
    false_pos_list = {}
    # The with-block guarantees the handle is closed even if parsing fails;
    # newline='' lets the csv module do its own line-ending handling.
    with open(in_file, newline='') as csv_file:
        for row in csv.DictReader(csv_file):
            # We only care about false positives...
            if row[status_tag] == false_pos_tag:
                # The next free index is simply the current size of the dict.
                false_pos_list[len(false_pos_list)] = row[walker_field_name_tag]
    return false_pos_list
def compare_false_positives(original_false_pos, new_false_pos):
    """Find the original false positives that no longer occur in the new run.

    Args:
        original_false_pos: Mapping of consecutive indices ``0..n-1`` to field
            names from the original run.
        new_false_pos: Mapping whose values are the field names from the new
            run. The values must be hashable.

    Returns:
        A ``(count, fields)`` tuple. ``fields`` maps a zero-based running
        index to each original field name that is absent from the new run, in
        original order, with repeated names kept. ``count`` is the number of
        entries in ``fields``.
    """
    # Build the lookup set once, so each membership test is O(1) instead of a
    # linear scan over the values view on every iteration.
    still_present = set(new_false_pos.values())

    difference_fields = {}
    # The original mapping is keyed by consecutive indices, so it is walked by
    # index to keep the result in that order.
    for index in range(len(original_false_pos)):
        false_val = original_false_pos[index]
        if false_val not in still_present:
            difference_fields[len(difference_fields)] = false_val
    return len(difference_fields), difference_fields
def write_output(difference_fields, file_name):
    """Write the corrected false positives to a one-column CSV file.

    The first row is the ``field_difference_tag`` header; each following row
    holds one field name. An existing file is overwritten.

    Args:
        difference_fields: Mapping of consecutive indices ``0..n-1`` to the
            field names to write.
        file_name: Path of the CSV file to create.

    Raises:
        OSError: If the file cannot be opened for writing.
    """
    # The with-block flushes and closes the file deterministically; plain 'w'
    # is enough because the file is never read back.
    with open(file_name, 'w', newline='') as out_file:
        writer = csv.writer(out_file)
        # Header row first...
        writer.writerow([field_difference_tag])
        # ...then one row per field, in index order. Rows are lists rather
        # than one-element sets, which is the sequence type writerow expects.
        for index in range(len(difference_fields)):
            writer.writerow([difference_fields[index]])
def check_args(old_csv, new_csv, output_file):
    """Validate the command-line paths before any work is done.

    If the output file already exists the user is warned and asked to confirm
    the overwrite through ``yes_or_no``. Each input CSV that does not exist is
    then reported; if any is missing, the help text is printed and the program
    terminates.

    Args:
        old_csv: Path of the original false positive CSV file.
        new_csv: Path of the new false positive CSV file.
        output_file: Path of the CSV file that will be written.

    Raises:
        SystemExit: With status 1 when an input file is missing.
    """
    if os.path.exists(output_file):
        print('%s File exists: %s' % (yellow_warning, output_file))
        print('It will be overwritten.')
        yes_or_no(decision_message)

    # Report every missing input, not just the first, before giving up.
    missing_inputs = [path for path in (old_csv, new_csv) if not os.path.exists(path)]
    for path in missing_inputs:
        print('%s No file at %s' % (red_error, path))

    if missing_inputs:
        parser.print_help()
        print('Exiting...')
        # A non-zero status tells calling shells and scripts that the run
        # failed. SystemExit is raised directly because the `exit` helper is
        # injected by the site module and is meant for interactive use.
        raise SystemExit(1)
def yes_or_no(message):
    """Ask a yes/no question on the console until a valid answer is given.

    Answers are compared case-insensitively and surrounding whitespace is
    ignored. ``y``/``yes`` returns normally; ``n``/``no`` terminates the
    program. Any other answer prompts again with an "invalid input" message.

    Args:
        message: Prompt text shown for the first attempt.

    Raises:
        SystemExit: With status 0 when the user answers no.
    """
    prompt = message
    # A loop rather than recursion, so repeated invalid answers do not deepen
    # the call stack.
    while True:
        decision = input(prompt).strip().lower()
        if decision in ('y', 'yes'):
            return
        if decision in ('n', 'no'):
            # Declining is a deliberate user choice, hence a zero status.
            raise SystemExit(0)
        prompt = ' Invalid input, enter Y(es) or N(o): '
def main(original_file, new_file, output):
    """Run the comparison end to end.

    Reads the false positives from both CSV files, determines which of the
    original ones are missing from the new file, and writes those to
    ``output``.

    Args:
        original_file: Path of the original false positive CSV file.
        new_file: Path of the new false positive CSV file.
        output: Path of the CSV file to write.
    """
    before = read_false_positives(original_file)
    after = read_false_positives(new_file)
    # Only the field mapping is written out; the count is not used here.
    _, corrected = compare_false_positives(before, after)
    write_output(corrected, output)
if __name__ == '__main__':
    # Script entry point: declare the command-line options, parse them,
    # validate the paths, then run the comparison.
    required_args = parser.add_argument_group('Required')
    optional_args = parser.add_argument_group('Optional')

    # All three file options are mandatory and differ only in flags and help.
    for short_flag, long_flag, help_text in (
        ('-s', '--source', 'The original false positive CSV file.'),
        ('-n', '--new', 'The new false positive CSV file.'),
        ('-o', '--output', 'The output CSV file.'),
    ):
        required_args.add_argument(short_flag, long_flag, required=True, help=help_text)

    # The parser was created with add_help=False, so help is registered by hand.
    optional_args.add_argument('-h', '--help', action="help", help='Prints the help message.')

    args = parser.parse_args()
    original_csv, new_csv, output_csv = args.source, args.new, args.output

    # Stop early on missing inputs or a declined overwrite...
    check_args(original_csv, new_csv, output_csv)
    # ...otherwise do the actual work.
    main(original_csv, new_csv, output_csv)