142 lines
3.8 KiB
Python
142 lines
3.8 KiB
Python
|
|
import json
|
||
|
|
|
||
|
|
# Top-level keys of a graph-config JSON document.
FILES = 'files'
COLORS = 'colors'
LABELS = 'labels'

# Keys used inside the LABELS section of a graph config.
TITLE = 'Title'
X_LBL = 'xLabel'
Y_LBL = 'yLabel'

# Type tags that terminate each "key=value:type" line of a data file.
NUM_VAL = 'number'
STR_VAL = 'string'
|
||
|
|
|
||
|
|
# Color assigned to each metric series in a package-comparison graph.
# NOTE(review): the values look like matplotlib color specs - confirm against
# the plotting code that consumes this config.
package_compare_colors = {
    'Correct & High Confidence': 'g',
    'Incorrect & Low Confidence': 'y',
    'Incorrect & High Confidence': 'r',
    'Correct & Low Confidence': 'b',
    'Correct Pagination': 'm',
    'Classification Score': 'teal',
}
|
||
|
|
|
||
|
|
# Color assigned to each metric series in a doctype-comparison graph.
# NOTE(review): the values look like matplotlib color specs - confirm against
# the plotting code that consumes this config.
doctype_compare_colors = {
    'Correct & High Confidence': 'g',
    'Incorrect & Low Confidence': 'y',
    'Incorrect & High Confidence': 'r',
    'Correct & Low Confidence': 'b',
}
|
||
|
|
|
||
|
|
|
||
|
|
def load_files(files):
    """Average the classification metrics stored in several data files.

    Each path is read with ``load_data_from_file`` and must supply a numeric
    value for every metric listed in ``metric_keys``; a file that lacks one
    of them raises ``KeyError``.

    Args:
        files: Sized collection (e.g. a list) of data-file paths.

    Returns:
        dict mapping each metric name to its arithmetic mean over ``files``.
        When ``files`` is empty every metric is reported as 0.0 instead of
        failing with ``ZeroDivisionError``.
    """
    metric_keys = (
        'Incorrect & High Confidence',
        'Correct & Low Confidence',
        'Correct & High Confidence',
        'Incorrect & Low Confidence',
        'Correct Pagination',
    )
    totals = dict.fromkeys(metric_keys, 0.0)

    file_count = len(files)
    if file_count == 0:
        # Nothing to average; avoid dividing by zero below.
        return totals

    for path in files:
        cfg_data = load_data_from_file(path)
        for key in metric_keys:
            totals[key] += cfg_data[key]

    return {key: total / file_count for key, total in totals.items()}
|
||
|
|
|
||
|
|
|
||
|
|
def load_data_from_file(file_path):
    """Read a data file into a dictionary.

    Every line of the file is handed to ``parse_line`` and the resulting
    (key, value) pairs are collected in file order, so a key that occurs
    more than once keeps the value from its last line.

    Args:
        file_path: Path of the data file to read.

    Returns:
        dict of the parsed keys and values.
    """
    with open(file_path) as data_file:
        raw_lines = data_file.readlines()

    parsed = {}
    for raw in raw_lines:
        name, parsed_value = parse_line(raw)
        parsed[name] = parsed_value
    return parsed
|
||
|
|
|
||
|
|
|
||
|
|
def parse_line(line_txt):
    """Split one ``tag=value:type`` data line into its tag and typed value.

    A single trailing newline is removed first. The tag is everything before
    the first '='; the type is everything after the LAST ':' in the
    remainder, so a value that itself contains colons (for example a title
    such as ``Run 1: baseline``) is kept intact.

    Args:
        line_txt: One line of a data file, with or without its newline.

    Returns:
        Tuple ``(tag, value)`` where ``value`` has been converted by
        ``cast_to_type`` according to the type suffix.

    Raises:
        ValueError: If the line has no '=' or the part after it has no ':'.
    """
    if line_txt.endswith('\n'):
        line_txt = line_txt[:-1]

    tag, value = line_txt.split('=', 1)
    # rsplit: the type tag is the final field, the value may contain ':'.
    value, val_type = value.rsplit(':', 1)
    return tag, cast_to_type(val_type, value)
|
||
|
|
|
||
|
|
|
||
|
|
def cast_to_type(t_val, val):
    """Convert *val* according to the type tag *t_val*.

    Args:
        t_val: Type tag; ``NUM_VAL`` and ``STR_VAL`` are recognised.
        val: The raw value to convert.

    Returns:
        ``float(val)`` for ``NUM_VAL``, ``str(val)`` for ``STR_VAL``, and
        *val* unchanged for any other tag.
    """
    if t_val == NUM_VAL:
        return float(val)
    if t_val == STR_VAL:
        return str(val)
    # Unrecognised tags pass the value through untouched.
    return val
|
||
|
|
|
||
|
|
|
||
|
|
def make_avg_cfg(files, title, config_file_loc):
    """Write an averaged data file plus the graph config that references it.

    The data-file path is derived from ``config_file_loc`` by replacing
    'graph-config.json' with 'graph-data.txt'. The averages of ``files`` are
    written there via ``make_global_graph_data_file``, and a JSON config with
    a single 'Averages' entry pointing at that file is written to
    ``config_file_loc``.

    Args:
        files: Data-file paths whose metrics are averaged.
        title: Graph title stored in the config's labels.
        config_file_loc: Path (str) of the JSON config file to write.
    """
    data_file = config_file_loc.replace('graph-config.json', 'graph-data.txt')
    if data_file == config_file_loc:
        # The config name did not contain 'graph-config.json'. Without a
        # distinct data path the config written below would overwrite the
        # averaged data and then list itself as its own data file.
        data_file = config_file_loc + '.graph-data.txt'
    make_global_graph_data_file(files, data_file)

    to_json = {
        FILES: {'Averages': data_file},
        COLORS: package_compare_colors,
        LABELS: get_package_labels(title),
    }

    with open(config_file_loc, 'w+') as writer:
        json.dump(to_json, writer, indent=3)
|
||
|
|
|
||
|
|
|
||
|
|
def make_package_graph_config(files, tags, title, config_file_loc):
    """Write a package-comparison graph config as JSON.

    Args:
        files: Data-file paths, paired by position with ``tags``.
        tags: Name under which each file is listed in the config.
        title: Graph title stored in the config's labels.
        config_file_loc: Path of the JSON config file to write.
    """
    graph_config = {}
    graph_config[FILES] = make_config(files, tags)
    graph_config[COLORS] = package_compare_colors
    graph_config[LABELS] = get_package_labels(title)

    with open(config_file_loc, 'w+') as config_out:
        json.dump(graph_config, config_out, indent=3)
|
||
|
|
|
||
|
|
|
||
|
|
def make_global_graph_data_file(files, out_path):
    """Write the averaged metrics of *files* to *out_path*.

    The averages come from ``load_files``; each one becomes a line of the
    form ``key=value:<NUM_VAL>``.

    Args:
        files: Data-file paths to average.
        out_path: Path of the data file to create or overwrite.
    """
    averages = load_files(files)
    rows = ['%s=%s:%s\n' % (name, avg, NUM_VAL) for name, avg in averages.items()]

    # newline='' writes the '\n' terminators exactly as given.
    with open(out_path, 'w+', newline='') as data_out:
        data_out.writelines(rows)
|
||
|
|
|
||
|
|
|
||
|
|
def change_title(old_file, new_file, new_title):
    """Copy a graph config to a new file with a different title.

    Args:
        old_file: Path of the existing JSON config to read.
        new_file: Path the modified config is written to.
        new_title: Value stored under the labels' title key.
    """
    config = load_config(old_file)
    label_section = config[LABELS]
    label_section[TITLE] = new_title

    with open(new_file, 'w+') as config_out:
        json.dump(config, config_out)
|
||
|
|
|
||
|
|
|
||
|
|
def load_config(config_file):
    """Parse the JSON document stored at *config_file* and return it.

    Args:
        config_file: Path of the JSON file to read.

    Returns:
        The decoded JSON content.
    """
    with open(config_file) as cfg_reader:
        return json.load(cfg_reader)
|
||
|
|
|
||
|
|
|
||
|
|
def load_files_from_config(config_file):
    """Return the data-file paths listed in a graph config.

    Args:
        config_file: Path of the JSON config to read.

    Returns:
        list of the values stored in the config's files section, as produced
        by ``get_files``.
    """
    config = load_config(config_file)
    file_section = config[FILES]
    return get_files(file_section)
|
||
|
|
|
||
|
|
|
||
|
|
def get_files(config_files):
    """Return the file entries of a tag-to-file mapping as a list.

    Args:
        config_files: Mapping of tag to file path.

    Returns:
        list of the mapping's values, in the mapping's iteration order.
    """
    return [path for _tag, path in config_files.items()]
|
||
|
|
|
||
|
|
|
||
|
|
def make_config(files, tags):
    """Pair each file with the tag at the same position.

    Args:
        files: Sequence of file paths.
        tags: Sequence of tags; must be at least as long as ``files``
            (a shorter one raises ``IndexError``). Extra tags are ignored.

    Returns:
        dict mapping ``tags[i]`` to ``files[i]``; a repeated tag keeps the
        file from its last occurrence.
    """
    return {tags[position]: path for position, path in enumerate(files)}
|
||
|
|
|
||
|
|
|
||
|
|
def get_package_labels(title):
    """Build the labels section for a package-comparison graph.

    Args:
        title: Graph title.

    Returns:
        dict with the x-axis label, y-axis label and title, in that order.
    """
    labels = {}
    labels[X_LBL] = 'Document.'
    labels[Y_LBL] = 'Pages Classified (%).'
    labels[TITLE] = title
    return labels
|
||
|
|
|
||
|
|
|
||
|
|
def get_doctype_labels(title):
    """Build the labels section for a doctype-comparison graph.

    Args:
        title: Graph title.

    Returns:
        dict with the x-axis label, y-axis label and title, in that order.
    """
    labels = {}
    labels[X_LBL] = 'Doctype'
    labels[Y_LBL] = 'Pages Classified (%)'
    labels[TITLE] = title
    return labels
|