"""
|
|
Information
|
|
-----------
|
|
|
|
This script is a tool for running a full test over the ScoreWalker Classification engine. The following tools are called
|
|
in order to achieve this goal:
|
|
|
|
#) Builds WalkerIndexer and WalkerClassifier.
|
|
#) The necessary steps for indexing.
|
|
#) WalkerClassifier to preform classification on the test document.
|
|
#) Several other Python tools to perform analysis on the results.
|
|
#) ValidationWalker to validate the output from the classification engine against the JSON schemas used in
|
|
production.
|
|
|
|
.. moduleauthor:: Chris Diesch <cdiesch@sequencelogic.net>
|
|
|
|
|
|
Commandline Usage
|
|
------------------
|
|
Usage: ``Tester.py [-h, --help] [-b, --build] [-t, --test_dir] {TEST_DIR} [-l, --library_dir] {LIB_DIR}
|
|
[-c, --config] {CFG_FILE} [-o, --out_dir] {OUT_DIR}
|
|
|
|
Required Arguments:
|
|
|
|
``-t TEST_DIR, --test_dir TEST_DIR``
|
|
Where ``TEST_DIR`` is the path to the root folder to load the test data from.
|
|
|
|
``-l LIB_DIR, --library_dir LIB_DIR``
|
|
Where ``LIB_DIR`` is the path to the library to run the test with.
|
|
|
|
``-c CFG_FILE, --config CFG_FILE``
|
|
Where ``CFG_FILE`` is the path to the config file to use for testing.
|
|
|
|
``-o OUT_DIR, --out_dir OUT_DIR``
|
|
Where ``OUT_DIR`` is the path to save the output to.
|
|
|
|
Optional Arguments:
|
|
|
|
``-h, --help``
|
|
Prints the help message.
|
|
|
|
``-b, --build``
|
|
Runs the maven build for the classification tools.
|
|
|
|
|
|
Python Module Usage
|
|
--------------------
|
|
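
The test can also be driven from another script by calling :meth:`main` directly. A minimal
sketch (all paths are illustrative)::

    import Tester

    Tester.main(lib_dir='C:/Libraries/MyLibrary',
                output_directory='C:/Results',
                test_data_root='C:/TestData',
                config_file_loc='C:/TestData/config.json',
                do_build=False)
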
"""

import argparse
import os
import subprocess
import datetime
import time
import shutil
import sys
import csv
import json
import operator

import ConsoleUtils

import AccuracyGraphSetup

program_name = 'TestWalker'
program_description = 'This tool handles running the classification engine and several tools to perform analysis on ' \
                      'the results.'
author = 'Chris Diesch'

# The argument parser for the program.
parser = argparse.ArgumentParser(prog=program_name, description=program_description, add_help=False)

# Error and Warning console values (ANSI colour escapes):
red_error = '\033[91mError:\033[0m'
yellow_warning = '\033[93mWARNING:\033[0m'
blue_okay = '\033[94mOK\033[0m'

build_date = '2017.07.24'  # datetime.datetime.now().strftime('%Y.%m.%d-%H.%M.%S')
program_version = '1.7.2'
side_bound_char = '|'
line_break_char = '-'
corner_char = '+'
line_break_size = 150
line_break = line_break_char * line_break_size

console_line_break = '-' * line_break_size

DEF_NUM_TOP_DOCS = 20
DEF_TERM_LENGTH = 1
DEF_CONF_THRESHOLD = 60
DEF_MIN_PHRASE_SIZE = 5
DEF_MAX_PHRASE_SIZE = 30

paginate = True

DEF_GRAPH_TITLE = 'Classification Accuracy'

graph_config_file_name = 'graph-config.json'
test_config_file_name = 'test-config.txt'
graph_name = 'Classification Accuracy.pdf'

classification_time_tag = 'Classification Run Time'

clux_file_tag = 'CLUX File'
package_file_tag = 'Package File'

package_name_tag = 'Package Name'
classification_file_tag = 'Classification'
tree_walker_file_tag = 'Tree Walker'
term_walker_file_tag = 'Term Walker'
fp_counter_file_tag = 'False Positive Counts'
accuracy_file_tag = 'Graph Data'
graph_config_file_tag = 'Graph Config'
graph_file_tag = 'Graph'
doctype_graph_tag = 'Doctype Graph'

idx_root_tag = 'Index Root'
test_root_tag = 'Test Root'
logs_root_tag = 'Logs Root'

# Keys into the executables/build_roots dicts populated by load_executables().
tokenizer = 'tokenizer'
phrase_maker = 'phrase maker'
phrase_loader = 'phrase loader'
indexer = 'indexer'
classifier = 'classifier'
diff_utils = 'diff-utils'
walker_validator = 'walker validator'
tree_walker = 'tree walker'
status_counter = 'false positive counter'
doctype_graph = 'doctype graph'
term_walker = 'term walker'
accuracy = 'accuracy'
grapher = 'package graph maker'
term_diff = 'walker term diff'

# Populated by load_executables().
current_dir = ''
phrase_exe_name = ''
build_roots = {}
executables = {}

printer = None


def write_line_break():
    printer.write_no_prefix(console_line_break)


def load_executables():
    """
    Loads the executables needed to perform a test run and analysis.

    Returns:
        ``None``

    """
    global executables, phrase_exe_name, current_dir, build_roots

    current_dir = os.getcwd()

    # NOTE: hard-coded developer path; the phrase maker binaries are expected to live here.
    phrase_exe_root = r'C:\Users\chris\Documents\Code\Tests\KMeans\L3Input\Programs'

    walker_validate_root = os.path.abspath(os.path.join(current_dir, os.pardir))
    walker_validate_root = os.path.join(walker_validate_root, 'ValidationWalker')

    tree_walker_root = os.path.abspath(os.path.join(current_dir, os.pardir))
    tree_walker_root = os.path.join(tree_walker_root, 'TreeWalker')

    accuracy_root = os.path.abspath(os.path.join(current_dir, os.pardir))
    accuracy_root = os.path.join(accuracy_root, 'AccuracyCounter')

    diff_utils_root = os.path.abspath(os.path.join(current_dir, os.pardir, os.pardir, 'diff-utils'))

    classify_root = os.path.abspath(os.path.join(current_dir, os.pardir))
    classify_root = os.path.abspath(os.path.join(classify_root, os.pardir))
    classify_root = os.path.abspath(os.path.join(classify_root, 'scorewalker'))

    tokenizer_jar_root = os.path.join(classify_root, 'walker-analysis', 'target')
    tokenizer_jar_name = os.path.join(tokenizer_jar_root, 'tokenizer-one-jar.jar')

    phrase_exe_name = os.path.join(phrase_exe_root, 'phrases.exe')

    phrase_loader_name = os.path.abspath(os.path.join(current_dir, os.pardir))
    phrase_loader_name = os.path.join(phrase_loader_name, 'PhraseCountLoader')
    phrase_loader_name = os.path.join(phrase_loader_name, 'PhraseCountLoader.py')

    classify_jar_root = os.path.abspath(os.path.join(classify_root, 'walker-classifier', 'target'))
    classify_jar_name = os.path.join(classify_jar_root, 'walker-classifier-one-jar.jar')

    walker_validator_name = os.path.join(walker_validate_root, 'ValidationWalker.py')

    tree_walker_name = os.path.join(tree_walker_root, 'TreeWalker.py')
    doctype_graph_name = os.path.join(tree_walker_root, 'DoctypeGraph.py')
    fp_count_name = os.path.join(tree_walker_root, 'StatusCounter.py')
    term_walker_name = os.path.join(tree_walker_root, 'TermWalker.py')

    accuracy_name = os.path.join(accuracy_root, 'Accuracy.py')
    graph_maker_name = os.path.join(accuracy_root, 'AccuracyGraph.py')

    index_jar_root = os.path.abspath(os.path.join(classify_root, 'walker-indexer', 'target'))
    index_jar_name = os.path.join(index_jar_root, 'walker-indexer-one-jar.jar')

    walker_term_diff_root = os.path.abspath(os.path.join(classify_root, 'walker-term-diff', 'target'))
    walker_term_diff_name = os.path.join(walker_term_diff_root, 'walker-term-diff-one-jar.jar')

    build_roots = {classifier: classify_root,
                   diff_utils: diff_utils_root}

    executables = {tokenizer: tokenizer_jar_name,
                   term_diff: walker_term_diff_name,
                   phrase_loader: phrase_loader_name,
                   indexer: index_jar_name,
                   classifier: classify_jar_name,
                   walker_validator: walker_validator_name,
                   tree_walker: tree_walker_name,
                   status_counter: fp_count_name,
                   doctype_graph: doctype_graph_name,
                   term_walker: term_walker_name,
                   accuracy: accuracy_name,
                   grapher: graph_maker_name}

    for file in os.listdir(phrase_exe_root):
        executables[file] = os.path.join(phrase_exe_root, file)


def open_sublime(file_path):
    """
    Opens SublimeText on the given file.

    Args:
        ``file_path`` The file to open with SublimeText.

    Returns:
        ``None``

    """
    sublime_cmd = [r'C:\Program Files\Sublime Text 3\sublime_text.exe', file_path]
    subprocess.Popen(sublime_cmd)


def open_log_files(std_out_path, std_err_path):
    """
    Opens the log files and adds a header to them.

    Args:
        ``std_out_path`` -- ``str`` The path to save the standard output to.

        ``std_err_path`` -- ``str`` The path to save the standard error to.

    Returns:
        ``file, file`` -- The writer for the standard out file, the writer for the standard error file.
    """
    log_line_break = '=' * 100
    log_std_err = '%s\n%s\n%s\n' % (log_line_break, '||{:^96}||'.format('STANDARD ERROR'), log_line_break)
    log_std_out = '%s\n%s\n%s\n' % (log_line_break, '||{:^96}||'.format('STANDARD OUT'), log_line_break)

    # Write the headers first (and close), so they are flushed before any process output arrives.
    with open(std_out_path, 'a+') as std_out:
        std_out.write(log_std_out)

    with open(std_err_path, 'a+') as std_err:
        std_err.write(log_std_err)

    std_out = open(std_out_path, 'a+')
    std_err = open(std_err_path, 'a+')

    return std_out, std_err


def run_process(proc_name, proc_cmd, log_dir):
    """
    Runs a process, logs the console output, and gets the time to execute.

    Args:
        ``proc_name`` -- ``str`` The name of the process to run.

        ``proc_cmd`` -- ``list(str)`` The command string for the process.

        ``log_dir`` -- ``str`` The path to save the log files to.

    Returns:
        ``float`` The time (in seconds) to execute the command.

    """
    proc_err_log = os.path.join(log_dir, '%s-std-err.log' % proc_name)
    proc_std_log = os.path.join(log_dir, '%s-std-out.log' % proc_name)

    std_out_writer, std_err_writer = open_log_files(proc_std_log, proc_err_log)

    print('Running %s' % proc_name)

    start_time = time.time()

    process = subprocess.Popen(proc_cmd, shell=True, stdout=std_out_writer, stderr=std_err_writer)
    process.wait()

    run_time = time.time() - start_time

    time.sleep(0.05)
    process.poll()

    return_code = int(process.returncode)
    if return_code != 0:
        print('%s Process completed with return code %d.' % (proc_name, return_code))
        print(' Opening log files...')
        std_out_writer.close()
        std_err_writer.close()

        open_sublime(std_out_writer.name)
        open_sublime(std_err_writer.name)

        sys.exit(return_code)

    std_out_writer.close()
    std_err_writer.close()

    print('%s completed (%.4f s)' % (proc_name, run_time))
    return run_time


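# A typical call looks like the sketch below (the command is illustrative; the run_* wrappers
# that follow build the real ones). A non-zero exit code aborts the whole test run and opens
# both log files in SublimeText.
#
#     run_time = run_process('Walker Indexer', ['java', '-jar', executables[indexer]], log_dir)

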
def run_build(log_dir):
    """
    Runs the maven build for the classification and indexing engines.

    Args:
        ``log_dir`` -- ``str`` The path to save log files.

    Returns:
        ``None``

    """
    mvn_home = os.environ.get('MAVEN_HOME')
    mvn_path = os.path.join(mvn_home, os.path.join('bin', 'mvn.cmd'))
    # Change dir and run maven
    # os.chdir(build_roots[diff_utils])
    # mvn_cmd = [mvn_path, 'clean', 'install']
    # run_process('DiffUtils Maven Build', mvn_cmd, log_dir)

    # Change dir and run maven
    os.chdir(build_roots[classifier])
    mvn_cmd = [mvn_path, '-DskipTests', 'install']

    run_process('ScoreWalker Maven Build', mvn_cmd, log_dir)

    os.chdir(current_dir)


def run_tokenizer(lib_dir, index_dir, log_dir):
    """
    Runs the tokenizer using :meth:`run_process`.

    Args:
        ``lib_dir`` -- ``str`` The path to the library to tokenize.

        ``index_dir`` -- ``str`` The path to the index being used to classify.

        ``log_dir`` -- ``str`` The path to the log directory.

    Returns:
        ``None``

    """
    tokenizer_cmd = ['java', '-jar', executables[tokenizer],
                     '-I', '"'+index_dir+'"',
                     '-L', '"'+lib_dir+'"',
                     '-O']

    run_process('Tokenizer', tokenizer_cmd, log_dir)


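# Schematically, the command above runs as:
#
#     java -jar tokenizer-one-jar.jar -I "<index_dir>" -L "<lib_dir>" -O
#
# The flag meanings are inferred from the call site here; the jar defines the actual contract.

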
def run_walker_term_diff(lib_dir, index_dir, log_dir):
    """Runs the walker term diff over each group of similar doctypes listed in the config."""
    # NOTE: relies on the module-level ``config_file`` set in the __main__ block.
    with open(config_file) as cfg_file:
        too_similar = json.load(cfg_file)

    too_similar = too_similar['CLASSIFYWALKER']
    too_similar = too_similar['similarDoctypes']

    for similar_list in too_similar:
        term_diff_cmd = ['java', '-jar', executables[term_diff],
                         '--libRoot', lib_dir,
                         '--outRoot', index_dir,
                         '--doctypes'] + similar_list

        run_process('Walker Term Diff over doctypes %s' % similar_list, term_diff_cmd, log_dir)


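# The config file is assumed to contain a section shaped like the sketch below (the doctype
# names are illustrative); each inner list is one group of doctypes considered too similar:
#
#     {
#         "CLASSIFYWALKER": {
#             "similarDoctypes": [["DoctypeA", "DoctypeB"], ["DoctypeC", "DoctypeD"]]
#         }
#     }

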
def run_phrase_maker(files, min_phrase_len, max_phrase_len, phrase_file, log_file):
    """Runs the phrase maker over ``files``, appending the phrase counts to ``phrase_file``."""
    folder, doctype = os.path.split(phrase_file)
    split_idx = doctype.rfind('.')
    doctype = doctype[:split_idx]

    phrases_cmd = [phrase_exe_name,
                   '-c', '80',
                   '-p', str(min_phrase_len),
                   '-P', str(max_phrase_len),
                   '-ol'] + files

    with open(log_file, 'a+') as tmp:
        tmp.write('Running Phrase Maker on %s\n%s\n' % (doctype, console_line_break))

    std_out = open(phrase_file, 'a+')
    std_err = open(log_file, 'a+')
    process = subprocess.Popen(phrases_cmd, stdout=std_out, stderr=std_err)

    with open(log_file, 'a+') as tmp:
        tmp.write('%s\n' % console_line_break)

    process.wait()
    std_out.close()
    std_err.close()


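# For a doctype with tokenized pages a.tkn and b.tkn, the command built above is roughly
# (values shown for the default phrase sizes):
#
#     phrases.exe -c 80 -p 5 -P 30 -ol a.tkn b.tkn
#
# stdout (the phrase counts) is appended to the .phrasecount file, stderr to the log file.

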
def run_phrase_loader(phrases_dir, out_file_name, min_phrase_len, log_dir):
    phrase_loader_cmd = ['python', executables[phrase_loader],
                         '-i', phrases_dir,
                         '-o', out_file_name,
                         '-m', str(min_phrase_len)]

    run_time = run_process('Phrase Loader', phrase_loader_cmd, log_dir)
    return run_time


def run_indexer(idx_dir, lib_dir, log_dir):
    index_cmd = ['java', '-jar', executables[indexer],
                 '-I', '"'+idx_dir+'"',
                 '-D', '"'+lib_dir+'"']

    run_time = run_process('Walker Indexer', index_cmd, log_dir)

    write_line_break()

    return run_time


def new_run_classifier(index_dir, config_file_path, dest_file, package, log_dir,
                       min_memory=128, max_memory=4096, thread_count=8):
    # NOTE: hard-coded developer path to the run-walker-classifier wrapper script.
    walker_loc = r'C:\Users\chris\Documents\Code\Git\scorewalker-utils\RunScoreWalker' \
                 r'\sequencelogic-run-walker-classifier.py'

    classify_cmd = ['python', walker_loc,
                    '--index-root', index_dir,
                    '--pkg-path', package,
                    '--config', config_file_path,
                    '--out', dest_file,
                    '--min-memory', str(min_memory),
                    '--max-memory', str(max_memory),
                    '--thread-count', str(thread_count)]

    run_time = run_process('sequencelogic-run-walker-classifier', classify_cmd, log_dir)
    return run_time


def run_classifier(index_dir, config_file_path, dest_file, package, n_lucene, conf, word_len, log_dir):
    # n_lucene, conf, and word_len are currently unused; they are kept for call-site compatibility.
    classify_cmd = ['java', '-jar', executables[classifier],
                    '-D', '"'+dest_file+'"',
                    '-C', '"'+config_file_path+'"',
                    '-I', '"'+index_dir+'"',
                    '-P', '"'+package+'"']

    run_time = run_process('Walker Classifier', classify_cmd, log_dir)

    return run_time


def run_walker_validator(walker_file, log_dir):
    validate_cmd = ['python', executables[walker_validator],
                    '-i', walker_file]

    run_time = run_process('ValidationWalker', validate_cmd, log_dir)

    return run_time


def run_tree_walker(clux_output, engine_output, result_file, log_dir):
    tree_walker_cmd = ['python', executables[tree_walker],
                       '-c', clux_output,
                       '-w', engine_output,
                       '-o', result_file]

    run_time = run_process('Tree Walker', tree_walker_cmd, log_dir)

    return run_time


def run_fp_counter(twk_files, result_file, log_dir):
    fp_count_cmd = ['python', executables[status_counter],
                    '-o', result_file,
                    '--in_files'] + twk_files

    run_time = run_process('Status Counter', fp_count_cmd, log_dir)

    return run_time


def run_doctype_graph(test_dir, data_dir, log_dir):
    doctype_graph_cmd = ['python', executables[doctype_graph],
                         '-i', test_dir,
                         '-o', data_dir]

    run_time = run_process('DoctypeGraph', doctype_graph_cmd, log_dir)
    return run_time


def run_term_walker(tree_walker_output, engine_output, result_file, log_dir):
    term_walker_cmd = ['python', executables[term_walker],
                       '-w', tree_walker_output,
                       '-c', engine_output,
                       '-o', result_file]

    run_time = run_process('Term Walker', term_walker_cmd, log_dir)

    return run_time


def run_accuracy(tree_walker_out, out_file, log_dir):
    accuracy_cmd = ['python', executables[accuracy],
                    '-i', tree_walker_out,
                    '-o', out_file]

    run_time = run_process('Graph Metadata', accuracy_cmd, log_dir)

    return run_time


def run_graph(configuration_file, out_file, log_dir):
    graph_cmd = ['python', executables[grapher],
                 '-o', out_file,
                 '-i', configuration_file]

    run_time = run_process('Graph Maker', graph_cmd, log_dir)

    return run_time


def run_clean_up():
    print('Cleaning up...')

    # for name in executables:
    #     file = executables[name]
    #     if os.path.exists(file):
    #         os.remove(file)

    print('Done cleaning up.')


def make_graph_config(files, tags, dest_file, graph_title=DEF_GRAPH_TITLE):
    # folder, file = os.path.split(dest_file)
    # avg_file = os.path.join(folder, 'global-%s' % file)
    # AccuracyGraphSetup.make_package_graph_config(files, tags, graph_title, dest_file)
    AccuracyGraphSetup.make_avg_cfg(files, graph_title, dest_file)
    # return avg_file


def get_pkg_file_names(parent_dir):
    # Get the appropriate sub-folders.
    if not os.path.exists(parent_dir):
        os.mkdir(parent_dir)

    run_dir = os.path.abspath(parent_dir)
    log_dir = os.path.join(run_dir, 'logs')
    # Make the directories if they don't exist.
    if not os.path.exists(run_dir):
        os.mkdir(run_dir)
    if not os.path.exists(log_dir):
        os.mkdir(log_dir)

    # Get the new full file names.
    classifier_out = os.path.join(run_dir, 'classification-results.json')
    tree_walker_out = os.path.join(run_dir, 'classification-analysis.csv')
    fp_counter_out = os.path.join(run_dir, 'false-positive-counts.csv')
    term_walker_out = os.path.join(run_dir, 'term-analysis.csv')
    accuracy_out = os.path.join(run_dir, 'graph-data.txt')
    graph_out = os.path.join(run_dir, 'accuracy-graph.pdf')
    doctype_graph = os.path.join(run_dir, 'doctype-counts')

    return {classification_file_tag: classifier_out,
            tree_walker_file_tag: tree_walker_out,
            fp_counter_file_tag: fp_counter_out,
            term_walker_file_tag: term_walker_out,
            accuracy_file_tag: accuracy_out,
            graph_file_tag: graph_out,
            logs_root_tag: log_dir,
            doctype_graph_tag: doctype_graph}


def get_root_folder(test_root_path):
    # Runs are grouped into a YYYY.MM.DD folder, with each run in its own HH.MM sub-folder.
    test_root = os.path.join(test_root_path, datetime.datetime.now().strftime('%Y.%m.%d'))
    run_dir = os.path.join(test_root, datetime.datetime.now().strftime('%H.%M'))

    if not os.path.exists(test_root):
        os.mkdir(test_root)

    if not os.path.exists(run_dir):
        os.mkdir(run_dir)

    return run_dir


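# A run started at 17:26 on 2017-10-12 with OUT_DIR C:\Results would therefore write to
# (illustrative):
#
#     C:\Results\2017.10.12\17.26\

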
def get_idx_names(run_dir):
    idx_dir = os.path.join(run_dir, 'index')
    log_dir = os.path.join(run_dir, 'logs')

    # Make the directories if they don't exist.
    if not os.path.exists(log_dir):
        os.mkdir(log_dir)

    if not os.path.exists(idx_dir):
        os.mkdir(idx_dir)

    # Return the folders.
    return {idx_root_tag: idx_dir,
            test_root_tag: run_dir,
            logs_root_tag: log_dir}


def get_tree_walker_files(root_folder):
    files = []
    for folder in os.listdir(root_folder):
        folder = os.path.join(root_folder, folder)
        if os.path.isdir(folder):
            for file in os.listdir(folder):
                file = os.path.join(folder, file)
                if os.path.isfile(file):
                    if file.endswith('analysis-no-centers.csv') and 'term' not in file:
                        files.append(file)

    return files


def load_packages(test_data_path):
    test_packages_root = os.path.join(test_data_path, 'Test-Files')

    result = []
    for file in os.listdir(test_packages_root):
        file = os.path.join(test_packages_root, file)
        if os.path.isfile(file) and file.endswith('.frt'):
            clux_file = file.replace('.frt', '_true.json')
            if os.path.exists(clux_file):
                result.append({package_file_tag: file, clux_file_tag: clux_file})

    return result


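# load_packages() expects each package in Test-Files to sit next to its ground truth, e.g.
# (the numeric name is illustrative):
#
#     Test-Files/
#         597000945.frt         <- package to classify
#         597000945_true.json   <- expected (CLUX) classification for that package

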
def write_cfg_file(settings_dict, test_settings_file, start_time, files):
    with open(test_settings_file, 'w+') as writer:
        writer.write('Tester.py Settings/Results\n')
        writer.write('Start: %s\n' % start_time)
        writer.write('Ended: %s\n' % datetime.datetime.now().strftime('%Y/%m/%d %H:%M:%S'))
        writer.write('%s\n' % console_line_break)
        for key, value in settings_dict.items():
            writer.write('%s = %s\n' % (key, value))
        writer.write('%s\nFiles:\n' % console_line_break)
        for key, value in files.items():
            writer.write(' %s: %s\n' % (key, value))


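# write_cfg_file() produces a plain-text summary: a header with the start and end times, one
# 'key = value' line per settings entry, then a 'Files:' section listing each output tag and path.

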
def make_empty_index_files(new_index_root):
    out_files = ['MaleNames.txt', 'FemaleNames.txt', 'Surnames.txt', 'Phrases.txt']
    for file in out_files:
        file = os.path.join(new_index_root, file)
        with open(file, 'w+') as writer:
            writer.write('')


def copy_index_files(files_to_copy, new_index_root):
    print('Copying index files...')
    for item in files_to_copy:
        folder, name = os.path.split(item)
        new_path = os.path.join(new_index_root, name)
        shutil.copy(item, new_path)
    print('Done copying index files.')


def copy_executables(test_dir):
    global executables

    program_dir = os.environ['SEQUENCELOGICHOME']
    # program_dir = os.path.join(test_dir, 'Programs')
    # os.environ['SEQUENCELOGICHOME'] = program_dir
    program_dir = os.path.join(program_dir, 'bin')
    if not os.path.exists(program_dir):
        os.makedirs(program_dir)

    for name in executables:
        if name != walker_validator:
            original_path = executables[name]
            original_folder, original_name = os.path.split(original_path)
            new_path = os.path.join(program_dir, original_name)
            shutil.copy(original_path, new_path)
            executables[name] = new_path


def new_run_indexer(config_file, lib_path, idx_path, log_dir):
    # NOTE: hard-coded developer path to the run-walker-indexer wrapper script.
    index_wrapper_path = \
        r'C:\Users\chris\Documents\Code\Git\scorewalker-utils\RunScoreWalker\sequencelogic-run-walker-indexer.py'

    cmd = ['python', index_wrapper_path,
           '--config', config_file,
           '--indexRoot', idx_path,
           '--libRoot', lib_path]

    idx_time = run_process('sequencelogic-run-walker-indexer', cmd, log_dir)

    return idx_time


def run_phrase_maker_on_folder(folder, phrases_root, min_phrase_len, max_phrase_len, log_dir):
    files = []
    lib_dir, doctype = os.path.split(folder)
    dest_file = os.path.join(phrases_root, '%s.phrasecount' % doctype)
    log_file = os.path.join(log_dir, 'phrase-maker-std-err.txt')
    for file in os.listdir(folder):
        file = os.path.join(folder, file)
        if os.path.isfile(file) and file.endswith('.tkn'):
            files.append(file)

    if len(files) > 1:
        run_phrase_maker(files, min_phrase_len, max_phrase_len, dest_file, log_file)
    else:
        print('There are not enough examples of "%s" (at least two .tkn files are needed).' % doctype)


def generate_phrases_for_library(library_root, index_root, log_dir, min_phrase_length=DEF_MIN_PHRASE_SIZE,
                                 max_phrase_len=DEF_MAX_PHRASE_SIZE):
    phrase_count_root = os.path.join(index_root, 'Phrase Count Source')
    phrase_file = os.path.join(index_root, 'Phrases.txt')

    if not os.path.exists(phrase_count_root):
        os.mkdir(phrase_count_root)

    run_tokenizer(library_root, index_root, log_dir)

    for folder in os.listdir(library_root):
        # Strip a stray private-use character that shows up in some folder names.
        folder = folder.replace('\uf028', '')
        folder = os.path.join(library_root, folder)
        if os.path.isdir(folder):
            run_phrase_maker_on_folder(folder, phrase_count_root, min_phrase_length, max_phrase_len, log_dir)

    run_phrase_loader(phrase_count_root, phrase_file, min_phrase_length, log_dir)

    shutil.rmtree(phrase_count_root)


def run_pre_index_tools(test_data_folder, new_index_root, log_file_dir, lib_dir):
    index_data_dir = os.path.join(test_data_folder, 'Index-Data')

    # Make empty files, they will be overwritten!
    make_empty_index_files(new_index_root)
    # Copy the non-empty files to overwrite the empty ones.
    copy_index_files(get_move_files(index_data_dir), new_index_root)
    # Now we can finally generate phrases for the library.
    generate_phrases_for_library(lib_dir, new_index_root, log_file_dir)

    # Make the FirstPageIndex.
    first_page_index = os.path.join(new_index_root, 'FirstPageIndex')
    tmp_idx = os.path.join(new_index_root, 'temp')
    tmp_first_page_index = os.path.join(tmp_idx, 'FirstPageIndex')
    if not os.path.exists(first_page_index):
        os.mkdir(first_page_index)
    if not os.path.exists(tmp_idx):
        os.mkdir(tmp_idx)
    if not os.path.exists(tmp_first_page_index):
        os.mkdir(tmp_first_page_index)

    # Run the term diff.
    run_walker_term_diff(lib_dir, new_index_root, log_file_dir)


def setup_test(test_path, should_build):
    global printer

    load_executables()

    log_dir = os.path.join(test_path, 'logs')

    if not os.path.exists(log_dir):
        os.mkdir(log_dir)

    test_log = os.path.join(log_dir, 'TestLog.log')
    printer = ConsoleUtils.SLLogger(program_name, test_log)
    sys.stdout = printer

    printer.write_no_prefix(ConsoleUtils.get_header(program_name, program_version, build_date, author, 150))

    if should_build:
        run_build(log_dir)

    copy_executables(test_path)


def run_package(index_dir, parent_dir, package_path, clux_file, config_file_path, lucene_query_cnt=DEF_NUM_TOP_DOCS,
                conf_to_use=DEF_CONF_THRESHOLD, term_len=DEF_TERM_LENGTH):
    start_time = get_time()
    start_clock = time.time()

    files = get_pkg_file_names(parent_dir)
    package_name = get_package_name(package_path)

    times = []

    print('Testing Package: %s' % package_name)

    log_root = files[logs_root_tag]
    classification_out = files[classification_file_tag]
    tree_walker_out = files[tree_walker_file_tag]
    fp_counter_out = files[fp_counter_file_tag]
    term_walker_out = files[term_walker_file_tag]
    accuracy_out = files[accuracy_file_tag]
    doctype_counts = files[doctype_graph_tag]

    # Get the package settings data to save.
    package_config = os.path.join(parent_dir, test_config_file_name)
    package_data = {'Package File': package_path, 'CLUX File': clux_file, 'Num TopDocs': lucene_query_cnt,
                    'Term Length': term_len, 'Current Minimum Confidence': conf_to_use, 'Log Directory': log_root}

    # Run the classifier.
    classification_time = new_run_classifier(index_dir, config_file_path, classification_out, package_path, log_root)
    times.append(classification_time)
    # Validate output.
    validate_time = run_walker_validator(classification_out, log_root)
    times.append(validate_time)
    # Run TreeWalker.
    tree_walker_time = run_tree_walker(clux_file, classification_out, tree_walker_out, log_root)
    times.append(tree_walker_time)
    # Run FalsePositiveCounter.
    fp_counter_time = run_fp_counter([tree_walker_out], fp_counter_out, log_root)
    times.append(fp_counter_time)
    # # Run TermWalker
    # term_walker_time = run_term_walker(tree_walker_out, classification_out, term_walker_out, log_root)
    # times.append(term_walker_time)
    # Run DoctypeGraph.
    doctype_graph_time = run_doctype_graph(parent_dir, doctype_counts, log_root)
    times.append(doctype_graph_time)
    # make_graphs_from_folder(doctype_counts, log_root)
    # Run Accuracy.
    accuracy_time = run_accuracy(tree_walker_out, accuracy_out, log_root)
    times.append(accuracy_time)

    # Get the total time, save the config data, and return the classification time and graph metadata file.
    total_time = time.time() - start_clock

    write_cfg_file(package_data, package_config, start_time, files)

    print('Finished testing package (%.4f s)' % total_time)
    write_line_break()

    files[classification_time_tag] = classification_time
    return files


def run_packages(index_dir, parent_dir, package_dicts, config_file_path, n_top_docs=DEF_NUM_TOP_DOCS,
                 t_conf=DEF_CONF_THRESHOLD, term_len=DEF_TERM_LENGTH):
    start_time = get_time()

    graph_data_files = []
    graph_tags = []
    package_files = {}
    package_results = {}

    total_classification_time = 0.0
    num_runs = len(package_dicts)

    graph_config = os.path.join(parent_dir, graph_config_file_name)
    test_config = os.path.join(parent_dir, test_config_file_name)

    print('Testing %d packages...' % num_runs)

    for i in range(num_runs):
        print('Testing Package %d/%d' % (i + 1, num_runs))

        file_dict = package_dicts[i]
        package_file = file_dict[package_file_tag]
        clux_file = file_dict[clux_file_tag]

        package_name = get_package_name(package_file)
        run_dir = os.path.join(parent_dir, package_name)

        run_data = run_package(index_dir, run_dir, package_file, clux_file, config_file_path, n_top_docs, t_conf,
                               term_len)

        # Get the data to save.
        package_results[package_name] = run_data
        total_classification_time += run_data[classification_time_tag]
        graph_tags.append(package_name)
        graph_data_files.append(run_data[accuracy_file_tag])
        package_files[i] = package_file

    # avg_classification_time = total_classification_time / num_runs
    # print('Average Classification time: %.4f seconds' % avg_classification_time)

    make_graph_config(graph_data_files, graph_tags, graph_config)

    total_pages = join_class_error_counts(parent_dir)
    page_classification_rate = total_classification_time / total_pages
    package_classification_rate = total_classification_time / num_runs

    print('Classified %d pages from %d files in %.2fs (%.2f s/page | %.2f s/file)' %
          (total_pages, num_runs, total_classification_time,
           page_classification_rate, package_classification_rate))
    printer.write_line_break()

    test_config_data = {classification_time_tag: total_classification_time,
                        graph_config_file_tag: graph_config,
                        'Total Pages': total_pages,
                        'Average Package Size': round(total_pages/num_runs),
                        'Classification Speed (pages)': '%.2f s/page' % page_classification_rate,
                        'Classification Speed (files)': '%.2f s/file' % package_classification_rate}
    test_files = get_test_files(package_results, 'Package')

    write_cfg_file(test_config_data, test_config, start_time, test_files)

    return test_config_data


def test_changing_confidence(config_file, index_dir, parent_dir, package_dicts, min_conf, max_conf, step_size,
                             lucene_query_cnt=DEF_NUM_TOP_DOCS, term_len=DEF_TERM_LENGTH):
    start_time = get_time()

    current_conf = min_conf
    avg_cls_time = 0.0
    graph_config_files = []
    result_data = {}

    num_runs = 0
    print('Running Test With Confidence Values [%.2f%% - %.2f%%]' % (min_conf, max_conf))
    test_config_file = os.path.join(parent_dir, test_config_file_name)

    # Step the confidence threshold up from min_conf to max_conf.
    while current_conf <= max_conf:
        print('Current Confidence: %.2f%%' % current_conf)

        run_dir = os.path.join(parent_dir, 'Confidence Threshold = %.2f%%' % current_conf)
        graph_config = os.path.join(run_dir, graph_config_file_name)
        graph_config_files.append(graph_config)

        return_data = run_packages(index_dir, run_dir, package_dicts, config_file, lucene_query_cnt, current_conf,
                                   term_len)

        result_data[current_conf] = return_data

        cls_time = return_data[classification_time_tag]
        packages_config = return_data[graph_config_file_tag]

        new_title = '%s With Confidence Threshold %.2f%%' % (DEF_GRAPH_TITLE, current_conf * 100)
        AccuracyGraphSetup.change_title(packages_config, graph_config, new_title)

        num_runs += 1
        avg_cls_time += cls_time
        current_conf += step_size

    avg_cls_time = avg_cls_time / num_runs
    print('Average Classification time: %.4f seconds' % avg_cls_time)

    config_result = {classification_time_tag: avg_cls_time, graph_config_file_tag: graph_config,
                     'Minimum Confidence': '%.2f%%' % (min_conf * 100),
                     'Maximum Confidence': '%.2f%%' % (max_conf * 100)}

    files = get_test_files(result_data, 'Confidence')
    write_cfg_file(config_result, test_config_file, start_time, files)

    return config_result


def make_graphs_from_folder(folder, log_dir):
    graph_dict = {}
    print('Making graphs from config files in "%s"' % folder)
    for file in os.listdir(folder):
        file = os.path.join(folder, file)
        if file.endswith(graph_config_file_name):
            out_file = file.replace(graph_config_file_name, graph_name)
            graph_dict[out_file] = file
    make_graphs(graph_dict, log_dir)


def get_test_files(test_files, new_prefix):
    result = {}
    for key, value in test_files.items():
        for sub_key, sub_value in value.items():
            new_key = '%s %s - %s' % (new_prefix, key, sub_key)
            result[new_key] = sub_value
    return result


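# get_test_files() flattens the nested per-run dicts; e.g. (names illustrative) the entry
# {'597000945': {'Classification': 'results.json'}} with prefix 'Package' becomes
# {'Package 597000945 - Classification': 'results.json'}.

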
def get_package_name(package_path):
    par_dir, package_name = os.path.split(package_path)
    # Strip the four-character extension (e.g. '.frt').
    package_name = package_name[:-4]
    return package_name


def get_lib_name(lib_path):
    folder, name = os.path.split(lib_path)
    return name


def get_move_files(index_data_dir):
    result = []
    for file in os.listdir(index_data_dir):
        result.append(os.path.join(index_data_dir, file))

    if os.environ.get('SEQUENCELOGICHOME') is not None:
        dest_folder = os.path.join(os.environ['SEQUENCELOGICHOME'], 'SLSync', 'config', 'data')
        for file in result:
            new_file = os.path.join(dest_folder, os.path.split(file)[1])

            if not os.path.exists(dest_folder):
                os.makedirs(dest_folder)

            shutil.copy(file, new_file)

    return result


def get_graph_config_files(package_data):
    result = {}

    for key, value in package_data.items():
        value = str(value)
        if value.endswith(graph_config_file_name):
            out_file = value.replace(graph_config_file_name, graph_name)
            result[out_file] = value

    return result


def do_make_graphs(configs_by_paths, logs_dir):
    print('Making %d graphs...' % len(configs_by_paths))

    graph_time = 0
    for out_file, config_file in configs_by_paths.items():
        graph_time += run_graph(config_file, out_file, logs_dir)

    print('Done making Graphs (%.4f s)' % graph_time)


def make_graphs(run_data_dict, log_dir, title=None):
    config_data = get_graph_config_files(run_data_dict)
    if title is not None:
        for out, config in config_data.items():
            AccuracyGraphSetup.change_title(config, config, title)
    do_make_graphs(config_data, log_dir)


def join_class_error_counts(test_out_root):
    types = ['false-negative', 'false-positive', 'incorrect', 'correct']
    fields = ['Correct Type', 'Classified Type', 'Number of Occurrences']

    total_pages = 0

    for t in types:
        file_name = 'global-%s-counts.csv' % t
        result_file = os.path.join(test_out_root, file_name)

        result = {}

        for folder in os.listdir(test_out_root):
            folder = os.path.join(test_out_root, folder)
            if os.path.isdir(folder):
                to_read = os.path.join(folder, '%s-counts.csv' % t)
                if os.path.exists(to_read):
                    with open(to_read) as reader:
                        csv_reader = csv.DictReader(reader)
                        for row in csv_reader:
                            key = '%s>>>%s' % (row['Correct Type'], row['Classified Type'])
                            count = int(row['Number of Occurrences'])
                            if key not in result.keys():
                                result[key] = count
                            else:
                                result[key] += count

        result = sorted(result.items(), key=operator.itemgetter(1), reverse=True)
        with open(result_file, 'w+', newline='') as writer:
            csv_file = csv.DictWriter(writer, fieldnames=fields)
            csv_file.writeheader()
            for key, count in result:
                correct_doctype, classified_doctype = key.split('>>>')[:2]
                total_pages += count
                csv_file.writerow({'Correct Type': correct_doctype,
                                   'Classified Type': classified_doctype,
                                   'Number of Occurrences': count})

    return total_pages


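# Each global-<type>-counts.csv written above has this shape (doctypes and counts illustrative):
#
#     Correct Type,Classified Type,Number of Occurrences
#     DoctypeA,DoctypeB,12
#     DoctypeC,DoctypeC,9

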
def run_test_on_lib(lib_dir, output_paths, test_in_root, config_file_path, min_phrase_len=DEF_MIN_PHRASE_SIZE,
                    max_phrase_len=DEF_MAX_PHRASE_SIZE):
    global phrase_exe_name, paginate

    start_time = get_time()

    idx_root = output_paths[idx_root_tag]
    test_out = output_paths[test_root_tag]
    logs_out = output_paths[logs_root_tag]

    packages = load_packages(test_in_root)

    idx_data_root = os.path.join(test_in_root, 'Index-Data')

    get_move_files(idx_data_root)

    new_run_indexer(config_file_path, lib_dir, idx_root, logs_out)

    config_out = os.path.join(test_out, test_config_file_name)

    config_info = {'Library Path': lib_dir, 'Library Name': get_lib_name(lib_dir), 'Test Data Source': test_in_root,
                   'Test Data Result': test_out, 'Minimum Phrase Length': min_phrase_len,
                   'Maximum Phrase Length': max_phrase_len, 'Minimum Phrase Doctype Coverage': '80%',
                   'Paginate': paginate, 'Minimum OCR Confidence': '60%',
                   'Confidence Threshold': '%.2f' % DEF_CONF_THRESHOLD, 'Score Ratio': 'Yes'}

    write_line_break()
    out_path = test_out

    # Run the packages.
    run_data = run_packages(idx_root, out_path, packages, config_file_path)
    config_info['Average Classification Time'] = run_data[classification_time_tag]
    del run_data[classification_time_tag]
    # Make the graphs!
    make_graphs(run_data, logs_out, 'Confidence Threshold: %.2f' % DEF_CONF_THRESHOLD)

    write_cfg_file(config_info, config_out, start_time, run_data)


def get_time():
    return datetime.datetime.now().strftime('%Y/%m/%d %H:%M:%S')


def main(lib_dir, output_directory, test_data_root, config_file_loc, do_build):
    # Do some set up.
    run_dir = get_root_folder(output_directory)
    os.environ['SEQUENCELOGICHOME'] = os.path.join(run_dir, 'Programs')
    setup_test(run_dir, do_build)

    # Get the output information.
    output_info = get_idx_names(run_dir)
    # Perform a test.
    run_test_on_lib(lib_dir, output_info, test_data_root, config_file_loc)

    # Do any necessary cleanup.
    run_clean_up()
    print('\nTest completed. Exiting...')


# This is where we call the main method from.
if __name__ == '__main__':
    # load_executables()
    # # Set up arguments
    # files = [
    #     "C:\\Users\\chris\\Documents\\Code\\Tests\\KMeans\\L3Results\\2017.10.12\\17.26\\597000945\\graph-data.txt",
    #     "C:\\Users\\chris\\Documents\\Code\\Tests\\KMeans\\L3Results\\2017.10.12\\17.26\\597000964\\graph-data.txt",
    #     "C:\\Users\\chris\\Documents\\Code\\Tests\\KMeans\\L3Results\\2017.10.12\\17.26\\597000967\\graph-data.txt",
    #     "C:\\Users\\chris\\Documents\\Code\\Tests\\KMeans\\L3Results\\2017.10.12\\17.26\\597000990\\graph-data.txt",
    #     "C:\\Users\\chris\\Documents\\Code\\Tests\\KMeans\\L3Results\\2017.10.12\\17.26\\597001171\\graph-data.txt",
    #     "C:\\Users\\chris\\Documents\\Code\\Tests\\KMeans\\L3Results\\2017.10.12\\17.26\\597001276\\graph-data.txt",
    #     "C:\\Users\\chris\\Documents\\Code\\Tests\\KMeans\\L3Results\\2017.10.12\\17.26\\597001454\\graph-data.txt",
    #     "C:\\Users\\chris\\Documents\\Code\\Tests\\KMeans\\L3Results\\2017.10.12\\17.26\\597001462\\graph-data.txt",
    #     "C:\\Users\\chris\\Documents\\Code\\Tests\\KMeans\\L3Results\\2017.10.12\\17.26\\597001468\\graph-data.txt",
    #     "C:\\Users\\chris\\Documents\\Code\\Tests\\KMeans\\L3Results\\2017.10.12\\17.26\\597001474\\graph-data.txt",
    #     "C:\\Users\\chris\\Documents\\Code\\Tests\\KMeans\\L3Results\\2017.10.12\\17.26\\597001635\\graph-data.txt",
    #     "C:\\Users\\chris\\Documents\\Code\\Tests\\KMeans\\L3Results\\2017.10.12\\17.26\\597001639\\graph-data.txt",
    #     "C:\\Users\\chris\\Documents\\Code\\Tests\\KMeans\\L3Results\\2017.10.12\\17.26\\597001648\\graph-data.txt",
    #     "C:\\Users\\chris\\Documents\\Code\\Tests\\KMeans\\L3Results\\2017.10.12\\17.26\\597001676\\graph-data.txt",
    #     "C:\\Users\\chris\\Documents\\Code\\Tests\\KMeans\\L3Results\\2017.10.12\\17.26\\597001681\\graph-data.txt",
    #     "C:\\Users\\chris\\Documents\\Code\\Tests\\KMeans\\L3Results\\2017.10.12\\17.26\\597001685\\graph-data.txt",
    #     "C:\\Users\\chris\\Documents\\Code\\Tests\\KMeans\\L3Results\\2017.10.12\\17.26\\597001699\\graph-data.txt",
    #     "C:\\Users\\chris\\Documents\\Code\\Tests\\KMeans\\L3Results\\2017.10.12\\17.26\\597001741\\graph-data.txt",
    #     "C:\\Users\\chris\\Documents\\Code\\Tests\\KMeans\\L3Results\\2017.10.12\\17.26\\597001785\\graph-data.txt",
    #     "C:\\Users\\chris\\Documents\\Code\\Tests\\KMeans\\L3Results\\2017.10.12\\17.26\\597001842\\graph-data.txt",
    #     "C:\\Users\\chris\\Documents\\Code\\Tests\\KMeans\\L3Results\\2017.10.12\\17.26\\597001849\\graph-data.txt",
    #     "C:\\Users\\chris\\Documents\\Code\\Tests\\KMeans\\L3Results\\2017.10.12\\17.26\\597001917\\graph-data.txt",
    #     "C:\\Users\\chris\\Documents\\Code\\Tests\\KMeans\\L3Results\\2017.10.12\\17.26\\597002051\\graph-data.txt",
    #     "C:\\Users\\chris\\Documents\\Code\\Tests\\KMeans\\L3Results\\2017.10.12\\17.26\\597002121\\graph-data.txt",
    #     "C:\\Users\\chris\\Documents\\Code\\Tests\\KMeans\\L3Results\\2017.10.12\\17.26\\597002173\\graph-data.txt",
    #     "C:\\Users\\chris\\Documents\\Code\\Tests\\KMeans\\L3Results\\2017.10.12\\17.26\\597002322\\graph-data.txt",
    #     "C:\\Users\\chris\\Documents\\Code\\Tests\\KMeans\\L3Results\\2017.10.12\\17.26\\597002344\\graph-data.txt",
    #     "C:\\Users\\chris\\Documents\\Code\\Tests\\KMeans\\L3Results\\2017.10.12\\17.26\\597002443\\graph-data.txt",
    #     "C:\\Users\\chris\\Documents\\Code\\Tests\\KMeans\\L3Results\\2017.10.12\\17.26\\597002544\\graph-data.txt",
    #     "C:\\Users\\chris\\Documents\\Code\\Tests\\KMeans\\L3Results\\2017.10.12\\17.26\\597002550\\graph-data.txt",
    #     "C:\\Users\\chris\\Documents\\Code\\Tests\\KMeans\\L3Results\\2017.10.12\\17.26\\597002556\\graph-data.txt",
    #     "C:\\Users\\chris\\Documents\\Code\\Tests\\KMeans\\L3Results\\2017.10.12\\17.26\\597002648\\graph-data.txt",
    #     "C:\\Users\\chris\\Documents\\Code\\Tests\\KMeans\\L3Results\\2017.10.12\\17.26\\597002662\\graph-data.txt",
    #     "C:\\Users\\chris\\Documents\\Code\\Tests\\KMeans\\L3Results\\2017.10.12\\17.26\\597002837\\graph-data.txt",
    #     "C:\\Users\\chris\\Documents\\Code\\Tests\\KMeans\\L3Results\\2017.10.12\\17.26\\597002854\\graph-data.txt",
    #     "C:\\Users\\chris\\Documents\\Code\\Tests\\KMeans\\L3Results\\2017.10.12\\17.26\\597002924\\graph-data.txt",
    #     "C:\\Users\\chris\\Documents\\Code\\Tests\\KMeans\\L3Results\\2017.10.12\\17.26\\597002931\\graph-data.txt",
    #     "C:\\Users\\chris\\Documents\\Code\\Tests\\KMeans\\L3Results\\2017.10.12\\17.26\\597002954\\graph-data.txt",
    #     "C:\\Users\\chris\\Documents\\Code\\Tests\\KMeans\\L3Results\\2017.10.12\\17.26\\597003029\\graph-data.txt",
    #     "C:\\Users\\chris\\Documents\\Code\\Tests\\KMeans\\L3Results\\2017.10.12\\17.26\\597003044\\graph-data.txt",
    #     "C:\\Users\\chris\\Documents\\Code\\Tests\\KMeans\\L3Results\\2017.10.12\\17.26\\597003068\\graph-data.txt",
    #     "C:\\Users\\chris\\Documents\\Code\\Tests\\KMeans\\L3Results\\2017.10.12\\17.26\\597003088\\graph-data.txt",
    #     "C:\\Users\\chris\\Documents\\Code\\Tests\\KMeans\\L3Results\\2017.10.12\\17.26\\597003125\\graph-data.txt",
    #     "C:\\Users\\chris\\Documents\\Code\\Tests\\KMeans\\L3Results\\2017.10.12\\17.26\\597003128\\graph-data.txt",
    #     "C:\\Users\\chris\\Documents\\Code\\Tests\\KMeans\\L3Results\\2017.10.12\\17.26\\597003130\\graph-data.txt",
    #     "C:\\Users\\chris\\Documents\\Code\\Tests\\KMeans\\L3Results\\2017.10.12\\17.26\\597003137\\graph-data.txt",
    #     "C:\\Users\\chris\\Documents\\Code\\Tests\\KMeans\\L3Results\\2017.10.12\\17.26\\597003186\\graph-data.txt",
    #     "C:\\Users\\chris\\Documents\\Code\\Tests\\KMeans\\L3Results\\2017.10.12\\17.26\\597003196\\graph-data.txt",
    #     "C:\\Users\\chris\\Documents\\Code\\Tests\\KMeans\\L3Results\\2017.10.12\\17.26\\597003206\\graph-data.txt",
    #     "C:\\Users\\chris\\Documents\\Code\\Tests\\KMeans\\L3Results\\2017.10.12\\17.26\\597003210\\graph-data.txt",
    #     "C:\\Users\\chris\\Documents\\Code\\Tests\\KMeans\\L3Results\\2017.10.12\\17.26\\597003221\\graph-data.txt",
    #     "C:\\Users\\chris\\Documents\\Code\\Tests\\KMeans\\L3Results\\2017.10.12\\17.26\\597003257\\graph-data.txt",
    #     "C:\\Users\\chris\\Documents\\Code\\Tests\\KMeans\\L3Results\\2017.10.12\\17.26\\597003261\\graph-data.txt",
    #     "C:\\Users\\chris\\Documents\\Code\\Tests\\KMeans\\L3Results\\2017.10.12\\17.26\\597003308\\graph-data.txt",
    #     "C:\\Users\\chris\\Documents\\Code\\Tests\\KMeans\\L3Results\\2017.10.12\\17.26\\597003335\\graph-data.txt"]
    #
    # tags = ['Correct & High Confidence',
    #         'Incorrect & Low Confidence',
    #         'Incorrect & High Confidence',
    #         'Correct & Low Confidence',
    #         'Correct Pagination']
    #
    # out_file = r'C:\Users\chris\Documents\Code\Tests\KMeans\L3Results\2017.10.12\17.26\global-graph-config.json'
    # graph_out = \
    #     r'C:\Users\chris\Documents\Code\Tests\KMeans\L3Results\2017.10.12\17.26\global-classification-accuracy.pdf'
    # make_graph_config(files, tags, out_file)
    #
    # do_make_graphs({graph_out: out_file}, r'C:\Users\chris\Documents\Code\Tests\KMeans\L3Results\2017.10.12\17.26\logs')

    required_args = parser.add_argument_group('Required')
    optional_args = parser.add_argument_group('Optional')

    required_args.add_argument('-t', '--test_dir', required=True,
                               help='The path to the directory containing test data.')
    required_args.add_argument('-o', '--out_dir', required=True, help='The location to write the test data to.')
    required_args.add_argument('-l', '--library_dir', required=True, help='The path to the library root folder.')
    required_args.add_argument('-c', '--config', required=True, help='The path to the config file.')

    optional_args.add_argument('-b', '--build', required=False, action='store_true',
                               help='Use if you want to run a build before testing.')
    optional_args.add_argument('-h', '--help', action='help', help='Prints the help message.')

    # Get the arguments.
    args = parser.parse_args()
    library_dir = args.library_dir
    output_dir = args.out_dir
    test_data_dir = args.test_dir
    build = args.build
    config_file = args.config
    # os.environ['UseFirstPages'] = 'true'
    # Run the program.
    main(library_dir, output_dir, test_data_dir, config_file, build)

    # printer.close()