# We do all our imports at the top of our program.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import argparse
import datetime
import sys
import math
import os
import re
import time

import tensorflow as tf
import tensorflow.contrib.layers as layers
from tensorflow.contrib.slim.python.slim.data.prefetch_queue import prefetch_queue

import ConsoleUtils

# Constants
NUM_GPUS = 2
BATCH_SIZE = 800
NUM_EPOCHS = 1000

RECORD_FILE = '/mnt/00_train_data.tfrecord'
TRAIN_DATA_ROOT = '/home/cdiesch/Documents/TFFirstPageClassifier/GeneratedData/'

FIRST_PAGE_LABEL = 'firstPage'
NON_FIRST_PAGE_LABEL = 'nonFirstPage'

# Give the program a name.
program_name = 'FirstPageTrainer'
# Briefly describe what the program does.
program_description = 'Trains a CNN (Convolutional Neural Network) for the purpose of classifying pages as first ' \
                      'pages or non-first pages.'
# The argument parser for the program.
parser = argparse.ArgumentParser(prog=program_name, description=program_description, add_help=False)

build_date = datetime.datetime.now().strftime('%Y.%m.%d-%H.%M.%S')
program_version = '0.0.1'
author = 'Chris Diesch'

side_bound_char = '|'
line_break_char = '-'
corner_char = '+'
line_break_size = 40
line_break = line_break_char * line_break_size

printer = ConsoleUtils.SLPrinter(program_name)
sys.stdout = printer

img_w = 201
img_h = 260

IMG_SIZE = 280

lr_decay_rate = -1600
max_lr = 0.02
min_lr = 0.0001

NUM_CLASSES = 2

INIT_LEARNING_RATE = 0.01
LEARNING_RATE_DECAY = 0.99
DECAY_RATE = 500
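# With staircase exponential decay (see tf.train.exponential_decay in train()), the three
# constants above give an effective learning rate of
#   lr(step) = INIT_LEARNING_RATE * LEARNING_RATE_DECAY ** (step // DECAY_RATE)
# e.g. lr(0) = 0.01, lr(500) = 0.0099, lr(5000) = 0.01 * 0.99 ** 10 ~= 0.00904.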

MAX_STEPS = 1000000

CONV_STD_DEV = 5e-2
FLAT_STD_DEV = 0.04
CONV_WEIGHT_DECAY = 0.0
FLAT_WEIGHT_DECAY = 0.004

BIAS_INIT_VAL = 0.1
CONV_BIAS_INIT_VAL = 0.0

EXP_MOVING_AVG_DECAY = 0.9999

# Default local_response_normalization values.
LRN_DEPTH_RAD = 4
LRN_BIAS = 1.0
LRN_ALPHA = 0.001 / 9.0
LRN_BETA = 0.75

TOWER_NAME = 'tower'

BIAS_INIT = tf.constant_initializer(BIAS_INIT_VAL)
CONV_BIAS_INIT = tf.constant_initializer(CONV_BIAS_INIT_VAL)

IMAGE_SET_SIZE = (12466 * 2)


page_types = ['first_page', 'non_first_page']

CLASSES = tf.constant(['first_page', 'non_first_page'])


def _act_summary(x):
    """Attaches a histogram and a sparsity summary to the given activation tensor."""
    tensor_name = re.sub('%s_[0-9]*/' % TOWER_NAME, '', x.op.name)
    hist_name = '%s/activations' % tensor_name
    sclr_name = '%s/sparsity' % tensor_name
    tf.summary.histogram(hist_name, x)
    tf.summary.scalar(sclr_name, tf.nn.zero_fraction(x))


def _get_cpu_weights(shape, dtype, std_dev=FLAT_STD_DEV, weight_decay=None, name='weights'):
    """Creates a truncated-normal weight variable on the CPU, optionally adding an L2 weight-decay loss."""
    result = _get_cpu_var(name=name, shape=shape, dtype=dtype,
                          initializer=tf.truncated_normal_initializer(stddev=std_dev, dtype=dtype))

    if weight_decay is not None:
        weight_loss = tf.multiply(tf.nn.l2_loss(result), weight_decay, name='weight_loss')
        tf.add_to_collection(name='losses', value=weight_loss)

    return result


def _get_cpu_var(name, shape, dtype, initializer):
    """Creates a variable placed on the CPU."""
    with tf.device('/cpu:0'):
        result = tf.get_variable(name=name, shape=shape, dtype=dtype, initializer=initializer)
    return result
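
# The two helpers above implement the usual multi-tower pattern: every variable lives on
# /cpu:0 and the GPU towers built in train() re-use the same copies via
# tf.get_variable_scope().reuse_variables(). A minimal sketch of the calling pattern
# (illustrative only, not executed here; the scope name and shapes are made up):
#
#   with tf.variable_scope('example'):
#       w = _get_cpu_weights(shape=[5, 5, 1, 10], dtype=tf.float32, std_dev=CONV_STD_DEV)
#       b = _get_cpu_var(name='biases', shape=[10], dtype=tf.float32, initializer=BIAS_INIT)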


def _get_conv2d_on_cpu(scope, inputs, num_outputs, kernel_size, stride, dtype, in_channels):
    """Builds a conv2d + bias + ReLU layer whose variables live on the CPU."""
    # in_shape = tf.shape(inputs)
    # in_channels = in_shape[3]
    filter_shape = [kernel_size, kernel_size, in_channels, num_outputs]
    stride_shape = [1, stride, stride, 1]

    weights = _get_cpu_weights(shape=filter_shape, dtype=dtype, std_dev=CONV_STD_DEV, weight_decay=CONV_WEIGHT_DECAY)

    conv = tf.nn.conv2d(input=inputs, filter=weights, strides=stride_shape, padding='SAME')

    biases = _get_cpu_var(name='biases', shape=[num_outputs], dtype=dtype, initializer=CONV_BIAS_INIT)
    pre_act = tf.nn.bias_add(value=conv, bias=biases)

    result = tf.nn.relu(features=pre_act, name=scope.name)
    _act_summary(result)
    return result


def _get_flattened_on_cpu(scope, inputs, num_outputs, dtype):
    """Flattens the incoming feature map and applies a fully connected ReLU layer."""
    flattened = tf.reshape(inputs, [BATCH_SIZE, -1])
    width = flattened.get_shape()[1].value
    weights_shape = [width, num_outputs]

    weights = _get_cpu_weights(shape=weights_shape, dtype=dtype, std_dev=CONV_STD_DEV,
                               weight_decay=CONV_WEIGHT_DECAY)

    biases = _get_cpu_var(name='biases', shape=[num_outputs], dtype=dtype, initializer=BIAS_INIT)

    result = tf.nn.relu(features=(tf.matmul(flattened, weights) + biases), name=scope.name)
    _act_summary(result)

    return result


def _get_fully_connected_on_cpu(scope, inputs, num_outputs, dtype, in_channels):
    """Builds a fully connected ReLU layer whose variables live on the CPU."""
    # num_inputs = tf.shape(inputs)[1]
    weights_shape = [in_channels, num_outputs]
    weights = _get_cpu_weights(shape=weights_shape, dtype=dtype, std_dev=FLAT_STD_DEV, weight_decay=FLAT_WEIGHT_DECAY)
    biases = _get_cpu_var(name='biases', shape=[num_outputs], dtype=dtype, initializer=BIAS_INIT)

    result = tf.nn.relu(features=(tf.matmul(inputs, weights) + biases), name=scope.name)
    _act_summary(result)
    return result


def _get_softmax_on_cpu(scope, inputs, dtype, in_channels, num_outputs=NUM_CLASSES):
    """Builds the final linear (pre-softmax) layer; the softmax itself is applied by the loss op."""
    # num_inputs = tf.shape(inputs)[1]
    # Set up the weight/bias values...
    weights_std_dev = 1 / num_outputs
    weights_decay = 0.0
    weights_shape = [in_channels, num_outputs]

    weights = _get_cpu_weights(shape=weights_shape, dtype=dtype, std_dev=weights_std_dev, weight_decay=weights_decay)
    biases = _get_cpu_var(name='biases', dtype=dtype, shape=[num_outputs], initializer=CONV_BIAS_INIT)
    # Wx + b
    result = tf.add(tf.matmul(inputs, weights), biases, name=scope.name)
    # Get summary & return.
    _act_summary(result)
    return result


def _add_loss_summaries(total_loss):
    """Attaches scalar summaries to all losses and returns an op that maintains their moving averages."""
    loss_avg = tf.train.ExponentialMovingAverage(decay=EXP_MOVING_AVG_DECAY, name='avg')
    # Get the collection of losses.
    losses = tf.get_collection('losses')
    full_losses = losses + [total_loss]
    # Get the average op...
    loss_average_op = loss_avg.apply(full_losses)

    # Assign summaries to all the losses.
    for loss in full_losses:
        raw_name = '%s (raw)' % loss.op.name
        tf.summary.scalar(raw_name, loss)
        tf.summary.scalar(loss.op.name, loss_avg.average(loss))

    return loss_average_op


def _generate_img_label_batch(image, label, batch_size):
    """Shuffles single examples into batches using a multi-threaded shuffle queue."""
    min_after_dequeue = 2 * batch_size
    num_threads = 12
    images, labels = tf.train.shuffle_batch([image, label], batch_size=batch_size, num_threads=num_threads,
                                            capacity=min_after_dequeue + 2 * batch_size,
                                            min_after_dequeue=min_after_dequeue)
    return images, labels


def _read(file_name_queue):
    """Reads one serialized example from the TFRecord queue and returns the decoded image and label."""
    reader = tf.TFRecordReader(name='record_reader')

    _, serialized_example = reader.read(file_name_queue, name='example')

    features = tf.parse_single_example(serialized=serialized_example,
                                       features={
                                           # 'height': tf.FixedLenFeature([1], tf.int64),
                                           # 'width': tf.FixedLenFeature([1], tf.int64),
                                           # 'channels': tf.FixedLenFeature([1], tf.int64),
                                           'image_raw': tf.FixedLenFeature([], tf.string),
                                           'label': tf.FixedLenFeature([], tf.int64)
                                       })

    # Get the image and label.
    raw_image = tf.decode_raw(features['image_raw'], tf.uint8, name='raw_image')
    label = tf.cast(features['label'], dtype=tf.int32, name='label')
    # Get the metadata.
    # height = tf.cast(features['height'], tf.int32, name='height')
    # width = tf.cast(features['width'], tf.int32, name='width')
    # channels = tf.cast(features['channels'], tf.int32, name='channels')

    # Reshape the image to IMG_SIZE x IMG_SIZE x 1.
    image = tf.reshape(tf.cast(raw_image, tf.float32), [IMG_SIZE, IMG_SIZE, 1], name='reshaped_image')

    return image, label
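

# The record file parsed above is expected to hold one Example per page with an
# 'image_raw' bytes feature and an int64 'label'. The real writer lives in MakeRecords
# (referenced in main() below); this is only a minimal sketch of what such a writer could
# look like, assuming each image arrives as an IMG_SIZE x IMG_SIZE uint8 numpy array.
def _write_record_sketch(record_path, images, labels):
    writer = tf.python_io.TFRecordWriter(record_path)
    for img, lbl in zip(images, labels):
        example = tf.train.Example(features=tf.train.Features(feature={
            'image_raw': tf.train.Feature(bytes_list=tf.train.BytesList(value=[img.tostring()])),
            'label': tf.train.Feature(int64_list=tf.train.Int64List(value=[int(lbl)]))
        }))
        writer.write(example.SerializeToString())
    writer.close()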


def inference(x):
    """Builds the inference graph: two conv/LRN/pool blocks followed by a flattened layer and linear logits."""
    # Input images are [BATCH_SIZE x 280 x 280 x 1].
    with tf.variable_scope('conv_1') as scope:
        conv_1 = _get_conv2d_on_cpu(scope=scope, inputs=x, num_outputs=10, kernel_size=6, stride=10,
                                    dtype=tf.float32, in_channels=1)

    pool_1 = layers.max_pool2d(inputs=conv_1, kernel_size=3, stride=2, padding='SAME')

    norm_1 = tf.nn.local_response_normalization(input=pool_1, depth_radius=LRN_DEPTH_RAD, bias=LRN_BIAS,
                                                alpha=LRN_ALPHA, beta=LRN_BETA, name='norm_1')

    # After conv_1 (stride 10) and pool_1 (stride 2) the feature maps are [BATCH_SIZE x 14 x 14 x 10].
    with tf.variable_scope('conv_2') as scope:
        conv_2 = _get_conv2d_on_cpu(scope=scope, inputs=norm_1, num_outputs=5, kernel_size=5, stride=2,
                                    dtype=tf.float32, in_channels=10)

    norm_2 = tf.nn.local_response_normalization(input=conv_2, depth_radius=LRN_DEPTH_RAD, bias=LRN_BIAS,
                                                alpha=LRN_ALPHA, beta=LRN_BETA, name='norm_2')

    pool_2 = layers.max_pool2d(inputs=norm_2, kernel_size=1, stride=1, padding='SAME')

    with tf.variable_scope('flattened_1') as scope:
        flattened_1 = _get_flattened_on_cpu(scope=scope, inputs=pool_2, num_outputs=78400, dtype=tf.float32)

    # with tf.variable_scope('fully_connected_1') as scope:
    #     fully_connected_1 = _get_fully_connected_on_cpu(scope=scope, in_channels=78400, inputs=flattened_1,
    #                                                     num_outputs=50, dtype=tf.float32)

    with tf.variable_scope('logits') as scope:
        softmax_linear = _get_softmax_on_cpu(scope, in_channels=78400, inputs=flattened_1, dtype=tf.float32)

    return softmax_linear


def get_loss(logits, labels):
    """Computes the mean cross-entropy for the batch and returns the total loss including weight decay."""
    labels = tf.cast(labels, tf.int64)

    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=labels, logits=logits,
                                                                   name='cross_entropy_per_example')
    cross_entropy_mean = tf.reduce_mean(cross_entropy, name='cross_entropy')
    tf.add_to_collection('losses', cross_entropy_mean)

    return tf.add_n(tf.get_collection('losses'), name='total_loss')


def get_accuracy(logits, labels):
    """Computes the fraction of correctly classified examples in the batch."""
    labels = tf.cast(labels, tf.int64)

    predicted_types = tf.argmax(logits, axis=1, name='predicted_types')

    tf.summary.tensor_summary(name='predicted_types', tensor=predicted_types)
    tf.summary.tensor_summary(name='labels', tensor=labels)

    correct_prediction = tf.equal(labels, predicted_types, name='correct_prediction')
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, dtype=tf.float32), name='accuracy')

    tf.add_to_collection('accuracies', accuracy)

    return tf.add_n(tf.get_collection('accuracies'), name='total_accuracy')
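
# get_loss() and get_accuracy() both accumulate their per-batch values in graph collections
# ('losses' and 'accuracies'). tower_loss() below pulls out only the entries created inside
# its own tower's name scope, and train() averages the per-tower accuracies over NUM_GPUS to
# get the value it prints and summarizes.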


def tower_loss(scope, x_batch, y_batch):
    """Builds the inference, loss and accuracy graph for a single GPU tower and returns its totals."""
    tf.summary.image(name='image_batch', tensor=x_batch, max_outputs=10)
    # Build the inference graph...
    logits = inference(x_batch)
    # Build the loss and accuracy graphs; their values are re-collected per tower below.
    get_loss(logits=logits, labels=y_batch)
    get_accuracy(logits=logits, labels=y_batch)
    # Group the losses for this "tower" only.
    losses = tf.get_collection('losses', scope)
    accuracies = tf.get_collection('accuracies', scope)
    # Get the totals.
    tot_accuracy = tf.add_n(accuracies, 'total_accuracy')
    tot_loss = tf.add_n(losses, name='total_loss')

    tower_pattern = '%s_[0-9]*/' % TOWER_NAME

    for loss in losses + [tot_loss]:
        loss_name = re.sub(tower_pattern, '', loss.op.name)
        tf.summary.scalar(name=loss_name, tensor=loss)

    for acc in accuracies + [tot_accuracy]:
        acc_name = re.sub(tower_pattern, '', acc.op.name)
        tf.summary.scalar(name=acc_name, tensor=acc)

    tf.summary.tensor_summary(name='predictions', tensor=logits)
    tf.summary.tensor_summary(name='labels', tensor=y_batch)

    return tot_loss, tot_accuracy


def average_grads(tower_grads):
    """Averages the gradients computed by each GPU tower, variable by variable."""
    avg_grads = []
    # Zip the tower gradients together for processing...
    for grads_with_vars in zip(*tower_grads):
        grads = []
        for grad, _ in grads_with_vars:
            # Expand the gradient back out.
            expanded_grad = tf.expand_dims(input=grad, axis=0)
            # Add it to the list.
            grads.append(expanded_grad)

        # Compute the average.
        grad = tf.concat(axis=0, values=grads)
        grad = tf.reduce_mean(input_tensor=grad, axis=0)

        # Get the variable and append it and its gradient to the list...
        var = grads_with_vars[0][1]
        grad_with_var = (grad, var)
        avg_grads.append(grad_with_var)
    return avg_grads
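
# Illustration of the structure average_grads() consumes (not executed; two towers and two
# variables W and b are just an example):
#
#   tower_grads = [[(gW_1, W), (gb_1, b)],   # gradients computed by tower 1
#                  [(gW_2, W), (gb_2, b)]]   # gradients computed by tower 2
#
# zip(*tower_grads) regroups this per variable, so each variable's gradients can be stacked
# along a new leading axis and reduced with tf.reduce_mean, giving one (grad, var) pair per
# variable for the optimizer to apply.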


def inputs(record_file, num_epochs, batch_size):
    """Builds the input pipeline: reads the TFRecord file and returns shuffled image/label batches."""
    with tf.name_scope('inputs'):
        file_name_queue = tf.train.string_input_producer([record_file])

        image, label = _read(file_name_queue)

        images, labels = _generate_img_label_batch(image, label, batch_size)

        tf.summary.image(name='images', tensor=images, max_outputs=BATCH_SIZE)

        return images, labels


def train():
    """Builds the multi-GPU training graph and runs the training loop."""
    with tf.Graph().as_default(), tf.device('/cpu:0'):

        global_step = tf.get_variable(name='global_step', shape=[], initializer=tf.constant_initializer(0),
                                      trainable=False)

        learning_rate = tf.train.exponential_decay(learning_rate=INIT_LEARNING_RATE, global_step=global_step,
                                                   decay_steps=DECAY_RATE, decay_rate=LEARNING_RATE_DECAY,
                                                   staircase=True, name='learning_rate')

        optimizer = tf.train.GradientDescentOptimizer(learning_rate)
        tower_grads = []
        tower_accuracies = []

        batch_x, batch_y = inputs(record_file=RECORD_FILE, num_epochs=NUM_EPOCHS, batch_size=BATCH_SIZE)
        batch_queue = prefetch_queue([batch_x, batch_y], capacity=(2 * NUM_GPUS))

        with tf.variable_scope(tf.get_variable_scope()):
            for i in range(0, NUM_GPUS):
                with tf.device('/gpu:%d' % i):
                    with tf.name_scope('%s_%d' % (TOWER_NAME, (i + 1))) as scope:

                        img_batch, lbl_batch = batch_queue.dequeue()

                        loss, accuracy = tower_loss(scope, img_batch, lbl_batch)
                        tower_accuracies.append(accuracy)

                        tf.get_variable_scope().reuse_variables()

                        summaries = tf.get_collection(tf.GraphKeys.SUMMARIES, scope)

                        grads = optimizer.compute_gradients(loss)
                        tower_grads.append(grads)

        grads = average_grads(tower_grads)
        # Average the per-tower accuracies so the reported value covers all GPUs.
        accuracy = tf.divide(tf.add_n(tower_accuracies), tf.cast(NUM_GPUS, dtype=tf.float32), 'accuracy')

        summaries.append(tf.summary.scalar('accuracy', accuracy))
        summaries.append(tf.summary.scalar('learning_rate', learning_rate))

        for grad, var in grads:
            if grad is not None:
                name = '%s/gradients' % var.op.name
                summaries.append(tf.summary.histogram(name, grad))

        apply_gradient_op = optimizer.apply_gradients(grads, global_step=global_step)

        for var in tf.trainable_variables():
            summaries.append(tf.summary.histogram(var.op.name, var))

        var_averages = tf.train.ExponentialMovingAverage(EXP_MOVING_AVG_DECAY, global_step)
        var_averages_op = var_averages.apply(tf.trainable_variables())
        train_op = tf.group(apply_gradient_op, var_averages_op)
        saver = tf.train.Saver(tf.global_variables())
        summary_op = tf.summary.merge(summaries)

        init = tf.global_variables_initializer()
        run_metadata = tf.RunMetadata()
        config = tf.ConfigProto(allow_soft_placement=True, log_device_placement=True)
        sess = tf.Session(config=config)
        sess.run(init)

        tf.train.start_queue_runners(sess=sess)

        summary_writer = tf.summary.FileWriter('/home/cdiesch/Documents/TFFirstPageClassifier/summaries/run4',
                                               graph=sess.graph)

        for step in range(0, MAX_STEPS, 1):
            start_time = time.time()
            _, loss_val, accuracy_val = sess.run([train_op, loss, accuracy])
            duration = time.time() - start_time

            if step % 10 == 0:
                num_examples_per_step = BATCH_SIZE * NUM_GPUS
                examples_per_sec = num_examples_per_step / duration
                sec_per_batch = duration / NUM_GPUS

                step_str = '{:15}'.format('Step: %d' % step)
                format_str = '%s Accuracy = %.5f Loss = %.2f (%.1f ex/s; %.3f s/batch)'

                print(format_str % (step_str, accuracy_val, loss_val, examples_per_sec, sec_per_batch))

            if step % 100 == 0:
                summary_str = sess.run(summary_op, run_metadata=run_metadata)
                summary_writer.add_run_metadata(run_metadata=run_metadata, global_step=step, tag='Step %d' % step)
                summary_writer.add_summary(summary_str, step)
                summary_writer.flush()

            if step % 1000 == 0:
                model_save_loc = '/home/cdiesch/Documents/TFFirstPageClassifier/summaries/run4/model.ckpt'

                saver.save(sess, model_save_loc, step)


# def set_up_model(graph, config):
#     with tf.Session(graph=graph, config=config):
#         with tf.name_scope('global_values'):
#             # Initialize global constants
#             with tf.name_scope('constants'):
#                 max_learning_rate = tf.constant(value=max_lr, dtype=tf.float32, name='max_learning_rate')
#                 min_learning_rate = tf.constant(value=max_lr, dtype=tf.float32, name='min_learning_rate')
#                 learning_rate_decay_speed = tf.constant(value=lr_decay_rate, dtype=tf.int32,
#                                                         name='learning_rate_decay_speed')
#             # Initialize global variables
#             with tf.name_scope('variables'):
#                 global_step = tf.get_variable(name='global_step', shape=[], dtype=tf.int32,
#                                               initializer=tf.constant_initializer(0), trainable=False)
#
#                 exp_in = tf.divide(tf.cast(global_step, tf.float32), tf.cast(learning_rate_decay_speed, tf.float32),
#                                    name='learning_rate_input')
#
#                 exp = tf.cast(tf.exp(exp_in), tf.float64, name='exp')
#
#                 min_max_factor = tf.cast(tf.subtract(max_learning_rate, min_learning_rate), tf.float64,
#                                          name='min_max_factor')
#
#                 scaled_exp = tf.cast(tf.multiply(min_max_factor, exp), tf.float32, name='scaled_exp')
#                 learning_rate = tf.add(min_learning_rate, scaled_exp, name='learning_rate')
#         # Placeholders. These are what need to be passed in to sess.run().
#         with tf.variable_scope('input_arguments'):
#             # Are we testing?
#             train = tf.placeholder(tf.bool, name='train')
#             # Input (batch_size x 280 x 280 x 1)
#             X = tf.placeholder(tf.float32, shape=[None, 280, 280, 1], name='X')
#             # Output (batch_size x 2)
#             Y_ = tf.placeholder(tf.float32, shape=[None, 2], name='Y_')
#
#         with tf.variable_scope('neural_net_structure'):
#             with tf.name_scope('layer_1'):
#                 # size: 28 x 18
#                 conv_1 = tf.identity(layers.conv2d(X, num_outputs=6, kernel_size=6, stride=10), name='conv_2d')
#                 bn_1 = tf.identity(layers.batch_norm(conv_1), name='batch_norm')
#                 y1 = tf.identity(layers.dropout(bn_1, keep_prob=p_keep_conv, is_training=train), name='dropout')
#
#             with tf.variable_scope('layer_2'):
#                 # size: 14 x 14
#                 conv_2 = tf.identity(layers.conv2d(y1, num_outputs=6, kernel_size=6, stride=2), name='conv_2d')
#                 bn_2 = tf.identity(layers.batch_norm(conv_2), name='batch_norm')
#                 y2 = tf.identity(layers.dropout(bn_2, keep_prob=p_keep_conv, is_training=train), name='dropout')
#
#             # with tf.name_scope('layer_3'):
#             #     # size: 7 x 7
#             #     conv_3 = tf.identity(layers.conv2d(y2, num_outputs=6, kernel_size=6, stride=2), name='conv_2d')
#             #     bn_3 = tf.identity(layers.batch_norm(conv_3), name='batch_norm')
#             #     y3 = tf.identity(layers.dropout(bn_3, keep_prob=p_keep_conv, is_training=train), name='dropout')
#
#             with tf.variable_scope('layer_3'):
#                 # Pooling layer
#                 pool = tf.identity(layers.max_pool2d(y2, kernel_size=4), name='pooling')
#                 bn_4 = tf.identity(layers.batch_norm(pool), name='batch_norm')
#                 y4 = tf.identity(layers.dropout(bn_4), name='dropout')
#
#             with tf.variable_scope('layer_5'):
#                 # flatten
#                 flatten = tf.identity(layers.flatten(y4), name='flatten')
#                 bn_5 = tf.identity(layers.batch_norm(flatten), name='batch_norm')
#                 y5 = tf.identity(layers.dropout(bn_5, keep_prob=p_keep, is_training=train), name='batch_norm')
#
#             with tf.variable_scope('layer_6'):
#                 # fully connected
#                 fully_connected = tf.identity(layers.relu(y5, 200), name='fully_connected')
#                 bn_6 = tf.identity(layers.batch_norm(fully_connected), name='batch_norm')
#                 y6 = tf.identity(layers.dropout(bn_6, keep_prob=p_keep, is_training=train), name='batch_norm')
#
#             with tf.variable_scope('logits'):
#                 # Logits
#                 y_logits = tf.identity(layers.linear(y6, 2), name='linear')
#
#         with tf.name_scope('prediction'):
#             # Prediction
#             y = tf.identity(tf.nn.softmax(y_logits), name='y')
#
#         with tf.name_scope('cross_entropy'):
#             loss = tf.identity(tf.nn.softmax_cross_entropy_with_logits(logits=y_logits, labels=Y_), name='loss')
#
#             loss = tf.identity(tf.reduce_mean(loss) * batch_size, name='reduced_mean')
#
#         with tf.name_scope('optimization'):
#             optimizer = tf.train.AdamOptimizer(learning_rate)
#             train_step = layers.optimize_loss(loss, global_step, learning_rate, optimizer=optimizer)
#
#         with tf.name_scope('accuracy_data'):
#             correct = tf.equal(tf.argmax(y, 1), tf.argmax(Y_, 1), name='correct')
#             accuracy = tf.reduce_mean(tf.cast(correct, tf.float32), name='accuracy')
#
#         return global_step, learning_rate, train, X, Y_, loss, accuracy, train_step
#
#
# def pretty_dict(value_dict):
#     fixed_len_str = '{:^%d}' % 22
#     to_console = '%s: %d |' % ('Step', value_dict['Step'])
#
#     for key, value in value_dict.items():
#         if not key == 'Step':
#             to_console += fixed_len_str.format(' %s: %s' % (key, str(value)))
#
#     print(to_console)
#
#
# def run_sess(session, fetches_dict, feed_dict, log=True):
#     run_metadata = tf.RunMetadata()
#     options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE, output_partition_graphs=True)
#
#     output_dict = session.run(fetches=fetches_dict, feed_dict=feed_dict, options=options, run_metadata=run_metadata)
#
#     with open("/home/cdiesch/Documents/TensorFlowLogs/FirstPageEngine/%s.txt"
#               % datetime.datetime.now().strftime('%Y.%m.%d-%H:%M:%S'), "w+") as out:
#         out.write(str(run_metadata))
#
#     if log:
#         pretty_dict(output_dict)
#
#
# def training_step(global_step, learning_rate, train, X, Y_, loss, accuracy, train_step, update_test_data=False):
#     # Get the session
#     session = tf.get_default_session()
#
#     batch_x, batch_y = DataHelper.next_train_batch(batch_size)
#
#     # Decide what data we want to get...
#     fetches_dict = {'Accuracy': accuracy, 'Loss': loss, 'Learning Rate': learning_rate, 'Step': global_step}
#     # Get the batch of training data
#     if update_test_data:
#         feed_x, feed_y = DataHelper.get_test_data()
#         train_update = False
#     else:
#         feed_x = batch_x
#         feed_y = batch_y
#         train_update = True
#
#     feed_dict = {X: feed_x, Y_: feed_y, train: train_update}
#
#     # If we are updating the training data...
#     run_sess(session=session, fetches_dict=fetches_dict, feed_dict=feed_dict)
#
#     # Back propagate
#     if not train_update:
#         del feed_dict
#         feed_dict = {X: batch_x, Y_: batch_y, train: True}
#
#     fetches_dict = {'Train Step': train_step}
#     # Run the training step...
#     run_sess(session=session, fetches_dict=fetches_dict, feed_dict=feed_dict, log=False)


# This is the main function of the program.
def main(argv=None):
    # import MakeRecords
    # MakeRecords.make_record(TRAIN_DATA_ROOT, RECORD_FILE)
    train()


def check_args(arg1, arg2):
    global global_opt

    fatal_errors = False
    # Do whatever checks are needed on the optional arguments here.
    if arg2 is not None:
        print('"%s" is a valid argument!' % arg2)
        global_opt = arg2
    # If it wasn't valid, let the user know we're going with the default value. We print a warning for an optional
    # argument being bad.
    else:
        print('Warning: "%s" is not valid\n'
              '         OK: Using the default of "%s"' % (arg2, global_opt))

    # Do the checks for the required arguments second; this makes the output easier to read.
    if not arg1 == 'bad value':
        print('"%s" is a valid argument!' % arg1)
    # We print an error for a required argument being bad.
    else:
        print('Error: "%s" is not a valid argument!' % arg1)
        fatal_errors = True

    # We only exit if a required argument was bad; we can handle optional arguments.
    if fatal_errors:
        parser.print_help()
        print('Exiting...')
        sys.exit(1)


def show_args(train_data_root):
    # Print the arguments with a brief summary of what they mean.
    print('Training Data Directory: %s' % train_data_root)


def make_args():
    required_args = parser.add_argument_group('Required')
    optional_args = parser.add_argument_group('Optional')

    # required_args.add_argument('-l', '--argument1', required=True, help='A required argument.')
    #
    # optional_args.add_argument('-o', '--argument2', required=False, help='An optional argument.')
    # optional_args.add_argument('-h', '--help', action="help", help='Prints the help message.')


# This is where we call the main method from.
if __name__ == '__main__':
    printer.write_no_prefix(ConsoleUtils.get_header(program_name, program_version, build_date, author))
    make_args()
    args = parser.parse_args()
    tf.app.run()

    # training_data_dir = r'/home/cdiesch/Documents/TFFirstPageClassifier/GeneratedData/'
    # trained_model_out_dir = '/home/cdiesch/Documents/TFFirstPageClassifier/TrainedModels/'
    # # Get the argument.
    # # some_arg_string = args.argument1
    # # some_opt_string = args.argument2
    # # Do an argument check.
    # # check_args(some_arg_string, some_opt_string)
    # # Now we can run...
    # # main(r'/home/cdiesch/Documents/TFFirstPageClassifier/RESTRICTED-TrainTestData')
    # main(training_data_dir, trained_model_out_dir)