Sleds/slocr/sequencelogic-run-ocr.sh

166 lines
4.0 KiB
Bash

#!/bin/bash
# Usage: sequencelogic-run-ocr.sh [--errpages pageCount] [--timeout seconds] [--tmpdir temp directory] [--frtname frtfilename] {tiff-or-pdf-file-path}.pdf/tif[f]
# Where:
# {tiff-or-pdf-file-path} is an absolute path to a multi-page file
# This will create, in the same directory as the input file, a ".frt" file of the form:
# {tiff-or-pdf-file-path}.frt
# Print the command-line help on stdout and terminate the script with status 1.
# Reads: SEQUENCELOGICHOME (only to show where the OCR binary is expected).
usage() {
  printf '%s\n' \
    "Usage: sequencelogic-run-ocr.sh [--errpages pageCount] [--timeout seconds] [--tmpdir temp directory] [--frtname frtfilename] {tiff-or-pdf-file-path}.pdf/tif[f]" \
    "Requires that ${SEQUENCELOGICHOME}/bin/sequencelogic-ocr exists and is executable" \
    "The optional --errpages arg is the number of pages allowed to be in error, before OCR gives up." \
    "The --frtname file arg is only used for Darwin and test" \
    "the --timeout arg option is used only for Linux"
  exit 1
}
#set -x
# setup
# REDIR receives this script's own debug chatter and anything the helpers
# print while being sourced.
REDIR=/dev/null # use /dev/tty for debug
SCRIPT=$0
echo "[OK] Script: ${SCRIPT}" > "${REDIR}"
# Directory part of the path this script was invoked with (it is relative when
# $0 is relative). Quoted so that install paths containing spaces work.
SCRIPTPATH=$(dirname -- "${SCRIPT}")
echo "[OK] This script is located at: $SCRIPTPATH" > "${REDIR}"
# Prefer the helpers sitting next to this script; otherwise fall back to the
# copy under ${SEQUENCELOGICHOME}/bin.
if [ -f "${SCRIPTPATH}/sequencelogic-helpers.sh" ]; then
  source "${SCRIPTPATH}/sequencelogic-helpers.sh" > "${REDIR}"
else
  source "${SEQUENCELOGICHOME}/bin/sequencelogic-helpers.sh" > "${REDIR}"
fi
# Prints $1 when it is a non-negative decimal integer (surrounding whitespace
# is ignored); otherwise prints the fallback value $2.
uintOrDefault() {
  if [[ "$1" =~ ^[[:space:]]*([0-9]+)[[:space:]]*$ ]]; then
    printf '%s\n' "${BASH_REMATCH[1]}"
  else
    printf '%s\n' "$2"
  fi
}

# get running bot count
# countRunningBots and countCores are not defined in this script; they are
# expected to come from the sourced sequencelogic-helpers.sh.
BOTCNT=$(countRunningBots)
CORECNT=$(countCores)
# Guard against a missing helper or non-numeric output: without this the
# integer comparisons below fail and MAXTHREADS ends up empty.
BOTCNT=$(uintOrDefault "${BOTCNT}" 0)
CORECNT=$(uintOrDefault "${CORECNT}" 1)

# With at most two bots running, use every core but one; otherwise use half of
# the cores not already taken by bots. The 10# prefix forces base 10 so a value
# with a leading zero is not read as octal.
MAXTHREADS=1
if (( 10#${BOTCNT} <= 2 )); then
  MAXTHREADS=$(( 10#${CORECNT} - 1 ))
else
  MAXTHREADS=$(( (10#${CORECNT} - 10#${BOTCNT}) / 2 ))
fi
# Any result of 3 or less is raised to 4.
if (( MAXTHREADS <= 3 )); then
  MAXTHREADS=4
fi
echo "There are ${BOTCNT} running robots and ${CORECNT} CPU cores on this machine; using ${MAXTHREADS} max threads"
# Log host diagnostics: the address reported by `hostname -i`, then the output
# of `free -m` and `uptime`, followed by a blank separator line.
IPADDR=$(hostname -i)
printf '%s\n' \
  "The address of this host is: $IPADDR" \
  " --------- Memory Info ----------- "
free -m
uptime
printf '\n'
# Parse the leading command-line options, in any order, stopping at the first
# argument that is not a recognised option (normally the input file).
# Arguments: the script's positional parameters ("$@").
# Sets:
#   NUMERRPAGES       value of --errpages (default "0")
#   TIMEOUTPROG       "timeout <seconds>" for --timeout (default "")
#   TEMPDIR           value of --tmpdir (default "/tmp/")
#   BNAME             value of --frtname (default ""); only used for Darwin/mock
#   SLOCR_OPTION_ARGC number of arguments consumed, for the caller to shift
slocrParseOptions() {
  local consumed=0 step
  NUMERRPAGES="0"
  TIMEOUTPROG=""
  TEMPDIR="/tmp/"
  BNAME=""
  while [ $# -gt 0 ]; do
    case "$1" in
      --errpages) NUMERRPAGES="${2-}" ;;
      --timeout)  TIMEOUTPROG="timeout ${2-}" ;;
      --tmpdir)   TEMPDIR="${2-}" ;;
      --frtname)  BNAME="${2-}" ;;
      *) break ;;
    esac
    # An option given as the very last argument has no value: consume only the
    # option itself so the loop terminates (the missing file is reported later).
    if [ $# -ge 2 ]; then
      step=2
    else
      step=1
    fi
    shift "${step}"
    consumed=$(( consumed + step ))
  done
  SLOCR_OPTION_ARGC=${consumed}
}

slocrParseOptions "$@"
# Drop the parsed options so that $1 is the input file.
shift "${SLOCR_OPTION_ARGC}"
# Decide between the real OCR binary and the mock. The mock is selected only
# when the binary is not executable AND PLATFORM is "Darwin".
# NOTE(review): PLATFORM is not set in this script; presumably it comes from
# sequencelogic-helpers.sh - confirm.
IS_MOCK=0
if [[ -x "${SEQUENCELOGICHOME}/bin/sequencelogic-ocr" ]]; then
  echo "Using ${SEQUENCELOGICHOME}/bin/sequencelogic-ocr..."
elif [[ "$PLATFORM" == "Darwin" ]]; then
  IS_MOCK=1
fi
# --frtname is honoured by the mock only; discard it for a real OCR run.
if [[ "${IS_MOCK}" == 0 ]]; then
  BNAME=""
fi
# The first remaining argument is the input document; it must be an existing
# regular file.
FILE="$1"
STAT=0
if [[ ! -f "$FILE" ]]; then
  usage
  # usage already exits; kept as a safety net.
  exit 1
fi
# Prints the path of testdata file $2 inside the "$1" directory ("frt" or
# "fix") under ${TDROOT}: the package/loan copy when that file exists,
# otherwise the NO_PI path (which may not exist either - callers must check).
testdataPath() {
  local candidate="${TDROOT}/package/loan/$1/$2"
  if [ ! -f "${candidate}" ]; then
    candidate="${TDROOT}/NO_PI/$1/$2"
  fi
  printf '%s\n' "${candidate}"
}

# There is no OCR for Darwin :(
if [ "${IS_MOCK}" = 1 ]; then
  # Mock run: copy canned results from the testdata tree instead of doing OCR.
  TDROOT="${SEQUENCELOGICHOME}/SLSync/testdata"
  if [ ! -d "$TDROOT" ]; then
    # NOTE(review): developer-specific fallback location.
    TDROOT="/Users/tim/Development/sl/testdata"
  fi
  # Without --frtname the fixture is named after the input file, with its
  # extension replaced by .frt.
  if [ -z "$BNAME" ]; then
    BNAME=$(basename "$FILE")
    BNAME="${BNAME%.*}.frt"
  fi
  # Results are written next to the input file and are always named after the
  # input file, even when --frtname selected a different fixture.
  DESTDIR=$(dirname "${FILE}")
  DESTBASE=$(basename "$FILE")
  DESTBASE="${DESTBASE%.*}"

  # Required result: the .frt file. A missing fixture or a failed copy makes
  # the script exit non-zero.
  TESTFRT=$(testdataPath frt "${BNAME}")
  echo "Looking for testdata .frt in: $TESTFRT"
  if [ -f "$TESTFRT" ]; then
    DEST="${DESTDIR}/${DESTBASE}.frt"
    echo "Copy $TESTFRT to $DEST"
    # Previously a failed copy was ignored and the script still exited 0.
    cp "$TESTFRT" "$DEST" || STAT=$?
  else
    STAT=-1
    echo "Did not find testdata .frt file"
  fi

  # Optional result: the "_fixed" document, which keeps the input's extension.
  FIXEXTN="${FILE##*.}"
  FIXNAME="$(basename "${BNAME}" .frt)_fixed.${FIXEXTN}"
  TESTFIX=$(testdataPath fix "${FIXNAME}")
  echo "Looking for testdata _fixed in: $TESTFIX"
  if [ -f "$TESTFIX" ]; then
    DEST="${DESTDIR}/${DESTBASE}_fixed.${FIXEXTN}"
    echo "Copy $TESTFIX to $DEST"
    # Best effort: this file is optional, so the copy does not affect STAT.
    cp "$TESTFIX" "$DEST"
  else
    # allow this to succeed ... STAT=-1
    echo "Did not find testdata _fixed file"
  fi
  # NOTE(review): presumably simulates the time a real OCR run takes - confirm.
  sleep 3
else
  # The actual OCR program is named "sequencelogic-ocr" and must be in
  # ${SEQUENCELOGICHOME}/bin
  OCRPROG="${SEQUENCELOGICHOME}/bin/sequencelogic-ocr"
  if [ ! -x "$OCRPROG" ]; then
    echo "$OCRPROG does not exist or is not executable"
    exit 2
  fi
  # TIMEOUTPROG is deliberately unquoted: it is either empty or
  # "timeout <seconds>" and has to split into separate words.
  # NOTE(review): a no-op ">&1" redirect was dropped here; if stderr was meant
  # to be merged into stdout, add "2>&1".
  $TIMEOUTPROG "$OCRPROG" -e "$NUMERRPAGES" -i "$FILE" -t "$TEMPDIR"
  STAT=$?
fi
echo "Exit with status: $STAT"
exit "$STAT"