#!/bin/bash
# Usage: sequencelogic-run-ocr.sh [--errpages pageCount] [--timeout seconds] [--tmpdir temp directory] [--frtname frtfilename] {tiff-or-pdf-file-path}.pdf/tif[f]
# Where:
# {tiff-or-pdf-file-path} is an absolute path to a multi-page file
# This will create, in the same directory as the input file, a "fart" (.frt) file of the form:
# {tiff-or-pdf-file-path}.frt

# usage
# Prints the command-line synopsis and option notes to stdout, then terminates
# the script with exit status 1. Never returns to the caller.
# Globals: SEQUENCELOGICHOME (read) - interpolated into the help text.
usage() {
  echo "Usage: sequencelogic-run-ocr.sh [--errpages pageCount] [--timeout seconds] [--tmpdir temp directory] [--frtname frtfilename] {tiff-or-pdf-file-path}.pdf/tif[f]"
  echo "Requires that ${SEQUENCELOGICHOME}/bin/sequencelogic-ocr exists and is executable"
  echo "The optional --errpages arg is the number of pages allowed to be in error, before OCR gives up."
  echo "The --frtname file arg is only used for Darwin and test"
  echo "the --timeout arg option is used only for Linux"
  exit 1
}

#set -x

# setup
# REDIR receives the chatty "[OK]" traces and anything the helper script
# prints while being sourced.
REDIR=/dev/null # use /dev/tty for debug
SCRIPT=$0
echo "[OK] Script: ${SCRIPT}" > "${REDIR}"
# Directory part of the path this script was invoked with. This is whatever
# dirname yields for $0, so it can be relative (e.g. "." or "bin").
SCRIPTPATH=$(dirname "${SCRIPT}")
echo "[OK] This script is located at: $SCRIPTPATH" > "${REDIR}"

# Load the shared helper functions: prefer a copy sitting next to this script,
# otherwise fall back to the one under ${SEQUENCELOGICHOME}/bin.
# Paths are quoted so directories containing spaces still work.
if [ -f "${SCRIPTPATH}/sequencelogic-helpers.sh" ]; then
  source "${SCRIPTPATH}/sequencelogic-helpers.sh" > "${REDIR}"
else
  source "${SEQUENCELOGICHOME}/bin/sequencelogic-helpers.sh" > "${REDIR}"
fi

# _ocr_max_threads BOTS CORES
# Prints the thread budget derived from the number of running bots and CPU
# cores:
#   - with at most 2 bots: CORES - 1
#   - otherwise:           (CORES - BOTS) / 2   (integer division)
#   - any result of 3 or less is replaced by 4.
# Surrounding whitespace is ignored and values are read as decimal. An empty
# or non-numeric BOTS is treated as 0 and CORES as 1, so the result is always
# a number instead of an empty string.
_ocr_max_threads() {
  local bots=${1-} cores=${2-} threads
  bots=${bots//[[:space:]]/}
  cores=${cores//[[:space:]]/}
  [[ "$bots" =~ ^[0-9]+$ ]] || bots=0
  [[ "$cores" =~ ^[0-9]+$ ]] || cores=1
  # Force base 10 so values such as "08" are not parsed as (invalid) octal.
  bots=$((10#$bots))
  cores=$((10#$cores))
  if (( bots <= 2 )); then
    threads=$(( cores - 1 ))
  else
    threads=$(( (cores - bots) / 2 ))
  fi
  if (( threads <= 3 )); then
    threads=4
  fi
  echo "$threads"
}

# get running bot count
# countRunningBots / countCores are expected to come from
# sequencelogic-helpers.sh, which is sourced earlier in this script.
BOTCNT=$(countRunningBots)
CORECNT=$(countCores)
# NOTE(review): within this script MAXTHREADS is only reported below; it is
# not passed on to the OCR program here.
MAXTHREADS=$(_ocr_max_threads "${BOTCNT}" "${CORECNT}")
echo "There are ${BOTCNT} running robots and ${CORECNT} CPU cores on this machine; using ${MAXTHREADS} max threads"

# Log basic host diagnostics (address, memory, load) ahead of the OCR run.
IPADDR=$(hostname -i)
echo "The address of this host is: $IPADDR"
echo " --------- Memory Info ----------- "
# free(1) is not available on every platform this script handles (it has a
# Darwin mock path), so only call it where it exists.
if command -v free > /dev/null 2>&1; then
  free -m
fi
uptime
echo ""

# _parse_ocr_options ARGS...
# Consumes the leading option/value pairs of ARGS and records them in globals:
#   --errpages N    -> NUMERRPAGES=N
#   --timeout SECS  -> TIMEOUTPROG="timeout SECS" (command prefix for the OCR run)
#   --tmpdir DIR    -> TEMPDIR=DIR
#   --frtname NAME  -> BNAME=NAME (below only used for Darwin/mock)
# Options may appear in any order (the documented order keeps working).
# Parsing stops at the first argument that is not one of these options.
# Sets _OCR_OPTS_CONSUMED to the number of arguments consumed so the caller
# can shift them away; a trailing option with no value counts as one.
_parse_ocr_options() {
  _OCR_OPTS_CONSUMED=0
  while [ "$#" -gt 0 ]; do
    case "$1" in
      --errpages) NUMERRPAGES="${2-}" ;;
      --timeout) TIMEOUTPROG="timeout ${2-}" ;;
      --tmpdir) TEMPDIR="${2-}" ;;
      --frtname) BNAME="${2-}" ;;
      *) break ;;
    esac
    if [ "$#" -ge 2 ]; then
      shift 2
      _OCR_OPTS_CONSUMED=$(( _OCR_OPTS_CONSUMED + 2 ))
    else
      shift
      _OCR_OPTS_CONSUMED=$(( _OCR_OPTS_CONSUMED + 1 ))
    fi
  done
}

# Defaults used when the corresponding option is not given.
NUMERRPAGES="0"
TIMEOUTPROG=""
TEMPDIR="/tmp/"
# below only used for Darwin/mock
BNAME=""

_parse_ocr_options "$@"
# Drop the parsed options so that $1 is the input file path.
shift "${_OCR_OPTS_CONSUMED}"

# Decide between the real OCR binary and the mock path.
# IS_MOCK becomes 1 only when the OCR binary is not executable AND the
# platform is Darwin; in every other case it stays 0.
# NOTE(review): PLATFORM is not set in this script - presumably it comes from
# sequencelogic-helpers.sh; confirm.
IS_MOCK=0
if [ -x "${SEQUENCELOGICHOME}/bin/sequencelogic-ocr" ]; then
  echo "Using ${SEQUENCELOGICHOME}/bin/sequencelogic-ocr..."
elif [ "$PLATFORM" = "Darwin" ]; then
  IS_MOCK=1
fi

# --frtname only applies to the mock path; discard it otherwise.
if [ "${IS_MOCK}" = 0 ]; then
  BNAME=""
fi

# The first remaining argument is the input document; STAT is the exit status
# reported at the end of the script.
FILE="$1"
STAT=0

# Bail out with the usage text when the input is missing or not a regular file.
if [ ! -f "$FILE" ]; then
  if [ -n "$FILE" ]; then
    # Name the offending path on stderr so the failure is diagnosable.
    echo "Input file does not exist or is not a regular file: $FILE" >&2
  fi
  usage
  exit 1 # not reached when usage exits itself; kept as a safety net
fi

# There is no OCR for Darwin :(
if [ "${IS_MOCK}" = 1 ]; then
  # Mock mode: instead of running OCR, copy pre-made results from the
  # testdata tree next to the input file.
  TDROOT="${SEQUENCELOGICHOME}/SLSync/testdata"
  if [ ! -d "$TDROOT" ]; then
    TDROOT="/Users/tim/Development/sl/testdata"
  fi

  # Without --frtname, derive the testdata .frt name from the input file name.
  if [ "$BNAME" = "" ]; then
    BNAME=$(basename "$FILE")
    BNAME="${BNAME%.*}.frt"
  fi

  # Look in package/loan/frt first, then fall back to NO_PI/frt.
  TESTFRT="${TDROOT}/package/loan/frt/${BNAME}"
  if [ ! -f "$TESTFRT" ]; then
    TESTFRT="${TDROOT}/NO_PI/frt/${BNAME}"
  fi
  echo "Looking for testdata .frt in: $TESTFRT"
  if [ -f "$TESTFRT" ]; then
    # The destination is always named after the input file, not after BNAME.
    DF=$(basename "$FILE")
    DF="${DF%.*}.frt"
    DEST=$(dirname "${FILE}")/"${DF}"
    echo "Copy $TESTFRT to $DEST"
    # A failed copy means no .frt was produced, so it must not report success.
    if ! cp "$TESTFRT" "$DEST"; then
      STAT=-1
      echo "Failed to copy $TESTFRT to $DEST"
    fi
  else
    STAT=-1
    echo "Did not find testdata .frt file"
  fi

  # Optional "<name>_fixed.<ext>" companion, using the input file's extension.
  FIXNAME="${BNAME}"
  FIXEXTN="${FILE##*.}"
  FIXNAME=$(basename "${FIXNAME}" .frt)
  FIXNAME=${FIXNAME}_fixed.${FIXEXTN}
  TESTFIX="${TDROOT}/package/loan/fix/${FIXNAME}"
  if [ ! -f "$TESTFIX" ]; then
    TESTFIX="${TDROOT}/NO_PI/fix/${FIXNAME}"
  fi
  echo "Looking for testdata _fixed in: $TESTFIX"
  if [ -f "$TESTFIX" ]; then
    DF=$(basename "$FILE")
    DF="${DF%.*}_fixed.${FIXEXTN}"
    DEST=$(dirname "${FILE}")/"${DF}"
    echo "Copy $TESTFIX to $DEST"
    cp "$TESTFIX" "$DEST"
  else
    # allow this to succeed ... STAT=-1
    echo "Did not find testdata _fixed file"
  fi
  # NOTE(review): presumably simulates OCR processing time - confirm.
  sleep 3
else
  # The actual OCR program is named: "sequencelogic-ocr" and must be in ${SEQUENCELOGICHOME}/bin
  OCRPROG="${SEQUENCELOGICHOME}/bin/sequencelogic-ocr"
  if [ ! -x "$OCRPROG" ]; then
    echo "$OCRPROG does not exist or is not executable"
    exit 2
  fi

  # $TIMEOUTPROG is deliberately unquoted: it is either empty or
  # "timeout <seconds>" and has to split into separate words (or vanish).
  # NOTE(review): ">&1" is a no-op (stdout to stdout); "2>&1" may have been
  # intended - confirm before changing.
  $TIMEOUTPROG "$OCRPROG" -e "$NUMERRPAGES" -i "$FILE" -t "$TEMPDIR" >&1
  STAT=$?
fi

# A STAT of -1 is seen by the caller as 255 (exit statuses are modulo 256).
echo "Exit with status: $STAT"
exit $STAT