#!/bin/bash
#
# sequencelogic-run-ocr.sh - run OCR over a multi-page PDF/TIFF file.
#
# Usage:
#   sequencelogic-run-ocr.sh [--errpages pageCount] [--timeout seconds]
#       [--tmpdir temp directory] [--frtname frtfilename]
#       {tiff-or-pdf-file-path}.pdf/tif[f]
# Where:
#   {tiff-or-pdf-file-path} is an absolute path to a multi-page file
#
# This will create in the same directory as the input file a "fart" file of
# the form:
#   {tiff-or-pdf-file-path}.frt
#
# Options may be given in any order, but must all precede the input file.
#
# Exit status:
#   1      bad usage, or the input file does not exist
#   2      ${SEQUENCELOGICHOME}/bin/sequencelogic-ocr is missing/not executable
#   other  status of the OCR program (or of `timeout` when --timeout is used);
#          in mock mode, 0 on success or 255 (exit -1) when the testdata .frt
#          file was not found

# Print the command-line help to stdout and terminate with status 1.
usage() {
  echo "Usage: sequencelogic-run-ocr.sh [--errpages pageCount] [--timeout seconds] [--tmpdir temp directory] [--frtname frtfilename] {tiff-or-pdf-file-path}.pdf/tif[f]"
  echo "Requires that ${SEQUENCELOGICHOME}/bin/sequencelogic-ocr exists and is executable"
  echo "The optional --errpages arg is the number of pages allowed to be in error, before OCR gives up."
  echo "The --frtname file arg is only used for Darwin and test"
  echo "the --timeout arg option is used only for Linux"
  exit 1
}

#set -x

# ---------------------------------------------------------------------------
# Setup
# ---------------------------------------------------------------------------
REDIR=/dev/null # use /dev/tty for debug
SCRIPT=$0
echo "[OK] Script: ${SCRIPT}" > "${REDIR}"

# Directory this script lives in, e.g. /home/user/bin
SCRIPTPATH=$(dirname "${SCRIPT}")
echo "[OK] This script is located at: $SCRIPTPATH" > "${REDIR}"

# Prefer the helpers that sit next to this script; otherwise fall back to the
# installed copy under ${SEQUENCELOGICHOME}/bin.
if [ -f "${SCRIPTPATH}/sequencelogic-helpers.sh" ]; then
  source "${SCRIPTPATH}/sequencelogic-helpers.sh" > "${REDIR}"
else
  source "${SEQUENCELOGICHOME}/bin/sequencelogic-helpers.sh" > "${REDIR}"
fi

# ---------------------------------------------------------------------------
# Host diagnostics (informational only; MAXTHREADS is reported, not passed on)
# ---------------------------------------------------------------------------
# countRunningBots / countCores are expected to come from the sourced helpers.
BOTCNT=$(countRunningBots)
CORECNT=$(countCores)
MAXTHREADS=1
if [ "${BOTCNT}" -le 2 ]; then
  MAXTHREADS=$((CORECNT - 1))
else
  MAXTHREADS=$(((CORECNT - BOTCNT) / 2))
fi
# Any computed value of 3 or less is replaced by 4.
if [ "${MAXTHREADS}" -le 3 ]; then
  MAXTHREADS=4
fi
echo "There are ${BOTCNT} running robots and ${CORECNT} CPU cores on this machine; using ${MAXTHREADS} max threads"

IPADDR=$(hostname -i)
echo "The address of this host is: $IPADDR"
echo " --------- Memory Info ----------- "
free -m
uptime
echo ""

# ---------------------------------------------------------------------------
# Option parsing
# ---------------------------------------------------------------------------
NUMERRPAGES="0"
# Kept as an array so the optional `timeout N` prefix expands to separate,
# correctly quoted words (and to nothing at all when --timeout is absent).
TIMEOUT_CMD=()
TEMPDIR="/tmp/"
# BNAME is only used for Darwin/mock
BNAME=""

while [ $# -gt 0 ]; do
  case "$1" in
    --errpages)
      [ $# -ge 2 ] || usage
      NUMERRPAGES="$2"
      shift 2
      ;;
    --timeout)
      [ $# -ge 2 ] || usage
      TIMEOUT_CMD=(timeout "$2")
      shift 2
      ;;
    --tmpdir)
      [ $# -ge 2 ] || usage
      TEMPDIR="$2"
      shift 2
      ;;
    --frtname)
      [ $# -ge 2 ] || usage
      BNAME="$2"
      shift 2
      ;;
    *)
      # First non-option argument is the input file.
      break
      ;;
  esac
done

# Mock mode is used only when the real OCR binary is unavailable AND we are on
# Darwin. PLATFORM is not set in this script; presumably it is provided by the
# sourced helpers or the environment -- TODO confirm.
IS_MOCK=0
if [ -x "${SEQUENCELOGICHOME}/bin/sequencelogic-ocr" ]; then
  echo "Using ${SEQUENCELOGICHOME}/bin/sequencelogic-ocr..."
else
  if [ "$PLATFORM" = "Darwin" ]; then
    IS_MOCK=1
  fi
fi

# --frtname is meaningful only in mock mode; ignore it otherwise.
if [ "${IS_MOCK}" = 0 ]; then
  BNAME=""
fi

FILE="$1"
STAT=0

if [ ! -f "$FILE" ]; then
  usage
  exit 1
fi

# ---------------------------------------------------------------------------
# Run
# ---------------------------------------------------------------------------
# There is no OCR for Darwin :(
if [ "${IS_MOCK}" = 1 ]; then
  TDROOT="${SEQUENCELOGICHOME}/SLSync/testdata"
  if [ ! -d "$TDROOT" ]; then
    TDROOT="/Users/tim/Development/sl/testdata"
  fi

  # mocking for linux
  # Default the testdata .frt name to the input file's base name.
  if [ "$BNAME" = "" ]; then
    BNAME=$(basename "$FILE")
    BNAME="${BNAME%.*}.frt"
  fi

  # Look for a canned .frt result: package/loan first, then NO_PI.
  TESTFRT="${TDROOT}/package/loan/frt/${BNAME}"
  if [ ! -f "$TESTFRT" ]; then
    TESTFRT="${TDROOT}/NO_PI/frt/${BNAME}"
  fi
  echo "Looking for testdata .frt in: $TESTFRT"
  if [ -f "$TESTFRT" ]; then
    # The copy is always named after the input file, even when --frtname
    # selected a differently named testdata file.
    DF=$(basename "$FILE")
    DF="${DF%.*}.frt"
    DEST="$(dirname "${FILE}")/${DF}"
    echo "Copy $TESTFRT to $DEST"
    cp -- "$TESTFRT" "$DEST"
  else
    STAT=-1 # surfaces to the caller as exit status 255
    echo "Did not find testdata .frt file"
  fi

  # Look for the matching "<name>_fixed.<ext>" testdata file the same way.
  FIXNAME="${BNAME}"
  FIXEXTN="${FILE##*.}"
  FIXNAME=$(basename "${FIXNAME}" .frt)
  FIXNAME="${FIXNAME}_fixed.${FIXEXTN}"
  TESTFIX="${TDROOT}/package/loan/fix/${FIXNAME}"
  if [ ! -f "$TESTFIX" ]; then
    TESTFIX="${TDROOT}/NO_PI/fix/${FIXNAME}"
  fi
  echo "Looking for testdata _fixed in: $TESTFIX"
  if [ -f "$TESTFIX" ]; then
    DF=$(basename "$FILE")
    DF="${DF%.*}_fixed.${FIXEXTN}"
    DEST="$(dirname "${FILE}")/${DF}"
    echo "Copy $TESTFIX to $DEST"
    cp -- "$TESTFIX" "$DEST"
  else
    # Allow this to succeed: a missing _fixed file leaves STAT untouched.
    # NOTE(review): the original comment read "allow this to succeed ...
    # STAT=-1"; confirm that the STAT=-1 assignment was meant to be disabled.
    echo "Did not find testdata _fixed file"
  fi
  sleep 3
else
  # The actual OCR program is named "sequencelogic-ocr" and must be in
  # ${SEQUENCELOGICHOME}/bin
  OCRPROG="${SEQUENCELOGICHOME}/bin/sequencelogic-ocr"
  if [ ! -x "$OCRPROG" ]; then
    echo "$OCRPROG does not exist or is not executable"
    exit 2
  fi
  "${TIMEOUT_CMD[@]}" "$OCRPROG" -e "$NUMERRPAGES" -i "$FILE" -t "$TEMPDIR"
  STAT=$?
fi

echo "Exit with status: $STAT"
exit $STAT