#!/bin/sh
#
# Run frpost convertImages-no-full-page on a hierarchy.
#
# Usage: image-all.sh [-no-overwrite] imgdir destdir
#
#   -no-overwrite  skip any data directory whose _imageprocess_.log already
#                  exists under destdir
#   imgdir         directory to scan; imgdir itself and each of its immediate
#                  subdirectories is processed if it contains a file named
#                  "isDataDirectory"
#   destdir        root under which the image output, caption output and the
#                  per-directory _imageprocess_.log are written
#
# Exit status: 1 on bad arguments, 2 if destdir cannot be created, otherwise 0
# (the status of individual frpost.sh runs is not propagated).

OVER=1
if [ "$1" = "-no-overwrite" ]; then
  # n.b. this checks for existence of log file and if present will skip directory
  OVER=0
  echo "Not overwriting files"
  shift
fi

# Source images and OCR data share one root; image and caption output share
# the other.
SRCDIR=$1
OCRDIR=$1
DSTDIR=$2
CAPDIR=$2

# Set to 1 to unpack OCR.tar.gz before processing and re-archive afterwards.
TGZ=0

if [ ! -d "$SRCDIR" ] || [ ! -d "$OCRDIR" ] || [ -z "$DSTDIR" ] || [ -z "$CAPDIR" ]; then
  echo "Usage: image-all.sh [-no-overwrite] imgdir destdir" >&2
  exit 1
fi

mkdir -p -- "${DSTDIR}"
mkdir -p -- "${CAPDIR}"
if [ ! -d "${DSTDIR}" ]; then
  echo "Unable to create directory: ${DSTDIR}" >&2
  exit 2
fi
if [ ! -d "${CAPDIR}" ]; then
  echo "Unable to create directory: ${CAPDIR}" >&2
  exit 2
fi

echo "Finding directories to process..."
# IFS= and -r keep leading/trailing blanks and backslashes in names intact.
find "$SRCDIR" -mindepth 0 -maxdepth 1 -type d | while IFS= read -r dir; do
  # Path of this directory relative to SRCDIR. The quoted pattern is matched
  # literally, so regex metacharacters or '#' in SRCDIR cannot corrupt it.
  rel=${dir#"$SRCDIR"}
  # When SRCDIR was given with a trailing slash the remainder has no leading
  # slash; add one so it can be appended to the other roots safely.
  case $rel in
    '' | /*) ;;
    *) rel=/$rel ;;
  esac
  dst=${DSTDIR}${rel}
  cap=${CAPDIR}${rel}
  ocr=${OCRDIR}${rel}

  echo "*** Checking dir: ${dir} for isDataDirectory"
  if [ -f "${dir}/isDataDirectory" ]; then
    dir=${dir}/output/FRCapture/images
    dst=${dst}/images
    ocr=${ocr}/output/OCR
    cap=${cap}/output/FRPost
    # NOTE: dir has already been rewritten above, so this prints the images
    # path followed by /isDataDirectory rather than the marker's real path.
    echo "*** ${dir}/isDataDirectory ***"
  else
    echo "!!! Skipping non isDataDirectory"
    continue
  fi

  if ! mkdir -p -- "${dst}" "${cap}"; then
    echo "Unable to create directory: ${dst} or ${cap}" >&2
    continue
  fi

  # The log lives next to the images directory, i.e. in the per-dataset
  # directory under DSTDIR.
  log=${dst}/../_imageprocess_.log
  if [ "$OVER" = 0 ] && [ -f "$log" ]; then
    echo "!!! Not overwriting data; _imageprocess_.log exists"
    continue
  fi

  #continue
  echo "*** Processing directory: $dir to $dst ***"
  # Run in a subshell so the cd below does not leak into the loop. stdin is
  # taken from /dev/null so nothing in here can swallow the directory list
  # that the enclosing while-loop is reading from the pipe.
  (
    echo "*** Processing directory: $dir to $dst ***"

    # untar/zip
    if [ "$TGZ" = 1 ] && [ -f "$ocr/../OCR.tar.gz" ]; then
      echo "*** Untar/gzip .ptx contents"
      # Name the archive by its full path: -C only changes where members are
      # extracted, not where the archive file is looked up.
      gnutar -C "$ocr/.." -xzvf "$ocr/../OCR.tar.gz"
    fi

    printf '%s\n' "*** frpost.sh --image-source-directory=$dir --image-source-pattern=\\d{7}.pdf --image-output-directory=$dst --ocr-source-directory=$ocr --caption-output-directory=$cap --action=convertImages-no-full-page --threads=12"
    frpost.sh \
      --image-source-directory="$dir" \
      --image-source-pattern='\d{7}.pdf' \
      --image-output-directory="$dst" \
      --ocr-source-directory="$ocr" \
      --caption-output-directory="$cap" \
      --action=convertImages-no-full-page \
      --threads=12

    # tgz the OCR .ptx files, we're done with 'em
    if [ "$TGZ" = 1 ]; then
      echo "*** Archiving OCR files"
      # Abort rather than archive-and-delete ./OCR in the wrong directory.
      cd "$ocr/.." || exit 1
      gnutar -czvf OCR.tar.gz ./OCR --remove-files
    fi
  ) < /dev/null > "$log"
done

exit 0