Sleds/posting/image-all.sh

87 lines
2.3 KiB
Bash

#!/bin/sh
#
# Run frpost convertImages-no-full-page on a hierarchy
#
# Usage: image-all.sh [-no-overwrite] <img-src-root> <img-dest-root>
# Overwrite mode. 1 (default) reprocesses every data directory; 0 (selected
# with -no-overwrite) skips any directory that already has an
# _imageprocess_.log from an earlier run.
OVER=1
if [ "$1" = "-no-overwrite" ]; then
  OVER=0
  echo "Not overwriting files"
  shift
fi

# OCR input is read from the image source root and caption output is written
# under the image destination root, so each pair shares one argument.
SRCDIR=$1
OCRDIR=$1
DSTDIR=$2
CAPDIR=$2

# Set to 1 to unpack OCR.tar.gz before, and re-archive the OCR files after,
# each frpost.sh run.
TGZ=0

# Each condition is its own test: a single "[ ... -o ... ]" chain is
# obsolescent in POSIX and can misparse when a path looks like an operator.
if [ ! -d "$SRCDIR" ] || [ ! -d "$OCRDIR" ] \
  || [ -z "$DSTDIR" ] || [ -z "$CAPDIR" ]; then
  echo "Usage: image-all.sh [-no-overwrite] imgdir destdir" >&2
  exit 1
fi

# Create both output roots up front; give up (status 2) on the first one that
# cannot be created.
for outdir in "$DSTDIR" "$CAPDIR"; do
  mkdir -p -- "$outdir"
  if [ ! -d "$outdir" ]; then
    echo "Unable to create directory: ${outdir}" >&2
    exit 2
  fi
done
# Print the part of path $1 that lies below root $2: "" for the root itself,
# otherwise "/sub/path", ready to be appended to another root directory.
# The root is removed as a literal prefix (never as a pattern), and a root
# written with a trailing slash gives the same result as one without.
# Intended to be called inside $( ... ), so _below does not leak to the caller.
path_below_root() {
  _below=${1#"$2"}
  # Drop whatever leading slashes remain, then put exactly one back.
  while [ "${_below#/}" != "$_below" ]; do
    _below=${_below#/}
  done
  if [ -n "$_below" ]; then
    _below=/$_below
  fi
  printf '%s\n' "$_below"
}

# Walk the source root and its immediate subdirectories. Every directory that
# contains an "isDataDirectory" marker file is run through
# "frpost.sh --action=convertImages-no-full-page"; that run's stdout is saved
# to _imageprocess_.log in the matching destination directory.
echo "Finding directories to process..."
find "$SRCDIR" -mindepth 0 -maxdepth 1 -type d | while IFS= read -r dir; do
  rel=$(path_below_root "$dir" "$SRCDIR")

  echo "*** Checking dir: ${dir} for isDataDirectory"
  marker=${dir}/isDataDirectory
  if [ ! -f "$marker" ]; then
    echo "!!! Skipping non isDataDirectory"
    continue
  fi
  # Report the marker file that was actually found.
  echo "*** ${marker} ***"

  # Per-directory inputs and outputs.
  img=${dir}/output/FRCapture/images
  dst=${DSTDIR}${rel}/images
  ocr_parent=${OCRDIR}${rel}/output
  ocr=${ocr_parent}/OCR
  cap=${CAPDIR}${rel}/output/FRPost
  log=${dst}/../_imageprocess_.log

  # The log is written next to ${dst}, so there is nothing useful to do if
  # the output directories cannot be created.
  if ! mkdir -p -- "$dst" "$cap"; then
    echo "!!! Unable to create ${dst} or ${cap}; skipping" >&2
    continue
  fi

  # An existing log marks the directory as already processed.
  if [ "$OVER" = 0 ] && [ -f "$log" ]; then
    echo "!!! Not overwriting data; _imageprocess_.log exists"
    continue
  fi

  # Announced twice on purpose: once on the console, once as the log's
  # first line.
  echo "*** Processing directory: $img to $dst ***"
  (
    echo "*** Processing directory: $img to $dst ***"

    # Unpack a previously archived OCR tree. The archive is looked up via the
    # parent directory by name, so this still works when the OCR directory
    # itself is absent (presumably the case after --remove-files; verify).
    if [ "$TGZ" = 1 ] && [ -f "$ocr_parent/OCR.tar.gz" ]; then
      echo "*** Untar/gzip .ptx contents"
      (cd "$ocr_parent" && gnutar -xzvf OCR.tar.gz)
    fi

    echo "*** frpost.sh --image-source-directory=$img --image-source-pattern=\\d{7}.pdf --image-output-directory=$dst --ocr-source-directory=$ocr --caption-output-directory=$cap --action=convertImages-no-full-page --threads=12"
    frpost.sh \
      "--image-source-directory=$img" \
      '--image-source-pattern=\d{7}.pdf' \
      "--image-output-directory=$dst" \
      "--ocr-source-directory=$ocr" \
      "--caption-output-directory=$cap" \
      --action=convertImages-no-full-page \
      --threads=12

    # Archive the OCR .ptx files once frpost is done with them. Never run the
    # file-removing tar unless the cd succeeded, otherwise it would operate on
    # ./OCR of whatever directory the script happens to be in.
    if [ "$TGZ" = 1 ]; then
      echo "*** Archiving OCR files"
      if cd "$ocr_parent"; then
        gnutar -czvf OCR.tar.gz ./OCR --remove-files
      else
        echo "!!! Unable to enter ${ocr_parent}; OCR files not archived" >&2
      fi
    fi
  ) > "$log"
done
# Always exit with status 0: the status of the processing loop above, and of
# the frpost.sh runs inside it, is never inspected.
exit 0