#!/bin/sh
#
# Run frpost convertImages-no-full-page on a hierarchy
#
# Usage: image-all.sh [-no-overwrite] <img-src-root> <img-dest-root>
#
# <img-src-root> itself and each of its immediate subdirectories is checked
# for a marker file named "isDataDirectory"; directories without it are
# skipped.  For every data directory <d> (path relative to the source root):
#
#   images are read from     <img-src-root>/<d>/output/FRCapture/images
#   OCR data is read from    <img-src-root>/<d>/output/OCR
#   images are written to    <img-dest-root>/<d>/images
#   captions are written to  <img-dest-root>/<d>/output/FRPost
#   stdout is logged to      <img-dest-root>/<d>/_imageprocess_.log
#
# With -no-overwrite, a data directory whose _imageprocess_.log already
# exists is skipped.
#
# Exit status: 0 when the scan completes, 1 on bad arguments,
#              2 if a destination root cannot be created.

OVER=1
if [ "$1" = "-no-overwrite" ]; then
  # n.b. this checks for existence of log file and if present will skip directory
  OVER=0
  echo "Not overwriting files"
  shift
fi

# The OCR root currently mirrors the image source root and the caption root
# mirrors the image destination root; they are kept as separate variables so
# either can be pointed elsewhere.
SRCDIR=$1
OCRDIR=$1
DSTDIR=$2
CAPDIR=$2

# Set to 1 to unpack <d>/output/OCR.tar.gz before processing and to
# re-archive <d>/output/OCR afterwards.
TGZ=0

if [ ! -d "$SRCDIR" ] || [ ! -d "$OCRDIR" ] || [ -z "$DSTDIR" ] || [ -z "$CAPDIR" ]; then
  echo "Usage: image-all.sh imgdir destdir"
  exit 1
fi

mkdir -p "${DSTDIR}"
mkdir -p "${CAPDIR}"
if [ ! -d "${DSTDIR}" ]; then
  printf '%s\n' "Unable to create directory: ${DSTDIR}"
  exit 2
fi
if [ ! -d "${CAPDIR}" ]; then
  printf '%s\n' "Unable to create directory: ${CAPDIR}"
  exit 2
fi

echo "Finding directories to process..."
find "$SRCDIR" -mindepth 0 -maxdepth 1 -type d | while IFS= read -r dir; do
  # Path of this directory relative to the source root.  The prefix is
  # stripped literally (the quoted pattern is not treated as a glob), and the
  # separator is re-added explicitly so a trailing slash on the source root
  # cannot glue the subdirectory name onto the destination root.
  rel=${dir#"$SRCDIR"}
  rel=${rel#/}
  if [ -n "$rel" ]; then
    suffix="/${rel}"
  else
    suffix=""
  fi
  dst=${DSTDIR}${suffix}
  cap=${CAPDIR}${suffix}
  ocr=${OCRDIR}${suffix}

  printf '%s\n' "*** Checking dir: ${dir} for isDataDirectory"
  if [ -f "${dir}/isDataDirectory" ]; then
    # Parent of the OCR directory, named explicitly so the archive can still
    # be located when the OCR directory itself does not exist.
    ocr_parent=${ocr}/output
    dir=${dir}/output/FRCapture/images
    dst=${dst}/images
    ocr=${ocr_parent}/OCR
    cap=${cap}/output/FRPost
    printf '%s\n' "*** ${dir}/isDataDirectory ***"
  else
    echo "!!! Skipping non isDataDirectory"
    continue
  fi

  mkdir -p "${dst}"
  mkdir -p "${cap}"

  # The log lives next to the images directory, i.e. in <img-dest-root>/<d>.
  log="${dst}/../_imageprocess_.log"
  if [ "$OVER" = 0 ] && [ -f "$log" ]; then
    echo "!!! Not overwriting data; _imageprocess_.log exists"
    continue
  fi

  printf '%s\n' "*** Processing directory: $dir to $dst ***"
  # Run in a subshell so the cd below cannot leak into later iterations;
  # only stdout is captured in the log.
  (
    printf '%s\n' "*** Processing directory: $dir to $dst ***"

    # untar/zip
    if [ "$TGZ" = 1 ] && [ -f "${ocr_parent}/OCR.tar.gz" ]; then
      echo "*** Untar/gzip .ptx contents"
      # -C only affects where members are extracted, not where the archive is
      # opened from, so the archive is given by its full path.
      gnutar -C "$ocr_parent" -xzvf "${ocr_parent}/OCR.tar.gz"
    fi

    printf '%s\n' "*** frpost.sh --image-source-directory=$dir --image-source-pattern=\\d{7}.pdf --image-output-directory=$dst --ocr-source-directory=$ocr --caption-output-directory=$cap --action=convertImages-no-full-page --threads=12"
    frpost.sh \
      --image-source-directory="$dir" \
      '--image-source-pattern=\d{7}.pdf' \
      --image-output-directory="$dst" \
      --ocr-source-directory="$ocr" \
      --caption-output-directory="$cap" \
      --action=convertImages-no-full-page \
      --threads=12

    # tgz the OCR .ptx files, we're done with 'em
    if [ "$TGZ" = 1 ]; then
      echo "*** Archiving OCR files"
      # --remove-files deletes what it archives, so never run it from an
      # unexpected working directory.
      if cd "$ocr_parent"; then
        gnutar -czvf OCR.tar.gz ./OCR --remove-files
      else
        printf '%s\n' "!!! Unable to enter ${ocr_parent}; OCR files not archived"
      fi
    fi
  ) > "$log"

done

exit 0