Sleds/slocr/nuanceocr.cpp

1426 lines
42 KiB
C++

//
// Copyright (c) 2016, Sequence Logic
//
#include "nuanceocr.h"

#include <vector>

#include "ocrbase.h"
#include "KernelApi.h"
#include "RecApiPlus.h"
#include "jsonobject.h"
#include "sledsconstants.h"
#include "Task.h"
#include "ThreadPool.h"
#include "Global.h"
using namespace sequencelogic;
namespace
{
// Per-word record: bounding box, page, zone, text and per-character data.
// In the visible part of this file it is only referenced from the
// commented-out splitToLines() prototype.
// NOTE(review): Tops/Bottoms/Lefts/Rights/Confidences are declared as unsized
// arrays. Several such members in one struct are not standard C++ — confirm
// this still compiles on the target toolchain before reviving any code that
// uses the struct.
struct word
{
int MaxY;
int MaxX;
int MinY;
int MinX;
int Page;
int Length;
int Tops[];
int Bottoms[];
int Lefts[];
int Rights[];
int Confidences[];
int Zone;
std::string value;
};
// Letter coordinates are multiplied by SCALE_FACTOR / <page DPI> before they
// are written to the JSON output, i.e. they are expressed in 1/1200 inch units.
const double SCALE_FACTOR = 1200.0;
}
/**
 * Construct the OCR wrapper for one input file.
 *
 * Starts the Nuance engine and opens the input file, which is also where the
 * page count gets set. The per-page arrays (_pages, _imgPages) are left
 * unallocated here.
 *
 * @param inFile Path of the image/PDF file to process.
 */
CNuanceOCR::CNuanceOCR(std::string &inFile)
    : COCRBase(inFile),
      _pages(NULL),
      _imgPages(NULL)
{
    // The engine has to be up before the file can be opened.
    initializeNuance();

    // Opening the file also queries the number of pages.
    loadImgFile(inFile);
}
/**
 * Release the per-page arrays owned by this object.
 * Both pointers may still be NULL, which delete[] accepts.
 */
CNuanceOCR::~CNuanceOCR()
{
    // JSON page descriptions.
    delete[] _pages;

    // Page image handles.
    delete[] _imgPages;
}
void CNuanceOCR::addTime(double runTime)
{
std::lock_guard<std::mutex> timeLocker(timeLock);
totOCRTime += runTime;
}
//have debug code here================================================================================================================================================================
/**
 * Recognize one page of the open image file and store its JSON description.
 *
 * The page is loaded from hIFile, pre-processed and recognized. When
 * recognition fails the page is rotated to the right in 90 degree steps (at
 * most three times) and recognition is retried. The page image is always
 * written to tmpImgFile and, on request, to a per-page PNG file. Every
 * space-delimited run of recognized characters becomes one entry of the page's
 * "lines" array; coordinates are scaled by SCALE_FACTOR / <page DPI>.
 *
 * @param pageNum          1-based page number inside the image file.
 * @param adjustedPage     In/out page number written to the JSON. It is
 *                         incremented once recognition has been attempted
 *                         (successfully or not) and left untouched when the
 *                         page could not be loaded at all.
 * @param rotated          Set to true when pre-processing straightened or
 *                         rotated the page, when a manual rotation made
 *                         recognition succeed, or when the page could not be
 *                         loaded. It is never reset to false here.
 * @param singlePageSave   When true the page is additionally saved as PNG.
 * @param singlePageFolder Folder receiving the per-page PNG files.
 * @return REC_OK on success, otherwise the Nuance code of the load,
 *         pre-process or recognition failure.
 */
RECERR CNuanceOCR::OCRPage(int pageNum, int &adjustedPage, bool &rotated, bool singlePageSave, const std::string &singlePageFolder)
{
    RECERR nRetVal = REC_OK;
    HPAGE hPage = NULL;
    std::stringstream messageStream;
    RECERR rc;
    JSONObject pageObj;

    // Load the image into Nuance (the engine uses 0-based page indexes).
    rc = kRecLoadImg(0, hIFile, &hPage, pageNum - 1);
    if (rc != REC_OK)
    {
        if (hPage == NULL)
            messageStream << "ERROR: Cannot load page " << pageNum << "/" << pageCnt << ":" <<
                getError(rc) << " (" << rc << ")";
        else
        {
            // A page handle was still produced: treat the code as a warning.
            messageStream << "WARNING: Page " << pageNum << "/" << pageCnt << ":" <<
                getError(rc) << " (" << rc << ")";
            rc = REC_OK;
        }
        printMessage(messageStream.str());
        messageStream.str("");
    }
    else
    {
        rc = kRecPreprocessImg(0, hPage);
        if (rc != REC_OK)
        {
            if (hPage == NULL)
                messageStream << "ERROR: Cannot pre-process page " << pageNum << "/" << pageCnt << ":" <<
                    getError(rc) << " (" << rc << ")";
            else
            {
                messageStream << "WARNING: Pre-processing page " << pageNum << "/" << pageCnt << ":" <<
                    getError(rc) << " (" << rc << ")";
                rc = REC_OK;
            }
            printMessage(messageStream.str());
            messageStream.str("");
        }
        else
        {
            // See if Nuance did anything to the image...
            PREPROC_INFO preprocInfo;
            if (kRecGetPreprocessInfo(hPage, &preprocInfo) == REC_OK)
            {
                if ( ((preprocInfo.Flags & PREPROC_INFO_3DDESKEW) == PREPROC_INFO_3DDESKEW) ||
                     ((preprocInfo.Flags & PREPROC_INFO_STRAIGHTENED) == PREPROC_INFO_STRAIGHTENED) )
                {
                    printMessage("Page was straightened.");
                    rotated = true;
                }
                else if (preprocInfo.Rotation != ROT_NO)
                {
                    rotated = true;
                    switch (preprocInfo.Rotation)
                    {
                    case ROT_RIGHT:
                        printMessage("Page was rotated image to the right (clockwise).");
                        break;
                    case ROT_DOWN:
                        printMessage("Page was rotated image down (rotate twice).");
                        break;
                    case ROT_LEFT:
                        printMessage("Page was rotated image to the left (anti-clockwise).");
                        break;
                    case ROT_FLIPPED:
                        printMessage("Page was mirrored without rotation (mirrored around the Y-axis).");
                        break;
                    case ROT_RIGHT_FLIPPED:
                        printMessage("Page was mirrored, then rotate to the right.");
                        break;
                    case ROT_DOWN_FLIPPED:
                        printMessage("Page was mirrored image, then rotate twice.");
                        break;
                    case ROT_LEFT_FLIPPED:
                        printMessage("Page was mirrored, then rotate to the left.");
                        break;
                    }
                }
            }
        }
    }

    // If we have a NULL image handle, bail!
    if (hPage == NULL)
    {
        rotated = true;
        // Write into the bad page object.
        pageObj.setJSONValue("badPage", pageNum);
        addToJSONPages(pageNum - 1, pageObj);
        return rc;
    }

    messageStream << "Recognizing page " << pageNum << "/" << pageCnt << ".";
    printMessage(messageStream.str());
    messageStream.str("");

    const clock_t start = clock();
    rc = kRecRecognize(SID, hPage, NULL);
    if (rc != REC_OK)
    {
        messageStream << "WARNING: Failed recognition page " << pageNum << "/" << pageCnt << ": " << getError(rc) << " (" << rc << ")";
        printMessage(messageStream.str());
        messageStream.str("");

        // Try rotating the page, through all 270 degrees.
        int nRotation = 0;
        for (int i = 0; (rc != REC_OK) && (i < 3); ++i)
        {
            nRotation += 90;
            messageStream << "Trying to rotate page to " << nRotation << " deg.";
            printMessage(messageStream.str());
            messageStream.str("");

            rc = kRecRotateImg(0, hPage, ROT_RIGHT);
            if (rc == REC_OK)
            {
                rc = kRecRecognize(SID, hPage, NULL);
                if (rc != REC_OK)
                {
                    messageStream << "WARNING: Recognizing rotated page " << pageNum <<
                        " to " << nRotation << " deg: " << getError(rc) << " (" << rc << ")" << std::endl;
                    printMessage(messageStream.str());
                    messageStream.str("");
                }
                else
                    rotated = true;
            }
            else
            {
                messageStream << "Error rotating page " << pageNum <<
                    " to " << nRotation << " deg, bailing out: " << getError(rc) << " (" << rc << ")" << std::endl;
                printMessage(messageStream.str());
                messageStream.str("");
            }
        }
    }

    if (rc != REC_OK)
    {
        // There was an error rotating the page, or we didn't find anything. Re-load the original
        // page to get the correct orientation for writing, then bail.
        kRecFreeImg(hPage);
        // Forget the freed handle so a failed reload cannot leave it dangling.
        hPage = NULL;
        kRecLoadImg(0, hIFile, &hPage, pageNum - 1);
        nRetVal = rc;

        messageStream << "Nothing on page " << pageNum << "." << std::endl;
        printMessage(messageStream.str());
        messageStream.str("");

        pageObj.setJSONValue("page", adjustedPage++);
        pageObj.setJSONValue("numLines", 0);
        pageObj.setJSONArray("lines");
        addToJSONPages(pageNum - 1, pageObj);
    }
    else
    {
        const double duration = (clock() - start) / static_cast<double>(CLOCKS_PER_SEC);
        // Go through addTime() so the shared total is updated under its lock.
        addTime(duration);
        messageStream << "Done in " << duration << "s.";
        printMessage(messageStream.str());
        messageStream.str("");
    }

    if (hPage != NULL)
    {
        IMF_FORMAT nImgFormat = FF_TIFG4;
        if (isPDF)
            nImgFormat = FF_PDF_GOOD;

        // Optional per-page PNG copy.
        if (singlePageSave)
        {
            printMessage("Saving page to PNG file.");
            std::stringstream saveLoc;
            saveLoc << std::setw(5) << std::setfill('0') << pageNum;
            std::string saveFile = singlePageFolder + "/" + "page" + saveLoc.str() + ".png";
            rc = kRecSaveImgF(SID, saveFile.c_str(), FF_PNG, hPage, II_CURRENT, false); //save a png Image and don't overwrite
        }

        //always need this file or gs is unreliable.
        messageStream << "Saving page to " << ((isPDF) ? "PDF" : "TIFF") << " file.\n";
        printMessage(messageStream.str());
        messageStream.str("");
        rc = kRecSaveImgF(SID, tmpImgFile.c_str(), nImgFormat, hPage, II_CURRENT, true);
        if (rc != REC_OK)
        {
            messageStream << "Error saving page: " << getError(rc) << " (" << rc << ")" << "\n";
            printMessage(messageStream.str());
            messageStream.str("");
        }
    }

    if (nRetVal != REC_OK)
    {
        if (hPage != NULL)
            kRecFreeImg(hPage);
        pageObj.setJSONValue("badPage", pageNum);
        addToJSONPages(pageNum - 1, pageObj);
        return nRetVal;
    }

    //-----------------------------------------------------------------------------------------------------------
    // Character extraction logic
    //-----------------------------------------------------------------------------------------------------------
    LONG numLetters = 0;
    LETTER *pletters = NULL;
    rc = kRecGetLetters(hPage, II_CURRENT, &pletters, &numLetters);
    if (rc != REC_OK)
    {
        messageStream << "WARNING: Getting letters of page " << pageNum << ": " << getError(rc) << " (" << rc << ")";
        printMessage(messageStream.str());
        messageStream.str("");
    }
    // Without a letter buffer there is nothing to read, whatever the count says.
    if ((pletters == NULL) || (numLetters < 0))
        numLetters = 0;

    // Per-letter attributes. One spare element keeps data() valid for empty pages.
    const std::vector<int>::size_type slots = static_cast<std::vector<int>::size_type>(numLetters) + 1;
    std::vector<char> chars(slots);
    std::vector<int> lefts(slots);
    std::vector<int> rights(slots);
    std::vector<int> bottoms(slots);
    std::vector<int> tops(slots);
    std::vector<int> zones(slots);
    std::vector<unsigned char> confs(slots);

    IMG_INFO pageInfo = {0};
    if ((kRecGetImgInfo(SID, hPage, II_CURRENT, &pageInfo) != REC_OK) ||
        (pageInfo.DPI.cx <= 0) || (pageInfo.DPI.cy <= 0))
        // Hmmm, can't get (usable) image info? Punt, and default to 300x300dpi.
        pageInfo.DPI.cx = pageInfo.DPI.cy = 300;

    // Calculate the mult factor, using the page's resolution.
    const double multX = SCALE_FACTOR / static_cast<double>(pageInfo.DPI.cx);
    const double multY = SCALE_FACTOR / static_cast<double>(pageInfo.DPI.cy);

    // Pull the letter info out.
    for (int i = 0; i < numLetters; i++)
    {
        const LETTER &letter = pletters[i];
        chars[i] = letter.code;
        lefts[i] = static_cast<int>(static_cast<double>(letter.left) * multX);
        tops[i] = static_cast<int>(static_cast<double>(letter.top) * multY);
        confs[i] = letter.err;
        // Bottom/right edges are derived from the scaled height/width.
        bottoms[i] = tops[i] + static_cast<int>(static_cast<double>(letter.height) * multY);
        rights[i] = lefts[i] + static_cast<int>(static_cast<double>(letter.width) * multX);
        zones[i] = letter.zone;
    }
    if (pletters != NULL)
        kRecFree(pletters);

    // Start the JSON stuff.
    int wordsTot = getNumWords(chars.data(), numLetters);
    pageObj.setJSONValue("page", adjustedPage);
    pageObj.setJSONValue("numLines", wordsTot);
    pageObj.setJSONArray("lines");
    JSONArray &linesArray = pageObj.getJSONArray("lines");

    int numWords = 0;
    int wLen = 0;
    for (int i = 0; i < numLetters; i += wLen)
    {
        JSONObject lineObj;

        // Length of the word: test the bound first, then look at the character.
        wLen = 0;
        while ((i + wLen < numLetters) && (chars[i + wLen] != ' '))
            wLen++;
        // The terminating space belongs to the word, but only if there is one.
        if (i + wLen < numLetters)
            wLen++;

        lineObj.setJSONValue("line", numWords);
        lineObj.setJSONValue("zone", zones[i]);
        lineObj.setJSONValue("page", adjustedPage);
        lineObj.setJSONValue("xmin", getMin(i, i + wLen, lefts.data()));
        lineObj.setJSONValue("ymin", getMin(i, i + wLen, tops.data()));
        lineObj.setJSONValue("xmax", getMax(i, i + wLen, rights.data()));
        lineObj.setJSONValue("ymax", getMax(i, i + wLen, bottoms.data()));
        lineObj.setJSONValue("numChars", wLen);

        // Characters: anything outside printable ASCII, and the double quote, becomes a blank.
        std::stringstream tmpStr;
        for (int j = 0; j < wLen; j++)
        {
            const int code = static_cast<int>(chars[i + j]);
            if (code < 128 && code >= 32 && code != 34)
                tmpStr << chars[i + j];
            else
                tmpStr << " ";
        }
        lineObj.setJSONValue("chars", tmpStr.str().c_str());
        tmpStr.str("");

        // Confidences: the low seven bits of the error byte, converted per character.
        for (int j = 0; j < wLen; j++)
        {
            const unsigned char conf = confs[i + j];
            long confB = ((getBit(conf, 6) * 1000000) + (getBit(conf, 5) * 100000) + (getBit(conf, 4) * 10000) + (getBit(conf, 3) * 1000) +
                          (getBit(conf, 2) * 100) + (getBit(conf, 1) * 10) + getBit(conf, 0));
            long confI = fromBin(confB);
            tmpStr << convertConfidence(confI);
        }
        lineObj.setJSONValue("confs", tmpStr.str().c_str());

        lineObj.setJSONArray("xmins");
        {
            JSONArray &xminsArray = lineObj.getJSONArray("xmins");
            for (int j = 0; j < wLen; j++)
                xminsArray.addElement(lefts[i + j]);
        }
        lineObj.setJSONArray("ymins");
        {
            JSONArray &yminsArray = lineObj.getJSONArray("ymins");
            for (int j = 0; j < wLen; j++)
                yminsArray.addElement(tops[i + j]);
        }
        lineObj.setJSONArray("xmaxs");
        {
            JSONArray &xmaxsArray = lineObj.getJSONArray("xmaxs");
            for (int j = 0; j < wLen; j++)
                xmaxsArray.addElement(rights[i + j]);
        }
        lineObj.setJSONArray("ymaxs");
        {
            JSONArray &ymaxsArray = lineObj.getJSONArray("ymaxs");
            for (int j = 0; j < wLen; j++)
                ymaxsArray.addElement(bottoms[i + j]);
        }

        numWords++;
        linesArray.addElement(lineObj);
    }

    ++adjustedPage;
    addToJSONPages(pageNum - 1, pageObj);
    kRecFreeImg(hPage);
    return nRetVal;
}
bool CNuanceOCR::loadImgFile(std::string &inFile)
{
docReturnVals = kRecOpenImgFile(inFile.c_str(), &hIFile, IMGF_READ, (IMF_FORMAT)0);
if (docReturnVals != REC_OK)
{
printMessage(std::string("Couldn't load file " + inFile));
kRecQuit();
return false;
}
printMessage(std::string("File loaded successfuly!"));
docReturnVals = kRecGetImgFilePageCount(hIFile, &pageCnt);
if (docReturnVals != REC_OK)
{
printMessage(std::string("Error counting pages."));
kRecQuit();
return false;
}
return true;
}
/**
 * Get the text of a Nuance error code.
 *
 * The extended error code of the last engine error is fetched first and passed
 * along. The text is obtained with two calls: one with a NULL buffer to learn
 * the required length, one to fill the buffer.
 *
 * @param nErr Nuance result code to describe.
 * @return The UI text for the code, or an empty string when the engine reports
 *         no text for it.
 */
std::string CNuanceOCR::getError (RECERR nErr)
{
    int nErrLen = 0;
    int nExtErr = 0;
    kRecGetLastError(&nExtErr, NULL, 0);

    // First pass: ask for the required buffer length only.
    kRecGetErrorUIText(nErr, nExtErr, "", NULL, &nErrLen);

    std::string errStr;
    // A non-positive length means there is nothing to fetch (a negative value
    // would otherwise turn into a huge resize request).
    if (nErrLen <= 0)
        return errStr;

    errStr.resize(nErrLen);
    kRecGetErrorUIText(nErr, nExtErr, "", &errStr[0], &nErrLen);

    // NOTE(review): the reported length presumably counts the terminating NUL
    // — confirm against the SDK documentation. Cut at the first NUL so it does
    // not end up embedded in log messages.
    const std::string::size_type nulPos = errStr.find('\0');
    if (nulPos != std::string::npos)
        errStr.resize(nulPos);
    return errStr;
}
bool CNuanceOCR::initializeNuance()
{
#if (USE_OEM_LICENSE)
docReturnVals = kRecSetLicense(LICENSE_FILE, OEM_CODE);
if (docReturnVals != REC_OK)
{
std::stringstream msg;
msg << "kRecSetLicense: Error obtaining license: " << getError(docReturnVals) << std::endl;
printMessage(msg.str().c_str());
kRecQuit();
return false;
}
#endif
docReturnVals = RecInitPlus(YOUR_COMPANY, YOUR_PRODUCT); // use your company and product name here
if ((docReturnVals != REC_OK) && (docReturnVals != API_INIT_WARN) && (docReturnVals != API_LICENSEVALIDATION_WARN))
{
std::stringstream msg;
msg << "kRecSetLicense: Error obtaining license: " << getError(docReturnVals) << std::endl;
printMessage(msg.str().c_str());
RecQuitPlus();
return false;
}
return true;
}
bool CNuanceOCR::savePage(int page, HPAGE &hPage)
{
imgPageLock.lock();
_imgPages[page] = hPage;
imgPageLock.unlock();
return true;
}
bool CNuanceOCR::writeFixedImg()
{
std::stringstream messageStream;
RECERR rc;
imgPageLock.lock();
for (int i = 0; i < pageCnt; i++)
{
HPAGE imgPage = _imgPages[i];
if (isTIF)
{
messageStream << "Saving page to TIF file.\n";
printMessage(messageStream.str());
messageStream.str("");
rc = kRecSaveImgF(SID, fixedImgFile.c_str(), FF_TIFNO, imgPage, II_CURRENT, true);
if (rc != REC_OK)
{
messageStream << "Error saving page.\n";
printMessage(messageStream.str());
messageStream.str("");
}
}
else if (isPDF)
{
messageStream << "Saving page to PDF file.\n";
printMessage(messageStream.str());
messageStream.str("");
rc = kRecSaveImgF(SID, fixedImgFile.c_str(), FF_PDF_GOOD, imgPage, II_CURRENT, true);
if (rc != REC_OK)
{
messageStream << "Error saving page.\n";
printMessage(messageStream.str());
messageStream.str("");
}
}
}
imgPageLock.unlock();
return true;
}
int CNuanceOCR::OCR(const OCROpts &opts, const std::string &outFile)
{
//set up nuance
initializeNuance();
int nRetVal = OCR_SUCCESS;
std::stringstream messageStream;
bool pageRotated = false;
RECERR rc;
double duration;
clock_t start;
rc = kRecOpenImgFile(opts._inFile.c_str(), &hIFile, IMGF_READ, (IMF_FORMAT)0);
if (rc != REC_OK)
{
printMessage(std::string("Couldn't load file " + opts._inFile));
kRecQuit();
}
printMessage(std::string("File loaded successfuly!"));
rc = kRecGetImgFilePageCount(hIFile, &pageCnt);
if (rc != REC_OK)
{
printMessage(std::string("Error counting pages."));
kRecCloseImgFile(hIFile);
kRecQuit();
}
if (_pages != NULL)
delete [] _pages;
_pages = new JSONObject[pageCnt];
//if there isn't a page range.
if (opts._nStartPage == 0 && opts._nEndPage == 0)
fixedImgFile = getFixedImgName(opts._inFile);
else
fixedImgFile = getFixedImgName(opts._inFile, opts._nStartPage, opts._nEndPage);
tmpImgFile = getTmpImgName(opts._tempFolder, opts._inFile);
JSONObject fartDoc;
fartDoc.setJSONValue("numPages", pageCnt);
fartDoc.setJSONValue("numBadPages", 0);
fartDoc.setJSONValue("originalSourceFile", opts._inFile.c_str());
fartDoc.setJSONValue("originalSourceType", getTypeByFileExt(opts._inFile).c_str());
fartDoc.setJSONValue("textSourceType", "NUANCE");
fartDoc.setJSONValue("fixedUp", false);
fartDoc.setJSONArray("pages");
fartDoc.setJSONArray("badPages");
//These are the default values
int page = 1;
int adjustedPage = 1;
int pageEnd = pageCnt;
//-------------------------
if (opts._nStartPage == 0 && opts._nEndPage == 0 || (opts._nEndPage < opts._nStartPage))
printMessage("No/invalid page range passed. Running full doc.");
else if (opts._nStartPage != 0)
page = opts._nStartPage;
if (opts._nEndPage != 0)
pageEnd = opts._nEndPage;
rc = REC_OK;
int nNumPagesErr = 0;
while ((nNumPagesErr <= nPagesinErrToAllow) && (page <= pageEnd))
{
rc = OCRPage(page, adjustedPage, pageRotated, opts._bSinglePageSave, opts._singlePageFolder);
// Blank pages return ZONE_NOTFOUND_WARN. Ignore blank page warnings.
if ((rc != REC_OK) && (rc != ZONE_NOTFOUND_WARN))
{
switch (rc)
{
case IMF_FONT_MISSING_WARN:
printMessage("MISSING FONT!\n");
if (nNumPagesErr <= nPagesinErrToAllow)
nRetVal |= OCR_MISSING_FONT_SKIPPED;
else
nRetVal |= OCR_MISSING_FONT;
++nNumPagesErr;
break;
case IMG_DPI_ERR:
case IMG_DPI_WARN:
printMessage("BAD RESOLUTION!\n");
if (nNumPagesErr <= nPagesinErrToAllow)
nRetVal |= OCR_RES_UNSUPPORTED_SKIPPED;
else
nRetVal |= OCR_RES_UNSUPPORTED;
++nNumPagesErr;
break;
case IMG_SIZE_ERR:
printMessage("IMAGE SIZE ERROR\n");
if (nNumPagesErr <= nPagesinErrToAllow)
nRetVal |= OCR_SIZE_UNSUPPORTED_SKIPPED;
else
nRetVal |= OCR_SIZE_UNSUPPORTED;
++nNumPagesErr;
break;
case NO_TXT_WARN:
// Just a warning that no text was found on the page. Don't need to do anything!
break;
default:
messageStream << "UNKNOWN ERROR ON PAGE " << (page + 1);
printMessage(messageStream.str());
messageStream.str("");
nRetVal = OCR_FAILED;
break;
}
}
page++;
}
kRecCloseImgFile(hIFile);
//-----------------------------------------------------------------------------------------------------------
JSONArray &pageArray = fartDoc.getJSONArray("pages");
JSONArray &badPageArray = fartDoc.getJSONArray("badPages");
for (int i = 0; i < pageCnt; i++)
{
if (_pages[i].get("badPage") == NULL)
pageArray.addElement(_pages[i]);
else
badPageArray.addElement(_pages[i]);
}
fartDoc.setJSONValue("numPages", pageArray.getnumelements());
fartDoc.setJSONValue("numBadPages", badPageArray.getnumelements());
//Close Nuance, save the frt file, and record time.
RecQuitPlus();
//if it was run in full, save without name modification
if (opts._nStartPage == 0 && opts._nEndPage == 0)
fartDoc.SaveToFile(outFile, opts._bPrettyOutput);
//if it was run in segments, save with page range in fileName.
else
fartDoc.SaveToFile(getFRTFilePageRangeName(outFile, opts._nStartPage, opts._nEndPage), opts._bPrettyOutput);
double avgPageTime = (totOCRTime / ((double)pageCnt));
messageStream << "Avg time per page: " << avgPageTime << "s";
printMessage(messageStream.str());
messageStream.str("");;
//If a page was rotated, move the image.
if (pageRotated)
{
messageStream << "Detected page rotation, moving" << tmpImgFile << " to " << fixedImgFile << ".";
printMessage(messageStream.str());
messageStream.str("");
copyFile(tmpImgFile, fixedImgFile);
}
//otherwise
else
std::remove(tmpImgFile.c_str()); //delete the temp file.
//return
return nRetVal;
}
/**
 * Build the FRT file name used when only a page range was processed.
 *
 * The extension of inFile is replaced by "_<startPage>-<endPage>.frt".
 * Only a dot inside the last path component counts as the start of the
 * extension, so dots in directory names (e.g. "./out/doc.frt") are left alone.
 *
 * @param inFile    Requested output file name, with or without extension.
 * @param startPage First page of the range.
 * @param endPage   Last page of the range.
 * @return The file name carrying the page range.
 */
std::string CNuanceOCR::getFRTFilePageRangeName(std::string inFile, int startPage, int endPage)
{
    const std::string::size_type sepPos = inFile.find_last_of("/\\");
    const std::string::size_type dotPos = inFile.find_last_of('.');

    // The dot must belong to the file name, not to a directory.
    const bool hasExtension = (dotPos != std::string::npos) &&
                              ((sepPos == std::string::npos) || (dotPos > sepPos));

    std::stringstream result;
    result << (hasExtension ? inFile.substr(0, dotPos) : inFile)
           << "_" << startPage << "-" << endPage << ".frt";
    return result.str();
}
void CNuanceOCR::addToJSONPages(int pageNum, JSONObject page)
{
std::lock_guard<std::mutex> locker(pageLock);
_pages[pageNum] = page;
}
/**
 * Check that the Nuance engine can be started.
 *
 * @return The result of initializeNuance().
 */
bool CNuanceOCR::verify()
{
    const bool engineReady = initializeNuance();
    return engineReady;
}
//--------------------------------------------------------------------------------------------------------------------------------------
//Maybe one day these can be used.
/**
 * Recognize one page on a worker thread and store its JSON description.
 *
 * The engine is (re-)initialized, the input file is opened privately for this
 * call and the requested page is loaded, pre-processed and recognized; the
 * calls are serialized through imgLock. When recognition fails the page is
 * rotated to the right in 90 degree steps (at most three times) and
 * recognition is retried. A page that still cannot be processed is stored as
 * an empty page (with an "ocrerr" value for font and resolution problems) and
 * the function returns. A page that needed rotation is saved to its own
 * fixed-image file.
 *
 * @param arg Supplies the input file name and the 0-based page number.
 */
void CNuanceOCR::OCRPageMT(OCRArg* arg)
{
    // May need to reinitialize on a new thread.
    printMessage(std::string("Initializing Nuance..."));
    initializeNuance();
    printMessage(std::string("Done."));

    std::string inFile = arg->getFileName();
    int pageNum = arg->getPageNum();
    HIMGFILE imgFile;
    bool imgFileOpen = false;
    HPAGE hPage = NULL;
    std::stringstream messageStream;
    RECERR rc;
    JSONObject pageObj;
    bool rotated = false;
    std::string tmpFixedImgFile = getFixedImgName(inFile, pageNum);
    int ID = SID;

    // Load the image into nuance, nuance claims this to be thread-safe.
    printMessage(std::string("Locking image and loading page " + std::to_string(pageNum+1)));
    {
        std::lock_guard<decltype(imgLock)> loadGuard(imgLock);
        rc = kRecOpenImgFile(inFile.c_str(), &imgFile, IMGF_READ, static_cast<IMF_FORMAT>(0));
        // Only load the page when the file could really be opened.
        if (rc == REC_OK)
        {
            imgFileOpen = true;
            rc = kRecLoadImg(0, imgFile, &hPage, pageNum);
        }
    }

    if (rc != REC_OK)
    {
        messageStream << "ERROR: Cannot load page " << pageNum + 1 << "/" << pageCnt << ":" <<
            getError(rc) << " (" << rc << ")";
        printMessage(messageStream.str());
        messageStream.str("");
    }
    else
    {
        {
            std::lock_guard<decltype(imgLock)> preprocGuard(imgLock);
            rc = kRecPreprocessImg(0, hPage); //Not thread safe.
        }
        if (rc != REC_OK)
        {
            messageStream << "ERROR: Cannot pre-process page " << pageNum + 1 << "/" << pageCnt << ":" <<
                getError(rc) << " (" << rc << ")";
            printMessage(messageStream.str());
            messageStream.str("");
        }
    }

    // At this point, if we are in error skip recognition, write the empty page, and bail.
    if (rc == REC_OK)
    {
        messageStream << "Recognizing page " << pageNum + 1 << "/" << pageCnt << ".";
        printMessage(messageStream.str());
        messageStream.str("");
        {
            std::lock_guard<decltype(imgLock)> recGuard(imgLock);
            rc = kRecRecognize(ID, hPage, NULL); //also not thread safe :(
        }
        if (rc != REC_OK)
        {
            messageStream << "Failed recognition page " << pageNum + 1 << "/" << pageCnt << ": " << getError(rc) << " (" << rc << ")";
            printMessage(messageStream.str());
            messageStream.str("");

            // Try rotating the page, through all 270 degrees.
            int nRotation = 0;
            for (int i = 0; (rc != REC_OK) && (i < 3); ++i)
            {
                nRotation += 90;
                messageStream << "Trying to rotate page to " << nRotation << " deg.";
                printMessage(messageStream.str());
                messageStream.str("");

                rc = kRecRotateImg(0, hPage, ROT_RIGHT);
                if (rc == REC_OK)
                {
                    // Try and recognize the image again.
                    {
                        std::lock_guard<decltype(imgLock)> retryGuard(imgLock);
                        rc = kRecRecognize(ID, hPage, NULL);
                    }
                    if (rc != REC_OK)
                    {
                        // Keep the error code: the loop then tries the next
                        // rotation, and a page that never recognizes is
                        // reported as failed instead of as a success.
                        messageStream << "Error recognizing rotated page " << pageNum + 1 <<
                            " to " << nRotation << " deg: " << getError(rc) << " (" << rc << ")" << std::endl;
                        printMessage(messageStream.str());
                        messageStream.str("");
                    }
                    else
                        // Save that the rotated page is the one that worked.
                        rotated = true;
                }
                else
                {
                    messageStream << "Error rotating page " << pageNum + 1 <<
                        " to " << nRotation << " deg, bailing out: " << getError(rc) << " (" << rc << ")" << std::endl;
                    printMessage(messageStream.str());
                    messageStream.str("");
                }
            }
        }
    }

    // At this point, we should have an OCR'd page. If not, bail!
    if (rc != REC_OK)
    {
        messageStream << "Nothing on page " << pageNum + 1 << "." << std::endl;
        printMessage(messageStream.str());
        messageStream.str("");

        pageObj.setJSONValue("page", pageNum + 1);
        pageObj.setJSONValue("numLines", 0);
        pageObj.setJSONArray("lines");
        if (rc == IMF_FONT_MISSING_WARN)
            pageObj.setJSONValue("ocrerr", "OCR_MISSING_FONT_SKIPPED");
        else if ((rc == IMG_DPI_ERR) || (rc == IMG_DPI_WARN))
            pageObj.setJSONValue("ocrerr", "OCR_RES_UNSUPPORTED_SKIPPED");
        addToJSONPages(pageNum, pageObj);

        messageStream << "Error on page " << pageNum <<" \n " << getError(rc);
        printMessage(messageStream.str());
        messageStream.str("");

        // Release the page and the privately opened file, then stop: there
        // are no letters to extract from a page that was not recognized.
        if (hPage != NULL)
            kRecFreeImg(hPage);
        if (imgFileOpen)
        {
            std::lock_guard<decltype(imgLock)> closeGuard(imgLock);
            kRecCloseImgFile(imgFile);
        }
        return;
    }

    // If the page was rotated, save it on its own.
    if (hPage != NULL && rotated)
    {
        IMF_FORMAT nImgFormat = FF_TIFNO;
        if (isPDF)
            nImgFormat = FF_PDF_GOOD;
        messageStream << "Saving page to " << ((isPDF) ? "PDF" : "TIFF") << " file.\n";
        printMessage(messageStream.str());
        messageStream.str("");

        //We always want to save this as the only page in the document...
        rc = kRecSaveImgF(ID, tmpFixedImgFile.c_str(), nImgFormat, hPage, II_CURRENT, true);
        if (rc != REC_OK)
        {
            messageStream << "Error saving page: " << getError(rc) << " (" << rc << ")" << "\n";
            printMessage(messageStream.str());
            messageStream.str("");
        }
    }

    //-----------------------------------------------------------------------------------------------------------
    // Character extraction logic
    //-----------------------------------------------------------------------------------------------------------
    LONG numLetters = 0;
    LETTER *pLetters = NULL;
    rc = kRecGetLetters(hPage, II_CURRENT, &pLetters, &numLetters);
    if (rc != REC_OK)
    {
        messageStream << "WARNING: Getting letters of page " << pageNum + 1 << ": " << getError(rc) << " (" << rc << ")";
        printMessage(messageStream.str());
        messageStream.str("");
    }
    // Without a letter buffer there is nothing to read, whatever the count says.
    if ((pLetters == NULL) || (numLetters < 0))
        numLetters = 0;

    // Per-letter attributes. One spare element keeps data() valid for empty pages.
    const std::vector<int>::size_type slots = static_cast<std::vector<int>::size_type>(numLetters) + 1;
    std::vector<char> chars(slots);
    std::vector<int> lefts(slots);
    std::vector<int> rights(slots);
    std::vector<int> bottoms(slots);
    std::vector<int> tops(slots);
    std::vector<int> zones(slots);
    std::vector<unsigned char> confs(slots);

    IMG_INFO pageInfo = { 0 };
    if ((kRecGetImgInfo(ID, hPage, II_CURRENT, &pageInfo) != REC_OK) ||
        (pageInfo.DPI.cx <= 0) || (pageInfo.DPI.cy <= 0))
        // Hmmm, can't get (usable) image info? Punt, and default to 300x300dpi.
        pageInfo.DPI.cx = pageInfo.DPI.cy = 300;

    // Calculate the mult factor, using the page's resolution.
    const double multX = SCALE_FACTOR / static_cast<double>(pageInfo.DPI.cx);
    const double multY = SCALE_FACTOR / static_cast<double>(pageInfo.DPI.cy);

    // Pull the letter info out. The buffer is indexed, not advanced, so that
    // its start stays available for kRecFree().
    for (int i = 0; i < numLetters; i++)
    {
        const LETTER &letter = pLetters[i];
        chars[i] = letter.code;
        lefts[i] = static_cast<int>(static_cast<double>(letter.left) * multX);
        tops[i] = static_cast<int>(static_cast<double>(letter.top) * multY);
        confs[i] = letter.err;
        // Bottom/right edges are derived from the scaled height/width.
        bottoms[i] = tops[i] + static_cast<int>(static_cast<double>(letter.height) * multY);
        rights[i] = lefts[i] + static_cast<int>(static_cast<double>(letter.width) * multX);
        zones[i] = letter.zone;
    }
    if (pLetters != NULL)
        kRecFree(pLetters);

    // Start the JSON stuff.
    int wordsTot = getNumWords(chars.data(), numLetters);
    pageObj.setJSONValue("page", pageNum + 1);
    pageObj.setJSONValue("numLines", wordsTot);
    pageObj.setJSONArray("lines");
    JSONArray &linesArray = pageObj.getJSONArray("lines");

    int numWords = 0;
    int wLen = 0;
    for (int i = 0; i < numLetters; i += wLen)
    {
        JSONObject lineObj;

        // Length of the word: test the bound first, then look at the character.
        wLen = 0;
        while ((i + wLen < numLetters) && (chars[i + wLen] != ' '))
            wLen++;
        // The terminating space belongs to the word, but only if there is one.
        if (i + wLen < numLetters)
            wLen++;

        lineObj.setJSONValue("line", numWords);
        lineObj.setJSONValue("zone", zones[i]);
        lineObj.setJSONValue("page", pageNum + 1);
        lineObj.setJSONValue("xmin", getMin(i, i + wLen, lefts.data()));
        lineObj.setJSONValue("ymin", getMin(i, i + wLen, tops.data()));
        lineObj.setJSONValue("xmax", getMax(i, i + wLen, rights.data()));
        lineObj.setJSONValue("ymax", getMax(i, i + wLen, bottoms.data()));
        lineObj.setJSONValue("numChars", wLen);

        // Characters: anything outside printable ASCII, and the double quote, becomes a blank.
        std::stringstream tmpStr;
        for (int j = 0; j < wLen; j++)
        {
            const int code = static_cast<int>(chars[i + j]);
            if (code < 128 && code >= 32 && code != 34)
                tmpStr << chars[i + j];
            else
                tmpStr << " ";
        }
        lineObj.setJSONValue("chars", tmpStr.str().c_str());
        tmpStr.str("");

        // Confidences: the low seven bits of the error byte, converted per character.
        for (int j = 0; j < wLen; j++)
        {
            const unsigned char conf = confs[i + j];
            long confB = ((getBit(conf, 6) * 1000000) + (getBit(conf, 5) * 100000) + (getBit(conf, 4) * 10000) + (getBit(conf, 3) * 1000) +
                          (getBit(conf, 2) * 100) + (getBit(conf, 1) * 10) + getBit(conf, 0));
            long confI = fromBin(confB);
            tmpStr << convertConfidence(confI);
        }
        lineObj.setJSONValue("confs", tmpStr.str().c_str());

        lineObj.setJSONArray("xmins");
        {
            JSONArray &xminsArray = lineObj.getJSONArray("xmins");
            for (int j = 0; j < wLen; j++)
                xminsArray.addElement(lefts[i + j]);
        }
        lineObj.setJSONArray("ymins");
        {
            JSONArray &yminsArray = lineObj.getJSONArray("ymins");
            for (int j = 0; j < wLen; j++)
                yminsArray.addElement(tops[i + j]);
        }
        lineObj.setJSONArray("xmaxs");
        {
            JSONArray &xmaxsArray = lineObj.getJSONArray("xmaxs");
            for (int j = 0; j < wLen; j++)
                xmaxsArray.addElement(rights[i + j]);
        }
        lineObj.setJSONArray("ymaxs");
        {
            JSONArray &ymaxsArray = lineObj.getJSONArray("ymaxs");
            for (int j = 0; j < wLen; j++)
                ymaxsArray.addElement(bottoms[i + j]);
        }

        numWords++;
        linesArray.addElement(lineObj);
    }

    // Add the page to the JSON pages array, this is thread safe.
    addToJSONPages(pageNum, pageObj);

    // Release what this call acquired.
    if (hPage != NULL)
        kRecFreeImg(hPage);
    if (imgFileOpen)
    {
        std::lock_guard<decltype(imgLock)> closeGuard(imgLock);
        kRecCloseImgFile(imgFile);
    }
}
int CNuanceOCR::OCRMT(const std::string &inFile, int threadCount)
{
initializeNuance();
HPAGE page;
bool result;
RECERR nRetVal = REC_OK;
std::stringstream msgStr;
imgLock.lock();
nRetVal = kRecOpenImgFile(inFile.c_str(), &hIFile, IMGF_READ, (IMF_FORMAT)0);
if (nRetVal != REC_OK)
{
printMessage(std::string("Couldn't load file " + inFile));
kRecQuit();
}
imgLock.unlock();
//Set the output to XML
printMessage(std::string("Setting output to XML"));
nRetVal = kRecSetDTXTFormat(SID, DTXT_XMLCOORD);
if (nRetVal != REC_OK)
{
msgStr << getError(nRetVal);
printMessage(msgStr.str());
msgStr.str("");
}
//Create an array of the pages
int pageCnt;
nRetVal = kRecGetImgFilePageCount(hIFile, &pageCnt);
if (nRetVal != REC_OK)
{
msgStr << "Error getting the page count: " << getError(nRetVal);
printMessage(msgStr.str());
return false;
}
msgStr << "page count = " << pageCnt;
printMessage(msgStr.str());
msgStr.str("");
//-------------------------------------------------------------------------------------------------------------------------------
//set the OCR thread count
msgStr << "Setting thread count to " << threadCount << std::endl;
printMessage(msgStr.str());
msgStr.str("");
nRetVal = RecSetOCRThreadCount(SID, threadCount);
if (nRetVal != REC_OK)
{
msgStr << getError(nRetVal);
printMessage(msgStr.str());
msgStr.str("");
}
nRetVal = RecGetOCRThreadCount(SID, &threadCount);
if (nRetVal != REC_OK)
{
msgStr << "Error getting the thread count: " << getError(nRetVal);
printMessage(msgStr.str());
return false;
}
msgStr << "Seccessfully set thread count to " << threadCount;
printMessage(msgStr.str());
msgStr.str("");
//--------------------------------------------------------------------------------------------------------------------------------
//load the page into the page array
HPAGE pages[pageCnt];
for (int i = 0; i < pageCnt; i++)
{
//msgstr << "adding page " << i+1 << "/" << pagecnt << " to the page array.";
//printmessage(msgstr.str());
//msgstr.str("");
HPAGE tmpPage;
nRetVal =kRecLoadImg(SID, hIFile, &tmpPage, i);
if (nRetVal != REC_OK)
{
msgStr << "Error getting page " << i << std::endl << " ";
msgStr << getError(nRetVal);
printMessage(msgStr.str());
msgStr.str("");
}
//save the page info
pages[i] = tmpPage;
msgStr << "page " << i + 1 << " = " << pages[i];
printMessage(msgStr.str());
msgStr.str("");
}
//run Nuance
msgStr << "Writing OCR output to " << getXMLName(inFile);
printMessage(msgStr.str());
msgStr.str("");
for (int i = 0; i < pageCnt; i++)
{
nRetVal = kRecRecognize(SID, pages[i], getXMLName(inFile).c_str());
if (nRetVal != REC_OK)
{
msgStr << "Error recognizing page " << i << std::endl;
msgStr << getError(nRetVal);
printMessage(msgStr.str());
msgStr.str("");
}
}
nRetVal = kRecConvert2DTXTEx(SID, pages, pageCnt, II_CURRENT, getXMLName(inFile).c_str());
if (nRetVal != REC_OK)
{
const char* errorInfo;
kRecGetErrorInfo(nRetVal, &errorInfo);
//msgStr << getError(nRetVal);
std::string strErr(errorInfo);
printMessage(strErr);
msgStr.str("");
}
return nRetVal;
}
//Split to lines prototype (doesn't work so well...)
//---------------------------------------------------------------------------------------------------------------------------------------------
//JSONArray splitToLines(word *words, int wordCnt)
//{
// int lineNum = 1;
// JSONArray lines;
// word previous = words[0];
// //Go through the coordinate bounds for each word
// for (int i = 0; i < wordCnt; i++)
// {
// JSONObject line;
// std::string lineVal;
// std::vector<int> tops;
// std::vector<int> bottoms;
// std::vector<int> lefts;
// std::vector<int> rights;
// std::vector<int> confs;
// int numChars;
//
// word current = words[i];
// //if a word's upper left is larger than the previous one's lower right
// if (current.MinY > previous.MaxY)
// {
// //set the values
// line.setJSONValue("line", lineNum);
// line.setJSONValue("zone", current.Zone);
// line.setJSONValue("page", current.Page);
// /*line.setJSONValue("xmin", getMin(i, i + wLen, lefts));
// line.setJSONValue("ymin", getMin(i, i + wLen, bottoms));
// line.setJSONValue("xmax", getMax(i, i + wLen, tops));
// line.setJSONValue("ymax", getMax(i, i + wLen, bottoms));
// line.setJSONValue("numChars", wLen);*/
// //start a new line
// lines.addElement(line);
// lineNum++;
// }
// else
// {
// //add the values in the line
// for (int k = 0; k < current.Length; k++)
// {
// tops.push_back(current.Tops[k]);
// bottoms.push_back(current.Bottoms[k]);
// rights.push_back(current.Rights[k]);
// lefts.push_back(current.Lefts[k]);
// confs.push_back(current.Confidences[k]);
// }
// lineVal += current.value;
// numChars += current.Length;
// }
// //point previous to current
// *previous = &current;
// }
//
//
//
// return lines;
//}
//---------------------------------------------------------------------------------------------------------------------------------------------
//bool CNuanceOCR::OCR(const std::string &inFile, const std::string &outFile)
//{
//
// initializeNuance();
//
// JSONObject fartDoc;
// fartDoc.setJSONValue("numPages", pageCnt);
// fartDoc.setJSONValue("originalSourceFile", inFile.c_str());
// fartDoc.setJSONValue("originalSourceType", getTypeByFileExt(inFile).c_str());
// fartDoc.setJSONValue("textSourceType", "NUANCE");
// fartDoc.setJSONValue("fixedUp", false);
// fartDoc.setJSONArray("pages");
// JSONArray &pageArray = fartDoc.getJSONArray("pages");
//
// HPAGE hPage;
// RECERR rc;
// double duration;
// std::stringstream messageStream;
// clock_t start;
//
//
// rc = kRecOpenImgFile(inFile.c_str(), &hIFile, IMGF_READ, (IMF_FORMAT)0);
// if (rc != REC_OK)
// {
// printMessage(std::string("Couldn't load file " + inFile));
// kRecQuit();
// }
//
// printMessage(std::string("File loaded successfuly!"));
//
// rc = kRecGetImgFilePageCount(hIFile, &pageCnt);
// if (rc != REC_OK)
// {
// printMessage(std::string("Error counting pages."));
// kRecQuit();
// }
// for (int page = 0; page < pageCnt; ++page)
// {
// JSONObject pageObj;
//
// rc = kRecLoadImg(0, hIFile, &hPage, page);
// rc = kRecPreprocessImg(0, hPage);
//
// messageStream << "Recognizing page " << page + 1 << "/" << pageCnt << "\n";
// printMessage(messageStream.str());
// messageStream.str("");
//
// start = clock();
// rc = kRecRecognize(SID, hPage, NULL);
// duration = (clock() - start) / ((double)CLOCKS_PER_SEC);
// totOCRTime += duration;
//
// messageStream << "Done in " << duration << "s.";
// printMessage(messageStream.str());
// messageStream.str("");;
//
// if (rc != REC_OK)
// {
// //std::cout<<"Error" <<std::endl;
// messageStream << "Trying to rotate page " << page;
// printMessage(messageStream.str());
// messageStream.str("");;
//
// rc = kRecRotateImg(0, hPage, ROT_AUTO);
// rc = kRecRecognize(SID, hPage, NULL);
// if (rc != REC_OK)
// {
// //std::cout<<"Error" <<std::endl;
// messageStream << "Nothing on page " << page;
// printMessage(messageStream.str());
// messageStream.str("");;
// //release the page
// kRecFreeImg(hPage);
//
// pageObj.setJSONValue("page", page + 1);
// pageObj.setJSONValue("numLines", 0);
// pageObj.setJSONArray("lines");
// pageArray.addElement(pageObj);
// continue;
// }
// }
//
// if (isTIF)
// rc = kRecSaveImgF(SID, fixedImgFile.c_str(), FF_TIFNO, hPage, II_CURRENT, true);
// else if (isPDF)
// rc = kRecSaveImgF(SID, fixedImgFile.c_str(), FF_PDF_GOOD, hPage, II_CURRENT, true);
//
// if (!rc == REC_OK)
// {
// messageStream << "Error writing to the fixed file:\n"
// << " " << fixedImgFile << "\n"
// << " Page " << page;
// printMessage(messageStream.str());
// messageStream.str("");;
// }
//
// //-----------------------------------------------------------------------------------------------------------
// LONG numLetters = 0;
// LETTER* letters[1];
// rc = kRecGetLetters(hPage, II_CURRENT, letters, &numLetters);
//
// char chars[numLetters];
// int lefts[numLetters];
// int rights[numLetters];
// int bottoms[numLetters];
// int tops[numLetters];
// int zones[numLetters];
// unsigned char confs[numLetters];
//
// if (rc != REC_OK)
// {
// //std::cout<<"Error" <<std::endl;
// RecQuitPlus();
// return false;
// }
//
// //-----------------------------------------------------------------------------------------------------------------------
// IMG_INFO imgInfo;
//
// rc = kRecGetImgInfo(SID, hPage, II_CURRENT, &imgInfo);
//
// //TODO: derive the coordinate multiplier from the image DPI instead of hard-coding it.
// //First step: determine what the DPI of the loaded image actually is.
// //SIZE dpi = imgInfo.DPI;
// //-----------------------------------------------------------------------------------------------------------------------
// int mult = 6;
//
// for (int i = 0; i < numLetters; i++)
// {
// chars[i] = letters[0]->code;
// lefts[i] = letters[0]->left*mult;
// tops[i] = letters[0]->top*mult;
// confs[i] = letters[0]->err;
// bottoms[i] = tops[i] + (letters[0]->height*mult);
// rights[i] = lefts[i] + (letters[0]->width*mult);
// zones[i] = letters[0]->zone;
// letters[0]++;
// }
//
// int wordsTot = getNumWords(chars, numLetters);
// pageObj.setJSONValue("page", page + 1);
// pageObj.setJSONValue("numLines", wordsTot);
// pageObj.setJSONArray("lines");
// JSONArray &linesArray = pageObj.getJSONArray("lines");
//
// int numWords = 0;
// int wLen = 0;
// for (int i = 0; i < numLetters; i += wLen)
// {
// JSONObject lineObj;
// wLen = 0;
// //get the length of the word
// while (chars[i + wLen] != ' ' && i + wLen < numLetters)
// wLen++;
// wLen++;
// lineObj.setJSONValue("line", numWords);
// lineObj.setJSONValue("zone", zones[i]);
// lineObj.setJSONValue("page", page + 1);
// lineObj.setJSONValue("xmin", getMin(i, i + wLen, lefts));
// lineObj.setJSONValue("ymin", getMin(i, i + wLen, bottoms));
// lineObj.setJSONValue("xmax", getMax(i, i + wLen, tops));
// lineObj.setJSONValue("ymax", getMax(i, i + wLen, bottoms));
// lineObj.setJSONValue("numChars", wLen);
//
// std::stringstream tmpStr;
// //write the chars
// for (int j = 0; j < wLen; j++)
// {
// if (static_cast<int>(chars[i + j]) < 128 && static_cast<int>(chars[i + j]) >= 32 && static_cast<int>(chars[i + j]) != 34)
// tmpStr << chars[i + j];
// else
// tmpStr << " ";
// }
// lineObj.setJSONValue("chars",tmpStr.str().c_str());
//
// tmpStr.str("");
// for (int j = 0; j < wLen; j++)
// {
// long confB = ((getBit(confs[i + j], 6) * 1000000) + (getBit(confs[i + j], 5) * 100000) + (getBit(confs[i + j], 4) * 10000) + (getBit(confs[i + j], 3) * 1000) +
// (getBit(confs[i + j], 2) * 100) + (getBit(confs[i + j], 1) * 10) + getBit(confs[i + j], 0));
//
// long confI = fromBin(confB);
// tmpStr << convertConfidence(confI);
// }
// lineObj.setJSONValue("confs", tmpStr.str().c_str());
//
// lineObj.setJSONArray("xmins");
// {
// JSONArray &xminsArray = lineObj.getJSONArray("xmins");
// for (int j = 0; j < wLen; j++)
// xminsArray.addElement(lefts[i + j]);
// }
//
// lineObj.setJSONArray("ymins");
// {
// JSONArray &yminsArray = lineObj.getJSONArray("ymins");
// for (int j = 0; j < wLen; j++)
// yminsArray.addElement(tops[i + j]);
// }
//
// lineObj.setJSONArray("xmaxs");
// {
// JSONArray &xmaxsArray = lineObj.getJSONArray("xmaxs");
// //add the right edge (xmax) of every character in the word
// for (int j = 0; j < wLen; j++)
// xmaxsArray.addElement(rights[i + j]);
// }
//
// lineObj.setJSONArray("ymaxs");
// {
// JSONArray &ymaxsArray = lineObj.getJSONArray("ymaxs");
// for (int j = 0; j < wLen; j++)
// ymaxsArray.addElement(bottoms[i + j]);
// }
//
// numWords++;
// linesArray.addElement(lineObj);
// }
// pageArray.addElement(pageObj);
// }
// //-----------------------------------------------------------------------------------------------------------
//
// RecQuitPlus();
//
// fartDoc.SaveToFile(outFile);
// double avgPageTime = (totOCRTime / ((double)pageCnt));
//
// messageStream << "Avg time per page: \033[1;34m" << avgPageTime << "s\033[0m";
// printMessage(messageStream.str());
// messageStream.str("");;
//
// return true;
//}