//
// Copyright (c) 2016, Sequence Logic
//
#include "nuanceocr.h"

#include <vector>

#include "ocrbase.h"
#include "KernelApi.h"
#include "RecApiPlus.h"
#include "jsonobject.h"

#include "sledsconstants.h"

#include "Task.h"
#include "ThreadPool.h"
#include "Global.h"
|
|
|
|
using namespace sequencelogic;
|
|
|
|
namespace
{
    /**
     * Geometry record for one recognised word.
     *
     * The per-character arrays were previously declared as unsized arrays
     * (`int Tops[];`) in the middle of the struct, which is ill-formed C++ and,
     * where a compiler accepts it, makes every array alias the members that
     * follow it. They are now growable vectors, so element access with
     * `operator[]` keeps working once the vector has been sized.
     *
     * NOTE(review): field meanings are inferred from their names only; nothing
     * in the visible part of this file reads or writes this struct.
     */
    struct word
    {
        int MaxY;
        int MaxX;
        int MinY;
        int MinX;

        int Page;
        int Length;
        std::vector<int> Tops;
        std::vector<int> Bottoms;
        std::vector<int> Lefts;
        std::vector<int> Rights;
        std::vector<int> Confidences;
        int Zone;
        std::string value;
    };

    // Output coordinates are pixel coordinates multiplied by SCALE_FACTOR / DPI.
    const double SCALE_FACTOR = 1200.0;
}
|
|
|
|
/**
 * Builds the OCR wrapper for one input file.
 *
 * Starts the engine and opens the file; opening the file is what fills in
 * the page count. The results of both calls are not inspected here.
 *
 * @param inFile path of the image/PDF file to process.
 */
CNuanceOCR::CNuanceOCR(std::string &inFile)
    : COCRBase(inFile),
      _pages(nullptr),
      _imgPages(nullptr)
{
    // Engine first, then the file (the page count is set while loading).
    initializeNuance();
    loadImgFile(inFile);

    // Per-page storage is intentionally not allocated here:
    //_pages = new JSONObject[pageCnt];
    //_imgPages = new HPAGE[pageCnt];
}
|
|
|
|
/**
 * Releases the two per-page arrays held by this object.
 */
CNuanceOCR::~CNuanceOCR()
{
    // delete[] of a null pointer is a no-op, so no guards are required.
    delete[] _pages;
    delete[] _imgPages;
}
|
|
|
|
void CNuanceOCR::addTime(double runTime)
|
|
{
|
|
std::lock_guard<std::mutex> timeLocker(timeLock);
|
|
totOCRTime += runTime;
|
|
}
|
|
|
|
|
|
//have debug code here================================================================================================================================================================
|
|
/**
 * Runs OCR on one page of the currently open image file and stores the
 * resulting JSON page object via addToJSONPages().
 *
 * Steps: load and pre-process the page, recognise it (retrying after up to
 * three 90 degree right rotations if recognition fails), save the page image
 * to tmpImgFile (and optionally to a per-page PNG), then split the recognised
 * letters into space-terminated words and emit one "line" object per word
 * with its bounding box, characters, confidences and per-character boxes.
 * Coordinates are scaled by SCALE_FACTOR / DPI.
 *
 * @param pageNum          1-based page number inside the image file.
 * @param adjustedPage     page number written into the JSON; incremented once
 *                         a page object (recognised or empty) has been produced.
 * @param rotated          set to true when the page was straightened/rotated
 *                         by pre-processing, recognised only after a manual
 *                         rotation, or could not be loaded at all.
 * @param singlePageSave   when true, also save the page as a PNG file.
 * @param singlePageFolder folder that receives the per-page PNG files.
 * @return REC_OK on success, otherwise the engine code of the failure
 *         (the page is then recorded with a "badPage" entry).
 */
RECERR CNuanceOCR::OCRPage(int pageNum, int &adjustedPage, bool &rotated, bool singlePageSave, const std::string &singlePageFolder)
{
    RECERR nRetVal = REC_OK;
    HPAGE hPage = NULL;
    std::stringstream messageStream;
    JSONObject pageObj;

    // Print whatever has been accumulated in messageStream, then reset it.
    auto flushMessage = [&]()
    {
        printMessage(messageStream.str());
        messageStream.str("");
    };

    //load the image into nuance
    RECERR rc = kRecLoadImg(0, hIFile, &hPage, pageNum - 1);
    if (rc != REC_OK)
    {
        if (hPage == NULL)
        {
            messageStream << "ERROR: Cannot load page " << pageNum << "/" << pageCnt << ":" <<
                getError(rc) << " (" << rc << ")";
        }
        else
        {
            // A page handle came back, so treat the code as a warning and carry on.
            messageStream << "WARNING: Page " << pageNum << "/" << pageCnt << ":" <<
                getError(rc) << " (" << rc << ")";
            rc = REC_OK;
        }
        flushMessage();
    }
    else
    {
        rc = kRecPreprocessImg(0, hPage);
        if (rc != REC_OK)
        {
            if (hPage == NULL)
            {
                messageStream << "ERROR: Cannot pre-process page " << pageNum << "/" << pageCnt << ":" <<
                    getError(rc) << " (" << rc << ")";
            }
            else
            {
                messageStream << "WARNING: Pre-processing page " << pageNum << "/" << pageCnt << ":" <<
                    getError(rc) << " (" << rc << ")";
                rc = REC_OK;
            }
            flushMessage();
        }
        else
        {
            // See if Nuance did anything to the image...
            PREPROC_INFO preprocInfo;
            if (kRecGetPreprocessInfo(hPage, &preprocInfo) == REC_OK)
            {
                const bool deskewed = (preprocInfo.Flags & PREPROC_INFO_3DDESKEW) == PREPROC_INFO_3DDESKEW;
                const bool straightened = (preprocInfo.Flags & PREPROC_INFO_STRAIGHTENED) == PREPROC_INFO_STRAIGHTENED;
                if (deskewed || straightened)
                {
                    printMessage("Page was straightened.");
                    rotated = true;
                }
                else if (preprocInfo.Rotation != ROT_NO)
                {
                    rotated = true;
                    switch (preprocInfo.Rotation)
                    {
                    case ROT_RIGHT:
                        printMessage("Page was rotated image to the right (clockwise).");
                        break;
                    case ROT_DOWN:
                        printMessage("Page was rotated image down (rotate twice).");
                        break;
                    case ROT_LEFT:
                        printMessage("Page was rotated image to the left (anti-clockwise).");
                        break;
                    case ROT_FLIPPED:
                        printMessage("Page was mirrored without rotation (mirrored around the Y-axis).");
                        break;
                    case ROT_RIGHT_FLIPPED:
                        printMessage("Page was mirrored, then rotate to the right.");
                        break;
                    case ROT_DOWN_FLIPPED:
                        printMessage("Page was mirrored image, then rotate twice.");
                        break;
                    case ROT_LEFT_FLIPPED:
                        printMessage("Page was mirrored, then rotate to the left.");
                        break;
                    default:
                        break;
                    }
                }
            }
        }
    }

    // If we have a NULL image handle, bail!
    if (hPage == NULL)
    {
        // Kept from the original: an unloadable page is flagged as "rotated".
        rotated = true;
        // Write into the bad page object.
        pageObj.setJSONValue("badPage", pageNum);
        addToJSONPages(pageNum - 1, pageObj);
        return rc;
    }

    messageStream << "Recognizing page " << pageNum << "/" << pageCnt << ".";
    flushMessage();

    const clock_t start = clock();
    rc = kRecRecognize(SID, hPage, NULL);

    if (rc != REC_OK)
    {
        messageStream << "WARNING: Failed recognition page " << pageNum << "/" << pageCnt << ": " << getError(rc) << " (" << rc << ")";
        flushMessage();

        // Try rotating the page, through all 270 degrees.
        int nRotation = 0;
        for (int attempt = 0; (rc != REC_OK) && (attempt < 3); ++attempt)
        {
            nRotation += 90;
            messageStream << "Trying to rotate page to " << nRotation << " deg.";
            flushMessage();

            rc = kRecRotateImg(0, hPage, ROT_RIGHT);
            if (rc != REC_OK)
            {
                messageStream << "Error rotating page " << pageNum <<
                    " to " << nRotation << " deg, bailing out: " << getError(rc) << " (" << rc << ")" << std::endl;
                flushMessage();
                // The message promises to bail out, so stop retrying.
                break;
            }

            rc = kRecRecognize(SID, hPage, NULL);
            if (rc != REC_OK)
            {
                messageStream << "WARNING: Recognizing rotated page " << pageNum <<
                    " to " << nRotation << " deg: " << getError(rc) << " (" << rc << ")" << std::endl;
                flushMessage();
            }
            else
            {
                rotated = true;
            }
        }
    }

    if (rc != REC_OK)
    {
        // There was an error rotating the page, or we didn't find anything. Re-load the original
        // page to get the correct orientation for writing, then bail.
        kRecFreeImg(hPage);
        // Clear the handle first so a failed reload cannot leave a freed handle behind.
        hPage = NULL;
        kRecLoadImg(0, hIFile, &hPage, pageNum - 1);

        nRetVal = rc;
        messageStream << "Nothing on page " << pageNum << "." << std::endl;
        flushMessage();

        pageObj.setJSONValue("page", adjustedPage++);
        pageObj.setJSONValue("numLines", 0);
        pageObj.setJSONArray("lines");
        addToJSONPages(pageNum - 1, pageObj);
    }
    else
    {
        const double duration = (clock() - start) / ((double)CLOCKS_PER_SEC);
        // Go through addTime() so the shared total is updated under its lock.
        addTime(duration);
        messageStream << "Done in " << duration << "s.";
        flushMessage();
    }

    if (hPage != NULL)
    {
        const IMF_FORMAT nImgFormat = isPDF ? FF_PDF_GOOD : FF_TIFG4;

        //save the image
        if (singlePageSave)
        {
            printMessage("Saving page to PNG file.");
            std::stringstream saveLoc;
            saveLoc << std::setw(5) << std::setfill('0') << pageNum;
            const std::string saveFile = singlePageFolder + "/" + "page" + saveLoc.str() + ".png";
            // NOTE(review): the last argument is believed to be the SDK's append flag
            // (false = do not append to an existing file) - confirm against the CSDK docs.
            rc = kRecSaveImgF(SID, saveFile.c_str(), FF_PNG, hPage, II_CURRENT, false);
            if (rc != REC_OK)
            {
                messageStream << "Error saving page: " << getError(rc) << " (" << rc << ")" << "\n";
                flushMessage();
            }
        }

        //always need this file or gs is unreliable.
        messageStream << "Saving page to " << ((isPDF) ? "PDF" : "TIFF") << " file.\n";
        flushMessage();
        rc = kRecSaveImgF(SID, tmpImgFile.c_str(), nImgFormat, hPage, II_CURRENT, true);

        //did it work?
        if (rc != REC_OK)
        {
            messageStream << "Error saving page: " << getError(rc) << " (" << rc << ")" << "\n";
            flushMessage();
        }
    }

    if (nRetVal != REC_OK)
    {
        if (hPage != NULL)
            kRecFreeImg(hPage);
        pageObj.setJSONValue("badPage", pageNum);
        addToJSONPages(pageNum - 1, pageObj);
        return nRetVal;
    }

    //-----------------------------------------------------------------------------------------------------------
    // Character extraction logic
    //-----------------------------------------------------------------------------------------------------------
    LONG numLetters = 0;
    LETTER *pletters = NULL;
    rc = kRecGetLetters(hPage, II_CURRENT, &pletters, &numLetters);
    // Without a letter buffer there is nothing to read, whatever count was reported.
    if (pletters == NULL || numLetters < 0)
        numLetters = 0;

    // Heap-backed storage: a page can hold many letters, and an empty page needs size 0.
    const std::vector<int>::size_type letterSlots = static_cast<std::vector<int>::size_type>(numLetters);
    const int letterTotal = static_cast<int>(numLetters);
    std::vector<char> chars(letterSlots);
    std::vector<int> lefts(letterSlots);
    std::vector<int> rights(letterSlots);
    std::vector<int> bottoms(letterSlots);
    std::vector<int> tops(letterSlots);
    std::vector<int> zones(letterSlots);
    std::vector<unsigned char> confs(letterSlots);

    IMG_INFO pageInfo = {0};
    if (kRecGetImgInfo(SID, hPage, II_CURRENT, &pageInfo) != REC_OK)
        // Hmmm, can't get image info? Punt, and default to 300x300dpi.
        pageInfo.DPI.cx = pageInfo.DPI.cy = 300;
    // A zero/negative resolution would make the scale factors meaningless.
    if (pageInfo.DPI.cx <= 0)
        pageInfo.DPI.cx = 300;
    if (pageInfo.DPI.cy <= 0)
        pageInfo.DPI.cy = 300;

    // Calculate the mult factor, using the page's resolution.
    const double multX = SCALE_FACTOR / static_cast<double>(pageInfo.DPI.cx);
    const double multY = SCALE_FACTOR / static_cast<double>(pageInfo.DPI.cy);

    //pull the letter info out
    for (int i = 0; i < letterTotal; i++)
    {
        const LETTER &letter = pletters[i];
        chars[i] = letter.code;
        lefts[i] = static_cast<int>(static_cast<double>(letter.left) * multX);
        tops[i] = static_cast<int>(static_cast<double>(letter.top) * multY);
        confs[i] = letter.err;
        // bottom = top + scaled height, right = left + scaled width
        bottoms[i] = tops[i] + (static_cast<int>(static_cast<double>(letter.height) * multY));
        rights[i] = lefts[i] + (static_cast<int>(static_cast<double>(letter.width) * multX));
        zones[i] = letter.zone;
    }
    if (pletters != NULL)
        kRecFree(pletters);

    //start the JSON stuff
    const int wordsTot = getNumWords(chars.data(), numLetters);
    pageObj.setJSONValue("page", adjustedPage);
    pageObj.setJSONValue("numLines", wordsTot);
    pageObj.setJSONArray("lines");
    JSONArray &linesArray = pageObj.getJSONArray("lines");

    int numWords = 0;
    int wLen = 0;
    for (int i = 0; i < letterTotal; i += wLen)
    {
        JSONObject lineObj;

        // Length of the word: check the bound before touching chars[].
        wLen = 0;
        while (i + wLen < letterTotal && chars[i + wLen] != ' ')
            wLen++;
        // Take the terminating space along, but only if there is one; the
        // last word on the page may end without it.
        if (i + wLen < letterTotal)
            wLen++;

        // NOTE(review): getMin/getMax receive (i, i + wLen) exactly as before;
        // confirm that they treat the second argument as an exclusive end.
        lineObj.setJSONValue("line", numWords);
        lineObj.setJSONValue("zone", zones[i]);
        lineObj.setJSONValue("page", adjustedPage);
        lineObj.setJSONValue("xmin", getMin(i, i + wLen, lefts.data()));
        lineObj.setJSONValue("ymin", getMin(i, i + wLen, tops.data()));
        lineObj.setJSONValue("xmax", getMax(i, i + wLen, rights.data()));
        lineObj.setJSONValue("ymax", getMax(i, i + wLen, bottoms.data()));
        lineObj.setJSONValue("numChars", wLen);

        //write the chars: printable ASCII except the double quote, everything else becomes a space
        std::stringstream tmpStr;
        for (int j = 0; j < wLen; j++)
        {
            const int code = static_cast<int>(chars[i + j]);
            if (code < 128 && code >= 32 && code != 34)
                tmpStr << chars[i + j];
            else
                tmpStr << " ";
        }
        lineObj.setJSONValue("chars", tmpStr.str().c_str());

        // One converted confidence per character, built from the low seven bits of err.
        tmpStr.str("");
        for (int j = 0; j < wLen; j++)
        {
            long confB = ((getBit(confs[i + j], 6) * 1000000) + (getBit(confs[i + j], 5) * 100000) + (getBit(confs[i + j], 4) * 10000) + (getBit(confs[i + j], 3) * 1000) +
                (getBit(confs[i + j], 2) * 100) + (getBit(confs[i + j], 1) * 10) + getBit(confs[i + j], 0));

            long confI = fromBin(confB);
            tmpStr << convertConfidence(confI);
        }
        lineObj.setJSONValue("confs", tmpStr.str().c_str());

        // Per-character boxes, one array per edge.
        auto appendCoords = [&](const char *key, const std::vector<int> &values)
        {
            lineObj.setJSONArray(key);
            JSONArray &coordArray = lineObj.getJSONArray(key);
            for (int j = 0; j < wLen; j++)
                coordArray.addElement(values[i + j]);
        };
        appendCoords("xmins", lefts);
        appendCoords("ymins", tops);
        appendCoords("xmaxs", rights);
        appendCoords("ymaxs", bottoms);

        numWords++;
        linesArray.addElement(lineObj);
    }
    ++adjustedPage;
    addToJSONPages(pageNum - 1, pageObj);
    kRecFreeImg(hPage);

    return nRetVal;
}
|
|
|
|
bool CNuanceOCR::loadImgFile(std::string &inFile)
|
|
{
|
|
docReturnVals = kRecOpenImgFile(inFile.c_str(), &hIFile, IMGF_READ, (IMF_FORMAT)0);
|
|
if (docReturnVals != REC_OK)
|
|
{
|
|
printMessage(std::string("Couldn't load file " + inFile));
|
|
kRecQuit();
|
|
return false;
|
|
}
|
|
|
|
printMessage(std::string("File loaded successfuly!"));
|
|
|
|
docReturnVals = kRecGetImgFilePageCount(hIFile, &pageCnt);
|
|
if (docReturnVals != REC_OK)
|
|
{
|
|
printMessage(std::string("Error counting pages."));
|
|
kRecQuit();
|
|
return false;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
/**
|
|
* Get the text of a Nuance error code.
|
|
*/
|
|
std::string CNuanceOCR::getError (RECERR nErr)
|
|
{
|
|
int nErrLen = 0;
|
|
int nExtErr = 0;
|
|
kRecGetLastError(&nExtErr, NULL, 0);
|
|
kRecGetErrorUIText(nErr, nExtErr, "", NULL, &nErrLen);
|
|
std::string errStr;
|
|
errStr.resize(nErrLen);
|
|
kRecGetErrorUIText(nErr, nExtErr, "", &errStr[0], &nErrLen);
|
|
return errStr;
|
|
}
|
|
|
|
bool CNuanceOCR::initializeNuance()
|
|
{
|
|
#if (USE_OEM_LICENSE)
|
|
docReturnVals = kRecSetLicense(LICENSE_FILE, OEM_CODE);
|
|
if (docReturnVals != REC_OK)
|
|
{
|
|
std::stringstream msg;
|
|
msg << "kRecSetLicense: Error obtaining license: " << getError(docReturnVals) << std::endl;
|
|
printMessage(msg.str().c_str());
|
|
kRecQuit();
|
|
return false;
|
|
}
|
|
#endif
|
|
|
|
docReturnVals = RecInitPlus(YOUR_COMPANY, YOUR_PRODUCT); // use your company and product name here
|
|
if ((docReturnVals != REC_OK) && (docReturnVals != API_INIT_WARN) && (docReturnVals != API_LICENSEVALIDATION_WARN))
|
|
{
|
|
std::stringstream msg;
|
|
msg << "kRecSetLicense: Error obtaining license: " << getError(docReturnVals) << std::endl;
|
|
printMessage(msg.str().c_str());
|
|
RecQuitPlus();
|
|
return false;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
bool CNuanceOCR::savePage(int page, HPAGE &hPage)
|
|
{
|
|
imgPageLock.lock();
|
|
_imgPages[page] = hPage;
|
|
imgPageLock.unlock();
|
|
return true;
|
|
}
|
|
|
|
bool CNuanceOCR::writeFixedImg()
|
|
{
|
|
std::stringstream messageStream;
|
|
RECERR rc;
|
|
imgPageLock.lock();
|
|
|
|
for (int i = 0; i < pageCnt; i++)
|
|
{
|
|
HPAGE imgPage = _imgPages[i];
|
|
if (isTIF)
|
|
{
|
|
messageStream << "Saving page to TIF file.\n";
|
|
printMessage(messageStream.str());
|
|
messageStream.str("");
|
|
|
|
|
|
rc = kRecSaveImgF(SID, fixedImgFile.c_str(), FF_TIFNO, imgPage, II_CURRENT, true);
|
|
if (rc != REC_OK)
|
|
{
|
|
messageStream << "Error saving page.\n";
|
|
printMessage(messageStream.str());
|
|
messageStream.str("");
|
|
}
|
|
}
|
|
|
|
else if (isPDF)
|
|
{
|
|
messageStream << "Saving page to PDF file.\n";
|
|
printMessage(messageStream.str());
|
|
messageStream.str("");
|
|
|
|
rc = kRecSaveImgF(SID, fixedImgFile.c_str(), FF_PDF_GOOD, imgPage, II_CURRENT, true);
|
|
if (rc != REC_OK)
|
|
{
|
|
messageStream << "Error saving page.\n";
|
|
printMessage(messageStream.str());
|
|
messageStream.str("");
|
|
}
|
|
}
|
|
}
|
|
|
|
imgPageLock.unlock();
|
|
return true;
|
|
}
|
|
|
|
int CNuanceOCR::OCR(const OCROpts &opts, const std::string &outFile)
|
|
{
|
|
//set up nuance
|
|
initializeNuance();
|
|
|
|
int nRetVal = OCR_SUCCESS;
|
|
std::stringstream messageStream;
|
|
bool pageRotated = false;
|
|
RECERR rc;
|
|
double duration;
|
|
clock_t start;
|
|
|
|
rc = kRecOpenImgFile(opts._inFile.c_str(), &hIFile, IMGF_READ, (IMF_FORMAT)0);
|
|
if (rc != REC_OK)
|
|
{
|
|
printMessage(std::string("Couldn't load file " + opts._inFile));
|
|
kRecQuit();
|
|
}
|
|
|
|
printMessage(std::string("File loaded successfuly!"));
|
|
|
|
rc = kRecGetImgFilePageCount(hIFile, &pageCnt);
|
|
if (rc != REC_OK)
|
|
{
|
|
printMessage(std::string("Error counting pages."));
|
|
kRecCloseImgFile(hIFile);
|
|
kRecQuit();
|
|
}
|
|
|
|
if (_pages != NULL)
|
|
delete [] _pages;
|
|
|
|
_pages = new JSONObject[pageCnt];
|
|
|
|
//if there isn't a page range.
|
|
if (opts._nStartPage == 0 && opts._nEndPage == 0)
|
|
fixedImgFile = getFixedImgName(opts._inFile);
|
|
else
|
|
fixedImgFile = getFixedImgName(opts._inFile, opts._nStartPage, opts._nEndPage);
|
|
|
|
tmpImgFile = getTmpImgName(opts._tempFolder, opts._inFile);
|
|
|
|
JSONObject fartDoc;
|
|
fartDoc.setJSONValue("numPages", pageCnt);
|
|
fartDoc.setJSONValue("numBadPages", 0);
|
|
fartDoc.setJSONValue("originalSourceFile", opts._inFile.c_str());
|
|
fartDoc.setJSONValue("originalSourceType", getTypeByFileExt(opts._inFile).c_str());
|
|
fartDoc.setJSONValue("textSourceType", "NUANCE");
|
|
fartDoc.setJSONValue("fixedUp", false);
|
|
fartDoc.setJSONArray("pages");
|
|
fartDoc.setJSONArray("badPages");
|
|
|
|
//These are the default values
|
|
int page = 1;
|
|
int adjustedPage = 1;
|
|
int pageEnd = pageCnt;
|
|
//-------------------------
|
|
|
|
if (opts._nStartPage == 0 && opts._nEndPage == 0 || (opts._nEndPage < opts._nStartPage))
|
|
printMessage("No/invalid page range passed. Running full doc.");
|
|
else if (opts._nStartPage != 0)
|
|
page = opts._nStartPage;
|
|
if (opts._nEndPage != 0)
|
|
pageEnd = opts._nEndPage;
|
|
|
|
rc = REC_OK;
|
|
int nNumPagesErr = 0;
|
|
while ((nNumPagesErr <= nPagesinErrToAllow) && (page <= pageEnd))
|
|
{
|
|
rc = OCRPage(page, adjustedPage, pageRotated, opts._bSinglePageSave, opts._singlePageFolder);
|
|
// Blank pages return ZONE_NOTFOUND_WARN. Ignore blank page warnings.
|
|
if ((rc != REC_OK) && (rc != ZONE_NOTFOUND_WARN))
|
|
{
|
|
switch (rc)
|
|
{
|
|
case IMF_FONT_MISSING_WARN:
|
|
printMessage("MISSING FONT!\n");
|
|
if (nNumPagesErr <= nPagesinErrToAllow)
|
|
nRetVal |= OCR_MISSING_FONT_SKIPPED;
|
|
else
|
|
nRetVal |= OCR_MISSING_FONT;
|
|
++nNumPagesErr;
|
|
break;
|
|
case IMG_DPI_ERR:
|
|
case IMG_DPI_WARN:
|
|
printMessage("BAD RESOLUTION!\n");
|
|
if (nNumPagesErr <= nPagesinErrToAllow)
|
|
nRetVal |= OCR_RES_UNSUPPORTED_SKIPPED;
|
|
else
|
|
nRetVal |= OCR_RES_UNSUPPORTED;
|
|
++nNumPagesErr;
|
|
break;
|
|
case IMG_SIZE_ERR:
|
|
printMessage("IMAGE SIZE ERROR\n");
|
|
if (nNumPagesErr <= nPagesinErrToAllow)
|
|
nRetVal |= OCR_SIZE_UNSUPPORTED_SKIPPED;
|
|
else
|
|
nRetVal |= OCR_SIZE_UNSUPPORTED;
|
|
++nNumPagesErr;
|
|
break;
|
|
case NO_TXT_WARN:
|
|
// Just a warning that no text was found on the page. Don't need to do anything!
|
|
break;
|
|
default:
|
|
messageStream << "UNKNOWN ERROR ON PAGE " << (page + 1);
|
|
printMessage(messageStream.str());
|
|
messageStream.str("");
|
|
nRetVal = OCR_FAILED;
|
|
break;
|
|
}
|
|
}
|
|
page++;
|
|
}
|
|
|
|
kRecCloseImgFile(hIFile);
|
|
|
|
//-----------------------------------------------------------------------------------------------------------
|
|
|
|
JSONArray &pageArray = fartDoc.getJSONArray("pages");
|
|
JSONArray &badPageArray = fartDoc.getJSONArray("badPages");
|
|
for (int i = 0; i < pageCnt; i++)
|
|
{
|
|
if (_pages[i].get("badPage") == NULL)
|
|
pageArray.addElement(_pages[i]);
|
|
else
|
|
badPageArray.addElement(_pages[i]);
|
|
}
|
|
fartDoc.setJSONValue("numPages", pageArray.getnumelements());
|
|
fartDoc.setJSONValue("numBadPages", badPageArray.getnumelements());
|
|
|
|
//Close Nuance, save the frt file, and record time.
|
|
RecQuitPlus();
|
|
|
|
//if it was run in full, save without name modification
|
|
if (opts._nStartPage == 0 && opts._nEndPage == 0)
|
|
fartDoc.SaveToFile(outFile, opts._bPrettyOutput);
|
|
//if it was run in segments, save with page range in fileName.
|
|
else
|
|
fartDoc.SaveToFile(getFRTFilePageRangeName(outFile, opts._nStartPage, opts._nEndPage), opts._bPrettyOutput);
|
|
|
|
double avgPageTime = (totOCRTime / ((double)pageCnt));
|
|
|
|
messageStream << "Avg time per page: " << avgPageTime << "s";
|
|
printMessage(messageStream.str());
|
|
messageStream.str("");;
|
|
|
|
//If a page was rotated, move the image.
|
|
if (pageRotated)
|
|
{
|
|
messageStream << "Detected page rotation, moving" << tmpImgFile << " to " << fixedImgFile << ".";
|
|
printMessage(messageStream.str());
|
|
messageStream.str("");
|
|
|
|
copyFile(tmpImgFile, fixedImgFile);
|
|
}
|
|
//otherwise
|
|
else
|
|
std::remove(tmpImgFile.c_str()); //delete the temp file.
|
|
|
|
//return
|
|
return nRetVal;
|
|
}
|
|
|
|
/**
 * Builds the output file name used when only a page range was processed:
 * the input name without its extension, followed by "_<start>-<end>.frt".
 *
 * Only a dot in the final path component counts as the start of the
 * extension, so names such as "./out/doc.frt" or "/data/v1.2/doc.frt" keep
 * their directory part intact.
 *
 * @param inFile    original output file name.
 * @param startPage first page of the range.
 * @param endPage   last page of the range.
 * @return the range-specific file name.
 */
std::string CNuanceOCR::getFRTFilePageRangeName(std::string inFile, int startPage, int endPage)
{
    const std::string::size_type lastSep = inFile.find_last_of("/\\");
    const std::string::size_type lastDot = inFile.rfind('.');

    // A dot that sits before the last separator belongs to a directory name.
    const bool hasExtension = (lastDot != std::string::npos) &&
                              (lastSep == std::string::npos || lastDot > lastSep);
    const std::string stem = hasExtension ? inFile.substr(0, lastDot) : inFile;

    std::stringstream result;
    result << stem << "_" << startPage << "-" << endPage << ".frt";
    return result.str();
}
|
|
|
|
void CNuanceOCR::addToJSONPages(int pageNum, JSONObject page)
|
|
{
|
|
std::lock_guard<std::mutex> locker(pageLock);
|
|
_pages[pageNum] = page;
|
|
}
|
|
|
|
bool CNuanceOCR::verify()
|
|
{
|
|
return initializeNuance();
|
|
}
|
|
|
|
//--------------------------------------------------------------------------------------------------------------------------------------
|
|
|
|
//Maybe one day these can be used.
|
|
/**
 * Per-thread variant of OCRPage(): opens the file itself, recognises one
 * page and stores the resulting JSON page object via addToJSONPages().
 *
 * Engine calls that the original comments mark as not thread safe are made
 * while holding imgLock. If recognition fails, the page is rotated right in
 * 90 degree steps (up to three times) and retried. A page that cannot be
 * loaded or recognised is recorded as an empty page (with an "ocrerr" entry
 * for missing-font and resolution errors). A page that was recognised only
 * after rotation is additionally saved to its own fixed-image file.
 *
 * @param arg supplies the file name and the zero-based page number.
 */
void CNuanceOCR::OCRPageMT(OCRArg* arg)
{
    //may need to reinitialize on a new thread.
    printMessage(std::string("Initializing Nuance..."));
    initializeNuance();
    printMessage(std::string("Done."));

    const std::string inFile = arg->getFileName();
    const int pageNum = arg->getPageNum();
    HIMGFILE imgFile;
    bool fileOpened = false;

    HPAGE hPage = NULL;
    std::stringstream messageStream;
    RECERR rc;
    JSONObject pageObj;
    bool rotated = false;
    const std::string tmpFixedImgFile = getFixedImgName(inFile, pageNum);

    // Print whatever has been accumulated in messageStream, then reset it.
    auto flushMessage = [&]()
    {
        printMessage(messageStream.str());
        messageStream.str("");
    };

    // Release the page handle and the image file opened by this call.
    auto releaseResources = [&]()
    {
        if (hPage != NULL)
        {
            kRecFreeImg(hPage);
            hPage = NULL;
        }
        if (fileOpened)
        {
            std::lock_guard<decltype(imgLock)> guard(imgLock);
            kRecCloseImgFile(imgFile);
            fileOpened = false;
        }
    };

    //load the image into nuance, nuance claims this to be thread-safe
    printMessage(std::string("Locking image and loading page " + std::to_string(pageNum+1)));

    int ID;
    {
        std::lock_guard<decltype(imgLock)> guard(imgLock);
        ID = SID;
        rc = kRecOpenImgFile(inFile.c_str(), &imgFile, IMGF_READ, (IMF_FORMAT)0);
        fileOpened = (rc == REC_OK);
        // Only load when the open worked, so its error is not overwritten.
        if (fileOpened)
            rc = kRecLoadImg(0, imgFile, &hPage, pageNum);
    }

    if (rc != REC_OK)
    {
        messageStream << "ERROR: Cannot load page " << pageNum + 1 << "/" << pageCnt << ":" <<
            getError(rc) << " (" << rc << ")";
        flushMessage();
    }
    else
    {
        {
            std::lock_guard<decltype(imgLock)> guard(imgLock);
            rc = kRecPreprocessImg(0, hPage);	//Not thread safe.
        }
        if (rc != REC_OK)
        {
            messageStream << "ERROR: Cannot pre-process page " << pageNum + 1 << "/" << pageCnt << ":" <<
                getError(rc) << " (" << rc << ")";
            flushMessage();
        }
    }

    // At this point, if we are in error skip recognition, write the empty page, and bail.
    if (rc == REC_OK)
    {
        messageStream << "Recognizing page " << pageNum + 1 << "/" << pageCnt << ".";
        flushMessage();
        {
            std::lock_guard<decltype(imgLock)> guard(imgLock);
            rc = kRecRecognize(ID, hPage, NULL);	//also not thread safe :(
        }

        if (rc != REC_OK)
        {
            messageStream << "Failed recognition page " << pageNum + 1 << "/" << pageCnt << ": " << getError(rc) << " (" << rc << ")";
            flushMessage();

            // Try rotating the page, through all 270 degrees.
            int nRotation = 0;
            for (int attempt = 0; (rc != REC_OK) && (attempt < 3); ++attempt)
            {
                nRotation += 90;
                messageStream << "Trying to rotate page to " << nRotation << " deg.";
                flushMessage();

                rc = kRecRotateImg(0, hPage, ROT_RIGHT);
                if (rc != REC_OK)
                {
                    messageStream << "Error rotating page " << pageNum + 1 <<
                        " to " << nRotation << " deg, bailing out: " << getError(rc) << " (" << rc << ")" << std::endl;
                    flushMessage();
                    // The message promises to bail out, so stop retrying.
                    break;
                }

                //try and recognize the image again
                {
                    std::lock_guard<decltype(imgLock)> guard(imgLock);
                    rc = kRecRecognize(ID, hPage, NULL);
                }
                if (rc != REC_OK)
                {
                    messageStream << "Error recognizing rotated page " << pageNum + 1 <<
                        " to " << nRotation << " deg: " << getError(rc) << " (" << rc << ")" << std::endl;
                    flushMessage();
                    // rc stays in error so that the loop tries the next rotation;
                    // resetting it here used to end the loop and hide the failure.
                }
                else
                {
                    //save that we rotated the page
                    rotated = true;
                }
            }
        }
    }

    // At this point, we should have an OCR'd page.  If not, bail!
    if (rc != REC_OK)
    {
        messageStream << "Nothing on page " << pageNum + 1 << "." << std::endl;
        flushMessage();

        pageObj.setJSONValue("page", pageNum + 1);
        pageObj.setJSONValue("numLines", 0);
        pageObj.setJSONArray("lines");
        if (rc == IMF_FONT_MISSING_WARN)
            pageObj.setJSONValue("ocrerr", "OCR_MISSING_FONT_SKIPPED");
        else if ((rc == IMG_DPI_ERR) || (rc == IMG_DPI_WARN))
            pageObj.setJSONValue("ocrerr", "OCR_RES_UNSUPPORTED_SKIPPED");
        addToJSONPages(pageNum, pageObj);

        messageStream << "Error on page " << pageNum + 1 << " \n " << getError(rc);
        flushMessage();

        // Stop here: there is no page to read letters from.
        releaseResources();
        return;
    }

    //if the page was rotated
    if (hPage != NULL && rotated)
    {
        const IMF_FORMAT nImgFormat = isPDF ? FF_PDF_GOOD : FF_TIFNO;

        messageStream << "Saving page to " << ((isPDF) ? "PDF" : "TIFF") << " file.\n";
        flushMessage();

        //We always want to save this as the only page in the document...
        rc = kRecSaveImgF(ID, tmpFixedImgFile.c_str(), nImgFormat, hPage, II_CURRENT, true);
        if (rc != REC_OK)
        {
            messageStream << "Error saving page: " << getError(rc) << " (" << rc << ")" << "\n";
            flushMessage();
        }
    }

    //-----------------------------------------------------------------------------------------------------------
    // Character extraction logic
    //-----------------------------------------------------------------------------------------------------------
    LONG numLetters = 0;
    LETTER *pletters = NULL;
    rc = kRecGetLetters(hPage, II_CURRENT, &pletters, &numLetters);
    // Without a letter buffer there is nothing to read, whatever count was reported.
    if (pletters == NULL || numLetters < 0)
        numLetters = 0;

    // Heap-backed storage: a page can hold many letters, and an empty page needs size 0.
    const std::vector<int>::size_type letterSlots = static_cast<std::vector<int>::size_type>(numLetters);
    const int letterTotal = static_cast<int>(numLetters);
    std::vector<char> chars(letterSlots);
    std::vector<int> lefts(letterSlots);
    std::vector<int> rights(letterSlots);
    std::vector<int> bottoms(letterSlots);
    std::vector<int> tops(letterSlots);
    std::vector<int> zones(letterSlots);
    std::vector<unsigned char> confs(letterSlots);

    IMG_INFO pageInfo = { 0 };
    if (kRecGetImgInfo(ID, hPage, II_CURRENT, &pageInfo) != REC_OK)
        // Hmmm, can't get image info? Punt, and default to 300x300dpi.
        pageInfo.DPI.cx = pageInfo.DPI.cy = 300;
    // A zero/negative resolution would make the scale factors meaningless.
    if (pageInfo.DPI.cx <= 0)
        pageInfo.DPI.cx = 300;
    if (pageInfo.DPI.cy <= 0)
        pageInfo.DPI.cy = 300;

    // Calculate the mult factor, using the page's resolution.
    const double multX = SCALE_FACTOR / static_cast<double>(pageInfo.DPI.cx);
    const double multY = SCALE_FACTOR / static_cast<double>(pageInfo.DPI.cy);

    //pull the letter info out (indexing keeps pletters intact so it can be freed)
    for (int i = 0; i < letterTotal; i++)
    {
        const LETTER &letter = pletters[i];
        chars[i] = letter.code;
        lefts[i] = static_cast<int>(static_cast<double>(letter.left) * multX);
        tops[i] = static_cast<int>(static_cast<double>(letter.top) * multY);
        confs[i] = letter.err;
        // bottom = top + scaled height, right = left + scaled width
        bottoms[i] = tops[i] + (static_cast<int>(static_cast<double>(letter.height) * multY));
        rights[i] = lefts[i] + (static_cast<int>(static_cast<double>(letter.width) * multX));
        zones[i] = letter.zone;
    }
    // Same release call OCRPage() uses for the letter buffer.
    if (pletters != NULL)
        kRecFree(pletters);

    //start the JSON stuff
    const int wordsTot = getNumWords(chars.data(), numLetters);
    pageObj.setJSONValue("page", pageNum + 1);
    pageObj.setJSONValue("numLines", wordsTot);
    pageObj.setJSONArray("lines");
    JSONArray &linesArray = pageObj.getJSONArray("lines");

    int numWords = 0;
    int wLen = 0;

    //character logic
    for (int i = 0; i < letterTotal; i += wLen)
    {
        JSONObject lineObj;

        // Length of the word: check the bound before touching chars[].
        wLen = 0;
        while (i + wLen < letterTotal && chars[i + wLen] != ' ')
            wLen++;
        // Take the terminating space along, but only if there is one; the
        // last word on the page may end without it.
        if (i + wLen < letterTotal)
            wLen++;

        // NOTE(review): getMin/getMax receive (i, i + wLen) exactly as before;
        // confirm that they treat the second argument as an exclusive end.
        lineObj.setJSONValue("line", numWords);
        lineObj.setJSONValue("zone", zones[i]);
        lineObj.setJSONValue("page", pageNum + 1);
        lineObj.setJSONValue("xmin", getMin(i, i + wLen, lefts.data()));
        lineObj.setJSONValue("ymin", getMin(i, i + wLen, tops.data()));
        lineObj.setJSONValue("xmax", getMax(i, i + wLen, rights.data()));
        lineObj.setJSONValue("ymax", getMax(i, i + wLen, bottoms.data()));
        lineObj.setJSONValue("numChars", wLen);

        //write the chars: printable ASCII except the double quote, everything else becomes a space
        std::stringstream tmpStr;
        for (int j = 0; j < wLen; j++)
        {
            const int code = static_cast<int>(chars[i + j]);
            if (code < 128 && code >= 32 && code != 34)
                tmpStr << chars[i + j];
            else
                tmpStr << " ";
        }
        lineObj.setJSONValue("chars", tmpStr.str().c_str());

        // One converted confidence per character, built from the low seven bits of err.
        tmpStr.str("");
        for (int j = 0; j < wLen; j++)
        {
            long confB = ((getBit(confs[i + j], 6) * 1000000) + (getBit(confs[i + j], 5) * 100000) + (getBit(confs[i + j], 4) * 10000) + (getBit(confs[i + j], 3) * 1000) +
                (getBit(confs[i + j], 2) * 100) + (getBit(confs[i + j], 1) * 10) + getBit(confs[i + j], 0));

            long confI = fromBin(confB);
            tmpStr << convertConfidence(confI);
        }
        lineObj.setJSONValue("confs", tmpStr.str().c_str());

        // Per-character boxes, one array per edge.
        auto appendCoords = [&](const char *key, const std::vector<int> &values)
        {
            lineObj.setJSONArray(key);
            JSONArray &coordArray = lineObj.getJSONArray(key);
            for (int j = 0; j < wLen; j++)
                coordArray.addElement(values[i + j]);
        };
        appendCoords("xmins", lefts);
        appendCoords("ymins", tops);
        appendCoords("xmaxs", rights);
        appendCoords("ymaxs", bottoms);

        numWords++;
        linesArray.addElement(lineObj);
    }
    //add the page to the JSON pages array, this is thread safe.
    addToJSONPages(pageNum, pageObj);

    releaseResources();
}
|
|
|
|
int CNuanceOCR::OCRMT(const std::string &inFile, int threadCount)
|
|
{
|
|
initializeNuance();
|
|
HPAGE page;
|
|
bool result;
|
|
RECERR nRetVal = REC_OK;
|
|
std::stringstream msgStr;
|
|
|
|
imgLock.lock();
|
|
nRetVal = kRecOpenImgFile(inFile.c_str(), &hIFile, IMGF_READ, (IMF_FORMAT)0);
|
|
if (nRetVal != REC_OK)
|
|
{
|
|
printMessage(std::string("Couldn't load file " + inFile));
|
|
kRecQuit();
|
|
}
|
|
imgLock.unlock();
|
|
|
|
//Set the output to XML
|
|
printMessage(std::string("Setting output to XML"));
|
|
nRetVal = kRecSetDTXTFormat(SID, DTXT_XMLCOORD);
|
|
if (nRetVal != REC_OK)
|
|
{
|
|
msgStr << getError(nRetVal);
|
|
printMessage(msgStr.str());
|
|
msgStr.str("");
|
|
}
|
|
//Create an array of the pages
|
|
|
|
int pageCnt;
|
|
nRetVal = kRecGetImgFilePageCount(hIFile, &pageCnt);
|
|
if (nRetVal != REC_OK)
|
|
{
|
|
msgStr << "Error getting the page count: " << getError(nRetVal);
|
|
printMessage(msgStr.str());
|
|
return false;
|
|
|
|
}
|
|
msgStr << "page count = " << pageCnt;
|
|
printMessage(msgStr.str());
|
|
msgStr.str("");
|
|
//-------------------------------------------------------------------------------------------------------------------------------
|
|
//set the OCR thread count
|
|
msgStr << "Setting thread count to " << threadCount << std::endl;
|
|
printMessage(msgStr.str());
|
|
msgStr.str("");
|
|
|
|
nRetVal = RecSetOCRThreadCount(SID, threadCount);
|
|
if (nRetVal != REC_OK)
|
|
{
|
|
msgStr << getError(nRetVal);
|
|
printMessage(msgStr.str());
|
|
msgStr.str("");
|
|
}
|
|
|
|
nRetVal = RecGetOCRThreadCount(SID, &threadCount);
|
|
if (nRetVal != REC_OK)
|
|
{
|
|
msgStr << "Error getting the thread count: " << getError(nRetVal);
|
|
printMessage(msgStr.str());
|
|
return false;
|
|
}
|
|
|
|
msgStr << "Seccessfully set thread count to " << threadCount;
|
|
printMessage(msgStr.str());
|
|
msgStr.str("");
|
|
//--------------------------------------------------------------------------------------------------------------------------------
|
|
|
|
//load the page into the page array
|
|
HPAGE pages[pageCnt];
|
|
|
|
for (int i = 0; i < pageCnt; i++)
|
|
{
|
|
//msgstr << "adding page " << i+1 << "/" << pagecnt << " to the page array.";
|
|
//printmessage(msgstr.str());
|
|
//msgstr.str("");
|
|
HPAGE tmpPage;
|
|
nRetVal =kRecLoadImg(SID, hIFile, &tmpPage, i);
|
|
if (nRetVal != REC_OK)
|
|
{
|
|
msgStr << "Error getting page " << i << std::endl << " ";
|
|
msgStr << getError(nRetVal);
|
|
printMessage(msgStr.str());
|
|
msgStr.str("");
|
|
}
|
|
//save the page info
|
|
pages[i] = tmpPage;
|
|
|
|
msgStr << "page " << i + 1 << " = " << pages[i];
|
|
printMessage(msgStr.str());
|
|
msgStr.str("");
|
|
}
|
|
|
|
//run Nuance
|
|
msgStr << "Writing OCR output to " << getXMLName(inFile);
|
|
printMessage(msgStr.str());
|
|
msgStr.str("");
|
|
|
|
for (int i = 0; i < pageCnt; i++)
|
|
{
|
|
nRetVal = kRecRecognize(SID, pages[i], getXMLName(inFile).c_str());
|
|
if (nRetVal != REC_OK)
|
|
{
|
|
msgStr << "Error recognizing page " << i << std::endl;
|
|
msgStr << getError(nRetVal);
|
|
printMessage(msgStr.str());
|
|
msgStr.str("");
|
|
}
|
|
}
|
|
|
|
nRetVal = kRecConvert2DTXTEx(SID, pages, pageCnt, II_CURRENT, getXMLName(inFile).c_str());
|
|
if (nRetVal != REC_OK)
|
|
{
|
|
const char* errorInfo;
|
|
kRecGetErrorInfo(nRetVal, &errorInfo);
|
|
//msgStr << getError(nRetVal);
|
|
std::string strErr(errorInfo);
|
|
printMessage(strErr);
|
|
msgStr.str("");
|
|
}
|
|
|
|
return nRetVal;
|
|
}
|
|
|
|
//Split to lines prototype (doesn't work so well...)
|
|
//---------------------------------------------------------------------------------------------------------------------------------------------
|
|
//JSONArray splitToLines(word *words, int wordCnt)
|
|
//{
|
|
// int lineNum = 1;
|
|
// JSONArray lines;
|
|
// word previous = words[0];
|
|
// //Go through the coordinate bounds for each word
|
|
// for (int i = 0; i < wordCnt; i++)
|
|
// {
|
|
// JSONObject line;
|
|
// std::string lineVal;
|
|
// std::vector<int> tops;
|
|
// std::vector<int> bottoms;
|
|
// std::vector<int> lefts;
|
|
// std::vector<int> rights;
|
|
// std::vector<int> confs;
|
|
// int numChars;
|
|
//
|
|
// word current = words[i];
|
|
// //if a word's upper left is larger than the previous one's lower right
|
|
// if (current.MinY > previous.MaxY)
|
|
// {
|
|
// //set the values
|
|
// line.setJSONValue("line", lineNum);
|
|
// line.setJSONValue("zone", current.Zone);
|
|
// line.setJSONValue("page", current.Page);
|
|
// /*line.setJSONValue("xmin", getMin(i, i + wLen, lefts));
|
|
// line.setJSONValue("ymin", getMin(i, i + wLen, bottoms));
|
|
// line.setJSONValue("xmax", getMax(i, i + wLen, tops));
|
|
// line.setJSONValue("ymax", getMax(i, i + wLen, bottoms));
|
|
// line.setJSONValue("numChars", wLen);*/
|
|
// //start a new line
|
|
// lines.addElement(line);
|
|
// lineNum++;
|
|
// }
|
|
// else
|
|
// {
|
|
// //add the values in the line
|
|
// for (int k = 0; k < current.Length; k++)
|
|
// {
|
|
// tops.push_back(current.Tops[k]);
|
|
// bottoms.push_back(current.Bottoms[k]);
|
|
// rights.push_back(current.Rights[k]);
|
|
// lefts.push_back(current.Lefts[k]);
|
|
// confs.push_back(current.Confidences[k]);
|
|
// }
|
|
// lineVal += current.value;
|
|
// numChars += current.Length;
|
|
// }
|
|
// //point previous to current
|
|
// *previous = &current;
|
|
// }
|
|
//
|
|
//
|
|
//
|
|
// return lines;
|
|
//}
|
|
//---------------------------------------------------------------------------------------------------------------------------------------------
|
|
|
|
//bool CNuanceOCR::OCR(const std::string &inFile, const std::string &outFile)
|
|
//{
|
|
//
|
|
// initializeNuance();
|
|
//
|
|
// JSONObject fartDoc;
|
|
// fartDoc.setJSONValue("numPages", pageCnt);
|
|
// fartDoc.setJSONValue("originalSourceFile", inFile.c_str());
|
|
// fartDoc.setJSONValue("originalSourceType", getTypeByFileExt(inFile).c_str());
|
|
// fartDoc.setJSONValue("textSourceType", "NUANCE");
|
|
// fartDoc.setJSONValue("fixedUp", false);
|
|
// fartDoc.setJSONArray("pages");
|
|
// JSONArray &pageArray = fartDoc.getJSONArray("pages");
|
|
//
|
|
// HPAGE hPage;
|
|
// RECERR rc;
|
|
// double duration;
|
|
// std::stringstream messageStream;
|
|
// clock_t start;
|
|
//
|
|
//
|
|
// rc = kRecOpenImgFile(inFile.c_str(), &hIFile, IMGF_READ, (IMF_FORMAT)0);
|
|
// if (rc != REC_OK)
|
|
// {
|
|
// printMessage(std::string("Couldn't load file " + inFile));
|
|
// kRecQuit();
|
|
// }
|
|
//
|
|
// printMessage(std::string("File loaded successfuly!"));
|
|
//
|
|
// rc = kRecGetImgFilePageCount(hIFile, &pageCnt);
|
|
// if (rc != REC_OK)
|
|
// {
|
|
// printMessage(std::string("Error counting pages."));
|
|
// kRecQuit();
|
|
// }
|
|
|
|
// for (int page = 0; page < pageCnt; ++page)
|
|
// {
|
|
// JSONObject pageObj;
|
|
//
|
|
// rc = kRecLoadImg(0, hIFile, &hPage, page);
|
|
// rc = kRecPreprocessImg(0, hPage);
|
|
//
|
|
// messageStream << "Recognizing page " << page + 1 << "/" << pageCnt << "\n";
|
|
// printMessage(messageStream.str());
|
|
// messageStream.str("");
|
|
//
|
|
// start = clock();
|
|
// rc = kRecRecognize(SID, hPage, NULL);
|
|
// duration = (clock() - start) / ((double)CLOCKS_PER_SEC);
|
|
// totOCRTime += duration;
|
|
//
|
|
// messageStream << "Done in " << duration << "s.";
|
|
// printMessage(messageStream.str());
|
|
// messageStream.str("");;
|
|
//
|
|
// if (rc != REC_OK)
|
|
// {
|
|
// //std::cout<<"Error" <<std::endl;
|
|
// messageStream << "Trying to rotate page " << page;
|
|
// printMessage(messageStream.str());
|
|
// messageStream.str("");;
|
|
//
|
|
// rc = kRecRotateImg(0, hPage, ROT_AUTO);
|
|
// rc = kRecRecognize(SID, hPage, NULL);
|
|
// if (rc != REC_OK)
|
|
// {
|
|
// //std::cout<<"Error" <<std::endl;
|
|
// messageStream << "Nothing on page " << page;
|
|
// printMessage(messageStream.str());
|
|
// messageStream.str("");;
|
|
// //release the page
|
|
// kRecFreeImg(hPage);
|
|
//
|
|
// pageObj.setJSONValue("page", page + 1);
|
|
// pageObj.setJSONValue("numLines", 0);
|
|
// pageObj.setJSONArray("lines");
|
|
// pageArray.addElement(pageObj);
|
|
// continue;
|
|
// }
|
|
// }
|
|
//
|
|
// if (isTIF)
|
|
// rc = kRecSaveImgF(SID, fixedImgFile.c_str(), FF_TIFNO, hPage, II_CURRENT, true);
|
|
// else if (isPDF)
|
|
// rc = kRecSaveImgF(SID, fixedImgFile.c_str(), FF_PDF_GOOD, hPage, II_CURRENT, true);
|
|
//
|
|
// if (!rc == REC_OK)
|
|
// {
|
|
// messageStream << "Error writing to the fixed file:\n"
|
|
// << " " << fixedImgFile << "\n"
|
|
// << " Page " << page;
|
|
// printMessage(messageStream.str());
|
|
// messageStream.str("");;
|
|
// }
|
|
//
|
|
// //-----------------------------------------------------------------------------------------------------------
|
|
// LONG numLetters = 0;
|
|
// LETTER* letters[1];
|
|
// rc = kRecGetLetters(hPage, II_CURRENT, letters, &numLetters);
|
|
//
|
|
// char chars[numLetters];
|
|
// int lefts[numLetters];
|
|
// int rights[numLetters];
|
|
// int bottoms[numLetters];
|
|
// int tops[numLetters];
|
|
// int zones[numLetters];
|
|
// unsigned char confs[numLetters];
|
|
//
|
|
// if (rc != REC_OK)
|
|
// {
|
|
// //std::cout<<"Error" <<std::endl;
|
|
// RecQuitPlus();
|
|
// return false;
|
|
// }
|
|
//
|
|
// //-----------------------------------------------------------------------------------------------------------------------
|
|
// IMG_INFO imgInfo;
|
|
//
|
|
// rc = kRecGetImgInfo(SID, hPage, II_CURRENT, &imgInfo);
|
|
//
|
|
// //TODO figure out how to adjust the multiplier based on the DPI of the image
|
|
// //First figure out what the hell the DPI actually is?
|
|
// //SIZE dpi = imgInfo.DPI;
|
|
// //-----------------------------------------------------------------------------------------------------------------------
|
|
// int mult = 6;
|
|
//
|
|
// for (int i = 0; i < numLetters; i++)
|
|
// {
|
|
// chars[i] = letters[0]->code;
|
|
// lefts[i] = letters[0]->left*mult;
|
|
// tops[i] = letters[0]->top*mult;
|
|
// confs[i] = letters[0]->err;
|
|
// bottoms[i] = tops[i] + (letters[0]->height*mult);
|
|
// rights[i] = lefts[i] + (letters[0]->width*mult);
|
|
// zones[i] = letters[0]->zone;
|
|
// letters[0]++;
|
|
// }
|
|
//
|
|
// int wordsTot = getNumWords(chars, numLetters);
|
|
// pageObj.setJSONValue("page", page + 1);
|
|
// pageObj.setJSONValue("numLines", wordsTot);
|
|
// pageObj.setJSONArray("lines");
|
|
// JSONArray &linesArray = pageObj.getJSONArray("lines");
|
|
//
|
|
// int numWords = 0;
|
|
// int wLen = 0;
|
|
// for (int i = 0; i < numLetters; i += wLen)
|
|
// {
|
|
// JSONObject lineObj;
|
|
// wLen = 0;
|
|
// //get the length of the word
|
|
// while (chars[i + wLen] != ' ' && i + wLen < numLetters)
|
|
// wLen++;
|
|
// wLen++;
|
|
// lineObj.setJSONValue("line", numWords);
|
|
// lineObj.setJSONValue("zone", zones[i]);
|
|
// lineObj.setJSONValue("page", page + 1);
|
|
// lineObj.setJSONValue("xmin", getMin(i, i + wLen, lefts));
|
|
// lineObj.setJSONValue("ymin", getMin(i, i + wLen, bottoms));
|
|
// lineObj.setJSONValue("xmax", getMax(i, i + wLen, tops));
|
|
// lineObj.setJSONValue("ymax", getMax(i, i + wLen, bottoms));
|
|
// lineObj.setJSONValue("numChars", wLen);
|
|
//
|
|
// std::stringstream tmpStr;
|
|
// //write the chars
|
|
// for (int j = 0; j < wLen; j++)
|
|
// {
|
|
// if (static_cast<int>(chars[i + j]) < 128 && static_cast<int>(chars[i + j]) >= 32 && static_cast<int>(chars[i + j]) != 34)
|
|
// tmpStr << chars[i + j];
|
|
// else
|
|
// tmpStr << " ";
|
|
// }
|
|
// lineObj.setJSONValue("chars",tmpStr.str().c_str());
|
|
//
|
|
// tmpStr.str("");
|
|
// for (int j = 0; j < wLen; j++)
|
|
// {
|
|
// long confB = ((getBit(confs[i + j], 6) * 1000000) + (getBit(confs[i + j], 5) * 100000) + (getBit(confs[i + j], 4) * 10000) + (getBit(confs[i + j], 3) * 1000) +
|
|
// (getBit(confs[i + j], 2) * 100) + (getBit(confs[i + j], 1) * 10) + getBit(confs[i + j], 0));
|
|
//
|
|
// long confI = fromBin(confB);
|
|
// tmpStr << convertConfidence(confI);
|
|
// }
|
|
// lineObj.setJSONValue("confs", tmpStr.str().c_str());
|
|
//
|
|
// lineObj.setJSONArray("xmins");
|
|
// {
|
|
// JSONArray &xminsArray = lineObj.getJSONArray("xmins");
|
|
// for (int j = 0; j < wLen; j++)
|
|
// xminsArray.addElement(lefts[i + j]);
|
|
// }
|
|
//
|
|
// lineObj.setJSONArray("ymins");
|
|
// {
|
|
// JSONArray &yminsArray = lineObj.getJSONArray("ymins");
|
|
// for (int j = 0; j < wLen; j++)
|
|
// yminsArray.addElement(tops[i + j]);
|
|
// }
|
|
//
|
|
// lineObj.setJSONArray("xmaxs");
|
|
// {
|
|
// JSONArray &xmaxsArray = lineObj.getJSONArray("xmaxs");
|
|
// //Add bottom right
|
|
// for (int j = 0; j < wLen; j++)
|
|
// xmaxsArray.addElement(rights[i + j]);
|
|
// }
|
|
//
|
|
// lineObj.setJSONArray("ymaxs");
|
|
// {
|
|
// JSONArray &ymaxsArray = lineObj.getJSONArray("ymaxs");
|
|
// for (int j = 0; j < wLen; j++)
|
|
// ymaxsArray.addElement(bottoms[i + j]);
|
|
// }
|
|
//
|
|
// numWords++;
|
|
// linesArray.addElement(lineObj);
|
|
// }
|
|
// pageArray.addElement(pageObj);
|
|
// }
|
|
// //-----------------------------------------------------------------------------------------------------------
|
|
//
|
|
// RecQuitPlus();
|
|
//
|
|
// fartDoc.SaveToFile(outFile);
|
|
// double avgPageTime = (totOCRTime / ((double)pageCnt));
|
|
//
|
|
// messageStream << "Avg time per page: \033[1;34m" << avgPageTime << "s\033[0m";
|
|
// printMessage(messageStream.str());
|
|
// messageStream.str("");;
|
|
//
|
|
// return true;
|
|
//}
|