// // Copyright (c) 2016, Sequence Logic // #include "nuanceocr.h" #include "ocrbase.h" #include "KernelApi.h" #include "RecApiPlus.h" #include "jsonobject.h" #include "sledsconstants.h" #include "Task.h" #include "ThreadPool.h" #include "Global.h" using namespace sequencelogic; namespace { struct word { int MaxY; int MaxX; int MinY; int MinX; int Page; int Length; int Tops[]; int Bottoms[]; int Lefts[]; int Rights[]; int Confidences[]; int Zone; std::string value; }; const double SCALE_FACTOR = 1200.0; } CNuanceOCR::CNuanceOCR(std::string &inFile) : COCRBase(inFile), _pages(NULL), _imgPages(NULL) { // Set up the engine. // This is where the page count gets set initializeNuance(); loadImgFile(inFile); //_pages = new JSONObject[pageCnt]; //_imgPages = new HPAGE[pageCnt]; } CNuanceOCR::~CNuanceOCR() { delete [] _pages; delete [] _imgPages; } void CNuanceOCR::addTime(double runTime) { std::lock_guard timeLocker(timeLock); totOCRTime += runTime; } //have debug code here================================================================================================================================================================ RECERR CNuanceOCR::OCRPage(int pageNum, int &adjustedPage, bool &rotated, bool singlePageSave, const std::string &singlePageFolder) { RECERR nRetVal = REC_OK; HPAGE hPage = NULL; std::stringstream messageStream; RECERR rc; double duration; clock_t start; JSONObject pageObj; bool cantRotate = false; //load the image into nuance rc = kRecLoadImg(0, hIFile, &hPage, pageNum - 1); if (rc != REC_OK) { if (hPage == NULL) messageStream << "ERROR: Cannot load page " << pageNum << "/" << pageCnt << ":" << getError(rc) << " (" << rc << ")"; else { messageStream << "WARNING: Page " << pageNum << "/" << pageCnt << ":" << getError(rc) << " (" << rc << ")"; rc = REC_OK; } printMessage(messageStream.str()); messageStream.str(""); } else { rc = kRecPreprocessImg(0, hPage); if (rc != REC_OK) { if (hPage == NULL) messageStream << "ERROR: Cannot pre-process page " 
<< pageNum << "/" << pageCnt << ":" << getError(rc) << " (" << rc << ")"; else { messageStream << "WARNING: Pre-processing page " << pageNum << "/" << pageCnt << ":" << getError(rc) << " (" << rc << ")"; rc = REC_OK; } printMessage(messageStream.str()); messageStream.str(""); } else { // See if Nuance did anything to the image... PREPROC_INFO preprocInfo; if (kRecGetPreprocessInfo(hPage, &preprocInfo) == REC_OK) { if ( ((preprocInfo.Flags & PREPROC_INFO_3DDESKEW) == PREPROC_INFO_3DDESKEW) || ((preprocInfo.Flags & PREPROC_INFO_STRAIGHTENED) == PREPROC_INFO_STRAIGHTENED) ) { printMessage("Page was straightened."); rotated = true; } else if (preprocInfo.Rotation != ROT_NO) { rotated = true; switch (preprocInfo.Rotation) { case ROT_RIGHT: printMessage("Page was rotated image to the right (clockwise)."); break; case ROT_DOWN: printMessage("Page was rotated image down (rotate twice)."); break; case ROT_LEFT: printMessage("Page was rotated image to the left (anti-clockwise)."); break; case ROT_FLIPPED: printMessage("Page was mirrored without rotation (mirrored around the Y-axis)."); break; case ROT_RIGHT_FLIPPED: printMessage("Page was mirrored, then rotate to the right."); break; case ROT_DOWN_FLIPPED: printMessage("Page was mirrored image, then rotate twice."); break; case ROT_LEFT_FLIPPED: printMessage("Page was mirrored, then rotate to the left."); break; } } } } } // If we have a NULL image handle, bail! if (hPage == NULL) { rotated = true; // Write into the bad page object. 
pageObj.setJSONValue("badPage", pageNum); addToJSONPages(pageNum-1, pageObj); return rc; } messageStream << "Recognizing page " << pageNum << "/" << pageCnt << "."; printMessage(messageStream.str()); messageStream.str(""); start = clock(); rc = kRecRecognize(SID, hPage, NULL); if (rc != REC_OK) { messageStream << "WARNING: Failed recognition page " << pageNum << "/" << pageCnt << ": " << getError(rc) << " (" << rc << ")"; printMessage(messageStream.str()); messageStream.str(""); // Try rotating the page, through all 270 degrees. int nRotation = 0; for (int i = 0; (rc != REC_OK) && (i < 3); ++i) { nRotation += 90; messageStream << "Trying to rotate page to " << nRotation << " deg."; printMessage(messageStream.str()); messageStream.str(""); rc = kRecRotateImg(0, hPage, ROT_RIGHT); if (rc == REC_OK) { rc = kRecRecognize(SID, hPage, NULL); if (rc != REC_OK) { messageStream << "WARNING: Recognizing rotated page " << pageNum << " to " << nRotation << " deg: " << getError(rc) << " (" << rc << ")" << std::endl; printMessage(messageStream.str()); messageStream.str(""); } else rotated = true; } else { messageStream << "Error rotating page " << pageNum << " to " << nRotation << " deg, bailing out: " << getError(rc) << " (" << rc << ")" << std::endl; printMessage(messageStream.str()); messageStream.str(""); } } } if (rc != REC_OK) { // There was an error rotating the page, or we didn't find anything. Re-load the original // page to get the correct orientation for writing, then bail. kRecFreeImg(hPage); kRecLoadImg(0, hIFile, &hPage, pageNum - 1); nRetVal = rc; messageStream << "Nothing on page " << pageNum << "." 
<< std::endl; printMessage(messageStream.str()); messageStream.str(""); pageObj.setJSONValue("page", adjustedPage++); pageObj.setJSONValue("numLines", 0); pageObj.setJSONArray("lines"); addToJSONPages(pageNum - 1, pageObj); } else { duration = (clock() - start) / ((double)CLOCKS_PER_SEC); totOCRTime += duration; messageStream << "Done in " << duration << "s."; printMessage(messageStream.str()); messageStream.str(""); } if (hPage != NULL) { IMF_FORMAT nImgFormat = FF_TIFG4; if (isPDF) nImgFormat = FF_PDF_GOOD; //save the image if (singlePageSave) { printMessage("Saving page to PNG file."); std::stringstream saveLoc; saveLoc << std::setw(5) << std::setfill('0') << pageNum; std::string saveFile = singlePageFolder + "/" + "page" + saveLoc.str() + ".png"; rc = kRecSaveImgF(SID, saveFile.c_str(), FF_PNG, hPage, II_CURRENT, false); //save a png Image and don't overwrite } //always need this file or gs is unreliable. messageStream << "Saving page to " << ((isPDF) ? "PDF" : "TIFF") << " file.\n"; printMessage(messageStream.str()); messageStream.str(""); //rc = kRecSaveImgF(SID, fixedImgFile.c_str(), nImgFormat, hPage, II_CURRENT, true); rc = kRecSaveImgF(SID, tmpImgFile.c_str(), nImgFormat, hPage, II_CURRENT, true); //did it work? if (rc != REC_OK) { messageStream << "Error saving page: " << getError(rc) << " (" << rc << ")" << "\n"; printMessage(messageStream.str()); messageStream.str(""); } } if (nRetVal != REC_OK) { kRecFreeImg(hPage); pageObj.setJSONValue("badPage", pageNum); addToJSONPages(pageNum-1, pageObj); return nRetVal; } // else // { // // Write into the bad page object. 
// pageObj.setJSONValue("badPage", pageNum); // addToJSONPages(pageNum-1, pageObj); // // kRecFreeImg(hPage); // return rc; // } //----------------------------------------------------------------------------------------------------------- // Character extraction logic //----------------------------------------------------------------------------------------------------------- LONG numLetters = 0; LETTER *pletters; rc = kRecGetLetters(hPage, II_CURRENT, &pletters, &numLetters); //set up the arrays for the variables char chars[numLetters]; int lefts[numLetters]; int rights[numLetters]; int bottoms[numLetters]; int tops[numLetters]; int zones[numLetters]; unsigned char confs[numLetters]; IMG_INFO pageInfo = {0}; if (kRecGetImgInfo(SID, hPage, II_CURRENT, &pageInfo) != REC_OK) // Hmmm, can't get image info? Punt, and default to 300x300dpi. pageInfo.DPI.cx = pageInfo.DPI.cy = 300; // Calculate the mult factor, using the page's resolution. double multX = SCALE_FACTOR / static_cast(pageInfo.DPI.cx); double multY = SCALE_FACTOR / static_cast(pageInfo.DPI.cy); //pull the letter info out LETTER *pTmpLetter = pletters; for (int i = 0; i < numLetters; i++) { chars[i] = pTmpLetter->code; lefts[i] = static_cast(static_cast(pTmpLetter->left)*multX); tops[i] = static_cast(static_cast(pTmpLetter->top)*multY); confs[i] = pTmpLetter->err; //have to do some quick math to get the bottoms and rights bottoms[i] = tops[i] + (static_cast(static_cast(pTmpLetter->height)*multY)); // add the height to get the bottoms rights[i] = lefts[i] + (static_cast(static_cast(pTmpLetter->width)*multX)); // add the width to get the rights zones[i] = pTmpLetter->zone; pTmpLetter++; } kRecFree(pletters); //start the JSON stuff int wordsTot = getNumWords(chars, numLetters); pageObj.setJSONValue("page", adjustedPage); pageObj.setJSONValue("numLines", wordsTot); pageObj.setJSONArray("lines"); JSONArray &linesArray = pageObj.getJSONArray("lines"); int numWords = 0; int wLen = 0; for (int i = 0; i < numLetters; 
i += wLen) { JSONObject lineObj; wLen = 0; //get the length of the word while (chars[i + wLen] != ' ' && i + wLen < numLetters) { wLen++; chars[i + wLen]; } wLen++; lineObj.setJSONValue("line", numWords); lineObj.setJSONValue("zone", zones[i]); lineObj.setJSONValue("page", adjustedPage); lineObj.setJSONValue("xmin", getMin(i, i + wLen, lefts)); lineObj.setJSONValue("ymin", getMin(i, i + wLen, tops)); lineObj.setJSONValue("xmax", getMax(i, i + wLen, rights)); lineObj.setJSONValue("ymax", getMax(i, i + wLen, bottoms)); lineObj.setJSONValue("numChars", wLen); std::stringstream tmpStr; //write the chars for (int j = 0; j < wLen; j++) { if (static_cast(chars[i + j]) < 128 && static_cast(chars[i + j]) >= 32 && static_cast(chars[i + j]) != 34) tmpStr << chars[i + j]; else tmpStr << " "; } lineObj.setJSONValue("chars", tmpStr.str().c_str()); tmpStr.str(""); for (int j = 0; j < wLen; j++) { long confB = ((getBit(confs[i + j], 6) * 1000000) + (getBit(confs[i + j], 5) * 100000) + (getBit(confs[i + j], 4) * 10000) + (getBit(confs[i + j], 3) * 1000) + (getBit(confs[i + j], 2) * 100) + (getBit(confs[i + j], 1) * 10) + getBit(confs[i + j], 0)); long confI = fromBin(confB); tmpStr << convertConfidence(confI); } lineObj.setJSONValue("confs", tmpStr.str().c_str()); lineObj.setJSONArray("xmins"); { JSONArray &xminsArray = lineObj.getJSONArray("xmins"); for (int j = 0; j < wLen; j++) xminsArray.addElement(lefts[i + j]); } lineObj.setJSONArray("ymins"); { JSONArray &yminsArray = lineObj.getJSONArray("ymins"); for (int j = 0; j < wLen; j++) yminsArray.addElement(tops[i + j]); } lineObj.setJSONArray("xmaxs"); { JSONArray &xmaxsArray = lineObj.getJSONArray("xmaxs"); //Add bottom right for (int j = 0; j < wLen; j++) xmaxsArray.addElement(rights[i + j]); } lineObj.setJSONArray("ymaxs"); { JSONArray &ymaxsArray = lineObj.getJSONArray("ymaxs"); for (int j = 0; j < wLen; j++) ymaxsArray.addElement(bottoms[i + j]); } numWords++; linesArray.addElement(lineObj); } ++adjustedPage; 
addToJSONPages(pageNum-1, pageObj); kRecFreeImg(hPage); return nRetVal; } bool CNuanceOCR::loadImgFile(std::string &inFile) { docReturnVals = kRecOpenImgFile(inFile.c_str(), &hIFile, IMGF_READ, (IMF_FORMAT)0); if (docReturnVals != REC_OK) { printMessage(std::string("Couldn't load file " + inFile)); kRecQuit(); return false; } printMessage(std::string("File loaded successfuly!")); docReturnVals = kRecGetImgFilePageCount(hIFile, &pageCnt); if (docReturnVals != REC_OK) { printMessage(std::string("Error counting pages.")); kRecQuit(); return false; } return true; } /** * Get the text of a Nuance error code. */ std::string CNuanceOCR::getError (RECERR nErr) { int nErrLen = 0; int nExtErr = 0; kRecGetLastError(&nExtErr, NULL, 0); kRecGetErrorUIText(nErr, nExtErr, "", NULL, &nErrLen); std::string errStr; errStr.resize(nErrLen); kRecGetErrorUIText(nErr, nExtErr, "", &errStr[0], &nErrLen); return errStr; } bool CNuanceOCR::initializeNuance() { #if (USE_OEM_LICENSE) docReturnVals = kRecSetLicense(LICENSE_FILE, OEM_CODE); if (docReturnVals != REC_OK) { std::stringstream msg; msg << "kRecSetLicense: Error obtaining license: " << getError(docReturnVals) << std::endl; printMessage(msg.str().c_str()); kRecQuit(); return false; } #endif docReturnVals = RecInitPlus(YOUR_COMPANY, YOUR_PRODUCT); // use your company and product name here if ((docReturnVals != REC_OK) && (docReturnVals != API_INIT_WARN) && (docReturnVals != API_LICENSEVALIDATION_WARN)) { std::stringstream msg; msg << "kRecSetLicense: Error obtaining license: " << getError(docReturnVals) << std::endl; printMessage(msg.str().c_str()); RecQuitPlus(); return false; } return true; } bool CNuanceOCR::savePage(int page, HPAGE &hPage) { imgPageLock.lock(); _imgPages[page] = hPage; imgPageLock.unlock(); return true; } bool CNuanceOCR::writeFixedImg() { std::stringstream messageStream; RECERR rc; imgPageLock.lock(); for (int i = 0; i < pageCnt; i++) { HPAGE imgPage = _imgPages[i]; if (isTIF) { messageStream << "Saving page to 
TIF file.\n"; printMessage(messageStream.str()); messageStream.str(""); rc = kRecSaveImgF(SID, fixedImgFile.c_str(), FF_TIFNO, imgPage, II_CURRENT, true); if (rc != REC_OK) { messageStream << "Error saving page.\n"; printMessage(messageStream.str()); messageStream.str(""); } } else if (isPDF) { messageStream << "Saving page to PDF file.\n"; printMessage(messageStream.str()); messageStream.str(""); rc = kRecSaveImgF(SID, fixedImgFile.c_str(), FF_PDF_GOOD, imgPage, II_CURRENT, true); if (rc != REC_OK) { messageStream << "Error saving page.\n"; printMessage(messageStream.str()); messageStream.str(""); } } } imgPageLock.unlock(); return true; } int CNuanceOCR::OCR(const OCROpts &opts, const std::string &outFile) { //set up nuance initializeNuance(); int nRetVal = OCR_SUCCESS; std::stringstream messageStream; bool pageRotated = false; RECERR rc; double duration; clock_t start; rc = kRecOpenImgFile(opts._inFile.c_str(), &hIFile, IMGF_READ, (IMF_FORMAT)0); if (rc != REC_OK) { printMessage(std::string("Couldn't load file " + opts._inFile)); kRecQuit(); } printMessage(std::string("File loaded successfuly!")); rc = kRecGetImgFilePageCount(hIFile, &pageCnt); if (rc != REC_OK) { printMessage(std::string("Error counting pages.")); kRecCloseImgFile(hIFile); kRecQuit(); } if (_pages != NULL) delete [] _pages; _pages = new JSONObject[pageCnt]; //if there isn't a page range. 
if (opts._nStartPage == 0 && opts._nEndPage == 0) fixedImgFile = getFixedImgName(opts._inFile); else fixedImgFile = getFixedImgName(opts._inFile, opts._nStartPage, opts._nEndPage); tmpImgFile = getTmpImgName(opts._tempFolder, opts._inFile); JSONObject fartDoc; fartDoc.setJSONValue("numPages", pageCnt); fartDoc.setJSONValue("numBadPages", 0); fartDoc.setJSONValue("originalSourceFile", opts._inFile.c_str()); fartDoc.setJSONValue("originalSourceType", getTypeByFileExt(opts._inFile).c_str()); fartDoc.setJSONValue("textSourceType", "NUANCE"); fartDoc.setJSONValue("fixedUp", false); fartDoc.setJSONArray("pages"); fartDoc.setJSONArray("badPages"); //These are the default values int page = 1; int adjustedPage = 1; int pageEnd = pageCnt; //------------------------- if (opts._nStartPage == 0 && opts._nEndPage == 0 || (opts._nEndPage < opts._nStartPage)) printMessage("No/invalid page range passed. Running full doc."); else if (opts._nStartPage != 0) page = opts._nStartPage; if (opts._nEndPage != 0) pageEnd = opts._nEndPage; rc = REC_OK; int nNumPagesErr = 0; while ((nNumPagesErr <= nPagesinErrToAllow) && (page <= pageEnd)) { rc = OCRPage(page, adjustedPage, pageRotated, opts._bSinglePageSave, opts._singlePageFolder); // Blank pages return ZONE_NOTFOUND_WARN. Ignore blank page warnings. 
if ((rc != REC_OK) && (rc != ZONE_NOTFOUND_WARN)) { switch (rc) { case IMF_FONT_MISSING_WARN: printMessage("MISSING FONT!\n"); if (nNumPagesErr <= nPagesinErrToAllow) nRetVal |= OCR_MISSING_FONT_SKIPPED; else nRetVal |= OCR_MISSING_FONT; ++nNumPagesErr; break; case IMG_DPI_ERR: case IMG_DPI_WARN: printMessage("BAD RESOLUTION!\n"); if (nNumPagesErr <= nPagesinErrToAllow) nRetVal |= OCR_RES_UNSUPPORTED_SKIPPED; else nRetVal |= OCR_RES_UNSUPPORTED; ++nNumPagesErr; break; case IMG_SIZE_ERR: printMessage("IMAGE SIZE ERROR\n"); if (nNumPagesErr <= nPagesinErrToAllow) nRetVal |= OCR_SIZE_UNSUPPORTED_SKIPPED; else nRetVal |= OCR_SIZE_UNSUPPORTED; ++nNumPagesErr; break; case NO_TXT_WARN: // Just a warning that no text was found on the page. Don't need to do anything! break; default: messageStream << "UNKNOWN ERROR ON PAGE " << (page + 1); printMessage(messageStream.str()); messageStream.str(""); nRetVal = OCR_FAILED; break; } } page++; } kRecCloseImgFile(hIFile); //----------------------------------------------------------------------------------------------------------- JSONArray &pageArray = fartDoc.getJSONArray("pages"); JSONArray &badPageArray = fartDoc.getJSONArray("badPages"); for (int i = 0; i < pageCnt; i++) { if (_pages[i].get("badPage") == NULL) pageArray.addElement(_pages[i]); else badPageArray.addElement(_pages[i]); } fartDoc.setJSONValue("numPages", pageArray.getnumelements()); fartDoc.setJSONValue("numBadPages", badPageArray.getnumelements()); //Close Nuance, save the frt file, and record time. RecQuitPlus(); //if it was run in full, save without name modification if (opts._nStartPage == 0 && opts._nEndPage == 0) fartDoc.SaveToFile(outFile, opts._bPrettyOutput); //if it was run in segments, save with page range in fileName. 
else fartDoc.SaveToFile(getFRTFilePageRangeName(outFile, opts._nStartPage, opts._nEndPage), opts._bPrettyOutput); double avgPageTime = (totOCRTime / ((double)pageCnt)); messageStream << "Avg time per page: " << avgPageTime << "s"; printMessage(messageStream.str()); messageStream.str("");; //If a page was rotated, move the image. if (pageRotated) { messageStream << "Detected page rotation, moving" << tmpImgFile << " to " << fixedImgFile << "."; printMessage(messageStream.str()); messageStream.str(""); copyFile(tmpImgFile, fixedImgFile); } //otherwise else std::remove(tmpImgFile.c_str()); //delete the temp file. //return return nRetVal; } std::string CNuanceOCR::getFRTFilePageRangeName(std::string inFile, int startPage, int endPage) { std::stringstream result; result << inFile.substr(0, inFile.find('.')) << "_" << startPage << "-" << endPage <<".frt"; return result.str(); } void CNuanceOCR::addToJSONPages(int pageNum, JSONObject page) { std::lock_guard locker(pageLock); _pages[pageNum] = page; } bool CNuanceOCR::verify() { return initializeNuance(); } //-------------------------------------------------------------------------------------------------------------------------------------- //Maybe one day these can be used. void CNuanceOCR::OCRPageMT(OCRArg* arg) { //may need to reinitialize on a new thread. 
printMessage(std::string("Initializing Nuance...")); initializeNuance(); printMessage(std::string("Done.")); std::string inFile = arg->getFileName(); int pageNum = arg->getPageNum(); RECERR nRetVal = REC_OK; HIMGFILE imgFile; HPAGE hPage = NULL; std::stringstream messageStream; RECERR rc; double duration; JSONObject pageObj; bool cantRotate = false; bool rotated = false; std::string tmpFixedImgFile = getFixedImgName(inFile, pageNum); //load the image into nuance, nuance claims this to be thread-safe printMessage(std::string("Locking image and loading page " + std::to_string(pageNum+1))); imgLock.lock(); int ID = SID; rc = kRecOpenImgFile(inFile.c_str(), &imgFile, IMGF_READ, (IMF_FORMAT)0); rc = kRecLoadImg(0, imgFile, &hPage, pageNum); imgLock.unlock(); if (rc != REC_OK) { messageStream << "ERROR: Cannot load page " << pageNum + 1 << "/" << pageCnt << ":" << getError(rc) << " (" << rc << ")"; printMessage(messageStream.str()); messageStream.str(""); } else { imgLock.lock(); rc = kRecPreprocessImg(0, hPage); //Not thread safe. imgLock.unlock(); if (rc != REC_OK) { messageStream << "ERROR: Cannot pre-process page " << pageNum + 1 << "/" << pageCnt << ":" << getError(rc) << " (" << rc << ")"; printMessage(messageStream.str()); messageStream.str(""); } } // At this point, if we are in error skip recognition, write the empty page, and bail. if (rc == REC_OK) { messageStream << "Recognizing page " << pageNum + 1 << "/" << pageCnt << "."; printMessage(messageStream.str()); messageStream.str(""); imgLock.lock(); rc = kRecRecognize(ID, hPage, NULL); //also not thread safe :( imgLock.unlock(); if (rc != REC_OK) { messageStream << "Failed recognition page " << pageNum + 1 << "/" << pageCnt << ": " << getError(rc) << " (" << rc << ")"; printMessage(messageStream.str()); messageStream.str(""); // Try rotating the page, through all 270 degrees. 
int nRotation = 0; for (int i = 0; (rc != REC_OK) && (i < 3); ++i) { nRotation += 90; messageStream << "Trying to rotate page to " << nRotation << " deg."; printMessage(messageStream.str()); messageStream.str(""); rc = kRecRotateImg(0, hPage, ROT_RIGHT); if (rc == REC_OK) { //try and recognize the image again imgLock.lock(); rc = kRecRecognize(ID, hPage, NULL); imgLock.unlock(); //if we still can't if (rc != REC_OK) { //print a message messageStream << "Error recognizing rotated page " << pageNum + 1 << " to " << nRotation << " deg: " << getError(rc) << " (" << rc << ")" << std::endl; printMessage(messageStream.str()); messageStream.str(""); // Try again... rc = REC_OK; } //save that we rotated the page rotated = true; } //If we couldn't recognize it after 3 rotations.... else { messageStream << "Error rotating page " << pageNum + 1 << " to " << nRotation << " deg, bailing out: " << getError(rc) << " (" << rc << ")" << std::endl; printMessage(messageStream.str()); messageStream.str(""); } } } } // At this point, we should have an OCR'd page. If not, bail! if (rc != REC_OK) { nRetVal = rc; messageStream << "Nothing on page " << pageNum + 1 << "." << std::endl; printMessage(messageStream.str()); messageStream.str(""); //release the page kRecFreeImg(hPage); hPage = NULL; pageObj.setJSONValue("page", pageNum + 1); pageObj.setJSONValue("numLines", 0); pageObj.setJSONArray("lines"); if (rc == IMF_FONT_MISSING_WARN) pageObj.setJSONValue("ocrerr", "OCR_MISSING_FONT_SKIPPED"); else if ((rc == IMG_DPI_ERR) || (rc == IMG_DPI_WARN)) pageObj.setJSONValue("ocrerr", "OCR_RES_UNSUPPORTED_SKIPPED"); addToJSONPages(pageNum, pageObj); } //if the page was rotated if (hPage != NULL && rotated) { IMF_FORMAT nImgFormat = FF_TIFNO; if (isPDF) nImgFormat = FF_PDF_GOOD; messageStream << "Saving page to " << ((isPDF) ? "PDF" : "TIFF") << " file.\n"; printMessage(messageStream.str()); messageStream.str(""); //We always want to save this as the only page in the document... 
rc = kRecSaveImgF(ID, tmpFixedImgFile.c_str(), nImgFormat, hPage, II_CURRENT, true); if (rc != REC_OK) { messageStream << "Error saving page: " << getError(rc) << " (" << rc << ")" << "\n"; printMessage(messageStream.str()); messageStream.str(""); } } if (nRetVal != REC_OK) { messageStream << "Error on page " << pageNum <<" \n " << getError(nRetVal); printMessage(messageStream.str()); messageStream.str(""); } //----------------------------------------------------------------------------------------------------------- // Character extraction logic //----------------------------------------------------------------------------------------------------------- LONG numLetters = 0; LETTER* letters[1]; rc = kRecGetLetters(hPage, II_CURRENT, letters, &numLetters); //set up the arrays for the variables char chars[numLetters]; int lefts[numLetters]; int rights[numLetters]; int bottoms[numLetters]; int tops[numLetters]; int zones[numLetters]; unsigned char confs[numLetters]; IMG_INFO pageInfo = { 0 }; if (kRecGetImgInfo(ID, hPage, II_CURRENT, &pageInfo) != REC_OK) // Hmmm, can't get image info? Punt, and default to 300x300dpi. pageInfo.DPI.cx = pageInfo.DPI.cy = 300; // Calculate the mult factor, using the page's resolution. 
double multX = SCALE_FACTOR / static_cast(pageInfo.DPI.cx); double multY = SCALE_FACTOR / static_cast(pageInfo.DPI.cy); //pull the letter info out for (int i = 0; i < numLetters; i++) { chars[i] = letters[0]->code; lefts[i] = static_cast(static_cast(letters[0]->left)*multX); tops[i] = static_cast(static_cast(letters[0]->top)*multY); confs[i] = letters[0]->err; //have to do some quick math to get the bottoms and rights bottoms[i] = tops[i] + (static_cast(static_cast(letters[0]->height)*multY)); // add the height to get the bottoms rights[i] = lefts[i] + (static_cast(static_cast(letters[0]->width)*multX)); // add the width to get the rights zones[i] = letters[0]->zone; letters[0]++; } //start the JSON stuff int wordsTot = getNumWords(chars, numLetters); pageObj.setJSONValue("page", pageNum + 1); pageObj.setJSONValue("numLines", wordsTot); pageObj.setJSONArray("lines"); JSONArray &linesArray = pageObj.getJSONArray("lines"); int numWords = 0; int wLen = 0; //character logic for (int i = 0; i < numLetters; i += wLen) { JSONObject lineObj; wLen = 0; //get the length of the word while (chars[i + wLen] != ' ' && i + wLen < numLetters) { wLen++; chars[i + wLen]; } wLen++; lineObj.setJSONValue("line", numWords); lineObj.setJSONValue("zone", zones[i]); lineObj.setJSONValue("page", pageNum + 1); lineObj.setJSONValue("xmin", getMin(i, i + wLen, lefts)); lineObj.setJSONValue("ymin", getMin(i, i + wLen, tops)); lineObj.setJSONValue("xmax", getMax(i, i + wLen, rights)); lineObj.setJSONValue("ymax", getMax(i, i + wLen, bottoms)); lineObj.setJSONValue("numChars", wLen); std::stringstream tmpStr; //write the chars for (int j = 0; j < wLen; j++) { if (static_cast(chars[i + j]) < 128 && static_cast(chars[i + j]) >= 32 && static_cast(chars[i + j]) != 34) tmpStr << chars[i + j]; else tmpStr << " "; } lineObj.setJSONValue("chars", tmpStr.str().c_str()); tmpStr.str(""); for (int j = 0; j < wLen; j++) { long confB = ((getBit(confs[i + j], 6) * 1000000) + (getBit(confs[i + j], 5) * 100000) + 
(getBit(confs[i + j], 4) * 10000) + (getBit(confs[i + j], 3) * 1000) + (getBit(confs[i + j], 2) * 100) + (getBit(confs[i + j], 1) * 10) + getBit(confs[i + j], 0)); long confI = fromBin(confB); tmpStr << convertConfidence(confI); } lineObj.setJSONValue("confs", tmpStr.str().c_str()); lineObj.setJSONArray("xmins"); { JSONArray &xminsArray = lineObj.getJSONArray("xmins"); for (int j = 0; j < wLen; j++) xminsArray.addElement(lefts[i + j]); } lineObj.setJSONArray("ymins"); { JSONArray &yminsArray = lineObj.getJSONArray("ymins"); for (int j = 0; j < wLen; j++) yminsArray.addElement(tops[i + j]); } lineObj.setJSONArray("xmaxs"); { JSONArray &xmaxsArray = lineObj.getJSONArray("xmaxs"); //Add bottom right for (int j = 0; j < wLen; j++) xmaxsArray.addElement(rights[i + j]); } lineObj.setJSONArray("ymaxs"); { JSONArray &ymaxsArray = lineObj.getJSONArray("ymaxs"); for (int j = 0; j < wLen; j++) ymaxsArray.addElement(bottoms[i + j]); } numWords++; linesArray.addElement(lineObj); } //add the page to the JSON pages array, this is thread safe. 
addToJSONPages(pageNum, pageObj); } int CNuanceOCR::OCRMT(const std::string &inFile, int threadCount) { initializeNuance(); HPAGE page; bool result; RECERR nRetVal = REC_OK; std::stringstream msgStr; imgLock.lock(); nRetVal = kRecOpenImgFile(inFile.c_str(), &hIFile, IMGF_READ, (IMF_FORMAT)0); if (nRetVal != REC_OK) { printMessage(std::string("Couldn't load file " + inFile)); kRecQuit(); } imgLock.unlock(); //Set the output to XML printMessage(std::string("Setting output to XML")); nRetVal = kRecSetDTXTFormat(SID, DTXT_XMLCOORD); if (nRetVal != REC_OK) { msgStr << getError(nRetVal); printMessage(msgStr.str()); msgStr.str(""); } //Create an array of the pages int pageCnt; nRetVal = kRecGetImgFilePageCount(hIFile, &pageCnt); if (nRetVal != REC_OK) { msgStr << "Error getting the page count: " << getError(nRetVal); printMessage(msgStr.str()); return false; } msgStr << "page count = " << pageCnt; printMessage(msgStr.str()); msgStr.str(""); //------------------------------------------------------------------------------------------------------------------------------- //set the OCR thread count msgStr << "Setting thread count to " << threadCount << std::endl; printMessage(msgStr.str()); msgStr.str(""); nRetVal = RecSetOCRThreadCount(SID, threadCount); if (nRetVal != REC_OK) { msgStr << getError(nRetVal); printMessage(msgStr.str()); msgStr.str(""); } nRetVal = RecGetOCRThreadCount(SID, &threadCount); if (nRetVal != REC_OK) { msgStr << "Error getting the thread count: " << getError(nRetVal); printMessage(msgStr.str()); return false; } msgStr << "Seccessfully set thread count to " << threadCount; printMessage(msgStr.str()); msgStr.str(""); //-------------------------------------------------------------------------------------------------------------------------------- //load the page into the page array HPAGE pages[pageCnt]; for (int i = 0; i < pageCnt; i++) { //msgstr << "adding page " << i+1 << "/" << pagecnt << " to the page array."; //printmessage(msgstr.str()); 
//msgstr.str(""); HPAGE tmpPage; nRetVal =kRecLoadImg(SID, hIFile, &tmpPage, i); if (nRetVal != REC_OK) { msgStr << "Error getting page " << i << std::endl << " "; msgStr << getError(nRetVal); printMessage(msgStr.str()); msgStr.str(""); } //save the page info pages[i] = tmpPage; msgStr << "page " << i + 1 << " = " << pages[i]; printMessage(msgStr.str()); msgStr.str(""); } //run Nuance msgStr << "Writing OCR output to " << getXMLName(inFile); printMessage(msgStr.str()); msgStr.str(""); for (int i = 0; i < pageCnt; i++) { nRetVal = kRecRecognize(SID, pages[i], getXMLName(inFile).c_str()); if (nRetVal != REC_OK) { msgStr << "Error recognizing page " << i << std::endl; msgStr << getError(nRetVal); printMessage(msgStr.str()); msgStr.str(""); } } nRetVal = kRecConvert2DTXTEx(SID, pages, pageCnt, II_CURRENT, getXMLName(inFile).c_str()); if (nRetVal != REC_OK) { const char* errorInfo; kRecGetErrorInfo(nRetVal, &errorInfo); //msgStr << getError(nRetVal); std::string strErr(errorInfo); printMessage(strErr); msgStr.str(""); } return nRetVal; } //Split to lines prototype (doesn't work so well...) 
//--------------------------------------------------------------------------------------------------------------------------------------------- //JSONArray splitToLines(word *words, int wordCnt) //{ // int lineNum = 1; // JSONArray lines; // word previous = words[0]; // //Go through the coordinate bounds for each word // for (int i = 0; i < wordCnt; i++) // { // JSONObject line; // std::string lineVal; // std::vector<int> tops; // std::vector<int> bottoms; // std::vector<int> lefts; // std::vector<int> rights; // std::vector<int> confs; // int numChars; // // word current = words[i]; // //if a word's upper left is larger than the previous one's lower right // if (current.MinY > previous.MaxY) // { // //set the values // line.setJSONValue("line", lineNum); // line.setJSONValue("zone", current.Zone); // line.setJSONValue("page", current.Page); // /*line.setJSONValue("xmin", getMin(i, i + wLen, lefts)); // line.setJSONValue("ymin", getMin(i, i + wLen, bottoms)); // line.setJSONValue("xmax", getMax(i, i + wLen, tops)); // line.setJSONValue("ymax", getMax(i, i + wLen, bottoms)); // line.setJSONValue("numChars", wLen);*/ // //start a new line // lines.addElement(line); // lineNum++; // } // else // { // //add the values in the line // for (int k = 0; k < current.Length; k++) // { // tops.push_back(current.Tops[k]); // bottoms.push_back(current.Bottoms[k]); // rights.push_back(current.Rights[k]); // lefts.push_back(current.Lefts[k]); // confs.push_back(current.Confidences[k]); // } // lineVal += current.value; // numChars += current.Length; // } // //point previous to current // *previous = &current; // } // // // // return lines; //} //--------------------------------------------------------------------------------------------------------------------------------------------- //bool CNuanceOCR::OCR(const std::string &inFile, const std::string &outFile) //{ // // initializeNuance(); // // JSONObject fartDoc; // fartDoc.setJSONValue("numPages", pageCnt); // fartDoc.setJSONValue("originalSourceFile", 
inFile.c_str()); // fartDoc.setJSONValue("originalSourceType", getTypeByFileExt(inFile).c_str()); // fartDoc.setJSONValue("textSourceType", "NUANCE"); // fartDoc.setJSONValue("fixedUp", false); // fartDoc.setJSONArray("pages"); // JSONArray &pageArray = fartDoc.getJSONArray("pages"); // // HPAGE hPage; // RECERR rc; // double duration; // std::stringstream messageStream; // clock_t start; // // // rc = kRecOpenImgFile(inFile.c_str(), &hIFile, IMGF_READ, (IMF_FORMAT)0); // if (rc != REC_OK) // { // printMessage(std::string("Couldn't load file " + inFile)); // kRecQuit(); // } // // printMessage(std::string("File loaded successfuly!")); // // rc = kRecGetImgFilePageCount(hIFile, &pageCnt); // if (rc != REC_OK) // { // printMessage(std::string("Error counting pages.")); // kRecQuit(); // } // for (int page = 0; page < pageCnt; ++page) // { // JSONObject pageObj; // // rc = kRecLoadImg(0, hIFile, &hPage, page); // rc = kRecPreprocessImg(0, hPage); // // messageStream << "Recognizing page " << page + 1 << "/" << pageCnt << "\n"; // printMessage(messageStream.str()); // messageStream.str(""); // // start = clock(); // rc = kRecRecognize(SID, hPage, NULL); // duration = (clock() - start) / ((double)CLOCKS_PER_SEC); // totOCRTime += duration; // // messageStream << "Done in " << duration << "s."; // printMessage(messageStream.str()); // messageStream.str("");; // // if (rc != REC_OK) // { // //std::cout<<"Error" <code; // lefts[i] = letters[0]->left*mult; // tops[i] = letters[0]->top*mult; // confs[i] = letters[0]->err; // bottoms[i] = tops[i] + (letters[0]->height*mult); // rights[i] = lefts[i] + (letters[0]->width*mult); // zones[i] = letters[0]->zone; // letters[0]++; // } // // int wordsTot = getNumWords(chars, numLetters); // pageObj.setJSONValue("page", page + 1); // pageObj.setJSONValue("numLines", wordsTot); // pageObj.setJSONArray("lines"); // JSONArray &linesArray = pageObj.getJSONArray("lines"); // // int numWords = 0; // int wLen = 0; // for (int i = 0; i < 
numLetters; i += wLen) // { // JSONObject lineObj; // wLen = 0; // //get the length of the word // while (chars[i + wLen] != ' ' && i + wLen < numLetters) // wLen++; // wLen++; // lineObj.setJSONValue("line", numWords); // lineObj.setJSONValue("zone", zones[i]); // lineObj.setJSONValue("page", page + 1); // lineObj.setJSONValue("xmin", getMin(i, i + wLen, lefts)); // lineObj.setJSONValue("ymin", getMin(i, i + wLen, bottoms)); // lineObj.setJSONValue("xmax", getMax(i, i + wLen, tops)); // lineObj.setJSONValue("ymax", getMax(i, i + wLen, bottoms)); // lineObj.setJSONValue("numChars", wLen); // // std::stringstream tmpStr; // //write the chars // for (int j = 0; j < wLen; j++) // { // if (static_cast(chars[i + j]) < 128 && static_cast(chars[i + j]) >= 32 && static_cast(chars[i + j]) != 34) // tmpStr << chars[i + j]; // else // tmpStr << " "; // } // lineObj.setJSONValue("chars",tmpStr.str().c_str()); // // tmpStr.str(""); // for (int j = 0; j < wLen; j++) // { // long confB = ((getBit(confs[i + j], 6) * 1000000) + (getBit(confs[i + j], 5) * 100000) + (getBit(confs[i + j], 4) * 10000) + (getBit(confs[i + j], 3) * 1000) + // (getBit(confs[i + j], 2) * 100) + (getBit(confs[i + j], 1) * 10) + getBit(confs[i + j], 0)); // // long confI = fromBin(confB); // tmpStr << convertConfidence(confI); // } // lineObj.setJSONValue("confs", tmpStr.str().c_str()); // // lineObj.setJSONArray("xmins"); // { // JSONArray &xminsArray = lineObj.getJSONArray("xmins"); // for (int j = 0; j < wLen; j++) // xminsArray.addElement(lefts[i + j]); // } // // lineObj.setJSONArray("ymins"); // { // JSONArray &yminsArray = lineObj.getJSONArray("ymins"); // for (int j = 0; j < wLen; j++) // yminsArray.addElement(tops[i + j]); // } // // lineObj.setJSONArray("xmaxs"); // { // JSONArray &xmaxsArray = lineObj.getJSONArray("xmaxs"); // //Add bottom right // for (int j = 0; j < wLen; j++) // xmaxsArray.addElement(rights[i + j]); // } // // lineObj.setJSONArray("ymaxs"); // { // JSONArray &ymaxsArray = 
lineObj.getJSONArray("ymaxs"); // for (int j = 0; j < wLen; j++) // ymaxsArray.addElement(bottoms[i + j]); // } // // numWords++; // linesArray.addElement(lineObj); // } // pageArray.addElement(pageObj); // } // //----------------------------------------------------------------------------------------------------------- // // RecQuitPlus(); // // fartDoc.SaveToFile(outFile); // double avgPageTime = (totOCRTime / ((double)pageCnt)); // // messageStream << "Avg time per page: \033[1;34m" << avgPageTime << "s\033[0m"; // printMessage(messageStream.str()); // messageStream.str("");; // // return true; //}