187 lines
3.7 KiB
C++
187 lines
3.7 KiB
C++
//
|
|
// Copyright (c) 2016, Sequence Logic
|
|
//
|
|
#ifndef NUANCE_OCR
|
|
#define NUANCE_OCR
|
|
|
|
#define USE_OEM_LICENSE 1
|
|
#define LICENSE_FILE "/sequencelogic/config/sequencelogic.lcxz"
|
|
#include "sequencelogic.h" // insert name of the header file defining your OEM Code as OEM_CODE
|
|
|
|
// See the documentation of kRecInit about the use of company and product names
|
|
#define YOUR_COMPANY "Sequence Logic"
|
|
#define YOUR_PRODUCT "SLEDS"
|
|
|
|
#define SID 0
|
|
#define DEFAULT_LINE_TOLLERANCE 75
|
|
|
|
#include <dirent.h>

#include <ctime>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <mutex>
#include <sstream>
#include <string>
#include <vector>
|
|
|
|
#include "KernelApi.h"
|
|
#include "RecApiPlus.h"
|
|
|
|
#include "jsonobject.h"
|
|
|
|
#include "ocrbase.h"
|
|
#include "jsonobject.h"
|
|
#include "OCRArg.h"
|
|
|
|
namespace sequencelogic
|
|
{
|
|
/**
|
|
* The Nuance OCR engine interface.
|
|
*/
|
|
class CNuanceOCR : public COCRBase
|
|
{
|
|
public:
|
|
CNuanceOCR() {}
|
|
CNuanceOCR(std::string &inFile);
|
|
virtual ~CNuanceOCR();
|
|
/**
|
|
* OCRs a page in a thread-safe way, writes the page to a frt file with only that page's information.
|
|
*
|
|
*/
|
|
void OCRPageMT(OCRArg* arg);
|
|
|
|
protected:
|
|
virtual int OCR(const OCROpts &opts, const std::string &outFile);
|
|
virtual bool verify();
|
|
virtual int OCRMT(const std::string &inFile, int threadCount = 2);
|
|
|
|
|
|
private:
|
|
/**
|
|
*
|
|
*/
|
|
std::string getFRTFilePageRangeName(std::string inFile, int startPage, int endPage);
|
|
|
|
/**
|
|
* Get the text of a Nuance error code.
|
|
*/
|
|
std::string getError (RECERR nErr);
|
|
|
|
/**
|
|
* The test function for new things
|
|
*/
|
|
void OCRPageTest(int &pageNum);
|
|
|
|
/**
|
|
* Cleans up the resources Nuance uses.
|
|
*/
|
|
void cleanUpNuance();
|
|
|
|
/**
|
|
* Deletes the _fixed file.
|
|
*/
|
|
void deleteFixedFile();
|
|
|
|
/**
|
|
* A thread save way to save pages of a document.
|
|
*/
|
|
bool savePage(int page, HPAGE &hPage);
|
|
|
|
/**
|
|
* Adds time to the total runtime in a "thread safe" way.
|
|
*/
|
|
void addTime(double runTime);
|
|
|
|
/**
|
|
* Initialized the Nuance OCR engine.
|
|
*/
|
|
bool initializeNuance();
|
|
|
|
/**
|
|
* OCRs a single page and returns the JSONObject representing the captured text from
|
|
* the page.
|
|
*/
|
|
RECERR OCRPage(int pageNum, int &adjustedPage, bool &rotated, bool singlePageSave, const std::string &singlePageFolder);
|
|
|
|
/**
|
|
* A thread safe way to add objects to the JSON page array.
|
|
*/
|
|
bool loadImgFile(std::string &inFile);
|
|
|
|
/**
|
|
* Splits the given list of letters into lines and returns back a JSON array of
|
|
* those lines.
|
|
*/
|
|
void addToJSONPages(int pageNum, JSONObject page);
|
|
|
|
bool writeFixedImg();
|
|
|
|
/**
|
|
* Splits the given list of letters into lines and returns back a JSON array of
|
|
* those lines.
|
|
*/
|
|
//JSONArray splitToLines(word *words, int wordCnt);
|
|
|
|
/**
|
|
* The character representing the rejection character for the Nuance OCR Engine.
|
|
*/
|
|
char chRej;
|
|
WCHAR wRej;
|
|
|
|
/**
|
|
* The number of threads used to OCR an image.
|
|
*/
|
|
int THREAD_COUNT;
|
|
|
|
/**
|
|
* An object representing the current page of the image file being OCRd.
|
|
*/
|
|
HPAGE hPage;
|
|
|
|
/**
|
|
* An object representing the current Image file being processed.
|
|
*/
|
|
HIMGFILE hIFile;
|
|
|
|
/**
|
|
* The total time it takes for the Nuance OCR engine to process a file.
|
|
*/
|
|
double totOCRTime;
|
|
|
|
/**
|
|
* The return values of the Nuance OCR Engine.
|
|
*/
|
|
RECERR docReturnVals;
|
|
|
|
/**
|
|
* The number of pages contained in the given image file.
|
|
*/
|
|
int pageCnt;
|
|
|
|
/**
|
|
* The locker for adding to the total time
|
|
*/
|
|
std::mutex timeLock;
|
|
|
|
Mutex taskLock;
|
|
|
|
/**
|
|
* The lcoker for adding pages to the JSON page array.
|
|
*/
|
|
std::mutex pageLock;
|
|
|
|
Mutex imgPageLock;
|
|
|
|
Mutex imgLock;
|
|
|
|
/**
|
|
* An array to hold all the pages (in their JSON form) in.
|
|
*/
|
|
JSONObject *_pages;
|
|
|
|
HPAGE *_imgPages;
|
|
};
|
|
};
|
|
|
|
#endif
|