Sleds/slocr/nuanceocr.h

187 lines
3.7 KiB
C++

//
// Copyright (c) 2016, Sequence Logic
//
#ifndef NUANCE_OCR
#define NUANCE_OCR
// Licensing configuration for the Nuance engine.
// NOTE(review): presumably USE_OEM_LICENSE selects OEM licensing using
// LICENSE_FILE together with OEM_CODE; the code that consumes these macros is
// not part of this header -- confirm against the implementation.
#define USE_OEM_LICENSE 1
#define LICENSE_FILE "/sequencelogic/config/sequencelogic.lcxz"
#include "sequencelogic.h" // header expected to define the OEM code as OEM_CODE
// See the documentation of kRecInit about the use of company and product names
#define YOUR_COMPANY "Sequence Logic"
#define YOUR_PRODUCT "SLEDS"
// NOTE(review): the meaning of SID is not visible in this header -- confirm
// against the call sites that pass it to the engine.
#define SID 0
// NOTE(review): "TOLLERANCE" is a misspelling of "TOLERANCE"; the name is kept
// as-is because other files may reference it. Its units and usage are not
// shown in this header.
#define DEFAULT_LINE_TOLLERANCE 75
#include <dirent.h>

#include <ctime>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <mutex>
#include <sstream>
#include <string>
#include <vector>

#include "KernelApi.h"
#include "RecApiPlus.h"
#include "jsonobject.h"
#include "ocrbase.h"
#include "jsonobject.h"
#include "OCRArg.h"
namespace sequencelogic
{
/**
 * The Nuance OCR engine interface.
 *
 * Scalar, handle and raw-pointer members carry default member initializers so
 * that an object created through the empty default constructor never holds
 * indeterminate values that later code (such as cleanup) might read.
 */
class CNuanceOCR : public COCRBase
{
public:
    /**
     * Creates an engine object with no input file; all plain members start
     * out zero / null through their default member initializers.
     */
    CNuanceOCR() {}

    /**
     * Creates an engine object for the given input file.
     * NOTE(review): what is done with inFile is defined in the implementation
     * file, which is not visible here.
     */
    CNuanceOCR(std::string &inFile);

    virtual ~CNuanceOCR();

    /**
     * OCRs a page in a thread-safe way, writes the page to a frt file with only
     * that page's information.
     */
    void OCRPageMT(OCRArg* arg);

protected:
    /**
     * Runs OCR using the given options.
     * NOTE(review): presumably writes the result to outFile and returns a
     * status code -- confirm against COCRBase and the implementation.
     */
    virtual int OCR(const OCROpts &opts, const std::string &outFile);

    /**
     * NOTE(review): what is verified is not visible in this header -- confirm
     * against the implementation.
     */
    virtual bool verify();

    /**
     * Multi-threaded OCR entry point for the given input file.
     * threadCount defaults to 2 when the caller does not supply it.
     */
    virtual int OCRMT(const std::string &inFile, int threadCount = 2);

private:
    /**
     * Returns the FRT file name for the page range [startPage, endPage] of
     * inFile.
     * NOTE(review): the exact name format is defined in the implementation.
     */
    std::string getFRTFilePageRangeName(std::string inFile, int startPage, int endPage);

    /**
     * Get the text of a Nuance error code.
     */
    std::string getError (RECERR nErr);

    /**
     * The test function for new things.
     */
    void OCRPageTest(int &pageNum);

    /**
     * Cleans up the resources Nuance uses.
     */
    void cleanUpNuance();

    /**
     * Deletes the _fixed file.
     */
    void deleteFixedFile();

    /**
     * A thread-safe way to save pages of a document.
     */
    bool savePage(int page, HPAGE &hPage);

    /**
     * Adds time to the total runtime in a "thread safe" way.
     */
    void addTime(double runTime);

    /**
     * Initializes the Nuance OCR engine.
     */
    bool initializeNuance();

    /**
     * OCRs a single page and returns the Nuance status code for the operation.
     * NOTE(review): adjustedPage and rotated are non-const references and are
     * presumably outputs; singlePageSave / singlePageFolder presumably control
     * saving the page on its own -- confirm against the implementation.
     */
    RECERR OCRPage(int pageNum, int &adjustedPage, bool &rotated, bool singlePageSave, const std::string &singlePageFolder);

    /**
     * Loads the given image file.
     * NOTE(review): presumably opens inFile into hIFile -- confirm against the
     * implementation.
     */
    bool loadImgFile(std::string &inFile);

    /**
     * A thread-safe way to add objects to the JSON page array.
     */
    void addToJSONPages(int pageNum, JSONObject page);

    /**
     * NOTE(review): presumably writes the "_fixed" image that
     * deleteFixedFile() later removes -- confirm against the implementation.
     */
    bool writeFixedImg();

    /**
     * Splits the given list of letters into lines and returns back a JSON array of
     * those lines. (Currently disabled.)
     */
    //JSONArray splitToLines(word *words, int wordCnt);

    /**
     * The character representing the rejection character for the Nuance OCR Engine.
     */
    char chRej{};
    WCHAR wRej{};

    /**
     * The number of threads used to OCR an image.
     */
    int THREAD_COUNT{};

    /**
     * An object representing the current page of the image file being OCRd.
     */
    HPAGE hPage{};

    /**
     * An object representing the current Image file being processed.
     */
    HIMGFILE hIFile{};

    /**
     * The total time it takes for the Nuance OCR engine to process a file.
     */
    double totOCRTime{};

    /**
     * The return values of the Nuance OCR Engine.
     */
    RECERR docReturnVals{};

    /**
     * The number of pages contained in the given image file.
     */
    int pageCnt{};

    /**
     * The lock for adding to the total time.
     */
    std::mutex timeLock;
    // NOTE(review): purpose of taskLock is not documented in this header.
    Mutex taskLock;

    /**
     * The lock for adding pages to the JSON page array.
     */
    std::mutex pageLock;
    // NOTE(review): presumably guard _imgPages and hIFile respectively -- confirm.
    Mutex imgPageLock;
    Mutex imgLock;

    /**
     * An array to hold all the pages (in their JSON form) in.
     * Starts out null; ownership and lifetime are managed by the implementation.
     */
    JSONObject *_pages{nullptr};
    // NOTE(review): presumably the per-page image handles, parallel to _pages -- confirm.
    HPAGE *_imgPages{nullptr};
};
} // namespace sequencelogic
#endif