#pragma once #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "jsonobject.h" namespace sequencelogic { struct Range { int startPage; int endPage; }; class OCRManager { public: OCRManager(); ~OCRManager(); void RunOCR(std::string inputFile, bool isPretty, int pagesInErrorToAllow, int threadCount, bool singlePageOutput); void ValidateOCR(); private: const std::string SLOCR_LOCATION = "/group/eng/Chris/SLOCR_MT/slocr/bin/lin64/sequencelogic-ocr"; const std::string PAGE_COUNT_CMD = "/usr/bin/identify -format %n -quiet "; const std::string GHOST_SCRIPT_CMD = "gs -dNOPAUSE -sDEVICE=pdfwrite -sOUTPUTFILE="; std::vector ocrFiles; std::vector splitPages(std::string inputFile, int threadCount); int getPageCnt(std::string inFile); std::string getOutputFile(std::string inputFile, int pageStart, int PageEnd); std::string getOutputFile(std::string inputFile); std::string getFRTFileName(std::string imgFile); std::string callOCR(std::string inputFile, bool isPretty, int pagesInErrorToAllow, int threadCount, Range range, bool singlePageOutput); size_t getFilesInDirectory(const std::string &dirName, std::vector &files, const std::string filter); void glueFRTFiles(std::string dir, bool prettyPrint, std::string outFile); void glueImageFiles(int threadCount, std::string dir, std::string inFile, std::string outputFile, bool singlePageOutput); void getPageRange(std::string fileName, int &startPage, int &endPage); std::string runProgram(std::string command); void deleteFile(std::string fileName); }; }