Sleds/slocr-mp/MultiProcessOCR/OCRManager.h

56 lines
1.8 KiB
C++

#pragma once
#include <stdlib.h>
#include <unistd.h>
#include <stdio.h>
#include <string>
#include <sstream>
#include <fstream>
#include <vector>
#include <thread>
#include <iostream>
#include <cmath>
#include <algorithm>
#include <sys/types.h>
#include <sys/wait.h>
#include <dirent.h>
#include "jsonobject.h"
namespace sequencelogic
{
/// A span of pages identified by a start and an end page number.
///
/// Both members default to 0 so that a default-constructed Range never
/// carries indeterminate values. Brace initialisation (`Range{1, 5}`) keeps
/// working from C++14 onward, where default member initializers no longer
/// prevent aggregate initialisation.
///
/// NOTE(review): whether the bounds are inclusive and whether pages are
/// numbered from 0 or 1 is not visible in this header — confirm against the
/// code that fills these in.
struct Range
{
    int startPage = 0; ///< First page of the span.
    int endPage = 0;   ///< Last page of the span.
};
/// Manager object for OCR runs over an input document.
///
/// Only declarations are visible in this header; the definitions live in a
/// separate translation unit. Notes below that go beyond what a signature
/// shows are marked as assumptions.
///
/// NOTE(review): the three command/path strings are non-static `const`
/// members, so every instance holds its own copy and the implicitly
/// generated copy-assignment operator is deleted.
class OCRManager
{
public:
    OCRManager();
    ~OCRManager();

    /// Entry point for an OCR run on `inputFile`.
    ///
    /// @param inputFile            Document to process.
    /// @param isPretty             Presumably selects pretty-printed output — TODO confirm.
    /// @param pagesInErrorToAllow  Presumably the number of failing pages tolerated — TODO confirm.
    /// @param threadCount          Presumably the number of parallel workers — TODO confirm.
    /// @param singlePageOutput     Presumably requests per-page output — TODO confirm.
    void RunOCR(std::string inputFile, bool isPretty, int pagesInErrorToAllow, int threadCount, bool singlePageOutput);

    /// Takes no arguments and returns nothing; what is validated is not
    /// visible from this header.
    void ValidateOCR();

private:
    // ---- External tool configuration -------------------------------------

    /// Hard-coded absolute path ending in `sequencelogic-ocr`.
    /// NOTE(review): points into a user-specific directory tree.
    const std::string SLOCR_LOCATION = "/group/eng/Chris/SLOCR_MT/slocr/bin/lin64/sequencelogic-ocr";

    /// Command prefix for `/usr/bin/identify` with `-format %n -quiet`.
    /// The trailing space suggests a file name is appended by the caller.
    const std::string PAGE_COUNT_CMD = "/usr/bin/identify -format %n -quiet ";

    /// Command prefix for `gs` using the `pdfwrite` device. It ends in
    /// `-sOUTPUTFILE=`, so the output path is presumably appended directly.
    const std::string GHOST_SCRIPT_CMD = "gs -dNOPAUSE -sDEVICE=pdfwrite -sOUTPUTFILE=";

    // ---- State -----------------------------------------------------------

    /// List of file names; presumably the OCR output files collected during
    /// a run — TODO confirm.
    std::vector<std::string> ocrFiles;

    // ---- Helpers ---------------------------------------------------------

    /// Returns a list of page ranges for `inputFile`; presumably one range
    /// per worker given `threadCount` — TODO confirm.
    std::vector<Range> splitPages(std::string inputFile, int threadCount);

    /// Returns a page count for `inFile` (presumably via PAGE_COUNT_CMD).
    int getPageCnt(std::string inFile);

    /// Returns an output file name derived from `inputFile` and a page span.
    std::string getOutputFile(std::string inputFile, int pageStart, int pageEnd);

    /// Returns an output file name derived from `inputFile` alone.
    std::string getOutputFile(std::string inputFile);

    /// Returns the FRT file name associated with `imgFile`.
    std::string getFRTFileName(std::string imgFile);

    /// Per-range OCR call. Takes the same options as RunOCR plus the `range`
    /// to process and returns a string whose meaning is not visible here.
    std::string callOCR(std::string inputFile, bool isPretty, int pagesInErrorToAllow, int threadCount, Range range, bool singlePageOutput);

    /// Directory listing helper. `files` is a non-const reference and is
    /// presumably filled with entries of `dirName` matching `filter`; the
    /// return value is presumably the number of entries found — TODO confirm.
    size_t getFilesInDirectory(const std::string &dirName, std::vector<std::string> &files, std::string filter);

    /// Presumably merges the FRT files found in `dir` into `outFile`.
    void glueFRTFiles(std::string dir, bool prettyPrint, std::string outFile);

    /// Presumably merges per-range image output from `dir` into `outputFile`.
    void glueImageFiles(int threadCount, std::string dir, std::string inFile, std::string outputFile, bool singlePageOutput);

    /// Writes a page span for `fileName` into the `startPage` / `endPage`
    /// reference parameters.
    void getPageRange(std::string fileName, int &startPage, int &endPage);

    /// Runs `command` and returns a string (presumably the captured output).
    std::string runProgram(std::string command);

    /// Presumably removes `fileName` from the file system.
    void deleteFile(std::string fileName);
};
}