// Declarations for sequencelogic::Range and sequencelogic::OCRManager.
#pragma once
#include <stdlib.h>
#include <unistd.h>
#include <stdio.h>
#include <string>
#include <sstream>
#include <fstream>
#include <vector>
#include <thread>
#include <iostream>
#include <cmath>
#include <algorithm>

#include <sys/types.h>
#include <sys/wait.h>
#include <dirent.h>

#include "jsonobject.h"

namespace sequencelogic
{
    /**
     * @brief A pair of page numbers delimiting a span of pages.
     *
     * NOTE(review): this header does not show whether endPage is inclusive or
     * whether pages are counted from 0 or 1 -- confirm against the implementation.
     *
     * The members have no default initializers, so a Range declared without
     * an initializer holds indeterminate values until both fields are assigned.
     */
    struct Range
    {
        int startPage;  ///< First page of the span.
        int endPage;    ///< Last page of the span (inclusive vs. exclusive: confirm).
    };

    /**
     * @brief Interface of the OCR driver; only declarations live in this header.
     *
     * All member functions are defined out of line, so the behavioural notes
     * below are hedged: they are derived from names and signatures only and
     * should be confirmed against the implementation file.
     *
     * NOTE(review): the three command constants are non-static const data
     * members, which means every instance carries its own copy and the
     * implicitly-declared copy/move assignment operators are deleted.
     */
    class OCRManager
    {
    public:
        OCRManager();
        ~OCRManager();

        /**
         * @brief Entry point for an OCR run on @p inputFile.
         *
         * @param inputFile           Path of the document to process.
         * @param isPretty            Presumably selects pretty-printed output -- confirm.
         * @param pagesInErrorToAllow Presumably the number of failed pages tolerated -- confirm.
         * @param threadCount         Presumably the number of parallel workers -- confirm.
         * @param singlePageOutput    Presumably requests per-page output -- confirm.
         */
        void RunOCR(std::string inputFile, bool isPretty, int pagesInErrorToAllow, int threadCount, bool singlePageOutput);

        /// Validation entry point; what is validated is not visible in this header.
        void ValidateOCR();

    private:
        /// Absolute path of the `sequencelogic-ocr` executable.
        /// NOTE(review): hard-coded to one specific directory tree; consider making it configurable.
        const std::string SLOCR_LOCATION = "/group/eng/Chris/SLOCR_MT/slocr/bin/lin64/sequencelogic-ocr";

        /// Prefix of an `identify` command line; it ends with a space, so the
        /// target file name is presumably appended by the caller -- confirm.
        const std::string PAGE_COUNT_CMD = "/usr/bin/identify -format %n -quiet ";

        /// Prefix of a Ghostscript (`gs`) command line using the `pdfwrite` device;
        /// it ends at `-sOUTPUTFILE=`, so the output path is presumably appended -- confirm.
        const std::string GHOST_SCRIPT_CMD = "gs -dNOPAUSE -sDEVICE=pdfwrite -sOUTPUTFILE=";

        /// File names tracked by this instance; how the list is filled is not shown here.
        std::vector<std::string> ocrFiles;

        /// Presumably divides the pages of @p inputFile into up to @p threadCount ranges -- confirm.
        std::vector<Range> splitPages(std::string inputFile, int threadCount);

        /// Presumably returns the page count of @p inFile (see PAGE_COUNT_CMD) -- confirm.
        int getPageCnt(std::string inFile);

        /// Presumably derives an output file name for a page span of @p inputFile -- confirm.
        std::string getOutputFile(std::string inputFile, int pageStart, int pageEnd);

        /// Presumably derives the output file name for the whole of @p inputFile -- confirm.
        std::string getOutputFile(std::string inputFile);

        /// Presumably maps an image file name to its matching FRT file name -- confirm.
        std::string getFRTFileName(std::string imgFile);

        /// Presumably runs the OCR executable for @p range of @p inputFile; the meaning
        /// of the returned string is not visible in this header.
        std::string callOCR(std::string inputFile, bool isPretty, int pagesInErrorToAllow, int threadCount, Range range, bool singlePageOutput);

        /// Writes matches into @p files; presumably lists entries of @p dirName that match
        /// @p filter and returns how many were found -- confirm.
        size_t getFilesInDirectory(const std::string &dirName, std::vector<std::string> &files, const std::string filter);

        /// Presumably merges the FRT files found in @p dir into @p outFile -- confirm.
        void glueFRTFiles(std::string dir, bool prettyPrint, std::string outFile);

        /// Presumably merges the image files found in @p dir into @p outputFile -- confirm.
        void glueImageFiles(int threadCount, std::string dir, std::string inFile, std::string outputFile, bool singlePageOutput);

        /// Reports a page span through the @p startPage / @p endPage out-parameters;
        /// presumably parsed from @p fileName -- confirm.
        void getPageRange(std::string fileName, int &startPage, int &endPage);

        /// Presumably executes @p command and returns its captured output -- confirm.
        std::string runProgram(std::string command);

        /// Presumably removes @p fileName from disk -- confirm.
        void deleteFile(std::string fileName);
    };
} // namespace sequencelogic