//
// Sequence Logic OCR
// Copyright (c) 2016, Sequence Logic
//
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <stdio.h>
|
|
#include <unistd.h>
|
|
#include <signal.h>
|
|
#include <getopt.h>
|
|
#include <iostream>
|
|
#include <memory>
|
|
|
|
#include "nuanceocr.h"
|
|
#include "sledsconstants.h"
|
|
|
|
using namespace sequencelogic;
|
|
|
|
extern const char* TODAYS_DATE;
|
|
|
|
/**
 * Prints the command-line usage summary to standard output.
 *
 * The text is written with iostreams, not printf, so a '%' in the help text
 * is emitted literally and must not be doubled.
 */
void writeHelp()
{
    // Build the whole message with '\n' and flush once at the end instead of
    // flushing after every line.
    std::cout << "Usage: sequencelogic-ocr [OPTIONS] [FILE_OR_FOLDER_NAME]\n"
              << "Option Meaning\n"
              << "-----------------------------\n"
              << "-h Show the help message.\n"
              << "-f The given input is a folder containing images to be OCR'd.\n"
              << "-i The given input is a file to be OCR'd.\n"
              << "-r {xx}-{xx} The range of pages to be OCR'd (inclusive).\n"
              << "-p 'Pretty print' the output file.\n"
              << "-e {xx} Stop OCR after XX pages cannot be recognized.\n"
              << "-v Verifies that the license is configured correctly.\n"
              << "-m {xx} Runs the multi threaded version of Nuance using XX threads (1 by default).\n"
              << "-s {dirname} Output every page as a separate file output 'dirname/page%05d.png'.\n"
              << "-t {dirname} Use 'dirname' for temp fixed file creation." << std::endl;
}
|
|
|
|
/**
 * Parses a single non-negative page number.
 *
 * Blanks (spaces/tabs) around the number are ignored; everything else must be
 * a decimal digit. Signs, empty text and trailing garbage are rejected.
 *
 * @param text   Text expected to hold one page number.
 * @param value  Receives the parsed number; left untouched on failure.
 * @return true when text held a valid page number.
 */
static bool parsePageNumber(const std::string &text, int &value)
{
    std::string::size_type first = 0;
    std::string::size_type last = text.size();
    while (first < last && (text[first] == ' ' || text[first] == '\t'))
        ++first;
    while (last > first && (text[last - 1] == ' ' || text[last - 1] == '\t'))
        --last;

    // Nine digits is at most 999,999,999, which fits a 32-bit int, so the
    // accumulation below cannot overflow.
    const std::string::size_type maxDigits = 9;
    const std::string::size_type digitCount = last - first;
    if (digitCount == 0 || digitCount > maxDigits)
        return false;

    int parsed = 0;
    for (std::string::size_type i = first; i < last; ++i)
    {
        const char c = text[i];
        if (c < '0' || c > '9')
            return false;
        parsed = parsed * 10 + (c - '0');
    }

    value = parsed;
    return true;
}

/**
 * Parses a page range of the form "xx-xx" (inclusive).
 *
 * A lone number "xx" is accepted as the single-page range xx-xx. Malformed
 * input (non-numeric text, a missing bound, a negative number) is rejected
 * rather than being silently read as page 0.
 *
 * @param arg        The text of the range argument.
 * @param startPage  Receives the first page; set to 0 on failure.
 * @param endPage    Receives the last page; set to 0 on failure.
 * @return true when the range is well formed and startPage <= endPage;
 *         false otherwise, after printing a diagnostic to standard output.
 */
bool getPageRange(std::string arg, int &startPage, int &endPage)
{
    // Split on the first '-'. Without a dash the same text is used for both
    // bounds.
    const std::string::size_type dashPos = arg.find('-');
    const std::string startStr = arg.substr(0, dashPos);
    const std::string endStr = (dashPos == std::string::npos) ? arg : arg.substr(dashPos + 1);

    int parsedStart = 0;
    int parsedEnd = 0;
    if (!parsePageNumber(startStr, parsedStart) || !parsePageNumber(endStr, parsedEnd))
    {
        std::cout << "Invalid page range '" << arg
                  << "': expected {xx}-{xx} using non-negative page numbers." << std::endl;
        startPage = 0;
        endPage = 0;
        return false;
    }

    if (parsedStart > parsedEnd)
    {
        std::cout << "The start page cannot be bigger than the end page." << std::endl;
        startPage = 0;
        endPage = 0;
        return false;
    }

    startPage = parsedStart;
    endPage = parsedEnd;
    std::cout << " Running on page range: " << startPage << "-" << endPage << "." << std::endl;
    return true;
}
|
|
|
|
int main(int argc, char *argv[])
|
|
{
|
|
int nExitVal = OCR_SUCCESS;
|
|
|
|
std::cout << "OCR engine, built on " << TODAYS_DATE << std::endl;
|
|
|
|
std::string opts = "t:f:i:e:m:r:s:hvp";
|
|
std::unique_ptr<COCRBase> pOCREng;
|
|
|
|
// Possibly determine which OCR engine to use. For now, we're only using Nuance.
|
|
//pOCREng.reset(new CNuanceOCR());
|
|
|
|
char opt;
|
|
pOCREng.reset(new CNuanceOCR());
|
|
std::vector<std::string> inputFiles;
|
|
OCROpts ocrOptions;
|
|
while ((opt = static_cast<char>(getopt(argc, argv, opts.c_str()))) != EOF)
|
|
{
|
|
switch (opt)
|
|
{
|
|
|
|
case 'i':
|
|
case 'f':
|
|
inputFiles.push_back(optarg);
|
|
break;
|
|
|
|
case 'v':
|
|
pOCREng->Validate();
|
|
break;
|
|
|
|
case 'e':
|
|
ocrOptions._numPagesInError = atoi(optarg);
|
|
break;
|
|
|
|
case 'h':
|
|
case '?':
|
|
default:
|
|
writeHelp();
|
|
break;
|
|
|
|
case 'p':
|
|
ocrOptions._bPrettyOutput = true;
|
|
break;
|
|
|
|
case 'm':
|
|
ocrOptions._nThreadCount = atoi(optarg);
|
|
break;
|
|
|
|
case 'r':
|
|
getPageRange(optarg, ocrOptions._nStartPage, ocrOptions._nEndPage);
|
|
break;
|
|
|
|
case 's':
|
|
ocrOptions._bSinglePageSave = true;
|
|
ocrOptions._singlePageFolder = optarg;
|
|
break;
|
|
|
|
case 't':
|
|
ocrOptions._tempFolder = optarg;
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (inputFiles.size() == 0)
|
|
{
|
|
std::cout << "No files to OCR.\n";
|
|
writeHelp();
|
|
}
|
|
|
|
if (pOCREng != nullptr)
|
|
{
|
|
for (size_t i = 0; (nExitVal == OCR_SUCCESS) && (i < inputFiles.size()); ++i)
|
|
{
|
|
ocrOptions._inFile = inputFiles[i];
|
|
nExitVal = pOCREng->Recognize(ocrOptions);
|
|
}
|
|
}
|
|
|
|
return nExitVal;
|
|
}
|