171 lines
4.2 KiB
C++
171 lines
4.2 KiB
C++
|
|
#include "PDFBookmarker.h"
|
||
|
|
|
||
|
|
using namespace sequencelogic;
|
||
|
|
|
||
|
|
/// Default constructor: performs no work of its own.
PDFBookmarker::PDFBookmarker() = default;
|
||
|
|
|
||
|
|
|
||
|
|
/// Destructor: performs no work of its own.
PDFBookmarker::~PDFBookmarker() = default;
|
||
|
|
|
||
|
|
void PDFBookmarker::bookmarkPDFs(std::string classificationJSON, std::string inFile, const std::string &outFilename)
|
||
|
|
{
|
||
|
|
std::string outFile;
|
||
|
|
if (outFilename == "")
|
||
|
|
outFile = getOutputFile(inFile);
|
||
|
|
else
|
||
|
|
outFile = outFilename;
|
||
|
|
std::string pdfMarkFile = getMarkFileName(inFile);
|
||
|
|
JSONObject classification;
|
||
|
|
JSONArray docs;
|
||
|
|
std::stringstream pdfMarkTxt;
|
||
|
|
std::stringstream gsCmd;
|
||
|
|
|
||
|
|
classification.LoadFromFile(classificationJSON);
|
||
|
|
|
||
|
|
docs = classification.getJSONArray(DOCS_TAG);
|
||
|
|
std::cout << "Found " << docs.getnumitems() << " docTypes in file" << std::endl;
|
||
|
|
//get all the pages for each doctype
|
||
|
|
for (int i =0; i < docs.getnumitems(); i++)
|
||
|
|
{
|
||
|
|
|
||
|
|
std::string docType;
|
||
|
|
JSONObject ¤tDoc = static_cast<JSONObject&>(docs[i]);
|
||
|
|
std::vector<int> pageNums;
|
||
|
|
docType = currentDoc.getJSONString(DOCTYPE_TAG.c_str());
|
||
|
|
|
||
|
|
JSONArray pages = currentDoc.getJSONArray(PAGES_ARRAY_TAG.c_str());
|
||
|
|
for (int j = 0; j < pages.getnumitems(); j++)
|
||
|
|
{
|
||
|
|
JSONObject ¤tPage = static_cast<JSONObject&>(pages[j]);
|
||
|
|
std::string pageFile;
|
||
|
|
|
||
|
|
pageFile = currentPage.getJSONString(PAGE_FILE_TAG.c_str());
|
||
|
|
pageNums.push_back(getPageNum(pageFile));
|
||
|
|
|
||
|
|
}
|
||
|
|
int pageNum = getSmallest(pageNums);
|
||
|
|
|
||
|
|
std::cout << " Found " << docType << " on page " << pageNum << std::endl;
|
||
|
|
pdfMarkTxt << getBookmarkTxt(docType, pageNum);
|
||
|
|
}
|
||
|
|
std::string fileTxt = pdfMarkTxt.str();
|
||
|
|
saveFile(pdfMarkFile, fileTxt);
|
||
|
|
|
||
|
|
gsCmd << GHOSTSCRIPT_COMMAND << "\"" << inFile << "\" update_info \"" << pdfMarkFile << "\" output \"" << outFile << "\"";
|
||
|
|
|
||
|
|
std::cout << "Bookmarking file " << inFile << "..." << std::endl;
|
||
|
|
|
||
|
|
// It seems that 'pdftk' translates the '$' character. The string "$-1" in a filename becomes "hBc1"...
|
||
|
|
std::string cmdStr = gsCmd.str();
|
||
|
|
size_t pos = cmdStr.find('$');
|
||
|
|
while ((pos = cmdStr.find('$', pos)) != std::string::npos)
|
||
|
|
{
|
||
|
|
cmdStr = cmdStr.replace(pos,1, "\\$");
|
||
|
|
++pos;
|
||
|
|
}
|
||
|
|
runCommand(cmdStr);
|
||
|
|
std::cout << " Saved." << std::endl;
|
||
|
|
|
||
|
|
deleteFile(pdfMarkFile);
|
||
|
|
}
|
||
|
|
|
||
|
|
/**
 * Derives the default output file name for a bookmarked PDF.
 *
 * The extension of inFile (everything from the last '.' of the file name) is
 * replaced with "_marked.pdf"; the result is ALWAYS a pdf file.
 *
 * A '.' that appears before the last '/' belongs to a directory component
 * (for example "./scan" or "../docs/scan") and is not treated as an extension
 * separator, so such paths simply get "_marked.pdf" appended.
 *
 * @param inFile Path of the input file.
 * @return Path of the output file, in the same directory as inFile.
 */
std::string PDFBookmarker::getOutputFile(std::string inFile)
{
    const std::string::size_type lastSlash = inFile.rfind('/');
    std::string::size_type lastDot = inFile.rfind('.');

    // Ignore a dot that is part of the directory portion of the path.
    if (lastDot != std::string::npos && lastSlash != std::string::npos && lastDot < lastSlash)
    {
        lastDot = std::string::npos;
    }

    std::stringstream result;
    // substr(0, npos) yields the whole string when there is no extension.
    result << inFile.substr(0, lastDot) << "_marked.pdf";
    return result.str();
}
|
||
|
|
|
||
|
|
/**
 * Returns the smallest value in a list of page numbers.
 *
 * @param pageNums Page numbers to search.
 * @return The minimum element, or 0 when pageNums is empty. (Reading element 0
 *         of an empty vector would be undefined behaviour, so the empty case
 *         is handled explicitly.)
 */
int PDFBookmarker::getSmallest(std::vector<int> pageNums)
{
    if (pageNums.empty())
    {
        return 0;
    }

    int smallest = pageNums.front();
    for (const int pageNum : pageNums)
    {
        if (pageNum < smallest)
        {
            smallest = pageNum;
        }
    }
    return smallest;
}
|
||
|
|
|
||
|
|
std::string PDFBookmarker::getMarkFileName(std::string inFile)
|
||
|
|
{
|
||
|
|
std::stringstream result;
|
||
|
|
result << inFile.substr(0, inFile.rfind('/') + 1) << PDFMARK_FILE;
|
||
|
|
return result.str();
|
||
|
|
}
|
||
|
|
|
||
|
|
/**
 * Writes text to a file, replacing any existing content.
 *
 * Failures are reported on standard output; nothing is returned or thrown to
 * the caller. A default-constructed std::ofstream reports errors through its
 * state flags rather than by throwing, so the stream state is checked
 * explicitly after opening and after writing.
 *
 * @param fileName Path of the file to write.
 * @param fileText Text to store in the file (not modified).
 */
void PDFBookmarker::saveFile(std::string fileName, std::string &fileText)
{
    std::cout << "Saving file " << fileName << std::endl;

    try
    {
        std::ofstream out;
        out.open(fileName, std::ofstream::out);
        if (!out.is_open())
        {
            std::cout << "Error opening/writing to file " << fileName << std::endl << std::endl;
            return;
        }

        out << fileText;
        // Close before checking so buffered data that fails to flush is noticed.
        out.close();
        if (out.fail())
        {
            std::cout << "Error opening/writing to file " << fileName << std::endl << std::endl;
        }
    }
    catch (const std::exception &ex) // by const reference: no slicing, what() stays accurate
    {
        std::cout << "Error opening/writing to file " << fileName << std::endl
                  << " " << ex.what() << std::endl << std::endl;
    }
}
|
||
|
|
|
||
|
|
/**
 * Deletes a file and reports the outcome on standard output.
 *
 * @param fileName Path of the file to remove.
 */
void PDFBookmarker::deleteFile(std::string fileName)
{
    std::cout << "Deleting File " << fileName << "..." << std::endl;

    // std::remove returns 0 on success; only claim success when it succeeded.
    if (std::remove(fileName.c_str()) == 0)
    {
        std::cout << " Done." << std::endl;
    }
    else
    {
        std::cout << " Failed to delete " << fileName << std::endl;
    }
}
|
||
|
|
|
||
|
|
/**
 * Runs a shell command and captures what it writes to standard output.
 *
 * The command is echoed to standard output before it is started. A failure to
 * start the command, or a non-zero status from pclose(), is reported on
 * standard output; the captured text is returned either way.
 *
 * @param cmd Command line handed to popen().
 * @return Everything the command wrote to its standard output (empty when the
 *         command could not be started).
 */
std::string PDFBookmarker::runCommand(std::string cmd)
{
    std::cout << "Running cmd:\n" << cmd << std::endl;

    std::string result;
    FILE *stream = popen(cmd.c_str(), "r");
    if (stream == NULL)
    {
        std::cout << "Failed to start command." << std::endl;
        return result;
    }

    const int MAX_BUFFER = 256;
    char buffer[MAX_BUFFER];

    // Loop on fgets() itself: it returns NULL on end-of-file AND on a read
    // error, whereas testing feof() alone never terminates after an error.
    while (fgets(buffer, MAX_BUFFER, stream) != NULL)
    {
        result.append(buffer);
    }

    const int status = pclose(stream);
    if (status != 0)
    {
        std::cout << "Command finished with non-zero status " << status << std::endl;
    }

    return result;
}
|
||
|
|
|
||
|
|
/**
 * Extracts the page number from a page image file name.
 *
 * The number is read from the characters that immediately follow the first
 * occurrence of "page" in imgFile (e.g. "page12.png" -> 12). Parsing stops at
 * the first character that cannot be part of the number, so the position of
 * any '.' in the path is irrelevant; names such as "./page12.png" are handled.
 *
 * @param imgFile File name (or path) of the page image.
 * @return The parsed page number, or 0 when imgFile contains no "page" marker
 *         or no number follows it.
 */
int PDFBookmarker::getPageNum(std::string imgFile)
{
    const std::string marker = "page";
    const std::string::size_type markerPos = imgFile.find(marker);
    if (markerPos == std::string::npos)
    {
        // Without the marker there is no well-defined place to read from.
        return 0;
    }

    // atoi() consumes the leading number and ignores whatever follows it.
    return atoi(imgFile.c_str() + markerPos + marker.size());
}
|
||
|
|
|
||
|
|
/**
 * Formats a single level-1 bookmark record.
 *
 * The record consists of four newline-terminated lines: "BookmarkBegin",
 * "BookmarkTitle: <docType>", "BookmarkLevel: 1" and
 * "BookmarkPageNumber: <startPageNum>".
 * NOTE(review): presumably this is the layout expected by the external
 * command's update_info operation - confirm against that tool.
 *
 * @param docType      Title of the bookmark.
 * @param startPageNum Page number the bookmark points to.
 * @return The formatted record.
 */
std::string PDFBookmarker::getBookmarkTxt(std::string docType, int startPageNum)
{
    std::stringstream record;

    record << "BookmarkBegin\n";
    record << "BookmarkTitle: " << docType << "\n";
    record << "BookmarkLevel: 1\n";
    record << "BookmarkPageNumber: " << startPageNum << "\n";

    return record.str();
}
|