#include "PDFBookmarker.h"

using namespace sequencelogic;

/** Constructs a bookmarker; there is no state to initialise. */
PDFBookmarker::PDFBookmarker()
{
}

/** Destroys the bookmarker; there is nothing to release. */
PDFBookmarker::~PDFBookmarker()
{
}

/**
 * Adds one bookmark per classified document type to a PDF.
 *
 * The classification JSON is loaded from disk, and for every entry of its
 * DOCS_TAG array the lowest page number among the entry's pages is used as
 * the bookmark target. The bookmark text is written to a temporary mark file
 * next to the input, an external command (GHOSTSCRIPT_COMMAND) is run to
 * produce the output PDF, and the mark file is deleted afterwards.
 *
 * @param classificationJSON Path of the classification JSON file to load.
 * @param inFile             Path of the PDF to bookmark.
 * @param outFilename        Path of the PDF to write; when empty, the name is
 *                           derived from inFile via getOutputFile().
 */
void PDFBookmarker::bookmarkPDFs(std::string classificationJSON, std::string inFile, const std::string &outFilename)
{
    const std::string outFile = outFilename.empty() ? getOutputFile(inFile) : outFilename;
    std::string pdfMarkFile = getMarkFileName(inFile);

    JSONObject classification;
    JSONArray docs;
    std::stringstream pdfMarkTxt;
    std::stringstream gsCmd;

    classification.LoadFromFile(classificationJSON);
    docs = classification.getJSONArray(DOCS_TAG);
    std::cout << "Found " << docs.getnumitems() << " docTypes in file" << std::endl;

    // Collect the page numbers of each doctype and bookmark its first page.
    for (int i = 0; i < docs.getnumitems(); i++)
    {
        // NOTE(review): the cast target is assumed to be JSONObject& --
        // confirm against the return type of JSONArray::operator[].
        JSONObject &currentDoc = static_cast<JSONObject &>(docs[i]);
        std::string docType = currentDoc.getJSONString(DOCTYPE_TAG.c_str());
        JSONArray pages = currentDoc.getJSONArray(PAGES_ARRAY_TAG.c_str());

        std::vector<int> pageNums;
        for (int j = 0; j < pages.getnumitems(); j++)
        {
            JSONObject &currentPage = static_cast<JSONObject &>(pages[j]);
            std::string pageFile = currentPage.getJSONString(PAGE_FILE_TAG.c_str());
            pageNums.push_back(getPageNum(pageFile));
        }

        // A doctype without pages has nothing to point a bookmark at.
        if (pageNums.empty())
        {
            std::cout << " Skipping " << docType << ": no pages listed" << std::endl;
            continue;
        }

        int pageNum = getSmallest(pageNums);
        std::cout << " Found " << docType << " on page " << pageNum << std::endl;
        pdfMarkTxt << getBookmarkTxt(docType, pageNum);
    }

    std::string fileTxt = pdfMarkTxt.str();
    saveFile(pdfMarkFile, fileTxt);

    // NOTE(review): file names are only wrapped in double quotes here; apart
    // from '$' (handled below) shell metacharacters such as '"' or '`' inside
    // a file name are not escaped.
    gsCmd << GHOSTSCRIPT_COMMAND << "\"" << inFile << "\" update_info \"" << pdfMarkFile << "\" output \"" << outFile << "\"";
    std::cout << "Bookmarking file " << inFile << "..." << std::endl;

    // It seems that 'pdftk' translates the '$' character. The string "$-1" in
    // a filename becomes "hBc1"... so every '$' is prefixed with a backslash.
    // The search resumes two characters on (past the inserted '\' and the
    // '$' itself); resuming only one character on would find the same '$'
    // again and never terminate.
    std::string cmdStr = gsCmd.str();
    for (std::string::size_type pos = cmdStr.find('$');
         pos != std::string::npos;
         pos = cmdStr.find('$', pos + 2))
    {
        cmdStr.insert(pos, 1, '\\');
    }

    runCommand(cmdStr);
    std::cout << " Saved." << std::endl;
    deleteFile(pdfMarkFile);
}

/**
 * Derives the default output name for a bookmarked PDF.
 *
 * The extension of inFile (if any) is replaced by "_marked.pdf"; the result
 * is ALWAYS a pdf file. Only a '.' in the final path component counts as the
 * start of an extension, so a dotted directory name is left intact.
 *
 * @param inFile Path of the input file.
 * @return inFile without its extension, followed by "_marked.pdf".
 */
std::string PDFBookmarker::getOutputFile(std::string inFile)
{
    const std::string::size_type slash = inFile.rfind('/');
    std::string::size_type dot = inFile.rfind('.');
    if (dot != std::string::npos && slash != std::string::npos && dot < slash)
        dot = std::string::npos;   // the dot belongs to a directory name

    // substr(0, npos) yields the whole string when there is no extension.
    return inFile.substr(0, dot) + "_marked.pdf";
}

/**
 * Returns the smallest value in pageNums.
 *
 * @param pageNums Page numbers to scan.
 * @return The minimum element, or -1 when pageNums is empty.
 */
int PDFBookmarker::getSmallest(std::vector<int> pageNums)
{
    if (pageNums.empty())
        return -1;

    int smallest = pageNums[0];
    for (size_t i = 1; i < pageNums.size(); i++)
    {
        if (pageNums[i] < smallest)
            smallest = pageNums[i];
    }
    return smallest;
}

/**
 * Builds the path of the temporary mark file: the directory part of inFile
 * (up to and including the last '/') followed by PDFMARK_FILE.
 *
 * @param inFile Path of the input file.
 * @return Path of the mark file in the same directory as inFile.
 */
std::string PDFBookmarker::getMarkFileName(std::string inFile)
{
    std::stringstream result;
    // When inFile has no '/', rfind() returns npos and npos + 1 wraps to 0,
    // so the directory prefix is empty.
    result << inFile.substr(0, inFile.rfind('/') + 1) << PDFMARK_FILE;
    return result.str();
}

void PDFBookmarker::saveFile(std::string fileName, std::string &fileText) { std::cout << "Saving file " << fileName <