Sleds/pdf-bookmarker/PDFBookmarker/PDFBookmarker.cpp

171 lines
4.2 KiB
C++
Raw Normal View History

2025-03-13 21:28:38 +00:00
#include "PDFBookmarker.h"
using namespace sequencelogic;
// Default constructor: performs no work of its own.
PDFBookmarker::PDFBookmarker() = default;
// Destructor: performs no cleanup of its own.
PDFBookmarker::~PDFBookmarker() = default;
// Adds one level-1 bookmark per document type to a PDF.
//
// Loads the classification JSON, determines the lowest page number listed for
// each document type, writes the resulting bookmark records to a temporary
// mark file (see getMarkFileName), runs the external bookmarking command on
// the input PDF, and finally deletes the mark file.
//
// classificationJSON  Path of the classification JSON file to load.
// inFile              Path of the PDF to bookmark.
// outFilename         Path for the bookmarked PDF. When empty, the name is
//                     derived from inFile via getOutputFile().
void PDFBookmarker::bookmarkPDFs(std::string classificationJSON, std::string inFile, const std::string &outFilename)
{
    const std::string outFile = outFilename.empty() ? getOutputFile(inFile) : outFilename;
    const std::string pdfMarkFile = getMarkFileName(inFile);

    JSONObject classification;
    JSONArray docs;
    std::stringstream pdfMarkTxt;
    std::stringstream gsCmd;

    classification.LoadFromFile(classificationJSON);
    docs = classification.getJSONArray(DOCS_TAG);
    std::cout << "Found " << docs.getnumitems() << " docTypes in file" << std::endl;

    // Collect the page numbers of every doctype and bookmark its first page.
    for (int i = 0; i < docs.getnumitems(); i++)
    {
        JSONObject &currentDoc = static_cast<JSONObject&>(docs[i]);
        std::string docType = currentDoc.getJSONString(DOCTYPE_TAG.c_str());
        JSONArray pages = currentDoc.getJSONArray(PAGES_ARRAY_TAG.c_str());

        std::vector<int> pageNums;
        for (int j = 0; j < pages.getnumitems(); j++)
        {
            JSONObject &currentPage = static_cast<JSONObject&>(pages[j]);
            std::string pageFile = currentPage.getJSONString(PAGE_FILE_TAG.c_str());
            pageNums.push_back(getPageNum(pageFile));
        }

        // A doctype without pages has no page to point a bookmark at; asking
        // for the smallest element of an empty list is not meaningful.
        if (pageNums.empty())
        {
            std::cout << " No pages listed for " << docType << "; skipping bookmark" << std::endl;
            continue;
        }

        int pageNum = getSmallest(pageNums);
        std::cout << " Found " << docType << " on page " << pageNum << std::endl;
        pdfMarkTxt << getBookmarkTxt(docType, pageNum);
    }

    std::string fileTxt = pdfMarkTxt.str();
    saveFile(pdfMarkFile, fileTxt);

    gsCmd << GHOSTSCRIPT_COMMAND << "\"" << inFile << "\" update_info \"" << pdfMarkFile << "\" output \"" << outFile << "\"";
    std::cout << "Bookmarking file " << inFile << "..." << std::endl;

    // It seems that 'pdftk' translates the '$' character. The string "$-1" in a filename becomes "hBc1"...
    // Escape every '$' with a backslash so it reaches the tool literally.
    // NOTE(review): only '$' is escaped; other shell-special characters in the
    // paths (for example '"' or '`') are passed through unchanged.
    std::string cmdStr = gsCmd.str();
    std::string::size_type pos = 0;
    while ((pos = cmdStr.find('$', pos)) != std::string::npos)
    {
        cmdStr.replace(pos, 1, "\\$");
        // Step over both the inserted backslash and the '$' itself; advancing
        // by only one would find the same '$' again and never terminate.
        pos += 2;
    }

    runCommand(cmdStr);
    std::cout << " Saved." << std::endl;
    deleteFile(pdfMarkFile);
}
// Derives the default output path for a bookmarked PDF.
//
// The extension of inFile (everything from the last '.' of the file name
// onwards) is replaced by "_marked.pdf"; the result is ALWAYS a pdf file.
// When the file name has no extension the suffix is simply appended.
//
// inFile  Path of the input PDF.
// Returns the path with "_marked.pdf" in place of the extension.
std::string PDFBookmarker::getOutputFile(std::string inFile)
{
    const std::string::size_type lastSlash = inFile.rfind('/');
    std::string::size_type lastDot = inFile.rfind('.');

    // A dot located before the last '/' belongs to a directory name, not to
    // the file's extension, so it must not be used as the cut point.
    if (lastDot != std::string::npos && lastSlash != std::string::npos && lastDot < lastSlash)
        lastDot = std::string::npos;

    // substr(0, npos) yields the whole string, covering the no-extension case.
    return inFile.substr(0, lastDot) + "_marked.pdf";
}
// Returns the smallest value in pageNums.
//
// pageNums  Page numbers to inspect.
// Returns the minimum element, or 0 when pageNums is empty (0 is also what
// getPageNum produces when no number can be parsed).
int PDFBookmarker::getSmallest(std::vector<int> pageNums)
{
    // Indexing element 0 of an empty vector would be undefined behaviour.
    if (pageNums.empty())
        return 0;

    int smallest = pageNums[0];
    for (std::vector<int>::size_type i = 1; i < pageNums.size(); ++i)
    {
        if (pageNums[i] < smallest)
            smallest = pageNums[i];
    }
    return smallest;
}
// Builds the path of the temporary mark file: PDFMARK_FILE placed in the same
// directory as inFile.
//
// inFile  Path of the input PDF.
// Returns everything up to and including the last '/' of inFile followed by
// PDFMARK_FILE; when inFile contains no '/', just PDFMARK_FILE.
std::string PDFBookmarker::getMarkFileName(std::string inFile)
{
    const std::string::size_type lastSlash = inFile.rfind('/');
    // Keep the trailing '/' as part of the directory prefix; no slash at all
    // means an empty prefix.
    const std::string::size_type dirLength =
        (lastSlash == std::string::npos) ? 0 : lastSlash + 1;

    std::ostringstream markPath;
    markPath << inFile.substr(0, dirLength) << PDFMARK_FILE;
    return markPath.str();
}
// Writes fileText to fileName, replacing any existing content.
//
// Failures to open or to write are reported on standard output; nothing is
// thrown and nothing is returned.
//
// fileName  Path of the file to create or overwrite.
// fileText  Text to store in the file (not modified).
void PDFBookmarker::saveFile(std::string fileName, std::string &fileText)
{
    std::cout << "Saving file " << fileName << std::endl;

    // Streams do not throw by default, so failures have to be detected by
    // inspecting the stream state rather than by catching exceptions.
    std::ofstream out(fileName.c_str(), std::ofstream::out);
    if (!out.is_open())
    {
        std::cout << "Error opening/writing to file " << fileName << std::endl
                  << " unable to open file for writing" << std::endl << std::endl;
        return;
    }

    out << fileText;
    // close() flushes the buffer; a failed flush sets failbit as well.
    out.close();
    if (out.fail())
    {
        std::cout << "Error opening/writing to file " << fileName << std::endl
                  << " write failed" << std::endl << std::endl;
    }
}
// Deletes the file at fileName and reports the outcome on standard output.
//
// fileName  Path of the file to remove.
void PDFBookmarker::deleteFile(std::string fileName)
{
    std::cout << "Deleting File " << fileName << "..." << std::endl;

    // std::remove returns non-zero on failure; do not claim success then.
    if (std::remove(fileName.c_str()) != 0)
    {
        std::cout << " Failed to delete " << fileName << std::endl;
        return;
    }
    std::cout << " Done." << std::endl;
}
// Runs cmd through popen() and captures what it writes to standard output.
//
// cmd  Command line to execute.
// Returns the captured output; empty when the command could not be started
// or produced no output.
std::string PDFBookmarker::runCommand(std::string cmd)
{
    std::cout << "Running cmd:\n" << cmd << std::endl;

    std::string result;
    FILE *stream = popen(cmd.c_str(), "r");
    if (stream == nullptr)
    {
        std::cout << "Error: unable to run command" << std::endl;
        return result;
    }

    const int MAX_BUFFER = 256;
    char buffer[MAX_BUFFER];
    // Loop on fgets itself: it returns null on end-of-file AND on a read
    // error, whereas testing feof() alone would spin forever after an error.
    while (fgets(buffer, MAX_BUFFER, stream) != nullptr)
        result.append(buffer);

    pclose(stream);
    return result;
}
// Extracts the page number that follows the first occurrence of "page" in an
// image file name (for example "doc_page12.png" yields 12).
//
// imgFile  Name or path of the page image.
// Returns the parsed number, or 0 when "page" does not occur or is not
// followed by a number.
int PDFBookmarker::getPageNum(std::string imgFile)
{
    const std::string marker = "page";
    const std::string::size_type markerPos = imgFile.find(marker);
    // Without the marker there is nothing to parse; adding to npos would wrap
    // around and read from an unrelated position.
    if (markerPos == std::string::npos)
        return 0;

    // atoi stops at the first non-digit, so no end position is needed. The
    // position of the first '.' in the path must not be used as a length:
    // paths such as "./page12.png" would then produce an empty number.
    const std::string::size_type digitsStart = markerPos + marker.size();
    return atoi(imgFile.c_str() + digitsStart);
}
// Formats one level-1 bookmark record for the mark file.
//
// docType       Text used as the bookmark title.
// startPageNum  Page number the bookmark points at.
// Returns the four-line record, each line terminated by '\n'.
std::string PDFBookmarker::getBookmarkTxt(std::string docType, int startPageNum)
{
    std::ostringstream record;
    record << "BookmarkBegin\n";
    record << "BookmarkTitle: " << docType << "\n";
    record << "BookmarkLevel: 1\n";
    record << "BookmarkPageNumber: " << startPageNum << "\n";
    return record.str();
}