Sleds/scorewalker-utils/RunTest/pdf-page-cnt.py

63 lines
1.8 KiB
Python
Raw Normal View History

2025-03-13 21:28:38 +00:00
import os
import csv
import subprocess
def _get_page_cnt(file):
pdftk_cmd = ['pdftk', file, 'dump_data']
proc = subprocess.Popen(pdftk_cmd, stdout=subprocess.PIPE)
num_pages = 0
for line in [l.decode('utf-8') for l in iter(proc.stdout.readline, b'')]:
if 'NumberOfPages:' in line:
data = line[15:-2]
num_pages = int(data)
proc.kill()
break
proc.wait()
return num_pages
if __name__ == '__main__':
    # Script entry point: count the pages of every PDF directly inside
    # root_dir, print a per-file line plus a summary, and save the results
    # (including 'Total' and 'Average' rows) to a CSV file in the same folder.
    root_dir = r'\\sl_tree.sl.int\eng\RESTRICTED-MissionGlobal\100_mg_combined\Fannie Revised Files'
    out_file = os.path.join(root_dir, 'Package Page Counts.csv')
    page_cnts = []
    total_page_count = 0
    num_docs = 0
    for entry in os.listdir(root_dir):
        # Compare the extension case-insensitively so '.PDF' files are not
        # silently left out of the totals.
        if not entry.lower().endswith('.pdf'):
            continue
        num_docs += 1
        pkg_file = os.path.join(root_dir, entry)
        # Drop the 4-character '.pdf' extension to get the package name.
        file_name = os.path.split(pkg_file)[1][:-4]
        page_cnt = _get_page_cnt(pkg_file)
        total_page_count += page_cnt
        print('%03d: Found %04d page in package file "%s"' % (num_docs, page_cnt, file_name))
        page_cnts.append({'Package Name': file_name,
                          'Pages': page_cnt})
    # An empty directory must not crash with ZeroDivisionError; report an
    # average of 0 in that case.
    average = round(total_page_count / num_docs) if num_docs else 0
    print('')
    print('Done counting pages:')
    print(' Total pages: %d' % total_page_count)
    print(' Documents: %d' % num_docs)
    print(' Average: %d' % average)
    print('')
    page_cnts.append({'Package Name': 'Total', 'Pages': total_page_count})
    page_cnts.append({'Package Name': 'Average', 'Pages': average})
    print('Saving page count data to %s' % out_file)
    # newline='' lets the csv module control line endings itself.
    with open(out_file, 'w', newline='') as out_writer:
        writer = csv.DictWriter(out_writer, ['Package Name', 'Pages'])
        writer.writeheader()
        writer.writerows(page_cnts)
    print('Done')