63 lines
1.8 KiB
Python
63 lines
1.8 KiB
Python
|
|
import os
|
||
|
|
import csv
|
||
|
|
import subprocess
|
||
|
|
|
||
|
|
|
||
|
|
def _get_page_cnt(file):
    """Return the page count that ``pdftk <file> dump_data`` reports.

    Runs ``pdftk`` as a child process and scans its standard output for
    the ``NumberOfPages: <n>`` record.

    Args:
        file: Path of the PDF to inspect; passed to pdftk unchanged.

    Returns:
        The page count as an int, or 0 when the output contains no
        ``NumberOfPages`` record.

    Raises:
        OSError: If the ``pdftk`` executable cannot be started.
        ValueError: If the ``NumberOfPages`` value is not an integer.
    """
    pdftk_cmd = ['pdftk', file, 'dump_data']

    num_pages = 0
    # The context manager closes the stdout pipe and reaps the child on
    # every exit path, including when int() raises.
    with subprocess.Popen(pdftk_cmd, stdout=subprocess.PIPE) as proc:
        # Stream the output line by line so the scan can stop as soon as
        # the record is found instead of buffering the whole dump first.
        for raw_line in proc.stdout:
            line = raw_line.decode('utf-8', errors='replace')
            key, _, value = line.partition(':')
            # Compare the key exactly so the text "NumberOfPages:" inside
            # some metadata value cannot be mistaken for the record.
            if key.strip() == 'NumberOfPages':
                # strip() copes with both "\n" and "\r\n" line endings;
                # a fixed slice would drop a digit on LF-only platforms.
                num_pages = int(value.strip())
                # Nothing else in the dump is needed; stop the child.
                proc.kill()
                break

    return num_pages
|
||
|
|
|
||
|
|
|
||
|
|
if __name__ == '__main__':
    # Directory scanned for package PDFs; the CSV report is written into
    # the same directory.
    root_dir = r'\\sl_tree.sl.int\eng\RESTRICTED-MissionGlobal\100_mg_combined\Fannie Revised Files'
    out_file = os.path.join(root_dir, 'Package Page Counts.csv')

    # Match the extension case-insensitively so "X.PDF" is not skipped,
    # and sort so the console log and the CSV rows come out in a
    # reproducible order (os.listdir order is arbitrary).
    pkg_names = sorted(f for f in os.listdir(root_dir)
                       if f.lower().endswith('.pdf'))

    page_cnts = []
    total_page_count = 0
    for doc_idx, pkg_name in enumerate(pkg_names, start=1):
        # Report the package by its file name without the extension.
        file_name = os.path.splitext(pkg_name)[0]
        page_cnt = _get_page_cnt(os.path.join(root_dir, pkg_name))

        total_page_count += page_cnt

        print('%03d: Found %04d page in package file "%s"' % (doc_idx, page_cnt, file_name))
        page_cnts.append({'Package Name': file_name,
                          'Pages': page_cnt})

    num_docs = len(pkg_names)
    # An empty directory would otherwise raise ZeroDivisionError here.
    average = round(total_page_count / num_docs) if num_docs else 0

    print('')
    print('Done counting pages:')
    print(' Total pages: %d' % total_page_count)
    print(' Documents: %d' % num_docs)
    print(' Average: %d' % average)
    print('')

    # Summary rows go after the per-package rows in the same two columns.
    page_cnts.append({'Package Name': 'Total', 'Pages': total_page_count})
    page_cnts.append({'Package Name': 'Average', 'Pages': average})

    print('Saving page count data to %s' % out_file)
    # newline='' lets the csv module control line endings itself.
    with open(out_file, 'w', newline='') as out_writer:
        writer = csv.DictWriter(out_writer, ['Package Name', 'Pages'])
        writer.writeheader()
        writer.writerows(page_cnts)

    print('Done')
|