"""Count the pages of every PDF package in a directory and save a CSV report.

Page counts are obtained by running the external ``pdftk`` tool
(``pdftk <file> dump_data``), which must be available on the PATH.
"""
import csv
import os
import subprocess

# Directory scanned when the script is run without an explicit directory.
DEFAULT_ROOT_DIR = r'\\sl_tree.sl.int\eng\RESTRICTED-MissionGlobal\100_mg_combined\Fannie Revised Files'

# Prefix of the ``pdftk dump_data`` output line that carries the page count.
_PAGE_COUNT_KEY = 'NumberOfPages:'


def _get_page_cnt(file):
    """Return the number of pages pdftk reports for the PDF at *file*.

    Runs ``pdftk <file> dump_data`` and reads its output line by line until
    the ``NumberOfPages:`` entry is found.

    Args:
        file: Path of the PDF file to inspect.

    Returns:
        The page count as an int, or 0 when the output contains no
        ``NumberOfPages:`` line.

    Raises:
        OSError: If the ``pdftk`` executable cannot be started.
        ValueError: If the value after ``NumberOfPages:`` is not an integer.
    """
    pdftk_cmd = ['pdftk', file, 'dump_data']
    num_pages = 0
    # The context manager closes the stdout pipe and waits for the process,
    # even when parsing raises.
    with subprocess.Popen(pdftk_cmd, stdout=subprocess.PIPE) as proc:
        for raw_line in proc.stdout:
            # Only the ASCII key/value pair matters, so undecodable bytes on
            # unrelated lines must not abort the scan.
            line = raw_line.decode('utf-8', errors='replace').strip()
            if line.startswith(_PAGE_COUNT_KEY):
                # Take everything after the key instead of slicing at fixed
                # offsets, so both "\n" and "\r\n" line endings parse
                # correctly; int() ignores the surrounding whitespace.
                num_pages = int(line[len(_PAGE_COUNT_KEY):])
                # The remaining output is not needed; stop pdftk right away.
                proc.kill()
                break
    return num_pages


def main(root_dir=DEFAULT_ROOT_DIR):
    """Count the pages of every ``.pdf`` file in *root_dir* and write a report.

    Prints one progress line per file plus a summary, then writes
    ``Package Page Counts.csv`` into *root_dir* with one row per package
    followed by ``Total`` and ``Average`` rows.

    Args:
        root_dir: Directory that holds the PDF package files and receives
            the CSV report.
    """
    out_file = os.path.join(root_dir, 'Package Page Counts.csv')
    page_cnts = []
    total_page_count = 0
    num_docs = 0

    # os.listdir() order is unspecified; sort so the report is reproducible.
    pdf_names = sorted(f for f in os.listdir(root_dir) if f.endswith('.pdf'))
    for num_docs, pdf_name in enumerate(pdf_names, start=1):
        file_name = pdf_name[:-4]  # drop the ".pdf" extension
        page_cnt = _get_page_cnt(os.path.join(root_dir, pdf_name))
        total_page_count += page_cnt
        print('%03d: Found %04d page in package file "%s"' % (num_docs, page_cnt, file_name))
        page_cnts.append({'Package Name': file_name, 'Pages': page_cnt})

    # A directory without PDFs must not crash with ZeroDivisionError.
    average = round(total_page_count / num_docs) if num_docs else 0

    print('')
    print('Done counting pages:')
    print(' Total pages: %d' % total_page_count)
    print(' Documents: %d' % num_docs)
    print(' Average: %d' % average)
    print('')

    page_cnts.append({'Package Name': 'Total', 'Pages': total_page_count})
    page_cnts.append({'Package Name': 'Average', 'Pages': average})

    print('Saving page count data to %s' % out_file)
    # newline='' lets the csv module control the line terminator.
    with open(out_file, 'w+', newline='') as out_writer:
        writer = csv.DictWriter(out_writer, ['Package Name', 'Pages'])
        writer.writeheader()
        writer.writerows(page_cnts)
    print('Done')


if __name__ == '__main__':
    main()