I am working on a UNIX system and I\'d like to merge thousands of PDF files into one file in order to print it. I don\'t know how many pages they are in advance.
I\'
Martin had a good start. I updated to PyPdf2 and made a few tweaks like sorting the output by filename.
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from argparse import ArgumentParser
from glob import glob
from PyPDF2 import PdfFileReader, PdfFileWriter
import os.path
def merge(pdfpath, blank_filename, output_filename):
with open(blank_filename, "rb") as f:
blank = PdfFileReader(f)
output = PdfFileWriter()
filelist = sorted(glob(os.path.join(pdfpath,'*.pdf')))
for pdffile in filelist:
if pdffile == output_filename:
continue
print("Parse '%s'" % pdffile)
document = PdfFileReader(open(pdffile, 'rb'))
for i in range(document.getNumPages()):
output.addPage(document.getPage(i))
if document.getNumPages() % 2 == 1:
output.addPage(blank.getPage(0))
print("Add blank page to '%s' (had %i pages)" % (pdffile, document.getNumPages()))
print("Start writing '%s'" % output_filename)
with open(output_filename, "wb") as output_stream:
output.write(output_stream)
if __name__ == "__main__":
parser = ArgumentParser()
# Add more options if you like
parser.add_argument("-o", "--output", dest="output_filename", default="merged.pdf",
help="write merged PDF to FILE", metavar="FILE")
parser.add_argument("-b", "--blank", dest="blank_filename", default="blank.pdf",
help="path to blank PDF file", metavar="FILE")
parser.add_argument("-p", "--path", dest="path", default=".",
help="path of source PDF files")
args = parser.parse_args()
merge(args.path, args.blank_filename, args.output_filename)
`