Update: Thanks to stardt, whose script works! The PDF is a single page taken from another PDF. I tried the script on that other PDF as well, and it also correctly split each page.
Your code assumes that `p.mediaBox.lowerLeft` is (0, 0), but it is actually (0, 497).
This works for the file you provided:
#!/usr/bin/env python
# Read a PDF from stdin, cut every page into a lower and an upper half along
# the horizontal midline of its media box, and write all halves to stdout.
# Progress and the original box corners are reported on stderr.
import copy
import sys
from pyPdf import PdfFileWriter, PdfFileReader

reader = PdfFileReader(sys.stdin)
writer = PdfFileWriter()

for page_no in range(reader.getNumPages()):
    lower = reader.getPage(page_no)
    # The copy is taken before mediaBox is read for the first time.
    # NOTE(review): presumably this is what keeps the two halves from
    # sharing one box object -- confirm against pyPdf's accessor.
    upper = copy.copy(lower)

    # Snapshot the original corners; every new box is derived from these.
    bl = lower.mediaBox.lowerLeft
    ur = lower.mediaBox.upperRight

    print >> sys.stderr, 'splitting page',page_no
    print >> sys.stderr, '\tlowerLeft:',lower.mediaBox.lowerLeft
    print >> sys.stderr, '\tupperRight:',lower.mediaBox.upperRight

    # Vertical midpoint of the box, measured from its real lower edge
    # (which is not necessarily 0).
    mid_y = (bl[1]+ur[1])/2

    # Lower half: original bottom edge up to the midline.
    lower.mediaBox.upperRight = (ur[0], mid_y)
    lower.mediaBox.lowerLeft = bl

    # Upper half: midline up to the original top edge.
    upper.mediaBox.upperRight = ur
    upper.mediaBox.lowerLeft = (bl[0], mid_y)

    # Even-indexed pages emit the upper half first, odd-indexed pages the
    # lower half first.
    if page_no % 2 == 0:
        ordered = (upper, lower)
    else:
        ordered = (lower, upper)
    for half in ordered:
        writer.addPage(half)

writer.write(sys.stdout)
@stardt's code was quite useful, but I had problems splitting a batch of PDF files with different orientations. Here's a more general function that works regardless of the page orientation:
import copy
import math
import pyPdf
def split_pages(src, dst):
    """Split every page of the PDF at ``src`` in half and write it to ``dst``.

    A page whose media box is wider than it is tall is cut into a left and a
    right half; any other page is cut into a top and a bottom half.  For each
    input page two pages are appended to the output: left then right, or top
    then bottom.

    The cut is placed at the midpoint of the media box itself, so pages whose
    lower-left corner is not at (0, 0) are halved correctly.  Box coordinates
    are rounded down to whole numbers before use.

    :param src: path of the PDF file to read.
    :param dst: path of the PDF file to write; it is truncated if it exists.
    """
    # The source stays open until after write(), as in the original code.
    # NOTE(review): presumably pyPdf reads page content lazily -- confirm.
    # The ``with`` statement closes both files even if an exception is raised.
    with open(src, 'rb') as src_f, open(dst, 'w+b') as dst_f:
        reader = pyPdf.PdfFileReader(src_f)
        writer = pyPdf.PdfFileWriter()

        for i in range(reader.getNumPages()):
            p = reader.getPage(i)
            q = copy.copy(p)
            # copy.copy() is shallow: give q its own box object so that
            # editing one half's box cannot change the other's.
            q.mediaBox = copy.copy(p.mediaBox)

            left, bottom = [math.floor(v) for v in p.mediaBox.lowerLeft]
            right, top = [math.floor(v) for v in p.mediaBox.upperRight]

            # Compare the box's extent, not its absolute corner coordinates,
            # so an offset origin does not skew the orientation test.
            if (right - left) > (top - bottom):
                # Landscape: p becomes the left half, q the right half.
                mid_x = math.floor((left + right) / 2.0)
                p.mediaBox.upperRight = (mid_x, top)
                p.mediaBox.lowerLeft = (left, bottom)
                q.mediaBox.upperRight = (right, top)
                q.mediaBox.lowerLeft = (mid_x, bottom)
            else:
                # Portrait or square: p becomes the top half, q the bottom.
                mid_y = math.floor((bottom + top) / 2.0)
                p.mediaBox.upperRight = (right, top)
                p.mediaBox.lowerLeft = (left, mid_y)
                q.mediaBox.upperRight = (right, mid_y)
                q.mediaBox.lowerLeft = (left, bottom)

            writer.addPage(p)
            writer.addPage(q)

        writer.write(dst_f)
I'd like to add that you have to make sure your `mediaBox` objects are not shared between the copies `p` and `q`.
This can easily happen if you read from `p.mediaBox` before taking the copy.
In that case, writing to e.g. `p.mediaBox.upperRight` may also modify `q.mediaBox`, and vice versa.
@moraes' solution takes care of this by explicitly copying the mediaBox.