How to get bookmark's page number

后端 未结 4 1790
南方客
南方客 2020-12-16 04:52
from typing import List
from PyPDF2 import PdfFileReader
from PyPDF2.generic import Destination


def get_outlines(pdf_filepat         


        
4条回答
  •  南方客
    南方客 (楼主)
    2020-12-16 05:28

    Handle nested bookmarks recursively, following the suggestions from vjayky and Giulio D.

    PyPDF2 >= v1.25

    from PyPDF2 import PdfFileReader
    
    def printBookmarksPageNumbers(pdf):
        """Print each bookmark title together with its 1-based page number.

        The outline returned by ``pdf.getOutlines()`` is walked recursively:
        a nested list is treated as a deeper bookmark level and its entries
        are printed with four additional spaces of indentation.

        Args:
            pdf: A reader object providing ``getOutlines()`` and
                ``getDestinationPageNumber()`` (e.g. a ``PdfFileReader``).
        """
        def review_and_print_bookmarks(bookmarks, lvl=0):
            for b in bookmarks:
                # isinstance (not an exact type comparison) so that list
                # subclasses are also recognised as nested levels.
                if isinstance(b, list):
                    review_and_print_bookmarks(b, lvl + 4)
                    continue
                # The reader's page numbers are 0-based; add 1 for display.
                pg_num = pdf.getDestinationPageNumber(b) + 1
                print("%s%s: Page %s" % (" " * lvl, b.title, pg_num))
        review_and_print_bookmarks(pdf.getOutlines())
    
    # Open in binary mode; the reader is used while the file is still open.
    with open('document.pdf', "rb") as pdf_file:
        pdf = PdfFileReader(pdf_file)
        printBookmarksPageNumbers(pdf)
    

    PyPDF2 < v1.25

    from PyPDF2 import PdfFileReader
    
    def printBookmarksPageNumbers(pdf):
        """Print each bookmark title together with its 1-based page number.

        Page numbers are resolved by first mapping every page's indirect
        object id to its 0-based index, then looking up each bookmark's
        ``page.idnum`` in that map. Nested lists in the outline are treated
        as deeper bookmark levels and printed with four additional spaces
        of indentation.

        Args:
            pdf: A reader object providing ``getNumPages()``, ``getPage()``
                and ``getOutlines()`` (e.g. a ``PdfFileReader``).

        Raises:
            KeyError: If a bookmark's ``page.idnum`` matches no page id.
        """
        # Map page ids to 0-based page numbers.
        pg_id_to_num = {
            pdf.getPage(pg_num).indirectRef.idnum: pg_num
            for pg_num in range(pdf.getNumPages())
        }

        def review_and_print_bookmarks(bookmarks, lvl=0):
            for b in bookmarks:
                # isinstance (not an exact type comparison) so that list
                # subclasses are also recognised as nested levels.
                if isinstance(b, list):
                    review_and_print_bookmarks(b, lvl + 4)
                    continue
                # The map stores 0-based indices; add 1 for display.
                pg_num = pg_id_to_num[b.page.idnum] + 1
                print("%s%s: Page %s" % (" " * lvl, b.title, pg_num))
        review_and_print_bookmarks(pdf.getOutlines())
    
    # Open in binary mode; the reader is used while the file is still open.
    with open('document.pdf', "rb") as pdf_file:
        pdf = PdfFileReader(pdf_file)
        printBookmarksPageNumbers(pdf)
    

提交回复
热议问题