[PDFBox]后台操作pdf的工具类

匿名 (未验证) 提交于 2019-12-02 23:05:13

  PDFBox是Apache旗下的一个用于操作pdf的开源类库。它既提供了命令行工具,也提供了可以在Java程序中直接调用的类库。

  下载地址:https://pdfbox.apache.org/

  

  下面的实验基于JDK8+pdfbox-2.0.13.jar+pdfbox-app-2.0.13.jar(命令行工具库)

1.命令行使用

https://pdfbox.apache.org/2.0/commandline.html

  命令行工具可以提取pdf中的图片、文本,合并pdf与拆分pdf,pdf转换为图片等操作。

1.提取图片

java -jar pdfbox-app-2.0.13.jar ExtractImages ./1.pdf

  会在同文件夹下提取出pdf中的图片。

2.提取文字

java -jar pdfbox-app-2.0.13.jar ExtractText ./1.pdf ./text.txt

  当然还可以指定起始页号等参数。

3.pdf转换为图片

java -jar pdfbox-app-2.0.13.jar PDFToImage ./1.pdf

  更多的命令行操作可以参考官网的文档,其中对各个参数都有详细的解释。也可以把这些命令封装成工具类,在Java中通过Runtime启动子进程来执行,并结合多线程在后台并发地操作pdf。

2.Java中作为类库使用

https://www.cnblogs.com/qlqwjy/p/8213989.html

依赖的jar包:

commons-logging-1.0.4.jar、pdfbox-2.0.13.jar、fontbox-2.0.13.jar

package cn.qlq;  import java.awt.image.BufferedImage; import java.io.File; import java.io.FileOutputStream; import java.io.IOException; import java.io.OutputStream; import java.util.ArrayList; import java.util.Iterator; import java.util.List;  import javax.imageio.IIOImage; import javax.imageio.ImageIO; import javax.imageio.ImageWriter; import javax.imageio.stream.ImageOutputStream;  import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.pdmodel.PDPage; import org.apache.pdfbox.pdmodel.PDPageContentStream; import org.apache.pdfbox.pdmodel.PDPageContentStream.AppendMode; import org.apache.pdfbox.pdmodel.encryption.AccessPermission; import org.apache.pdfbox.pdmodel.encryption.InvalidPasswordException; import org.apache.pdfbox.pdmodel.font.PDFont; import org.apache.pdfbox.pdmodel.font.PDType1Font; import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject; import org.apache.pdfbox.pdmodel.interactive.action.PDActionJavaScript; import org.apache.pdfbox.text.PDFTextStripper;  public class PDFBoxTest {     public static void main(String[] args) throws IOException {         createBlankPDF();     }          /**      * PDF中增加图片      *       * @throws IOException      * @throws InvalidPasswordException      */     public static void addImg2PDF() throws IOException, InvalidPasswordException {         try (PDDocument doc = PDDocument.load(new File("G:/blank.pdf"))) {             PDPage page = doc.getPage(0);             PDImageXObject pdImage = PDImageXObject.createFromFile("G:/0101.jpg", doc);              try (PDPageContentStream contentStream = new PDPageContentStream(doc, page, AppendMode.APPEND, true,                     true)) {                 // contentStream.drawImage(ximage, 20, 20 );                 // better method inspired by                 // http://stackoverflow.com/a/22318681/535646                 // reduce this value if the image is too large                 float scale = 1f;                 contentStream.drawImage(pdImage, 20, 20, 
pdImage.getWidth() * scale, pdImage.getHeight() * scale);             }             doc.save("G:/blank.pdf");         }     }      /**      * PDF文件中增加JS脚本      *       * @throws IOException      * @throws InvalidPasswordException      */     public static void addJavaScript2PDF() throws IOException, InvalidPasswordException {         try (PDDocument document = PDDocument.load(new File("G:/blank.pdf"))) {             PDActionJavaScript javascript = new PDActionJavaScript(                     "app.alert( {cMsg: 'PDFBox rocks!', nIcon: 3, nType: 0, cTitle: 'PDFBox Javascript example' } );");             document.getDocumentCatalog().setOpenAction(javascript);             if (document.isEncrypted()) {                 throw new IOException("Encrypted documents are not supported for this example");             }             document.save("G:/blank.pdf");         }     }      /**      * 阅读PDF文件的内容,支持阅读中文(如果需要阅读指定页面的PDF自己改写此方法)      *       * @throws IOException      * @throws InvalidPasswordException      */     public static void readPDFText() throws IOException, InvalidPasswordException {         try (PDDocument document = PDDocument.load(new File("G:/jl.pdf"))) {             AccessPermission ap = document.getCurrentAccessPermission();             if (!ap.canExtractContent()) {                 throw new IOException("You do not have permission to extract text");             }              PDFTextStripper stripper = new PDFTextStripper();              stripper.setSortByPosition(true);              for (int p = 1; p <= document.getNumberOfPages(); ++p) {                 stripper.setStartPage(p);                 stripper.setEndPage(p);                  String text = stripper.getText(document);                  String pageStr = String.format("page %d:", p);                 System.out.println(pageStr);                 for (int i = 0; i < pageStr.length(); ++i) {                     System.out.print("-");                 }                 System.out.println();                 
System.out.println(text.trim());                 System.out.println();             }         }     }      /**      * 创建空的PDF文件并且添加一个空白页,多页可以插入多个PDPage      *       * @throws IOException      */     public static void createBlankPDF() throws IOException {         String filename = "G:/blank.pdf";          try (PDDocument doc = new PDDocument()) {             // a valid PDF document requires at least one page             PDPage blankPage = new PDPage();             doc.addPage(blankPage);             doc.save(filename);         }     }      /**      * 图片转换为PDF文件      *       * @throws IOException      */     public static void img2PDF() throws IOException {         String imagePath = "G:/0101.jpg";         String pdfPath = "G:/0101.pdf";          try (PDDocument doc = new PDDocument()) {             PDPage page = new PDPage();             doc.addPage(page);              PDImageXObject pdImage = PDImageXObject.createFromFile(imagePath, doc);              try (PDPageContentStream contents = new PDPageContentStream(doc, page)) {                 contents.drawImage(pdImage, 20, 20);              }             doc.save(pdfPath);         }     }      /**      * 创建PDF文件并写入内容(暂时不支持写入中文)      *       * @throws IOException      */     public static void createPDF() throws IOException {         String filename = "G:/Test.pdf";         String message = "pdf";// 字体不能有中文,暂时不支持中文          try (PDDocument doc = new PDDocument()) {             PDPage page = new PDPage();             doc.addPage(page);              PDFont font = PDType1Font.HELVETICA_BOLD;              try (PDPageContentStream contents = new PDPageContentStream(doc, page)) {                 contents.beginText();                 contents.setFont(font, 12);                 contents.newLineAtOffset(100, 700);                 contents.showText(message);                 contents.endText();             }              doc.save(filename);         }     } }

标签
易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!