How to get font color using pdfbox

后端 未结 4 1051
迷失自我
迷失自我 2020-12-09 22:10

I am trying to extract text, together with all of its formatting information, from a PDF using PDFBox. I got all the information I want except the color. I tried different ways to get the font color (incl

4条回答
  •  执笔经年
    2020-12-09 22:50

    I also ended up doing something like this. Pasting code below, hope it helps someone.

    import java.io.IOException;
    import java.util.List;
    import org.apache.pdfbox.exceptions.COSVisitorException;
    import org.apache.pdfbox.pdmodel.PDDocument;
    import org.apache.pdfbox.pdmodel.PDPage;
    import org.apache.pdfbox.pdmodel.edit.PDPageContentStream;
    import org.apache.pdfbox.pdmodel.font.PDFont;
    import org.apache.pdfbox.pdmodel.font.PDType1Font;
    import org.apache.pdfbox.pdmodel.graphics.PDGraphicsState;
    import org.apache.pdfbox.util.PDFTextStripper;
    import org.apache.pdfbox.util.ResourceLoader;
    import org.apache.pdfbox.util.TextPosition;
    
    public class Parser extends PDFTextStripper {
    
    public Parser() throws IOException {
        super(ResourceLoader.loadProperties(
                "org/apache/pdfbox/resources/PageDrawer.properties", true));
        super.setSortByPosition(true);
    }
    
    public void parse(String path) throws IOException{
        PDDocument doc = PDDocument.load(path);
        List pages = doc.getDocumentCatalog().getAllPages();
        for (PDPage page : pages) {
            this.processStream(page, page.getResources(), page.getContents().getStream());
        }
    }
    
    @Override
    protected void processTextPosition(TextPosition text) {
        try {
            PDGraphicsState graphicsState = getGraphicsState();
            System.out.println("R = " + graphicsState.getNonStrokingColor().getJavaColor().getRed());
            System.out.println("G = " + graphicsState.getNonStrokingColor().getJavaColor().getGreen());
            System.out.println("B = " + graphicsState.getNonStrokingColor().getJavaColor().getBlue());
        }
        catch (IOException ioe) {}
    
    }
    
    public static void main(String[] args) throws IOException, COSVisitorException {
        Parser p = new Parser();
        p.parse("/Users/apple/Desktop/123.pdf");
    }
    
    }
    

提交回复
热议问题