I am trying to run the Java code written by Stefano Chizzolini (awesome guy: the creator of PDF Clown) to parse a PDF using the PDF Clown library. I am getting the error below and I don't know what I can do to fix it.
Exception in thread "main" org.pdfclown.util.parsers.ParseException: 'name' table does NOT exist.
    at org.pdfclown.documents.contents.fonts.OpenFontParser.getName(OpenFontParser.java:570)
    at org.pdfclown.documents.contents.fonts.OpenFontParser.load(OpenFontParser.java:221)
    at org.pdfclown.documents.contents.fonts.OpenFontParser.<init>(OpenFontParser.java:205)
    at org.pdfclown.documents.contents.fonts.TrueTypeFont.loadEncoding(TrueTypeFont.java:91)
    at org.pdfclown.documents.contents.fonts.SimpleFont.onLoad(SimpleFont.java:118)
    at org.pdfclown.documents.contents.fonts.Font.load(Font.java:738)
    at org.pdfclown.documents.contents.fonts.Font.<init>(Font.java:351)
    at org.pdfclown.documents.contents.fonts.SimpleFont.<init>(SimpleFont.java:62)
    at org.pdfclown.documents.contents.fonts.TrueTypeFont.<init>(TrueTypeFont.java:68)
    at org.pdfclown.documents.contents.fonts.Font.wrap(Font.java:253)
    at org.pdfclown.documents.contents.FontResources.wrap(FontResources.java:72)
    at org.pdfclown.documents.contents.FontResources.wrap(FontResources.java:1)
    at org.pdfclown.documents.contents.ResourceItems.get(ResourceItems.java:119)
    at org.pdfclown.documents.contents.objects.SetFont.getResource(SetFont.java:119)
    at org.pdfclown.documents.contents.objects.SetFont.getFont(SetFont.java:83)
    at org.pdfclown.documents.contents.objects.SetFont.scan(SetFont.java:97)
    at org.pdfclown.documents.contents.ContentScanner.moveNext(ContentScanner.java:1330)
    at org.pdfclown.tools.TextExtractor.extract(TextExtractor.java:626)
    at org.pdfclown.tools.TextExtractor.extract(TextExtractor.java:296)
    at PDFReader.FullExtract.run(FullExtract.java:71)
    at PDFReader.FullExtract.main(FullExtract.java:142)

I know that the OpenFontParser class in the library package is throwing this error. Is there anything I can do to fix this?
This code works for most PDFs, but I have one PDF that it does not parse. I am guessing that this is because of the symbol below, which appears in that PDF.
public class PDFReader extends Sample { @Override public void run() { String filePath = new String("C:\\Users\\XYZ\\Desktop\\SomeSamplePDF.pdf"); // 1. Open the PDF file! File file; try {file = new File(filePath);} catch(Exception e) {throw new RuntimeException(filePath + " file access error.",e);} // 2. Get the PDF document! Document document = file.getDocument(); // 3. Extracting text from the document pages... for(Page page : document.getPages()) { extract(new ContentScanner(page)); // Wraps the page contents into a scanner. } close(file); } private void close(File file) { // TODO Auto-generated method stub } /** Scans a content level looking for text. */ /* NOTE: Page contents are represented by a sequence of content objects, possibly nested into multiple levels. */ private void extract( ContentScanner level ) { if(level == null) return; while(level.moveNext()) { ContentObject content = level.getCurrent(); if(content instanceof ShowText) { Font font = level.getState().getFont(); // Extract the current text chunk, decoding it! System.out.println(font.decode(((ShowText)content).getText())); } else if(content instanceof Text || content instanceof ContainerObject) { // Scan the inner level! extract(level.getChildLevel()); } } } private boolean prompt(Page page) { int pageIndex = page.getIndex(); if(pageIndex > 0) { Map<String,String> options = new HashMap<String,String>(); options.put("", "Scan next page"); options.put("Q", "End scanning"); if(!promptChoice(options).equals("")) return false; } System.out.println("\nScanning page " + (pageIndex+1) + "...\n"); return true; } public static void main(String args[]) { new PDFReader().run(); } }