I m trying to extract images from a pdf using pdfbox. The example pdf here
But i m getting blank images only.
The code i m trying:-
public st
The below GetImagesFromPDF java class get all images in 04-Request-Headers.pdf file and save those files into destination folder PDFCopy.
import java.io.File;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.PDResources;
import org.apache.pdfbox.pdmodel.graphics.xobject.PDXObjectImage;
@SuppressWarnings({ "unchecked", "rawtypes", "deprecation" })
public class GetImagesFromPDF {
public static void main(String[] args) {
try {
String sourceDir = "C:/PDFCopy/04-Request-Headers.pdf";// Paste pdf files in PDFCopy folder to read
String destinationDir = "C:/PDFCopy/";
File oldFile = new File(sourceDir);
if (oldFile.exists()) {
PDDocument document = PDDocument.load(sourceDir);
List list = document.getDocumentCatalog().getAllPages();
String fileName = oldFile.getName().replace(".pdf", "_cover");
int totalImages = 1;
for (PDPage page : list) {
PDResources pdResources = page.getResources();
Map pageImages = pdResources.getImages();
if (pageImages != null) {
Iterator imageIter = pageImages.keySet().iterator();
while (imageIter.hasNext()) {
String key = (String) imageIter.next();
PDXObjectImage pdxObjectImage = (PDXObjectImage) pageImages.get(key);
pdxObjectImage.write2file(destinationDir + fileName+ "_" + totalImages);
totalImages++;
}
}
}
} else {
System.err.println("File not exists");
}
} catch (Exception e) {
e.printStackTrace();
}
}
}