I am trying to extract all the images from a pdf using itextsharp but can\'t seem to overcome this one hurdle.
The error occures on the line System.Drawing.I
Works for me like this, using these two methods:
public static List ExtractImagesFromPDF(byte[] bytes)
{
var imgs = new List();
var pdf = new PdfReader(bytes);
try
{
for (int pageNumber = 1; pageNumber <= pdf.NumberOfPages; pageNumber++)
{
PdfDictionary pg = pdf.GetPageN(pageNumber);
List objs = FindImageInPDFDictionary(pg);
foreach (var obj in objs)
{
if (obj != null)
{
int XrefIndex = Convert.ToInt32(((PRIndirectReference)obj).Number.ToString(System.Globalization.CultureInfo.InvariantCulture));
PdfObject pdfObj = pdf.GetPdfObject(XrefIndex);
PdfStream pdfStrem = (PdfStream)pdfObj;
var pdfImage = new PdfImageObject((PRStream)pdfStrem);
var img = pdfImage.GetDrawingImage();
imgs.Add(img);
}
}
}
}
finally
{
pdf.Close();
}
return imgs;
}
private static List FindImageInPDFDictionary(PdfDictionary pg)
{
var res = (PdfDictionary)PdfReader.GetPdfObject(pg.Get(PdfName.RESOURCES));
var xobj = (PdfDictionary)PdfReader.GetPdfObject(res.Get(PdfName.XOBJECT));
var pdfObgs = new List();
if (xobj != null)
{
foreach (PdfName name in xobj.Keys)
{
PdfObject obj = xobj.Get(name);
if (obj.IsIndirect())
{
var tg = (PdfDictionary)PdfReader.GetPdfObject(obj);
var type = (PdfName)PdfReader.GetPdfObject(tg.Get(PdfName.SUBTYPE));
if (PdfName.IMAGE.Equals(type)) // image at the root of the pdf
{
pdfObgs.Add(obj);
}
else if (PdfName.FORM.Equals(type)) // image inside a form
{
FindImageInPDFDictionary(tg).ForEach(o => pdfObgs.Add(o));
}
else if (PdfName.GROUP.Equals(type)) // image inside a group
{
FindImageInPDFDictionary(tg).ForEach(o => pdfObgs.Add(o));
}
}
}
}
return pdfObgs;
}