I am using the iTextSharp library with C# (.NET) to split my PDF file.
Consider a PDF file named sample.pdf containing 72 pages. This sample.pdf contains pages that hav
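The function below uses iTextSharp to: (1) open each PDF file, (2) walk through its pages one by one, and (3) inspect every annotation on each page, keeping only the link annotations and reading their URI.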
Step #4 is to insert whatever logic you want in here... update the links, log them, etc.
/// <summary>
/// Inspects PDF files for link annotations and reads the URI of each link action.
/// </summary>
/// <remarks>
/// For every entry in <c>PdfFiles</c> the document is opened with a <c>PdfReader</c>,
/// each page's /Annots array is scanned, and for every annotation whose /Subtype is
/// "/Link" and that carries an /A action dictionary, the /URI entry is read into a
/// local string. Annotations that lack any of these entries are skipped individually,
/// so one incomplete annotation no longer aborts the scan of the whole file.
/// Files that raise <c>InvalidPdfException</c> or <c>NullReferenceException</c> are
/// reported on the console and skipped; the reader is always closed.
/// </remarks>
public static void FindPdfDocsWithInternalLinks()
{
    foreach (var fi in PdfFiles)
    {
        PdfReader reader = null;
        try
        {
            reader = new PdfReader(fi.FullName);

            // Page numbers passed to GetPageN are 1-based here.
            for (var i = 1; i <= reader.NumberOfPages; i++)
            {
                var pageDict = reader.GetPageN(i);
                if (pageDict == null)
                {
                    continue;
                }

                // "as" instead of a hard cast: a page whose /Annots entry is missing or
                // is not an array is simply skipped instead of throwing.
                var annotArray = PdfReader.GetPdfObject(pageDict.Get(PdfName.ANNOTS)) as PdfArray;
                if (annotArray == null || annotArray.Length <= 0)
                {
                    continue;
                }

                // Check every annotation on the page.
                foreach (var annot in annotArray.ArrayList)
                {
                    var annotDict = PdfReader.GetPdfObject(annot) as PdfDictionary;
                    if (annotDict == null)
                    {
                        continue;
                    }

                    // Guard against annotations without a /Subtype entry before calling ToString().
                    var subtype = annotDict.Get(PdfName.SUBTYPE);
                    if (subtype == null || subtype.ToString() != "/Link")
                    {
                        continue;
                    }

                    var linkDict = annotDict.GetDirectObject(PdfName.A) as PdfDictionary;
                    if (linkDict == null)
                    {
                        continue;
                    }

                    // A link action is not required to have a /URI entry; skip this
                    // annotation only, rather than failing the whole file.
                    var uriObject = linkDict.Get(PdfName.URI);
                    if (uriObject == null)
                    {
                        continue;
                    }

                    var sUri = uriObject.ToString();
                    if (String.IsNullOrEmpty(sUri))
                    {
                        continue;
                    }

                    // sUri holds the link target at this point; any per-link handling
                    // (update, log, collect, ...) has to happen here while it is in scope.
                }
            }
        }
        catch (InvalidPdfException e)
        {
            // Paths containing "_vti_cnf" are not reported
            // (NOTE(review): presumably server metadata folders - confirm).
            if (!fi.FullName.Contains("_vti_cnf"))
            {
                Console.WriteLine("\r\nInvalid PDF Exception\r\nFilename: " + fi.FullName + "\r\nException:\r\n" + e);
            }
        }
        catch (NullReferenceException e)
        {
            // Retained so callers still never see this exception from a malformed file;
            // the explicit null guards above should make it rare.
            if (!fi.FullName.Contains("_vti_cnf"))
            {
                Console.WriteLine("\r\nNull Reference Exception\r\nFilename: " + fi.Name + "\r\nException:\r\n" + e);
            }
        }
        finally
        {
            // Close the reader even when an exception interrupted the scan.
            if (reader != null)
            {
                reader.Close();
            }
        }
    }
    // DO WHATEVER YOU WANT HERE
}
Good luck.