I added a watermark on pdf using Pdfstamper. Here is the code:
for (int pageIndex = 1; pageIndex <= pageCount; pageIndex++)
{
iTextSharp.text.Rectangl
As an extension to Chris's answer, a VB.Net class for removing a layer is included at the bottom of this post which should be a bit more precise.
OCGs
array in the OCProperties
dictionary in the file's catalog). This array contains indirect references to objects in the PDF file which contain the name/OC /{PagePropertyReference} BDC {Actual Content} EMC
so it can remove just these segments as appropriateThe code then cleans up all the references as much as it can. Calling the code might work as shown:
Public Shared Sub RemoveWatermark(path As String, savePath As String)
Using reader = New PdfReader(path)
Using fs As New FileStream(savePath, FileMode.Create, FileAccess.Write, FileShare.None)
Using stamper As New PdfStamper(reader, fs)
Using remover As New PdfLayerRemover(reader)
remover.RemoveByName("WatermarkLayer")
End Using
End Using
End Using
End Using
End Sub
Full class:
Imports iTextSharp.text
Imports iTextSharp.text.io
Imports iTextSharp.text.pdf
Imports iTextSharp.text.pdf.parser
Public Class PdfLayerRemover
Implements IDisposable
Private _reader As PdfReader
Private _layerNames As New List(Of String)
Public Sub New(reader As PdfReader)
_reader = reader
End Sub
Public Sub RemoveByName(name As String)
_layerNames.Add(name)
End Sub
Private Sub RemoveLayers()
Dim ocProps = _reader.Catalog.GetAsDict(PdfName.OCPROPERTIES)
If ocProps Is Nothing Then Return
Dim ocgs = ocProps.GetAsArray(PdfName.OCGS)
If ocgs Is Nothing Then Return
'Get a list of indirect references to the layer information
Dim layerRefs = (From l In (From i In ocgs
Select Obj = DirectCast(PdfReader.GetPdfObject(i), PdfDictionary),
Ref = DirectCast(i, PdfIndirectReference))
Where _layerNames.Contains(l.Obj.GetAsString(PdfName.NAME).ToString)
Select l.Ref).ToList
'Get a list of numbers for these layer references
Dim layerRefNumbers = (From l In layerRefs Select l.Number).ToList
'Loop through the pages
Dim page As PdfDictionary
Dim propsToRemove As IEnumerable(Of PdfName)
For i As Integer = 1 To _reader.NumberOfPages
'Get the page
page = _reader.GetPageN(i)
'Get the page properties which reference the layers to remove
Dim props = _reader.GetPageResources(i).GetAsDict(PdfName.PROPERTIES)
propsToRemove = (From k In props.Keys Where layerRefNumbers.Contains(props.GetAsIndirectObject(k).Number) Select k).ToList
'Get the raw content
Dim contentarray = page.GetAsArray(PdfName.CONTENTS)
If contentarray IsNot Nothing Then
For j As Integer = 0 To contentarray.Size - 1
'Parse the stream data looking for references to a property pointing to the layer.
Dim stream = DirectCast(contentarray.GetAsStream(j), PRStream)
Dim streamData = PdfReader.GetStreamBytes(stream)
Dim newData = GetNewStream(streamData, (From p In propsToRemove Select p.ToString.Substring(1)))
'Store data without the stream references in the stream
If newData.Length <> streamData.Length Then
stream.SetData(newData)
stream.Put(PdfName.LENGTH, New PdfNumber(newData.Length))
End If
Next
End If
'Remove the properties from the page data
For Each prop In propsToRemove
props.Remove(prop)
Next
Next
'Remove references to the layer in the master catalog
RemoveIndirectReferences(ocProps, layerRefNumbers)
'Clean up unused objects
_reader.RemoveUnusedObjects()
End Sub
Private Shared Function GetNewStream(data As Byte(), propsToRemove As IEnumerable(Of String)) As Byte()
Dim item As PdfLayer = Nothing
Dim positions As New List(Of Integer)
positions.Add(0)
Dim pos As Integer
Dim inGroup As Boolean = False
Dim tokenizer As New PRTokeniser(New RandomAccessFileOrArray(New RandomAccessSourceFactory().CreateSource(data)))
While tokenizer.NextToken
If tokenizer.TokenType = PRTokeniser.TokType.NAME AndAlso tokenizer.StringValue = "OC" Then
pos = CInt(tokenizer.FilePointer - 3)
If tokenizer.NextToken() AndAlso tokenizer.TokenType = PRTokeniser.TokType.NAME Then
If Not inGroup AndAlso propsToRemove.Contains(tokenizer.StringValue) Then
inGroup = True
positions.Add(pos)
End If
End If
ElseIf tokenizer.TokenType = PRTokeniser.TokType.OTHER AndAlso tokenizer.StringValue = "EMC" AndAlso inGroup Then
positions.Add(CInt(tokenizer.FilePointer))
inGroup = False
End If
End While
positions.Add(data.Length)
If positions.Count > 2 Then
Dim length As Integer = 0
For i As Integer = 0 To positions.Count - 1 Step 2
length += positions(i + 1) - positions(i)
Next
Dim newData(length) As Byte
length = 0
For i As Integer = 0 To positions.Count - 1 Step 2
Array.Copy(data, positions(i), newData, length, positions(i + 1) - positions(i))
length += positions(i + 1) - positions(i)
Next
Dim origStr = System.Text.Encoding.UTF8.GetString(data)
Dim newStr = System.Text.Encoding.UTF8.GetString(newData)
Return newData
Else
Return data
End If
End Function
Private Shared Sub RemoveIndirectReferences(dict As PdfDictionary, refNumbers As IEnumerable(Of Integer))
Dim newDict As PdfDictionary
Dim arrayData As PdfArray
Dim indirect As PdfIndirectReference
Dim i As Integer
For Each key In dict.Keys
newDict = dict.GetAsDict(key)
arrayData = dict.GetAsArray(key)
If newDict IsNot Nothing Then
RemoveIndirectReferences(newDict, refNumbers)
ElseIf arrayData IsNot Nothing Then
i = 0
While i < arrayData.Size
indirect = arrayData.GetAsIndirectObject(i)
If refNumbers.Contains(indirect.Number) Then
arrayData.Remove(i)
Else
i += 1
End If
End While
End If
Next
End Sub
#Region "IDisposable Support"
Private disposedValue As Boolean ' To detect redundant calls
' IDisposable
Protected Overridable Sub Dispose(disposing As Boolean)
If Not Me.disposedValue Then
If disposing Then
RemoveLayers()
End If
' TODO: free unmanaged resources (unmanaged objects) and override Finalize() below.
' TODO: set large fields to null.
End If
Me.disposedValue = True
End Sub
' TODO: override Finalize() only if Dispose(ByVal disposing As Boolean) above has code to free unmanaged resources.
'Protected Overrides Sub Finalize()
' ' Do not change this code. Put cleanup code in Dispose(ByVal disposing As Boolean) above.
' Dispose(False)
' MyBase.Finalize()
'End Sub
' This code added by Visual Basic to correctly implement the disposable pattern.
Public Sub Dispose() Implements IDisposable.Dispose
' Do not change this code. Put cleanup code in Dispose(ByVal disposing As Boolean) above.
Dispose(True)
GC.SuppressFinalize(Me)
End Sub
#End Region
End Class