Excel VBA to Search for Text in PDF and Extract and Name Pages

后端 未结 3 1437
情深已故
情深已故 2020-12-17 07:23

I have the following code, which looks at each cell in column A of my spreadsheet, searches for the text it finds there in the specified PDF and then extracts the page where

3条回答
  •  刺人心
    刺人心 (楼主)
    2020-12-17 07:28

    Sub BatchRenameCS()
    
    Dim objApp As Object
    Dim objPDDoc As Object
    Dim objjso As Object
    Dim newPDF As Acrobat.CAcroPDDoc
    Dim lastrow2 As Long
    Dim strFileName As String
    Dim Folder As String
    Dim Page As Long
    Dim Cell As Long
    Dim PDFCharacterCount() As Long
    Dim CharacterCount As Long
    Dim i As Integer
    Dim c As Integer
    Dim x As Integer
    Dim strSource As String
    Dim strResult As String
    Dim PDFCharacters As String
    Dim PDFCharacters2 As String
    Dim PDFPasteData() As String
    Dim PasteDataPage As Integer
    Dim LastRow As Long
    Dim NewName As String
    Dim NewNamePageNum As Integer
    Dim Check()
    
    Sheets("Sheet1").Range("C:D").ClearContents
    
    strFileName = selectFile()
    Folder = GetFolder()
    
    'create array with pdf word count
    Set objApp = CreateObject("AcroExch.App")
    Set objPDDoc = CreateObject("AcroExch.PDDoc")
    'AD.1 open file, if =false file is damage
        If objPDDoc.Open(strFileName) Then
            Set objjso = objPDDoc.GetJSObject
    
    ReDim PDFCharacterCount(1 To objPDDoc.GetNumPages) As Long
    
    For Page = 1 To objPDDoc.GetNumPages
    PDFCharacters = ""
    PDFCharacters2 = ""
        For c = 0 To objjso.GetPageNumWords(Page - 1)
        PDFCharacters = PDFCharacters & objjso.getPageNthWord(Page - 1, c)
        Next c
        For i = 1 To Len(PDFCharacters)
            Select Case Asc(Mid(PDFCharacters, i, 1))
                Case 48 To 57, 65 To 90, 97 To 122:
                PDFCharacters2 = PDFCharacters2 & (Mid(PDFCharacters, i, 1))
                Case Else
                PDFCharacters2 = PDFCharacters2 & ""
            End Select
        Next
        PDFCharacterCount(Page) = Len(PDFCharacters2)
    
    Next Page
    
    lastrow2 = Sheets("Sheet2").Cells(Rows.Count, "A").End(xlUp).Row
    Page = 1
    ReDim PDFPasteData(1 To objPDDoc.GetNumPages) As String
    For Cell = 1 To lastrow2
        strResult = ""
        strSource = Sheets("Sheet2").Cells(Cell, 1).Text
        PDFPasteData(Page) = PDFPasteData(Page) & " " & strSource
        For i = 1 To Len(strSource)
            Select Case Asc(Mid(strSource, i, 1))
                Case 48 To 57, 65 To 90, 97 To 122:
                strResult = strResult & (Mid(strSource, i, 1))
                Case Else
                strResult = strResult & ""
            End Select
        Next
    
    CharacterCount = CharacterCount + Len(strResult)
    
    If CharacterCount = PDFCharacterCount(Page) Then
    CharacterCount = 0
    Page = Page + 1
    End If
    
    Next Cell
    ReDim Check(2, objPDDoc.GetNumPages)
    LastRow = Sheets("Sheet1").Cells(Rows.Count, "A").End(xlUp).Row
    For Each LookUpCell In Worksheets("Sheet1").Range("A2:A" & LastRow)
        For PasteDataPage = 1 To objPDDoc.GetNumPages
            If InStr(PDFPasteData(PasteDataPage), LookUpCell.Value) Then
            Check(1, PasteDataPage) = Check(1, PasteDataPage) + 1
            Check(2, PasteDataPage) = Check(2, PasteDataPage) & LookUpCell.Offset(0, 1).Value & Chr(10)
                                    If FileExist(Folder & "\" & LookUpCell.Offset(0, 1) & ".pdf") Then
    
                                            Set newPDF = CreateObject("AcroExch.pdDoc")
                                            NewName = Folder & "\" & LookUpCell.Offset(0, 1) & ".pdf"
                                            newPDF.Open (NewName)
                                            newPDF.InsertPages newPDF.GetNumPages - 1, objPDDoc, PasteDataPage - 1, 1, 0
                                            newPDF.Save 1, NewName
                                            newPDF.Close
                                            Set newPDF = Nothing
                                     Else
                                            Set newPDF = CreateObject("AcroExch.PDDoc")
                                            newPDF.Create
                                            NewName = Folder & "\" & LookUpCell.Offset(0, 1) & ".pdf"
                                            newPDF.InsertPages -1, objPDDoc, PasteDataPage - 1, 1, 0
                                            newPDF.Save 1, NewName
                                            newPDF.Close
                                            Set newPDF = Nothing
                                    End If
            End If
        Next PasteDataPage
    Next LookUpCell
    x = 1
    For PasteDataPage = 1 To objPDDoc.GetNumPages
        If Check(1, PasteDataPage) <> 1 Then
        Sheets("Sheet1").Cells(x, 3) = PasteDataPage
        Sheets("Sheet1").Cells(x, 4) = Check(2, PasteDataPage)
        x = x + 1
        End If
    Next PasteDataPage
    End If
    MsgBox "Done"
    End Sub
    Function FileExist(path As String) As Boolean
        If Dir(path) <> vbNullString Then FileExist = True
    End Function
    Function selectFile()
    Dim fd As FileDialog, fileName As String
    On Error GoTo ErrorHandler
    Set fd = Application.FileDialog(msoFileDialogFilePicker)
    fd.AllowMultiSelect = False
    If fd.Show = True Then
        If fd.SelectedItems(1) <> vbNullString Then
            fileName = fd.SelectedItems(1)
        End If
    Else
        'Exit code if no file is selected
        End
    End If
    'Return Selected FileName
    selectFile = fileName
    Set fd = Nothing
    Exit Function
    ErrorHandler:
    Set fd = Nothing
    MsgBox "Error " & Err & ": " & Error(Err)
    End Function
    Function GetFolder() As String
        Dim fldr As FileDialog
        Dim sItem As String
        Set fldr = Application.FileDialog(msoFileDialogFolderPicker)
        With fldr
            .Title = "Select the Folder where you want you new PDFs to go"
            .AllowMultiSelect = False
            .InitialFileName = Application.DefaultFilePath
            If .Show <> -1 Then GoTo NextCode
            sItem = .SelectedItems(1)
        End With
    NextCode:
        GetFolder = sItem
        Set fldr = Nothing
    
    End Function
    

提交回复
热议问题