Fastest way to delete rows which cannot be grabbed with SpecialCells

后端 未结 3 529
深忆病人
深忆病人 2020-11-28 14:49

Based on another question on this site I started wondering about the fastest way to delete all rows with a certain condition.

The above-referenced question came with

相关标签:
3条回答
  • 2020-11-28 15:21

    One efficient solution is to tag all the rows to keep and move all the rows to delete at the end by sorting the tags. This way, the complexity doesn't increase with the number of rows to delete.

    This example deletes in less than a second, for 50000 rows, all the rows where column I is equal to 2:

    Sub DeleteMatchingRows()
        Dim rgTable As Range, rgTags As Range, data(), tags(), count&, r&
    
        ' load the data in an array
        Set rgTable = ActiveSheet.UsedRange
        data = rgTable.Value
    
        ' tag all the rows to keep with the row number. Leave empty otherwise.
        ReDim tags(1 To UBound(data), 1 To 1)
        tags(1, 1) = 1  ' keep the header
        For r = 2 To UBound(data)
          If data(r, 9) <> 2 Then tags(r, 1) = r  ' if column I <> 2 keep the row
        Next
    
        ' insert the tags in the last column on the right
        Set rgTags = rgTable.Columns(rgTable.Columns.count + 1)
        rgTags.Value = tags
    
        ' sort the rows on the tags which will move the rows to delete at the end
        Union(rgTable, rgTags).Sort key1:=rgTags, Orientation:=xlTopToBottom, Header:=xlYes
        count = rgTags.End(xlDown).Row
    
        ' delete the tags on the right and the rows that weren't tagged
        rgTags.EntireColumn.Delete
        rgTable.Resize(UBound(data) - count + 1).Offset(count).EntireRow.Delete
    End Sub
    

    Note that it doesn't alter the order of the rows.

    0 讨论(0)
  • 2020-11-28 15:36

    edited

    after some more testings it seems that Sort&Delete is a little faster than RemoveDuplicates

    so I put in the following solution (keeping the first one for reference by the end of the answer)

    Sub FixWithSort()
    Dim testRng As Range
    Dim dttStart As Date
    
    Set testRng = Worksheets("Test").Range("I25:I50000")
    
    With testRng
        .Formula = "=RandBetween(1, 5)"
        .Value2 = .Value2
    End With
    
    dttStart = Now()
    
    With testRng
        With .Offset(, 1)
            .FormulaR1C1 = "=IF(RC[-1]=2,"""",row())"
            .Value2 = .Value2
        End With
    
        .Resize(, 2).Sort key1:=.Columns(2), Orientation:=xlTopToBottom, Header:=xlYes
        Range(.Cells(1, 2).End(xlDown).Offset(1, -1), .Cells(1, 1).End(xlDown)).EntireRow.Delete
        .Columns(2).ClearContents
    End With
    
    Debug.Print Format(Now() - dttStart, "HH:MM:SS")
    dttStartGlobal = dttStartGlobal + Now() - dttStart
    
    End Sub
    

    previous (and a little slower) solution with RemoveDuplicates

    Option Explicit
    
    Sub FixWithRemoveDuplicates()
    Dim testRng As Range
    Dim dttStart As Date
    
    Set testRng = Worksheets("Test").Range("I25:I50000")
    
    With testRng
        .Formula = "=RandBetween(1, 5)"
        .Value2 = .Value2
    End With
    
    dttStart = Now()
    
    Application.ScreenUpdating = False
    Application.Calculation = xlCalculationManual
    Application.EnableEvents = False
    
    With testRng
        With .Offset(, 1)
            .FormulaR1C1 = "=IF(RC[-1]=2,""a"",row())"
            .Value2 = .Value2
        End With
        .EntireRow.RemoveDuplicates Columns:=Array(.Columns(2).Column), Header:=xlNo
        .Offset(, 1).Find(what:="a", LookIn:=xlValues, LookAt:=xlWhole).EntireRow.Delete
        .Columns(2).ClearContents
    End With
    
    Application.ScreenUpdating = True
    Application.Calculation = xlCalculationAutomatic
    Application.EnableEvents = True
    
    Debug.Print Format(Now() - dttStart, "HH:MM:SS")
    'Average time around 0 seconds
    
    End Sub
    
    0 讨论(0)
  • 2020-11-28 15:38

    Here are all the possible options I could think of with an "average time" to complete the tasks:

    Option Base 1
    Option Explicit
    
    Sub FixWithArraysAndDeleteRange()
    
    Dim lngItem As Long
    Dim varArray() As Variant
    Dim wksItem As Worksheet
    Dim rngRangeToDelete As Range
    
    Dim dttStart As Date
    
    Worksheets(1).Range("I25:I50000").Formula = "=RandBetween(1, 5)"
    Worksheets(1).Range("I25:I50000").Value2 = Worksheets(1).Range("I25:I50000").Value2
    
    dttStart = Now()
    
    Application.ScreenUpdating = False
    Application.Calculation = xlCalculationManual
    Application.EnableEvents = False
    
    Set wksItem = Worksheets(1)
    varArray() = wksItem.Range("I25:I50000").Value2
    
    For lngItem = LBound(varArray) To UBound(varArray)
        If IsNumeric(varArray(lngItem, 1)) Then
            If Int(varArray(lngItem, 1)) = 2 Then
                If rngRangeToDelete Is Nothing Then
                  Set rngRangeToDelete = wksItem.Rows(lngItem + 24)
                Else
                  Set rngRangeToDelete = Intersect(rngRangeToDelete, wksItem.Rows(lngItem + 24))
                End If
            End If
        End If
    Next lngItem
    
    rngRangeToDelete.EntireRow.Delete
    
    Application.ScreenUpdating = True
    Application.Calculation = xlCalculationAutomatic
    Application.EnableEvents = True
    
    Debug.Print Format(Now() - dttStart, "HH:MM:SS")
    'Average time around 0 seconds
    
    End Sub
    

    Sub FixWithLoop()
    
    Dim lngRow As Long
    Dim lngLastRow As Long
    Dim wksItem As Worksheet
    
    Dim dttStart As Date
    
    Worksheets(1).Range("I25:I50000").Formula = "=RandBetween(1, 5)"
    Worksheets(1).Range("I25:I50000").Value2 = Worksheets(1).Range("I25:I50000").Value2
    
    dttStart = Now()
    
    Application.ScreenUpdating = False
    Application.Calculation = xlCalculationManual
    Application.EnableEvents = False
    
    Set wksItem = Worksheets(1)
    lngLastRow = wksItem.Cells(wksItem.Rows.Count, "I").End(xlUp).Row
    
    For lngRow = lngLastRow To 25 Step -1
        If Int(wksItem.Cells(lngRow, "I").Value) = 2 Then wksItem.Rows(lngRow).Delete
    Next lngRow
    
    Application.ScreenUpdating = True
    Application.Calculation = xlCalculationAutomatic
    Application.EnableEvents = True
    
    Debug.Print Format(Now() - dttStart, "HH:MM:SS")
    'Average time ~3 seconds
    
    End Sub
    

    Sub FixWithLoopInChunks()
    
    Dim lngRow As Long
    Dim lngLastRow As Long
    Dim wksItem As Worksheet
    Dim strRowsToDelete As String
    Dim intDeleteCount As Integer
    
    Dim dttStart As Date
    
    Worksheets(1).Range("I25:I50000").Formula = "=RandBetween(1, 5)"
    Worksheets(1).Range("I25:I50000").Value2 = Worksheets(1).Range("I25:I50000").Value2
    
    dttStart = Now()
    
    Application.ScreenUpdating = False
    Application.Calculation = xlCalculationManual
    Application.EnableEvents = False
    
    Set wksItem = Worksheets(1)
    lngLastRow = wksItem.Cells(wksItem.Rows.Count, "I").End(xlUp).Row
    
    For lngRow = lngLastRow To 25 Step -1
        If Int(wksItem.Cells(lngRow, "I").Value) = 2 Then
            intDeleteCount = intDeleteCount + 1
            strRowsToDelete = strRowsToDelete & ",I" & lngRow
        End If
        If intDeleteCount >= 30 Then
            strRowsToDelete = Mid(strRowsToDelete, 2)
            wksItem.Range(strRowsToDelete).EntireRow.Delete
            intDeleteCount = 0
            strRowsToDelete = ""
        End If
    Next lngRow
    
    If intDeleteCount > 0 Then
        strRowsToDelete = Mid(strRowsToDelete, 2)
        wksItem.Range(strRowsToDelete).EntireRow.Delete
    End If
    
    Application.ScreenUpdating = True
    Application.Calculation = xlCalculationAutomatic
    Application.EnableEvents = True
    
    Debug.Print Format(Now() - dttStart, "HH:MM:SS")
    'Average time ~3 seconds
    
    End Sub
    

    Sub FixWithArraysAndDeleteChunks()
    
    Dim lngItem As Long
    Dim varArray() As Variant
    Dim wksItem As Worksheet
    Dim strRowsToDelete As String
    Dim intDeleteCount As Integer
    
    Dim dttStart As Date
    
    Worksheets(1).Range("I25:I50000").Formula = "=RandBetween(1, 5)"
    Worksheets(1).Range("I25:I50000").Value2 = Worksheets(1).Range("I25:I50000").Value2
    
    dttStart = Now()
    
    Application.ScreenUpdating = False
    Application.Calculation = xlCalculationManual
    Application.EnableEvents = False
    
    Set wksItem = Worksheets(1)
    varArray() = wksItem.Range("I25:I50000").Value2
    
    For lngItem = UBound(varArray) To LBound(varArray) Step -1
        If IsNumeric(varArray(lngItem, 1)) Then
            If Int(varArray(lngItem, 1)) = 2 Then
                intDeleteCount = intDeleteCount + 1
                strRowsToDelete = strRowsToDelete & ",I" & lngItem + 24
            End If
            If intDeleteCount >= 30 Then
                strRowsToDelete = Mid(strRowsToDelete, 2)
                wksItem.Range(strRowsToDelete).EntireRow.Delete
                intDeleteCount = 0
                strRowsToDelete = ""
            End If
        End If
    Next lngItem
    
    If intDeleteCount > 0 Then
        strRowsToDelete = Mid(strRowsToDelete, 2)
        wksItem.Range(strRowsToDelete).EntireRow.Delete
    End If
    
    Application.ScreenUpdating = True
    Application.Calculation = xlCalculationAutomatic
    Application.EnableEvents = True
    
    Debug.Print Format(Now() - dttStart, "HH:MM:SS")
    'Average time ~2 seconds
    
    End Sub
    

    Based on the above tests the "fastest" route is to use an array, save the range of rows to be deleted using Intersectand then delete all rows together.

    Note, if you are using Application.Union instead of Intersect then the time of that approach drops significantly and the sub will run for almost 30 seconds.

    Yet, the time difference is very small and negligible (for 50.000 rows).

    Please do let me know if my speed-test-setup has any flaws which might bias the results or if I am missing another approach you would like to see.

    Update:

    Here is another approach offered by @SiddharthRout. I do not wish to plagiarise. Yet, I wanted to compare time results. Hence, here is the sub rewritten to compare to the others with the average time recorded on my system.

    Sub DeleteFilteredRows_SiddharthRout()
    
    Dim wksItem As Worksheet
    Dim rngRowsToDelete As Range
    
    Dim dttStart As Date
    
    Worksheets(1).Range("I25:I50000").Formula = "=RandBetween(1, 5)"
    Worksheets(1).Range("I25:I50000").Value2 = Worksheets(1).Range("I25:I50000").Value2
    
    dttStart = Now()
    
    Application.ScreenUpdating = False
    Application.Calculation = xlCalculationManual
    Application.EnableEvents = False
    
    Set wksItem = Worksheets(1)
    wksItem.AutoFilterMode = False
    wksItem.Range("I25:I50000").AutoFilter Field:=1, Criteria1:=2
    Set rngRowsToDelete = wksItem.Range("I25:I50000").SpecialCells(xlCellTypeVisible)
    wksItem.AutoFilterMode = False
    wksItem.Rows.Hidden = False
    
    rngRowsToDelete.EntireRow.Delete
    
    Application.ScreenUpdating = True
    Application.Calculation = xlCalculationAutomatic
    Application.EnableEvents = True
    
    Debug.Print Format(Now() - dttStart, "HH:MM:SS")
    'Average time around 5 seconds
    
    End Sub
    

    It seems that this approach is slightly slower compared to all the others.

    0 讨论(0)
提交回复
热议问题