SQL INSERT but avoid duplicates

前端 未结 6 997
北荒
北荒 2020-12-31 10:42

I want to do some quick inserts but avoid duplicates into a Table. For argument's sake, let's call it MarketPrices. I've been experimenting with two ways of doing it but not

6条回答
  •  醉话见心
    2020-12-31 11:31

    EDIT: to prevent race conditions in concurrent environments, use WITH (UPDLOCK) in the correlated subquery or EXCEPT'd SELECT. The test script I wrote below doesn't require it, since it uses temporary tables that are only visible to the current connection, but in a real environment, operating against user tables, it would be necessary.

    MERGE doesn't require UPDLOCK.


    Inspired by mcl's answer re: unique index & let the database throw an error, I decided to benchmark conditional inserts vs. try/catch.

    The results appear to support the conditional insert over try/catch, but YMMV. It's a very simple scenario (one column, small table, etc), executed on one machine, etc.

    Here are the results (SQL Server 2008, build 10.0.1600.2):

    duplicates (short table)    
      try/catch:                14440 milliseconds / 100000 inserts
      conditional insert:        2983 milliseconds / 100000 inserts
      except:                    2966 milliseconds / 100000 inserts
      merge:                     2983 milliseconds / 100000 inserts
    
    uniques
      try/catch:                 3920 milliseconds / 100000 inserts
      conditional insert:        3860 milliseconds / 100000 inserts
      except:                    3873 milliseconds / 100000 inserts
      merge:                     3890 milliseconds / 100000 inserts
    
      straight insert:           3173 milliseconds / 100000 inserts
    
    duplicates (tall table)
      try/catch:                14436 milliseconds / 100000 inserts
      conditional insert:        3063 milliseconds / 100000 inserts
      except:                    3063 milliseconds / 100000 inserts
      merge:                     3030 milliseconds / 100000 inserts
    

    Notice, even on unique inserts, there's slightly more overhead to try/catch than a conditional insert. I wonder if this varies by version, CPU, number of cores, etc.

    I did not benchmark the IF conditional inserts, just WHERE. I assume the IF variety would show more overhead, since a) you would have two statements, and b) you would need to wrap the two statements in a transaction and set the isolation level to serializable (!). If someone wanted to test this, you would need to change the temp table to a regular user table (serializable doesn't apply to local temp tables).

    Here is the script:

    -- Benchmark setup for comparing duplicate-avoiding insert techniques.
    -- Written for SQL Server 2008; on SQL Server 2005, comment out every batch that uses MERGE.
    set nocount on;

    -- Drop any #temp left over from an earlier run on this connection.
    if object_id('tempdb..#temp') is not null
    begin
      drop table #temp;
    end

    -- Single-column table; the primary key on col1 is what rejects duplicate values.
    create table #temp
    (
      col1 int primary key
    );
    go
    
    -------------------------------------------------------
    -- Section 1: duplicate inserts against a one-row table
    -------------------------------------------------------

    -- Seed the table with the value 1, which the batches in this section try to insert again.
    insert into #temp (col1)
    values (1);
    go
    
    -- Duplicates, short table, TRY/CATCH: attempt the insert unconditionally
    -- and discard whatever error it raises.
    declare @candidate int, @attempts int, @started_at datetime, @elapsed_ms int;
    set @candidate = 1;
    set @attempts = 0;
    set @started_at = getdate();

    while @attempts < 100000
    begin
      set @attempts = @attempts + 1;

      begin try
        insert into #temp (col1)
        select @candidate;
      end try
      begin catch
        -- intentionally empty: the error is swallowed so the loop keeps going
      end catch
    end

    -- Report elapsed wall-clock time; NOWAIT sends the message without waiting for the buffer to fill.
    set @elapsed_ms = datediff(ms, @started_at, getdate());
    raiserror('duplicates (short table), try/catch: %i milliseconds / %i inserts', -1, -1, @elapsed_ms, @attempts) with nowait;
    go
    
    -- Duplicates, short table, conditional insert: a single INSERT ... SELECT
    -- guarded by NOT EXISTS, so no row is produced when the value is already present.
    declare @candidate int, @attempts int, @started_at datetime, @elapsed_ms int;
    set @candidate = 1;
    set @attempts = 0;
    set @started_at = getdate();

    while @attempts < 100000
    begin
      set @attempts = @attempts + 1;

      insert into #temp (col1)
      select @candidate
      where not exists (select 1 from #temp as existing where existing.col1 = @candidate);
    end

    set @elapsed_ms = datediff(ms, @started_at, getdate());
    raiserror('duplicates (short table), conditional insert: %i milliseconds / %i inserts', -1, -1, @elapsed_ms, @attempts) with nowait;
    go
    
    -- Duplicates, short table, EXCEPT: the candidate row is removed from the
    -- insert's source set whenever the same value already exists in #temp.
    declare @candidate int, @attempts int, @started_at datetime, @elapsed_ms int;
    set @candidate = 1;
    set @attempts = 0;
    set @started_at = getdate();

    while @attempts < 100000
    begin
      set @attempts = @attempts + 1;

      insert into #temp (col1)
      select @candidate
      except
      select col1 from #temp;
    end

    set @elapsed_ms = datediff(ms, @started_at, getdate());
    raiserror('duplicates (short table), except: %i milliseconds / %i inserts', -1, -1, @elapsed_ms, @attempts) with nowait;
    go
    
    -- Duplicates, short table, MERGE: insert the candidate only when the target has no matching row.
    -- MERGE is not available on SQL Server 2005; comment this batch out there.
    declare @candidate int, @attempts int, @started_at datetime, @elapsed_ms int;
    set @candidate = 1;
    set @attempts = 0;
    set @started_at = getdate();

    while @attempts < 100000
    begin
      set @attempts = @attempts + 1;

      merge into #temp as tgt
      using (select @candidate as col1) as src
        on tgt.col1 = src.col1
      when not matched by target then
        insert (col1) values (src.col1);
    end

    set @elapsed_ms = datediff(ms, @started_at, getdate());
    raiserror('duplicates (short table), merge: %i milliseconds / %i inserts', -1, -1, @elapsed_ms, @attempts) with nowait;
    go
    
    -------------------------------------------------------
    -- Section 2: unique inserts into an initially empty table
    -------------------------------------------------------

    -- Baseline: plain inserts of 1..100000 with no duplicate check at all.
    truncate table #temp;

    declare @next_value int, @started_at datetime, @elapsed_ms int;
    set @next_value = 0;
    set @started_at = getdate();

    while @next_value < 100000
    begin
      -- Increment first, so the values inserted are 1..100000 and the final
      -- counter equals the number of inserts reported below.
      set @next_value = @next_value + 1;

      insert into #temp (col1)
      select @next_value;
    end

    set @elapsed_ms = datediff(ms, @started_at, getdate());
    raiserror('uniques, straight insert: %i milliseconds / %i inserts', -1, -1, @elapsed_ms, @next_value) with nowait;
    go
    
    -- Uniques, TRY/CATCH: same inserts as the baseline, but each one is wrapped
    -- in a TRY block whose CATCH discards any error.
    truncate table #temp;

    declare @next_value int, @started_at datetime, @elapsed_ms int;
    set @next_value = 0;
    set @started_at = getdate();

    while @next_value < 100000
    begin
      set @next_value = @next_value + 1;

      begin try
        insert into #temp (col1)
        select @next_value;
      end try
      begin catch
        -- intentionally empty: errors are swallowed
      end catch
    end

    set @elapsed_ms = datediff(ms, @started_at, getdate());
    raiserror('uniques, try/catch: %i milliseconds / %i inserts', -1, -1, @elapsed_ms, @next_value) with nowait;
    go
    
    -- Uniques, conditional insert: every value is new to the freshly truncated
    -- table, so the NOT EXISTS guard always lets the row through.
    truncate table #temp;

    declare @next_value int, @started_at datetime, @elapsed_ms int;
    set @next_value = 0;
    set @started_at = getdate();

    while @next_value < 100000
    begin
      set @next_value = @next_value + 1;

      insert into #temp (col1)
      select @next_value
      where not exists (select 1 from #temp as existing where existing.col1 = @next_value);
    end

    set @elapsed_ms = datediff(ms, @started_at, getdate());
    raiserror('uniques, conditional insert: %i milliseconds / %i inserts', -1, -1, @elapsed_ms, @next_value) with nowait;
    go
    
    -- Uniques, EXCEPT: the candidate is compared against the rows already in
    -- #temp before being inserted.
    truncate table #temp;

    declare @next_value int, @started_at datetime, @elapsed_ms int;
    set @next_value = 0;
    set @started_at = getdate();

    while @next_value < 100000
    begin
      set @next_value = @next_value + 1;

      insert into #temp (col1)
      select @next_value
      except
      select col1 from #temp;
    end

    set @elapsed_ms = datediff(ms, @started_at, getdate());
    raiserror('uniques, except: %i milliseconds / %i inserts', -1, -1, @elapsed_ms, @next_value) with nowait;
    go
    
    -- Uniques, MERGE: insert 1..100000 into a freshly truncated table, one MERGE per value.
    -- MERGE is not available on SQL Server 2005; comment this batch out there.
    truncate table #temp
    declare @x int, @now datetime, @duration int
    -- Start at 0, like the other "uniques" batches. Starting at 1 merged only
    -- 2..100000 (99999 rows) while the message still reported 100000 inserts,
    -- and left the value 1 out of the table.
    select @x = 0, @now = getdate()
    while @x < 100000 begin
      -- Increment before the merge so the final @x equals the number of merges performed.
      set @x = @x+1
      merge #temp t using (select @x) s (col1) on t.col1 = s.col1 when not matched by target then insert values (col1);
    end
    set @duration = datediff(ms,@now,getdate())
    raiserror('uniques, merge: %i milliseconds / %i inserts',-1,-1,@duration, @x) with nowait
    go
    
    -------------------------------------------------------
    -- Section 3: duplicate inserts against a table w/ 100000 records
    -------------------------------------------------------

    -- No truncate in this section: the batches reuse whatever rows the
    -- preceding section left in #temp.

    -- Duplicates, tall table, TRY/CATCH: attempt to insert the value 1 unconditionally
    -- and discard whatever error it raises.
    declare @candidate int, @attempts int, @started_at datetime, @elapsed_ms int;
    set @candidate = 1;
    set @attempts = 0;
    set @started_at = getdate();

    while @attempts < 100000
    begin
      set @attempts = @attempts + 1;

      begin try
        insert into #temp (col1)
        select @candidate;
      end try
      begin catch
        -- intentionally empty: the error is swallowed so the loop keeps going
      end catch
    end

    set @elapsed_ms = datediff(ms, @started_at, getdate());
    raiserror('duplicates (tall table), try/catch: %i milliseconds / %i inserts', -1, -1, @elapsed_ms, @attempts) with nowait;
    go
    
    -- Duplicates, tall table, conditional insert: INSERT ... SELECT guarded by
    -- NOT EXISTS, run against the table as left by the preceding batches.
    declare @candidate int, @attempts int, @started_at datetime, @elapsed_ms int;
    set @candidate = 1;
    set @attempts = 0;
    set @started_at = getdate();

    while @attempts < 100000
    begin
      set @attempts = @attempts + 1;

      insert into #temp (col1)
      select @candidate
      where not exists (select 1 from #temp as existing where existing.col1 = @candidate);
    end

    set @elapsed_ms = datediff(ms, @started_at, getdate());
    raiserror('duplicates (tall table), conditional insert: %i milliseconds / %i inserts', -1, -1, @elapsed_ms, @attempts) with nowait;
    go
    
    -- Duplicates, tall table, EXCEPT: the candidate is dropped from the insert's
    -- source set whenever the same value already exists in #temp.
    declare @candidate int, @attempts int, @started_at datetime, @elapsed_ms int;
    set @candidate = 1;
    set @attempts = 0;
    set @started_at = getdate();

    while @attempts < 100000
    begin
      set @attempts = @attempts + 1;

      insert into #temp (col1)
      select @candidate
      except
      select col1 from #temp;
    end

    set @elapsed_ms = datediff(ms, @started_at, getdate());
    raiserror('duplicates (tall table), except: %i milliseconds / %i inserts', -1, -1, @elapsed_ms, @attempts) with nowait;
    go
    
    -- Duplicates, tall table, MERGE: insert the candidate only when the target has no matching row.
    -- MERGE is not available on SQL Server 2005; comment this batch out there.
    declare @candidate int, @attempts int, @started_at datetime, @elapsed_ms int;
    set @candidate = 1;
    set @attempts = 0;
    set @started_at = getdate();

    while @attempts < 100000
    begin
      set @attempts = @attempts + 1;

      merge into #temp as tgt
      using (select @candidate as col1) as src
        on tgt.col1 = src.col1
      when not matched by target then
        insert (col1) values (src.col1);
    end

    set @elapsed_ms = datediff(ms, @started_at, getdate());
    raiserror('duplicates (tall table), merge: %i milliseconds / %i inserts', -1, -1, @elapsed_ms, @attempts) with nowait;
    go
    

提交回复
热议问题