SQL INSERT but avoid duplicates

前端 未结 6 979
北荒
北荒 2020-12-31 10:42

I want to do some quick inserts but avoid duplicates into a Table. For argument\'s sake lets call it MarketPrices, I\'ve been experimenting with two ways of doing it but not

相关标签:
6条回答
  • 2020-12-31 11:07

    if you don't need to trap duplicates, you can always create a unique index with "ignore duplicates" set to true. SQL Server will take care of this for you.

    0 讨论(0)
  • 2020-12-31 11:09

    Another option: create a unique index on the fields (SecurityCode, BuyPrice, SellPrice) in question, issue a simple insert, and let the database decide whether the records are duplicates. The insert will fail on an attempt to insert a duplicate.

    Using code (whether external language or SQL proc) to guarantee uniqueness is not strict enough and will ultimately lead to the very duplicates you hope to prevent.

    0 讨论(0)
  • 2020-12-31 11:10

    Below I have added the top answers from Only inserting a row if it's not already there to Peter Radocchia's excellent answer.

    The takeaway is that using the race safe with try/catch technique is marginally (~1%) faster than race safe with updlock, holdlock technique when there are no actual collisions (i.e. you expect that collisions will be very rare - this is the uniques scenario), and is a little slower (~20%) when there are always collisions (this is the duplicates scenario). This is not taking complex issues like lock escalation into account.

    Here are the results (SQL Server 2014, build 12.0.2000.8):

    duplicates (short table)    
      try/catch:                       15546 milliseconds / 100000 inserts
      conditional insert:               1460 milliseconds / 100000 inserts
      except:                           1490 milliseconds / 100000 inserts
      merge:                            1420 milliseconds / 100000 inserts
      race safe with try/catch:         1650 milliseconds / 100000 inserts
      race safe with updlock, holdlock: 1330 milliseconds / 100000 inserts
    
    uniques
      try/catch:                        2266 milliseconds / 100000 inserts
      conditional insert:               2156 milliseconds / 100000 inserts
      except:                           2273 milliseconds / 100000 inserts
      merge:                            2136 milliseconds / 100000 inserts
      race safe with try/catch:         2400 milliseconds / 100000 inserts
      race safe with updlock, holdlock: 2430 milliseconds / 100000 inserts
    
      straight insert:                  1686 milliseconds / 100000 inserts
    
    duplicates (tall table)
      try/catch:                       15826 milliseconds / 100000 inserts
      conditional insert:               1530 milliseconds / 100000 inserts
      except:                           1506 milliseconds / 100000 inserts
      merge:                            1443 milliseconds / 100000 inserts
      race safe with try/catch:         1636 milliseconds / 100000 inserts
      race safe with updlock, holdlock: 1426 milliseconds / 100000 inserts
    

    Duplicates (short table) section:

    declare @x int, @y int, @now datetime, @duration int
    select @x = 1, @y = 0, @now = getdate()
    while @y < 100000 begin
      set @y = @y+1
      begin try 
        insert #temp select @x where not exists (select * from #temp where col1 = @x)
      end try
      begin catch 
        if error_number() <> 2627
          throw
      end catch
    end
    set @duration = datediff(ms,@now,getdate())
    raiserror('duplicates (short table), race safe with try/catch: %i milliseconds / %i inserts',-1,-1,@duration,@y) with nowait
    go
    
    declare @x int, @y int, @now datetime, @duration int
    select @x = 1, @y = 0, @now = getdate()
    while @y < 100000 begin
      set @y = @y+1
      insert #temp select @x where not exists (select * from #temp with (updlock, holdlock) where col1 = @x)
    end
    set @duration = datediff(ms,@now,getdate())
    raiserror('duplicates (short table), race safe with updlock, holdlock: %i milliseconds / %i inserts',-1,-1,@duration, @y) with nowait
    go
    

    Uniques section

    truncate table #temp
    declare @x int, @now datetime, @duration int
    select @x = 0, @now = getdate()
    while @x < 100000 begin
      set @x = @x+1
      begin try 
        insert #temp select @x where not exists (select * from #temp where col1 = @x)
      end try
      begin catch 
        if error_number() <> 2627
          throw
      end catch
    end
    set @duration = datediff(ms,@now,getdate())
    raiserror('uniques, race safe with try/catch: %i milliseconds / %i inserts',-1,-1,@duration, @x) with nowait
    go
    
    truncate table #temp
    declare @x int, @now datetime, @duration int
    select @x = 0, @now = getdate()
    while @x < 100000 begin
      set @x = @x+1
      insert #temp select @x where not exists (select * from #temp with (updlock, holdlock) where col1 = @x)
    end
    set @duration = datediff(ms,@now,getdate())
    raiserror('uniques, race safe with updlock, holdlock: %i milliseconds / %i inserts',-1,-1,@duration, @x) with nowait
    go
    

    Duplicates (tall table) section

    declare @x int, @y int, @now datetime, @duration int
    select @x = 1, @y = 0, @now = getdate()
    while @y < 100000 begin
      set @y = @y+1
      begin try 
        insert #temp select @x where not exists (select * from #temp where col1 = @x)
      end try
      begin catch 
        if error_number() <> 2627
          throw
      end catch
    end
    set @duration = datediff(ms,@now,getdate())
    raiserror('duplicates (tall table), race safe with try/catch: %i milliseconds / %i inserts',-1,-1,@duration,@y) with nowait
    go
    
    declare @x int, @y int, @now datetime, @duration int
    select @x = 1, @y = 0, @now = getdate()
    while @y < 100000 begin
      set @y = @y+1
      insert #temp select @x where not exists (select * from #temp with (updlock, holdlock) where col1 = @x)
    end
    set @duration = datediff(ms,@now,getdate())
    raiserror('duplicates (tall table), race safe with updlock, holdlock: %i milliseconds / %i inserts',-1,-1,@duration, @y) with nowait
    go
    
    0 讨论(0)
  • 2020-12-31 11:13

    EDIT: to prevent race conditions in a concurrent environment, use WITH (UPDLOCK) in the correlated subquery.


    I think this would be the standard method:

    INSERT INTO MarketPrices (SecurityCode, BuyPrice, SellPrice, IsMarketOpen)
    SELECT @SecurityCode, @BuyPrice,  @SellPrice, @IsMarketOpen
    WHERE NOT EXISTS (
      SELECT * FROM MarketPrices WITH (UPDLOCK)
      WHERE SecurityCode = @SecurityCode 
        AND BuyPrice = @BuyPrice 
        AND SellPrice = @SellPrice
      )
    

    If any of your fields are nullable, you would have to add that to the condition.

    Your first method is interesting, but the requirements for EXCEPT have you jumping through hoops. This method is essentially the same, but it gets you around the column matching issue.

    Alternatively:

    INSERT INTO MarketPrices (SecurityCode, BuyPrice, SellPrice, IsMarketOpen)
    SELECT SecurityCode, BuyPrice, SellPrice, @IsMarketOpen
    FROM (
      SELECT @SecurityCode, @BuyPrice,  @SellPrice
      EXCEPT
      SELECT SecurityCode, BuyPrice, SellPrice FROM MarketPrices WITH (UPDLOCK)
      ) a (SecurityCode, BuyPrice, SellPrice)
    

    The nice thing about EXCEPT in this instance is that it handles NULLs without any extra coding on your part. To achieve the same thing in first example, you would need to test each pair for NULLs as well as equality, long-hand.

    Your second method is ok, but you don't need the variable. See Tomalak's solution, he cleaned it up nicely. Also, you would need to explicitly handle the possibility of concurrent inserts, if that were a concern.

    0 讨论(0)
  • 2020-12-31 11:22

    I would go for a semantic solution anytime. Your two proposals seem quite obscure to me (though the latter is better than the former).

    IF NOT EXISTS (
      SELECT 1
      FROM   MarketPrices 
      WHERE  SecurityCode  = @SecurityCode 
             AND BuyPrice  = @BuyPrice 
             AND SellPrice = @SellPrice
    )  
    BEGIN
      INSERT MarketPrices 
        (SecurityCode,   BuyPrice,  SellPrice,  IsMarketOpen)
      VALUES 
        (@SecurityCode, @BuyPrice, @SellPrice, @IsMarketOpen)
    END
    

    With a conglomerate index over SecurityCode, BuyPrice, SellPrice the EXISTS query should go reasonably fast.

    Benchmarking it is a matter of timing a WHILE loop, I would say. Test it and see for yourself.

    0 讨论(0)
  • 2020-12-31 11:31

    EDIT: to prevent race conditions in concurrent environments, use WITH (UPDLOCK) in the correlated subquery or EXCEPT'd SELECT. The test script I wrote below doesn't require it, since it uses temporary tables that are only visible to the current connection, but in a real environment, operating against user tables, it would be necessary.

    MERGE doesn't require UPDLOCK.


    Inspired by mcl's answer re: unique index & let the database throw an error, I decided to benchmark conditional inserts vs. try/catch.

    The results appear to support the conditional insert over try/catch, but YMMV. It's a very simple scenario (one column, small table, etc), executed on one machine, etc.

    Here are the results (SQL Server 2008, build 10.0.1600.2):

    duplicates (short table)    
      try/catch:                14440 milliseconds / 100000 inserts
      conditional insert:        2983 milliseconds / 100000 inserts
      except:                    2966 milliseconds / 100000 inserts
      merge:                     2983 milliseconds / 100000 inserts
    
    uniques
      try/catch:                 3920 milliseconds / 100000 inserts
      conditional insert:        3860 milliseconds / 100000 inserts
      except:                    3873 milliseconds / 100000 inserts
      merge:                     3890 milliseconds / 100000 inserts
    
      straight insert:           3173 milliseconds / 100000 inserts
    
    duplicates (tall table)
      try/catch:                14436 milliseconds / 100000 inserts
      conditional insert:        3063 milliseconds / 100000 inserts
      except:                    3063 milliseconds / 100000 inserts
      merge:                     3030 milliseconds / 100000 inserts
    

    Notice, even on unique inserts, there's slightly more overhead to try/catch than a conditional insert. I wonder if this varies by version, CPU, number of cores, etc.

    I did not benchmark the IF conditional inserts, just WHERE. I assume the IF variety would show more overhead, since a) would you have two statements, and b) you would need to wrap the two statements in a transaction and set the isolation level to serializable (!). If someone wanted to test this, you would need to change the temp table to a regular user table (serializable doesn't apply to local temp tables).

    Here is the script:

    -- tested on SQL 2008.
    -- to run on SQL 2005, comment out the statements using MERGE
    set nocount on
    
    if object_id('tempdb..#temp') is not null drop table #temp
    create table #temp (col1 int primary key)
    go
    
    -------------------------------------------------------
    
    -- duplicate insert test against a table w/ 1 record
    
    -------------------------------------------------------
    
    insert #temp values (1)
    go
    
    declare @x int, @y int, @now datetime, @duration int
    select @x = 1, @y = 0, @now = getdate()
    while @y < 100000 begin
      set @y = @y+1
      begin try 
        insert #temp select @x
      end try
      begin catch end catch
    end
    set @duration = datediff(ms,@now,getdate())
    raiserror('duplicates (short table), try/catch: %i milliseconds / %i inserts',-1,-1,@duration,@y) with nowait
    go
    
    declare @x int, @y int, @now datetime, @duration int
    select @x = 1, @y = 0, @now = getdate()
    while @y < 100000 begin
      set @y = @y+1
      insert #temp select @x where not exists (select * from #temp where col1 = @x)
    end
    set @duration = datediff(ms,@now,getdate())
    raiserror('duplicates (short table), conditional insert: %i milliseconds / %i inserts',-1,-1,@duration, @y) with nowait
    go
    
    declare @x int, @y int, @now datetime, @duration int
    select @x = 1, @y = 0, @now = getdate()
    while @y < 100000 begin
      set @y = @y+1
      insert #temp select @x except select col1 from #temp
    end
    set @duration = datediff(ms,@now,getdate())
    raiserror('duplicates (short table), except: %i milliseconds / %i inserts',-1,-1,@duration, @y) with nowait
    go
    
    -- comment this batch out for SQL 2005
    declare @x int, @y int, @now datetime, @duration int
    select @x = 1, @y = 0, @now = getdate()
    while @y < 100000 begin
      set @y = @y+1
      merge #temp t using (select @x) s (col1) on t.col1 = s.col1 when not matched by target then insert values (col1);
    end
    set @duration = datediff(ms,@now,getdate())
    raiserror('duplicates (short table), merge: %i milliseconds / %i inserts',-1,-1,@duration, @y) with nowait
    go
    
    -------------------------------------------------------
    
    -- unique insert test against an initially empty table
    
    -------------------------------------------------------
    
    truncate table #temp
    declare @x int, @now datetime, @duration int
    select @x = 0, @now = getdate()
    while @x < 100000 begin
      set @x = @x+1
      insert #temp select @x
    end
    set @duration = datediff(ms,@now,getdate())
    raiserror('uniques, straight insert: %i milliseconds / %i inserts',-1,-1,@duration, @x) with nowait
    go
    
    truncate table #temp
    declare @x int, @now datetime, @duration int
    select @x = 0, @now = getdate()
    while @x < 100000 begin
      set @x = @x+1
      begin try 
        insert #temp select @x
      end try
      begin catch end catch
    end
    set @duration = datediff(ms,@now,getdate())
    raiserror('uniques, try/catch: %i milliseconds / %i inserts',-1,-1,@duration, @x) with nowait
    go
    
    truncate table #temp
    declare @x int, @now datetime, @duration int
    select @x = 0, @now = getdate()
    while @x < 100000 begin
      set @x = @x+1
      insert #temp select @x where not exists (select * from #temp where col1 = @x)
    end
    set @duration = datediff(ms,@now,getdate())
    raiserror('uniques, conditional insert: %i milliseconds / %i inserts',-1,-1,@duration, @x) with nowait
    go
    
    truncate table #temp
    declare @x int, @now datetime, @duration int
    select @x = 0, @now = getdate()
    while @x < 100000 begin
      set @x = @x+1
      insert #temp select @x except select col1 from #temp
    end
    set @duration = datediff(ms,@now,getdate())
    raiserror('uniques, except: %i milliseconds / %i inserts',-1,-1,@duration, @x) with nowait
    go
    
    -- comment this batch out for SQL 2005
    truncate table #temp
    declare @x int, @now datetime, @duration int
    select @x = 1, @now = getdate()
    while @x < 100000 begin
      set @x = @x+1
      merge #temp t using (select @x) s (col1) on t.col1 = s.col1 when not matched by target then insert values (col1);
    end
    set @duration = datediff(ms,@now,getdate())
    raiserror('uniques, merge: %i milliseconds / %i inserts',-1,-1,@duration, @x) with nowait
    go
    
    -------------------------------------------------------
    
    -- duplicate insert test against a table w/ 100000 records
    
    -------------------------------------------------------
    
    declare @x int, @y int, @now datetime, @duration int
    select @x = 1, @y = 0, @now = getdate()
    while @y < 100000 begin
      set @y = @y+1
      begin try 
        insert #temp select @x
      end try
      begin catch end catch
    end
    set @duration = datediff(ms,@now,getdate())
    raiserror('duplicates (tall table), try/catch: %i milliseconds / %i inserts',-1,-1,@duration,@y) with nowait
    go
    
    declare @x int, @y int, @now datetime, @duration int
    select @x = 1, @y = 0, @now = getdate()
    while @y < 100000 begin
      set @y = @y+1
      insert #temp select @x where not exists (select * from #temp where col1 = @x)
    end
    set @duration = datediff(ms,@now,getdate())
    raiserror('duplicates (tall table), conditional insert: %i milliseconds / %i inserts',-1,-1,@duration, @y) with nowait
    go
    
    declare @x int, @y int, @now datetime, @duration int
    select @x = 1, @y = 0, @now = getdate()
    while @y < 100000 begin
      set @y = @y+1
      insert #temp select @x except select col1 from #temp
    end
    set @duration = datediff(ms,@now,getdate())
    raiserror('duplicates (tall table), except: %i milliseconds / %i inserts',-1,-1,@duration, @y) with nowait
    go
    
    -- comment this batch out for SQL 2005
    declare @x int, @y int, @now datetime, @duration int
    select @x = 1, @y = 0, @now = getdate()
    while @y < 100000 begin
      set @y = @y+1
      merge #temp t using (select @x) s (col1) on t.col1 = s.col1 when not matched by target then insert values (col1);
    end
    set @duration = datediff(ms,@now,getdate())
    raiserror('duplicates (tall table), merge: %i milliseconds / %i inserts',-1,-1,@duration, @y) with nowait
    go
    
    0 讨论(0)
提交回复
热议问题