Find and Remove Repeated Substrings

前端 未结 2 733
情歌与酒
情歌与酒 2020-12-21 02:15

I have a column in a SQL Server 2008 table where part of the string was accidentally repeated.

Does anyone have a quick and easy way to remove the trailing duplicated substrings?

相关标签:
2条回答
  • 2020-12-21 02:46

    If you don't already have a numbers table:

    -- Builds dbo.Numbers: one row per integer from 0 through @UpperLimit,
    -- with a unique clustered index on [Number]. This is a one-time setup
    -- script; SELECT ... INTO fails if dbo.Numbers already exists.
    SET NOCOUNT ON;
    DECLARE @UpperLimit INT;
    SET @UpperLimit = 4000;

    WITH n(rn) AS
    (
        -- A single pass over sys.all_columns can hold fewer rows than
        -- @UpperLimit + 1, which would silently produce a short table.
        -- Cross joining the view with itself squares the available row
        -- count; the outer filter still keeps only the rows we need.
        SELECT ROW_NUMBER() OVER (ORDER BY c1.[object_id])
        FROM sys.all_columns AS c1
        CROSS JOIN sys.all_columns AS c2
    )
    SELECT [Number] = rn - 1   -- shift so the sequence starts at 0
    INTO dbo.Numbers FROM n
    WHERE rn <= @UpperLimit + 1;

    CREATE UNIQUE CLUSTERED INDEX n ON dbo.Numbers([Number]);
    

    Now a generic split function that will turn your delimited string into a set:

    -- dbo.SplitString: inline table-valued function that splits @List on the
    -- single-character delimiter @Delim, using dbo.Numbers as a position source.
    --
    -- Returns one row per element:
    --   rn      1-based position of the element within @List
    --   vn      occurrence counter of this [Value]; vn = 1 marks the first
    --           time a value appears (values are compared after trimming)
    --   [Value] the element with leading and trailing spaces removed
    --
    -- Only elements that start at a position present in dbo.Numbers are
    -- returned, so strings longer than that table's range are cut short.
    -- A trailing delimiter does not produce a trailing empty element.
    CREATE FUNCTION dbo.SplitString
    (
        @List NVARCHAR(MAX),
        @Delim CHAR(1)
    )
    RETURNS TABLE
    AS
        RETURN ( SELECT 
            rn, 
            vn = ROW_NUMBER() OVER (PARTITION BY [Value] ORDER BY rn), 
            [Value]
          FROM 
          ( 
            SELECT 
              -- Order by the element's start position. Ordering by an
              -- expression that is constant for every row would leave the
              -- element sequence undefined.
              rn = ROW_NUMBER() OVER (ORDER BY [Number]),
              [Value] = LTRIM(RTRIM(SUBSTRING(@List, [Number],
              CHARINDEX(@Delim, @List + @Delim, [Number]) - [Number])))
            FROM dbo.Numbers
            -- Positions start at 1. Position 0 must be excluded: with a
            -- space delimiter SUBSTRING(..., 0, 1) yields '' which compares
            -- equal to ' ' and would emit the first element twice.
            WHERE [Number] BETWEEN 1 AND LEN(@List)
            -- Prefixing the delimiter makes position N of the padded string
            -- the character just before position N of @List, so a match
            -- means an element starts at N.
            AND SUBSTRING(@Delim + @List, [Number], 1) = @Delim
          ) AS x
        );
    GO
    

    And then a function that puts them back together:

    -- dbo.DedupeString: takes a backslash-delimited list and returns it with
    -- every repeated element removed, keeping the first occurrence of each
    -- element in its original order. Returns NULL when the split yields no
    -- rows (for example a NULL input).
    CREATE FUNCTION dbo.DedupeString
    (
        @List NVARCHAR(MAX)
    )
    RETURNS NVARCHAR(MAX)
    AS
    BEGIN
        DECLARE @Joined NVARCHAR(MAX);

        -- Concatenate the first occurrence of each element, each one
        -- prefixed with the delimiter. Going through an XML value and
        -- reading it back with .value() undoes XML entity escaping.
        SET @Joined =
        (
            SELECT '\' + piece.[Value]
            FROM dbo.SplitString(@List, '\') AS piece
            WHERE piece.vn = 1
            ORDER BY piece.rn
            FOR XML PATH, TYPE
        ).value('.', 'nvarchar(max)');

        -- Drop the leading delimiter added in front of the first element.
        RETURN STUFF(@Joined, 1, 1, '');
    END
    GO
    

    Sample usage:

    -- Demo call: the repeated elements after their first occurrence are removed.
    SELECT
        dbo.DedupeString('alpha\bravo\bravo\charlie\delta\bravo\charlie\delta');
    

    Results:

    alpha\bravo\charlie\delta
    

    You can also say something like:

    -- Intentionally unfiltered: rewrites OopsColumn for every row of the table
    -- with its de-duplicated form.
    UPDATE m
       SET OopsColumn = dbo.DedupeString(m.OopsColumn)
      FROM dbo.MessedUpTable AS m;
    

    @MikaelEriksson will probably swoop in with a more efficient way to use XML to eliminate duplicates, but that is what I can offer until then. :-)

    0 讨论(0)
  • 2020-12-21 02:52
    -- RemoveDups: returns the '/'-delimited list @S with repeated words removed,
    -- keeping the first occurrence of each word in its original order.
    --   @S      '/'-delimited list; may be NULL or empty
    --   returns NULL for NULL input, '' for empty input, otherwise the
    --           de-duplicated list without a trailing '/'
    create function RemoveDups(@S nvarchar(max)) returns nvarchar(max)
    as
    begin
      declare @R nvarchar(max)   -- accumulated result, every word followed by '/'
      declare @W nvarchar(max)   -- word currently being examined

      if @S is null
        return null

      set @R = ''

      -- Loop while anything is left. Testing for > 1 would drop a final
      -- word that is a single character long.
      while len(@S) > 0
      begin
        -- Get the first word
        set @W = left(@S, charindex('/', @S + '/') - 1)

        -- Add word to result if not already added. CHARINDEX does a literal
        -- search, so '%', '_' and '[' inside a word are not treated as
        -- wildcards the way they would be in a LIKE pattern.
        if charindex('/' + @W + '/', '/' + @R) = 0
        begin
          set @R = @R + @W + '/'
        end

        -- Remove first word and its delimiter
        set @S = stuff(@S, 1, charindex('/', @S + '/'), '')
      end

      -- Strip the trailing '/'. Guard the empty case, where LEFT would be
      -- called with a negative length and raise an error.
      return case when len(@R) = 0 then @R else left(@R, len(@R) - 1) end
    end
    

    As requested by Aaron Bertrand, here is an XML-based version. I will, however, make no claim about which is the fastest to execute.

    -- Demo script: de-duplicates '/'-delimited lists stored in a table variable
    -- by shredding each list into XML nodes, grouping equal values, and
    -- concatenating the groups back in order of first appearance.

    -- Table to replace in
    declare @T table
    (
      ID int identity,
      Value nvarchar(max)
    )

    -- Add some sample data (explicit column list; ID is generated)
    insert into @T (Value) values ('alpha/beta/alpha/gamma/delta/gamma/delta/alpha')
    insert into @T (Value) values ('delta/beta/alpha/beta/alpha/gamma/delta/gamma/delta/alpha')

    -- Update the column. There is intentionally no filter on which lists are
    -- processed other than skipping NULLs, which have nothing to de-duplicate.
    update T
    set Value = NewValue
    from (
           select T1.ID,
                  T1.Value,
                  stuff((select '/' + T4.Value
                         from (
                                -- One row per list element; rn numbers the
                                -- <x> nodes so first-appearance order can
                                -- be restored after grouping.
                                select T3.X.value('.', 'nvarchar(max)') as Value,
                                       row_number() over(order by T3.X) as rn
                                from T2.X.nodes('/x') as T3(X)
                              ) as T4
                         group by T4.Value
                         order by min(T4.rn)
                         for xml path(''), type).value('.',  'nvarchar(max)'), 1, 1, '') as NewValue
           from @T as T1
             -- Turn 'a/b/c' into <x>a</x><x>b</x><x>c</x>. The inner
             -- FOR XML PATH('') entitizes characters such as & and <
             -- first, so the cast to xml does not fail on them.
             cross apply (select cast('<x>'+replace((select T1.Value as [*] for xml path('')), '/', '</x><x>')+'</x>' as xml)) as T2(X)
           where T1.Value is not null
         ) as T

    -- Show the result
    select ID, Value
    from @T
    
    0 讨论(0)
提交回复
热议问题