Simple string hashing function

前端 未结 6 1265
情话喂你
情话喂你 2020-12-14 11:28

I'm trying to hash a string into an integer for placing it in an array. However, I do not know all too much about hashing functions, and that's why my current method is just adding all the ASCII numbers of the characters together and taking it mod the array size.

相关标签:
6条回答
  • 2020-12-14 11:40

    As Dummy00001 pointed out, this has been asked and answered before. Take a look at Best algorithm for hashing number values?, particularly the suggestion of using MurmurHash.

    I'd recommend MurmurHash because:

    1. It's very fast.

    2. Its distribution and avalanche characteristics are excellent for a non-cryptographic hash.

    3. Its worst-case behavior is still pretty good.

    I've used it. It doesn't suck.

    edit

    There was a lot of discussion about how to best port it to Delphi, on https://forums.embarcadero.com/thread.jspa?threadID=13902&tstart=0. The resulting code is available at https://forums.codegear.com/thread.jspa?threadID=14879

    Delphi translation

    function Murmur2(const S: AnsiString; const Seed: LongWord=$9747b28c): LongWord;
    // MurmurHash2-style 32-bit hash of the bytes of S.
    //   S    - input string; its bytes are consumed four at a time, then the
    //          0..3 trailing bytes are folded in separately
    //   Seed - initial hash state (the default is the value used by this port)
    // Returns the mixed 32-bit hash. The arithmetic relies on 32-bit wrap-around.
    const
        // Mixing multiplier and shift used by every mixing step below.
        MixMul = $5bd1e995;
        MixShift = 24;
    var
        hash: LongWord;
        remaining: LongWord;
        chunk: LongWord;
        idx: Integer;
    begin
        remaining := Length(S);

        // Start from the seed combined with the input length
        hash := Seed xor remaining;

        // idx is the 1-based position of the next unread character
        idx := 1;

        // Body: fold one 32-bit chunk into the hash per iteration
        while remaining >= 4 do
        begin
            chunk := PLongWord(@S[idx])^;

            chunk := chunk * MixMul;
            chunk := chunk xor (chunk shr MixShift);
            chunk := chunk * MixMul;

            hash := hash * MixMul;
            hash := hash xor chunk;

            Inc(idx, 4);
            Dec(remaining, 4);
        end;

        // Tail: at most three bytes are left; the highest-offset byte goes in first
        Assert(remaining <= 3);
        if remaining >= 1 then
        begin
            if remaining >= 2 then
            begin
                if remaining = 3 then
                    hash := hash xor (LongWord(S[idx+2]) shl 16);
                hash := hash xor (LongWord(S[idx+1]) shl 8);
            end;
            hash := hash xor (LongWord(S[idx]));
            hash := hash * MixMul;
        end;

        // Final avalanche so the last bytes affect every bit of the result
        hash := hash xor (hash shr 13);
        hash := hash * MixMul;
        hash := hash xor (hash shr 15);

        Result := hash;
    end;
    

    Passes all self-tests from the original C implementation.

    0 讨论(0)
  • 2020-12-14 11:44

    I've tried many fast hash functions and chosen this one:

    function StrHash(const st:string):cardinal;
    // Multiplicative string hash: for each character, the running value is
    // multiplied by $20844 and then xor-ed with the low byte of the character.
    // Returns 0 for an empty string.
     var
      charPos, lastPos: integer;
     begin
      result := 0;
      lastPos := length(st);
      charPos := 1;
      while charPos <= lastPos do
       begin
        // multiply first, then xor in the character byte
        result := (result*$20844) xor byte(st[charPos]);
        inc(charPos);
       end;
     end;
    

    It is as fast as K&R function (actually even faster) but makes better (more even) distribution.

    0 讨论(0)
  • 2020-12-14 11:45

    The FNV-1a hash is quick and easy to implement.

    0 讨论(0)
  • 2020-12-14 11:47

    A very simple method is to just XOR all values. The simplest as far as I know.

    0 讨论(0)
  • 2020-12-14 11:48

    Jenkins hash function should help you get started.

    my current method is just adding all the ASCII numbers of the characters together and taking it mod the array size.

    You discard an important bit of information, which is the position of the character in the string. That is a bad idea, since then strings "AB" and "BA" would have the same hash value.

    Instead of simple addition, keeping it primitive, one can use an expression like hash = hash*P1 + str[i]*P2 + P3; where Pi are some prime numbers. That's how I do it if I need a hash function quickly. I often use 7, 5 and 3 as the primes, but the numbers should obviously be adjusted (as well as the initial value of hash) so that the result of the hash function is usable for your task.

    For more information read the corresponding (and rather informative) Wikipedia article.

    0 讨论(0)
  • 2020-12-14 11:53

    See http://www.strchr.com/hash_functions for a very good panel of hashing functions.

    Here are several implementations in Delphi:

    The first that comes to mind is the one used in the TStringHash.HashOf method from the official IniFiles.pas unit. Including a faster asm version:

    function HashOf(P: PByteArray; Len: integer): cardinal;
    // Rotate-left-by-2 / xor hash over the Len bytes starting at P
    // (algorithm from IniFiles.TStringHash.HashOf).
    //   P   - pointer to the first byte to hash
    //   Len - number of bytes to hash
    // Returns 0 when Len <= 0; P is not dereferenced in that case.
    // Both branches below hash exactly the bytes at offsets 0..Len-1.
    {$ifdef PUREPASCAL}
    var I: Integer;
    begin
      Result := 0;
      // P^ is a zero-based byte array, so the data is P^[0]..P^[Len-1]
      for I := 0 to Len-1 do
        Result := ((Result shl 2) or (Result shr (SizeOf(Result)*8-2))) xor P^[I];
    end;
    {$else}
    asm // faster asm version by Synopse - on entry: eax = P, edx = Len
        push ebx
        mov ebx,edx     // ebx = Len
        mov edx,eax     // edx = P
        xor eax,eax     // eax = Result = 0
        test ebx,ebx
        jle @z          // Len <= 0: return 0, same as the Pascal branch
        xor ecx,ecx     // ecx = Result shl 2 = 0
    @1: shr eax,$1e     // eax = Result shr (SizeOf(Result) * 8 - 2))
        or ecx,eax      // ecx = ((Result shl 2) or (Result shr (SizeOf(Result)*8-2)))
        movzx eax,byte ptr [edx] // eax = current byte
        inc edx
        xor eax,ecx     // eax = () xor current byte
        dec ebx
        lea ecx,[eax*4] // ecx = Result shl 2 (lea leaves the flags of dec intact)
        jnz @1
    @z: pop ebx
    end;
    {$endif}
    

    The classic Kernighan & Ritchie hash from "The C Programming Language", 2nd edition - not the best, but simple and efficient code.

    function kr32(crc: cardinal; buf: PAnsiChar; len: cardinal): cardinal;
    // Kernighan & Ritchie style hash: for each byte, hash := byte + hash*31.
    //   crc - starting value (pass a previous result to continue hashing)
    //   buf - pointer to the bytes to hash
    //   len - number of bytes to hash
    // Returns crc unchanged when len = 0; buf is not dereferenced in that case.
    var i: cardinal;
    begin
      result := crc;
      // Unsigned counter with an explicit "<" test: a "for i := 0 to len-1"
      // bound would underflow when len = 0, since len is unsigned.
      i := 0;
      while i < len do begin
        result := ord(buf[i])+result*31;
        inc(i);
      end;
    end;
    

    The fast "Adler" CRC as implemented in zlib - optimized asm version here:

    function Adler32Pas(Adler: cardinal; p: pointer; Count: Integer): cardinal;
    // Adler-32 style checksum over Count bytes starting at p.
    //   Adler - running checksum: low word is the byte sum, high word is the
    //           sum of the byte sums (pass a previous result to continue)
    //   p     - pointer to the data
    //   Count - number of bytes; nothing is read when Count <= 0
    // Returns the updated checksum, (s2 shl 16) + s1, both taken modulo 65521.
    var s1, s2: cardinal;
        i, n: integer;
        pb: PByte;
    begin
      s1 := LongRec(Adler).Lo;
      s2 := LongRec(Adler).Hi;
      // Walk the buffer with a typed pointer: casting the pointer to a 32-bit
      // cardinal to increment it would truncate the address on 64-bit targets.
      pb := p;
      while Count>0 do begin
        // Process at most 5552 bytes between two modulo reductions
        if Count<5552 then
          n := Count else
          n := 5552;
        for i := 1 to n do begin
          inc(s1,pb^);
          inc(pb);
          inc(s2,s1);
        end;
        s1 := s1 mod 65521;
        s2 := s2 mod 65521;
        dec(Count,n);
      end;
      result := word(s1)+cardinal(word(s2)) shl 16;
    end;
    

    My own faster variant - not re-entrant, but faster since it will read by DWORDs - and an even faster asm version here:

    function Hash32(Data: pointer; Len: integer): cardinal;
    // Adler-like 32-bit hash that accumulates the input as DWORDs.
    //   Data - pointer to the bytes to hash (nil yields 0)
    //   Len  - number of bytes to hash
    // Returns s1 xor (s2 shl 16), where s1 is the running sum of the DWORDs
    // and s2 the running sum of the s1 values.
    function SubHash(P: PCardinalArray; L: integer): cardinal;
    {$ifdef HASINLINE}inline;{$endif}
    var s1,s2: cardinal;
        i: PtrInt;
    begin
      if P<>nil then begin
        s1 := 0;
        s2 := 0;
        for i := 1 to L shr 4 do begin // 16 bytes (4 DWORD) by loop
          inc(s1,P^[0]);
          inc(s2,s1);
          inc(s1,P^[1]);
          inc(s2,s1);
          inc(s1,P^[2]);
          inc(s2,s1);
          inc(s1,P^[3]);
          inc(s2,s1);
          inc(PtrUInt(P),16);
        end;
        for i := 1 to (L shr 2)and 3 do begin // 4 bytes (DWORD) by loop
          inc(s1,P^[0]);
          inc(s2,s1);
          inc(PtrUInt(P),4);
        end;
        // Remaining 0..3 bytes: read exactly the bytes that belong to the
        // buffer. Reading a whole DWORD and masking it would touch up to four
        // bytes past the end of the data. The bytes are combined low byte
        // first, which equals the masked DWORD value on little-endian CPUs.
        case L and 3 of
        1: inc(s1,PByte(P)^);
        2: inc(s1,PByte(P)^ or (cardinal(PByte(PtrUInt(P)+1)^) shl 8));
        3: inc(s1,PByte(P)^ or (cardinal(PByte(PtrUInt(P)+1)^) shl 8)
                  or (cardinal(PByte(PtrUInt(P)+2)^) shl 16));
        end;
        inc(s2,s1);
        result := s1 xor (s2 shl 16);
      end else
        result := 0;
    end;
    begin // use a sub function for better code generation under Delphi
      result := SubHash(Data,Len);
    end;
    

    The classic CRC32 version - you can find a very optimized asm version (using 8 tables) here:

    function UpdateCrc32(aCRC32: cardinal; inBuf: pointer; inLen: integer) : cardinal;
    // Table-driven CRC update, one byte per iteration.
    //   aCRC32 - running CRC value to continue from
    //   inBuf  - pointer to the data
    //   inLen  - number of bytes; nothing is read when inLen <= 0
    // Returns the CRC after folding in inLen bytes, using the crc32Tab lookup
    // table, which is declared outside this function.
    var i: integer;
        p: PByte;
    begin
      result := aCRC32;
      // Walk the buffer with a typed pointer: casting the pointer to a 32-bit
      // cardinal to increment it would truncate the address on 64-bit targets.
      p := inBuf;
      // simple byte-wise loop, favouring short code over speed
      for i := 1 to inLen do begin
        result := crc32Tab[byte(result xor p^)] xor (result shr 8);
        inc(p);
      end;
    end;
    
    0 讨论(0)
提交回复
热议问题