DotNet Soundex Function

前端 未结 4 912
太阳男子
太阳男子 2021-01-01 06:14

I have a database table that has a column of SQL Server Soundex-encoded last name + first name. In my C# program I would like to convert a string using Soundex for use in my…

4条回答
  •  暗喜
    暗喜 (楼主)
    2021-01-01 06:44

    I know this is late, but I also needed something similar (though no database involved), and the only answer isn't accurate (fails for 'Tymczak' and 'Pfister').

    This is what I came up with:

    /// <summary>
    /// Smoke test for <see cref="Soundex.Generate"/>: runs it against a list of
    /// names with known Soundex codes, plus inputs that contain no letters.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Checks each sample input against its expected four-character code.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        public static void Main(string[] args)
        {
            // NOTE(review): Assert is not defined in this file; this assumes an
            // NUnit/MSTest-style Assert.AreEqual(expected, actual). The expected
            // value is passed first so a failure message labels the values correctly.

            // Single letter is padded with zeros.
            Assert.AreEqual("H000", Soundex.Generate("H"));

            // Names that sound alike share a code.
            Assert.AreEqual("R163", Soundex.Generate("Robert"));
            Assert.AreEqual("R163", Soundex.Generate("Rupert"));
            Assert.AreEqual("R150", Soundex.Generate("Rubin"));

            // Same-class consonants separated by H collapse to one digit.
            Assert.AreEqual("A261", Soundex.Generate("Ashcraft"));
            Assert.AreEqual("A261", Soundex.Generate("Ashcroft"));

            // Cases the post says another answer got wrong.
            Assert.AreEqual("T522", Soundex.Generate("Tymczak"));
            Assert.AreEqual("P236", Soundex.Generate("Pfister"));

            Assert.AreEqual("G362", Soundex.Generate("Gutierrez"));
            Assert.AreEqual("J250", Soundex.Generate("Jackson"));
            Assert.AreEqual("V532", Soundex.Generate("VanDeusen"));
            Assert.AreEqual("D250", Soundex.Generate("Deusen"));

            // W between the first letter and the rest is ignored.
            Assert.AreEqual("S630", Soundex.Generate("Sword"));
            Assert.AreEqual("S630", Soundex.Generate("Sord"));

            // Non-letters are stripped before encoding.
            Assert.AreEqual("L230", Soundex.Generate("Log-out"));
            Assert.AreEqual("L230", Soundex.Generate("Logout"));

            // Inputs with no letters at all yield the empty code.
            Assert.AreEqual(Soundex.Empty, Soundex.Generate("123"));
            Assert.AreEqual(Soundex.Empty, Soundex.Generate(""));
            Assert.AreEqual(Soundex.Empty, Soundex.Generate(null));
        }
    }
    
    /// <summary>
    /// Generates four-character Soundex codes (a letter followed by three digits)
    /// for phrases made of the letters A-Z. All other characters are ignored.
    /// </summary>
    public static class Soundex
    {
        /// <summary>Code returned when the input contains no letters A-Z.</summary>
        public const string Empty = "0000";

        // Matches everything that is not an upper-case ASCII letter.
        private static readonly Regex Sanitiser = new Regex(@"[^A-Z]", RegexOptions.Compiled);

        // First alternative: a digit followed by any number of repeats of the same
        // digit, each optionally preceded by H/W; the whole run is replaced by the
        // single captured digit. Second alternative: any remaining run of H/W, which
        // is replaced by nothing because group 1 did not participate.
        private static readonly Regex CollapseRepeatedNumbers = new Regex(@"(\d)(?:[WH]*\1)*|[WH]+", RegexOptions.Compiled);

        // Letters that do not contribute a digit but do separate repeated digits.
        private static readonly Regex RemoveVowelSounds = new Regex(@"[AEIOUY]", RegexOptions.Compiled);

        /// <summary>
        /// Computes the Soundex code of <paramref name="Phrase"/>.
        /// </summary>
        /// <param name="Phrase">Text to encode; may be null or empty.</param>
        /// <returns>
        /// The first letter (upper-cased) followed by three digits, zero-padded on
        /// the right, or <see cref="Empty"/> when the input has no letters A-Z.
        /// </returns>
        public static string Generate(string Phrase)
        {
            // Upper-case with the invariant culture: a culture-sensitive ToUpper()
            // maps 'i' to a dotted capital under Turkish casing rules, which the
            // sanitiser would then strip, silently dropping the letter.
            Phrase = Sanitiser.Replace((Phrase ?? string.Empty).ToUpperInvariant(), string.Empty);

            // Nothing left to encode.
            if (Phrase.Length == 0)
            {
                return Empty;
            }

            // Replace each consonant with its sound-class digit; vowels, H and W stay as letters.
            var Numified = Numify(Phrase);

            // Merge repeated digits (letters of the same sound class), even when
            // separated by any number of H/W gaps, and drop H/W themselves.
            Numified = CollapseRepeatedNumbers.Replace(Numified, @"$1");

            if (Numified.Length > 0 && Numified[0] == Numify(Phrase[0]))
            {
                // The leading symbol stands for the first letter, which is emitted
                // verbatim below, so it must not be counted again.
                Numified = Numified.Substring(1);
            }

            // Vowels have done their job as separators; remove them.
            Numified = RemoveVowelSounds.Replace(Numified, string.Empty);

            // Letter + digits, padded and trimmed to the X### format.
            return (Phrase[0] + Numified).PadRight(4, '0').Substring(0, 4);
        }

        /// <summary>Applies <see cref="Numify(char)"/> to every character of the phrase.</summary>
        private static string Numify(string Phrase)
        {
            var symbols = Phrase.ToCharArray();
            for (var i = 0; i < symbols.Length; i++)
            {
                symbols[i] = Numify(symbols[i]);
            }

            return new string(symbols);
        }

        /// <summary>
        /// Maps an upper-case consonant to its sound-class digit ('1'-'6');
        /// any other character is returned unchanged.
        /// </summary>
        private static char Numify(char Character)
        {
            switch (Character)
            {
                case 'B': case 'F': case 'P': case 'V':
                    return '1';
                case 'C': case 'G': case 'J': case 'K': case 'Q': case 'S': case 'X': case 'Z':
                    return '2';
                case 'D': case 'T':
                    return '3';
                case 'L':
                    return '4';
                case 'M': case 'N':
                    return '5';
                case 'R':
                    return '6';
                default:
                    return Character;
            }
        }
    }
    

提交回复
热议问题