C# string comparison ignoring spaces, carriage return or line breaks

前端 未结 10 935
执念已碎
执念已碎 2020-12-05 13:07

How can I compare two strings in C# while ignoring case, spaces, and line breaks? I also need two strings that are both null to be treated as equal.

Thanks!

相关标签:
10条回答
  • 2020-12-05 13:37

    First remove all whitespace from both strings with a regular expression, then use the String.Compare method with the parameter ignoreCase = true.

    // Strip every whitespace character (spaces, tabs, CR, LF, ...) from both inputs,
    // then compare what is left while ignoring case.
    var whitespace = new System.Text.RegularExpressions.Regex(@"\s");
    string a = whitespace.Replace("void foo", "");
    string b = whitespace.Replace("voidFoo", "");
    bool isTheSame = 0 == String.Compare(a, b, true);
    
    0 讨论(0)
  • 2020-12-05 13:39
    1. I would Trim the string using Trim() to remove all the
      whitespace.
    2. Use StringComparison.OrdinalIgnoreCase to make the comparison case-insensitive, e.g. stringA.Equals(stringB, StringComparison.OrdinalIgnoreCase)
    0 讨论(0)
  • 2020-12-05 13:42

    An approach not optimized for performance, but for completeness.

    • normalizes null
    • normalizes unicode, combining characters, diacritics
    • normalizes new lines
    • normalizes white space
    • normalizes casing

    code snippet:

    /// <summary>
    /// Helpers for comparing strings while ignoring case, white space and line breaks.
    /// </summary>
    public static class StringHelper
    {
        /// <summary>
        /// Determines whether two strings are equal once both have been normalized
        /// (Unicode form C, all white space removed, lower-cased invariantly).
        /// </summary>
        /// <param name="source">First string; may be null.</param>
        /// <param name="target">Second string; may be null.</param>
        /// <returns>
        /// True when both are null, or when both are non-null and their normalized
        /// forms are identical; false otherwise.
        /// </returns>
        public static bool AreEquivalent(string source, string target)
        {
            // Two nulls are equivalent; a null never matches a non-null string.
            if (source == null || target == null)
            {
                return source == null && target == null;
            }

            string normalizedSource = Normalize(source);
            string normalizedTarget = Normalize(target);
            return string.Equals(normalizedSource, normalizedTarget);
        }

        /// <summary>
        /// Produces the canonical form used by <see cref="AreEquivalent"/>.
        /// </summary>
        /// <param name="value">Non-null string to normalize.</param>
        /// <returns>The composed, white-space-free, lower-cased string.</returns>
        private static string Normalize(string value)
        {
            Debug.Assert(value != null);

            // Compose combining sequences so visually identical text compares equal.
            string composed = value.Normalize(NormalizationForm.FormC);

            // Collapse CRLF and lone CR into LF.
            string unifiedNewLines = composed.Replace("\r\n", "\n").Replace("\r", "\n");

            // Drop every white-space character, line feeds included.
            string withoutWhiteSpace = Regex.Replace(unifiedNewLines, @"\s", string.Empty);

            // Fold case last so the comparison is case-insensitive.
            return withoutWhiteSpace.ToLowerInvariant();
        }
    }
    
    0 讨论(0)
  • 2020-12-05 13:45

    If you need performance, the Regex solutions on this page may run too slowly for you, for example when you have a large list of strings to sort. (A Regex solution is more readable, however.)

    I have a class that looks at each individual char in both strings and compares them while ignoring case and whitespace. It doesn't allocate any new strings. It uses the char.IsWhiteSpace(ch) to determine whitespace, and char.ToLowerInvariant(ch) for case-insensitivity (if required). In my testing, my solution runs about 5x - 8x faster than a Regex-based solution. My class also implements IEqualityComparer's GetHashCode(obj) method using this code in another SO answer. This GetHashCode(obj) also ignores whitespace and optionally ignores case.

    Here's my class:

    /// <summary>
    /// Equality comparer for strings that ignores white space (as defined by
    /// <see cref="char.IsWhiteSpace(char)"/>) and case (via
    /// <see cref="char.ToLowerInvariant(char)"/>). It walks both strings character by
    /// character and allocates no intermediate strings. A null string is treated the
    /// same as an empty or white-space-only string.
    /// </summary>
    private class StringCompIgnoreWhiteSpace : IEqualityComparer<string>
    {
        /// <summary>
        /// Compares two strings, skipping white space in both and ignoring case.
        /// </summary>
        /// <param name="strx">First string; may be null.</param>
        /// <param name="stry">Second string; may be null.</param>
        /// <returns>
        /// True when the non-white-space characters of both strings match in order,
        /// case-insensitively; false otherwise.
        /// </returns>
        public bool Equals(string strx, string stry)
        {
            if (strx == null) //stry may contain only whitespace
            {
                return string.IsNullOrWhiteSpace(stry);
            }

            if (stry == null) //strx may contain only whitespace
            {
                return string.IsNullOrWhiteSpace(strx);
            }

            int ix = 0, iy = 0;
            while (true)
            {
                // Advance each cursor to its next significant character. The helper
                // checks the bound before indexing, so trailing white space is safe.
                ix = SkipWhiteSpace(strx, ix);
                iy = SkipWhiteSpace(stry, iy);

                bool endOfX = ix == strx.Length;
                bool endOfY = iy == stry.Length;
                if (endOfX || endOfY)
                {
                    // Equal only when both ran out of significant characters together.
                    return endOfX && endOfY;
                }

                //Remove the ToLowerInvariant calls to make the comparison case-sensitive.
                if (char.ToLowerInvariant(strx[ix]) != char.ToLowerInvariant(stry[iy]))
                {
                    return false;
                }

                ix++;
                iy++;
            }
        }

        /// <summary>
        /// Computes a hash code that ignores white space and case, consistent with
        /// <see cref="Equals(string, string)"/>.
        /// </summary>
        /// <param name="obj">String to hash; may be null.</param>
        /// <returns>
        /// The hash of the lower-cased non-white-space characters. Null hashes the
        /// same as an empty or white-space-only string, because Equals treats them
        /// as equal.
        /// </returns>
        public int GetHashCode(string obj)
        {
            int hash = 17;
            if (obj == null)
            {
                return hash;
            }

            unchecked // Overflow is fine, just wrap
            {
                for (int i = 0; i < obj.Length; i++)
                {
                    char ch = obj[i];
                    if (!char.IsWhiteSpace(ch))
                    {
                        //use this line for case-insensitivity
                        hash = hash * 23 + char.ToLowerInvariant(ch).GetHashCode();

                        //for case-sensitivity, hash ch.GetHashCode() instead
                    }
                }
            }
            return hash;
        }

        /// <summary>
        /// Returns the index of the first non-white-space character at or after
        /// <paramref name="index"/>, or <c>text.Length</c> when there is none.
        /// </summary>
        private static int SkipWhiteSpace(string text, int index)
        {
            while (index < text.Length && char.IsWhiteSpace(text[index]))
            {
                index++;
            }
            return index;
        }
    }
    

    /// <summary>
    /// Prints the result of a handful of sample comparisons and hash computations
    /// made with <c>StringCompIgnoreWhiteSpace</c> to the console.
    /// </summary>
    private static void TestComp()
    {
        var comparer = new StringCompIgnoreWhiteSpace();

        // Each row is a { left, right } pair; the expected result is noted beside it.
        string[][] samples =
        {
            new string[] { "abcd", "abcd" },                                        //true
            new string[] { "abCd", "Abcd" },                                        //true
            new string[] { "ab Cd", "Ab\n\r\tcd   " },                              //true
            new string[] { " ab Cd", "  A b" + Environment.NewLine + "cd " },       //true
            new string[] { null, "  \t\n\r " },                                     //true
            new string[] { "  \t\n\r ", null },                                     //true
            new string[] { "abcd", "abcd   h" },                                    //false
        };

        foreach (string[] sample in samples)
        {
            Console.WriteLine(comparer.Equals(sample[0], sample[1]));
        }

        // The original author reported -699568861 for this input.
        Console.WriteLine(comparer.GetHashCode(" a b c d"));

        // With the case-insensitive hash this should print the same value as the
        // line above; the original author reported -1555613149 for the
        // case-sensitive variant.
        Console.WriteLine(comparer.GetHashCode("A B c      \t       d"));
    }
    

    Here's my testing code (with a Regex example):

    /// <summary>
    /// Times 100000 comparisons of the same two strings with the char-based comparer
    /// and then with the Regex-based comparer, printing each elapsed time.
    /// </summary>
    private static void SpeedTest()
    {
        const int iterations = 100000;
        string left = "a bc d";
        string right = "ABC D";

        // Char-by-char comparer.
        var charComparer = new StringCompIgnoreWhiteSpace();
        Stopwatch charTimer = new Stopwatch();
        charTimer.Start();
        for (int n = 0; n < iterations; n++)
        {
            charComparer.Equals(left, right);
        }
        charTimer.Stop();
        Console.WriteLine("char time =  {0}", charTimer.Elapsed); //original author measured 00:00:00.0361159

        // Regex-based comparer.
        var regexComparer = new StringCompIgnoreWhiteSpaceRegex();
        Stopwatch regexTimer = new Stopwatch();
        regexTimer.Start();
        for (int n = 0; n < iterations; n++)
        {
            regexComparer.Equals(left, right);
        }
        regexTimer.Stop();
        Console.WriteLine("regex time = {0}", regexTimer.Elapsed); //original author measured 00:00:00.2773072
    }
    
    /// <summary>
    /// Regex-based equality comparer for strings that ignores white space (the
    /// <c>\s</c> character class) and case. A null string is treated the same as an
    /// empty or white-space-only string.
    /// </summary>
    private class StringCompIgnoreWhiteSpaceRegex : IEqualityComparer<string>
    {
        // Built once and reused by every call.
        private static readonly System.Text.RegularExpressions.Regex WhiteSpace =
            new System.Text.RegularExpressions.Regex(@"\s");

        /// <summary>
        /// Compares two strings after removing all white space, ignoring case.
        /// </summary>
        /// <param name="strx">First string; may be null.</param>
        /// <param name="stry">Second string; may be null.</param>
        /// <returns>True when the stripped strings match case-insensitively.</returns>
        public bool Equals(string strx, string stry)
        {
            if (strx == null)
            {
                return string.IsNullOrWhiteSpace(stry);
            }

            if (stry == null)
            {
                return string.IsNullOrWhiteSpace(strx);
            }

            // Ordinal comparison keeps the result independent of the current culture
            // and lets GetHashCode use the matching ordinal hash.
            return string.Equals(
                WhiteSpace.Replace(strx, ""),
                WhiteSpace.Replace(stry, ""),
                StringComparison.OrdinalIgnoreCase);
        }

        /// <summary>
        /// Computes a hash code that ignores white space and case, consistent with
        /// <see cref="Equals(string, string)"/>.
        /// </summary>
        /// <param name="obj">String to hash; may be null.</param>
        /// <returns>
        /// The case-insensitive ordinal hash of the string with white space removed.
        /// Null hashes the same as an empty or white-space-only string, because
        /// Equals treats them as equal.
        /// </returns>
        public int GetHashCode(string obj)
        {
            string stripped = obj == null ? string.Empty : WhiteSpace.Replace(obj, "");
            return StringComparer.OrdinalIgnoreCase.GetHashCode(stripped);
        }
    }
    
    0 讨论(0)
提交回复
热议问题