Using Unicode characters bigger than 2 bytes with .NET

前端 未结 4 1871
说谎
说谎 2020-12-15 08:32

I'm using this code to generate U+10FFFC:

// F4 8F BF BC is the four-byte UTF-8 encoding of the single code point U+10FFFC.
byte[] utf8Bytes = { 0xF4, 0x8F, 0xBF, 0xBC };
var s = Encoding.UTF8.GetString(utf8Bytes);

I know it's for

4条回答
  •  陌清茗
    陌清茗 (楼主)
    2020-12-15 09:18

    Yet another way to enumerate the UTF-32 characters in a C# string is to use the System.Globalization.StringInfo.GetTextElementEnumerator method, as in the code below.

    /// <summary>
    /// Extension methods for walking a string by Unicode code point rather than
    /// by UTF-16 code unit.
    /// </summary>
    public static class StringExtensions
    {
        /// <summary>
        /// Enumerates every Unicode code point in <paramref name="s"/> as a
        /// <see cref="UTF32Char"/>, in string order.
        /// </summary>
        /// <param name="s">The string to enumerate. Must not be null.</param>
        /// <returns>
        /// A lazily evaluated sequence with one <see cref="UTF32Char"/> per code
        /// point. A surrogate pair produces a single item positioned at its high
        /// surrogate.
        /// </returns>
        /// <exception cref="System.ArgumentNullException">
        /// <paramref name="s"/> is null.
        /// </exception>
        public static System.Collections.Generic.IEnumerable<UTF32Char> GetUTF32Chars(this string s)
        {
            // Validate here, outside the iterator, so the exception is raised at
            // the call site instead of on the first MoveNext().
            if (s == null)
            {
                throw new System.ArgumentNullException("s");
            }

            return EnumerateUTF32Chars(s);
        }

        // Iterator body, split out so that argument validation above runs eagerly.
        private static System.Collections.Generic.IEnumerable<UTF32Char> EnumerateUTF32Chars(string s)
        {
            var elements = System.Globalization.StringInfo.GetTextElementEnumerator(s);

            while (elements.MoveNext())
            {
                // A text element can hold more than one code point (for example a
                // base character followed by combining marks), so walk the whole
                // element instead of yielding only its first code point.
                int start = elements.ElementIndex;
                int end = start + elements.GetTextElement().Length;

                for (int i = start; i < end; i += char.IsSurrogatePair(s, i) ? 2 : 1)
                {
                    yield return new UTF32Char(s, i);
                }
            }
        }
    }
    
    /// <summary>
    /// A lightweight view of one Unicode code point inside a string, identified
    /// by the index of its first UTF-16 code unit. Every member forwards to the
    /// matching <c>char</c> overload that takes a string and an index.
    /// </summary>
    /// <remarks>
    /// A default-initialized instance has no backing string, so it does not
    /// refer to any character.
    /// </remarks>
    public struct UTF32Char
    {
        // Both fields are assigned only in the constructor; the struct is immutable.
        private readonly string s;
        private readonly int index;

        /// <summary>
        /// Creates a view of the character that starts at <paramref name="index"/>
        /// in <paramref name="s"/>.
        /// </summary>
        /// <param name="s">The string that contains the character.</param>
        /// <param name="index">
        /// Index of the character's first UTF-16 code unit within <paramref name="s"/>.
        /// </param>
        /// <exception cref="System.ArgumentNullException">
        /// <paramref name="s"/> is null.
        /// </exception>
        /// <exception cref="System.ArgumentOutOfRangeException">
        /// <paramref name="index"/> is not a position inside <paramref name="s"/>.
        /// </exception>
        public UTF32Char(string s, int index)
        {
            // Fail at construction rather than later, on the first property access.
            if (s == null)
            {
                throw new System.ArgumentNullException("s");
            }

            if (index < 0 || index >= s.Length)
            {
                throw new System.ArgumentOutOfRangeException("index");
            }

            this.s = s;
            this.index = index;
        }

        /// <summary>
        /// Returns the character as a string: one UTF-16 code unit, or two when
        /// the code point needs a surrogate pair.
        /// </summary>
        public override string ToString()
        {
            return char.ConvertFromUtf32(this.UTF32Code);
        }

        /// <summary>The Unicode code point value of the character.</summary>
        public int UTF32Code { get { return char.ConvertToUtf32(s, index); } }

        /// <summary>The result of <c>char.GetNumericValue</c> for this character.</summary>
        public double NumericValue { get { return char.GetNumericValue(s, index); } }

        /// <summary>The result of <c>char.GetUnicodeCategory</c> for this character.</summary>
        // Fully qualified so the struct compiles without a using directive for
        // System.Globalization.
        public System.Globalization.UnicodeCategory UnicodeCategory { get { return char.GetUnicodeCategory(s, index); } }

        // The remaining properties forward to the char predicate of the same name.
        public bool IsControl { get { return char.IsControl(s, index); } }
        public bool IsDigit { get { return char.IsDigit(s, index); } }
        public bool IsLetter { get { return char.IsLetter(s, index); } }
        public bool IsLetterOrDigit { get { return char.IsLetterOrDigit(s, index); } }
        public bool IsLower { get { return char.IsLower(s, index); } }
        public bool IsNumber { get { return char.IsNumber(s, index); } }
        public bool IsPunctuation { get { return char.IsPunctuation(s, index); } }
        public bool IsSeparator { get { return char.IsSeparator(s, index); } }

        /// <summary>
        /// True when the character is stored as a high/low surrogate pair
        /// starting at the index.
        /// </summary>
        public bool IsSurrogatePair { get { return char.IsSurrogatePair(s, index); } }
        public bool IsSymbol { get { return char.IsSymbol(s, index); } }
        public bool IsUpper { get { return char.IsUpper(s, index); } }
        public bool IsWhiteSpace { get { return char.IsWhiteSpace(s, index); } }
    }
    

提交回复
热议问题