Using unicode characters bigger than 2 bytes with .Net

前端未结

关注

 4  1871

说谎 2020-12-15 08:32

I\'m using this code to generate U+10FFFC

var s = Encoding.UTF8.GetString(new byte[] {0xF4,0x8F,0xBF,0xBC});

I know it\'s for

4条回答

陌清茗 (楼主)

2020-12-15 09:18

Yet another alternative to enumerate the UTF32 characters in a C# string is to use the System.Globalization.StringInfo.GetTextElementEnumerator method, as in the code below.

public static class StringExtensions
{
    public static System.Collections.Generic.IEnumerable GetUTF32Chars(this string s)
    {
        var tee = System.Globalization.StringInfo.GetTextElementEnumerator(s);

        while (tee.MoveNext())
        {
            yield return new UTF32Char(s, tee.ElementIndex);
        }
    }
}

public struct UTF32Char
{
    private string s;
    private int index;

    public UTF32Char(string s, int index)
    {
        this.s = s;
        this.index = index;
    }

    public override string ToString()
    {
        return char.ConvertFromUtf32(this.UTF32Code);
    }

    public int UTF32Code {  get { return char.ConvertToUtf32(s, index); } }
    public double NumericValue { get { return char.GetNumericValue(s, index); } }
    public UnicodeCategory UnicodeCategory { get { return char.GetUnicodeCategory(s, index); } } 
    public bool IsControl { get { return char.IsControl(s, index); } }
    public bool IsDigit { get { return char.IsDigit(s, index); } }
    public bool IsLetter { get { return char.IsLetter(s, index); } }
    public bool IsLetterOrDigit { get { return char.IsLetterOrDigit(s, index); } }
    public bool IsLower { get { return char.IsLower(s, index); } }
    public bool IsNumber { get { return char.IsNumber(s, index); } }
    public bool IsPunctuation { get { return char.IsPunctuation(s, index); } }
    public bool IsSeparator { get { return char.IsSeparator(s, index); } }
    public bool IsSurrogatePair { get { return char.IsSurrogatePair(s, index); } }
    public bool IsSymbol { get { return char.IsSymbol(s, index); } }
    public bool IsUpper { get { return char.IsUpper(s, index); } }
    public bool IsWhiteSpace { get { return char.IsWhiteSpace(s, index); } }
}

0 讨论(0)

查看其它4个回答