Convert Unicode char to closest (most similar) char in ASCII (.NET)

后端 未结 2 559
日久生厌
日久生厌 2020-12-06 05:26

How do I convert different Unicode characters to their closest ASCII equivalents? Like Ä -> A. I googled but didn't find any suitable solution. The trick Encoding.

相关标签:
2条回答
  • 2020-12-06 06:07

    If you only need to remove the diacritical marks, then head to this answer:

    /// <summary>
    /// Removes diacritical marks from a string: the text is decomposed
    /// (Unicode Form D), every code point in the NonSpacingMark category is
    /// dropped, and the remainder is recomposed (Unicode Form C).
    /// </summary>
    /// <param name="stIn">The text to strip; must not be null.</param>
    /// <returns>The input without non-spacing marks, e.g. "Ä" becomes "A".</returns>
    /// <exception cref="ArgumentNullException"><paramref name="stIn"/> is null.</exception>
    static string RemoveDiacritics(string stIn) {
      if(stIn == null) {
        throw new ArgumentNullException("stIn");
      }

      string decomposed = stIn.Normalize(NormalizationForm.FormD);
      StringBuilder sb = new StringBuilder(decomposed.Length);

      for(int i = 0; i < decomposed.Length; i++) {
        // Use the (string, index) overload so a surrogate pair is classified as
        // the single code point it encodes. Looking at one char at a time reports
        // each half as "Surrogate", which lets non-spacing marks outside the
        // Basic Multilingual Plane slip through.
        UnicodeCategory category = CharUnicodeInfo.GetUnicodeCategory(decomposed, i);
        bool isPair = char.IsSurrogatePair(decomposed, i);

        if(category != UnicodeCategory.NonSpacingMark) {
          sb.Append(decomposed[i]);
          if(isPair) {
            sb.Append(decomposed[i + 1]);
          }
        }

        if(isPair) {
          // Both halves were handled above; skip the low surrogate.
          i++;
        }
      }

      return sb.ToString().Normalize(NormalizationForm.FormC);
    }
    
    0 讨论(0)
  • 2020-12-06 06:09

    MS Dynamics has a problem where it won't allow for any character outside of x20 to x7f and some characters within that range are also invalid. My answer was to create an array keyed to the invalid characters returning the best guess of the valid characters.
    It ain't pretty, but it works.

    Function PlainAscii(InText)
    ' Returns InText reduced to 7-bit ASCII: printable characters (x20-x7e)
    ' plus LF, CR and TAB are kept, other control characters become a space,
    ' and 8-bit characters are replaced via CvtToAscii.
    ' Returns "" when InText is "".
    ' Note: RegExTest strips CR/LF before matching, so text made up only of
    ' printable ASCII and line breaks is returned unchanged by the fast path.
    Dim i, c, a
    Const cPrintableAscii = "^[\x20-\x7e]+$"
    Const IgnoreCase = False
        PlainAscii = ""
        If InText = "" Then Exit Function
        If RegExTest(InText, cPrintableAscii, IgnoreCase) Then
            PlainAscii = InText
        Else
            For i = 1 To Len(InText)
                c = Mid(InText, i, 1)
                a = Asc(c)
                If a = 10 Or a = 13 Or a = 9 Then
                    ' Do Nothing - Allow LF, CR & TAB
                ElseIf a < 32 Then
                    c = " "
                ElseIf a > 255 Then
                    ' Asc() can exceed one byte on double-byte code pages; the
                    ' CvtToAscii table only covers single-byte codes.
                    c = "?"
                ElseIf a > 126 Then
                    c = CvtToAscii(a)
                ElseIf AscW(c) <> a Then
                    ' Asc() converts through the ANSI code page, so a character
                    ' with no exact ANSI equivalent can come back as an ASCII
                    ' code (typically 63, "?"). Emit that ASCII character rather
                    ' than passing the original non-ASCII character through.
                    c = Chr(a)
                End If
                PlainAscii = PlainAscii & c
            Next
        End If
    End Function
    
    Function CvtToAscii(inChar)
    ' Maps a character code with the 8th bit set (plus DEL, 127) to a best-guess
    ' printable 7-bit character.
    '   inChar  - numeric character code, expected in the range 127-255.
    '   Returns - a one-character string; " " when no sensible equivalent exists
    '             or when inChar falls outside the table.
    ' The table is laid out so that element 0 corresponds to code 126: with that
    ' base the entries line up with the Windows-1252 code page (128 -> "$",
    ' 153 -> "T", 192 -> "A", 255 -> "y"). Indexing with a base of 127 shifted
    ' every lookup by one slot.
    Const cTableBase = 126
    Dim arrChars, idx
        arrChars = Array(" ", " ", "$", " ", ",", "f", """", " ", "t", "t", "^", "%", "S", "<", "O", " ", "Z", " ", " ", "'", "'", """", """", ".", "-", "-", "~", "T", "S", ">", "o", " ", "Z", "Y", " ", "!", "$", "$", "o", "$", "|", "S", " ", "c", " ", " ", " ", "_", "R", "_", ".", " ", " ", " ", " ", "u", "P", ".", ",", "i", " ", " ", " ", " ", " ", " ", "A", "A", "A", "A", "A", "A", "A", "C", "E", "E", "E", "E", "I", "I", "I", "I", "D", "N", "O", "O", "O", "O", "O", "X", "O", "U", "U", "U", "U", "Y", "b", "B", "a", "a", "a", "a", "a", "a", "a", "c", "e", "e", "e", "e", "i", "i", "i", "i", "o", "n", "o", "o", "o", "o", "o", "/", "O", "u", "u", "u", "u", "y", "p", "y")
        idx = inChar - cTableBase
        If idx < 0 Or idx > UBound(arrChars) Then
            ' Outside the table: fall back to the table's own placeholder
            ' instead of raising "Subscript out of range".
            CvtToAscii = " "
        Else
            CvtToAscii = arrChars(idx)
        End If
    End Function
    
    Function RegExTest(ByVal strStringToSearch, strExpression, IgnoreCase)
    ' Reports whether the regular expression strExpression matches
    ' strStringToSearch after every CR and LF has been removed from it.
    ' The string is taken ByVal, so the caller's copy is left untouched.
    ' IgnoreCase is coerced with CBool and selects case-insensitive matching.
    ' Errors are suppressed for the duration of the call; if one is pending
    ' once the pattern has been assigned, or the match itself fails, the
    ' result stays False.
    Dim rx
        On Error Resume Next
        Err.Clear
        RegExTest = False
        ' Line breaks are dropped so they do not take part in the match.
        strStringToSearch = Replace(Replace(strStringToSearch, vbCr, ""), vbLf, "")
        Set rx = New RegExp
        rx.Pattern = strExpression
        If Err.Number = 0 Then
            rx.IgnoreCase = CBool(IgnoreCase)
            rx.Global = True
            RegExTest = rx.Test(strStringToSearch)
        End If
        Set rx = Nothing
        On Error Goto 0
    End Function
    

    Your answer is necessarily going to be different.

    0 讨论(0)
提交回复
热议问题