HTML character decoding in Objective-C / Cocoa Touch

后端 未结 13 2157
我寻月下人不归
我寻月下人不归 2020-11-22 10:24

First of all, I found this: Objective C HTML escape/unescape, but it doesn't work for me.

My encoded characters (they come from an RSS feed, btw) look like this: &a

13条回答
  •  爱一瞬间的悲伤
    2020-11-22 11:00

    Swift 3 version of Jugale's answer

    extension String {
        /// Named character references recognised by `stringByDecodingXMLEntities()`.
        ///
        /// Keys are the full source spelling, including the leading `&` and the
        /// trailing `;`, so a candidate reference can be resolved with a single
        /// dictionary lookup. Because every key ends in `;`, no key is a prefix
        /// of another.
        static private let mappings: [String: String] = [
            "&quot;": "\"", "&amp;": "&", "&apos;": "'", "&lt;": "<", "&gt;": ">",
            "&nbsp;": "\u{00A0}", "&iexcl;": "¡", "&cent;": "¢", "&pound;": "£",
            "&curren;": "¤", "&yen;": "¥", "&brvbar;": "¦", "&sect;": "§",
            "&uml;": "¨", "&copy;": "©", "&ordf;": "ª", "&laquo;": "«",
            "&not;": "¬", "&reg;": "®", "&macr;": "¯", "&deg;": "°",
            "&plusmn;": "±", "&sup2;": "²", "&sup3;": "³", "&acute;": "´",
            "&micro;": "µ", "&para;": "¶", "&middot;": "·", "&cedil;": "¸",
            "&sup1;": "¹", "&ordm;": "º", "&raquo;": "»", "&frac14;": "¼",
            "&frac12;": "½", "&frac34;": "¾", "&iquest;": "¿", "&times;": "×",
            "&divide;": "÷", "&ETH;": "Ð", "&eth;": "ð", "&THORN;": "Þ",
            "&thorn;": "þ", "&AElig;": "Æ", "&aelig;": "æ", "&OElig;": "Œ",
            "&oelig;": "œ", "&Aring;": "Å", "&Oslash;": "Ø", "&Ccedil;": "Ç",
            "&ccedil;": "ç", "&szlig;": "ß", "&Ntilde;": "Ñ", "&ntilde;": "ñ",
        ]

        /// Returns a copy of the string with character references decoded.
        ///
        /// Three forms are handled:
        /// - named references listed in `mappings` (e.g. `&amp;`, `&copy;`);
        /// - decimal references (`&#65;`);
        /// - hexadecimal references (`&#x41;` or `&#X41;`).
        ///
        /// The terminating `;` is optional for numeric references. Anything that
        /// is not a recognised reference is copied to the output unchanged. That
        /// includes unknown names, a bare `&`, and numeric references that
        /// overflow or do not name a valid Unicode scalar value.
        ///
        /// - Returns: The decoded string, or `self` when it contains no `&`.
        func stringByDecodingXMLEntities() -> String {
            // Fast path: nothing can be encoded without an ampersand.
            guard contains("&") else {
                return self
            }

            var result = ""
            result.reserveCapacity(utf8.count)

            // Everything before `cursor` has already been copied or decoded.
            var cursor = startIndex

            while let amp = self[cursor...].firstIndex(of: "&") {
                // Plain text preceding the ampersand.
                result.append(contentsOf: self[cursor..<amp])

                let afterAmp = index(after: amp)

                // A reference candidate runs until `;`, whitespace, or the next `&`.
                let stop = self[afterAmp...].firstIndex(where: { ch in
                    ch == ";" || ch == "&" || ch == " " || ch == "\t" || ch == "\n" || ch == "\r"
                }) ?? endIndex
                let isTerminated = stop < endIndex && self[stop] == ";"
                let body = self[afterAmp..<stop]

                if isTerminated, let replacement = String.mappings["&\(body);"] {
                    // Known named reference: emit its replacement and skip the `;`.
                    result.append(replacement)
                    cursor = index(after: stop)
                } else if body.first == "#" {
                    // Numeric reference: optional x/X selects hexadecimal.
                    var digitsStart = index(after: afterAmp)
                    var radix = 10
                    if digitsStart < stop, self[digitsStart] == "x" || self[digitsStart] == "X" {
                        radix = 16
                        digitsStart = index(after: digitsStart)
                    }

                    // Only ASCII digits are accepted, so signs and full-width
                    // digit forms never reach the integer parser.
                    let digits = self[digitsStart..<stop].prefix(while: { ch in
                        ch.isASCII && (radix == 16 ? ch.isHexDigit : ch.isNumber)
                    })

                    if let code = UInt32(digits, radix: radix), let scalar = Unicode.Scalar(code) {
                        // Appending the scalar keeps code points above U+FFFF intact.
                        result.unicodeScalars.append(scalar)
                        cursor = digits.endIndex
                        if cursor < endIndex, self[cursor] == ";" {
                            cursor = index(after: cursor)
                        }
                    } else {
                        // Not a valid numeric reference: keep the text verbatim.
                        result.append(contentsOf: self[amp..<stop])
                        cursor = stop
                    }
                } else {
                    // A lone ampersand or an unknown name: keep the `&` literally.
                    result.append("&")
                    cursor = afterAmp
                }
            }

            // Trailing text after the last ampersand that was handled.
            result.append(contentsOf: self[cursor...])
            return result
        }
    }
    

提交回复
热议问题