HTML character decoding in Objective-C / Cocoa Touch

后端 未结 13 2116
我寻月下人不归
我寻月下人不归 2020-11-22 10:24

First of all, I found this: Objective C HTML escape/unescape, but it doesn't work for me.

My encoded characters (come from a RSS feed, btw) look like this: &a

13条回答
  •  没有蜡笔的小新
    2020-11-22 11:02

    Here's a Swift version of Walty Yeung's answer:

    extension String {
        /// Named character references recognised by `stringByDecodingXMLEntities()`,
        /// keyed by the complete entity text (leading `&` and trailing `;` included)
        /// and mapped to the replacement text.
        ///
        /// Every key ends in `;`, so no key is a prefix of another and the order in
        /// which the dictionary is iterated cannot change the result.
        static private let mappings: [String: String] = [
            "&quot;" : "\"", "&apos;" : "'", "&amp;" : "&", "&lt;" : "<", "&gt;" : ">",
            "&nbsp;" : "\u{00A0}", "&iexcl;" : "¡", "&cent;" : "¢", "&pound;" : "£",
            "&curren;" : "¤", "&yen;" : "¥", "&brvbar;" : "¦", "&sect;" : "§",
            "&uml;" : "¨", "&copy;" : "©", "&ordf;" : "ª", "&laquo;" : "«",
            "&not;" : "¬", "&reg;" : "®", "&macr;" : "¯", "&deg;" : "°",
            "&plusmn;" : "±", "&sup2;" : "²", "&sup3;" : "³", "&acute;" : "´",
            "&micro;" : "µ", "&para;" : "¶", "&middot;" : "·", "&cedil;" : "¸",
            "&sup1;" : "¹", "&ordm;" : "º", "&raquo;" : "»", "&frac14;" : "¼",
            "&frac12;" : "½", "&frac34;" : "¾", "&iquest;" : "¿", "&times;" : "×",
            "&divide;" : "÷", "&ETH;" : "Ð", "&eth;" : "ð", "&THORN;" : "Þ",
            "&thorn;" : "þ", "&AElig;" : "Æ", "&aelig;" : "æ", "&OElig;" : "Œ",
            "&oelig;" : "œ", "&Aring;" : "Å", "&Oslash;" : "Ø", "&Ccedil;" : "Ç",
            "&ccedil;" : "ç", "&szlig;" : "ß", "&Ntilde;" : "Ñ", "&ntilde;" : "ñ",
        ]

        /// Returns a copy of the string with character references decoded.
        ///
        /// Handles the named entities listed in `mappings`, decimal references
        /// (`&#65;`) and hexadecimal references (`&#x41;` / `&#X41;`). The trailing
        /// `;` of a numeric reference is consumed when present but is not required.
        ///
        /// Anything that is not a recognised reference is copied through unchanged:
        /// unknown names, a lone `&`, and numeric references whose value is zero,
        /// negative, a surrogate (U+D800...U+DFFF) or above U+10FFFF.
        ///
        /// - Returns: The decoded string, or `self` when it contains no `&`.
        func stringByDecodingXMLEntities() -> String {

            // Fast path: without an ampersand there is nothing to decode.
            guard let _ = self.rangeOfString("&", options: [.LiteralSearch]) else {
                return self
            }

            var result = ""

            let scanner = NSScanner(string: self)
            // Whitespace is significant here; by default the scanner would skip it.
            scanner.charactersToBeSkipped = nil

            repeat {
                // Copy everything up to the next ampersand verbatim.
                var nonEntityString: NSString? = nil

                if scanner.scanUpToString("&", intoString: &nonEntityString) {
                    if let s = nonEntityString as? String {
                        result.appendContentsOf(s)
                    }
                }

                if scanner.atEnd {
                    break
                }

                // The scanner now sits on an "&". Try the named entities first.
                var didDecode = false
                for (k, v) in String.mappings {
                    if scanner.scanString(k, intoString: nil) {
                        result.appendContentsOf(v)
                        didDecode = true
                        break
                    }
                }

                if !didDecode {
                    // Remember where the "&" is so a malformed numeric reference
                    // can be rewound and emitted exactly as it was written.
                    let ampersandLocation = scanner.scanLocation

                    if scanner.scanString("&#", intoString: nil) {

                        var codePoint: UInt32? = nil

                        if scanner.scanString("x", intoString: nil) || scanner.scanString("X", intoString: nil) {
                            var hexValue: UInt32 = 0
                            if scanner.scanHexInt(&hexValue) {
                                codePoint = hexValue
                            }
                        }
                        else {
                            var decimalValue: Int32 = 0
                            // scanInt accepts a sign; a negative value is not a code point.
                            if scanner.scanInt(&decimalValue) && decimalValue >= 0 {
                                codePoint = UInt32(decimalValue)
                            }
                        }

                        // Build the character from a full Unicode scalar rather than a
                        // 16-bit "%C" format argument, so code points above U+FFFF survive.
                        // The range check matters because UnicodeScalar(_:) traps on
                        // surrogates and on values above U+10FFFF.
                        if let value = codePoint where value > 0 && value <= 0x10FFFF && !(value >= 0xD800 && value <= 0xDFFF) {
                            result.append(Character(UnicodeScalar(value)))
                            scanner.scanString(";", intoString: nil)
                            didDecode = true
                        }
                        else {
                            scanner.scanLocation = ampersandLocation
                        }
                    }

                    if !didDecode {
                        // Not a reference we understand: emit the "&" itself and let the
                        // next pass copy whatever follows it unchanged.
                        scanner.scanString("&", intoString: nil)
                        result.appendContentsOf("&")
                    }
                }

            } while (!scanner.atEnd)

            return result
        }
    }
    

提交回复
热议问题