Given either a Unicode symbol as a String
or its XML/HTML entity, how could one generate its Unicode number? For example, suppose you're given the string "෴".
With my library SwiftSoup, this is easy.
SwiftSoup is a pure-Swift, cross-platform library (macOS, iOS, tvOS, watchOS, and Linux).
// Sample text mixing markup-significant characters with non-ASCII letters and symbols.
let text = "Hello &<> Å å π 新 there ¾ © »"

// Escape, then unescape, the sample using the library's default settings.
print(Entities.escape(text))
print(Entities.unescape(text))

// ASCII output: one pass per escape mode (base, extended, xhtml).
let asciiBase = OutputSettings()
    .encoder(String.Encoding.ascii)
    .escapeMode(Entities.EscapeMode.base)
print(Entities.escape(text, asciiBase))

let asciiExtended = OutputSettings()
    .charset(String.Encoding.ascii)
    .escapeMode(Entities.EscapeMode.extended)
print(Entities.escape(text, asciiExtended))

let asciiXhtml = OutputSettings()
    .charset(String.Encoding.ascii)
    .escapeMode(Entities.EscapeMode.xhtml)
print(Entities.escape(text, asciiXhtml))

// UTF-8 output: extended and xhtml escape modes.
let utf8Extended = OutputSettings()
    .charset(String.Encoding.utf8)
    .escapeMode(Entities.EscapeMode.extended)
print(Entities.escape(text, utf8Extended))

let utf8Xhtml = OutputSettings()
    .charset(String.Encoding.utf8)
    .escapeMode(Entities.EscapeMode.xhtml)
print(Entities.escape(text, utf8Xhtml))
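Output:
"Hello &amp;&lt;&gt; Å å π 新 there ¾ © »"
"Hello &<> Å å π 新 there ¾ © »"
"Hello &amp;&lt;&gt; &Aring; &aring; &#x3c0; &#x65b0; there &frac34; &copy; &raquo;"
"Hello &amp;&lt;&gt; &angst; &aring; &pi; &#x65b0; there &frac34; &copy; &raquo;"
"Hello &amp;&lt;&gt; &#xc5; &#xe5; &#x3c0; &#x65b0; there &#xbe; &#xa9; &#xbb;"
"Hello &amp;&lt;&gt; Å å π 新 there ¾ © »"
"Hello &amp;&lt;&gt; Å å π 新 there ¾ © »"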
update: Xcode 11.4 • Swift 5.2
extension String {
    /// The string's UTF-8 code units packed into a `Data` value.
    var data: Data { Data(utf8) }

    /// The string interpreted as a UTF-8 encoded HTML document.
    ///
    /// If the attributed-string initialiser throws, the error is printed and
    /// `nil` is returned.
    /// NOTE(review): Apple's documentation advises using the HTML importer only
    /// from the main thread — confirm the call sites.
    var html2AttributedString: NSAttributedString? {
        let readingOptions: [NSAttributedString.DocumentReadingOptionKey: Any] = [
            .documentType: NSAttributedString.DocumentType.html,
            .characterEncoding: String.Encoding.utf8.rawValue
        ]
        let parsed: NSAttributedString?
        do {
            parsed = try NSAttributedString(data: data, options: readingOptions, documentAttributes: nil)
        } catch let parseError {
            print(parseError)
            parsed = nil
        }
        return parsed
    }

    /// The plain text of `html2AttributedString`, or "" when that is `nil`.
    var html2String: String {
        guard let attributed = html2AttributedString else { return "" }
        return attributed.string
    }

    /// The numeric value of every Unicode scalar in the string, in order.
    var unicodes: [UInt32] {
        unicodeScalars.map { scalar in scalar.value }
    }
}
// Playground check: reduce the markup to plain text, then list the scalar values of the result.
// NOTE(review): the literal presumably held HTML entities (e.g. `&euro;`) before this text was
// extracted — confirm against the original post.
let str = "<span>€€</span>".html2String // "€€"
str.unicodes // [8364, 8364]
/// Extra transform identifiers, built from the raw values "Hex/Unicode", "Hex/Java" and "Hex/Perl".
extension StringTransform {
    /// Transform with raw value "Hex/Unicode".
    static let toUnicodeHex = StringTransform(rawValue: "Hex/Unicode")

    /// Transform with raw value "Hex/Java".
    static let toJavaHex = StringTransform(rawValue: "Hex/Java")

    /// Transform with raw value "Hex/Perl".
    static let toPerlHex = StringTransform(rawValue: "Hex/Perl")
}
extension String {
    /// Applies `transform` in the forward direction; yields "" when the transform returns `nil`.
    private func hexEncoded(with transform: StringTransform) -> String {
        guard let converted = applyingTransform(transform, reverse: false) else { return "" }
        return converted
    }

    /// The string after the `.toUnicodeHex` transform, or "" on failure.
    var convertedToUnicodeHex: String { hexEncoded(with: .toUnicodeHex) }

    /// The string after the `.toJavaHex` transform, or "" on failure.
    var convertedToJavaHex: String { hexEncoded(with: .toJavaHex) }

    /// The string after the `.toXMLHex` transform, or "" on failure.
    var convertedToXMLHex: String { hexEncoded(with: .toXMLHex) }

    /// The string after the `.toPerlHex` transform, or "" on failure.
    var convertedToPerlHex: String { hexEncoded(with: .toPerlHex) }
}

// Playground checks: every form below denotes the same scalar, U+0DF4 (decimal 3572).
"෴".convertedToUnicodeHex // U+0DF4
"෴".convertedToJavaHex // \u0DF4
"෴".convertedToXMLHex // &#xDF4;
"෴".convertedToPerlHex // \x{DF4}
"෴".unicodes // [3572]
0x0DF4 // 3572