I have this MTST column, which when printed yields
[1] "<U+0391>G<U+03A1><U+0399><U+039D><U+0399><U+039F>"
Here's another alternative for recovering the true Unicode character encoded in a string (borrowed from this question). Here we carefully match the `<U+XXXX>` form and unroll that hex value into a properly sized Unicode character with some bit manipulation.
#' Replace literal `<U+XXXX>` escapes with the characters they name
#'
#' Scans each string for escapes made of `<U+`, exactly four hexadecimal
#' digits and `>`, and substitutes the UTF-8 encoding of that code point.
#' Text outside the escapes is left as it is.
#'
#' @param x A character vector that may contain `<U+XXXX>` escapes.
#' @return `x` with every escape replaced by its character, marked as UTF-8
#'   through `Encoding<-`.
trueunicode <- function(x) {
  # Hand-encode one code point as UTF-8. `intToBits()` returns the bits
  # least-significant first and `packBits()` consumes them in the same order,
  # so every byte is assembled low bit first and the byte sequence is reversed
  # at the end to put the lead byte in front.
  pack_one <- function(cp) {
    bv <- intToBits(cp)
    set_bits <- which(bv != as.raw(0))
    if (length(set_bits) == 0L) {
      # U+0000 has no set bit, and an R string cannot hold a nul byte.
      stop("code point U+0000 cannot be represented in a string",
           call. = FALSE)
    }
    maxbit <- set_bits[length(set_bits)]
    cont <- as.raw(c(0, 1))  # high bits of a continuation byte: 10xxxxxx
    if (maxbit < 8) {
      # At most 7 significant bits: a single ASCII byte.
      rawToChar(as.raw(cp))
    } else if (maxbit < 12) {
      # At most 11 significant bits: 110xxxxx 10xxxxxx.
      bits <- c(bv[1:6], cont, bv[7:11], as.raw(c(0, 1, 1)))
      rawToChar(rev(packBits(bits, "raw")))
    } else if (maxbit < 17) {
      # At most 16 significant bits: 1110xxxx 10xxxxxx 10xxxxxx.
      bits <- c(
        bv[1:6], cont,
        bv[7:12], cont,
        bv[13:16], as.raw(c(0, 1, 1, 1))
      )
      rawToChar(rev(packBits(bits, "raw")))
    } else {
      stop("too many bits")
    }
  }

  m <- gregexpr("<U\\+[0-9a-fA-F]{4}>", x)
  codes <- regmatches(x, m)
  chars <- lapply(codes, function(escapes) {
    # Characters 4-7 of "<U+XXXX>" are the four hex digits.
    codepoints <- strtoi(substring(escapes, 4, 7), base = 16L)
    # vapply() keeps the result a character vector even when a string
    # contains no escapes at all.
    vapply(codepoints, pack_one, character(1), USE.NAMES = FALSE)
  })
  regmatches(x, m) <- chars
  # The substituted bytes are UTF-8, so declare the strings as such.
  Encoding(x) <- "UTF-8"
  x
}
using the sample
# Sample data: Greek capitals written as literal <U+XXXX> escapes, mixed with
# plain ASCII letters and parentheses that need no decoding.
input <- c(
  "<U+0391>G<U+03A1><U+0399><U+039D><U+0399><U+039F>",
  "<U+0391>G<U+03A7><U+0399><U+0391><U+039B><U+039F>S",
  "<U+0391><U+0399>G<U+0399><U+039D><U+0391>",
  "<U+0391><U+0399>G<U+0399><U+039F>",
  "<U+0391><U+0399><U+0394><U+0397><U+03A8><U+039F>S",
  paste0(
    "<U+0391><U+039A><U+03A4><U+0399><U+039F>",
    "(<U+03A0><U+03A1><U+0395><U+0392><U+0395><U+0396><U+0391>)"
  )
)
you get
# Decode the sample vector; the printed result is shown below. The Latin
# letters G and S and the parentheses are plain text in the input and appear
# unchanged in the output.
trueunicode(input)
# [1] "ΑGΡΙΝΙΟ" "ΑGΧΙΑΛΟS" "ΑΙGΙΝΑ" "ΑΙGΙΟ"
# [5] "ΑΙΔΗΨΟS" "ΑΚΤΙΟ(ΠΡΕΒΕΖΑ)"