I am searching for a way to iterate over a UTF-8 string, or to split it into an array of UTF-8 symbols, that is independent of both the platform and any third-party library.
Please post a code snippet.
Off the cuff:
// Decode the UTF-8 bytes of s and append one code point per decoded
// sequence to u.
//
// Parameters:
//   u       - output vector; decoded code points are appended (existing
//             contents are kept). Code points decoded before an error is
//             detected remain in u.
//             NOTE(review): the element type was lost from the original
//             text; int is assumed here because the decoder accumulates
//             into an int - confirm against callers.
//   s       - input bytes, interpreted as UTF-8.
//   success - set to true only when all of s was decoded and the input did
//             not end in the middle of a multi-byte sequence.
//
// Returns the number of bytes of s consumed. On success this equals
// s.length(). On a malformed byte it is the index of the byte where decoding
// failed. Remember to check the success flag: a sequence truncated at the end
// of the string still returns s.length(), with success == false.
//
// Only the structural form of UTF-8 is checked (lead byte followed by the
// right number of 10xxxxxx continuation bytes). Overlong encodings, UTF-16
// surrogates and values above U+10FFFF are NOT rejected.
int convert(std::vector<int>& u, const std::string& s, bool& success) {
    success = false;
    int cp = 0;      // code point being assembled
    int runlen = 0;  // continuation bytes still expected for cp
    for (std::string::const_iterator it = s.begin(), end = s.end(); it != end; ++it) {
        // Go through unsigned char so bytes >= 0x80 are not sign-extended
        // to negative values on platforms where char is signed.
        const int ch = static_cast<unsigned char>(*it);
        if (runlen > 0) {
            // Inside a multi-byte sequence: the byte must be 10xxxxxx.
            // The parentheses matter: != binds tighter than &.
            if ((ch & 0xc0) != 0x80) return static_cast<int>(it - s.begin());
            cp = (cp << 6) | (ch & 0x3f);
            if (--runlen == 0) {
                u.push_back(cp);
                cp = 0;
            }
        }
        else if (ch < 0x80) {
            // 0xxxxxxx: single-byte (ASCII) code point.
            u.push_back(ch);
        }
        else if ((ch & 0xe0) == 0xc0) { cp = ch & 0x1f; runlen = 1; }  // 110xxxxx
        else if ((ch & 0xf0) == 0xe0) { cp = ch & 0x0f; runlen = 2; }  // 1110xxxx
        else if ((ch & 0xf8) == 0xf0) { cp = ch & 0x07; runlen = 3; }  // 11110xxx
        else {
            // Stray continuation byte (0x80-0xBF) or invalid lead (0xF8-0xFF).
            return static_cast<int>(it - s.begin());  // stop on error
        }
    }
    success = (runlen == 0);  // verify we are between code points
    return static_cast<int>(s.length());
}