I've got an international character stored in a unichar variable. This character does not come from a file or URL. The variable itself only stores an unsigned short (0xce91).
Here is an algorithm for UTF-8 encoding on a single character:
/*
 * Encode the code point held in utf8char as UTF-8 into chars[0..3].
 * Unused trailing bytes are set to 0x00. Values >= 0x110000 match no
 * branch and leave chars untouched.
 *
 * Each byte is built as ((payload bits) & mask) | prefix: the mask keeps
 * only the bits that belong in this byte, and the prefix then sets the
 * UTF-8 marker bits (0xC0/0xE0/0xF0 for lead bytes, 0x80 for continuation
 * bytes). The mask must be applied BEFORE the prefix is OR-ed in;
 * writing `& (mask | prefix)` never sets the marker bits.
 *
 * NOTE(review): surrogate values 0xD800..0xDFFF are not rejected here;
 * confirm whether the caller can ever pass them.
 */
if (utf8char < 0x80) {
    /* 1 byte: 0xxxxxxx */
    chars[0] = utf8char & 0x7F;
    chars[1] = 0x00;
    chars[2] = 0x00;
    chars[3] = 0x00;
}
else if (utf8char < 0x0800) {
    /* 2 bytes: 110xxxxx 10xxxxxx */
    chars[0] = ((utf8char >> 6) & 0x1F) | 0xC0;
    chars[1] = (utf8char & 0x3F) | 0x80;
    chars[2] = 0x00;
    chars[3] = 0x00;
}
else if (utf8char < 0x010000) {
    /* 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx */
    chars[0] = ((utf8char >> 12) & 0x0F) | 0xE0;
    chars[1] = ((utf8char >> 6) & 0x3F) | 0x80;
    chars[2] = (utf8char & 0x3F) | 0x80;
    chars[3] = 0x00;
}
else if (utf8char < 0x110000) {
    /* 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
    chars[0] = ((utf8char >> 18) & 0x07) | 0xF0;
    chars[1] = ((utf8char >> 12) & 0x3F) | 0x80;
    chars[2] = ((utf8char >> 6) & 0x3F) | 0x80;
    chars[3] = (utf8char & 0x3F) | 0x80;
}