I can't think of a way to remove the leading zeros. My goal was then to
create, in a for loop, the UTF-8 and UTF-32 versions of each number.
For example,
There are many ways to do this fun exercise of converting a code point to UTF-8.
So as not to give away the whole coding experience, the following is a sketch to get the OP started.
#include <stdint.h>
#include <stdio.h>

/* Largest code point that fits in a 1-, 2-, 3- and 4-byte UTF-8 sequence. */
#define UTF_WIDTH1_MAX 0x7F
#define UTF_WIDTH2_MAX 0x7FF
#define UTF_WIDTH3_MAX 0xFFFF
#define UTF_WIDTH4_MAX 0x10FFFF

/* Build one continuation byte (10xxxxxx) from the 6 bits of `codepoint`
 * that start at bit position `shift`. */
static uint8_t utf8_continuation(uint32_t codepoint, unsigned shift)
{
    return (uint8_t)(0x80u | ((codepoint >> shift) & 0x3Fu));
}

/*
 * Encode `codepoint` as UTF-8 and write the resulting 1-4 raw bytes to stdout.
 *
 * Layout per width (x = payload bit):
 *   1 byte : 0xxxxxxx
 *   2 bytes: 110xxxxx 10xxxxxx
 *   3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
 *   4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
 *
 * Surrogates (U+D800..U+DFFF) and values above U+10FFFF are not valid
 * code points for UTF-8; for those a diagnostic goes to stderr and nothing
 * is written to stdout.
 */
void PrintCodepointUTF8(uint32_t codepoint) {
    uint8_t bytes[4];
    size_t length;

    if (codepoint <= UTF_WIDTH1_MAX) {
        bytes[0] = (uint8_t)codepoint;
        length = 1;
    } else if (codepoint <= UTF_WIDTH2_MAX) {
        /* 5 payload bits in the lead byte, 6 in the continuation byte. */
        bytes[0] = (uint8_t)(0xC0u | (codepoint >> 6));
        bytes[1] = utf8_continuation(codepoint, 0);
        length = 2;
    } else if (codepoint <= UTF_WIDTH3_MAX) {
        /* The surrogate range lies entirely inside the 3-byte range. */
        if (codepoint >= 0xD800u && codepoint <= 0xDFFFu) {
            fprintf(stderr, "U+%04lX is a surrogate and cannot be encoded as UTF-8\n",
                    (unsigned long)codepoint);
            return;
        }
        /* 4 payload bits in the lead byte, 6 in each continuation byte. */
        bytes[0] = (uint8_t)(0xE0u | (codepoint >> 12));
        bytes[1] = utf8_continuation(codepoint, 6);
        bytes[2] = utf8_continuation(codepoint, 0);
        length = 3;
    } else if (codepoint <= UTF_WIDTH4_MAX) {
        /* 3 payload bits in the lead byte, 6 in each continuation byte. */
        bytes[0] = (uint8_t)(0xF0u | (codepoint >> 18));
        bytes[1] = utf8_continuation(codepoint, 12);
        bytes[2] = utf8_continuation(codepoint, 6);
        bytes[3] = utf8_continuation(codepoint, 0);
        length = 4;
    } else {
        fprintf(stderr, "0x%lX is outside the Unicode range (max U+10FFFF)\n",
                (unsigned long)codepoint);
        return;
    }

    if (fwrite(bytes, 1, length, stdout) != length) {
        perror("PrintCodepointUTF8");
    }
}