My software receives some strings in UTF-8 that I need to convert to ISO 8859-1. I know that the UTF-8 domain is bigger than that of ISO 8859-1. But the data in UTF-8 has been previously
The following example also uses the iconv library. It works even when you have a file (or input stream) that contains mixed UTF-8 and ISO-8859-1 characters (this could happen, for example, if you have a UTF-8 file and edit it in an environment that uses ISO-8859-1).
/**
 * Convert a buffer of UTF-8 text to ISO-8859-1 (Latin-1) with iconv.
 *
 * Input that iconv rejects is tolerated rather than treated as fatal: when
 * iconv stops with EILSEQ (a byte sequence it cannot convert) or EINVAL (an
 * incomplete sequence at the end of the input), the offending byte is copied
 * to the output unchanged and conversion resumes after it. This lets input
 * that mixes UTF-8 with raw Latin-1 bytes pass through.
 *
 * @param input   Bytes to convert; at least `size` bytes are read.
 * @param output  Destination buffer. It must hold at least `size + 1` bytes:
 *                up to `size` converted bytes plus the terminating NUL.
 * @param size    Number of input bytes to convert.
 * @return 0 on success. -1 if the converter could not be opened or iconv
 *         failed with an unrecoverable error; in that case `output` receives
 *         a NUL-terminated copy of the input instead.
 */
int Utf8ToLatin1(char* input, char* output, size_t size)
{
    size_t in_left = size;
    size_t out_left = size;
    char *in_buf = input;
    char *out_buf = output;
    int rc = 0;

    iconv_t cd = iconv_open("ISO_8859-1", "UTF-8");
    if (cd == (iconv_t)-1) {
        (void) fprintf(stderr, "iconv_open() failed, msg encoding will be kept!");
        strncpy(output, input, size);
        output[size] = '\0'; /* strncpy does not terminate when input fills size */
        return -1;
    }

    do {
        if (iconv(cd, &in_buf, &in_left, &out_buf, &out_left) != (size_t) -1) {
            continue; /* converted everything it could; re-check the loop condition */
        }

        /* Save errno immediately: the diagnostics below may overwrite it. */
        const int err = errno;

        if ((err == EILSEQ || err == EINVAL) && in_left > 0 && out_left > 0) {
            if (err == EILSEQ) {
                /* Input conversion stopped due to an input byte that
                 * does not belong to the input codeset.
                 */
                printf("Input conversion stopped due to an input byte that does not belong to the input codeset.\n");
            } else {
                /* Input conversion stopped due to an incomplete
                 * character or shift sequence at the end of the
                 * input buffer.
                 */
                printf("Input conversion stopped due to an incomplete character or shift sequence at the end of the input buffer.\n");
            }
            /* Pass the offending byte through untouched and carry on. */
            *out_buf++ = *in_buf++;
            out_left--;
            in_left--;
            continue;
        }

        if (err == E2BIG) {
            /* Input conversion stopped due to lack of space in
             * the output buffer.
             */
            printf("Input conversion stopped due to lack of space in the output buffer.\n");
            errno = err;
            perror("iconv failed!, propably the encoding is already Latin, msg encoding will be kept!\n");
        } else {
            /* Any other error makes no progress; bail out instead of
             * spinning forever on the same input position.
             */
            errno = err;
            perror("iconv failed, msg encoding will be kept");
        }
        rc = -1;
        break;
    } while (in_left > 0 && out_left > 0);

    /* Release the descriptor on every path, including failures. */
    iconv_close(cd);

    if (rc != 0) {
        strncpy(output, input, size);
        output[size] = '\0';
        return rc;
    }

    *out_buf = '\0';
    printf("*********************************************************\n");
    printf("ISO-8859-1:\n %s\n", output);
    return 0;
}