How to printf accented characters in ANSI C (like á é í ó ú)

后端 未结 3 1112
佛祖请我去吃肉
佛祖请我去吃肉 2020-12-10 13:50

I tried to printf with some accented characters such as á é í ó ú:

printf("my name is Seán\n");

The text editor in

3条回答
  •  情书的邮戳
    2020-12-10 14:23

    Perhaps the best approach is to use Unicode.

    Here's how...

    First, manually set your console font to "Consolas" or "Lucida Console" or whichever True-Type Unicode font you can choose ("Raster fonts" may not work, those aren't Unicode fonts, although they may include characters you're interested in).

    Next, set the console code page to 65001 (UTF-8) with SetConsoleOutputCP(CP_UTF8).

    Then convert your text to UTF-8 (if it's not yet in UTF-8) using WideCharToMultiByte(CP_UTF8, ...).

    Finally, call WriteConsoleA() to output the UTF-8 text.

    Here's a little function that does all of these things for you; it's an "improved" variant of wprintf():

    /*
     * _wprintf - wprintf()-like function that sends its output to the Windows
     * console as UTF-8.
     *
     * On the first call it tries to switch the console output code page to
     * CP_UTF8. If that fails, this and every later call simply forward to
     * vwprintf(). Otherwise the arguments are formatted into a wide-character
     * buffer, converted to UTF-8 with WideCharToMultiByte() and written with
     * WriteConsoleA() when stdout is a console, or with fputs() when it is not
     * (GetConsoleMode() fails, e.g. output is redirected).
     *
     * The two work buffers are static: they are allocated on first use, grown
     * on demand, reused by later calls and never freed. Nothing here guards
     * them against concurrent callers.
     *
     * format - wprintf-style wide format string, followed by its arguments.
     *
     * Returns the number of wide characters produced by the formatting step
     * (or the result of vwprintf() in the fallback path), or -1 on allocation,
     * formatting, conversion or write failure.
     */
    int _wprintf(const wchar_t* format, ...)
    {
      /*
       * Upper bound for the wide-character buffer. vswprintf() reports both
       * "buffer too small" and genuine formatting errors with a negative
       * value, so growth has to stop somewhere to avoid looping forever.
       */
      enum { MAX_WCHAR_COUNT = 1 << 20 };

      int r;
      static int utf8ModeSet = 0;          /* 0: not tried, 1: UTF-8 active, -1: failed */
      static wchar_t* bufWchar = NULL;
      static size_t bufWcharCount = 256;   /* capacity of bufWchar, in wchar_t */
      static char* bufMchar = NULL;
      static size_t bufMcharCount = 256;   /* capacity of bufMchar, in bytes */
      va_list vl;
      int mcharCount = 0;

      if (utf8ModeSet == 0)
      {
        if (!SetConsoleOutputCP(CP_UTF8))
        {
          DWORD err = GetLastError();
          /* DWORD is unsigned long, so it needs %lX rather than %X. */
          fprintf(stderr, "SetConsoleOutputCP(CP_UTF8) failed with error 0x%lX\n", (unsigned long)err);
          utf8ModeSet = -1;
        }
        else
        {
          utf8ModeSet = 1;
        }
      }

      /* UTF-8 console mode unavailable: behave like plain wprintf(). */
      if (utf8ModeSet != 1)
      {
        va_start(vl, format);
        r = vwprintf(format, vl);
        va_end(vl);
        return r;
      }

      if (bufWchar == NULL)
      {
        if ((bufWchar = malloc(bufWcharCount * sizeof(wchar_t))) == NULL)
        {
          return -1;
        }
      }

      /* Format into bufWchar, doubling the buffer until the text fits. */
      for (;;)
      {
        wchar_t* grown;
        size_t grownCount;

        /* The va_list must be restarted for every formatting attempt. */
        va_start(vl, format);
        r = vswprintf(bufWchar, bufWcharCount, format, vl);
        va_end(vl);

        /* Success: the text and its terminator fit in the buffer. */
        if (r >= 0 && (size_t)r < bufWcharCount)
        {
          break;
        }

        /*
         * A negative result is what vswprintf() returns when the buffer is
         * too small, so it must trigger growth rather than end the loop.
         */
        if (bufWcharCount >= MAX_WCHAR_COUNT)
        {
          return -1;
        }

        /* Allocate first so the old buffer stays valid if malloc() fails. */
        grownCount = bufWcharCount * 2;
        if ((grown = malloc(grownCount * sizeof(wchar_t))) == NULL)
        {
          return -1;
        }
        free(bufWchar);
        bufWchar = grown;
        bufWcharCount = grownCount;
      }

      if (r > 0)
      {
        /*
         * With an output size of 0, WideCharToMultiByte() only reports how
         * many bytes are required (terminator included, because the input
         * length is given as -1).
         */
        int needed = WideCharToMultiByte(CP_UTF8,
                                         0,
                                         bufWchar,
                                         -1,
                                         NULL,
                                         0,
                                         NULL,
                                         NULL);
        if (needed <= 0)
        {
          return -1;
        }

        if (bufMchar == NULL || (size_t)needed > bufMcharCount)
        {
          char* grown;
          size_t grownCount = bufMcharCount;

          while (grownCount < (size_t)needed)
          {
            grownCount *= 2;
          }

          /* Allocate first so the old buffer stays valid if malloc() fails. */
          if ((grown = malloc(grownCount)) == NULL)
          {
            return -1;
          }
          free(bufMchar);
          bufMchar = grown;
          bufMcharCount = grownCount;
        }

        mcharCount = WideCharToMultiByte(CP_UTF8,
                                         0,
                                         bufWchar,
                                         -1,
                                         bufMchar,
                                         (int)bufMcharCount,
                                         NULL,
                                         NULL);
        if (mcharCount <= 0)
        {
          return -1;
        }
      }

      /* mcharCount counts the terminating NUL, so > 1 means there is text. */
      if (mcharCount > 1)
      {
        DWORD numberOfCharsWritten, consoleMode;

        if (GetConsoleMode(GetStdHandle(STD_OUTPUT_HANDLE), &consoleMode))
        {
          /* Flush pending stdio output so it stays ahead of the direct write. */
          fflush(stdout);
          if (!WriteConsoleA(GetStdHandle(STD_OUTPUT_HANDLE),
                             bufMchar,
                             mcharCount - 1,
                             &numberOfCharsWritten,
                             NULL))
          {
            return -1;
          }
        }
        else
        {
          /* stdout is not a console: emit the UTF-8 bytes through stdio. */
          if (fputs(bufMchar, stdout) == EOF)
          {
            return -1;
          }
        }
      }

      return r;
    }
    

    The following code tests this function:

    /* Code points U+00A0..U+00FF, printed as three lines of 32 characters. */
    _wprintf(L"\xA0\xA1\xA2\xA3\xA4\xA5\xA6\xA7"
             L"\xA8\xA9\xAA\xAB\xAC\xAD\xAE\xAF"
             L"\xB0\xB1\xB2\xB3\xB4\xB5\xB6\xB7"
             L"\xB8\xB9\xBA\xBB\xBC\xBD\xBE\xBF"
             L"\n"
             L"\xC0\xC1\xC2\xC3\xC4\xC5\xC6\xC7"
             L"\xC8\xC9\xCA\xCB\xCC\xCD\xCE\xCF"
             L"\xD0\xD1\xD2\xD3\xD4\xD5\xD6\xD7"
             L"\xD8\xD9\xDA\xDB\xDC\xDD\xDE\xDF"
             L"\n"
             L"\xE0\xE1\xE2\xE3\xE4\xE5\xE6\xE7"
             L"\xE8\xE9\xEA\xEB\xEC\xED\xEE\xEF"
             L"\xF0\xF1\xF2\xF3\xF4\xF5\xF6\xF7"
             L"\xF8\xF9\xFA\xFB\xFC\xFD\xFE\xFF"
             L"\n");
    
    /* Code points U+0391..U+03B0 on the first line, U+03B1..U+03CE on the second. */
    _wprintf(L"\x391\x392\x393\x394\x395\x396\x397"
             L"\x398\x399\x39A\x39B\x39C\x39D\x39E\x39F"
             L"\x3A0\x3A1\x3A2\x3A3\x3A4\x3A5\x3A6\x3A7"
             L"\x3A8\x3A9\x3AA\x3AB\x3AC\x3AD\x3AE\x3AF\x3B0"
             L"\n"
             L"\x3B1\x3B2\x3B3\x3B4\x3B5\x3B6\x3B7"
             L"\x3B8\x3B9\x3BA\x3BB\x3BC\x3BD\x3BE\x3BF"
             L"\x3C0\x3C1\x3C2\x3C3\x3C4\x3C5\x3C6\x3C7"
             L"\x3C8\x3C9\x3CA\x3CB\x3CC\x3CD\x3CE"
             L"\n");
    
    /*
     * Code points U+0410..U+042F on the first line and U+0430..U+044F on the
     * second, with U+0401 / U+0451 inserted after U+0415 / U+0435 respectively.
     */
    _wprintf(L"\x410\x411\x412\x413\x414\x415\x401\x416\x417"
             L"\x418\x419\x41A\x41B\x41C\x41D\x41E\x41F"
             L"\x420\x421\x422\x423\x424\x425\x426\x427"
             L"\x428\x429\x42A\x42B\x42C\x42D\x42E\x42F"
             L"\n"
             L"\x430\x431\x432\x433\x434\x435\x451\x436\x437"
             L"\x438\x439\x43A\x43B\x43C\x43D\x43E\x43F"
             L"\x440\x441\x442\x443\x444\x445\x446\x447"
             L"\x448\x449\x44A\x44B\x44C\x44D\x44E\x44F"
             L"\n");
    

    And should result in the following text in the console:

     ¡¢£¤¥¦§¨©ª«¬­®¯°±²³´µ¶·¸¹º»¼½¾¿
    ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞß
    àáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ
    ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡ΢ΣΤΥΦΧΨΩΪΫάέήίΰ
    αβγδεζηθικλμνξοπρςστυφχψωϊϋόύώ
    АБВГДЕЁЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ
    абвгдеёжзийклмнопрстуфхцчшщъыьэюя
    

    I do not know the encoding in which your IDE stores non-ASCII characters in .c/.cpp files, and I do not know what your compiler does when it encounters non-ASCII characters. This part you should figure out yourself.

    As long as you supply to _wprintf() properly encoded UTF-16 text or call WriteConsoleA() with properly encoded UTF-8 text, things should work.

    P.S. Some gory details about console fonts can be found here.

提交回复
热议问题