How to convert from UTF-8 to ANSI using standard C++

后端 未结 4 825
孤街浪徒
孤街浪徒 2020-12-15 13:46

I have some strings read from the database, stored in a char* and encoded as UTF-8 (for example, "á" is encoded as 0xC3 0xA1). But in order to write them to a file, I first need to convert them to ANSI.

4条回答
  •  青春惊慌失措
    2020-12-15 13:49

    This should work:

    #include <codecvt>
    #include <locale>
    #include <string>
    
    using namespace std::string_literals;
    
    /// Re-encodes text from the narrow encoding of `loc` into UTF-8.
    ///
    /// Every byte of `str` is widened to one wide character through the
    /// locale's std::ctype<wchar_t> facet, and the resulting wide string is
    /// serialized as UTF-8. wchar_t is used because std::ctype<wchar_t> is a
    /// facet every std::locale is required to provide, whereas
    /// std::ctype<char32_t> is not and may be missing at run time.
    ///
    /// @param str  Text in the single-byte encoding described by `loc`.
    /// @param loc  Locale whose ctype facet maps bytes to wide characters;
    ///             defaults to a copy of the current global locale.
    /// @return     The same text encoded as UTF-8.
    /// @throws std::range_error if a widened character cannot be encoded.
    std::string to_utf8(const std::string& str, const std::locale& loc = std::locale{}) {
      using wcvt = std::wstring_convert<std::codecvt_utf8<wchar_t>, wchar_t>;
      // widen() maps element-for-element, so the buffer needs exactly one
      // wide character per input byte.
      std::wstring wide(str.size(), L'\0');
      std::use_facet<std::ctype<wchar_t>>(loc).widen(str.data(), str.data() + str.size(), &wide[0]);
      return wcvt{}.to_bytes(wide);
    }
    
    /// Converts UTF-8 text into the narrow encoding of `loc`.
    ///
    /// The input is decoded into wide characters, then each one is narrowed
    /// to a single byte through the locale's std::ctype<wchar_t> facet.
    /// Characters the locale cannot represent are replaced by '?'. wchar_t is
    /// used because std::ctype<wchar_t> is a facet every std::locale is
    /// required to provide, whereas std::ctype<char32_t> is not.
    ///
    /// @param str  UTF-8 encoded text.
    /// @param loc  Locale whose ctype facet maps wide characters to bytes;
    ///             defaults to a copy of the current global locale.
    /// @return     One byte per decoded character, in the locale's encoding.
    /// @throws std::range_error if `str` is not valid UTF-8.
    std::string from_utf8(const std::string& str, const std::locale& loc = std::locale{}) {
      using wcvt = std::wstring_convert<std::codecvt_utf8<wchar_t>, wchar_t>;
      const std::wstring wide = wcvt{}.from_bytes(str);
      // narrow() writes exactly one byte per wide character, so every
      // placeholder below is overwritten.
      std::string result(wide.size(), '\0');
      std::use_facet<std::ctype<wchar_t>>(loc).narrow(wide.data(), wide.data() + wide.size(), '?', &result[0]);
      return result;
    }
    
    // Demo: takes a UTF-8 literal, converts it to the locale's narrow
    // encoding, and converts that result back to UTF-8.
    int main() {
      const auto utf8_text = u8"Blöde C++ Scheiße äöü!!1Elf"s;
      const auto narrow_text = from_utf8(utf8_text);
      const auto round_trip = to_utf8(narrow_text);

      return 0;
    }
    

    For VC++:

    #include <codecvt>
    #include <locale>
    #include <string>
    
    using namespace std::string_literals;
    
    /// Re-encodes text from the narrow encoding of `loc` into UTF-8.
    ///
    /// Every byte of `str` is widened to one wide character through the
    /// locale's std::ctype<wchar_t> facet, and the resulting wide string is
    /// serialized as UTF-8. Working in wchar_t throughout relies only on
    /// facets every std::locale is required to provide and needs no
    /// reinterpret_cast between unrelated character types.
    ///
    /// @param str  Text in the single-byte encoding described by `loc`.
    /// @param loc  Locale whose ctype facet maps bytes to wide characters;
    ///             defaults to a copy of the current global locale.
    /// @return     The same text encoded as UTF-8.
    /// @throws std::range_error if a widened character cannot be encoded.
    std::string to_utf8(const std::string& str, const std::locale& loc = std::locale{}) {
      using wcvt = std::wstring_convert<std::codecvt_utf8<wchar_t>, wchar_t>;
      // widen() maps element-for-element, so the buffer needs exactly one
      // wide character per input byte.
      std::wstring wide(str.size(), L'\0');
      std::use_facet<std::ctype<wchar_t>>(loc).widen(str.data(), str.data() + str.size(), &wide[0]);
      return wcvt{}.to_bytes(wide);
    }
    
    /// Converts UTF-8 text into the narrow encoding of `loc`.
    ///
    /// The input is decoded into wide characters, then each one is narrowed
    /// to a single byte through the locale's std::ctype<wchar_t> facet.
    /// Characters the locale cannot represent are replaced by '?'. Working in
    /// wchar_t throughout relies only on facets every std::locale is required
    /// to provide and needs no reinterpret_cast between unrelated character
    /// types.
    ///
    /// @param str  UTF-8 encoded text.
    /// @param loc  Locale whose ctype facet maps wide characters to bytes;
    ///             defaults to a copy of the current global locale.
    /// @return     One byte per decoded character, in the locale's encoding.
    /// @throws std::range_error if `str` is not valid UTF-8.
    std::string from_utf8(const std::string& str, const std::locale& loc = std::locale{}) {
      using wcvt = std::wstring_convert<std::codecvt_utf8<wchar_t>, wchar_t>;
      const std::wstring wide = wcvt{}.from_bytes(str);
      // narrow() writes exactly one byte per wide character, so every
      // placeholder below is overwritten.
      std::string result(wide.size(), '\0');
      std::use_facet<std::ctype<wchar_t>>(loc).narrow(wide.data(), wide.data() + wide.size(), '?', &result[0]);
      return result;
    }
    
    // Demo: takes a UTF-8 literal, converts it to the locale's narrow
    // encoding, and converts that result back to UTF-8.
    int main() {
      const auto utf8_text = u8"Blöde C++ Scheiße äöü!!1Elf"s;
      const auto narrow_text = from_utf8(utf8_text);
      const auto round_trip = to_utf8(narrow_text);

      return 0;
    }
    

提交回复
热议问题