How to convert from UTF-8 to ANSI using standard c++

六月ゝ 毕业季﹏ 提交于 2019-11-30 10:19:39

A few days ago, somebody answered that if I had a C++11 compiler, I could try this:

#include <string>
#include <codecvt>
#include <locale>

string utf8_to_string(const char *utf8str, const locale& loc)
{
    // UTF-8 to wstring
    wstring_convert<codecvt_utf8<wchar_t>> wconv;
    wstring wstr = wconv.from_bytes(utf8str);
    // wstring to string
    vector<char> buf(wstr.size());
    use_facet<ctype<wchar_t>>(loc).narrow(wstr.data(), wstr.data() + wstr.size(), '?', buf.data());
    return string(buf.data(), buf.size());
}

int main(int argc, char* argv[])
{
    string ansi;
    char utf8txt[] = {0xc3, 0xa1, 0};

    // I guess you want to use Windows-1252 encoding...
    ansi = utf8_to_string(utf8txt, locale(".1252"));
    // Now do something with the string
    return 0;
}

Don't know what happened to the response, apparently someone deleted it. But, turns out that it is the perfect solution. To whoever posted, thanks a lot, and you deserve the AC and upvote!!

If you mean ASCII, just discard any byte that has bit 7 set, this will remove all multibyte sequences. Note that you could create more advanced algorithms, like removing the accent from the "á", but that would require much more work.

This should work:

#include <string>
#include <codecvt>

using namespace std::string_literals;

std::string to_utf8(const std::string& str, const std::locale& loc = std::locale{}) {
  using wcvt = std::wstring_convert<std::codecvt_utf8<char32_t>, char32_t>;
  std::u32string wstr(str.size(), U'\0');
  std::use_facet<std::ctype<char32_t>>(loc).widen(str.data(), str.data() + str.size(), &wstr[0]);
  return wcvt{}.to_bytes(wstr.data(),wstr.data() + wstr.size());
}

std::string from_utf8(const std::string& str, const std::locale& loc = std::locale{}) {
  using wcvt = std::wstring_convert<std::codecvt_utf8<char32_t>, char32_t>;
  auto wstr = wcvt{}.from_bytes(str);
  std::string result(wstr.size(), '0');
  std::use_facet<std::ctype<char32_t>>(loc).narrow(wstr.data(), wstr.data() + wstr.size(), '?', &result[0]);
  return result;
}

int main() {
  auto s0 = u8"Blöde C++ Scheiße äöü!!1Elf"s;
  auto s1 = from_utf8(s0);
  auto s2 = to_utf8(s1);

  return 0;
}

For VC++:

#include <string>
#include <codecvt>

using namespace std::string_literals;

std::string to_utf8(const std::string& str, const std::locale& loc = std::locale{}) {
  using wcvt = std::wstring_convert<std::codecvt_utf8<int32_t>, int32_t>;
  std::u32string wstr(str.size(), U'\0');
  std::use_facet<std::ctype<char32_t>>(loc).widen(str.data(), str.data() + str.size(), &wstr[0]);
  return wcvt{}.to_bytes(
    reinterpret_cast<const int32_t*>(wstr.data()),
    reinterpret_cast<const int32_t*>(wstr.data() + wstr.size())
  );
}

std::string from_utf8(const std::string& str, const std::locale& loc = std::locale{}) {
  using wcvt = std::wstring_convert<std::codecvt_utf8<int32_t>, int32_t>;
  auto wstr = wcvt{}.from_bytes(str);
  std::string result(wstr.size(), '0');
  std::use_facet<std::ctype<char32_t>>(loc).narrow(
    reinterpret_cast<const char32_t*>(wstr.data()),
    reinterpret_cast<const char32_t*>(wstr.data() + wstr.size()),
    '?', &result[0]);
  return result;
}

int main() {
  auto s0 = u8"Blöde C++ Scheiße äöü!!1Elf"s;
  auto s1 = from_utf8(s0);
  auto s2 = to_utf8(s1);

  return 0;
}
#include <stdio.h>
#include <string>
#include <codecvt>
#include <locale>
#include <vector>

using namespace std;
std::string utf8_to_string(const char *utf8str, const locale& loc){
    // UTF-8 to wstring
    wstring_convert<codecvt_utf8<wchar_t>> wconv;
    wstring wstr = wconv.from_bytes(utf8str);
    // wstring to string
    vector<char> buf(wstr.size());
    use_facet<ctype<wchar_t>>(loc).narrow(wstr.data(), wstr.data() + wstr.size(), '?', buf.data());
    return string(buf.data(), buf.size());
}

int main(int argc, char* argv[]){
    std::string ansi;
    char utf8txt[] = {0xc3, 0xa1, 0};

    // I guess you want to use Windows-1252 encoding...
    ansi = utf8_to_string(utf8txt, locale(".1252"));
    // Now do something with the string
    return 0;
}
标签
易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!