问题
Consider:
STDMETHODIMP CFileSystemAPI::setRRConfig( BSTR config_str, VARIANT* ret )
{
mReportReaderFactory.reset( new sbis::report_reader::ReportReaderFactory() );
USES_CONVERSION;
std::string configuration_str = W2A( config_str );
But in config_str I get a string in UTF-16. How can I convert it to UTF-8 in this piece of code?
回答1:
If you are using C++11, you can use std::wstring_convert together with std::codecvt_utf8_utf16 (note that both are deprecated as of C++17):
http://www.cplusplus.com/reference/codecvt/codecvt_utf8_utf16/
回答2:
/**
 * Appends the UTF-8 encoding of a single Unicode code point to a byte buffer.
 *
 * @param buffer         Destination buffer. The caller must guarantee room for
 *                       up to four bytes starting at buffer[*offset].
 * @param offset         In/out write position inside buffer; advanced by the
 *                       number of bytes written (0 to 4).
 * @param ucs_character  Code point to encode.
 *
 * Nothing is written and *offset is left untouched when ucs_character is not a
 * Unicode scalar value, i.e. when it is negative (wchar_t is signed on some
 * platforms), a UTF-16 surrogate (U+D800..U+DFFF), or above U+10FFFF. UTF-8 as
 * defined by RFC 3629 has no encoding for those values, so the historical
 * five- and six-byte forms are intentionally not produced.
 */
void encode_unicode_character(char* buffer, int* offset, wchar_t ucs_character)
{
    // Converting through an unsigned type maps negative inputs to very large
    // values, so they are rejected by the range check below instead of
    // slipping through the "<= 0x7F" ASCII test.
    const unsigned long code_point = static_cast<unsigned long>(ucs_character);

    const bool is_surrogate = code_point >= 0xD800UL && code_point <= 0xDFFFUL;
    if (is_surrogate || code_point > 0x10FFFFUL)
    {
        // Invalid char; don't encode anything.
        return;
    }

    char* out = buffer + *offset;
    int written = 0;

    if (code_point <= 0x7FUL)
    {
        // Plain single-byte ASCII.
        out[0] = static_cast<char>(code_point);
        written = 1;
    }
    else if (code_point <= 0x7FFUL)
    {
        // Two bytes: 110xxxxx 10xxxxxx.
        out[0] = static_cast<char>(0xC0UL | (code_point >> 6));
        out[1] = static_cast<char>(0x80UL | (code_point & 0x3FUL));
        written = 2;
    }
    else if (code_point <= 0xFFFFUL)
    {
        // Three bytes: 1110xxxx 10xxxxxx 10xxxxxx.
        out[0] = static_cast<char>(0xE0UL | (code_point >> 12));
        out[1] = static_cast<char>(0x80UL | ((code_point >> 6) & 0x3FUL));
        out[2] = static_cast<char>(0x80UL | (code_point & 0x3FUL));
        written = 3;
    }
    else
    {
        // Four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx.
        out[0] = static_cast<char>(0xF0UL | (code_point >> 18));
        out[1] = static_cast<char>(0x80UL | ((code_point >> 12) & 0x3FUL));
        out[2] = static_cast<char>(0x80UL | ((code_point >> 6) & 0x3FUL));
        out[3] = static_cast<char>(0x80UL | (code_point & 0x3FUL));
        written = 4;
    }

    *offset += written;
}
ISO/IEC 10646:2012 is all you need to understand UCS.
回答3:
You can do something like this
/**
 * Converts a UTF-16 wide string to a UTF-8 encoded std::string using the
 * Windows WideCharToMultiByte API.
 *
 * The explicit length of wstr is passed to the API instead of -1, so embedded
 * NUL characters are converted too and the result carries no extra terminator.
 *
 * @param wstr  Wide string to convert; an empty string yields an empty result.
 * @return      The UTF-8 encoded copy of wstr.
 * @throws std::runtime_error if wstr is too long for the API's int length
 *         parameter, or if either WideCharToMultiByte call reports failure.
 */
std::string WstrToUtf8Str(const std::wstring& wstr)
{
    std::string retStr;
    if (wstr.empty())
    {
        return retStr;
    }

    // WideCharToMultiByte takes an int length; make sure the size survives
    // the narrowing round trip before trusting it.
    const int wideLength = static_cast<int>(wstr.size());
    bool converted = wideLength > 0
        && static_cast<std::wstring::size_type>(wideLength) == wstr.size();

    if (converted)
    {
        // First call: ask how many bytes the UTF-8 form needs.
        const int sizeRequired = WideCharToMultiByte(CP_UTF8, 0, wstr.data(), wideLength,
                                                     NULL, 0, NULL, NULL);
        converted = sizeRequired > 0;
        if (converted)
        {
            // Second call: convert straight into the result's own storage.
            retStr.resize(static_cast<std::string::size_type>(sizeRequired));
            const int bytesConverted = WideCharToMultiByte(CP_UTF8, 0, wstr.data(), wideLength,
                                                           &retStr[0], sizeRequired, NULL, NULL);
            converted = bytesConverted > 0;
            if (converted)
            {
                retStr.resize(static_cast<std::string::size_type>(bytesConverted));
            }
        }
    }

    if (!converted)
    {
        // The wide text is deliberately not streamed into the narrow stream:
        // inserting a const wchar_t* there prints a pointer value, not text.
        std::stringstream err;
        err << __FUNCTION__
            << " std::string WstrToUtf8Str failed to convert wstring (length "
            << wstr.size() << ")";
        throw std::runtime_error( err.str() );
    }
    return retStr;
}
You can pass your BSTR to the function as a std::wstring, for example: WstrToUtf8Str(std::wstring(config_str, SysStringLen(config_str))).
来源:https://stackoverflow.com/questions/21456926/how-do-i-convert-a-string-in-utf-16-to-utf-8-in-c