How do I HTML-/ URL-Encode a std::wstring containing Unicode characters?

后端 未结 4 1750
猫巷女王i
猫巷女王i 2021-01-06 02:36

I have yet another question. Suppose I have a std::wstring that looks like this:

ドイツ語で検索していてこちらのサイトにたどり着きました。

How could I get its HTML-encoded and URL-encoded forms?

4条回答
  •  庸人自扰
    2021-01-06 03:01

    Here is an example which shows two methods, one based on the Qt library and one based on the ICU library. Both should be fairly platform-independent:

    // Standard library
    #include <iomanip>
    #include <iostream>
    #include <sstream>
    #include <string>
    
    // Boost
    #include <boost/scoped_array.hpp>
    
    // Qt
    #include <QtCore/QString>
    #include <QtCore/QUrl>
    #include <QtCore/QVector>
    
    // ICU
    #include <unicode/schriter.h>
    #include <unicode/unistr.h>
    #include <unicode/ustring.h>
    #include <unicode/utypes.h>
    
    /// Demonstrates URL- and HTML-encoding of a wide string using Qt.
    ///
    /// Prints two lines to std::cout:
    ///   - the percent-encoded form produced by QUrl::toEncoded(), and
    ///   - the text as a sequence of decimal numeric character references
    ///     ("&#NNNN;"), one per UCS-4 code point.
    void encodeQt() {
      const QString str = QString::fromWCharArray(L"ドイツ語で検索していてこちらのサイトにたどり着きました。");

      // Direct initialization works even when implicit QString -> QUrl
      // conversion is disabled (QT_NO_URL_CAST_FROM_STRING).
      const QUrl url(str);
      std::cout << "URL encoded: " << url.toEncoded().constData() << '\n';

      // QString stores UTF-16; toUcs4() yields whole code points, so characters
      // outside the BMP become a single numeric reference rather than two
      // surrogate halves.
      typedef QVector<uint> CodePointVector;
      const CodePointVector codePoints = str.toUcs4();

      std::stringstream htmlEncoded;
      for (CodePointVector::const_iterator it = codePoints.constBegin(); it != codePoints.constEnd(); ++it) {
        htmlEncoded << "&#" << *it << ';';
      }
      std::cout << "HTML encoded: " << htmlEncoded.str() << '\n';
    }
    
    void encodeICU() {
      const std::wstring cppString = L"ドイツ語で検索していてこちらのサイトにたどり着きました。";
      int bufSize = cppString.length() * 2;
      boost::scoped_array strBuffer(new UChar[bufSize]);
      int size = 0;
      UErrorCode error = U_ZERO_ERROR;
      u_strFromWCS(strBuffer.get(), bufSize, &size, cppString.data(), cppString.length(), &error);
      if (error) return;
      const UnicodeString str(strBuffer.get(), size);
      bufSize = str.length() * 4;
      boost::scoped_array buffer(new char[bufSize]);
      u_strToUTF8(buffer.get(), bufSize, &size, str.getBuffer(), str.length(), &error);
      if (error) return;
      const std::string urlUtf8(buffer.get(), size);
      std::stringstream urlEncoded;
      urlEncoded << std::hex << std::setfill('0');
      for (std::string::const_iterator it = urlUtf8.begin(); it != urlUtf8.end(); ++it) {
        urlEncoded << '%' << std::setw(2) << static_cast(static_cast(*it));
      }
      std::cout << "URL encoded: " << urlEncoded.str() << std::endl;
      std::stringstream htmlEncoded;
      StringCharacterIterator it = str;
      while (it.hasNext()) {
        const UChar32 pt = it.next32PostInc();
        htmlEncoded << "&#" << pt << ';';
      }
      std::cout << "HTML encoded: " << htmlEncoded.str() << std::endl;
    }
    
    
    /// Program entry point: runs both encoding demonstrations in sequence.
    int main() {
      // Qt-based variant first ...
      encodeQt();
      // ... followed by the ICU-based variant.
      encodeICU();
      return 0;
    }
    

提交回复
热议问题