Writing utf16 to file in binary mode

前端 未结 6 1134
逝去的感伤
逝去的感伤 2020-11-27 04:44

I'm trying to write a wstring to file with ofstream in binary mode, but I think I'm doing something wrong. This is what I've tried:

ofstream outFile(\"tes         


        
6条回答
  •  予麋鹿
    予麋鹿 (楼主)
    2020-11-27 05:25

    Here we run into the little-used locale properties. If you output your string as a string (rather than as raw data), you can get the locale to do the appropriate conversion automagically.

    N.B. This code does not take into account the endianness of the wchar_t character.

    #include <locale>
    #include <fstream>
    #include <iostream>
    // See Below for the facet
    #include "UTF16Facet.h"
    
    /*
     * Writes one line of wide text to "output.uni" through a stream whose locale
     * carries a UTF16Facet, so the facet performs the wchar_t -> byte conversion.
     *
     * Returns 0 on success and 1 if the output file could not be opened.
     */
    int main(int argc,char* argv[])
    {
       // The command line is not used.
       (void)argc;
       (void)argv;

       // Construct a custom unicode facet and add it to a locale.
       // The facet is created with the default reference count of 0, so the
       // locale takes ownership and deletes it; it must not be deleted here.
       UTF16Facet *unicodeFacet = new UTF16Facet();
       const std::locale unicodeLocale(std::cout.getloc(), unicodeFacet);

       // Create a stream and imbue it with the facet.
       std::wofstream   saveFile;
       saveFile.imbue(unicodeLocale);

       // Now the stream is imbued we can open it.
       // NB (original author's note): if you open the file stream first, any
       // attempt to imbue it with a locale will silently fail.
       //
       // Open in binary mode: in text mode some platforms rewrite '\n' in the
       // byte stream produced by the facet, which would insert a stray byte in
       // the middle of a 2-byte code unit.
       saveFile.open("output.uni", std::ios::out | std::ios::binary);
       if (!saveFile.is_open())
       {
           std::cerr << "Failed to open output.uni for writing\n";
           return(1);
       }

       saveFile << L"This is my Data\n";

       return(0);
    }
    

    The File: UTF16Facet.h

     #include <locale>
    
    /*
     * A std::codecvt facet that stores every wchar_t as exactly one 2-byte
     * UTF-16 code unit in the external (file) byte stream.
     *
     * Limitations visible in the code below:
     *  - The two bytes of each code unit are written/read in the byte order of
     *    the host; no byte-order mark is produced or consumed.
     *  - Only the low 16 bits of each wchar_t are kept. Where wchar_t is wider
     *    than 16 bits, values above 0xFFFF are truncated; no surrogate pairs are
     *    generated.
     */
    class UTF16Facet: public std::codecvt<wchar_t,char,std::char_traits<wchar_t>::state_type>
    {
       typedef std::codecvt<wchar_t,char,std::char_traits<wchar_t>::state_type> MyType;
       typedef MyType::state_type          state_type;
       typedef MyType::result              result;

       /* Number of external bytes used for every character. */
       enum { unit_size = 2 };

       /* Integer type holding one external code unit. Going through this type
        * (instead of copying the first two bytes of a wchar_t) selects the low
        * 16 bits of the character regardless of sizeof(wchar_t) and of the
        * host byte order.
        */
       typedef unsigned short              code_unit;

       /* Compile-time check: fails with a negative array size if code_unit is
        * not exactly unit_size bytes wide.
        */
       typedef char code_unit_must_be_two_bytes[(sizeof(code_unit) == unit_size) ? 1 : -1];


       /* This function deals with converting data from the input stream into the internal stream.*/
       /*
        * from, from_end:  Points to the beginning and end of the input that we are converting 'from'.
        * to,   to_limit:  Points to where we are writing the conversion 'to'
        * from_next:       When the function exits this is updated to point at the next location
        *                  to read from. (ie the first unconverted input byte)
        * to_next:         When the function exits this is updated to point at the next location
        *                  to write to.
        *
        * returns:         ok:         All of the input was converted.
        *                  partial:    Input remains unconverted, either because the output
        *                              is full or because a single trailing byte does not
        *                              form a complete code unit yet.
        */
       virtual result  do_in(state_type &s,
                               const char  *from,const char *from_end,const char* &from_next,
                               wchar_t     *to,  wchar_t    *to_limit,wchar_t*    &to_next) const
       {
           (void)s;    // The encoding is stateless.

           // Only consume input while a complete code unit is available, so a
           // lone trailing byte is never read past the end of the buffer.
           for(;((from_end - from) >= unit_size) && (to < to_limit);from += unit_size,++to)
           {
               /* Assemble the two input bytes into a code unit, then widen it.
                * The widening leaves every bit above the low 16 cleared, which
                * matters on systems with 32bit wchar_t objects.
                */
               code_unit   unit    = 0;
               char*       raw     = reinterpret_cast<char*>(&unit);
               raw[0]  = from[0];
               raw[1]  = from[1];

               (*to)   = static_cast<wchar_t>(unit);
           }
           from_next   = from;
           to_next     = to;

           return((from == from_end)?ok:partial);
       }



       /* This function deals with converting data from the internal stream to a C/C++ file stream.*/
       /*
        * from, from_end:  Points to the beginning and end of the input that we are converting 'from'.
        * to,   to_limit:  Points to where we are writing the conversion 'to'
        * from_next:       When the function exits this is updated to point at the next location
        *                  to read from. (ie the first unconverted input character)
        * to_next:         When the function exits this is updated to point at the next location
        *                  to write to.
        *
        * returns:         ok:         All of the input was converted.
        *                  partial:    The output has room for fewer than two more bytes
        *                              while input characters remain.
        */
       virtual result do_out(state_type &state,
                               const wchar_t *from, const wchar_t *from_end, const wchar_t* &from_next,
                               char          *to,   char          *to_limit, char*          &to_next) const
       {
           (void)state;    // The encoding is stateless.

           // Only convert while both output bytes fit, so nothing is written
           // past to_limit when an odd number of bytes is left.
           for(;(from < from_end) && ((to_limit - to) >= unit_size);++from,to += unit_size)
           {
               /* Output the Data */
               /* NB The characters are assumed to be encoded as UTF-16, i.e.
                * 16 significant bits inside a wchar_t object. The conversion to
                * code_unit keeps exactly those bottom 16 bits whatever the size
                * of wchar_t is on this platform.
                */
               const code_unit unit    = static_cast<code_unit>(*from);
               const char*     raw     = reinterpret_cast<const char*>(&unit);
               to[0]     = raw[0];
               to[1]     = raw[1];
           }
           from_next   = from;
           to_next     = to;

           return((from == from_end)?ok:partial);
       }


       /* Every character occupies a fixed number of external bytes. */
       virtual int do_encoding() const throw()
       {
           return(unit_size);
       }

       /* Upper bound on the external bytes produced for one character. Callers
        * that size their output buffer from max_length() need this to be 2.
        */
       virtual int do_max_length() const throw()
       {
           return(unit_size);
       }
    };
    

提交回复
热议问题