How can I make Unicode iostream i/o work in both Windows and Unix-land?

前端 未结 1 1525
青春惊慌失措
青春惊慌失措 2020-12-17 01:10

Note: This is a question-with-answer in order to document a technique that others might find useful, and in order to perhaps become aware of others’ …

相关标签:
1条回答
  • 2020-12-17 01:33

    Fix for the conversion problem:

    cppx/stdlib/iostreams_conversion_defect.fix.hpp
    #pragma once
    //----------------------------------------------------------------------------------------
    //    PROBLEM DESCRIPTION.
    //
    //    Output of wchar_t const* is only supported via an operator<< template. User-defined
    //    conversions are not considered for template matching. This results in actual argument
    //    with user conversion to wchar_t const*, for a wide stream, being presented as the
    //    pointer value instead of the string.
    
    #include <iostream>
    
    #ifndef CPPX_NO_IOSTREAM_CONVERSION_FIX
        namespace std{
            // Inserter for a single wide character. The second parameter is a plain
            // `wchar_t` rather than a deduced type, so an argument that merely has a
            // user-defined conversion to `wchar_t` can bind to it. The call is forwarded
            // to the standard two-parameter inserter template.
            template< class Traits >
            inline basic_ostream<wchar_t, Traits>& operator<<(
                basic_ostream<wchar_t, Traits>&     out,
                wchar_t const                       wide_char
                )
            {
                return operator<< <wchar_t, Traits>( out, wide_char );
            }

            // Inserter for a zero-terminated wide string. As above, the non-deduced
            // `wchar_t const*` parameter lets an argument with a user-defined conversion
            // to `wchar_t const*` be output as a string instead of as a pointer value.
            template< class Traits >
            inline basic_ostream<wchar_t, Traits>& operator<<(
                basic_ostream<wchar_t, Traits>&     out,
                wchar_t const* const                wide_string
                )
            {
                return operator<< <wchar_t, Traits>( out, wide_string );
            }
        }  // namespace std
    #endif
    

    Setting direct i/o mode in Windows:

    This is a standard library extension that's supported by both Visual C++ and MinGW g++.

    First, just because it's used in the code, definition of the Ptr type builder (the main drawback of library-provided type builders is that ordinary type inference doesn't kick in, i.e. it's necessary in some cases to still use the raw operator notation):

    cppx/core_language/type_builders.hpp
    ⋮
        // Type builder: `Ptr<T>` is just another spelling of the raw pointer type `T*`.
        template< class Pointee >
        using Ptr = Pointee*;
    ⋮
    

    A helper definition, because it's used in more than one file:

    cppx/stdlib/Iostream_mode.hpp
    #pragma once
    // Mode for a possibly console-attached iostream, such as std::wcout.
    
    namespace cppx {
        // Result of trying to configure a possibly console-attached stream.
        // Unscoped enumeration with `int` as underlying type; value-initialization
        // yields `unknown`.
        enum Iostream_mode: int
        {
            unknown     = 0,
            utf_8       = 1,
            direct_io   = 2
        };
    }  // namespace cppx
    

    Mode setters (base functionality):

    cppx/stdlib/impl/utf8_mode.for_windows.hpp
    #pragma once
    // UTF-8 mode for a stream in Windows.
    #ifndef _WIN32
    #   error This is a Windows only implementation.
    #endif
    
    #include <cppx/stdlib/Iostream_mode.hpp>
    
    #include <stdio.h>      // FILE, stdin, stdout, stderr, etc.
    
    // Non-standard headers, which are de facto standard in Windows:
    #include <io.h>         // _setmode, _isatty, _fileno etc.
    #include <fcntl.h>      // _O_WTEXT etc.
    
    namespace cppx {

        // Switches the C stream `f` to a Unicode translation mode via `_setmode`:
        // `_O_WTEXT` when `_isatty` returns nonzero for the stream's file number,
        // `_O_U8TEXT` otherwise. Returns `direct_io` or `utf_8` correspondingly, or
        // `unknown` when either `_fileno` or `_setmode` reports failure (-1).
        inline
        auto set_utf8_mode( const Ptr< FILE > f )
            -> Iostream_mode
        {
            const int fd = _fileno( f );                // See docs for error handling.
            if( fd == -1 )
            {
                return Iostream_mode::unknown;
            }

            const bool is_tty = (_isatty( fd ) != 0);
            const int requested_mode = (is_tty? _O_WTEXT : _O_U8TEXT);
            if( _setmode( fd, requested_mode ) == -1 )
            {
                return Iostream_mode::unknown;
            }
            return (is_tty? Iostream_mode::direct_io : Iostream_mode::utf_8);
        }

    }  // namespace cppx
    
    cppx/stdlib/impl/utf8_mode.generic.hpp
    #pragma once
    #include <stdio.h>      // FILE, stdin, stdout, stderr, etc.
    #include <cppx/core_language/type_builders.hpp>     // cppx::Ptr
    
    namespace cppx {

        // Do-nothing implementation: the stream argument is ignored and the
        // result is always `unknown`.
        inline auto set_utf8_mode( const Ptr< FILE > /*ignored*/ )
            -> Iostream_mode
        {
            return Iostream_mode::unknown;
        }

    }  // namespace cppx
    
    cppx/stdlib/utf8_mode.hpp
    #pragma once
    // UTF-8 mode for a stream. For Unix-land this is a no-op & the locale must be UTF-8.
    
    #include <cppx/core_language/type_builders.hpp>     // cppx::Ptr
    #include <cppx/stdlib/Iostream_mode.hpp>
    
    namespace cppx {
        // Declaration only; the definition is supplied by the platform-specific
        // implementation header selected by the `_WIN32` check that follows.
        inline auto set_utf8_mode( const Ptr< FILE > c_stream )
            -> Iostream_mode;
    }  // namespace cppx
    
    #ifdef _WIN32   // This also covers 64-bit Windows.
    #   include "impl/utf8_mode.for_windows.hpp"    // Using Windows-specific _setmode.
    #else
    #   include "impl/utf8_mode.generic.hpp"        // A do-nothing implementation.
    #endif
    

    Configuring the standard streams.

    In addition to setting direct console i/o mode or UTF-8 as appropriate in Windows, this fixes the implicit conversion defect; (indirectly) calls setlocale so that wide streams work in Unix-land; sets boolalpha just for good measure, as a more reasonable default; and includes all standard library headers to do with iostreams (I don't show the separate header file that does that, and it is to a degree a personal preference how much to include or whether to do such inclusion at all):

    cppx/stdlib/iostreams.hpp
    #pragma once
    // Standard iostreams but configured to work, plus, as utility, with boolalpha set.
    
    #include <raw_stdlib/iostreams.hpp>         // <iostream>, <sstream>, <fstream> etc. for convenience.
    
    #include <cppx/core_language/type_builders.hpp>     // cppx::Ptr
    #include <cppx/stdlib/utf8_mode.hpp>        // stdin etc., stdlib::set_utf8_mode
    #include <locale>                           // std::locale
    #include <stdexcept>                        // std::runtime_error
    #include <string>                           // std::string
    
    #include <cppx/stdlib/impl/iostreams_conversion_defect.fix.hpp> // Support arg conv.
    
    // Outputs a narrow `std::string` to a wide stream by inserting its `c_str()`
    // pointer, i.e. via the standard `char const*` inserter for wide streams.
    // Returns the stream to allow chaining.
    inline std::wostream& operator<<( std::wostream& stream, const std::string& s )
    {
        stream << s.c_str();
        return stream;
    }
    
    // The following code's sole purpose is to automatically initialize the streams.
    namespace cppx { namespace utf8_iostreams {
        using std::locale;
        using std::ostream;
        using std::cin; using std::cout; using std::cerr; using std::clog;
        using std::wostream;
        using std::wcin; using std::wcout; using std::wcerr; using std::wclog;
        using std::boolalpha;
    
        namespace detail {
            using std::wstreambuf;

            // Based on "Filtering streambufs" code by James Kanze published at
            // <url: http://gabisoft.free.fr/articles/fltrsbf1.html>.
            //
            // Input-only filtering stream buffer. Characters are fetched one at a time
            // from the `provider_` buffer and exposed through a one-character get area.
            // After a newline has been fetched, the provider is synced.
            //
            // The provider is not owned; it must outlive this object.
            class Correcting_input_buffer
                : public wstreambuf
            {
            private:
                wstreambuf*     provider_;      // Source of characters, not owned.
                wchar_t         buffer_;        // The one-character get area.

            protected:
                // Supplies the current character without consuming it, fetching a new
                // one from the provider when the get area is exhausted. Returns
                // `traits_type::eof()` when the provider has no more characters.
                auto underflow()
                    -> int_type override
                {
                    if( gptr() < egptr() )  { return traits_type::to_int_type( *gptr() ); }

                    const int_type result = provider_->sbumpc();
                    if( traits_type::eq_int_type( result, traits_type::to_int_type( L'\n' ) ) )
                    {
                        // Ad hoc workaround for g++ extra newline undesirable behavior:
                        provider_->pubsync();
                    }

                    // Explicit comparison against eof(): `traits_type::not_eof` returns
                    // the character itself, so using it as a condition would treat a
                    // valid L'\0' as end-of-file and leave the get area unset.
                    if( !traits_type::eq_int_type( result, traits_type::eof() ) )
                    {
                        buffer_ = traits_type::to_char_type( result );
                        setg( &buffer_, &buffer_, &buffer_ + 1 );
                    }
                    return result ;
                }

            public:
                // Wraps `a_provider`, which supplies the characters.
                Correcting_input_buffer( wstreambuf* a_provider )
                    : provider_( a_provider )
                    , buffer_()
                {}
            };
        }  // namespace detail
    
        // Constructing any `Usage` object configures the standard streams; the actual
        // work is done only once per program, on the first construction.
        class Usage
        {
        private:
            // Makes the user's environment locale the global locale. If the environment
            // names a locale that isn't available, the global locale is left unchanged.
            static
            void use_environment_locale()
            {
                try
                {
                    locale::global( locale( "" ) ); // Effects a `setlocale( LC_ALL, "" )`.
                }
                catch( const std::runtime_error& )
                {
                    // `locale( "" )` throws when the environment specifies an unknown
                    // locale. This code runs from the constructor of a namespace scope
                    // object, where an escaping exception can't be caught by the
                    // program and would terminate it before `main`; so carry on with
                    // the current global locale instead.
                }
            }

            // Performs the one-time stream configuration: locale, UTF-8/direct i/o
            // mode for the three standard C streams, and `boolalpha` for the output
            // streams.
            static
            void init_once()
            {
                // In Windows there is no UTF-8 encoding spec for the locale, in Unix-land
                // it's the default. From Microsoft's documentation: "If you provide a code
                // page like UTF-7 or UTF-8, setlocale will fail, returning NULL". Still
                // this call is essential for making the wide streams work correctly in
                // Unix-land.
                use_environment_locale();

                for( const Ptr<FILE> c_stream : {stdin, stdout, stderr} )
                {
                    const auto new_mode = set_utf8_mode( c_stream );
                    if( c_stream == stdin && new_mode == Iostream_mode::direct_io )
                    {
                        // Route `wcin` through the correcting filter; the filter object
                        // is static because `wcin` keeps a pointer to it.
                        static detail::Correcting_input_buffer  correcting_buffer( wcin.rdbuf() );
                        wcin.rdbuf( &correcting_buffer );
                    }
                }

                for( const Ptr<ostream> stream_ptr : {&cout, &cerr, &clog} )
                {
                    *stream_ptr << boolalpha;
                }

                for( const Ptr<wostream> stream_ptr : {&wcout, &wcerr, &wclog} )
                {
                    *stream_ptr << boolalpha;
                }
            }

        public:
            // The initialization of the local static runs `init_once` on the first
            // construction only.
            Usage()
            { static const bool dummy = (init_once(), true); (void) dummy; }
        };
    
        namespace detail {
            // Constructing this object triggers the stream configuration done by
            // the `Usage` constructor.
            const Usage usage{};
        }  // namespace detail
    
    }}  // namespace cppx::utf8_iostreams
    

    The two example programs in the question are fixed simply by including the above header instead of, or in addition to, <iostream>. When it's included in addition to <iostream>, the inclusion can be in a separate translation unit (except for the implicit conversion defect fix: if that's desired, the header for it must be included somehow in each translation unit that needs it), or it can be done e.g. as a forced include in the build command.

    0 讨论(0)
提交回复
热议问题