Reading JSON file with C++ and BOOST

后端 未结 3 1389
青春惊慌失措
青春惊慌失措 2020-12-02 01:33

An HTTP server sends me a JSON response (a string) like this :

{
    "folders" :
    [{
            "id" : 109,
            "parent_id" : 110,
                 


        
3条回答
  •  忘掉有多难
    2020-12-02 02:03

    Because the data structure in the other answer was deemed "very complex" and the target data structure was suggested to be:

    // Strongly-typed target for the parsed JSON document.
    struct Data {
        // One entry of the "folders" array.
        struct Folder { int id, parent_id; std::string path; };
        // One entry of the "files" array.
        struct File   { int id, parent_id; std::string path, name, md5_hash; };
    
        // The element types are required: a bare `std::vector` names a template, not a type.
        using Folders = std::vector<Folder>;
        using Files   = std::vector<File>;
    
        Folders folders;
        Files   files;
    };
    

    I ended up writing a transformation from generic "JSON" to that data structure (see the other answer: Reading JSON file with C++ and BOOST).

    However, perhaps the OP will be more pleased if we "skip the middle man" and parse the JSON specifically into the shown Data structure. This "simplifies" the grammar making it specific for this type of document only:

    // Top-level object. C++ gives `>>` higher precedence than `^`, so the
    // permutation must be parenthesized; otherwise '{' binds to the first
    // operand and '}' to the second, and the two properties can be neither
    // re-ordered nor omitted.
    start    = '{' >> (
                    (folders_ >> commasep) ^
                    (files_   >> commasep)
                ) >> '}';
    
    // Each top-level property is a (possibly empty) comma-separated array of objects.
    folders_ = prop_key(+"folders") >> '[' >> -(folder_ % ',') >> ']';
    files_   = prop_key(+"files")   >> '[' >> -(file_   % ',') >> ']';
    
    // `^` is the permutation operator: the properties may appear in any order
    // and each one may be left out.
    folder_  = '{' >> (
                    (prop_key(+"id")        >> int_  >> commasep) ^
                    (prop_key(+"parent_id") >> int_  >> commasep) ^
                    (prop_key(+"path")      >> text_ >> commasep)
                ) >> '}';
    file_    = '{' >> (
                    (prop_key(+"id")        >> int_  >> commasep) ^
                    (prop_key(+"parent_id") >> int_  >> commasep) ^
                    (prop_key(+"path")      >> text_ >> commasep) ^
                    (prop_key(+"name")      >> text_ >> commasep) ^
                    (prop_key(+"hash")      >> text_ >> commasep)
                ) >> '}';
    
    // Matches `"<key>" :` where the key is the rule's inherited attribute (_r1);
    // the unary `+` at the call sites decays the string literal to `const char*`.
    prop_key = lexeme ['"' >> lazy(_r1) >> '"'] >> ':';
    // A property is followed either by a ',' or by the closing '}' of its object;
    // the and-predicate only peeks at the '}' and leaves it for the enclosing rule.
    commasep = &char_('}') | ',';
    

    This grammar allows

    • insignificant whitespace,
    • re-ordering of properties within objects
    • and omitted object properties

    Benefits:

    • early checking of property value types
    • lower compile times
    • less code indeed: 37 fewer LoC (not counting the sample JSON lines; that's ~22%)

    That last benefit has a flip side: if ever you want to read slightly different JSON, now you need to muck with the grammar instead of just writing a different extraction/transform. At 37 lines of code, my preference is with the other answer but I'll leave it to you to decide.

    Here's the same demo program using this grammar directly:

    Live On Coliru

    //#define BOOST_SPIRIT_DEBUG
    #include <cstdint>
    #include <iostream>
    #include <iterator>
    #include <stdexcept>
    #include <string>
    #include <vector>
    #include <boost/fusion/adapted/struct.hpp>
    #include <boost/spirit/include/qi.hpp>
    #include <boost/spirit/include/phoenix.hpp>
    
    // Short alias for the Spirit Qi namespace used by the grammar and parse() below.
    namespace qi = boost::spirit::qi;
    
    // Sample JSON document passed to folder_info::parse() in main():
    // two "folders" entries and two "files" entries. The "path" values use the
    // JSON escaped solidus ("\/"); the raw string literal keeps the backslashes.
    static std::string const sample = R"(
        {
            "folders" :
            [{
                    "id" : 109,
                    "parent_id" : 110,
                    "path" : "\/1\/105\/110\/"
                },
                {
                    "id" : 110,
                    "parent_id" : 105,
                    "path" : "\/1\/105\/"
                }
            ],
    
            "files" :
            [{
                    "id" : 26,
                    "parent_id" : 105,
                    "name" : "picture.png",
                    "hash" : "md5_hash",
                    "path" : "\/1\/105\/"
                },
                {
                    "id" : 25,
                    "parent_id" : 110,
                    "name" : "another_picture.jpg",
                    "hash" : "md5_hash",
                    "path" : "\/1\/105\/110\/"
                }
            ]
        })";
    
    // Strongly-typed target for the parsed JSON document.
    struct Data {
        // One entry of the "folders" array.
        struct Folder { int id, parent_id; std::string path; };
        // One entry of the "files" array.
        struct File   { int id, parent_id; std::string path, name, md5_hash; };
    
        // The element types are required: a bare `std::vector` names a template, not a type.
        using Folders = std::vector<Folder>;
        using Files   = std::vector<File>;
    
        Folders folders;
        Files   files;
    };
    
    // Adapt the plain structs as Boost.Fusion sequences so the grammar's rules can
    // use them as attributes. The members are listed in the same order as the
    // operands of the corresponding permutation in the grammar (folder_: id,
    // parent_id, path; file_: id, parent_id, path, name, hash -> md5_hash;
    // start: folders, files).
    BOOST_FUSION_ADAPT_STRUCT(Data::Folder, (int,id)(int,parent_id)(std::string,path))
    BOOST_FUSION_ADAPT_STRUCT(Data::File,   (int,id)(int,parent_id)(std::string,path)(std::string,name)(std::string,md5_hash))
    BOOST_FUSION_ADAPT_STRUCT(Data,         (Data::Folders,folders)(Data::Files,files))
    
    namespace folder_info { // adhoc JSON parser
    
        template 
        struct grammar : qi::grammar
        {
            grammar() : grammar::base_type(start) {
                using namespace qi;
    
                start    = '{' >> 
                           (folders_ >> commasep) ^
                           (files_ >> commasep)
                         >> '}';
    
                folders_ = prop_key(+"folders") >> '[' >> -(folder_ % ',') >> ']';
                files_   = prop_key(+"files")   >> '[' >> -(file_   % ',') >> ']';
    
                folder_  = '{' >> (
                                (prop_key(+"id")        >> int_  >> commasep) ^
                                (prop_key(+"parent_id") >> int_  >> commasep) ^
                                (prop_key(+"path")      >> text_ >> commasep)
                            ) >> '}';
                file_    = '{' >> (
                                (prop_key(+"id")        >> int_  >> commasep) ^
                                (prop_key(+"parent_id") >> int_  >> commasep) ^
                                (prop_key(+"path")      >> text_ >> commasep) ^
                                (prop_key(+"name")      >> text_ >> commasep) ^
                                (prop_key(+"hash")      >> text_ >> commasep)
                            ) >> '}';
    
                prop_key = lexeme ['"' >> lazy(_r1) >> '"'] >> ':';
                commasep = &char_('}') | ',';
    
                ////////////////////////////////////////
                // Bonus: properly decoding the string:
                text_   = '"' >> *ch_ >> '"';
    
                ch_ = +(
                        ~char_("\"\\")) [ _val += _1 ] |
                           qi::lit("\x5C") >> (               // \ (reverse solidus)
                           qi::lit("\x22") [ _val += '"'  ] | // "    quotation mark  U+0022
                           qi::lit("\x5C") [ _val += '\\' ] | // \    reverse solidus U+005C
                           qi::lit("\x2F") [ _val += '/'  ] | // /    solidus         U+002F
                           qi::lit("\x62") [ _val += '\b' ] | // b    backspace       U+0008
                           qi::lit("\x66") [ _val += '\f' ] | // f    form feed       U+000C
                           qi::lit("\x6E") [ _val += '\n' ] | // n    line feed       U+000A
                           qi::lit("\x72") [ _val += '\r' ] | // r    carriage return U+000D
                           qi::lit("\x74") [ _val += '\t' ] | // t    tab             U+0009
                           qi::lit("\x75")                    // uXXXX                U+XXXX
                                >> _4HEXDIG [ append_utf8(qi::_val, qi::_1) ]
                        );
    
                BOOST_SPIRIT_DEBUG_NODES((files_)(folders_)(file_)(folder_)(start)(text_))
            }
        private:
            qi::rule start;
            qi::rule files_;
            qi::rule folders_;
            qi::rule file_;
            qi::rule folder_;
            qi::rule prop_key;
    
            qi::rule text_, ch_;
            qi::rule commasep;
    
            struct append_utf8_f {
                template  struct result { typedef void type; };
                template 
                void operator()(String& to, Codepoint codepoint) const {
                    auto out = std::back_inserter(to);
                    boost::utf8_output_iterator convert(out);
                    *convert++ = codepoint;
                }
            };
            boost::phoenix::function append_utf8;
            qi::uint_parser _4HEXDIG;
        };
    
        template ::type>
        Data parse(Range const& input) {
            grammar g;
    
            It first(boost::begin(input)), last(boost::end(input));
            Data parsed;
            bool ok = qi::phrase_parse(first, last, g, qi::space, parsed);
    
            if (ok && (first == last))
                return parsed;
    
            throw std::runtime_error("Remaining unparsed: '" + std::string(first, last) + "'");
        }
    }
    
    // Demo driver: parses the embedded sample document and prints one
    // tab-separated line per folder, then one per file.
    int main()
    {
        auto listing = folder_info::parse(sample);
    
        for (auto const& folder : listing.folders) {
            std::cout << "folder:\t" << folder.id << "\t" << folder.path << "\n";
        }
    
        for (auto const& file : listing.files) {
            std::cout << "file:\t" << file.id << "\t" << file.path << "\t" << file.name << "\n";
        }
    }
    

    Output:

    folder: 109 /1/105/110/
    folder: 110 /1/105/
    file:   26  /1/105/ picture.png
    file:   25  /1/105/110/ another_picture.jpg
    

提交回复
热议问题