Spirit-Qi: How can I write a nonterminal parser?

后端 未结 1 614
粉色の甜心
粉色の甜心 2020-11-30 12:46

I want to write a parser (as a qi extension) which can be used via my_parser(p1, p2, ...) where p1, p2, ... are qi parser expressions.

Actu

相关标签:
1条回答
  • 2020-11-30 13:41

    Your example

    I don't see how your sample requires any of this. Just reorder your branches, then realize that the short branch is just a special case of the qualified case with n=1: Live On Coliru¹ (or using X3 version if you prefer).

    The generic case

    Now, having mentioned x3, it has the capacity to make your life much easier!

    Here's what I think you wanted, in the general case:

    namespace parser {
    
        template <typename... Parsers>
        struct longest_parser : x3::parser_base {
            longest_parser(Parsers... sub) : _alternatives {sub...} { }
    
            template <typename It, typename Ctx, typename Other, typename Attr>
            bool parse(It& f, It l, Ctx& ctx, Other const& other, Attr& attr) const {
                auto const saved = f;
    
                //// To exclude pre-skip from length comparisons, do pre-skip here:
                // x3::skip_over(f, l, ctx);
                auto seq = std::make_index_sequence<sizeof...(Parsers)>();
    
                auto best = select_best(f, l, ctx, seq);
                //std::cout << "Longest match at index #" << best << "\n";
    
                bool ok = dispatch(f, l, ctx, other, attr, best, seq);
    
                if (!ok)
                    f = saved;
    
                return ok;
            }
    
          private:
            template <typename It, typename Ctx, typename P>
            size_t length_of(It f, It l, Ctx ctx, P const& p) const {
                boost::iterator_range<It> matched;
                return x3::raw[p].parse(f, l, ctx, x3::unused, matched)? boost::size(matched) : 0;
            }
    
            template <typename It, typename Ctx, size_t... I>
                size_t select_best(It f, It l, Ctx& ctx, std::index_sequence<I...>) const {
                    std::array<size_t, sizeof...(I)> lengths { length_of(f, l, ctx, std::get<I>(_alternatives))... };
                    return std::distance(lengths.begin(), std::max_element(lengths.begin(), lengths.end()));
                }
    
            template <typename It, typename Ctx, typename Other, typename Attr, size_t... I>
            bool dispatch(It& f, It l, Ctx& ctx, Other const& other, Attr& attr, size_t targetIdx, std::index_sequence<I...>) const {
                //return (real_parse<I>(f, l, ctx, other, attr, targetIdx) || ...);
                std::array<bool, sizeof...(I)> b = { real_parse<I>(f, l, ctx, other, attr, targetIdx)... };
    
                return std::accumulate(b.begin(), b.end(), false, std::logical_or<bool>());
            }
    
            template <size_t Idx, typename It, typename Ctx, typename Other, typename Attr>
            bool real_parse(It& f, It l, Ctx& ctx, Other const& other, Attr& attr, size_t targetIdx) const {
                if (targetIdx != Idx)
                    return false;
    
                return std::get<Idx>(_alternatives).parse(f, l, ctx, other, attr);
            }
    
            std::tuple<Parsers...> _alternatives;
        };
    
        template <typename... Ps>
            longest_parser<Ps...> longest(Ps... p) { return {x3::as_parser(p)...}; }
    }
    

    Note the fold expression you could use in dispatch if your compiler supports it (Coliru does, edit it to see!).

    Note also the subtle choice regarding skippable input (probably whitespace): if it should not count towards the length comparisons, uncomment the pre-skip.

    Live Demo

    Live On Coliru

    #include <boost/spirit/home/x3.hpp>
    #include <type_traits>
    #include <iostream>
    #include <numeric>
    
    namespace x3 = boost::spirit::x3;
    
    namespace std {
        // Debug-printing hack: streams each element of a vector wrapped in
        // square brackets, e.g. {1, 2} is written as "[1][2]". It lives in
        // namespace std so that argument-dependent lookup finds it for
        // std::vector; adding overloads to std is not sanctioned by the
        // standard, so keep this out of production code.
        template <typename T> // just for easy debug printing; hack
        static std::ostream& operator<<(std::ostream& os, std::vector<T> const& v) {
            // Write to the stream we were handed, not unconditionally to std::cout.
            for (auto const& el : v) os << '[' << el << ']';
            return os;
        }
    }
    
    // List of strings; used as the attribute of the `qualified` rule in main().
    using string_vec  = std::vector<std::string>;
    // Attribute handed to phrase_parse by the test driver: it can hold a
    // string, a double, or a list of strings.
    using result_type = boost::variant<std::string, double, string_vec>;
    
    template <typename Parser>
    void parse(const std::string message, const std::string &input, const std::string &rule, const Parser &parser) {
        auto iter = input.begin(), end = input.end();
    
        std::cout << "-------------------------\n";
        std::cout << message << "\n";
        std::cout << "Rule:     " << rule  << "\n";
        std::cout << "Parsing: '" << input << "'\n";
    
        result_type parsed_result;
        bool result = phrase_parse(iter, end, parser, x3::space, parsed_result);
    
        if (result) {
            std::cout << "Parsed " << parsed_result << "\n";
        } else {
            std::cout << "Parser failed\n";
        }
        if (iter != end)
            std::cout << "EOI not reached. Unparsed: '" << std::string(iter, end) << "'\n";
    }
    
    namespace parser {
    
        template <typename... Parsers>
        struct longest_parser : x3::parser_base {
            longest_parser(Parsers... sub) : _alternatives {sub...} { }
    
            template <typename It, typename Ctx, typename Other, typename Attr>
            bool parse(It& f, It l, Ctx& ctx, Other const& other, Attr& attr) const {
                auto const saved = f;
    
                //// To exclude pre-skip from length comparisons, do pre-skip here:
                // x3::skip_over(f, l, ctx);
                auto seq = std::make_index_sequence<sizeof...(Parsers)>();
    
                auto best = select_best(f, l, ctx, seq);
                //std::cout << "Longest match at index #" << best << "\n";
    
                bool ok = dispatch(f, l, ctx, other, attr, best, seq);
    
                if (!ok)
                    f = saved;
    
                return ok;
            }
    
          private:
            template <typename It, typename Ctx, typename P>
            size_t length_of(It f, It l, Ctx ctx, P const& p) const {
                boost::iterator_range<It> matched;
                return x3::raw[p].parse(f, l, ctx, x3::unused, matched)? boost::size(matched) : 0;
            }
    
            template <typename It, typename Ctx, size_t... I>
                size_t select_best(It f, It l, Ctx& ctx, std::index_sequence<I...>) const {
                    std::array<size_t, sizeof...(I)> lengths { length_of(f, l, ctx, std::get<I>(_alternatives))... };
                    return std::distance(lengths.begin(), std::max_element(lengths.begin(), lengths.end()));
                }
    
            template <typename It, typename Ctx, typename Other, typename Attr, size_t... I>
            bool dispatch(It& f, It l, Ctx& ctx, Other const& other, Attr& attr, size_t targetIdx, std::index_sequence<I...>) const {
                //return (real_parse<I>(f, l, ctx, other, attr, targetIdx) || ...);
                std::array<bool, sizeof...(I)> b = { real_parse<I>(f, l, ctx, other, attr, targetIdx)... };
    
                return std::accumulate(b.begin(), b.end(), false, std::logical_or<bool>());
            }
    
            template <size_t Idx, typename It, typename Ctx, typename Other, typename Attr>
            bool real_parse(It& f, It l, Ctx& ctx, Other const& other, Attr& attr, size_t targetIdx) const {
                if (targetIdx != Idx)
                    return false;
    
                return std::get<Idx>(_alternatives).parse(f, l, ctx, other, attr);
            }
    
            std::tuple<Parsers...> _alternatives;
        };
    
        template <typename... Ps>
            longest_parser<Ps...> longest(Ps... p) { return {x3::as_parser(p)...}; }
    }
    
    // Demo entry point: runs the same five kinds of input against three
    // different sets of alternatives passed to parser::longest.
    int main() {
        // id: a letter or underscore followed by letters, digits or underscores,
        // parsed as a lexeme; its attribute is a std::string.
        auto id        = x3::rule<void, std::string> {} = x3::lexeme [ x3::char_("a-zA-Z_") >> *x3::char_("a-zA-Z0-9_") ];
        // qualified: one or more ids separated by "::"; its attribute is a string_vec.
        auto qualified = x3::rule<void, string_vec>  {} = id % "::";
    
        // TEST_CASE forwards the parser expression together with its source text
        // (#rule), which parse() prints on the "Rule:" line.
    #define TEST_CASE(label, input, rule) parse(label, input, #rule, rule)
        // Alternatives: id, int_, double_ (no qualified names).
        TEST_CASE("unqualified"                , "willy"                , parser::longest(id, x3::int_, x3::double_));
        TEST_CASE("unqualified with whitespace", " willy \t"            , parser::longest(id, x3::int_, x3::double_));
        TEST_CASE("integral or number"         , "123.78::anton::lutz"  , parser::longest(id, x3::int_, x3::double_));
        TEST_CASE("qualified"                  , "willy anton::lutz"    , parser::longest(id, x3::int_, x3::double_));
        TEST_CASE("qualified with whitespace"  , "willy \tanton::lutz"  , parser::longest(id, x3::int_, x3::double_));
    
        // Alternatives: id, int_, double_ and qualified.
        TEST_CASE("unqualified"                , "willy"                , parser::longest(id, x3::int_, x3::double_, qualified));
        TEST_CASE("unqualified with whitespace", " willy \t"            , parser::longest(id, x3::int_, x3::double_, qualified));
        TEST_CASE("integral or number"         , "123.78::anton::lutz"  , parser::longest(id, x3::int_, x3::double_, qualified));
        TEST_CASE("qualified"                  , "willy::anton::lutz"   , parser::longest(id, x3::int_, x3::double_, qualified));
        TEST_CASE("qualified with whitespace"  , "willy ::\tanton::lutz", parser::longest(id, x3::int_, x3::double_, qualified));
    
        // Alternatives: int_, double_ and qualified (no plain id).
        TEST_CASE("unqualified"                , "willy"                , parser::longest(x3::int_, x3::double_, qualified));
        TEST_CASE("unqualified with whitespace", " willy \t"            , parser::longest(x3::int_, x3::double_, qualified));
        TEST_CASE("integral or number"         , "123.78::anton::lutz"  , parser::longest(x3::int_, x3::double_, qualified));
        TEST_CASE("qualified"                  , "willy::anton::lutz"   , parser::longest(x3::int_, x3::double_, qualified));
        TEST_CASE("qualified with whitespace"  , "willy ::\tanton::lutz", parser::longest(x3::int_, x3::double_, qualified));
    }
    

    Prints

    -------------------------
    unqualified
    Rule:     parser::longest(id, x3::int_, x3::double_)
    Parsing: 'willy'
    Parsed willy
    -------------------------
    unqualified with whitespace
    Rule:     parser::longest(id, x3::int_, x3::double_)
    Parsing: ' willy    '
    Parsed willy
    -------------------------
    integral or number
    Rule:     parser::longest(id, x3::int_, x3::double_)
    Parsing: '123.78::anton::lutz'
    Parsed 123.78
    EOI not reached. Unparsed: '::anton::lutz'
    -------------------------
    qualified
    Rule:     parser::longest(id, x3::int_, x3::double_)
    Parsing: 'willy anton::lutz'
    Parsed willy
    EOI not reached. Unparsed: 'anton::lutz'
    -------------------------
    qualified with whitespace
    Rule:     parser::longest(id, x3::int_, x3::double_)
    Parsing: 'willy     anton::lutz'
    Parsed willy
    EOI not reached. Unparsed: 'anton::lutz'
    -------------------------
    unqualified
    Rule:     parser::longest(id, x3::int_, x3::double_, qualified)
    Parsing: 'willy'
    Parsed willy
    -------------------------
    unqualified with whitespace
    Rule:     parser::longest(id, x3::int_, x3::double_, qualified)
    Parsing: ' willy    '
    Parsed willy
    -------------------------
    integral or number
    Rule:     parser::longest(id, x3::int_, x3::double_, qualified)
    Parsing: '123.78::anton::lutz'
    Parsed 123.78
    EOI not reached. Unparsed: '::anton::lutz'
    -------------------------
    qualified
    Rule:     parser::longest(id, x3::int_, x3::double_, qualified)
    Parsing: 'willy::anton::lutz'
    Parsed [willy][anton][lutz]
    -------------------------
    qualified with whitespace
    Rule:     parser::longest(id, x3::int_, x3::double_, qualified)
    Parsing: 'willy ::  anton::lutz'
    Parsed [willy][anton][lutz]
    -------------------------
    unqualified
    Rule:     parser::longest(x3::int_, x3::double_, qualified)
    Parsing: 'willy'
    Parsed [willy]
    -------------------------
    unqualified with whitespace
    Rule:     parser::longest(x3::int_, x3::double_, qualified)
    Parsing: ' willy    '
    Parsed [willy]
    -------------------------
    integral or number
    Rule:     parser::longest(x3::int_, x3::double_, qualified)
    Parsing: '123.78::anton::lutz'
    Parsed 123.78
    EOI not reached. Unparsed: '::anton::lutz'
    -------------------------
    qualified
    Rule:     parser::longest(x3::int_, x3::double_, qualified)
    Parsing: 'willy::anton::lutz'
    Parsed [willy][anton][lutz]
    -------------------------
    qualified with whitespace
    Rule:     parser::longest(x3::int_, x3::double_, qualified)
    Parsing: 'willy ::  anton::lutz'
    Parsed [willy][anton][lutz]
    

    Note the different results depending on the parser expressions in the alternatives.

    0 讨论(0)
提交回复
热议问题