AST and operator precedence in rule definition

前端 未结 2 1967
清歌不尽
清歌不尽 2020-12-10 23:16

Hello [¹]

I have a simple parser (see below).

It intends to parse conditional expressions (relational arithmetic operations and

2条回答
  •  佛祖请我去吃肉
    2020-12-10 23:32

    Sticking with simple:

    // relop_expr is the extra grammar level that makes relational
    // comparisons bind tighter than the logical operators handled by expr_.
    relop_expr = eq_r_ | ineq_r_ | ineq_2_r_;
    
    // Branch order matters: alternatives are tried top to bottom, so the
    // bare relop_expr fallback has to come last.
    expr_  =
      ("not" >> expr_)               [ _val = phx::construct< unop <op_not> >(_1) ]     |
      (relop_expr >> "and" >> expr_) [ _val = phx::construct< binop<op_and> >(_1,_2) ] |
      (relop_expr >> "or" >> expr_)  [ _val = phx::construct< binop<op_or>  >(_1,_2) ] |
      (relop_expr >> "xor" >> expr_) [ _val = phx::construct< binop<op_xor> >(_1,_2) ] |
      (relop_expr                  ) [ _val = _1 ]
      ;
    
     BOOST_SPIRIT_DEBUG_NODES((metric_r_)(eq_r_)(ineq_r_)(ineq_2_r_)(relop_expr)(expr_))
    

    Note:

    • the ordering of branches
    • the use of an extra "level" (relop_expr) to induce precedence

    There's still work to do (3.4 did not parse yet, and neither did 2<A<3). Also, it's excruciatingly inefficient (could do with left factorization). Fixing those:

    // Try a strict real first (it only matches input that actually looks
    // like a real number), then fall back to a plain integer.
    number_r_ = real_parser<double,strict_real_policies<double>>() | int_;
    
    relop_expr = eq_r_ | ineq_2_r_ | ineq_r_;
    
    // Left-factored: relop_expr is parsed once and stored in the local _a,
    // then the optional logical operator decides how _val is built.
    expr_  =
      ("not" >> expr_)       [ _val = construct<unop<op_not>> (_1) ] |
      relop_expr [_a = _1] >> (
             ("and" >> expr_ [ _val = bin_<op_and>() ]) |
             ("or"  >> expr_ [ _val = bin_<op_or>()  ]) |
             ("xor" >> expr_ [ _val = bin_<op_xor>() ]) |
             (eps            [ _val = _a ])
        )
      ;
    

    As you can see, I can't really stand those complicated semantic actions. The chief reason for this is BUGS. Make the code readable, lose half the bugs. So, with just two simple helpers we can reduce the verbosity:

    template              using bin_  = decltype(phx::construct>(qi::_a, qi::_1));
    template  using tern_ = decltype(phx::construct>(phx::construct>(qi::_a, qi::_1), phx::construct>(qi::_1, qi::_2)));
    

    As you can see, I don't make a great effort to write traits etc. Just a quick decltype on whatever you'd write anyways, and, bam

    down from 35 crufty lines to 4 very clean lines:

    // Chained comparison: the leading number is stored in _a, then each
    // branch combines it with the metric (_1) and the trailing number (_2).
    ineq_2_r_ = number_r_ [ _a = _1 ] >> (
         ("<"  >> metric_r_ >> "<"  >> number_r_) [_val = tern_<op_lt,  op_lt>()  ] |
         ("<"  >> metric_r_ >> "<=" >> number_r_) [_val = tern_<op_lt,  op_lte>() ] |
         ("<=" >> metric_r_ >> "<"  >> number_r_) [_val = tern_<op_lte, op_lt>()  ] |
         ("<=" >> metric_r_ >> "<=" >> number_r_) [_val = tern_<op_lte, op_lte>() ] |
    
    // see, that's so easy, we can even throw in the bonus - I bet you were just fed up with writing boilerplate :)
    
         (">"  >> metric_r_ >> ">"  >> number_r_) [_val = tern_<op_gt,  op_gt>()  ] |
         (">"  >> metric_r_ >> ">=" >> number_r_) [_val = tern_<op_gt,  op_gte>() ] |
         (">=" >> metric_r_ >> ">"  >> number_r_) [_val = tern_<op_gte, op_gt>()  ] |
         (">=" >> metric_r_ >> ">=" >> number_r_) [_val = tern_<op_gte, op_gte>() ]
     );
    

    Oh, I just remembered: I have defined the op_gte and op_lte operators, since not having them was causing quadratic growth of your semantic actions. My fast rule of thumb is:

    • Rule #1: keep rules simple, avoid semantic actions
    • Corollary #1: make your AST directly reflect the grammar.

    In this case, you were conflating AST transformation with parsing. If you want to transform the AST to 'expand' lte (a,b) <- (lt(a,b) || eq(a,b)), you can trivially do that after parsing. Update: see the other answer for a demo.

    All in all, I have attached the suggestions in a working program. It implements many more features, and comes in 73 lines shorter (28%). That's even with more test cases:

    'A  >  5':    result: (A > 5)
    'A  <  5':    result: (A < 5)
    'A  >= 5':    result: (A >= 5)
    'A  <= 5':    result: (A <= 5)
    'A   = 5':    result: (A = 5)
    'A  != 5':    result: !(A = 5)
    'A>5 and B<4 xor A>3.4 or 2<A<3':    result: ((A > 5) and ((B < 4) xor ((A > 3.4) or ((2 < A) and (A < 3)))))
    'A>5 and B<4 xor A!=3.4 or 7.9e10 >= B >= -42':    result: ((A > 5) and ((B < 4) xor (!(A = 3.4) or ((7.9e+10 >= B) and (B >= -42)))))
    

    Well, I'd have shown it live on Coliru, but it seems down at the moment. Hope you like this.

    Full sample

    //#define BOOST_SPIRIT_DEBUG
    #include <boost/spirit/include/qi.hpp>
    #include <boost/spirit/include/phoenix.hpp>
    #include <boost/variant.hpp>
    #include <cstdint>
    #include <iostream>
    #include <string>
    
    namespace qi    = boost::spirit::qi;
    namespace phx   = boost::phoenix;
    
    /// Terminals
    /// Identifiers of the metrics an expression may mention, stored in one byte.
    /// The enumerator values are also used as indices into metric_names.
    enum metric_t : std::uint8_t {
        A = 0u,
        B
    };

    /// Display names of the metrics, indexed by metric_t value.
    const std::string metric_names[] = {
        "A",
        "B"
    };
    
    struct metrics_parser : boost::spirit::qi::symbols {
        metrics_parser() {
            this->add(metric_names[A], A)
                     (metric_names[B], B);
        }
    };
    
    /// Operators
    // Forward declarations: the expr variant refers to these templates before
    // they are defined. The tag type only distinguishes one operator from another.
    template <typename tag> struct unop;
    template <typename tag> struct binop;
    
    /// Expression
    // An expression is a literal (int or double), a metric, or an operator
    // node. Operator nodes contain expr members themselves, so they are held
    // through recursive_wrapper. Each `struct op_*` names an empty tag type
    // that is declared right here by its first use.
    typedef boost::variant<
      int,
      double,
      metric_t,
      boost::recursive_wrapper< unop< struct op_not> >,
      boost::recursive_wrapper< binop<struct op_and> >,
      boost::recursive_wrapper< binop<struct op_or> >,
      boost::recursive_wrapper< binop<struct op_xor> >,
      boost::recursive_wrapper< binop<struct op_eq> >,
      boost::recursive_wrapper< binop<struct op_lt> >,
      boost::recursive_wrapper< binop<struct op_gt> >,
      boost::recursive_wrapper< binop<struct op_lte> >,
      boost::recursive_wrapper< binop<struct op_gte> >
    > expr;
    
    template 
    struct binop { 
        explicit binop(const expr& l, const expr& r) : oper1(l), oper2(r) { }
        expr oper1, oper2; 
    };
    
    template 
    struct unop  { 
        explicit unop(const expr& o) : oper1(o) { }
        expr oper1; 
    };
    
    /// Writes the display name of metric `m` (looked up in metric_names) to `os`.
    std::ostream& operator<<(std::ostream& os, metric_t m)
    {
        const std::string& label = metric_names[m];
        os << label;
        return os;
    }
    
    struct printer : boost::static_visitor
    {
        printer(std::ostream& os) : _os(os) {}
        std::ostream& _os;
    
        void operator()(const binop& b) const { print(" and ", b.oper1, b.oper2); }
        void operator()(const binop& b) const { print(" or ",  b.oper1, b.oper2); }
        void operator()(const binop& b) const { print(" xor ", b.oper1, b.oper2); }
        void operator()(const binop& b) const { print(" = ",   b.oper1, b.oper2); }
        void operator()(const binop& b) const { print(" < ",   b.oper1, b.oper2); }
        void operator()(const binop& b) const { print(" > ",   b.oper1, b.oper2); }
        void operator()(const binop& b) const { print(" <= ",  b.oper1, b.oper2); }
        void operator()(const binop& b) const { print(" >= ",  b.oper1, b.oper2); }
    
        void print(const std::string& op, const expr& l, const expr& r) const {
            _os << "(";
            boost::apply_visitor(*this, l); _os << op; boost::apply_visitor(*this, r);
            _os << ")";
        }
    
        void operator()(const unop& u) const {
            _os << "!"; boost::apply_visitor(*this, u.oper1);
        }
    
        template  void operator()(other_t i) const { 
            _os << i; 
        }
    };
    
    std::ostream& operator<<(std::ostream& os, const expr& e)
    { boost::apply_visitor(printer(os), e); return os; }
    
    template 
    struct parser : qi::grammar >
    {
        template              using bin_  = decltype(phx::construct>(qi::_a, qi::_1));
        template  using tern_ = decltype(phx::construct>(phx::construct>(qi::_a, qi::_1), phx::construct>(qi::_1, qi::_2)));
    
        parser() : parser::base_type(expr_)
        {
            using namespace qi;
            using namespace phx;
    
            number_r_ = real_parser>() | int_;
    
            metric_r_ = metric_p_;
    
            eq_r_ = metric_r_ [ _a = _1 ] >> (
                    ("="  >> number_r_) [ _val = bin_() ] |
                    ("!=" >> number_r_) [ _val = construct>(bin_()) ]
                );
            ineq_2_r_ = number_r_ [ _a = _1 ] >> (
                    ("<"  >> metric_r_ >> "<"  >> number_r_) [_val = tern_()  ] |
                    ("<"  >> metric_r_ >> "<=" >> number_r_) [_val = tern_() ] |
                    ("<=" >> metric_r_ >> "<"  >> number_r_) [_val = tern_()  ] |
                    ("<=" >> metric_r_ >> "<=" >> number_r_) [_val = tern_() ] |
                    (">"  >> metric_r_ >> ">"  >> number_r_) [_val = tern_()  ] |
                    (">"  >> metric_r_ >> ">=" >> number_r_) [_val = tern_() ] |
                    (">=" >> metric_r_ >> ">"  >> number_r_) [_val = tern_()  ] |
                    (">=" >> metric_r_ >> ">=" >> number_r_) [_val = tern_() ]
                );
            ineq_r_ = metric_r_ [ _a = _1 ] >> (
                    (">" >> number_r_)  [ _val = bin_() ] |
                    ("<" >> number_r_)  [ _val = bin_() ] |
                    (">=" >> number_r_) [ _val = bin_() ] |
                    ("<=" >> number_r_) [ _val = bin_() ]
                );
    
            relop_expr = eq_r_ | ineq_2_r_ | ineq_r_;
    
            expr_  = 
                ("not" >> expr_)       [ _val = construct> (_1) ] |
                relop_expr [_a = _1] >> (
                     ("and" >> expr_ [ _val = bin_() ]) |
                     ("or"  >> expr_ [ _val = bin_() ]) |
                     ("xor" >> expr_ [ _val = bin_() ]) |
                     (eps            [ _val = _a ])
                );
    
            BOOST_SPIRIT_DEBUG_NODES((metric_r_)(eq_r_)(ineq_r_)(ineq_2_r_)(relop_expr)(expr_))
        }
      private:
        qi::rule > eq_r_, ineq_r_, ineq_2_r_, relop_expr, expr_;
        qi::rule                    number_r_, metric_r_;
        metrics_parser                                   metric_p_;
    };
    
    int main()
    {
        for (std::string const& input : { 
            "A  >  5",
            "A  <  5",
            "A  >= 5",
            "A  <= 5",
            "A   = 5",
            "A  != 5",
            "A>5 and B<4 xor A>3.4 or 25 and B<4 xor A!=3.4 or 7.9e10 >= B >= -42"
        })
        {
            auto f(std::begin(input)), l(std::end(input));
            parser p;
    
            try
            {
                std::cout << "'" << input << "':\t";
                expr result;
                bool ok = qi::phrase_parse(f,l,p,qi::space,result);
    
                if (!ok) std::cout << "invalid input\n";
                else     std::cout << "result: " << result << "\n";
    
            } catch (const qi::expectation_failure& e)
            {
                std::cout << "expectation_failure at '" << std::string(e.first, e.last) << "'\n";
            }
    
            if (f!=l) std::cout << "unparsed: '" << std::string(f,l) << "'\n";
        }
    }
    

提交回复
热议问题