AST and operator precedence in rule definition

前端 未结 2 1968
清歌不尽
清歌不尽 2020-12-10 23:16

Hello [¹]

I have a simple parser (see below).

It intends to parse conditional expressions (relational arithmetic operations and

2条回答
  •  甜味超标
    2020-12-10 23:37

    From the other answer:

    • Rule #1: keep rules simple, avoid semantic actions
    • Corollary #1: make your AST directly reflect the grammar.

    In this case, you were conflating AST transformation with parsing. If you want to transform the AST to 'expand' lte(a,b) into (lt(a,b) || eq(a,b)), you can trivially do that after parsing.

    Perhaps that needs a proof of concept to be convincing.

    I could hardly leave that as an exercise for the reader, now could I :) So to transform the AST, let's write a simple visitor:

    struct expander : boost::static_visitor
    {
        expr operator()(binop const& e) const {
            expr oper1(recurse(e.oper1)), oper2(recurse(e.oper2));
            return binop(
                    binop(oper1, oper2),
                    binop(oper1, oper2));
        }
        expr operator()(binop const& e) const {
            expr oper1(recurse(e.oper1)), oper2(recurse(e.oper2));
            return binop(
                    binop(oper1, oper2),
                    binop(oper1, oper2));
        }
    
        // recurse compound nodes
        template  expr operator()(unop  const& e) const { return unop(recurse(e.oper1)); }
        template  expr operator()(binop const& e) const { return binop(recurse(e.oper1), recurse(e.oper2)); }
        // copy leaf nodes
        template  expr operator()(T const& e) const { return e; }
    private:
        expr recurse(expr const& e) const { return boost::apply_visitor(*this, e); };
    };
    
    /// Returns a copy of `e` as produced by the `expander` visitor.
    expr expand(expr const& e) {
        const expander visitor = expander();
        return boost::apply_visitor(visitor, e);
    }
    

    See, only two expression nodes get transformed; the rest are either recursed into (compound nodes) or copied (leaves). Now, we can add the expanded result to our test program output:

    input:    A  >  5
    result:   (A > 5)
    expanded: (A > 5)
    input:    A  <  5
    result:   (A < 5)
    expanded: (A < 5)
    input:    A  >= 5
    result:   (A >= 5)
    expanded: ((A > 5) or (A = 5))
    input:    A  <= 5
    result:   (A <= 5)
    expanded: ((A < 5) or (A = 5))
    input:    A   = 5
    result:   (A = 5)
    expanded: (A = 5)
    input:    A  != 5
    result:   !(A = 5)
    expanded: !(A = 5)
    input:    A>5 and B<4 xor A>3.4 or 2<A<3
    result:   ((A > 5) and ((B < 4) xor ((A > 3.4) or ((2 < A) and (A < 3)))))
    expanded: ((A > 5) and ((B < 4) xor ((A > 3.4) or ((2 < A) and (A < 3)))))
    input:    A>5 and B<4 xor A!=3.4 or 7.9e10 >= B >= -42
    result:   ((A > 5) and ((B < 4) xor (!(A = 3.4) or ((7.9e+10 >= B) and (B >= -42)))))
    expanded: ((A > 5) and ((B < 4) xor (!(A = 3.4) or (((7.9e+10 > B) or (7.9e+10 = B)) and ((B > -42) or (B = -42))))))
    

    Q.E.D.:

    • Keep your parser simple by matching the AST to the grammar
    • See your dentist twice a year :)

    See the full program with this transformation:

    //#define BOOST_SPIRIT_DEBUG
    #include <cstdint>
    #include <iostream>
    #include <string>

    #include <boost/spirit/include/qi.hpp>
    #include <boost/spirit/include/phoenix.hpp>
    #include <boost/spirit/include/phoenix_operator.hpp>
    #include <boost/variant/recursive_wrapper.hpp>
    
    namespace qi    = boost::spirit::qi;
    namespace phx   = boost::phoenix;
    
    /// Terminals: the metrics an expression can refer to.
    /// Each enumerator's value is also its index into `metric_names`.
    enum metric_t : std::uint8_t {
        A = 0u,
        B
    };

    /// Spelling of each metric, indexed by the `metric_t` enumerator value.
    const std::string metric_names[] = {
        "A",
        "B"
    };
    
    struct metrics_parser : boost::spirit::qi::symbols {
        metrics_parser() {
            this->add(metric_names[A], A)
                     (metric_names[B], B);
        }
    };
    
    /// Operators: forward declarations of the unary / binary AST node
    /// templates. `tag` is an empty type that identifies the operator.
    template <typename tag> struct unop;
    template <typename tag> struct binop;
    
    /// Expression: the AST node type. Leaves are `int`, `double` and
    /// `metric_t`; compound nodes are `unop` / `binop` instantiations, held
    /// through `recursive_wrapper` because they contain `expr` members
    /// themselves. The `struct op_*` operator tags are declared in place and
    /// never defined; they only serve to distinguish the instantiations.
    typedef boost::variant<
      int,
      double,
      metric_t,
      boost::recursive_wrapper< unop< struct op_not> >,
      boost::recursive_wrapper< binop<struct op_and> >,
      boost::recursive_wrapper< binop<struct op_or> >,
      boost::recursive_wrapper< binop<struct op_xor> >,
      boost::recursive_wrapper< binop<struct op_eq> >,
      boost::recursive_wrapper< binop<struct op_lt> >,
      boost::recursive_wrapper< binop<struct op_gt> >,
      boost::recursive_wrapper< binop<struct op_lte> >,
      boost::recursive_wrapper< binop<struct op_gte> >
    > expr;
    
    template 
    struct unop  { 
        explicit unop(const expr& o) : oper1(o) { }
        expr oper1; 
    };
    
    template 
    struct binop { 
        explicit binop(const expr& l, const expr& r) : oper1(l), oper2(r) { }
        expr oper1, oper2; 
    };
    
    /// Streams a metric as its entry in `metric_names`.
    std::ostream& operator<<(std::ostream& os, metric_t m)
    {
        os << metric_names[m];
        return os;
    }
    
    struct expander : boost::static_visitor
    {
        expr operator()(binop const& e) const {
            expr oper1(recurse(e.oper1)), oper2(recurse(e.oper2));
            return binop(
                    binop(oper1, oper2),
                    binop(oper1, oper2));
        }
        expr operator()(binop const& e) const {
            expr oper1(recurse(e.oper1)), oper2(recurse(e.oper2));
            return binop(
                    binop(oper1, oper2),
                    binop(oper1, oper2));
        }
    
        // recurse compound nodes
        template  expr operator()(unop  const& e) const { return unop(recurse(e.oper1)); }
        template  expr operator()(binop const& e) const { return binop(recurse(e.oper1), recurse(e.oper2)); }
        // copy leaf nodes
        template  expr operator()(T const& e) const { return e; }
      private:
        expr recurse(expr const& e) const { return boost::apply_visitor(*this, e); };
    };
    
    /// Returns a copy of `e` as produced by the `expander` visitor.
    expr expand(expr const& e) {
        const expander visitor = expander();
        return boost::apply_visitor(visitor, e);
    }
    
    struct printer : boost::static_visitor
    {
        printer(std::ostream& os) : _os(os) {}
        std::ostream& _os;
    
        void operator()(const binop& b) const { print(" and ", b.oper1, b.oper2); }
        void operator()(const binop& b) const { print(" or ",  b.oper1, b.oper2); }
        void operator()(const binop& b) const { print(" xor ", b.oper1, b.oper2); }
        void operator()(const binop& b) const { print(" = ",   b.oper1, b.oper2); }
        void operator()(const binop& b) const { print(" < ",   b.oper1, b.oper2); }
        void operator()(const binop& b) const { print(" > ",   b.oper1, b.oper2); }
        void operator()(const binop& b) const { print(" <= ",  b.oper1, b.oper2); }
        void operator()(const binop& b) const { print(" >= ",  b.oper1, b.oper2); }
    
        void print(const std::string& op, const expr& l, const expr& r) const {
            _os << "(";
            boost::apply_visitor(*this, l); _os << op; boost::apply_visitor(*this, r);
            _os << ")";
        }
    
        void operator()(const unop& u) const {
            _os << "!"; boost::apply_visitor(*this, u.oper1);
        }
    
        template  void operator()(other_t i) const { 
            _os << i; 
        }
    };
    
    std::ostream& operator<<(std::ostream& os, const expr& e)
    { boost::apply_visitor(printer(os), e); return os; }
    
    template 
    struct parser : qi::grammar >
    {
        template              using bin_  = decltype(phx::construct>(qi::_a, qi::_1));
        template  using tern_ = decltype(phx::construct>(phx::construct>(qi::_a, qi::_1), phx::construct>(qi::_1, qi::_2)));
    
        parser() : parser::base_type(expr_)
        {
            using namespace qi;
            using namespace phx;
    
            number_r_ = real_parser>() | int_;
    
            metric_r_ = metric_p_;
    
            eq_r_ = metric_r_ [ _a = _1 ] >> (
                    ("="  >> number_r_) [ _val = bin_() ] |
                    ("!=" >> number_r_) [ _val = construct>(bin_()) ]
                );
            ineq_2_r_ = number_r_ [ _a = _1 ] >> (
                    ("<"  >> metric_r_ >> "<"  >> number_r_) [_val = tern_()  ] |
                    ("<"  >> metric_r_ >> "<=" >> number_r_) [_val = tern_() ] |
                    ("<=" >> metric_r_ >> "<"  >> number_r_) [_val = tern_()  ] |
                    ("<=" >> metric_r_ >> "<=" >> number_r_) [_val = tern_() ] |
                    (">"  >> metric_r_ >> ">"  >> number_r_) [_val = tern_()  ] |
                    (">"  >> metric_r_ >> ">=" >> number_r_) [_val = tern_() ] |
                    (">=" >> metric_r_ >> ">"  >> number_r_) [_val = tern_()  ] |
                    (">=" >> metric_r_ >> ">=" >> number_r_) [_val = tern_() ]
                );
            ineq_r_ = metric_r_ [ _a = _1 ] >> (
                    (">" >> number_r_)  [ _val = bin_() ] |
                    ("<" >> number_r_)  [ _val = bin_() ] |
                    (">=" >> number_r_) [ _val = bin_() ] |
                    ("<=" >> number_r_) [ _val = bin_() ]
                );
    
            relop_expr = eq_r_ | ineq_2_r_ | ineq_r_;
    
            expr_  = 
                ("not" >> expr_)       [ _val = construct> (_1) ] |
                relop_expr [_a = _1] >> (
                     ("and" >> expr_ [ _val = bin_() ]) |
                     ("or"  >> expr_ [ _val = bin_() ]) |
                     ("xor" >> expr_ [ _val = bin_() ]) |
                     (eps            [ _val = _a ])
                );
    
            BOOST_SPIRIT_DEBUG_NODES((metric_r_)(eq_r_)(ineq_r_)(ineq_2_r_)(relop_expr)(expr_))
        }
      private:
        qi::rule > eq_r_, ineq_r_, ineq_2_r_, relop_expr, expr_;
        qi::rule                    number_r_, metric_r_;
        metrics_parser                                   metric_p_;
    };
    
    int main()
    {
        for (std::string const& input : { 
            "A  >  5",
            "A  <  5",
            "A  >= 5",
            "A  <= 5",
            "A   = 5",
            "A  != 5",
            "A>5 and B<4 xor A>3.4 or 25 and B<4 xor A!=3.4 or 7.9e10 >= B >= -42"
        })
        {
            auto f(std::begin(input)), l(std::end(input));
            parser p;
    
            try
            {
                std::cout << "input:    " << input << "\n";
                expr result;
                bool ok = qi::phrase_parse(f,l,p,qi::space,result);
    
                if (!ok) std::cout << "invalid input\n";
                else     
                {
                    std::cout << "result:   " << result         << "\n";
                    std::cout << "expanded: " << expand(result) << "\n";
                }
    
            } catch (const qi::expectation_failure& e)
            {
                std::cout << "expectation_failure at '" << std::string(e.first, e.last) << "'\n";
            }
    
            if (f!=l) std::cout << "unparsed: '" << std::string(f,l) << "'\n";
        }
    }
    

提交回复
热议问题