Whitespace skipper when using Boost.Spirit Qi and Lex

后端 未结 1 953
刺人心
刺人心 2020-12-21 10:47

Let's consider the following code:

#include <boost/spirit/include/lex_lexertl.hpp>
#include <boost/spirit/include/qi.hpp>
#include <iostream>

(Note: the header names were stripped during page extraction; they are reconstructed here from the include list in the answer below.)


        
相关标签:
1条回答
  • 2020-12-21 11:20

    For some strange reason only now I found a different question, Boost.Spirit SQL grammar/lexer failure, where some other solution to whitespace skipping is provided. A better one!

    So below is the example code reworked along the suggestions there:

    #include <boost/spirit/include/lex_lexertl.hpp>
    #include <boost/spirit/include/qi.hpp>
    #include <algorithm>
    #include <iostream>
    #include <string>
    #include <utility>
    #include <vector>
    
    namespace lex = boost::spirit::lex;
    namespace qi = boost::spirit::qi;
    
    // Lexer for simple arithmetic expressions (+ - * / values, %variables,
    // and configurable parenthesis pairs). Whitespace is tokenised like any
    // other input, but a semantic action flags it pass_ignore so the parser
    // never sees it — this is why the lexer must be instantiated with an
    // actor_lexer (plain lexertl::lexer does not support token actions).
    template<typename Lexer>
    class expression_lexer
        : public lex::lexer<Lexer>
    {
    public:
        typedef lex::token_def<> operator_token_type;
        typedef lex::token_def<> value_token_type;
        typedef lex::token_def<> variable_token_type;
        // lex::omit: these tokens carry no attribute value (the parser only
        // needs to know that they matched, not what text they covered).
        typedef lex::token_def<lex::omit> parenthesis_token_type;
        typedef std::pair<parenthesis_token_type, parenthesis_token_type> parenthesis_token_pair_type;
        typedef lex::token_def<lex::omit> whitespace_token_type;
    
        expression_lexer()
            : operator_add('+'),
              operator_sub('-'),
              operator_mul("[x*]"),         // regex char class: 'x' or '*' both mean multiply
              operator_div("[:/]"),         // ':' or '/' both mean divide
              value("\\d+(\\.\\d+)?"),      // integer or decimal literal
              variable("%(\\w+)"),          // '%'-prefixed identifier
              parenthesis({
                std::make_pair(parenthesis_token_type('('), parenthesis_token_type(')')),
                std::make_pair(parenthesis_token_type('['), parenthesis_token_type(']'))
              }),
              whitespace("[ \\t]+")
        {
            // Token registration order matters: earlier definitions win on
            // equal-length matches. '+=' (rather than '=') appends to any
            // tokens already present in this state.
            this->self
                += operator_add
                | operator_sub
                | operator_mul
                | operator_div
                | value
                | variable
                // The action below makes the lexer silently drop whitespace
                // tokens instead of handing them to the parser.
                | whitespace [lex::_pass = lex::pass_flags::pass_ignore]
                ;
    
            // Register every configured open/close parenthesis pair.
            std::for_each(parenthesis.cbegin(), parenthesis.cend(),
                [&](parenthesis_token_pair_type const& token_pair)
                {
                    this->self += token_pair.first | token_pair.second;
                }
            );
        }
    
        operator_token_type operator_add;
        operator_token_type operator_sub;
        operator_token_type operator_mul;
        operator_token_type operator_div;
    
        value_token_type value;
        variable_token_type variable;
    
        // Open/close token pairs; the grammar accepts any of these pairs
        // around a sub-expression.
        std::vector<parenthesis_token_pair_type> parenthesis;
    
        whitespace_token_type whitespace;
    };
    
    // Qi grammar over the lexer's token stream. Note there is no Skipper
    // template argument: whitespace is already discarded inside the lexer,
    // so the grammar parses a whitespace-free token sequence.
    template<typename Iterator>
    class expression_grammar
        : public qi::grammar<Iterator>
    {
    public:
        // 'tokens' is the expression_lexer instance whose token_defs are
        // used directly as terminals in the rules below.
        template<typename Tokens>
        explicit expression_grammar(Tokens const& tokens)
            : expression_grammar::base_type(start)
        {
            // %= is auto-rule assignment (attribute propagation); qi::eoi
            // requires the whole input to be consumed.
            start                     %= expression >> qi::eoi;
    
            // Right-recursive precedence climbing: sums above factors.
            expression                %= sum_operand >> -(sum_operator >> expression);
            sum_operator              %= tokens.operator_add | tokens.operator_sub;
            sum_operand               %= fac_operand >> -(fac_operator >> sum_operand);
            fac_operator              %= tokens.operator_mul | tokens.operator_div;
    
            // Only offer the parenthesised alternative when the lexer
            // actually defines at least one parenthesis pair.
            if(!tokens.parenthesis.empty())
                fac_operand           %= parenthesised | terminal;
            else
                fac_operand           %= terminal;
    
            terminal                  %= tokens.value | tokens.variable;
    
            if(!tokens.parenthesis.empty())
            {
                // Build 'parenthesised' incrementally: start from the first
                // pair, then fold in each further pair as an alternative.
                // .copy() snapshots the rule so it can appear on its own
                // right-hand side without infinite self-reference.
                parenthesised         %= tokens.parenthesis.front().first >> expression >> tokens.parenthesis.front().second;
                std::for_each(tokens.parenthesis.cbegin() + 1, tokens.parenthesis.cend(),
                    [&](typename Tokens::parenthesis_token_pair_type const& token_pair)
                    {
                        parenthesised %= parenthesised.copy() | (token_pair.first >> expression >> token_pair.second);
                    }
                );
            }
        }
    
    private:
        qi::rule<Iterator> start;
        qi::rule<Iterator> expression;
        qi::rule<Iterator> sum_operand;
        qi::rule<Iterator> sum_operator;
        qi::rule<Iterator> fac_operand;
        qi::rule<Iterator> fac_operator;
        qi::rule<Iterator> terminal;
        qi::rule<Iterator> parenthesised;
    };
    
    
    // Driver: read one expression per line from stdin and report whether it
    // parses. actor_lexer (not plain lexer) is required because the lexer
    // attaches a semantic action (pass_ignore) to the whitespace token.
    int main()
    {
        typedef lex::lexertl::token<std::string::const_iterator> token_type;
        typedef expression_lexer<lex::lexertl::actor_lexer<token_type>> expression_lexer_type;
        typedef expression_lexer_type::iterator_type expression_lexer_iterator_type;
        typedef expression_grammar<expression_lexer_iterator_type> expression_grammar_type;
    
        expression_lexer_type lexer;
        expression_grammar_type grammar(lexer);
    
        // Process input line by line until the stream goes bad/EOF.
        while(std::cin)
        {
            std::string line;
            std::getline(std::cin, line);
    
            std::string::const_iterator pos = line.begin();
            std::string::const_iterator const end = line.end();
    
            // No skipper needed: whitespace never reaches the parser.
            if(!lex::tokenize_and_parse(pos, end, lexer, grammar))
            {
                std::cout << "Parsing failed! Reminder: >" << std::string(pos, end) << "<" << std::endl;
            }
            else if(pos == end)
            {
                std::cout << "Parsing succeeded!" << std::endl;
            }
            else
            {
                std::cout << "Parsing succeeded! Reminder: >" << std::string(pos, end) << "<" << std::endl;
            }
        }
    }
    

    The differences are following:

    1. whitespace token is added to lexer's self as all other tokens.
    2. However, an action is associated with it. The action makes the lexer ignore the token. Which is exactly what we want.
    3. My expression_grammar no longer takes Skipper template argument. And so it is also removed from rules.
    4. lex::lexertl::actor_lexer is used instead of lex::lexertl::lexer since now there is an action associated with a token.
    5. I'm calling tokenize_and_parse instead of tokenize_and_phrase_parse as I don't need to pass skipper anymore.
    6. Also I changed first assignment to this->self in lexer from = to += as it seems more flexible (resistant to order changes). But it doesn't affect the solution here.

    I'm happy with this approach. It suits my needs (or, better said, my taste) perfectly. However, I wonder whether there are any other consequences of this change. Is one approach preferred over the other in some situations? That I don't know.

    0 讨论(0)
提交回复
热议问题