Whitespace skipper when using Boost.Spirit Qi and Lex

后端 未结 1 953
刺人心
刺人心 2020-12-21 10:47

Let's consider the following code:

#include <boost/spirit/include/lex_lexertl.hpp>
#include <boost/spirit/include/qi.hpp>
#include <iostream>

(Note: the header names were stripped during page extraction; they are reconstructed here from the include list in the answer below.)


        
相关标签:
1条回答
  • 2020-12-21 11:20

    For some strange reason only now I found a different question, Boost.Spirit SQL grammar/lexer failure, where some other solution to whitespace skipping is provided. A better one!

    So below is the example code reworked along the suggestions there:

    #include <boost/spirit/include/lex_lexertl.hpp>
    #include <boost/spirit/include/qi.hpp>
    #include <algorithm>
    #include <iostream>
    #include <string>
    #include <utility>
    #include <vector>
    
    namespace lex = boost::spirit::lex;
    namespace qi = boost::spirit::qi;
    
    // Lexer for simple arithmetic expressions (+ - * / values, %variables,
    // and configurable parenthesis pairs). Whitespace is tokenised like any
    // other input, but a semantic action flags it pass_ignore so the parser
    // never sees it — this is why the lexer must be instantiated with an
    // actor_lexer (plain lexertl::lexer does not support token actions).
    template<typename Lexer>
    class expression_lexer
        : public lex::lexer<Lexer>
    {
    public:
        typedef lex::token_def<> operator_token_type;
        typedef lex::token_def<> value_token_type;
        typedef lex::token_def<> variable_token_type;
        // lex::omit: these tokens carry no attribute value (the parser only
        // needs to know that they matched, not what text they covered).
        typedef lex::token_def<lex::omit> parenthesis_token_type;
        typedef std::pair<parenthesis_token_type, parenthesis_token_type> parenthesis_token_pair_type;
        typedef lex::token_def<lex::omit> whitespace_token_type;
    
        expression_lexer()
            : operator_add('+'),
              operator_sub('-'),
              operator_mul("[x*]"),         // regex char class: 'x' or '*' both mean multiply
              operator_div("[:/]"),         // ':' or '/' both mean divide
              value("\\d+(\\.\\d+)?"),      // integer or decimal literal
              variable("%(\\w+)"),          // '%'-prefixed identifier
              parenthesis({
                std::make_pair(parenthesis_token_type('('), parenthesis_token_type(')')),
                std::make_pair(parenthesis_token_type('['), parenthesis_token_type(']'))
              }),
              whitespace("[ \\t]+")
        {
            // Token registration order matters: earlier definitions win on
            // equal-length matches. '+=' (rather than '=') appends to any
            // tokens already present in this state.
            this->self
                += operator_add
                | operator_sub
                | operator_mul
                | operator_div
                | value
                | variable
                // The action below makes the lexer silently drop whitespace
                // tokens instead of handing them to the parser.
                | whitespace [lex::_pass = lex::pass_flags::pass_ignore]
                ;
    
            // Register every configured open/close parenthesis pair.
            std::for_each(parenthesis.cbegin(), parenthesis.cend(),
                [&](parenthesis_token_pair_type const& token_pair)
                {
                    this->self += token_pair.first | token_pair.second;
                }
            );
        }
    
        operator_token_type operator_add;
        operator_token_type operator_sub;
        operator_token_type operator_mul;
        operator_token_type operator_div;
    
        value_token_type value;
        variable_token_type variable;
    
        // Open/close token pairs; the grammar accepts any of these pairs
        // around a sub-expression.
        std::vector<parenthesis_token_pair_type> parenthesis;
    
        whitespace_token_type whitespace;
    };
    
    // Qi grammar over the lexer's token stream. Note there is no Skipper
    // template argument: whitespace is already discarded inside the lexer,
    // so the grammar parses a whitespace-free token sequence.
    template<typename Iterator>
    class expression_grammar
        : public qi::grammar<Iterator>
    {
    public:
        // 'tokens' is the expression_lexer instance whose token_defs are
        // used directly as terminals in the rules below.
        template<typename Tokens>
        explicit expression_grammar(Tokens const& tokens)
            : expression_grammar::base_type(start)
        {
            // %= is auto-rule assignment (attribute propagation); qi::eoi
            // requires the whole input to be consumed.
            start                     %= expression >> qi::eoi;
    
            // Right-recursive precedence climbing: sums above factors.
            expression                %= sum_operand >> -(sum_operator >> expression);
            sum_operator              %= tokens.operator_add | tokens.operator_sub;
            sum_operand               %= fac_operand >> -(fac_operator >> sum_operand);
            fac_operator              %= tokens.operator_mul | tokens.operator_div;
    
            // Only offer the parenthesised alternative when the lexer
            // actually defines at least one parenthesis pair.
            if(!tokens.parenthesis.empty())
                fac_operand           %= parenthesised | terminal;
            else
                fac_operand           %= terminal;
    
            terminal                  %= tokens.value | tokens.variable;
    
            if(!tokens.parenthesis.empty())
            {
                // Build 'parenthesised' incrementally: start from the first
                // pair, then fold in each further pair as an alternative.
                // .copy() snapshots the rule so it can appear on its own
                // right-hand side without infinite self-reference.
                parenthesised         %= tokens.parenthesis.front().first >> expression >> tokens.parenthesis.front().second;
                std::for_each(tokens.parenthesis.cbegin() + 1, tokens.parenthesis.cend(),
                    [&](typename Tokens::parenthesis_token_pair_type const& token_pair)
                    {
                        parenthesised %= parenthesised.copy() | (token_pair.first >> expression >> token_pair.second);
                    }
                );
            }
        }
    
    private:
        qi::rule<Iterator> start;
        qi::rule<Iterator> expression;
        qi::rule<Iterator> sum_operand;
        qi::rule<Iterator> sum_operator;
        qi::rule<Iterator> fac_operand;
        qi::rule<Iterator> fac_operator;
        qi::rule<Iterator> terminal;
        qi::rule<Iterator> parenthesised;
    };
    
    
    // Driver: read one expression per line from stdin and report whether it
    // parses. actor_lexer (not plain lexer) is required because the lexer
    // attaches a semantic action (pass_ignore) to the whitespace token.
    int main()
    {
        typedef lex::lexertl::token<std::string::const_iterator> token_type;
        typedef expression_lexer<lex::lexertl::actor_lexer<token_type>> expression_lexer_type;
        typedef expression_lexer_type::iterator_type expression_lexer_iterator_type;
        typedef expression_grammar<expression_lexer_iterator_type> expression_grammar_type;
    
        expression_lexer_type lexer;
        expression_grammar_type grammar(lexer);
    
        // Process input line by line until the stream goes bad/EOF.
        while(std::cin)
        {
            std::string line;
            std::getline(std::cin, line);
    
            std::string::const_iterator pos = line.begin();
            std::string::const_iterator const end = line.end();
    
            // No skipper needed: whitespace never reaches the parser.
            if(!lex::tokenize_and_parse(pos, end, lexer, grammar))
            {
                std::cout << "Parsing failed! Reminder: >" << std::string(pos, end) << "<" << std::endl;
            }
            else if(pos == end)
            {
                std::cout << "Parsing succeeded!" << std::endl;
            }
            else
            {
                std::cout << "Parsing succeeded! Reminder: >" << std::string(pos, end) << "<" << std::endl;
            }
        }
    }
    

    The differences are following:

    1. whitespace token is added to lexer's self as all other tokens.
    2. However, an action is associated with it. The action makes the lexer ignore the token. Which is exactly what we want.
    3. My expression_grammar no longer takes Skipper template argument. And so it is also removed from rules.
    4. lex::lexertl::actor_lexer is used instead of lex::lexertl::lexer since now there is an action associated with a token.
    5. I'm calling tokenize_and_parse instead of tokenize_and_phrase_parse as I don't need to pass skipper anymore.
    6. Also I changed first assignment to this->self in lexer from = to += as it seems more flexible (resistant to order changes). But it doesn't affect the solution here.

    I'm happy with this approach. It suits my needs (or, better said, my taste) perfectly. However, I wonder whether there are any other consequences of this change. Is one approach preferred over the other in some situations? That I don't know.

    0 讨论(0)
提交回复
热议问题