Boost Spirit Signals Successful Parsing Despite Token Being Incomplete

前端 未结 3 1937
隐瞒了意图╮
隐瞒了意图╮ 2020-12-21 04:44

I have a very simple path construct that I am trying to parse with boost spirit.lex.

We have the following grammar:

token := [a-z]+
path := (token : path) | (token)


        
3条回答
  •  臣服心动
    2020-12-21 05:02

    This is what I finally ended up with. It uses the suggestions from both @sehe and @llonesmiz. Note the conversion to std::wstring and the use of actions in the grammar definition, which were not present in the original post.

    #include <boost/bind.hpp>
    #include <boost/fusion/include/at_c.hpp>
    #include <boost/fusion/include/vector.hpp>
    #include <boost/mpl/vector.hpp>
    #include <boost/spirit/include/lex_lexertl.hpp>
    #include <boost/spirit/include/phoenix_operator.hpp>
    #include <boost/spirit/include/qi.hpp>
    
    #include <iostream>
    #include <string>
    #include <vector>
    
    //
    // This example uses boost spirit to parse a simple
    // colon-delimited grammar.
    //
    // The grammar we want to recognize is:
    //    identifier := [a-z]+
    //    separator  := :
    //    path       := (identifier separator path) | identifier
    //
    // From the boost spirit perspective this example shows
    // a few things I found hard to come by when building my
    // first parser.
    //    1. How to flag an incomplete token at the end of input
    //       as an error. (use of boost::spirit::eoi)
    //    2. How to bind an action on an instance of an object
    //       that is taken as input to the parser.
    //    3. Use of std::wstring.
    //    4. Use of the lexer iterator.
    //
    
    // This using directive will cause issues with boost::bind
    // when referencing placeholders such as _1.
    // using namespace boost::spirit;
    
    //! A class that tokenizes our input.
    //!
    //! Registers two token definitions with the lexer: `identifier`
    //! (pattern "[a-z]+") and `separator` (pattern ":").
    //!
    //! \tparam Lexer  Concrete lexer implementation to derive from
    //!                (main() instantiates this with a lexertl lexer).
    //!
    //! NOTE(review): the template parameter/argument lists of this class were
    //! missing from the text this was recovered from; they have been restored
    //! following the usual Spirit.Lex pattern -- confirm against the original.
    template <typename Lexer>
    struct Tokens : boost::spirit::lex::lexer<Lexer>
    {
          Tokens()
          {
             identifier = L"[a-z]+";
             separator = L":";
    
             // Make both token definitions known to the lexer.
             this->self.add
                (identifier)
                (separator)
                ;
          }

          // The patterns above are wide-character literals, hence wchar_t as
          // the character type; the std::wstring attribute is what the
          // grammar's `token` rule assigns to its own value.
          boost::spirit::lex::token_def<std::wstring, wchar_t> identifier, separator;
    };
    
    //! This class provides a callback that echoes strings to stderr.
    struct Echo
    {
          void echo(boost::fusion::vector const& t) const
          {
             using namespace boost::fusion;
             std::wcerr << at_c<0>(t) << "\n";
          }
    };
    
    
    //! The definition of our grammar, as described above.
    //!
    //!    path  := ((token separator path) | token) end-of-input
    //!    token := identifier
    //!
    //! \tparam Iterator  Iterator type the parser reads from (main() uses the
    //!                   lexer's token iterator).
    //!
    //! NOTE(review): the template parameter lists and the rule signatures were
    //! missing from the recovered text and have been restored -- confirm
    //! against the original.
    template <typename Iterator>
    struct Grammar : boost::spirit::qi::grammar<Iterator>
    {
          //! \param tok  Token definitions; must expose `identifier` and
          //!             `separator`.
          //! \param e    Receiver of the echo callbacks. Only its address is
          //!             stored in the bound actions, so `e` must outlive
          //!             this grammar.
          template <typename TokenDef>
          Grammar(TokenDef const& tok, Echo const& e)
             : Grammar::base_type(path)
          {
             using boost::spirit::_val;

             // Both alternatives bind the same Echo instance by pointer.
             // (Previously the first alternative bound `e` by value, i.e. a
             // copy, while the second bound `&e`.)
             path
                = 
                ((token >> tok.separator >> path)[boost::bind(&Echo::echo, &e, ::_1)]
                 |
                 (token)[boost::bind(&Echo::echo, &e, ::_1)]
                 ) >> boost::spirit::eoi; // Look for end of input.
    
              // Expose the identifier's text as the value of `token`.
              token 
                 = (tok.identifier) [_val=boost::spirit::qi::_1]
              ;
    
          }
          boost::spirit::qi::rule<Iterator> path;
          boost::spirit::qi::rule<Iterator, std::wstring()> token;
    };
    
    
    int main()
    {
       // A set of typedefs to make things a little clearer. This stuff is
       // well described in the boost spirit documentation/examples.
       typedef std::wstring::iterator BaseIteratorType;
       typedef boost::spirit::lex::lexertl::token > TokenType;
       typedef boost::spirit::lex::lexertl::lexer LexerType;
       typedef Tokens::iterator_type TokensIterator;
       typedef LexerType::iterator_type LexerIterator;
    
       // Define some paths to parse.
       typedef std::vector Tests;
       Tests paths;
       paths.push_back(L"abc");
       paths.push_back(L"abc:xyz");
       paths.push_back(L"abc:xyz:");
       paths.push_back(L":");
    
       // Parse 'em.
       for ( Tests::iterator iter = paths.begin(); iter != paths.end(); ++iter )
       {
          std::wstring str = *iter;
          std::wcerr << L"*****" << str << L"*****\n";
    
          Echo e;
          Tokens tokens;
          Grammar grammar(tokens, e);
    
          BaseIteratorType first = str.begin();
          BaseIteratorType last = str.end();
    
          // Have the lexer consume our string.
          LexerIterator lexFirst = tokens.begin(first, last);
          LexerIterator lexLast = tokens.end();
    
          // Have the parser consume the output of the lexer.
          bool r = boost::spirit::qi::parse(lexFirst, lexLast, grammar);
    
          // Print the status and whether or note all output of the lexer 
          // was processed.
          std::wcerr << r << L" " << (lexFirst==lexLast) << L"\n";
       }
    }
    

提交回复
热议问题