Troubles with boost::spirit::lex & whitespace

后端 未结 1 872
心在旅途
心在旅途 2020-12-03 23:18

I am trying to learn how to use boost::spirit. To do that, I wanted to create some simple lexers, combine them, and then start parsing using spirit. I tried modifying the example, but I…

相关标签:
1条回答
  • 2020-12-04 00:00

    You have created a second lexer state, but never invoked it.

    Simplify and profit:


    For most cases, the easiest way to have the desired effect would be to use single-state lexing with a pass_ignore flag on the skippable tokens:

        this->self += identifier
                    | white_space [ lex::_pass = lex::pass_flags::pass_ignore ];
    

    Note that this requires an actor_lexer to allow for the semantic action:

    typedef lex::lexertl::actor_lexer<token_type> lexer_type;
    

    Full sample:

    #include <iostream>
    #include <string>
    
    #include <boost/spirit/include/lex_lexertl.hpp>
    namespace lex = boost::spirit::lex;
    
    /// Token definitions for a single-state lexer that recognises identifiers
    /// and discards whitespace.
    ///
    /// @tparam Lexer  Concrete lexer implementation. Because the whitespace
    ///                token carries a semantic action, this must be a lexer
    ///                type that supports actions (lex::lexertl::actor_lexer).
    template <typename Lexer>
    struct lexer_identifier : lex::lexer<Lexer>
    {
        lexer_identifier()
            : identifier("[a-zA-Z_][a-zA-Z0-9_]*")
            , white_space("[ \\t\\n]+")
        {
            // Both tokens live in the default lexer state. Whitespace is still
            // matched, but its action sets the pass flag to pass_ignore so the
            // token is skipped instead of being handed to the caller.
            this->self += identifier
                        | white_space [ lex::_pass = lex::pass_flags::pass_ignore ];
        }
    
        lex::token_def<> identifier;   ///< C-style identifier: [a-zA-Z_][a-zA-Z0-9_]*
        lex::token_def<> white_space;  ///< Run of spaces, tabs and newlines (ignored).
        std::string identifier_name;   ///< Not referenced by this sample; kept for interface compatibility.
    };
    
    /// Tokenises a fixed test string with the single-state lexer and prints
    /// `true` when every token up to the end of the input was valid.
    int main(int argc, const char *argv[])
    {
        // Token type as used by this sample: iterator type, lex::omit for the
        // attribute list, and mpl::false_ for the lexer-state flag.
        typedef lex::lexertl::token<char const*,lex::omit, boost::mpl::false_> token_type;
        // actor_lexer is required because the whitespace token has a semantic action.
        typedef lex::lexertl::actor_lexer<token_type> lexer_type;
    
        lexer_identifier<lexer_type> my_lexer;
    
        std::string test("adedvied das934adf dfklj_03245");
    
        // `first` must stay a mutable lvalue: begin() takes it by reference.
        char const* first = test.c_str();
        char const* last = first + test.size();
    
        lexer_type::iterator_type iter = my_lexer.begin(first, last);
        lexer_type::iterator_type end = my_lexer.end();
    
        // Advance token by token; stop early at the first invalid token.
        while (iter != end && token_is_valid(*iter))
        {
            ++iter;
        }
    
        // Reaching `end` means the loop never stopped on an invalid token.
        bool r = (iter == end);
        std::cout << std::boolalpha << r << "\n";
    }
    

    Prints

    true
    

    "WS" as a Skipper state


    It is also possible you came across a sample that uses the second lexer state for the skipper (lex::tokenize_and_phrase_parse). Let me take a minute or 10 to create a working sample for that.

    Update: Took me a bit more than 10 minutes (waaaah) :) Here's a comparative test, showing how the lexer states interact, and how to use Spirit Skipper parsing to invoke the second lexer state:

    #include <boost/spirit/include/qi.hpp>
    #include <boost/spirit/include/lex_lexertl.hpp>
    namespace lex = boost::spirit::lex;
    namespace qi  = boost::spirit::qi;
    
    // Token definitions for a two-state lexer: identifiers are registered in
    // the default state and whitespace in a separate state named "WS".
    // Neither state alone can tokenise input that mixes both; the "WS" state
    // is meant to be entered by a skipper (see qi::in_state("WS") in main).
    template <typename Lexer>
    struct lexer_identifier : lex::lexer<Lexer>
    {
        lexer_identifier()
            : identifier("[a-zA-Z_][a-zA-Z0-9_]*")
            , white_space("[ \\t\\n]+")
        {
            // Default state: identifiers only.
            this->self       = identifier;
            // Secondary state "WS": whitespace only.
            this->self("WS") = white_space;
        }
        // C-style identifier token.
        lex::token_def<> identifier;
        // Whitespace token; declared with lex::omit as its attribute type.
        lex::token_def<lex::omit> white_space;
    };
    
    /// Compares three ways of running the two-state lexer over the same input:
    /// plain tokenising from the "WS" state, plain tokenising from the
    /// "INITIAL" state, and phrase parsing with a skipper that switches to
    /// "WS". Prints one boolean result line per attempt.
    int main()
    {
        // mpl::true_ makes the token type carry lexer-state information, which
        // the state-switching skipper below relies on.
        typedef lex::lexertl::token<char const*, lex::omit, boost::mpl::true_> token_type;
        typedef lex::lexertl::lexer<token_type> lexer_type;
    
        lexer_identifier<lexer_type> my_lexer;
    
        std::string test("adedvied das934adf dfklj_03245");
    
        // Each attempt gets its own scope with fresh iterators: the lexing
        // functions take `first` by reference, so it cannot be shared.
        {
            char const* first = test.c_str();
            char const* last = first + test.size();
    
            // Cannot lex starting in the WS state alone: only whitespace is
            // defined there, and the input begins with an identifier.
            bool ok = lex::tokenize(first, last, my_lexer, "WS");
            std::cout << "Starting state WS:\t" << std::boolalpha << ok << "\n";
        }
    
        {
            char const* first = test.c_str();
            char const* last = first + test.size();
    
            // Cannot lex in just the default state either: only identifiers
            // are defined there, and the input contains spaces.
            bool ok = lex::tokenize(first, last, my_lexer, "INITIAL");
            std::cout << "Starting state INITIAL:\t" << std::boolalpha << ok << "\n";
        }
    
        {
            char const* first = test.c_str();
            char const* last = first + test.size();
    
            // Grammar: any number of default-state tokens.
            // Skipper: the same token set, but matched while in state "WS".
            bool ok = lex::tokenize_and_phrase_parse(first, last, my_lexer, *my_lexer.self, qi::in_state("WS")[my_lexer.self]);
            ok = ok && (first == last); // verify full input consumed
            std::cout << std::boolalpha << ok << "\n";
        }
    }
    

    The output is

    Starting state WS:  false
    Starting state INITIAL: false
    true
    
    0 讨论(0)
提交回复
热议问题