Boost Spirit Segfault In Parser

不想你离开。 提交于 2019-12-06 11:21:18

If you use AddressSanitizer, it will tell you:

<start>...
  <try>[]</try>...
ASAN:DEADLYSIGNAL...
=================================================================...
==8985==ERROR: AddressSanitizer: stack-overflow on address 0x7ffeb280dfc8 (pc 0x0000004c9cf6 bp 0x7f...
    #0 0x4c9cf5 in __asan_memcpy (/home/sehe/Projects/stackoverflow/sotest+0x4c9cf5)...
    #1 0x68eb77 in bool boost::spirit::any_if<boost::spirit::traits::attribute_not_unused<boost::spi...
    #2 0x68e844 in bool boost::spirit::qi::sequence_base<boost::spirit::qi::sequence<boost::fusion::...
    #3 0x68e487 in bool boost::spirit::qi::sequence_base<boost::spirit::qi::sequence<boost::fusion::...
    #4 0x68e190 in bool boost::spirit::qi::detail::alternative_function<boost::spirit::lex::lexertl:...
    #5 0x68de4a in bool boost::spirit::qi::detail::alternative_function<boost::spirit::lex::lexertl:...
    #6 0x68d8b5 in bool boost::spirit::qi::detail::alternative_function<boost::spirit::lex::lexertl:...
    #7 0x6e085c in bool boost::fusion::detail::linear_any<boost::fusion::cons_iterator<boost::fusion...
    #8 0x6e053f in bool boost::fusion::detail::any<boost::fusion::cons<boost::spirit::qi::sequence<b...
    #9 0x6e0218 in bool boost::fusion::any<boost::fusion::cons<boost::spirit::qi::sequence<boost::fu...
    #10 0x6dffc5 in bool boost::spirit::qi::alternative<boost::fusion::cons<boost::spirit::qi::seque...
    #11 0x6dfbf7 in bool boost::spirit::qi::detail::parser_binder<boost::spirit::qi::alternative<boo...
    #12 0x6de330 in boost::detail::function::function_obj_invoker4<boost::spirit::qi::detail::parser...
    #13 0x5d633a in boost::function4<bool, boost::spirit::lex::lexertl::iterator<boost::spirit::lex:...
    #14 0x5d58e8 in bool boost::spirit::qi::rule<boost::spirit::lex::lexertl::iterator<boost::spirit...
    #15 0x5d54e9 in bool boost::spirit::qi::reference<boost::spirit::qi::rule<boost::spirit::lex::le...
    #16 0x5d49bf in bool boost::spirit::qi::detail::fail_function<boost::spirit::lex::lexertl::itera...
    #17 0x68f56c in bool boost::fusion::detail::linear_any<boost::fusion::cons_iterator<boost::fusio...
    #18 0x68f267 in bool boost::fusion::detail::any<boost::fusion::cons<boost::spirit::qi::reference...
    #19 0x68ef6e in bool boost::fusion::any<boost::fusion::cons<boost::spirit::qi::reference<boost::...
    #20 0x68ebae in bool boost::spirit::any_if<boost::spirit::traits::attribute_not_unused<boost::sp...
    #21 0x68e844 in bool boost::spirit::qi::sequence_base<boost::spirit::qi::sequence<boost::fusion:...
    [ snip repeated frames ]
    #250 0x68e487 in bool boost::spirit::qi::sequence_base<boost::spirit::qi::sequence<boost::fusion...


SUMMARY: AddressSanitizer: stack-overflow (/home/sehe/Projects/stackoverflow/sotest+0x4c9cf5) in __a...
==8985==ABORTING...

So, this is clearly left-recursion leading to stack overflow.

The fact that other parser generators cope with it means very little: Spirit is a PEG parser generator, and left-recursion is impossible.

You need to rewrite things like

    exp %= exp >> qi::token(k_equalTo) >> exp

Into something that makes the lhs more specific.

Note: I had to fix some random issues with the way you presented your code. This is what I used to repro:

Live On Coliru

#include <boost/spirit/include/lex.hpp>
#include <boost/spirit/include/lex_lexertl.hpp>
#include <boost/spirit/include/qi.hpp>
namespace lex = boost::spirit::lex;

namespace interpreter {
    enum Tokens
    {
        k_andTok = 1,
        k_def = 2,
        k_elihw = 3,
        k_elseTok = 4,
        k_falseTok = 5,
        k_fed = 6,
        k_fi = 7,
        k_ifTok = 8,
        k_input = 9,
        k_notTok = 10,
        k_orTok = 11,
        k_print = 12,
        k_returnTok = 13,
        k_trueTok = 14,
        k_whileTok = 15,
        k_plues = 16,
        k_minus = 17,
        k_mult = 18,
        k_div = 19,
        k_bang = 20,
        k_equalTo = 21,
        k_greaterEq = 22,
        k_lessEq = 23,
        k_notEq = 24,
        k_less = 25,
        k_greater = 26,
        k_assign = 27,
        k_comma = 28,
        k_colon = 29,
        k_leftParen = 30,
        k_rightParen = 31,
        k_leftBracket = 32,
        k_rightBracket = 33,
        k_nonTerminal = 34,
        k_terminal = 35
    };

    template <typename Lexer>
    struct LexerTokens : lex::lexer<Lexer>
    {
        LexerTokens() :
           whiteSpace("[ \\t\\n]"),
           andTok("and"),
           def("def"),
           elihw("elihw"),
           elseTok("else"),
           falseTok("false"),
           fed("fed"),
           fi("fi"),
           ifTok("if"),
           input("input"),
           notTok("not"),
           orTok("or"),
           print("print"),
           returnTok("return"),
           trueTok("true"),
           whileTok("while"),
           plus("\\+"),
           minus("\\-"),
           mult("\\*"),
           div("\\/"),
           bang("\\!"),
           equalTo("=="),
           greaterEq(">="),
           lessEq("<="),
           notEq("!="),
           less("<"),
           greater(">"),
           assign("="),
           comma(","),
           colon(":"),
           leftParen("\\("),
           rightParen("\\)"),
           leftBracket("\\["),
           rightBracket("\\["),
           nonTerminal("[a-z][a-zA-Z0-9]*"),
           terminal("[0-9]")
        {
            this->self("WHITESPACE") = whiteSpace;

            this->self.add
                (andTok, k_andTok)
                (def, k_def)
                (elihw, k_elihw)
                (elseTok, k_elseTok)
                (falseTok, k_falseTok)
                (fed, k_fed)
                (fi, k_fi)
                (ifTok, k_ifTok)
                (andTok, k_andTok)
                (input, k_input)
                (notTok, k_notTok)
                (orTok, k_orTok)
                (print, k_print)
                (returnTok, k_returnTok)
                (trueTok, k_trueTok)
                (whileTok, k_whileTok)
                (plus, k_plues)
                (minus, k_minus)
                (mult, k_mult)
                (div, k_div)
                (bang, k_bang)
                (equalTo, k_equalTo)
                (greaterEq, k_greaterEq)
                (lessEq, k_lessEq)
                (notEq, k_notEq)
                (less, k_less)
                (greater, k_greater)
                (assign, k_assign)
                (comma, k_comma)
                (colon, k_colon)
                (leftParen, k_leftParen)
                (rightParen, k_rightParen)
                (leftBracket, k_leftBracket)
                (rightBracket, k_rightBracket)
                (nonTerminal, k_nonTerminal)
                (terminal, k_terminal);
        }

        lex::token_def<lex::omit> whiteSpace;
        lex::token_def<std::string> andTok;
        lex::token_def<std::string> def;
        lex::token_def<std::string> elihw;
        lex::token_def<std::string> elseTok;
        lex::token_def<std::string> falseTok;
        lex::token_def<std::string> fed;
        lex::token_def<std::string> fi;
        lex::token_def<std::string> ifTok;
        lex::token_def<std::string> input;
        lex::token_def<std::string> notTok;
        lex::token_def<std::string> orTok;
        lex::token_def<std::string> print;
        lex::token_def<std::string> returnTok;
        lex::token_def<std::string> trueTok;
        lex::token_def<std::string> whileTok;
        lex::token_def<std::string> plus;
        lex::token_def<std::string> minus;
        lex::token_def<std::string> mult;
        lex::token_def<std::string> div;
        lex::token_def<std::string> bang;
        lex::token_def<std::string> equalTo;
        lex::token_def<std::string> greaterEq;
        lex::token_def<std::string> lessEq;
        lex::token_def<std::string> notEq;
        lex::token_def<std::string> less;
        lex::token_def<std::string> greater;
        lex::token_def<std::string> assign;
        lex::token_def<std::string> comma;
        lex::token_def<std::string> colon;
        lex::token_def<std::string> leftParen;
        lex::token_def<std::string> rightParen;
        lex::token_def<std::string> leftBracket;
        lex::token_def<std::string> rightBracket;
        lex::token_def<std::string> nonTerminal;
        lex::token_def<std::string> terminal;
    };

    namespace qi = boost::spirit::qi;
    template <typename Iterator, typename Skipper>
    struct InterpreterGrammar : qi::grammar<Iterator, Skipper>
    {        
    //    using boost::phoenix::ref;
    //    using boost::phoenix::size;

        template <typename TokenDef>
        InterpreterGrammar(TokenDef const& )
            : InterpreterGrammar::base_type(start)
              //, connect(0)
        {
            start 
                = functionList >> endList >> qi::eoi
                ;

            // different expressions
            exp = exp >> qi::token(k_equalTo) >> exp
                | exp >> qi::token(k_notEq) >> exp
                | exp >> qi::token(k_less) >> exp
                | exp >> qi::token(k_lessEq) >> exp
                | exp >> qi::token(k_greater) >> exp
                | exp >> qi::token(k_greaterEq) >> exp
                | exp >> qi::token(k_andTok) >> exp
                | exp >> qi::token(k_orTok) >> exp
                | qi::token(k_notTok) >> exp 
                | exp >> qi::token(k_plues) >> exp
                | exp >> qi::token(k_minus) >> exp
                | exp >> qi::token(k_mult) >> exp
                | qi::token(k_minus) >> exp
                | qi::token(k_leftParen) >> exp >> qi::token(k_rightParen)
                | qi::token(k_nonTerminal) >> qi::token(k_leftBracket) >> exp >> qi::token(k_rightBracket) 
                | qi::token(k_nonTerminal) >> qi::token(k_leftParen) >> qi::token(k_rightParen)
                | qi::token(k_nonTerminal) >> qi::token(k_leftParen) >> exp >> qi::token(k_rightParen)
                | qi::token(k_nonTerminal)
                | qi::token(k_terminal)
                | qi::token(k_trueTok)
                | qi::token(k_falseTok)
                ;

            // parameter list
            paramList 
                = paramList >> qi::token(k_comma) >> exp
                | exp
                ;

            // return statements
            returnStatement 
                = returnStatement >> exp
                | returnStatement
                ;

            // function call statements
            callStatement 
                = qi::token(k_nonTerminal) >> qi::token(k_leftParen) >> qi::token(k_rightParen)
                | qi::token(k_nonTerminal) >> qi::token(k_leftParen) >> paramList >> qi::token(k_rightParen)
                ;

            // variable assignment
            assignmentStatement 
                = qi::token(k_nonTerminal) >> qi::token(k_assign) >> exp
                | qi::token(k_nonTerminal) >> qi::token(k_leftBracket) >> exp
                    >> qi::token(k_rightBracket) >> qi::token(k_assign) >> exp
                ;

            // list of integers
            intList 
                = intList >> qi::token(k_comma) >> qi::token(k_terminal)
                | qi::token(k_terminal)
                ;

            // print out a variable
            printStatement 
                = qi::token(k_print) >> exp
                ;

            // take input
            inputStatement 
                = qi::token(k_nonTerminal) >> qi::token(k_input)
                ;

            // conditional statement
            conditionStatement 
                = qi::token(k_ifTok) >> exp >> qi::token(k_colon) >> statements >> optionalElse
                ;

            // consitions have optional else
            optionalElse 
                = qi::token(k_elseTok) >> qi::token(k_colon) >> statements
                | qi::eps
                ;

            // while loop
            whileStatement 
                = qi::token(k_whileTok) >> exp >> qi::token(k_colon) >> statements >> qi::token(k_elihw)
                ;

            // actual program statements
            endList 
                = endList >> end
                | end
                ;

            // end possibilities of program in global space
            end = callStatement
                | printStatement
                | qi::token(k_nonTerminal) >> qi::token(k_assign) >> qi::token(k_input)
                | qi::token(k_nonTerminal) >> qi::token(k_assign) >> exp
                | qi::token(k_nonTerminal) >> qi::token(k_assign) >> qi::token(k_leftBracket) >> intList
                    >> qi::token(k_rightBracket)
                | qi::token(k_nonTerminal) >> qi::token(k_leftBracket) >> exp >> qi::token(k_rightBracket)
                    >> qi::token(k_assign) >> exp
                ;

            // function parameters
            paramList 
                = paramList >> qi::token(k_comma) >> qi::token(k_nonTerminal)
                | qi::token(k_nonTerminal)
                | qi::token(k_nonTerminal) >> qi::token(k_leftBracket) >> qi::token(k_rightBracket)
                ;

            // define a statement as assignment print input condition while or call
            statement 
                = assignmentStatement
                | printStatement
                | inputStatement
                | conditionStatement
                | whileStatement
                | callStatement
                | returnStatement
                ;

            // general statement list
            statements 
                = statements >> statement
                | statement
                ;

            // functions
            functionList 
                = qi::token(k_def) >> qi::token(k_nonTerminal) >> qi::token(k_leftParen)
                               >> paramList >> qi::token(k_rightParen) >> qi::token(k_colon)
                               >> statements >> qi::token(k_fed)
                | qi::token(k_def) >> qi::token(k_nonTerminal) >> qi::token(k_leftParen)
                               >> qi::token(k_rightParen) >> qi::token(k_colon) >> statements >> qi::token(k_fed)
                | qi::eps
                ;

            BOOST_SPIRIT_DEBUG_NODES((start)(functionList));
        }

        qi::rule<Iterator, Skipper> start;
        qi::rule<Iterator, Skipper> functionList;
        qi::rule<Iterator, Skipper> endList;
        qi::rule<Iterator, Skipper> paramList;
        qi::rule<Iterator, Skipper> statements;
        qi::rule<Iterator, Skipper> statement;
        qi::rule<Iterator, Skipper> assignmentStatement;
        qi::rule<Iterator, Skipper> printStatement;
        qi::rule<Iterator, Skipper> inputStatement;
        qi::rule<Iterator, Skipper> conditionStatement;
        qi::rule<Iterator, Skipper> whileStatement;
        qi::rule<Iterator, Skipper> callStatement;
        qi::rule<Iterator, Skipper> returnStatement;
        qi::rule<Iterator, Skipper> exp;
        qi::rule<Iterator, Skipper> intList;
        qi::rule<Iterator, Skipper> optionalElse;
        qi::rule<Iterator, Skipper> end;
    };
}

#include <fstream>
#include <iterator>

int main(int argc, char** argv) {
    namespace lex = boost::spirit::lex;
    namespace qi = boost::spirit::qi;

    typedef lex::lexertl::token< char const*, lex::omit, boost::mpl::true_ > token_type;
    typedef lex::lexertl::lexer<token_type> lexer_type;
    typedef interpreter::LexerTokens<lexer_type>::iterator_type iterator_type;
    typedef qi::in_state_skipper<interpreter::LexerTokens<lexer_type>::lexer_def> skipper_type;

    interpreter::LexerTokens< lexer_type > lexer;
    interpreter::InterpreterGrammar< iterator_type, skipper_type > parser(lexer);

    // read the file
    if (argc != 2)
    {
        std::cout << "File required" << std::endl;
        return 1;
    }

    std::ifstream t(argv[1]); 
    std::string const sourceCode { std::istreambuf_iterator<char>(t), {} };

    char const* first = sourceCode.data();
    char const* last = first + sourceCode.size();
    bool r = lex::tokenize_and_phrase_parse(first, last, lexer, parser, qi::in_state("WHITESPACE")[lexer.self]);

    std::cout << "Remaining " << std::string(first,last) << std::endl;
    std::cout << "R is " << r << std::endl;
}
易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!