cannot get boost::spirit parser&lexer working for token types other than std::string or int or double

↘锁芯ラ 提交于 2019-12-30 11:02:36

问题


This does not compile (code below).

There was another question here with the same error. But I don't understand the answer. I already tried inserting qi::eps in places -- but without success.

I also already tried adding meta functions (boost::spirit::traits::is_container) for the types used -- but this does not help either.

I also tried using the same variant containing all types I need to use everywhere. Same problem.

Has anybody gotten this working for a lexer returning something else than double or int or string? And for the parser also returning non-trivial objects?

I've tried implementing semantic functions everywhere returning default objects. But this also does not help.

Here comes the code:

// spirit_error.cpp : Defines the entry point for the console application.
//

#include <boost/config/warning_disable.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix_core.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <boost/phoenix/object.hpp>
#include <boost/spirit/include/qi_char_class.hpp>
#include <boost/spirit/include/phoenix_bind.hpp>
#include <boost/mpl/index_of.hpp>
#include <boost/spirit/include/lex_lexertl.hpp>
#include <boost/intrusive_ptr.hpp>
#include <boost/smart_ptr/intrusive_ref_counter.hpp>

namespace lex = boost::spirit::lex;
namespace qi = boost::spirit::qi;
namespace ascii = boost::spirit::ascii;


namespace frank
{
/// Common polymorphic base: combines boost::intrusive_ref_counter with a
/// virtual destructor so derived objects can be held by boost::intrusive_ptr.
class ref_counter
    : public boost::intrusive_ref_counter<ref_counter>
{
public:
    virtual ~ref_counter() {}
};
/// Reference-counted base class that also provides the handle and container
/// aliases used as parser attributes.
class symbol : public ref_counter
{
public:
    /// Intrusive handle to an immutable symbol.
    typedef boost::intrusive_ptr<const symbol> symbolPtr;
    /// Sequence of symbol handles.
    typedef std::vector<symbolPtr> symbolVector;

    /// Placeholder guard object; its constructor and destructor are empty.
    struct push_scope
    {
        push_scope() {}
        ~push_scope() {}
    };
};
/// Symbol describing a nature; declares the attribute keys and the value type
/// an attribute can carry.
class nature : public symbol
{
public:
    /// Keys of the attributes a nature can define.
    enum enumAttribute
    {
        eAbstol,
        eAccess,
        eDDT,
        eIDT,
        eUnits
    };

    /// "No value" alternative of attributeValue. All instances compare as
    /// equivalent and stream as nothing.
    struct empty
    {
        bool operator<(const empty &) const { return false; }

        friend std::ostream &operator<<(std::ostream &_r, const empty &) { return _r; }
    };

    /// Value of an attribute: either nothing or a string.
    typedef boost::variant<empty, std::string> attributeValue;
};
/// Symbol describing a discipline; declares the possible domain values.
class discipline : public symbol
{
public:
    /// Domain of a discipline.
    enum enumDomain
    {
        eDiscrete,
        eContinuous
    };
};

/// Reference-counted type object; typePtr is the (mutable) intrusive handle
/// used as the attribute of the type keyword tokens.
class type : public ref_counter
{
public:
    typedef boost::intrusive_ptr<type> typePtr;
};
/// Read-only character iterator over a std::string that stores an index
/// instead of a pointer, so it can still be interpreted after the string has
/// been edited through m_p.
///
/// The past-the-end position is stored as the sentinel endPos() (all bits
/// set) rather than as size(). All arithmetic and ordering therefore goes
/// through index(), which maps the sentinel to the real size; this makes
/// `end - begin` equal to the string length, lets `--end` reach the last
/// character, and makes begin == end for an empty string.
///
/// m_p and m_iPos stay public because external code manipulates them directly.
/// Copy construction and assignment are the compiler-generated member-wise ones.
struct myIterator
{   // Iterator traits, spelled out directly (std::iterator is deprecated since C++17).
    typedef std::random_access_iterator_tag iterator_category;
    typedef char value_type;
    typedef std::ptrdiff_t difference_type;
    typedef const char *pointer;
    typedef const char &reference;

    std::string *m_p;       ///< string being iterated, nullptr for a default-constructed iterator
    std::size_t m_iPos;     ///< character index, or endPos() for past-the-end

    /// Sentinel stored in m_iPos for the past-the-end position.
    static std::size_t endPos(void)
    {   return ~std::size_t(0);
    }
    /// Logical index of this iterator: m_iPos, with the sentinel mapped to
    /// the current size of the string (0 when there is no string).
    std::size_t index(void) const
    {   if (m_iPos != endPos())
            return m_iPos;
        return m_p ? m_p->size() : 0;
    }
    /// Creates a singular past-the-end iterator that is not bound to a string.
    myIterator(void)
        :m_p(nullptr),
        m_iPos(endPos())
    {
    }
    /// Creates the begin iterator of _r, or its end iterator when _bEnd is true.
    /// For an empty string the begin iterator is the end iterator.
    myIterator(std::string &_r, const bool _bEnd = false)
        :m_p(&_r),
        m_iPos(_bEnd || _r.empty() ? endPos() : 0)
    {
    }
    /// Returns the current character; std::string::at throws std::out_of_range
    /// when the iterator is past-the-end.
    const char &operator*(void) const
    {   return m_p->at(m_iPos);
    }
    /// Random access relative to the current position.
    const char &operator[](const std::ptrdiff_t _n) const
    {   return *(*this + _n);
    }
    bool operator==(const myIterator &_r) const
    {   return m_p == _r.m_p && m_iPos == _r.m_iPos;
    }
    bool operator!=(const myIterator &_r) const
    {   return m_p != _r.m_p || m_iPos != _r.m_iPos;
    }
    /// Advances by one character; reaching (or exceeding) the string size
    /// collapses to the end sentinel. Incrementing the end iterator is a no-op.
    myIterator &operator++(void)
    {   if (m_iPos != endPos() && ++m_iPos >= m_p->size())
            m_iPos = endPos();
        return *this;
    }
    myIterator operator++(int)
    {   const myIterator s(*this);
        operator++();
        return s;
    }
    /// Steps back by one character; from the end iterator this yields the
    /// last character of the string.
    myIterator &operator--(void)
    {   m_iPos = index() - 1;
        return *this;
    }
    myIterator operator--(int)
    {   const myIterator s(*this);
        operator--();
        return s;
    }
    /// Moves by _n characters (may be negative); any position outside the
    /// string becomes the end sentinel.
    myIterator &operator+=(const std::ptrdiff_t _n)
    {   // unsigned wrap-around makes the addition correct for negative _n as well
        const std::size_t i = index() + static_cast<std::size_t>(_n);
        m_iPos = (m_p && i < m_p->size()) ? i : endPos();
        return *this;
    }
    myIterator &operator-=(const std::ptrdiff_t _n)
    {   return *this += -_n;
    }
    myIterator operator+(const std::ptrdiff_t _n) const
    {   myIterator s(*this);
        s += _n;
        return s;
    }
    myIterator operator-(const std::ptrdiff_t _n) const
    {   myIterator s(*this);
        s -= _n;
        return s;
    }
    friend myIterator operator+(const std::ptrdiff_t _n, const myIterator &_r)
    {   return _r + _n;
    }
    /// Orders iterators of the same string by logical index; iterators of
    /// different strings are ordered by the address of their string.
    bool operator<(const myIterator &_r) const
    {   if (m_p == _r.m_p)
            return index() < _r.index();
        else
            return m_p < _r.m_p;
    }
    bool operator>(const myIterator &_r) const
    {   return _r < *this;
    }
    bool operator<=(const myIterator &_r) const
    {   return !(_r < *this);
    }
    bool operator>=(const myIterator &_r) const
    {   return !(*this < _r);
    }
    /// Signed distance in characters from _r to this iterator.
    std::ptrdiff_t operator-(const myIterator &_r) const
    {   return static_cast<std::ptrdiff_t>(index()) - static_cast<std::ptrdiff_t>(_r.index());
    }
};
/// Lexer action functor for a matched include statement: replaces the matched
/// text inside the underlying string and tells the lexer to ignore the token.
struct onInclude
{
    auto operator()(myIterator &_rStart, myIterator &_rEnd) const
    {
        std::string &text = *_rStart.m_p;
        const std::size_t from = _rStart.m_iPos;
        const std::size_t count = _rEnd.m_iPos - from;

        // drop the matched include statement ...
        text.erase(from, count);
        // ... and put the contents of the file in its place ("abcd" is a stand-in)
        text.insert(from, "abcd");

        // the match now ends where it started
        _rEnd = _rStart;
        return lex::pass_flags::pass_ignore;
    }
};
// Token definitions for the language. Three lexer states are populated in the
// constructor: the default state (this->self), "INCLUDE" and "WS".
template<typename LEXER>
class lexer:public lex::lexer<LEXER>
{   public:
    // type keywords; their token value is declared as type::typePtr
    lex::token_def<type::typePtr> m_sKW_real, m_sKW_integer, m_sKW_string;
    // comments and white space carry no value (lex::omit)
    lex::token_def<lex::omit> m_sLineComment, m_sCComment;
    lex::token_def<lex::omit> m_sWS;
    // punctuation and the include directive, no value
    lex::token_def<lex::omit> m_sSemicolon, m_sEqual, m_sColon, m_sInclude, m_sCharOP, m_sCharCP,
        m_sComma;
    // tokens carrying a value
    lex::token_def<std::string> m_sIdentifier, m_sString;
    lex::token_def<double> m_sReal;
    lex::token_def<int> m_sInteger;
    // remaining keywords, no value
    lex::token_def<lex::omit> m_sKW_units, m_sKW_access, m_sKW_idt_nature, m_sKW_ddt_nature, m_sKW_abstol,
        m_sKW_nature, m_sKW_endnature, m_sKW_continuous, m_sKW_discrete,
        m_sKW_potential, m_sKW_flow, m_sKW_domain, m_sKW_discipline, m_sKW_enddiscipline, m_sKW_module,
        m_sKW_endmodule, m_sKW_parameter;
    //typedef const type *typePtr;
    // Converts the matched character range to T using boost::lexical_cast.
    // NOTE(review): <boost/lexical_cast.hpp> is not among the includes of this
    // file -- presumably it arrives transitively; confirm.
    template<typename T>
    struct extractValue
    {   T operator()(const myIterator &_rStart, const myIterator &_rEnd) const
        {   return boost::lexical_cast<T>(std::string(_rStart, _rEnd));
        }
    };
    // Returns the matched text without its first and last character
    // (used on m_sString, whose pattern starts and ends with a double quote).
    struct extractString
    {   std::string operator()(const myIterator &_rStart, const myIterator &_rEnd) const
        {   const auto s = std::string(_rStart, _rEnd);
            return s.substr(1, s.size() - 2);
        }
    };
    // Sets the regular expression of every token and registers the tokens
    // with the lexer states.
    // Note: the members are initialized in their declaration order, not in
    // the order in which they are listed below.
    lexer(void)
        :m_sWS("[ \\t\\n\\r]+"),
        m_sKW_parameter("\"parameter\""),
        m_sKW_real("\"real\""),
        m_sKW_integer("\"integer\""),
        m_sKW_string("\"string\""),
        m_sLineComment("\\/\\/[^\\n]*"),
        // NOTE(review): the "([*][^/])" alternative consumes two characters, so a
        // comment ending in "**/" is presumably not matched -- verify.
        m_sCComment("\\/\\*"
            "("
                "[^*]"
                    "|" "[\\n]"
                    "|" "([*][^/])"
            ")*"
            "\\*\\/"),
        m_sSemicolon("\";\""),
        m_sEqual("\"=\""),
        m_sColon("\":\""),
        m_sCharOP("\"(\""), 
        m_sCharCP("\")\""),
        m_sComma("\",\""),
        m_sIdentifier("[a-zA-Z_]+[a-zA-Z0-9_]*"),
        // a double quote, any number of non-quote characters, a double quote
        // (the variant with escaped quotes is commented out)
        m_sString("[\\\"]"
            //"("
            //  "(\\[\"])"
            //  "|"
                //"[^\"]"
            //")*"
            "[^\\\"]*"
            "[\\\"]"),
        m_sKW_units("\"units\""),
        m_sKW_access("\"access\""),
        m_sKW_idt_nature("\"idt_nature\""),
        m_sKW_ddt_nature("\"ddt_nature\""),
        m_sKW_abstol("\"abstol\""),
        m_sKW_nature("\"nature\""),
        m_sKW_endnature("\"endnature\""),
        m_sKW_continuous("\"continuous\""),
        m_sKW_discrete("\"discrete\""),
        m_sKW_domain("\"domain\""),
        m_sKW_discipline("\"discipline\""),
        m_sKW_enddiscipline("\"enddiscipline\""),
        m_sKW_potential("\"potential\""),
        m_sKW_flow("\"flow\""),
//realnumber      ({uint}{exponent})|((({uint}\.{uint})|(\.{uint})){exponent}?)
//exponent        [Ee][+-]?{uint}
//uint            [0-9][_0-9]*

        // {uint} and {exponent} refer to the patterns added in the constructor body
        m_sReal("({uint}{exponent})"
            "|"
                "("
                    "(({uint}[\\.]{uint})|([\\.]{uint})){exponent}?"
                ")"
        ),
        m_sInteger("{uint}"),
        m_sInclude("\"`include\""),
        m_sKW_module("\"module\""),
        m_sKW_endmodule("\"endmodule\"")
    {   // named sub-patterns referenced as {uint} / {exponent} above
        this->self.add_pattern
            ("uint", "[0-9]+")
            ("exponent", "[eE][\\+\\-]?{uint}");
        // Tokens of the default state. The keywords are listed before
        // m_sIdentifier, whose pattern would match them as well.
        // NOTE(review): m_sKW_real/m_sKW_integer/m_sKW_string are declared with a
        // type::typePtr value, but the actions that would assign lex::_val are
        // commented out -- presumably Spirit then has to build a typePtr from
        // the matched iterator range by itself; confirm whether this is the
        // source of the compile error.
        this->self = m_sSemicolon
            | m_sEqual
            | m_sColon
            | m_sCharOP
            | m_sCharCP
            | m_sComma
            | m_sString[lex::_val = boost::phoenix::bind(extractString(), lex::_start, lex::_end)]
            | m_sKW_real//[lex::_val = boost::phoenix::bind(&type::getReal)]
            | m_sKW_integer//[lex::_val = boost::phoenix::bind(&type::getInteger)]
            | m_sKW_string//[lex::_val = boost::phoenix::bind(&type::getString)]
            | m_sKW_parameter
            | m_sKW_units
            | m_sKW_access
            | m_sKW_idt_nature
            | m_sKW_ddt_nature
            | m_sKW_abstol
            | m_sKW_nature
            | m_sKW_endnature
            | m_sKW_continuous
            | m_sKW_discrete
            | m_sKW_domain
            | m_sKW_discipline
            | m_sKW_enddiscipline
            | m_sReal[lex::_val = boost::phoenix::bind(extractValue<double>(), lex::_start, lex::_end)]
            | m_sInteger[lex::_val = boost::phoenix::bind(extractValue<int>(), lex::_start, lex::_end)]
            | m_sKW_potential
            | m_sKW_flow
            | m_sKW_module
            | m_sKW_endmodule
            | m_sIdentifier
            | m_sInclude [ lex::_state = "INCLUDE" ]
            ;
        // State "INCLUDE" is entered by the action on m_sInclude above. The
        // string that follows switches back to "INITIAL" and sets lex::_pass
        // to the result of onInclude.
        this->self("INCLUDE") += m_sString [
            lex::_state = "INITIAL", lex::_pass = boost::phoenix::bind(onInclude(), lex::_start, lex::_end)
        ];
        // State "WS": white space and both comment forms.
        this->self("WS") = m_sWS
            | m_sLineComment
            | m_sCComment
            ;
    }
};
// Grammar for one nature declaration:
//   nature <identifier> [ : <identifier> ] ; <property>* endnature
// NOTE(review): none of the rules has a semantic action, so the declared
// attributes (symbol::symbolPtr, std::pair<...>) would have to be built
// automatically from the token attributes -- presumably this is not possible
// for these types; confirm.
template<typename Iterator, typename Lexer>
class natureParser:public qi::grammar<Iterator, symbol::symbolPtr(void), qi::in_state_skipper<Lexer> >
{   // start rule, see the grammar above
    qi::rule<Iterator, symbol::symbolPtr(void), qi::in_state_skipper<Lexer> > m_sStart;
    // one "<keyword> = <value> ;" line
    qi::rule<Iterator, std::pair<nature::enumAttribute, nature::attributeValue>(void), qi::in_state_skipper<Lexer> > m_sProperty;
    // optional ": <identifier>" following the name of the nature
    qi::rule<Iterator, std::string(), qi::in_state_skipper<Lexer> > m_sName;
    public:
    // _rTokens provides the token definitions (members named m_s...) used below.
    template<typename Tokens>
    natureParser(const Tokens &_rTokens)
        :natureParser::base_type(m_sStart)
    {   // units takes a string, access/idt_nature/ddt_nature take an identifier,
        // abstol takes a real number
        m_sProperty = (_rTokens.m_sKW_units
                >> _rTokens.m_sEqual
                >> _rTokens.m_sString
                >> _rTokens.m_sSemicolon
                )
            | (_rTokens.m_sKW_access
                >> _rTokens.m_sEqual
                >> _rTokens.m_sIdentifier
                >> _rTokens.m_sSemicolon
                )
            | (_rTokens.m_sKW_idt_nature
                >> _rTokens.m_sEqual
                >> _rTokens.m_sIdentifier
                >> _rTokens.m_sSemicolon
                )
            | (_rTokens.m_sKW_ddt_nature
                >> _rTokens.m_sEqual
                >> _rTokens.m_sIdentifier
                >> _rTokens.m_sSemicolon
                )
            | (_rTokens.m_sKW_abstol
                >> _rTokens.m_sEqual
                >> _rTokens.m_sReal
                >> _rTokens.m_sSemicolon
                )
            ;
        m_sName = (_rTokens.m_sColon >> _rTokens.m_sIdentifier);
        m_sStart = (_rTokens.m_sKW_nature
            >> _rTokens.m_sIdentifier
            >> -m_sName
            >> _rTokens.m_sSemicolon
            >> *(m_sProperty)
            >> _rTokens.m_sKW_endnature
            );
        // rule names for diagnostics
        m_sStart.name("start");
        m_sProperty.name("property");
    }
};
/*
// Conservative discipline
discipline electrical; 
  potential    Voltage;
  flow         Current;
enddiscipline
*/
// a parser for a discipline declaration
// Grammar for one discipline declaration:
//   discipline <identifier> ; <property>* enddiscipline
// NOTE(review): no rule has a semantic action; the declared attributes would
// have to be produced automatically from the token attributes -- confirm that
// this can work for these types.
template<typename Iterator, typename Lexer>
class disciplineParser:public qi::grammar<Iterator, symbol::symbolPtr(void), qi::in_state_skipper<Lexer> >
{   qi::rule<Iterator, symbol::symbolPtr(void), qi::in_state_skipper<Lexer> > m_sStart;
    // flag paired with a handle to a nature
    // (presumably the flag distinguishes potential from flow -- confirm)
    typedef std::pair<bool, boost::intrusive_ptr<const nature> > CPotentialAndNature;
    // "no value" alternative of the property variant
    struct empty
    {   bool operator<(const empty&) const
        {   return false;
        }
        friend std::ostream &operator<<(std::ostream &_r, const empty&)
        {   return _r;
        }
    };
    // attribute of one property line
    typedef boost::variant<empty, CPotentialAndNature, discipline::enumDomain> property;
    qi::rule<Iterator, discipline::enumDomain(), qi::in_state_skipper<Lexer> > m_sDomain;
    qi::rule<Iterator, property(void), qi::in_state_skipper<Lexer> > m_sProperty;
    public:
    // _rTokens provides the token definitions (members named m_s...) used below.
    template<typename Tokens>
    disciplineParser(const Tokens &_rTokens)
        :disciplineParser::base_type(m_sStart)
    {   m_sDomain = _rTokens.m_sKW_continuous
            | _rTokens.m_sKW_discrete
            ;
        // potential <identifier> ; | flow <identifier> ; | domain <domain> ;
        m_sProperty = (_rTokens.m_sKW_potential >> _rTokens.m_sIdentifier >> _rTokens.m_sSemicolon)
            | (_rTokens.m_sKW_flow >> _rTokens.m_sIdentifier >> _rTokens.m_sSemicolon)
            | (_rTokens.m_sKW_domain >> m_sDomain >> _rTokens.m_sSemicolon)
            ;
        m_sStart = (_rTokens.m_sKW_discipline
            >> _rTokens.m_sIdentifier
            >> _rTokens.m_sSemicolon
            >> *m_sProperty
            >> _rTokens.m_sKW_enddiscipline
        );
    }
};
// Grammar for one module declaration:
//   module <identifier> ( <identifier> {, <identifier>} ) <parameter>* endmodule
// where <parameter> is: parameter (real | integer | string) <identifier>
// NOTE(review): the rules without '%=' have no semantic action either, so
// their declared attributes are presumably never filled in -- confirm.
template<typename Iterator, typename Lexer>
class moduleParser:public qi::grammar<Iterator, symbol::symbolPtr(void), qi::in_state_skipper<Lexer> >
{   public:
    qi::rule<Iterator, symbol::symbolPtr(void), qi::in_state_skipper<Lexer> > m_sStart;
    // parenthesized port list
    qi::rule<Iterator, symbol::symbolVector(void), qi::in_state_skipper<Lexer> > m_sModulePortList;
    // comma separated ports
    qi::rule<Iterator, symbol::symbolVector(void), qi::in_state_skipper<Lexer> > m_sPortList;
    // a single port (an identifier)
    qi::rule<Iterator, symbol::symbolPtr(void), qi::in_state_skipper<Lexer> > m_sPort;
    // the "module" keyword
    qi::rule<Iterator, std::shared_ptr<symbol::push_scope>(void), qi::in_state_skipper<Lexer> > m_sModule;

    // handle to any reference counted object
    typedef boost::intrusive_ptr<const ref_counter> intrusivePtr;
    typedef std::vector<intrusivePtr> vectorOfPtr;
    qi::rule<Iterator, vectorOfPtr(void), qi::in_state_skipper<Lexer> > m_sModuleItemList;
    qi::rule<Iterator, intrusivePtr(void), qi::in_state_skipper<Lexer> > m_sParameter;
    qi::rule<Iterator, intrusivePtr(void), qi::in_state_skipper<Lexer> > m_sModuleItem;
    // one of the type keyword tokens
    qi::rule<Iterator, type::typePtr(void), qi::in_state_skipper<Lexer> > m_sType;

    // _rTokens provides the token definitions (members named m_s...) used below.
    template<typename Tokens>
    moduleParser(const Tokens &_rTokens)
        :moduleParser::base_type(m_sStart)
    {   m_sPort = _rTokens.m_sIdentifier;
        // '%=' keeps automatic attribute propagation for the list rules
        m_sPortList %= m_sPort % _rTokens.m_sComma;
        m_sModulePortList %= _rTokens.m_sCharOP >> m_sPortList >> _rTokens.m_sCharCP;
        m_sModule = _rTokens.m_sKW_module;
        m_sType = _rTokens.m_sKW_real | _rTokens.m_sKW_integer | _rTokens.m_sKW_string;
        m_sParameter = _rTokens.m_sKW_parameter
            >> m_sType
            >> _rTokens.m_sIdentifier
        ;
        // a parameter is the only kind of module item so far
        m_sModuleItem = m_sParameter;
        m_sModuleItemList %= *m_sModuleItem;
        m_sStart = (m_sModule
                >> _rTokens.m_sIdentifier
                >> m_sModulePortList
                >> m_sModuleItemList
                >> _rTokens.m_sKW_endmodule);
    }
};
// Top-level grammar: a file is any number of discipline, nature or module
// declarations, collected into a symbol::symbolVector.
template<typename Iterator, typename Lexer>
class fileParser
    : public qi::grammar<Iterator, symbol::symbolVector(void), qi::in_state_skipper<Lexer> >
{
public:
    // sub-grammars, one per kind of declaration
    disciplineParser<Iterator, Lexer> m_sDiscipline;
    natureParser<Iterator, Lexer> m_sNature;
    moduleParser<Iterator, Lexer> m_sModule;

    // start rule (sequence of items) and the rule for a single item
    qi::rule<Iterator, symbol::symbolVector(void), qi::in_state_skipper<Lexer> > m_sStart;
    qi::rule<Iterator, symbol::symbolPtr(void), qi::in_state_skipper<Lexer> > m_sItem;

    // Builds all sub-grammars from the same token definitions.
    // The initializers are written in member declaration order, which is the
    // order in which the members are constructed.
    template<typename Tokens>
    fileParser(const Tokens &_rTokens)
        : fileParser::base_type(m_sStart)
        , m_sDiscipline(_rTokens)
        , m_sNature(_rTokens)
        , m_sModule(_rTokens)
    {
        m_sItem = m_sDiscipline | m_sNature | m_sModule;
        m_sStart = *m_sItem;
    }
};
}
// Test driver: tokenizes and parses a fixed sample (three natures and one
// discipline) with the lexer and the file grammar defined above.
int main()
{   // sample input; every source line ends in an explicit "\n" followed by a
    // line continuation
    std::string sInput = "\
nature Current;\n\
  units        = \"A\";\n\
  access       = I;\n\
  idt_nature   = Charge;\n\
  abstol       = 1e-12;\n\
endnature\n\
\n\
// Charge in coulombs\n\
nature Charge;\n\
  units      = \"coul\";\n\
  access     = Q;\n\
  ddt_nature = Current;\n\
  abstol     = 1e-14;\n\
endnature\n\
\n\
// Potential in volts\n\
nature Voltage;\n\
  units      = \"V\";\n\
  access     = V;\n\
  idt_nature = Flux;\n\
  abstol     = 1e-6;\n\
endnature\n\
\n\
discipline electrical;\n\
  potential    Voltage;\n\
  flow         Current;\n\
enddiscipline\n\
";
    // the token may carry any of the value types used by the token definitions
    typedef lex::lexertl::token<frank::myIterator, boost::mpl::vector<frank::type::typePtr, std::string, double, int> > token_type;
    // actor_lexer: the lexer variant that supports semantic actions on tokens
    typedef lex::lexertl::actor_lexer<token_type> lexer_type;
    typedef frank::lexer<lexer_type>::iterator_type iterator_type;
    typedef frank::fileParser<iterator_type, frank::lexer<lexer_type>::lexer_def> grammar_type;

    frank::lexer<lexer_type> sLexer;
    grammar_type sParser(sLexer);
    frank::symbol::push_scope sPush;
    // character iterators over sInput: begin, and (second argument true) end
    auto pStringBegin = frank::myIterator(sInput);
    auto pBegin(sLexer.begin(pStringBegin, frank::myIterator(sInput, true)));
    // parse, skipping the tokens of lexer state "WS"
    // NOTE(review): the result b is not used and nothing is printed or returned
    const auto b = qi::phrase_parse(pBegin, sLexer.end(), sParser, qi::in_state("WS")[sLexer.self]); 
}

回答1:


Has anybody gotten this working for a lexer returning something else than double or int or string?

Sure. Simple examples might be found on this site

And for the parser also returning non-trivial objects?

Here's your real problem. Spirit is nice for a subset of parsers that are expressed easily in a eDSL, and has the huge benefit of "magically" mapping to a selection of attributes.

Some of the realities are:

  • attributes are expected to have value semantics; using polymorphic attributes is hard (How can I use polymorphic attributes with boost::spirit::qi parsers?, e.g.)

  • using Lex makes most of the sweet-spot disappear since all "highlevel" parsers (like real_parser, [u]int_parser) are out the window. The Spirit devs are on record they prefer not to use Lex. Moreover, Spirit X3 doesn't have Lex support anymore.


Background Information:

I'd very much consider parsing the source as-is, into direct value-typed AST nodes. I know, this is probably what you consider "trivial objects", but don't be deceived by apparent simplicity: recursive variant trees have some expressive power.

Examples

  • Here's a trivial AST to represent JSON in <20 LoC: Boost Karma generator for composition of classes¹
  • Here we represent the Graphviz source format with full fidelity: How to use boost spirit list operator with mandatory minimum amount of elements?

I've since created the code to transform that AST into a domain representation with fully correct ownership, cascading lexically scoped node/edge attributes and cross references. I have just recovered that work and put it up on github if you're interested, mainly because the task is pretty similar in many respects, like the overriding/inheriting of properties and resolving identifiers within scopes: https://github.com/sehe/spirit-graphviz/blob/master/spirit-graphviz.cpp#L660

Suggestions, Ideas

In your case I'd take a similar approach to retain simplicity. The code shown doesn't (yet) cover the trickiest ingredients (like nature attribute overrides within a discipline).

Once you start implementing use-cases like resolving compatible disciplines and the absolute tolerances at a given node, you want a domain model with full fidelity. Preferably, there would be no loss of source information, and immutable AST information².

As a middle ground, you could probably avoid building an entire source-AST in memory only to transform it in one big go, at the top-level you could have:

file = qi::skip(skipper) [
        *(m_sDiscipline | m_sNature | m_sModule) [process_ast(_1)]
    ];

Where process_ast would apply the "trivial" AST representation into the domain types, one at a time. That way you keep only small bits of temporary AST representation around.

The domain representation can be arbitrarily sophisticated to support all your logic and use-cases.

Let's "Show, Don't Tell"

Baking the simplest AST that comes to mind matching the grammar³:

namespace frank { namespace ast {
    /// AST node of a nature: its name, the name it inherits from and its
    /// attributes keyed by kind.
    struct nature {
        struct empty {};

        enum class Attribute { units, access, idt, ddt, abstol };
        typedef boost::variant<int, double, std::string> Value;

        std::string name;
        std::string inherits;
        std::map<Attribute, Value> attributes;
    };

    /// AST node of a discipline: its name plus a fixed set of optional properties.
    struct discipline {
        enum enumDomain { eUnspecified, eDiscrete, eContinuous };

        struct properties_t {
            enumDomain domain = eUnspecified;
            boost::optional<std::string> flow;
            boost::optional<std::string> potential;
        };

        std::string name;
        properties_t properties;
    };

    // TODO: module has no AST representation yet
    typedef qi::unused_type module;

    /// A parsed file: the sequence of its top-level declarations.
    typedef std::vector<boost::variant<nature, discipline, module> > file;

    enum class type { real, integer, string };
} }

This is trivial and maps 1:1 onto the grammar productions, which means we have very little impedance.

Tokens? We Don't Need Lex For That

You can have common token parsers without requiring the complexities of Lex

Yes, Lex (especially statically generated) can potentially improve performance, but

  • if you need that, I wager Spirit Qi is not your best option anyways
  • premature optimization...

What I did:

// Character-level "token" rules shared by the grammars. None of the rules
// declares a skipper, which is what the "implicit lexemes" remark refers to.
struct tokens {
    // implicit lexemes
    qi::rule<It, std::string()> string, identifier;
    qi::rule<It, double()> real;
    qi::rule<It, int()> integer;
    qi::rule<It, ast::nature::Value()> value;
    qi::rule<It, ast::nature::Attribute()> attribute;
    qi::rule<It, ast::discipline::enumDomain()> domain;

    // symbol table: attribute keyword -> ast::nature::Attribute
    struct attribute_sym_t : qi::symbols<char, ast::nature::Attribute> {
        attribute_sym_t() {
            this->add
               ("units", ast::nature::Attribute::units)
               ("access", ast::nature::Attribute::access)
               ("idt_nature", ast::nature::Attribute::idt)
               ("ddt_nature", ast::nature::Attribute::ddt)
               ("abstol", ast::nature::Attribute::abstol);
        }
    } attribute_sym;

    // symbol table: domain keyword -> ast::discipline::enumDomain
    struct domain_sym_t : qi::symbols<char, ast::discipline::enumDomain> {
        domain_sym_t() {
            this->add
               ("discrete", ast::discipline::eDiscrete)
               ("continuous", ast::discipline::eContinuous);
        }
    } domain_sym;

    tokens() {
        using namespace qi;
        // kw[p]: p wrapped in the distinct directive, with the identifier
        // characters as tail set, so only whole words match as keywords
        auto kw = qr::distinct(copy(char_("a-zA-Z0-9_")));

        // quoted string: a backslash followed by any character, or any
        // character other than the double quote
        string     = '"' >> *("\\" >> char_ | ~char_('"')) >> '"';
        identifier = char_("a-zA-Z_") >> *char_("a-zA-Z0-9_");
        real       = double_;
        integer    = int_;
        attribute  = kw[attribute_sym];
        domain     = kw[domain_sym];

        // NOTE(review): double_ presumably accepts plain integers as well, in
        // which case the integer alternative is never reached -- confirm.
        value = string | identifier | real | integer;

        BOOST_SPIRIT_DEBUG_NODES((string)(identifier)(real)(integer)(value)(domain)(attribute))
    }
};

Liberating, isn't it? Note how

  • all attributes are automatically propagated
  • strings handle escapes (this bit was commented out in your Lex approach). We don't even need semantic actions to (badly) pry out the unquoted/unescaped value
  • we used distinct to ensure keyword parsing matches only full identifiers. (See How to parse reserved words correctly in boost spirit).

    This is actually where you notice the lack of separate lexer.

    On the flipside, this makes context-sensitive keywords a breeze (lex can easily prioritize keywords over identifiers that occur in places where keywords cannot occur.⁴)

What About Skipping Space/Comments?

We could have added a token, but for reasons of convention I made it a parser:

// Skipper grammar: matches one white space character or one comment.
struct skipParser : qi::grammar<It> {
    skipParser() : skipParser::base_type(spaceOrComment) {
        using namespace qi;
        spaceOrComment = space
            // line comment: up to the end of the line or the end of the input
            | ("//" >> *(char_ - eol) >> (eoi|eol))
            // block comment: up to the first "*/"
            | ("/*" >> *(char_ - "*/") >> "*/");

        BOOST_SPIRIT_DEBUG_NODES((spaceOrComment))
    }
  private:
    qi::rule<It> spaceOrComment;
};

natureParser

We inherit our AST parsers from tokens:

struct natureParser : tokens, qi::grammar<It, ast::nature(), skipParser> {

And from there it is plain sailing:

property = attribute >> '=' >> value >> ';';

nature
    = kw["nature"] >> identifier >> -(':' >> identifier) >> ';'
    >> *property
    >> kw["endnature"];

disciplineParser

discipline = kw["discipline"] >> identifier >> ';' 
    >> properties 
    >> kw["enddiscipline"]
    ;

properties
    = kw["domain"] >> domain >> ';'
    ^ kw["flow"] >> identifier >> ';'
    ^ kw["potential"] >> identifier >> ';'
    ;

This shows a competing approach that uses the permutation operator (^) to parse optional alternatives in any order into a fixed frank::ast::discipline properties struct. Of course, you might elect to have a more generic representation here, like we had with ast::nature.

Module AST is left as an exercise for the reader, though the parser rules are implemented below.

Top Level, Encapsulating The Skipper

I hate having to specify the skipper from the calling code (it's more complex than required, and changing the skipper changes the grammar). So, I encapsulate it in the top-level parser:

// Top-level grammar. It declares no skipper of its own; the skipper is applied
// inside the grammar with qi::skip, so callers do not have to pass one.
struct fileParser : qi::grammar<It, ast::file()> {
    fileParser() : fileParser::base_type(file) {
        // any number of declarations, skipping what m_sSkip matches
        file = qi::skip(qi::copy(m_sSkip)) [
                *(m_sDiscipline | m_sNature | m_sModule)
            ];

        BOOST_SPIRIT_DEBUG_NODES((file))
    }
  private:
    // one sub-grammar per kind of declaration, plus the skipper
    disciplineParser m_sDiscipline;
    natureParser     m_sNature;
    moduleParser     m_sModule;
    skipParser       m_sSkip;

    qi::rule<It, ast::file()> file;
};

Demo Time

This demo adds operator<< for the enums, and a variant visitor to print some AST details for debug/demonstrational purposes (print_em).

Then we have a test driver:

// Test driver: parses sInput into an AST, prints every parsed declaration and
// reports any input the parser did not consume.
int main() {
    typedef std::string::const_iterator iterator_type;

    iterator_type iter = sInput.begin();
    iterator_type last = sInput.end();

    frank::Parsers<iterator_type>::fileParser parser;
    print_em print;
    frank::ast::file file;

    if (!qi::parse(iter, last, parser, file)) {
        std::cout << "Parse failed\n";
    }
    else {
        for (auto& symbol : file)
            print(symbol);
    }

    // iter has been advanced past everything the parser consumed
    if (iter != last) {
        const std::string rest(iter, last);
        std::cout << "Remaining unparsed: '" << rest << "'\n";
    }
}

With the sample input from your question we get the following output:

Live On Coliru

-- Nature
name: Current
inherits: 
attribute: units = A
attribute: access = I
attribute: idt = Charge
attribute: abstol = 1e-12
-- Nature
name: Charge
inherits: 
attribute: units = coul
attribute: access = Q
attribute: ddt = Current
attribute: abstol = 1e-14
-- Nature
name: Voltage
inherits: 
attribute: units = V
attribute: access = V
attribute: idt = Flux
attribute: abstol = 1e-06
-- Discipline
name: electrical
domain: (unspecified)
flow:  Current
potential:  Voltage
Remaining unparsed: '
'

With BOOST_SPIRIT_DEBUG defined, you get rich debug information: Live On Coliru

Full Listing

Live On Coliru

//#define BOOST_SPIRIT_DEBUG
#include <map>
#include <boost/spirit/include/qi.hpp>
#include <boost/fusion/include/adapted.hpp>
#include <boost/spirit/repository/include/qi_distinct.hpp>

namespace qi = boost::spirit::qi;

namespace frank { namespace ast {
    /// AST node of a nature: its name, the name it inherits from and its
    /// attributes keyed by kind.
    struct nature {
        struct empty {};

        enum class Attribute { units, access, idt, ddt, abstol };
        typedef boost::variant<int, double, std::string> Value;

        std::string name;
        std::string inherits;
        std::map<Attribute, Value> attributes;
    };

    /// AST node of a discipline: its name plus a fixed set of optional properties.
    struct discipline {
        enum enumDomain { eUnspecified, eDiscrete, eContinuous };

        struct properties_t {
            enumDomain domain = eUnspecified;
            boost::optional<std::string> flow;
            boost::optional<std::string> potential;
        };

        std::string name;
        properties_t properties;
    };

    // TODO: module has no AST representation yet
    typedef qi::unused_type module;

    /// A parsed file: the sequence of its top-level declarations.
    typedef std::vector<boost::variant<nature, discipline, module> > file;

    enum class type { real, integer, string };
} }

// Adapt the AST structs as Fusion sequences; the members are listed in the
// order in which the grammar rules produce them.
BOOST_FUSION_ADAPT_STRUCT(frank::ast::nature, name, inherits, attributes)
BOOST_FUSION_ADAPT_STRUCT(frank::ast::discipline, name, properties)
BOOST_FUSION_ADAPT_STRUCT(frank::ast::discipline::properties_t, domain, flow, potential)

namespace frank {
    namespace qr = boost::spirit::repository::qi;

    template <typename It> struct Parsers {

        // Character-level "token" rules shared by the grammars. None of the rules
        // declares a skipper, which is what the "implicit lexemes" remark refers to.
        struct tokens {
            // implicit lexemes
            qi::rule<It, std::string()> string, identifier;
            qi::rule<It, double()> real;
            qi::rule<It, int()> integer;
            qi::rule<It, ast::nature::Value()> value;
            qi::rule<It, ast::nature::Attribute()> attribute;
            qi::rule<It, ast::discipline::enumDomain()> domain;

            // symbol table: attribute keyword -> ast::nature::Attribute
            struct attribute_sym_t : qi::symbols<char, ast::nature::Attribute> {
                attribute_sym_t() {
                    this->add
                       ("units", ast::nature::Attribute::units)
                       ("access", ast::nature::Attribute::access)
                       ("idt_nature", ast::nature::Attribute::idt)
                       ("ddt_nature", ast::nature::Attribute::ddt)
                       ("abstol", ast::nature::Attribute::abstol);
                }
            } attribute_sym;

            // symbol table: domain keyword -> ast::discipline::enumDomain
            struct domain_sym_t : qi::symbols<char, ast::discipline::enumDomain> {
                domain_sym_t() {
                    this->add
                       ("discrete", ast::discipline::eDiscrete)
                       ("continuous", ast::discipline::eContinuous);
                }
            } domain_sym;

            tokens() {
                using namespace qi;
                // kw[p]: p wrapped in the distinct directive, with the identifier
                // characters as tail set, so only whole words match as keywords
                auto kw = qr::distinct(copy(char_("a-zA-Z0-9_")));

                // quoted string: a backslash followed by any character, or any
                // character other than the double quote
                string     = '"' >> *("\\" >> char_ | ~char_('"')) >> '"';
                identifier = char_("a-zA-Z_") >> *char_("a-zA-Z0-9_");
                real       = double_;
                integer    = int_;
                attribute  = kw[attribute_sym];
                domain     = kw[domain_sym];

                // NOTE(review): double_ presumably accepts plain integers as well, in
                // which case the integer alternative is never reached -- confirm.
                value = string | identifier | real | integer;

                BOOST_SPIRIT_DEBUG_NODES((string)(identifier)(real)(integer)(value)(domain)(attribute))
            }
        };

        // Skipper grammar: matches one white space character or one comment.
        struct skipParser : qi::grammar<It> {
            skipParser() : skipParser::base_type(spaceOrComment) {
                using namespace qi;
                spaceOrComment = space
                    // line comment: up to the end of the line or the end of the input
                    | ("//" >> *(char_ - eol) >> (eoi|eol))
                    // block comment: up to the first "*/"
                    | ("/*" >> *(char_ - "*/") >> "*/");

                BOOST_SPIRIT_DEBUG_NODES((spaceOrComment))
            }
          private:
            qi::rule<It> spaceOrComment;
        };

        // Grammar for one nature declaration, producing an ast::nature:
        //   nature <identifier> [ : <identifier> ] ; <property>* endnature
        // The token rules are inherited from tokens.
        struct natureParser : tokens, qi::grammar<It, ast::nature(), skipParser> {
            natureParser() : natureParser::base_type(nature) {
                using namespace qi;
                // kw[p]: p wrapped in the distinct directive (identifier characters as tail set)
                auto kw = qr::distinct(copy(char_("a-zA-Z0-9_")));

                // one "<attribute> = <value> ;" line
                property = attribute >> '=' >> value >> ';';

                nature
                    = kw["nature"] >> identifier >> -(':' >> identifier) >> ';'
                    >> *property
                    >> kw["endnature"];

                BOOST_SPIRIT_DEBUG_NODES((nature)(property))
            }
          private:
            // attribute of one property line: (kind, value)
            using Attribute = std::pair<ast::nature::Attribute, ast::nature::Value>;

            qi::rule<It, ast::nature(), skipParser> nature;
            qi::rule<It, Attribute(), skipParser> property;

            // make the inherited rules visible by their plain names
            using tokens::attribute;
            using tokens::value;
            using tokens::identifier;
        };

        // Grammar for one discipline declaration, producing an ast::discipline:
        //   discipline <identifier> ; <properties> enddiscipline
        // The token rules are inherited from tokens.
        struct disciplineParser : tokens, qi::grammar<It, ast::discipline(), skipParser> {
            disciplineParser() : disciplineParser::base_type(discipline) {

                // kw[p]: p wrapped in the distinct directive (identifier characters as tail set)
                auto kw = qr::distinct(qi::copy(qi::char_("a-zA-Z0-9_")));

                discipline = kw["discipline"] >> identifier >> ';' 
                    >> properties 
                    >> kw["enddiscipline"]
                    ;

                // '>>' binds tighter than '^', so this is a permutation of the
                // three "<keyword> <value> ;" sequences (domain, flow, potential)
                properties
                    = kw["domain"] >> domain >> ';'
                    ^ kw["flow"] >> identifier >> ';'
                    ^ kw["potential"] >> identifier >> ';'
                    ;

                BOOST_SPIRIT_DEBUG_NODES((discipline)(properties))
            }
          private:
            qi::rule<It, ast::discipline(), skipParser> discipline;
            qi::rule<It, ast::discipline::properties_t(), skipParser> properties;

            // make the inherited rules visible by their plain names
            using tokens::domain;
            using tokens::identifier;
        };

        // Grammar for one module declaration:
        //   module <identifier> ( <identifier> {, <identifier>} ) <parameter>* endmodule
        // where <parameter> is: parameter (real | integer | string) <identifier>
        // All helper rules are declared without an attribute and ast::module is
        // qi::unused_type, so the grammar only recognizes the syntax.
        struct moduleParser : tokens, qi::grammar<It, ast::module(), skipParser> {
            moduleParser() : moduleParser::base_type(module) {
                // kw[p]: p wrapped in the distinct directive (identifier characters as tail set)
                auto kw = qr::distinct(qi::copy(qi::char_("a-zA-Z0-9_")));

                m_sPort           = identifier;
                m_sPortList       = m_sPort % ',';
                m_sModulePortList = '(' >> m_sPortList >> ')';
                m_sModule         = kw["module"];
                m_sType           = kw["real"] | kw["integer"] | kw["string"];
                m_sParameter      = kw["parameter"] >> m_sType >> identifier;
                // a parameter is the only kind of module item so far
                m_sModuleItem     = m_sParameter;
                m_sModuleItemList = *m_sModuleItem;
                module =
                    (m_sModule >> identifier >> m_sModulePortList >> m_sModuleItemList >> kw["endmodule"]);
            }
          private:
            qi::rule<It, ast::module(), skipParser> module;
            qi::rule<It, skipParser> m_sModulePortList;
            qi::rule<It, skipParser> m_sPortList;
            qi::rule<It, skipParser> m_sPort;
            qi::rule<It, skipParser> m_sModule;

            qi::rule<It, skipParser> m_sModuleItemList;
            qi::rule<It, skipParser> m_sParameter;
            qi::rule<It, skipParser> m_sModuleItem;
            qi::rule<It, skipParser> m_sType;

            // make the inherited rule visible by its plain name
            using tokens::identifier;
        };

        // Top-level grammar. It declares no skipper of its own; the skipper is applied
        // inside the grammar with qi::skip, so callers do not have to pass one.
        struct fileParser : qi::grammar<It, ast::file()> {
            fileParser() : fileParser::base_type(file) {
                // any number of declarations, skipping what m_sSkip matches
                file = qi::skip(qi::copy(m_sSkip)) [
                        *(m_sDiscipline | m_sNature | m_sModule)
                    ];

                BOOST_SPIRIT_DEBUG_NODES((file))
            }
          private:
            // one sub-grammar per kind of declaration, plus the skipper
            disciplineParser m_sDiscipline;
            natureParser     m_sNature;
            moduleParser     m_sModule;
            skipParser       m_sSkip;

            qi::rule<It, ast::file()> file;
        };
    };

}

extern std::string const sInput;

// just for demo
#include <boost/optional/optional_io.hpp>

namespace frank { namespace ast {
    //static inline std::ostream &operator<<(std::ostream &os, const nature::empty &) { return os; }
    // Streams the textual name of a nature attribute; "?" for any value that is
    // not one of the listed enumerators.
    static inline std::ostream &operator<<(std::ostream &os, nature::Attribute a) {
        const char *label = "?";
        switch (a) {
            case nature::Attribute::units:  label = "units";  break;
            case nature::Attribute::access: label = "access"; break;
            case nature::Attribute::idt:    label = "idt";    break;
            case nature::Attribute::ddt:    label = "ddt";    break;
            case nature::Attribute::abstol: label = "abstol"; break;
        }
        return os << label;
    }
    // Streams the textual name of a discipline domain; "?" for any value that is
    // not one of the listed enumerators.
    static inline std::ostream &operator<<(std::ostream &os, discipline::enumDomain d) {
        const char *label = "?";
        switch (d) {
            case discipline::eDiscrete:    label = "discrete";      break;
            case discipline::eContinuous:  label = "continuous";    break;
            case discipline::eUnspecified: label = "(unspecified)"; break;
        }
        return os << label;
    }
} }

struct print_em {
    using result_type = void;
    template <typename V>
    void operator()(V const& variant) const {
        boost::apply_visitor(*this, variant);
    }
    void operator()(frank::ast::nature const& nature) const {
        std::cout << "-- Nature\n";
        std::cout << "name: " << nature.name << "\n";
        std::cout << "inherits: " << nature.inherits << "\n";
        for (auto& a : nature.attributes) {
            std::cout << "attribute: " << a.first << " = " << a.second << "\n";
        }
    }
    void operator()(frank::ast::discipline const& discipline) const {
        std::cout << "-- Discipline\n";
        std::cout << "name: " << discipline.name << "\n";
        std::cout << "domain: " << discipline.properties.domain << "\n";
        std::cout << "flow: " << discipline.properties.flow << "\n";
        std::cout << "potential: " << discipline.properties.potential << "\n";
    }
    void operator()(frank::ast::module const&) const {
        std::cout << "-- Module (TODO)\n";
    }
};

int main() {
    using iterator_type = std::string::const_iterator;

    iterator_type iter = sInput.begin(), last = sInput.end();

    frank::Parsers<iterator_type>::fileParser parser;
    print_em print;

    frank::ast::file file;
    bool ok = parse(iter, last, parser, file);

    if (ok) {
        for (auto& symbol : file)
            print(symbol);
    }
    else {
        std::cout << "Parse failed\n";
    }

    if (iter != last) {
        std::cout << "Remaining unparsed: '" << std::string(iter,last) << "'\n";
    }
}

// Sample input parsed by main(): three nature declarations (Current, Charge,
// Voltage) and one discipline declaration (electrical). The "//" lines inside
// the raw string are part of the input text, not C++ comments.
std::string const sInput = R"(
nature Current;
  units        = "A";
  access       = I;
  idt_nature   = Charge;
  abstol       = 1e-12;
endnature

// Charge in coulombs
nature Charge;
  units      = "coul";
  access     = Q;
  ddt_nature = Current;
  abstol     = 1e-14;
endnature

// Potential in volts
nature Voltage;
  units      = "V";
  access     = V;
  idt_nature = Flux;
  abstol     = 1e-6;
endnature

discipline electrical;
  potential    Voltage;
  flow         Current;
enddiscipline
)";

¹ incidentally, the other answer there demonstrates the "impedance mismatch" with polymorphic attributes and Spirit - this time on the Karma side of it

² (to prevent subtle bugs that depend on evaluation order or things like that, e.g.)

³ (gleaning some from here but not importing too much complexity that wasn't reflected in your Lex approach)

⁴ (In fact, this is where you'd need state-switching inside the grammar, an area notoriously underdeveloped and practically unusable in Spirit Lex. See, e.g., when it works: "how to avoid defining token which matchs everything in boost::spirit::lex"; or when it goes badly: "Boost.Spirit SQL grammar/lexer failure".)




回答2:


One solution would be to use std::string everywhere and define a boost::variant holding everything needed, but never use that variant directly in the parser or lexer; instead, only serialize it into, and deserialize it from, the string.

Is this what the originators of boost::spirit intended?



来源:https://stackoverflow.com/questions/47191531/cannot-get-boostspirit-parserlexer-working-for-token-types-other-than-stdst

易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!