How can I simply consume unrecognized characters?

走远了吗. 提交于 2019-12-04 09:32:27

For what it's worth, here's a significantly simplified version:

Live On Coliru

//#define BOOST_SPIRIT_DEBUG
#ifndef PGNGAMESEXTRACTOR_HPP
#define PGNGAMESEXTRACTOR_HPP

#include <string>
#include <vector>
#include <fstream>
#include <stdexcept>

namespace loloof64 {

// One PGN header tag pair: the name and the quoted value of a `[Key "Value"]` entry.
struct pgn_tag {
    std::string key;   // tag name (the grammar matches it as one or more alphanumeric characters)
    std::string value; // tag value: the text found between the double quotes
};

/// One numbered entry of a game's move list: the move number, white's and
/// black's half-moves as written in the input, and an optional result token.
struct game_move {
    /// Outcome token that may follow a move ("1-0", "0-1", "1/2-1/2" or "*").
    enum result_t { white_won, black_won, draw, undecided };

    unsigned move_number = 0; ///< number written in front of the move
    std::string white_move;   ///< empty when white's half-move is absent
    std::string black_move;   ///< empty when black's half-move is absent
    /// Defaults to `undecided`: without an initializer, a move that carries no
    /// result token would be indeterminate, or read as `white_won`
    /// (enumerator 0) after value-initialization.
    result_t result = undecided;
};

// A single parsed game: its header tag pairs and its move list.
struct pgn_game {
    std::vector<pgn_tag> header;  // tag pairs; the grammar treats the header as optional
    std::vector<game_move> moves; // parsed numbered moves
};

/// Parses PGN text on construction and keeps the resulting games.
///
/// Intended contract: both constructors throw PgnParsingException on
/// malformed PGN and InputFileException when the input cannot be read.
class PgnGamesExtractor {
  public:
    /// Opens the file at `inputFilePath` and parses its content.
    PgnGamesExtractor(std::string inputFilePath);

    /// Parses the content of a caller-supplied stream.
    PgnGamesExtractor(std::istream &inputFile);

    virtual ~PgnGamesExtractor();

    /// Returns a copy of the parsed games.
    std::vector<pgn_game> getGames() const { return games; }

  private:
    void parseInput(std::istream &inputFile);

    std::vector<pgn_game> games;
};

/// Error type used to report input that could not be parsed as PGN.
/// what() returns the message passed to the constructor.
class PgnParsingException : public virtual std::runtime_error {
  public:
    PgnParsingException(std::string message)
        : std::runtime_error(message)
    {
    }
};

/// Error type used to report an input stream that cannot be read.
/// what() returns the message passed to the constructor.
class InputFileException : public virtual std::runtime_error {
  public:
    InputFileException(std::string message)
        : std::runtime_error(message)
    {
    }
};
}

#endif // PGNGAMESEXTRACTOR_HPP

#include <iostream>

#include <boost/spirit/include/qi.hpp>
#include <boost/fusion/include/adapt_struct.hpp>

BOOST_FUSION_ADAPT_STRUCT(loloof64::pgn_tag, key, value)
BOOST_FUSION_ADAPT_STRUCT(loloof64::game_move, move_number, white_move, black_move, result)
BOOST_FUSION_ADAPT_STRUCT(loloof64::pgn_game, header, moves)

namespace loloof64 {
namespace qi = boost::spirit::qi;

// Spirit Qi grammar for a sequence of PGN games.
// The start rule `games` matches zero or more games and produces a
// std::vector<pgn_game>. Rules declared with qi::space_type skip whitespace
// between their components; the rules in the "lexemes" section take no skipper.
template <typename Iterator> struct pgn_parser : qi::grammar<Iterator, std::vector<pgn_game>, qi::space_type> {
    pgn_parser() : pgn_parser::base_type(games) {
        using namespace qi;

        // Empty string used as the attribute of a half-move that is not present.
        const std::string no_move;
        // Result tokens and the enumerator each one maps to.
        result.add
            ("1-0",     game_move::white_won)
            ("0-1",     game_move::black_won)
            ("1/2-1/2", game_move::draw)
            ("*",       game_move::undecided);

        // Text between double quotes; every character except '"' is accepted,
        // so there is no escape syntax for an embedded quote.
        quoted_string    = '"' >> *~char_('"') >> '"';
        // '[' key value ']' where the key is one or more alphanumeric characters.
        tag              = '[' >> +alnum >> quoted_string >> ']';
        header           = +tag;
        // Castling ("O-O-O" is tried before "O-O"), or one or more characters from
        // a-hNBRQK followed by one or more from a-h1-8x=NBRQK and an optional "e.p.".
        regular_move     = lit("O-O-O") | "O-O" | (+char_("a-hNBRQK") >> +char_("a-h1-8x=NBRQK") >> -lit("e.p."));
        // raw[] exposes the matched input text, including an optional trailing
        // '+' or '#', as the string attribute.
        single_move      = raw [ regular_move >> -char_("+#") ];
        // Move number, then either "..." (white's move becomes no_move) or "."
        // followed by white's move; then black's move or no_move; then an
        // optional result token.
        full_move        = uint_ 
            >> (lexeme["..." >> attr(no_move)] | "." >> single_move) 
            >> (single_move | attr(no_move))
            >> -result;

        // A game is an optional header followed by at least one numbered move.
        game_description = +full_move;
        single_game      = -header >> game_description;
        games            = *single_game;

        // Names the rules for debug output; see the commented-out
        // BOOST_SPIRIT_DEBUG define at the top of the listing.
        BOOST_SPIRIT_DEBUG_NODES(
                    (tag)(header)(quoted_string)(regular_move)(single_move)
                    (full_move)(game_description)(single_game)(games)
                )
    }

  private:
    // Rules that use the whitespace skipper.
    qi::rule<Iterator, pgn_tag(),              qi::space_type> tag;
    qi::rule<Iterator, std::vector<pgn_tag>,   qi::space_type> header;

    qi::rule<Iterator, game_move(),            qi::space_type> full_move;
    qi::rule<Iterator, std::vector<game_move>, qi::space_type> game_description;

    qi::rule<Iterator, pgn_game,               qi::space_type> single_game;
    qi::rule<Iterator, std::vector<pgn_game>,  qi::space_type> games;

    // lexemes (declared without a skipper, so no whitespace is skipped inside them)
    qi::symbols<char, game_move::result_t> result;
    qi::rule<Iterator, std::string()> quoted_string;
    qi::rule<Iterator> regular_move;
    qi::rule<Iterator, std::string()> single_move;
};
}

// Opens `inputFilePath` for reading and hands the stream to parseInput(),
// which checks the stream state before parsing.
loloof64::PgnGamesExtractor::PgnGamesExtractor(std::string inputFilePath) {
    std::ifstream pgnStream;
    pgnStream.open(inputFilePath);
    parseInput(pgnStream);
}

// Parses games directly from a caller-supplied stream.
loloof64::PgnGamesExtractor::PgnGamesExtractor(std::istream &inputFile) {
    this->parseInput(inputFile);
}

// Nothing to release by hand: the `games` vector cleans up after itself.
loloof64::PgnGamesExtractor::~PgnGamesExtractor() = default;

void loloof64::PgnGamesExtractor::parseInput(std::istream &inputFile) {
    if (inputFile.fail() || inputFile.bad())
        throw new InputFileException("Could not read the input file !");

    typedef boost::spirit::istream_iterator It;
    loloof64::pgn_parser<It> parser;
    std::vector<loloof64::pgn_game> temp_games;

    It iter(inputFile >> std::noskipws), end;

    bool success = boost::spirit::qi::phrase_parse(iter, end, parser, boost::spirit::qi::space, temp_games);

    if (success && iter == end) {
        games.swap(temp_games);
    } else {
        std::string error_fragment(iter, end);
        throw PgnParsingException("Failed to parse the input at :'" + error_fragment + "' !");
    }
}

int main() {
    loloof64::PgnGamesExtractor pge(std::cin); // "ScotchGambit.pgn"
    std::cout << "Parsed " << pge.getGames().size() << " games\n";
    for (auto& g : pge.getGames())
        for (auto& m : g.moves)
            std::cout << m.move_number << ".\t" << m.white_move << "\t" << m.black_move << "\n";
}

Notes:

  • don't read full file in memory (boost::spirit::istream_iterator)
  • don't manually skip (use skippers)
  • don't explicitly lexeme (Boost spirit skipper issues)
  • don't use %= if not needed
  • don't synthesize unneeded attributes (use raw[])
  • treat optional parts of move as optional, don't store asymmetric magic flags like "..." (look for no_move)
  • don't be overly specific (use istream& instead of ifstream&)

Probably some other things I forgot. Output is e.g.

Parsed 6166 games
1.  e4  e5
2.  Nf3 Nc6
3.  d4  exd4
4.  Bc4 Qf6
5.  O-O d6
6.  Ng5 Nh6
7.  f4  Be7
8.  e5  Qg6
9.  exd6    cxd6
10. c3  dxc3
11. Nxc3    O-O
12. Nd5 Bd7
13. Rf3 Bg4
14. Bd3 Bxf3
15. Qxf3    f5
16. Bc4 Kh8
17. Nxe7    Nxe7
18. Qxb7    Qf6
19. Be3 Rfb8
20. Qd7 Rd8
21. Qb7 d5
22. Bb3 Nc6
23. Bxd5    Nd4
24. Rd1 Ne2+
25. Kf1 Rab8
26. Qxa7    Rxb2
27. Ne6 Qxe6
28. Bxe6    Rxd1+
29. Kf2 
1.  e4  e5
2.  Nf3 Nc6
3.  d4  exd4
4.  Bc4 Bc5
5.  Ng5 Ne5
6.  Bxf7+   Nxf7
7.  Nxf7    Bb4+
8.  c3  dxc3
9.  bxc3    Bxc3+
10. Nxc3    Kxf7
11. Qd5+    Kf8
12. Ba3+    d6
13. e5  Qg5
14. exd6    Qxd5

As requested the simple X3 translation.

  • fewer lines of code (10 lines)
  • compilation time down from 7.4s to 3.6s (clang)
  • compilation time down from 11.4s to 6.0s (gcc5)
  • runtime down from 0.80s to 0.55s (clang and gcc)

The outputs are identical (exactly).

Live On Coliru

//#define BOOST_SPIRIT_DEBUG
#ifndef PGNGAMESEXTRACTOR_HPP
#define PGNGAMESEXTRACTOR_HPP

#include <string>
#include <vector>
#include <fstream>
#include <stdexcept>

namespace loloof64 {

// One PGN header tag pair: the name and the quoted value of a `[Key "Value"]` entry.
struct pgn_tag {
    std::string key;   // tag name (the parser matches it as one or more alphanumeric characters)
    std::string value; // tag value: the text found between the double quotes
};

/// One numbered entry of a game's move list: the move number, white's and
/// black's half-moves as written in the input, and an optional result token.
struct game_move {
    /// Outcome token that may follow a move ("1-0", "0-1", "1/2-1/2" or "*").
    enum result_t { white_won, black_won, draw, undecided };

    unsigned move_number = 0; ///< number written in front of the move
    std::string white_move;   ///< empty when white's half-move is absent
    std::string black_move;   ///< empty when black's half-move is absent
    /// Defaults to `undecided`: without an initializer, a move that carries no
    /// result token would be indeterminate, or read as `white_won`
    /// (enumerator 0) after value-initialization.
    result_t result = undecided;
};

// A single parsed game: its header tag pairs and its move list.
struct pgn_game {
    std::vector<pgn_tag> header;  // tag pairs; the parser treats the header as optional
    std::vector<game_move> moves; // parsed numbered moves
};

/// Parses PGN text on construction and keeps the resulting games.
///
/// Intended contract: both constructors throw PgnParsingException on
/// malformed PGN and InputFileException when the input cannot be read.
class PgnGamesExtractor {
  public:
    /// Opens the file at `inputFilePath` and parses its content.
    PgnGamesExtractor(std::string inputFilePath);

    /// Parses the content of a caller-supplied stream.
    PgnGamesExtractor(std::istream &inputFile);

    virtual ~PgnGamesExtractor();

    /// Returns a copy of the parsed games.
    std::vector<pgn_game> getGames() const { return games; }

  private:
    void parseInput(std::istream &inputFile);

    std::vector<pgn_game> games;
};

/// Error type used to report input that could not be parsed as PGN.
/// what() returns the message passed to the constructor.
class PgnParsingException : public virtual std::runtime_error {
  public:
    PgnParsingException(std::string message)
        : std::runtime_error(message)
    {
    }
};

/// Error type used to report an input stream that cannot be read.
/// what() returns the message passed to the constructor.
class InputFileException : public virtual std::runtime_error {
  public:
    InputFileException(std::string message)
        : std::runtime_error(message)
    {
    }
};
}

#endif // PGNGAMESEXTRACTOR_HPP

#include <boost/spirit/home/x3.hpp>
#include <boost/spirit/include/support_istream_iterator.hpp>
#include <boost/fusion/include/adapt_struct.hpp>

BOOST_FUSION_ADAPT_STRUCT(loloof64::pgn_tag, key, value)
BOOST_FUSION_ADAPT_STRUCT(loloof64::game_move, move_number, white_move, black_move, result)
BOOST_FUSION_ADAPT_STRUCT(loloof64::pgn_game, header, moves)

namespace loloof64 {
    // Spirit X3 parsers for a sequence of PGN games. `games` is the entry
    // point; it matches zero or more games.
    namespace pgn_parser {
        using namespace boost::spirit::x3;

        // Empty string used as the attribute of a half-move that is not present.
        static std::string const no_move;
        // Result tokens and the enumerator each one maps to; the table is
        // built once by the immediately invoked lambda.
        static auto const result = []{
            symbols<game_move::result_t> table;
            table.add
                ("1-0",     game_move::white_won)
                ("0-1",     game_move::black_won)
                ("1/2-1/2", game_move::draw)
                ("*",       game_move::undecided);
            return table;
        }();

        // Text between double quotes; lexeme[] turns skipping off inside, and
        // every character except '"' is accepted (no escape syntax).
        static auto const quoted_string    = lexeme['"' >> *~char_('"') >> '"'];
        // '[' key value ']' where the key is one or more alphanumeric characters.
        static auto const tag              = '[' >> +alnum >> quoted_string >> ']';
        static auto const header           = +tag;
        // Castling ("O-O-O" is tried before "O-O"), or one or more characters from
        // a-hNBRQK followed by one or more from a-h1-8x=NBRQK and an optional "e.p.".
        static auto const regular_move     = as_parser("O-O-O") | "O-O" | (+char_("a-hNBRQK") >> +char_("a-h1-8x=NBRQK") >> -lit("e.p."));
        // raw[] exposes the matched input text, including an optional trailing
        // '+' or '#', as the rule's std::string attribute.
        static auto const single_move      = rule<struct single_move_, std::string> { "single_move" }
                                           = raw [ lexeme [ regular_move >> -char_("+#")] ];
        // Move number, then either "..." (white's move becomes no_move) or "."
        // followed by white's move; then black's move or no_move; then an
        // optional result token.
        static auto const full_move        = rule<struct full_move_, game_move> { "full_move" }
                                     = uint_ 
            >> (lexeme["..." >> attr(no_move)] | "." >> single_move) 
            >> (single_move | attr(no_move))
            >> -result;

        // A game is an optional header followed by at least one numbered move.
        static auto const game_description = +full_move;
        static auto const single_game      = rule<struct single_game_, pgn_game> { "single_game" }
                                           = -header >> game_description;
        static auto const games            = *single_game;
    }

}

// Opens `inputFilePath` for reading and hands the stream to parseInput(),
// which checks the stream state before parsing.
loloof64::PgnGamesExtractor::PgnGamesExtractor(std::string inputFilePath) {
    std::ifstream pgnStream;
    pgnStream.open(inputFilePath);
    parseInput(pgnStream);
}

// Parses games directly from a caller-supplied stream.
loloof64::PgnGamesExtractor::PgnGamesExtractor(std::istream &inputFile) {
    this->parseInput(inputFile);
}

// Nothing to release by hand: the `games` vector cleans up after itself.
loloof64::PgnGamesExtractor::~PgnGamesExtractor() = default;

void loloof64::PgnGamesExtractor::parseInput(std::istream &inputFile) {
    if (inputFile.fail() || inputFile.bad())
        throw new InputFileException("Could not read the input file !");

    typedef boost::spirit::istream_iterator It;
    std::vector<loloof64::pgn_game> temp_games;

    It iter(inputFile >> std::noskipws), end;

    bool success = boost::spirit::x3::phrase_parse(iter, end, pgn_parser::games, boost::spirit::x3::space, temp_games);

    if (success && iter == end) {
        games.swap(temp_games);
    } else {
        std::string error_fragment(iter, end);
        throw PgnParsingException("Failed to parse the input at :'" + error_fragment + "' !");
    }
}

#include <iostream>

int main() {
    loloof64::PgnGamesExtractor pge("ScotchGambit.pgn");
    std::cout << "Parsed " << pge.getGames().size() << " games\n";
    for (auto& g : pge.getGames())
        for (auto& m : g.moves)
            std::cout << m.move_number << ".\t" << m.white_move << "\t" << m.black_move << "\n";
}

Indeed the problem is with Veronica. Or, actually, it's with Ver?nica. Where ? is the code unit <93> - which, lacking codepage/encoding information could mean anything really.

You're using ascii::char_, which only accepts 7-bit characters.

Easily fix it by changing

using ascii::char_;

into

using qi::char_;
易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!