问题
I have prepared a small benchmark program for measuring different ways of parsing. The problem comes with the huge decrease in performance when using a stream and a custom function for storing a date as a time_t + double.
The weird boost spirit trait for std::string is because seek backtracking fills the variable string with all the common parts of non-matching lines until a line that matches is found.
Sorry for the source code quality (copy/paste, bad variable names, weak indentation...). I am aware that this benchmark code is not going to be included in Clean Code book, so please ignore this fact and let's focus on the subject.
I understand that the fastest way is using a string without backtracking, but time increment for stream is really strange. Can someone explain me what is going on?
#include <boost/fusion/adapted/struct.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/spirit/repository/include/qi_seek.hpp>
#include <boost/chrono/chrono.hpp>
#include <iomanip>
#include <ctime>
typedef std::string::const_iterator It;
namespace structs {
struct Timestamp {
std::time_t date;
double ms;
friend std::istream& operator>> (std::istream& stream, Timestamp& time)
{
struct std::tm tm;
if (stream >> std::get_time(&tm, "%Y-%b-%d %H:%M:%S") >> time.ms)
time.date = std::mktime(&tm);
return stream;
}
};
struct Record1 {
std::string date;
double time;
std::string str;
};
struct Record2 {
Timestamp date;
double time;
std::string str;
};
typedef std::vector<Record1> Records1;
typedef std::vector<Record2> Records2;
}
BOOST_FUSION_ADAPT_STRUCT(structs::Record1,
(std::string, date)
(double, time)
(std::string, str))
BOOST_FUSION_ADAPT_STRUCT(structs::Record2,
(structs::Timestamp, date)
(double, time)
(std::string, str))
namespace boost { namespace spirit { namespace traits {
template <typename It>
struct assign_to_attribute_from_iterators<std::string, It, void> {
static inline void call(It f, It l, std::string& attr) {
attr = std::string(&*f, std::distance(f,l));
}
};
} } }
namespace qi = boost::spirit::qi;
namespace QiParsers {
template <typename It>
struct Parser1 : qi::grammar<It, structs::Record1()>
{
Parser1() : Parser1::base_type(start) {
using namespace qi;
start = '[' >> raw[*~char_(']')] >> ']'
>> " - " >> double_ >> " s"
>> " => String: " >> raw[+graph]
>> eol;
}
private:
qi::rule<It, structs::Record1()> start;
};
template <typename It>
struct Parser2 : qi::grammar<It, structs::Record2()>
{
Parser2() : Parser2::base_type(start) {
using namespace qi;
start = '[' >> stream >> ']'
>> " - " >> double_ >> " s"
>> " => String: " >> raw[+graph]
>> eol;
}
private:
qi::rule<It, structs::Record2()> start;
};
template <typename It>
struct Parser3 : qi::grammar<It, structs::Records1()>
{
Parser3() : Parser3::base_type(start) {
using namespace qi;
using boost::phoenix::push_back;
line = '[' >> raw[*~char_(']')] >> ']'
>> " - " >> double_ >> " s"
>> " => String: " >> raw[+graph];
ignore = *~char_("\r\n");
start = (line[push_back(_val, _1)] | ignore) % eol;
}
private:
qi::rule<It> ignore;
qi::rule<It, structs::Record1()> line;
qi::rule<It, structs::Records1()> start;
};
template <typename It>
struct Parser4 : qi::grammar<It, structs::Records2()>
{
Parser4() : Parser4::base_type(start) {
using namespace qi;
using boost::phoenix::push_back;
line = '[' >> stream >> ']'
>> " - " >> double_ >> " s"
>> " => String: " >> raw[+graph];
ignore = *~char_("\r\n");
start = (line[push_back(_val, _1)] | ignore) % eol;
}
private:
qi::rule<It> ignore;
qi::rule<It, structs::Record2()> line;
qi::rule<It, structs::Records2()> start;
};
}
template<typename Parser, typename Container>
Container parse_seek(It b, It e, const std::string& message)
{
static const Parser parser;
Container records;
boost::chrono::high_resolution_clock::time_point t0 = boost::chrono::high_resolution_clock::now();
parse(b, e, *boost::spirit::repository::qi::seek[parser], records);
boost::chrono::high_resolution_clock::time_point t1 = boost::chrono::high_resolution_clock::now();
auto elapsed = boost::chrono::duration_cast<boost::chrono::milliseconds>(t1 - t0);
std::cout << "Elapsed time: " << elapsed.count() << " ms (" << message << ")\n";
return records;
}
template<typename Parser, typename Container>
Container parse_ignoring(It b, It e, const std::string& message)
{
static const Parser parser;
Container records;
boost::chrono::high_resolution_clock::time_point t0 = boost::chrono::high_resolution_clock::now();
parse(b, e, parser, records);
boost::chrono::high_resolution_clock::time_point t1 = boost::chrono::high_resolution_clock::now();
auto elapsed = boost::chrono::duration_cast<boost::chrono::milliseconds>(t1 - t0);
std::cout << "Elapsed time: " << elapsed.count() << " ms (" << message << ")\n";
return records;
}
static const std::string input1 = "[2018-Mar-01 00:00:00.000000] - 1.000 s => String: Valid_string\n";
static const std::string input2 = "[2018-Mar-02 00:00:00.000000] - 2.000 s => I dont care\n";
static std::string input("");
int main() {
const int N1 = 10;
const int N2 = 100000;
input.reserve(N1 * (input1.size() + N2*input2.size()));
for (int i = N1; i--;)
{
input += input1;
for (int j = N2; j--;)
input += input2;
}
const auto records1 = parse_seek<QiParsers::Parser1<It>, structs::Records1>(input.begin(), input.end(), "std::string + seek");
const auto records2 = parse_seek<QiParsers::Parser2<It>, structs::Records2>(input.begin(), input.end(), "stream + seek");
const auto records3 = parse_ignoring<QiParsers::Parser3<It>, structs::Records1>(input.begin(), input.end(), "std::string + ignoring");
const auto records4 = parse_ignoring<QiParsers::Parser4<It>, structs::Records2>(input.begin(), input.end(), "stream + ignoring");
return 0;
}
The results in the console are:
Elapsed time: 1445 ms (std::string + seek)
Elapsed time: 21519 ms (stream + seek)
Elapsed time: 860 ms (std::string + ignoring)
Elapsed time: 19046 ms (stream + ignoring)
回答1:
Okay, in the code posted, 70%¹ of time is spent in the stream's underflow operation.

I haven't looked into /why/ that is, but instead² wrote a few naive implementations to see whether I could do better. First steps:
² Update I've since analyzed it and provided a PR.
The improvement created by that PR does not affect the bottom line in this particular case (see SUMMARY)
- drop
operator>>
forTimestamp
(we won't be using that) - replace all instances of
'[' >> stream >> ']'
with the alternative'[' >> raw[*~char_(']')] >> ']'
so that we will always be using the trait to transform the iterator range into the attribute type (std::string
orTimestamp
)
Now, we implement the assign_to_attribute_from_iterators<structs::Timestamp, It>
trait:
Variant 1: Array Source
template <typename It>
struct assign_to_attribute_from_iterators<structs::Timestamp, It, void> {
static inline void call(It f, It l, structs::Timestamp& time) {
boost::iostreams::stream<boost::iostreams::array_source> stream(f, l);
struct std::tm tm;
if (stream >> std::get_time(&tm, "%Y-%b-%d %H:%M:%S") >> time.ms)
time.date = std::mktime(&tm);
else throw "Parse failure";
}
};
Profiling with callgrind:

It does improve considerably, probably because we make the assumption that the underlying char-buffer is contiguous, where the Spirit implementation cannot make that assumption. We spend ~42% of the time in time_get
.
Roughly speaking, 25% of time is devoted to locale stuff, of which a worrying ~20% is spent doing dynamic casts :(
Variant 2: Array Source with re-use
Same, but reusing a static stream instance to see whether it makes a significant difference:
static boost::iostreams::stream<boost::iostreams::array_source> s_stream;
template <typename It>
struct assign_to_attribute_from_iterators<structs::Timestamp, It, void> {
static inline void call(It f, It l, structs::Timestamp& time) {
struct std::tm tm;
if (s_stream.is_open()) s_stream.close();
s_stream.clear();
boost::iostreams::array_source as(f, l);
s_stream.open(as);
if (s_stream >> std::get_time(&tm, "%Y-%b-%d %H:%M:%S") >> time.ms)
time.date = std::mktime(&tm);
else throw "Parse failure";
}
};
Profiling reveals is no significant difference).
Variant 3: strptime
and strtod
/from_chars
Let's see if dropping to C-level reduces the locale hurt:
template <typename It>
struct assign_to_attribute_from_iterators<structs::Timestamp, It, void> {
static inline void call(It f, It l, structs::Timestamp& time) {
struct std::tm tm;
auto remain = strptime(&*f, "%Y-%b-%d %H:%M:%S", &tm);
time.date = std::mktime(&tm);
#if __has_include(<charconv>) || __cpp_lib_to_chars >= 201611
auto result = std::from_chars(&*f, &*l, time.ms); // using <charconv> from c++17
#else
char* end;
time.ms = std::strtod(remain, &end);
assert(end > remain);
static_cast<void>(l); // unused
#endif
}
};
As you can see, using
strtod
is a bit suboptimal here. The input range is bounded, but there's no way to tellstrtod
about that. I have not been able to profile thefrom_chars
approach, which is strictly safer because it doesn't have this issue.In practice for your sample code it is safe to use
strtod
because we know the input buffer is NUL-terminated.

Here you can see that parsing the date-time is still a factor of concern:
- mktime 15.58 %
- strptime 40.54 %
- strtod 5.88 %
But all in all the difference is less egregious now:
- Parser1: 14.17 %
- Parser2: 43.44 %
- Parser3: 5.69 %
- Parser4: 35.49 %
Variant 4: Boost DateTime Again
Interestingly, the performance of the "lowlevel" C-APIs is not far from using the much more highlevel Boost posix_time::ptime
functions:
template <typename It>
struct assign_to_attribute_from_iterators<structs::Timestamp, It, void> {
static inline void call(It f, It l, structs::Timestamp& time) {
time.date = to_time_t(boost::posix_time::time_from_string(std::string(f,l)));
}
};
This might sacrifice some precision, according to the docs:

Here, the total time spent parsing date and time is 68%. The relative speeds of the parsers are close to the last ones:
- Parser1: 12.33 %
- Parser2: 43.86 %
- Parser3: 5.22 %
- Parser4: 37.43 %
SUMMARY
All in all, it turns out that storing the strings seems faster, even if you risk allocating more. I've done a very simple check whether this could be down to SSO by increasing the length of the substring:
static const std::string input1 = "[2018-Mar-01 00:01:02.012345 THWARTING THE SMALL STRING OPTIMIZATION HERE THIS WON'T FIT, NO DOUBT] - 1.000 s => String: Valid_string\n";
static const std::string input2 = "[2018-Mar-02 00:01:02.012345 THWARTING THE SMALL STRING OPTIMIZATION HERE THIS WON'T FIT, NO DOUBT] - 2.000 s => I dont care\n";
There was no significant impact, so that leaves the parsing itself.
It seems clear that either you will want to delay parsing the time (Parser3
is by far the quickest) or should go with the time-tested Boost posix_time
functions.
LISTING
Here's the combined benchmark code I used. A few things changed:
- added some sanity check output (to avoid testing nonsensical code)
- made the iterator generic (changing to
char*
has no significant effect on performance in optimized builds) - the above variants are all manually switchable in the code by changing
#if 1
to#if 0
in the right spots - reduced N1/N2 for convenience
I've liberally used C++14 because the purpose of the code was to find bottlenecks. Any wisdom gained can be backported relatively easily after the profiling.
Live On Coliru
#include <boost/fusion/adapted/struct.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/spirit/repository/include/qi_seek.hpp>
#include <boost/date_time/posix_time/posix_time.hpp>
#include <boost/chrono/chrono.hpp>
#include <iomanip>
#include <ctime>
#if __has_include(<charconv>) || __cpp_lib_to_chars >= 201611
# include <charconv> // not supported yet until GCC 8
#endif
namespace structs {
struct Timestamp {
std::time_t date;
double ms;
};
struct Record1 {
std::string date;
double time;
std::string str;
};
struct Record2 {
Timestamp date;
double time;
std::string str;
};
typedef std::vector<Record1> Records1;
typedef std::vector<Record2> Records2;
}
BOOST_FUSION_ADAPT_STRUCT(structs::Record1,
(std::string, date)
(double, time)
(std::string, str))
BOOST_FUSION_ADAPT_STRUCT(structs::Record2,
(structs::Timestamp, date)
(double, time)
(std::string, str))
namespace boost { namespace spirit { namespace traits {
template <typename It>
struct assign_to_attribute_from_iterators<std::string, It, void> {
static inline void call(It f, It l, std::string& attr) {
attr = std::string(&*f, std::distance(f,l));
}
};
static boost::iostreams::stream<boost::iostreams::array_source> s_stream;
template <typename It>
struct assign_to_attribute_from_iterators<structs::Timestamp, It, void> {
static inline void call(It f, It l, structs::Timestamp& time) {
#if 1
time.date = to_time_t(boost::posix_time::time_from_string(std::string(f,l)));
#elif 1
struct std::tm tm;
boost::iostreams::stream<boost::iostreams::array_source> stream(f, l);
if (stream >> std::get_time(&tm, "%Y-%b-%d %H:%M:%S") >> time.ms)
time.date = std::mktime(&tm);
else
throw "Parse failure";
#elif 1
struct std::tm tm;
if (s_stream.is_open()) s_stream.close();
s_stream.clear();
boost::iostreams::array_source as(f, l);
s_stream.open(as);
if (s_stream >> std::get_time(&tm, "%Y-%b-%d %H:%M:%S") >> time.ms)
time.date = std::mktime(&tm);
else
throw "Parse failure";
#else
struct std::tm tm;
auto remain = strptime(&*f, "%Y-%b-%d %H:%M:%S", &tm);
time.date = std::mktime(&tm);
#if __has_include(<charconv>) || __cpp_lib_to_chars >= 201611
auto result = std::from_chars(&*f, &*l, time.ms); // using <charconv> from c++17
#else
char* end;
time.ms = std::strtod(remain, &end);
assert(end > remain);
static_cast<void>(l); // unused
#endif
#endif
}
};
} } }
namespace qi = boost::spirit::qi;
namespace QiParsers {
template <typename It>
struct Parser1 : qi::grammar<It, structs::Record1()>
{
Parser1() : Parser1::base_type(start) {
using namespace qi;
start = '[' >> raw[*~char_(']')] >> ']'
>> " - " >> double_ >> " s"
>> " => String: " >> raw[+graph]
>> eol;
}
private:
qi::rule<It, structs::Record1()> start;
};
template <typename It>
struct Parser2 : qi::grammar<It, structs::Record2()>
{
Parser2() : Parser2::base_type(start) {
using namespace qi;
start = '[' >> raw[*~char_(']')] >> ']'
>> " - " >> double_ >> " s"
>> " => String: " >> raw[+graph]
>> eol;
}
private:
qi::rule<It, structs::Record2()> start;
};
template <typename It>
struct Parser3 : qi::grammar<It, structs::Records1()>
{
Parser3() : Parser3::base_type(start) {
using namespace qi;
using boost::phoenix::push_back;
line = '[' >> raw[*~char_(']')] >> ']'
>> " - " >> double_ >> " s"
>> " => String: " >> raw[+graph];
ignore = *~char_("\r\n");
start = (line[push_back(_val, _1)] | ignore) % eol;
}
private:
qi::rule<It> ignore;
qi::rule<It, structs::Record1()> line;
qi::rule<It, structs::Records1()> start;
};
template <typename It>
struct Parser4 : qi::grammar<It, structs::Records2()>
{
Parser4() : Parser4::base_type(start) {
using namespace qi;
using boost::phoenix::push_back;
line = '[' >> raw[*~char_(']')] >> ']'
>> " - " >> double_ >> " s"
>> " => String: " >> raw[+graph];
ignore = *~char_("\r\n");
start = (line[push_back(_val, _1)] | ignore) % eol;
}
private:
qi::rule<It> ignore;
qi::rule<It, structs::Record2()> line;
qi::rule<It, structs::Records2()> start;
};
}
template <typename Parser> static const Parser s_instance {};
template<template <typename> class Parser, typename Container, typename It>
Container parse_seek(It b, It e, const std::string& message)
{
Container records;
auto const t0 = boost::chrono::high_resolution_clock::now();
parse(b, e, *boost::spirit::repository::qi::seek[s_instance<Parser<It> >], records);
auto const t1 = boost::chrono::high_resolution_clock::now();
auto elapsed = boost::chrono::duration_cast<boost::chrono::milliseconds>(t1 - t0);
std::cout << "Elapsed time: " << elapsed.count() << " ms (" << message << ")\n";
return records;
}
template<template <typename> class Parser, typename Container, typename It>
Container parse_ignoring(It b, It e, const std::string& message)
{
Container records;
auto const t0 = boost::chrono::high_resolution_clock::now();
parse(b, e, s_instance<Parser<It> >, records);
auto const t1 = boost::chrono::high_resolution_clock::now();
auto elapsed = boost::chrono::duration_cast<boost::chrono::milliseconds>(t1 - t0);
std::cout << "Elapsed time: " << elapsed.count() << " ms (" << message << ")\n";
return records;
}
static const std::string input1 = "[2018-Mar-01 00:01:02.012345] - 1.000 s => String: Valid_string\n";
static const std::string input2 = "[2018-Mar-02 00:01:02.012345] - 2.000 s => I dont care\n";
std::string prepare_input() {
std::string input;
const int N1 = 10;
const int N2 = 1000;
input.reserve(N1 * (input1.size() + N2*input2.size()));
for (int i = N1; i--;) {
input += input1;
for (int j = N2; j--;)
input += input2;
}
return input;
}
int main() {
auto const input = prepare_input();
auto f = input.data(), l = f + input.length();
for (auto& r: parse_seek<QiParsers::Parser1, structs::Records1>(f, l, "std::string + seek")) {
std::cout << r.date << "\n";
break;
}
for (auto& r: parse_seek<QiParsers::Parser2, structs::Records2>(f, l, "stream + seek")) {
auto tm = *std::localtime(&r.date.date);
std::cout << std::put_time(&tm, "%Y-%b-%d %H:%M:%S") << "\n";
break;
}
for (auto& r: parse_ignoring<QiParsers::Parser3, structs::Records1>(f, l, "std::string + ignoring")) {
std::cout << r.date << "\n";
break;
}
for (auto& r: parse_ignoring<QiParsers::Parser4, structs::Records2>(f, l, "stream + ignoring")) {
auto tm = *std::localtime(&r.date.date);
std::cout << std::put_time(&tm, "%Y-%b-%d %H:%M:%S") << "\n";
break;
}
}
Printing something like
Elapsed time: 14 ms (std::string + seek)
2018-Mar-01 00:01:02.012345
Elapsed time: 29 ms (stream + seek)
2018-Mar-01 00:01:02
Elapsed time: 2 ms (std::string + ignoring)
2018-Mar-01 00:01:02.012345
Elapsed time: 22 ms (stream + ignoring)
2018-Mar-01 00:01:02
¹ all percentages are relative to total program cost. That does skew the percentages (the 70% mentioned would be even worse if the non-stream parser tests weren't taken into account), but the numbers are a good enough guide for relative comparions within a test run.
回答2:
The stream
parser ends up doing this:
template <typename Iterator, typename Context
, typename Skipper, typename Attribute>
bool parse(Iterator& first, Iterator const& last
, Context& /*context*/, Skipper const& skipper
, Attribute& attr_) const
{
typedef qi::detail::iterator_source<Iterator> source_device;
typedef boost::iostreams::stream<source_device> instream;
qi::skip_over(first, last, skipper);
instream in(first, last); // copies 'first'
in >> attr_; // use existing operator>>()
// advance the iterator if everything is ok
if (in) {
if (!in.eof()) {
std::streamsize pos = in.tellg();
std::advance(first, pos);
} else {
first = last;
}
return true;
}
return false;
}
The detail::iterator_source<Iterator>
device is a costly abstraction because it needs to be generic. It needs to be able support forward-only iterators¹.
Specializing For Random Access Iterators
I've created a Pull Request to specialize on random-access iterators: https://github.com/boostorg/spirit/pull/383 which specializes iterator_source
for random-access iterators:
std::streamsize read (char_type* s, std::streamsize n)
{
if (first == last)
return -1;
n = std::min(std::distance(first, last), n);
// copy_n is only part of c++11, so emulate it
std::copy(first, first + n, s);
first += n;
pos += n;
return n;
}
Without the specialization we can observe these timings: Interactive Plot.ly
(100 samples, confidence interval 0.95)
benchmarking std::string + seek mean: 31.9222 ms std dev: 228.862 μs
benchmarking std::string + ignoring mean: 16.1855 ms std dev: 257.903 μs
benchmarking stream + seek mean: 1075.46 ms std dev: 22.23 ms
benchmarking stream + ignoring mean: 1064.41 ms std dev: 26.7218 ms
With the specialization we can observe these timings: Interactive Plot.ly
benchmarking std::string + seek mean: 31.8703 ms std dev: 529.196 μs
benchmarking std::string + ignoring mean: 15.913 ms std dev: 848.514 μs
benchmarking stream + seek mean: 436.263 ms std dev: 19.4035 ms
benchmarking stream + ignoring mean: 419.539 ms std dev: 20.0511 ms
Nonius Benchmark Code
#include <boost/fusion/adapted/struct.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/spirit/repository/include/qi_seek.hpp>
#include <boost/chrono/chrono.hpp>
#include <iomanip>
#include <ctime>
namespace structs {
struct Timestamp {
std::time_t date;
double ms;
friend std::istream& operator>> (std::istream& stream, Timestamp& time)
{
struct std::tm tm;
if (stream >> std::get_time(&tm, "%Y-%b-%d %H:%M:%S") >> time.ms)
time.date = std::mktime(&tm);
return stream;
}
};
struct Record1 {
std::string date;
double time;
std::string str;
};
struct Record2 {
Timestamp date;
double time;
std::string str;
};
typedef std::vector<Record1> Records1;
typedef std::vector<Record2> Records2;
}
BOOST_FUSION_ADAPT_STRUCT(structs::Record1,
(std::string, date)
(double, time)
(std::string, str))
BOOST_FUSION_ADAPT_STRUCT(structs::Record2,
(structs::Timestamp, date)
(double, time)
(std::string, str))
namespace boost { namespace spirit { namespace traits {
template <typename It>
struct assign_to_attribute_from_iterators<std::string, It, void> {
static inline void call(It f, It l, std::string& attr) {
attr = std::string(&*f, std::distance(f,l));
}
};
} } }
namespace qi = boost::spirit::qi;
namespace QiParsers {
template <typename It>
struct Parser1 : qi::grammar<It, structs::Record1()>
{
Parser1() : Parser1::base_type(start) {
using namespace qi;
start = '[' >> raw[*~char_(']')] >> ']'
>> " - " >> double_ >> " s"
>> " => String: " >> raw[+graph]
>> eol;
}
private:
qi::rule<It, structs::Record1()> start;
};
template <typename It>
struct Parser2 : qi::grammar<It, structs::Record2()>
{
Parser2() : Parser2::base_type(start) {
using namespace qi;
start = '[' >> stream >> ']'
>> " - " >> double_ >> " s"
>> " => String: " >> raw[+graph]
>> eol;
}
private:
qi::rule<It, structs::Record2()> start;
};
template <typename It>
struct Parser3 : qi::grammar<It, structs::Records1()>
{
Parser3() : Parser3::base_type(start) {
using namespace qi;
using boost::phoenix::push_back;
line = '[' >> raw[*~char_(']')] >> ']'
>> " - " >> double_ >> " s"
>> " => String: " >> raw[+graph];
ignore = *~char_("\r\n");
start = (line[push_back(_val, _1)] | ignore) % eol;
}
private:
qi::rule<It> ignore;
qi::rule<It, structs::Record1()> line;
qi::rule<It, structs::Records1()> start;
};
template <typename It>
struct Parser4 : qi::grammar<It, structs::Records2()>
{
Parser4() : Parser4::base_type(start) {
using namespace qi;
using boost::phoenix::push_back;
line = '[' >> stream >> ']'
>> " - " >> double_ >> " s"
>> " => String: " >> raw[+graph];
ignore = *~char_("\r\n");
start = (line[push_back(_val, _1)] | ignore) % eol;
}
private:
qi::rule<It> ignore;
qi::rule<It, structs::Record2()> line;
qi::rule<It, structs::Records2()> start;
};
}
typedef boost::chrono::high_resolution_clock::time_point time_point;
template<template <typename> class Parser, typename Container, typename It>
Container parse_seek(It b, It e, const std::string& message)
{
static const Parser<It> s_instance;
Container records;
time_point const t0 = boost::chrono::high_resolution_clock::now();
parse(b, e, *boost::spirit::repository::qi::seek[s_instance], records);
time_point const t1 = boost::chrono::high_resolution_clock::now();
std::cout << "Elapsed time: " << boost::chrono::duration_cast<boost::chrono::milliseconds>(t1 - t0).count() << " ms (" << message << ")\n";
return records;
}
template<template <typename> class Parser, typename Container, typename It>
Container parse_ignoring(It b, It e, const std::string& message)
{
static const Parser<It> s_instance;
Container records;
time_point const t0 = boost::chrono::high_resolution_clock::now();
parse(b, e, s_instance, records);
time_point const t1 = boost::chrono::high_resolution_clock::now();
std::cout << "Elapsed time: " << boost::chrono::duration_cast<boost::chrono::milliseconds>(t1 - t0).count() << " ms (" << message << ")\n";
return records;
}
static const std::string input1 = "[2018-Mar-01 00:01:02.012345] - 1.000 s => String: Valid_string\n";
static const std::string input2 = "[2018-Mar-02 00:01:02.012345] - 2.000 s => I dont care\n";
std::string prepare_input() {
std::string input;
const int N1 = 10;
const int N2 = 10000;
input.reserve(N1 * (input1.size() + N2*input2.size()));
for (int i = N1; i--;) {
input += input1;
for (int j = N2; j--;)
input += input2;
}
return input;
}
void verify(structs::Records1 const& records) {
if (records.empty())
std::cout << "Oops nothing parsed\n";
else {
structs::Record1 const& r = *records.begin();
std::cout << r.date << "\n";
}
}
void verify(structs::Records2 const& records) {
if (records.empty())
std::cout << "Oops nothing parsed\n";
else {
structs::Record2 const& r = *records.begin();
auto tm = *std::localtime(&r.date.date);
std::cout << std::put_time(&tm, "%Y-%b-%d %H:%M:%S") << " " << r.date.ms << "\n";
}
}
static std::string const input = prepare_input();
#define NONIUS_RUNNER
#include <nonius/benchmark.h++>
#include <nonius/main.h++>
NONIUS_BENCHMARK("std::string + seek", [] {
char const* f = input.data();
char const* l = f + input.length();
//std::string::const_iterator f = input.begin(), l = input.end();
verify(parse_seek<QiParsers::Parser1, structs::Records1>(f, l, "std::string + seek"));
})
NONIUS_BENCHMARK("stream + seek", [] {
char const* f = input.data();
char const* l = f + input.length();
//std::string::const_iterator f = input.begin(), l = input.end();
verify(parse_seek<QiParsers::Parser2, structs::Records2>(f, l, "stream + seek"));
})
NONIUS_BENCHMARK("std::string + ignoring", [] {
char const* f = input.data();
char const* l = f + input.length();
//std::string::const_iterator f = input.begin(), l = input.end();
verify(parse_ignoring<QiParsers::Parser3, structs::Records1>(f, l, "std::string + ignoring"));
})
NONIUS_BENCHMARK("stream + ignoring", [] {
char const* f = input.data();
char const* l = f + input.length();
//std::string::const_iterator f = input.begin(), l = input.end();
verify(parse_ignoring<QiParsers::Parser4, structs::Records2>(f, l, "stream + ignoring"));
})
¹ e.g. when the parser's iterator happens to be a multi_pass_adaptor<InputIterator>
, which is a separate monster on its own: Boost spirit memory leak
来源:https://stackoverflow.com/questions/49693376/why-does-using-a-stream-in-boost-spirit-penalize-performance-so-much