#include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef BOOST_MSVC # include #endif // The following is excerpted from cstdint.hpp which defines, and then promptly // undefines, INT64_C (at least on my principal platform) #if !defined(BOOST__STDC_CONSTANT_MACROS_DEFINED) # if defined(BOOST_HAS_LONG_LONG) && \ (defined(ULLONG_MAX) || defined(ULONG_LONG_MAX) || defined(ULONGLONG_MAX)) # if defined(__hpux) // HP-UX's value of ULONG_LONG_MAX is unusable in preprocessor expressions # elif (defined(ULLONG_MAX) && ULLONG_MAX == 18446744073709551615U) || \ (defined(ULONG_LONG_MAX) && ULONG_LONG_MAX == 18446744073709551615U) || \ (defined(ULONGLONG_MAX) && ULONGLONG_MAX == 18446744073709551615U) # else # error defaults not correct; you must hand modify boost/cstdint.hpp # endif # define INT64_C(value) value##LL # elif ULONG_MAX != 0xffffffff # if ULONG_MAX == 18446744073709551615 // 2**64 - 1 # define INT64_C(value) value##L # else # error defaults not correct; you must hand modify boost/cstdint.hpp # endif # endif #endif using boost::int64_t; // Windows default headers suck! #ifdef max # undef max #endif // max #ifdef min # undef min #endif // min // |||||||||||||||||||||| Begin Parsing Variants ||||||||||||||||||||| // The required interface is int64_t parse(std::string const &). The functor, // parser, provides a means to insert the appropriate parse() call into // run_test(), below, without adding function pointer overhead. // // Implement int64_t parse(std::string const &) for each variant, at the bottom // of the file. namespace string_parsing { int64_t parse(std::string const & _value); struct parser { int64_t operator ()(std::string const & _input) { return parse(_input); } }; } namespace xpressive_parsing { int64_t parse(std::string const & _input); struct parser { int64_t operator ()(std::string const & _input) { return parse(_input); } }; } namespace spirit_parsing { int64_t parse(std::string const & _input); struct parser { int64_t operator ()(std::string const & _input) { return parse(_input); } }; } // ||||||||||||||||||||||| End Parsing Variants |||||||||||||||||||||| // ||||||||||||||||||||||||||| Test Harness |||||||||||||||||||||||||| namespace testing { typedef std::pair pair_type; typedef std::vector tests_type; void load_tests(tests_type &, std::string const &); void load_tests_(tests_type &, std::string const &); void raise_parse_failed(int64_t, int64_t, std::string const &); template time_t run_tests(std::string const &, tests_type const &, Parser, unsigned const); } // ||||||||||||||||||||||| Common Functionality |||||||||||||||||||||| namespace core { template T numeric_cast(char const *); template T numeric_cast(std::string const &); void raise_bad_numeric_cast(char const *, char const *); void raise_parse_failure(std::string const &); void raise_zero_denominator(std::string const &); int64_t round(double); bool to_number(double &, char const *, char const * *, int); bool to_number(int &, char const *, char const * *, int); bool to_number(int64_t &, char const *, char const * *, int); bool to_number(unsigned &, char const *, char const * *, int); // ************************************************************************** // Design Notes: // -------------------------------------------------------------------------- template <> inline double numeric_cast(char const * const _value) { double result; if (!to_number(result, _value, 0, 10)) { raise_bad_numeric_cast("double", _value); } return result; } // ************************************************************************** // Design Notes: // -------------------------------------------------------------------------- template <> inline int numeric_cast(char const * const _value) { int result; if (!to_number(result, _value, 0, 10)) { raise_bad_numeric_cast("int", _value); } return result; } // ************************************************************************** // Design Notes: // -------------------------------------------------------------------------- template <> inline int64_t numeric_cast(char const * const _value) { int64_t result; if (!to_number(result, _value, 0, 10)) { raise_bad_numeric_cast("int64_t", _value); } return result; } // ************************************************************************** // Design Notes: // -------------------------------------------------------------------------- template <> inline unsigned numeric_cast(char const * const _value) { unsigned result; if (!to_number(result, _value, 0, 10)) { raise_bad_numeric_cast("unsigned", _value); } return result; } // ************************************************************************** // Design Notes: // -------------------------------------------------------------------------- template inline T numeric_cast(std::string const & _value) { return numeric_cast(_value.c_str()); } } // ||||||||||||||||||||||||| Main Test Logic ||||||||||||||||||||||||| // **************************************************************************** // Design Notes: // ---------------------------------------------------------------------------- int main(int argc, char ** argv) { using namespace testing; if (2 > argc) { std::cerr << "Usage: " << argv[0] << " pathname [iterations]" << std::endl; return 1; } tests_type tests; tests.reserve(150000); // Should be plenty of space, this shaves like 8 seconds off the load_tests time... load_tests(tests, argv[1]); unsigned iterations(100); if (2 < argc) { iterations = core::numeric_cast(argv[2]); } try { time_t const string_elapsed( run_tests("string", tests, string_parsing::parser(), iterations)); time_t const xpressive_elapsed( run_tests("Xpressive", tests, xpressive_parsing::parser(), iterations)); time_t const spirit_elapsed( run_tests("Spirit", tests, spirit_parsing::parser(), iterations)); std::cout << "string parsing: " << string_elapsed << "s\n" << "xpressive parsing: " << xpressive_elapsed << "s\n" << "spirit parsing: " << spirit_elapsed << 's' << std::endl; } catch (std::runtime_error const & e) { std::cerr << e.what() << std::endl; } } // |||||||||||||||||||| More Common Functionality |||||||||||||||||||| namespace core { // The least common multiple of all supported fractional price denominators. struct lcm_generator { lcm_generator() { } template T as() const; }; // ************************************************************************** // Design Notes: // -------------------------------------------------------------------------- template <> inline int lcm_generator::as() const { return 160000; } // ************************************************************************** // Design Notes: // -------------------------------------------------------------------------- template <> inline unsigned lcm_generator::as() const { return 160000U; } // ************************************************************************** // Design Notes: // -------------------------------------------------------------------------- template <> inline int64_t lcm_generator::as() const { return INT64_C(160000); } // ************************************************************************** // Design Notes: // -------------------------------------------------------------------------- template <> inline double lcm_generator::as() const { return 160000.0; } const lcm_generator lcm; } using core::lcm; // ***************************************************************************** // Design Notes: Adding 0.5 to _value before casting to int64_t, which rounds // down, causes rounding to the nearest whole number. This only works for // non-negative values, however. // // If the calling code didn't ensure that _value were non-negative, then // calling std::floor() would be necessary or the code would need to subtract // 0.5 when _value is negative and add 0.5 otherwise. Since std::floor() is // slower, at least when tested with GCC 4.1.2 on 64b SLES10, versus managing // the sign separately, 'tis better to forego std::floor(). // ----------------------------------------------------------------------------- inline int64_t core::round(double const _value) { BOOST_ASSERT(0.0 <= _value); return static_cast(_value + 0.5); } // **************************************************************************** // Design Notes: // ---------------------------------------------------------------------------- void core::raise_bad_numeric_cast(char const * const _type, char const * const _value) { std::cerr << "core::numeric_cast<" << _type << ">(" << _value << ") failed" << std::endl; throw std::bad_cast(); } // ***************************************************************************** // Design Notes: // ----------------------------------------------------------------------------- void core::raise_parse_failure(std::string const & _value) { std::string message("Invalid price input: \""); message += _value; message += '"'; throw std::runtime_error(message); } // ***************************************************************************** // Design Notes: // ----------------------------------------------------------------------------- void core::raise_zero_denominator(std::string const & _value) { std::string message("Cannot accept zero denominator in: \""); message += _value; message += '"'; throw std::runtime_error(message); } // ***************************************************************************** // Design Notes: // ----------------------------------------------------------------------------- bool core::to_number(double & _result, char const * const _value, char const * * const _end, int const) { char * end; errno = 0; _result = std::strtod(_value, &end); bool const failed((0.0 == _result && (end == _value || ERANGE == errno)) || ((HUGE_VAL == _result || -HUGE_VAL == _result) && ERANGE == errno)); bool const result(!failed); if (_end) { *_end = end; } return result; } // ***************************************************************************** // Design Notes: // ----------------------------------------------------------------------------- bool core::to_number(int & _result, char const * const _value, char const * * const _end, int const _radix) { char * end; errno = 0; long value(std::strtol(_value, &end, _radix)); bool const result(end != _value && ERANGE != errno && value <= std::numeric_limits::max()); if (result) { _result = static_cast(value); if (_end) { *_end = end; } } return result; } // ***************************************************************************** // Design Notes: // ----------------------------------------------------------------------------- bool core::to_number(int64_t & _result, char const * const _value, char const * * const _end, int const _radix) { char * end; errno = 0; #ifdef BOOST_MSVC _result = _strtoi64(_value, &end, _radix); #else _result = std::strtoll(_value, &end, _radix); #endif bool const result(end != _value && ERANGE != errno); if (_end) { *_end = end; } return result; } // ***************************************************************************** // Design Notes: // ----------------------------------------------------------------------------- bool core::to_number(unsigned & _result, char const * const _value, char const * * const _end, int const _radix) { char * end; errno = 0; unsigned long value(std::strtoul(_value, &end, _radix)); bool const result(end != _value && ERANGE != errno && value <= std::numeric_limits::max()); if (result) { _result = static_cast(value); if (_end) { *_end = end; } } return result; } // ***************************************************************************** // Design Notes: // ----------------------------------------------------------------------------- void testing::load_tests(tests_type & _tests, std::string const & _pathname) { using namespace boost::xpressive; std::ifstream file(_pathname.c_str()); if (!file) { std::string message("Unable to read test inputs from "); message += _pathname; throw std::runtime_error(message); } sregex re = '"' >> (s1 = +_) >> '"' >> +blank >> (s2 = +_d); std::string line; smatch matches; std::string input; while (getline(file, line)) { if (regex_search(line.begin(), line.end(), matches, re)) { int64_t const expected( core::numeric_cast(matches.str(2).c_str())); _tests.push_back(pair_type(matches.str(1), expected)); } } } // ***************************************************************************** // Design Notes: // ----------------------------------------------------------------------------- void testing::raise_parse_failed(int64_t const _expected, int64_t const _actual, std::string const & _input) { std::ostringstream message; message << "Parsing failed for \"" << _input << "\": expected " << _expected << ", but got " << _actual; throw std::runtime_error(message.str()); } // **************************************************************************** // Design Notes: // ---------------------------------------------------------------------------- template time_t testing::run_tests(std::string const & _description, tests_type const & _tests, Parser _parse, unsigned const _iterations) { std::cout << "Testing " << _description << "-based parsing" << std::endl; _parse("0"); // prime the cache time_t const start(time(0)); for (unsigned i(0); _iterations > i; ++i) { for (tests_type::const_iterator it(_tests.begin()), end(_tests.end()); it != end; ++it) { std::string const & input(it->first); int64_t const expected(it->second); int64_t const actual(_parse(input)); if (actual != expected) { raise_parse_failed(expected, actual, input); } } } time_t const end(time(0)); return end - start; } // |||||||||||||||||| String Parsing Implementation |||||||||||||||||| namespace string_parsing { template T extract(char const * & _input, char const * _description, std::string const & _value); unsigned extract_denominator(char const * & _input, std::string const & _value); int64_t parse_fraction(char const *& _end, int64_t _result, char const * _denominator, bool _is_mixed_number, std::string const & _value); void raise_extract_failed(char const * _description, std::string const & _value); } // ***************************************************************************** // Design Notes: Non-dependent exception throwing code appears in // raise_extract_failed(). // ----------------------------------------------------------------------------- template T string_parsing::extract(char const * & _input, char const * const _description, std::string const & _value) { T result; if (!core::to_number(result, _input, &_input, 10)) { raise_extract_failed(_description, _value); } return result; } // ***************************************************************************** // Design Notes: // ----------------------------------------------------------------------------- unsigned string_parsing::extract_denominator(char const * & _input, std::string const & _value) { unsigned const result(extract(_input, "denominator", _value)); switch (result) { case 0: { core::raise_zero_denominator(_value); } break; case 1: case 2: case 4: case 8: case 16: case 32: case 64: case 128: case 256: case 10: case 100: case 1000: case 100000: break; default: { if (lcm.as() % result) { std::cerr << "Rounding error when parsing the denominator in \"" << _value << "\": " << result << " is not an factor of " << lcm.as(); } } break; } return result; } // ***************************************************************************** // Design Notes: This function parses several number formats: floating // point, whole number, fraction, and mixed number. All formats begin with a // number, so the input is first parsed and converted to int64_t, base 10. If // a decimal point appears immediately after the input consumed for that // conversion, then reparse from the beginning as floating point rather than // jump through hoops to compute the fractional part. If there is a space // after the initial number, then the initial number is a whole number that may // be followed by a fraction, meaning the input was a mixed number. If there // is a slash after the initial number, then the initial number is the // numerator of a fraction. // // When converting a whole number, it is important to avoid floating point // representational and radix problems. The int64_t conversion is done with // base 10, thus ignoring any leading zeroes. Conversion to double would work // without specifying the base, but then it is difficult to know whether to // look for a fraction, for a mixed number, and the integer value may not be // representable in a double. // // Negative values, except when the input is a whole number, must be processed // as positive until after the fractional part has been parsed and computed to // simplify accounting for rounding errors in round() and to account correctly // for magnitude. // // When computing a fraction, the product of lcm and numerator is divided by // denominator in order to reduce rounding and representational errors that // would result from first computing the quotient of numerator and denominator. // ----------------------------------------------------------------------------- int64_t string_parsing::parse(std::string const & _value) { BOOST_ASSERT(std::string::npos == _value.find_first_not_of("-+0123456789eE ./")); if (_value.empty()) { return 0; } char const * const start(_value.c_str()); char const * end(start); int64_t result; (void)core::to_number(result, end, &end, 10); const bool is_floating_point('.' == *end); if (is_floating_point) { end = start; double value(extract(end, "input", _value) * lcm.as()); const bool is_negative(0 > value); if (is_negative) { value = -value; } result = core::round(value); if (is_negative) { result = -result; } } else { result *= lcm.as(); char const * const last(start + _value.length()); if (end != last) { const size_t slash(_value.find_first_of('/', end - start)); const bool is_fraction(std::string::npos != slash); if (is_fraction) { char const * const denominator(start + slash + 1); const bool is_mixed_number( std::string::npos != _value.find_last_of(' ', slash)); const bool is_non_negative(0 <= result); if (is_non_negative) { result = parse_fraction(end, result, denominator, is_mixed_number, _value); } else { result = -parse_fraction(end, -result, denominator, is_mixed_number, _value); } } } } BOOST_ASSERT(end == (start + _value.length()) || std::string::npos == _value.find_first_not_of( " \t\r\n", end - start, 4)); return result; } // ***************************************************************************** // Design Notes: // ----------------------------------------------------------------------------- int64_t string_parsing::parse_fraction(char const *& _end, int64_t const _result, char const * const _denominator, const bool _is_mixed_number, std::string const & _value) { double value; if (_is_mixed_number) { unsigned const numerator(extract(_end, "numerator", _value)); _end = _denominator; unsigned const denominator(extract_denominator(_end, _value)); double fraction(numerator * lcm.as() / denominator); value = _result + fraction; } else { _end = _denominator; unsigned const denominator(extract_denominator(_end, _value)); value = _result / denominator; } int64_t const result(core::round(value)); return result; } // ***************************************************************************** // Design Notes: // ----------------------------------------------------------------------------- void string_parsing::raise_extract_failed(char const * const _description, std::string const & _value) { std::string message("Failed to parse the "); message += _description; message += ": "; message += _value; throw std::runtime_error(message); } // ||||||||||||||||| Xpressive Parsing Implementation ||||||||||||||||| #include #include #include namespace xpressive_parsing { struct direct_impl { typedef int64_t result_type; template int64_t operator ()(Value const & _value) const { int64_t result(core::numeric_cast(_value)); result *= lcm.as(); return result; } }; // exception type indicating denominator was zero struct zero_denominator { }; struct to_price_impl { typedef int64_t result_type; template int64_t operator ()(Value const & _value) const { int64_t result(0); if (_value) { double const value(core::numeric_cast(_value)); result = core::round(lcm.as() * value); } return result; } template int64_t operator ()(Value const & _numerator, Value const & _denominator) const { unsigned const numerator(core::numeric_cast(_numerator)); unsigned const denominator(core::numeric_cast(_denominator)); if (0 == denominator) { throw zero_denominator(); } double const fraction(lcm.as() * numerator / denominator); return core::round(fraction); } template int64_t operator ()(Value const & _whole, double const _fraction) const { int const whole(core::numeric_cast(_whole)); double const value(lcm.as() * whole + _fraction); return core::round(value); } }; //boost::thread_specific_ptr match_s; } namespace xpressive_parsing { namespace prices { using namespace boost::xpressive; function::type const direct = {{}}; function::type const to_price = {{}}; placeholder is_negative_; placeholder price_; const BOOST_PROTO_AUTO(zero, as_xpr('0')); const BOOST_PROTO_AUTO(sign, as_xpr('-') [ is_negative_ = true ] | as_xpr('+')); // ignore const BOOST_PROTO_AUTO(fraction, ( *zero >> (s1 = +_d) >> *blank >> '/' >> *blank >> *zero >> (s2 = +_d) ) [ price_ = to_price(s1, s2) ]); const BOOST_PROTO_AUTO(real, ( *_d >> '.' >> *_d >> !((set = 'E', 'e') >> +_d) ) [ price_ = to_price(_) ]); const BOOST_PROTO_AUTO(mixed_number, ( // N.B. The capture here must be different from those used in // fraction; Xpressive composability suffers accordingly (is that // because of using BOOST_PROTO_AUTO?) *zero >> (s3 = +_d) >> +blank >> fraction ) [ price_ = to_price(s3, price_) ]); const BOOST_PROTO_AUTO(integer, ( *zero >> (s1 = +_d) ) [ price_ = direct(s1) ]); const sregex price = *blank >> !sign >> (real | mixed_number | fraction | integer) >> *space; } } // ***************************************************************************** // Design Notes: // ----------------------------------------------------------------------------- int64_t xpressive_parsing::parse(std::string const & _value) { typedef std::string::const_iterator iterator; using namespace prices; int64_t result; iterator begin(_value.begin()); iterator end(_value.end()); bool is_negative(false); //boost::xpressive::smatch & match(*match_s); static boost::xpressive::smatch match_s; boost::xpressive::smatch & match(match_s); match.let(price_ = result); match.let(is_negative_ = is_negative); bool matched; try { matched = boost::xpressive::regex_match(begin, end, match, price); } catch (zero_denominator) { core::raise_zero_denominator(_value); } if (!matched) { core::raise_parse_failure(_value); } if (is_negative) { result = -result; } return result; } // |||||||||||||||||| Spirit Parsing Implementation |||||||||||||||||| // ***************************************************************************** // Design Notes: // ----------------------------------------------------------------------------- struct dot_number_to_long_long_function_quick_new { boost::long_long_type &res; dot_number_to_long_long_function_quick_new(boost::long_long_type &_res):res(_res) {} template void operator()(double val, Inst, bool &pass) const { const bool negative(0.0 > val); if(negative) val = -val; res = static_cast(std::floor(160000.0*val + 0.5)); if(negative) res = -res; } }; int64_t spirit_parsing::parse(std::string const & _value) { using boost::spirit::qi::double_; using boost::spirit::qi::_1; using boost::spirit::qi::_2; using boost::spirit::qi::phrase_parse; using boost::spirit::qi::lexeme; using boost::spirit::qi::lit; using boost::spirit::ascii::space; using boost::spirit::ascii::blank; using boost::phoenix::ref; using boost::spirit::long_long; boost::long_long_type res; std::string::const_iterator first = _value.begin(); std::string::const_iterator last = _value.end(); bool r = phrase_parse(first, last, // Begin grammar ( (long_long[ref(res)=(_1*160000)] >> !lit('.') >> -( ((long_long >> '/' >> long_long)[ref(res)+=((160000*_1)/_2)]) | ('/' >> long_long[ref(res)/=_1]) ) ) | (double_[dot_number_to_long_long_function_quick_new(res)]) ) // End grammar ,blank); if (!r || first != last) // fail if we did not get a full match return false; return res; }