|
Boost-Commit : |
Subject: [Boost-commit] svn:boost r60339 - trunk/libs/spirit/example/qi/scheme
From: joel_at_[hidden]
Date: 2010-03-08 03:52:06
Author: djowel
Date: 2010-03-08 03:52:05 EST (Mon, 08 Mar 2010)
New Revision: 60339
URL: http://svn.boost.org/trac/boost/changeset/60339
Log:
working well with unicode (some problems with spirit debug on unicode)
Added:
trunk/libs/spirit/example/qi/scheme/out.txt (contents, props changed)
Text files modified:
trunk/libs/spirit/example/qi/scheme/sexpr.hpp | 59 +++++++++++++++++++++++++++++----------
trunk/libs/spirit/example/qi/scheme/sexpr_test.cpp | 23 ++++++++++++++
trunk/libs/spirit/example/qi/scheme/sexpr_test.txt | 9 +++++
trunk/libs/spirit/example/qi/scheme/simple_print.hpp | 45 ++++++++++++++++++++----------
4 files changed, 104 insertions(+), 32 deletions(-)
Added: trunk/libs/spirit/example/qi/scheme/out.txt
==============================================================================
--- (empty file)
+++ trunk/libs/spirit/example/qi/scheme/out.txt 2010-03-08 03:52:05 EST (Mon, 08 Mar 2010)
@@ -0,0 +1,2 @@
+success: (123.45 "this is a € string" "Τη γλώσσα μου έδωσαν ελληνική" (92 ("another string" apple Sîne)))
+
Modified: trunk/libs/spirit/example/qi/scheme/sexpr.hpp
==============================================================================
--- trunk/libs/spirit/example/qi/scheme/sexpr.hpp (original)
+++ trunk/libs/spirit/example/qi/scheme/sexpr.hpp 2010-03-08 03:52:05 EST (Mon, 08 Mar 2010)
@@ -26,6 +26,7 @@
{
using boost::spirit::unicode::char_;
using boost::spirit::unicode::space;
+ using boost::spirit::unicode::print;
using boost::spirit::qi::grammar;
using boost::spirit::qi::rule;
using boost::spirit::qi::eol;
@@ -40,6 +41,7 @@
using boost::spirit::qi::hex;
using boost::spirit::qi::oct;
using boost::spirit::qi::no_case;
+ using boost::spirit::qi::lexeme;
using boost::phoenix::function;
typedef boost::spirit::char_encoding::unicode unicode;
@@ -54,6 +56,9 @@
space // tab/space/cr/lf
| ';' >> *(char_ - eol) >> eol // comments
;
+
+ //~ start.name("white_space");
+ //~ debug(start);
}
rule<Iterator, unicode> start;
@@ -75,6 +80,19 @@
}
};
+ struct push_symbol_utf8 // functor wrapped in phoenix::function by the sexpr grammar's symbol rule
+ {
+ template <typename S, typename C>
+ struct result { typedef void type; }; // the call operator yields nothing
+
+ void operator()(std::string& utf8, uchar code_point) const
+ {
+ if (utf8.empty())
+ utf8.push_back(';'); // first character: tag the text as a symbol with a leading ';'
+ push_utf8()(utf8, code_point); // then append the code point via push_utf8
+ }
+ };
+
struct push_esc
{
template <typename S, typename C>
@@ -117,6 +135,11 @@
>> *(str_esc(_val) | (char_ - '"') [push_utf8(_val, _1)])
>> '"'
;
+
+ //~ start.name("string");
+ //~ str_esc.name("str_esc");
+ //~ debug(start);
+ //~ debug(str_esc);
}
rule<Iterator, unicode, void(std::string&)> str_esc;
@@ -128,37 +151,43 @@
{
sexpr() : sexpr::base_type(start)
{
- function<detail::push_utf8> push_utf8;
- function<detail::push_esc> push_esc;
+ real_parser<double, strict_real_policies<double> > strict_double;
+ function<detail::push_symbol_utf8> push_symbol_utf8;
start = atom | list;
list = '(' >> *start >> ')';
- atom = number [_val = _1]
- | string [_val = _1]
- | symbol [_val = _1]
+ atom = number [_val = _1]
+ | string [_val = _1]
+ | symbol [_val = _1]
;
- char const* symbol_start = "a-zA-Z!#$%&'*+,-./:;<=>?@[\\]^_`{|}~";
- char const* symbol_rest = "a-zA-Z0-9!#$%&'*+,-./:;<=>?@[\\]^_`{|}~";
+ char const* exclude = " ();\"\n\r\t";
+ symbol = +lexeme[print - char_(exclude)] [push_symbol_utf8(_val, _1)];
- symbol = char_(symbol_start) [push_utf8(_val, _1)]
- >> +char_(symbol_rest) [push_utf8(_val, _1)]
+ number = strict_double [_val = _1]
+ | int_ [_val = _1]
+ | no_case["0x"] >> hex [_val = _1]
+ | '0' >> oct [_val = _1]
;
- number = strict_double [_val = _1]
- | int_ [_val = _1]
- | no_case["0x"] >> hex [_val = _1]
- | '0' >> oct [_val = _1]
- ;
+ //~ start.name("sexpr");
+ //~ list.name("list");
+ //~ atom.name("atom");
+ //~ symbol.name("symbol");
+ //~ number.name("number");
+ //~ debug(start);
+ //~ debug(list);
+ //~ debug(atom);
+ //~ debug(symbol);
+ //~ debug(number);
}
rule<Iterator, unicode, white_space<Iterator>, utree()> start, list;
rule<Iterator, unicode, utree()> atom, number;
rule<Iterator, unicode, std::string()> symbol;
string<Iterator> string;
- real_parser<double, strict_real_policies<double> > strict_double;
};
}
Modified: trunk/libs/spirit/example/qi/scheme/sexpr_test.cpp
==============================================================================
--- trunk/libs/spirit/example/qi/scheme/sexpr_test.cpp (original)
+++ trunk/libs/spirit/example/qi/scheme/sexpr_test.cpp 2010-03-08 03:52:05 EST (Mon, 08 Mar 2010)
@@ -9,6 +9,15 @@
#include <iostream>
#include <fstream>
+namespace scheme
+{
+ inline std::ostream& operator<<(std::ostream& out, utree const& x) // stream-insertion hook for utree
+ {
+ println(x); // NOTE(review): `out` is not passed along; println presumably writes to std::cout -- confirm
+ return out; // the stream is handed back untouched by this function
+ }
+}
+
///////////////////////////////////////////////////////////////////////////////
// Main program
///////////////////////////////////////////////////////////////////////////////
@@ -34,6 +43,18 @@
return 1;
}
+ char prefix[4]; // Read the UTF-8 prefix (0xEF 0xBB 0xBF)
+ in >> prefix[0]; // marking the beginning of a UTF-8 file
+ in >> prefix[1];
+ in >> prefix[2];
+ prefix[3] = 0;
+ if (std::string("\xef\xbb\xbf") != prefix)
+ {
+ std::cerr << "Not a UTF-8 file: "
+ << filename << std::endl;
+ return 1;
+ }
+
std::string source_code; // We will read the contents here.
in.unsetf(std::ios::skipws); // No white space skipping!
std::copy(
@@ -41,7 +62,7 @@
std::istream_iterator<char>(),
std::back_inserter(source_code));
- typedef std::string::const_iterator iterator_type;
+ typedef boost::u8_to_u32_iterator<std::string::const_iterator> iterator_type;
iterator_type first = source_code.begin();
iterator_type last = source_code.end();
Modified: trunk/libs/spirit/example/qi/scheme/sexpr_test.txt
==============================================================================
--- trunk/libs/spirit/example/qi/scheme/sexpr_test.txt (original)
+++ trunk/libs/spirit/example/qi/scheme/sexpr_test.txt 2010-03-08 03:52:05 EST (Mon, 08 Mar 2010)
@@ -1 +1,8 @@
-(123.45 "this is a \u20AC string" (92 ("another string" apple)))
\ No newline at end of file
+(
+ 123.45
+ "this is a \u20AC string" ; A UTF-8 string
+ "Τη γλώσσα μου έδωσαν ελληνική" ; Another UTF-8 string
+ (
+ 92 ("another string" apple Sîne)
+ )
+)
\ No newline at end of file
Modified: trunk/libs/spirit/example/qi/scheme/simple_print.hpp
==============================================================================
--- trunk/libs/spirit/example/qi/scheme/simple_print.hpp (original)
+++ trunk/libs/spirit/example/qi/scheme/simple_print.hpp 2010-03-08 03:52:05 EST (Mon, 08 Mar 2010)
@@ -34,26 +34,41 @@
std::cout << (b ? "true" : "false");
}
- template <typename Iterator>
- void operator()(boost::iterator_range<Iterator> const& range) const
+ template <typename Range> // for lists
+ void print_string_or_list(Range range, boost::mpl::false_) const
{
- // This code works for both strings and lists
- typedef typename boost::iterator_range<Iterator>::const_iterator iterator;
- bool const is_string = boost::is_pointer<Iterator>::value;
- char const start = is_string ? '"' : '(';
- char const end = is_string ? '"' : ')';
-
- print(start);
+ typedef typename Range::const_iterator iterator;
+ print('(');
for (iterator i = range.begin(); i != range.end(); ++i)
{
- if (!is_string)
- {
- if (i != range.begin())
- print(' ');
- }
+ if (i != range.begin())
+ print(' ');
print(*i);
}
- print(end);
+ print(')');
+ }
+
+ template <typename Range> // for strings; text starting with ';' is a symbol
+ void print_string_or_list(Range range, boost::mpl::true_) const
+ {
+ typedef typename Range::const_iterator iterator;
+ iterator i = range.begin();
+ bool const is_symbol = i != range.end() && *i == ';'; // guard: never dereference an empty range
+ if (!is_symbol)
+ print('"'); // ordinary strings are wrapped in double quotes
+ else
+ ++i; // symbols: skip the ';' marker and print unquoted
+ for (; i != range.end(); ++i)
+ print(*i);
+ if (!is_symbol)
+ print('"');
+ }
+
+ template <typename Iterator>
+ void operator()(boost::iterator_range<Iterator> const& range) const
+ {
+ // Tag dispatch: pointer iterators select the string overload, all other iterators the list overload
+ print_string_or_list(range, boost::is_pointer<Iterator>());
+ }
};
Boost-Commit list run by bdawes at acm.org, david.abrahams at rcn.com, gregod at cs.rpi.edu, cpdaniel at pacbell.net, john at johnmaddock.co.uk