Boost logo

Boost-Commit:

Subject: [Boost-commit] svn:boost r60339 - trunk/libs/spirit/example/qi/scheme
From: joel_at_[hidden]
Date: 2010-03-08 03:52:06


Author: djowel
Date: 2010-03-08 03:52:05 EST (Mon, 08 Mar 2010)
New Revision: 60339
URL: http://svn.boost.org/trac/boost/changeset/60339

Log:
working well with unicode (some problems with spirit debug on unicode)
Added:
   trunk/libs/spirit/example/qi/scheme/out.txt (contents, props changed)
Text files modified:
   trunk/libs/spirit/example/qi/scheme/sexpr.hpp | 59 +++++++++++++++++++++++++++++----------
   trunk/libs/spirit/example/qi/scheme/sexpr_test.cpp | 23 ++++++++++++++
   trunk/libs/spirit/example/qi/scheme/sexpr_test.txt | 9 +++++
   trunk/libs/spirit/example/qi/scheme/simple_print.hpp | 45 ++++++++++++++++++++----------
   4 files changed, 104 insertions(+), 32 deletions(-)

Added: trunk/libs/spirit/example/qi/scheme/out.txt
==============================================================================
--- (empty file)
+++ trunk/libs/spirit/example/qi/scheme/out.txt 2010-03-08 03:52:05 EST (Mon, 08 Mar 2010)
@@ -0,0 +1,2 @@
+success: (123.45 "this is a € string" "Τη γλώσσα μου έδωσαν ελληνική" (92 ("another string" apple Sîne)))
+

Modified: trunk/libs/spirit/example/qi/scheme/sexpr.hpp
==============================================================================
--- trunk/libs/spirit/example/qi/scheme/sexpr.hpp (original)
+++ trunk/libs/spirit/example/qi/scheme/sexpr.hpp 2010-03-08 03:52:05 EST (Mon, 08 Mar 2010)
@@ -26,6 +26,7 @@
 {
     using boost::spirit::unicode::char_;
     using boost::spirit::unicode::space;
+ using boost::spirit::unicode::print;
     using boost::spirit::qi::grammar;
     using boost::spirit::qi::rule;
     using boost::spirit::qi::eol;
@@ -40,6 +41,7 @@
     using boost::spirit::qi::hex;
     using boost::spirit::qi::oct;
     using boost::spirit::qi::no_case;
+ using boost::spirit::qi::lexeme;
     using boost::phoenix::function;
 
     typedef boost::spirit::char_encoding::unicode unicode;
@@ -54,6 +56,9 @@
                     space // tab/space/cr/lf
                 | ';' >> *(char_ - eol) >> eol // comments
                 ;
+
+ //~ start.name("white_space");
+ //~ debug(start);
         }
 
         rule<Iterator, unicode> start;
@@ -75,6 +80,19 @@
             }
         };
 
+ struct push_symbol_utf8
+ {
+ template <typename S, typename C>
+ struct result { typedef void type; };
+
+ void operator()(std::string& utf8, uchar code_point) const
+ {
+ if (utf8.size() == 0)
+ utf8 += ';'; // mark a symbol with prefix ';'
+ push_utf8()(utf8, code_point);
+ }
+ };
+
         struct push_esc
         {
             template <typename S, typename C>
@@ -117,6 +135,11 @@
>> *(str_esc(_val) | (char_ - '"') [push_utf8(_val, _1)])
>> '"'
                 ;
+
+ //~ start.name("string");
+ //~ str_esc.name("str_esc");
+ //~ debug(start);
+ //~ debug(str_esc);
         }
 
         rule<Iterator, unicode, void(std::string&)> str_esc;
@@ -128,37 +151,43 @@
     {
         sexpr() : sexpr::base_type(start)
         {
- function<detail::push_utf8> push_utf8;
- function<detail::push_esc> push_esc;
+ real_parser<double, strict_real_policies<double> > strict_double;
+ function<detail::push_symbol_utf8> push_symbol_utf8;
 
             start = atom | list;
 
             list = '(' >> *start >> ')';
 
- atom = number [_val = _1]
- | string [_val = _1]
- | symbol [_val = _1]
+ atom = number [_val = _1]
+ | string [_val = _1]
+ | symbol [_val = _1]
                     ;
 
- char const* symbol_start = "a-zA-Z!#$%&'*+,-./:;<=>?@[\\]^_`{|}~";
- char const* symbol_rest = "a-zA-Z0-9!#$%&'*+,-./:;<=>?@[\\]^_`{|}~";
+ char const* exclude = " ();\"\n\r\t";
+ symbol = +lexeme[print - char_(exclude)] [push_symbol_utf8(_val, _1)];
 
- symbol = char_(symbol_start) [push_utf8(_val, _1)]
- >> +char_(symbol_rest) [push_utf8(_val, _1)]
+ number = strict_double [_val = _1]
+ | int_ [_val = _1]
+ | no_case["0x"] >> hex [_val = _1]
+ | '0' >> oct [_val = _1]
                     ;
 
- number = strict_double [_val = _1]
- | int_ [_val = _1]
- | no_case["0x"] >> hex [_val = _1]
- | '0' >> oct [_val = _1]
- ;
+ //~ start.name("sexpr");
+ //~ list.name("list");
+ //~ atom.name("atom");
+ //~ symbol.name("symbol");
+ //~ number.name("number");
+ //~ debug(start);
+ //~ debug(list);
+ //~ debug(atom);
+ //~ debug(symbol);
+ //~ debug(number);
         }
 
         rule<Iterator, unicode, white_space<Iterator>, utree()> start, list;
         rule<Iterator, unicode, utree()> atom, number;
         rule<Iterator, unicode, std::string()> symbol;
         string<Iterator> string;
- real_parser<double, strict_real_policies<double> > strict_double;
     };
 }
 

Modified: trunk/libs/spirit/example/qi/scheme/sexpr_test.cpp
==============================================================================
--- trunk/libs/spirit/example/qi/scheme/sexpr_test.cpp (original)
+++ trunk/libs/spirit/example/qi/scheme/sexpr_test.cpp 2010-03-08 03:52:05 EST (Mon, 08 Mar 2010)
@@ -9,6 +9,15 @@
 #include <iostream>
 #include <fstream>
 
+namespace scheme
+{
+ inline std::ostream& operator<<(std::ostream& out, utree const& x)
+ {
+ println(x);
+ return out;
+ }
+}
+
 ///////////////////////////////////////////////////////////////////////////////
 // Main program
 ///////////////////////////////////////////////////////////////////////////////
@@ -34,6 +43,18 @@
         return 1;
     }
 
+ char prefix[4]; // Read the UTF-8 prefix (0xEF 0xBB 0xBF)
+ in >> prefix[0]; // marking the beginning of a UTF-8 file
+ in >> prefix[1];
+ in >> prefix[2];
+ prefix[3] = 0;
+ if (std::string("\xef\xbb\xbf") != prefix)
+ {
+ std::cerr << "Not a UTF-8 file: "
+ << filename << std::endl;
+ return 1;
+ }
+
     std::string source_code; // We will read the contents here.
     in.unsetf(std::ios::skipws); // No white space skipping!
     std::copy(
@@ -41,7 +62,7 @@
         std::istream_iterator<char>(),
         std::back_inserter(source_code));
 
- typedef std::string::const_iterator iterator_type;
+ typedef boost::u8_to_u32_iterator<std::string::const_iterator> iterator_type;
     iterator_type first = source_code.begin();
     iterator_type last = source_code.end();
 

Modified: trunk/libs/spirit/example/qi/scheme/sexpr_test.txt
==============================================================================
--- trunk/libs/spirit/example/qi/scheme/sexpr_test.txt (original)
+++ trunk/libs/spirit/example/qi/scheme/sexpr_test.txt 2010-03-08 03:52:05 EST (Mon, 08 Mar 2010)
@@ -1 +1,8 @@
-(123.45 "this is a \u20AC string" (92 ("another string" apple)))
\ No newline at end of file
+(
+ 123.45
+ "this is a \u20AC string" ; A UTF-8 string
+ "Τη γλώσσα μου έδωσαν ελληνική" ; Another UTF-8 string
+ (
+ 92 ("another string" apple Sîne)
+ )
+)
\ No newline at end of file

Modified: trunk/libs/spirit/example/qi/scheme/simple_print.hpp
==============================================================================
--- trunk/libs/spirit/example/qi/scheme/simple_print.hpp (original)
+++ trunk/libs/spirit/example/qi/scheme/simple_print.hpp 2010-03-08 03:52:05 EST (Mon, 08 Mar 2010)
@@ -34,26 +34,41 @@
             std::cout << (b ? "true" : "false");
         }
 
- template <typename Iterator>
- void operator()(boost::iterator_range<Iterator> const& range) const
+ template <typename Range> // for lists
+ void print_string_or_list(Range range, boost::mpl::false_) const
         {
- // This code works for both strings and lists
- typedef typename boost::iterator_range<Iterator>::const_iterator iterator;
- bool const is_string = boost::is_pointer<Iterator>::value;
- char const start = is_string ? '"' : '(';
- char const end = is_string ? '"' : ')';
-
- print(start);
+ typedef typename Range::const_iterator iterator;
+ print('(');
             for (iterator i = range.begin(); i != range.end(); ++i)
             {
- if (!is_string)
- {
- if (i != range.begin())
- print(' ');
- }
+ if (i != range.begin())
+ print(' ');
                 print(*i);
             }
- print(end);
+ print(')');
+ }
+
+ template <typename Range> // for strings
+ void print_string_or_list(Range range, boost::mpl::true_) const
+ {
+ typedef typename Range::const_iterator iterator;
+ iterator i = range.begin();
+ bool const is_symbol = *i == ';';
+ if (!is_symbol)
+ print('"');
+ else
+ ++i;
+ for (; i != range.end(); ++i)
+ print(*i);
+ if (!is_symbol)
+ print('"');
+ }
+
+ template <typename Iterator>
+ void operator()(boost::iterator_range<Iterator> const& range) const
+ {
+ // This code works for both strings and lists
+ print_string_or_list(range, boost::is_pointer<Iterator>());
         }
     };
 


Boost-Commit list run by bdawes at acm.org, david.abrahams at rcn.com, gregod at cs.rpi.edu, cpdaniel at pacbell.net, john at johnmaddock.co.uk