Boost logo

Boost-Commit :

Subject: [Boost-commit] svn:boost r56256 - in trunk/boost/spirit/home: qi/char support/char_encoding
From: hartmut.kaiser_at_[hidden]
Date: 2009-09-16 21:49:19


Author: hkaiser
Date: 2009-09-16 21:49:18 EDT (Wed, 16 Sep 2009)
New Revision: 56256
URL: http://svn.boost.org/trac/boost/changeset/56256

Log:
Spirit: fixed a character size mismatch that was exposed by token-based parsing
Text files modified:
   trunk/boost/spirit/home/qi/char/char.hpp | 34 +++++++++++++++++++++++-----------
   trunk/boost/spirit/home/qi/char/char_class.hpp | 3 ++-
   trunk/boost/spirit/home/support/char_encoding/iso8859_1.hpp | 4 +++-
   trunk/boost/spirit/home/support/char_encoding/standard.hpp | 4 +++-
   trunk/boost/spirit/home/support/char_encoding/standard_wide.hpp | 19 +++++++++++++++++--
   5 files changed, 48 insertions(+), 16 deletions(-)

Modified: trunk/boost/spirit/home/qi/char/char.hpp
==============================================================================
--- trunk/boost/spirit/home/qi/char/char.hpp (original)
+++ trunk/boost/spirit/home/qi/char/char.hpp 2009-09-16 21:49:18 EDT (Wed, 16 Sep 2009)
@@ -119,9 +119,9 @@
         };
 
         template <typename CharParam, typename Context>
- bool test(CharParam ch, Context&) const
+ bool test(CharParam ch_, Context&) const
         {
- return this->ch == char_type(ch);
+ return char_encoding::ischar(int(ch_)) && ch == char_type(ch_);
         }
 
         template <typename Context>
@@ -156,9 +156,13 @@
         };
 
         template <typename CharParam, typename Context>
- bool test(CharParam ch, Context&) const
+ bool test(CharParam ch_, Context&) const
         {
- return this->lo == char_type(ch) || this->hi == char_type(ch);
+ if (!char_encoding::ischar(int(ch_)))
+ return false;
+
+ char_type ch = char_type(ch_); // optimize for token based parsing
+ return this->lo == ch || this->hi == ch;
         }
 
         template <typename Context>
@@ -184,9 +188,13 @@
           : from(from), to(to) {}
 
         template <typename CharParam, typename Context>
- bool test(CharParam ch, Context&) const
+ bool test(CharParam ch_, Context&) const
         {
- return !(char_type(ch) < from) && !(to < char_type(ch));
+ if (!char_encoding::ischar(int(ch_)))
+ return false;
+
+ char_type ch = char_type(ch_); // optimize for token based parsing
+ return !(ch < from) && !(to < ch);
         }
 
         template <typename Context>
@@ -216,10 +224,14 @@
         {}
 
         template <typename CharParam, typename Context>
- bool test(CharParam ch, Context&) const
+ bool test(CharParam ch_, Context&) const
         {
- return (!(char_type(ch) < from_lo) && !(to_lo < char_type(ch)))
- || (!(char_type(ch) < from_hi) && !(to_hi < char_type(ch)))
+ if (!char_encoding::ischar(int(ch_)))
+ return false;
+
+ char_type ch = char_type(ch_); // optimize for token based parsing
+ return (!(ch < from_lo) && !(to_lo < ch))
+ || (!(ch < from_hi) && !(to_hi < ch))
             ;
         }
 
@@ -282,7 +294,7 @@
         template <typename CharParam, typename Context>
         bool test(CharParam ch, Context&) const
         {
- return chset.test(char_type(ch));
+ return char_encoding::ischar(int(ch)) && chset.test(char_type(ch));
         }
 
         template <typename Context>
@@ -341,7 +353,7 @@
         template <typename CharParam, typename Context>
         bool test(CharParam ch, Context&) const
         {
- return chset.test(char_type(char_type(ch)));
+ return char_encoding::ischar(int(ch)) && chset.test(char_type(ch));
         }
 
         template <typename Context>

Modified: trunk/boost/spirit/home/qi/char/char_class.hpp
==============================================================================
--- trunk/boost/spirit/home/qi/char/char_class.hpp (original)
+++ trunk/boost/spirit/home/qi/char/char_class.hpp 2009-09-16 21:49:18 EDT (Wed, 16 Sep 2009)
@@ -57,7 +57,8 @@
         bool test(CharParam ch, Context&) const
         {
             using spirit::char_class::classify;
- return classify<char_encoding>::is(classification(), ch);
+ return char_encoding::ischar(ch) &&
+ classify<char_encoding>::is(classification(), ch);
         }
 
         template <typename Context>

Modified: trunk/boost/spirit/home/support/char_encoding/iso8859_1.hpp
==============================================================================
--- trunk/boost/spirit/home/support/char_encoding/iso8859_1.hpp (original)
+++ trunk/boost/spirit/home/support/char_encoding/iso8859_1.hpp 2009-09-16 21:49:18 EDT (Wed, 16 Sep 2009)
@@ -579,7 +579,9 @@
         static bool
         ischar(int ch)
         {
- return true; // iso8859.1 uses all 8 bits
+ // iso8859.1 uses all 8 bits
+ // we have to watch out for sign extensions
+ return (0 == (ch & ~0xff) || ~0 == (ch | 0xff)) ? true : false;
         }
 
         static int

Modified: trunk/boost/spirit/home/support/char_encoding/standard.hpp
==============================================================================
--- trunk/boost/spirit/home/support/char_encoding/standard.hpp (original)
+++ trunk/boost/spirit/home/support/char_encoding/standard.hpp 2009-09-16 21:49:18 EDT (Wed, 16 Sep 2009)
@@ -33,7 +33,9 @@
         static bool
         ischar(int ch)
         {
- return true; // use all the bits
+ // uses all 8 bits
+ // we have to watch out for sign extensions
+ return (0 == (ch & ~0xff) || ~0 == (ch | 0xff)) ? true : false;
         }
 
         static int

Modified: trunk/boost/spirit/home/support/char_encoding/standard_wide.hpp
==============================================================================
--- trunk/boost/spirit/home/support/char_encoding/standard_wide.hpp (original)
+++ trunk/boost/spirit/home/support/char_encoding/standard_wide.hpp 2009-09-16 21:49:18 EDT (Wed, 16 Sep 2009)
@@ -16,6 +16,7 @@
 #include <string>
 
 #include <boost/cstdint.hpp>
+#include <boost/spirit/home/support/assert_msg.hpp>
 
 namespace boost { namespace spirit { namespace char_encoding
 {
@@ -40,10 +41,24 @@
             return std::char_traits<Char>::to_char_type(ch);
         }
 
+ template <std::size_t N>
+ struct wchar_t_size
+ {
+ BOOST_SPIRIT_ASSERT_MSG(N == 1 || N == 2 || N == 4,
+ not_supported_size_of_wchar_t, ());
+ };
+
+ template <> struct wchar_t_size<1> { enum { mask = 0xff }; };
+ template <> struct wchar_t_size<2> { enum { mask = 0xffff }; };
+ template <> struct wchar_t_size<4> { enum { mask = 0xffffffff }; };
+
         static bool
- ischar(wchar_t ch)
+ ischar(int ch)
         {
- return true; // any wchar_t
+ // we have to watch out for sign extensions
+ return ( 0 == (ch & ~wchar_t_size<sizeof(wchar_t)>::mask) ||
+ ~0 == (ch | wchar_t_size<sizeof(wchar_t)>::mask)) ?
+ true : false; // any wchar_t, but no other bits set
         }
 
         static bool


Boost-Commit list run by bdawes at acm.org, david.abrahams at rcn.com, gregod at cs.rpi.edu, cpdaniel at pacbell.net, john at johnmaddock.co.uk