|
Boost-Commit : |
Subject: [Boost-commit] svn:boost r60345 - trunk/boost/spirit/home/support/char_encoding/unicode
From: joel_at_[hidden]
Date: 2010-03-08 07:43:43
Author: djowel
Date: 2010-03-08 07:43:42 EST (Mon, 08 Mar 2010)
New Revision: 60345
URL: http://svn.boost.org/trac/boost/changeset/60345
Log:
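Tweaks. Caught a Unicode lowercase table lookup bug: the case-mapping tables return 0 for a code point that maps to itself, so to_lowercase/to_uppercase now return the input code point in that case.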
Text files modified:
trunk/boost/spirit/home/support/char_encoding/unicode/query.hpp | 110 ++++++++++++++++++++-------------------
1 file changed, 57 insertions(+), 53 deletions(-)
Modified: trunk/boost/spirit/home/support/char_encoding/unicode/query.hpp
==============================================================================
--- trunk/boost/spirit/home/support/char_encoding/unicode/query.hpp (original)
+++ trunk/boost/spirit/home/support/char_encoding/unicode/query.hpp 2010-03-08 07:43:42 EST (Mon, 08 Mar 2010)
@@ -18,17 +18,17 @@
# include "uppercase_table.hpp"
namespace boost { namespace spirit { namespace ucd
-{
+{
// This header provides Basic (Level 1) Unicode Support
// See http://unicode.org/reports/tr18/ for details
-
+
struct properties
{
// bit pattern: xxMMMCCC
// MMM: major_category
// CCC: category
- enum major_category
+ enum major_category
{
letter,
mark,
@@ -38,47 +38,47 @@
punctuation,
symbol
};
-
+
enum category
{
uppercase_letter = 0, // [Lu] an uppercase letter
- lowercase_letter, // [Ll] a lowercase letter
- titlecase_letter, // [Lt] a digraphic character, with first part uppercase
- modifier_letter, // [Lm] a modifier letter
+ lowercase_letter, // [Ll] a lowercase letter
+ titlecase_letter, // [Lt] a digraphic character, with first part uppercase
+ modifier_letter, // [Lm] a modifier letter
other_letter, // [Lo] other letters, including syllables and ideographs
-
+
nonspacing_mark = 8, // [Mn] a nonspacing combining mark (zero advance width)
enclosing_mark, // [Me] an enclosing combining mark
- spacing_mark, // [Mc] a spacing combining mark (positive advance width)
-
- decimal_number = 16, // [Nd] a decimal digit
+ spacing_mark, // [Mc] a spacing combining mark (positive advance width)
+
+ decimal_number = 16, // [Nd] a decimal digit
letter_number, // [Nl] a letterlike numeric character
other_number, // [No] a numeric character of other type
-
+
space_separator = 24, // [Zs] a space character (of various non-zero widths)
- line_separator, // [Zl] U+2028 LINE SEPARATOR only
+ line_separator, // [Zl] U+2028 LINE SEPARATOR only
paragraph_separator, // [Zp] U+2029 PARAGRAPH SEPARATOR only
-
+
control = 32, // [Cc] a C0 or C1 control code
format, // [Cf] a format control character
private_use, // [Co] a private-use character
surrogate, // [Cs] a surrogate code point
unassigned, // [Cn] a reserved unassigned code point or a noncharacter
-
+
dash_punctuation = 40, // [Pd] a dash or hyphen punctuation mark
- open_punctuation, // [Ps] an opening punctuation mark (of a pair)
- close_punctuation, // [Pe] a closing punctuation mark (of a pair)
- connector_punctuation, // [Pc] a connecting punctuation mark, like a tie
- other_punctuation, // [Po] a punctuation mark of other type
- initial_punctuation, // [Pi] an initial quotation mark
+ open_punctuation, // [Ps] an opening punctuation mark (of a pair)
+ close_punctuation, // [Pe] a closing punctuation mark (of a pair)
+ connector_punctuation, // [Pc] a connecting punctuation mark, like a tie
+ other_punctuation, // [Po] a punctuation mark of other type
+ initial_punctuation, // [Pi] an initial quotation mark
final_punctuation, // [Pf] a final quotation mark
-
+
math_symbol = 48, // [Sm] a symbol of primarily mathematical use
- currency_symbol, // [Sc] a currency sign
+ currency_symbol, // [Sc] a currency sign
modifier_symbol, // [Sk] a non-letterlike modifier symbol
- other_symbol // [So] a symbol of other type
+ other_symbol // [So] a symbol of other type
};
-
+
enum derived_properties
{
alphabetic = 64,
@@ -89,7 +89,7 @@
noncharacter_code_point = 2048,
default_ignorable_code_point = 4096
};
-
+
enum script
{
arabic = 0,
@@ -187,114 +187,118 @@
common = 92,
unknown = 93
};
- };
-
+ };
+
inline properties::category get_category(::boost::uint32_t ch)
{
return static_cast<properties::category>(detail::category_lookup(ch) & 0x3F);
}
-
+
inline properties::major_category get_major_category(::boost::uint32_t ch)
{
return static_cast<properties::major_category>(get_category(ch) >> 3);
}
-
+
inline bool is_punctuation(::boost::uint32_t ch)
{
return get_major_category(ch) == properties::punctuation;
- }
-
+ }
+
inline bool is_decimal_number(::boost::uint32_t ch)
{
return get_category(ch) == properties::decimal_number;
}
-
+
inline bool is_hex_digit(::boost::uint32_t ch)
{
return (detail::category_lookup(ch) & properties::hex_digit) != 0;
- }
-
+ }
+
inline bool is_control(::boost::uint32_t ch)
{
return get_category(ch) == properties::control;
}
-
+
inline bool is_alphabetic(::boost::uint32_t ch)
{
return (detail::category_lookup(ch) & properties::alphabetic) != 0;
}
-
+
inline bool is_alphanumeric(::boost::uint32_t ch)
{
return is_decimal_number(ch) || is_alphabetic(ch);
}
-
+
inline bool is_uppercase(::boost::uint32_t ch)
{
return (detail::category_lookup(ch) & properties::uppercase) != 0;
}
-
+
inline bool is_lowercase(::boost::uint32_t ch)
{
return (detail::category_lookup(ch) & properties::lowercase) != 0;
}
-
+
inline bool is_white_space(::boost::uint32_t ch)
{
return (detail::category_lookup(ch) & properties::white_space) != 0;
}
-
+
inline bool is_blank(::boost::uint32_t ch)
{
switch (ch)
{
case '\n': case '\v': case '\f': case '\r':
return false;
- default:
- return is_white_space(ch)
- && !( get_category(ch) == properties::line_separator
+ default:
+ return is_white_space(ch)
+ && !( get_category(ch) == properties::line_separator
|| get_category(ch) == properties::paragraph_separator
);
}
}
-
+
inline bool is_graph(::boost::uint32_t ch)
{
return !( is_white_space(ch)
- || get_category(ch) == properties::control
+ || get_category(ch) == properties::control
|| get_category(ch) == properties::surrogate
|| get_category(ch) == properties::unassigned
);
}
-
+
inline bool is_print(::boost::uint32_t ch)
{
return (is_graph(ch) || is_blank(ch)) && !is_control(ch);
- }
+ }
inline bool is_noncharacter_code_point(::boost::uint32_t ch)
{
return (detail::category_lookup(ch) & properties::noncharacter_code_point) != 0;
}
-
+
inline bool is_default_ignorable_code_point(::boost::uint32_t ch)
{
return (detail::category_lookup(ch) & properties::default_ignorable_code_point) != 0;
}
-
+
inline properties::script get_script(::boost::uint32_t ch)
{
return static_cast<properties::script>(detail::script_lookup(ch) & 0x3F);
}
-
+
inline ::boost::uint32_t to_lowercase(::boost::uint32_t ch)
{
- return detail::lowercase_lookup(ch);
+ // The table returns 0 to signal that this code maps to itself
+ ::boost::uint32_t r = detail::lowercase_lookup(ch);
+ return (r == 0)? ch : r;
}
-
+
inline ::boost::uint32_t to_uppercase(::boost::uint32_t ch)
{
- return detail::uppercase_lookup(ch);
+ // The table returns 0 to signal that this code maps to itself
+ ::boost::uint32_t r = detail::uppercase_lookup(ch);
+ return (r == 0)? ch : r;
}
}}}
Boost-Commit list run by bdawes at acm.org, david.abrahams at rcn.com, gregod at cs.rpi.edu, cpdaniel at pacbell.net, john at johnmaddock.co.uk