Boost logo

Boost-Commit :

Subject: [Boost-commit] svn:boost r59440 - in trunk/boost/spirit/home/support: . char_encoding char_encoding/unicode
From: joel_at_[hidden]
Date: 2010-02-03 07:35:11


Author: djowel
Date: 2010-02-03 07:35:10 EST (Wed, 03 Feb 2010)
New Revision: 59440
URL: http://svn.boost.org/trac/boost/changeset/59440

Log:
More Unicode Support
Text files modified:
   trunk/boost/spirit/home/support/char_class.hpp | 734 ++++++++++++++++++++++++++++++---------
   trunk/boost/spirit/home/support/char_encoding/unicode.hpp | 203 +++++++++++
   trunk/boost/spirit/home/support/char_encoding/unicode/query.hpp | 14
   trunk/boost/spirit/home/support/common_terminals.hpp | 193 ++++++++++
   4 files changed, 958 insertions(+), 186 deletions(-)

Modified: trunk/boost/spirit/home/support/char_class.hpp
==============================================================================
--- trunk/boost/spirit/home/support/char_class.hpp (original)
+++ trunk/boost/spirit/home/support/char_class.hpp 2010-02-03 07:35:10 EST (Wed, 03 Feb 2010)
@@ -51,6 +51,168 @@
     struct uppernum {};
     struct ucs4 {};
 
+#if defined(BOOST_SPIRIT_UNICODE)
+///////////////////////////////////////////////////////////////////////////
+// Unicode Major Categories
+///////////////////////////////////////////////////////////////////////////
+ struct letter {};
+ struct mark {};
+ struct number {};
+ struct separator {};
+ struct other {};
+ struct punctuation {};
+ struct symbol {};
+
+///////////////////////////////////////////////////////////////////////////
+// Unicode General Categories
+///////////////////////////////////////////////////////////////////////////
+ struct uppercase_letter {};
+ struct lowercase_letter {};
+ struct titlecase_letter {};
+ struct modifier_letter {};
+ struct other_letter {};
+
+ struct nonspacing_mark {};
+ struct enclosing_mark {};
+ struct spacing_mark {};
+
+ struct decimal_number {};
+ struct letter_number {};
+ struct other_number {};
+
+ struct space_separator {};
+ struct line_separator {};
+ struct paragraph_separator {};
+
+ struct control {};
+ struct format {};
+ struct private_use {};
+ struct surrogate {};
+ struct unassigned {};
+
+ struct dash_punctuation {};
+ struct open_punctuation {};
+ struct close_punctuation {};
+ struct connector_punctuation {};
+ struct other_punctuation {};
+ struct initial_punctuation {};
+ struct final_punctuation {};
+
+ struct math_symbol {};
+ struct currency_symbol {};
+ struct modifier_symbol {};
+ struct other_symbol {};
+
+///////////////////////////////////////////////////////////////////////////
+// Unicode Derived Categories
+///////////////////////////////////////////////////////////////////////////
+ struct alphabetic {};
+ struct uppercase {};
+ struct lowercase {};
+ struct white_space {};
+ struct hex_digit {};
+ struct noncharacter_code_point {};
+ struct default_ignorable_code_point {};
+
+///////////////////////////////////////////////////////////////////////////
+// Unicode Scripts
+///////////////////////////////////////////////////////////////////////////
+ struct arabic {};
+ struct imperial_aramaic {};
+ struct armenian {};
+ struct avestan {};
+ struct balinese {};
+ struct bamum {};
+ struct bengali {};
+ struct bopomofo {};
+ struct braille {};
+ struct buginese {};
+ struct buhid {};
+ struct canadian_aboriginal {};
+ struct carian {};
+ struct cham {};
+ struct cherokee {};
+ struct coptic {};
+ struct cypriot {};
+ struct cyrillic {};
+ struct devanagari {};
+ struct deseret {};
+ struct egyptian_hieroglyphs {};
+ struct ethiopic {};
+ struct georgian {};
+ struct glagolitic {};
+ struct gothic {};
+ struct greek {};
+ struct gujarati {};
+ struct gurmukhi {};
+ struct hangul {};
+ struct han {};
+ struct hanunoo {};
+ struct hebrew {};
+ struct hiragana {};
+ struct katakana_or_hiragana {};
+ struct old_italic {};
+ struct javanese {};
+ struct kayah_li {};
+ struct katakana {};
+ struct kharoshthi {};
+ struct khmer {};
+ struct kannada {};
+ struct kaithi {};
+ struct tai_tham {};
+ struct lao {};
+ struct latin {};
+ struct lepcha {};
+ struct limbu {};
+ struct linear_b {};
+ struct lisu {};
+ struct lycian {};
+ struct lydian {};
+ struct malayalam {};
+ struct mongolian {};
+ struct meetei_mayek {};
+ struct myanmar {};
+ struct nko {};
+ struct ogham {};
+ struct ol_chiki {};
+ struct old_turkic {};
+ struct oriya {};
+ struct osmanya {};
+ struct phags_pa {};
+ struct inscriptional_pahlavi {};
+ struct phoenician {};
+ struct inscriptional_parthian {};
+ struct rejang {};
+ struct runic {};
+ struct samaritan {};
+ struct old_south_arabian {};
+ struct saurashtra {};
+ struct shavian {};
+ struct sinhala {};
+ struct sundanese {};
+ struct syloti_nagri {};
+ struct syriac {};
+ struct tagbanwa {};
+ struct tai_le {};
+ struct new_tai_lue {};
+ struct tamil {};
+ struct tai_viet {};
+ struct telugu {};
+ struct tifinagh {};
+ struct tagalog {};
+ struct thaana {};
+ struct thai {};
+ struct tibetan {};
+ struct ugaritic {};
+ struct vai {};
+ struct old_persian {};
+ struct cuneiform {};
+ struct yi {};
+ struct inherited {};
+ struct common {};
+ struct unknown {};
+#endif
+
     ///////////////////////////////////////////////////////////////////////////
     // This composite tag type encodes both the character
     // set and the specific char tag (used for classification
@@ -77,113 +239,222 @@
     {
         typedef typename CharEncoding::char_type char_type;
 
- template <typename Char>
- static bool
- is(tag::char_, Char ch)
- {
- return CharEncoding::ischar(char_type(ch));
- }
-
- template <typename Char>
- static bool
- is(tag::alnum, Char ch)
- {
- return CharEncoding::isalnum(char_type(ch));
- }
-
- template <typename Char>
- static bool
- is(tag::alpha, Char ch)
- {
- return CharEncoding::isalpha(char_type(ch));
- }
-
- template <typename Char>
- static bool
- is(tag::digit, Char ch)
- {
- return CharEncoding::isdigit(char_type(ch));
- }
-
- template <typename Char>
- static bool
- is(tag::xdigit, Char ch)
- {
- return CharEncoding::isxdigit(char_type(ch));
- }
-
- template <typename Char>
- static bool
- is(tag::cntrl, Char ch)
- {
- return CharEncoding::iscntrl(char_type(ch));
- }
-
- template <typename Char>
- static bool
- is(tag::graph, Char ch)
- {
- return CharEncoding::isgraph(char_type(ch));
- }
+#define BOOST_SPIRIT_CLASSIFY(name, isname) \
+ template <typename Char> \
+ static bool \
+ is(tag::name, Char ch) \
+ { \
+ return CharEncoding::isname \
+ BOOST_PREVENT_MACRO_SUBSTITUTION (char_type(ch)); \
+ } \
+ /***/
+
+ BOOST_SPIRIT_CLASSIFY(char_, ischar)
+ BOOST_SPIRIT_CLASSIFY(alnum, isalnum)
+ BOOST_SPIRIT_CLASSIFY(alpha, isalpha)
+ BOOST_SPIRIT_CLASSIFY(digit, isdigit)
+ BOOST_SPIRIT_CLASSIFY(xdigit, isxdigit)
+ BOOST_SPIRIT_CLASSIFY(cntrl, iscntrl)
+ BOOST_SPIRIT_CLASSIFY(graph, isgraph)
+ BOOST_SPIRIT_CLASSIFY(lower, islower)
+ BOOST_SPIRIT_CLASSIFY(print, isprint)
+ BOOST_SPIRIT_CLASSIFY(punct, ispunct)
+ BOOST_SPIRIT_CLASSIFY(space, isspace)
+ BOOST_SPIRIT_CLASSIFY(blank, isblank)
+ BOOST_SPIRIT_CLASSIFY(upper, isupper)
 
- template <typename Char>
- static bool
- is(tag::lower, Char ch)
- {
- return CharEncoding::islower(char_type(ch));
- }
+#undef BOOST_SPIRIT_CLASSIFY
 
         template <typename Char>
         static bool
         is(tag::lowernum, Char ch)
         {
- return CharEncoding::islower(char_type(ch)) ||
+ return CharEncoding::islower(char_type(ch)) ||
                    CharEncoding::isdigit(char_type(ch));
         }
 
         template <typename Char>
         static bool
- is(tag::print, Char ch)
- {
- return CharEncoding::isprint(char_type(ch));
- }
-
- template <typename Char>
- static bool
- is(tag::punct, Char ch)
+ is(tag::uppernum, Char ch)
         {
- return CharEncoding::ispunct(char_type(ch));
+ return CharEncoding::isupper(char_type(ch)) ||
+ CharEncoding::isdigit(char_type(ch));
         }
 
- template <typename Char>
- static bool
- is(tag::space, Char ch)
- {
- return CharEncoding::isspace(char_type(ch));
- }
+#if defined(BOOST_SPIRIT_UNICODE)
 
- template <typename Char>
- static bool
- is(tag::blank, Char ch)
- {
- return CharEncoding::isblank
- BOOST_PREVENT_MACRO_SUBSTITUTION (char_type(ch));
- }
+#define BOOST_SPIRIT_UNICODE_CLASSIFY(name) \
+ template <typename Char> \
+ static bool \
+ is(tag::name, Char ch) \
+ { \
+ return CharEncoding::is_##name(char_type(ch)); \
+ } \
+ /***/
+
+///////////////////////////////////////////////////////////////////////////
+// Unicode Major Categories
+///////////////////////////////////////////////////////////////////////////
+ BOOST_SPIRIT_UNICODE_CLASSIFY(letter)
+ BOOST_SPIRIT_UNICODE_CLASSIFY(mark)
+ BOOST_SPIRIT_UNICODE_CLASSIFY(number)
+ BOOST_SPIRIT_UNICODE_CLASSIFY(separator)
+ BOOST_SPIRIT_UNICODE_CLASSIFY(other)
+ BOOST_SPIRIT_UNICODE_CLASSIFY(punctuation)
+ BOOST_SPIRIT_UNICODE_CLASSIFY(symbol)
+
+///////////////////////////////////////////////////////////////////////////
+// Unicode General Categories
+///////////////////////////////////////////////////////////////////////////
+ BOOST_SPIRIT_UNICODE_CLASSIFY(uppercase_letter)
+ BOOST_SPIRIT_UNICODE_CLASSIFY(lowercase_letter)
+ BOOST_SPIRIT_UNICODE_CLASSIFY(titlecase_letter)
+ BOOST_SPIRIT_UNICODE_CLASSIFY(modifier_letter)
+ BOOST_SPIRIT_UNICODE_CLASSIFY(other_letter)
+
+ BOOST_SPIRIT_UNICODE_CLASSIFY(nonspacing_mark)
+ BOOST_SPIRIT_UNICODE_CLASSIFY(enclosing_mark)
+ BOOST_SPIRIT_UNICODE_CLASSIFY(spacing_mark)
+
+ BOOST_SPIRIT_UNICODE_CLASSIFY(decimal_number)
+ BOOST_SPIRIT_UNICODE_CLASSIFY(letter_number)
+ BOOST_SPIRIT_UNICODE_CLASSIFY(other_number)
+
+ BOOST_SPIRIT_UNICODE_CLASSIFY(space_separator)
+ BOOST_SPIRIT_UNICODE_CLASSIFY(line_separator)
+ BOOST_SPIRIT_UNICODE_CLASSIFY(paragraph_separator)
+
+ BOOST_SPIRIT_UNICODE_CLASSIFY(control)
+ BOOST_SPIRIT_UNICODE_CLASSIFY(format)
+ BOOST_SPIRIT_UNICODE_CLASSIFY(private_use)
+ BOOST_SPIRIT_UNICODE_CLASSIFY(surrogate)
+ BOOST_SPIRIT_UNICODE_CLASSIFY(unassigned)
+
+ BOOST_SPIRIT_UNICODE_CLASSIFY(dash_punctuation)
+ BOOST_SPIRIT_UNICODE_CLASSIFY(open_punctuation)
+ BOOST_SPIRIT_UNICODE_CLASSIFY(close_punctuation)
+ BOOST_SPIRIT_UNICODE_CLASSIFY(connector_punctuation)
+ BOOST_SPIRIT_UNICODE_CLASSIFY(other_punctuation)
+ BOOST_SPIRIT_UNICODE_CLASSIFY(initial_punctuation)
+ BOOST_SPIRIT_UNICODE_CLASSIFY(final_punctuation)
+
+ BOOST_SPIRIT_UNICODE_CLASSIFY(math_symbol)
+ BOOST_SPIRIT_UNICODE_CLASSIFY(currency_symbol)
+ BOOST_SPIRIT_UNICODE_CLASSIFY(modifier_symbol)
+ BOOST_SPIRIT_UNICODE_CLASSIFY(other_symbol)
+
+///////////////////////////////////////////////////////////////////////////
+// Unicode Derived Categories
+///////////////////////////////////////////////////////////////////////////
+ BOOST_SPIRIT_UNICODE_CLASSIFY(alphabetic)
+ BOOST_SPIRIT_UNICODE_CLASSIFY(uppercase)
+ BOOST_SPIRIT_UNICODE_CLASSIFY(lowercase)
+ BOOST_SPIRIT_UNICODE_CLASSIFY(white_space)
+ BOOST_SPIRIT_UNICODE_CLASSIFY(hex_digit)
+ BOOST_SPIRIT_UNICODE_CLASSIFY(noncharacter_code_point)
+ BOOST_SPIRIT_UNICODE_CLASSIFY(default_ignorable_code_point)
+
+///////////////////////////////////////////////////////////////////////////
+// Unicode Scripts
+///////////////////////////////////////////////////////////////////////////
+ BOOST_SPIRIT_UNICODE_CLASSIFY(arabic)
+ BOOST_SPIRIT_UNICODE_CLASSIFY(imperial_aramaic)
+ BOOST_SPIRIT_UNICODE_CLASSIFY(armenian)
+ BOOST_SPIRIT_UNICODE_CLASSIFY(avestan)
+ BOOST_SPIRIT_UNICODE_CLASSIFY(balinese)
+ BOOST_SPIRIT_UNICODE_CLASSIFY(bamum)
+ BOOST_SPIRIT_UNICODE_CLASSIFY(bengali)
+ BOOST_SPIRIT_UNICODE_CLASSIFY(bopomofo)
+ BOOST_SPIRIT_UNICODE_CLASSIFY(braille)
+ BOOST_SPIRIT_UNICODE_CLASSIFY(buginese)
+ BOOST_SPIRIT_UNICODE_CLASSIFY(buhid)
+ BOOST_SPIRIT_UNICODE_CLASSIFY(canadian_aboriginal)
+ BOOST_SPIRIT_UNICODE_CLASSIFY(carian)
+ BOOST_SPIRIT_UNICODE_CLASSIFY(cham)
+ BOOST_SPIRIT_UNICODE_CLASSIFY(cherokee)
+ BOOST_SPIRIT_UNICODE_CLASSIFY(coptic)
+ BOOST_SPIRIT_UNICODE_CLASSIFY(cypriot)
+ BOOST_SPIRIT_UNICODE_CLASSIFY(cyrillic)
+ BOOST_SPIRIT_UNICODE_CLASSIFY(devanagari)
+ BOOST_SPIRIT_UNICODE_CLASSIFY(deseret)
+ BOOST_SPIRIT_UNICODE_CLASSIFY(egyptian_hieroglyphs)
+ BOOST_SPIRIT_UNICODE_CLASSIFY(ethiopic)
+ BOOST_SPIRIT_UNICODE_CLASSIFY(georgian)
+ BOOST_SPIRIT_UNICODE_CLASSIFY(glagolitic)
+ BOOST_SPIRIT_UNICODE_CLASSIFY(gothic)
+ BOOST_SPIRIT_UNICODE_CLASSIFY(greek)
+ BOOST_SPIRIT_UNICODE_CLASSIFY(gujarati)
+ BOOST_SPIRIT_UNICODE_CLASSIFY(gurmukhi)
+ BOOST_SPIRIT_UNICODE_CLASSIFY(hangul)
+ BOOST_SPIRIT_UNICODE_CLASSIFY(han)
+ BOOST_SPIRIT_UNICODE_CLASSIFY(hanunoo)
+ BOOST_SPIRIT_UNICODE_CLASSIFY(hebrew)
+ BOOST_SPIRIT_UNICODE_CLASSIFY(hiragana)
+ BOOST_SPIRIT_UNICODE_CLASSIFY(katakana_or_hiragana)
+ BOOST_SPIRIT_UNICODE_CLASSIFY(old_italic)
+ BOOST_SPIRIT_UNICODE_CLASSIFY(javanese)
+ BOOST_SPIRIT_UNICODE_CLASSIFY(kayah_li)
+ BOOST_SPIRIT_UNICODE_CLASSIFY(katakana)
+ BOOST_SPIRIT_UNICODE_CLASSIFY(kharoshthi)
+ BOOST_SPIRIT_UNICODE_CLASSIFY(khmer)
+ BOOST_SPIRIT_UNICODE_CLASSIFY(kannada)
+ BOOST_SPIRIT_UNICODE_CLASSIFY(kaithi)
+ BOOST_SPIRIT_UNICODE_CLASSIFY(tai_tham)
+ BOOST_SPIRIT_UNICODE_CLASSIFY(lao)
+ BOOST_SPIRIT_UNICODE_CLASSIFY(latin)
+ BOOST_SPIRIT_UNICODE_CLASSIFY(lepcha)
+ BOOST_SPIRIT_UNICODE_CLASSIFY(limbu)
+ BOOST_SPIRIT_UNICODE_CLASSIFY(linear_b)
+ BOOST_SPIRIT_UNICODE_CLASSIFY(lisu)
+ BOOST_SPIRIT_UNICODE_CLASSIFY(lycian)
+ BOOST_SPIRIT_UNICODE_CLASSIFY(lydian)
+ BOOST_SPIRIT_UNICODE_CLASSIFY(malayalam)
+ BOOST_SPIRIT_UNICODE_CLASSIFY(mongolian)
+ BOOST_SPIRIT_UNICODE_CLASSIFY(meetei_mayek)
+ BOOST_SPIRIT_UNICODE_CLASSIFY(myanmar)
+ BOOST_SPIRIT_UNICODE_CLASSIFY(nko)
+ BOOST_SPIRIT_UNICODE_CLASSIFY(ogham)
+ BOOST_SPIRIT_UNICODE_CLASSIFY(ol_chiki)
+ BOOST_SPIRIT_UNICODE_CLASSIFY(old_turkic)
+ BOOST_SPIRIT_UNICODE_CLASSIFY(oriya)
+ BOOST_SPIRIT_UNICODE_CLASSIFY(osmanya)
+ BOOST_SPIRIT_UNICODE_CLASSIFY(phags_pa)
+ BOOST_SPIRIT_UNICODE_CLASSIFY(inscriptional_pahlavi)
+ BOOST_SPIRIT_UNICODE_CLASSIFY(phoenician)
+ BOOST_SPIRIT_UNICODE_CLASSIFY(inscriptional_parthian)
+ BOOST_SPIRIT_UNICODE_CLASSIFY(rejang)
+ BOOST_SPIRIT_UNICODE_CLASSIFY(runic)
+ BOOST_SPIRIT_UNICODE_CLASSIFY(samaritan)
+ BOOST_SPIRIT_UNICODE_CLASSIFY(old_south_arabian)
+ BOOST_SPIRIT_UNICODE_CLASSIFY(saurashtra)
+ BOOST_SPIRIT_UNICODE_CLASSIFY(shavian)
+ BOOST_SPIRIT_UNICODE_CLASSIFY(sinhala)
+ BOOST_SPIRIT_UNICODE_CLASSIFY(sundanese)
+ BOOST_SPIRIT_UNICODE_CLASSIFY(syloti_nagri)
+ BOOST_SPIRIT_UNICODE_CLASSIFY(syriac)
+ BOOST_SPIRIT_UNICODE_CLASSIFY(tagbanwa)
+ BOOST_SPIRIT_UNICODE_CLASSIFY(tai_le)
+ BOOST_SPIRIT_UNICODE_CLASSIFY(new_tai_lue)
+ BOOST_SPIRIT_UNICODE_CLASSIFY(tamil)
+ BOOST_SPIRIT_UNICODE_CLASSIFY(tai_viet)
+ BOOST_SPIRIT_UNICODE_CLASSIFY(telugu)
+ BOOST_SPIRIT_UNICODE_CLASSIFY(tifinagh)
+ BOOST_SPIRIT_UNICODE_CLASSIFY(tagalog)
+ BOOST_SPIRIT_UNICODE_CLASSIFY(thaana)
+ BOOST_SPIRIT_UNICODE_CLASSIFY(thai)
+ BOOST_SPIRIT_UNICODE_CLASSIFY(tibetan)
+ BOOST_SPIRIT_UNICODE_CLASSIFY(ugaritic)
+ BOOST_SPIRIT_UNICODE_CLASSIFY(vai)
+ BOOST_SPIRIT_UNICODE_CLASSIFY(old_persian)
+ BOOST_SPIRIT_UNICODE_CLASSIFY(cuneiform)
+ BOOST_SPIRIT_UNICODE_CLASSIFY(yi)
+ BOOST_SPIRIT_UNICODE_CLASSIFY(inherited)
+ BOOST_SPIRIT_UNICODE_CLASSIFY(common)
+ BOOST_SPIRIT_UNICODE_CLASSIFY(unknown)
 
- template <typename Char>
- static bool
- is(tag::upper, Char ch)
- {
- return CharEncoding::isupper(char_type(ch));
- }
+#undef BOOST_SPIRIT_UNICODE_CLASSIFY
+#endif
 
- template <typename Char>
- static bool
- is(tag::uppernum, Char ch)
- {
- return CharEncoding::isupper(char_type(ch)) ||
- CharEncoding::isdigit(char_type(ch));
- }
     };
 
     ///////////////////////////////////////////////////////////////////////////
@@ -227,100 +498,219 @@
     template <typename CharEncoding>
     struct what
     {
- static char const* is(tag::char_)
- {
- return "char";
- }
-
- static char const* is(tag::alnum)
- {
- return "alnum";
- }
-
- static char const* is(tag::alpha)
- {
- return "alpha";
- }
-
- static char const* is(tag::digit)
- {
- return "digit";
- }
-
- static char const* is(tag::xdigit)
- {
- return "xdigit";
- }
-
- static char const* is(tag::cntrl)
- {
- return "cntrl";
- }
-
- static char const* is(tag::graph)
- {
- return "graph";
- }
-
- static char const* is(tag::lower)
- {
- return "lower";
- }
-
- static char const* is(tag::lowernum)
- {
- return "lowernum";
- }
-
- static char const* is(tag::print)
- {
- return "print";
- }
-
- static char const* is(tag::punct)
- {
- return "punct";
- }
+#define BOOST_SPIRIT_CLASSIFY_WHAT(name, isname) \
+ static char const* is(tag::name) \
+ { \
+ return isname; \
+ } \
+ /***/
+
+ BOOST_SPIRIT_CLASSIFY_WHAT(char_, "char")
+ BOOST_SPIRIT_CLASSIFY_WHAT(alnum, "alnum")
+ BOOST_SPIRIT_CLASSIFY_WHAT(alpha, "alpha")
+ BOOST_SPIRIT_CLASSIFY_WHAT(digit, "digit")
+ BOOST_SPIRIT_CLASSIFY_WHAT(xdigit, "xdigit")
+ BOOST_SPIRIT_CLASSIFY_WHAT(cntrl, "cntrl")
+ BOOST_SPIRIT_CLASSIFY_WHAT(graph, "graph")
+ BOOST_SPIRIT_CLASSIFY_WHAT(lower, "lower")
+ BOOST_SPIRIT_CLASSIFY_WHAT(lowernum, "lowernum")
+ BOOST_SPIRIT_CLASSIFY_WHAT(print, "print")
+ BOOST_SPIRIT_CLASSIFY_WHAT(punct, "punct")
+ BOOST_SPIRIT_CLASSIFY_WHAT(space, "space")
+ BOOST_SPIRIT_CLASSIFY_WHAT(blank, "blank")
+ BOOST_SPIRIT_CLASSIFY_WHAT(upper, "upper")
+ BOOST_SPIRIT_CLASSIFY_WHAT(uppernum, "uppernum")
+ BOOST_SPIRIT_CLASSIFY_WHAT(ucs4, "ucs4")
+
+#undef BOOST_SPIRIT_CLASSIFY_WHAT
+
+#if defined(BOOST_SPIRIT_UNICODE)
+
+#define BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(name) \
+ static char const* is(tag::name) \
+ { \
+ return BOOST_PP_STRINGIZE(name); \
+ } \
+ /***/
+
+///////////////////////////////////////////////////////////////////////////
+// Unicode Major Categories
+///////////////////////////////////////////////////////////////////////////
+ BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(letter)
+ BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(mark)
+ BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(number)
+ BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(separator)
+ BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(other)
+ BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(punctuation)
+ BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(symbol)
+
+///////////////////////////////////////////////////////////////////////////
+// Unicode General Categories
+///////////////////////////////////////////////////////////////////////////
+ BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(uppercase_letter)
+ BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(lowercase_letter)
+ BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(titlecase_letter)
+ BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(modifier_letter)
+ BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(other_letter)
+
+ BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(nonspacing_mark)
+ BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(enclosing_mark)
+ BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(spacing_mark)
+
+ BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(decimal_number)
+ BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(letter_number)
+ BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(other_number)
+
+ BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(space_separator)
+ BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(line_separator)
+ BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(paragraph_separator)
+
+ BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(control)
+ BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(format)
+ BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(private_use)
+ BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(surrogate)
+ BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(unassigned)
+
+ BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(dash_punctuation)
+ BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(open_punctuation)
+ BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(close_punctuation)
+ BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(connector_punctuation)
+ BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(other_punctuation)
+ BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(initial_punctuation)
+ BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(final_punctuation)
+
+ BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(math_symbol)
+ BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(currency_symbol)
+ BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(modifier_symbol)
+ BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(other_symbol)
+
+///////////////////////////////////////////////////////////////////////////
+// Unicode Derived Categories
+///////////////////////////////////////////////////////////////////////////
+ BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(alphabetic)
+ BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(uppercase)
+ BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(lowercase)
+ BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(white_space)
+ BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(hex_digit)
+ BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(noncharacter_code_point)
+ BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(default_ignorable_code_point)
+
+///////////////////////////////////////////////////////////////////////////
+// Unicode Scripts
+///////////////////////////////////////////////////////////////////////////
+ BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(arabic)
+ BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(imperial_aramaic)
+ BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(armenian)
+ BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(avestan)
+ BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(balinese)
+ BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(bamum)
+ BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(bengali)
+ BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(bopomofo)
+ BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(braille)
+ BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(buginese)
+ BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(buhid)
+ BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(canadian_aboriginal)
+ BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(carian)
+ BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(cham)
+ BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(cherokee)
+ BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(coptic)
+ BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(cypriot)
+ BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(cyrillic)
+ BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(devanagari)
+ BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(deseret)
+ BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(egyptian_hieroglyphs)
+ BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(ethiopic)
+ BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(georgian)
+ BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(glagolitic)
+ BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(gothic)
+ BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(greek)
+ BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(gujarati)
+ BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(gurmukhi)
+ BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(hangul)
+ BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(han)
+ BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(hanunoo)
+ BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(hebrew)
+ BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(hiragana)
+ BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(katakana_or_hiragana)
+ BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(old_italic)
+ BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(javanese)
+ BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(kayah_li)
+ BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(katakana)
+ BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(kharoshthi)
+ BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(khmer)
+ BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(kannada)
+ BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(kaithi)
+ BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(tai_tham)
+ BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(lao)
+ BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(latin)
+ BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(lepcha)
+ BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(limbu)
+ BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(linear_b)
+ BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(lisu)
+ BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(lycian)
+ BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(lydian)
+ BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(malayalam)
+ BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(mongolian)
+ BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(meetei_mayek)
+ BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(myanmar)
+ BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(nko)
+ BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(ogham)
+ BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(ol_chiki)
+ BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(old_turkic)
+ BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(oriya)
+ BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(osmanya)
+ BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(phags_pa)
+ BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(inscriptional_pahlavi)
+ BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(phoenician)
+ BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(inscriptional_parthian)
+ BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(rejang)
+ BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(runic)
+ BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(samaritan)
+ BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(old_south_arabian)
+ BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(saurashtra)
+ BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(shavian)
+ BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(sinhala)
+ BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(sundanese)
+ BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(syloti_nagri)
+ BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(syriac)
+ BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(tagbanwa)
+ BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(tai_le)
+ BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(new_tai_lue)
+ BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(tamil)
+ BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(tai_viet)
+ BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(telugu)
+ BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(tifinagh)
+ BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(tagalog)
+ BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(thaana)
+ BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(thai)
+ BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(tibetan)
+ BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(ugaritic)
+ BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(vai)
+ BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(old_persian)
+ BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(cuneiform)
+ BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(yi)
+ BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(inherited)
+ BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(common)
+ BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(unknown)
 
- static char const* is(tag::space)
- {
- return "space";
- }
-
- static char const* is(tag::blank)
- {
- return "blank";
- }
-
- static char const* is(tag::upper)
- {
- return "upper";
- }
-
- static char const* is(tag::uppernum)
- {
- return "uppernum";
- }
+#undef BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT
+#endif
 
- static char const* is(tag::ucs4)
- {
- return "ucs4";
- }
     };
 }}}
 
-namespace boost { namespace spirit { namespace traits
+namespace boost { namespace spirit { namespace traits
 {
     ///////////////////////////////////////////////////////////////////////////
- // This meta-function evaluates to mpl::true_ if the function
+ // This meta-function evaluates to mpl::true_ if the function
     // char_encoding::ischar() needs to be called to ensure correct matching.
     // This happens mainly if the character type returned from the underlying
- // iterator is larger than the character type of the used character
- // encoding. Additionally, this meta-function provides a customization
+ // iterator is larger than the character type of the used character
+ // encoding. Additionally, this meta-function provides a customization
     // point for the lexer library to enforce this behavior while parsing
     // a token stream.
     template <typename Char, typename BaseChar>
- struct mustcheck_ischar
+ struct mustcheck_ischar
       : mpl::bool_<(sizeof(Char) > sizeof(BaseChar)) ? true : false> {};
 
     ///////////////////////////////////////////////////////////////////////////

Modified: trunk/boost/spirit/home/support/char_encoding/unicode.hpp
==============================================================================
--- trunk/boost/spirit/home/support/char_encoding/unicode.hpp (original)
+++ trunk/boost/spirit/home/support/char_encoding/unicode.hpp 2010-02-03 07:35:10 EST (Wed, 03 Feb 2010)
@@ -24,6 +24,9 @@
     {
         typedef ::boost::uint32_t char_type;
 
+ ///////////////////////////////////////////////////////////////////////////
+ // Posix stuff
+ ///////////////////////////////////////////////////////////////////////////
         static bool
         isascii_(char_type ch)
         {
@@ -58,7 +61,7 @@
         static bool
         isxdigit(char_type ch)
         {
- return ucd::is_hexadecimal_number(ch);
+ return ucd::is_hex_digit(ch);
         }
 
         static bool
@@ -130,6 +133,204 @@
         {
             return ch;
         }
+
+ ///////////////////////////////////////////////////////////////////////////
+ // Major Categories
+ ///////////////////////////////////////////////////////////////////////////
+#define BOOST_SPIRIT_MAJOR_CATEGORY(name) \
+ static bool \
+ is_##name(char_type ch) \
+ { \
+ return ucd::get_major_category(ch) == ucd::properties::name; \
+ } \
+ /***/
+
+ BOOST_SPIRIT_MAJOR_CATEGORY(letter)
+ BOOST_SPIRIT_MAJOR_CATEGORY(mark)
+ BOOST_SPIRIT_MAJOR_CATEGORY(number)
+ BOOST_SPIRIT_MAJOR_CATEGORY(separator)
+ BOOST_SPIRIT_MAJOR_CATEGORY(other)
+ BOOST_SPIRIT_MAJOR_CATEGORY(punctuation)
+ BOOST_SPIRIT_MAJOR_CATEGORY(symbol)
+
+ ///////////////////////////////////////////////////////////////////////////
+ // General Categories
+ ///////////////////////////////////////////////////////////////////////////
+#define BOOST_SPIRIT_CATEGORY(name) \
+ static bool \
+ is_##name(char_type ch) \
+ { \
+ return ucd::get_category(ch) == ucd::properties::name; \
+ } \
+ /***/
+
+ BOOST_SPIRIT_CATEGORY(uppercase_letter)
+ BOOST_SPIRIT_CATEGORY(lowercase_letter)
+ BOOST_SPIRIT_CATEGORY(titlecase_letter)
+ BOOST_SPIRIT_CATEGORY(modifier_letter)
+ BOOST_SPIRIT_CATEGORY(other_letter)
+
+ BOOST_SPIRIT_CATEGORY(nonspacing_mark)
+ BOOST_SPIRIT_CATEGORY(enclosing_mark)
+ BOOST_SPIRIT_CATEGORY(spacing_mark)
+
+ BOOST_SPIRIT_CATEGORY(decimal_number)
+ BOOST_SPIRIT_CATEGORY(letter_number)
+ BOOST_SPIRIT_CATEGORY(other_number)
+
+ BOOST_SPIRIT_CATEGORY(space_separator)
+ BOOST_SPIRIT_CATEGORY(line_separator)
+ BOOST_SPIRIT_CATEGORY(paragraph_separator)
+
+ BOOST_SPIRIT_CATEGORY(control)
+ BOOST_SPIRIT_CATEGORY(format)
+ BOOST_SPIRIT_CATEGORY(private_use)
+ BOOST_SPIRIT_CATEGORY(surrogate)
+ BOOST_SPIRIT_CATEGORY(unassigned)
+
+ BOOST_SPIRIT_CATEGORY(dash_punctuation)
+ BOOST_SPIRIT_CATEGORY(open_punctuation)
+ BOOST_SPIRIT_CATEGORY(close_punctuation)
+ BOOST_SPIRIT_CATEGORY(connector_punctuation)
+ BOOST_SPIRIT_CATEGORY(other_punctuation)
+ BOOST_SPIRIT_CATEGORY(initial_punctuation)
+ BOOST_SPIRIT_CATEGORY(final_punctuation)
+
+ BOOST_SPIRIT_CATEGORY(math_symbol)
+ BOOST_SPIRIT_CATEGORY(currency_symbol)
+ BOOST_SPIRIT_CATEGORY(modifier_symbol)
+ BOOST_SPIRIT_CATEGORY(other_symbol)
+
+ ///////////////////////////////////////////////////////////////////////////
+ // Derived Categories
+ ///////////////////////////////////////////////////////////////////////////
+#define BOOST_SPIRIT_DERIVED_CATEGORY(name) \
+ static bool \
+ is_##name(char_type ch) \
+ { \
+ return ucd::is_##name(ch); \
+ } \
+ /***/
+
+ BOOST_SPIRIT_DERIVED_CATEGORY(alphabetic)
+ BOOST_SPIRIT_DERIVED_CATEGORY(uppercase)
+ BOOST_SPIRIT_DERIVED_CATEGORY(lowercase)
+ BOOST_SPIRIT_DERIVED_CATEGORY(white_space)
+ BOOST_SPIRIT_DERIVED_CATEGORY(hex_digit)
+ BOOST_SPIRIT_DERIVED_CATEGORY(noncharacter_code_point)
+ BOOST_SPIRIT_DERIVED_CATEGORY(default_ignorable_code_point)
+
+ ///////////////////////////////////////////////////////////////////////////
+ // Scripts
+ ///////////////////////////////////////////////////////////////////////////
+#define BOOST_SPIRIT_SCRIPT(name) \
+ static bool \
+ is_##name(char_type ch) \
+ { \
+ return ucd::get_script(ch) == ucd::properties::name; \
+ } \
+ /***/
+
+ BOOST_SPIRIT_SCRIPT(arabic)
+ BOOST_SPIRIT_SCRIPT(imperial_aramaic)
+ BOOST_SPIRIT_SCRIPT(armenian)
+ BOOST_SPIRIT_SCRIPT(avestan)
+ BOOST_SPIRIT_SCRIPT(balinese)
+ BOOST_SPIRIT_SCRIPT(bamum)
+ BOOST_SPIRIT_SCRIPT(bengali)
+ BOOST_SPIRIT_SCRIPT(bopomofo)
+ BOOST_SPIRIT_SCRIPT(braille)
+ BOOST_SPIRIT_SCRIPT(buginese)
+ BOOST_SPIRIT_SCRIPT(buhid)
+ BOOST_SPIRIT_SCRIPT(canadian_aboriginal)
+ BOOST_SPIRIT_SCRIPT(carian)
+ BOOST_SPIRIT_SCRIPT(cham)
+ BOOST_SPIRIT_SCRIPT(cherokee)
+ BOOST_SPIRIT_SCRIPT(coptic)
+ BOOST_SPIRIT_SCRIPT(cypriot)
+ BOOST_SPIRIT_SCRIPT(cyrillic)
+ BOOST_SPIRIT_SCRIPT(devanagari)
+ BOOST_SPIRIT_SCRIPT(deseret)
+ BOOST_SPIRIT_SCRIPT(egyptian_hieroglyphs)
+ BOOST_SPIRIT_SCRIPT(ethiopic)
+ BOOST_SPIRIT_SCRIPT(georgian)
+ BOOST_SPIRIT_SCRIPT(glagolitic)
+ BOOST_SPIRIT_SCRIPT(gothic)
+ BOOST_SPIRIT_SCRIPT(greek)
+ BOOST_SPIRIT_SCRIPT(gujarati)
+ BOOST_SPIRIT_SCRIPT(gurmukhi)
+ BOOST_SPIRIT_SCRIPT(hangul)
+ BOOST_SPIRIT_SCRIPT(han)
+ BOOST_SPIRIT_SCRIPT(hanunoo)
+ BOOST_SPIRIT_SCRIPT(hebrew)
+ BOOST_SPIRIT_SCRIPT(hiragana)
+ BOOST_SPIRIT_SCRIPT(katakana_or_hiragana)
+ BOOST_SPIRIT_SCRIPT(old_italic)
+ BOOST_SPIRIT_SCRIPT(javanese)
+ BOOST_SPIRIT_SCRIPT(kayah_li)
+ BOOST_SPIRIT_SCRIPT(katakana)
+ BOOST_SPIRIT_SCRIPT(kharoshthi)
+ BOOST_SPIRIT_SCRIPT(khmer)
+ BOOST_SPIRIT_SCRIPT(kannada)
+ BOOST_SPIRIT_SCRIPT(kaithi)
+ BOOST_SPIRIT_SCRIPT(tai_tham)
+ BOOST_SPIRIT_SCRIPT(lao)
+ BOOST_SPIRIT_SCRIPT(latin)
+ BOOST_SPIRIT_SCRIPT(lepcha)
+ BOOST_SPIRIT_SCRIPT(limbu)
+ BOOST_SPIRIT_SCRIPT(linear_b)
+ BOOST_SPIRIT_SCRIPT(lisu)
+ BOOST_SPIRIT_SCRIPT(lycian)
+ BOOST_SPIRIT_SCRIPT(lydian)
+ BOOST_SPIRIT_SCRIPT(malayalam)
+ BOOST_SPIRIT_SCRIPT(mongolian)
+ BOOST_SPIRIT_SCRIPT(meetei_mayek)
+ BOOST_SPIRIT_SCRIPT(myanmar)
+ BOOST_SPIRIT_SCRIPT(nko)
+ BOOST_SPIRIT_SCRIPT(ogham)
+ BOOST_SPIRIT_SCRIPT(ol_chiki)
+ BOOST_SPIRIT_SCRIPT(old_turkic)
+ BOOST_SPIRIT_SCRIPT(oriya)
+ BOOST_SPIRIT_SCRIPT(osmanya)
+ BOOST_SPIRIT_SCRIPT(phags_pa)
+ BOOST_SPIRIT_SCRIPT(inscriptional_pahlavi)
+ BOOST_SPIRIT_SCRIPT(phoenician)
+ BOOST_SPIRIT_SCRIPT(inscriptional_parthian)
+ BOOST_SPIRIT_SCRIPT(rejang)
+ BOOST_SPIRIT_SCRIPT(runic)
+ BOOST_SPIRIT_SCRIPT(samaritan)
+ BOOST_SPIRIT_SCRIPT(old_south_arabian)
+ BOOST_SPIRIT_SCRIPT(saurashtra)
+ BOOST_SPIRIT_SCRIPT(shavian)
+ BOOST_SPIRIT_SCRIPT(sinhala)
+ BOOST_SPIRIT_SCRIPT(sundanese)
+ BOOST_SPIRIT_SCRIPT(syloti_nagri)
+ BOOST_SPIRIT_SCRIPT(syriac)
+ BOOST_SPIRIT_SCRIPT(tagbanwa)
+ BOOST_SPIRIT_SCRIPT(tai_le)
+ BOOST_SPIRIT_SCRIPT(new_tai_lue)
+ BOOST_SPIRIT_SCRIPT(tamil)
+ BOOST_SPIRIT_SCRIPT(tai_viet)
+ BOOST_SPIRIT_SCRIPT(telugu)
+ BOOST_SPIRIT_SCRIPT(tifinagh)
+ BOOST_SPIRIT_SCRIPT(tagalog)
+ BOOST_SPIRIT_SCRIPT(thaana)
+ BOOST_SPIRIT_SCRIPT(thai)
+ BOOST_SPIRIT_SCRIPT(tibetan)
+ BOOST_SPIRIT_SCRIPT(ugaritic)
+ BOOST_SPIRIT_SCRIPT(vai)
+ BOOST_SPIRIT_SCRIPT(old_persian)
+ BOOST_SPIRIT_SCRIPT(cuneiform)
+ BOOST_SPIRIT_SCRIPT(yi)
+ BOOST_SPIRIT_SCRIPT(inherited)
+ BOOST_SPIRIT_SCRIPT(common)
+ BOOST_SPIRIT_SCRIPT(unknown)
+
+#undef BOOST_SPIRIT_MAJOR_CATEGORY
+#undef BOOST_SPIRIT_CATEGORY
+#undef BOOST_SPIRIT_DERIVED_CATEGORY
+#undef BOOST_SPIRIT_SCRIPT
+
     };
 
 }}}

Modified: trunk/boost/spirit/home/support/char_encoding/unicode/query.hpp
==============================================================================
--- trunk/boost/spirit/home/support/char_encoding/unicode/query.hpp (original)
+++ trunk/boost/spirit/home/support/char_encoding/unicode/query.hpp 2010-02-03 07:35:10 EST (Wed, 03 Feb 2010)
@@ -10,10 +10,12 @@
 #if !defined(BOOST_SPIRIT_UNICODE_QUERY_FEBRUARY_2_2010)
 #define BOOST_SPIRIT_UNICODE_QUERY_FEBRUARY_2_2010
 
-#include "category_table.hpp"
-#include "script_table.hpp"
-#include "lowercase_table.hpp"
-#include "uppercase_table.hpp"
+#include <boost/cstdint.hpp>
+
+# include "category_table.hpp"
+# include "script_table.hpp"
+# include "lowercase_table.hpp"
+# include "uppercase_table.hpp"
 
 namespace boost { namespace spirit { namespace ucd
 {
@@ -71,7 +73,7 @@
             initial_punctuation, // [Pi] an initial quotation mark
             final_punctuation, // [Pf] a final quotation mark
                                     
- math_symboll = 48, // [Sm] a symbol of primarily mathematical use
+ math_symbol = 48, // [Sm] a symbol of primarily mathematical use
             currency_symbol, // [Sc] a currency sign
             modifier_symbol, // [Sk] a non-letterlike modifier symbol
             other_symbol // [So] a symbol of other type
@@ -207,7 +209,7 @@
         return get_category(ch) == properties::decimal_number;
     }
     
- inline bool is_hexadecimal_number(::boost::uint32_t ch)
+ inline bool is_hex_digit(::boost::uint32_t ch)
     {
         return (detail::category_lookup(ch) & properties::hex_digit) != 0;
     }

Modified: trunk/boost/spirit/home/support/common_terminals.hpp
==============================================================================
--- trunk/boost/spirit/home/support/common_terminals.hpp (original)
+++ trunk/boost/spirit/home/support/common_terminals.hpp 2010-02-03 07:35:10 EST (Wed, 03 Feb 2010)
@@ -17,9 +17,12 @@
 #include <boost/spirit/home/support/char_encoding/standard_wide.hpp>
 #include <boost/spirit/home/support/char_encoding/ascii.hpp>
 #include <boost/spirit/home/support/char_encoding/iso8859_1.hpp>
-#include <boost/spirit/home/support/char_encoding/unicode.hpp>
 #include <boost/spirit/home/support/char_class.hpp>
 
+#if defined(BOOST_SPIRIT_UNICODE)
+# include <boost/spirit/home/support/char_encoding/unicode.hpp>
+#endif
+
 namespace boost { namespace spirit
 {
     // Our basic terminals
@@ -87,18 +90,18 @@
     )
 
     // special tags (used mainly for stateful tag types)
- namespace tag
- {
- struct attr_cast {};
+ namespace tag
+ {
+ struct attr_cast {};
     }
 
 }}
 
 ///////////////////////////////////////////////////////////////////////////////
 // Here we place the character-set sensitive placeholders. We have one set
-// each for ascii, iso8859_1, standard and standard_wide. These placeholders
-// are placed in its char-set namespace. For example, there exist a placeholder
-// spirit::ascii::alnum for ascii versions of alnum.
+// each for ascii, iso8859_1, standard, standard_wide and unicode. These
+// placeholders are placed in their char-set namespaces. For example, there is
+// a placeholder spirit::ascii::alnum for the ascii version of alnum.
 
 #define BOOST_SPIRIT_TAG_CHAR_SPEC(charset) \
     typedef tag::char_code<tag::char_, charset> char_; \
@@ -157,6 +160,182 @@
 BOOST_SPIRIT_DEFINE_CHAR_CODES(iso8859_1)
 BOOST_SPIRIT_DEFINE_CHAR_CODES(standard)
 BOOST_SPIRIT_DEFINE_CHAR_CODES(standard_wide)
+
+#if defined(BOOST_SPIRIT_UNICODE)
 BOOST_SPIRIT_DEFINE_CHAR_CODES(unicode)
 
+ namespace boost { namespace spirit { namespace tag { namespace unicode
+ {
+ BOOST_SPIRIT_TAG_CHAR_SPEC(spirit::char_encoding::unicode)
+ }}}}
+
+ namespace boost { namespace spirit { namespace unicode
+ {
+#define BOOST_SPIRIT_UNICODE_CHAR_CODE(name) \
+ BOOST_SPIRIT_CHAR_CODE(name, spirit::char_encoding::unicode) \
+
+ ///////////////////////////////////////////////////////////////////////////
+ // Unicode Major Categories
+ ///////////////////////////////////////////////////////////////////////////
+ BOOST_SPIRIT_UNICODE_CHAR_CODE(letter)
+ BOOST_SPIRIT_UNICODE_CHAR_CODE(mark)
+ BOOST_SPIRIT_UNICODE_CHAR_CODE(number)
+ BOOST_SPIRIT_UNICODE_CHAR_CODE(separator)
+ BOOST_SPIRIT_UNICODE_CHAR_CODE(other)
+ BOOST_SPIRIT_UNICODE_CHAR_CODE(punctuation)
+ BOOST_SPIRIT_UNICODE_CHAR_CODE(symbol)
+
+ ///////////////////////////////////////////////////////////////////////////
+ // Unicode General Categories
+ ///////////////////////////////////////////////////////////////////////////
+ BOOST_SPIRIT_UNICODE_CHAR_CODE(uppercase_letter)
+ BOOST_SPIRIT_UNICODE_CHAR_CODE(lowercase_letter)
+ BOOST_SPIRIT_UNICODE_CHAR_CODE(titlecase_letter)
+ BOOST_SPIRIT_UNICODE_CHAR_CODE(modifier_letter)
+ BOOST_SPIRIT_UNICODE_CHAR_CODE(other_letter)
+
+ BOOST_SPIRIT_UNICODE_CHAR_CODE(nonspacing_mark)
+ BOOST_SPIRIT_UNICODE_CHAR_CODE(enclosing_mark)
+ BOOST_SPIRIT_UNICODE_CHAR_CODE(spacing_mark)
+
+ BOOST_SPIRIT_UNICODE_CHAR_CODE(decimal_number)
+ BOOST_SPIRIT_UNICODE_CHAR_CODE(letter_number)
+ BOOST_SPIRIT_UNICODE_CHAR_CODE(other_number)
+
+ BOOST_SPIRIT_UNICODE_CHAR_CODE(space_separator)
+ BOOST_SPIRIT_UNICODE_CHAR_CODE(line_separator)
+ BOOST_SPIRIT_UNICODE_CHAR_CODE(paragraph_separator)
+
+ BOOST_SPIRIT_UNICODE_CHAR_CODE(control)
+ BOOST_SPIRIT_UNICODE_CHAR_CODE(format)
+ BOOST_SPIRIT_UNICODE_CHAR_CODE(private_use)
+ BOOST_SPIRIT_UNICODE_CHAR_CODE(surrogate)
+ BOOST_SPIRIT_UNICODE_CHAR_CODE(unassigned)
+
+ BOOST_SPIRIT_UNICODE_CHAR_CODE(dash_punctuation)
+ BOOST_SPIRIT_UNICODE_CHAR_CODE(open_punctuation)
+ BOOST_SPIRIT_UNICODE_CHAR_CODE(close_punctuation)
+ BOOST_SPIRIT_UNICODE_CHAR_CODE(connector_punctuation)
+ BOOST_SPIRIT_UNICODE_CHAR_CODE(other_punctuation)
+ BOOST_SPIRIT_UNICODE_CHAR_CODE(initial_punctuation)
+ BOOST_SPIRIT_UNICODE_CHAR_CODE(final_punctuation)
+
+ BOOST_SPIRIT_UNICODE_CHAR_CODE(math_symbol)
+ BOOST_SPIRIT_UNICODE_CHAR_CODE(currency_symbol)
+ BOOST_SPIRIT_UNICODE_CHAR_CODE(modifier_symbol)
+ BOOST_SPIRIT_UNICODE_CHAR_CODE(other_symbol)
+
+ ///////////////////////////////////////////////////////////////////////////
+ // Unicode Derived Categories
+ ///////////////////////////////////////////////////////////////////////////
+ BOOST_SPIRIT_UNICODE_CHAR_CODE(alphabetic)
+ BOOST_SPIRIT_UNICODE_CHAR_CODE(uppercase)
+ BOOST_SPIRIT_UNICODE_CHAR_CODE(lowercase)
+ BOOST_SPIRIT_UNICODE_CHAR_CODE(white_space)
+ BOOST_SPIRIT_UNICODE_CHAR_CODE(hex_digit)
+ BOOST_SPIRIT_UNICODE_CHAR_CODE(noncharacter_code_point)
+ BOOST_SPIRIT_UNICODE_CHAR_CODE(default_ignorable_code_point)
+
+ ///////////////////////////////////////////////////////////////////////////
+ // Unicode Scripts
+ ///////////////////////////////////////////////////////////////////////////
+ BOOST_SPIRIT_UNICODE_CHAR_CODE(arabic)
+ BOOST_SPIRIT_UNICODE_CHAR_CODE(imperial_aramaic)
+ BOOST_SPIRIT_UNICODE_CHAR_CODE(armenian)
+ BOOST_SPIRIT_UNICODE_CHAR_CODE(avestan)
+ BOOST_SPIRIT_UNICODE_CHAR_CODE(balinese)
+ BOOST_SPIRIT_UNICODE_CHAR_CODE(bamum)
+ BOOST_SPIRIT_UNICODE_CHAR_CODE(bengali)
+ BOOST_SPIRIT_UNICODE_CHAR_CODE(bopomofo)
+ BOOST_SPIRIT_UNICODE_CHAR_CODE(braille)
+ BOOST_SPIRIT_UNICODE_CHAR_CODE(buginese)
+ BOOST_SPIRIT_UNICODE_CHAR_CODE(buhid)
+ BOOST_SPIRIT_UNICODE_CHAR_CODE(canadian_aboriginal)
+ BOOST_SPIRIT_UNICODE_CHAR_CODE(carian)
+ BOOST_SPIRIT_UNICODE_CHAR_CODE(cham)
+ BOOST_SPIRIT_UNICODE_CHAR_CODE(cherokee)
+ BOOST_SPIRIT_UNICODE_CHAR_CODE(coptic)
+ BOOST_SPIRIT_UNICODE_CHAR_CODE(cypriot)
+ BOOST_SPIRIT_UNICODE_CHAR_CODE(cyrillic)
+ BOOST_SPIRIT_UNICODE_CHAR_CODE(devanagari)
+ BOOST_SPIRIT_UNICODE_CHAR_CODE(deseret)
+ BOOST_SPIRIT_UNICODE_CHAR_CODE(egyptian_hieroglyphs)
+ BOOST_SPIRIT_UNICODE_CHAR_CODE(ethiopic)
+ BOOST_SPIRIT_UNICODE_CHAR_CODE(georgian)
+ BOOST_SPIRIT_UNICODE_CHAR_CODE(glagolitic)
+ BOOST_SPIRIT_UNICODE_CHAR_CODE(gothic)
+ BOOST_SPIRIT_UNICODE_CHAR_CODE(greek)
+ BOOST_SPIRIT_UNICODE_CHAR_CODE(gujarati)
+ BOOST_SPIRIT_UNICODE_CHAR_CODE(gurmukhi)
+ BOOST_SPIRIT_UNICODE_CHAR_CODE(hangul)
+ BOOST_SPIRIT_UNICODE_CHAR_CODE(han)
+ BOOST_SPIRIT_UNICODE_CHAR_CODE(hanunoo)
+ BOOST_SPIRIT_UNICODE_CHAR_CODE(hebrew)
+ BOOST_SPIRIT_UNICODE_CHAR_CODE(hiragana)
+ BOOST_SPIRIT_UNICODE_CHAR_CODE(katakana_or_hiragana)
+ BOOST_SPIRIT_UNICODE_CHAR_CODE(old_italic)
+ BOOST_SPIRIT_UNICODE_CHAR_CODE(javanese)
+ BOOST_SPIRIT_UNICODE_CHAR_CODE(kayah_li)
+ BOOST_SPIRIT_UNICODE_CHAR_CODE(katakana)
+ BOOST_SPIRIT_UNICODE_CHAR_CODE(kharoshthi)
+ BOOST_SPIRIT_UNICODE_CHAR_CODE(khmer)
+ BOOST_SPIRIT_UNICODE_CHAR_CODE(kannada)
+ BOOST_SPIRIT_UNICODE_CHAR_CODE(kaithi)
+ BOOST_SPIRIT_UNICODE_CHAR_CODE(tai_tham)
+ BOOST_SPIRIT_UNICODE_CHAR_CODE(lao)
+ BOOST_SPIRIT_UNICODE_CHAR_CODE(latin)
+ BOOST_SPIRIT_UNICODE_CHAR_CODE(lepcha)
+ BOOST_SPIRIT_UNICODE_CHAR_CODE(limbu)
+ BOOST_SPIRIT_UNICODE_CHAR_CODE(linear_b)
+ BOOST_SPIRIT_UNICODE_CHAR_CODE(lisu)
+ BOOST_SPIRIT_UNICODE_CHAR_CODE(lycian)
+ BOOST_SPIRIT_UNICODE_CHAR_CODE(lydian)
+ BOOST_SPIRIT_UNICODE_CHAR_CODE(malayalam)
+ BOOST_SPIRIT_UNICODE_CHAR_CODE(mongolian)
+ BOOST_SPIRIT_UNICODE_CHAR_CODE(meetei_mayek)
+ BOOST_SPIRIT_UNICODE_CHAR_CODE(myanmar)
+ BOOST_SPIRIT_UNICODE_CHAR_CODE(nko)
+ BOOST_SPIRIT_UNICODE_CHAR_CODE(ogham)
+ BOOST_SPIRIT_UNICODE_CHAR_CODE(ol_chiki)
+ BOOST_SPIRIT_UNICODE_CHAR_CODE(old_turkic)
+ BOOST_SPIRIT_UNICODE_CHAR_CODE(oriya)
+ BOOST_SPIRIT_UNICODE_CHAR_CODE(osmanya)
+ BOOST_SPIRIT_UNICODE_CHAR_CODE(phags_pa)
+ BOOST_SPIRIT_UNICODE_CHAR_CODE(inscriptional_pahlavi)
+ BOOST_SPIRIT_UNICODE_CHAR_CODE(phoenician)
+ BOOST_SPIRIT_UNICODE_CHAR_CODE(inscriptional_parthian)
+ BOOST_SPIRIT_UNICODE_CHAR_CODE(rejang)
+ BOOST_SPIRIT_UNICODE_CHAR_CODE(runic)
+ BOOST_SPIRIT_UNICODE_CHAR_CODE(samaritan)
+ BOOST_SPIRIT_UNICODE_CHAR_CODE(old_south_arabian)
+ BOOST_SPIRIT_UNICODE_CHAR_CODE(saurashtra)
+ BOOST_SPIRIT_UNICODE_CHAR_CODE(shavian)
+ BOOST_SPIRIT_UNICODE_CHAR_CODE(sinhala)
+ BOOST_SPIRIT_UNICODE_CHAR_CODE(sundanese)
+ BOOST_SPIRIT_UNICODE_CHAR_CODE(syloti_nagri)
+ BOOST_SPIRIT_UNICODE_CHAR_CODE(syriac)
+ BOOST_SPIRIT_UNICODE_CHAR_CODE(tagbanwa)
+ BOOST_SPIRIT_UNICODE_CHAR_CODE(tai_le)
+ BOOST_SPIRIT_UNICODE_CHAR_CODE(new_tai_lue)
+ BOOST_SPIRIT_UNICODE_CHAR_CODE(tamil)
+ BOOST_SPIRIT_UNICODE_CHAR_CODE(tai_viet)
+ BOOST_SPIRIT_UNICODE_CHAR_CODE(telugu)
+ BOOST_SPIRIT_UNICODE_CHAR_CODE(tifinagh)
+ BOOST_SPIRIT_UNICODE_CHAR_CODE(tagalog)
+ BOOST_SPIRIT_UNICODE_CHAR_CODE(thaana)
+ BOOST_SPIRIT_UNICODE_CHAR_CODE(thai)
+ BOOST_SPIRIT_UNICODE_CHAR_CODE(tibetan)
+ BOOST_SPIRIT_UNICODE_CHAR_CODE(ugaritic)
+ BOOST_SPIRIT_UNICODE_CHAR_CODE(vai)
+ BOOST_SPIRIT_UNICODE_CHAR_CODE(old_persian)
+ BOOST_SPIRIT_UNICODE_CHAR_CODE(cuneiform)
+ BOOST_SPIRIT_UNICODE_CHAR_CODE(yi)
+ BOOST_SPIRIT_UNICODE_CHAR_CODE(inherited)
+ BOOST_SPIRIT_UNICODE_CHAR_CODE(common)
+ BOOST_SPIRIT_UNICODE_CHAR_CODE(unknown)
+
+#undef BOOST_SPIRIT_UNICODE_CHAR_CODE
+ }}}
+#endif
+
 #endif


Boost-Commit list run by bdawes at acm.org, david.abrahams at rcn.com, gregod at cs.rpi.edu, cpdaniel at pacbell.net, john at johnmaddock.co.uk