Boost logo

Boost-Commit :

Subject: [Boost-commit] svn:boost r59422 - in trunk/boost/spirit/home/support/char_encoding: . unicode
From: joel_at_[hidden]
Date: 2010-02-02 04:58:55


Author: djowel
Date: 2010-02-02 04:58:54 EST (Tue, 02 Feb 2010)
New Revision: 59422
URL: http://svn.boost.org/trac/boost/changeset/59422

Log:
Complete Unicode Level-1 support: table generation.
Added:
   trunk/boost/spirit/home/support/char_encoding/unicode/query.hpp
      - copied unchanged from r59421, /trunk/boost/spirit/home/support/char_encoding/unicode/properties.hpp
Removed:
   trunk/boost/spirit/home/support/char_encoding/unicode/properties.hpp
Text files modified:
   trunk/boost/spirit/home/support/char_encoding/unicode.hpp | 2 +-
   1 files changed, 1 insertions(+), 1 deletions(-)

Modified: trunk/boost/spirit/home/support/char_encoding/unicode.hpp
==============================================================================
--- trunk/boost/spirit/home/support/char_encoding/unicode.hpp (original)
+++ trunk/boost/spirit/home/support/char_encoding/unicode.hpp 2010-02-02 04:58:54 EST (Tue, 02 Feb 2010)
@@ -13,7 +13,7 @@
 #endif
 
 #include <boost/cstdint.hpp>
-#include <boost/spirit/home/support/char_encoding/unicode/properties.hpp>
+#include <boost/spirit/home/support/char_encoding/unicode/query.hpp>
 
 namespace boost { namespace spirit { namespace char_encoding
 {

Deleted: trunk/boost/spirit/home/support/char_encoding/unicode/properties.hpp
==============================================================================
--- trunk/boost/spirit/home/support/char_encoding/unicode/properties.hpp 2010-02-02 04:58:54 EST (Tue, 02 Feb 2010)
+++ (empty file)
@@ -1,300 +0,0 @@
-/*=============================================================================
- Copyright (c) 2001-2010 Joel de Guzman
-
- Distributed under the Boost Software License, Version 1.0. (See accompanying
- file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
-
- Autogenerated by MultiStageTable.py (Unicode multi-stage
- table builder) (c) Peter Kankowski, 2008
-==============================================================================*/
-#if !defined(BOOST_SPIRIT_UNICODE_GET_CATEGORY)
-
-#include "category_table.hpp"
-#include "script_table.hpp"
-#include "lowercase_table.hpp"
-#include "uppercase_table.hpp"
-
-namespace boost { namespace spirit { namespace unicode
-{
- // This header provides Basic (Level 1) Unicode Support
- // See http://unicode.org/reports/tr18/ for details
-
- struct properties
- {
- // bit pattern: xxMMMCCC
- // MMM: major_category
- // CCC: category
-
- enum major_category
- {
- letter,
- mark,
- number,
- separator,
- other,
- punctuation,
- symbol
- };
-
- enum category
- {
- uppercase_letter = 0, // [Lu] an uppercase letter
- lowercase_letter, // [Ll] a lowercase letter
- titlecase_letter, // [Lt] a digraphic character, with first part uppercase
- modifier_letter, // [Lm] a modifier letter
- other_letter, // [Lo] other letters, including syllables and ideographs
-
- nonspacing_mark = 8, // [Mn] a nonspacing combining mark (zero advance width)
- enclosing_mark, // [Me] an enclosing combining mark
- spacing_mark, // [Mc] a spacing combining mark (positive advance width)
-
- decimal_number = 16, // [Nd] a decimal digit
- letter_number, // [Nl] a letterlike numeric character
- other_number, // [No] a numeric character of other type
-
- space_separator = 24, // [Zs] a space character (of various non-zero widths)
- line_separator, // [Zl] U+2028 LINE SEPARATOR only
- paragraph_separator, // [Zp] U+2029 PARAGRAPH SEPARATOR only
-
- control = 32, // [Cc] a C0 or C1 control code
- format, // [Cf] a format control character
- private_use, // [Co] a private-use character
- surrogate, // [Cs] a surrogate code point
- unassigned, // [Cn] a reserved unassigned code point or a noncharacter
-
- dash_punctuation = 40, // [Pd] a dash or hyphen punctuation mark
- open_punctuation, // [Ps] an opening punctuation mark (of a pair)
- close_punctuation, // [Pe] a closing punctuation mark (of a pair)
- connector_punctuation, // [Pc] a connecting punctuation mark, like a tie
- other_punctuation, // [Po] a punctuation mark of other type
- initial_punctuation, // [Pi] an initial quotation mark
- final_punctuation, // [Pf] a final quotation mark
-
- math_symboll = 48, // [Sm] a symbol of primarily mathematical use
- currency_symbol, // [Sc] a currency sign
- modifier_symbol, // [Sk] a non-letterlike modifier symbol
- other_symbol // [So] a symbol of other type
- };
-
- enum derived_properties
- {
- alphabetic = 64,
- uppercase = 128,
- lowercase = 256,
- white_space = 512,
- hex_digit = 1024,
- noncharacter_code_point = 2048,
- default_ignorable_code_point = 4096
- };
-
- enum script
- {
- arabic = 0,
- imperial_aramaic = 1,
- armenian = 2,
- avestan = 3,
- balinese = 4,
- bamum = 5,
- bengali = 6,
- bopomofo = 7,
- braille = 8,
- buginese = 9,
- buhid = 10,
- canadian_aboriginal = 11,
- carian = 12,
- cham = 13,
- cherokee = 14,
- coptic = 15,
- cypriot = 16,
- cyrillic = 17,
- devanagari = 18,
- deseret = 19,
- egyptian_hieroglyphs = 20,
- ethiopic = 21,
- georgian = 22,
- glagolitic = 23,
- gothic = 24,
- greek = 25,
- gujarati = 26,
- gurmukhi = 27,
- hangul = 28,
- han = 29,
- hanunoo = 30,
- hebrew = 31,
- hiragana = 32,
- katakana_or_hiragana = 33,
- old_italic = 34,
- javanese = 35,
- kayah_li = 36,
- katakana = 37,
- kharoshthi = 38,
- khmer = 39,
- kannada = 40,
- kaithi = 41,
- tai_tham = 42,
- lao = 43,
- latin = 44,
- lepcha = 45,
- limbu = 46,
- linear_b = 47,
- lisu = 48,
- lycian = 49,
- lydian = 50,
- malayalam = 51,
- mongolian = 52,
- meetei_mayek = 53,
- myanmar = 54,
- nko = 55,
- ogham = 56,
- ol_chiki = 57,
- old_turkic = 58,
- oriya = 59,
- osmanya = 60,
- phags_pa = 61,
- inscriptional_pahlavi = 62,
- phoenician = 63,
- inscriptional_parthian = 64,
- rejang = 65,
- runic = 66,
- samaritan = 67,
- old_south_arabian = 68,
- saurashtra = 69,
- shavian = 70,
- sinhala = 71,
- sundanese = 72,
- syloti_nagri = 73,
- syriac = 74,
- tagbanwa = 75,
- tai_le = 76,
- new_tai_lue = 77,
- tamil = 78,
- tai_viet = 79,
- telugu = 80,
- tifinagh = 81,
- tagalog = 82,
- thaana = 83,
- thai = 84,
- tibetan = 85,
- ugaritic = 86,
- vai = 87,
- old_persian = 88,
- cuneiform = 89,
- yi = 90,
- inherited = 91,
- common = 92,
- unknown = 93
- };
- };
-
- inline properties::category get_category(::boost::uint32_t ch)
- {
- return static_cast<properties::category>(detail::category_lookup(r) & 0x3F);
- }
-
- inline properties::major_category get_major_category(::boost::uint32_t ch)
- {
- return static_cast<properties::major_category>(get_category(ch) >> 3);
- }
-
- inline bool is_punctuation(::boost::uint32_t ch)
- {
- return get_major_category() == properties::punctuation;
- }
-
- inline bool is_decimal_number(::boost::uint32_t ch)
- {
- return get_category(ch) == properties::decimal_number;
- }
-
- inline bool is_hexadecimal_number(::boost::uint32_t ch)
- {
- return get_category(ch) == properties::hex_digit
- || get_category(ch) == properties::decimal_number
- ;
- }
-
- inline bool is_control(::boost::uint32_t ch)
- {
- return get_category(ch) == properties::control;
- }
-
- inline bool is_print(::boost::uint32_t ch)
- {
- return (is_graph() || is_blank()) && !is_control(ch);
- }
-
- inline bool is_alphabetic(::boost::uint32_t ch)
- {
- return detail::category_lookup(r) & properties::alphabetic;
- }
-
- inline bool is_alphanumeric(::boost::uint32_t ch)
- {
- return is_decimal_number() || is_alphabetic();
- }
-
- inline bool is_uppercase(::boost::uint32_t ch)
- {
- return detail::category_lookup(r) & properties::uppercase;
- }
-
- inline bool is_lowercase(::boost::uint32_t ch)
- {
- return detail::category_lookup(r) & properties::lowercase;
- }
-
- inline bool is_white_space(::boost::uint32_t ch)
- {
- return detail::category_lookup(r) & properties::white_space;
- }
-
- inline bool is_blank(::boost::uint32_t ch)
- {
- switch (ch)
- {
- case '\n': case '\v': case '\f': case '\r':
- return false;
- default:
- is_white_space(ch)
- && !( get_catogory(ch) == line_separator
- || get_catogory(ch) == paragraph_separator
- );
- }
- }
-
- inline bool is_graph(::boost::uint32_t ch)
- {
- return !( is_white_space(ch)
- || get_category(ch) == properties::control
- || get_category(ch) == properties::surrogate
- || get_category(ch) == properties::unassigned
- );
- }
-
- inline bool is_noncharacter_code_point(::boost::uint32_t ch)
- {
- return detail::category_lookup(r) & properties::noncharacter_code_point;
- }
-
- inline bool is_default_ignorable_code_point(::boost::uint32_t ch)
- {
- return detail::category_lookup(r) & properties::default_ignorable_code_point;
- }
-
- inline properties::script get_script(::boost::uint32_t ch)
- {
- return static_cast<properties::script>(detail::script_lookup(ch) & 0x3F);
- }
-
- inline ::boost::uint32_t to_lowercase(::boost::uint32_t ch)
- {
- return detail::lowercase_lookup(ch);
- }
-
- inline ::boost::uint32_t to_uppercase(::boost::uint32_t ch)
- {
- return detail::uppercase_lookup(ch);
- }
-}}}
-
-#endif


Boost-Commit list run by bdawes at acm.org, david.abrahams at rcn.com, gregod at cs.rpi.edu, cpdaniel at pacbell.net, john at johnmaddock.co.uk