Boost logo

Boost-Commit:

Subject: [Boost-commit] svn:boost r53823 - in sandbox/SOC/2009/unicode: boost/unicode libs/unicode/data_parser
From: loufoque_at_[hidden]
Date: 2009-06-12 10:10:05


Author: mgaunard
Date: 2009-06-12 10:10:04 EDT (Fri, 12 Jun 2009)
New Revision: 53823
URL: http://svn.boost.org/trac/boost/changeset/53823

Log:
decompose_for_sort fix by Graham Barnett
Text files modified:
   sandbox/SOC/2009/unicode/boost/unicode/unicode_properties_types.hpp | 36 +++++++++---------
   sandbox/SOC/2009/unicode/libs/unicode/data_parser/read_character_properties.cpp | 2
   sandbox/SOC/2009/unicode/libs/unicode/data_parser/read_character_properties_unicodedata.cpp | 36 +++++++++---------
   sandbox/SOC/2009/unicode/libs/unicode/data_parser/write_character_properties.cpp | 73 ++++++++++++++++++++++++++++++---------
   sandbox/SOC/2009/unicode/libs/unicode/data_parser/write_character_properties.hpp | 7 +++
   5 files changed, 100 insertions(+), 54 deletions(-)

Modified: sandbox/SOC/2009/unicode/boost/unicode/unicode_properties_types.hpp
==============================================================================
--- sandbox/SOC/2009/unicode/boost/unicode/unicode_properties_types.hpp (original)
+++ sandbox/SOC/2009/unicode/boost/unicode/unicode_properties_types.hpp 2009-06-12 10:10:04 EDT (Fri, 12 Jun 2009)
@@ -178,24 +178,24 @@
                 {
                         enum type // maximum 32 values (5 bits)
                         {
- font = 0,
- no_break,
- initial,
- medial,
- final,
- isolated,
- circle,
- super,
- sub,
- vertical,
- wide,
- narrow,
- small,
- square,
- fraction,
- compat,
- canonical,
- none, // default value for unknown characters
+ dct_font = 0,
+ dct_no_break,
+ dct_initial,
+ dct_medial,
+ dct_final,
+ dct_isolated,
+ dct_circle,
+ dct_super,
+ dct_sub,
+ dct_vertical,
+ dct_wide,
+ dct_narrow,
+ dct_small,
+ dct_square,
+ dct_fraction,
+ dct_compat,
+ dct_canonical,
+ dct_none, // default value for unknown characters
 
                                 _count
                         };

Modified: sandbox/SOC/2009/unicode/libs/unicode/data_parser/read_character_properties.cpp
==============================================================================
--- sandbox/SOC/2009/unicode/libs/unicode/data_parser/read_character_properties.cpp (original)
+++ sandbox/SOC/2009/unicode/libs/unicode/data_parser/read_character_properties.cpp 2009-06-12 10:10:04 EDT (Fri, 12 Jun 2009)
@@ -48,7 +48,7 @@
         char_not_present.general_category = category::unknown;
         char_not_present.combining = 0;
         char_not_present.bidi = bidi_class::strong_left_to_right;
- char_not_present.decomposition_kind = decomposition_type::none;
+ char_not_present.decomposition_kind = decomposition_type::dct_none;
         char_not_present.line_break = break_class::unknown;
         char_not_present.joining = join_type::none;
         char_not_present.unknown_char = true;

Modified: sandbox/SOC/2009/unicode/libs/unicode/data_parser/read_character_properties_unicodedata.cpp
==============================================================================
--- sandbox/SOC/2009/unicode/libs/unicode/data_parser/read_character_properties_unicodedata.cpp (original)
+++ sandbox/SOC/2009/unicode/libs/unicode/data_parser/read_character_properties_unicodedata.cpp 2009-06-12 10:10:04 EDT (Fri, 12 Jun 2009)
@@ -221,60 +221,60 @@
                 // If none is indicated, it's a canonical decomposition_type.
                 (((str_p ("<font>")
                         [assign_a (prop.decomposition_kind,
- decomposition_type::font)] |
+ decomposition_type::dct_font)] |
                 str_p ("<noBreak>")
                         [assign_a (prop.decomposition_kind,
- decomposition_type::no_break)] |
+ decomposition_type::dct_no_break)] |
                 str_p ("<initial>")
                         [assign_a (prop.decomposition_kind,
- decomposition_type::initial)] |
+ decomposition_type::dct_initial)] |
                 str_p ("<medial>")
                         [assign_a (prop.decomposition_kind,
- decomposition_type::medial)] |
+ decomposition_type::dct_medial)] |
                 str_p ("<final>")
                         [assign_a (prop.decomposition_kind,
- decomposition_type::final)] |
+ decomposition_type::dct_final)] |
                 str_p ("<isolated>")
                         [assign_a (prop.decomposition_kind,
- decomposition_type::isolated)] |
+ decomposition_type::dct_isolated)] |
                 str_p ("<circle>")
                         [assign_a (prop.decomposition_kind,
- decomposition_type::circle)] |
+ decomposition_type::dct_circle)] |
                 str_p ("<super>")
                         [assign_a (prop.decomposition_kind,
- decomposition_type::super)] |
+ decomposition_type::dct_super)] |
                 str_p ("<sub>")
                         [assign_a (prop.decomposition_kind,
- decomposition_type::sub)] |
+ decomposition_type::dct_sub)] |
                 str_p ("<vertical>")
                         [assign_a (prop.decomposition_kind,
- decomposition_type::vertical)] |
+ decomposition_type::dct_vertical)] |
                 str_p ("<wide>")
                         [assign_a (prop.decomposition_kind,
- decomposition_type::wide)] |
+ decomposition_type::dct_wide)] |
                 str_p ("<narrow>")
                         [assign_a (prop.decomposition_kind,
- decomposition_type::narrow)] |
+ decomposition_type::dct_narrow)] |
                 str_p ("<small>")
                         [assign_a (prop.decomposition_kind,
- decomposition_type::small)] |
+ decomposition_type::dct_small)] |
                 str_p ("<square>")
                         [assign_a (prop.decomposition_kind,
- decomposition_type::square)] |
+ decomposition_type::dct_square)] |
                 str_p ("<fraction>")
                         [assign_a (prop.decomposition_kind,
- decomposition_type::fraction)] |
+ decomposition_type::dct_fraction)] |
                 str_p ("<compat>")
                         [assign_a (prop.decomposition_kind,
- decomposition_type::compat)] |
+ decomposition_type::dct_compat)] |
                 eps_p [assign_a (prop.decomposition_kind,
- decomposition_type::canonical)]
+ decomposition_type::dct_canonical)]
                 ) >>
                 // composition
                 +(hex_p [push_back_a (prop.decomposition)])
                 ) |
                 // or no composition at all
- eps_p [assign_a (prop.decomposition_kind, decomposition_type::none)]
+ eps_p [assign_a (prop.decomposition_kind, decomposition_type::dct_none)]
                 ) >> ';' >>
 
                 // numeric type is skipped

Modified: sandbox/SOC/2009/unicode/libs/unicode/data_parser/write_character_properties.cpp
==============================================================================
--- sandbox/SOC/2009/unicode/libs/unicode/data_parser/write_character_properties.cpp (original)
+++ sandbox/SOC/2009/unicode/libs/unicode/data_parser/write_character_properties.cpp 2009-06-12 10:10:04 EDT (Fri, 12 Jun 2009)
@@ -619,6 +619,7 @@
 }
 
 bool decompose_for_sort(const character_properties & props_char,
+ char32 cp,
                         const std::map <char32, character_properties> & props,
                                             std::vector<collation_data> & tbl_coll)
 {
@@ -640,24 +641,62 @@
         }
         else
         {
- // no optimisation should have happened at this point so all entries
- // should be set to indexed
- //assert(iter_char->second.sort_type == sort_type::is_index);
-
- if (iter_char->second.sort_data.size() > 1)
- {
- // For simplicity we so not handle complex sort data
- return false;
- }
- else if (iter_char->second.sort_data.size() == 0)
+ switch (iter_char->second.sort_type)
             {
- return decompose_for_sort(iter_char->second, props, tbl_coll);
- }
+ case sort_type::zero_data1_data2_cp:
+ {
+ collation_data sort_entry;
+ sort_entry.variable = iter_char->second.sort_variable;
+ sort_entry.weight1 = 0;
+ sort_entry.weight2 = iter_char->second.sort_index_or_data1;
+ sort_entry.weight3 = iter_char->second.sort_data2;
+ sort_entry.weight4 = cp;
+ tbl_coll.push_back(sort_entry);
+ }
+ break;
+ case sort_type::data1_0x0020_data2_cp:
+ {
+ collation_data sort_entry;
+ sort_entry.variable = iter_char->second.sort_variable;
+ sort_entry.weight1 = iter_char->second.sort_index_or_data1;
+ sort_entry.weight2 = 0x0020;
+ sort_entry.weight3 = iter_char->second.sort_data2;
+ sort_entry.weight4 = cp;
+ tbl_coll.push_back(sort_entry);
+ }
+ break;
+ case sort_type::default_:
+ {
+ collation_entry sort_entry;
+ get_default_sort_characteristics(cp, sort_entry);
+ // this optimisation requires default to be of size 1
+ assert(sort_entry.data.size() == 1);
+ tbl_coll.push_back(sort_entry.data[0]);
+ }
+ break;
+ case sort_type::is_index:
+ {
+ if (iter_char->second.sort_data.size() > 1)
+ {
+ // For simplicity we do not handle complex sort data
+ return false;
+ }
+ else if (iter_char->second.sort_data.size() == 0)
+ {
+ return decompose_for_sort(iter_char->second, iter_char->first, props, tbl_coll);
+ }
 
- tbl_coll.reserve(tbl_coll.size() + iter_char->second.sort_data[0].data.size());
- copy(iter_char->second.sort_data[0].data.begin(),
- iter_char->second.sort_data[0].data.end(),
- back_inserter(tbl_coll));
+ tbl_coll.reserve(tbl_coll.size() + iter_char->second.sort_data[0].data.size());
+ copy(iter_char->second.sort_data[0].data.begin(),
+ iter_char->second.sort_data[0].data.end(),
+ back_inserter(tbl_coll));
+ }
+ break;
+ default:
+ // invalid enum
+ assert(iter_char->second.sort_type == (size_t)-1);
+ break;
+ }
         }
     }
 
@@ -692,7 +731,7 @@
     std::vector<collation_data> tbl_coll;
     
     // check for decomp and return false if it is too complex to optimise
- if (!decompose_for_sort(props_char_var, props, tbl_coll))
+ if (!decompose_for_sort(props_char_var, cp, props, tbl_coll))
         return true;
 
     std::vector<collation_data>::const_iterator iter_src =

Modified: sandbox/SOC/2009/unicode/libs/unicode/data_parser/write_character_properties.hpp
==============================================================================
--- sandbox/SOC/2009/unicode/libs/unicode/data_parser/write_character_properties.hpp (original)
+++ sandbox/SOC/2009/unicode/libs/unicode/data_parser/write_character_properties.hpp 2009-06-12 10:10:04 EDT (Fri, 12 Jun 2009)
@@ -17,7 +17,14 @@
 #include <vector>
 #include <map>
 #include <boost/spirit/include/classic.hpp>
+#ifdef BOOST_MSVC
+#pragma warning(push)
+#pragma warning(disable : 4800 ) // 'int' : forcing value to bool 'true' or 'false'
+#endif
 #include <boost/crc.hpp>
+#ifdef BOOST_MSVC
+#pragma warning(pop)
+#endif
 #include <boost/tuple/tuple.hpp>
 
 #include <boost/unicode/unicode_properties.hpp>


Boost-Commit list run by bdawes at acm.org, david.abrahams at rcn.com, gregod at cs.rpi.edu, cpdaniel at pacbell.net, john at johnmaddock.co.uk