|
Boost-Commit : |
Subject: [Boost-commit] svn:boost r82658 - trunk/boost/spirit/home/support/char_encoding/unicode
From: joel_at_[hidden]
Date: 2013-01-28 11:37:15
Author: djowel
Date: 2013-01-28 11:37:15 EST (Mon, 28 Jan 2013)
New Revision: 82658
URL: http://svn.boost.org/trac/boost/changeset/82658
Log:
Fixes #7942
Text files modified:
trunk/boost/spirit/home/support/char_encoding/unicode/create_tables.cpp | 93 ++++++++++++++++++++-------------------
1 files changed, 47 insertions(+), 46 deletions(-)
Modified: trunk/boost/spirit/home/support/char_encoding/unicode/create_tables.cpp
==============================================================================
--- trunk/boost/spirit/home/support/char_encoding/unicode/create_tables.cpp (original)
+++ trunk/boost/spirit/home/support/char_encoding/unicode/create_tables.cpp 2013-01-28 11:37:15 EST (Mon, 28 Jan 2013)
@@ -16,6 +16,7 @@
#include <boost/range/iterator_range.hpp>
#include <iostream>
+#include <iomanip>
#include <fstream>
#include <vector>
#include <algorithm>
@@ -49,13 +50,13 @@
{
ucd_range(uint32_t start, uint32_t finish)
: start(start), finish(finish) {}
-
- // we need this so we can use ucd_range as a multimap key
+
+ // we need this so we can use ucd_range as a multimap key
friend bool operator<(ucd_range const& a, ucd_range const& b)
{
return a.start < b.start;
}
-
+
uint32_t start;
uint32_t finish;
};
@@ -89,23 +90,23 @@
rule<iterator_type, std::string()> field = *(char_-(';'|endl)) >> (';'|&endl);
rule<iterator_type, ucd_line()> line = +(field-endl) >> endl;
rule<iterator_type, std::vector<ucd_line>()> file = +(endl | line[push_back(_val, _1)]);
-
+
parse(f, l, file, info);
}
}
-
+
template <typename Array>
void collect(Array& data, int field, bool collect_properties = true) const
{
BOOST_ASSERT(!info.empty());
ucd_vector::const_iterator current = info.begin();
ucd_vector::const_iterator end = info.end();
-
+
while (current != end)
{
std::string range = (*current)[0];
boost::trim(range);
-
+
std::string::const_iterator f = range.begin();
std::string::const_iterator l = range.end();
@@ -113,14 +114,14 @@
uint32_t start;
uint32_t finish;
parse(f, l, hex[ref(start) = ref(finish) = _1] >> -(".." >> hex[ref(finish) = _1]));
-
+
// special case for UnicodeData.txt ranges:
if ((*current)[1].find("First>") != std::string::npos)
{
++current;
BOOST_ASSERT(current != end);
BOOST_ASSERT((*current)[1].find("Last>") != std::string::npos);
-
+
std::string range = (*current)[0];
boost::trim(range);
f = range.begin();
@@ -128,7 +129,7 @@
parse(f, l, hex[ref(finish) = _1]);
}
-
+
std::string code;
if (field < int(current->size()))
code = (*current)[field];
@@ -136,7 +137,7 @@
// Only collect properties we are interested in
if (collect_properties) // code for properties
{
- if (!ignore_property(code))
+ if (!ignore_property(code))
{
for (uint32_t i = start; i <= finish; ++i)
data[i] |= map_property(code);
@@ -161,7 +162,7 @@
++current;
}
}
-
+
private:
static bool ignore_property(std::string const& p)
@@ -169,7 +170,7 @@
// We don't handle all properties
std::map<std::string, int>& pm = get_property_map();
std::map<std::string, int>::iterator i = pm.find(p);
- return i == pm.end();
+ return i == pm.end();
}
static int
@@ -181,7 +182,7 @@
return i->second;
}
- static std::map<std::string, int>&
+ static std::map<std::string, int>&
get_property_map()
{
// The properties we are interested in:
@@ -194,25 +195,25 @@
map["Lt"] = 2;
map["Lm"] = 3;
map["Lo"] = 4;
-
+
map["Mn"] = 8;
map["Me"] = 9;
map["Mc"] = 10;
-
+
map["Nd"] = 16;
map["Nl"] = 17;
map["No"] = 18;
-
+
map["Zs"] = 24;
map["Zl"] = 25;
map["Zp"] = 26;
-
+
map["Cc"] = 32;
map["Cf"] = 33;
map["Co"] = 34;
map["Cs"] = 35;
map["Cn"] = 36;
-
+
map["Pd"] = 40;
map["Ps"] = 41;
map["Pe"] = 42;
@@ -220,12 +221,12 @@
map["Po"] = 44;
map["Pi"] = 45;
map["Pf"] = 46;
-
+
map["Sm"] = 48;
map["Sc"] = 49;
map["Sk"] = 50;
map["So"] = 51;
-
+
// Derived Properties.
map["Alphabetic"] = 64;
map["Uppercase"] = 128;
@@ -351,24 +352,24 @@
for (uint32_t i = 0; i < full_span; ++i)
p[i] = 0;
}
-
+
void collect(char const* filename, int field, bool collect_properties = true)
{
std::cout << "collecting " << filename << std::endl;
ucd_info info(filename);
info.collect(p, field, collect_properties);
}
-
+
void build(std::vector<uint8_t>& stage1, std::vector<T const*>& stage2)
- {
+ {
std::cout << "building tables" << std::endl;
std::map<block_ptr, std::vector<T const*> > blocks;
for (T const* i = p.get(); i < (p.get() + full_span); i += block_size)
blocks[block_ptr(i)].push_back(i);
-
+
// Not enough bits to store the block indices.
BOOST_ASSERT(blocks.size() < (1 << (sizeof(uint8_t) * 8)));
-
+
typedef std::pair<block_ptr, std::vector<T const*> > blocks_value_type;
std::map<T const*, std::vector<T const*> > sorted_blocks;
BOOST_FOREACH(blocks_value_type const& val, blocks)
@@ -392,9 +393,9 @@
}
}
}
-
+
private:
-
+
struct block_ptr
{
block_ptr(T const* p) : p(p) {}
@@ -404,7 +405,7 @@
return std::lexicographical_compare(
a.p, a.p + block_size, b.p, b.p + block_size);
}
-
+
T const* p;
};
@@ -422,7 +423,7 @@
void print_table(Out& out, C const& c, bool trailing_comma, int width = 4, int group = 16)
{
int const tab = 4;
- C::size_type size = c.size();
+ typename C::size_type size = c.size();
BOOST_ASSERT(size > 1);
print_tab(out, tab);
out << std::setw(width) << int(c[0]);
@@ -436,7 +437,7 @@
}
out << std::setw(width) << int(c[i]);
}
-
+
if (trailing_comma)
out << ", " << std::endl;
}
@@ -444,7 +445,7 @@
template <typename Out>
void print_head(Out& out)
{
- out
+ out
<< "/*=============================================================================\n"
<< " Copyright (c) 2001-2011 Joel de Guzman\n"
<< "\n"
@@ -463,7 +464,7 @@
template <typename Out>
void print_tail(Out& out)
{
- out
+ out
<< "\n"
<< "}}}} // namespace boost::spirit::unicode::detail\n"
;
@@ -489,13 +490,13 @@
uint32_t const block_size = Builder::block_size;
typedef typename Builder::value_type value_type;
print_head(out);
-
+
std::vector<uint8_t> stage1;
std::vector<value_type const*> stage2;
builder.build(stage1, stage2);
std::cout << "Block Size: " << block_size << std::endl;
- std::cout << "Total Bytes: "
- << stage1.size()+(stage2.size()*block_size*sizeof(value_type))
+ std::cout << "Total Bytes: "
+ << stage1.size()+(stage2.size()*block_size*sizeof(value_type))
<< std::endl;
out
@@ -503,11 +504,11 @@
<< " static const ::boost::uint8_t " << name << "_stage1[] = {\n"
<< "\n"
;
-
+
print_table(out, stage1, false, 3);
char const* int_name = get_int_type_name(sizeof(value_type));
- out
+ out
<< "\n"
<< " };"
<< "\n"
@@ -521,17 +522,17 @@
value_type const* p = stage2[i];
bool last = (i+1 == stage2.size());
out << "\n\n // block " << block_n++ << std::endl;
- print_table(out,
+ print_table(out,
boost::iterator_range<value_type const*>(p, p+block_size), !last, field_width);
}
- out
+ out
<< "\n"
<< " };"
<< "\n"
;
-
- out
+
+ out
<< "\n"
<< " inline " << int_name << ' ' << name << "_lookup(::boost::uint32_t ch)\n"
<< " {\n"
@@ -539,7 +540,7 @@
<< " return " << name << "_stage2[block_offset + ch % " << block_size << "];\n"
<< " }\n"
;
-
+
print_tail(out);
}
@@ -554,7 +555,7 @@
builder.collect("PropList.txt", 1);
print_file(out, builder, 4, "category");
}
-
+
// The script tables
{
std::ofstream out("script_table.hpp");
@@ -562,7 +563,7 @@
builder.collect("Scripts.txt", 1);
print_file(out, builder, 3, "script");
}
-
+
// The lowercase tables
{
std::ofstream out("lowercase_table.hpp");
@@ -570,7 +571,7 @@
builder.collect("UnicodeData.txt", 13, false);
print_file(out, builder, 6, "lowercase");
}
-
+
// The uppercase tables
{
std::ofstream out("uppercase_table.hpp");
Boost-Commit list run by bdawes at acm.org, david.abrahams at rcn.com, gregod at cs.rpi.edu, cpdaniel at pacbell.net, john at johnmaddock.co.uk