Boost logo

Boost-Commit :

Subject: [Boost-commit] svn:boost r60067 - in branches/quickbook-1.5-spirit2: . test
From: daniel_james_at_[hidden]
Date: 2010-03-01 20:44:50


Author: danieljames
Date: 2010-03-01 20:44:48 EST (Mon, 01 Mar 2010)
New Revision: 60067
URL: http://svn.boost.org/trac/boost/changeset/60067

Log:
Merge from trunk.

The new unicode escape support exposed a difference from the old version.
Header ids were generated from boostbook, while this version was generating them
from quickbook. So fix that as well.

Added:
   branches/quickbook-1.5-spirit2/test/fail-template-section-1.quickbook
      - copied unchanged from r60062, /trunk/tools/quickbook/test/fail-template-section-1.quickbook
   branches/quickbook-1.5-spirit2/test/fail-template-section-2.quickbook
      - copied unchanged from r60062, /trunk/tools/quickbook/test/fail-template-section-2.quickbook
   branches/quickbook-1.5-spirit2/test/fail-template-section-3.quickbook
      - copied unchanged from r60062, /trunk/tools/quickbook/test/fail-template-section-3.quickbook
   branches/quickbook-1.5-spirit2/test/simple_markup.gold
      - copied unchanged from r60062, /trunk/tools/quickbook/test/simple_markup.gold
   branches/quickbook-1.5-spirit2/test/simple_markup.quickbook
      - copied unchanged from r60062, /trunk/tools/quickbook/test/simple_markup.quickbook
   branches/quickbook-1.5-spirit2/test/template-section.gold
      - copied unchanged from r60062, /trunk/tools/quickbook/test/template-section.gold
   branches/quickbook-1.5-spirit2/test/template-section.quickbook
      - copied unchanged from r60062, /trunk/tools/quickbook/test/template-section.quickbook
   branches/quickbook-1.5-spirit2/test/unicode-escape.gold
      - copied unchanged from r60062, /trunk/tools/quickbook/test/unicode-escape.gold
   branches/quickbook-1.5-spirit2/test/unicode-escape.quickbook
      - copied unchanged from r60062, /trunk/tools/quickbook/test/unicode-escape.quickbook
   branches/quickbook-1.5-spirit2/test/utf-16be-bom.quickbook
      - copied unchanged from r60062, /trunk/tools/quickbook/test/utf-16be-bom.quickbook
   branches/quickbook-1.5-spirit2/test/utf-16le-bom.quickbook
      - copied unchanged from r60062, /trunk/tools/quickbook/test/utf-16le-bom.quickbook
   branches/quickbook-1.5-spirit2/test/utf-8-bom.gold
      - copied unchanged from r60062, /trunk/tools/quickbook/test/utf-8-bom.gold
   branches/quickbook-1.5-spirit2/test/utf-8-bom.quickbook
      - copied unchanged from r60062, /trunk/tools/quickbook/test/utf-8-bom.quickbook
   branches/quickbook-1.5-spirit2/test/utf-8.gold
      - copied unchanged from r60062, /trunk/tools/quickbook/test/utf-8.gold
   branches/quickbook-1.5-spirit2/test/utf-8.quickbook
      - copied unchanged from r60062, /trunk/tools/quickbook/test/utf-8.quickbook
Properties modified:
   branches/quickbook-1.5-spirit2/ (props changed)
Text files modified:
   branches/quickbook-1.5-spirit2/block_actions.cpp | 22 ++++++-----
   branches/quickbook-1.5-spirit2/boostbook.cpp | 24 +++++++++++-
   branches/quickbook-1.5-spirit2/encoder.hpp | 1
   branches/quickbook-1.5-spirit2/encoder_impl.hpp | 2 +
   branches/quickbook-1.5-spirit2/html.cpp | 20 ++++++++++
   branches/quickbook-1.5-spirit2/phrase.cpp | 23 ++++++++++-
   branches/quickbook-1.5-spirit2/phrase.hpp | 4 ++
   branches/quickbook-1.5-spirit2/process.cpp | 1
   branches/quickbook-1.5-spirit2/quickbook.cpp | 2
   branches/quickbook-1.5-spirit2/state.cpp | 3 +
   branches/quickbook-1.5-spirit2/state.hpp | 2 +
   branches/quickbook-1.5-spirit2/template.cpp | 14 +++++++
   branches/quickbook-1.5-spirit2/test/Jamfile.v2 | 10 +++++
   branches/quickbook-1.5-spirit2/utils.cpp | 75 +++++++++++++++++++++++++++++++++++++--
   14 files changed, 183 insertions(+), 20 deletions(-)

Modified: branches/quickbook-1.5-spirit2/block_actions.cpp
==============================================================================
--- branches/quickbook-1.5-spirit2/block_actions.cpp (original)
+++ branches/quickbook-1.5-spirit2/block_actions.cpp 2010-03-01 20:44:48 EST (Mon, 01 Mar 2010)
@@ -79,16 +79,18 @@
 
     end_section2 process(quickbook::state& state, end_section const& x)
     {
- --state.section_level;
- if (state.section_level < 0)
+ if (state.section_level <= state.min_section_level)
         {
             detail::outerr(x.position.file,x.position.line)
                 << "Mismatched [endsect] near column " << x.position.column << ".\n";
             ++state.error_count;
             
- // $$$ TODO: somehow fail parse else BOOST_ASSERT(std::string::npos != n)
- // $$$ below will assert.
+ // TODO: Return something else?
+ return end_section2();
         }
+
+ --state.section_level;
+
         if (state.section_level == 0)
         {
             state.qualified_section_id.clear();
@@ -97,8 +99,8 @@
         {
             std::string::size_type const n =
                 state.qualified_section_id.find_last_of('.');
- BOOST_ASSERT(std::string::npos != n);
- state.qualified_section_id.erase(n, std::string::npos);
+ if(std::string::npos != n)
+ state.qualified_section_id.erase(n, std::string::npos);
         }
         
         return end_section2();
@@ -125,16 +127,16 @@
         {
             r.id = state.section_id + "." +
                 detail::make_identifier(
- x.content.raw.begin(),
- x.content.raw.end());
+ x.content.content.begin(),
+ x.content.content.end());
         }
         else // version 1.3 and above
         {
             r.linkend = r.id = fully_qualified_id(
                 state.doc_id, state.qualified_section_id,
                 detail::make_identifier(
- x.content.raw.begin(),
- x.content.raw.end()));
+ x.content.content.begin(),
+ x.content.content.end()));
 
         }
 

Modified: branches/quickbook-1.5-spirit2/boostbook.cpp
==============================================================================
--- branches/quickbook-1.5-spirit2/boostbook.cpp (original)
+++ branches/quickbook-1.5-spirit2/boostbook.cpp 2010-03-01 20:44:48 EST (Mon, 01 Mar 2010)
@@ -40,7 +40,7 @@
         return encode_impl(&c, &c + 1);
     }
 
- namespace {
+ namespace {
         struct boostbook_markup {
             char const* quickbook;
             char const* pre;
@@ -118,7 +118,27 @@
         state.phrase << encode(x);
     }
 
- void boostbook_encoder::operator()(quickbook::state& state, anchor const& x)
+ void boostbook_encoder::operator()(quickbook::state& state, unicode_char const& x)
+ {
+ std::string::const_iterator first = x.value.begin(), last = x.value.end();
+ while(first != last && *first == '0') ++first;
+
+ // Just ignore \u0000
+ // Maybe I should issue a warning?
+ if(first == last) return;
+
+ std::string hex_digits(first, last);
+
+ if(hex_digits.size() == 2 && *first > '0' && *first <= '7') {
+ using namespace std;
+ (*this)(state, strtol(hex_digits.c_str(), 0, 16));
+ }
+ else {
+ state.phrase << "&#x" << hex_digits << ";";
+ }
+ }
+
+ void boostbook_encoder::operator()(quickbook::state& state, anchor const& x)
     {
         state.phrase << "<anchor id=\"";
         state.phrase << encode(x.id);

Modified: branches/quickbook-1.5-spirit2/encoder.hpp
==============================================================================
--- branches/quickbook-1.5-spirit2/encoder.hpp (original)
+++ branches/quickbook-1.5-spirit2/encoder.hpp 2010-03-01 20:44:48 EST (Mon, 01 Mar 2010)
@@ -40,6 +40,7 @@
         // Note: char is a plain quickbook character, string is an encoded
         // boostbook string. Oops.
         virtual void operator()(quickbook::state&, char) = 0;
+ virtual void operator()(quickbook::state&, unicode_char const&) = 0;
         virtual void operator()(quickbook::state&, std::string const&) = 0;
         virtual void operator()(quickbook::state&, anchor const&) = 0;
         virtual void operator()(quickbook::state&, link const&) = 0;

Modified: branches/quickbook-1.5-spirit2/encoder_impl.hpp
==============================================================================
--- branches/quickbook-1.5-spirit2/encoder_impl.hpp (original)
+++ branches/quickbook-1.5-spirit2/encoder_impl.hpp 2010-03-01 20:44:48 EST (Mon, 01 Mar 2010)
@@ -14,6 +14,7 @@
         // Note: char is a plain quickbook character, string is an encoded
         // boostbook string. Oops.
         virtual void operator()(quickbook::state&, char);
+ virtual void operator()(quickbook::state&, unicode_char const&);
         virtual void operator()(quickbook::state&, std::string const&);
         virtual void operator()(quickbook::state&, anchor const&);
         virtual void operator()(quickbook::state&, link const&);
@@ -49,6 +50,7 @@
         // Note: char is a plain quickbook character, string is an encoded
         // html string. Oops.
         virtual void operator()(quickbook::state&, char);
+ virtual void operator()(quickbook::state&, unicode_char const&);
         virtual void operator()(quickbook::state&, std::string const&);
         virtual void operator()(quickbook::state&, anchor const&);
         virtual void operator()(quickbook::state&, link const&);

Modified: branches/quickbook-1.5-spirit2/html.cpp
==============================================================================
--- branches/quickbook-1.5-spirit2/html.cpp (original)
+++ branches/quickbook-1.5-spirit2/html.cpp 2010-03-01 20:44:48 EST (Mon, 01 Mar 2010)
@@ -118,6 +118,26 @@
         state.phrase << encode(x);
     }
 
+ void html_encoder::operator()(quickbook::state& state, unicode_char const& x)
+ {
+ std::string::const_iterator first = x.value.begin(), last = x.value.end();
+ while(first != last && *first == '0') ++first;
+
+ // Just ignore \u0000
+ // Maybe I should issue a warning?
+ if(first == last) return;
+
+ std::string hex_digits(first, last);
+
+ if(hex_digits.size() == 2 && *first > '0' && *first <= '7') {
+ using namespace std;
+ (*this)(state, strtol(hex_digits.c_str(), 0, 16));
+ }
+ else {
+ state.phrase << "&#x" << hex_digits << ";";
+ }
+ }
+
     void html_encoder::operator()(quickbook::state& state, anchor const& x)
     {
         state.phrase << "<a id=\"" << encode(x.id) << "\"/>\n";

Modified: branches/quickbook-1.5-spirit2/phrase.cpp
==============================================================================
--- branches/quickbook-1.5-spirit2/phrase.cpp (original)
+++ branches/quickbook-1.5-spirit2/phrase.cpp 2010-03-01 20:44:48 EST (Mon, 01 Mar 2010)
@@ -16,6 +16,7 @@
 #include <boost/spirit/include/qi_eol.hpp>
 #include <boost/spirit/include/qi_eps.hpp>
 #include <boost/spirit/include/qi_matches.hpp>
+#include <boost/spirit/include/qi_uint.hpp>
 #include <boost/spirit/include/phoenix_operator.hpp>
 #include <boost/fusion/include/std_pair.hpp>
 #include <boost/fusion/include/adapt_struct.hpp>
@@ -82,6 +83,11 @@
     (std::string, identifier)
 )
 
+BOOST_FUSION_ADAPT_STRUCT(
+ quickbook::unicode_char,
+ (std::string, value)
+)
+
 namespace quickbook
 {
     namespace qi = boost::spirit::qi;
@@ -101,10 +107,12 @@
         qi::rule<iterator, quickbook::code()> code_block;
         qi::rule<iterator, quickbook::code()> inline_code;
         qi::rule<iterator, quickbook::simple_markup(), qi::locals<char> > simple_format;
+ qi::rule<iterator> simple_phrase_end;
         qi::rule<iterator> escape;
         qi::rule<iterator, quickbook::break_()> escape_break;
         qi::rule<iterator, quickbook::formatted()> escape_punct;
         qi::rule<iterator, quickbook::formatted()> escape_markup;
+ qi::rule<iterator, quickbook::unicode_char()> escape_unicode;
         qi::rule<iterator> comment;
         qi::rule<iterator> dummy_block;
         qi::rule<iterator, quickbook::callout_link()> callout_link;
@@ -234,8 +242,8 @@
                         ( qi::graph // qi::graph must follow qi::lit(qi::_r1)
>> *( qi::char_ -
                                 ( (qi::graph >> qi::lit(qi::_a))
- | phrase_end // Make sure that we don't go
- ) // past a single block
+ | simple_phrase_end // Make sure that we don't go
+ ) // past a single block
                             )
>> qi::graph // qi::graph must precede qi::lit(qi::_r1)
>> &( qi::char_(qi::_a)
@@ -247,10 +255,13 @@
>> qi::omit[qi::char_(qi::_a)]
             ;
 
+ simple_phrase_end = '[' | phrase_end;
+
         escape =
             ( escape_break
- | "\\ " // ignore an escaped char
+ | "\\ " // ignore an escaped char
             | escape_punct
+ | escape_unicode
             | escape_markup
             ) [actions.process]
             ;
@@ -274,6 +285,12 @@
>> "'''"
             ;
 
+ escape_unicode =
+ "\\u"
+ >> qi::raw[qi::repeat(1,4)[qi::hex]]
+ >> qi::attr(nothing())
+ ;
+
         comment =
             "[/" >> *(dummy_block | (qi::char_ - ']')) >> ']'
             ;

Modified: branches/quickbook-1.5-spirit2/phrase.hpp
==============================================================================
--- branches/quickbook-1.5-spirit2/phrase.hpp (original)
+++ branches/quickbook-1.5-spirit2/phrase.hpp 2010-03-01 20:44:48 EST (Mon, 01 Mar 2010)
@@ -56,6 +56,10 @@
         std::string image_filename;
         attribute_map attributes;
     };
+
+ struct unicode_char {
+ std::string value;
+ };
 }
 
 #endif // BOOST_SPIRIT_QUICKBOOK_PHRASE_HPP

Modified: branches/quickbook-1.5-spirit2/process.cpp
==============================================================================
--- branches/quickbook-1.5-spirit2/process.cpp (original)
+++ branches/quickbook-1.5-spirit2/process.cpp 2010-03-01 20:44:48 EST (Mon, 01 Mar 2010)
@@ -61,6 +61,7 @@
     template void process_action::operator()<define_template>(define_template const&) const;
     template void process_action::operator()<code_token>(code_token const&) const;
     template void process_action::operator()<char>(char const&) const;
+ template void process_action::operator()<unicode_char>(unicode_char const&) const;
     template void process_action::operator()<doc_info>(doc_info const&) const;
     template void process_action::operator()<doc_info_post>(doc_info_post const&) const;
     template void process_action::operator()<callout_link>(callout_link const&) const;

Modified: branches/quickbook-1.5-spirit2/quickbook.cpp
==============================================================================
--- branches/quickbook-1.5-spirit2/quickbook.cpp (original)
+++ branches/quickbook-1.5-spirit2/quickbook.cpp 2010-03-01 20:44:48 EST (Mon, 01 Mar 2010)
@@ -30,7 +30,7 @@
 #pragma warning(disable:4355)
 #endif
 
-#define QUICKBOOK_VERSION "Quickbook Version 1.5.1"
+#define QUICKBOOK_VERSION "Quickbook Version 1.5.2"
 
 namespace quickbook
 {

Modified: branches/quickbook-1.5-spirit2/state.cpp
==============================================================================
--- branches/quickbook-1.5-spirit2/state.cpp (original)
+++ branches/quickbook-1.5-spirit2/state.cpp 2010-03-01 20:44:48 EST (Mon, 01 Mar 2010)
@@ -37,6 +37,7 @@
         , outdir(outdir_)
         , macro()
         , section_level(0)
+ , min_section_level(0)
         , section_id()
         , qualified_section_id()
         , source_mode("c++")
@@ -67,6 +68,7 @@
               , outdir
               , macro
               , section_level
+ , min_section_level
               , section_id
               , qualified_section_id
               , source_mode
@@ -84,6 +86,7 @@
           , outdir
           , macro
           , section_level
+ , min_section_level
           , section_id
           , qualified_section_id
           , source_mode

Modified: branches/quickbook-1.5-spirit2/state.hpp
==============================================================================
--- branches/quickbook-1.5-spirit2/state.hpp (original)
+++ branches/quickbook-1.5-spirit2/state.hpp 2010-03-01 20:44:48 EST (Mon, 01 Mar 2010)
@@ -46,6 +46,7 @@
         fs::path outdir;
         macro_symbols macro;
         int section_level;
+ int min_section_level;
         std::string section_id;
         std::string qualified_section_id;
         std::string source_mode;
@@ -55,6 +56,7 @@
           , fs::path
           , macro_symbols
           , int
+ , int
           , std::string
           , std::string
           , std::string>

Modified: branches/quickbook-1.5-spirit2/template.cpp
==============================================================================
--- branches/quickbook-1.5-spirit2/template.cpp (original)
+++ branches/quickbook-1.5-spirit2/template.cpp 2010-03-01 20:44:48 EST (Mon, 01 Mar 2010)
@@ -383,6 +383,10 @@
         std::string result;
         state.push(); // scope the state
         {
+ // Store the current section level so that we can ensure that
+ // [section] and [endsect] tags in the template are balanced.
+ state.min_section_level = state.section_level;
+
             // Quickbook 1.4-: When expanding the tempalte continue to use the
             // current scope (the dynamic scope).
             // Quickbook 1.5+: Use the scope the template was defined in
@@ -434,6 +438,16 @@
                 ++state.error_count;
                 return "";
             }
+
+ if (state.section_level != state.min_section_level)
+ {
+ detail::outerr(x.position.file,x.position.line)
+ << "Mismatched sections in template " << x.symbol->identifier << std::endl;
+ state.pop(); // restore the actions' states
+ --state.template_depth;
+ ++state.error_count;
+ return "";
+ }
         }
 
         state.pop(); // restore the state

Modified: branches/quickbook-1.5-spirit2/test/Jamfile.v2
==============================================================================
--- branches/quickbook-1.5-spirit2/test/Jamfile.v2 (original)
+++ branches/quickbook-1.5-spirit2/test/Jamfile.v2 2010-03-01 20:44:48 EST (Mon, 01 Mar 2010)
@@ -23,6 +23,7 @@
     [ quickbook-test templates ]
     [ quickbook-test templates_1_4 ]
     [ quickbook-test templates_1_5 ]
+ [ quickbook-test template-section ]
     #[ quickbook-test xinclude ]
     [ quickbook-test import ]
     [ quickbook-test include_1_5 ]
@@ -35,6 +36,7 @@
     [ quickbook-test cond_phrase ]
     [ quickbook-test doc-info-1 ]
     [ quickbook-test callouts ]
+ [ quickbook-test simple_markup ]
     [ quickbook-fail-test fail-include ]
     [ quickbook-fail-test fail-import ]
     [ quickbook-fail-test fail-template-arguments1 ]
@@ -46,5 +48,13 @@
     [ quickbook-fail-test fail-parse-error1 ]
     [ quickbook-fail-test fail-parse-error2 ]
     [ quickbook-fail-test fail-template-lookup1 ]
+ [ quickbook-fail-test fail-template-section-1 ]
+ [ quickbook-fail-test fail-template-section-2 ]
+ [ quickbook-fail-test fail-template-section-3 ]
     [ quickbook-fail-test fail-image_1_5 ]
+ [ quickbook-test utf-8 ]
+ [ quickbook-test utf-8-bom ]
+ [ quickbook-test unicode-escape ]
+ [ quickbook-fail-test utf-16be-bom ]
+ [ quickbook-fail-test utf-16le-bom ]
     ;

Modified: branches/quickbook-1.5-spirit2/utils.cpp
==============================================================================
--- branches/quickbook-1.5-spirit2/utils.cpp (original)
+++ branches/quickbook-1.5-spirit2/utils.cpp 2010-03-01 20:44:48 EST (Mon, 01 Mar 2010)
@@ -153,13 +153,74 @@
         }
     }
 
+ // Read the first few bytes in a file to see it starts with a byte order
+ // mark. If it doesn't, then write the characters we've already read in.
+ // Although, given how UTF-8 works, if even part of the BOM was matched,
+ // the file is probably broken.
+
+ template <class InputIterator, class OutputIterator>
+ bool check_bom(InputIterator& begin, InputIterator end,
+ OutputIterator out, char const* chars, int length)
+ {
+ char const* ptr = chars;
+
+ while(begin != end && *begin == *ptr) {
+ ++begin;
+ ++ptr;
+ --length;
+ if(length == 0) return true;
+ }
+
+ // Failed to match, so write the skipped characters to storage:
+ while(chars != ptr) *out++ = *chars++;
+
+ return false;
+ }
+
+ template <class InputIterator, class OutputIterator>
+ std::string read_bom(InputIterator& begin, InputIterator end,
+ OutputIterator out)
+ {
+ if(begin == end) return "";
+
+ const char utf8[] = {0xef, 0xbb, 0xbf};
+ const char utf32be[] = {0, 0, 0xfe, 0xff};
+ const char utf32le[] = {0xff, 0xfe, 0, 0};
+
+ unsigned char c = *begin;
+ switch(c)
+ {
+ case 0xEF: { // UTF-8
+ return check_bom(begin, end, out, utf8, 3) ? "UTF-8" : "";
+ }
+ case 0xFF: // UTF-16/UTF-32 little endian
+ return !check_bom(begin, end, out, utf32le, 2) ? "" :
+ check_bom(begin, end, out, utf32le + 2, 2) ? "UTF-32" : "UTF-16";
+ case 0: // UTF-32 big endian
+ return check_bom(begin, end, out, utf32be, 4) ? "UTF-32" : "";
+ case 0xFE: // UTF-16 big endian
+ return check_bom(begin, end, out, utf32be + 2, 2) ? "UTF-16" : "";
+ default:
+ return "";
+ }
+ }
+
     // Copy a string, converting mac and windows style newlines to unix
     // newlines.
 
     template <class InputIterator, class OutputIterator>
- void normalize_newlines(InputIterator begin, InputIterator end,
- OutputIterator out)
+ bool normalize(InputIterator begin, InputIterator end,
+ OutputIterator out, std::string const& filename)
     {
+ std::string encoding = read_bom(begin, end, out);
+
+ if(encoding != "UTF-8" && encoding != "") {
+ outerr(filename) << encoding << " is not supported. Please use UTF-8."
+ << std::endl;
+
+ return false;
+ }
+
         while(begin != end) {
             if(*begin == '\r') {
                 *out++ = '\n';
@@ -170,6 +231,8 @@
                 *out++ = *begin++;
             }
         }
+
+ return true;
     }
 
     int load(std::string const& filename, std::string& storage)
@@ -191,10 +254,14 @@
         // Turn off white space skipping on the stream
         in.unsetf(ios::skipws);
 
- normalize_newlines(
+ if(!normalize(
             istream_iterator<char>(in),
             istream_iterator<char>(),
- std::back_inserter(storage));
+ std::back_inserter(storage),
+ filename))
+ {
+ return 1;
+ }
 
         // ensure that we have enough trailing newlines to eliminate
         // the need to check for end of file in the grammar.


Boost-Commit list run by bdawes at acm.org, david.abrahams at rcn.com, gregod at cs.rpi.edu, cpdaniel at pacbell.net, john at johnmaddock.co.uk