Boost logo

Boost-Commit :

Subject: [Boost-commit] svn:boost r56000 - in trunk: boost/property_tree boost/property_tree/detail libs/property_tree/doc libs/property_tree/test
From: sebastian.redl_at_[hidden]
Date: 2009-09-03 13:37:37


Author: cornedbee
Date: 2009-09-03 13:37:36 EDT (Thu, 03 Sep 2009)
New Revision: 56000
URL: http://svn.boost.org/trac/boost/changeset/56000

Log:
Don't mangle whitespace too much.
Text files modified:
   trunk/boost/property_tree/detail/xml_parser_flags.hpp | 13 +++++--
   trunk/boost/property_tree/detail/xml_parser_read_rapidxml.hpp | 21 +++++++---
   trunk/boost/property_tree/detail/xml_parser_utils.hpp | 29 ++++++++++-----
   trunk/boost/property_tree/detail/xml_parser_write.hpp | 32 ++++++++++++-----
   trunk/boost/property_tree/detail/xml_parser_writer_settings.hpp | 10 +++--
   trunk/boost/property_tree/xml_parser.hpp | 4 +
   trunk/libs/property_tree/doc/xml_parser.qbk | 11 +++--
   trunk/libs/property_tree/test/test_xml_parser_common.hpp | 71 +++++++++++++++++++++++++++++----------
   trunk/libs/property_tree/test/xml_parser_test_data.hpp | 1
   9 files changed, 133 insertions(+), 59 deletions(-)

Modified: trunk/boost/property_tree/detail/xml_parser_flags.hpp
==============================================================================
--- trunk/boost/property_tree/detail/xml_parser_flags.hpp (original)
+++ trunk/boost/property_tree/detail/xml_parser_flags.hpp 2009-09-03 13:37:36 EDT (Thu, 03 Sep 2009)
@@ -12,13 +12,18 @@
 
 namespace boost { namespace property_tree { namespace xml_parser
 {
-
- static const int no_concat_text = 1; // Text elements should be put in separate keys, not concatenated in parent data
- static const int no_comments = 2; // Comments should be omitted
+
+ /// Text elements should be put in separate keys,
+ /// not concatenated in parent data.
+ static const int no_concat_text = 0x1;
+ /// Comments should be omitted.
+ static const int no_comments = 0x2;
+ /// Whitespace should be collapsed and trimmed.
+ static const int trim_whitespace = 0x4;
 
     inline bool validate_flags(int flags)
     {
- return (flags & ~(no_concat_text | no_comments)) == 0;
+ return (flags & ~(no_concat_text | no_comments | trim_whitespace)) == 0;
     }
 
 } } }

Modified: trunk/boost/property_tree/detail/xml_parser_read_rapidxml.hpp
==============================================================================
--- trunk/boost/property_tree/detail/xml_parser_read_rapidxml.hpp (original)
+++ trunk/boost/property_tree/detail/xml_parser_read_rapidxml.hpp 2009-09-03 13:37:36 EDT (Thu, 03 Sep 2009)
@@ -94,14 +94,21 @@
         try {
             // Parse using appropriate flags
             using namespace rapidxml;
- const int ncflags = parse_normalize_whitespace
- | parse_trim_whitespace;
- const int cflags = ncflags | parse_comment_nodes;
+ const int f_tws = parse_normalize_whitespace
+ | parse_trim_whitespace;
+ const int f_c = parse_comment_nodes;
             xml_document<Ch> doc;
- if (flags & no_comments)
- doc.BOOST_NESTED_TEMPLATE parse<ncflags>(&v.front());
- else
- doc.BOOST_NESTED_TEMPLATE parse<cflags>(&v.front());
+ if (flags & no_comments) {
+ if (flags & trim_whitespace)
+ doc.BOOST_NESTED_TEMPLATE parse<f_tws>(&v.front());
+ else
+ doc.BOOST_NESTED_TEMPLATE parse<0>(&v.front());
+ } else {
+ if (flags & trim_whitespace)
+ doc.BOOST_NESTED_TEMPLATE parse<f_tws | f_c>(&v.front());
+ else
+ doc.BOOST_NESTED_TEMPLATE parse<f_c>(&v.front());
+ }
 
             // Create ptree from nodes
             Ptree local;

Modified: trunk/boost/property_tree/detail/xml_parser_utils.hpp
==============================================================================
--- trunk/boost/property_tree/detail/xml_parser_utils.hpp (original)
+++ trunk/boost/property_tree/detail/xml_parser_utils.hpp 2009-09-03 13:37:36 EDT (Thu, 03 Sep 2009)
@@ -46,17 +46,26 @@
     {
         typedef typename std::basic_string<Ch> Str;
         Str r;
- typename Str::const_iterator end = s.end();
- for (typename Str::const_iterator it = s.begin(); it != end; ++it)
- {
- switch (*it)
+ // To properly round-trip spaces and not uglify the XML beyond
+ // recognition, we have to encode them IF the text contains only spaces.
+ Str sp(1, Ch(' '));
+ if(s.find_first_not_of(sp) == Str::npos) {
+ // Encoding only the first space will suffice.
+ r = detail::widen<Ch>("&#32;");
+ r += Str(s.size() - 1, Ch(' '));
+ } else {
+ typename Str::const_iterator end = s.end();
+ for (typename Str::const_iterator it = s.begin(); it != end; ++it)
             {
- case Ch('<'): r += detail::widen<Ch>("&lt;"); break;
- case Ch('>'): r += detail::widen<Ch>("&gt;"); break;
- case Ch('&'): r += detail::widen<Ch>("&amp;"); break;
- case Ch('"'): r += detail::widen<Ch>("&quot;"); break;
- case Ch('\''): r += detail::widen<Ch>("&apos;"); break;
- default: r += *it; break;
+ switch (*it)
+ {
+ case Ch('<'): r += detail::widen<Ch>("&lt;"); break;
+ case Ch('>'): r += detail::widen<Ch>("&gt;"); break;
+ case Ch('&'): r += detail::widen<Ch>("&amp;"); break;
+ case Ch('"'): r += detail::widen<Ch>("&quot;"); break;
+ case Ch('\''): r += detail::widen<Ch>("&apos;"); break;
+ default: r += *it; break;
+ }
             }
         }
         return r;

Modified: trunk/boost/property_tree/detail/xml_parser_write.hpp
==============================================================================
--- trunk/boost/property_tree/detail/xml_parser_write.hpp (original)
+++ trunk/boost/property_tree/detail/xml_parser_write.hpp 2009-09-03 13:37:36 EDT (Thu, 03 Sep 2009)
@@ -68,6 +68,7 @@
         typedef typename std::basic_string<Ch> Str;
         typedef typename Ptree::const_iterator It;
 
+ bool want_pretty = settings.indent_count > 0;
         // Find if elements present
         bool has_elements = false;
         bool has_attrs_only = pt.data().empty();
@@ -114,34 +115,43 @@
                 if ( has_attrs_only )
                 {
                     // Write closing brace
- stream << Ch('/') << Ch('>') << std::endl;
+ stream << Ch('/') << Ch('>');
+ if (want_pretty)
+ stream << Ch('\n');
                 }
                 else
                 {
                     // Write closing brace
                     stream << Ch('>');
 
- // Break line if needed
- if (has_elements)
+ // Break line if needed and if we want pretty-printing
+ if (has_elements && want_pretty)
                         stream << Ch('\n');
                 }
             }
-
+
             // Write data text, if present
             if (!pt.data().empty())
- write_xml_text(stream, pt.template get_value<std::basic_string<Ch> >(), indent + 1, has_elements, settings);
-
+ write_xml_text(stream,
+ pt.template get_value<std::basic_string<Ch> >(),
+ indent + 1, has_elements && want_pretty, settings);
+
             // Write elements, comments and texts
             for (It it = pt.begin(); it != pt.end(); ++it)
             {
                 if (it->first == xmlattr<Ch>())
                     continue;
                 else if (it->first == xmlcomment<Ch>())
- write_xml_comment(stream, it->second.template get_value<std::basic_string<Ch> >(), indent + 1, settings);
+ write_xml_comment(stream,
+ it->second.template get_value<std::basic_string<Ch> >(),
+ indent + 1, settings);
                 else if (it->first == xmltext<Ch>())
- write_xml_text(stream, it->second.template get_value<std::basic_string<Ch> >(), indent + 1, has_elements, settings);
+ write_xml_text(stream,
+ it->second.template get_value<std::basic_string<Ch> >(),
+ indent + 1, has_elements && want_pretty, settings);
                 else
- write_xml_element(stream, it->first, it->second, indent + 1, settings);
+ write_xml_element(stream, it->first, it->second,
+ indent + 1, settings);
             }
             
             // Write closing tag
@@ -149,7 +159,9 @@
             {
                 if (has_elements)
                     write_xml_indent(stream,indent,settings);
- stream << Ch('<') << Ch('/') << key << Ch('>') << std::endl;
+ stream << Ch('<') << Ch('/') << key << Ch('>');
+ if (want_pretty)
+ stream << Ch('\n');
             }
 
         }

Modified: trunk/boost/property_tree/detail/xml_parser_writer_settings.hpp
==============================================================================
--- trunk/boost/property_tree/detail/xml_parser_writer_settings.hpp (original)
+++ trunk/boost/property_tree/detail/xml_parser_writer_settings.hpp 2009-09-03 13:37:36 EDT (Thu, 03 Sep 2009)
@@ -30,14 +30,14 @@
         return result;
     }
 
- //! Xml writer settings
+ //! Xml writer settings. The default settings lead to no pretty printing.
     template<class Ch>
     class xml_writer_settings
     {
     public:
         xml_writer_settings(Ch indent_char = Ch(' '),
- typename std::basic_string<Ch>::size_type indent_count = 4,
- const std::basic_string<Ch> &encoding = widen<Ch>("utf-8"))
+ typename std::basic_string<Ch>::size_type indent_count = 0,
+ const std::basic_string<Ch> &encoding = widen<Ch>("utf-8"))
             : indent_char(indent_char)
             , indent_count(indent_count)
             , encoding(encoding)
@@ -50,7 +50,9 @@
     };
 
     template <class Ch>
- xml_writer_settings<Ch> xml_writer_make_settings(Ch indent_char, typename std::basic_string<Ch>::size_type indent_count, const Ch *encoding)
+ xml_writer_settings<Ch> xml_writer_make_settings(Ch indent_char = Ch(' '),
+ typename std::basic_string<Ch>::size_type indent_count = 0,
+ const std::basic_string<Ch> &encoding = widen<Ch>("utf-8"))
     {
         return xml_writer_settings<Ch>(indent_char, indent_count, encoding);
     }

Modified: trunk/boost/property_tree/xml_parser.hpp
==============================================================================
--- trunk/boost/property_tree/xml_parser.hpp (original)
+++ trunk/boost/property_tree/xml_parser.hpp 2009-09-03 13:37:36 EDT (Thu, 03 Sep 2009)
@@ -33,12 +33,14 @@
      * @throw xml_parser_error In case of error deserializing the property tree.
      * @param stream Stream from which to read in the property tree.
      * @param[out] pt The property tree to populate.
- * @param flags Flags controlling the bahviour of the parser.
+ * @param flags Flags controlling the behaviour of the parser.
      * The following flags are supported:
      * @li @c no_concat_text -- Prevents concatenation of text nodes into
      * datastring of property tree. Puts them in
      * separate @c \<xmltext\> strings instead.
      * @li @c no_comments -- Skip XML comments.
+ * @li @c trim_whitespace -- Trim leading and trailing whitespace from text,
+ * and collapse sequences of whitespace.
      */
     template<class Ptree>
     void read_xml(std::basic_istream<

Modified: trunk/libs/property_tree/doc/xml_parser.qbk
==============================================================================
--- trunk/libs/property_tree/doc/xml_parser.qbk (original)
+++ trunk/libs/property_tree/doc/xml_parser.qbk 2009-09-03 13:37:36 EDT (Thu, 03 Sep 2009)
@@ -8,10 +8,13 @@
 time of this writing. The library therefore contains the fast and tiny
 __RapidXML__ parser (currently in version 1.13) to provide XML parsing support.
 RapidXML does not fully support the XML standard; it is not capable of parsing
-DTDs and therefore cannot do full entity substitution. The parser is configured
-to trim whitespace from the edges of character data. This behaviour exists for
-compatibility with older versions of PropertyTree and will be made configurable
-in the future.
+DTDs and therefore cannot do full entity substitution.
+
+By default, the parser will preserve most whitespace, but remove element content
+that consists only of whitespace. Encoded whitespace (e.g. &#32;) does not
+count as whitespace in this regard. You can pass the trim_whitespace flag if you
+want all leading and trailing whitespace trimmed and all contiguous whitespace
+collapsed into a single space.
 
 Please note that RapidXML does not understand the encoding specification. If
 you pass it a character buffer, it assumes the data is already correctly

Modified: trunk/libs/property_tree/test/test_xml_parser_common.hpp
==============================================================================
--- trunk/libs/property_tree/test/test_xml_parser_common.hpp (original)
+++ trunk/libs/property_tree/test/test_xml_parser_common.hpp 2009-09-03 13:37:36 EDT (Thu, 03 Sep 2009)
@@ -14,7 +14,7 @@
 #include <boost/property_tree/xml_parser.hpp>
 #include "xml_parser_test_data.hpp"
 
-struct ReadFunc
+struct ReadFuncWS
 {
     template<class Ptree>
     void operator()(const std::string &filename, Ptree &pt) const
@@ -23,7 +23,7 @@
     }
 };
 
-struct WriteFunc
+struct WriteFuncWS
 {
     template<class Ptree>
     void operator()(const std::string &filename, const Ptree &pt) const
@@ -32,6 +32,27 @@
     }
 };
 
+struct ReadFuncNS
+{
+ template<class Ptree>
+ void operator()(const std::string &filename, Ptree &pt) const
+ {
+ boost::property_tree::read_xml(filename, pt,
+ boost::property_tree::xml_parser::trim_whitespace);
+ }
+};
+
+struct WriteFuncNS
+{
+ template<class Ptree>
+ void operator()(const std::string &filename, const Ptree &pt) const
+ {
+ boost::property_tree::write_xml(filename, pt, std::locale(),
+ boost::property_tree::xml_writer_make_settings(
+ typename Ptree::key_type::value_type(' '), 4));
+ }
+};
+
 template <typename Ch> int umlautsize();
 template <> inline int umlautsize<char>() { return 2; }
 template <> inline int umlautsize<wchar_t>() { return 1; }
@@ -42,46 +63,58 @@
 
     using namespace boost::property_tree;
 
- generic_parser_test_ok<Ptree, ReadFunc, WriteFunc>
+ generic_parser_test_ok<Ptree, ReadFuncWS, WriteFuncWS>
     (
- ReadFunc(), WriteFunc(), ok_data_1, NULL,
+ ReadFuncWS(), WriteFuncWS(), ok_data_1, NULL,
         "testok1.xml", NULL, "testok1out.xml", 2, 0, 5
     );
-
- generic_parser_test_ok<Ptree, ReadFunc, WriteFunc>
+
+ generic_parser_test_ok<Ptree, ReadFuncWS, WriteFuncWS>
+ (
+ ReadFuncWS(), WriteFuncWS(), ok_data_2, NULL,
+ "testok2a.xml", NULL, "testok2aout.xml", 6, 18, 8
+ );
+
+ generic_parser_test_ok<Ptree, ReadFuncNS, WriteFuncNS>
+ (
+ ReadFuncNS(), WriteFuncNS(), ok_data_2, NULL,
+ "testok2b.xml", NULL, "testok2bout.xml", 6, 15, 8
+ );
+
+ generic_parser_test_ok<Ptree, ReadFuncWS, WriteFuncWS>
     (
- ReadFunc(), WriteFunc(), ok_data_2, NULL,
- "testok2.xml", NULL, "testok2out.xml", 5, 15, 7
+ ReadFuncWS(), WriteFuncWS(), ok_data_3, NULL,
+ "testok3a.xml", NULL, "testok3aout.xml", 787, 32523, 3831
     );
 
- generic_parser_test_ok<Ptree, ReadFunc, WriteFunc>
+ generic_parser_test_ok<Ptree, ReadFuncNS, WriteFuncNS>
     (
- ReadFunc(), WriteFunc(), ok_data_3, NULL,
- "testok3.xml", NULL, "testok3out.xml", 787, 31376, 3831
+ ReadFuncNS(), WriteFuncNS(), ok_data_3, NULL,
+ "testok3b.xml", NULL, "testok3bout.xml", 787, 31376, 3831
     );
 
- generic_parser_test_ok<Ptree, ReadFunc, WriteFunc>
+ generic_parser_test_ok<Ptree, ReadFuncWS, WriteFuncWS>
     (
- ReadFunc(), WriteFunc(), ok_data_4, NULL,
+ ReadFuncWS(), WriteFuncWS(), ok_data_4, NULL,
         "testok4.xml", NULL, "testok4out.xml", 5, 2, 20
     );
 
- generic_parser_test_ok<Ptree, ReadFunc, WriteFunc>
+ generic_parser_test_ok<Ptree, ReadFuncWS, WriteFuncWS>
     (
- ReadFunc(), WriteFunc(), ok_data_5, NULL,
+ ReadFuncWS(), WriteFuncWS(), ok_data_5, NULL,
         "testok5.xml", NULL, "testok5out.xml",
         2, umlautsize<typename Ptree::data_type::value_type>(), 3
     );
 
- generic_parser_test_error<Ptree, ReadFunc, WriteFunc, xml_parser_error>
+ generic_parser_test_error<Ptree, ReadFuncWS, WriteFuncWS, xml_parser_error>
     (
- ReadFunc(), WriteFunc(), error_data_1, NULL,
+ ReadFuncWS(), WriteFuncWS(), error_data_1, NULL,
         "testerr1.xml", NULL, "testerr1out.xml", 1
     );
 
- generic_parser_test_error<Ptree, ReadFunc, WriteFunc, xml_parser_error>
+ generic_parser_test_error<Ptree, ReadFuncWS, WriteFuncWS, xml_parser_error>
     (
- ReadFunc(), WriteFunc(), error_data_2, NULL,
+ ReadFuncWS(), WriteFuncWS(), error_data_2, NULL,
         "testerr2.xml", NULL, "testerr2out.xml", 2
     );
 

Modified: trunk/libs/property_tree/test/xml_parser_test_data.hpp
==============================================================================
--- trunk/libs/property_tree/test/xml_parser_test_data.hpp (original)
+++ trunk/libs/property_tree/test/xml_parser_test_data.hpp 2009-09-03 13:37:36 EDT (Thu, 03 Sep 2009)
@@ -16,6 +16,7 @@
     "<a>&lt;&gt;&amp;</a>\n"
     "<b>1&lt;2&gt;3&amp;4</b>\n"
     "<c> &lt; &gt; &amp; </c>\n"
+ "<d>&#32;</d>\n"
     "</root>\n";
 
 // Correct


Boost-Commit list run by bdawes at acm.org, david.abrahams at rcn.com, gregod at cs.rpi.edu, cpdaniel at pacbell.net, john at johnmaddock.co.uk