Boost logo

Boost-Commit :

Subject: [Boost-commit] svn:boost r58035 - trunk/libs/graph/src
From: jewillco_at_[hidden]
Date: 2009-11-29 15:35:25


Author: jewillco
Date: 2009-11-29 15:35:25 EST (Sun, 29 Nov 2009)
New Revision: 58035
URL: http://svn.boost.org/trac/boost/changeset/58035

Log:
Rewrote GraphML parser to use Boost.PropertyTree; fixes #3300
Text files modified:
   trunk/libs/graph/src/graphml.cpp | 357 ++++++++++-----------------------------
   1 files changed, 97 insertions(+), 260 deletions(-)

Modified: trunk/libs/graph/src/graphml.cpp
==============================================================================
--- trunk/libs/graph/src/graphml.cpp (original)
+++ trunk/libs/graph/src/graphml.cpp 2009-11-29 15:35:25 EST (Sun, 29 Nov 2009)
@@ -1,18 +1,20 @@
 // Copyright (C) 2006 Tiago de Paula Peixoto <tiago_at_[hidden]>
-// Copyright (C) 2004 The Trustees of Indiana University.
+// Copyright (C) 2004,2009 The Trustees of Indiana University.
 //
 // Use, modification and distribution is subject to the Boost Software
 // License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
 // http://www.boost.org/LICENSE_1_0.txt)
 
 // Authors: Douglas Gregor
+// Jeremiah Willcock
 // Andrew Lumsdaine
 // Tiago de Paula Peixoto
 
-#include <boost/variant.hpp>
-#include <expat.h>
+#include <boost/foreach.hpp>
+#include <boost/optional.hpp>
 #include <boost/graph/graphml.hpp>
-#include <boost/algorithm/string/replace.hpp>
+#include <boost/property_tree/ptree.hpp>
+#include <boost/property_tree/xml_parser.hpp>
 
 using namespace boost;
 
@@ -20,34 +22,91 @@
 {
 public:
     graphml_reader(mutate_graph& g)
- : m_g(g), m_canonical_vertices(false) { }
+ : m_g(g) { }
+
+ static boost::property_tree::ptree::path_type path(const std::string& str) {
+ return boost::property_tree::ptree::path_type(str, '/');
+ }
+
+ static void get_graphs(const boost::property_tree::ptree& top,
+ std::vector<const boost::property_tree::ptree*>& result) {
+ using boost::property_tree::ptree;
+ BOOST_FOREACH(const ptree::value_type& n, top) {
+ if (n.first == "graph") {
+ result.push_back(&n.second);
+ get_graphs(n.second, result);
+ }
+ }
+ }
     
     void run(std::istream& in)
     {
- const int buffer_size = 4096;
- m_parser = XML_ParserCreateNS(0,'|');
- XML_SetElementHandler(m_parser, &on_start_element, &on_end_element);
- XML_SetCharacterDataHandler(m_parser, &on_character_data);
- XML_SetUserData(m_parser, this);
- char buffer[buffer_size];
-
- bool okay = true;
- do
- {
- in.read(buffer, buffer_size);
- okay = XML_Parse(m_parser, buffer, in.gcount(), in.gcount() == 0);
- }
- while (okay && in.good());
-
- if (!okay)
- {
- std::stringstream s;
- s << "on line " << XML_GetCurrentLineNumber(m_parser)
- <<", column " << XML_GetCurrentColumnNumber(m_parser)
- << ": " << XML_ErrorString(XML_GetErrorCode(m_parser));
- throw parse_error(s.str());
+ using boost::property_tree::ptree;
+ ptree pt;
+ read_xml(in, pt, boost::property_tree::xml_parser::no_comments | boost::property_tree::xml_parser::trim_whitespace);
+ ptree gml = pt.get_child(path("graphml"));
+ // Search for attributes
+ BOOST_FOREACH(const ptree::value_type& child, gml) {
+ if (child.first != "key") continue;
+ std::string id = child.second.get(path("<xmlattr>/id"), "");
+ std::string for_ = child.second.get(path("<xmlattr>/for"), "");
+ std::string name = child.second.get(path("<xmlattr>/attr.name"), "");
+ std::string type = child.second.get(path("<xmlattr>/attr.type"), "");
+ key_kind kind = all_key;
+ if (for_ == "graph") kind = graph_key;
+ else if (for_ == "node") kind = node_key;
+ else if (for_ == "edge") kind = edge_key;
+ else if (for_ == "hyperedge") kind = hyperedge_key;
+ else if (for_ == "port") kind = port_key;
+ else if (for_ == "endpoint") kind = endpoint_key;
+ else if (for_ == "all") kind = all_key;
+ else throw parse_error("Attribute for is not valid: " + for_);
+ m_keys[id] = kind;
+ m_key_name[id] = name;
+ m_key_type[id] = type;
+ boost::optional<std::string> default_ = child.second.get_optional<std::string>(path("default"));
+ if (default_) m_key_default[id] = default_.get();
+ }
+ // Search for graphs
+ std::vector<const ptree*> graphs;
+ get_graphs(gml, graphs);
+ BOOST_FOREACH(const ptree* gr, graphs) {
+ // Search for nodes
+ BOOST_FOREACH(const ptree::value_type& node, *gr) {
+ if (node.first != "node") continue;
+ std::string id = node.second.get<std::string>(path("<xmlattr>/id"));
+ handle_vertex(id);
+ BOOST_FOREACH(const ptree::value_type& attr, node.second) {
+ if (attr.first != "data") continue;
+ std::string key = attr.second.get<std::string>(path("<xmlattr>/key"));
+ std::string value = attr.second.get_value("");
+ handle_node_property(key, id, value);
+ }
+ }
+ }
+ BOOST_FOREACH(const ptree* gr, graphs) {
+ bool default_directed = gr->get<std::string>(path("<xmlattr>/edgedefault")) == "directed";
+ // Search for edges
+ BOOST_FOREACH(const ptree::value_type& edge, *gr) {
+ if (edge.first != "edge") continue;
+ std::string id = edge.second.get<std::string>(path("<xmlattr>/id"));
+ std::string source = edge.second.get<std::string>(path("<xmlattr>/source"));
+ std::string target = edge.second.get<std::string>(path("<xmlattr>/target"));
+ std::string local_directed = edge.second.get(path("<xmlattr>/directed"), "");
+ bool is_directed = (local_directed == "" ? default_directed : local_directed == "true");
+ if (is_directed != m_g.is_directed()) {
+ if (is_directed) throw directed_graph_error(); else throw undirected_graph_error();
+ }
+ size_t old_edges_size = m_edge.size();
+ handle_edge(source, target);
+ BOOST_FOREACH(const ptree::value_type& attr, edge.second) {
+ if (attr.first != "data") continue;
+ std::string key = attr.second.get<std::string>(path("<xmlattr>/key"));
+ std::string value = attr.second.get_value("");
+ handle_edge_property(key, old_edges_size, value);
+ }
         }
- XML_ParserFree(m_parser);
+ }
     }
 
 private:
@@ -62,199 +121,15 @@
         all_key
     };
 
- static void
- on_start_element(void* user_data, const XML_Char *c_name,
- const XML_Char **atts)
- {
- graphml_reader* self = static_cast<graphml_reader*>(user_data);
-
- std::string name(c_name);
- replace_first(name, "http://graphml.graphdrawing.org/xmlns|", "");
-
- if (name == "edge")
- {
- std::string id;
- std::string source, target;
- while (*atts)
- {
- std::string name = *atts++;
- std::string value = *atts++;
-
- if (name == "id") id = value;
- else if (name == "source") source = value;
- else if (name == "target") target = value;
- else if (name == "directed")
- {
- bool edge_is_directed = (value == "directed");
- if (edge_is_directed != self->m_g.is_directed())
- {
- if (edge_is_directed)
- throw directed_graph_error();
- else
- throw undirected_graph_error();
- }
- }
- }
-
- self->m_active_descriptor = self->m_edge.size();
- self->handle_edge(source, target);
- }
- else if (name == "node")
- {
- std::string id;
-
- while (*atts)
- {
- std::string name = *atts++;
- std::string value = *atts++;
-
- if (name == "id") id = value;
- }
-
- self->handle_vertex(id);
- self->m_active_descriptor = id;
- }
- else if (name == "data")
- {
- while (*atts)
- {
- std::string name = *atts++;
- std::string value = *atts++;
-
- if (name == "key") self->m_active_key = value;
- }
- }
- else if (name == "key")
- {
- std::string id;
- std::string key_name;
- std::string key_type;
- key_kind kind = all_key;
-
- while (*atts)
- {
- std::string name = *atts++;
- std::string value = *atts++;
-
- if (name == "id") id = value;
- else if (name == "attr.name") key_name = value;
- else if (name == "attr.type") key_type = value;
- else if (name == "for")
- {
- if (value == "graph") kind = graph_key;
- else if (value == "node") kind = node_key;
- else if (value == "edge") kind = edge_key;
- else if (value == "hyperedge") kind = hyperedge_key;
- else if (value == "port") kind = port_key;
- else if (value == "endpoint") kind = endpoint_key;
- else if (value == "all") kind = all_key;
- else
- {
- std::stringstream s;
- s << "on line " << XML_GetCurrentLineNumber(self->m_parser)
- << ", column " << XML_GetCurrentColumnNumber(self->m_parser)
- << ": unrecognized key kind '" << value << "'";
- throw parse_error(s.str());
- }
- }
- }
-
- self->m_keys[id] = kind;
- self->m_key_name[id] = key_name;
- self->m_key_type[id] = key_type;
- self->m_active_key = id;
- }
- else if (name == "graph")
- {
- while (*atts)
- {
- std::string name = *atts++;
- std::string value = *atts++;
-
- if (name == "edgedefault")
- {
- bool edge_is_directed = (value == "directed");
- if (edge_is_directed != self->m_g.is_directed())
- {
- if (edge_is_directed)
- throw directed_graph_error();
- else
- throw undirected_graph_error();
- }
- }
- else if (name == "parse.nodeids")
- {
- self->m_canonical_vertices = (value == "canonical");
- }
- }
- self->m_active_descriptor = "";
- }
-
- self->m_character_data.clear();
- }
-
- static void
- on_end_element(void* user_data, const XML_Char *c_name)
- {
- graphml_reader* self = static_cast<graphml_reader*>(user_data);
-
- std::string name(c_name);
- replace_first(name, "http://graphml.graphdrawing.org/xmlns|", "");
-
- if (name == "data")
- {
- self->handle_property(self->m_active_key, self->m_active_descriptor,
- self->m_character_data);
- }
- else if (name == "default")
- {
- self->m_key_default[self->m_active_key] = self->m_character_data;
- }
- }
-
- static void
- on_character_data(void* user_data, const XML_Char* s, int len)
- {
- graphml_reader* self = static_cast<graphml_reader*>(user_data);
- self->m_character_data.append(s, len);
- }
-
     void
     handle_vertex(const std::string& v)
     {
         bool is_new = false;
 
- if (m_canonical_vertices)
- {
- size_t id;
-
- //strip leading "n" from name
- try
- {
- id = lexical_cast<size_t>(std::string(v,1));
- }
- catch (bad_lexical_cast)
- {
- std::stringstream s;
- s << "on line " << XML_GetCurrentLineNumber(m_parser)
- << ", column " << XML_GetCurrentColumnNumber(m_parser)
- << ": invalid vertex: " << v;
- throw parse_error(s.str());
- }
-
- while(id >= m_canonical_vertex.size())
- {
- m_canonical_vertex.push_back(m_g.do_add_vertex());
- is_new = true;
- }
- }
- else
+ if (m_vertex.find(v) == m_vertex.end())
         {
- if (m_vertex.find(v) == m_vertex.end())
- {
- m_vertex[v] = m_g.do_add_vertex();
- is_new = true;
- }
+ m_vertex[v] = m_g.do_add_vertex();
+ is_new = true;
         }
 
         if (is_new)
@@ -263,7 +138,7 @@
             for (iter = m_key_default.begin(); iter != m_key_default.end(); ++iter)
             {
                 if (m_keys[iter->first] == node_key)
- handle_property(iter->first, v, iter->second);
+ handle_node_property(iter->first, v, iter->second);
             }
         }
     }
@@ -271,16 +146,7 @@
     any
     get_vertex_descriptor(const std::string& v)
     {
- if (m_canonical_vertices)
- {
- //strip leading "n" from name
- size_t id = lexical_cast<size_t>(std::string(v,1));
- return m_canonical_vertex[id];
- }
- else
- {
- return m_vertex[v];
- }
+ return m_vertex[v];
     }
 
     void
@@ -306,40 +172,18 @@
         for (iter = m_key_default.begin(); iter != m_key_default.end(); ++iter)
         {
             if (m_keys[iter->first] == edge_key)
- handle_property(iter->first, e, iter->second);
+ handle_edge_property(iter->first, e, iter->second);
         }
     }
 
- void handle_property(const std::string& key_id, const variant<std::string,size_t>& descriptor, const std::string& value)
+ void handle_node_property(const std::string& key_id, const std::string& descriptor, const std::string& value)
     {
- try
- {
- if (get<std::string>(&descriptor))
- {
- if (get<std::string>(descriptor) == "")
- m_g.set_graph_property(m_key_name[key_id], value, m_key_type[key_id]);
- else
- m_g.set_vertex_property(m_key_name[key_id], get_vertex_descriptor(get<std::string>(descriptor)), value, m_key_type[key_id]);
- }
- else
- {
- m_g.set_edge_property(m_key_name[key_id], get_edge_descriptor(get<size_t>(descriptor)), value, m_key_type[key_id]);
- }
- }
- catch (parse_error &e)
- {
- std::stringstream s;
- s << "on line " << XML_GetCurrentLineNumber(m_parser)
- << ", column " << XML_GetCurrentColumnNumber(m_parser)
- << ": " << e.error;
- throw parse_error(s.str());
- }
+ m_g.set_vertex_property(m_key_name[key_id], m_vertex[descriptor], value, m_key_type[key_id]);
     }
 
- any
- get_edge_descriptor(size_t e)
+ void handle_edge_property(const std::string& key_id, size_t descriptor, const std::string& value)
     {
- return m_edge[e];
+ m_g.set_edge_property(m_key_name[key_id], m_edge[descriptor], value, m_key_type[key_id]);
     }
 
     mutate_graph& m_g;
@@ -348,14 +192,7 @@
     std::map<std::string, std::string> m_key_type;
     std::map<std::string, std::string> m_key_default;
     std::map<std::string, any> m_vertex;
- std::vector<any> m_canonical_vertex;
     std::vector<any> m_edge;
- variant<std::string, size_t> m_active_descriptor;
- std::string m_active_key;
- std::string m_character_data;
- bool m_canonical_vertices;
- bool m_canonical_edges;
- XML_Parser m_parser;
 };
 
 namespace boost


Boost-Commit list run by bdawes at acm.org, david.abrahams at rcn.com, gregod at cs.rpi.edu, cpdaniel at pacbell.net, john at johnmaddock.co.uk