Boost logo

Boost-Commit :

Subject: [Boost-commit] svn:boost r53237 - in trunk: boost/graph boost/graph/detail libs/graph/build libs/graph/doc libs/graph/src
From: jewillco_at_[hidden]
Date: 2009-05-25 01:38:58


Author: jewillco
Date: 2009-05-25 01:38:56 EDT (Mon, 25 May 2009)
New Revision: 53237
URL: http://svn.boost.org/trac/boost/changeset/53237

Log:
Changed new GraphViz parser to be less generic (so it can be built as a binary); fixed subgraph issues by doing a lot of tests on GraphViz itself and restructuring a lot of the parser; made docs only point to new parser and made old one not build by default (although it is not removed)
Added:
   trunk/libs/graph/src/read_graphviz_new.cpp (contents, props changed)
Text files modified:
   trunk/boost/graph/detail/read_graphviz_new.hpp | 679 ++-------------------------------------
   trunk/boost/graph/graphviz.hpp | 21
   trunk/libs/graph/build/Jamfile.v2 | 3
   trunk/libs/graph/doc/read_graphviz.html | 59 +-
   trunk/libs/graph/doc/read_graphviz.rst | 50 +-
   5 files changed, 122 insertions(+), 690 deletions(-)

Modified: trunk/boost/graph/detail/read_graphviz_new.hpp
==============================================================================
--- trunk/boost/graph/detail/read_graphviz_new.hpp (original)
+++ trunk/boost/graph/detail/read_graphviz_new.hpp 2009-05-25 01:38:56 EDT (Mon, 25 May 2009)
@@ -29,13 +29,10 @@
 #define BOOST_READ_GRAPHVIZ_NEW_HPP
 
 #include <boost/ref.hpp>
-#include <boost/function/function2.hpp>
 #include <boost/property_map/dynamic_property_map.hpp>
 #include <boost/graph/graph_traits.hpp>
 #include <boost/detail/workaround.hpp>
-#include <boost/algorithm/string/case_conv.hpp>
 #include <algorithm>
-#include <exception> // for std::exception
 #include <string>
 #include <vector>
 #include <set>
@@ -43,661 +40,75 @@
 #include <map>
 #include <iostream>
 #include <cstdlib>
-#include <boost/graph/graphviz.hpp>
-#include <boost/throw_exception.hpp>
-#include <boost/regex.hpp>
-#include <boost/function.hpp>
-#include <boost/bind.hpp>
 
 namespace boost {
 
 namespace read_graphviz_detail {
- struct token {
- enum token_type {
- kw_strict,
- kw_graph,
- kw_digraph,
- kw_node,
- kw_edge,
- kw_subgraph,
- left_brace,
- right_brace,
- semicolon,
- equal,
- left_bracket,
- right_bracket,
- comma,
- colon,
- dash_greater,
- dash_dash,
- plus,
- left_paren,
- right_paren,
- at,
- identifier,
- quoted_string, // Only used internally in tokenizer
- eof,
- invalid
- } type;
- std::string normalized_value; // May have double-quotes removed and/or some escapes replaced
- token(token_type type, const std::string& normalized_value)
- : type(type), normalized_value(normalized_value) {}
- token(): type(invalid), normalized_value("") {}
- friend std::ostream& operator<<(std::ostream& o, const token& t) {
- switch (t.type) {
- case token::kw_strict: o << "<strict>"; break;
- case token::kw_graph: o << "<graph>"; break;
- case token::kw_digraph: o << "<digraph>"; break;
- case token::kw_node: o << "<node>"; break;
- case token::kw_edge: o << "<edge>"; break;
- case token::kw_subgraph: o << "<subgraph>"; break;
- case token::left_brace: o << "<left_brace>"; break;
- case token::right_brace: o << "<right_brace>"; break;
- case token::semicolon: o << "<semicolon>"; break;
- case token::equal: o << "<equal>"; break;
- case token::left_bracket: o << "<left_bracket>"; break;
- case token::right_bracket: o << "<right_bracket>"; break;
- case token::comma: o << "<comma>"; break;
- case token::colon: o << "<colon>"; break;
- case token::dash_greater: o << "<dash-greater>"; break;
- case token::dash_dash: o << "<dash-dash>"; break;
- case token::plus: o << "<plus>"; break;
- case token::left_paren: o << "<left_paren>"; break;
- case token::right_paren: o << "<right_paren>"; break;
- case token::at: o << "<at>"; break;
- case token::identifier: o << "<identifier>"; break;
- case token::quoted_string: o << "<quoted_string>"; break;
- case token::eof: o << "<eof>"; break;
- default: o << "<invalid type>"; break;
- }
- o << " '" << t.normalized_value << "'";
- return o;
- }
- };
-
- struct lex_error: public std::exception {
- std::string errmsg;
- char bad_char;
- lex_error(const std::string& errmsg, char bad_char): errmsg(errmsg), bad_char(bad_char) {
- this->errmsg += std::string(" (char is '") + bad_char + "')";
- }
- const char* what() const throw () {return errmsg.c_str();}
- ~lex_error() throw () {};
- };
-
- struct parse_error: public std::exception {
- std::string errmsg;
- token bad_token;
- parse_error(const std::string& errmsg, const token& bad_token): errmsg(errmsg), bad_token(bad_token) {
- this->errmsg += std::string(" (token is \"") + boost::lexical_cast<std::string>(bad_token) + "\")";
- }
- const char* what() const throw () {return errmsg.c_str();}
- ~parse_error() throw () {};
- };
-
- template <typename BidirectionalIterator>
- struct tokenizer {
- BidirectionalIterator begin, end;
- std::vector<token> lookahead;
- // Precomputed regexes
- boost::regex stuff_to_skip;
- boost::regex basic_id_token;
- boost::regex punctuation_token;
- boost::regex number_token;
- boost::regex quoted_string_token;
-
- tokenizer(BidirectionalIterator begin, BidirectionalIterator end)
- : begin(begin), end(end)
- {
- std::string end_of_token = "(?=(?:\\W))";
- std::string whitespace = "(?:\\s+)";
- std::string slash_slash_comment = "(?://.*$)";
- std::string slash_star_comment = "(?:/\\*.*?\\*/)";
- std::string hash_comment = "(?:^#.*?$)";
- stuff_to_skip = "\\A(?:" + whitespace + "|" + slash_slash_comment + "|" + slash_star_comment + "|" + hash_comment + ")*";
- basic_id_token = "\\A([[:alpha:]_](?:\\w*))";
- punctuation_token = "\\A([][{};=,:+()@]|[-][>-])";
- number_token = "\\A([-]?(?:(?:\\.\\d+)|(?:\\d+(?:\\.\\d*)?)))";
- quoted_string_token = "\\A(\"(?:[^\"\\\\]|(?:[\\\\].))*\")";
- }
-
- void skip() {
- boost::match_results<BidirectionalIterator> results;
- bool found = boost::regex_search(begin, end, results, stuff_to_skip);
- assert (found);
- boost::sub_match<BidirectionalIterator> sm1 = results.suffix();
- assert (sm1.second == end);
- begin = sm1.first;
- }
-
- token get_token_raw() {
- if (!lookahead.empty()) {
- token t = lookahead.front();
- lookahead.erase(lookahead.begin());
- return t;
- }
- skip();
- if (begin == end) return token(token::eof, "");
- // Look for keywords first
- bool found;
- boost::match_results<BidirectionalIterator> results;
- found = boost::regex_search(begin, end, results, basic_id_token);
- if (found) {
- std::string str = results[1].str();
- std::string str_lower = boost::algorithm::to_lower_copy(str);
- begin = results.suffix().first;
- if (str_lower == "strict") {
- return token(token::kw_strict, str);
- } else if (str_lower == "graph") {
- return token(token::kw_graph, str);
- } else if (str_lower == "digraph") {
- return token(token::kw_digraph, str);
- } else if (str_lower == "node") {
- return token(token::kw_node, str);
- } else if (str_lower == "edge") {
- return token(token::kw_edge, str);
- } else if (str_lower == "subgraph") {
- return token(token::kw_subgraph, str);
- } else {
- return token(token::identifier, str);
- }
- }
- found = boost::regex_search(begin, end, results, punctuation_token);
- if (found) {
- std::string str = results[1].str();
- begin = results.suffix().first;
- switch (str[0]) {
- case '[': return token(token::left_bracket, str);
- case ']': return token(token::right_bracket, str);
- case '{': return token(token::left_brace, str);
- case '}': return token(token::right_brace, str);
- case ';': return token(token::semicolon, str);
- case '=': return token(token::equal, str);
- case ',': return token(token::comma, str);
- case ':': return token(token::colon, str);
- case '+': return token(token::plus, str);
- case '(': return token(token::left_paren, str);
- case ')': return token(token::right_paren, str);
- case '@': return token(token::at, str);
- case '-': {
- switch (str[1]) {
- case '-': return token(token::dash_dash, str);
- case '>': return token(token::dash_greater, str);
- default: assert (!"Definition of punctuation_token does not match switch statement");
- }
- }
- default: assert (!"Definition of punctuation_token does not match switch statement"); std::abort();
- }
- }
- found = boost::regex_search(begin, end, results, number_token);
- if (found) {
- std::string str = results[1].str();
- begin = results.suffix().first;
- return token(token::identifier, str);
- }
- found = boost::regex_search(begin, end, results, quoted_string_token);
- if (found) {
- std::string str = results[1].str();
- begin = results.suffix().first;
- // Remove the beginning and ending quotes
- assert (str.size() >= 2);
- str.erase(str.begin());
- str.erase(str.end() - 1);
- // Unescape quotes in the middle, but nothing else (see format spec)
- for (size_t i = 0; i + 1 < str.size() /* May change */; ++i) {
- if (str[i] == '\\' && str[i + 1] == '"') {
- str.erase(str.begin() + i);
- // Don't need to adjust i
- } else if (str[i] == '\\' && str[i + 1] == '\n') {
- str.erase(str.begin() + i);
- str.erase(str.begin() + i);
- --i; // Invert ++ that will be applied
- }
- }
- return token(token::quoted_string, str);
- }
- if (*begin == '<') {
- throw_lex_error("HTML strings not supported");
- return token();
- } else {
- throw_lex_error("Invalid character");
- return token();
- }
- }
-
- token peek_token_raw() {
- if (lookahead.empty()) {
- token t = get_token_raw();
- lookahead.push_back(t);
- }
- return lookahead.front();
- }
-
- token get_token() { // Handle string concatenation
- token t = get_token_raw();
- if (t.type != token::quoted_string) return t;
- std::string str = t.normalized_value;
- while (peek_token_raw().type == token::plus) {
- get_token_raw();
- token t2 = get_token_raw();
- if (t2.type != token::quoted_string) {
- throw_lex_error("Must have quoted string after string concatenation");
- }
- str += t2.normalized_value;
- }
- return token(token::identifier, str); // Note that quoted_string does not get passed to the parser
- }
-
- void throw_lex_error(const std::string& errmsg) {
- boost::throw_exception(lex_error(errmsg, *begin));
- }
- };
+ typedef std::string node_name;
+ typedef std::string subgraph_name;
 
   typedef std::map<std::string, std::string> properties;
 
- struct node_id {
- std::string name;
+ struct node_and_port {
+ node_name name;
     std::string angle; // Or empty if no angle
     std::vector<std::string> location; // Up to two identifiers
- };
-
- // Parser policy object should have the following methods:
- // struct parser_policy {
- // void do_node(const node_id& n, const properties& default_props, const properties& custom_props);
- // void do_edge(const node_id& a, const node_id& b, const properties& props);
- // void do_begin_graph(bool is_strict, bool is_directed, const std::string& name);
- // void do_end_graph(const properties& props);
- // void do_begin_subgraph(const std::string& name);
- // void do_end_subgraph(const properties& props);
- // };
-
- template <typename BidirectionalIterator, typename Policy>
- struct parser {
- tokenizer<BidirectionalIterator> the_tokenizer;
- std::vector<token> lookahead;
- bool graph_is_directed;
- properties graph_props;
- properties node_props;
- properties edge_props;
- Policy hooks;
-
- parser(BidirectionalIterator begin, BidirectionalIterator end, Policy hooks)
- : the_tokenizer(begin, end), lookahead(), hooks(hooks) {}
-
- token get() {
- if (lookahead.empty()) {
- token t = the_tokenizer.get_token();
- return t;
- } else {
- token t = lookahead.front();
- lookahead.erase(lookahead.begin());
- return t;
- }
- }
-
- token peek() {
- if (lookahead.empty()) {
- lookahead.push_back(the_tokenizer.get_token());
- }
- return lookahead.front();
- }
-
- void error(const std::string& str) {
- boost::throw_exception(parse_error(str, peek()));
- }
-
- void parse_graph(bool want_directed) {
- bool is_strict = false;
- bool is_directed;
- std::string name;
- if (peek().type == token::kw_strict) {get(); is_strict = true;}
- switch (peek().type) {
- case token::kw_graph: is_directed = false; break;
- case token::kw_digraph: is_directed = true; break;
- default: error("Wanted \"graph\" or \"digraph\"");
- }
- graph_is_directed = is_directed; // Used to check edges
- if (want_directed != graph_is_directed) {
- if (want_directed) {
- boost::throw_exception(boost::undirected_graph_error());
- } else {
- boost::throw_exception(boost::directed_graph_error());
- }
- }
- hooks.do_begin_graph(is_strict, is_directed, name);
- get();
- switch (peek().type) {
- case token::identifier: name = peek().normalized_value; get(); break;
- case token::left_brace: break;
- default: error("Wanted a graph name or left brace");
- }
- if (peek().type == token::left_brace) get(); else error("Wanted a left brace to start the graph");
- parse_stmt_list();
- if (peek().type == token::right_brace) get(); else error("Wanted a right brace to end the graph");
- hooks.do_end_graph(graph_props);
- if (peek().type == token::eof) {} else error("Wanted end of file");
- }
-
- void parse_stmt_list() {
- while (true) {
- if (peek().type == token::right_brace) return;
- parse_stmt();
- if (peek().type == token::semicolon) get();
- }
- }
-
- void parse_stmt() {
- switch (peek().type) {
- case token::kw_node:
- case token::kw_edge:
- case token::kw_graph: parse_attr_stmt(); break;
- case token::kw_subgraph:
- case token::left_brace: parse_subgraph(); break;
- case token::identifier: {
- token id = get();
- switch (peek().type) {
- case token::dash_dash:
- case token::dash_greater: {
- node_id n = parse_node_id_rest(id);
- parse_edge_stmt(n);
- break;
- }
- case token::equal: {
- get();
- if (peek().type != token::identifier) error("Wanted identifier as right side of =");
- token id2 = get();
- graph_props[id.normalized_value] = id2.normalized_value;
- break;
- }
- default: {
- node_id n = parse_node_id_rest(id);
- parse_node_stmt(n);
- break;
- }
- }
- break;
- }
- default: error("Invalid start token for statement");
- }
- }
-
- void parse_attr_stmt() {
- switch (get().type) {
- case token::kw_graph: parse_attr_list(graph_props); break;
- case token::kw_node: parse_attr_list(node_props); break;
- case token::kw_edge: parse_attr_list(edge_props); break;
- default: assert (!"Bad attr_stmt case"); std::abort();
- }
- }
 
- void parse_subgraph() {
- std::string name;
- if (peek().type == token::kw_subgraph) {
- get();
- if (peek().type == token::identifier) {
- name = get().normalized_value;
- }
- }
- properties saved_node_props = node_props;
- properties saved_edge_props = edge_props;
- properties saved_graph_props = graph_props;
- if (peek().type != token::left_brace) {
- if (name.empty()) error("Subgraph reference needs a name");
- hooks.do_subgraph_reference(name);
- return;
- }
- hooks.do_begin_subgraph(name);
- if (peek().type == token::left_brace) get(); else error("Wanted left brace to start subgraph");
- parse_stmt_list();
- if (peek().type == token::right_brace) get(); else error("Wanted right brace to end subgraph");
- hooks.do_end_subgraph(graph_props);
- node_props = saved_node_props;
- edge_props = saved_edge_props;
- graph_props = saved_graph_props;
- if (peek().type == token::dash_greater ||
- peek().type == token::dash_dash) { // FIXME
- std::cerr << "FIXME: Subgraphs in edges are not supported" << std::endl;
- error("Unsupported subgraph edge case");
- }
+ friend inline bool operator==(const node_and_port& a, const node_and_port& b) {
+ return a.name == b.name &&
+ a.angle == b.angle &&
+ a.location == b.location;
     }
 
- node_id parse_node_id_rest(const token& name) {
- // A node ID is a node name, followed optionally by a port angle and a
- // port location (in either order); a port location is either :id,
- // :id:id, or :(id,id); the last two forms are treated as equivalent
- // although I am not sure about that.
- node_id id;
- id.name = name.normalized_value;
- parse_more:
- switch (peek().type) {
- case token::at: {
- get();
- if (peek().type != token::identifier) error("Wanted identifier as port angle");
- if (id.angle != "") error("Duplicate port angle");
- id.angle = get().normalized_value;
- goto parse_more;
- }
- case token::colon: {
- get();
- if (!id.location.empty()) error("Duplicate port location");
- switch (peek().type) {
- case token::identifier: {
- id.location.push_back(get().normalized_value);
- switch (peek().type) {
- case token::colon: {
- get();
- if (peek().type != token::identifier) error("Wanted identifier as port location");
- id.location.push_back(get().normalized_value);
- goto parse_more;
- }
- default: goto parse_more;
- }
- }
- case token::left_paren: {
- get();
- if (peek().type != token::identifier) error("Wanted identifier as first element of port location");
- id.location.push_back(get().normalized_value);
- if (peek().type != token::comma) error("Wanted comma between parts of port location");
- get();
- if (peek().type != token::identifier) error("Wanted identifier as second element of port location");
- id.location.push_back(get().normalized_value);
- if (peek().type != token::right_paren) error("Wanted right parenthesis to close port location");
- get();
- goto parse_more;
- }
- default: error("Wanted identifier or left parenthesis as start of port location");
- }
- }
- default: break;
- }
- return id;
- }
-
- node_id parse_node_id() {
- if (peek().type != token::identifier) error("Wanted identifier as node name (subgraphs not supported yet)");
- token name = get();
- return parse_node_id_rest(name);
- }
-
- void parse_node_stmt(const node_id& n) {
- properties this_node_props;
- if (peek().type == token::left_bracket) parse_attr_list(this_node_props);
- hooks.do_node(n, node_props, this_node_props);
- }
-
- void parse_edge_stmt(const node_id& lhs) {
- std::vector<node_id> nodes_in_chain(1, lhs);
- while (true) {
- bool leave_loop = true;
- switch (peek().type) {
- case token::dash_dash: {
- if (graph_is_directed) error("Using -- in directed graph");
- get();
- nodes_in_chain.push_back(parse_node_id());
- leave_loop = false;
- break;
- }
- case token::dash_greater: {
- if (!graph_is_directed) error("Using -> in undirected graph");
- get();
- nodes_in_chain.push_back(parse_node_id());
- leave_loop = false;
- break;
- }
- default: leave_loop = true; break;
- }
- if (leave_loop) break;
- }
- properties this_edge_props = edge_props;
- if (peek().type == token::left_bracket) parse_attr_list(this_edge_props);
- assert (nodes_in_chain.size() >= 2); // Should be in node parser otherwise
- for (size_t i = 0; i + 1 < nodes_in_chain.size(); ++i) {
- hooks.do_edge(nodes_in_chain[i], nodes_in_chain[i + 1], this_edge_props);
- }
- }
-
- void parse_attr_list(properties& props) {
- while (true) {
- if (peek().type == token::left_bracket) get(); else error("Wanted left bracket to start attribute list");
- while (true) {
- switch (peek().type) {
- case token::right_bracket: break;
- case token::identifier: {
- std::string lhs = get().normalized_value;
- std::string rhs = "true";
- if (peek().type == token::equal) {
- get();
- if (peek().type != token::identifier) error("Wanted identifier as value of attributed");
- rhs = get().normalized_value;
- }
- props[lhs] = rhs;
- break;
- }
- default: error("Wanted identifier as name of attribute");
- }
- if (peek().type == token::comma) {get(); continue;}
- break;
- }
- if (peek().type == token::right_bracket) get(); else error("Wanted right bracket to end attribute list");
- if (peek().type != token::left_bracket) break;
- }
+ friend inline bool operator<(const node_and_port& a, const node_and_port& b) {
+ if (a.name != b.name) return a.name < b.name;
+ if (a.angle != b.angle) return a.angle < b.angle;
+ return a.location < b.location;
     }
   };
 
- struct graph_parser_policy {
- ::boost::detail::graph::mutate_graph* mg;
-
- typedef boost::detail::graph::node_t vertex;
- typedef boost::detail::graph::edge_t edge;
-
- std::map<std::string, vertex> vertex_map;
- std::map<std::string, edge> edge_map;
-
- graph_parser_policy(::boost::detail::graph::mutate_graph* mg)
- : mg(mg),
- vertex_map(),
- edge_map()
- {}
-
- bool node_exists(const node_id& n) const {
- std::map<std::string, vertex>::const_iterator i = vertex_map.find(n.name);
- return (i != vertex_map.end());
- }
-
- vertex get_or_build_node(const node_id& n) {
- // FIXME: use ports
- std::map<std::string, vertex>::const_iterator i = vertex_map.find(n.name);
- if (i == vertex_map.end()) {
- vertex v = n.name;
- mg->do_add_vertex(v);
- vertex_map.insert(std::make_pair(v, v));
- return v;
- } else {
- return i->second;
- }
- }
-
- static std::string node_id_to_string(const node_id& n) {
- std::string result = n.name;
- for (size_t i = 0; i < n.location.size(); ++i) {
- result += ":" + n.location[i];
- }
- if (!n.angle.empty()) result += "@" + n.angle;
- return result;
- }
-
- static std::string props_to_string(const properties& props) {
- std::string result = "[";
- for (properties::const_iterator i = props.begin(); i != props.end(); ++i) {
- if (i != props.begin()) result += ", ";
- result += i->first + "=" + i->second;
- }
- result += "]";
- return result;
- }
-
- void do_node(const node_id& n, const properties& def_props, const properties& new_props) {
- std::cerr << node_id_to_string(n) << " " << props_to_string(def_props) << " " << props_to_string(new_props) << std::endl;
- properties props_to_set = new_props;
- if (!node_exists(n)) { // Only use defaults if node is new
- props_to_set.insert(def_props.begin(), def_props.end()); // This keeps old properties in preference to ones from def_props
- }
- vertex v = get_or_build_node(n);
- for (properties::const_iterator i = props_to_set.begin(); i != props_to_set.end(); ++i) {
- mg->set_node_property(i->first, v, i->second);
- }
- }
-
- void do_edge(const node_id& a, const node_id& b, const properties& props) {
- std::cerr << node_id_to_string(a) << " -> " << node_id_to_string(b) << " " << props_to_string(props) << std::endl;
- edge e = edge::new_edge();
- mg->do_add_edge(e, get_or_build_node(a), get_or_build_node(b));
- for (properties::const_iterator i = props.begin(); i != props.end(); ++i) {
- mg->set_edge_property(i->first, e, i->second);
- }
- }
-
- void do_begin_graph(bool is_strict, bool is_directed, const std::string& name) {
- ignore_unused_variable_warning(is_strict);
- ignore_unused_variable_warning(is_directed);
- ignore_unused_variable_warning(name);
- // std::cerr << "starting" << (is_strict ? " strict" : "") << " " << (is_directed ? "directed" : "undirected") << " graph named " << name << std::endl;
- }
+ struct edge_info {
+ node_and_port source;
+ node_and_port target;
+ properties props;
+ };
 
- void do_end_graph(const properties& props) {
- std::cerr << "ending graph " << props_to_string(props) << std::endl;
- for (properties::const_iterator i = props.begin(); i != props.end(); ++i) {
- mg->set_graph_property(i->first, i->second);
- }
- }
-
- void do_subgraph_reference(const std::string& name) {
- ignore_unused_variable_warning(name);
- // std::cerr << "subgraph reference to " << name << std::endl;
- }
+ struct parser_result {
+ bool graph_is_directed;
+ bool graph_is_strict;
+ std::map<node_name, properties> nodes; // Global set
+ std::vector<edge_info> edges;
+ std::map<subgraph_name, properties> graph_props; // Root and subgraphs
+ };
 
- void do_begin_subgraph(const std::string& name) {
- ignore_unused_variable_warning(name);
- // std::cerr << "starting subgraph named " << name << std::endl;
- }
+ // The actual parser, from libs/graph/src/read_graphviz_new.cpp
+ void parse_graphviz_from_string(const std::string& str, parser_result& result, bool want_directed);
 
- void do_end_subgraph(const properties& props) {
- ignore_unused_variable_warning(props);
- // std::cerr << "ending subgraph " << props_to_string(props) << std::endl;
- }
- };
+ // Translate from those results to a graph
+ void translate_results_to_graph(const parser_result& r, ::boost::detail::graph::mutate_graph* mg);
 
 } // namespace read_graphviz_detail
 
-template <typename BidirectionalIterator, typename MutableGraph>
-bool read_graphviz(BidirectionalIterator begin, BidirectionalIterator end,
+// This is also in boost/graph/graphviz.hpp
+namespace detail {
+ namespace graph {
+ BOOST_GRAPH_DECL bool read_graphviz(const std::string& str, boost::detail::graph::mutate_graph* mg);
+ } // end namespace graph
+} // end namespace detail
+
+template <typename MutableGraph>
+bool read_graphviz(const std::string& str,
                    MutableGraph& graph, boost::dynamic_properties& dp,
                    std::string const& node_id = "node_id") {
   boost::detail::graph::mutate_graph_impl<MutableGraph> mg(graph, dp, node_id);
- read_graphviz_detail::graph_parser_policy policy(&mg);
- read_graphviz_detail::parser<BidirectionalIterator, read_graphviz_detail::graph_parser_policy> p(begin, end, policy);
- p.parse_graph(mg.is_directed());
- return true;
+ return detail::graph::read_graphviz(str, &mg);
+}
+
+template <typename InputIter, typename MutableGraph>
+bool read_graphviz(InputIter begin, InputIter end,
+ MutableGraph& graph, boost::dynamic_properties& dp,
+ std::string const& node_id = "node_id") {
+ return read_graphviz(std::string(begin, end), graph, dp, node_id);
 }
 
 } // namespace boost

Modified: trunk/boost/graph/graphviz.hpp
==============================================================================
--- trunk/boost/graph/graphviz.hpp (original)
+++ trunk/boost/graph/graphviz.hpp 2009-05-25 01:38:56 EDT (Mon, 25 May 2009)
@@ -491,11 +491,12 @@
 
   // These four require linking the BGL-Graphviz library: libbgl-viz.a
   // from the /src directory.
- extern void read_graphviz(const std::string& file, GraphvizDigraph& g);
- extern void read_graphviz(FILE* file, GraphvizDigraph& g);
-
- extern void read_graphviz(const std::string& file, GraphvizGraph& g);
- extern void read_graphviz(FILE* file, GraphvizGraph& g);
+ // Library has not existed for a while
+ // extern void read_graphviz(const std::string& file, GraphvizDigraph& g);
+ // extern void read_graphviz(FILE* file, GraphvizDigraph& g);
+ //
+ // extern void read_graphviz(const std::string& file, GraphvizGraph& g);
+ // extern void read_graphviz(FILE* file, GraphvizGraph& g);
 
   class dynamic_properties_writer
   {
@@ -654,6 +655,14 @@
   }
 };
 
+struct bad_graphviz_syntax: public graph_exception {
+ std::string errmsg;
+ bad_graphviz_syntax(const std::string& errmsg)
+ : errmsg(errmsg) {}
+ const char* what() const throw () {return errmsg.c_str();}
+ ~bad_graphviz_syntax() throw () {};
+};
+
 namespace detail { namespace graph {
 
 typedef std::string id_t;
@@ -789,7 +798,7 @@
   std::copy(std::istream_iterator<char>(in),
             std::istream_iterator<char>(),
             std::back_inserter(data));
- return read_graphviz(data.begin(),data.end(),graph,dp,node_id);
+ return read_graphviz(data,graph,dp,node_id);
 }
 
 } // namespace boost

Modified: trunk/libs/graph/build/Jamfile.v2
==============================================================================
--- trunk/libs/graph/build/Jamfile.v2 (original)
+++ trunk/libs/graph/build/Jamfile.v2 2009-05-25 01:38:56 EDT (Mon, 25 May 2009)
@@ -47,9 +47,10 @@
 
 lib boost_graph
     :
- read_graphviz_spirit.cpp
+ read_graphviz_new.cpp
     graphml
     :
+ <library>../../regex/build//boost_regex
     <define>BOOST_GRAPH_NO_LIB=1
     <link>shared:<define>BOOST_GRAPH_DYN_LINK=1
     # # Intel compiler ICEs if we turn optimization on

Modified: trunk/libs/graph/doc/read_graphviz.html
==============================================================================
--- trunk/libs/graph/doc/read_graphviz.html (original)
+++ trunk/libs/graph/doc/read_graphviz.html 2009-05-25 01:38:56 EDT (Mon, 25 May 2009)
@@ -286,7 +286,7 @@
 <div class="document" id="logo-read-graphviz">
 <h1 class="title"><a class="reference external" href="../../../index.htm"><img align="middle" alt="Boost" class="align-middle" src="../../../boost.png" /></a> <tt class="docutils literal"><span class="pre">read_graphviz</span></tt></h1>
 
-<!-- Copyright (c) 2005 Trustees of Indiana University
+<!-- Copyright (c) 2005-2009 Trustees of Indiana University
 Distributed under the Boost Software License, Version 1.0.
 (See accompanying file LICENSE_1_0.txt or copy at
  http://www.boost.org/LICENSE_1_0.txt) -->
@@ -298,8 +298,13 @@
                      dynamic_properties&amp; dp,
                      const std::string&amp; node_id = &quot;node_id&quot;);
 
- template &lt;typename BidirectionalIterator, typename MutableGraph&gt;
- bool read_graphviz(BidirectionalIterator begin, BidirectionalIterator end,
+ template &lt;typename MutableGraph&gt;
+ bool read_graphviz(std::string&amp; str, MutableGraph&amp; graph,
+ dynamic_properties&amp; dp,
+ const std::string&amp; node_id = &quot;node_id&quot;);
+
+ template &lt;typename InputIterator, typename MutableGraph&gt;
+ bool read_graphviz(InputIterator begin, InputIterator end,
                      MutableGraph&amp; graph, dynamic_properties&amp; dp,
                      const std::string&amp; node_id = &quot;node_id&quot;);
 
@@ -309,11 +314,6 @@
 <a class="reference external" href="http://graphviz.org/">GraphViz</a> DOT language and builds a BGL graph that captures that
 description. Using these functions, you can initialize a graph using
 data stored as text.</p>
-<p>The <tt class="docutils literal"><span class="pre">BOOST_GRAPH_USE_SPIRIT_PARSER</span></tt> macro may be defined before including
-<tt class="docutils literal"><span class="pre">&lt;boost/graph/graphviz.hpp&gt;</span></tt> to include a previous parser implementation,
-with the same interface, that uses Spirit for its implementation. The main
-advantage of that version is that it supports subgraphs to a limited extent,
-while the new parser does not support them at all.</p>
 <p>The DOT language can specify both directed and undirected graphs, and
 <tt class="docutils literal"><span class="pre">read_graphviz</span></tt> differentiates between the two. One must pass
 <tt class="docutils literal"><span class="pre">read_graphviz</span></tt> an undirected graph when reading an undirected graph;
@@ -325,12 +325,12 @@
 property maps. The reader passes all the properties encountered to
 this object, using the GraphViz string keys as the property keys.
 Furthermore, <tt class="docutils literal"><span class="pre">read_graphviz</span></tt> stores node identifier names under the
-vertex property map named node_id.</p>
+vertex property map named <tt class="docutils literal"><span class="pre">node_id</span></tt>.</p>
 <dl class="docutils">
 <dt>Requirements:</dt>
 <dd><ul class="first last simple">
 <li>The type of the graph must model the <a class="reference external" href="MutableGraph.html">Mutable Graph</a> concept.</li>
-<li>The type of the iterator must model the <a class="reference external" href="http://www.sgi.com/tech/stl/BidirectionalIterator.html">Bidirectional Iterator</a>
+<li>The type of the iterator must model the <a class="reference external" href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>
 concept.</li>
 <li>The property map value types must be default-constructible.</li>
 </ul>
@@ -378,6 +378,14 @@
   virtual ~undirected_graph_error() throw();
   virtual const char* what() const throw();
 };
+
+struct bad_graphviz_syntax: public graph_exception {
+ std::string errmsg;
+
+ bad_graphviz_syntax(const std::string&amp;);
+ virtual ~bad_graphviz_syntax() throw();
+ virtual const char* what() const throw();
+};
 </pre>
 <p>Under certain circumstances, <tt class="docutils literal"><span class="pre">read_graphviz</span></tt> will throw one of the
 above exceptions. The three concrete exceptions can all be caught
@@ -398,6 +406,8 @@
 type is passed to <tt class="docutils literal"><span class="pre">read_graph</span></tt> but the textual representation of the
 graph is undirected, as indicated by the <tt class="docutils literal"><span class="pre">graph</span></tt> keyword in the DOT
 language.</p>
+<p>The <tt class="docutils literal"><span class="pre">bad_graphviz_syntax</span></tt> exception occurs when the graph input is not a
+valid GraphViz graph.</p>
 </div>
 <div class="section" id="example">
 <h1><a class="toc-backref" href="#id4">Example</a></h1>
@@ -446,8 +456,8 @@
 <div class="section" id="building-the-graphviz-readers">
 <h1><a class="toc-backref" href="#id5">Building the GraphViz Readers</a></h1>
 <p>To use the GraphViz readers, you will need to build and link against
-the &quot;boost_regex&quot; library. The library can be built by following the
-<a class="reference external" href="../../../more/getting_started.html#Build_Install">Boost Jam Build Instructions</a> for the subdirectory <tt class="docutils literal"><span class="pre">libs/regex/build</span></tt>.</p>
+the &quot;boost_graph&quot; library. The library can be built by following the
+<a class="reference external" href="../../../more/getting_started.html#Build_Install">Boost Jam Build Instructions</a> for the subdirectory <tt class="docutils literal"><span class="pre">libs/graph/build</span></tt>.</p>
 </div>
 <div class="section" id="notes">
 <h1><a class="toc-backref" href="#id6">Notes</a></h1>
@@ -462,24 +472,22 @@
 <li>On successful reading of a graph, every vertex and edge will have
 an associated value for every respective edge and vertex property
 encountered while interpreting the graph. These values will be set
-using the <tt class="docutils literal"><span class="pre">dynamic_properties</span></tt> object. Some properties may be
-<tt class="docutils literal"><span class="pre">put</span></tt> multiple times during the course of reading in order to
-ensure the same semantics as the GraphViz tools. Those edges and
+using the <tt class="docutils literal"><span class="pre">dynamic_properties</span></tt> object. Those edges and
 vertices that are not explicitly given a value for a property (and that
 property has no default) will be
 given the default constructed value of the value type. <strong>Be sure
 that property map value types are default constructible.</strong></li>
+<li><tt class="docutils literal"><span class="pre">read_graphviz</span></tt> treats subgraphs as syntactic sugar. It does not
+reflect subgraphs as actual entities in the BGL. Rather, they are
+used to shorten some edge definitions as well as to give a subset
+of all nodes or edges certain properties. For example, the
+DOT graphs <tt class="docutils literal"><span class="pre">digraph</span> <span class="pre">{</span> <span class="pre">a</span> <span class="pre">-&gt;</span> <span class="pre">subgraph</span> <span class="pre">{b</span> <span class="pre">-&gt;</span> <span class="pre">c}</span> <span class="pre">-&gt;</span> <span class="pre">e</span> <span class="pre">}</span></tt> and
+<tt class="docutils literal"><span class="pre">digraph</span> <span class="pre">{</span> <span class="pre">a</span> <span class="pre">-&gt;</span> <span class="pre">b</span> <span class="pre">-&gt;</span> <span class="pre">e</span> <span class="pre">;</span> <span class="pre">a</span> <span class="pre">-&gt;</span> <span class="pre">c</span> <span class="pre">-&gt;</span> <span class="pre">e</span> <span class="pre">;</span> <span class="pre">b</span> <span class="pre">-&gt;</span> <span class="pre">c}</span></tt> are equivalent.</li>
+<li>Subgraph IDs refer to subgraphs defined earlier in the graph
+description. Undefined subgraphs behave as empty subgraphs
+(<tt class="docutils literal"><span class="pre">{}</span></tt>). This is the same behavior as GraphViz.</li>
 </ul>
 </blockquote>
-<!-- - ``read_graphviz`` treats subgraphs as syntactic sugar. It does not
- reflect subgraphs as actual entities in the BGL. Rather, they are
- used to shorten some edge definitions as well as to give a subset
- of all nodes or edges certain properties. For example, the
- DOT graphs ``digraph { a -> subgraph {b -> c} -> e }`` and
- ``digraph { a -> b -> e ; a -> c -> e ; b -> c}`` are equivalent. -->
-<!-- - Subgraph IDs refer to subgraphs defined earlier in the graph
- description. Undefined subgraphs behave as empty subgraphs
- (``{}``). -->
 </div>
 <div class="section" id="see-also">
 <h1><a class="toc-backref" href="#id7">See Also</a></h1>
@@ -489,15 +497,14 @@
 <h1><a class="toc-backref" href="#id8">Future Work</a></h1>
 <blockquote>
 <ul class="simple">
-<li>Support for subgraphs (currently parsed but ignored).</li>
 <li>Support for HTML strings.</li>
 <li>Passing port information to BGL.</li>
 <li>Expanding escape codes in the same way GraphViz does.</li>
 <li>Enforcement of the <tt class="docutils literal"><span class="pre">strict</span></tt> keyword (ignoring self-loops and parallel
 edges).</li>
+<li>Support for optional recognition of subgraphs as distinct entities.</li>
 </ul>
 </blockquote>
-<!-- - Support for optional recognition of subgraphs as distinct entities. -->
 </div>
 </div>
 </body>

Modified: trunk/libs/graph/doc/read_graphviz.rst
==============================================================================
--- trunk/libs/graph/doc/read_graphviz.rst (original)
+++ trunk/libs/graph/doc/read_graphviz.rst 2009-05-25 01:38:56 EDT (Mon, 25 May 2009)
@@ -6,7 +6,7 @@
    :align: middle
    :alt: Boost
 
-.. Copyright (c) 2005 Trustees of Indiana University
+.. Copyright (c) 2005-2009 Trustees of Indiana University
     Distributed under the Boost Software License, Version 1.0.
     (See accompanying file LICENSE_1_0.txt or copy at
      http://www.boost.org/LICENSE_1_0.txt)
@@ -21,8 +21,13 @@
                        dynamic_properties& dp,
                        const std::string& node_id = "node_id");
   
- template <typename BidirectionalIterator, typename MutableGraph>
- bool read_graphviz(BidirectionalIterator begin, BidirectionalIterator end,
+ template <typename MutableGraph>
+ bool read_graphviz(std::string& str, MutableGraph& graph,
+ dynamic_properties& dp,
+ const std::string& node_id = "node_id");
+
+ template <typename InputIterator, typename MutableGraph>
+ bool read_graphviz(InputIterator begin, InputIterator end,
                        MutableGraph& graph, dynamic_properties& dp,
                        const std::string& node_id = "node_id");
   
@@ -34,12 +39,6 @@
 description. Using these functions, you can initialize a graph using
 data stored as text.
 
-The ``BOOST_GRAPH_USE_SPIRIT_PARSER`` macro may be defined before including
-``<boost/graph/graphviz.hpp>`` to include a previous parser implementation,
-with the same interface, that uses Spirit for its implementation. The main
-advantage of that version is that it supports subgraphs to a limited extent,
-while the new parser does not support them at all.
-
 The DOT language can specify both directed and undirected graphs, and
 ``read_graphviz`` differentiates between the two. One must pass
 ``read_graphviz`` an undirected graph when reading an undirected graph;
@@ -52,11 +51,11 @@
 property maps. The reader passes all the properties encountered to
 this object, using the GraphViz string keys as the property keys.
 Furthermore, ``read_graphviz`` stores node identifier names under the
-vertex property map named node_id.
+vertex property map named ``node_id``.
 
 Requirements:
  - The type of the graph must model the `Mutable Graph`_ concept.
- - The type of the iterator must model the `Bidirectional Iterator`_
+ - The type of the iterator must model the `Input Iterator`_
    concept.
  - The property map value types must be default-constructible.
 
@@ -96,6 +95,14 @@
     virtual const char* what() const throw();
   };
 
+ struct bad_graphviz_syntax: public graph_exception {
+ std::string errmsg;
+
+ bad_graphviz_syntax(const std::string&);
+ virtual ~bad_graphviz_syntax() throw();
+ virtual const char* what() const throw();
+ };
+
 Under certain circumstances, ``read_graphviz`` will throw one of the
 above exceptions. The three concrete exceptions can all be caught
 using the general ``graph_exception`` moniker when greater precision
@@ -120,6 +127,10 @@
 graph is undirected, as indicated by the ``graph`` keyword in the DOT
 language.
 
+The ``bad_graphviz_syntax`` exception occurs when the graph input is not a
+valid GraphViz graph.
+
+
 Example
 -------
 The following example illustrates a relatively simple use of the
@@ -171,8 +182,8 @@
 Building the GraphViz Readers
 -----------------------------
 To use the GraphViz readers, you will need to build and link against
-the "boost_regex" library. The library can be built by following the
-`Boost Jam Build Instructions`_ for the subdirectory ``libs/regex/build``.
+the "boost_graph" library. The library can be built by following the
+`Boost Jam Build Instructions`_ for the subdirectory ``libs/graph/build``.
 
 
 Notes
@@ -188,15 +199,12 @@
  - On successful reading of a graph, every vertex and edge will have
    an associated value for every respective edge and vertex property
    encountered while interpreting the graph. These values will be set
- using the ``dynamic_properties`` object. Some properties may be
- ``put`` multiple times during the course of reading in order to
- ensure the same semantics as the GraphViz tools. Those edges and
+ using the ``dynamic_properties`` object. Those edges and
    vertices that are not explicitly given a value for a property (and that
    property has no default) will be
    given the default constructed value of the value type. **Be sure
    that property map value types are default constructible.**
 
-..
  - ``read_graphviz`` treats subgraphs as syntactic sugar. It does not
    reflect subgraphs as actual entities in the BGL. Rather, they are
    used to shorten some edge definitions as well as to give a subset
@@ -204,10 +212,9 @@
    DOT graphs ``digraph { a -> subgraph {b -> c} -> e }`` and
    ``digraph { a -> b -> e ; a -> c -> e ; b -> c}`` are equivalent.
 
-..
  - Subgraph IDs refer to subgraphs defined earlier in the graph
    description. Undefined subgraphs behave as empty subgraphs
- (``{}``).
+ (``{}``). This is the same behavior as GraphViz.
 
 See Also
 --------
@@ -218,8 +225,6 @@
 Future Work
 -----------
 
- - Support for subgraphs (currently parsed but ignored).
-
  - Support for HTML strings.
 
  - Passing port information to BGL.
@@ -229,13 +234,12 @@
  - Enforcement of the ``strict`` keyword (ignoring self-loops and parallel
    edges).
     
-..
  - Support for optional recognition of subgraphs as distinct entities.
 
 
 .. _GraphViz: http://graphviz.org/
 .. _`Mutable Graph`: MutableGraph.html
-.. _`Bidirectional Iterator`: http://www.sgi.com/tech/stl/BidirectionalIterator.html
+.. _`Input Iterator`: http://www.sgi.com/tech/stl/InputIterator.html
 .. _dynamic_properties: ../../property_map/doc/dynamic_property_map.html
 .. _write_graphviz: write-graphviz.html
 .. _Boost Jam Build Instructions: ../../../more/getting_started.html#Build_Install

Added: trunk/libs/graph/src/read_graphviz_new.cpp
==============================================================================
--- (empty file)
+++ trunk/libs/graph/src/read_graphviz_new.cpp 2009-05-25 01:38:56 EDT (Mon, 25 May 2009)
@@ -0,0 +1,810 @@
+// Copyright 2004-9 Trustees of Indiana University
+
+// Distributed under the Boost Software License, Version 1.0.
+// (See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt)
+
+//
+// read_graphviz_new.cpp -
+// Initialize a model of the BGL's MutableGraph concept and an associated
+// collection of property maps using a graph expressed in the GraphViz
+// DOT Language.
+//
+// Based on the grammar found at:
+// http://www.graphviz.org/cvs/doc/info/lang.html
+//
+// Jeremiah rewrite used grammar found at:
+// http://www.graphviz.org/doc/info/lang.html
+// and page 34 or http://www.graphviz.org/pdf/dotguide.pdf
+//
+// See documentation for this code at:
+// http://www.boost.org/libs/graph/doc/read-graphviz.html
+//
+
+// Author: Jeremiah Willcock
+// Ronald Garcia
+//
+
+#include <boost/ref.hpp>
+#include <boost/function/function2.hpp>
+#include <boost/property_map/dynamic_property_map.hpp>
+#include <boost/graph/graph_traits.hpp>
+#include <boost/detail/workaround.hpp>
+#include <boost/algorithm/string/case_conv.hpp>
+#include <algorithm>
+#include <exception> // for std::exception
+#include <string>
+#include <vector>
+#include <set>
+#include <utility>
+#include <map>
+#include <iostream>
+#include <cstdlib>
+#include <boost/throw_exception.hpp>
+#include <boost/regex.hpp>
+#include <boost/function.hpp>
+#include <boost/bind.hpp>
+#include <boost/graph/graphviz.hpp>
+
+namespace boost {
+
+namespace read_graphviz_detail {
+ struct token {
+ enum token_type {
+ kw_strict,
+ kw_graph,
+ kw_digraph,
+ kw_node,
+ kw_edge,
+ kw_subgraph,
+ left_brace,
+ right_brace,
+ semicolon,
+ equal,
+ left_bracket,
+ right_bracket,
+ comma,
+ colon,
+ dash_greater,
+ dash_dash,
+ plus,
+ left_paren,
+ right_paren,
+ at,
+ identifier,
+ quoted_string, // Only used internally in tokenizer
+ eof,
+ invalid
+ } type;
+ std::string normalized_value; // May have double-quotes removed and/or some escapes replaced
+ token(token_type type, const std::string& normalized_value)
+ : type(type), normalized_value(normalized_value) {}
+ token(): type(invalid), normalized_value("") {}
+ friend std::ostream& operator<<(std::ostream& o, const token& t) {
+ switch (t.type) {
+ case token::kw_strict: o << "<strict>"; break;
+ case token::kw_graph: o << "<graph>"; break;
+ case token::kw_digraph: o << "<digraph>"; break;
+ case token::kw_node: o << "<node>"; break;
+ case token::kw_edge: o << "<edge>"; break;
+ case token::kw_subgraph: o << "<subgraph>"; break;
+ case token::left_brace: o << "<left_brace>"; break;
+ case token::right_brace: o << "<right_brace>"; break;
+ case token::semicolon: o << "<semicolon>"; break;
+ case token::equal: o << "<equal>"; break;
+ case token::left_bracket: o << "<left_bracket>"; break;
+ case token::right_bracket: o << "<right_bracket>"; break;
+ case token::comma: o << "<comma>"; break;
+ case token::colon: o << "<colon>"; break;
+ case token::dash_greater: o << "<dash-greater>"; break;
+ case token::dash_dash: o << "<dash-dash>"; break;
+ case token::plus: o << "<plus>"; break;
+ case token::left_paren: o << "<left_paren>"; break;
+ case token::right_paren: o << "<right_paren>"; break;
+ case token::at: o << "<at>"; break;
+ case token::identifier: o << "<identifier>"; break;
+ case token::quoted_string: o << "<quoted_string>"; break;
+ case token::eof: o << "<eof>"; break;
+ default: o << "<invalid type>"; break;
+ }
+ o << " '" << t.normalized_value << "'";
+ return o;
+ }
+ };
+
+ bad_graphviz_syntax lex_error(const std::string& errmsg, char bad_char) {
+ return bad_graphviz_syntax(errmsg + " (char is '" + bad_char + "')");
+ }
+
+ bad_graphviz_syntax parse_error(const std::string& errmsg, const token& bad_token) {
+ return bad_graphviz_syntax(errmsg + " (token is \"" + boost::lexical_cast<std::string>(bad_token) + "\")");
+ }
+
+ struct tokenizer {
+ std::string::const_iterator begin, end;
+ std::vector<token> lookahead;
+ // Precomputed regexes
+ boost::regex stuff_to_skip;
+ boost::regex basic_id_token;
+ boost::regex punctuation_token;
+ boost::regex number_token;
+ boost::regex quoted_string_token;
+
+ tokenizer(const std::string& str) : begin(str.begin()), end(str.end())
+ {
+ std::string end_of_token = "(?=(?:\\W))";
+ std::string whitespace = "(?:\\s+)";
+ std::string slash_slash_comment = "(?://.*$)";
+ std::string slash_star_comment = "(?:/\\*.*?\\*/)";
+ std::string hash_comment = "(?:^#.*?$)";
+ stuff_to_skip = "\\A(?:" + whitespace + "|" + slash_slash_comment + "|" + slash_star_comment + "|" + hash_comment + ")*";
+ basic_id_token = "\\A([[:alpha:]_](?:\\w*))";
+ punctuation_token = "\\A([][{};=,:+()@]|[-][>-])";
+ number_token = "\\A([-]?(?:(?:\\.\\d+)|(?:\\d+(?:\\.\\d*)?)))";
+ quoted_string_token = "\\A(\"(?:[^\"\\\\]|(?:[\\\\].))*\")";
+ }
+
+ void skip() {
+ boost::match_results<std::string::const_iterator> results;
+ bool found = boost::regex_search(begin, end, results, stuff_to_skip);
+ assert (found);
+ boost::sub_match<std::string::const_iterator> sm1 = results.suffix();
+ assert (sm1.second == end);
+ begin = sm1.first;
+ }
+
+ token get_token_raw() {
+ if (!lookahead.empty()) {
+ token t = lookahead.front();
+ lookahead.erase(lookahead.begin());
+ return t;
+ }
+ skip();
+ if (begin == end) return token(token::eof, "");
+ // Look for keywords first
+ bool found;
+ boost::match_results<std::string::const_iterator> results;
+ found = boost::regex_search(begin, end, results, basic_id_token);
+ if (found) {
+ std::string str = results[1].str();
+ std::string str_lower = boost::algorithm::to_lower_copy(str);
+ begin = results.suffix().first;
+ if (str_lower == "strict") {
+ return token(token::kw_strict, str);
+ } else if (str_lower == "graph") {
+ return token(token::kw_graph, str);
+ } else if (str_lower == "digraph") {
+ return token(token::kw_digraph, str);
+ } else if (str_lower == "node") {
+ return token(token::kw_node, str);
+ } else if (str_lower == "edge") {
+ return token(token::kw_edge, str);
+ } else if (str_lower == "subgraph") {
+ return token(token::kw_subgraph, str);
+ } else {
+ return token(token::identifier, str);
+ }
+ }
+ found = boost::regex_search(begin, end, results, punctuation_token);
+ if (found) {
+ std::string str = results[1].str();
+ begin = results.suffix().first;
+ switch (str[0]) {
+ case '[': return token(token::left_bracket, str);
+ case ']': return token(token::right_bracket, str);
+ case '{': return token(token::left_brace, str);
+ case '}': return token(token::right_brace, str);
+ case ';': return token(token::semicolon, str);
+ case '=': return token(token::equal, str);
+ case ',': return token(token::comma, str);
+ case ':': return token(token::colon, str);
+ case '+': return token(token::plus, str);
+ case '(': return token(token::left_paren, str);
+ case ')': return token(token::right_paren, str);
+ case '@': return token(token::at, str);
+ case '-': {
+ switch (str[1]) {
+ case '-': return token(token::dash_dash, str);
+ case '>': return token(token::dash_greater, str);
+ default: assert (!"Definition of punctuation_token does not match switch statement");
+ }
+ }
+ default: assert (!"Definition of punctuation_token does not match switch statement"); std::abort();
+ }
+ }
+ found = boost::regex_search(begin, end, results, number_token);
+ if (found) {
+ std::string str = results[1].str();
+ begin = results.suffix().first;
+ return token(token::identifier, str);
+ }
+ found = boost::regex_search(begin, end, results, quoted_string_token);
+ if (found) {
+ std::string str = results[1].str();
+ begin = results.suffix().first;
+ // Remove the beginning and ending quotes
+ assert (str.size() >= 2);
+ str.erase(str.begin());
+ str.erase(str.end() - 1);
+ // Unescape quotes in the middle, but nothing else (see format spec)
+ for (size_t i = 0; i + 1 < str.size() /* May change */; ++i) {
+ if (str[i] == '\\' && str[i + 1] == '"') {
+ str.erase(str.begin() + i);
+ // Don't need to adjust i
+ } else if (str[i] == '\\' && str[i + 1] == '\n') {
+ str.erase(str.begin() + i);
+ str.erase(str.begin() + i);
+ --i; // Invert ++ that will be applied
+ }
+ }
+ return token(token::quoted_string, str);
+ }
+ if (*begin == '<') {
+ throw_lex_error("HTML strings not supported");
+ return token();
+ } else {
+ throw_lex_error("Invalid character");
+ return token();
+ }
+ }
+
+ token peek_token_raw() {
+ if (lookahead.empty()) {
+ token t = get_token_raw();
+ lookahead.push_back(t);
+ }
+ return lookahead.front();
+ }
+
+ token get_token() { // Handle string concatenation
+ token t = get_token_raw();
+ if (t.type != token::quoted_string) return t;
+ std::string str = t.normalized_value;
+ while (peek_token_raw().type == token::plus) {
+ get_token_raw();
+ token t2 = get_token_raw();
+ if (t2.type != token::quoted_string) {
+ throw_lex_error("Must have quoted string after string concatenation");
+ }
+ str += t2.normalized_value;
+ }
+ return token(token::identifier, str); // Note that quoted_string does not get passed to the parser
+ }
+
+ void throw_lex_error(const std::string& errmsg) {
+ boost::throw_exception(lex_error(errmsg, *begin));
+ }
+ };
+
+ struct edge_endpoint {
+ bool is_subgraph;
+ node_and_port node_ep;
+ subgraph_name subgraph_ep;
+
+ static edge_endpoint node(const node_and_port& ep) {
+ edge_endpoint r;
+ r.is_subgraph = false;
+ r.node_ep = ep;
+ return r;
+ }
+
+ static edge_endpoint subgraph(const subgraph_name& ep) {
+ edge_endpoint r;
+ r.is_subgraph = true;
+ r.subgraph_ep = ep;
+ return r;
+ }
+ };
+
+ struct node_or_subgraph_ref {
+ bool is_subgraph;
+ std::string name; // Name for subgraphs or nodes, "___root___" for root graph
+ };
+
+ static node_or_subgraph_ref noderef(const node_name& n) {
+ node_or_subgraph_ref r;
+ r.is_subgraph = false;
+ r.name = n;
+ return r;
+ }
+
+ static node_or_subgraph_ref subgraphref(const subgraph_name& n) {
+ node_or_subgraph_ref r;
+ r.is_subgraph = true;
+ r.name = n;
+ return r;
+ }
+
+ typedef std::vector<node_or_subgraph_ref> subgraph_member_list;
+
+ struct subgraph_info {
+ properties def_node_props;
+ properties def_edge_props;
+ subgraph_member_list members;
+ };
+
+ struct parser {
+ tokenizer the_tokenizer;
+ std::vector<token> lookahead;
+ parser_result& r;
+ std::map<subgraph_name, subgraph_info> subgraphs;
+ std::string current_subgraph_name;
+ int sgcounter; // Counter for anonymous subgraphs
+
+ subgraph_info& current() {return subgraphs[current_subgraph_name];}
+ properties& current_graph_props() {return r.graph_props[current_subgraph_name];}
+ subgraph_member_list& current_members() {return current().members;}
+
+ parser(const std::string& gr, parser_result& result)
+ : the_tokenizer(gr), lookahead(), r(result), sgcounter(0) {
+ current_subgraph_name = "___root___";
+ current() = subgraph_info(); // Initialize root graph
+ current_graph_props().clear();
+ current_members().clear();
+ }
+
+ token get() {
+ if (lookahead.empty()) {
+ token t = the_tokenizer.get_token();
+ return t;
+ } else {
+ token t = lookahead.front();
+ lookahead.erase(lookahead.begin());
+ return t;
+ }
+ }
+
+ token peek() {
+ if (lookahead.empty()) {
+ lookahead.push_back(the_tokenizer.get_token());
+ }
+ return lookahead.front();
+ }
+
+ void error(const std::string& str) {
+ boost::throw_exception(parse_error(str, peek()));
+ }
+
+ void parse_graph(bool want_directed) {
+ bool is_strict = false;
+ bool is_directed;
+ std::string name;
+ if (peek().type == token::kw_strict) {get(); is_strict = true;}
+ switch (peek().type) {
+ case token::kw_graph: is_directed = false; break;
+ case token::kw_digraph: is_directed = true; break;
+ default: error("Wanted \"graph\" or \"digraph\"");
+ }
+ r.graph_is_directed = is_directed; // Used to check edges
+ r.graph_is_strict = is_strict;
+ if (want_directed != r.graph_is_directed) {
+ if (want_directed) {
+ boost::throw_exception(boost::undirected_graph_error());
+ } else {
+ boost::throw_exception(boost::directed_graph_error());
+ }
+ }
+ get();
+ switch (peek().type) {
+ case token::identifier: name = peek().normalized_value; get(); break;
+ case token::left_brace: break;
+ default: error("Wanted a graph name or left brace");
+ }
+ if (peek().type == token::left_brace) get(); else error("Wanted a left brace to start the graph");
+ parse_stmt_list();
+ if (peek().type == token::right_brace) get(); else error("Wanted a right brace to end the graph");
+ if (peek().type == token::eof) {} else error("Wanted end of file");
+ }
+
+ void parse_stmt_list() {
+ while (true) {
+ if (peek().type == token::right_brace) return;
+ parse_stmt();
+ if (peek().type == token::semicolon) get();
+ }
+ }
+
+ void parse_stmt() {
+ switch (peek().type) {
+ case token::kw_node:
+ case token::kw_edge:
+ case token::kw_graph: parse_attr_stmt(); break;
+ case token::kw_subgraph:
+ case token::left_brace:
+ case token::identifier: {
+ token id = get();
+ if (id.type == token::identifier && peek().type == token::equal) { // Graph property
+ get();
+ if (peek().type != token::identifier) error("Wanted identifier as right side of =");
+ token id2 = get();
+ current_graph_props()[id.normalized_value] = id2.normalized_value;
+ } else {
+ edge_endpoint ep = parse_endpoint_rest(id);
+ if (peek().type == token::dash_dash || peek().type == token::dash_greater) { // Edge
+ parse_edge_stmt(ep);
+ } else {
+ if (!ep.is_subgraph) { // Only nodes can have attribute lists
+ // This node already exists because of its first mention
+ // (properties set to defaults by parse_node_and_port, called
+ // by parse_endpoint_rest)
+ properties this_node_props;
+ if (peek().type == token::left_bracket) {
+ parse_attr_list(this_node_props);
+ }
+ for (properties::const_iterator i = this_node_props.begin();
+ i != this_node_props.end(); ++i) {
+ // Override old properties with same names
+ r.nodes[ep.node_ep.name][i->first] = i->second;
+ }
+ current_members().push_back(noderef(ep.node_ep.name));
+ } else {
+ current_members().push_back(subgraphref(ep.subgraph_ep));
+ }
+ }
+ }
+ break;
+ }
+ default: error("Invalid start token for statement");
+ }
+ }
+
+ void parse_attr_stmt() {
+ switch (get().type) {
+ case token::kw_graph: parse_attr_list(current_graph_props()); break;
+ case token::kw_node: parse_attr_list(current().def_node_props); break;
+ case token::kw_edge: parse_attr_list(current().def_edge_props); break;
+ default: assert (!"Bad attr_stmt case"); std::abort();
+ }
+ }
+
+ edge_endpoint parse_endpoint() {
+ switch (peek().type) {
+ case token::kw_subgraph:
+ case token::left_brace:
+ case token::identifier: {
+ token first = get();
+ return parse_endpoint_rest(first);
+ }
+ default: {
+ error("Wanted \"subgraph\", \"{\", or identifier to start node or subgraph");
+ return edge_endpoint();
+ }
+ }
+ }
+
+ edge_endpoint parse_endpoint_rest(const token& first_token) {
+ switch (first_token.type) {
+ case token::kw_subgraph:
+ case token::left_brace: return edge_endpoint::subgraph(parse_subgraph(first_token));
+ default: return edge_endpoint::node(parse_node_and_port(first_token));
+ }
+ }
+
+ subgraph_name parse_subgraph(const token& first_token) {
+ std::string name;
+ bool is_anonymous = true;
+ if (first_token.type == token::kw_subgraph) {
+ if (peek().type == token::identifier) {
+ name = get().normalized_value;
+ is_anonymous = false;
+ }
+ }
+ if (is_anonymous) {
+ name = "___subgraph_" +
+ boost::lexical_cast<std::string>(++sgcounter);
+ }
+ if (subgraphs.find(name) == subgraphs.end()) {
+ subgraphs[name] = current(); // Initialize properties and defaults
+ subgraphs[name].members.clear(); // Except member list
+ }
+ if (first_token.type == token::kw_subgraph && peek().type != token::left_brace) {
+ if (is_anonymous) error("Subgraph reference needs a name");
+ return name;
+ }
+ subgraph_name old_sg = current_subgraph_name;
+ current_subgraph_name = name;
+ if (peek().type == token::left_brace) get(); else error("Wanted left brace to start subgraph");
+ parse_stmt_list();
+ if (peek().type == token::right_brace) get(); else error("Wanted right brace to end subgraph");
+ current_subgraph_name = old_sg;
+ return name;
+ }
+
+ node_and_port parse_node_and_port(const token& name) {
+ // A node ID is a node name, followed optionally by a port angle and a
+ // port location (in either order); a port location is either :id,
+ // :id:id, or :(id,id); the last two forms are treated as equivalent
+ // although I am not sure about that.
+ node_and_port id;
+ id.name = name.normalized_value;
+ parse_more:
+ switch (peek().type) {
+ case token::at: {
+ get();
+ if (peek().type != token::identifier) error("Wanted identifier as port angle");
+ if (id.angle != "") error("Duplicate port angle");
+ id.angle = get().normalized_value;
+ goto parse_more;
+ }
+ case token::colon: {
+ get();
+ if (!id.location.empty()) error("Duplicate port location");
+ switch (peek().type) {
+ case token::identifier: {
+ id.location.push_back(get().normalized_value);
+ switch (peek().type) {
+ case token::colon: {
+ get();
+ if (peek().type != token::identifier) error("Wanted identifier as port location");
+ id.location.push_back(get().normalized_value);
+ goto parse_more;
+ }
+ default: goto parse_more;
+ }
+ }
+ case token::left_paren: {
+ get();
+ if (peek().type != token::identifier) error("Wanted identifier as first element of port location");
+ id.location.push_back(get().normalized_value);
+ if (peek().type != token::comma) error("Wanted comma between parts of port location");
+ get();
+ if (peek().type != token::identifier) error("Wanted identifier as second element of port location");
+ id.location.push_back(get().normalized_value);
+ if (peek().type != token::right_paren) error("Wanted right parenthesis to close port location");
+ get();
+ goto parse_more;
+ }
+ default: error("Wanted identifier or left parenthesis as start of port location");
+ }
+ }
+ default: break;
+ }
+ if (r.nodes.find(id.name) == r.nodes.end()) { // First mention
+ r.nodes[id.name] = current().def_node_props;
+ }
+ return id;
+ }
+
+ void parse_edge_stmt(const edge_endpoint& lhs) {
+ std::vector<edge_endpoint> nodes_in_chain(1, lhs);
+ while (true) {
+ bool leave_loop = true;
+ switch (peek().type) {
+ case token::dash_dash: {
+ if (r.graph_is_directed) error("Using -- in directed graph");
+ get();
+ nodes_in_chain.push_back(parse_endpoint());
+ leave_loop = false;
+ break;
+ }
+ case token::dash_greater: {
+ if (!r.graph_is_directed) error("Using -> in undirected graph");
+ get();
+ nodes_in_chain.push_back(parse_endpoint());
+ leave_loop = false;
+ break;
+ }
+ default: leave_loop = true; break;
+ }
+ if (leave_loop) break;
+ }
+ properties this_edge_props = current().def_edge_props;
+ if (peek().type == token::left_bracket) parse_attr_list(this_edge_props);
+ assert (nodes_in_chain.size() >= 2); // Should be in node parser otherwise
+ for (size_t i = 0; i + 1 < nodes_in_chain.size(); ++i) {
+ do_orig_edge(nodes_in_chain[i], nodes_in_chain[i + 1], this_edge_props);
+ }
+ }
+
+ // Do an edge from the file, the edge may need to be expanded if it connects to a subgraph
+ void do_orig_edge(const edge_endpoint& src, const edge_endpoint& tgt, const properties& props) {
+ std::set<node_and_port> sources = get_recursive_members(src);
+ std::set<node_and_port> targets = get_recursive_members(tgt);
+ for (std::set<node_and_port>::const_iterator i = sources.begin(); i != sources.end(); ++i) {
+ for (std::set<node_and_port>::const_iterator j = targets.begin(); j != targets.end(); ++j) {
+ do_edge(*i, *j, props);
+ }
+ }
+ }
+
+ // Get nodes in an edge_endpoint, recursively
+ std::set<node_and_port> get_recursive_members(const edge_endpoint& orig_ep) {
+ std::set<node_and_port> result;
+ std::vector<edge_endpoint> worklist(1, orig_ep);
+ std::set<subgraph_name> done;
+ while (!worklist.empty()) {
+ edge_endpoint ep = worklist.back();
+ worklist.pop_back();
+ if (ep.is_subgraph) {
+ if (done.find(ep.subgraph_ep) == done.end()) {
+ done.insert(ep.subgraph_ep);
+ std::map<subgraph_name, subgraph_info>::const_iterator
+ info_i = subgraphs.find(ep.subgraph_ep);
+ if (info_i != subgraphs.end()) {
+ const subgraph_member_list& members = info_i->second.members;
+ for (subgraph_member_list::const_iterator i = members.begin();
+ i != members.end(); ++i) {
+ node_or_subgraph_ref ref = *i;
+ if (ref.is_subgraph) {
+ worklist.push_back(edge_endpoint::subgraph(ref.name));
+ } else {
+ node_and_port np;
+ np.name = ref.name;
+ worklist.push_back(edge_endpoint::node(np));
+ }
+ }
+ }
+ }
+ } else {
+ result.insert(ep.node_ep);
+ }
+ }
+ return result;
+ }
+
+ // Do a fixed-up edge, with only nodes as endpoints
+ void do_edge(const node_and_port& src, const node_and_port& tgt, const properties& props) {
+ edge_info e;
+ e.source = src;
+ e.target = tgt;
+ e.props = props;
+ r.edges.push_back(e);
+ }
+
+ void parse_attr_list(properties& props) {
+ while (true) {
+ if (peek().type == token::left_bracket) get(); else error("Wanted left bracket to start attribute list");
+ while (true) {
+ switch (peek().type) {
+ case token::right_bracket: break;
+ case token::identifier: {
+ std::string lhs = get().normalized_value;
+ std::string rhs = "true";
+ if (peek().type == token::equal) {
+ get();
+ if (peek().type != token::identifier) error("Wanted identifier as value of attributed");
+ rhs = get().normalized_value;
+ }
+ props[lhs] = rhs;
+ break;
+ }
+ default: error("Wanted identifier as name of attribute");
+ }
+ if (peek().type == token::comma) {get(); continue;}
+ break;
+ }
+ if (peek().type == token::right_bracket) get(); else error("Wanted right bracket to end attribute list");
+ if (peek().type != token::left_bracket) break;
+ }
+ }
+ };
+
+ void parse_graphviz_from_string(const std::string& str, parser_result& result, bool want_directed) {
+ parser p(str, result);
+ p.parse_graph(want_directed);
+ }
+
+ // Some debugging stuff
+ std::ostream& operator<<(std::ostream& o, const node_and_port& n) {
+ o << n.name;
+ for (size_t i = 0; i < n.location.size(); ++i) {
+ o << ":" << n.location[i];
+ }
+ if (!n.angle.empty()) o << "@" << n.angle;
+ return o;
+ }
+
+ // Can't be operator<< because properties is just an std::map
+ std::string props_to_string(const properties& props) {
+ std::string result = "[";
+ for (properties::const_iterator i = props.begin(); i != props.end(); ++i) {
+ if (i != props.begin()) result += ", ";
+ result += i->first + "=" + i->second;
+ }
+ result += "]";
+ return result;
+ }
+
+ void translate_results_to_graph(const parser_result& r, ::boost::detail::graph::mutate_graph* mg) {
+ typedef boost::detail::graph::node_t vertex;
+ typedef boost::detail::graph::edge_t edge;
+ for (std::map<node_name, properties>::const_iterator i = r.nodes.begin(); i != r.nodes.end(); ++i) {
+ std::cerr << i->first << " " << props_to_string(i->second) << std::endl;
+ mg->do_add_vertex(i->first);
+ for (properties::const_iterator j = i->second.begin(); j != i->second.end(); ++j) {
+ mg->set_node_property(j->first, i->first, j->second);
+ }
+ }
+ for (std::vector<edge_info>::const_iterator i = r.edges.begin(); i != r.edges.end(); ++i) {
+ const edge_info& ei = *i;
+ std::cerr << ei.source << " -> " << ei.target << " " << props_to_string(ei.props) << std::endl;
+ edge e = edge::new_edge();
+ mg->do_add_edge(e, ei.source.name, ei.target.name);
+ for (properties::const_iterator j = ei.props.begin(); j != ei.props.end(); ++j) {
+ mg->set_edge_property(j->first, e, j->second);
+ }
+ }
+ std::map<subgraph_name, properties>::const_iterator root_graph_props_i = r.graph_props.find("___root___");
+ assert (root_graph_props_i != r.graph_props.end()); // Should not happen
+ const properties& root_graph_props = root_graph_props_i->second;
+ std::cerr << "ending graph " << props_to_string(root_graph_props) << std::endl;
+ for (properties::const_iterator i = root_graph_props.begin(); i != root_graph_props.end(); ++i) {
+ mg->set_graph_property(i->first, i->second);
+ }
+ }
+
+} // end namespace read_graphviz_detail
+
+namespace detail {
+ namespace graph {
+
+ bool read_graphviz(const std::string& str, boost::detail::graph::mutate_graph* mg) {
+ read_graphviz_detail::parser_result parsed_file;
+ read_graphviz_detail::parse_graphviz_from_string(str, parsed_file, mg->is_directed());
+ read_graphviz_detail::translate_results_to_graph(parsed_file, mg);
+ return true;
+ }
+
+ } // end namespace graph
+} // end namespace detail
+
+} // end namespace boost
+
+// GraphViz format notes (tested using "dot version 1.13 (v16) (Mon August 23,
+// 2004)", grammar from references in read_graphviz_new.hpp):
+
+// Subgraphs don't nest (all a0 subgraphs are the same thing), but a node or
+// subgraph can have multiple parents (sources online say that the layout
+// algorithms can't handle non-tree structures of clusters, but it seems to
+// read them the same from the file). The containment relation is required to
+// be a DAG, though; it appears that a node or subgraph can't be moved into an
+// ancestor of a subgraph where it already was (we don't enforce that but do a
+// DFS when finding members to prevent cycles). Nodes get their properties by
+// when they are first mentioned, and can only have them overridden after that
+// by explicit properties on that particular node. Closing and reopening the
+// same subgraph name adds to its members, and graph properties and node/edge
+// defaults are preserved in that subgraph. The members of a subgraph used in
+// an edge are gathered when the edge is read, even if new members are added to
+// the subgraph later. Ports are allowed in a lot more places in the grammar
+// than Dot uses. For example, declaring a node seems to ignore ports, and I
+// don't think it's possible to set properties on a particular port. Adding an
+// edge between two ports on the same node seems to make Dot unhappy (crashed
+// for me).
+
+// Test graph for GraphViz behavior on strange combinations of subgraphs and
+// such. I don't have anywhere else to put this file.
+
+#if 0
+dIGRaph foo {
+ node [color=blue]
+ subgraph a -> b
+ subgraph a {c}
+ subgraph a -> d
+ subgraph a {node [color=red]; e}
+ subgraph a -> f
+ subgraph a {g} -> h
+ subgraph a {node [color=green]; i} -> j
+ subgraph a {node [color=yellow]}
+
+ subgraph a0 {node [color=black]; subgraph a1 {node [color=white]}}
+ node [color=pink] zz
+ subgraph a0 {x1}
+ subgraph a0 {subgraph a1 {x2}}
+
+ subgraph a0 -> x3
+ subgraph a0 {subgraph a1 -> x3}
+ x3
+ subgraph a0 {subgraph a0 {node [color=xxx]; x2} x7}
+ x2 [color=yyy]
+ subgraph cluster_ax {foo; subgraph a0}
+ subgraph a0 {foo2}
+ subgraph cluster_ax {foo3}
+ // subgraph a0 -> subgraph a0
+
+ bgcolor=yellow
+ subgraph cluster_a2 {y1}
+ // y1:n -> y1:(s,3)@se
+ y1_at_se [color=green]
+ y1_at_n [color=red]
+}
+#endif


Boost-Commit list run by bdawes at acm.org, david.abrahams at rcn.com, gregod at cs.rpi.edu, cpdaniel at pacbell.net, john at johnmaddock.co.uk