#include #include #include #include #include #include #include #include #include #include #include namespace { struct TaggedText; typedef boost::variant, std::string> TaggedTextNode; typedef std::vector MixedTaggedAndUntaggedText; struct TaggedText { std::string m_name; std::vector m_params; MixedTaggedAndUntaggedText m_children; }; } BOOST_FUSION_ADAPT_STRUCT( TaggedText, (std::string, m_name) (std::vector, m_params) (std::vector, m_children) ) namespace { namespace ascii = boost::spirit::ascii; namespace qi = boost::spirit::qi; namespace fus = boost::fusion; template struct TagGrammar : qi::grammar { TagGrammar() : TagGrammar::base_type(m_all_text, "tag") { using namespace boost::spirit; using namespace ascii; using boost::spirit::arg_names::_val; using boost::spirit::arg_names::_r1; using boost::spirit::arg_names::_a; using boost::spirit::arg_names::_1; using boost::spirit::arg_names::_2; using boost::spirit::arg_names::_3; using boost::spirit::arg_names::_4; using boost::phoenix::construct; using boost::phoenix::val; using boost::phoenix::bind; using boost::phoenix::push_back; using namespace qi; m_all_text %= *m_node; // Adding an alternative to either of these m_text* rules seems to // change the attribute of the whole "|" expression, even if I // just repeat the alternatives (i.e., "m_text1 %= lexeme[+(char_ // - '<') | +(char_ - '<')];" does not compile). Note that I'd // like to write "m_text %= lexeme[(+char_('<')) | +(char_ - // '<')];" instead. Note the missing "raw[]". m_text1 %= lexeme[raw[(+char_('<'))]]; m_text2 %= lexeme[+(char_ - '<')]; m_preformatted_text %= lexeme[*(char_ - "")]; // This alternate expression does not act as expected. If you // supply "text" as input, you get m_text1 picking up // the first "<", m_text2 picking up the "tag>text", then m_text1 // picking up the "<", and m_text2 picking up the rest. I would // expect m_tagged_text to pick up this tag, since it comes first. // This does in fact work if I remove m_text1 from the alternate // expression. Wierder still, if I change "tag" above to "pre", // for which there is a separate tagged-text parser, everything // works fine as well. I hope I'm just doing something stupid here. m_node %= m_pre_tagged_text | m_tagged_text | m_text1 | m_text2; m_tag_word %= lexeme[+(char_ - ('>' | space))]; m_pre_tag = lexeme["pre"]; // "m_text2 %= lexeme[+(char_ - '<')];" is perfectly fine, and does // the assignment automatically. Tres cool. "m_text1 %= // lexeme[(+char_('<'))];" compiles and runs, but even though I've // used "%=", no assignment takes place. This is frankly // bewildering to us mere mortals. // NOTE: When the docs are complete, this second paragraph be a // non-issue. I thought I'd leave it here as a record of my // troubles in case it is of any use: // Even worse, if // I add "[_val = _1]" to the end of the "m_text1..." snippet, it // does not compile. I have to write my own functor, unless // there's some trick I don't know about yet that forces a string // or char attribute for the expression "+char_('<')" -- is there // such a trick? I found a workaround, though not a satisfying one. // "m_text1 %= lexeme[(+~~char_('<'))];" works as I'd like, but is // sure to be baffling to maintainers. Okay, I just discovered // "raw[]". So, there is a simple solution, but it is very // nonobvious/unintuitive. m_end_tag = "> lit(_r1) >> '>' ; m_pre_tagged_text = '<' >> m_pre_tag[bind(&TaggedText::m_name, _val) = "pre"] >> '>' >> m_preformatted_text[push_back(::boost::phoenix::bind(&TaggedText::m_children, _val), _1)] >> m_end_tag(val("pre")) ; // The start-tag and end-tag could not be defined separately, // since I need TaggedText::m_children to grab the text and tags // between the start and end tags. This should be doable, but // whenever I tried it, I got a compilation error indicating that // the start-tag's attribute was a std::string, even though I // explicitly specified it to be a TaggedText. // This does not compile without the semantic actions and with // "%=", even though it did when I was using ">" operators. // When I switched to ">>", I had to add the semantic actions. m_tagged_text = '<' >> !(char_('/') | "pre") >> m_tag_word[_a = bind(&TaggedText::m_name, _val) = _1] >> *(m_tag_word[push_back(bind(&TaggedText::m_params, _val), _1)]) >> '>' >> *(m_node[push_back(::boost::phoenix::bind(&TaggedText::m_children, _val), _1)]) >> m_end_tag(_a) ; m_all_text.name("all_text"); m_tagged_text.name("tagged_text"); m_node.name("node"); m_tag_word.name("tag_word"); m_text1.name("text"); m_text2.name("text"); m_end_tag.name("end_tag"); on_error( m_tagged_text, std::cout << val("Error: expected ") << _4 << val(" here: \"") << construct(_3, _2) << val("\"") << std::endl ); } qi::rule m_all_text; qi::rule m_pre_tagged_text; qi::rule, ascii::space_type> m_tagged_text; qi::rule m_node; qi::rule m_tag_word; qi::rule m_pre_tag; qi::rule m_text1; qi::rule m_text2; qi::rule m_preformatted_text; qi::rule m_end_tag; }; } /////////////////////////////////////////////////////////////////////////////// // Print out the ast /////////////////////////////////////////////////////////////////////////////// const int tabsize = 4; void tab(int indent) { for (int i = 0; i < indent; ++i) std::cout << ' '; } struct Printer { Printer(int indent = 0) : indent(indent) {} void operator()(const TaggedText& tagged_text) const; int indent; }; struct NodePrinter : boost::static_visitor<> { NodePrinter(int indent = 0) : indent(indent) {} void operator()(const TaggedText& tagged_text) const { Printer(indent + tabsize)(tagged_text); } void operator()(const std::string& text) const { tab(indent + tabsize); std::cout << "TEXT \"" << text << '"' << std::endl; } int indent; }; void Printer::operator()(const TaggedText& tagged_text) const { tab(indent); std::cout << "TAG <" << tagged_text.m_name << "> [ "; BOOST_FOREACH(const std::string& param, tagged_text.m_params) { std::cout << param << " "; } std::cout << "]" << std::endl; tab(indent); BOOST_FOREACH(const TaggedTextNode& node, tagged_text.m_children) { boost::apply_visitor(NodePrinter(indent), node); } tab(indent); } void Print(const MixedTaggedAndUntaggedText& text) { BOOST_FOREACH(const TaggedTextNode& node, text) { boost::apply_visitor(NodePrinter(-tabsize), node); } } int main() { std::ifstream ifs("testcase"); std::string testcase; char c; while (ifs.get(c)) { testcase += c; } TagGrammar tag; MixedTaggedAndUntaggedText ast; std::string::const_iterator it = testcase.begin(); std::string::const_iterator end = testcase.end(); bool matched = phrase_parse(it, end, tag, ast, ascii::space); if (matched) Print(ast); std::cout << (!matched ? "NO " : "") << "MATCH" << std::endl; return 0; }