Boost logo

Boost Users :

Subject: [Boost-users] [Boost] [Spirit] parse_ast, leaf_node_d and skip parser
From: Vincent Jacques (newsgroups_at_[hidden])
Date: 2009-11-19 13:29:40


Hello,

Using Boost.Spirit, when I group nodes with a leaf_node_d directive and call
ast_parse, it looks like I get a node whose value includes some characters
that are supposed to be skipped.

My questions: am I missing anything? Is this intended? If yes, what am I
missing (again)? Is there a way to work around this?

Here is a simple example program (it should compile and demonstrate what I
mean with Boost 1.39 and 1.40):

// BEGIN OF THE PROGRAM
#include <iostream>
#include <map>
#include <string>

#include <boost/spirit/include/classic_core.hpp>
#include <boost/spirit/include/classic_lists.hpp>
#include <boost/spirit/include/classic_ast.hpp>
#include <boost/spirit/include/classic_tree_to_xml.hpp>

namespace bs = boost::spirit::classic;

// Grammar for a comma-separated list of identifiers, where an identifier
// is one or more alphabetic characters and no node-grouping directive is
// applied to it.
// IteratorT is used only to name the CRTP base class; the nested
// definition is parameterised on the scanner type instead.
template< typename IteratorT >
class OkGrammar : public bs::grammar< OkGrammar< IteratorT > > {
public:
    template< typename ScannerT >
    struct definition {
        // The parser_tag values (1 and 2) are the keys main() uses in its
        // rule-name map when printing the tree.
        typedef bs::rule< ScannerT, bs::parser_context<>, bs::parser_tag< 1 > >
            StartRule;
        typedef bs::rule< ScannerT, bs::parser_context<>, bs::parser_tag< 2 > >
            IdentifierRule;

        StartRule startRule;
        IdentifierRule identifier;

        definition( const OkGrammar& ) {
            // Each letter is matched by its own alpha_p.
            identifier = +bs::alpha_p;

            // identifier ( ',' identifier )*
            startRule = bs::list_p( identifier, bs::ch_p( ',' ) );
        }

        // Entry rule of the grammar.
        StartRule start() { return startRule; }
    };
};

// Same comma-separated identifier list as OkGrammar, except that the
// identifier's letters are wrapped in leaf_node_d.
// IteratorT is used only to name the CRTP base class; the nested
// definition is parameterised on the scanner type instead.
template< typename IteratorT >
class StrangeGrammar : public bs::grammar< StrangeGrammar< IteratorT > > {
public:
    template< typename ScannerT >
    struct definition {
        // The parser_tag values (1 and 2) are the keys main() uses in its
        // rule-name map when printing the tree.
        typedef bs::rule< ScannerT, bs::parser_context<>, bs::parser_tag< 1 > >
            StartRule;
        typedef bs::rule< ScannerT, bs::parser_context<>, bs::parser_tag< 2 > >
            IdentifierRule;

        StartRule startRule;
        IdentifierRule identifier;

        definition( const StrangeGrammar& ) {
            // The only difference from OkGrammar: the run of letters is
            // placed inside a leaf_node_d directive.
            identifier = bs::leaf_node_d[ +bs::alpha_p ];

            // identifier ( ',' identifier )*
            startRule = bs::list_p( identifier, bs::ch_p( ',' ) );
        }

        // Entry rule of the grammar.
        StartRule start() { return startRule; }
    };
};

int main( int argc, char *argv[] ) {
    std::string input = " Hello , wor , ld ";

    std::map< bs::parser_id, std::string > ruleNames;
    ruleNames[ 1 ] = "startRule";
    ruleNames[ 2 ] = "identifier";

    typedef std::string::const_iterator iterator;
    iterator first = input.begin();
    iterator last = input.end();

    bs::tree_parse_info< iterator > info = bs::ast_parse(
        first,
        last,
        OkGrammar< iterator >(),
        +bs::space_p );
    bs::tree_to_xml( std::cout, info.trees, input, ruleNames );

    std::cout << std::endl;

    info = bs::ast_parse(
        first,
        last,
        StrangeGrammar< iterator >(),
        +bs::space_p );
    bs::tree_to_xml( std::cout, info.trees, input, ruleNames );
}
// END OF THE PROGRAM

Here is the output:
<?xml version="1.0" encoding="ISO-8859-1"?>
<!DOCTYPE parsetree SYSTEM "parsetree.dtd">
<!-- Hello , wor , ld -->
<parsetree version="1.0">
    <parsenode rule="startRule">
        <parsenode rule="identifier">
            <parsenode>
                <value>H</value>
            </parsenode>
            <parsenode>
                <value>e</value>
            </parsenode>
            <parsenode>
                <value>l</value>
            </parsenode>
            <parsenode>
                <value>l</value>
            </parsenode>
            <parsenode>
                <value>o</value>
            </parsenode>
        </parsenode>
        <parsenode>
            <value>,</value>
        </parsenode>
        <parsenode rule="identifier">
            <parsenode>
                <value>w</value>
            </parsenode>
            <parsenode>
                <value>o</value>
            </parsenode>
            <parsenode>
                <value>r</value>
            </parsenode>
        </parsenode>
        <parsenode>
            <value>,</value>
        </parsenode>
        <parsenode rule="identifier">
            <parsenode>
                <value>l</value>
            </parsenode>
            <parsenode>
                <value>d</value>
            </parsenode>
        </parsenode>
    </parsenode>
</parsetree>

<?xml version="1.0" encoding="ISO-8859-1"?>
<!DOCTYPE parsetree SYSTEM "parsetree.dtd">
<!-- Hello , wor , ld -->
<parsetree version="1.0">
    <parsenode rule="startRule">
        <parsenode rule="identifier">
            <value>Hello</value>
        </parsenode>
        <parsenode>
            <value>,</value>
        </parsenode>
        <parsenode rule="identifier">
            <value> wor</value>
        </parsenode>
        <parsenode>
            <value>,</value>
        </parsenode>
        <parsenode rule="identifier">
            <value> ld</value>
        </parsenode>
    </parsenode>
</parsetree>

In the first XML tree, no node contains spaces, while in the second,
some identifier values (" wor" and " ld") include a leading space.

Thank you for your help,

-- 
Vincent Jacques
"S'il n'y a pas de solution, c'est qu'il n'y a pas de problème"
           Devise Shadock


Boost-users list run by williamkempf at hotmail.com, kalb at libertysoft.com, bjorn.karlsson at readsoft.com, gregod at cs.rpi.edu, wekempf at cox.net