#include #include #include #include #include //#include //#include #include #include /* compile: g++ -I spirit-problem.cc -g -o spirit-problem */ using namespace std; using namespace boost; using namespace spirit; using namespace phoenix; const spirit::chset lwsp_char_p(" \t"); struct crlf_parser : spirit::grammar { crlf_parser() { } template struct definition { definition(const crlf_parser& self) { using namespace spirit; first = str_p("\r\n"); } const spirit::rule& start() const { return first; } spirit::rule first; }; }; const crlf_parser crlf_p; struct lwsp_parser : spirit::grammar { lwsp_parser() { } template struct definition { definition(const lwsp_parser& self) { using namespace spirit; first = lexeme_d [ +( !crlf_p >> lwsp_char_p ) ]; } const spirit::rule& start() const { return first; } spirit::rule first; }; }; const lwsp_parser lwsp_p; //------------------------------------------------------------------------------- struct content_type_line { const char* type; const char* type_end; const char* subtype; const char* subtype_end; const char* param; const char* param_end; }; struct content_type_line_closure : spirit::closure { member1 val; }; struct content_type_line_parser : spirit::grammar { content_type_line_parser() { } template struct definition { definition(const content_type_line_parser& self) { /* Borenstein & Freed [Page 9] RFC 1521 MIME September 1993 In the Augmented BNF notation of RFC 822, a Content-Type header field value is defined as follows: content := "Content-Type" ":" type "/" subtype *(";" parameter) ; case-insensitive matching of type and subtype type := "application" / "audio" / "image" / "message" / "multipart" / "text" / "video" / extension-token ; All values case-insensitive extension-token := x-token / iana-token iana-token := x-token := subtype := token ; case-insensitive parameter := attribute "=" value attribute := token ; case-insensitive value := token / quoted-string token := 1* tspecials := "(" / ")" / "<" / ">" / "@" / "," / ";" / ":" / "\" / <"> / "/" / "[" / "]" / "?" / "=" ; Must be in quoted-string, ; to use within parameter values Borenstein & Freed [Page 10] RFC 1521 MIME September 1993 Note that the definition of "tspecials" is the same as the RFC 822 definition of "specials" with the addition of the three characters "/", "?", and "=", and the removal of ".". */ first = ( field = lexeme_d [ *lwsp_p >> ( type [bind(&content_type_line::type)(self.val) = arg1] [bind(&content_type_line::type_end)(self.val) = arg2] >> !lwsp_p >> '/' | epsilon_p[bind(&content_type_line::type)(self.val) = arg1] [bind(&content_type_line::type_end)(self.val) = arg2] ) >> ( subtype [bind(&content_type_line::subtype)(self.val) = arg1] [bind(&content_type_line::subtype_end)(self.val) = arg2] | epsilon_p [bind(&content_type_line::subtype)(self.val) = arg1] [bind(&content_type_line::subtype_end)(self.val) = arg2] ) >> ( ch_p(';') >> parameters [bind(&content_type_line::param)(self.val) = arg1] [bind(&content_type_line::param_end)(self.val) = arg2] | epsilon_p [bind(&content_type_line::param)(self.val) = arg1] [bind(&content_type_line::param_end)(self.val) = arg2] ) ], type = nocase_d [ (str_p("application") | "audio" | "image" | "message" | "multipart" | "text" | "video" ) ], subtype = nocase_d [ token ], token = +(anychar_p - tspecials), tspecials = ( ch_p('(') | ')' | '<' | '>' | '@' | ',' | ';' | ':' | '\\' | '"' | '/' | '[' | ']' | '?' | '=' ), parameters = *( anychar_p - crlf_p ) >> !( lwsp_p >> parameters ) ); } const spirit::rule& start() const { return first; } spirit::subrule<0> field; spirit::subrule<1> type; spirit::subrule<2> subtype; spirit::subrule<3> token; spirit::subrule<4> tspecials; spirit::subrule<5> parameters; spirit::rule first; }; }; const content_type_line_parser content_type_line_p; unsigned int parser_content_type(content_type_line& result, const char* input, const char* input_end = 0) { if (input_end == 0) input_end = input + strlen(input); parse_info rc = parse(input, input_end, content_type_line_p[assign(result)]); if (rc.hit) return rc.length; else return 0; } int main(int, char**) { char input[] = "text/plain; charset=ISO-8859-1"; content_type_line ctype; assert(0 != parser_content_type(ctype, input)); cout << "Type = " << string(ctype.type, ctype.type_end) << endl << "Subtype = " << string(ctype.subtype, ctype.subtype_end) << endl << "Parameter = " << string(ctype.param, ctype.param_end) << endl; assert(string(ctype.type, ctype.type_end) == "text"); assert(string(ctype.subtype, ctype.subtype_end) == "plain"); assert(string(ctype.param, ctype.param_end) == " charset=ISO-8859-1"); return 0; }