Boost logo

Boost Users :

Subject: [Boost-users] tokenizer problem
From: Medora Schauer (mschauer_at_[hidden])
Date: 2009-03-12 11:11:54


Hi All!

I'm having a problem passing strings into a boost tokenizer. I have been able to get the code to work but I don't think I should have had to do what I did. I don't know if it is a tokenizer problem or a compiler problem or my problem.

Basically, I start with a char[] and depending on how I convert it to an std::string, I may or may not get garbage tokens.

I've attached a complete, but small, program to demonstrate the problem. I hope someone can tell what is going on.

I'm using a Fedora 8 system with g++ v 4.1.2.

Thanks in advance!

Medora

#include <iostream>
#include <string>
#include <boost/tokenizer.hpp>

using namespace std;

char *in_strs[] = {
                   "S,1001,101,39.6511915386591,-108.199469861370,1240151.6,127361.5",
                   "S,1001,111,39.6511191997175,-108.199521016802,1240136.4,127335.6",
                   "S,1001,121,39.6510468691093,-108.199571817158,1240121.3,127309.7",
                   "S,1001,131,39.6509745384787,-108.199622617407,1240106.2,127283.8",
                   "S,1001,141,39.6509021994691,-108.199673772516,1240091.0,127257.9",
                   "S,1001,151,39.6508298687935,-108.19972457255,1240075.9,127232.0",
                   "S,1001,161,39.6507575380953,-108.199775372477,1240060.8,127206.1",
                   "S,1001,171,39.6506851990178,-108.199826527263,1240045.6,127180.2",
                   "S,1001,181,39.6506125938775,-108.199877316164,1240030.5,127154.2",
                   NULL
                  };

    int main(int argc, char* argv[])
    {
        typedef boost::tokenizer<boost::char_separator<char> > str_tokenizer;
        boost::char_separator<char> sep(",");

        cout << endl;
#if 1
        // Here I'm just tokenizing the strings and writing them out.

        // This version works.
        char **str = in_strs;
        while (*str){
            string file1_str = *str;
            str_tokenizer file1_tokens(file1_str, sep);
            for(str_tokenizer::iterator iter=file1_tokens.begin(); iter!=file1_tokens.end();++iter){
                cout << *iter << " ";
            }
            ++str;
            cout << endl;
        }
#endif

#if 0
        // This version results in garbage in some tokens.
        char **str = in_strs;
        while (*str){
            str_tokenizer file1_tokens(string(*str), sep); // <-- difference
            for(str_tokenizer::iterator iter=file1_tokens.begin(); iter!=file1_tokens.end();++iter){
                cout << *iter << " ";
            }
            ++str;
            cout << endl;
        }
#endif

//====================================================================================================

#if 0
        // Now I want to skip the first 2 characters of the string.

        // This version works.
        string file1_str;
        char **str = in_strs;
        while (*str){
            string file1_str = *str;
            string sub1 = file1_str.substr(2); // <-- difference
            str_tokenizer file1_tokens(sub1, sep); // <-- difference
            for(str_tokenizer::iterator iter=file1_tokens.begin(); iter!=file1_tokens.end();++iter){
                cout << *iter << " ";
            }
            ++str;
            cout << endl;
        }
#endif

#if 0
        // This version results in garbage in some tokens.
        string file1_str;
        char **str = in_strs;
        while (*str){
            string file1_str = *str;
            str_tokenizer file1_tokens(file1_str.substr(2), sep); // <--- difference
            for(str_tokenizer::iterator iter=file1_tokens.begin(); iter!=file1_tokens.end();++iter){
                cout << *iter << " ";
            }
            ++str;
            cout << endl;
        }
#endif
        return(0);
    }



Boost-users list run by williamkempf at hotmail.com, kalb at libertysoft.com, bjorn.karlsson at readsoft.com, gregod at cs.rpi.edu, wekempf at cox.net