Hi All!

I’m having a problem passing strings into a boost tokenizer. I have been able to get the code to work, but I don’t think I should have had to do what I did. I don’t know whether it is a tokenizer problem, a compiler problem, or my problem.

Basically, I start with a char[] and depending on how I convert it to an std::string, I may or may not get garbage tokens.

I’ve attached a complete, but small, program to demonstrate the problem. I hope someone can tell what is going on.

I’m using a Fedora 8 system with g++ v 4.1.2.

Thanks in advance!

Medora

#include <iostream>

#include <string>

#include <boost/tokenizer.hpp>

using namespace std;

// Sample input: comma-separated records, one per entry. The table is
// terminated by a NULL sentinel so that it can be walked without a separate
// element count.
//
// NOTE(review): the elements are string literals stored as plain `char *`.
// That conversion is deprecated in C++98 and dropped from C++11 onwards, so
// the element type should eventually become `const char *`; any code that
// walks the table through a plain `char **` has to change at the same time.
char *in_strs[] = {
    "S,1001,101,39.6511915386591,-108.199469861370,1240151.6,127361.5",
    "S,1001,111,39.6511191997175,-108.199521016802,1240136.4,127335.6",
    "S,1001,121,39.6510468691093,-108.199571817158,1240121.3,127309.7",
    "S,1001,131,39.6509745384787,-108.199622617407,1240106.2,127283.8",
    "S,1001,141,39.6509021994691,-108.199673772516,1240091.0,127257.9",
    "S,1001,151,39.6508298687935,-108.19972457255,1240075.9,127232.0",
    "S,1001,161,39.6507575380953,-108.199775372477,1240060.8,127206.1",
    "S,1001,171,39.6506851990178,-108.199826527263,1240045.6,127180.2",
    "S,1001,181,39.6506125938775,-108.199877316164,1240030.5,127154.2",
    NULL  // end-of-table sentinel
};

int main(int argc, char* argv[])

{

typedef boost::tokenizer<boost::char_separator<char> > str_tokenizer;

boost::char_separator<char> sep(",");

cout << endl;

#if 1

// Here I'm just tokenizing the strings and writing them out.

// This version works.

char **str = in_strs;

while (*str){

string file1_str = *str;

str_tokenizer file1_tokens(file1_str, sep);

for(str_tokenizer::iterator iter=file1_tokens.begin(); iter!=file1_tokens.end();++iter){

cout << *iter << " ";

}

++str;

cout << endl;

}

#endif

#if 0

// This version results in garbage in some tokens.

char **str = in_strs;

while (*str){

str_tokenizer file1_tokens(string(*str), sep); // <-- difference

for(str_tokenizer::iterator iter=file1_tokens.begin(); iter!=file1_tokens.end();++iter){

cout << *iter << " ";

}

++str;

cout << endl;

}

#endif

//====================================================================================================

#if 0

// Now I want to skip the first 2 characters of the string.

// This version works.

string file1_str;

char **str = in_strs;

while (*str){

string file1_str = *str;

string sub1 = file1_str.substr(2); // <-- difference

str_tokenizer file1_tokens(sub1, sep); // <-- difference

for(str_tokenizer::iterator iter=file1_tokens.begin(); iter!=file1_tokens.end();++iter){

cout << *iter << " ";

}

++str;

cout << endl;

}

#endif

#if 0

// This version results in garbage in some tokens.

string file1_str;

char **str = in_strs;

while (*str){

string file1_str = *str;

str_tokenizer file1_tokens(file1_str.substr(2), sep); // <--- difference

for(str_tokenizer::iterator iter=file1_tokens.begin(); iter!=file1_tokens.end();++iter){

cout << *iter << " ";

}

++str;

cout << endl;

}

#endif

return(0);

}