|
Boost : |
Subject: [boost] [string] Realistic API proposal
From: Artyom (artyomtnk_at_[hidden])
Date: 2011-01-28 05:41:42
Hello All,
I'd like to provide a realistic string API proposal:
1. It is a Unicode-aware extension of std::string
2. It is fully compatible with std::string, giving it
the ability to be included in C++XYZ
3. It can be implemented today!
Basically it extends std::string with
a) const_code_point_iterator - for iterating over string - bidirectional
b) code_point_iterator - back inserter
c) provides Unicode and Locale functionality:
- normalization
- case handling
- comparison
- search
d) marks non const operator[],at() as deprecated
Advantages:
-----------
1. It can be implemented easily (when boost.locale is given)
2. It **is** a compatible replacement for std::string
3. It allows std::string to be used under the hood as storage,
giving high efficiency when assigning boost::string to std::string
when the implementation is COW (almost all implementations, with
the exception of MSVC)
4. It is fully Unicode aware
5. It pushes "UTF-8" idea to standard C++
6. You don't pay for what you do not need.
Proposed API:
-------------
namespace boost {
// Fully bidirectional iterator
/// Read-only iterator that yields code points from a range of code units.
///
/// @tparam UnitsIterator  iterator type over the underlying code units.
///
/// Only the interface is declared in this proposal; the remaining iterator
/// operations are elided.
template<typename UnitsIterator>
class const_code_point_iterator {
public:
    /// Iterator positioned at the beginning of [begin, end).
    const_code_point_iterator(UnitsIterator begin, UnitsIterator end); // begin
    /// Iterator positioned at `location` inside [begin, end).
    const_code_point_iterator(UnitsIterator begin, UnitsIterator end,
                              UnitsIterator location); // current pos
    /// Default-constructed iterator, used as the end marker.
    const_code_point_iterator(); // end

    // Value type of a code point: char32_t when the compiler is C++11 or
    // later, a plain unsigned integer otherwise.  (`#ifdef C++0x` is not a
    // valid preprocessor test, so the standard __cplusplus macro is used.)
#if __cplusplus >= 201103L
    typedef char32_t const_code_point_type;
#else
    typedef unsigned const_code_point_type;
#endif

    /// Returns the code point at the current position.
    const_code_point_type operator*() const;

    // ... (remaining iterator operations are not spelled out in the proposal)
};
/// Output iterator
/// Code-point output iterator layered on top of a back-inserter.
///
/// @tparam BackInserter  the underlying inserter type that receives output.
///
/// Only the interface is declared in this proposal; the remaining iterator
/// operations are elided.
template<typename BackInserter>
class code_point_iterator {
public:
    /// Iterator that writes through `out`.
    code_point_iterator(BackInserter out); // begin
    /// Default-constructed iterator, used as the end marker.
    code_point_iterator(); // end

    // Value type of a code point: char32_t when the compiler is C++11 or
    // later, a plain unsigned integer otherwise.  (`#ifdef C++0x` is not a
    // valid preprocessor test, so the standard __cplusplus macro is used.)
#if __cplusplus >= 201103L
    typedef char32_t code_point_type;
#else
    typedef unsigned code_point_type;
#endif

    code_point_type operator*() const;

    // ... (remaining iterator operations are not spelled out in the proposal)
};
template<typename Char,typename Traits=std::char_traits<Char>, typename
Alloc=std::allocator<Char> >
class basic_string {
public:
// { boost specific
typedef std::basic_string<Char,Traits,Alloc> std_string_type;
// } boost specific
// All std::string standard functions based
// Deprecated interfaces that exist for backward compatibility
// as they not Unicode aware
value_type &at(size_type indx);
value_type &operator[](size_type indx);
iterator begin();
iterator end();
// { boost specific compatibility functions with std::string, they would
go
// as std::string becode extended with boost::string new interfaces
//
basic_string(std_string_type const &other) : data_(other) {}
basic_string(std_string_type const &other,size_type index,size_type len)
: data_(other,index,len) {}
...
operator std_string_type() const
{
return data_;
}
// } boost specific compatibility functions
//
// Unicode Support
//
// ------------------------
//
//
// UTF Codepoint iteration
//
#ifdef C++0x
typedef char32_t code_point_type;
#else
typedef unsigned code_point_type;
#endif
typedef boost::const_code_point_iterator<const_iterator>
const_code_point_iterator;
const_code_point_iterator code_point_begin() const
{
return const_code_point_iterator(begin(),end());
}
const_code_point_iterator code_point_end() const
{
return const_code_point_iterator(begin(),end(),end());
}
typedef boost::code_point_iterator<std::back_inserter<basic_string> >
code_point_iterator;
code_point_iterator back_inserter()
{
return code_point_iterator(std::back_inserter<basic_string>(*this));
}
basic_string &operator+=(code_point_type code_point);
basic_string operator+(code_point_type code_point) const;
void append(code_point_type code_point);
//
// Lexical operations on string
//
// Case handling
basic_string upper_case(std::locale const &l=std::locale()) const;
basic_string lower_case(std::locale const &l=std::locale()) const;
basic_string title_case(std::locale const &l=std::locale()) const;
basic_string fold_case() const; // locale independent
// Unicode normalization
typedef enum {
nfc,
nfkc,
nfd,
nfkd
} normalization_mode;
basic_string normalize(normalization_mode mode = nfc) const;
// normalized string constructor
basic_string(basic_string const &,normalization_mode mode);
basic_string(Char const *,normalization_mode mode);
basic_string(Char const *,size_t n,normalization_mode mode);
template<Iterator>
basic_string(Iterator begin,Iterator end,normalization_mode mode);
void append_normalized(basic_string const &other,normalization_mode mode
= nfc);
void append_normalized(Char const *,normalization_mode mode = nfc);
void append_normalized(Char const *,size_t n,normalization_mode mode =
nfc);
basic_string concat_normalized(basic_string const
&other,normalization_mode mode = nfc) const;
basic_string concat_normalized(Char const *,normalization_mode mode =
nfc) const;
basic_string concat_normalized(Char const *,size_t n,normalization_mode
mode = nfc) const;
// Unicode validation
bool valid_utf() const;
typedef struct validate_utf{}; // Unicode validation tag
// Create string validating it
basic_string(basic_string const &,validate_utf const &);
basic_string(Char const *,validate_utf const &);
basic_string(Char const *,size_t n,validate_utf const &);
template<Iterator>
basic_string(Iterator begin,Iterator end,validate_utf const &);
// Create string validating and normalazing it
basic_string(basic_string const &,validate_utf const
&,normalization_mode mode);
basic_string(Char const *,validate_utf const &,normalization_mode mode);
basic_string(Char const *,size_t n,validate_utf const
&,normalization_mode mode);
template<Iterator>
basic_string(Iterator begin,Iterator end,validate_utf const
&,normalization_mode mode);
// Search and comparison
typedef enum {
primary = 0, ///< 1st collation level: base letters
secondary = 1, ///< 2nd collation level: letters and accents
tertiary = 2, ///< 3rd collation level: letters, accents and case
quaternary = 3, ///< 4th collation level: letters, accents, case
and punctuation
identical = 4 ///< identical collation level: include code-point
comparison
} level_type;
//
// search(...) return pair of index and size as string you search for
may have different size
//
std::pair<size_type,size_type> search(basic_string const &other) const;
std::pair<size_type,size_type> search(basic_string const
&other,level_type level) const;
std::pair<size_type,size_type> search(basic_string const
&other,std::locale const &l) const ;
std::pair<size_type,size_type> search(basic_string const
&other,level_type level,std::locale const &l) const;
std::pair<size_type,size_type> search(basic_string const &other,size_t
index,size_t size) const;
std::pair<size_type,size_type> search(basic_string const &other,size_t
index,size_t size,level_type level) const;
std::pair<size_type,size_type> search(basic_string const &other,size_t
index,size_t size,std::locale const &l) const;
std::pair<size_type,size_type> search(basic_string const &other,size_t
index,size_t size,level_type level,std::locale const &l) const;
std::pair<size_type,size_type> search(Char const *) const;
std::pair<size_type,size_type> search(Char const *,level_type level)
const;
std::pair<size_type,size_type> search(Char const *,std::locale const &l)
const ;
std::pair<size_type,size_type> search(Char const *,level_type
level,std::locale const &l) const;
std::pair<size_type,size_type> search(Char const *,size_t size) const;
std::pair<size_type,size_type> search(Char const *,size_t
size,level_type level) const;
std::pair<size_type,size_type> search(Char const *,size_t
size,std::locale const &l) const;
std::pair<size_type,size_type> search(Char const *,size_t
size,level_type level,std::locale const &l) const;
int compare_to(basic_string const &other) const;
int compare_to(basic_string const &other,level_type level) const;
int compare_to(basic_string const &other,std::locale const &l) const ;
int compare_to(basic_string const &other,level_type level,std::locale
const &l) const;
int compare_to(basic_string const &other,size_t index,size_t size)
const;
int compare_to(basic_string const &other,size_t index,size_t
size,level_type level) const;
int compare_to(basic_string const &other,size_t index,size_t
size,std::locale const &l) const;
int compare_to(basic_string const &other,size_t index,size_t
size,level_type level,std::locale const &l) const;
int compare_to(size_t index,size_t size,basic_string const &other)
const;
int compare_to(size_t index,size_t size,basic_string const
&other,level_type level) const;
int compare_to(size_t index,size_t size,basic_string const
&other,std::locale const &l) const ;
int compare_to(size_t index,size_t size,basic_string const
&other,level_type level,std::locale const &l) const;
int compare_to(size_t index,size_t size,basic_string const &other,size_t
index,size_t size) const;
int compare_to(size_t index,size_t size,basic_string const &other,size_t
index,size_t size,level_type level) const;
int compare_to(size_t index,size_t size,basic_string const &other,size_t
index,size_t size,std::locale const &l) const;
int compare_to(size_t index,size_t size,basic_string const &other,size_t
index,size_t size,level_type level,std::locale const &l) const;
int compare_to(Char const *) const;
int compare_to(Char const *,level_type level) const;
int compare_to(Char const *,std::locale const &l) const ;
int compare_to(Char const *,level_type level,std::locale const &l)
const;
int compare_to(Char const *,size_t size) const;
int compare_to(Char const *,size_t size,level_type level) const;
int compare_to(Char const *,size_t size,std::locale const &l) const;
int compare_to(Char const *,size_t size,level_type level,std::locale
const &l) const;
int compare_to(size_t size,Char const *) const;
int compare_to(size_t size,Char const *,level_type level) const;
int compare_to(size_t size,Char const *,std::locale const &l) const ;
int compare_to(size_t size,Char const *,level_type level,std::locale
const &l) const;
int compare_to(size_t size,Char const *,size_t size) const;
int compare_to(size_t size,Char const *,size_t size,level_type level)
const;
int compare_to(size_t size,Char const *,size_t size,std::locale const
&l) const;
int compare_to(size_t size,Char const *,size_t size,level_type
level,std::locale const &l) const;
// UTF validation
bool is_valid_utf() const;
private:
std_string_type data_;
};
} // boost
Boost list run by bdawes at acm.org, gregod at cs.rpi.edu, cpdaniel at pacbell.net, john at johnmaddock.co.uk