|
Boost : |
Subject: [boost] [string] Yet another Unicode string class
From: Anders Dalvander (boost_at_[hidden])
Date: 2011-02-10 15:38:11
I'm working on yet another Unicode string class/library, built around a
different set of features and requirements:
* It is designed around the codepoint concept.
* It uses (currently forward-) iterators for encoding and decoding.
* It has a minimal interface, mostly constructors and iterator access.
* Most other functions can (hopefully) be free functions.
* It uses basic_string as backend.
* It has fast access to underlying basic_string.
* It is (currently) using some C++0X features (mainly decltype).
* It is (currently) immutable and shares data, and thus fast to copy.
Some of these features and requirements may be unacceptable to some of
you, but I'm open to suggestions and comments.
// Copyright (c) 2011 Anders Dalvander.
//
// Distributed under the Boost Software License, Version 1.0.
// (See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)
// Immutable text value stored as code units of `encoding` and exposed as a
// range of code points.
//
// The code units live in a std::basic_string<codeunit_type> held through a
// std::shared_ptr<const string_type>, so copying a basic_text copies only the
// pointer and all copies share the same storage.
//
// Requirements on `encoding`, as used by this class:
//   - nested types `codeunit_type` and `codepoint_type`;
//   - member template `decode_iterator<CodeunitIterator>`, constructible from
//     (position, range_begin, range_end);
//   - member template `encode_iterator<CodepointIterator>`, constructible from
//     (position, range_begin, range_end), usable as an iterator pair to
//     construct a string_type.
template <typename encoding>
class basic_text
{
public:
    typedef encoding encoding_type;
    typedef typename encoding_type::codeunit_type codeunit_type;
    typedef typename encoding_type::codepoint_type codepoint_type;
    typedef std::basic_string<codeunit_type> string_type;
    typedef typename string_type::const_iterator codeunit_iterator;
    // `template` is required: decode_iterator is a member template of a
    // dependent type, so without it `<` would be parsed as less-than.
    typedef typename encoding_type::template decode_iterator<codeunit_iterator>
        codepoint_iterator;
    typedef codepoint_iterator const_iterator;
    typedef codepoint_iterator iterator;

private:
    typedef std::shared_ptr<const string_type> pointer_type;

    // Encodes the code point range [first, last) into a freshly allocated
    // string. Both encoder iterators receive the full range bounds in addition
    // to their own position, matching the three-argument form used throughout
    // this class.
    template <typename input_iterator>
    static pointer_type encode_range(input_iterator first, input_iterator last)
    {
        typedef typename encoding_type::template encode_iterator<input_iterator>
            encoder;
        return std::make_shared<string_type>(
            encoder(first, first, last),
            encoder(last, first, last));
    }

public:
    // Constructs an empty text.
    basic_text()
        : s(std::make_shared<string_type>())
    {
    }

    // Converting constructor: iterates the code points of `text` (which uses a
    // different encoding) and re-encodes them with this class's encoding.
    template <typename other_encoding>
    basic_text(const basic_text<other_encoding>& text)
        : s(encode_range<decltype(std::begin(text))>(
              std::begin(text), std::end(text)))
    {
    }

    // TODO: Use some default_encoding traits type.
    // Constructs from any range usable with std::begin/std::end; every element
    // of `c` is handed to the encoder as one code point.
    template <typename container>
    explicit basic_text(const container& c)
        : s(encode_range<decltype(std::begin(c))>(
              std::begin(c), std::end(c)))
    {
    }

    // Constructs from the code point range [first, last).
    // (The template parameter is deliberately not named `codepoint_iterator`,
    // which would hide the class typedef of that name.)
    template <typename input_iterator>
    basic_text(input_iterator first, input_iterator last)
        : s(encode_range(first, last))
    {
    }

    // Returns a decoding iterator positioned at the first code unit.
    codepoint_iterator begin() const
    {
        return codepoint_iterator(
            codeunit_begin(), codeunit_begin(), codeunit_end());
    }

    // Returns a decoding iterator positioned one past the last code unit.
    codepoint_iterator end() const
    {
        return codepoint_iterator(
            codeunit_end(), codeunit_begin(), codeunit_end());
    }

    // Returns an iterator to the first stored code unit.
    codeunit_iterator codeunit_begin() const
    {
        return std::begin(*s);
    }

    // Returns an iterator one past the last stored code unit.
    codeunit_iterator codeunit_end() const
    {
        return std::end(*s);
    }

    // Returns the underlying code unit string (read-only).
    const string_type& str() const
    {
        return *s;
    }

    // Returns the underlying code units as a null-terminated array.
    const codeunit_type* c_str() const
    {
        return s->c_str();
    }

private:
    // Shared, read-only storage; set by every constructor.
    pointer_type s;
};
// Synopsis only: `undefined-type` is a placeholder, not valid C++. The
// concrete encoding policy types are not shown here. Each policy is paired
// with the basic_text instantiation that uses it.
typedef undefined-type utf8_encoding;
typedef basic_text<utf8_encoding> u8text;
typedef undefined-type utf16_encoding;
typedef basic_text<utf16_encoding> u16text;
typedef undefined-type utf32_encoding;
typedef basic_text<utf32_encoding> u32text;
typedef undefined-type wchar_encoding;
typedef basic_text<wchar_encoding> wtext;
typedef undefined-type ascii_encoding;
typedef basic_text<ascii_encoding> ascii_text;
Usage:
// Usage sketch exercising each basic_text constructor. Illustrative only: the
// last statement relies on a constructor that basic_text does not declare.
int main()
{
const uint32_t cps[] = {0x41,0x42,0x80,0x800,0x10000,0x10ffff};
// construct from codepoint range (iterator-pair constructor)
u8text u8txt(std::begin(cps), std::end(cps));
// construct from encoded container,
// currently treats each element as a codepoint
// NOTE(review): "test" binds as const char[5], so the range given to the
// encoder appears to include the terminating '\0' -- confirm this is intended.
u8text u8txt2("test");
// sharing is caring: same encoding, so this is a plain copy that shares
// the underlying string rather than re-encoding it
u8text u8txt3 = u8txt;
// construct from codepoint range
u16text u16txt(std::begin(cps), std::end(cps));
// construct from text, transcodes range (converting constructor template)
u16text u16txt2 = u8txt;
// construct from text, transcodes range (converting constructor template)
u32text u32txt = u8txt;
// using policy (possible extension); basic_text has no two-argument
// (text, policy) constructor, and replace_policy is not defined here
ascii_text ascii(u8txt, replace_policy(0xff));
}
// Sketch: passes UTF-8 text to a wide-character Win32 API by transcoding it
// through wtext. `...` stands for the remaining CreateFileW arguments, which
// the original post elides; this is not compilable as written.
void OpenFileWin32(const u8text& txt)
{
    // wtext(txt) is a temporary that lives until the end of this full
    // expression, so the pointer returned by c_str() stays valid for the
    // duration of the CreateFileW call.
    CloseHandle(CreateFileW(wtext(txt).c_str(), ...));
}
typedef undefined-type posix_encoding;
typedef basic_text<posix_encoding> posixtext;
// Sketch: passes UTF-8 text to a POSIX API by transcoding it through
// posixtext. `...` stands for the remaining open() arguments, which the
// original post elides; this is not compilable as written.
void OpenFilePosix(const u8text& txt)
{
    // posixtext(txt) is a temporary that lives until the end of this full
    // expression, so the pointer returned by c_str() stays valid for the
    // duration of the open() call.
    close(open(posixtext(txt).c_str(), ...));
}
Regards,
Anders Dalvander
-- WWFSMD?
Boost list run by bdawes at acm.org, gregod at cs.rpi.edu, cpdaniel at pacbell.net, john at johnmaddock.co.uk