#include "unicode.hpp"

#include <cstddef>
#include <stdexcept>

using namespace unicode;

/*
 * Implementation of unicode::string: a container of code points that is
 * filled by an incremental UTF-8 decoder.
 *
 * Decoder state kept in the object (as used by the code below):
 *   m_code.first   - number of continuation bytes still expected
 *   m_code.second  - code point accumulated so far
 *   m_range        - [min, max] code point allowed for the current sequence
 *                    length; a value outside it is an overlong/out-of-range form
 * The idle state (no sequence in progress) is m_code == (0,0), m_range == (0,0).
 */

/* Creates an empty string with the decoder in its idle state. */
string::string()
{
    m_code = std::make_pair(0, 0);
    m_range = std::make_pair(0, 0);
}

/* Nothing to release explicitly. */
string::~string() throw()
{
}

/*
 * Decodes the UTF-8 bytes in `data` and appends the resulting code points
 * to this string via push_back().
 *
 * Decoding is incremental: if `data` ends in the middle of a multi-byte
 * sequence, the partial state is kept and the sequence is completed by the
 * next call. Calling decode() with empty data is a no-op.
 *
 * On success every byte has been consumed and `data` is left empty.
 *
 * Throws std::runtime_error on an invalid lead byte, a missing continuation
 * byte, an overlong or out-of-range encoding, or an encoded surrogate
 * (U+D800..U+DFFF). When an exception leaves this function, the bytes that
 * precede the offending byte have been removed from `data`, the offending
 * byte and everything after it are still in `data`, code points completed
 * before the error remain appended, and the decoder is back in its idle
 * state.
 *
 * Returns *this.
 *
 * NOTE(review): the element type of `data` is not visible in this file's
 * view (template arguments appear to be missing from the signature); the
 * signature is kept verbatim and must match the declaration in unicode.hpp.
 */
string& string::decode( std::vector& data )
{
    std::size_t consumed = 0;

    try
    {
        for (; consumed < data.size(); ++consumed)
        {
            /* Widen once so the comparisons below do not depend on the
               element type; a negative element becomes a huge value and is
               rejected as invalid, like any other out-of-range byte. */
            const unsigned long byte = static_cast<unsigned long>(data[consumed]);

            if (m_code.first > 0)
            {
                /* Inside a multi-byte sequence: only 10xxxxxx is acceptable. */
                if (byte < 0x80 || byte > 0xBF)
                {
                    throw std::runtime_error(
                        "unicode::string::decode: invalid UTF-8 continuation byte");
                }
                m_code.second <<= 6;
                m_code.second += byte & 0x3F;
                --m_code.first;
            }
            else if (byte <= 0x7F)
            {
                /* ASCII */
                m_code.first = 0;
                m_code.second = byte;
                m_range = std::make_pair(0x00, 0x7F);
            }
            else if (byte >= 0xC2 && byte <= 0xDF)
            {
                /* 2-byte sequence; 0xC0/0xC1 are excluded because they can
                   only produce overlong forms. Smallest value is U+0080. */
                m_code.first = 1;
                m_code.second = byte & 0x1F;
                m_range = std::make_pair(0x80, 0x7FF);
            }
            else if (byte >= 0xE0 && byte <= 0xEF)
            {
                /* 3-byte sequence */
                m_code.first = 2;
                m_code.second = byte & 0x0F;
                m_range = std::make_pair(0x800, 0xFFFF);
            }
            else if (byte >= 0xF0 && byte <= 0xF4)
            {
                /* 4-byte sequence */
                m_code.first = 3;
                m_code.second = byte & 0x07;
                m_range = std::make_pair(0x10000, 0x10FFFF);
            }
            else
            {
                throw std::runtime_error(
                    "unicode::string::decode: invalid UTF-8 lead byte");
            }

            if (m_code.first == 0)
            {
                /* Sequence complete: validate, emit, and return to idle. */
                if (m_code.second < m_range.first || m_code.second > m_range.second)
                {
                    throw std::runtime_error(
                        "unicode::string::decode: overlong or out-of-range UTF-8 sequence");
                }
                if (m_code.second >= 0xD800 && m_code.second <= 0xDFFF)
                {
                    /* Surrogates are not valid scalar values in UTF-8. */
                    throw std::runtime_error(
                        "unicode::string::decode: UTF-8 encoded surrogate");
                }
                push_back(m_code.second);
                m_code = std::make_pair(0, 0);
                m_range = std::make_pair(0, 0);
            }
        }
    }
    catch (...)
    {
        /* Drop what was already consumed, forget the broken sequence, and
           let the caller see the original exception. */
        data.erase(data.begin(), data.begin() + consumed);
        m_code = std::make_pair(0, 0);
        m_range = std::make_pair(0, 0);
        throw;
    }

    /* Everything was consumed; one clear() instead of erasing the front
       element once per byte. */
    data.clear();
    return *this;
}

/*
 * Encoding is not implemented. The function previously had an empty body,
 * which is undefined behaviour for a non-void function; it now fails loudly.
 *
 * Throws std::logic_error unconditionally.
 *
 * NOTE(review): the element type of the returned vector is not visible in
 * this file's view; the signature is kept verbatim and must match the
 * declaration in unicode.hpp.
 */
std::vector string::encode()
{
    throw std::logic_error("unicode::string::encode: not implemented");
}