// This file contains a draft interface for querying Unicode character
// properties.
// It does not compile yet.

#include <cstddef>
#include <utility>
#include <boost/cstdint.hpp>

namespace boost
{
#ifndef __UNICODEINTEGRALTYPES
	// Fixed-width integral types for code points and UTF-16 code units.
	// NOTE(review): nothing visible in this file defines
	// __UNICODEINTEGRALTYPES; presumably a header that already supplies these
	// types is expected to define it -- confirm.
	typedef uint32_t codepoint;
#if __cplusplus < 201103L
	// From C++11 on char16_t and char32_t are keywords and must not be
	// typedef'ed, so the emulation is only provided for older dialects.
	typedef uint32_t char32_t;
	typedef uint16_t char16_t;
#endif
#endif //__UNICODEINTEGRALTYPES

namespace unicode
{
	/// Type used by every function in this interface to pass one code point.
	/// Defined directly on the 32-bit unsigned type from <boost/cstdint.hpp>
	/// so that it does not depend on a separately declared boost::char32_t.
	typedef ::boost::uint32_t codepoint;

	/*********************************************************************************
	** Unicode blocks.
	**
	** The enumerators follow the entries of Blocks.txt, lower-cased, with each
	** ' ' and '-' replaced by '_'. `private_` is an extra entry that does not
	** come from Blocks.txt (the trailing underscore avoids the C++ keyword).
	**
	** Enumerators marked "deprecated spelling" are the names used by earlier
	** revisions of this draft; they are kept as aliases with identical values
	** so existing code keeps compiling.
	**
	** NOTE(review): the list starts at latin_1_supplement, so there is no
	** enumerator for Basic Latin, and no blocks beyond
	** mathematical_alphanumeric_symbols are listed -- confirm whether the
	** generator output was truncated.
	*********************************************************************************/
	struct range
	{
		enum type
		{
			latin_1_supplement,
			latin_extended_a,
			latin_extended_b,
			ipa_extensions,
			spacing_modifier_letters,
			combining_diacritical_marks,
			greek_and_coptic,
			cyrillic,
			cyrillic_supplement,
			armenian,
			hebrew,
			arabic,
			syriac,
			arabic_supplement,
			thaana,
			devanagari,
			bengali,
			gurmukhi,
			gujarati,
			oriya,
			tamil,
			telugu,
			kannada,
			malayalam,
			sinhala,
			thai,
			lao,
			tibetan,
			myanmar,
			georgian,
			hangul_jamo,
			hangulJamo = hangul_jamo,	// deprecated spelling
			ethiopic,
			ethiopic_supplement,
			cherokee,
			unified_canadian_aboriginal_syllabics,
			ogham,
			runic,
			tagalog,
			hanunoo,
			buhid,
			tagbanwa,
			khmer,
			mongolian,
			limbu,
			tai_le,
			taiLe = tai_le,	// deprecated spelling
			new_tai_lue,
			khmer_symbols,
			khmerSymbols = khmer_symbols,	// deprecated spelling
			buginese,
			phonetic_extensions,
			phonetic_extensions_supplement,
			combining_diacritical_marks_supplement,
			latin_extended_additional,
			greek_extended,
			general_punctuation,
			superscripts_and_subscripts,
			currency_symbols,
			combining_diacritical_marks_for_symbols,
			letterlike_symbols,
			number_forms,
			arrows,
			mathematical_operators,
			miscellaneous_technical,
			control_pictures,
			optical_character_recognition,
			enclosed_alphanumerics,
			box_drawing,
			block_elements,
			geometric_shapes,
			miscellaneous_symbols,
			dingbats,
			miscellaneous_mathematical_symbols_a,
			supplemental_arrows_a,
			braille_patterns,
			supplemental_arrows_b,
			miscellaneous_mathematical_symbols_b,
			supplemental_mathematical_operators,
			miscellaneous_symbols_and_arrows,
			glagolitic,
			coptic,
			georgian_supplement,
			tifinagh,
			ethiopic_extended,
			ethiopicextended = ethiopic_extended,	// deprecated spelling
			supplemental_punctuation,
			cjk_radicals_supplement,
			kangxi_radicals,
			ideographic_description_characters,
			cjk_symbols_and_punctuation,
			hiragana,
			katakana,
			bopomofo,
			hangul_compatibility_jamo,
			kanbun,
			bopomofo_extended,
			cjk_strokes,
			katakana_phonetic_extensions,
			enclosed_cjk_letters_and_months,
			cjk_compatibility,
			cjk_unified_ideographs_extension_a,
			yijing_hexagram_symbols,
			cjk_unified_ideographs,
			yi_syllables,
			yi_radicals,
			modifier_tone_letters,
			syloti_nagri,
			hangul_syllables,
			high_surrogates,
			high_private_use_surrogates,
			low_surrogates,
			private_use_area,
			cjk_compatibility_ideographs,
			alphabetic_presentation_forms,
			arabic_presentation_forms_a,
			variation_selectors,
			vertical_forms,
			combining_half_marks,
			cjk_compatibility_forms,
			small_form_variants,
			arabic_presentation_forms_b,
			halfwidth_and_fullwidth_forms,
			specials,
			linear_b_syllabary,
			linear_b_ideograms,
			aegean_numbers,
			ancient_greek_numbers,
			old_italic,
			gothic,
			ugaritic,
			old_persian,
			deseret,
			shavian,
			osmanya,
			cypriot_syllabary,
			kharoshthi,
			byzantine_musical_symbols,
			musical_symbols,
			ancient_greek_musical_notation,
			tai_xuan_jing_symbols,
			mathematical_alphanumeric_symbols,
			mathemat = mathematical_alphanumeric_symbols,	// deprecated (truncated) spelling
			private_
		};
	};


/*********************************************************************************
** Unicode types
*********************************************************************************/

	/// Top-level classification of a code point, as returned by get_category().
	/// The struct only serves as a scope for the enumerators (category::letter).
	struct category
	{
		enum type
		{
			letter      = 0,
			mark        = 1,
			number      = 2,
			separator   = 3,
			other       = 4,
			punctuation = 5,
			symbol      = 6
		};
	};

	/// Casing sub-classification, as returned by get_category_casing().
	/// NOTE(review): presumably only meaningful for code points whose
	/// category is category::letter -- confirm.
	struct category_casing
	{
		enum type
		{
			uppercase = 0,
			lowercase = 1,
			titlecase = 2,
			modifier  = 3,
			other     = 4
		};
	};
	/// Mark sub-classification, as returned by get_category_mark().
	/// The names follow the Unicode general categories for marks
	/// (nonspacing, spacing combining, enclosing). The former spellings
	/// `space_combining` and `mark_enclosing` remain available as aliases
	/// with the same values.
	struct category_mark
	{
		enum type
		{
			non_spacing,
			spacing_combining,
			space_combining = spacing_combining,	// deprecated spelling
			enclosing,
			mark_enclosing = enclosing	// deprecated spelling
		};
	};

	/// Number sub-classification, as returned by get_category_number().
	struct category_number
	{
		enum type
		{
			decimal_digit = 0,
			letter        = 1,
			other         = 2
		};
	};
	/// Separator sub-classification, as returned by get_category_separator().
	struct category_separator
	{
		enum type
		{
			space     = 0,
			line      = 1,
			paragraph = 2
		};
	};
	/// Sub-classification for the "other" category, as returned by
	/// get_category_other().
	struct category_other
	{
		enum type
		{
			control      = 0,
			format       = 1,
			surrogate    = 2,
			private_use  = 3,
			not_assigned = 4
		};
	};
	/// Punctuation sub-classification, as returned by
	/// get_category_punctuation().
	struct category_punctuation
	{
		enum type
		{
			connector     = 0,
			dash          = 1,
			open          = 2,
			close         = 3,
			initial_quote = 4,
			final_quote   = 5,
			other         = 6
		};
	};
	/// Symbol sub-classification, as returned by get_category_symbol().
	struct category_symbol
	{
		enum type
		{
			math     = 0,
			currency = 1,
			modifier = 2,
			other    = 3
		};
	};
	/// Joining type of a code point, as returned by get_join_type().
	/// NOTE(review): the enumerators look like the Unicode Joining_Type
	/// values (non-joining, right, left, dual, join-causing, transparent)
	/// -- confirm.
	struct join_type
	{
		enum type
		{
			none        = 0,
			right       = 1,
			left        = 2,
			dual        = 3,
			causing     = 4,
			transparent = 5
		};
	};
	/// Bidirectional character type, as returned by get_bidi_char_type().
	///
	/// The numeric values are grouped so that a family can be recognised
	/// with a mask: strong left-to-right types start at 0x100, strong
	/// right-to-left types at 0x200, weak types at 0x1000 and neutral types
	/// at 0x2000. Within each group the values increase by one.
	struct bidi_char_type
	{
		enum type
		{
			strong_left_to_right			= 0x100,
			strong_left_to_right_embedding,
			strong_left_to_right_override,

			strong_right_to_left			= 0x200,
			strong_right_to_left_arabic,
			strong_right_to_left_embedding,
			strong_right_to_left_override,

			weak_pop_direction_format		= 0x1000,
			weak_european_digits,
			weak_european_number_separator,
			weak_european_number_terminator,
			weak_arabic_number,
			weak_common_number_separator,
			weak_non_spacing_mark,
			weak_boundary_neutral,

			neutral_paragraph_separator	= 0x2000,
			neutral_segment_separator,
			neutral_whitespace,
			neutral_other	// no trailing comma: ill-formed before C++11
		};
	};
	/// Line-breaking class of a code point, as returned by get_break_class().
	/// NOTE(review): the enumerators appear to follow the line breaking
	/// classes of UAX #14 -- confirm against the targeted Unicode version.
	///
	/// `ordinary_alpabetic_and_symbol_chars` is the misspelt name used by
	/// earlier revisions; it is kept as an alias with the same value.
	struct break_class
	{
		enum type
		{
			mandatory,
			carriage_return,
			line_feed,
			combining_marks,
			next_line,
			surrogates,
			word_joiner,
			zero_width_space,
			non_breaking,
			contingent_break_opport,
			space,
			break_opport_before_after,
			break_opport_after,
			break_opport_before,
			hyphen,
			closing_punct,
			exclamation_interrog,
			inseparable,
			non_starter,
			opening_punct,
			ambiguous_quote,
			infix_separator,
			numeric,
			postfix_numeric,
			prefix_numeric,
			symbols_allowing_breaks,
			ambiguous,
			ordinary_alphabetic_and_symbol_chars,
			ordinary_alpabetic_and_symbol_chars = ordinary_alphabetic_and_symbol_chars,	// deprecated spelling
			hangul_lv_syllable,
			hangul_lvt_syllable,
			ideograph,
			hangul_l_jamo,
			hangul_v_jamo,
			hangul_t_jamo,
			complex_context,
			unknown
		};
	};
	/// Kind of line-break opportunity between two code points.
	/// All enumerators use snake_case like the rest of this header; the
	/// camelCase spellings from earlier revisions are kept as aliases with
	/// the same values. `explicit_` carries a trailing underscore because
	/// `explicit` is a C++ keyword.
	struct break_action
	{
		enum type
		{
			direct,
			indirect,
			combining_indirect,
			combiningIndirect = combining_indirect,	// deprecated spelling
			combining_prohibited,
			combiningProhibited = combining_prohibited,	// deprecated spelling
			prohibited,
			explicit_
		};
	};

/*********************************************************************************
** Functions
*********************************************************************************/

	/**
	Per-code-point property queries. Each function takes one code point and
	returns the corresponding property value.

	NOTE(review): the get_category_xxx() functions presumably refine the
	result of get_category() and are only meaningful when the code point
	belongs to the matching top-level category -- confirm.

	\todo The behaviour of these functions when called with any respective
	out-of-range values should be specified.
	**/
	category::type get_category(codepoint ch);
	// std::size_t rather than bare size_t: no using-declaration for the
	// global name is visible inside namespace boost.
	std::size_t get_combining_class(codepoint ch);
	category_casing::type get_category_casing(codepoint ch);
	category_mark::type get_category_mark(codepoint ch);
	category_number::type get_category_number(codepoint ch);
	category_separator::type get_category_separator(codepoint ch);
	category_other::type get_category_other(codepoint ch);
	category_punctuation::type get_category_punctuation(codepoint ch);
	category_symbol::type get_category_symbol(codepoint ch);
	join_type::type get_join_type(codepoint ch);
	bidi_char_type::type get_bidi_char_type(codepoint ch);
	break_class::type get_break_class(codepoint ch);

	// forms information
	// NOTE(review): presumably the contextual (positional) shaping forms of
	// a joining character; what is returned for a code point without such a
	// form is not specified in this draft.
	codepoint get_nominal_form(codepoint ch);
	codepoint get_left_form(codepoint ch);
	codepoint get_right_form(codepoint ch);
	codepoint get_medial_form(codepoint ch);

	// blocks
	/// Returns the range (block) enumerator for the given code point.
	range::type get_range(codepoint ch);

	// text boundaries (grapheme, word, sentence, line break)

	/// Steps backwards by one grapheme within [begin, current).
	/// NOTE(review): the exact position returned is not specified in this
	/// draft; presumably the start of the grapheme preceding \a current.
	/// \pre current != begin
	template <typename BidirectionalIterator>
	BidirectionalIterator previous_grapheme(BidirectionalIterator begin,
	                                        BidirectionalIterator current);

	/// Steps forwards by one grapheme within [current, end).
	/// NOTE(review): the exact position returned is not specified in this
	/// draft; presumably the start of the grapheme following \a current.
	/// \pre current != end
	template <typename ForwardIterator>
	ForwardIterator next_grapheme(ForwardIterator current,
	                              ForwardIterator end);

	/// Steps backwards by one word; [begin, end) is the whole text and
	/// \a current the position to start from.
	/// NOTE(review): the exact position returned is not specified in this
	/// draft; presumably the word boundary preceding \a current.
	/// \pre current != begin
	template <typename BidirectionalIterator>
	BidirectionalIterator previous_word(BidirectionalIterator begin,
	                                    BidirectionalIterator current,
	                                    BidirectionalIterator end);

	/// Steps forwards by one word; [begin, end) is the whole text and
	/// \a current the position to start from.
	/// NOTE(review): the exact position returned is not specified in this
	/// draft; presumably the word boundary following \a current.
	/// \pre current != end
	template <typename BidirectionalIterator>
	BidirectionalIterator next_word(BidirectionalIterator begin,
	                                BidirectionalIterator current,
	                                BidirectionalIterator end);

	/// Steps backwards by one sentence; [begin, end) is the whole text and
	/// \a current the position to start from.
	/// NOTE(review): the exact position returned is not specified in this
	/// draft; presumably the sentence boundary preceding \a current.
	/// \pre current != begin
	template <typename BidirectionalIterator>
	BidirectionalIterator previous_sentence(BidirectionalIterator begin,
	                                        BidirectionalIterator current,
	                                        BidirectionalIterator end);

	/// Steps forwards by one sentence; [begin, end) is the whole text and
	/// \a current the position to start from.
	/// NOTE(review): the exact position returned is not specified in this
	/// draft; presumably the sentence boundary following \a current.
	/// \pre current != end
	template <typename BidirectionalIterator>
	BidirectionalIterator next_sentence(BidirectionalIterator begin,
	                                    BidirectionalIterator current,
	                                    BidirectionalIterator end);

	/// Finds the line-break opportunity before \a current; [begin, end) is
	/// the whole text.
	/// \return a pair of the break position and the kind of break there.
	///         The second member is a break_action::type enumerator (the
	///         break_action struct itself is only a scope for the enum and
	///         carries no value).
	/// \pre current != begin
	template <class BidirectionalIterator>
	std::pair <BidirectionalIterator, break_action::type> previous_line_break(
		BidirectionalIterator begin,
		BidirectionalIterator current,
		BidirectionalIterator end);

	/// Finds the line-break opportunity after \a current; [begin, end) is
	/// the whole text.
	/// \return a pair of the break position and the kind of break there.
	///         The second member is a break_action::type enumerator (the
	///         break_action struct itself is only a scope for the enum and
	///         carries no value).
	/// \pre current != end
	template <class BidirectionalIterator>
	std::pair <BidirectionalIterator, break_action::type> next_line_break(
		BidirectionalIterator begin,
		BidirectionalIterator current,
		BidirectionalIterator end);

	// Comparison function object for Unicode strings (placeholder).
	//
	// Exactly what objects this class will compare will become clear
	// after the unicode string class has been defined.
	// It should be possible to use this class for STL containers
	// (e.g. std::map) and algorithms (e.g. std::lower_bound).
	//
	// NOTE: `template <...>`, `string<...>` and `string_with_sort_data<...>`
	// are placeholders, not valid C++; neither string type is declared in
	// this file.
	class collation
	{
	public:
		// Compares two strings.
		// NOTE(review): presumably a strict-weak "s1 sorts before s2"
		// predicate, since it is meant for std::map / std::lower_bound --
		// confirm once specified.
		template <...>
		bool operator () (
			const string<...> & s1,
			const string<...> & s2) const;

		// Same comparison for strings of type string_with_sort_data.
		// NOTE(review): presumably these carry precomputed sort data --
		// confirm.
		template <...>
		bool operator () (
			const string_with_sort_data<...> & s1,
			const string_with_sort_data<...> & s2) const;
	};

	/// Locale-dependent operations: case conversion and access to collation
	/// objects. All members are const and only declared here.
	class locale
	{
	public:
		/// Writes a lowercase conversion of [begin, end) to \a out.
		template <typename InputIterator, typename OutputIterator>
		void lowercase(InputIterator begin,
		               InputIterator end,
		               OutputIterator out) const;

		/// Writes an uppercase conversion of [begin, end) to \a out.
		template <typename InputIterator, typename OutputIterator>
		void uppercase(InputIterator begin,
		               InputIterator end,
		               OutputIterator out) const;

		// A sketch of what collation might look like: one accessor per
		// comparison aspect, each returning a collation object.
		collation collate_base_characters() const;
		collation collate_accents() const;
		collation collate_case() const;
		collation collate_punctuation() const;
		collation collate_tie_breaker() const;
	};

	/// Returns a locale object by value.
	/// NOTE(review): how the default is chosen (fixed, or taken from the
	/// environment) is not specified in this draft.
	locale default_locale();

	// ----------------------------------------------------------------------------
	// inline helpers
	// TODO add lots based on core functions above
	//  

	/// Returns true when \a ch is a separator of the "space" kind (as
	/// opposed to a line or paragraph separator).
	///
	/// The top-level category is checked first because the result of
	/// get_category_separator() for a non-separator is not specified.
	/// Declared inline so the header can be included in several
	/// translation units.
	inline bool is_space(codepoint ch)
	{
		return get_category (ch) == category::separator
			&& get_category_separator (ch) == category_separator::space;
	}

	/// Returns true when \a ch has one of the strong left-to-right
	/// bidirectional types (the bidi_char_type values starting at 0x100).
	///
	/// Declared inline so the header can be included in several
	/// translation units.
	inline bool is_strong_ltor(codepoint ch)
	{
		// The parentheses are required: == binds tighter than &.
		return (get_bidi_char_type (ch) & 0xF00) == 0x100;
	}

	/// Returns true when \a ch has one of the strong right-to-left
	/// bidirectional types (the bidi_char_type values starting at 0x200).
	///
	/// Declared inline so the header can be included in several
	/// translation units.
	inline bool is_strong_rtol(codepoint ch)
	{
		// The parentheses are required: == binds tighter than &.
		return (get_bidi_char_type (ch) & 0xF00) == 0x200;
	}

}	// namespace unicode
}	// namespace boost