|
Boost : |
From: Jens Nilsson (jensgnilsson_at_[hidden])
Date: 2003-01-14 15:32:20
Hi,
In processing xml files, configuration files etc I repeatedly find
that I need to compare strings to evaluate actions, settings etc.
In doing so, I'd like some efficient switch case statements that works with
strings. Now comparing a large set of strings does not seem efficient and
switch statements don't work with strings.
I saw the immutable string suggestions and the simple hash function it
provided. Now I thought that if I could use the hash function on my
arguments and provide a matching compile-time hash algorithm for my case
statements I could come close a solution.
With my knowledge of templates I got stuck on using strings as template
arguments. I solved that with a multi parameter template class where the
strings has to be breaken up by character (see code below). Is it possible
to work with strings ? Any suggestions on another approch ?
Is there any interest in a solution like this ?
Is there anything in boost already doing this that I've missed?
The good thing of using switch case staments with the compile-time hash is
that the compiler gives a compile-time error if the hashing algorithm
generates the same hash twice or more.
Regards,
/Jens
Source, and sample below
namespace hash
{
// generice type, no implementaion
template<typename CharType> struct retval;
// specialization for char
template<>
struct retval<char>
{
typedef char char_type;
typedef unsigned long size_type;
retval(size_type v=0) : v_(v) {}
operator size_type () { return v_; }
size_type v_;
};
template<>
struct retval<wchar_t>
{
typedef wchar_t char_type;
typedef unsigned long size_type;
retval(size_type v=0) : v_(v) {}
operator size_type () { return v_; }
size_type v_;
};
// generic function, no implementation
template<typename CharType> retval<CharType> hash_of(const CharType* s);
// specialization for char
// hash_of algorithm stolen from the immutable string suggestion.
template<> retval<char> hash_of(const char* s)
{
unsigned long h = 0;
for ( ; *s; ++s)
h = 5*h + *s;
return retval<char>(h);
}
// specialization for wchar_t
template<> retval<wchar_t> hash_of(const wchar_t* s)
{
unsigned long h = 0;
for ( ; *s; ++s)
h = 5*h + *s;
return retval<wchar_t>(h);
}
/* A recursive version could look like this
inline unsigned long recursive_hash_of(const char* s, unsigned long
h=0)
{
if (s == 0 || *s == '\0')
return h;
return recursive_hash_of(++s, 5*h + *s);
}
*/
// Compile time hash generation. Limit to 20 characher strings.
// Would be nice to work with strings as template arguments
// instead of single character template arguments.
//
template<const char C0, const char C1='\0', const char C2='\0', const
char C3='\0',
const char C4='\0', const char C5='\0', const char C6='\0',
const char C7='\0', const char C8='\0', const char C9='\0',
const char C10='\0', const char C11='\0', const char C12='\0',
const char C13='\0', const char C14='\0', const char C15='\0',
const char C16='\0', const char C17='\0', const char C18='\0',
const char C19='\0', const char C20='\0'>
struct Compile
{
template<const char c, unsigned long h>
struct HashCalc
{
enum { value = c == 0 ? h : h*5 + c };
};
enum { hash = HashCalc<C20, HashCalc<C19, HashCalc<C18, HashCalc<C17,
HashCalc<C16, HashCalc<C15, HashCalc<C14,
HashCalc<C13,
HashCalc<C12, HashCalc<C11, HashCalc<C10, HashCalc<C9,
HashCalc<C8, HashCalc<C7, HashCalc<C6, HashCalc<C5,
HashCalc<C4, HashCalc<C3, HashCalc<C2, HashCalc<C1,
HashCalc<C0,
0>::value>::value>::value>::value>::value>
::value>::value>::value>::value>::value>::value>::value>::value>
::value>::value>::value>::value>::value>::value>::value>::value};
};
template<const wchar_t C0, const wchar_t C1=L'\0', const wchar_t
C2=L'\0', const wchar_t C3=L'\0',
const wchar_t C4=L'\0', const wchar_t C5=L'\0', const wchar_t
C6=L'\0',
const wchar_t C7=L'\0', const wchar_t C8=L'\0', const wchar_t
C9=L'\0',
const wchar_t C10=L'\0', const wchar_t C11=L'\0', const wchar_t
C12=L'\0',
const wchar_t C13=L'\0', const wchar_t C14=L'\0', const wchar_t
C15=L'\0',
const wchar_t C16=L'\0', const wchar_t C17=L'\0', const wchar_t
C18=L'\0',
const wchar_t C19=L'\0', const wchar_t C20=L'\0'>
struct CompileW
{
template<const wchar_t c, unsigned long h>
struct HashCalc
{
enum { value = c == 0 ? h : h*5 + c };
};
enum { hash = HashCalc<C20, HashCalc<C19, HashCalc<C18, HashCalc<C17,
HashCalc<C16, HashCalc<C15, HashCalc<C14,
HashCalc<C13,
HashCalc<C12, HashCalc<C11, HashCalc<C10, HashCalc<C9,
HashCalc<C8, HashCalc<C7, HashCalc<C6, HashCalc<C5,
HashCalc<C4, HashCalc<C3, HashCalc<C2, HashCalc<C1,
HashCalc<C0,
0>::value>::value>::value>::value>::value>
::value>::value>::value>::value>::value>::value>::value>::value>
::value>::value>::value>::value>::value>::value>::value>::value};
};
};
int main(int argc, char* argv[])
{
printf("Hash testing!\n");
using namespace hash;
unsigned long u = hash_of("testing");
printf("hash_of: %u\n", u);
u = hash_of(L"testing");
printf("hash_of: %u\n", u);
typedef Compile<'t','e','s','t', 'i', 'n', 'g' > ATesting;
printf("hash: %u\n", ATesting::hash );
typedef CompileW<L't',L'e',L's',L't', L'i', L'n', L'g' > WTesting;
printf("hash: %u\n", WTesting::hash );
// compile time error if duplicate case entries are generated
switch (hash_of("/print"))
{
case Compile<'/','n','e','w'>::hash:
printf("/new option\n");
break;
case Compile<'/','o','p','e','n'>::hash:
printf("/open option\n");
break;
case Compile<'/','p','r','i','n','t'>::hash:
printf("/print option\n");
break;
}
return 0;
}
_________________________________________________________________
The new MSN 8 is here: Try it free* for 2 months
http://join.msn.com/?page=dept/dialup
Boost list run by bdawes at acm.org, gregod at cs.rpi.edu, cpdaniel at pacbell.net, john at johnmaddock.co.uk