Boost logo

Boost :

From: Jens Nilsson (jensgnilsson_at_[hidden])
Date: 2003-01-14 15:32:20


Hi,

In processing xml files, configuration files etc I repeatedly find
that I need to compare strings to evaluate actions, settings etc.

In doing so, I'd like some efficient switch case statements that works with
strings. Now comparing a large set of strings does not seem efficient and
switch statements don't work with strings.

I saw the immutable string suggestions and the simple hash function it
provided. Now I thought that if I could use the hash function on my
arguments and provide a matching compile-time hash algorithm for my case
statements I could come close a solution.

With my knowledge of templates I got stuck on using strings as template
arguments. I solved that with a multi parameter template class where the
strings has to be breaken up by character (see code below). Is it possible
to work with strings ? Any suggestions on another approch ?

Is there any interest in a solution like this ?

Is there anything in boost already doing this that I've missed?

The good thing of using switch case staments with the compile-time hash is
that the compiler gives a compile-time error if the hashing algorithm
generates the same hash twice or more.

Regards,

/Jens

Source, and sample below

namespace hash
{
   // generice type, no implementaion
   template<typename CharType> struct retval;

   // specialization for char
   template<>
   struct retval<char>
   {
       typedef char char_type;
       typedef unsigned long size_type;

       retval(size_type v=0) : v_(v) {}

       operator size_type () { return v_; }

       size_type v_;
   };

   template<>
   struct retval<wchar_t>
   {
       typedef wchar_t char_type;
       typedef unsigned long size_type;

       retval(size_type v=0) : v_(v) {}

       operator size_type () { return v_; }

       size_type v_;
   };

   // generic function, no implementation
   template<typename CharType> retval<CharType> hash_of(const CharType* s);

   // specialization for char
   // hash_of algorithm stolen from the immutable string suggestion.
   template<> retval<char> hash_of(const char* s)
   {
       unsigned long h = 0;
       for ( ; *s; ++s)
           h = 5*h + *s;
       return retval<char>(h);
   }

   // specialization for wchar_t
   template<> retval<wchar_t> hash_of(const wchar_t* s)
   {
       unsigned long h = 0;
       for ( ; *s; ++s)
           h = 5*h + *s;
       return retval<wchar_t>(h);
   }

   /* A recursive version could look like this
   inline unsigned long recursive_hash_of(const char* s, unsigned long
h=0)
   {
       if (s == 0 || *s == '\0')
           return h;
       return recursive_hash_of(++s, 5*h + *s);
   }
   */

   // Compile time hash generation. Limit to 20 characher strings.
   // Would be nice to work with strings as template arguments
   // instead of single character template arguments.
   //
   template<const char C0, const char C1='\0', const char C2='\0', const
char C3='\0',
            const char C4='\0', const char C5='\0', const char C6='\0',
            const char C7='\0', const char C8='\0', const char C9='\0',
            const char C10='\0', const char C11='\0', const char C12='\0',
            const char C13='\0', const char C14='\0', const char C15='\0',
            const char C16='\0', const char C17='\0', const char C18='\0',
            const char C19='\0', const char C20='\0'>
   struct Compile
   {
       template<const char c, unsigned long h>
       struct HashCalc
       {
           enum { value = c == 0 ? h : h*5 + c };
       };

       enum { hash = HashCalc<C20, HashCalc<C19, HashCalc<C18, HashCalc<C17,
                      HashCalc<C16, HashCalc<C15, HashCalc<C14,
HashCalc<C13,
                      HashCalc<C12, HashCalc<C11, HashCalc<C10, HashCalc<C9,
                      HashCalc<C8, HashCalc<C7, HashCalc<C6, HashCalc<C5,
                      HashCalc<C4, HashCalc<C3, HashCalc<C2, HashCalc<C1,
                      HashCalc<C0,
0>::value>::value>::value>::value>::value>
                      
::value>::value>::value>::value>::value>::value>::value>::value>
                      
::value>::value>::value>::value>::value>::value>::value>::value};
   };

   template<const wchar_t C0, const wchar_t C1=L'\0', const wchar_t
C2=L'\0', const wchar_t C3=L'\0',
            const wchar_t C4=L'\0', const wchar_t C5=L'\0', const wchar_t
C6=L'\0',
            const wchar_t C7=L'\0', const wchar_t C8=L'\0', const wchar_t
C9=L'\0',
            const wchar_t C10=L'\0', const wchar_t C11=L'\0', const wchar_t
C12=L'\0',
            const wchar_t C13=L'\0', const wchar_t C14=L'\0', const wchar_t
C15=L'\0',
            const wchar_t C16=L'\0', const wchar_t C17=L'\0', const wchar_t
C18=L'\0',
            const wchar_t C19=L'\0', const wchar_t C20=L'\0'>
   struct CompileW
   {
       template<const wchar_t c, unsigned long h>
       struct HashCalc
       {
           enum { value = c == 0 ? h : h*5 + c };
       };

       enum { hash = HashCalc<C20, HashCalc<C19, HashCalc<C18, HashCalc<C17,
                      HashCalc<C16, HashCalc<C15, HashCalc<C14,
HashCalc<C13,
                      HashCalc<C12, HashCalc<C11, HashCalc<C10, HashCalc<C9,
                      HashCalc<C8, HashCalc<C7, HashCalc<C6, HashCalc<C5,
                      HashCalc<C4, HashCalc<C3, HashCalc<C2, HashCalc<C1,
                      HashCalc<C0,
0>::value>::value>::value>::value>::value>
                      
::value>::value>::value>::value>::value>::value>::value>::value>
                      
::value>::value>::value>::value>::value>::value>::value>::value};
   };

};

int main(int argc, char* argv[])
{
   printf("Hash testing!\n");

   using namespace hash;

   unsigned long u = hash_of("testing");
   printf("hash_of: %u\n", u);

   u = hash_of(L"testing");
   printf("hash_of: %u\n", u);

   typedef Compile<'t','e','s','t', 'i', 'n', 'g' > ATesting;
   printf("hash: %u\n", ATesting::hash );

   typedef CompileW<L't',L'e',L's',L't', L'i', L'n', L'g' > WTesting;
   printf("hash: %u\n", WTesting::hash );

   // compile time error if duplicate case entries are generated
   switch (hash_of("/print"))
   {
   case Compile<'/','n','e','w'>::hash:
       printf("/new option\n");
       break;
   case Compile<'/','o','p','e','n'>::hash:
       printf("/open option\n");
       break;
   case Compile<'/','p','r','i','n','t'>::hash:
       printf("/print option\n");
       break;
   }

   return 0;
}

_________________________________________________________________
The new MSN 8 is here: Try it free* for 2 months
http://join.msn.com/?page=dept/dialup


Boost list run by bdawes at acm.org, gregod at cs.rpi.edu, cpdaniel at pacbell.net, john at johnmaddock.co.uk