Boost logo

Boost :

Subject: [boost] [string]
From: remi.chateauneu_at_[hidden]
Date: 2009-10-26 19:45:20


Hi,

Is there any interest for a class for computing Levenshtein and Damerau
distances ?
It is inspired from http://en.wikipedia.org/wiki/Levenshtein_distance
with some enhancements:

- The buffer can be reused thanks to a class, avoiding reallocations.
- Only one buffer of twice the size of the longest string is necessary
(Instead of the product of the size of the two input sequences). (Three
times the size for Damerau distance).
- It works with any input iterator category for one string, any iterator
except input_iterator for the second string.
- The distance type is templatized (Allowing to use short ints to save
buffer space).
- Any type of element can be used (Not only chars).
- More efficient than the other implementations I could find on the web,
completely inlined (Specific care to maximize memory locality).
- The cost function can be specified with a functor (Insertion,
deletion, replacement, transposition).
- Tested with GCC 3.2.3 and 4.2.3, Visual C++ 2005.

There was a proposal for such a function two years ago (edit_distance),
but maybe this one, being faster, might be of interest for the string
algorithms library ?

The code is not very big so please let me attach it here.

Thanks


/* boost levenshtein.hpp header file
 *
 * Copyright Remi Chateauneu 2009
 * Distributed under the Boost Software License, Version 1.0. (See
 * accompanying file LICENSE_1_0.txt or copy at
 * http://www.boost.org/LICENSE_1_0.txt)
 *
 * $Id: $
 *
 * Idea by x
 */

#ifndef BOOST_LEVENSHTEIN_HPP
#define BOOST_LEVENSHTEIN_HPP

#include <string.h>
#include <vector>

namespace boost {

/// Given a distance formula, levenshtein or damerau, does some processing of the input parameters.
template< class Distance, class Formula >
class distance_base
{
        /// This uses Curiously recurring template pattern
        template < class Iter1, class Iter2 >
        inline Distance distance_noswap(
                Iter1 begS1,
                Iter1 endS1,
                Iter2 ptrS2,
                size_t len2 )
        {
                return static_cast< Formula * >(this)->distance_noswap( begS1, endS1, ptrS2, len2 );
        };
public:

        /// If both lengths are available, we can swap to increase speed.
        template < class Iter1, class Iter2 >
        Distance distance_swap(
                Iter1 begS1,
                Iter1 endS1,
                size_t len1,
                Iter2 begS2,
                Iter2 endS2,
                size_t len2 )
        {
                /// The longer string should go to the inner loop
                return ( len1 < len2 )
                        ? distance_noswap( begS1, endS1, begS2, len2 )
                        : distance_noswap( begS2, endS2, begS1, len1 );
        };

        /// For plain containers. We assume that 'size()' has a constant cost.
        template < class Container1, class Container2 >
        Distance operator()(
                const Container1 & s1,
                const Container2 & s2 )
        {
                return distance_swap( s1.begin(), s1.end(), s1.size(), s2.begin(), s2.end(), s2.size() );
        };

        /// For plain C-style strings. No need to create a temporary string.
        Distance operator()(
                const char * ptrS1,
                const char * ptrS2 )
        {
                size_t len1 = strlen(ptrS1);
                size_t len2 = strlen(ptrS2);
                return distance_swap( ptrS1, ptrS1 + len1, len1, ptrS2, ptrS2 + len2, len2 );
        };

        /// If no iterator is an input_iterator, we may be able to swap them for performance.
        template< class Iter1, class Iter2 >
        Distance distance_iterator(
                Iter1 beg1,
                Iter1 end1,
                std::forward_iterator_tag,
                Iter2 beg2,
                Iter2 end2,
                std::forward_iterator_tag )
        {
                return distance_swap(
                                beg1, end1, std::distance( beg1, end1 ),
                                beg2, end2, std::distance( beg2, end2 ) );
        };

        /// If the first iterator only is an input_iterator, then process it.
        template< class Iter1, class Iter2 >
        Distance distance_iterator(
                Iter1 beg1,
                Iter1 end1,
                std::input_iterator_tag,
                Iter2 beg2,
                Iter2 end2,
                std::forward_iterator_tag )
        {
                return distance_noswap(
                                beg1, end1,
                                beg2, std::distance( beg2, end2 ) );
        };

        /// If the first iterator only is an input_iterator, then swap them first.
        template< class Iter1, class Iter2 >
        Distance distance_iterator(
                Iter1 beg1,
                Iter1 end1,
                std::forward_iterator_tag,
                Iter2 beg2,
                Iter2 end2,
                std::input_iterator_tag )
        {
                return distance_noswap(
                                beg2, end2,
                                beg1, std::distance( beg1, end1 ) );
        };

        /// According to the type of iterator, not all operations are possible.
        template< class Iter1, class Iter2 >
        Distance operator()(
                Iter1 beg1,
                Iter1 end1,
                Iter2 beg2,
                Iter2 end2 )
        {
                return distance_iterator(
                                beg1, end1, typename std::iterator_traits< Iter1 >::iterator_category(),
                                beg2, end2, typename std::iterator_traits< Iter2 >::iterator_category() );
        };
}; // distance_base

/// A user functor must have this interface.
/// NOTE: The distance must always be positive.
template< class Distance, class Char >
struct dummy_functor
{
        /// Char subsitution.
        Distance operator()( Char, Char ) const ;

        /// Char insertion or deletion. The cost
        /// is the same for both cases otherwise the distance
        /// would not be commutative.
        Distance operator()( Char ) const ;

        /// Char transposition (Option, for Damerau only).
        /// This does not depends on the character.
        Distance operator()(void) const ;
};

/// Default functor for the most common case. It is never instantiated.
/// Equivalent to a cost of '1' for character insertion, deletion or substitution.
struct distance_null_functor
{};

/// Specialization for the default case, for performance.
/// This wraps a user functor, and compute the core test of Levenshtein distance.
template < class Distance, class Functor >
class cell_min
{
        /// A user can specify the cost of char insertion, deletion and substitution.
        /// It must match the interface of dummy_functor.
        Functor _ftor ;
public:
        cell_min() {};

        cell_min( const Functor & refFtor ) : _ftor( refFtor ) {};

        /// The cost of character insertion and deletion are the same otherwise
        /// it would not be commutative, and would not be a distance.
        template< class Char >
        Distance min_levenshtein(
                const Distance & distIJ,
                const Distance & distI1J,
                const Distance & distIJ1,
                const Char & chrLeft,
                const Char & chrRight )
        {
                return std::min(
                        distIJ + _ftor( chrLeft, chrRight ),
                        std::min(
                                distI1J + _ftor( chrLeft ),
                                distIJ1 + _ftor( chrRight ) ) );
        };

        /// The operator without argument computes the cost of a transposition
        /// and does not depend on the transposed characters.
        /// Damerau is tested on top of Levenshtein comparison.
        Distance min_damerau(
                const Distance & prevDiag,
                const Distance & currDiag )
        {
                return std::min(
                        Distance( prevDiag + _ftor() ),
                        currDiag );
        };
}; // cell_min

/// Does the core comparison of Levenshtein distance when no
/// user functor is provided. This is the most common case
/// and must be the fastest.
template < class Distance >
class cell_min< Distance, distance_null_functor >
{
public:
        cell_min() {};

        cell_min( const distance_null_functor & ) {};

        /// Here we save two additions, than if the general class were used.
        template< class Char >
        Distance min_levenshtein(
                const Distance & distIJ,
                const Distance & distI1J,
                const Distance & distIJ1,
                const Char & ,
                const Char & ) const
        {
                return 1 + std::min( distIJ, std::min( distI1J, distIJ1 ) );
        };

        /// The usual cost of a transposition can be set to 1.
        /// Damerau is tested on top of Levenshtein comparison.
        Distance min_damerau(
                const Distance & prevDiag,
                const Distance & currDiag ) const
        {
                return std::min(
                        prevDiag,
                        currDiag + 1 );
        };
}; // cell_min (Specialization)

/// Most of times, Distance is 'unsigned int'.
template< class Distance >
class distance_buffer
{
        /// TODO: Use temporary buffer.
        std::vector< Distance > _buffer ;

        size_t _sz ;
public:
        distance_buffer() : _sz(0) {};

        void resize( size_t sz )
        {
                if( sz <= _sz ) return ;
                _buffer.resize( sz );
                _sz = _buffer.size();
        };

        Distance * start(void)
        {
                return &_buffer[0];
        }
}; // distance_buffer

/// This wraps a buffer which can be reused, because buffer allocations are costly.
/// The distance may be char, short, int: This can reduce the buffer size.
/// TODO: Do not resize the buffer but use the stack if less than X elements.
/// TODO: Use a specific buffer allocation.
template < class Distance = unsigned int, class Functor = distance_null_functor >
class levenshtein : public distance_base< Distance, levenshtein< Distance, Functor > >
{
        /// This wraps a user-provided optional functor.
        cell_min< Distance, Functor > _minFtor ;

        /// Max size: Twice the longest string + 2.
        distance_buffer< Distance > _buffer ;

public:
        levenshtein() {};

        levenshtein( const Functor & refFtor ) : _minFtor( refFtor ) {};

        /// The size of the first range is not needed, any iterator type will do.
        template < class Iter1, class Iter2 >
        Distance distance_noswap(
                Iter1 begS1,
                Iter1 endS1,
                Iter2 ptrS2,
                size_t len2 )
        {
                typedef typename std::iterator_traits< Iter1 >::value_type Char ;

                /// The buffer contains two lines of distance.
                _buffer.resize( 2 * ( len2 + 1 ) );

                Distance * localPtr = _buffer.start() ;
                for(unsigned j = 0; j <= len2; ++j)
                {
                        localPtr[ 2 * j ] = (Distance)j;
                }
                const Distance * endPtr = localPtr + 2 * len2 ;

                /// One cell out of two is associated to one line, the other to the next line.
                /// The two lines are swapped at each loop.
                for( unsigned i = 1; ; )
                {
                        localPtr = _buffer.start() ;
                        localPtr[1] = Distance(i++) /* i,j+1 */ ;
                        Char chr1 = *begS1++ ;

                        // Restart from the beginning of the second string.
                        for( Iter2 currS2 = ptrS2 ; localPtr != endPtr ; localPtr += 2 )
                        {
                                const Char chr2 = *currS2++ ;
                                Distance costDiag = localPtr[ 0 ];
                                /// Optimisation: The distance cannot increase if the chars are identical.
                                if( chr1 != chr2 )
                                {
                                        /// Good data locality in four contiguous cells of the buffer.
                                        costDiag = _minFtor.min_levenshtein(
                                                        costDiag, /* i,j */
                                                        localPtr[ 2 ] /* i+1,j */,
                                                        localPtr[ 1 ] /* i,j+1 */,
                                                        chr1, chr2 );
                                }
                                localPtr[ 3 ] = costDiag ;
                        }

                        /// The loop is unrolled two steps by two.
                        if( begS1 == endS1 )
                        {
                                /// After an even number of half-loops (and lines).
                                return localPtr[ 1 ] /* i+1,j+1 */ ;
                        }

                        /// This starts an odd line.
                        localPtr = _buffer.start() ;
                        localPtr[0] = Distance(i++) /* i,j+1 */ ;
                        chr1 = *begS1++;

                        // Restart from the beginning of the second string.
                        for( Iter2 currS2 = ptrS2 ; localPtr != endPtr ; localPtr += 2 )
                        {
                                const Char chr2 = *currS2++ ;
                                Distance costDiag = localPtr[ 1 ];
                                /// Optimisation: The distance cannot increase if the chars are identical.
                                if( chr1 != chr2 )
                                {
                                        /// Good data locality in four contiguous cells of the buffer.
                                        costDiag = _minFtor.min_levenshtein(
                                                        costDiag, /* i,j */
                                                        localPtr[ 3 ] /* i+1,j */,
                                                        localPtr[ 0 ] /* i,j+1 */,
                                                        chr1, chr2 );
                                }
                                localPtr[ 2 ] = costDiag ;
                        };

                        if( begS1 == endS1 )
                        {
                                /// After an odd number of lines.
                                return localPtr[ 0 ] /* i+1,j+1 */ ;
                        }
                }
                /// Should never happen.
                return Distance(0);
        }; // distance_noswap
}; // levenshtein

/// This wraps a buffer which can be reused, because buffer allocations are costly.
/// The distance may be char, short, int: This can reduce the buffer size.
template < class Distance = unsigned int, class Functor = distance_null_functor >
class damerau : public distance_base< Distance, damerau< Distance, Functor > >
{
        /// This wraps a user-provided optional functor.
        cell_min< Distance, Functor > _minFtor ;

        /// Max size: Twice the longest string + 2.
        distance_buffer< Distance > _buffer ;

public:
        damerau() {};

        damerau( const Functor & refFtor ) : _minFtor( refFtor ) {};

        /// The size of the first range is not needed, any iterator will do.
        template < class Iter1, class Iter2 >
        Distance distance_noswap(
                Iter1 begS1,
                Iter1 endS1,
                Iter2 ptrS2,
                size_t len2 )
        {
                typedef typename std::iterator_traits< Iter1 >::value_type Char ;

                /// The buffer contains two lines of distance.
                _buffer.resize( 3 * ( len2 + 3 ) );

                Distance * localPtr = _buffer.start() ;
                for(unsigned j = 0; j <= len2; ++j)
                {
                        localPtr[ 3 * j] = (Distance)j;
                }

                /// One cell out of two is associated to one line, the other to the next line.
                int offPrev = 2, offCurr = 0, offNext = 1 ;

                Char chr1, chr1_prev ;

                /// The three lines are shifted at each loop.
                for(
                        unsigned i = 1;
                        begS1 != endS1 ;
                        chr1_prev = chr1, ++begS1, ++i,
                                offCurr = offNext, offNext = offPrev, offPrev = offPrev == 2 ? 0 : offPrev + 1 )
                {
                        Distance * localPtr = _buffer.start() ;
                        localPtr[offNext] = Distance(i) /* i+1,j */ ;
                        chr1 = *begS1 ;
                        Char chr2, chr2_prev ;

                        Iter2 currS2 = ptrS2 ;
                        for(unsigned j = 0; j < len2; chr2_prev = chr2, ++currS2, ++j, localPtr += 3 )
                        {
                                chr2 = *currS2 ;
                                Distance costDiag = localPtr[ offCurr ] /* i,j */ ;
                                /// Optimisation: The distance cannot increase if the chars are identical.
                                /// NOTE: Without this test, a valid transposition could not be checked.
                                if( chr1 != chr2 )
                                {
                                        /// Good data locality in four contiguous cells of the buffer.
                                        costDiag = _minFtor.min_levenshtein(
                                                        costDiag,
                                                        localPtr[ 3 + offCurr ],
                                                        localPtr[ offNext ],
                                                        chr1, chr2 );
                                        /// If beyond the first column and line, check for a transposition.
                                        /// This makes sense only if the current chars are different.
                                        if( ( i > 1 )
                                                && ( j > 0 )
                                           && ( chr1 == chr2_prev )
                                                && ( chr1_prev == chr2 ) )
                                        {
                                                costDiag= _minFtor.min_damerau(
                                                        costDiag,
                                                        localPtr[ offPrev - 3 ] /* i-1,j-1 */ );
                                        }
                                }
                                localPtr[ 3 + offNext ] = costDiag /* i+1,j+1 */ ;
                        }
                }

                return _buffer.start()[ 3 * len2 + offCurr ];
        }; // distance_noswap
}; // damerau

} // namespace boost

#endif /* BOOST_LEVENSHTEIN_HPP */


#include <iostream>
#include <string>
#include <list>
#include <vector>
#include <iterator>

#include "levenshtein.hpp"

using namespace std ;

/// This can be later used for creating combinations of tests.
static void tst_perf(size_t nbLoops)
{
        static const char * tst[] = {
                "a",
                "c",
                "bc",
                "abc",
                "bcdef",
                "abcdegh",
                "acdeghi",
                "acedghi",
               "abcdfgi",
               "abcfgi",
               "abcfig" };

        const size_t nbTst = sizeof(tst)/sizeof(tst[0]);

        boost::levenshtein<int> tmpLev ;
        boost::damerau<int> tmpDam ;

        while( nbLoops-- )
        {
                for( size_t i = 0 ; i < nbTst ; ++i )
                {
                        for( size_t j = i + 1 ; j < nbTst ; ++j )
                        {
                                int distLev = tmpLev( tst[i], tst[j] );
                                int distDam = tmpDam( tst[i], tst[j] );
                        };
                };
        }
}

// http://en.wikibooks.org/wiki/Algorithm_Implementation/Strings/Levenshtein_distance#C.2B.2B
template <class T> size_t edit_distance(const T& s1, const T& s2)
{
        const size_t len1 = s1.size(), len2 = s2.size();
        vector<vector<unsigned int> > d(len1 + 1, vector<unsigned int>(len2 + 1));

        d[0][0] = 0;
        for(unsigned int i = 1; i <= len1; ++i) d[i][0] = i;
        for(unsigned int i = 1; i <= len2; ++i) d[0][i] = i;

        for(unsigned int i = 1; i <= len1; ++i)
                for(unsigned int j = 1; j <= len2; ++j)
                        d[i][j] = std::min(
                                std::min(d[i - 1][j] + 1,
                                        d[i][j - 1] + 1),
                                        d[i - 1][j - 1] + (s1[i - 1] == s2[j - 1] ? 0 : 1) );
        return d[len1][len2];
}

/// Classical implementation. For comparing results.
size_t levenshtein_dist(const std::string & source, const std::string & target)
{
        const size_t n = source.length();
        const size_t m = target.length();
        if (n == 0) return m;
        if (m == 0) return n;
        
        vector<vector<size_t> > matrix(n + 1, vector<size_t>(m + 1,0));
        
        for (size_t i = 0; i <= n; i++) {
                matrix[i][0]=i;
        }
        
        for (size_t j = 0; j <= m; j++) {
                matrix[0][j]=j;
        }
        
        for (size_t i = 1; i <= n; i++) {
                char s_i = source[i-1];
                
                for (size_t j = 1; j <= m; j++) {
                        char t_j = target[j-1];
                        
                        int cost;
                        if (s_i == t_j) {
                                cost = 0;
                        }
                        else {
                                cost = 1;
                        }
                        
                        size_t above = matrix[i-1][j];
                        size_t left = matrix[i][j-1];
                        size_t diag = matrix[i-1][j-1];
                        size_t cell = std::min( above + 1, std::min(left + 1, diag + cost));

                        matrix[i][j]=cell;
                }
        }
        
        return matrix[n][m];
}

/// Classical implementation. For comparing results.
size_t damerau_dist(const std::string & source, const std::string & target)
{
        const size_t n = source.length();
        const size_t m = target.length();
        if (n == 0) return m;
        if (m == 0) return n;
        
        vector<vector<size_t> > matrix(n + 1, vector<size_t>(m + 1,0));
        
        for (size_t i = 0; i <= n; i++) {
                matrix[i][0]=i;
        }
        
        for (size_t j = 0; j <= m; j++) {
                matrix[0][j]=j;
        }
        
        for (size_t i = 1; i <= n; i++) {
                char s_i = source[i-1];
                
                for (size_t j = 1; j <= m; j++) {
                        char t_j = target[j-1];
                        
                        int cost;
                        if (s_i == t_j) {
                                cost = 0;
                        }
                        else {
                                cost = 1;
                        }
                        
                        size_t above = matrix[i-1][j];
                        size_t left = matrix[i][j-1];
                        size_t diag = matrix[i-1][j-1];
                        size_t cell = std::min( above + 1, std::min(left + 1, diag + cost));
                        
                        // Step 6A: Cover transposition, in addition to deletion,
                        // insertion and substitution. This step is taken from:
                        // Berghel, Hal ; Roach, David : "An Extension of Ukkonen's
                        // Enhanced Dynamic Programming ASM Algorithm"
                        // (http://www.acm.org/~hlb/publications/asm/asm.html)

           if( (i > 1)
                           && (j > 1)
                           && (source[i-2] == t_j)
                           && (s_i == target[j-2]) )
                   {
                           cell = std::min( cell, matrix[i-2][j-2] + 1 );
                   }
                        
                        matrix[i][j]=cell;
                }
        }
        
        return matrix[n][m];
}

struct TstData
{
        const char * _left ;
        const char * _right ;
        unsigned int _lev ;
        unsigned int _dam ;
};

/// We can check whether the result is OK.
static const TstData tst_array[] = {
        { "a","c", 1, 1 },
        { "a","bc", 2, 2 },
        { "ab","ba", 2, 1 },
        { "a","abc", 2, 2 },
        { "a","bcdef", 5, 5 },
        { "a","abcdegh", 6, 6 },
        { "a","acdeghi", 6, 6 },
        { "a","acedghi", 6, 6 },
        { "a","abcdfgi", 6, 6 },
        { "a","abcfgi", 5, 5 },
        { "a","abcfig", 5, 5 },
        { "c","bc", 1, 1 },
        { "c","abc", 2, 2 },
        { "c","bcdef", 4, 4 },
        { "c","abcdegh", 6, 6 },
        { "c","acdeghi", 6, 6 },
        { "c","acedghi", 6, 6 },
        { "c","abcdfgi", 6, 6 },
        { "c","abcfgi", 5, 5 },
        { "c","abcfig", 5, 5 },
        { "bc","abc", 1, 1 },
        { "abcd","badc", 3, 2 },
        { "bc","bcdef", 3, 3 },
        { "bc","abcdegh", 5, 5 },
        { "bc","acdeghi", 6, 6 },
        { "bc","acedghi", 6, 6 },
        { "bc","abcdfgi", 5, 5 },
        { "bc","abcfgi", 4, 4 },
        { "bc","abcfig", 4, 4 },
        { "abc","bcdef", 4, 4 },
        { "abc","abcdegh", 4, 4 },
        { "abc","acdeghi", 6, 6 },
        { "abc","acedghi", 6, 6 },
        { "abc","abcdfgi", 4, 4 },
        { "abc","abcfgi", 3, 3 },
        { "abc","abcfig", 3, 3 },
        { "bcdef","abcdegh", 3, 3 },
        { "bcdef","acdeghi", 4, 4 },
        { "bcdef","acedghi", 5, 5 },
        { "bcdef","abcdfgi", 4, 4 },
        { "bcdef","abcfgi", 4, 4 },
        { "bcdef","abcfig", 4, 4 },
        { "abcdegh","acdeghi", 2, 2 },
        { "abcdegh","acedghi", 4, 3 },
        { "abcdegh","abcdfgi", 2, 2 },
        { "abcdegh","abcfgi", 3, 3 },
        { "abcdegh","abcfig", 3, 3 },
        { "acdeghi","acedghi", 2, 1 },
        { "cadeghi","acedghi", 3, 2 },
        { "acdeghi","abcdfgi", 3, 3 },
        { "acdeghi","abcfgi", 4, 4 },
        { "acdeghi","abcfig", 5, 5 },
        { "acedghi","abcdfgi", 4, 4 },
        { "acedghi","abcfgi", 4, 4 },
        { "acedghi","abcfig", 5, 5 },
        { "abcdfgi","abcfgi", 1, 1 },
        { "abcdfgi","abcfig", 3, 2 },
        { "abcfgi","abcfig", 2, 1 }
};

static const size_t nbArr = sizeof(tst_array)/sizeof(tst_array[0]);

static boost::levenshtein<int> staticLev ;
static boost::damerau<int> staticDam ;

/// This is for checking the assembly code generated.
static int assembly_levenshtein( const char * a, const char * b )
{
        return staticLev( a, b );
}

static void tst_values(void)
{
        for( size_t i = 0; i < nbArr ; ++i )
        {
                const TstData * p = tst_array + i ;
                size_t valL = staticLev( p->_left, p->_right );
                size_t valD = staticDam( p->_left, p->_right );
                size_t edtL = edit_distance( std::string(p->_left), std::string(p->_right) );
                size_t oldL = levenshtein_dist( p->_left, p->_right );
                size_t oldD = damerau_dist( p->_left, p->_right );

                if( ( valL != p->_lev )
                 || ( oldL != p->_lev )
                 || ( edtL != p->_lev ) )
                {
                        std::cout << "Error L:"
                                << "\tvalL=" << valL
                                << "\tp->lev=" << p->_lev
                                << "\toldL=" << oldL
                                << "\tedtL=" << edtL
                                << "\tp->left=" << p->_left
                                << "\tp->right=" << p->_right
                                << "\n" ;
                };

                if( ( valD != p->_dam )
                 || ( oldD != p->_dam ) )
                {
                        std::cout << "Error D:"
                                << "\tvalD=" << valD
                                << "\tp->dam=" << p->_dam
                                << "\toldD=" << oldD
                                << "\tp->left=" << p->_left
                                << "\tp->right=" << p->_right
                                << "\n" ;
                };

        };
}

int main( int argC, const char ** argV )
{
        tst_values();

        if( argC > 1 )
        {
                int nbTst ;
                sscanf( argV[1], "%d", &nbTst );
                tst_perf(nbTst);
        };
        /// So it it not optimized away.
        std::cout << assembly_levenshtein << "\n" ;
};

/// This is for checking the assembly code generated.
int assembly_damerau( const char * a, const char * b )
{
        return staticLev( a, b );
}

/// This tests some specific compile-time conditions.
template< class Formula >
static int compile_formula(Formula refFrm)
{
        std::list< char > chrLst ;
        vector< char > chrVec ;

        std::istream_iterator<char> chrStrm ;

        refFrm( chrLst, chrVec );
        refFrm( chrStrm, chrStrm, chrVec.begin(), chrVec.end() );
        refFrm( chrLst.begin(), chrLst.end(), chrStrm, chrStrm );
};

struct my_functor
{
        short operator()( char a, char b ) const
        {
                return 2 ;
        };

        short operator()( char a ) const
        {
                return 1 ;
        };

        short operator()(void) const
        {
                return 1 ;
        };
};

/// This tests some specific compile-time conditions.
static int compile_test(void)
{
        boost::levenshtein<int> tmpLev ;
        compile_formula( tmpLev );

        boost::damerau<long long> tmpDam ;
        compile_formula( tmpDam );

        boost::levenshtein<short,my_functor> tmpLevF ;
        compile_formula( tmpLevF );

        my_functor tmpFunc ;
        boost::damerau<short, my_functor> tmpDamF( tmpFunc ) ;
        compile_formula( tmpDamF );
};


Boost list run by bdawes at acm.org, gregod at cs.rpi.edu, cpdaniel at pacbell.net, john at johnmaddock.co.uk