#ifndef BOOST_EDIT_DISTANCE_HPP #define BOOST_EDIT_DISTANCE_HPP #include #include #include #include #include #include #include #include #include #include // this implementation of the edit distance is based on the article // An O(NP) sequence comparison algorithm, 1989, // by Wu, Manber, Myers, Miller namespace boost { namespace algorithm { namespace detail { template< bool cond, typename Iterator1T, typename Iterator2T > struct altcomp { }; // compare the first two iterator for equality template< typename Iterator1T, typename Iterator2T > struct altcomp< true, Iterator1T, Iterator2T > { static bool equal( Iterator1T it11, Iterator1T it12, Iterator2T it21, Iterator2T it22 ) { return it11 == it12; } }; // compare the last two iterator for equality template< typename Iterator1T, typename Iterator2T > struct altcomp< false, Iterator1T, Iterator2T > { static bool equal( Iterator1T it11, Iterator1T it12, Iterator2T it21, Iterator2T it22 ) { return it21 == it22; } }; template class edit_distance_impl { typedef BOOST_STRING_TYPENAME range_const_iterator::type Iterator1T; typedef BOOST_STRING_TYPENAME range_const_iterator::type Iterator2T; public: edit_distance_impl( const Range1T& _arg1, const Range2T& _arg2, PredicateT _comp ) : arg1(_arg1), arg2(_arg2), comp(_comp), M( size(arg1) ), N( size(arg2) ), delta(N - M), sd(M + 1), // source diagonal index td(N + 1), // target diagonal index p(-1), // compressed distance value fp(M+N+2, -1), // furthest point set y coord beg1( begin(_arg1) ), end1( end(_arg1) ), beg2( begin(_arg2) ), end2( end(_arg2) ) { } unsigned int distance() { if ( M == 0 ) return N; do { ++p; // update the furthest p-points y coord for diagonals // below the target diagonal for ( int k = -p + sd; k < td; ++k ) { fp[k] = dmove(k); // std::cout << "FS[" << (k-sd) << "," << p << "] = " // << fp[k] << std::endl; } // update the furthest p-points y coord for diagonals // above the target diagonal for ( int k = td + p; k > td; --k ) { fp[k] = dmove(k); // std::cout << "FS[" << (k-sd) << "," << p << "] = " // << fp[k] << std::endl; } // update the furthest p-point y coord for the target diagonal fp[td] = dmove(td); // std::cout << "FS[" << (td-sd) << "," << p << "] = " // << fp[td] << std::endl; } while ( fp[td] < N ); // iterate until the target belongs to the furthest point set return delta + 2*p; } private: // we move along the diagonal k until we reach a furthest point template< bool cond > unsigned int dmove( int k ) { unsigned int y = std::max(fp[k-1] + 1, fp[k+1]); unsigned int x = y - (k - sd); // std::cout << "x = " << x << std::endl; // std::cout << "y = " << y << std::endl; Iterator1T it_arg1 = beg1; Iterator2T it_arg2 = beg2; std::advance(it_arg1, x); std::advance(it_arg2, y); Iterator1T it_arg1_start = it_arg1; while( !is_end(it_arg1, it_arg2) && comp(*it_arg1, *it_arg2) ) { ++it_arg1; ++it_arg2; } return y + std::distance( it_arg1_start, it_arg1 ); } // when cond==true it returns true if it2==end2 // when cond==false it returns true if it1==end1 template< bool cond > bool is_end( Iterator1T it1, Iterator2T it2 ) { return altcomp< cond, Iterator1T, Iterator2T >:: equal(it2, end2, it1, end1); } private: static const bool ABOVE = true; static const bool BELOW = false; const Range1T& arg1; const Range2T& arg2; PredicateT comp; int M, N, delta, sd, td, p; std::vector fp; Iterator1T beg1, end1; Iterator2T beg2, end2; }; // end class } // end namespace detail template inline unsigned int edit_distance( const Range1T& arg1, const Range2T& arg2, PredicateT comp ) { if ( size(arg1) < size(arg2) ) { return detail::edit_distance_impl< Range1T, Range2T, PredicateT >(arg1, arg2, comp).distance(); } else { return detail::edit_distance_impl< Range1T, Range2T, PredicateT >(arg2, arg1, comp).distance(); } } } } // end namespace boost::algorithm #endif // BOOST_EDIT_DISTANCE_HPP