Boost logo

Boost Users :

Subject: Re: [Boost-users] [boost.numeric] Poor Performance of numeric_cast
From: Tang Jiang Jun (tangjiangjun_at_[hidden])
Date: 2012-10-15 23:50:36


Hi,

I modified my code so that it can run in release mode without unintended
optimization, and now the performance is acceptable. However, there is
definitely some runtime overhead even when no overflow happens, and that
overhead takes about as much extra time as the plain cast itself. I think
this should be mentioned in the numeric_cast documentation, because if the
cast is the core step of an algorithm and is executed heavily, this
overhead will impact performance significantly.

The following is the benchmark code after the modification, and the result of
running it on my computer.

CODE
#include <boost/numeric/conversion/cast.hpp>
#include <boost/format.hpp>
#include <boost/cstdint.hpp>
#include <boost/chrono.hpp>
#include <iostream>

using namespace std;
using namespace boost;
using namespace boost::numeric;
using namespace boost::chrono;

typedef void (*PROFILE_FUNC)( uint32_t, uint32_t& );

// Times one benchmark routine and returns the average elapsed time per
// iteration.
//
// _profileFunc  routine to measure; it is called once with the iteration
//               count and a reference to an accumulator initialised to 0.
// _count        iteration count handed to _profileFunc; also the divisor
//               used to average the elapsed time.
//
// The accumulator is written to cout after the second clock reading, so the
// printing itself is not part of the measured interval. (Presumably the sum
// is printed so the optimizer cannot discard the measured loop.)
//
// Returns a zero duration when _count is 0 instead of dividing by zero.
nanoseconds profile( PROFILE_FUNC _profileFunc, uint32_t _count )
{
    const high_resolution_clock::time_point start = high_resolution_clock::now();

    uint32_t sum = 0;
    _profileFunc( _count, sum );

    const high_resolution_clock::duration elapsed = high_resolution_clock::now() - start;

    cout << sum << endl;

    // Dividing a duration by an integral zero is undefined behaviour; there
    // is no meaningful per-iteration average for an empty run.
    if( _count == 0 )
    {
        return nanoseconds( 0 );
    }

    nanoseconds perIteration = elapsed / _count;
    return perIteration;
}

// Baseline for the integer narrowing benchmark.
//
// Walks a 64-bit counter over [0, _count), narrows each value to 32 bits
// with an unchecked static_cast and adds it to _sum (which is accumulated
// into, not reset).
void native_integer_cast( uint32_t _count, uint32_t& _sum )
{
    uint64_t value = 0;

    while( value < _count )
    {
        const uint32_t narrowed = static_cast< uint32_t >( value );
        _sum += narrowed;
        ++value;
    }
}

void boost_integer_cast( uint32_t _count, uint32_t& _sum )
{
    for( uint64_t n = 0; n < _count; ++n )
    {
        try
        {
            _sum += numeric_cast< uint32_t >( n );
        }
        catch( const bad_numeric_cast& e )
        {
            cout << e.what() << endl;
        }
    }
}

// Baseline for the integer-to-float benchmark.
//
// Converts every integer in [0, _count) to float with an unchecked
// static_cast, accumulates the results in a float, and finally overwrites
// _sum with that total converted back to a 32-bit unsigned integer.
void native_itof_cast( uint32_t _count, uint32_t& _sum )
{
    float total = 0.0f;
    uint32_t value = 0;

    while( value < _count )
    {
        const float converted = static_cast< float >( value );
        total += converted;
        ++value;
    }

    // Assignment, not accumulation: the previous content of _sum is dropped.
    _sum = static_cast< uint32_t >( total );
}

void boost_itof_cast( uint32_t _count, uint32_t& _sum )
{
    float fsum = 0.0f;

    for( uint32_t n = 0; n < _count; ++n )
    {
        try
        {
            fsum += numeric_cast< float >( n );
        }
        catch( const bad_numeric_cast& e )
        {
            cout << e.what() << endl;
        }
    }

    _sum = numeric_cast< uint32_t >( fsum );
}

// Baseline for the float-to-integer benchmark.
//
// Steps a float through 0.0f, 1.0f, 2.0f, ... for _count iterations,
// converts each value to a 32-bit unsigned integer with an unchecked
// static_cast and adds it to _sum (which is accumulated into, not reset).
//
// The iterations are counted with an integer index rather than by comparing
// the float against _count. Assuming IEEE-754 single precision, the float
// stops advancing at 2^24 (16777216.0f + 1.0f rounds back to 16777216.0f),
// so a loop bounded by the float itself never terminates for larger counts.
// For counts up to 2^24 the converted values are the same either way.
void native_ftoi_cast( uint32_t _count, uint32_t& _sum )
{
    float f = 0.0f;

    for( uint32_t i = 0; i < _count; ++i )
    {
        _sum += static_cast< uint32_t >( f );
        f += 1.0f;
    }
}

void boost_ftoi_cast( uint32_t _count, uint32_t& _sum )
{
    for( float f = 0.0f; f < _count; f += 1.0f )
    {
        try
        {
            _sum += numeric_cast< uint32_t >( f );
        }
        catch( const bad_numeric_cast& e )
        {
            cout << e.what() << endl;
        }
    }
}

int main()
{
    const static int32_t COUNT = 10000;

    nanoseconds nsNativeIntegerCast = profile( native_integer_cast, COUNT );
    nanoseconds nsBoostIntegerCast = profile( boost_integer_cast, COUNT );
    nanoseconds nsNativeItofCast = profile( native_itof_cast, COUNT );
    nanoseconds nsBoostItofCast = profile( boost_itof_cast, COUNT );
    nanoseconds nsNativeFtoiCast = profile( native_ftoi_cast, COUNT );
    nanoseconds nsBoostFtoiCast = profile( boost_ftoi_cast, COUNT );

    cout << "Native Integer Cast: " << nsNativeIntegerCast << endl;
    cout << "Boost Integer Cast: " << nsBoostIntegerCast << endl;
    cout << "Native Integer-Floating Cast: " << nsNativeItofCast << endl;
    cout << "Boost Integer-Floating Cast: " << nsBoostItofCast << endl;
    cout << "Native Floating-Integer Cast: " << nsNativeFtoiCast << endl;
    cout << "Boost Floating-Integer Cast: " << nsBoostFtoiCast << endl;

    return 0;
};

RESULT:
Native Integer Cast: 1 nanosecond
Boost Integer Cast: 4 nanoseconds
Native Integer-Floating Cast: 3 nanoseconds
Boost Integer-Floating Cast: 3 nanoseconds
Native Floating-Integer Cast: 5 nanoseconds
Boost Floating-Integer Cast: 14 nanoseconds

Regards,
Tang

On Mon, Oct 15, 2012 at 6:43 PM, Oswin Krause <
Oswin.Krause_at_[hidden]> wrote:

> Hi,
>
> Never benchmark in debug mode. Moreover, never ever benchmark boost code
> in debug mode.
>
>
> On 2012-10-15 11:29, Tang Jiang Jun wrote:
>
>> Hi Oswin,
>>
>> Sorry, I forgot to mention that I compiled it as debug configuration
>> in order to prevent unintended optimization.
>> Anyway, many thanks for reminding!
>>
>> Tang
>>
>>
>> On Mon, Oct 15, 2012 at 4:30 PM, Oswin Krause
>> <Oswin.Krause_at_ruhr-uni-bochum.**de <Oswin.Krause_at_[hidden]>[3]> wrote:
>>
>> Hi,
>>>
>>> Your complete loop got optimized away in the native test cases.
>>> Because of the try/catch block the compiler couldn't do this in the
>>> other cases. So you are benchmarking nothing vs something.
>>>
>>> Greetings,
>>> Oswin
>>>
>>> On 2012-10-15 10:16, Tang Jiang Jun wrote:
>>>
>>> Hi,
>>>>
>>>> I ran a performance test of numeric_cast recently, and
>>>> found
>>>> that the result was unexpectedly bad, although the documentation
>>>> says that there is no overhead if overflows don't happen.
>>>> Could somebody please help me to verify this test? If this is
>>>> true,
>>>> I doubt whether I should use numeric_cast in production code.
>>>>
>>>> Here is my testing code and result.
>>>>
>>>> #include <boost/numeric/conversion/cast.hpp>
>>>> #include <boost/format.hpp>
>>>> #include <boost/cstdint.hpp>
>>>> #include <boost/chrono.hpp>
>>>> #include <iostream>
>>>>
>>>> using namespace std;
>>>> using namespace boost;
>>>> using namespace boost::numeric;
>>>> using namespace boost::chrono;
>>>>
>>>> int main()
>>>> {
>>>> const static int32_t COUNT = 1000000;
>>>> high_resolution_clock::time_point start;
>>>>
>>>> start = high_resolution_clock::now();
>>>> for( int32_t n = 0; n < COUNT; ++n )
>>>> {
>>>> int32_t i32 = 123;
>>>> int16_t i16 = i32;
>>>> }
>>>> cout << format("Native Integer Cast: %1%n") % ( (
>>>>
>>>> high_resolution_clock::now() - start ) / COUNT );
>>>>
>>>> start = high_resolution_clock::now();
>>>> for( int32_t n = 0; n < COUNT; ++n )
>>>> {
>>>> try
>>>> {
>>>> int32_t i32 = 100;
>>>> int16_t i16 = numeric_cast< int16_t >( i32
>>>> );
>>>> }
>>>> catch( const bad_numeric_cast& e )
>>>> {
>>>> cout << e.what() << endl;
>>>> }
>>>> }
>>>> cout << format("Boost Integer Cast: %1%n") % ( (
>>>>
>>>> high_resolution_clock::now() - start ) / COUNT );
>>>>
>>>> start = high_resolution_clock::now();
>>>> for( int32_t n = 0; n < COUNT; ++n )
>>>> {
>>>> float f = 100.0f;
>>>> int32_t i = static_cast< int32_t >( f );
>>>> }
>>>> cout << format("Native Floating-Integer Cast: %1%n") % ( (
>>>>
>>>> high_resolution_clock::now() - start ) / COUNT );
>>>>
>>>> start = high_resolution_clock::now();
>>>> for( int32_t n = 0; n < COUNT; ++n )
>>>> {
>>>> try
>>>> {
>>>> float f = 123.0f;
>>>> int32_t i = numeric_cast< int32_t >( f );
>>>> }
>>>> catch( const bad_numeric_cast& e )
>>>> {
>>>> cout << e.what() << endl;
>>>> }
>>>> }
>>>> cout << format("Boost Floating-Integer Cast: %1%n") % ( (
>>>>
>>>> high_resolution_clock::now() - start ) / COUNT );
>>>>
>>>> start = high_resolution_clock::now();
>>>> for( int32_t n = 0; n < COUNT; ++n )
>>>> {
>>>> int32_t i = 132;
>>>> float f = static_cast< float >( i );
>>>> }
>>>> cout << format("Native Integer-Floating Cast: %1%n") % ( (
>>>>
>>>> high_resolution_clock::now() - start ) / COUNT );
>>>>
>>>> start = high_resolution_clock::now();
>>>> for( int32_t n = 0; n < COUNT; ++n )
>>>> {
>>>> try
>>>> {
>>>> int32_t i = 128;
>>>> float f = numeric_cast< float >( i );
>>>> }
>>>> catch( const bad_numeric_cast& e )
>>>> {
>>>> cout << e.what() << endl;
>>>> }
>>>> }
>>>> cout << format("Boost Integer-Floating Cast: %1%n") % ( (
>>>>
>>>> high_resolution_clock::now() - start ) / COUNT );
>>>>
>>>> return 0;
>>>> };
>>>>
>>>> Result:
>>>> Native Integer Cast: 3 nanoseconds
>>>> Boost Integer Cast: 311 nanoseconds
>>>> Native Floating-Integer Cast: 4 nanoseconds
>>>> Boost Floating-Integer Cast: 430 nanoseconds
>>>> Native Integer-Floating Cast: 2 nanoseconds
>>>> Boost Integer-Floating Cast: 106 nanoseconds
>>>>
>>>
>>> ______________________________**_________________
>>> Boost-users mailing list
>>> Boost-users_at_[hidden] [1]
>>> http://lists.boost.org/**mailman/listinfo.cgi/boost-**users>[2]
>>>
>>
>>
>>
>> Links:
>> ------
>> [1] mailto:Boost-users_at_lists.**boost.org <Boost-users_at_[hidden]>
>> [2]
http://lists.boost.org/**mailman/listinfo.cgi/boost-**users>
>> [3] mailto:Oswin.Krause_at_ruhr-uni-**bochum.de<Oswin.Krause_at_[hidden]>
>>
>
> ______________________________**_________________
> Boost-users mailing list
> Boost-users_at_[hidden]
>
http://lists.boost.org/**mailman/listinfo.cgi/boost-**users>
>



Boost-users list run by williamkempf at hotmail.com, kalb at libertysoft.com, bjorn.karlsson at readsoft.com, gregod at cs.rpi.edu, wekempf at cox.net