|
Boost : |
From: Alexander Nasonov (alnsn_at_[hidden])
Date: 2008-01-04 05:31:26
Tobias Schwinger <tschwinger <at> isonews2.com> writes:
> We actually very much want a preprocessor-generated 'switch' statement
> because it is a special hint for optimization and most compilers
> generate very efficient code for it...
BTW,
switch_ doesn't implement fall-through and I was worried about performance of
this important case (bzero with Duff's device optimization):
// Hand-written form: control enters at the label equal to n % 8 and, since
// no case ends with break, runs every assignment below it as well.
// There is no case 0 and no default, so an unmatched value writes nothing.
switch(n % 8)
{
case 7: buf[6] = 0;
case 6: buf[5] = 0;
case 5: buf[4] = 0;
case 4: buf[3] = 0;
case 3: buf[2] = 0;
case 2: buf[1] = 0;
case 1: buf[0] = 0;
}
switch_ would generate this code:
// Equivalent form without fall-through: every case lists all of its own
// assignments (highest index first) and ends with break; the default case
// does nothing.
switch(n % 8)
{
case 7:
buf[6] = 0; buf[5] = 0; buf[4] = 0; buf[3] = 0;
buf[2] = 0; buf[1] = 0; buf[0] = 0;
break;
case 6:
buf[5] = 0; buf[4] = 0; buf[3] = 0;
buf[2] = 0; buf[1] = 0; buf[0] = 0;
break;
case 5:
buf[4] = 0; buf[3] = 0; buf[2] = 0; buf[1] = 0; buf[0] = 0;
break;
case 4:
buf[3] = 0; buf[2] = 0; buf[1] = 0; buf[0] = 0;
break;
case 3:
buf[2] = 0; buf[1] = 0; buf[0] = 0;
break;
case 2:
buf[1] = 0; buf[0] = 0;
break;
case 1:
buf[0] = 0;
break;
default:
break;
}
Below is a program that demonstrates the difference in assembly code between a
hand-crafted switch and switch_. They are identical on gcc 3.4.6 x86_64.
#include <iostream>
#include "switch.hpp"
#include <boost/mpl/integral_c.hpp>
#include <boost/mpl/range_c.hpp>
#include <boost/mpl/vector.hpp>
// Hand-written reference implementation: zeroes the first (n % 8) bytes of
// buf with a switch whose cases intentionally fall through, so entering at
// label k clears buf[k-1] down to buf[0].
// If n % 8 is 0 or negative, no label matches and buf is left untouched.
void classic_duff(char* buf, int n)
{
    const int tail = n % 8;

    switch (tail)
    {
        case 7:
            buf[6] = 0;
            // fall through
        case 6:
            buf[5] = 0;
            // fall through
        case 5:
            buf[4] = 0;
            // fall through
        case 4:
            buf[3] = 0;
            // fall through
        case 3:
            buf[2] = 0;
            // fall through
        case 2:
            buf[1] = 0;
            // fall through
        case 1:
            buf[0] = 0;
    }
    // ...
}
// Compile-time unrolled helper that zeroes the first N bytes of a buffer.
// duff_step<N>::step(buf) stores 0 into buf[N-1] and then hands over to
// duff_step<N-1>, so the stores run from the highest index down to buf[0].
template<int N>
struct duff_step
{
    static void step(char* buf)
    {
        typedef duff_step<N - 1> next;

        buf[N - 1] = 0;
        next::step(buf);
    }
};

// Terminates the recursion: with zero bytes left there is nothing to store.
template<>
struct duff_step<0>
{
    static void step(char*)
    {
    }
};
struct duff_case
{
char* buf;
duff_case(char* buf) : buf(buf) {}
typedef void result_type;
template<class Case>
void operator()(Case) const
{
duff_step<Case::value>::step(buf);
}
};
// Do-nothing functor: callable with a single argument of any type, which it
// discards. modern_duff passes an instance as the last argument of switch_.
struct ignore
{
    template<class T>
    void operator()(T) const
    {
    }
};
template<int Mod>
void modern_duff(char* buf, int n)
{
using namespace boost;
switch_< mpl::range_c<int,0,Mod> >(n % Mod, duff_case(buf), ignore());
// ...
}
// Runs modern_duff<8> and classic_duff on two identical 7-byte buffers of
// ones with n = 7, printing each buffer afterwards so the results can be
// compared.
int main(int argc, char* argv[])
{
    // Element count shared by both buffers and both print loops. Bounding
    // the loops by this constant keeps them inside the arrays; a bound of 8
    // would read one element past the end of a 7-element array.
    const int buf_size = 7;

    char buf1[buf_size] = { 1, 1, 1, 1, 1, 1, 1 };
    modern_duff<8>(buf1, 7);
    for(int i = 0; i < buf_size; ++i)
        std::cout << static_cast<int>(buf1[i]) << ", ";
    std::cout << '\n';

    char buf2[buf_size] = { 1, 1, 1, 1, 1, 1, 1 };
    classic_duff(buf2, 7);
    for(int i = 0; i < buf_size; ++i)
        std::cout << static_cast<int>(buf2[i]) << ", ";
    std::cout << '\n';
}
0000000000400820 <classic_duff(char*, int)>:
400820: 8d 46 07 lea 0x7(%rsi),%eax
400823: 83 fe ff cmp $0xffffffffffffffff,%esi
400826: 0f 4f c6 cmovg %esi,%eax
400829: 83 e0 f8 and $0xfffffffffffffff8,%eax
40082c: 29 c6 sub %eax,%esi
40082e: 83 fe 07 cmp $0x7,%esi
400831: 77 24 ja 400857 <classic_duff(char*, int)+0x37>
400833: 89 f0 mov %esi,%eax
400835: ff 24 c5 50 0b 40 00 jmpq *0x400b50(,%rax,8)
40083c: c6 47 06 00 movb $0x0,0x6(%rdi)
400840: c6 47 05 00 movb $0x0,0x5(%rdi)
400844: c6 47 04 00 movb $0x0,0x4(%rdi)
400848: c6 47 03 00 movb $0x0,0x3(%rdi)
40084c: c6 47 02 00 movb $0x0,0x2(%rdi)
400850: c6 47 01 00 movb $0x0,0x1(%rdi)
400854: c6 07 00 movb $0x0,(%rdi)
400857: f3 c3 repz retq
400859: 90 nop
40085a: 66 data16
40085b: 66 data16
40085c: 90 nop
40085d: 66 data16
40085e: 66 data16
40085f: 90 nop
0000000000400a00 <void modern_duff<8>(char*, int)>:
400a00: 8d 46 07 lea 0x7(%rsi),%eax
400a03: 83 fe ff cmp $0xffffffffffffffff,%esi
400a06: 0f 4f c6 cmovg %esi,%eax
400a09: 83 e0 f8 and $0xfffffffffffffff8,%eax
400a0c: 29 c6 sub %eax,%esi
400a0e: 83 fe 07 cmp $0x7,%esi
400a11: 77 24 ja 400a37 <void modern_duff<8>(char*, int)+0x37>
400a13: 89 f0 mov %esi,%eax
400a15: ff 24 c5 90 0b 40 00 jmpq *0x400b90(,%rax,8)
400a1c: c6 47 06 00 movb $0x0,0x6(%rdi)
400a20: c6 47 05 00 movb $0x0,0x5(%rdi)
400a24: c6 47 04 00 movb $0x0,0x4(%rdi)
400a28: c6 47 03 00 movb $0x0,0x3(%rdi)
400a2c: c6 47 02 00 movb $0x0,0x2(%rdi)
400a30: c6 47 01 00 movb $0x0,0x1(%rdi)
400a34: c6 07 00 movb $0x0,(%rdi)
400a37: f3 c3 repz retq
400a39: c6 47 05 00 movb $0x0,0x5(%rdi)
400a3d: c6 47 04 00 movb $0x0,0x4(%rdi)
400a41: c6 47 03 00 movb $0x0,0x3(%rdi)
400a45: c6 47 02 00 movb $0x0,0x2(%rdi)
400a49: c6 47 01 00 movb $0x0,0x1(%rdi)
400a4d: c6 07 00 movb $0x0,(%rdi)
400a50: c3 retq
400a51: 90 nop
400a52: 90 nop
400a53: 90 nop
400a54: 90 nop
400a55: 90 nop
400a56: 90 nop
400a57: 90 nop
400a58: 90 nop
400a59: 90 nop
400a5a: 90 nop
400a5b: 90 nop
400a5c: 90 nop
400a5d: 90 nop
400a5e: 90 nop
400a5f: 90 nop
Boost list run by bdawes at acm.org, gregod at cs.rpi.edu, cpdaniel at pacbell.net, john at johnmaddock.co.uk