Boost logo

Boost :

From: Alexander Nasonov (alnsn_at_[hidden])
Date: 2008-01-04 05:31:26


Tobias Schwinger <tschwinger <at> isonews2.com> writes:
> We actually very much want a preprocessor-generated 'switch' statement
> because it is a special hint for optimization and most compilers
> generate very efficient code for it...

BTW,
switch_ doesn't implement fall-through and I was worried about the performance
of this important case (bzero with a Duff's device optimization):

  switch(n % 8)
  {
    case 7: buf[6] = 0;
    case 6: buf[5] = 0;
    case 5: buf[4] = 0;
    case 4: buf[3] = 0;
    case 3: buf[2] = 0;
    case 2: buf[1] = 0;
    case 1: buf[0] = 0;
  }

switch_ would generate this code:

  switch(n % 8)
  {
    case 7:
      buf[6] = 0; buf[5] = 0; buf[4] = 0; buf[3] = 0;
      buf[2] = 0; buf[1] = 0; buf[0] = 0;
      break;
    case 6:
      buf[5] = 0; buf[4] = 0; buf[3] = 0;
      buf[2] = 0; buf[1] = 0; buf[0] = 0;
      break;
    case 5:
      buf[4] = 0; buf[3] = 0; buf[2] = 0; buf[1] = 0; buf[0] = 0;
      break;
    case 4:
      buf[3] = 0; buf[2] = 0; buf[1] = 0; buf[0] = 0;
      break;
    case 3:
      buf[2] = 0; buf[1] = 0; buf[0] = 0;
      break;
    case 2:
      buf[1] = 0; buf[0] = 0;
      break;
    case 1:
      buf[0] = 0;
      break;
    default:
      break;
  }

Below is a program that compares the assembly code generated for a
hand-crafted switch with that generated for switch_. They are identical on
gcc 3.4.6 x86_64.

#include <iostream>
#include "switch.hpp"
#include <boost/mpl/integral_c.hpp>
#include <boost/mpl/range_c.hpp>
#include <boost/mpl/vector.hpp>

// Hand-written reference version: zeroes the first (n % 8) bytes of buf,
// highest index first. Every case intentionally falls through into the
// next one, so entering at case K clears buf[K-1] down to buf[0].
// A remainder of 0 (or a negative remainder) matches no case and leaves
// buf untouched.
void classic_duff(char* buf, int n)
{
  const int remainder = n % 8;

  switch (remainder)
  {
    case 7:
      buf[6] = '\0';
      // fall through
    case 6:
      buf[5] = '\0';
      // fall through
    case 5:
      buf[4] = '\0';
      // fall through
    case 4:
      buf[3] = '\0';
      // fall through
    case 3:
      buf[2] = '\0';
      // fall through
    case 2:
      buf[1] = '\0';
      // fall through
    case 1:
      buf[0] = '\0';
  }

  // ...
}

// Compile-time recursion that clears buf[Count-1], buf[Count-2], ..., buf[0]
// in that order: each instantiation zeroes one byte and then delegates to
// the instantiation for Count-1.
template<int Count>
struct duff_step
{
  static void step(char* buf)
  {
    typedef duff_step<Count - 1> remaining;

    buf[Count - 1] = 0;
    remaining::step(buf);
  }
};

// Terminates the recursion: nothing is left to clear.
template<>
struct duff_step<0>
{
  static void step(char*) {}
};

struct duff_case
{
  char* buf;
  duff_case(char* buf) : buf(buf) {}

  typedef void result_type;
  template<class Case>
  void operator()(Case) const
  {
    duff_step<Case::value>::step(buf);
  }
};

// Function object that accepts one argument of any type and does nothing.
// modern_duff passes it as the last argument to switch_ (presumably the
// handler for values outside the case range -- confirm against switch.hpp).
struct ignore
{
  template<class Unused>
  void operator()(Unused) const
  {
  }
};

template<int Mod>
void modern_duff(char* buf, int n)
{
  using namespace boost;
  switch_< mpl::range_c<int,0,Mod> >(n % Mod, duff_case(buf), ignore());

  // ...
}

int main(int argc, char* argv[])
{
  using namespace boost;

  char buf1[7] = { 1, 1, 1, 1, 1, 1, 1 };
  modern_duff<8>(buf1, 7);

  for(int i = 0; i < 8; ++i)
    std::cout << static_cast<int>(buf1[i]) << ", ";
  std::cout << '\n';

  char buf2[7] = { 1, 1, 1, 1, 1, 1, 1 };
  classic_duff(buf2, 7);

  for(int i = 0; i < 8; ++i)
    std::cout << static_cast<int>(buf2[i]) << ", ";
  std::cout << '\n';

}

0000000000400820 <classic_duff(char*, int)>:
  400820: 8d 46 07 lea 0x7(%rsi),%eax
  400823: 83 fe ff cmp $0xffffffffffffffff,%esi
  400826: 0f 4f c6 cmovg %esi,%eax
  400829: 83 e0 f8 and $0xfffffffffffffff8,%eax
  40082c: 29 c6 sub %eax,%esi
  40082e: 83 fe 07 cmp $0x7,%esi
  400831: 77 24 ja 400857 <classic_duff(char*, int)+0x37>
  400833: 89 f0 mov %esi,%eax
  400835: ff 24 c5 50 0b 40 00 jmpq *0x400b50(,%rax,8)
  40083c: c6 47 06 00 movb $0x0,0x6(%rdi)
  400840: c6 47 05 00 movb $0x0,0x5(%rdi)
  400844: c6 47 04 00 movb $0x0,0x4(%rdi)
  400848: c6 47 03 00 movb $0x0,0x3(%rdi)
  40084c: c6 47 02 00 movb $0x0,0x2(%rdi)
  400850: c6 47 01 00 movb $0x0,0x1(%rdi)
  400854: c6 07 00 movb $0x0,(%rdi)
  400857: f3 c3 repz retq
  400859: 90 nop
  40085a: 66 data16
  40085b: 66 data16
  40085c: 90 nop
  40085d: 66 data16
  40085e: 66 data16
  40085f: 90 nop

0000000000400a00 <void modern_duff<8>(char*, int)>:
  400a00: 8d 46 07 lea 0x7(%rsi),%eax
  400a03: 83 fe ff cmp $0xffffffffffffffff,%esi
  400a06: 0f 4f c6 cmovg %esi,%eax
  400a09: 83 e0 f8 and $0xfffffffffffffff8,%eax
  400a0c: 29 c6 sub %eax,%esi
  400a0e: 83 fe 07 cmp $0x7,%esi
  400a11: 77 24 ja 400a37 <void modern_duff<8>(char*, int)+0x37>
  400a13: 89 f0 mov %esi,%eax
  400a15: ff 24 c5 90 0b 40 00 jmpq *0x400b90(,%rax,8)
  400a1c: c6 47 06 00 movb $0x0,0x6(%rdi)
  400a20: c6 47 05 00 movb $0x0,0x5(%rdi)
  400a24: c6 47 04 00 movb $0x0,0x4(%rdi)
  400a28: c6 47 03 00 movb $0x0,0x3(%rdi)
  400a2c: c6 47 02 00 movb $0x0,0x2(%rdi)
  400a30: c6 47 01 00 movb $0x0,0x1(%rdi)
  400a34: c6 07 00 movb $0x0,(%rdi)
  400a37: f3 c3 repz retq
  400a39: c6 47 05 00 movb $0x0,0x5(%rdi)
  400a3d: c6 47 04 00 movb $0x0,0x4(%rdi)
  400a41: c6 47 03 00 movb $0x0,0x3(%rdi)
  400a45: c6 47 02 00 movb $0x0,0x2(%rdi)
  400a49: c6 47 01 00 movb $0x0,0x1(%rdi)
  400a4d: c6 07 00 movb $0x0,(%rdi)
  400a50: c3 retq
  400a51: 90 nop
  400a52: 90 nop
  400a53: 90 nop
  400a54: 90 nop
  400a55: 90 nop
  400a56: 90 nop
  400a57: 90 nop
  400a58: 90 nop
  400a59: 90 nop
  400a5a: 90 nop
  400a5b: 90 nop
  400a5c: 90 nop
  400a5d: 90 nop
  400a5e: 90 nop
  400a5f: 90 nop


Boost list run by bdawes at acm.org, gregod at cs.rpi.edu, cpdaniel at pacbell.net, john at johnmaddock.co.uk