|
Boost : |
From: Greg Colvin (gcolvin_at_[hidden])
Date: 2001-10-23 16:42:53
And thanks to Steve, here is output from the latest Sun compiler (Forte 6u2).
So far I conclude that the function template approach to specifying constants
is just as efficient as a literal constant. Which is to say, as efficient as
possible.
! CONSTANT POOL
!
___const_seg_900000101:
/* 000000 0 */ .word 1074340347,1413754136
/* 0x0008 0 */ .type ___const_seg_900000101,1
/* 0x0008 0 */ .size ___const_seg_900000101,(.-___const_seg_900000101)
/* 0x0008 0 */ .align 4
! SUBROUTINE __1cHpi_test6F_i_
!
! OFFSET SOURCE LINE LABEL INSTRUCTION
.global __1cHpi_test6F_i_
__1cHpi_test6F_i_:
/* 000000 25 */ save %sp,-136,%sp
/* 0x0004 0 */ sethi %hi(___const_seg_900000101),%g1
/* 0x0008 37 */ add %fp,-8,%o0
/* 0x000c 27 */ ldd [%g1+%lo(___const_seg_900000101)],%f0
/* 0x0010 37 */ add %fp,-16,%o1
/* 0x0014 */ add %fp,-24,%o2
/* 0x0018 */ add %fp,-32,%o3
/* 0x001c */ add %fp,-40,%o4
/* 0x0020 27 */ std %f0,[%fp-8]
/* 0x0024 29 */ std %f0,[%fp-16]
/* 0x0028 31 */ std %f0,[%fp-24]
/* 0x002c 33 */ std %f0,[%fp-32]
/* 0x0030 37 */ call __1cBf6Frd0000_i_ ! params = %o0 %o1 %o2 %o3 %o4 ! Result = %o0
/* 0x0034 35 */ std %f0,[%fp-40]
/* 0x0038 37 */ ret ! Result = %i0
/* 0x003c */ restore %g0,%o0,%o0
From: Greg Colvin <gcolvin_at_[hidden]>
> Here is GCC on Sparc. Sun's CC 4.0 chokes on the first template.
>
> .file "pi_test.cpp"
> gcc2_compiled.:
> .section ".text"
> .align 4
> .global pi_test__Fv
> .type pi_test__Fv,#function
> .proc 04
> pi_test__Fv:
> .LLFB1:
> !#PROLOGUE# 0
> save %sp, -152, %sp
> .LLCFI0:
> !#PROLOGUE# 1
> sethi %hi(1074339840), %o0
> sethi %hi(1413753856), %o1
> or %o0, 507, %o0
> or %o1, 280, %o1
> std %o0, [%fp-56]
> std %o0, [%fp-24]
> std %o0, [%fp-32]
> std %o0, [%fp-40]
> std %o0, [%fp-48]
> add %fp, -24, %o0
> add %fp, -32, %o1
> add %fp, -40, %o2
> add %fp, -48, %o3
> call f__FRdN40, 0
> add %fp, -56, %o4
> ret
> restore %g0, %o0, %o0
> .LLFE1:
> .LLfe1:
> .size pi_test__Fv,.LLfe1-pi_test__Fv
>
> .section ".eh_frame",#alloc,#write
> __FRAME_BEGIN__:
> .uaword .LLECIE1-.LLSCIE1
> .LLSCIE1:
> .uaword 0x0
> .byte 0x1
> .byte 0x0
> .byte 0x1
> .byte 0x7c
> .byte 0x65
> .byte 0xc
> .byte 0xe
> .byte 0x0
> .byte 0x9
> .byte 0x65
> .byte 0xf
> .align 4
> .LLECIE1:
> .uaword .LLEFDE1-.LLSFDE1
> .LLSFDE1:
> .uaword .LLSFDE1-__FRAME_BEGIN__
> .uaword .LLFB1
> .uaword .LLFE1-.LLFB1
> .byte 0x4
> .uaword .LLCFI0-.LLFB1
> .byte 0xd
> .byte 0x1e
> .byte 0x2d
> .byte 0x9
> .byte 0x65
> .byte 0x1f
> .align 4
> .LLEFDE1:
> .ident "GCC: (GNU) 2.95.2 19991024 (release)"
>
> From: Greg Colvin <gcolvin_at_[hidden]>
> > From: Paul A. Bristow <boost_at_[hidden]>
> > > Thanks for this - agrees with my naiver tests.
> > >
> > > It looks promising to me - but what of other compilers - you gcc fans etc?
> >
> > .file "pi_test.cc"
> > gcc2_compiled.:
> > ___gnu_compiled_cplusplus:
> > .text
> > .align 8
> > LC3:
> > .long 0x54442d18,0x400921fb
> > .align 4
> > .globl _pi_test__Fv
> > .def _pi_test__Fv; .scl 2; .type 32; .endef
> > _pi_test__Fv:
> > fldl LC3
> > pushl %ebp
> > movl %esp,%ebp
> > subl $48,%esp
> > fstl -40(%ebp)
> > fstl -32(%ebp)
> > fstl -24(%ebp)
> > fstl -16(%ebp)
> > fstpl -8(%ebp)
> > leal -8(%ebp),%eax
> > pushl %eax
> > leal -16(%ebp),%eax
> > pushl %eax
> > leal -24(%ebp),%eax
> > pushl %eax
> > leal -32(%ebp),%eax
> > pushl %eax
> > leal -40(%ebp),%eax
> > pushl %eax
> > call _f__FRdN40
> > movl %ebp,%esp
> > popl %ebp
> > ret
> > .def _f__FRdN40; .scl 2; .type 32; .endef
> >
> >
> > > Paul
> > >
> > > > -----Original Message-----
> > > > From: Greg Colvin [mailto:gcolvin_at_[hidden]]
> > > > Sent: Tuesday, October 23, 2001 5:44 PM
> > > > To: boost_at_[hidden]
> > > > Subject: Re: [boost] Math constants for naive and gurus? - which
> > > > constants do you want?
> > > >
> > > >
> > > > For what it is worth, appended is a very simple test program and
> > > > the 80x86 assembly language output from MSVC. I had to add the
> > > > call to f(double&,double&,double&,double&,double&) to prevent the
> > > > optimizer removing almost all the code. The operations for each
> > > > way of assigning pi to a local are a bit scrambled up, I assume
> > > > for optimal scheduling. When I unscramble them they look like:
> > > >
> > > > ; double pi_template = constant<pi_tag,double>();
> > > > mov DWORD PTR _pi_template$[esp+60], 1413754136 ; 54442d18H
> > > > mov DWORD PTR _pi_template$[esp+64], 1074340347 ; 400921fbH
> > > >
> > > > ; double pi_function = pi();
> > > >
> > > > mov DWORD PTR _pi_function$[esp+60], 1413754136 ; 54442d18H
> > > > mov DWORD PTR _pi_function$[esp+64], 1074340347 ; 400921fbH
> > > >
> > > > ; double pi_constant = Pi;
> > > > mov eax, DWORD PTR _Pi$S188
> > > > mov DWORD PTR _pi_constant$[esp+40], eax
> > > > mov ecx, DWORD PTR _Pi$S188+4
> > > > mov DWORD PTR _pi_constant$[esp+44], ecx
> > > >
> > > > ; double pi_defined = PI;
> > > > mov DWORD PTR _pi_defined$[esp+60], 1413754136 ; 54442d18H
> > > > mov DWORD PTR _pi_defined$[esp+64], 1074340347 ; 400921fbH
> > > >
> > > > ; double pi_literal = 3.141592653589793238462643383279502884197;
> > > > mov DWORD PTR _pi_literal$[esp+60], 1413754136 ; 54442d18H
> > > > mov DWORD PTR _pi_literal$[esp+64], 1074340347 ; 400921fbH
> > > >
> > > > ; return f(pi_template,pi_function,pi_constant,pi_defined,pi_literal);
> > > > lea edx, DWORD PTR _pi_literal$[esp+40]
> > > > push edx
> > > > lea eax, DWORD PTR _pi_defined$[esp+44]
> > > > push eax
> > > > lea ecx, DWORD PTR _pi_constant$[esp+48]
> > > > push ecx
> > > > lea edx, DWORD PTR _pi_function$[esp+52]
> > > > push edx
> > > > lea eax, DWORD PTR _pi_template$[esp+56]
> > > > push eax
> > > > call ?f@@YAHAAN0000@Z ; f
> > > >
> > > > Make of it all what you will.
> > > >
> > > >
> > > > //////////////////////////////////////////////////////////////////
> > > > // pi_test.cpp
> > > >
> > > > template <typename Tag, typename Rep> struct constant {
> > > > constant() {}
> > > > operator Rep() const;
> > > > };
> > > >
> > > > struct pi_tag {};
> > > >
> > > > template<> inline constant<pi_tag, double>::operator double() const {
> > > > return 3.141592653589793238462643383279502884197;
> > > > }
> > > >
> > > > inline double pi() {
> > > > return 3.141592653589793238462643383279502884197;
> > > > }
> > > >
> > > > const double Pi = 3.141592653589793238462643383279502884197;
> > > >
> > > > #define PI 3.141592653589793238462643383279502884197
> > > >
> > > > extern int f(double&,double&,double&,double&,double&);
> > > >
> > > > int pi_test() {
> > > >
> > > > double pi_template = constant<pi_tag,double>();
> > > >
> > > > double pi_function = pi();
> > > >
> > > > double pi_constant = Pi;
> > > >
> > > > double pi_defined = PI;
> > > >
> > > > double pi_literal = 3.141592653589793238462643383279502884197;
> > > >
> > > > return f(pi_template,pi_function,pi_constant,pi_defined,pi_literal);
> > > > }
> > > >
> > > >
> > > > ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
> > > > TITLE pi_test.cpp
> > > > .386P
> > > > include listing.inc
> > > > if @Version gt 510
> > > > .model FLAT
> > > > else
> > > > _TEXT SEGMENT PARA USE32 PUBLIC 'CODE'
> > > > _TEXT ENDS
> > > > _DATA SEGMENT DWORD USE32 PUBLIC 'DATA'
> > > > _DATA ENDS
> > > > CONST SEGMENT DWORD USE32 PUBLIC 'CONST'
> > > > CONST ENDS
> > > > _BSS SEGMENT DWORD USE32 PUBLIC 'BSS'
> > > > _BSS ENDS
> > > > _TLS SEGMENT DWORD USE32 PUBLIC 'TLS'
> > > > _TLS ENDS
> > > > ; COMDAT ??B?$constant@Upi_tag@@N@@QBENXZ
> > > > _TEXT SEGMENT PARA USE32 PUBLIC 'CODE'
> > > > _TEXT ENDS
> > > > ; COMDAT ?pi@@YANXZ
> > > > _TEXT SEGMENT PARA USE32 PUBLIC 'CODE'
> > > > _TEXT ENDS
> > > > ; COMDAT ??0?$constant@Upi_tag@@N@@QAE@XZ
> > > > _TEXT SEGMENT PARA USE32 PUBLIC 'CODE'
> > > > _TEXT ENDS
> > > > FLAT GROUP _DATA, CONST, _BSS
> > > > ASSUME CS: FLAT, DS: FLAT, SS: FLAT
> > > > endif
> > > > CONST SEGMENT
> > > > _Pi$S188 DQ 0400921fb54442d18r ; 3.14159
> > > > CONST ENDS
> > > > PUBLIC ?pi_test@@YAHXZ ; pi_test
> > > > EXTRN ?f@@YAHAAN0000@Z:NEAR ; f
> > > > EXTRN __fltused:NEAR
> > > > CONST SEGMENT
> > > > $T214 DQ 0400921fb54442d18r ; 3.14159
> > > > CONST ENDS
> > > > _TEXT SEGMENT
> > > > _pi_template$ = -8
> > > > _pi_function$ = -16
> > > > _pi_constant$ = -24
> > > > _pi_defined$ = -32
> > > > _pi_literal$ = -40
> > > > ?pi_test@@YAHXZ PROC NEAR ; pi_test
> > > >
> > > > ; 25 : int pi_test() {
> > > >
> > > > sub esp, 40 ; 00000028H
> > > >
> > > > ; 26 :
> > > > ; 27 : double pi_template = constant<pi_tag,double>();
> > > > ; 28 :
> > > > ; 29 : double pi_function = pi();
> > > > ; 30 :
> > > > ; 31 : double pi_constant = Pi;
> > > >
> > > > mov eax, DWORD PTR _Pi$S188
> > > > mov ecx, DWORD PTR _Pi$S188+4
> > > > mov DWORD PTR _pi_constant$[esp+40], eax
> > > >
> > > > ; 32 :
> > > > ; 33 : double pi_defined = PI;
> > > > ; 34 :
> > > > ; 35 : double pi_literal = 3.141592653589793238462643383279502884197;
> > > > ; 36 :
> > > > ; 37 : return f(pi_template,pi_function,pi_constant,pi_defined,pi_literal);
> > > >
> > > > lea edx, DWORD PTR _pi_literal$[esp+40]
> > > > mov DWORD PTR _pi_constant$[esp+44], ecx
> > > > lea eax, DWORD PTR _pi_defined$[esp+40]
> > > > push edx
> > > > lea ecx, DWORD PTR _pi_constant$[esp+44]
> > > > push eax
> > > > lea edx, DWORD PTR _pi_function$[esp+48]
> > > > push ecx
> > > > lea eax, DWORD PTR _pi_template$[esp+52]
> > > > push edx
> > > > push eax
> > > > mov DWORD PTR _pi_template$[esp+60], 1413754136 ; 54442d18H
> > > > mov DWORD PTR _pi_template$[esp+64], 1074340347 ; 400921fbH
> > > > mov DWORD PTR _pi_function$[esp+60], 1413754136 ; 54442d18H
> > > > mov DWORD PTR _pi_function$[esp+64], 1074340347 ; 400921fbH
> > > > mov DWORD PTR _pi_defined$[esp+60], 1413754136 ; 54442d18H
> > > > mov DWORD PTR _pi_defined$[esp+64], 1074340347 ; 400921fbH
> > > > mov DWORD PTR _pi_literal$[esp+60], 1413754136 ; 54442d18H
> > > > mov DWORD PTR _pi_literal$[esp+64], 1074340347 ; 400921fbH
> > > > call ?f@@YAHAAN0000@Z ; f
> > > > add esp, 20 ; 00000014H
> > > >
> > > > ; 38 : }
> > > >
> > > > add esp, 40 ; 00000028H
> > > > ret 0
> > > > ?pi_test@@YAHXZ ENDP ; pi_test
> > > > _TEXT ENDS
> > > > END
Boost list run by bdawes at acm.org, gregod at cs.rpi.edu, cpdaniel at pacbell.net, john at johnmaddock.co.uk