targeting large function result
John S. Yates, Jr.
jyates@netezza.com
Mon Sep 23 10:27:00 GMT 2002
I am implementing a quadruple-precision integer package
and am looking for a way to construct the final result
directly into the destination, rather than into a temp
passed by the caller. Is there any coding style or
incantation that can accomplish this?
The simple example below shows that the compiler adequately
optimizes an anonymous temporary within an expression. But, in
spite of much effort, I cannot get it to construct the result
directly in the assignment's left-hand side (lhs) instead of in a temp.
TIA for any help / suggestions,
/john
=== Source ===
// 128-bit signed integer held as four machine words.
//
// The declaration order of the fields is load-bearing: operator+ below
// addresses the words by byte offset (0, 4, 8, 12) from the start of the
// object, so `hi` must stay first and `lo` last.
class CNumeric128 {
private:
    int      hi;  // offset 0:  most significant word (signed)
    unsigned hl;  // offset 4
    unsigned lh;  // offset 8
    unsigned lo;  // offset 12: least significant word

public:
    // Builds a value from its four words, most significant first.
    CNumeric128(signed int a, unsigned int b, unsigned int c, unsigned int d)
        : hi(a),
          hl(b),
          lh(c),
          lo(d)
    {
    }
};
// Forward declaration of 128-bit addition, carrying the GCC attributes.
//
// Only always_inline is requested. The `const` attribute must not be used
// here: the operands are passed by reference, so the function reads memory
// through its arguments, and on overflow it calls overflow_128_arith().
// GCC only permits `const` on functions that neither examine pointed-to
// data nor have side effects; declaring it anyway lets the optimizer merge
// or hoist calls across stores to the operands.
inline static
CNumeric128 operator + (CNumeric128 const& left,
                        CNumeric128 const& right)
    __attribute__((always_inline));
// Overflow handler with C linkage, defined elsewhere. operator+ invokes it
// when the signed 128-bit addition overflows.
extern "C" {
    void overflow_128_arith(void);
}
// Adds two CNumeric128 values and returns the sum by value.
//
// The four words of `left` are loaded into registers (least significant
// word at byte offset 12 first), then the words of `right` are added with
// carry propagating from offset 12 up to offset 0. Signed overflow of the
// final (most significant) add is captured with `seto`, and
// overflow_128_arith() is called from C++ when it is set.
//
// The call is deliberately NOT made from inside the asm statement: a call
// hidden in inline asm may clobber the caller-saved registers (eax, ecx,
// edx) and memory without the compiler knowing, and the asm's own outputs
// and operand addresses can live in exactly those registers. Declaring them
// as clobbers is not an option on i386 because the statement already needs
// four output registers plus up to two address registers.
//
// left, right: operands; read only, accessed as offsettable memory ("o").
// Returns:     the word-wise sum (wrapped if the handler returns).
inline static
CNumeric128 operator + (CNumeric128 const& __restrict__ left,
                        CNumeric128 const& __restrict__ right)
{
    int           hi;          // word at offset 0 (signed, most significant)
    unsigned int  hl, lh, lo;  // words at offsets 4, 8 and 12
    unsigned char overflowed;  // written by seto: 1 if the top add overflowed

    asm (
        "movl 12+%5,%3\n"
        " movl 8+%5,%2\n"
        " movl 4+%5,%1\n"
        " movl %5,%0\n"
        " addl 12+%6,%3\n"
        " adcl 8+%6,%2\n"
        " adcl 4+%6,%1\n"
        " adcl %6,%0\n"
        " seto %4\n"
        // Early-clobber (&) on the register outputs: they are written
        // while the memory inputs are still being read.
        : "=&r" (hi), "=&r" (hl), "=&r" (lh), "=&r" (lo), "=m" (overflowed)
        : "o" (left), "o" (right)
        : "cc"
    );

    if (overflowed)
        overflow_128_arith();

    return CNumeric128(hi, hl, lh, lo);
}
// Demonstration case: stores the sum of three operands through `out`.
//
// The sum is written as one expression on purpose, so that the intermediate
// (op1 + op2) is an anonymous temporary and the final result is assigned
// straight to *out.
void test(CNumeric128*  __restrict__ out,
          CNumeric128&  __restrict__ op1,
          CNumeric128&  __restrict__ op2,
          CNumeric128&  __restrict__ op3)
{
    (*out) = (op1 + op2) + op3;
}
=== Assembler ===
# Compiler output for test(): two inlined expansions of operator+ followed
# by a block copy of the final temporary into *out.
.file "add128.cpp"
.version "01.01"
gcc2_compiled.:
.text
.align 4
.globl test__FP11CNumeric128R11CNumeric128N21
.type test__FP11CNumeric128R11CNumeric128N21,@function
test__FP11CNumeric128R11CNumeric128N21:
.LFB1:
# Prologue: set up the frame, save %edi/%esi/%ebx, reserve 60 bytes of stack.
pushl %ebp
.LCFI0:
movl %esp, %ebp
.LCFI1:
pushl %edi
.LCFI2:
pushl %esi
.LCFI3:
pushl %ebx
.LCFI4:
subl $60, %esp
.LCFI5:
# Load the 2nd and 3rd arguments (addresses of op1 and op2).
movl 12(%ebp), %edx
movl 16(%ebp), %ecx
# First inline-asm expansion: op1 + op2, result left in %eax/%ebx/%esi/%edi.
#APP
movl 12+(%edx),%edi
movl 8+(%edx),%esi
movl 4+(%edx),%ebx
movl (%edx),%eax
addl 12+(%ecx),%edi
adcl 8+(%ecx),%esi
adcl 4+(%ecx),%ebx
adcl (%ecx),%eax
jno 1f
call overflow_128_arith
1:
#NO_APP
# Spill the four result words to a 16-byte stack temporary at -56(%ebp).
movl %ebx, %edx
movl %esi, -48(%ebp)
movl %edi, -44(%ebp)
movl %eax, -56(%ebp)
movl %edx, -52(%ebp)
# Load the 4th argument (address of op3).
movl 20(%ebp), %esi
# Second inline-asm expansion: (temporary at -56(%ebp)) + op3.
#APP
movl 12+-56(%ebp),%ebx
movl 8+-56(%ebp),%ecx
movl 4+-56(%ebp),%edx
movl -56(%ebp),%eax
addl 12+(%esi),%ebx
adcl 8+(%esi),%ecx
adcl 4+(%esi),%edx
adcl (%esi),%eax
jno 1f
call overflow_128_arith
1:
#NO_APP
# Spill the final result to a second stack temporary at -40(%ebp) ...
movl %ecx, -32(%ebp)
movl %ebx, -28(%ebp)
movl %eax, -40(%ebp)
movl %edx, -36(%ebp)
# ... and then copy that temporary, four words, into *out. This copy is the
# step the post asks how to avoid.
leal -40(%ebp), %esi <<<< address of temp
movl 8(%ebp), %edi <<<< address of lhs
cld <<<< copy direction
movl $4, %ecx <<<< 4 words
rep <<<< block
movsl <<<< move
# Epilogue: release the stack area and restore the saved registers.
addl $60, %esp
popl %ebx
popl %esi
popl %edi
popl %ebp
ret
.LFE1:
.Lfe1:
.size test__FP11CNumeric128R11CNumeric128N21,.Lfe1-test__FP11CNumeric128R11CNumeric128N21
.ident "GCC: (GNU) 2.96 20000731 (Red Hat Linux 7.1 2.96-81)"
More information about the Gcc-help
mailing list