This is the mail archive of the
gcc@gcc.gnu.org
mailing list for the GCC project.
Re: Loop unrolling
- To: John Carr <jfc at mit dot edu>, law at cygnus dot com
- Subject: Re: Loop unrolling
- From: Stephen Williams <steve at icarus dot icarus dot com>
- Date: Tue, 02 Jun 1998 12:50:56 -0700
- Cc: egcs at cygnus dot com
// Test-case type: its only member is a user-written default constructor
// with an empty body.
struct x
{
x() {}
};
// Test-case function: declares a local array of ten x objects and does
// nothing else. No return type is written, so the declaration relies on
// the compiler accepting an implicit int return type.
f()
{
x x1[10];
}
jfc@mit.edu said:
> If you want to complain about the code style, assume that the empty
> constructor appears in a read-only header file (possibly in a base
> class definition -- that's where I first saw this bug).
Or a template. Actually, that code will work great if compiled with
-O2 -funroll-loops. However, I found that if I put an empty destructor
on, all hell breaks loose:
// Second variant of the test-case type: the same empty default constructor,
// plus a user-written destructor whose body is also empty.
struct x
{
x() {}
~x() { }
};
// Test-case function for the destructor variant: declares a local array of
// ten x objects and does nothing else. No return type is written, so the
// declaration relies on the compiler accepting an implicit int return type.
f()
{
x x1[10];
}
This is for the alpha. I think an important optimization is missed here.
icarus.icarus.com % g++ -v -O9 -funroll-loops -S foo.cc
Reading specs from /usr/local/lib/gcc-lib/alphaev5-unknown-linux-gnu/egcs-2.90.27/specs
gcc version egcs-2.90.27 980315 (egcs-1.0.2 release)
/usr/local/lib/gcc-lib/alphaev5-unknown-linux-gnu/egcs-2.90.27/cpp -lang-c++ -v -undef -D__GNUC__=2 -D__GNUG__=2 -D__cplusplus -D__GNUC_MINOR__=90 -D__alpha -D__alpha__ -D__linux__ -D__linux -D_LONGLONG -Dlinux -Dunix -D__ELF__ -D__alpha -D__alpha__ -D__linux__ -D__linux -D_LONGLONG -D__linux__ -D__unix__ -D__ELF__ -D__linux -D__unix -Asystem(linux) -Acpu(alpha) -Amachine(alpha) -D__EXCEPTIONS -D__OPTIMIZE__ -D__LANGUAGE_C__ -D__LANGUAGE_C -DLANGUAGE_C -D__LANGUAGE_C_PLUS_PLUS__ -D__LANGUAGE_C_PLUS_PLUS -D__cplusplus foo.cc /tmp/cca02854.ii
GNU CPP version egcs-2.90.27 980315 (egcs-1.0.2 release) (Alpha Linux/ELF)
#include "..." search starts here:
#include <...> search starts here:
/usr/local/include/g++
/usr/local/include
/usr/local/alphaev5-unknown-linux-gnu/include
/usr/local/lib/gcc-lib/alphaev5-unknown-linux-gnu/egcs-2.90.27/include
/usr/include
End of search list.
/usr/local/lib/gcc-lib/alphaev5-unknown-linux-gnu/egcs-2.90.27/cc1plus /tmp/cca02854.ii -quiet -dumpbase foo.cc -O9 -version -funroll-loops -o foo.s
GNU C++ version egcs-2.90.27 980315 (egcs-1.0.2 release) (alphaev5-unknown-linux-gnu) compiled by GNU C version egcs-2.90.27 980315 (egcs-1.0.2 release).
--
Steve Williams "The woods are lovely, dark and deep.
steve@icarus.com But I have promises to keep,
steve@picturel.com and lines to code before I sleep,
http://www.picturel.com And lines to code before I sleep."
# Compiler-generated Alpha assembly for the function labelled f__Fv.
#
# Frame layout as used by the code below (byte offsets from $30 once the
# prologue has run):
#   offset 0..56     saved $26 and $9-$15
#   offset 64..120   saved $f2-$f9
#   offset 128       start of the region walked one byte at a time between
#                    128($30) and 138($30) (presumably the ten-element array
#                    x1 from the C++ source -- confirm)
#   offset 144, 192, 240, 288
#                    four handler records, each laid out as:
#                    +0 previous head of the handler chain, +8 zero,
#                    +16 the address 128($30), +24 a handler label,
#                    +32 the value of $30
#   offset 336       copy of the address 128($30)
#   offset 344       down-counter, initialised to 9
#   offset 352       value returned by __get_dynamic_handler_chain
.file 1 "foo.cc"
.version "01.01"
.set noat
gcc2_compiled.:
__gnu_compiled_cplusplus:
.globl __terminate
.globl __sjthrow
.text
.align 3
.globl f__Fv
.ent f__Fv
f__Fv:
# Establish $29 from the address in $27, then allocate a 368-byte frame.
ldgp $29,0($27)
$f__Fv..ng:
lda $30,-368($30)
.frame $30,368,$26,0
# Save the return address and $9-$15. None of $9-$15 is referenced again in
# this listing except by the matching reloads in the epilogue.
stq $26,0($30)
stq $9,8($30)
stq $10,16($30)
stq $11,24($30)
stq $12,32($30)
stq $13,40($30)
stq $14,48($30)
stq $15,56($30)
.mask 0x400fe00,-368
# Save $f2-$f9; likewise only touched again by the epilogue reloads.
stt $f2,64($30)
stt $f3,72($30)
stt $f4,80($30)
stt $f5,88($30)
stt $f6,96($30)
stt $f7,104($30)
stt $f8,112($30)
stt $f9,120($30)
.fmask 0x3fc,-304
.prologue 1
# Down-counter = 9, kept in memory at 344($30).
bis $31,9,$4
stq $4,344($30)
# Fetch the handler-chain pointer and keep it at 352($30).
jsr $26,__get_dynamic_handler_chain
ldgp $29,0($26)
stq $0,352($30)
# Remember the address 128($30) at 336($30).
addq $30,128,$4
stq $4,336($30)
# Fill in the first handler record at 144($30): previous chain head (moved
# through $f1 as a 64-bit copy), zero, address 128($30), label $L11, and $30.
# $3 = 0 marks the straight-line (non-handler) path.
ldt $f1,0($0)
lda $2,$L11
bis $31,$31,$3
stt $f1,144($30)
stq $31,152($30)
stq $4,160($30)
addq $30,160,$1
stq $2,8($1)
stq $30,16($1)
br $31,$L12
.align 4
# Handler entry for the first record. Recomputes $29 from the current PC,
# calls through the address found in $1 (set by whatever transferred control
# here -- not visible in this listing), then sets $3 = 1.
$L11:
br $29,$LGOTO32
$LGOTO32:
ldgp $29,0($29)
bis $1,$1,$27
jsr $26,($27),0
ldgp $29,0($26)
bis $31,1,$3
$L12:
# $3 != 0 means we arrived through $L11: take the cleanup path at $L10.
bne $3,$L10
# Link the record at 144($30) in as the new head of the handler chain.
ldq $4,352($30)
addq $30,144,$1
stq $1,0($4)
# First counting loop: step the counter at 344($30) down to -1. No other work
# is done per step. $2 = (~counter) & 3 selects how many single decrements are
# peeled before the loop that steps by 4: 0 -> none, 3 -> one, 2 -> two,
# 1 -> three. For the initial value 9 this gives 2.
ldq $4,344($30)
lda $3,-1
ornot $31,$4,$1
and $1,3,$2
beq $2,$L16
cmplt $2,3,$1
beq $1,$L121
cmplt $2,2,$1
beq $1,$L122
subq $4,1,$4
stq $4,344($30)
$L122:
ldq $4,344($30)
subq $4,1,$4
stq $4,344($30)
$L121:
ldq $4,344($30)
subq $4,1,$4
br $31,$L138
.align 4
.align 5
# Main loop body: counter -= 4, stored back to memory on every pass.
$L16:
ldq $4,344($30)
subq $4,4,$4
$L138:
stq $4,344($30)
# Repeat until the counter equals -1 (held in $3).
cmpeq $4,$3,$1
beq $1,$L16
# Unlink the first record: restore the chain head to the record's saved
# previous head. The same value also becomes the "previous head" field of the
# second record at 192($30).
ldq $4,352($30)
ldq $1,0($4)
ldt $f1,0($1)
stt $f1,0($4)
stt $f1,192($30)
# Fill in the rest of the second record: zero, address 128($30), $30, and
# label $L31. $3 = 0 again marks the straight-line path.
lda $1,$L31
stq $31,200($30)
addq $30,128,$4
stq $4,208($30)
addq $30,208,$2
stq $30,16($2)
bis $31,$31,$3
stq $1,8($2)
br $31,$L32
.align 4
# Handler entry for the second record; same shape as $L11.
$L31:
br $29,$LGOTO140
$LGOTO140:
ldgp $29,0($29)
bis $1,$1,$27
jsr $26,($27),0
ldgp $29,0($26)
bis $31,1,$3
$L32:
# $3 != 0 means we arrived through $L31: take the cleanup path at $L30.
bne $3,$L30
# Link the second record into the chain, then immediately restore the chain
# head to the record's saved previous head.
ldq $4,352($30)
addq $30,192,$1
stq $1,0($4)
ldt $f1,192($30)
stt $f1,0($4)
# Second counting loop: walk $2 from 138($30) down to 128($30) one byte per
# step, with no other work per step. $3 = (128 - 138) & 3 = 2.
addq $30,138,$2
addq $30,128,$4
cmpeq $4,$2,$1
bne $1,$L61
subq $4,$2,$1
and $1,3,$3
beq $3,$L37
cmplt $3,3,$1
beq $1,$L102
# $3 is known to be non-zero here (the beq above handled zero), so this
# branch is always taken and the addq that follows it is never executed.
bne $3,$L103
addq $30,137,$2
$L103:
subq $2,1,$2
$L102:
subq $2,1,$2
addq $30,128,$4
cmpeq $4,$2,$1
bne $1,$L61
.align 5
# Main loop body: pointer -= 4 until it reaches 128($30), then go to the
# epilogue.
$L37:
subq $2,4,$2
addq $30,128,$4
cmpeq $4,$2,$1
beq $1,$L37
br $31,$L61
.align 4
# Cleanup path entered when the first handler ($L11) fired. Builds a third
# record at 240($30) with handler label $L45.
$L10:
ldq $4,352($30)
lda $2,$L45
bis $31,$31,$3
lda $1,256($30)
ldt $f1,0($4)
stq $31,248($30)
addq $30,128,$4
stq $4,256($30)
stt $f1,240($30)
stq $2,8($1)
stq $30,16($1)
br $31,$L46
.align 4
# Handler entry for the third record; same shape as $L11.
$L45:
br $29,$LGOTO251
$LGOTO251:
ldgp $29,0($29)
bis $1,$1,$27
jsr $26,($27),0
ldgp $29,0($26)
bis $31,1,$3
$L46:
# A handler firing while this cleanup is in progress leads to __terminate.
bne $3,$L44
# Link the third record into the chain.
ldq $4,352($30)
addq $30,240,$1
stq $1,0($4)
# Skip the walk if the saved base address at 336($30) is zero.
ldq $4,336($30)
beq $4,$L20
# $2 = base + 9 - counter, then walk $2 down to base one byte per step with
# no other work per step (presumably undoing the steps the first loop had
# completed -- confirm). Peeling is selected by (base - $2) & 3 as before:
# 3 -> one single step, 2 -> two, 1 -> three, 0 -> none.
addq $4,9,$1
ldq $4,344($30)
subq $1,$4,$2
ldq $4,336($30)
cmpeq $4,$2,$1
bne $1,$L20
subq $4,$2,$1
and $1,3,$3
beq $3,$L23
cmplt $3,3,$1
beq $1,$L83
cmplt $3,2,$1
beq $1,$L84
subq $2,1,$2
$L84:
subq $2,1,$2
$L83:
ldq $4,336($30)
subq $2,1,$2
cmpeq $4,$2,$1
bne $1,$L20
.align 5
# Main loop body: pointer -= 4 until it equals the saved base.
$L23:
ldq $4,336($30)
subq $2,4,$2
cmpeq $4,$2,$1
beq $1,$L23
$L20:
# Unlink the current head record from the chain and call __sjthrow. No branch
# follows the call; presumably __sjthrow does not return.
ldq $4,352($30)
ldq $1,0($4)
ldt $f1,0($1)
stt $f1,0($4)
jsr $26,__sjthrow
ldgp $29,0($26)
.align 4
# Cleanup path entered when the second handler ($L31) fired. Builds a fourth
# record at 288($30) with handler label $L49.
$L30:
ldq $4,352($30)
lda $2,$L49
bis $31,$31,$3
lda $1,304($30)
ldt $f1,0($4)
stq $31,296($30)
addq $30,128,$4
stq $4,304($30)
stt $f1,288($30)
stq $2,8($1)
stq $30,16($1)
br $31,$L50
.align 4
# Handler entry for the fourth record; same shape as $L11.
$L49:
br $29,$LGOTO368
$LGOTO368:
ldgp $29,0($29)
bis $1,$1,$27
jsr $26,($27),0
ldgp $29,0($26)
bis $31,1,$3
$L50:
# A handler firing while this cleanup is in progress leads to __terminate.
bne $3,$L48
# Link the fourth record into the chain, then walk $2 from 138($30) down to
# 128($30) exactly as the second counting loop does.
ldq $4,352($30)
lda $1,288($30)
addq $30,138,$2
stq $1,0($4)
addq $30,128,$4
cmpeq $4,$2,$1
bne $1,$L52
subq $4,$2,$1
and $1,3,$3
beq $3,$L55
cmplt $3,3,$1
beq $1,$L64
# As in the second loop, $3 is non-zero here, so this branch is always taken
# and the addq that follows it is never executed.
bne $3,$L65
addq $30,137,$2
$L65:
subq $2,1,$2
$L64:
subq $2,1,$2
br $31,$L139
.align 4
.align 5
# Main loop body: pointer -= 4 until it reaches 128($30).
$L55:
subq $2,4,$2
$L139:
addq $30,128,$4
cmpeq $4,$2,$1
beq $1,$L55
$L52:
# Unlink the current head record from the chain and call __sjthrow.
ldq $4,352($30)
ldq $1,0($4)
ldt $f1,0($1)
stt $f1,0($4)
jsr $26,__sjthrow
ldgp $29,0($26)
.align 4
# Reached from $L46 when the third record's handler fired.
$L44:
jsr $26,__terminate
ldgp $29,0($26)
.align 4
# Reached from $L50 when the fourth record's handler fired.
$L48:
jsr $26,__terminate
ldgp $29,0($26)
.align 4
# Normal exit: restore every register saved by the prologue, release the
# 368-byte frame and return through $26.
$L61:
ldq $26,0($30)
ldq $9,8($30)
ldq $10,16($30)
ldq $11,24($30)
ldq $12,32($30)
ldq $13,40($30)
ldq $14,48($30)
ldq $15,56($30)
ldt $f2,64($30)
ldt $f3,72($30)
ldt $f4,80($30)
ldt $f5,88($30)
ldt $f6,96($30)
ldt $f7,104($30)
ldt $f8,112($30)
ldt $f9,120($30)
lda $30,368($30)
ret $31,($26),1
.end f__Fv
.ident "GCC: (GNU) egcs-2.90.27 980315 (egcs-1.0.2 release)"