[Bug rtl-optimization/21827] unroll misses simple elimination - works with manual unroll
tlm at daimi dot au dot dk
gcc-bugzilla@gcc.gnu.org
Tue Jul 19 17:34:00 GMT 2005
------- Additional Comments From tlm at daimi dot au dot dk 2005-07-19 17:02 -------
(In reply to comment #1)
> The first testcase is fixed in 4.0.0. (Though there is a regression on the
mainline). I have not looked
> into the full testcase.
There have been no further reactions to this bug / request, so I am giving a bit more
information (and hopefully motivation) to help move towards a solution.
I have written the following code:
auto_unrolled_knight_count8 and t_auto_unrolled_knight_count9 have only one
difference: the first loop goes to 8, the second loop goes to 9. If I manually
unroll (meaning replacing with constants up to 64 - it is a chess problem),
the code is exactly like the code generated in the up-to-eight example.
The code generated for the 9 example is, in my opinion, quite bad.
(It does work - but I consider the finest task of unrolling to be eliminating
what is easily known at compile time to be impossible.) The code is normally
at least 4-5 times slower than the above code!
The source is like this :
// Value a board byte is compared against (named for the white knight piece).
#define WHITE_KNIGHT 5
// Scans board[0] .. board[7]. For every byte equal to WHITE_KNIGHT, count is
// incremented once for each of the eight tests below that holds, and the
// accumulated count is returned. board must have at least 8 readable bytes.
// Presumably bp%8 is the column and bp/8 the row of an 8x8 chess board, and
// the eight tests check which knight moves stay on the board -- TODO confirm.
int auto_unrolled_knight_count8(unsigned char* board)
{
int count = 0;
for (int bp=0;bp<8;++bp)
{
if (board[bp]==WHITE_KNIGHT)
{
// With bp in 0..7, bp/8 is always 0: the four tests that need bp/8>0 or
// bp/8>1 can never hold, while bp/8<7 and bp/8<6 always hold. Only the
// bp%8 half of the last four tests depends on bp.
if (bp%8>1 && bp/8>0) count++;
if (bp%8>0 && bp/8>1) count++;
if (bp%8<6 && bp/8>0) count++;
if (bp%8<7 && bp/8>1) count++;
if (bp%8>1 && bp/8<7) count++;
if (bp%8>0 && bp/8<6) count++;
if (bp%8<6 && bp/8<7) count++;
if (bp%8<7 && bp/8<6) count++;
}
}
return count;
}
// Same body as the 8-iteration variant, but the loop runs over board[0] ..
// board[8] (nine bytes, so board must have at least 9 readable bytes).
// For every byte equal to WHITE_KNIGHT, count is incremented once for each of
// the eight tests below that holds; the accumulated count is returned.
// Presumably bp%8 is the column and bp/8 the row of an 8x8 chess board -- TODO
// confirm.
int t_auto_unrolled_knight_count9(unsigned char* board)
{
int count = 0;
for (int bp=0;bp<9;++bp)
{
if (board[bp]==WHITE_KNIGHT)
{
// With bp in 0..8, bp/8 is 0 except for bp==8, where bp/8 is 1 and bp%8 is 0.
// So bp/8>1 never holds and bp/8<7 / bp/8<6 always hold; every test is still
// decidable from the (constant) value of bp in each iteration.
if (bp%8>1 && bp/8>0) count++;
if (bp%8>0 && bp/8>1) count++;
if (bp%8<6 && bp/8>0) count++;
if (bp%8<7 && bp/8>1) count++;
if (bp%8>1 && bp/8<7) count++;
if (bp%8>0 && bp/8<6) count++;
if (bp%8<6 && bp/8<7) count++;
if (bp%8<7 && bp/8<6) count++;
}
}
return count;
}
Assembly : (Compiled with -O3 and -funroll-loops)
# Compiler output (32-bit x86, AT&T syntax) for
# auto_unrolled_knight_count8(unsigned char*).
# The 8-iteration loop is fully unrolled: each board byte is compared with 5
# (WHITE_KNIGHT) in turn, and all bp%8 / bp/8 tests have been folded into one
# constant increment per square (2,3,4,4,4,4,3,2 for bp = 0..7).
# Register roles: eax = count (return value), edx = board (first stack arg).
.file "all_in_one.cpp"
.text
.align 2
.p2align 4,,15
.globl _Z27auto_unrolled_knight_count8Ph
.type _Z27auto_unrolled_knight_count8Ph, @function
_Z27auto_unrolled_knight_count8Ph:
.LFB2:
pushl %ebp
.LCFI0:
# count = 0
xorl %eax, %eax
movl %esp, %ebp
.LCFI1:
# edx = board
movl 8(%ebp), %edx
# board[0] == 5 ?
cmpb $5, (%edx)
je .L22
.L6:
# board[1] == 5 ?
cmpb $5, 1(%edx)
je .L23
.L8:
# board[2] == 5 ?
cmpb $5, 2(%edx)
je .L24
.L10:
# board[3] == 5 ?
cmpb $5, 3(%edx)
.p2align 4,,5
je .L25
.L12:
# board[4] == 5 ?
cmpb $5, 4(%edx)
.p2align 4,,5
je .L26
.L14:
# board[5] == 5 ?
cmpb $5, 5(%edx)
.p2align 4,,5
je .L27
.L16:
# board[6] == 5 ?
cmpb $5, 6(%edx)
.p2align 4,,5
je .L28
.L18:
# board[7] == 5 ?  (last square: either return path follows)
cmpb $5, 7(%edx)
.p2align 4,,5
je .L29
# No match on board[7]: return count as accumulated so far.
popl %ebp
.p2align 4,,6
ret
.p2align 4,,7
.L29:
# board[7] matched: count += 2, then return directly.
popl %ebp
addl $2, %eax
.p2align 4,,6
ret
.p2align 4,,7
.L28:
# board[6] matched: count += 3, resume at the board[7] test.
addl $3, %eax
.p2align 4,,7
jmp .L18
.p2align 4,,7
.L27:
# board[5] matched: count += 4, resume at the board[6] test.
addl $4, %eax
.p2align 4,,5
jmp .L16
.p2align 4,,7
.L26:
# board[4] matched: count += 4, resume at the board[5] test.
addl $4, %eax
.p2align 4,,5
jmp .L14
.p2align 4,,7
.L25:
# board[3] matched: count += 4, resume at the board[4] test.
addl $4, %eax
.p2align 4,,5
jmp .L12
.p2align 4,,7
.L24:
# board[2] matched: count += 4, resume at the board[3] test.
addl $4, %eax
.p2align 4,,5
jmp .L10
.p2align 4,,7
.L23:
# board[1] matched: count += 3, resume at the board[2] test.
addl $3, %eax
.p2align 4,,5
jmp .L8
.p2align 4,,7
.L22:
# board[0] matched: count is still 0 here, so it is simply set to 2.
movl $2, %eax
.p2align 4,,5
jmp .L6
.LFE2:
.size _Z27auto_unrolled_knight_count8Ph, .-_Z27auto_unrolled_knight_count8Ph
----------------------- End of "nice" code ----------------------
# Compiler output (32-bit x86, AT&T syntax) for
# t_auto_unrolled_knight_count9(unsigned char*).
# The 9-iteration loop is unrolled by 3 (three passes over bp, bp+1, bp+2),
# but unlike the 8-iteration variant nothing is folded: each of the three
# copies recomputes bp%8 and all eight tests at run time.
# Register / stack roles:
#   edi         = count (moved to eax on exit)
#   esi         = current square index bp
#   eax         = board pointer (reloaded from 8(%ebp) after being clobbered)
#   -20(%ebp)   = saved bp+1 for the current pass
#   -13/-14/-15(%ebp) = byte temporaries holding partial test results
# Increment idiom used throughout: "cmpb $1, x" sets CF only when x == 0, so
# the following "sbbl $-1, %edi" computes edi += (x != 0).
.align 2
.p2align 4,,15
.globl _Z29t_auto_unrolled_knight_count9Ph
.type _Z29t_auto_unrolled_knight_count9Ph, @function
_Z29t_auto_unrolled_knight_count9Ph:
.LFB3:
pushl %ebp
.LCFI2:
movl %esp, %ebp
.LCFI3:
pushl %edi
.LCFI4:
# count = 0
xorl %edi, %edi
pushl %esi
.LCFI5:
# bp = 0
xorl %esi, %esi
pushl %ebx
.LCFI6:
# Reserve 8 bytes for the stack temporaries listed above.
subl $8, %esp
.LCFI7:
jmp .L31
.p2align 4,,7
.L32:
# board[bp] did not match: advance to bp+1, remember it, test board[bp+1].
incl %esi
movl %esi, -20(%ebp)
cmpb $5, (%eax,%esi)
je .L64
.L52:
# board[bp+1] did not match (or is done): advance to bp+2 and test it.
incl %esi
cmpb $5, (%eax,%esi)
je .L60
.L54:
# End of a pass: esi = saved (bp+1) + 2 = bp+3; leave the loop at 9.
movl -20(%ebp), %esi
addl $2, %esi
cmpl $9, %esi
je .L65
.L31:
# Start of a pass: eax = board; test board[bp].
movl 8(%ebp), %eax
cmpb $5, (%eax,%esi)
jne .L32
# --- Copy 1: board[bp] matched; evaluate the eight tests for bp = esi. ---
# ecx = bp % 8 using the signed-remainder sequence (edx = 7 if bp < 0, else 0).
movl %esi, %eax
cltd
shrl $29, %edx
leal (%esi,%edx), %ecx
andl $7, %ecx
subl %edx, %ecx
# Test 1: (bp%8 > 1) saved in -15(%ebp); bl = (bp > 7), i.e. bp/8 > 0.
cmpl $1, %ecx
setg -15(%ebp)
cmpl $7, %esi
movzbl -15(%ebp), %edx
setg %bl
andb %bl, %dl
cmpb $1, %dl
sbbl $-1, %edi
# Test 2: (bp%8 > 0) saved in -14(%ebp); al = (bp > 15), i.e. bp/8 > 1.
testl %ecx, %ecx
setg -14(%ebp)
cmpl $15, %esi
movzbl -14(%ebp), %edx
setg %al
andb %al, %dl
cmpb $1, %dl
sbbl $-1, %edi
# Test 3: (bp%8 < 6) saved in -13(%ebp), combined with bl (bp/8 > 0).
cmpl $5, %ecx
setle -13(%ebp)
andb -13(%ebp), %bl
cmpb $1, %bl
sbbl $-1, %edi
# Test 4: bl = (bp%8 < 7), combined with al (bp/8 > 1). The board pointer is
# reloaded into eax between cmpb and sbbl; movl leaves the flags untouched.
cmpl $6, %ecx
setle %bl
andb %bl, %al
cmpb $1, %al
movl 8(%ebp), %eax
sbbl $-1, %edi
# Test 5: cl = (bp <= 55), i.e. bp/8 < 7, combined with saved (bp%8 > 1).
cmpl $55, %esi
setle %cl
andb %cl, -15(%ebp)
cmpb $1, -15(%ebp)
sbbl $-1, %edi
# Test 6: dl = (bp <= 47), i.e. bp/8 < 6, combined with saved (bp%8 > 0).
cmpl $47, %esi
setle %dl
andb %dl, -14(%ebp)
cmpb $1, -14(%ebp)
sbbl $-1, %edi
# Test 7: saved (bp%8 < 6) combined with cl (bp/8 < 7).
andb %cl, -13(%ebp)
cmpb $1, -13(%ebp)
sbbl $-1, %edi
# Test 8: bl (bp%8 < 7) combined with dl (bp/8 < 6).
andb %dl, %bl
cmpb $1, %bl
sbbl $-1, %edi
# Advance to bp+1, remember it in -20(%ebp), and test board[bp+1].
incl %esi
movl %esi, -20(%ebp)
cmpb $5, (%eax,%esi)
jne .L52
.L64:
# --- Copy 2: board[bp+1] matched; same eight tests with esi = bp+1. ---
# ecx = esi % 8 (signed-remainder sequence).
movl %esi, %eax
cltd
shrl $29, %edx
leal (%esi,%edx), %ecx
andl $7, %ecx
subl %edx, %ecx
# Test 1
cmpl $1, %ecx
setg -15(%ebp)
cmpl $7, %esi
movzbl -15(%ebp), %edx
setg %bl
andb %bl, %dl
cmpb $1, %dl
sbbl $-1, %edi
# Test 2
testl %ecx, %ecx
setg -14(%ebp)
cmpl $15, %esi
movzbl -14(%ebp), %edx
setg %al
andb %al, %dl
cmpb $1, %dl
sbbl $-1, %edi
# Test 3
cmpl $5, %ecx
setle -13(%ebp)
andb -13(%ebp), %bl
cmpb $1, %bl
sbbl $-1, %edi
# Test 4 (board pointer reloaded into eax before the flags are consumed)
cmpl $6, %ecx
setle %bl
andb %bl, %al
cmpb $1, %al
movl 8(%ebp), %eax
sbbl $-1, %edi
# Test 5
cmpl $55, %esi
setle %cl
andb %cl, -15(%ebp)
cmpb $1, -15(%ebp)
sbbl $-1, %edi
# Test 6
cmpl $47, %esi
setle %dl
andb %dl, -14(%ebp)
cmpb $1, -14(%ebp)
sbbl $-1, %edi
# Test 7
andb %cl, -13(%ebp)
cmpb $1, -13(%ebp)
sbbl $-1, %edi
# Test 8
andb %dl, %bl
cmpb $1, %bl
sbbl $-1, %edi
# Advance to bp+2 and test board[bp+2]; -20(%ebp) still holds bp+1.
incl %esi
cmpb $5, (%eax,%esi)
jne .L54
.L60:
# --- Copy 3: board[bp+2] matched; same eight tests with esi = bp+2. ---
# ecx = esi % 8 (signed-remainder sequence).
movl %esi, %eax
cltd
shrl $29, %edx
leal (%esi,%edx), %ecx
andl $7, %ecx
subl %edx, %ecx
# Test 1
cmpl $1, %ecx
setg -15(%ebp)
cmpl $7, %esi
movzbl -15(%ebp), %edx
setg %bl
andb %bl, %dl
cmpb $1, %dl
sbbl $-1, %edi
# Test 2
testl %ecx, %ecx
setg -14(%ebp)
cmpl $15, %esi
movzbl -14(%ebp), %edx
setg %al
andb %al, %dl
cmpb $1, %dl
sbbl $-1, %edi
# Test 3
cmpl $5, %ecx
setle -13(%ebp)
andb -13(%ebp), %bl
cmpb $1, %bl
sbbl $-1, %edi
# Test 4 (no board reload here: the next pass reloads eax at .L31)
cmpl $6, %ecx
setle %bl
andb %bl, %al
cmpb $1, %al
sbbl $-1, %edi
# Test 5
cmpl $55, %esi
setle %cl
andb %cl, -15(%ebp)
cmpb $1, -15(%ebp)
sbbl $-1, %edi
# Test 6: esi is restored to the saved bp+1 between cmpl and setle; movl does
# not change the flags, so dl still reflects the compare against 47.
cmpl $47, %esi
movl -20(%ebp), %esi
setle %dl
andb %dl, -14(%ebp)
cmpb $1, -14(%ebp)
sbbl $-1, %edi
# Test 7
andb %cl, -13(%ebp)
cmpb $1, -13(%ebp)
sbbl $-1, %edi
# Test 8
andb %dl, %bl
cmpb $1, %bl
sbbl $-1, %edi
# End of a pass: esi = (bp+1) + 2 = bp+3; loop again unless it reached 9.
addl $2, %esi
cmpl $9, %esi
jne .L31
.L65:
# Epilogue: release the temporaries, return count in eax, restore the
# registers pushed in the prologue.
addl $8, %esp
movl %edi, %eax
popl %ebx
popl %esi
popl %edi
popl %ebp
ret
.LFE3:
.size _Z29t_auto_unrolled_knight_count9Ph, .-_Z29t_auto_unrolled_knight_count9Ph
# Compiler identification string and GNU-stack note section emitted by GCC.
.ident "GCC: (GNU) 4.0.0"
.section .note.GNU-stack,"",@progbits
I hope you will confirm the problem (so it can be solved). It would really
improve gcc.
Regards Thorbjørn
--
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=21827
More information about the Gcc-bugs
mailing list