This is the mail archive of the gcc-bugs@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[Bug rtl-optimization/21827] unroll misses simple elimination - works with manual unroll


------- Additional Comments From tlm at daimi dot au dot dk  2005-05-31 20:45 -------
(In reply to comment #1)
> The first testcase is fixed in 4.0.0.   I have not looked 
> into the full testcase.

Installed gcc 4.0.0 (a bit hard with the current version)
OK - I was wrong before (so please do not close this). 
The simple situation is fixed - however, there are still the same problems
with the knight example.

// Bug-report test case, loop form.
// For each of the first two bytes of `board` that equals WHITE_KNIGHT, adds
// one to the result for every one of the eight range tests on bp%8 and bp/8
// that holds, and returns the total.
// WHITE_KNIGHT is not defined in this snippet; the surrounding message says
// it was 5 when the assembly was generated.
// NOTE(review): presumably bp%8 and bp/8 are the column and row of square bp
// on an 8x8 board, and each test checks that one knight move stays on the
// board -- inferred from the names only, confirm with the reporter.
int unrolled_knight_count(unsigned char* board)
{
  int count = 0;
  for (int bp=0;bp<2;bp++) // bound reduced to 2 just for the example
  {
    if (board[bp]==WHITE_KNIGHT)
    {
      // Every test below depends only on bp, not on the board contents.
      if (bp%8>1 && bp/8>0) count++;
      if (bp%8>0 && bp/8>1) count++;
      if (bp%8<6 && bp/8>0) count++;
      if (bp%8<7 && bp/8>1) count++;
      if (bp%8>1 && bp/8<7) count++;
      if (bp%8>0 && bp/8<6) count++;
      if (bp%8<6 && bp/8<7) count++;
      if (bp%8<7 && bp/8<6) count++;
    }
  }
  return count;
}

is compiled to 
	# Compiler output for the loop version (GCC 4.0.0 according to the
	# .ident line at the end of this listing).
	# NOTE(review): the .globl line names _Z26unrolled_knight_countPh, while
	# the .type/.size directives and the label use
	# _Z26auto_unrolled_knight_countPh; the listing looks hand-edited --
	# confirm against the real compiler output.
	.text
	.align 2
	.p2align 4,,15
.globl _Z26unrolled_knight_countPh
	.type	_Z26auto_unrolled_knight_countPh, @function
_Z26auto_unrolled_knight_countPh:
.LFB2:
	pushl	%ebp
.LCFI0:
	# %eax holds the result; start it at 0.
	xorl	%eax, %eax
	movl	%esp, %ebp
.LCFI1:
	# Load the value at 8(%ebp) into %edx; it is used as a byte pointer
	# below (the board argument, going by the C source).
	movl	8(%ebp), %edx
	# First byte == 5?  If so, branch out to .L10 to set the result to 2.
	cmpb	$5, (%edx)
	je	.L10
.L6:
	# Second byte == 5?  If so, branch to .L11, which adds 3 and returns.
	cmpb	$5, 1(%edx)
	je	.L11
	popl	%ebp
	ret
	.p2align 4,,7
.L11:
	popl	%ebp
	addl	$3, %eax
	.p2align 4,,6
	ret
	.p2align 4,,7
.L10:
	# Taken when the first byte matched: result = 2, then jump back to
	# .L6 for the second byte's test.
	movl	$2, %eax
	.p2align 4,,7
	jmp	.L6
.LFE2:
	.size	_Z26auto_unrolled_knight_countPh, .-_Z26auto_unrolled_knight_countPh
	.ident	"GCC: (GNU) 4.0.0"
	.section	.note.GNU-stack,"",@progbits

Now if I manually expand the loop before compiling

// Bug-report test case, hand-unrolled form.
// Same body as the loop version with the index replaced by the literals 0
// and 1, so every range test is a constant expression; only the two
// comparisons of board[0] and board[1] against WHITE_KNIGHT depend on input.
// WHITE_KNIGHT is not defined in this snippet; the surrounding message says
// it was 5 when the assembly was generated.
// Returns the accumulated count.
int unrolled_knight_count(unsigned char* board)
{
  int count = 0;
//  for (int bp=0;bp<64;bp++) // We expand 2 as before..
    if (board[0]==WHITE_KNIGHT)
    {
      // With index 0 only the last two tests are true, so this adds 2.
      if (0%8>1 && 0/8>0) count++;
      if (0%8>0 && 0/8>1) count++;
      if (0%8<6 && 0/8>0) count++;
      if (0%8<7 && 0/8>1) count++;
      if (0%8>1 && 0/8<7) count++;
      if (0%8>0 && 0/8<6) count++;
      if (0%8<6 && 0/8<7) count++;
      if (0%8<7 && 0/8<6) count++;
    }
    if (board[1]==WHITE_KNIGHT)
    {
      // With index 1 only the last three tests are true, so this adds 3.
      if (1%8>1 && 1/8>0) count++;
      if (1%8>0 && 1/8>1) count++;
      if (1%8<6 && 1/8>0) count++;
      if (1%8<7 && 1/8>1) count++;
      if (1%8>1 && 1/8<7) count++;
      if (1%8>0 && 1/8<6) count++;
      if (1%8<6 && 1/8<7) count++;
      if (1%8<7 && 1/8<6) count++;
    }
  return count;
}

The result is much better. (Not that I know assembler code.)

I have WHITE_KNIGHT = 5 (as you might have seen from the assembler code),
and when I timed it I had knights on positions 24, 44, 55 and 56. The code is
400-500% faster - so it will really improve the speed ...

	# Compiler output for the hand-unrolled version.
	# NOTE(review): the .type directive names
	# _Z26auto_unrolled_knight_countPh, while the .globl line and the label
	# use _Z26unrolled_knight_countPh; the listing looks hand-edited --
	# confirm against the real compiler output.
	.text
	.align 2
	.p2align 4,,15
.globl _Z26unrolled_knight_countPh
	.type	_Z26auto_unrolled_knight_countPh, @function
_Z26unrolled_knight_countPh:
.LFB2:
	pushl	%ebp
.LCFI0:
	# %eax holds the result; start it at 0.
	xorl	%eax, %eax
	movl	%esp, %ebp
.LCFI1:
	# Load the value at 8(%ebp) into %edx; it is used as a byte pointer
	# below (the board argument, going by the C source).
	movl	8(%ebp), %edx
	cmpb	$5, (%edx)
	# %al = 1 if the first byte equals 5, else 0; the rest of %eax is
	# still zero from the xorl above.
	sete	%al
	# Double it: the first test contributes 2 or 0 without a branch.
	addl	%eax, %eax
	# Second byte == 5?  If so, branch to .L9, which adds 3 and returns.
	cmpb	$5, 1(%edx)
	je	.L9
	popl	%ebp
	ret
	.p2align 4,,7
.L9:
	popl	%ebp
	addl	$3, %eax
	ret

Again thanks. I do not want to sound like an unhappy gcc-user 
(I admire the work you are doing). 



-- 


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=21827


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]