Optimization stupidity in CRC code

bug-gcc@horizon.com bug-gcc@horizon.com
Mon Feb 17 15:03:00 GMT 2003


Sorry to only send complaints, but I thought this was interesting.

Using a 3.2.3 snapshot:
> gcc.real (GCC) 3.2.3 20030210 (Debian prerelease)

The following C code:

typedef unsigned int u32;
typedef unsigned char u8;

extern u32 const table[256];

u32
crc1(u32 crc, u8 const *buf, unsigned len)
{
	while (len--)
		crc = (crc >> 8) ^ table[(crc ^ *buf++) & 255];
	return crc;
}

u32
crc2(u32 crc, u8 const *buf, unsigned len)
{
	while (len--)
		crc = table[(crc ^ *buf++) & 255] ^ (crc >> 8);
	return crc;
}


u32
crc3(u32 crc, u8 const *buf, unsigned len)
{
	while (len--)
		crc ^= *buf++, crc = (crc >> 8) ^ table[crc & 255];
	return crc;
}

u32
crc4(u32 crc, u8 const *buf, unsigned len)
{
	while (len--)
		crc ^= *buf++, crc = table[crc & 255] ^ (crc >> 8);
	return crc;
}

Compiles (-O2 -fomit-frame-pointer), on x86, into the following assembly:

	.file	"funny.c"
	.text
.globl crc1
	.type	crc1,@function
crc1:
	pushl	%ebx
	movl	16(%esp), %ecx
	decl	%ecx
	cmpl	$-1, %ecx
	movl	8(%esp), %eax
	movl	12(%esp), %ebx
	je	.L7
.L5:
	movl	%eax, %edx
	xorb	(%ebx), %al
	shrl	$8, %edx
	movzbl	%al, %eax
	decl	%ecx
	xorl	table(,%eax,4), %edx
	incl	%ebx
	cmpl	$-1, %ecx
	movl	%edx, %eax
	jne	.L5
.L7:
	popl	%ebx
	ret
.Lfe1:
	.size	crc1,.Lfe1-crc1
.globl crc2
	.type	crc2,@function
crc2:
	pushl	%ebx
	movl	16(%esp), %edx
	decl	%edx
	cmpl	$-1, %edx
	movl	8(%esp), %ecx
	movl	12(%esp), %ebx
	je	.L14
.L12:
	movb	(%ebx), %al
	xorl	%ecx, %eax
	movzbl	%al, %eax
	shrl	$8, %ecx
	decl	%edx
	xorl	table(,%eax,4), %ecx
	incl	%ebx
	cmpl	$-1, %edx
	jne	.L12
.L14:
	movl	%ecx, %eax
	popl	%ebx
	ret
.Lfe2:
	.size	crc2,.Lfe2-crc2
.globl crc3
	.type	crc3,@function
crc3:
	pushl	%esi
	pushl	%ebx
	movl	20(%esp), %ecx
	decl	%ecx
	cmpl	$-1, %ecx
	movl	12(%esp), %ebx
	movl	16(%esp), %esi
	je	.L21
.L19:
	movzbl	(%esi), %eax
	xorl	%eax, %ebx
	movl	%ebx, %edx
	shrl	$8, %edx
	movzbl	%bl,%eax
	decl	%ecx
	movl	%edx, %ebx
	incl	%esi
	xorl	table(,%eax,4), %ebx
	cmpl	$-1, %ecx
	jne	.L19
.L21:
	movl	%ebx, %eax
	popl	%ebx
	popl	%esi
	ret
.Lfe3:
	.size	crc3,.Lfe3-crc3
.globl crc4
	.type	crc4,@function
crc4:
	pushl	%ebx
	movl	16(%esp), %ecx
	decl	%ecx
	cmpl	$-1, %ecx
	movl	8(%esp), %edx
	movl	12(%esp), %ebx
	je	.L28
.L26:
	movzbl	(%ebx), %eax
	xorl	%eax, %edx
	movzbl	%dl,%eax
	decl	%ecx
	shrl	$8, %edx
	incl	%ebx
	xorl	table(,%eax,4), %edx
	cmpl	$-1, %ecx
	jne	.L26
.L28:
	movl	%edx, %eax
	popl	%ebx
	ret
.Lfe4:
	.size	crc4,.Lfe4-crc4
	.ident	"GCC: (GNU) 3.2.3 20030210 (Debian prerelease)"


Note how crc1() and crc3() both move the crc to %edx for shifting and then
move it back to the home register compared to the trivially equivalent
crc2() and crc4(), which omit the redundant move.

It would be nice if GCC could produce the "good" code in either case.



More information about the Gcc-bugs mailing list