This is the mail archive of the gcc@gcc.gnu.org mailing list for the GCC project.

Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]
Other format:	[Raw text]

ARM gcc 4.1 optimization bug.

From: "Fengwei Yin" <yfw dot debian at gmail dot com>
To: gcc at gcc dot gnu dot org
Date: Sun, 30 Apr 2006 11:03:05 +0800
Subject: ARM gcc 4.1 optimization bug.

Hi,
I am using GCC 4.1 for ARM to build the Linux kernel, and I have hit a bug
that appears to be related to GCC optimization.
I assume this is the correct mailing list for reporting it;
if not, please let me know.
I could not find an existing report of the same bug.

The kernel is 2.6.14. When I build the ALSA subsystem, I use the following command line:

arm-iwmmxt-linux-gnueabi-gcc -Wp,-MD,sound/core/.pcm_native.o.d -nostdinc -isystem /usr/local/arm-iwmmxt-linux-gnueabi/bin/../lib/gcc/arm-iwmmxt-linux-gnueabi/4.1.0/include -D__KERNEL__ -Iinclude -include include/linux/autoconf.h -mlittle-endian -gdwarf-2 -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs -fno-strict-aliasing -fno-common -ffreestanding -fno-omit-frame-pointer -fno-optimize-sibling-calls -gdwarf-2 -fno-omit-frame-pointer -mapcs -mno-sched-prolog -mabi=aapcs-linux -mno-thumb-interwork -D__LINUX_ARM_ARCH__=5 -march=armv5te -mtune=xscale -Wa,-mcpu=xscale -msoft-float -Uarm -Wdeclaration-after-statement -Wno-pointer-sign -gdwarf-2 -DKBUILD_BASENAME=pcm_native -DKBUILD_MODNAME=snd_pcm -Os -c -o sound/core/pcm_native.o sound/core/pcm_native.c


The generated function looks like this (output of
arm-iwmmxt-linux-gnueabi-objdump -d pcm_native.o):

/*
 * snd_mask_refine as built with -Os (objdump -d output).
 *
 * Roles of registers and stack as used in this listing:
 *   r4    = mask (copy of incoming r0)
 *   r7    = v    (copy of incoming r1)
 *   r6    = fp - 60, start of the 32-byte local that receives the copy
 *           of *mask and is later passed to memcmp as its second argument
 *   ip/lr = source/destination cursors for the two 16-byte copy steps
 */
0000211c <snd_mask_refine>:
   211c:	e1a0c00d 	mov	ip, sp
   2120:	e92dd8f0 	stmdb	sp!, {r4, r5, r6, r7, fp, ip, lr, pc}	/* push 8 registers (32 bytes) */
   2124:	e24cb004 	sub	fp, ip, #4	; 0x4	/* fp = entry sp - 4 */
   2128:	e24dd020 	sub	sp, sp, #32	; 0x20	/* reserve 32 bytes; sp is now fp - 60 */
   212c:	e5913000 	ldr	r3, [r1]	/* r3 = v->bits[0] */
   2130:	e51b502c 	ldr	r5, [fp, #-44]	/* r5 = stack word at fp - 44, i.e. offset 16 into the local at fp - 60.
								   NOTE(review): nothing in this function has stored to
								   that word yet -- the copy only starts at 2150. */
   2134:	e24b603c 	sub	r6, fp, #60	; 0x3c	/* r6 = address of the local copy */
   2138:	e1a0c000 	mov	ip, r0	/* ip = mask (copy source cursor) */
   213c:	e1a0e006 	mov	lr, r6	/* lr = local copy (copy destination cursor) */
   2140:	e1a04000 	mov	r4, r0	/* r4 = mask, kept across the copy */
   2144:	e0055003 	and	r5, r5, r3	/* r5 &= v->bits[0] */
   2148:	e1a07001 	mov	r7, r1	/* r7 = v, kept across the copy */
   214c:	e8bc000f 	ldmia	ip!, {r0, r1, r2, r3}	/* r0-r3 = mask->bits[0..3]; ip advances by 16 */
   2150:	e8ae000f 	stmia	lr!, {r0, r1, r2, r3}	/* local words 0..3 = r0-r3; lr advances by 16 */
   2154:	e89c000f 	ldmia	ip, {r0, r1, r2, r3}	/* r0-r3 = mask->bits[4..7], replacing the first four */
   2158:	e5845000 	str	r5, [r4]	/* mask->bits[0] = r5 */
   215c:	e597c004 	ldr	ip, [r7, #4]	/* ip = v->bits[1] */
   2160:	e3550000 	cmp	r5, #0	; 0x0	/* flags consumed by the bne at 217c */
   2164:	e88e000f 	stmia	lr, {r0, r1, r2, r3}	/* local words 4..7 = r0-r3 */
   2168:	e001300c 	and	r3, r1, ip		/* r1 here was loaded by
								   2154:	e89c000f 	ldmia	ip, {r0, r1, r2, r3}
								   so it holds mask->bits[5]. This is the wrong value.
								   The r1 loaded by this instruction (mask->bits[1]) should be used:
								   214c:   ldmia	ip!, {r0, r1, r2, r3}
								*/
   216c:	e1a00004 	mov	r0, r4	/* memcmp argument 0 = mask */
   2170:	e3a02008 	mov	r2, #8	; 0x8	/* memcmp argument 2 = 8 bytes */
   2174:	e1a01006 	mov	r1, r6	/* memcmp argument 1 = local copy */
   2178:	e5843004 	str	r3, [r4, #4]	/* mask->bits[1] = r3 */
   217c:	1a000005 	bne	2198 <snd_mask_refine+0x7c>	/* r5 != 0: go compare */
   2180:	e3530000 	cmp	r3, #0	; 0x0
   2184:	e3e03015 	mvn	r3, #21	; 0x15	/* r3 = ~21 = -22; does not change the flags */
   2188:	1a000002 	bne	2198 <snd_mask_refine+0x7c>	/* stored bits[1] != 0: go compare */
   218c:	e1a00003 	mov	r0, r3	/* return value = r3 */
   2190:	e24bd01c 	sub	sp, fp, #28	; 0x1c
   2194:	e89da8f0 	ldmia	sp, {r4, r5, r6, r7, fp, sp, pc}	/* restore registers; loading pc returns */
   2198:	ebfffffe 	bl	0 <memcmp>
   219c:	e2503000 	subs	r3, r0, #0	; 0x0
   21a0:	13a03001 	movne	r3, #1	; 0x1	/* r3 = (memcmp result != 0) */
   21a4:	eafffff8 	b	218c <snd_mask_refine+0x70>

The C code is as follows:

/* Number of leading words of bits[] that the helpers in this excerpt loop over. */
#define	SNDRV_MASK_SIZE	2
/*
 * Bit mask storage. The array holds 8 words, which is more than
 * SNDRV_MASK_SIZE; a whole-structure assignment therefore copies all 8.
 */
struct mask_t {
	unsigned int bits[8];
};
typedef	struct mask_t snd_mask_t;
/*
 * Report whether a mask has no bits set.
 *
 * Only the first SNDRV_MASK_SIZE words of mask->bits are examined.
 * Returns 0 as soon as a non-zero word is found, 1 if all of them are zero.
 */
static inline int snd_mask_empty(const snd_mask_t *mask)
{
       int i;
       for (i = 0; i < SNDRV_MASK_SIZE; i++) {
               if (mask->bits[i])
                       return 0;	/* a word with a set bit: not empty */
       }
       return 1;
}
/*
 * AND the first SNDRV_MASK_SIZE words of *v into *mask, in place.
 * Words of mask->bits beyond that index are left untouched.
 */
static inline void snd_mask_intersect(snd_mask_t *mask, const snd_mask_t *v)
{
       int i;
       for (i = 0; i < SNDRV_MASK_SIZE; i++)
               mask->bits[i] &= v->bits[i];
}
/*
 * Copy *v into *mask.
 * This is a whole-structure assignment, so every word of bits[] is copied,
 * not only the first SNDRV_MASK_SIZE.
 */
static inline void snd_mask_copy(snd_mask_t *mask, const snd_mask_t *v)
{
       *mask = *v;
}
/*
 * Narrow *mask to the bits it has in common with *v.
 *
 * Returns -1 if snd_mask_empty() reports the narrowed mask as empty;
 * otherwise returns the logical negation of snd_mask_eq(mask, &old),
 * where old is the value *mask had on entry.
 *
 * NOTE(review): snd_mask_eq() is not part of this excerpt; both disassembly
 * listings in this report call memcmp with a length of 8 at that point.
 * They also return -22 (mvn r3, #21) on the empty path where this excerpt
 * returns -1 -- presumably the excerpt was simplified; confirm against the
 * kernel source.
 */
int snd_mask_refine(snd_mask_t *mask, const snd_mask_t *v)
{
       snd_mask_t old;

       snd_mask_copy(&old, mask);	/* keep the pre-narrowing value for the comparison below */
       snd_mask_intersect(mask, v);
       if (snd_mask_empty(mask))
               return -1;

       return !snd_mask_eq(mask, &old);
}

When I remove the -O option (-Os in the command line above), ALSA works correctly. The disassembly then looks like this:

/*
 * snd_mask_refine as built without optimization (objdump -d output).
 *
 * Stack slots as used in this listing (all relative to fp):
 *   fp-72 = mask            fp-76 = v
 *   fp-64 = start of the 32-byte local copy of *mask
 *   fp-32 = counter of the first loop (AND loop)
 *   fp-28 = counter of the second loop, fp-24 = pointer it scans
 *   fp-80 = result of the second loop (1 = all scanned words were zero)
 *   fp-16 / fp-20 = the two pointers passed to memcmp
 *   fp-84 = value returned in r0
 */
00000040 <snd_mask_refine>:
     40:	e1a0c00d 	mov	ip, sp
     44:	e92dd800 	stmdb	sp!, {fp, ip, lr, pc}
     48:	e24cb004 	sub	fp, ip, #4	; 0x4	/* fp = entry sp - 4 */
     4c:	e24dd048 	sub	sp, sp, #72	; 0x48
     50:	e50b0048 	str	r0, [fp, #-72]	/* spill mask */
     54:	e50b104c 	str	r1, [fp, #-76]	/* spill v */
     58:	e51b3048 	ldr	r3, [fp, #-72]
     5c:	e24be040 	sub	lr, fp, #64	; 0x40	/* lr = local copy (destination cursor) */
     60:	e1a0c003 	mov	ip, r3	/* ip = mask (source cursor) */
     64:	e8bc000f 	ldmia	ip!, {r0, r1, r2, r3}	/* r0-r3 = mask->bits[0..3] */
     68:	e8ae000f 	stmia	lr!, {r0, r1, r2, r3}	/* local words 0..3 */
     6c:	e89c000f 	ldmia	ip, {r0, r1, r2, r3}	/* r0-r3 = mask->bits[4..7] */
     70:	e88e000f 	stmia	lr, {r0, r1, r2, r3}	/* local words 4..7; the copy is complete before any AND */
     74:	e3a03000 	mov	r3, #0	; 0x0
     78:	e50b3020 	str	r3, [fp, #-32]	/* first loop counter = 0 */
     7c:	ea00000c 	b	b4 <snd_mask_refine+0x74>
     80:	e51b0020 	ldr	r0, [fp, #-32]	/* r0 = index used for the store at a4 */
     84:	e51b2020 	ldr	r2, [fp, #-32]
     88:	e51b3048 	ldr	r3, [fp, #-72]
     8c:	e7931102 	ldr	r1, [r3, r2, lsl #2]	/* r1 = mask->bits[i] */
     90:	e51b2020 	ldr	r2, [fp, #-32]
     94:	e51b304c 	ldr	r3, [fp, #-76]
     98:	e7933102 	ldr	r3, [r3, r2, lsl #2]	/* r3 = v->bits[i] */
     9c:	e0012003 	and	r2, r1, r3
     a0:	e51b3048 	ldr	r3, [fp, #-72]
     a4:	e7832100 	str	r2, [r3, r0, lsl #2]	/* mask->bits[i] = r1 & r3 */
     a8:	e51b3020 	ldr	r3, [fp, #-32]
     ac:	e2833001 	add	r3, r3, #1	; 0x1
     b0:	e50b3020 	str	r3, [fp, #-32]	/* i++ */
     b4:	e51b3020 	ldr	r3, [fp, #-32]
     b8:	e3530001 	cmp	r3, #1	; 0x1
     bc:	daffffef 	ble	80 <snd_mask_refine+0x40>	/* repeat while i <= 1 */
     c0:	e51b3048 	ldr	r3, [fp, #-72]
     c4:	e50b3018 	str	r3, [fp, #-24]	/* pointer scanned by the second loop = mask */
     c8:	e3a03000 	mov	r3, #0	; 0x0
     cc:	e50b301c 	str	r3, [fp, #-28]	/* second loop counter = 0 */
     d0:	ea00000a 	b	100 <snd_mask_refine+0xc0>
     d4:	e51b301c 	ldr	r3, [fp, #-28]
     d8:	e51b2018 	ldr	r2, [fp, #-24]
     dc:	e7923103 	ldr	r3, [r2, r3, lsl #2]	/* r3 = mask->bits[i] */
     e0:	e3530000 	cmp	r3, #0	; 0x0
     e4:	0a000002 	beq	f4 <snd_mask_refine+0xb4>	/* zero word: keep scanning */
     e8:	e3a03000 	mov	r3, #0	; 0x0
     ec:	e50b3050 	str	r3, [fp, #-80]	/* result = 0 (a non-zero word was found) */
     f0:	ea000007 	b	114 <snd_mask_refine+0xd4>
     f4:	e51b301c 	ldr	r3, [fp, #-28]
     f8:	e2833001 	add	r3, r3, #1	; 0x1
     fc:	e50b301c 	str	r3, [fp, #-28]	/* i++ */
    100:	e51b301c 	ldr	r3, [fp, #-28]
    104:	e3530001 	cmp	r3, #1	; 0x1
    108:	dafffff1 	ble	d4 <snd_mask_refine+0x94>	/* repeat while i <= 1 */
    10c:	e3a03001 	mov	r3, #1	; 0x1
    110:	e50b3050 	str	r3, [fp, #-80]	/* result = 1 (all scanned words were zero) */
    114:	e51b3050 	ldr	r3, [fp, #-80]
    118:	e3530000 	cmp	r3, #0	; 0x0
    11c:	0a000002 	beq	12c <snd_mask_refine+0xec>	/* not empty: go compare */
    120:	e3e03015 	mvn	r3, #21	; 0x15	/* r3 = ~21 = -22 */
    124:	e50b3054 	str	r3, [fp, #-84]	/* return value = -22 */
    128:	ea00000f 	b	16c <snd_mask_refine+0x12c>
    12c:	e51b3048 	ldr	r3, [fp, #-72]
    130:	e50b3010 	str	r3, [fp, #-16]	/* first compare pointer = mask */
    134:	e24b3040 	sub	r3, fp, #64	; 0x40
    138:	e50b3014 	str	r3, [fp, #-20]	/* second compare pointer = local copy */
    13c:	e51b0010 	ldr	r0, [fp, #-16]
    140:	e51b1014 	ldr	r1, [fp, #-20]
    144:	e3a02008 	mov	r2, #8	; 0x8	/* compare 8 bytes */
    148:	ebfffffe 	bl	0 <memcmp>
    14c:	e1a03000 	mov	r3, r0
    150:	e3530000 	cmp	r3, #0	; 0x0
    154:	13a03000 	movne	r3, #0	; 0x0
    158:	03a03001 	moveq	r3, #1	; 0x1	/* r3 = (memcmp result == 0) */
    15c:	e3530000 	cmp	r3, #0	; 0x0
    160:	13a03000 	movne	r3, #0	; 0x0
    164:	03a03001 	moveq	r3, #1	; 0x1	/* r3 = !r3 */
    168:	e50b3054 	str	r3, [fp, #-84]	/* return value = (memcmp result != 0) */
    16c:	e51b3054 	ldr	r3, [fp, #-84]
    170:	e1a00003 	mov	r0, r3
    174:	e24bd00c 	sub	sp, fp, #12	; 0xc
    178:	e89da800 	ldmia	sp, {fp, sp, pc}	/* restore fp and sp; loading pc returns */

If anyone has a solution for this, please let me know. Thanks a lot.


Regards
yfw

Follow-Ups:
- Re: ARM gcc 4.1 optimization bug.
  - From: Daniel Jacobowitz

Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]