This is the mail archive of the
gcc@gcc.gnu.org
mailing list for the GCC project.
ARM gcc 4.1 optimization bug.
- From: "Fengwei Yin" <yfw dot debian at gmail dot com>
- To: gcc at gcc dot gnu dot org
- Date: Sun, 30 Apr 2006 11:03:05 +0800
- Subject: ARM gcc 4.1 optimization bug.
Hi,
I am using GCC 4.1 for ARM to build the Linux kernel, and I have hit a bug
related to GCC optimization.
I assume this is the correct mailing list for reporting this bug.
If not, please let me know.
I also could not find an existing report of the same bug.
The kernel is 2.6.14. When I build the ALSA subsystem, I use the following command line:
arm-iwmmxt-linux-gnueabi-gcc -Wp,-MD,sound/core/.pcm_native.o.d
-nostdinc -isystem
/usr/local/arm-iwmmxt-linux-gnueabi/bin/../lib/gcc/arm-iwmmxt-linux-gnueabi/4.1.0/include
-D__KERNEL__ -Iinclude -include include/linux/autoconf.h
-mlittle-endian -gdwarf-2 -Wall -Wundef -Wstrict-prototypes
-Wno-trigraphs -fno-strict-aliasing -fno-common -ffreestanding
-fno-omit-frame-pointer -fno-optimize-sibling-calls -gdwarf-2
-fno-omit-frame-pointer -mapcs -mno-sched-prolog -mabi=aapcs-linux
-mno-thumb-interwork -D__LINUX_ARM_ARCH__=5 -march=armv5te
-mtune=xscale -Wa,-mcpu=xscale -msoft-float -Uarm
-Wdeclaration-after-statement -Wno-pointer-sign -gdwarf-2
-DKBUILD_BASENAME=pcm_native -DKBUILD_MODNAME=snd_pcm -Os -c -o
sound/core/pcm_native.o sound/core/pcm_native.c
The generated code for the function is as follows (using
arm-iwmmxt-linux-gnueabi-objdump -d pcm_native.o):
0000211c <snd_mask_refine>:
211c: e1a0c00d mov ip, sp
2120: e92dd8f0 stmdb sp!, {r4, r5, r6, r7, fp, ip, lr, pc}
2124: e24cb004 sub fp, ip, #4 ; 0x4
2128: e24dd020 sub sp, sp, #32 ; 0x20
212c: e5913000 ldr r3, [r1]
2130: e51b502c ldr r5, [fp, #-44]
2134: e24b603c sub r6, fp, #60 ; 0x3c
2138: e1a0c000 mov ip, r0
213c: e1a0e006 mov lr, r6
2140: e1a04000 mov r4, r0
2144: e0055003 and r5, r5, r3
2148: e1a07001 mov r7, r1
214c: e8bc000f ldmia ip!, {r0, r1, r2, r3}
2150: e8ae000f stmia lr!, {r0, r1, r2, r3}
2154: e89c000f ldmia ip, {r0, r1, r2, r3}
2158: e5845000 str r5, [r4]
215c: e597c004 ldr ip, [r7, #4]
2160: e3550000 cmp r5, #0 ; 0x0
2164: e88e000f stmia lr, {r0, r1, r2, r3}
2168: e001300c and r3, r1, ip /* Wrong value: this r1 comes from
2154: e89c000f ldmia ip, {r0, r1, r2, r3}
(the second load). The r1 from the first load should be used instead:
214c: ldmia ip!, {r0, r1, r2, r3}
*/
216c: e1a00004 mov r0, r4
2170: e3a02008 mov r2, #8 ; 0x8
2174: e1a01006 mov r1, r6
2178: e5843004 str r3, [r4, #4]
217c: 1a000005 bne 2198 <snd_mask_refine+0x7c>
2180: e3530000 cmp r3, #0 ; 0x0
2184: e3e03015 mvn r3, #21 ; 0x15
2188: 1a000002 bne 2198 <snd_mask_refine+0x7c>
218c: e1a00003 mov r0, r3
2190: e24bd01c sub sp, fp, #28 ; 0x1c
2194: e89da8f0 ldmia sp, {r4, r5, r6, r7, fp, sp, pc}
2198: ebfffffe bl 0 <memcmp>
219c: e2503000 subs r3, r0, #0 ; 0x0
21a0: 13a03001 movne r3, #1 ; 0x1
21a4: eafffff8 b 218c <snd_mask_refine+0x70>
The C code is as follows:
/* Number of leading words of bits[] that the loops in the helpers below examine. */
#define SNDRV_MASK_SIZE 2
/*
 * Bit mask storage. Eight words are declared, although the loops bounded by
 * SNDRV_MASK_SIZE only touch the first two of them.
 */
struct mask_t {
unsigned int bits[8];
};
typedef struct mask_t snd_mask_t;
/*
 * Report whether a mask has no bits set.
 * Only the first SNDRV_MASK_SIZE words of mask->bits are inspected.
 * Returns 1 if all of those words are zero, or 0 as soon as a non-zero
 * word is found.
 */
static inline int snd_mask_empty(const snd_mask_t *mask)
{
int i;
for (i = 0; i < SNDRV_MASK_SIZE; i++) {
if (mask->bits[i])
return 0;
}
return 1;
}
/*
 * AND the first SNDRV_MASK_SIZE words of v->bits into mask->bits, in place.
 * Words of mask->bits beyond that count are left untouched.
 */
static inline void snd_mask_intersect(snd_mask_t *mask, const snd_mask_t *v)
{
int i;
for (i = 0; i < SNDRV_MASK_SIZE; i++)
mask->bits[i] &= v->bits[i];
}
/*
 * Copy *v into *mask by struct assignment, so every element of bits[] is
 * copied, not just the first SNDRV_MASK_SIZE words.
 */
static inline void snd_mask_copy(snd_mask_t *mask, const snd_mask_t *v)
{
*mask = *v;
}
/*
 * Narrow *mask to its intersection with *v.
 * Returns -1 if the intersected mask is empty; otherwise returns the logical
 * negation of snd_mask_eq(mask, &old), where old holds the value *mask had
 * on entry.
 * NOTE(review): snd_mask_eq is not shown in this listing; presumably it
 * returns non-zero when the two masks are equal, which would make the result
 * 1 when the mask changed and 0 when it did not -- confirm.
 * NOTE(review): both disassembly listings load ~21 (mvn r3, #21) on the
 * empty-mask path rather than -1, so the source that was actually compiled
 * presumably returned a different error constant -- confirm.
 */
int snd_mask_refine(snd_mask_t *mask, const snd_mask_t *v)
{
snd_mask_t old;
/* Snapshot the incoming mask so it can be compared after the intersection. */
snd_mask_copy(&old, mask);
snd_mask_intersect(mask, v);
if (snd_mask_empty(mask))
return -1;
return !snd_mask_eq(mask, &old);
/* NOTE(review): unreachable -- the return above always exits first. */
return 1;
}
When I remove the -O option, ALSA works OK. The .s file is as follows:
00000040 <snd_mask_refine>:
40: e1a0c00d mov ip, sp
44: e92dd800 stmdb sp!, {fp, ip, lr, pc}
48: e24cb004 sub fp, ip, #4 ; 0x4
4c: e24dd048 sub sp, sp, #72 ; 0x48
50: e50b0048 str r0, [fp, #-72]
54: e50b104c str r1, [fp, #-76]
58: e51b3048 ldr r3, [fp, #-72]
5c: e24be040 sub lr, fp, #64 ; 0x40
60: e1a0c003 mov ip, r3
64: e8bc000f ldmia ip!, {r0, r1, r2, r3}
68: e8ae000f stmia lr!, {r0, r1, r2, r3}
6c: e89c000f ldmia ip, {r0, r1, r2, r3}
70: e88e000f stmia lr, {r0, r1, r2, r3}
74: e3a03000 mov r3, #0 ; 0x0
78: e50b3020 str r3, [fp, #-32]
7c: ea00000c b b4 <snd_mask_refine+0x74>
80: e51b0020 ldr r0, [fp, #-32]
84: e51b2020 ldr r2, [fp, #-32]
88: e51b3048 ldr r3, [fp, #-72]
8c: e7931102 ldr r1, [r3, r2, lsl #2]
90: e51b2020 ldr r2, [fp, #-32]
94: e51b304c ldr r3, [fp, #-76]
98: e7933102 ldr r3, [r3, r2, lsl #2]
9c: e0012003 and r2, r1, r3
a0: e51b3048 ldr r3, [fp, #-72]
a4: e7832100 str r2, [r3, r0, lsl #2]
a8: e51b3020 ldr r3, [fp, #-32]
ac: e2833001 add r3, r3, #1 ; 0x1
b0: e50b3020 str r3, [fp, #-32]
b4: e51b3020 ldr r3, [fp, #-32]
b8: e3530001 cmp r3, #1 ; 0x1
bc: daffffef ble 80 <snd_mask_refine+0x40>
c0: e51b3048 ldr r3, [fp, #-72]
c4: e50b3018 str r3, [fp, #-24]
c8: e3a03000 mov r3, #0 ; 0x0
cc: e50b301c str r3, [fp, #-28]
d0: ea00000a b 100 <snd_mask_refine+0xc0>
d4: e51b301c ldr r3, [fp, #-28]
d8: e51b2018 ldr r2, [fp, #-24]
dc: e7923103 ldr r3, [r2, r3, lsl #2]
e0: e3530000 cmp r3, #0 ; 0x0
e4: 0a000002 beq f4 <snd_mask_refine+0xb4>
e8: e3a03000 mov r3, #0 ; 0x0
ec: e50b3050 str r3, [fp, #-80]
f0: ea000007 b 114 <snd_mask_refine+0xd4>
f4: e51b301c ldr r3, [fp, #-28]
f8: e2833001 add r3, r3, #1 ; 0x1
fc: e50b301c str r3, [fp, #-28]
100: e51b301c ldr r3, [fp, #-28]
104: e3530001 cmp r3, #1 ; 0x1
108: dafffff1 ble d4 <snd_mask_refine+0x94>
10c: e3a03001 mov r3, #1 ; 0x1
110: e50b3050 str r3, [fp, #-80]
114: e51b3050 ldr r3, [fp, #-80]
118: e3530000 cmp r3, #0 ; 0x0
11c: 0a000002 beq 12c <snd_mask_refine+0xec>
120: e3e03015 mvn r3, #21 ; 0x15
124: e50b3054 str r3, [fp, #-84]
128: ea00000f b 16c <snd_mask_refine+0x12c>
12c: e51b3048 ldr r3, [fp, #-72]
130: e50b3010 str r3, [fp, #-16]
134: e24b3040 sub r3, fp, #64 ; 0x40
138: e50b3014 str r3, [fp, #-20]
13c: e51b0010 ldr r0, [fp, #-16]
140: e51b1014 ldr r1, [fp, #-20]
144: e3a02008 mov r2, #8 ; 0x8
148: ebfffffe bl 0 <memcmp>
14c: e1a03000 mov r3, r0
150: e3530000 cmp r3, #0 ; 0x0
154: 13a03000 movne r3, #0 ; 0x0
158: 03a03001 moveq r3, #1 ; 0x1
15c: e3530000 cmp r3, #0 ; 0x0
160: 13a03000 movne r3, #0 ; 0x0
164: 03a03001 moveq r3, #1 ; 0x1
168: e50b3054 str r3, [fp, #-84]
16c: e51b3054 ldr r3, [fp, #-84]
170: e1a00003 mov r0, r3
174: e24bd00c sub sp, fp, #12 ; 0xc
178: e89da800 ldmia sp, {fp, sp, pc}
If anyone has a solution for this, please let me know. Thanks a lot.
Regards
yfw