Bug 27363

Summary: ARM gcc 4.1 optimization bug
Product: gcc Reporter: yfw <yfw.debian>
Component: target Assignee: Paul Brook <pbrook>
Status: RESOLVED FIXED    
Severity: major CC: dberlin, dirk.behme, enrico.scholz+bugsgcc, gcc-bugs, m.k.edwards, s_j_newbury, tbm
Priority: P3    
Version: 4.1.0   
Target Milestone: 4.1.2   
Host: i386 Target: arm xscale iwmmxt
Build: i386 Known to work:
Known to fail: Last reconfirmed: 2006-07-18 21:42:28
Attachments: .i file of pcm_native.c and .s files for -Os, -O1 and -O2
Fix.

Description yfw 2006-04-30 04:48:43 UTC
The kernel is 2.6.14. When I build the ALSA subsystem, I use the following command line:

arm-iwmmxt-linux-gnueabi-gcc -Wp,-MD,sound/core/.pcm_native.o.d
-nostdinc -isystem
/usr/local/arm-iwmmxt-linux-gnueabi/bin/../lib/gcc/arm-iwmmxt-linux-gnueabi/4.1.0/include
-D__KERNEL__ -Iinclude  -include include/linux/autoconf.h
-mlittle-endian -gdwarf-2 -Wall -Wundef -Wstrict-prototypes
-Wno-trigraphs -fno-strict-aliasing -fno-common -ffreestanding
-fno-omit-frame-pointer -fno-optimize-sibling-calls -gdwarf-2
-fno-omit-frame-pointer -mapcs -mno-sched-prolog -mabi=aapcs-linux
-mno-thumb-interwork -D__LINUX_ARM_ARCH__=5 -march=armv5te
-mtune=xscale -Wa,-mcpu=xscale  -msoft-float -Uarm
-Wdeclaration-after-statement -Wno-pointer-sign   -gdwarf-2
-DKBUILD_BASENAME=pcm_native -DKBUILD_MODNAME=snd_pcm -Os -c -o
sound/core/pcm_native.o sound/core/pcm_native.c


The resulting function looks like the following (using
arm-iwmmxt-linux-gnueabi-objdump -d pcm_native.o):

0000211c <snd_mask_refine>:
   211c:       e1a0c00d        mov     ip, sp
   2120:       e92dd8f0        stmdb   sp!, {r4, r5, r6, r7, fp, ip, lr, pc}
   2124:       e24cb004        sub     fp, ip, #4      ; 0x4
   2128:       e24dd020        sub     sp, sp, #32     ; 0x20
   212c:       e5913000        ldr     r3, [r1]
   2130:       e51b502c        ldr     r5, [fp, #-44]
   2134:       e24b603c        sub     r6, fp, #60     ; 0x3c
   2138:       e1a0c000        mov     ip, r0
   213c:       e1a0e006        mov     lr, r6
   2140:       e1a04000        mov     r4, r0
   2144:       e0055003        and     r5, r5, r3
   2148:       e1a07001        mov     r7, r1
   214c:       e8bc000f        ldmia   ip!, {r0, r1, r2, r3}
   2150:       e8ae000f        stmia   lr!, {r0, r1, r2, r3}
   2154:       e89c000f        ldmia   ip, {r0, r1, r2, r3}
   2158:       e5845000        str     r5, [r4]
   215c:       e597c004        ldr     ip, [r7, #4]
   2160:       e3550000        cmp     r5, #0  ; 0x0
   2164:       e88e000f        stmia   lr, {r0, r1, r2, r3}
   2168:       e001300c        and     r3, r1, ip              /* r1 from
                                                                  2154:        e89c000f        ldmia   ip, {r0, r1, r2, r3}
                                                                  Using the wrong value.
                                                                  The r1 from this instruction should be used:
                                                                  214c:   ldmia        ip!, {r0, r1, r2, r3}
                                                               */
   216c:       e1a00004        mov     r0, r4
   2170:       e3a02008        mov     r2, #8  ; 0x8
   2174:       e1a01006        mov     r1, r6
   2178:       e5843004        str     r3, [r4, #4]
   217c:       1a000005        bne     2198 <snd_mask_refine+0x7c>
   2180:       e3530000        cmp     r3, #0  ; 0x0
   2184:       e3e03015        mvn     r3, #21 ; 0x15
   2188:       1a000002        bne     2198 <snd_mask_refine+0x7c>
   218c:       e1a00003        mov     r0, r3
   2190:       e24bd01c        sub     sp, fp, #28     ; 0x1c
   2194:       e89da8f0        ldmia   sp, {r4, r5, r6, r7, fp, sp, pc}
   2198:       ebfffffe        bl      0 <memcmp>
   219c:       e2503000        subs    r3, r0, #0      ; 0x0
   21a0:       13a03001        movne   r3, #1  ; 0x1
   21a4:       eafffff8        b       218c <snd_mask_refine+0x70>

The C code is like following:

#define SNDRV_MASK_SIZE 2
struct mask_t {
       unsigned int bits[8];
};
typedef struct mask_t snd_mask_t;
static inline int snd_mask_empty(const snd_mask_t *mask)
{
       int i;
       for (i = 0; i < SNDRV_MASK_SIZE; i++) {
               if (mask->bits[i])
                       return 0;
       }
       return 1;
}
static inline void snd_mask_intersect(snd_mask_t *mask, const snd_mask_t *v)
{
       int i;
       for (i = 0; i < SNDRV_MASK_SIZE; i++)
               mask->bits[i] &= v->bits[i];
}
static inline void snd_mask_copy(snd_mask_t *mask, const snd_mask_t *v)
{
       *mask = *v;
}
int snd_mask_refine(snd_mask_t *mask, const snd_mask_t *v)
{
       snd_mask_t old;

       snd_mask_copy(&old, mask);
       snd_mask_intersect(mask, v);
       if (snd_mask_empty(mask))
               return -1;

       return !snd_mask_eq(mask, &old);
       return 1;
}

When I remove the -O option, ALSA works OK. The .s file looks like the following:

00000040 <snd_mask_refine>:
     40:       e1a0c00d        mov     ip, sp
     44:       e92dd800        stmdb   sp!, {fp, ip, lr, pc}
     48:       e24cb004        sub     fp, ip, #4      ; 0x4
     4c:       e24dd048        sub     sp, sp, #72     ; 0x48
     50:       e50b0048        str     r0, [fp, #-72]
     54:       e50b104c        str     r1, [fp, #-76]
     58:       e51b3048        ldr     r3, [fp, #-72]
     5c:       e24be040        sub     lr, fp, #64     ; 0x40
     60:       e1a0c003        mov     ip, r3
     64:       e8bc000f        ldmia   ip!, {r0, r1, r2, r3}
     68:       e8ae000f        stmia   lr!, {r0, r1, r2, r3}
     6c:       e89c000f        ldmia   ip, {r0, r1, r2, r3}
     70:       e88e000f        stmia   lr, {r0, r1, r2, r3}
     74:       e3a03000        mov     r3, #0  ; 0x0
     78:       e50b3020        str     r3, [fp, #-32]
     7c:       ea00000c        b       b4 <snd_mask_refine+0x74>
     80:       e51b0020        ldr     r0, [fp, #-32]
     84:       e51b2020        ldr     r2, [fp, #-32]
     88:       e51b3048        ldr     r3, [fp, #-72]
     8c:       e7931102        ldr     r1, [r3, r2, lsl #2]
     90:       e51b2020        ldr     r2, [fp, #-32]
     94:       e51b304c        ldr     r3, [fp, #-76]
     98:       e7933102        ldr     r3, [r3, r2, lsl #2]
     9c:       e0012003        and     r2, r1, r3
     a0:       e51b3048        ldr     r3, [fp, #-72]
     a4:       e7832100        str     r2, [r3, r0, lsl #2]
     a8:       e51b3020        ldr     r3, [fp, #-32]
     ac:       e2833001        add     r3, r3, #1      ; 0x1
     b0:       e50b3020        str     r3, [fp, #-32]
     b4:       e51b3020        ldr     r3, [fp, #-32]
     b8:       e3530001        cmp     r3, #1  ; 0x1
     bc:       daffffef        ble     80 <snd_mask_refine+0x40>
     c0:       e51b3048        ldr     r3, [fp, #-72]
     c4:       e50b3018        str     r3, [fp, #-24]
     c8:       e3a03000        mov     r3, #0  ; 0x0
     cc:       e50b301c        str     r3, [fp, #-28]
     d0:       ea00000a        b       100 <snd_mask_refine+0xc0>
     d4:       e51b301c        ldr     r3, [fp, #-28]
     d8:       e51b2018        ldr     r2, [fp, #-24]
     dc:       e7923103        ldr     r3, [r2, r3, lsl #2]
     e0:       e3530000        cmp     r3, #0  ; 0x0
     e4:       0a000002        beq     f4 <snd_mask_refine+0xb4>
     e8:       e3a03000        mov     r3, #0  ; 0x0
     ec:       e50b3050        str     r3, [fp, #-80]
     f0:       ea000007        b       114 <snd_mask_refine+0xd4>
     f4:       e51b301c        ldr     r3, [fp, #-28]
     f8:       e2833001        add     r3, r3, #1      ; 0x1
     fc:       e50b301c        str     r3, [fp, #-28]
    100:       e51b301c        ldr     r3, [fp, #-28]
    104:       e3530001        cmp     r3, #1  ; 0x1
    108:       dafffff1        ble     d4 <snd_mask_refine+0x94>
    10c:       e3a03001        mov     r3, #1  ; 0x1
    110:       e50b3050        str     r3, [fp, #-80]
    114:       e51b3050        ldr     r3, [fp, #-80]
    118:       e3530000        cmp     r3, #0  ; 0x0
    11c:       0a000002        beq     12c <snd_mask_refine+0xec>
    120:       e3e03015        mvn     r3, #21 ; 0x15
    124:       e50b3054        str     r3, [fp, #-84]
    128:       ea00000f        b       16c <snd_mask_refine+0x12c>
    12c:       e51b3048        ldr     r3, [fp, #-72]
    130:       e50b3010        str     r3, [fp, #-16]
    134:       e24b3040        sub     r3, fp, #64     ; 0x40
    138:       e50b3014        str     r3, [fp, #-20]
    13c:       e51b0010        ldr     r0, [fp, #-16]
    140:       e51b1014        ldr     r1, [fp, #-20]
    144:       e3a02008        mov     r2, #8  ; 0x8
    148:       ebfffffe        bl      0 <memcmp>
    14c:       e1a03000        mov     r3, r0
    150:       e3530000        cmp     r3, #0  ; 0x0
    154:       13a03000        movne   r3, #0  ; 0x0
    158:       03a03001        moveq   r3, #1  ; 0x1
    15c:       e3530000        cmp     r3, #0  ; 0x0
    160:       13a03000        movne   r3, #0  ; 0x0
    164:       03a03001        moveq   r3, #1  ; 0x1
    168:       e50b3054        str     r3, [fp, #-84]
    16c:       e51b3054        ldr     r3, [fp, #-84]
    170:       e1a00003        mov     r0, r3
    174:       e24bd00c        sub     sp, fp, #12     ; 0xc
    178:       e89da800        ldmia   sp, {fp, sp, pc}
Comment 1 Andrew Pinski 2006-04-30 05:01:18 UTC
We really need a self-contained example, but I think this was already fixed for 4.1.1.
Comment 2 yfw 2006-04-30 05:09:04 UTC
Subject: Re:  ARM gcc 4.1 optimization bug

Hi pinskia,
I tried to make a simple test example for this bug. But if I put the
code from the ALSA subsystem
of the Linux kernel into a test.c file, gcc produces correct
assembly code. :( So I have put the Linux kernel assembly code (with -Os
and without -O) in the bug report.


I will try the 4.1.1 later.

Thanks & Regards
yfw

On 30 Apr 2006 05:01:19 -0000, pinskia at gcc dot gnu dot org
<gcc-bugzilla@gcc.gnu.org> wrote:
>
>
> ------- Comment #1 from pinskia at gcc dot gnu dot org  2006-04-30 05:01 -------
> We really need a self contianed example but I think this was already fixed for
> 4.1.1.
>
>
> --
>
> pinskia at gcc dot gnu dot org changed:
>
>            What    |Removed                     |Added
> ----------------------------------------------------------------------------
>              Status|UNCONFIRMED                 |WAITING
>           Component|c                           |target
>
>
> http://gcc.gnu.org/bugzilla/show_bug.cgi?id=27363
>
> ------- You are receiving this mail because: -------
> You reported the bug, or are watching the reporter.
>
Comment 3 yfw 2006-04-30 05:32:36 UTC
Where can I get gcc 4.1.1? From the ftp site, the latest gcc 4.1 release is 4.1.0.

Thanks.
Comment 4 yfw 2006-04-30 09:09:29 UTC
I tried the gcc 4.1.1 snapshot 20060421. The bug is still there. The assembly code
produced with the -Os option is the same as with gcc 4.1.0.
Comment 5 Steven Newbury 2006-05-14 17:15:34 UTC
(In reply to comment #4)
> I tried the gcc 4.1.1 snapshot 20060421. The bug still there. The assembly code
> producted with -Os option is the same as gcc 4.1.0.
> 

Have you got anywhere with this?  I wonder if I'm hitting it too.  All the ALSA modules load but no devices become available.

Kernels 2.6.16 - 2.6.17-rc4 + ALSA SoC
GCC 4.1.0 & 4.1.1
Target arm-iwmmxt-linux-gnueabi (-mabi=linux-aapcs, -march=armv5te, -mtune=iwmmxt, -Os)
Comment 6 dirk 2006-06-07 15:52:56 UTC
Created attachment 11628 [details]
.i file of pcm_native.c and .s files for -Os, -O1 and -O2


Attached the .i and .s for -Os & -O2 (failing) and -O1 (working) (generated with --save-temps).

Used compile options (from Linux kernel 2.6.17-rc5):

 arm-linux-gcc -Wp,-MD,sound/core/.pcm_native.o.d -nostdinc -isystem /usr/arm/arm-linux_4_1_0/bin/../lib/gcc/arm-linux/4.1.0/include -D__KERNEL__ -Iinclude  -include include/linux/autoconf.h -mlittle-endian -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs -fno-strict-aliasing -fno-common -O1 -fno-omit-frame-pointer -fno-optimize-sibling-calls -fno-omit-frame-pointer -mapcs -mno-sched-prolog -mabi=apcs-gnu -mno-thumb-interwork -D__LINUX_ARM_ARCH__=5 -march=armv5te -mtune=arm9tdmi  -msoft-float -Uarm -Wdeclaration-after-statement -Wno-pointer-sign   -DMODULE -D"KBUILD_STR(s)=#s" -D"KBUILD_BASENAME=KBUILD_STR(pcm_native)" -D"KBUILD_MODNAME=KBUILD_STR(snd_pcm)" --save-temps -c -o sound/core/pcm_native.o sound/core/pcm_native.c

Only -O1 & -O2 & -Os were modified.

Small note:

Compiling only pcm_native.c with -O1 and using the resulting modules (4 modules depend on it), while using -Os for all other modules, makes the error I observed go away. However, I still get no sound output, unlike when I compile all sound modules with -O1. It seems that some other files of the kernel's sound system have (the same?) optimization issue as well. But I think we should concentrate on this file to start with.
Comment 7 dirk 2006-06-12 15:34:48 UTC
Until a fix for this bug is found, there are two possible workarounds:

- Compile the kernel's sound system as modules and compile these modules with -O1 instead of the default -Os or -O2 (e.g. by changing the main Makefile).

- Use this patch from Fengwei Yin <yfw.debian@gmail.com> (then -Os or -O2 should work):

--- linux/include/sound/pcm_params.h    2005-03-02 09:31:53.000000000 +0800
+++ linux-ok/include/sound/pcm_params.h 2006-06-08 09:57:11.000000000 +0800
@@ -196,6 +196,11 @@ INLINE int snd_mask_refine(snd_mask_t *m
       snd_mask_t old;
       assert(!snd_mask_empty(mask));
       snd_mask_copy(&old, mask);
+       /*
+        * add the barrier to fix the optimization
+        * error of GCC 4.1
+       */
+       mb();
       snd_mask_intersect(mask, v);
       if (snd_mask_empty(mask))
               return -EINVAL; 

Note that this is only a workaround. It will reduce the performance of the ALSA subsystem.
Comment 8 Richard Biener 2006-06-26 18:54:27 UTC
One thing we have is some extra virtual operands from CCP:

before:

<bb 2>:
mask_5 = &old;
v_7 = mask_6;
#   SFT.2_33 = V_MAY_DEF <SFT.2_32>;
*mask_5 = *v_7;
mask_8 = mask_6;
v_10 = v_9;
i_11 = 0;
goto <bb 4> (<L4>);



after:

<bb 2>:
mask_5 = &old;
v_7 = mask_6;
#   SFT.2 = V_MUST_DEF <SFT.2>;
#   VUSE <SFT.2>;
old = *v_7;
mask_8 = mask_6;
v_10 = v_9;
i_11 = 0;
goto <bb 4> (<L4>);

all of the SFT.2 are actually the same tree object.
Comment 9 Enrico Scholz 2006-07-15 19:22:04 UTC
*** Bug 28362 has been marked as a duplicate of this bug. ***
Comment 10 Enrico Scholz 2006-07-15 19:26:30 UTC
Bug #28362 contains a self contained example. Basically, it needs only

| some_struct = *some_other_struct;

to trigger this bug. I wonder how many other segfaults and how much other breakage this bug is responsible for.

Btw, happens for every gcc release (3.4.6, 4.0.3, 4.1.1).
Comment 11 Paul Brook 2006-07-18 21:42:28 UTC
I'm working on this. Looks like a CSE bug.
Comment 12 Paul Brook 2006-07-20 13:57:39 UTC
Subject: Bug 27363

Author: pbrook
Date: Thu Jul 20 13:57:31 2006
New Revision: 115614

URL: http://gcc.gnu.org/viewcvs?root=gcc&view=rev&rev=115614
Log:
2006-07-20  Paul Brook  <paul@codesourcery.com>

	PR 27363
	gcc/
	* cse.c (cse_insn): Add destination addresses to hash table. Check if
	they are invalidated by this instruction.

	gcc/testsuite/
	* gcc.dg/pr27363.c: New test.


Added:
    trunk/gcc/testsuite/gcc.dg/pr27363.c
Modified:
    trunk/gcc/ChangeLog
    trunk/gcc/cse.c
    trunk/gcc/testsuite/ChangeLog

Comment 13 Paul Brook 2006-07-20 13:59:34 UTC
Subject: Bug 27363

Author: pbrook
Date: Thu Jul 20 13:59:22 2006
New Revision: 115616

URL: http://gcc.gnu.org/viewcvs?root=gcc&view=rev&rev=115616
Log:
	Backport from mainline.
	PR 27363
	gcc/
	* cse.c (cse_insn): Add destination addresses to hash table. Check if
	they are invalidated by this instruction.

	gcc/testsuite/
	* gcc.dg/pr27363.c: New test.


Added:
    branches/csl/sourcerygxx-4_1/gcc/testsuite/gcc.dg/pr27363.c
Modified:
    branches/csl/sourcerygxx-4_1/ChangeLog.csl
    branches/csl/sourcerygxx-4_1/gcc/cse.c

Comment 14 Paul Brook 2006-07-20 15:07:42 UTC
Subject: Bug 27363

Author: pbrook
Date: Thu Jul 20 15:07:25 2006
New Revision: 115620

URL: http://gcc.gnu.org/viewcvs?root=gcc&view=rev&rev=115620
Log:
2006-07-20  Paul Brook  <paul@codesourcery.com>

	Backport from mainline.
	PR 27363
	gcc/
	* cse.c (cse_insn): Add destination addresses to hash table. Check if
	they are invalidated by this instruction.

	gcc/testsuite/
	* gcc.dg/pr27363.c: New test.


Added:
    branches/gcc-4_1-branch/gcc/testsuite/gcc.dg/pr27363.c
Modified:
    branches/gcc-4_1-branch/gcc/ChangeLog
    branches/gcc-4_1-branch/gcc/cse.c
    branches/gcc-4_1-branch/gcc/testsuite/ChangeLog

Comment 15 Paul Brook 2006-07-20 15:08:14 UTC
Fixed.
Comment 16 yfw 2006-07-22 06:21:11 UTC
Coooool. pbrook.
I will try the latest gcc snapshot.

Thanks a lot.


Regards
Yin, Fengwei
Comment 17 dirk 2006-07-22 06:24:23 UTC
Subject: Re:  ARM gcc 4.1 optimization bug

yfw dot debian at gmail dot com wrote:
> ------- Comment #16 from yfw dot debian at gmail dot com  2006-07-22 06:21 -------
> Coooool. pbrook.
> I will try the latest gcc snapshot.
> 
> Thanks a lot.

I tested it using crosstool and applying the patch manually 
against GCC 4.1.0 and it works.

Many thanks from me as well,

Dirk
Comment 18 Leon Woestenberg 2006-10-19 22:01:56 UTC
Created attachment 12464 [details]
Fix.

Copied from http://www.freaknet.org/martin/crosstool/patches/gcc-4.1.1/gcc-4.1.1-bugfix-27363.patch
so that it is linked to this report.
Comment 19 Michael K. Edwards 2007-01-26 02:53:45 UTC
Still generates bad code for snd_mask_refine in the gcc-4.1-20070115 snapshot.  I have verified that the patch claimed to fix this bug is in this snapshot.  My gcc is tuned for arm-926ejs, old ABI.  -O1 works.