Bug 43088 - [avr] Suspect optimizer missed code in gcc 4.4.3..
[avr] Suspect optimizer missed code in gcc 4.4.3..
Status: RESOLVED FIXED
Product: gcc
Classification: Unclassified
Component: rtl-optimization
4.4.3
: P3 normal
: 4.7.0
Assigned To: Not yet assigned to anyone
: missed-optimization
Depends on:
Blocks:
  Show dependency treegraph
 
Reported: 2010-02-16 06:03 UTC by uhmgawa
Modified: 2011-07-04 13:30 UTC (History)
4 users (show)

See Also:
Host:
Target: avr-*-*
Build:
Known to work: 4.7.0
Known to fail: 4.4.3, 4.6.0
Last reconfirmed: 2010-09-20 02:30:21


Attachments
C source file (343 bytes, text/plain)
2011-07-02 21:07 UTC, Georg-Johann Lay
Details

Note You need to log in before you can comment on or make changes to this bug.
Description uhmgawa 2010-02-16 06:03:22 UTC
I just built a recent toolchain (avr-gcc 4.4.3, binutils
2.20) in hopes of eliminating what appeared to be
a few artifacts apparently escaping optimization even
with -Os relative to a 4.0.2 version.  Referencing the dump
below, use of r25 is superfluous as it is cleared at 0x14c,
ANDed with zero at 0x150, and then participates in an
additionally unneeded word wide test operation at 0x152:

00000142 <foo>:

void foo(void)
   {
       static unsigned char count;

       if (++count & 0x3f)
142:   80 91 01 01     lds     r24, 0x0101
146:   8f 5f           subi    r24, 0xFF       ; 255
148:   80 93 01 01     sts     0x0101, r24
14c:   90 e0           ldi     r25, 0x00       ; 0
14e:   8f 73           andi    r24, 0x3F       ; 63
150:   90 70           andi    r25, 0x00       ; 0
150:   90 70           andi    r25, 0x00       ; 0
152:   00 97           sbiw    r24, 0x00       ; 0
154:   11 f0           breq    .+4             ; 0x15a <foo+0x18>
               PORTC &= ~0x1;
156:   40 98           cbi     0x08, 0 ; 8
158:   08 95           ret
       else
               PORTC |= 0x1;
15a:   40 9a           sbi     0x08, 0 ; 8
15c:   08 95           ret
   }


This is slightly different (although appearing to derive
from the same code generation logic) than I'd seen with a
4.0.2 toolchain where the first two operations above are
followed by an OR of r25 into r24 as a word width test:

0000013c <foo>:

void foo(void)
   {
       static unsigned char count;

       if (++count & 0x3f)
13c:   80 91 00 01     lds     r24, 0x0100
140:   8f 5f           subi    r24, 0xFF       ; 255
142:   80 93 00 01     sts     0x0100, r24
146:   99 27           eor     r25, r25
148:   8f 73           andi    r24, 0x3F       ; 63
14a:   90 70           andi    r25, 0x00       ; 0
14c:   89 2b           or      r24, r25
14e:   11 f0           breq    .+4             ; 0x154 <foo+0x18>
               PORTC &= ~0x1;
150:   40 98           cbi     0x08, 0 ; 8
152:   08 95           ret
       else
               PORTC |= 0x1;
154:   40 9a           sbi     0x08, 0 ; 8
156:   08 95           ret
   }


Both examples above were compiled with:

"-mmcu=atmega48 -nostdlib -Os -funsigned-char"

FWIW -O2 and -O3 give the same results. It seems to be
an artifact of a scalar widening operation during code
generation although can be quietly eliminated post-code
generation given the char width type.

I've seen some similar bug reports but not specifically
related to a bitwise-and operation where more than one
set bit exists in the constant operand.  Note if the
constant '3' is replaced with an 'unsigned char'
variable of the same value, the expected minimal
code sequence results (sans the fetch of the added
variable from memory).

-----------------------------------------------------------
Per requested bug report boilerplate:

/home/john/avr/gnu/toolbin//bin/avr-gcc -v -save-temps -c -o rt1.o -c -mmcu=atmega48 -nostdlib -Os -funsigned-char -D__BUILD_GAS__ -DDATE=\"100215200656\" -g rt1.c
Using built-in specs.
Target: avr
Configured with: ../configure --prefix=/home/john/avr/gnu/toolbin --target=avr --enable-languages=c,c++ --disable-nls --disable-libssp --with-dwarf2
Thread model: single
gcc version 4.4.3 (GCC) 
COLLECT_GCC_OPTIONS='-v' '-save-temps' '-c' '-o' 'rt1.o' '-c' '-mmcu=atmega48' '-nostdlib' '-Os' '-funsigned-char' '-D__BUILD_GAS__' '-DDATE="100215200656"' '-g'
 /home/john/avr/gnu/toolbin/libexec/gcc/avr/4.4.3/cc1 -E -quiet -v -imultilib avr4 -D__BUILD_GAS__ -DDATE="100215200656" rt1.c -mmcu=atmega48 -funsigned-char -g -fworking-directory -Os -fpch-preprocess -o rt1.i
ignoring nonexistent directory "/home/john/avr/gnu/toolbin/lib/gcc/avr/4.4.3/../../../../avr/sys-include"
#include "..." search starts here:
#include <...> search starts here:
 /home/john/avr/gnu/toolbin/lib/gcc/avr/4.4.3/include
 /home/john/avr/gnu/toolbin/lib/gcc/avr/4.4.3/include-fixed
 /home/john/avr/gnu/toolbin/lib/gcc/avr/4.4.3/../../../../avr/include
End of search list.
COLLECT_GCC_OPTIONS='-v' '-save-temps' '-c' '-o' 'rt1.o' '-c' '-mmcu=atmega48' '-nostdlib' '-Os' '-funsigned-char' '-D__BUILD_GAS__' '-DDATE="100215200656"' '-g'
 /home/john/avr/gnu/toolbin/libexec/gcc/avr/4.4.3/cc1 -fpreprocessed rt1.i -quiet -dumpbase rt1.c -mmcu=atmega48 -auxbase-strip rt1.o -g -Os -version -funsigned-char -o rt1.s
GNU C (GCC) version 4.4.3 (avr)
	compiled by GNU C version 4.4.2 20091222 (Red Hat 4.4.2-20), GMP version 4.3.1, MPFR version 2.4.1.
GGC heuristics: --param ggc-min-expand=100 --param ggc-min-heapsize=131072
Compiler executable checksum: 2cb821fe223eaff7cfe636a2c880b1cc
COLLECT_GCC_OPTIONS='-v' '-save-temps' '-c' '-o' 'rt1.o' '-c' '-mmcu=atmega48' '-nostdlib' '-Os' '-funsigned-char' '-D__BUILD_GAS__' '-DDATE="100215200656"' '-g'
 /home/john/avr/gnu/toolbin/lib/gcc/avr/4.4.3/../../../../avr/bin/as -mmcu=atmega48 -o rt1.o rt1.s
COMPILER_PATH=/home/john/avr/gnu/toolbin/libexec/gcc/avr/4.4.3/:/home/john/avr/gnu/toolbin/libexec/gcc/avr/4.4.3/:/home/john/avr/gnu/toolbin/libexec/gcc/avr/:/home/john/avr/gnu/toolbin/lib/gcc/avr/4.4.3/:/home/john/avr/gnu/toolbin/lib/gcc/avr/:/home/john/avr/gnu/toolbin/lib/gcc/avr/4.4.3/../../../../avr/bin/
LIBRARY_PATH=/home/john/avr/gnu/toolbin/lib/gcc/avr/4.4.3/avr4/:/home/john/avr/gnu/toolbin/lib/gcc/avr/4.4.3/../../../../avr/lib/avr4/:/home/john/avr/gnu/toolbin/lib/gcc/avr/4.4.3/:/home/john/avr/gnu/toolbin/lib/gcc/avr/4.4.3/../../../../avr/lib/
COLLECT_GCC_OPTIONS='-v' '-save-temps' '-c' '-o' 'rt1.o' '-c' '-mmcu=atmega48' '-nostdlib' '-Os' '-funsigned-char' '-D__BUILD_GAS__' '-DDATE="100215200656"' '-g'
Comment 1 abnikant 2010-09-13 05:58:44 UTC
This bug is confirmed. andhi3/andsi3 causing this problem. conditional checks in andhi3 and andsi3 need to compare with zero instead of 0xff [etc]. 
i.e. in andhi3 we need to replace
(mask & 0x00ff) != 0xff by (mask & 0x00ff) != 0.

Similarly other checks in andhi3 and andsi3 need to be replaced.
Comment 2 Eric Weddington 2010-09-20 02:30:21 UTC
(In reply to comment #1)
> This bug is confirmed. andhi3/andsi3 causing this problem. conditional checks
> in andhi3 and andsi3 need to compare with zero instead of 0xff [etc]. 
> i.e. in andhi3 we need to replace
> (mask & 0x00ff) != 0xff by (mask & 0x00ff) != 0.
> 
> Similarly other checks in andhi3 and andsi3 need to be replaced.
> 

Abnikant,

Is this bug confirmed for the reported version (4.4.3), or for trunk/4.6?

Comment 3 abnikant 2010-09-20 04:24:41 UTC
(In reply to comment #2)
> (In reply to comment #1)
> > This bug is confirmed. andhi3/andsi3 causing this problem. conditional checks
> > in andhi3 and andsi3 need to compare with zero instead of 0xff [etc]. 
> > i.e. in andhi3 we need to replace
> > (mask & 0x00ff) != 0xff by (mask & 0x00ff) != 0.
> > 
> > Similarly other checks in andhi3 and andsi3 need to be replaced.
> > 
> 
> Abnikant,
> 
> Is this bug confirmed for the reported version (4.4.3), or for trunk/4.6?
> 

It exits for the reported version (4.4.3) and as well as for trunk/4.6.
Comment 4 Eric Weddington 2010-09-20 12:58:44 UTC
(In reply to comment #3)
> 
> It exits for the reported version (4.4.3) and as well as for trunk/4.6.
> 

Abnikant, could you also post the patch that fixes this problem? Thanks.
Comment 5 Georg-Johann Lay 2011-06-10 19:59:47 UTC
Funnc artifact. It occurs only if all low-bits are set, i.e. wich constants 0x3f, 0x1f, 0xf, ... So persumably a middle and pass makes some strange assumption.

(In reply to comment #1)
> This bug is confirmed. andhi3/andsi3 causing this problem. conditional checks
> in andhi3 and andsi3 need to compare with zero instead of 0xff [etc]. 
> i.e. in andhi3 we need to replace
> (mask & 0x00ff) != 0xff by (mask & 0x00ff) != 0.
> 
> Similarly other checks in andhi3 and andsi3 need to be replaced.

Can you explain?

ANDding on a 8-bit part can only be omitted if the mask is 0xff. Otherwise, the AND has to be performed, even if the result is known to be always zero.
Comment 6 Georg-Johann Lay 2011-07-02 21:07:50 UTC
Created attachment 24659 [details]
C source file

Compile with -mmcu=atmega168 -Os.

This file shows the flaw in function foo_3f that compares against 0x3f in comparison to foo_3e that compares against 0x3e.

If the comparison is against 0x3e the code is fine.

The difference happens in pass combine. For 0x3f, combine tryes ZERO_EXTRACT:

Trying 8, 9 -> 10:
Failed to match this instruction:
(parallel [
        (set (cc0)
            (compare (zero_extract:HI (reg:QI 43 [ count.3 ])
                    (const_int 6 [0x6])
                    (const_int 0 [0]))
                (const_int 0 [0])))
        (clobber (scratch:QI))
    ])
Failed to match this instruction:
(set (cc0)
    (compare (zero_extract:HI (reg:QI 43 [ count.3 ])
            (const_int 6 [0x6])
            (const_int 0 [0]))
        (const_int 0 [0])))
Failed to match this instruction:
(set (reg:HI 52)
    (zero_extract:HI (reg:QI 43 [ count.3 ])
        (const_int 6 [0x6])
        (const_int 0 [0])))

.............................................


But for 0x3e it tryes AND, which matches (presumably combine-split):

Trying 8, 9 -> 10:
Failed to match this instruction:
(parallel [
        (set (cc0)
            (compare (and:QI (reg:QI 43 [ count.1 ])
                    (const_int 62 [0x3e]))
                (const_int 0 [0])))
        (clobber (scratch:QI))
    ])
Failed to match this instruction:
(set (cc0)
    (compare (and:QI (reg:QI 43 [ count.1 ])
            (const_int 62 [0x3e]))
        (const_int 0 [0])))
Successfully matched this instruction:
(set (reg:QI 52)
    (and:QI (reg:QI 43 [ count.1 ])
        (const_int 62 [0x3e])))
Successfully matched this instruction:
(set (cc0)
    (compare (reg:QI 52)
        (const_int 0 [0])))
deferring deletion of insn with uid = 8.
...
Comment 7 Georg-Johann Lay 2011-07-04 13:30:03 UTC
Closed in 4.7.0

This was a flaw in insn combine as explained above.

In 4.7.0 trunk (SVN 175811) the problem is solved and the two versions, one with 0x3f and one with 0x3e, yield the same, optimal assembler:

foo_3f:
	lds r24,count.1210	 ;  5	*movqi/4	[length = 2]
	subi r24,lo8(-(1))	 ;  6	addqi3/2	[length = 1]
	sts count.1210,r24	 ;  7	*movqi/3	[length = 2]
	andi r24,lo8(63)	 ;  8	andqi3/2	[length = 1]
	breq .L2	 ;  10	branch	[length = 1]
	cbi 40-0x20,0	 ;  16	*cbi	[length = 1]
	ret	 ;  36	return	[length = 1]
.L2:
	sbi 40-0x20,0	 ;  25	*sbi	[length = 1]
	ret	 ;  38	return	[length = 1]
	.size	foo_3f, .-foo_3f

foo_3e:
	lds r24,count.1214	 ;  5	*movqi/4	[length = 2]
	subi r24,lo8(-(1))	 ;  6	addqi3/2	[length = 1]
	sts count.1214,r24	 ;  7	*movqi/3	[length = 2]
	andi r24,lo8(62)	 ;  8	andqi3/2	[length = 1]
	breq .L5	 ;  10	branch	[length = 1]
	cbi 40-0x20,0	 ;  16	*cbi	[length = 1]
	ret	 ;  34	return	[length = 1]
.L5:
	sbi 40-0x20,0	 ;  25	*sbi	[length = 1]
	ret	 ;  36	return	[length = 1]