This is the mail archive of the
gcc-bugs@gcc.gnu.org
mailing list for the GCC project.
[Bug middle-end/18424] 3.4.3 ~6x+ performance regression vs 3.3.1, constant trees not being computed.
- From: "schlie at comcast dot net" <gcc-bugzilla at gcc dot gnu dot org>
- To: gcc-bugs at gcc dot gnu dot org
- Date: 11 Nov 2004 20:29:06 -0000
- Subject: [Bug middle-end/18424] 3.4.3 ~6x+ performance regression vs 3.3.1, constant trees not being computed.
- References: <20041111023501.18424.schlie@comcast.net>
- Reply-to: gcc-bugzilla at gcc dot gnu dot org
------- Additional Comments From schlie at comcast dot net 2004-11-11 20:28 -------
Subject: Re: 3.4.3 ~6x+ performance regression vs
3.3.1, constant trees not being computed.
> From: joseph at codesourcery dot com <gcc-bugzilla@gcc.gnu.org>
> ------- Additional Comments From joseph at codesourcery dot com 2004-11-11
> 16:22 -------
> Subject: Re: 3.4.3 ~6x+ performance regression vs
> 3.3.1, constant trees not being computed.
>
> Have you actually tried compiling code identical to that you test but with
> 8388608L in place of (1L << 23) before making claims about what is done
> with constant expressions?
>
> Your example may suggest a regression, provided no type sizes changed for
> your target between the versions compared, but you really shouldn't report
> conjectures about the cause of a bug without clear evidence to
> substantiate them, which in this case would involve substituting the value
> of the constant expression in the testcase.
You were correct: the problem wasn't that 3.4.3 failed to compute the
constant expression values, but that it was oddly transforming constant
values into run-time computed expressions, such that 3.4.3 converted:
(a & 0x800000L) => (((long)a >> 23) & 1), which doesn't quite seem sensible.
The following are the results for both 3.4.3 and 3.3.1, where 3.4.3 shows a
more than 100x performance regression and a ~4x size regression relative to 3.3.1:
----
The source:
/*Compiling: main.c using (for the sake of argument)
avr-gcc -c -mmcu=atmega64 -I. -g -Os -funsigned-char -funsigned-bitfields
-fpack-struct
-fshort-enums -Wall -Wstrict-prototypes -Wa,-adhlns=main.lst
-I/usr/local/avr/include
-std=gnu99 -funsafe-math-optimizations
-Wp,-M,-MP,-MT,main.o,-MF,.dep/main.o.d main.c
-o main.o
Linking: main.elf (again for the sake of argument)
avr-gcc -mmcu=atmega64 -I. -g -Os -funsigned-char -funsigned-bitfields
-fpack-struct
-fshort-enums -Wall -Wstrict-prototypes -Wa,-adhlns=main.o
-I/usr/local/avr/include
-std=gnu99 -funsafe-math-optimizations
-Wp,-M,-MP,-MT,main.o,-MF,.dep/main.elf.d main.o
--output main.elf -Wl,-Map=main.map,--cref -lm
File: main.c
*/
/* Test case: int argument, mask written as the literal 0x800000L.
 * Returns 1 when bit 23 of `a` (after the usual arithmetic conversion to
 * long for the `&`) is set, otherwise 2.
 * NOTE(review): on the avr target in this report int appears to be 16 bits
 * (the listings pass `a` in r24/r25 and sign-extend it), so bit 23 of the
 * widened value would mirror the sign bit of `a` -- confirm against the
 * target ABI. */
int foo0 ( int a ){
if (a & 0x800000L)
return 1;
else
return 2 ;
}
/* Test case: same check as foo0, but the mask is spelled as the constant
 * expression (1L << 23), which has the same value as 0x800000L.
 * Returns 1 when bit 23 of `a` (converted to long for the `&`) is set,
 * otherwise 2. */
int foo1 ( int a ){
if (a & (1L << 23))
return 1;
else
return 2 ;
}
/* Test case: long argument, mask written as the literal 0x800000L.
 * Returns 1 when bit 23 of `a` is set, otherwise 2. */
int foo2 ( long a ){
if (a & 0x800000L)
return 1;
else
return 2 ;
}
/* Test case: long argument, mask spelled as the constant expression
 * (1L << 23), which has the same value as 0x800000L.
 * Returns 1 when bit 23 of `a` is set, otherwise 2. */
int foo3 ( long a ){
if (a & (1L << 23))
return 1;
else
return 2 ;
}
/* Driver: feeds `a` through foo0..foo3 in turn, storing each result back
 * into `a`, then returns 0. */
int main( void ){
/* NOTE(review): `a` is read by the first call before it is ever assigned,
 * so its initial value is indeterminate. Presumably intentional: `volatile`
 * looks intended to make the compiler keep each load and store around the
 * calls -- confirm before reusing this driver elsewhere. */
volatile int a;
a = foo0 ( a );
a = foo1 ( a );
/* foo2 and foo3 take long, so `a` is converted from int to long at the call. */
a = foo2 ( a );
a = foo3 ( a );
return 0;
}
----
Listing for 3.4.3
main.elf: file format elf32-avr
Sections:
Idx Name Size VMA LMA File off Algn
0 .data 00000000 00800100 000001c8 0000025c 2**0
CONTENTS, ALLOC, LOAD, DATA
1 .text 000001c8 00000000 00000000 00000094 2**0
CONTENTS, ALLOC, LOAD, READONLY, CODE
2 .bss 00000000 00800100 000001c8 0000025c 2**0
ALLOC
3 .noinit 00000000 00800100 00800100 0000025c 2**0
CONTENTS
4 .eeprom 00000000 00810000 00810000 0000025c 2**0
CONTENTS
5 .stab 000005d0 00000000 00000000 0000025c 2**2
CONTENTS, READONLY, DEBUGGING
6 .stabstr 0000046e 00000000 00000000 0000082c 2**0
CONTENTS, READONLY, DEBUGGING
Disassembly of section .text:
00000000 <__vectors>:
0: 0c 94 46 00 jmp 0x8c
4: 0c 94 61 00 jmp 0xc2
8: 0c 94 61 00 jmp 0xc2
c: 0c 94 61 00 jmp 0xc2
10: 0c 94 61 00 jmp 0xc2
14: 0c 94 61 00 jmp 0xc2
18: 0c 94 61 00 jmp 0xc2
1c: 0c 94 61 00 jmp 0xc2
20: 0c 94 61 00 jmp 0xc2
24: 0c 94 61 00 jmp 0xc2
28: 0c 94 61 00 jmp 0xc2
2c: 0c 94 61 00 jmp 0xc2
30: 0c 94 61 00 jmp 0xc2
34: 0c 94 61 00 jmp 0xc2
38: 0c 94 61 00 jmp 0xc2
3c: 0c 94 61 00 jmp 0xc2
40: 0c 94 61 00 jmp 0xc2
44: 0c 94 61 00 jmp 0xc2
48: 0c 94 61 00 jmp 0xc2
4c: 0c 94 61 00 jmp 0xc2
50: 0c 94 61 00 jmp 0xc2
54: 0c 94 61 00 jmp 0xc2
58: 0c 94 61 00 jmp 0xc2
5c: 0c 94 61 00 jmp 0xc2
60: 0c 94 61 00 jmp 0xc2
64: 0c 94 61 00 jmp 0xc2
68: 0c 94 61 00 jmp 0xc2
6c: 0c 94 61 00 jmp 0xc2
70: 0c 94 61 00 jmp 0xc2
74: 0c 94 61 00 jmp 0xc2
78: 0c 94 61 00 jmp 0xc2
7c: 0c 94 61 00 jmp 0xc2
80: 0c 94 61 00 jmp 0xc2
84: 0c 94 61 00 jmp 0xc2
88: 0c 94 61 00 jmp 0xc2
0000008c <__ctors_end>:
8c: 11 24 eor r1, r1
8e: 1f be out 0x3f, r1 ; 63
90: cf ef ldi r28, 0xFF ; 255
92: d0 e1 ldi r29, 0x10 ; 16
94: de bf out 0x3e, r29 ; 62
96: cd bf out 0x3d, r28 ; 61
00000098 <__do_copy_data>:
98: 11 e0 ldi r17, 0x01 ; 1
9a: a0 e0 ldi r26, 0x00 ; 0
9c: b1 e0 ldi r27, 0x01 ; 1
9e: e8 ec ldi r30, 0xC8 ; 200
a0: f1 e0 ldi r31, 0x01 ; 1
a2: 02 c0 rjmp .+4 ; 0xa8
000000a4 <.do_copy_data_loop>:
a4: 05 90 lpm r0, Z+
a6: 0d 92 st X+, r0
000000a8 <.do_copy_data_start>:
a8: a0 30 cpi r26, 0x00 ; 0
aa: b1 07 cpc r27, r17
ac: d9 f7 brne .-10 ; 0xa4
000000ae <__do_clear_bss>:
ae: 11 e0 ldi r17, 0x01 ; 1
b0: a0 e0 ldi r26, 0x00 ; 0
b2: b1 e0 ldi r27, 0x01 ; 1
b4: 01 c0 rjmp .+2 ; 0xb8
000000b6 <.do_clear_bss_loop>:
b6: 1d 92 st X+, r1
000000b8 <.do_clear_bss_start>:
b8: a0 30 cpi r26, 0x00 ; 0
ba: b1 07 cpc r27, r17
bc: e1 f7 brne .-8 ; 0xb6
be: 0c 94 b7 00 jmp 0x16e
000000c2 <__bad_interrupt>:
c2: 0c 94 00 00 jmp 0x0
000000c6 <foo0>:
*/
int foo0 ( int a ){
if (a & 0x800000L)
c6: aa 27 eor r26, r26
c8: 97 fd sbrc r25, 7
ca: a0 95 com r26
cc: ba 2f mov r27, r26
ce: 27 e1 ldi r18, 0x17 ; 23
d0: b6 95 lsr r27
d2: a7 95 ror r26
d4: 97 95 ror r25
d6: 87 95 ror r24
d8: 2a 95 dec r18
da: d1 f7 brne .-12 ; 0xd0
dc: 81 70 andi r24, 0x01 ; 1
de: 90 70 andi r25, 0x00 ; 0
e0: 89 2b or r24, r25
e2: 19 f0 breq .+6 ; 0xea
return 1;
e4: 81 e0 ldi r24, 0x01 ; 1
e6: 90 e0 ldi r25, 0x00 ; 0
e8: 08 95 ret
else
return 2 ;
ea: 82 e0 ldi r24, 0x02 ; 2
ec: 90 e0 ldi r25, 0x00 ; 0
}
ee: 08 95 ret
f0: 08 95 ret
000000f2 <foo1>:
int foo1 ( int a ){
if (a & (1L << 23))
f2: aa 27 eor r26, r26
f4: 97 fd sbrc r25, 7
f6: a0 95 com r26
f8: ba 2f mov r27, r26
fa: 37 e1 ldi r19, 0x17 ; 23
fc: b6 95 lsr r27
fe: a7 95 ror r26
100: 97 95 ror r25
102: 87 95 ror r24
104: 3a 95 dec r19
106: d1 f7 brne .-12 ; 0xfc
108: 81 70 andi r24, 0x01 ; 1
10a: 90 70 andi r25, 0x00 ; 0
10c: 89 2b or r24, r25
10e: 19 f0 breq .+6 ; 0x116
return 1;
110: 81 e0 ldi r24, 0x01 ; 1
112: 90 e0 ldi r25, 0x00 ; 0
114: 08 95 ret
else
return 2 ;
116: 82 e0 ldi r24, 0x02 ; 2
118: 90 e0 ldi r25, 0x00 ; 0
}
11a: 08 95 ret
11c: 08 95 ret
0000011e <foo2>:
int foo2 ( long a ){
11e: dc 01 movw r26, r24
120: cb 01 movw r24, r22
if (a & 0x800000L)
122: 47 e1 ldi r20, 0x17 ; 23
124: b6 95 lsr r27
126: a7 95 ror r26
128: 97 95 ror r25
12a: 87 95 ror r24
12c: 4a 95 dec r20
12e: d1 f7 brne .-12 ; 0x124
130: 81 70 andi r24, 0x01 ; 1
132: 90 70 andi r25, 0x00 ; 0
134: 89 2b or r24, r25
136: 19 f0 breq .+6 ; 0x13e
return 1;
138: 81 e0 ldi r24, 0x01 ; 1
13a: 90 e0 ldi r25, 0x00 ; 0
13c: 08 95 ret
else
return 2 ;
13e: 82 e0 ldi r24, 0x02 ; 2
140: 90 e0 ldi r25, 0x00 ; 0
}
142: 08 95 ret
144: 08 95 ret
00000146 <foo3>:
int foo3 ( long a ){
146: dc 01 movw r26, r24
148: cb 01 movw r24, r22
if (a & (1L << 23))
14a: 57 e1 ldi r21, 0x17 ; 23
14c: b6 95 lsr r27
14e: a7 95 ror r26
150: 97 95 ror r25
152: 87 95 ror r24
154: 5a 95 dec r21
156: d1 f7 brne .-12 ; 0x14c
158: 81 70 andi r24, 0x01 ; 1
15a: 90 70 andi r25, 0x00 ; 0
15c: 89 2b or r24, r25
15e: 19 f0 breq .+6 ; 0x166
return 1;
160: 81 e0 ldi r24, 0x01 ; 1
162: 90 e0 ldi r25, 0x00 ; 0
164: 08 95 ret
else
return 2 ;
166: 82 e0 ldi r24, 0x02 ; 2
168: 90 e0 ldi r25, 0x00 ; 0
}
16a: 08 95 ret
16c: 08 95 ret
0000016e <main>:
int main( void ){
16e: cd ef ldi r28, 0xFD ; 253
170: d0 e1 ldi r29, 0x10 ; 16
172: de bf out 0x3e, r29 ; 62
174: cd bf out 0x3d, r28 ; 61
volatile int a;
a = foo0 ( a );
176: 89 81 ldd r24, Y+1 ; 0x01
178: 9a 81 ldd r25, Y+2 ; 0x02
17a: 0e 94 63 00 call 0xc6
17e: 89 83 std Y+1, r24 ; 0x01
180: 9a 83 std Y+2, r25 ; 0x02
a = foo1 ( a );
182: 89 81 ldd r24, Y+1 ; 0x01
184: 9a 81 ldd r25, Y+2 ; 0x02
186: 0e 94 79 00 call 0xf2
18a: 89 83 std Y+1, r24 ; 0x01
18c: 9a 83 std Y+2, r25 ; 0x02
a = foo2 ( a );
18e: 89 81 ldd r24, Y+1 ; 0x01
190: 9a 81 ldd r25, Y+2 ; 0x02
192: aa 27 eor r26, r26
194: 97 fd sbrc r25, 7
196: a0 95 com r26
198: ba 2f mov r27, r26
19a: bc 01 movw r22, r24
19c: cd 01 movw r24, r26
19e: 0e 94 8f 00 call 0x11e
1a2: 89 83 std Y+1, r24 ; 0x01
1a4: 9a 83 std Y+2, r25 ; 0x02
a = foo3 ( a );
1a6: 89 81 ldd r24, Y+1 ; 0x01
1a8: 9a 81 ldd r25, Y+2 ; 0x02
1aa: aa 27 eor r26, r26
1ac: 97 fd sbrc r25, 7
1ae: a0 95 com r26
1b0: ba 2f mov r27, r26
1b2: bc 01 movw r22, r24
1b4: cd 01 movw r24, r26
1b6: 0e 94 a3 00 call 0x146
1ba: 89 83 std Y+1, r24 ; 0x01
1bc: 9a 83 std Y+2, r25 ; 0x02
return 0;
}
1be: 80 e0 ldi r24, 0x00 ; 0
1c0: 90 e0 ldi r25, 0x00 ; 0
1c2: 0c 94 e3 00 jmp 0x1c6
000001c6 <_exit>:
1c6: ff cf rjmp .-2 ; 0x1c6
---------
The listing with avr-gcc (GCC) 3.3.1:
1 .file "main.c"
2 .arch atmega64
3 __SREG__ = 0x3f
4 __SP_H__ = 0x3e
5 __SP_L__ = 0x3d
6 __tmp_reg__ = 0
7 __zero_reg__ = 1
8 .global __do_copy_data
9 .global __do_clear_bss
12 .text
13 .Ltext0:
38 .global foo0
40 foo0:
1:main.c **** /*Compiling: main.c using (for the sake of argument)
2:main.c ****
3:main.c **** avr-gcc -c -mmcu=atmega64 -I. -g -Os
-funsigned-char -funsigned-bitfields
4:main.c **** -fpack-struct
5:main.c **** -fshort-enums -Wall -Wstrict-prototypes
-Wa,-adhlns=main.lst
6:main.c **** -I/usr/local/avr/include
7:main.c **** -std=gnu99 -funsafe-math-optimizations
8:main.c **** -Wp,-M,-MP,-MT,main.o,-MF,.dep/main.o.d main.c
9:main.c **** -o main.o
10:main.c ****
11:main.c **** Linking: main.elf (again for the sake of argumnet)
12:main.c **** avr-gcc -mmcu=atmega64 -I. -g -Os -funsigned-char
-funsigned-bitfields
13:main.c **** -fpack-struct
14:main.c **** -fshort-enums -Wall -Wstrict-prototypes
-Wa,-adhlns=main.o
15:main.c **** -I/usr/local/avr/include
16:main.c **** -std=gnu99 -funsafe-math-optimizations
17:main.c **** -Wp,-M,-MP,-MT,main.o,-MF,.dep/main.elf.d main.o
18:main.c **** --output main.elf -Wl,-Map=main.map,--cref -lm
19:main.c ****
20:main.c **** File: main.c
21:main.c ****
22:main.c **** */
23:main.c ****
24:main.c **** int foo0 ( int a ){
42 .LM1:
43 /* prologue: frame size=0 */
44 /* prologue end (size=0) */
25:main.c ****
26:main.c **** if (a & 0x800000L)
46 .LM2:
47 0000 AA27 clr r26
48 0002 97FD sbrc r25,7
49 0004 A095 com r26
50 0006 BA2F mov r27,r26
51 0008 A7FF sbrs r26,7
52 000a 03C0 rjmp .L2
27:main.c **** return 1;
54 .LM3:
55 000c 81E0 ldi r24,lo8(1)
56 000e 90E0 ldi r25,hi8(1)
28:main.c **** else
29:main.c **** return 2 ;
30:main.c ****
31:main.c **** }
58 .LM4:
59 0010 0895 ret
60 .L2:
62 .LM5:
63 0012 82E0 ldi r24,lo8(2)
64 0014 90E0 ldi r25,hi8(2)
66 .LM6:
67 0016 0895 ret
68 /* epilogue: frame size=0 */
69 0018 0895 ret
70 /* epilogue end (size=1) */
71 /* function foo0 size 13 (12) */
73 .Lscope0:
77 .global foo1
79 foo1:
32:main.c ****
33:main.c **** int foo1 ( int a ){
81 .LM7:
82 /* prologue: frame size=0 */
83 /* prologue end (size=0) */
34:main.c ****
35:main.c **** if (a & (1L << 23))
85 .LM8:
86 001a AA27 clr r26
87 001c 97FD sbrc r25,7
88 001e A095 com r26
89 0020 BA2F mov r27,r26
90 0022 A7FF sbrs r26,7
91 0024 03C0 rjmp .L5
36:main.c **** return 1;
93 .LM9:
94 0026 81E0 ldi r24,lo8(1)
95 0028 90E0 ldi r25,hi8(1)
37:main.c **** else
38:main.c **** return 2 ;
39:main.c ****
40:main.c **** }
97 .LM10:
98 002a 0895 ret
99 .L5:
101 .LM11:
102 002c 82E0 ldi r24,lo8(2)
103 002e 90E0 ldi r25,hi8(2)
105 .LM12:
106 0030 0895 ret
107 /* epilogue: frame size=0 */
108 0032 0895 ret
109 /* epilogue end (size=1) */
110 /* function foo1 size 13 (12) */
112 .Lscope1:
116 .global foo2
118 foo2:
41:main.c ****
42:main.c **** int foo2 ( long a ){
120 .LM13:
121 /* prologue: frame size=0 */
122 /* prologue end (size=0) */
123 0034 DC01 movw r26,r24
124 0036 CB01 movw r24,r22
43:main.c ****
44:main.c **** if (a & 0x800000L)
126 .LM14:
127 0038 A7FF sbrs r26,7
128 003a 03C0 rjmp .L8
45:main.c **** return 1;
130 .LM15:
131 003c 81E0 ldi r24,lo8(1)
132 003e 90E0 ldi r25,hi8(1)
46:main.c **** else
47:main.c **** return 2 ;
48:main.c ****
49:main.c **** }
134 .LM16:
135 0040 0895 ret
136 .L8:
138 .LM17:
139 0042 82E0 ldi r24,lo8(2)
140 0044 90E0 ldi r25,hi8(2)
142 .LM18:
143 0046 0895 ret
144 /* epilogue: frame size=0 */
145 0048 0895 ret
146 /* epilogue end (size=1) */
147 /* function foo2 size 11 (10) */
149 .Lscope2:
153 .global foo3
155 foo3:
50:main.c ****
51:main.c **** int foo3 ( long a ){
157 .LM19:
158 /* prologue: frame size=0 */
159 /* prologue end (size=0) */
160 004a DC01 movw r26,r24
161 004c CB01 movw r24,r22
52:main.c ****
53:main.c **** if (a & (1L << 23))
163 .LM20:
164 004e A7FF sbrs r26,7
165 0050 03C0 rjmp .L11
54:main.c **** return 1;
167 .LM21:
168 0052 81E0 ldi r24,lo8(1)
169 0054 90E0 ldi r25,hi8(1)
55:main.c **** else
56:main.c **** return 2 ;
57:main.c ****
58:main.c **** }
171 .LM22:
172 0056 0895 ret
173 .L11:
175 .LM23:
176 0058 82E0 ldi r24,lo8(2)
177 005a 90E0 ldi r25,hi8(2)
179 .LM24:
180 005c 0895 ret
181 /* epilogue: frame size=0 */
182 005e 0895 ret
183 /* epilogue end (size=1) */
184 /* function foo3 size 11 (10) */
186 .Lscope3:
189 .global main
191 main:
59:main.c ****
60:main.c **** int main( void ){
193 .LM25:
194 /* prologue: frame size=2 */
195 0060 C0E0 ldi r28,lo8(__stack - 2)
196 0062 D0E0 ldi r29,hi8(__stack - 2)
197 0064 DEBF out __SP_H__,r29
198 0066 CDBF out __SP_L__,r28
199 /* prologue end (size=4) */
61:main.c ****
62:main.c **** volatile int a;
63:main.c ****
64:main.c **** a = foo0 ( a );
201 .LM26:
202 .LBB2:
203 0068 8981 ldd r24,Y+1
204 006a 9A81 ldd r25,Y+2
205 006c 0E94 0000 call foo0
206 0070 8983 std Y+1,r24
207 0072 9A83 std Y+2,r25
65:main.c **** a = foo1 ( a );
209 .LM27:
210 0074 8981 ldd r24,Y+1
211 0076 9A81 ldd r25,Y+2
212 0078 0E94 0000 call foo1
213 007c 8983 std Y+1,r24
214 007e 9A83 std Y+2,r25
66:main.c **** a = foo2 ( a );
216 .LM28:
217 0080 8981 ldd r24,Y+1
218 0082 9A81 ldd r25,Y+2
219 0084 AA27 clr r26
220 0086 97FD sbrc r25,7
221 0088 A095 com r26
222 008a BA2F mov r27,r26
223 008c BC01 movw r22,r24
224 008e CD01 movw r24,r26
225 0090 0E94 0000 call foo2
226 0094 8983 std Y+1,r24
227 0096 9A83 std Y+2,r25
67:main.c **** a = foo3 ( a );
229 .LM29:
230 0098 8981 ldd r24,Y+1
231 009a 9A81 ldd r25,Y+2
232 009c AA27 clr r26
233 009e 97FD sbrc r25,7
234 00a0 A095 com r26
235 00a2 BA2F mov r27,r26
236 00a4 BC01 movw r22,r24
237 00a6 CD01 movw r24,r26
238 00a8 0E94 0000 call foo3
239 00ac 8983 std Y+1,r24
240 00ae 9A83 std Y+2,r25
68:main.c ****
69:main.c **** return 0;
70:main.c **** }
242 .LM30:
243 .LBE2:
244 00b0 80E0 ldi r24,lo8(0)
245 00b2 90E0 ldi r25,hi8(0)
246 /* epilogue: frame size=2 */
247 00b4 0C94 0000 jmp exit
248 /* epilogue end (size=2) */
249 /* function main size 44 (38) */
254 .Lscope4:
256 .text
258 Letext:
259 /* File "main.c": code 92 = 0x005c ( 82),
prologues 4, epilogues 6 */
DEFINED SYMBOLS
*ABS*:00000000 main.c
*ABS*:0000003f __SREG__
*ABS*:0000003e __SP_H__
*ABS*:0000003d __SP_L__
*ABS*:00000000 __tmp_reg__
*ABS*:00000001 __zero_reg__
/tmp/ccS6dcXA.s:40 .text:00000000 foo0
/tmp/ccS6dcXA.s:79 .text:0000001a foo1
/tmp/ccS6dcXA.s:118 .text:00000034 foo2
/tmp/ccS6dcXA.s:155 .text:0000004a foo3
/tmp/ccS6dcXA.s:191 .text:00000060 main
/tmp/ccS6dcXA.s:258 .text:000000b8 Letext
UNDEFINED SYMBOLS
__do_copy_data
__do_clear_bss
__stack
exit
--
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=18424