This is GCC Bugzilla
This is GCC Bugzilla Version 2.20+
View Bug Activity | Format For Printing | Clone This Bug
/* miss-optimization of (x & pow2C) conditionals returning bool equivalent values. Conditional expressions returning/assigning bool equivalent values tend to produce less efficient generic code than those returning arbitrary values; GCC 3.3.3 produces the more effecient form of the results consistently. */ int main (void){ volatile char c; volatile int i; volatile long l; /* char tests */ c = (c & (1 << 3)); // ok: ((char)c & C). c = (c & (1 << 3)) ? 3 : 5; // ok: if((char)c & C) -> avr's bit-test&skip. c = (c & (1 << 3)) ? 1 : 0; // poor: if((char)c & C) -> if(((int)c >> C') & 1), w/unnessisary (int)c promotion. if (c & (1 << 3)) // ok: if((char)c & C) -> avr's bit-test&skip. c = 3; else c = 5; if (c & (1 << 3)) // ok: if((char)c & C) -> avr's bit-test&skip. c = 1; else c = 0; /* int tests */ i = i & (1 << 11); // ok: ((int)i & C) i = (i & (1 << 11)) ? 3 : 5; // ok: if((int)i & C) -> avr's bit-test&skip. i = (i & (1 << 11)) ? 1 : 0; // poor: if((int)i & C) -> if(((int)i >> C') & 1). if (i & (1 << 11)) // ok: if((int)i & C) -> avr's bit-test&skip. i = 3; else i = 5; if (i & (1 << 11)) // poor: if((int)i & C) -> avr's bit-test&skip, but w/ineffecient return value. i = 1; else i = 0; /* long tests */ l = (l & ((long)1 << 27)); // ok: ((long)l & C) l = (l & ((long)1 << 27)) ? 3 : 5; // ok: if((long)l & C) -> avr's bit-test&skip. l = (l & ((long)1 << 27)) ? 1 : 0; // poor: if((long)l & C) -> if(((long)l >> C') & 1). if (l & ((long)1 << 27)) // ok: if((long)l & C) -> avr's bit-test&skip. l = 3; else l = 5; if (l & ((long)1 << 27)) // poor: if((long)l & C) -> avr's bit-test&skip, but w/ineffecient return value. l = 1; else l = 0; return 0; } Produces: int main (void){ c6: c8 ef ldi r28, 0xF8 ; 248 c8: d0 e1 ldi r29, 0x10 ; 16 ca: de bf out 0x3e, r29 ; 62 cc: cd bf out 0x3d, r28 ; 61 volatile char c; volatile int i; volatile long l; /* char tests */ c = (c & (1 << 3)); // ok: ((char)c & C). ce: 89 81 ldd r24, Y+1 ; 0x01 d0: 88 70 andi r24, 0x08 ; 8 d2: 89 83 std Y+1, r24 ; 0x01 c = (c & (1 << 3)) ? 3 : 5; // ok: if((char)c & C) -> avr's bit-test&skip. d4: 89 81 ldd r24, Y+1 ; 0x01 d6: 83 ff sbrs r24, 3 d8: 02 c0 rjmp .+4 ; 0xde <main+0x18> da: 83 e0 ldi r24, 0x03 ; 3 dc: 01 c0 rjmp .+2 ; 0xe0 <main+0x1a> de: 85 e0 ldi r24, 0x05 ; 5 e0: 89 83 std Y+1, r24 ; 0x01 c = (c & (1 << 3)) ? 1 : 0; // poor: if((char)c & C) -> if(((int)c >> C') & 1), w/unnessisary (int)c promotion. e2: 89 81 ldd r24, Y+1 ; 0x01 e4: 99 27 eor r25, r25 e6: 43 e0 ldi r20, 0x03 ; 3 e8: 96 95 lsr r25 ea: 87 95 ror r24 ec: 4a 95 dec r20 ee: e1 f7 brne .-8 ; 0xe8 <main+0x22> f0: 81 70 andi r24, 0x01 ; 1 f2: 89 83 std Y+1, r24 ; 0x01 if (c & (1 << 3)) // ok: if((char)c & C) -> avr's bit-test&skip. f4: 89 81 ldd r24, Y+1 ; 0x01 f6: 83 ff sbrs r24, 3 f8: 02 c0 rjmp .+4 ; 0xfe <main+0x38> c = 3; fa: 83 e0 ldi r24, 0x03 ; 3 fc: 01 c0 rjmp .+2 ; 0x100 <main+0x3a> else c = 5; fe: 85 e0 ldi r24, 0x05 ; 5 100: 89 83 std Y+1, r24 ; 0x01 if (c & (1 << 3)) // ok: if((char)c & C) -> avr's bit-test&skip. 102: 89 81 ldd r24, Y+1 ; 0x01 104: 83 ff sbrs r24, 3 106: 03 c0 rjmp .+6 ; 0x10e <main+0x48> c = 1; 108: 81 e0 ldi r24, 0x01 ; 1 10a: 89 83 std Y+1, r24 ; 0x01 10c: 01 c0 rjmp .+2 ; 0x110 <main+0x4a> else c = 0; 10e: 19 82 std Y+1, r1 ; 0x01 /* int tests */ i = i & (1 << 11); // ok: ((int)i & C) 110: 8a 81 ldd r24, Y+2 ; 0x02 112: 9b 81 ldd r25, Y+3 ; 0x03 114: 80 70 andi r24, 0x00 ; 0 116: 98 70 andi r25, 0x08 ; 8 118: 8a 83 std Y+2, r24 ; 0x02 11a: 9b 83 std Y+3, r25 ; 0x03 i = (i & (1 << 11)) ? 3 : 5; // ok: if((int)i & C) -> avr's bit-test&skip. 11c: 8a 81 ldd r24, Y+2 ; 0x02 11e: 9b 81 ldd r25, Y+3 ; 0x03 120: 93 ff sbrs r25, 3 122: 03 c0 rjmp .+6 ; 0x12a <main+0x64> 124: 83 e0 ldi r24, 0x03 ; 3 126: 90 e0 ldi r25, 0x00 ; 0 128: 02 c0 rjmp .+4 ; 0x12e <main+0x68> 12a: 85 e0 ldi r24, 0x05 ; 5 12c: 90 e0 ldi r25, 0x00 ; 0 12e: 8a 83 std Y+2, r24 ; 0x02 130: 9b 83 std Y+3, r25 ; 0x03 i = (i & (1 << 11)) ? 1 : 0; // poor: if((int)i & C) -> if(((int)i >> C') & 1). 132: 8a 81 ldd r24, Y+2 ; 0x02 134: 9b 81 ldd r25, Y+3 ; 0x03 136: 89 2f mov r24, r25 138: 99 27 eor r25, r25 13a: 86 95 lsr r24 13c: 86 95 lsr r24 13e: 86 95 lsr r24 140: 81 70 andi r24, 0x01 ; 1 142: 90 70 andi r25, 0x00 ; 0 144: 8a 83 std Y+2, r24 ; 0x02 146: 9b 83 std Y+3, r25 ; 0x03 if (i & (1 << 11)) // ok: if((int)i & C) -> avr's bit-test&skip. 148: 8a 81 ldd r24, Y+2 ; 0x02 14a: 9b 81 ldd r25, Y+3 ; 0x03 14c: 93 ff sbrs r25, 3 14e: 03 c0 rjmp .+6 ; 0x156 <main+0x90> i = 3; 150: 83 e0 ldi r24, 0x03 ; 3 152: 90 e0 ldi r25, 0x00 ; 0 154: 02 c0 rjmp .+4 ; 0x15a <main+0x94> else i = 5; 156: 85 e0 ldi r24, 0x05 ; 5 158: 90 e0 ldi r25, 0x00 ; 0 15a: 8a 83 std Y+2, r24 ; 0x02 15c: 9b 83 std Y+3, r25 ; 0x03 if (i & (1 << 11)) // poor: if((int)i & C) -> avr's bit-test&skip, but w/ineffecient return value. 15e: 8a 81 ldd r24, Y+2 ; 0x02 160: 9b 81 ldd r25, Y+3 ; 0x03 162: 9c 01 movw r18, r24 164: 20 70 andi r18, 0x00 ; 0 166: 38 70 andi r19, 0x08 ; 8 168: 93 ff sbrs r25, 3 16a: 05 c0 rjmp .+10 ; 0x176 <main+0xb0> i = 1; 16c: 81 e0 ldi r24, 0x01 ; 1 16e: 90 e0 ldi r25, 0x00 ; 0 170: 8a 83 std Y+2, r24 ; 0x02 172: 9b 83 std Y+3, r25 ; 0x03 174: 02 c0 rjmp .+4 ; 0x17a <main+0xb4> else i = 0; 176: 2a 83 std Y+2, r18 ; 0x02 178: 3b 83 std Y+3, r19 ; 0x03 /* long tests */ l = (l & ((long)1 << 27)); // ok: ((long)l & C) 17a: 8c 81 ldd r24, Y+4 ; 0x04 17c: 9d 81 ldd r25, Y+5 ; 0x05 17e: ae 81 ldd r26, Y+6 ; 0x06 180: bf 81 ldd r27, Y+7 ; 0x07 182: 80 70 andi r24, 0x00 ; 0 184: 90 70 andi r25, 0x00 ; 0 186: a0 70 andi r26, 0x00 ; 0 188: b8 70 andi r27, 0x08 ; 8 18a: 8c 83 std Y+4, r24 ; 0x04 18c: 9d 83 std Y+5, r25 ; 0x05 18e: ae 83 std Y+6, r26 ; 0x06 190: bf 83 std Y+7, r27 ; 0x07 l = (l & ((long)1 << 27)) ? 3 : 5; // ok: if((long)l & C) -> avr's bit-test&skip. 192: 8c 81 ldd r24, Y+4 ; 0x04 194: 9d 81 ldd r25, Y+5 ; 0x05 196: ae 81 ldd r26, Y+6 ; 0x06 198: bf 81 ldd r27, Y+7 ; 0x07 19a: b3 ff sbrs r27, 3 19c: 05 c0 rjmp .+10 ; 0x1a8 <main+0xe2> 19e: 83 e0 ldi r24, 0x03 ; 3 1a0: 90 e0 ldi r25, 0x00 ; 0 1a2: a0 e0 ldi r26, 0x00 ; 0 1a4: b0 e0 ldi r27, 0x00 ; 0 1a6: 04 c0 rjmp .+8 ; 0x1b0 <main+0xea> 1a8: 85 e0 ldi r24, 0x05 ; 5 1aa: 90 e0 ldi r25, 0x00 ; 0 1ac: a0 e0 ldi r26, 0x00 ; 0 1ae: b0 e0 ldi r27, 0x00 ; 0 1b0: 8c 83 std Y+4, r24 ; 0x04 1b2: 9d 83 std Y+5, r25 ; 0x05 1b4: ae 83 std Y+6, r26 ; 0x06 1b6: bf 83 std Y+7, r27 ; 0x07 l = (l & ((long)1 << 27)) ? 1 : 0; // poor: if((long)l & C) -> if(((long)l >> C') & 1). 1b8: 8c 81 ldd r24, Y+4 ; 0x04 1ba: 9d 81 ldd r25, Y+5 ; 0x05 1bc: ae 81 ldd r26, Y+6 ; 0x06 1be: bf 81 ldd r27, Y+7 ; 0x07 1c0: 2b e1 ldi r18, 0x1B ; 27 1c2: b6 95 lsr r27 1c4: a7 95 ror r26 1c6: 97 95 ror r25 1c8: 87 95 ror r24 1ca: 2a 95 dec r18 1cc: d1 f7 brne .-12 ; 0x1c2 <main+0xfc> 1ce: aa 27 eor r26, r26 1d0: 97 fd sbrc r25, 7 1d2: a0 95 com r26 1d4: ba 2f mov r27, r26 1d6: 81 70 andi r24, 0x01 ; 1 1d8: 90 70 andi r25, 0x00 ; 0 1da: a0 70 andi r26, 0x00 ; 0 1dc: b0 70 andi r27, 0x00 ; 0 1de: 8c 83 std Y+4, r24 ; 0x04 1e0: 9d 83 std Y+5, r25 ; 0x05 1e2: ae 83 std Y+6, r26 ; 0x06 1e4: bf 83 std Y+7, r27 ; 0x07 if (l & ((long)1 << 27)) // ok: if((long)l & C) -> avr's bit-test&skip. 1e6: 8c 81 ldd r24, Y+4 ; 0x04 1e8: 9d 81 ldd r25, Y+5 ; 0x05 1ea: ae 81 ldd r26, Y+6 ; 0x06 1ec: bf 81 ldd r27, Y+7 ; 0x07 1ee: b3 ff sbrs r27, 3 1f0: 05 c0 rjmp .+10 ; 0x1fc <main+0x136> l = 3; 1f2: 83 e0 ldi r24, 0x03 ; 3 1f4: 90 e0 ldi r25, 0x00 ; 0 1f6: a0 e0 ldi r26, 0x00 ; 0 1f8: b0 e0 ldi r27, 0x00 ; 0 1fa: 04 c0 rjmp .+8 ; 0x204 <main+0x13e> else l = 5; 1fc: 85 e0 ldi r24, 0x05 ; 5 1fe: 90 e0 ldi r25, 0x00 ; 0 200: a0 e0 ldi r26, 0x00 ; 0 202: b0 e0 ldi r27, 0x00 ; 0 204: 8c 83 std Y+4, r24 ; 0x04 206: 9d 83 std Y+5, r25 ; 0x05 208: ae 83 std Y+6, r26 ; 0x06 20a: bf 83 std Y+7, r27 ; 0x07 if (l & ((long)1 << 27)) // poor: if((long)l & C) -> avr's bit-test&skip, but w/ineffecient return value. 20c: 8c 81 ldd r24, Y+4 ; 0x04 20e: 9d 81 ldd r25, Y+5 ; 0x05 210: ae 81 ldd r26, Y+6 ; 0x06 212: bf 81 ldd r27, Y+7 ; 0x07 214: 9c 01 movw r18, r24 216: ad 01 movw r20, r26 218: 20 70 andi r18, 0x00 ; 0 21a: 30 70 andi r19, 0x00 ; 0 21c: 40 70 andi r20, 0x00 ; 0 21e: 58 70 andi r21, 0x08 ; 8 220: b3 ff sbrs r27, 3 222: 09 c0 rjmp .+18 ; 0x236 <main+0x170> l = 1; 224: 81 e0 ldi r24, 0x01 ; 1 226: 90 e0 ldi r25, 0x00 ; 0 228: a0 e0 ldi r26, 0x00 ; 0 22a: b0 e0 ldi r27, 0x00 ; 0 22c: 8c 83 std Y+4, r24 ; 0x04 22e: 9d 83 std Y+5, r25 ; 0x05 230: ae 83 std Y+6, r26 ; 0x06 232: bf 83 std Y+7, r27 ; 0x07 234: 04 c0 rjmp .+8 ; 0x23e <main+0x178> else l = 0; 236: 2c 83 std Y+4, r18 ; 0x04 238: 3d 83 std Y+5, r19 ; 0x05 23a: 4e 83 std Y+6, r20 ; 0x06 23c: 5f 83 std Y+7, r21 ; 0x07 return 0; }
I think the issue here is rtx_cost for avr sucks and does not give a good accurate cost matrix.
Take: char g(char c) { return (c & (1 << 3)) != 0; } (insn 8 4 30 (set (reg:HI 24 r24 [orig:44 c ] [44]) (sign_extend:HI (reg:QI 24 r24 [ c ]))) 81 {extendqihi2} (nil) (nil)) (insn 30 8 11 (parallel [ (set (reg:HI 24 r24 [orig:44 c ] [44]) (lshiftrt:HI (reg:HI 24 r24 [orig:44 c ] [44]) (const_int 3 [0x3]))) (clobber (reg:QI 19 r19)) ]) 70 {*lshrhi3_const} (nil) (expr_list:REG_UNUSED (reg:QI 19 r19) (expr_list:REG_UNUSED (reg:QI 25 r25) (nil)))) (insn 11 30 12 (set (reg:HI 24 r24 [46]) (sign_extend:HI (reg:QI 24 r24 [47]))) 81 {extendqihi2} (insn_list:REG_DEP_TRUE 10 (nil)) (nil)) (insn 19 16 25 (parallel [ (set (reg/i:HI 24 r24 [ <result> ]) (and:HI (reg:HI 24 r24 [46]) (const_int 1 [0x1]))) (clobber (scratch:QI)) ]) 47 {andhi3} (insn_list:REG_DEP_TRUE 11 (nil)) (expr_list:REG_UNUSED (scratch:QI) (nil))) Which looks good if the rtl matches up one to one per instruction but since avr is not like that we get a werid interaction.
Subject: Re: miss-optimization of (x & pow2C) avr conditionals returning bool equivalent values > I think the issue here is rtx_cost for avr sucks and does not give a good > accurate cost matrix. I considered that, and sure it doesn't help; but the form of the conditional presented for code generation is actually different in otherwise identical expressions, therefore implying that it's presumed that: if ((x >> C') & 1) 1 else 0; => ((x >> C') & 1) Is more efficient than: if ((x >> C') & 1) 1 else 0; => if (x & C) 1 else 0; Which is target specific, and should not be presumed, as for targets which support bit-test&branch: if (x & C) 1 else 0; => bit-test&branch x log2C; return 0; return 1; Is significantly more efficient than a (multi-bit-shift & 1). (therefore it doesn't seem proper for such return value dependant assumptions to be made in a target neutral way)
Subject: Re: miss-optimization of (x & pow2C) avr conditionals returning bool equivalent values I apologize, I'm confused, what are you comparing to what? (are you using 4.0, which to my knowledge only builds with my recent proposed patches, unless the problem was fixed otherwise?) - to my understanding, (x & C) should never result in ((x >> C') & 1) presented to the backend, as it's purely a target specific optimization. > From: pinskia at gcc dot gnu dot org <gcc-bugzilla@gcc.gnu.org> > ------- Additional Comments From pinskia at gcc dot gnu dot org 2004-12-25 > 20:57 ------- > Take: > char g(char c) > { > return (c & (1 << 3)) != 0; > } > > (insn 8 4 30 (set (reg:HI 24 r24 [orig:44 c ] [44]) > (sign_extend:HI (reg:QI 24 r24 [ c ]))) 81 {extendqihi2} (nil) > (nil)) > > (insn 30 8 11 (parallel [ > (set (reg:HI 24 r24 [orig:44 c ] [44]) > (lshiftrt:HI (reg:HI 24 r24 [orig:44 c ] [44]) > (const_int 3 [0x3]))) > (clobber (reg:QI 19 r19)) > ]) 70 {*lshrhi3_const} (nil) > (expr_list:REG_UNUSED (reg:QI 19 r19) > (expr_list:REG_UNUSED (reg:QI 25 r25) > (nil)))) > > (insn 11 30 12 (set (reg:HI 24 r24 [46]) > (sign_extend:HI (reg:QI 24 r24 [47]))) 81 {extendqihi2} > (insn_list:REG_DEP_TRUE 10 (nil)) > (nil)) > (insn 19 16 25 (parallel [ > (set (reg/i:HI 24 r24 [ <result> ]) > (and:HI (reg:HI 24 r24 [46]) > (const_int 1 [0x1]))) > (clobber (scratch:QI)) > ]) 47 {andhi3} (insn_list:REG_DEP_TRUE 11 (nil)) > (expr_list:REG_UNUSED (scratch:QI) > (nil))) > > > Which looks good if the rtl matches up one to one per instruction but since > avr is not like that we get a > werid interaction. > > -- > What |Removed |Added > ---------------------------------------------------------------------------- > Status|UNCONFIRMED |NEW > Ever Confirmed| |1 > Last reconfirmed|0000-00-00 00:00:00 |2004-12-25 20:57:17 > date| | > > > http://gcc.gnu.org/bugzilla/show_bug.cgi?id=19154 > > ------- You are receiving this mail because: ------- > You reported the bug, or are watching the reporter.
Created an attachment (id=8441) [edit] Partial fix This patch resolves some of the poor code by recognising bit extract and combinations and using bit move instructions BLD/BLT. So shifts are no longer used. It does not help with "long" testcase as backend is presented with unworkable patterns involving too complex sign/zero extend and subreg combinations. This would seem to be a middle end issue. It does not remove superfluous promotions of char to int. (e.g. 'zero extended' 1/0 result, where top byte is immediately discarded for 'char' result) Sub-optimal return value mentioned in testcase are also unchanged. However, it is way beyond backend to be able recognise that result=2^n can be formed by a similar bit move - i.e. spans way too many RTL instructions. On the otherhand, it does a nice good!
In a similar case I've got the impression that the ifcombine pass might be responsible for rewriting the "if" into shift and mask. It is rather questionable whether this really optimizes things for simple targets which jump faster as they can shift. I am suprised however, that many optimization strategies can be disabled by some -fxxx option, but not this one. A first and probably simple approach could be, to just add such an option.
Subject: Bug 19154 Author: hutchinsonandy Date: Sat Oct 24 15:36:40 2009 New Revision: 153530 URL: http://gcc.gnu.org/viewcvs?root=gcc&view=rev&rev=153530 Log: PR middle-end/19154 * avr.md (QIDI): Add new mode iterator. (sbrx_branch<mode>): Create new zero extract bit, test and jump patterns for all QI-DI modes combinations. (sbrx_and_branch<mode>): Create new and based bit test and jump patterns for QI-SI modes. avr.c (avr_out_sbxx_branch): Use only bit number. Modified: trunk/gcc/config/avr/avr.c trunk/gcc/config/avr/avr.md
Subject: Bug 19154 Author: hutchinsonandy Date: Sat Oct 24 15:39:23 2009 New Revision: 153531 URL: http://gcc.gnu.org/viewcvs?root=gcc&view=rev&rev=153531 Log: PR middle-end/19154 * avr.md (QIDI): Add new mode iterator. (sbrx_branch<mode>): Create new zero extract bit, test and jump patterns for all QI-DI modes combinations. (sbrx_and_branch<mode>): Create new and based bit test and jump patterns for QI-SI modes. avr.c (avr_out_sbxx_branch): Use only bit number. Modified: trunk/gcc/ChangeLog