In the code below, g_52 should end up being 5. At -O1, avr-gcc-4.5.0 puts 1 into this variable. Verified using AVR Studio. 4.3.3 also seems to have this bug. regehr@john-home:~/volatile/work0/014255$ avr-gcc -O1 small.c -mmcu=atmega128 -o small.elf regehr@john-home:~/volatile/work0/014255$ avr-gcc -v Using built-in specs. Target: avr Configured with: ../configure --prefix=/home/regehr/avrgcc440/install --target=avr --enable-languages=c,c++ --disable-nls --disable-libssp : (reconfigured) ../configure --prefix=/home/regehr/avrgcc440/install --target=avr --enable-languages=c,c++ --disable-nls --disable-libssp : (reconfigured) ../configure --prefix=/home/regehr/avrgcc440/install --target=avr --enable-languages=c,c++ --disable-nls --disable-libssp Thread model: single gcc version 4.5.0 20090403 (experimental) (GCC) regehr@john-home:~/volatile/work0/014255$ cat small.c #include <stdint.h> #include <limits.h> #define safe_add_macro_int8_t_s_s(si1,si2) \ ((((((int8_t)(si1))>((int8_t)0)) && (((int8_t)(si2))>((int8_t)0)) && (((int8_t)(si1)) > ((INT8_MAX)-((int8_t)(si2))))) \ || ((((int8_t)(si1))<((int8_t)0)) && (((int8_t)(si2))<((int8_t)0)) && (((int8_t)(si1)) < ((INT8_MIN)-((int8_t)(si2)))))) \ ? ((int8_t)(si1)) \ : (((int8_t)(si1)) + ((int8_t)(si2))) \ ) static int8_t safe_add_func_int8_t_s_s(int8_t _si1, int8_t _si2) { return safe_add_macro_int8_t_s_s(_si1,_si2); } uint8_t g_52; void func_1 (void); void func_1 (void) { uint64_t l_116; for (l_116 = 0; l_116 < 13; l_116 = safe_add_func_int8_t_s_s (l_116, 3)) { g_52++; } } static inline void platform_main_end(uint32_t crc); static inline void platform_main_end(uint32_t crc) { uint16_t crc16 = crc ^ (crc >> 16); asm volatile("cli" "\n\t" "mov r30, %A0" "\n\t" "mov r31, %B0" "\n\t" "break" : : "r" (crc16) : "memory" ); } int main (void) { func_1 (); platform_main_end (g_52); return 0; }
At -O2, -O3, -Os g_52 contains the value 5 while in -O1 it is 1.It is confirmed.
(In reply to comment #1) > At -O2, -O3, -Os g_52 contains the value 5 while in -O1 it is 1.It is > confirmed. > Hi Abnikant, What version did you use to confirm this bug?
Subject: RE: [avr] loop bug Hi Eric, Version is (avr-gcc )4.3.2. -----Original Message----- From: eric dot weddington at atmel dot com [mailto:gcc-bugzilla@gcc.gnu.org] Sent: Monday, August 17, 2009 5:26 PM To: Singh, Abnikant Subject: [Bug target/39633] [avr] loop bug ------- Comment #2 from eric dot weddington at atmel dot com 2009-08-17 11:56 ------- (In reply to comment #1) > At -O2, -O3, -Os g_52 contains the value 5 while in -O1 it is 1.It is > confirmed. > Hi Abnikant, What version did you use to confirm this bug?
4.3.2
Did you try this with -fno-strict-overflow? Code like (INT8_MIN)-((int8_t)(si2)) might lead to signed overflow, and if gcc gets knowledge of si2 being strictly positive, the results is undefined. http://gcc.gnu.org/onlinedocs/gcc-4.3.5/gcc/Optimize-Options.html#Optimize-Options
I confirm it on 4.6.1, 4.5.0 and 4.5.2 (-fno-strict-overflow does not help) I see the bug only for -O1 (in which case g_52=1) and not for other optimization levels (in which case g_52=5). The bug disappears if l_116 in func_1 is declared as uint32_t instead of uint64_t. In 4.6.1, g_52 is as follows: -Os -fsplit-wide-types: 5 -Os -fno-split-wide-types: 1 This remains after adding the no_inline attribute to safe_add_func_int8_t_s_s that gets compiler to: safe_add_func_int8_t_s_s.constprop.0: cpi r24,lo8(125) ; _si1, ; 6 *cmpqi/3 brge .L2 ; , ; 7 branch subi r24,lo8(-(3)) ; _si1, ; 9 addqi3/2 .L2: ret ; 28 return With -Os -fsplit-wide-types, the compile of func_1 is(g_52=5 ok): func_1: /* prologue: function */ ldi r24,lo8(0) ; l_116, ; 3 *movqi/1 .L4: lds r25,g_52 ; tmp52, g_52 ; 15 *movqi/4 subi r25,lo8(-(1)) ; tmp52, ; 16 addqi3/2 sts g_52,r25 ; g_52, tmp52 ; 17 *movqi/3 rcall safe_add_func_int8_t_s_s.constprop.0 ; 20 call_value_insn/3 sbrc r24,7 ; l_116, ; 99 *sbrx_branchhi rjmp .L3 ; cpi r24,lo8(13) ; l_116, ; 60 *cmpqi/3 brlo .L4 ; , ; 61 branch .L3: ret ; 98 return With -Os -fno-split-wide-types, the compile of func_1 is (g_52=1 wrong): func_1: push r8 ; ; 90 *pushqi/1 push r9 ; ; 91 *pushqi/1 push r10 ; ; 92 *pushqi/1 push r11 ; ; 93 *pushqi/1 push r12 ; ; 94 *pushqi/1 push r13 ; ; 95 *pushqi/1 push r14 ; ; 96 *pushqi/1 push r15 ; ; 97 *pushqi/1 /* prologue: function */ clr r8 ; l_116 ; 3 *movqi/1 .L4: lds r24,g_52 ; tmp52, g_52 ; 15 *movqi/4 subi r24,lo8(-(1)) ; tmp52, ; 16 addqi3/2= 1] sts g_52,r24 ; g_52, tmp52 ; 17 *movqi/3 mov r24,r8 ; , l_116 ; 19 *movqi/1 rcall safe_add_func_int8_t_s_s.constprop.0 ; 20 call_value_insn/3 mov r8,r24 ; l_116, D.2168 ; 22 *movqi/1 lsl r24 ; tmp54 ; 23 ashrqi3/5 sbc r24,r24 ; tmp54 brne .L3 ; , ; 33 branch tst r24 ; l_116 ; 36 *cmpqi/1 brne .L3 ; , ; 37 branch tst r24 ; l_116 ; 40 *cmpqi/1 brne .L3 ; , ; 41 branch tst r24 ; l_116 ; 44 *cmpqi/1 brne .L3 ; , ; 45 branch tst r24 ; l_116 ; 48 *cmpqi/1 brne .L3 ; , ; 49 branch tst r24 ; l_116 ; 52 *cmpqi/1 brne .L3 ; , ; 53 branch tst r24 ; l_116 ; 56 *cmpqi/1 brne .L3 ; , ; 57 branch ldi r24,lo8(12) ; , ; 89 *movqi/2 cp r24,r8 ; , l_116 ; 60 *cmpqi/2 brsh .L4 ; , ; 61 branch .L3: /* epilogue start */ pop r15 ; ; 100 popqi pop r14 ; ; 101 popqi pop r13 ; ; 102 popqi pop r12 ; ; 103 popqi pop r11 ; ; 104 popqi pop r10 ; ; 105 popqi pop r9 ; ; 106 popqi pop r8 ; ; 107 popqi ret ; 108 return_from_epilogue
The bug is here: lsl r24 ashrqi3/5 sbc r24,r24 brne .L3 branch The SBC doc says: > Z: Previous value [of Z] remains unchanged when the > result [of SBC] is zero; cleared otherwise. I.e. the SBC leaves cc0 in a mess, and that's ashrqi3/5: "r,0,n" and "cc=clobber". I do not understand where the BRNE comes from because it's a conditional jump and cc0 is in a mess.
Created attachment 24727 [details] C file, reduced test case Compiler with avr-gcc 4.6.1 -Os -S -fno-split-wide-types -dp to get: func_1: push r8 ; 94 *pushqi/1 [length = 1] push r9 ; 95 *pushqi/1 [length = 1] push r10 ; 96 *pushqi/1 [length = 1] push r11 ; 97 *pushqi/1 [length = 1] push r12 ; 98 *pushqi/1 [length = 1] push r13 ; 99 *pushqi/1 [length = 1] push r14 ; 100 *pushqi/1 [length = 1] push r15 ; 101 *pushqi/1 [length = 1] clr r8 ; 3 *movqi/1 [length = 1] .L2: lds r24,g_52 ; 15 *movqi/4 [length = 2] subi r24,lo8(-(1)) ; 16 addqi3/2 [length = 1] sts g_52,r24 ; 17 *movqi/3 [length = 2] mov r24,r8 ; 19 *movqi/1 [length = 1] ldi r22,lo8(3) ; 20 *movqi/2 [length = 1] rcall foo ; 21 call_value_insn/3 [length = 1] mov r8,r24 ; 23 *movqi/1 [length = 1] >>>> BUG START lsl r24 ; 24 ashrqi3/5 [length = 2] sbc r24,r24 brne .L1 ; 34 branch [length = 1] >>>> BUG END tst r24 ; 37 *cmpqi/1 [length = 1] brne .L1 ; 38 branch [length = 1] tst r24 ; 41 *cmpqi/1 [length = 1] brne .L1 ; 42 branch [length = 1] tst r24 ; 45 *cmpqi/1 [length = 1] brne .L1 ; 46 branch [length = 1] tst r24 ; 49 *cmpqi/1 [length = 1] brne .L1 ; 50 branch [length = 1] tst r24 ; 53 *cmpqi/1 [length = 1] brne .L1 ; 54 branch [length = 1] tst r24 ; 57 *cmpqi/1 [length = 1] brne .L1 ; 58 branch [length = 1] ldi r24,lo8(12) ; 93 *movqi/2 [length = 1] cp r24,r8 ; 61 *cmpqi/2 [length = 1] brsh .L2 ; 62 branch [length = 1] .L1: pop r15 ; 104 popqi [length = 1] pop r14 ; 105 popqi [length = 1] pop r13 ; 106 popqi [length = 1] pop r12 ; 107 popqi [length = 1] pop r11 ; 108 popqi [length = 1] pop r10 ; 109 popqi [length = 1] pop r9 ; 110 popqi [length = 1] pop r8 ; 111 popqi [length = 1] ret ; 112 return_from_epilogue [length = 1]
The needed *cmpqi insn 33 can be tracked until .221r.nothrow dump; the last dump that actually contains RTL dump. But then the insn 33 is gone, even with -fno-peephole. (insn 24 23 33 (set (reg:QI 24 r24 [52]) (ashiftrt:QI (reg:QI 24 r24 [orig:46 D.1929 ] [46]) (const_int 7 [0x7]))) testit.c:8 72 {ashrqi3} (nil)) (insn 33 24 34 (set (cc0) (compare (reg:QI 24 r24 [orig:15 l_116+7 ] [15]) (const_int 0 [0]))) testit.c:8 106 {*cmpqi} (expr_list:REG_DEAD (reg:QI 15 r15 [ l_116+7 ]) (nil))) (jump_insn 34 33 72 (set (pc) (if_then_else (ne (cc0) (const_int 0 [0])) (label_ref:HI 69) (pc))) testit.c:8 117 {branch} (expr_list:REG_BR_PROB (const_int 989 [0x3dd]) (nil)) -> 69)
Created attachment 24729 [details] Minimal test case Here is a minimal testcase: char c; void func_1 (char a) { a = a >> 7; if (a) c = a; } Code with 4.6.1 -Os -dp -S -mmcu=atmega88: func_1: lsl r24 ; 6 ashrqi3/5 [length = 2] sbc r24,r24 breq .L1 ; 8 branch [length = 1] sts c,r24 ; 10 *movqi/3 [length = 2] .L1: ret ; 20 return [length = 1]
It's bug in avr.c:notice_update_cc() because for shift offset=7 cc0 is set as if it contained meaningful state: case CC_CLOBBER: /* Insn doesn't leave CC in a usable state. */ CC_STATUS_INIT; /* Correct CC for the ashrqi3 with the shift count as CONST_INT != 6 */ set = single_set (insn); if (set) { rtx src = SET_SRC (set); if (GET_CODE (src) == ASHIFTRT && GET_MODE (src) == QImode) { rtx x = XEXP (src, 1); if (GET_CODE (x) == CONST_INT && INTVAL (x) > 0 && INTVAL (x) != 6) { cc_status.value1 = SET_DEST (set); cc_status.flags |= CC_OVERFLOW_UNUSABLE; } } } break;
Author: gjl Date: Mon Jul 11 10:13:30 2011 New Revision: 176141 URL: http://gcc.gnu.org/viewcvs?root=gcc&view=rev&rev=176141 Log: gcc/ PR target/39633 * config/avr/avr.c (notice_update_cc): For ashiftrt:QI, only offsets 1..5 set cc0 in a usable way. testsuite/ PR target/39633 * gcc.target/avr/torture/pr39633.c: New test case. Added: trunk/gcc/testsuite/gcc.target/avr/torture/pr39633.c Modified: trunk/gcc/ChangeLog trunk/gcc/config/avr/avr.c trunk/gcc/testsuite/ChangeLog
Author: gjl Date: Mon Jul 11 10:24:46 2011 New Revision: 176143 URL: http://gcc.gnu.org/viewcvs?root=gcc&view=rev&rev=176143 Log: PR target/39633 Backport from mainline r176141 2011-07-11 Georg-Johann Lay * config/avr/avr.c (notice_update_cc): For ashiftrt:QI, only offsets 1..5 set cc0 in a usable way. Modified: branches/gcc-4_6-branch/gcc/ChangeLog branches/gcc-4_6-branch/gcc/config/avr/avr.c
Cloased as FIXED in 4.6.2 milestone.
Author: gjl Date: Wed May 2 17:14:32 2012 New Revision: 187056 URL: http://gcc.gnu.org/viewcvs?root=gcc&view=rev&rev=187056 Log: Backport from 2011-07-11 4.6-branch r176143 PR target/39633 * config/avr/avr.c (notice_update_cc): For ashiftrt:QI, only offsets 1..5 set cc0 in a usable way. Modified: branches/gcc-4_5-branch/gcc/ChangeLog branches/gcc-4_5-branch/gcc/config/avr/avr.c
Backported to 4.5.4