The following two literal initializer data should both be considered static const data references: char s[5] = "abcde"; char t[5] = {'a','b','c','d','e'}; the memory reference to "abcde" should be marked readonly, not frame-relative (as it isn't), as otherwise it's impossible to properly identify "static const data" memory references at the tree/rtl level as may be required for target specific code/data generation/optimization. The following shows the problem (as well as a missed optimization associated with potentially being able to access the initializer data directly, as opposed to accessesing the frame-relative variable data string/array after being copied if never modifed; and an inappropriate inline of function foo() which should not have likely been inlined given it's size, and note that bar being identical wan't inlined, which is correct): volatile char v; void foo ( void ) { char x[5] = "abcde"; v = x[v]; } void bar ( void ) { char y[5] = {'a','b','c','d','e'}; v = y[v]; } int main ( void ) { char x[5] = "abcde"; // Initializer string not marked READONLY static const. char y[5] = {'a','b','c','d','e'}; // Although char arrary is correctly. v = x[v]; v = y[v]; foo(); bar(); return 0; } resulting code: volatile char v; void foo ( void ) { c6: cf 93 push r28 c8: df 93 push r29 ca: cd b7 in r28, 0x3d ; 61 cc: de b7 in r29, 0x3e ; 62 ce: 25 97 sbiw r28, 0x05 ; 5 d0: 0f b6 in r0, 0x3f ; 63 d2: f8 94 cli d4: de bf out 0x3e, r29 ; 62 d6: 0f be out 0x3f, r0 ; 63 d8: cd bf out 0x3d, r28 ; 61 char x[5] = "abcde"; da: de 01 movw r26, r28 dc: 11 96 adiw r26, 0x01 ; 1 de: e0 e0 ldi r30, 0x00 ; 0 e0: f1 e0 ldi r31, 0x01 ; 1 e2: 85 e0 ldi r24, 0x05 ; 5 e4: 01 90 ld r0, Z+ e6: 0d 92 st X+, r0 e8: 81 50 subi r24, 0x01 ; 1 ea: e1 f7 brne .-8 ; 0xe4 <foo+0x1e> v = x[v]; ec: 80 91 10 01 lds r24, 0x0110 f0: fe 01 movw r30, r28 f2: e8 0f add r30, r24 f4: f1 1d adc r31, r1 f6: 81 81 ldd r24, Z+1 ; 0x01 f8: 80 93 10 01 sts 0x0110, r24 fc: 25 96 adiw r28, 0x05 ; 5 fe: 0f b6 in r0, 0x3f ; 63 100: f8 94 cli 102: de bf out 0x3e, r29 ; 62 104: 0f be out 0x3f, r0 ; 63 106: cd bf out 0x3d, r28 ; 61 108: df 91 pop r29 10a: cf 91 pop r28 10c: 08 95 ret 0000010e <bar>: } void bar ( void ) { 10e: cf 93 push r28 110: df 93 push r29 112: cd b7 in r28, 0x3d ; 61 114: de b7 in r29, 0x3e ; 62 116: 25 97 sbiw r28, 0x05 ; 5 118: 0f b6 in r0, 0x3f ; 63 11a: f8 94 cli 11c: de bf out 0x3e, r29 ; 62 11e: 0f be out 0x3f, r0 ; 63 120: cd bf out 0x3d, r28 ; 61 char y[5] = {'a','b','c','d','e'}; 122: de 01 movw r26, r28 124: 11 96 adiw r26, 0x01 ; 1 126: e6 e0 ldi r30, 0x06 ; 6 128: f1 e0 ldi r31, 0x01 ; 1 12a: 85 e0 ldi r24, 0x05 ; 5 12c: 04 90 lpm r0, Z 12e: 0d 92 st X+, r0 130: 81 50 subi r24, 0x01 ; 1 132: e1 f7 brne .-8 ; 0x12c <bar+0x1e> v = y[v]; 134: 80 91 10 01 lds r24, 0x0110 138: fe 01 movw r30, r28 13a: e8 0f add r30, r24 13c: f1 1d adc r31, r1 13e: 81 81 ldd r24, Z+1 ; 0x01 140: 80 93 10 01 sts 0x0110, r24 144: 25 96 adiw r28, 0x05 ; 5 146: 0f b6 in r0, 0x3f ; 63 148: f8 94 cli 14a: de bf out 0x3e, r29 ; 62 14c: 0f be out 0x3f, r0 ; 63 14e: cd bf out 0x3d, r28 ; 61 150: df 91 pop r29 152: cf 91 pop r28 154: 08 95 ret 00000156 <main>: } int main ( void ) { 156: c0 ef ldi r28, 0xF0 ; 240 158: d0 e1 ldi r29, 0x10 ; 16 15a: de bf out 0x3e, r29 ; 62 15c: cd bf out 0x3d, r28 ; 61 char x[5] = "abcde"; // Initializer string not marked READONLY static const. 15e: de 01 movw r26, r28 160: 11 96 adiw r26, 0x01 ; 1 162: e0 e0 ldi r30, 0x00 ; 0 164: f1 e0 ldi r31, 0x01 ; 1 166: 85 e0 ldi r24, 0x05 ; 5 168: 01 90 ld r0, Z+ 16a: 0d 92 st X+, r0 16c: 81 50 subi r24, 0x01 ; 1 16e: e1 f7 brne .-8 ; 0x168 <main+0x12> char y[5] = {'a','b','c','d','e'}; // Although char arrary is correctly. 170: de 01 movw r26, r28 172: 16 96 adiw r26, 0x06 ; 6 174: eb e0 ldi r30, 0x0B ; 11 176: f1 e0 ldi r31, 0x01 ; 1 178: 85 e0 ldi r24, 0x05 ; 5 17a: 04 90 lpm r0, Z 17c: 0d 92 st X+, r0 17e: 81 50 subi r24, 0x01 ; 1 180: e1 f7 brne .-8 ; 0x17a <main+0x24> v = x[v]; 182: 80 91 10 01 lds r24, 0x0110 186: fe 01 movw r30, r28 188: e8 0f add r30, r24 18a: f1 1d adc r31, r1 18c: 81 81 ldd r24, Z+1 ; 0x01 18e: 80 93 10 01 sts 0x0110, r24 v = y[v]; 192: 80 91 10 01 lds r24, 0x0110 196: fe 01 movw r30, r28 198: e8 0f add r30, r24 19a: f1 1d adc r31, r1 19c: 86 81 ldd r24, Z+6 ; 0x06 19e: 80 93 10 01 sts 0x0110, r24 foo(); char x[5] = "abcde"; 1a2: de 01 movw r26, r28 1a4: 1b 96 adiw r26, 0x0b ; 11 1a6: e0 e0 ldi r30, 0x00 ; 0 1a8: f1 e0 ldi r31, 0x01 ; 1 1aa: 85 e0 ldi r24, 0x05 ; 5 1ac: 01 90 ld r0, Z+ 1ae: 0d 92 st X+, r0 1b0: 81 50 subi r24, 0x01 ; 1 1b2: e1 f7 brne .-8 ; 0x1ac <main+0x56> foo(); v = x[v]; 1b4: 80 91 10 01 lds r24, 0x0110 1b8: fe 01 movw r30, r28 1ba: e8 0f add r30, r24 1bc: f1 1d adc r31, r1 1be: 83 85 ldd r24, Z+11 ; 0x0b 1c0: 80 93 10 01 sts 0x0110, r24 bar(); 1c4: 0e 94 87 00 call 0x10e <bar> return 0; } 1c8: 80 e0 ldi r24, 0x00 ; 0 1ca: 90 e0 ldi r25, 0x00 ; 0 1cc: 0c 94 e8 00 jmp 0x1d0 <_exit> 000001d0 <_exit>: 1d0: ff cf rjmp .-2 ; 0x1d0 <_exit> from gimple: ;; Function main (main) main () { static char C.3[5] = {97, 98, 99, 100, 101}; // should probably be "static const char" volatile char v.4; int D.1152; char D.1153; volatile char v.5; int D.1155; char D.1156; int D.1157; { char x[5]; char y[5]; x = "abcde"; // should have been separatly declared "static const char" initilizer. y = C.3; v.4 = v; D.1152 = (int) v.4; D.1153 = x[D.1152]; v = D.1153; v.5 = v; D.1155 = (int) v.5; D.1156 = y[D.1155]; v = D.1156; { { volatile char v.0; int D.1185; char D.1186; char x[5]; x = "abcde"; v.0 = v; D.1185 = (int) v.0; D.1186 = x[D.1185]; v = D.1186; } } (void) 0; bar (); D.1157 = 0; return D.1157; } D.1157 = 0; return D.1157; } resulting tree/rtl: showing frame-relative reference to "abcde" initializing data which is wrong: (insn 12 11 13 1 (set (reg:HI 44) (symbol_ref/f:HI ("*.LC0") [flags 0x2] <string_cst 0x160b840>)) -1 (nil) (nil)) showing readonly reference to {'a','b','c','d','e') initializing data which is correct: (insn 23 36 24 3 (set (reg:QI 42 [ v.0 ]) (mem/v/i:QI (symbol_ref:HI ("v") <var_decl 0x16791b0 v>) [0 v+0 S1 A8])) -1 (nil) (nil))
(In reply to comment #0) > resulting tree/rtl: > > showing frame-relative reference to "abcde" initializing data which is wrong: > > (insn 12 11 13 1 (set (reg:HI 44) > (symbol_ref/f:HI ("*.LC0") [flags 0x2] <string_cst 0x160b840>)) -1 (nil) > (nil)) > > showing readonly reference to {'a','b','c','d','e') initializing data which is correct: > > (insn 23 36 24 3 (set (reg:QI 42 [ v.0 ]) > (mem/v/i:QI (symbol_ref:HI ("v") <var_decl 0x16791b0 v>) [0 v+0 S1 A8])) -1 (nil) > (nil)) Sorry more accurately: Showing frame-relative/non-readonly reference to "abcde" initializing tree/rtl type which is wrong: (insn 12 11 13 1 (set (reg:HI 44) (symbol_ref/f:HI ("*.LC0") [flags 0x2] <string_cst 0x160b840>)) -1 (nil) (nil)) (insn 15 35 16 2 (set (reg:QI 0 r0) (mem/s:QI (reg:HI 44) [1 S5 A8])) -1 (nil) (nil)) Showing readonly reference to {'a','b','c','d','e') initializing data which is correct: (insn 21 20 22 3 (set (reg:HI 52) (symbol_ref:HI ("C.3.1150") [flags 0x2] <var_decl 0x167e438 C.3>)) -1 (nil) (nil)) (insn 24 81 25 4 (set (reg:QI 0 r0) (mem/s/u:QI (reg:HI 52) [2 C.3+0 S5 A8])) -1 (nil) (nil))
Note the C.x variables are not normal VAR_DECLs but CONST_DECL so maybe avr should be changed to recongize them as such.
Subject: Re: Initializing string literal data improperly marked frame-relative?, should be readonly static const. > Note the C.x variables are not normal VAR_DECLs but CONST_DECL so maybe avr > should be changed to recongize them as such. Actually the problem seems then be that literal string constants aren't being consistently defined through CONST_DECL's (just as initializing char array data, which are equivalent to string initializers, and all other literal and static constants which end up being stored as literal data are); for which MEM_READONLY_P allows all memory references to, to be easily identified, which seems to be it's intent. Is there any reason that literal string constant data shouldn't be similarly declared and correspondingly identifiable? (or just an oversight?)
Subject: Re: Initializing string literal data improperly marked frame-relative?, should be readonly static const. > From: Paul Schlie <schlie@comcast.net> > Subject: Re: [Bug middle-end/21018] Initializing string literal data > improperly marked frame-relative?, should be readonly static const. > >> Note the C.x variables are not normal VAR_DECLs but CONST_DECL so maybe avr >> should be changed to recongize them as such. > > Actually the problem seems then be that literal string constants aren't > being consistently defined through CONST_DECL's (just as initializing char > array data, which are equivalent to string initializers, and all other literal > and static constants which end up being stored as literal data are); for which > MEM_READONLY_P allows all memory references to, to be easily identified, which > seems to be it's intent. > > Is there any reason that literal string constant data shouldn't be similarly > declared and correspondingly identifiable? (or just an oversight?) I suspect it was likely an artifact of the now depreciated writeable-strings extension, which previously pretended that literal string constants were not READONLY after being copied from the executable image into read/write memory.
Is this still a valid bug?
I'm not sure whether this is related or not... but from the description, it looks so. avr-libc contains a macro that helps the users declaring a flash-ROM string, lacking any real support in GCC for different memory sections (as proposed by the Embedded C draft). This works by using the GCC extension that a block can return a value: a block is openened, and a block-scope static variable is allocated, using the string literal passed in by the user. Then, that block returns the address of this variable to the caller. By giving the block-scope variable the "progmem" attribute, it will eventually be allocated in ROM rather than RAM. Now, if that macro is called with identical string literals within one translation unit, the strings are allocated separately rather than merged into a single location. This behaviour is reproducable as well on the i386 target, so it looks like not really related to the avr backend. Here's a simple test case. For simplicity, the original PSTR() macro is just reproduced in the file rather than including the entire original header(s). #if defined(__AVR__) # define PROGMEM __attribute__((__progmem__)) #else # define PROGMEM /* nothing */ #endif #define PSTR(x) \ (__extension__({static const char __c[] PROGMEM = (x); &__c[0];})) extern void puts_P(const char *); void dosomething(void) { puts_P(PSTR("Hello world!")); puts_P(PSTR("Hello world!")); } Compiling for the avr target yields: .file "foo.c" __SREG__ = 0x3f __SP_H__ = 0x3e __SP_L__ = 0x3d __tmp_reg__ = 0 __zero_reg__ = 1 .global __do_copy_data .global __do_clear_bss .text .global dosomething .type dosomething, @function dosomething: /* prologue: frame size=0 */ /* prologue end (size=0) */ ldi r24,lo8(__c.1471) ldi r25,hi8(__c.1471) rcall puts_P ldi r24,lo8(__c.1473) ldi r25,hi8(__c.1473) rcall puts_P /* epilogue: frame size=0 */ ret /* epilogue end (size=1) */ /* function dosomething size 7 (6) */ .size dosomething, .-dosomething .section .progmem.data,"a",@progbits .type __c.1473, @object .size __c.1473, 13 __c.1473: .string "Hello world!" .type __c.1471, @object .size __c.1471, 13 __c.1471: .string "Hello world!" /* File "foo.c": code 7 = 0x0007 ( 6), prologues 0, epilogues 1 */ Compiling the same file for the i386 target also shows two copies of the string literal: .file "foo.c" .section .rodata .type __c.0, @object .size __c.0, 13 __c.0: .string "Hello world!" .type __c.1, @object .size __c.1, 13 __c.1: .string "Hello world!" .text .globl dosomething .type dosomething, @function dosomething: pushl %ebp movl %esp, %ebp pushl $__c.0 call puts_P movl $__c.1, (%esp) call puts_P leave ret .size dosomething, .-dosomething .ident "GCC: (GNU) 3.4.4 [FreeBSD] 20050518"
Subject: Re: Initializing string literal data improperly marked frame-relative?, should be readonly static const. I believe so. > From: manu at gcc dot gnu dot org <gcc-bugzilla@gcc.gnu.org> > Reply-To: <gcc-bugzilla@gcc.gnu.org> > Date: 15 Nov 2007 16:19:49 -0000 > To: <schlie@comcast.net> > Subject: [Bug middle-end/21018] Initializing string literal data improperly > marked frame-relative?, should be readonly static const. > > > > ------- Comment #5 from manu at gcc dot gnu dot org 2007-11-15 16:19 ------- > Is this still a valid bug? > > > -- > > manu at gcc dot gnu dot org changed: > > What |Removed |Added > ---------------------------------------------------------------------------- > CC| |manu at gcc dot gnu dot org > Status|UNCONFIRMED |WAITING > > > http://gcc.gnu.org/bugzilla/show_bug.cgi?id=21018 > > ------- You are receiving this mail because: ------- > You reported the bug, or are watching the reporter.
OK, setting it back to NEW, but I don't have any idea about how to approach this.
*** Bug 43746 has been marked as a duplicate of this bug. ***
Closed as WONTFIX.
Setting this as duplicate; not because it's a real duplicate -- the original message is rather confusing and I can't really depict what it's intention is -- but because the contributors to this PR might be also interestet in PR43746 which touches similar topic like duplicate strings and string merging on AVR that is addressed here, too. (In reply to comment #0) > char s[5] = "abcde"; > char t[5] = {'a','b','c','d','e'}; Note that "abcde" is a string literal containing 6 chars (only 5 are copied) and the second initializer has length 5 bytes and is not a string. > resulting tree/rtl: > > showing frame-relative reference to "abcde" initializing data which is wrong: > > (set (reg:HI 44) > (symbol_ref/f:HI ("*.LC0") [flags 0x2] <string_cst 0x160b840>)) Note that the /f flag has different meanings depending on where it is attached to, see http://gcc.gnu.org/viewcvs/trunk/gcc/rtl.h?view=markup (In reply to comment #6) > Now, if that macro is called with identical string literals within one > translation unit, the strings are allocated separately rather than merged > into a single location. > > This behaviour is reproducable as well on the i386 target, so it looks like > not really related to the avr backend. > > [...] > > #define PROGMEM __attribute__((__progmem__)) > > #define PSTR(x) \ > (__extension__({static const char __c[] PROGMEM = (x); &__c[0];})) > > extern void puts_P(const char *); > > void dosomething(void) > { > puts_P(PSTR("Hello world!")); > puts_P(PSTR("Hello world!")); > } > > Compiling for the avr target yields: > > [...] > .section .progmem.data,"a",@progbits > .type __c.1473, @object > .size __c.1473, 13 > __c.1473: > .string "Hello world!" > .type __c.1471, @object > .size __c.1471, 13 > __c.1471: > .string "Hello world!" > > > Compiling the same file for the i386 target also shows two copies of the > string literal: [...] Problem is not the duplicate string literals it's the missing section flags like "aMS" for mergeable strings and binutils would take care of this. This is rooted in the way avr backend up to now implements progem attribute, i.e. is simply bangs a "section .progmem" against a decl without caring for the kind of data. This is not hard to change, but it has to be implemented. For constants in .rodata string merging was achieved wihout effort by cleaning up the avr backend. Notice that .rodata is still in RAM but named .rodata now and not mapped to .data as in <= 4.6. *** This bug has been marked as a duplicate of bug 43746 ***