Bug 21018 - Initializing string literal data improperly marked frame-relative?, should be readonly static const.
Initializing string literal data improperly marked frame-relative?, should be...
Status: RESOLVED DUPLICATE of bug 43746
Product: gcc
Classification: Unclassified
Component: middle-end
4.1.0
: P3 enhancement
: ---
Assigned To: Not yet assigned to anyone
: missed-optimization
Depends on:
Blocks:
  Show dependency treegraph
 
Reported: 2005-04-14 06:44 UTC by Paul Schlie
Modified: 2011-08-03 19:40 UTC (History)
4 users (show)

See Also:
Host:
Target: avr
Build:
Known to work:
Known to fail:
Last reconfirmed: 2009-08-05 15:07:42


Attachments

Note You need to log in before you can comment on or make changes to this bug.
Description Paul Schlie 2005-04-14 06:44:57 UTC
The following two literal initializer data should both be considered static const data references:

 char s[5] = "abcde";
 char t[5] = {'a','b','c','d','e'};

 the memory reference to "abcde" should be marked readonly, not frame-relative (as it isn't),
 as otherwise it's impossible to properly identify "static const data" memory references at the
tree/rtl level as may be required for target specific code/data generation/optimization.

The following shows the problem (as well as a missed optimization associated with potentially
being able to access the initializer data directly, as opposed to accessesing the frame-relative
variable data string/array after being copied if never modifed; and an inappropriate inline of
function foo() which should not have likely been inlined given it's size, and note that bar being
identical wan't inlined, which is correct):

volatile char v;

void foo ( void )
{
  char x[5] = "abcde";
  
  v = x[v];
}

void bar ( void )
{
  char y[5] = {'a','b','c','d','e'};
  
  v = y[v];
}

int main ( void )
{
  char x[5] = "abcde"; // Initializer string not marked READONLY static const.
  
  char y[5] = {'a','b','c','d','e'}; // Although char arrary is correctly.
      
  v = x[v];
  
  v = y[v];
  
  foo();
  
  bar();
  
  return 0;
}

resulting code:

volatile char v;

void foo ( void )
{
  c6:	cf 93       	push	r28
  c8:	df 93       	push	r29
  ca:	cd b7       	in	r28, 0x3d	; 61
  cc:	de b7       	in	r29, 0x3e	; 62
  ce:	25 97       	sbiw	r28, 0x05	; 5
  d0:	0f b6       	in	r0, 0x3f	; 63
  d2:	f8 94       	cli
  d4:	de bf       	out	0x3e, r29	; 62
  d6:	0f be       	out	0x3f, r0	; 63
  d8:	cd bf       	out	0x3d, r28	; 61
  
  char x[5] = "abcde";
  da:	de 01       	movw	r26, r28
  dc:	11 96       	adiw	r26, 0x01	; 1
  de:	e0 e0       	ldi	r30, 0x00	; 0
  e0:	f1 e0       	ldi	r31, 0x01	; 1
  e2:	85 e0       	ldi	r24, 0x05	; 5
  e4:	01 90       	ld	r0, Z+
  e6:	0d 92       	st	X+, r0
  e8:	81 50       	subi	r24, 0x01	; 1
  ea:	e1 f7       	brne	.-8      	; 0xe4 <foo+0x1e>
  
  v = x[v];
  ec:	80 91 10 01 	lds	r24, 0x0110
  f0:	fe 01       	movw	r30, r28
  f2:	e8 0f       	add	r30, r24
  f4:	f1 1d       	adc	r31, r1
  f6:	81 81       	ldd	r24, Z+1	; 0x01
  f8:	80 93 10 01 	sts	0x0110, r24
  
  fc:	25 96       	adiw	r28, 0x05	; 5
  fe:	0f b6       	in	r0, 0x3f	; 63
 100:	f8 94       	cli
 102:	de bf       	out	0x3e, r29	; 62
 104:	0f be       	out	0x3f, r0	; 63
 106:	cd bf       	out	0x3d, r28	; 61
 108:	df 91       	pop	r29
 10a:	cf 91       	pop	r28
 10c:	08 95       	ret

0000010e <bar>:
}

void bar ( void )
{
 10e:	cf 93       	push	r28
 110:	df 93       	push	r29
 112:	cd b7       	in	r28, 0x3d	; 61
 114:	de b7       	in	r29, 0x3e	; 62
 116:	25 97       	sbiw	r28, 0x05	; 5
 118:	0f b6       	in	r0, 0x3f	; 63
 11a:	f8 94       	cli
 11c:	de bf       	out	0x3e, r29	; 62
 11e:	0f be       	out	0x3f, r0	; 63
 120:	cd bf       	out	0x3d, r28	; 61
 
  char y[5] = {'a','b','c','d','e'};
 122:	de 01       	movw	r26, r28
 124:	11 96       	adiw	r26, 0x01	; 1
 126:	e6 e0       	ldi	r30, 0x06	; 6
 128:	f1 e0       	ldi	r31, 0x01	; 1
 12a:	85 e0       	ldi	r24, 0x05	; 5
 12c:	04 90       	lpm	r0, Z
 12e:	0d 92       	st	X+, r0
 130:	81 50       	subi	r24, 0x01	; 1
 132:	e1 f7       	brne	.-8      	; 0x12c <bar+0x1e>
  
  v = y[v];
 134:	80 91 10 01 	lds	r24, 0x0110
 138:	fe 01       	movw	r30, r28
 13a:	e8 0f       	add	r30, r24
 13c:	f1 1d       	adc	r31, r1
 13e:	81 81       	ldd	r24, Z+1	; 0x01
 140:	80 93 10 01 	sts	0x0110, r24
 
 144:	25 96       	adiw	r28, 0x05	; 5
 146:	0f b6       	in	r0, 0x3f	; 63
 148:	f8 94       	cli
 14a:	de bf       	out	0x3e, r29	; 62
 14c:	0f be       	out	0x3f, r0	; 63
 14e:	cd bf       	out	0x3d, r28	; 61
 150:	df 91       	pop	r29
 152:	cf 91       	pop	r28
 154:	08 95       	ret

00000156 <main>:
}

int main ( void )
{
 156:	c0 ef       	ldi	r28, 0xF0	; 240
 158:	d0 e1       	ldi	r29, 0x10	; 16
 15a:	de bf       	out	0x3e, r29	; 62
 15c:	cd bf       	out	0x3d, r28	; 61
 
  char x[5] = "abcde"; // Initializer string not marked READONLY static const.
 15e:	de 01       	movw	r26, r28
 160:	11 96       	adiw	r26, 0x01	; 1
 162:	e0 e0       	ldi	r30, 0x00	; 0
 164:	f1 e0       	ldi	r31, 0x01	; 1
 166:	85 e0       	ldi	r24, 0x05	; 5
 168:	01 90       	ld	r0, Z+
 16a:	0d 92       	st	X+, r0
 16c:	81 50       	subi	r24, 0x01	; 1
 16e:	e1 f7       	brne	.-8      	; 0x168 <main+0x12>
  
  char y[5] = {'a','b','c','d','e'}; // Although char arrary is correctly.
 170:	de 01       	movw	r26, r28
 172:	16 96       	adiw	r26, 0x06	; 6
 174:	eb e0       	ldi	r30, 0x0B	; 11
 176:	f1 e0       	ldi	r31, 0x01	; 1
 178:	85 e0       	ldi	r24, 0x05	; 5
 17a:	04 90       	lpm	r0, Z
 17c:	0d 92       	st	X+, r0
 17e:	81 50       	subi	r24, 0x01	; 1
 180:	e1 f7       	brne	.-8      	; 0x17a <main+0x24>
      
  v = x[v];
 182:	80 91 10 01 	lds	r24, 0x0110
 186:	fe 01       	movw	r30, r28
 188:	e8 0f       	add	r30, r24
 18a:	f1 1d       	adc	r31, r1
 18c:	81 81       	ldd	r24, Z+1	; 0x01
 18e:	80 93 10 01 	sts	0x0110, r24
  
  v = y[v];
 192:	80 91 10 01 	lds	r24, 0x0110
 196:	fe 01       	movw	r30, r28
 198:	e8 0f       	add	r30, r24
 19a:	f1 1d       	adc	r31, r1
 19c:	86 81       	ldd	r24, Z+6	; 0x06
 19e:	80 93 10 01 	sts	0x0110, r24
 
  foo(); char x[5] = "abcde";
 1a2:	de 01       	movw	r26, r28
 1a4:	1b 96       	adiw	r26, 0x0b	; 11
 1a6:	e0 e0       	ldi	r30, 0x00	; 0
 1a8:	f1 e0       	ldi	r31, 0x01	; 1
 1aa:	85 e0       	ldi	r24, 0x05	; 5
 1ac:	01 90       	ld	r0, Z+
 1ae:	0d 92       	st	X+, r0
 1b0:	81 50       	subi	r24, 0x01	; 1
 1b2:	e1 f7       	brne	.-8      	; 0x1ac <main+0x56>
 
  foo(); v = x[v];
 1b4:	80 91 10 01 	lds	r24, 0x0110
 1b8:	fe 01       	movw	r30, r28
 1ba:	e8 0f       	add	r30, r24
 1bc:	f1 1d       	adc	r31, r1
 1be:	83 85       	ldd	r24, Z+11	; 0x0b
 1c0:	80 93 10 01 	sts	0x0110, r24
  
  bar();
 1c4:	0e 94 87 00 	call	0x10e <bar>
  
  return 0;
}
 1c8:	80 e0       	ldi	r24, 0x00	; 0
 1ca:	90 e0       	ldi	r25, 0x00	; 0
 1cc:	0c 94 e8 00 	jmp	0x1d0 <_exit>

000001d0 <_exit>:
 1d0:	ff cf       	rjmp	.-2      	; 0x1d0 <_exit>

from gimple:

;; Function main (main)

main ()
{
  static char C.3[5] = {97, 98, 99, 100, 101}; // should probably be "static const char"
  volatile char v.4;
  int D.1152;
  char D.1153;
  volatile char v.5;
  int D.1155;
  char D.1156;
  int D.1157;

  {
    char x[5];
    char y[5];

    x = "abcde";   // should have been separatly declared "static const char" initilizer.
    y = C.3;
    v.4 = v;
    D.1152 = (int) v.4;
    D.1153 = x[D.1152];
    v = D.1153;
    v.5 = v;
    D.1155 = (int) v.5;
    D.1156 = y[D.1155];
    v = D.1156;
    {
      {
        volatile char v.0;
        int D.1185;
        char D.1186;
        char x[5];

        x = "abcde";
        v.0 = v;
        D.1185 = (int) v.0;
        D.1186 = x[D.1185];
        v = D.1186;
      }
    }
    (void) 0;
    bar ();
    D.1157 = 0;
    return D.1157;
  }
  D.1157 = 0;
  return D.1157;
}

resulting tree/rtl:

showing frame-relative reference to "abcde" initializing data which is wrong:

(insn 12 11 13 1 (set (reg:HI 44)
        (symbol_ref/f:HI ("*.LC0") [flags 0x2] <string_cst 0x160b840>)) -1 (nil)
    (nil))

showing readonly reference to {'a','b','c','d','e') initializing data which is correct:

(insn 23 36 24 3 (set (reg:QI 42 [ v.0 ])
        (mem/v/i:QI (symbol_ref:HI ("v") <var_decl 0x16791b0 v>) [0 v+0 S1 A8])) -1 (nil)
    (nil))
Comment 1 Paul Schlie 2005-04-14 07:43:29 UTC
(In reply to comment #0)
> resulting tree/rtl:
> 
> showing frame-relative reference to "abcde" initializing data which is wrong:
> 
> (insn 12 11 13 1 (set (reg:HI 44)
>         (symbol_ref/f:HI ("*.LC0") [flags 0x2] <string_cst 0x160b840>)) -1 (nil)
>     (nil))
> 
> showing readonly reference to {'a','b','c','d','e') initializing data which is correct:
> 
> (insn 23 36 24 3 (set (reg:QI 42 [ v.0 ])
>         (mem/v/i:QI (symbol_ref:HI ("v") <var_decl 0x16791b0 v>) [0 v+0 S1 A8])) -1 (nil)
>     (nil))

Sorry more accurately:

Showing frame-relative/non-readonly reference to "abcde" initializing tree/rtl type which is wrong:

(insn 12 11 13 1 (set (reg:HI 44)
        (symbol_ref/f:HI ("*.LC0") [flags 0x2] <string_cst 0x160b840>)) -1 (nil)
    (nil))

(insn 15 35 16 2 (set (reg:QI 0 r0)
        (mem/s:QI (reg:HI 44) [1 S5 A8])) -1 (nil)
    (nil))


Showing readonly reference to {'a','b','c','d','e') initializing data which is correct:

(insn 21 20 22 3 (set (reg:HI 52)
        (symbol_ref:HI ("C.3.1150") [flags 0x2] <var_decl 0x167e438 C.3>)) -1 (nil)
    (nil))

(insn 24 81 25 4 (set (reg:QI 0 r0)
        (mem/s/u:QI (reg:HI 52) [2 C.3+0 S5 A8])) -1 (nil)
    (nil))
Comment 2 Andrew Pinski 2005-04-16 16:46:26 UTC
Note the C.x variables are not normal VAR_DECLs but CONST_DECL so maybe avr should be changed to 
recongize them as such.
Comment 3 Paul Schlie 2005-04-16 18:22:09 UTC
Subject: Re:  Initializing string literal data
 improperly marked frame-relative?, should be readonly static const.

> Note the C.x variables are not normal VAR_DECLs but CONST_DECL so maybe avr
> should be changed to recongize them as such.

Actually the problem seems then be that literal string constants aren't
being consistently defined through CONST_DECL's (just as initializing char
array data, which are equivalent to string initializers, and all other
literal and static constants which end up being stored as literal data are);
for which MEM_READONLY_P allows all memory references to, to be easily
identified, which seems to be it's intent.

Is there any reason that literal string constant data shouldn't be similarly
declared and correspondingly identifiable? (or just an oversight?)


Comment 4 Paul Schlie 2005-04-16 18:41:46 UTC
Subject: Re:  Initializing string literal data
 improperly marked frame-relative?, should be readonly static const.

> From: Paul Schlie <schlie@comcast.net>
> Subject: Re: [Bug middle-end/21018] Initializing string literal data
> improperly marked frame-relative?, should be readonly static const.
> 
>> Note the C.x variables are not normal VAR_DECLs but CONST_DECL so maybe avr
>> should be changed to recongize them as such.
> 
> Actually the problem seems then be that literal string constants aren't
> being consistently defined through CONST_DECL's (just as initializing char
> array data, which are equivalent to string initializers, and all other literal
> and static constants which end up being stored as literal data are); for which
> MEM_READONLY_P allows all memory references to, to be easily identified, which
> seems to be it's intent.
> 
> Is there any reason that literal string constant data shouldn't be similarly
> declared and correspondingly identifiable? (or just an oversight?)

I suspect it was likely an artifact of the now depreciated writeable-strings
extension, which previously pretended that literal string constants were not
READONLY after being copied from the executable image into read/write
memory.


Comment 5 Manuel López-Ibáñez 2007-11-15 16:19:48 UTC
Is this still a valid bug?
Comment 6 Joerg Wunsch 2007-11-15 21:21:49 UTC
I'm not sure whether this is related or not... but from the description, it
looks so.

avr-libc contains a macro that helps the users declaring a flash-ROM string,
lacking any real support in GCC for different memory sections (as proposed
by the Embedded C draft).  This works by using the GCC extension that a
block can return a value: a block is openened, and a block-scope static
variable is allocated, using the string literal passed in by the user.  Then,
that block returns the address of this variable to the caller.  By giving
the block-scope variable the "progmem" attribute, it will eventually be
allocated in ROM rather than RAM.

Now, if that macro is called with identical string literals within one
translation unit, the strings are allocated separately rather than merged
into a single location.

This behaviour is reproducable as well on the i386 target, so it looks like
not really related to the avr backend.

Here's a simple test case.  For simplicity, the original PSTR() macro is
just reproduced in the file rather than including the entire original
header(s).

#if defined(__AVR__)
#  define PROGMEM __attribute__((__progmem__))
#else
#  define PROGMEM /* nothing */
#endif

#define PSTR(x) \
(__extension__({static const char __c[] PROGMEM = (x); &__c[0];}))

extern void puts_P(const char *);

void
dosomething(void)
{
        puts_P(PSTR("Hello world!"));
        puts_P(PSTR("Hello world!"));
}

Compiling for the avr target yields:

        .file   "foo.c"
__SREG__ = 0x3f
__SP_H__ = 0x3e
__SP_L__ = 0x3d
__tmp_reg__ = 0
__zero_reg__ = 1
        .global __do_copy_data
        .global __do_clear_bss
        .text
.global dosomething
        .type   dosomething, @function
dosomething:
/* prologue: frame size=0 */
/* prologue end (size=0) */
        ldi r24,lo8(__c.1471)
        ldi r25,hi8(__c.1471)
        rcall puts_P
        ldi r24,lo8(__c.1473)
        ldi r25,hi8(__c.1473)
        rcall puts_P
/* epilogue: frame size=0 */
        ret
/* epilogue end (size=1) */
/* function dosomething size 7 (6) */
        .size   dosomething, .-dosomething
        .section        .progmem.data,"a",@progbits
        .type   __c.1473, @object
        .size   __c.1473, 13
__c.1473:
        .string "Hello world!"
        .type   __c.1471, @object
        .size   __c.1471, 13
__c.1471:
        .string "Hello world!"
/* File "foo.c": code    7 = 0x0007 (   6), prologues   0, epilogues   1 */

Compiling the same file for the i386 target also shows two copies of the
string literal:

        .file   "foo.c"
        .section        .rodata
        .type   __c.0, @object
        .size   __c.0, 13
__c.0:
        .string "Hello world!"
        .type   __c.1, @object
        .size   __c.1, 13
__c.1:
        .string "Hello world!"
        .text
.globl dosomething
        .type   dosomething, @function
dosomething:
        pushl   %ebp
        movl    %esp, %ebp
        pushl   $__c.0
        call    puts_P
        movl    $__c.1, (%esp)
        call    puts_P
        leave
        ret
        .size   dosomething, .-dosomething
        .ident  "GCC: (GNU) 3.4.4 [FreeBSD] 20050518"
Comment 7 Paul Schlie 2007-11-16 02:35:26 UTC
Subject: Re:  Initializing string literal data
 improperly marked frame-relative?, should be readonly static const.

I believe so.


> From: manu at gcc dot gnu dot org <gcc-bugzilla@gcc.gnu.org>
> Reply-To: <gcc-bugzilla@gcc.gnu.org>
> Date: 15 Nov 2007 16:19:49 -0000
> To: <schlie@comcast.net>
> Subject: [Bug middle-end/21018] Initializing string literal data improperly
> marked frame-relative?, should be readonly static const.
> 
> 
> 
> ------- Comment #5 from manu at gcc dot gnu dot org  2007-11-15 16:19 -------
> Is this still a valid bug?
> 
> 
> -- 
> 
> manu at gcc dot gnu dot org changed:
> 
>            What    |Removed                     |Added
> ----------------------------------------------------------------------------
>                  CC|                            |manu at gcc dot gnu dot org
>              Status|UNCONFIRMED                 |WAITING
> 
> 
> http://gcc.gnu.org/bugzilla/show_bug.cgi?id=21018
> 
> ------- You are receiving this mail because: -------
> You reported the bug, or are watching the reporter.


Comment 8 Manuel López-Ibáñez 2009-08-05 15:07:42 UTC
OK, setting it back to NEW, but I don't have any idea about how to approach this.
Comment 9 Tomasz Francuz 2010-04-14 20:45:41 UTC
*** Bug 43746 has been marked as a duplicate of this bug. ***
Comment 10 Eric Weddington 2010-09-20 16:51:55 UTC
Closed as WONTFIX.
Comment 11 Georg-Johann Lay 2011-08-03 19:40:32 UTC
Setting this as duplicate; not because it's a real duplicate -- the original message is rather confusing and I can't really depict what it's intention is -- but because the contributors to this PR might be also interestet in PR43746 which touches similar topic like duplicate strings and string merging on AVR that is addressed here, too.

(In reply to comment #0)
> char s[5] = "abcde";
> char t[5] = {'a','b','c','d','e'};

Note that "abcde" is a string literal containing 6 chars (only 5 are copied) and the second initializer has length 5 bytes and is not a string.

> resulting tree/rtl:
>
> showing frame-relative reference to "abcde" initializing data which is wrong:
>
> (set (reg:HI 44)
>      (symbol_ref/f:HI ("*.LC0") [flags 0x2] <string_cst 0x160b840>))

Note that the /f flag has different meanings depending on where it is attached to, see http://gcc.gnu.org/viewcvs/trunk/gcc/rtl.h?view=markup

(In reply to comment #6)

> Now, if that macro is called with identical string literals within one
> translation unit, the strings are allocated separately rather than merged
> into a single location.
> 
> This behaviour is reproducable as well on the i386 target, so it looks like
> not really related to the avr backend.
> 
> [...]
> 
> #define PROGMEM __attribute__((__progmem__))
> 
> #define PSTR(x) \
> (__extension__({static const char __c[] PROGMEM = (x); &__c[0];}))
> 
> extern void puts_P(const char *);
> 
> void dosomething(void)
> {
>         puts_P(PSTR("Hello world!"));
>         puts_P(PSTR("Hello world!"));
> }
> 
> Compiling for the avr target yields:
> 
> [...]
>         .section        .progmem.data,"a",@progbits
>         .type   __c.1473, @object
>         .size   __c.1473, 13
> __c.1473:
>         .string "Hello world!"
>         .type   __c.1471, @object
>         .size   __c.1471, 13
> __c.1471:
>         .string "Hello world!"
>
> 
> Compiling the same file for the i386 target also shows two copies of the
> string literal: [...]

Problem is not the duplicate string literals it's the missing section flags like "aMS" for mergeable strings and binutils would take care of this.

This is rooted in the way avr backend up to now implements progem attribute, i.e. is simply bangs a "section .progmem" against a decl without caring for the kind of data. This is not hard to change, but it has to be implemented.

For constants in .rodata string merging was achieved wihout effort by cleaning up the avr backend.  Notice that .rodata is still in RAM but named .rodata now and not mapped to .data as in <= 4.6.

*** This bug has been marked as a duplicate of bug 43746 ***