Bug 29158 - store merge optimization not done
Summary: store merge optimization not done
Status: RESOLVED DUPLICATE of bug 23684
Alias: None
Product: gcc
Classification: Unclassified
Component: middle-end (show other bugs)
Version: 4.1.1
Importance: P3 enhancement
Target Milestone: ---
Assignee: Not yet assigned to anyone
URL:
Keywords: missed-optimization
Depends on:
Blocks:
 
Reported: 2006-09-21 04:36 UTC by Albert Cahalan
Modified: 2006-11-29 05:26 UTC (History)
2 users (show)

See Also:
Host:
Target:
Build:
Known to work:
Known to fail:
Last reconfirmed:


Attachments

Note You need to log in before you can comment on or make changes to this bug.
Description Albert Cahalan 2006-09-21 04:36:52 UTC
In the example below, define T as "char" or "short" (probably "int" too on a 64-bit machine, with minor adjustment to the example code). Compile the code with "-Os -S" and observe the assembly. Even if there are no aliasing or alignment issues, the compiler fails to merge multiple stores into larger stores.

This is tested with three gcc 4.1 compilers:

i386 from Fedora Core 6 test2
x86-64 from Fedora Core 6 test2
ppc32 from Debian-unstable

BTW, I hope I guessed right in filing against "middle-end". I didn't know if it was tree-optimization or rtl-optimization, and there still isn't an obvious place to file bugs which are common to both C and C++.

-------------------------------------
tmp 0 $ cat e.c
#ifndef T
#define T short
#endif

typedef struct S{
        T a;
        T b;
        T c;
        T d;
}S;

S *structbug(S *s){
        s->a = 1;
        s->b = 2;
        s->c = 3;
        s->d = 4;
        return s;
}

T *arraybug(T *s){
        s[0] = 1;
        s[1] = 2;
        s[2] = 3;
        s[3] = 4;
        return s;
}

tmp 0 $ gcc -W -Wall -m32 -Os -fomit-frame-pointer -S e.c
tmp 0 $ cat e.s
        .file   "e.c"
        .text
.globl structbug
        .type   structbug, @function
structbug:
        movl    4(%esp), %eax
        movw    $1, (%eax)
        movw    $2, 2(%eax)
        movw    $3, 4(%eax)
        movw    $4, 6(%eax)
        ret
        .size   structbug, .-structbug
.globl arraybug
        .type   arraybug, @function
arraybug:
        movl    4(%esp), %eax
        movw    $1, (%eax)
        movw    $2, 2(%eax)
        movw    $3, 4(%eax)
        movw    $4, 6(%eax)
        ret
        .size   arraybug, .-arraybug
        .ident  "GCC: (GNU) 4.1.1 20060828 (Red Hat 4.1.1-20)"
        .section        .note.GNU-stack,"",@progbits
tmp 0 $
Comment 1 Andrew Pinski 2006-09-21 04:39:19 UTC

*** This bug has been marked as a duplicate of 23684 ***
Comment 2 Albert Cahalan 2006-11-29 05:09:42 UTC
This hits it too. (example is PowerPC)

///////////////////
#include <string.h>

char *foo(char *buf)
{
 short temp;
 int temp1;

 *buf++=42;

 temp = 0xfeed;
 memcpy(buf, &temp, sizeof(temp));
 buf+=sizeof(temp);

 temp1 = 0x12345678;
 memcpy(buf, &temp1, sizeof(temp1));
 buf+=sizeof(temp1);

 temp1 = 0x12345678;
 memcpy(buf, &temp1, sizeof(temp1));
 buf+=sizeof(temp1);

 *buf++=42;

 return buf;
}
///////////////
foo:
        stwu 1,-32(1)
        lis 9,0x1234
        li 11,42
        li 0,-275
        ori 9,9,22136
        stb 11,11(3)
        stb 11,0(3)
        sth 0,1(3)
        stw 9,7(3)
        stw 9,3(3)
        addi 3,3,12
        addi 1,1,32
        blr
Comment 3 Andrew Pinski 2006-11-29 05:26:22 UTC
(In reply to comment #2)
> This hits it too. (example is PowerPC)
No, it does not; look at what it gives:
        stb 11,11(3)
        stb 11,0(3)
        sth 0,1(3)
        stw 9,7(3)
        stw 9,3(3)

Reordering the stores to be in a natural order:

        stb 11,0(3)
        sth 0,1(3)
        stw 9,3(3)
        stw 9,7(3)
        stb 11,11(3)

We do one byte store, then a half-word store, then two word stores, and finally one byte store. Yes, we could rearrange the stores to get three word stores, but is that really useful in this case? Almost certainly not, because the constant generation is going to hurt.