Bug 24177 - function returning structure produce very long/slow assembly
Summary: function returning structure produce very long/slow assembly
Status: RESOLVED FIXED
Alias: None
Product: gcc
Classification: Unclassified
Component: tree-optimization (show other bugs)
Version: 4.0.2
: P2 enhancement
Target Milestone: ---
Assignee: Not yet assigned to anyone
URL:
Keywords: missed-optimization
Depends on:
Blocks: 14295
  Show dependency treegraph
 
Reported: 2005-10-03 15:45 UTC by etienne_lorrain
Modified: 2016-12-15 11:36 UTC (History)
2 users (show)

See Also:
Host:
Target: MOVE_COST is low
Build:
Known to work:
Known to fail:
Last reconfirmed: 2009-04-22 22:45:29


Attachments

Note You need to log in before you can comment on or make changes to this bug.
Description etienne_lorrain 2005-10-03 15:45:41 UTC
Compiling this file with -O2 optimisation:
----------------
/* Aggregate of four ints, returned by value from fct() and passed by
 * address to fct3(). */
struct str {
        int a, b, c, d;
};

void fct3 (struct str *);

/* Returns a struct str by value, initialised to { 1, 2, 3, 4 }.
 * In the listings quoted in this report no separate fct symbol or call to
 * fct appears; its body is expanded inside fct2. */
extern inline struct str fct (void)
{
        struct str returned = { 1, 2, 3, 4 };
        return returned;
}

/* Function whose generated code is the subject of this report: it stores
 * the result of fct() in a local and hands the local's address to fct3(). */
void fct2 (void)
{
        struct str tmp;

        /* Struct assignment from a by-value return: this is the copy that
         * gets materialised several times in the quoted assembly. */
        tmp = fct ();
        fct3 (&tmp);
}
----------------
  with this compiler:
gcc version 4.0.2 20050913 (prerelease) (Debian 4.0.1-7)
  produces this assembly, which keeps three copies of the structure
 on the stack plus one constant copy in .rodata:
$ gcc -O2 tmp.c -S -o tmp.s
$ cat tmp.s
        .file   "tmp.c"
        .section        .rodata
        .align 4
        .type   C.0.1141, @object
        .size   C.0.1141, 16
C.0.1141:
        .long   1
        .long   2
        .long   3
        .long   4
        .text
        .p2align 4,,15
.globl fct2
        .type   fct2, @function
# fct2 as emitted at -O2 (i386, AT&T syntax).
# The 16-byte structure is copied three times with "rep movsl" (4 dwords
# each): .rodata -> -56(%ebp) -> -24(%ebp) -> -40(%ebp); only the last copy
# is passed to fct3.
fct2:
        pushl   %ebp
        movl    %esp, %ebp
        # %edi and %esi are saved here and restored before ret; they serve
        # as destination/source pointers for the string copies below.
        pushl   %edi
        pushl   %esi
        subl    $76, %esp
        # Copy 1: constant C.0.1141 in .rodata -> stack slot at -56(%ebp).
        leal    -56(%ebp), %edi
        movl    $C.0.1141, %esi
        cld
        movl    $4, %ecx
        rep
        movsl
        # Copy 2: -56(%ebp) -> -24(%ebp).
        leal    -24(%ebp), %edi
        leal    -56(%ebp), %esi
        # %ecx is zero after the previous rep, so writing %cl alone sets
        # the count back to 4.
        movb    $4, %cl
        rep
        movsl
        # Copy 3: -24(%ebp) -> -40(%ebp).
        leal    -40(%ebp), %edi
        leal    -24(%ebp), %esi
        movb    $4, %cl
        rep
        movsl
        # Pass the address of the final copy as the single stack argument.
        leal    -40(%ebp), %eax
        pushl   %eax
        call    fct3
        addl    $16, %esp
        # Recompute %esp from the frame pointer so it points at the saved
        # %esi/%edi, then unwind the frame.
        leal    -8(%ebp), %esp
        popl    %esi
        popl    %edi
        popl    %ebp
        ret
        .size   fct2, .-fct2
        .ident  "GCC: (GNU) 4.0.2 20050913 (prerelease) (Debian 4.0.1-7)"
        .section        .note.GNU-stack,"",@progbits
----------------
  When compiled with -Os instead, the "memcpy" function is called three times:
$ gcc -Os tmp.c -S -o tmp.s
$ cat tmp.s
        .file   "tmp.c"
        .section        .rodata
        .align 4
        .type   C.0.1141, @object
        .size   C.0.1141, 16
C.0.1141:
        .long   1
        .long   2
        .long   3
        .long   4
        .text
.globl fct2
        .type   fct2, @function
# fct2 as emitted at -Os (i386, AT&T syntax).
# Same three 16-byte copies as the -O2 version, but each one is a call to
# memcpy: .rodata -> -56(%ebp) -> -24(%ebp) -> -40(%ebp).
fct2:
        pushl   %ebp
        movl    %esp, %ebp
        # %esi and %ebx are saved here and restored before ret; they hold
        # the buffer addresses across the memcpy calls.
        pushl   %esi
        pushl   %ebx
        subl    $48, %esp
        # Copy 1: memcpy(-56(%ebp), C.0.1141, 16); arguments are pushed
        # last-to-first (size, source, destination).
        leal    -56(%ebp), %ebx
        pushl   $16
        pushl   $C.0.1141
        pushl   %ebx
        call    memcpy
        # Copy 2: memcpy(-24(%ebp), -56(%ebp), 16).
        leal    -24(%ebp), %esi
        pushl   $16
        pushl   %ebx
        pushl   %esi
        call    memcpy
        # Copy 3: memcpy(-40(%ebp), -24(%ebp), 16).
        leal    -40(%ebp), %ebx
        pushl   $16
        pushl   %esi
        pushl   %ebx
        call    memcpy
        # Drop the nine 4-byte arguments pushed for the three memcpy calls.
        addl    $36, %esp
        # Pass the address of the final copy to fct3.
        pushl   %ebx
        call    fct3
        # Discards the fct3 argument; the value loaded into %eax is unused.
        popl    %eax
        # Recompute %esp from the frame pointer so it points at the saved
        # %ebx/%esi, then unwind the frame.
        leal    -8(%ebp), %esp
        popl    %ebx
        popl    %esi
        popl    %ebp
        ret
        .size   fct2, .-fct2
        .ident  "GCC: (GNU) 4.0.2 20050913 (prerelease) (Debian 4.0.1-7)"
        .section        .note.GNU-stack,"",@progbits
----------------
  This is not a regression: gcc-3.4* and gcc-2.95 do not produce very good
 assembly code for this source file either.

  Etienne.
Comment 1 Andrew Pinski 2005-10-03 15:55:31 UTC
Confirmed, this is just another case where aggregate copy propagation is needed.
Comment 2 etienne_lorrain 2016-12-15 11:36:35 UTC
Closing: the code generated by g++ (GCC-Explorer-Build) 7.0.0 20161113 (experimental) from this source is now:
# fct2 as shown by Compiler Explorer for GCC 7.0.0 (symbol names are
# displayed demangled). The structure is built once, directly on the
# stack: no .rodata constant, no rep movsl and no memcpy.
fct2():
        subl    $40, %esp
        # %eax = address of the structure, taken before the stores below.
        leal    12(%esp), %eax
        # Store the four int values at consecutive 4-byte offsets.
        movl    $1, 12(%esp)
        movl    $2, 16(%esp)
        movl    $3, 20(%esp)
        movl    $4, 24(%esp)
        pushl   %eax
        call    fct3(str*)
        # Release the 40-byte frame plus the 4-byte pushed argument.
        addl    $44, %esp
        ret