[patch i386 4.4/trunk]: Fix sse restore for x86_64 in case of !sp_valid

Kai Tietz ktietz70@googlemail.com
Tue Sep 15 16:59:00 GMT 2009


2009/9/15 Jakub Jelinek <jakub@redhat.com>:
> On Tue, Sep 15, 2009 at 06:17:45PM +0200, Kai Tietz wrote:
>> Sorry, added wrong version for 4.5 (without red_offset).
>
> Testcase?
>
>        Jakub
>

Hmm, well, a reduced, easy testcase... I have to see whether I can produce one.

The following test shows the issue for w64 pretty well, but I still have
to see whether I can turn it into a smaller, executable check. (To be
compiled with -O3 -fno-omit-frame-pointer)

#include <malloc.h>

double bar (double a, double b, double c, double d, char *h);
int boo (double a, double b, double c, double d);

double foo (double a, double b, double c, double d)
{
 int aa = boo (b, c, d, a);
 return bar (a, b, c, d, (char *) alloca (aa)) + bar (d, c, b, a,
(char *) alloca (aa));
}

With current 4.5, this produces the following assembly
(/usr/local/bin/x86_64-pc-mingw32-gcc.exe -S -O3 tsse.c -o t.s
-fno-omit-frame-pointer):
       .file   "tsse.c"
       .text
       .p2align 4,,15
.globl _foo
       .def    _foo;   .scl    2;      .type   32;     .endef
_foo:
       pushq   %rbp
       movq    %rsp, %rbp
       pushq   %rbx
       subq    $136, %rsp
       movdqa  %xmm6, -96(%rbp)
       movapd  %xmm3, %xmm6
       movdqa  %xmm7, -80(%rbp)
       movapd  %xmm2, %xmm7
       movdqa  %xmm8, -64(%rbp)
       movapd  %xmm1, %xmm8
       movdqa  %xmm9, -48(%rbp)
       movapd  %xmm0, %xmm3
       movapd  %xmm0, %xmm9
...
       leaq    -96(%rbp), %rsp
       addsd   %xmm10, %xmm0
       movdqa  48(%rsp), %xmm6
       movdqa  64(%rsp), %xmm7
       movdqa  80(%rsp), %xmm8
       movdqa  96(%rsp), %xmm9
       movdqa  112(%rsp), %xmm10
       addq    $88, %rsp
       popq    %rbx
       leave
       ret

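As you can easily see, the offsets are simply wrong: after the leaq,
%rsp already points at the xmm6 save slot (-96(%rbp)), yet the restores
start at 48(%rsp) instead of 0(%rsp). I assume that this doesn't show up
on x86, as SSE registers are quite rare there, and in general the ABI
requires no save of them in the prologue/epilogue.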

By changing the line I mentioned to use zero instead of
frame.to_allocate, the assembly produced looks correct:

       .file   "tsse.c"
       .text
       .p2align 4,,15
.globl _foo
       .def    _foo;   .scl    2;      .type   32;     .endef
_foo:
       pushq   %rbp
       movq    %rsp, %rbp
       pushq   %rbx
       subq    $136, %rsp
       movdqa  %xmm6, -96(%rbp)
       movapd  %xmm3, %xmm6
       movdqa  %xmm7, -80(%rbp)
       movapd  %xmm2, %xmm7
       movdqa  %xmm8, -64(%rbp)
       movapd  %xmm1, %xmm8
       movdqa  %xmm9, -48(%rbp)
       movapd  %xmm0, %xmm3
       movapd  %xmm0, %xmm9
       movapd  %xmm6, %xmm2
       movapd  %xmm7, %xmm1
       movapd  %xmm8, %xmm0
       movdqa  %xmm10, -32(%rbp)
...

      leaq    -96(%rbp), %rsp
      addsd   %xmm10, %xmm0
      movdqa  (%rsp), %xmm6
      movdqa  16(%rsp), %xmm7
      movdqa  32(%rsp), %xmm8
      movdqa  48(%rsp), %xmm9
      movdqa  64(%rsp), %xmm10
      addq    $88, %rsp
      popq    %rbx
      leave
      ret


-- 
|  (\_/) This is Bunny. Copy and paste
| (='.'=) Bunny into your signature to help
| (")_(") him gain world domination



More information about the Gcc-patches mailing list