This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[x86 RFC]: SSE function arguments are not properly aligned on stack
- From: Uros Bizjak <uros dot bizjak at kss-loka dot si>
- To: gcc-patches at gcc dot gnu dot org
- Date: Thu, 30 Jun 2005 11:26:04 +0200
- Subject: [x86 RFC]: SSE function arguments are not properly aligned on stack
Hello!
As shown in PR target/22229, gcc currently generates unaligned movaps when SSE
arguments to a function are passed on the stack. The problem is that the
cfun->stack_alignment_needed variable is not updated properly.
To show the problem: in the following compiled example (gcc -O2 -msse) from the PR,
the main function calls foo:
main:
pushl %ebp
movl %esp, %ebp
subl $40, %esp
movaps .LC0, %xmm0
andl $-16, %esp <<< %esp aligned here
subl $16, %esp <<< %esp - 0x10
call foo <<< call to foo, %esp - 0x14
movl $.LC1, (%esp)
movaps %xmm0, -24(%ebp)
flds -16(%ebp)
fstpl 4(%esp)
call printf
xorl %eax, %eax
leave
ret
foo:
pushl %ebp <<< %esp - 0x18
movl %esp, %ebp
subl $16, %esp <<< %esp - 0x28
movaps %xmm0, (%esp) <<< segfault
movaps %xmm0, %xmm2
movaps %xmm0, %xmm1
call bar
leave
ret
This problem can be solved with the following patch, which forces correct stack
alignment when an SSE value is passed on the stack:
--- i386.c_ 2005-06-30 09:58:47.000000000 +0200
+++ i386.c 2005-06-30 09:41:18.000000000 +0200
@@ -2812,6 +2812,16 @@ function_arg_advance (CUMULATIVE_ARGS *c
cum->sse_words += words;
cum->sse_nregs -= 1;
cum->sse_regno += 1;
+
+ if (cum->sse_nregs < 0)
+ {
+ if (TARGET_DEBUG_ARG)
+ fprintf(stderr, "function_adv: forced "
+ "stack_alignment_needed to %i\n",
+ (int) GET_MODE_ALIGNMENT (mode));
+ cfun->stack_alignment_needed = GET_MODE_ALIGNMENT (mode);
+ }
+
if (cum->sse_nregs <= 0)
{
cum->sse_nregs = 0;
This patch (hack) produces correctly aligned stack references:
bar:
pushl %ebp
movl %esp, %ebp
subl $8, %esp
leave
ret
foo:
pushl %ebp <<< %esp - 0x18
movl %esp, %ebp
subl $24, %esp <<< %esp - 0x30
movaps %xmm0, (%esp) <<< OK.
movaps %xmm0, %xmm2
movaps %xmm0, %xmm1
call bar
addl $16, %esp
leave
ret
Similar problems wrt __builtin_apply can be shown by compiling
gcc.dg/20020218-1.c with -O2 -msse:
foo:
pushl %ebp <<< %esp - 0x18
movl %esp, %ebp <<< %ebp = %esp
pushl %esi
pushl %ebx
subl $144, %esp <<< %esp aligned, %ebp - 0x18
movl 8(%ebp), %ebx
leal 4(%ebx), %edx
movl %edx, -12(%ebp)
movl %esp, %esi
subl $32, %esp
movl 4(%ebx), %eax
movl %eax, (%esp)
movl 4(%edx), %eax
movl %eax, 4(%esp)
movl -8(%ebp), %eax
movl -4(%ebp), %edx
movl 0(%ebp), %ecx
movaps 4(%ebp), %xmm0 <<< segfault, unaligned access to argblock
movaps 20(%ebp), %xmm1
movaps 36(%ebp), %xmm2
call *(%ebx)
fstp %st(1)
movl %eax, -152(%ebp)
fstpt -148(%ebp)
movaps %xmm0, -136(%ebp)
movl %esi, %esp
movl -152(%ebp), %eax
fldt -148(%ebp)
movaps -136(%ebp), %xmm0
fstp %st(0)
leal -8(%ebp), %esp
popl %ebx
popl %esi
leave
ret
Although this hack somewhat fixes x86 argument passing, it doesn't fix __builtin_apply,
because that function calculates alignment inside the argument block by itself. It
looks like the offset of the argument block is calculated incorrectly.
This patch has not been tested on x86_64 at all, and although it fixes alignment
problems on x86, I don't know if the approach is correct.
Uros.