This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Re: PATCH: PR target/32000/35767: x86 backend uses aligned load on unaligned memory
- From: "H.J. Lu" <hjl dot tools at gmail dot com>
- To: Uros Bizjak <ubizjak at gmail dot com>
- Cc: GCC Patches <gcc-patches at gcc dot gnu dot org>
- Date: Mon, 31 Mar 2008 07:00:35 -0700
- Subject: Re: PATCH: PR target/32000/35767: x86 backend uses aligned load on unaligned memory
- References: <5787cf470803310425q153e1c46n6abec99fbc99f99@mail.gmail.com>
On Mon, Mar 31, 2008 at 01:25:17PM +0200, Uros Bizjak wrote:
> Hello!
>
> > 2008-03-30 H.J. Lu <hongjiu.lu@intel.com>
> >
> > PR target/32000
> > * config/i386/i386.md (*movti_internal): Emit unaligned SSE
> > load/store if memory is unaligned.
> > (*movti_rex64): Likewise.
> >
> > * config/i386/predicates.md (misaligned_operand): New.
> >
> > 2008-03-30 H.J. Lu <hongjiu.lu@intel.com>
> >
> > PR target/32000
> > * gcc.target/i386/pr32000-1.c: New.
> > * gcc.target/i386/pr32000-2.c: Likewise.
> > * gcc.target/i386/pr32000-3.c: Likewise.
> > * gcc.target/i386/pr32000-4.c: Likewise.
> > * gcc.target/i386/pr32000-5.c: Likewise.
> > * gcc.target/i386/pr32000-6.c: Likewise.
>
> This is OK for mainline (AFAICS, this is not a regression on 4.3).
>
I checked in the modified patch without pr32000-[2-6].c, since pr32000-2.c
belongs to PR 35767 and the other testcases aren't useful after
PR 35767 is fixed.
There is a similar bug for SSE modes:
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=35767
There is another bug when types with canonical types have user-defined
alignment:
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=35771
The middle end uses canonical types for parameter passing in the caller, but
the x86 backend uses the original types to access parameters in the callee.
This mismatch causes the callee to read the parameter at the wrong offset
on the stack.
This patch changes the x86 backend to use canonical types in the
callee and to check for unaligned memory access in all SSE modes. It
also fixes a bug in assign_stack_local_1 by setting the missing alignment
on the stack slot. Otherwise ix86_expand_vector_move would generate
unaligned loads from stack variables to registers, as shown in
gcc.target/i386/pr35767-4.c, since stack variables wouldn't have the
correct alignment. Tested on Linux/ia32 and Linux/Intel64. OK
to install?
Thanks.
H.J.
----
gcc/
2008-03-30 H.J. Lu <hongjiu.lu@intel.com>
PR target/35767
PR target/35771
* config/i386/i386.c (ix86_function_arg_boundary): Use
alignment of canonical type.
(ix86_expand_vector_move): Check unaligned memory access for
all SSE modes.
* function.c (assign_stack_local_1): Set alignment on stack
slot.
gcc/testsuite/
2008-03-30 H.J. Lu <hongjiu.lu@intel.com>
PR target/35767
PR target/35771
* gcc.target/i386/pr35767-1.c: New.
* gcc.target/i386/pr35767-1d.c: Likewise.
* gcc.target/i386/pr35767-1i.c: Likewise.
* gcc.target/i386/pr35767-2.c: Likewise.
* gcc.target/i386/pr35767-2d.c: Likewise.
* gcc.target/i386/pr35767-2i.c: Likewise.
* gcc.target/i386/pr35767-3.c: Likewise.
* gcc.target/i386/pr35767-4.c: Likewise.
--- gcc/config/i386/i386.c.unaligned 2008-03-30 21:58:28.000000000 -0700
+++ gcc/config/i386/i386.c 2008-03-30 22:29:04.000000000 -0700
@@ -4579,7 +4579,12 @@ ix86_function_arg_boundary (enum machine
{
int align;
if (type)
- align = TYPE_ALIGN (type);
+ {
+ if (TYPE_STRUCTURAL_EQUALITY_P (type))
+ align = TYPE_ALIGN (type);
+ else
+ align = TYPE_ALIGN (TYPE_CANONICAL (type));
+ }
else
align = GET_MODE_ALIGNMENT (mode);
if (align < PARM_BOUNDARY)
@@ -10331,12 +10336,10 @@ ix86_expand_vector_move (enum machine_mo
&& standard_sse_constant_p (op1) <= 0)
op1 = validize_mem (force_const_mem (mode, op1));
- /* TDmode values are passed as TImode on the stack. TImode values
- are moved via xmm registers, and moving them to stack can result in
- unaligned memory access. Use ix86_expand_vector_move_misalign()
- if memory operand is not aligned correctly. */
+ /* We need to check memory alignment for SSE mode since attribute
+ can make operands unaligned. */
if (can_create_pseudo_p ()
- && (mode == TImode) && !TARGET_64BIT
+ && SSE_REG_MODE_P (mode)
&& ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
|| (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
{
--- gcc/function.c.unaligned 2008-03-19 06:34:51.000000000 -0700
+++ gcc/function.c 2008-03-31 06:25:10.000000000 -0700
@@ -403,7 +403,7 @@ assign_stack_local_1 (enum machine_mode
{
rtx x, addr;
int bigend_correction = 0;
- unsigned int alignment;
+ unsigned int alignment, alignment_in_bits;
int frame_off, frame_alignment, frame_phase;
if (align == 0)
@@ -440,8 +440,10 @@ assign_stack_local_1 (enum machine_mode
if (alignment * BITS_PER_UNIT > PREFERRED_STACK_BOUNDARY)
alignment = PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT;
- if (function->stack_alignment_needed < alignment * BITS_PER_UNIT)
- function->stack_alignment_needed = alignment * BITS_PER_UNIT;
+ alignment_in_bits = alignment * BITS_PER_UNIT;
+
+ if (function->stack_alignment_needed < alignment_in_bits)
+ function->stack_alignment_needed = alignment_in_bits;
/* Calculate how many bytes the start of local variables is off from
stack alignment. */
@@ -494,6 +496,7 @@ assign_stack_local_1 (enum machine_mode
function->x_frame_offset += size;
x = gen_rtx_MEM (mode, addr);
+ set_mem_align (x, alignment_in_bits);
MEM_NOTRAP_P (x) = 1;
function->x_stack_slot_list
--- gcc/testsuite/gcc.target/i386/pr35767-1.c.unaligned 2008-03-30 21:58:28.000000000 -0700
+++ gcc/testsuite/gcc.target/i386/pr35767-1.c 2008-03-30 21:58:28.000000000 -0700
@@ -0,0 +1,27 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -msse2" } */
+
+#include "sse2-check.h"
+
+typedef float __m128 __attribute__ ((__vector_size__ (16), __may_alias__));
+
+typedef struct { __m128 f __attribute__((packed)); } packed;
+
+__m128 __attribute__((noinline))
+foo (__m128 a1, __m128 a2, __m128 a3, __m128 a4,
+ __m128 a5, __m128 a6, __m128 a7, __m128 a8,
+ int b1, int b2, int b3, int b4, int b5, int b6, int b7, packed y)
+{
+ return y.f;
+}
+
+void
+sse2_test (void)
+{
+ packed x;
+ __m128 y = { 0 };
+ x.f = y;
+ y = foo (y, y, y, y, y, y, y, y, 1, 2, 3, 4, 5, 6, -1, x);
+ if (__builtin_memcmp (&y, &x.f, sizeof (y)) != 0)
+ abort ();
+}
--- gcc/testsuite/gcc.target/i386/pr35767-1d.c.unaligned 2008-03-30 21:58:28.000000000 -0700
+++ gcc/testsuite/gcc.target/i386/pr35767-1d.c 2008-03-30 21:58:28.000000000 -0700
@@ -0,0 +1,27 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -msse2" } */
+
+#include "sse2-check.h"
+
+typedef double __m128d __attribute__ ((__vector_size__ (16), __may_alias__));
+
+typedef struct { __m128d f __attribute__((packed)); } packed;
+
+__m128d __attribute__((noinline))
+foo (__m128d a1, __m128d a2, __m128d a3, __m128d a4,
+ __m128d a5, __m128d a6, __m128d a7, __m128d a8,
+ int b1, int b2, int b3, int b4, int b5, int b6, int b7, packed y)
+{
+ return y.f;
+}
+
+void
+sse2_test (void)
+{
+ packed x;
+ __m128d y = { 0 };
+ x.f = y;
+ y = foo (y, y, y, y, y, y, y, y, 1, 2, 3, 4, 5, 6, -1, x);
+ if (__builtin_memcmp (&y, &x.f, sizeof (y)) != 0)
+ abort ();
+}
--- gcc/testsuite/gcc.target/i386/pr35767-1i.c.unaligned 2008-03-30 21:58:28.000000000 -0700
+++ gcc/testsuite/gcc.target/i386/pr35767-1i.c 2008-03-30 21:58:28.000000000 -0700
@@ -0,0 +1,27 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -msse2" } */
+
+#include "sse2-check.h"
+
+typedef int __m128i __attribute__ ((__vector_size__ (16), __may_alias__));
+
+typedef struct { __m128i f __attribute__((packed)); } packed;
+
+__m128i __attribute__((noinline))
+foo (__m128i a1, __m128i a2, __m128i a3, __m128i a4,
+ __m128i a5, __m128i a6, __m128i a7, __m128i a8,
+ int b1, int b2, int b3, int b4, int b5, int b6, int b7, packed y)
+{
+ return y.f;
+}
+
+void
+sse2_test (void)
+{
+ packed x;
+ __m128i y = { 0 };
+ x.f = y;
+ y = foo (y, y, y, y, y, y, y, y, 1, 2, 3, 4, 5, 6, -1, x);
+ if (__builtin_memcmp (&y, &x.f, sizeof (y)) != 0)
+ abort ();
+}
--- gcc/testsuite/gcc.target/i386/pr35767-2.c.unaligned 2008-03-30 21:58:28.000000000 -0700
+++ gcc/testsuite/gcc.target/i386/pr35767-2.c 2008-03-30 21:58:28.000000000 -0700
@@ -0,0 +1,27 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -msse2" } */
+
+#include "sse2-check.h"
+
+typedef float __m128 __attribute__ ((__vector_size__ (16), __may_alias__));
+
+typedef __m128 __attribute__((aligned(1))) unaligned;
+
+__m128 __attribute__((noinline))
+foo (__m128 a1, __m128 a2, __m128 a3, __m128 a4,
+ __m128 a5, __m128 a6, __m128 a7, __m128 a8,
+ int b1, int b2, int b3, int b4, int b5, int b6, int b7, unaligned y)
+{
+ return y;
+}
+
+void
+sse2_test (void)
+{
+ unaligned x;
+ __m128 y = { 0 };
+ x = y;
+ y = foo (y, y, y, y, y, y, y, y, 1, 2, 3, 4, 5, 6, -1, x);
+ if (__builtin_memcmp (&y, &x, sizeof (y)) != 0)
+ abort ();
+}
--- gcc/testsuite/gcc.target/i386/pr35767-2d.c.unaligned 2008-03-30 21:58:28.000000000 -0700
+++ gcc/testsuite/gcc.target/i386/pr35767-2d.c 2008-03-30 21:58:28.000000000 -0700
@@ -0,0 +1,27 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -msse2" } */
+
+#include "sse2-check.h"
+
+typedef double __m128d __attribute__ ((__vector_size__ (16), __may_alias__));
+
+typedef __m128d __attribute__((aligned(1))) unaligned;
+
+__m128d __attribute__((noinline))
+foo (__m128d a1, __m128d a2, __m128d a3, __m128d a4,
+ __m128d a5, __m128d a6, __m128d a7, __m128d a8,
+ int b1, int b2, int b3, int b4, int b5, int b6, int b7, unaligned y)
+{
+ return y;
+}
+
+void
+sse2_test (void)
+{
+ unaligned x;
+ __m128d y = { 0 };
+ x = y;
+ y = foo (y, y, y, y, y, y, y, y, 1, 2, 3, 4, 5, 6, -1, x);
+ if (__builtin_memcmp (&y, &x, sizeof (y)) != 0)
+ abort ();
+}
--- gcc/testsuite/gcc.target/i386/pr35767-2i.c.unaligned 2008-03-30 21:58:28.000000000 -0700
+++ gcc/testsuite/gcc.target/i386/pr35767-2i.c 2008-03-30 21:58:28.000000000 -0700
@@ -0,0 +1,27 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -msse2" } */
+
+#include "sse2-check.h"
+
+typedef int __m128i __attribute__ ((__vector_size__ (16), __may_alias__));
+
+typedef __m128i __attribute__((aligned(1))) unaligned;
+
+__m128i __attribute__((noinline))
+foo (__m128i a1, __m128i a2, __m128i a3, __m128i a4,
+ __m128i a5, __m128i a6, __m128i a7, __m128i a8,
+ int b1, int b2, int b3, int b4, int b5, int b6, int b7, unaligned y)
+{
+ return y;
+}
+
+void
+sse2_test (void)
+{
+ unaligned x;
+ __m128i y = { 0 };
+ x = y;
+ y = foo (y, y, y, y, y, y, y, y, 1, 2, 3, 4, 5, 6, -1, x);
+ if (__builtin_memcmp (&y, &x, sizeof (y)) != 0)
+ abort ();
+}
--- gcc/testsuite/gcc.target/i386/pr35767-3.c.unaligned 2008-03-30 21:58:28.000000000 -0700
+++ gcc/testsuite/gcc.target/i386/pr35767-3.c 2008-03-30 21:58:28.000000000 -0700
@@ -0,0 +1,26 @@
+/* { dg-do run } */
+/* { dg-require-effective-target dfp } */
+/* { dg-options "-O -msse2 -std=gnu99" } */
+
+#include "sse2-check.h"
+
+typedef _Decimal128 unaligned __attribute__((aligned(1)));
+
+_Decimal128 __attribute__((noinline))
+foo (_Decimal128 a1, _Decimal128 a2, _Decimal128 a3, _Decimal128 a4,
+ _Decimal128 a5, _Decimal128 a6, _Decimal128 a7, _Decimal128 a8,
+ int b1, int b2, int b3, int b4, int b5, int b6, int b7, unaligned y)
+{
+ return y;
+}
+
+void
+sse2_test (void)
+{
+ unaligned x;
+ _Decimal128 y = -1;
+ x = y;
+ y = foo (0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, -1, x);
+ if (__builtin_memcmp (&y, &x, sizeof (y)))
+ abort ();
+}
--- gcc/testsuite/gcc.target/i386/pr35767-4.c.unaligned 2008-03-31 06:26:27.000000000 -0700
+++ gcc/testsuite/gcc.target/i386/pr35767-4.c 2008-03-31 06:21:02.000000000 -0700
@@ -0,0 +1,14 @@
+/* Test that we generate aligned load when memory is aligned. */
+/* { dg-do compile } */
+/* { dg-require-effective-target dfp } */
+/* { dg-options "-O -march=x86-64 -mtune=generic -std=gnu99" } */
+/* { dg-final { scan-assembler-not "movdqu" } } */
+/* { dg-final { scan-assembler "movdqa" } } */
+
+extern _Decimal128 foo (_Decimal128, _Decimal128, _Decimal128);
+
+void
+bar (void)
+{
+ foo (0, 0, 0);
+}