PATCH: PR target/35767: x86 backend uses aligned load on unaligned memory
H.J. Lu
hjl.tools@gmail.com
Tue May 27 14:52:00 GMT 2008
Hi,
When __attribute__((packed)) is applied to __m128, it becomes
unaligned. We can't use aligned access on it. Also we should
check canonical type since it may be used for parameter passing.
This patch fixes the problem, similar to _Decimal128. OK for
mainline?
Thanks.
H.J.
----
gcc/
2008-03-30 H.J. Lu <hongjiu.lu@intel.com>
PR target/35767
PR target/35771
* config/i386/i386.c (ix86_function_arg_boundary): Use
alignment of canonical type.
(ix86_expand_vector_move): Check unaligned memory access for
all SSE modes.
gcc/testsuite/
2008-03-30 H.J. Lu <hongjiu.lu@intel.com>
PR target/35767
PR target/35771
* gcc.target/i386/pr35767-1.c: New.
* gcc.target/i386/pr35767-1d.c: Likewise.
* gcc.target/i386/pr35767-1i.c: Likewise.
* gcc.target/i386/pr35767-2.c: Likewise.
* gcc.target/i386/pr35767-2d.c: Likewise.
* gcc.target/i386/pr35767-2i.c: Likewise.
* gcc.target/i386/pr35767-3.c: Likewise.
* gcc.target/i386/pr35767-4.c: Likewise.
--- gcc/config/i386/i386.c.unaligned 2008-04-18 05:25:47.000000000 -0700
+++ gcc/config/i386/i386.c 2008-04-18 05:25:47.000000000 -0700
@@ -4627,7 +4627,12 @@ ix86_function_arg_boundary (enum machine
{
int align;
if (type)
- align = TYPE_ALIGN (type);
+ {
+ if (TYPE_STRUCTURAL_EQUALITY_P (type))
+ align = TYPE_ALIGN (type);
+ else
+ align = TYPE_ALIGN (TYPE_CANONICAL (type));
+ }
else
align = GET_MODE_ALIGNMENT (mode);
if (align < PARM_BOUNDARY)
@@ -10406,12 +10411,10 @@ ix86_expand_vector_move (enum machine_mo
&& standard_sse_constant_p (op1) <= 0)
op1 = validize_mem (force_const_mem (mode, op1));
- /* TDmode values are passed as TImode on the stack. TImode values
- are moved via xmm registers, and moving them to stack can result in
- unaligned memory access. Use ix86_expand_vector_move_misalign()
- if memory operand is not aligned correctly. */
+ /* We need to check memory alignment for SSE mode since attribute
+ can make operands unaligned. */
if (can_create_pseudo_p ()
- && (mode == TImode) && !TARGET_64BIT
+ && SSE_REG_MODE_P (mode)
&& ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
|| (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
{
--- gcc/testsuite/gcc.target/i386/pr35767-1.c.unaligned 2008-05-15 18:39:51.000000000 -0700
+++ gcc/testsuite/gcc.target/i386/pr35767-1.c 2008-05-16 06:03:43.000000000 -0700
@@ -0,0 +1,25 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -msse2" } */
+
+#include "sse2-check.h"
+
+typedef struct { __m128 f __attribute__((packed)); } packed;
+
+__m128 __attribute__((noinline))
+foo (__m128 a1, __m128 a2, __m128 a3, __m128 a4,
+ __m128 a5, __m128 a6, __m128 a7, __m128 a8,
+ int b1, int b2, int b3, int b4, int b5, int b6, int b7, packed y)
+{
+ return y.f;
+}
+
+void
+sse2_test (void)
+{
+ packed x;
+ __m128 y = { 0 };
+ x.f = y;
+ y = foo (y, y, y, y, y, y, y, y, 1, 2, 3, 4, 5, 6, -1, x);
+ if (__builtin_memcmp (&y, &x.f, sizeof (y)) != 0)
+ abort ();
+}
--- gcc/testsuite/gcc.target/i386/pr35767-1d.c.unaligned 2008-05-15 18:39:51.000000000 -0700
+++ gcc/testsuite/gcc.target/i386/pr35767-1d.c 2008-05-16 06:03:50.000000000 -0700
@@ -0,0 +1,25 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -msse2" } */
+
+#include "sse2-check.h"
+
+typedef struct { __m128d f __attribute__((packed)); } packed;
+
+__m128d __attribute__((noinline))
+foo (__m128d a1, __m128d a2, __m128d a3, __m128d a4,
+ __m128d a5, __m128d a6, __m128d a7, __m128d a8,
+ int b1, int b2, int b3, int b4, int b5, int b6, int b7, packed y)
+{
+ return y.f;
+}
+
+void
+sse2_test (void)
+{
+ packed x;
+ __m128d y = { 0 };
+ x.f = y;
+ y = foo (y, y, y, y, y, y, y, y, 1, 2, 3, 4, 5, 6, -1, x);
+ if (__builtin_memcmp (&y, &x.f, sizeof (y)) != 0)
+ abort ();
+}
--- gcc/testsuite/gcc.target/i386/pr35767-1i.c.unaligned 2008-05-15 18:39:51.000000000 -0700
+++ gcc/testsuite/gcc.target/i386/pr35767-1i.c 2008-05-16 06:03:55.000000000 -0700
@@ -0,0 +1,25 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -msse2" } */
+
+#include "sse2-check.h"
+
+typedef struct { __m128i f __attribute__((packed)); } packed;
+
+__m128i __attribute__((noinline))
+foo (__m128i a1, __m128i a2, __m128i a3, __m128i a4,
+ __m128i a5, __m128i a6, __m128i a7, __m128i a8,
+ int b1, int b2, int b3, int b4, int b5, int b6, int b7, packed y)
+{
+ return y.f;
+}
+
+void
+sse2_test (void)
+{
+ packed x;
+ __m128i y = { 0 };
+ x.f = y;
+ y = foo (y, y, y, y, y, y, y, y, 1, 2, 3, 4, 5, 6, -1, x);
+ if (__builtin_memcmp (&y, &x.f, sizeof (y)) != 0)
+ abort ();
+}
--- gcc/testsuite/gcc.target/i386/pr35767-2.c.unaligned 2008-05-15 18:39:51.000000000 -0700
+++ gcc/testsuite/gcc.target/i386/pr35767-2.c 2008-05-16 06:04:00.000000000 -0700
@@ -0,0 +1,25 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -msse2" } */
+
+#include "sse2-check.h"
+
+typedef __m128 __attribute__((aligned(1))) unaligned;
+
+__m128 __attribute__((noinline))
+foo (__m128 a1, __m128 a2, __m128 a3, __m128 a4,
+ __m128 a5, __m128 a6, __m128 a7, __m128 a8,
+ int b1, int b2, int b3, int b4, int b5, int b6, int b7, unaligned y)
+{
+ return y;
+}
+
+void
+sse2_test (void)
+{
+ unaligned x;
+ __m128 y = { 0 };
+ x = y;
+ y = foo (y, y, y, y, y, y, y, y, 1, 2, 3, 4, 5, 6, -1, x);
+ if (__builtin_memcmp (&y, &x, sizeof (y)) != 0)
+ abort ();
+}
--- gcc/testsuite/gcc.target/i386/pr35767-2d.c.unaligned 2008-05-15 18:39:51.000000000 -0700
+++ gcc/testsuite/gcc.target/i386/pr35767-2d.c 2008-05-16 06:04:05.000000000 -0700
@@ -0,0 +1,25 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -msse2" } */
+
+#include "sse2-check.h"
+
+typedef __m128d __attribute__((aligned(1))) unaligned;
+
+__m128d __attribute__((noinline))
+foo (__m128d a1, __m128d a2, __m128d a3, __m128d a4,
+ __m128d a5, __m128d a6, __m128d a7, __m128d a8,
+ int b1, int b2, int b3, int b4, int b5, int b6, int b7, unaligned y)
+{
+ return y;
+}
+
+void
+sse2_test (void)
+{
+ unaligned x;
+ __m128d y = { 0 };
+ x = y;
+ y = foo (y, y, y, y, y, y, y, y, 1, 2, 3, 4, 5, 6, -1, x);
+ if (__builtin_memcmp (&y, &x, sizeof (y)) != 0)
+ abort ();
+}
--- gcc/testsuite/gcc.target/i386/pr35767-2i.c.unaligned 2008-05-15 18:39:51.000000000 -0700
+++ gcc/testsuite/gcc.target/i386/pr35767-2i.c 2008-05-16 06:04:09.000000000 -0700
@@ -0,0 +1,25 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -msse2" } */
+
+#include "sse2-check.h"
+
+typedef __m128i __attribute__((aligned(1))) unaligned;
+
+__m128i __attribute__((noinline))
+foo (__m128i a1, __m128i a2, __m128i a3, __m128i a4,
+ __m128i a5, __m128i a6, __m128i a7, __m128i a8,
+ int b1, int b2, int b3, int b4, int b5, int b6, int b7, unaligned y)
+{
+ return y;
+}
+
+void
+sse2_test (void)
+{
+ unaligned x;
+ __m128i y = { 0 };
+ x = y;
+ y = foo (y, y, y, y, y, y, y, y, 1, 2, 3, 4, 5, 6, -1, x);
+ if (__builtin_memcmp (&y, &x, sizeof (y)) != 0)
+ abort ();
+}
--- gcc/testsuite/gcc.target/i386/pr35767-3.c.unaligned 2008-05-15 18:39:51.000000000 -0700
+++ gcc/testsuite/gcc.target/i386/pr35767-3.c 2008-05-15 18:39:51.000000000 -0700
@@ -0,0 +1,26 @@
+/* { dg-do run } */
+/* { dg-require-effective-target dfp } */
+/* { dg-options "-O -msse2 -std=gnu99" } */
+
+#include "sse2-check.h"
+
+typedef _Decimal128 unaligned __attribute__((aligned(1)));
+
+_Decimal128 __attribute__((noinline))
+foo (_Decimal128 a1, _Decimal128 a2, _Decimal128 a3, _Decimal128 a4,
+ _Decimal128 a5, _Decimal128 a6, _Decimal128 a7, _Decimal128 a8,
+ int b1, int b2, int b3, int b4, int b5, int b6, int b7, unaligned y)
+{
+ return y;
+}
+
+void
+sse2_test (void)
+{
+ unaligned x;
+ _Decimal128 y = -1;
+ x = y;
+ y = foo (0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, -1, x);
+ if (__builtin_memcmp (&y, &x, sizeof (y)))
+ abort ();
+}
--- gcc/testsuite/gcc.target/i386/pr35767-4.c.unaligned 2008-05-15 18:39:51.000000000 -0700
+++ gcc/testsuite/gcc.target/i386/pr35767-4.c 2008-05-15 18:39:51.000000000 -0700
@@ -0,0 +1,14 @@
+/* Test that we generate aligned load when memory is aligned. */
+/* { dg-do compile } */
+/* { dg-require-effective-target dfp } */
+/* { dg-options "-O -march=x86-64 -mtune=generic -std=gnu99" } */
+/* { dg-final { scan-assembler-not "movdqu" } } */
+/* { dg-final { scan-assembler "movdqa" } } */
+
+extern _Decimal128 foo (_Decimal128, _Decimal128, _Decimal128);
+
+void
+bar (void)
+{
+ foo (0, 0, 0);
+}
More information about the Gcc-patches
mailing list