This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: PATCH: PR target/35767: x86 backend uses aligned load on unaligned memory


On Tue, May 27, 2008 at 11:36 AM, Uros Bizjak <ubizjak@gmail.com> wrote:
> H.J. Lu wrote:
>
>>> IIRC, using SSE_REG_MODE_P resulted in unaligned access when SSE mode
>>> values were passed on (or spilled to?) the stack, so the change that
>>>
>>
>> I noticed it also. It was a middle-end bug where it failed to
>> record alignment on the spill slot. It was fixed as part of
>> PR middle-end/36253:
>>
>> http://gcc.gnu.org/ml/gcc-patches/2008-05/msg01615.html
>>
>> @@ -432,6 +438,7 @@
>>     frame_offset += size;
>>     x = gen_rtx_MEM (mode, addr);
>> +  set_mem_align (x, alignment_in_bits);
>>   MEM_NOTRAP_P (x) = 1;
>>     stack_slot_list
>>
>>
>>>
>>> your patch now reverts was introduced [1]. Since there was a lot of
>>> work recently in this area, can you check how SSE values are pushed to
>>> stack with your patch?
>>>
>>> [1] http://gcc.gnu.org/ml/gcc-patches/2007-05/msg01863.html
>>>
>>
>> I will double check it.
>>
>
> I have checked -m32 and -m64 using following test:
>
> --cut here--
> typedef float v4sf __attribute__ ((__vector_size__ (16)));
>
> extern void foo(v4sf a, v4sf b, v4sf c, v4sf d,
>       v4sf e, v4sf f, v4sf g, v4sf h, v4sf i);
>
> int test(void)
> {
>  v4sf x = { 0.0, 1.0, 2.0, 3.0 };
>
>  foo (x, x, x, x, x, x, x, x, x);
>
>  return 0;
> }
> --cut here--
>
> It always produced movaps, so the patch is OK with me, but please resolve
> the canonical type issue with Paolo before committing this patch. (BTW: Can
> you also add the above test to the testsuite, checking for movups with -msse2
> to cover unaligned loads when SSE args are passed to a function?)
>

Hi Paolo,

Here is the updated patch with a new testcase to check aligned loads.
Is there still a problem with the canonical type?

Thanks.


H.J.
---
gcc/

2008-05-27  H.J. Lu  <hongjiu.lu@intel.com>

        PR target/35767
        PR target/35771
        * config/i386/i386.c (ix86_function_arg_boundary): Use
        alignment of canonical type.
        (ix86_expand_vector_move): Check unaligned memory access for
        all SSE modes.

gcc/testsuite/

2008-05-27  H.J. Lu  <hongjiu.lu@intel.com>

        PR target/35767
        PR target/35771
        * gcc.target/i386/pr35767-1.c: New.
        * gcc.target/i386/pr35767-1d.c: Likewise.
        * gcc.target/i386/pr35767-1i.c: Likewise.
        * gcc.target/i386/pr35767-2.c: Likewise.
        * gcc.target/i386/pr35767-2d.c: Likewise.
        * gcc.target/i386/pr35767-2i.c: Likewise.
        * gcc.target/i386/pr35767-3.c: Likewise.
        * gcc.target/i386/pr35767-4.c: Likewise.
        * gcc.target/i386/pr35767-5.c: Likewise.
gcc/

2008-05-27  H.J. Lu  <hongjiu.lu@intel.com>

	PR target/35767
	PR target/35771
	* config/i386/i386.c (ix86_function_arg_boundary): Use
	alignment of canonical type.
	(ix86_expand_vector_move): Check unaligned memory access for
	all SSE modes.

gcc/testsuite/

2008-05-27  H.J. Lu  <hongjiu.lu@intel.com>

	PR target/35767
	PR target/35771
	* gcc.target/i386/pr35767-1.c: New.
	* gcc.target/i386/pr35767-1d.c: Likewise.
	* gcc.target/i386/pr35767-1i.c: Likewise.
	* gcc.target/i386/pr35767-2.c: Likewise.
	* gcc.target/i386/pr35767-2d.c: Likewise.
	* gcc.target/i386/pr35767-2i.c: Likewise.
	* gcc.target/i386/pr35767-3.c: Likewise.
	* gcc.target/i386/pr35767-4.c: Likewise.
	* gcc.target/i386/pr35767-5.c: Likewise.

--- gcc/config/i386/i386.c.unaligned	2008-05-27 11:08:08.000000000 -0700
+++ gcc/config/i386/i386.c	2008-05-27 11:08:10.000000000 -0700
@@ -4642,7 +4642,12 @@ ix86_function_arg_boundary (enum machine
 {
   int align;
   if (type)
-    align = TYPE_ALIGN (type);
+    {
+      if (TYPE_STRUCTURAL_EQUALITY_P (type))
+	align = TYPE_ALIGN (type);
+      else
+	align = TYPE_ALIGN (TYPE_CANONICAL (type));
+    }
   else
     align = GET_MODE_ALIGNMENT (mode);
   if (align < PARM_BOUNDARY)
@@ -10441,12 +10446,10 @@ ix86_expand_vector_move (enum machine_mo
       && standard_sse_constant_p (op1) <= 0)
     op1 = validize_mem (force_const_mem (mode, op1));
 
-  /* TDmode values are passed as TImode on the stack.  TImode values
-     are moved via xmm registers, and moving them to stack can result in
-     unaligned memory access.  Use ix86_expand_vector_move_misalign()
-     if memory operand is not aligned correctly.  */
+  /* We need to check memory alignment for SSE mode since attribute
+     can make operands unaligned.  */
   if (can_create_pseudo_p ()
-      && (mode == TImode) && !TARGET_64BIT
+      && SSE_REG_MODE_P (mode)
       && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
 	  || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
     {
--- gcc/testsuite/gcc.target/i386/pr35767-1.c.unaligned	2008-05-27 11:08:10.000000000 -0700
+++ gcc/testsuite/gcc.target/i386/pr35767-1.c	2008-05-27 11:08:10.000000000 -0700
@@ -0,0 +1,25 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -msse2" } */
+
+#include "sse2-check.h"
+
+typedef struct { __m128 f __attribute__((packed)); } packed;
+
+__m128  __attribute__((noinline))
+foo (__m128 a1, __m128 a2, __m128 a3, __m128 a4,
+     __m128 a5, __m128 a6, __m128 a7, __m128 a8,
+     int b1, int b2, int b3, int b4, int b5, int b6, int b7, packed y)
+{
+  return y.f;
+}
+
+void
+sse2_test (void)
+{
+  packed x;
+  __m128 y = { 0 };
+  x.f = y; 
+  y = foo (y, y, y, y, y, y, y, y, 1, 2, 3, 4, 5, 6, -1, x);
+  if (__builtin_memcmp (&y, &x.f, sizeof (y)) != 0)
+    abort ();
+}
--- gcc/testsuite/gcc.target/i386/pr35767-1d.c.unaligned	2008-05-27 11:08:10.000000000 -0700
+++ gcc/testsuite/gcc.target/i386/pr35767-1d.c	2008-05-27 11:08:10.000000000 -0700
@@ -0,0 +1,25 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -msse2" } */
+
+#include "sse2-check.h"
+
+typedef struct { __m128d f __attribute__((packed)); } packed;
+
+__m128d  __attribute__((noinline))
+foo (__m128d a1, __m128d a2, __m128d a3, __m128d a4,
+     __m128d a5, __m128d a6, __m128d a7, __m128d a8,
+     int b1, int b2, int b3, int b4, int b5, int b6, int b7, packed y)
+{
+  return y.f;
+}
+
+void
+sse2_test (void)
+{
+  packed x;
+  __m128d y = { 0 };
+  x.f = y; 
+  y = foo (y, y, y, y, y, y, y, y, 1, 2, 3, 4, 5, 6, -1, x);
+  if (__builtin_memcmp (&y, &x.f, sizeof (y)) != 0)
+    abort ();
+}
--- gcc/testsuite/gcc.target/i386/pr35767-1i.c.unaligned	2008-05-27 11:08:10.000000000 -0700
+++ gcc/testsuite/gcc.target/i386/pr35767-1i.c	2008-05-27 11:08:10.000000000 -0700
@@ -0,0 +1,25 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -msse2" } */
+
+#include "sse2-check.h"
+
+typedef struct { __m128i f __attribute__((packed)); } packed;
+
+__m128i  __attribute__((noinline))
+foo (__m128i a1, __m128i a2, __m128i a3, __m128i a4,
+     __m128i a5, __m128i a6, __m128i a7, __m128i a8,
+     int b1, int b2, int b3, int b4, int b5, int b6, int b7, packed y)
+{
+  return y.f;
+}
+
+void
+sse2_test (void)
+{
+  packed x;
+  __m128i y = { 0 };
+  x.f = y; 
+  y = foo (y, y, y, y, y, y, y, y, 1, 2, 3, 4, 5, 6, -1, x);
+  if (__builtin_memcmp (&y, &x.f, sizeof (y)) != 0)
+    abort ();
+}
--- gcc/testsuite/gcc.target/i386/pr35767-2.c.unaligned	2008-05-27 11:08:10.000000000 -0700
+++ gcc/testsuite/gcc.target/i386/pr35767-2.c	2008-05-27 11:08:10.000000000 -0700
@@ -0,0 +1,25 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -msse2" } */
+
+#include "sse2-check.h"
+
+typedef  __m128 __attribute__((aligned(1))) unaligned;
+
+__m128  __attribute__((noinline))
+foo (__m128 a1, __m128 a2, __m128 a3, __m128 a4,
+     __m128 a5, __m128 a6, __m128 a7, __m128 a8,
+     int b1, int b2, int b3, int b4, int b5, int b6, int b7, unaligned y)
+{
+  return y;
+}
+
+void
+sse2_test (void)
+{
+  unaligned x;
+  __m128 y = { 0 };
+  x = y; 
+  y = foo (y, y, y, y, y, y, y, y, 1, 2, 3, 4, 5, 6, -1, x);
+  if (__builtin_memcmp (&y, &x, sizeof (y)) != 0)
+    abort ();
+}
--- gcc/testsuite/gcc.target/i386/pr35767-2d.c.unaligned	2008-05-27 11:08:10.000000000 -0700
+++ gcc/testsuite/gcc.target/i386/pr35767-2d.c	2008-05-27 11:08:10.000000000 -0700
@@ -0,0 +1,25 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -msse2" } */
+
+#include "sse2-check.h"
+
+typedef  __m128d __attribute__((aligned(1))) unaligned;
+
+__m128d  __attribute__((noinline))
+foo (__m128d a1, __m128d a2, __m128d a3, __m128d a4,
+     __m128d a5, __m128d a6, __m128d a7, __m128d a8,
+     int b1, int b2, int b3, int b4, int b5, int b6, int b7, unaligned y)
+{
+  return y;
+}
+
+void
+sse2_test (void)
+{
+  unaligned x;
+  __m128d y = { 0 };
+  x = y; 
+  y = foo (y, y, y, y, y, y, y, y, 1, 2, 3, 4, 5, 6, -1, x);
+  if (__builtin_memcmp (&y, &x, sizeof (y)) != 0)
+    abort ();
+}
--- gcc/testsuite/gcc.target/i386/pr35767-2i.c.unaligned	2008-05-27 11:08:10.000000000 -0700
+++ gcc/testsuite/gcc.target/i386/pr35767-2i.c	2008-05-27 11:08:10.000000000 -0700
@@ -0,0 +1,25 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -msse2" } */
+
+#include "sse2-check.h"
+
+typedef  __m128i __attribute__((aligned(1))) unaligned;
+
+__m128i  __attribute__((noinline))
+foo (__m128i a1, __m128i a2, __m128i a3, __m128i a4,
+     __m128i a5, __m128i a6, __m128i a7, __m128i a8,
+     int b1, int b2, int b3, int b4, int b5, int b6, int b7, unaligned y)
+{
+  return y;
+}
+
+void
+sse2_test (void)
+{
+  unaligned x;
+  __m128i y = { 0 };
+  x = y; 
+  y = foo (y, y, y, y, y, y, y, y, 1, 2, 3, 4, 5, 6, -1, x);
+  if (__builtin_memcmp (&y, &x, sizeof (y)) != 0)
+    abort ();
+}
--- gcc/testsuite/gcc.target/i386/pr35767-3.c.unaligned	2008-05-27 11:08:10.000000000 -0700
+++ gcc/testsuite/gcc.target/i386/pr35767-3.c	2008-05-27 11:08:10.000000000 -0700
@@ -0,0 +1,26 @@
+/* { dg-do run } */
+/* { dg-require-effective-target dfp } */
+/* { dg-options "-O -msse2 -std=gnu99" } */
+
+#include "sse2-check.h"
+
+typedef _Decimal128 unaligned __attribute__((aligned(1)));
+
+_Decimal128 __attribute__((noinline))
+foo (_Decimal128 a1, _Decimal128 a2, _Decimal128 a3, _Decimal128 a4,
+     _Decimal128 a5, _Decimal128 a6, _Decimal128 a7, _Decimal128 a8,
+     int b1, int b2, int b3, int b4, int b5, int b6, int b7, unaligned y)
+{
+  return y;
+}
+
+void
+sse2_test (void)
+{
+  unaligned x;
+  _Decimal128 y = -1;
+  x = y;
+  y = foo (0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, -1, x);
+  if (__builtin_memcmp (&y, &x, sizeof (y)))
+    abort ();
+}
--- gcc/testsuite/gcc.target/i386/pr35767-4.c.unaligned	2008-05-27 11:08:10.000000000 -0700
+++ gcc/testsuite/gcc.target/i386/pr35767-4.c	2008-05-27 11:08:10.000000000 -0700
@@ -0,0 +1,14 @@
+/* Test that we generate aligned load when memory is aligned.  */
+/* { dg-do compile } */
+/* { dg-require-effective-target dfp } */
+/* { dg-options "-O -march=x86-64 -mtune=generic -std=gnu99" } */
+/* { dg-final { scan-assembler-not "movdqu" } } */
+/* { dg-final { scan-assembler "movdqa" } } */
+
+extern _Decimal128 foo (_Decimal128, _Decimal128, _Decimal128);
+
+void
+bar (void)
+{
+  foo (0, 0, 0);
+}
--- gcc/testsuite/gcc.target/i386/pr35767-5.c.unaligned	2008-05-27 11:47:08.000000000 -0700
+++ gcc/testsuite/gcc.target/i386/pr35767-5.c	2008-05-27 11:49:30.000000000 -0700
@@ -0,0 +1,17 @@
+/* Test that we generate aligned load when memory is aligned.  */
+/* { dg-do compile } */
+/* { dg-options "-O -msse2 -mtune=generic" } */
+/* { dg-final { scan-assembler-not "movups" } } */
+/* { dg-final { scan-assembler "movaps" } } */
+
+typedef float v4sf __attribute__ ((__vector_size__ (16)));
+
+extern void foo(v4sf, v4sf, v4sf, v4sf, v4sf, v4sf, v4sf, v4sf, v4sf);
+
+int test(void)
+{
+  v4sf x = { 0.0, 1.0, 2.0, 3.0 };
+
+  foo (x, x, x, x, x, x, x, x, x);
+  return 0;
+}

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]