This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: PATCH: PR target/32000/35767: x86 backend uses aligned load on unaligned memory


On Mon, Mar 31, 2008 at 01:25:17PM +0200, Uros Bizjak wrote:
> Hello!
> 
> > 2008-03-30  H.J. Lu  <hongjiu.lu@intel.com>
> >
> > 	PR target/32000
> > 	* config/i386/i386.md (*movti_internal): Emit unaligned SSE
> > 	load/store if memory is unaligned.
> > 	(*movti_rex64): Likewise.
> >
> > 	* config/i386/predicates.md (misaligned_operand): New.
> >
> > 2008-03-30  H.J. Lu  <hongjiu.lu@intel.com>
> >
> > 	PR target/32000
> > 	* gcc.target/i386/pr32000-1.c: New.
> > 	* gcc.target/i386/pr32000-2.c: Likewise.
> > 	* gcc.target/i386/pr32000-3.c: Likewise.
> > 	* gcc.target/i386/pr32000-4.c: Likewise.
> > 	* gcc.target/i386/pr32000-5.c: Likewise.
> > 	* gcc.target/i386/pr32000-6.c: Likewise.
> 
> This is OK for mainline (AFAICS, this is not a regression on 4.3).
> 

I checked in the modified patch without pr32000-[2-6].c, since
pr32000-2.c belongs to PR 35767 and the other testcases aren't useful
after PR 35767 is fixed.

There is a similar bug for SSE modes:

http://gcc.gnu.org/bugzilla/show_bug.cgi?id=35767

There is another bug that occurs when types with canonical types have
user-defined alignment:

http://gcc.gnu.org/bugzilla/show_bug.cgi?id=35771

The middle end uses canonical types for parameter passing in the
caller, but the x86 backend uses the original types to access
parameters in the callee. This mismatch causes the callee to read the
parameter at the wrong offset on the stack.

This patch changes the x86 backend to use canonical types in the
callee and to check for unaligned memory access in all SSE modes. It
also fixes a bug in assign_stack_local_1 by setting the missing
alignment on the stack slot. Otherwise, ix86_expand_vector_move will
generate unaligned loads from stack variables to registers, as shown
in gcc.target/i386/pr35767-4.c, since stack variables don't have the
correct alignment. Tested on Linux/ia32 and Linux/Intel64. OK
to install?

Thanks.


H.J.
----
gcc/

2008-03-30  H.J. Lu  <hongjiu.lu@intel.com>

	PR target/35767
	PR target/35771
	* config/i386/i386.c (ix86_function_arg_boundary): Use
	alignment of canonical type.
	(ix86_expand_vector_move): Check unaligned memory access for
	all SSE modes.

	* function.c (assign_stack_local_1): Set alignment on stack
	slot.

gcc/testsuite/

2008-03-30  H.J. Lu  <hongjiu.lu@intel.com>

	PR target/35767
	PR target/35771
	* gcc.target/i386/pr35767-1.c: New.
	* gcc.target/i386/pr35767-1d.c: Likewise.
	* gcc.target/i386/pr35767-1i.c: Likewise.
	* gcc.target/i386/pr35767-2.c: Likewise.
	* gcc.target/i386/pr35767-2d.c: Likewise.
	* gcc.target/i386/pr35767-2i.c: Likewise.
	* gcc.target/i386/pr35767-3.c: Likewise.
	* gcc.target/i386/pr35767-4.c: Likewise.

--- gcc/config/i386/i386.c.unaligned	2008-03-30 21:58:28.000000000 -0700
+++ gcc/config/i386/i386.c	2008-03-30 22:29:04.000000000 -0700
@@ -4579,7 +4579,12 @@ ix86_function_arg_boundary (enum machine
 {
   int align;
   if (type)
-    align = TYPE_ALIGN (type);
+    {
+      if (TYPE_STRUCTURAL_EQUALITY_P (type))
+	align = TYPE_ALIGN (type);
+      else
+	align = TYPE_ALIGN (TYPE_CANONICAL (type));
+    }
   else
     align = GET_MODE_ALIGNMENT (mode);
   if (align < PARM_BOUNDARY)
@@ -10331,12 +10336,10 @@ ix86_expand_vector_move (enum machine_mo
       && standard_sse_constant_p (op1) <= 0)
     op1 = validize_mem (force_const_mem (mode, op1));
 
-  /* TDmode values are passed as TImode on the stack.  TImode values
-     are moved via xmm registers, and moving them to stack can result in
-     unaligned memory access.  Use ix86_expand_vector_move_misalign()
-     if memory operand is not aligned correctly.  */
+  /* We need to check memory alignment for SSE mode since attribute
+     can make operands unaligned.  */
   if (can_create_pseudo_p ()
-      && (mode == TImode) && !TARGET_64BIT
+      && SSE_REG_MODE_P (mode)
       && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
 	  || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
     {
--- gcc/function.c.unaligned	2008-03-19 06:34:51.000000000 -0700
+++ gcc/function.c	2008-03-31 06:25:10.000000000 -0700
@@ -403,7 +403,7 @@ assign_stack_local_1 (enum machine_mode 
 {
   rtx x, addr;
   int bigend_correction = 0;
-  unsigned int alignment;
+  unsigned int alignment, alignment_in_bits;
   int frame_off, frame_alignment, frame_phase;
 
   if (align == 0)
@@ -440,8 +440,10 @@ assign_stack_local_1 (enum machine_mode 
   if (alignment * BITS_PER_UNIT > PREFERRED_STACK_BOUNDARY)
     alignment = PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT;
 
-  if (function->stack_alignment_needed < alignment * BITS_PER_UNIT)
-    function->stack_alignment_needed = alignment * BITS_PER_UNIT;
+  alignment_in_bits = alignment * BITS_PER_UNIT;
+
+  if (function->stack_alignment_needed < alignment_in_bits)
+    function->stack_alignment_needed = alignment_in_bits;
 
   /* Calculate how many bytes the start of local variables is off from
      stack alignment.  */
@@ -494,6 +496,7 @@ assign_stack_local_1 (enum machine_mode 
     function->x_frame_offset += size;
 
   x = gen_rtx_MEM (mode, addr);
+  set_mem_align (x, alignment_in_bits);
   MEM_NOTRAP_P (x) = 1;
 
   function->x_stack_slot_list
--- gcc/testsuite/gcc.target/i386/pr35767-1.c.unaligned	2008-03-30 21:58:28.000000000 -0700
+++ gcc/testsuite/gcc.target/i386/pr35767-1.c	2008-03-30 21:58:28.000000000 -0700
@@ -0,0 +1,27 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -msse2" } */
+
+#include "sse2-check.h"
+
+typedef float __m128 __attribute__ ((__vector_size__ (16), __may_alias__));
+
+typedef struct { __m128 f __attribute__((packed)); } packed;
+
+__m128  __attribute__((noinline))
+foo (__m128 a1, __m128 a2, __m128 a3, __m128 a4,
+     __m128 a5, __m128 a6, __m128 a7, __m128 a8,
+     int b1, int b2, int b3, int b4, int b5, int b6, int b7, packed y)
+{
+  return y.f;
+}
+
+void
+sse2_test (void)
+{
+  packed x;
+  __m128 y = { 0 };
+  x.f = y; 
+  y = foo (y, y, y, y, y, y, y, y, 1, 2, 3, 4, 5, 6, -1, x);
+  if (__builtin_memcmp (&y, &x.f, sizeof (y)) != 0)
+    abort ();
+}
--- gcc/testsuite/gcc.target/i386/pr35767-1d.c.unaligned	2008-03-30 21:58:28.000000000 -0700
+++ gcc/testsuite/gcc.target/i386/pr35767-1d.c	2008-03-30 21:58:28.000000000 -0700
@@ -0,0 +1,27 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -msse2" } */
+
+#include "sse2-check.h"
+
+typedef double __m128d __attribute__ ((__vector_size__ (16), __may_alias__));
+
+typedef struct { __m128d f __attribute__((packed)); } packed;
+
+__m128d  __attribute__((noinline))
+foo (__m128d a1, __m128d a2, __m128d a3, __m128d a4,
+     __m128d a5, __m128d a6, __m128d a7, __m128d a8,
+     int b1, int b2, int b3, int b4, int b5, int b6, int b7, packed y)
+{
+  return y.f;
+}
+
+void
+sse2_test (void)
+{
+  packed x;
+  __m128d y = { 0 };
+  x.f = y; 
+  y = foo (y, y, y, y, y, y, y, y, 1, 2, 3, 4, 5, 6, -1, x);
+  if (__builtin_memcmp (&y, &x.f, sizeof (y)) != 0)
+    abort ();
+}
--- gcc/testsuite/gcc.target/i386/pr35767-1i.c.unaligned	2008-03-30 21:58:28.000000000 -0700
+++ gcc/testsuite/gcc.target/i386/pr35767-1i.c	2008-03-30 21:58:28.000000000 -0700
@@ -0,0 +1,27 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -msse2" } */
+
+#include "sse2-check.h"
+
+typedef int __m128i __attribute__ ((__vector_size__ (16), __may_alias__));
+
+typedef struct { __m128i f __attribute__((packed)); } packed;
+
+__m128i  __attribute__((noinline))
+foo (__m128i a1, __m128i a2, __m128i a3, __m128i a4,
+     __m128i a5, __m128i a6, __m128i a7, __m128i a8,
+     int b1, int b2, int b3, int b4, int b5, int b6, int b7, packed y)
+{
+  return y.f;
+}
+
+void
+sse2_test (void)
+{
+  packed x;
+  __m128i y = { 0 };
+  x.f = y; 
+  y = foo (y, y, y, y, y, y, y, y, 1, 2, 3, 4, 5, 6, -1, x);
+  if (__builtin_memcmp (&y, &x.f, sizeof (y)) != 0)
+    abort ();
+}
--- gcc/testsuite/gcc.target/i386/pr35767-2.c.unaligned	2008-03-30 21:58:28.000000000 -0700
+++ gcc/testsuite/gcc.target/i386/pr35767-2.c	2008-03-30 21:58:28.000000000 -0700
@@ -0,0 +1,27 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -msse2" } */
+
+#include "sse2-check.h"
+
+typedef float __m128 __attribute__ ((__vector_size__ (16), __may_alias__));
+
+typedef  __m128 __attribute__((aligned(1))) unaligned;
+
+__m128  __attribute__((noinline))
+foo (__m128 a1, __m128 a2, __m128 a3, __m128 a4,
+     __m128 a5, __m128 a6, __m128 a7, __m128 a8,
+     int b1, int b2, int b3, int b4, int b5, int b6, int b7, unaligned y)
+{
+  return y;
+}
+
+void
+sse2_test (void)
+{
+  unaligned x;
+  __m128 y = { 0 };
+  x = y; 
+  y = foo (y, y, y, y, y, y, y, y, 1, 2, 3, 4, 5, 6, -1, x);
+  if (__builtin_memcmp (&y, &x, sizeof (y)) != 0)
+    abort ();
+}
--- gcc/testsuite/gcc.target/i386/pr35767-2d.c.unaligned	2008-03-30 21:58:28.000000000 -0700
+++ gcc/testsuite/gcc.target/i386/pr35767-2d.c	2008-03-30 21:58:28.000000000 -0700
@@ -0,0 +1,27 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -msse2" } */
+
+#include "sse2-check.h"
+
+typedef double __m128d __attribute__ ((__vector_size__ (16), __may_alias__));
+
+typedef  __m128d __attribute__((aligned(1))) unaligned;
+
+__m128d  __attribute__((noinline))
+foo (__m128d a1, __m128d a2, __m128d a3, __m128d a4,
+     __m128d a5, __m128d a6, __m128d a7, __m128d a8,
+     int b1, int b2, int b3, int b4, int b5, int b6, int b7, unaligned y)
+{
+  return y;
+}
+
+void
+sse2_test (void)
+{
+  unaligned x;
+  __m128d y = { 0 };
+  x = y; 
+  y = foo (y, y, y, y, y, y, y, y, 1, 2, 3, 4, 5, 6, -1, x);
+  if (__builtin_memcmp (&y, &x, sizeof (y)) != 0)
+    abort ();
+}
--- gcc/testsuite/gcc.target/i386/pr35767-2i.c.unaligned	2008-03-30 21:58:28.000000000 -0700
+++ gcc/testsuite/gcc.target/i386/pr35767-2i.c	2008-03-30 21:58:28.000000000 -0700
@@ -0,0 +1,27 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -msse2" } */
+
+#include "sse2-check.h"
+
+typedef int __m128i __attribute__ ((__vector_size__ (16), __may_alias__));
+
+typedef  __m128i __attribute__((aligned(1))) unaligned;
+
+__m128i  __attribute__((noinline))
+foo (__m128i a1, __m128i a2, __m128i a3, __m128i a4,
+     __m128i a5, __m128i a6, __m128i a7, __m128i a8,
+     int b1, int b2, int b3, int b4, int b5, int b6, int b7, unaligned y)
+{
+  return y;
+}
+
+void
+sse2_test (void)
+{
+  unaligned x;
+  __m128i y = { 0 };
+  x = y; 
+  y = foo (y, y, y, y, y, y, y, y, 1, 2, 3, 4, 5, 6, -1, x);
+  if (__builtin_memcmp (&y, &x, sizeof (y)) != 0)
+    abort ();
+}
--- gcc/testsuite/gcc.target/i386/pr35767-3.c.unaligned	2008-03-30 21:58:28.000000000 -0700
+++ gcc/testsuite/gcc.target/i386/pr35767-3.c	2008-03-30 21:58:28.000000000 -0700
@@ -0,0 +1,26 @@
+/* { dg-do run } */
+/* { dg-require-effective-target dfp } */
+/* { dg-options "-O -msse2 -std=gnu99" } */
+
+#include "sse2-check.h"
+
+typedef _Decimal128 unaligned __attribute__((aligned(1)));
+
+_Decimal128 __attribute__((noinline))
+foo (_Decimal128 a1, _Decimal128 a2, _Decimal128 a3, _Decimal128 a4,
+     _Decimal128 a5, _Decimal128 a6, _Decimal128 a7, _Decimal128 a8,
+     int b1, int b2, int b3, int b4, int b5, int b6, int b7, unaligned y)
+{
+  return y;
+}
+
+void
+sse2_test (void)
+{
+  unaligned x;
+  _Decimal128 y = -1;
+  x = y;
+  y = foo (0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, -1, x);
+  if (__builtin_memcmp (&y, &x, sizeof (y)))
+    abort ();
+}
--- gcc/testsuite/gcc.target/i386/pr35767-4.c.unaligned	2008-03-31 06:26:27.000000000 -0700
+++ gcc/testsuite/gcc.target/i386/pr35767-4.c	2008-03-31 06:21:02.000000000 -0700
@@ -0,0 +1,14 @@
+/* Test that we generate aligned load when memory is aligned.  */
+/* { dg-do compile } */
+/* { dg-require-effective-target dfp } */
+/* { dg-options "-O -march=x86-64 -mtune=generic -std=gnu99" } */
+/* { dg-final { scan-assembler-not "movdqu" } } */
+/* { dg-final { scan-assembler "movdqa" } } */
+
+extern _Decimal128 foo (_Decimal128, _Decimal128, _Decimal128);
+
+void
+bar (void)
+{
+  foo (0, 0, 0);
+}


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]