PATCH: PR target/32000: x86 backend uses aligned load on unaligned memory


On Tue, Mar 25, 2008 at 2:51 PM, H.J. Lu <hjl.tools@gmail.com> wrote:
> We may generate unaligned accesses with those *mov*_internal
>  patterns, but we never check whether the memory access is aligned.
>  This patch adds an alignment check to those *mov*_internal patterns.
>

Here is the updated patch with a few testcases. However, this patch
introduces many unaligned loads from the stack, because
assign_stack_local_1 doesn't update the alignment of the stack slot.
I have a patch:

http://gcc.gnu.org/ml/gcc-patches/2008-03/msg01631.html

which works on simple testcases.
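
For reference, here is a minimal sketch of the kind of code that
triggers the PR, condensed from the pr32000-3.c testcase below (the
bar function is just for illustration):

typedef float __m128 __attribute__ ((__vector_size__ (16), __may_alias__));

/* The packed attribute drops the 16-byte alignment of the member,
   so p->f may sit at an address that is only byte-aligned.  */
typedef struct { __m128 f __attribute__((packed)); } packed;

__m128
bar (packed *p)
{
  /* A movaps load here can fault at run time; since
     MEM_ALIGN < GET_MODE_ALIGNMENT, the backend must emit movups.  */
  return p->f;
}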


H.J.
gcc/

2008-03-26  H.J. Lu  <hongjiu.lu@intel.com>

	PR target/32000
	* config/i386/i386-protos.h (ix86_aligned_p): Removed.
	(ix86_mode_aligned_access_p): New.

	* config/i386/i386.c (ix86_mode_aligned_access_p): New.

	* config/i386/i386.md (*movti_internal): Emit unaligned SSE
	load/store if memory is unaligned.
	(*movti_rex64): Likewise.
	(*movtf_internal): Likewise.
	* config/i386/sse.md (*mov<mode>_internal): Likewise.
	(*movv4sf_internal): Likewise.
	(*movv2df_internal): Likewise.


gcc/testsuite/

2008-03-26  H.J. Lu  <hongjiu.lu@intel.com>

	PR target/32000
	* gcc.target/i386/pr32000-1.c: New.
	* gcc.target/i386/pr32000-2.c: Likewise.
	* gcc.target/i386/pr32000-3.c: Likewise.
	* gcc.target/i386/pr32000-4.c: Likewise.
	* gcc.target/i386/pr32000-5.c: Likewise.
	* gcc.target/i386/pr32000-6.c: Likewise.
	* gcc.target/i386/pr32000-7.c: Likewise.
	* gcc.target/i386/pr32000-8.c: Likewise.
	* gcc.target/i386/pr32000-9.c: Likewise.

--- gcc/config/i386/i386-protos.h.packed	2008-03-26 12:33:57.000000000 -0700
+++ gcc/config/i386/i386-protos.h	2008-03-26 12:34:08.000000000 -0700
@@ -36,8 +36,7 @@ extern void ix86_output_addr_vec_elt (FI
 extern void ix86_output_addr_diff_elt (FILE *, int, int);
 
 #ifdef RTX_CODE
-extern int ix86_aligned_p (rtx);
-
+extern bool ix86_mode_aligned_access_p (rtx, rtx);
 extern int standard_80387_constant_p (rtx);
 extern const char *standard_80387_constant_opcode (rtx);
 extern rtx standard_80387_constant_rtx (int);
--- gcc/config/i386/i386.c.packed	2008-03-26 12:33:57.000000000 -0700
+++ gcc/config/i386/i386.c	2008-03-26 12:34:08.000000000 -0700
@@ -25459,6 +25459,24 @@ x86_builtin_vectorization_cost (bool run
     return 0;
 }
 
+/* Return true if the memory access to SRC or DEST is aligned for
+   its mode.  */
+
+bool
+ix86_mode_aligned_access_p (rtx dest, rtx src)
+{
+  rtx mem;
+
+  if (MEM_P (src))
+    mem = src;
+  else if (MEM_P (dest))
+    mem = dest;
+  else
+    return true;
+
+  return MEM_ALIGN (mem) >= GET_MODE_ALIGNMENT (GET_MODE (mem));
+}
+
 /* Initialize the GCC target structure.  */
 #undef TARGET_ATTRIBUTE_TABLE
 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
--- gcc/config/i386/i386.md.packed	2008-03-26 12:33:57.000000000 -0700
+++ gcc/config/i386/i386.md	2008-03-26 12:34:08.000000000 -0700
@@ -2387,10 +2387,20 @@
 	return "pxor\t%0, %0";
     case 1:
     case 2:
-      if (get_attr_mode (insn) == MODE_V4SF)
-	return "movaps\t{%1, %0|%0, %1}";
+      if (ix86_mode_aligned_access_p (operands[0], operands[1]))
+	{
+	  if (get_attr_mode (insn) == MODE_V4SF)
+	    return "movaps\t{%1, %0|%0, %1}";
+	  else
+	    return "movdqa\t{%1, %0|%0, %1}";
+	}
       else
-	return "movdqa\t{%1, %0|%0, %1}";
+	{
+	  if (get_attr_mode (insn) == MODE_V4SF)
+	    return "movups\t{%1, %0|%0, %1}";
+	  else
+	    return "movdqu\t{%1, %0|%0, %1}";
+	}
     default:
       gcc_unreachable ();
     }
@@ -2424,10 +2434,20 @@
 	return "pxor\t%0, %0";
     case 3:
     case 4:
-      if (get_attr_mode (insn) == MODE_V4SF)
-	return "movaps\t{%1, %0|%0, %1}";
+      if (ix86_mode_aligned_access_p (operands[0], operands[1]))
+	{
+	  if (get_attr_mode (insn) == MODE_V4SF)
+	    return "movaps\t{%1, %0|%0, %1}";
+	  else
+	    return "movdqa\t{%1, %0|%0, %1}";
+	}
       else
-	return "movdqa\t{%1, %0|%0, %1}";
+	{
+	  if (get_attr_mode (insn) == MODE_V4SF)
+	    return "movups\t{%1, %0|%0, %1}";
+	  else
+	    return "movdqu\t{%1, %0|%0, %1}";
+	}
     default:
       gcc_unreachable ();
     }
@@ -3223,10 +3243,20 @@
     {
     case 0:
     case 1:
-      if (get_attr_mode (insn) == MODE_V4SF)
-	return "movaps\t{%1, %0|%0, %1}";
+      if (ix86_mode_aligned_access_p (operands[1], operands[0]))
+	{
+	  if (get_attr_mode (insn) == MODE_V4SF)
+	    return "movaps\t{%1, %0|%0, %1}";
+	  else
+	    return "movdqa\t{%1, %0|%0, %1}";
+	}
       else
-	return "movdqa\t{%1, %0|%0, %1}";
+	{
+	  if (get_attr_mode (insn) == MODE_V4SF)
+	    return "movups\t{%1, %0|%0, %1}";
+	  else
+	    return "movdqu\t{%1, %0|%0, %1}";
+	}
     case 2:
       if (get_attr_mode (insn) == MODE_V4SF)
 	return "xorps\t%0, %0";
--- gcc/config/i386/sse.md.packed	2008-03-26 12:33:57.000000000 -0700
+++ gcc/config/i386/sse.md	2008-03-26 12:34:08.000000000 -0700
@@ -86,10 +86,20 @@
       return standard_sse_constant_opcode (insn, operands[1]);
     case 1:
     case 2:
-      if (get_attr_mode (insn) == MODE_V4SF)
-	return "movaps\t{%1, %0|%0, %1}";
+      if (ix86_mode_aligned_access_p (operands[1], operands[0]))
+	{
+	  if (get_attr_mode (insn) == MODE_V4SF)
+	    return "movaps\t{%1, %0|%0, %1}";
+	  else
+	    return "movdqa\t{%1, %0|%0, %1}";
+	}
       else
-	return "movdqa\t{%1, %0|%0, %1}";
+	{
+	  if (get_attr_mode (insn) == MODE_V4SF)
+	    return "movups\t{%1, %0|%0, %1}";
+	  else
+	    return "movdqu\t{%1, %0|%0, %1}";
+	}
     default:
       gcc_unreachable ();
     }
@@ -162,7 +172,10 @@
       return standard_sse_constant_opcode (insn, operands[1]);
     case 1:
     case 2:
-      return "movaps\t{%1, %0|%0, %1}";
+      if (ix86_mode_aligned_access_p (operands[1], operands[0]))
+	return "movaps\t{%1, %0|%0, %1}";
+      else
+	return "movups\t{%1, %0|%0, %1}";
     default:
       gcc_unreachable ();
     }
@@ -197,10 +210,20 @@
       return standard_sse_constant_opcode (insn, operands[1]);
     case 1:
     case 2:
-      if (get_attr_mode (insn) == MODE_V4SF)
-	return "movaps\t{%1, %0|%0, %1}";
+      if (ix86_mode_aligned_access_p (operands[1], operands[0]))
+	{
+	  if (get_attr_mode (insn) == MODE_V4SF)
+	    return "movaps\t{%1, %0|%0, %1}";
+	  else
+	    return "movapd\t{%1, %0|%0, %1}";
+	}
       else
-	return "movapd\t{%1, %0|%0, %1}";
+	{
+	  if (get_attr_mode (insn) == MODE_V4SF)
+	    return "movups\t{%1, %0|%0, %1}";
+	  else
+	    return "movupd\t{%1, %0|%0, %1}";
+	}
     default:
       gcc_unreachable ();
     }
--- gcc/testsuite/gcc.target/i386/pr32000-1.c.packed	2008-03-26 12:34:30.000000000 -0700
+++ gcc/testsuite/gcc.target/i386/pr32000-1.c	2008-03-26 12:05:27.000000000 -0700
@@ -0,0 +1,28 @@
+/* { dg-do run } */
+/* { dg-require-effective-target dfp } */
+/* { dg-options "-O -msse2 -std=gnu99" } */
+
+#include "sse2-check.h"
+
+int __attribute__((noinline))
+iszero (_Decimal128 x)
+{
+  return x == 0;
+}
+
+typedef struct { _Decimal128 f __attribute__((packed)); } packed;
+
+int __attribute__((noinline))
+foo (_Decimal128 a1, _Decimal128 a2, _Decimal128 a3, _Decimal128 a4,
+     _Decimal128 a5, _Decimal128 a6, _Decimal128 a7, _Decimal128 a8,
+     int b1, int b2, int b3, int b4, int b5, int b6, int b7, packed y)
+{
+  return iszero (y.f);
+}
+
+void
+sse2_test (void)
+{
+  if (!foo (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, (packed) { 0 }))
+    abort ();
+}
--- gcc/testsuite/gcc.target/i386/pr32000-2.c.packed	2008-03-26 12:34:30.000000000 -0700
+++ gcc/testsuite/gcc.target/i386/pr32000-2.c	2008-03-26 12:31:56.000000000 -0700
@@ -0,0 +1,19 @@
+/* Test that we don't generate an aligned load when memory is unaligned.  */
+/* { dg-do compile } */
+/* { dg-require-effective-target dfp } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-options "-O2 -std=gnu99" } */
+/* { dg-final { scan-assembler-not "movdqa" } } */
+/* { dg-final { scan-assembler "movdqu" } } */
+
+extern int iszero (_Decimal128);
+
+typedef struct { _Decimal128 f __attribute__((packed)); } packed;
+
+int __attribute__((noinline))
+foo (_Decimal128 a1, _Decimal128 a2, _Decimal128 a3, _Decimal128 a4,
+     _Decimal128 a5, _Decimal128 a6, _Decimal128 a7, _Decimal128 a8,
+     int b1, int b2, int b3, int b4, int b5, int b6, int b7, packed y)
+{
+  return iszero (y.f);
+}
--- gcc/testsuite/gcc.target/i386/pr32000-3.c.packed	2008-03-26 12:34:30.000000000 -0700
+++ gcc/testsuite/gcc.target/i386/pr32000-3.c	2008-03-26 12:32:02.000000000 -0700
@@ -0,0 +1,20 @@
+/* Test that we don't generate an aligned load when memory is unaligned.  */
+/* { dg-do compile } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-options "-O2" } */
+/* { dg-final { scan-assembler-not "movaps" } } */
+/* { dg-final { scan-assembler "movups" } } */
+
+typedef float __m128 __attribute__ ((__vector_size__ (16), __may_alias__));
+
+extern int iszero (__m128);
+
+typedef struct { __m128 f __attribute__((packed)); } packed;
+
+int __attribute__((noinline))
+foo (__m128 a1, __m128 a2, __m128 a3, __m128 a4,
+     __m128 a5, __m128 a6, __m128 a7, __m128 a8,
+     int b1, int b2, int b3, int b4, int b5, int b6, int b7, packed y)
+{
+  return iszero (y.f);
+}
--- gcc/testsuite/gcc.target/i386/pr32000-4.c.packed	2008-03-26 12:34:30.000000000 -0700
+++ gcc/testsuite/gcc.target/i386/pr32000-4.c	2008-03-26 12:32:08.000000000 -0700
@@ -0,0 +1,20 @@
+/* Test that we don't generate an aligned load when memory is unaligned.  */
+/* { dg-do compile } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-options "-O2" } */
+/* { dg-final { scan-assembler-not "movapd" } } */
+/* { dg-final { scan-assembler "movupd" } } */
+
+typedef double __m128d __attribute__ ((__vector_size__ (16), __may_alias__));
+
+extern int iszero (__m128d);
+
+typedef struct { __m128d f __attribute__((packed)); } packed;
+
+int __attribute__((noinline))
+foo (__m128d a1, __m128d a2, __m128d a3, __m128d a4,
+     __m128d a5, __m128d a6, __m128d a7, __m128d a8,
+     int b1, int b2, int b3, int b4, int b5, int b6, int b7, packed y)
+{
+  return iszero (y.f);
+}
--- gcc/testsuite/gcc.target/i386/pr32000-5.c.packed	2008-03-26 12:34:30.000000000 -0700
+++ gcc/testsuite/gcc.target/i386/pr32000-5.c	2008-03-26 12:32:15.000000000 -0700
@@ -0,0 +1,20 @@
+/* Test that we don't generate an aligned load when memory is unaligned.  */
+/* { dg-do compile } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-options "-O2" } */
+/* { dg-final { scan-assembler-not "movdqa" } } */
+/* { dg-final { scan-assembler "movdqu" } } */
+
+typedef int __m128i __attribute__ ((__vector_size__ (16), __may_alias__));
+
+extern int iszero (__m128i);
+
+typedef struct { __m128i f __attribute__((packed)); } packed;
+
+int __attribute__((noinline))
+foo (__m128i a1, __m128i a2, __m128i a3, __m128i a4,
+     __m128i a5, __m128i a6, __m128i a7, __m128i a8,
+     int b1, int b2, int b3, int b4, int b5, int b6, int b7, packed y)
+{
+  return iszero (y.f);
+}
--- gcc/testsuite/gcc.target/i386/pr32000-6.c.packed	2008-03-26 12:34:30.000000000 -0700
+++ gcc/testsuite/gcc.target/i386/pr32000-6.c	2008-03-26 12:32:21.000000000 -0700
@@ -0,0 +1,17 @@
+/* Test that we generate an aligned load when memory is aligned.  */
+/* { dg-do compile } */
+/* { dg-require-effective-target dfp } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-options "-O2 -std=gnu99" } */
+/* { dg-final { scan-assembler-not "movdqu" } } */
+/* { dg-final { scan-assembler "movdqa" } } */
+
+extern int iszero (_Decimal128);
+
+int __attribute__((noinline))
+foo (_Decimal128 a1, _Decimal128 a2, _Decimal128 a3, _Decimal128 a4,
+     _Decimal128 a5, _Decimal128 a6, _Decimal128 a7, _Decimal128 a8,
+     int b1, int b2, int b3, int b4, int b5, int b6, int b7, _Decimal128 y)
+{
+  return iszero (y);
+}
--- gcc/testsuite/gcc.target/i386/pr32000-7.c.packed	2008-03-26 12:34:30.000000000 -0700
+++ gcc/testsuite/gcc.target/i386/pr32000-7.c	2008-03-26 12:32:26.000000000 -0700
@@ -0,0 +1,18 @@
+/* Test that we generate an aligned load when memory is aligned.  */
+/* { dg-do compile } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-options "-O2" } */
+/* { dg-final { scan-assembler-not "movups" } } */
+/* { dg-final { scan-assembler "movaps" } } */
+
+typedef float __m128 __attribute__ ((__vector_size__ (16), __may_alias__));
+
+extern int iszero (__m128);
+
+int __attribute__((noinline))
+foo (__m128 a1, __m128 a2, __m128 a3, __m128 a4,
+     __m128 a5, __m128 a6, __m128 a7, __m128 a8,
+     int b1, int b2, int b3, int b4, int b5, int b6, int b7, __m128 y)
+{
+  return iszero (y);
+}
--- gcc/testsuite/gcc.target/i386/pr32000-8.c.packed	2008-03-26 12:34:30.000000000 -0700
+++ gcc/testsuite/gcc.target/i386/pr32000-8.c	2008-03-26 12:32:31.000000000 -0700
@@ -0,0 +1,18 @@
+/* Test that we generate an aligned load when memory is aligned.  */
+/* { dg-do compile } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-options "-O2" } */
+/* { dg-final { scan-assembler-not "movupd" } } */
+/* { dg-final { scan-assembler "movapd" } } */
+
+typedef double __m128d __attribute__ ((__vector_size__ (16), __may_alias__));
+
+extern int iszero (__m128d);
+
+int __attribute__((noinline))
+foo (__m128d a1, __m128d a2, __m128d a3, __m128d a4,
+     __m128d a5, __m128d a6, __m128d a7, __m128d a8,
+     int b1, int b2, int b3, int b4, int b5, int b6, int b7, __m128d y)
+{
+  return iszero (y);
+}
--- gcc/testsuite/gcc.target/i386/pr32000-9.c.packed	2008-03-26 12:34:30.000000000 -0700
+++ gcc/testsuite/gcc.target/i386/pr32000-9.c	2008-03-26 12:32:38.000000000 -0700
@@ -0,0 +1,18 @@
+/* Test that we generate an aligned load when memory is aligned.  */
+/* { dg-do compile } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-options "-O2" } */
+/* { dg-final { scan-assembler-not "movdqu" } } */
+/* { dg-final { scan-assembler "movdqa" } } */
+
+typedef int __m128i __attribute__ ((__vector_size__ (16), __may_alias__));
+
+extern int iszero (__m128i);
+
+int __attribute__((noinline))
+foo (__m128i a1, __m128i a2, __m128i a3, __m128i a4,
+     __m128i a5, __m128i a6, __m128i a7, __m128i a8,
+     int b1, int b2, int b3, int b4, int b5, int b6, int b7, __m128i y)
+{
+  return iszero (y);
+}
