This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
PATCH: PR target/32000: x86 backend uses aligned load on unaligned memory
- From: "H.J. Lu" <hjl dot tools at gmail dot com>
- To: "GCC Patches" <gcc-patches at gcc dot gnu dot org>
- Date: Wed, 26 Mar 2008 14:03:57 -0700
- Subject: PATCH: PR target/32000: x86 backend uses aligned load on unaligned memory
On Tue, Mar 25, 2008 at 2:51 PM, H.J. Lu <hjl.tools@gmail.com> wrote:
> We may generate unaligned access with those *mov*_internal patterns.
> But we never check if memory access is aligned or not. This patch
> adds the alignment check for those *mov*_internal patterns.
>
Here is the updated patch with a few testcases. But this patch introduces
many unaligned loads from the stack. That is because assign_stack_local_1
doesn't update the alignment on the stack slot. I have a patch:
http://gcc.gnu.org/ml/gcc-patches/2008-03/msg01631.html
which works on simple testcases.
H.J.
gcc/
2008-03-26 H.J. Lu <hongjiu.lu@intel.com>
PR target/32000
* config/i386/i386-protos.h (ix86_aligned_p): Removed.
(ix86_mode_aligned_access_p): New.
* config/i386/i386.c (ix86_mode_aligned_access_p): New.
* config/i386/i386.md (*movti_internal): Emit unaligned SSE
load/store if memory is unaligned.
(*movti_rex64): Likewise.
(*movtf_internal): Likewise.
* config/i386/sse.md (*mov<mode>_internal): Likewise.
(*movv4sf_internal): Likewise.
(*movv2df_internal): Likewise.
gcc/testsuite/
2008-03-26 H.J. Lu <hongjiu.lu@intel.com>
PR target/32000
* gcc.target/i386/pr32000-1.c: New.
* gcc.target/i386/pr32000-2.c: Likewise.
* gcc.target/i386/pr32000-3.c: Likewise.
* gcc.target/i386/pr32000-4.c: Likewise.
* gcc.target/i386/pr32000-5.c: Likewise.
* gcc.target/i386/pr32000-6.c: Likewise.
* gcc.target/i386/pr32000-7.c: Likewise.
* gcc.target/i386/pr32000-8.c: Likewise.
* gcc.target/i386/pr32000-9.c: Likewise.
--- gcc/config/i386/i386-protos.h.packed 2008-03-26 12:33:57.000000000 -0700
+++ gcc/config/i386/i386-protos.h 2008-03-26 12:34:08.000000000 -0700
@@ -36,8 +36,7 @@ extern void ix86_output_addr_vec_elt (FI
extern void ix86_output_addr_diff_elt (FILE *, int, int);
#ifdef RTX_CODE
-extern int ix86_aligned_p (rtx);
-
+extern bool ix86_mode_aligned_access_p (rtx, rtx);
extern int standard_80387_constant_p (rtx);
extern const char *standard_80387_constant_opcode (rtx);
extern rtx standard_80387_constant_rtx (int);
--- gcc/config/i386/i386.c.packed 2008-03-26 12:33:57.000000000 -0700
+++ gcc/config/i386/i386.c 2008-03-26 12:34:08.000000000 -0700
@@ -25459,6 +25459,24 @@ x86_builtin_vectorization_cost (bool run
return 0;
}
+/* Return true if memory access to SRC or DEST is aligned by their
+ mode. */
+
+bool
+ix86_mode_aligned_access_p (rtx dest, rtx src)
+{
+ rtx mem;
+
+ if (MEM_P (src))
+ mem = src;
+ else if (MEM_P (dest))
+ mem = dest;
+ else
+ return true;
+
+ return MEM_ALIGN (mem) >= GET_MODE_ALIGNMENT (GET_MODE (mem));
+}
+
/* Initialize the GCC target structure. */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
--- gcc/config/i386/i386.md.packed 2008-03-26 12:33:57.000000000 -0700
+++ gcc/config/i386/i386.md 2008-03-26 12:34:08.000000000 -0700
@@ -2387,10 +2387,20 @@
return "pxor\t%0, %0";
case 1:
case 2:
- if (get_attr_mode (insn) == MODE_V4SF)
- return "movaps\t{%1, %0|%0, %1}";
+ if (ix86_mode_aligned_access_p (operands[0], operands[1]))
+ {
+ if (get_attr_mode (insn) == MODE_V4SF)
+ return "movaps\t{%1, %0|%0, %1}";
+ else
+ return "movdqa\t{%1, %0|%0, %1}";
+ }
else
- return "movdqa\t{%1, %0|%0, %1}";
+ {
+ if (get_attr_mode (insn) == MODE_V4SF)
+ return "movups\t{%1, %0|%0, %1}";
+ else
+ return "movdqu\t{%1, %0|%0, %1}";
+ }
default:
gcc_unreachable ();
}
@@ -2424,10 +2434,20 @@
return "pxor\t%0, %0";
case 3:
case 4:
- if (get_attr_mode (insn) == MODE_V4SF)
- return "movaps\t{%1, %0|%0, %1}";
+ if (ix86_mode_aligned_access_p (operands[0], operands[1]))
+ {
+ if (get_attr_mode (insn) == MODE_V4SF)
+ return "movaps\t{%1, %0|%0, %1}";
+ else
+ return "movdqa\t{%1, %0|%0, %1}";
+ }
else
- return "movdqa\t{%1, %0|%0, %1}";
+ {
+ if (get_attr_mode (insn) == MODE_V4SF)
+ return "movups\t{%1, %0|%0, %1}";
+ else
+ return "movdqu\t{%1, %0|%0, %1}";
+ }
default:
gcc_unreachable ();
}
@@ -3223,10 +3243,20 @@
{
case 0:
case 1:
- if (get_attr_mode (insn) == MODE_V4SF)
- return "movaps\t{%1, %0|%0, %1}";
+ if (ix86_mode_aligned_access_p (operands[1], operands[0]))
+ {
+ if (get_attr_mode (insn) == MODE_V4SF)
+ return "movaps\t{%1, %0|%0, %1}";
+ else
+ return "movdqa\t{%1, %0|%0, %1}";
+ }
else
- return "movdqa\t{%1, %0|%0, %1}";
+ {
+ if (get_attr_mode (insn) == MODE_V4SF)
+ return "movups\t{%1, %0|%0, %1}";
+ else
+ return "movdqu\t{%1, %0|%0, %1}";
+ }
case 2:
if (get_attr_mode (insn) == MODE_V4SF)
return "xorps\t%0, %0";
--- gcc/config/i386/sse.md.packed 2008-03-26 12:33:57.000000000 -0700
+++ gcc/config/i386/sse.md 2008-03-26 12:34:08.000000000 -0700
@@ -86,10 +86,20 @@
return standard_sse_constant_opcode (insn, operands[1]);
case 1:
case 2:
- if (get_attr_mode (insn) == MODE_V4SF)
- return "movaps\t{%1, %0|%0, %1}";
+ if (ix86_mode_aligned_access_p (operands[1], operands[0]))
+ {
+ if (get_attr_mode (insn) == MODE_V4SF)
+ return "movaps\t{%1, %0|%0, %1}";
+ else
+ return "movdqa\t{%1, %0|%0, %1}";
+ }
else
- return "movdqa\t{%1, %0|%0, %1}";
+ {
+ if (get_attr_mode (insn) == MODE_V4SF)
+ return "movups\t{%1, %0|%0, %1}";
+ else
+ return "movdqu\t{%1, %0|%0, %1}";
+ }
default:
gcc_unreachable ();
}
@@ -162,7 +172,10 @@
return standard_sse_constant_opcode (insn, operands[1]);
case 1:
case 2:
- return "movaps\t{%1, %0|%0, %1}";
+ if (ix86_mode_aligned_access_p (operands[1], operands[0]))
+ return "movaps\t{%1, %0|%0, %1}";
+ else
+ return "movups\t{%1, %0|%0, %1}";
default:
gcc_unreachable ();
}
@@ -197,10 +210,20 @@
return standard_sse_constant_opcode (insn, operands[1]);
case 1:
case 2:
- if (get_attr_mode (insn) == MODE_V4SF)
- return "movaps\t{%1, %0|%0, %1}";
+ if (ix86_mode_aligned_access_p (operands[1], operands[0]))
+ {
+ if (get_attr_mode (insn) == MODE_V4SF)
+ return "movaps\t{%1, %0|%0, %1}";
+ else
+ return "movapd\t{%1, %0|%0, %1}";
+ }
else
- return "movapd\t{%1, %0|%0, %1}";
+ {
+ if (get_attr_mode (insn) == MODE_V4SF)
+ return "movups\t{%1, %0|%0, %1}";
+ else
+ return "movupd\t{%1, %0|%0, %1}";
+ }
default:
gcc_unreachable ();
}
--- gcc/testsuite/gcc.target/i386/pr32000-1.c.packed 2008-03-26 12:34:30.000000000 -0700
+++ gcc/testsuite/gcc.target/i386/pr32000-1.c 2008-03-26 12:05:27.000000000 -0700
@@ -0,0 +1,28 @@
+/* { dg-do run } */
+/* { dg-require-effective-target dfp } */
+/* { dg-options "-O -msse2 -std=gnu99" } */
+
+#include "sse2-check.h"
+
+int __attribute__((noinline))
+iszero (_Decimal128 x)
+{
+ return x == 0;
+}
+
+typedef struct { _Decimal128 f __attribute__((packed)); } packed;
+
+int __attribute__((noinline))
+foo (_Decimal128 a1, _Decimal128 a2, _Decimal128 a3, _Decimal128 a4,
+ _Decimal128 a5, _Decimal128 a6, _Decimal128 a7, _Decimal128 a8,
+ int b1, int b2, int b3, int b4, int b5, int b6, int b7, packed y)
+{
+ return iszero (y.f);
+}
+
+void
+sse2_test (void)
+{
+ if (!foo (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, (packed) { 0 }))
+ abort ();
+}
--- gcc/testsuite/gcc.target/i386/pr32000-2.c.packed 2008-03-26 12:34:30.000000000 -0700
+++ gcc/testsuite/gcc.target/i386/pr32000-2.c 2008-03-26 12:31:56.000000000 -0700
@@ -0,0 +1,19 @@
+/* Test that we don't generate aligned load when memory is unaligned. */
+/* { dg-do compile } */
+/* { dg-require-effective-target dfp } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-options "-O2 -std=gnu99" } */
+/* { dg-final { scan-assembler-not "movdqa" } } */
+/* { dg-final { scan-assembler "movdqu" } } */
+
+extern int iszero (_Decimal128);
+
+typedef struct { _Decimal128 f __attribute__((packed)); } packed;
+
+int __attribute__((noinline))
+foo (_Decimal128 a1, _Decimal128 a2, _Decimal128 a3, _Decimal128 a4,
+ _Decimal128 a5, _Decimal128 a6, _Decimal128 a7, _Decimal128 a8,
+ int b1, int b2, int b3, int b4, int b5, int b6, int b7, packed y)
+{
+ return iszero (y.f);
+}
--- gcc/testsuite/gcc.target/i386/pr32000-3.c.packed 2008-03-26 12:34:30.000000000 -0700
+++ gcc/testsuite/gcc.target/i386/pr32000-3.c 2008-03-26 12:32:02.000000000 -0700
@@ -0,0 +1,20 @@
+/* Test that we don't generate aligned load when memory is unaligned. */
+/* { dg-do compile } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-options "-O2" } */
+/* { dg-final { scan-assembler-not "movaps" } } */
+/* { dg-final { scan-assembler "movups" } } */
+
+typedef float __m128 __attribute__ ((__vector_size__ (16), __may_alias__));
+
+extern int iszero (__m128);
+
+typedef struct { __m128 f __attribute__((packed)); } packed;
+
+int __attribute__((noinline))
+foo (__m128 a1, __m128 a2, __m128 a3, __m128 a4,
+ __m128 a5, __m128 a6, __m128 a7, __m128 a8,
+ int b1, int b2, int b3, int b4, int b5, int b6, int b7, packed y)
+{
+ return iszero (y.f);
+}
--- gcc/testsuite/gcc.target/i386/pr32000-4.c.packed 2008-03-26 12:34:30.000000000 -0700
+++ gcc/testsuite/gcc.target/i386/pr32000-4.c 2008-03-26 12:32:08.000000000 -0700
@@ -0,0 +1,20 @@
+/* Test that we don't generate aligned load when memory is unaligned. */
+/* { dg-do compile } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-options "-O2" } */
+/* { dg-final { scan-assembler-not "movapd" } } */
+/* { dg-final { scan-assembler "movupd" } } */
+
+typedef double __m128d __attribute__ ((__vector_size__ (16), __may_alias__));
+
+extern int iszero (__m128d);
+
+typedef struct { __m128d f __attribute__((packed)); } packed;
+
+int __attribute__((noinline))
+foo (__m128d a1, __m128d a2, __m128d a3, __m128d a4,
+ __m128d a5, __m128d a6, __m128d a7, __m128d a8,
+ int b1, int b2, int b3, int b4, int b5, int b6, int b7, packed y)
+{
+ return iszero (y.f);
+}
--- gcc/testsuite/gcc.target/i386/pr32000-5.c.packed 2008-03-26 12:34:30.000000000 -0700
+++ gcc/testsuite/gcc.target/i386/pr32000-5.c 2008-03-26 12:32:15.000000000 -0700
@@ -0,0 +1,20 @@
+/* Test that we don't generate aligned load when memory is unaligned. */
+/* { dg-do compile } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-options "-O2" } */
+/* { dg-final { scan-assembler-not "movdqa" } } */
+/* { dg-final { scan-assembler "movdqu" } } */
+
+typedef int __m128i __attribute__ ((__vector_size__ (16), __may_alias__));
+
+extern int iszero (__m128i);
+
+typedef struct { __m128i f __attribute__((packed)); } packed;
+
+int __attribute__((noinline))
+foo (__m128i a1, __m128i a2, __m128i a3, __m128i a4,
+ __m128i a5, __m128i a6, __m128i a7, __m128i a8,
+ int b1, int b2, int b3, int b4, int b5, int b6, int b7, packed y)
+{
+ return iszero (y.f);
+}
--- gcc/testsuite/gcc.target/i386/pr32000-6.c.packed 2008-03-26 12:34:30.000000000 -0700
+++ gcc/testsuite/gcc.target/i386/pr32000-6.c 2008-03-26 12:32:21.000000000 -0700
@@ -0,0 +1,17 @@
+/* Test that we generate aligned load when memory is aligned. */
+/* { dg-do compile } */
+/* { dg-require-effective-target dfp } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-options "-O2 -std=gnu99" } */
+/* { dg-final { scan-assembler-not "movdqu" } } */
+/* { dg-final { scan-assembler "movdqa" } } */
+
+extern int iszero (_Decimal128);
+
+int __attribute__((noinline))
+foo (_Decimal128 a1, _Decimal128 a2, _Decimal128 a3, _Decimal128 a4,
+ _Decimal128 a5, _Decimal128 a6, _Decimal128 a7, _Decimal128 a8,
+ int b1, int b2, int b3, int b4, int b5, int b6, int b7, _Decimal128 y)
+{
+ return iszero (y);
+}
--- gcc/testsuite/gcc.target/i386/pr32000-7.c.packed 2008-03-26 12:34:30.000000000 -0700
+++ gcc/testsuite/gcc.target/i386/pr32000-7.c 2008-03-26 12:32:26.000000000 -0700
@@ -0,0 +1,18 @@
+/* Test that we generate aligned load when memory is aligned. */
+/* { dg-do compile } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-options "-O2" } */
+/* { dg-final { scan-assembler-not "movups" } } */
+/* { dg-final { scan-assembler "movaps" } } */
+
+typedef float __m128 __attribute__ ((__vector_size__ (16), __may_alias__));
+
+extern int iszero (__m128);
+
+int __attribute__((noinline))
+foo (__m128 a1, __m128 a2, __m128 a3, __m128 a4,
+ __m128 a5, __m128 a6, __m128 a7, __m128 a8,
+ int b1, int b2, int b3, int b4, int b5, int b6, int b7, __m128 y)
+{
+ return iszero (y);
+}
--- gcc/testsuite/gcc.target/i386/pr32000-8.c.packed 2008-03-26 12:34:30.000000000 -0700
+++ gcc/testsuite/gcc.target/i386/pr32000-8.c 2008-03-26 12:32:31.000000000 -0700
@@ -0,0 +1,18 @@
+/* Test that we generate aligned load when memory is aligned. */
+/* { dg-do compile } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-options "-O2" } */
+/* { dg-final { scan-assembler-not "movupd" } } */
+/* { dg-final { scan-assembler "movapd" } } */
+
+typedef double __m128d __attribute__ ((__vector_size__ (16), __may_alias__));
+
+extern int iszero (__m128d);
+
+int __attribute__((noinline))
+foo (__m128d a1, __m128d a2, __m128d a3, __m128d a4,
+ __m128d a5, __m128d a6, __m128d a7, __m128d a8,
+ int b1, int b2, int b3, int b4, int b5, int b6, int b7, __m128d y)
+{
+ return iszero (y);
+}
--- gcc/testsuite/gcc.target/i386/pr32000-9.c.packed 2008-03-26 12:34:30.000000000 -0700
+++ gcc/testsuite/gcc.target/i386/pr32000-9.c 2008-03-26 12:32:38.000000000 -0700
@@ -0,0 +1,18 @@
+/* Test that we generate aligned load when memory is aligned. */
+/* { dg-do compile } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-options "-O2" } */
+/* { dg-final { scan-assembler-not "movdqu" } } */
+/* { dg-final { scan-assembler "movdqa" } } */
+
+typedef int __m128i __attribute__ ((__vector_size__ (16), __may_alias__));
+
+extern int iszero (__m128i);
+
+int __attribute__((noinline))
+foo (__m128i a1, __m128i a2, __m128i a3, __m128i a4,
+ __m128i a5, __m128i a6, __m128i a7, __m128i a8,
+ int b1, int b2, int b3, int b4, int b5, int b6, int b7, __m128i y)
+{
+ return iszero (y);
+}