+2011-03-27 H.J. Lu <hongjiu.lu@intel.com>
+
+ * config/i386/i386.c (flag_opts): Add -mavx256-split-unaligned-load
+ and -mavx256-split-unaligned-store.
+ (ix86_option_override_internal): Split 32-byte AVX unaligned
+ load/store by default.
+ (ix86_avx256_split_vector_move_misalign): New.
+ (ix86_expand_vector_move_misalign): Use it.
+
+ * config/i386/i386.opt: Add -mavx256-split-unaligned-load and
+ -mavx256-split-unaligned-store.
+
+ * config/i386/sse.md (*avx_mov<mode>_internal): Verify unaligned
+ 256bit load/store. Generate unaligned store on misaligned memory
+ operand.
+ (*avx_movu<ssemodesuffix><avxmodesuffix>): Verify unaligned
+ 256bit load/store.
+ (*avx_movdqu<avxmodesuffix>): Likewise.
+
+ * doc/invoke.texi: Document -mavx256-split-unaligned-load and
+ -mavx256-split-unaligned-store.
+
2011-03-27 Richard Sandiford <rdsandiford@googlemail.com>
PR target/38598
{ "-mvect8-ret-in-mem", MASK_VECT8_RETURNS },
{ "-m8bit-idiv", MASK_USE_8BIT_IDIV },
{ "-mvzeroupper", MASK_VZEROUPPER },
+ { "-mavx256-split-unaligned-load", MASK_AVX256_SPLIT_UNALIGNED_LOAD},
+ { "-mavx256-split-unaligned-stroe", MASK_AVX256_SPLIT_UNALIGNED_STORE},
};
const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
if (TARGET_AVX)
{
/* When not optimizing for size, enable vzeroupper optimization for
- TARGET_AVX with -fexpensive-optimizations. */
- if (!optimize_size
- && flag_expensive_optimizations
- && !(target_flags_explicit & MASK_VZEROUPPER))
- target_flags |= MASK_VZEROUPPER;
+ TARGET_AVX with -fexpensive-optimizations and split 32-byte
+ AVX unaligned load/store. */
+ if (!optimize_size)
+ {
+ if (flag_expensive_optimizations
+ && !(target_flags_explicit & MASK_VZEROUPPER))
+ target_flags |= MASK_VZEROUPPER;
+ if (!(target_flags_explicit & MASK_AVX256_SPLIT_UNALIGNED_LOAD))
+ target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD;
+ if (!(target_flags_explicit & MASK_AVX256_SPLIT_UNALIGNED_STORE))
+ target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE;
+ }
}
else
{
emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
}
+/* Split 32-byte AVX unaligned load and store if needed. */
+
+static void
+ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1)
+{
+ rtx m;
+ rtx (*extract) (rtx, rtx, rtx);
+ rtx (*move_unaligned) (rtx, rtx);
+ enum machine_mode mode;
+
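+ /* Select the 16-byte half mode and the matching extract and
+    unaligned-move generators for the 256-bit mode being moved.  */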
+ switch (GET_MODE (op0))
+ {
+ default:
+ gcc_unreachable ();
+ case V32QImode:
+ extract = gen_avx_vextractf128v32qi;
+ move_unaligned = gen_avx_movdqu256;
+ mode = V16QImode;
+ break;
+ case V8SFmode:
+ extract = gen_avx_vextractf128v8sf;
+ move_unaligned = gen_avx_movups256;
+ mode = V4SFmode;
+ break;
+ case V4DFmode:
+ extract = gen_avx_vextractf128v4df;
+ move_unaligned = gen_avx_movupd256;
+ mode = V2DFmode;
+ break;
+ }
+
+ if (MEM_P (op1) && TARGET_AVX256_SPLIT_UNALIGNED_LOAD)
+ {
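+ /* Load the lower 128 bits of OP1 into a register, then build the
+    256-bit value by concatenating it with the upper 128 bits; the
+    vec_concat is emitted as a vinsertf128 from memory.  */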
+ rtx r = gen_reg_rtx (mode);
+ m = adjust_address (op1, mode, 0);
+ emit_move_insn (r, m);
+ m = adjust_address (op1, mode, 16);
+ r = gen_rtx_VEC_CONCAT (GET_MODE (op0), r, m);
+ emit_move_insn (op0, r);
+ }
+ else if (MEM_P (op0) && TARGET_AVX256_SPLIT_UNALIGNED_STORE)
+ {
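+ /* Extract the lower and upper 128-bit halves of OP1 and store
+    each half into memory.  */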
+ m = adjust_address (op0, mode, 0);
+ emit_insn (extract (m, op1, const0_rtx));
+ m = adjust_address (op0, mode, 16);
+ emit_insn (extract (m, op1, const1_rtx));
+ }
+ else
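+ /* Splitting is disabled for this direction; emit a single 256-bit
+    unaligned move.  */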
+ emit_insn (move_unaligned (op0, op1));
+}
+
/* Implement the movmisalign patterns for SSE. Non-SSE modes go
straight to ix86_expand_vector_move. */
/* Code generation for scalar reg-reg moves of single and double precision data:
case 32:
op0 = gen_lowpart (V32QImode, op0);
op1 = gen_lowpart (V32QImode, op1);
- emit_insn (gen_avx_movdqu256 (op0, op1));
+ ix86_avx256_split_vector_move_misalign (op0, op1);
break;
default:
gcc_unreachable ();
emit_insn (gen_avx_movups (op0, op1));
break;
case V8SFmode:
- emit_insn (gen_avx_movups256 (op0, op1));
+ ix86_avx256_split_vector_move_misalign (op0, op1);
break;
case V2DFmode:
if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
emit_insn (gen_avx_movupd (op0, op1));
break;
case V4DFmode:
- emit_insn (gen_avx_movupd256 (op0, op1));
+ ix86_avx256_split_vector_move_misalign (op0, op1);
break;
default:
gcc_unreachable ();
m8bit-idiv
Target Report Mask(USE_8BIT_IDIV) Save
Expand 32bit/64bit integer divide into 8bit unsigned integer divide with run-time check
+
+mavx256-split-unaligned-load
+Target Report Mask(AVX256_SPLIT_UNALIGNED_LOAD) Save
+Split 32-byte AVX unaligned load
+
+mavx256-split-unaligned-store
+Target Report Mask(AVX256_SPLIT_UNALIGNED_STORE) Save
+Split 32-byte AVX unaligned store
return standard_sse_constant_opcode (insn, operands[1]);
case 1:
case 2:
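+ /* 256-bit moves with misaligned operands that should have been
+    split in ix86_avx256_split_vector_move_misalign must not reach
+    this pattern.  */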
+ if (GET_MODE_ALIGNMENT (<MODE>mode) == 256
+ && ((TARGET_AVX256_SPLIT_UNALIGNED_STORE
+ && misaligned_operand (operands[0], <MODE>mode))
+ || (TARGET_AVX256_SPLIT_UNALIGNED_LOAD
+ && misaligned_operand (operands[1], <MODE>mode))))
+ gcc_unreachable ();
switch (get_attr_mode (insn))
{
case MODE_V8SF:
case MODE_V4SF:
- return "vmovaps\t{%1, %0|%0, %1}";
+ if (misaligned_operand (operands[0], <MODE>mode)
+ || misaligned_operand (operands[1], <MODE>mode))
+ return "vmovups\t{%1, %0|%0, %1}";
+ else
+ return "vmovaps\t{%1, %0|%0, %1}";
case MODE_V4DF:
case MODE_V2DF:
- if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
+ if (misaligned_operand (operands[0], <MODE>mode)
+ || misaligned_operand (operands[1], <MODE>mode))
+ return "vmovupd\t{%1, %0|%0, %1}";
+ else if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
return "vmovaps\t{%1, %0|%0, %1}";
else
return "vmovapd\t{%1, %0|%0, %1}";
default:
- if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
+ if (misaligned_operand (operands[0], <MODE>mode)
+ || misaligned_operand (operands[1], <MODE>mode))
+ return "vmovdqu\t{%1, %0|%0, %1}";
+ else if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
return "vmovaps\t{%1, %0|%0, %1}";
else
return "vmovdqa\t{%1, %0|%0, %1}";
UNSPEC_MOVU))]
"AVX_VEC_FLOAT_MODE_P (<MODE>mode)
&& !(MEM_P (operands[0]) && MEM_P (operands[1]))"
- "vmovu<ssemodesuffix>\t{%1, %0|%0, %1}"
+{
+ if (GET_MODE_ALIGNMENT (<MODE>mode) == 256
+ && ((TARGET_AVX256_SPLIT_UNALIGNED_STORE
+ && misaligned_operand (operands[0], <MODE>mode))
+ || (TARGET_AVX256_SPLIT_UNALIGNED_LOAD
+ && misaligned_operand (operands[1], <MODE>mode))))
+ gcc_unreachable ();
+ return "vmovu<ssemodesuffix>\t{%1, %0|%0, %1}";
+}
[(set_attr "type" "ssemov")
(set_attr "movu" "1")
(set_attr "prefix" "vex")
[(match_operand:AVXMODEQI 1 "nonimmediate_operand" "xm,x")]
UNSPEC_MOVU))]
"TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
- "vmovdqu\t{%1, %0|%0, %1}"
+{
+ if (GET_MODE_ALIGNMENT (<MODE>mode) == 256
+ && ((TARGET_AVX256_SPLIT_UNALIGNED_STORE
+ && misaligned_operand (operands[0], <MODE>mode))
+ || (TARGET_AVX256_SPLIT_UNALIGNED_LOAD
+ && misaligned_operand (operands[1], <MODE>mode))))
+ gcc_unreachable ();
+ return "vmovdqu\t{%1, %0|%0, %1}";
+}
[(set_attr "type" "ssemov")
(set_attr "movu" "1")
(set_attr "prefix" "vex")
-momit-leaf-frame-pointer -mno-red-zone -mno-tls-direct-seg-refs @gol
-mcmodel=@var{code-model} -mabi=@var{name} @gol
-m32 -m64 -mlarge-data-threshold=@var{num} @gol
--msse2avx -mfentry -m8bit-idiv}
+-msse2avx -mfentry -m8bit-idiv @gol
+-mavx256-split-unaligned-load -mavx256-split-unaligned-store}
@emph{i386 and x86-64 Windows Options}
@gccoptlist{-mconsole -mcygwin -mno-cygwin -mdll @gol
to 255, 8bit unsigned integer divide will be used instead of
32bit/64bit integer divide.
+@item -mavx256-split-unaligned-load
+@itemx -mavx256-split-unaligned-store
+@opindex avx256-split-unaligned-load
+@opindex avx256-split-unaligned-store
+Split 32-byte AVX unaligned load and store into two 16-byte halves.
+These splits are enabled by default when AVX is enabled, unless
+optimizing for size.
+
@end table
These @samp{-m} switches are supported in addition to the above
+2011-03-27 H.J. Lu <hongjiu.lu@intel.com>
+
+ * gcc.target/i386/avx256-unaligned-load-1.c: New.
+ * gcc.target/i386/avx256-unaligned-load-2.c: Likewise.
+ * gcc.target/i386/avx256-unaligned-load-3.c: Likewise.
+ * gcc.target/i386/avx256-unaligned-load-4.c: Likewise.
+ * gcc.target/i386/avx256-unaligned-load-5.c: Likewise.
+ * gcc.target/i386/avx256-unaligned-load-6.c: Likewise.
+ * gcc.target/i386/avx256-unaligned-load-7.c: Likewise.
+ * gcc.target/i386/avx256-unaligned-store-1.c: Likewise.
+ * gcc.target/i386/avx256-unaligned-store-2.c: Likewise.
+ * gcc.target/i386/avx256-unaligned-store-3.c: Likewise.
+ * gcc.target/i386/avx256-unaligned-store-4.c: Likewise.
+ * gcc.target/i386/avx256-unaligned-store-5.c: Likewise.
+ * gcc.target/i386/avx256-unaligned-store-6.c: Likewise.
+ * gcc.target/i386/avx256-unaligned-store-7.c: Likewise.
+
2011-03-27 Thomas Koenig <tkoenig@gcc.gnu.org>
PR fortran/47065
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O3 -dp -mavx -mavx256-split-unaligned-load" } */
+
+#define N 1024
+
+float a[N], b[N+3], c[N];
+
+void
+avx_test (void)
+{
+ int i;
+
+ for (i = 0; i < N; i++)
+ c[i] = a[i] * b[i+3];
+}
+
+/* { dg-final { scan-assembler-not "\\*avx_movups256/1" } } */
+/* { dg-final { scan-assembler "\\*avx_movups/1" } } */
+/* { dg-final { scan-assembler "vinsertf128" } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-options "-O3 -dp -mavx -mavx256-split-unaligned-load" } */
+
+#define N 1024
+
+char **ep;
+char **fp;
+
+void
+avx_test (void)
+{
+ int i;
+ char **ap;
+ char **bp;
+ char **cp;
+
+ ap = ep;
+ bp = fp;
+ for (i = 128; i >= 0; i--)
+ {
+ *ap++ = *cp++;
+ *bp++ = 0;
+ }
+}
+
+/* { dg-final { scan-assembler-not "\\*avx_movdqu256/1" } } */
+/* { dg-final { scan-assembler "\\*avx_movdqu/1" } } */
+/* { dg-final { scan-assembler "vinsertf128" } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O3 -dp -mavx -mavx256-split-unaligned-load" } */
+
+#define N 1024
+
+double a[N], b[N+3], c[N];
+
+void
+avx_test (void)
+{
+ int i;
+
+ for (i = 0; i < N; i++)
+ c[i] = a[i] * b[i+3];
+}
+
+/* { dg-final { scan-assembler-not "\\*avx_movupd256/1" } } */
+/* { dg-final { scan-assembler "\\*avx_movupd/1" } } */
+/* { dg-final { scan-assembler "vinsertf128" } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O3 -dp -mavx -mno-avx256-split-unaligned-load -mno-avx256-split-unaligned-store" } */
+
+#define N 1024
+
+float a[N], b[N+3];
+
+void
+avx_test (void)
+{
+ int i;
+
+ for (i = 0; i < N; i++)
+ b[i] = a[i+3] * 2;
+}
+
+/* { dg-final { scan-assembler "\\*avx_movups256/1" } } */
+/* { dg-final { scan-assembler-not "\\*avx_movups/1" } } */
+/* { dg-final { scan-assembler-not "vinsertf128" } } */
--- /dev/null
+/* { dg-do run } */
+/* { dg-require-effective-target avx } */
+/* { dg-options "-O3 -dp -mavx -mavx256-split-unaligned-load" } */
+
+#include "avx-check.h"
+
+#define N 8
+
+float a[N+3] = { -1, -1, -1, 24.43, 68.346, 43.35,
+ 546.46, 46.79, 82.78, 82.7, 9.4 };
+float b[N];
+float c[N];
+
+void
+foo (void)
+{
+ int i;
+
+ for (i = 0; i < N; i++)
+ b[i] = a[i+3] * 2;
+}
+
+__attribute__ ((noinline))
+float
+bar (float x)
+{
+ return x * 2;
+}
+
+void
+avx_test (void)
+{
+ int i;
+
+ foo ();
+
+ for (i = 0; i < N; i++)
+ c[i] = bar (a[i+3]);
+
+ for (i = 0; i < N; i++)
+ if (b[i] != c[i])
+ abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-require-effective-target avx } */
+/* { dg-options "-O3 -dp -mavx -mavx256-split-unaligned-load" } */
+
+#include "avx-check.h"
+
+#define N 4
+
+double a[N+3] = { -1, -1, -1, 24.43, 68.346, 43.35, 546.46 };
+double b[N];
+double c[N];
+
+void
+foo (void)
+{
+ int i;
+
+ for (i = 0; i < N; i++)
+ b[i] = a[i+3] * 2;
+}
+
+__attribute__ ((noinline))
+double
+bar (double x)
+{
+ return x * 2;
+}
+
+void
+avx_test (void)
+{
+ int i;
+
+ foo ();
+
+ for (i = 0; i < N; i++)
+ c[i] = bar (a[i+3]);
+
+ for (i = 0; i < N; i++)
+ if (b[i] != c[i])
+ abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-require-effective-target avx } */
+/* { dg-options "-O3 -dp -mavx -mavx256-split-unaligned-load" } */
+
+#include "avx-check.h"
+
+#define N 128
+
+char **ep;
+char **fp;
+char **mp;
+char **lp;
+
+__attribute__ ((noinline))
+void
+foo (void)
+{
+ mp = (char **) malloc ((N + 1) * sizeof (char *));
+ lp = (char **) malloc ((N + 1) * sizeof (char *));
+ ep = (char **) malloc ((N + 1) * sizeof (char *));
+ fp = (char **) malloc ((N + 1) * sizeof (char *));
+}
+
+void
+avx_test (void)
+{
+ int i;
+ char **ap, **bp, **cp, **dp;
+ char *str = "STR";
+
+ foo ();
+
+ cp = mp;
+ dp = lp;
+
+ for (i = N; i >= 0; i--)
+ {
+ *cp++ = str;
+ *dp++ = str;
+ }
+
+ ap = ep;
+ bp = fp;
+ cp = mp;
+ dp = lp;
+
+ for (i = N; i >= 0; i--)
+ {
+ *ap++ = *cp++;
+ *bp++ = *dp++;
+ }
+
+ for (i = N; i >= 0; i--)
+ {
+ if (strcmp (*--ap, "STR") != 0)
+ abort ();
+ if (strcmp (*--bp, "STR") != 0)
+ abort ();
+ }
+}
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O3 -dp -mavx -mavx256-split-unaligned-store" } */
+
+#define N 1024
+
+float a[N], b[N+3], c[N], d[N];
+
+void
+avx_test (void)
+{
+ int i;
+
+ for (i = 0; i < N; i++)
+ b[i+3] = a[i] * 10.0;
+
+ for (i = 0; i < N; i++)
+ d[i] = c[i] * 20.0;
+}
+
+/* { dg-final { scan-assembler-not "\\*avx_movups256/2" } } */
+/* { dg-final { scan-assembler "movups.*\\*avx_movv4sf_internal/3" } } */
+/* { dg-final { scan-assembler "vextractf128" } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-options "-O3 -dp -mavx -mavx256-split-unaligned-store" } */
+
+#define N 1024
+
+char **ep;
+char **fp;
+
+void
+avx_test (void)
+{
+ int i;
+ char **ap;
+ char **bp;
+ char **cp;
+
+ ap = ep;
+ bp = fp;
+ for (i = 128; i >= 0; i--)
+ {
+ *ap++ = *cp++;
+ *bp++ = 0;
+ }
+}
+
+/* { dg-final { scan-assembler-not "\\*avx_movdqu256/2" } } */
+/* { dg-final { scan-assembler "movdqu.*\\*avx_movv16qi_internal/3" } } */
+/* { dg-final { scan-assembler "vextractf128" } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O3 -dp -mavx -mavx256-split-unaligned-store" } */
+
+#define N 1024
+
+double a[N], b[N+3], c[N], d[N];
+
+void
+avx_test (void)
+{
+ int i;
+
+ for (i = 0; i < N; i++)
+ b[i+3] = a[i] * 10.0;
+
+ for (i = 0; i < N; i++)
+ d[i] = c[i] * 20.0;
+}
+
+/* { dg-final { scan-assembler-not "\\*avx_movupd256/2" } } */
+/* { dg-final { scan-assembler "movupd.*\\*avx_movv2df_internal/3" } } */
+/* { dg-final { scan-assembler "vextractf128" } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O3 -dp -mavx -mno-avx256-split-unaligned-load -mno-avx256-split-unaligned-store" } */
+
+#define N 1024
+
+float a[N], b[N+3], c[N];
+
+void
+avx_test (void)
+{
+ int i;
+
+ for (i = 0; i < N; i++)
+ b[i+3] = a[i] * c[i];
+}
+
+/* { dg-final { scan-assembler "\\*avx_movups256/2" } } */
+/* { dg-final { scan-assembler-not "\\*avx_movups/2" } } */
+/* { dg-final { scan-assembler-not "\\*avx_movv4sf_internal/3" } } */
+/* { dg-final { scan-assembler-not "vextractf128" } } */
--- /dev/null
+/* { dg-do run } */
+/* { dg-require-effective-target avx } */
+/* { dg-options "-O3 -dp -mavx -mavx256-split-unaligned-store" } */
+
+#include "avx-check.h"
+
+#define N 8
+
+float a[N] = { 24.43, 68.346, 43.35, 546.46, 46.79, 82.78, 82.7, 9.4 };
+float b[N+3];
+float c[N+3];
+
+void
+foo (void)
+{
+ int i;
+
+ for (i = 0; i < N; i++)
+ b[i+3] = a[i] * 2;
+}
+
+__attribute__ ((noinline))
+float
+bar (float x)
+{
+ return x * 2;
+}
+
+void
+avx_test (void)
+{
+ int i;
+
+ foo ();
+
+ for (i = 0; i < N; i++)
+ c[i+3] = bar (a[i]);
+
+ for (i = 0; i < N; i++)
+ if (b[i+3] != c[i+3])
+ abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-require-effective-target avx } */
+/* { dg-options "-O3 -dp -mavx -mavx256-split-unaligned-store" } */
+
+#include "avx-check.h"
+
+#define N 4
+
+double a[N] = { 24.43, 68.346, 43.35, 546.46 };
+double b[N+3];
+double c[N+3];
+
+void
+foo (void)
+{
+ int i;
+
+ for (i = 0; i < N; i++)
+ b[i+3] = a[i] * 2;
+}
+
+__attribute__ ((noinline))
+double
+bar (double x)
+{
+ return x * 2;
+}
+
+void
+avx_test (void)
+{
+ int i;
+
+ foo ();
+
+ for (i = 0; i < N; i++)
+ c[i+3] = bar (a[i]);
+
+ for (i = 0; i < N; i++)
+ if (b[i+3] != c[i+3])
+ abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-require-effective-target avx } */
+/* { dg-options "-O3 -dp -mavx -mavx256-split-unaligned-store" } */
+
+#include "avx-check.h"
+
+#define N 128
+
+char **ep;
+char **fp;
+
+__attribute__ ((noinline))
+void
+foo (void)
+{
+ ep = (char **) malloc ((N + 1) * sizeof (char *));
+ fp = (char **) malloc ((N + 1) * sizeof (char *));
+}
+
+void
+avx_test (void)
+{
+ int i;
+ char **ap, **bp;
+ char *str = "STR";
+
+ foo ();
+
+ ap = ep;
+ bp = fp;
+
+ for (i = N; i >= 0; i--)
+ {
+ *ap++ = str;
+ *bp++ = str;
+ }
+
+ for (i = N; i >= 0; i--)
+ {
+ if (strcmp (*--ap, "STR") != 0)
+ abort ();
+ if (strcmp (*--bp, "STR") != 0)
+ abort ();
+ }
+}