gcc.gnu.org Git - gcc.git/commitdiff
tree-vect-data-refs.c (vect_enhance_data_refs_alignment): Do not disable peeling...
author Richard Biener <rguenther@suse.de>
Fri, 10 May 2013 07:52:25 +0000 (07:52 +0000)
committer Richard Biener <rguenth@gcc.gnu.org>
Fri, 10 May 2013 07:52:25 +0000 (07:52 +0000)
2013-05-10  Richard Biener  <rguenther@suse.de>

* tree-vect-data-refs.c (vect_enhance_data_refs_alignment): Do not
disable peeling when we version for aliasing.
(vector_alignment_reachable_p): Honor explicit user alignment.
(vect_supportable_dr_alignment): Likewise.
* tree-vect-loop-manip.c (vect_can_advance_ivs_p): Use
STMT_VINFO_LOOP_PHI_EVOLUTION_PART instead of recomputing it.
* tree-vect-loop.c (vect_transform_loop): First apply versioning,
then peeling to arrange for the cost-model check to come first.

* gcc.target/i386/avx256-unaligned-load-2.c: Make well-defined.
* gcc.target/i386/l_fma_double_1.c: Adjust.
* gcc.target/i386/l_fma_double_2.c: Likewise.
* gcc.target/i386/l_fma_double_3.c: Likewise.
* gcc.target/i386/l_fma_double_4.c: Likewise.
* gcc.target/i386/l_fma_double_5.c: Likewise.
* gcc.target/i386/l_fma_double_6.c: Likewise.
* gcc.target/i386/l_fma_float_1.c: Likewise.
* gcc.target/i386/l_fma_float_2.c: Likewise.
* gcc.target/i386/l_fma_float_3.c: Likewise.
* gcc.target/i386/l_fma_float_4.c: Likewise.
* gcc.target/i386/l_fma_float_5.c: Likewise.
* gcc.target/i386/l_fma_float_6.c: Likewise.

From-SVN: r198767

18 files changed:
gcc/ChangeLog
gcc/testsuite/ChangeLog
gcc/testsuite/gcc.target/i386/avx256-unaligned-load-2.c
gcc/testsuite/gcc.target/i386/l_fma_double_1.c
gcc/testsuite/gcc.target/i386/l_fma_double_2.c
gcc/testsuite/gcc.target/i386/l_fma_double_3.c
gcc/testsuite/gcc.target/i386/l_fma_double_4.c
gcc/testsuite/gcc.target/i386/l_fma_double_5.c
gcc/testsuite/gcc.target/i386/l_fma_double_6.c
gcc/testsuite/gcc.target/i386/l_fma_float_1.c
gcc/testsuite/gcc.target/i386/l_fma_float_2.c
gcc/testsuite/gcc.target/i386/l_fma_float_3.c
gcc/testsuite/gcc.target/i386/l_fma_float_4.c
gcc/testsuite/gcc.target/i386/l_fma_float_5.c
gcc/testsuite/gcc.target/i386/l_fma_float_6.c
gcc/tree-vect-data-refs.c
gcc/tree-vect-loop-manip.c
gcc/tree-vect-loop.c

index b6101bad68d87d093de481e1e44ac92b7f7a9a27..c7d6db9a744d5169e420745c60d51c69d63b47a7 100644 (file)
@@ -1,3 +1,14 @@
+2013-05-10  Richard Biener  <rguenther@suse.de>
+
+       * tree-vect-data-refs.c (vect_enhance_data_refs_alignment): Do not
+       disable peeling when we version for aliasing.
+       (vector_alignment_reachable_p): Honor explicit user alignment.
+       (vect_supportable_dr_alignment): Likewise.
+       * tree-vect-loop-manip.c (vect_can_advance_ivs_p): Use
+       STMT_VINFO_LOOP_PHI_EVOLUTION_PART instead of recomputing it.
+       * tree-vect-loop.c (vect_transform_loop): First apply versioning,
+       then peeling to arrange for the cost-model check to come first.
+
 2013-05-10  Alan Modra  <amodra@gmail.com>
 
        * configure.ac (HAVE_AS_TLS): Swap powerpc64 and powerpc cases.
index 3ab939af5f562764e204dfbf341a5e5ca6d390ee..7656de9b0ab574aeca17cb93b81f3bc1417f970d 100644 (file)
@@ -1,3 +1,19 @@
+2013-05-10  Richard Biener  <rguenther@suse.de>
+
+       * gcc.target/i386/avx256-unaligned-load-2.c: Make well-defined.
+       * gcc.target/i386/l_fma_double_1.c: Adjust.
+       * gcc.target/i386/l_fma_double_2.c: Likewise.
+       * gcc.target/i386/l_fma_double_3.c: Likewise.
+       * gcc.target/i386/l_fma_double_4.c: Likewise.
+       * gcc.target/i386/l_fma_double_5.c: Likewise.
+       * gcc.target/i386/l_fma_double_6.c: Likewise.
+       * gcc.target/i386/l_fma_float_1.c: Likewise.
+       * gcc.target/i386/l_fma_float_2.c: Likewise.
+       * gcc.target/i386/l_fma_float_3.c: Likewise.
+       * gcc.target/i386/l_fma_float_4.c: Likewise.
+       * gcc.target/i386/l_fma_float_5.c: Likewise.
+       * gcc.target/i386/l_fma_float_6.c: Likewise.
+
 2013-05-08  Paolo Carlini  <paolo.carlini@oracle.com>
 
        PR c++/51226
index 2947d9eab3e4e084c8acd5bd61c0810db426d8ce..e3ec8542e0111750a808467065b40b1e99e82e11 100644 (file)
@@ -1,26 +1,13 @@
 /* { dg-do compile { target { ! ia32 } } } */
 /* { dg-options "-O3 -dp -mavx -mavx256-split-unaligned-load" } */
 
-#define N 1024
-
-char **ep;
-char **fp;
-
 void
-avx_test (void)
+avx_test (char **cp, char **ep)
 {
   int i;
-  char **ap;
-  char **bp;
-  char **cp;
-
-  ap = ep;
-  bp = fp;
-  for (i = 128; i >= 0; i--)
-    {
-      *ap++ = *cp++;
-      *bp++ = 0;
-    }
+  char **ap = __builtin_assume_aligned (ep, 32);
+  for (i = 128; i > 0; i--)
+    *ap++ = *cp++;
 }
 
 /* { dg-final { scan-assembler-not "avx_loaddqu256" } } */
index 270659359f497c0d4e3e3a46385ed0bd9e1896b0..b3ffcf2c00fa5587410aa5b7ca09475115458cc4 100644 (file)
@@ -4,23 +4,24 @@
 /* Test that the compiler properly optimizes floating point multiply
    and add instructions into FMA3 instructions.  */
 
-#define TYPE double
+typedef double adouble __attribute__((aligned(sizeof (double))));
+#define TYPE adouble
 
 #include "l_fma_1.h"
 
 /* { dg-final { scan-assembler-times "vfmadd132pd" 4  } } */
-/* { dg-final { scan-assembler-times "vfmadd231pd" 4  } } */
+/* { dg-final { scan-assembler-times "vfmadd213pd" 4  } } */
 /* { dg-final { scan-assembler-times "vfmsub132pd" 4  } } */
-/* { dg-final { scan-assembler-times "vfmsub231pd" 4  } } */
+/* { dg-final { scan-assembler-times "vfmsub213pd" 4  } } */
 /* { dg-final { scan-assembler-times "vfnmadd132pd" 4  } } */
 /* { dg-final { scan-assembler-times "vfnmadd231pd" 4  } } */
 /* { dg-final { scan-assembler-times "vfnmsub132pd" 4  } } */
 /* { dg-final { scan-assembler-times "vfnmsub231pd" 4  } } */
-/* { dg-final { scan-assembler-times "vfmadd132sd" 16  } } */
-/* { dg-final { scan-assembler-times "vfmadd213sd" 16  } } */
-/* { dg-final { scan-assembler-times "vfmsub132sd" 16  } } */
-/* { dg-final { scan-assembler-times "vfmsub213sd" 16  } } */
-/* { dg-final { scan-assembler-times "vfnmadd132sd" 16  } } */
-/* { dg-final { scan-assembler-times "vfnmadd213sd" 16  } } */
-/* { dg-final { scan-assembler-times "vfnmsub132sd" 16  } } */
-/* { dg-final { scan-assembler-times "vfnmsub213sd" 16  } } */
+/* { dg-final { scan-assembler-times "vfmadd132sd" 28  } } */
+/* { dg-final { scan-assembler-times "vfmadd213sd" 28 } } */
+/* { dg-final { scan-assembler-times "vfmsub132sd" 28 } } */
+/* { dg-final { scan-assembler-times "vfmsub213sd" 28 } } */
+/* { dg-final { scan-assembler-times "vfnmadd132sd" 28 } } */
+/* { dg-final { scan-assembler-times "vfnmadd213sd" 28 } } */
+/* { dg-final { scan-assembler-times "vfnmsub132sd" 28 } } */
+/* { dg-final { scan-assembler-times "vfnmsub213sd" 28 } } */
index e8933e25d534d67666ea6aa2a074c6b707591b8e..713b24b7c27beaa787a793bf63a87379ffd94533 100644 (file)
@@ -4,7 +4,8 @@
 /* Test that the compiler properly optimizes floating point multiply
    and add instructions into FMA3 instructions.  */
 
-#define TYPE double
+typedef double adouble __attribute__((aligned(sizeof (double))));
+#define TYPE adouble
 
 #include "l_fma_2.h"
 
@@ -12,7 +13,7 @@
 /* { dg-final { scan-assembler-times "vfmsub132pd" 8  } } */
 /* { dg-final { scan-assembler-times "vfnmadd132pd" 8  } } */
 /* { dg-final { scan-assembler-times "vfnmsub132pd" 8  } } */
-/* { dg-final { scan-assembler-times "vfmadd132sd" 32  } } */
-/* { dg-final { scan-assembler-times "vfmsub132sd" 32  } } */
-/* { dg-final { scan-assembler-times "vfnmadd132sd" 32  } } */
-/* { dg-final { scan-assembler-times "vfnmsub132sd" 32  } } */
+/* { dg-final { scan-assembler-times "vfmadd132sd" 56  } } */
+/* { dg-final { scan-assembler-times "vfmsub132sd" 56 } } */
+/* { dg-final { scan-assembler-times "vfnmadd132sd" 56 } } */
+/* { dg-final { scan-assembler-times "vfnmsub132sd" 56 } } */
index 00c756775c81ea1c30b0e9a30fea2df5fccb43e1..cbc6ef832aa53fb230e066fcf72fe743d692472b 100644 (file)
@@ -4,23 +4,24 @@
 /* Test that the compiler properly optimizes floating point multiply
    and add instructions into FMA3 instructions.  */
 
-#define TYPE double
+typedef double adouble __attribute__((aligned(sizeof (double))));
+#define TYPE adouble
 
 #include "l_fma_3.h"
 
 /* { dg-final { scan-assembler-times "vfmadd132pd" 4  } } */
-/* { dg-final { scan-assembler-times "vfmadd231pd" 4  } } */
+/* { dg-final { scan-assembler-times "vfmadd213pd" 4  } } */
 /* { dg-final { scan-assembler-times "vfmsub132pd" 4  } } */
-/* { dg-final { scan-assembler-times "vfmsub231pd" 4  } } */
+/* { dg-final { scan-assembler-times "vfmsub213pd" 4  } } */
 /* { dg-final { scan-assembler-times "vfnmadd132pd" 4  } } */
 /* { dg-final { scan-assembler-times "vfnmadd231pd" 4  } } */
 /* { dg-final { scan-assembler-times "vfnmsub132pd" 4  } } */
 /* { dg-final { scan-assembler-times "vfnmsub231pd" 4  } } */
-/* { dg-final { scan-assembler-times "vfmadd132sd" 16  } } */
-/* { dg-final { scan-assembler-times "vfmadd213sd" 16  } } */
-/* { dg-final { scan-assembler-times "vfmsub132sd" 16  } } */
-/* { dg-final { scan-assembler-times "vfmsub213sd" 16  } } */
-/* { dg-final { scan-assembler-times "vfnmadd132sd" 16  } } */
-/* { dg-final { scan-assembler-times "vfnmadd213sd" 16  } } */
-/* { dg-final { scan-assembler-times "vfnmsub132sd" 16  } } */
-/* { dg-final { scan-assembler-times "vfnmsub213sd" 16  } } */
+/* { dg-final { scan-assembler-times "vfmadd132sd" 28 } } */
+/* { dg-final { scan-assembler-times "vfmadd213sd" 28 } } */
+/* { dg-final { scan-assembler-times "vfmsub132sd" 28 } } */
+/* { dg-final { scan-assembler-times "vfmsub213sd" 28 } } */
+/* { dg-final { scan-assembler-times "vfnmadd132sd" 28 } } */
+/* { dg-final { scan-assembler-times "vfnmadd213sd" 28 } } */
+/* { dg-final { scan-assembler-times "vfnmsub132sd" 28 } } */
+/* { dg-final { scan-assembler-times "vfnmsub213sd" 28 } } */
index 09970bdb5c64d469b590b4df7d164180fc5d8710..d571aca1b01ecd74105c8deb57347ad94695787c 100644 (file)
@@ -4,7 +4,8 @@
 /* Test that the compiler properly optimizes floating point multiply
    and add instructions into FMA3 instructions.  */
 
-#define TYPE double
+typedef double adouble __attribute__((aligned(sizeof (double))));
+#define TYPE adouble
 
 #include "l_fma_4.h"
 
@@ -12,7 +13,7 @@
 /* { dg-final { scan-assembler-times "vfmsub132pd" 8  } } */
 /* { dg-final { scan-assembler-times "vfnmadd132pd" 8  } } */
 /* { dg-final { scan-assembler-times "vfnmsub132pd" 8  } } */
-/* { dg-final { scan-assembler-times "vfmadd132sd" 32  } } */
-/* { dg-final { scan-assembler-times "vfmsub132sd" 32  } } */
-/* { dg-final { scan-assembler-times "vfnmadd132sd" 32  } } */
-/* { dg-final { scan-assembler-times "vfnmsub132sd" 32  } } */
+/* { dg-final { scan-assembler-times "vfmadd132sd" 56 } } */
+/* { dg-final { scan-assembler-times "vfmsub132sd" 56 } } */
+/* { dg-final { scan-assembler-times "vfnmadd132sd" 56 } } */
+/* { dg-final { scan-assembler-times "vfnmsub132sd" 56 } } */
index 2a1428e4d9c9c0d09a2e4257a15215ed39c41b95..56d86369bc3373b0eb726aa4a2fafd04a6bf2691 100644 (file)
@@ -4,7 +4,8 @@
 /* Test that the compiler properly optimizes floating point multiply
    and add instructions into FMA3 instructions.  */
 
-#define TYPE double
+typedef double adouble __attribute__((aligned(sizeof (double))));
+#define TYPE adouble
 
 #include "l_fma_5.h"
 
@@ -12,7 +13,7 @@
 /* { dg-final { scan-assembler-times "vfmsub132pd" 8  } } */
 /* { dg-final { scan-assembler-times "vfnmadd132pd" 8  } } */
 /* { dg-final { scan-assembler-times "vfnmsub132pd" 8  } } */
-/* { dg-final { scan-assembler-times "vfmadd132sd" 32  } } */
-/* { dg-final { scan-assembler-times "vfmsub132sd" 32  } } */
-/* { dg-final { scan-assembler-times "vfnmadd132sd" 32  } } */
-/* { dg-final { scan-assembler-times "vfnmsub132sd" 32  } } */
+/* { dg-final { scan-assembler-times "vfmadd132sd" 56 } } */
+/* { dg-final { scan-assembler-times "vfmsub132sd" 56  } } */
+/* { dg-final { scan-assembler-times "vfnmadd132sd" 56  } } */
+/* { dg-final { scan-assembler-times "vfnmsub132sd" 56  } } */
index 092032aa0b5bcd0c604e72f203bcc5b82beb2f51..f22763d6f99166eda8f889497b2bffa5b7bae942 100644 (file)
@@ -4,7 +4,8 @@
 /* Test that the compiler properly optimizes floating point multiply
    and add instructions into FMA3 instructions.  */
 
-#define TYPE double
+typedef double adouble __attribute__((aligned(sizeof (double))));
+#define TYPE adouble
 
 #include "l_fma_6.h"
 
@@ -12,7 +13,7 @@
 /* { dg-final { scan-assembler-times "vfmsub132pd" 8  } } */
 /* { dg-final { scan-assembler-times "vfnmadd132pd" 8  } } */
 /* { dg-final { scan-assembler-times "vfnmsub132pd" 8  } } */
-/* { dg-final { scan-assembler-times "vfmadd132sd" 32 } } */
-/* { dg-final { scan-assembler-times "vfmsub132sd" 32  } } */
-/* { dg-final { scan-assembler-times "vfnmadd132sd" 32  } } */
-/* { dg-final { scan-assembler-times "vfnmsub132sd" 32  } } */
+/* { dg-final { scan-assembler-times "vfmadd132sd" 56 } } */
+/* { dg-final { scan-assembler-times "vfmsub132sd" 56  } } */
+/* { dg-final { scan-assembler-times "vfnmadd132sd" 56  } } */
+/* { dg-final { scan-assembler-times "vfnmsub132sd" 56  } } */
index 4bcd81de9da4e234d91f43246100395754b7d324..b2f58ac2f95e713288456fca4d283826495be50f 100644 (file)
@@ -9,18 +9,18 @@
 #include "l_fma_1.h"
 
 /* { dg-final { scan-assembler-times "vfmadd132ps" 4  } } */
-/* { dg-final { scan-assembler-times "vfmadd231ps" 4  } } */
+/* { dg-final { scan-assembler-times "vfmadd213ps" 4  } } */
 /* { dg-final { scan-assembler-times "vfmsub132ps" 4  } } */
-/* { dg-final { scan-assembler-times "vfmsub231ps" 4  } } */
+/* { dg-final { scan-assembler-times "vfmsub213ps" 4  } } */
 /* { dg-final { scan-assembler-times "vfnmadd132ps" 4  } } */
 /* { dg-final { scan-assembler-times "vfnmadd231ps" 4  } } */
 /* { dg-final { scan-assembler-times "vfnmsub132ps" 4  } } */
 /* { dg-final { scan-assembler-times "vfnmsub231ps" 4  } } */
-/* { dg-final { scan-assembler-times "vfmadd132ss" 32  } } */
-/* { dg-final { scan-assembler-times "vfmadd213ss" 32  } } */
-/* { dg-final { scan-assembler-times "vfmsub132ss" 32  } } */
-/* { dg-final { scan-assembler-times "vfmsub213ss" 32  } } */
-/* { dg-final { scan-assembler-times "vfnmadd132ss" 32  } } */
-/* { dg-final { scan-assembler-times "vfnmadd213ss" 32  } } */
-/* { dg-final { scan-assembler-times "vfnmsub132ss" 32  } } */
-/* { dg-final { scan-assembler-times "vfnmsub213ss" 32  } } */
+/* { dg-final { scan-assembler-times "vfmadd132ss" 60 } } */
+/* { dg-final { scan-assembler-times "vfmadd213ss" 60 } } */
+/* { dg-final { scan-assembler-times "vfmsub132ss" 60 } } */
+/* { dg-final { scan-assembler-times "vfmsub213ss" 60 } } */
+/* { dg-final { scan-assembler-times "vfnmadd132ss" 60 } } */
+/* { dg-final { scan-assembler-times "vfnmadd213ss" 60 } } */
+/* { dg-final { scan-assembler-times "vfnmsub132ss" 60 } } */
+/* { dg-final { scan-assembler-times "vfnmsub213ss" 60 } } */
index 34b7fcb6dd5b89f0320ac756bbae1ce62d0bd1e5..6377585bffecc38f1826956cb9c2b0fa886c62a3 100644 (file)
@@ -12,7 +12,7 @@
 /* { dg-final { scan-assembler-times "vfmsub132ps" 8  } } */
 /* { dg-final { scan-assembler-times "vfnmadd132ps" 8  } } */
 /* { dg-final { scan-assembler-times "vfnmsub132ps" 8  } } */
-/* { dg-final { scan-assembler-times "vfmadd132ss" 64  } } */
-/* { dg-final { scan-assembler-times "vfmsub132ss" 64  } } */
-/* { dg-final { scan-assembler-times "vfnmadd132ss" 64  } } */
-/* { dg-final { scan-assembler-times "vfnmsub132ss" 64  } } */
+/* { dg-final { scan-assembler-times "vfmadd132ss" 120  } } */
+/* { dg-final { scan-assembler-times "vfmsub132ss" 120  } } */
+/* { dg-final { scan-assembler-times "vfnmadd132ss" 120  } } */
+/* { dg-final { scan-assembler-times "vfnmsub132ss" 120  } } */
index 6ff2c6eacd54ea9048e750a7f5b60b74dcd05039..878babb31c51df67f6fd218f7518d8915e946a77 100644 (file)
@@ -9,18 +9,18 @@
 #include "l_fma_3.h"
 
 /* { dg-final { scan-assembler-times "vfmadd132ps" 4  } } */
-/* { dg-final { scan-assembler-times "vfmadd231ps" 4  } } */
+/* { dg-final { scan-assembler-times "vfmadd213ps" 4  } } */
 /* { dg-final { scan-assembler-times "vfmsub132ps" 4  } } */
-/* { dg-final { scan-assembler-times "vfmsub231ps" 4  } } */
+/* { dg-final { scan-assembler-times "vfmsub213ps" 4  } } */
 /* { dg-final { scan-assembler-times "vfnmadd132ps" 4  } } */
 /* { dg-final { scan-assembler-times "vfnmadd231ps" 4  } } */
 /* { dg-final { scan-assembler-times "vfnmsub132ps" 4  } } */
 /* { dg-final { scan-assembler-times "vfnmsub231ps" 4  } } */
-/* { dg-final { scan-assembler-times "vfmadd132ss" 32  } } */
-/* { dg-final { scan-assembler-times "vfmadd213ss" 32  } } */
-/* { dg-final { scan-assembler-times "vfmsub132ss" 32  } } */
-/* { dg-final { scan-assembler-times "vfmsub213ss" 32  } } */
-/* { dg-final { scan-assembler-times "vfnmadd132ss" 32  } } */
-/* { dg-final { scan-assembler-times "vfnmadd213ss" 32  } } */
-/* { dg-final { scan-assembler-times "vfnmsub132ss" 32  } } */
-/* { dg-final { scan-assembler-times "vfnmsub213ss" 32  } } */
+/* { dg-final { scan-assembler-times "vfmadd132ss" 60  } } */
+/* { dg-final { scan-assembler-times "vfmadd213ss" 60  } } */
+/* { dg-final { scan-assembler-times "vfmsub132ss" 60  } } */
+/* { dg-final { scan-assembler-times "vfmsub213ss" 60  } } */
+/* { dg-final { scan-assembler-times "vfnmadd132ss" 60  } } */
+/* { dg-final { scan-assembler-times "vfnmadd213ss" 60  } } */
+/* { dg-final { scan-assembler-times "vfnmsub132ss" 60  } } */
+/* { dg-final { scan-assembler-times "vfnmsub213ss" 60  } } */
index 39548bfa76b74f6e83e8139e92b535ad727d9a24..bacb01e7f3ffbd8119a3bf39c3efacc1badb7ca1 100644 (file)
@@ -12,7 +12,7 @@
 /* { dg-final { scan-assembler-times "vfmsub132ps" 8  } } */
 /* { dg-final { scan-assembler-times "vfnmadd132ps" 8  } } */
 /* { dg-final { scan-assembler-times "vfnmsub132ps" 8  } } */
-/* { dg-final { scan-assembler-times "vfmadd132ss" 64  } } */
-/* { dg-final { scan-assembler-times "vfmsub132ss" 64  } } */
-/* { dg-final { scan-assembler-times "vfnmadd132ss" 64  } } */
-/* { dg-final { scan-assembler-times "vfnmsub132ss" 64  } } */
+/* { dg-final { scan-assembler-times "vfmadd132ss" 120  } } */
+/* { dg-final { scan-assembler-times "vfmsub132ss" 120  } } */
+/* { dg-final { scan-assembler-times "vfnmadd132ss" 120  } } */
+/* { dg-final { scan-assembler-times "vfnmsub132ss" 120  } } */
index 83d795125921b5cdc2a8216c7ed140fca3d3aaf0..a32fc41d9f9c4f8b18f705074698ee7f754ca7e8 100644 (file)
@@ -12,7 +12,7 @@
 /* { dg-final { scan-assembler-times "vfmsub132ps" 8  } } */
 /* { dg-final { scan-assembler-times "vfnmadd132ps" 8  } } */
 /* { dg-final { scan-assembler-times "vfnmsub132ps" 8  } } */
-/* { dg-final { scan-assembler-times "vfmadd132ss" 64  } } */
-/* { dg-final { scan-assembler-times "vfmsub132ss" 64  } } */
-/* { dg-final { scan-assembler-times "vfnmadd132ss" 64  } } */
-/* { dg-final { scan-assembler-times "vfnmsub132ss" 64  } } */
+/* { dg-final { scan-assembler-times "vfmadd132ss" 120  } } */
+/* { dg-final { scan-assembler-times "vfmsub132ss" 120  } } */
+/* { dg-final { scan-assembler-times "vfnmadd132ss" 120  } } */
+/* { dg-final { scan-assembler-times "vfnmsub132ss" 120  } } */
index 1eefc817c3681325d53266c1d62c46962820fe06..a7a74fb42227ed804a022bec81111bb13356b691 100644 (file)
@@ -12,7 +12,7 @@
 /* { dg-final { scan-assembler-times "vfmsub132ps" 8  } } */
 /* { dg-final { scan-assembler-times "vfnmadd132ps" 8  } } */
 /* { dg-final { scan-assembler-times "vfnmsub132ps" 8  } } */
-/* { dg-final { scan-assembler-times "vfmadd132ss" 64  } } */
-/* { dg-final { scan-assembler-times "vfmsub132ss" 64  } } */
-/* { dg-final { scan-assembler-times "vfnmadd132ss" 64  } } */
-/* { dg-final { scan-assembler-times "vfnmsub132ss" 64  } } */
+/* { dg-final { scan-assembler-times "vfmadd132ss" 120 } } */
+/* { dg-final { scan-assembler-times "vfmsub132ss" 120  } } */
+/* { dg-final { scan-assembler-times "vfnmadd132ss" 120  } } */
+/* { dg-final { scan-assembler-times "vfnmsub132ss" 120  } } */
index c1b5826ee1274cb90bd10afd3e71822bc86e544e..bf0b510db44d1af2b4fcfebf8a5aec072cbbd84d 100644 (file)
@@ -1024,7 +1024,8 @@ vector_alignment_reachable_p (struct data_reference *dr)
       if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 
                          "Unknown misalignment, is_packed = %d",is_packed);
-      if (targetm.vectorize.vector_alignment_reachable (type, is_packed))
+      if ((TYPE_USER_ALIGN (type) && !is_packed)
+         || targetm.vectorize.vector_alignment_reachable (type, is_packed))
        return true;
       else
        return false;
@@ -1323,7 +1324,6 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
   bool stat;
   gimple stmt;
   stmt_vec_info stmt_info;
-  int vect_versioning_for_alias_required;
   unsigned int npeel = 0;
   bool all_misalignments_unknown = true;
   unsigned int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
@@ -1510,15 +1510,8 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
         }
     }
 
-  vect_versioning_for_alias_required
-    = LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo);
-
-  /* Temporarily, if versioning for alias is required, we disable peeling
-     until we support peeling and versioning.  Often peeling for alignment
-     will require peeling for loop-bound, which in turn requires that we
-     know how to adjust the loop ivs after the loop.  */
-  if (vect_versioning_for_alias_required
-      || !vect_can_advance_ivs_p (loop_vinfo)
+  /* Check if we can possibly peel the loop.  */
+  if (!vect_can_advance_ivs_p (loop_vinfo)
       || !slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
     do_peeling = false;
 
@@ -4722,9 +4715,10 @@ vect_supportable_dr_alignment (struct data_reference *dr,
       if (!known_alignment_for_access_p (dr))
        is_packed = not_size_aligned (DR_REF (dr));
 
-      if (targetm.vectorize.
-         support_vector_misalignment (mode, type,
-                                      DR_MISALIGNMENT (dr), is_packed))
+      if ((TYPE_USER_ALIGN (type) && !is_packed)
+         || targetm.vectorize.
+              support_vector_misalignment (mode, type,
+                                           DR_MISALIGNMENT (dr), is_packed))
        /* Can't software pipeline the loads, but can at least do them.  */
        return dr_unaligned_supported;
     }
@@ -4736,9 +4730,10 @@ vect_supportable_dr_alignment (struct data_reference *dr,
       if (!known_alignment_for_access_p (dr))
        is_packed = not_size_aligned (DR_REF (dr));
 
-     if (targetm.vectorize.
-         support_vector_misalignment (mode, type,
-                                     DR_MISALIGNMENT (dr), is_packed))
+     if ((TYPE_USER_ALIGN (type) && !is_packed)
+        || targetm.vectorize.
+             support_vector_misalignment (mode, type,
+                                          DR_MISALIGNMENT (dr), is_packed))
        return dr_unaligned_supported;
     }
 
index bff5c22130ea6e66489055378bfadfb518234935..82e724f1d9c1013e41790735d19989e94dbb5b2f 100644 (file)
@@ -1555,7 +1555,6 @@ vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
     dump_printf_loc (MSG_NOTE, vect_location, "vect_can_advance_ivs_p:");
   for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
     {
-      tree access_fn = NULL;
       tree evolution_part;
 
       phi = gsi_stmt (gsi);
@@ -1588,31 +1587,13 @@ vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
 
       /* Analyze the evolution function.  */
 
-      access_fn = instantiate_parameters
-       (loop, analyze_scalar_evolution (loop, PHI_RESULT (phi)));
-
-      if (!access_fn)
-       {
-         if (dump_enabled_p ())
-           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-                             "No Access function.");
-         return false;
-       }
-
-      STRIP_NOPS (access_fn);
-      if (dump_enabled_p ())
-        {
-         dump_printf_loc (MSG_NOTE, vect_location,
-                           "Access function of PHI: ");
-         dump_generic_expr (MSG_NOTE, TDF_SLIM, access_fn);
-        }
-
-      evolution_part = evolution_part_in_loop_num (access_fn, loop->num);
-
+      evolution_part
+       = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (vinfo_for_stmt (phi));
       if (evolution_part == NULL_TREE)
         {
          if (dump_enabled_p ())
-           dump_printf (MSG_MISSED_OPTIMIZATION, "No evolution.");
+           dump_printf (MSG_MISSED_OPTIMIZATION,
+                        "No access function or evolution.");
          return false;
         }
 
index 40eccea550072964654c2cf69764ebba4ff7d3e4..0fb2daed3700e0025d8aec457a82949fc68ec6b0 100644 (file)
@@ -5499,19 +5499,22 @@ vect_transform_loop (loop_vec_info loop_vinfo)
       check_profitability = true;
     }
 
-  /* Peel the loop if there are data refs with unknown alignment.
-     Only one data ref with unknown store is allowed.  */
+  /* Version the loop first, if required, so the profitability check
+     comes first.  */
 
-  if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
+  if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)
+      || LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
     {
-      vect_do_peeling_for_alignment (loop_vinfo, th, check_profitability);
+      vect_loop_versioning (loop_vinfo, th, check_profitability);
       check_profitability = false;
     }
 
-  if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)
-      || LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
+  /* Peel the loop if there are data refs with unknown alignment.
+     Only one data ref with unknown store is allowed.  */
+
+  if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
     {
-      vect_loop_versioning (loop_vinfo, th, check_profitability);
+      vect_do_peeling_for_alignment (loop_vinfo, th, check_profitability);
       check_profitability = false;
     }
 
This page took 0.113291 seconds and 5 git commands to generate.