This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[autovect] [patch] Vectorization of strided accesses for x86.
- From: Ira Rosen <IRAR at il dot ibm dot com>
- To: gcc-patches at gnu dot org
- Date: Thu, 28 Sep 2006 13:29:22 +0300
- Subject: [autovect] [patch] Vectorization of strided accesses for x86.
Made vectorization of strided accesses work for x86; all strided testcases
now pass on i386.
We'd probably want to consider the associated costs, or implement this more
efficiently if possible (at least for smaller types).
Committed to autovect branch.
Ira
ChangeLog entry:
* targhooks.c (interleave_vectorize_builtin_extract_evenodd):
Fix to produce a correct instruction sequence.
* tree-vect-transform.c (vect_permute_store_chain): Choose the
correct instruction according to the endianness. Call
mark_new_vars_to_rename.
Patch:
Index: targhooks.c
===================================================================
--- targhooks.c (revision 117273)
+++ targhooks.c (working copy)
@@ -456,6 +456,7 @@ interleave_vectorize_builtin_extract_eve
enum machine_mode mode;
block_stmt_iterator bsi;
tree th, tl, result, x;
+ int scalar_type_size, i, tmp;
/* If the first argument is a type, just check if support
is available. Return a non NULL value if supported, NULL_TREE
otherwise.
@@ -479,31 +480,46 @@ interleave_vectorize_builtin_extract_eve
return NULL;
bsi = bsi_for_stmt (stmt);
-
- th = make_rename_temp (type, NULL);
- x = build2 (VEC_INTERLEAVE_HIGH_EXPR, type, vec1, vec2);
- x = build2 (MODIFY_EXPR, type, th, x);
- th = make_ssa_name (th, x);
- TREE_OPERAND (x, 0) = th;
- bsi_insert_before (&bsi, x, BSI_SAME_STMT);
-
- tl = make_rename_temp (type, NULL);
- x = build2 (VEC_INTERLEAVE_LOW_EXPR, type, vec1, vec2);
- x = build2 (MODIFY_EXPR, type, tl, x);
- tl = make_ssa_name (tl, x);
- TREE_OPERAND (x, 0) = tl;
- bsi_insert_before (&bsi, x, BSI_SAME_STMT);
+
+ scalar_type_size = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (stmt)));
+ tmp = exact_log2 (UNITS_PER_SIMD_WORD / scalar_type_size) - 1;
+
+ th = vec1;
+ tl = vec2;
+ for (i = 0; i < tmp; i++)
+ {
+ th = make_rename_temp (type, NULL);
+ x = build2 (VEC_INTERLEAVE_HIGH_EXPR, type, vec1, vec2);
+ x = build2 (MODIFY_EXPR, type, th, x);
+ th = make_ssa_name (th, x);
+ TREE_OPERAND (x, 0) = th;
+ bsi_insert_before (&bsi, x, BSI_SAME_STMT);
+ mark_new_vars_to_rename (x);
+
+ tl = make_rename_temp (type, NULL);
+ x = build2 (VEC_INTERLEAVE_LOW_EXPR, type, vec1, vec2);
+ x = build2 (MODIFY_EXPR, type, tl, x);
+ tl = make_ssa_name (tl, x);
+ TREE_OPERAND (x, 0) = tl;
+ bsi_insert_before (&bsi, x, BSI_SAME_STMT);
+ mark_new_vars_to_rename (x);
+ vec1 = BYTES_BIG_ENDIAN ? th : tl;
+ vec2 = BYTES_BIG_ENDIAN ? tl : th;
+ }
result = make_rename_temp (type, NULL);
- /* ??? Endianness issues? */
- x = build2 (odd_p ? VEC_INTERLEAVE_HIGH_EXPR : VEC_INTERLEAVE_LOW_EXPR,
- type, th, tl);
+
+ if (BYTES_BIG_ENDIAN)
+ x = build2 (odd_p ? VEC_INTERLEAVE_HIGH_EXPR :
VEC_INTERLEAVE_LOW_EXPR,
+ type, th, tl);
+ else
+ x = build2 (odd_p ? VEC_INTERLEAVE_HIGH_EXPR :
VEC_INTERLEAVE_LOW_EXPR,
+ type, tl, th);
x = build2 (MODIFY_EXPR, type, result, x);
result = make_ssa_name (result, x);
TREE_OPERAND (x, 0) = result;
- bsi_insert_before (&bsi, x, BSI_SAME_STMT);
- return result;
+ return x;
}
tree
Index: testsuite/ChangeLog.autovect
===================================================================
--- testsuite/ChangeLog.autovect (revision 117273)
+++ testsuite/ChangeLog.autovect (working copy)
@@ -1,3 +1,32 @@
+2006-09-28 Ira Rosen <irar@il.ibm.com>
+
+ * lib/target-supports.exp (vect_strided): Define.
+ * gcc.dg/vect/vect-strided-a-mult.c: Vectorizable on targets
+ that support vectorization of strided accesses.
+ * gcc.dg/vect/vect-strided-mult.c,
+ gcc.dg/vect/vect-strided-u8-i8-gap2.c,
+ gcc.dg/vect/vect-strided-u8-i2.c,
+ gcc.dg/vect/vect-strided-a-u16-mult.c,
+ gcc.dg/vect/vect-strided-a-u8-i2-gap.c,
+ gcc.dg/vect/vect-strided-a-u16-i2.c, gcc.dg/vect/vect-1.c,
+ gcc.dg/vect/noreasoc-vect-strided-reduc-u8-i2.c,
+ gcc.dg/vect/vect-strided-u32-i4.c,
+ gcc.dg/vect/vect-strided-u16-i4.c,
+ gcc.dg/vect/vect-strided-u32-i8.c,
+ gcc.dg/vect/vect-strided-u8-i8-gap7.c,
+ gcc.dg/vect/vect-107.c, gcc.dg/vect/vect-98.c,
+ gcc.dg/vect/vect-strided-mult-char.c,
+ gcc.dg/vect/vect-strided-float.c,
+ gcc.dg/vect/vect-strided-u8-i2-gap.c,
+ gcc.dg/vect/vect-strided-a-u8-i8-gap2.c,
+ gcc.dg/vect/vect-strided-u8-i8-gap4.c,
+ gcc.dg/vect/vect-strided-a-u32-mult.c,
+ gcc.dg/vect/vect-strided-u8-i8.c,
+ gcc.dg/vect/vect-strided-u32-mult.c,
+ gcc.dg/vect/vect-strided-a-u16-i4.c,
+ gcc.dg/vect/vect-strided-u16-i2.c,
+ gcc.dg/vect/vect-strided-a-u8-i8-gap7.c: Likewise.
+
2006-05-07 Victor Kaplansky <victork@il.ibm.com>
Merge from mainline (110552:110553)
Index: testsuite/gcc.dg/vect/vect-strided-a-mult.c
===================================================================
--- testsuite/gcc.dg/vect/vect-strided-a-mult.c (revision 117273)
+++ testsuite/gcc.dg/vect/vect-strided-a-mult.c (working copy)
@@ -71,5 +71,5 @@ int main (void)
return 0;
}
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" {
target powerpc*-*-* } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" {
target vect_strided } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
Index: testsuite/gcc.dg/vect/vect-strided-mult.c
===================================================================
--- testsuite/gcc.dg/vect/vect-strided-mult.c (revision 117273)
+++ testsuite/gcc.dg/vect/vect-strided-mult.c (working copy)
@@ -71,5 +71,5 @@ int main (void)
return 0;
}
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" {
target powerpc*-*-* } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" {
target vect_strided } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
Index: testsuite/gcc.dg/vect/vect-strided-u8-i8-gap2.c
===================================================================
--- testsuite/gcc.dg/vect/vect-strided-u8-i8-gap2.c (revision 117273)
+++ testsuite/gcc.dg/vect/vect-strided-u8-i8-gap2.c (working copy)
@@ -79,5 +79,5 @@ int main (void)
return 0;
}
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" {
target powerpc*-*-* } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" {
target vect_strided } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
Index: testsuite/gcc.dg/vect/vect-strided-u8-i2.c
===================================================================
--- testsuite/gcc.dg/vect/vect-strided-u8-i2.c (revision 117273)
+++ testsuite/gcc.dg/vect/vect-strided-u8-i2.c (working copy)
@@ -54,5 +54,5 @@ int main (void)
return 0;
}
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" {
target powerpc*-*-* } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" {
target vect_strided } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
Index: testsuite/gcc.dg/vect/vect-strided-a-u16-mult.c
===================================================================
--- testsuite/gcc.dg/vect/vect-strided-a-u16-mult.c (revision 117273)
+++ testsuite/gcc.dg/vect/vect-strided-a-u16-mult.c (working copy)
@@ -62,5 +62,5 @@ int main (void)
return 0;
}
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" {
target powerpc*-*-* } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" {
target vect_strided } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
Index: testsuite/gcc.dg/vect/vect-strided-a-u8-i2-gap.c
===================================================================
--- testsuite/gcc.dg/vect/vect-strided-a-u8-i2-gap.c (revision 117273)
+++ testsuite/gcc.dg/vect/vect-strided-a-u8-i2-gap.c (working copy)
@@ -69,5 +69,5 @@ int main (void)
return 0;
}
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" {
target powerpc*-*-* } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" {
target vect_strided } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
Index: testsuite/gcc.dg/vect/vect-strided-a-u16-i2.c
===================================================================
--- testsuite/gcc.dg/vect/vect-strided-a-u16-i2.c (revision 117273)
+++ testsuite/gcc.dg/vect/vect-strided-a-u16-i2.c (working copy)
@@ -55,5 +55,5 @@ int main (void)
return 0;
}
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" {
target powerpc*-*-* } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" {
target vect_strided } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
Index: testsuite/gcc.dg/vect/vect-1.c
===================================================================
--- testsuite/gcc.dg/vect/vect-1.c (revision 117273)
+++ testsuite/gcc.dg/vect/vect-1.c (working copy)
@@ -86,8 +86,8 @@ foo (int n)
fbar (a);
}
-/* { dg-final { scan-tree-dump-times "vectorized 4 loops" 1 "vect" {
target powerpc*-*-* } } } */
-/* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" { xfail
powerpc*-*-* } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 4 loops" 1 "vect" {
target vect_strided } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" { xfail
vect_strided} } } */
/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 0
"vect" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
Index: testsuite/gcc.dg/vect/noreasoc-vect-strided-reduc-u8-i2.c
===================================================================
--- testsuite/gcc.dg/vect/noreasoc-vect-strided-reduc-u8-i2.c (revision
117273)
+++ testsuite/gcc.dg/vect/noreasoc-vect-strided-reduc-u8-i2.c (working
copy)
@@ -54,5 +54,5 @@ int main (void)
return 0;
}
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" {
target powerpc*-*-* } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" {
target vect_strided } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
Index: testsuite/gcc.dg/vect/vect-strided-u32-i4.c
===================================================================
--- testsuite/gcc.dg/vect/vect-strided-u32-i4.c (revision 117273)
+++ testsuite/gcc.dg/vect/vect-strided-u32-i4.c (working copy)
@@ -63,5 +63,5 @@ int main (void)
return 0;
}
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" {
target powerpc*-*-* } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" {
target vect_strided } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
Index: testsuite/gcc.dg/vect/vect-strided-u16-i4.c
===================================================================
--- testsuite/gcc.dg/vect/vect-strided-u16-i4.c (revision 117273)
+++ testsuite/gcc.dg/vect/vect-strided-u16-i4.c (working copy)
@@ -68,5 +68,5 @@ int main (void)
return 0;
}
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" {
target powerpc*-*-* } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" {
target vect_strided } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
Index: testsuite/gcc.dg/vect/vect-strided-u32-i8.c
===================================================================
--- testsuite/gcc.dg/vect/vect-strided-u32-i8.c (revision 117273)
+++ testsuite/gcc.dg/vect/vect-strided-u32-i8.c (working copy)
@@ -77,5 +77,5 @@ int main (void)
return 0;
}
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" {
target powerpc*-*-* } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" {
target vect_strided } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
Index: testsuite/gcc.dg/vect/vect-strided-u8-i8-gap7.c
===================================================================
--- testsuite/gcc.dg/vect/vect-strided-u8-i8-gap7.c (revision 117273)
+++ testsuite/gcc.dg/vect/vect-strided-u8-i8-gap7.c (working copy)
@@ -83,5 +83,5 @@ int main (void)
return 0;
}
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" {
target powerpc*-*-* } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" {
target vect_strided } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
Index: testsuite/gcc.dg/vect/vect-107.c
===================================================================
--- testsuite/gcc.dg/vect/vect-107.c (revision 117273)
+++ testsuite/gcc.dg/vect/vect-107.c (working copy)
@@ -39,6 +39,6 @@ int main (void)
}
/* Needs interleaving support. */
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" {
target powerpc*-*-* } } } */
-/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" { xfail
powerpc*-*-* } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" {
target vect_strided } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" { xfail
vect_strided } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
Index: testsuite/gcc.dg/vect/vect-strided-mult-char.c
===================================================================
--- testsuite/gcc.dg/vect/vect-strided-mult-char.c (revision 117273)
+++ testsuite/gcc.dg/vect/vect-strided-mult-char.c (working copy)
@@ -71,5 +71,5 @@ int main (void)
return 0;
}
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" {
target powerpc*-*-* } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" {
target vect_strided } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
Index: testsuite/gcc.dg/vect/vect-98.c
===================================================================
--- testsuite/gcc.dg/vect/vect-98.c (revision 117273)
+++ testsuite/gcc.dg/vect/vect-98.c (working copy)
@@ -36,7 +36,7 @@ int main (void)
return main1 (ia);
}
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" {
target powerpc*-*-* } } } */
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 0 "vect" { xfail
powerpc*-*-* } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" {
target vect_strided } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 0 "vect" { xfail
vect_strided } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
Index: testsuite/gcc.dg/vect/vect-strided-float.c
===================================================================
--- testsuite/gcc.dg/vect/vect-strided-float.c (revision 117273)
+++ testsuite/gcc.dg/vect/vect-strided-float.c (working copy)
@@ -38,6 +38,6 @@ int main (void)
}
/* Needs interleaving support. */
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" {
target powerpc*-*-* } } } */
-/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" { xfail
powerpc*-*-* } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" {
target vect_strided } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" { xfail
vect_strided } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
Index: testsuite/gcc.dg/vect/vect-strided-u8-i2-gap.c
===================================================================
--- testsuite/gcc.dg/vect/vect-strided-u8-i2-gap.c (revision 117273)
+++ testsuite/gcc.dg/vect/vect-strided-u8-i2-gap.c (working copy)
@@ -71,5 +71,5 @@ int main (void)
return 0;
}
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" {
target powerpc*-*-* } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" {
target vect_strided } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
Index: testsuite/gcc.dg/vect/vect-strided-a-u8-i8-gap2.c
===================================================================
--- testsuite/gcc.dg/vect/vect-strided-a-u8-i8-gap2.c (revision 117273)
+++ testsuite/gcc.dg/vect/vect-strided-a-u8-i8-gap2.c (working copy)
@@ -77,5 +77,5 @@ int main (void)
return 0;
}
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" {
target powerpc*-*-* } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" {
target vect_strided } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
Index: testsuite/gcc.dg/vect/vect-strided-u8-i8-gap4.c
===================================================================
--- testsuite/gcc.dg/vect/vect-strided-u8-i8-gap4.c (revision 117273)
+++ testsuite/gcc.dg/vect/vect-strided-u8-i8-gap4.c (working copy)
@@ -80,5 +80,5 @@ int main (void)
return 0;
}
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" {
target powerpc*-*-* } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" {
target vect_strided } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
Index: testsuite/gcc.dg/vect/vect-strided-a-u32-mult.c
===================================================================
--- testsuite/gcc.dg/vect/vect-strided-a-u32-mult.c (revision 117273)
+++ testsuite/gcc.dg/vect/vect-strided-a-u32-mult.c (working copy)
@@ -62,5 +62,5 @@ int main (void)
return 0;
}
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" {
target powerpc*-*-* } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" {
target vect_strided } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
Index: testsuite/gcc.dg/vect/vect-strided-u8-i8.c
===================================================================
--- testsuite/gcc.dg/vect/vect-strided-u8-i8.c (revision 117273)
+++ testsuite/gcc.dg/vect/vect-strided-u8-i8.c (working copy)
@@ -86,5 +86,5 @@ int main (void)
return 0;
}
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" {
target powerpc*-*-* } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" {
target vect_strided } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
Index: testsuite/gcc.dg/vect/vect-strided-u32-mult.c
===================================================================
--- testsuite/gcc.dg/vect/vect-strided-u32-mult.c (revision 117273)
+++ testsuite/gcc.dg/vect/vect-strided-u32-mult.c (working copy)
@@ -62,5 +62,5 @@ int main (void)
return 0;
}
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" {
target powerpc*-*-* } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" {
target vect_strided } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
Index: testsuite/gcc.dg/vect/vect-strided-a-u16-i4.c
===================================================================
--- testsuite/gcc.dg/vect/vect-strided-a-u16-i4.c (revision 117273)
+++ testsuite/gcc.dg/vect/vect-strided-a-u16-i4.c (working copy)
@@ -68,5 +68,5 @@ int main (void)
return 0;
}
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" {
target powerpc*-*-* } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" {
target vect_strided } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
Index: testsuite/gcc.dg/vect/vect-strided-u16-i2.c
===================================================================
--- testsuite/gcc.dg/vect/vect-strided-u16-i2.c (revision 117273)
+++ testsuite/gcc.dg/vect/vect-strided-u16-i2.c (working copy)
@@ -55,5 +55,5 @@ int main (void)
return 0;
}
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" {
target powerpc*-*-* } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" {
target vect_strided } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
Index: testsuite/gcc.dg/vect/vect-strided-a-u8-i8-gap7.c
===================================================================
--- testsuite/gcc.dg/vect/vect-strided-a-u8-i8-gap7.c (revision 117273)
+++ testsuite/gcc.dg/vect/vect-strided-a-u8-i8-gap7.c (working copy)
@@ -81,5 +81,5 @@ int main (void)
return 0;
}
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" {
target powerpc*-*-* } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" {
target vect_strided } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
Index: testsuite/lib/target-supports.exp
===================================================================
--- testsuite/lib/target-supports.exp (revision 117273)
+++ testsuite/lib/target-supports.exp (working copy)
@@ -1359,6 +1359,27 @@ proc check_effective_target_vect_short_m
return $et_vect_short_mult_saved
}
+# Return 1 if the target supports vectorization of strided accesses, 0
otherwise.
+
+proc check_effective_target_vect_strided { } {
+ global et_vect_strided_saved
+
+ if [info exists et_vect_strided_saved] {
+ verbose "check_effective_target_vect_strided: using cached result"
2
+ } else {
+ set et_vect_strided_saved 0
+ if { [istarget powerpc*-*-*]
+ || [istarget ia64-*-*]
+ || [istarget i?86-*-*]
+ || [istarget x86_64-*-*] } {
+ set et_vect_strided_saved 1
+ }
+ }
+
+ verbose "check_effective_target_vect_strided: returning
$et_vect_strided_saved" 2
+ return $et_vect_strided_saved
+}
+
# Return 1 if the target supports atomic operations on "int" and "long".
proc check_effective_target_sync_int_long { } {
Index: tree-vect-transform.c
===================================================================
--- tree-vect-transform.c (revision 117273)
+++ tree-vect-transform.c (working copy)
@@ -2564,23 +2564,35 @@ vect_permute_store_chain (VEC(tree,heap)
/* high = interleave_high (vect1, vect2); */
perm_dest = create_tmp_var (vectype, "vect_inter_high");
add_referenced_tmp_var (perm_dest);
- perm_stmt = build2 (MODIFY_EXPR, vectype, perm_dest,
- build2 (VEC_INTERLEAVE_HIGH_EXPR, vectype,
vect1,
- vect2));
- high = make_ssa_name (perm_dest, perm_stmt);
+ if (BYTES_BIG_ENDIAN)
+ perm_stmt = build2 (MODIFY_EXPR, vectype, perm_dest,
+ build2 (VEC_INTERLEAVE_HIGH_EXPR, vectype,
vect1,
+ vect2));
+ else
+ perm_stmt = build2 (MODIFY_EXPR, vectype, perm_dest,
+ build2 (VEC_INTERLEAVE_LOW_EXPR, vectype,
vect1,
+ vect2));
+ high = make_ssa_name (perm_dest, perm_stmt);
TREE_OPERAND (perm_stmt, 0) = high;
vect_finish_stmt_generation (stmt, perm_stmt, bsi);
- VEC_replace (tree, *result_chain, 2*j, high);
+ mark_new_vars_to_rename (perm_stmt);
+ VEC_replace (tree, *result_chain, 2*j, high);
/* low = interleave_low (vect1, vect2); */
perm_dest = create_tmp_var (vectype, "vect_inter_low");
add_referenced_tmp_var (perm_dest);
- perm_stmt = build2 (MODIFY_EXPR, vectype, perm_dest,
- build2 (VEC_INTERLEAVE_LOW_EXPR, vectype,
vect1,
- vect2));
- low = make_ssa_name (perm_dest, perm_stmt);
+ if (BYTES_BIG_ENDIAN)
+ perm_stmt = build2 (MODIFY_EXPR, vectype, perm_dest,
+ build2 (VEC_INTERLEAVE_LOW_EXPR, vectype,
vect1,
+ vect2));
+ else
+ perm_stmt = build2 (MODIFY_EXPR, vectype, perm_dest,
+ build2 (VEC_INTERLEAVE_HIGH_EXPR, vectype,
vect1,
+ vect2));
+ low = make_ssa_name (perm_dest, perm_stmt);
TREE_OPERAND (perm_stmt, 0) = low;
vect_finish_stmt_generation (stmt, perm_stmt, bsi);
+ mark_new_vars_to_rename (perm_stmt);
VEC_replace (tree, *result_chain, 2*j+1, low);
}
dr_chain = VEC_copy (tree, heap, *result_chain);