This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[PATCH] Extend vect_recog_bool_pattern also to stores into bool memory (PR tree-optimization/50596)
- From: Jakub Jelinek <jakub at redhat dot com>
- To: Ira Rosen <ira dot rosen at linaro dot org>, Richard Guenther <rguenther at suse dot de>
- Cc: gcc-patches at gcc dot gnu dot org
- Date: Wed, 19 Oct 2011 19:14:40 +0200
- Subject: [PATCH] Extend vect_recog_bool_pattern also to stores into bool memory (PR tree-optimization/50596)
- Reply-to: Jakub Jelinek <jakub at redhat dot com>
Hi!
Like casts of bool to integer, stores into bool arrays can also be
handled by vect_recog_bool_pattern. We just need to ensure that
tree-vect-data-refs.c doesn't reject vectorization before
tree-vect-patterns.c has a chance to optimize it.
Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
2011-10-19 Jakub Jelinek <jakub@redhat.com>
PR tree-optimization/50596
* tree-vect-stmts.c (vect_mark_relevant): Only use
FOR_EACH_IMM_USE_FAST if lhs is SSA_NAME.
(vectorizable_store): If is_pattern_stmt_p look through
VIEW_CONVERT_EXPR on lhs.
* tree-vect-patterns.c (vect_recog_bool_pattern): Optimize
also stores into bool memory in addition to casts from bool
to integral types.
(vect_mark_pattern_stmts): If pattern_stmt already has vinfo
created, don't create it again.
* tree-vect-data-refs.c (vect_analyze_data_refs): For stores
into bool memory use vectype for integral type corresponding
to bool's mode.
* tree-vect-loop.c (vect_determine_vectorization_factor): Give up
if a store into bool memory hasn't been replaced by the pattern
recognizer.
* gcc.dg/vect/vect-cond-10.c: New test.
--- gcc/tree-vect-stmts.c.jj 2011-10-18 23:52:07.000000000 +0200
+++ gcc/tree-vect-stmts.c 2011-10-19 14:19:00.000000000 +0200
@@ -159,19 +159,20 @@ vect_mark_relevant (VEC(gimple,heap) **w
/* This use is out of pattern use, if LHS has other uses that are
pattern uses, we should mark the stmt itself, and not the pattern
stmt. */
- FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
- {
- if (is_gimple_debug (USE_STMT (use_p)))
- continue;
- use_stmt = USE_STMT (use_p);
+ if (TREE_CODE (lhs) == SSA_NAME)
+ FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
+ {
+ if (is_gimple_debug (USE_STMT (use_p)))
+ continue;
+ use_stmt = USE_STMT (use_p);
- if (vinfo_for_stmt (use_stmt)
- && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
- {
- found = true;
- break;
- }
- }
+ if (vinfo_for_stmt (use_stmt)
+ && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
+ {
+ found = true;
+ break;
+ }
+ }
}
if (!found)
@@ -3656,6 +3657,9 @@ vectorizable_store (gimple stmt, gimple_
return false;
scalar_dest = gimple_assign_lhs (stmt);
+ if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
+ && is_pattern_stmt_p (stmt_info))
+ scalar_dest = TREE_OPERAND (scalar_dest, 0);
if (TREE_CODE (scalar_dest) != ARRAY_REF
&& TREE_CODE (scalar_dest) != INDIRECT_REF
&& TREE_CODE (scalar_dest) != COMPONENT_REF
--- gcc/tree-vect-patterns.c.jj 2011-10-18 23:52:05.000000000 +0200
+++ gcc/tree-vect-patterns.c 2011-10-19 13:55:27.000000000 +0200
@@ -1933,6 +1933,50 @@ vect_recog_bool_pattern (VEC (gimple, he
VEC_safe_push (gimple, heap, *stmts, last_stmt);
return pattern_stmt;
}
+ else if (rhs_code == SSA_NAME
+ && STMT_VINFO_DATA_REF (stmt_vinfo))
+ {
+ stmt_vec_info pattern_stmt_info;
+ vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
+ gcc_assert (vectype != NULL_TREE);
+ if (!check_bool_pattern (var, loop_vinfo))
+ return NULL;
+
+ rhs = adjust_bool_pattern (var, TREE_TYPE (vectype), NULL_TREE, stmts);
+ if (TREE_CODE (lhs) == MEM_REF || TREE_CODE (lhs) == TARGET_MEM_REF)
+ {
+ lhs = copy_node (lhs);
+ TREE_TYPE (lhs) = TREE_TYPE (vectype);
+ }
+ else
+ lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
+ if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
+ {
+ tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
+ gimple cast_stmt
+ = gimple_build_assign_with_ops (NOP_EXPR, rhs2, rhs, NULL_TREE);
+ STMT_VINFO_PATTERN_DEF_STMT (stmt_vinfo) = cast_stmt;
+ rhs = rhs2;
+ }
+ pattern_stmt
+ = gimple_build_assign_with_ops (SSA_NAME, lhs, rhs, NULL_TREE);
+ pattern_stmt_info = new_stmt_vec_info (pattern_stmt, loop_vinfo, NULL);
+ set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
+ STMT_VINFO_DATA_REF (pattern_stmt_info)
+ = STMT_VINFO_DATA_REF (stmt_vinfo);
+ STMT_VINFO_DR_BASE_ADDRESS (pattern_stmt_info)
+ = STMT_VINFO_DR_BASE_ADDRESS (stmt_vinfo);
+ STMT_VINFO_DR_INIT (pattern_stmt_info) = STMT_VINFO_DR_INIT (stmt_vinfo);
+ STMT_VINFO_DR_OFFSET (pattern_stmt_info)
+ = STMT_VINFO_DR_OFFSET (stmt_vinfo);
+ STMT_VINFO_DR_STEP (pattern_stmt_info) = STMT_VINFO_DR_STEP (stmt_vinfo);
+ STMT_VINFO_DR_ALIGNED_TO (pattern_stmt_info)
+ = STMT_VINFO_DR_ALIGNED_TO (stmt_vinfo);
+ *type_out = vectype;
+ *type_in = vectype;
+ VEC_safe_push (gimple, heap, *stmts, last_stmt);
+ return pattern_stmt;
+ }
else
return NULL;
}
@@ -1949,19 +1993,22 @@ vect_mark_pattern_stmts (gimple orig_stm
loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (orig_stmt_info);
gimple def_stmt;
- set_vinfo_for_stmt (pattern_stmt,
- new_stmt_vec_info (pattern_stmt, loop_vinfo, NULL));
- gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt));
pattern_stmt_info = vinfo_for_stmt (pattern_stmt);
+ if (pattern_stmt_info == NULL)
+ {
+ pattern_stmt_info = new_stmt_vec_info (pattern_stmt, loop_vinfo, NULL);
+ set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
+ }
+ gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt));
STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt;
STMT_VINFO_DEF_TYPE (pattern_stmt_info)
- = STMT_VINFO_DEF_TYPE (orig_stmt_info);
+ = STMT_VINFO_DEF_TYPE (orig_stmt_info);
STMT_VINFO_VECTYPE (pattern_stmt_info) = pattern_vectype;
STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
STMT_VINFO_RELATED_STMT (orig_stmt_info) = pattern_stmt;
STMT_VINFO_PATTERN_DEF_STMT (pattern_stmt_info)
- = STMT_VINFO_PATTERN_DEF_STMT (orig_stmt_info);
+ = STMT_VINFO_PATTERN_DEF_STMT (orig_stmt_info);
if (STMT_VINFO_PATTERN_DEF_STMT (pattern_stmt_info))
{
def_stmt = STMT_VINFO_PATTERN_DEF_STMT (pattern_stmt_info);
--- gcc/tree-vect-data-refs.c.jj 2011-09-20 21:43:07.000000000 +0200
+++ gcc/tree-vect-data-refs.c 2011-10-19 14:37:44.000000000 +0200
@@ -2752,8 +2752,23 @@ vect_analyze_data_refs (loop_vec_info lo
/* Set vectype for STMT. */
scalar_type = TREE_TYPE (DR_REF (dr));
- STMT_VINFO_VECTYPE (stmt_info) =
- get_vectype_for_scalar_type (scalar_type);
+ STMT_VINFO_VECTYPE (stmt_info)
+ = get_vectype_for_scalar_type (scalar_type);
+ if (!STMT_VINFO_VECTYPE (stmt_info)
+ && ((TYPE_PRECISION (scalar_type) == 1
+ && TYPE_UNSIGNED (scalar_type))
+ || TREE_CODE (scalar_type) == BOOLEAN_TYPE)
+ && DR_IS_WRITE (dr)
+ && loop_vinfo)
+ {
+ /* For bool stores use integral type with the same
+ TYPE_MODE, but bigger precision. vect_recog_bool_pattern
+ can transform those into something vectorizable. */
+ unsigned int modesize = GET_MODE_BITSIZE (TYPE_MODE (scalar_type));
+ scalar_type = build_nonstandard_integer_type (modesize, 1);
+ STMT_VINFO_VECTYPE (stmt_info)
+ = get_vectype_for_scalar_type (scalar_type);
+ }
if (!STMT_VINFO_VECTYPE (stmt_info))
{
if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
--- gcc/tree-vect-loop.c.jj 2011-09-26 14:06:52.000000000 +0200
+++ gcc/tree-vect-loop.c 2011-10-19 14:49:18.000000000 +0200
@@ -1,5 +1,5 @@
/* Loop Vectorization
- Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
+ Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
Free Software Foundation, Inc.
Contributed by Dorit Naishlos <dorit@il.ibm.com> and
Ira Rosen <irar@il.ibm.com>
@@ -347,6 +347,28 @@ vect_determine_vectorization_factor (loo
gcc_assert (STMT_VINFO_DATA_REF (stmt_info)
|| is_pattern_stmt_p (stmt_info));
vectype = STMT_VINFO_VECTYPE (stmt_info);
+ if (STMT_VINFO_DATA_REF (stmt_info))
+ {
+ struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
+ tree scalar_type = TREE_TYPE (DR_REF (dr));
+ /* vect_analyze_data_refs will allow bool writes through,
+ in order to allow vect_recog_bool_pattern to transform
+ those. If they couldn't be transformed, give up now. */
+ if (((TYPE_PRECISION (scalar_type) == 1
+ && TYPE_UNSIGNED (scalar_type))
+ || TREE_CODE (scalar_type) == BOOLEAN_TYPE)
+ && DR_IS_WRITE (dr)
+ && !is_pattern_stmt_p (stmt_info))
+ {
+ if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
+ {
+ fprintf (vect_dump,
+ "not vectorized: unsupported data-type ");
+ print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
+ }
+ return false;
+ }
+ }
}
else
{
--- gcc/testsuite/gcc.dg/vect/vect-cond-10.c.jj 2011-10-19 15:54:42.000000000 +0200
+++ gcc/testsuite/gcc.dg/vect/vect-cond-10.c 2011-10-19 16:00:22.000000000 +0200
@@ -0,0 +1,165 @@
+/* { dg-require-effective-target vect_cond_mixed } */
+
+#include "tree-vect.h"
+
+#define N 1024
+float a[N], b[N], c[N], d[N];
+_Bool k[N];
+
+__attribute__((noinline, noclone)) void
+f1 (void)
+{
+ int i;
+ for (i = 0; i < N; ++i)
+ {
+ int x = a[i] < b[i];
+ int y = c[i] < d[i];
+ k[i] = x & y;
+ }
+}
+
+__attribute__((noinline, noclone)) void
+f2 (void)
+{
+ int i;
+ for (i = 0; i < N; ++i)
+ k[i] = (a[i] < b[i]) & (c[i] < d[i]);
+}
+
+__attribute__((noinline, noclone)) void
+f3 (void)
+{
+ int i;
+ for (i = 0; i < N; ++i)
+ {
+ int x = a[i] < b[i];
+ int y = c[i] < d[i];
+ k[i] = x | y;
+ }
+}
+
+__attribute__((noinline, noclone)) void
+f4 (void)
+{
+ int i;
+ for (i = 0; i < N; ++i)
+ k[i] = (a[i] < b[i]) | (c[i] < d[i]);
+}
+
+__attribute__((noinline, noclone)) void
+f5 (_Bool *p)
+{
+ int i;
+ for (i = 0; i < N; ++i)
+ {
+ int x = a[i] < b[i];
+ int y = c[i] < d[i];
+ p[i] = x & y;
+ }
+}
+
+__attribute__((noinline, noclone)) void
+f6 (_Bool *p)
+{
+ int i;
+ for (i = 0; i < N; ++i)
+ p[i] = (a[i] < b[i]) & (c[i] < d[i]);
+}
+
+__attribute__((noinline, noclone)) void
+f7 (_Bool *p)
+{
+ int i;
+ for (i = 0; i < N; ++i)
+ {
+ int x = a[i] < b[i];
+ int y = c[i] < d[i];
+ p[i] = x | y;
+ }
+}
+
+__attribute__((noinline, noclone)) void
+f8 (_Bool *p)
+{
+ int i;
+ for (i = 0; i < N; ++i)
+ p[i] = (a[i] < b[i]) | (c[i] < d[i]);
+}
+
+int
+main ()
+{
+ int i;
+
+ check_vect ();
+
+ for (i = 0; i < N; i++)
+ {
+ switch (i % 9)
+ {
+ case 0: asm (""); a[i] = - i - 1; b[i] = i + 1; break;
+ case 1: a[i] = 0; b[i] = 0; break;
+ case 2: a[i] = i + 1; b[i] = - i - 1; break;
+ case 3: a[i] = i; b[i] = i + 7; break;
+ case 4: a[i] = i; b[i] = i; break;
+ case 5: a[i] = i + 16; b[i] = i + 3; break;
+ case 6: a[i] = - i - 5; b[i] = - i; break;
+ case 7: a[i] = - i; b[i] = - i; break;
+ case 8: a[i] = - i; b[i] = - i - 7; break;
+ }
+ }
+ for (i = 0; i < N; i++)
+ {
+ switch ((i / 9) % 3)
+ {
+ case 0: c[i] = a[i / 9]; d[i] = b[i / 9]; break;
+ case 1: c[i] = a[i / 9 + 3]; d[i] = b[i / 9 + 3]; break;
+ case 2: c[i] = a[i / 9 + 6]; d[i] = b[i / 9 + 6]; break;
+ }
+ }
+ f1 ();
+ for (i = 0; i < N; i++)
+ if (k[i] != ((i % 3) == 0 && ((i / 9) % 3) == 0))
+ abort ();
+ __builtin_memset (k, 0, sizeof (k));
+ f2 ();
+ for (i = 0; i < N; i++)
+ if (k[i] != ((i % 3) == 0 && ((i / 9) % 3) == 0))
+ abort ();
+ __builtin_memset (k, 0, sizeof (k));
+ f3 ();
+ for (i = 0; i < N; i++)
+ if (k[i] != ((i % 3) == 0 || ((i / 9) % 3) == 0))
+ abort ();
+ __builtin_memset (k, 0, sizeof (k));
+ f4 ();
+ for (i = 0; i < N; i++)
+ if (k[i] != ((i % 3) == 0 || ((i / 9) % 3) == 0))
+ abort ();
+ __builtin_memset (k, 0, sizeof (k));
+ f5 (k);
+ for (i = 0; i < N; i++)
+ if (k[i] != ((i % 3) == 0 && ((i / 9) % 3) == 0))
+ abort ();
+ __builtin_memset (k, 0, sizeof (k));
+ f6 (k);
+ for (i = 0; i < N; i++)
+ if (k[i] != ((i % 3) == 0 && ((i / 9) % 3) == 0))
+ abort ();
+ __builtin_memset (k, 0, sizeof (k));
+ f7 (k);
+ for (i = 0; i < N; i++)
+ if (k[i] != ((i % 3) == 0 || ((i / 9) % 3) == 0))
+ abort ();
+ __builtin_memset (k, 0, sizeof (k));
+ f8 (k);
+ for (i = 0; i < N; i++)
+ if (k[i] != ((i % 3) == 0 || ((i / 9) % 3) == 0))
+ abort ();
+ __builtin_memset (k, 0, sizeof (k));
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "note: vectorized 1 loops" 8 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
Jakub