Re: [PATCH] Extend vect_recog_bool_pattern also to stores into bool memory (PR tree-optimization/50596)
- From: Richard Guenther <rguenther@suse.de>
- To: Jakub Jelinek <jakub@redhat.com>
- Cc: Ira Rosen <ira.rosen@linaro.org>, gcc-patches@gcc.gnu.org
- Date: Thu, 20 Oct 2011 11:42:01 +0200 (CEST)
- Subject: Re: [PATCH] Extend vect_recog_bool_pattern also to stores into bool memory (PR tree-optimization/50596)
- References: <20111019171440.GE2210@tyan-ft48-01.lab.bos.redhat.com>
On Wed, 19 Oct 2011, Jakub Jelinek wrote:
> Hi!
>
> Similarly to casts of bool to integer, stores into bool arrays can
> be handled as well. We just need to ensure tree-vect-data-refs.c
> doesn't reject vectorization before tree-vect-patterns.c has a chance
> to optimize it.
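> For example (this mirrors f2 in the new testcase below), a loop like
>
>   _Bool k[1024];
>   float a[1024], b[1024], c[1024], d[1024];
>   ...
>   for (i = 0; i < 1024; ++i)
>     k[i] = (a[i] < b[i]) & (c[i] < d[i]);
>
> can then be vectorized.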
>
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
Ok with ...
> 2011-10-19 Jakub Jelinek <jakub@redhat.com>
>
> PR tree-optimization/50596
> * tree-vect-stmts.c (vect_mark_relevant): Only use
> FOR_EACH_IMM_USE_FAST if lhs is SSA_NAME.
> (vectorizable_store): If is_pattern_stmt_p look through
> VIEW_CONVERT_EXPR on lhs.
> * tree-vect-patterns.c (vect_recog_bool_pattern): Optimize
> also stores into bool memory in addition to casts from bool
> to integral types.
> (vect_mark_pattern_stmts): If pattern_stmt already has vinfo
> created, don't create it again.
> * tree-vect-data-refs.c (vect_analyze_data_refs): For stores
> into bool memory use vectype for integral type corresponding
> to bool's mode.
> * tree-vect-loop.c (vect_determine_vectorization_factor): Give up
> if a store into bool memory hasn't been replaced by the pattern
> recognizer.
>
> * gcc.dg/vect/vect-cond-10.c: New test.
>
> --- gcc/tree-vect-stmts.c.jj 2011-10-18 23:52:07.000000000 +0200
> +++ gcc/tree-vect-stmts.c 2011-10-19 14:19:00.000000000 +0200
> @@ -159,19 +159,20 @@ vect_mark_relevant (VEC(gimple,heap) **w
> /* This use is out of pattern use, if LHS has other uses that are
> pattern uses, we should mark the stmt itself, and not the pattern
> stmt. */
> - FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
> - {
> - if (is_gimple_debug (USE_STMT (use_p)))
> - continue;
> - use_stmt = USE_STMT (use_p);
> + if (TREE_CODE (lhs) == SSA_NAME)
> + FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
> + {
> + if (is_gimple_debug (USE_STMT (use_p)))
> + continue;
> + use_stmt = USE_STMT (use_p);
>
> - if (vinfo_for_stmt (use_stmt)
> - && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
> - {
> - found = true;
> - break;
> - }
> - }
> + if (vinfo_for_stmt (use_stmt)
> + && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
> + {
> + found = true;
> + break;
> + }
> + }
> }
>
> if (!found)
> @@ -3656,6 +3657,9 @@ vectorizable_store (gimple stmt, gimple_
> return false;
>
> scalar_dest = gimple_assign_lhs (stmt);
> + if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
> + && is_pattern_stmt_p (stmt_info))
> + scalar_dest = TREE_OPERAND (scalar_dest, 0);
> if (TREE_CODE (scalar_dest) != ARRAY_REF
> && TREE_CODE (scalar_dest) != INDIRECT_REF
> && TREE_CODE (scalar_dest) != COMPONENT_REF
Just change the if () stmt to
if (!handled_component_p (scalar_dest)
&& TREE_CODE (scalar_dest) != MEM_REF)
return false;
> --- gcc/tree-vect-patterns.c.jj 2011-10-18 23:52:05.000000000 +0200
> +++ gcc/tree-vect-patterns.c 2011-10-19 13:55:27.000000000 +0200
> @@ -1933,6 +1933,50 @@ vect_recog_bool_pattern (VEC (gimple, he
> VEC_safe_push (gimple, heap, *stmts, last_stmt);
> return pattern_stmt;
> }
> + else if (rhs_code == SSA_NAME
> + && STMT_VINFO_DATA_REF (stmt_vinfo))
> + {
> + stmt_vec_info pattern_stmt_info;
> + vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
> + gcc_assert (vectype != NULL_TREE);
> + if (!check_bool_pattern (var, loop_vinfo))
> + return NULL;
> +
> + rhs = adjust_bool_pattern (var, TREE_TYPE (vectype), NULL_TREE, stmts);
> + if (TREE_CODE (lhs) == MEM_REF || TREE_CODE (lhs) == TARGET_MEM_REF)
> + {
> + lhs = copy_node (lhs);
We don't handle TARGET_MEM_REF in vectorizable_store, so no need to
do it here. In fact, just unconditionally do ...
> + TREE_TYPE (lhs) = TREE_TYPE (vectype);
> + }
> + else
> + lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
... this (wrap it in a V_C_E). No need to special-case any
MEM_REFs.
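That is, after dropping the special case this part is just (untested):

      rhs = adjust_bool_pattern (var, TREE_TYPE (vectype), NULL_TREE, stmts);
      lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);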
> + if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
This should never be false, so you might as well unconditionally build
the conversion stmt.
> + {
> + tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
> + gimple cast_stmt
> + = gimple_build_assign_with_ops (NOP_EXPR, rhs2, rhs, NULL_TREE);
> + STMT_VINFO_PATTERN_DEF_STMT (stmt_vinfo) = cast_stmt;
> + rhs = rhs2;
> + }
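I.e. simply (untested):

      tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
      gimple cast_stmt
        = gimple_build_assign_with_ops (NOP_EXPR, rhs2, rhs, NULL_TREE);
      STMT_VINFO_PATTERN_DEF_STMT (stmt_vinfo) = cast_stmt;
      rhs = rhs2;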
> + pattern_stmt
> + = gimple_build_assign_with_ops (SSA_NAME, lhs, rhs, NULL_TREE);
> + pattern_stmt_info = new_stmt_vec_info (pattern_stmt, loop_vinfo, NULL);
> + set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
> + STMT_VINFO_DATA_REF (pattern_stmt_info)
> + = STMT_VINFO_DATA_REF (stmt_vinfo);
> + STMT_VINFO_DR_BASE_ADDRESS (pattern_stmt_info)
> + = STMT_VINFO_DR_BASE_ADDRESS (stmt_vinfo);
> + STMT_VINFO_DR_INIT (pattern_stmt_info) = STMT_VINFO_DR_INIT (stmt_vinfo);
> + STMT_VINFO_DR_OFFSET (pattern_stmt_info)
> + = STMT_VINFO_DR_OFFSET (stmt_vinfo);
> + STMT_VINFO_DR_STEP (pattern_stmt_info) = STMT_VINFO_DR_STEP (stmt_vinfo);
> + STMT_VINFO_DR_ALIGNED_TO (pattern_stmt_info)
> + = STMT_VINFO_DR_ALIGNED_TO (stmt_vinfo);
> + *type_out = vectype;
> + *type_in = vectype;
> + VEC_safe_push (gimple, heap, *stmts, last_stmt);
> + return pattern_stmt;
> + }
> else
> return NULL;
> }
> @@ -1949,19 +1993,22 @@ vect_mark_pattern_stmts (gimple orig_stm
> loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (orig_stmt_info);
> gimple def_stmt;
>
> - set_vinfo_for_stmt (pattern_stmt,
> - new_stmt_vec_info (pattern_stmt, loop_vinfo, NULL));
> - gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt));
> pattern_stmt_info = vinfo_for_stmt (pattern_stmt);
> + if (pattern_stmt_info == NULL)
> + {
> + pattern_stmt_info = new_stmt_vec_info (pattern_stmt, loop_vinfo, NULL);
> + set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
> + }
> + gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt));
>
> STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt;
> STMT_VINFO_DEF_TYPE (pattern_stmt_info)
> - = STMT_VINFO_DEF_TYPE (orig_stmt_info);
> + = STMT_VINFO_DEF_TYPE (orig_stmt_info);
> STMT_VINFO_VECTYPE (pattern_stmt_info) = pattern_vectype;
> STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
> STMT_VINFO_RELATED_STMT (orig_stmt_info) = pattern_stmt;
> STMT_VINFO_PATTERN_DEF_STMT (pattern_stmt_info)
> - = STMT_VINFO_PATTERN_DEF_STMT (orig_stmt_info);
> + = STMT_VINFO_PATTERN_DEF_STMT (orig_stmt_info);
> if (STMT_VINFO_PATTERN_DEF_STMT (pattern_stmt_info))
> {
> def_stmt = STMT_VINFO_PATTERN_DEF_STMT (pattern_stmt_info);
> --- gcc/tree-vect-data-refs.c.jj 2011-09-20 21:43:07.000000000 +0200
> +++ gcc/tree-vect-data-refs.c 2011-10-19 14:37:44.000000000 +0200
> @@ -2752,8 +2752,23 @@ vect_analyze_data_refs (loop_vec_info lo
>
> /* Set vectype for STMT. */
> scalar_type = TREE_TYPE (DR_REF (dr));
> - STMT_VINFO_VECTYPE (stmt_info) =
> - get_vectype_for_scalar_type (scalar_type);
> + STMT_VINFO_VECTYPE (stmt_info)
> + = get_vectype_for_scalar_type (scalar_type);
> + if (!STMT_VINFO_VECTYPE (stmt_info)
> + && ((TYPE_PRECISION (scalar_type) == 1
> + && TYPE_UNSIGNED (scalar_type))
> + || TREE_CODE (scalar_type) == BOOLEAN_TYPE)
> + && DR_IS_WRITE (dr)
> + && loop_vinfo)
> + {
> + /* For bool stores use integral type with the same
> + TYPE_MODE, but bigger precision. vect_recog_bool_pattern
> + can transform those into something vectorizable. */
> + unsigned int modesize = GET_MODE_BITSIZE (TYPE_MODE (scalar_type));
> + scalar_type = build_nonstandard_integer_type (modesize, 1);
> + STMT_VINFO_VECTYPE (stmt_info)
> + = get_vectype_for_scalar_type (scalar_type);
> + }
> if (!STMT_VINFO_VECTYPE (stmt_info))
> {
> if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
> --- gcc/tree-vect-loop.c.jj 2011-09-26 14:06:52.000000000 +0200
> +++ gcc/tree-vect-loop.c 2011-10-19 14:49:18.000000000 +0200
> @@ -1,5 +1,5 @@
> /* Loop Vectorization
> - Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
> + Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
> Free Software Foundation, Inc.
> Contributed by Dorit Naishlos <dorit@il.ibm.com> and
> Ira Rosen <irar@il.ibm.com>
> @@ -347,6 +347,28 @@ vect_determine_vectorization_factor (loo
> gcc_assert (STMT_VINFO_DATA_REF (stmt_info)
> || is_pattern_stmt_p (stmt_info));
> vectype = STMT_VINFO_VECTYPE (stmt_info);
> + if (STMT_VINFO_DATA_REF (stmt_info))
> + {
> + struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
> + tree scalar_type = TREE_TYPE (DR_REF (dr));
> + /* vect_analyze_data_refs will allow bool writes through,
> + in order to allow vect_recog_bool_pattern to transform
> + those. If they couldn't be transformed, give up now. */
> + if (((TYPE_PRECISION (scalar_type) == 1
> + && TYPE_UNSIGNED (scalar_type))
> + || TREE_CODE (scalar_type) == BOOLEAN_TYPE)
Shouldn't it always be possible to vectorize those? For loads
we can assume the memory contains only 1 or 0 (we assume that for
scalar loads), and for stores we can mask out all other bits explicitly
if you add support for truncating conversions to non-mode precision
(in fact, we could support non-mode-precision vectorization that way
in general, though not bitfield loads or extending conversions).
So maybe that obsoletes my conditional approval ;) Can you
investigate whether the above would work?
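To be concrete, an untested sketch of the store side, with purely
illustrative names: mask the bool value down to its low bit before
the store, e.g.

      tree masked = vect_recog_temp_ssa_var (TREE_TYPE (rhs), NULL);
      gimple mask_stmt
        = gimple_build_assign_with_ops (BIT_AND_EXPR, masked, rhs,
                                        build_int_cst (TREE_TYPE (rhs), 1));

so only the low bit ever reaches memory.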
Thanks,
Richard.
> + && DR_IS_WRITE (dr)
> + && !is_pattern_stmt_p (stmt_info))
> + {
> + if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
> + {
> + fprintf (vect_dump,
> + "not vectorized: unsupported data-type ");
> + print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
> + }
> + return false;
> + }
> + }
> }
> else
> {
> --- gcc/testsuite/gcc.dg/vect/vect-cond-10.c.jj 2011-10-19 15:54:42.000000000 +0200
> +++ gcc/testsuite/gcc.dg/vect/vect-cond-10.c 2011-10-19 16:00:22.000000000 +0200
> @@ -0,0 +1,165 @@
> +/* { dg-require-effective-target vect_cond_mixed } */
> +
> +#include "tree-vect.h"
> +
> +#define N 1024
> +float a[N], b[N], c[N], d[N];
> +_Bool k[N];
> +
> +__attribute__((noinline, noclone)) void
> +f1 (void)
> +{
> + int i;
> + for (i = 0; i < N; ++i)
> + {
> + int x = a[i] < b[i];
> + int y = c[i] < d[i];
> + k[i] = x & y;
> + }
> +}
> +
> +__attribute__((noinline, noclone)) void
> +f2 (void)
> +{
> + int i;
> + for (i = 0; i < N; ++i)
> + k[i] = (a[i] < b[i]) & (c[i] < d[i]);
> +}
> +
> +__attribute__((noinline, noclone)) void
> +f3 (void)
> +{
> + int i;
> + for (i = 0; i < N; ++i)
> + {
> + int x = a[i] < b[i];
> + int y = c[i] < d[i];
> + k[i] = x | y;
> + }
> +}
> +
> +__attribute__((noinline, noclone)) void
> +f4 (void)
> +{
> + int i;
> + for (i = 0; i < N; ++i)
> + k[i] = (a[i] < b[i]) | (c[i] < d[i]);
> +}
> +
> +__attribute__((noinline, noclone)) void
> +f5 (_Bool *p)
> +{
> + int i;
> + for (i = 0; i < N; ++i)
> + {
> + int x = a[i] < b[i];
> + int y = c[i] < d[i];
> + p[i] = x & y;
> + }
> +}
> +
> +__attribute__((noinline, noclone)) void
> +f6 (_Bool *p)
> +{
> + int i;
> + for (i = 0; i < N; ++i)
> + p[i] = (a[i] < b[i]) & (c[i] < d[i]);
> +}
> +
> +__attribute__((noinline, noclone)) void
> +f7 (_Bool *p)
> +{
> + int i;
> + for (i = 0; i < N; ++i)
> + {
> + int x = a[i] < b[i];
> + int y = c[i] < d[i];
> + p[i] = x | y;
> + }
> +}
> +
> +__attribute__((noinline, noclone)) void
> +f8 (_Bool *p)
> +{
> + int i;
> + for (i = 0; i < N; ++i)
> + p[i] = (a[i] < b[i]) | (c[i] < d[i]);
> +}
> +
> +int
> +main ()
> +{
> + int i;
> +
> + check_vect ();
> +
> + for (i = 0; i < N; i++)
> + {
> + switch (i % 9)
> + {
> + case 0: asm (""); a[i] = - i - 1; b[i] = i + 1; break;
> + case 1: a[i] = 0; b[i] = 0; break;
> + case 2: a[i] = i + 1; b[i] = - i - 1; break;
> + case 3: a[i] = i; b[i] = i + 7; break;
> + case 4: a[i] = i; b[i] = i; break;
> + case 5: a[i] = i + 16; b[i] = i + 3; break;
> + case 6: a[i] = - i - 5; b[i] = - i; break;
> + case 7: a[i] = - i; b[i] = - i; break;
> + case 8: a[i] = - i; b[i] = - i - 7; break;
> + }
> + }
> + for (i = 0; i < N; i++)
> + {
> + switch ((i / 9) % 3)
> + {
> + case 0: c[i] = a[i / 9]; d[i] = b[i / 9]; break;
> + case 1: c[i] = a[i / 9 + 3]; d[i] = b[i / 9 + 3]; break;
> + case 2: c[i] = a[i / 9 + 6]; d[i] = b[i / 9 + 6]; break;
> + }
> + }
> + f1 ();
> + for (i = 0; i < N; i++)
> + if (k[i] != ((i % 3) == 0 && ((i / 9) % 3) == 0))
> + abort ();
> + __builtin_memset (k, 0, sizeof (k));
> + f2 ();
> + for (i = 0; i < N; i++)
> + if (k[i] != ((i % 3) == 0 && ((i / 9) % 3) == 0))
> + abort ();
> + __builtin_memset (k, 0, sizeof (k));
> + f3 ();
> + for (i = 0; i < N; i++)
> + if (k[i] != ((i % 3) == 0 || ((i / 9) % 3) == 0))
> + abort ();
> + __builtin_memset (k, 0, sizeof (k));
> + f4 ();
> + for (i = 0; i < N; i++)
> + if (k[i] != ((i % 3) == 0 || ((i / 9) % 3) == 0))
> + abort ();
> + __builtin_memset (k, 0, sizeof (k));
> + f5 (k);
> + for (i = 0; i < N; i++)
> + if (k[i] != ((i % 3) == 0 && ((i / 9) % 3) == 0))
> + abort ();
> + __builtin_memset (k, 0, sizeof (k));
> + f6 (k);
> + for (i = 0; i < N; i++)
> + if (k[i] != ((i % 3) == 0 && ((i / 9) % 3) == 0))
> + abort ();
> + __builtin_memset (k, 0, sizeof (k));
> + f7 (k);
> + for (i = 0; i < N; i++)
> + if (k[i] != ((i % 3) == 0 || ((i / 9) % 3) == 0))
> + abort ();
> + __builtin_memset (k, 0, sizeof (k));
> + f8 (k);
> + for (i = 0; i < N; i++)
> + if (k[i] != ((i % 3) == 0 || ((i / 9) % 3) == 0))
> + abort ();
> + __builtin_memset (k, 0, sizeof (k));
> +
> + return 0;
> +}
> +
> +/* { dg-final { scan-tree-dump-times "note: vectorized 1 loops" 8 "vect" } } */
> +/* { dg-final { cleanup-tree-dump "vect" } } */
>
> Jakub
--
Richard Guenther <rguenther@suse.de>
SUSE / SUSE Labs
SUSE LINUX Products GmbH - Nuernberg - AG Nuernberg - HRB 16746
GF: Jeff Hawn, Jennifer Guild, Felix Imendörffer