[PATCH] Extend vect_recog_bool_pattern also to stores into bool memory (PR tree-optimization/50596)

Richard Guenther rguenther@suse.de
Thu Oct 20 10:36:00 GMT 2011


On Wed, 19 Oct 2011, Jakub Jelinek wrote:

> Hi!
> 
> Similarly to casts of bool to integer, stores into bool arrays
> can be handled the same way.  Just we need to ensure tree-vect-data-refs.c
> doesn't reject vectorization before tree-vect-patterns.c has a chance
> to optimize it.
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

Ok with ...

> 2011-10-19  Jakub Jelinek  <jakub@redhat.com>
> 
> 	PR tree-optimization/50596
> 	* tree-vect-stmts.c (vect_mark_relevant): Only use
> 	FOR_EACH_IMM_USE_FAST if lhs is SSA_NAME.
> 	(vectorizable_store): If is_pattern_stmt_p look through
> 	VIEW_CONVERT_EXPR on lhs.
> 	* tree-vect-patterns.c (vect_recog_bool_pattern): Optimize
> 	also stores into bool memory in addition to casts from bool
> 	to integral types.
> 	(vect_mark_pattern_stmts): If pattern_stmt already has vinfo
> 	created, don't create it again.
> 	* tree-vect-data-refs.c (vect_analyze_data_refs): For stores
> 	into bool memory use vectype for integral type corresponding
> 	to bool's mode.
> 	* tree-vect-loop.c (vect_determine_vectorization_factor): Give up
> 	if a store into bool memory hasn't been replaced by the pattern
> 	recognizer.
> 
> 	* gcc.dg/vect/vect-cond-10.c: New test.
> 
> --- gcc/tree-vect-stmts.c.jj	2011-10-18 23:52:07.000000000 +0200
> +++ gcc/tree-vect-stmts.c	2011-10-19 14:19:00.000000000 +0200
> @@ -159,19 +159,20 @@ vect_mark_relevant (VEC(gimple,heap) **w
>            /* This use is out of pattern use, if LHS has other uses that are
>               pattern uses, we should mark the stmt itself, and not the pattern
>               stmt.  */
> -          FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
> -            {
> -              if (is_gimple_debug (USE_STMT (use_p)))
> -                continue;
> -              use_stmt = USE_STMT (use_p);
> +	  if (TREE_CODE (lhs) == SSA_NAME)
> +	    FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
> +	      {
> +		if (is_gimple_debug (USE_STMT (use_p)))
> +		  continue;
> +		use_stmt = USE_STMT (use_p);
>  
> -              if (vinfo_for_stmt (use_stmt)
> -                  && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
> -                {
> -                  found = true;
> -                  break;
> -                }
> -            }
> +		if (vinfo_for_stmt (use_stmt)
> +		    && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
> +		  {
> +		    found = true;
> +		    break;
> +		  }
> +	      }
>          }
>  
>        if (!found)
> @@ -3656,6 +3657,9 @@ vectorizable_store (gimple stmt, gimple_
>      return false;
>  
>    scalar_dest = gimple_assign_lhs (stmt);
> +  if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
> +      && is_pattern_stmt_p (stmt_info))
> +    scalar_dest = TREE_OPERAND (scalar_dest, 0);
>    if (TREE_CODE (scalar_dest) != ARRAY_REF
>        && TREE_CODE (scalar_dest) != INDIRECT_REF
>        && TREE_CODE (scalar_dest) != COMPONENT_REF

Just change the if () stmt to

 if (!handled_component_p (scalar_dest)
     && TREE_CODE (scalar_dest) != MEM_REF)
   return false;

> --- gcc/tree-vect-patterns.c.jj	2011-10-18 23:52:05.000000000 +0200
> +++ gcc/tree-vect-patterns.c	2011-10-19 13:55:27.000000000 +0200
> @@ -1933,6 +1933,50 @@ vect_recog_bool_pattern (VEC (gimple, he
>        VEC_safe_push (gimple, heap, *stmts, last_stmt);
>        return pattern_stmt;
>      }
> +  else if (rhs_code == SSA_NAME
> +	   && STMT_VINFO_DATA_REF (stmt_vinfo))
> +    {
> +      stmt_vec_info pattern_stmt_info;
> +      vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
> +      gcc_assert (vectype != NULL_TREE);
> +      if (!check_bool_pattern (var, loop_vinfo))
> +	return NULL;
> +
> +      rhs = adjust_bool_pattern (var, TREE_TYPE (vectype), NULL_TREE, stmts);
> +      if (TREE_CODE (lhs) == MEM_REF || TREE_CODE (lhs) == TARGET_MEM_REF)
> +	{
> +	  lhs = copy_node (lhs);

We don't handle TARGET_MEM_REF in vectorizable_store, so no need to
do it here.  In fact, just unconditionally do ...

> +	  TREE_TYPE (lhs) = TREE_TYPE (vectype);
> +	}
> +      else
> +	lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);

... this (wrap it in a V_C_E).  No need to special-case any
MEM_REFs.

> +      if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))

This should never be false, so you can as well unconditionally build
the conversion stmt.

> +	{
> +	  tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
> +	  gimple cast_stmt
> +	    = gimple_build_assign_with_ops (NOP_EXPR, rhs2, rhs, NULL_TREE);
> +	  STMT_VINFO_PATTERN_DEF_STMT (stmt_vinfo) = cast_stmt;
> +	  rhs = rhs2;
> +	}
> +      pattern_stmt
> +	= gimple_build_assign_with_ops (SSA_NAME, lhs, rhs, NULL_TREE);
> +      pattern_stmt_info = new_stmt_vec_info (pattern_stmt, loop_vinfo, NULL);
> +      set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
> +      STMT_VINFO_DATA_REF (pattern_stmt_info)
> +	= STMT_VINFO_DATA_REF (stmt_vinfo);
> +      STMT_VINFO_DR_BASE_ADDRESS (pattern_stmt_info)
> +	= STMT_VINFO_DR_BASE_ADDRESS (stmt_vinfo);
> +      STMT_VINFO_DR_INIT (pattern_stmt_info) = STMT_VINFO_DR_INIT (stmt_vinfo);
> +      STMT_VINFO_DR_OFFSET (pattern_stmt_info)
> +	= STMT_VINFO_DR_OFFSET (stmt_vinfo);
> +      STMT_VINFO_DR_STEP (pattern_stmt_info) = STMT_VINFO_DR_STEP (stmt_vinfo);
> +      STMT_VINFO_DR_ALIGNED_TO (pattern_stmt_info)
> +	= STMT_VINFO_DR_ALIGNED_TO (stmt_vinfo);
> +      *type_out = vectype;
> +      *type_in = vectype;
> +      VEC_safe_push (gimple, heap, *stmts, last_stmt);
> +      return pattern_stmt;
> +    }
>    else
>      return NULL;
>  }
> @@ -1949,19 +1993,22 @@ vect_mark_pattern_stmts (gimple orig_stm
>    loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (orig_stmt_info);
>    gimple def_stmt;
>  
> -  set_vinfo_for_stmt (pattern_stmt,
> -                      new_stmt_vec_info (pattern_stmt, loop_vinfo, NULL));
> -  gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt));
>    pattern_stmt_info = vinfo_for_stmt (pattern_stmt);
> +  if (pattern_stmt_info == NULL)
> +    {
> +      pattern_stmt_info = new_stmt_vec_info (pattern_stmt, loop_vinfo, NULL);
> +      set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
> +    }
> +  gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt));
>  
>    STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt;
>    STMT_VINFO_DEF_TYPE (pattern_stmt_info)
> -	= STMT_VINFO_DEF_TYPE (orig_stmt_info);
> +    = STMT_VINFO_DEF_TYPE (orig_stmt_info);
>    STMT_VINFO_VECTYPE (pattern_stmt_info) = pattern_vectype;
>    STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
>    STMT_VINFO_RELATED_STMT (orig_stmt_info) = pattern_stmt;
>    STMT_VINFO_PATTERN_DEF_STMT (pattern_stmt_info)
> -	= STMT_VINFO_PATTERN_DEF_STMT (orig_stmt_info);
> +    = STMT_VINFO_PATTERN_DEF_STMT (orig_stmt_info);
>    if (STMT_VINFO_PATTERN_DEF_STMT (pattern_stmt_info))
>      {
>        def_stmt = STMT_VINFO_PATTERN_DEF_STMT (pattern_stmt_info);
> --- gcc/tree-vect-data-refs.c.jj	2011-09-20 21:43:07.000000000 +0200
> +++ gcc/tree-vect-data-refs.c	2011-10-19 14:37:44.000000000 +0200
> @@ -2752,8 +2752,23 @@ vect_analyze_data_refs (loop_vec_info lo
>  
>        /* Set vectype for STMT.  */
>        scalar_type = TREE_TYPE (DR_REF (dr));
> -      STMT_VINFO_VECTYPE (stmt_info) =
> -                get_vectype_for_scalar_type (scalar_type);
> +      STMT_VINFO_VECTYPE (stmt_info)
> +	= get_vectype_for_scalar_type (scalar_type);
> +      if (!STMT_VINFO_VECTYPE (stmt_info)
> +	  && ((TYPE_PRECISION (scalar_type) == 1
> +	       && TYPE_UNSIGNED (scalar_type))
> +	      || TREE_CODE (scalar_type) == BOOLEAN_TYPE)
> +	  && DR_IS_WRITE (dr)
> +	  && loop_vinfo)
> +	{
> +	  /* For bool stores use integral type with the same
> +	     TYPE_MODE, but bigger precision.  vect_recog_bool_pattern
> +	     can transform those into something vectorizable.  */
> +	  unsigned int modesize = GET_MODE_BITSIZE (TYPE_MODE (scalar_type));
> +	  scalar_type = build_nonstandard_integer_type (modesize, 1);
> +	  STMT_VINFO_VECTYPE (stmt_info)
> +	    = get_vectype_for_scalar_type (scalar_type);
> +	}
>        if (!STMT_VINFO_VECTYPE (stmt_info))
>          {
>            if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
> --- gcc/tree-vect-loop.c.jj	2011-09-26 14:06:52.000000000 +0200
> +++ gcc/tree-vect-loop.c	2011-10-19 14:49:18.000000000 +0200
> @@ -1,5 +1,5 @@
>  /* Loop Vectorization
> -   Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
> +   Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
>     Free Software Foundation, Inc.
>     Contributed by Dorit Naishlos <dorit@il.ibm.com> and
>     Ira Rosen <irar@il.ibm.com>
> @@ -347,6 +347,28 @@ vect_determine_vectorization_factor (loo
>  	      gcc_assert (STMT_VINFO_DATA_REF (stmt_info)
>  			  || is_pattern_stmt_p (stmt_info));
>  	      vectype = STMT_VINFO_VECTYPE (stmt_info);
> +	      if (STMT_VINFO_DATA_REF (stmt_info))
> +		{
> +		  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
> +		  tree scalar_type = TREE_TYPE (DR_REF (dr));
> +		  /* vect_analyze_data_refs will allow bool writes through,
> +		     in order to allow vect_recog_bool_pattern to transform
> +		     those.  If they couldn't be transformed, give up now.  */
> +		  if (((TYPE_PRECISION (scalar_type) == 1
> +			&& TYPE_UNSIGNED (scalar_type))
> +		       || TREE_CODE (scalar_type) == BOOLEAN_TYPE)

Shouldn't it be always possible to vectorize those?  For loads
we can assume the memory contains only 1 or 0 (we assume that for
scalar loads), for stores we can mask out all other bits explicitly
if you add support for truncating conversions to non-mode precision
(in fact, we could support non-mode precision vectorization that way,
though not bitfield loads or extending conversions).

So maybe that obsoletes my conditional approval ;)  Can you
investigate whether the above would work?

Thanks,
Richard.

> +		      && DR_IS_WRITE (dr)
> +		      && !is_pattern_stmt_p (stmt_info))
> +		    {
> +		      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
> +			{
> +			  fprintf (vect_dump,
> +				   "not vectorized: unsupported data-type ");
> +			  print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
> +			}
> +		      return false;
> +		    }
> +		}
>  	    }
>  	  else
>  	    {
> --- gcc/testsuite/gcc.dg/vect/vect-cond-10.c.jj	2011-10-19 15:54:42.000000000 +0200
> +++ gcc/testsuite/gcc.dg/vect/vect-cond-10.c	2011-10-19 16:00:22.000000000 +0200
> @@ -0,0 +1,165 @@
> +/* { dg-require-effective-target vect_cond_mixed } */
> +
> +#include "tree-vect.h"
> +
> +#define N 1024
> +float a[N], b[N], c[N], d[N];
> +_Bool k[N];
> +
> +__attribute__((noinline, noclone)) void
> +f1 (void)
> +{
> +  int i;
> +  for (i = 0; i < N; ++i)
> +    {
> +      int x = a[i] < b[i];
> +      int y = c[i] < d[i];
> +      k[i] = x & y;
> +    }
> +}
> +
> +__attribute__((noinline, noclone)) void
> +f2 (void)
> +{
> +  int i;
> +  for (i = 0; i < N; ++i)
> +    k[i] = (a[i] < b[i]) & (c[i] < d[i]);
> +}
> +
> +__attribute__((noinline, noclone)) void
> +f3 (void)
> +{
> +  int i;
> +  for (i = 0; i < N; ++i)
> +    {
> +      int x = a[i] < b[i];
> +      int y = c[i] < d[i];
> +      k[i] = x | y;
> +    }
> +}
> +
> +__attribute__((noinline, noclone)) void
> +f4 (void)
> +{
> +  int i;
> +  for (i = 0; i < N; ++i)
> +    k[i] = (a[i] < b[i]) | (c[i] < d[i]);
> +}
> +
> +__attribute__((noinline, noclone)) void
> +f5 (_Bool *p)
> +{
> +  int i;
> +  for (i = 0; i < N; ++i)
> +    {
> +      int x = a[i] < b[i];
> +      int y = c[i] < d[i];
> +      p[i] = x & y;
> +    }
> +}
> +
> +__attribute__((noinline, noclone)) void
> +f6 (_Bool *p)
> +{
> +  int i;
> +  for (i = 0; i < N; ++i)
> +    p[i] = (a[i] < b[i]) & (c[i] < d[i]);
> +}
> +
> +__attribute__((noinline, noclone)) void
> +f7 (_Bool *p)
> +{
> +  int i;
> +  for (i = 0; i < N; ++i)
> +    {
> +      int x = a[i] < b[i];
> +      int y = c[i] < d[i];
> +      p[i] = x | y;
> +    }
> +}
> +
> +__attribute__((noinline, noclone)) void
> +f8 (_Bool *p)
> +{
> +  int i;
> +  for (i = 0; i < N; ++i)
> +    p[i] = (a[i] < b[i]) | (c[i] < d[i]);
> +}
> +
> +int
> +main ()
> +{
> +  int i;
> +
> +  check_vect ();
> +
> +  for (i = 0; i < N; i++)
> +    {
> +      switch (i % 9)
> +	{
> +	case 0: asm (""); a[i] = - i - 1; b[i] = i + 1; break;
> +	case 1: a[i] = 0; b[i] = 0; break;
> +	case 2: a[i] = i + 1; b[i] = - i - 1; break;
> +	case 3: a[i] = i; b[i] = i + 7; break;
> +	case 4: a[i] = i; b[i] = i; break;
> +	case 5: a[i] = i + 16; b[i] = i + 3; break;
> +	case 6: a[i] = - i - 5; b[i] = - i; break;
> +	case 7: a[i] = - i; b[i] = - i; break;
> +	case 8: a[i] = - i; b[i] = - i - 7; break;
> +	}
> +    }
> +  for (i = 0; i < N; i++)
> +    {
> +      switch ((i / 9) % 3)
> +	{
> +	case 0: c[i] = a[i / 9]; d[i] = b[i / 9]; break;
> +	case 1: c[i] = a[i / 9 + 3]; d[i] = b[i / 9 + 3]; break;
> +	case 2: c[i] = a[i / 9 + 6]; d[i] = b[i / 9 + 6]; break;
> +	}
> +    }
> +  f1 ();
> +  for (i = 0; i < N; i++)
> +    if (k[i] != ((i % 3) == 0 && ((i / 9) % 3) == 0))
> +      abort ();
> +  __builtin_memset (k, 0, sizeof (k));
> +  f2 ();
> +  for (i = 0; i < N; i++)
> +    if (k[i] != ((i % 3) == 0 && ((i / 9) % 3) == 0))
> +      abort ();
> +  __builtin_memset (k, 0, sizeof (k));
> +  f3 ();
> +  for (i = 0; i < N; i++)
> +    if (k[i] != ((i % 3) == 0 || ((i / 9) % 3) == 0))
> +      abort ();
> +  __builtin_memset (k, 0, sizeof (k));
> +  f4 ();
> +  for (i = 0; i < N; i++)
> +    if (k[i] != ((i % 3) == 0 || ((i / 9) % 3) == 0))
> +      abort ();
> +  __builtin_memset (k, 0, sizeof (k));
> +  f5 (k);
> +  for (i = 0; i < N; i++)
> +    if (k[i] != ((i % 3) == 0 && ((i / 9) % 3) == 0))
> +      abort ();
> +  __builtin_memset (k, 0, sizeof (k));
> +  f6 (k);
> +  for (i = 0; i < N; i++)
> +    if (k[i] != ((i % 3) == 0 && ((i / 9) % 3) == 0))
> +      abort ();
> +  __builtin_memset (k, 0, sizeof (k));
> +  f7 (k);
> +  for (i = 0; i < N; i++)
> +    if (k[i] != ((i % 3) == 0 || ((i / 9) % 3) == 0))
> +      abort ();
> +  __builtin_memset (k, 0, sizeof (k));
> +  f8 (k);
> +  for (i = 0; i < N; i++)
> +    if (k[i] != ((i % 3) == 0 || ((i / 9) % 3) == 0))
> +      abort ();
> +  __builtin_memset (k, 0, sizeof (k));
> +
> +  return 0;
> +}
> +
> +/* { dg-final { scan-tree-dump-times "note: vectorized 1 loops" 8 "vect" } } */
> +/* { dg-final { cleanup-tree-dump "vect" } } */
> 
> 	Jakub
> 
> 

-- 
Richard Guenther <rguenther@suse.de>
SUSE / SUSE Labs
SUSE LINUX Products GmbH - Nuernberg - AG Nuernberg - HRB 16746
GF: Jeff Hawn, Jennifer Guild, Felix Imendörffer


More information about the Gcc-patches mailing list