This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH] Fixing PR59006 and PR58921 by delaying loop invariant hoisting in vectorizer.


On Fri, 22 Nov 2013, Cong Hou wrote:

> Hi
> 
> Currently in GCC vectorization, some loop invariant may be detected
> after aliasing checks, which can be hoisted outside of the loop. The
> current method in GCC may break the information built during the
> analysis phase, causing some crash (see PR59006 and PR58921).
> 
> This patch improves the loop invariant hoisting by delaying it until
> all statements are vectorized, thereby keeping all built information.
> But those loop invariant statements won't be vectorized, and if a
> variable is defined by one of those loop invariant, it is treated as
> an external definition.
> 
> Bootstrapped and testes on an x86-64 machine.

Hmm.  I'm still thinking that we should handle this during the regular
transform step.

Like with the following incomplete patch.  Missing is adjusting
the rest of the vectorizable_* functions to handle the case where all defs
are dt_external or constant by setting their own STMT_VINFO_DEF_TYPE to
dt_external.  From the gcc.dg/vect/pr58508.c we get only 4 hoists
instead of 8 because of this (I think).

Also gcc.dg/vect/pr52298.c ICEs for yet unanalyzed reason.

I can take over the bug if you like.

Thanks,
Richard.

Index: gcc/tree-vect-data-refs.c
===================================================================
*** gcc/tree-vect-data-refs.c	(revision 205435)
--- gcc/tree-vect-data-refs.c	(working copy)
*************** again:
*** 3668,3673 ****
--- 3668,3682 ----
  	    }
  	  STMT_VINFO_STRIDE_LOAD_P (stmt_info) = true;
  	}
+       else if (loop_vinfo
+ 	       && integer_zerop (DR_STEP (dr)))
+ 	{
+ 	  /* All loads from a non-varying address will be disambiguated
+ 	     by data-ref analysis or via a runtime alias check and thus
+ 	     they will become invariant.  Force them to be vectorized
+ 	     as external.  */
+ 	  STMT_VINFO_DEF_TYPE (stmt_info) = vect_external_def;
+ 	}
      }
  
    /* If we stopped analysis at the first dataref we could not analyze
Index: gcc/tree-vect-loop-manip.c
===================================================================
*** gcc/tree-vect-loop-manip.c	(revision 205435)
--- gcc/tree-vect-loop-manip.c	(working copy)
*************** vect_loop_versioning (loop_vec_info loop
*** 2269,2275 ****
  
    /* Extract load statements on memrefs with zero-stride accesses.  */
  
!   if (LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
      {
        /* In the loop body, we iterate each statement to check if it is a load.
  	 Then we check the DR_STEP of the data reference.  If DR_STEP is zero,
--- 2269,2275 ----
  
    /* Extract load statements on memrefs with zero-stride accesses.  */
  
!   if (0 && LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
      {
        /* In the loop body, we iterate each statement to check if it is a load.
  	 Then we check the DR_STEP of the data reference.  If DR_STEP is zero,
Index: gcc/tree-vect-loop.c
===================================================================
*** gcc/tree-vect-loop.c	(revision 205435)
--- gcc/tree-vect-loop.c	(working copy)
*************** vect_transform_loop (loop_vec_info loop_
*** 5995,6000 ****
--- 5995,6020 ----
  		}
  	    }
  
+ 	  /* If the stmt is loop invariant simply move it.  */
+ 	  if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_external_def)
+ 	    {
+ 	      if (dump_enabled_p ())
+ 		{
+ 		  dump_printf_loc (MSG_NOTE, vect_location,
+ 				   "hoisting out of the vectorized loop: ");
+ 		  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
+ 		  dump_printf (MSG_NOTE, "\n");
+ 		}
+ 	      gsi_remove (&si, false);
+ 	      if (gimple_vuse (stmt))
+ 		gimple_set_vuse (stmt, NULL);
+ 	      basic_block new_bb;
+ 	      new_bb = gsi_insert_on_edge_immediate (loop_preheader_edge (loop),
+ 						     stmt);
+ 	      gcc_assert (!new_bb);
+ 	      continue;
+ 	    }
+ 
  	  /* -------- vectorize statement ------------ */
  	  if (dump_enabled_p ())
  	    dump_printf_loc (MSG_NOTE, vect_location, "transform statement.\n");
Index: gcc/tree-vect-stmts.c
===================================================================
*** gcc/tree-vect-stmts.c	(revision 205435)
--- gcc/tree-vect-stmts.c	(working copy)
*************** vectorizable_operation (gimple stmt, gim
*** 3497,3502 ****
--- 3497,3503 ----
    vec<tree> vec_oprnds2 = vNULL;
    tree vop0, vop1, vop2;
    bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
+   bool all_ops_external = true;
    int vf;
  
    if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
*************** vectorizable_operation (gimple stmt, gim
*** 3557,3562 ****
--- 3558,3565 ----
                           "use not simple.\n");
        return false;
      }
+   if (dt[0] != vect_external_def && dt[0] != vect_constant_def)
+     all_ops_external = false;
    /* If op0 is an external or constant def use a vector type with
       the same size as the output vector type.  */
    if (!vectype)
*************** vectorizable_operation (gimple stmt, gim
*** 3593,3598 ****
--- 3596,3603 ----
                               "use not simple.\n");
  	  return false;
  	}
+       if (dt[1] != vect_external_def && dt[1] != vect_constant_def)
+ 	all_ops_external = false;
      }
    if (op_type == ternary_op)
      {
*************** vectorizable_operation (gimple stmt, gim
*** 3605,3610 ****
--- 3610,3623 ----
                               "use not simple.\n");
  	  return false;
  	}
+       if (dt[2] != vect_external_def && dt[2] != vect_constant_def)
+ 	all_ops_external = false;
+     }
+ 
+   if (all_ops_external && loop_vinfo)
+     {
+       STMT_VINFO_DEF_TYPE (stmt_info) = vect_external_def;
+       return true;
      }
  
    if (loop_vinfo)
*************** vect_analyze_stmt (gimple stmt, bool *ne
*** 5771,5779 ****
                       || relevance == vect_unused_in_scope));
           break;
  
        case vect_induction_def:
        case vect_constant_def:
-       case vect_external_def:
        case vect_unknown_def_type:
        default:
          gcc_unreachable ();
--- 5784,5795 ----
                       || relevance == vect_unused_in_scope));
           break;
  
+       case vect_external_def:
+ 	/* If we decided a stmt is invariant don't bother to vectorize it.  */
+ 	return true;
+ 
        case vect_induction_def:
        case vect_constant_def:
        case vect_unknown_def_type:
        default:
          gcc_unreachable ();


> 
> thanks,
> Cong
> 
> 
> 
> diff --git a/gcc/ChangeLog b/gcc/ChangeLog
> index 2c0554b..0614bab 100644
> --- a/gcc/ChangeLog
> +++ b/gcc/ChangeLog
> @@ -1,3 +1,18 @@
> +2013-11-22  Cong Hou  <congh@google.com>
> +
> + PR tree-optimization/58921
> + PR tree-optimization/59006
> + * tree-vectorizer.h (struct _stmt_vec_info): New data member
> + loop_invariant.
> + * tree-vect-loop-manip.c (vect_loop_versioning): Delay hoisting loop
> + invariants until all statements are vectorized.
> + * tree-vect-loop.c (vect_hoist_loop_invariants): New functions.
> + (vect_transform_loop): Hoist loop invariants after all statements
> + are vectorized.  Do not vectorize loop invariants stmts.
> + * tree-vect-stmts.c (vect_get_vec_def_for_operand): Treat a loop
> + invariant as an external definition.
> + (new_stmt_vec_info): Initialize new data member.
> +
>  2013-11-12  Jeff Law  <law@redhat.com>
> 
>   * tree-ssa-threadedge.c (thread_around_empty_blocks): New
> diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
> index 09c7f20..447625b 100644
> --- a/gcc/testsuite/ChangeLog
> +++ b/gcc/testsuite/ChangeLog
> @@ -1,3 +1,10 @@
> +2013-11-22  Cong Hou  <congh@google.com>
> +
> + PR tree-optimization/58921
> + PR tree-optimization/59006
> + * gcc.dg/vect/pr58921.c: New test.
> + * gcc.dg/vect/pr59006.c: New test.
> +
>  2013-11-12  Balaji V. Iyer  <balaji.v.iyer@intel.com>
> 
>   * gcc.dg/cilk-plus/cilk-plus.exp: Added a check for LTO before running
> diff --git a/gcc/testsuite/gcc.dg/vect/pr58921.c
> b/gcc/testsuite/gcc.dg/vect/pr58921.c
> new file mode 100644
> index 0000000..ee3694a
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/vect/pr58921.c
> @@ -0,0 +1,15 @@
> +/* { dg-do compile } */
> +/* { dg-require-effective-target vect_int } */
> +
> +int a[7];
> +int b;
> +
> +void
> +fn1 ()
> +{
> +  for (; b; b++)
> +    a[b] = ((a[b] <= 0) == (a[0] != 0));
> +}
> +
> +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
> +/* { dg-final { cleanup-tree-dump "vect" } } */
> diff --git a/gcc/testsuite/gcc.dg/vect/pr59006.c
> b/gcc/testsuite/gcc.dg/vect/pr59006.c
> new file mode 100644
> index 0000000..95d90a9
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/vect/pr59006.c
> @@ -0,0 +1,24 @@
> +/* { dg-do compile } */
> +/* { dg-require-effective-target vect_int } */
> +
> +int a[8], b;
> +
> +void fn1 (void)
> +{
> +  int c;
> +  for (; b; b++)
> +    {
> +      int d = a[b];
> +      c = a[0] ? d : 0;
> +      a[b] = c;
> +    }
> +}
> +
> +void fn2 ()
> +{
> +  for (; b <= 0; b++)
> +    a[b] = a[0] || b;
> +}
> +
> +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" } } */
> +/* { dg-final { cleanup-tree-dump "vect" } } */
> diff --git a/gcc/tree-vect-loop-manip.c b/gcc/tree-vect-loop-manip.c
> index 15227856..3adc73d 100644
> --- a/gcc/tree-vect-loop-manip.c
> +++ b/gcc/tree-vect-loop-manip.c
> @@ -2448,8 +2448,12 @@ vect_loop_versioning (loop_vec_info loop_vinfo,
>    FOR_EACH_SSA_TREE_OPERAND (var, stmt, iter, SSA_OP_USE)
>      {
>        gimple def = SSA_NAME_DEF_STMT (var);
> +      stmt_vec_info def_stmt_info;
> +
>        if (!gimple_nop_p (def)
> -  && flow_bb_inside_loop_p (loop, gimple_bb (def)))
> +  && flow_bb_inside_loop_p (loop, gimple_bb (def))
> +  && !((def_stmt_info = vinfo_for_stmt (def))
> + && STMT_VINFO_LOOP_INVARIANT_P (def_stmt_info)))
>   {
>    hoist = false;
>    break;
> @@ -2458,21 +2462,8 @@ vect_loop_versioning (loop_vec_info loop_vinfo,
> 
>    if (hoist)
>      {
> -      if (dr)
> - gimple_set_vuse (stmt, NULL);
> -
> -      gsi_remove (&si, false);
> -      gsi_insert_on_edge_immediate (loop_preheader_edge (loop),
> -    stmt);
> -
> -      if (dump_enabled_p ())
> - {
> -  dump_printf_loc
> -      (MSG_NOTE, vect_location,
> -       "hoisting out of the vectorized loop: ");
> -  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
> -  dump_printf (MSG_NOTE, "\n");
> - }
> +      STMT_VINFO_LOOP_INVARIANT_P (stmt_info) = true;
> +      gsi_next (&si);
>        continue;
>      }
>   }
> @@ -2481,6 +2472,7 @@ vect_loop_versioning (loop_vec_info loop_vinfo,
>   }
>      }
> 
> +
>    /* End loop-exit-fixes after versioning.  */
> 
>    if (cond_expr_stmt_list)
> diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
> index 292e771..148f9f1 100644
> --- a/gcc/tree-vect-loop.c
> +++ b/gcc/tree-vect-loop.c
> @@ -5572,6 +5572,49 @@ vect_loop_kill_debug_uses (struct loop *loop,
> gimple stmt)
>      }
>  }
> 
> +/* Find all loop invariants detected after alias checks, and hoist them
> +   before the loop preheader.  */
> +
> +static void
> +vect_hoist_loop_invariants (loop_vec_info loop_vinfo)
> +{
> +  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
> +  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
> +  gimple_seq loop_invariants = NULL;
> +
> +  for (int i = 0; i < (int)loop->num_nodes; i++)
> +    {
> +      basic_block bb = bbs[i];
> +      for (gimple_stmt_iterator si = gsi_start_bb (bb); !gsi_end_p (si);)
> + {
> +  gimple stmt = gsi_stmt (si);
> +  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
> +  if (stmt_vinfo && STMT_VINFO_LOOP_INVARIANT_P (stmt_vinfo))
> +    {
> +      if (gimple_has_mem_ops (stmt))
> + gimple_set_vuse (stmt, NULL);
> +
> +      gsi_remove (&si, false);
> +      gimple_seq_add_stmt (&loop_invariants, stmt);
> +
> +      if (dump_enabled_p ())
> + {
> +  dump_printf_loc
> +    (MSG_NOTE, vect_location,
> +     "hoisting out of the vectorized loop: ");
> +  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
> +  dump_printf (MSG_NOTE, "\n");
> + }
> +    }
> +  else
> +    gsi_next (&si);
> + }
> +    }
> +  basic_block pre_header = loop_preheader_edge (loop)->src;
> +  gcc_assert (EDGE_COUNT (pre_header->preds) == 1);
> +  gsi_insert_seq_on_edge_immediate (EDGE_PRED (pre_header, 0),
> loop_invariants);
> +}
> +
>  /* Function vect_transform_loop.
> 
>     The analysis phase has determined that the loop is vectorizable.
> @@ -5835,6 +5878,15 @@ vect_transform_loop (loop_vec_info loop_vinfo)
>   transform_pattern_stmt = false;
>              }
> 
> +          /* If stmt is a loop invariant (detected after alias checks),
> +             do not generate the vectorized stmt for it as it will be
> +             hoisted later.  */
> +  if (STMT_VINFO_LOOP_INVARIANT_P (stmt_info))
> +    {
> +      gsi_next (&si);
> +      continue;
> +    }
> +
>    gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
>    nunits = (unsigned int) TYPE_VECTOR_SUBPARTS (
>                                                 STMT_VINFO_VECTYPE (stmt_info));
> @@ -5910,6 +5962,9 @@ vect_transform_loop (loop_vec_info loop_vinfo)
>   }        /* stmts in BB */
>      } /* BBs in loop */
> 
> +  /* Hoist all loop invariants.  */
> +  vect_hoist_loop_invariants (loop_vinfo);
> +
>    slpeel_make_loop_iterate_ntimes (loop, ratio);
> 
>    /* Reduce loop iterations by the vectorization factor.  */
> diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
> index b0e0fa9..3e15372 100644
> --- a/gcc/tree-vect-stmts.c
> +++ b/gcc/tree-vect-stmts.c
> @@ -1362,6 +1362,18 @@ vect_get_vec_def_for_operand (tree op, gimple
> stmt, tree *scalar_def)
>          }
>      }
> 
> +  /* After alias checks, some loop invariants may be detected, and we won't
> +     generate vectorized stmts for them.  We only hoist them after all stmts
> +     are vectorized.  Here if we meet a loop invariant, we need to assume it
> +     is already hoisted before the loop.  We do this by setting the def-type
> +     to vect_external_def.  */
> +  if (def_stmt && dt == vect_internal_def)
> +    {
> +      stmt_vec_info stmt_vinfo = vinfo_for_stmt (def_stmt);
> +      if (stmt_vinfo && STMT_VINFO_LOOP_INVARIANT_P (stmt_vinfo))
> + dt = vect_external_def;
> +    }
> +
>    switch (dt)
>      {
>      /* Case 1: operand is a constant.  */
> @@ -6083,6 +6095,7 @@ new_stmt_vec_info (gimple stmt, loop_vec_info loop_vinfo,
>    STMT_VINFO_BB_VINFO (res) = bb_vinfo;
>    STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
>    STMT_VINFO_LIVE_P (res) = false;
> +  STMT_VINFO_LOOP_INVARIANT_P (res) = false;
>    STMT_VINFO_VECTYPE (res) = NULL;
>    STMT_VINFO_VEC_STMT (res) = NULL;
>    STMT_VINFO_VECTORIZABLE (res) = true;
> diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
> index bbd50e1..2c230f9 100644
> --- a/gcc/tree-vectorizer.h
> +++ b/gcc/tree-vectorizer.h
> @@ -516,6 +516,10 @@ typedef struct _stmt_vec_info {
>       used outside the loop.  */
>    bool live;
> 
> +  /* Indicates whether this stmt is a loop invariant, which can be hoisted.
> +     A stmt may become loop invariant after alias checks.  */
> +  bool loop_invariant;
> +
>    /* Stmt is part of some pattern (computation idiom)  */
>    bool in_pattern_p;
> 
> @@ -623,6 +627,7 @@ typedef struct _stmt_vec_info {
>  #define STMT_VINFO_BB_VINFO(S)             (S)->bb_vinfo
>  #define STMT_VINFO_RELEVANT(S)             (S)->relevant
>  #define STMT_VINFO_LIVE_P(S)               (S)->live
> +#define STMT_VINFO_LOOP_INVARIANT_P(S)     (S)->loop_invariant
>  #define STMT_VINFO_VECTYPE(S)              (S)->vectype
>  #define STMT_VINFO_VEC_STMT(S)             (S)->vectorized_stmt
>  #define STMT_VINFO_VECTORIZABLE(S)         (S)->vectorizable
> 

-- 
Richard Biener <rguenther@suse.de>
SUSE / SUSE Labs
SUSE LINUX Products GmbH - Nuernberg - AG Nuernberg - HRB 16746
GF: Jeff Hawn, Jennifer Guild, Felix Imend"orffer


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]