This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[06/11] Handle VMAT_INVARIANT separately


Invariant loads were handled as a variation on the code for contiguous
loads.  We detected whether they were invariant or not as a byproduct of
creating the vector pointer ivs: vect_create_data_ref_ptr passed back an
inv_p to say whether the pointer was invariant.

But vectorised invariant loads just keep the original scalar load,
so this meant that detecting invariant loads had the side-effect of
creating an unwanted vector pointer iv.  The placement of the code
also meant that we'd create a vector load and then not use the result.
In principle this is wrong code, since there's no guarantee that there's
a vector's worth of accessible data at that address, but we rely on DCE
to get rid of the load before any harm is done.

E.g., for an invariant load in an inner loop (which seems like the more
common use case for this code), we'd create:

   vectp_a.6_52 = &a + 4;

   # vectp_a.5_53 = PHI <vectp_a.5_54(9), vectp_a.6_52(2)>

   # vectp_a.5_55 = PHI <vectp_a.5_53(3), vectp_a.5_56(10)>

   vect_next_a_11.7_57 = MEM[(int *)vectp_a.5_55];
   next_a_11 = a[_1];
   vect_cst__58 = {next_a_11, next_a_11, next_a_11, next_a_11};

   vectp_a.5_56 = vectp_a.5_55 + 4;

   vectp_a.5_54 = vectp_a.5_53 + 0;

whereas all we want is:

   next_a_11 = a[_1];
   vect_cst__58 = {next_a_11, next_a_11, next_a_11, next_a_11};

This patch moves the handling to its own block and makes
vect_create_data_ref_ptr assert (when creating a full iv) that the
address isn't invariant.

The ncopies handling (emitting a separate statement per copy even though
all copies are identical) is unfortunate, but it is a pre-existing issue.
Richi's suggestion of using a vector of vector statements would
let us reuse one statement for all copies.


2018-07-30  Richard Sandiford  <richard.sandiford@arm.com>

gcc/
	* tree-vectorizer.h (vect_create_data_ref_ptr): Remove inv_p
	parameter.
	* tree-vect-data-refs.c (vect_create_data_ref_ptr): Likewise.
	When creating an iv, assert that the step is not known to be zero.
	(vect_setup_realignment): Update call accordingly.
	* tree-vect-stmts.c (vectorizable_store): Likewise.
	(vectorizable_load): Likewise.  Handle VMAT_INVARIANT separately.

Index: gcc/tree-vectorizer.h
===================================================================
*** gcc/tree-vectorizer.h	2018-07-30 12:32:29.586506669 +0100
--- gcc/tree-vectorizer.h	2018-07-30 12:40:13.000000000 +0100
*************** extern bool vect_analyze_data_refs (vec_
*** 1527,1533 ****
  extern void vect_record_base_alignments (vec_info *);
  extern tree vect_create_data_ref_ptr (stmt_vec_info, tree, struct loop *, tree,
  				      tree *, gimple_stmt_iterator *,
! 				      gimple **, bool, bool *,
  				      tree = NULL_TREE, tree = NULL_TREE);
  extern tree bump_vector_ptr (tree, gimple *, gimple_stmt_iterator *,
  			     stmt_vec_info, tree);
--- 1527,1533 ----
  extern void vect_record_base_alignments (vec_info *);
  extern tree vect_create_data_ref_ptr (stmt_vec_info, tree, struct loop *, tree,
  				      tree *, gimple_stmt_iterator *,
! 				      gimple **, bool,
  				      tree = NULL_TREE, tree = NULL_TREE);
  extern tree bump_vector_ptr (tree, gimple *, gimple_stmt_iterator *,
  			     stmt_vec_info, tree);
Index: gcc/tree-vect-data-refs.c
===================================================================
*** gcc/tree-vect-data-refs.c	2018-07-30 12:32:26.214536374 +0100
--- gcc/tree-vect-data-refs.c	2018-07-30 12:32:32.546480596 +0100
*************** vect_create_addr_base_for_vector_ref (st
*** 4674,4689 ****
  
        Return the increment stmt that updates the pointer in PTR_INCR.
  
!    3. Set INV_P to true if the access pattern of the data reference in the
!       vectorized loop is invariant.  Set it to false otherwise.
! 
!    4. Return the pointer.  */
  
  tree
  vect_create_data_ref_ptr (stmt_vec_info stmt_info, tree aggr_type,
  			  struct loop *at_loop, tree offset,
  			  tree *initial_address, gimple_stmt_iterator *gsi,
! 			  gimple **ptr_incr, bool only_init, bool *inv_p,
  			  tree byte_offset, tree iv_step)
  {
    const char *base_name;
--- 4674,4686 ----
  
        Return the increment stmt that updates the pointer in PTR_INCR.
  
!    3. Return the pointer.  */
  
  tree
  vect_create_data_ref_ptr (stmt_vec_info stmt_info, tree aggr_type,
  			  struct loop *at_loop, tree offset,
  			  tree *initial_address, gimple_stmt_iterator *gsi,
! 			  gimple **ptr_incr, bool only_init,
  			  tree byte_offset, tree iv_step)
  {
    const char *base_name;
*************** vect_create_data_ref_ptr (stmt_vec_info
*** 4705,4711 ****
    bool insert_after;
    tree indx_before_incr, indx_after_incr;
    gimple *incr;
-   tree step;
    bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  
    gcc_assert (iv_step != NULL_TREE
--- 4702,4707 ----
*************** vect_create_data_ref_ptr (stmt_vec_info
*** 4726,4739 ****
        *ptr_incr = NULL;
      }
  
-   /* Check the step (evolution) of the load in LOOP, and record
-      whether it's invariant.  */
-   step = vect_dr_behavior (dr_info)->step;
-   if (integer_zerop (step))
-     *inv_p = true;
-   else
-     *inv_p = false;
- 
    /* Create an expression for the first address accessed by this load
       in LOOP.  */
    base_name = get_name (DR_BASE_ADDRESS (dr));
--- 4722,4727 ----
*************** vect_create_data_ref_ptr (stmt_vec_info
*** 4849,4863 ****
      aptr = aggr_ptr_init;
    else
      {
        if (iv_step == NULL_TREE)
  	{
! 	  /* The step of the aggregate pointer is the type size.  */
  	  iv_step = TYPE_SIZE_UNIT (aggr_type);
! 	  /* One exception to the above is when the scalar step of the load in
! 	     LOOP is zero. In this case the step here is also zero.  */
! 	  if (*inv_p)
! 	    iv_step = size_zero_node;
! 	  else if (tree_int_cst_sgn (step) == -1)
  	    iv_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv_step), iv_step);
  	}
  
--- 4837,4853 ----
      aptr = aggr_ptr_init;
    else
      {
+       /* Accesses to invariant addresses should be handled specially
+ 	 by the caller.  */
+       tree step = vect_dr_behavior (dr_info)->step;
+       gcc_assert (!integer_zerop (step));
+ 
        if (iv_step == NULL_TREE)
  	{
! 	  /* The step of the aggregate pointer is the type size,
! 	     negated for downward accesses.  */
  	  iv_step = TYPE_SIZE_UNIT (aggr_type);
! 	  if (tree_int_cst_sgn (step) == -1)
  	    iv_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv_step), iv_step);
  	}
  
*************** vect_setup_realignment (stmt_vec_info st
*** 5462,5468 ****
    gphi *phi_stmt;
    tree msq = NULL_TREE;
    gimple_seq stmts = NULL;
-   bool inv_p;
    bool compute_in_loop = false;
    bool nested_in_vect_loop = false;
    struct loop *containing_loop = (gimple_bb (stmt_info->stmt))->loop_father;
--- 5452,5457 ----
*************** vect_setup_realignment (stmt_vec_info st
*** 5556,5562 ****
        vec_dest = vect_create_destination_var (scalar_dest, vectype);
        ptr = vect_create_data_ref_ptr (stmt_info, vectype,
  				      loop_for_initial_load, NULL_TREE,
! 				      &init_addr, NULL, &inc, true, &inv_p);
        if (TREE_CODE (ptr) == SSA_NAME)
  	new_temp = copy_ssa_name (ptr);
        else
--- 5545,5551 ----
        vec_dest = vect_create_destination_var (scalar_dest, vectype);
        ptr = vect_create_data_ref_ptr (stmt_info, vectype,
  				      loop_for_initial_load, NULL_TREE,
! 				      &init_addr, NULL, &inc, true);
        if (TREE_CODE (ptr) == SSA_NAME)
  	new_temp = copy_ssa_name (ptr);
        else
Index: gcc/tree-vect-stmts.c
===================================================================
*** gcc/tree-vect-stmts.c	2018-07-30 12:32:29.586506669 +0100
--- gcc/tree-vect-stmts.c	2018-07-30 12:40:14.000000000 +0100
*************** vectorizable_store (stmt_vec_info stmt_i
*** 6254,6260 ****
    unsigned int group_size, i;
    vec<tree> oprnds = vNULL;
    vec<tree> result_chain = vNULL;
-   bool inv_p;
    tree offset = NULL_TREE;
    vec<tree> vec_oprnds = vNULL;
    bool slp = (slp_node != NULL);
--- 6254,6259 ----
*************** vectorizable_store (stmt_vec_info stmt_i
*** 7018,7039 ****
  	    {
  	      dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
  	      dataref_offset = build_int_cst (ref_type, 0);
- 	      inv_p = false;
  	    }
  	  else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
! 	    {
! 	      vect_get_gather_scatter_ops (loop, stmt_info, &gs_info,
! 					   &dataref_ptr, &vec_offset);
! 	      inv_p = false;
! 	    }
  	  else
  	    dataref_ptr
  	      = vect_create_data_ref_ptr (first_stmt_info, aggr_type,
  					  simd_lane_access_p ? loop : NULL,
  					  offset, &dummy, gsi, &ptr_incr,
! 					  simd_lane_access_p, &inv_p,
! 					  NULL_TREE, bump);
! 	  gcc_assert (bb_vinfo || !inv_p);
  	}
        else
  	{
--- 7017,7032 ----
  	    {
  	      dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
  	      dataref_offset = build_int_cst (ref_type, 0);
  	    }
  	  else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
! 	    vect_get_gather_scatter_ops (loop, stmt_info, &gs_info,
! 					 &dataref_ptr, &vec_offset);
  	  else
  	    dataref_ptr
  	      = vect_create_data_ref_ptr (first_stmt_info, aggr_type,
  					  simd_lane_access_p ? loop : NULL,
  					  offset, &dummy, gsi, &ptr_incr,
! 					  simd_lane_access_p, NULL_TREE, bump);
  	}
        else
  	{
*************** vectorizable_load (stmt_vec_info stmt_in
*** 7419,7425 ****
    bool grouped_load = false;
    stmt_vec_info first_stmt_info;
    stmt_vec_info first_stmt_info_for_drptr = NULL;
-   bool inv_p;
    bool compute_in_loop = false;
    struct loop *at_loop;
    int vec_num;
--- 7412,7417 ----
*************** vectorizable_load (stmt_vec_info stmt_in
*** 7669,7674 ****
--- 7661,7723 ----
        return true;
      }
  
+   if (memory_access_type == VMAT_INVARIANT)
+     {
+       gcc_assert (!grouped_load && !mask && !bb_vinfo);
+       /* If we have versioned for aliasing or the loop doesn't
+ 	 have any data dependencies that would preclude this,
+ 	 then we are sure this is a loop invariant load and
+ 	 thus we can insert it on the preheader edge.  */
+       bool hoist_p = (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
+ 		      && !nested_in_vect_loop
+ 		      && hoist_defs_of_uses (stmt_info, loop));
+       if (hoist_p)
+ 	{
+ 	  gassign *stmt = as_a <gassign *> (stmt_info->stmt);
+ 	  if (dump_enabled_p ())
+ 	    {
+ 	      dump_printf_loc (MSG_NOTE, vect_location,
+ 			       "hoisting out of the vectorized loop: ");
+ 	      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
+ 	    }
+ 	  scalar_dest = copy_ssa_name (scalar_dest);
+ 	  tree rhs = unshare_expr (gimple_assign_rhs1 (stmt));
+ 	  gsi_insert_on_edge_immediate
+ 	    (loop_preheader_edge (loop),
+ 	     gimple_build_assign (scalar_dest, rhs));
+ 	}
+       /* These copies are all equivalent, but currently the representation
+ 	 requires a separate STMT_VINFO_VEC_STMT for each one.  */
+       prev_stmt_info = NULL;
+       gimple_stmt_iterator gsi2 = *gsi;
+       gsi_next (&gsi2);
+       for (j = 0; j < ncopies; j++)
+ 	{
+ 	  stmt_vec_info new_stmt_info;
+ 	  if (hoist_p)
+ 	    {
+ 	      new_temp = vect_init_vector (stmt_info, scalar_dest,
+ 					   vectype, NULL);
+ 	      gimple *new_stmt = SSA_NAME_DEF_STMT (new_temp);
+ 	      new_stmt_info = vinfo->add_stmt (new_stmt);
+ 	    }
+ 	  else
+ 	    {
+ 	      new_temp = vect_init_vector (stmt_info, scalar_dest,
+ 					   vectype, &gsi2);
+ 	      new_stmt_info = vinfo->lookup_def (new_temp);
+ 	    }
+ 	  if (slp)
+ 	    SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
+ 	  else if (j == 0)
+ 	    STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
+ 	  else
+ 	    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
+ 	  prev_stmt_info = new_stmt_info;
+ 	}
+       return true;
+     }
+ 
    if (memory_access_type == VMAT_ELEMENTWISE
        || memory_access_type == VMAT_STRIDED_SLP)
      {
*************** vectorizable_load (stmt_vec_info stmt_in
*** 8177,8183 ****
  	    {
  	      dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
  	      dataref_offset = build_int_cst (ref_type, 0);
- 	      inv_p = false;
  	    }
  	  else if (first_stmt_info_for_drptr
  		   && first_stmt_info != first_stmt_info_for_drptr)
--- 8226,8231 ----
*************** vectorizable_load (stmt_vec_info stmt_in
*** 8186,8192 ****
  		= vect_create_data_ref_ptr (first_stmt_info_for_drptr,
  					    aggr_type, at_loop, offset, &dummy,
  					    gsi, &ptr_incr, simd_lane_access_p,
! 					    &inv_p, byte_offset, bump);
  	      /* Adjust the pointer by the difference to first_stmt.  */
  	      data_reference_p ptrdr
  		= STMT_VINFO_DATA_REF (first_stmt_info_for_drptr);
--- 8234,8240 ----
  		= vect_create_data_ref_ptr (first_stmt_info_for_drptr,
  					    aggr_type, at_loop, offset, &dummy,
  					    gsi, &ptr_incr, simd_lane_access_p,
! 					    byte_offset, bump);
  	      /* Adjust the pointer by the difference to first_stmt.  */
  	      data_reference_p ptrdr
  		= STMT_VINFO_DATA_REF (first_stmt_info_for_drptr);
*************** vectorizable_load (stmt_vec_info stmt_in
*** 8199,8214 ****
  					     stmt_info, diff);
  	    }
  	  else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
! 	    {
! 	      vect_get_gather_scatter_ops (loop, stmt_info, &gs_info,
! 					   &dataref_ptr, &vec_offset);
! 	      inv_p = false;
! 	    }
  	  else
  	    dataref_ptr
  	      = vect_create_data_ref_ptr (first_stmt_info, aggr_type, at_loop,
  					  offset, &dummy, gsi, &ptr_incr,
! 					  simd_lane_access_p, &inv_p,
  					  byte_offset, bump);
  	  if (mask)
  	    vec_mask = vect_get_vec_def_for_operand (mask, stmt_info,
--- 8247,8259 ----
  					     stmt_info, diff);
  	    }
  	  else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
! 	    vect_get_gather_scatter_ops (loop, stmt_info, &gs_info,
! 					 &dataref_ptr, &vec_offset);
  	  else
  	    dataref_ptr
  	      = vect_create_data_ref_ptr (first_stmt_info, aggr_type, at_loop,
  					  offset, &dummy, gsi, &ptr_incr,
! 					  simd_lane_access_p,
  					  byte_offset, bump);
  	  if (mask)
  	    vec_mask = vect_get_vec_def_for_operand (mask, stmt_info,
*************** vectorizable_load (stmt_vec_info stmt_in
*** 8492,8538 ****
  		    }
  		}
  
- 	      /* 4. Handle invariant-load.  */
- 	      if (inv_p && !bb_vinfo)
- 		{
- 		  gcc_assert (!grouped_load);
- 		  /* If we have versioned for aliasing or the loop doesn't
- 		     have any data dependencies that would preclude this,
- 		     then we are sure this is a loop invariant load and
- 		     thus we can insert it on the preheader edge.  */
- 		  if (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
- 		      && !nested_in_vect_loop
- 		      && hoist_defs_of_uses (stmt_info, loop))
- 		    {
- 		      gassign *stmt = as_a <gassign *> (stmt_info->stmt);
- 		      if (dump_enabled_p ())
- 			{
- 			  dump_printf_loc (MSG_NOTE, vect_location,
- 					   "hoisting out of the vectorized "
- 					   "loop: ");
- 			  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
- 			}
- 		      tree tem = copy_ssa_name (scalar_dest);
- 		      gsi_insert_on_edge_immediate
- 			(loop_preheader_edge (loop),
- 			 gimple_build_assign (tem,
- 					      unshare_expr
- 					        (gimple_assign_rhs1 (stmt))));
- 		      new_temp = vect_init_vector (stmt_info, tem,
- 						   vectype, NULL);
- 		      new_stmt = SSA_NAME_DEF_STMT (new_temp);
- 		      new_stmt_info = vinfo->add_stmt (new_stmt);
- 		    }
- 		  else
- 		    {
- 		      gimple_stmt_iterator gsi2 = *gsi;
- 		      gsi_next (&gsi2);
- 		      new_temp = vect_init_vector (stmt_info, scalar_dest,
- 						   vectype, &gsi2);
- 		      new_stmt_info = vinfo->lookup_def (new_temp);
- 		    }
- 		}
- 
  	      if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
  		{
  		  tree perm_mask = perm_mask_for_reverse (vectype);
--- 8537,8542 ----


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]