This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH] Re-do fix for PR53185, maybe fix PR53342


This re-does the fix for PR53185 in a less intrusive way, allowing
both strided load vectorization and peeling for alignment.  Testing
on my local machine tells me that this fix results in a 3% speedup
of rnflow.

Bootstrapped on x86_64-unknown-linux-gnu, testing in progress.

Richard.

2013-02-05  Richard Biener  <rguenther@suse.de>

	PR tree-optimization/53342
	PR tree-optimization/53185
	* tree-vectorizer.h (vect_check_strided_load): Remove.
	* tree-vect-data-refs.c (vect_enhance_data_refs_alignment): Do
	not disallow peeling for vectorized strided loads.
	(vect_check_strided_load): Make static and simplify.
	(vect_analyze_data_refs): Adjust.
	* tree-vect-stmts.c (vectorizable_load): Handle peeled loops
	correctly when vectorizing strided loads.

	* gcc.dg/vect/pr53185-2.c: New testcase.

Index: gcc/tree-vectorizer.h
===================================================================
*** gcc/tree-vectorizer.h	(revision 195751)
--- gcc/tree-vectorizer.h	(working copy)
*************** extern bool vect_analyze_data_ref_access
*** 923,929 ****
  extern bool vect_prune_runtime_alias_test_list (loop_vec_info);
  extern tree vect_check_gather (gimple, loop_vec_info, tree *, tree *,
  			       int *);
- extern bool vect_check_strided_load (gimple, loop_vec_info, tree *, tree *);
  extern bool vect_analyze_data_refs (loop_vec_info, bb_vec_info, int *);
  extern tree vect_create_data_ref_ptr (gimple, tree, struct loop *, tree,
  				      tree *, gimple_stmt_iterator *,
--- 923,928 ----
Index: gcc/tree-vect-data-refs.c
===================================================================
*** gcc/tree-vect-data-refs.c	(revision 195751)
--- gcc/tree-vect-data-refs.c	(working copy)
*************** vect_enhance_data_refs_alignment (loop_v
*** 1615,1632 ****
            && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
          continue;
  
-       /* FORNOW: Any strided load prevents peeling.  The induction
-          variable analysis will fail when the prologue loop is generated,
- 	 and so we can't generate the new base for the pointer.  */
-       if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
- 	{
- 	  if (dump_enabled_p ())
- 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-                              "strided load prevents peeling");
- 	  do_peeling = false;
- 	  break;
- 	}
- 
        /* For invariant accesses there is nothing to enhance.  */
        if (integer_zerop (DR_STEP (dr)))
  	continue;
--- 1615,1620 ----
*************** vect_check_gather (gimple stmt, loop_vec
*** 2890,2898 ****
     This handles ARRAY_REFs (with variant index) and MEM_REFs (with variant
     base pointer) only.  */
  
! bool
! vect_check_strided_load (gimple stmt, loop_vec_info loop_vinfo, tree *basep,
! 			 tree *stepp)
  {
    struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
    stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
--- 2878,2885 ----
     This handles ARRAY_REFs (with variant index) and MEM_REFs (with variant
     base pointer) only.  */
  
! static bool
! vect_check_strided_load (gimple stmt, loop_vec_info loop_vinfo)
  {
    struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
    stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
*************** vect_check_strided_load (gimple stmt, lo
*** 2925,2934 ****
        || !simple_iv (loop, loop_containing_stmt (stmt), off, &iv, true))
      return false;
  
-   if (basep)
-     *basep = iv.base;
-   if (stepp)
-     *stepp = iv.step;
    return true;
  }
  
--- 2912,2917 ----
*************** vect_analyze_data_refs (loop_vec_info lo
*** 3473,3480 ****
  	{
  	  bool strided_load = false;
  	  if (!nested_in_vect_loop_p (loop, stmt))
! 	    strided_load
! 	      = vect_check_strided_load (stmt, loop_vinfo, NULL, NULL);
  	  if (!strided_load)
  	    {
  	      if (dump_enabled_p ())
--- 3456,3462 ----
  	{
  	  bool strided_load = false;
  	  if (!nested_in_vect_loop_p (loop, stmt))
! 	    strided_load = vect_check_strided_load (stmt, loop_vinfo);
  	  if (!strided_load)
  	    {
  	      if (dump_enabled_p ())
Index: gcc/tree-vect-stmts.c
===================================================================
*** gcc/tree-vect-stmts.c	(revision 195751)
--- gcc/tree-vect-stmts.c	(working copy)
*************** vectorizable_load (gimple stmt, gimple_s
*** 4353,4359 ****
    tree aggr_type;
    tree gather_base = NULL_TREE, gather_off = NULL_TREE;
    tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE;
-   tree stride_base, stride_step;
    int gather_scale = 1;
    enum vect_def_type gather_dt = vect_unknown_def_type;
  
--- 4353,4358 ----
*************** vectorizable_load (gimple stmt, gimple_s
*** 4462,4472 ****
  	}
      }
    else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
!     {
!       if (!vect_check_strided_load (stmt, loop_vinfo,
! 				    &stride_base, &stride_step))
! 	return false;
!     }
    else
      {
        negative = tree_int_cst_compare (nested_in_vect_loop
--- 4461,4467 ----
  	}
      }
    else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
!     ;
    else
      {
        negative = tree_int_cst_compare (nested_in_vect_loop
*************** vectorizable_load (gimple stmt, gimple_s
*** 4674,4686 ****
        bool insert_after;
        gimple incr;
        tree offvar;
-       tree ref = DR_REF (dr);
        tree ivstep;
        tree running_off;
        vec<constructor_elt, va_gc> *v = NULL;
        gimple_seq stmts = NULL;
  
!       gcc_assert (stride_base && stride_step);
  
        /* For a load with loop-invariant (but other than power-of-2)
           stride (i.e. not a grouped access) like so:
--- 4669,4689 ----
        bool insert_after;
        gimple incr;
        tree offvar;
        tree ivstep;
        tree running_off;
        vec<constructor_elt, va_gc> *v = NULL;
        gimple_seq stmts = NULL;
+       tree stride_base, stride_step, alias_off;
+ 
+       gcc_assert (!nested_in_vect_loop);
  
!       stride_base
! 	= fold_build_pointer_plus
! 	    (unshare_expr (DR_BASE_ADDRESS (dr)),
! 	     size_binop (PLUS_EXPR,
! 			 convert_to_ptrofftype (unshare_expr (DR_OFFSET (dr))),
! 			 convert_to_ptrofftype (DR_INIT (dr))));
!       stride_step = fold_convert (sizetype, unshare_expr (DR_STEP (dr)));
  
        /* For a load with loop-invariant (but other than power-of-2)
           stride (i.e. not a grouped access) like so:
*************** vectorizable_load (gimple stmt, gimple_s
*** 4716,4721 ****
--- 4719,4725 ----
  
        prev_stmt_info = NULL;
        running_off = offvar;
+       alias_off = build_int_cst (reference_alias_ptr_type (DR_REF (dr)), 0);
        for (j = 0; j < ncopies; j++)
  	{
  	  tree vec_inv;
*************** vectorizable_load (gimple stmt, gimple_s
*** 4725,4757 ****
  	    {
  	      tree newref, newoff;
  	      gimple incr;
! 	      if (TREE_CODE (ref) == ARRAY_REF)
! 		{
! 		  newref = build4 (ARRAY_REF, TREE_TYPE (ref),
! 				   unshare_expr (TREE_OPERAND (ref, 0)),
! 				   running_off,
! 				   NULL_TREE, NULL_TREE);
! 		  if (!useless_type_conversion_p (TREE_TYPE (vectype),
! 						  TREE_TYPE (newref)))
! 		    newref = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype),
! 				     newref);
! 		}
! 	      else
! 		newref = build2 (MEM_REF, TREE_TYPE (vectype),
! 				 running_off,
! 				 TREE_OPERAND (ref, 1));
  
  	      newref = force_gimple_operand_gsi (gsi, newref, true,
  						 NULL_TREE, true,
  						 GSI_SAME_STMT);
  	      CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, newref);
  	      newoff = copy_ssa_name (running_off, NULL);
! 	      if (POINTER_TYPE_P (TREE_TYPE (newoff)))
! 		incr = gimple_build_assign_with_ops (POINTER_PLUS_EXPR, newoff,
! 						     running_off, stride_step);
! 	      else
! 		incr = gimple_build_assign_with_ops (PLUS_EXPR, newoff,
! 						     running_off, stride_step);
  	      vect_finish_stmt_generation (stmt, incr, gsi);
  
  	      running_off = newoff;
--- 4729,4744 ----
  	    {
  	      tree newref, newoff;
  	      gimple incr;
! 	      newref = build2 (MEM_REF, TREE_TYPE (vectype),
! 			       running_off, alias_off);
  
  	      newref = force_gimple_operand_gsi (gsi, newref, true,
  						 NULL_TREE, true,
  						 GSI_SAME_STMT);
  	      CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, newref);
  	      newoff = copy_ssa_name (running_off, NULL);
! 	      incr = gimple_build_assign_with_ops (POINTER_PLUS_EXPR, newoff,
! 						   running_off, stride_step);
  	      vect_finish_stmt_generation (stmt, incr, gsi);
  
  	      running_off = newoff;
Index: gcc/testsuite/gcc.dg/vect/pr53185-2.c
===================================================================
*** gcc/testsuite/gcc.dg/vect/pr53185-2.c	(revision 0)
--- gcc/testsuite/gcc.dg/vect/pr53185-2.c	(working copy)
***************
*** 0 ****
--- 1,27 ----
+ void __attribute__((noinline,noclone))
+ fn1 (int * __restrict f, int * __restrict d, unsigned short a, int c)
+ {
+   unsigned short e;
+   for (e = 0; e < a; ++e)
+     f[e] = d[e * c];
+ }
+ 
+ extern void abort (void);
+ 
+ int main ()
+ {
+   int a[32], b[3 * 32];
+   int i, off;
+   for (i = 0; i < 3 * 32; ++i)
+     b[i] = i;
+   for (off = 0; off < 8; ++off)
+     {
+       fn1 (&a[off], &b[off], 32 - off, 3);
+       for (i = 0; i < 32 - off; ++i)
+ 	if (a[off+i] != b[off+i*3])
+ 	  abort ();
+     }
+   return 0;
+ }
+ 
+ /* { dg-final { cleanup-tree-dump "vect" } } */


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]