[PATCH] Fix PR81410

Richard Biener rguenther@suse.de
Tue Jul 18 13:54:00 GMT 2017


The following fixes SLP loads with gaps in the case of no permutation.

Bootstrapped and tested on x86_64-unknown-linux-gnu, applied to trunk
so far.

Richard.

2017-07-18  Richard Biener  <rguenther@suse.de>

	PR tree-optimization/81410
	* tree-vect-stmts.c (vectorizable_load): Properly adjust for
	the gap in the ! slp_perm SLP case after each group.

	* gcc.dg/vect/pr81410.c: New testcase.

Index: gcc/tree-vect-stmts.c
===================================================================
*** gcc/tree-vect-stmts.c	(revision 250296)
--- gcc/tree-vect-stmts.c	(working copy)
*************** vectorizable_load (gimple *stmt, gimple_
*** 7118,7123 ****
--- 7118,7124 ----
      {
        first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
        group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
+       int group_gap = GROUP_GAP (vinfo_for_stmt (first_stmt));
        /* For SLP vectorization we directly vectorize a subchain
           without permutation.  */
        if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
*************** vectorizable_load (gimple *stmt, gimple_
*** 7153,7162 ****
  	     not only the number of vector stmts the permutation result
  	     fits in.  */
  	  if (slp_perm)
! 	    vec_num = (group_size * vf + nunits - 1) / nunits;
  	  else
! 	    vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
! 	  group_gap_adj = vf * group_size - nunits * vec_num;
      	}
        else
  	vec_num = group_size;
--- 7154,7168 ----
  	     not only the number of vector stmts the permutation result
  	     fits in.  */
  	  if (slp_perm)
! 	    {
! 	      vec_num = (group_size * vf + nunits - 1) / nunits;
! 	      group_gap_adj = vf * group_size - nunits * vec_num;
! 	    }
  	  else
! 	    {
! 	      vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
! 	      group_gap_adj = group_gap;
! 	    }
      	}
        else
  	vec_num = group_size;
*************** vectorizable_load (gimple *stmt, gimple_
*** 7316,7321 ****
--- 7322,7328 ----
      aggr_type = vectype;
  
    prev_stmt_info = NULL;
+   int group_elt = 0;
    for (j = 0; j < ncopies; j++)
      {
        /* 1. Create the vector or array pointer update chain.  */
*************** vectorizable_load (gimple *stmt, gimple_
*** 7603,7612 ****
  	      /* Store vector loads in the corresponding SLP_NODE.  */
  	      if (slp && !slp_perm)
  		SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
  	    }
  	  /* Bump the vector pointer to account for a gap or for excess
  	     elements loaded for a permuted SLP load.  */
! 	  if (group_gap_adj != 0)
  	    {
  	      bool ovf;
  	      tree bump
--- 7610,7636 ----
  	      /* Store vector loads in the corresponding SLP_NODE.  */
  	      if (slp && !slp_perm)
  		SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
+ 
+ 	      /* With SLP permutation we load the gaps as well, without
+ 	         we need to skip the gaps after we manage to fully load
+ 		 all elements.  group_gap_adj is GROUP_GAP here.  */
+ 	      group_elt += nunits;
+ 	      if (group_gap_adj != 0 && ! slp_perm
+ 		  && group_elt == group_size - group_gap_adj)
+ 		{
+ 		  bool ovf;
+ 		  tree bump
+ 		    = wide_int_to_tree (sizetype,
+ 					wi::smul (TYPE_SIZE_UNIT (elem_type),
+ 						  group_gap_adj, &ovf));
+ 		  dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
+ 						 stmt, bump);
+ 		  group_elt = 0;
+ 		}
  	    }
  	  /* Bump the vector pointer to account for a gap or for excess
  	     elements loaded for a permuted SLP load.  */
! 	  if (group_gap_adj != 0 && slp_perm)
  	    {
  	      bool ovf;
  	      tree bump
Index: gcc/testsuite/gcc.dg/vect/pr81410.c
===================================================================
*** gcc/testsuite/gcc.dg/vect/pr81410.c	(nonexistent)
--- gcc/testsuite/gcc.dg/vect/pr81410.c	(working copy)
***************
*** 0 ****
--- 1,38 ----
+ /* { dg-do run } */
+ /* { dg-require-effective-target vect_long_long } */
+ 
+ #include "tree-vect.h"
+ 
+ typedef long long uint64_t;
+ uint64_t x[24];
+ uint64_t y[16];
+ uint64_t z[8];
+ 
+ void __attribute__((noinline)) foo()
+ {
+   for (int i = 0; i < 8; ++i)
+     {
+       y[2*i] = x[3*i];
+       y[2*i + 1] = x[3*i + 1];
+       z[i] = 1;
+     }
+ }
+ 
+ int main()
+ {
+   check_vect ();
+ 
+   for (int i = 0; i < 24; ++i)
+     {
+       x[i] = i;
+       __asm__ volatile ("" : : : "memory");
+     }
+   foo ();
+   for (int i = 0; i < 8; ++i)
+     if (y[2*i] != 3*i || y[2*i+1] != 3*i + 1)
+       __builtin_abort ();
+ 
+   return 0;
+ }
+ 
+ /* { dg-final { scan-tree-dump "vectorized 1 loops" "vect" } } */



More information about the Gcc-patches mailing list