[PATCH] Fix PR79920

Richard Biener rguenther@suse.de
Wed Mar 8 08:48:00 GMT 2017


The following fixes a bug in vect_transform_slp_perm_load, which tries
to be clever by computing something like "ncopies" but fails to do so
correctly (and in fact it cannot be done).  Instead, just compute all
loads/permutations manually.

The fix is as simple as

Index: tree-vect-slp.c
===================================================================
--- tree-vect-slp.c     (revision 245279)
+++ tree-vect-slp.c     (working copy)
@@ -3412,7 +3412,7 @@ vect_transform_slp_perm_load (slp_tree n
   int second_vec_index = -1;
   bool noop_p = true;
 
-  for (int j = 0; j < unroll_factor; j++)
+  for (int j = 0; j < vf; j++)
     {
       for (int k = 0; k < group_size; k++)
        {
@@ -3486,7 +3486,7 @@ vect_transform_slp_perm_load (slp_tree n
                  vect_create_mask_and_perm (stmt, mask_vec, first_vec_index,
                                             second_vec_index,
                                             gsi, node, vectype, dr_chain,
-                                            ncopies, vect_stmts_counter++);
+                                            1, vect_stmts_counter++);
                }
 
              index = 0;

but it allows dead code to be removed and the now pointless helper
vect_create_mask_and_perm to vanish.

Bootstrapped and tested on x86_64-unknown-linux-gnu, applied.

Richard.

2017-03-08  Richard Biener  <rguenther@suse.de>

	PR tree-optimization/79920
	* tree-vect-slp.c (vect_create_mask_and_perm): Remove and inline
	with ncopies == 1 to ...
	(vect_transform_slp_perm_load): ... here.  Properly compute
	all element loads by iterating VF times over the group.  Do
	not handle ncopies (computed in a broken way) in
	vect_create_mask_and_perm.

	* gcc.dg/vect/pr79920.c: New testcase.

Index: gcc/tree-vect-slp.c
===================================================================
*** gcc/tree-vect-slp.c	(revision 245947)
--- gcc/tree-vect-slp.c	(working copy)
*************** vect_get_slp_defs (vec<tree> ops, slp_tr
*** 3379,3444 ****
      }
  }
  
- 
- /* Create NCOPIES permutation statements using the mask MASK_BYTES (by
-    building a vector of type MASK_TYPE from it) and two input vectors placed in
-    DR_CHAIN at FIRST_VEC_INDX and SECOND_VEC_INDX for the first copy and
-    shifting by STRIDE elements of DR_CHAIN for every copy.
-    (STRIDE is the number of vectorized stmts for NODE divided by the number of
-    copies).
-    VECT_STMTS_COUNTER specifies the index in the vectorized stmts of NODE, where
-    the created stmts must be inserted.  */
- 
- static inline void
- vect_create_mask_and_perm (gimple *stmt,
-                            tree mask, int first_vec_indx, int second_vec_indx,
-                            gimple_stmt_iterator *gsi, slp_tree node,
-                            tree vectype, vec<tree> dr_chain,
-                            int ncopies, int vect_stmts_counter)
- {
-   tree perm_dest;
-   gimple *perm_stmt = NULL;
-   int i, stride_in, stride_out;
-   tree first_vec, second_vec, data_ref;
- 
-   stride_out = SLP_TREE_NUMBER_OF_VEC_STMTS (node) / ncopies;
-   stride_in = dr_chain.length () / ncopies;
- 
-   /* Initialize the vect stmts of NODE to properly insert the generated
-      stmts later.  */
-   for (i = SLP_TREE_VEC_STMTS (node).length ();
-        i < (int) SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
-     SLP_TREE_VEC_STMTS (node).quick_push (NULL);
- 
-   perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
-   for (i = 0; i < ncopies; i++)
-     {
-       first_vec = dr_chain[first_vec_indx];
-       second_vec = dr_chain[second_vec_indx];
- 
-       /* Generate the permute statement if necessary.  */
-       if (mask)
- 	{
- 	  perm_stmt = gimple_build_assign (perm_dest, VEC_PERM_EXPR,
- 					   first_vec, second_vec, mask);
- 	  data_ref = make_ssa_name (perm_dest, perm_stmt);
- 	  gimple_set_lhs (perm_stmt, data_ref);
- 	  vect_finish_stmt_generation (stmt, perm_stmt, gsi);
- 	}
-       else
- 	/* If mask was NULL_TREE generate the requested identity transform.  */
- 	perm_stmt = SSA_NAME_DEF_STMT (first_vec);
- 
-       /* Store the vector statement in NODE.  */
-       SLP_TREE_VEC_STMTS (node)[stride_out * i + vect_stmts_counter]
- 	= perm_stmt;
- 
-       first_vec_indx += stride_in;
-       second_vec_indx += stride_in;
-     }
- }
- 
- 
  /* Generate vector permute statements from a list of loads in DR_CHAIN.
     If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
     permute statements for the SLP node NODE of the SLP instance
--- 3379,3384 ----
*************** vect_transform_slp_perm_load (slp_tree n
*** 3456,3462 ****
    int nunits, vec_index = 0;
    tree vectype = STMT_VINFO_VECTYPE (stmt_info);
    int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
!   int unroll_factor, mask_element, ncopies;
    unsigned char *mask;
    machine_mode mode;
  
--- 3396,3402 ----
    int nunits, vec_index = 0;
    tree vectype = STMT_VINFO_VECTYPE (stmt_info);
    int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
!   int mask_element;
    unsigned char *mask;
    machine_mode mode;
  
*************** vect_transform_slp_perm_load (slp_tree n
*** 3474,3484 ****
    mask_type = get_vectype_for_scalar_type (mask_element_type);
    nunits = TYPE_VECTOR_SUBPARTS (vectype);
    mask = XALLOCAVEC (unsigned char, nunits);
-   unroll_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
  
!   /* Number of copies is determined by the final vectorization factor
!      relatively to SLP_NODE_INSTANCE unrolling factor.  */
!   ncopies = vf / SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
  
    /* Generate permutation masks for every NODE. Number of masks for each NODE
       is equal to GROUP_SIZE.
--- 3414,3426 ----
    mask_type = get_vectype_for_scalar_type (mask_element_type);
    nunits = TYPE_VECTOR_SUBPARTS (vectype);
    mask = XALLOCAVEC (unsigned char, nunits);
  
!   /* Initialize the vect stmts of NODE to properly insert the generated
!      stmts later.  */
!   if (! analyze_only)
!     for (unsigned i = SLP_TREE_VEC_STMTS (node).length ();
! 	 i < SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
!       SLP_TREE_VEC_STMTS (node).quick_push (NULL);
  
    /* Generate permutation masks for every NODE. Number of masks for each NODE
       is equal to GROUP_SIZE.
*************** vect_transform_slp_perm_load (slp_tree n
*** 3505,3511 ****
    bool noop_p = true;
    *n_perms = 0;
  
!   for (int j = 0; j < unroll_factor; j++)
      {
        for (int k = 0; k < group_size; k++)
  	{
--- 3447,3453 ----
    bool noop_p = true;
    *n_perms = 0;
  
!   for (int j = 0; j < vf; j++)
      {
        for (int k = 0; k < group_size; k++)
  	{
*************** vect_transform_slp_perm_load (slp_tree n
*** 3578,3587 ****
  
  		  if (second_vec_index == -1)
  		    second_vec_index = first_vec_index;
! 		  vect_create_mask_and_perm (stmt, mask_vec, first_vec_index,
! 					     second_vec_index,
! 					     gsi, node, vectype, dr_chain,
! 					     ncopies, vect_stmts_counter++);
  		}
  
  	      index = 0;
--- 3520,3549 ----
  
  		  if (second_vec_index == -1)
  		    second_vec_index = first_vec_index;
! 
! 		  /* Generate the permute statement if necessary.  */
! 		  tree first_vec = dr_chain[first_vec_index];
! 		  tree second_vec = dr_chain[second_vec_index];
! 		  gimple *perm_stmt;
! 		  if (! noop_p)
! 		    {
! 		      tree perm_dest
! 			= vect_create_destination_var (gimple_assign_lhs (stmt),
! 						       vectype);
! 		      perm_dest = make_ssa_name (perm_dest);
! 		      perm_stmt = gimple_build_assign (perm_dest,
! 						       VEC_PERM_EXPR,
! 						       first_vec, second_vec,
! 						       mask_vec);
! 		      vect_finish_stmt_generation (stmt, perm_stmt, gsi);
! 		    }
! 		  else
! 		    /* If mask was NULL_TREE generate the requested
! 		       identity transform.  */
! 		    perm_stmt = SSA_NAME_DEF_STMT (first_vec);
! 
! 		  /* Store the vector statement in NODE.  */
! 		  SLP_TREE_VEC_STMTS (node)[vect_stmts_counter++] = perm_stmt;
  		}
  
  	      index = 0;
Index: gcc/testsuite/gcc.dg/vect/pr79920.c
===================================================================
*** gcc/testsuite/gcc.dg/vect/pr79920.c	(nonexistent)
--- gcc/testsuite/gcc.dg/vect/pr79920.c	(working copy)
***************
*** 0 ****
--- 1,44 ----
+ /* { dg-do run } */
+ /* { dg-additional-options "-O3" } */
+ 
+ #include "tree-vect.h"
+ 
+ double __attribute__((noinline,noclone))
+ compute_integral (double w_1[18])
+ {
+   double A = 0;
+   double t33[2][6] = {{0.0, 0.0, 0.0, 0.0, 0.0, 0.0},
+ 	{0.0, 0.0, 0.0, 0.0, 0.0, 0.0}};
+   double t43[2] = {0.0, 0.0};
+   double t31[2][2] = {{1.0, 1.0}, {1.0, 1.0}};
+   double t32[2][3] = {{0.0, 0.0, 1.0}, {0.0, 0.0, 1.0}};
+ 
+   for (int ip_1 = 0; ip_1 < 2; ++ip_1)
+     {
+       for (int i_0 = 0; i_0 < 6; ++i_0)
+ 	t33[ip_1][i_0] = ((w_1[i_0*3] * t32[ip_1][0])
+ 			  + (w_1[i_0*3+2] * t32[ip_1][2]));
+       t43[ip_1] = 2.0;
+     }
+   for (int i_0 = 0; i_0 < 6; ++i_0)
+     A += t43[1]*t33[1][i_0];
+   return A;
+ }
+ 
+ int main()
+ {
+   check_vect ();
+ 
+   double w_1[18] = {0., 1.0, 1.0,
+       0., 1.0, 1.0,
+       0., 1.0, 1.0,
+       0., 1.0, 1.0,
+       0., 1.0, 1.0,
+       0., 1.0, 1.0};
+   double A = compute_integral(w_1);
+   if (A != 12.0)
+     __builtin_abort ();
+   return 0;
+ }
+ 
+ /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_double && { vect_perm && vect_hw_misalign } } } } } */



More information about the Gcc-patches mailing list