This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH] Fix PR66051


This fixes the vectorizer part of PR66051 (an x86 target part remains
for the testcase in the PR - PR68655).  The issue is again a
misplaced check for SLP detection:

              /* Check that the size of interleaved loads group is not
                 greater than the SLP group size.  */
              unsigned ncopies
                = vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype);
              if (is_a <loop_vec_info> (vinfo)
                  && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) == stmt
                  && ((GROUP_SIZE (vinfo_for_stmt (stmt))
                       - GROUP_GAP (vinfo_for_stmt (stmt)))
                      > ncopies * group_size))
                {
                  if (dump_enabled_p ())
                    {
                      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                       "Build SLP failed: the number "
                                       "of interleaved loads is greater than "
                                       "the SLP group size ");
                      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                        stmt, 0);
                      dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                    }
                  /* Fatal mismatch.  */
                  matches[0] = false;
                  return false;
                }

I've relaxed this check multiple times, but that still doesn't make it necessary.
It also uses a vectorization factor estimate as the vectorization factor
is not yet determined.  A good side-effect of the patch is that we
can get rid of that estimate completely.

Tested on the x86_64 vectorization tests so far.

Bootstrap & regtest pending and I'll make sure SPEC CPU 2006 is
happy as well.

Thanks,
Richard.

2015-12-02  Richard Biener  <rguenther@suse.de>

	PR tree-optimization/66051
	* tree-vect-slp.c (vect_build_slp_tree_1): Remove restriction
	on load group size.  Do not pass in vectorization_factor.
	(vect_transform_slp_perm_load): Do not require any permute support.
	(vect_build_slp_tree): Do not pass in vectorization factor.
	(vect_analyze_slp_instance): Do not compute vectorization
	factor estimate.  Use vector size instead of vectorization factor
	estimate to split store groups for BB vectorization.

	* gcc.dg/vect/slp-42.c: New testcase.

Index: gcc/tree-vect-slp.c
===================================================================
*** gcc/tree-vect-slp.c	(revision 231167)
--- gcc/tree-vect-slp.c	(working copy)
*************** static bool
*** 430,437 ****
  vect_build_slp_tree_1 (vec_info *vinfo,
  		       vec<gimple *> stmts, unsigned int group_size,
  		       unsigned nops, unsigned int *max_nunits,
! 		       unsigned int vectorization_factor, bool *matches,
! 		       bool *two_operators)
  {
    unsigned int i;
    gimple *first_stmt = stmts[0], *stmt = stmts[0];
--- 430,436 ----
  vect_build_slp_tree_1 (vec_info *vinfo,
  		       vec<gimple *> stmts, unsigned int group_size,
  		       unsigned nops, unsigned int *max_nunits,
! 		       bool *matches, bool *two_operators)
  {
    unsigned int i;
    gimple *first_stmt = stmts[0], *stmt = stmts[0];
*************** vect_build_slp_tree_1 (vec_info *vinfo,
*** 523,533 ****
  
        /* In case of multiple types we need to detect the smallest type.  */
        if (*max_nunits < TYPE_VECTOR_SUBPARTS (vectype))
!         {
!           *max_nunits = TYPE_VECTOR_SUBPARTS (vectype);
!           if (is_a <bb_vec_info> (vinfo))
!             vectorization_factor = *max_nunits;
!         }
  
        if (gcall *call_stmt = dyn_cast <gcall *> (stmt))
  	{
--- 522,528 ----
  
        /* In case of multiple types we need to detect the smallest type.  */
        if (*max_nunits < TYPE_VECTOR_SUBPARTS (vectype))
! 	*max_nunits = TYPE_VECTOR_SUBPARTS (vectype);
  
        if (gcall *call_stmt = dyn_cast <gcall *> (stmt))
  	{
*************** vect_build_slp_tree_1 (vec_info *vinfo,
*** 700,730 ****
  	  else
  	    {
  	      /* Load.  */
-               /* Check that the size of interleaved loads group is not
-                  greater than the SLP group size.  */
- 	      unsigned ncopies
- 		= vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype);
-               if (is_a <loop_vec_info> (vinfo)
- 		  && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) == stmt
-                   && ((GROUP_SIZE (vinfo_for_stmt (stmt))
- 		       - GROUP_GAP (vinfo_for_stmt (stmt)))
- 		      > ncopies * group_size))
-                 {
-                   if (dump_enabled_p ())
-                     {
-                       dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- 				       "Build SLP failed: the number "
- 				       "of interleaved loads is greater than "
- 				       "the SLP group size ");
-                       dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
- 					stmt, 0);
-                       dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
-                     }
- 		  /* Fatal mismatch.  */
- 		  matches[0] = false;
-                   return false;
-                 }
- 
                first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
                if (prev_first_load)
                  {
--- 695,700 ----
*************** vect_build_slp_tree (vec_info *vinfo,
*** 871,877 ****
                       slp_tree *node, unsigned int group_size,
                       unsigned int *max_nunits,
                       vec<slp_tree> *loads,
-                      unsigned int vectorization_factor,
  		     bool *matches, unsigned *npermutes, unsigned *tree_size,
  		     unsigned max_tree_size)
  {
--- 841,846 ----
*************** vect_build_slp_tree (vec_info *vinfo,
*** 895,902 ****
    bool two_operators = false;
    if (!vect_build_slp_tree_1 (vinfo,
  			      SLP_TREE_SCALAR_STMTS (*node), group_size, nops,
! 			      max_nunits, vectorization_factor, matches,
! 			      &two_operators))
      return false;
    SLP_TREE_TWO_OPERATORS (*node) = two_operators;
  
--- 864,870 ----
    bool two_operators = false;
    if (!vect_build_slp_tree_1 (vinfo,
  			      SLP_TREE_SCALAR_STMTS (*node), group_size, nops,
! 			      max_nunits, matches, &two_operators))
      return false;
    SLP_TREE_TWO_OPERATORS (*node) = two_operators;
  
*************** vect_build_slp_tree (vec_info *vinfo,
*** 959,966 ****
  	}
  
        if (vect_build_slp_tree (vinfo, &child,
! 			       group_size, max_nunits, loads,
! 			       vectorization_factor, matches,
  			       npermutes, &this_tree_size, max_tree_size))
  	{
  	  /* If we have all children of child built up from scalars then just
--- 927,933 ----
  	}
  
        if (vect_build_slp_tree (vinfo, &child,
! 			       group_size, max_nunits, loads, matches,
  			       npermutes, &this_tree_size, max_tree_size))
  	{
  	  /* If we have all children of child built up from scalars then just
*************** vect_build_slp_tree (vec_info *vinfo,
*** 1074,1080 ****
  	  bool *tem = XALLOCAVEC (bool, group_size);
  	  if (vect_build_slp_tree (vinfo, &child,
  				   group_size, max_nunits, loads,
- 				   vectorization_factor,
  				   tem, npermutes, &this_tree_size,
  				   max_tree_size))
  	    {
--- 1041,1046 ----
*************** vect_analyze_slp_instance (vec_info *vin
*** 1656,1662 ****
    unsigned int unrolling_factor = 1, nunits;
    tree vectype, scalar_type = NULL_TREE;
    gimple *next;
-   unsigned int vectorization_factor = 0;
    unsigned int i;
    unsigned int max_nunits = 0;
    vec<slp_tree> loads;
--- 1622,1627 ----
*************** vect_analyze_slp_instance (vec_info *vin
*** 1697,1708 ****
  
        return false;
      }
- 
    nunits = TYPE_VECTOR_SUBPARTS (vectype);
-   if (is_a <loop_vec_info> (vinfo))
-     vectorization_factor = as_a <loop_vec_info> (vinfo)->vectorization_factor;
-   else
-     vectorization_factor = nunits;
  
    /* Calculate the unrolling factor.  */
    unrolling_factor = least_common_multiple (nunits, group_size) / group_size;
--- 1662,1668 ----
*************** vect_analyze_slp_instance (vec_info *vin
*** 1755,1762 ****
    unsigned npermutes = 0;
    if (vect_build_slp_tree (vinfo, &node, group_size,
  			   &max_nunits, &loads,
! 			   vectorization_factor, matches, &npermutes, NULL,
! 			   max_tree_size))
      {
        /* Calculate the unrolling factor based on the smallest type.  */
        if (max_nunits > nunits)
--- 1715,1721 ----
    unsigned npermutes = 0;
    if (vect_build_slp_tree (vinfo, &node, group_size,
  			   &max_nunits, &loads,
! 			   matches, &npermutes, NULL, max_tree_size))
      {
        /* Calculate the unrolling factor based on the smallest type.  */
        if (max_nunits > nunits)
*************** vect_analyze_slp_instance (vec_info *vin
*** 1852,1858 ****
    loads.release ();
  
    /* For basic block SLP, try to break the group up into multiples of the
!      vectorization factor.  */
    if (is_a <bb_vec_info> (vinfo)
        && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
        && STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))
--- 1811,1817 ----
    loads.release ();
  
    /* For basic block SLP, try to break the group up into multiples of the
!      vector size.  */
    if (is_a <bb_vec_info> (vinfo)
        && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
        && STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))
*************** vect_analyze_slp_instance (vec_info *vin
*** 1862,1872 ****
        for (i = 0; i < group_size; i++)
  	if (!matches[i]) break;
  
!       if (i >= vectorization_factor && i < group_size)
  	{
  	  /* Split into two groups at the first vector boundary before i.  */
! 	  gcc_assert ((vectorization_factor & (vectorization_factor - 1)) == 0);
! 	  unsigned group1_size = i & ~(vectorization_factor - 1);
  
  	  gimple *rest = vect_split_slp_store_group (stmt, group1_size);
  	  bool res = vect_analyze_slp_instance (vinfo, stmt, max_tree_size);
--- 1821,1831 ----
        for (i = 0; i < group_size; i++)
  	if (!matches[i]) break;
  
!       if (i >= nunits && i < group_size)
  	{
  	  /* Split into two groups at the first vector boundary before i.  */
! 	  gcc_assert ((nunits & (nunits - 1)) == 0);
! 	  unsigned group1_size = i & ~(nunits - 1);
  
  	  gimple *rest = vect_split_slp_store_group (stmt, group1_size);
  	  bool res = vect_analyze_slp_instance (vinfo, stmt, max_tree_size);
*************** vect_analyze_slp_instance (vec_info *vin
*** 1874,1882 ****
  	     skip the rest of that vector.  */
  	  if (group1_size < i)
  	    {
! 	      i = group1_size + vectorization_factor;
  	      if (i < group_size)
! 		rest = vect_split_slp_store_group (rest, vectorization_factor);
  	    }
  	  if (i < group_size)
  	    res |= vect_analyze_slp_instance (vinfo, rest, max_tree_size);
--- 1833,1841 ----
  	     skip the rest of that vector.  */
  	  if (group1_size < i)
  	    {
! 	      i = group1_size + nunits;
  	      if (i < group_size)
! 		rest = vect_split_slp_store_group (rest, nunits);
  	    }
  	  if (i < group_size)
  	    res |= vect_analyze_slp_instance (vinfo, rest, max_tree_size);
*************** vect_transform_slp_perm_load (slp_tree n
*** 3274,3291 ****
  
    mode = TYPE_MODE (vectype);
  
-   if (!can_vec_perm_p (mode, false, NULL))
-     {
-       if (dump_enabled_p ())
-         {
-           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- 			   "no vect permute for ");
-           dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
-           dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
-         }
-       return false;
-     }
- 
    /* The generic VEC_PERM_EXPR code always uses an integral type of the
       same size as the vector element being permuted.  */
    mask_element_type = lang_hooks.types.type_for_mode
--- 3233,3238 ----
Index: gcc/testsuite/gcc.dg/vect/slp-42.c
===================================================================
*** gcc/testsuite/gcc.dg/vect/slp-42.c	(revision 0)
--- gcc/testsuite/gcc.dg/vect/slp-42.c	(working copy)
***************
*** 0 ****
--- 1,19 ----
+ /* { dg-do compile } */
+ /* { dg-require-effective-target vect_int } */
+ 
+ int p[4096], q[4096];
+ 
+ void foo (int n)
+ {
+   int i;
+   for (i = 0; i < n; ++i)
+     {
+       p[i*4+0] = q[i*8+0] + q[i*8+4];
+       p[i*4+1] = q[i*8+1] + q[i*8+5];
+       p[i*4+2] = q[i*8+2] + q[i*8+6];
+       p[i*4+3] = q[i*8+3] + q[i*8+7];
+     }
+ }
+ 
+ /* { dg-final { scan-tree-dump "vectorizing stmts using SLP" "vect" } } */
+ /* { dg-final { scan-tree-dump "vectorized 1 loops" "vect" } } */


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]