This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH] Handle SLP group loads with trailing gaps


This makes the vectorizer handle loads of SLP groups with
"trailing gaps".  That is, a (for now known) gap between
the groups between loop iterations, as in the testcase

+ double self[1024];
+ double a[1024][1024];
+ double b[1024];
+
+ void __attribute__((noinline,noclone))
+ foo (void)
+ {
+   int i, j;
+   for (i = 0; i < 1024; i+=6)
+     for (j = 0; j < 1024; j+=6)
+       {
+       self[i] = self[i] + a[i][j]*b[j];
+       self[i+1] = self[i+1] + a[i][j+1]*b[j+1];
+       }
+ }

which is reduced to just this issue from the complex multiplication
vectorization issue in PR37021.

A next step will eventually be to allow an unknown gap between
the groups between iterations (including possible overlap).

Bootstrapped and tested on x86_64-unknown-linux-gnu.

Richard.

2013-03-27  Richard Biener  <rguenther@suse.de>

	PR tree-optimization/37021
	* tree-vect-slp.c (vect_build_slp_tree): When not unrolling
	do not restrict gaps between groups.
	* tree-vect-stmts.c (vectorizable_load): Properly account for
	a gap between groups.

	* gcc.dg/vect/fast-math-slp-38.c: New testcase.
	* gcc.dg/vect/O3-pr36098.c: Un-XFAIL.

Index: gcc/tree-vect-slp.c
===================================================================
*** gcc/tree-vect-slp.c.orig	2013-03-26 13:09:18.000000000 +0100
--- gcc/tree-vect-slp.c	2013-03-26 14:27:14.135697847 +0100
*************** vect_build_slp_tree (loop_vec_info loop_
*** 740,750 ****
  	  else
  	    {
  	      /* Load.  */
!               /* FORNOW: Check that there is no gap between the loads.  */
!               if ((GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) == stmt
!                    && GROUP_GAP (vinfo_for_stmt (stmt)) != 0)
!                   || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) != stmt
!                       && GROUP_GAP (vinfo_for_stmt (stmt)) != 1))
                  {
                    if (dump_enabled_p ())
                      {
--- 750,765 ----
  	  else
  	    {
  	      /* Load.  */
!               /* FORNOW: Check that there is no gap between the loads
! 		 and no gap between the groups when we need to load
! 		 multiple groups at once.
! 		 ???  We should enhance this to only disallow gaps
! 		 inside vectors.  */
!               if ((ncopies > 1
! 		   && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) == stmt
! 		   && GROUP_GAP (vinfo_for_stmt (stmt)) != 0)
! 		  || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) != stmt
! 		      && GROUP_GAP (vinfo_for_stmt (stmt)) != 1))
                  {
                    if (dump_enabled_p ())
                      {
*************** vect_build_slp_tree (loop_vec_info loop_
*** 762,768 ****
                /* Check that the size of interleaved loads group is not
                   greater than the SLP group size.  */
                if (loop_vinfo
!                   && GROUP_SIZE (vinfo_for_stmt (stmt)) > ncopies * group_size)
                  {
                    if (dump_enabled_p ())
                      {
--- 777,786 ----
                /* Check that the size of interleaved loads group is not
                   greater than the SLP group size.  */
                if (loop_vinfo
! 		  && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) == stmt
!                   && ((GROUP_SIZE (vinfo_for_stmt (stmt))
! 		       - GROUP_GAP (vinfo_for_stmt (stmt)))
! 		      > ncopies * group_size))
                  {
                    if (dump_enabled_p ())
                      {
Index: gcc/testsuite/gcc.dg/vect/fast-math-slp-38.c
===================================================================
*** /dev/null	1970-01-01 00:00:00.000000000 +0000
--- gcc/testsuite/gcc.dg/vect/fast-math-slp-38.c	2013-03-26 13:09:20.860002059 +0100
***************
*** 0 ****
--- 1,22 ----
+ /* { dg-do compile } */
+ /* { dg-require-effective-target vect_double } */
+ 
+ double self[1024];
+ double a[1024][1024];
+ double b[1024];
+ 
+ void __attribute__((noinline,noclone))
+ foo (void)
+ {
+   int i, j;
+   for (i = 0; i < 1024; i+=6)
+     for (j = 0; j < 1024; j+=6)
+       {
+ 	self[i] = self[i] + a[i][j]*b[j];
+ 	self[i+1] = self[i+1] + a[i][j+1]*b[j+1];
+       }
+ }
+ 
+ /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
+ /* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" } } */
+ /* { dg-final { cleanup-tree-dump "vect" } } */
Index: gcc/testsuite/gcc.dg/vect/O3-pr36098.c
===================================================================
*** gcc/testsuite/gcc.dg/vect/O3-pr36098.c.orig	2013-03-26 13:02:00.000000000 +0100
--- gcc/testsuite/gcc.dg/vect/O3-pr36098.c	2013-03-26 13:09:20.860002059 +0100
*************** void foo (int ncons, t_sortblock *sb, in
*** 17,22 ****
       iatom[m]=sb[i].iatom[m];
  }
  
! /* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { xfail *-*-* } } } */
  /* { dg-final { cleanup-tree-dump "vect" } } */
- 
--- 17,21 ----
       iatom[m]=sb[i].iatom[m];
  }
  
! /* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" } } */
  /* { dg-final { cleanup-tree-dump "vect" } } */
Index: gcc/tree-vect-stmts.c
===================================================================
*** gcc/tree-vect-stmts.c.orig	2013-03-26 13:02:00.000000000 +0100
--- gcc/tree-vect-stmts.c	2013-03-26 13:09:20.861002070 +0100
*************** vectorizable_load (gimple stmt, gimple_s
*** 4316,4322 ****
    gimple ptr_incr;
    int nunits = TYPE_VECTOR_SUBPARTS (vectype);
    int ncopies;
!   int i, j, group_size;
    tree msq = NULL_TREE, lsq;
    tree offset = NULL_TREE;
    tree realignment_token = NULL_TREE;
--- 4316,4322 ----
    gimple ptr_incr;
    int nunits = TYPE_VECTOR_SUBPARTS (vectype);
    int ncopies;
!   int i, j, group_size, group_gap;
    tree msq = NULL_TREE, lsq;
    tree offset = NULL_TREE;
    tree realignment_token = NULL_TREE;
*************** vectorizable_load (gimple stmt, gimple_s
*** 4766,4780 ****
  	  vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
            if (SLP_INSTANCE_LOAD_PERMUTATION (slp_node_instance).exists ())
              slp_perm = true;
      	}
        else
! 	vec_num = group_size;
      }
    else
      {
        first_stmt = stmt;
        first_dr = dr;
        group_size = vec_num = 1;
      }
  
    alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
--- 4766,4785 ----
  	  vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
            if (SLP_INSTANCE_LOAD_PERMUTATION (slp_node_instance).exists ())
              slp_perm = true;
+ 	  group_gap = GROUP_GAP (vinfo_for_stmt (first_stmt));
      	}
        else
! 	{
! 	  vec_num = group_size;
! 	  group_gap = 0;
! 	}
      }
    else
      {
        first_stmt = stmt;
        first_dr = dr;
        group_size = vec_num = 1;
+       group_gap = 0;
      }
  
    alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
*************** vectorizable_load (gimple stmt, gimple_s
*** 5134,5139 ****
--- 5139,5153 ----
  	      if (slp && !slp_perm)
  		SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
  	    }
+ 	  /* Bump the vector pointer to account for a gap.  */
+ 	  if (slp && group_gap != 0)
+ 	    {
+ 	      tree bump = size_binop (MULT_EXPR,
+ 				      TYPE_SIZE_UNIT (elem_type),
+ 				      size_int (group_gap));
+ 	      dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
+ 					     stmt, bump);
+ 	    }
  	}
  
        if (slp && !slp_perm)


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]