[PATCH][1/n] Vectorizer TLC: speedup basic-block vectorization

Richard Biener rguenther@suse.de
Tue Feb 26 14:12:00 GMT 2013


This patch disentangles data reference and data dependence analysis
(for both loop and basic-block vectorization).  Doing so avoids
the need to fix data dependences for gather loads as well as
computing them in the first place for data references we cannot
vectorize.  The latter speeds up basic-block vectorization as
it reduces the quadratic cost of dependence computation to those
dependences we will end up using (for polyhedron ac.f90 this cuts
10% of compile-time for me according to callgrind and get's SLP
vectorization off the radar of -ftime-report).

The patch also touches data reference finding, avoiding excessive
malloc/free there by using a stack vector.  That's really an
independent change, but ...

Bootstrapped on x86_64-unknown-linux-gnu, testing in progress.

Queued for 4.9.  I could split out the data-ref changes and
consider the BB vectorization speedup a regression fix - Jakub,
any opinion on that?

Thanks,
Richard.

2013-02-26  Richard Biener  <rguenther@suse.de>

	* tree-data-ref.h (find_data_references_in_loop): Declare.
	* tree-data-ref.c (get_references_in_stmt): Use a stack
	vector pre-allocated in the callers.
	(find_data_references_in_stmt): Adjust.
	(graphite_find_data_references_in_stmt): Likewise.
	(create_rdg_vertices): Likewise.
	(find_data_references_in_loop): Export.
	* tree-vect-data-refs.c (vect_analyze_data_ref_dependences):
	Compute dependences here...
	(vect_analyze_data_refs): ...not here.  When we encounter
	a non-vectorizable data reference in basic-block vectorization
	truncate the data reference vector.  Do not bother to
	fixup data-dependence information for gather loads.
	* tree-vect-slp.c (vect_slp_analyze_bb_1): Check the number
	of data references, as reported.

Index: gcc/tree-data-ref.h
===================================================================
*** gcc/tree-data-ref.h.orig	2013-02-20 13:22:51.000000000 +0100
--- gcc/tree-data-ref.h	2013-02-26 13:31:44.114507972 +0100
*************** extern bool find_data_references_in_stmt
*** 382,387 ****
--- 382,388 ----
  					  vec<data_reference_p> *);
  extern bool graphite_find_data_references_in_stmt (loop_p, loop_p, gimple,
  						   vec<data_reference_p> *);
+ tree find_data_references_in_loop (struct loop *, vec<data_reference_p> *);
  struct data_reference *create_data_ref (loop_p, loop_p, tree, gimple, bool);
  extern bool find_loop_nest (struct loop *, vec<loop_p> *);
  extern struct data_dependence_relation *initialize_data_dependence_relation
Index: gcc/tree-data-ref.c
===================================================================
*** gcc/tree-data-ref.c.orig	2013-02-20 13:22:51.000000000 +0100
--- gcc/tree-data-ref.c	2013-02-26 14:32:13.571463465 +0100
*************** compute_all_dependences (vec<data_refere
*** 4257,4267 ****
  
  typedef struct data_ref_loc_d
  {
!     /* Position of the memory reference.  */
!     tree *pos;
  
!       /* True if the memory reference is read.  */
!       bool is_read;
  } data_ref_loc;
  
  
--- 4257,4267 ----
  
  typedef struct data_ref_loc_d
  {
!   /* Position of the memory reference.  */
!   tree *pos;
  
!   /* True if the memory reference is read.  */
!   bool is_read;
  } data_ref_loc;
  
  
*************** typedef struct data_ref_loc_d
*** 4269,4283 ****
     true if STMT clobbers memory, false otherwise.  */
  
  static bool
! get_references_in_stmt (gimple stmt, vec<data_ref_loc> *references)
  {
    bool clobbers_memory = false;
    data_ref_loc ref;
    tree *op0, *op1;
    enum gimple_code stmt_code = gimple_code (stmt);
  
-   references->create (0);
- 
    /* ASM_EXPR and CALL_EXPR may embed arbitrary side effects.
       As we cannot model data-references to not spelled out
       accesses give up if they may occur.  */
--- 4269,4281 ----
     true if STMT clobbers memory, false otherwise.  */
  
  static bool
! get_references_in_stmt (gimple stmt, vec<data_ref_loc, va_stack> *references)
  {
    bool clobbers_memory = false;
    data_ref_loc ref;
    tree *op0, *op1;
    enum gimple_code stmt_code = gimple_code (stmt);
  
    /* ASM_EXPR and CALL_EXPR may embed arbitrary side effects.
       As we cannot model data-references to not spelled out
       accesses give up if they may occur.  */
*************** find_data_references_in_stmt (struct loo
*** 4348,4358 ****
  			      vec<data_reference_p> *datarefs)
  {
    unsigned i;
!   vec<data_ref_loc> references;
    data_ref_loc *ref;
    bool ret = true;
    data_reference_p dr;
  
    if (get_references_in_stmt (stmt, &references))
      {
        references.release ();
--- 4346,4357 ----
  			      vec<data_reference_p> *datarefs)
  {
    unsigned i;
!   vec<data_ref_loc, va_stack> references;
    data_ref_loc *ref;
    bool ret = true;
    data_reference_p dr;
  
+   vec_stack_alloc (data_ref_loc, references, 2);
    if (get_references_in_stmt (stmt, &references))
      {
        references.release ();
*************** graphite_find_data_references_in_stmt (l
*** 4381,4391 ****
  				       vec<data_reference_p> *datarefs)
  {
    unsigned i;
!   vec<data_ref_loc> references;
    data_ref_loc *ref;
    bool ret = true;
    data_reference_p dr;
  
    if (get_references_in_stmt (stmt, &references))
      {
        references.release ();
--- 4380,4391 ----
  				       vec<data_reference_p> *datarefs)
  {
    unsigned i;
!   vec<data_ref_loc, va_stack> references;
    data_ref_loc *ref;
    bool ret = true;
    data_reference_p dr;
  
+   vec_stack_alloc (data_ref_loc, references, 2);
    if (get_references_in_stmt (stmt, &references))
      {
        references.release ();
*************** find_data_references_in_bb (struct loop
*** 4437,4443 ****
     TODO: This function should be made smarter so that it can handle address
     arithmetic as if they were array accesses, etc.  */
  
! static tree
  find_data_references_in_loop (struct loop *loop,
  			      vec<data_reference_p> *datarefs)
  {
--- 4437,4443 ----
     TODO: This function should be made smarter so that it can handle address
     arithmetic as if they were array accesses, etc.  */
  
! tree
  find_data_references_in_loop (struct loop *loop,
  			      vec<data_reference_p> *datarefs)
  {
*************** create_rdg_vertices (struct graph *rdg,
*** 5005,5011 ****
  
    FOR_EACH_VEC_ELT (stmts, i, stmt)
      {
!       vec<data_ref_loc> references;
        data_ref_loc *ref;
        struct vertex *v = &(rdg->vertices[i]);
  
--- 5005,5011 ----
  
    FOR_EACH_VEC_ELT (stmts, i, stmt)
      {
!       vec<data_ref_loc, va_stack> references;
        data_ref_loc *ref;
        struct vertex *v = &(rdg->vertices[i]);
  
*************** create_rdg_vertices (struct graph *rdg,
*** 5020,5025 ****
--- 5020,5026 ----
        if (gimple_code (stmt) == GIMPLE_PHI)
  	continue;
  
+       vec_stack_alloc (data_ref_loc, references, 2);
        get_references_in_stmt (stmt, &references);
        FOR_EACH_VEC_ELT (references, j, ref)
  	{
Index: gcc/tree-vect-data-refs.c
===================================================================
*** gcc/tree-vect-data-refs.c.orig	2013-02-26 13:31:25.000000000 +0100
--- gcc/tree-vect-data-refs.c	2013-02-26 14:25:52.896275680 +0100
*************** vect_analyze_data_ref_dependences (loop_
*** 798,806 ****
      dump_printf_loc (MSG_NOTE, vect_location,
                       "=== vect_analyze_dependences ===");
    if (loop_vinfo)
!     ddrs = LOOP_VINFO_DDRS (loop_vinfo);
    else
!     ddrs = BB_VINFO_DDRS (bb_vinfo);
  
    FOR_EACH_VEC_ELT (ddrs, i, ddr)
      if (vect_analyze_data_ref_dependence (ddr, loop_vinfo, max_vf))
--- 798,818 ----
      dump_printf_loc (MSG_NOTE, vect_location,
                       "=== vect_analyze_dependences ===");
    if (loop_vinfo)
!     {
!       if (!compute_all_dependences (LOOP_VINFO_DATAREFS (loop_vinfo),
! 				    &LOOP_VINFO_DDRS (loop_vinfo),
! 				    LOOP_VINFO_LOOP_NEST (loop_vinfo), true))
! 	return false;
!       ddrs = LOOP_VINFO_DDRS (loop_vinfo);
!     }
    else
!     {
!       if (!compute_all_dependences (BB_VINFO_DATAREFS (bb_vinfo),
! 				    &BB_VINFO_DDRS (bb_vinfo),
! 				    vNULL, true))
! 	return false;
!       ddrs = BB_VINFO_DDRS (bb_vinfo);
!     }
  
    FOR_EACH_VEC_ELT (ddrs, i, ddr)
      if (vect_analyze_data_ref_dependence (ddr, loop_vinfo, max_vf))
*************** vect_analyze_data_refs (loop_vec_info lo
*** 2941,2947 ****
    vec<data_reference_p> datarefs;
    struct data_reference *dr;
    tree scalar_type;
-   bool res, stop_bb_analysis = false;
  
    if (dump_enabled_p ())
      dump_printf_loc (MSG_NOTE, vect_location,
--- 2953,2958 ----
*************** vect_analyze_data_refs (loop_vec_info lo
*** 2950,2962 ****
    if (loop_vinfo)
      {
        loop = LOOP_VINFO_LOOP (loop_vinfo);
!       res = compute_data_dependences_for_loop
! 	(loop, true,
! 	 &LOOP_VINFO_LOOP_NEST (loop_vinfo),
! 	 &LOOP_VINFO_DATAREFS (loop_vinfo),
! 	 &LOOP_VINFO_DDRS (loop_vinfo));
! 
!       if (!res)
  	{
  	  if (dump_enabled_p ())
  	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 
--- 2961,2969 ----
    if (loop_vinfo)
      {
        loop = LOOP_VINFO_LOOP (loop_vinfo);
!       if (!find_loop_nest (loop, &LOOP_VINFO_LOOP_NEST (loop_vinfo))
! 	  || find_data_references_in_loop
! 	       (loop, &LOOP_VINFO_DATAREFS (loop_vinfo)))
  	{
  	  if (dump_enabled_p ())
  	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 
*************** vect_analyze_data_refs (loop_vec_info lo
*** 2987,3003 ****
  	      break;
  	    }
  	}
-       if (!compute_all_dependences (BB_VINFO_DATAREFS (bb_vinfo),
- 				    &BB_VINFO_DDRS (bb_vinfo),
- 				    vNULL, true))
- 	{
- 	  if (dump_enabled_p ())
- 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 
-                              "not vectorized: basic block contains function"
-                              " calls or data references that cannot be"
-                              " analyzed");
- 	  return false;
- 	}
  
        datarefs = BB_VINFO_DATAREFS (bb_vinfo);
      }
--- 2994,2999 ----
*************** vect_analyze_data_refs (loop_vec_info lo
*** 3024,3035 ****
        stmt = DR_STMT (dr);
        stmt_info = vinfo_for_stmt (stmt);
  
-       if (stop_bb_analysis)
-         {
-           STMT_VINFO_VECTORIZABLE (stmt_info) = false;
-           continue;
-         }
- 
        /* Check that analysis of the data-ref succeeded.  */
        if (!DR_BASE_ADDRESS (dr) || !DR_OFFSET (dr) || !DR_INIT (dr)
  	  || !DR_STEP (dr))
--- 3020,3025 ----
*************** vect_analyze_data_refs (loop_vec_info lo
*** 3070,3080 ****
  		}
  
  	      if (bb_vinfo)
! 		{
! 		  STMT_VINFO_VECTORIZABLE (stmt_info) = false;
! 		  stop_bb_analysis = true;
! 		  continue;
! 		}
  
  	      return false;
  	    }
--- 3060,3066 ----
  		}
  
  	      if (bb_vinfo)
! 		break;
  
  	      return false;
  	    }
*************** vect_analyze_data_refs (loop_vec_info lo
*** 3088,3098 ****
                               "constant");
  
            if (bb_vinfo)
!             {
!               STMT_VINFO_VECTORIZABLE (stmt_info) = false;
!               stop_bb_analysis = true;
!               continue;
!             }
  
  	  if (gather)
  	    free_data_ref (dr);
--- 3074,3080 ----
                               "constant");
  
            if (bb_vinfo)
! 	    break;
  
  	  if (gather)
  	    free_data_ref (dr);
*************** vect_analyze_data_refs (loop_vec_info lo
*** 3109,3119 ****
              }
  
            if (bb_vinfo)
!             {
!               STMT_VINFO_VECTORIZABLE (stmt_info) = false;
!               stop_bb_analysis = true;
!               continue;
!             }
  
            return false;
          }
--- 3091,3097 ----
              }
  
            if (bb_vinfo)
! 	    break;
  
            return false;
          }
*************** vect_analyze_data_refs (loop_vec_info lo
*** 3129,3139 ****
              }
  
            if (bb_vinfo)
!             {
!               STMT_VINFO_VECTORIZABLE (stmt_info) = false;
!               stop_bb_analysis = true;
!               continue;
!             }
  
  	  if (gather)
  	    free_data_ref (dr);
--- 3107,3113 ----
              }
  
            if (bb_vinfo)
! 	    break;
  
  	  if (gather)
  	    free_data_ref (dr);
*************** vect_analyze_data_refs (loop_vec_info lo
*** 3152,3162 ****
              }
  
            if (bb_vinfo)
!             {
!               STMT_VINFO_VECTORIZABLE (stmt_info) = false;
!               stop_bb_analysis = true;
!               continue;
!             }
  
  	  if (gather)
  	    free_data_ref (dr);
--- 3126,3132 ----
              }
  
            if (bb_vinfo)
! 	    break;
  
  	  if (gather)
  	    free_data_ref (dr);
*************** vect_analyze_data_refs (loop_vec_info lo
*** 3177,3187 ****
  	    }
  
  	  if (bb_vinfo)
! 	    {
! 	      STMT_VINFO_VECTORIZABLE (stmt_info) = false;
! 	      stop_bb_analysis = true;
! 	      continue;
! 	    }
  
  	  if (gather)
  	    free_data_ref (dr);
--- 3147,3153 ----
  	    }
  
  	  if (bb_vinfo)
! 	    break;
  
  	  if (gather)
  	    free_data_ref (dr);
*************** vect_analyze_data_refs (loop_vec_info lo
*** 3316,3326 ****
              }
  
            if (bb_vinfo)
!             {
!               STMT_VINFO_VECTORIZABLE (stmt_info) = false;
!               stop_bb_analysis = true;
!               continue;
!             }
  
  	  if (gather)
  	    free_data_ref (dr);
--- 3282,3288 ----
              }
  
            if (bb_vinfo)
! 	    break;
  
  	  if (gather)
  	    free_data_ref (dr);
*************** vect_analyze_data_refs (loop_vec_info lo
*** 3346,3357 ****
              }
  
            if (bb_vinfo)
!             {
!               /* Mark the statement as not vectorizable.  */
!               STMT_VINFO_VECTORIZABLE (stmt_info) = false;
!               stop_bb_analysis = true;
!               continue;
!             }
  
  	  if (gather)
  	    {
--- 3308,3314 ----
              }
  
            if (bb_vinfo)
! 	    break;
  
  	  if (gather)
  	    {
*************** vect_analyze_data_refs (loop_vec_info lo
*** 3369,3382 ****
  
        if (gather)
  	{
- 	  unsigned int j, k, n;
- 	  struct data_reference *olddr
- 	    = datarefs[i];
- 	  vec<ddr_p> ddrs = LOOP_VINFO_DDRS (loop_vinfo);
- 	  struct data_dependence_relation *ddr, *newddr;
- 	  bool bad = false;
  	  tree off;
- 	  vec<loop_p> nest = LOOP_VINFO_LOOP_NEST (loop_vinfo);
  
  	  gather = 0 != vect_check_gather (stmt, loop_vinfo, NULL, &off, NULL);
  	  if (gather
--- 3326,3332 ----
*************** vect_analyze_data_refs (loop_vec_info lo
*** 3396,3454 ****
  	      return false;
  	    }
  
- 	  n = datarefs.length () - 1;
- 	  for (j = 0, k = i - 1; j < i; j++)
- 	    {
- 	      ddr = ddrs[k];
- 	      gcc_assert (DDR_B (ddr) == olddr);
- 	      newddr = initialize_data_dependence_relation (DDR_A (ddr), dr,
- 							    nest);
- 	      ddrs[k] = newddr;
- 	      free_dependence_relation (ddr);
- 	      if (!bad
- 		  && DR_IS_WRITE (DDR_A (newddr))
- 		  && DDR_ARE_DEPENDENT (newddr) != chrec_known)
- 		bad = true;
- 	      k += --n;
- 	    }
- 
- 	  k++;
- 	  n = k + datarefs.length () - i - 1;
- 	  for (; k < n; k++)
- 	    {
- 	      ddr = ddrs[k];
- 	      gcc_assert (DDR_A (ddr) == olddr);
- 	      newddr = initialize_data_dependence_relation (dr, DDR_B (ddr),
- 							    nest);
- 	      ddrs[k] = newddr;
- 	      free_dependence_relation (ddr);
- 	      if (!bad
- 		  && DR_IS_WRITE (DDR_B (newddr))
- 		  && DDR_ARE_DEPENDENT (newddr) != chrec_known)
- 		bad = true;
- 	    }
- 
- 	  k = ddrs.length ()
- 	      - datarefs.length () + i;
- 	  ddr = ddrs[k];
- 	  gcc_assert (DDR_A (ddr) == olddr && DDR_B (ddr) == olddr);
- 	  newddr = initialize_data_dependence_relation (dr, dr, nest);
- 	  ddrs[k] = newddr;
- 	  free_dependence_relation (ddr);
  	  datarefs[i] = dr;
- 
- 	  if (bad)
- 	    {
- 	      if (dump_enabled_p ())
- 		{
- 		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 
-                                    "not vectorized: data dependence conflict"
-                                    " prevents gather load");
- 		  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
- 		}
- 	      return false;
- 	    }
- 
  	  STMT_VINFO_GATHER_P (stmt_info) = true;
  	}
        else if (loop_vinfo
--- 3346,3352 ----
*************** vect_analyze_data_refs (loop_vec_info lo
*** 3472,3477 ****
--- 3370,3391 ----
  	}
      }
  
+   /* If we stopped analysis at the first dataref we could not analyze
+      when trying to vectorize a basic-block mark the rest of the datarefs
+      as not vectorizable and truncate the vector of datarefs.  That
+      avoids spending useless time in analyzing their dependence.  */
+   if (i != datarefs.length ())
+     {
+       gcc_assert (bb_vinfo != NULL);
+       for (unsigned j = i; j < datarefs.length (); ++j)
+ 	{
+ 	  data_reference_p dr = datarefs[j];
+           STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
+ 	  free_data_ref (dr);
+ 	}
+       datarefs.truncate (i);
+     }
+ 
    return true;
  }
  
Index: gcc/tree-vect-slp.c
===================================================================
*** gcc/tree-vect-slp.c.orig	2013-02-26 13:31:25.000000000 +0100
--- gcc/tree-vect-slp.c	2013-02-26 14:01:07.587931536 +0100
*************** static bb_vec_info
*** 2085,2091 ****
  vect_slp_analyze_bb_1 (basic_block bb)
  {
    bb_vec_info bb_vinfo;
-   vec<ddr_p> ddrs;
    vec<slp_instance> slp_instances;
    slp_instance instance;
    int i;
--- 2085,2090 ----
*************** vect_slp_analyze_bb_1 (basic_block bb)
*** 2107,2114 ****
        return NULL;
      }
  
!   ddrs = BB_VINFO_DDRS (bb_vinfo);
!   if (!ddrs.length ())
      {
        if (dump_enabled_p ())
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
--- 2106,2112 ----
        return NULL;
      }
  
!   if (BB_VINFO_DATAREFS (bb_vinfo).length () < 2)
      {
        if (dump_enabled_p ())
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,



More information about the Gcc-patches mailing list