This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH, vec-tails 03/10] Support epilogues vectorization with no masking


Hi,

This patch introduces the changes required to run the vectorizer on a loop
epilogue.  It also enables epilogue vectorization using a smaller vector size.

Thanks,
Ilya
--
gcc/

2016-05-19  Ilya Enkovich  <ilya.enkovich@intel.com>

	* tree-if-conv.c (tree_if_conversion): Make public.
	* tree-if-conv.h: New file.
	* tree-vect-data-refs.c (vect_enhance_data_refs_alignment): Don't
	try to enhance alignment for epilogues.
	* tree-vect-loop-manip.c (vect_do_peeling_for_loop_bound): Return
	created loop.
	* tree-vect-loop.c: Include tree-if-conv.h.
	(destroy_loop_vec_info): Preserve LOOP_VINFO_ORIG_LOOP_INFO in
	loop->aux.
	(vect_analyze_loop_form): Init LOOP_VINFO_ORIG_LOOP_INFO and reset
	loop->aux.
	(vect_analyze_loop): Reset loop->aux.
	(vect_transform_loop): Check if created epilogue should be returned
	for further vectorization.  If-convert epilogue if required.
	* tree-vectorizer.c (vectorize_loops): Add a queue of loops to
	process and insert vectorized loop epilogues into this queue.
	* tree-vectorizer.h (vect_do_peeling_for_loop_bound): Return created
	loop.
	(vect_transform_loop): Return created loop.


diff --git a/gcc/tree-if-conv.c b/gcc/tree-if-conv.c
index c38e21b..41b6c99 100644
--- a/gcc/tree-if-conv.c
+++ b/gcc/tree-if-conv.c
@@ -2801,7 +2801,7 @@ ifcvt_local_dce (basic_block bb)
    profitability analysis.  Returns non-zero todo flags when something
    changed.  */
 
-static unsigned int
+unsigned int
 tree_if_conversion (struct loop *loop)
 {
   unsigned int todo = 0;
diff --git a/gcc/tree-if-conv.h b/gcc/tree-if-conv.h
new file mode 100644
index 0000000..3a732c2
--- /dev/null
+++ b/gcc/tree-if-conv.h
@@ -0,0 +1,24 @@
+/* Copyright (C) 2016 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3.  If not see
+<http://www.gnu.org/licenses/>.  */
+
+#ifndef GCC_TREE_IF_CONV_H
+#define GCC_TREE_IF_CONV_H
+
+unsigned int tree_if_conversion (struct loop *);
+
+#endif  /* GCC_TREE_IF_CONV_H  */
diff --git a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c
index 7652e21..f275933 100644
--- a/gcc/tree-vect-data-refs.c
+++ b/gcc/tree-vect-data-refs.c
@@ -1595,7 +1595,10 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
   /* Check if we can possibly peel the loop.  */
   if (!vect_can_advance_ivs_p (loop_vinfo)
       || !slpeel_can_duplicate_loop_p (loop, single_exit (loop))
-      || loop->inner)
+      || loop->inner
+      /* Required peeling was performed in prologue and
+	 is not required for epilogue.  */
+      || LOOP_VINFO_EPILOGUE_P (loop_vinfo))
     do_peeling = false;
 
   if (do_peeling
@@ -1875,7 +1878,10 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
 
   do_versioning =
 	optimize_loop_nest_for_speed_p (loop)
-	&& (!loop->inner); /* FORNOW */
+	&& (!loop->inner) /* FORNOW */
+        /* Required versioning was performed for the
+	   original loop and is not required for epilogue.  */
+	&& !LOOP_VINFO_EPILOGUE_P (loop_vinfo);
 
   if (do_versioning)
     {
diff --git a/gcc/tree-vect-loop-manip.c b/gcc/tree-vect-loop-manip.c
index 7ec6dae..fab5879 100644
--- a/gcc/tree-vect-loop-manip.c
+++ b/gcc/tree-vect-loop-manip.c
@@ -1742,9 +1742,11 @@ vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo, tree niters,
    NITERS / VECTORIZATION_FACTOR times (this value is placed into RATIO).
 
    COND_EXPR and COND_EXPR_STMT_LIST are combined with a new generated
-   test.  */
+   test.
 
-void
+   Return created loop.  */
+
+struct loop *
 vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo,
 				tree ni_name, tree ratio_mult_vf_name,
 				unsigned int th, bool check_profitability)
@@ -1812,6 +1814,8 @@ vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo,
   scev_reset ();
 
   free_original_copy_tables ();
+
+  return new_loop;
 }
 
 
diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
index aac0df9..a537ef4 100644
--- a/gcc/tree-vect-loop.c
+++ b/gcc/tree-vect-loop.c
@@ -47,6 +47,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "tree-vectorizer.h"
 #include "gimple-fold.h"
 #include "cgraph.h"
+#include "tree-if-conv.h"
 
 /* Loop Vectorization Pass.
 
@@ -1212,8 +1213,8 @@ destroy_loop_vec_info (loop_vec_info loop_vinfo, bool clean_stmts)
   destroy_cost_data (LOOP_VINFO_TARGET_COST_DATA (loop_vinfo));
   loop_vinfo->scalar_cost_vec.release ();
 
+  loop->aux = LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo);
   free (loop_vinfo);
-  loop->aux = NULL;
 }
 
 
@@ -1499,13 +1500,24 @@ vect_analyze_loop_form (struct loop *loop)
 
   if (! vect_analyze_loop_form_1 (loop, &loop_cond, &number_of_iterationsm1,
 				  &number_of_iterations, &inner_loop_cond))
-    return NULL;
+    {
+      loop->aux = NULL;
+      return NULL;
+    }
 
   loop_vec_info loop_vinfo = new_loop_vec_info (loop);
   LOOP_VINFO_NITERSM1 (loop_vinfo) = number_of_iterationsm1;
   LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;
   LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo) = number_of_iterations;
 
+  /* For an epilogue we want to vectorize, loop->aux holds the
+     loop_vec_info of the original loop.  */
+  if (loop->aux)
+    {
+      gcc_assert (LOOP_VINFO_VECTORIZABLE_P ((loop_vec_info)loop->aux));
+      LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo) = (loop_vec_info)loop->aux;
+    }
+
   if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
     {
       if (dump_enabled_p ())
@@ -1522,7 +1534,6 @@ vect_analyze_loop_form (struct loop *loop)
     STMT_VINFO_TYPE (vinfo_for_stmt (inner_loop_cond))
       = loop_exit_ctrl_vec_info_type;
 
-  gcc_assert (!loop->aux);
   loop->aux = loop_vinfo;
   return loop_vinfo;
 }
@@ -2280,7 +2291,10 @@ vect_analyze_loop (struct loop *loop)
       if (fatal
 	  || vector_sizes == 0
 	  || current_vector_size == 0)
-	return NULL;
+	{
+	  loop->aux = NULL;
+	  return NULL;
+	}
 
       /* Try the next biggest vector size.  */
       current_vector_size = 1 << floor_log2 (vector_sizes);
@@ -6576,10 +6590,11 @@ vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo,
    Vectorize the loop - created vectorized stmts to replace the scalar
    stmts in the loop, and update the loop exit condition.  */
 
-void
+struct loop *
 vect_transform_loop (loop_vec_info loop_vinfo)
 {
   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
+  struct loop *epilogue = NULL;
   basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
   int nbbs = loop->num_nodes;
   int i;
@@ -6661,8 +6676,9 @@ vect_transform_loop (loop_vec_info loop_vinfo)
 	ni_name = vect_build_loop_niters (loop_vinfo);
       vect_generate_tmps_on_preheader (loop_vinfo, ni_name, &ratio_mult_vf,
 				       &ratio);
-      vect_do_peeling_for_loop_bound (loop_vinfo, ni_name, ratio_mult_vf,
-				      th, check_profitability);
+      epilogue = vect_do_peeling_for_loop_bound (loop_vinfo, ni_name,
+						 ratio_mult_vf, th,
+						 check_profitability);
     }
   else if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
     ratio = build_int_cst (TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo)),
@@ -6959,6 +6975,64 @@ vect_transform_loop (loop_vec_info loop_vinfo)
   FOR_EACH_VEC_ELT (LOOP_VINFO_SLP_INSTANCES (loop_vinfo), i, instance)
     vect_free_slp_instance (instance);
   LOOP_VINFO_SLP_INSTANCES (loop_vinfo).release ();
+
+  /* Don't vectorize epilogue for epilogue.  */
+  if (LOOP_VINFO_EPILOGUE_P (loop_vinfo))
+    epilogue = NULL;
+  /* Scalar epilogue is not vectorized in case
+     we use combined vector epilogue.  */
+  else if (LOOP_VINFO_COMBINE_EPILOGUE (loop_vinfo))
+    epilogue = NULL;
+  /* FORNOW: Currently alias checks are not inherited for epilogues.
+     Don't try to vectorize epilogue because it will require
+     additional alias checks.  */
+  else if (LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
+    epilogue = NULL;
+
+  if (epilogue)
+    {
+      if (!LOOP_VINFO_MASK_EPILOGUE (loop_vinfo))
+	{
+	  unsigned int vector_sizes
+	    = targetm.vectorize.autovectorize_vector_sizes ();
+	  vector_sizes &= current_vector_size - 1;
+
+	  if (!(flag_tree_vectorize_epilogues & VECT_EPILOGUE_NOMASK))
+	    epilogue = NULL;
+	  else if (!vector_sizes)
+	    epilogue = NULL;
+	  else if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
+		   && LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) >= 0)
+	    {
+	      int smallest_vec_size = 1 << ctz_hwi (vector_sizes);
+	      int ratio = current_vector_size / smallest_vec_size;
+	      int eiters = LOOP_VINFO_INT_NITERS (loop_vinfo)
+		- LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
+	      eiters = eiters % vectorization_factor;
+
+	      epilogue->nb_iterations_upper_bound = eiters - 1;
+
+	      if (eiters < vectorization_factor / ratio)
+		epilogue = NULL;
+	    }
+	}
+    }
+
+  if (epilogue)
+    {
+      epilogue->force_vectorize = loop->force_vectorize;
+      epilogue->safelen = loop->safelen;
+      epilogue->dont_vectorize = false;
+
+      /* We may need to if-convert epilogue to vectorize it.  */
+      if (LOOP_VINFO_SCALAR_LOOP (loop_vinfo))
+	tree_if_conversion (epilogue);
+
+      gcc_assert (!epilogue->aux);
+      epilogue->aux = loop_vinfo;
+    }
+
+  return epilogue;
 }
 
 /* The code below is trying to perform simple optimization - revert
diff --git a/gcc/tree-vectorizer.c b/gcc/tree-vectorizer.c
index 2b25b45..5f15246 100644
--- a/gcc/tree-vectorizer.c
+++ b/gcc/tree-vectorizer.c
@@ -491,14 +491,16 @@ vectorize_loops (void)
 {
   unsigned int i;
   unsigned int num_vectorized_loops = 0;
-  unsigned int vect_loops_num;
+  unsigned int vect_loops_num = number_of_loops (cfun);
   struct loop *loop;
   hash_table<simduid_to_vf> *simduid_to_vf_htab = NULL;
   hash_table<simd_array_to_simduid> *simd_array_to_simduid_htab = NULL;
   bool any_ifcvt_loops = false;
   unsigned ret = 0;
+  auto_vec<unsigned int> loops (vect_loops_num);
 
-  vect_loops_num = number_of_loops (cfun);
+  FOR_EACH_LOOP (loop, 0)
+    loops.quick_push (loop->num);
 
   /* Bail out if there are no loops.  */
   if (vect_loops_num <= 1)
@@ -514,14 +516,18 @@ vectorize_loops (void)
   /* If some loop was duplicated, it gets bigger number
      than all previously defined loops.  This fact allows us to run
      only over initial loops skipping newly generated ones.  */
-  FOR_EACH_LOOP (loop, 0)
-    if (loop->dont_vectorize)
+  for (i = 0; i < loops.length (); i++)
+    if (!(loop = get_loop (cfun, loops[i])))
+      continue;
+    else if (loop->dont_vectorize)
       any_ifcvt_loops = true;
     else if ((flag_tree_loop_vectorize
-	      && optimize_loop_nest_for_speed_p (loop))
+	      && (optimize_loop_nest_for_speed_p (loop)
+		  || loop->aux))
 	     || loop->force_vectorize)
       {
 	loop_vec_info loop_vinfo;
+	struct loop *new_loop;
 	vect_location = find_loop_location (loop);
         if (LOCATION_LOCUS (vect_location) != UNKNOWN_LOCATION
 	    && dump_enabled_p ())
@@ -551,12 +557,21 @@ vectorize_loops (void)
 	    && dump_enabled_p ())
           dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
                            "loop vectorized\n");
-	vect_transform_loop (loop_vinfo);
+	new_loop = vect_transform_loop (loop_vinfo);
 	num_vectorized_loops++;
 	/* Now that the loop has been vectorized, allow it to be unrolled
 	   etc.  */
 	loop->force_vectorize = false;
 
+	/* Add new loop to a processing queue.  To make it easier
+	   to match loop and its epilogue vectorization in dumps
+	   put new loop as the next loop to process.  */
+	if (new_loop)
+	  {
+	    loops.safe_insert (i + 1, new_loop->num);
+	    vect_loops_num = number_of_loops (cfun);
+	  }
+
 	if (loop->simduid)
 	  {
 	    simduid_to_vf *simduid_to_vf_data = XNEW (simduid_to_vf);
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index 4c19317..b269752 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -984,8 +984,8 @@ extern bool slpeel_can_duplicate_loop_p (const struct loop *, const_edge);
 struct loop *slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *,
 						     struct loop *, edge);
 extern void vect_loop_versioning (loop_vec_info, unsigned int, bool);
-extern void vect_do_peeling_for_loop_bound (loop_vec_info, tree, tree,
-					    unsigned int, bool);
+extern struct loop *vect_do_peeling_for_loop_bound (loop_vec_info, tree, tree,
+						    unsigned int, bool);
 extern void vect_do_peeling_for_alignment (loop_vec_info, tree,
 					   unsigned int, bool);
 extern source_location find_loop_location (struct loop *);
@@ -1101,7 +1101,7 @@ extern gimple *vect_force_simple_reduction (loop_vec_info, gimple *, bool,
 /* Drive for loop analysis stage.  */
 extern loop_vec_info vect_analyze_loop (struct loop *);
 /* Drive for loop transformation stage.  */
-extern void vect_transform_loop (loop_vec_info);
+extern struct loop *vect_transform_loop (loop_vec_info);
 extern loop_vec_info vect_analyze_loop_form (struct loop *);
 extern bool vectorizable_live_operation (gimple *, gimple_stmt_iterator *,
 					 gimple **);


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]