This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[PATCH, vec-tails 03/10] Support epilogues vectorization with no masking
- From: Ilya Enkovich <enkovich dot gnu at gmail dot com>
- To: gcc-patches at gcc dot gnu dot org
- Date: Thu, 19 May 2016 22:39:39 +0300
- Subject: [PATCH, vec-tails 03/10] Support epilogues vectorization with no masking
- Authentication-results: sourceware.org; auth=none
Hi,
This patch introduces the changes required to run the vectorizer on a loop epilogue.
It also enables epilogue vectorization using a vector of a smaller size.
Thanks,
Ilya
--
gcc/
2016-05-19 Ilya Enkovich <ilya.enkovich@intel.com>
* tree-if-conv.c (tree_if_conversion): Make public.
* tree-if-conv.h: New file.
* tree-vect-data-refs.c (vect_enhance_data_refs_alignment): Don't
try to enhance alignment for epilogues.
* tree-vect-loop-manip.c (vect_do_peeling_for_loop_bound): Return
created loop.
* tree-vect-loop.c: Include tree-if-conv.h.
(destroy_loop_vec_info): Preserve LOOP_VINFO_ORIG_LOOP_INFO in
loop->aux.
(vect_analyze_loop_form): Init LOOP_VINFO_ORIG_LOOP_INFO and reset
loop->aux.
(vect_analyze_loop): Reset loop->aux.
(vect_transform_loop): Check if created epilogue should be returned
for further vectorization. If-convert epilogue if required.
* tree-vectorizer.c (vectorize_loops): Add a queue of loops to
process and insert vectorized loop epilogues into this queue.
* tree-vectorizer.h (vect_do_peeling_for_loop_bound): Return created
loop.
(vect_transform_loop): Return created loop.
diff --git a/gcc/tree-if-conv.c b/gcc/tree-if-conv.c
index c38e21b..41b6c99 100644
--- a/gcc/tree-if-conv.c
+++ b/gcc/tree-if-conv.c
@@ -2801,7 +2801,7 @@ ifcvt_local_dce (basic_block bb)
profitability analysis. Returns non-zero todo flags when something
changed. */
-static unsigned int
+unsigned int
tree_if_conversion (struct loop *loop)
{
unsigned int todo = 0;
diff --git a/gcc/tree-if-conv.h b/gcc/tree-if-conv.h
new file mode 100644
index 0000000..3a732c2
--- /dev/null
+++ b/gcc/tree-if-conv.h
@@ -0,0 +1,24 @@
+/* Copyright (C) 2016 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#ifndef GCC_TREE_IF_CONV_H
+#define GCC_TREE_IF_CONV_H
+
+unsigned int tree_if_conversion (struct loop *);
+
+#endif /* GCC_TREE_IF_CONV_H */
diff --git a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c
index 7652e21..f275933 100644
--- a/gcc/tree-vect-data-refs.c
+++ b/gcc/tree-vect-data-refs.c
@@ -1595,7 +1595,10 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
/* Check if we can possibly peel the loop. */
if (!vect_can_advance_ivs_p (loop_vinfo)
|| !slpeel_can_duplicate_loop_p (loop, single_exit (loop))
- || loop->inner)
+ || loop->inner
+ /* Required peeling was performed in prologue and
+ is not required for epilogue. */
+ || LOOP_VINFO_EPILOGUE_P (loop_vinfo))
do_peeling = false;
if (do_peeling
@@ -1875,7 +1878,10 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
do_versioning =
optimize_loop_nest_for_speed_p (loop)
- && (!loop->inner); /* FORNOW */
+ && (!loop->inner) /* FORNOW */
+ /* Required versioning was performed for the
+ original loop and is not required for epilogue. */
+ && !LOOP_VINFO_EPILOGUE_P (loop_vinfo);
if (do_versioning)
{
diff --git a/gcc/tree-vect-loop-manip.c b/gcc/tree-vect-loop-manip.c
index 7ec6dae..fab5879 100644
--- a/gcc/tree-vect-loop-manip.c
+++ b/gcc/tree-vect-loop-manip.c
@@ -1742,9 +1742,11 @@ vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo, tree niters,
NITERS / VECTORIZATION_FACTOR times (this value is placed into RATIO).
COND_EXPR and COND_EXPR_STMT_LIST are combined with a new generated
- test. */
+ test.
-void
+ Return created loop. */
+
+struct loop *
vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo,
tree ni_name, tree ratio_mult_vf_name,
unsigned int th, bool check_profitability)
@@ -1812,6 +1814,8 @@ vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo,
scev_reset ();
free_original_copy_tables ();
+
+ return new_loop;
}
diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
index aac0df9..a537ef4 100644
--- a/gcc/tree-vect-loop.c
+++ b/gcc/tree-vect-loop.c
@@ -47,6 +47,7 @@ along with GCC; see the file COPYING3. If not see
#include "tree-vectorizer.h"
#include "gimple-fold.h"
#include "cgraph.h"
+#include "tree-if-conv.h"
/* Loop Vectorization Pass.
@@ -1212,8 +1213,8 @@ destroy_loop_vec_info (loop_vec_info loop_vinfo, bool clean_stmts)
destroy_cost_data (LOOP_VINFO_TARGET_COST_DATA (loop_vinfo));
loop_vinfo->scalar_cost_vec.release ();
+ loop->aux = LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo);
free (loop_vinfo);
- loop->aux = NULL;
}
@@ -1499,13 +1500,24 @@ vect_analyze_loop_form (struct loop *loop)
if (! vect_analyze_loop_form_1 (loop, &loop_cond, &number_of_iterationsm1,
&number_of_iterations, &inner_loop_cond))
- return NULL;
+ {
+ loop->aux = NULL;
+ return NULL;
+ }
loop_vec_info loop_vinfo = new_loop_vec_info (loop);
LOOP_VINFO_NITERSM1 (loop_vinfo) = number_of_iterationsm1;
LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;
LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo) = number_of_iterations;
+ /* For an epilogue that we want to vectorize, loop->aux holds
+ the loop_vec_info of the original loop. */
+ if (loop->aux)
+ {
+ gcc_assert (LOOP_VINFO_VECTORIZABLE_P ((loop_vec_info)loop->aux));
+ LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo) = (loop_vec_info)loop->aux;
+ }
+
if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
{
if (dump_enabled_p ())
@@ -1522,7 +1534,6 @@ vect_analyze_loop_form (struct loop *loop)
STMT_VINFO_TYPE (vinfo_for_stmt (inner_loop_cond))
= loop_exit_ctrl_vec_info_type;
- gcc_assert (!loop->aux);
loop->aux = loop_vinfo;
return loop_vinfo;
}
@@ -2280,7 +2291,10 @@ vect_analyze_loop (struct loop *loop)
if (fatal
|| vector_sizes == 0
|| current_vector_size == 0)
- return NULL;
+ {
+ loop->aux = NULL;
+ return NULL;
+ }
/* Try the next biggest vector size. */
current_vector_size = 1 << floor_log2 (vector_sizes);
@@ -6576,10 +6590,11 @@ vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo,
Vectorize the loop - created vectorized stmts to replace the scalar
stmts in the loop, and update the loop exit condition. */
-void
+struct loop *
vect_transform_loop (loop_vec_info loop_vinfo)
{
struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
+ struct loop *epilogue = NULL;
basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
int nbbs = loop->num_nodes;
int i;
@@ -6661,8 +6676,9 @@ vect_transform_loop (loop_vec_info loop_vinfo)
ni_name = vect_build_loop_niters (loop_vinfo);
vect_generate_tmps_on_preheader (loop_vinfo, ni_name, &ratio_mult_vf,
&ratio);
- vect_do_peeling_for_loop_bound (loop_vinfo, ni_name, ratio_mult_vf,
- th, check_profitability);
+ epilogue = vect_do_peeling_for_loop_bound (loop_vinfo, ni_name,
+ ratio_mult_vf, th,
+ check_profitability);
}
else if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
ratio = build_int_cst (TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo)),
@@ -6959,6 +6975,64 @@ vect_transform_loop (loop_vec_info loop_vinfo)
FOR_EACH_VEC_ELT (LOOP_VINFO_SLP_INSTANCES (loop_vinfo), i, instance)
vect_free_slp_instance (instance);
LOOP_VINFO_SLP_INSTANCES (loop_vinfo).release ();
+
+ /* Don't vectorize epilogue for epilogue. */
+ if (LOOP_VINFO_EPILOGUE_P (loop_vinfo))
+ epilogue = NULL;
+ /* Scalar epilogue is not vectorized in case
+ we use combined vector epilogue. */
+ else if (LOOP_VINFO_COMBINE_EPILOGUE (loop_vinfo))
+ epilogue = NULL;
+ /* FORNOW: Currently alias checks are not inherited for epilogues.
+ Don't try to vectorize epilogue because it will require
+ additional alias checks. */
+ else if (LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
+ epilogue = NULL;
+
+ if (epilogue)
+ {
+ if (!LOOP_VINFO_MASK_EPILOGUE (loop_vinfo))
+ {
+ unsigned int vector_sizes
+ = targetm.vectorize.autovectorize_vector_sizes ();
+ vector_sizes &= current_vector_size - 1;
+
+ if (!(flag_tree_vectorize_epilogues & VECT_EPILOGUE_NOMASK))
+ epilogue = NULL;
+ else if (!vector_sizes)
+ epilogue = NULL;
+ else if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
+ && LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) >= 0)
+ {
+ int smallest_vec_size = 1 << ctz_hwi (vector_sizes);
+ int ratio = current_vector_size / smallest_vec_size;
+ int eiters = LOOP_VINFO_INT_NITERS (loop_vinfo)
+ - LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
+ eiters = eiters % vectorization_factor;
+
+ epilogue->nb_iterations_upper_bound = eiters - 1;
+
+ if (eiters < vectorization_factor / ratio)
+ epilogue = NULL;
+ }
+ }
+ }
+
+ if (epilogue)
+ {
+ epilogue->force_vectorize = loop->force_vectorize;
+ epilogue->safelen = loop->safelen;
+ epilogue->dont_vectorize = false;
+
+ /* We may need to if-convert epilogue to vectorize it. */
+ if (LOOP_VINFO_SCALAR_LOOP (loop_vinfo))
+ tree_if_conversion (epilogue);
+
+ gcc_assert (!epilogue->aux);
+ epilogue->aux = loop_vinfo;
+ }
+
+ return epilogue;
}
/* The code below is trying to perform simple optimization - revert
diff --git a/gcc/tree-vectorizer.c b/gcc/tree-vectorizer.c
index 2b25b45..5f15246 100644
--- a/gcc/tree-vectorizer.c
+++ b/gcc/tree-vectorizer.c
@@ -491,14 +491,16 @@ vectorize_loops (void)
{
unsigned int i;
unsigned int num_vectorized_loops = 0;
- unsigned int vect_loops_num;
+ unsigned int vect_loops_num = number_of_loops (cfun);
struct loop *loop;
hash_table<simduid_to_vf> *simduid_to_vf_htab = NULL;
hash_table<simd_array_to_simduid> *simd_array_to_simduid_htab = NULL;
bool any_ifcvt_loops = false;
unsigned ret = 0;
+ auto_vec<unsigned int> loops (vect_loops_num);
- vect_loops_num = number_of_loops (cfun);
+ FOR_EACH_LOOP (loop, 0)
+ loops.quick_push (loop->num);
/* Bail out if there are no loops. */
if (vect_loops_num <= 1)
@@ -514,14 +516,18 @@ vectorize_loops (void)
/* If some loop was duplicated, it gets bigger number
than all previously defined loops. This fact allows us to run
only over initial loops skipping newly generated ones. */
- FOR_EACH_LOOP (loop, 0)
- if (loop->dont_vectorize)
+ for (i = 0; i < loops.length (); i++)
+ if (!(loop = get_loop (cfun, loops[i])))
+ continue;
+ else if (loop->dont_vectorize)
any_ifcvt_loops = true;
else if ((flag_tree_loop_vectorize
- && optimize_loop_nest_for_speed_p (loop))
+ && (optimize_loop_nest_for_speed_p (loop)
+ || loop->aux))
|| loop->force_vectorize)
{
loop_vec_info loop_vinfo;
+ struct loop *new_loop;
vect_location = find_loop_location (loop);
if (LOCATION_LOCUS (vect_location) != UNKNOWN_LOCATION
&& dump_enabled_p ())
@@ -551,12 +557,21 @@ vectorize_loops (void)
&& dump_enabled_p ())
dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
"loop vectorized\n");
- vect_transform_loop (loop_vinfo);
+ new_loop = vect_transform_loop (loop_vinfo);
num_vectorized_loops++;
/* Now that the loop has been vectorized, allow it to be unrolled
etc. */
loop->force_vectorize = false;
+ /* Add new loop to a processing queue. To make it easier
+ to match loop and its epilogue vectorization in dumps
+ put new loop as the next loop to process. */
+ if (new_loop)
+ {
+ loops.safe_insert (i + 1, new_loop->num);
+ vect_loops_num = number_of_loops (cfun);
+ }
+
if (loop->simduid)
{
simduid_to_vf *simduid_to_vf_data = XNEW (simduid_to_vf);
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index 4c19317..b269752 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -984,8 +984,8 @@ extern bool slpeel_can_duplicate_loop_p (const struct loop *, const_edge);
struct loop *slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *,
struct loop *, edge);
extern void vect_loop_versioning (loop_vec_info, unsigned int, bool);
-extern void vect_do_peeling_for_loop_bound (loop_vec_info, tree, tree,
- unsigned int, bool);
+extern struct loop *vect_do_peeling_for_loop_bound (loop_vec_info, tree, tree,
+ unsigned int, bool);
extern void vect_do_peeling_for_alignment (loop_vec_info, tree,
unsigned int, bool);
extern source_location find_loop_location (struct loop *);
@@ -1101,7 +1101,7 @@ extern gimple *vect_force_simple_reduction (loop_vec_info, gimple *, bool,
/* Drive for loop analysis stage. */
extern loop_vec_info vect_analyze_loop (struct loop *);
/* Drive for loop transformation stage. */
-extern void vect_transform_loop (loop_vec_info);
+extern struct loop *vect_transform_loop (loop_vec_info);
extern loop_vec_info vect_analyze_loop_form (struct loop *);
extern bool vectorizable_live_operation (gimple *, gimple_stmt_iterator *,
gimple **);