This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[PATCH] Fix PR66051
- From: Richard Biener <rguenther at suse dot de>
- To: gcc-patches at gcc dot gnu dot org
- Date: Wed, 2 Dec 2015 16:14:07 +0100 (CET)
- Subject: [PATCH] Fix PR66051
- Authentication-results: sourceware.org; auth=none
This fixes the vectorizer part of PR66051 (an x86 target part remains
for the testcase in the PR - PR68655). The issue is again a
misplaced check for SLP detection:
/* Check that the size of interleaved loads group is not
greater than the SLP group size. */
unsigned ncopies
= vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype);
if (is_a <loop_vec_info> (vinfo)
&& GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) == stmt
&& ((GROUP_SIZE (vinfo_for_stmt (stmt))
- GROUP_GAP (vinfo_for_stmt (stmt)))
> ncopies * group_size))
{
if (dump_enabled_p ())
{
dump_printf_loc (MSG_MISSED_OPTIMIZATION,
vect_location,
"Build SLP failed: the number "
"of interleaved loads is greater than "
"the SLP group size ");
dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
stmt, 0);
dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
}
/* Fatal mismatch. */
matches[0] = false;
return false;
}
I've relaxed this check multiple times, but that still doesn't make it necessary.
It also uses a vectorization factor estimate as the vectorization factor
is not yet determined. A good side-effect of the patch is that we
can get rid of that estimate completely.
Tested on the x86_64 vectorization tests so far.
Bootstrap & regtest pending and I'll make sure SPEC CPU 2006 is
happy as well.
Thanks,
Richard.
2015-12-02 Richard Biener <rguenther@suse.de>
PR tree-optimization/66051
* tree-vect-slp.c (vect_build_slp_tree_1): Remove restriction
on load group size. Do not pass in vectorization_factor.
(vect_transform_slp_perm_load): Do not require any permute support.
(vect_build_slp_tree): Do not pass in vectorization factor.
(vect_analyze_slp_instance): Do not compute vectorization
factor estimate. Use vector size instead of vectorization factor
estimate to split store groups for BB vectorization.
* gcc.dg/vect/slp-42.c: New testcase.
Index: gcc/tree-vect-slp.c
===================================================================
*** gcc/tree-vect-slp.c (revision 231167)
--- gcc/tree-vect-slp.c (working copy)
*************** static bool
*** 430,437 ****
vect_build_slp_tree_1 (vec_info *vinfo,
vec<gimple *> stmts, unsigned int group_size,
unsigned nops, unsigned int *max_nunits,
! unsigned int vectorization_factor, bool *matches,
! bool *two_operators)
{
unsigned int i;
gimple *first_stmt = stmts[0], *stmt = stmts[0];
--- 430,436 ----
vect_build_slp_tree_1 (vec_info *vinfo,
vec<gimple *> stmts, unsigned int group_size,
unsigned nops, unsigned int *max_nunits,
! bool *matches, bool *two_operators)
{
unsigned int i;
gimple *first_stmt = stmts[0], *stmt = stmts[0];
*************** vect_build_slp_tree_1 (vec_info *vinfo,
*** 523,533 ****
/* In case of multiple types we need to detect the smallest type. */
if (*max_nunits < TYPE_VECTOR_SUBPARTS (vectype))
! {
! *max_nunits = TYPE_VECTOR_SUBPARTS (vectype);
! if (is_a <bb_vec_info> (vinfo))
! vectorization_factor = *max_nunits;
! }
if (gcall *call_stmt = dyn_cast <gcall *> (stmt))
{
--- 522,528 ----
/* In case of multiple types we need to detect the smallest type. */
if (*max_nunits < TYPE_VECTOR_SUBPARTS (vectype))
! *max_nunits = TYPE_VECTOR_SUBPARTS (vectype);
if (gcall *call_stmt = dyn_cast <gcall *> (stmt))
{
*************** vect_build_slp_tree_1 (vec_info *vinfo,
*** 700,730 ****
else
{
/* Load. */
- /* Check that the size of interleaved loads group is not
- greater than the SLP group size. */
- unsigned ncopies
- = vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype);
- if (is_a <loop_vec_info> (vinfo)
- && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) == stmt
- && ((GROUP_SIZE (vinfo_for_stmt (stmt))
- - GROUP_GAP (vinfo_for_stmt (stmt)))
- > ncopies * group_size))
- {
- if (dump_enabled_p ())
- {
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "Build SLP failed: the number "
- "of interleaved loads is greater than "
- "the SLP group size ");
- dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
- stmt, 0);
- dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
- }
- /* Fatal mismatch. */
- matches[0] = false;
- return false;
- }
-
first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
if (prev_first_load)
{
--- 695,700 ----
*************** vect_build_slp_tree (vec_info *vinfo,
*** 871,877 ****
slp_tree *node, unsigned int group_size,
unsigned int *max_nunits,
vec<slp_tree> *loads,
- unsigned int vectorization_factor,
bool *matches, unsigned *npermutes, unsigned *tree_size,
unsigned max_tree_size)
{
--- 841,846 ----
*************** vect_build_slp_tree (vec_info *vinfo,
*** 895,902 ****
bool two_operators = false;
if (!vect_build_slp_tree_1 (vinfo,
SLP_TREE_SCALAR_STMTS (*node), group_size, nops,
! max_nunits, vectorization_factor, matches,
! &two_operators))
return false;
SLP_TREE_TWO_OPERATORS (*node) = two_operators;
--- 864,870 ----
bool two_operators = false;
if (!vect_build_slp_tree_1 (vinfo,
SLP_TREE_SCALAR_STMTS (*node), group_size, nops,
! max_nunits, matches, &two_operators))
return false;
SLP_TREE_TWO_OPERATORS (*node) = two_operators;
*************** vect_build_slp_tree (vec_info *vinfo,
*** 959,966 ****
}
if (vect_build_slp_tree (vinfo, &child,
! group_size, max_nunits, loads,
! vectorization_factor, matches,
npermutes, &this_tree_size, max_tree_size))
{
/* If we have all children of child built up from scalars then just
--- 927,933 ----
}
if (vect_build_slp_tree (vinfo, &child,
! group_size, max_nunits, loads, matches,
npermutes, &this_tree_size, max_tree_size))
{
/* If we have all children of child built up from scalars then just
*************** vect_build_slp_tree (vec_info *vinfo,
*** 1074,1080 ****
bool *tem = XALLOCAVEC (bool, group_size);
if (vect_build_slp_tree (vinfo, &child,
group_size, max_nunits, loads,
- vectorization_factor,
tem, npermutes, &this_tree_size,
max_tree_size))
{
--- 1041,1046 ----
*************** vect_analyze_slp_instance (vec_info *vin
*** 1656,1662 ****
unsigned int unrolling_factor = 1, nunits;
tree vectype, scalar_type = NULL_TREE;
gimple *next;
- unsigned int vectorization_factor = 0;
unsigned int i;
unsigned int max_nunits = 0;
vec<slp_tree> loads;
--- 1622,1627 ----
*************** vect_analyze_slp_instance (vec_info *vin
*** 1697,1708 ****
return false;
}
-
nunits = TYPE_VECTOR_SUBPARTS (vectype);
- if (is_a <loop_vec_info> (vinfo))
- vectorization_factor = as_a <loop_vec_info> (vinfo)->vectorization_factor;
- else
- vectorization_factor = nunits;
/* Calculate the unrolling factor. */
unrolling_factor = least_common_multiple (nunits, group_size) / group_size;
--- 1662,1668 ----
*************** vect_analyze_slp_instance (vec_info *vin
*** 1755,1762 ****
unsigned npermutes = 0;
if (vect_build_slp_tree (vinfo, &node, group_size,
&max_nunits, &loads,
! vectorization_factor, matches, &npermutes, NULL,
! max_tree_size))
{
/* Calculate the unrolling factor based on the smallest type. */
if (max_nunits > nunits)
--- 1715,1721 ----
unsigned npermutes = 0;
if (vect_build_slp_tree (vinfo, &node, group_size,
&max_nunits, &loads,
! matches, &npermutes, NULL, max_tree_size))
{
/* Calculate the unrolling factor based on the smallest type. */
if (max_nunits > nunits)
*************** vect_analyze_slp_instance (vec_info *vin
*** 1852,1858 ****
loads.release ();
/* For basic block SLP, try to break the group up into multiples of the
! vectorization factor. */
if (is_a <bb_vec_info> (vinfo)
&& GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
&& STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))
--- 1811,1817 ----
loads.release ();
/* For basic block SLP, try to break the group up into multiples of the
! vector size. */
if (is_a <bb_vec_info> (vinfo)
&& GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
&& STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))
*************** vect_analyze_slp_instance (vec_info *vin
*** 1862,1872 ****
for (i = 0; i < group_size; i++)
if (!matches[i]) break;
! if (i >= vectorization_factor && i < group_size)
{
/* Split into two groups at the first vector boundary before i. */
! gcc_assert ((vectorization_factor & (vectorization_factor - 1)) == 0);
! unsigned group1_size = i & ~(vectorization_factor - 1);
gimple *rest = vect_split_slp_store_group (stmt, group1_size);
bool res = vect_analyze_slp_instance (vinfo, stmt, max_tree_size);
--- 1821,1831 ----
for (i = 0; i < group_size; i++)
if (!matches[i]) break;
! if (i >= nunits && i < group_size)
{
/* Split into two groups at the first vector boundary before i. */
! gcc_assert ((nunits & (nunits - 1)) == 0);
! unsigned group1_size = i & ~(nunits - 1);
gimple *rest = vect_split_slp_store_group (stmt, group1_size);
bool res = vect_analyze_slp_instance (vinfo, stmt, max_tree_size);
*************** vect_analyze_slp_instance (vec_info *vin
*** 1874,1882 ****
skip the rest of that vector. */
if (group1_size < i)
{
! i = group1_size + vectorization_factor;
if (i < group_size)
! rest = vect_split_slp_store_group (rest, vectorization_factor);
}
if (i < group_size)
res |= vect_analyze_slp_instance (vinfo, rest, max_tree_size);
--- 1833,1841 ----
skip the rest of that vector. */
if (group1_size < i)
{
! i = group1_size + nunits;
if (i < group_size)
! rest = vect_split_slp_store_group (rest, nunits);
}
if (i < group_size)
res |= vect_analyze_slp_instance (vinfo, rest, max_tree_size);
*************** vect_transform_slp_perm_load (slp_tree n
*** 3274,3291 ****
mode = TYPE_MODE (vectype);
- if (!can_vec_perm_p (mode, false, NULL))
- {
- if (dump_enabled_p ())
- {
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "no vect permute for ");
- dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
- dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
- }
- return false;
- }
-
/* The generic VEC_PERM_EXPR code always uses an integral type of the
same size as the vector element being permuted. */
mask_element_type = lang_hooks.types.type_for_mode
--- 3233,3238 ----
Index: gcc/testsuite/gcc.dg/vect/slp-42.c
===================================================================
*** gcc/testsuite/gcc.dg/vect/slp-42.c (revision 0)
--- gcc/testsuite/gcc.dg/vect/slp-42.c (working copy)
***************
*** 0 ****
--- 1,19 ----
+ /* { dg-do compile } */
+ /* { dg-require-effective-target vect_int } */
+
+ int p[4096], q[4096];
+
+ void foo (int n)
+ {
+ int i;
+ for (i = 0; i < n; ++i)
+ {
+ p[i*4+0] = q[i*8+0] + q[i*8+4];
+ p[i*4+1] = q[i*8+1] + q[i*8+5];
+ p[i*4+2] = q[i*8+2] + q[i*8+6];
+ p[i*4+3] = q[i*8+3] + q[i*8+7];
+ }
+ }
+
+ /* { dg-final { scan-tree-dump "vectorizing stmts using SLP" "vect" } } */
+ /* { dg-final { scan-tree-dump "vectorized 1 loops" "vect" } } */