This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[PATCH] Fix PR82436
- From: Richard Biener <rguenther at suse dot de>
- To: gcc-patches at gcc dot gnu dot org
- Date: Fri, 6 Oct 2017 09:02:06 +0200 (CEST)
- Subject: [PATCH] Fix PR82436
- Authentication-results: sourceware.org; auth=none
Bootstrapped and tested on x86_64-unknown-linux-gnu, applied to trunk.
Richard.
2017-10-06 Richard Biener <rguenther@suse.de>
PR tree-optimization/82436
* tree-vect-slp.c (vect_supported_load_permutation_p): More
conservatively choose the vectorization factor when checking
whether we can perform the required load permutation.
(vect_transform_slp_perm_load): Assert that we are only analyzing
on the failure paths, since the transform phase may not fail.
* gcc.dg/vect/pr82436.c: New testcase.
Index: gcc/tree-vect-slp.c
===================================================================
--- gcc/tree-vect-slp.c (revision 253439)
+++ gcc/tree-vect-slp.c (working copy)
@@ -1567,14 +1567,20 @@ vect_supported_load_permutation_p (slp_i
return true;
}
- /* For loop vectorization verify we can generate the permutation. */
+ /* For loop vectorization verify we can generate the permutation. Be
+ conservative about the vectorization factor, there are permutations
+ that will use three vector inputs only starting from a specific factor
+ and the vectorization factor is not yet final.
+ ??? The SLP instance unrolling factor might not be the maximum one. */
unsigned n_perms;
+ unsigned test_vf
+ = least_common_multiple (SLP_INSTANCE_UNROLLING_FACTOR (slp_instn),
+ LOOP_VINFO_VECT_FACTOR
+ (STMT_VINFO_LOOP_VINFO (vinfo_for_stmt (stmt))));
FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
if (node->load_permutation.exists ()
- && !vect_transform_slp_perm_load
- (node, vNULL, NULL,
- SLP_INSTANCE_UNROLLING_FACTOR (slp_instn), slp_instn, true,
- &n_perms))
+ && !vect_transform_slp_perm_load (node, vNULL, NULL, test_vf,
+ slp_instn, true, &n_perms))
return false;
return true;
@@ -3560,6 +3566,7 @@ vect_transform_slp_perm_load (slp_tree n
dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
stmt, 0);
}
+ gcc_assert (analyze_only);
return false;
}
@@ -3583,6 +3590,7 @@ vect_transform_slp_perm_load (slp_tree n
dump_printf (MSG_MISSED_OPTIMIZATION, "%d ", mask[i]);
dump_printf (MSG_MISSED_OPTIMIZATION, "}\n");
}
+ gcc_assert (analyze_only);
return false;
}
Index: gcc/testsuite/gcc.dg/vect/pr82436.c
===================================================================
--- gcc/testsuite/gcc.dg/vect/pr82436.c (nonexistent)
+++ gcc/testsuite/gcc.dg/vect/pr82436.c (working copy)
@@ -0,0 +1,35 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-Ofast -fno-tree-scev-cprop" } */
+/* { dg-additional-options "-mavx2" { target { x86_64-*-* i?86-*-* } } } */
+
+struct reflection_type
+{
+ int h;
+ int k;
+ int l;
+ double f_exp;
+ double f_sigma;
+ _Complex double f_calc;
+ double f_pred;
+ double i_exp;
+ double i_sigma;
+ double i_pred;
+};
+
+double y, w;
+int foo (struct reflection_type *r, int n, unsigned s)
+{
+ int i;
+ y = 0;
+ w = 0;
+ for (i = 1; i < n; ++i)
+ {
+ struct reflection_type *x = &r[i*s];
+ double fpred = x->f_pred;
+ double fexp = x->f_exp;
+ double tem = (fpred - fexp);
+ y += __builtin_fabs (tem / x->f_sigma);
+ w += __builtin_fabs (tem / fexp);
+ }
+ return i;
+}