From 2ad71efb5de9e929ffd2b8ce0a37c3c34021c0f1 Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Mon, 28 Jun 2021 09:42:58 +0200 Subject: [PATCH] tree-optimization/101207 - fix BB reduc permute elide with live stmts This fixes breakage of live lane extracts from permuted loads we elide from BB reduction vectorization by handling the un-permuting the same way as the regular eliding code does - apply the reverse permute to both the scalar stmts and the load permutation. 2021-06-28 Richard Biener PR tree-optimization/101207 * tree-vect-slp.c (vect_optimize_slp): Do BB reduction permute eliding for load permutations properly. * gcc.dg/vect/bb-slp-pr101207.c: New testcase. --- gcc/testsuite/gcc.dg/vect/bb-slp-pr101207.c | 25 ++++++ gcc/tree-vect-slp.c | 88 +++++++++++---------- 2 files changed, 71 insertions(+), 42 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/vect/bb-slp-pr101207.c diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-pr101207.c b/gcc/testsuite/gcc.dg/vect/bb-slp-pr101207.c new file mode 100644 index 000000000000..1f51d66a5fe8 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/bb-slp-pr101207.c @@ -0,0 +1,25 @@ +/* { dg-do run } */ +/* { dg-additional-options "-ffast-math" } */ + +#include "tree-vect.h" + +double a[2]; +double x, y; + +void __attribute__((noipa)) foo () +{ + x = a[1] - a[0]; + y = a[0] + a[1]; +} + +int main() +{ + check_vect (); + + a[0] = 0.; + a[1] = 1.; + foo (); + if (x != 1. || y != 1.) + __builtin_abort (); + return 0; +} diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c index 17fe5f23c098..5401dbe4d5e6 100644 --- a/gcc/tree-vect-slp.c +++ b/gcc/tree-vect-slp.c @@ -3921,6 +3921,52 @@ vect_optimize_slp (vec_info *vinfo) } } + /* Elide any permutations at BB reduction roots. 
*/ + if (is_a <bb_vec_info> (vinfo)) + { + for (slp_instance instance : vinfo->slp_instances) + { + if (SLP_INSTANCE_KIND (instance) != slp_inst_kind_bb_reduc) + continue; + slp_tree old = SLP_INSTANCE_TREE (instance); + if (SLP_TREE_CODE (old) == VEC_PERM_EXPR + && SLP_TREE_CHILDREN (old).length () == 1) + { + slp_tree child = SLP_TREE_CHILDREN (old)[0]; + if (SLP_TREE_DEF_TYPE (child) == vect_external_def) + { + /* Preserve the special VEC_PERM we use to shield existing + vector defs from the rest. But make it a no-op. */ + unsigned i = 0; + for (std::pair<unsigned, unsigned> &p + : SLP_TREE_LANE_PERMUTATION (old)) + p.second = i++; + } + else + { + SLP_INSTANCE_TREE (instance) = child; + SLP_TREE_REF_COUNT (child)++; + vect_free_slp_tree (old); + } + } + else if (SLP_TREE_LOAD_PERMUTATION (old).exists () + && SLP_TREE_REF_COUNT (old) == 1 + && vertices[old->vertex].materialize) + { + /* ??? For loads the situation is more complex since + we can't modify the permute in place in case the + node is used multiple times. In fact for loads this + should be somehow handled in the propagation engine. */ + /* Apply the reverse permutation to our stmts. */ + int perm = vertices[old->vertex].get_perm_in (); + vect_slp_permute (perms[perm], + SLP_TREE_SCALAR_STMTS (old), true); + vect_slp_permute (perms[perm], + SLP_TREE_LOAD_PERMUTATION (old), true); + } + } + } + /* Free the perms vector used for propagation. */ while (!perms.is_empty ()) perms.pop ().release (); @@ -3987,48 +4033,6 @@ vect_optimize_slp (vec_info *vinfo) } } } - - /* And any permutations of BB reductions. 
*/ - if (is_a <bb_vec_info> (vinfo)) - { - for (slp_instance instance : vinfo->slp_instances) - { - if (SLP_INSTANCE_KIND (instance) != slp_inst_kind_bb_reduc) - continue; - slp_tree old = SLP_INSTANCE_TREE (instance); - if (SLP_TREE_CODE (old) == VEC_PERM_EXPR - && SLP_TREE_CHILDREN (old).length () == 1) - { - slp_tree child = SLP_TREE_CHILDREN (old)[0]; - if (SLP_TREE_DEF_TYPE (child) == vect_external_def) - { - /* Preserve the special VEC_PERM we use to shield existing - vector defs from the rest. But make it a no-op. */ - unsigned i = 0; - for (std::pair<unsigned, unsigned> &p - : SLP_TREE_LANE_PERMUTATION (old)) - p.second = i++; - } - else - { - SLP_INSTANCE_TREE (instance) = child; - SLP_TREE_REF_COUNT (child)++; - vect_free_slp_tree (old); - } - } - else if (SLP_TREE_LOAD_PERMUTATION (old).exists () - && SLP_TREE_REF_COUNT (old) == 1) - { - /* ??? For loads the situation is more complex since - we can't modify the permute in place in case the - node is used multiple times. In fact for loads this - should be somehow handled in the propagation engine. */ - auto fn = [] (const void *a, const void *b) - { return *(const int *)a - *(const int *)b; }; - SLP_TREE_LOAD_PERMUTATION (old).qsort (fn); - } - } - } } /* Gather loads reachable from the individual SLP graph entries. */ -- 2.43.5