[PATCH] Fix vector permutation forwprop optimization (PR tree-optimization/54610)
Jakub Jelinek
jakub@redhat.com
Tue Sep 18 12:24:00 GMT 2012
Hi!
vect_gen_perm_mask is not suitable for use outside of the vectorizer:
it uses the current vector size to determine the number of units of a vector,
which isn't something that should be relied on outside of the vectorizer.
The following patch simply constructs the mask inline; the code is not
that long.
Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
2012-09-18 Jakub Jelinek <jakub@redhat.com>
PR tree-optimization/54610
* tree-ssa-forwprop.c: Include optabs.h.
(simplify_vector_constructor): Don't use vect_gen_perm_mask,
instead create the mask constant here.
* Makefile.in (tree-ssa-forwprop.o): Depend on $(OPTABS_H).
* gcc.target/i386/pr54610.c: New test.
--- gcc/tree-ssa-forwprop.c.jj 2012-09-14 14:20:56.000000000 +0200
+++ gcc/tree-ssa-forwprop.c 2012-09-18 10:17:40.627193548 +0200
@@ -34,6 +34,7 @@ along with GCC; see the file COPYING3.
#include "expr.h"
#include "cfgloop.h"
#include "tree-vectorizer.h"
+#include "optabs.h"
/* This pass propagates the RHS of assignment statements into use
sites of the LHS of the assignment. It's basically a specialized
@@ -2854,14 +2855,24 @@ simplify_vector_constructor (gimple_stmt
return false;
if (maybe_ident)
- {
- gimple_assign_set_rhs_from_tree (gsi, orig);
- }
+ gimple_assign_set_rhs_from_tree (gsi, orig);
else
{
- op2 = vect_gen_perm_mask (type, sel);
- if (!op2)
+ tree mask_type, *mask_elts;
+
+ if (!can_vec_perm_p (TYPE_MODE (type), false, sel))
+ return false;
+ mask_type
+ = build_vector_type (build_nonstandard_integer_type (elem_size, 1),
+ nelts);
+ if (GET_MODE_CLASS (TYPE_MODE (mask_type)) != MODE_VECTOR_INT
+ || GET_MODE_SIZE (TYPE_MODE (mask_type))
+ != GET_MODE_SIZE (TYPE_MODE (type)))
return false;
+ mask_elts = XALLOCAVEC (tree, nelts);
+ for (i = 0; i < nelts; i++)
+ mask_elts[i] = build_int_cst (TREE_TYPE (mask_type), sel[i]);
+ op2 = build_vector (mask_type, mask_elts);
gimple_assign_set_rhs_with_ops_1 (gsi, VEC_PERM_EXPR, orig, orig, op2);
}
update_stmt (gsi_stmt (*gsi));
--- gcc/Makefile.in.jj 2012-09-13 07:54:44.000000000 +0200
+++ gcc/Makefile.in 2012-09-18 10:18:05.717067056 +0200
@@ -2245,7 +2245,7 @@ tree-ssa-forwprop.o : tree-ssa-forwprop.
$(TM_H) $(TREE_H) $(TM_P_H) $(BASIC_BLOCK_H) $(CFGLOOP_H) \
$(TREE_FLOW_H) $(TREE_PASS_H) $(DIAGNOSTIC_H) \
langhooks.h $(FLAGS_H) $(GIMPLE_H) $(GIMPLE_PRETTY_PRINT_H) $(EXPR_H) \
- $(TREE_VECTORIZER_H)
+ $(TREE_VECTORIZER_H) $(OPTABS_H)
tree-ssa-phiprop.o : tree-ssa-phiprop.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \
$(TM_H) $(TREE_H) $(TM_P_H) $(BASIC_BLOCK_H) \
$(TREE_FLOW_H) $(TREE_PASS_H) $(DIAGNOSTIC_H) \
--- gcc/testsuite/gcc.target/i386/pr54610.c.jj 2012-09-18 10:24:58.793981091 +0200
+++ gcc/testsuite/gcc.target/i386/pr54610.c 2012-09-18 10:26:26.838535968 +0200
@@ -0,0 +1,17 @@
+/* PR tree-optimization/54610 */
+/* { dg-do compile } */
+/* { dg-options "-O -mavx -fdump-tree-optimized" } */
+
+typedef double vec __attribute__((vector_size (2 * sizeof (double))));
+void f (vec *px, vec *y, vec *z)
+{
+ vec x = *px;
+ vec t1 = { x[1], x[0] };
+ vec t2 = { x[0], x[1] };
+ *y = t1;
+ *z = t2;
+}
+
+/* { dg-final { scan-tree-dump-times "VEC_PERM_EXPR" 1 "optimized" } } */
+/* { dg-final { scan-tree-dump-not "BIT_FIELD_REF" "optimized" } } */
+/* { dg-final { cleanup-tree-dump "optimized" } } */
Jakub
More information about the Gcc-patches
mailing list