This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH] Optimize COND_EXPR where then/else operands are INTEGER_CSTs of different size than the comparison operands


Hi!

Since Richard's recent changes to allow different modes in vcond
patterns (so far on i?86/x86_64 only, I think) we can vectorize more
COND_EXPRs than before, and this patch improves it a tiny bit more
- even i?86/x86_64 support vconds only if the sizes of the vector element
modes are the same.  With this patch we can vectorize even if the result
type is wider or narrower than the comparison operands, by vectorizing it
as a COND_EXPR in the integer mode matching the size of the comparison
operands, followed by a cast.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2011-10-06  Jakub Jelinek  <jakub@redhat.com>

	PR tree-optimization/50596
	* tree-vectorizer.h (vect_is_simple_cond): New prototype.
	(NUM_PATTERNS): Change to 6.
	* tree-vect-patterns.c (vect_recog_mixed_size_cond_pattern): New
	function.
	(vect_vect_recog_func_ptrs): Add vect_recog_mixed_size_cond_pattern.
	(vect_mark_pattern_stmts): Don't create stmt_vinfo for def_stmt
	if it already has one, and don't set STMT_VINFO_VECTYPE in it
	if it is already set.
	* tree-vect-stmts.c (vect_mark_stmts_to_be_vectorized): Handle
	COND_EXPR and VEC_COND_EXPR in pattern stmts.
	(vect_is_simple_cond): No longer static.

	* lib/target-supports.exp (check_effective_target_vect_cond_mixed):
	New.
	* gcc.dg/vect/vect-cond-8.c: New test.

--- gcc/tree-vectorizer.h.jj	2011-09-26 14:06:52.000000000 +0200
+++ gcc/tree-vectorizer.h	2011-10-06 10:04:03.000000000 +0200
@@ -1,5 +1,5 @@
 /* Vectorizer
-   Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
+   Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
    Free Software Foundation, Inc.
    Contributed by Dorit Naishlos <dorit@il.ibm.com>
 
@@ -818,6 +818,7 @@ extern bool vect_transform_stmt (gimple,
                                  bool *, slp_tree, slp_instance);
 extern void vect_remove_stores (gimple);
 extern bool vect_analyze_stmt (gimple, bool *, slp_tree);
+extern bool vect_is_simple_cond (tree, loop_vec_info, tree *);
 extern bool vectorizable_condition (gimple, gimple_stmt_iterator *, gimple *,
                                     tree, int);
 extern void vect_get_load_cost (struct data_reference *, int, bool,
@@ -902,7 +903,7 @@ extern void vect_slp_transform_bb (basic
    Additional pattern recognition functions can (and will) be added
    in the future.  */
 typedef gimple (* vect_recog_func_ptr) (VEC (gimple, heap) **, tree *, tree *);
-#define NUM_PATTERNS 5
+#define NUM_PATTERNS 6
 void vect_pattern_recog (loop_vec_info);
 
 /* In tree-vectorizer.c.  */
--- gcc/tree-vect-patterns.c.jj	2011-10-06 09:14:17.000000000 +0200
+++ gcc/tree-vect-patterns.c	2011-10-06 14:37:12.000000000 +0200
@@ -49,12 +49,15 @@ static gimple vect_recog_dot_prod_patter
 static gimple vect_recog_pow_pattern (VEC (gimple, heap) **, tree *, tree *);
 static gimple vect_recog_over_widening_pattern (VEC (gimple, heap) **, tree *,
                                                  tree *);
+static gimple vect_recog_mixed_size_cond_pattern (VEC (gimple, heap) **,
+						  tree *, tree *);
 static vect_recog_func_ptr vect_vect_recog_func_ptrs[NUM_PATTERNS] = {
 	vect_recog_widen_mult_pattern,
 	vect_recog_widen_sum_pattern,
 	vect_recog_dot_prod_pattern,
 	vect_recog_pow_pattern,
-        vect_recog_over_widening_pattern};
+	vect_recog_over_widening_pattern,
+	vect_recog_mixed_size_cond_pattern};
 
 
 /* Function widened_name_p
@@ -1218,6 +1214,120 @@ vect_recog_over_widening_pattern (VEC (g
 }
 
 
+/* Function vect_recog_mixed_size_cond_pattern
+
+   Try to find the following pattern:
+
+     type x_t, y_t;
+     TYPE a_T, b_T, c_T;
+   loop:
+     S1  a_T = x_t CMP y_t ? b_T : c_T;
+
+   where type 'TYPE' is an integral type which has different size
+   from 'type'.  b_T and c_T are constants and if 'TYPE' is wider
+   than 'type', the constants need to fit into an integer type
+   with the same width as 'type'.
+
+   Input:
+
+   * LAST_STMT: A stmt from which the pattern search begins.
+
+   Output:
+
+   * TYPE_IN: The type of the input arguments to the pattern.
+
+   * TYPE_OUT: The type of the output of this pattern.
+
+   * Return value: A new stmt that will be used to replace the pattern.
+	Additionally a def_stmt is added.
+
+	a_it = x_t CMP y_t ? b_it : c_it;
+	a_T = (TYPE) a_it;  */
+
+static gimple
+vect_recog_mixed_size_cond_pattern (VEC (gimple, heap) **stmts, tree *type_in,
+				    tree *type_out)
+{
+  gimple last_stmt = VEC_index (gimple, *stmts, 0);
+  tree cond_expr, then_clause, else_clause;
+  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt), def_stmt_info;
+  tree type, vectype, comp_vectype, itype, vecitype;
+  enum machine_mode cmpmode;
+  gimple pattern_stmt, def_stmt;
+  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
+
+  if (!is_gimple_assign (last_stmt)
+      || gimple_assign_rhs_code (last_stmt) != COND_EXPR
+      || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
+    return NULL;
+
+  cond_expr = gimple_assign_rhs1 (last_stmt);
+  then_clause = gimple_assign_rhs2 (last_stmt);
+  else_clause = gimple_assign_rhs3 (last_stmt);
+
+  if (TREE_CODE (then_clause) != INTEGER_CST
+      || TREE_CODE (else_clause) != INTEGER_CST)
+    return NULL;
+
+  if (!vect_is_simple_cond (cond_expr, loop_vinfo, &comp_vectype)
+      || !comp_vectype)
+    return NULL;
+
+  type = gimple_expr_type (last_stmt);
+  cmpmode = GET_MODE_INNER (TYPE_MODE (comp_vectype));
+
+  if (GET_MODE_BITSIZE (TYPE_MODE (type)) == GET_MODE_BITSIZE (cmpmode))
+    return NULL;
+
+  vectype = get_vectype_for_scalar_type (type);
+  if (vectype == NULL_TREE)
+    return NULL;
+
+  if (expand_vec_cond_expr_p (vectype, comp_vectype))
+    return NULL;
+
+  itype = build_nonstandard_integer_type (GET_MODE_BITSIZE (cmpmode),
+					  TYPE_UNSIGNED (type));
+  if (itype == NULL_TREE
+      || GET_MODE_BITSIZE (TYPE_MODE (itype)) != GET_MODE_BITSIZE (cmpmode))
+    return NULL;
+
+  vecitype = get_vectype_for_scalar_type (itype);
+  if (vecitype == NULL_TREE)
+    return NULL;
+
+  if (!expand_vec_cond_expr_p (vecitype, comp_vectype))
+    return NULL;
+
+  if (GET_MODE_BITSIZE (TYPE_MODE (type)) > GET_MODE_BITSIZE (cmpmode))
+    {
+      if (!int_fits_type_p (then_clause, itype)
+	  || !int_fits_type_p (else_clause, itype))
+	return NULL;
+    }
+
+  def_stmt
+    = gimple_build_assign_with_ops3 (COND_EXPR,
+				     vect_recog_temp_ssa_var (itype, NULL),
+				     unshare_expr (cond_expr),
+				     fold_convert (itype, then_clause),
+				     fold_convert (itype, else_clause));
+  pattern_stmt
+    = gimple_build_assign_with_ops (NOP_EXPR,
+				    vect_recog_temp_ssa_var (type, NULL),
+				    gimple_assign_lhs (def_stmt), NULL_TREE);
+
+  STMT_VINFO_PATTERN_DEF_STMT (stmt_vinfo) = def_stmt;
+  def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo, NULL);
+  set_vinfo_for_stmt (def_stmt, def_stmt_info);
+  STMT_VINFO_VECTYPE (def_stmt_info) = vecitype;
+  *type_in = vecitype;
+  *type_out = vectype;
+
+  return pattern_stmt;
+}
+
+
 /* Mark statements that are involved in a pattern.  */
 
 static inline void
@@ -1245,14 +1355,18 @@ vect_mark_pattern_stmts (gimple orig_stm
   if (STMT_VINFO_PATTERN_DEF_STMT (pattern_stmt_info))
     {
       def_stmt = STMT_VINFO_PATTERN_DEF_STMT (pattern_stmt_info);
-      set_vinfo_for_stmt (def_stmt,
-                          new_stmt_vec_info (def_stmt, loop_vinfo, NULL));
-      gimple_set_bb (def_stmt, gimple_bb (orig_stmt));
       def_stmt_info = vinfo_for_stmt (def_stmt);
+      if (def_stmt_info == NULL)
+	{
+	  def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo, NULL);
+	  set_vinfo_for_stmt (def_stmt, def_stmt_info);
+	}
+      gimple_set_bb (def_stmt, gimple_bb (orig_stmt));
       STMT_VINFO_RELATED_STMT (def_stmt_info) = orig_stmt;
       STMT_VINFO_DEF_TYPE (def_stmt_info)
 	= STMT_VINFO_DEF_TYPE (orig_stmt_info);
-      STMT_VINFO_VECTYPE (def_stmt_info) = pattern_vectype;
+      if (STMT_VINFO_VECTYPE (def_stmt_info) == NULL_TREE)
+	STMT_VINFO_VECTYPE (def_stmt_info) = pattern_vectype;
     }
 }
 
--- gcc/tree-vect-stmts.c.jj	2011-09-29 14:25:46.000000000 +0200
+++ gcc/tree-vect-stmts.c	2011-10-06 12:16:43.000000000 +0200
@@ -652,9 +652,26 @@ vect_mark_stmts_to_be_vectorized (loop_v
              have to scan the RHS or function arguments instead.  */
           if (is_gimple_assign (stmt))
             {
-              for (i = 1; i < gimple_num_ops (stmt); i++)
+	      enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
+	      tree op = gimple_assign_rhs1 (stmt);
+
+	      i = 1;
+	      if ((rhs_code == COND_EXPR || rhs_code == VEC_COND_EXPR)
+		  && COMPARISON_CLASS_P (op))
+		{
+		  if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
+				    live_p, relevant, &worklist)
+		      || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
+				       live_p, relevant, &worklist))
+		    {
+		      VEC_free (gimple, heap, worklist);
+		      return false;
+		    }
+		  i = 2;
+		}
+	      for (; i < gimple_num_ops (stmt); i++)
                 {
-                  tree op = gimple_op (stmt, i);
+		  op = gimple_op (stmt, i);
                   if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
                                     &worklist))
                     {
@@ -4682,7 +4699,7 @@ vectorizable_load (gimple stmt, gimple_s
    Returns whether a COND can be vectorized.  Checks whether
    condition operands are supportable using vec_is_simple_use.  */
 
-static bool
+bool
 vect_is_simple_cond (tree cond, loop_vec_info loop_vinfo, tree *comp_vectype)
 {
   tree lhs, rhs;
--- gcc/testsuite/lib/target-supports.exp.jj	2011-10-04 10:18:31.000000000 +0200
+++ gcc/testsuite/lib/target-supports.exp	2011-10-06 15:18:28.000000000 +0200
@@ -3234,6 +3234,26 @@ proc check_effective_target_vect_conditi
     return $et_vect_cond_saved
 }
 
+# Return 1 if the target supports vector conditional operations where
+# the comparison has different type from the lhs, 0 otherwise.
+
+proc check_effective_target_vect_cond_mixed { } {
+    global et_vect_cond_mixed_saved
+
+    if [info exists et_vect_cond_mixed_saved] {
+	verbose "check_effective_target_vect_cond_mixed: using cached result" 2
+    } else {
+	set et_vect_cond_mixed_saved 0
+	if { [istarget i?86-*-*]
+	     || [istarget x86_64-*-*] } {
+	   set et_vect_cond_mixed_saved 1
+	}
+    }
+
+    verbose "check_effective_target_vect_cond_mixed: returning $et_vect_cond_mixed_saved" 2
+    return $et_vect_cond_mixed_saved
+}
+
 # Return 1 if the target supports vector char multiplication, 0 otherwise.
 
 proc check_effective_target_vect_char_mult { } {
--- gcc/testsuite/gcc.dg/vect/vect-cond-8.c.jj	2011-10-06 14:50:25.000000000 +0200
+++ gcc/testsuite/gcc.dg/vect/vect-cond-8.c	2011-10-06 15:17:12.000000000 +0200
@@ -0,0 +1,122 @@
+/* { dg-require-effective-target vect_cond_mixed } */
+
+#include "tree-vect.h"
+
+#define N 1024
+float a[N], b[N], c[N];
+int d[N], e[N], f[N];
+unsigned char k[N];
+
+__attribute__((noinline, noclone)) void
+f1 (void)
+{
+  int i;
+  for (i = 0; i < N; ++i)
+    k[i] = a[i] < b[i] ? 17 : 0;
+}
+
+__attribute__((noinline, noclone)) void
+f2 (void)
+{
+  int i;
+  for (i = 0; i < N; ++i)
+    k[i] = a[i] < b[i] ? 0 : 24;
+}
+
+__attribute__((noinline, noclone)) void
+f3 (void)
+{
+  int i;
+  for (i = 0; i < N; ++i)
+    k[i] = a[i] < b[i] ? 51 : 12;
+}
+
+__attribute__((noinline, noclone)) void
+f4 (void)
+{
+  int i;
+  for (i = 0; i < N; ++i)
+    {
+      int d2 = d[i], e2 = e[i];
+      f[i] = a[i] < b[i] ? d2 : e2;
+    }
+}
+
+__attribute__((noinline, noclone)) void
+f5 (void)
+{
+  int i;
+  for (i = 0; i < N; ++i)
+    {
+      float a2 = a[i], b2 = b[i];
+      c[i] = d[i] < e[i] ? a2 : b2;
+    }
+}
+
+int
+main ()
+{
+  int i;
+
+  check_vect ();
+
+  for (i = 0; i < N; i++)
+    {
+      switch (i % 9)
+	{
+	case 0: asm (""); a[i] = - i - 1; b[i] = i + 1; break;
+	case 1: a[i] = 0; b[i] = 0; break;
+	case 2: a[i] = i + 1; b[i] = - i - 1; break;
+	case 3: a[i] = i; b[i] = i + 7; break;
+	case 4: a[i] = i; b[i] = i; break;
+	case 5: a[i] = i + 16; b[i] = i + 3; break;
+	case 6: a[i] = - i - 5; b[i] = - i; break;
+	case 7: a[i] = - i; b[i] = - i; break;
+	case 8: a[i] = - i; b[i] = - i - 7; break;
+	}
+      d[i] = i;
+      e[i] = 2 * i;
+    }
+  f1 ();
+  for (i = 0; i < N; i++)
+    if (k[i] != ((i % 3) == 0 ? 17 : 0))
+      abort ();
+  f2 ();
+  for (i = 0; i < N; i++)
+    if (k[i] != ((i % 3) == 0 ? 0 : 24))
+      abort ();
+  f3 ();
+  for (i = 0; i < N; i++)
+    if (k[i] != ((i % 3) == 0 ? 51 : 12))
+      abort ();
+  f4 ();
+  for (i = 0; i < N; i++)
+    if (f[i] != ((i % 3) == 0 ? d[i] : e[i]))
+      abort ();
+  for (i = 0; i < N; i++)
+    {
+      switch (i % 9)
+	{
+	case 0: asm (""); d[i] = - i - 1; e[i] = i + 1; break;
+	case 1: d[i] = 0; e[i] = 0; break;
+	case 2: d[i] = i + 1; e[i] = - i - 1; break;
+	case 3: d[i] = i; e[i] = i + 7; break;
+	case 4: d[i] = i; e[i] = i; break;
+	case 5: d[i] = i + 16; e[i] = i + 3; break;
+	case 6: d[i] = - i - 5; e[i] = - i; break;
+	case 7: d[i] = - i; e[i] = - i; break;
+	case 8: d[i] = - i; e[i] = - i - 7; break;
+	}
+      a[i] = i;
+      b[i] = i / 2;
+    }
+  f5 ();
+  for (i = 0; i < N; i++)
+    if (c[i] != ((i % 3) == 0 ? a[i] : b[i]))
+      abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "note: vectorized 1 loops" 5 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */

	Jakub


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]