This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH] Use get_range_info in vect_recog_divmod_pattern


On Wed, Oct 23, 2013 at 11:14:54AM +0200, Richard Biener wrote:
> On Tue, 22 Oct 2013, Jakub Jelinek wrote:
> 
> > Hi!
> > 
> > If VRP tells us that oprnd is always >= 0 or always < 0, we can generate
> > better code for the divmod vectorization.
> > 
> > Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
> 
> Testcase...?
> 
> Ok with adding one (I suggest an x86-specific one and scanning the
> assembler dump).

Like this?

2013-10-23  Jakub Jelinek  <jakub@redhat.com>

	* tree-vect-patterns.c (vect_recog_divmod_pattern): Optimize
	sequence based on get_range_info returned range.

	* gcc.target/i386/vect-div-1.c: New test.

--- gcc/tree-vect-patterns.c.jj	2013-10-22 18:36:51.947395037 +0200
+++ gcc/tree-vect-patterns.c	2013-10-23 11:28:26.211956658 +0200
@@ -2226,20 +2226,19 @@ vect_recog_divmod_pattern (vec<gimple> *
       if (post_shift >= prec)
 	return NULL;
 
-      /* t1 = oprnd1 h* ml;  */
+      /* t1 = oprnd0 h* ml;  */
       t1 = vect_recog_temp_ssa_var (itype, NULL);
       def_stmt
 	= gimple_build_assign_with_ops (MULT_HIGHPART_EXPR, t1, oprnd0,
 					build_int_cst (itype, ml));
-      append_pattern_def_seq (stmt_vinfo, def_stmt);
 
       if (add)
 	{
 	  /* t2 = t1 + oprnd0;  */
+	  append_pattern_def_seq (stmt_vinfo, def_stmt);
 	  t2 = vect_recog_temp_ssa_var (itype, NULL);
 	  def_stmt
 	    = gimple_build_assign_with_ops (PLUS_EXPR, t2, t1, oprnd0);
-	  append_pattern_def_seq (stmt_vinfo, def_stmt);
 	}
       else
 	t2 = t1;
@@ -2247,27 +2246,57 @@ vect_recog_divmod_pattern (vec<gimple> *
       if (post_shift)
 	{
 	  /* t3 = t2 >> post_shift;  */
+	  append_pattern_def_seq (stmt_vinfo, def_stmt);
 	  t3 = vect_recog_temp_ssa_var (itype, NULL);
 	  def_stmt
 	    = gimple_build_assign_with_ops (RSHIFT_EXPR, t3, t2,
 					    build_int_cst (itype, post_shift));
-	  append_pattern_def_seq (stmt_vinfo, def_stmt);
 	}
       else
 	t3 = t2;
 
-      /* t4 = oprnd0 >> (prec - 1);  */
-      t4 = vect_recog_temp_ssa_var (itype, NULL);
-      def_stmt
-	= gimple_build_assign_with_ops (RSHIFT_EXPR, t4, oprnd0,
-					build_int_cst (itype, prec - 1));
-      append_pattern_def_seq (stmt_vinfo, def_stmt);
-
-      /* q = t3 - t4;  or q = t4 - t3;  */
-      q = vect_recog_temp_ssa_var (itype, NULL);
-      pattern_stmt
-	= gimple_build_assign_with_ops (MINUS_EXPR, q, d < 0 ? t4 : t3,
-					d < 0 ? t3 : t4);
+      double_int oprnd0_min, oprnd0_max;
+      int msb = 1;
+      if (get_range_info (oprnd0, &oprnd0_min, &oprnd0_max) == VR_RANGE)
+	{
+	  if (!oprnd0_min.is_negative ())
+	    msb = 0;
+	  else if (oprnd0_max.is_negative ())
+	    msb = -1;
+	}
+
+      if (msb == 0 && d >= 0)
+	{
+	  /* q = t3;  */
+	  q = t3;
+	  pattern_stmt = def_stmt;
+	}
+      else
+	{
+	  /* t4 = oprnd0 >> (prec - 1);
+	     or if we know from VRP that oprnd0 >= 0
+	     t4 = 0;
+	     or if we know from VRP that oprnd0 < 0
+	     t4 = -1;  */
+	  append_pattern_def_seq (stmt_vinfo, def_stmt);
+	  t4 = vect_recog_temp_ssa_var (itype, NULL);
+	  if (msb != 1)
+	    def_stmt
+	      = gimple_build_assign_with_ops (INTEGER_CST,
+					      t4, build_int_cst (itype, msb),
+					      NULL_TREE);
+	  else
+	    def_stmt
+	      = gimple_build_assign_with_ops (RSHIFT_EXPR, t4, oprnd0,
+					      build_int_cst (itype, prec - 1));
+	  append_pattern_def_seq (stmt_vinfo, def_stmt);
+
+	  /* q = t3 - t4;  or q = t4 - t3;  */
+	  q = vect_recog_temp_ssa_var (itype, NULL);
+	  pattern_stmt
+	    = gimple_build_assign_with_ops (MINUS_EXPR, q, d < 0 ? t4 : t3,
+					    d < 0 ? t3 : t4);
+	}
     }
 
   if (rhs_code == TRUNC_MOD_EXPR)
--- gcc/testsuite/gcc.target/i386/vect-div-1.c.jj	2013-10-23 11:43:49.089265027 +0200
+++ gcc/testsuite/gcc.target/i386/vect-div-1.c	2013-10-23 11:57:06.387187749 +0200
@@ -0,0 +1,43 @@
+/* { dg-do compile { target sse2 } } */
+/* { dg-options "-O2 -ftree-vectorize -fno-common -msse2" } */
+
+unsigned short b[1024] = { 0 };
+int a[1024] = { 0 };
+
+int
+f1 (int x)
+{
+  int i;
+  for (i = 0; i < 1024; i++)
+    a[i] = (b[i] + 7) / 15;
+}
+
+int
+f2 (int x)
+{
+  int i;
+  for (i = 0; i < 1024; i++)
+    a[i] = (b[i] + 7) % 15;
+}
+
+int
+f3 (int x)
+{
+  int i;
+  for (i = 0; i < 1024; i++)
+    a[i] = (b[i] - 66000) / 15;
+}
+
+int
+f4 (int x)
+{
+  int i;
+  for (i = 0; i < 1024; i++)
+    a[i] = (b[i] - 66000) % 15;
+}
+
+/* In f1 and f2, VRP can prove the first operand of division or modulo
+   is always non-negative, so there is no need to do >> 31 shift
+   etc. to check if it is.  And in f3 and f4, VRP can prove it is always
+   negative.  */
+/* { dg-final { scan-assembler-not "psrad\[^\n\r\]*\\\$31" } } */


	Jakub


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]