[PATCH] Canonicalize real X - CST to X + -CST

Wed Sep 16 14:04:00 GMT 2020

This removes the canonicalization of X + -CST to X - CST to facilitate
SLP vectorization analysis, where we currently treat the added testcase
as a two-operation plus blend vectorization opportunity.  While it
may be possible to avoid this in the vectorizer itself, the
canonicalization is somewhat premature - its motivation is that
an FP constant appearing with both signs is common and thus
canonicalizing to positive values optimizes the constant pool.  But
if you mix, say, x * -7.1 and x - 7.1 this backfires - this instead
looks like a local optimization to be applied in RTL CSE or even LRA
load inheritance.  I filed PR97071 for this.

Bootstrapped and tested on x86_64-unknown-linux-gnu, any objections?

Thanks,
Richard.

2020-09-16  Richard Biener  <rguenther@suse.de>

	* fold-const.c (fold_binary_loc): Remove condition
	on REAL_CST for A - B -> A + (-B) transform.
	(negate_expr_p): Remove condition on REAL_CST.
	* match.pd (negate_expr_p): Likewise.
	(X + -C -> X - C): Remove.

	* gcc.dg/vect/slp-49.c: New testcase.
	* gcc.dg/tree-ssa/pr90356-1.c: Adjust.
	* gcc.dg/tree-ssa/pr90356-3.c: Likewise.
	* gcc.dg/tree-ssa/pr90356-4.c: Likewise.
	* gcc.target/i386/pr54855-3.c: Likewise.
	* gfortran.dg/reassoc_1.f90: Likewise.
	* gfortran.dg/reassoc_2.f90: Likewise.
---
 gcc/fold-const.c                          |  7 ++-----
 gcc/match.pd                              | 11 +----------
 gcc/testsuite/gcc.dg/tree-ssa/pr90356-1.c |  4 ++--
 gcc/testsuite/gcc.dg/tree-ssa/pr90356-3.c |  2 +-
 gcc/testsuite/gcc.dg/tree-ssa/pr90356-4.c |  2 +-
 gcc/testsuite/gcc.dg/vect/slp-49.c        | 15 +++++++++++++++
 gcc/testsuite/gcc.target/i386/pr54855-3.c |  2 +-
 gcc/testsuite/gfortran.dg/reassoc_1.f90   |  2 +-
 gcc/testsuite/gfortran.dg/reassoc_2.f90   |  2 +-
 9 files changed, 25 insertions(+), 22 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/vect/slp-49.c

diff --git a/gcc/fold-const.c b/gcc/fold-const.c
index 0cc80adf632..ec369169a4c 100644
--- a/gcc/fold-const.c
+++ b/gcc/fold-const.c
@@ -401,7 +401,7 @@ negate_expr_p (tree t)
     case REAL_CST:
       /* We want to canonicalize to positive real constants.  Pretend
          that only negative ones can be easily negated.  */
-      return REAL_VALUE_NEGATIVE (TREE_REAL_CST (t));
+      return true;
 
     case COMPLEX_CST:
       return negate_expr_p (TREE_REALPART (t))
@@ -10962,10 +10962,7 @@ fold_binary_loc (location_t loc, enum tree_code code, tree type,
       /* A - B -> A + (-B) if B is easily negatable.  */
       if (negate_expr_p (op1)
 	  && ! TYPE_OVERFLOW_SANITIZED (type)
-	  && ((FLOAT_TYPE_P (type)
-               /* Avoid this transformation if B is a positive REAL_CST.  */
-	       && (TREE_CODE (op1) != REAL_CST
-		   || REAL_VALUE_NEGATIVE (TREE_REAL_CST (op1))))
+	  && (FLOAT_TYPE_P (type)
 	      || INTEGRAL_TYPE_P (type)))
 	return fold_build2_loc (loc, PLUS_EXPR, type,
 				fold_convert_loc (loc, type, arg0),
diff --git a/gcc/match.pd b/gcc/match.pd
index 7d63bb973cb..9f14d50ae2d 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -1307,8 +1307,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
  (negate @0)
  (if (!TYPE_OVERFLOW_SANITIZED (type))))
 (match negate_expr_p
- REAL_CST
- (if (REAL_VALUE_NEGATIVE (TREE_REAL_CST (t)))))
+ REAL_CST)
 /* VECTOR_CST handling of non-wrapping types would recurse in unsupported
    ways.  */
 (match negate_expr_p
@@ -3326,14 +3325,6 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 
 /* Canonicalization of binary operations.  */
 
-/* Convert X + -C into X - C.  */
-(simplify
- (plus @0 REAL_CST@1)
- (if (REAL_VALUE_NEGATIVE (TREE_REAL_CST (@1)))
-  (with { tree tem = const_unop (NEGATE_EXPR, type, @1); }
-   (if (!TREE_OVERFLOW (tem) || !flag_trapping_math)
-    (minus @0 { tem; })))))
-
 /* Convert x+x into x*2.  */
 (simplify
  (plus @0 @0)
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr90356-1.c b/gcc/testsuite/gcc.dg/tree-ssa/pr90356-1.c
index c3a15ea21af..ac95ec56810 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/pr90356-1.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr90356-1.c
@@ -2,8 +2,8 @@
 /* { dg-do compile } */
 /* { dg-options "-O2 -fno-rounding-math -fsignaling-nans -fsigned-zeros -fdump-tree-optimized" } */
 /* { dg-final { scan-tree-dump-times "x_\[0-9]*.D. \\+ 0.0;" 12 "optimized" } } */
-/* { dg-final { scan-tree-dump-times "y_\[0-9]*.D. - 0.0;" 4 "optimized" } } */
-/* { dg-final { scan-tree-dump-times " \[+-] 0.0;" 16 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "y_\[0-9]*.D. \\+ -0.0;" 4 "optimized" } } */
+/* { dg-final { scan-tree-dump-times " \\+ -?0.0;" 16 "optimized" } } */
 
 double f1 (double x) { return (x + 0.0) + 0.0; }
 double f2 (double y) { return (y + (-0.0)) + (-0.0); }
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr90356-3.c b/gcc/testsuite/gcc.dg/tree-ssa/pr90356-3.c
index e658130c69f..951971b95ee 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/pr90356-3.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr90356-3.c
@@ -1,6 +1,6 @@
 /* PR tree-optimization/90356 */
 /* { dg-do compile } */
 /* { dg-options "-O2 -frounding-math -fsignaling-nans -fsigned-zeros -fdump-tree-optimized" } */
-/* { dg-final { scan-tree-dump-times " \[+-] 0.0;" 32 "optimized" } } */
+/* { dg-final { scan-tree-dump-times " \\+ -?0.0;" 32 "optimized" } } */
 
 #include "pr90356-1.c"
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr90356-4.c b/gcc/testsuite/gcc.dg/tree-ssa/pr90356-4.c
index 126cd10928b..3459ddba356 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/pr90356-4.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr90356-4.c
@@ -1,6 +1,6 @@
 /* PR tree-optimization/90356 */
 /* { dg-do compile } */
 /* { dg-options "-O2 -frounding-math -fno-signaling-nans -fsigned-zeros -fdump-tree-optimized" } */
-/* { dg-final { scan-tree-dump-times " \[+-] 0.0;" 32 "optimized" } } */
+/* { dg-final { scan-tree-dump-times " \\+ -?0.0;" 32 "optimized" } } */
 
 #include "pr90356-1.c"
diff --git a/gcc/testsuite/gcc.dg/vect/slp-49.c b/gcc/testsuite/gcc.dg/vect/slp-49.c
new file mode 100644
index 00000000000..4b3ec77b99d
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/slp-49.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_double } */
+
+void foo (double *x)
+{
+  for (int i = 0; i < 1024; ++i)
+    {
+      x[2*i] += 1.0;
+      x[2*i+1] -= 2.0;
+    }
+}
+
+/* { dg-final { scan-tree-dump "loop vectorized" "vect" } } */
+/* The loop should be SLPed with uniform plus.  */
+/* { dg-final { scan-tree-dump-not "VEC_PERM" "vect" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr54855-3.c b/gcc/testsuite/gcc.target/i386/pr54855-3.c
index 3c15dfc93d1..802d6882e43 100644
--- a/gcc/testsuite/gcc.target/i386/pr54855-3.c
+++ b/gcc/testsuite/gcc.target/i386/pr54855-3.c
@@ -1,6 +1,6 @@
 /* { dg-do compile } */
 /* { dg-options "-O2 -msse2 -mfpmath=sse" } */
-/* { dg-final { scan-assembler-times "subsd" 1 } } */
+/* { dg-final { scan-assembler-times "addsd" 1 } } */
 /* { dg-final { scan-assembler-not "movapd" } } */
 /* { dg-final { scan-assembler-not "movsd" } } */
 
diff --git a/gcc/testsuite/gfortran.dg/reassoc_1.f90 b/gcc/testsuite/gfortran.dg/reassoc_1.f90
index e3d84c4ffd9..19b47fc7ce6 100644
--- a/gcc/testsuite/gfortran.dg/reassoc_1.f90
+++ b/gcc/testsuite/gfortran.dg/reassoc_1.f90
@@ -9,4 +9,4 @@ end
 
 ! We need an explicit +5 and -5, and an intermediate ((bla)) expression
 ! (the reassoc barrier).  Make use of "." matching lineends.
-! { dg-final { scan-tree-dump "\\\+ 5.*\\\)\\\).* - 5" "optimized" } }
+! { dg-final { scan-tree-dump "\\\+ 5.*\\\)\\\).* \\\+ -5" "optimized" } }
diff --git a/gcc/testsuite/gfortran.dg/reassoc_2.f90 b/gcc/testsuite/gfortran.dg/reassoc_2.f90
index 3e323eb0134..62361619932 100644
--- a/gcc/testsuite/gfortran.dg/reassoc_2.f90
+++ b/gcc/testsuite/gfortran.dg/reassoc_2.f90
@@ -12,4 +12,4 @@ function test(a)
   test = d
 end
 
-! { dg-final { scan-tree-dump "- 5" "optimized" } }
+! { dg-final { scan-tree-dump "\\\+ -5" "optimized" } }
-- 
2.26.2