This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

final rs6000 fused multiply-add patch


This is like the previous multiply-add patch I posted, but it actually
passes a bootstrap and testrun (I had a PLUS where I meant MINUS).

Bootstrapped & tested on powerpc-darwin.  

I'll commit it to mainline because it fixes an optimisation bug.

-- 
- Geoffrey Keating <geoffk@apple.com>

===File ~/patches/rs6000-fnmadd-2.patch=====================
Index: gcc/ChangeLog
2002-12-02  Geoffrey Keating  <geoffk@apple.com>

	* combine.c (combine_simplify_rtx): Add new canonicalizations.
	* doc/md.texi (Insn Canonicalizations): Document new
	canonicalizations for multiply/add combinations.
	* config/rs6000/rs6000.md: Add and modify floating add/multiply
	patterns to ensure they're used whenever they can be.

Index: gcc/testsuite/ChangeLog
2002-12-02  Geoffrey Keating  <geoffk@apple.com>

	* gcc.dg/ppc-fmadd-1.c: New file.
	* gcc.dg/ppc-fmadd-2.c: New file.
	* gcc.dg/ppc-fmadd-3.c: New file.

Index: gcc/combine.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/combine.c,v
retrieving revision 1.324
diff -u -p -u -p -r1.324 combine.c
--- gcc/combine.c	20 Nov 2002 09:43:19 -0000	1.324
+++ gcc/combine.c	5 Dec 2002 00:30:37 -0000
@@ -4029,6 +4029,24 @@ combine_simplify_rtx (x, op0_mode, last,
 	return gen_binary (MINUS, mode, XEXP (XEXP (x, 0), 1),
 			   XEXP (XEXP (x, 0), 0));
 
+      /* (neg (plus A B)) is canonicalized to (minus (neg A) B).  */
+      if (GET_CODE (XEXP (x, 0)) == PLUS
+	  && !HONOR_SIGNED_ZEROS (mode)
+	  && !HONOR_SIGN_DEPENDENT_ROUNDING (mode))
+	{
+	  temp = simplify_gen_unary (NEG, mode, XEXP (XEXP (x, 0), 0), mode);
+	  temp = combine_simplify_rtx (temp, mode, last, in_dest);
+	  return gen_binary (MINUS, mode, temp, XEXP (XEXP (x, 0), 1));
+	}
+
+      /* (neg (mult A B)) becomes (mult (neg A) B).  This also works for
+         floating-point values unless rounding is sign-dependent.  */
+      if (GET_CODE (XEXP (x, 0)) == MULT)
+	{
+	  temp = simplify_gen_unary (NEG, mode, XEXP (XEXP (x, 0), 0), mode);
+	  return gen_binary (MULT, mode, temp, XEXP (XEXP (x, 0), 1));
+	}
+
       /* (neg (xor A 1)) is (plus A -1) if A is known to be either 0 or 1.  */
       if (GET_CODE (XEXP (x, 0)) == XOR && XEXP (XEXP (x, 0), 1) == const1_rtx
 	  && nonzero_bits (XEXP (XEXP (x, 0), 0), mode) == 1)
@@ -4217,6 +4235,19 @@ combine_simplify_rtx (x, op0_mode, last,
 #endif
 
     case PLUS:
+      /* Canonicalize (plus (mult (neg B) C) A) to (minus A (mult B C)).
+       */
+      if (GET_CODE (XEXP (x, 0)) == MULT 
+	  && GET_CODE (XEXP (XEXP (x, 0), 0)) == NEG)
+	{
+	  rtx in1, in2;
+	 
+	  in1 = XEXP (XEXP (XEXP (x, 0), 0), 0);
+	  in2 = XEXP (XEXP (x, 0), 1);
+	  return gen_binary (MINUS, mode, XEXP (x, 1),
+			     gen_binary (MULT, mode, in1, in2));
+	}
+
       /* If we have (plus (plus (A const) B)), associate it so that CONST is
 	 outermost.  That's because that's the way indexed addresses are
 	 supposed to appear.  This code used to check many more cases, but
@@ -4322,6 +4353,32 @@ combine_simplify_rtx (x, op0_mode, last,
 	  && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
 	return simplify_and_const_int (NULL_RTX, mode, XEXP (x, 0),
 				       -INTVAL (XEXP (XEXP (x, 1), 1)) - 1);
+
+      /* Canonicalize (minus A (mult (neg B) C)) to (plus (mult B C) A).
+       */
+      if (GET_CODE (XEXP (x, 1)) == MULT 
+	  && GET_CODE (XEXP (XEXP (x, 1), 0)) == NEG)
+	{
+	  rtx in1, in2;
+	 
+	  in1 = XEXP (XEXP (XEXP (x, 1), 0), 0);
+	  in2 = XEXP (XEXP (x, 1), 1);
+	  return gen_binary (PLUS, mode, gen_binary (MULT, mode, in1, in2),
+			     XEXP (x, 0));
+	}
+
+      /* Canonicalize (minus (neg A) (mult B C)) to
+	 (minus (mult (neg B) C) A).  */
+      if (GET_CODE (XEXP (x, 1)) == MULT 
+	  && GET_CODE (XEXP (x, 0)) == NEG)
+	{
+	  rtx in1, in2;
+	 
+	  in1 = simplify_gen_unary (NEG, mode, XEXP (XEXP (x, 1), 0), mode);
+	  in2 = XEXP (XEXP (x, 1), 1);
+	  return gen_binary (MINUS, mode, gen_binary (MULT, mode, in1, in2),
+			     XEXP (XEXP (x, 0), 0));
+	}
 
       /* Canonicalize (minus A (plus B C)) to (minus (minus A B) C) for
 	 integers.  */
Index: gcc/config/rs6000/rs6000.md
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/rs6000/rs6000.md,v
retrieving revision 1.222
diff -u -p -u -p -r1.222 rs6000.md
--- gcc/config/rs6000/rs6000.md	16 Nov 2002 18:01:51 -0000	1.222
+++ gcc/config/rs6000/rs6000.md	5 Dec 2002 00:31:26 -0000
@@ -5280,7 +5280,18 @@
 	(neg:SF (plus:SF (mult:SF (match_operand:SF 1 "gpc_reg_operand" "%f")
 				  (match_operand:SF 2 "gpc_reg_operand" "f"))
 			 (match_operand:SF 3 "gpc_reg_operand" "f"))))]
-  "TARGET_POWERPC && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_FUSED_MADD"
+  "TARGET_POWERPC && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_FUSED_MADD
+   && HONOR_SIGNED_ZEROS (SFmode)"
+  "fnmadds %0,%1,%2,%3"
+  [(set_attr "type" "fp")])
+
+(define_insn ""
+  [(set (match_operand:SF 0 "gpc_reg_operand" "=f")
+	(minus:SF (mult:SF (neg:SF (match_operand:SF 1 "gpc_reg_operand" "f"))
+			   (match_operand:SF 2 "gpc_reg_operand" "f"))
+			 (match_operand:SF 3 "gpc_reg_operand" "f")))]
+  "TARGET_POWERPC && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_FUSED_MADD
+   && ! HONOR_SIGNED_ZEROS (SFmode)"
   "fnmadds %0,%1,%2,%3"
   [(set_attr "type" "fp")])
 
@@ -5295,10 +5306,31 @@
 
 (define_insn ""
   [(set (match_operand:SF 0 "gpc_reg_operand" "=f")
+	(minus:SF (mult:SF (neg:SF (match_operand:SF 1 "gpc_reg_operand" "f"))
+			   (match_operand:SF 2 "gpc_reg_operand" "f"))
+			 (match_operand:SF 3 "gpc_reg_operand" "f")))]
+  "! TARGET_POWERPC && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_FUSED_MADD
+   && ! HONOR_SIGNED_ZEROS (SFmode)"
+  "{fnma|fnmadd} %0,%1,%2,%3"
+  [(set_attr "type" "dmul")])
+
+(define_insn ""
+  [(set (match_operand:SF 0 "gpc_reg_operand" "=f")
 	(neg:SF (minus:SF (mult:SF (match_operand:SF 1 "gpc_reg_operand" "%f")
 				   (match_operand:SF 2 "gpc_reg_operand" "f"))
 			  (match_operand:SF 3 "gpc_reg_operand" "f"))))]
-  "TARGET_POWERPC && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_FUSED_MADD"
+  "TARGET_POWERPC && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_FUSED_MADD
+   && HONOR_SIGNED_ZEROS (SFmode)"
+  "fnmsubs %0,%1,%2,%3"
+  [(set_attr "type" "fp")])
+
+(define_insn ""
+  [(set (match_operand:SF 0 "gpc_reg_operand" "=f")
+	(minus:SF (match_operand:SF 3 "gpc_reg_operand" "f")
+		  (mult:SF (match_operand:SF 1 "gpc_reg_operand" "%f")
+			   (match_operand:SF 2 "gpc_reg_operand" "f"))))]
+  "TARGET_POWERPC && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_FUSED_MADD
+   && ! HONOR_SIGNED_ZEROS (SFmode)"
   "fnmsubs %0,%1,%2,%3"
   [(set_attr "type" "fp")])
 
@@ -5311,6 +5343,16 @@
   "{fnms|fnmsub} %0,%1,%2,%3"
   [(set_attr "type" "dmul")])
 
+(define_insn ""
+  [(set (match_operand:SF 0 "gpc_reg_operand" "=f")
+	(minus:SF (match_operand:SF 3 "gpc_reg_operand" "f")
+		  (mult:SF (match_operand:SF 1 "gpc_reg_operand" "%f")
+			   (match_operand:SF 2 "gpc_reg_operand" "f"))))]
+  "! TARGET_POWERPC && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_FUSED_MADD
+   && ! HONOR_SIGNED_ZEROS (SFmode)"
+  "{fnms|fnmsub} %0,%1,%2,%3"
+  [(set_attr "type" "fp")])
+
 (define_expand "sqrtsf2"
   [(set (match_operand:SF 0 "gpc_reg_operand" "")
 	(sqrt:SF (match_operand:SF 1 "gpc_reg_operand" "")))]
@@ -5524,7 +5566,18 @@
 	(neg:DF (plus:DF (mult:DF (match_operand:DF 1 "gpc_reg_operand" "%f")
 				  (match_operand:DF 2 "gpc_reg_operand" "f"))
 			 (match_operand:DF 3 "gpc_reg_operand" "f"))))]
-  "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_FUSED_MADD"
+  "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_FUSED_MADD
+   && HONOR_SIGNED_ZEROS (DFmode)"
+  "{fnma|fnmadd} %0,%1,%2,%3"
+  [(set_attr "type" "dmul")])
+
+(define_insn ""
+  [(set (match_operand:DF 0 "gpc_reg_operand" "=f")
+	(minus:DF (mult:DF (neg:DF (match_operand:DF 1 "gpc_reg_operand" "f"))
+			   (match_operand:DF 2 "gpc_reg_operand" "f"))
+		  (match_operand:DF 3 "gpc_reg_operand" "f")))]
+  "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_FUSED_MADD
+   && ! HONOR_SIGNED_ZEROS (DFmode)"
   "{fnma|fnmadd} %0,%1,%2,%3"
   [(set_attr "type" "dmul")])
 
@@ -5533,7 +5586,18 @@
 	(neg:DF (minus:DF (mult:DF (match_operand:DF 1 "gpc_reg_operand" "%f")
 				   (match_operand:DF 2 "gpc_reg_operand" "f"))
 			  (match_operand:DF 3 "gpc_reg_operand" "f"))))]
-  "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_FUSED_MADD"
+  "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_FUSED_MADD
+   && HONOR_SIGNED_ZEROS (DFmode)"
+  "{fnms|fnmsub} %0,%1,%2,%3"
+  [(set_attr "type" "dmul")])
+
+(define_insn ""
+  [(set (match_operand:DF 0 "gpc_reg_operand" "=f")
+	(minus:DF (match_operand:DF 3 "gpc_reg_operand" "f")
+	          (mult:DF (match_operand:DF 1 "gpc_reg_operand" "%f")
+			   (match_operand:DF 2 "gpc_reg_operand" "f"))))]
+  "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_FUSED_MADD 
+   && ! HONOR_SIGNED_ZEROS (DFmode)"
   "{fnms|fnmsub} %0,%1,%2,%3"
   [(set_attr "type" "dmul")])
 
Index: gcc/doc/md.texi
===================================================================
RCS file: /cvs/gcc/gcc/gcc/doc/md.texi,v
retrieving revision 1.53
diff -u -p -u -p -r1.53 md.texi
--- gcc/doc/md.texi	1 Nov 2002 07:05:57 -0000	1.53
+++ gcc/doc/md.texi	5 Dec 2002 00:31:30 -0000
@@ -3670,6 +3670,14 @@ For these operators, if only one operand
 @code{mult}, @code{plus}, or @code{minus} expression, it will be the
 first operand.
 
+@item
+In combinations of @code{neg}, @code{mult}, @code{plus}, and
+@code{minus}, the @code{neg} operations (if any) will be moved inside
+the operations as far as possible.  For instance, 
+@code{(neg (mult A B))} is canonicalized as @code{(mult (neg A) B)}, but
+@code{(plus (mult (neg B) C) A)} is canonicalized as
+@code{(minus A (mult B C))}.
+
 @cindex @code{compare}, canonicalization of
 @item
 For the @code{compare} operator, a constant is always the second operand
Index: gcc/testsuite/gcc.dg/ppc-fmadd-1.c
===================================================================
RCS file: gcc/testsuite/gcc.dg/ppc-fmadd-1.c
diff -N gcc/testsuite/gcc.dg/ppc-fmadd-1.c
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ gcc/testsuite/gcc.dg/ppc-fmadd-1.c	5 Dec 2002 00:32:29 -0000
@@ -0,0 +1,43 @@
+/* { dg-do compile { target powerpc*-*-* } } */
+/* { dg-options "-ffast-math -O2" } */
+/* { dg-final { scan-assembler-not "f(add|sub|mul|neg)" } } */
+
+void foo(double *a, double *b, double *c, double *d)
+{
+  a[0] =  b[0] + c[0] * d[0];		// fmadd
+  a[1] =  b[1] - c[1] * d[1];		// fnmsub with fast-math
+  a[2] = -b[2] + c[2] * d[2];   	// fmsub
+  a[3] = -b[3] - c[3] * d[3];		// fnmadd with fast-math
+  a[4] = -( b[4] + c[4] * d[4]);	// fnmadd
+  a[5] = -( b[5] - c[5] * d[5]);	// fmsub with fast-math
+  a[6] = -(-b[6] + c[6] * d[6]);	// fnmsub
+  a[7] = -(-b[7] - c[7] * d[7]);	// fmadd with fast-math
+  a[10] =  b[10] - c[10] * -d[10];	// fmadd
+  a[11] =  b[11] + c[11] * -d[11];	// fnmsub with fast-math
+  a[12] = -b[12] - c[12] * -d[12];   	// fmsub
+  a[13] = -b[13] + c[13] * -d[13];	// fnmadd with fast-math
+  a[14] = -( b[14] - c[14] * -d[14]);	// fnmadd
+  a[15] = -( b[15] + c[15] * -d[15]);	// fmsub with fast-math
+  a[16] = -(-b[16] - c[16] * -d[16]);	// fnmsub
+  a[17] = -(-b[17] + c[17] * -d[17]);	// fmadd with fast-math
+}
+
+void foos(float *a, float *b, float *c, float *d)
+{
+  a[0] =  b[0] + c[0] * d[0];		// fmadd
+  a[1] =  b[1] - c[1] * d[1];		// fnmsub with fast-math
+  a[2] = -b[2] + c[2] * d[2];   	// fmsub
+  a[3] = -b[3] - c[3] * d[3];		// fnmadd with fast-math
+  a[4] = -( b[4] + c[4] * d[4]);	// fnmadd
+  a[5] = -( b[5] - c[5] * d[5]);	// fmsub with fast-math
+  a[6] = -(-b[6] + c[6] * d[6]);	// fnmsub
+  a[7] = -(-b[7] - c[7] * d[7]);	// fmadd with fast-math
+  a[10] =  b[10] - c[10] * -d[10];	// fmadd
+  a[11] =  b[11] + c[11] * -d[11];	// fnmsub with fast-math
+  a[12] = -b[12] - c[12] * -d[12];   	// fmsub
+  a[13] = -b[13] + c[13] * -d[13];	// fnmadd with fast-math
+  a[14] = -( b[14] - c[14] * -d[14]);	// fnmadd
+  a[15] = -( b[15] + c[15] * -d[15]);	// fmsub with fast-math
+  a[16] = -(-b[16] - c[16] * -d[16]);	// fnmsub
+  a[17] = -(-b[17] + c[17] * -d[17]);	// fmadd with fast-math
+}
Index: gcc/testsuite/gcc.dg/ppc-fmadd-2.c
===================================================================
RCS file: gcc/testsuite/gcc.dg/ppc-fmadd-2.c
diff -N gcc/testsuite/gcc.dg/ppc-fmadd-2.c
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ gcc/testsuite/gcc.dg/ppc-fmadd-2.c	5 Dec 2002 00:32:29 -0000
@@ -0,0 +1,27 @@
+/* { dg-do compile { target powerpc*-*-* } } */
+/* { dg-options "-O2" } */
+/* { dg-final { scan-assembler-not "f(add|sub|mul|neg)" } } */
+
+void foo(double *a, double *b, double *c, double *d)
+{
+  a[0] =  b[0] + c[0] * d[0];		// fmadd
+  a[2] = -b[2] + c[2] * d[2];   	// fmsub
+  a[4] = -( b[4] + c[4] * d[4]);	// fnmadd
+  a[6] = -(-b[6] + c[6] * d[6]);	// fnmsub
+  a[10] =  b[10] - c[10] * -d[10];	// fmadd
+  a[12] = -b[12] - c[12] * -d[12];   	// fmsub
+  a[14] = -( b[14] - c[14] * -d[14]);	// fnmadd
+  a[16] = -(-b[16] - c[16] * -d[16]);	// fnmsub
+}
+
+void foos(float *a, float *b, float *c, float *d)
+{
+  a[0] =  b[0] + c[0] * d[0];		// fmadd
+  a[2] = -b[2] + c[2] * d[2];   	// fmsub
+  a[4] = -( b[4] + c[4] * d[4]);	// fnmadd
+  a[6] = -(-b[6] + c[6] * d[6]);	// fnmsub
+  a[10] =  b[10] - c[10] * -d[10];	// fmadd
+  a[12] = -b[12] - c[12] * -d[12];   	// fmsub
+  a[14] = -( b[14] - c[14] * -d[14]);	// fnmadd
+  a[16] = -(-b[16] - c[16] * -d[16]);	// fnmsub
+}
Index: gcc/testsuite/gcc.dg/ppc-fmadd-3.c
===================================================================
RCS file: gcc/testsuite/gcc.dg/ppc-fmadd-3.c
diff -N gcc/testsuite/gcc.dg/ppc-fmadd-3.c
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ gcc/testsuite/gcc.dg/ppc-fmadd-3.c	5 Dec 2002 00:32:29 -0000
@@ -0,0 +1,36 @@
+/* { dg-do compile { target powerpc*-*-* } } */
+/* { dg-options "-O2" } */
+/* { dg-final { scan-assembler-not "f(add|sub|mul)" } } */
+
+void foo(double *a, double *b, double *c, double *d)
+{
+#if 0
+  a[1] =  b[1] - c[1] * d[1];		// fneg, fmadd without fast-math
+#endif
+  a[3] = -b[3] - c[3] * d[3];		// fneg, fmsub without fast-math
+#if 0
+  a[5] = -( b[5] - c[5] * d[5]);	// fneg, fnmadd without fast-math
+#endif
+  a[7] = -(-b[7] - c[7] * d[7]);	// fneg, fnmsub without fast-math
+  a[11] =  b[11] + c[11] * -d[11];	// fneg, fmadd without fast-math
+  a[13] = -b[13] + c[13] * -d[13];	// fneg, fmsub without fast-math
+  a[15] = -( b[15] + c[15] * -d[15]);	// fneg, fnmadd without fast-math
+  a[17] = -(-b[17] + c[17] * -d[17]);	// fneg, fnmsub without fast-math
+}
+
+void foos(float *a, float *b, float *c, float *d)
+{
+#if 0
+  a[1] =  b[1] - c[1] * d[1];		// fneg, fmadd without fast-math
+#endif
+  a[3] = -b[3] - c[3] * d[3];		// fneg, fmsub without fast-math
+#if 0
+  a[5] = -( b[5] - c[5] * d[5]);	// fneg, fnmadd without fast-math
+#endif
+  a[7] = -(-b[7] - c[7] * d[7]);	// fneg, fnmsub without fast-math
+  a[11] =  b[11] + c[11] * -d[11];	// fneg, fmadd without fast-math
+  a[13] = -b[13] + c[13] * -d[13];	// fneg, fmsub without fast-math
+  a[15] = -( b[15] + c[15] * -d[15]);	// fneg, fnmadd without fast-math
+  a[17] = -(-b[17] + c[17] * -d[17]);	// fneg, fnmsub without fast-math
+}
+
============================================================


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]