This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.
Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]
Other format:	[Raw text]
[PATCH]: PR29335 use MPFR for builtins fma, fmin and fmax

From: "Kaveh R. GHAZI" <ghazi at caip dot rutgers dot edu>
To: gcc-patches at gcc dot gnu dot org
Date: Tue, 7 Nov 2006 21:57:54 -0500 (EST)
Subject: [PATCH]: PR29335 use MPFR for builtins fma, fmin and fmax
This patch uses MPFR for builtins fma, fmin and fmax.

In addition, I updated the test macros to look at the sign of the results
to distinguish 0.0 vs -0.0.  There were lots of pre-existing tests
expecting 0.0 or -0.0 results, but since 0.0 == -0.0 it wasn't actually
testing that the correct sign of zero was returned AFAICT.  Thankfully
after adding the check, everything still works. :-)

Bootstrapped on sparc-sun-solaris2.10, no regressions and the new tests
all pass.

Okay for mainline?

		Thanks,
		--Kaveh


2006-11-04  Kaveh R. Ghazi  <ghazi@caip.rutgers.edu>

	* builtins.c (do_mpfr_arg3): New.
	(fold_builtin_1): Handle builtins fma, fmin and fmax.

testsuite:
	* gcc.dg/torture/builtin-math-2.c: Test builtin fma.
	* gcc.dg/torture/builtin-math-3.c (CKSGN_F, CKSGN, CKSGN_L):
	New macros. Use them in exact tests.
	(TESTIT3): New macro.
	Add tests for fmin, fmax and fma.

diff -rup orig/egcc-SVN20061104/gcc/builtins.c egcc-SVN20061104/gcc/builtins.c
--- orig/egcc-SVN20061104/gcc/builtins.c	2006-11-02 22:36:46.000000000 -0500
+++ egcc-SVN20061104/gcc/builtins.c	2006-11-04 21:35:51.281564682 -0500
@@ -207,6 +207,8 @@ static tree do_mpfr_arg1 (tree, tree, in
 			  const REAL_VALUE_TYPE *, const REAL_VALUE_TYPE *, bool);
 static tree do_mpfr_arg2 (tree, tree, tree,
 			  int (*)(mpfr_ptr, mpfr_srcptr, mpfr_srcptr, mp_rnd_t));
+static tree do_mpfr_arg3 (tree, tree, tree, tree,
+			  int (*)(mpfr_ptr, mpfr_srcptr, mpfr_srcptr, mpfr_srcptr, mp_rnd_t));
 static tree do_mpfr_sincos (tree, tree, tree);

 /* Return true if NODE should be considered for inline expansion regardless
@@ -9265,6 +9267,28 @@ fold_builtin_1 (tree fndecl, tree arglis
 			     type, mpfr_atan2);
     break;

+    CASE_FLT_FN (BUILT_IN_FMA):
+      if (validate_arglist (arglist, REAL_TYPE, REAL_TYPE, REAL_TYPE, VOID_TYPE))
+	return do_mpfr_arg3 (TREE_VALUE (arglist),
+			     TREE_VALUE (TREE_CHAIN (arglist)),
+			     TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist))),
+			     type, mpfr_fma);
+    break;
+
+    CASE_FLT_FN (BUILT_IN_FMIN):
+      if (validate_arglist (arglist, REAL_TYPE, REAL_TYPE, VOID_TYPE))
+	return do_mpfr_arg2 (TREE_VALUE (arglist),
+			     TREE_VALUE (TREE_CHAIN (arglist)),
+			     type, mpfr_min);
+    break;
+
+    CASE_FLT_FN (BUILT_IN_FMAX):
+      if (validate_arglist (arglist, REAL_TYPE, REAL_TYPE, VOID_TYPE))
+	return do_mpfr_arg2 (TREE_VALUE (arglist),
+			     TREE_VALUE (TREE_CHAIN (arglist)),
+			     type, mpfr_max);
+    break;
+
     CASE_FLT_FN (BUILT_IN_HYPOT):
       return fold_builtin_hypot (fndecl, arglist, type);

@@ -11602,6 +11626,52 @@ do_mpfr_arg2 (tree arg1, tree arg2, tree
   return result;
 }

+/* If arguments ARG1, ARG2 and ARG3 are all REAL_CSTs, call the
+   three-argument mpfr function FUNC on them and return the resulting
+   value as a tree with type TYPE.  The mpfr precision is set to the
+   precision of TYPE.  We assume that function FUNC returns zero if the
+   result could be calculated exactly within the requested precision.  */
+
+static tree
+do_mpfr_arg3 (tree arg1, tree arg2, tree arg3, tree type,
+	      int (*func)(mpfr_ptr, mpfr_srcptr, mpfr_srcptr, mpfr_srcptr, mp_rnd_t))
+{
+  tree result = NULL_TREE;
+
+  STRIP_NOPS (arg1);
+  STRIP_NOPS (arg2);
+  STRIP_NOPS (arg3);
+
+  if (TREE_CODE (arg1) == REAL_CST && ! TREE_CONSTANT_OVERFLOW (arg1)
+      && TREE_CODE (arg2) == REAL_CST && ! TREE_CONSTANT_OVERFLOW (arg2)
+      && TREE_CODE (arg3) == REAL_CST && ! TREE_CONSTANT_OVERFLOW (arg3))
+    {
+      const REAL_VALUE_TYPE *const ra1 = &TREE_REAL_CST (arg1);
+      const REAL_VALUE_TYPE *const ra2 = &TREE_REAL_CST (arg2);
+      const REAL_VALUE_TYPE *const ra3 = &TREE_REAL_CST (arg3);
+
+      if (!real_isnan (ra1) && !real_isinf (ra1)
+	  && !real_isnan (ra2) && !real_isinf (ra2)
+	  && !real_isnan (ra3) && !real_isinf (ra3))
+        {
+	  const int prec = REAL_MODE_FORMAT (TYPE_MODE (type))->p;
+	  int inexact;
+	  mpfr_t m1, m2, m3;
+
+	  mpfr_inits2 (prec, m1, m2, m3, NULL);
+	  mpfr_from_real (m1, ra1);
+	  mpfr_from_real (m2, ra2);
+	  mpfr_from_real (m3, ra3);
+	  mpfr_clear_flags();
+	  inexact = func (m1, m1, m2, m3, GMP_RNDN);
+	  result = do_mpfr_ckconv (m1, type, inexact);
+	  mpfr_clears (m1, m2, m3, NULL);
+	}
+    }
+
+  return result;
+}
+
 /* If argument ARG is a REAL_CST, call mpfr_sin_cos() on it and set
    the pointers *(ARG_SINP) and *(ARG_COSP) to the resulting values.
    The type is taken from the type of ARG and is used for setting the
diff -rup orig/egcc-SVN20061104/gcc/testsuite/gcc.dg/torture/builtin-math-2.c egcc-SVN20061104/gcc/testsuite/gcc.dg/torture/builtin-math-2.c
--- orig/egcc-SVN20061104/gcc/testsuite/gcc.dg/torture/builtin-math-2.c	2006-10-30 20:01:12.000000000 -0500
+++ egcc-SVN20061104/gcc/testsuite/gcc.dg/torture/builtin-math-2.c	2006-11-04 23:12:40.173765730 -0500
@@ -133,6 +133,18 @@ void bar()
   foo (__builtin_pow (__DBL_MAX__, -3.5));
   fool (__builtin_powl (__LDBL_MAX__, -3.5L));
   TESTIT2 (pow, 2.0, -0x1p50);
+
+  foof (__builtin_fmaf (__FLT_MAX__, __FLT_MAX__, 0.0F));
+  foof (__builtin_fmaf (__FLT_MAX__, 1.0F, __FLT_MAX__));
+  foof (__builtin_fmaf (__FLT_MIN__, __FLT_MIN__, 0.0F));
+
+  foo (__builtin_fma (__DBL_MAX__, __DBL_MAX__, 0.0));
+  foo (__builtin_fma (__DBL_MAX__, 1.0, __DBL_MAX__));
+  foo (__builtin_fma (__DBL_MIN__, __DBL_MIN__, 0.0));
+
+  fool (__builtin_fmal (__LDBL_MAX__, __LDBL_MAX__, 0.0L));
+  fool (__builtin_fmal (__LDBL_MAX__, 1.0L, __LDBL_MAX__));
+  fool (__builtin_fmal (__LDBL_MIN__, __LDBL_MIN__, 0.0L));
 }

 /* { dg-final { scan-tree-dump-times "exp2 " 9 "original" } } */
@@ -168,4 +180,7 @@ void bar()
 /* { dg-final { scan-tree-dump-times "pow " 13 "original" } } */
 /* { dg-final { scan-tree-dump-times "powf" 13 "original" } } */
 /* { dg-final { scan-tree-dump-times "powl" 13 "original" } } */
+/* { dg-final { scan-tree-dump-times "fma " 3 "original" } } */
+/* { dg-final { scan-tree-dump-times "fmaf" 3 "original" } } */
+/* { dg-final { scan-tree-dump-times "fmal" 3 "original" } } */
 /* { dg-final { cleanup-tree-dump "original" } } */
diff -rup orig/egcc-SVN20061104/gcc/testsuite/gcc.dg/torture/builtin-math-3.c egcc-SVN20061104/gcc/testsuite/gcc.dg/torture/builtin-math-3.c
--- orig/egcc-SVN20061104/gcc/testsuite/gcc.dg/torture/builtin-math-3.c	2006-11-01 22:21:38.000000000 -0500
+++ egcc-SVN20061104/gcc/testsuite/gcc.dg/torture/builtin-math-3.c	2006-11-04 23:13:53.311144586 -0500
@@ -13,13 +13,25 @@
 /* All references to link_error should go away at compile-time.  */
 extern void link_error(int);

+/* Return TRUE if the sign of X != sign of Y.  This is important when
+   comparing signed zeros.  */
+#define CKSGN_F(X,Y) \
+  (__builtin_copysignf(1.0F,(X)) != __builtin_copysignf(1.0F,(Y)))
+#define CKSGN(X,Y) \
+  (__builtin_copysign(1.0,(X)) != __builtin_copysign(1.0,(Y)))
+#define CKSGN_L(X,Y) \
+  (__builtin_copysignl(1.0L,(X)) != __builtin_copysignl(1.0L,(Y)))
+
 /* Test that FUNC(ARG) == (RES).  */
 #define TESTIT(FUNC,ARG,RES) do { \
-  if (__builtin_##FUNC##f(ARG##F) != RES##F) \
+  if (__builtin_##FUNC##f(ARG##F) != RES##F \
+      || CKSGN_F(__builtin_##FUNC##f(ARG##F),RES##F)) \
     link_error(__LINE__); \
-  if (__builtin_##FUNC(ARG) != RES) \
+  if (__builtin_##FUNC(ARG) != RES \
+      || CKSGN(__builtin_##FUNC(ARG),RES)) \
     link_error(__LINE__); \
-  if (__builtin_##FUNC##l(ARG##L) != RES##L) \
+  if (__builtin_##FUNC##l(ARG##L) != RES##L \
+      || CKSGN_L(__builtin_##FUNC##l(ARG##L),RES##L)) \
     link_error(__LINE__); \
   } while (0)

@@ -35,11 +47,14 @@ extern void link_error(int);

 /* Test that FUNC(ARG1, ARG2) == (RES).  */
 #define TESTIT2(FUNC,ARG1,ARG2,RES) do { \
-  if (__builtin_##FUNC##f(ARG1##F, ARG2##F) != RES##F) \
+  if (__builtin_##FUNC##f(ARG1##F, ARG2##F) != RES##F \
+      || CKSGN_F(__builtin_##FUNC##f(ARG1##F,ARG2##F),RES##F)) \
     link_error(__LINE__); \
-  if (__builtin_##FUNC(ARG1, ARG2) != RES) \
+  if (__builtin_##FUNC(ARG1, ARG2) != RES \
+      || CKSGN(__builtin_##FUNC(ARG1,ARG2),RES)) \
     link_error(__LINE__); \
-  if (__builtin_##FUNC##l(ARG1##L, ARG2##L) != RES##L) \
+  if (__builtin_##FUNC##l(ARG1##L, ARG2##L) != RES##L \
+      || CKSGN_L(__builtin_##FUNC##l(ARG1##L,ARG2##L),RES##L)) \
     link_error(__LINE__); \
   } while (0)

@@ -56,6 +71,19 @@ extern void link_error(int);
     link_error(__LINE__); \
   } while (0)

+/* Test that FUNC(ARG1, ARG2, ARG3) == (RES).  */
+#define TESTIT3(FUNC,ARG1,ARG2,ARG3,RES) do { \
+  if (__builtin_##FUNC##f(ARG1##F, ARG2##F, ARG3##F) != RES##F \
+      || CKSGN_F(__builtin_##FUNC##f(ARG1##F,ARG2##F,ARG3##F),RES##F)) \
+    link_error(__LINE__); \
+  if (__builtin_##FUNC(ARG1, ARG2, ARG3) != RES \
+      || CKSGN(__builtin_##FUNC(ARG1,ARG2,ARG3),RES)) \
+    link_error(__LINE__); \
+  if (__builtin_##FUNC##l(ARG1##L, ARG2##L, ARG3##L) != RES##L \
+      || CKSGN_L(__builtin_##FUNC##l(ARG1##L,ARG2##L,ARG3##L),RES##L)) \
+    link_error(__LINE__); \
+  } while (0)
+
 /* Test that for FUNC(ARG, &ARG_S, &ARG_C);
    assert (ARG_S == RES_S && ARG_C == RES_C);.  */
 #define TESTIT_2P(FUNC,ARG,ARG_S,ARG_C,RES_S,RES_C) do { \
@@ -264,5 +292,64 @@ int main (void)
   TESTIT2_R (atan2, -1.0, 0.0, -1.58, -1.57); /* atan2(-1,0) == -pi/2 */
   TESTIT2_R (atan2, 1.0, 0.0, 1.57, 1.58); /* atan2(1,0) == pi/2 */

+  TESTIT2 (fmin, 5.0, 6.0, 5.0); /* fmin(5,6) == 5 */
+  TESTIT2 (fmin, 6.0, 5.0, 5.0); /* fmin(6,5) == 5 */
+  TESTIT2 (fmin, -5.0, -6.0, -6.0); /* fmin(-5,-6) == -6 */
+  TESTIT2 (fmin, -6.0, -5.0, -6.0); /* fmin(-6,-5) == -6 */
+  TESTIT2 (fmin, -0.0, 0.0, -0.0); /* fmin(-0,0) == -0 */
+  TESTIT2 (fmin, 0.0, -0.0, -0.0); /* fmin(0,-0) == -0 */
+
+  TESTIT2 (fmax, 5.0, 6.0, 6.0); /* fmax(5,6) == 6 */
+  TESTIT2 (fmax, 6.0, 5.0, 6.0); /* fmax(6,5) == 6 */
+  TESTIT2 (fmax, -5.0, -6.0, -5.0); /* fmax(-5,-6) == -5 */
+  TESTIT2 (fmax, -6.0, -5.0, -5.0); /* fmax(-6,-5) == -5 */
+  TESTIT2 (fmax, -0.0, 0.0, 0.0); /* fmax(-0,0) == 0 */
+  TESTIT2 (fmax, 0.0, -0.0, 0.0); /* fmax(0,-0) == 0 */
+
+  TESTIT3 (fma, 2.0, 3.0, 4.0, 10.0); /* fma(2,3,4) == 10 */
+  TESTIT3 (fma, 2.0, -3.0, 4.0, -2.0); /* fma(2,-3,4) == -2 */
+  TESTIT3 (fma, 2.0, 3.0, -4.0, 2.0); /* fma(2,3,-4) == 2 */
+  TESTIT3 (fma, 2.0, -3.0, -4.0, -10.0); /* fma(2,-3,-4) == -10 */
+  TESTIT3 (fma, -2.0, -3.0, -4.0, 2.0); /* fma(-2,-3,-4) == 2 */
+  TESTIT3 (fma, 6.0, -0.0, 0.0, 0.0); /* fma(6,-0,0) == 0 */
+  TESTIT3 (fma, -0.0, 6.0, 0.0, 0.0); /* fma(-0,6,0) == 0 */
+  TESTIT3 (fma, 6.0, -0.0, -0.0, -0.0); /* fma(6,-0,-0) == -0 */
+  TESTIT3 (fma, -0.0, 6.0, -0.0, -0.0); /* fma(-0,6,-0) == -0 */
+  TESTIT3 (fma, 0.0, 0.0, 0.0, 0.0); /* fma(0,0,0) == 0 */
+  TESTIT3 (fma, -0.0, 0.0, 0.0, 0.0); /* fma(-0,0,0) == 0 */
+  TESTIT3 (fma, 0.0, -0.0, 0.0, 0.0); /* fma(0,-0,0) == 0 */
+  TESTIT3 (fma, -0.0, -0.0, 0.0, 0.0); /* fma(-0,-0,0) == 0 */
+  TESTIT3 (fma, 0.0, 0.0, -0.0, 0.0); /* fma(0,0,-0) == 0 */
+  TESTIT3 (fma, -0.0, 0.0, -0.0, -0.0); /* fma(-0,0,-0) == -0 */
+  TESTIT3 (fma, 0.0, -0.0, -0.0, -0.0); /* fma(0,-0,-0) == -0 */
+  TESTIT3 (fma, -0.0, -0.0, -0.0, 0.0); /* fma(-0,-0,-0) == 0 */
+
+  if (__builtin_fmaf(__FLT_MAX__, 2.0F, -__FLT_MAX__) != __FLT_MAX__)
+    link_error (__LINE__);
+  if (__builtin_fmaf(2.0F,__FLT_MAX__, -__FLT_MAX__) != __FLT_MAX__)
+    link_error (__LINE__);
+  if (__builtin_fmaf(__FLT_MIN__, 0.5F, __FLT_MIN__) != __FLT_MIN__*1.5F)
+    link_error (__LINE__);
+  if (__builtin_fmaf(0.5F,__FLT_MIN__, __FLT_MIN__) != __FLT_MIN__*1.5F)
+    link_error (__LINE__);
+
+  if (__builtin_fma(__DBL_MAX__, 2.0, -__DBL_MAX__) != __DBL_MAX__)
+    link_error (__LINE__);
+  if (__builtin_fma(2.0,__DBL_MAX__, -__DBL_MAX__) != __DBL_MAX__)
+    link_error (__LINE__);
+  if (__builtin_fma(__DBL_MIN__, 0.5, __DBL_MIN__) != __DBL_MIN__*1.5)
+    link_error (__LINE__);
+  if (__builtin_fma(0.5,__DBL_MIN__, __DBL_MIN__) != __DBL_MIN__*1.5)
+    link_error (__LINE__);
+
+  if (__builtin_fmal(__LDBL_MAX__, 2.0L, -__LDBL_MAX__) != __LDBL_MAX__)
+    link_error (__LINE__);
+  if (__builtin_fmal(2.0L,__LDBL_MAX__, -__LDBL_MAX__) != __LDBL_MAX__)
+    link_error (__LINE__);
+  if (__builtin_fmal(__LDBL_MIN__, 0.5L, __LDBL_MIN__) != __LDBL_MIN__*1.5L)
+    link_error (__LINE__);
+  if (__builtin_fmal(0.5L,__LDBL_MIN__, __LDBL_MIN__) != __LDBL_MIN__*1.5L)
+    link_error (__LINE__);
+
   return 0;
 }
Follow-Ups:
- Re: [PATCH]: PR29335 use MPFR for builtins fma, fmin and fmax
  - From: Andrew Pinski
- Re: [PATCH]: PR29335 use MPFR for builtins fma, fmin and fmax
  - From: Roger Sayle
Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]