[PATCH] Add _Float<N>/_Float<N>X rounding built-ins & improve gimple optimization of _Float<N>/_Float<N>X built-in functions

Fri Dec 29 05:36:00 GMT 2017

On Thu, Dec 21, 2017 at 01:03:26PM -0600, Segher Boessenkool wrote:
> On Thu, Dec 21, 2017 at 06:16:16PM +0000, Joseph Myers wrote:
> > On Fri, 17 Nov 2017, Michael Meissner wrote:
> > The architecture-independent changes are OK.  However, I have a comment on 
> > the target parts:
> > 
> > > +(define_insn "round<mode>2"
> > > +  [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v")
> > > +	(unspec:IEEE128
> > > +	 [(match_operand:IEEE128 1 "altivec_register_operand" "v")]
> > > +	 UNSPEC_FRIN))]
> > > +  "TARGET_FLOAT128_HW && FLOAT128_IEEE_P (<MODE>mode)"
> > > +  "xsrqpi 0,%0,%1,3"
> > > +  [(set_attr "type" "vecfloat")
> > > +   (set_attr "size" "128")])
> > 
> > My reading of Power ISA 3.0B documentation is that 0,%0,%1,3 means round 
> > in the mode specified by FPSCR and you need 0,%0,%1,0 for 
> > round-to-nearest-away semantics which are what the round<mode>2 
> > instruction has (i.e., what you've written here is actually correct for 
> > nearbyint<mode>2, and would be rint<mode>2 if xsrqpix were used instead).  
> 
> Ah yes, the roundM2 insn is round-away-from-zero, so you are right.
> Tricky, from the name I assumed it would be "current rounding mode" :-/
> Not that "frin" would make sense if that were true.
> 
> Thanks!  And thanks for all the reviews in general.

Here is the corrected rs6000 part of the patch.  I added more round tests and I
checked it on a power9 prototype machine.  Roundf128 now produces the correct
answer.  Can I check this into the trunk?

[gcc]
2017-12-29  Michael Meissner  <meissner@linux.vnet.ibm.com>

	* config/rs6000/rs6000.md (floor<mode>2): Add support for IEEE
	128-bit round to integer instructions.
	(ceil<mode>2): Likewise.
	(btrunc<mode>2): Likewise.
	(round<mode>2): Likewise.

[gcc/testsuite]
2017-12-29  Michael Meissner  <meissner@linux.vnet.ibm.com>

	* gcc.target/powerpc/float128-hw2.c: Add tests for ceilf128,
	floorf128, truncf128, and roundf128.
	* gcc.target/powerpc/float128-hw5.c: New tests for _Float128
	optimizations added in match.pd.
	* gcc.target/powerpc/float128-hw6.c: Likewise.
	* gcc.target/powerpc/float128-hw7.c: Likewise.
	* gcc.target/powerpc/float128-hw8.c: Likewise.
	* gcc.target/powerpc/float128-hw9.c: Likewise.
	* gcc.target/powerpc/float128-hw10.c: Likewise.
	* gcc.target/powerpc/float128-hw11.c: Likewise.

-- 
Michael Meissner, IBM
IBM, M/S 2506R, 550 King Street, Littleton, MA 01460-6245, USA
email: meissner@linux.vnet.ibm.com, phone: +1 (978) 899-4797
-------------- next part --------------
Index: gcc/config/rs6000/rs6000.md
===================================================================

--- gcc/config/rs6000/rs6000.md	(revision 256026)
+++ gcc/config/rs6000/rs6000.md	(working copy)
@@ -14777,6 +14777,47 @@ (define_insn_and_split "floatuns<QHI:mod
    (set_attr "type" "vecfloat")
    (set_attr "size" "128")])
 
+;; IEEE 128-bit round to integer built-in functions
+(define_insn "floor<mode>2"
+  [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v")
+	(unspec:IEEE128
+	 [(match_operand:IEEE128 1 "altivec_register_operand" "v")]
+	 UNSPEC_FRIM))]
+  "TARGET_FLOAT128_HW && FLOAT128_IEEE_P (<MODE>mode)"
+  "xsrqpi 1,%0,%1,3"
+  [(set_attr "type" "vecfloat")
+   (set_attr "size" "128")])
+
+(define_insn "ceil<mode>2"
+  [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v")
+	(unspec:IEEE128
+	 [(match_operand:IEEE128 1 "altivec_register_operand" "v")]
+	 UNSPEC_FRIP))]
+  "TARGET_FLOAT128_HW && FLOAT128_IEEE_P (<MODE>mode)"
+  "xsrqpi 1,%0,%1,2"
+  [(set_attr "type" "vecfloat")
+   (set_attr "size" "128")])
+
+(define_insn "btrunc<mode>2"
+  [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v")
+	(unspec:IEEE128
+	 [(match_operand:IEEE128 1 "altivec_register_operand" "v")]
+	 UNSPEC_FRIZ))]
+  "TARGET_FLOAT128_HW && FLOAT128_IEEE_P (<MODE>mode)"
+  "xsrqpi 1,%0,%1,1"
+  [(set_attr "type" "vecfloat")
+   (set_attr "size" "128")])
+
+(define_insn "round<mode>2"
+  [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v")
+	(unspec:IEEE128
+	 [(match_operand:IEEE128 1 "altivec_register_operand" "v")]
+	 UNSPEC_FRIN))]
+  "TARGET_FLOAT128_HW && FLOAT128_IEEE_P (<MODE>mode)"
+  "xsrqpi 0,%0,%1,0"
+  [(set_attr "type" "vecfloat")
+   (set_attr "size" "128")])
+
 ;; IEEE 128-bit instructions with round to odd semantics
 (define_insn "add<mode>3_odd"
   [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v")
Index: gcc/testsuite/gcc.target/powerpc/float128-hw2.c
===================================================================
--- gcc/testsuite/gcc.target/powerpc/float128-hw2.c	(revision 256026)
+++ gcc/testsuite/gcc.target/powerpc/float128-hw2.c	(working copy)
@@ -14,6 +14,10 @@
 extern _Float128 copysignf128 (_Float128, _Float128);
 extern _Float128 sqrtf128 (_Float128);
 extern _Float128 fmaf128 (_Float128, _Float128, _Float128);
+extern _Float128 ceilf128 (_Float128);
+extern _Float128 floorf128 (_Float128);
+extern _Float128 truncf128 (_Float128);
+extern _Float128 roundf128 (_Float128);
 
 _Float128
 do_copysign (_Float128 a, _Float128 b)
@@ -51,10 +55,35 @@ do_nfms (_Float128 a, _Float128 b, _Floa
   return -fmaf128 (a, b, -c);
 }
 
+_Float128
+do_ceil (_Float128 a)
+{
+  return ceilf128 (a);
+}
+
+_Float128
+do_floor (_Float128 a)
+{
+  return floorf128 (a);
+}
+
+_Float128
+do_trunc (_Float128 a)
+{
+  return truncf128 (a);
+}
+
+_Float128
+do_round (_Float128 a)
+{
+  return roundf128 (a);
+}
+
 /* { dg-final { scan-assembler     {\mxscpsgnqp\M} } } */
 /* { dg-final { scan-assembler     {\mxssqrtqp\M}  } } */
 /* { dg-final { scan-assembler     {\mxsmaddqp\M}  } } */
 /* { dg-final { scan-assembler     {\mxsmsubqp\M}  } } */
 /* { dg-final { scan-assembler     {\mxsnmaddqp\M} } } */
 /* { dg-final { scan-assembler     {\mxsnmsubqp\M} } } */
+/* { dg-final { scan-assembler     {\mxsrqpi\M}    } } */
 /* { dg-final { scan-assembler-not {\mbl\M}        } } */
Index: gcc/testsuite/gcc.target/powerpc/float128-hw5.c
===================================================================
--- gcc/testsuite/gcc.target/powerpc/float128-hw5.c	(nonexistent)
+++ gcc/testsuite/gcc.target/powerpc/float128-hw5.c	(working copy)
@@ -0,0 +1,33 @@
+/* { dg-do compile { target { powerpc*-*-* && lp64 } } } */
+/* { dg-require-effective-target powerpc_p9vector_ok } */
+/* { dg-options "-mpower9-vector -O2 -ffast-math" } */
+
+extern _Float128 copysignf128 (_Float128, _Float128);
+
+/* Check copysign optimizations that are done for double are also done for
+   _Float128.  */
+
+_Float128
+x_times_cs_one_negx (_Float128 x)
+{
+  return x * copysignf128 (1.0Q, -x);	/* XSNABSQP  */
+}
+
+_Float128
+x_times_cs_one_x (_Float128 x)
+{
+  return x * copysignf128 (1.0Q, x);	/* XSABSQP  */
+}
+
+_Float128
+cs_x_x (_Float128 x)
+{
+  return copysignf128 (x, x);		/* no operation.  */
+}
+
+/* { dg-final { scan-assembler-times {\mxsabsqp\M}  1 } } */
+/* { dg-final { scan-assembler-times {\mxsnabsqp\M} 1 } } */
+/* { dg-final { scan-assembler-not   {\mxscpsgnqp\M}  } } */
+/* { dg-final { scan-assembler-not   {\mlxvx\M}       } } */
+/* { dg-final { scan-assembler-not   {\mlxv\M}        } } */
+/* { dg-final { scan-assembler-not   {\mbl\M}         } } */
Index: gcc/testsuite/gcc.target/powerpc/float128-hw6.c
===================================================================
--- gcc/testsuite/gcc.target/powerpc/float128-hw6.c	(nonexistent)
+++ gcc/testsuite/gcc.target/powerpc/float128-hw6.c	(working copy)
@@ -0,0 +1,26 @@
+/* { dg-do compile { target { powerpc*-*-* && lp64 } } } */
+/* { dg-require-effective-target powerpc_p9vector_ok } */
+/* { dg-options "-mpower9-vector -O2" } */
+
+extern _Float128 fabsf128 (_Float128);
+extern _Float128 copysignf128 (_Float128, _Float128);
+
+/* Check copysign optimizations that are done for double are also done for
+   _Float128.  */
+
+_Float128
+cs_negx_y (_Float128 x, _Float128 y)
+{
+  return copysignf128 (-x, y);			/* eliminate negation.  */
+}
+
+_Float128
+cs_absx_y (_Float128 x, _Float128 y)
+{
+  return copysignf128 (fabsf128 (x), y);	/* eliminate fabsf128.  */
+}
+
+/* { dg-final { scan-assembler-times {\mxscpsgnqp\M} 2 } } */
+/* { dg-final { scan-assembler-not   {\mxsnegqp\M}     } } */
+/* { dg-final { scan-assembler-not   {\mxsabsqp\M}     } } */
+/* { dg-final { scan-assembler-not   {\mbl\M}          } } */
Index: gcc/testsuite/gcc.target/powerpc/float128-hw7.c
===================================================================
--- gcc/testsuite/gcc.target/powerpc/float128-hw7.c	(nonexistent)
+++ gcc/testsuite/gcc.target/powerpc/float128-hw7.c	(working copy)
@@ -0,0 +1,27 @@
+/* { dg-do compile { target { powerpc*-*-* && lp64 } } } */
+/* { dg-require-effective-target powerpc_p9vector_ok } */
+/* { dg-options "-mpower9-vector -O2" } */
+
+extern _Float128 fabsf128 (_Float128);
+extern _Float128 copysignf128 (_Float128, _Float128);
+
+/* Check copysign optimizations that are done for double are also done for
+   _Float128.  */
+
+_Float128
+cs_x_pos1 (_Float128 x)
+{
+  return copysignf128 (x, 1.0Q);		/* XSABSQP.  */
+}
+
+_Float128 cs_x_neg2 (_Float128 x)
+{
+  return copysignf128 (x, -2.0Q);		/* XSNABSQP.  */
+}
+
+/* { dg-final { scan-assembler-times {\mxsabsqp\M}  1 } } */
+/* { dg-final { scan-assembler-not   {\mxsnabsqp\M} 1 } } */
+/* { dg-final { scan-assembler-not   {\mxscpsgnqp\M}  } } */
+/* { dg-final { scan-assembler-not   {\mlxvx\M}       } } */
+/* { dg-final { scan-assembler-not   {\mlxv\M}        } } */
+/* { dg-final { scan-assembler-not   {\mbl\M}         } } */
Index: gcc/testsuite/gcc.target/powerpc/float128-hw8.c
===================================================================
--- gcc/testsuite/gcc.target/powerpc/float128-hw8.c	(nonexistent)
+++ gcc/testsuite/gcc.target/powerpc/float128-hw8.c	(working copy)
@@ -0,0 +1,24 @@
+/* { dg-do compile { target { powerpc*-*-* && lp64 } } } */
+/* { dg-require-effective-target powerpc_p9vector_ok } */
+/* { dg-options "-mpower9-vector -O2" } */
+
+extern _Float128 fminf128 (_Float128, _Float128);
+extern _Float128 fmaxf128 (_Float128, _Float128);
+
+/* Check min/max optimizations that are done for double are also done for
+   _Float128.  */
+
+_Float128
+min_x_x (_Float128 x)
+{
+  return fminf128 (x, x);
+}
+
+_Float128
+max_x_x (_Float128 x)
+{
+  return fmaxf128 (x, x);
+}
+
+/* { dg-final { scan-assembler-not {\mxscmpuqp\M} } } */
+/* { dg-final { scan-assembler-not {\mbl\M}       } } */
Index: gcc/testsuite/gcc.target/powerpc/float128-hw9.c
===================================================================
--- gcc/testsuite/gcc.target/powerpc/float128-hw9.c	(nonexistent)
+++ gcc/testsuite/gcc.target/powerpc/float128-hw9.c	(working copy)
@@ -0,0 +1,17 @@
+/* { dg-do compile { target { powerpc*-*-* && lp64 } } } */
+/* { dg-require-effective-target powerpc_p9vector_ok } */
+/* { dg-options "-mpower9-vector -O2 -ffast-math" } */
+
+extern _Float128 sqrtf128 (_Float128);
+
+/* Check sqrt optimizations that are done for double are also done for
+   _Float128.  */
+
+_Float128
+sqrt_x_times_sqrt_x (_Float128 x)
+{
+  return sqrtf128 (x) * sqrtf128 (x);
+}
+
+/* { dg-final { scan-assembler-not {\mxssqrtqp\M} } } */
+/* { dg-final { scan-assembler-not {\mbl\M}       } } */
Index: gcc/testsuite/gcc.target/powerpc/float128-hw10.c
===================================================================
--- gcc/testsuite/gcc.target/powerpc/float128-hw10.c	(nonexistent)
+++ gcc/testsuite/gcc.target/powerpc/float128-hw10.c	(working copy)
@@ -0,0 +1,38 @@
+/* { dg-do compile { target { powerpc*-*-* && lp64 } } } */
+/* { dg-require-effective-target powerpc_p9vector_ok } */
+/* { dg-options "-mpower9-vector -O2" } */
+
+extern _Float128 floorf128 (_Float128);
+extern _Float128 ceilf128 (_Float128);
+extern _Float128 roundf128 (_Float128);
+extern _Float128 truncf128 (_Float128);
+
+/* Check rounding optimizations that are done for double are also done for
+   _Float128.  */
+
+_Float128
+floor_floor_x (_Float128 x)
+{
+  return floorf128 (floorf128 (x));
+}
+
+_Float128
+ceil_ceil_x (_Float128 x)
+{
+  return ceilf128 (ceilf128 (x));
+}
+
+_Float128
+trunc_trunc_x (_Float128 x)
+{
+  return truncf128 (truncf128 (x));
+}
+
+_Float128
+round_round_x (_Float128 x)
+{
+  return roundf128 (roundf128 (x));
+}
+
+/* { dg-final { scan-assembler-times {\mxsrqpi\M} 4 } } */
+/* { dg-final { scan-assembler-not   {\mbl\M}       } } */
Index: gcc/testsuite/gcc.target/powerpc/float128-hw11.c
===================================================================
--- gcc/testsuite/gcc.target/powerpc/float128-hw11.c	(nonexistent)
+++ gcc/testsuite/gcc.target/powerpc/float128-hw11.c	(working copy)
@@ -0,0 +1,85 @@
+/* { dg-do run { target { powerpc*-*-* && lp64 } } } */
+/* { dg-require-effective-target p9vector_hw } */
+/* { dg-options "-mpower9-vector -O2" } */
+
+#define __STDC_WANT_IEC_60559_TYPES_EXT__ 1
+#define __STDC_WANT_IEC_60559_FUNCS_EXT__ 1
+
+#include <math.h>
+#include <stdlib.h>
+#include <stddef.h>
+
+#ifdef DEBUG
+#include <stdio.h>
+#define FAIL(T,G,E) printf ("%s: %g %g\n", (T), (G), (E))
+#else
+#define FAIL(T,G,E) abort ()
+#endif
+
+extern _Float128 roundf128 (_Float128);
+extern _Float128 floorf128 (_Float128);
+extern _Float128 ceilf128  (_Float128);
+extern _Float128 truncf128 (_Float128);
+
+static const _Float128 values[] = {
+  -3.7f128,
+  -3.5f128,
+  -3.3f128,
+  -3.0f128,
+  -2.7f128,
+  -2.5f128,
+  -2.3f128,
+  -2.0f128,
+  -1.7f128,
+  -1.5f128,
+  -1.3f128,
+  -0.7f128,
+  -0.5f128,
+  -0.3f128,
+  +0.0f128,
+  +0.3f128,
+  +0.5f128,
+  +0.7f128,
+  +1.0f128,
+  +1.3f128,
+  +1.5f128,
+  +1.7f128,
+  +2.0f128,
+  +2.3f128,
+  +2.5f128,
+  +2.7f128,
+  +3.0f128,
+  +3.3f128,
+  +3.5f128,
+  +3.7f128,
+};
+
+__attribute__((__noinline__))
+void
+do_test (const char *test, _Float128 got, double expected)
+{
+  double got2 = (double) got;
+  if (got2 != expected)
+    FAIL (test, got2, expected);
+}
+
+int
+main (void)
+{
+  size_t i;
+  _Float128 v;
+  double v2;
+
+  for (i = 0; i < sizeof (values) / sizeof (values[0]); i++)
+    {
+      v = values[i];
+      v2 = (double) v;
+
+      do_test ("roundf128", roundf128 (v), round (v2));
+      do_test ("floorf128", floorf128 (v), floor (v2));
+      do_test ("ceilf128",  ceilf128  (v), ceil  (v2));
+      do_test ("truncf128", truncf128 (v), trunc (v2));
+    }
+
+  return 0;
+}