This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[PATCH, rs6000, libgcc] Implement temporary solution for __divkc3 and __mulkc3
- From: Bill Schmidt <wschmidt at linux dot vnet dot ibm dot com>
- To: GCC Patches <gcc-patches at gcc dot gnu dot org>
- Cc: Segher Boessenkool <segher at kernel dot crashing dot org>, David Edelsohn <dje dot gcc at gmail dot com>, iant at golang dot org
- Date: Mon, 11 Jul 2016 14:38:50 -0500
- Subject: [PATCH, rs6000, libgcc] Implement temporary solution for __divkc3 and __mulkc3
- Authentication-results: sourceware.org; auth=none
Hi,
It was recently brought to my attention that glibc needs access to complex
multiply and divide for IEEE-128 floating-point in GCC 6.2 in order to move
ahead with the library implementation work. This patch enables this support
using only target-specific changes to avoid any possible effect on other
targets. This is not the correct long-term approach, and I am working on a
patch that instead makes use of the common infrastructure. The plan is to
use the current patch for GCC 6, and replace it with the other approach in
GCC 7 shortly.
Thus this patch copies the common code for complex multiply and divide out
of libgcc2.c into separate Power-specific files, and specializes it for
the KC type. It adds a couple of straightforward tests to verify that the
approach works. I've tested the code generated for these tests on a POWER9
simulator as well as a POWER8 machine.
Bootstrapped and tested on powerpc64le-unknown-linux-gnu with no regressions.
I've also asked the glibc team to verify that this serves their requirements.
Is this ok for trunk, and for gcc-6-branch after a short burn-in period?
Thanks,
Bill
[libgcc]
2016-07-11 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
* config/rs6000/_divkc3.c: New.
* config/rs6000/_mulkc3.c: New.
* config/rs6000/quad-float128.h: Define TCtype; declare __mulkc3
and __divkc3.
* config/rs6000/t-float128: Add _mulkc3 and _divkc3 to
fp128_ppc_funcs.
[gcc/testsuite]
2016-07-11 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
* gcc.target/powerpc/divkc3-1.c: New.
* gcc.target/powerpc/mulkc3-1.c: New.
Index: gcc/testsuite/gcc.target/powerpc/divkc3-1.c
===================================================================
--- gcc/testsuite/gcc.target/powerpc/divkc3-1.c (revision 0)
+++ gcc/testsuite/gcc.target/powerpc/divkc3-1.c (working copy)
@@ -0,0 +1,22 @@
+/* { dg-do run { target { powerpc64*-*-* && vsx_hw } } } */
+/* { dg-options "-mfloat128 -mvsx" } */
+
+void abort ();
+
+typedef __complex float __cfloat128 __attribute__((mode(KC))); /* Complex IEEE 128-bit float. */
+
+__cfloat128 divide (__cfloat128 x, __cfloat128 y)
+{
+ return x / y; /* KCmode complex division, the operation under test. */
+}
+
+__cfloat128 z, a; /* z: computed quotient; a: expected quotient. */
+
+int main ()
+{
+ z = divide (5.0q + 5.0jq, 2.0q + 1.0jq); /* (5 + 5i) / (2 + i) */
+ a = 3.0q + 1.0jq; /* Exact answer: (5 + 5i)(2 - i) / 5 = 3 + i. */
+ if (z != a)
+ abort (); /* Any mismatch in either part fails the test. */
+ return 0;
+}
Index: gcc/testsuite/gcc.target/powerpc/mulkc3-1.c
===================================================================
--- gcc/testsuite/gcc.target/powerpc/mulkc3-1.c (revision 0)
+++ gcc/testsuite/gcc.target/powerpc/mulkc3-1.c (working copy)
@@ -0,0 +1,22 @@
+/* { dg-do run { target { powerpc64*-*-* && vsx_hw } } } */
+/* { dg-options "-mfloat128 -mvsx" } */
+
+void abort ();
+
+typedef __complex float __cfloat128 __attribute__((mode(KC))); /* Complex IEEE 128-bit float. */
+
+__cfloat128 multiply (__cfloat128 x, __cfloat128 y)
+{
+ return x * y; /* KCmode complex multiplication, the operation under test. */
+}
+
+__cfloat128 z, a; /* z: computed product; a: expected product. */
+
+int main ()
+{
+ z = multiply (2.0q + 1.0jq, 3.0q + 1.0jq); /* (2 + i) * (3 + i) */
+ a = 5.0q + 5.0jq; /* Exact answer: (6 - 1) + (2 + 3)i = 5 + 5i. */
+ if (z != a)
+ abort (); /* Any mismatch in either part fails the test. */
+ return 0;
+}
Index: libgcc/config/rs6000/_divkc3.c
===================================================================
--- libgcc/config/rs6000/_divkc3.c (revision 0)
+++ libgcc/config/rs6000/_divkc3.c (working copy)
@@ -0,0 +1,64 @@
+typedef float KFtype __attribute__ ((mode (KF))); /* IEEE 128-bit real. */
+typedef __complex float KCtype __attribute__ ((mode (KC))); /* Complex counterpart. */
+
+#define COPYSIGN(x,y) __builtin_copysignq (x, y) /* Helper names below map onto GCC builtins. */
+#define INFINITY __builtin_infq ()
+#define FABS __builtin_fabsq
+#define isnan __builtin_isnan
+#define isinf __builtin_isinf
+#define isfinite __builtin_isfinite
+
+KCtype
+__divkc3 (KFtype a, KFtype b, KFtype c, KFtype d) /* Returns (a + ib) / (c + id). */
+{
+ KFtype denom, ratio, x, y;
+ KCtype res;
+
+ /* ??? We can get better behavior from logarithmic scaling instead of
+ the division. But that would mean starting to link libgcc against
+ libm. We could implement something akin to ldexp/frexp as gcc builtins
+ fairly easily... */
+ if (FABS (c) < FABS (d))
+ {
+ ratio = c / d; /* |c| < |d|: scale by c/d so that |ratio| < 1. */
+ denom = (c * ratio) + d; /* Algebraically (c*c + d*d) / d. */
+ x = ((a * ratio) + b) / denom; /* Real part: (a*c + b*d) / (c*c + d*d). */
+ y = ((b * ratio) - a) / denom; /* Imaginary part: (b*c - a*d) / (c*c + d*d). */
+ }
+ else
+ {
+ ratio = d / c; /* Otherwise scale by d/c instead. */
+ denom = (d * ratio) + c; /* Algebraically (c*c + d*d) / c. */
+ x = ((b * ratio) + a) / denom; /* Same real part, other scaling. */
+ y = (b - (a * ratio)) / denom; /* Same imaginary part, other scaling. */
+ }
+
+ /* Recover infinities and zeros that computed as NaN+iNaN; the only cases
+ are nonzero/zero, infinite/finite, and finite/infinite. */
+ if (isnan (x) && isnan (y))
+ {
+ if (c == 0.0 && d == 0.0 && (!isnan (a) || !isnan (b))) /* Zero divisor, dividend not all NaN. */
+ {
+ x = COPYSIGN (INFINITY, c) * a; /* Infinity with the sign of c, times each dividend part. */
+ y = COPYSIGN (INFINITY, c) * b;
+ }
+ else if ((isinf (a) || isinf (b)) && isfinite (c) && isfinite (d)) /* Infinite / finite. */
+ {
+ a = COPYSIGN (isinf (a) ? 1 : 0, a); /* Reduce a and b to signed 1 (if infinite) or signed 0. */
+ b = COPYSIGN (isinf (b) ? 1 : 0, b);
+ x = INFINITY * (a * c + b * d); /* Real part: infinity times the reduced numerator. */
+ y = INFINITY * (b * c - a * d); /* Imaginary part likewise. */
+ }
+ else if ((isinf (c) || isinf (d)) && isfinite (a) && isfinite (b)) /* Finite / infinite. */
+ {
+ c = COPYSIGN (isinf (c) ? 1 : 0, c); /* Reduce c and d to signed 1 (if infinite) or signed 0. */
+ d = COPYSIGN (isinf (d) ? 1 : 0, d);
+ x = 0.0 * (a * c + b * d); /* Zero whose sign follows the reduced numerator. */
+ y = 0.0 * (b * c - a * d);
+ }
+ }
+
+ __real__ res = x; /* Assemble the complex result from its two parts. */
+ __imag__ res = y;
+ return res;
+}
Index: libgcc/config/rs6000/_mulkc3.c
===================================================================
--- libgcc/config/rs6000/_mulkc3.c (revision 0)
+++ libgcc/config/rs6000/_mulkc3.c (working copy)
@@ -0,0 +1,69 @@
+typedef float KFtype __attribute__ ((mode (KF))); /* IEEE 128-bit real. */
+typedef __complex float KCtype __attribute__ ((mode (KC))); /* Complex counterpart. */
+
+#define COPYSIGN(x,y) __builtin_copysignq (x, y) /* Helper names below map onto GCC builtins. */
+#define INFINITY __builtin_infq ()
+#define isnan __builtin_isnan
+#define isinf __builtin_isinf
+
+KCtype
+__mulkc3 (KFtype a, KFtype b, KFtype c, KFtype d) /* Returns (a + ib) * (c + id). */
+{
+ KFtype ac, bd, ad, bc, x, y;
+ KCtype res;
+
+ ac = a * c; /* The four partial products are reused by the overflow check below. */
+ bd = b * d;
+ ad = a * d;
+ bc = b * c;
+
+ x = ac - bd; /* Real part. */
+ y = ad + bc; /* Imaginary part. */
+
+ if (isnan (x) && isnan (y))
+ {
+ /* Recover infinities that computed as NaN + iNaN. */
+ _Bool recalc = 0; /* Set once any case below has adjusted the operands. */
+ if (isinf (a) || isinf (b))
+ {
+ /* The first factor (a + ib) is infinite. "Box" the infinity and
+ change NaNs in the other factor to 0. */
+ a = COPYSIGN (isinf (a) ? 1 : 0, a); /* Signed 1 if infinite, else signed 0. */
+ b = COPYSIGN (isinf (b) ? 1 : 0, b);
+ if (isnan (c)) c = COPYSIGN (0, c);
+ if (isnan (d)) d = COPYSIGN (0, d);
+ recalc = 1;
+ }
+ if (isinf (c) || isinf (d))
+ {
+ /* The second factor (c + id) is infinite. "Box" the infinity and
+ change NaNs in the other factor to 0. */
+ c = COPYSIGN (isinf (c) ? 1 : 0, c); /* Signed 1 if infinite, else signed 0. */
+ d = COPYSIGN (isinf (d) ? 1 : 0, d);
+ if (isnan (a)) a = COPYSIGN (0, a);
+ if (isnan (b)) b = COPYSIGN (0, b);
+ recalc = 1;
+ }
+ if (!recalc /* No operand was infinite, yet a partial product is. */
+ && (isinf (ac) || isinf (bd)
+ || isinf (ad) || isinf (bc)))
+ {
+ /* Recover infinities from overflow by changing NaNs to 0. */
+ if (isnan (a)) a = COPYSIGN (0, a);
+ if (isnan (b)) b = COPYSIGN (0, b);
+ if (isnan (c)) c = COPYSIGN (0, c);
+ if (isnan (d)) d = COPYSIGN (0, d);
+ recalc = 1;
+ }
+ if (recalc)
+ {
+ x = INFINITY * (a * c - b * d); /* Redo the product with the adjusted operands. */
+ y = INFINITY * (a * d + b * c);
+ }
+ }
+
+ __real__ res = x; /* Assemble the complex result from its two parts. */
+ __imag__ res = y;
+ return res;
+}
+
Index: libgcc/config/rs6000/quad-float128.h
===================================================================
--- libgcc/config/rs6000/quad-float128.h (revision 238213)
+++ libgcc/config/rs6000/quad-float128.h (working copy)
@@ -33,6 +33,10 @@
This define forces it to use KFmode (aka, ieee 128-bit floating point). */
#define TF KF
+/* We also need TCtype to represent complex ieee 128-bit float for
+ __mulkc3 and __divkc3. */
+typedef __complex float TCtype __attribute__ ((mode (KC)));
+
/* Force the use of the VSX instruction set. */
#if defined(_ARCH_PPC) && (!defined(__VSX__) || !defined(__FLOAT128__))
#pragma GCC target ("vsx,float128")
@@ -154,6 +158,10 @@ extern TFtype __floatundikf (UDItype_ppc);
extern IBM128_TYPE __extendkftf2 (TFtype);
extern TFtype __trunctfkf2 (IBM128_TYPE);
+/* Complex __float128 built on __float128 interfaces. */
+extern TCtype __mulkc3 (TFtype, TFtype, TFtype, TFtype);
+extern TCtype __divkc3 (TFtype, TFtype, TFtype, TFtype);
+
/* Implementation of conversions between __ibm128 and __float128, to allow the
same code to be used on systems with IEEE 128-bit emulation and with IEEE
128-bit hardware support. */
Index: libgcc/config/rs6000/t-float128
===================================================================
--- libgcc/config/rs6000/t-float128 (revision 238213)
+++ libgcc/config/rs6000/t-float128 (working copy)
@@ -25,7 +25,7 @@ fp128_softfp_obj = $(fp128_softfp_static_obj) $(fp
# New functions for software emulation
fp128_ppc_funcs = floattikf floatuntikf fixkfti fixunskfti \
extendkftf2-sw trunctfkf2-sw \
- sfp-exceptions
+ sfp-exceptions _mulkc3 _divkc3
fp128_ppc_src = $(addprefix $(srcdir)/config/rs6000/,$(addsuffix \
.c,$(fp128_ppc_funcs)))