This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Re: [PATCH, rs6000] Use direct moves for __builtin_signbit for 128-bit floating-point
- From: Bill Schmidt <wschmidt at linux dot vnet dot ibm dot com>
- To: Segher Boessenkool <segher at kernel dot crashing dot org>
- Cc: GCC Patches <gcc-patches at gcc dot gnu dot org>, David Edelsohn <dje dot gcc at gmail dot com>, Michael Meissner <meissner at linux dot vnet dot ibm dot com>
- Date: Fri, 1 Jul 2016 20:04:46 -0500
- Subject: Re: [PATCH, rs6000] Use direct moves for __builtin_signbit for 128-bit floating-point
- Authentication-results: sourceware.org; auth=none
- References: <46c94133-1c05-7d87-4c87-d3c65d6d4080 at linux dot vnet dot ibm dot com> <20160629214314 dot GA16527 at gate dot crashing dot org> <6E5A1D4C-8C56-474D-9EC5-7A765E70EC02 at linux dot vnet dot ibm dot com>
Hi,
I've revised the patch to address the previous concerns. rs6000_split_signbit has
been greatly simplified since SF and DF modes are not currently of interest. The
use of -static-libgcc did indeed turn out to be leftover from long-ago necessity
and is no longer needed. I also updated the signbit-3.c testcase to make use of
the recently added __float128 builtins.
Bootstrapped and tested on powerpc64le-unknown-linux-gnu with no regressions.
Is this ok for trunk, and eventual 6.2 backport?
Thanks!
Bill
[gcc]
2016-07-01 Michael Meissner <meissner@linux.vnet.ibm.com>
Bill Schmidt <wschmidt@linux.vnet.ibm.com>
* config/rs6000/rs6000-protos.h (rs6000_split_signbit): New
prototype.
* config/rs6000/rs6000.c (rs6000_split_signbit): New function.
* config/rs6000/rs6000.md (UNSPEC_SIGNBIT): New constant.
(SIGNBIT): New mode iterator.
(Fsignbit): New mode attribute.
(signbit<mode>2): Change operand1 to match FLOAT128 instead of
IBM128; dispatch to gen_signbit{kf,tf}2_dm for __float128
when direct moves are available.
(signbit<mode>2_dm): New define_insn_and_split).
(signbit<mode>2_dm2): New define_insn.
[gcc/testsuite]
2016-07-01 Michael Meissner <meissner@linux.vnet.ibm.com>
Bill Schmidt <wschmidt@linux.vnet.ibm.com>
* gcc.target/powerpc/signbit-1.c: New test.
* gcc.target/powerpc/signbit-2.c: New test.
* gcc.target/powerpc/signbit-3.c: New test.
Index: gcc/config/rs6000/rs6000-protos.h
===================================================================
--- gcc/config/rs6000/rs6000-protos.h (revision 237929)
+++ gcc/config/rs6000/rs6000-protos.h (working copy)
@@ -135,6 +135,7 @@ extern bool rs6000_emit_set_const (rtx, rtx);
extern int rs6000_emit_cmove (rtx, rtx, rtx, rtx);
extern int rs6000_emit_vector_cond_expr (rtx, rtx, rtx, rtx, rtx, rtx);
extern void rs6000_emit_minmax (rtx, enum rtx_code, rtx, rtx);
+extern void rs6000_split_signbit (rtx, rtx);
extern void rs6000_expand_atomic_compare_and_swap (rtx op[]);
extern void rs6000_expand_atomic_exchange (rtx op[]);
extern void rs6000_expand_atomic_op (enum rtx_code, rtx, rtx, rtx, rtx, rtx);
Index: gcc/config/rs6000/rs6000.c
===================================================================
--- gcc/config/rs6000/rs6000.c (revision 237929)
+++ gcc/config/rs6000/rs6000.c (working copy)
@@ -23145,6 +23145,48 @@ rs6000_emit_minmax (rtx dest, enum rtx_code code,
emit_move_insn (dest, target);
}
+/* Split a signbit operation on 64-bit machines with direct move. Also allow
+ for the value to come from memory or if it is already loaded into a GPR. */
+
+void
+rs6000_split_signbit (rtx dest, rtx src)
+{
+ machine_mode d_mode = GET_MODE (dest);
+ machine_mode s_mode = GET_MODE (src);
+ rtx dest_di = (d_mode == DImode) ? dest : gen_lowpart (DImode, dest);
+ rtx shift_reg = dest_di;
+
+ gcc_assert (REG_P (dest));
+ gcc_assert (REG_P (src) || MEM_P (src));
+ gcc_assert (s_mode == KFmode || s_mode == TFmode);
+
+ if (MEM_P (src))
+ {
+ rtx mem = (WORDS_BIG_ENDIAN
+ ? adjust_address (src, DImode, 0)
+ : adjust_address (src, DImode, 8));
+ emit_insn (gen_rtx_SET (dest_di, mem));
+ }
+
+ else
+ {
+ unsigned int r = REGNO (src);
+
+ /* If this is a VSX register, generate the special mfvsrd instruction
+ to get it in a GPR. Until we support SF and DF modes, that will
+ always be true. */
+ gcc_assert (VSX_REGNO_P (r));
+
+ if (s_mode == KFmode)
+ emit_insn (gen_signbitkf2_dm2 (dest_di, src));
+ else
+ emit_insn (gen_signbittf2_dm2 (dest_di, src));
+ }
+
+ emit_insn (gen_lshrdi3 (dest_di, shift_reg, GEN_INT (63)));
+ return;
+}
+
/* A subroutine of the atomic operation splitters. Jump to LABEL if
COND is true. Mark the jump as unlikely to be taken. */
Index: gcc/config/rs6000/rs6000.md
===================================================================
--- gcc/config/rs6000/rs6000.md (revision 237929)
+++ gcc/config/rs6000/rs6000.md (working copy)
@@ -147,6 +147,7 @@
UNSPEC_ROUND_TO_ODD
UNSPEC_IEEE128_MOVE
UNSPEC_IEEE128_CONVERT
+ UNSPEC_SIGNBIT
])
;;
@@ -508,6 +509,13 @@
(IF "TARGET_FLOAT128")
(TF "TARGET_LONG_DOUBLE_128")])
+; Iterator for signbit on 64-bit machines with direct move
+(define_mode_iterator SIGNBIT [(KF "FLOAT128_VECTOR_P (KFmode)")
+ (TF "FLOAT128_VECTOR_P (TFmode)")])
+
+(define_mode_attr Fsignbit [(KF "wa")
+ (TF "wa")])
+
; Iterator for ISA 3.0 supported floating point types
(define_mode_iterator FP_ISA3 [SF
DF
@@ -4567,7 +4575,7 @@
;; when little-endian.
(define_expand "signbit<mode>2"
[(set (match_dup 2)
- (float_truncate:DF (match_operand:IBM128 1 "gpc_reg_operand" "")))
+ (float_truncate:DF (match_operand:FLOAT128 1 "gpc_reg_operand" "")))
(set (match_dup 3)
(subreg:DI (match_dup 2) 0))
(set (match_dup 4)
@@ -4575,8 +4583,20 @@
(set (match_operand:SI 0 "gpc_reg_operand" "")
(match_dup 6))]
"TARGET_HARD_FLOAT
- && (TARGET_FPRS || TARGET_E500_DOUBLE)"
+ && (TARGET_FPRS || TARGET_E500_DOUBLE)
+ && (!FLOAT128_IEEE_P (<MODE>mode)
+ || (TARGET_POWERPC64 && TARGET_DIRECT_MOVE))"
{
+ if (FLOAT128_IEEE_P (<MODE>mode))
+ {
+ if (<MODE>mode == KFmode)
+ emit_insn (gen_signbitkf2_dm (operands[0], operands[1]));
+ else if (<MODE>mode == TFmode)
+ emit_insn (gen_signbittf2_dm (operands[0], operands[1]));
+ else
+ gcc_unreachable ();
+ DONE;
+ }
operands[2] = gen_reg_rtx (DFmode);
operands[3] = gen_reg_rtx (DImode);
if (TARGET_POWERPC64)
@@ -4624,6 +4644,37 @@
operands[5] = CONST0_RTX (<MODE>mode);
})
+;; Optimize signbit on 64-bit systems with direct move to avoid doing the store
+;; and load
+(define_insn_and_split "signbit<mode>2_dm"
+ [(set (match_operand:SI 0 "gpc_reg_operand" "=r,r,r")
+ (unspec:SI
+ [(match_operand:SIGNBIT 1 "input_operand" "<Fsignbit>,m,r")]
+ UNSPEC_SIGNBIT))]
+ "TARGET_POWERPC64 && TARGET_DIRECT_MOVE"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+{
+ rs6000_split_signbit (operands[0], operands[1]);
+ DONE;
+}
+ [(set_attr "length" "8,8,12")
+ (set_attr "type" "mftgpr,load,integer")])
+
+;; MODES_TIEABLE_P doesn't allow DImode to be tied with the various floating
+;; point types, which makes normal SUBREG's problematical. Instead use a
+;; special pattern to avoid using a normal movdi.
+(define_insn "signbit<mode>2_dm2"
+ [(set (match_operand:DI 0 "gpc_reg_operand" "=r")
+ (unspec:DI [(match_operand:SIGNBIT 1 "gpc_reg_operand" "<Fsignbit>")
+ (const_int 0)]
+ UNSPEC_SIGNBIT))]
+ "TARGET_POWERPC64 && TARGET_DIRECT_MOVE"
+ "mfvsrd %0,%x1"
+ [(set_attr "type" "mftgpr")])
+
+
;; Use an unspec rather providing an if-then-else in RTL, to prevent the
;; compiler from optimizing -0.0
(define_insn "copysign<mode>3_fcpsgn"
Index: gcc/testsuite/gcc.target/powerpc/signbit-1.c
===================================================================
--- gcc/testsuite/gcc.target/powerpc/signbit-1.c (revision 0)
+++ gcc/testsuite/gcc.target/powerpc/signbit-1.c (working copy)
@@ -0,0 +1,16 @@
+/* { dg-do compile { target { powerpc*-*-* && lp64 } } } */
+/* { dg-skip-if "" { powerpc*-*-darwin* } } */
+/* { dg-require-effective-target powerpc_p8vector_ok } */
+/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power8" } } */
+/* { dg-options "-mcpu=power8 -O2 -mfloat128" } */
+
+int do_signbit_kf (__float128 a) { return __builtin_signbit (a); }
+int do_signbit_if (__ibm128 a) { return __builtin_signbit (a); }
+int do_signbit_tf (long double a) { return __builtin_signbit (a); }
+
+/* { dg-final { scan-assembler-not "stxvd2x" } } */
+/* { dg-final { scan-assembler-not "stxvw4x" } } */
+/* { dg-final { scan-assembler-not "stxsd" } } */
+/* { dg-final { scan-assembler-not "stxsdx" } } */
+/* { dg-final { scan-assembler-times "mfvsrd" 3 } } */
+/* { dg-final { scan-assembler-times "srdi" 3 } } */
Index: gcc/testsuite/gcc.target/powerpc/signbit-2.c
===================================================================
--- gcc/testsuite/gcc.target/powerpc/signbit-2.c (revision 0)
+++ gcc/testsuite/gcc.target/powerpc/signbit-2.c (working copy)
@@ -0,0 +1,18 @@
+/* { dg-do compile { target { powerpc*-*-* && lp64 } } } */
+/* { dg-skip-if "" { powerpc*-*-darwin* } } */
+/* { dg-require-effective-target powerpc_p9vector_ok } */
+/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power9" } } */
+/* { dg-options "-mcpu=power9 -O2 -mfloat128" } */
+
+int do_signbit_kf (__float128 *a) { return __builtin_signbit (*a); }
+
+/* { dg-final { scan-assembler-not "stxvd2x" } } */
+/* { dg-final { scan-assembler-not "stxvw4x" } } */
+/* { dg-final { scan-assembler-not "stxsd" } } */
+/* { dg-final { scan-assembler-not "stxsdx" } } */
+/* { dg-final { scan-assembler-not "lxvd2x" } } */
+/* { dg-final { scan-assembler-not "lxvw4x" } } */
+/* { dg-final { scan-assembler-not "lxsd" } } */
+/* { dg-final { scan-assembler-not "lxsdx" } } */
+/* { dg-final { scan-assembler-times "ld" 1 } } */
+/* { dg-final { scan-assembler-times "srdi" 1 } } */
Index: gcc/testsuite/gcc.target/powerpc/signbit-3.c
===================================================================
--- gcc/testsuite/gcc.target/powerpc/signbit-3.c (revision 0)
+++ gcc/testsuite/gcc.target/powerpc/signbit-3.c (working copy)
@@ -0,0 +1,172 @@
+/* { dg-do run { target { powerpc*-*-linux* } } } */
+/* { dg-require-effective-target ppc_float128_sw } */
+/* { dg-options "-mcpu=power7 -O2 -mfloat128 -lm" } */
+
+#ifdef DEBUG
+#include <stdio.h>
+#endif
+
+#include <stddef.h>
+#include <stdint.h>
+#include <inttypes.h>
+#include <stdlib.h>
+#include <math.h>
+
+#if defined(__BIG_ENDIAN__)
+struct ieee128 {
+ uint64_t upper;
+ uint64_t lower;
+};
+
+#elif defined(__LITTLE_ENDIAN__)
+struct ieee128 {
+ uint64_t lower;
+ uint64_t upper;
+};
+
+#else
+#error "Unknown system"
+#endif
+
+union ieee_union {
+ __float128 f128;
+ struct ieee128 st128;
+};
+
+#ifdef DEBUG
+static int num_errors = 0;
+
+__attribute__((__noinline__))
+static void
+failure (int expected, int got, __float128 x)
+{
+ unsigned sign;
+ unsigned exponent;
+ uint64_t mantissa1;
+ uint64_t mantissa2;
+ uint64_t upper;
+ uint64_t lower;
+
+ union ieee_union u;
+
+ u.f128 = x;
+ upper = u.st128.upper;
+ lower = u.st128.lower;
+
+ sign = (unsigned)((upper >> 63) & 1);
+ exponent = (unsigned)((upper >> 48) & ((((uint64_t)1) << 16) - 1));
+ mantissa1 = (upper & ((((uint64_t)1) << 48) - 1));
+ mantissa2 = lower;
+
+ printf ("Expected %d, got %d, %c 0x%.4x 0x%.12" PRIx64 " 0x%.16" PRIx64,
+ expected, got,
+ sign ? '-' : '+',
+ exponent,
+ mantissa1,
+ mantissa2);
+
+ num_errors++;
+}
+
+#else
+
+#define failure(E, G, F) abort ()
+#endif
+
+__attribute__((__noinline__))
+static void
+test_signbit_arg (__float128 f128, int expected)
+{
+ int sign = __builtin_signbit (f128);
+
+ if ((expected != 0 && sign == 0)
+ || (expected == 0 && sign != 0))
+ failure (f128, expected, sign);
+}
+
+__attribute__((__noinline__))
+static void
+test_signbit_mem (__float128 *ptr, int expected)
+{
+ int sign = __builtin_signbit (*ptr);
+
+ if ((expected != 0 && sign == 0)
+ || (expected == 0 && sign != 0))
+ failure (*ptr, expected, sign);
+}
+
+__attribute__((__noinline__))
+static void
+test_signbit_gpr (__float128 *ptr, int expected)
+{
+ __float128 f128 = *ptr;
+ int sign;
+
+ __asm__ (" # %0" : "+r" (f128));
+
+ sign = __builtin_signbit (f128);
+ if ((expected != 0 && sign == 0)
+ || (expected == 0 && sign != 0))
+ failure (f128, expected, sign);
+}
+
+__attribute__((__noinline__))
+static void
+test_signbit (__float128 f128, int expected)
+{
+#ifdef DEBUG
+ union ieee_union u;
+ u.f128 = f128;
+ printf ("Expecting %d, trying %-5g "
+ "(0x%.16" PRIx64 " 0x%.16" PRIx64 ")\n",
+ expected, (double)f128,
+ u.st128.upper, u.st128.lower);
+#endif
+
+ test_signbit_arg (f128, expected);
+ test_signbit_mem (&f128, expected);
+ test_signbit_gpr (&f128, expected);
+}
+
+int
+main (void)
+{
+ union ieee_union u;
+
+ test_signbit (+0.0q, 0);
+ test_signbit (+1.0q, 0);
+
+ test_signbit (-0.0q, 1);
+ test_signbit (-1.0q, 1);
+
+ test_signbit (__builtin_copysign (__builtin_infq (), +1.0q), 0);
+ test_signbit (__builtin_copysign (__builtin_infq (), -1.0q), 1);
+
+ test_signbit (__builtin_copysign (__builtin_nanq (""), +1.0q), 0);
+ test_signbit (__builtin_copysign (__builtin_nanq (""), -1.0q), 1);
+
+ /* force the bottom double word to have specific bits in the 'sign' bit to
+ make sure we are picking the right word. */
+ u.f128 = 1.0q;
+ u.st128.lower = 0ULL;
+ test_signbit (u.f128, 0);
+
+ u.st128.lower = ~0ULL;
+ test_signbit (u.f128, 0);
+
+ u.f128 = -1.0q;
+ u.st128.lower = 0ULL;
+ test_signbit (u.f128, 1);
+
+ u.st128.lower = ~0ULL;
+ test_signbit (u.f128, 1);
+
+#ifdef DEBUG
+ printf ("%d error(s) were found\n", num_errors);
+ if (num_errors)
+ return num_errors;
+#endif
+
+ return 0;
+}
+
On 7/1/16 4:37 PM, Bill Schmidt wrote:
> Hi Segher,
>
>> On Jun 29, 2016, at 4:43 PM, Segher Boessenkool <segher@kernel.crashing.org> wrote:
>>
>> Hi,
>>
>> On Tue, Jun 28, 2016 at 04:44:08PM -0500, Bill Schmidt wrote:
>>> +void
>>> +rs6000_split_signbit (rtx dest, rtx src)
>>> +{
>>> + machine_mode d_mode = GET_MODE (dest);
>>> + machine_mode s_mode = GET_MODE (src);
>>> + rtx dest_di = (d_mode == DImode) ? dest : gen_lowpart (DImode, dest);
>>> + rtx shift_reg = dest_di;
>>> +
>>> + gcc_assert (REG_P (dest));
>>> + gcc_assert (REG_P (src) || MEM_P (src));
>>> +
>>> + if (MEM_P (src))
>>> + {
>>> + rtx mem;
>>> +
>>> + if (s_mode == SFmode)
>>> + mem = gen_rtx_SIGN_EXTEND (DImode, adjust_address (src, SImode, 0));
>>> +
>>> + else if (GET_MODE_SIZE (s_mode) == 16 && !WORDS_BIG_ENDIAN)
>>> + mem = adjust_address (src, DImode, 8);
>>> +
>>> + else
>>> + mem = adjust_address (src, DImode, 0);
>> else if (s_mode == DFmode)
>> ...
>> else
>> gcc_unreachable ();
>>
>> Or does this case catch some other modes, too? Make those explicit, then?
>> Or make everything based on size (not mode).
>>
>> And put it in order of size if you make that change?
> This actually has some leftover cruft in it from the original patch that
> supported DF and SF modes. I'll simplify it considerably.
>
>>> + if (FLOAT128_IEEE_P (<MODE>mode))
>>> + {
>>> + if (<MODE>mode == KFmode)
>>> + {
>>> + emit_insn (gen_signbitkf2_dm (operands[0], operands[1]));
>>> + DONE;
>>> + }
>>> + else if (<MODE>mode == TFmode)
>>> + {
>>> + emit_insn (gen_signbittf2_dm (operands[0], operands[1]));
>>> + DONE;
>>> + }
>>> + else
>>> + gcc_unreachable ();
>>> + }
>> If you put a single DONE at the end here things will look simpler.
>>
>> Why does the similar thing in rs6000.c construct the RTL by hand? This
>> way is safer.
> Good points, will fix.
>
>>> --- gcc/testsuite/gcc.target/powerpc/signbit-1.c (revision 0)
>>> +++ gcc/testsuite/gcc.target/powerpc/signbit-1.c (working copy)
>>> @@ -0,0 +1,16 @@
>>> +/* { dg-do compile { target { powerpc*-*-* && lp64 } } } */
>>> +/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
>> Default args aren't necessary.
>>
>>> --- gcc/testsuite/gcc.target/powerpc/signbit-3.c (revision 0)
>>> +++ gcc/testsuite/gcc.target/powerpc/signbit-3.c (working copy)
>>> @@ -0,0 +1,172 @@
>>> +/* { dg-do run { target { powerpc*-*-linux* } } } */
>>> +/* { dg-require-effective-target ppc_float128_sw } */
>>> +/* { dg-options "-mcpu=power7 -O2 -mfloat128 -static-libgcc -lm" } */
>> Why does this need -static-libgcc?
> Mike, can you please respond to this? I was curious about this also
> but neglected to ask you.
>
> Thanks,
> Bill
>
>>
>> Segher
>>
>