This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.
Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]
Other format:	[Raw text]
[BIB PATCH] FP neg and abs using bit operations

From: Roger Sayle <roger at www dot eyesopen dot com>
To: gcc-patches at gcc dot gnu dot org
Cc: Richard Henderson <rth at redhat dot com>
Date: Fri, 6 Dec 2002 19:54:01 -0700 (MST)
Subject: [BIB PATCH] FP neg and abs using bit operations
On Wed, 4 Dec 2002, Richard Henderson wrote:
> On Wed, Dec 04, 2002 at 10:34:23AM -0700, Roger Sayle wrote:
> > I'm not even sure how we encode reinterpretation: i.e. changing an
> > [sd]f mode pseudo into a [sd]i mode bit-pattern and back again
> > without conversion/interpretation.
>
> imode = int_mode_for_mode (fpmode);
> ival = gen_lowpart (imode, fpval);

Ahhh!

How about the following patch to the gcc-3_4-basic-improvement-branch
that attempts to implement abs?f2 and neg?f2 using bit-wise integer
operations to either clear or invert the FP sign bit, respectively?

As suggested I've added a field to struct real_format to record
which bit of the floating point format is the sign bit, or -1 if
these bit-wise techniques shouldn't be used.

I've then added additional code to expand_unop and expand_abs in
optabs.c to attempt these operations using integer arithmetic if
possible.  The current constraints are that the target has an
integer mode of exactly the same size as the float mode operand,
and no wider than 2 * HOST_BITS_PER_WIDE_INT bits, so that GCC
can represent the float encoding as an immediate constant.  Then
if the float format is recognized and has a sign bit, fabs is
implemented by "x & ~(1<<signbit)".  Additionally, if the
floating point format has signed zero, "x ^ (1<<signbit)" is
used to implement FP negation.

RTH, is this the sort of implementation you had in mind?

The patch below has been tested with a complete bootstrap, all
languages except Ada and treelang, on i686-pc-linux-gnu with no
new regressions from a full "make -k check".

Additionally, to check that the patch actually works, I've also
bootstrapped and regression tested with a version that disables
the "abs?f2" and "neg?f2" patterns from i386.md.  As expected
the only new regression was gcc.c-torture/execute/20020720-1.c
which tests the optimization of the "ABS" RTX, which of course
is no longer used if abs?f2 is disabled.

For example, compiling with "-O2 -fomit-frame-pointer" generated the
following code:

negdf2: pushl   %ebx
        subl    $8, %esp
        movl    20(%esp), %edx
        movl    16(%esp), %eax
        movl    %edx, %ebx
        xorl    $-2147483648, %ebx
        movl    %eax, (%esp)
        movl    %ebx, 4(%esp)
        fldl    (%esp)
        addl    $8, %esp
        popl    %ebx
        ret

absdf2: pushl   %ebx
        subl    $8, %esp
        movl    20(%esp), %edx
        movl    16(%esp), %eax
        movl    %edx, %ebx
        andl    $2147483647, %ebx
        movl    %eax, (%esp)
        movl    %ebx, 4(%esp)
        fldl    (%esp)
        addl    $8, %esp
        popl    %ebx
        ret

negsf2: subl    $4, %esp
        movl    8(%esp), %eax
        xorl    $-2147483648, %eax
        movl    %eax, (%esp)
        flds    (%esp)
        popl    %eax
        ret

abssf2: subl    $4, %esp
        movl    8(%esp), %eax
        andl    $2147483647, %eax
        movl    %eax, (%esp)
        flds    (%esp)
        popl    %edx
        ret

You'll notice there are some strange register allocation choices
made for the "double" implementations, probably caused by the
SUBREG weirdness tricking GCC into thinking %edx isn't dead.

Just in case, I also did some timings of the above routines, which
turn out to only be about 5% slower than the implementations that
use "fchs" and "fabs" instructions respectively (on my Pentium4).


Ok for the BIB?


2002-12-06  Roger Sayle  <roger@eyesopen.com>

	* real.h (real_format): Add signbit field.
	* real.c (ieee_single_format, ieee_double_format,
	ieee_extended_motorola_format, ieee_extended_intel_96_format,
	ieee_extended_intel_128_format, ibm_extended_format,
	ieee_quad_format, vax_f_format, vax_d_format,
	vax_g_format, i370_single_format, i370_double_format,
	c4x_single_format, c4x_extended_format, real_internal_format):
	Provide suitable signbit value, or -1 to avoid bit twiddling.

	* optabs.c (expand_unop): Try implementing negation of
	floating point modes by flipping the sign bit.
	(expand_abs): Try implementing abs of floating point modes
	by clearing the sign bit.


Index: real.h
===================================================================
RCS file: /cvs/gcc/gcc/gcc/real.h,v
retrieving revision 1.44.8.7
diff -c -3 -p -r1.44.8.7 real.h
*** real.h	4 Nov 2002 00:04:22 -0000	1.44.8.7
--- real.h	6 Dec 2002 17:59:49 -0000
*************** struct real_format
*** 124,129 ****
--- 124,132 ----
    /* The maximum integer, x, such that b**(x-1) is representable.  */
    int emax;

+   /* The bit position of the sign bit, or -1 for a complex encoding.  */
+   int signbit;
+
    /* Properties of the format.  */
    bool has_nans;
    bool has_inf;
Index: real.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/real.c,v
retrieving revision 1.75.4.11
diff -c -3 -p -r1.75.4.11 real.c
*** real.c	3 Dec 2002 17:34:58 -0000	1.75.4.11
--- real.c	6 Dec 2002 17:59:50 -0000
*************** const struct real_format ieee_single_for
*** 2723,2728 ****
--- 2723,2729 ----
      24,
      -125,
      128,
+     31,
      true,
      true,
      true,
*************** const struct real_format ieee_double_for
*** 2916,2921 ****
--- 2917,2923 ----
      53,
      -1021,
      1024,
+     63,
      true,
      true,
      true,
*************** const struct real_format ieee_extended_m
*** 3171,3176 ****
--- 3173,3179 ----
      64,
      -16382,
      16384,
+     -1,
      true,
      true,
      true,
*************** const struct real_format ieee_extended_i
*** 3187,3192 ****
--- 3190,3196 ----
      64,
      -16381,
      16384,
+     -1,
      true,
      true,
      true,
*************** const struct real_format ieee_extended_i
*** 3203,3208 ****
--- 3207,3213 ----
      64,
      -16381,
      16384,
+     -1,
      true,
      true,
      true,
*************** const struct real_format ibm_extended_fo
*** 3296,3301 ****
--- 3301,3307 ----
      53 + 53,
      -1021,
      1024,
+     -1,
      true,
      true,
      true,
*************** const struct real_format ieee_quad_forma
*** 3549,3554 ****
--- 3555,3561 ----
      113,
      -16381,
      16384,
+     -1,
      true,
      true,
      true,
*************** const struct real_format vax_f_format =
*** 3856,3861 ****
--- 3863,3869 ----
      24,
      -127,
      127,
+     15,
      false,
      false,
      false,
*************** const struct real_format vax_d_format =
*** 3872,3877 ****
--- 3880,3886 ----
      56,
      -127,
      127,
+     -1,
      false,
      false,
      false,
*************** const struct real_format vax_g_format =
*** 3888,3893 ****
--- 3897,3903 ----
      53,
      -1023,
      1023,
+     -1,
      false,
      false,
      false,
*************** const struct real_format i370_single_for
*** 4069,4074 ****
--- 4079,4085 ----
      6,
      -64,
      63,
+     31,
      false,
      false,
      false, /* ??? The encoding does allow for "unnormals".  */
*************** const struct real_format i370_double_for
*** 4085,4090 ****
--- 4096,4102 ----
      14,
      -64,
      63,
+     -1,
      false,
      false,
      false, /* ??? The encoding does allow for "unnormals".  */
*************** const struct real_format c4x_single_form
*** 4299,4304 ****
--- 4311,4317 ----
      24,
      -126,
      128,
+     -1,
      false,
      false,
      false,
*************** const struct real_format c4x_extended_fo
*** 4315,4320 ****
--- 4328,4334 ----
      32,
      -126,
      128,
+     -1,
      false,
      false,
      false,
*************** const struct real_format real_internal_f
*** 4360,4365 ****
--- 4374,4380 ----
      SIGNIFICAND_BITS - 2,
      -MAX_EXP,
      MAX_EXP,
+     -1,
      true,
      true,
      false,
Index: optabs.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/optabs.c,v
retrieving revision 1.143.4.8
diff -c -3 -p -r1.143.4.8 optabs.c
*** optabs.c	3 Dec 2002 17:34:57 -0000	1.143.4.8
--- optabs.c	6 Dec 2002 17:59:52 -0000
*************** expand_unop (mode, unoptab, op0, target,
*** 2516,2521 ****
--- 2516,2554 ----
        return target;
      }

+   /* Try negating floating point values by flipping the sign bit.  */
+   if (unoptab->code == NEG && class == MODE_FLOAT
+       && GET_MODE_BITSIZE (mode) <= 2 * HOST_BITS_PER_WIDE_INT)
+     {
+       const struct real_format *fmt = real_format_for_mode[mode - QFmode];
+       enum machine_mode imode = int_mode_for_mode (mode);
+       int bitpos = (fmt != 0) ? fmt->signbit : -1;
+
+       if (imode != BLKmode && bitpos >= 0 && fmt->has_signed_zero)
+ 	{
+ 	  HOST_WIDE_INT hi, lo;
+ 	  rtx last = get_last_insn ();
+
+ 	  if (bitpos < HOST_BITS_PER_WIDE_INT)
+ 	    {
+ 	      hi = 0;
+ 	      lo = (HOST_WIDE_INT) 1 << bitpos;
+ 	    }
+ 	  else
+ 	    {
+ 	      hi = (HOST_WIDE_INT) 1 << (bitpos - HOST_BITS_PER_WIDE_INT);
+ 	      lo = 0;
+ 	    }
+ 	  temp = expand_binop (imode, xor_optab,
+ 			       gen_lowpart (imode, op0),
+ 			       immed_double_const (lo, hi, imode),
+ 			       NULL_RTX, 1, OPTAB_LIB_WIDEN);
+ 	  if (temp != 0)
+ 	    return gen_lowpart (mode, temp);
+ 	  delete_insns_since (last);
+         }
+     }
+
    /* Now try a library call in this mode.  */
    if (unoptab->handlers[(int) mode].libfunc)
      {
*************** expand_abs (mode, op0, target, result_un
*** 2627,2632 ****
--- 2660,2698 ----
                        op0, target, 0);
    if (temp != 0)
      return temp;
+
+   /* For floating point modes, try clearing the sign bit.  */
+   if (GET_MODE_CLASS (mode) == MODE_FLOAT
+       && GET_MODE_BITSIZE (mode) <= 2 * HOST_BITS_PER_WIDE_INT)
+     {
+       const struct real_format *fmt = real_format_for_mode[mode - QFmode];
+       enum machine_mode imode = int_mode_for_mode (mode);
+       int bitpos = (fmt != 0) ? fmt->signbit : -1;
+
+       if (imode != BLKmode && bitpos >= 0)
+ 	{
+ 	  HOST_WIDE_INT hi, lo;
+ 	  rtx last = get_last_insn ();
+
+ 	  if (bitpos < HOST_BITS_PER_WIDE_INT)
+ 	    {
+ 	      hi = 0;
+ 	      lo = (HOST_WIDE_INT) 1 << bitpos;
+ 	    }
+ 	  else
+ 	    {
+ 	      hi = (HOST_WIDE_INT) 1 << (bitpos - HOST_BITS_PER_WIDE_INT);
+ 	      lo = 0;
+ 	    }
+ 	  temp = expand_binop (imode, and_optab,
+ 			       gen_lowpart (imode, op0),
+ 			       immed_double_const (~lo, ~hi, imode),
+ 			       NULL_RTX, 1, OPTAB_LIB_WIDEN);
+ 	  if (temp != 0)
+ 	    return gen_lowpart (mode, temp);
+ 	  delete_insns_since (last);
+ 	}
+     }

    /* If we have a MAX insn, we can do this as MAX (x, -x).  */
    if (smax_optab->handlers[(int) mode].insn_code != CODE_FOR_nothing)

Roger
--
Roger Sayle,                         E-mail: roger@eyesopen.com
OpenEye Scientific Software,         WWW: http://www.eyesopen.com/
Suite 1107, 3600 Cerrillos Road,     Tel: (+1) 505-473-7385
Santa Fe, New Mexico, 87507.         Fax: (+1) 505-473-0833
Follow-Ups:
- Re: [BIB PATCH] FP neg and abs using bit operations
  - From: Richard Henderson
References:
- Re: [PATCH] Add __abssf2 and __absdf2 to libgcc
  - From: Richard Henderson
Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]