This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH, i386]: Fix PR target/26915: -1 should be loaded as fld1;fchs


Roger Sayle wrote:

PR target/26915
* config/i386/i386.c (standard_80387_constant_p): When optimizing
for size, treat -1 as a valid 80387 constant.


Cool. PR26915 was one of the PRs on my list. This is Ok for mainline.

Two minor things though.  The first is that I believe that a near
identical fix should also allow us to handle -0.0 (not very common
but should keep the IEEE conscious people happy).

Indeed! In the revised patch, handling of -0.0 is also included.

I'll pre-approve
either a follow-up or a revised version of your patch with that
change. Secondly, have you done any benchmarking with this change
enabled for !optimize_size? Although it's clearly a size win, there's
also the possibility that avoiding the memory traffic, and reducing
code size may be faster on some cores. If the numbers are in the noise,
we should leave the optimize_size tests in, but we should at least
check. Perhaps H.J., Evandro or Honza might know the answer?


Yes, I have some suprising numbers. Consider this testcase:

--cut here--
int main()
{
 int i;
 double sum = 0.0;

 for (i = 0; i < 1000000000; i++)
   sum += (i & 0x01) ? 1.0 : -1.0;

printf("%f\n", sum);

 return 0;
}
--cut here--

When the test is compiled with unpatched gcc (-m32 -Os):
user    0m3.069s
and when this test is compiled with patched gcc (-m32 -Os):
user    0m2.511s

This is on:
vendor_id       : AuthenticAMD
cpu family      : 15
model           : 47
model name      : AMD Athlon(tm) 64 Processor 3000+
stepping        : 2
cpu MHz         : 1809.287
cache size      : 512 KB

The difference between assembler dumps is:
--- bench.s_    2006-11-04 20:03:02.000000000 +0100
+++ bench.s     2006-11-04 20:03:20.000000000 +0100
@@ -18,7 +18,8 @@
.L2:
       testb   $1, %al
       jne     .L3
-       flds    .LC1
+       fld1
+       fchs
       jmp     .L5
.L3:
       fld1
@@ -37,9 +38,5 @@
       leal    -4(%ecx), %esp
       ret
       .size   main, .-main
-       .section        .rodata.cst4,"aM",@progbits,4
-       .align 4
-.LC1:
-       .long   3212836864
-       .ident  "GCC: (GNU) 4.3.0 20061103 (experimental)"
+       .ident  "GCC: (GNU) 4.3.0 20061104 (experimental)"
       .section        .note.GNU-stack,"",@progbits

I think that we can safely enable this optimization for other optimization levels.

Attached is a new revision of patch. It was bootstrapped on x86_64-pc-linux-gnu, regression test is in progress.
If it is still OK, I'll commit this patch to mainline, as soon as regression test finish (a couple of hours).


2006-11-04 Uros Bizjak <ubizjak@gmail.com>

PR target/26915
* config/i386/i386.c (standard_80387_constant_p): Treat -0.0 and -1.0
as a valid 80387 constant.
(standard_80387_constant_opcode): Return "#" for -0.0 and -1.0.
* config/i386/i386.md (unnamed splitter): Split the load of
constant -0.0 or -1.0 into the load of 0.0 or 1.0, followed
by negation.


testsuite/ChangeLog:

2006-11-04 Uros Bizjak <ubizjak@gmail.com>

       PR target/26915
       * gcc.target/i386/387-12.c: New test.

BTW: Unfortunatelly, we split too late for gcse-after-reload to eliminate one fld1:
.L2:
testb $1, %al
jne .L3
fld1
fchs
jmp .L5
.L3:
fld1
.L5:


There is nothing that can be done in this case, but to move post-reload split before gcse-after-reload pass. This would result in:
.L2:
testb $1, %al
fld1
jne .L3
fchs
.L3:


Uros.
Index: config/i386/i386.md
===================================================================
--- config/i386/i386.md	(revision 118481)
+++ config/i386/i386.md	(working copy)
@@ -2914,6 +2914,26 @@
   [(set_attr "type" "fxch")
    (set_attr "mode" "XF")])
 
+;; Split the load of -0.0 or -1.0 into fldz;fchs or fld1;fchs sequence
+(define_split
+  [(set (match_operand:X87MODEF 0 "register_operand" "")
+	(match_operand:X87MODEF 1 "immediate_operand" ""))]
+  "reload_completed && FP_REGNO_P (REGNO (operands[0]))
+   && (standard_80387_constant_p (operands[1]) == 8
+       || standard_80387_constant_p (operands[1]) == 9)"
+  [(set (match_dup 0)(match_dup 1))
+   (set (match_dup 0)
+	(neg:X87MODEF (match_dup 0)))]
+{
+  REAL_VALUE_TYPE r;
+
+  REAL_VALUE_FROM_CONST_DOUBLE (r, operands[1]);
+  if (real_isnegzero (&r))
+    operands[1] = CONST0_RTX (<MODE>mode);
+  else
+    operands[1] = CONST1_RTX (<MODE>mode);
+})
+
 (define_expand "movtf"
   [(set (match_operand:TF 0 "nonimmediate_operand" "")
 	(match_operand:TF 1 "nonimmediate_operand" ""))]
Index: config/i386/i386.c
===================================================================
--- config/i386/i386.c	(revision 118481)
+++ config/i386/i386.c	(working copy)
@@ -4575,6 +4575,8 @@
 int
 standard_80387_constant_p (rtx x)
 {
+  REAL_VALUE_TYPE r;
+
   if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
     return -1;
 
@@ -4583,23 +4585,30 @@
   if (x == CONST1_RTX (GET_MODE (x)))
     return 2;
 
+  REAL_VALUE_FROM_CONST_DOUBLE (r, x);
+
   /* For XFmode constants, try to find a special 80387 instruction when
      optimizing for size or on those CPUs that benefit from them.  */
   if (GET_MODE (x) == XFmode
       && (optimize_size || x86_ext_80387_constants & TUNEMASK))
     {
-      REAL_VALUE_TYPE r;
       int i;
 
       if (! ext_80387_constants_init)
 	init_ext_80387_constants ();
 
-      REAL_VALUE_FROM_CONST_DOUBLE (r, x);
       for (i = 0; i < 5; i++)
         if (real_identical (&r, &ext_80387_constants_table[i]))
 	  return i + 3;
     }
 
+  /* Load of the constant -0.0 or -1.0 will be split as
+     fldz;fchs or fld1;fchs sequence.  */
+  if (real_isnegzero (&r))
+    return 8;
+  if (real_identical (&r, &dconstm1))
+    return 9;
+
   return 0;
 }
 
@@ -4625,6 +4634,9 @@
       return "fldl2t";
     case 7:
       return "fldpi";
+    case 8:
+    case 9:
+      return "#";
     default:
       gcc_unreachable ();
     }
Index: testsuite/gcc.target/i386/387-12.c
===================================================================
--- testsuite/gcc.target/i386/387-12.c	(revision 0)
+++ testsuite/gcc.target/i386/387-12.c	(revision 0)
@@ -0,0 +1,17 @@
+/* PR target/26915 */
+/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
+/* { dg-require-effective-target ilp32 } */
+/* { dg-options "-O" } */
+
+double testm0(void)
+{
+	return -0.0;
+}
+
+double testm1(void)
+{
+	return -1.0;
+}
+
+/* { dg-final { scan-assembler "fldz" } } */
+/* { dg-final { scan-assembler "fld1" } } */

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]