This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.

Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]
Other format:	[Raw text]

[PATCH] implement fptan x87 instruction

From: Uros Bizjak <uros at kss-loka dot si>
To: gcc-patches at gcc dot gnu dot org, Roger Sayle <roger at eyesopen dot com>
Date: Wed, 07 Apr 2004 09:50:44 +0200
Subject: [PATCH] implement fptan x87 instruction

Hello!

Attached to this message, please find a patch to implement the x87 fptan instruction. The patch is tested and works as expected. It also contains a small cosmetic change to my previous fsincos patch (a couple of expand_twoval_unop() parameters are moved) and fixes a ChangeLog typo.

As an added bonus, the peephole2 optimizer detects cases like 1.0 / tan(x) and transforms the 'fptan; fstp %st(0); fld1' sequence back into a single fptan. The annoying fptan feature of pushing 1.0 onto the stack is now used to produce smaller & faster code.

Testcase ( i386-387-8.c ):
double f1(double x)
{
 return 1.0 / tan(x);
}

With '-O2 -ffast-math', this is compiled into:
f1:
       pushl   %ebp
       movl    %esp, %ebp
       fldl    8(%ebp)
       fptan
       popl    %ebp
       fdivp   %st, %st(1)
       ret

I guess this is quite good asm...

Uros.

Index: ChangeLog
===================================================================
RCS file: /cvsroot/gcc/gcc/gcc/ChangeLog,v
retrieving revision 2.3355
diff -u -r2.3355 ChangeLog
--- ChangeLog	7 Apr 2004 01:20:04 -0000	2.3355
+++ ChangeLog	7 Apr 2004 07:43:01 -0000
@@ -1,3 +1,17 @@
+2004-04-07  Uros Bizjak  <uros@kss-loka.si>
+
+	* reg-stack.c (subst_stack_regs_pat): Handle UNSPEC_TAN_ONE
+	and UNSPEC_TAN_TAN. Add missing comment.
+
+	* config/i386/i386.md (*tandf3_1, *tansf3_1, *tanxf3_1): New
+	patterns to implement fptan x87 instruction.
+	(tandf2, tansf2, tanxf2): New expanders to implement tan, tanf
+	and tanl built-ins as inline x87 intrinsics. Define corresponding
+	peephole2 optimizers for 'fptan; fstp %st(0); fld1' sequence.
+
+	(UNSPEC_TAN_ONE, UNSPEC_TAN_TAN): New unspecs to represent
+	x87's fptan insn.
+
 2004-04-06  Nathanael Nerode  <neroden@gcc.gnu.org>
 
 	* config.gcc: Stop changing enable_threads midstream.
@@ -37,8 +51,8 @@
 	(sindf2, sinsf2, sinxf2): Rename to *sindf2, *sinsf2, *sinxf2.
 	(cosdf2, cossf2, cosxf2): Rename to *cosdf2, *cossf2, *cosxf2.
 
-	(UNSPEC_SINCOS_SIN, UNPEC_SINCOS_COS): New unspecs to represent
-	x87's unspec insn.
+	(UNSPEC_SINCOS_SIN, UNSPEC_SINCOS_COS): New unspecs to represent
+	x87's fsincos insn.
 
 2004-04-06  Devang Patel  <dpatel@apple.com>
 
Index: builtins.c
===================================================================
RCS file: /cvsroot/gcc/gcc/gcc/builtins.c,v
retrieving revision 1.302
diff -u -r1.302 builtins.c
--- builtins.c	6 Apr 2004 19:34:06 -0000	1.302
+++ builtins.c	7 Apr 2004 07:43:02 -0000
@@ -1897,13 +1897,13 @@
 	    case BUILT_IN_SIN:
 	    case BUILT_IN_SINF:
 	    case BUILT_IN_SINL:
-	      if (! expand_twoval_unop(builtin_optab, 0, target, op0, 0))    
+	      if (! expand_twoval_unop(builtin_optab, op0, 0, target, 0))    
 		abort();
 	      break;
 	    case BUILT_IN_COS:
 	    case BUILT_IN_COSF:
 	    case BUILT_IN_COSL:
-	      if (! expand_twoval_unop(builtin_optab, target, 0, op0, 0))
+	      if (! expand_twoval_unop(builtin_optab, op0, target, 0, 0))
 		abort();
 	      break;
 	    default:
Index: optabs.c
===================================================================
RCS file: /cvsroot/gcc/gcc/gcc/optabs.c,v
retrieving revision 1.214
diff -u -r1.214 optabs.c
--- optabs.c	6 Apr 2004 19:34:07 -0000	1.214
+++ optabs.c	7 Apr 2004 07:43:03 -0000
@@ -2159,7 +2159,7 @@
    Returns 1 if this operation can be performed; 0 if not.  */
 
 int
-expand_twoval_unop (optab unoptab, rtx targ0, rtx targ1, rtx op0,
+expand_twoval_unop (optab unoptab, rtx op0, rtx targ0, rtx targ1,
 		    int unsignedp)
 {
   enum machine_mode mode = GET_MODE (targ0 ? targ0 : targ1);
Index: reg-stack.c
===================================================================
RCS file: /cvsroot/gcc/gcc/gcc/reg-stack.c,v
retrieving revision 1.146
diff -u -r1.146 reg-stack.c
--- reg-stack.c	6 Apr 2004 19:34:09 -0000	1.146
+++ reg-stack.c	7 Apr 2004 07:43:04 -0000
@@ -1769,6 +1769,7 @@
 		break;
 
 	      case UNSPEC_SINCOS_COS:
+	      case UNSPEC_TAN_ONE:
 		/* These insns operate on the top two stack slots,
 		   first part of one input, double output insn.  */
 
@@ -1796,6 +1797,10 @@
 		break;
 
 	      case UNSPEC_SINCOS_SIN:
+	      case UNSPEC_TAN_TAN:
+		/* These insns operate on the top two stack slots,
+		   second part of one input, double output insn.  */
+
 		src1 = get_true_reg (&XVECEXP (pat_src, 0, 0));
 
 		emit_swap_insn (insn, regstack, *src1);
Index: config/i386/i386.md
===================================================================
RCS file: /cvsroot/gcc/gcc/gcc/config/i386/i386.md,v
retrieving revision 1.524
diff -u -r1.524 i386.md
--- config/i386/i386.md	6 Apr 2004 19:34:15 -0000	1.524
+++ config/i386/i386.md	7 Apr 2004 07:43:07 -0000
@@ -121,8 +121,11 @@
    (UNSPEC_FRNDINT		68)
    (UNSPEC_F2XM1		69)
 
+   ; x87 Double output FP
    (UNSPEC_SINCOS_COS		80)
    (UNSPEC_SINCOS_SIN		81)
+   (UNSPEC_TAN_ONE		82)
+   (UNSPEC_TAN_TAN		83)
 
    ; REP instruction
    (UNSPEC_REP			75)
@@ -15144,6 +15147,132 @@
    && !reload_completed && !reload_in_progress"
   [(set (match_dup 0) (unspec:XF [(match_dup 2)] UNSPEC_COS))]
   "")
+
+(define_insn "*tandf3_1"
+  [(set (match_operand:DF 0 "register_operand" "=f")
+	(unspec:DF [(match_operand:DF 2 "register_operand" "0")]
+		   UNSPEC_TAN_ONE))
+   (set (match_operand:DF 1 "register_operand" "=u")
+        (unspec:DF [(match_dup 2)] UNSPEC_TAN_TAN))]
+  "! TARGET_NO_FANCY_MATH_387 && TARGET_80387 
+   && flag_unsafe_math_optimizations"
+  "fptan"
+  [(set_attr "type" "fpspc")
+   (set_attr "mode" "DF")])
+
+;; optimize sequence: fptan
+;;		      fstp    %st(0)
+;;		      fld1
+;; into fptan insn.
+
+(define_peephole2
+  [(parallel[(set (match_operand:DF 0 "register_operand" "")
+		  (unspec:DF [(match_operand:DF 2 "register_operand" "")]
+			     UNSPEC_TAN_ONE))
+	     (set (match_operand:DF 1 "register_operand" "")
+		  (unspec:DF [(match_dup 2)] UNSPEC_TAN_TAN))])
+   (set (match_dup 0)
+        (match_operand:DF 3 "immediate_operand" ""))]
+  "standard_80387_constant_p (operands[3]) == 2"
+  [(parallel[(set (match_dup 0) (unspec:DF [(match_dup 2)] UNSPEC_TAN_ONE))
+   	     (set (match_dup 1) (unspec:DF [(match_dup 2)] UNSPEC_TAN_TAN))])]
+  "")
+
+(define_expand "tandf2"
+  [(parallel [(set (match_dup 2)
+		   (unspec:DF [(match_operand:DF 1 "register_operand" "")]
+			      UNSPEC_TAN_ONE))
+	      (set (match_operand:DF 0 "register_operand" "")
+		   (unspec:DF [(match_dup 1)] UNSPEC_TAN_TAN))])]
+  "! TARGET_NO_FANCY_MATH_387 && TARGET_80387 
+   && flag_unsafe_math_optimizations"
+{
+  operands[2] = gen_reg_rtx (DFmode);
+})
+
+(define_insn "*tansf3_1"
+  [(set (match_operand:SF 0 "register_operand" "=f")
+	(unspec:SF [(match_operand:SF 2 "register_operand" "0")]
+		   UNSPEC_TAN_ONE))
+   (set (match_operand:SF 1 "register_operand" "=u")
+        (unspec:SF [(match_dup 2)] UNSPEC_TAN_TAN))]
+  "! TARGET_NO_FANCY_MATH_387 && TARGET_80387 
+   && flag_unsafe_math_optimizations"
+  "fptan"
+  [(set_attr "type" "fpspc")
+   (set_attr "mode" "SF")])
+
+;; optimize sequence: fptan
+;;		      fstp    %st(0)
+;;		      fld1
+;; into fptan insn.
+
+(define_peephole2
+  [(parallel[(set (match_operand:SF 0 "register_operand" "")
+		  (unspec:SF [(match_operand:SF 2 "register_operand" "")]
+			     UNSPEC_TAN_ONE))
+	     (set (match_operand:SF 1 "register_operand" "")
+		  (unspec:SF [(match_dup 2)] UNSPEC_TAN_TAN))])
+   (set (match_dup 0)
+        (match_operand:SF 3 "immediate_operand" ""))]
+  "standard_80387_constant_p (operands[3]) == 2"
+  [(parallel[(set (match_dup 0) (unspec:SF [(match_dup 2)] UNSPEC_TAN_ONE))
+   	     (set (match_dup 1) (unspec:SF [(match_dup 2)] UNSPEC_TAN_TAN))])]
+  "")
+
+(define_expand "tansf2"
+  [(parallel [(set (match_dup 2)
+		   (unspec:SF [(match_operand:SF 1 "register_operand" "")]
+			      UNSPEC_TAN_ONE))
+	      (set (match_operand:SF 0 "register_operand" "")
+		   (unspec:SF [(match_dup 1)] UNSPEC_TAN_TAN))])]
+  "! TARGET_NO_FANCY_MATH_387 && TARGET_80387 
+   && flag_unsafe_math_optimizations"
+{
+  operands[2] = gen_reg_rtx (SFmode);
+})
+
+(define_insn "*tanxf3_1"
+  [(set (match_operand:XF 0 "register_operand" "=f")
+	(unspec:XF [(match_operand:XF 2 "register_operand" "0")]
+		   UNSPEC_TAN_ONE))
+   (set (match_operand:XF 1 "register_operand" "=u")
+        (unspec:XF [(match_dup 2)] UNSPEC_TAN_TAN))]
+  "! TARGET_NO_FANCY_MATH_387 && TARGET_80387 
+   && flag_unsafe_math_optimizations"
+  "fptan"
+  [(set_attr "type" "fpspc")
+   (set_attr "mode" "XF")])
+
+;; optimize sequence: fptan
+;;		      fstp    %st(0)
+;;		      fld1
+;; into fptan insn.
+
+(define_peephole2
+  [(parallel[(set (match_operand:XF 0 "register_operand" "")
+		  (unspec:XF [(match_operand:XF 2 "register_operand" "")]
+			     UNSPEC_TAN_ONE))
+	     (set (match_operand:XF 1 "register_operand" "")
+		  (unspec:XF [(match_dup 2)] UNSPEC_TAN_TAN))])
+   (set (match_dup 0)
+        (match_operand:XF 3 "immediate_operand" ""))]
+  "standard_80387_constant_p (operands[3]) == 2"
+  [(parallel[(set (match_dup 0) (unspec:XF [(match_dup 2)] UNSPEC_TAN_ONE))
+   	     (set (match_dup 1) (unspec:XF [(match_dup 2)] UNSPEC_TAN_TAN))])]
+  "")
+
+(define_expand "tanxf2"
+  [(parallel [(set (match_dup 2)
+		   (unspec:XF [(match_operand:XF 1 "register_operand" "")]
+			      UNSPEC_TAN_ONE))
+	      (set (match_operand:XF 0 "register_operand" "")
+		   (unspec:XF [(match_dup 1)] UNSPEC_TAN_TAN))])]
+  "! TARGET_NO_FANCY_MATH_387 && TARGET_80387 
+   && flag_unsafe_math_optimizations"
+{
+  operands[2] = gen_reg_rtx (XFmode);
+})
 
 (define_insn "atan2df3_1"
   [(set (match_operand:DF 0 "register_operand" "=f")
Index: testsuite/ChangeLog
===================================================================
RCS file: /cvsroot/gcc/gcc/gcc/testsuite/ChangeLog,v
retrieving revision 1.3658
diff -u -r1.3658 ChangeLog
--- testsuite/ChangeLog	7 Apr 2004 00:58:11 -0000	1.3658
+++ testsuite/ChangeLog	7 Apr 2004 07:43:10 -0000
@@ -1,3 +1,13 @@
+2004-04-06  Uros Bizjak  <uros@kss-loka.si>
+
+	* gcc.dg/i386-387-1.c: Add new test for __builtin_tan.
+	* gcc.dg/i386-387-2.c: Likewise.
+
+	* gcc.dg/i386-387-7.c: New test.
+	* gcc.dg/i386-387-8.c: New test.
+
+	* gcc.dg/builtins-37.c: New test.
+
 2004-04-06  Kaveh R. Ghazi  <ghazi@caip.rutgers.edu>
 
 	* gcc.dg/compat/struct-by-value-5a_main.c,
Index: testsuite/gcc.dg/i386-387-1.c
===================================================================
RCS file: /cvsroot/gcc/gcc/gcc/testsuite/gcc.dg/i386-387-1.c,v
retrieving revision 1.7
diff -u -r1.7 i386-387-1.c
--- testsuite/gcc.dg/i386-387-1.c	30 Jul 2003 22:48:45 -0000	1.7
+++ testsuite/gcc.dg/i386-387-1.c	7 Apr 2004 07:43:12 -0000
@@ -7,6 +7,7 @@
 /* { dg-final { scan-assembler "call\t_?atan2" } } */
 /* { dg-final { scan-assembler "call\t_?log" } } */
 /* { dg-final { scan-assembler "call\t_?exp" } } */
+/* { dg-final { scan-assembler "call\t_?tan" } } */
 
 double f1(double x) { return __builtin_sin(x); }
 double f2(double x) { return __builtin_cos(x); }
@@ -14,3 +15,4 @@
 double f4(double x, double y) { return __builtin_atan2(x,y); }
 double f5(double x) { return __builtin_log(x); }
 double f6(double x) { return __builtin_exp(x); }
+double f7(double x) { return __builtin_tan(x); }
Index: testsuite/gcc.dg/i386-387-2.c
===================================================================
RCS file: /cvsroot/gcc/gcc/gcc/testsuite/gcc.dg/i386-387-2.c,v
retrieving revision 1.5
diff -u -r1.5 i386-387-2.c
--- testsuite/gcc.dg/i386-387-2.c	15 Jun 2003 13:32:31 -0000	1.5
+++ testsuite/gcc.dg/i386-387-2.c	7 Apr 2004 07:43:12 -0000
@@ -7,6 +7,7 @@
 /* { dg-final { scan-assembler "fpatan" } } */
 /* { dg-final { scan-assembler "fyl2x" } } */
 /* { dg-final { scan-assembler "f2xm1" } } */
+/* { dg-final { scan-assembler "fptan" } } */
 
 double f1(double x) { return __builtin_sin(x); }
 double f2(double x) { return __builtin_cos(x); }
@@ -14,3 +15,4 @@
 double f4(double x, double y) { return __builtin_atan2(x,y); }
 double f5(double x) { return __builtin_log(x); }
 double f6(double x) { return __builtin_exp(x); }
+double f7(double x) { return __builtin_tan(x); }

/* Copyright (C) 2004 Free Software Foundation.

   Check tan, tanf and tanl built-in functions.

   Written by Uros Bizjak, 7th April 2004.  */

/* { dg-do compile } */
/* { dg-options "-O2 -ffast-math" } */

extern double tan(double);
extern float tanf(float);
extern long double tanl(long double);


/* Exercise the double variant (tan) of the built-in under test.  */
double test1(double x)
{
	return tan(x);
}

/* Exercise the float variant (tanf) of the built-in under test.  */
float test1f(float x)
{
	return tanf(x);
}

/* Exercise the long double variant (tanl) of the built-in under test.  */
long double test1l(long double x)
{
	return tanl(x);
}

/* Verify that the 387 fsincos instruction is generated.  */
/* { dg-do compile { target "i?86-*-*" } } */
/* { dg-options "-O -ffast-math -march=i686" } */
/* { dg-final { scan-assembler "fsincos" } } */

/* Return sin(x) + cos(x).  Both calls take the same argument; the
   scan-assembler directive above expects "fsincos" in the output.
   NOTE(review): no declarations of sin/cos are visible before this
   function -- presumably the test relies on GCC's built-ins; confirm.  */
double f1(double x)
{
  return sin(x) + cos (x);
}

/* Verify that the 387 fptan instruction is generated.  Also check the
   fptan peephole2 optimizer.  */
/* { dg-do compile { target "i?86-*-*" } } */
/* { dg-options "-O2 -ffast-math -march=i686" } */
/* { dg-final { scan-assembler "fptan" } } */
/* { dg-final { scan-assembler-not "fld1" } } */

/* Return 1.0 / tan(x).  The directives above expect "fptan" to appear
   in the assembly and "fld1" not to appear.
   NOTE(review): no declaration of tan is visible before this function
   -- presumably the test relies on GCC's built-in; confirm.  */
double f1(double x)
{
  return 1.0 / tan(x);
}

Follow-Ups:
- Re: [PATCH] implement fptan x87 instruction
  - From: Roger Sayle

Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]