This is the mail archive of the
gcc@gcc.gnu.org
mailing list for the GCC project.
[RFC PATCH] implement asin() and acos() as built-in function
- From: Uros Bizjak <uros at kss-loka dot si>
- To: "Kaveh R. Ghazi" <ghazi at caip dot rutgers dot edu>
- Cc: gcc at gcc dot gnu dot org, Roger Sayle <roger at eyesopen dot com>
- Date: Mon, 19 Apr 2004 17:52:43 +0200
- Subject: [RFC PATCH] implement asin() and acos() as built-in function
- References: <40837167.5000902@kss-loka.si> <200404191202.i3JC2lnu009969@caip.rutgers.edu>
Hello!
We'd get more speedup if you could put the other opts into the .md
file also. (Hack hack, measure, measure) it becomes a 36.5%
improvement judging by adding just inline asin and fmod to almabench.
Attached to this message is an RFC patch which implements asin() and
acos() as built-in functions. The patch is tested and produces code as
expected; however, I would like someone to review it _before_ I
implement the long double and float versions (they are trivial once this
RFC patch is OK'd). I will prepare the full patch with a ChangeLog tomorrow...
BTW: I'm really curious whether it will speed up almabench, and by how much...
Uros.
Index: gcc/builtins.c
===================================================================
RCS file: /cvsroot/gcc/gcc/gcc/builtins.c,v
retrieving revision 1.310
diff -u -p -r1.310 builtins.c
--- gcc/builtins.c 19 Apr 2004 13:03:05 -0000 1.310
+++ gcc/builtins.c 19 Apr 2004 15:47:08 -0000
@@ -1585,6 +1585,14 @@ expand_builtin_mathfn (tree exp, rtx tar
case BUILT_IN_LOG2F:
case BUILT_IN_LOG2L:
errno_set = true; builtin_optab = log2_optab; break;
+ case BUILT_IN_ASIN:
+ case BUILT_IN_ASINF:
+ case BUILT_IN_ASINL:
+ builtin_optab = asin_optab; break;
+ case BUILT_IN_ACOS:
+ case BUILT_IN_ACOSF:
+ case BUILT_IN_ACOSL:
+ builtin_optab = acos_optab; break;
case BUILT_IN_TAN:
case BUILT_IN_TANF:
case BUILT_IN_TANL:
@@ -5204,6 +5212,12 @@ expand_builtin (tree exp, rtx target, rt
case BUILT_IN_TAN:
case BUILT_IN_TANF:
case BUILT_IN_TANL:
+ case BUILT_IN_ASIN:
+ case BUILT_IN_ASINF:
+ case BUILT_IN_ASINL:
+ case BUILT_IN_ACOS:
+ case BUILT_IN_ACOSF:
+ case BUILT_IN_ACOSL:
case BUILT_IN_ATAN:
case BUILT_IN_ATANF:
case BUILT_IN_ATANL:
Index: gcc/genopinit.c
===================================================================
RCS file: /cvsroot/gcc/gcc/gcc/genopinit.c,v
retrieving revision 1.71
diff -u -p -r1.71 genopinit.c
--- gcc/genopinit.c 15 Apr 2004 02:43:44 -0000 1.71
+++ gcc/genopinit.c 19 Apr 2004 15:47:08 -0000
@@ -124,7 +124,9 @@ static const char * const optabs[] =
"nearbyint_optab->handlers[$A].insn_code = CODE_FOR_$(nearbyint$a2$)",
"sincos_optab->handlers[$A].insn_code = CODE_FOR_$(sincos$a3$)",
"sin_optab->handlers[$A].insn_code = CODE_FOR_$(sin$a2$)",
+ "asin_optab->handlers[$A].insn_code = CODE_FOR_$(asin$a2$)",
"cos_optab->handlers[$A].insn_code = CODE_FOR_$(cos$a2$)",
+ "acos_optab->handlers[$A].insn_code = CODE_FOR_$(acos$a2$)",
"exp_optab->handlers[$A].insn_code = CODE_FOR_$(exp$a2$)",
"exp10_optab->handlers[$A].insn_code = CODE_FOR_$(exp10$a2$)",
"exp2_optab->handlers[$A].insn_code = CODE_FOR_$(exp2$a2$)",
Index: gcc/optabs.c
===================================================================
RCS file: /cvsroot/gcc/gcc/gcc/optabs.c,v
retrieving revision 1.216
diff -u -p -r1.216 optabs.c
--- gcc/optabs.c 15 Apr 2004 02:43:43 -0000 1.216
+++ gcc/optabs.c 19 Apr 2004 15:47:09 -0000
@@ -5380,7 +5380,9 @@ init_optabs (void)
nearbyint_optab = init_optab (UNKNOWN);
sincos_optab = init_optab (UNKNOWN);
sin_optab = init_optab (UNKNOWN);
+ asin_optab = init_optab (UNKNOWN);
cos_optab = init_optab (UNKNOWN);
+ acos_optab = init_optab (UNKNOWN);
exp_optab = init_optab (UNKNOWN);
exp10_optab = init_optab (UNKNOWN);
exp2_optab = init_optab (UNKNOWN);
Index: gcc/optabs.h
===================================================================
RCS file: /cvsroot/gcc/gcc/gcc/optabs.h,v
retrieving revision 1.25
diff -u -p -r1.25 optabs.h
--- gcc/optabs.h 15 Apr 2004 02:43:43 -0000 1.25
+++ gcc/optabs.h 19 Apr 2004 15:47:09 -0000
@@ -152,8 +152,12 @@ enum optab_index
OTI_sincos,
/* Sine */
OTI_sin,
+ /* Inverse sine */
+ OTI_asin,
/* Cosine */
OTI_cos,
+ /* Inverse cosine */
+ OTI_acos,
/* Exponential */
OTI_exp,
/* Base-10 Exponential */
@@ -271,7 +275,9 @@ extern GTY(()) optab optab_table[OTI_MAX
#define sqrt_optab (optab_table[OTI_sqrt])
#define sincos_optab (optab_table[OTI_sincos])
#define sin_optab (optab_table[OTI_sin])
+#define asin_optab (optab_table[OTI_asin])
#define cos_optab (optab_table[OTI_cos])
+#define acos_optab (optab_table[OTI_acos])
#define exp_optab (optab_table[OTI_exp])
#define exp10_optab (optab_table[OTI_exp10])
#define exp2_optab (optab_table[OTI_exp2])
Index: gcc/config/i386/i386.md
===================================================================
RCS file: /cvsroot/gcc/gcc/gcc/config/i386/i386.md,v
retrieving revision 1.528
diff -u -p -r1.528 i386.md
--- gcc/config/i386/i386.md 15 Apr 2004 02:43:44 -0000 1.528
+++ gcc/config/i386/i386.md 19 Apr 2004 15:47:13 -0000
@@ -15284,6 +15284,64 @@
DONE;
})
+;; TESTING -- fsqrt pattern taking an XFmode register input and producing a DFmode result; its shape matches the (sqrt:DF <XFmode reg>) set emitted by the asindf2/acosdf2 expanders.
+
+
+(define_insn "*sqrttruncxfdf2"
+ [(set (match_operand:DF 0 "register_operand" "=f")
+ (sqrt:DF (match_operand:XF 1 "register_operand" "0")))]
+ "TARGET_80387 && !TARGET_NO_FANCY_MATH_387"
+ "fsqrt"
+ [(set_attr "type" "fpspc")
+ (set_attr "mode" "DF")
+ (set_attr "athlon_decode" "direct")])
+
+(define_expand "asindf2"
+ [(set (match_dup 2)
+ (float_extend:XF (match_operand:DF 1 "register_operand" "")))
+ (set (match_dup 3) (mult:XF (match_dup 2) (match_dup 2)))
+ (set (match_dup 5) (minus:XF (match_dup 4) (match_dup 3)))
+ (set (match_dup 6) (sqrt:DF (match_dup 5)))
+ (parallel [(set (match_operand:DF 0 "register_operand" "")
+ (unspec:DF [(match_dup 6) (match_dup 1)]
+ UNSPEC_FPATAN))
+ (clobber (match_scratch:DF 7 ""))])]
+ "! TARGET_NO_FANCY_MATH_387 && TARGET_80387
+ && flag_unsafe_math_optimizations"
+{ /* Preparation for the template above: x = operand 1 is extended to XFmode, squared, subtracted from 1.0 and square-rooted; the last step feeds (sqrt (1 - x*x), x) to UNSPEC_FPATAN.  NOTE(review): presumably this yields atan (x / sqrt (1 - x*x)) = asin (x) -- confirm the operand order against the existing fpatan pattern.  */
+ int i; /* Index over the XFmode scratch operands 2..5.  */
+
+ for (i=2; i<6; i++) /* operands[2] = extended x, [3] = x*x, [4] = 1.0, [5] = 1 - x*x.  */
+ operands[i] = gen_reg_rtx (XFmode);
+ operands[6] = gen_reg_rtx (DFmode); /* DFmode pseudo receiving sqrt (1 - x*x).  */
+
+ emit_move_insn (operands[4], CONST1_RTX (XFmode)); /* fld1: operands[4] = 1.0, the minuend of the 1 - x*x stored in operands[5].  */
+})
+
+(define_expand "acosdf2"
+ [(set (match_dup 2)
+ (float_extend:XF (match_operand:DF 1 "register_operand" "")))
+ (set (match_dup 3) (mult:XF (match_dup 2) (match_dup 2)))
+ (set (match_dup 5) (minus:XF (match_dup 4) (match_dup 3)))
+ (set (match_dup 6) (sqrt:DF (match_dup 5)))
+ (parallel [(set (match_operand:DF 0 "register_operand" "")
+ (unspec:DF [(match_dup 1) (match_dup 6)]
+ UNSPEC_FPATAN))
+ (clobber (match_scratch:DF 7 ""))])]
+ "! TARGET_NO_FANCY_MATH_387 && TARGET_80387
+ && flag_unsafe_math_optimizations"
+{ /* Preparation for the template above: x = operand 1 is extended to XFmode, squared, subtracted from 1.0 and square-rooted; the last step feeds (x, sqrt (1 - x*x)) to UNSPEC_FPATAN, i.e. the operands are in the opposite order to asindf2.  NOTE(review): presumably this yields atan (sqrt (1 - x*x) / x) = acos (x) -- confirm the operand order against the existing fpatan pattern.  */
+ int i; /* Index over the XFmode scratch operands 2..5.  */
+
+ for (i=2; i<6; i++) /* operands[2] = extended x, [3] = x*x, [4] = 1.0, [5] = 1 - x*x.  */
+ operands[i] = gen_reg_rtx (XFmode);
+ operands[6] = gen_reg_rtx (DFmode); /* DFmode pseudo receiving sqrt (1 - x*x).  */
+
+ emit_move_insn (operands[4], CONST1_RTX (XFmode)); /* fld1: operands[4] = 1.0, the minuend of the 1 - x*x stored in operands[5].  */
+})
+
+;; TESTING END
+
(define_insn "*fyl2x_sfxf3"
[(set (match_operand:SF 0 "register_operand" "=f")
(unspec:SF [(match_operand:SF 2 "register_operand" "0")