This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[RFC PATCH] implement fma() as builtin x87 or SSE intrinsic


Hello!

This RFC patch implements fma() function as builtin x87 or SSE intrinsic. To implement three operator optab, infrastructure to handle three operators had to be implemented. In builtins.c expand_builtin_mathfn_3() function now handles three operator functions, and existing expand_builtin_mathfn_3() was renamed to expand_builtin_sin_cos(). In optabs.c, new function expand_ternop() was introduced.

fma() function, [together with ldexp() and hypot() which are trivial to implement] is last "pure" floating-point function from mathinline.h. What remains, are conversion functions like "floor", "ceil", "rint", etc... which depend on rounding mode switching. The implementation of these functions requires some extra thinking...

For fma(), this testcase:
double test(double x, double y, double z)
{
 return fma(x, y, z);
}

gets compiled with 'O2 -ffast-math -march=pentium4 -S -fomit-frame-pointer -mfpmath=sse' to:
test:
subl $12, %esp
movsd 24(%esp), %xmm0
mulsd 16(%esp), %xmm0
addsd 32(%esp), %xmm0
movsd %xmm0, (%esp)
fldl (%esp)
addl $12, %esp
ret


and with '-O2 -ffast-math -march=pentium4 -S -fomit-frame-pointer' to:
test:
       fldl 12(%esp)
       fmull   4(%esp)
       faddl   20(%esp)
       ret

Uros.


Index: gcc/builtins.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/builtins.c,v
retrieving revision 1.321
diff -u -p -r1.321 builtins.c
--- gcc/builtins.c	7 May 2004 05:38:14 -0000	1.321
+++ gcc/builtins.c	7 May 2004 08:20:19 -0000
@@ -96,6 +96,7 @@ static void expand_errno_check (tree, rt
 static rtx expand_builtin_mathfn (tree, rtx, rtx);
 static rtx expand_builtin_mathfn_2 (tree, rtx, rtx);
 static rtx expand_builtin_mathfn_3 (tree, rtx, rtx);
+static rtx expand_builtin_sin_cos (tree, rtx, rtx);
 static rtx expand_builtin_constant_p (tree, enum machine_mode);
 static rtx expand_builtin_args_info (tree);
 static rtx expand_builtin_next_arg (tree);
@@ -1821,7 +1822,7 @@ expand_builtin_mathfn_2 (tree exp, rtx t
   if (! flag_errno_math || ! HONOR_NANS (mode))
     errno_set = false;
 
-  /* Alway stabilize the argument list.  */
+  /* Always stabilize the argument list.  */
   narg = save_expr (arg1);
   if (narg != arg1)
     {
@@ -1874,7 +1875,7 @@ expand_builtin_mathfn_2 (tree exp, rtx t
   return target;
 }
 
-/* Expand a call to the builtin sin and cos math functions.
+/* Expand a call to the builtin ternary math functions (fma).
    Return 0 if a normal call should be emitted rather than expanding the
    function in-line.  EXP is the expression that is a call to the builtin
    function; if convenient, the result should be placed in TARGET.
@@ -1885,6 +1886,117 @@ static rtx
 expand_builtin_mathfn_3 (tree exp, rtx target, rtx subtarget)
 {
   optab builtin_optab;
+  rtx op0, op1, op2, insns;
+  tree fndecl = get_callee_fndecl (exp);
+  tree arglist = TREE_OPERAND (exp, 1);
+  tree arg0, arg1, arg2, temp, narg;
+  enum machine_mode mode;
+  bool errno_set = true;
+  bool stable = true;
+
+  if (!validate_arglist (arglist, REAL_TYPE, REAL_TYPE,
+			 REAL_TYPE, VOID_TYPE))
+    return 0;
+
+  arg0 = TREE_VALUE (arglist);
+  arg1 = TREE_VALUE (arglist = TREE_CHAIN (arglist));
+  arg2 = TREE_VALUE (TREE_CHAIN (arglist));
+
+  switch (DECL_FUNCTION_CODE (fndecl))
+    {
+    case BUILT_IN_FMA:
+    case BUILT_IN_FMAF:
+    case BUILT_IN_FMAL:
+      builtin_optab = fma_optab; break;
+    default:
+      abort ();
+    }
+
+  /* Make a suitable register to place result in.  */
+  mode = TYPE_MODE (TREE_TYPE (exp));
+
+  /* Before working hard, check whether the instruction is available.  */
+  if (builtin_optab->handlers[(int) mode].insn_code == CODE_FOR_nothing)
+    return 0;
+
+  target = gen_reg_rtx (mode);
+
+  if (! flag_errno_math || ! HONOR_NANS (mode))
+    errno_set = false;
+
+  /* Always stabilize the argument list.  */
+  narg = save_expr (arg2);
+  if (narg != arg2)
+    {
+      temp = build_tree_list (NULL_TREE, narg);
+      stable = false;
+    }
+  else
+    temp = TREE_CHAIN (arglist);
+
+  narg = save_expr (arg1);
+  if (narg != arg1)
+    {
+      arglist = tree_cons (NULL_TREE, narg, temp);
+      stable = false;
+    }
+  else if (! stable)
+    arglist = tree_cons (NULL_TREE, arg1, temp);
+
+  narg = save_expr (arg0);
+  if (narg != arg0)
+    {
+      arglist = tree_cons (NULL_TREE, narg, temp);
+      stable = false;
+    }
+  else if (! stable)
+    arglist = tree_cons (NULL_TREE, arg0, temp);
+
+  if (! stable)
+    exp = build_function_call_expr (fndecl, arglist);
+
+  op0 = expand_expr (arg0, subtarget, VOIDmode, 0);
+  op1 = expand_expr (arg1, 0, VOIDmode, 0);
+  op2 = expand_expr (arg2, 0, VOIDmode, 0);
+
+  emit_queue ();
+  start_sequence ();
+
+  /* Compute into TARGET.
+     Set TARGET to wherever the result comes back.  */
+  target = expand_ternop (mode, builtin_optab, op0, op1, op2, target, 0);
+
+  /* If we were unable to expand via the builtin, stop the sequence
+     (without outputting the insns) and call to the library function
+     with the stabilized argument list.  */
+  if (target == 0)
+    {
+      end_sequence ();
+      return expand_call (exp, target, target == const0_rtx);
+    }
+
+  if (errno_set)
+    expand_errno_check (exp, target);
+
+  /* Output the entire sequence.  */
+  insns = get_insns ();
+  end_sequence ();
+  emit_insn (insns);
+
+  return target;
+}
+
+/* Expand a call to the builtin sin and cos math functions.
+   Return 0 if a normal call should be emitted rather than expanding the
+   function in-line.  EXP is the expression that is a call to the builtin
+   function; if convenient, the result should be placed in TARGET.
+   SUBTARGET may be used as the target for computing one of EXP's
+   operands.  */
+
+static rtx
+expand_builtin_sin_cos (tree exp, rtx target, rtx subtarget)
+{
+  optab builtin_optab;
   rtx op0, insns, before_call;
   tree fndecl = get_callee_fndecl (exp);
   tree arglist = TREE_OPERAND (exp, 1);
@@ -5392,6 +5504,16 @@ expand_builtin (tree exp, rtx target, rt
 	return target;
       break;
 
+    case BUILT_IN_FMA:
+    case BUILT_IN_FMAF:
+    case BUILT_IN_FMAL:
+      if (! flag_unsafe_math_optimizations)
+	break;
+      target = expand_builtin_mathfn_3 (exp, target, subtarget);
+      if (target)
+	return target;
+      break;
+
     case BUILT_IN_SIN:
     case BUILT_IN_SINF:
     case BUILT_IN_SINL:
@@ -5400,7 +5522,7 @@ expand_builtin (tree exp, rtx target, rt
     case BUILT_IN_COSL:
       if (! flag_unsafe_math_optimizations)
 	break;
-      target = expand_builtin_mathfn_3 (exp, target, subtarget);
+      target = expand_builtin_sin_cos (exp, target, subtarget);
       if (target)
 	return target;
       break;
Index: gcc/genopinit.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/genopinit.c,v
retrieving revision 1.75
diff -u -p -r1.75 genopinit.c
--- gcc/genopinit.c	7 May 2004 05:38:14 -0000	1.75
+++ gcc/genopinit.c	7 May 2004 08:20:19 -0000
@@ -93,6 +93,7 @@ static const char * const optabs[] =
   "umod_optab->handlers[$A].insn_code = CODE_FOR_$(umod$a3$)",
   "fmod_optab->handlers[$A].insn_code = CODE_FOR_$(fmod$a3$)",
   "drem_optab->handlers[$A].insn_code = CODE_FOR_$(drem$a3$)",
+  "fma_optab->handlers[$A].insn_code = CODE_FOR_$(fma$a4$)",
   "ftrunc_optab->handlers[$A].insn_code = CODE_FOR_$(ftrunc$F$a2$)",
   "and_optab->handlers[$A].insn_code = CODE_FOR_$(and$a3$)",
   "ior_optab->handlers[$A].insn_code = CODE_FOR_$(ior$a3$)",
Index: gcc/optabs.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/optabs.c,v
retrieving revision 1.221
diff -u -p -r1.221 optabs.c
--- gcc/optabs.c	7 May 2004 05:38:14 -0000	1.221
+++ gcc/optabs.c	7 May 2004 08:20:20 -0000
@@ -2402,6 +2402,136 @@ expand_simple_unop (enum machine_mode mo
   return expand_unop (mode, unop, op0, target, unsignedp);
 }
 
+/* Generate code to perform an operation specified by TERNOPTAB
+   on operands OP0, OP1 and OP2, with results having machine-mode MODE.
+
+   UNSIGNEDP is for the case where we have to widen the operands
+   to perform the operation.  It says to use zero-extension.
+
+   If TARGET is nonzero, the value is generated there,
+   if it is convenient to do so.  */
+
+rtx
+expand_ternop (enum machine_mode mode, optab ternoptab,
+	       rtx op0, rtx op1, rtx op2, rtx target,
+	       int unsignedp)
+{
+  enum mode_class class;
+  enum machine_mode wider_mode;
+  rtx entry_last = get_last_insn ();
+  rtx last;
+
+  class = GET_MODE_CLASS (mode);
+
+  op0 = protect_from_queue (op0, 0);
+  op1 = protect_from_queue (op1, 0);
+  op2 = protect_from_queue (op2, 0);
+
+  if (flag_force_mem)
+    {
+      op0 = force_not_mem (op0);
+      op1 = force_not_mem (op1);
+      op2 = force_not_mem (op2);
+    }
+
+  if (target)
+    target = protect_from_queue (target, 1);
+  else
+    target = gen_reg_rtx (mode);
+
+  /* Record where to go back to if we fail.  */
+  last = get_last_insn ();
+
+  if (ternoptab->handlers[(int) mode].insn_code != CODE_FOR_nothing)
+    {
+      int icode = (int) ternoptab->handlers[(int) mode].insn_code;
+      enum machine_mode mode0 = insn_data[icode].operand[1].mode;
+      enum machine_mode mode1 = insn_data[icode].operand[2].mode;
+      enum machine_mode mode2 = insn_data[icode].operand[3].mode;
+      rtx pat;
+      rtx xop0 = op0, xop1 = op1, xop2 = op2;
+
+      /* In case the insn wants input operands in modes different from
+	 those of the actual operands, convert the operands.  It would
+	 seem that we don't need to convert CONST_INTs, but we do, so
+	 that they're properly zero-extended, sign-extended or truncated
+	 for their mode.  */
+
+      if (GET_MODE (op0) != mode0 && mode0 != VOIDmode)
+	xop0 = convert_modes (mode0,
+			      GET_MODE (op0) != VOIDmode
+			      ? GET_MODE (op0)
+			      : mode,
+			      xop0, unsignedp);
+
+      if (GET_MODE (op1) != mode1 && mode1 != VOIDmode)
+	xop1 = convert_modes (mode1,
+			      GET_MODE (op1) != VOIDmode
+			      ? GET_MODE (op1)
+			      : mode,
+			      xop1, unsignedp);
+
+      if (GET_MODE (op2) != mode2 && mode2 != VOIDmode)
+	xop2 = convert_modes (mode2,
+			      GET_MODE (op2) != VOIDmode
+			      ? GET_MODE (op2)
+			      : mode,
+			      xop2, unsignedp);
+
+      /* Now, if insn doesn't accept these operands, put them into pseudos.  */
+      if (! (*insn_data[icode].operand[1].predicate) (xop0, mode0))
+	xop0 = copy_to_mode_reg (mode0, xop0);
+
+      if (! (*insn_data[icode].operand[2].predicate) (xop1, mode1))
+	xop1 = copy_to_mode_reg (mode1, xop1);
+
+      if (! (*insn_data[icode].operand[3].predicate) (xop2, mode2))
+	xop2 = copy_to_mode_reg (mode2, xop2);
+
+      if (! (*insn_data[icode].operand[0].predicate) (target, mode))
+	abort ();
+
+      pat = GEN_FCN (icode) (target, xop0, xop1, xop2);
+      if (pat)
+	{
+	  emit_insn (pat);
+	  return target;
+	}
+      else
+	delete_insns_since (last);
+    }
+
+  /* It can't be done in this mode.  Can we do it in a wider mode?  */
+
+  if (class == MODE_INT || class == MODE_FLOAT || class == MODE_COMPLEX_FLOAT)
+    {
+      for (wider_mode = GET_MODE_WIDER_MODE (mode); wider_mode != VOIDmode;
+	   wider_mode = GET_MODE_WIDER_MODE (wider_mode))
+	{
+	  if (ternoptab->handlers[(int) wider_mode].insn_code
+	      != CODE_FOR_nothing)
+	    {
+	      rtx t0 = gen_reg_rtx (wider_mode);
+	      rtx cop0 = convert_modes (wider_mode, mode, op0, unsignedp);
+	      rtx cop1 = convert_modes (wider_mode, mode, op1, unsignedp);
+	      rtx cop2 = convert_modes (wider_mode, mode, op2, unsignedp);
+
+	      if (expand_ternop (wider_mode, ternoptab, cop0, cop1, cop2,
+				 t0, unsignedp))
+		{
+		  convert_move (target, t0, unsignedp);
+		  return target;
+		}
+	      else
+		delete_insns_since (last);
+	    }
+	}
+    }
+
+  delete_insns_since (entry_last);
+  return 0;
+}
+
 /* Try calculating
 	(clz:narrow x)
    as
@@ -5330,6 +5460,7 @@ init_optabs (void)
   umod_optab = init_optab (UMOD);
   fmod_optab = init_optab (UNKNOWN);
   drem_optab = init_optab (UNKNOWN);
+  fma_optab = init_optab (UNKNOWN);
   ftrunc_optab = init_optab (UNKNOWN);
   and_optab = init_optab (AND);
   ior_optab = init_optab (IOR);
Index: gcc/optabs.h
===================================================================
RCS file: /cvs/gcc/gcc/gcc/optabs.h,v
retrieving revision 1.29
diff -u -p -r1.29 optabs.h
--- gcc/optabs.h	7 May 2004 05:38:14 -0000	1.29
+++ gcc/optabs.h	7 May 2004 08:20:20 -0000
@@ -96,6 +96,8 @@ enum optab_index
   /* Floating point remainder functions */
   OTI_fmod,
   OTI_drem,
+  /* Floating point multiply-add */
+  OTI_fma,
   /* Convert float to integer in float fmt */
   OTI_ftrunc,
 
@@ -252,6 +254,7 @@ extern GTY(()) optab optab_table[OTI_MAX
 #define umod_optab (optab_table[OTI_umod])
 #define fmod_optab (optab_table[OTI_fmod])
 #define drem_optab (optab_table[OTI_drem])
+#define fma_optab (optab_table[OTI_fma])
 #define ftrunc_optab (optab_table[OTI_ftrunc])
 #define and_optab (optab_table[OTI_and])
 #define ior_optab (optab_table[OTI_ior])
@@ -419,6 +422,9 @@ extern int expand_twoval_binop (optab, r
 
 /* Expand a unary arithmetic operation given optab rtx operand.  */
 extern rtx expand_unop (enum machine_mode, optab, rtx, rtx, int);
+
+/* Expand a ternary arithmetic operation given optab rtx operand.  */
+extern rtx expand_ternop (enum machine_mode, optab, rtx, rtx, rtx, rtx, int);
 
 /* Expand the absolute value operation.  */
 extern rtx expand_abs_nojump (enum machine_mode, rtx, rtx, int);
Index: gcc/config/i386/i386.md
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/i386/i386.md,v
retrieving revision 1.537
diff -u -p -r1.537 i386.md
--- gcc/config/i386/i386.md	7 May 2004 05:38:19 -0000	1.537
+++ gcc/config/i386/i386.md	7 May 2004 08:20:23 -0000
@@ -16314,6 +16314,43 @@
   emit_move_insn (operands[2], temp);
   emit_move_insn (operands[9], CONST1_RTX (XFmode));  /* fld1 */
 })
+
+(define_expand "fmasf4"
+  [(set (match_dup 4)
+	(mult:SF (match_operand:SF 1 "register_operand" "")
+		 (match_operand:SF 2 "nonimmediate_operand" "")))
+   (set (match_operand:SF 0 "register_operand" "")
+	(plus:SF (match_dup 4)
+		 (match_operand:SF 3 "nonimmediate_operand" "")))]
+  "TARGET_80387 || TARGET_SSE_MATH
+   && flag_unsafe_math_optimizations"
+{
+  operands[4] = gen_reg_rtx (SFmode);
+})
+
+(define_expand "fmadf4"
+  [(set (match_dup 4)
+	(mult:DF (match_operand:DF 1 "register_operand" "")
+		 (match_operand:DF 2 "nonimmediate_operand" "")))
+   (set (match_operand:DF 0 "register_operand" "")
+	(plus:DF (match_dup 4)
+	(match_operand:DF 3 "nonimmediate_operand" "")))]
+  "TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)
+   && flag_unsafe_math_optimizations"
+{
+  operands[4] = gen_reg_rtx (DFmode);
+})
+
+(define_expand "fmaxf4"
+  [(set (match_dup 4) (mult:XF (match_operand:XF 1 "register_operand" "")
+			       (match_operand:XF 2 "register_operand" "")))
+   (set (match_operand:XF 0 "register_operand" "")
+	(plus:XF (match_dup 4) (match_operand:XF 3 "register_operand" "")))]
+  "TARGET_80387
+   && flag_unsafe_math_optimizations"
+{
+  operands[4] = gen_reg_rtx (XFmode);
+})
 
 ;; Block operation instructions
 

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]