This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[Patch, ia64, committed] Change min latency flt division code.
- From: Steve Ellcey <sje at cup dot hp dot com>
- To: gcc-patches at gcc dot gnu dot org
- Date: Tue, 31 Mar 2009 14:49:42 -0700 (PDT)
- Subject: [Patch, ia64, committed] Change min latency flt division code.
- Reply-to: sje at cup dot hp dot com
A while back the (default) maximum throughput versions of the floating
point division code for IA64 were changed from post-reload expands to
pre-reload expands in order to get better instruction scheduling.
This patch makes the same change to the minimum latency versions of the
floating point division code.
Tested on IA64 HP-UX with no regressions and committed.
Steve Ellcey
sje@cup.hp.com
2009-03-31 Steve Ellcey <sje@cup.hp.com>
* config/ia64/ia64.md (divsf3_internal_lat): Remove.
(divdf3_internal_lat): Remove.
(divxf3_internal_lat): Remove.
(divxf3_internal_thr): Remove.
(divxf3): Use divxf3_internal.
* config/ia64/div.md (divsf3_internal_lat): New.
(divdf3_internal_lat): New.
(divxf3_internal): New.
Index: config/ia64/ia64.md
===================================================================
--- config/ia64/ia64.md (revision 145326)
+++ config/ia64/ia64.md (working copy)
@@ -3157,64 +3157,6 @@ (define_expand "divsf3"
DONE;
})
-(define_insn_and_split "divsf3_internal_lat"
- [(set (match_operand:SF 0 "fr_register_operand" "=&f")
- (div:SF (match_operand:SF 1 "fr_register_operand" "f")
- (match_operand:SF 2 "fr_register_operand" "f")))
- (clobber (match_scratch:XF 3 "=&f"))
- (clobber (match_scratch:XF 4 "=f"))
- (clobber (match_scratch:BI 5 "=c"))]
- "TARGET_INLINE_FLOAT_DIV == INL_MIN_LAT"
- "#"
- "&& reload_completed"
- [(parallel [(set (match_dup 6) (unspec:XF [(const_int 1) (match_dup 8)]
- UNSPEC_FR_RECIP_APPROX_RES))
- (set (match_dup 5) (unspec:BI [(match_dup 7) (match_dup 8)]
- UNSPEC_FR_RECIP_APPROX))
- (use (const_int 0))])
- (cond_exec (ne (match_dup 5) (const_int 0))
- (parallel [(set (match_dup 3) (mult:XF (match_dup 7) (match_dup 6)))
- (use (const_int 1))]))
- (cond_exec (ne (match_dup 5) (const_int 0))
- (parallel [(set (match_dup 4)
- (minus:XF (match_dup 10)
- (mult:XF (match_dup 8) (match_dup 6))))
- (use (const_int 1))]))
- (cond_exec (ne (match_dup 5) (const_int 0))
- (parallel [(set (match_dup 3)
- (plus:XF (mult:XF (match_dup 4) (match_dup 3))
- (match_dup 3)))
- (use (const_int 1))]))
- (cond_exec (ne (match_dup 5) (const_int 0))
- (parallel [(set (match_dup 4) (mult:XF (match_dup 4) (match_dup 4)))
- (use (const_int 1))]))
- (cond_exec (ne (match_dup 5) (const_int 0))
- (parallel [(set (match_dup 3)
- (plus:XF (mult:XF (match_dup 4) (match_dup 3))
- (match_dup 3)))
- (use (const_int 1))]))
- (cond_exec (ne (match_dup 5) (const_int 0))
- (parallel [(set (match_dup 4) (mult:XF (match_dup 4) (match_dup 4)))
- (use (const_int 1))]))
- (cond_exec (ne (match_dup 5) (const_int 0))
- (parallel [(set (match_dup 9)
- (float_truncate:DF
- (plus:XF (mult:XF (match_dup 4) (match_dup 3))
- (match_dup 3))))
- (use (const_int 1))]))
- (cond_exec (ne (match_dup 5) (const_int 0))
- (set (match_dup 0)
- (float_truncate:SF (match_dup 6))))
- ]
-{
- operands[6] = gen_rtx_REG (XFmode, REGNO (operands[0]));
- operands[7] = gen_rtx_REG (XFmode, REGNO (operands[1]));
- operands[8] = gen_rtx_REG (XFmode, REGNO (operands[2]));
- operands[9] = gen_rtx_REG (DFmode, REGNO (operands[0]));
- operands[10] = CONST1_RTX (XFmode);
-}
- [(set_attr "predicable" "no")])
-
;; Inline square root.
(define_insn "*sqrt_approx"
@@ -3588,88 +3530,6 @@ (define_expand "divdf3"
DONE;
})
-(define_insn_and_split "divdf3_internal_lat"
- [(set (match_operand:DF 0 "fr_register_operand" "=&f")
- (div:DF (match_operand:DF 1 "fr_register_operand" "f")
- (match_operand:DF 2 "fr_register_operand" "f")))
- (clobber (match_scratch:XF 3 "=&f"))
- (clobber (match_scratch:XF 4 "=&f"))
- (clobber (match_scratch:XF 5 "=&f"))
- (clobber (match_scratch:BI 6 "=c"))]
- "TARGET_INLINE_FLOAT_DIV == INL_MIN_LAT"
- "#"
- "&& reload_completed"
- [(parallel [(set (match_dup 7) (unspec:XF [(const_int 1) (match_dup 9)]
- UNSPEC_FR_RECIP_APPROX_RES))
- (set (match_dup 6) (unspec:BI [(match_dup 8) (match_dup 9)]
- UNSPEC_FR_RECIP_APPROX))
- (use (const_int 0))])
- (cond_exec (ne (match_dup 6) (const_int 0))
- (parallel [(set (match_dup 3) (mult:XF (match_dup 8) (match_dup 7)))
- (use (const_int 1))]))
- (cond_exec (ne (match_dup 6) (const_int 0))
- (parallel [(set (match_dup 4)
- (minus:XF (match_dup 12)
- (mult:XF (match_dup 9) (match_dup 7))))
- (use (const_int 1))]))
- (cond_exec (ne (match_dup 6) (const_int 0))
- (parallel [(set (match_dup 3)
- (plus:XF (mult:XF (match_dup 4) (match_dup 3))
- (match_dup 3)))
- (use (const_int 1))]))
- (cond_exec (ne (match_dup 6) (const_int 0))
- (parallel [(set (match_dup 5) (mult:XF (match_dup 4) (match_dup 4)))
- (use (const_int 1))]))
- (cond_exec (ne (match_dup 6) (const_int 0))
- (parallel [(set (match_dup 7)
- (plus:XF (mult:XF (match_dup 4) (match_dup 7))
- (match_dup 7)))
- (use (const_int 1))]))
- (cond_exec (ne (match_dup 6) (const_int 0))
- (parallel [(set (match_dup 3)
- (plus:XF (mult:XF (match_dup 5) (match_dup 3))
- (match_dup 3)))
- (use (const_int 1))]))
- (cond_exec (ne (match_dup 6) (const_int 0))
- (parallel [(set (match_dup 4) (mult:XF (match_dup 5) (match_dup 5)))
- (use (const_int 1))]))
- (cond_exec (ne (match_dup 6) (const_int 0))
- (parallel [(set (match_dup 7)
- (plus:XF (mult:XF (match_dup 5) (match_dup 7))
- (match_dup 7)))
- (use (const_int 1))]))
- (cond_exec (ne (match_dup 6) (const_int 0))
- (parallel [(set (match_dup 10)
- (float_truncate:DF
- (plus:XF (mult:XF (match_dup 4) (match_dup 3))
- (match_dup 3))))
- (use (const_int 1))]))
- (cond_exec (ne (match_dup 6) (const_int 0))
- (parallel [(set (match_dup 7)
- (plus:XF (mult:XF (match_dup 4) (match_dup 7))
- (match_dup 7)))
- (use (const_int 1))]))
- (cond_exec (ne (match_dup 6) (const_int 0))
- (parallel [(set (match_dup 11)
- (float_truncate:DF
- (minus:XF (match_dup 8)
- (mult:XF (match_dup 9) (match_dup 3)))))
- (use (const_int 1))]))
- (cond_exec (ne (match_dup 6) (const_int 0))
- (set (match_dup 0)
- (float_truncate:DF (plus:XF (mult:XF (match_dup 5) (match_dup 7))
- (match_dup 3)))))
- ]
-{
- operands[7] = gen_rtx_REG (XFmode, REGNO (operands[0]));
- operands[8] = gen_rtx_REG (XFmode, REGNO (operands[1]));
- operands[9] = gen_rtx_REG (XFmode, REGNO (operands[2]));
- operands[10] = gen_rtx_REG (DFmode, REGNO (operands[3]));
- operands[11] = gen_rtx_REG (DFmode, REGNO (operands[5]));
- operands[12] = CONST1_RTX (XFmode);
-}
- [(set_attr "predicable" "no")])
-
;; Inline square root.
(define_expand "sqrtdf2"
@@ -4179,175 +4039,11 @@ (define_expand "divxf3"
(match_operand:XF 2 "fr_register_operand" "")))]
"TARGET_INLINE_FLOAT_DIV"
{
- rtx insn;
- if (TARGET_INLINE_FLOAT_DIV == INL_MIN_LAT)
- insn = gen_divxf3_internal_lat (operands[0], operands[1], operands[2]);
- else
- insn = gen_divxf3_internal_thr (operands[0], operands[1], operands[2]);
- emit_insn (insn);
+ /* There is only one divxf3 sequence, not two like for divsf and divdf. */
+ emit_insn (gen_divxf3_internal (operands[0], operands[1], operands[2]));
DONE;
})
-(define_insn_and_split "divxf3_internal_lat"
- [(set (match_operand:XF 0 "fr_register_operand" "=&f")
- (div:XF (match_operand:XF 1 "fr_register_operand" "f")
- (match_operand:XF 2 "fr_register_operand" "f")))
- (clobber (match_scratch:XF 3 "=&f"))
- (clobber (match_scratch:XF 4 "=&f"))
- (clobber (match_scratch:XF 5 "=&f"))
- (clobber (match_scratch:XF 6 "=&f"))
- (clobber (match_scratch:BI 7 "=c"))]
- "TARGET_INLINE_FLOAT_DIV == INL_MIN_LAT"
- "#"
- "&& reload_completed"
- [(parallel [(set (match_dup 0) (unspec:XF [(const_int 1) (match_dup 2)]
- UNSPEC_FR_RECIP_APPROX_RES))
- (set (match_dup 7) (unspec:BI [(match_dup 1) (match_dup 2)]
- UNSPEC_FR_RECIP_APPROX))
- (use (const_int 0))])
- (cond_exec (ne (match_dup 7) (const_int 0))
- (parallel [(set (match_dup 3)
- (minus:XF (match_dup 8)
- (mult:XF (match_dup 2) (match_dup 0))))
- (use (const_int 1))]))
- (cond_exec (ne (match_dup 7) (const_int 0))
- (parallel [(set (match_dup 4) (mult:XF (match_dup 1) (match_dup 0)))
- (use (const_int 1))]))
- (cond_exec (ne (match_dup 7) (const_int 0))
- (parallel [(set (match_dup 5) (mult:XF (match_dup 3) (match_dup 3)))
- (use (const_int 1))]))
- (cond_exec (ne (match_dup 7) (const_int 0))
- (parallel [(set (match_dup 6)
- (plus:XF (mult:XF (match_dup 3) (match_dup 3))
- (match_dup 3)))
- (use (const_int 1))]))
- (cond_exec (ne (match_dup 7) (const_int 0))
- (parallel [(set (match_dup 3)
- (plus:XF (mult:XF (match_dup 5) (match_dup 5))
- (match_dup 3)))
- (use (const_int 1))]))
- (cond_exec (ne (match_dup 7) (const_int 0))
- (parallel [(set (match_dup 5)
- (plus:XF (mult:XF (match_dup 6) (match_dup 0))
- (match_dup 0)))
- (use (const_int 1))]))
- (cond_exec (ne (match_dup 7) (const_int 0))
- (parallel [(set (match_dup 0)
- (plus:XF (mult:XF (match_dup 5) (match_dup 3))
- (match_dup 0)))
- (use (const_int 1))]))
- (cond_exec (ne (match_dup 7) (const_int 0))
- (parallel [(set (match_dup 4)
- (minus:XF (match_dup 1)
- (mult:XF (match_dup 2) (match_dup 4))))
- (use (const_int 1))]))
- (cond_exec (ne (match_dup 7) (const_int 0))
- (parallel [(set (match_dup 3)
- (plus:XF (mult:XF (match_dup 3) (match_dup 0))
- (match_dup 4)))
- (use (const_int 1))]))
- (cond_exec (ne (match_dup 7) (const_int 0))
- (parallel [(set (match_dup 5)
- (minus:XF (match_dup 8)
- (mult:XF (match_dup 2) (match_dup 0))))
- (use (const_int 1))]))
- (cond_exec (ne (match_dup 7) (const_int 0))
- (parallel [(set (match_dup 0)
- (plus:XF (mult:XF (match_dup 4) (match_dup 0))
- (match_dup 0)))
- (use (const_int 1))]))
- (cond_exec (ne (match_dup 7) (const_int 0))
- (parallel [(set (match_dup 4)
- (minus:XF (match_dup 1)
- (mult:XF (match_dup 2) (match_dup 3))))
- (use (const_int 1))]))
- (cond_exec (ne (match_dup 7) (const_int 0))
- (set (match_dup 0)
- (plus:XF (mult:XF (match_dup 4) (match_dup 0))
- (match_dup 3))))
- ]
- "operands[8] = CONST1_RTX (XFmode);"
- [(set_attr "predicable" "no")])
-
-(define_insn_and_split "divxf3_internal_thr"
- [(set (match_operand:XF 0 "fr_register_operand" "=&f")
- (div:XF (match_operand:XF 1 "fr_register_operand" "f")
- (match_operand:XF 2 "fr_register_operand" "f")))
- (clobber (match_scratch:XF 3 "=&f"))
- (clobber (match_scratch:XF 4 "=&f"))
- (clobber (match_scratch:BI 5 "=c"))]
- "TARGET_INLINE_FLOAT_DIV == INL_MAX_THR"
- "#"
- "&& reload_completed"
- [(parallel [(set (match_dup 0) (unspec:XF [(const_int 1) (match_dup 2)]
- UNSPEC_FR_RECIP_APPROX_RES))
- (set (match_dup 5) (unspec:BI [(match_dup 1) (match_dup 2)]
- UNSPEC_FR_RECIP_APPROX))
- (use (const_int 0))])
- (cond_exec (ne (match_dup 5) (const_int 0))
- (parallel [(set (match_dup 3)
- (minus:XF (match_dup 6)
- (mult:XF (match_dup 2) (match_dup 0))))
- (use (const_int 1))]))
- (cond_exec (ne (match_dup 5) (const_int 0))
- (parallel [(set (match_dup 4)
- (plus:XF (mult:XF (match_dup 3) (match_dup 0))
- (match_dup 0)))
- (use (const_int 1))]))
- (cond_exec (ne (match_dup 5) (const_int 0))
- (parallel [(set (match_dup 3) (mult:XF (match_dup 3) (match_dup 3)))
- (use (const_int 1))]))
- (cond_exec (ne (match_dup 5) (const_int 0))
- (parallel [(set (match_dup 3)
- (plus:XF (mult:XF (match_dup 3) (match_dup 4))
- (match_dup 4)))
- (use (const_int 1))]))
- (cond_exec (ne (match_dup 5) (const_int 0))
- (parallel [(set (match_dup 4) (mult:XF (match_dup 1) (match_dup 0)))
- (use (const_int 1))]))
- (cond_exec (ne (match_dup 5) (const_int 0))
- (parallel [(set (match_dup 0)
- (minus:XF (match_dup 6)
- (mult:XF (match_dup 2) (match_dup 3))))
- (use (const_int 1))]))
- (cond_exec (ne (match_dup 5) (const_int 0))
- (parallel [(set (match_dup 0)
- (plus:XF (mult:XF (match_dup 0) (match_dup 3))
- (match_dup 3)))
- (use (const_int 1))]))
- (cond_exec (ne (match_dup 5) (const_int 0))
- (parallel [(set (match_dup 3)
- (minus:XF (match_dup 1)
- (mult:XF (match_dup 2) (match_dup 4))))
- (use (const_int 1))]))
- (cond_exec (ne (match_dup 5) (const_int 0))
- (parallel [(set (match_dup 3)
- (plus:XF (mult:XF (match_dup 3) (match_dup 0))
- (match_dup 4)))
- (use (const_int 1))]))
- (cond_exec (ne (match_dup 5) (const_int 0))
- (parallel [(set (match_dup 4)
- (minus:XF (match_dup 6)
- (mult:XF (match_dup 2) (match_dup 0))))
- (use (const_int 1))]))
- (cond_exec (ne (match_dup 5) (const_int 0))
- (parallel [(set (match_dup 0)
- (plus:XF (mult:XF (match_dup 4) (match_dup 0))
- (match_dup 0)))
- (use (const_int 1))]))
- (cond_exec (ne (match_dup 5) (const_int 0))
- (parallel [(set (match_dup 4)
- (minus:XF (match_dup 1)
- (mult:XF (match_dup 2) (match_dup 3))))
- (use (const_int 1))]))
- (cond_exec (ne (match_dup 5) (const_int 0))
- (set (match_dup 0)
- (plus:XF (mult:XF (match_dup 4) (match_dup 0))
- (match_dup 3))))
- ]
- "operands[6] = CONST1_RTX (XFmode);"
- [(set_attr "predicable" "no")])
-
;; Inline square root.
(define_expand "sqrtxf2"
Index: config/ia64/div.md
===================================================================
--- config/ia64/div.md (revision 145326)
+++ config/ia64/div.md (working copy)
@@ -257,6 +257,56 @@ (define_expand "divsf3_internal_thr"
DONE;
})
+;; Single precision floating point division (minimum latency algorithm).
+
+(define_expand "divsf3_internal_lat"
+ [(set (match_operand:SF 0 "fr_register_operand" "")
+ (div:SF (match_operand:SF 1 "fr_register_operand" "")
+ (match_operand:SF 2 "fr_register_operand" "")))]
+ "TARGET_INLINE_FLOAT_DIV"
+{ /* Emits operands[0] = operands[1] / operands[2] (SFmode) as a chain of RFmode steps that all take COND. */
+ rtx y = gen_reg_rtx (RFmode);
+ rtx a = gen_reg_rtx (RFmode);
+ rtx b = gen_reg_rtx (RFmode);
+ rtx e = gen_reg_rtx (RFmode);
+ rtx q = gen_reg_rtx (RFmode);
+ rtx e1 = gen_reg_rtx (RFmode);
+ rtx y1 = gen_reg_rtx (RFmode);
+ rtx q1 = gen_reg_rtx (RFmode);
+ rtx r = gen_reg_rtx (RFmode);
+ rtx q_res = gen_reg_rtx (RFmode);
+ rtx cond = gen_reg_rtx (BImode); /* Handed to recip_approx_rf and to every *_cond step below. */
+ rtx zero = CONST0_RTX (RFmode);
+ rtx one = CONST1_RTX (RFmode);
+ rtx status0 = CONST0_RTX (SImode); /* Used by recip_approx_rf and by the final step only. */
+ rtx status1 = CONST1_RTX (SImode); /* Used by all intermediate steps. */
+ rtx trunc_sgl = CONST0_RTX (SImode); /* Passed to the two steps annotated single(...). */
+ rtx trunc_off = CONST2_RTX (SImode); /* Passed to the steps with no rounding annotation. */
+
+ /* Empty conversions to put inputs into RFmode; every temporary below is RFmode. */
+ emit_insn (gen_extendsfrf2 (a, operands[1]));
+ emit_insn (gen_extendsfrf2 (b, operands[2]));
+ /* y = approximation of 1 / b. NOTE(review): presumably this also sets cond -- confirm in recip_approx_rf. */
+ emit_insn (gen_recip_approx_rf (y, a, b, cond, status0));
+ /* q = a * y */
+ emit_insn (gen_mulrf3_cond (q, cond, a, y, zero, status1, trunc_off));
+ /* e = 1 - (b * y) */
+ emit_insn (gen_m2subrf4_cond (e, cond, one, b, y, zero, status1, trunc_off));
+ /* e1 = e + (e * e) */
+ emit_insn (gen_m2addrf4_cond (e1, cond, e, e, e, zero, status1, trunc_off));
+ /* q1 = single(q + (q * e1)) */
+ emit_insn (gen_m2addrf4_cond (q1, cond, q, q, e1, zero, status1, trunc_sgl));
+ /* y1 = y + (y * e1) */
+ emit_insn (gen_m2addrf4_cond (y1, cond, y, y, e1, zero, status1, trunc_off));
+ /* r = a - (q1 * b) */
+ emit_insn (gen_m2subrf4_cond (r, cond, a, q1, b, zero, status1, trunc_off));
+ /* Q = single (q1 + (r * y1)). Unlike the steps above this passes y (not zero) and status0. NOTE(review): presumably y is the result when cond is false -- confirm in m2addrf4_cond. */
+ emit_insn (gen_m2addrf4_cond (q_res, cond, q1, r, y1, y, status0, trunc_sgl));
+ /* Conversion back into SFmode. */
+ emit_insn (gen_truncrfsf2 (operands[0], q_res));
+ DONE;
+})
+
;; Double precision floating point division (maximum throughput algorithm).
@@ -312,3 +362,132 @@ (define_expand "divdf3_internal_thr"
emit_insn (gen_truncrfdf2 (operands[0], q_res));
DONE;
})
+
+;; Double precision floating point division (minimum latency algorithm).
+
+(define_expand "divdf3_internal_lat"
+ [(set (match_operand:DF 0 "fr_register_operand" "")
+ (div:DF (match_operand:DF 1 "fr_register_operand" "")
+ (match_operand:DF 2 "fr_register_operand" "")))]
+ "TARGET_INLINE_FLOAT_DIV"
+{ /* Emits operands[0] = operands[1] / operands[2] (DFmode) as a chain of RFmode steps that all take COND. */
+ rtx q_res = gen_reg_rtx (RFmode);
+ rtx a = gen_reg_rtx (RFmode);
+ rtx b = gen_reg_rtx (RFmode);
+ rtx y = gen_reg_rtx (RFmode);
+ rtx e = gen_reg_rtx (RFmode);
+ rtx y1 = gen_reg_rtx (RFmode);
+ rtx e1 = gen_reg_rtx (RFmode);
+ rtx q1 = gen_reg_rtx (RFmode);
+ rtx y2 = gen_reg_rtx (RFmode);
+ rtx e2 = gen_reg_rtx (RFmode);
+ rtx q2 = gen_reg_rtx (RFmode);
+ rtx e3 = gen_reg_rtx (RFmode);
+ rtx q = gen_reg_rtx (RFmode);
+ rtx r1 = gen_reg_rtx (RFmode);
+ rtx cond = gen_reg_rtx (BImode); /* Handed to recip_approx_rf and to every *_cond step below. */
+ rtx zero = CONST0_RTX (RFmode);
+ rtx one = CONST1_RTX (RFmode);
+ rtx status0 = CONST0_RTX (SImode); /* Used by recip_approx_rf and by the final step only. */
+ rtx status1 = CONST1_RTX (SImode); /* Used by all intermediate steps. */
+ rtx trunc_dbl = CONST1_RTX (SImode); /* Passed to the two steps annotated double(...). */
+ rtx trunc_off = CONST2_RTX (SImode); /* Passed to the steps with no rounding annotation. */
+
+ /* Empty conversions to put inputs into RFmode; every temporary below is RFmode. */
+ emit_insn (gen_extenddfrf2 (a, operands[1]));
+ emit_insn (gen_extenddfrf2 (b, operands[2]));
+ /* y = approximation of 1 / b. NOTE(review): presumably this also sets cond -- confirm in recip_approx_rf. */
+ emit_insn (gen_recip_approx_rf (y, a, b, cond, status0));
+ /* e = 1 - (b * y) */
+ emit_insn (gen_m2subrf4_cond (e, cond, one, b, y, zero, status1, trunc_off));
+ /* q = a * y */
+ emit_insn (gen_mulrf3_cond (q, cond, a, y, zero, status1, trunc_off));
+ /* e2 = e + (e * e) */
+ emit_insn (gen_m2addrf4_cond (e2, cond, e, e, e, zero, status1, trunc_off));
+ /* e1 = e * e */
+ emit_insn (gen_mulrf3_cond (e1, cond, e, e, zero, status1, trunc_off));
+ /* e3 = e + (e1 * e1) */
+ emit_insn (gen_m2addrf4_cond (e3, cond, e, e1, e1, zero, status1, trunc_off));
+ /* q1 = q + (q * e2) */
+ emit_insn (gen_m2addrf4_cond (q1, cond, q, q, e2, zero, status1, trunc_off));
+ /* y1 = y + (y * e2) */
+ emit_insn (gen_m2addrf4_cond (y1, cond, y, y, e2, zero, status1, trunc_off));
+ /* q2 = double(q + (q1 * e3)) */
+ emit_insn (gen_m2addrf4_cond (q2, cond, q, q1, e3, zero, status1, trunc_dbl));
+ /* y2 = y + (y1 * e3) */
+ emit_insn (gen_m2addrf4_cond (y2, cond, y, y1, e3, zero, status1, trunc_off));
+ /* r1 = a - (b * q2) */
+ emit_insn (gen_m2subrf4_cond (r1, cond, a, b, q2, zero, status1, trunc_off));
+ /* Q = double (q2 + (r1 * y2)). Unlike the steps above this passes y (not zero) and status0. NOTE(review): presumably y is the result when cond is false -- confirm in m2addrf4_cond. */
+ emit_insn (gen_m2addrf4_cond (q_res, cond, q2, r1, y2, y, status0, trunc_dbl));
+ /* Conversion back into DFmode */
+ emit_insn (gen_truncrfdf2 (operands[0], q_res));
+ DONE;
+})
+
+;; Extended precision floating point division.
+
+(define_expand "divxf3_internal"
+ [(set (match_operand:XF 0 "fr_register_operand" "")
+ (div:XF (match_operand:XF 1 "fr_register_operand" "")
+ (match_operand:XF 2 "fr_register_operand" "")))]
+ "TARGET_INLINE_FLOAT_DIV"
+{ /* Emits operands[0] = operands[1] / operands[2] (XFmode) as a chain of RFmode steps that all take COND. */
+ rtx q_res = gen_reg_rtx (RFmode);
+ rtx a = gen_reg_rtx (RFmode);
+ rtx b = gen_reg_rtx (RFmode);
+ rtx y = gen_reg_rtx (RFmode);
+ rtx e = gen_reg_rtx (RFmode);
+ rtx y1 = gen_reg_rtx (RFmode);
+ rtx e1 = gen_reg_rtx (RFmode);
+ rtx q1 = gen_reg_rtx (RFmode);
+ rtx y2 = gen_reg_rtx (RFmode);
+ rtx e2 = gen_reg_rtx (RFmode);
+ rtx y3 = gen_reg_rtx (RFmode);
+ rtx e3 = gen_reg_rtx (RFmode);
+ rtx e4 = gen_reg_rtx (RFmode);
+ rtx q = gen_reg_rtx (RFmode);
+ rtx r = gen_reg_rtx (RFmode);
+ rtx r1 = gen_reg_rtx (RFmode);
+ rtx cond = gen_reg_rtx (BImode); /* Handed to recip_approx_rf and to every *_cond step below. */
+ rtx zero = CONST0_RTX (RFmode);
+ rtx one = CONST1_RTX (RFmode);
+ rtx status0 = CONST0_RTX (SImode); /* Used by recip_approx_rf and by the final step only. */
+ rtx status1 = CONST1_RTX (SImode); /* Used by all intermediate steps. */
+ rtx trunc_off = CONST2_RTX (SImode); /* Passed to every step here, including the final one. */
+
+ /* Empty conversions to put inputs into RFmode; every temporary below is RFmode. */
+ emit_insn (gen_extendxfrf2 (a, operands[1]));
+ emit_insn (gen_extendxfrf2 (b, operands[2]));
+ /* y = approximation of 1 / b. NOTE(review): presumably this also sets cond -- confirm in recip_approx_rf. */
+ emit_insn (gen_recip_approx_rf (y, a, b, cond, status0));
+ /* e = 1 - (b * y) */
+ emit_insn (gen_m2subrf4_cond (e, cond, one, b, y, zero, status1, trunc_off));
+ /* q = a * y */
+ emit_insn (gen_mulrf3_cond (q, cond, a, y, zero, status1, trunc_off));
+ /* e2 = e + (e * e) */
+ emit_insn (gen_m2addrf4_cond (e2, cond, e, e, e, zero, status1, trunc_off));
+ /* e1 = e * e */
+ emit_insn (gen_mulrf3_cond (e1, cond, e, e, zero, status1, trunc_off));
+ /* y1 = y + (y * e2) */
+ emit_insn (gen_m2addrf4_cond (y1, cond, y, y, e2, zero, status1, trunc_off));
+ /* e3 = e + (e1 * e1) */
+ emit_insn (gen_m2addrf4_cond (e3, cond, e, e1, e1, zero, status1, trunc_off));
+ /* y2 = y + (y1 * e3) */
+ emit_insn (gen_m2addrf4_cond (y2, cond, y, y1, e3, zero, status1, trunc_off));
+ /* r = a - (b * q) */
+ emit_insn (gen_m2subrf4_cond (r, cond, a, b, q, zero, status1, trunc_off));
+ /* e4 = 1 - (b * y2) */
+ emit_insn (gen_m2subrf4_cond (e4, cond, one, b, y2, zero, status1, trunc_off));
+ /* q1 = q + (r * y2) */
+ emit_insn (gen_m2addrf4_cond (q1, cond, q, r, y2, zero, status1, trunc_off));
+ /* y3 = y2 + (y2 * e4) */
+ emit_insn (gen_m2addrf4_cond (y3, cond, y2, y2, e4, zero, status1, trunc_off));
+ /* r1 = a - (b * q1) */
+ emit_insn (gen_m2subrf4_cond (r1, cond, a, b, q1, zero, status1, trunc_off));
+ /* Q = q1 + (r1 * y3). Unlike the steps above this passes y (not zero) and status0. NOTE(review): presumably y is the result when cond is false -- confirm in m2addrf4_cond. */
+ emit_insn (gen_m2addrf4_cond (q_res, cond, q1, r1, y3, y, status0, trunc_off));
+ /* Conversion back into XFmode */
+ emit_insn (gen_truncrfxf2 (operands[0], q_res));
+ DONE;
+})