This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[Patch, ia64, committed] Change min latency flt division code.


A while back the (default) maximum throughput versions of the floating
point division code for IA64 were changed from post-reload splits to
pre-reload expands in order to get better instruction scheduling.

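This patch makes the same change to the minimum latency versions of the
floating point division code.  It also replaces the two XFmode patterns
(divxf3_internal_lat and divxf3_internal_thr) with a single
divxf3_internal expand, since there is only one divxf3 sequence.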

Tested on IA64 HP-UX with no regressions and committed.

Steve Ellcey
sje@cup.hp.com



2009-03-31  Steve Ellcey  <sje@cup.hp.com>

	* config/ia64/ia64.md (divsf3_internal_lat): Remove.
	(divdf3_internal_lat): Remove.
	(divxf3_internal_lat): Remove.
	(divxf3_internal_thr): Remove.
	(divxf3): Use divxf3_internal.
	* config/ia64/div.md (divsf3_internal_lat): New.
	(divdf3_internal_lat): New.
	(divxf3_internal): New.

Index: config/ia64/ia64.md
===================================================================
--- config/ia64/ia64.md	(revision 145326)
+++ config/ia64/ia64.md	(working copy)
@@ -3157,64 +3157,6 @@ (define_expand "divsf3"
   DONE;
 })
 
-(define_insn_and_split "divsf3_internal_lat"
-  [(set (match_operand:SF 0 "fr_register_operand" "=&f")
-	(div:SF (match_operand:SF 1 "fr_register_operand" "f")
-		(match_operand:SF 2 "fr_register_operand" "f")))
-   (clobber (match_scratch:XF 3 "=&f"))
-   (clobber (match_scratch:XF 4 "=f"))
-   (clobber (match_scratch:BI 5 "=c"))]
-  "TARGET_INLINE_FLOAT_DIV == INL_MIN_LAT"
-  "#"
-  "&& reload_completed"
-  [(parallel [(set (match_dup 6) (unspec:XF [(const_int 1) (match_dup 8)]
-					    UNSPEC_FR_RECIP_APPROX_RES))
-	      (set (match_dup 5) (unspec:BI [(match_dup 7) (match_dup 8)]
-					    UNSPEC_FR_RECIP_APPROX))
-	      (use (const_int 0))])
-   (cond_exec (ne (match_dup 5) (const_int 0))
-     (parallel [(set (match_dup 3) (mult:XF (match_dup 7) (match_dup 6)))
-		(use (const_int 1))]))
-   (cond_exec (ne (match_dup 5) (const_int 0))
-     (parallel [(set (match_dup 4)
-		     (minus:XF (match_dup 10)
-			       (mult:XF (match_dup 8) (match_dup 6))))
-		(use (const_int 1))]))
-   (cond_exec (ne (match_dup 5) (const_int 0))
-     (parallel [(set (match_dup 3)
-		     (plus:XF (mult:XF (match_dup 4) (match_dup 3))
-			      (match_dup 3)))
-		(use (const_int 1))]))
-   (cond_exec (ne (match_dup 5) (const_int 0))
-     (parallel [(set (match_dup 4) (mult:XF (match_dup 4) (match_dup 4)))
-		(use (const_int 1))]))
-   (cond_exec (ne (match_dup 5) (const_int 0))
-     (parallel [(set (match_dup 3)
-		     (plus:XF (mult:XF (match_dup 4) (match_dup 3))
-			      (match_dup 3)))
-		(use (const_int 1))]))
-   (cond_exec (ne (match_dup 5) (const_int 0))
-     (parallel [(set (match_dup 4) (mult:XF (match_dup 4) (match_dup 4)))
-		(use (const_int 1))]))
-   (cond_exec (ne (match_dup 5) (const_int 0))
-     (parallel [(set (match_dup 9)
-		     (float_truncate:DF
-		       (plus:XF (mult:XF (match_dup 4) (match_dup 3))
-			      (match_dup 3))))
-		(use (const_int 1))]))
-   (cond_exec (ne (match_dup 5) (const_int 0))
-     (set (match_dup 0)
-	  (float_truncate:SF (match_dup 6))))
-  ] 
-{
-  operands[6] = gen_rtx_REG (XFmode, REGNO (operands[0]));
-  operands[7] = gen_rtx_REG (XFmode, REGNO (operands[1]));
-  operands[8] = gen_rtx_REG (XFmode, REGNO (operands[2]));
-  operands[9] = gen_rtx_REG (DFmode, REGNO (operands[0]));
-  operands[10] = CONST1_RTX (XFmode);
-}
-  [(set_attr "predicable" "no")])
-
 ;; Inline square root.
 
 (define_insn "*sqrt_approx"
@@ -3588,88 +3530,6 @@ (define_expand "divdf3"
   DONE;
 })
 
-(define_insn_and_split "divdf3_internal_lat"
-  [(set (match_operand:DF 0 "fr_register_operand" "=&f")
-	(div:DF (match_operand:DF 1 "fr_register_operand" "f")
-		(match_operand:DF 2 "fr_register_operand" "f")))
-   (clobber (match_scratch:XF 3 "=&f"))
-   (clobber (match_scratch:XF 4 "=&f"))
-   (clobber (match_scratch:XF 5 "=&f"))
-   (clobber (match_scratch:BI 6 "=c"))]
-  "TARGET_INLINE_FLOAT_DIV == INL_MIN_LAT"
-  "#"
-  "&& reload_completed"
-  [(parallel [(set (match_dup 7) (unspec:XF [(const_int 1) (match_dup 9)]
-					    UNSPEC_FR_RECIP_APPROX_RES))
-	      (set (match_dup 6) (unspec:BI [(match_dup 8) (match_dup 9)]
-					    UNSPEC_FR_RECIP_APPROX))
-	      (use (const_int 0))])
-   (cond_exec (ne (match_dup 6) (const_int 0))
-     (parallel [(set (match_dup 3) (mult:XF (match_dup 8) (match_dup 7)))
-		(use (const_int 1))]))
-   (cond_exec (ne (match_dup 6) (const_int 0))
-     (parallel [(set (match_dup 4)
-		     (minus:XF (match_dup 12)
-			       (mult:XF (match_dup 9) (match_dup 7))))
-		(use (const_int 1))]))
-   (cond_exec (ne (match_dup 6) (const_int 0))
-     (parallel [(set (match_dup 3)
-		     (plus:XF (mult:XF (match_dup 4) (match_dup 3))
-			      (match_dup 3)))
-		(use (const_int 1))]))
-   (cond_exec (ne (match_dup 6) (const_int 0))
-     (parallel [(set (match_dup 5) (mult:XF (match_dup 4) (match_dup 4)))
-		(use (const_int 1))]))
-   (cond_exec (ne (match_dup 6) (const_int 0))
-     (parallel [(set (match_dup 7)
-		     (plus:XF (mult:XF (match_dup 4) (match_dup 7))
-			      (match_dup 7)))
-		(use (const_int 1))]))
-   (cond_exec (ne (match_dup 6) (const_int 0))
-     (parallel [(set (match_dup 3)
-		     (plus:XF (mult:XF (match_dup 5) (match_dup 3))
-			      (match_dup 3)))
-		(use (const_int 1))]))
-   (cond_exec (ne (match_dup 6) (const_int 0))
-     (parallel [(set (match_dup 4) (mult:XF (match_dup 5) (match_dup 5)))
-		(use (const_int 1))]))
-   (cond_exec (ne (match_dup 6) (const_int 0))
-     (parallel [(set (match_dup 7)
-		     (plus:XF (mult:XF (match_dup 5) (match_dup 7))
-			      (match_dup 7)))
-		(use (const_int 1))]))
-   (cond_exec (ne (match_dup 6) (const_int 0))
-     (parallel [(set (match_dup 10)
-		     (float_truncate:DF
-		       (plus:XF (mult:XF (match_dup 4) (match_dup 3))
-			      (match_dup 3))))
-		(use (const_int 1))]))
-   (cond_exec (ne (match_dup 6) (const_int 0))
-     (parallel [(set (match_dup 7)
-		     (plus:XF (mult:XF (match_dup 4) (match_dup 7))
-			      (match_dup 7)))
-		(use (const_int 1))]))
-   (cond_exec (ne (match_dup 6) (const_int 0))
-     (parallel [(set (match_dup 11)
-		     (float_truncate:DF
-		       (minus:XF (match_dup 8)
-				 (mult:XF (match_dup 9) (match_dup 3)))))
-		(use (const_int 1))]))
-   (cond_exec (ne (match_dup 6) (const_int 0))
-     (set (match_dup 0)
-	  (float_truncate:DF (plus:XF (mult:XF (match_dup 5) (match_dup 7))
-			      (match_dup 3)))))
-  ] 
-{
-  operands[7] = gen_rtx_REG (XFmode, REGNO (operands[0]));
-  operands[8] = gen_rtx_REG (XFmode, REGNO (operands[1]));
-  operands[9] = gen_rtx_REG (XFmode, REGNO (operands[2]));
-  operands[10] = gen_rtx_REG (DFmode, REGNO (operands[3]));
-  operands[11] = gen_rtx_REG (DFmode, REGNO (operands[5]));
-  operands[12] = CONST1_RTX (XFmode);
-}
-  [(set_attr "predicable" "no")])
-
 ;; Inline square root.
 
 (define_expand "sqrtdf2"
@@ -4179,175 +4039,11 @@ (define_expand "divxf3"
 		(match_operand:XF 2 "fr_register_operand" "")))]
   "TARGET_INLINE_FLOAT_DIV"
 {
-  rtx insn;
-  if (TARGET_INLINE_FLOAT_DIV == INL_MIN_LAT)
-    insn = gen_divxf3_internal_lat (operands[0], operands[1], operands[2]);
-  else
-    insn = gen_divxf3_internal_thr (operands[0], operands[1], operands[2]);
-  emit_insn (insn);
+  /* There is only one divxf3 sequence, not two like for divsf and divdf.  */
+  emit_insn (gen_divxf3_internal (operands[0], operands[1], operands[2]));
   DONE;
 })
 
-(define_insn_and_split "divxf3_internal_lat"
-  [(set (match_operand:XF 0 "fr_register_operand" "=&f")
-	(div:XF (match_operand:XF 1 "fr_register_operand" "f")
-		(match_operand:XF 2 "fr_register_operand" "f")))
-   (clobber (match_scratch:XF 3 "=&f"))
-   (clobber (match_scratch:XF 4 "=&f"))
-   (clobber (match_scratch:XF 5 "=&f"))
-   (clobber (match_scratch:XF 6 "=&f"))
-   (clobber (match_scratch:BI 7 "=c"))]
-  "TARGET_INLINE_FLOAT_DIV == INL_MIN_LAT"
-  "#"
-  "&& reload_completed"
-  [(parallel [(set (match_dup 0) (unspec:XF [(const_int 1) (match_dup 2)]
-					    UNSPEC_FR_RECIP_APPROX_RES))
-	      (set (match_dup 7) (unspec:BI [(match_dup 1) (match_dup 2)]
-					    UNSPEC_FR_RECIP_APPROX))
-	      (use (const_int 0))])
-   (cond_exec (ne (match_dup 7) (const_int 0))
-     (parallel [(set (match_dup 3)
-		     (minus:XF (match_dup 8)
-			       (mult:XF (match_dup 2) (match_dup 0))))
-		(use (const_int 1))]))
-   (cond_exec (ne (match_dup 7) (const_int 0))
-     (parallel [(set (match_dup 4) (mult:XF (match_dup 1) (match_dup 0)))
-		(use (const_int 1))]))
-   (cond_exec (ne (match_dup 7) (const_int 0))
-     (parallel [(set (match_dup 5) (mult:XF (match_dup 3) (match_dup 3)))
-		(use (const_int 1))]))
-   (cond_exec (ne (match_dup 7) (const_int 0))
-     (parallel [(set (match_dup 6)
-		     (plus:XF (mult:XF (match_dup 3) (match_dup 3))
-			      (match_dup 3)))
-		(use (const_int 1))]))
-   (cond_exec (ne (match_dup 7) (const_int 0))
-     (parallel [(set (match_dup 3)
-		     (plus:XF (mult:XF (match_dup 5) (match_dup 5))
-			      (match_dup 3)))
-		(use (const_int 1))]))
-   (cond_exec (ne (match_dup 7) (const_int 0))
-     (parallel [(set (match_dup 5)
-		     (plus:XF (mult:XF (match_dup 6) (match_dup 0))
-			      (match_dup 0)))
-		(use (const_int 1))]))
-   (cond_exec (ne (match_dup 7) (const_int 0))
-     (parallel [(set (match_dup 0)
-		     (plus:XF (mult:XF (match_dup 5) (match_dup 3))
-			      (match_dup 0)))
-		(use (const_int 1))]))
-   (cond_exec (ne (match_dup 7) (const_int 0))
-     (parallel [(set (match_dup 4)
-		     (minus:XF (match_dup 1)
-			       (mult:XF (match_dup 2) (match_dup 4))))
-		(use (const_int 1))]))
-   (cond_exec (ne (match_dup 7) (const_int 0))
-     (parallel [(set (match_dup 3)
-		     (plus:XF (mult:XF (match_dup 3) (match_dup 0))
-			      (match_dup 4)))
-		(use (const_int 1))]))
-   (cond_exec (ne (match_dup 7) (const_int 0))
-     (parallel [(set (match_dup 5)
-		     (minus:XF (match_dup 8)
-			       (mult:XF (match_dup 2) (match_dup 0))))
-		(use (const_int 1))]))
-   (cond_exec (ne (match_dup 7) (const_int 0))
-     (parallel [(set (match_dup 0)
-		     (plus:XF (mult:XF (match_dup 4) (match_dup 0))
-			      (match_dup 0)))
-		(use (const_int 1))]))
-   (cond_exec (ne (match_dup 7) (const_int 0))
-     (parallel [(set (match_dup 4)
-		     (minus:XF (match_dup 1)
-			       (mult:XF (match_dup 2) (match_dup 3))))
-		(use (const_int 1))]))
-   (cond_exec (ne (match_dup 7) (const_int 0))
-     (set (match_dup 0)
-	  (plus:XF (mult:XF (match_dup 4) (match_dup 0))
-		   (match_dup 3))))
-  ] 
-  "operands[8] = CONST1_RTX (XFmode);"
-  [(set_attr "predicable" "no")])
-
-(define_insn_and_split "divxf3_internal_thr"
-  [(set (match_operand:XF 0 "fr_register_operand" "=&f")
-	(div:XF (match_operand:XF 1 "fr_register_operand" "f")
-		(match_operand:XF 2 "fr_register_operand" "f")))
-   (clobber (match_scratch:XF 3 "=&f"))
-   (clobber (match_scratch:XF 4 "=&f"))
-   (clobber (match_scratch:BI 5 "=c"))]
-  "TARGET_INLINE_FLOAT_DIV == INL_MAX_THR"
-  "#"
-  "&& reload_completed"
-  [(parallel [(set (match_dup 0) (unspec:XF [(const_int 1) (match_dup 2)]
-					    UNSPEC_FR_RECIP_APPROX_RES))
-	      (set (match_dup 5) (unspec:BI [(match_dup 1) (match_dup 2)]
-					    UNSPEC_FR_RECIP_APPROX))
-	      (use (const_int 0))])
-   (cond_exec (ne (match_dup 5) (const_int 0))
-     (parallel [(set (match_dup 3)
-		     (minus:XF (match_dup 6)
-			       (mult:XF (match_dup 2) (match_dup 0))))
-		(use (const_int 1))]))
-   (cond_exec (ne (match_dup 5) (const_int 0))
-     (parallel [(set (match_dup 4)
-		     (plus:XF (mult:XF (match_dup 3) (match_dup 0))
-			      (match_dup 0)))
-		(use (const_int 1))]))
-   (cond_exec (ne (match_dup 5) (const_int 0))
-     (parallel [(set (match_dup 3) (mult:XF (match_dup 3) (match_dup 3)))
-		(use (const_int 1))]))
-   (cond_exec (ne (match_dup 5) (const_int 0))
-     (parallel [(set (match_dup 3)
-		     (plus:XF (mult:XF (match_dup 3) (match_dup 4))
-			      (match_dup 4)))
-		(use (const_int 1))]))
-   (cond_exec (ne (match_dup 5) (const_int 0))
-     (parallel [(set (match_dup 4) (mult:XF (match_dup 1) (match_dup 0)))
-		(use (const_int 1))]))
-   (cond_exec (ne (match_dup 5) (const_int 0))
-     (parallel [(set (match_dup 0)
-		     (minus:XF (match_dup 6)
-			       (mult:XF (match_dup 2) (match_dup 3))))
-		(use (const_int 1))]))
-   (cond_exec (ne (match_dup 5) (const_int 0))
-     (parallel [(set (match_dup 0)
-		     (plus:XF (mult:XF (match_dup 0) (match_dup 3))
-			      (match_dup 3)))
-		(use (const_int 1))]))
-   (cond_exec (ne (match_dup 5) (const_int 0))
-     (parallel [(set (match_dup 3)
-		     (minus:XF (match_dup 1)
-			       (mult:XF (match_dup 2) (match_dup 4))))
-		(use (const_int 1))]))
-   (cond_exec (ne (match_dup 5) (const_int 0))
-     (parallel [(set (match_dup 3)
-		     (plus:XF (mult:XF (match_dup 3) (match_dup 0))
-			      (match_dup 4)))
-		(use (const_int 1))]))
-   (cond_exec (ne (match_dup 5) (const_int 0))
-     (parallel [(set (match_dup 4)
-		     (minus:XF (match_dup 6)
-			       (mult:XF (match_dup 2) (match_dup 0))))
-		(use (const_int 1))]))
-   (cond_exec (ne (match_dup 5) (const_int 0))
-     (parallel [(set (match_dup 0)
-		     (plus:XF (mult:XF (match_dup 4) (match_dup 0))
-			      (match_dup 0)))
-		(use (const_int 1))]))
-   (cond_exec (ne (match_dup 5) (const_int 0))
-     (parallel [(set (match_dup 4)
-		     (minus:XF (match_dup 1)
-			       (mult:XF (match_dup 2) (match_dup 3))))
-		(use (const_int 1))]))
-   (cond_exec (ne (match_dup 5) (const_int 0))
-     (set (match_dup 0)
-	  (plus:XF (mult:XF (match_dup 4) (match_dup 0))
-		   (match_dup 3))))
-  ] 
-  "operands[6] = CONST1_RTX (XFmode);"
-  [(set_attr "predicable" "no")])
-
 ;; Inline square root.
 
 (define_expand "sqrtxf2"
Index: config/ia64/div.md
===================================================================
--- config/ia64/div.md	(revision 145326)
+++ config/ia64/div.md	(working copy)
@@ -257,6 +257,56 @@ (define_expand "divsf3_internal_thr"
   DONE;
 })
 
+;; Single precision floating point division (minimum latency algorithm).
+
+(define_expand "divsf3_internal_lat"
+  [(set (match_operand:SF 0 "fr_register_operand" "")
+        (div:SF (match_operand:SF 1 "fr_register_operand" "")
+                (match_operand:SF 2 "fr_register_operand" "")))]
+  "TARGET_INLINE_FLOAT_DIV"
+{
+  rtx y         = gen_reg_rtx (RFmode);
+  rtx a         = gen_reg_rtx (RFmode);
+  rtx b         = gen_reg_rtx (RFmode);
+  rtx e         = gen_reg_rtx (RFmode);
+  rtx q         = gen_reg_rtx (RFmode);
+  rtx e1        = gen_reg_rtx (RFmode);
+  rtx y1        = gen_reg_rtx (RFmode);
+  rtx q1        = gen_reg_rtx (RFmode);
+  rtx r         = gen_reg_rtx (RFmode);
+  rtx q_res     = gen_reg_rtx (RFmode);
+  rtx cond      = gen_reg_rtx (BImode);
+  rtx zero      = CONST0_RTX (RFmode);
+  rtx one       = CONST1_RTX (RFmode);
+  rtx status0   = CONST0_RTX (SImode);
+  rtx status1   = CONST1_RTX (SImode);
+  rtx trunc_sgl = CONST0_RTX (SImode);
+  rtx trunc_off = CONST2_RTX (SImode);
+
+  /* Empty conversions to put inputs into RFmode.  */
+  emit_insn (gen_extendsfrf2 (a, operands[1]));
+  emit_insn (gen_extendsfrf2 (b, operands[2]));
+  /* y = 1 / b				*/
+  emit_insn (gen_recip_approx_rf (y, a, b, cond, status0));
+  /* q = a * y				*/
+  emit_insn (gen_mulrf3_cond (q, cond, a, y, zero, status1, trunc_off));
+  /* e = 1 - (b * y)			*/
+  emit_insn (gen_m2subrf4_cond (e, cond, one, b, y, zero, status1, trunc_off));
+  /* e1 = e + (e * e)			*/
+  emit_insn (gen_m2addrf4_cond (e1, cond, e, e, e, zero, status1, trunc_off));
+  /* q1 = single(q + (q * e1))		*/
+  emit_insn (gen_m2addrf4_cond (q1, cond, q, q, e1, zero, status1, trunc_sgl));
+  /* y1 = y + (y * e1)			*/
+  emit_insn (gen_m2addrf4_cond (y1, cond, y, y, e1, zero, status1, trunc_off));
+  /* r = a - (q1 * b)			*/
+  emit_insn (gen_m2subrf4_cond (r, cond, a, q1, b, zero, status1, trunc_off));
+  /* Q = single (q1 + (r * y1))		*/
+  emit_insn (gen_m2addrf4_cond (q_res, cond, q1, r, y1, y, status0, trunc_sgl));
+  /* Conversion back into SFmode.	*/
+  emit_insn (gen_truncrfsf2 (operands[0], q_res));
+  DONE;
+})
+
 
 ;; Double precision floating point division (maximum throughput algorithm).
 
@@ -312,3 +362,132 @@ (define_expand "divdf3_internal_thr"
   emit_insn (gen_truncrfdf2 (operands[0], q_res));
   DONE;
 })
+
+;; Double precision floating point division (minimum latency algorithm).
+
+(define_expand "divdf3_internal_lat"
+  [(set (match_operand:DF 0 "fr_register_operand" "")
+        (div:DF (match_operand:DF 1 "fr_register_operand" "")
+                (match_operand:DF 2 "fr_register_operand" "")))]
+  "TARGET_INLINE_FLOAT_DIV"
+{
+  rtx q_res     = gen_reg_rtx (RFmode);
+  rtx a         = gen_reg_rtx (RFmode);
+  rtx b         = gen_reg_rtx (RFmode);
+  rtx y         = gen_reg_rtx (RFmode);
+  rtx e         = gen_reg_rtx (RFmode);
+  rtx y1        = gen_reg_rtx (RFmode);
+  rtx e1        = gen_reg_rtx (RFmode);
+  rtx q1        = gen_reg_rtx (RFmode);
+  rtx y2        = gen_reg_rtx (RFmode);
+  rtx e2        = gen_reg_rtx (RFmode);
+  rtx q2        = gen_reg_rtx (RFmode);
+  rtx e3        = gen_reg_rtx (RFmode);
+  rtx q         = gen_reg_rtx (RFmode);
+  rtx r1        = gen_reg_rtx (RFmode);
+  rtx cond      = gen_reg_rtx (BImode);
+  rtx zero      = CONST0_RTX (RFmode);
+  rtx one       = CONST1_RTX (RFmode);
+  rtx status0   = CONST0_RTX (SImode);
+  rtx status1   = CONST1_RTX (SImode);
+  rtx trunc_dbl = CONST1_RTX (SImode);
+  rtx trunc_off = CONST2_RTX (SImode);
+
+  /* Empty conversions to put inputs into RFmode */
+  emit_insn (gen_extenddfrf2 (a, operands[1]));
+  emit_insn (gen_extenddfrf2 (b, operands[2]));
+  /* y  = 1 / b			*/
+  emit_insn (gen_recip_approx_rf (y, a, b, cond, status0));
+  /* e  = 1 - (b * y)		*/
+  emit_insn (gen_m2subrf4_cond (e, cond, one, b, y, zero, status1, trunc_off));
+  /* q  = a * y                 */
+  emit_insn (gen_mulrf3_cond (q, cond, a, y, zero, status1, trunc_off));
+  /* e2 = e + (e * e)		*/
+  emit_insn (gen_m2addrf4_cond (e2, cond, e, e, e, zero, status1, trunc_off));
+  /* e1 = e * e                 */
+  emit_insn (gen_mulrf3_cond (e1, cond, e, e, zero, status1, trunc_off));
+  /* e3 = e + (e1 * e1)		*/
+  emit_insn (gen_m2addrf4_cond (e3, cond, e, e1, e1, zero, status1, trunc_off));
+  /* q1 = q + (q * e2)		*/
+  emit_insn (gen_m2addrf4_cond (q1, cond, q, q, e2, zero, status1, trunc_off));
+  /* y1 = y + (y * e2)		*/
+  emit_insn (gen_m2addrf4_cond (y1, cond, y, y, e2, zero, status1, trunc_off));
+  /* q2 = double(q + (q1 * e3))	*/
+  emit_insn (gen_m2addrf4_cond (q2, cond, q, q1, e3, zero, status1, trunc_dbl));
+  /* y2 = y + (y1 * e3)		*/
+  emit_insn (gen_m2addrf4_cond (y2, cond, y, y1, e3, zero, status1, trunc_off));
+  /* r1  = a - (b * q2)		*/
+  emit_insn (gen_m2subrf4_cond (r1, cond, a, b, q2, zero, status1, trunc_off));
+  /* Q  = double (q2 + (r1 * y2))	*/
+  emit_insn (gen_m2addrf4_cond (q_res, cond, q2, r1, y2, y, status0, trunc_dbl));
+  /* Conversion back into DFmode */
+  emit_insn (gen_truncrfdf2 (operands[0], q_res));
+  DONE;
+})
+
+;; Extended precision floating point division.
+
+(define_expand "divxf3_internal"
+  [(set (match_operand:XF 0 "fr_register_operand" "")
+        (div:XF (match_operand:XF 1 "fr_register_operand" "")
+                (match_operand:XF 2 "fr_register_operand" "")))]
+  "TARGET_INLINE_FLOAT_DIV"
+{
+  rtx q_res     = gen_reg_rtx (RFmode);
+  rtx a         = gen_reg_rtx (RFmode);
+  rtx b         = gen_reg_rtx (RFmode);
+  rtx y         = gen_reg_rtx (RFmode);
+  rtx e         = gen_reg_rtx (RFmode);
+  rtx y1        = gen_reg_rtx (RFmode);
+  rtx e1        = gen_reg_rtx (RFmode);
+  rtx q1        = gen_reg_rtx (RFmode);
+  rtx y2        = gen_reg_rtx (RFmode);
+  rtx e2        = gen_reg_rtx (RFmode);
+  rtx y3        = gen_reg_rtx (RFmode);
+  rtx e3        = gen_reg_rtx (RFmode);
+  rtx e4        = gen_reg_rtx (RFmode);
+  rtx q         = gen_reg_rtx (RFmode);
+  rtx r         = gen_reg_rtx (RFmode);
+  rtx r1        = gen_reg_rtx (RFmode);
+  rtx cond      = gen_reg_rtx (BImode);
+  rtx zero      = CONST0_RTX (RFmode);
+  rtx one       = CONST1_RTX (RFmode);
+  rtx status0   = CONST0_RTX (SImode);
+  rtx status1   = CONST1_RTX (SImode);
+  rtx trunc_off = CONST2_RTX (SImode);
+
+  /* Empty conversions to put inputs into RFmode */
+  emit_insn (gen_extendxfrf2 (a, operands[1]));
+  emit_insn (gen_extendxfrf2 (b, operands[2]));
+  /* y  = 1 / b			*/
+  emit_insn (gen_recip_approx_rf (y, a, b, cond, status0));
+  /* e  = 1 - (b * y)		*/
+  emit_insn (gen_m2subrf4_cond (e, cond, one, b, y, zero, status1, trunc_off));
+  /* q  = a * y                 */
+  emit_insn (gen_mulrf3_cond (q, cond, a, y, zero, status1, trunc_off));
+  /* e2 = e + (e * e)		*/
+  emit_insn (gen_m2addrf4_cond (e2, cond, e, e, e, zero, status1, trunc_off));
+  /* e1 = e * e                 */
+  emit_insn (gen_mulrf3_cond (e1, cond, e, e, zero, status1, trunc_off));
+  /* y1 = y + (y * e2)		*/
+  emit_insn (gen_m2addrf4_cond (y1, cond, y, y, e2, zero, status1, trunc_off));
+  /* e3 = e + (e1 * e1)		*/
+  emit_insn (gen_m2addrf4_cond (e3, cond, e, e1, e1, zero, status1, trunc_off));
+  /* y2 = y + (y1 * e3)		*/
+  emit_insn (gen_m2addrf4_cond (y2, cond, y, y1, e3, zero, status1, trunc_off));
+  /* r  = a - (b * q)		*/
+  emit_insn (gen_m2subrf4_cond (r, cond, a, b, q, zero, status1, trunc_off));
+  /* e4  = 1 - (b * y2)		*/
+  emit_insn (gen_m2subrf4_cond (e4, cond, one, b, y2, zero, status1, trunc_off));
+  /* q1 = q + (r * y2)		*/
+  emit_insn (gen_m2addrf4_cond (q1, cond, q, r, y2, zero, status1, trunc_off));
+  /* y3 = y2 + (y2 * e4)	*/
+  emit_insn (gen_m2addrf4_cond (y3, cond, y2, y2, e4, zero, status1, trunc_off));
+  /* r1  = a - (b * q1)		*/
+  emit_insn (gen_m2subrf4_cond (r1, cond, a, b, q1, zero, status1, trunc_off));
+  /* Q  = q1 + (r1 * y3)	*/
+  emit_insn (gen_m2addrf4_cond (q_res, cond, q1, r1, y3, y, status0, trunc_off));
+  /* Conversion back into XFmode */
+  emit_insn (gen_truncrfxf2 (operands[0], q_res));
+  DONE;
+})


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]