This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.
Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]
Other format:	[Raw text]
Re: Patch to change IA64 division code

From: Steve Ellcey <sje at cup dot hp dot com>
To: wilson at specifix dot com
Cc: gcc-patches at gcc dot gnu dot org
Date: Tue, 27 Mar 2007 10:21:16 -0700 (PDT)
Subject: Re: Patch to change IA64 division code
Jim,

Here is a new version of my division change.  I got rid of the _a and _b
variants and changed the division sequences to use gen_* calls.  This
makes div.md smaller and easier to read.  The code sequence generated is
unchanged.

I am still a bit concerned about HARD_REGNO_CALLER_SAVE_MODE.  I changed
it back to XFmode and I got no regressions but it still seems like
RFmode is the 'right' mode to use as it will save and restore a register
without losing any information.  XFmode will lose two bits of precision.
This should only happen if we save and restore an FP register in the
middle of a division code sequence.  A long time ago I thought I saw
that happen but I cannot reproduce it with the current compiler.

Retested with no regressions. OK to check in?


2007-02-27  Steve Ellcey  <sje@cup.hp.com>

	* config/ia64/ia64.h (HARD_REGNO_NREGS): Handle RFmode.
	(HARD_REGNO_MODE_OK): Ditto.
	(MODES_TIEABLE_P): Ditto.
	(HARD_REGNO_CALLER_SAVE_MODE): Ditto.
	(CLASS_MAX_NREGS): Ditto.
	* config/ia64/ia64.c (ia64_print_operand): Add R format.
	(rtx_needs_barrier): Add UNSPEC_NOP_CONVERT case.
	* config/ia64/ia64.md (UNSPEC_NOP_CONVERT): New.
	(divsf3_internal_thr): Removed.
	(divdf3_internal_thr): Removed.
	* config/ia64/div.md: New file.


Index: config/ia64/ia64.h
===================================================================
--- config/ia64/ia64.h	(revision 123090)
+++ config/ia64/ia64.h	(working copy)
@@ -642,6 +642,7 @@ while (0)
    : PR_REGNO_P (REGNO) && (MODE) == BImode ? 2				\
    : PR_REGNO_P (REGNO) && (MODE) == CCImode ? 1			\
    : FR_REGNO_P (REGNO) && (MODE) == XFmode ? 1				\
+   : FR_REGNO_P (REGNO) && (MODE) == RFmode ? 1				\
    : FR_REGNO_P (REGNO) && (MODE) == XCmode ? 2				\
    : (GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD)
 
@@ -657,7 +658,7 @@ while (0)
    : PR_REGNO_P (REGNO) ?					\
      (MODE) == BImode || GET_MODE_CLASS (MODE) == MODE_CC	\
    : GR_REGNO_P (REGNO) ?					\
-     (MODE) != CCImode && (MODE) != XFmode && (MODE) != XCmode	\
+     (MODE) != CCImode && (MODE) != XFmode && (MODE) != XCmode	&& (MODE) != RFmode \
    : AR_REGNO_P (REGNO) ? (MODE) == DImode			\
    : BR_REGNO_P (REGNO) ? (MODE) == DImode			\
    : 0)
@@ -674,8 +675,8 @@ while (0)
    we can't tie it with any other modes.  */
 #define MODES_TIEABLE_P(MODE1, MODE2)			\
   (GET_MODE_CLASS (MODE1) == GET_MODE_CLASS (MODE2)	\
-   && ((((MODE1) == XFmode) || ((MODE1) == XCmode))	\
-       == (((MODE2) == XFmode) || ((MODE2) == XCmode)))	\
+   && ((((MODE1) == XFmode) || ((MODE1) == XCmode) || ((MODE1) == RFmode))     \
+       == (((MODE2) == XFmode) || ((MODE2) == XCmode) || ((MODE2) == RFmode))) \
    && (((MODE1) == BImode) == ((MODE2) == BImode)))
 
 /* Specify the modes required to caller save a given hard regno.
@@ -896,6 +897,7 @@ enum reg_class
 #define CLASS_MAX_NREGS(CLASS, MODE) \
   ((MODE) == BImode && (CLASS) == PR_REGS ? 2			\
    : (((CLASS) == FR_REGS || (CLASS) == FP_REGS) && (MODE) == XFmode) ? 1 \
+   : (((CLASS) == FR_REGS || (CLASS) == FP_REGS) && (MODE) == RFmode) ? 1 \
    : (((CLASS) == FR_REGS || (CLASS) == FP_REGS) && (MODE) == XCmode) ? 2 \
    : (GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD)
 
Index: config/ia64/ia64.c
===================================================================
--- config/ia64/ia64.c	(revision 123090)
+++ config/ia64/ia64.c	(working copy)
@@ -4503,6 +4503,7 @@ ia64_print_operand_address (FILE * strea
    O	Append .acq for volatile load.
    P	Postincrement of a MEM.
    Q	Append .rel for volatile store.
+   R    Print .s .d or nothing for a single, double or no truncation.
    S	Shift amount for shladd instruction.
    T	Print an 8-bit sign extended number (K) as a 32-bit unsigned number
 	for Intel assembler.
@@ -4643,6 +4644,17 @@ ia64_print_operand (FILE * file, rtx x, 
 	fputs(".rel", file);
       return;
 
+    case 'R':
+      if (x == CONST0_RTX (GET_MODE (x)))
+	fputs(".s", file);
+      else if (x == CONST1_RTX (GET_MODE (x)))
+	fputs(".d", file);
+      else if (x == CONST2_RTX (GET_MODE (x)))
+        ;
+      else
+        output_operand_lossage ("invalid %%R value");
+      return;
+
     case 'S':
       fprintf (file, "%d", exact_log2 (INTVAL (x)));
       return;
@@ -5762,6 +5774,7 @@ rtx_needs_barrier (rtx x, struct reg_fla
 	case UNSPEC_BSP_VALUE:
 	case UNSPEC_FLUSHRS:
 	case UNSPEC_BUNDLE_SELECTOR:
+	case UNSPEC_NOP_CONVERT:
           break;
 
 	case UNSPEC_GR_SPILL:
Index: config/ia64/ia64.md
===================================================================
--- config/ia64/ia64.md	(revision 123090)
+++ config/ia64/ia64.md	(working copy)
@@ -81,6 +81,7 @@ (define_constants
    (UNSPEC_SHRP			29)
    (UNSPEC_COPYSIGN		30)
    (UNSPEC_VECT_EXTR		31)
+   (UNSPEC_NOP_CONVERT		32)
    (UNSPEC_LDA                  40)
    (UNSPEC_LDS                  41)
    (UNSPEC_LDSA                 42)
@@ -3108,60 +3109,6 @@ (define_insn_and_split "divsf3_internal_
 }
   [(set_attr "predicable" "no")])
 
-(define_insn_and_split "divsf3_internal_thr"
-  [(set (match_operand:SF 0 "fr_register_operand" "=&f")
-	(div:SF (match_operand:SF 1 "fr_register_operand" "f")
-		(match_operand:SF 2 "fr_register_operand" "f")))
-   (clobber (match_scratch:XF 3 "=&f"))
-   (clobber (match_scratch:XF 4 "=f"))
-   (clobber (match_scratch:BI 5 "=c"))]
-  "TARGET_INLINE_FLOAT_DIV == INL_MAX_THR"
-  "#"
-  "&& reload_completed"
-  [(parallel [(set (match_dup 6) (div:XF (const_int 1) (match_dup 8)))
-	      (set (match_dup 5) (unspec:BI [(match_dup 7) (match_dup 8)]
-					    UNSPEC_FR_RECIP_APPROX))
-	      (use (const_int 0))])
-   (cond_exec (ne (match_dup 5) (const_int 0))
-     (parallel [(set (match_dup 3)
-		     (minus:XF (match_dup 10)
-			       (mult:XF (match_dup 8) (match_dup 6))))
-		(use (const_int 1))]))
-   (cond_exec (ne (match_dup 5) (const_int 0))
-     (parallel [(set (match_dup 3)
-		     (plus:XF (mult:XF (match_dup 3) (match_dup 3))
-			      (match_dup 3)))
-		(use (const_int 1))]))
-   (cond_exec (ne (match_dup 5) (const_int 0))
-     (parallel [(set (match_dup 6)
-		     (plus:XF (mult:XF (match_dup 3) (match_dup 6))
-			      (match_dup 6)))
-		(use (const_int 1))]))
-   (cond_exec (ne (match_dup 5) (const_int 0))
-     (parallel [(set (match_dup 9)
-		     (float_truncate:SF
-		       (mult:XF (match_dup 7) (match_dup 6))))
-		(use (const_int 1))]))
-   (cond_exec (ne (match_dup 5) (const_int 0))
-     (parallel [(set (match_dup 4)
-		     (minus:XF (match_dup 7)
-			       (mult:XF (match_dup 8) (match_dup 3))))
-		(use (const_int 1))]))
-   (cond_exec (ne (match_dup 5) (const_int 0))
-     (set (match_dup 0)
-	  (float_truncate:SF
-	    (plus:XF (mult:XF (match_dup 4) (match_dup 6))
-			      (match_dup 3)))))
-  ] 
-{
-  operands[6] = gen_rtx_REG (XFmode, REGNO (operands[0]));
-  operands[7] = gen_rtx_REG (XFmode, REGNO (operands[1]));
-  operands[8] = gen_rtx_REG (XFmode, REGNO (operands[2]));
-  operands[9] = gen_rtx_REG (SFmode, REGNO (operands[3]));
-  operands[10] = CONST1_RTX (XFmode);
-}
-  [(set_attr "predicable" "no")])
-
 ;; Inline square root.
 
 (define_insn "*sqrt_approx"
@@ -3614,72 +3561,6 @@ (define_insn_and_split "divdf3_internal_
 }
   [(set_attr "predicable" "no")])
 
-(define_insn_and_split "divdf3_internal_thr"
-  [(set (match_operand:DF 0 "fr_register_operand" "=&f")
-	(div:DF (match_operand:DF 1 "fr_register_operand" "f")
-		(match_operand:DF 2 "fr_register_operand" "f")))
-   (clobber (match_scratch:XF 3 "=&f"))
-   (clobber (match_scratch:DF 4 "=f"))
-   (clobber (match_scratch:BI 5 "=c"))]
-  "TARGET_INLINE_FLOAT_DIV == INL_MAX_THR"
-  "#"
-  "&& reload_completed"
-  [(parallel [(set (match_dup 6) (div:XF (const_int 1) (match_dup 8)))
-	      (set (match_dup 5) (unspec:BI [(match_dup 7) (match_dup 8)]
-					    UNSPEC_FR_RECIP_APPROX))
-	      (use (const_int 0))])
-   (cond_exec (ne (match_dup 5) (const_int 0))
-     (parallel [(set (match_dup 3)
-		     (minus:XF (match_dup 10)
-			       (mult:XF (match_dup 8) (match_dup 6))))
-		(use (const_int 1))]))
-   (cond_exec (ne (match_dup 5) (const_int 0))
-     (parallel [(set (match_dup 6)
-		     (plus:XF (mult:XF (match_dup 3) (match_dup 6))
-			      (match_dup 6)))
-		(use (const_int 1))]))
-   (cond_exec (ne (match_dup 5) (const_int 0))
-     (parallel [(set (match_dup 3)
-		     (mult:XF (match_dup 3) (match_dup 3)))
-		(use (const_int 1))]))
-   (cond_exec (ne (match_dup 5) (const_int 0))
-     (parallel [(set (match_dup 6)
-		     (plus:XF (mult:XF (match_dup 3) (match_dup 6))
-			      (match_dup 6)))
-		(use (const_int 1))]))
-   (cond_exec (ne (match_dup 5) (const_int 0))
-     (parallel [(set (match_dup 3)
-		     (mult:XF (match_dup 3) (match_dup 3)))
-		(use (const_int 1))]))
-   (cond_exec (ne (match_dup 5) (const_int 0))
-     (parallel [(set (match_dup 6)
-		     (plus:XF (mult:XF (match_dup 3) (match_dup 6))
-			      (match_dup 6)))
-		(use (const_int 1))]))
-   (cond_exec (ne (match_dup 5) (const_int 0))
-     (parallel [(set (match_dup 9)
-		     (float_truncate:DF
-		       (mult:XF (match_dup 7) (match_dup 6))))
-		(use (const_int 1))]))
-   (cond_exec (ne (match_dup 5) (const_int 0))
-     (parallel [(set (match_dup 4)
-		     (minus:DF (match_dup 1)
-			       (mult:DF (match_dup 2) (match_dup 9))))
-		(use (const_int 1))]))
-   (cond_exec (ne (match_dup 5) (const_int 0))
-     (set (match_dup 0)
-	  (plus:DF (mult:DF (match_dup 4) (match_dup 0))
-			    (match_dup 9))))
-  ] 
-{
-  operands[6] = gen_rtx_REG (XFmode, REGNO (operands[0]));
-  operands[7] = gen_rtx_REG (XFmode, REGNO (operands[1]));
-  operands[8] = gen_rtx_REG (XFmode, REGNO (operands[2]));
-  operands[9] = gen_rtx_REG (DFmode, REGNO (operands[3]));
-  operands[10] = CONST1_RTX (XFmode);
-}
-  [(set_attr "predicable" "no")])
-
 ;; Inline square root.
 
 (define_expand "sqrtdf2"
@@ -6540,3 +6421,5 @@ (define_insn "ip_value"
 (include "vect.md")
 ;; Atomic operations
 (include "sync.md")
+;; New division operations
+(include "div.md")
Index: config/ia64/div.md
===================================================================
--- config/ia64/div.md	(revision 0)
+++ config/ia64/div.md	(revision 0)
@@ -0,0 +1,317 @@
+
+;; For the internal conditional math routines:
+
+;; operand 0 is always the result; operand 1 is always the predicate.
+;; operand 2, 3, and sometimes 4 are the input values.
+;; operand 4 or 5 is the value the result takes when the predicate is false.
+;; operand 5 or 6 is the floating point status register to use.
+;; operand 6 or 7 is the rounding to do. (0 = single, 1 = double, 2 = none)
+;;
+;; addrf3_cond   - F0 = F2 + F3
+;; subrf3_cond   - F0 = F2 - F3
+;; mulrf3_cond   - F0 = F2 * F3
+;; nmulrf3_cond  - F0 = - (F2 * F3)
+;; m1addrf4_cond - F0 = (F2 * F3) + F4
+;; m1subrf4_cond - F0 = (F2 * F3) - F4
+;; m2addrf4_cond - F0 = F2 + (F3 * F4)
+;; m2subrf4_cond - F0 = F2 - (F3 * F4)
+
+;; Basic plus/minus/mult operations
+
+(define_insn "addrf3_cond"
+  [(set (match_operand:RF 0 "fr_register_operand" "=f,f")
+        (if_then_else:RF (ne:RF (match_operand:BI 1 "register_operand"  "c,c")
+                                (const_int 0))
+          (plus:RF
+            (match_operand:RF 2 "fr_reg_or_fp01_operand" "fG,fG")
+            (match_operand:RF 3 "fr_reg_or_fp01_operand" "fG,fG"))
+          (match_operand:RF 4 "fr_reg_or_0_operand" "0,U")))
+   (use (match_operand:SI 5 "const_int_operand" ""))
+   (use (match_operand:SI 6 "const_int_operand" ""))]
+  ""
+  "(%1) fadd%R6.s%5 %0 = %F2, %F3"
+  [(set_attr "itanium_class" "fmac")
+   (set_attr "predicable" "no")])
+
+(define_insn "subrf3_cond"
+  [(set (match_operand:RF 0 "fr_register_operand" "=f,f")
+        (if_then_else:RF (ne:RF (match_operand:BI 1 "register_operand"  "c,c")
+                                (const_int 0))
+          (minus:RF
+            (match_operand:RF 2 "fr_reg_or_fp01_operand" "fG,fG")
+            (match_operand:RF 3 "fr_reg_or_fp01_operand" "fG,fG"))
+          (match_operand:RF 4 "fr_reg_or_0_operand" "0,U")))
+   (use (match_operand:SI 5 "const_int_operand" ""))
+   (use (match_operand:SI 6 "const_int_operand" ""))]
+  ""
+  "(%1) fsub%R6.s%5 %0 = %F2, %F3"
+  [(set_attr "itanium_class" "fmac")
+   (set_attr "predicable" "no")])
+
+(define_insn "mulrf3_cond"
+  [(set (match_operand:RF 0 "fr_register_operand" "=f,f")
+        (if_then_else:RF (ne:RF (match_operand:BI 1 "register_operand"  "c,c")
+                                (const_int 0))
+          (mult:RF
+            (match_operand:RF 2 "fr_reg_or_fp01_operand" "fG,fG")
+            (match_operand:RF 3 "fr_reg_or_fp01_operand" "fG,fG"))
+          (match_operand:RF 4 "fr_reg_or_0_operand" "0,U")))
+   (use (match_operand:SI 5 "const_int_operand" ""))
+   (use (match_operand:SI 6 "const_int_operand" ""))]
+  ""
+  "(%1) fmpy%R6.s%5 %0 = %F2, %F3"
+  [(set_attr "itanium_class" "fmac")
+   (set_attr "predicable" "no")])
+
+;; neg-mult operation
+
+(define_insn "nmulrf3_cond"
+  [(set (match_operand:RF 0 "fr_register_operand" "=f,f")
+        (if_then_else:RF (ne:RF (match_operand:BI 1 "register_operand"  "c,c")
+                                (const_int 0))
+          (neg:RF (mult:RF
+            (match_operand:RF 2 "fr_reg_or_fp01_operand" "fG,fG")
+            (match_operand:RF 3 "fr_reg_or_fp01_operand" "fG,fG")))
+          (match_operand:RF 4 "fr_reg_or_0_operand" "0,U")))
+   (use (match_operand:SI 5 "const_int_operand" ""))
+   (use (match_operand:SI 6 "const_int_operand" ""))]
+  ""
+  "(%1) fnmpy%R6.s%5 %0 = %F2, %F3"
+  [(set_attr "itanium_class" "fmac")
+   (set_attr "predicable" "no")])
+
+;; add-mult/sub-mult operations (mult as op1)
+
+(define_insn "m1addrf4_cond"
+  [(set (match_operand:RF 0 "fr_register_operand" "=f,f")
+        (if_then_else:RF (ne:RF (match_operand:BI 1 "register_operand"  "c,c")
+                                (const_int 0))
+          (plus:RF
+            (mult:RF
+              (match_operand:RF 2 "fr_reg_or_fp01_operand" "fG,fG")
+              (match_operand:RF 3 "fr_reg_or_fp01_operand" "fG,fG"))
+            (match_operand:RF 4 "fr_reg_or_fp01_operand" "fG,fG"))
+          (match_operand:RF 5 "fr_reg_or_0_operand" "0,U")))
+   (use (match_operand:SI 6 "const_int_operand" ""))
+   (use (match_operand:SI 7 "const_int_operand" ""))]
+  ""
+  "(%1) fma%R7.s%6 %0 = %F2, %F3, %F4"
+  [(set_attr "itanium_class" "fmac")
+   (set_attr "predicable" "no")])
+
+(define_insn "m1subrf4_cond"
+  [(set (match_operand:RF 0 "fr_register_operand" "=f,f")
+        (if_then_else:RF (ne:RF (match_operand:BI 1 "register_operand"  "c,c")
+                                (const_int 0))
+          (minus:RF
+            (mult:RF
+              (match_operand:RF 2 "fr_reg_or_fp01_operand" "fG,fG")
+              (match_operand:RF 3 "fr_reg_or_fp01_operand" "fG,fG"))
+            (match_operand:RF 4 "fr_reg_or_fp01_operand" "fG,fG"))
+          (match_operand:RF 5 "fr_reg_or_0_operand" "0,U")))
+   (use (match_operand:SI 6 "const_int_operand" ""))
+   (use (match_operand:SI 7 "const_int_operand" ""))]
+  ""
+  "(%1) fms%R7.s%6 %0 = %F2, %F3, %F4"
+  [(set_attr "itanium_class" "fmac")
+   (set_attr "predicable" "no")])
+
+;; add-mult/sub-mult operations (mult as op2)
+
+(define_insn "m2addrf4_cond"
+  [(set (match_operand:RF 0 "fr_register_operand" "=f,f")
+        (if_then_else:RF (ne:RF (match_operand:BI 1 "register_operand"  "c,c")
+                                (const_int 0))
+          (plus:RF
+            (match_operand:RF 2 "fr_reg_or_fp01_operand" "fG,fG")
+            (mult:RF
+              (match_operand:RF 3 "fr_reg_or_fp01_operand" "fG,fG")
+              (match_operand:RF 4 "fr_reg_or_fp01_operand" "fG,fG")))
+          (match_operand:RF 5 "fr_reg_or_0_operand" "0,U")))
+   (use (match_operand:SI 6 "const_int_operand" ""))
+   (use (match_operand:SI 7 "const_int_operand" ""))]
+  ""
+  "(%1) fma%R7.s%6 %0 = %F3, %F4, %F2"
+  [(set_attr "itanium_class" "fmac")
+   (set_attr "predicable" "no")])
+
+(define_insn "m2subrf4_cond"
+  [(set (match_operand:RF 0 "fr_register_operand" "=f,f")
+        (if_then_else:RF (ne:RF (match_operand:BI 1 "register_operand"  "c,c")
+                                (const_int 0))
+          (minus:RF
+            (match_operand:RF 2 "fr_reg_or_fp01_operand" "fG,fG")
+            (mult:RF
+              (match_operand:RF 3 "fr_reg_or_fp01_operand" "fG,fG")
+              (match_operand:RF 4 "fr_reg_or_fp01_operand" "fG,fG")))
+          (match_operand:RF 5 "fr_reg_or_0_operand" "0,U")))
+   (use (match_operand:SI 6 "const_int_operand" ""))
+   (use (match_operand:SI 7 "const_int_operand" ""))]
+  ""
+  "(%1) fnma%R7.s%6 %0 = %F3, %F4, %F2"
+  [(set_attr "itanium_class" "fmac")
+   (set_attr "predicable" "no")])
+
+;; Conversions to/from RF and SF/DF/XF
+;; These conversions should not generate any code but make it possible
+;; for all the instructions used to implement floating point division
+;; to be written for RFmode only and to not have to handle multiple
+;; modes or to have to handle a register in more than one mode.
+
+(define_mode_macro SDX_F [SF DF XF])
+
+(define_insn "mov_extendrf<mode>"
+  [(set (match_operand:RF 0 "fr_register_operand" "=f")
+        (unspec:RF [(match_operand:SDX_F 1 "fr_register_operand" "f")]
+                   UNSPEC_NOP_CONVERT))]
+  ""
+  "#"
+  [(set_attr "itanium_class" "fmisc")
+   (set_attr "predicable" "yes")])
+
+(define_split
+  [(set (match_operand:RF 0 "fr_register_operand" "")
+        (unspec:RF [(match_operand:SDX_F 1 "fr_register_operand" "")]
+                      UNSPEC_NOP_CONVERT))]
+   "reload_completed"
+   [(set (match_dup 0) (match_dup 2))]
+{
+   operands[2] = gen_rtx_REG (RFmode, REGNO (operands[1]));
+})
+
+
+(define_insn "mov_trunc<mode>rf"
+  [(set (match_operand:SDX_F 0 "fr_register_operand" "=f")
+        (unspec:SDX_F [(match_operand:RF 1 "fr_register_operand" "f")]
+                      UNSPEC_NOP_CONVERT))]
+  ""
+  "#"
+  [(set_attr "itanium_class" "fmisc")
+   (set_attr "predicable" "yes")])
+
+(define_split
+  [(set (match_operand:SDX_F 0 "fr_register_operand" "")
+        (unspec:SDX_F [(match_operand:RF 1 "fr_register_operand" "")]
+                      UNSPEC_NOP_CONVERT))]
+   "reload_completed"
+   [(set (match_dup 0) (match_dup 2))]
+{
+   operands[2] = gen_rtx_REG (<MODE>mode, REGNO (operands[1]));
+})
+
+;; Reciprocal approximation
+
+(define_insn "recip_approx_rf"
+  [(set (match_operand:RF 0 "fr_register_operand" "=f")
+        (div:RF (match_operand:RF 1 "fr_register_operand" "f")
+                (match_operand:RF 2 "fr_register_operand" "f")))
+   (set (match_operand:BI 3 "register_operand" "=c")
+        (unspec:BI [(match_dup 1) (match_dup 2)] UNSPEC_FR_RECIP_APPROX))
+   (use (match_operand:SI 4 "const_int_operand" ""))]
+  ""
+  "frcpa.s%4 %0, %3 = %1, %2"
+  [(set_attr "itanium_class" "fmisc")
+   (set_attr "predicable" "no")])
+
+;; Single precision floating point division (maximum throughput algorithm).
+
+(define_expand "divsf3_internal_thr"
+  [(set (match_operand:SF 0 "fr_register_operand" "")
+        (div:SF (match_operand:SF 1 "fr_register_operand" "")
+                (match_operand:SF 2 "fr_register_operand" "")))]
+  "TARGET_INLINE_FLOAT_DIV"
+{
+  rtx y     = gen_reg_rtx (RFmode);
+  rtx a     = gen_reg_rtx (RFmode);
+  rtx b     = gen_reg_rtx (RFmode);
+  rtx e     = gen_reg_rtx (RFmode);
+  rtx y1    = gen_reg_rtx (RFmode);
+  rtx y2    = gen_reg_rtx (RFmode);
+  rtx q     = gen_reg_rtx (RFmode);
+  rtx r     = gen_reg_rtx (RFmode);
+  rtx q_res = gen_reg_rtx (RFmode);
+  rtx cond  = gen_reg_rtx (BImode);
+  rtx zero    = CONST0_RTX (RFmode);
+  rtx one     = CONST1_RTX (RFmode);
+  rtx status0 = CONST0_RTX (SImode);
+  rtx status1 = CONST1_RTX (SImode);
+  rtx trunc_sgl = CONST0_RTX (SImode);
+  rtx trunc_off    = CONST2_RTX (SImode);
+
+  /* Empty conversions to put inputs into RFmode.  */
+  emit_insn (gen_mov_extendrfsf (a, operands[1]));
+  emit_insn (gen_mov_extendrfsf (b, operands[2]));
+  /* y = 1 / b				*/
+  emit_insn (gen_recip_approx_rf (y, a, b, cond, status0));
+  /* e = 1 - (b * y)			*/
+  emit_insn (gen_m2subrf4_cond (e, cond, one, b, y, zero, status1, trunc_off));
+  /* y1 = y + (y * e)			*/
+  emit_insn (gen_m2addrf4_cond (y1, cond, y, y, e, zero, status1, trunc_off));
+  /* y2 = y + (y1 * e)			*/
+  emit_insn (gen_m2addrf4_cond (y2, cond, y, y1, e, zero, status1, trunc_off));
+  /* q = single(a * y2)			*/
+  emit_insn (gen_mulrf3_cond (q, cond, a, y2, zero, status1, trunc_sgl));
+  /* r = a - (q * b)			*/
+  emit_insn (gen_m2subrf4_cond (r, cond, a, q, b, zero, status1, trunc_off));
+  /* Q = single (q + (r * y2))		*/
+  emit_insn (gen_m2addrf4_cond (q_res, cond, q, r, y2, y, status0, trunc_sgl));
+  /* Conversion back into SFmode.	*/
+  emit_insn (gen_mov_truncsfrf (operands[0], q_res));
+  DONE;
+})
+
+
+;; Double precision floating point division (maximum throughput algorithm).
+
+(define_expand "divdf3_internal_thr"
+  [(set (match_operand:DF 0 "fr_register_operand" "")
+        (div:DF (match_operand:DF 1 "fr_register_operand" "")
+                (match_operand:DF 2 "fr_register_operand" "")))]
+  "TARGET_INLINE_FLOAT_DIV"
+{
+  rtx q_res = gen_reg_rtx (RFmode);
+  rtx a     = gen_reg_rtx (RFmode);
+  rtx b     = gen_reg_rtx (RFmode);
+  rtx y     = gen_reg_rtx (RFmode);
+  rtx e     = gen_reg_rtx (RFmode);
+  rtx y1    = gen_reg_rtx (RFmode);
+  rtx e1    = gen_reg_rtx (RFmode);
+  rtx y2    = gen_reg_rtx (RFmode);
+  rtx e2    = gen_reg_rtx (RFmode);
+  rtx y3    = gen_reg_rtx (RFmode);
+  rtx q     = gen_reg_rtx (RFmode);
+  rtx r     = gen_reg_rtx (RFmode);
+  rtx cond  = gen_reg_rtx (BImode);
+  rtx zero    = CONST0_RTX (RFmode);
+  rtx one     = CONST1_RTX (RFmode);
+  rtx status0 = CONST0_RTX (SImode);
+  rtx status1 = CONST1_RTX (SImode);
+  rtx trunc_dbl = CONST1_RTX (SImode);
+  rtx trunc_off = CONST2_RTX (SImode);
+  /* Empty conversions to put inputs into RFmode */
+  emit_insn (gen_mov_extendrfdf (a, operands[1]));
+  emit_insn (gen_mov_extendrfdf (b, operands[2]));
+  /* y  = 1 / b			*/
+  emit_insn (gen_recip_approx_rf (y, a, b, cond, status0));
+  /* e  = 1 - (b * y)		*/
+  emit_insn (gen_m2subrf4_cond (e, cond, one, b, y, zero, status1, trunc_off));
+  /* y1 = y + (y * e)		*/
+  emit_insn (gen_m2addrf4_cond (y1, cond, y, y, e, zero, status1, trunc_off));
+  /* e1 = e * e			*/
+  emit_insn (gen_mulrf3_cond (e1, cond, e, e, zero, status1, trunc_off));
+  /* y2 = y1 + (y1 * e1)	*/
+  emit_insn (gen_m2addrf4_cond (y2, cond, y1, y1, e1, zero, status1, trunc_off));
+  /* e2 = e1 * e1		*/
+  emit_insn (gen_mulrf3_cond (e2, cond, e1, e1, zero, status1, trunc_off));
+  /* y3 = y2 + (y2 * e2)	*/
+  emit_insn (gen_m2addrf4_cond (y3, cond, y2, y2, e2, zero, status1, trunc_off));
+  /* q  = double (a * y3)	*/
+  emit_insn (gen_mulrf3_cond (q, cond, a, y3, zero, status1, trunc_dbl));
+  /* r  = a - (b * q)		*/
+  emit_insn (gen_m2subrf4_cond (r, cond, a, b, q, zero, status1, trunc_off));
+  /* Q  = double (q + (r * y3))	*/
+  emit_insn (gen_m2addrf4_cond (q_res, cond, q, r, y3, y, status0, trunc_dbl));
+  /* Conversion back into DFmode */
+  emit_insn (gen_mov_truncdfrf (operands[0], q_res));
+  DONE;
+})
References:
- Re: Patch to change IA64 division code
  - From: Jim Wilson
Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]