This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Inline round for IA64
- From: Canqun Yang <canqun at nudt dot edu dot cn>
- To: gcc-patches at gcc dot gnu dot org, Jim Wilson <wilson at specifixinc dot com>
- Date: Fri, 16 Jan 2004 09:13:23 +0800 (HKT)
- Subject: Inline round for IA64
- Reply-to: Canqun Yang <canqun at nudt dot edu dot cn>
Hi, Jim Wilson
I modified the target description file ia64.md so that
calls to round are expanded inline.
The test results show a performance improvement for
programs that call the function 'round' from libm.a;
e.g. the execution time of 189.lucas is 261 seconds,
while the original version takes 309 seconds.
I have attached the patch file to this mail, and hope it will be
helpful.
Regards,
Canqun Yang
*** ./gcc/gcc/config/ia64/ia64.md 2004-01-14 18:42:59.000000000 +0800
--- ia64.md 2004-01-15 21:27:57.000000000 +0800
***************
*** 76,81 ****
--- 76,84 ----
(UNSPEC_RET_ADDR 26)
(UNSPEC_SETF_EXP 27)
(UNSPEC_FR_SQRT_RECIP_APPROX 28)
+ (UNSPEC_ROUNDF 29)
+ (UNSPEC_ROUND 30)
+ (UNSPEC_ROUNDL 31)
])
(define_constants
***************
*** 958,963 ****
--- 961,973 ----
;; Convert between signed integer types and floating point.
+ ;; Emits fcvt.xf with source and destination both in FR registers; the
+ ;; RTL describes it as an XFmode `float' of an XFmode operand.
+ ;; NOTE(review): RTL `float' is defined for a fixed-point operand, and
+ ;; floatdixf2 below already describes a `float:XF' of a DImode FR
+ ;; operand (presumably the same instruction) -- confirm that an
+ ;; XFmode-source variant is really what is wanted here.
+ (define_insn "floatxfxf2"
+ [(set (match_operand:XF 0 "fr_register_operand" "=f")
+ (float:XF (match_operand:XF 1 "fr_register_operand" "f")))]
+ ""
+ "fcvt.xf %0 = %1"
+ [(set_attr "itanium_class" "fcvtfx")])
+
(define_insn "floatdixf2"
[(set (match_operand:XF 0 "fr_register_operand" "=f")
(float:XF (match_operand:DI 1 "fr_register_operand" "f")))]
***************
*** 979,984 ****
--- 989,1001 ----
"fcvt.fx.trunc %0 = %1"
[(set_attr "itanium_class" "fcvtfx")])
+ ;; Emits fcvt.fx.trunc with source and destination both in FR registers;
+ ;; the RTL describes it as an XFmode `fix' of an XFmode operand.
+ ;; NOTE(review): with a floating-point result mode, RTL `fix' denotes a
+ ;; value that is still floating point (rounded toward zero).
+ ;; fix_truncxfdi2 below describes a DImode result for what is presumably
+ ;; the same instruction -- confirm this XFmode description matches what
+ ;; the instruction actually leaves in operand 0.
+ (define_insn "fix_truncxf2"
+ [(set (match_operand:XF 0 "fr_register_operand" "=f")
+ (fix: XF (match_operand:XF 1 "fr_register_operand" "f")))]
+ ""
+ "fcvt.fx.trunc %0 = %1"
+ [(set_attr "itanium_class" "fcvtfx")])
+
(define_insn "fix_truncxfdi2"
[(set (match_operand:DI 0 "fr_register_operand" "=f")
(fix:DI (match_operand:XF 1 "fr_register_operand" "f")))]
***************
*** 2909,2914 ****
--- 2926,2992 ----
}
[(set_attr "predicable" "no")])
+
+ ;; Inline round, SFmode: operand 0 = round (operand 1).
+ ;;
+ ;; The inline sequence adds +/-0.5 to the input and truncates it through
+ ;; a 64-bit integer conversion.  That does not reproduce the library
+ ;; result for NaNs, infinities, magnitudes too large for a 64-bit
+ ;; integer, or the sign of a zero result, so the expansion is enabled
+ ;; only under -funsafe-math-optimizations.
+
+ (define_expand "roundsf2"
+   [(set (match_operand:SF 0 "fr_register_operand" "")
+         (unspec:SF [(match_operand:SF 1 "fr_register_operand" "")]
+                    UNSPEC_ROUNDF))]
+   "flag_unsafe_math_optimizations"
+ {
+   /* Hand over to the insn-and-split pattern, which carries the scratch
+      registers the sequence needs.  */
+   emit_insn (gen_rounsf2_internal (operands[0], operands[1]));
+   DONE;
+ })
+
+ ;; SFmode round, split after reload into the sequence
+ ;;     y = (a >= 0 ? 1/2 : -1/2) + a
+ ;;     y = (XF) (DI) y          ; truncate through a 64-bit integer
+ ;;     result = (SF) y
+ ;; with y held in XFmode on the hard register of operand 0.
+ ;;
+ ;; Scratch operands: 2 and 3 are general registers holding the setf.exp
+ ;; bit patterns, 4 is the predicate for the sign test, 5 is an FR
+ ;; register holding -1/2.  Operand 5 is written before operand 1 is
+ ;; read, so it must be earlyclobber; otherwise it could be allocated
+ ;; on top of the input.
+ (define_insn_and_split "rounsf2_internal"
+   [(set (match_operand:SF 0 "fr_register_operand" "=&f")
+         (unspec:SF [(match_operand:SF 1 "fr_register_operand" "f")]
+                    UNSPEC_ROUNDF))
+    (clobber (match_scratch:DI 2 "=r"))
+    (clobber (match_scratch:DI 3 "=r"))
+    (clobber (match_scratch:BI 4 "=c"))
+    (clobber (match_scratch:XF 5 "=&f"))]
+   ""
+   "#"
+   "reload_completed"
+   [;; 0x2fffe: sign bit set, exponent field of 1/2
+    (set (match_dup 2) (const_int 196606))
+    ;; 0x0fffe: sign bit clear, exponent field of 1/2
+    (set (match_dup 3) (const_int 65534))
+
+    ;; operand 8 = -1/2
+    (set (match_dup 8) (unspec:XF [(match_dup 2)] UNSPEC_SETF_EXP))
+    ;; y = 1/2
+    (set (match_dup 6) (unspec:XF [(match_dup 3)] UNSPEC_SETF_EXP))
+
+    ;; y = a >= 0 ? (1/2) : (-1/2)
+    (set (match_dup 4)
+         (ge:BI (match_dup 7) (match_dup 9)))
+    (cond_exec (eq (match_dup 4) (const_int 0))
+      (set (match_dup 6) (match_dup 8)))
+
+    ;; y = y + a
+    (set (match_dup 6) (plus:XF (match_dup 6) (match_dup 7)))
+
+    ;; y = (DI) y, truncating; the integer lives in the same FR register
+    (set (match_dup 10) (fix:DI (match_dup 6)))
+
+    ;; y = (XF) y
+    (set (match_dup 6) (float:XF (match_dup 10)))
+
+    ;; result = (float) y
+    (set (match_dup 0) (float_truncate:SF (match_dup 6)))]
+ {
+   /* XFmode views of the result, the input and the FR scratch.  */
+   operands[6] = gen_rtx_REG (XFmode, REGNO (operands[0]));
+   operands[7] = gen_rtx_REG (XFmode, REGNO (operands[1]));
+   operands[8] = gen_rtx_REG (XFmode, REGNO (operands[5]));
+   operands[9] = CONST0_RTX (XFmode);
+   /* DImode view of the result register for the integer intermediate, so
+      the conversions match fix_truncxfdi2 and floatdixf2.  */
+   operands[10] = gen_rtx_REG (DImode, REGNO (operands[0]));
+ }
+   [(set_attr "predicable" "no")])
+
;; ::::::::::::::::::::
;; ::
;; :: 64 bit floating point arithmetic
***************
*** 3403,3408 ****
--- 3481,3547 ----
}
[(set_attr "predicable" "no")])
+
+ ;; Inline round, DFmode: operand 0 = round (operand 1).
+ ;;
+ ;; The inline sequence adds +/-0.5 to the input and truncates it through
+ ;; a 64-bit integer conversion.  That does not reproduce the library
+ ;; result for NaNs, infinities, magnitudes too large for a 64-bit
+ ;; integer, or the sign of a zero result, so the expansion is enabled
+ ;; only under -funsafe-math-optimizations.
+
+ (define_expand "rounddf2"
+   [(set (match_operand:DF 0 "fr_register_operand" "")
+         (unspec:DF [(match_operand:DF 1 "fr_register_operand" "")]
+                    UNSPEC_ROUND))]
+   "flag_unsafe_math_optimizations"
+ {
+   /* Hand over to the insn-and-split pattern, which carries the scratch
+      registers the sequence needs.  */
+   emit_insn (gen_roundf2_internal (operands[0], operands[1]));
+   DONE;
+ })
+
+ ;; DFmode round, split after reload into the sequence
+ ;;     y = (a >= 0 ? 1/2 : -1/2) + a
+ ;;     y = (XF) (DI) y          ; truncate through a 64-bit integer
+ ;;     result = (DF) y
+ ;; with y held in XFmode on the hard register of operand 0.
+ ;;
+ ;; Scratch operands: 2 and 3 are general registers holding the setf.exp
+ ;; bit patterns, 4 is the predicate for the sign test, 5 is an FR
+ ;; register holding -1/2.  Operand 5 is written before operand 1 is
+ ;; read, so it must be earlyclobber; otherwise it could be allocated
+ ;; on top of the input.
+ (define_insn_and_split "roundf2_internal"
+   [(set (match_operand:DF 0 "fr_register_operand" "=&f")
+         (unspec:DF [(match_operand:DF 1 "fr_register_operand" "f")]
+                    UNSPEC_ROUND))
+    (clobber (match_scratch:DI 2 "=r"))
+    (clobber (match_scratch:DI 3 "=r"))
+    (clobber (match_scratch:BI 4 "=c"))
+    (clobber (match_scratch:XF 5 "=&f"))]
+   ""
+   "#"
+   "reload_completed"
+   [;; 0x2fffe: sign bit set, exponent field of 1/2
+    (set (match_dup 2) (const_int 196606))
+    ;; 0x0fffe: sign bit clear, exponent field of 1/2
+    (set (match_dup 3) (const_int 65534))
+
+    ;; operand 8 = -1/2
+    (set (match_dup 8) (unspec:XF [(match_dup 2)] UNSPEC_SETF_EXP))
+    ;; y = 1/2
+    (set (match_dup 6) (unspec:XF [(match_dup 3)] UNSPEC_SETF_EXP))
+
+    ;; y = a >= 0 ? (1/2) : (-1/2)
+    (set (match_dup 4)
+         (ge:BI (match_dup 7) (match_dup 9)))
+    (cond_exec (eq (match_dup 4) (const_int 0))
+      (set (match_dup 6) (match_dup 8)))
+
+    ;; y = y + a
+    (set (match_dup 6) (plus:XF (match_dup 6) (match_dup 7)))
+
+    ;; y = (DI) y, truncating; the integer lives in the same FR register
+    (set (match_dup 10) (fix:DI (match_dup 6)))
+
+    ;; y = (XF) y
+    (set (match_dup 6) (float:XF (match_dup 10)))
+
+    ;; result = (double) y
+    (set (match_dup 0) (float_truncate:DF (match_dup 6)))]
+ {
+   /* XFmode views of the result, the input and the FR scratch.  */
+   operands[6] = gen_rtx_REG (XFmode, REGNO (operands[0]));
+   operands[7] = gen_rtx_REG (XFmode, REGNO (operands[1]));
+   operands[8] = gen_rtx_REG (XFmode, REGNO (operands[5]));
+   operands[9] = CONST0_RTX (XFmode);
+   /* DImode view of the result register for the integer intermediate, so
+      the conversions match fix_truncxfdi2 and floatdixf2.  */
+   operands[10] = gen_rtx_REG (DImode, REGNO (operands[0]));
+ }
+   [(set_attr "predicable" "no")])
+
;; ::::::::::::::::::::
;; ::
;; :: 80 bit floating point arithmetic
***************
*** 4081,4086 ****
--- 4220,4283 ----
[(set_attr "itanium_class" "fmisc")
(set_attr "predicable" "no")])
+
+ ;; Inline round, XFmode: operand 0 = round (operand 1).
+ ;;
+ ;; The inline sequence adds +/-0.5 to the input and truncates it through
+ ;; a 64-bit integer conversion.  That does not reproduce the library
+ ;; result for NaNs, infinities, magnitudes too large for a 64-bit
+ ;; integer, or the sign of a zero result, so the expansion is enabled
+ ;; only under -funsafe-math-optimizations.
+
+ (define_expand "roundxf2"
+   [(set (match_operand:XF 0 "fr_register_operand" "")
+         (unspec:XF [(match_operand:XF 1 "fr_register_operand" "")]
+                    UNSPEC_ROUNDL))]
+   "flag_unsafe_math_optimizations"
+ {
+   /* Hand over to the insn-and-split pattern, which carries the scratch
+      registers the sequence needs.  */
+   emit_insn (gen_rounxf2_internal (operands[0], operands[1]));
+   DONE;
+ })
+
+ ;; XFmode round, split after reload into the sequence
+ ;;     y = (a >= 0 ? 1/2 : -1/2) + a
+ ;;     result = (XF) (DI) y     ; truncate through a 64-bit integer
+ ;; with y held on the hard register of operand 0.
+ ;;
+ ;; Scratch operands: 2 and 3 are general registers holding the setf.exp
+ ;; bit patterns, 4 is the predicate for the sign test, 5 is an FR
+ ;; register holding -1/2.  Operand 5 is written before operand 1 is
+ ;; read, so it must be earlyclobber; otherwise it could be allocated
+ ;; on top of the input.
+ (define_insn_and_split "rounxf2_internal"
+   [(set (match_operand:XF 0 "fr_register_operand" "=&f")
+         (unspec:XF [(match_operand:XF 1 "fr_register_operand" "f")]
+                    UNSPEC_ROUNDL))
+    (clobber (match_scratch:DI 2 "=r"))
+    (clobber (match_scratch:DI 3 "=r"))
+    (clobber (match_scratch:BI 4 "=c"))
+    (clobber (match_scratch:XF 5 "=&f"))]
+   ""
+   "#"
+   "reload_completed"
+   [;; 0x2fffe: sign bit set, exponent field of 1/2
+    (set (match_dup 2) (const_int 196606))
+    ;; 0x0fffe: sign bit clear, exponent field of 1/2
+    (set (match_dup 3) (const_int 65534))
+
+    ;; operand 8 = -1/2
+    (set (match_dup 8) (unspec:XF [(match_dup 2)] UNSPEC_SETF_EXP))
+    ;; y = 1/2
+    (set (match_dup 6) (unspec:XF [(match_dup 3)] UNSPEC_SETF_EXP))
+
+    ;; y = a >= 0 ? (1/2) : (-1/2)
+    (set (match_dup 4)
+         (ge:BI (match_dup 7) (match_dup 9)))
+    (cond_exec (eq (match_dup 4) (const_int 0))
+      (set (match_dup 6) (match_dup 8)))
+
+    ;; y = y + a
+    (set (match_dup 6) (plus:XF (match_dup 6) (match_dup 7)))
+
+    ;; y = (DI) y, truncating; the integer lives in the same FR register
+    (set (match_dup 10) (fix:DI (match_dup 6)))
+
+    ;; result = (XF) y
+    (set (match_dup 0) (float:XF (match_dup 10)))]
+ {
+   /* XFmode views of the result, the input and the FR scratch.  */
+   operands[6] = gen_rtx_REG (XFmode, REGNO (operands[0]));
+   operands[7] = gen_rtx_REG (XFmode, REGNO (operands[1]));
+   operands[8] = gen_rtx_REG (XFmode, REGNO (operands[5]));
+   operands[9] = CONST0_RTX (XFmode);
+   /* DImode view of the result register for the integer intermediate, so
+      the conversions match fix_truncxfdi2 and floatdixf2.  */
+   operands[10] = gen_rtx_REG (DImode, REGNO (operands[0]));
+ }
+   [(set_attr "predicable" "no")])
+
;; ::::::::::::::::::::
;; ::
;; :: 32 bit Integer Shifts and Rotates