This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.

Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]
Other format:	[Raw text]

Inline round for IA64

From: Canqun Yang <canqun at nudt dot edu dot cn>
To: gcc-patches at gcc dot gnu dot org, Jim Wilson <wilson at specifixinc dot com>
Date: Fri, 16 Jan 2004 09:13:23 +0800 (HKT)
Subject: Inline round for IA64
Reply-to: Canqun Yang <canqun at nudt dot edu dot cn>

Hi, Jim Wilson

I modified the IA-64 machine description file, ia64.md, so that
calls to round are expanded inline.

The test results show a performance improvement for
programs that call the function 'round' in libm.a;
e.g., the execution time of 189.lucas is 261 seconds with the patch,
while the original version takes 309 seconds.

I have attached the patch to this mail and hope it will be
helpful.

Regards,

Canqun Yang

*** ./gcc/gcc/config/ia64/ia64.md	2004-01-14 18:42:59.000000000 +0800
--- ia64.md	2004-01-15 21:27:57.000000000 +0800
***************
*** 76,81 ****
--- 76,84 ----
     (UNSPEC_RET_ADDR		26)
     (UNSPEC_SETF_EXP             27)
     (UNSPEC_FR_SQRT_RECIP_APPROX 28)
+    (UNSPEC_ROUNDF               29)
+    (UNSPEC_ROUND                30)
+    (UNSPEC_ROUNDL               31)
    ])
  
  (define_constants
***************
*** 958,963 ****
--- 961,973 ----
  
  ;; Convert between signed integer types and floating point.
  
+ (define_insn "floatxfxf2"  ; FR-to-FR "float" pattern with both operands in XFmode; emits fcvt.xf.
+   [(set (match_operand:XF 0 "fr_register_operand" "=f")
+         (float:XF (match_operand:XF 1 "fr_register_operand" "f")))]  ; NOTE(review): the source is XFmode here, whereas floatdixf2 just below takes a DImode source -- confirm that float: of an XFmode operand is intended.
+   ""
+   "fcvt.xf %0 = %1"
+   [(set_attr "itanium_class" "fcvtfx")])
+ 
  (define_insn "floatdixf2"
    [(set (match_operand:XF 0 "fr_register_operand" "=f")
  	(float:XF (match_operand:DI 1 "fr_register_operand" "f")))]
***************
*** 979,984 ****
--- 989,1001 ----
    "fcvt.fx.trunc %0 = %1"
    [(set_attr "itanium_class" "fcvtfx")])
  
+ (define_insn "fix_truncxf2"  ; FR-to-FR "fix" pattern with both operands in XFmode; emits fcvt.fx.trunc.
+   [(set (match_operand:XF 0 "fr_register_operand" "=f")
+         (fix: XF (match_operand:XF 1 "fr_register_operand" "f")))]  ; NOTE(review): the destination is XFmode here, whereas fix_truncxfdi2 just below writes DImode -- confirm which value representation the result is expected to have.
+   ""
+   "fcvt.fx.trunc %0 = %1"
+   [(set_attr "itanium_class" "fcvtfx")])
+ 
  (define_insn "fix_truncxfdi2"
    [(set (match_operand:DI 0 "fr_register_operand" "=f")
  	(fix:DI (match_operand:XF 1 "fr_register_operand" "f")))]
***************
*** 2909,2914 ****
--- 2926,2992 ----
  }
    [(set_attr "predicable" "no")])
  
+ 
+ ;; Inline expansion of round for SFmode: y = round (a).  The expander only forwards to rounsf2_internal.
+ 
+ (define_expand "roundsf2"
+   [(set (match_operand:SF 0 "fr_register_operand" "=&f")
+         (unspec:SF [(match_operand:SF 1 "fr_register_operand" "f")]
+                    UNSPEC_ROUNDF))]
+   ""
+ {
+   rtx insn;
+   insn = gen_rounsf2_internal (operands[0], operands[1]);  /* Sic: "rounsf2" (no 'd') is the spelling of the insn this file defines.  */
+   emit_insn (insn);
+   DONE;  /* The internal insn emitted above is the whole expansion.  */
+ })
+ 
+ (define_insn_and_split "rounsf2_internal"  ; SFmode round: output template is "#", split condition is reload_completed.
+   [(set (match_operand:SF 0 "fr_register_operand" "=&f")  ; earlyclobber: operand 0's register (as operand 6) is written before operand 1 (as operand 7) is last read.
+         (unspec: SF [(match_operand:SF 1 "fr_register_operand" "f")]
+                     UNSPEC_ROUNDF))
+    (clobber (match_scratch:DI 2 "=r"))  ; receives the constant 196606 (sign + exponent field for -1/2)
+    (clobber (match_scratch:DI 3 "=r"))  ; receives the constant 65534 (exponent field for 1/2)
+    (clobber (match_scratch:BI 4 "=c"))  ; receives the result of the a >= 0 comparison
+    (clobber (match_scratch:XF 5 "=f"))]  ; receives -1/2 (used in the split as operand 8)
+   ""
+   "#"
+   "reload_completed"
+   [;; operand 2 = 196606 = 0x2FFFE: the 0xFFFE exponent field used below plus bit 17 (the sign bit, per the original comment)
+    (set (match_dup 2) (const_int 196606))
+    ;; operand 3 = 65534 = 0xFFFE: exponent field for 1/2 (presumably biased by 0xFFFF -- confirm against the Itanium register format)
+    (set (match_dup 3) (const_int 65534))
+ 
+    ;; operand 8 (the XF scratch, operand 5) = -1/2; "8" is an operand number, not a register number
+    (set (match_dup 8) (unspec:XF [(match_dup 2)] UNSPEC_SETF_EXP))
+    ;; operand 6 (operand 0's register in XFmode, called y below) = 1/2
+    (set (match_dup 6) (unspec:XF [(match_dup 3)] UNSPEC_SETF_EXP))
+ 
+    ;; y = (a >= 0) ? (1/2) : (-1/2): the ge sets operand 4, and the cond_exec overwrites y with -1/2 when operand 4 is 0
+    (set (match_dup 4)
+         (ge:BI (match_dup 7) (match_dup 9)))
+    (cond_exec (eq (match_dup 4) (const_int 0))
+       (set (match_dup 6) (match_dup 8)))
+ 
+    ;; y = y + a, computed in XFmode
+    (set (match_dup 6) (plus:XF (match_dup 6) (match_dup 7)))
+ 
+    ;; y = fix(y): has the shape of fix_truncxf2 (fcvt.fx.trunc).  NOTE(review): behaviour for NaN, infinities and values too large for the integer conversion is not handled here -- confirm.
+    (set (match_dup 6) (fix:XF (match_dup 6)))
+    
+    ;; y = float(y): has the shape of floatxfxf2 (fcvt.xf)
+    (set (match_dup 6) (float:XF (match_dup 6)))
+ 
+    ;; result = (float) y: narrow the XFmode value back to SFmode into operand 0
+    (set (match_dup 0) (float_truncate:SF (match_dup 6)))]
+ {
+   operands[6] = gen_rtx_REG (XFmode, REGNO (operands[0]));  /* y: operand 0's register viewed in XFmode.  */
+   operands[7] = gen_rtx_REG (XFmode, REGNO (operands[1]));  /* a: operand 1's register viewed in XFmode.  */
+   operands[8] = gen_rtx_REG (XFmode, REGNO (operands[5]));  /* Holder for -1/2: the XF scratch's register.  */
+   operands[9] = CONST0_RTX (XFmode);  /* Zero, the right-hand side of the a >= 0 comparison.  */
+ }
+   [(set_attr "predicable" "no")])  ; NOTE(review): presumably non-predicable because the split itself uses a predicate (operand 4) -- confirm.
+ 
  ;; ::::::::::::::::::::
  ;; ::
  ;; :: 64 bit floating point arithmetic
***************
*** 3403,3408 ****
--- 3481,3547 ----
  }
    [(set_attr "predicable" "no")])
  
+ 
+ ;; Inline expansion of round for DFmode: y = round (a).  The expander only forwards to roundf2_internal.
+ 
+ (define_expand "rounddf2"
+   [(set (match_operand:DF 0 "fr_register_operand" "=&f")
+         (unspec:DF [(match_operand:DF 1 "fr_register_operand" "f")]
+                    UNSPEC_ROUND))]
+   ""
+ {
+   rtx insn;
+   insn = gen_roundf2_internal (operands[0], operands[1]);  /* Sic: "roundf2" is the spelling of the DFmode insn this file defines.  */
+   emit_insn (insn);
+   DONE;  /* The internal insn emitted above is the whole expansion.  */
+ })
+ 
+ (define_insn_and_split "roundf2_internal"  ; DFmode round: output template is "#", split condition is reload_completed.
+   [(set (match_operand:DF 0 "fr_register_operand" "=&f")  ; earlyclobber: operand 0's register (as operand 6) is written before operand 1 (as operand 7) is last read.
+         (unspec: DF [(match_operand:DF 1 "fr_register_operand" "f")]
+                     UNSPEC_ROUND))
+    (clobber (match_scratch:DI 2 "=r"))  ; receives the constant 196606 (sign + exponent field for -1/2)
+    (clobber (match_scratch:DI 3 "=r"))  ; receives the constant 65534 (exponent field for 1/2)
+    (clobber (match_scratch:BI 4 "=c"))  ; receives the result of the a >= 0 comparison
+    (clobber (match_scratch:XF 5 "=f"))]  ; receives -1/2 (used in the split as operand 8)
+   ""
+   "#"
+   "reload_completed"
+   [;; operand 2 = 196606 = 0x2FFFE: the 0xFFFE exponent field used below plus bit 17 (the sign bit, per the original comment)
+    (set (match_dup 2) (const_int 196606))
+    ;; operand 3 = 65534 = 0xFFFE: exponent field for 1/2 (presumably biased by 0xFFFF -- confirm against the Itanium register format)
+    (set (match_dup 3) (const_int 65534))
+ 
+    ;; operand 8 (the XF scratch, operand 5) = -1/2; "8" is an operand number, not a register number
+    (set (match_dup 8) (unspec:XF [(match_dup 2)] UNSPEC_SETF_EXP))
+    ;; operand 6 (operand 0's register in XFmode, called y below) = 1/2
+    (set (match_dup 6) (unspec:XF [(match_dup 3)] UNSPEC_SETF_EXP))
+ 
+    ;; y = (a >= 0) ? (1/2) : (-1/2): the ge sets operand 4, and the cond_exec overwrites y with -1/2 when operand 4 is 0
+    (set (match_dup 4)
+         (ge:BI (match_dup 7) (match_dup 9)))
+    (cond_exec (eq (match_dup 4) (const_int 0))
+       (set (match_dup 6) (match_dup 8)))
+ 
+    ;; y = y + a, computed in XFmode
+    (set (match_dup 6) (plus:XF (match_dup 6) (match_dup 7)))
+ 
+    ;; y = fix(y): has the shape of fix_truncxf2 (fcvt.fx.trunc).  NOTE(review): behaviour for NaN, infinities and values too large for the integer conversion is not handled here -- confirm.
+    (set (match_dup 6) (fix:XF (match_dup 6)))
+ 
+    ;; y = float(y): has the shape of floatxfxf2 (fcvt.xf)
+    (set (match_dup 6) (float:XF (match_dup 6)))
+ 
+    ;; result = (double) y: narrow the XFmode value back to DFmode into operand 0
+    (set (match_dup 0) (float_truncate:DF (match_dup 6)))]
+ {
+   operands[6] = gen_rtx_REG (XFmode, REGNO (operands[0]));  /* y: operand 0's register viewed in XFmode.  */
+   operands[7] = gen_rtx_REG (XFmode, REGNO (operands[1]));  /* a: operand 1's register viewed in XFmode.  */
+   operands[8] = gen_rtx_REG (XFmode, REGNO (operands[5]));  /* Holder for -1/2: the XF scratch's register.  */
+   operands[9] = CONST0_RTX (XFmode);  /* Zero, the right-hand side of the a >= 0 comparison.  */
+ }
+   [(set_attr "predicable" "no")])  ; NOTE(review): presumably non-predicable because the split itself uses a predicate (operand 4) -- confirm.
+ 
  ;; ::::::::::::::::::::
  ;; ::
  ;; :: 80 bit floating point arithmetic
***************
*** 4081,4086 ****
--- 4220,4283 ----
    [(set_attr "itanium_class" "fmisc")
     (set_attr "predicable" "no")])
  
+ 
+ ;; Inline expansion of round for XFmode: y = round (a).  The expander only forwards to rounxf2_internal.
+ 
+ (define_expand "roundxf2"
+   [(set (match_operand:XF 0 "fr_register_operand" "=&f")
+         (unspec:XF [(match_operand:XF 1 "fr_register_operand" "f")]
+                    UNSPEC_ROUNDL))]
+   ""
+ {
+   rtx insn;
+   insn = gen_rounxf2_internal (operands[0], operands[1]);  /* Sic: "rounxf2" (no 'd') is the spelling of the insn this file defines.  */
+   emit_insn (insn);
+   DONE;  /* The internal insn emitted above is the whole expansion.  */
+ })
+ 
+ (define_insn_and_split "rounxf2_internal"  ; XFmode round: output template is "#", split condition is reload_completed.
+   [(set (match_operand:XF 0 "fr_register_operand" "=&f")  ; earlyclobber: operand 0's register (as operand 6) is written before operand 1 (as operand 7) is last read.
+         (unspec: XF [(match_operand:XF 1 "fr_register_operand" "f")]
+                     UNSPEC_ROUNDL))
+    (clobber (match_scratch:DI 2 "=r"))  ; receives the constant 196606 (sign + exponent field for -1/2)
+    (clobber (match_scratch:DI 3 "=r"))  ; receives the constant 65534 (exponent field for 1/2)
+    (clobber (match_scratch:BI 4 "=c"))  ; receives the result of the a >= 0 comparison
+    (clobber (match_scratch:XF 5 "=f"))]  ; receives -1/2 (used in the split as operand 8)
+   ""
+   "#"
+   "reload_completed"
+   [;; operand 2 = 196606 = 0x2FFFE: the 0xFFFE exponent field used below plus bit 17 (the sign bit, per the original comment)
+    (set (match_dup 2) (const_int 196606))
+    ;; operand 3 = 65534 = 0xFFFE: exponent field for 1/2 (presumably biased by 0xFFFF -- confirm against the Itanium register format)
+    (set (match_dup 3) (const_int 65534))
+ 
+    ;; operand 8 (the XF scratch, operand 5) = -1/2; "8" is an operand number, not a register number
+    (set (match_dup 8) (unspec:XF [(match_dup 2)] UNSPEC_SETF_EXP))
+    ;; operand 6 (operand 0's register in XFmode, called y below) = 1/2
+    (set (match_dup 6) (unspec:XF [(match_dup 3)] UNSPEC_SETF_EXP))
+    
+    ;; y = (a >= 0) ? (1/2) : (-1/2): the ge sets operand 4, and the cond_exec overwrites y with -1/2 when operand 4 is 0
+    (set (match_dup 4)
+         (ge:BI (match_dup 7) (match_dup 9)))
+    (cond_exec (eq (match_dup 4) (const_int 0))
+       (set (match_dup 6) (match_dup 8)))
+ 
+    ;; y = y + a, computed in XFmode
+    (set (match_dup 6) (plus:XF (match_dup 6) (match_dup 7)))
+ 
+    ;; y = fix(y): has the shape of fix_truncxf2 (fcvt.fx.trunc).  NOTE(review): behaviour for NaN, infinities and values too large for the integer conversion is not handled here -- confirm.
+    (set (match_dup 6) (fix:XF (match_dup 6)))
+ 
+    ;; result = float(y): has the shape of floatxfxf2 (fcvt.xf); already XFmode, so no narrowing step follows
+    (set (match_dup 0) (float:XF (match_dup 6)))]
+ {
+   operands[6] = gen_rtx_REG (XFmode, REGNO (operands[0]));  /* y: operand 0's register viewed in XFmode.  */
+   operands[7] = gen_rtx_REG (XFmode, REGNO (operands[1]));  /* a: operand 1's register viewed in XFmode.  */
+   operands[8] = gen_rtx_REG (XFmode, REGNO (operands[5]));  /* Holder for -1/2: the XF scratch's register.  */
+   operands[9] = CONST0_RTX (XFmode);  /* Zero, the right-hand side of the a >= 0 comparison.  */
+ }
+   [(set_attr "predicable" "no")])  ; NOTE(review): presumably non-predicable because the split itself uses a predicate (operand 4) -- confirm.
+ 
  ;; ::::::::::::::::::::
  ;; ::
  ;; :: 32 bit Integer Shifts and Rotates

Follow-Ups:
- Re: Inline round for IA64
  - From: Jim Wilson
- Re: Inline round for IA64
  - From: Richard Henderson

Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]