This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.

Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]
Other format:	[Raw text]

Re: Inline round for IA64

From: Canqun Yang <canqun at nudt dot edu dot cn>
To: Richard Henderson <rth at redhat dot com>, gcc-patches at gcc dot gnu dot org,Jim Wilson <wilson at specifixinc dot com>,Toon Moene <toon at moene dot indiv dot nluug dot nl>
Date: Mon, 2 Feb 2004 10:43:33 +0800 (HKT)
Subject: Re: Inline round for IA64
Reply-to: Canqun Yang <canqun at nudt dot edu dot cn>

Richard Henderson <rth@redhat.com>:

> As mentioned by Jim, you need to protect against 
overflow.
> 
I worked out another version to deal with overflow. I 
have appended the patch to this mail, along with a testcase. 
The inlined round looks like this:

float
round (float a)
{
  float y = a;

  /* Use IA64 'fabs' instruction.  */
  if (abs(a) < (1UL << 63))
    {
      if (a < 0)
        y = y - 0.5;
      else
        y = y + 0.5;

      /* Truncate to integer value. 
         Use IA64 'fcvt.trunc' instruction.  */
      y = fix (y);
      /* Convert to floating point value. 
         Use IA64 'fcvt.xf' instruction.  */
      y = float (y);
    }
  return y;
}

> In addition, one needs to protect against the 
rounding mode being set
> to something other than round-to-nearest.  This, 
actually, is probably
> a non-starter when it comes to inlining this function.
> 
> Are you sure you want to implement round, and not 
rint or nearbyint?
>
To implement round.  

> Finally, there's very little of this that's specific 
to ia64.  Indeed,
> none of it.  If you were to generate this:
> 
> 	float rint(float orig)
> 	{
> 	  float two_exp_p = 1 << FLT_MANT_DIG;
> 	  float x = fabs(orig);
> 	  if (x < two_exp_p)
> 	    {
> 	      x += two_exp_p;
> 	      x -= two_exp_p;
> 	      x = copysignf (x, orig);
> 	    }
> 	  else
> 	    x = orig;
> 	  return x;
> 	}
> 
> in rtl from a function in builtins.c, every target 
would benefit.
> 
> 
The motivation for inlining round is to speed up the 
SPEC2000 program 189.lucas on the IA64 platform.

Since gfortran converts the Fortran 95 intrinsic dnint to 
round, if round is inlined, 189.lucas will speed up by 
nearly 15%.

There may be three ways to achieve this:

1. Modify the implementation of intrinsic dnint in 
gfortran. But, other languages will not benefit.

2. Expand round in builtins.c. We would still need to expand 
the function call to copysignf, as in the case of rint.

3. Expand round in *.md for a specific target.

So, which one is the best?
 
> r~
> 

Canqun Yang

Common subdirectories: /home/ycq/ia64/CVS and ia64/CVS
diff -c /home/ycq/ia64/ia64.c ia64/ia64.c
*** /home/ycq/ia64/ia64.c	Tue Jan 27 14:48:11 2004
--- ia64/ia64.c	Sat Jan 31 17:09:33 2004
***************
*** 5308,5314 ****
  	case UNSPEC_FR_RESTORE:
  	case UNSPEC_GETF_EXP:
  	case UNSPEC_SETF_EXP:
!         case UNSPEC_ADDP4:
  	case UNSPEC_FR_SQRT_RECIP_APPROX:
  	  need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
  	  break;
--- 5308,5315 ----
  	case UNSPEC_FR_RESTORE:
  	case UNSPEC_GETF_EXP:
  	case UNSPEC_SETF_EXP:
! 	case UNSPEC_SETF_SIG:
! 	case UNSPEC_ADDP4:
  	case UNSPEC_FR_SQRT_RECIP_APPROX:
  	  need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
  	  break;
diff -c /home/ycq/ia64/ia64.h ia64/ia64.h
*** /home/ycq/ia64/ia64.h	Mon Jan 26 07:32:42 2004
--- ia64/ia64.h	Sat Jan 31 16:59:10 2004
***************
*** 92,97 ****
--- 92,99 ----
  
  #define MASK_INLINE_SQRT_THR      0x00004000 /* inline sqrt, max throughput. */
  
+ #define MASK_INLINE_ROUND         0x00008000 /* inline round.  */
+ 
  #define MASK_DWARF2_ASM 0x40000000	/* test dwarf2 line info via gas.  */
  
  #define MASK_EARLY_STOP_BITS 0x00002000 /* tune stop bits for the model.  */
***************
*** 139,144 ****
--- 141,149 ----
  #define TARGET_INLINE_SQRT \
    (target_flags & (MASK_INLINE_SQRT_LAT | MASK_INLINE_SQRT_THR))
  
+ #define TARGET_INLINE_ROUND \
+   (target_flags & MASK_INLINE_ROUND)
+ 
  #define TARGET_DWARF2_ASM	(target_flags & MASK_DWARF2_ASM)
  
  /* If the assembler supports thread-local storage, assume that the
***************
*** 211,216 ****
--- 216,223 ----
        N_("Generate inline square root, optimize for latency") },	\
    { "inline-sqrt-max-throughput", MASK_INLINE_SQRT_THR,			\
        N_("Generate inline square root, optimize for throughput") },     \
+   { "inline-round", MASK_INLINE_ROUND,                                  \
+       N_("Generate inline round") },                                    \
    { "dwarf2-asm", 	MASK_DWARF2_ASM,				\
        N_("Enable Dwarf 2 line debug info via GNU as")},			\
    { "no-dwarf2-asm", 	-MASK_DWARF2_ASM,				\
diff -c /home/ycq/ia64/ia64.md ia64/ia64.md
*** /home/ycq/ia64/ia64.md	Tue Jan 27 09:42:59 2004
--- ia64/ia64.md	Mon Feb  2 08:47:25 2004
***************
*** 76,81 ****
--- 76,85 ----
     (UNSPEC_RET_ADDR		26)
     (UNSPEC_SETF_EXP             27)
     (UNSPEC_FR_SQRT_RECIP_APPROX 28)
+    (UNSPEC_ROUNDF               29)
+    (UNSPEC_ROUND                30)
+    (UNSPEC_ROUNDL               31)
+    (UNSPEC_SETF_SIG             32)
    ])
  
  (define_constants
***************
*** 958,963 ****
--- 962,974 ----
    "fcvt.fx.trunc %0 = %1"
    [(set_attr "itanium_class" "fcvtfx")])
  
+ (define_insn "fix_truncxf2"
+   [(set (match_operand:XF 0 "fr_register_operand" "=f")
+         (fix: XF (match_operand:XF 1 "fr_register_operand" "f")))]
+   ""
+   "fcvt.fx.trunc %0 = %1"
+   [(set_attr "itanium_class" "fcvtfx")])
+ 
  (define_insn "fix_truncxfdi2_alts"
    [(set (match_operand:DI 0 "fr_register_operand" "=f")
  	(fix:DI (match_operand:XF 1 "fr_register_operand" "f")))
***************
*** 966,971 ****
--- 977,989 ----
    "fcvt.fx.trunc.s%2 %0 = %1"
    [(set_attr "itanium_class" "fcvtfx")])
  
+ (define_insn "floatxfxf2"
+   [(set (match_operand:XF 0 "fr_register_operand" "=f")
+         (float:XF (match_operand:XF 1 "fr_register_operand" "f")))]
+   ""
+   "fcvt.xf %0 = %1"
+   [(set_attr "itanium_class" "fcvtfx")])
+ 
  ;; Convert between unsigned integer types and floating point.
  
  (define_insn "floatunsdisf2"
***************
*** 2887,2892 ****
--- 2905,3015 ----
    operands[9] = CONST0_RTX (XFmode);
  }
    [(set_attr "predicable" "no")])
+ 
+ (define_insn "*setf_sig_xf"
+   [(set (match_operand:XF 0 "fr_register_operand" "=f")
+         (unspec:XF [(match_operand:DI 1 "register_operand" "r")]
+                   UNSPEC_SETF_SIG))]
+   ""
+   "setf.sig %0 = %1"
+   [(set_attr "itanium_class" "frfr")])
+ 
+ ;; Inline round y = round (a)
+ ;;
+ ;; float
+ ;; round (float a)
+ ;; {
+ ;;   float y = a;
+ ;;   if (abs(a) < (1UL << 63))
+ ;;     {
+ ;;       if (a < 0)
+ ;;         y = y - 0.5;
+ ;;       else
+ ;;         y = y + 0.5;
+ ;;
+ ;;       /* Truncate to integer value.  */
+ ;;       y = fix (y)
+ ;;       /* Convert to floating point value.  */
+ ;;       y = float (y)
+ ;;     }
+ ;;   return y;
+ ;; }
+                                                                                 
+ (define_expand "roundsf2"
+   [(set (match_operand:SF 0 "fr_register_operand" "=&f")
+         (unspec:SF [(match_operand:SF 1 "fr_register_operand" "f")]
+                    UNSPEC_ROUNDF))]
+   "TARGET_INLINE_ROUND"
+ {
+   rtx insn;
+   insn = gen_roundsf2_internal (operands[0], operands[1]);
+   emit_insn (insn);
+   DONE;
+ })
+                                                                                 
+ (define_insn_and_split "roundsf2_internal"
+   [(set (match_operand:SF 0 "fr_register_operand" "=&f")
+         (unspec: SF [(match_operand:SF 1 "fr_register_operand" "f")]
+                     UNSPEC_ROUNDF))
+    (clobber (match_scratch:DI 2 "=r"))
+    (clobber (match_scratch:DI 3 "=r"))
+    (clobber (match_scratch:DI 4 "=r"))
+    (clobber (match_scratch:XF 5 "=f"))
+    (clobber (match_scratch:XF 6 "=f"))
+    (clobber (match_scratch:XF 7 "=f"))
+    (clobber (match_scratch:BI 8 "=c"))
+    (clobber (match_scratch:BI 9 "=c"))]
+   "TARGET_INLINE_ROUND"
+   "#"
+   "reload_completed"
+   [;; f5 = -0.5
+    (set (match_dup 2) (const_int 196606))
+    (set (match_dup 5) (unspec:XF [(match_dup 2)] UNSPEC_SETF_EXP))
+                                                                                 
+    ;; f6 = 0.5
+    (set (match_dup 3) (const_int 65534))
+    (set (match_dup 6) (unspec:XF [(match_dup 3)] UNSPEC_SETF_EXP))
+                                                                                 
+    ;; f7 = LONG_MAX
+    (set (match_dup 4) (match_dup 13))
+    (set (match_dup 7) (unspec:XF [(match_dup 4)] UNSPEC_SETF_SIG))
+                                                                                 
+    ;; if (abs(a) < LONG_MAX) set p8
+    (set (match_dup 10) (abs:XF (match_dup 11)))
+    (set (match_dup 8)
+         (lt:BI (match_dup 10) (match_dup 7)))
+                                                                                 
+    ;; y = a
+    (set (match_dup 10) (match_dup 11))
+                                                                                 
+    ;; if (abs(a) < LONG_MAX && a >= 0) f5 = 0.5
+    (cond_exec (ne (match_dup 8) (const_int 0))
+       (set (match_dup 9)
+            (ge:BI (match_dup 11) (match_dup 12))))
+    (cond_exec (ne (match_dup 9) (const_int 0))
+         (set (match_dup 5) (match_dup 6)))
+                                                                                 
+    ;; if (abs(a) < LONG_MAX)  y = a +/- 0.5
+    (cond_exec (ne (match_dup 8) (const_int 0))
+       (set (match_dup 10) (plus:XF (match_dup 10) (match_dup 5))))
+                                                                                 
+    ;; if (abs(a) < LONG_MAX) y = fix(y) (Truncate to integer)
+    (cond_exec (ne (match_dup 8) (const_int 0))
+       (set (match_dup 10) (fix:XF (match_dup 10))))
+                                                                                 
+    ;; if (abs(a) < LONG_MAX) y = float(y) (Convert to floating point value)
+    (cond_exec (ne (match_dup 8) (const_int 0))
+       (set (match_dup 10) (float:XF (match_dup 10))))
+                                                                                 
+    ;; y = (float)
+    (set (match_dup 0) (float_truncate:SF (match_dup 10)))]
+ {
+   operands[10] = gen_rtx_REG (XFmode, REGNO (operands[0]));
+   operands[11] = gen_rtx_REG (XFmode, REGNO (operands[1]));
+   operands[12] = CONST0_RTX (XFmode);
+   operands[13] = GEN_INT (1UL << 63);
+ }
+   [(set_attr "predicable" "no")])
  
  ;; ::::::::::::::::::::
  ;; ::
***************
*** 3387,3392 ****
--- 3510,3594 ----
    operands[9] = CONST0_RTX (XFmode);
  }
    [(set_attr "predicable" "no")])
+ 
+ ;; Inline round y = round (a)
+ ;; Refer to "roundsf2"
+                                                                                 
+ (define_expand "rounddf2"
+   [(set (match_operand:DF 0 "fr_register_operand" "=&f")
+         (unspec:DF [(match_operand:DF 1 "fr_register_operand" "f")]
+                    UNSPEC_ROUND))]
+   "TARGET_INLINE_ROUND"
+ {
+   rtx insn;
+   insn = gen_rounddf2_internal (operands[0], operands[1]);
+   emit_insn (insn);
+   DONE;
+ })
+                                                                                 
+ (define_insn_and_split "rounddf2_internal"
+   [(set (match_operand:DF 0 "fr_register_operand" "=&f")
+         (unspec: DF [(match_operand:DF 1 "fr_register_operand" "f")]
+                     UNSPEC_ROUND))
+    (clobber (match_scratch:DI 2 "=r"))
+    (clobber (match_scratch:DI 3 "=r"))
+    (clobber (match_scratch:DI 4 "=r"))
+    (clobber (match_scratch:XF 5 "=f"))
+    (clobber (match_scratch:XF 6 "=f"))
+    (clobber (match_scratch:XF 7 "=f"))
+    (clobber (match_scratch:BI 8 "=c"))
+    (clobber (match_scratch:BI 9 "=c"))]
+   "TARGET_INLINE_ROUND"
+   "#"
+   "reload_completed"
+   [;; f5 = -0.5
+    (set (match_dup 2) (const_int 196606))
+    (set (match_dup 5) (unspec:XF [(match_dup 2)] UNSPEC_SETF_EXP))
+                                                                                 
+    ;; f6 = 0.5
+    (set (match_dup 3) (const_int 65534))
+    (set (match_dup 6) (unspec:XF [(match_dup 3)] UNSPEC_SETF_EXP))
+                                                                                 
+    ;; f7 = LONG_MAX
+    (set (match_dup 4) (match_dup 13))
+    (set (match_dup 7) (unspec:XF [(match_dup 4)] UNSPEC_SETF_SIG))
+                                                                                 
+    ;; if (abs(a) < LONG_MAX) set p8
+    (set (match_dup 10) (abs:XF (match_dup 11)))
+    (set (match_dup 8)
+         (lt:BI (match_dup 10) (match_dup 7)))
+                                                                                 
+    ;; y = a
+    (set (match_dup 10) (match_dup 11))
+                                                                                 
+    ;; if (abs(a) < LONG_MAX && a >= 0) f5 = 0.5
+    (cond_exec (ne (match_dup 8) (const_int 0))
+       (set (match_dup 9)
+            (ge:BI (match_dup 11) (match_dup 12))))
+    (cond_exec (ne (match_dup 9) (const_int 0))
+         (set (match_dup 5) (match_dup 6)))
+                                                                                 
+    ;; if (abs(a) < LONG_MAX)  y = a +/- 0.5
+    (cond_exec (ne (match_dup 8) (const_int 0))
+       (set (match_dup 10) (plus:XF (match_dup 10) (match_dup 5))))
+                                                                                 
+    ;; if (abs(a) < LONG_MAX) y = fix(y) (Truncate to integer)
+    (cond_exec (ne (match_dup 8) (const_int 0))
+       (set (match_dup 10) (fix:XF (match_dup 10))))
+                                                                                 
+    ;; if (abs(a) < LONG_MAX) y = float(y) (Convert to floating point value)
+    (cond_exec (ne (match_dup 8) (const_int 0))
+       (set (match_dup 10) (float:XF (match_dup 10))))
+                                                                                 
+    ;; y = (double)
+    (set (match_dup 0) (float_truncate:DF (match_dup 10)))]
+ {
+   operands[10] = gen_rtx_REG (XFmode, REGNO (operands[0]));
+   operands[11] = gen_rtx_REG (XFmode, REGNO (operands[1]));
+   operands[12] = CONST0_RTX (XFmode);
+   operands[13] = GEN_INT (1UL << 63);
+ }
+   [(set_attr "predicable" "no")])
  
  ;; ::::::::::::::::::::
  ;; ::
***************
*** 4074,4079 ****
--- 4276,4357 ----
    "frcpa.s%4 %0, %1 = %2, %3"
    [(set_attr "itanium_class" "fmisc")
     (set_attr "predicable" "no")])
+ 
+ ;; Inline round y = round (a)
+ ;; Refer to "roundsf2"
+                                                                                 
+ (define_expand "roundxf2"
+   [(set (match_operand:XF 0 "fr_register_operand" "=&f")
+         (unspec:XF [(match_operand:XF 1 "fr_register_operand" "f")]
+                    UNSPEC_ROUNDL))]
+   "TARGET_INLINE_ROUND"
+ {
+   rtx insn;
+   insn = gen_roundxf2_internal (operands[0], operands[1]);
+   emit_insn (insn);
+   DONE;
+ })
+                                                                                 
+ (define_insn_and_split "roundxf2_internal"
+   [(set (match_operand:XF 0 "fr_register_operand" "=&f")
+         (unspec: XF [(match_operand:XF 1 "fr_register_operand" "f")]
+                     UNSPEC_ROUNDL))
+    (clobber (match_scratch:DI 2 "=r"))
+    (clobber (match_scratch:DI 3 "=r"))
+    (clobber (match_scratch:DI 4 "=r"))
+    (clobber (match_scratch:XF 5 "=f"))
+    (clobber (match_scratch:XF 6 "=f"))
+    (clobber (match_scratch:XF 7 "=f"))
+    (clobber (match_scratch:BI 8 "=c"))
+    (clobber (match_scratch:BI 9 "=c"))]
+   "TARGET_INLINE_ROUND"
+   "#"
+   "reload_completed"
+   [;; f5 = -0.5
+    (set (match_dup 2) (const_int 196606))
+    (set (match_dup 5) (unspec:XF [(match_dup 2)] UNSPEC_SETF_EXP))
+ 
+    ;; f6 = 0.5
+    (set (match_dup 3) (const_int 65534))
+    (set (match_dup 6) (unspec:XF [(match_dup 3)] UNSPEC_SETF_EXP))
+                                                                                 
+    ;; f7 = LONG_MAX
+    (set (match_dup 4) (match_dup 13))
+    (set (match_dup 7) (unspec:XF [(match_dup 4)] UNSPEC_SETF_SIG))
+                                                                                 
+    ;; if (abs(a) < LONG_MAX) set p8
+    (set (match_dup 10) (abs:XF (match_dup 11)))
+    (set (match_dup 8)
+         (lt:BI (match_dup 10) (match_dup 7)))
+                                                                                 
+    ;; y = a
+    (set (match_dup 0) (match_dup 11))
+                                                                                 
+    ;; if (abs(a) < LONG_MAX && a >= 0) f5 = 0.5
+    (cond_exec (ne (match_dup 8) (const_int 0))
+       (set (match_dup 9)
+            (ge:BI (match_dup 11) (match_dup 12))))
+    (cond_exec (ne (match_dup 9) (const_int 0))
+         (set (match_dup 5) (match_dup 6)))
+                                                                                 
+    ;; if (abs(a) < LONG_MAX)  y = a +/- 0.5
+    (cond_exec (ne (match_dup 8) (const_int 0))
+       (set (match_dup 10) (plus:XF (match_dup 10) (match_dup 5))))
+                                                                                 
+    ;; if (abs(a) < LONG_MAX) y = fix(y) (Truncate to integer)
+    (cond_exec (ne (match_dup 8) (const_int 0))
+       (set (match_dup 10) (fix:XF (match_dup 10))))
+                                                                                 
+    ;; if (abs(a) < LONG_MAX) y = float(y) (Convert to floating point value)
+    (cond_exec (ne (match_dup 8) (const_int 0))
+       (set (match_dup 0) (float:XF (match_dup 10))))]
+ {
+   operands[10] = gen_rtx_REG (XFmode, REGNO (operands[0]));
+   operands[11] = gen_rtx_REG (XFmode, REGNO (operands[1]));
+   operands[12] = CONST0_RTX (XFmode);
+   operands[13] = GEN_INT (1UL << 63);
+ }
+   [(set_attr "predicable" "no")])
  
  ;; ::::::::::::::::::::
  ;; ::

/* Test inline round on IA64.
   Use different command line options to compile round.c, run, and then
   compare the results.
     gcc round.c -lm
     gcc round.c -O3 -minline-round  */

#include <math.h>
#include <stdio.h>

float
f_round (float orig)
{
  return roundf (orig);
}


double
d_round (double orig)
{
  return round (orig);
}


long double
l_round (long double orig)
{
  return roundl (orig);
}


/* Print the results of f_round, d_round and l_round for a fixed set of
   inputs so that the output of two differently-compiled builds can be
   compared.  For each precision the inputs are a value just above and a
   value just below a .5 boundary, in both signs, plus one large-magnitude
   value of each sign.

   Returns 0 explicitly: falling off the end of main leaves the exit
   status indeterminate in C89 mode.  */
int
main (void)
{
  /* Single precision.  The double constants are converted to float at
     the call, and the float results are promoted back to double for
     printf.  */
  printf ("%f\n", f_round (-7.507));
  printf ("%f\n", f_round (-7.499));
  printf ("%e\n", f_round (-7.012345E37));
  printf ("%f\n", f_round (7.507));
  printf ("%f\n", f_round (7.499));
  printf ("%e\n", f_round (7.012345E37));

  /* Double precision.  */
  printf ("%f\n", d_round (-7.507));
  printf ("%f\n", d_round (-7.499));
  printf ("%e\n", d_round (-7.012345E307));
  printf ("%f\n", d_round (7.507));
  printf ("%f\n", d_round (7.499));
  printf ("%e\n", d_round (7.012345E307));

  /* Extended precision.  Results are cast to double so the same %f/%e
     conversions can be used as above.  */
  printf ("%f\n", (double)l_round (-7.507));
  printf ("%f\n", (double)l_round (-7.499));
  printf ("%e\n", (double)l_round (-7.012345E307));
  printf ("%f\n", (double)l_round (7.507));
  printf ("%f\n", (double)l_round (7.499));
  printf ("%e\n", (double)l_round (7.012345E307));

  return 0;
}

Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]