[PATCH] i386: Expand roundeven for SSE4.1+

Uros Bizjak ubizjak@gmail.com
Sun Jul 14 23:04:00 GMT 2019


> This patch is for expanding roundeven inline for SSE4.1 and later.
> Note that this patch is to be applied on top of
> <https://gcc.gnu.org/ml/gcc-patches/2019-06/msg01828.html>. The patch
> is bootstrapped and regression tested on x86_64-linux-gnu.

Actually, your patch at [1] is the way to go, but you need several
other changes to get x87 mode switching in order. Please also note
that there is no corresponding non-SSE4 ix86_expand_... function for
roundeven, so the <rounding_insn><mode>2 expander has to be disabled
for the ROUNDEVEN value of the int iterator when SSE FP-math is used
without SSE4.1. Please see the attached (otherwise untested) patch,
which fixes both issues.

[1] https://gcc.gnu.org/ml/gcc/2019-06/msg00352.html

Uros.
-------------- next part --------------
Index: builtins.c
===================================================================
--- builtins.c	(revision 273480)
+++ builtins.c	(working copy)
@@ -2056,6 +2056,7 @@ mathfn_built_in_2 (tree type, combined_fn fn)
     CASE_MATHFN (REMQUO)
     CASE_MATHFN_FLOATN (RINT)
     CASE_MATHFN_FLOATN (ROUND)
+    CASE_MATHFN_FLOATN (ROUNDEVEN)
     CASE_MATHFN (SCALB)
     CASE_MATHFN (SCALBLN)
     CASE_MATHFN (SCALBN)
Index: builtins.def
===================================================================
--- builtins.def	(revision 273480)
+++ builtins.def	(working copy)
@@ -548,6 +548,12 @@ DEF_C99_BUILTIN        (BUILT_IN_ROUNDL, "roundl",
 #define ROUND_TYPE(F) BT_FN_##F##_##F
 DEF_EXT_LIB_FLOATN_NX_BUILTINS (BUILT_IN_ROUND, "round", ROUND_TYPE, ATTR_CONST_NOTHROW_LEAF_LIST)
 #undef ROUND_TYPE
+DEF_EXT_LIB_BUILTIN    (BUILT_IN_ROUNDEVEN, "roundeven", BT_FN_DOUBLE_DOUBLE, ATTR_CONST_NOTHROW_LEAF_LIST)
+DEF_EXT_LIB_BUILTIN    (BUILT_IN_ROUNDEVENF, "roundevenf", BT_FN_FLOAT_FLOAT, ATTR_CONST_NOTHROW_LEAF_LIST)
+DEF_EXT_LIB_BUILTIN    (BUILT_IN_ROUNDEVENL, "roundevenl", BT_FN_LONGDOUBLE_LONGDOUBLE, ATTR_CONST_NOTHROW_LEAF_LIST)
+#define ROUNDEVEN_TYPE(F) BT_FN_##F##_##F
+DEF_EXT_LIB_FLOATN_NX_BUILTINS (BUILT_IN_ROUNDEVEN, "roundeven", ROUNDEVEN_TYPE, ATTR_CONST_NOTHROW_LEAF_LIST)
+#undef ROUNDEVEN_TYPE
 DEF_EXT_LIB_BUILTIN    (BUILT_IN_SCALB, "scalb", BT_FN_DOUBLE_DOUBLE_DOUBLE, ATTR_MATHFN_FPROUNDING_ERRNO)
 DEF_EXT_LIB_BUILTIN    (BUILT_IN_SCALBF, "scalbf", BT_FN_FLOAT_FLOAT_FLOAT, ATTR_MATHFN_FPROUNDING_ERRNO)
 DEF_EXT_LIB_BUILTIN    (BUILT_IN_SCALBL, "scalbl", BT_FN_LONGDOUBLE_LONGDOUBLE_LONGDOUBLE, ATTR_MATHFN_FPROUNDING_ERRNO)
Index: config/i386/i386.c
===================================================================
--- config/i386/i386.c	(revision 273480)
+++ config/i386/i386.c	(working copy)
@@ -13593,6 +13593,11 @@ ix86_i387_mode_needed (int entity, rtx_insn *insn)
 
   switch (entity)
     {
+    case I387_ROUNDEVEN:
+      if (mode == I387_CW_ROUNDEVEN)
+	return mode;
+      break;
+
     case I387_TRUNC:
       if (mode == I387_CW_TRUNC)
 	return mode;
@@ -13627,6 +13632,7 @@ ix86_mode_needed (int entity, rtx_insn *insn)
       return ix86_dirflag_mode_needed (insn);
     case AVX_U128:
       return ix86_avx_u128_mode_needed (insn);
+    case I387_ROUNDEVEN:
     case I387_TRUNC:
     case I387_FLOOR:
     case I387_CEIL:
@@ -13687,6 +13693,7 @@ ix86_mode_after (int entity, int mode, rtx_insn *i
       return mode;
     case AVX_U128:
       return ix86_avx_u128_mode_after (mode, insn);
+    case I387_ROUNDEVEN:
     case I387_TRUNC:
     case I387_FLOOR:
     case I387_CEIL:
@@ -13739,6 +13746,7 @@ ix86_mode_entry (int entity)
       return ix86_dirflag_mode_entry ();
     case AVX_U128:
       return ix86_avx_u128_mode_entry ();
+    case I387_ROUNDEVEN:
     case I387_TRUNC:
     case I387_FLOOR:
     case I387_CEIL:
@@ -13776,6 +13784,7 @@ ix86_mode_exit (int entity)
       return X86_DIRFLAG_ANY;
     case AVX_U128:
       return ix86_avx_u128_mode_exit ();
+    case I387_ROUNDEVEN:
     case I387_TRUNC:
     case I387_FLOOR:
     case I387_CEIL:
@@ -13810,6 +13819,12 @@ emit_i387_cw_initialization (int mode)
 
   switch (mode)
     {
+    case I387_CW_ROUNDEVEN:
+      /* round to nearest */
+      emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
+      slot = SLOT_CW_ROUNDEVEN;
+      break;
+
     case I387_CW_TRUNC:
       /* round toward zero (truncate) */
       emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
@@ -13856,6 +13871,7 @@ ix86_emit_mode_set (int entity, int mode, int prev
       if (mode == AVX_U128_CLEAN)
 	emit_insn (gen_avx_vzeroupper ());
       break;
+    case I387_ROUNDEVEN:
     case I387_TRUNC:
     case I387_FLOOR:
     case I387_CEIL:
Index: config/i386/i386.h
===================================================================
--- config/i386/i386.h	(revision 273480)
+++ config/i386/i386.h	(working copy)
@@ -2471,6 +2471,7 @@ enum ix86_stack_slot
 {
   SLOT_TEMP = 0,
   SLOT_CW_STORED,
+  SLOT_CW_ROUNDEVEN,
   SLOT_CW_TRUNC,
   SLOT_CW_FLOOR,
   SLOT_CW_CEIL,
@@ -2482,6 +2483,7 @@ enum ix86_entity
 {
   X86_DIRFLAG = 0,
   AVX_U128,
+  I387_ROUNDEVEN,
   I387_TRUNC,
   I387_FLOOR,
   I387_CEIL,
@@ -2517,7 +2519,7 @@ enum avx_u128_state
 
 #define NUM_MODES_FOR_MODE_SWITCHING			\
   { X86_DIRFLAG_ANY, AVX_U128_ANY,			\
-    I387_CW_ANY, I387_CW_ANY, I387_CW_ANY }
+    I387_CW_ANY, I387_CW_ANY, I387_CW_ANY, I387_CW_ANY }
 
 

 /* Avoid renaming of stack registers, as doing so in combination with
Index: config/i386/i386.md
===================================================================
--- config/i386/i386.md	(revision 273482)
+++ config/i386/i386.md	(working copy)
@@ -141,6 +141,7 @@
   UNSPEC_FXAM
 
   ;; x87 Rounding
+  UNSPEC_FRNDINT_ROUNDEVEN
   UNSPEC_FRNDINT_FLOOR
   UNSPEC_FRNDINT_CEIL
   UNSPEC_FRNDINT_TRUNC
@@ -303,7 +304,8 @@
 
 ;; Constants to represent rounding modes in the ROUND instruction
 (define_constants
-  [(ROUND_FLOOR			0x1)
+  [(ROUND_ROUNDEVEN		0x0)
+   (ROUND_FLOOR			0x1)
    (ROUND_CEIL			0x2)
    (ROUND_TRUNC			0x3)
    (ROUND_MXCSR			0x4)
@@ -779,7 +781,7 @@
 
 ;; Defines rounding mode of an FP operation.
 
-(define_attr "i387_cw" "trunc,floor,ceil,uninitialized,any"
+(define_attr "i387_cw" "roundeven,floor,ceil,trunc,uninitialized,any"
   (const_string "any"))
 
 ;; Define attribute to indicate AVX insns with partial XMM register update.
@@ -16248,7 +16250,8 @@
 })
 
 (define_int_iterator FRNDINT_ROUNDING
-	[UNSPEC_FRNDINT_FLOOR
+	[UNSPEC_FRNDINT_ROUNDEVEN
+	 UNSPEC_FRNDINT_FLOOR
 	 UNSPEC_FRNDINT_CEIL
 	 UNSPEC_FRNDINT_TRUNC])
 
@@ -16258,7 +16261,8 @@
 
 ;; Base name for define_insn
 (define_int_attr rounding_insn
-	[(UNSPEC_FRNDINT_FLOOR "floor")
+	[(UNSPEC_FRNDINT_ROUNDEVEN "roundeven")
+	 (UNSPEC_FRNDINT_FLOOR "floor")
 	 (UNSPEC_FRNDINT_CEIL "ceil")
 	 (UNSPEC_FRNDINT_TRUNC "btrunc")
 	 (UNSPEC_FIST_FLOOR "floor")
@@ -16265,7 +16269,8 @@
 	 (UNSPEC_FIST_CEIL "ceil")])
 
 (define_int_attr rounding
-	[(UNSPEC_FRNDINT_FLOOR "floor")
+	[(UNSPEC_FRNDINT_ROUNDEVEN "roundeven")
+	 (UNSPEC_FRNDINT_FLOOR "floor")
 	 (UNSPEC_FRNDINT_CEIL "ceil")
 	 (UNSPEC_FRNDINT_TRUNC "trunc")
 	 (UNSPEC_FIST_FLOOR "floor")
@@ -16272,7 +16277,8 @@
 	 (UNSPEC_FIST_CEIL "ceil")])
 
 (define_int_attr ROUNDING
-	[(UNSPEC_FRNDINT_FLOOR "FLOOR")
+	[(UNSPEC_FRNDINT_ROUNDEVEN "ROUNDEVEN")
+	 (UNSPEC_FRNDINT_FLOOR "FLOOR")
 	 (UNSPEC_FRNDINT_CEIL "CEIL")
 	 (UNSPEC_FRNDINT_TRUNC "TRUNC")
 	 (UNSPEC_FIST_FLOOR "FLOOR")
@@ -16335,8 +16341,9 @@
 	|| TARGET_MIX_SSE_I387)
     && (flag_fp_int_builtin_inexact || !flag_trapping_math))
    || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
-       && (TARGET_SSE4_1 || flag_fp_int_builtin_inexact
-	   || !flag_trapping_math))"
+       && (TARGET_SSE4_1
+	   || (ROUND_<ROUNDING> != ROUND_ROUNDEVEN
+	       && (flag_fp_int_builtin_inexact || !flag_trapping_math))))"
 {
   if (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
       && (TARGET_SSE4_1 || flag_fp_int_builtin_inexact || !flag_trapping_math))
Index: internal-fn.def
===================================================================
--- internal-fn.def	(revision 273480)
+++ internal-fn.def	(working copy)
@@ -234,6 +234,7 @@ DEF_INTERNAL_FLT_FLOATN_FN (FLOOR, ECF_CONST, floo
 DEF_INTERNAL_FLT_FLOATN_FN (NEARBYINT, ECF_CONST, nearbyint, unary)
 DEF_INTERNAL_FLT_FLOATN_FN (RINT, ECF_CONST, rint, unary)
 DEF_INTERNAL_FLT_FLOATN_FN (ROUND, ECF_CONST, round, unary)
+DEF_INTERNAL_FLT_FLOATN_FN (ROUNDEVEN, ECF_CONST, roundeven, unary)
 DEF_INTERNAL_FLT_FLOATN_FN (TRUNC, ECF_CONST, btrunc, unary)
 
 /* Binary math functions.  */
Index: optabs.def
===================================================================
--- optabs.def	(revision 273480)
+++ optabs.def	(working copy)
@@ -268,6 +268,7 @@ OPTAB_D (fnms_optab, "fnms$a4")
 
 OPTAB_D (rint_optab, "rint$a2")
 OPTAB_D (round_optab, "round$a2")
+OPTAB_D (roundeven_optab, "roundeven$a2")
 OPTAB_D (floor_optab, "floor$a2")
 OPTAB_D (ceil_optab, "ceil$a2")
 OPTAB_D (btrunc_optab, "btrunc$a2")
Index: reg-stack.c
===================================================================
--- reg-stack.c	(revision 273480)
+++ reg-stack.c	(working copy)
@@ -1817,6 +1817,7 @@ subst_stack_regs_pat (rtx_insn *insn, stack_ptr re
 	      case UNSPEC_FRNDINT:
 	      case UNSPEC_F2XM1:
 
+	      case UNSPEC_FRNDINT_ROUNDEVEN:
 	      case UNSPEC_FRNDINT_FLOOR:
 	      case UNSPEC_FRNDINT_CEIL:
 	      case UNSPEC_FRNDINT_TRUNC:


More information about the Gcc-patches mailing list