[PATCH] PowerPC: Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV

Wed Aug 6 20:21:00 GMT 2014

On 01-08-2014 12:31, Joseph S. Myers wrote:
> On Thu, 31 Jul 2014, David Edelsohn wrote:
>
>> Thanks for implementing the FENV support.  The patch generally looks 
>> good to me.
>>
>> My one concern is a detail in the implementation of "update". I do not
>> have enough experience with GENERIC to verify the details and it seems
>> like it is missing building an outer COMPOUND_EXPR containing
>> update_mffs and the CALL_EXPR for update mtfsf.
> I suppose what's actually odd there is that you have
>
> +  tree update_mffs = build2 (MODIFY_EXPR, void_type_node, old_fenv, call_mffs);
> +
> +  tree old_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, update_mffs);
>
> so you build a MODIFY_EXPR in void_type_node but then convert it with a 
> VIEW_CONVERT_EXPR.  If you'd built the MODIFY_EXPR in double_type_node 
> then the VIEW_CONVERT_EXPR would be meaningful (the value of an assignment 
> a = b being the new value of a), but reinterpreting a void value doesn't 
> make sense.  Or you could probably just use call_mffs directly in the 
> VIEW_CONVERT_EXPR without explicitly creating the old_fenv variable.
>
Thanks for the review Josephm.  I have changed to avoid the void reinterpretation
and use call_mffs directly.  I have also removed the the mask generation in 'clear'
from your previous message, it is now reusing the mas used in feholdexcept.  The 
testcase patch is the same as before.

Checked on both linux-powerpc64/powerpc64le and no regressions found.

--

2014-08-06  Adhemerval Zanella  <azanella@linux.vnet.ibm.com>

gcc:
	* config/rs6000/rs6000.c (rs6000_atomic_assign_expand_fenv): New
	function.

gcc/testsuite:
	* gcc.dg/atomic/c11-atomic-exec-5.c
	(test_main_long_double_add_overflow): Define and run only for
	LDBL_MANT_DIG != 106.
	(test_main_complex_long_double_add_overflow): Likewise.
	(test_main_long_double_sub_overflow): Likewise.
	(test_main_complex_long_double_sub_overflow): Likewise.

---

diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index d088ff6..7d66eb1 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -1631,6 +1631,9 @@ static const struct attribute_spec rs6000_attribute_table[] =
 
 #undef TARGET_CAN_USE_DOLOOP_P
 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
+
+#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
+#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV rs6000_atomic_assign_expand_fenv
 

 
 /* Processor table.  */
@@ -33337,6 +33340,80 @@ emit_fusion_gpr_load (rtx *operands)
   return "";
 }
 
+/* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook.  */
+
+static void
+rs6000_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
+{
+  if (!TARGET_HARD_FLOAT || !TARGET_FPRS)
+    return;
+
+  tree mffs = rs6000_builtin_decls[RS6000_BUILTIN_MFFS];
+  tree mtfsf = rs6000_builtin_decls[RS6000_BUILTIN_MTFSF];
+  tree call_mffs = build_call_expr (mffs, 0);
+
+  /* Generates the equivalent of feholdexcept (&fenv_var)
+
+     *fenv_var = __builtin_mffs ();
+     double fenv_hold;
+     *(uint64_t*)&fenv_hold = *(uint64_t*)fenv_var & 0xffffffff00000007LL;
+     __builtin_mtfsf (0xff, fenv_hold);  */
+
+  /* Mask to clear everything except for the rounding modes and non-IEEE
+     arithmetic flag.  */
+  const unsigned HOST_WIDE_INT hold_exception_mask =
+    HOST_WIDE_INT_UC (0xffffffff00000007);
+
+  tree fenv_var = create_tmp_var (double_type_node, NULL);
+
+  tree hold_mffs = build2 (MODIFY_EXPR, void_type_node, fenv_var, call_mffs);
+
+  tree fenv_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_var);
+  tree fenv_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
+    build_int_cst (uint64_type_node, hold_exception_mask));
+
+  tree fenv_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node, fenv_llu_and);
+
+  tree hold_mtfsf = build_call_expr (mtfsf, 2,
+    build_int_cst (unsigned_type_node, 0xff), fenv_mtfsf);
+
+  *hold = build2 (COMPOUND_EXPR, void_type_node, hold_mffs, hold_mtfsf);
+
+  /* Reload the value of fenv_hold to clear the exceptions.  */
+
+  *clear = build_call_expr (mtfsf, 2,
+    build_int_cst (unsigned_type_node, 0xff), fenv_mtfsf);
+
+  /* Generates the equivalent of feupdateenv (&fenv_var)
+
+     double old_fenv = __builtin_mffs ();
+     double fenv_update;
+     *(uint64_t*)&fenv_update = (*(uint64_t*)&old & 0xffffffff1fffff00LL) |
+                                (*(uint64_t*)fenv_var 0x1ff80fff);
+     __builtin_mtfsf (0xff, fenv_update);  */
+
+  const unsigned HOST_WIDE_INT update_exception_mask =
+    HOST_WIDE_INT_UC (0xffffffff1fffff00);
+  const unsigned HOST_WIDE_INT new_exception_mask =
+    HOST_WIDE_INT_UC (0x1ff80fff);
+
+  tree old_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, call_mffs);
+  tree old_llu_and = build2 (BIT_AND_EXPR, uint64_type_node,
+    old_llu, build_int_cst (uint64_type_node, update_exception_mask));
+
+  tree new_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
+    build_int_cst (uint64_type_node, new_exception_mask));
+
+  tree new_llu_mask = build2 (BIT_IOR_EXPR, uint64_type_node,
+    old_llu_and, new_llu_and);
+
+  tree fenv_mtfsf_update = build1 (VIEW_CONVERT_EXPR, double_type_node,
+    new_llu_mask);
+  
+  *update = build_call_expr (mtfsf, 2,
+    build_int_cst (unsigned_type_node, 0xff), fenv_mtfsf_update);
+}
+
 

 struct gcc_target targetm = TARGET_INITIALIZER;
 
diff --git a/gcc/testsuite/gcc.dg/atomic/c11-atomic-exec-5.c b/gcc/testsuite/gcc.dg/atomic/c11-atomic-exec-5.c
index b47a228..6bcb302 100644
--- a/gcc/testsuite/gcc.dg/atomic/c11-atomic-exec-5.c
+++ b/gcc/testsuite/gcc.dg/atomic/c11-atomic-exec-5.c
@@ -331,11 +331,11 @@ TEST_FUNCS (complex_double_div_overflow, _Complex double, , /= DBL_MIN, 0,
 TEST_FUNCS (long_double_add_invalid, long double, , += __builtin_infl (), 0,
 	    0, __builtin_isinf, 0,
 	    -__builtin_infl (), FE_INVALID)
+#if LDBL_MANT_DIG != 106
 TEST_FUNCS (long_double_add_overflow, long double, , += LDBL_MAX, 0,
 	    LDBL_MAX, __builtin_isinf, FE_OVERFLOW | FE_INEXACT,
 	    0, 0)
 #define NOT_LDBL_EPSILON_2(X) ((X) != LDBL_EPSILON / 2)
-#if LDBL_MANT_DIG != 106
 TEST_FUNCS (long_double_add_inexact, long double, , += LDBL_EPSILON / 2, 0,
 	    1.0L, NOT_LDBL_EPSILON_2, FE_INEXACT,
 	    0, 0)
@@ -348,18 +348,18 @@ TEST_FUNCS (long_double_preinc_inexact, long double, ++, , 0,
 TEST_FUNCS (long_double_postinc_inexact, long double, , ++, 0,
 	    LDBL_EPSILON / 2, NOT_MINUS_1, FE_INEXACT,
 	    -1, 0)
-#endif
 TEST_FUNCS (complex_long_double_add_overflow, _Complex long double, , += LDBL_MAX, 0,
 	    LDBL_MAX, REAL_ISINF, FE_OVERFLOW | FE_INEXACT,
 	    0, 0)
+#endif
 TEST_FUNCS (long_double_sub_invalid, long double, , -= __builtin_infl (), 0,
 	    0, __builtin_isinf, 0,
 	    __builtin_infl (), FE_INVALID)
+#if LDBL_MANT_DIG != 106
 TEST_FUNCS (long_double_sub_overflow, long double, , -= LDBL_MAX, 0,
 	    -LDBL_MAX, __builtin_isinf, FE_OVERFLOW | FE_INEXACT,
 	    0, 0)
 #define NOT_MINUS_LDBL_EPSILON_2(X) ((X) != -LDBL_EPSILON / 2)
-#if LDBL_MANT_DIG != 106
 TEST_FUNCS (long_double_sub_inexact, long double, , -= LDBL_EPSILON / 2, 0,
 	    -1.0L, NOT_MINUS_LDBL_EPSILON_2, FE_INEXACT,
 	    0, 0)
@@ -372,10 +372,10 @@ TEST_FUNCS (long_double_predec_inexact, long double, --, , 0,
 TEST_FUNCS (long_double_postdec_inexact, long double, , --, 0,
 	    -LDBL_EPSILON / 2, NOT_1, FE_INEXACT,
 	    1, 0)
-#endif
 TEST_FUNCS (complex_long_double_sub_overflow, _Complex long double, , -= LDBL_MAX, 0,
 	    -LDBL_MAX, REAL_ISINF, FE_OVERFLOW | FE_INEXACT,
 	    0, 0)
+#endif
 TEST_FUNCS (long_double_mul_invalid, long double, , *= __builtin_infl (), 0,
 	    __builtin_infl (), __builtin_isinf, 0,
 	    0, FE_INVALID)
@@ -507,23 +507,23 @@ main (void)
   ret |= test_main_int_div_double_inexact ();
   ret |= test_main_complex_double_div_overflow ();
   ret |= test_main_long_double_add_invalid ();
-  ret |= test_main_long_double_add_overflow ();
 #if LDBL_MANT_DIG != 106
+  ret |= test_main_long_double_add_overflow ();
   ret |= test_main_long_double_add_inexact ();
   ret |= test_main_long_double_add_inexact_int ();
   ret |= test_main_long_double_preinc_inexact ();
   ret |= test_main_long_double_postinc_inexact ();
-#endif
   ret |= test_main_complex_long_double_add_overflow ();
+#endif
   ret |= test_main_long_double_sub_invalid ();
-  ret |= test_main_long_double_sub_overflow ();
 #if LDBL_MANT_DIG != 106
+  ret |= test_main_long_double_sub_overflow ();
   ret |= test_main_long_double_sub_inexact ();
   ret |= test_main_long_double_sub_inexact_int ();
   ret |= test_main_long_double_predec_inexact ();
   ret |= test_main_long_double_postdec_inexact ();
-#endif
   ret |= test_main_complex_long_double_sub_overflow ();
+#endif
   ret |= test_main_long_double_mul_invalid ();
   ret |= test_main_long_double_mul_overflow ();
   ret |= test_main_long_double_mul_overflow_float ();