[RFC] __atomic_compare_exchange* optimizations (PR middle-end/66867, alternative)

Jakub Jelinek jakub@redhat.com
Fri Jun 24 13:40:00 GMT 2016


On Thu, Jun 23, 2016 at 05:23:21PM +0200, Jakub Jelinek wrote:
> Thinking about this again, there could be another option - keep
> __atomic_compare_exchange_N in the IL, but under certain conditions (similar
> to what the patch uses in fold_builtin_atomic_compare_exchange) for these
> builtins ignore &var on the second argument, and if we actually turn var
> into non-addressable, convert the builtin call similarly to what
> fold_builtin_atomic_compare_exchange does in the patch (except the store
> would be non-conditional then; the gimple-fold.c part wouldn't be needed
> then).

Here is this second approach implemented.  On the testcases I've posted it
generally gives slight code size savings over the other patch (and even the
other patch already produced generally smaller code than vanilla).
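
For reference, a minimal example of the kind of code this targets
(illustrative only, not one of the posted testcases):

  _Bool
  upd (int *p, int d)
  {
    int e = 0;
    return __atomic_compare_exchange_n (p, &e, d, 0,
					__ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
  }

Previously taking &e forced e into a stack slot even though nothing else
needs its address; with this patch e can be rewritten into an SSA name and
the expected value is passed and returned by value.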

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2016-06-24  Jakub Jelinek  <jakub@redhat.com>

	PR middle-end/66867
	* builtins.c (expand_ifn_atomic_compare_exchange_into_call,
	expand_ifn_atomic_compare_exchange): New functions.
	* internal-fn.c (expand_ATOMIC_COMPARE_EXCHANGE): New function.
	* tree.h (build_call_expr_internal_loc): Rename to ...
	(build_call_expr_internal_loc_array): ... this.  Fix up type of
	last argument.
	* internal-fn.def (ATOMIC_COMPARE_EXCHANGE): New internal fn.
	* predict.c (expr_expected_value_1): Handle IMAGPART_EXPR of
	ATOMIC_COMPARE_EXCHANGE result.
	* builtins.h (expand_ifn_atomic_compare_exchange): New prototype.
	* gimple-fold.h (optimize_atomic_compare_exchange_p,
	fold_builtin_atomic_compare_exchange): New prototypes.
	* gimple-fold.c (optimize_atomic_compare_exchange_p,
	fold_builtin_atomic_compare_exchange): New functions.
	* tree-ssa.c (execute_update_addresses_taken): If
	optimize_atomic_compare_exchange_p, ignore &var in 2nd argument
	of call when finding addressable vars, and if such var becomes
	non-addressable, call fold_builtin_atomic_compare_exchange.
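
To connect the gimple-fold.c and tree-ssa.c parts: once e in the example
above is no longer addressable, the call is rewritten into the new internal
function, roughly like this (a sketch with made-up SSA names; the 4 encodes
weak * 256 + size, 5 is __ATOMIC_SEQ_CST):

  _9 = ATOMIC_COMPARE_EXCHANGE (p_3(D), e_1, d_4(D), 4, 5, 5);
  _10 = IMAGPART_EXPR <_9>;
  r_11 = (_Bool) _10;
  e_12 = REALPART_EXPR <_9>;

so the expected value flows through the complex return value rather than
through memory; see the comment above fold_builtin_atomic_compare_exchange
in the patch below.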

--- gcc/builtins.c.jj	2016-06-08 21:01:25.000000000 +0200
+++ gcc/builtins.c	2016-06-24 11:18:03.839096035 +0200
@@ -5158,6 +5158,123 @@ expand_builtin_atomic_compare_exchange (
   return target;
 }
 
+/* Helper function for expand_ifn_atomic_compare_exchange - expand
+   internal ATOMIC_COMPARE_EXCHANGE call into __atomic_compare_exchange_N
+   call.  The weak parameter must be dropped to match the expected parameter
+   list and the expected argument changed from value to pointer to memory
+   slot.  */
+
+static void
+expand_ifn_atomic_compare_exchange_into_call (gcall *call, machine_mode mode)
+{
+  unsigned int z;
+  vec<tree, va_gc> *vec;
+
+  vec_alloc (vec, 5);
+  vec->quick_push (gimple_call_arg (call, 0));
+  tree expected = gimple_call_arg (call, 1);
+  rtx x = assign_stack_temp_for_type (mode, GET_MODE_SIZE (mode),
+				      TREE_TYPE (expected));
+  rtx expd = expand_expr (expected, x, mode, EXPAND_NORMAL);
+  if (expd != x)
+    emit_move_insn (x, expd);
+  tree v = make_tree (TREE_TYPE (expected), x);
+  vec->quick_push (build1 (ADDR_EXPR,
+			   build_pointer_type (TREE_TYPE (expected)), v));
+  vec->quick_push (gimple_call_arg (call, 2));
+  /* Skip the boolean weak parameter.  */
+  for (z = 4; z < 6; z++)
+    vec->quick_push (gimple_call_arg (call, z));
+  built_in_function fncode
+    = (built_in_function) ((int) BUILT_IN_ATOMIC_COMPARE_EXCHANGE_1
+			   + exact_log2 (GET_MODE_SIZE (mode)));
+  tree fndecl = builtin_decl_explicit (fncode);
+  tree fn = build1 (ADDR_EXPR, build_pointer_type (TREE_TYPE (fndecl)),
+		    fndecl);
+  tree exp = build_call_vec (boolean_type_node, fn, vec);
+  tree lhs = gimple_call_lhs (call);
+  rtx boolret = expand_call (exp, NULL_RTX, lhs == NULL_TREE);
+  if (lhs)
+    {
+      rtx target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE);
+      if (GET_MODE (boolret) != mode)
+	boolret = convert_modes (mode, GET_MODE (boolret), boolret, 1);
+      x = force_reg (mode, x);
+      write_complex_part (target, boolret, true);
+      write_complex_part (target, x, false);
+    }
+}
+
+/* Expand IFN_ATOMIC_COMPARE_EXCHANGE internal function.  */
+
+void
+expand_ifn_atomic_compare_exchange (gcall *call)
+{
+  int size = tree_to_shwi (gimple_call_arg (call, 3)) & 255;
+  gcc_assert (size == 1 || size == 2 || size == 4 || size == 8 || size == 16);
+  machine_mode mode = mode_for_size (BITS_PER_UNIT * size, MODE_INT, 0);
+  rtx expect, desired, mem, oldval, boolret;
+  enum memmodel success, failure;
+  tree lhs;
+  bool is_weak;
+  source_location loc
+    = expansion_point_location_if_in_system_header (gimple_location (call));
+
+  success = get_memmodel (gimple_call_arg (call, 4));
+  failure = get_memmodel (gimple_call_arg (call, 5));
+
+  if (failure > success)
+    {
+      warning_at (loc, OPT_Winvalid_memory_model,
+		  "failure memory model cannot be stronger than success "
+		  "memory model for %<__atomic_compare_exchange%>");
+      success = MEMMODEL_SEQ_CST;
+    }
+
+  if (is_mm_release (failure) || is_mm_acq_rel (failure))
+    {
+      warning_at (loc, OPT_Winvalid_memory_model,
+		  "invalid failure memory model for "
+		  "%<__atomic_compare_exchange%>");
+      failure = MEMMODEL_SEQ_CST;
+      success = MEMMODEL_SEQ_CST;
+    }
+
+  if (!flag_inline_atomics)
+    {
+      expand_ifn_atomic_compare_exchange_into_call (call, mode);
+      return;
+    }
+
+  /* Expand the operands.  */
+  mem = get_builtin_sync_mem (gimple_call_arg (call, 0), mode);
+
+  expect = expand_expr_force_mode (gimple_call_arg (call, 1), mode);
+  desired = expand_expr_force_mode (gimple_call_arg (call, 2), mode);
+
+  is_weak = (tree_to_shwi (gimple_call_arg (call, 3)) & 256) != 0;
+
+  boolret = NULL;
+  oldval = NULL;
+
+  if (!expand_atomic_compare_and_swap (&boolret, &oldval, mem, expect, desired,
+				       is_weak, success, failure))
+    {
+      expand_ifn_atomic_compare_exchange_into_call (call, mode);
+      return;
+    }
+
+  lhs = gimple_call_lhs (call);
+  if (lhs)
+    {
+      rtx target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE);
+      if (GET_MODE (boolret) != mode)
+	boolret = convert_modes (mode, GET_MODE (boolret), boolret, 1);
+      write_complex_part (target, boolret, true);
+      write_complex_part (target, oldval, false);
+    }
+}
+
 /* Expand the __atomic_load intrinsic:
    	TYPE __atomic_load (TYPE *object, enum memmodel)
    EXP is the CALL_EXPR.
--- gcc/internal-fn.c.jj	2016-06-15 19:09:09.000000000 +0200
+++ gcc/internal-fn.c	2016-06-22 15:30:06.838951934 +0200
@@ -2143,6 +2143,14 @@ expand_ATOMIC_BIT_TEST_AND_RESET (intern
   expand_ifn_atomic_bit_test_and (call);
 }
 
+/* Expand atomic compare and exchange.  */
+
+static void
+expand_ATOMIC_COMPARE_EXCHANGE (internal_fn, gcall *call)
+{
+  expand_ifn_atomic_compare_exchange (call);
+}
+
 /* Expand a call to FN using the operands in STMT.  FN has a single
    output operand and NARGS input operands.  */
 
--- gcc/tree.h.jj	2016-06-20 21:16:07.000000000 +0200
+++ gcc/tree.h	2016-06-21 17:35:19.806362408 +0200
@@ -3985,8 +3985,8 @@ extern tree build_call_expr_loc (locatio
 extern tree build_call_expr (tree, int, ...);
 extern tree build_call_expr_internal_loc (location_t, enum internal_fn,
 					  tree, int, ...);
-extern tree build_call_expr_internal_loc (location_t, enum internal_fn,
-					  tree, int, tree *);
+extern tree build_call_expr_internal_loc_array (location_t, enum internal_fn,
+						tree, int, const tree *);
 extern tree maybe_build_call_expr_loc (location_t, combined_fn, tree,
 				       int, ...);
 extern tree build_string_literal (int, const char *);
--- gcc/gimple-fold.h.jj	2016-01-08 07:31:08.000000000 +0100
+++ gcc/gimple-fold.h	2016-06-24 11:19:34.002040898 +0200
@@ -32,6 +32,8 @@ extern tree maybe_fold_and_comparisons (
 					enum tree_code, tree, tree);
 extern tree maybe_fold_or_comparisons (enum tree_code, tree, tree,
 				       enum tree_code, tree, tree);
+extern bool optimize_atomic_compare_exchange_p (gimple *);
+extern void fold_builtin_atomic_compare_exchange (gimple_stmt_iterator *);
 extern bool arith_overflowed_p (enum tree_code, const_tree, const_tree,
 				const_tree);
 extern tree no_follow_ssa_edges (tree);
--- gcc/internal-fn.def.jj	2016-05-03 13:36:50.000000000 +0200
+++ gcc/internal-fn.def	2016-06-21 17:10:23.516879436 +0200
@@ -193,6 +193,7 @@ DEF_INTERNAL_FN (SET_EDOM, ECF_LEAF | EC
 DEF_INTERNAL_FN (ATOMIC_BIT_TEST_AND_SET, ECF_LEAF | ECF_NOTHROW, NULL)
 DEF_INTERNAL_FN (ATOMIC_BIT_TEST_AND_COMPLEMENT, ECF_LEAF | ECF_NOTHROW, NULL)
 DEF_INTERNAL_FN (ATOMIC_BIT_TEST_AND_RESET, ECF_LEAF | ECF_NOTHROW, NULL)
+DEF_INTERNAL_FN (ATOMIC_COMPARE_EXCHANGE, ECF_LEAF | ECF_NOTHROW, NULL)
 
 #undef DEF_INTERNAL_INT_FN
 #undef DEF_INTERNAL_FLT_FN
--- gcc/predict.c.jj	2016-06-22 11:17:44.763444374 +0200
+++ gcc/predict.c	2016-06-22 14:26:08.894724088 +0200
@@ -1978,6 +1978,25 @@ expr_expected_value_1 (tree type, tree o
       if (TREE_CONSTANT (op0))
 	return op0;
 
+      if (code == IMAGPART_EXPR)
+	{
+	  if (TREE_CODE (TREE_OPERAND (op0, 0)) == SSA_NAME)
+	    {
+	      def = SSA_NAME_DEF_STMT (TREE_OPERAND (op0, 0));
+	      if (is_gimple_call (def)
+		  && gimple_call_internal_p (def)
+		  && (gimple_call_internal_fn (def)
+		      == IFN_ATOMIC_COMPARE_EXCHANGE))
+		{
+		  /* Assume that any given atomic operation has low contention,
+		     and thus the compare-and-swap operation succeeds.  */
+		  if (predictor)
+		    *predictor = PRED_COMPARE_AND_SWAP;
+		  return build_one_cst (TREE_TYPE (op0));
+		}
+	    }
+	}
+
       if (code != SSA_NAME)
 	return NULL_TREE;
 
--- gcc/builtins.h.jj	2016-05-03 13:36:50.000000000 +0200
+++ gcc/builtins.h	2016-06-21 18:05:11.678635858 +0200
@@ -72,6 +72,7 @@ extern tree std_canonical_va_list_type (
 extern void std_expand_builtin_va_start (tree, rtx);
 extern void expand_builtin_trap (void);
 extern void expand_ifn_atomic_bit_test_and (gcall *);
+extern void expand_ifn_atomic_compare_exchange (gcall *);
 extern rtx expand_builtin (tree, rtx, rtx, machine_mode, int);
 extern rtx expand_builtin_with_bounds (tree, rtx, rtx, machine_mode, int);
 extern enum built_in_function builtin_mathfn_code (const_tree);
--- gcc/gimple-fold.c.jj	2016-06-16 21:00:08.000000000 +0200
+++ gcc/gimple-fold.c	2016-06-24 11:36:12.222916831 +0200
@@ -2953,6 +2953,133 @@ fold_internal_goacc_dim (const gimple *c
   return result;
 }
 
+/* Return true if STMT is a __atomic_compare_exchange_N call that is suitable
+   for conversion into ATOMIC_COMPARE_EXCHANGE if the second argument is
+   &var where var is only addressable because of such calls.  */
+
+bool
+optimize_atomic_compare_exchange_p (gimple *stmt)
+{
+  if (gimple_call_num_args (stmt) != 6
+      || !flag_inline_atomics
+      || !optimize
+      || (flag_sanitize & (SANITIZE_THREAD | SANITIZE_ADDRESS)) != 0
+      || !gimple_call_builtin_p (stmt, BUILT_IN_NORMAL)
+      || !gimple_vdef (stmt)
+      || !gimple_vuse (stmt))
+    return false;
+
+  tree fndecl = gimple_call_fndecl (stmt);
+  switch (DECL_FUNCTION_CODE (fndecl))
+    {
+    case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_1:
+    case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_2:
+    case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_4:
+    case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_8:
+    case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_16:
+      break;
+    default:
+      return false;
+    }
+
+  tree expected = gimple_call_arg (stmt, 1);
+  if (TREE_CODE (expected) != ADDR_EXPR
+      || !SSA_VAR_P (TREE_OPERAND (expected, 0))
+      || !is_gimple_reg_type (TREE_TYPE (TREE_OPERAND (expected, 0)))
+      || !auto_var_in_fn_p (TREE_OPERAND (expected, 0), current_function_decl)
+      || TREE_THIS_VOLATILE (TREE_TYPE (TREE_OPERAND (expected, 0)))
+      || TREE_CODE (TREE_TYPE (TREE_OPERAND (expected, 0))) == VECTOR_TYPE
+      || TREE_CODE (TREE_TYPE (TREE_OPERAND (expected, 0))) == COMPLEX_TYPE)
+    return false;
+
+  tree weak = gimple_call_arg (stmt, 3);
+  if (!integer_zerop (weak) && !integer_onep (weak))
+    return false;
+
+  tree parmt = TYPE_ARG_TYPES (TREE_TYPE (fndecl));
+  tree itype = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (parmt)));
+  machine_mode mode = TYPE_MODE (itype);
+
+  if (direct_optab_handler (atomic_compare_and_swap_optab, mode)
+      == CODE_FOR_nothing
+      && optab_handler (sync_compare_and_swap_optab, mode) == CODE_FOR_nothing)
+    return false;
+
+  if (int_size_in_bytes (TREE_TYPE (TREE_OPERAND (expected, 0)))
+      != GET_MODE_SIZE (mode))
+    return false;
+
+  return true;
+}
+
+/* Fold
+     r = __atomic_compare_exchange_N (p, &e, d, w, s, f);
+   into
+     _Complex uintN_t t = ATOMIC_COMPARE_EXCHANGE (p, e, d, w * 256 + N, s, f);
+     i = IMAGPART_EXPR <t>;
+     r = (_Bool) i;
+     e = REALPART_EXPR <t>;  */
+
+void
+fold_builtin_atomic_compare_exchange (gimple_stmt_iterator *gsi)
+{
+  gimple *stmt = gsi_stmt (*gsi);
+  tree fndecl = gimple_call_fndecl (stmt);
+  tree parmt = TYPE_ARG_TYPES (TREE_TYPE (fndecl));
+  tree itype = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (parmt)));
+  tree ctype = build_complex_type (itype);
+  tree expected = TREE_OPERAND (gimple_call_arg (stmt, 1), 0);
+  gimple *g = gimple_build_assign (make_ssa_name (TREE_TYPE (expected)),
+				   expected);
+  gsi_insert_before (gsi, g, GSI_SAME_STMT);
+  gimple_stmt_iterator gsiret = gsi_for_stmt (g);
+  if (!useless_type_conversion_p (itype, TREE_TYPE (expected)))
+    {
+      g = gimple_build_assign (make_ssa_name (itype), VIEW_CONVERT_EXPR,
+			       build1 (VIEW_CONVERT_EXPR, itype,
+				       gimple_assign_lhs (g)));
+      gsi_insert_before (gsi, g, GSI_SAME_STMT);
+    }
+  int flag = (integer_onep (gimple_call_arg (stmt, 3)) ? 256 : 0)
+	     + int_size_in_bytes (itype);
+  g = gimple_build_call_internal (IFN_ATOMIC_COMPARE_EXCHANGE, 6,
+				  gimple_call_arg (stmt, 0),
+				  gimple_assign_lhs (g),
+				  gimple_call_arg (stmt, 2),
+				  build_int_cst (integer_type_node, flag),
+				  gimple_call_arg (stmt, 4),
+				  gimple_call_arg (stmt, 5));
+  tree lhs = make_ssa_name (ctype);
+  gimple_call_set_lhs (g, lhs);
+  gimple_set_vdef (g, gimple_vdef (stmt));
+  gimple_set_vuse (g, gimple_vuse (stmt));
+  SSA_NAME_DEF_STMT (gimple_vdef (g)) = g;
+  if (gimple_call_lhs (stmt))
+    {
+      gsi_insert_before (gsi, g, GSI_SAME_STMT);
+      g = gimple_build_assign (make_ssa_name (itype), IMAGPART_EXPR,
+			       build1 (IMAGPART_EXPR, itype, lhs));
+      gsi_insert_before (gsi, g, GSI_SAME_STMT);
+      g = gimple_build_assign (gimple_call_lhs (stmt), NOP_EXPR,
+			       gimple_assign_lhs (g));
+    }
+  gsi_replace (gsi, g, true);
+  g = gimple_build_assign (make_ssa_name (itype), REALPART_EXPR,
+			   build1 (REALPART_EXPR, itype, lhs));
+  gsi_insert_after (gsi, g, GSI_NEW_STMT);
+  if (!useless_type_conversion_p (TREE_TYPE (expected), itype))
+    {
+      g = gimple_build_assign (make_ssa_name (TREE_TYPE (expected)),
+			       VIEW_CONVERT_EXPR,
+			       build1 (VIEW_CONVERT_EXPR, TREE_TYPE (expected),
+				       gimple_assign_lhs (g)));
+      gsi_insert_after (gsi, g, GSI_NEW_STMT);
+    }
+  g = gimple_build_assign (expected, SSA_NAME, gimple_assign_lhs (g));
+  gsi_insert_after (gsi, g, GSI_NEW_STMT);
+  *gsi = gsiret;
+}
+
 /* Return true if ARG0 CODE ARG1 in infinite signed precision operation
    doesn't fit into TYPE.  The test for overflow should be regardless of
    -fwrapv, and even for unsigned types.  */
--- gcc/tree-ssa.c.jj	2016-06-14 16:29:49.000000000 +0200
+++ gcc/tree-ssa.c	2016-06-24 10:38:54.929029421 +0200
@@ -1414,8 +1414,21 @@ execute_update_addresses_taken (void)
 	  enum gimple_code code = gimple_code (stmt);
 	  tree decl;
 
-	  /* Note all addresses taken by the stmt.  */
-	  gimple_ior_addresses_taken (addresses_taken, stmt);
+	  if (code == GIMPLE_CALL
+	      && optimize_atomic_compare_exchange_p (stmt))
+	    {
+	      /* For __atomic_compare_exchange_N if the second argument
+		 is &var, don't mark var addressable;
+		 if it becomes non-addressable, we'll rewrite it into
+		 ATOMIC_COMPARE_EXCHANGE call.  */
+	      tree arg = gimple_call_arg (stmt, 1);
+	      gimple_call_set_arg (stmt, 1, null_pointer_node);
+	      gimple_ior_addresses_taken (addresses_taken, stmt);
+	      gimple_call_set_arg (stmt, 1, arg);
+	    }
+	  else
+	    /* Note all addresses taken by the stmt.  */
+	    gimple_ior_addresses_taken (addresses_taken, stmt);
 
 	  /* If we have a call or an assignment, see if the lhs contains
 	     a local decl that requires not to be a gimple register.  */
@@ -1657,6 +1670,16 @@ execute_update_addresses_taken (void)
 	    else if (gimple_code (stmt) == GIMPLE_CALL)
 	      {
 		unsigned i;
+		if (optimize_atomic_compare_exchange_p (stmt))
+		  {
+		    tree expected = gimple_call_arg (stmt, 1);
+		    if (bitmap_bit_p (suitable_for_renaming,
+				      DECL_UID (TREE_OPERAND (expected, 0))))
+		      {
+			fold_builtin_atomic_compare_exchange (&gsi);
+			continue;
+		      }
+		  }
 		for (i = 0; i < gimple_call_num_args (stmt); ++i)
 		  {
 		    tree *argp = gimple_call_arg_ptr (stmt, i);


	Jakub


