This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[gomp4] expand acc_on_device earlier


I've committed this to the gomp4 branch. It expands the acc_on_device builtin earlier, in the new oacc_xform pass. This will allow more optimization earlier on.

The existing expansion point now only needs to deal with the host-side case.

nathan
2015-08-02  Nathan Sidwell  <nathan@codesourcery.com>

	gcc/
	* omp-low.c (oacc_xform_on_device): New function.
	(execute_oacc_transform): Use get_oacc_fn_attrib.  Call
	oacc_xform_on_device.
	* builtins.c (expand_builtin_acc_on_device): Only expect to be
	expanded on host compiler.

	libgcc/
	* config/nvptx/gomp-acc_on_device.c: Include gomp-constants.h.
	(acc_on_device): Code directly here.

	libgomp/
	* openacc.h (acc_on_device): Take int and explain why.
	* oacc-init.c (acc_on_device): Likewise.

Index: gcc/omp-low.c
===================================================================
--- gcc/omp-low.c	(revision 226462)
+++ gcc/omp-low.c	(working copy)
@@ -14510,29 +14510,65 @@ make_pass_late_lower_omp (gcc::context *
   return new pass_late_lower_omp (ctxt);
 }
 
+/* Transform an acc_on_device call.  The std requires this folded at
+   compile time for constant operands.  We always fold it.  In an
+   offloaded function we're never 'none'.  We cannot detect
+   host_nonshm here, as that's a dynamic feature of the runtime.
+   However, users shouldn't be using host_nonshm anyway, only the
+   test harness.  */
+
+static void
+oacc_xform_on_device (gimple_stmt_iterator *gsi, gimple stmt)
+{
+  tree arg = gimple_call_arg (stmt, 0);
+  unsigned val = GOMP_DEVICE_HOST;
+	      
+#ifdef ACCEL_COMPILER
+  val = GOMP_DEVICE_NOT_HOST;
+#endif
+  tree result = build2 (EQ_EXPR, boolean_type_node, arg,
+			build_int_cst (integer_type_node, val));
+#ifdef ACCEL_COMPILER
+  {
+    tree dev  = build2 (EQ_EXPR, boolean_type_node, arg,
+			build_int_cst (integer_type_node,
+				       ACCEL_COMPILER_acc_device));
+    result = build2 (TRUTH_OR_EXPR, boolean_type_node, result, dev);
+  }
+#endif
+  result = fold_convert (integer_type_node, result);
+  tree lhs = gimple_call_lhs (stmt);
+  gimple_seq replace = NULL;
+
+  push_gimplify_context (true);
+  gimplify_assign (lhs, result, &replace);
+  pop_gimplify_context (NULL);
+  gsi_replace_with_seq (gsi, replace, false);
+}
+
 /* Main entry point for oacc transformations which run on the device
-   compiler.  */
+   compiler after LTO, so we know what the target device is at this
+   point (including the host fallback).  */
 
 static unsigned int
 execute_oacc_transform ()
 {
   basic_block bb;
-  gimple_stmt_iterator gsi;
-  gimple stmt;
 
-  if (!lookup_attribute ("oacc function",
-			 DECL_ATTRIBUTES (current_function_decl)))
+  if (!get_oacc_fn_attrib (current_function_decl))
     return 0;
 
-
   FOR_ALL_BB_FN (bb, cfun)
     {
-      gsi = gsi_start_bb (bb);
-
-      while (!gsi_end_p (gsi))
+      for (gimple_stmt_iterator gsi = gsi_start_bb (bb);
+	   !gsi_end_p (gsi); gsi_next (&gsi))
 	{
-	  stmt = gsi_stmt (gsi);
-	  gsi_next (&gsi);
+	  gimple stmt = gsi_stmt (gsi);
+
+	  /* acc_on_device must be evaluated at compile time for
+	     constant arguments.  */
+	  if (gimple_call_builtin_p (stmt, BUILT_IN_ACC_ON_DEVICE))
+	    oacc_xform_on_device (&gsi, stmt);
 	}
     }
 
Index: gcc/builtins.c
===================================================================
--- gcc/builtins.c	(revision 226462)
+++ gcc/builtins.c	(working copy)
@@ -5880,43 +5880,39 @@ expand_stack_save (void)
 }
 
 
-/* Expand OpenACC acc_on_device.
-
-   This has to happen late (that is, not in early folding; expand_builtin_*,
-   rather than fold_builtin_*), as we have to act differently for host and
-   acceleration device (ACCEL_COMPILER conditional).  */
+/* Expand OpenACC acc_on_device.  This is expanded in the openacc
+   transform pass, but if the user has this outside of an offloaded
+   region, we'll find it here.  In that case we must be host or none.  */
 
 static rtx
 expand_builtin_acc_on_device (tree exp, rtx target)
 {
+#ifdef ACCEL_COMPILER
+  gcc_unreachable ();
+#else
+  gcc_assert (!get_oacc_fn_attrib (current_function_decl));
+  
   if (!validate_arglist (exp, INTEGER_TYPE, VOID_TYPE))
     return NULL_RTX;
 
   tree arg = CALL_EXPR_ARG (exp, 0);
-
-  /* Return (arg == v1 || arg == v2) ? 1 : 0.  */
-  machine_mode v_mode = TYPE_MODE (TREE_TYPE (arg));
-  rtx v = expand_normal (arg), v1, v2;
-#ifdef ACCEL_COMPILER
-  v1 = GEN_INT (GOMP_DEVICE_NOT_HOST);
-  v2 = GEN_INT (ACCEL_COMPILER_acc_device);
-#else
-  v1 = GEN_INT (GOMP_DEVICE_NONE);
-  v2 = GEN_INT (GOMP_DEVICE_HOST);
-#endif
+  rtx val = expand_normal (arg);
   machine_mode target_mode = TYPE_MODE (integer_type_node);
   if (!target || !register_operand (target, target_mode))
     target = gen_reg_rtx (target_mode);
   emit_move_insn (target, const1_rtx);
   rtx_code_label *done_label = gen_label_rtx ();
-  do_compare_rtx_and_jump (v, v1, EQ, false, v_mode, NULL_RTX,
+  do_compare_rtx_and_jump (val, GEN_INT (GOMP_DEVICE_HOST), EQ,
+			   false, GET_MODE (val), NULL_RTX,
 			   NULL, done_label, PROB_EVEN);
-  do_compare_rtx_and_jump (v, v2, EQ, false, v_mode, NULL_RTX,
+  do_compare_rtx_and_jump (val, GEN_INT (GOMP_DEVICE_NONE), EQ,
+			   false, GET_MODE (val), NULL_RTX,
 			   NULL, done_label, PROB_EVEN);
   emit_move_insn (target, const0_rtx);
   emit_label (done_label);
 
   return target;
+#endif
 }
 
 /* Expand a thread-id/thread-count builtin for OpenACC.  */
Index: libgcc/config/nvptx/gomp-acc_on_device.c
===================================================================
--- libgcc/config/nvptx/gomp-acc_on_device.c	(revision 226462)
+++ libgcc/config/nvptx/gomp-acc_on_device.c	(working copy)
@@ -1,6 +1,14 @@
-int acc_on_device(int d)
+#include "gomp-constants.h"
+
+/* For when the builtin is explicitly disabled.  */
+int acc_on_device (int d)
 {
-  return __builtin_acc_on_device(d);
+  /* We can't use the builtin itself here, because that only expands
+     to device-like things inside offloaded compute regions, which
+     this isn't.  Even though it'll be executed on the device --
+     unless someone builds a host-side PTX compiler, which would be
+     very strange.  */
+  return d == GOMP_DEVICE_NOT_HOST || d == GOMP_DEVICE_NVIDIA_PTX;
 }
 
 int acc_on_device_h_(int *d)
Index: libgomp/openacc.h
===================================================================
--- libgomp/openacc.h	(revision 226462)
+++ libgomp/openacc.h	(working copy)
@@ -78,7 +78,11 @@ void acc_wait_all (void) __GOACC_NOTHROW
 void acc_wait_all_async (int) __GOACC_NOTHROW;
 void acc_init (acc_device_t) __GOACC_NOTHROW;
 void acc_shutdown (acc_device_t) __GOACC_NOTHROW;
-int acc_on_device (acc_device_t) __GOACC_NOTHROW;
+/* Library function declaration.  Although it should take an
+   acc_device_t argument, that causes problems with matching the
+   builtin, which takes an int (to avoid declaring the enumeration
+   inside the compiler).  */
+int acc_on_device (int) __GOACC_NOTHROW;
 void *acc_malloc (size_t) __GOACC_NOTHROW;
 void acc_free (void *) __GOACC_NOTHROW;
 /* Some of these would be more correct with const qualifiers, but
Index: libgomp/oacc-init.c
===================================================================
--- libgomp/oacc-init.c	(revision 226462)
+++ libgomp/oacc-init.c	(working copy)
@@ -632,10 +632,14 @@ acc_set_device_num (int ord, acc_device_
 
 ialias (acc_set_device_num)
 
+/* The compiler always attempts to expand acc_on_device, but if the
+   user disables the builtin, or calls it via a pointer, we have this
+   version.  */
+
 int
-acc_on_device (acc_device_t dev)
+acc_on_device (int dev)
 {
-  /* Just rely on the compiler builtin.  */
+  /* It is safe to use the compiler builtin, as we're the host.  */
   return __builtin_acc_on_device (dev);
 }
 

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]