This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: New post-LTO OpenACC pass


On 09/25/15 06:28, Bernd Schmidt wrote:


This is the c-c++-common/goacc/acc_on_device-2.c testcase. Is that expected to
be handled? If I change it to use __builtin_acc_on_device, I can step right into

Breakpoint 8, fold_call_stmt (stmt=0x7ffff0736e10, ignore=false) at
../../git/gcc/builtins.c:12277
12277      tree ret = NULL_TREE;

Maybe you were compiling without optimization? In that case
expand_builtin_acc_on_device (which already exists) should still end up doing
the right thing. In no case should you see a RTL call to a function, that
indicates that something else went wrong.

I think I was reading more into the std than it intended, as it claims on_device should evaluate 'to a constant' (no mention of 'when optimizing'). It can't mean 'be usable in an integral-constant-expression', as at the point we need those, one doesn't know the value it should be.

Thinking about it, I don't think a user can tell. The case I had in mind (and have used it for) is something like

on_device (nvidia)  ? asm ("NVIDIA specific asm") : c-expr

and for that to work, one must turn the optimizer on to get the dead code removal, regardless of where on_device expands. So my goal of getting it expanded regardless of optimization level is not needed --- indeed getting it expanded in fold_call_stmt will mean the body of expand_on_device can go away (I think).

From the programmer's POV, what really matters is that, when optimizing, the compiler knows how to fold it.

Can you send me the patch you tried (and possibly a testcase you expect to be
handled), I'll see if I can find out what's going on.

Thanks! When things didn't work, I tried getting it working on the gomp4 branch, as I knew what to expect there. So the patch is for that branch.

The fails I observed are:

FAIL: libgomp.oacc-c/../libgomp.oacc-c-c++-common/if-1.c -DACC_DEVICE_TYPE_nvidia=1 -DACC_MEM_SHARED=0 -foffload=nvptx-none execution test
FAIL: libgomp.oacc-c/../libgomp.oacc-c-c++-common/gang-static-2.c -DACC_DEVICE_TYPE_nvidia=1 -DACC_MEM_SHARED=0 -foffload=nvptx-none -O0 execution test
FAIL: libgomp.oacc-c/../libgomp.oacc-c-c++-common/gang-static-2.c -DACC_DEVICE_TYPE_nvidia=1 -DACC_MEM_SHARED=0 -foffload=nvptx-none -O2 execution test
FAIL: libgomp.oacc-c++/../libgomp.oacc-c-c++-common/if-1.c -DACC_DEVICE_TYPE_nvidia=1 -DACC_MEM_SHARED=0 -foffload=nvptx-none execution test
FAIL: libgomp.oacc-c++/../libgomp.oacc-c-c++-common/gang-static-2.c -DACC_DEVICE_TYPE_nvidia=1 -DACC_MEM_SHARED=0 -foffload=nvptx-none -O0 execution test
FAIL: libgomp.oacc-c++/../libgomp.oacc-c-c++-common/gang-static-2.c -DACC_DEVICE_TYPE_nvidia=1 -DACC_MEM_SHARED=0 -foffload=nvptx-none -O2 execution test


the diff I have is attached -- as you can see it's 'experimental'.

nathan
Index: builtins.c
===================================================================
--- builtins.c	(revision 228094)
+++ builtins.c	(working copy)
@@ -5866,6 +5866,8 @@ expand_stack_save (void)
 static rtx
 expand_builtin_acc_on_device (tree exp, rtx target)
 {
+   gcc_unreachable ();
+  
 #ifndef ACCEL_COMPILER
   gcc_assert (!get_oacc_fn_attrib (current_function_decl));
 #endif
@@ -10272,6 +10274,27 @@ fold_builtin_1 (location_t loc, tree fnd
 	return build_empty_stmt (loc);
       break;
 
+    case BUILT_IN_ACC_ON_DEVICE:
+      /* Don't fold on_device until we know which compiler is active.  */
+      if (symtab->state == EXPANSION)
+	{
+	  unsigned val_host = GOMP_DEVICE_HOST;
+	  unsigned val_dev = GOMP_DEVICE_NONE;
+
+#ifdef ACCEL_COMPILER
+	  val_host = GOMP_DEVICE_NOT_HOST;
+	  val_dev = ACCEL_COMPILER_acc_device;
+#endif
+	  tree host = build2 (EQ_EXPR, boolean_type_node, arg0,
+			      build_int_cst (integer_type_node, val_host));
+	  tree dev = build2 (EQ_EXPR, boolean_type_node, arg0,
+			     build_int_cst (integer_type_node, val_dev));
+
+	  tree result = build2 (TRUTH_OR_EXPR, boolean_type_node, host, dev);
+	  return fold_convert (integer_type_node, result);
+	}
+      break;
+
     default:
       break;
     }
Index: omp-low.c
===================================================================
--- omp-low.c	(revision 228094)
+++ omp-low.c	(working copy)
@@ -14725,21 +14725,20 @@ static void
 oacc_xform_on_device (gcall *call)
 {
   tree arg = gimple_call_arg (call, 0);
-  unsigned val = GOMP_DEVICE_HOST;
-	      
-#ifdef ACCEL_COMPILER
-  val = GOMP_DEVICE_NOT_HOST;
-#endif
-  tree result = build2 (EQ_EXPR, boolean_type_node, arg,
-			build_int_cst (integer_type_node, val));
+  unsigned val_host = GOMP_DEVICE_HOST;
+  unsigned val_dev = GOMP_DEVICE_NONE;
+
 #ifdef ACCEL_COMPILER
-  {
-    tree dev  = build2 (EQ_EXPR, boolean_type_node, arg,
-			build_int_cst (integer_type_node,
-				       ACCEL_COMPILER_acc_device));
-    result = build2 (TRUTH_OR_EXPR, boolean_type_node, result, dev);
-  }
+  val_host = GOMP_DEVICE_NOT_HOST;
+  val_dev = ACCEL_COMPILER_acc_device;
 #endif
+
+  tree host = build2 (EQ_EXPR, boolean_type_node, arg,
+		      build_int_cst (integer_type_node, val_host));
+  tree dev = build2 (EQ_EXPR, boolean_type_node, arg,
+		     build_int_cst (integer_type_node, val_dev));
+
+  tree result = build2 (TRUTH_OR_EXPR, boolean_type_node, host, dev);
   result = fold_convert (integer_type_node, result);
   tree lhs = gimple_call_lhs (call);
   gimple_seq seq = NULL;
@@ -14879,7 +14878,7 @@ execute_oacc_transform ()
 
 	gcall *call = as_a <gcall *> (stmt);
 	
-	if (gimple_call_builtin_p (call, BUILT_IN_ACC_ON_DEVICE))
+	if (0 && gimple_call_builtin_p (call, BUILT_IN_ACC_ON_DEVICE))
 	  /* acc_on_device must be evaluated at compile time for
 	     constant arguments.  */
 	  {

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]