This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Re: New post-LTO OpenACC pass
- From: Nathan Sidwell <nathan at acm dot org>
- To: Bernd Schmidt <bschmidt at redhat dot com>, Jakub Jelinek <jakub at redhat dot com>
- Cc: Cesar Philippidis <cesar at codesourcery dot com>, GCC Patches <gcc-patches at gcc dot gnu dot org>
- Date: Fri, 25 Sep 2015 06:56:19 -0400
- Subject: Re: New post-LTO OpenACC pass
- Authentication-results: sourceware.org; auth=none
- References: <560030A9 dot 5020705 at acm dot org> <560068D0 dot 1050308 at codesourcery dot com> <56006B13 dot 1060004 at acm dot org> <560170CB dot 3040603 at acm dot org> <5602862A dot 5090808 at redhat dot com> <560297A1 dot 8040009 at acm dot org> <5602A1EA dot 3060009 at redhat dot com> <5602F278 dot 1030708 at acm dot org> <5602F4BB dot 9000908 at redhat dot com> <5602F662 dot 6000202 at acm dot org> <56047B4E dot 3010704 at acm dot org> <560521CB dot 6000609 at redhat dot com>
On 09/25/15 06:28, Bernd Schmidt wrote:
This is the c-c++-common/goacc/acc_on_device-2.c testcase. Is that expected to
be handled? If I change it to use __builtin_acc_on_device, I can step right into
Breakpoint 8, fold_call_stmt (stmt=0x7ffff0736e10, ignore=false) at
../../git/gcc/builtins.c:12277
12277 tree ret = NULL_TREE;
Maybe you were compiling without optimization? In that case
expand_builtin_acc_on_device (which already exists) should still end up doing
the right thing. In no case should you see a RTL call to a function, that
indicates that something else went wrong.
I think I was reading more into the std than it intended, as it claims
on_device should evaluate 'to a constant' (no mention of 'when optimizing').
It can't mean 'be usable in an integral-constant-expression', as at the point we
need those, one doesn't know the value it should be.
Thinking about it, I don't think a user can tell. The case I had in mind (and
have used it for) is something like
on_device (nvidia) ? asm ("NVIDIA specific asm") : c-expr
and for that to work, one must turn the optimizer on to get the dead code
removal, regardless of where on_device expands. So my goal of getting it
expanded regardless of optimization level is not needed --- indeed getting it
expanded in fold_call_stmt will mean the body of expand_on_device can go away (I
think).
From the programmer's POV, what really matters is that when optimizing
the compiler knows how to fold it.
Can you send me the patch you tried (and possibly a testcase you expect to be
handled), I'll see if I can find out what's going on.
Thanks! When things didn't work, I tried getting it working on the gomp4
branch, as I knew what to expect there. So the patch is for that branch.
The fails I observed are:
FAIL: libgomp.oacc-c/../libgomp.oacc-c-c++-common/if-1.c
-DACC_DEVICE_TYPE_nvidia=1 -DACC_MEM_SHARED=0 -foffload=nvptx-none execution test
FAIL: libgomp.oacc-c/../libgomp.oacc-c-c++-common/gang-static-2.c
-DACC_DEVICE_TYPE_nvidia=1 -DACC_MEM_SHARED=0 -foffload=nvptx-none -O0
execution test
FAIL: libgomp.oacc-c/../libgomp.oacc-c-c++-common/gang-static-2.c
-DACC_DEVICE_TYPE_nvidia=1 -DACC_MEM_SHARED=0 -foffload=nvptx-none -O2
execution test
FAIL: libgomp.oacc-c++/../libgomp.oacc-c-c++-common/if-1.c
-DACC_DEVICE_TYPE_nvidia=1 -DACC_MEM_SHARED=0 -foffload=nvptx-none execution test
FAIL: libgomp.oacc-c++/../libgomp.oacc-c-c++-common/gang-static-2.c
-DACC_DEVICE_TYPE_nvidia=1 -DACC_MEM_SHARED=0 -foffload=nvptx-none -O0
execution test
FAIL: libgomp.oacc-c++/../libgomp.oacc-c-c++-common/gang-static-2.c
-DACC_DEVICE_TYPE_nvidia=1 -DACC_MEM_SHARED=0 -foffload=nvptx-none -O2
execution test
the diff I have is attached -- as you can see it's 'experimental'.
nathan
Index: builtins.c
===================================================================
--- builtins.c (revision 228094)
+++ builtins.c (working copy)
@@ -5866,6 +5866,8 @@ expand_stack_save (void)
static rtx
expand_builtin_acc_on_device (tree exp, rtx target)
{
+ gcc_unreachable ();
+
#ifndef ACCEL_COMPILER
gcc_assert (!get_oacc_fn_attrib (current_function_decl));
#endif
@@ -10272,6 +10274,27 @@ fold_builtin_1 (location_t loc, tree fnd
return build_empty_stmt (loc);
break;
+ case BUILT_IN_ACC_ON_DEVICE:
+ /* Don't fold on_device until we know which compiler is active. */
+ if (symtab->state == EXPANSION)
+ {
+ unsigned val_host = GOMP_DEVICE_HOST;
+ unsigned val_dev = GOMP_DEVICE_NONE;
+
+#ifdef ACCEL_COMPILER
+ val_host = GOMP_DEVICE_NOT_HOST;
+ val_dev = ACCEL_COMPILER_acc_device;
+#endif
+ tree host = build2 (EQ_EXPR, boolean_type_node, arg0,
+ build_int_cst (integer_type_node, val_host));
+ tree dev = build2 (EQ_EXPR, boolean_type_node, arg0,
+ build_int_cst (integer_type_node, val_dev));
+
+ tree result = build2 (TRUTH_OR_EXPR, boolean_type_node, host, dev);
+ return fold_convert (integer_type_node, result);
+ }
+ break;
+
default:
break;
}
Index: omp-low.c
===================================================================
--- omp-low.c (revision 228094)
+++ omp-low.c (working copy)
@@ -14725,21 +14725,20 @@ static void
oacc_xform_on_device (gcall *call)
{
tree arg = gimple_call_arg (call, 0);
- unsigned val = GOMP_DEVICE_HOST;
-
-#ifdef ACCEL_COMPILER
- val = GOMP_DEVICE_NOT_HOST;
-#endif
- tree result = build2 (EQ_EXPR, boolean_type_node, arg,
- build_int_cst (integer_type_node, val));
+ unsigned val_host = GOMP_DEVICE_HOST;
+ unsigned val_dev = GOMP_DEVICE_NONE;
+
#ifdef ACCEL_COMPILER
- {
- tree dev = build2 (EQ_EXPR, boolean_type_node, arg,
- build_int_cst (integer_type_node,
- ACCEL_COMPILER_acc_device));
- result = build2 (TRUTH_OR_EXPR, boolean_type_node, result, dev);
- }
+ val_host = GOMP_DEVICE_NOT_HOST;
+ val_dev = ACCEL_COMPILER_acc_device;
#endif
+
+ tree host = build2 (EQ_EXPR, boolean_type_node, arg,
+ build_int_cst (integer_type_node, val_host));
+ tree dev = build2 (EQ_EXPR, boolean_type_node, arg,
+ build_int_cst (integer_type_node, val_dev));
+
+ tree result = build2 (TRUTH_OR_EXPR, boolean_type_node, host, dev);
result = fold_convert (integer_type_node, result);
tree lhs = gimple_call_lhs (call);
gimple_seq seq = NULL;
@@ -14879,7 +14878,7 @@ execute_oacc_transform ()
gcall *call = as_a <gcall *> (stmt);
- if (gimple_call_builtin_p (call, BUILT_IN_ACC_ON_DEVICE))
+ if (0 && gimple_call_builtin_p (call, BUILT_IN_ACC_ON_DEVICE))
/* acc_on_device must be evaluated at compile time for
constant arguments. */
{