This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[gomp4] add reduction lock initializer
- From: Nathan Sidwell <nathan at acm dot org>
- To: GCC Patches <gcc-patches at gcc dot gnu dot org>, Cesar Philippidis <cesar_philippidis at mentor dot com>
- Date: Tue, 25 Aug 2015 15:16:08 -0400
- Subject: [gomp4] add reduction lock initializer
- Authentication-results: sourceware.org; auth=none
Cesar discovered another quirk of PTX. In spite of PTX documenting that static
variables can be initialized and default to zero, there's a little note that it
doesn't work for .shared variables. Thus we need code to initialize the worker
lock variable used for reductions.
This implements a new internal function 'IFN_GOACC_LOCK_INIT', with the same
arguments as the LOCK and UNLOCK functions. The intent is that it is emitted at
the reduction setup point and expands to target-specific code.
For PTX it's deleted for everything but worker level, and for that we expand to
an initialization of the lock variable. We can simply use the same insn as the
unlocker, but I renamed it to be less confusing.
nathan
2015-08-25 Nathan Sidwell <nathan@codesourcery.com>
* targhooks.h (default_goacc_lock_unlock): Rename to ...
(default_goacc_lock): ... here. Adjust.
* config/nvptx/nvptx.md (oacc_lock, oacc_unlock):
Adjust call to lock expander.
(oacc_lock_init): New.
(nvptx_spinlock, nvptx_spinunlock): Rename to ...
(nvptx_spin_lock, nvptx_spin_reset): ... here.
* config/nvptx/nvptx.c (nvptx_expand_oacc_lock_unlock): Rename to ...
(nvptx_expand_oacc_lock): ... here. Deal with init too.
(nvptx_xform_lock_unlock): Rename to ...
(nvptx_xform_lock): ... here. Deal with init too.
(TARGET_GOACC_LOCK_UNLOCK): Replace with ...
(TARGET_GOACC_LOCK): ... this.
* omp-low.c (execute_oacc_transform): Deal with
IFN_GOACC_LOCK_INIT.
(default_goacc_lock_unlock): Rename to ...
(default_goacc_lock): ... here. Deal with init too.
* internal-fn.c (expand_GOACC_LOCK_INIT): New.
* internal-fn.def (GOACC_LOCK_INIT): New.
* doc/tm.texi.in (TARGET_GOACC_LOCK_UNLOCK): Replace with ...
(TARGET_GOACC_LOCK): ... this.
* doc/tm.texi: Rebuilt.
* target.def (goacc lock_unlock): Replace with ...
(goacc lock): ... this. Deal with init too.
Index: gcc/targhooks.h
===================================================================
--- gcc/targhooks.h (revision 227174)
+++ gcc/targhooks.h (working copy)
@@ -110,7 +110,7 @@ extern void default_destroy_cost_data (v
extern bool default_goacc_validate_dims (tree, int [], int);
extern unsigned default_goacc_dim_limit (unsigned);
extern bool default_goacc_fork_join (gimple, const int [], bool);
-extern bool default_goacc_lock_unlock (gimple, const int [], bool);
+extern bool default_goacc_lock (gimple, const int [], unsigned);
/* These are here, and not in hooks.[ch], because not all users of
hooks.h include tm.h, and thus we don't have CUMULATIVE_ARGS. */
Index: gcc/config/nvptx/nvptx.md
===================================================================
--- gcc/config/nvptx/nvptx.md (revision 227174)
+++ gcc/config/nvptx/nvptx.md (working copy)
@@ -1371,7 +1371,7 @@
UNSPECV_LOCK)]
""
{
- nvptx_expand_oacc_lock_unlock (operands[0], true);
+ nvptx_expand_oacc_lock (operands[0], 0);
DONE;
})
@@ -1381,7 +1381,17 @@
UNSPECV_LOCK)]
""
{
- nvptx_expand_oacc_lock_unlock (operands[0], false);
+ nvptx_expand_oacc_lock (operands[0], +1);
+ DONE;
+})
+
+(define_expand "oacc_lock_init"
+ [(unspec_volatile:SI [(match_operand:SI 0 "const_int_operand" "")
+ (match_operand:SI 1 "const_int_operand" "")]
+ UNSPECV_LOCK)]
+ ""
+{
+ nvptx_expand_oacc_lock (operands[0], -1);
DONE;
})
@@ -1592,8 +1602,8 @@
""
"membar%B0;")
-;; spinlock and unlock
-(define_insn "nvptx_spinlock"
+;; spin lock and reset
+(define_insn "nvptx_spin_lock"
[(parallel
[(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")
(match_operand:SI 1 "const_int_operand" "i")]
@@ -1604,7 +1614,7 @@
""
"%4:\\tatom%R1.cas.b32 %2,%0,0,1;setp.ne.u32 %3,%2,0;@%3 bra.uni %4;")
-(define_insn "nvptx_spinunlock"
+(define_insn "nvptx_spin_reset"
[(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")
(match_operand:SI 1 "const_int_operand" "i")]
UNSPECV_LOCK)
Index: gcc/config/nvptx/nvptx.c
===================================================================
--- gcc/config/nvptx/nvptx.c (revision 227174)
+++ gcc/config/nvptx/nvptx.c (working copy)
@@ -1220,7 +1220,7 @@ nvptx_expand_oacc_join (unsigned mode)
gang or worker level. */
void
-nvptx_expand_oacc_lock_unlock (rtx src, bool lock)
+nvptx_expand_oacc_lock (rtx src, int direction)
{
unsigned HOST_WIDE_INT kind;
rtx pat;
@@ -1230,22 +1230,26 @@ nvptx_expand_oacc_lock_unlock (rtx src,
rtx mem = gen_rtx_MEM (SImode, lock_syms[kind]);
rtx space = GEN_INT (lock_space[kind]);
- rtx barrier = gen_nvptx_membar (GEN_INT (lock_level[kind]));
+ rtx barrier = NULL_RTX;
rtx tmp = gen_reg_rtx (SImode);
- if (!lock)
+ if (direction >= 0)
+ barrier = gen_nvptx_membar (GEN_INT (lock_level[kind]));
+
+ if (direction > 0)
emit_insn (barrier);
- if (lock)
+ if (!direction)
{
rtx_code_label *label = gen_label_rtx ();
LABEL_NUSES (label)++;
- pat = gen_nvptx_spinlock (mem, space, tmp, gen_reg_rtx (BImode), label);
+ pat = gen_nvptx_spin_lock (mem, space, tmp, gen_reg_rtx (BImode), label);
}
else
- pat = gen_nvptx_spinunlock (mem, space, tmp);
+ /* We can use reset for both unlock and initialization. */
+ pat = gen_nvptx_spin_reset (mem, space, tmp);
emit_insn (pat);
- if (lock)
+ if (!direction)
emit_insn (barrier);
}
@@ -3628,12 +3632,22 @@ nvptx_xform_fork_join (gimple stmt, cons
*/
static bool
-nvptx_xform_lock_unlock (gimple stmt, const int *ARG_UNUSED (dims),
- bool ARG_UNUSED (is_lock))
+nvptx_xform_lock (gimple stmt, const int *ARG_UNUSED (dims), unsigned ifn_code)
{
tree arg = gimple_call_arg (stmt, 0);
+ unsigned mode = TREE_INT_CST_LOW (arg);
- return TREE_INT_CST_LOW (arg) > GOMP_DIM_WORKER;
+ switch (ifn_code)
+ {
+ case IFN_GOACC_LOCK:
+ case IFN_GOACC_UNLOCK:
+ return mode > GOMP_DIM_WORKER;
+
+ case IFN_GOACC_LOCK_INIT:
+ return mode != GOMP_DIM_WORKER;
+
+ default: gcc_unreachable();
+ }
}
#undef TARGET_OPTION_OVERRIDE
@@ -3739,8 +3753,8 @@ nvptx_xform_lock_unlock (gimple stmt, co
#undef TARGET_GOACC_FORK_JOIN
#define TARGET_GOACC_FORK_JOIN nvptx_xform_fork_join
-#undef TARGET_GOACC_LOCK_UNLOCK
-#define TARGET_GOACC_LOCK_UNLOCK nvptx_xform_lock_unlock
+#undef TARGET_GOACC_LOCK
+#define TARGET_GOACC_LOCK nvptx_xform_lock
struct gcc_target targetm = TARGET_INITIALIZER;
Index: gcc/config/nvptx/nvptx-protos.h
===================================================================
--- gcc/config/nvptx/nvptx-protos.h (revision 227174)
+++ gcc/config/nvptx/nvptx-protos.h (working copy)
@@ -34,7 +34,7 @@ extern const char *nvptx_section_for_dec
#ifdef RTX_CODE
extern void nvptx_expand_oacc_fork (unsigned);
extern void nvptx_expand_oacc_join (unsigned);
-extern void nvptx_expand_oacc_lock_unlock (rtx, bool);
+extern void nvptx_expand_oacc_lock (rtx, int);
extern void nvptx_expand_call (rtx, rtx);
extern rtx nvptx_expand_compare (rtx);
extern const char *nvptx_ptx_type_from_mode (machine_mode, bool);
Index: gcc/omp-low.c
===================================================================
--- gcc/omp-low.c (revision 227174)
+++ gcc/omp-low.c (working copy)
@@ -14761,8 +14761,8 @@ execute_oacc_transform ()
case IFN_GOACC_LOCK:
case IFN_GOACC_UNLOCK:
- if (targetm.goacc.lock_unlock
- (stmt, dims, ifn_code == IFN_GOACC_LOCK))
+ case IFN_GOACC_LOCK_INIT:
+ if (targetm.goacc.lock (stmt, dims, ifn_code))
goto remove;
break;
@@ -14848,21 +14848,28 @@ default_goacc_fork_join (gimple ARG_UNUS
there is no RTL expander. */
bool
-default_goacc_lock_unlock (gimple ARG_UNUSED (stmt),
- const int*ARG_UNUSED (dims),
- bool is_lock)
+default_goacc_lock (gimple ARG_UNUSED (stmt), const int*ARG_UNUSED (dims),
+ unsigned ifn_code)
{
- if (is_lock)
+ switch (ifn_code)
{
+ case IFN_GOACC_LOCK:
#ifndef HAVE_oacc_lock
return true;
#endif
- }
- else
- {
+ break;
+ case IFN_GOACC_UNLOCK:
#ifndef HAVE_oacc_unlock
return true;
#endif
+ break;
+ case IFN_GOACC_LOCK_INIT:
+#ifndef HAVE_oacc_lock_init
+ return true;
+#endif
+ break;
+ default:
+ gcc_unreachable ();
}
return false;
Index: gcc/internal-fn.c
===================================================================
--- gcc/internal-fn.c (revision 227174)
+++ gcc/internal-fn.c (working copy)
@@ -2051,6 +2051,19 @@ expand_GOACC_UNLOCK (gcall *ARG_UNUSED (
#endif
}
+static void
+expand_GOACC_LOCK_INIT (gcall *ARG_UNUSED (stmt))
+{
+#ifdef HAVE_oacc_lock_init
+ rtx dim = expand_normal (gimple_call_arg (stmt, 0));
+ rtx id = expand_normal (gimple_call_arg (stmt, 1));
+
+ emit_insn (gen_oacc_lock_init (dim, id));
+#else
+ gcc_unreachable ();
+#endif
+}
+
/* Routines to expand each internal function, indexed by function number.
Each routine has the prototype:
Index: gcc/doc/tm.texi.in
===================================================================
--- gcc/doc/tm.texi.in (revision 227174)
+++ gcc/doc/tm.texi.in (working copy)
@@ -4251,7 +4251,7 @@ address; but often a machine-dependent
@hook TARGET_GOACC_FORK_JOIN
-@hook TARGET_GOACC_LOCK_UNLOCK
+@hook TARGET_GOACC_LOCK
@node Anchored Addresses
@section Anchored Addresses
Index: gcc/doc/tm.texi
===================================================================
--- gcc/doc/tm.texi (revision 227174)
+++ gcc/doc/tm.texi (working copy)
@@ -5760,11 +5760,12 @@ pass. It should return true, if the fun
default hook returns true, if there is no RTL expanders for them.
@end deftypefn
-@deftypefn {Target Hook} bool TARGET_GOACC_LOCK_UNLOCK (gimple, const @var{int[]}, @var{bool})
-This hook should convert IFN_GOACC_LOCK and IFN_GOACC_UNLOCK function
-calls to target-specific gimple. It is executed during the oacc_xform
-pass. It should return true, if the functions should be deleted. The
-default hook returns true, if there is no RTL expanders for them.
+@deftypefn {Target Hook} bool TARGET_GOACC_LOCK (gimple, const @var{int[]}, @var{unsigned})
+This hook should convert IFN_GOACC_LOCK, IFN_GOACC_UNLOCK,
+IFN_GOACC_LOCK_INIT function calls to target-specific gimple. It is
+executed during the oacc_xform pass. It should return true, if the
+functions should be deleted. The default hook returns true, if there
+is no RTL expanders for them.
@end deftypefn
@node Anchored Addresses
Index: gcc/target.def
===================================================================
--- gcc/target.def (revision 227174)
+++ gcc/target.def (working copy)
@@ -1671,13 +1671,14 @@ bool, (gimple, const int[], bool),
default_goacc_fork_join)
DEFHOOK
-(lock_unlock,
-"This hook should convert IFN_GOACC_LOCK and IFN_GOACC_UNLOCK function\n\
-calls to target-specific gimple. It is executed during the oacc_xform\n\
-pass. It should return true, if the functions should be deleted. The\n\
-default hook returns true, if there is no RTL expanders for them.",
-bool, (gimple, const int[], bool),
-default_goacc_lock_unlock)
+(lock,
+"This hook should convert IFN_GOACC_LOCK, IFN_GOACC_UNLOCK,\n\
+IFN_GOACC_LOCK_INIT function calls to target-specific gimple. It is\n\
+executed during the oacc_xform pass. It should return true, if the\n\
+functions should be deleted. The default hook returns true, if there\n\
+is no RTL expanders for them.",
+bool, (gimple, const int[], unsigned),
+default_goacc_lock)
HOOK_VECTOR_END (goacc)
Index: gcc/internal-fn.def
===================================================================
--- gcc/internal-fn.def (revision 227174)
+++ gcc/internal-fn.def (working copy)
@@ -84,8 +84,9 @@ DEF_INTERNAL_FN (GOACC_JOIN, ECF_NOTHROW
DEF_INTERNAL_FN (GOACC_DIM_SIZE, ECF_CONST | ECF_NOTHROW | ECF_LEAF, ".")
DEF_INTERNAL_FN (GOACC_DIM_POS, ECF_PURE | ECF_NOTHROW | ECF_LEAF, ".")
-/* LOCK and UNLOCK operate a mutex used for reductions. The first
+/* LOCK, UNLOCK & LOCK_INIT operate a mutex used for reductions. The first
argument is the compute dimension of the reduction and the second
argument is a loop identifer. */
DEF_INTERNAL_FN (GOACC_LOCK, ECF_NOTHROW | ECF_LEAF, "..")
DEF_INTERNAL_FN (GOACC_UNLOCK, ECF_NOTHROW | ECF_LEAF, "..")
+DEF_INTERNAL_FN (GOACC_LOCK_INIT, ECF_NOTHROW | ECF_LEAF, "..")