This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[gomp4] add reduction lock initializer


Cesar discovered another quirk of PTX. In spite of PTX documenting that static variables can be initialized and default to zero, there's a little note that it doesn't work for .shared variables. Thus we need code to initialize the worker lock variable used for reductions.

This implements a new internal function 'IFN_GOACC_LOCK_INIT', with the same arguments as the LOCK and UNLOCK functions. The intent is that it is emitted at the reduction setup point and expands to target-specific code.

For PTX it's deleted for everything but worker level, and for that we expand to an initialization of the lock variable. We can simply use the same insn as the unlocker, but I renamed it to be less confusing.

nathan
2015-08-25  Nathan Sidwell  <nathan@codesourcery.com>

	* targhooks.h (default_goacc_lock_unlock): Rename to ...
	(default_goacc_lock): ... here.  Adjust.
	* config/nvptx/nvptx.md (oacc_lock, oacc_unlock):
	Adjust call to lock expander.
	(oacc_lock_init): New.
	(nvptx_spinlock, nvptx_spinunlock): Rename to ...
	(nvptx_spin_lock, nvptx_spin_reset): ... here.
	* config/nvptx/nvptx.c (nvptx_expand_oacc_lock_unlock): Rename to ...
	(nvptx_expand_oacc_lock): ... here.  Deal with init too.
	(nvptx_xform_lock_unlock): Rename to ...
	(nvptx_xform_lock): ... here.  Deal with init too.
	(TARGET_GOACC_LOCK_UNLOCK): Replace with ...
	(TARGET_GOACC_LOCK): ... this.
	* omp-low.c (execute_oacc_transform): Deal with
	IFN_GOACC_LOCK_INIT.
	(default_goacc_lock_unlock): Rename to ...
	(default_goacc_lock): ... here.  Deal with init too.
	* internal-fn.c (expand_GOACC_LOCK_INIT): New.
	* internal-fn.def (GOACC_LOCK_INIT): New.
	* doc/tm.texi.in (TARGET_GOACC_LOCK_UNLOCK): Replace with ...
	(TARGET_GOACC_LOCK): ... this.
	* doc/tm.texi: Rebuilt.
	* target.def (goacc lock_unlock): Replace with ...
	(goacc lock): ... this.  Deal with init too.

Index: gcc/targhooks.h
===================================================================
--- gcc/targhooks.h	(revision 227174)
+++ gcc/targhooks.h	(working copy)
@@ -110,7 +110,7 @@ extern void default_destroy_cost_data (v
 extern bool default_goacc_validate_dims (tree, int [], int);
 extern unsigned default_goacc_dim_limit (unsigned);
 extern bool default_goacc_fork_join (gimple, const int [], bool);
-extern bool default_goacc_lock_unlock (gimple, const int [], bool);
+extern bool default_goacc_lock (gimple, const int [], unsigned);
 
 /* These are here, and not in hooks.[ch], because not all users of
    hooks.h include tm.h, and thus we don't have CUMULATIVE_ARGS.  */
Index: gcc/config/nvptx/nvptx.md
===================================================================
--- gcc/config/nvptx/nvptx.md	(revision 227174)
+++ gcc/config/nvptx/nvptx.md	(working copy)
@@ -1371,7 +1371,7 @@
 		       UNSPECV_LOCK)]
   ""
 {
-  nvptx_expand_oacc_lock_unlock (operands[0], true);
+  nvptx_expand_oacc_lock (operands[0], 0);
   DONE;
 })
 
@@ -1381,7 +1381,17 @@
 		       UNSPECV_LOCK)]
   ""
 {
-  nvptx_expand_oacc_lock_unlock (operands[0], false);
+  nvptx_expand_oacc_lock (operands[0], +1);
+  DONE;
+})
+
+(define_expand "oacc_lock_init"
+  [(unspec_volatile:SI [(match_operand:SI 0 "const_int_operand" "")
+		        (match_operand:SI 1 "const_int_operand" "")]
+		       UNSPECV_LOCK)]
+  ""
+{
+  nvptx_expand_oacc_lock (operands[0], -1);
   DONE;
 })
 
@@ -1592,8 +1602,8 @@
   ""
   "membar%B0;")
 
-;; spinlock and unlock
-(define_insn "nvptx_spinlock"
+;; spin lock and reset
+(define_insn "nvptx_spin_lock"
    [(parallel
      [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")
 			(match_operand:SI 1 "const_int_operand" "i")]
@@ -1604,7 +1614,7 @@
    ""
    "%4:\\tatom%R1.cas.b32 %2,%0,0,1;setp.ne.u32 %3,%2,0;@%3 bra.uni %4;")
 
-(define_insn "nvptx_spinunlock"
+(define_insn "nvptx_spin_reset"
    [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")
 		      (match_operand:SI 1 "const_int_operand" "i")]
 		      UNSPECV_LOCK)
Index: gcc/config/nvptx/nvptx.c
===================================================================
--- gcc/config/nvptx/nvptx.c	(revision 227174)
+++ gcc/config/nvptx/nvptx.c	(working copy)
@@ -1220,7 +1220,7 @@ nvptx_expand_oacc_join (unsigned mode)
    gang or worker level.  */
 
 void
-nvptx_expand_oacc_lock_unlock (rtx src, bool lock)
+nvptx_expand_oacc_lock (rtx src, int direction)
 {
   unsigned HOST_WIDE_INT kind;
   rtx pat;
@@ -1230,22 +1230,26 @@ nvptx_expand_oacc_lock_unlock (rtx src,
 
   rtx mem = gen_rtx_MEM (SImode, lock_syms[kind]);
   rtx space = GEN_INT (lock_space[kind]);
-  rtx barrier = gen_nvptx_membar (GEN_INT (lock_level[kind]));
+  rtx barrier = NULL_RTX;
   rtx tmp = gen_reg_rtx (SImode);
 
-  if (!lock)
+  if (direction >= 0)
+    barrier = gen_nvptx_membar (GEN_INT (lock_level[kind]));
+
+  if (direction > 0)
     emit_insn (barrier);
-  if (lock)
+  if (!direction)
     {
       rtx_code_label *label = gen_label_rtx ();
 
       LABEL_NUSES (label)++;
-      pat = gen_nvptx_spinlock (mem, space, tmp, gen_reg_rtx (BImode), label);
+      pat = gen_nvptx_spin_lock (mem, space, tmp, gen_reg_rtx (BImode), label);
     }
   else
-    pat = gen_nvptx_spinunlock (mem, space, tmp);
+    /* We can use reset for both unlock and initialization.  */
+    pat = gen_nvptx_spin_reset (mem, space, tmp);
   emit_insn (pat);
-  if (lock)
+  if (!direction)
     emit_insn (barrier);
 }
 
@@ -3628,12 +3632,22 @@ nvptx_xform_fork_join (gimple stmt, cons
  */
 
 static bool
-nvptx_xform_lock_unlock (gimple stmt, const int *ARG_UNUSED (dims),
-			 bool ARG_UNUSED (is_lock))
+nvptx_xform_lock (gimple stmt, const int *ARG_UNUSED (dims), unsigned ifn_code)
 {
   tree arg = gimple_call_arg (stmt, 0);
+  unsigned mode = TREE_INT_CST_LOW (arg);
   
-  return TREE_INT_CST_LOW (arg) > GOMP_DIM_WORKER;
+  switch (ifn_code)
+    {
+    case IFN_GOACC_LOCK:
+    case IFN_GOACC_UNLOCK:
+      return mode > GOMP_DIM_WORKER;
+
+    case IFN_GOACC_LOCK_INIT:
+      return mode != GOMP_DIM_WORKER;
+
+    default: gcc_unreachable();
+    }
 }
 
 #undef TARGET_OPTION_OVERRIDE
@@ -3739,8 +3753,8 @@ nvptx_xform_lock_unlock (gimple stmt, co
 #undef TARGET_GOACC_FORK_JOIN
 #define TARGET_GOACC_FORK_JOIN nvptx_xform_fork_join
 
-#undef TARGET_GOACC_LOCK_UNLOCK
-#define TARGET_GOACC_LOCK_UNLOCK nvptx_xform_lock_unlock
+#undef TARGET_GOACC_LOCK
+#define TARGET_GOACC_LOCK nvptx_xform_lock
 
 struct gcc_target targetm = TARGET_INITIALIZER;
 
Index: gcc/config/nvptx/nvptx-protos.h
===================================================================
--- gcc/config/nvptx/nvptx-protos.h	(revision 227174)
+++ gcc/config/nvptx/nvptx-protos.h	(working copy)
@@ -34,7 +34,7 @@ extern const char *nvptx_section_for_dec
 #ifdef RTX_CODE
 extern void nvptx_expand_oacc_fork (unsigned);
 extern void nvptx_expand_oacc_join (unsigned);
-extern void nvptx_expand_oacc_lock_unlock (rtx, bool);
+extern void nvptx_expand_oacc_lock (rtx, int);
 extern void nvptx_expand_call (rtx, rtx);
 extern rtx nvptx_expand_compare (rtx);
 extern const char *nvptx_ptx_type_from_mode (machine_mode, bool);
Index: gcc/omp-low.c
===================================================================
--- gcc/omp-low.c	(revision 227174)
+++ gcc/omp-low.c	(working copy)
@@ -14761,8 +14761,8 @@ execute_oacc_transform ()
 
 	      case IFN_GOACC_LOCK:
 	      case IFN_GOACC_UNLOCK:
-		if (targetm.goacc.lock_unlock
-		    (stmt, dims, ifn_code == IFN_GOACC_LOCK))
+	      case IFN_GOACC_LOCK_INIT:
+		if (targetm.goacc.lock (stmt, dims, ifn_code))
 		  goto remove;
 		break;
 
@@ -14848,21 +14848,28 @@ default_goacc_fork_join (gimple ARG_UNUS
    there is no RTL expander.  */
 
 bool
-default_goacc_lock_unlock (gimple ARG_UNUSED (stmt),
-			   const int*ARG_UNUSED (dims),
-			   bool is_lock)
+default_goacc_lock (gimple ARG_UNUSED (stmt), const int*ARG_UNUSED (dims),
+		    unsigned ifn_code)
 {
-  if (is_lock)
+  switch (ifn_code)
     {
+    case IFN_GOACC_LOCK:
 #ifndef HAVE_oacc_lock
       return true;
 #endif
-    }
-  else
-    {
+      break;
+    case IFN_GOACC_UNLOCK:
 #ifndef HAVE_oacc_unlock
       return true;
 #endif
+      break;
+    case IFN_GOACC_LOCK_INIT:
+#ifndef HAVE_oacc_lock_init
+      return true;
+#endif
+      break;
+    default:
+      gcc_unreachable ();
     }
 
   return false;
Index: gcc/internal-fn.c
===================================================================
--- gcc/internal-fn.c	(revision 227174)
+++ gcc/internal-fn.c	(working copy)
@@ -2051,6 +2051,19 @@ expand_GOACC_UNLOCK (gcall *ARG_UNUSED (
 #endif
 }
 
+static void
+expand_GOACC_LOCK_INIT (gcall *ARG_UNUSED (stmt))
+{
+#ifdef HAVE_oacc_lock_init
+  rtx dim = expand_normal (gimple_call_arg (stmt, 0));
+  rtx id = expand_normal (gimple_call_arg (stmt, 1));
+  
+  emit_insn (gen_oacc_lock_init (dim, id));
+#else
+  gcc_unreachable ();
+#endif
+}
+
 /* Routines to expand each internal function, indexed by function number.
    Each routine has the prototype:
 
Index: gcc/doc/tm.texi.in
===================================================================
--- gcc/doc/tm.texi.in	(revision 227174)
+++ gcc/doc/tm.texi.in	(working copy)
@@ -4251,7 +4251,7 @@ address;  but often a machine-dependent
 
 @hook TARGET_GOACC_FORK_JOIN
 
-@hook TARGET_GOACC_LOCK_UNLOCK
+@hook TARGET_GOACC_LOCK
 
 @node Anchored Addresses
 @section Anchored Addresses
Index: gcc/doc/tm.texi
===================================================================
--- gcc/doc/tm.texi	(revision 227174)
+++ gcc/doc/tm.texi	(working copy)
@@ -5760,11 +5760,12 @@ pass.  It should return true, if the fun
 default hook returns true, if there is no RTL expanders for them.
 @end deftypefn
 
-@deftypefn {Target Hook} bool TARGET_GOACC_LOCK_UNLOCK (gimple, const @var{int[]}, @var{bool})
-This hook should convert IFN_GOACC_LOCK and IFN_GOACC_UNLOCK function
-calls to target-specific gimple.  It is executed during the oacc_xform
-pass.  It should return true, if the functions should be deleted.  The
-default hook returns true, if there is no RTL expanders for them.
+@deftypefn {Target Hook} bool TARGET_GOACC_LOCK (gimple, const @var{int[]}, @var{unsigned})
+This hook should convert IFN_GOACC_LOCK, IFN_GOACC_UNLOCK,
+IFN_GOACC_LOCK_INIT  function calls to target-specific gimple.  It is
+executed during the oacc_xform pass.  It should return true, if the
+functions should be deleted.  The default hook returns true, if there
+is no RTL expanders for them.
 @end deftypefn
 
 @node Anchored Addresses
Index: gcc/target.def
===================================================================
--- gcc/target.def	(revision 227174)
+++ gcc/target.def	(working copy)
@@ -1671,13 +1671,14 @@ bool, (gimple, const int[], bool),
 default_goacc_fork_join)
 
 DEFHOOK
-(lock_unlock,
-"This hook should convert IFN_GOACC_LOCK and IFN_GOACC_UNLOCK function\n\
-calls to target-specific gimple.  It is executed during the oacc_xform\n\
-pass.  It should return true, if the functions should be deleted.  The\n\
-default hook returns true, if there is no RTL expanders for them.",
-bool, (gimple, const int[], bool),
-default_goacc_lock_unlock)
+(lock,
+"This hook should convert IFN_GOACC_LOCK, IFN_GOACC_UNLOCK,\n\
+IFN_GOACC_LOCK_INIT  function calls to target-specific gimple.  It is\n\
+executed during the oacc_xform pass.  It should return true, if the\n\
+functions should be deleted.  The default hook returns true, if there\n\
+is no RTL expanders for them.",
+bool, (gimple, const int[], unsigned),
+default_goacc_lock)
 
 HOOK_VECTOR_END (goacc)
 
Index: gcc/internal-fn.def
===================================================================
--- gcc/internal-fn.def	(revision 227174)
+++ gcc/internal-fn.def	(working copy)
@@ -84,8 +84,9 @@ DEF_INTERNAL_FN (GOACC_JOIN, ECF_NOTHROW
 DEF_INTERNAL_FN (GOACC_DIM_SIZE, ECF_CONST | ECF_NOTHROW | ECF_LEAF, ".")
 DEF_INTERNAL_FN (GOACC_DIM_POS, ECF_PURE | ECF_NOTHROW | ECF_LEAF, ".")
 
-/* LOCK and UNLOCK operate a mutex used for reductions.  The first
+/* LOCK, UNLOCK & LOCK_INIT operate a mutex used for reductions.  The first
    argument is the compute dimension of the reduction and the second
    argument is a loop identifer.  */
 DEF_INTERNAL_FN (GOACC_LOCK, ECF_NOTHROW | ECF_LEAF, "..")
 DEF_INTERNAL_FN (GOACC_UNLOCK, ECF_NOTHROW | ECF_LEAF, "..")
+DEF_INTERNAL_FN (GOACC_LOCK_INIT, ECF_NOTHROW | ECF_LEAF, "..")

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]