This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[gomp4] lock/unlock internal fn


I've committed this patch to add a new pair of internal functions. These will be used in implementing reductions.

They'll be emitted around reduction finalization and implement the locking required for the general case of combining reduction values. They may be transformed in the oacc_xform pass; the default behaviour is to delete them if there is no RTL expander. For PTX we delete them if they are at the vector level.

This avoids needing machine-specific builtins to expand to, and thus should result in less backend code duplication.

nathan
2015-08-17  Nathan Sidwell  <nathan@codesourcery.com>

	* target.def (lock_unlock): New GOACC hook.
	* targhooks.h (default_goacc_lock_unlock): Declare.
	* doc/tm.texi.in (TARGET_GOACC_LOCK_UNLOCK): Add.
	* doc/tm.texi: Rebuilt.
	* internal-fn.def (GOACC_LOCK, GOACC_UNLOCK): New.
	* internal-fn.c (expand_GOACC_LOCK, expand_GOACC_UNLOCK): New.
	* omp-low.c (execute_oacc_transform): Add lock/unlock handling.
	(default_goacc_lock_unlock): New.
	* config/nvptx/nvptx-protos.h (nvptx_expand_oacc_lock_unlock): Declare.
	* config/nvptx/nvptx.md (UNSPECV_UNLOCK): Delete.
	(oacc_lock, oacc_unlock): New expanders.
	(nvptx_spinlock, nvptx_spinunlock): Use UNSPECV_LOCK.
	* config/nvptx/nvptx.c (nvptx_expand_oacc_lock_unlock): New.
	(nvptx_expand_lock_unlock): Delete.
	(nvptx_expand_lock, nvptx_expand_unlock): Delete.
	(nvptx_expand_work_red_addr): Fixup address generation.
	(enum nvptx_types): Delete NT_VOID_UINT.
	(builtins): Delete nvptx_lock and nvptx_unlock.
	(nvptx_init_builtins): Adjust.
	(nvptx_xform_lock_unlock): New.
	(TARGET_GOACC_LOCK_UNLOCK): Override.
	
Index: gcc/config/nvptx/nvptx-protos.h
===================================================================
--- gcc/config/nvptx/nvptx-protos.h	(revision 226951)
+++ gcc/config/nvptx/nvptx-protos.h	(working copy)
@@ -34,6 +34,7 @@ extern const char *nvptx_section_for_dec
 #ifdef RTX_CODE
 extern void nvptx_expand_oacc_fork (rtx);
 extern void nvptx_expand_oacc_join (rtx);
+extern void nvptx_expand_oacc_lock_unlock (rtx, bool);
 extern void nvptx_expand_call (rtx, rtx);
 extern rtx nvptx_expand_compare (rtx);
 extern const char *nvptx_ptx_type_from_mode (machine_mode, bool);
Index: gcc/config/nvptx/nvptx.md
===================================================================
--- gcc/config/nvptx/nvptx.md	(revision 226951)
+++ gcc/config/nvptx/nvptx.md	(working copy)
@@ -61,7 +61,6 @@
 
 (define_c_enum "unspecv" [
    UNSPECV_LOCK
-   UNSPECV_UNLOCK
    UNSPECV_CAS
    UNSPECV_XCHG
    UNSPECV_BARSYNC
@@ -1366,6 +1365,26 @@
   return asms[INTVAL (operands[1])];
 })
 
+(define_expand "oacc_lock"
+  [(unspec_volatile:SI [(match_operand:SI 0 "const_int_operand" "")
+		        (match_operand:SI 1 "const_int_operand" "")]
+		       UNSPECV_LOCK)]
+  ""
+{
+  nvptx_expand_oacc_lock_unlock (operands[0], true);
+  DONE;
+})
+
+(define_expand "oacc_unlock"
+  [(unspec_volatile:SI [(match_operand:SI 0 "const_int_operand" "")
+		        (match_operand:SI 1 "const_int_operand" "")]
+		       UNSPECV_LOCK)]
+  ""
+{
+  nvptx_expand_oacc_lock_unlock (operands[0], false);
+  DONE;
+})
+
 (define_insn "nvptx_fork"
   [(unspec_volatile:SI [(match_operand:SI 0 "const_int_operand" "")]
 		       UNSPECV_FORK)]
@@ -1576,7 +1595,7 @@
    [(parallel
      [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")
 			(match_operand:SI 1 "const_int_operand" "i")]
-		       UNSPECV_UNLOCK)
+		       UNSPECV_LOCK)
       (match_operand:SI 2 "register_operand" "=R")
       (match_operand:BI 3 "register_operand" "=R")
       (label_ref (match_operand 4 "" ""))])]
@@ -1586,7 +1605,7 @@
 (define_insn "nvptx_spinunlock"
    [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")
 		      (match_operand:SI 1 "const_int_operand" "i")]
-		      UNSPECV_UNLOCK)
+		      UNSPECV_LOCK)
     (match_operand:SI 2 "register_operand" "=R")]
    ""
    "atom%R1.exch.b32 %2,%0,0;")
Index: gcc/config/nvptx/nvptx.c
===================================================================
--- gcc/config/nvptx/nvptx.c	(revision 226951)
+++ gcc/config/nvptx/nvptx.c	(working copy)
@@ -1164,6 +1164,39 @@ nvptx_expand_oacc_join (rtx mode)
   emit_insn (gen_nvptx_joining (mode));
 }
 
+/* Expander for reduction locking and unlocking.  We expect SRC to be
+   gang or worker level.  */
+
+void
+nvptx_expand_oacc_lock_unlock (rtx src, bool lock)
+{
+  unsigned HOST_WIDE_INT kind;
+  rtx pat;
+  
+  kind = INTVAL (src) == GOMP_DIM_GANG ? LOCK_GLOBAL : LOCK_SHARED;
+  lock_used[kind] = true;
+
+  rtx mem = gen_rtx_MEM (SImode, lock_syms[kind]);
+  rtx space = GEN_INT (lock_space[kind]);
+  rtx barrier = gen_nvptx_membar (GEN_INT (lock_level[kind]));
+  rtx tmp = gen_reg_rtx (SImode);
+
+  if (!lock)
+    emit_insn (barrier);
+  if (lock)
+    {
+      rtx_code_label *label = gen_label_rtx ();
+
+      LABEL_NUSES (label)++;
+      pat = gen_nvptx_spinlock (mem, space, tmp, gen_reg_rtx (BImode), label);
+    }
+  else
+    pat = gen_nvptx_spinunlock (mem, space, tmp);
+  emit_insn (pat);
+  if (lock)
+    emit_insn (barrier);
+}
+
 /* Generate instruction(s) to unpack a 64 bit object into 2 32 bit
    objects.  */
 
@@ -3306,62 +3339,6 @@ nvptx_expand_shuffle_down (tree exp, rtx
   return target;
 }
 
-/* Expander for locking and unlocking.  */
-static rtx
-nvptx_expand_lock_unlock (tree exp, bool lock)
-{
-  rtx src = expand_expr (CALL_EXPR_ARG (exp, 0),
-			 NULL_RTX, SImode, EXPAND_NORMAL);
-  unsigned HOST_WIDE_INT kind;
-  rtx pat;
-  
-  kind = GET_CODE (src) == CONST_INT ? INTVAL  (src) : LOCK_MAX;
-  if (kind >= LOCK_MAX)
-    error ("builtin %D requires constant argument less than %u",
-	   get_callee_fndecl (exp), LOCK_MAX);
-  lock_used[kind] = true;
-
-  rtx mem = gen_rtx_MEM (SImode, lock_syms[kind]);
-  rtx space = GEN_INT (lock_space[kind]);
-  rtx barrier = gen_nvptx_membar (GEN_INT (lock_level[kind]));
-
-  if (!lock)
-    emit_insn (barrier);
-  if (lock)
-    {
-      rtx_code_label *label = gen_label_rtx ();
-
-      LABEL_NUSES (label)++;
-      pat = gen_nvptx_spinlock (mem, space,
-				gen_reg_rtx (SImode), gen_reg_rtx (BImode),
-				label);
-    }
-  else
-    pat = gen_nvptx_spinunlock (mem, space, gen_reg_rtx (SImode));
-  emit_insn (pat);
-  if (lock)
-    emit_insn (barrier);
-  return const0_rtx;
-}
-
-/* Lock expander.  */
-
-static rtx
-nvptx_expand_lock (tree exp, rtx ARG_UNUSED (target),
-		   machine_mode ARG_UNUSED (mode), int ARG_UNUSED (ignore))
-{
-  return nvptx_expand_lock_unlock (exp, true);
-}
-
-/* Unlock expander.  */
-
-static rtx
-nvptx_expand_unlock (tree exp, rtx ARG_UNUSED (target),
-		     machine_mode ARG_UNUSED (mode), int ARG_UNUSED (ignore))
-{
-  return nvptx_expand_lock_unlock (exp, false);
-}
-
 /* Worker reduction address expander.  */
 static rtx
 nvptx_expand_work_red_addr (tree exp, rtx target,
@@ -3413,12 +3390,16 @@ nvptx_expand_work_red_addr (tree exp, rt
   /* Return offset into worker reduction array.  */
   unsigned offset = loop.vars[ix].second;
   
-  rtx addr = gen_reg_rtx (Pmode);
-  emit_move_insn (addr,
-		  gen_rtx_PLUS (Pmode, worker_red_sym, GEN_INT (offset)));
+  emit_insn (gen_rtx_SET (target, worker_red_sym));
+
+  if (offset)
+    emit_insn (gen_rtx_SET (target,
+			    gen_rtx_PLUS (Pmode, target, GEN_INT (offset))));
+	       
   emit_insn (gen_rtx_SET (target,
-			  gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr),
+			  gen_rtx_UNSPEC (Pmode, gen_rtvec (1, target),
 					  UNSPEC_FROM_SHARED)));
+
   return target;
 }
 
@@ -3428,7 +3409,6 @@ enum nvptx_types
     NT_ULL_ULL_INT,
     NT_FLT_FLT_INT,
     NT_DBL_DBL_INT,
-    NT_VOID_UINT,
     NT_UINTPTR_UINT_UINT,
     NT_ULLPTR_UINT_UINT,
     NT_FLTPTR_UINT_UINT,
@@ -3446,8 +3426,6 @@ static const struct builtin_description
    nvptx_expand_shuffle_down},
   {"__builtin_nvptx_shuffle_downd", NT_DBL_DBL_INT,
    nvptx_expand_shuffle_down},
-  {"__builtin_nvptx_lock", NT_VOID_UINT, nvptx_expand_lock},
-  {"__builtin_nvptx_unlock", NT_VOID_UINT, nvptx_expand_unlock},
   {"__builtin_nvptx_work_red_addr", NT_UINTPTR_UINT_UINT,
    nvptx_expand_work_red_addr},
   {"__builtin_nvptx_work_red_addrll", NT_ULLPTR_UINT_UINT,
@@ -3492,9 +3470,6 @@ nvptx_init_builtins (void)
   types[NT_DBL_DBL_INT]
     = build_function_type_list (double_type_node, double_type_node,
 				integer_type_node, NULL_TREE);
-  types[NT_VOID_UINT]
-    = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
-
   types[NT_UINTPTR_UINT_UINT]
     = build_function_type_list (build_pointer_type (unsigned_type_node),
 				unsigned_type_node, unsigned_type_node,
@@ -3628,6 +3603,20 @@ nvptx_xform_fork_join (gimple_stmt_itera
 
   return false;
 }
+
+/* Check lock & unlock.  We only need the gang- & worker-level ones.
+ */
+
+static bool
+nvptx_xform_lock_unlock (gimple_stmt_iterator *ARG_UNUSED (gsi),
+			 gimple stmt,
+			 const int *ARG_UNUSED (dims),
+			 bool ARG_UNUSED (is_fork))
+{
+  tree arg = gimple_call_arg (stmt, 0);
+  
+  return TREE_INT_CST_LOW (arg) > GOMP_DIM_WORKER;
+}
 
 #undef TARGET_OPTION_OVERRIDE
 #define TARGET_OPTION_OVERRIDE nvptx_option_override
@@ -3732,6 +3721,9 @@ nvptx_xform_fork_join (gimple_stmt_itera
 #undef TARGET_GOACC_FORK_JOIN
 #define TARGET_GOACC_FORK_JOIN nvptx_xform_fork_join
 
+#undef TARGET_GOACC_LOCK_UNLOCK
+#define TARGET_GOACC_LOCK_UNLOCK nvptx_xform_lock_unlock
+
 struct gcc_target targetm = TARGET_INITIALIZER;
 
 #include "gt-nvptx.h"
Index: gcc/targhooks.h
===================================================================
--- gcc/targhooks.h	(revision 226951)
+++ gcc/targhooks.h	(working copy)
@@ -111,6 +111,8 @@ extern bool default_goacc_validate_dims
 extern unsigned default_goacc_dim_limit (unsigned);
 extern bool default_goacc_fork_join (gimple_stmt_iterator *, gimple,
 				     const int [], bool);
+extern bool default_goacc_lock_unlock (gimple_stmt_iterator *, gimple,
+				       const int [], bool);
 
 /* These are here, and not in hooks.[ch], because not all users of
    hooks.h include tm.h, and thus we don't have CUMULATIVE_ARGS.  */
Index: gcc/target.def
===================================================================
--- gcc/target.def	(revision 226951)
+++ gcc/target.def	(working copy)
@@ -1670,6 +1670,15 @@ default hook returns true, if there is n
 bool, (gimple_stmt_iterator *, gimple, const int[], bool),
 default_goacc_fork_join)
 
+DEFHOOK
+(lock_unlock,
+"This hook should convert IFN_GOACC_LOCK and IFN_GOACC_UNLOCK function\n\
+calls to target-specific gimple.  It is executed during the oacc_xform\n\
+pass.  It should return true, if the functions should be deleted.  The\n\
+default hook returns true, if there is no RTL expanders for them.",
+bool, (gimple_stmt_iterator *, gimple, const int[], bool),
+default_goacc_lock_unlock)
+
 HOOK_VECTOR_END (goacc)
 
 /* Functions relating to vectorization.  */
Index: gcc/internal-fn.def
===================================================================
--- gcc/internal-fn.def	(revision 226951)
+++ gcc/internal-fn.def	(working copy)
@@ -83,3 +83,9 @@ DEF_INTERNAL_FN (GOACC_JOIN, ECF_NOTHROW
    single INTEGER_CST argument.  */
 DEF_INTERNAL_FN (GOACC_DIM_SIZE, ECF_CONST | ECF_NOTHROW | ECF_LEAF, ".")
 DEF_INTERNAL_FN (GOACC_DIM_POS, ECF_PURE | ECF_NOTHROW | ECF_LEAF, ".")
+
+/* LOCK and UNLOCK operate a mutex used for reductions.  The first
+   argument is the compute dimension of the reduction and the second
+   argument is a loop identifier.  */
+DEF_INTERNAL_FN (GOACC_LOCK, ECF_NOTHROW | ECF_LEAF, "..")
+DEF_INTERNAL_FN (GOACC_UNLOCK, ECF_NOTHROW | ECF_LEAF, "..")
Index: gcc/omp-low.c
===================================================================
--- gcc/omp-low.c	(revision 226951)
+++ gcc/omp-low.c	(working copy)
@@ -14743,19 +14743,25 @@ execute_oacc_transform ()
 		    {
 		    default: break;
 
+		    case IFN_GOACC_DIM_POS:
 		    case IFN_GOACC_DIM_SIZE:
-		      oacc_xform_dim (&gsi, stmt, dims, false);
+		      oacc_xform_dim (&gsi, stmt, dims,
+				      ifn_code == IFN_GOACC_DIM_POS);
 		      break;
 
-		    case IFN_GOACC_DIM_POS:
-		      oacc_xform_dim (&gsi, stmt, dims, true);
-		      break;
+		    case IFN_GOACC_LOCK:
+		    case IFN_GOACC_UNLOCK:
+		      if (targetm.goacc.lock_unlock
+			  (&gsi, stmt, dims, ifn_code == IFN_GOACC_LOCK))
+			goto remove;
+		      break;
 
 		    case IFN_GOACC_FORK:
 		    case IFN_GOACC_JOIN:
 		      if (targetm.goacc.fork_join
 			  (&gsi, stmt, dims, ifn_code == IFN_GOACC_FORK))
 			{
+			remove:
 			  replace_uses_by (gimple_vdef (stmt),
 					   gimple_vuse (stmt));
 			  gsi_remove (&gsi, true);
@@ -14814,7 +14820,6 @@ default_goacc_fork_join (gimple_stmt_ite
 			 gimple ARG_UNUSED (stmt),
 			 const int *ARG_UNUSED (dims), bool is_fork)
 {
-  /* If there is no expander, we can delete the functions.  */
   if (is_fork)
     {
 #ifndef HAVE_oacc_fork
@@ -14827,6 +14832,31 @@ default_goacc_fork_join (gimple_stmt_ite
       return true;
 #endif
     }
+
+  return false;
+}
+
+/* Default lock/unlock early expander.  Delete the function calls if
+   there is no RTL expander.  */
+
+bool
+default_goacc_lock_unlock (gimple_stmt_iterator *ARG_UNUSED (gsi),
+			   gimple ARG_UNUSED (stmt),
+			   const int*ARG_UNUSED (dims),
+			   bool is_lock)
+{
+  if (is_lock)
+    {
+#ifndef HAVE_oacc_lock
+      return true;
+#endif
+    }
+  else
+    {
+#ifndef HAVE_oacc_unlock
+      return true;
+#endif
+    }
 
   return false;
 }
Index: gcc/internal-fn.c
===================================================================
--- gcc/internal-fn.c	(revision 226951)
+++ gcc/internal-fn.c	(working copy)
@@ -2025,6 +2025,32 @@ expand_GOACC_DIM_POS (gcall *ARG_UNUSED
 #endif
 }
 
+static void
+expand_GOACC_LOCK (gcall *ARG_UNUSED (stmt))
+{
+#ifdef HAVE_oacc_lock
+  rtx dim = expand_normal (gimple_call_arg (stmt, 0));
+  rtx id = expand_normal (gimple_call_arg (stmt, 1));
+  
+  emit_insn (gen_oacc_lock (dim, id));
+#else
+  gcc_unreachable ();
+#endif
+}
+
+static void
+expand_GOACC_UNLOCK (gcall *ARG_UNUSED (stmt))
+{
+#ifdef HAVE_oacc_unlock
+  rtx dim = expand_normal (gimple_call_arg (stmt, 0));
+  rtx id = expand_normal (gimple_call_arg (stmt, 1));
+  
+  emit_insn (gen_oacc_unlock (dim, id));
+#else
+  gcc_unreachable ();
+#endif
+}
+
 /* Routines to expand each internal function, indexed by function number.
    Each routine has the prototype:
 
Index: gcc/doc/tm.texi
===================================================================
--- gcc/doc/tm.texi	(revision 226951)
+++ gcc/doc/tm.texi	(working copy)
@@ -5760,6 +5760,13 @@ pass.  It should return true, if the fun
 default hook returns true, if there is no RTL expanders for them.
 @end deftypefn
 
+@deftypefn {Target Hook} bool TARGET_GOACC_LOCK_UNLOCK (gimple_stmt_iterator *@var{}, @var{gimple}, const @var{int[]}, @var{bool})
+This hook should convert IFN_GOACC_LOCK and IFN_GOACC_UNLOCK function
+calls to target-specific gimple.  It is executed during the oacc_xform
+pass.  It should return true, if the functions should be deleted.  The
+default hook returns true, if there is no RTL expanders for them.
+@end deftypefn
+
 @node Anchored Addresses
 @section Anchored Addresses
 @cindex anchored addresses
Index: gcc/doc/tm.texi.in
===================================================================
--- gcc/doc/tm.texi.in	(revision 226951)
+++ gcc/doc/tm.texi.in	(working copy)
@@ -4251,6 +4251,8 @@ address;  but often a machine-dependent
 
 @hook TARGET_GOACC_FORK_JOIN
 
+@hook TARGET_GOACC_LOCK_UNLOCK
+
 @node Anchored Addresses
 @section Anchored Addresses
 @cindex anchored addresses

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]