This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[gomp] more ptx builtins


I've committed this to gomp4. It adds spinlock builtins, which will be necessary for a complete reduction implementation. There is a single global lock variable and a single .shared lock variable (the latter is, of course, per-CTA). We emit these declarations lazily, only if locks are taken during compilation. There's also a slight tweak to the builtin expanders: they now take a pointer to the descriptor. Hm, thinking about it, they could have gotten the name from the DECL in the EXPR for this particular case. Still, it might well turn out to be useful for further builtins, and we can adjust once this piece of implementation is completed.

Whilst there I noticed the shuffle, pack and unpack insns lacked register constraints, so I added them.

nathan
2015-07-30  Nathan Sidwell  <nathan@codesourcery.com>

	gcc/
	* config/nvptx/nvptx.md (UNSPECV_UNLOCK): New.
	(nvptx_shuffle<mode>): Add constraints.
	(unpack<mode>si2, packsi<mode>2): Likewise.
	(nvptx_spinlock, nvptx_spinunlock):  New.
	* config/nvptx/nvptx.c (LOCK_GLOBAL, LOCK_SHARED, LOCK_MAX): New
	defines.
	(lock_names, lock_regions, lock_space, lock_syms, lock_used): New.
	(nvptx_option_override): Set worker_bcast_align
	correctly. Initialize lock_syms.
	(nvptx_print_operand): Add 'R'.
	(nvptx_file_end): Emit lock vars if needed.
	(struct  builtin_description): Move earlier, add object pointer to
	callback.
	(nvptx_expand_shuffle_down): Adjust.
	(nvptx_expand_lock_unlock): New expander.
	(nvptx_expand_lock, nvptx_expand_unlock): New.
	(enum nvptx_types): Add NT_VOID_UINT.
	(builtins): Add lock/unlock builtins.
	(nvptx_init_builtins): Create new type.
	(nvptx_expand_builtin): Adjust.

	gcc/testsuite/
	* gcc.target/nvptx/spinlock-1.c: New.
	* gcc.target/nvptx/spinlock-2.c: New.

Index: gcc/testsuite/gcc.target/nvptx/spinlock-1.c
===================================================================
--- gcc/testsuite/gcc.target/nvptx/spinlock-1.c	(revision 0)
+++ gcc/testsuite/gcc.target/nvptx/spinlock-1.c	(revision 0)
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+void Foo ()
+{
+  __builtin_nvptx_lock (0);
+  __builtin_nvptx_unlock (0);
+}
+
+
+/* { dg-final { scan-assembler-times ".atom.global.cas.b32" 2 } } */
+/* { dg-final { scan-assember ".global .u32 __global_lock;" } } */
+/* { dg-final { scan-assember-not ".shared .u32 __shared_lock;" } } */
Index: gcc/testsuite/gcc.target/nvptx/spinlock-2.c
===================================================================
--- gcc/testsuite/gcc.target/nvptx/spinlock-2.c	(revision 0)
+++ gcc/testsuite/gcc.target/nvptx/spinlock-2.c	(revision 0)
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+void Foo ()
+{
+  __builtin_nvptx_lock (1);
+  __builtin_nvptx_unlock (1);
+}
+
+/* { dg-final { scan-assembler-times ".atom.shared.cas.b32" 2 } } */
+/* { dg-final { scan-assember ".shared .u32 __shared_lock;" } } */
+/* { dg-final { scan-assember-not ".shared .u32 __shared_lock;" } } */
Index: gcc/config/nvptx/nvptx.md
===================================================================
--- gcc/config/nvptx/nvptx.md	(revision 226406)
+++ gcc/config/nvptx/nvptx.md	(working copy)
@@ -61,6 +61,7 @@
 
 (define_c_enum "unspecv" [
    UNSPECV_LOCK
+   UNSPECV_UNLOCK
    UNSPECV_CAS
    UNSPECV_XCHG
    UNSPECV_BARSYNC
@@ -1409,30 +1410,30 @@
 
 ;; only 32-bit shuffles exist.
 (define_insn "nvptx_shuffle<mode>"
-  [(set (match_operand:BITS 0 "nvptx_register_operand" "")
+  [(set (match_operand:BITS 0 "nvptx_register_operand" "=R")
 	(unspec:BITS
-		[(match_operand:BITS 1 "nvptx_register_operand" "")
-		 (match_operand:SI 2 "nvptx_nonmemory_operand" "")
-		 (match_operand:SI 3 "const_int_operand" "")]
+		[(match_operand:BITS 1 "nvptx_register_operand" "R")
+		 (match_operand:SI 2 "nvptx_nonmemory_operand" "Ri")
+		 (match_operand:SI 3 "const_int_operand" "n")]
 		  UNSPEC_SHUFFLE))]
   ""
   "%.\\tshfl.%S3.b32\\t%0, %1, %2, 31;")
 
 ;; extract parts of a 64 bit object into 2 32-bit ints
 (define_insn "unpack<mode>si2"
-  [(set (match_operand:SI 0 "nvptx_register_operand" "")
-        (unspec:SI [(match_operand:BITD 2 "nvptx_register_operand" "")
+  [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
+        (unspec:SI [(match_operand:BITD 2 "nvptx_register_operand" "R")
 		    (const_int 0)] UNSPEC_BIT_CONV))
-   (set (match_operand:SI 1 "nvptx_register_operand" "")
+   (set (match_operand:SI 1 "nvptx_register_operand" "=R")
         (unspec:SI [(match_dup 2) (const_int 1)] UNSPEC_BIT_CONV))]
   ""
   "%.\\tmov.b64 {%0,%1}, %2;")
 
 ;; pack 2 32-bit ints into a 64 bit object
 (define_insn "packsi<mode>2"
-  [(set (match_operand:BITD 0 "nvptx_register_operand" "")
-        (unspec:BITD [(match_operand:SI 1 "nvptx_register_operand" "")
-		      (match_operand:SI 2 "nvptx_register_operand" "")]
+  [(set (match_operand:BITD 0 "nvptx_register_operand" "=R")
+        (unspec:BITD [(match_operand:SI 1 "nvptx_register_operand" "R")
+		      (match_operand:SI 2 "nvptx_register_operand" "R")]
 		    UNSPEC_BIT_CONV))]
   ""
   "%.\\tmov.b64 %0, {%1,%2};")
@@ -1561,3 +1562,22 @@
 		    UNSPECV_BARSYNC)]
   ""
   "bar.sync\\t%0;")
+
+
+;; spinlock and unlock
+(define_insn "nvptx_spinlock"
+   [(parallel
+     [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")
+			(match_operand:SI 1 "const_int_operand" "i")]
+		       UNSPECV_UNLOCK)
+      (match_operand:SI 2 "register_operand" "=R")
+      (match_operand:BI 3 "register_operand" "=R")])]
+   ""
+   "1:\\t.atom%R1.cas.b32 %2,%0,0,1;setp.ne.u32 %3,%2,0;@%3 bra.uni 1b;")
+
+(define_insn "nvptx_spinunlock"
+   [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")
+		      (match_operand:SI 1 "const_int_operand" "i")]
+		      UNSPECV_UNLOCK)]
+   ""
+   ".atom%R1.cas.b32 %0,1,0;")
Index: gcc/config/nvptx/nvptx.c
===================================================================
--- gcc/config/nvptx/nvptx.c	(revision 226406)
+++ gcc/config/nvptx/nvptx.c	(working copy)
@@ -101,6 +101,21 @@ static unsigned worker_bcast_align;
 #define worker_bcast_name "__worker_bcast"
 static GTY(()) rtx worker_bcast_sym;
 
+/* Global and shared lock variables.  Allocated at end of compilation,
+   if used.  Again, PTX lacks common blocks, so we can't share across
+   compilations.  */
+#define LOCK_GLOBAL 0
+#define LOCK_SHARED 1
+#define LOCK_MAX    2
+static const char *const lock_names[] = 
+  {"__global_lock", "__shared_lock"};
+static const char *const lock_regions[] = 
+  {"global", "shared"};
+static unsigned lock_space[] =
+  {ADDR_SPACE_GLOBAL, ADDR_SPACE_SHARED};
+static GTY(()) rtx lock_syms[LOCK_MAX];
+static bool lock_used[LOCK_MAX];
+
 /* Allocate a new, cleared machine_function structure.  */
 
 static struct machine_function *
@@ -130,7 +145,10 @@ nvptx_option_override (void)
     = hash_table<declared_libfunc_hasher>::create_ggc (17);
 
   worker_bcast_sym = gen_rtx_SYMBOL_REF (Pmode, worker_bcast_name);
-  worker_bcast_align = GET_MODE_SIZE (SImode);
+  worker_bcast_align = GET_MODE_ALIGNMENT (SImode) / BITS_PER_UNIT;
+
+  for (unsigned ix = LOCK_MAX; ix--;)
+    lock_syms[ix] = gen_rtx_SYMBOL_REF (Pmode, lock_names[ix]);
 }
 
 /* Return the mode to be used when declaring a ptx object for OBJ.
@@ -1875,7 +1893,8 @@ nvptx_print_operand_address (FILE *file,
    A -- print an address space identifier for a MEM
    c -- print an opcode suffix for a comparison operator, including a type code
    f -- print a full reg even for something that must always be split
-   S -- print a shuffle kind
+   R -- print an address space specified by CONST_INT
+   S -- print a shuffle kind specified by CONST_INT
    t -- print a type opcode suffix, promoting QImode to 32 bits
    T -- print a type size in bits
    u -- print a type opcode suffix without promotions.  */
@@ -1927,6 +1946,13 @@ nvptx_print_operand (FILE *file, rtx x,
       fprintf (file, "%s", nvptx_ptx_type_from_mode (op_mode, false));
       break;
 
+    case 'R':
+      {
+	addr_space_t as = UINTVAL (x);
+	fputs (nvptx_section_from_addr_space (as), file);
+      }
+      break;
+
     case 'S':
       {
 	unsigned kind = UINTVAL (x);
@@ -3118,15 +3144,36 @@ nvptx_file_end (void)
 	& ~(worker_bcast_align - 1);
       
       fprintf (asm_out_file, "// BEGIN VAR DEF: %s\n", worker_bcast_name);
-      fprintf (asm_out_file, ".shared.align %d .u8 %s[%d];\n",
+      fprintf (asm_out_file, ".shared .align %d .u8 %s[%d];\n",
 	       worker_bcast_align,
 	       worker_bcast_name, worker_bcast_hwm);
     }
+
+  /* Emit lock variables.  */
+  for (unsigned ix = LOCK_MAX; ix--;)
+    if (lock_used[ix])
+      {
+	fprintf (asm_out_file, "// BEGIN VAR DEF: %s\n", lock_names[ix]);
+	fprintf (asm_out_file, ".%s .u32 %s;\n",
+		 lock_regions[ix], lock_names[ix]);
+      }
 }
 
+/* Descriptor for a builtin.  */
+
+struct builtin_description
+{
+  const char *name;
+  unsigned short type;
+  rtx (*expander) (const struct builtin_description *, tree,
+		   rtx, machine_mode, int);
+};
+
+
 /* Expander for the shuffle down builtins.  */
 static rtx
-nvptx_expand_shuffle_down (tree exp, rtx target, machine_mode mode, int ignore)
+nvptx_expand_shuffle_down (const struct builtin_description *ARG_UNUSED (desc),
+			   tree exp, rtx target, machine_mode mode, int ignore)
 {
   if (ignore)
     return target;
@@ -3135,7 +3182,7 @@ nvptx_expand_shuffle_down (tree exp, rtx
     target = gen_reg_rtx (mode);
 
   rtx src = expand_expr (CALL_EXPR_ARG (exp, 0),
-			NULL_RTX, mode, EXPAND_NORMAL);
+			 NULL_RTX, mode, EXPAND_NORMAL);
   if (!REG_P (src))
     src = copy_to_mode_reg (mode, src);
 
@@ -3151,3 +3198,25 @@ nvptx_expand_shuffle_down (tree exp, rtx
   return target;
 }
 
+/* Expander for locking and unlocking.  */
+static rtx
+nvptx_expand_lock_unlock (const struct builtin_description *desc,
+			   tree exp, bool lock)
+{
+  rtx src = expand_expr (CALL_EXPR_ARG (exp, 0),
+			 NULL_RTX, SImode, EXPAND_NORMAL);
+  unsigned HOST_WIDE_INT kind;
+  rtx pat;
+  
+  kind = GET_CODE (src) == CONST_INT ? INTVAL  (src) : LOCK_MAX;
+  if (kind >= LOCK_MAX)
+    error ("builtin %<%s%> requires constant argument less than %u",
+	   desc->name, LOCK_MAX);
+  lock_used[kind] = true;
+
+  rtx mem = gen_rtx_MEM (SImode, lock_syms[kind]);
+  rtx space = GEN_INT (lock_space[kind]);
+  
+  if (lock)
+    pat = gen_nvptx_spinlock (mem, space,
+			      gen_reg_rtx (SImode), gen_reg_rtx (BImode));
+  else
+    pat = gen_nvptx_spinunlock (mem, space);
+  if (pat)
+    emit_insn (pat);
+  return const0_rtx;
+}
+
+/* Lock expander.  */
+
+static rtx
+nvptx_expand_lock (const struct builtin_description *desc,
+		   tree exp, rtx ARG_UNUSED (target),
+		   machine_mode ARG_UNUSED (mode), int ARG_UNUSED (ignore))
+{
+  return nvptx_expand_lock_unlock (desc, exp, true);
+}
+
+/* Unlock expander.  */
+
+static rtx
+nvptx_expand_unlock (const struct builtin_description *desc,
+		   tree exp, rtx ARG_UNUSED (target),
+		   machine_mode ARG_UNUSED (mode), int ARG_UNUSED (ignore))
+{
+  return nvptx_expand_lock_unlock (desc, exp, false);
+}
+
 enum nvptx_types
   {
     NT_UINT_UINT_INT,
     NT_ULL_ULL_INT,
     NT_FLT_FLT_INT,
     NT_DBL_DBL_INT,
+    NT_VOID_UINT,
 
     NT_MAX
   };
 
-struct builtin_description
-{
-  const char *name;
-  unsigned short type;
-  rtx (*expander) (tree, rtx, machine_mode, int);
-};
-
 static const struct builtin_description builtins[] =
 {
   {"__builtin_nvptx_shuffle_down", NT_UINT_UINT_INT,
@@ -3178,6 +3268,8 @@ static const struct builtin_description
    nvptx_expand_shuffle_down},
   {"__builtin_nvptx_shuffle_downd", NT_DBL_DBL_INT,
    nvptx_expand_shuffle_down},
+  {"__builtin_nvptx_lock", NT_VOID_UINT, nvptx_expand_lock},
+  {"__builtin_nvptx_unlock", NT_VOID_UINT, nvptx_expand_unlock},
 };
 
 #define NVPTX_BUILTIN_MAX (sizeof (builtins) / sizeof (builtins[0]))
@@ -3214,6 +3306,8 @@ nvptx_init_builtins (void)
   types[NT_DBL_DBL_INT]
     = build_function_type_list (double_type_node, double_type_node,
 				integer_type_node, NULL_TREE);
+  types[NT_VOID_UINT]
+    = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
 
   for (ix = 0; ix != NVPTX_BUILTIN_MAX; ix++)
     nvptx_builtin_decls[ix]
@@ -3236,7 +3330,7 @@ nvptx_expand_builtin (tree exp, rtx targ
   tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
   const struct builtin_description *d = &builtins[DECL_FUNCTION_CODE (fndecl)];
 
-  return d->expander (exp, target, mode, ignore);
+  return d->expander (d, exp, target, mode, ignore);
 }
 
 #undef TARGET_OPTION_OVERRIDE

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]