This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[gomp4] New builtins, preparation for oacc vector-single


To implement OpenACC vector-single mode, we need to ensure that only one thread out of the group representing a worker executes. The others skip computations but follow along the CFG, so the results of conditional branch decisions must be broadcast to them.

The patch below adds a new builtin and nvptx pattern to implement that broadcast functionality.

Committed on gomp-4_0-branch.


Bernd
Index: gcc/ChangeLog.gomp
===================================================================
--- gcc/ChangeLog.gomp	(revision 223360)
+++ gcc/ChangeLog.gomp	(working copy)
@@ -1,3 +1,16 @@
+2015-05-19  Bernd Schmidt  <bernds@codesourcery.com>
+
+	* omp-builtins.def (GOACC_thread_broadcast,
+	GOACC_thread_broadcast_ll): New builtins.
+	* optabs.def (oacc_thread_broadcast_optab): New optab.
+	* builtins.c (expand_builtin_oacc_thread_broadcast): New function.
+	(expand_builtin): Use it.
+	* config/nvptx/nvptx.c (nvptx_cannot_copy_insn_p): New function.
+	(TARGET_CANNOT_COPY_INSN_P): Define.
+	* config/nvptx/nvptx.md (UNSPECV_WARP_BCAST): New constant.
+	(oacc_thread_broadcastsi): New pattern.
+	(oacc_thread_broadcastdi): New expander.
+
 2015-05-19  Tom de Vries  <tom@codesourcery.com>
 
 	* omp-low.c (enclosing_target_ctx): Comment out.
Index: gcc/builtins.c
===================================================================
--- gcc/builtins.c	(revision 223360)
+++ gcc/builtins.c	(working copy)
@@ -6022,6 +6022,43 @@ expand_oacc_ganglocal_ptr (rtx target AT
   return NULL_RTX;
 }
 
+/* Expand GOACC_thread_broadcast{,_ll} call EXP and return the result rtx.
+   Use the oacc_thread_broadcast pattern of the argument's mode or of the
+   narrowest wider mode that has one; without one, expand the argument
+   unchanged.  TARGET is only a suggested location and may be null.  */
+
+static rtx
+expand_builtin_oacc_thread_broadcast (tree exp, rtx target)
+{
+  tree arg0 = CALL_EXPR_ARG (exp, 0);
+  enum insn_code icode;
+  machine_mode mode = TYPE_MODE (TREE_TYPE (arg0));
+  gcc_assert (INTEGRAL_MODE_P (mode));
+  do
+    {
+      icode = direct_optab_handler (oacc_thread_broadcast_optab, mode);
+      mode = GET_MODE_WIDER_MODE (mode);
+    }
+  while (icode == CODE_FOR_nothing && mode != VOIDmode);
+  if (icode == CODE_FOR_nothing)
+    return expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL);
+
+  machine_mode mode0 = insn_data[icode].operand[0].mode;
+  machine_mode mode1 = insn_data[icode].operand[1].mode;
+  /* Check TARGET for null before inspecting it; REG_P dereferences it.  */
+  rtx tmp = target;
+  if (!tmp || !REG_P (tmp) || GET_MODE (tmp) != mode0)
+    tmp = gen_reg_rtx (mode0);
+  rtx op1 = expand_expr (arg0, NULL_RTX, mode1, EXPAND_NORMAL);
+  if (GET_MODE (op1) != mode1)
+    op1 = convert_to_mode (mode1, op1, 0);
+
+  rtx insn = GEN_FCN (icode) (tmp, op1);
+  if (insn == NULL_RTX)
+    return const0_rtx;
+  emit_insn (insn);
+  return tmp;
+}
 
 /* Expand an expression EXP that calls a built-in function,
    with result going to TARGET if that's convenient
@@ -7177,6 +7214,10 @@ expand_builtin (tree exp, rtx target, rt
 	return target;
       break;
 
+    case BUILT_IN_GOACC_THREAD_BROADCAST:
+    case BUILT_IN_GOACC_THREAD_BROADCAST_LL:
+      return expand_builtin_oacc_thread_broadcast (exp, target);
+
     default:	/* just do library call, if unknown builtin */
       break;
     }
Index: gcc/config/nvptx/nvptx.c
===================================================================
--- gcc/config/nvptx/nvptx.c	(revision 223360)
+++ gcc/config/nvptx/nvptx.c	(working copy)
@@ -2029,6 +2029,15 @@ nvptx_vector_alignment (const_tree type)
 
   return MIN (align, BIGGEST_ALIGNMENT);
 }
+
+/* Implement TARGET_CANNOT_COPY_INSN_P.  Return true iff INSN is recognized
+   as the oacc_thread_broadcastsi pattern.  */
+
+static bool
+nvptx_cannot_copy_insn_p (rtx_insn *insn)
+{
+  return recog_memoized (insn) == CODE_FOR_oacc_thread_broadcastsi;
+}
 
 /* Record a symbol for mkoffload to enter into the mapping table.  */
 
@@ -2153,6 +2162,9 @@ nvptx_file_end (void)
 #undef TARGET_VECTOR_ALIGNMENT
 #define TARGET_VECTOR_ALIGNMENT nvptx_vector_alignment
 
+#undef  TARGET_CANNOT_COPY_INSN_P
+#define TARGET_CANNOT_COPY_INSN_P nvptx_cannot_copy_insn_p
+
 struct gcc_target targetm = TARGET_INITIALIZER;
 
 #include "gt-nvptx.h"
Index: gcc/config/nvptx/nvptx.md
===================================================================
--- gcc/config/nvptx/nvptx.md	(revision 223360)
+++ gcc/config/nvptx/nvptx.md	(working copy)
@@ -61,6 +61,7 @@ (define_c_enum "unspecv" [
    UNSPECV_LOCK
    UNSPECV_CAS
    UNSPECV_XCHG
+   UNSPECV_WARP_BCAST
 ])
 
 (define_attr "subregs_ok" "false,true"
@@ -1322,6 +1323,37 @@ (define_expand "oacc_ctaid"
     FAIL;
 })
 
+(define_insn "oacc_thread_broadcastsi"	;; SImode broadcast of operand 1.
+  [(set (match_operand:SI 0 "nvptx_register_operand" "")
+	(unspec_volatile:SI [(match_operand:SI 1 "nvptx_register_operand" "")]
+			    UNSPECV_WARP_BCAST))]
+  ""	;; No enabling condition.
+  "%.\\tshfl.idx.b32\\t%0, %1, 0, 31;")
+
+(define_expand "oacc_thread_broadcastdi"
+  [(set (match_operand:DI 0 "nvptx_register_operand" "")
+	(unspec_volatile:DI [(match_operand:DI 1 "nvptx_register_operand" "")]
+			    UNSPECV_WARP_BCAST))]
+  ""
+{
+  /* Broadcast each 32-bit half with the SImode insn, then reassemble.  */
+  rtx hi = gen_reg_rtx (DImode);
+  emit_insn (gen_lshrdi3 (hi, operands[1], GEN_INT (32)));
+  rtx hi_in = force_reg (SImode, gen_lowpart (SImode, hi));
+  rtx lo_in = force_reg (SImode, gen_lowpart (SImode, operands[1]));
+  rtx hi_out = gen_reg_rtx (SImode), lo_out = gen_reg_rtx (SImode);
+  emit_insn (gen_oacc_thread_broadcastsi (hi_out, hi_in));
+  emit_insn (gen_oacc_thread_broadcastsi (lo_out, lo_in));
+  rtx hi_di = gen_reg_rtx (DImode), lo_di = gen_reg_rtx (DImode);
+  emit_insn (gen_extendsidi2 (hi_di, hi_out));
+  /* Zero-extend the low half so its sign bit cannot clobber the high half.  */
+  emit_insn (gen_zero_extendsidi2 (lo_di, lo_out));
+  rtx hi_shifted = gen_reg_rtx (DImode);
+  emit_insn (gen_ashldi3 (hi_shifted, hi_di, GEN_INT (32)));
+  emit_insn (gen_iordi3 (operands[0], lo_di, hi_shifted));
+  DONE;
+})
+
 (define_insn "ganglocal_ptr<mode>"
   [(set (match_operand:P 0 "nvptx_register_operand" "")
 	(unspec:P [(const_int 0)] UNSPEC_SHARED_DATA))]
Index: gcc/fortran/ChangeLog.gomp
===================================================================
--- gcc/fortran/ChangeLog.gomp	(revision 223360)
+++ gcc/fortran/ChangeLog.gomp	(working copy)
@@ -1,3 +1,7 @@
+2015-05-19  Bernd Schmidt  <bernds@codesourcery.com>
+
+	* types.def (BT_FN_ULONGLONG_ULONGLONG): Define.
+
 2015-05-13  Cesar Philippidis  <cesar@codesourcery.com>
 
 	* f95-lang.c (gfc_attribute_table): Add and "oacc function"
Index: gcc/fortran/types.def
===================================================================
--- gcc/fortran/types.def	(revision 223360)
+++ gcc/fortran/types.def	(working copy)
@@ -84,6 +84,7 @@ DEF_FUNCTION_TYPE_1 (BT_FN_VOID_PTRPTR,
 DEF_FUNCTION_TYPE_1 (BT_FN_VOID_VPTR, BT_VOID, BT_VOLATILE_PTR)
 DEF_FUNCTION_TYPE_1 (BT_FN_INT_INT, BT_INT, BT_INT)
 DEF_FUNCTION_TYPE_1 (BT_FN_UINT_UINT, BT_UINT, BT_UINT)
+DEF_FUNCTION_TYPE_1 (BT_FN_ULONGLONG_ULONGLONG, BT_ULONGLONG, BT_ULONGLONG)
 DEF_FUNCTION_TYPE_1 (BT_FN_PTR_PTR, BT_PTR, BT_PTR)
 DEF_FUNCTION_TYPE_1 (BT_FN_VOID_INT, BT_VOID, BT_INT)
 DEF_FUNCTION_TYPE_1 (BT_FN_BOOL_INT, BT_BOOL, BT_INT)
Index: gcc/omp-builtins.def
===================================================================
--- gcc/omp-builtins.def	(revision 223360)
+++ gcc/omp-builtins.def	(working copy)
@@ -77,6 +77,10 @@ DEF_GOACC_BUILTIN (BUILT_IN_GOACC_GET_GA
 		   BT_FN_PTR, ATTR_NOTHROW_LEAF_LIST)
 DEF_GOACC_BUILTIN (BUILT_IN_GOACC_DEVICEPTR, "GOACC_deviceptr",
 		   BT_FN_PTR_PTR, ATTR_CONST_NOTHROW_LEAF_LIST)
+DEF_GOACC_BUILTIN (BUILT_IN_GOACC_THREAD_BROADCAST, "GOACC_thread_broadcast",
+		   BT_FN_UINT_UINT, ATTR_NOTHROW_LEAF_LIST)
+DEF_GOACC_BUILTIN (BUILT_IN_GOACC_THREAD_BROADCAST_LL, "GOACC_thread_broadcast_ll",
+		   BT_FN_ULONGLONG_ULONGLONG, ATTR_NOTHROW_LEAF_LIST)
 
 DEF_GOACC_BUILTIN_COMPILER (BUILT_IN_ACC_ON_DEVICE, "acc_on_device",
 			    BT_FN_INT_INT, ATTR_CONST_NOTHROW_LEAF_LIST)
Index: gcc/optabs.def
===================================================================
--- gcc/optabs.def	(revision 223360)
+++ gcc/optabs.def	(working copy)
@@ -332,3 +332,5 @@ OPTAB_D (atomic_xor_optab, "atomic_xor$I
 
 OPTAB_D (get_thread_pointer_optab, "get_thread_pointer$I$a")
 OPTAB_D (set_thread_pointer_optab, "set_thread_pointer$I$a")
+
+OPTAB_D (oacc_thread_broadcast_optab, "oacc_thread_broadcast$I$a")

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]