[PATCH] new patterns for OpenMP SIMD-via-SIMT

Alexander Monakov amonakov@ispras.ru
Wed Apr 20 16:57:00 GMT 2016


This patch adds a few nvptx insn patterns used for OpenMP SIMD
reduction/lastprivate/ordered lowering for SIMT execution.  OpenMP lowering
produces GOMP_SIMT_... internal functions when lowering SIMD constructs that
can be offloaded to a SIMT device.  After LTO stream-in, those internal
functions are trivially folded when compiling for non-SIMT execution;
otherwise they are kept and expanded into these insns.

Previously posted here:

[gomp-nvptx 01/13] nvptx backend: new patterns for OpenMP SIMD-via-SIMT
https://gcc.gnu.org/ml/gcc-patches/2016-01/msg01550.html
 
2016-01-17  Alexander Monakov  <amonakov@ispras.ru>

	* config/nvptx/nvptx-protos.h (nvptx_shuffle_kind): Move enum
	declaration from nvptx.c.
	(nvptx_gen_shuffle): Declare.
	* config/nvptx/nvptx.c (nvptx_shuffle_kind): Moved to nvptx-protos.h.
	(nvptx_gen_shuffle): No longer static.
	* config/nvptx/nvptx.md (UNSPEC_VOTE_BALLOT): New unspec.
	(UNSPEC_LANEID): Ditto.
	(UNSPECV_NOUNROLL): New unspecv.
	(nvptx_vote_ballot): New pattern.
	(omp_simt_lane): Ditto.
	(nvptx_nounroll): Ditto.
	(omp_simt_last_lane): Ditto.
	(omp_simt_ordered): Ditto.
	(omp_simt_vote_any): Ditto.
	(omp_simt_xchg_bfly): Ditto.
	(omp_simt_xchg_idx): Ditto.
	* target-insns.def (omp_simt_lane): New.
	(omp_simt_last_lane): New.
	(omp_simt_ordered): New.
	(omp_simt_vote_any): New.
	(omp_simt_xchg_bfly): New.
	(omp_simt_xchg_idx): New.

diff --git a/gcc/config/nvptx/nvptx-protos.h b/gcc/config/nvptx/nvptx-protos.h
index ec4588e..2ea3d54 100644
--- a/gcc/config/nvptx/nvptx-protos.h
+++ b/gcc/config/nvptx/nvptx-protos.h
@@ -21,6 +21,16 @@
 #ifndef GCC_NVPTX_PROTOS_H
 #define GCC_NVPTX_PROTOS_H
 
+/* The kind of shuffle instruction.  */
+enum nvptx_shuffle_kind
+{
+  SHUFFLE_UP,
+  SHUFFLE_DOWN,
+  SHUFFLE_BFLY,
+  SHUFFLE_IDX,
+  SHUFFLE_MAX
+};
+
 extern void nvptx_declare_function_name (FILE *, const char *, const_tree decl);
 extern void nvptx_declare_object_name (FILE *file, const char *name,
 				       const_tree decl);
@@ -36,6 +46,7 @@ extern void nvptx_register_pragmas (void);
 extern void nvptx_expand_oacc_fork (unsigned);
 extern void nvptx_expand_oacc_join (unsigned);
 extern void nvptx_expand_call (rtx, rtx);
+extern rtx nvptx_gen_shuffle (rtx, rtx, rtx, nvptx_shuffle_kind);
 extern rtx nvptx_expand_compare (rtx);
 extern const char *nvptx_ptx_type_from_mode (machine_mode, bool);
 extern const char *nvptx_output_mov_insn (rtx, rtx);
diff --git a/gcc/config/nvptx/nvptx.c b/gcc/config/nvptx/nvptx.c
index 2d4dad1..e9e4d06 100644
--- a/gcc/config/nvptx/nvptx.c
+++ b/gcc/config/nvptx/nvptx.c
@@ -70,16 +70,6 @@
 /* This file should be included last.  */
 #include "target-def.h"
 
-/* The kind of shuffe instruction.  */
-enum nvptx_shuffle_kind
-{
-  SHUFFLE_UP,
-  SHUFFLE_DOWN,
-  SHUFFLE_BFLY,
-  SHUFFLE_IDX,
-  SHUFFLE_MAX
-};
-
 /* The various PTX memory areas an object might reside in.  */
 enum nvptx_data_area
 {
@@ -1266,7 +1407,7 @@ nvptx_gen_pack (rtx dst, rtx src0, rtx src1)
 /* Generate an instruction or sequence to broadcast register REG
    across the vectors of a single warp.  */
 
-static rtx
+rtx
 nvptx_gen_shuffle (rtx dst, rtx src, rtx idx, nvptx_shuffle_kind kind)
 {
   rtx res;
diff --git a/gcc/config/nvptx/nvptx.md b/gcc/config/nvptx/nvptx.md
index 33a4862..e5650b6 100644
--- a/gcc/config/nvptx/nvptx.md
+++ b/gcc/config/nvptx/nvptx.md
@@ -40,6 +40,10 @@ (define_c_enum "unspec" [
 
    UNSPEC_BIT_CONV
 
+   UNSPEC_VOTE_BALLOT
+
+   UNSPEC_LANEID
+
    UNSPEC_SHUFFLE
    UNSPEC_BR_UNIFIED
 ])
@@ -1123,6 +1160,81 @@ (define_insn "nvptx_shuffle<mode>"
   ""
   "%.\\tshfl%S3.b32\\t%0, %1, %2, 31;")
 
+(define_insn "nvptx_vote_ballot"
+  [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
+	(unspec:SI [(match_operand:BI 1 "nvptx_register_operand" "R")]
+		   UNSPEC_VOTE_BALLOT))]
+  ""
+  "%.\\tvote.ballot.b32\\t%0, %1;")
+
+(define_insn "omp_simt_lane"
+  [(set (match_operand:SI 0 "nvptx_register_operand" "")
+	(unspec:SI [(const_int 0)] UNSPEC_LANEID))]
+  ""
+  "%.\\tmov.u32\\t%0, %%laneid;")
+
+(define_insn "nvptx_nounroll"
+  [(unspec_volatile [(const_int 0)] UNSPECV_NOUNROLL)]
+  ""
+  "\\t.pragma \\\"nounroll\\\";"
+  [(set_attr "predicable" "false")])
+
+(define_expand "omp_simt_last_lane"
+  [(match_operand:SI 0 "nvptx_register_operand" "=R")
+   (match_operand:SI 1 "nvptx_register_operand" "R")]
+  ""
+{
+  rtx pred = gen_reg_rtx (BImode);
+  rtx tmp = gen_reg_rtx (SImode);
+  emit_move_insn (pred, gen_rtx_NE (BImode, operands[1], const0_rtx));
+  emit_insn (gen_nvptx_vote_ballot (tmp, pred));
+  emit_insn (gen_ctzsi2 (operands[0], tmp));
+  DONE;
+})
+
+(define_expand "omp_simt_ordered"
+  [(match_operand:SI 0 "nvptx_register_operand" "=R")
+   (match_operand:SI 1 "nvptx_register_operand" "R")]
+  ""
+{
+  emit_move_insn (operands[0], operands[1]);
+  emit_insn (gen_nvptx_nounroll ());
+  DONE;
+})
+
+(define_expand "omp_simt_vote_any"
+  [(match_operand:SI 0 "nvptx_register_operand" "=R")
+   (match_operand:SI 1 "nvptx_register_operand" "R")]
+  ""
+{
+  rtx pred = gen_reg_rtx (BImode);
+  emit_move_insn (pred, gen_rtx_NE (BImode, operands[1], const0_rtx));
+  emit_insn (gen_nvptx_vote_ballot (operands[0], pred));
+  DONE;
+})
+
+(define_expand "omp_simt_xchg_bfly"
+  [(match_operand 0 "nvptx_register_operand" "=R")
+   (match_operand 1 "nvptx_register_operand" "R")
+   (match_operand:SI 2 "nvptx_nonmemory_operand" "Ri")]
+  ""
+{
+  emit_insn (nvptx_gen_shuffle (operands[0], operands[1], operands[2],
+				SHUFFLE_BFLY));
+  DONE;
+})
+
+(define_expand "omp_simt_xchg_idx"
+  [(match_operand 0 "nvptx_register_operand" "=R")
+   (match_operand 1 "nvptx_register_operand" "R")
+   (match_operand:SI 2 "nvptx_nonmemory_operand" "Ri")]
+  ""
+{
+  emit_insn (nvptx_gen_shuffle (operands[0], operands[1], operands[2],
+				SHUFFLE_IDX));
+  DONE;
+})
+
 ;; extract parts of a 64 bit object into 2 32-bit ints
 (define_insn "unpack<mode>si2"
   [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
diff --git a/gcc/target-insns.def b/gcc/target-insns.def
index a6a040e..e011a5a 100644
--- a/gcc/target-insns.def
+++ b/gcc/target-insns.def
@@ -68,6 +68,12 @@ DEF_TARGET_INSN (oacc_dim_pos, (rtx x0, rtx x1))
 DEF_TARGET_INSN (oacc_dim_size, (rtx x0, rtx x1))
 DEF_TARGET_INSN (oacc_fork, (rtx x0, rtx x1, rtx x2))
 DEF_TARGET_INSN (oacc_join, (rtx x0, rtx x1, rtx x2))
+DEF_TARGET_INSN (omp_simt_lane, (rtx x0))
+DEF_TARGET_INSN (omp_simt_last_lane, (rtx x0, rtx x1))
+DEF_TARGET_INSN (omp_simt_ordered, (rtx x0, rtx x1))
+DEF_TARGET_INSN (omp_simt_vote_any, (rtx x0, rtx x1))
+DEF_TARGET_INSN (omp_simt_xchg_bfly, (rtx x0, rtx x1, rtx x2))
+DEF_TARGET_INSN (omp_simt_xchg_idx, (rtx x0, rtx x1, rtx x2))
 DEF_TARGET_INSN (prefetch, (rtx x0, rtx x1, rtx x2))
 DEF_TARGET_INSN (probe_stack, (rtx x0))
 DEF_TARGET_INSN (probe_stack_address, (rtx x0))




More information about the Gcc-patches mailing list