[PATCH] [ARC] Add SIMD extensions for ARC HS

Claudiu Zissulescu Claudiu.Zissulescu@synopsys.com
Fri Apr 8 08:33:00 GMT 2016


This patch adds support for the new SIMD operations introduced with the
ARC HS CPU class.  It does not aim at peak performance; it makes the
newly added operations available and enables autovectorization for them.

The patch was tested using dg.exp, compile.exp, and execute.exp for
both arc700 and archs, with and without SIMD support enabled.

OK to apply?
Claudiu

gcc/
2016-03-14  Claudiu Zissulescu  <claziss@synopsys.com>

	* config/arc/arc.c (arc_vector_mode_supported_p): Add support for
	the new ARC HS SIMD instructions.
	(arc_preferred_simd_mode): New function.
	(arc_autovectorize_vector_sizes): Likewise.
	(TARGET_VECTORIZE_PREFERRED_SIMD_MODE)
	(TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES): Define.
	(arc_init_reg_tables): Accept new ARC HS SIMD modes.
	(arc_init_builtins): Add new SIMD builtin types.
	(arc_split_move): Handle 64 bit vector moves.
	* config/arc/arc.h (TARGET_PLUS_DMPY, TARGET_PLUS_MACD)
	(TARGET_PLUS_QMACW): Define.
	* config/arc/builtins.def (QMACH, QMACHU, QMPYH, QMPYHU, DMACH)
	(DMACHU, DMPYH, DMPYHU, DMACWH, DMACWHU, VMAC2H, VMAC2HU, VMPY2H)
	(VMPY2HU, VADDSUB2H, VSUBADD2H, VADDSUB, VSUBADD, VADDSUB4H)
	(VSUBADD4H): New builtins.
	* config/arc/simdext.md: Add new ARC HS SIMD instructions.
	* testsuite/gcc.target/arc/builtin_simdarc.c: New file.
---
 gcc/config/arc/arc.c                           | 112 ++++-
 gcc/config/arc/arc.h                           |   6 +
 gcc/config/arc/builtins.def                    |  27 ++
 gcc/config/arc/simdext.md                      | 571 +++++++++++++++++++++++++
 gcc/testsuite/gcc.target/arc/builtin_simdarc.c |  38 ++
 5 files changed, 747 insertions(+), 7 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/arc/builtin_simdarc.c

diff --git a/gcc/config/arc/arc.c b/gcc/config/arc/arc.c
index d60db50..d120946 100644
--- a/gcc/config/arc/arc.c
+++ b/gcc/config/arc/arc.c
@@ -247,16 +247,47 @@ static bool arc_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT,
 static bool
 arc_vector_mode_supported_p (machine_mode mode)
 {
-  if (!TARGET_SIMD_SET)
-    return false;
+  switch (mode)
+    {
+    case V2HImode:
+      return TARGET_PLUS_DMPY;
+    case V4HImode:
+    case V2SImode:
+      return TARGET_PLUS_QMACW;
+    case V4SImode:
+    case V8HImode:
+      return TARGET_SIMD_SET;
 
-  if ((mode == V4SImode)
-      || (mode == V8HImode))
-    return true;
+    default:
+      return false;
+    }
+}
 
-  return false;
+/* Implements target hook TARGET_VECTORIZE_PREFERRED_SIMD_MODE.  Falls
+   back to word_mode when the vector mode for MODE is not enabled.  */
+static machine_mode
+arc_preferred_simd_mode (machine_mode mode)
+{
+  switch (mode)
+    {
+    case HImode:
+      return (TARGET_PLUS_QMACW ? V4HImode
+	      : TARGET_PLUS_DMPY ? V2HImode : word_mode);
+    case SImode:
+      return TARGET_PLUS_QMACW ? V2SImode : word_mode;
+    default:
+      return word_mode;
+    }
 }
 
+/* Target hook TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES: 8 | 4 or 0.  */
+static unsigned int
+arc_autovectorize_vector_sizes (void)
+{
+  if (!TARGET_PLUS_QMACW)
+    return 0;
+  return 8 | 4;
+}
 
 /* TARGET_PRESERVE_RELOAD_P is still awaiting patch re-evaluation / review.  */
 static bool arc_preserve_reload_p (rtx in) ATTRIBUTE_UNUSED;
@@ -345,6 +376,12 @@ static void arc_finalize_pic (void);
 #undef TARGET_VECTOR_MODE_SUPPORTED_P
 #define TARGET_VECTOR_MODE_SUPPORTED_P arc_vector_mode_supported_p
 
+#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
+#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arc_preferred_simd_mode
+
+#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
+#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES arc_autovectorize_vector_sizes
+
 #undef TARGET_CAN_USE_DOLOOP_P
 #define TARGET_CAN_USE_DOLOOP_P arc_can_use_doloop_p
 
@@ -1214,7 +1251,12 @@ arc_init_reg_tables (void)
 	    arc_mode_class[i] = 0;
 	  break;
 	case MODE_VECTOR_INT:
-	  arc_mode_class [i] = (1<< (int) V_MODE);
+	  if (GET_MODE_SIZE (m) == 4)
+	    arc_mode_class[i] = (1 << (int) S_MODE);
+	  else if (GET_MODE_SIZE (m) == 8)
+	    arc_mode_class[i] = (1 << (int) D_MODE);
+	  else
+	    arc_mode_class[i] = (1 << (int) V_MODE);
 	  break;
 	case MODE_CC:
 	default:
@@ -5277,6 +5319,15 @@ arc_builtin_decl (unsigned id, bool initialize_p ATTRIBUTE_UNUSED)
 static void
 arc_init_builtins (void)
 {
+  tree V4HI_type_node;
+  tree V2SI_type_node;
+  tree V2HI_type_node;
+
+  /* Vector types based on HS SIMD elements.  */
+  V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
+  V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
+  V2HI_type_node = build_vector_type_for_mode (intHI_type_node, V2HImode);
+
   tree pcvoid_type_node
     = build_pointer_type (build_qualified_type (void_type_node,
 						TYPE_QUAL_CONST));
@@ -5341,6 +5392,28 @@ arc_init_builtins (void)
   tree v8hi_ftype_v8hi
     = build_function_type_list (V8HI_type_node, V8HI_type_node,
 				NULL_TREE);
+  /* ARCv2 SIMD types.  */
+  tree long_ftype_v4hi_v4hi
+    = build_function_type_list (long_long_integer_type_node,
+				V4HI_type_node,	V4HI_type_node, NULL_TREE);
+  tree int_ftype_v2hi_v2hi
+    = build_function_type_list (integer_type_node,
+				V2HI_type_node, V2HI_type_node, NULL_TREE);
+  tree v2si_ftype_v2hi_v2hi
+    = build_function_type_list (V2SI_type_node,
+				V2HI_type_node, V2HI_type_node, NULL_TREE);
+  tree v2hi_ftype_v2hi_v2hi
+    = build_function_type_list (V2HI_type_node,
+				V2HI_type_node, V2HI_type_node, NULL_TREE);
+  tree v2si_ftype_v2si_v2si
+    = build_function_type_list (V2SI_type_node,
+				V2SI_type_node, V2SI_type_node, NULL_TREE);
+  tree v4hi_ftype_v4hi_v4hi
+    = build_function_type_list (V4HI_type_node,
+				V4HI_type_node, V4HI_type_node, NULL_TREE);
+  tree long_ftype_v2si_v2hi
+    = build_function_type_list (long_long_integer_type_node,
+				V2SI_type_node, V2HI_type_node, NULL_TREE);
 
   /* Add the builtins.  */
 #define DEF_BUILTIN(NAME, N_ARGS, TYPE, ICODE, MASK)			\
@@ -8706,6 +8779,31 @@ arc_split_move (rtx *operands)
       return;
     }
 
+  if (TARGET_PLUS_QMACW
+      && GET_CODE (operands[1]) == CONST_VECTOR)
+    {
+      HOST_WIDE_INT intval0, intval1;
+      if (GET_MODE (operands[1]) == V2SImode)
+	{
+	  intval0 = INTVAL (XVECEXP (operands[1], 0, 0));
+	  intval1 = INTVAL (XVECEXP (operands[1], 0, 1));
+	}
+      else
+	{
+	  intval1  = INTVAL (XVECEXP (operands[1], 0, 3)) << 16;
+	  intval1 |= INTVAL (XVECEXP (operands[1], 0, 2)) & 0xFFFF;
+	  intval0  = INTVAL (XVECEXP (operands[1], 0, 1)) << 16;
+	  intval0 |= INTVAL (XVECEXP (operands[1], 0, 0)) & 0xFFFF;
+	}
+      xop[0] = gen_rtx_REG (SImode, REGNO (operands[0]));
+      xop[3] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
+      xop[2] = GEN_INT (trunc_int_for_mode (intval0, SImode));
+      xop[1] = GEN_INT (trunc_int_for_mode (intval1, SImode));
+      emit_move_insn (xop[0], xop[2]);
+      emit_move_insn (xop[3], xop[1]);
+      return;
+    }
+
   for (i = 0; i < 2; i++)
     {
       if (MEM_P (operands[i]) && auto_inc_p (XEXP (operands[i], 0)))
diff --git a/gcc/config/arc/arc.h b/gcc/config/arc/arc.h
index 21c049f..7fc465b 100644
--- a/gcc/config/arc/arc.h
+++ b/gcc/config/arc/arc.h
@@ -1723,6 +1723,12 @@ enum
 /* Any multiplication feature macro.  */
 #define TARGET_ANY_MPY						\
   (TARGET_MPY || TARGET_MUL64_SET || TARGET_MULMAC_32BY16_SET)
+/* PLUS_DMPY feature macro: needs TARGET_HS and arc_mpy_option > 6.  */
+#define TARGET_PLUS_DMPY  ((arc_mpy_option > 6) && TARGET_HS)
+/* PLUS_MACD feature macro: needs TARGET_HS and arc_mpy_option > 7.  */
+#define TARGET_PLUS_MACD  ((arc_mpy_option > 7) && TARGET_HS)
+/* PLUS_QMACW feature macro: needs TARGET_HS and arc_mpy_option > 8.  */
+#define TARGET_PLUS_QMACW ((arc_mpy_option > 8) && TARGET_HS)
 
 /* ARC600 and ARC601 feature macro.  */
 #define TARGET_ARC600_FAMILY (TARGET_ARC600 || TARGET_ARC601)
diff --git a/gcc/config/arc/builtins.def b/gcc/config/arc/builtins.def
index 19be1d2..8c71d30 100644
--- a/gcc/config/arc/builtins.def
+++ b/gcc/config/arc/builtins.def
@@ -193,3 +193,30 @@ DEF_BUILTIN (VINTI, 1, void_ftype_int, vinti_insn, TARGET_SIMD_SET)
 
 /* END SIMD marker.  */
 DEF_BUILTIN (SIMD_END, 0, void_ftype_void, nothing, 0)
+
+/* ARCv2 SIMD instructions that use/clobber the accumulator reg.  */
+DEF_BUILTIN (QMACH,      2, long_ftype_v4hi_v4hi,   qmach,      TARGET_PLUS_QMACW)
+DEF_BUILTIN (QMACHU,     2, long_ftype_v4hi_v4hi,   qmachu,     TARGET_PLUS_QMACW)
+DEF_BUILTIN (QMPYH,      2, long_ftype_v4hi_v4hi,   qmpyh,      TARGET_PLUS_QMACW)
+DEF_BUILTIN (QMPYHU,     2, long_ftype_v4hi_v4hi,   qmpyhu,     TARGET_PLUS_QMACW)
+
+DEF_BUILTIN (DMACH,      2, int_ftype_v2hi_v2hi,    dmach,      TARGET_PLUS_DMPY)
+DEF_BUILTIN (DMACHU,     2, int_ftype_v2hi_v2hi,    dmachu,     TARGET_PLUS_DMPY)
+DEF_BUILTIN (DMPYH,      2, int_ftype_v2hi_v2hi,    dmpyh,      TARGET_PLUS_DMPY)
+DEF_BUILTIN (DMPYHU,     2, int_ftype_v2hi_v2hi,    dmpyhu,     TARGET_PLUS_DMPY)
+
+DEF_BUILTIN (DMACWH,     2, long_ftype_v2si_v2hi,   dmacwh,     TARGET_PLUS_QMACW)
+DEF_BUILTIN (DMACWHU,    2, long_ftype_v2si_v2hi,   dmacwhu,    TARGET_PLUS_QMACW)
+
+DEF_BUILTIN (VMAC2H,     2, v2si_ftype_v2hi_v2hi,   vmac2h,     TARGET_PLUS_MACD)
+DEF_BUILTIN (VMAC2HU,    2, v2si_ftype_v2hi_v2hi,   vmac2hu,    TARGET_PLUS_MACD)
+DEF_BUILTIN (VMPY2H,     2, v2si_ftype_v2hi_v2hi,   vmpy2h,     TARGET_PLUS_MACD)
+DEF_BUILTIN (VMPY2HU,    2, v2si_ftype_v2hi_v2hi,   vmpy2hu,    TARGET_PLUS_MACD)
+
+/* Combined add/sub HS SIMD instructions.  */
+DEF_BUILTIN (VADDSUB2H,  2, v2hi_ftype_v2hi_v2hi,   addsubv2hi3, TARGET_PLUS_DMPY)
+DEF_BUILTIN (VSUBADD2H,  2, v2hi_ftype_v2hi_v2hi,   subaddv2hi3, TARGET_PLUS_DMPY)
+DEF_BUILTIN (VADDSUB,    2, v2si_ftype_v2si_v2si,   addsubv2si3, TARGET_PLUS_QMACW)
+DEF_BUILTIN (VSUBADD,    2, v2si_ftype_v2si_v2si,   subaddv2si3, TARGET_PLUS_QMACW)
+DEF_BUILTIN (VADDSUB4H,  2, v4hi_ftype_v4hi_v4hi,   addsubv4hi3, TARGET_PLUS_QMACW)
+DEF_BUILTIN (VSUBADD4H,  2, v4hi_ftype_v4hi_v4hi,   subaddv4hi3, TARGET_PLUS_QMACW)
diff --git a/gcc/config/arc/simdext.md b/gcc/config/arc/simdext.md
index 9fd9d62..51869e3 100644
--- a/gcc/config/arc/simdext.md
+++ b/gcc/config/arc/simdext.md
@@ -1288,3 +1288,574 @@
   [(set_attr "type" "simd_vcontrol")
    (set_attr "length" "4")
    (set_attr "cond" "nocond")])
+
+;; New ARCv2 SIMD extensions
+
+;;64-bit vectors of halfwords and words
+(define_mode_iterator VWH [V4HI V2SI])
+
+;;double element vectors
+(define_mode_iterator VDV [V2HI V2SI])
+(define_mode_attr V_addsub [(V2HI "HI") (V2SI "SI")])
+(define_mode_attr V_addsub_suffix [(V2HI "2h") (V2SI "")])
+
+;;all vectors
+(define_mode_iterator VCT [V2HI V4HI V2SI])
+(define_mode_attr V_suffix [(V2HI "2h") (V4HI "4h") (V2SI "2")])
+
+;; Widening operations.
+(define_code_iterator SE [sign_extend zero_extend])
+(define_code_attr V_US [(sign_extend "s") (zero_extend "u")])
+(define_code_attr V_US_suffix [(sign_extend "") (zero_extend "u")])
+
+
+;; Move patterns
+(define_expand "movv2hi"
+  [(set (match_operand:V2HI 0 "move_dest_operand" "")
+	(match_operand:V2HI 1 "general_operand" ""))]
+  ""
+  "{
+    if (prepare_move_operands (operands, V2HImode))
+         DONE;
+   }")
+
+(define_insn_and_split "*movv2hi_insn"
+  [(set (match_operand:V2HI 0 "nonimmediate_operand" "=r,r,r,m")
+	(match_operand:V2HI 1 "general_operand"       "i,r,m,r"))]
+  "(register_operand (operands[0], V2HImode)
+    || register_operand (operands[1], V2HImode))"
+  "@
+   #
+   mov%? %0, %1
+   ld%U1%V1 %0,%1
+   st%U0%V0 %1,%0"
+  "reload_completed && GET_CODE (operands[1]) == CONST_VECTOR"
+  [(set (match_dup 0) (match_dup 2))]
+  {
+   HOST_WIDE_INT intval = INTVAL (XVECEXP (operands[1], 0, 1)) << 16;
+   intval |= INTVAL (XVECEXP (operands[1], 0, 0)) & 0xFFFF;
+
+   operands[0] = gen_rtx_REG (SImode, REGNO (operands[0]));
+   operands[2] = GEN_INT (trunc_int_for_mode (intval, SImode));
+  }
+  [(set_attr "type" "move,move,load,store")
+   (set_attr "predicable" "yes,yes,no,no")
+   (set_attr "iscompact"  "false,false,false,false")
+   ])
+
+(define_expand "movmisalignv2hi"
+ [(set (match_operand:V2HI 0 "general_operand" "")
+       (match_operand:V2HI 1 "general_operand" ""))]
+ ""
+{
+ if (!register_operand (operands[0], V2HImode)
+      && !register_operand (operands[1], V2HImode))
+    operands[1] = force_reg (V2HImode, operands[1]);
+})
+
+(define_expand "mov<mode>"
+  [(set (match_operand:VWH 0 "move_dest_operand" "")
+	(match_operand:VWH 1 "general_operand" ""))]
+  ""
+  "{
+    if (GET_CODE (operands[0]) == MEM)
+     operands[1] = force_reg (<MODE>mode, operands[1]);
+   }")
+
+(define_insn_and_split "*mov<mode>_insn"
+  [(set (match_operand:VWH 0 "move_dest_operand" "=r,r,r,m")
+	(match_operand:VWH 1 "general_operand"    "i,r,m,r"))]
+  "TARGET_PLUS_QMACW
+   && (register_operand (operands[0], <MODE>mode)
+       || register_operand (operands[1], <MODE>mode))"
+  "*
+{
+  switch (which_alternative)
+    {
+     default:
+       return \"#\";
+
+     case 1:
+       return \"vadd2 %0, %1, 0\";
+
+     case 2:
+       if (TARGET_LL64)
+         return \"ldd%U1%V1 %0,%1\";
+       return \"#\";
+
+     case 3:
+       if (TARGET_LL64)
+	   return \"std%U0%V0 %1,%0\";
+	 return \"#\";
+    }
+}"
+  "reload_completed"
+  [(const_int 0)]
+  {
+   arc_split_move (operands);
+   DONE;
+  }
+  [(set_attr "type" "move,move,load,store")
+   (set_attr "predicable" "yes,no,no,no")
+   (set_attr "iscompact"  "false,false,false,false")
+   ])
+
+(define_expand "movmisalign<mode>"
+ [(set (match_operand:VWH 0 "general_operand" "")
+       (match_operand:VWH 1 "general_operand" ""))]
+ ""
+{
+ if (!register_operand (operands[0], <MODE>mode)
+      && !register_operand (operands[1], <MODE>mode))
+    operands[1] = force_reg (<MODE>mode, operands[1]);
+})
+
+(define_insn "bswapv2hi2"
+  [(set (match_operand:V2HI 0 "register_operand" "=r,r")
+        (bswap:V2HI (match_operand:V2HI 1 "nonmemory_operand" "r,i")))]
+  "TARGET_V2 && TARGET_SWAP"
+  "swape %0, %1"
+  [(set_attr "length" "4,8")
+   (set_attr "type" "two_cycle_core")])
+
+;; Simple arithmetic insns
+(define_insn "add<mode>3"
+  [(set (match_operand:VCT 0 "register_operand"          "=r,r")
+	(plus:VCT (match_operand:VCT 1 "register_operand" "0,r")
+		  (match_operand:VCT 2 "register_operand" "r,r")))]
+  "TARGET_PLUS_DMPY"
+  "vadd<V_suffix>%? %0, %1, %2"
+  [(set_attr "length" "4")
+   (set_attr "type" "multi")
+   (set_attr "predicable" "yes,no")
+   (set_attr "cond" "canuse,nocond")])
+
+(define_insn "sub<mode>3"
+  [(set (match_operand:VCT 0 "register_operand"           "=r,r")
+	(minus:VCT (match_operand:VCT 1 "register_operand" "0,r")
+		   (match_operand:VCT 2 "register_operand" "r,r")))]
+  "TARGET_PLUS_DMPY"
+  "vsub<V_suffix>%? %0, %1, %2"
+  [(set_attr "length" "4")
+   (set_attr "type" "multi")
+   (set_attr "predicable" "yes,no")
+   (set_attr "cond" "canuse,nocond")])
+
+;; Combined arithmetic ops
+(define_insn "addsub<mode>3"
+  [(set (match_operand:VDV 0 "register_operand" "=r,r")
+	(vec_concat:VDV
+	 (plus:<V_addsub> (vec_select:<V_addsub> (match_operand:VDV 1 "register_operand" "0,r")
+						 (parallel [(const_int 0)]))
+			  (vec_select:<V_addsub> (match_operand:VDV 2 "register_operand" "r,r")
+						 (parallel [(const_int 0)])))
+	 (minus:<V_addsub> (vec_select:<V_addsub> (match_dup 1) (parallel [(const_int 1)]))
+			   (vec_select:<V_addsub> (match_dup 2) (parallel [(const_int 1)])))))]
+  "TARGET_PLUS_DMPY"
+  "vaddsub<V_addsub_suffix>%? %0, %1, %2"
+  [(set_attr "length" "4")
+   (set_attr "type" "multi")
+   (set_attr "predicable" "yes,no")
+   (set_attr "cond" "canuse,nocond")])
+
+(define_insn "subadd<mode>3"
+  [(set (match_operand:VDV 0 "register_operand" "=r,r")
+	(vec_concat:VDV
+	 (minus:<V_addsub> (vec_select:<V_addsub> (match_operand:VDV 1 "register_operand" "0,r")
+						  (parallel [(const_int 0)]))
+			   (vec_select:<V_addsub> (match_operand:VDV 2 "register_operand" "r,r")
+						  (parallel [(const_int 0)])))
+	 (plus:<V_addsub> (vec_select:<V_addsub> (match_dup 1) (parallel [(const_int 1)]))
+			  (vec_select:<V_addsub> (match_dup 2) (parallel [(const_int 1)])))))]
+  "TARGET_PLUS_DMPY"
+  "vsubadd<V_addsub_suffix>%? %0, %1, %2"
+  [(set_attr "length" "4")
+   (set_attr "type" "multi")
+   (set_attr "predicable" "yes,no")
+   (set_attr "cond" "canuse,nocond")])
+
+(define_insn "addsubv4hi3"
+  [(set (match_operand:V4HI 0 "even_register_operand" "=r,r")
+	(vec_concat:V4HI
+	 (vec_concat:V2HI
+	  (plus:HI (vec_select:HI (match_operand:V4HI 1 "even_register_operand" "0,r")
+				  (parallel [(const_int 0)]))
+		   (vec_select:HI (match_operand:V4HI 2 "even_register_operand" "r,r")
+				  (parallel [(const_int 0)])))
+	  (minus:HI (vec_select:HI (match_dup 1) (parallel [(const_int 1)]))
+		    (vec_select:HI (match_dup 2) (parallel [(const_int 1)]))))
+	 (vec_concat:V2HI
+	  (plus:HI (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
+		   (vec_select:HI (match_dup 2) (parallel [(const_int 2)])))
+	  (minus:HI (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))
+		    (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
+	 ))]
+  "TARGET_PLUS_QMACW"
+  "vaddsub4h%? %0, %1, %2"
+  [(set_attr "length" "4")
+   (set_attr "type" "multi")
+   (set_attr "predicable" "yes,no")
+   (set_attr "cond" "canuse,nocond")])
+
+(define_insn "subaddv4hi3"
+  [(set (match_operand:V4HI 0 "even_register_operand" "=r,r")
+	(vec_concat:V4HI
+	 (vec_concat:V2HI
+	  (minus:HI (vec_select:HI (match_operand:V4HI 1 "even_register_operand" "0,r")
+				   (parallel [(const_int 0)]))
+		    (vec_select:HI (match_operand:V4HI 2 "even_register_operand" "r,r")
+				  (parallel [(const_int 0)])))
+	  (plus:HI (vec_select:HI (match_dup 1) (parallel [(const_int 1)]))
+		   (vec_select:HI (match_dup 2) (parallel [(const_int 1)]))))
+	 (vec_concat:V2HI
+	  (minus:HI (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
+		    (vec_select:HI (match_dup 2) (parallel [(const_int 2)])))
+	  (plus:HI (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))
+		   (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
+	 ))]
+  "TARGET_PLUS_QMACW"
+  "vsubadd4h%? %0, %1, %2"
+  [(set_attr "length" "4")
+   (set_attr "type" "multi")
+   (set_attr "predicable" "yes,no")
+   (set_attr "cond" "canuse,nocond")])
+
+;; Multiplication
+(define_insn "dmpyh<V_US_suffix>"
+  [(set (match_operand:SI 0 "register_operand" "=r,r")
+	(plus:SI
+	 (mult:SI
+	  (SE:SI
+	   (vec_select:HI (match_operand:V2HI 1 "register_operand" "0,r")
+			  (parallel [(const_int 0)])))
+	  (SE:SI
+	   (vec_select:HI (match_operand:V2HI 2 "register_operand" "r,r")
+			  (parallel [(const_int 0)]))))
+	 (mult:SI
+	  (SE:SI (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
+	  (SE:SI (vec_select:HI (match_dup 2) (parallel [(const_int 1)]))))))
+   (set (reg:DI ARCV2_ACC)
+	(zero_extend:DI
+	 (plus:SI
+	  (mult:SI
+	   (SE:SI (vec_select:HI (match_dup 1) (parallel [(const_int 0)])))
+	   (SE:SI (vec_select:HI (match_dup 2) (parallel [(const_int 0)]))))
+	  (mult:SI
+	   (SE:SI (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
+	   (SE:SI (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))))))]
+  "TARGET_PLUS_DMPY"
+  "dmpy<V_US_suffix>%? %0, %1, %2"
+  [(set_attr "length" "4")
+   (set_attr "type" "multi")
+   (set_attr "predicable" "yes,no")
+   (set_attr "cond" "canuse,nocond")])
+
+;; We can use dmac as well here.  To be investigated which version
+;; brings more.
+(define_expand "sdot_prodv2hi"
+  [(match_operand:SI 0 "register_operand" "")
+   (match_operand:V2HI 1 "register_operand" "")
+   (match_operand:V2HI 2 "register_operand" "")
+   (match_operand:SI 3 "register_operand" "")]
+  "TARGET_PLUS_DMPY"
+{
+ rtx t = gen_reg_rtx (SImode);
+ emit_insn (gen_dmpyh (t, operands[1], operands[2]));
+ emit_insn (gen_addsi3 (operands[0], operands[3], t));
+ DONE;
+})
+
+(define_expand "udot_prodv2hi"
+  [(match_operand:SI 0 "register_operand" "")
+   (match_operand:V2HI 1 "register_operand" "")
+   (match_operand:V2HI 2 "register_operand" "")
+   (match_operand:SI 3 "register_operand" "")]
+  "TARGET_PLUS_DMPY"
+{
+ rtx t = gen_reg_rtx (SImode);
+ emit_insn (gen_dmpyhu (t, operands[1], operands[2]));
+ emit_insn (gen_addsi3 (operands[0], operands[3], t));
+ DONE;
+})
+
+(define_insn "arc_vec_<V_US>mult_lo_v4hi"
+ [(set (match_operand:V2SI 0 "even_register_operand"                     "=r,r")
+       (mult:V2SI (SE:V2SI (vec_select:V2HI
+			    (match_operand:V4HI 1 "even_register_operand" "0,r")
+			    (parallel [(const_int 0) (const_int 1)])))
+		  (SE:V2SI (vec_select:V2HI
+			    (match_operand:V4HI 2 "even_register_operand" "r,r")
+			    (parallel [(const_int 0) (const_int 1)])))))
+  (set (reg:V2SI ARCV2_ACC)
+       (mult:V2SI (SE:V2SI (vec_select:V2HI (match_dup 1)
+					    (parallel [(const_int 0) (const_int 1)])))
+		  (SE:V2SI (vec_select:V2HI (match_dup 2)
+					    (parallel [(const_int 0) (const_int 1)])))))
+  ]
+  "TARGET_PLUS_MACD"
+  "vmpy2h<V_US_suffix>%? %0, %1, %2"
+  [(set_attr "length" "4")
+   (set_attr "type" "multi")
+   (set_attr "predicable" "yes,no")
+   (set_attr "cond" "canuse,nocond")])
+
+(define_insn "arc_vec_<V_US>multacc_lo_v4hi"
+  [(set (reg:V2SI ARCV2_ACC)
+	(mult:V2SI (SE:V2SI (vec_select:V2HI
+			     (match_operand:V4HI 0 "even_register_operand" "r")
+			     (parallel [(const_int 0) (const_int 1)])))
+		   (SE:V2SI (vec_select:V2HI
+			     (match_operand:V4HI 1 "even_register_operand" "r")
+			     (parallel [(const_int 0) (const_int 1)])))))
+  ]
+  "TARGET_PLUS_MACD"
+  "vmpy2h<V_US_suffix>%? 0, %0, %1"
+  [(set_attr "length" "4")
+   (set_attr "type" "multi")
+   (set_attr "predicable" "no")
+   (set_attr "cond" "nocond")])
+
+(define_expand "vec_widen_<V_US>mult_lo_v4hi"
+ [(set (match_operand:V2SI 0 "even_register_operand"                 "")
+       (mult:V2SI (SE:V2SI (vec_select:V2HI
+			    (match_operand:V4HI 1 "even_register_operand" "")
+			    (parallel [(const_int 0) (const_int 1)])))
+		  (SE:V2SI (vec_select:V2HI
+			    (match_operand:V4HI 2 "even_register_operand" "")
+			    (parallel [(const_int 0) (const_int 1)])))))]
+  "TARGET_PLUS_QMACW"
+  {
+     emit_insn (gen_arc_vec_<V_US>mult_lo_v4hi (operands[0],
+						operands[1],
+						operands[2]));
+     DONE;
+  }
+)
+
+(define_insn "arc_vec_<V_US>mult_hi_v4hi"
+ [(set (match_operand:V2SI 0 "even_register_operand"                     "=r,r")
+       (mult:V2SI (SE:V2SI (vec_select:V2HI
+			    (match_operand:V4HI 1 "even_register_operand" "0,r")
+			    (parallel [(const_int 2) (const_int 3)])))
+		  (SE:V2SI (vec_select:V2HI
+			    (match_operand:V4HI 2 "even_register_operand" "r,r")
+			    (parallel [(const_int 2) (const_int 3)])))))
+  (set (reg:V2SI ARCV2_ACC)
+       (mult:V2SI (SE:V2SI (vec_select:V2HI (match_dup 1)
+					    (parallel [(const_int 2) (const_int 3)])))
+		  (SE:V2SI (vec_select:V2HI (match_dup 2)
+					    (parallel [(const_int 2) (const_int 3)])))))
+  ]
+  "TARGET_PLUS_QMACW"
+  "vmpy2h<V_US_suffix>%? %0, %R1, %R2"
+  [(set_attr "length" "4")
+   (set_attr "type" "multi")
+   (set_attr "predicable" "yes,no")
+   (set_attr "cond" "canuse,nocond")])
+
+(define_expand "vec_widen_<V_US>mult_hi_v4hi"
+ [(set (match_operand:V2SI 0 "even_register_operand"                               "")
+       (mult:V2SI (SE:V2SI (vec_select:V2HI
+				     (match_operand:V4HI 1 "even_register_operand" "")
+				     (parallel [(const_int 2) (const_int 3)])))
+		  (SE:V2SI (vec_select:V2HI
+				     (match_operand:V4HI 2 "even_register_operand" "")
+				     (parallel [(const_int 2) (const_int 3)])))))]
+  "TARGET_PLUS_MACD"
+  {
+     emit_insn (gen_arc_vec_<V_US>mult_hi_v4hi (operands[0],
+						operands[1],
+						operands[2]));
+     DONE;
+  }
+)
+
+(define_insn "arc_vec_<V_US>mac_hi_v4hi"
+ [(set (match_operand:V2SI 0 "even_register_operand"                     "=r,r")
+       (plus:V2SI
+	(reg:V2SI ARCV2_ACC)
+	(mult:V2SI (SE:V2SI (vec_select:V2HI
+			     (match_operand:V4HI 1 "even_register_operand" "0,r")
+			     (parallel [(const_int 2) (const_int 3)])))
+		   (SE:V2SI (vec_select:V2HI
+			     (match_operand:V4HI 2 "even_register_operand" "r,r")
+			     (parallel [(const_int 2) (const_int 3)]))))))
+  (set (reg:V2SI ARCV2_ACC)
+       (plus:V2SI
+	(reg:V2SI ARCV2_ACC)
+	(mult:V2SI (SE:V2SI (vec_select:V2HI (match_dup 1)
+					     (parallel [(const_int 2) (const_int 3)])))
+		   (SE:V2SI (vec_select:V2HI (match_dup 2)
+					     (parallel [(const_int 2) (const_int 3)]))))))
+  ]
+  "TARGET_PLUS_MACD"
+  "vmac2h<V_US_suffix>%? %0, %R1, %R2"
+  [(set_attr "length" "4")
+   (set_attr "type" "multi")
+   (set_attr "predicable" "yes,no")
+   (set_attr "cond" "canuse,nocond")])
+
+;; Builtins
+(define_insn "dmach"
+  [(set (match_operand:SI 0 "register_operand" "=r,r")
+	(unspec:SI [(match_operand:V2HI 1 "register_operand" "0,r")
+		    (match_operand:V2HI 2 "register_operand" "r,r")
+		    (reg:DI ARCV2_ACC)]
+		   UNSPEC_ARC_DMACH))
+   (clobber (reg:DI ARCV2_ACC))]
+  "TARGET_PLUS_DMPY"
+  "dmach%? %0, %1, %2"
+  [(set_attr "length" "4")
+   (set_attr "type" "multi")
+   (set_attr "predicable" "yes,no")
+   (set_attr "cond" "canuse,nocond")])
+
+(define_insn "dmachu"
+  [(set (match_operand:SI 0 "register_operand" "=r,r")
+	(unspec:SI [(match_operand:V2HI 1 "register_operand" "0,r")
+		    (match_operand:V2HI 2 "register_operand" "r,r")
+		    (reg:DI ARCV2_ACC)]
+		   UNSPEC_ARC_DMACHU))
+   (clobber (reg:DI ARCV2_ACC))]
+  "TARGET_PLUS_DMPY"
+  "dmachu%? %0, %1, %2"
+  [(set_attr "length" "4")
+   (set_attr "type" "multi")
+   (set_attr "predicable" "yes,no")
+   (set_attr "cond" "canuse,nocond")])
+
+(define_insn "dmacwh"
+  [(set (match_operand:DI 0 "even_register_operand" "=r,r")
+	(unspec:DI [(match_operand:V2SI 1 "even_register_operand" "0,r")
+		    (match_operand:V2HI 2 "register_operand"      "r,r")
+		    (reg:DI ARCV2_ACC)]
+		   UNSPEC_ARC_DMACWH))
+   (clobber (reg:DI ARCV2_ACC))]
+  "TARGET_PLUS_QMACW"
+  "dmacwh%? %0, %1, %2"
+  [(set_attr "length" "4")
+   (set_attr "type" "multi")
+   (set_attr "predicable" "yes,no")
+   (set_attr "cond" "canuse,nocond")])
+
+(define_insn "dmacwhu"
+  [(set (match_operand:DI 0 "register_operand" "=r,r")
+	(unspec:DI [(match_operand:V2SI 1 "even_register_operand" "0,r")
+		    (match_operand:V2HI 2 "register_operand"      "r,r")
+		    (reg:DI ARCV2_ACC)]
+		   UNSPEC_ARC_DMACWHU))
+   (clobber (reg:DI ARCV2_ACC))]
+  "TARGET_PLUS_QMACW"
+  "dmacwhu%? %0, %1, %2"
+  [(set_attr "length" "4")
+   (set_attr "type" "multi")
+   (set_attr "predicable" "yes,no")
+   (set_attr "cond" "canuse,nocond")])
+
+(define_insn "vmac2h"
+  [(set (match_operand:V2SI 0 "even_register_operand" "=r,r")
+	(unspec:V2SI [(match_operand:V2HI 1 "register_operand" "0,r")
+		      (match_operand:V2HI 2 "register_operand" "r,r")
+		      (reg:DI ARCV2_ACC)]
+		     UNSPEC_ARC_VMAC2H))
+   (clobber (reg:DI ARCV2_ACC))]
+  "TARGET_PLUS_MACD"
+  "vmac2h%? %0, %1, %2"
+  [(set_attr "length" "4")
+   (set_attr "type" "multi")
+   (set_attr "predicable" "yes,no")
+   (set_attr "cond" "canuse,nocond")])
+
+(define_insn "vmac2hu"
+  [(set (match_operand:V2SI 0 "even_register_operand" "=r,r")
+	(unspec:V2SI [(match_operand:V2HI 1 "register_operand" "0,r")
+		      (match_operand:V2HI 2 "register_operand" "r,r")
+		      (reg:DI ARCV2_ACC)]
+		   UNSPEC_ARC_VMAC2HU))
+   (clobber (reg:DI ARCV2_ACC))]
+  "TARGET_PLUS_MACD"
+  "vmac2hu%? %0, %1, %2"
+  [(set_attr "length" "4")
+   (set_attr "type" "multi")
+   (set_attr "predicable" "yes,no")
+   (set_attr "cond" "canuse,nocond")])
+
+(define_insn "vmpy2h"
+  [(set (match_operand:V2SI 0 "even_register_operand" "=r,r")
+	(unspec:V2SI [(match_operand:V2HI 1 "register_operand" "0,r")
+		      (match_operand:V2HI 2 "register_operand" "r,r")]
+		     UNSPEC_ARC_VMPY2H))
+   (clobber (reg:DI ARCV2_ACC))]
+  "TARGET_PLUS_MACD"
+  "vmpy2h%? %0, %1, %2"
+  [(set_attr "length" "4")
+   (set_attr "type" "multi")
+   (set_attr "predicable" "yes,no")
+   (set_attr "cond" "canuse,nocond")])
+
+(define_insn "vmpy2hu"
+  [(set (match_operand:V2SI 0 "even_register_operand" "=r,r")
+	(unspec:V2SI [(match_operand:V2HI 1 "register_operand" "0,r")
+		      (match_operand:V2HI 2 "register_operand" "r,r")]
+		     UNSPEC_ARC_VMPY2HU))
+   (clobber (reg:DI ARCV2_ACC))]
+  "TARGET_PLUS_MACD"
+  "vmpy2hu%? %0, %1, %2"
+  [(set_attr "length" "4")
+   (set_attr "type" "multi")
+   (set_attr "predicable" "yes,no")
+   (set_attr "cond" "canuse,nocond")])
+
+(define_insn "qmach"
+  [(set (match_operand:DI 0 "even_register_operand" "=r,r")
+	(unspec:DI [(match_operand:V4HI 1 "even_register_operand" "0,r")
+		    (match_operand:V4HI 2 "even_register_operand" "r,r")
+		    (reg:DI ARCV2_ACC)]
+		     UNSPEC_ARC_QMACH))
+   (clobber (reg:DI ARCV2_ACC))]
+  "TARGET_PLUS_QMACW"
+  "qmach%? %0, %1, %2"
+  [(set_attr "length" "4")
+   (set_attr "type" "multi")
+   (set_attr "predicable" "yes,no")
+   (set_attr "cond" "canuse,nocond")])
+
+(define_insn "qmachu"
+  [(set (match_operand:DI 0 "even_register_operand" "=r,r")
+	(unspec:DI [(match_operand:V4HI 1 "even_register_operand" "0,r")
+		    (match_operand:V4HI 2 "even_register_operand" "r,r")
+		    (reg:DI ARCV2_ACC)]
+		   UNSPEC_ARC_QMACHU))
+   (clobber (reg:DI ARCV2_ACC))]
+  "TARGET_PLUS_QMACW"
+  "qmachu%? %0, %1, %2"
+  [(set_attr "length" "4")
+   (set_attr "type" "multi")
+   (set_attr "predicable" "yes,no")
+   (set_attr "cond" "canuse,nocond")])
+
+(define_insn "qmpyh"
+  [(set (match_operand:DI 0 "even_register_operand" "=r,r")
+	(unspec:DI [(match_operand:V4HI 1 "even_register_operand" "0,r")
+		    (match_operand:V4HI 2 "even_register_operand" "r,r")]
+		     UNSPEC_ARC_QMPYH))
+   (clobber (reg:DI ARCV2_ACC))]
+  "TARGET_PLUS_QMACW"
+  "qmpyh%? %0, %1, %2"
+  [(set_attr "length" "4")
+   (set_attr "type" "multi")
+   (set_attr "predicable" "yes,no")
+   (set_attr "cond" "canuse,nocond")])
+
+(define_insn "qmpyhu"
+  [(set (match_operand:DI 0 "even_register_operand" "=r,r")
+	(unspec:DI [(match_operand:V4HI 1 "even_register_operand" "0,r")
+		    (match_operand:V4HI 2 "even_register_operand" "r,r")]
+		   UNSPEC_ARC_QMPYHU))
+   (clobber (reg:DI ARCV2_ACC))]
+  "TARGET_PLUS_QMACW"
+  "qmpyhu%? %0, %1, %2"
+  [(set_attr "length" "4")
+   (set_attr "type" "multi")
+   (set_attr "predicable" "yes,no")
+   (set_attr "cond" "canuse,nocond")])
diff --git a/gcc/testsuite/gcc.target/arc/builtin_simdarc.c b/gcc/testsuite/gcc.target/arc/builtin_simdarc.c
new file mode 100644
index 0000000..68aae40
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arc/builtin_simdarc.c
@@ -0,0 +1,38 @@
+/* { dg-do compile } */
+/* { dg-options "-mcpu=archs -O2 -Werror-implicit-function-declaration -mmpy-option=9" } */
+
+#define STEST(name, rettype, op1type, op2type)	\
+  rettype test_ ## name				\
+  (op1type a, op2type b)			\
+  {						\
+    return __builtin_arc_ ## name (a, b);	\
+  }
+
+typedef short v2hi __attribute__ ((vector_size (4)));
+typedef short v4hi __attribute__ ((vector_size (8)));
+typedef int   v2si __attribute__ ((vector_size (8)));
+
+STEST (qmach,  long long, v4hi, v4hi)
+STEST (qmachu, long long, v4hi, v4hi)
+STEST (qmpyh,  long long, v4hi, v4hi)
+STEST (qmpyhu, long long, v4hi, v4hi)
+
+STEST (dmach,  int, v2hi, v2hi)
+STEST (dmachu, int, v2hi, v2hi)
+STEST (dmpyh,  int, v2hi, v2hi)
+STEST (dmpyhu, int, v2hi, v2hi)
+
+STEST (dmacwh,  long long, v2si, v2hi) /* Builtin returns long long.  */
+STEST (dmacwhu, long long, v2si, v2hi) /* Builtin returns long long.  */
+
+STEST (vmac2h,  v2si, v2hi, v2hi)
+STEST (vmac2hu, v2si, v2hi, v2hi)
+STEST (vmpy2h,  v2si, v2hi, v2hi)
+STEST (vmpy2hu, v2si, v2hi, v2hi)
+
+STEST (vaddsub2h, v2hi, v2hi, v2hi)
+STEST (vsubadd2h, v2hi, v2hi, v2hi)
+STEST (vaddsub,   v2si, v2si, v2si)
+STEST (vsubadd,   v2si, v2si, v2si)
+STEST (vaddsub4h, v4hi, v4hi, v4hi)
+STEST (vsubadd4h, v4hi, v4hi, v4hi)
-- 
1.9.1



More information about the Gcc-patches mailing list