This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[patch] Intrinsic functions for SPARC VIS instructions.


 Hi,

  This patch adds a number of intrinsic functions for VIS instructions that
are hard to express otherwise.  It has been tested on sparc-linux and
sparc64-linux with no new regressions.  All the new tests pass.

-- 
Thanks,
Jim

http://www.student.cs.uwaterloo.ca/~ja2morri/
http://open.nit.ca/wiki/?page=jim
http://phython.blogspot.com

2004-11-20  James A. Morrison  <phython@gcc.gnu.org>

	* config/sparc/sparc.c: Include insn-codes.h and langhooks.h.
	(sparc_init_builtins): New function.
	(sparc_vis_init_builtins): New function.  Create builtin functions
	for VIS instructions.
	(sparc_expand_builtin): Expand builtin functions for VIS instructions.
	(TARGET_INIT_BUILTINS): Define to sparc_init_builtins.
	(TARGET_EXPAND_BUILTIN): Define to sparc_expand_builtin.
	(def_builtin): New macro for creating builtin functions.
	(P): New mode macro for pointer types.
	(UNSPEC_FPACK16, UNSPEC_FPACK32, UNSPEC_FPACKFIX, UNSPEC_FEXPAND,
	UNSPEC_FPMERGE, UNSPEC_MUL16AL, UNSPEC_MUL8UL, UNSPEC_MULDUL,
	UNSPEC_ALIGNDATA, UNSPEC_ALIGNADDR, UNSPEC_PDIST): New constants.
	(fpack16_vis, fpackfix_vis, fpack32_vis, fexpand_vis, fpmerge_vis,
	fmul8x16_vis, fmul8x16au_vis, fmul8x16al_vis, fmul8sux16_vis,
	fmul8ulx16_vis, fmuld8sux16_vis, fmuld8ulx16_vis, pdist_vis,
	faligndata<V64:mode>_vis, alignaddr<P:mode>_vis): New patterns.

testsuite:
2004-11-20  James A. Morrison  <phython@gcc.gnu.org>

	* gcc.target/sparc/align.c, gcc.target/sparc/combined-2.c,
	gcc.target/sparc/fpack16.c, gcc.target/sparc/fpack32.c,
	gcc.target/sparc/fpackfix.c, gcc.target/sparc/fpmul.c,
	gcc.target/sparc/fpmerge.c, gcc.target/sparc/fexpand.c,
	gcc.target/sparc/pdist.c: New tests.

Index: gcc/config/sparc/sparc.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/sparc/sparc.c,v
retrieving revision 1.344
diff -u -p -r1.344 sparc.c
--- gcc/config/sparc/sparc.c	13 Nov 2004 13:25:09 -0000	1.344
+++ gcc/config/sparc/sparc.c	18 Nov 2004 14:31:33 -0000
@@ -32,6 +32,7 @@ Boston, MA 02111-1307, USA.  */
 #include "hard-reg-set.h"
 #include "real.h"
 #include "insn-config.h"
+#include "insn-codes.h"
 #include "conditions.h"
 #include "output.h"
 #include "insn-attr.h"
@@ -48,6 +49,7 @@ Boston, MA 02111-1307, USA.  */
 #include "target-def.h"
 #include "cfglayout.h"
 #include "tree-gimple.h"
+#include "langhooks.h"
 
 /* Processor costs */
 static const
@@ -332,6 +334,9 @@ static void emit_hard_tfmode_operation (
 
 static bool sparc_function_ok_for_sibcall (tree, tree);
 static void sparc_init_libfuncs (void);
+static void sparc_init_builtins (void);
+static void sparc_vis_init_builtins (void);
+static rtx sparc_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
 static void sparc_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
 				   HOST_WIDE_INT, tree);
 static bool sparc_can_output_mi_thunk (tree, HOST_WIDE_INT,
@@ -417,6 +422,11 @@ enum processor_type sparc_cpu;
 
 #undef TARGET_INIT_LIBFUNCS
 #define TARGET_INIT_LIBFUNCS sparc_init_libfuncs
+#undef TARGET_INIT_BUILTINS
+#define TARGET_INIT_BUILTINS sparc_init_builtins
+
+#undef TARGET_EXPAND_BUILTIN
+#define TARGET_EXPAND_BUILTIN sparc_expand_builtin
 
 #ifdef HAVE_AS_TLS
 #undef TARGET_HAVE_TLS
@@ -8440,6 +8434,154 @@ sparc_init_libfuncs (void)
   gofast_maybe_init_libfuncs ();
 }
 
+/* Register the machine-specific builtin NAME with function type TYPE.
+   CODE is passed as the builtin's function code; sparc_expand_builtin reads
+   it back with DECL_FUNCTION_CODE and uses it as an insn code.
+   The expansion has no trailing semicolon: the caller supplies it, so a use
+   of the macro is a single statement like any other call.  */
+#define def_builtin(NAME, CODE, TYPE) \
+  lang_hooks.builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, NULL, \
+			       NULL_TREE)
+
+/* TARGET_INIT_BUILTINS target hook.
+   Registers the builtin functions for special SPARC instructions; the VIS
+   builtins are created only when TARGET_VIS is enabled.  */
+
+static void
+sparc_init_builtins (void)
+{
+  if (!TARGET_VIS)
+    return;
+
+  sparc_vis_init_builtins ();
+}
+
+/* Create builtin functions for VIS 1 instructions.
+
+   Builds the vector types and function types used by the VIS builtins and
+   registers each __builtin_vis_* function with def_builtin, recording the
+   insn code of the pattern that implements it.  Every argument list passed
+   to build_function_type_list ends with NULL_TREE: the function is variadic,
+   so a pointer-typed terminator is used rather than a bare integer 0.  */
+
+static void
+sparc_vis_init_builtins (void)
+{
+  /* Element types: the QI vectors are unsigned, the HI/SI vectors signed.  */
+  tree v4qi = build_vector_type (unsigned_intQI_type_node, 4);
+  tree v8qi = build_vector_type (unsigned_intQI_type_node, 8);
+  tree v4hi = build_vector_type (intHI_type_node, 4);
+  tree v2hi = build_vector_type (intHI_type_node, 2);
+  tree v2si = build_vector_type (intSI_type_node, 2);
+
+  /* Function types, named RESULT_ftype_ARG1[_ARG2...].  */
+  tree v4qi_ftype_v4hi = build_function_type_list (v4qi, v4hi, NULL_TREE);
+  tree v8qi_ftype_v2si_v8qi = build_function_type_list (v8qi, v2si, v8qi,
+							NULL_TREE);
+  tree v2hi_ftype_v2si = build_function_type_list (v2hi, v2si, NULL_TREE);
+  tree v4hi_ftype_v4qi = build_function_type_list (v4hi, v4qi, NULL_TREE);
+  tree v8qi_ftype_v4qi_v4qi = build_function_type_list (v8qi, v4qi, v4qi,
+							NULL_TREE);
+  tree v4hi_ftype_v4qi_v4hi = build_function_type_list (v4hi, v4qi, v4hi,
+							NULL_TREE);
+  tree v4hi_ftype_v4qi_v2hi = build_function_type_list (v4hi, v4qi, v2hi,
+							NULL_TREE);
+  tree v2si_ftype_v4qi_v2hi = build_function_type_list (v2si, v4qi, v2hi,
+							NULL_TREE);
+  tree v4hi_ftype_v8qi_v4hi = build_function_type_list (v4hi, v8qi, v4hi,
+							NULL_TREE);
+  tree v4hi_ftype_v4hi_v4hi = build_function_type_list (v4hi, v4hi, v4hi,
+							NULL_TREE);
+  tree v2si_ftype_v2si_v2si = build_function_type_list (v2si, v2si, v2si,
+							NULL_TREE);
+  tree v8qi_ftype_v8qi_v8qi = build_function_type_list (v8qi, v8qi, v8qi,
+							NULL_TREE);
+  tree di_ftype_v8qi_v8qi_di = build_function_type_list (intDI_type_node,
+							 v8qi, v8qi,
+							 intDI_type_node,
+							 NULL_TREE);
+  tree ptr_ftype_ptr_ptr = build_function_type_list (ptr_type_node,
+						     ptr_type_node,
+						     ptr_type_node,
+						     NULL_TREE);
+
+  /* Packing and expanding vectors.  */
+  def_builtin ("__builtin_vis_fpack16", CODE_FOR_fpack16_vis, v4qi_ftype_v4hi);
+  def_builtin ("__builtin_vis_fpack32", CODE_FOR_fpack32_vis,
+	       v8qi_ftype_v2si_v8qi);
+  def_builtin ("__builtin_vis_fpackfix", CODE_FOR_fpackfix_vis,
+	       v2hi_ftype_v2si);
+  def_builtin ("__builtin_vis_fexpand", CODE_FOR_fexpand_vis, v4hi_ftype_v4qi);
+  def_builtin ("__builtin_vis_fpmerge", CODE_FOR_fpmerge_vis,
+	       v8qi_ftype_v4qi_v4qi);
+
+  /* Multiplications.  */
+  def_builtin ("__builtin_vis_fmul8x16", CODE_FOR_fmul8x16_vis,
+	       v4hi_ftype_v4qi_v4hi);
+  def_builtin ("__builtin_vis_fmul8x16au", CODE_FOR_fmul8x16au_vis,
+	       v4hi_ftype_v4qi_v2hi);
+  def_builtin ("__builtin_vis_fmul8x16al", CODE_FOR_fmul8x16al_vis,
+	       v4hi_ftype_v4qi_v2hi);
+  def_builtin ("__builtin_vis_fmul8sux16", CODE_FOR_fmul8sux16_vis,
+	       v4hi_ftype_v8qi_v4hi);
+  def_builtin ("__builtin_vis_fmul8ulx16", CODE_FOR_fmul8ulx16_vis,
+	       v4hi_ftype_v8qi_v4hi);
+  def_builtin ("__builtin_vis_fmuld8sux16", CODE_FOR_fmuld8sux16_vis,
+	       v2si_ftype_v4qi_v2hi);
+  def_builtin ("__builtin_vis_fmuld8ulx16", CODE_FOR_fmuld8ulx16_vis,
+	       v2si_ftype_v4qi_v2hi);
+
+  /* Data aligning.  */
+  def_builtin ("__builtin_vis_faligndatav4hi", CODE_FOR_faligndatav4hi_vis,
+	       v4hi_ftype_v4hi_v4hi);
+  def_builtin ("__builtin_vis_faligndatav8qi", CODE_FOR_faligndatav8qi_vis,
+	       v8qi_ftype_v8qi_v8qi);
+  def_builtin ("__builtin_vis_faligndatav2si", CODE_FOR_faligndatav2si_vis,
+	       v2si_ftype_v2si_v2si);
+  /* Registered with the SImode pattern's code; sparc_expand_builtin switches
+     to the DImode pattern when Pmode is DImode.  */
+  def_builtin ("__builtin_vis_alignaddr", CODE_FOR_alignaddrsi_vis,
+	       ptr_ftype_ptr_ptr);
+
+  /* Pixel distance.  */
+  def_builtin ("__builtin_vis_pdist", CODE_FOR_pdist_vis,
+	       di_ftype_v8qi_v8qi_di);
+}
+
+/* Handle TARGET_EXPAND_BUILTIN target hook.
+   Expand builtin functions for sparc intrinsics.
+
+   EXP is the call expression; the function code of the called decl is the
+   insn code of the pattern to emit.  TARGET is a suggested place for the
+   result and is used only when it already has the mode of, and satisfies the
+   predicate of, the insn's output operand; otherwise a fresh pseudo is
+   allocated.  TMODE is not used: the result mode is taken from the insn
+   itself, because the mode suggested by the caller need not match it.
+   Each argument is expanded and, if it fails the operand predicate, copied
+   into a register of the operand's mode.
+
+   Returns the rtx holding the result, or NULL_RTX if the insn generator
+   produced no pattern.  */
+
+static rtx
+sparc_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
+		      enum machine_mode tmode ATTRIBUTE_UNUSED,
+		      int ignore ATTRIBUTE_UNUSED)
+{
+  tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
+  tree arglist = TREE_OPERAND (exp, 1);
+  unsigned int icode = DECL_FUNCTION_CODE (fndecl);
+  rtx pat, op[4];
+  enum machine_mode mode[4];
+  int arg_count = 0;
+
+  /* The alignaddr builtin is registered with the SImode pattern; pick the
+     DImode pattern when pointers are DImode.  This must happen before
+     insn_data[icode] is consulted below.  */
+  if (icode == CODE_FOR_alignaddrsi_vis && Pmode == DImode)
+    icode = CODE_FOR_alignaddrdi_vis;
+
+  /* Operand 0 is the result.  */
+  mode[0] = insn_data[icode].operand[0].mode;
+
+  if (target != 0
+      && GET_MODE (target) == mode[0]
+      && (*insn_data[icode].operand[0].predicate) (target, mode[0]))
+    op[0] = target;
+  else
+    op[0] = gen_reg_rtx (mode[0]);
+
+  for (; arglist; arglist = TREE_CHAIN (arglist))
+    {
+      tree arg = TREE_VALUE (arglist);
+
+      arg_count++;
+      /* op[] and mode[] hold the result plus at most three inputs.  */
+      gcc_assert (arg_count < 4);
+
+      mode[arg_count] = insn_data[icode].operand[arg_count].mode;
+
+      op[arg_count] = expand_expr (arg, NULL_RTX, VOIDmode, 0);
+      if (! (*insn_data[icode].operand[arg_count].predicate) (op[arg_count],
+							      mode[arg_count]))
+	op[arg_count] = copy_to_mode_reg (mode[arg_count], op[arg_count]);
+    }
+
+  /* The generator function takes the result plus ARG_COUNT inputs.  */
+  switch (arg_count)
+    {
+    case 1:
+      pat = GEN_FCN (icode) (op[0], op[1]);
+      break;
+    case 2:
+      pat = GEN_FCN (icode) (op[0], op[1], op[2]);
+      break;
+    case 3:
+      pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
+      break;
+    default:
+      gcc_unreachable ();
+    }
+
+  if (!pat)
+    return NULL_RTX;
+
+  emit_insn (pat);
+
+  return op[0];
+}
+
 int
 sparc_extra_constraint_check (rtx op, int c, int strict)
 {
Index: gcc/config/sparc/sparc.md
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/sparc/sparc.md,v
retrieving revision 1.221
diff -u -p -r1.221 sparc.md
--- gcc/config/sparc/sparc.md	13 Nov 2004 13:25:09 -0000	1.221
+++ gcc/config/sparc/sparc.md	18 Nov 2004 14:31:36 -0000
@@ -45,6 +45,18 @@
    (UNSPEC_TLSIE		33)
    (UNSPEC_TLSLE		34)
    (UNSPEC_TLSLD_BASE		35)
+
+   (UNSPEC_FPACK16	 	40)
+   (UNSPEC_FPACK32		41)
+   (UNSPEC_FPACKFIX		42)
+   (UNSPEC_FEXPAND		43)
+   (UNSPEC_FPMERGE		44)
+   (UNSPEC_MUL16AL		45)
+   (UNSPEC_MUL8UL		46)
+   (UNSPEC_MULDUL		47)
+   (UNSPEC_ALIGNDATA		48)
+   (UNSPEC_ALIGNADDR		49)
+   (UNSPEC_PDIST		50)
   ])
 
 (define_constants
@@ -8961,3 +8973,161 @@
   "fnands\t%1, %2, %0"
   [(set_attr "type" "fga")
    (set_attr "fptype" "single")])
+
+;; Hard to generate VIS instructions.  We have builtins for these
+;; fpack16: V4HI operand 1 -> V4QI operand 0.  Described as an opaque unspec
+;; (UNSPEC_FPACK16) rather than with standard RTL codes.
+(define_insn "fpack16_vis"
+  [(set (match_operand:V4QI 0 "register_operand" "=f")
+        (unspec:V4QI [(match_operand:V4HI 1 "register_operand" "e")]
+		      UNSPEC_FPACK16))]
+  "TARGET_VIS"
+  "fpack16\t%1, %0"
+  [(set_attr "type" "fga")
+   (set_attr "fptype" "double")])
+
+;; fpackfix: V2SI operand 1 -> V2HI operand 0, as an unspec (UNSPEC_FPACKFIX).
+(define_insn "fpackfix_vis"
+  [(set (match_operand:V2HI 0 "register_operand" "=f")
+        (unspec:V2HI [(match_operand:V2SI 1 "register_operand" "e")]
+		      UNSPEC_FPACKFIX))]
+  "TARGET_VIS"
+  "fpackfix\t%1, %0"
+  [(set_attr "type" "fga")
+   (set_attr "fptype" "double")])
+
+;; fpack32: takes a V2SI (operand 1) and a V8QI (operand 2) and produces a
+;; V8QI result, as an unspec (UNSPEC_FPACK32).
+(define_insn "fpack32_vis"
+  [(set (match_operand:V8QI 0 "register_operand" "=e")
+        (unspec:V8QI [(match_operand:V2SI 1 "register_operand" "e")
+        	      (match_operand:V8QI 2 "register_operand" "e")]
+                     UNSPEC_FPACK32))]
+  "TARGET_VIS"
+  "fpack32\t%1, %2, %0"
+  [(set_attr "type" "fga")
+   (set_attr "fptype" "double")])
+
+;; fexpand: V4QI operand 1 -> V4HI operand 0, as an unspec (UNSPEC_FEXPAND).
+(define_insn "fexpand_vis"
+  [(set (match_operand:V4HI 0 "register_operand" "=e")
+        (unspec:V4HI [(match_operand:V4QI 1 "register_operand" "f")]
+         UNSPEC_FEXPAND))]
+ "TARGET_VIS"
+ "fexpand\t%1, %0"
+ [(set_attr "type" "fga")
+  (set_attr "fptype" "double")])
+
+;; fpmerge: combines two V4QI inputs into one V8QI result.  It is kept as an
+;; unspec (UNSPEC_FPMERGE) for the reasons given below.
+;; It may be possible to describe this operation as (1 indexed):
+;; (vec_select (vec_duplicate (vec_duplicate (vec_concat 1 2)))
+;;  1,5,10,14,19,23,28,32)
+;; However (vec_merge:V8QI [(V4QI) (V4QI)] (10101010 = 170) doesn't work!
+(define_insn "fpmerge_vis"
+  [(set (match_operand:V8QI 0 "register_operand" "=e")
+        (unspec:V8QI [(match_operand:V4QI 1 "register_operand" "f")
+                      (match_operand:V4QI 2 "register_operand" "f")]
+         UNSPEC_FPMERGE))]
+ "TARGET_VIS"
+ "fpmerge\t%1, %2, %0"
+ [(set_attr "type" "fga")
+  (set_attr "fptype" "double")])
+
+;; Partitioned multiply instructions
+;; fmul8x16: V4QI operand 1 times V4HI operand 2, giving V4HI.  Written with
+;; the mult RTL code, unlike the unspec-based patterns above.
+(define_insn "fmul8x16_vis"
+  [(set (match_operand:V4HI 0 "register_operand" "=e")
+        (mult:V4HI (match_operand:V4QI 1 "register_operand" "f")
+                   (match_operand:V4HI 2 "register_operand" "e")))]
+  "TARGET_VIS"
+  "fmul8x16\t%1, %2, %0"
+  [(set_attr "type" "fpmul")
+   (set_attr "fptype" "double")])
+
+;; Only one of the following two insns can be a multiply.
+;; (Both take a V4QI and a V2HI and produce a V4HI, so this one uses mult and
+;; fmul8x16al_vis uses an unspec -- presumably so that the two patterns do
+;; not have identical RTL.)
+(define_insn "fmul8x16au_vis"
+  [(set (match_operand:V4HI 0 "register_operand" "=e")
+        (mult:V4HI (match_operand:V4QI 1 "register_operand" "f")
+                   (match_operand:V2HI 2 "register_operand" "f")))]
+  "TARGET_VIS"
+  "fmul8x16au\t%1, %2, %0"
+  [(set_attr "type" "fpmul")
+   (set_attr "fptype" "double")])
+
+;; fmul8x16al: same operand modes as fmul8x16au_vis, but expressed as an
+;; unspec (UNSPEC_MUL16AL) instead of mult.
+(define_insn "fmul8x16al_vis"
+  [(set (match_operand:V4HI 0 "register_operand" "=e")
+        (unspec:V4HI [(match_operand:V4QI 1 "register_operand" "f")
+                      (match_operand:V2HI 2 "register_operand" "f")]
+         UNSPEC_MUL16AL))]
+  "TARGET_VIS"
+  "fmul8x16al\t%1, %2, %0"
+  [(set_attr "type" "fpmul")
+   (set_attr "fptype" "double")])
+
+;; Only one of the following two insns can be a multiply.
+;; (Both take a V8QI and a V4HI and produce a V4HI; this one uses mult and
+;; fmul8ulx16_vis uses an unspec.)
+(define_insn "fmul8sux16_vis"
+  [(set (match_operand:V4HI 0 "register_operand" "=e")
+        (mult:V4HI (match_operand:V8QI 1 "register_operand" "e")
+                   (match_operand:V4HI 2 "register_operand" "e")))]
+  "TARGET_VIS"
+  "fmul8sux16\t%1, %2, %0"
+  [(set_attr "type" "fpmul")
+   (set_attr "fptype" "double")])
+
+;; fmul8ulx16: same operand modes as fmul8sux16_vis, but expressed as an
+;; unspec (UNSPEC_MUL8UL) instead of mult.
+(define_insn "fmul8ulx16_vis"
+  [(set (match_operand:V4HI 0 "register_operand" "=e")
+        (unspec:V4HI [(match_operand:V8QI 1 "register_operand" "e")
+                      (match_operand:V4HI 2 "register_operand" "e")]
+         UNSPEC_MUL8UL))]
+  "TARGET_VIS"
+  "fmul8ulx16\t%1, %2, %0"
+  [(set_attr "type" "fpmul")
+   (set_attr "fptype" "double")])
+;; Only one of the following two insns can be a multiply.
+;; (Both take a V4QI and a V2HI and produce a V2SI; this one uses mult and
+;; fmuld8ulx16_vis uses an unspec.)
+(define_insn "fmuld8sux16_vis"
+  [(set (match_operand:V2SI 0 "register_operand" "=e")
+        (mult:V2SI (match_operand:V4QI 1 "register_operand" "f")
+                   (match_operand:V2HI 2 "register_operand" "f")))]
+  "TARGET_VIS"
+  "fmuld8sux16\t%1, %2, %0"
+  [(set_attr "type" "fpmul")
+   (set_attr "fptype" "double")])
+
+;; fmuld8ulx16: same operand modes as fmuld8sux16_vis, but expressed as an
+;; unspec (UNSPEC_MULDUL) instead of mult.
+(define_insn "fmuld8ulx16_vis"
+  [(set (match_operand:V2SI 0 "register_operand" "=e")
+        (unspec:V2SI [(match_operand:V4QI 1 "register_operand" "f")
+                      (match_operand:V2HI 2 "register_operand" "f")]
+         UNSPEC_MULDUL))]
+  "TARGET_VIS"
+  "fmuld8ulx16\t%1, %2, %0"
+  [(set_attr "type" "fpmul")
+   (set_attr "fptype" "double")])
+
+;; This probably isn't exactly safe for DF mode.
+;; In fact, it's not meant for anything other than V8QI.  However, since a
+;; short* or int* will be aligned along 2 byte and 4 byte boundaries anyway,
+;; this will work for V4HI and V2SI as well.
+;; The pattern is instantiated once per mode of the V64 mode macro (defined
+;; outside this hunk); sparc.c refers to the v4hi, v8qi and v2si instances.
+;; Result and both inputs share the same mode.
+(define_insn "faligndata<V64:mode>_vis"
+  [(set (match_operand:V64 0 "register_operand" "=e")
+        (unspec:V64 [(match_operand:V64 1 "register_operand" "e")
+                     (match_operand:V64 2 "register_operand" "e")]
+         UNSPEC_ALIGNDATA))]
+  "TARGET_VIS"
+  "faligndata\t%1, %2, %0"
+  [(set_attr "type" "fga")
+   (set_attr "fptype" "double")])
+
+;; Pointer mode macro: SI is enabled when Pmode == SImode and DI when
+;; Pmode == DImode.
+(define_mode_macro P [(SI "Pmode == SImode") (DI "Pmode == DImode")])
+
+;; alignaddr: one instance per mode of P (alignaddrsi_vis / alignaddrdi_vis).
+;; All operands use general-register constraints ("r"); operand 1 may also be
+;; zero ("J" with reg_or_0_operand) and is marked commutative with operand 2
+;; by the "%" modifier.  No "type"/"fptype" attributes are set here.
+(define_insn "alignaddr<P:mode>_vis"
+  [(set (match_operand:P 0 "register_operand" "=r")
+        (unspec:P [(match_operand:P 1 "reg_or_0_operand" "%rJ")
+                   (match_operand:P 2 "register_operand" "r")]
+         UNSPEC_ALIGNADDR))]
+  "TARGET_VIS"
+  "alignaddr\t%1, %2, %0")
+
+;; pdist: two V8QI inputs plus a DI operand 3 that is tied to the output by
+;; the "0" constraint, so the template names only %1, %2 and %0.
+(define_insn "pdist_vis"
+  [(set (match_operand:DI 0 "register_operand" "=e")
+        (unspec:DI [(match_operand:V8QI 1 "register_operand" "e")
+                    (match_operand:V8QI 2 "register_operand" "e")
+                    (match_operand:DI 3 "register_operand" "0")]
+         UNSPEC_PDIST))]
+  "TARGET_VIS"
+  "pdist\t%1, %2, %0"
+  [(set_attr "type" "fga")
+   (set_attr "fptype" "double")])
+
+
/* { dg-do compile } */
/* { dg-options "-mcpu=ultrasparc -mvis" } */
typedef int vec32 __attribute__((vector_size(8)));
typedef short vec16 __attribute__((vector_size(8)));
typedef char vec8 __attribute__((vector_size(8)));

/* Exercise __builtin_vis_faligndatav4hi on two 8-byte vectors of short.  */
vec16 foo1 (vec16 a, vec16 b) {
  return __builtin_vis_faligndatav4hi (a, b);
}

/* Exercise __builtin_vis_faligndatav2si on two 8-byte vectors of int.  */
vec32 foo2 (vec32 a, vec32 b) {
  return __builtin_vis_faligndatav2si (a, b);
}

/* Exercise __builtin_vis_faligndatav8qi on two 8-byte vectors of char.  */
vec8 foo3 (vec8 a, vec8 b) {
  return __builtin_vis_faligndatav8qi (a, b);
}

/* Exercise __builtin_vis_alignaddr with a pointer and a constant 0 as the
   second argument.  */
unsigned char * foo4 (unsigned char *data) {
  return __builtin_vis_alignaddr (data, 0);
}

/* { dg-final { scan-assembler-times "faligndata" 3 } } */
/* { dg-final { scan-assembler "alignaddr" } } */
/* { dg-do compile } */
/* { dg-options "-O2 -mcpu=ultrasparc -mvis" } */
typedef char pixel __attribute__((vector_size(4)));
typedef char vec8 __attribute__((vector_size(8)));
typedef short vec16 __attribute__((vector_size(8)));

/* Call fpmerge and fmul8x16 in the same function.  The fpmerge result C is
   never used; only the fmul8x16 result (A times a vector of all -1) is
   returned.  */
vec16 foo (pixel a, pixel b) {
  vec8 c = __builtin_vis_fpmerge (a, b);
  vec16 d = { -1, -1, -1, -1 };
  vec16 e = __builtin_vis_fmul8x16 (a, d);

  return e;
}

/* Multiply by an all-zero vector.  NOTE(review): the file's dg-final
   directives scan for "fzero"; presumably this is the function expected to
   produce it -- confirm.  */
vec16 bar (pixel a) {
  vec16 d = { 0, 0, 0, 0 };
  vec16 e = __builtin_vis_fmul8x16 (a, d);  /* Multiplication by 0 = 0.  */

  return e;
}

/* { dg-final { scan-assembler "fmul8x16" } } */
/* { dg-final { scan-assembler "fzero" } } */
/* { dg-do compile } */
/* { dg-options "-mcpu=ultrasparc -mvis" } */
typedef short vec16 __attribute__((vector_size(8)));
typedef char vec8 __attribute__((vector_size(4)));

/* Exercise __builtin_vis_fpack16: 8-byte vector of short in, 4-byte vector
   of char out.  */
vec8 foo (vec16 a) {
  return __builtin_vis_fpack16 (a);
}

/* { dg-final { scan-assembler "fpack16\t%" } } */
/* { dg-do compile } */
/* { dg-options "-mcpu=ultrasparc -mvis" } */
typedef int vec32 __attribute__((vector_size(8)));
typedef unsigned char vec8 __attribute__((vector_size(8)));

/* Exercise __builtin_vis_fpack32: an 8-byte vector of int and an 8-byte
   vector of unsigned char in, 8-byte vector of unsigned char out.  */
vec8 foo (vec32 a, vec8 b) {
  return __builtin_vis_fpack32 (a, b);
}

/* { dg-final { scan-assembler "fpack32\t%" } } */
/* { dg-do compile } */
/* { dg-options "-mcpu=ultrasparc -mvis" } */
typedef int vec32 __attribute__((vector_size(8)));
typedef short vec16 __attribute__((vector_size(4)));

/* Exercise __builtin_vis_fpackfix: 8-byte vector of int in, 4-byte vector of
   short out.  */
vec16 foo (vec32 a) {
  return __builtin_vis_fpackfix (a);
}

/* { dg-final { scan-assembler "fpackfix\t%" } } */
/* { dg-do compile } */
/* { dg-options "-mcpu=ultrasparc -mvis" } */
typedef int vec32 __attribute__((vector_size(8)));
typedef short vec16 __attribute__((vector_size(8)));
typedef char pixel __attribute__((vector_size(4)));
typedef short pixel16 __attribute__((vector_size(4)));
typedef char vec8 __attribute__((vector_size(8)));

/* fmul8x16: 4-byte char vector times 8-byte short vector.  */
vec16 foo1 (pixel a, vec16 b) {
  return __builtin_vis_fmul8x16 (a, b);
}

/* fmul8x16au: 4-byte char vector times 4-byte short vector.  */
vec16 foo2 (pixel a, pixel16 b) {
  return __builtin_vis_fmul8x16au (a, b);
}

/* fmul8x16al: same argument types as the fmul8x16au case.  */
vec16 foo3 (pixel a, pixel16 b) {
  return __builtin_vis_fmul8x16al (a, b);
}

/* fmul8sux16: 8-byte char vector times 8-byte short vector.  */
vec16 foo4 (vec8 a, vec16 b) {
  return __builtin_vis_fmul8sux16 (a, b);
}

/* fmul8ulx16: same argument types as the fmul8sux16 case.  */
vec16 foo5 (vec8 a, vec16 b) {
  return __builtin_vis_fmul8ulx16 (a, b);
}

/* fmuld8sux16: 4-byte char vector times 4-byte short vector, giving an
   8-byte vector of int.  */
vec32 foo6 (pixel a, pixel16 b) {
  return __builtin_vis_fmuld8sux16 (a, b);
}

/* fmuld8ulx16: same argument and result types as the fmuld8sux16 case.  */
vec32 foo7 (pixel a, pixel16 b) {
  return __builtin_vis_fmuld8ulx16 (a, b);
}

/* { dg-final { scan-assembler "fmul8x16\t%" } } */
/* { dg-final { scan-assembler "fmul8x16au\t%" } } */
/* { dg-final { scan-assembler "fmul8x16al\t%" } } */
/* { dg-final { scan-assembler "fmul8sux16\t%" } } */
/* { dg-final { scan-assembler "fmul8ulx16\t%" } } */
/* { dg-final { scan-assembler "fmuld8sux16\t%" } } */
/* { dg-final { scan-assembler "fmuld8ulx16\t%" } } */
/* { dg-do compile } */
/* { dg-options "-mcpu=ultrasparc -mvis" } */
typedef char pixel __attribute__((vector_size(8)));
typedef char vec8 __attribute__((vector_size(4)));

/* Exercise __builtin_vis_fpmerge: two 4-byte char vectors in, one 8-byte
   char vector out.  Note that in this file "pixel" is the 8-byte type and
   "vec8" the 4-byte type.  */
pixel foo (vec8 a, vec8 b) {
  return __builtin_vis_fpmerge (a, b);
}

/* { dg-final { scan-assembler "fpmerge\t%" } } */
/* { dg-do compile } */
/* { dg-options "-mcpu=ultrasparc -mvis" } */
typedef short vec16 __attribute__((vector_size(8)));
typedef char vec8 __attribute__((vector_size(4)));

/* Exercise __builtin_vis_fexpand: 4-byte char vector in, 8-byte short vector
   out.  */
vec16 foo (vec8 a) {
  return __builtin_vis_fexpand (a);
}

/* { dg-final { scan-assembler "fexpand\t%" } } */
/* { dg-do compile } */
/* { dg-options "-mcpu=ultrasparc -mvis" } */
#include <stdint.h>

typedef char vec8 __attribute__((vector_size(8)));

/* pdist with a zero accumulator, where the result is assigned back to the
   same variable that was passed as the accumulator.  */
int64_t foo (vec8 a, vec8 b) {
  int64_t d = 0;
  d = __builtin_vis_pdist (a, b, d);
  return d;
}

/* pdist with a zero accumulator, returning the call's result directly.  */
int64_t bar (vec8 a, vec8 b) {
  int64_t d = 0;
  return __builtin_vis_pdist (a, b, d);
}

/* pdist where the accumulator argument D is still needed after the call.
   NOTE(review): this file has three pdist calls but its dg-final expects 4
   matches; presumably the fourth comes from the source file name appearing
   in the assembly output -- confirm.  */
int64_t baz (vec8 a, vec8 b, int64_t d) {
  int64_t e = __builtin_vis_pdist (a, b, d);
  return e + d;
}

/* { dg-final { scan-assembler-times "pdist" 4 } } */

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]