This is the mail archive of the gcc@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: Alpha CPU-specific builtins


Richard Henderson <rth@redhat.com> writes:

> On Sat, May 25, 2002 at 10:41:02PM +0200, Falk Hueffner wrote:
> > Is anybody interested in this? If so, I could polish it a bit more
> > and post it here...
> 
> Sure.

OK. I'm a total beginner with gcc, so it would be nice if you could
point me to any problems. The instructions currently implemented are
somewhat random (well, mostly those I needed :-). Other candidates
are:

umulh
ext*
ins*
msk*
ctlz
ctpop
cttz

and maybe some volatiles like mb, although scheduling isn't really
important for them usually.

Some of the instructions can be generated by gcc (like bic), but it
sometimes misses them because of CSE.

If anybody thinks this is worthwhile, I could add the missing
instructions and write some documentation and tests...

A question: I would have thought that adding a % to the constraints
would allow gcc to optimize __builtin_alpha_minub8(1, a) to minub8
a0,1,v0, but that doesn't happen. Why?

I've also attached a small test file...

	Falk

diff -upr -X X.gcc ../cvs/gcc/gcc/config/alpha/alpha-protos.h gcc-alpha-builtins/gcc/config/alpha/alpha-protos.h
--- ../cvs/gcc/gcc/config/alpha/alpha-protos.h	Sun May 19 20:25:04 2002
+++ gcc-alpha-builtins/gcc/config/alpha/alpha-protos.h	Sat May 25 19:37:44 2002
@@ -167,6 +167,12 @@ extern void alpha_output_mi_thunk_osf PA
 					       HOST_WIDE_INT, tree));
 #endif /* TREE CODE */
 
+extern void alpha_init_builtins PARAMS ((void));
+#if defined (TREE_CODE) && defined (RTX_CODE)
+extern rtx alpha_expand_builtin PARAMS ((tree, rtx, rtx,
+					enum machine_mode, int));
+#endif
+
 #ifdef RTX_CODE
 extern rtx unicosmk_add_call_info_word PARAMS ((rtx));
 #endif
diff -upr -X X.gcc ../cvs/gcc/gcc/config/alpha/alpha.c gcc-alpha-builtins/gcc/config/alpha/alpha.c
--- ../cvs/gcc/gcc/config/alpha/alpha.c	Sat May 25 19:18:12 2002
+++ gcc-alpha-builtins/gcc/config/alpha/alpha.c	Sun May 26 16:48:44 2002
@@ -266,6 +266,12 @@ static void unicosmk_unique_section PARA
 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
   alpha_multipass_dfa_lookahead
 
+#undef  TARGET_INIT_BUILTINS
+#define TARGET_INIT_BUILTINS alpha_init_builtins
+
+#undef  TARGET_EXPAND_BUILTIN
+#define TARGET_EXPAND_BUILTIN alpha_expand_builtin
+
 struct gcc_target targetm = TARGET_INITIALIZER;
 
 /* Parse target option strings.  */
@@ -5821,6 +5827,147 @@ alpha_va_arg (valist, type)
 
   return addr;
 }
+
+/* Implement TARGET_INIT_BUILTINS: declare every __builtin_alpha_* function
+   as a BUILT_IN_MD builtin carrying its ALPHA_BUILTIN_* function code.  */
+void
+alpha_init_builtins ()
+{
+  tree di_ftype				/* long (void) */
+    = build_function_type (long_integer_type_node, void_list_node);
+  tree di_ftype_di			/* long (long) */
+    = build_function_type (long_integer_type_node,
+			   tree_cons (NULL_TREE,
+				      long_integer_type_node,
+				      void_list_node));
+  tree di_ftype_di_di			/* long (long, long) */
+    = build_function_type (long_integer_type_node,
+			   tree_cons (NULL_TREE,
+				      long_integer_type_node,
+				      tree_cons (NULL_TREE,
+						 long_integer_type_node,
+						 void_list_node)));
+
+#define def_builtin(name, type, code) \
+  builtin_function ((name), (type), (code), BUILT_IN_MD, NULL)
+
+  def_builtin ("__builtin_alpha_bic",     di_ftype_di_di, ALPHA_BUILTIN_BIC);
+  def_builtin ("__builtin_alpha_eqv",     di_ftype_di_di, ALPHA_BUILTIN_EQV);
+  def_builtin ("__builtin_alpha_ornot",   di_ftype_di_di, ALPHA_BUILTIN_ORNOT);
+  def_builtin ("__builtin_alpha_cmpbge",  di_ftype_di_di, ALPHA_BUILTIN_CMPBGE);
+  def_builtin ("__builtin_alpha_extql",	  di_ftype_di_di, ALPHA_BUILTIN_EXTQL);
+  def_builtin ("__builtin_alpha_extqh",	  di_ftype_di_di, ALPHA_BUILTIN_EXTQH);
+  def_builtin ("__builtin_alpha_zap",	  di_ftype_di_di, ALPHA_BUILTIN_ZAP);
+  def_builtin ("__builtin_alpha_zapnot",  di_ftype_di_di, ALPHA_BUILTIN_ZAPNOT);
+  def_builtin ("__builtin_alpha_amask",	  di_ftype_di,	  ALPHA_BUILTIN_AMASK);
+  def_builtin ("__builtin_alpha_implver", di_ftype,	  ALPHA_BUILTIN_IMPLVER);
+  def_builtin ("__builtin_alpha_rpcc",	  di_ftype,	  ALPHA_BUILTIN_RPCC);
+  def_builtin ("__builtin_alpha_minub8",  di_ftype_di_di, ALPHA_BUILTIN_MINUB8);
+  def_builtin ("__builtin_alpha_minsb8",  di_ftype_di_di, ALPHA_BUILTIN_MINSB8);
+  def_builtin ("__builtin_alpha_minuw4",  di_ftype_di_di, ALPHA_BUILTIN_MINUW4);
+  def_builtin ("__builtin_alpha_minsw4",  di_ftype_di_di, ALPHA_BUILTIN_MINSW4);
+  def_builtin ("__builtin_alpha_maxub8",  di_ftype_di_di, ALPHA_BUILTIN_MAXUB8);
+  def_builtin ("__builtin_alpha_maxsb8",  di_ftype_di_di, ALPHA_BUILTIN_MAXSB8);
+  def_builtin ("__builtin_alpha_maxuw4",  di_ftype_di_di, ALPHA_BUILTIN_MAXUW4);
+  def_builtin ("__builtin_alpha_maxsw4",  di_ftype_di_di, ALPHA_BUILTIN_MAXSW4);
+  def_builtin ("__builtin_alpha_perr",	  di_ftype_di_di, ALPHA_BUILTIN_PERR);
+  def_builtin ("__builtin_alpha_pklb",	  di_ftype_di,	  ALPHA_BUILTIN_PKLB);
+  def_builtin ("__builtin_alpha_pkwb",	  di_ftype_di,	  ALPHA_BUILTIN_PKWB);
+  def_builtin ("__builtin_alpha_unpkbl",  di_ftype_di,	  ALPHA_BUILTIN_UNPKBL);
+  def_builtin ("__builtin_alpha_unpkbw",  di_ftype_di,	  ALPHA_BUILTIN_UNPKBW);
+
+#undef def_builtin
+}
+
+/* Expand EXP, a call to a __builtin_alpha_* function, into its single insn.
+   The result goes to TARGET if TARGET has the insn's output mode and
+   satisfies its output predicate, else to a new pseudo.  Returns that rtx,
+   or a null rtx for an erroneous argument or if no insn was generated.  */
+
+rtx
+alpha_expand_builtin (exp, target, subtarget, mode, ignore)
+     tree exp;
+     rtx target;
+     rtx subtarget ATTRIBUTE_UNUSED;
+     enum machine_mode mode ATTRIBUTE_UNUSED;
+     int ignore ATTRIBUTE_UNUSED;
+{
+  tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
+  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
+  enum insn_code icode;
+  tree arglist;
+  int arity = 0, width = 0;
+#define MAX_ARGS 2
+  rtx op[MAX_ARGS], pat;
+  enum machine_mode tmode;
+
+  switch (fcode)
+    {
+    case ALPHA_BUILTIN_BIC:	icode = CODE_FOR_bic;		       break;
+    case ALPHA_BUILTIN_EQV:	icode = CODE_FOR_eqv;		       break;
+    case ALPHA_BUILTIN_ORNOT:	icode = CODE_FOR_ornot;		       break;
+    case ALPHA_BUILTIN_CMPBGE:	icode = CODE_FOR_cmpbge;	       break;
+    case ALPHA_BUILTIN_EXTQL:	icode = CODE_FOR_extxl_le; width = 64; break;
+    case ALPHA_BUILTIN_EXTQH:	icode = CODE_FOR_extqh_le;	       break;
+    case ALPHA_BUILTIN_ZAP:	icode = CODE_FOR_zap;		       break;
+    case ALPHA_BUILTIN_ZAPNOT:	icode = CODE_FOR_zapnot;	       break;
+    case ALPHA_BUILTIN_AMASK:	icode = CODE_FOR_amask;		       break;
+    case ALPHA_BUILTIN_IMPLVER: icode = CODE_FOR_implver;	       break;
+    case ALPHA_BUILTIN_RPCC:	icode = CODE_FOR_rpcc;		       break;
+    case ALPHA_BUILTIN_MINUB8:	icode = CODE_FOR_minub8;	       break;
+    case ALPHA_BUILTIN_MINSB8:	icode = CODE_FOR_minsb8;	       break;
+    case ALPHA_BUILTIN_MINUW4:	icode = CODE_FOR_minuw4;	       break;
+    case ALPHA_BUILTIN_MINSW4:	icode = CODE_FOR_minsw4;	       break;
+    case ALPHA_BUILTIN_MAXUB8:	icode = CODE_FOR_maxub8;	       break;
+    case ALPHA_BUILTIN_MAXSB8:	icode = CODE_FOR_maxsb8;	       break;
+    case ALPHA_BUILTIN_MAXUW4:	icode = CODE_FOR_maxuw4;	       break;
+    case ALPHA_BUILTIN_MAXSW4:	icode = CODE_FOR_maxsw4;	       break;
+    case ALPHA_BUILTIN_PERR:	icode = CODE_FOR_perr;		       break;
+    case ALPHA_BUILTIN_PKLB:	icode = CODE_FOR_pklb;		       break;
+    case ALPHA_BUILTIN_PKWB:	icode = CODE_FOR_pkwb;		       break;
+    case ALPHA_BUILTIN_UNPKBL:	icode = CODE_FOR_unpkbl;	       break;
+    case ALPHA_BUILTIN_UNPKBW:	icode = CODE_FOR_unpkbw;	       break;
+    default: internal_error("bad builtin fcode");
+    }
+
+  for (arglist = TREE_OPERAND (exp, 1); arglist;
+       arglist = TREE_CHAIN (arglist))
+    {
+      tree arg = TREE_VALUE (arglist);
+      /* When WIDTH is set it occupies insn operand 2; skip over it.  */
+      int opno = 1 + arity + (width && arity > 0);
+      enum machine_mode opmode;
+
+      if (arg == error_mark_node)
+	return NULL_RTX;
+      if (arity >= MAX_ARGS)	/* Do not write past the end of OP.  */
+	internal_error("bad builtin arity");
+      op[arity] = expand_expr (arg, NULL_RTX, VOIDmode, 0);
+      opmode = insn_data[icode].operand[opno].mode;
+      if (!(*insn_data[icode].operand[opno].predicate) (op[arity], opmode))
+	op[arity] = copy_to_mode_reg (opmode, op[arity]);
+      arity++;
+    }
+
+  tmode = insn_data[icode].operand[0].mode;
+  if (!target
+      || GET_MODE (target) != tmode
+      || !(*insn_data[icode].operand[0].predicate) (target, tmode))
+    target = gen_reg_rtx (tmode);
+
+  if (width)
+    pat = GEN_FCN (icode) (target, op[0], GEN_INT (width), op[1]);
+  else if (arity == 0)
+    pat = GEN_FCN (icode) (target);
+  else if (arity == 1)
+    pat = GEN_FCN (icode) (target, op[0]);
+  else
+    pat = GEN_FCN (icode) (target, op[0], op[1]);
+  if (!pat)
+    return 0;
+  emit_insn (pat);
+  return target;
+}
 
 /* This page contains routines that are used to determine what the function
    prologue and epilogue code will do and write them out.  */
diff -upr -X X.gcc ../cvs/gcc/gcc/config/alpha/alpha.h gcc-alpha-builtins/gcc/config/alpha/alpha.h
--- ../cvs/gcc/gcc/config/alpha/alpha.h	Tue May 21 22:59:48 2002
+++ gcc-alpha-builtins/gcc/config/alpha/alpha.h	Sat May 25 19:37:44 2002
@@ -2114,3 +2114,30 @@ do {							\
    Used for C++ multiple inheritance.  */
 #define ASM_OUTPUT_MI_THUNK(FILE, THUNK_FNDECL, DELTA, FUNCTION) \
   alpha_output_mi_thunk_osf (FILE, THUNK_FNDECL, DELTA, FUNCTION)
+
+enum alpha_builtins {	/* DECL_FUNCTION_CODE values of the Alpha builtins.  */
+  ALPHA_BUILTIN_BIC,
+  ALPHA_BUILTIN_EQV,
+  ALPHA_BUILTIN_ORNOT,
+  ALPHA_BUILTIN_CMPBGE,
+  ALPHA_BUILTIN_EXTQL,
+  ALPHA_BUILTIN_EXTQH,
+  ALPHA_BUILTIN_ZAP,
+  ALPHA_BUILTIN_ZAPNOT,
+  ALPHA_BUILTIN_AMASK,
+  ALPHA_BUILTIN_IMPLVER,
+  ALPHA_BUILTIN_RPCC,
+  ALPHA_BUILTIN_MINUB8,
+  ALPHA_BUILTIN_MINSB8,
+  ALPHA_BUILTIN_MINUW4,
+  ALPHA_BUILTIN_MINSW4,
+  ALPHA_BUILTIN_MAXUB8,
+  ALPHA_BUILTIN_MAXSB8,
+  ALPHA_BUILTIN_MAXUW4,
+  ALPHA_BUILTIN_MAXSW4,
+  ALPHA_BUILTIN_PERR,
+  ALPHA_BUILTIN_PKLB,
+  ALPHA_BUILTIN_PKWB,
+  ALPHA_BUILTIN_UNPKBL,
+  ALPHA_BUILTIN_UNPKBW
+};
diff -upr -X X.gcc ../cvs/gcc/gcc/config/alpha/alpha.md gcc-alpha-builtins/gcc/config/alpha/alpha.md
--- ../cvs/gcc/gcc/config/alpha/alpha.md	Thu May  9 15:32:01 2002
+++ gcc-alpha-builtins/gcc/config/alpha/alpha.md	Sat May 25 22:07:07 2002
@@ -40,6 +40,27 @@
    (UNSPEC_LITUSE	12)
    (UNSPEC_SIBCALL	13)
    (UNSPEC_SYMBOL	14)
+   (UNSPEC_BIC		15)
+   (UNSPEC_EQV		16)
+   (UNSPEC_ORNOT	17)
+   (UNSPEC_CMPBGE	18)
+   (UNSPEC_ZAP		19)
+   (UNSPEC_ZAPNOT	20)
+   (UNSPEC_AMASK	21)
+   (UNSPEC_IMPLVER	22)
+   (UNSPEC_MINUB8	23)
+   (UNSPEC_MINSB8	24)
+   (UNSPEC_MINUW4	25)
+   (UNSPEC_MINSW4	26)
+   (UNSPEC_MAXUB8	27)
+   (UNSPEC_MAXSB8	28)
+   (UNSPEC_MAXUW4	29)
+   (UNSPEC_MAXSW4	30)
+   (UNSPEC_PERR		31)
+   (UNSPEC_PKLB		32)
+   (UNSPEC_PKWB		33)
+   (UNSPEC_UNPKBL	34)
+   (UNSPEC_UNPKBW	35)
   ])
 
 ;; UNSPEC_VOLATILE:
@@ -57,6 +78,7 @@
    (UNSPECV_FORCE_MOV	9)
    (UNSPECV_LDGP1	10)
    (UNSPECV_PLDGP2	11)	; prologue ldgp
+   (UNSPECV_RPCC	12)
   ])
 
 ;; Where necessary, the suffixes _le and _be are used to distinguish between
@@ -6608,6 +6630,197 @@ fadd,fmul,fcpys,fdiv,fsqrt,misc,mvi,ftoi
   [(const_int 2)]
   ""
   "ldq_u $31,0($30)")
+
+;; Patterns used only by alpha_expand_builtin to implement __builtin_alpha_*.
+;; Each one expresses its result as an unspec (unspec_volatile for rpcc).
+(define_insn "bic"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(unspec:DI [(match_operand:DI 1 "reg_or_0_operand" "rJ")
+		    (match_operand:DI 2 "reg_or_8bit_operand" "rI")]
+		   UNSPEC_BIC))]
+  ""
+  "bic %r1,%2,%0"
+  [(set_attr "type" "ilog")])
+
+(define_insn "eqv"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(unspec:DI [(match_operand:DI 1 "reg_or_0_operand" "rJ")
+		    (match_operand:DI 2 "reg_or_8bit_operand" "rI")]
+		   UNSPEC_EQV))]
+  ""
+  "eqv %r1,%2,%0"
+  [(set_attr "type" "ilog")])
+
+(define_insn "ornot"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(unspec:DI [(match_operand:DI 1 "reg_or_0_operand" "rJ")
+		    (match_operand:DI 2 "reg_or_8bit_operand" "rI")]
+		   UNSPEC_ORNOT))]
+  ""
+  "ornot %r1,%2,%0"
+  [(set_attr "type" "ilog")])
+
+(define_insn "cmpbge"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(unspec:DI [(match_operand:DI 1 "reg_or_0_operand" "rJ")
+		    (match_operand:DI 2 "reg_or_8bit_operand" "rI")]
+		   UNSPEC_CMPBGE))]
+  ""
+  "cmpbge %r1,%2,%0"
+  [(set_attr "type" "icmp")])	; ??? on ev6, it's ilog
+
+(define_insn "zap"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(unspec:DI [(match_operand:DI 1 "reg_or_0_operand" "rJ")
+		    (match_operand:DI 2 "reg_or_8bit_operand" "rI")]
+		   UNSPEC_ZAP))]
+  ""
+  "zap %r1,%2,%0"
+  [(set_attr "type" "shift")])
+
+(define_insn "zapnot"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(unspec:DI [(match_operand:DI 1 "reg_or_0_operand" "rJ")
+		    (match_operand:DI 2 "reg_or_8bit_operand" "rI")]
+		   UNSPEC_ZAPNOT))]
+  ""
+  "zapnot %r1,%2,%0"
+  [(set_attr "type" "shift")])
+
+(define_insn "amask"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(unspec:DI [(match_operand:DI 1 "reg_or_8bit_operand" "rI")]
+		   UNSPEC_AMASK))]
+  ""
+  "amask %1,%0"
+  [(set_attr "type" "misc")]) ; ??? "misc" used for want of a correct class
+
+(define_insn "implver"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+  	(unspec:DI [(const_int 0)] UNSPEC_IMPLVER))]	; no input operands
+  ""
+  "implver %0"
+  [(set_attr "type" "misc")]) ; ??? "misc" used for want of a correct class
+
+(define_insn "rpcc"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+  	(unspec_volatile:DI [(const_int 0)] UNSPECV_RPCC))] ; the only volatile one
+  ""
+  "rpcc %0"
+  [(set_attr "type" "misc")]) ; ??? "misc" used for want of a correct class
+
+(define_insn "minub8"	; "%" on operand 1 marks operands 1 and 2 commutative
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(unspec:DI [(match_operand:DI 1 "reg_or_0_operand" "%rJ")
+		    (match_operand:DI 2 "reg_or_8bit_operand" "rI")]
+		   UNSPEC_MINUB8))]
+  ""
+  "minub8 %r1,%2,%0"
+  [(set_attr "type" "mvi")])
+
+(define_insn "minsb8"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(unspec:DI [(match_operand:DI 1 "reg_or_0_operand" "%rJ")
+		    (match_operand:DI 2 "reg_or_8bit_operand" "rI")]
+		   UNSPEC_MINSB8))]
+  ""
+  "minsb8 %r1,%2,%0"
+  [(set_attr "type" "mvi")])
+
+(define_insn "minuw4"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(unspec:DI [(match_operand:DI 1 "reg_or_0_operand" "%rJ")
+		    (match_operand:DI 2 "reg_or_8bit_operand" "rI")]
+		   UNSPEC_MINUW4))]
+  ""
+  "minuw4 %r1,%2,%0"
+  [(set_attr "type" "mvi")])
+
+(define_insn "minsw4"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(unspec:DI [(match_operand:DI 1 "reg_or_0_operand" "%rJ")
+		    (match_operand:DI 2 "reg_or_8bit_operand" "rI")]
+		   UNSPEC_MINSW4))]
+  ""
+  "minsw4 %r1,%2,%0"
+  [(set_attr "type" "mvi")])
+
+(define_insn "maxub8"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(unspec:DI [(match_operand:DI 1 "reg_or_0_operand" "%rJ")
+		    (match_operand:DI 2 "reg_or_8bit_operand" "rI")]
+		   UNSPEC_MAXUB8))]
+  ""
+  "maxub8 %r1,%2,%0"
+  [(set_attr "type" "mvi")])
+
+(define_insn "maxsb8"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(unspec:DI [(match_operand:DI 1 "reg_or_0_operand" "%rJ")
+		    (match_operand:DI 2 "reg_or_8bit_operand" "rI")]
+		   UNSPEC_MAXSB8))]
+  ""
+  "maxsb8 %r1,%2,%0"
+  [(set_attr "type" "mvi")])
+
+(define_insn "maxuw4"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(unspec:DI [(match_operand:DI 1 "reg_or_0_operand" "%rJ")
+		    (match_operand:DI 2 "reg_or_8bit_operand" "rI")]
+		   UNSPEC_MAXUW4))]
+  ""
+  "maxuw4 %r1,%2,%0"
+  [(set_attr "type" "mvi")])
+
+(define_insn "maxsw4"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(unspec:DI [(match_operand:DI 1 "reg_or_0_operand" "%rJ")
+		    (match_operand:DI 2 "reg_or_8bit_operand" "rI")]
+		   UNSPEC_MAXSW4))]
+  ""
+  "maxsw4 %r1,%2,%0"
+  [(set_attr "type" "mvi")])
+
+(define_insn "perr"	; NOTE(review): op 2 predicate says 8bit, constraint "rJ"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(unspec:DI [(match_operand:DI 1 "reg_or_0_operand" "%rJ")
+		    (match_operand:DI 2 "reg_or_8bit_operand" "rJ")]
+		   UNSPEC_PERR))]
+  ""
+  "perr %r1,%r2,%0"
+  [(set_attr "type" "mvi")])
+
+(define_insn "pklb"	; from here on: one-input "mvi"-type patterns
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(unspec:DI [(match_operand:DI 1 "reg_or_0_operand" "rJ")]
+		   UNSPEC_PKLB))]
+  ""
+  "pklb %r1,%0"
+  [(set_attr "type" "mvi")])
+
+(define_insn "pkwb"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(unspec:DI [(match_operand:DI 1 "reg_or_0_operand" "rJ")]
+		   UNSPEC_PKWB))]
+  ""
+  "pkwb %r1,%0"
+  [(set_attr "type" "mvi")])
+
+(define_insn "unpkbl"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(unspec:DI [(match_operand:DI 1 "reg_or_0_operand" "rJ")]
+		   UNSPEC_UNPKBL))]
+  ""
+  "unpkbl %r1,%0"
+  [(set_attr "type" "mvi")])
+
+(define_insn "unpkbw"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(unspec:DI [(match_operand:DI 1 "reg_or_0_operand" "rJ")]
+		   UNSPEC_UNPKBW))]
+  ""
+  "unpkbw %r1,%0"
+  [(set_attr "type" "mvi")])
 
 ;; On Unicos/Mk we use a macro for aligning code.
 
#include <stdint.h>

/* Raw memory accessors.  Each casts P to a pointer to the access type and
   dereferences it directly: ldq reads a uint64_t, ldl reads an int32_t,
   and stl stores L through a uint32_t pointer.
   NOTE(review): presumably P must be suitably aligned for the access type;
   nothing here checks that.  */
#define ldq(p) (*(const uint64_t *) (p))
#define ldl(p) (*(const int32_t *) (p))
#define stl(l, p) do { *(uint32_t *) (p) = (l); } while (0)

/* Two interchangeable definitions of the same instruction wrappers.
   With "#if 1" bic is plain C and the others are single-instruction inline
   asm returning a uint64_t; with "#if 0" the same names map onto the
   corresponding __builtin_alpha_* functions instead.  */
#if 1
#define bic(a, b)    ((a) & ~(b))
#define zap(a, b)    ({ uint64_t __r; asm ("zap     %r1,%2,%0"  : "=r" (__r) : "rJ"  (a), "rI" (b)); __r; })
#define minsw4(a, b) ({ uint64_t __r; asm ("minsw4  %r1,%2,%0"  : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; })
#define maxsw4(a, b) ({ uint64_t __r; asm ("maxsw4  %r1,%2,%0"  : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; })
#define pkwb(a)      ({ uint64_t __r; asm ("pkwb    %r1,%0"     : "=r" (__r) : "rJ"  (a));	     __r; })
#define unpkbw(a)    ({ uint64_t __r; asm ("unpkbw  %r1,%0"     : "=r" (__r) : "rJ"  (a));	     __r; })
#else
#define bic	__builtin_alpha_bic
#define zap	__builtin_alpha_zap
#define minsw4	__builtin_alpha_minsw4
#define maxsw4	__builtin_alpha_maxsw4
#define pkwb	__builtin_alpha_pkwb
#define unpkbw	__builtin_alpha_unpkbw
#endif

/* Add the four signed words at COEFFS to the four unsigned bytes at DST,
   clamp each sum to the range selected by CLAMPMASK, and store the four
   resulting bytes back to DST.  SIGNMASK holds the top bit of each word.  */
static inline void add_clamp4(const int16_t *coeffs, uint8_t *dst,
                              uint64_t clampmask, uint64_t signmask)
{
    uint64_t sum    = ldq(coeffs);
    uint64_t bytes  = unpkbw(ldl(dst));
    uint64_t topbit = sum & signmask;

    /* Clear each word's top bit before adding so that no carry can cross
       into the neighbouring word, then put the top bits back.  */
    sum  = bic(sum, signmask);
    sum += bytes;
    sum ^= topbit;

    sum = maxsw4(sum, 0);
    sum = minsw4(sum, clampmask);
    stl(pkwb(sum), dst);
}

/* Add signed words to unsigned bytes with clamping.  Processes 8 rows of
   8 values: BLOCK advances by 8 words per row, PIXELS by LINE_SIZE bytes.  */
void add_pixels_clamped(const int16_t *block, uint8_t *pixels,
			int line_size)
{
    const uint64_t clampmask = zap(-1, 0xaa); /* 0x00ff00ff00ff00ff */
    uint64_t signmask = zap(-1, 0x33);

    signmask ^= signmask >> 1;  /* 0x8000800080008000 */

    for (int row = 0; row < 8; row++) {
        add_clamp4(block,     pixels,     clampmask, signmask);
        add_clamp4(block + 4, pixels + 4, clampmask, signmask);

        pixels += line_size;
        block  += 8;
    }
}


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]