This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: PATCH: Support SSE4.1 sign/zero extend vector instructions


On Tue, May 08, 2007 at 10:13:07PM +0200, Uros Bizjak wrote:
> Hello!
> 
> >This patch enables SSE4.1 sign/zero extend vector instructions. It
> >needs the SSE4.1 patch:
> >
> >http://gcc.gnu.org/ml/gcc-patches/2007-04/msg01586.html
> >
> >
> >H.J.
> >-----
> >2007-05-08  H.J. Lu  <hongjiu.lu@intel.com>
> >
> >	* config/i386/i386.c (ix86_expand_sse4_unpack): New.
> >	(ix86_expand_sse_unpack): Call ix86_expand_sse4_unpack to
> >	unpack the input vector if SSE4.1 is enabled.
> >
> >  
> IMO, it is much more informative if we switch between the called functions
> in the define_expand, like:
> 
> (define_expand "vec_unpacku_hi_v16qi"
>  [(match_operand:V8HI 0 "register_operand" "")
>   (match_operand:V16QI 1 "register_operand" "")]
>  "TARGET_SSE2"
> {
>  if (TARGET_SSE4_1)
>    ix86_expand_sse4_unpack (operands, true, true);
>  else
>    ix86_expand_sse_unpack (operands, true, true);
>  DONE;
> })
> 
> We already have the "reduc_splus_v4sf" expander implemented in a similar way.
> 

Here is the new patch.


H.J.
----
2007-05-08  H.J. Lu  <hongjiu.lu@intel.com>

	* config/i386/i386-protos.h (ix86_expand_sse4_unpack): New.

	* config/i386/i386.c (ix86_expand_sse4_unpack): New.

	* config/i386/sse.md (vec_unpacku_hi_v16qi): Call
	ix86_expand_sse4_unpack if SSE4.1 is enabled.
	(vec_unpacks_hi_v16qi): Likewise.
	(vec_unpacku_lo_v16qi): Likewise.
	(vec_unpacks_lo_v16qi): Likewise.
	(vec_unpacku_hi_v8hi): Likewise.
	(vec_unpacks_hi_v8hi): Likewise.
	(vec_unpacku_lo_v8hi): Likewise.
	(vec_unpacks_lo_v8hi): Likewise.
	(vec_unpacku_hi_v4si): Likewise.
	(vec_unpacks_hi_v4si): Likewise.
	(vec_unpacku_lo_v4si): Likewise.
	(vec_unpacks_lo_v4si): Likewise.

--- gcc/config/i386/i386-protos.h.unpack	2007-04-30 12:39:30.000000000 -0700
+++ gcc/config/i386/i386-protos.h	2007-05-08 13:34:40.000000000 -0700
@@ -112,6 +112,7 @@ extern int ix86_expand_fp_movcc (rtx[]);
 extern bool ix86_expand_fp_vcond (rtx[]);
 extern bool ix86_expand_int_vcond (rtx[]);
 extern void ix86_expand_sse_unpack (rtx[], bool, bool);
+extern void ix86_expand_sse4_unpack (rtx[], bool, bool);
 extern int ix86_expand_int_addcc (rtx[]);
 extern void ix86_expand_call (rtx, rtx, rtx, rtx, rtx, int);
 extern void x86_initialize_trampoline (rtx, rtx, rtx);
--- gcc/config/i386/i386.c.unpack	2007-05-08 06:21:53.000000000 -0700
+++ gcc/config/i386/i386.c	2007-05-08 13:41:43.000000000 -0700
@@ -12805,6 +12805,55 @@ ix86_expand_sse_unpack (rtx operands[2],
   emit_insn (unpack (dest, operands[1], se));
 }
 
+/* Same job as ix86_expand_sse_unpack, but via SSE4.1 sign/zero extension:
+   UNSIGNED_P selects zero extension, HIGH_P the upper half of operands[1].  */
+
+void
+ix86_expand_sse4_unpack (rtx operands[2], bool unsigned_p, bool high_p)
+{
+  enum machine_mode imode = GET_MODE (operands[1]);
+  rtx (*unpack)(rtx, rtx);
+  rtx src, dest;
+
+  switch (imode)
+    {
+    case V16QImode:
+      if (unsigned_p)
+	unpack = gen_sse4_1_zero_extendv8qiv8hi2;
+      else
+	unpack = gen_sse4_1_extendv8qiv8hi2;
+      break;
+    case V8HImode:
+      if (unsigned_p)
+	unpack = gen_sse4_1_zero_extendv4hiv4si2;
+      else
+	unpack = gen_sse4_1_extendv4hiv4si2;
+      break;
+    case V4SImode:
+      if (unsigned_p)
+	unpack = gen_sse4_1_zero_extendv2siv2di2;
+      else
+	unpack = gen_sse4_1_extendv2siv2di2;
+      break;
+    default:
+      gcc_unreachable ();
+    }
+
+  dest = operands[0];
+  if (high_p)
+    {
+      /* Shift the high 8 bytes of operands[1] down to the low 8 of SRC.  */
+      src = gen_reg_rtx (imode);
+      emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, src),
+				   gen_lowpart (TImode, operands[1]),
+				   GEN_INT (64)));
+    }
+  else
+    src = operands[1];
+
+  emit_insn (unpack (dest, src));
+}
+
 /* Expand conditional increment or decrement using adb/sbb instructions.
    The default case using setcc followed by the conditional move can be
    done by generic code.  */
--- gcc/config/i386/sse.md.unpack	2007-05-08 06:21:53.000000000 -0700
+++ gcc/config/i386/sse.md	2007-05-08 13:37:43.000000000 -0700
@@ -4628,7 +4628,10 @@
    (match_operand:V16QI 1 "register_operand" "")]
   "TARGET_SSE2"
 {
-  ix86_expand_sse_unpack (operands, true, true);
+  if (TARGET_SSE4_1)
+    ix86_expand_sse4_unpack (operands, true, true);
+  else
+    ix86_expand_sse_unpack (operands, true, true);
   DONE;
 })
 
@@ -4637,7 +4640,10 @@
    (match_operand:V16QI 1 "register_operand" "")]
   "TARGET_SSE2"
 {
-  ix86_expand_sse_unpack (operands, false, true);
+  if (TARGET_SSE4_1)
+    ix86_expand_sse4_unpack (operands, false, true);
+  else
+    ix86_expand_sse_unpack (operands, false, true);
   DONE;
 })
 
@@ -4646,7 +4652,10 @@
    (match_operand:V16QI 1 "register_operand" "")]
   "TARGET_SSE2"
 {
-  ix86_expand_sse_unpack (operands, true, false);
+  if (TARGET_SSE4_1)
+    ix86_expand_sse4_unpack (operands, true, false);
+  else
+    ix86_expand_sse_unpack (operands, true, false);
   DONE;
 })
 
@@ -4655,7 +4664,10 @@
    (match_operand:V16QI 1 "register_operand" "")]
   "TARGET_SSE2"
 {
-  ix86_expand_sse_unpack (operands, false, false);
+  if (TARGET_SSE4_1)
+    ix86_expand_sse4_unpack (operands, false, false);
+  else
+    ix86_expand_sse_unpack (operands, false, false);
   DONE;
 })
 
@@ -4664,7 +4676,10 @@
    (match_operand:V8HI 1 "register_operand" "")]
   "TARGET_SSE2"
 {
-  ix86_expand_sse_unpack (operands, true, true);
+  if (TARGET_SSE4_1)
+    ix86_expand_sse4_unpack (operands, true, true);
+  else
+    ix86_expand_sse_unpack (operands, true, true);
   DONE;
 })
 
@@ -4673,7 +4688,10 @@
    (match_operand:V8HI 1 "register_operand" "")]
   "TARGET_SSE2"
 {
-  ix86_expand_sse_unpack (operands, false, true);
+  if (TARGET_SSE4_1)
+    ix86_expand_sse4_unpack (operands, false, true);
+  else
+    ix86_expand_sse_unpack (operands, false, true);
   DONE;
 })
 
@@ -4682,7 +4700,10 @@
    (match_operand:V8HI 1 "register_operand" "")]
   "TARGET_SSE2"
 {
-  ix86_expand_sse_unpack (operands, true, false);
+  if (TARGET_SSE4_1)
+    ix86_expand_sse4_unpack (operands, true, false);
+  else
+    ix86_expand_sse_unpack (operands, true, false);
   DONE;
 })
 
@@ -4691,7 +4712,10 @@
    (match_operand:V8HI 1 "register_operand" "")]
   "TARGET_SSE2"
 {
-  ix86_expand_sse_unpack (operands, false, false);
+  if (TARGET_SSE4_1)
+    ix86_expand_sse4_unpack (operands, false, false);
+  else
+    ix86_expand_sse_unpack (operands, false, false);
   DONE;
 })
 
@@ -4700,7 +4724,10 @@
    (match_operand:V4SI 1 "register_operand" "")]
   "TARGET_SSE2"
 {
-  ix86_expand_sse_unpack (operands, true, true);
+  if (TARGET_SSE4_1)
+    ix86_expand_sse4_unpack (operands, true, true);
+  else
+    ix86_expand_sse_unpack (operands, true, true);
   DONE;
 })
 
@@ -4709,7 +4736,10 @@
    (match_operand:V4SI 1 "register_operand" "")]
   "TARGET_SSE2"
 {
-  ix86_expand_sse_unpack (operands, false, true);
+  if (TARGET_SSE4_1)
+    ix86_expand_sse4_unpack (operands, false, true);
+  else
+    ix86_expand_sse_unpack (operands, false, true);
   DONE;
 })
 
@@ -4718,7 +4748,10 @@
    (match_operand:V4SI 1 "register_operand" "")]
   "TARGET_SSE2"
 {
-  ix86_expand_sse_unpack (operands, true, false);
+  if (TARGET_SSE4_1)
+    ix86_expand_sse4_unpack (operands, true, false);
+  else
+    ix86_expand_sse_unpack (operands, true, false);
   DONE;
 })
 
@@ -4727,7 +4760,10 @@
    (match_operand:V4SI 1 "register_operand" "")]
   "TARGET_SSE2"
 {
-  ix86_expand_sse_unpack (operands, false, false);
+  if (TARGET_SSE4_1)
+    ix86_expand_sse4_unpack (operands, false, false);
+  else
+    ix86_expand_sse_unpack (operands, false, false);
   DONE;
 })
 


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]