[PATCH] i386: Add PPERM two-operand 64bit vector permutation [PR89021]

Uros Bizjak ubizjak@gmail.com
Wed Jun 23 14:17:35 GMT 2021


Add emulation of V8QI PPERM permutations for TARGET_XOP targets.  Similar
to PSHUFB, the permutation is performed with the V16QI PPERM instruction,
where the selector is defined in V16QI mode with inactive elements set to
0x80.  Specific to two-operand permutations is the remapping of elements
from the second operand (e.g. e[8] -> e[16]), as we have to account for
the inactive elements from the first operand.

2021-06-23  Uroš Bizjak  <ubizjak@gmail.com>

gcc/
    PR target/89021
    * config/i386/i386-expand.c (expand_vec_perm_pshufb):
    Handle 64bit modes for TARGET_XOP.  Use indirect gen_* functions.
    * config/i386/mmx.md (mmx_ppermv64): New insn pattern.
    * config/i386/i386.md (unspec): Move UNSPEC_XOP_PERMUTE from ...
    * config/i386/sse.md (unspec): ... here.

Bootstrapped and regression tested on x86_64-linux-gnu {,-m32}.

Pushed to master.

Uros.
-------------- next part --------------
diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c
index 2986b49065c..9c922bf1bf1 100644
--- a/gcc/config/i386/i386-expand.c
+++ b/gcc/config/i386/i386-expand.c
@@ -17467,10 +17467,23 @@ expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
 
   if (!d->one_operand_p)
     {
-      if (!TARGET_XOP || GET_MODE_SIZE (d->vmode) != 16)
+      if (GET_MODE_SIZE (d->vmode) == 8)
+	{
+	  if (!TARGET_XOP)
+	    return false;
+	  vmode = V8QImode;
+	}
+      else if (GET_MODE_SIZE (d->vmode) == 16)
+	{
+	  if (!TARGET_XOP)
+	    return false;
+	}
+      else if (GET_MODE_SIZE (d->vmode) == 32)
 	{
-	  if (TARGET_AVX2
-	      && valid_perm_using_mode_p (V2TImode, d))
+	  if (!TARGET_AVX2)
+	    return false;
+
+	  if (valid_perm_using_mode_p (V2TImode, d))
 	    {
 	      if (d->testing_p)
 		return true;
@@ -17492,6 +17505,8 @@ expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
 	    }
 	  return false;
 	}
+      else
+	return false;
     }
   else
     {
@@ -17651,8 +17666,22 @@ expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
     {
       rtx m128 = GEN_INT (-128);
 
+      /* Remap elements from the second operand, as we have to
+	 account for inactive top 8 elements from the first operand.  */
+      if (!d->one_operand_p)
+	for (i = 0; i < nelt; ++i)
+	  {
+	    int ival = INTVAL (rperm[i]);
+	    if (ival >= 8)
+	      ival += 8;
+	    rperm[i] = GEN_INT (ival);
+	  }
+
+      /* V8QI is emulated with V16QI instruction, fill inactive
+	 elements in the top 8 positions with zeros.  */
       for (i = nelt; i < 16; ++i)
 	rperm[i] = m128;
+
       vpmode = V16QImode;
     }
 
@@ -17660,36 +17689,54 @@ expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
 				gen_rtvec_v (GET_MODE_NUNITS (vpmode), rperm));
   vperm = force_reg (vpmode, vperm);
 
-  target = d->target;
-  if (d->vmode != vmode)
+  if (vmode == d->vmode)
+    target = d->target;
+  else
     target = gen_reg_rtx (vmode);
+
   op0 = gen_lowpart (vmode, d->op0);
+
   if (d->one_operand_p)
     {
+      rtx (*gen) (rtx, rtx, rtx);
+
       if (vmode == V8QImode)
-	emit_insn (gen_mmx_pshufbv8qi3 (target, op0, vperm));
+	gen = gen_mmx_pshufbv8qi3;
       else if (vmode == V16QImode)
-	emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
+	gen = gen_ssse3_pshufbv16qi3;
       else if (vmode == V32QImode)
-	emit_insn (gen_avx2_pshufbv32qi3 (target, op0, vperm));
+	gen = gen_avx2_pshufbv32qi3;
       else if (vmode == V64QImode)
-	emit_insn (gen_avx512bw_pshufbv64qi3 (target, op0, vperm));
+	gen = gen_avx512bw_pshufbv64qi3;
       else if (vmode == V8SFmode)
-	emit_insn (gen_avx2_permvarv8sf (target, op0, vperm));
+	gen = gen_avx2_permvarv8sf;
       else if (vmode == V8SImode)
-	emit_insn (gen_avx2_permvarv8si (target, op0, vperm));
+	gen = gen_avx2_permvarv8si;
       else if (vmode == V16SFmode)
-	emit_insn (gen_avx512f_permvarv16sf (target, op0, vperm));
+	gen = gen_avx512f_permvarv16sf;
       else if (vmode == V16SImode)
-	emit_insn (gen_avx512f_permvarv16si (target, op0, vperm));
+	gen = gen_avx512f_permvarv16si;
       else
 	gcc_unreachable ();
+
+      emit_insn (gen (target, op0, vperm));
     }
   else
     {
+      rtx (*gen) (rtx, rtx, rtx, rtx);
+
       op1 = gen_lowpart (vmode, d->op1);
-      emit_insn (gen_xop_pperm (target, op0, op1, vperm));
+
+      if (vmode == V8QImode)
+	gen = gen_mmx_ppermv64;
+      else if (vmode == V16QImode)
+	gen = gen_xop_pperm;
+      else
+	gcc_unreachable ();
+
+      emit_insn (gen (target, op0, op1, vperm));
     }
+
   if (target != d->target)
     emit_move_insn (d->target, gen_lowpart (d->vmode, target));
 
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 4e242105719..9043be3105d 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -120,6 +120,7 @@ (define_c_enum "unspec" [
   UNSPEC_MOVMSK
   UNSPEC_BLENDV
   UNSPEC_PSHUFB
+  UNSPEC_XOP_PERMUTE
   UNSPEC_RCP
   UNSPEC_RSQRT
   UNSPEC_PSADBW
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index a107ac5ccb4..7a827dceb01 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -2331,6 +2331,19 @@ (define_insn "*xop_pcmov_<mode>"
   "vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "type" "sse4arg")])
 
+;; XOP permute instructions
+(define_insn "mmx_ppermv64"
+  [(set (match_operand:V8QI 0 "register_operand" "=x")
+	(unspec:V8QI
+	  [(match_operand:V8QI 1 "register_operand" "x")
+	   (match_operand:V8QI 2 "register_operand" "x")
+	   (match_operand:V16QI 3 "nonimmediate_operand" "xm")]
+	  UNSPEC_XOP_PERMUTE))]
+  "TARGET_XOP && TARGET_MMX_WITH_SSE"
+  "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+  [(set_attr "type" "sse4arg")
+   (set_attr "mode" "TI")])
+
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;;
 ;; Parallel integral logical operations
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index f5f9403db44..c5f739c73b5 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -53,7 +53,6 @@ (define_c_enum "unspec" [
   UNSPEC_FMADDSUB
   UNSPEC_XOP_UNSIGNED_CMP
   UNSPEC_XOP_TRUEFALSE
-  UNSPEC_XOP_PERMUTE
   UNSPEC_FRCZ
 
   ;; For AES support


More information about the Gcc-patches mailing list