This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH 2/2, x86] Add palignr support for AVX2.


Hi,

The patch adds use of the palignr instruction when we have a one-operand
permutation that is a rotation, like:
{5 6 7 0 1 2 3 4}

We treat this as {5 6 7 8 9 a b c} on 2 operands (both being the same
vector), and therefore use palignr with a shift of 5 elements.

Bootstrap and make check passed.

Is it ok?

Evgeny

2014-04-29  Evgeny Stupachenko  <evstupac@gmail.com>

        * config/i386/i386.c (expand_vec_perm_palignr_one_operand): New.
        Enable PALIGNR for one-operand permutations.
        (expand_vec_perm_1): Try PALIGNR on one operand.


diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 002d295..8950cf7 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -42807,6 +42807,97 @@ expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
   return true;
 }

+/* A subroutine of expand_vec_perm_1.  Try to expand a one-operand rotation
+   such as {5 6 7 0 1 2 3 4} with palignr.  This is better than pshufb as it
+   does not need a big constant and is faster on some x86 architectures.
+   Return true if insns were emitted, false if D is not handled here.  */
+
+static bool
+expand_vec_perm_palignr_one_operand (struct expand_vec_perm_d *d)
+{
+  unsigned i, nelt = d->nelt;
+  unsigned min;
+  unsigned in_order_length, in_order_length_max;
+  rtx shift, shift1, target, tmp;
+
+  /* PALIGNR of two 128-bit registers takes only 1 instruction.
+     Requires SSSE3.  */
+  if (GET_MODE_SIZE (d->vmode) == 16)
+    {
+      if(!TARGET_SSSE3)
+       return false;
+    }
+  /* PALIGNR of two 256-bit registers on AVX2 costs only 2 instructions:
+     PERM and PALIGNR.  It is more profitable than 2 PSHUFB and PERM.  */
+  else if (GET_MODE_SIZE (d->vmode) == 32)
+    {
+      if(!TARGET_AVX2)
+       return false;
+    }
+  else
+    return false;  /* Only 16- and 32-byte vector modes are handled.  */
+
+  if (d->one_operand_p != true)
+    return false;
+
+  /* Look for a rotation like {5 6 7 0 1 2 3 4}: scan the indices twice,
+     cyclically, recording at each break the longest run of +1 steps.  */
+  in_order_length = 0;
+  in_order_length_max = 0;
+  if (d->one_operand_p == true)  /* Always true here; see the check above.  */
+    for (i = 0; i < 2 * nelt; ++i)
+      {
+       if ((d->perm[(i + 1) & (nelt - 1)] -
+            d->perm[i & (nelt - 1)]) != 1)  /* Masking wraps the index; assumes nelt is a power of two -- TODO confirm.  */
+         {
+           if (in_order_length > in_order_length_max)
+               in_order_length_max = in_order_length;
+             in_order_length = 0;  /* Inside the braces: reset on every break.  */
+         }
+       else
+         in_order_length++;
+      }
+
+  /* Not a rotation (no run of nelt - 1 steps): try something else.  */
+  if (in_order_length_max != nelt - 1)
+    return false;
+
+  min = d->perm[0];  /* Index of the first selected element.  */
+
+  shift = GEN_INT (min * GET_MODE_BITSIZE (GET_MODE_INNER (d->vmode)));
+  shift1 = GEN_INT ((min - nelt / 2) *
+          GET_MODE_BITSIZE (GET_MODE_INNER (d->vmode)));  /* Unsigned, so it wraps if min < nelt / 2; used only when min >= nelt / 2.  */
+
+  if (GET_MODE_SIZE (d->vmode) != 32)  /* NOTE(review): insns are emitted unconditionally from here; confirm whether a test-only mode must return earlier.  */
+    {
+      target = gen_reg_rtx (TImode);
+      emit_insn (gen_ssse3_palignrti (target, gen_lowpart (TImode, d->op1),
+                                     gen_lowpart (TImode, d->op0), shift));
+    }
+  else
+    {
+      target = gen_reg_rtx (V2TImode);
+      tmp = gen_reg_rtx (V4DImode);
+      emit_insn (gen_avx2_permv2ti (tmp,
+                                   gen_lowpart (V4DImode, d->op0),
+                                   gen_lowpart (V4DImode, d->op1),
+                                   GEN_INT (33)));  /* 33 == 0x21; presumably picks op0's high and op1's low 128-bit lane -- confirm.  */
+      if (min < nelt / 2)
+        emit_insn (gen_avx2_palignrv2ti (target,
+                                        gen_lowpart (V2TImode, tmp),
+                                        gen_lowpart (V2TImode, d->op0),
+                                        shift));
+      else
+       emit_insn (gen_avx2_palignrv2ti (target,
+                                        gen_lowpart (V2TImode, d->op1),
+                                        gen_lowpart (V2TImode, tmp),
+                                        shift1));
+    }
+  emit_move_insn (d->target, gen_lowpart (d->vmode, target));  /* Copy the result to D's target in D's vector mode.  */
+
+  return true;
+}
+
 static bool expand_vec_perm_vpshufb2_vpermq (struct expand_vec_perm_d *d);

 /* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to instantiate D
@@ -42943,6 +43034,10 @@ expand_vec_perm_1 (struct expand_vec_perm_d *d)
   if (expand_vec_perm_vpermil (d))
     return true;

+  /* Try palignr on one operand.  */
+  if (expand_vec_perm_palignr_one_operand (d))
+    return true;
+
   /* Try the SSSE3 pshufb or XOP vpperm or AVX2 vperm2i128,
      vpshufb, vpermd, vpermps or vpermq variable permutation.  */
   if (expand_vec_perm_pshufb (d))


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]