gcc.gnu.org Git - gcc.git/commitdiff
[AARCH64] Add zip{1, 2}, uzp{1, 2}, trn{1, 2} support
author James Greenhalgh <james.greenhalgh@arm.com>
Wed, 5 Dec 2012 11:42:37 +0000 (11:42 +0000)
committer James Greenhalgh <jgreenhalgh@gcc.gnu.org>
Wed, 5 Dec 2012 11:42:37 +0000 (11:42 +0000)
for vector permute.

gcc/

* config/aarch64/aarch64-simd-builtins.def: Add new builtins.
* config/aarch64/aarch64-simd.md (simd_type): Add uzp.
(aarch64_<PERMUTE:perm_insn><PERMUTE:perm_hilo><mode>): New.
* config/aarch64/aarch64.c (aarch64_evpc_trn): New.
(aarch64_evpc_uzp): Likewise.
(aarch64_evpc_zip): Likewise.
(aarch64_expand_vec_perm_const_1): Check for trn, zip, uzp patterns.
* config/aarch64/iterators.md (unspec): Add necessary unspecs.
(PERMUTE): New.
(perm_insn): Likewise.
(perm_hilo): Likewise.

From-SVN: r194219

gcc/ChangeLog
gcc/config/aarch64/aarch64-simd-builtins.def
gcc/config/aarch64/aarch64-simd.md
gcc/config/aarch64/aarch64.c
gcc/config/aarch64/iterators.md

index 370149cf86d22cffb76d049655f7f7d782fef708..0ba2ae1822d342963efaa6028999749caa0d5eed 100644 (file)
@@ -1,3 +1,17 @@
+2012-12-05  James Greenhalgh  <james.greenhalgh@arm.com>
+
+       * config/aarch64/aarch64-simd-builtins.def: Add new builtins.
+       * config/aarch64/aarch64-simd.md (simd_type): Add uzp.
+       (aarch64_<PERMUTE:perm_insn><PERMUTE:perm_hilo><mode>): New.
+       * config/aarch64/aarch64.c (aarch64_evpc_trn): New.
+       (aarch64_evpc_uzp): Likewise.
+       (aarch64_evpc_zip): Likewise.
+       (aarch64_expand_vec_perm_const_1): Check for trn, zip, uzp patterns.
+       * config/aarch64/iterators.md (unspec): Add necessary unspecs.
+       (PERMUTE): New.
+       (perm_insn): Likewise.
+       (perm_hilo): Likewise.
+
 2012-12-05  James Greenhalgh  <james.greenhalgh@arm.com>
 
        * config/aarch64/aarch64-protos.h
index b344120ae90ce16b4512fbf3b96246a2a3d378fe..d441417b00ff084586f441d10960ad1f699e8bdf 100644 (file)
   BUILTIN_VDQF (UNOP, fcvtpu)
   BUILTIN_VDQF (UNOP, fcvtms)
   BUILTIN_VDQF (UNOP, fcvtmu)
+
+  /* Implemented by
+     aarch64_<PERMUTE:perm_insn><PERMUTE:perm_hilo><mode>.  */
+  BUILTIN_VALL (BINOP, zip1)
+  BUILTIN_VALL (BINOP, zip2)
+  BUILTIN_VALL (BINOP, uzp1)
+  BUILTIN_VALL (BINOP, uzp2)
+  BUILTIN_VALL (BINOP, trn1)
+  BUILTIN_VALL (BINOP, trn2)
index baee0cc8c5992814cce9b8110fe7c2cbd4044668..febf71d37c464558e6b2cb1a3615314c2b8d71e8 100644 (file)
 ; simd_store4s          store single structure from one lane for four registers (ST4 [index]).
 ; simd_tbl              table lookup.
 ; simd_trn              transpose.
-; simd_zip              zip/unzip.
+; simd_uzp              unzip.
+; simd_zip              zip.
 
 (define_attr "simd_type"
    "simd_abd,\
    simd_store4s,\
    simd_tbl,\
    simd_trn,\
+   simd_uzp,\
    simd_zip,\
    none"
   (const_string "none"))
   DONE;
 })
 
+(define_insn "aarch64_<PERMUTE:perm_insn><PERMUTE:perm_hilo><mode>"  ; One pattern per PERMUTE unspec and VALL mode.
+  [(set (match_operand:VALL 0 "register_operand" "=w")
+       (unspec:VALL [(match_operand:VALL 1 "register_operand" "w")
+                     (match_operand:VALL 2 "register_operand" "w")]
+                      PERMUTE))]  ; The unspec code selects which permute this is.
+  "TARGET_SIMD"
+  "<PERMUTE:perm_insn><PERMUTE:perm_hilo>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"  ; Mnemonic is perm_insn followed by perm_hilo.
+  [(set_attr "simd_type" "simd_<PERMUTE:perm_insn>")
+   (set_attr "simd_mode" "<MODE>")]
+)
+
 (define_insn "aarch64_st2<mode>_dreg"
   [(set (match_operand:TI 0 "aarch64_simd_struct_operand" "=Utv")
        (unspec:TI [(match_operand:OI 1 "register_operand" "w")
index ae1a037cf7853c1a56d622ae04c875b93a53cde3..7bc2f6b896a083030ac759b6e52a29658ecb88b2 100644 (file)
@@ -6919,6 +6919,261 @@ aarch64_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
   aarch64_expand_vec_perm_1 (target, op0, op1, sel);
 }
 
+/* Recognize patterns suitable for the TRN instructions.
+   D describes the constant permutation being expanded.  The selector
+   matches when it starts at element 0 or 1 (recorded in ODD) and, for
+   every even I, perm[I] == I + ODD and
+   perm[I + 1] == (I + nelt + ODD) & MASK.
+   Returns true on a match; unless D->testing_p is set, the matching
+   trn1/trn2 insn is also emitted into D->target.  Returns false when
+   the selector does not match or the vector mode has no generator
+   listed below.  */
+static bool
+aarch64_evpc_trn (struct expand_vec_perm_d *d)
+{
+  unsigned int i, odd, mask, nelt = d->nelt;
+  rtx out, in0, in1, x;
+  rtx (*gen) (rtx, rtx, rtx);
+  enum machine_mode vmode = d->vmode;
+
+  if (GET_MODE_UNIT_SIZE (vmode) > 8)  /* Bail out when the mode's unit size exceeds 8.  */
+    return false;
+
+  /* Note that these are little-endian tests.
+     We correct for big-endian later.  */
+  if (d->perm[0] == 0)
+    odd = 0;
+  else if (d->perm[0] == 1)
+    odd = 1;
+  else
+    return false;
+  mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);  /* Indices wrap within one or two input vectors.  */
+
+  for (i = 0; i < nelt; i += 2)  /* Check the selector one pair of elements at a time.  */
+    {
+      if (d->perm[i] != i + odd)
+       return false;
+      if (d->perm[i + 1] != ((i + nelt + odd) & mask))
+       return false;
+    }
+
+  /* Success!  When only testing, report the match without emitting
+     any insn.  */
+  if (d->testing_p)
+    return true;
+
+  in0 = d->op0;
+  in1 = d->op1;
+  if (BYTES_BIG_ENDIAN)  /* Big-endian: swap the inputs and flip the trn1/trn2 choice.  */
+    {
+      x = in0, in0 = in1, in1 = x;
+      odd = !odd;
+    }
+  out = d->target;
+
+  if (odd)  /* ODD picks the trn2 generators, otherwise trn1.  */
+    {
+      switch (vmode)
+       {
+       case V16QImode: gen = gen_aarch64_trn2v16qi; break;
+       case V8QImode: gen = gen_aarch64_trn2v8qi; break;
+       case V8HImode: gen = gen_aarch64_trn2v8hi; break;
+       case V4HImode: gen = gen_aarch64_trn2v4hi; break;
+       case V4SImode: gen = gen_aarch64_trn2v4si; break;
+       case V2SImode: gen = gen_aarch64_trn2v2si; break;
+       case V2DImode: gen = gen_aarch64_trn2v2di; break;
+       case V4SFmode: gen = gen_aarch64_trn2v4sf; break;
+       case V2SFmode: gen = gen_aarch64_trn2v2sf; break;
+       case V2DFmode: gen = gen_aarch64_trn2v2df; break;
+       default:  /* No generator for this mode.  */
+         return false;
+       }
+    }
+  else
+    {
+      switch (vmode)
+       {
+       case V16QImode: gen = gen_aarch64_trn1v16qi; break;
+       case V8QImode: gen = gen_aarch64_trn1v8qi; break;
+       case V8HImode: gen = gen_aarch64_trn1v8hi; break;
+       case V4HImode: gen = gen_aarch64_trn1v4hi; break;
+       case V4SImode: gen = gen_aarch64_trn1v4si; break;
+       case V2SImode: gen = gen_aarch64_trn1v2si; break;
+       case V2DImode: gen = gen_aarch64_trn1v2di; break;
+       case V4SFmode: gen = gen_aarch64_trn1v4sf; break;
+       case V2SFmode: gen = gen_aarch64_trn1v2sf; break;
+       case V2DFmode: gen = gen_aarch64_trn1v2df; break;
+       default:  /* No generator for this mode.  */
+         return false;
+       }
+    }
+
+  emit_insn (gen (out, in0, in1));
+  return true;
+}
+
+/* Recognize patterns suitable for the UZP instructions.
+   D describes the constant permutation being expanded.  The selector
+   matches when it starts at element 0 or 1 (recorded in ODD) and
+   perm[I] == (2 * I + ODD) & MASK for every I.
+   Returns true on a match; unless D->testing_p is set, the matching
+   uzp1/uzp2 insn is also emitted into D->target.  Returns false when
+   the selector does not match or the vector mode has no generator
+   listed below.  */
+static bool
+aarch64_evpc_uzp (struct expand_vec_perm_d *d)
+{
+  unsigned int i, odd, mask, nelt = d->nelt;
+  rtx out, in0, in1, x;
+  rtx (*gen) (rtx, rtx, rtx);
+  enum machine_mode vmode = d->vmode;
+
+  if (GET_MODE_UNIT_SIZE (vmode) > 8)  /* Bail out when the mode's unit size exceeds 8.  */
+    return false;
+
+  /* Note that these are little-endian tests.
+     We correct for big-endian later.  */
+  if (d->perm[0] == 0)
+    odd = 0;
+  else if (d->perm[0] == 1)
+    odd = 1;
+  else
+    return false;
+  mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);  /* Indices wrap within one or two input vectors.  */
+
+  for (i = 0; i < nelt; i++)  /* Every output element must take every second input element.  */
+    {
+      unsigned elt = (i * 2 + odd) & mask;
+      if (d->perm[i] != elt)
+       return false;
+    }
+
+  /* Success!  When only testing, report the match without emitting
+     any insn.  */
+  if (d->testing_p)
+    return true;
+
+  in0 = d->op0;
+  in1 = d->op1;
+  if (BYTES_BIG_ENDIAN)  /* Big-endian: swap the inputs and flip the uzp1/uzp2 choice.  */
+    {
+      x = in0, in0 = in1, in1 = x;
+      odd = !odd;
+    }
+  out = d->target;
+
+  if (odd)  /* ODD picks the uzp2 generators, otherwise uzp1.  */
+    {
+      switch (vmode)
+       {
+       case V16QImode: gen = gen_aarch64_uzp2v16qi; break;
+       case V8QImode: gen = gen_aarch64_uzp2v8qi; break;
+       case V8HImode: gen = gen_aarch64_uzp2v8hi; break;
+       case V4HImode: gen = gen_aarch64_uzp2v4hi; break;
+       case V4SImode: gen = gen_aarch64_uzp2v4si; break;
+       case V2SImode: gen = gen_aarch64_uzp2v2si; break;
+       case V2DImode: gen = gen_aarch64_uzp2v2di; break;
+       case V4SFmode: gen = gen_aarch64_uzp2v4sf; break;
+       case V2SFmode: gen = gen_aarch64_uzp2v2sf; break;
+       case V2DFmode: gen = gen_aarch64_uzp2v2df; break;
+       default:  /* No generator for this mode.  */
+         return false;
+       }
+    }
+  else
+    {
+      switch (vmode)
+       {
+       case V16QImode: gen = gen_aarch64_uzp1v16qi; break;
+       case V8QImode: gen = gen_aarch64_uzp1v8qi; break;
+       case V8HImode: gen = gen_aarch64_uzp1v8hi; break;
+       case V4HImode: gen = gen_aarch64_uzp1v4hi; break;
+       case V4SImode: gen = gen_aarch64_uzp1v4si; break;
+       case V2SImode: gen = gen_aarch64_uzp1v2si; break;
+       case V2DImode: gen = gen_aarch64_uzp1v2di; break;
+       case V4SFmode: gen = gen_aarch64_uzp1v4sf; break;
+       case V2SFmode: gen = gen_aarch64_uzp1v2sf; break;
+       case V2DFmode: gen = gen_aarch64_uzp1v2df; break;
+       default:  /* No generator for this mode.  */
+         return false;
+       }
+    }
+
+  emit_insn (gen (out, in0, in1));
+  return true;
+}
+
+/* Recognize patterns suitable for the ZIP instructions.
+   D describes the constant permutation being expanded.  The selector
+   matches when it starts at element nelt / 2 or 0 (recorded in HIGH)
+   and, for every I below nelt / 2, perm[2 * I] == (I + HIGH) & MASK
+   and perm[2 * I + 1] is that value plus nelt, again masked.
+   Returns true on a match; unless D->testing_p is set, the matching
+   zip1/zip2 insn is also emitted into D->target.  Returns false when
+   the selector does not match or the vector mode has no generator
+   listed below.  */
+static bool
+aarch64_evpc_zip (struct expand_vec_perm_d *d)
+{
+  unsigned int i, high, mask, nelt = d->nelt;
+  rtx out, in0, in1, x;
+  rtx (*gen) (rtx, rtx, rtx);
+  enum machine_mode vmode = d->vmode;
+
+  if (GET_MODE_UNIT_SIZE (vmode) > 8)  /* Bail out when the mode's unit size exceeds 8.  */
+    return false;
+
+  /* Note that these are little-endian tests.
+     We correct for big-endian later.  */
+  high = nelt / 2;  /* Start by assuming the selector begins in the upper half.  */
+  if (d->perm[0] == high)
+    /* Do Nothing.  */
+    ;
+  else if (d->perm[0] == 0)
+    high = 0;
+  else
+    return false;
+  mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);  /* Indices wrap within one or two input vectors.  */
+
+  for (i = 0; i < nelt / 2; i++)  /* Each iteration checks one interleaved pair.  */
+    {
+      unsigned elt = (i + high) & mask;
+      if (d->perm[i * 2] != elt)
+       return false;
+      elt = (elt + nelt) & mask;
+      if (d->perm[i * 2 + 1] != elt)
+       return false;
+    }
+
+  /* Success!  When only testing, report the match without emitting
+     any insn.  */
+  if (d->testing_p)
+    return true;
+
+  in0 = d->op0;
+  in1 = d->op1;
+  if (BYTES_BIG_ENDIAN)  /* Big-endian: swap the inputs and flip the zip1/zip2 choice.  */
+    {
+      x = in0, in0 = in1, in1 = x;
+      high = !high;  /* From here on HIGH is only tested as a zero/nonzero flag.  */
+    }
+  out = d->target;
+
+  if (high)  /* Nonzero HIGH picks the zip2 generators, otherwise zip1.  */
+    {
+      switch (vmode)
+       {
+       case V16QImode: gen = gen_aarch64_zip2v16qi; break;
+       case V8QImode: gen = gen_aarch64_zip2v8qi; break;
+       case V8HImode: gen = gen_aarch64_zip2v8hi; break;
+       case V4HImode: gen = gen_aarch64_zip2v4hi; break;
+       case V4SImode: gen = gen_aarch64_zip2v4si; break;
+       case V2SImode: gen = gen_aarch64_zip2v2si; break;
+       case V2DImode: gen = gen_aarch64_zip2v2di; break;
+       case V4SFmode: gen = gen_aarch64_zip2v4sf; break;
+       case V2SFmode: gen = gen_aarch64_zip2v2sf; break;
+       case V2DFmode: gen = gen_aarch64_zip2v2df; break;
+       default:  /* No generator for this mode.  */
+         return false;
+       }
+    }
+  else
+    {
+      switch (vmode)
+       {
+       case V16QImode: gen = gen_aarch64_zip1v16qi; break;
+       case V8QImode: gen = gen_aarch64_zip1v8qi; break;
+       case V8HImode: gen = gen_aarch64_zip1v8hi; break;
+       case V4HImode: gen = gen_aarch64_zip1v4hi; break;
+       case V4SImode: gen = gen_aarch64_zip1v4si; break;
+       case V2SImode: gen = gen_aarch64_zip1v2si; break;
+       case V2DImode: gen = gen_aarch64_zip1v2di; break;
+       case V4SFmode: gen = gen_aarch64_zip1v4sf; break;
+       case V2SFmode: gen = gen_aarch64_zip1v2sf; break;
+       case V2DFmode: gen = gen_aarch64_zip1v2df; break;
+       default:  /* No generator for this mode.  */
+         return false;
+       }
+    }
+
+  emit_insn (gen (out, in0, in1));
+  return true;
+}
+
 static bool
 aarch64_evpc_tbl (struct expand_vec_perm_d *d)
 {
@@ -6969,7 +7224,15 @@ aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
     }
 
   if (TARGET_SIMD)
-    return aarch64_evpc_tbl (d);
+    {
+      if (aarch64_evpc_zip (d))
+       return true;
+      else if (aarch64_evpc_uzp (d))
+       return true;
+      else if (aarch64_evpc_trn (d))
+       return true;
+      return aarch64_evpc_tbl (d);
+    }
   return false;
 }
 
index 7cd4cef0eef84c2c3fa2a75de4158445dbdcd5d3..0eb30f06c04f13bb81fe38d0e1c6fb4f3c3bccc4 100644 (file)
     UNSPEC_BSL         ; Used in aarch64-simd.md.
     UNSPEC_TBL         ; Used in vector permute patterns.
     UNSPEC_CONCAT      ; Used in vector permute patterns.
+    UNSPEC_ZIP1                ; Used in vector permute patterns.
+    UNSPEC_ZIP2                ; Used in vector permute patterns.
+    UNSPEC_UZP1                ; Used in vector permute patterns.
+    UNSPEC_UZP2                ; Used in vector permute patterns.
+    UNSPEC_TRN1                ; Used in vector permute patterns.
+    UNSPEC_TRN2                ; Used in vector permute patterns.
 ])
 
 ;; -------------------------------------------------------------------
 
 (define_int_iterator VCMP_U [UNSPEC_CMHS UNSPEC_CMHI UNSPEC_CMTST])
 
+(define_int_iterator PERMUTE [UNSPEC_ZIP1 UNSPEC_ZIP2
+                             UNSPEC_TRN1 UNSPEC_TRN2
+                             UNSPEC_UZP1 UNSPEC_UZP2])
 
 (define_int_iterator FRINT [UNSPEC_FRINTZ UNSPEC_FRINTP UNSPEC_FRINTM
                             UNSPEC_FRINTI UNSPEC_FRINTX UNSPEC_FRINTA])
 (define_int_attr fcvt_pattern [(UNSPEC_FRINTZ "btrunc") (UNSPEC_FRINTA "round")
                               (UNSPEC_FRINTP "ceil") (UNSPEC_FRINTM "floor")])
 
+(define_int_attr perm_insn [(UNSPEC_ZIP1 "zip") (UNSPEC_ZIP2 "zip")
+                           (UNSPEC_TRN1 "trn") (UNSPEC_TRN2 "trn")
+                           (UNSPEC_UZP1 "uzp") (UNSPEC_UZP2 "uzp")])
+
+(define_int_attr perm_hilo [(UNSPEC_ZIP1 "1") (UNSPEC_ZIP2 "2")
+                           (UNSPEC_TRN1 "1") (UNSPEC_TRN2 "2")
+                           (UNSPEC_UZP1 "1") (UNSPEC_UZP2 "2")])
This page took 0.123244 seconds and 5 git commands to generate.