This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[PATCH][ARM] Implement vceq_p64 and vtst_p64 intrinsics in arm_neon.h
- From: Kyrill Tkachov <kyrylo dot tkachov at arm dot com>
- To: GCC Patches <gcc-patches at gcc dot gnu dot org>
- Cc: Ramana Radhakrishnan <Ramana dot Radhakrishnan at arm dot com>, Richard Earnshaw <Richard dot Earnshaw at arm dot com>
- Date: Fri, 06 Dec 2013 17:19:09 +0000
- Subject: [PATCH][ARM] Implement vceq_p64 and vtst_p64 intrinsics in arm_neon.h
- Authentication-results: sourceware.org; auth=none
Hi all,
Following the implementation of the Crypto intrinsics I posted earlier this
week, this patch implements the vceq_p64 and vtst_p64 intrinsics that operate on
the new poly64_t type. They do not have a regular form and can thus not be
autogenerated from our beloved ML scripts and are therefore synthesised as a
vceq_u32 or vtst_u32 operation, followed by a pairwise reduce with min or max
respectively.
These intrinsics are only available when the crypto intrinsics are available
(i.e. -mfpu=crypto-neon-fp-armv8 and -mfloat-abi=(hard|softfp)).
I've added two runtime tests to make sure they generate correct results.
Ok for trunk?
Thanks,
Kyrill
2013-12-06 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
* config/arm/neon.ml (crypto_intrinsics): Add vceq_64 and vtst_p64.
* config/arm/arm_neon.h: Regenerate.
* config/arm/neon-docgen.ml: Add vceq_p64 and vtst_p64.
* doc/arm-neon-intrinsics.texi: Regenerate.
2013-12-06 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
* gcc.target/arm/neon-vceq_p64.c: New test.
* gcc.target/arm/neon-vtst_p64.c: Likewise.
diff --git a/gcc/config/arm/arm_neon.h b/gcc/config/arm/arm_neon.h
index 59ef22c..cc3f56c 100644
--- a/gcc/config/arm/arm_neon.h
+++ b/gcc/config/arm/arm_neon.h
@@ -13278,6 +13278,26 @@ vstrq_p128 (poly128_t * __ptr, poly128_t __val)
#endif
}
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
+vceq_p64 (poly64x1_t a, poly64x1_t b)
+{
+ uint32x2_t t_a = vreinterpret_u32_p64 (a);
+ uint32x2_t t_b = vreinterpret_u32_p64 (b);
+ uint32x2_t c = vceq_u32 (t_a, t_b);
+ uint32x2_t m = vpmin_u32 (c, c);
+ return vreinterpret_u64_u32 (m);
+}
+
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
+vtst_p64 (poly64x1_t a, poly64x1_t b)
+{
+ uint32x2_t t_a = vreinterpret_u32_p64 (a);
+ uint32x2_t t_b = vreinterpret_u32_p64 (b);
+ uint32x2_t c = vtst_u32 (t_a, t_b);
+ uint32x2_t m = vpmax_u32 (c, c);
+ return vreinterpret_u64_u32 (m);
+}
+
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vaeseq_u8 (uint8x16_t __data, uint8x16_t __key)
{
diff --git a/gcc/config/arm/neon-docgen.ml b/gcc/config/arm/neon-docgen.ml
index 41ae059..8945da7 100644
--- a/gcc/config/arm/neon-docgen.ml
+++ b/gcc/config/arm/neon-docgen.ml
@@ -340,6 +340,14 @@ let crypto_doc =
@end itemize
@itemize @bullet
+@item uint64x1_t vceq_p64 (poly64x1_t, poly64x1_t)
+@end itemize
+
+@itemize @bullet
+@item uint64x1_t vtst_p64 (poly64x1_t, poly64x1_t)
+@end itemize
+
+@itemize @bullet
@item uint32_t vsha1h_u32 (uint32_t)
@*@emph{Form of expected instruction(s):} @code{sha1h.32 @var{q0}, @var{q1}}
@end itemize
diff --git a/gcc/config/arm/neon.ml b/gcc/config/arm/neon.ml
index 968c171..69618d0 100644
--- a/gcc/config/arm/neon.ml
+++ b/gcc/config/arm/neon.ml
@@ -2208,6 +2208,26 @@ vstrq_p128 (poly128_t * __ptr, poly128_t __val)
#endif
}
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
+vceq_p64 (poly64x1_t a, poly64x1_t b)
+{
+ uint32x2_t t_a = vreinterpret_u32_p64 (a);
+ uint32x2_t t_b = vreinterpret_u32_p64 (b);
+ uint32x2_t c = vceq_u32 (t_a, t_b);
+ uint32x2_t m = vpmin_u32 (c, c);
+ return vreinterpret_u64_u32 (m);
+}
+
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
+vtst_p64 (poly64x1_t a, poly64x1_t b)
+{
+ uint32x2_t t_a = vreinterpret_u32_p64 (a);
+ uint32x2_t t_b = vreinterpret_u32_p64 (b);
+ uint32x2_t c = vtst_u32 (t_a, t_b);
+ uint32x2_t m = vpmax_u32 (c, c);
+ return vreinterpret_u64_u32 (m);
+}
+
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vaeseq_u8 (uint8x16_t __data, uint8x16_t __key)
{
diff --git a/gcc/doc/arm-neon-intrinsics.texi b/gcc/doc/arm-neon-intrinsics.texi
index 610892d..b146868 100644
--- a/gcc/doc/arm-neon-intrinsics.texi
+++ b/gcc/doc/arm-neon-intrinsics.texi
@@ -11939,6 +11939,14 @@
@end itemize
@itemize @bullet
+@item uint64x1_t vceq_p64 (poly64x1_t, poly64x1_t)
+@end itemize
+
+@itemize @bullet
+@item uint64x1_t vtst_p64 (poly64x1_t, poly64x1_t)
+@end itemize
+
+@itemize @bullet
@item uint32_t vsha1h_u32 (uint32_t)
@*@emph{Form of expected instruction(s):} @code{sha1h.32 @var{q0}, @var{q1}}
@end itemize
diff --git a/gcc/testsuite/gcc.target/arm/neon-vceq_p64.c b/gcc/testsuite/gcc.target/arm/neon-vceq_p64.c
new file mode 100644
index 0000000..21a6a78
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/neon-vceq_p64.c
@@ -0,0 +1,38 @@
+/* { dg-do run } */
+/* { dg-require-effective-target arm_crypto_ok } */
+/* { dg-require-effective-target arm_neon_hw } */
+/* { dg-add-options arm_crypto } */
+
+#include "arm_neon.h"
+#include <stdio.h>
+
+extern void abort (void);
+
+int
+main (void)
+{
+ uint64_t args[] = { 0x0, 0xdeadbeef, ~0xdeadbeef, 0xffff,
+ ~0xffff, 0xffffffff, ~0xffffffff, ~0x0 };
+ int i, j;
+
+ for (i = 0; i < sizeof (args) / sizeof (args[0]); ++i)
+ {
+ for (j = 0; j < sizeof (args) / sizeof (args[0]); ++j)
+ {
+ uint64_t a1 = args[i];
+ uint64_t a2 = args[j];
+ uint64_t res = vceq_p64 (vreinterpret_p64_u64 (a1),
+ vreinterpret_p64_u64 (a2));
+ uint64_t exp = (a1 == a2) ? ~0x0 : 0x0;
+
+ if (res != exp)
+ {
+ fprintf (stderr, "vceq_p64 (a1= %lx, a2= %lx)"
+ " returned %lx, expected %lx\n",
+ a1, a2, res, exp);
+ abort ();
+ }
+ }
+ }
+ return 0;
+}
diff --git a/gcc/testsuite/gcc.target/arm/neon-vtst_p64.c b/gcc/testsuite/gcc.target/arm/neon-vtst_p64.c
new file mode 100644
index 0000000..3a0b117
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/neon-vtst_p64.c
@@ -0,0 +1,38 @@
+/* { dg-do run } */
+/* { dg-require-effective-target arm_crypto_ok } */
+/* { dg-require-effective-target arm_neon_hw } */
+/* { dg-add-options arm_crypto } */
+
+#include "arm_neon.h"
+#include <stdio.h>
+
+extern void abort (void);
+
+int
+main (void)
+{
+ uint64_t args[] = { 0x0, 0xdeadbeef, ~0xdeadbeef, 0xffff,
+ ~0xffff, 0xffffffff, ~0xffffffff, ~0x0 };
+ int i, j;
+
+ for (i = 0; i < sizeof (args) / sizeof (args[0]); ++i)
+ {
+ for (j = 0; j < sizeof (args) / sizeof (args[0]); ++j)
+ {
+ uint64_t a1 = args[i];
+ uint64_t a2 = args[j];
+ uint64_t res = vtst_p64 (vreinterpret_p64_u64 (a1),
+ vreinterpret_p64_u64 (a2));
+ uint64_t exp = (a1 & a2) ? ~0x0 : 0x0;
+
+ if (res != exp)
+ {
+ fprintf (stderr, "vtst_p64 (a1= %lx, a2= %lx)"
+ " returned %lx, expected %lx\n",
+ a1, a2, res, exp);
+ abort ();
+ }
+ }
+ }
+ return 0;
+}