#define VAR13(T, N, A, B, C, D, E, F, G, H, I, J, K, L, M) \
VAR12 (T, N, A, B, C, D, E, F, G, H, I, J, K, L) \
VAR1 (T, N, M)
+#define VAR14(T, N, A, B, C, D, E, F, G, H, I, J, K, L, M, O) \
+ VAR13 (T, N, A, B, C, D, E, F, G, H, I, J, K, L, M) \
+ VAR1 (T, N, O)
/* The builtin data can be found in arm_neon_builtins.def, arm_vfp_builtins.def
and arm_acle_builtins.def. The entries in arm_neon_builtins.def require
return __builtin_neon_vst4v8bf (__ptr, __bu.__o);
}
+__extension__ extern __inline bfloat16x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vld1_bf16 (bfloat16_t const * __ptr)
+{
+ return __builtin_neon_vld1v4bf (__ptr);
+}
+
+__extension__ extern __inline bfloat16x8_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vld1q_bf16 (const bfloat16_t * __ptr)
+{
+ return __builtin_neon_vld1v8bf (__ptr);
+}
+
__extension__ extern __inline bfloat16x4x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vld2_bf16 (bfloat16_t const * __ptr)
VAR1 (TERNOP, vtbx2, v8qi)
VAR1 (TERNOP, vtbx3, v8qi)
VAR1 (TERNOP, vtbx4, v8qi)
-VAR12 (LOAD1, vld1,
- v8qi, v4hi, v4hf, v2si, v2sf, di, v16qi, v8hi, v8hf, v4si, v4sf, v2di)
+VAR14 (LOAD1, vld1,
+ v8qi, v4hi, v4hf, v2si, v2sf, di, v16qi, v8hi, v8hf, v4si, v4sf, v2di,
+ v4bf, v8bf)
VAR12 (LOAD1LANE, vld1_lane,
v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di, v4bf, v8bf)
VAR10 (LOAD1, vld1_dup,
--- /dev/null
+/* { dg-do assemble } */
+/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */
+/* { dg-add-options arm_v8_2a_bf16_neon } */
+/* { dg-additional-options "-save-temps -O2 -mfloat-abi=hard" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "arm_neon.h"
+
+/*
+**test_vld1_bf16:
+** vld1.16 {d0}, \[r0\]
+** bx lr
+*/
+bfloat16x4_t
+test_vld1_bf16 (bfloat16_t const *p)
+{
+ return vld1_bf16 (p);
+}
+
+/*
+**test_vld1q_bf16:
+** vld1.16 {d0-d1}, \[r0\]
+** bx lr
+*/
+bfloat16x8_t
+test_vld1q_bf16 (bfloat16_t const *p)
+{
+ return vld1q_bf16 (p);
+}