This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.

[PATCH 4.9][AArch64] Backport 211892: PR/60825 Make float64x1_t in arm_neon.h a proper vector type


This corrects the name mangling of float64x1_t and makes it a distinct type from float64_t, as the ACLE requires; this is the error mentioned in the "Caveats" section of https://gcc.gnu.org/gcc-4.9/changes.html.
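
To illustrate the user-visible effect, here is a minimal sketch (not part of the patch; assumes an AArch64 target with this patch applied):

#include <arm_neon.h>

float64_t
lane0 (float64x1_t v)
{
  /* float64x1_t is now a true single-lane vector, so GCC's
     vector-extension subscripting applies.  */
  return v[0];
}

float64x1_t
from_scalar (float64_t d)
{
  /* A plain scalar no longer implicitly converts to float64x1_t;
     build the vector explicitly, as arm_neon.h itself now does.  */
  return (float64x1_t) {d};
}

In C++, float64x1_t now mangles as 13__Float64x1_t, distinct from float64_t's "d", as checked by the mangle-neon-aarch64.C change below.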

The only changes from the original patch are the removal of the references to __builtin_aarch64_im_lane_boundsi and of the update to the ext_f64_1.c testcase, both of which were introduced in r211058 (http://pdtlreviewboard.cambridge.arm.com/r/1339/), which is not being backported. This means that out-of-bounds lane indices are silently ignored (the implementation just uses lane 0) rather than raising an error message as on mainline.
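
For example (a sketch of the behavioral difference, not part of the patch):

#include <arm_neon.h>

float64_t
f (float64x1_t v)
{
  /* Lane 1 is out of range for a one-element vector.  Mainline
     rejects this at compile time via __builtin_aarch64_im_lane_boundsi;
     with this backport it is silently treated as lane 0.  */
  return vget_lane_f64 (v, 1);
}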

Also requires backporting three other patches, which add functionality that was missing relative to the ACLE spec; all three apply cleanly with 'patch' (a usage sketch follows the list):

r209559  [AArch64] vrnd<*>_f64 patch

r209641 [AArch64] Vreinterpret re-implementation.

r209642 [AArch64] 64-bit float vreinterpret implementation.
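
As a quick check that the combined series behaves as expected, a round-trip through two of the affected intrinsics (a sketch, not part of the patch):

#include <arm_neon.h>

uint64x1_t
bits_of (float64x1_t v)
{
  /* Now implemented via the reinterpretdiv1df builtin.  */
  return vreinterpret_u64_f64 (v);
}

float64x1_t
from_bits (uint64x1_t u)
{
  /* Now implemented via the createv1df builtin.  */
  return vreinterpret_f64_u64 (u);
}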

----
gcc/ChangeLog:

	Backport r211892 from mainline.
	2014-06-23  Alan Lawrence  <alan.lawrence@arm.com>

	PR target/60825
	* config/aarch64/aarch64.c (aarch64_simd_mangle_map): Add entry for
	V1DFmode.
	* config/aarch64/aarch64-builtins.c (aarch64_simd_builtin_type_mode):
	Add V1DFmode.
	(BUILTIN_VD1): New.
	(BUILTIN_VD_RE): Remove.
	(aarch64_init_simd_builtins): Add V1DF to modes/modenames.
	(aarch64_fold_builtin): Update reinterpret patterns, df becomes v1df.
	* config/aarch64/aarch64-simd-builtins.def (create): Make a v1df
	variant but not df.
	(vreinterpretv1df*, vreinterpret*v1df): New.
	(vreinterpretdf*, vreinterpret*df): Remove.
	* config/aarch64/aarch64-simd.md (aarch64_create, aarch64_reinterpret*):
	Generate V1DFmode pattern not DFmode.
	* config/aarch64/iterators.md (VD_RE): Include V1DF, remove DF.
	(VD1): New.
	* config/aarch64/arm_neon.h (float64x1_t): typedef with gcc extensions.
	(vcreate_f64): Remove cast, use v1df builtin.
	(vcombine_f64): Remove cast, get elements with gcc vector extensions.
	(vget_low_f64, vabs_f64, vceq_f64, vceqz_f64, vcge_f64, vcgez_f64,
	vcgt_f64, vcgtz_f64, vcle_f64, vclez_f64, vclt_f64, vcltz_f64,
	vdup_n_f64, vdupq_lane_f64, vld1_f64, vld2_f64, vld3_f64, vld4_f64,
	vmov_n_f64, vst1_f64): Use gcc vector extensions.
	(vget_lane_f64, vdupd_lane_f64, vmulq_lane_f64): Use gcc extensions.
	(vfma_lane_f64, vfmad_lane_f64, vfma_laneq_f64, vfmaq_lane_f64,
	vfms_lane_f64, vfmsd_lane_f64, vfms_laneq_f64, vfmsq_lane_f64): Fix
	type signature, use gcc vector extensions.
	(vreinterpret_p8_f64, vreinterpret_p16_f64, vreinterpret_f32_f64,
	vreinterpret_f64_f32, vreinterpret_f64_p8, vreinterpret_f64_p16,
	vreinterpret_f64_s8, vreinterpret_f64_s16, vreinterpret_f64_s32,
	vreinterpret_f64_s64, vreinterpret_f64_u8, vreinterpret_f64_u16,
	vreinterpret_f64_u32, vreinterpret_f64_u64, vreinterpret_s8_f64,
	vreinterpret_s16_f64, vreinterpret_s32_f64, vreinterpret_s64_f64,
	vreinterpret_u8_f64, vreinterpret_u16_f64, vreinterpret_u32_f64,
	vreinterpret_u64_f64): Use v1df builtin not df.

gcc/testsuite/ChangeLog:

	Backport r211892 from mainline.
	2014-06-23  Alan Lawrence  <alan.lawrence@arm.com>

	PR target/60825
	* g++.dg/abi/mangle-neon-aarch64.C: Also test mangling of float64x1_t.
	* gcc.target/aarch64/aapcs64/test_64x1_1.c: New test.
	* gcc.target/aarch64/aapcs64/func-ret-64x1_1.c: New test.
	* gcc.target/aarch64/vadd_f64.c: Rewrite with macro to use vector types.
	* gcc.target/aarch64/vsub_f64.c: Likewise.
	* gcc.target/aarch64/vdiv_f.c (INDEX*, RUN_TEST): Remove indexing
	scheme, as it is now the same for all variants.
	* gcc.target/aarch64/vrnd_f64_1.c (compare_f64): Return float64_t, not
	float64x1_t.
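
One point worth noting from the arm_neon.h entries above: the float64x1_t comparison intrinsics now use GCC vector-extension comparisons, which already produce the all-ones/all-zeros lane mask the ACLE requires, so the scalar "? -1ll : 0ll" idiom goes away. A usage sketch (not part of the patch):

#include <arm_neon.h>

uint64x1_t
eq (float64x1_t a, float64x1_t b)
{
  /* All bits of the lane are set when a[0] == b[0], else clear.  */
  return vceq_f64 (a, b);
}
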
diff --git a/gcc/config/aarch64/aarch64-builtins.c b/gcc/config/aarch64/aarch64-builtins.c
index 591260f18bcc084bcc6cc16b6597a3d2ec098d05..4ac3d1f6683a1b6ad6bdd8521061f9743cbf34d9 100644
--- a/gcc/config/aarch64/aarch64-builtins.c
+++ b/gcc/config/aarch64/aarch64-builtins.c
@@ -53,6 +53,7 @@ enum aarch64_simd_builtin_type_mode
   T_V4HI,
   T_V2SI,
   T_V2SF,
+  T_V1DF,
   T_DI,
   T_DF,
   T_V16QI,
@@ -76,6 +77,7 @@ enum aarch64_simd_builtin_type_mode
 #define v4hi_UP  T_V4HI
 #define v2si_UP  T_V2SI
 #define v2sf_UP  T_V2SF
+#define v1df_UP  T_V1DF
 #define di_UP    T_DI
 #define df_UP    T_DF
 #define v16qi_UP T_V16QI
@@ -317,6 +319,8 @@ aarch64_types_store1_qualifiers[SIMD_MAX_BUILTIN_ARGS]
   VAR2 (T, N, MAP, v8qi, v16qi)
 #define BUILTIN_VD(T, N, MAP) \
   VAR4 (T, N, MAP, v8qi, v4hi, v2si, v2sf)
+#define BUILTIN_VD1(T, N, MAP) \
+  VAR5 (T, N, MAP, v8qi, v4hi, v2si, v2sf, v1df)
 #define BUILTIN_VDC(T, N, MAP) \
   VAR6 (T, N, MAP, v8qi, v4hi, v2si, v2sf, di, df)
 #define BUILTIN_VDIC(T, N, MAP) \
@@ -351,8 +355,6 @@ aarch64_types_store1_qualifiers[SIMD_MAX_BUILTIN_ARGS]
   VAR3 (T, N, MAP, v8qi, v4hi, v2si)
 #define BUILTIN_VD_HSI(T, N, MAP) \
   VAR2 (T, N, MAP, v4hi, v2si)
-#define BUILTIN_VD_RE(T, N, MAP) \
-  VAR6 (T, N, MAP, v8qi, v4hi, v2si, v2sf, di, df)
 #define BUILTIN_VQ(T, N, MAP) \
   VAR6 (T, N, MAP, v16qi, v8hi, v4si, v2di, v4sf, v2df)
 #define BUILTIN_VQN(T, N, MAP) \
@@ -659,13 +661,13 @@ aarch64_init_simd_builtins (void)
       aarch64_simd_builtin_datum *d = &aarch64_simd_builtin_data[i];
       const char *const modenames[] =
 	{
-	  "v8qi", "v4hi", "v2si", "v2sf", "di", "df",
+	  "v8qi", "v4hi", "v2si", "v2sf", "v1df", "di", "df",
 	  "v16qi", "v8hi", "v4si", "v4sf", "v2di", "v2df",
 	  "ti", "ei", "oi", "xi", "si", "sf", "hi", "qi"
 	};
       const enum machine_mode modes[] =
 	{
-	  V8QImode, V4HImode, V2SImode, V2SFmode, DImode, DFmode,
+	  V8QImode, V4HImode, V2SImode, V2SFmode, V1DFmode, DImode, DFmode,
 	  V16QImode, V8HImode, V4SImode, V4SFmode, V2DImode,
 	  V2DFmode, TImode, EImode, OImode, XImode, SImode,
 	  SFmode, HImode, QImode
@@ -1145,24 +1147,23 @@ aarch64_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED, tree *args,
 	  return fold_build2 (NE_EXPR, type, and_node, vec_zero_node);
 	  break;
 	}
-      VAR1 (REINTERP_SS, reinterpretdi, 0, df)
-      VAR1 (REINTERP_SS, reinterpretv8qi, 0, df)
-      VAR1 (REINTERP_SS, reinterpretv4hi, 0, df)
-      VAR1 (REINTERP_SS, reinterpretv2si, 0, df)
-      VAR1 (REINTERP_SS, reinterpretv2sf, 0, df)
-      BUILTIN_VD (REINTERP_SS, reinterpretdf, 0)
-      BUILTIN_VD (REINTERP_SU, reinterpretdf, 0)
-      VAR1 (REINTERP_US, reinterpretdi, 0, df)
-      VAR1 (REINTERP_US, reinterpretv8qi, 0, df)
-      VAR1 (REINTERP_US, reinterpretv4hi, 0, df)
-      VAR1 (REINTERP_US, reinterpretv2si, 0, df)
-      VAR1 (REINTERP_US, reinterpretv2sf, 0, df)
-      BUILTIN_VD (REINTERP_SP, reinterpretdf, 0)
-      VAR1 (REINTERP_PS, reinterpretdi, 0, df)
-      VAR1 (REINTERP_PS, reinterpretv8qi, 0, df)
-      VAR1 (REINTERP_PS, reinterpretv4hi, 0, df)
-      VAR1 (REINTERP_PS, reinterpretv2si, 0, df)
-      VAR1 (REINTERP_PS, reinterpretv2sf, 0, df)
+      VAR1 (REINTERP_SS, reinterpretdi, 0, v1df)
+      VAR1 (REINTERP_SS, reinterpretv8qi, 0, v1df)
+      VAR1 (REINTERP_SS, reinterpretv4hi, 0, v1df)
+      VAR1 (REINTERP_SS, reinterpretv2si, 0, v1df)
+      VAR1 (REINTERP_SS, reinterpretv2sf, 0, v1df)
+      BUILTIN_VD (REINTERP_SS, reinterpretv1df, 0)
+      BUILTIN_VD (REINTERP_SU, reinterpretv1df, 0)
+      VAR1 (REINTERP_US, reinterpretdi, 0, v1df)
+      VAR1 (REINTERP_US, reinterpretv8qi, 0, v1df)
+      VAR1 (REINTERP_US, reinterpretv4hi, 0, v1df)
+      VAR1 (REINTERP_US, reinterpretv2si, 0, v1df)
+      VAR1 (REINTERP_US, reinterpretv2sf, 0, v1df)
+      BUILTIN_VD (REINTERP_SP, reinterpretv1df, 0)
+      VAR1 (REINTERP_PS, reinterpretdi, 0, v1df)
+      VAR1 (REINTERP_PS, reinterpretv8qi, 0, v1df)
+      VAR1 (REINTERP_PS, reinterpretv4hi, 0, v1df)
+      VAR1 (REINTERP_PS, reinterpretv2sf, 0, v1df)
 	return fold_build1 (VIEW_CONVERT_EXPR, type, args[0]);
       VAR1 (UNOP, floatv2si, 2, v2sf)
       VAR1 (UNOP, floatv4si, 2, v4sf)
@@ -1242,6 +1243,7 @@ aarch64_gimple_fold_builtin (gimple_stmt_iterator *gsi)
 #undef BUILTIN_VALL
 #undef BUILTIN_VB
 #undef BUILTIN_VD
+#undef BUILTIN_VD1
 #undef BUILTIN_VDC
 #undef BUILTIN_VDIC
 #undef BUILTIN_VDN
@@ -1257,7 +1259,6 @@ aarch64_gimple_fold_builtin (gimple_stmt_iterator *gsi)
 #undef BUILTIN_VDW
 #undef BUILTIN_VD_BHSI
 #undef BUILTIN_VD_HSI
-#undef BUILTIN_VD_RE
 #undef BUILTIN_VQ
 #undef BUILTIN_VQN
 #undef BUILTIN_VQW
diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def
index f08f9958a138bd28934096c929fd088ea46fe7d9..dbd60aad5f33c6d62eb864ce093d57179fa69325 100644
--- a/gcc/config/aarch64/aarch64-simd-builtins.def
+++ b/gcc/config/aarch64/aarch64-simd-builtins.def
@@ -39,7 +39,7 @@
    1-9 - CODE_FOR_<name><mode><1-9>
    10 - CODE_FOR_<name><mode>.  */
 
-  BUILTIN_VD_RE (CREATE, create, 0)
+  BUILTIN_VD1 (CREATE, create, 0)
   BUILTIN_VDC (COMBINE, combine, 0)
   BUILTIN_VB (BINOP, pmul, 0)
   BUILTIN_VDQF (UNOP, sqrt, 2)
@@ -51,28 +51,28 @@
   VAR1 (GETLANE, get_lane, 0, di)
   BUILTIN_VALL (GETLANE, be_checked_get_lane, 0)
 
-  VAR1 (REINTERP_SS, reinterpretdi, 0, df)
-  VAR1 (REINTERP_SS, reinterpretv8qi, 0, df)
-  VAR1 (REINTERP_SS, reinterpretv4hi, 0, df)
-  VAR1 (REINTERP_SS, reinterpretv2si, 0, df)
-  VAR1 (REINTERP_SS, reinterpretv2sf, 0, df)
-  BUILTIN_VD (REINTERP_SS, reinterpretdf, 0)
+  VAR1 (REINTERP_SS, reinterpretdi, 0, v1df)
+  VAR1 (REINTERP_SS, reinterpretv8qi, 0, v1df)
+  VAR1 (REINTERP_SS, reinterpretv4hi, 0, v1df)
+  VAR1 (REINTERP_SS, reinterpretv2si, 0, v1df)
+  VAR1 (REINTERP_SS, reinterpretv2sf, 0, v1df)
+  BUILTIN_VD (REINTERP_SS, reinterpretv1df, 0)
 
-  BUILTIN_VD (REINTERP_SU, reinterpretdf, 0)
+  BUILTIN_VD (REINTERP_SU, reinterpretv1df, 0)
 
-  VAR1 (REINTERP_US, reinterpretdi, 0, df)
-  VAR1 (REINTERP_US, reinterpretv8qi, 0, df)
-  VAR1 (REINTERP_US, reinterpretv4hi, 0, df)
-  VAR1 (REINTERP_US, reinterpretv2si, 0, df)
-  VAR1 (REINTERP_US, reinterpretv2sf, 0, df)
+  VAR1 (REINTERP_US, reinterpretdi, 0, v1df)
+  VAR1 (REINTERP_US, reinterpretv8qi, 0, v1df)
+  VAR1 (REINTERP_US, reinterpretv4hi, 0, v1df)
+  VAR1 (REINTERP_US, reinterpretv2si, 0, v1df)
+  VAR1 (REINTERP_US, reinterpretv2sf, 0, v1df)
 
-  BUILTIN_VD (REINTERP_SP, reinterpretdf, 0)
+  BUILTIN_VD (REINTERP_SP, reinterpretv1df, 0)
 
-  VAR1 (REINTERP_PS, reinterpretdi, 0, df)
-  VAR1 (REINTERP_PS, reinterpretv8qi, 0, df)
-  VAR1 (REINTERP_PS, reinterpretv4hi, 0, df)
-  VAR1 (REINTERP_PS, reinterpretv2si, 0, df)
-  VAR1 (REINTERP_PS, reinterpretv2sf, 0, df)
+  VAR1 (REINTERP_PS, reinterpretdi, 0, v1df)
+  VAR1 (REINTERP_PS, reinterpretv8qi, 0, v1df)
+  VAR1 (REINTERP_PS, reinterpretv4hi, 0, v1df)
+  VAR1 (REINTERP_PS, reinterpretv2si, 0, v1df)
+  VAR1 (REINTERP_PS, reinterpretv2sf, 0, v1df)
 
   BUILTIN_VDQ_I (BINOP, dup_lane, 0)
   /* Implemented by aarch64_<sur>q<r>shl<mode>.  */
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 2cfc005abcd1831c9e6e27d009001ae6da12aa7b..2aeb4764f9322bbc618b07858151136074c002a8 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -2126,7 +2126,7 @@
 ;; Patterns for AArch64 SIMD Intrinsics.
 
 (define_expand "aarch64_create<mode>"
-  [(match_operand:VD_RE 0 "register_operand" "")
+  [(match_operand:VD1 0 "register_operand" "")
    (match_operand:DI 1 "general_operand" "")]
   "TARGET_SIMD"
 {
@@ -2216,7 +2216,7 @@
 
 (define_expand "aarch64_reinterpretv8qi<mode>"
   [(match_operand:V8QI 0 "register_operand" "")
-   (match_operand:VDC 1 "register_operand" "")]
+   (match_operand:VD_RE 1 "register_operand" "")]
   "TARGET_SIMD"
 {
   aarch64_simd_reinterpret (operands[0], operands[1]);
@@ -2225,7 +2225,7 @@
 
 (define_expand "aarch64_reinterpretv4hi<mode>"
   [(match_operand:V4HI 0 "register_operand" "")
-   (match_operand:VDC 1 "register_operand" "")]
+   (match_operand:VD_RE 1 "register_operand" "")]
   "TARGET_SIMD"
 {
   aarch64_simd_reinterpret (operands[0], operands[1]);
@@ -2234,7 +2234,7 @@
 
 (define_expand "aarch64_reinterpretv2si<mode>"
   [(match_operand:V2SI 0 "register_operand" "")
-   (match_operand:VDC 1 "register_operand" "")]
+   (match_operand:VD_RE 1 "register_operand" "")]
   "TARGET_SIMD"
 {
   aarch64_simd_reinterpret (operands[0], operands[1]);
@@ -2243,7 +2243,7 @@
 
 (define_expand "aarch64_reinterpretv2sf<mode>"
   [(match_operand:V2SF 0 "register_operand" "")
-   (match_operand:VDC 1 "register_operand" "")]
+   (match_operand:VD_RE 1 "register_operand" "")]
   "TARGET_SIMD"
 {
   aarch64_simd_reinterpret (operands[0], operands[1]);
@@ -2259,8 +2259,8 @@
   DONE;
 })
 
-(define_expand "aarch64_reinterpretdf<mode>"
-  [(match_operand:DF 0 "register_operand" "")
+(define_expand "aarch64_reinterpretv1df<mode>"
+  [(match_operand:V1DF 0 "register_operand" "")
    (match_operand:VD_RE 1 "register_operand" "")]
   "TARGET_SIMD"
 {
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index bf35031ecd5e0e2e46472aa0e716276c9f2d62f0..5d4538cfbdd7b004e3ea929752ad3e2c8d48a13f 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -6399,6 +6399,7 @@ static aarch64_simd_mangle_map_entry aarch64_simd_mangle_map[] = {
   { V2SImode,  "__builtin_aarch64_simd_si",     "11__Int32x2_t" },
   { V2SImode,  "__builtin_aarch64_simd_usi",    "12__Uint32x2_t" },
   { V2SFmode,  "__builtin_aarch64_simd_sf",     "13__Float32x2_t" },
+  { V1DFmode,  "__builtin_aarch64_simd_df",	"13__Float64x1_t" },
   { V8QImode,  "__builtin_aarch64_simd_poly8",  "11__Poly8x8_t" },
   { V4HImode,  "__builtin_aarch64_simd_poly16", "12__Poly16x4_t" },
   /* 128-bit containerized types.  */
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index 682ee116783e4c4d4f498b7c7a972223d946ca3d..90856a8541aedf4bff16c209cde670e3fb9b229c 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -42,7 +42,8 @@ typedef int64_t int64x1_t;
 typedef int32_t int32x1_t;
 typedef int16_t int16x1_t;
 typedef int8_t int8x1_t;
-typedef double float64x1_t;
+typedef __builtin_aarch64_simd_df float64x1_t
+  __attribute__ ((__vector_size__ (8)));
 typedef __builtin_aarch64_simd_sf float32x2_t
   __attribute__ ((__vector_size__ (8)));
 typedef __builtin_aarch64_simd_poly8 poly8x8_t
@@ -461,7 +462,7 @@ typedef struct poly16x8x4_t
 
 #define __aarch64_vget_lane_f32(__a, __b) \
   __aarch64_vget_lane_any (v2sf, , , __a, __b)
-#define __aarch64_vget_lane_f64(__a, __b) (__a)
+#define __aarch64_vget_lane_f64(__a, __b) (__a[0])
 
 #define __aarch64_vget_lane_p8(__a, __b) \
   __aarch64_vget_lane_any (v8qi, (poly8_t), (int8x8_t), __a, __b)
@@ -518,7 +519,8 @@ typedef struct poly16x8x4_t
 
 #define __aarch64_vdup_lane_f32(__a, __b) \
    __aarch64_vdup_lane_any (f32, , , __a, __b)
-#define __aarch64_vdup_lane_f64(__a, __b) (__a)
+#define __aarch64_vdup_lane_f64(__a, __b) \
+   __aarch64_vdup_lane_any (f64, , , __a, __b)
 #define __aarch64_vdup_lane_p8(__a, __b) \
    __aarch64_vdup_lane_any (p8, , , __a, __b)
 #define __aarch64_vdup_lane_p16(__a, __b) \
@@ -567,7 +569,8 @@ typedef struct poly16x8x4_t
 /* __aarch64_vdupq_lane internal macros.  */
 #define __aarch64_vdupq_lane_f32(__a, __b) \
    __aarch64_vdup_lane_any (f32, q, , __a, __b)
-#define __aarch64_vdupq_lane_f64(__a, __b) (vdupq_n_f64 (__a))
+#define __aarch64_vdupq_lane_f64(__a, __b) \
+   __aarch64_vdup_lane_any (f64, q, , __a, __b)
 #define __aarch64_vdupq_lane_p8(__a, __b) \
    __aarch64_vdup_lane_any (p8, q, , __a, __b)
 #define __aarch64_vdupq_lane_p16(__a, __b) \
@@ -2477,7 +2480,7 @@ vcreate_u64 (uint64_t __a)
 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
 vcreate_f64 (uint64_t __a)
 {
-  return (float64x1_t) __builtin_aarch64_createdf (__a);
+  return __builtin_aarch64_createv1df (__a);
 }
 
 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
@@ -2645,7 +2648,7 @@ vgetq_lane_u64 (uint64x2_t __a, const int __b)
 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
 vreinterpret_p8_f64 (float64x1_t __a)
 {
-  return __builtin_aarch64_reinterpretv8qidf_ps (__a);
+  return __builtin_aarch64_reinterpretv8qiv1df_ps (__a);
 }
 
 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
@@ -2777,7 +2780,7 @@ vreinterpretq_p8_p16 (poly16x8_t __a)
 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
 vreinterpret_p16_f64 (float64x1_t __a)
 {
-  return __builtin_aarch64_reinterpretv4hidf_ps (__a);
+  return __builtin_aarch64_reinterpretv4hiv1df_ps (__a);
 }
 
 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
@@ -2909,7 +2912,7 @@ vreinterpretq_p16_p8 (poly8x16_t __a)
 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
 vreinterpret_f32_f64 (float64x1_t __a)
 {
-  return __builtin_aarch64_reinterpretv2sfdf (__a);
+  return __builtin_aarch64_reinterpretv2sfv1df (__a);
 }
 
 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
@@ -3041,67 +3044,67 @@ vreinterpretq_f32_p16 (poly16x8_t __a)
 __extension__ static __inline float64x1_t __attribute__((__always_inline__))
 vreinterpret_f64_f32 (float32x2_t __a)
 {
-  return __builtin_aarch64_reinterpretdfv2sf (__a);
+  return __builtin_aarch64_reinterpretv1dfv2sf (__a);
 }
 
 __extension__ static __inline float64x1_t __attribute__((__always_inline__))
 vreinterpret_f64_p8 (poly8x8_t __a)
 {
-  return __builtin_aarch64_reinterpretdfv8qi_sp (__a);
+  return __builtin_aarch64_reinterpretv1dfv8qi_sp (__a);
 }
 
 __extension__ static __inline float64x1_t __attribute__((__always_inline__))
 vreinterpret_f64_p16 (poly16x4_t __a)
 {
-  return __builtin_aarch64_reinterpretdfv4hi_sp (__a);
+  return __builtin_aarch64_reinterpretv1dfv4hi_sp (__a);
 }
 
 __extension__ static __inline float64x1_t __attribute__((__always_inline__))
 vreinterpret_f64_s8 (int8x8_t __a)
 {
-  return __builtin_aarch64_reinterpretdfv8qi (__a);
+  return __builtin_aarch64_reinterpretv1dfv8qi (__a);
 }
 
 __extension__ static __inline float64x1_t __attribute__((__always_inline__))
 vreinterpret_f64_s16 (int16x4_t __a)
 {
-  return __builtin_aarch64_reinterpretdfv4hi (__a);
+  return __builtin_aarch64_reinterpretv1dfv4hi (__a);
 }
 
 __extension__ static __inline float64x1_t __attribute__((__always_inline__))
 vreinterpret_f64_s32 (int32x2_t __a)
 {
-  return __builtin_aarch64_reinterpretdfv2si (__a);
+  return __builtin_aarch64_reinterpretv1dfv2si (__a);
 }
 
 __extension__ static __inline float64x1_t __attribute__((__always_inline__))
 vreinterpret_f64_s64 (int64x1_t __a)
 {
-  return __builtin_aarch64_createdf ((uint64_t) vget_lane_s64 (__a, 0));
+  return __builtin_aarch64_createv1df ((uint64_t) vget_lane_s64 (__a, 0));
 }
 
 __extension__ static __inline float64x1_t __attribute__((__always_inline__))
 vreinterpret_f64_u8 (uint8x8_t __a)
 {
-  return __builtin_aarch64_reinterpretdfv8qi_su (__a);
+  return __builtin_aarch64_reinterpretv1dfv8qi_su (__a);
 }
 
 __extension__ static __inline float64x1_t __attribute__((__always_inline__))
 vreinterpret_f64_u16 (uint16x4_t __a)
 {
-  return __builtin_aarch64_reinterpretdfv4hi_su (__a);
+  return __builtin_aarch64_reinterpretv1dfv4hi_su (__a);
 }
 
 __extension__ static __inline float64x1_t __attribute__((__always_inline__))
 vreinterpret_f64_u32 (uint32x2_t __a)
 {
-  return __builtin_aarch64_reinterpretdfv2si_su (__a);
+  return __builtin_aarch64_reinterpretv1dfv2si_su (__a);
 }
 
 __extension__ static __inline float64x1_t __attribute__((__always_inline__))
 vreinterpret_f64_u64 (uint64x1_t __a)
 {
-  return __builtin_aarch64_createdf (vget_lane_u64 (__a, 0));
+  return __builtin_aarch64_createv1df (vget_lane_u64 (__a, 0));
 }
 
 __extension__ static __inline float64x2_t __attribute__((__always_inline__))
@@ -3173,7 +3176,7 @@ vreinterpretq_f64_u64 (uint64x2_t __a)
 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
 vreinterpret_s64_f64 (float64x1_t __a)
 {
-  return __builtin_aarch64_reinterpretdidf (__a);
+  return __builtin_aarch64_reinterpretdiv1df (__a);
 }
 
 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
@@ -3305,7 +3308,7 @@ vreinterpretq_s64_p16 (poly16x8_t __a)
 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
 vreinterpret_u64_f64 (float64x1_t __a)
 {
-  return __builtin_aarch64_reinterpretdidf_us (__a);
+  return __builtin_aarch64_reinterpretdiv1df_us (__a);
 }
 
 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
@@ -3437,7 +3440,7 @@ vreinterpretq_u64_p16 (poly16x8_t __a)
 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
 vreinterpret_s8_f64 (float64x1_t __a)
 {
-  return __builtin_aarch64_reinterpretv8qidf (__a);
+  return __builtin_aarch64_reinterpretv8qiv1df (__a);
 }
 
 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
@@ -3569,7 +3572,7 @@ vreinterpretq_s8_p16 (poly16x8_t __a)
 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
 vreinterpret_s16_f64 (float64x1_t __a)
 {
-  return __builtin_aarch64_reinterpretv4hidf (__a);
+  return __builtin_aarch64_reinterpretv4hiv1df (__a);
 }
 
 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
@@ -3701,7 +3704,7 @@ vreinterpretq_s16_p16 (poly16x8_t __a)
 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
 vreinterpret_s32_f64 (float64x1_t __a)
 {
-  return __builtin_aarch64_reinterpretv2sidf (__a);
+  return __builtin_aarch64_reinterpretv2siv1df (__a);
 }
 
 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
@@ -3833,7 +3836,7 @@ vreinterpretq_s32_p16 (poly16x8_t __a)
 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
 vreinterpret_u8_f64 (float64x1_t __a)
 {
-  return __builtin_aarch64_reinterpretv8qidf_us (__a);
+  return __builtin_aarch64_reinterpretv8qiv1df_us (__a);
 }
 
 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
@@ -3965,7 +3968,7 @@ vreinterpretq_u8_p16 (poly16x8_t __a)
 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
 vreinterpret_u16_f64 (float64x1_t __a)
 {
-  return __builtin_aarch64_reinterpretv4hidf_us (__a);
+  return __builtin_aarch64_reinterpretv4hiv1df_us (__a);
 }
 
 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
@@ -4097,7 +4100,7 @@ vreinterpretq_u16_p16 (poly16x8_t __a)
 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
 vreinterpret_u32_f64 (float64x1_t __a)
 {
-  return __builtin_aarch64_reinterpretv2sidf_us (__a);
+  return __builtin_aarch64_reinterpretv2siv1df_us (__a);
 }
 
 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
@@ -4240,7 +4243,7 @@ vget_low_f32 (float32x4_t __a)
 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
 vget_low_f64 (float64x2_t __a)
 {
-  return vgetq_lane_f64 (__a, 0);
+  return (float64x1_t) {vgetq_lane_f64 (__a, 0)};
 }
 
 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
@@ -4366,7 +4369,7 @@ vcombine_u64 (uint64x1_t __a, uint64x1_t __b)
 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
 vcombine_f64 (float64x1_t __a, float64x1_t __b)
 {
-  return (float64x2_t) __builtin_aarch64_combinedf (__a, __b);
+  return __builtin_aarch64_combinedf (__a[0], __b[0]);
 }
 
 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
@@ -15469,7 +15472,7 @@ vabs_f32 (float32x2_t __a)
 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
 vabs_f64 (float64x1_t __a)
 {
-  return __builtin_fabs (__a);
+  return (float64x1_t) {__builtin_fabs (__a[0])};
 }
 
 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
@@ -15953,7 +15956,7 @@ vceq_f32 (float32x2_t __a, float32x2_t __b)
 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
 vceq_f64 (float64x1_t __a, float64x1_t __b)
 {
-  return __a == __b ? -1ll : 0ll;
+  return (uint64x1_t) (__a == __b);
 }
 
 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
@@ -16123,7 +16126,7 @@ vceqz_f32 (float32x2_t __a)
 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
 vceqz_f64 (float64x1_t __a)
 {
-  return __a == 0.0 ? -1ll : 0ll;
+  return (uint64x1_t) (__a == (float64x1_t) {0.0});
 }
 
 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
@@ -16313,7 +16316,7 @@ vcge_f32 (float32x2_t __a, float32x2_t __b)
 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
 vcge_f64 (float64x1_t __a, float64x1_t __b)
 {
-  return __a >= __b ? -1ll : 0ll;
+  return (uint64x1_t) (__a >= __b);
 }
 
 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
@@ -16483,7 +16486,7 @@ vcgez_f32 (float32x2_t __a)
 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
 vcgez_f64 (float64x1_t __a)
 {
-  return __a >= 0.0 ? -1ll : 0ll;
+  return (uint64x1_t) (__a >= (float64x1_t) {0.0});
 }
 
 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
@@ -16673,7 +16676,7 @@ vcgt_f32 (float32x2_t __a, float32x2_t __b)
 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
 vcgt_f64 (float64x1_t __a, float64x1_t __b)
 {
-  return __a > __b ? -1ll : 0ll;
+  return (uint64x1_t) (__a > __b);
 }
 
 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
@@ -16843,7 +16846,7 @@ vcgtz_f32 (float32x2_t __a)
 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
 vcgtz_f64 (float64x1_t __a)
 {
-  return __a > 0.0 ? -1ll : 0ll;
+  return (uint64x1_t) (__a > (float64x1_t) {0.0});
 }
 
 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
@@ -17033,7 +17036,7 @@ vcle_f32 (float32x2_t __a, float32x2_t __b)
 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
 vcle_f64 (float64x1_t __a, float64x1_t __b)
 {
-  return __a <= __b ? -1ll : 0ll;
+  return (uint64x1_t) (__a <= __b);
 }
 
 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
@@ -17203,7 +17206,7 @@ vclez_f32 (float32x2_t __a)
 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
 vclez_f64 (float64x1_t __a)
 {
-  return __a <= 0.0 ? -1ll : 0ll;
+  return (uint64x1_t) (__a <= (float64x1_t) {0.0});
 }
 
 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
@@ -17336,7 +17339,7 @@ vclt_f32 (float32x2_t __a, float32x2_t __b)
 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
 vclt_f64 (float64x1_t __a, float64x1_t __b)
 {
-  return __a < __b ? -1ll : 0ll;
+  return (uint64x1_t) (__a < __b);
 }
 
 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
@@ -17506,7 +17509,7 @@ vcltz_f32 (float32x2_t __a)
 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
 vcltz_f64 (float64x1_t __a)
 {
-  return __a < 0.0 ? -1ll : 0ll;
+  return (uint64x1_t) (__a < (float64x1_t) {0.0});
 }
 
 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
@@ -18138,7 +18141,7 @@ vdup_n_f32 (float32_t __a)
 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
 vdup_n_f64 (float64_t __a)
 {
-  return __a;
+  return (float64x1_t) {__a};
 }
 
 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
@@ -18631,9 +18634,9 @@ vdups_lane_u32 (uint32x2_t __a, const int __b)
 
 /* vdupd_lane  */
 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
-vdupd_lane_f64 (float64x1_t __a, const int __attribute__ ((unused)) __b)
+vdupd_lane_f64 (float64x1_t __a, const int __b)
 {
-  return __a;
+  return __a[0];
 }
 
 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
@@ -18735,18 +18738,18 @@ vfma_lane_f32 (float32x2_t __a, float32x2_t __b,
 				    __a);
 }
 
-__extension__ static __inline float64_t __attribute__ ((__always_inline__))
-vfma_lane_f64 (float64_t __a, float64_t __b,
-	       float64_t __c, const int __lane)
+__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
+vfma_lane_f64 (float64x1_t __a, float64x1_t __b,
+	       float64x1_t __c, const int __lane)
 {
-  return __builtin_fma (__b, __c, __a);
+  return (float64x1_t) {__builtin_fma (__b[0], __c[0], __a[0])};
 }
 
 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
 vfmad_lane_f64 (float64_t __a, float64_t __b,
-	        float64_t __c, const int __lane)
+	        float64x1_t __c, const int __lane)
 {
-  return __builtin_fma (__b, __c, __a);
+  return __builtin_fma (__b, __c[0], __a);
 }
 
 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
@@ -18767,11 +18770,12 @@ vfma_laneq_f32 (float32x2_t __a, float32x2_t __b,
 				    __a);
 }
 
-__extension__ static __inline float64_t __attribute__ ((__always_inline__))
-vfma_laneq_f64 (float64_t __a, float64_t __b,
+__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
+vfma_laneq_f64 (float64x1_t __a, float64x1_t __b,
 	        float64x2_t __c, const int __lane)
 {
-  return __builtin_fma (__b, __aarch64_vgetq_lane_f64 (__c, __lane), __a);
+  float64_t __c0 = __aarch64_vgetq_lane_f64 (__c, __lane);
+  return (float64x1_t) {__builtin_fma (__b[0], __c0, __a[0])};
 }
 
 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
@@ -18801,9 +18805,9 @@ vfmaq_lane_f32 (float32x4_t __a, float32x4_t __b,
 
 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
 vfmaq_lane_f64 (float64x2_t __a, float64x2_t __b,
-	        float64_t __c, const int __lane)
+	        float64x1_t __c, const int __lane)
 {
-  return __builtin_aarch64_fmav2df (__b, vdupq_n_f64 (__c), __a);
+  return __builtin_aarch64_fmav2df (__b, vdupq_n_f64 (__c[0]), __a);
 }
 
 /* vfmaq_laneq  */
@@ -18837,18 +18841,18 @@ vfms_lane_f32 (float32x2_t __a, float32x2_t __b,
 				    __a);
 }
 
-__extension__ static __inline float64_t __attribute__ ((__always_inline__))
-vfms_lane_f64 (float64_t __a, float64_t __b,
-	       float64_t __c, const int __lane)
+__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
+vfms_lane_f64 (float64x1_t __a, float64x1_t __b,
+	       float64x1_t __c, const int __lane)
 {
-  return __builtin_fma (-__b, __c, __a);
+  return (float64x1_t) {__builtin_fma (-__b[0], __c[0], __a[0])};
 }
 
 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
 vfmsd_lane_f64 (float64_t __a, float64_t __b,
-	        float64_t __c, const int __lane)
+	        float64x1_t __c, const int __lane)
 {
-  return __builtin_fma (-__b, __c, __a);
+  return __builtin_fma (-__b, __c[0], __a);
 }
 
 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
@@ -18869,11 +18873,12 @@ vfms_laneq_f32 (float32x2_t __a, float32x2_t __b,
 				    __a);
 }
 
-__extension__ static __inline float64_t __attribute__ ((__always_inline__))
-vfms_laneq_f64 (float64_t __a, float64_t __b,
+__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
+vfms_laneq_f64 (float64x1_t __a, float64x1_t __b,
 	        float64x2_t __c, const int __lane)
 {
-  return __builtin_fma (-__b, __aarch64_vgetq_lane_f64 (__c, __lane), __a);
+  float64_t __c0 = __aarch64_vgetq_lane_f64 (__c, __lane);
+  return (float64x1_t) {__builtin_fma (-__b[0], __c0, __a[0])};
 }
 
 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
@@ -18903,9 +18908,9 @@ vfmsq_lane_f32 (float32x4_t __a, float32x4_t __b,
 
 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
 vfmsq_lane_f64 (float64x2_t __a, float64x2_t __b,
-	        float64_t __c, const int __lane)
+	        float64x1_t __c, const int __lane)
 {
-  return __builtin_aarch64_fmav2df (-__b, vdupq_n_f64 (__c), __a);
+  return __builtin_aarch64_fmav2df (-__b, vdupq_n_f64 (__c[0]), __a);
 }
 
 /* vfmsq_laneq  */
@@ -18939,7 +18944,7 @@ vld1_f32 (const float32_t *a)
 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
 vld1_f64 (const float64_t *a)
 {
-  return *a;
+  return (float64x1_t) {*a};
 }
 
 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
@@ -19117,8 +19122,8 @@ vld2_f64 (const float64_t * __a)
   float64x1x2_t ret;
   __builtin_aarch64_simd_oi __o;
   __o = __builtin_aarch64_ld2df ((const __builtin_aarch64_simd_df *) __a);
-  ret.val[0] = (float64x1_t) __builtin_aarch64_get_dregoidf (__o, 0);
-  ret.val[1] = (float64x1_t) __builtin_aarch64_get_dregoidf (__o, 1);
+  ret.val[0] = (float64x1_t) {__builtin_aarch64_get_dregoidf (__o, 0)};
+  ret.val[1] = (float64x1_t) {__builtin_aarch64_get_dregoidf (__o, 1)};
   return ret;
 }
 
@@ -19383,9 +19388,9 @@ vld3_f64 (const float64_t * __a)
   float64x1x3_t ret;
   __builtin_aarch64_simd_ci __o;
   __o = __builtin_aarch64_ld3df ((const __builtin_aarch64_simd_df *) __a);
-  ret.val[0] = (float64x1_t) __builtin_aarch64_get_dregcidf (__o, 0);
-  ret.val[1] = (float64x1_t) __builtin_aarch64_get_dregcidf (__o, 1);
-  ret.val[2] = (float64x1_t) __builtin_aarch64_get_dregcidf (__o, 2);
+  ret.val[0] = (float64x1_t) {__builtin_aarch64_get_dregcidf (__o, 0)};
+  ret.val[1] = (float64x1_t) {__builtin_aarch64_get_dregcidf (__o, 1)};
+  ret.val[2] = (float64x1_t) {__builtin_aarch64_get_dregcidf (__o, 2)};
   return ret;
 }
 
@@ -19673,10 +19678,10 @@ vld4_f64 (const float64_t * __a)
   float64x1x4_t ret;
   __builtin_aarch64_simd_xi __o;
   __o = __builtin_aarch64_ld4df ((const __builtin_aarch64_simd_df *) __a);
-  ret.val[0] = (float64x1_t) __builtin_aarch64_get_dregxidf (__o, 0);
-  ret.val[1] = (float64x1_t) __builtin_aarch64_get_dregxidf (__o, 1);
-  ret.val[2] = (float64x1_t) __builtin_aarch64_get_dregxidf (__o, 2);
-  ret.val[3] = (float64x1_t) __builtin_aarch64_get_dregxidf (__o, 3);
+  ret.val[0] = (float64x1_t) {__builtin_aarch64_get_dregxidf (__o, 0)};
+  ret.val[1] = (float64x1_t) {__builtin_aarch64_get_dregxidf (__o, 1)};
+  ret.val[2] = (float64x1_t) {__builtin_aarch64_get_dregxidf (__o, 2)};
+  ret.val[3] = (float64x1_t) {__builtin_aarch64_get_dregxidf (__o, 3)};
   return ret;
 }
 
@@ -20791,7 +20796,7 @@ vmov_n_f32 (float32_t __a)
 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
 vmov_n_f64 (float64_t __a)
 {
-  return __a;
+  return (float64x1_t) {__a};
 }
 
 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
@@ -21013,7 +21018,7 @@ vmulq_lane_f32 (float32x4_t __a, float32x2_t __b, const int __lane)
 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
 vmulq_lane_f64 (float64x2_t __a, float64x1_t __b, const int __lane)
 {
-  return __a * __b;
+  return __a * __b[0];
 }
 
 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
@@ -22817,7 +22822,7 @@ vrndn_f32 (float32x2_t __a)
 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
 vrndn_f64 (float64x1_t __a)
 {
-  return __builtin_aarch64_frintndf (__a);
+  return (float64x1_t) {__builtin_aarch64_frintndf (__a[0])};
 }
 
 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
@@ -24164,7 +24169,7 @@ vst1_f32 (float32_t *a, float32x2_t b)
 __extension__ static __inline void __attribute__ ((__always_inline__))
 vst1_f64 (float64_t *a, float64x1_t b)
 {
-  *a = b;
+  *a = b[0];
 }
 
 __extension__ static __inline void __attribute__ ((__always_inline__))
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index e76e3ef10ee6e67e6bceabfb9d4126dbde0642d6..a35bc3bb09299a45f5053d26f05a8dce9bc048f2 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -138,8 +138,11 @@
 ;; Double vector modes for combines.
 (define_mode_iterator VDIC [V8QI V4HI V2SI])
 
-;; Double vector modes.
-(define_mode_iterator VD_RE [V8QI V4HI V2SI DI DF V2SF])
+;; Double vector modes, inc. V1DF and the DI "vector" mode, for VREINTERPRET.
+(define_mode_iterator VD_RE [V8QI V4HI V2SI DI V1DF V2SF])
+
+;; Double vector modes inc V1DF
+(define_mode_iterator VD1 [V8QI V4HI V2SI V2SF V1DF])
 
 ;; Vector modes except double int.
 (define_mode_iterator VDQIF [V8QI V16QI V4HI V8HI V2SI V4SI V2SF V4SF V2DF])
diff --git a/gcc/testsuite/g++.dg/abi/mangle-neon-aarch64.C b/gcc/testsuite/g++.dg/abi/mangle-neon-aarch64.C
index 09540e84144bb937ebb0a0611c891c9e593669cf..025b6904afa9f4ea39550ecd95d91a7be1d48cc6 100644
--- a/gcc/testsuite/g++.dg/abi/mangle-neon-aarch64.C
+++ b/gcc/testsuite/g++.dg/abi/mangle-neon-aarch64.C
@@ -27,8 +27,9 @@ void f17 (float32x4_t a) {}
 void f18 (float64x2_t a) {}
 void f19 (poly8x16_t a) {}
 void f20 (poly16x8_t a) {}
+void f21 (float64x1_t a) {}
 
-void f21 (int8x16_t, int8x16_t) {}
+void g1 (int8x16_t, int8x16_t) {}
 
 
 // { dg-final { scan-assembler "_Z2f010__Int8x8_t:" } }
@@ -52,4 +53,5 @@ void f21 (int8x16_t, int8x16_t) {}
 // { dg-final { scan-assembler "_Z3f1813__Float64x2_t:" } }
 // { dg-final { scan-assembler "_Z3f1912__Poly8x16_t:" } }
 // { dg-final { scan-assembler "_Z3f2012__Poly16x8_t:" } }
-// { dg-final { scan-assembler "_Z3f2111__Int8x16_tS_:" } }
+// { dg-final { scan-assembler "_Z3f2113__Float64x1_t:" } }
+// { dg-final { scan-assembler "_Z2g111__Int8x16_tS_:" } }
diff --git a/gcc/testsuite/gcc.target/aarch64/aapcs64/func-ret-64x1_1.c b/gcc/testsuite/gcc.target/aarch64/aapcs64/func-ret-64x1_1.c
new file mode 100644
index 0000000000000000000000000000000000000000..673242687e4946d7bc1cb61c247510dfd128cc81
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/aapcs64/func-ret-64x1_1.c
@@ -0,0 +1,15 @@
+/* Test AAPCS64 layout.
+
+  Test 64-bit singleton vector types which should be in FP/SIMD registers.  */
+
+/* { dg-do run { target aarch64*-*-* } } */
+/* { dg-additional-sources "abitest.S" } */
+
+#ifndef IN_FRAMEWORK
+#define TESTFILE "func-ret-64x1_1.c"
+#include <arm_neon.h>
+#include "abitest-2.h"
+#else
+FUNC_VAL_CHECK ( 0, float64x1_t, (float64x1_t) {123456.789}, D0, flat)
+#endif
+
diff --git a/gcc/testsuite/gcc.target/aarch64/aapcs64/test_64x1_1.c b/gcc/testsuite/gcc.target/aarch64/aapcs64/test_64x1_1.c
new file mode 100644
index 0000000000000000000000000000000000000000..f1dc1a759b07fcc8a9c4310ac14f43274a3f378f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/aapcs64/test_64x1_1.c
@@ -0,0 +1,16 @@
+/* Test AAPCS64 layout.
+
+   Test 64-bit singleton vector types which should be in FP/SIMD registers.  */
+
+/* { dg-do run { target aarch64*-*-* } } */
+
+#ifndef IN_FRAMEWORK
+#define TESTFILE "test_64x1_1.c"
+#include <arm_neon.h>
+
+#include "abitest.h"
+#else
+ARG (float64x1_t, (float64x1_t) {123456.789}, D0)
+ARG (float64_t, 987654.321, D1)
+LAST_ARG (float64x1_t, (float64x1_t) {13579.2468}, D2)
+#endif
diff --git a/gcc/testsuite/gcc.target/aarch64/vadd_f64.c b/gcc/testsuite/gcc.target/aarch64/vadd_f64.c
index c3bf7349597aa9b75e0bc34cfd4cde4dc16b95f3..f35c42dcfbd2a8da19f183e4d23d365702a087dc 100644
--- a/gcc/testsuite/gcc.target/aarch64/vadd_f64.c
+++ b/gcc/testsuite/gcc.target/aarch64/vadd_f64.c
@@ -4,9 +4,6 @@
 
 #include <arm_neon.h>
 
-#define FLT_EPSILON __FLT_EPSILON__
-#define DBL_EPSILON __DBL_EPSILON__
-
 #define TESTA0 0.33333
 #define TESTA1 -1.7777
 #define TESTA2 0
@@ -42,70 +39,41 @@ extern void abort (void);
     || (ABS (a - b) < epsilon)				\
    )
 
-int
-test_vadd_f64 ()
-{
-  float64x1_t a;
-  float64x1_t b;
-  float64x1_t c;
-
-  a = TESTA0;
-  b = TESTB0;
-  c = ANSW0;
-
-  a = vadd_f64 (a, b);
-  if (!FP_equals (a, c, EPSILON))
-    return 1;
-
-  a = TESTA1;
-  b = TESTB1;
-  c = ANSW1;
-
-  a = vadd_f64 (a, b);
-  if (!FP_equals (a, c, EPSILON))
-    return 1;
-
-  a = TESTA2;
-  b = TESTB2;
-  c = ANSW2;
-
-  a = vadd_f64 (a, b);
-  if (!FP_equals (a, c, EPSILON))
-    return 1;
-
-  a = TESTA3;
-  b = TESTB3;
-  c = ANSW3;
-
-  a = vadd_f64 (a, b);
-  if (!FP_equals (a, c, EPSILON))
-    return 1;
-
-  a = TESTA4;
-  b = TESTB4;
-  c = ANSW4;
-
-  a = vadd_f64 (a, b);
-  if (!FP_equals (a, c, EPSILON))
-    return 1;
-
-  a = TESTA5;
-  b = TESTB5;
-  c = ANSW5;
-
-  a = vadd_f64 (a, b);
-  if (!FP_equals (a, c, EPSILON))
-    return 1;
-
-  return 0;
+#define TEST(N)					\
+int						\
+test_vadd_f64_##N ()				\
+{						\
+  float64x1_t a = { TESTA##N };			\
+  float64x1_t b = { TESTB##N };			\
+  float64x1_t c = { ANSW##N };			\
+						\
+  a = vadd_f64 (a, b);				\
+  return !FP_equals (a[0], c[0], EPSILON);	\
 }
 
+TEST (0)
+TEST (1)
+TEST (2)
+TEST (3)
+TEST (4)
+TEST (5)
+
 /* { dg-final { scan-assembler-times "fadd\\td\[0-9\]+, d\[0-9\]+, d\[0-9\]+" 6 } } */
 
 int
 main (int argc, char **argv)
 {
-  if (test_vadd_f64 ())
+  if (test_vadd_f64_0 ())
+    abort ();
+  if (test_vadd_f64_1 ())
+    abort ();
+  if (test_vadd_f64_2 ())
+    abort ();
+  if (test_vadd_f64_3 ())
+    abort ();
+  if (test_vadd_f64_4 ())
+    abort ();
+  if (test_vadd_f64_5 ())
     abort ();
 
   return 0;
diff --git a/gcc/testsuite/gcc.target/aarch64/vdiv_f.c b/gcc/testsuite/gcc.target/aarch64/vdiv_f.c
index cc3a9570c0fac0dcbf38f38314a416cca5e58c6e..9e1b768eda3a88ea37a5da8ffa405e29ec2f2d60 100644
--- a/gcc/testsuite/gcc.target/aarch64/vdiv_f.c
+++ b/gcc/testsuite/gcc.target/aarch64/vdiv_f.c
@@ -99,13 +99,6 @@
 #define EPSILON_64 __DBL_EPSILON__
 #define EPSILON(data_len) EPSILON_##data_len
 
-#define INDEX64_32 [i]
-#define INDEX64_64
-#define INDEX128_32 [i]
-#define INDEX128_64 [i]
-#define INDEX(reg_len, data_len) \
-  CONCAT1 (INDEX, reg_len##_##data_len)
-
 #define LOAD_INST(reg_len, data_len) \
   CONCAT1 (vld1, POSTFIX (reg_len, data_len))
 #define DIV_INST(reg_len, data_len) \
@@ -135,9 +128,7 @@
   for (i = 0; i < n; i++)						\
   {									\
     INHIB_OPTIMIZATION;							\
-    if (!FP_equals ((a) INDEX (reg_len, data_len),			\
-		    (c) INDEX (reg_len, data_len),			\
-		    EPSILON (data_len)))				\
+    if (!FP_equals ((a) [i], (c) [i], EPSILON (data_len)))		\
       return 1;								\
   }									\
 }
diff --git a/gcc/testsuite/gcc.target/aarch64/vrnd_f64_1.c b/gcc/testsuite/gcc.target/aarch64/vrnd_f64_1.c
index 2451ecdcfb6440c100675d34342ee1f5d517c2d5..31efc4f2752b6e32808d7ba382c9f378e9e73299 100644
--- a/gcc/testsuite/gcc.target/aarch64/vrnd_f64_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/vrnd_f64_1.c
@@ -30,7 +30,7 @@ set_rounding_mode (uint32_t mode)
   asm volatile ("msr fpcr, %[r]" : : [r] "r" (r) :);
 }
 
-float64x1_t __attribute__ ((noinline))
+float64_t __attribute__ ((noinline))
 compare_f64 (float64x1_t passed, float64_t expected)
 {
   return (__builtin_fabs (vget_lane_f64 (passed, 0) - expected)
diff --git a/gcc/testsuite/gcc.target/aarch64/vsub_f64.c b/gcc/testsuite/gcc.target/aarch64/vsub_f64.c
index abf4fc42d49dc695f435b1e0f331737c8e9367b0..91d74638201e386f500717542973ed46f9c7c5cf 100644
--- a/gcc/testsuite/gcc.target/aarch64/vsub_f64.c
+++ b/gcc/testsuite/gcc.target/aarch64/vsub_f64.c
@@ -4,9 +4,6 @@
 
 #include <arm_neon.h>
 
-#define FLT_EPSILON __FLT_EPSILON__
-#define DBL_EPSILON __DBL_EPSILON__
-
 #define TESTA0 1
 #define TESTA1 0.2223
 #define TESTA2 0
@@ -44,70 +41,41 @@ extern void abort (void);
      || ((b > a) && (b < (a + epsilon))))	\
 )
 
-int
-test_vsub_f64 ()
-{
-  float64x1_t a;
-  float64x1_t b;
-  float64x1_t c;
-
-  a = TESTA0;
-  b = TESTB0;
-  c = ANSW0;
-
-  a = vsub_f64 (a, b);
-  if (!FP_equals (a, c, EPSILON))
-    return 1;
-
-  a = TESTA1;
-  b = TESTB1;
-  c = ANSW1;
-
-  a = vsub_f64 (a, b);
-  if (!FP_equals (a, c, EPSILON))
-    return 1;
-
-  a = TESTA2;
-  b = TESTB2;
-  c = ANSW2;
-
-  a = vsub_f64 (a, b);
-  if (!FP_equals (a, c, EPSILON))
-    return 1;
-
-  a = TESTA3;
-  b = TESTB3;
-  c = ANSW3;
-
-  a = vsub_f64 (a, b);
-  if (!FP_equals (a, c, EPSILON))
-    return 1;
-
-  a = TESTA4;
-  b = TESTB4;
-  c = ANSW4;
-
-  a = vsub_f64 (a, b);
-  if (!FP_equals (a, c, EPSILON))
-    return 1;
-
-  a = TESTA5;
-  b = TESTB5;
-  c = ANSW5;
-
-  a = vsub_f64 (a, b);
-  if (!FP_equals (a, c, EPSILON))
-    return 1;
-
-  return 0;
+#define TEST(N)					\
+int						\
+test_vsub_f64_##N ()				\
+{						\
+  float64x1_t a = { TESTA##N };			\
+  float64x1_t b = { TESTB##N };			\
+  float64x1_t c = { ANSW##N };			\
+						\
+  a = vsub_f64 (a, b);				\
+  return !FP_equals (a[0], c[0], EPSILON);	\
 }
 
+TEST (0)
+TEST (1)
+TEST (2)
+TEST (3)
+TEST (4)
+TEST (5)
+
 /* { dg-final { scan-assembler-times "fsub\\td\[0-9\]+, d\[0-9\]+, d\[0-9\]+" 6 } } */
 
 int
 main (int argc, char **argv)
 {
-  if (test_vsub_f64 ())
+  if (test_vsub_f64_0 ())
+    abort ();
+  if (test_vsub_f64_1 ())
+    abort ();
+  if (test_vsub_f64_2 ())
+    abort ();
+  if (test_vsub_f64_3 ())
+    abort ();
+  if (test_vsub_f64_4 ())
+    abort ();
+  if (test_vsub_f64_5 ())
     abort ();
 
   return 0;
