This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[ARM] PR 48941: poor code generated for vzip*() and vunzp*()


The buit-in functions that underlie ARM's vzip*() and vunzp*() functions
pass the result by reference rather than value.  This leads to very poor
output, as demonstrated in PR 48941.

This patch makes them return the vectors by value instead, using the
structure modes TImode and OImode.  Tested on arm-linux-gnueabi.
OK to install?

Richard


gcc/
	PR target/48941
	* config/arm/arm.c (arm_init_neon_builtins): Make RESULTPAIR
	intrinsics return by value rather than reference.
	(arm_expand_neon_builtin): Update accordingly.
	(neon_emit_pair_result_insn): Likewise.
	* config/arm/neon.md (neon_vtrn<mode>): Split into double and quad
	patterns.  Make the former return TImode and the latter OImode.
	(neon_vzip<mode>, neon_vuzp<mode>): Likewise.
	* config/arm/neon.ml (features): Remove ReturnPtr.
	(ops): Remove ReturnPtr from Vtrn, Vzip and Vuzp feature lists.
	* config/arm/neon-gen.ml (return_by_ptr): Delete.
	(return, params, print_variant): Remove return-by-pointer handling.
	* config/arm/arm_neon.h: Regenerate.

Index: gcc/config/arm/arm.c
===================================================================
--- gcc/config/arm/arm.c	2011-12-07 11:43:27.359242997 +0000
+++ gcc/config/arm/arm.c	2011-12-08 10:18:02.636136134 +0000
@@ -19386,26 +19386,16 @@ arm_init_neon_builtins (void)
   tree intCI_type_node;
   tree intXI_type_node;
 
-  tree V8QI_pointer_node;
-  tree V4HI_pointer_node;
-  tree V2SI_pointer_node;
-  tree V2SF_pointer_node;
-  tree V16QI_pointer_node;
-  tree V8HI_pointer_node;
-  tree V4SI_pointer_node;
-  tree V4SF_pointer_node;
-  tree V2DI_pointer_node;
-
-  tree void_ftype_pv8qi_v8qi_v8qi;
-  tree void_ftype_pv4hi_v4hi_v4hi;
-  tree void_ftype_pv2si_v2si_v2si;
-  tree void_ftype_pv2sf_v2sf_v2sf;
-  tree void_ftype_pdi_di_di;
-  tree void_ftype_pv16qi_v16qi_v16qi;
-  tree void_ftype_pv8hi_v8hi_v8hi;
-  tree void_ftype_pv4si_v4si_v4si;
-  tree void_ftype_pv4sf_v4sf_v4sf;
-  tree void_ftype_pv2di_v2di_v2di;
+  tree ti_ftype_v8qi_v8qi;
+  tree ti_ftype_v4hi_v4hi;
+  tree ti_ftype_v2si_v2si;
+  tree ti_ftype_v2sf_v2sf;
+  tree ti_ftype_di_di;
+  tree oi_ftype_v16qi_v16qi;
+  tree oi_ftype_v8hi_v8hi;
+  tree oi_ftype_v4si_v4si;
+  tree oi_ftype_v4sf_v4sf;
+  tree oi_ftype_v2di_v2di;
 
   tree reinterp_ftype_dreg[5][5];
   tree reinterp_ftype_qreg[5][5];
@@ -19519,47 +19509,36 @@ arm_init_neon_builtins (void)
   (*lang_hooks.types.register_builtin_type) (intXI_type_node,
 					     "__builtin_neon_xi");
 
-  /* Pointers to vector types.  */
-  V8QI_pointer_node = build_pointer_type (V8QI_type_node);
-  V4HI_pointer_node = build_pointer_type (V4HI_type_node);
-  V2SI_pointer_node = build_pointer_type (V2SI_type_node);
-  V2SF_pointer_node = build_pointer_type (V2SF_type_node);
-  V16QI_pointer_node = build_pointer_type (V16QI_type_node);
-  V8HI_pointer_node = build_pointer_type (V8HI_type_node);
-  V4SI_pointer_node = build_pointer_type (V4SI_type_node);
-  V4SF_pointer_node = build_pointer_type (V4SF_type_node);
-  V2DI_pointer_node = build_pointer_type (V2DI_type_node);
-
   /* Operations which return results as pairs.  */
-  void_ftype_pv8qi_v8qi_v8qi =
-    build_function_type_list (void_type_node, V8QI_pointer_node, V8QI_type_node,
+  ti_ftype_v8qi_v8qi =
+    build_function_type_list (intTI_type_node, V8QI_type_node,
   			      V8QI_type_node, NULL);
-  void_ftype_pv4hi_v4hi_v4hi =
-    build_function_type_list (void_type_node, V4HI_pointer_node, V4HI_type_node,
+  ti_ftype_v4hi_v4hi =
+    build_function_type_list (intTI_type_node, V4HI_type_node,
   			      V4HI_type_node, NULL);
-  void_ftype_pv2si_v2si_v2si =
-    build_function_type_list (void_type_node, V2SI_pointer_node, V2SI_type_node,
+  ti_ftype_v2si_v2si =
+    build_function_type_list (intTI_type_node, V2SI_type_node,
   			      V2SI_type_node, NULL);
-  void_ftype_pv2sf_v2sf_v2sf =
-    build_function_type_list (void_type_node, V2SF_pointer_node, V2SF_type_node,
+  ti_ftype_v2sf_v2sf =
+    build_function_type_list (intTI_type_node, V2SF_type_node,
   			      V2SF_type_node, NULL);
-  void_ftype_pdi_di_di =
-    build_function_type_list (void_type_node, intDI_pointer_node,
-			      neon_intDI_type_node, neon_intDI_type_node, NULL);
-  void_ftype_pv16qi_v16qi_v16qi =
-    build_function_type_list (void_type_node, V16QI_pointer_node,
-			      V16QI_type_node, V16QI_type_node, NULL);
-  void_ftype_pv8hi_v8hi_v8hi =
-    build_function_type_list (void_type_node, V8HI_pointer_node, V8HI_type_node,
+  ti_ftype_di_di =
+    build_function_type_list (intTI_type_node, neon_intDI_type_node,
+			      neon_intDI_type_node, NULL);
+  oi_ftype_v16qi_v16qi =
+    build_function_type_list (intOI_type_node, V16QI_type_node,
+			      V16QI_type_node, NULL);
+  oi_ftype_v8hi_v8hi =
+    build_function_type_list (intOI_type_node, V8HI_type_node,
   			      V8HI_type_node, NULL);
-  void_ftype_pv4si_v4si_v4si =
-    build_function_type_list (void_type_node, V4SI_pointer_node, V4SI_type_node,
+  oi_ftype_v4si_v4si =
+    build_function_type_list (intOI_type_node, V4SI_type_node,
   			      V4SI_type_node, NULL);
-  void_ftype_pv4sf_v4sf_v4sf =
-    build_function_type_list (void_type_node, V4SF_pointer_node, V4SF_type_node,
+  oi_ftype_v4sf_v4sf =
+    build_function_type_list (intOI_type_node, V4SF_type_node,
   			      V4SF_type_node, NULL);
-  void_ftype_pv2di_v2di_v2di =
-    build_function_type_list (void_type_node, V2DI_pointer_node, V2DI_type_node,
+  oi_ftype_v2di_v2di =
+    build_function_type_list (intOI_type_node, V2DI_type_node,
 			      V2DI_type_node, NULL);
 
   dreg_types[0] = V8QI_type_node;
@@ -19777,16 +19756,16 @@ arm_init_neon_builtins (void)
 	  {
 	    switch (insn_data[d->code].operand[1].mode)
 	      {
-	      case V8QImode: ftype = void_ftype_pv8qi_v8qi_v8qi; break;
-	      case V4HImode: ftype = void_ftype_pv4hi_v4hi_v4hi; break;
-	      case V2SImode: ftype = void_ftype_pv2si_v2si_v2si; break;
-	      case V2SFmode: ftype = void_ftype_pv2sf_v2sf_v2sf; break;
-	      case DImode: ftype = void_ftype_pdi_di_di; break;
-	      case V16QImode: ftype = void_ftype_pv16qi_v16qi_v16qi; break;
-	      case V8HImode: ftype = void_ftype_pv8hi_v8hi_v8hi; break;
-	      case V4SImode: ftype = void_ftype_pv4si_v4si_v4si; break;
-	      case V4SFmode: ftype = void_ftype_pv4sf_v4sf_v4sf; break;
-	      case V2DImode: ftype = void_ftype_pv2di_v2di_v2di; break;
+	      case V8QImode: ftype = ti_ftype_v8qi_v8qi; break;
+	      case V4HImode: ftype = ti_ftype_v4hi_v4hi; break;
+	      case V2SImode: ftype = ti_ftype_v2si_v2si; break;
+	      case V2SFmode: ftype = ti_ftype_v2sf_v2sf; break;
+	      case DImode: ftype = ti_ftype_di_di; break;
+	      case V16QImode: ftype = oi_ftype_v16qi_v16qi; break;
+	      case V8HImode: ftype = oi_ftype_v8hi_v8hi; break;
+	      case V4SImode: ftype = oi_ftype_v4si_v4si; break;
+	      case V4SFmode: ftype = oi_ftype_v4sf_v4sf; break;
+	      case V2DImode: ftype = oi_ftype_v2di_v2di; break;
 	      default: gcc_unreachable ();
 	      }
 	  }
@@ -20710,9 +20689,8 @@ arm_expand_neon_builtin (int fcode, tree
         NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
 
     case NEON_RESULTPAIR:
-      return arm_expand_neon_args (target, icode, 0, type_mode, exp,
-        NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
-        NEON_ARG_STOP);
+      return arm_expand_neon_args (target, icode, 1, type_mode, exp,
+        NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
 
     case NEON_LANEMUL:
     case NEON_LANEMULL:
@@ -20779,18 +20757,16 @@ neon_reinterpret (rtx dest, rtx src)
    registers).  */
 void
 neon_emit_pair_result_insn (enum machine_mode mode,
-			    rtx (*intfn) (rtx, rtx, rtx, rtx), rtx destaddr,
+			    rtx (*intfn) (rtx, rtx, rtx, rtx), rtx pair,
                             rtx op1, rtx op2)
 {
-  rtx mem = gen_rtx_MEM (mode, destaddr);
-  rtx tmp1 = gen_reg_rtx (mode);
-  rtx tmp2 = gen_reg_rtx (mode);
-
-  emit_insn (intfn (tmp1, op1, op2, tmp2));
-
-  emit_move_insn (mem, tmp1);
-  mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
-  emit_move_insn (mem, tmp2);
+  rtx first, second;
+
+  first = simplify_gen_subreg (mode, pair, GET_MODE (pair), 0);
+  second = simplify_gen_subreg (mode, pair, GET_MODE (pair),
+				GET_MODE_SIZE (mode));
+
+  emit_insn (intfn (first, op1, op2, second));
 }
 
 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
Index: gcc/config/arm/neon.md
===================================================================
--- gcc/config/arm/neon.md	2011-12-07 11:43:27.365242985 +0000
+++ gcc/config/arm/neon.md	2011-12-08 10:18:02.704135803 +0000
@@ -3973,9 +3973,20 @@ (define_insn "neon_vtrn<mode>_internal"
 )
 
 (define_expand "neon_vtrn<mode>"
-  [(match_operand:SI 0 "s_register_operand" "r")
-   (match_operand:VDQW 1 "s_register_operand" "w")
-   (match_operand:VDQW 2 "s_register_operand" "w")]
+  [(match_operand:TI 0 "s_register_operand")
+   (match_operand:VD 1 "s_register_operand")
+   (match_operand:VD 2 "s_register_operand")]
+  "TARGET_NEON"
+{
+  neon_emit_pair_result_insn (<MODE>mode, gen_neon_vtrn<mode>_internal,
+			      operands[0], operands[1], operands[2]);
+  DONE;
+})
+
+(define_expand "neon_vtrn<mode>"
+  [(match_operand:OI 0 "s_register_operand")
+   (match_operand:VQ 1 "s_register_operand")
+   (match_operand:VQ 2 "s_register_operand")]
   "TARGET_NEON"
 {
   neon_emit_pair_result_insn (<MODE>mode, gen_neon_vtrn<mode>_internal,
@@ -4000,9 +4011,20 @@ (define_insn "neon_vzip<mode>_internal"
 )
 
 (define_expand "neon_vzip<mode>"
-  [(match_operand:SI 0 "s_register_operand" "r")
-   (match_operand:VDQW 1 "s_register_operand" "w")
-   (match_operand:VDQW 2 "s_register_operand" "w")]
+  [(match_operand:TI 0 "s_register_operand")
+   (match_operand:VD 1 "s_register_operand")
+   (match_operand:VD 2 "s_register_operand")]
+  "TARGET_NEON"
+{
+  neon_emit_pair_result_insn (<MODE>mode, gen_neon_vzip<mode>_internal,
+			      operands[0], operands[1], operands[2]);
+  DONE;
+})
+
+(define_expand "neon_vzip<mode>"
+  [(match_operand:OI 0 "s_register_operand")
+   (match_operand:VQ 1 "s_register_operand")
+   (match_operand:VQ 2 "s_register_operand")]
   "TARGET_NEON"
 {
   neon_emit_pair_result_insn (<MODE>mode, gen_neon_vzip<mode>_internal,
@@ -4027,9 +4049,20 @@ (define_insn "neon_vuzp<mode>_internal"
 )
 
 (define_expand "neon_vuzp<mode>"
-  [(match_operand:SI 0 "s_register_operand" "r")
-   (match_operand:VDQW 1 "s_register_operand" "w")
-   (match_operand:VDQW 2 "s_register_operand" "w")]
+  [(match_operand:TI 0 "s_register_operand")
+   (match_operand:VD 1 "s_register_operand")
+   (match_operand:VD 2 "s_register_operand")]
+  "TARGET_NEON"
+{
+  neon_emit_pair_result_insn (<MODE>mode, gen_neon_vuzp<mode>_internal,
+			      operands[0], operands[1], operands[2]);
+  DONE;
+})
+
+(define_expand "neon_vuzp<mode>"
+  [(match_operand:OI 0 "s_register_operand")
+   (match_operand:VQ 1 "s_register_operand")
+   (match_operand:VQ 2 "s_register_operand")]
   "TARGET_NEON"
 {
   neon_emit_pair_result_insn (<MODE>mode, gen_neon_vuzp<mode>_internal,
Index: gcc/config/arm/neon.ml
===================================================================
--- gcc/config/arm/neon.ml	2011-06-24 16:28:22.000000000 +0100
+++ gcc/config/arm/neon.ml	2011-12-08 10:18:02.712135763 +0000
@@ -214,7 +214,6 @@ type features =
   | Flipped of string  (* Builtin name to use with flipped arguments.  *)
   | InfoWord  (* Pass an extra word for signage/rounding etc. (always passed
                  for All _, Long, Wide, Narrow shape_forms.  *)
-  | ReturnPtr  (* Pass explicit pointer to return value as first argument.  *)
     (* A specification as to the shape of instruction expected upon
        disassembly, used if it differs from the shape used to build the
        intrinsic prototype.  Multiple entries in the constructor's argument
@@ -1308,25 +1307,16 @@ let ops =
       Use_operands [| Qreg; Qreg; Qreg; Qreg |], "vbslQ", bit_select,
       pf_su_8_64;
 
-    (* Transpose elements.  **NOTE** ReturnPtr goes some of the way towards
-       generating good code for intrinsics which return structure types --
-       builtins work well by themselves (and understand that the values being
-       stored on e.g. the stack also reside in registers, so can optimise the
-       stores away entirely if the results are used immediately), but
-       intrinsics are very much less efficient. Maybe something can be improved
-       re: inlining, or tweaking the ABI used for intrinsics (a special call
-       attribute?).
-    *)
-    Vtrn, [ReturnPtr], Pair_result Dreg, "vtrn", bits_2, pf_su_8_32;
-    Vtrn, [ReturnPtr], Pair_result Qreg, "vtrnQ", bits_2, pf_su_8_32;
+    Vtrn, [], Pair_result Dreg, "vtrn", bits_2, pf_su_8_32;
+    Vtrn, [], Pair_result Qreg, "vtrnQ", bits_2, pf_su_8_32;
 
     (* Zip elements.  *)
-    Vzip, [ReturnPtr], Pair_result Dreg, "vzip", bits_2, pf_su_8_32;
-    Vzip, [ReturnPtr], Pair_result Qreg, "vzipQ", bits_2, pf_su_8_32;
+    Vzip, [], Pair_result Dreg, "vzip", bits_2, pf_su_8_32;
+    Vzip, [], Pair_result Qreg, "vzipQ", bits_2, pf_su_8_32;
 
     (* Unzip elements.  *)
-    Vuzp, [ReturnPtr], Pair_result Dreg, "vuzp", bits_2, pf_su_8_32;
-    Vuzp, [ReturnPtr], Pair_result Qreg, "vuzpQ", bits_2, pf_su_8_32;
+    Vuzp, [], Pair_result Dreg, "vuzp", bits_2, pf_su_8_32;
+    Vuzp, [], Pair_result Qreg, "vuzpQ", bits_2, pf_su_8_32;
 
     (* Element/structure loads.  VLD1 variants.  *)
     Vldx 1,
Index: gcc/config/arm/neon-gen.ml
===================================================================
--- gcc/config/arm/neon-gen.ml	2011-06-24 16:28:22.000000000 +0100
+++ gcc/config/arm/neon-gen.ml	2011-12-08 10:18:02.678135930 +0000
@@ -98,8 +98,6 @@ let print_function arity fnname body =
   close_braceblock ffmt;
   end_function ffmt
 
-let return_by_ptr features = List.mem ReturnPtr features
-
 let union_string num elts base =
   let itype = inttype_for_array num elts in
   let iname = string_of_inttype itype
@@ -141,19 +139,14 @@ let cast_for_return to_ty = "(" ^ (strin
 
 (* Return a tuple of a list of declarations to go at the start of the function,
    and a list of statements needed to return THING.  *)
-let return arity return_by_ptr thing =
+let return arity thing =
   match arity with
     Arity0 (ret) | Arity1 (ret, _) | Arity2 (ret, _, _) | Arity3 (ret, _, _, _)
   | Arity4 (ret, _, _, _, _) ->
     match ret with
       T_arrayof (num, vec) ->
-        if return_by_ptr then
-          let sname = string_of_vectype ret in
-          [Printf.sprintf "%s __rv;" sname],
-          [thing ^ ";"; "return __rv;"]
-        else
-          let uname = union_string num vec "__rv" in
-          [uname ^ ";"], ["__rv.__o = " ^ thing ^ ";"; "return __rv.__i;"]
+        let uname = union_string num vec "__rv" in
+        [uname ^ ";"], ["__rv.__o = " ^ thing ^ ";"; "return __rv.__i;"]
     | T_void -> [], [thing ^ ";"]
     | _ ->
         [], ["return " ^ (cast_for_return ret) ^ thing ^ ";"]
@@ -163,7 +156,7 @@ let rec element_type ctype =
     T_arrayof (_, v) -> element_type v
   | _ -> ctype
 
-let params return_by_ptr ps =
+let params ps =
   let pdecls = ref [] in
   let ptype t p =
     match t with
@@ -183,10 +176,7 @@ let params return_by_ptr ps =
   match ps with
     Arity0 ret | Arity1 (ret, _) | Arity2 (ret, _, _) | Arity3 (ret, _, _, _)
   | Arity4 (ret, _, _, _, _) ->
-      if return_by_ptr then
-        !pdecls, add_cast (T_ptrto (element_type ret)) "&__rv.val[0]" :: plist
-      else
-        !pdecls, plist
+      !pdecls, plist
 
 let modify_params features plist =
   let is_flipped =
@@ -242,14 +232,13 @@ let rec mode_suffix elttype shape =
 let print_variant opcode features shape name (ctype, asmtype, elttype) =
   let bits = infoword_value elttype features in
   let modesuf = mode_suffix elttype shape in
-  let return_by_ptr = return_by_ptr features in
-  let pdecls, paramlist = params return_by_ptr ctype in
+  let pdecls, paramlist = params ctype in
   let paramlist' = modify_params features paramlist in
   let paramlist'' = extra_word shape features paramlist' bits in
   let parstr = String.concat ", " paramlist'' in
   let builtin = Printf.sprintf "__builtin_neon_%s%s (%s)"
                   (builtin_name features name) modesuf parstr in
-  let rdecls, stmts = return ctype return_by_ptr builtin in
+  let rdecls, stmts = return ctype builtin in
   let body = pdecls @ rdecls @ stmts
   and fnname = (intrinsic_name name) ^ "_" ^ (string_of_elt elttype) in
   print_function ctype fnname body
Index: gcc/config/arm/arm_neon.h
===================================================================
--- gcc/config/arm/arm_neon.h	2011-06-24 16:28:22.000000000 +0100
+++ gcc/config/arm/arm_neon.h	2011-12-08 10:18:02.664135998 +0000
@@ -7395,433 +7395,433 @@ vbslq_p16 (uint16x8_t __a, poly16x8_t __
 __extension__ static __inline int8x8x2_t __attribute__ ((__always_inline__))
 vtrn_s8 (int8x8_t __a, int8x8_t __b)
 {
-  int8x8x2_t __rv;
-  __builtin_neon_vtrnv8qi (&__rv.val[0], __a, __b);
-  return __rv;
+  union { int8x8x2_t __i; __builtin_neon_ti __o; } __rv;
+  __rv.__o = __builtin_neon_vtrnv8qi (__a, __b);
+  return __rv.__i;
 }
 
 __extension__ static __inline int16x4x2_t __attribute__ ((__always_inline__))
 vtrn_s16 (int16x4_t __a, int16x4_t __b)
 {
-  int16x4x2_t __rv;
-  __builtin_neon_vtrnv4hi (&__rv.val[0], __a, __b);
-  return __rv;
+  union { int16x4x2_t __i; __builtin_neon_ti __o; } __rv;
+  __rv.__o = __builtin_neon_vtrnv4hi (__a, __b);
+  return __rv.__i;
 }
 
 __extension__ static __inline int32x2x2_t __attribute__ ((__always_inline__))
 vtrn_s32 (int32x2_t __a, int32x2_t __b)
 {
-  int32x2x2_t __rv;
-  __builtin_neon_vtrnv2si (&__rv.val[0], __a, __b);
-  return __rv;
+  union { int32x2x2_t __i; __builtin_neon_ti __o; } __rv;
+  __rv.__o = __builtin_neon_vtrnv2si (__a, __b);
+  return __rv.__i;
 }
 
 __extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__))
 vtrn_f32 (float32x2_t __a, float32x2_t __b)
 {
-  float32x2x2_t __rv;
-  __builtin_neon_vtrnv2sf (&__rv.val[0], __a, __b);
-  return __rv;
+  union { float32x2x2_t __i; __builtin_neon_ti __o; } __rv;
+  __rv.__o = __builtin_neon_vtrnv2sf (__a, __b);
+  return __rv.__i;
 }
 
 __extension__ static __inline uint8x8x2_t __attribute__ ((__always_inline__))
 vtrn_u8 (uint8x8_t __a, uint8x8_t __b)
 {
-  uint8x8x2_t __rv;
-  __builtin_neon_vtrnv8qi ((int8x8_t *) &__rv.val[0], (int8x8_t) __a, (int8x8_t) __b);
-  return __rv;
+  union { uint8x8x2_t __i; __builtin_neon_ti __o; } __rv;
+  __rv.__o = __builtin_neon_vtrnv8qi ((int8x8_t) __a, (int8x8_t) __b);
+  return __rv.__i;
 }
 
 __extension__ static __inline uint16x4x2_t __attribute__ ((__always_inline__))
 vtrn_u16 (uint16x4_t __a, uint16x4_t __b)
 {
-  uint16x4x2_t __rv;
-  __builtin_neon_vtrnv4hi ((int16x4_t *) &__rv.val[0], (int16x4_t) __a, (int16x4_t) __b);
-  return __rv;
+  union { uint16x4x2_t __i; __builtin_neon_ti __o; } __rv;
+  __rv.__o = __builtin_neon_vtrnv4hi ((int16x4_t) __a, (int16x4_t) __b);
+  return __rv.__i;
 }
 
 __extension__ static __inline uint32x2x2_t __attribute__ ((__always_inline__))
 vtrn_u32 (uint32x2_t __a, uint32x2_t __b)
 {
-  uint32x2x2_t __rv;
-  __builtin_neon_vtrnv2si ((int32x2_t *) &__rv.val[0], (int32x2_t) __a, (int32x2_t) __b);
-  return __rv;
+  union { uint32x2x2_t __i; __builtin_neon_ti __o; } __rv;
+  __rv.__o = __builtin_neon_vtrnv2si ((int32x2_t) __a, (int32x2_t) __b);
+  return __rv.__i;
 }
 
 __extension__ static __inline poly8x8x2_t __attribute__ ((__always_inline__))
 vtrn_p8 (poly8x8_t __a, poly8x8_t __b)
 {
-  poly8x8x2_t __rv;
-  __builtin_neon_vtrnv8qi ((int8x8_t *) &__rv.val[0], (int8x8_t) __a, (int8x8_t) __b);
-  return __rv;
+  union { poly8x8x2_t __i; __builtin_neon_ti __o; } __rv;
+  __rv.__o = __builtin_neon_vtrnv8qi ((int8x8_t) __a, (int8x8_t) __b);
+  return __rv.__i;
 }
 
 __extension__ static __inline poly16x4x2_t __attribute__ ((__always_inline__))
 vtrn_p16 (poly16x4_t __a, poly16x4_t __b)
 {
-  poly16x4x2_t __rv;
-  __builtin_neon_vtrnv4hi ((int16x4_t *) &__rv.val[0], (int16x4_t) __a, (int16x4_t) __b);
-  return __rv;
+  union { poly16x4x2_t __i; __builtin_neon_ti __o; } __rv;
+  __rv.__o = __builtin_neon_vtrnv4hi ((int16x4_t) __a, (int16x4_t) __b);
+  return __rv.__i;
 }
 
 __extension__ static __inline int8x16x2_t __attribute__ ((__always_inline__))
 vtrnq_s8 (int8x16_t __a, int8x16_t __b)
 {
-  int8x16x2_t __rv;
-  __builtin_neon_vtrnv16qi (&__rv.val[0], __a, __b);
-  return __rv;
+  union { int8x16x2_t __i; __builtin_neon_oi __o; } __rv;
+  __rv.__o = __builtin_neon_vtrnv16qi (__a, __b);
+  return __rv.__i;
 }
 
 __extension__ static __inline int16x8x2_t __attribute__ ((__always_inline__))
 vtrnq_s16 (int16x8_t __a, int16x8_t __b)
 {
-  int16x8x2_t __rv;
-  __builtin_neon_vtrnv8hi (&__rv.val[0], __a, __b);
-  return __rv;
+  union { int16x8x2_t __i; __builtin_neon_oi __o; } __rv;
+  __rv.__o = __builtin_neon_vtrnv8hi (__a, __b);
+  return __rv.__i;
 }
 
 __extension__ static __inline int32x4x2_t __attribute__ ((__always_inline__))
 vtrnq_s32 (int32x4_t __a, int32x4_t __b)
 {
-  int32x4x2_t __rv;
-  __builtin_neon_vtrnv4si (&__rv.val[0], __a, __b);
-  return __rv;
+  union { int32x4x2_t __i; __builtin_neon_oi __o; } __rv;
+  __rv.__o = __builtin_neon_vtrnv4si (__a, __b);
+  return __rv.__i;
 }
 
 __extension__ static __inline float32x4x2_t __attribute__ ((__always_inline__))
 vtrnq_f32 (float32x4_t __a, float32x4_t __b)
 {
-  float32x4x2_t __rv;
-  __builtin_neon_vtrnv4sf (&__rv.val[0], __a, __b);
-  return __rv;
+  union { float32x4x2_t __i; __builtin_neon_oi __o; } __rv;
+  __rv.__o = __builtin_neon_vtrnv4sf (__a, __b);
+  return __rv.__i;
 }
 
 __extension__ static __inline uint8x16x2_t __attribute__ ((__always_inline__))
 vtrnq_u8 (uint8x16_t __a, uint8x16_t __b)
 {
-  uint8x16x2_t __rv;
-  __builtin_neon_vtrnv16qi ((int8x16_t *) &__rv.val[0], (int8x16_t) __a, (int8x16_t) __b);
-  return __rv;
+  union { uint8x16x2_t __i; __builtin_neon_oi __o; } __rv;
+  __rv.__o = __builtin_neon_vtrnv16qi ((int8x16_t) __a, (int8x16_t) __b);
+  return __rv.__i;
 }
 
 __extension__ static __inline uint16x8x2_t __attribute__ ((__always_inline__))
 vtrnq_u16 (uint16x8_t __a, uint16x8_t __b)
 {
-  uint16x8x2_t __rv;
-  __builtin_neon_vtrnv8hi ((int16x8_t *) &__rv.val[0], (int16x8_t) __a, (int16x8_t) __b);
-  return __rv;
+  union { uint16x8x2_t __i; __builtin_neon_oi __o; } __rv;
+  __rv.__o = __builtin_neon_vtrnv8hi ((int16x8_t) __a, (int16x8_t) __b);
+  return __rv.__i;
 }
 
 __extension__ static __inline uint32x4x2_t __attribute__ ((__always_inline__))
 vtrnq_u32 (uint32x4_t __a, uint32x4_t __b)
 {
-  uint32x4x2_t __rv;
-  __builtin_neon_vtrnv4si ((int32x4_t *) &__rv.val[0], (int32x4_t) __a, (int32x4_t) __b);
-  return __rv;
+  union { uint32x4x2_t __i; __builtin_neon_oi __o; } __rv;
+  __rv.__o = __builtin_neon_vtrnv4si ((int32x4_t) __a, (int32x4_t) __b);
+  return __rv.__i;
 }
 
 __extension__ static __inline poly8x16x2_t __attribute__ ((__always_inline__))
 vtrnq_p8 (poly8x16_t __a, poly8x16_t __b)
 {
-  poly8x16x2_t __rv;
-  __builtin_neon_vtrnv16qi ((int8x16_t *) &__rv.val[0], (int8x16_t) __a, (int8x16_t) __b);
-  return __rv;
+  union { poly8x16x2_t __i; __builtin_neon_oi __o; } __rv;
+  __rv.__o = __builtin_neon_vtrnv16qi ((int8x16_t) __a, (int8x16_t) __b);
+  return __rv.__i;
 }
 
 __extension__ static __inline poly16x8x2_t __attribute__ ((__always_inline__))
 vtrnq_p16 (poly16x8_t __a, poly16x8_t __b)
 {
-  poly16x8x2_t __rv;
-  __builtin_neon_vtrnv8hi ((int16x8_t *) &__rv.val[0], (int16x8_t) __a, (int16x8_t) __b);
-  return __rv;
+  union { poly16x8x2_t __i; __builtin_neon_oi __o; } __rv;
+  __rv.__o = __builtin_neon_vtrnv8hi ((int16x8_t) __a, (int16x8_t) __b);
+  return __rv.__i;
 }
 
 __extension__ static __inline int8x8x2_t __attribute__ ((__always_inline__))
 vzip_s8 (int8x8_t __a, int8x8_t __b)
 {
-  int8x8x2_t __rv;
-  __builtin_neon_vzipv8qi (&__rv.val[0], __a, __b);
-  return __rv;
+  union { int8x8x2_t __i; __builtin_neon_ti __o; } __rv;
+  __rv.__o = __builtin_neon_vzipv8qi (__a, __b);
+  return __rv.__i;
 }
 
 __extension__ static __inline int16x4x2_t __attribute__ ((__always_inline__))
 vzip_s16 (int16x4_t __a, int16x4_t __b)
 {
-  int16x4x2_t __rv;
-  __builtin_neon_vzipv4hi (&__rv.val[0], __a, __b);
-  return __rv;
+  union { int16x4x2_t __i; __builtin_neon_ti __o; } __rv;
+  __rv.__o = __builtin_neon_vzipv4hi (__a, __b);
+  return __rv.__i;
 }
 
 __extension__ static __inline int32x2x2_t __attribute__ ((__always_inline__))
 vzip_s32 (int32x2_t __a, int32x2_t __b)
 {
-  int32x2x2_t __rv;
-  __builtin_neon_vzipv2si (&__rv.val[0], __a, __b);
-  return __rv;
+  union { int32x2x2_t __i; __builtin_neon_ti __o; } __rv;
+  __rv.__o = __builtin_neon_vzipv2si (__a, __b);
+  return __rv.__i;
 }
 
 __extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__))
 vzip_f32 (float32x2_t __a, float32x2_t __b)
 {
-  float32x2x2_t __rv;
-  __builtin_neon_vzipv2sf (&__rv.val[0], __a, __b);
-  return __rv;
+  union { float32x2x2_t __i; __builtin_neon_ti __o; } __rv;
+  __rv.__o = __builtin_neon_vzipv2sf (__a, __b);
+  return __rv.__i;
 }
 
 __extension__ static __inline uint8x8x2_t __attribute__ ((__always_inline__))
 vzip_u8 (uint8x8_t __a, uint8x8_t __b)
 {
-  uint8x8x2_t __rv;
-  __builtin_neon_vzipv8qi ((int8x8_t *) &__rv.val[0], (int8x8_t) __a, (int8x8_t) __b);
-  return __rv;
+  union { uint8x8x2_t __i; __builtin_neon_ti __o; } __rv;
+  __rv.__o = __builtin_neon_vzipv8qi ((int8x8_t) __a, (int8x8_t) __b);
+  return __rv.__i;
 }
 
 __extension__ static __inline uint16x4x2_t __attribute__ ((__always_inline__))
 vzip_u16 (uint16x4_t __a, uint16x4_t __b)
 {
-  uint16x4x2_t __rv;
-  __builtin_neon_vzipv4hi ((int16x4_t *) &__rv.val[0], (int16x4_t) __a, (int16x4_t) __b);
-  return __rv;
+  union { uint16x4x2_t __i; __builtin_neon_ti __o; } __rv;
+  __rv.__o = __builtin_neon_vzipv4hi ((int16x4_t) __a, (int16x4_t) __b);
+  return __rv.__i;
 }
 
 __extension__ static __inline uint32x2x2_t __attribute__ ((__always_inline__))
 vzip_u32 (uint32x2_t __a, uint32x2_t __b)
 {
-  uint32x2x2_t __rv;
-  __builtin_neon_vzipv2si ((int32x2_t *) &__rv.val[0], (int32x2_t) __a, (int32x2_t) __b);
-  return __rv;
+  union { uint32x2x2_t __i; __builtin_neon_ti __o; } __rv;
+  __rv.__o = __builtin_neon_vzipv2si ((int32x2_t) __a, (int32x2_t) __b);
+  return __rv.__i;
 }
 
 __extension__ static __inline poly8x8x2_t __attribute__ ((__always_inline__))
 vzip_p8 (poly8x8_t __a, poly8x8_t __b)
 {
-  poly8x8x2_t __rv;
-  __builtin_neon_vzipv8qi ((int8x8_t *) &__rv.val[0], (int8x8_t) __a, (int8x8_t) __b);
-  return __rv;
+  union { poly8x8x2_t __i; __builtin_neon_ti __o; } __rv;
+  __rv.__o = __builtin_neon_vzipv8qi ((int8x8_t) __a, (int8x8_t) __b);
+  return __rv.__i;
 }
 
 __extension__ static __inline poly16x4x2_t __attribute__ ((__always_inline__))
 vzip_p16 (poly16x4_t __a, poly16x4_t __b)
 {
-  poly16x4x2_t __rv;
-  __builtin_neon_vzipv4hi ((int16x4_t *) &__rv.val[0], (int16x4_t) __a, (int16x4_t) __b);
-  return __rv;
+  union { poly16x4x2_t __i; __builtin_neon_ti __o; } __rv;
+  __rv.__o = __builtin_neon_vzipv4hi ((int16x4_t) __a, (int16x4_t) __b);
+  return __rv.__i;
 }
 
 __extension__ static __inline int8x16x2_t __attribute__ ((__always_inline__))
 vzipq_s8 (int8x16_t __a, int8x16_t __b)
 {
-  int8x16x2_t __rv;
-  __builtin_neon_vzipv16qi (&__rv.val[0], __a, __b);
-  return __rv;
+  union { int8x16x2_t __i; __builtin_neon_oi __o; } __rv;
+  __rv.__o = __builtin_neon_vzipv16qi (__a, __b);
+  return __rv.__i;
 }
 
 __extension__ static __inline int16x8x2_t __attribute__ ((__always_inline__))
 vzipq_s16 (int16x8_t __a, int16x8_t __b)
 {
-  int16x8x2_t __rv;
-  __builtin_neon_vzipv8hi (&__rv.val[0], __a, __b);
-  return __rv;
+  union { int16x8x2_t __i; __builtin_neon_oi __o; } __rv;
+  __rv.__o = __builtin_neon_vzipv8hi (__a, __b);
+  return __rv.__i;
 }
 
 __extension__ static __inline int32x4x2_t __attribute__ ((__always_inline__))
 vzipq_s32 (int32x4_t __a, int32x4_t __b)
 {
-  int32x4x2_t __rv;
-  __builtin_neon_vzipv4si (&__rv.val[0], __a, __b);
-  return __rv;
+  union { int32x4x2_t __i; __builtin_neon_oi __o; } __rv;
+  __rv.__o = __builtin_neon_vzipv4si (__a, __b);
+  return __rv.__i;
 }
 
 __extension__ static __inline float32x4x2_t __attribute__ ((__always_inline__))
 vzipq_f32 (float32x4_t __a, float32x4_t __b)
 {
-  float32x4x2_t __rv;
-  __builtin_neon_vzipv4sf (&__rv.val[0], __a, __b);
-  return __rv;
+  union { float32x4x2_t __i; __builtin_neon_oi __o; } __rv;
+  __rv.__o = __builtin_neon_vzipv4sf (__a, __b);
+  return __rv.__i;
 }
 
 __extension__ static __inline uint8x16x2_t __attribute__ ((__always_inline__))
 vzipq_u8 (uint8x16_t __a, uint8x16_t __b)
 {
-  uint8x16x2_t __rv;
-  __builtin_neon_vzipv16qi ((int8x16_t *) &__rv.val[0], (int8x16_t) __a, (int8x16_t) __b);
-  return __rv;
+  union { uint8x16x2_t __i; __builtin_neon_oi __o; } __rv;
+  __rv.__o = __builtin_neon_vzipv16qi ((int8x16_t) __a, (int8x16_t) __b);
+  return __rv.__i;
 }
 
 __extension__ static __inline uint16x8x2_t __attribute__ ((__always_inline__))
 vzipq_u16 (uint16x8_t __a, uint16x8_t __b)
 {
-  uint16x8x2_t __rv;
-  __builtin_neon_vzipv8hi ((int16x8_t *) &__rv.val[0], (int16x8_t) __a, (int16x8_t) __b);
-  return __rv;
+  union { uint16x8x2_t __i; __builtin_neon_oi __o; } __rv;
+  __rv.__o = __builtin_neon_vzipv8hi ((int16x8_t) __a, (int16x8_t) __b);
+  return __rv.__i;
 }
 
 __extension__ static __inline uint32x4x2_t __attribute__ ((__always_inline__))
 vzipq_u32 (uint32x4_t __a, uint32x4_t __b)
 {
-  uint32x4x2_t __rv;
-  __builtin_neon_vzipv4si ((int32x4_t *) &__rv.val[0], (int32x4_t) __a, (int32x4_t) __b);
-  return __rv;
+  union { uint32x4x2_t __i; __builtin_neon_oi __o; } __rv;
+  __rv.__o = __builtin_neon_vzipv4si ((int32x4_t) __a, (int32x4_t) __b);
+  return __rv.__i;
 }
 
 __extension__ static __inline poly8x16x2_t __attribute__ ((__always_inline__))
 vzipq_p8 (poly8x16_t __a, poly8x16_t __b)
 {
-  poly8x16x2_t __rv;
-  __builtin_neon_vzipv16qi ((int8x16_t *) &__rv.val[0], (int8x16_t) __a, (int8x16_t) __b);
-  return __rv;
+  union { poly8x16x2_t __i; __builtin_neon_oi __o; } __rv;
+  __rv.__o = __builtin_neon_vzipv16qi ((int8x16_t) __a, (int8x16_t) __b);
+  return __rv.__i;
 }
 
 __extension__ static __inline poly16x8x2_t __attribute__ ((__always_inline__))
 vzipq_p16 (poly16x8_t __a, poly16x8_t __b)
 {
-  poly16x8x2_t __rv;
-  __builtin_neon_vzipv8hi ((int16x8_t *) &__rv.val[0], (int16x8_t) __a, (int16x8_t) __b);
-  return __rv;
+  union { poly16x8x2_t __i; __builtin_neon_oi __o; } __rv;
+  __rv.__o = __builtin_neon_vzipv8hi ((int16x8_t) __a, (int16x8_t) __b);
+  return __rv.__i;
 }
 
 __extension__ static __inline int8x8x2_t __attribute__ ((__always_inline__))
 vuzp_s8 (int8x8_t __a, int8x8_t __b)
 {
-  int8x8x2_t __rv;
-  __builtin_neon_vuzpv8qi (&__rv.val[0], __a, __b);
-  return __rv;
+  union { int8x8x2_t __i; __builtin_neon_ti __o; } __rv;
+  __rv.__o = __builtin_neon_vuzpv8qi (__a, __b);
+  return __rv.__i;
 }
 
 __extension__ static __inline int16x4x2_t __attribute__ ((__always_inline__))
 vuzp_s16 (int16x4_t __a, int16x4_t __b)
 {
-  int16x4x2_t __rv;
-  __builtin_neon_vuzpv4hi (&__rv.val[0], __a, __b);
-  return __rv;
+  union { int16x4x2_t __i; __builtin_neon_ti __o; } __rv;
+  __rv.__o = __builtin_neon_vuzpv4hi (__a, __b);
+  return __rv.__i;
 }
 
 __extension__ static __inline int32x2x2_t __attribute__ ((__always_inline__))
 vuzp_s32 (int32x2_t __a, int32x2_t __b)
 {
-  int32x2x2_t __rv;
-  __builtin_neon_vuzpv2si (&__rv.val[0], __a, __b);
-  return __rv;
+  union { int32x2x2_t __i; __builtin_neon_ti __o; } __rv;
+  __rv.__o = __builtin_neon_vuzpv2si (__a, __b);
+  return __rv.__i;
 }
 
 __extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__))
 vuzp_f32 (float32x2_t __a, float32x2_t __b)
 {
-  float32x2x2_t __rv;
-  __builtin_neon_vuzpv2sf (&__rv.val[0], __a, __b);
-  return __rv;
+  union { float32x2x2_t __i; __builtin_neon_ti __o; } __rv;
+  __rv.__o = __builtin_neon_vuzpv2sf (__a, __b);
+  return __rv.__i;
 }
 
 __extension__ static __inline uint8x8x2_t __attribute__ ((__always_inline__))
 vuzp_u8 (uint8x8_t __a, uint8x8_t __b)
 {
-  uint8x8x2_t __rv;
-  __builtin_neon_vuzpv8qi ((int8x8_t *) &__rv.val[0], (int8x8_t) __a, (int8x8_t) __b);
-  return __rv;
+  union { uint8x8x2_t __i; __builtin_neon_ti __o; } __rv;
+  __rv.__o = __builtin_neon_vuzpv8qi ((int8x8_t) __a, (int8x8_t) __b);
+  return __rv.__i;
 }
 
 __extension__ static __inline uint16x4x2_t __attribute__ ((__always_inline__))
 vuzp_u16 (uint16x4_t __a, uint16x4_t __b)
 {
-  uint16x4x2_t __rv;
-  __builtin_neon_vuzpv4hi ((int16x4_t *) &__rv.val[0], (int16x4_t) __a, (int16x4_t) __b);
-  return __rv;
+  union { uint16x4x2_t __i; __builtin_neon_ti __o; } __rv;
+  __rv.__o = __builtin_neon_vuzpv4hi ((int16x4_t) __a, (int16x4_t) __b);
+  return __rv.__i;
 }
 
 __extension__ static __inline uint32x2x2_t __attribute__ ((__always_inline__))
 vuzp_u32 (uint32x2_t __a, uint32x2_t __b)
 {
-  uint32x2x2_t __rv;
-  __builtin_neon_vuzpv2si ((int32x2_t *) &__rv.val[0], (int32x2_t) __a, (int32x2_t) __b);
-  return __rv;
+  union { uint32x2x2_t __i; __builtin_neon_ti __o; } __rv;
+  __rv.__o = __builtin_neon_vuzpv2si ((int32x2_t) __a, (int32x2_t) __b);
+  return __rv.__i;
 }
 
 __extension__ static __inline poly8x8x2_t __attribute__ ((__always_inline__))
 vuzp_p8 (poly8x8_t __a, poly8x8_t __b)
 {
-  poly8x8x2_t __rv;
-  __builtin_neon_vuzpv8qi ((int8x8_t *) &__rv.val[0], (int8x8_t) __a, (int8x8_t) __b);
-  return __rv;
+  union { poly8x8x2_t __i; __builtin_neon_ti __o; } __rv;
+  __rv.__o = __builtin_neon_vuzpv8qi ((int8x8_t) __a, (int8x8_t) __b);
+  return __rv.__i;
 }
 
 __extension__ static __inline poly16x4x2_t __attribute__ ((__always_inline__))
 vuzp_p16 (poly16x4_t __a, poly16x4_t __b)
 {
-  poly16x4x2_t __rv;
-  __builtin_neon_vuzpv4hi ((int16x4_t *) &__rv.val[0], (int16x4_t) __a, (int16x4_t) __b);
-  return __rv;
+  union { poly16x4x2_t __i; __builtin_neon_ti __o; } __rv;
+  __rv.__o = __builtin_neon_vuzpv4hi ((int16x4_t) __a, (int16x4_t) __b);
+  return __rv.__i;
 }
 
 __extension__ static __inline int8x16x2_t __attribute__ ((__always_inline__))
 vuzpq_s8 (int8x16_t __a, int8x16_t __b)
 {
-  int8x16x2_t __rv;
-  __builtin_neon_vuzpv16qi (&__rv.val[0], __a, __b);
-  return __rv;
+  union { int8x16x2_t __i; __builtin_neon_oi __o; } __rv;
+  __rv.__o = __builtin_neon_vuzpv16qi (__a, __b);
+  return __rv.__i;
 }
 
 __extension__ static __inline int16x8x2_t __attribute__ ((__always_inline__))
 vuzpq_s16 (int16x8_t __a, int16x8_t __b)
 {
-  int16x8x2_t __rv;
-  __builtin_neon_vuzpv8hi (&__rv.val[0], __a, __b);
-  return __rv;
+  union { int16x8x2_t __i; __builtin_neon_oi __o; } __rv;
+  __rv.__o = __builtin_neon_vuzpv8hi (__a, __b);
+  return __rv.__i;
 }
 
 __extension__ static __inline int32x4x2_t __attribute__ ((__always_inline__))
 vuzpq_s32 (int32x4_t __a, int32x4_t __b)
 {
-  int32x4x2_t __rv;
-  __builtin_neon_vuzpv4si (&__rv.val[0], __a, __b);
-  return __rv;
+  union { int32x4x2_t __i; __builtin_neon_oi __o; } __rv;
+  __rv.__o = __builtin_neon_vuzpv4si (__a, __b);
+  return __rv.__i;
 }
 
 __extension__ static __inline float32x4x2_t __attribute__ ((__always_inline__))
 vuzpq_f32 (float32x4_t __a, float32x4_t __b)
 {
-  float32x4x2_t __rv;
-  __builtin_neon_vuzpv4sf (&__rv.val[0], __a, __b);
-  return __rv;
+  union { float32x4x2_t __i; __builtin_neon_oi __o; } __rv;
+  __rv.__o = __builtin_neon_vuzpv4sf (__a, __b);
+  return __rv.__i;
 }
 
 __extension__ static __inline uint8x16x2_t __attribute__ ((__always_inline__))
 vuzpq_u8 (uint8x16_t __a, uint8x16_t __b)
 {
-  uint8x16x2_t __rv;
-  __builtin_neon_vuzpv16qi ((int8x16_t *) &__rv.val[0], (int8x16_t) __a, (int8x16_t) __b);
-  return __rv;
+  union { uint8x16x2_t __i; __builtin_neon_oi __o; } __rv;
+  __rv.__o = __builtin_neon_vuzpv16qi ((int8x16_t) __a, (int8x16_t) __b);
+  return __rv.__i;
 }
 
 __extension__ static __inline uint16x8x2_t __attribute__ ((__always_inline__))
 vuzpq_u16 (uint16x8_t __a, uint16x8_t __b)
 {
-  uint16x8x2_t __rv;
-  __builtin_neon_vuzpv8hi ((int16x8_t *) &__rv.val[0], (int16x8_t) __a, (int16x8_t) __b);
-  return __rv;
+  union { uint16x8x2_t __i; __builtin_neon_oi __o; } __rv;
+  __rv.__o = __builtin_neon_vuzpv8hi ((int16x8_t) __a, (int16x8_t) __b);
+  return __rv.__i;
 }
 
 __extension__ static __inline uint32x4x2_t __attribute__ ((__always_inline__))
 vuzpq_u32 (uint32x4_t __a, uint32x4_t __b)
 {
-  uint32x4x2_t __rv;
-  __builtin_neon_vuzpv4si ((int32x4_t *) &__rv.val[0], (int32x4_t) __a, (int32x4_t) __b);
-  return __rv;
+  union { uint32x4x2_t __i; __builtin_neon_oi __o; } __rv;
+  __rv.__o = __builtin_neon_vuzpv4si ((int32x4_t) __a, (int32x4_t) __b);
+  return __rv.__i;
 }
 
 __extension__ static __inline poly8x16x2_t __attribute__ ((__always_inline__))
 vuzpq_p8 (poly8x16_t __a, poly8x16_t __b)
 {
-  poly8x16x2_t __rv;
-  __builtin_neon_vuzpv16qi ((int8x16_t *) &__rv.val[0], (int8x16_t) __a, (int8x16_t) __b);
-  return __rv;
+  union { poly8x16x2_t __i; __builtin_neon_oi __o; } __rv;
+  __rv.__o = __builtin_neon_vuzpv16qi ((int8x16_t) __a, (int8x16_t) __b);
+  return __rv.__i;
 }
 
 __extension__ static __inline poly16x8x2_t __attribute__ ((__always_inline__))
 vuzpq_p16 (poly16x8_t __a, poly16x8_t __b)
 {
-  poly16x8x2_t __rv;
-  __builtin_neon_vuzpv8hi ((int16x8_t *) &__rv.val[0], (int16x8_t) __a, (int16x8_t) __b);
-  return __rv;
+  union { poly16x8x2_t __i; __builtin_neon_oi __o; } __rv;
+  __rv.__o = __builtin_neon_vuzpv8hi ((int16x8_t) __a, (int16x8_t) __b);
+  return __rv.__i;
 }
 
 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]