;; <r><addsub>hn<q>.
-(define_insn "aarch64_<sur><addsub>hn<mode>"
- [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
- (unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand" "w")
- (match_operand:VQN 2 "register_operand" "w")]
- ADDSUBHN))]
- "TARGET_SIMD"
+(define_insn "aarch64_<sur><addsub>hn<mode>_insn_le"
+ [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
+ (vec_concat:<VNARROWQ2>
+ (unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand" "w")
+ (match_operand:VQN 2 "register_operand" "w")]
+ ADDSUBHN)
+ (match_operand:<VNARROWQ> 3 "aarch64_simd_or_scalar_imm_zero")))]
+ "TARGET_SIMD && !BYTES_BIG_ENDIAN"
+ "<sur><addsub>hn\\t%0.<Vntype>, %1.<Vtype>, %2.<Vtype>"
+ [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
+)
+
+(define_insn "aarch64_<sur><addsub>hn<mode>_insn_be"
+ [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
+ (vec_concat:<VNARROWQ2>
+ (match_operand:<VNARROWQ> 3 "aarch64_simd_or_scalar_imm_zero")
+ (unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand" "w")
+ (match_operand:VQN 2 "register_operand" "w")]
+ ADDSUBHN)))]
+ "TARGET_SIMD && BYTES_BIG_ENDIAN"
"<sur><addsub>hn\\t%0.<Vntype>, %1.<Vtype>, %2.<Vtype>"
[(set_attr "type" "neon_<addsub>_halve_narrow_q")]
)
+(define_expand "aarch64_<sur><addsub>hn<mode>"
+ [(set (match_operand:<VNARROWQ> 0 "register_operand")
+ (unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand")
+ (match_operand:VQN 2 "register_operand")]
+ ADDSUBHN))]
+ "TARGET_SIMD"
+ {
+ rtx tmp = gen_reg_rtx (<VNARROWQ2>mode);
+ if (BYTES_BIG_ENDIAN)
+ emit_insn (gen_aarch64_<sur><addsub>hn<mode>_insn_be (tmp, operands[1],
+ operands[2], CONST0_RTX (<VNARROWQ>mode)));
+ else
+ emit_insn (gen_aarch64_<sur><addsub>hn<mode>_insn_le (tmp, operands[1],
+ operands[2], CONST0_RTX (<VNARROWQ>mode)));
+
+ /* The intrinsic expects a narrow result, so emit a subreg that will get
+ optimized away as appropriate. */
+ emit_move_insn (operands[0], lowpart_subreg (<VNARROWQ>mode, tmp,
+ <VNARROWQ2>mode));
+ DONE;
+ }
+)
+
(define_insn "aarch64_<sur><addsub>hn2<mode>_insn_le"
[(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
(vec_concat:<VNARROWQ2>
TEST_UNARY (vqmovn, uint16x8_t, uint32x4_t, u32, u16)
TEST_UNARY (vqmovn, uint32x4_t, uint64x2_t, u64, u32)
+#define TEST_ARITH(name, rettype, intype, fs, rs) \
+ rettype test_ ## name ## _ ## fs ## _zero_high \
+ (intype a, intype b) \
+ { \
+ return vcombine_ ## rs (name ## _ ## fs (a, b), \
+ vdup_n_ ## rs (0)); \
+ }
+
+TEST_ARITH (vaddhn, int8x16_t, int16x8_t, s16, s8)
+TEST_ARITH (vaddhn, int16x8_t, int32x4_t, s32, s16)
+TEST_ARITH (vaddhn, int32x4_t, int64x2_t, s64, s32)
+TEST_ARITH (vaddhn, uint8x16_t, uint16x8_t, u16, u8)
+TEST_ARITH (vaddhn, uint16x8_t, uint32x4_t, u32, u16)
+TEST_ARITH (vaddhn, uint32x4_t, uint64x2_t, u64, u32)
+
+TEST_ARITH (vraddhn, int8x16_t, int16x8_t, s16, s8)
+TEST_ARITH (vraddhn, int16x8_t, int32x4_t, s32, s16)
+TEST_ARITH (vraddhn, int32x4_t, int64x2_t, s64, s32)
+TEST_ARITH (vraddhn, uint8x16_t, uint16x8_t, u16, u8)
+TEST_ARITH (vraddhn, uint16x8_t, uint32x4_t, u32, u16)
+TEST_ARITH (vraddhn, uint32x4_t, uint64x2_t, u64, u32)
+
+TEST_ARITH (vsubhn, int8x16_t, int16x8_t, s16, s8)
+TEST_ARITH (vsubhn, int16x8_t, int32x4_t, s32, s16)
+TEST_ARITH (vsubhn, int32x4_t, int64x2_t, s64, s32)
+TEST_ARITH (vsubhn, uint8x16_t, uint16x8_t, u16, u8)
+TEST_ARITH (vsubhn, uint16x8_t, uint32x4_t, u32, u16)
+TEST_ARITH (vsubhn, uint32x4_t, uint64x2_t, u64, u32)
+
+TEST_ARITH (vrsubhn, int8x16_t, int16x8_t, s16, s8)
+TEST_ARITH (vrsubhn, int16x8_t, int32x4_t, s32, s16)
+TEST_ARITH (vrsubhn, int32x4_t, int64x2_t, s64, s32)
+TEST_ARITH (vrsubhn, uint8x16_t, uint16x8_t, u16, u8)
+TEST_ARITH (vrsubhn, uint16x8_t, uint32x4_t, u32, u16)
+TEST_ARITH (vrsubhn, uint32x4_t, uint64x2_t, u64, u32)
+
/* { dg-final { scan-assembler-not "dup\\t" } } */
/* { dg-final { scan-assembler-times "\\tshrn\\tv" 6} } */
/* { dg-final { scan-assembler-times "\\tsqxtun\\tv" 3} } */
/* { dg-final { scan-assembler-times "\\tuqxtn\\tv" 3} } */
/* { dg-final { scan-assembler-times "\\tsqxtn\\tv" 3} } */
+/* { dg-final { scan-assembler-times "\\taddhn\\tv" 6} } */
+/* { dg-final { scan-assembler-times "\\tsubhn\\tv" 6} } */
+/* { dg-final { scan-assembler-times "\\trsubhn\\tv" 6} } */
+/* { dg-final { scan-assembler-times "\\traddhn\\tv" 6} } */