]> gcc.gnu.org Git - gcc.git/commitdiff
aarch64: Model zero-high-half semantics of ADDHN/SUBHN instructions
authorJonathan Wright <jonathan.wright@arm.com>
Mon, 14 Jun 2021 15:18:44 +0000 (16:18 +0100)
committerJonathan Wright <jonathan.wright@arm.com>
Wed, 16 Jun 2021 13:22:42 +0000 (14:22 +0100)
Model the zero-high-half semantics of the narrowing arithmetic Neon
instructions in the aarch64_<sur><addsub>hn<mode> RTL pattern.
Modeling these semantics allows for better RTL combinations while
also removing some register allocation issues as the compiler now
knows that the operation is totally destructive.

Add new tests to narrow_zero_high_half.c to verify the benefit of
this change.

gcc/ChangeLog:

2021-06-14  Jonathan Wright  <jonathan.wright@arm.com>

* config/aarch64/aarch64-simd.md (aarch64_<sur><addsub>hn<mode>):
Change to an expander that emits the correct instruction
depending on endianness.
(aarch64_<sur><addsub>hn<mode>_insn_le): Define.
(aarch64_<sur><addsub>hn<mode>_insn_be): Define.

gcc/testsuite/ChangeLog:

* gcc.target/aarch64/narrow_zero_high_half.c: Add new tests.

gcc/config/aarch64/aarch64-simd.md
gcc/testsuite/gcc.target/aarch64/narrow_zero_high_half.c

index 2b75e57eb77a0dea449f2c13bd77a88f48c4cea5..540244cf0a919b3ea1d6ebf5929be50fed395179 100644 (file)
 
 ;; <r><addsub>hn<q>.
 
-(define_insn "aarch64_<sur><addsub>hn<mode>"
-  [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
-        (unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand" "w")
-                           (match_operand:VQN 2 "register_operand" "w")]
-                           ADDSUBHN))]
-  "TARGET_SIMD"
+(define_insn "aarch64_<sur><addsub>hn<mode>_insn_le"
+  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
+       (vec_concat:<VNARROWQ2>
+         (unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand" "w")
+                             (match_operand:VQN 2 "register_operand" "w")]
+                            ADDSUBHN)
+         (match_operand:<VNARROWQ> 3 "aarch64_simd_or_scalar_imm_zero")))]
+  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
+  "<sur><addsub>hn\\t%0.<Vntype>, %1.<Vtype>, %2.<Vtype>"
+  [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
+)
+
+(define_insn "aarch64_<sur><addsub>hn<mode>_insn_be"
+  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
+       (vec_concat:<VNARROWQ2>
+         (match_operand:<VNARROWQ> 3 "aarch64_simd_or_scalar_imm_zero")
+         (unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand" "w")
+                             (match_operand:VQN 2 "register_operand" "w")]
+                            ADDSUBHN)))]
+  "TARGET_SIMD && BYTES_BIG_ENDIAN"
   "<sur><addsub>hn\\t%0.<Vntype>, %1.<Vtype>, %2.<Vtype>"
   [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
 )
 
+(define_expand "aarch64_<sur><addsub>hn<mode>"
+  [(set (match_operand:<VNARROWQ> 0 "register_operand")
+       (unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand")
+                           (match_operand:VQN 2 "register_operand")]
+                          ADDSUBHN))]
+  "TARGET_SIMD"
+  {
+    rtx tmp = gen_reg_rtx (<VNARROWQ2>mode);
+    if (BYTES_BIG_ENDIAN)
+      emit_insn (gen_aarch64_<sur><addsub>hn<mode>_insn_be (tmp, operands[1],
+                               operands[2], CONST0_RTX (<VNARROWQ>mode)));
+    else
+      emit_insn (gen_aarch64_<sur><addsub>hn<mode>_insn_le (tmp, operands[1],
+                               operands[2], CONST0_RTX (<VNARROWQ>mode)));
+
+    /* The intrinsic expects a narrow result, so emit a subreg that will get
+       optimized away as appropriate.  */
+    emit_move_insn (operands[0], lowpart_subreg (<VNARROWQ>mode, tmp,
+                                                <VNARROWQ2>mode));
+    DONE;
+  }
+)
+
 (define_insn "aarch64_<sur><addsub>hn2<mode>_insn_le"
   [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
        (vec_concat:<VNARROWQ2>
index aa6c7ef389ddaf6be09414a6f09a0dc25949b628..dd5ddf83b9934980c01dd9eddeca6041337c6c1d 100644 (file)
@@ -74,6 +74,42 @@ TEST_UNARY (vqmovn, uint8x16_t, uint16x8_t, u16, u8)
 TEST_UNARY (vqmovn, uint16x8_t, uint32x4_t, u32, u16)
 TEST_UNARY (vqmovn, uint32x4_t, uint64x2_t, u64, u32)
 
+#define TEST_ARITH(name, rettype, intype, fs, rs) \
+  rettype test_ ## name ## _ ## fs ## _zero_high \
+               (intype a, intype b) \
+       { \
+               return vcombine_ ## rs (name ## _ ## fs (a, b), \
+                                       vdup_n_ ## rs (0)); \
+       }
+
+TEST_ARITH (vaddhn, int8x16_t, int16x8_t, s16, s8)
+TEST_ARITH (vaddhn, int16x8_t, int32x4_t, s32, s16)
+TEST_ARITH (vaddhn, int32x4_t, int64x2_t, s64, s32)
+TEST_ARITH (vaddhn, uint8x16_t, uint16x8_t, u16, u8)
+TEST_ARITH (vaddhn, uint16x8_t, uint32x4_t, u32, u16)
+TEST_ARITH (vaddhn, uint32x4_t, uint64x2_t, u64, u32)
+
+TEST_ARITH (vraddhn, int8x16_t, int16x8_t, s16, s8)
+TEST_ARITH (vraddhn, int16x8_t, int32x4_t, s32, s16)
+TEST_ARITH (vraddhn, int32x4_t, int64x2_t, s64, s32)
+TEST_ARITH (vraddhn, uint8x16_t, uint16x8_t, u16, u8)
+TEST_ARITH (vraddhn, uint16x8_t, uint32x4_t, u32, u16)
+TEST_ARITH (vraddhn, uint32x4_t, uint64x2_t, u64, u32)
+
+TEST_ARITH (vsubhn, int8x16_t, int16x8_t, s16, s8)
+TEST_ARITH (vsubhn, int16x8_t, int32x4_t, s32, s16)
+TEST_ARITH (vsubhn, int32x4_t, int64x2_t, s64, s32)
+TEST_ARITH (vsubhn, uint8x16_t, uint16x8_t, u16, u8)
+TEST_ARITH (vsubhn, uint16x8_t, uint32x4_t, u32, u16)
+TEST_ARITH (vsubhn, uint32x4_t, uint64x2_t, u64, u32)
+
+TEST_ARITH (vrsubhn, int8x16_t, int16x8_t, s16, s8)
+TEST_ARITH (vrsubhn, int16x8_t, int32x4_t, s32, s16)
+TEST_ARITH (vrsubhn, int32x4_t, int64x2_t, s64, s32)
+TEST_ARITH (vrsubhn, uint8x16_t, uint16x8_t, u16, u8)
+TEST_ARITH (vrsubhn, uint16x8_t, uint32x4_t, u32, u16)
+TEST_ARITH (vrsubhn, uint32x4_t, uint64x2_t, u64, u32)
+
 /* { dg-final { scan-assembler-not "dup\\t" } } */
 
 /* { dg-final { scan-assembler-times "\\tshrn\\tv" 6} }  */
@@ -88,3 +124,7 @@ TEST_UNARY (vqmovn, uint32x4_t, uint64x2_t, u64, u32)
 /* { dg-final { scan-assembler-times "\\tsqxtun\\tv" 3} }  */
 /* { dg-final { scan-assembler-times "\\tuqxtn\\tv" 3} }  */
 /* { dg-final { scan-assembler-times "\\tsqxtn\\tv" 3} }  */
+/* { dg-final { scan-assembler-times "\\taddhn\\tv" 6} }  */
+/* { dg-final { scan-assembler-times "\\tsubhn\\tv" 6} }  */
+/* { dg-final { scan-assembler-times "\\trsubhn\\tv" 6} }  */
+/* { dg-final { scan-assembler-times "\\traddhn\\tv" 6} }  */
This page took 0.078991 seconds and 5 git commands to generate.