This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.
Index Nav: | [Date Index] [Subject Index] [Author Index] [Thread Index] | |
---|---|---|
Message Nav: | [Date Prev] [Date Next] | [Thread Prev] [Thread Next] |
Other format: | [Raw text] |
As pointed out by Christophe in https://gcc.gnu.org/ml/gcc-patches/2014-12/msg00778.html, we need to rework the testcases so that they can work for AArch32 target too. This patch fixes this issue. One change: 1. vmull_high_X are only available for aarch64 target, we take care of this through the macro __aarch64__. Tested on armeb-linux-gnueabi, arm-linux-gnueabi, aarch64-linux-gnu and aarch64_be-linux-gnu. OK for the trunk? Index: gcc/ChangeLog =================================================================== --- gcc/ChangeLog (revision 218582) +++ gcc/ChangeLog (working copy) @@ -1,3 +1,38 @@ +2014-12-11 Felix Yang <felix.yang@huawei.com> + Jiji Jiang <jiangjiji@huawei.com> + + * config/aarch64/aarch64-simd.md (aarch64_mul_n<mode>, + aarch64_<su>mull_n<mode>, aarch64_<su>mull<mode>, + aarch64_simd_<su>mull2_n<mode>, aarch64_<su>mull2_n<mode>, + aarch64_<su>mull_lane<mode>, aarch64_<su>mull2_lane<mode>_internal, + aarch64_<su>mull_laneq<mode>, aarch64_<su>mull2_laneq<mode>_internal, + aarch64_smull2_lane<mode>, aarch64_umull2_lane<mode>, + aarch64_smull2_laneq<mode>, aarch64_umull2_laneq<mode>, + aarch64_fmulx<mode>, aarch64_fmulx<mode>, aarch64_fmulx_lane<mode>, + aarch64_pmull2v16qi, aarch64_pmullv8qi): New patterns. + * config/aarch64/aarch64-simd-builtins.def (vec_widen_smult_hi_, + vec_widen_umult_hi_, umull, smull, smull_n, umull_n, mul_n, smull2_n, + umull2_n, smull_lane, umull_lane, smull_laneq, umull_laneq, pmull, + umull2_lane, smull2_laneq, umull2_laneq, fmulx, fmulx_lane, pmull2, + smull2_lane): New builtins. 
+ * config/aarch64/arm_neon.h (vmul_n_f32, vmul_n_s16, vmul_n_s32, + vmul_n_u16, vmul_n_u32, vmulq_n_f32, vmulq_n_f64, vmulq_n_s16, + vmulq_n_s32, vmulq_n_u16, vmulq_n_u32, vmull_high_lane_s16, + vmull_high_lane_s32, vmull_high_lane_u16, vmull_high_lane_u32, + vmull_high_laneq_s16, vmull_high_laneq_s32, vmull_high_laneq_u16, + vmull_high_laneq_u32, vmull_high_n_s16, vmull_high_n_s32, + vmull_high_n_u16, vmull_high_n_u32, vmull_high_p8, vmull_high_s8, + vmull_high_s16, vmull_high_s32, vmull_high_u8, vmull_high_u16, + vmull_high_u32, vmull_lane_s16, vmull_lane_s32, vmull_lane_u16, + vmull_lane_u32, vmull_laneq_s16, vmull_laneq_s32, vmull_laneq_u16, + vmull_laneq_u32, vmull_n_s16, vmull_n_s32, vmull_n_u16, vmull_n_u32, + vmull_p8, vmull_s8, vmull_s16, vmull_s32, vmull_u8, vmull_u16, + vmull_u32, vmulx_f32, vmulx_lane_f32, vmulxd_f64, vmulxq_f32, + vmulxq_f64, vmulxq_lane_f32, vmulxq_lane_f64, vmulxs_f32): Rewrite + using builtin functions. + * config/aarch64/iterators.md (UNSPEC_FMULX, UNSPEC_FMULX_LANE, + VDQF_Q): New unspec and int iterator. + 2014-12-10 Felix Yang <felix.yang@huawei.com> * config/aarch64/aarch64-protos.h (aarch64_function_profiler): Remove Index: gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmull_high.c =================================================================== --- gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmull_high.c (revision 0) +++ gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmull_high.c (revision 0) @@ -0,0 +1,115 @@ +#include <arm_neon.h> +#include "arm-neon-ref.h" +#include "compute-ref-data.h" + +#ifdef __aarch64__ + +/* Expected results. 
*/ +VECT_VAR_DECL(expected,int,16,8) [] = { 0xfc48, 0xfcbf, 0xfd36, 0xfdad, + 0xfe24, 0xfe9b, 0xff12, 0xff89 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffff9a0, 0xfffffa28, + 0xfffffab0, 0xfffffb38 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0xfffffffffffff7a2, + 0xfffffffffffff83b }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xa4b0, 0xa55a, 0xa604, 0xa6ae, + 0xa758, 0xa802, 0xa8ac, 0xa956 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xbaf73c, 0xbaf7f7, + 0xbaf8b2, 0xbaf96d }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0xcbfffff4d8, + 0xcbfffff5a4}; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x6530, 0x659a, 0x6464, 0x64ce, + 0x6798, 0x6732, 0x66cc, 0x6666 }; + +#ifndef INSN_NAME +#define INSN_NAME vmull_high +#define TEST_MSG "VMUL_HIGH" +#endif + +#define FNNAME1(NAME) exec_ ## NAME +#define FNNAME(NAME) FNNAME1(NAME) + +void FNNAME (INSN_NAME) (void) +{ +#define DECL_VMUL(T, W, N) \ + DECL_VARIABLE(vector1, T, W, N); \ + DECL_VARIABLE(vector2, T, W, N); + + /* vector_res = OP(vector1, vector2), then store the result. 
*/ +#define TEST_VMULL_HIGH1(INSN, Q, T1, T2, W, N, W1, N1) \ + VECT_VAR(vector_res, T1, W1, N1) = \ + INSN##Q##_##T2##W(VECT_VAR(vector1, T1, W, N), \ + VECT_VAR(vector2, T1, W, N)); \ + vst1q##_##T2##W1(VECT_VAR(result, T1, W1, N1), \ + VECT_VAR(vector_res, T1, W1, N1)) + +#define TEST_VMULL_HIGH(INSN, Q, T1, T2, W, N, W1, N1) \ + TEST_VMULL_HIGH1(INSN, Q, T1, T2, W, N, W1, N1) + +#define CHECK_VMULL_HIGH_RESULTS(test_name,comment) \ + { \ + CHECK(test_name, int, 16, 8, PRIx16, expected, comment); \ + CHECK(test_name, int, 32, 4, PRIx32, expected, comment); \ + CHECK(test_name, int, 64, 2, PRIx64, expected, comment); \ + CHECK(test_name, uint, 16, 8, PRIx16, expected, comment); \ + CHECK(test_name, uint, 32, 4, PRIx32, expected, comment); \ + CHECK(test_name, uint, 64, 2, PRIx64, expected, comment); \ + CHECK(test_name, poly, 16, 8, PRIx16, expected, comment); \ + } + + DECL_VMUL(int, 8, 16); + DECL_VMUL(int, 16, 8); + DECL_VMUL(int, 32, 4); + DECL_VMUL(uint, 8, 16); + DECL_VMUL(uint, 16, 8); + DECL_VMUL(uint, 32, 4); + DECL_VMUL(poly, 8, 16); + + DECL_VARIABLE(vector_res, int, 16, 8); + DECL_VARIABLE(vector_res, int, 32, 4); + DECL_VARIABLE(vector_res, int, 64, 2); + DECL_VARIABLE(vector_res, uint, 16, 8); + DECL_VARIABLE(vector_res, uint, 32, 4); + DECL_VARIABLE(vector_res, uint, 64, 2); + DECL_VARIABLE(vector_res, poly, 16, 8); + + clean_results (); + + /* Initialize input "vector1" from "buffer". */ + VLOAD(vector1, buffer, q, int, s, 8, 16); + VLOAD(vector1, buffer, q, int, s, 16, 8); + VLOAD(vector1, buffer, q, int, s, 32, 4); + VLOAD(vector1, buffer, q, uint, u, 8, 16); + VLOAD(vector1, buffer, q, uint, u, 16, 8); + VLOAD(vector1, buffer, q, uint, u, 32, 4); + VLOAD(vector1, buffer, q, poly, p, 8, 16); + + /* Choose init value arbitrarily. 
*/ + VDUP(vector2, q, int, s, 8, 16, 0x77); + VDUP(vector2, q, int, s, 16, 8, 0x88); + VDUP(vector2, q, int, s, 32, 4, 0x99); + VDUP(vector2, q, uint, u, 8, 16, 0xAA); + VDUP(vector2, q, uint, u, 16, 8, 0xBB); + VDUP(vector2, q, uint, u, 32, 4, 0xCC); + VDUP(vector2, q, poly, p, 8, 16, 0xAA); + + /* Execute the tests. */ + TEST_VMULL_HIGH(INSN_NAME, , int, s, 8, 16, 16, 8); + TEST_VMULL_HIGH(INSN_NAME, , int, s, 16, 8, 32, 4); + TEST_VMULL_HIGH(INSN_NAME, , int, s, 32, 4, 64, 2); + TEST_VMULL_HIGH(INSN_NAME, , uint, u, 8, 16, 16, 8); + TEST_VMULL_HIGH(INSN_NAME, , uint, u, 16, 8, 32, 4); + TEST_VMULL_HIGH(INSN_NAME, , uint, u, 32, 4, 64, 2); + TEST_VMULL_HIGH(INSN_NAME, , poly, p, 8, 16, 16, 8); + + CHECK_VMULL_HIGH_RESULTS (TEST_MSG, ""); +} +#endif + +int main (void) +{ +#ifdef __aarch64__ + FNNAME (INSN_NAME) (); +#endif + + return 0; +} Index: gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmull_high_lane.c =================================================================== --- gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmull_high_lane.c (revision 0) +++ gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmull_high_lane.c (revision 0) @@ -0,0 +1,137 @@ +#include <arm_neon.h> +#include "arm-neon-ref.h" +#include "compute-ref-data.h" + +#ifdef __aarch64__ + +VECT_VAR_DECL(expected, int, 32, 4) [] = { 0x4000, 0x4000, 0x4000, 0x4000 }; +VECT_VAR_DECL(expected, int, 64, 2) [] = { 0x2000, 0x2000}; +VECT_VAR_DECL(expected, uint, 32, 4) [] = { 0x4000, 0x4000, 0x4000, 0x4000 }; +VECT_VAR_DECL(expected, uint, 64, 2) [] = { 0x2000, 0x2000 }; + +#define TEST_MSG "VMULL_HIGH_LANE/VMULL_HIGH_LANEQ" +void exec_vmull_high_lane (void) +{ + /* vector_res = vmull_lane(vector,vector2,lane), then store the result. 
*/ +#define TEST_VMULL_HIGH_LANE(T1, T2, W, W2, N1, N2, L) \ + VECT_VAR(vector_res, T1, W2, N2) = \ + vmull##_high_lane_##T2##W(VECT_VAR(vector, T1, W, N1), \ + VECT_VAR(vector2, T1, W, N2), \ + L); \ + vst1q_##T2##W2(VECT_VAR(result, T1, W2, N2), VECT_VAR(vector_res, T1, W2, N2)) + +#define CHECK_VMULL_HIGH_LANE_RESULTS(test_name,comment) \ + { \ + CHECK(test_name, int, 32, 4, PRIx32, expected, comment); \ + CHECK(test_name, int, 64, 2, PRIx64, expected, comment); \ + CHECK(test_name, uint, 32, 4, PRIx32, expected, comment); \ + CHECK(test_name, uint, 64, 2, PRIx64, expected, comment); \ + } + + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VARIABLE(vector, int, 16, 8); + DECL_VARIABLE(vector, int, 32, 4); + DECL_VARIABLE(vector, uint, 16, 8); + DECL_VARIABLE(vector, uint, 32, 4); + DECL_VARIABLE(vector2, int, 16, 4); + DECL_VARIABLE(vector2, int, 32, 2); + DECL_VARIABLE(vector2, uint, 16, 4); + DECL_VARIABLE(vector2, uint, 32, 2); + + DECL_VARIABLE(vector_res, int, 32, 4); + DECL_VARIABLE(vector_res, int, 64, 2); + DECL_VARIABLE(vector_res, uint, 32, 4); + DECL_VARIABLE(vector_res, uint, 64, 2); + + clean_results (); + + /* Initialize vector */ + VDUP(vector2, , int, s, 16, 4, 0x1000); + VDUP(vector2, , int, s, 32, 2, 0x1000); + VDUP(vector2, , uint, u, 16, 4, 0x1000); + VDUP(vector2, , uint, u, 32, 2, 0x1000); + + /* Initialize vector2 */ + VDUP(vector, q, int, s, 16, 8, 0x4); + VDUP(vector, q, int, s, 32, 4, 0x2); + VDUP(vector, q, uint, u, 16, 8, 0x4); + VDUP(vector, q, uint, u, 32, 4, 0x2); + + /* Choose lane arbitrarily */ + TEST_VMULL_HIGH_LANE(int, s, 16, 32, 8, 4, 2); + TEST_VMULL_HIGH_LANE(int, s, 32, 64, 4, 2, 1); + TEST_VMULL_HIGH_LANE(uint, u, 16, 32, 8, 4, 2); + TEST_VMULL_HIGH_LANE(uint, u, 32, 64, 4, 2, 1); + + CHECK_VMULL_HIGH_LANE_RESULTS (TEST_MSG, ""); +} + + +void exec_vmull_high_laneq (void) +{ + /* vector_res = vmull_lane(vector,vector2,lane), then store the result. 
*/ +#define TEST_VMULL_HIGH_LANEQ(T1, T2, W, W2, N2, N1, L) \ + VECT_VAR(vector_res, T1, W2, N1) = \ + vmull##_high_laneq_##T2##W(VECT_VAR(vector, T1, W, N2), \ + VECT_VAR(vector2, T1, W, N2), \ + L); \ + vst1q_##T2##W2(VECT_VAR(result, T1, W2, N1), VECT_VAR(vector_res, T1, W2, N1)) + +#define CHECK_VMULL_HIGH_LANEQ_RESULTS(test_name,comment) \ + { \ + CHECK(test_name, int, 32, 4, PRIx32, expected, comment); \ + CHECK(test_name, int, 64, 2, PRIx64, expected, comment); \ + CHECK(test_name, uint, 32, 4, PRIx32, expected, comment); \ + CHECK(test_name, uint, 64, 2, PRIx64, expected, comment); \ + } + + + /* With ARM RVCT, we need to declare variables before any executable + statement */ + DECL_VARIABLE(vector, int, 16, 8); + DECL_VARIABLE(vector, int, 32, 4); + DECL_VARIABLE(vector, uint, 16, 8); + DECL_VARIABLE(vector, uint, 32, 4); + DECL_VARIABLE(vector2, int, 16, 8); + DECL_VARIABLE(vector2, int, 32, 4); + DECL_VARIABLE(vector2, uint, 16, 8); + DECL_VARIABLE(vector2, uint, 32, 4); + + DECL_VARIABLE(vector_res, int, 32, 4); + DECL_VARIABLE(vector_res, int, 64, 2); + DECL_VARIABLE(vector_res, uint, 32, 4); + DECL_VARIABLE(vector_res, uint, 64, 2); + + clean_results (); + + /* Initialize vector */ + VDUP(vector2, q, int, s, 16, 8, 0x1000); + VDUP(vector2, q, int, s, 32, 4, 0x1000); + VDUP(vector2, q, uint, u, 16, 8, 0x1000); + VDUP(vector2, q, uint, u, 32, 4, 0x1000); + + /* Initialize vector2 */ + VDUP(vector, q, int, s, 16, 8, 0x4); + VDUP(vector, q, int, s, 32, 4, 0x2); + VDUP(vector, q, uint, u, 16, 8, 0x4); + VDUP(vector, q, uint, u, 32, 4, 0x2); + + /* Choose lane arbitrarily */ + TEST_VMULL_HIGH_LANEQ(int, s, 16, 32, 8, 4, 2); + TEST_VMULL_HIGH_LANEQ(int, s, 32, 64, 4, 2, 1); + TEST_VMULL_HIGH_LANEQ(uint, u, 16, 32, 8, 4, 2); + TEST_VMULL_HIGH_LANEQ(uint, u, 32, 64, 4, 2, 1); + + CHECK_VMULL_HIGH_LANEQ_RESULTS (TEST_MSG, ""); +} +#endif + +int main (void) +{ +#ifdef __aarch64__ + exec_vmull_high_lane(); +#endif + return 0; +} Index: 
gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmull_high_n.c =================================================================== --- gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmull_high_n.c (revision 0) +++ gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmull_high_n.c (revision 0) @@ -0,0 +1,85 @@ +#include <arm_neon.h> +#include "arm-neon-ref.h" +#include "compute-ref-data.h" + +#ifdef __aarch64__ + +/* Expected results. */ +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffff73c, 0xfffff7f7, + 0xfffff8b2, 0xfffff96d }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0xfffffffffffff4d8, + 0xfffffffffffff5a4 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xedf4d8, 0xedf5c6, + 0xedf6b4, 0xedf7a2 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0xfefffff20e, + 0xfefffff30d}; + +#ifndef INSN_NAME +#define INSN_NAME vmull_high_n +#define TEST_MSG "VMULL_HIGH_N" +#endif + +#define FNNAME1(NAME) exec_ ## NAME +#define FNNAME(NAME) FNNAME1(NAME) + +void FNNAME (INSN_NAME) (void) +{ +#define DECL_VMUL(T, W, N) \ + DECL_VARIABLE(vector1, T, W, N); \ + + /* vector_res = OP(vector1, vector2), then store the result. 
*/ +#define TEST_VMULL_HIGH1(INSN, Q, T1, T2, W, N, W1, N1, C) \ + VECT_VAR(vector_res, T1, W1, N1) = \ + INSN##Q##_##T2##W(VECT_VAR(vector1, T1, W, N), \ + C); \ + vst1q##_##T2##W1(VECT_VAR(result, T1, W1, N1), \ + VECT_VAR(vector_res, T1, W1, N1)) + +#define TEST_VMULL_HIGH(INSN, Q, T1, T2, W, N, W1, N1, C) \ + TEST_VMULL_HIGH1(INSN, Q, T1, T2, W, N, W1, N1, C) + +#define CHECK_VMULL_HIGH_N_RESULTS(test_name,comment) \ + { \ + CHECK(test_name, int, 32, 4, PRIx32, expected, comment); \ + CHECK(test_name, int, 64, 2, PRIx64, expected, comment); \ + CHECK(test_name, uint, 32, 4, PRIx32, expected, comment); \ + CHECK(test_name, uint, 64, 2, PRIx64, expected, comment); \ + } + + DECL_VMUL(int, 16, 8); + DECL_VMUL(int, 32, 4); + DECL_VMUL(uint, 16, 8); + DECL_VMUL(uint, 32, 4); + + DECL_VARIABLE(vector_res, int, 32, 4); + DECL_VARIABLE(vector_res, int, 64, 2); + DECL_VARIABLE(vector_res, uint, 32, 4); + DECL_VARIABLE(vector_res, uint, 64, 2); + + clean_results (); + + /* Initialize input "vector1" from "buffer". */ + VLOAD(vector1, buffer, q, int, s, 16, 8); + VLOAD(vector1, buffer, q, int, s, 32, 4); + VLOAD(vector1, buffer, q, uint, u, 16, 8); + VLOAD(vector1, buffer, q, uint, u, 32, 4); + + + /* Execute the tests. */ + TEST_VMULL_HIGH(INSN_NAME, , int, s, 16, 8, 32, 4, 0xBB); + TEST_VMULL_HIGH(INSN_NAME, , int, s, 32, 4, 64, 2, 0xCC); + TEST_VMULL_HIGH(INSN_NAME, , uint, u, 16, 8, 32, 4, 0xEE); + TEST_VMULL_HIGH(INSN_NAME, , uint, u, 32, 4, 64, 2, 0xFF); + + CHECK_VMULL_HIGH_N_RESULTS (TEST_MSG, ""); +} +#endif + +int main (void) +{ +#ifdef __aarch64__ + FNNAME (INSN_NAME) (); +#endif + + return 0; +} Index: gcc/testsuite/ChangeLog =================================================================== --- gcc/testsuite/ChangeLog (revision 218582) +++ gcc/testsuite/ChangeLog (working copy) @@ -1,3 +1,13 @@ +2014-12-11 Felix Yang <felix.yang@huawei.com> + Jiji Jiang <jiangjiji@huawei.com> + + * testsuite/gcc.target/aarch64/advsimd-intrinsics/vmull_high.c: New + test. 
+ * testsuite/gcc.target/aarch64/advsimd-intrinsics/vmull_high_lane.c: + New test. + * testsuite/gcc.target/aarch64/advsimd-intrinsics/vmull_high_n.c: New + test. + 2014-12-10 Martin Liska <mliska@suse.cz> * gcc.dg/ipa/pr63909.c: New test. Index: gcc/config/aarch64/arm_neon.h =================================================================== --- gcc/config/aarch64/arm_neon.h (revision 218582) +++ gcc/config/aarch64/arm_neon.h (working copy) @@ -7576,671 +7576,6 @@ vmovn_u64 (uint64x2_t a) return result; } -__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -vmul_n_f32 (float32x2_t a, float32_t b) -{ - float32x2_t result; - __asm__ ("fmul %0.2s,%1.2s,%2.s[0]" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -vmul_n_s16 (int16x4_t a, int16_t b) -{ - int16x4_t result; - __asm__ ("mul %0.4h,%1.4h,%2.h[0]" - : "=w"(result) - : "w"(a), "x"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -vmul_n_s32 (int32x2_t a, int32_t b) -{ - int32x2_t result; - __asm__ ("mul %0.2s,%1.2s,%2.s[0]" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -vmul_n_u16 (uint16x4_t a, uint16_t b) -{ - uint16x4_t result; - __asm__ ("mul %0.4h,%1.4h,%2.h[0]" - : "=w"(result) - : "w"(a), "x"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -vmul_n_u32 (uint32x2_t a, uint32_t b) -{ - uint32x2_t result; - __asm__ ("mul %0.2s,%1.2s,%2.s[0]" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -#define vmull_high_lane_s16(a, b, c) \ - __extension__ \ - ({ \ - int16x4_t b_ = (b); \ - int16x8_t a_ = (a); \ - int32x4_t result; \ - __asm__ ("smull2 %0.4s, %1.8h, %2.h[%3]" \ - : 
"=w"(result) \ - : "w"(a_), "x"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -#define vmull_high_lane_s32(a, b, c) \ - __extension__ \ - ({ \ - int32x2_t b_ = (b); \ - int32x4_t a_ = (a); \ - int64x2_t result; \ - __asm__ ("smull2 %0.2d, %1.4s, %2.s[%3]" \ - : "=w"(result) \ - : "w"(a_), "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -#define vmull_high_lane_u16(a, b, c) \ - __extension__ \ - ({ \ - uint16x4_t b_ = (b); \ - uint16x8_t a_ = (a); \ - uint32x4_t result; \ - __asm__ ("umull2 %0.4s, %1.8h, %2.h[%3]" \ - : "=w"(result) \ - : "w"(a_), "x"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -#define vmull_high_lane_u32(a, b, c) \ - __extension__ \ - ({ \ - uint32x2_t b_ = (b); \ - uint32x4_t a_ = (a); \ - uint64x2_t result; \ - __asm__ ("umull2 %0.2d, %1.4s, %2.s[%3]" \ - : "=w"(result) \ - : "w"(a_), "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -#define vmull_high_laneq_s16(a, b, c) \ - __extension__ \ - ({ \ - int16x8_t b_ = (b); \ - int16x8_t a_ = (a); \ - int32x4_t result; \ - __asm__ ("smull2 %0.4s, %1.8h, %2.h[%3]" \ - : "=w"(result) \ - : "w"(a_), "x"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -#define vmull_high_laneq_s32(a, b, c) \ - __extension__ \ - ({ \ - int32x4_t b_ = (b); \ - int32x4_t a_ = (a); \ - int64x2_t result; \ - __asm__ ("smull2 %0.2d, %1.4s, %2.s[%3]" \ - : "=w"(result) \ - : "w"(a_), "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -#define vmull_high_laneq_u16(a, b, c) \ - __extension__ \ - ({ \ - uint16x8_t b_ = (b); \ - uint16x8_t a_ = (a); \ - uint32x4_t result; \ - __asm__ ("umull2 %0.4s, %1.8h, %2.h[%3]" \ - : "=w"(result) \ - : "w"(a_), "x"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -#define vmull_high_laneq_u32(a, b, c) \ - __extension__ \ - ({ \ - uint32x4_t b_ = (b); \ - uint32x4_t a_ = (a); \ - uint64x2_t result; \ - __asm__ ("umull2 %0.2d, %1.4s, %2.s[%3]" \ - : "=w"(result) \ - : "w"(a_), "w"(b_), "i"(c) \ - : /* No 
clobbers */); \ - result; \ - }) - -__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -vmull_high_n_s16 (int16x8_t a, int16_t b) -{ - int32x4_t result; - __asm__ ("smull2 %0.4s,%1.8h,%2.h[0]" - : "=w"(result) - : "w"(a), "x"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -vmull_high_n_s32 (int32x4_t a, int32_t b) -{ - int64x2_t result; - __asm__ ("smull2 %0.2d,%1.4s,%2.s[0]" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -vmull_high_n_u16 (uint16x8_t a, uint16_t b) -{ - uint32x4_t result; - __asm__ ("umull2 %0.4s,%1.8h,%2.h[0]" - : "=w"(result) - : "w"(a), "x"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -vmull_high_n_u32 (uint32x4_t a, uint32_t b) -{ - uint64x2_t result; - __asm__ ("umull2 %0.2d,%1.4s,%2.s[0]" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) -vmull_high_p8 (poly8x16_t a, poly8x16_t b) -{ - poly16x8_t result; - __asm__ ("pmull2 %0.8h,%1.16b,%2.16b" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -vmull_high_s8 (int8x16_t a, int8x16_t b) -{ - int16x8_t result; - __asm__ ("smull2 %0.8h,%1.16b,%2.16b" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -vmull_high_s16 (int16x8_t a, int16x8_t b) -{ - int32x4_t result; - __asm__ ("smull2 %0.4s,%1.8h,%2.8h" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -vmull_high_s32 
(int32x4_t a, int32x4_t b) -{ - int64x2_t result; - __asm__ ("smull2 %0.2d,%1.4s,%2.4s" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -vmull_high_u8 (uint8x16_t a, uint8x16_t b) -{ - uint16x8_t result; - __asm__ ("umull2 %0.8h,%1.16b,%2.16b" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -vmull_high_u16 (uint16x8_t a, uint16x8_t b) -{ - uint32x4_t result; - __asm__ ("umull2 %0.4s,%1.8h,%2.8h" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -vmull_high_u32 (uint32x4_t a, uint32x4_t b) -{ - uint64x2_t result; - __asm__ ("umull2 %0.2d,%1.4s,%2.4s" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -#define vmull_lane_s16(a, b, c) \ - __extension__ \ - ({ \ - int16x4_t b_ = (b); \ - int16x4_t a_ = (a); \ - int32x4_t result; \ - __asm__ ("smull %0.4s,%1.4h,%2.h[%3]" \ - : "=w"(result) \ - : "w"(a_), "x"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -#define vmull_lane_s32(a, b, c) \ - __extension__ \ - ({ \ - int32x2_t b_ = (b); \ - int32x2_t a_ = (a); \ - int64x2_t result; \ - __asm__ ("smull %0.2d,%1.2s,%2.s[%3]" \ - : "=w"(result) \ - : "w"(a_), "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -#define vmull_lane_u16(a, b, c) \ - __extension__ \ - ({ \ - uint16x4_t b_ = (b); \ - uint16x4_t a_ = (a); \ - uint32x4_t result; \ - __asm__ ("umull %0.4s,%1.4h,%2.h[%3]" \ - : "=w"(result) \ - : "w"(a_), "x"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -#define vmull_lane_u32(a, b, c) \ - __extension__ \ - ({ \ - uint32x2_t b_ = (b); \ - uint32x2_t a_ = (a); \ - uint64x2_t result; \ - __asm__ ("umull %0.2d, %1.2s, %2.s[%3]" \ - : "=w"(result) \ - : "w"(a_), "w"(b_), 
"i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -#define vmull_laneq_s16(a, b, c) \ - __extension__ \ - ({ \ - int16x8_t b_ = (b); \ - int16x4_t a_ = (a); \ - int32x4_t result; \ - __asm__ ("smull %0.4s, %1.4h, %2.h[%3]" \ - : "=w"(result) \ - : "w"(a_), "x"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -#define vmull_laneq_s32(a, b, c) \ - __extension__ \ - ({ \ - int32x4_t b_ = (b); \ - int32x2_t a_ = (a); \ - int64x2_t result; \ - __asm__ ("smull %0.2d, %1.2s, %2.s[%3]" \ - : "=w"(result) \ - : "w"(a_), "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -#define vmull_laneq_u16(a, b, c) \ - __extension__ \ - ({ \ - uint16x8_t b_ = (b); \ - uint16x4_t a_ = (a); \ - uint32x4_t result; \ - __asm__ ("umull %0.4s, %1.4h, %2.h[%3]" \ - : "=w"(result) \ - : "w"(a_), "x"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -#define vmull_laneq_u32(a, b, c) \ - __extension__ \ - ({ \ - uint32x4_t b_ = (b); \ - uint32x2_t a_ = (a); \ - uint64x2_t result; \ - __asm__ ("umull %0.2d, %1.2s, %2.s[%3]" \ - : "=w"(result) \ - : "w"(a_), "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -vmull_n_s16 (int16x4_t a, int16_t b) -{ - int32x4_t result; - __asm__ ("smull %0.4s,%1.4h,%2.h[0]" - : "=w"(result) - : "w"(a), "x"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -vmull_n_s32 (int32x2_t a, int32_t b) -{ - int64x2_t result; - __asm__ ("smull %0.2d,%1.2s,%2.s[0]" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -vmull_n_u16 (uint16x4_t a, uint16_t b) -{ - uint32x4_t result; - __asm__ ("umull %0.4s,%1.4h,%2.h[0]" - : "=w"(result) - : "w"(a), "x"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) 
-vmull_n_u32 (uint32x2_t a, uint32_t b) -{ - uint64x2_t result; - __asm__ ("umull %0.2d,%1.2s,%2.s[0]" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) -vmull_p8 (poly8x8_t a, poly8x8_t b) -{ - poly16x8_t result; - __asm__ ("pmull %0.8h, %1.8b, %2.8b" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -vmull_s8 (int8x8_t a, int8x8_t b) -{ - int16x8_t result; - __asm__ ("smull %0.8h, %1.8b, %2.8b" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -vmull_s16 (int16x4_t a, int16x4_t b) -{ - int32x4_t result; - __asm__ ("smull %0.4s, %1.4h, %2.4h" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -vmull_s32 (int32x2_t a, int32x2_t b) -{ - int64x2_t result; - __asm__ ("smull %0.2d, %1.2s, %2.2s" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -vmull_u8 (uint8x8_t a, uint8x8_t b) -{ - uint16x8_t result; - __asm__ ("umull %0.8h, %1.8b, %2.8b" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -vmull_u16 (uint16x4_t a, uint16x4_t b) -{ - uint32x4_t result; - __asm__ ("umull %0.4s, %1.4h, %2.4h" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -vmull_u32 (uint32x2_t a, uint32x2_t b) -{ - uint64x2_t result; - __asm__ ("umull %0.2d, %1.2s, %2.2s" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; 
-} - -__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) -vmulq_n_f32 (float32x4_t a, float32_t b) -{ - float32x4_t result; - __asm__ ("fmul %0.4s,%1.4s,%2.s[0]" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) -vmulq_n_f64 (float64x2_t a, float64_t b) -{ - float64x2_t result; - __asm__ ("fmul %0.2d,%1.2d,%2.d[0]" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -vmulq_n_s16 (int16x8_t a, int16_t b) -{ - int16x8_t result; - __asm__ ("mul %0.8h,%1.8h,%2.h[0]" - : "=w"(result) - : "w"(a), "x"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -vmulq_n_s32 (int32x4_t a, int32_t b) -{ - int32x4_t result; - __asm__ ("mul %0.4s,%1.4s,%2.s[0]" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -vmulq_n_u16 (uint16x8_t a, uint16_t b) -{ - uint16x8_t result; - __asm__ ("mul %0.8h,%1.8h,%2.h[0]" - : "=w"(result) - : "w"(a), "x"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -vmulq_n_u32 (uint32x4_t a, uint32_t b) -{ - uint32x4_t result; - __asm__ ("mul %0.4s,%1.4s,%2.s[0]" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -vmulx_f32 (float32x2_t a, float32x2_t b) -{ - float32x2_t result; - __asm__ ("fmulx %0.2s,%1.2s,%2.2s" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -#define vmulx_lane_f32(a, b, c) \ - __extension__ \ - ({ \ - float32x4_t b_ = (b); \ - float32x2_t a_ = (a); \ - float32x2_t result; \ - __asm__ ("fmulx 
%0.2s,%1.2s,%2.s[%3]" \ - : "=w"(result) \ - : "w"(a_), "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -__extension__ static __inline float64_t __attribute__ ((__always_inline__)) -vmulxd_f64 (float64_t a, float64_t b) -{ - float64_t result; - __asm__ ("fmulx %d0, %d1, %d2" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) -vmulxq_f32 (float32x4_t a, float32x4_t b) -{ - float32x4_t result; - __asm__ ("fmulx %0.4s,%1.4s,%2.4s" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) -vmulxq_f64 (float64x2_t a, float64x2_t b) -{ - float64x2_t result; - __asm__ ("fmulx %0.2d,%1.2d,%2.2d" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -#define vmulxq_lane_f32(a, b, c) \ - __extension__ \ - ({ \ - float32x4_t b_ = (b); \ - float32x4_t a_ = (a); \ - float32x4_t result; \ - __asm__ ("fmulx %0.4s,%1.4s,%2.s[%3]" \ - : "=w"(result) \ - : "w"(a_), "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -#define vmulxq_lane_f64(a, b, c) \ - __extension__ \ - ({ \ - float64x2_t b_ = (b); \ - float64x2_t a_ = (a); \ - float64x2_t result; \ - __asm__ ("fmulx %0.2d,%1.2d,%2.d[%3]" \ - : "=w"(result) \ - : "w"(a_), "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -__extension__ static __inline float32_t __attribute__ ((__always_inline__)) -vmulxs_f32 (float32_t a, float32_t b) -{ - float32_t result; - __asm__ ("fmulx %s0, %s1, %s2" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) vmvn_p8 (poly8x8_t a) { @@ -18891,6 +18226,78 @@ vmul_n_f64 (float64x1_t __a, float64_t __b) return (float64x1_t) { vget_lane_f64 (__a, 0) * __b }; } +__extension__ static __inline float32x2_t __attribute__ 
((__always_inline__)) +vmul_n_f32 (float32x2_t __a, float32_t __b) +{ + return __builtin_aarch64_mul_nv2sf (__a, __b); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vmul_n_s16 (int16x4_t __a, int16_t __b) +{ + return __builtin_aarch64_mul_nv4hi (__a, __b); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vmul_n_s32 (int32x2_t __a, int32_t __b) +{ + return __builtin_aarch64_mul_nv2si (__a, __b); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vmul_n_u16 (uint16x4_t __a, uint16_t __b) +{ + return (uint16x4_t) __builtin_aarch64_mul_nv4hi ((int16x4_t)__a, + (int16_t)__b); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vmul_n_u32 (uint32x2_t __a, uint32_t __b) +{ + return (uint32x2_t) __builtin_aarch64_mul_nv2si ((int32x2_t)__a, + (int32_t)__b); +} + +/* vmulq_n */ + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vmulq_n_f32 (float32x4_t __a, float32_t __b) +{ + return __builtin_aarch64_mul_nv4sf (__a, __b); +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vmulq_n_f64 (float64x2_t __a, float64_t __b) +{ + return __builtin_aarch64_mul_nv2df (__a, __b); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vmulq_n_s16 (int16x8_t __a, int16_t __b) +{ + return __builtin_aarch64_mul_nv8hi (__a, __b); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vmulq_n_s32 (int32x4_t __a, int32_t __b) +{ + return __builtin_aarch64_mul_nv4si (__a, __b); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vmulq_n_u16 (uint16x8_t __a, uint16_t __b) +{ + return (uint16x8_t) __builtin_aarch64_mul_nv8hi ((int16x8_t)__a, + (int16_t)__b); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vmulq_n_u32 (uint32x4_t __a, uint32_t __b) +{ + return (uint32x4_t) 
__builtin_aarch64_mul_nv4si ((int32x4_t)__a, + (int32_t)__b); +} + /* vmulq_lane */ __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) @@ -18968,6 +18375,308 @@ vmulq_laneq_u32 (uint32x4_t __a, uint32x4_t __b, c return __a * __aarch64_vget_lane_any (__b, __lane); } +/* vmull_high_lane */ + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vmull_high_lane_s16 (int16x8_t __a, int16x4_t __b, const int __c) +{ + return __builtin_aarch64_smull2_lanev8hi (__a, __b, __c); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vmull_high_lane_s32 (int32x4_t __a, int32x2_t __b, const int __c) +{ + return __builtin_aarch64_smull2_lanev4si (__a, __b, __c); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vmull_high_lane_u16 (uint16x8_t __a, uint16x4_t __b, const int __c) +{ + return (uint32x4_t) __builtin_aarch64_umull2_lanev8hi ((int16x8_t) __a, + (int16x4_t) __b, + __c); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vmull_high_lane_u32 (uint32x4_t __a, uint32x2_t __b, const int __c) +{ + return (uint64x2_t) __builtin_aarch64_umull2_lanev4si ((int32x4_t) __a, + (int32x2_t) __b, + __c); +} + +/* vmull_high_laneq */ + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vmull_high_laneq_s16 (int16x8_t __a, int16x8_t __b, const int __c) +{ + return __builtin_aarch64_smull2_laneqv8hi (__a, __b, __c); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vmull_high_laneq_s32 (int32x4_t __a, int32x4_t __b, const int __c) +{ + return __builtin_aarch64_smull2_laneqv4si (__a, __b, __c); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vmull_high_laneq_u16 (uint16x8_t __a, uint16x8_t __b, const int __c) +{ + return (uint32x4_t) __builtin_aarch64_umull2_laneqv8hi ((int16x8_t)__a, + (int16x8_t)__b, + __c); +} + +__extension__ static __inline 
uint64x2_t __attribute__ ((__always_inline__)) +vmull_high_laneq_u32 (uint32x4_t __a, uint32x4_t __b, const int __c) +{ + return (uint64x2_t) __builtin_aarch64_umull2_laneqv4si ((int32x4_t) __a, + (int32x4_t) __b, + __c); +} + +/* vmull_high_n */ + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vmull_high_n_s16 (int16x8_t __a, int16_t __b) +{ + return __builtin_aarch64_smull2_nv8hi (__a, __b); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vmull_high_n_s32 (int32x4_t __a, int32_t __b) +{ + return __builtin_aarch64_smull2_nv4si (__a, __b); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vmull_high_n_u16 (uint16x8_t __a, uint16_t __b) +{ + return __builtin_aarch64_umull2_nv8hi_uuu (__a, __b); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vmull_high_n_u32 (uint32x4_t __a, uint32_t __b) +{ + return __builtin_aarch64_umull2_nv4si_uuu (__a, __b); +} + +/* vmull_high */ + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vmull_high_p8 (poly8x16_t __a, poly8x16_t __b) +{ + return __builtin_aarch64_pmull2v16qi_ppp (__a, __b); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vmull_high_s8 (int8x16_t __a, int8x16_t __b) +{ + return __builtin_aarch64_vec_widen_smult_hi_v16qi (__a, __b); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vmull_high_s16 (int16x8_t __a, int16x8_t __b) +{ + return __builtin_aarch64_vec_widen_smult_hi_v8hi (__a, __b); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vmull_high_s32 (int32x4_t __a, int32x4_t __b) +{ + return __builtin_aarch64_vec_widen_smult_hi_v4si (__a, __b); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vmull_high_u8 (uint8x16_t __a, uint8x16_t __b) +{ + return __builtin_aarch64_vec_widen_umult_hi_v16qi_uuu (__a, __b); +} + 
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vmull_high_u16 (uint16x8_t __a, uint16x8_t __b) +{ + return __builtin_aarch64_vec_widen_umult_hi_v8hi_uuu (__a, __b); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vmull_high_u32 (uint32x4_t __a, uint32x4_t __b) +{ + return __builtin_aarch64_vec_widen_umult_hi_v4si_uuu (__a, __b); +} + +/* vmull_lane */ + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vmull_lane_s16 (int16x4_t __a, int16x4_t __b, const int __c) +{ + return __builtin_aarch64_smull_lanev4hi (__a, __b, __c); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vmull_lane_s32 (int32x2_t __a, int32x2_t __b, const int __c) +{ + return __builtin_aarch64_smull_lanev2si (__a, __b, __c); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vmull_lane_u16 (uint16x4_t __a, uint16x4_t __b, const int __c) +{ + return __builtin_aarch64_umull_lanev4hi_uuuu (__a, __b, __c); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vmull_lane_u32 (uint32x2_t __a, uint32x2_t __b, const int __c) +{ + return __builtin_aarch64_umull_lanev2si_uuuu (__a, __b, __c); +} + +/* vmull_laneq */ + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vmull_laneq_s16 (int16x4_t __a, int16x8_t __b, const int __c) +{ + return __builtin_aarch64_smull_laneqv4hi (__a, __b, __c); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vmull_laneq_s32 (int32x2_t __a, int32x4_t __b, const int __c) +{ + return __builtin_aarch64_smull_laneqv2si (__a, __b, __c); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vmull_laneq_u16 (uint16x4_t __a, uint16x8_t __b, const int __c) +{ + return __builtin_aarch64_umull_laneqv4hi_uuuu (__a, __b, __c); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) 
+vmull_laneq_u32 (uint32x2_t __a, uint32x4_t __b, const int __c) +{ + return __builtin_aarch64_umull_laneqv2si_uuuu (__a, __b, __c); +} + +/* vmull_n */ + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vmull_n_s16 (int16x4_t __a, int16_t __b) +{ + return __builtin_aarch64_smull_nv4hi (__a, __b); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vmull_n_s32 (int32x2_t __a, int32_t __b) +{ + return __builtin_aarch64_smull_nv2si (__a, __b); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vmull_n_u16 (uint16x4_t __a, uint16_t __b) +{ + return __builtin_aarch64_umull_nv4hi_uuu (__a, __b); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vmull_n_u32 (uint32x2_t __a, uint32_t __b) +{ + return __builtin_aarch64_umull_nv2si_uuu (__a, __b); +} + +/* vmull */ +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vmull_p8 (poly8x8_t __a, poly8x8_t __b) +{ + return __builtin_aarch64_pmullv8qi_ppp (__a, __b); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vmull_s8 (int8x8_t __a, int8x8_t __b) +{ + return __builtin_aarch64_smullv8qi (__a, __b); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vmull_s16 (int16x4_t __a, int16x4_t __b) +{ + return __builtin_aarch64_smullv4hi (__a, __b); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vmull_s32 (int32x2_t __a, int32x2_t __b) +{ + return __builtin_aarch64_smullv2si (__a, __b); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vmull_u8 (uint8x8_t __a, uint8x8_t __b) +{ + return __builtin_aarch64_umullv8qi_uuu (__a, __b); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vmull_u16 (uint16x4_t __a, uint16x4_t __b) +{ + return __builtin_aarch64_umullv4hi_uuu (__a, __b); +} + +__extension__ static __inline 
uint64x2_t __attribute__ ((__always_inline__)) +vmull_u32 (uint32x2_t __a, uint32x2_t __b) +{ + return __builtin_aarch64_umullv2si_uuu (__a, __b); +} + +/* vmulx */ + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vmulx_f32 (float32x2_t __a, float32x2_t __b) +{ + return __builtin_aarch64_fmulxv2sf (__a, __b); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vmulx_lane_f32 (float32x2_t __a, float32x4_t __b, const int __c) +{ + return __builtin_aarch64_fmulx_lanev2sf (__a, __b, __c); +} + + +__extension__ static __inline float64_t __attribute__ ((__always_inline__)) +vmulxd_f64 (float64_t __a, float64_t __b) +{ + return __builtin_aarch64_fmulxdf (__a, __b); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vmulxq_f32 (float32x4_t __a, float32x4_t __b) +{ + return __builtin_aarch64_fmulxv4sf (__a, __b); +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vmulxq_f64 (float64x2_t __a, float64x2_t __b) +{ + return __builtin_aarch64_fmulxv2df (__a, __b); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vmulxq_lane_f32 (float32x4_t __a, float32x4_t __b, const int __c) +{ + return __builtin_aarch64_fmulx_lanev4sf (__a, __b, __c); +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vmulxq_lane_f64 (float64x2_t __a, float64x2_t __b, const int __c) +{ + return __builtin_aarch64_fmulx_lanev2df (__a, __b, __c); +} + +__extension__ static __inline float32_t __attribute__ ((__always_inline__)) +vmulxs_f32 (float32_t __a, float32_t __b) +{ + return __builtin_aarch64_fmulxsf (__a, __b); +} + /* vneg */ __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) Index: gcc/config/aarch64/iterators.md =================================================================== --- gcc/config/aarch64/iterators.md (revision 218582) +++ gcc/config/aarch64/iterators.md (working 
copy) @@ -276,6 +276,8 @@ UNSPEC_SHA256SU1 ; Used in aarch64-simd.md. UNSPEC_PMULL ; Used in aarch64-simd.md. UNSPEC_PMULL2 ; Used in aarch64-simd.md. + UNSPEC_FMULX ; Used in aarch64-simd.md. + UNSPEC_FMULX_LANE ; Used in aarch64-simd.md. ]) ;; ------------------------------------------------------------------- @@ -465,6 +467,9 @@ ) +(define_mode_attr VDQF_Q [(V2SF "V4SF") (V4SF "V4SF") + (V2DF "V2DF")]) + ;; Widened mode register suffixes for VD_BHSI/VQW. (define_mode_attr Vwtype [(V8QI "8h") (V4HI "4s") (V2SI "2d") (V16QI "8h") Index: gcc/config/aarch64/aarch64-simd.md =================================================================== --- gcc/config/aarch64/aarch64-simd.md (revision 218582) +++ gcc/config/aarch64/aarch64-simd.md (working copy) @@ -1394,6 +1394,253 @@ } ) +(define_insn "aarch64_mul_n<mode>" + [(set (match_operand:VMUL 0 "register_operand" "=w") + (mult:VMUL + (match_operand:VMUL 1 "register_operand" "w") + (vec_duplicate:VMUL + (match_operand:<VEL> 2 "register_operand" "<h_con>"))))] + "TARGET_SIMD" + "<f>mul\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[0]" + [(set_attr "type" "neon_mul_<Vetype>_long")] +) + +(define_insn "aarch64_<su>mull_n<mode>" + [(set (match_operand:<VWIDE> 0 "register_operand" "=w") + (mult:<VWIDE> + (ANY_EXTEND:<VWIDE> + (match_operand:VD_HSI 1 "register_operand" "w")) + (ANY_EXTEND:<VWIDE> + (vec_duplicate:VD_HSI + (match_operand:<VEL> 2 "register_operand" "<vwx>")))))] + "TARGET_SIMD" + "<su>mull\t%0.<Vwtype>, %1.<Vtype>, %2.<Vetype>[0]" + [(set_attr "type" "neon_mul_<Vetype>_scalar_long")] +) + + +(define_insn "aarch64_<su>mull<mode>" + [(set (match_operand:<VWIDE> 0 "register_operand" "=w") + (mult:<VWIDE> + (ANY_EXTEND:<VWIDE> + (match_operand:VD_BHSI 1 "register_operand" "w")) + (ANY_EXTEND:<VWIDE> + (match_operand:VD_BHSI 2 "register_operand" "w"))))] + "TARGET_SIMD" + "<su>mull\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>" + [(set_attr "type" "neon_mul_<Vetype>_long")] +) + +(define_insn "aarch64_simd_<su>mull2_n<mode>" + [(set 
(match_operand:<VWIDE> 0 "register_operand" "=w") + (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF> + (match_operand:VQ_HSI 1 "register_operand" "w") + (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" ""))) + (ANY_EXTEND:<VWIDE> (vec_duplicate:<VHALF> + (match_operand:<VEL> 2 "register_operand" "<vw>")))))] + "TARGET_SIMD" + "<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vetype>[0]" + [(set_attr "type" "neon_mul_<Vetype>_scalar_long")] +) + +(define_expand "aarch64_<su>mull2_n<mode>" + [(match_operand:<VWIDE> 0 "register_operand" "") + (ANY_EXTEND:<VWIDE> (match_operand:VQ_HSI 1 "register_operand" "")) + (match_operand:<VEL> 2 "register_operand" "")] + "TARGET_SIMD" + { + rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true); + emit_insn (gen_aarch64_simd_<su>mull2_n<mode> (operands[0], + operands[1], + operands[2], p)); + DONE; + + } +) + +(define_insn "aarch64_<su>mull_lane<mode>" + [(set (match_operand:<VWIDE> 0 "register_operand" "=w") + (mult:<VWIDE> + (ANY_EXTEND:<VWIDE> + (match_operand:VD_HSI 1 "register_operand" "w")) + (ANY_EXTEND:<VWIDE> + (vec_duplicate:VD_HSI + (vec_select:<VEL> + (match_operand:<VCOND> 2 "register_operand" "<vwx>") + (parallel [(match_operand:SI 3 "immediate_operand" "i")]))))))] + "TARGET_SIMD" + { + operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3]))); + return "<su>mull\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vetype>[%3]"; + } + [(set_attr "type" "neon_mul_<Vetype>_scalar_long")] +) + +(define_insn "aarch64_<su>mull_laneq<mode>" + [(set (match_operand:<VWIDE> 0 "register_operand" "=w") + (mult:<VWIDE> + (ANY_EXTEND:<VWIDE> + (match_operand:VD_HSI 1 "register_operand" "w")) + (ANY_EXTEND:<VWIDE> + (vec_duplicate:VD_HSI + (vec_select:<VEL> + (match_operand:<VCONQ> 2 "register_operand" "<vwx>") + (parallel [(match_operand:SI 3 "immediate_operand" "i")]))))))] + "TARGET_SIMD" + { + operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3]))); + return "<su>mull\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vetype>[%3]"; 
+ } + [(set_attr "type" "neon_mul_<Vetype>_scalar_long")] +) + +(define_insn "aarch64_<su>mull2_lane<mode>_internal" + [(set (match_operand:<VWIDE> 0 "register_operand" "=w") + (mult:<VWIDE> + (ANY_EXTEND:<VWIDE> + (vec_select:<VHALF> + (match_operand:VQ_HSI 1 "register_operand" "w") + (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" ""))) + (ANY_EXTEND:<VWIDE> + (vec_duplicate:<VHALF> + (vec_select:<VEL> + (match_operand:<VCOND> 2 "register_operand" "<vwx>") + (parallel [(match_operand:SI 3 "immediate_operand" "i")]))))))] + "TARGET_SIMD" + { + operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3]))); + return "<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vetype>[%3]"; + } + [(set_attr "type" "neon_mul_<Vetype>_scalar_long")] +) + +(define_insn "aarch64_<su>mull2_laneq<mode>_internal" + [(set (match_operand:<VWIDE> 0 "register_operand" "=w") + (mult:<VWIDE> + (ANY_EXTEND:<VWIDE> + (vec_select:<VHALF> + (match_operand:VQ_HSI 1 "register_operand" "w") + (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" ""))) + (ANY_EXTEND:<VWIDE> + (vec_duplicate:<VHALF> + (vec_select:<VEL> + (match_operand:<VCONQ> 2 "register_operand" "<vwx>") + (parallel [(match_operand:SI 3 "immediate_operand" "i")]))))))] + "TARGET_SIMD" + { + operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3]))); + return "<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vetype>[%3]"; + } + [(set_attr "type" "neon_mul_<Vetype>_scalar_long")] +) + +(define_expand "aarch64_smull2_lane<mode>" + [(match_operand:<VWIDE> 0 "register_operand" "=w") + (match_operand:VQ_HSI 1 "register_operand" "w") + (match_operand:<VCOND> 2 "register_operand" "<vwx>") + (match_operand:SI 3 "immediate_operand" "i")] + "TARGET_SIMD" +{ + rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true); + emit_insn (gen_aarch64_smull2_lane<mode>_internal (operands[0], operands[1], + operands[2], operands[3], + p)); + DONE; +}) + +(define_expand "aarch64_umull2_lane<mode>" + [(match_operand:<VWIDE> 0 "register_operand" 
"=w") + (match_operand:VQ_HSI 1 "register_operand" "w") + (match_operand:<VCOND> 2 "register_operand" "<vwx>") + (match_operand:SI 3 "immediate_operand" "i")] + "TARGET_SIMD" +{ + rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true); + emit_insn (gen_aarch64_umull2_lane<mode>_internal (operands[0], operands[1], + operands[2], operands[3], + p)); + DONE; +}) + +(define_expand "aarch64_smull2_laneq<mode>" + [(match_operand:<VWIDE> 0 "register_operand" "=w") + (match_operand:VQ_HSI 1 "register_operand" "w") + (match_operand:<VCONQ> 2 "register_operand" "<vwx>") + (match_operand:SI 3 "immediate_operand" "i")] + "TARGET_SIMD" +{ + rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true); + emit_insn (gen_aarch64_smull2_laneq<mode>_internal (operands[0], operands[1], + operands[2], operands[3], + p)); + DONE; +}) + +(define_expand "aarch64_umull2_laneq<mode>" + [(match_operand:<VWIDE> 0 "register_operand" "=w") + (match_operand:VQ_HSI 1 "register_operand" "w") + (match_operand:<VCONQ> 2 "register_operand" "<vwx>") + (match_operand:SI 3 "immediate_operand" "i")] + "TARGET_SIMD" +{ + rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true); + emit_insn (gen_aarch64_umull2_laneq<mode>_internal (operands[0], operands[1], + operands[2], operands[3], + p)); + DONE; +}) + +(define_insn "aarch64_fmulx<mode>" + [(set (match_operand:VDQF 0 "register_operand" "=w") + (unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w") + (match_operand:VDQF 2 "register_operand" "w")] + UNSPEC_FMULX))] + "TARGET_SIMD" + "fmulx\\t%0.<vtype>, %1.<vtype>, %2.<vtype>" + [(set_attr "type" "neon_mul_s")] +) + +(define_insn "aarch64_fmulx<mode>" + [(set (match_operand:GPF 0 "register_operand" "=w") + (unspec:GPF [(match_operand:GPF 1 "register_operand" "w") + (match_operand:GPF 2 "register_operand" "w")] + UNSPEC_FMULX))] + "TARGET_SIMD" + "fmulx\\t%<s>0, %<s>1, %<s>2" + [(set_attr "type" "neon_mul_s")] +) + +(define_insn "aarch64_fmulx_lane<mode>" + [(set (match_operand:VDQF 0 
"register_operand" "=w") + (unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w") + (match_operand:<VDQF_Q> 2 "register_operand" "w") + (match_operand:SI 3 "immediate_operand" "i")] + UNSPEC_FMULX_LANE))] + "TARGET_SIMD" + "fmulx\\t%0.<vtype>, %1.<vtype>, %2.<vetype>" + [(set_attr "type" "neon_mul_s")] +) + +(define_insn "aarch64_pmull2v16qi" + [(set (match_operand:V8HI 0 "register_operand" "=w") + (unspec:V8HI [(match_operand:V16QI 1 "register_operand" "w") + (match_operand:V16QI 2 "register_operand" "w")] + UNSPEC_PMULL2))] + "TARGET_SIMD" + "pmull2\\t%0.8h, %1.16b, %2.16b" + [(set_attr "type" "neon_mul_b_long")] +) + +(define_insn "aarch64_pmullv8qi" + [(set (match_operand:V8HI 0 "register_operand" "=w") + (unspec:V8HI [(match_operand:V8QI 1 "register_operand" "w") + (match_operand:V8QI 2 "register_operand" "w")] + UNSPEC_PMULL))] + "TARGET_SIMD" + "pmull\\t%0.8h, %1.8b, %2.8b" + [(set_attr "type" "neon_mul_b_long")] +) + ;; FP vector operations. ;; AArch64 AdvSIMD supports single-precision (32-bit) and ;; double-precision (64-bit) floating-point data types and arithmetic as Index: gcc/config/aarch64/aarch64-simd-builtins.def =================================================================== --- gcc/config/aarch64/aarch64-simd-builtins.def (revision 218582) +++ gcc/config/aarch64/aarch64-simd-builtins.def (working copy) @@ -187,6 +187,39 @@ BUILTIN_VSDQ_HSI (TERNOP_LANE, sqrdmulh_lane, 0) BUILTIN_VSDQ_HSI (TERNOP_LANE, sqrdmulh_laneq, 0) + /* Implemented by vec_widen_<su>mult_hi_<mode>. */ + BUILTIN_VQW (BINOP, vec_widen_smult_hi_, 10) + BUILTIN_VQW (BINOPU, vec_widen_umult_hi_, 10) + /* Implemented by aarch64_<su>mull<mode>. */ + BUILTIN_VD_BHSI (BINOPU, umull, 0) + BUILTIN_VD_BHSI (BINOP, smull, 0) + /* Implemented by aarch64_<su>mull_n<mode>. */ + BUILTIN_VD_HSI (BINOP, smull_n, 0) + BUILTIN_VD_HSI (BINOPU, umull_n, 0) + /* Implemented by aarch64_mul_n<mode>. */ + BUILTIN_VMUL (BINOP, mul_n, 0) + /* Implemented by aarch64_<su>mull2_n<mode>. 
*/ + BUILTIN_VQ_HSI (BINOP, smull2_n, 0) + BUILTIN_VQ_HSI (BINOPU, umull2_n, 0) + /* Implemented by aarch64_<su>mull_lane<q><mode>. */ + BUILTIN_VD_HSI (TERNOP, smull_lane, 0) + BUILTIN_VD_HSI (TERNOPU, umull_lane, 0) + BUILTIN_VD_HSI (TERNOP, smull_laneq, 0) + BUILTIN_VD_HSI (TERNOPU, umull_laneq, 0) + /* Implemented by aarch64_<su>mull2_lane<q><mode>. */ + BUILTIN_VQ_HSI (TERNOP, smull2_lane, 0) + BUILTIN_VQ_HSI (TERNOP_LANE, umull2_lane, 0) + BUILTIN_VQ_HSI (TERNOP, smull2_laneq, 0) + BUILTIN_VQ_HSI (TERNOP_LANE, umull2_laneq, 0) + /* Implemented by aarch64_fmulx<mode>. */ + BUILTIN_VDQF (BINOP, fmulx, 0) + BUILTIN_GPF (BINOP, fmulx, 0) + BUILTIN_VDQF (BINOP, fmulx_lane, 0) + + /* Implemented by aarch64_pmull<2><mode>.*/ + VAR1 (BINOPP, pmull, 0, v8qi) + VAR1 (BINOPP, pmull2, 0, v16qi) + BUILTIN_VSDQ_I_DI (BINOP, ashl, 3) /* Implemented by aarch64_<sur>shl<mode>. */ BUILTIN_VSDQ_I_DI (BINOP, sshl, 0)
Attachment:
aarch64-instrinsic-vmul-v3.diff
Description: aarch64-instrinsic-vmul-v3.diff
Index Nav: | [Date Index] [Subject Index] [Author Index] [Thread Index] | |
---|---|---|
Message Nav: | [Date Prev] [Date Next] | [Thread Prev] [Thread Next] |