This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Re: [RFA/ARM 2/3] Add vectorizer support for VFMA
- From: Richard Earnshaw <rearnsha at arm dot com>
- To: Matthew Gretton-Dann <Matthew dot Gretton-Dann at arm dot com>
- Cc: "gcc-patches at gcc dot gnu dot org" <gcc-patches at gcc dot gnu dot org>
- Date: Tue, 26 Jun 2012 14:59:57 +0100
- Subject: Re: [RFA/ARM 2/3] Add vectorizer support for VFMA
- References: <4FE87CB9.4080803@arm.com>
On 25/06/12 15:59, Matthew Gretton-Dann wrote:
> All,
>
> This patch adds vectoriser support for VFMA to the ARM Neon backend.
>
> Note that the VFP VFNMA and VFNMS instructions do not have Neon
> equivalents.
>
> OK?
Sorry, no. The neon versions of FMA do not handle denormalized values,
so this needs to reject vectorization unless
flag_unsafe_math_optimizations is true.
R.
>
> gcc/ChangeLog:
>
> 2012-06-25 Matthew Gretton-Dann <matthew.gretton-dann@arm.com>
>
> * config/arm/neon.md (fma<mode>4): New pattern.
> (*fmsub<mode>4): Likewise.
>
> 2012-06-25 Matthew Gretton-Dann <matthew.gretton-dann@arm.com>
>
> * gcc.target/arm/neon-vfma-1.c: New testcase.
> * gcc.target/arm/neon-vfms-1.c: Likewise.
> * lib/target-supports.exp (add_options_for_arm_neonv2): New
> function.
> (check_effective_target_arm_neonv2_ok_nocache): Likewise.
> (check_effective_target_arm_neonv2_ok): Likewise.
> (check_effective_target_arm_neonv2_hw): Likewise.
> (check_effective_target_arm_neonv2): Likewise.
>
> Thanks,
>
> Matt
>
>
> 0002-Add-vectorizer-support-for-VFMA.txt
>
>
> diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md
> index 4568dea..4d12fb3 100644
> --- a/gcc/config/arm/neon.md
> +++ b/gcc/config/arm/neon.md
> @@ -711,6 +711,33 @@
> (const_string "neon_mla_qqq_32_qqd_32_scalar")))))]
> )
>
> +;; Fused multiply-accumulate
> +(define_insn "fma<mode>4"
> + [(set (match_operand:VCVTF 0 "register_operand" "=w")
> + (fma:VCVTF (match_operand:VCVTF 1 "register_operand" "w")
> + (match_operand:VCVTF 2 "register_operand" "w")
> + (match_operand:VCVTF 3 "register_operand" "0")))]
> + "TARGET_NEON && TARGET_FMA"
> + "vfma%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
> + [(set (attr "neon_type")
> + (if_then_else (match_test "<Is_d_reg>")
> + (const_string "neon_fp_vmla_ddd")
> + (const_string "neon_fp_vmla_qqq")))]
> +)
> +
> +(define_insn "*fmsub<mode>4"
> + [(set (match_operand:VCVTF 0 "register_operand" "=w")
> + (fma:VCVTF (neg:VCVTF (match_operand:VCVTF 1 "register_operand" "w"))
> + (match_operand:VCVTF 2 "register_operand" "w")
> + (match_operand:VCVTF 3 "register_operand" "0")))]
> + "TARGET_NEON && TARGET_FMA"
> + "vfms%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
> + [(set (attr "neon_type")
> + (if_then_else (match_test "<Is_d_reg>")
> + (const_string "neon_fp_vmla_ddd")
> + (const_string "neon_fp_vmla_qqq")))]
> +)
> +
> (define_insn "ior<mode>3"
> [(set (match_operand:VDQ 0 "s_register_operand" "=w,w")
> (ior:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0")
> diff --git a/gcc/testsuite/gcc.target/arm/neon-vfma-1.c b/gcc/testsuite/gcc.target/arm/neon-vfma-1.c
> new file mode 100644
> index 0000000..a003a82
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/arm/neon-vfma-1.c
> @@ -0,0 +1,12 @@
> +/* { dg-do compile } */
> +/* { dg-require-effective-target arm_neonv2_ok } */
> +/* { dg-options "-O2 -ftree-vectorize -ffast-math" } */
> +/* { dg-add-options arm_neonv2 } */
> +/* { dg-final { scan-assembler "vfma\\.f32\[ \t\]+\[dDqQ]" } } */
> +
> +/* Verify that VFMA is used. */
> +void f1(int n, float a, float x[], float y[]) {
> + int i;
> + for (i = 0; i < n; ++i)
> + y[i] = a * x[i] + y[i];
> +}
> diff --git a/gcc/testsuite/gcc.target/arm/neon-vfms-1.c b/gcc/testsuite/gcc.target/arm/neon-vfms-1.c
> new file mode 100644
> index 0000000..8cefd8a
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/arm/neon-vfms-1.c
> @@ -0,0 +1,12 @@
> +/* { dg-do compile } */
> +/* { dg-require-effective-target arm_neonv2_ok } */
> +/* { dg-options "-O2 -ftree-vectorize -ffast-math" } */
> +/* { dg-add-options arm_neonv2 } */
> +/* { dg-final { scan-assembler "vfms\\.f32\[ \t\]+\[dDqQ]" } } */
> +
> +/* Verify that VFMS is used. */
> +void f1(int n, float a, float x[], float y[]) {
> + int i;
> + for (i = 0; i < n; ++i)
> + y[i] = a * -x[i] + y[i];
> +}
> diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
> index bc5baa7..9fc8a5c 100644
> --- a/gcc/testsuite/lib/target-supports.exp
> +++ b/gcc/testsuite/lib/target-supports.exp
> @@ -2082,6 +2082,19 @@ proc add_options_for_arm_neon { flags } {
> return "$flags $et_arm_neon_flags"
> }
>
> +# Add the options needed for NEONv2 (NEON with VFPv4). We need either
> +# -mfloat-abi=softfp or -mfloat-abi=hard, but if one is already specified
> +# by the multilib, use it. Similarly, if a -mfpu option already enables
> +# NEONv2, do not add -mfpu=neon-vfpv4.
> +
> +proc add_options_for_arm_neonv2 { flags } {
> + if { ! [check_effective_target_arm_neonv2_ok] } {
> + return "$flags"
> + }
> + global et_arm_neonv2_flags
> + return "$flags $et_arm_neonv2_flags"
> +}
> +
> # Return 1 if this is an ARM target supporting -mfpu=neon
> # -mfloat-abi=softfp or equivalent options. Some multilibs may be
> # incompatible with these options. Also set et_arm_neon_flags to the
> @@ -2110,6 +2123,38 @@ proc check_effective_target_arm_neon_ok { } {
> check_effective_target_arm_neon_ok_nocache]
> }
>
> +# Return 1 if this is an ARM target supporting -mfpu=neon-vfpv4
> +# -mfloat-abi=softfp or equivalent options. Some multilibs may be
> +# incompatible with these options. Also set et_arm_neonv2_flags to the
> +# best options to add.
> +
> +proc check_effective_target_arm_neonv2_ok_nocache { } {
> + global et_arm_neonv2_flags
> + set et_arm_neonv2_flags ""
> + if { [check_effective_target_arm32] } {
> + foreach flags {"" "-mfloat-abi=softfp" "-mfpu=neon-vfpv4" "-mfpu=neon-vfpv4 -mfloat-abi=softfp"} {
> + if { [check_no_compiler_messages_nocache arm_neonv2_ok object {
> + #include "arm_neon.h"
> + float32x2_t
> + foo (float32x2_t a, float32x2_t b, float32x2_t c)
> + {
> + return vfma_f32 (a, b, c);
> + }
> + } "$flags"] } {
> + set et_arm_neonv2_flags $flags
> + return 1
> + }
> + }
> + }
> +
> + return 0
> +}
> +
> +proc check_effective_target_arm_neonv2_ok { } {
> + return [check_cached_effective_target arm_neonv2_ok \
> + check_effective_target_arm_neonv2_ok_nocache]
> +}
> +
> # Add the options needed for NEON. We need either -mfloat-abi=softfp
> # or -mfloat-abi=hard, but if one is already specified by the
> # multilib, use it.
> @@ -2301,6 +2346,21 @@ proc check_effective_target_arm_neon_hw { } {
> } [add_options_for_arm_neon ""]]
> }
>
> +proc check_effective_target_arm_neonv2_hw { } {
> + return [check_runtime arm_neon_hwv2_available {
> + #include "arm_neon.h"
> + int
> + main (void)
> + {
> + float32x2_t a, b, c;
> + asm ("vfma.f32 %P0, %P1, %P2"
> + : "=w" (a)
> + : "w" (b), "w" (c));
> + return 0;
> + }
> + } [add_options_for_arm_neonv2 ""]]
> +}
> +
> # Return 1 if this is a ARM target with NEON enabled.
>
> proc check_effective_target_arm_neon { } {
> @@ -2317,6 +2377,24 @@ proc check_effective_target_arm_neon { } {
> }
> }
>
> +proc check_effective_target_arm_neonv2 { } {
> + if { [check_effective_target_arm32] } {
> + return [check_no_compiler_messages arm_neonv2 object {
> + #ifndef __ARM_NEON__
> + #error not NEON
> + #else
> + #ifndef __ARM_FEATURE_FMA
> + #error not NEONv2
> + #else
> + int dummy;
> + #endif
> + #endif
> + }]
> + } else {
> + return 0
> + }
> +}
> +
> # Return 1 if this a Loongson-2E or -2F target using an ABI that supports
> # the Loongson vector modes.
>
>