This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
PATCH: Use NEON fused multiply-add instructions
- From: mitchell at codesourcery dot com (Mark Mitchell)
- To: gcc-patches at gcc dot gnu dot org
- Date: Tue, 19 May 2009 14:25:37 -0700 (PDT)
- Subject: PATCH: Use NEON fused multiply-add instructions
- Reply-to: mark at codesourcery dot com
This patch adds NEON patterns for the multiply-accumulate (VMLA) and
multiply-subtract (VMLS) instructions, so that vectorized code can use them.
Tested on arm-eabi.
OK?
--
Mark Mitchell
CodeSourcery
mark@codesourcery.com
(650) 331-3385 x713
2009-05-18 Mark Mitchell <mark@codesourcery.com>
* config/arm/neon.md (*mul<mode>3add<mode>_neon): New pattern.
(*mul<mode>3neg<mode>add<mode>_neon): Likewise.
2009-05-18 Mark Mitchell <mark@codesourcery.com>
* gcc.target/arm/neon-vmla-1.c: New.
* gcc.target/arm/neon-vmls-1.c: Likewise.
Index: config/arm/neon.md
===================================================================
--- config/arm/neon.md (revision 147676)
+++ config/arm/neon.md (working copy)
@@ -862,6 +862,50 @@
(const_string "neon_mul_qqq_8_16_32_ddd_32")))))]
)
+(define_insn "*mul<mode>3add<mode>_neon" ; multiply-accumulate: op0 = (op2 * op3) + op1
+ [(set (match_operand:VDQ 0 "s_register_operand" "=w")
+ (plus:VDQ (mult:VDQ (match_operand:VDQ 2 "s_register_operand" "w")
+ (match_operand:VDQ 3 "s_register_operand" "w"))
+ (match_operand:VDQ 1 "s_register_operand" "0")))] ; "0": the addend must be in operand 0's register
+ "TARGET_NEON"
+ "vmla.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3" ; operand 1 is not printed; it is tied to %0
+ [(set (attr "neon_type") ; selected by <Is_float_mode>, then <Is_d_reg>, then <Scalar_mul_8_16>
+ (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
+ (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
+ (const_string "neon_fp_vmla_ddd")
+ (const_string "neon_fp_vmla_qqq"))
+ (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
+ (if_then_else
+ (ne (symbol_ref "<Scalar_mul_8_16>") (const_int 0))
+ (const_string "neon_mla_ddd_8_16_qdd_16_8_long_32_16_long")
+ (const_string "neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long"))
+ (if_then_else (ne (symbol_ref "<Scalar_mul_8_16>") (const_int 0))
+ (const_string "neon_mla_qqq_8_16")
+ (const_string "neon_mla_qqq_32_qqd_32_scalar")))))]
+)
+
+(define_insn "*mul<mode>3neg<mode>add<mode>_neon" ; multiply-subtract: op0 = op1 - (op2 * op3)
+ [(set (match_operand:VDQ 0 "s_register_operand" "=w")
+ (minus:VDQ (match_operand:VDQ 1 "s_register_operand" "0") ; "0": the minuend must be in operand 0's register
+ (mult:VDQ (match_operand:VDQ 2 "s_register_operand" "w")
+ (match_operand:VDQ 3 "s_register_operand" "w"))))]
+ "TARGET_NEON"
+ "vmls.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3" ; operand 1 is not printed; it is tied to %0
+ [(set (attr "neon_type") ; same neon_type strings as *mul<mode>3add<mode>_neon (vmla/mla classes)
+ (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
+ (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
+ (const_string "neon_fp_vmla_ddd")
+ (const_string "neon_fp_vmla_qqq"))
+ (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
+ (if_then_else
+ (ne (symbol_ref "<Scalar_mul_8_16>") (const_int 0))
+ (const_string "neon_mla_ddd_8_16_qdd_16_8_long_32_16_long")
+ (const_string "neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long"))
+ (if_then_else (ne (symbol_ref "<Scalar_mul_8_16>") (const_int 0))
+ (const_string "neon_mla_qqq_8_16")
+ (const_string "neon_mla_qqq_32_qqd_32_scalar")))))]
+)
+
(define_insn "ior<mode>3"
[(set (match_operand:VDQ 0 "s_register_operand" "=w,w")
(ior:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0")
Index: testsuite/gcc.target/arm/neon-vmls-1.c
===================================================================
--- testsuite/gcc.target/arm/neon-vmls-1.c (revision 0)
+++ testsuite/gcc.target/arm/neon-vmls-1.c (revision 0)
@@ -0,0 +1,10 @@
+/* { dg-require-effective-target arm_neon_hw } */
+/* { dg-options "-O2 -mfpu=neon -mfloat-abi=softfp -ftree-vectorize" } */
+/* { dg-final { scan-assembler "vmls\\.f32" } } */
+
+/* Verify that, with -ftree-vectorize, the y[i] - a * x[i] loop below is compiled to VMLS.F32. */
+void f1(int n, float a, float x[], float y[]) {
+ int i;
+ for (i = 0; i < n; ++i)
+ y[i] = y[i] - a * x[i];
+}
Index: testsuite/gcc.target/arm/neon-vmla-1.c
===================================================================
--- testsuite/gcc.target/arm/neon-vmla-1.c (revision 0)
+++ testsuite/gcc.target/arm/neon-vmla-1.c (revision 0)
@@ -0,0 +1,10 @@
+/* { dg-require-effective-target arm_neon_hw } */
+/* { dg-options "-O2 -mfpu=neon -mfloat-abi=softfp -ftree-vectorize" } */
+/* { dg-final { scan-assembler "vmla\\.f32" } } */
+
+/* Verify that, with -ftree-vectorize, the a * x[i] + y[i] loop below is compiled to VMLA.F32. */
+void f1(int n, float a, float x[], float y[]) {
+ int i;
+ for (i = 0; i < n; ++i)
+ y[i] = a * x[i] + y[i];
+}