This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH, ARM] Support NEON's VABD with combine pass


This patch adds two define_insn patterns for the NEON vabd instruction so that the combine pass recognizes expressions matching (vabs (vsub ...)) as vabd.
This patch reduces the code size of the x264 binary from 649143 to 648343 bytes (800 bytes, or 0.12%) and improves its performance by 2.5% on average, measured on the plain C version of x264 built with -O2 -ftree-vectorize.
On SPEC2K it made no difference: all vabs instructions found in the SPEC2K binaries use either .f64 or scalar .f32, neither of which is supported by NEON's vabd.
Regtested with QEMU.


Ok for trunk?


-- Best regards, Dmitry
    2011-07-21  Sevak Sargsyan <sevak.sargsyan@ispras.ru>
    
        * config/arm/neon.md (neon_vabd<mode>_2, neon_vabd<mode>_3): New define_insn patterns for combine.
    
    gcc/testsuite:
    
        * gcc.target/arm/neon-combine-sub-abs-into-vabd.c: New test.

diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md
index a8c1b87..f457365 100644
--- a/gcc/config/arm/neon.md
+++ b/gcc/config/arm/neon.md
@@ -5607,3 +5607,32 @@
   emit_insn (gen_neon_vec_pack_trunc_<V_double> (operands[0], tempreg));
   DONE;
 })
+
+(define_insn "neon_vabd<mode>_2" ; lets combine match (abs (minus a b))
+ [(set (match_operand:VDQ 0 "s_register_operand" "=w")
+       (abs:VDQ (minus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
+                           (match_operand:VDQ 2 "s_register_operand" "w"))))]
+ "TARGET_NEON" ; NOTE(review): int minus may wrap before abs; check vs vabd
+ "vabd.<V_s_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2"
+ [(set (attr "neon_type") ; fp D/Q type if <Is_float_mode> != 0, else int
+       (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
+                     (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
+                                   (const_string "neon_fp_vadd_ddd_vabs_dd")
+                                   (const_string "neon_fp_vadd_qqq_vabs_qq"))
+                     (const_string "neon_int_5")))]
+)
+
+(define_insn "neon_vabd<mode>_3" ; lets combine match abs of an UNSPEC_VSUB
+ [(set (match_operand:VDQ 0 "s_register_operand" "=w")
+       (abs:VDQ (unspec:VDQ [(match_operand:VDQ 1 "s_register_operand" "w")
+                             (match_operand:VDQ 2 "s_register_operand" "w")]
+                 UNSPEC_VSUB)))]
+ "TARGET_NEON" ; NOTE(review): uses <V_if_elem>, _2 uses <V_s_elem>; confirm
+ "vabd.<V_if_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2"
+ [(set (attr "neon_type") ; fp D/Q type if <Is_float_mode> != 0, else int
+       (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
+                     (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
+                                   (const_string "neon_fp_vadd_ddd_vabs_dd")
+                                   (const_string "neon_fp_vadd_qqq_vabs_qq"))
+                     (const_string "neon_int_5")))]
+)
diff --git a/gcc/testsuite/gcc.target/arm/neon-combine-sub-abs-into-vabd.c b/gcc/testsuite/gcc.target/arm/neon-combine-sub-abs-into-vabd.c
new file mode 100644
index 0000000..aae4117
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/neon-combine-sub-abs-into-vabd.c
@@ -0,0 +1,54 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-O2 -funsafe-math-optimizations" } */
+/* { dg-add-options arm_neon } */
+
+#include <arm_neon.h>
+float32x2_t f_sub_abs_to_vabd_32 (void)
+{
+  /* vabs_f32 applied to a vsub_f32 result; the scan below wants vabd.f32.  */
+  float32x2_t val1 = vdup_n_f32 (10);
+  float32x2_t val2 = vdup_n_f32 (30);
+  float32x2_t sres = vsub_f32 (val1, val2);
+  float32x2_t res = vabs_f32 (sres);
+
+  return res;
+}
+/* { dg-final { scan-assembler "vabd\.f32" } }*/
+
+#include <arm_neon.h>
+int8x8_t sub_abs_to_vabd_8 (void)
+{
+  /* vabs_s8 applied to a vsub_s8 result; the scan below wants vabd.s8.  */
+  int8x8_t val1 = vdup_n_s8 (10);
+  int8x8_t val2 = vdup_n_s8 (30);
+  int8x8_t sres = vsub_s8 (val1, val2);
+  int8x8_t res = vabs_s8 (sres);
+
+  return res;
+}
+/* { dg-final { scan-assembler "vabd\.s8" } }*/
+
+int16x4_t sub_abs_to_vabd_16 (void)
+{
+  /* vabs_s16 applied to a vsub_s16 result; the scan below wants vabd.s16.  */
+  int16x4_t val1 = vdup_n_s16 (10);
+  int16x4_t val2 = vdup_n_s16 (30);
+  int16x4_t sres = vsub_s16 (val1, val2);
+  int16x4_t res = vabs_s16 (sres);
+
+  return res;
+}
+/* { dg-final { scan-assembler "vabd\.s16" } }*/
+
+int32x2_t sub_abs_to_vabd_32 (void)
+{
+  /* vabs_s32 applied to a vsub_s32 result; the scan below wants vabd.s32.  */
+  int32x2_t val1 = vdup_n_s32 (10);
+  int32x2_t val2 = vdup_n_s32 (30);
+  int32x2_t sres = vsub_s32 (val1, val2);
+  int32x2_t res = vabs_s32 (sres);
+
+  return res;
+}
+/* { dg-final { scan-assembler "vabd\.s32" } }*/

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]