This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH][AArch64] Improve register allocation of fma


ping


From: Wilco Dijkstra
Sent: 04 January 2018 17:46
To: GCC Patches
Cc: nd
Subject: [PATCH][AArch64] Improve register allocation of fma
  

This patch improves register allocation of fma by preferring to update the
accumulator register.  This is done by adding fma insns with operand 1 as the
accumulator.  The register allocator considers copy preferences only in operand
order, so if the first operand is dead, it has the highest chance of being
reused as the destination.  As a result, code using fma often gets a better
register allocation.  Performance of SPECFP2017 improves by over 0.5% on some
implementations, while it has no effect on other implementations.  The generated
fma code is more readable too; in a simple example we now generate:

        fmadd   s16, s2, s1, s16
        fmadd   s7, s17, s16, s7
        fmadd   s6, s16, s7, s6
        fmadd   s5, s7, s6, s5

instead of:

        fmadd   s16, s16, s2, s1
        fmadd   s7, s7, s16, s6
        fmadd   s6, s6, s7, s5
        fmadd   s5, s5, s6, s4

Bootstrap OK. OK for commit?

ChangeLog:
2018-01-04  Wilco Dijkstra  <wdijkstr@arm.com>

    gcc/
        * config/aarch64/aarch64.md (fma<mode>4): Change into expand pattern.
        (fnma<mode>4): Likewise.
        (fms<mode>4): Likewise.
        (fnms<mode>4): Likewise.
        (aarch64_fma<mode>4): Rename insn, reorder accumulator operand.
        (aarch64_fnma<mode>4): Likewise.
        (aarch64_fms<mode>4): Likewise.
        (aarch64_fnms<mode>4): Likewise.
        (aarch64_fnmadd<mode>4): Likewise.
--

diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 382953e6ec42ae4475d66143be1e25d22e48571f..e773ec0c41559e47cf38e719dcb8c42d5bb4da49 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -4743,57 +4743,94 @@ (define_insn "*aarch64_fcvt<su_optab><GPF:mode><GPI:mode>2_mult"
   [(set_attr "type" "f_cvtf2i")]
 )
 
-;; fma - no throw
+;; fma - expand fma into patterns with the accumulator operand first since
+;; reusing the accumulator results in better register allocation.
+;; The register allocator considers copy preferences in operand order,
+;; so this prefers fmadd s0, s1, s2, s0 over fmadd s1, s1, s2, s0.
+
+(define_expand "fma<mode>4"
+  [(set (match_operand:GPF_F16 0 "register_operand")
+       (fma:GPF_F16 (match_operand:GPF_F16 1 "register_operand")
+                    (match_operand:GPF_F16 2 "register_operand")
+                    (match_operand:GPF_F16 3 "register_operand")))]
+  "TARGET_FLOAT"
+)
 
-(define_insn "fma<mode>4"
+(define_insn "*aarch64_fma<mode>4"
   [(set (match_operand:GPF_F16 0 "register_operand" "=w")
-        (fma:GPF_F16 (match_operand:GPF_F16 1 "register_operand" "w")
-                    (match_operand:GPF_F16 2 "register_operand" "w")
-                    (match_operand:GPF_F16 3 "register_operand" "w")))]
+       (fma:GPF_F16 (match_operand:GPF_F16 2 "register_operand" "w")
+                    (match_operand:GPF_F16 3 "register_operand" "w")
+                    (match_operand:GPF_F16 1 "register_operand" "w")))]
   "TARGET_FLOAT"
-  "fmadd\\t%<s>0, %<s>1, %<s>2, %<s>3"
+  "fmadd\\t%<s>0, %<s>2, %<s>3, %<s>1"
   [(set_attr "type" "fmac<stype>")]
 )
 
-(define_insn "fnma<mode>4"
+(define_expand "fnma<mode>4"
+  [(set (match_operand:GPF_F16 0 "register_operand")
+       (fma:GPF_F16
+         (neg:GPF_F16 (match_operand:GPF_F16 1 "register_operand"))
+         (match_operand:GPF_F16 2 "register_operand")
+         (match_operand:GPF_F16 3 "register_operand")))]
+  "TARGET_FLOAT"
+)
+
+(define_insn "*aarch64_fnma<mode>4"
   [(set (match_operand:GPF_F16 0 "register_operand" "=w")
         (fma:GPF_F16
-         (neg:GPF_F16 (match_operand:GPF_F16 1 "register_operand" "w"))
-         (match_operand:GPF_F16 2 "register_operand" "w")
-         (match_operand:GPF_F16 3 "register_operand" "w")))]
+         (neg:GPF_F16 (match_operand:GPF_F16 2 "register_operand" "w"))
+         (match_operand:GPF_F16 3 "register_operand" "w")
+         (match_operand:GPF_F16 1 "register_operand" "w")))]
   "TARGET_FLOAT"
-  "fmsub\\t%<s>0, %<s>1, %<s>2, %<s>3"
+  "fmsub\\t%<s>0, %<s>2, %<s>3, %<s>1"
   [(set_attr "type" "fmac<stype>")]
 )
 
-(define_insn "fms<mode>4"
+
+(define_expand "fms<mode>4"
+  [(set (match_operand:GPF 0 "register_operand")
+       (fma:GPF (match_operand:GPF 1 "register_operand")
+                (match_operand:GPF 2 "register_operand")
+                (neg:GPF (match_operand:GPF 3 "register_operand"))))]
+  "TARGET_FLOAT"
+)
+
+(define_insn "*aarch64_fms<mode>4"
   [(set (match_operand:GPF 0 "register_operand" "=w")
-        (fma:GPF (match_operand:GPF 1 "register_operand" "w")
-                (match_operand:GPF 2 "register_operand" "w")
-                (neg:GPF (match_operand:GPF 3 "register_operand" "w"))))]
+       (fma:GPF (match_operand:GPF 2 "register_operand" "w")
+                (match_operand:GPF 3 "register_operand" "w")
+                (neg:GPF (match_operand:GPF 1 "register_operand" "w"))))]
   "TARGET_FLOAT"
-  "fnmsub\\t%<s>0, %<s>1, %<s>2, %<s>3"
+  "fnmsub\\t%<s>0, %<s>2, %<s>3, %<s>1"
   [(set_attr "type" "fmac<s>")]
 )
 
-(define_insn "fnms<mode>4"
+(define_expand "fnms<mode>4"
+  [(set (match_operand:GPF 0 "register_operand")
+       (fma:GPF (neg:GPF (match_operand:GPF 1 "register_operand"))
+                (match_operand:GPF 2 "register_operand")
+                (neg:GPF (match_operand:GPF 3 "register_operand"))))]
+  "TARGET_FLOAT"
+)
+
+(define_insn "*aarch64_fnms<mode>4"
   [(set (match_operand:GPF 0 "register_operand" "=w")
-       (fma:GPF (neg:GPF (match_operand:GPF 1 "register_operand" "w"))
-                (match_operand:GPF 2 "register_operand" "w")
-                (neg:GPF (match_operand:GPF 3 "register_operand" "w"))))]
+       (fma:GPF (neg:GPF (match_operand:GPF 2 "register_operand" "w"))
+                (match_operand:GPF 3 "register_operand" "w")
+                (neg:GPF (match_operand:GPF 1 "register_operand" "w"))))]
   "TARGET_FLOAT"
-  "fnmadd\\t%<s>0, %<s>1, %<s>2, %<s>3"
+  "fnmadd\\t%<s>0, %<s>2, %<s>3, %<s>1"
   [(set_attr "type" "fmac<s>")]
 )
 
 ;; If signed zeros are ignored, -(a * b + c) = -a * b - c.
-(define_insn "*fnmadd<mode>4"
+(define_insn "*aarch64_fnmadd<mode>4"
   [(set (match_operand:GPF 0 "register_operand" "=w")
-       (neg:GPF (fma:GPF (match_operand:GPF 1 "register_operand" "w")
-                         (match_operand:GPF 2 "register_operand" "w")
-                         (match_operand:GPF 3 "register_operand" "w"))))]
+       (neg:GPF (fma:GPF (match_operand:GPF 2 "register_operand" "w")
+                         (match_operand:GPF 3 "register_operand" "w")
+                         (match_operand:GPF 1 "register_operand" "w"))))]
   "!HONOR_SIGNED_ZEROS (<MODE>mode) && TARGET_FLOAT"
-  "fnmadd\\t%<s>0, %<s>1, %<s>2, %<s>3"
+  "fnmadd\\t%<s>0, %<s>2, %<s>3, %<s>1"
   [(set_attr "type" "fmac<s>")]
 )
     

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]