This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[AArch64][2/3] Optimize aarch64_add_constant to generate better addition sequences


This patch optimizes the immediate addition sequences generated by
aarch64_add_constant.

The current addition sequences generated are:

  * If the immediate fits into the unsigned 12-bit range, generate a single
    add/sub.
  * Otherwise, if it fits into the unsigned 24-bit range, generate two
    add/sub instructions.
  * Otherwise invoke the general constant build function.


This doesn't consider the situation where the immediate can't fit into the
unsigned 12-bit range but can be loaded by a single mov instruction, in
which case we can generate one move and one addition.  The move doesn't
touch the destination register, so the sequence is better than two
additions which both touch the destination register.


This patch therefore optimizes the addition sequences into:

  * If the immediate fits into the unsigned 12-bit range, generate a single
    add/sub.
  * Otherwise, if it fits into the unsigned 24-bit range, generate two
    add/sub instructions.  Don't do this if it fits into a single move
    instruction; in that case, first move the immediate to the scratch
    register, then generate one addition to add the scratch register to the
    destination register.
  * Otherwise invoke the general constant build function.


OK for trunk?

gcc/
2016-07-20  Jiong Wang  <jiong.wang@arm.com>

            * config/aarch64/aarch64.c (aarch64_add_constant): Optimize
            instruction sequences.

diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index aeea3b3ebc514663043ac8d7cd13361f06f78502..41844a101247c939ecb31f8a8c17cf79759255aa 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -1865,6 +1865,47 @@ aarch64_expand_mov_immediate (rtx dest, rtx imm)
   aarch64_internal_mov_immediate (dest, imm, true, GET_MODE (dest));
 }
 
+/* Add DELTA onto REGNUM in MODE, using SCRATCHREG to hold intermediate value if
+   it is necessary.  */
+
+static void
+aarch64_add_constant (machine_mode mode, int regnum, int scratchreg,
+		      HOST_WIDE_INT delta)
+{
+  HOST_WIDE_INT mdelta = abs_hwi (delta);
+  rtx this_rtx = gen_rtx_REG (mode, regnum);
+
+  /* Do nothing if mdelta is zero.  */
+  if (!mdelta)
+    return;
+
+  /* We only need single instruction if the offset fit into add/sub.  */
+  if (aarch64_uimm12_shift (mdelta))
+    {
+      emit_insn (gen_add2_insn (this_rtx, GEN_INT (delta)));
+      return;
+    }
+
+  /* We need two add/sub instructions, each one performing part of the
+     addition/subtraction, but don't do this if the addend can be loaded into
+     a register by a single instruction; in that case we prefer a move to the
+     scratch register followed by an addition.  */
+  if (mdelta < 0x1000000 && !aarch64_move_imm (delta, mode))
+    {
+      HOST_WIDE_INT low_off = mdelta & 0xfff;
+
+      low_off = delta < 0 ? -low_off : low_off;
+      emit_insn (gen_add2_insn (this_rtx, GEN_INT (low_off)));
+      emit_insn (gen_add2_insn (this_rtx, GEN_INT (delta - low_off)));
+      return;
+    }
+
+  /* Otherwise use generic function to handle all other situations.  */
+  rtx scratch_rtx = gen_rtx_REG (mode, scratchreg);
+  aarch64_internal_mov_immediate (scratch_rtx, GEN_INT (delta), true, mode);
+  emit_insn (gen_add2_insn (this_rtx, scratch_rtx));
+}
+
 static bool
 aarch64_function_ok_for_sibcall (tree decl ATTRIBUTE_UNUSED,
 				 tree exp ATTRIBUTE_UNUSED)
@@ -3337,44 +3378,6 @@ aarch64_final_eh_return_addr (void)
 				       - 2 * UNITS_PER_WORD));
 }
 
-static void
-aarch64_add_constant (machine_mode mode, int regnum, int scratchreg,
-		      HOST_WIDE_INT delta)
-{
-  HOST_WIDE_INT mdelta = delta;
-  rtx this_rtx = gen_rtx_REG (mode, regnum);
-  rtx scratch_rtx = gen_rtx_REG (mode, scratchreg);
-
-  if (mdelta < 0)
-    mdelta = -mdelta;
-
-  if (mdelta >= 4096 * 4096)
-    {
-      aarch64_internal_mov_immediate (scratch_rtx, GEN_INT (delta), true, mode);
-      emit_insn (gen_add3_insn (this_rtx, this_rtx, scratch_rtx));
-    }
-  else if (mdelta > 0)
-    {
-      if (mdelta >= 4096)
-	{
-	  emit_insn (gen_rtx_SET (scratch_rtx, GEN_INT (mdelta / 4096)));
-	  rtx shift = gen_rtx_ASHIFT (mode, scratch_rtx, GEN_INT (12));
-	  if (delta < 0)
-	    emit_insn (gen_rtx_SET (this_rtx,
-				    gen_rtx_MINUS (mode, this_rtx, shift)));
-	  else
-	    emit_insn (gen_rtx_SET (this_rtx,
-				    gen_rtx_PLUS (mode, this_rtx, shift)));
-	}
-      if (mdelta % 4096 != 0)
-	{
-	  scratch_rtx = GEN_INT ((delta < 0 ? -1 : 1) * (mdelta % 4096));
-	  emit_insn (gen_rtx_SET (this_rtx,
-				  gen_rtx_PLUS (mode, this_rtx, scratch_rtx)));
-	}
-    }
-}
-
 /* Output code to add DELTA to the first argument, and then jump
    to FUNCTION.  Used for C++ multiple inheritance.  */
 static void

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]