[ARM] Model automodified addresses in the Cortex A8 and A9 schedulers

Richard Sandiford richard.sandiford@linaro.org
Thu Aug 18 15:07:00 GMT 2011


...well, for core instructions at least.  NEON is a separate patch.

I haven't measured any benefit or regression from this change on its own.
It makes a difference with the new auto-inc-dec pass though.

I diffed the "before" and "after" assembly code for libav to get a sense
of whether the patch was having the desired effect.  It seems to be,
though the circumstances that lead to the obvious cases can be a bit
unfortunate.  For example, A8 schedules like:

	ldr	r5, [r2], #4
	ldr	r4, [sp, #128]
	cmp	r4, r2

are now scheduled as:

	ldr	r4, [sp, #128]
	ldr	r5, [r2], #4
	cmp	r4, r2

as hoped.  Tested on arm-linux-gnueabi.  OK to install?

Richard


gcc/
	* config/arm/arm-protos.h (arm_writeback_dep): Declare.
	(arm_writeback_only_dep): Likewise.
	* config/arm/arm.c (arm_writeback_dep): New function.
	(arm_writeback_only_dep_1, arm_writeback_only_dep): Likewise.
	* config/arm/cortex-a8.md: Add address-writeback bypasses for
	loads and stores.
	* config/arm/cortex-a9.md: Likewise.

Index: gcc/config/arm/arm-protos.h
===================================================================
--- gcc/config/arm/arm-protos.h	2011-08-17 16:03:05.000000000 +0100
+++ gcc/config/arm/arm-protos.h	2011-08-18 15:03:13.424542107 +0100
@@ -99,6 +99,8 @@ extern int arm_no_early_alu_shift_dep (r
 extern int arm_no_early_alu_shift_value_dep (rtx, rtx);
 extern int arm_no_early_mul_dep (rtx, rtx);
 extern int arm_mac_accumulator_is_mul_result (rtx, rtx);
+extern int arm_writeback_dep (rtx, rtx);
+extern int arm_writeback_only_dep (rtx, rtx);
 
 extern int tls_mentioned_p (rtx);
 extern int symbol_mentioned_p (rtx);
Index: gcc/config/arm/arm.c
===================================================================
--- gcc/config/arm/arm.c	2011-08-18 14:47:14.146516649 +0100
+++ gcc/config/arm/arm.c	2011-08-18 15:03:13.450542049 +0100
@@ -22689,6 +22689,50 @@ arm_mac_accumulator_is_mul_result (rtx p
           && !reg_overlap_mentioned_p (mul_result, mac_op1));
 }
 
+/* Return true if PRODUCER has a REG_INC note (an auto-modified address
+   register) whose register is referenced by the pattern of CONSUMER.  */
+
+int
+arm_writeback_dep (rtx producer, rtx consumer)
+{
+  rtx note;
+
+  for (note = REG_NOTES (producer); note; note = XEXP (note, 1))
+    if (REG_NOTE_KIND (note) == REG_INC
+	&& reg_referenced_p (XEXP (note, 0), PATTERN (consumer)))
+      return true;
+  return false;
+}
+
+/* A note_stores callback for which DATA is an rtx * pointing at the
+   consumer insn.  If DEST is stored by a SET (CONTAINER) and *DATA is
+   nonnull, set *DATA to null if the pattern of *DATA references DEST.  */
+
+static void
+arm_writeback_only_dep_1 (rtx dest, const_rtx container, void *data)
+{
+  rtx *consumer;
+
+  consumer = (rtx *) data;
+  if (GET_CODE (container) == SET
+      && *consumer
+      && reg_referenced_p (dest, PATTERN (*consumer)))
+    *consumer = 0;
+}
+
+/* Return true if CONSUMER depends on an address register writeback in
+   PRODUCER and references no destination that PRODUCER stores via a SET.  */
+
+int
+arm_writeback_only_dep (rtx producer, rtx consumer)
+{
+  if (arm_writeback_dep (producer, consumer))
+    {
+      note_stores (PATTERN (producer), arm_writeback_only_dep_1, &consumer);
+      return consumer != NULL_RTX;
+    }
+  return 0;
+}
 
 /* The EABI says test the least significant bit of a guard variable.  */
 
Index: gcc/config/arm/cortex-a8.md
===================================================================
--- gcc/config/arm/cortex-a8.md	2011-08-12 08:51:44.400598496 +0100
+++ gcc/config/arm/cortex-a8.md	2011-08-18 15:03:35.516496568 +0100
@@ -186,8 +186,22 @@ (define_bypass 4 "cortex_a8_mul,cortex_a
                "cortex_a8_alu_shift_reg"
                "arm_no_early_alu_shift_value_dep")
 
+
+;; Load address register writeback
+
+;; Address register writeback has a latency of 2 instructions, or 1 if
+;; there is no early dependency.  Don't bother handling early shift
+;; dependencies for address writeback; it's very unlikely that an
+;; address will be used that way in critical code.
+(define_bypass 1 "cortex_a8_load*"
+	       "cortex_a8_alu*,cortex_a8_mov"
+	       "arm_writeback_only_dep")
+(define_bypass 2 "cortex_a8_load*"
+	       "cortex_a8_*mul*,cortex_a8_*mla*,cortex_a8_load*,
+	        cortex_a8_store*,cortex_a8_branch,cortex_a8_call"
+	       "arm_writeback_only_dep")
+
 ;; Load instructions.
-;; The presence of any register writeback is ignored here.
 
 ;; A load result has latency 3 unless the dependent instruction has
 ;; no early dep, in which case it is only latency two.
@@ -229,8 +243,18 @@ (define_bypass 4 "cortex_a8_load3_4"
                "cortex_a8_alu_shift_reg"
                "arm_no_early_alu_shift_value_dep")
 
+;; Store address register writeback
+
+;; See comment for load address writeback above.
+(define_bypass 1 "cortex_a8_store*"
+	       "cortex_a8_alu*,cortex_a8_mov"
+	       "arm_writeback_dep")
+(define_bypass 2 "cortex_a8_store*"
+	       "cortex_a8_*mul*,cortex_a8_*mla*,cortex_a8_load*,
+	        cortex_a8_store*,cortex_a8_branch,cortex_a8_call"
+	       "arm_writeback_dep")
+
 ;; Store instructions.
-;; Writeback is again ignored.
 
 (define_insn_reservation "cortex_a8_store1_2" 0
   (and (eq_attr "tune" "cortexa8")
Index: gcc/config/arm/cortex-a9.md
===================================================================
--- gcc/config/arm/cortex-a9.md	2011-08-12 08:51:44.404598488 +0100
+++ gcc/config/arm/cortex-a9.md	2011-08-18 15:03:13.520541920 +0100
@@ -163,6 +163,15 @@ (define_bypass 2 "cortex_a9_dp_shift"
  cortex_a9_load1_2, cortex_a9_dp, cortex_a9_store1_2,
  cortex_a9_mult16, cortex_a9_mac16, cortex_a9_mac, cortex_a9_store3_4, cortex_a9_load3_4")
 
+;; Address register writeback has a latency of 1 instruction.
+
+(define_bypass 1 "cortex_a9_load*"
+	         "cortex_a9_*"
+		 "arm_writeback_only_dep")
+(define_bypass 1 "cortex_a9_store*"
+	         "cortex_a9_*"
+		 "arm_writeback_dep")
+
 ;; An instruction in the load store pipeline can provide
 ;; read access to a DP instruction in the P0 default pipeline
 ;; before the writeback stage.



More information about the Gcc-patches mailing list