This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH] [AArch64] Implement automod load and store for Thunderx2t99


Hi,

Please find attached a patch that implements scheduling support for
automodify (writeback) loads and stores on Thunderx2t99.
The patch does not change SPEC results, but it improves other benchmarks.

Bootstrapped and regression tested on aarch64-thunder-linux.
Please review the patch and let us know if it's okay for Stage-1.

Thanks,
Naveen

2017-03-06  Julian Brown  <julian@codesourcery.com>
	    Naveen H.S  <Naveen.Hurugalawadi@cavium.com>

	* config/aarch64/aarch64-protos.h (aarch64_automod_addr_only_dep): Add
	prototype.
	* config/aarch64/aarch64.c (aarch64_automod_addr_only_dep): New
	function.
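	* config/aarch64/thunderx2t99.md (thunderx2t99_load_basic)
	(thunderx2t99_store_basic, thunderx2t99_storepair_basic)
	(thunderx2t99_fp_load_basic, thunderx2t99_fp_loadpair_basic)
	(thunderx2t99_fp_store_basic, thunderx2t99_fp_storepair_basic): Add
	aarch64_mem_type_p test.
	(thunderx2t99_store_basic, thunderx2t99_storepair_basic)
	(thunderx2t99_fp_store_basic, thunderx2t99_fp_storepair_basic): Change
	latency from 1 to 0.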
	(thunderx2t99_load_automod, thunderx2t99_load_regoffset)
	(thunderx2t99_load_scale_ext, thunderx2t99_store_automod)
	(thunderx2t99_store_regoffset_scale_ext, thunderx2t99_fp_load_automod)
	(thunderx2t99_storepair_automod, thunderx2t99_fp_load_regoffset)
	(thunderx2t99_fp_load_scale_ext, thunderx2t99_fp_loadpair_automod)
	(thunderx2t99_fp_store_automod, thunderx2t99_fp_storepair_automod)
	(thunderx2t99_fp_store_regoffset_scale_ext): New insn reservations.
	(thunderx2t99_load_automod, thunderx2t99_fp_load_automod)
	(thunderx2t99_fp_loadpair_automod): Add bypass for output address-only
	dependencies.
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index e045df8..7472d98 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -488,5 +488,6 @@ std::string aarch64_get_extension_string_for_isa_flags (unsigned long,
 							unsigned long);
 
 rtl_opt_pass *make_pass_fma_steering (gcc::context *ctxt);
+int aarch64_automod_addr_only_dep (rtx_insn *, rtx_insn *);
 
 #endif /* GCC_AARCH64_PROTOS_H */
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 62f5461..c674c51 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -14875,6 +14875,94 @@ aarch64_run_selftests (void)
 
 #endif /* #if CHECKING_P */
 
+/* Return true if SET loads from memory, looking through a zero or sign
+   extension of the loaded value.  */
+
+static bool
+aarch64_load_set_p (rtx set)
+{
+  rtx src = SET_SRC (set);
+
+  if (GET_CODE (src) == ZERO_EXTEND || GET_CODE (src) == SIGN_EXTEND)
+    src = XEXP (src, 0);
+
+  return MEM_P (src);
+}
+
+/* Return true if CONSUMER may use or set DST.  Every SET in CONSUMER's
+   pattern is checked; a pattern that is neither a single SET nor a PARALLEL
+   is conservatively assumed to mention DST.  */
+
+static bool
+aarch64_consumer_mentions_reg_p (rtx dst, rtx_insn *consumer)
+{
+  rtx cons_set = single_set (consumer);
+
+  if (cons_set)
+    return reg_overlap_mentioned_p (dst, cons_set);
+
+  rtx cons_pat = PATTERN (consumer);
+
+  if (GET_CODE (cons_pat) != PARALLEL)
+    return true;
+
+  for (int i = 0; i < XVECLEN (cons_pat, 0); i++)
+    {
+      rtx elt = XVECEXP (cons_pat, 0, i);
+
+      if (GET_CODE (elt) == SET && reg_overlap_mentioned_p (dst, elt))
+	return true;
+    }
+
+  return false;
+}
+
+/* Return nonzero if CONSUMER depends only on the automodified address
+   register of PRODUCER (a load instruction) and not on any value that
+   PRODUCER loads from memory.  Intended as a define_bypass guard.
+
+   Only the SETs of PRODUCER whose source is a memory load (possibly zero or
+   sign extended) are examined.  The base-register update of a writeback
+   form is a SET whose source is not a MEM; it is skipped rather than
+   asserted on, because a dependency on it is exactly the address
+   dependency this predicate is meant to detect.  */
+
+int
+aarch64_automod_addr_only_dep (rtx_insn *producer, rtx_insn *consumer)
+{
+  rtx prod_set = single_set (producer);
+
+  if (prod_set)
+    {
+      /* A lone SET that is not a load defines no loaded value, so CONSUMER
+	 cannot depend on one.  */
+      if (!aarch64_load_set_p (prod_set))
+	return 1;
+
+      return !aarch64_consumer_mentions_reg_p (SET_DEST (prod_set), consumer);
+    }
+
+  rtx prod_pat = PATTERN (producer);
+
+  if (GET_CODE (prod_pat) != PARALLEL)
+    return 1;
+
+  for (int i = 0; i < XVECLEN (prod_pat, 0); i++)
+    {
+      rtx elt = XVECEXP (prod_pat, 0, i);
+
+      /* Skip anything that does not load a value, in particular the
+	 base-register update of a writeback form.  */
+      if (GET_CODE (elt) != SET || !aarch64_load_set_p (elt))
+	continue;
+
+      if (aarch64_consumer_mentions_reg_p (SET_DEST (elt), consumer))
+	return 0;
+    }
+
+  return 1;
+}
+
 #undef TARGET_ADDRESS_COST
 #define TARGET_ADDRESS_COST aarch64_address_cost
 
diff --git a/gcc/config/aarch64/thunderx2t99.md b/gcc/config/aarch64/thunderx2t99.md
index 936078c..add3707 100644
--- a/gcc/config/aarch64/thunderx2t99.md
+++ b/gcc/config/aarch64/thunderx2t99.md
@@ -123,24 +123,73 @@
 
 (define_insn_reservation "thunderx2t99_load_basic" 4
   (and (eq_attr "tune" "thunderx2t99")
-       (eq_attr "type" "load1"))
+       (eq_attr "type" "load1")
+       (match_test "aarch64_mem_type_p (insn, AARCH64_ADDR_SYMBOLIC
+					      | AARCH64_ADDR_REG_IMM
+					      | AARCH64_ADDR_LO_SUM)"))
   "thunderx2t99_ls01")
 
+(define_insn_reservation "thunderx2t99_load_automod" 4
+  (and (eq_attr "tune" "thunderx2t99")
+       (eq_attr "type" "load1")
+       (match_test "aarch64_mem_type_p (insn, AARCH64_ADDR_REG_WB)"))
+  "thunderx2t99_ls01+thunderx2t99_i012")
+
+(define_insn_reservation "thunderx2t99_load_regoffset" 5
+  (and (eq_attr "tune" "thunderx2t99")
+       (eq_attr "type" "load1")
+       (match_test "aarch64_mem_type_p (insn, AARCH64_ADDR_REG_REG)"))
+  "thunderx2t99_i012,thunderx2t99_ls01")
+
+(define_insn_reservation "thunderx2t99_load_scale_ext" 6
+  (and (eq_attr "tune" "thunderx2t99")
+       (eq_attr "type" "load1")
+       (match_test "aarch64_mem_type_p (insn, AARCH64_ADDR_REG_SHIFT
+					      | AARCH64_ADDR_REG_EXT
+					      | AARCH64_ADDR_REG_SHIFT_EXT)"))
+  "thunderx2t99_i012,thunderx2t99_i012,thunderx2t99_ls01")
+
 (define_insn_reservation "thunderx2t99_loadpair" 5
   (and (eq_attr "tune" "thunderx2t99")
        (eq_attr "type" "load2"))
   "thunderx2t99_i012,thunderx2t99_ls01")
 
-(define_insn_reservation "thunderx2t99_store_basic" 1
+(define_insn_reservation "thunderx2t99_store_basic" 0
   (and (eq_attr "tune" "thunderx2t99")
-       (eq_attr "type" "store1"))
+       (eq_attr "type" "store1")
+       (match_test "aarch64_mem_type_p (insn, AARCH64_ADDR_SYMBOLIC
+					      | AARCH64_ADDR_REG_IMM
+					      | AARCH64_ADDR_LO_SUM)"))
   "thunderx2t99_ls01,thunderx2t99_sd")
 
-(define_insn_reservation "thunderx2t99_storepair_basic" 1
+(define_insn_reservation "thunderx2t99_store_automod" 0
   (and (eq_attr "tune" "thunderx2t99")
-       (eq_attr "type" "store2"))
+       (eq_attr "type" "store1")
+       (match_test "aarch64_mem_type_p (insn, AARCH64_ADDR_REG_WB)"))
+  "thunderx2t99_ls01,(thunderx2t99_sd+thunderx2t99_i012)")
+
+(define_insn_reservation "thunderx2t99_store_regoffset_scale_ext" 0
+  (and (eq_attr "tune" "thunderx2t99")
+       (eq_attr "type" "store1")
+       (match_test "aarch64_mem_type_p (insn, AARCH64_ADDR_REG_REG
+					      | AARCH64_ADDR_REG_SHIFT
+					      | AARCH64_ADDR_REG_EXT
+					      | AARCH64_ADDR_REG_SHIFT_EXT)"))
+  "thunderx2t99_i012,thunderx2t99_ls01,thunderx2t99_sd")
+
+(define_insn_reservation "thunderx2t99_storepair_basic" 0
+  (and (eq_attr "tune" "thunderx2t99")
+       (eq_attr "type" "store2")
+       (match_test "aarch64_mem_type_p (insn, AARCH64_ADDR_REG_IMM
+					      | AARCH64_ADDR_LO_SUM)"))
   "thunderx2t99_ls01,thunderx2t99_sd")
 
+(define_insn_reservation "thunderx2t99_storepair_automod" 0
+  (and (eq_attr "tune" "thunderx2t99")
+       (eq_attr "type" "store2")
+       (match_test "aarch64_mem_type_p (insn, AARCH64_ADDR_REG_WB)"))
+  "thunderx2t99_ls01,(thunderx2t99_sd+thunderx2t99_i012)")
+
 ;; FP data processing instructions.
 
 (define_insn_reservation "thunderx2t99_fp_simple" 5
@@ -204,24 +253,81 @@
 
 (define_insn_reservation "thunderx2t99_fp_load_basic" 4
   (and (eq_attr "tune" "thunderx2t99")
-       (eq_attr "type" "f_loads,f_loadd"))
+       (eq_attr "type" "f_loads,f_loadd")
+       (match_test "aarch64_mem_type_p (insn, AARCH64_ADDR_SYMBOLIC
+					      | AARCH64_ADDR_REG_IMM
+					      | AARCH64_ADDR_LO_SUM)"))
   "thunderx2t99_ls01")
 
+(define_insn_reservation "thunderx2t99_fp_load_automod" 4
+  (and (eq_attr "tune" "thunderx2t99")
+       (eq_attr "type" "f_loads,f_loadd")
+       (match_test "aarch64_mem_type_p (insn, AARCH64_ADDR_REG_WB)"))
+  "thunderx2t99_ls01,thunderx2t99_i012")
+
+(define_insn_reservation "thunderx2t99_fp_load_regoffset" 5
+  (and (eq_attr "tune" "thunderx2t99")
+       (eq_attr "type" "f_loads,f_loadd")
+       (match_test "aarch64_mem_type_p (insn, AARCH64_ADDR_REG_REG)"))
+  "thunderx2t99_ls01,thunderx2t99_i012")
+
+(define_insn_reservation "thunderx2t99_fp_load_scale_ext" 6
+  (and (eq_attr "tune" "thunderx2t99")
+       (eq_attr "type" "f_loads,f_loadd")
+       (match_test "aarch64_mem_type_p (insn, AARCH64_ADDR_REG_SHIFT
+					      | AARCH64_ADDR_REG_EXT
+					      | AARCH64_ADDR_REG_SHIFT_EXT)"))
+  "thunderx2t99_ls01,thunderx2t99_i012")
+
 (define_insn_reservation "thunderx2t99_fp_loadpair_basic" 4
   (and (eq_attr "tune" "thunderx2t99")
-       (eq_attr "type" "neon_load1_2reg"))
+       (eq_attr "type" "neon_load1_2reg")
+       (match_test "aarch64_mem_type_p (insn, AARCH64_ADDR_REG_IMM
+					      | AARCH64_ADDR_LO_SUM)"))
   "thunderx2t99_ls01*2")
 
-(define_insn_reservation "thunderx2t99_fp_store_basic" 1
+(define_insn_reservation "thunderx2t99_fp_loadpair_automod" 4
   (and (eq_attr "tune" "thunderx2t99")
-       (eq_attr "type" "f_stores,f_stored"))
+       (eq_attr "type" "neon_load1_2reg")
+       (match_test "aarch64_mem_type_p (insn, AARCH64_ADDR_REG_WB)"))
+  "(thunderx2t99_ls01+thunderx2t99_i012),thunderx2t99_ls01")
+
+(define_insn_reservation "thunderx2t99_fp_store_basic" 0
+  (and (eq_attr "tune" "thunderx2t99")
+       (eq_attr "type" "f_stores,f_stored")
+       (match_test "aarch64_mem_type_p (insn, AARCH64_ADDR_SYMBOLIC
+					      | AARCH64_ADDR_REG_IMM
+					      | AARCH64_ADDR_LO_SUM)"))
   "thunderx2t99_ls01,thunderx2t99_sd")
 
-(define_insn_reservation "thunderx2t99_fp_storepair_basic" 1
+(define_insn_reservation "thunderx2t99_fp_store_automod" 0
+  (and (eq_attr "tune" "thunderx2t99")
+       (eq_attr "type" "f_stores,f_stored")
+       (match_test "aarch64_mem_type_p (insn, AARCH64_ADDR_REG_WB)"))
+  "thunderx2t99_ls01,(thunderx2t99_sd+thunderx2t99_i012)")
+
+(define_insn_reservation "thunderx2t99_fp_store_regoffset_scale_ext" 0
   (and (eq_attr "tune" "thunderx2t99")
-       (eq_attr "type" "neon_store1_2reg"))
+       (eq_attr "type" "f_stores,f_stored")
+       (match_test "aarch64_mem_type_p (insn, AARCH64_ADDR_REG_REG
+					      | AARCH64_ADDR_REG_SHIFT
+					      | AARCH64_ADDR_REG_EXT
+					      | AARCH64_ADDR_REG_SHIFT_EXT)"))
+  "thunderx2t99_i012,thunderx2t99_ls01,thunderx2t99_sd")
+
+(define_insn_reservation "thunderx2t99_fp_storepair_basic" 0
+  (and (eq_attr "tune" "thunderx2t99")
+       (eq_attr "type" "neon_store1_2reg")
+       (match_test "aarch64_mem_type_p (insn, AARCH64_ADDR_REG_IMM
+					      | AARCH64_ADDR_LO_SUM)"))
   "thunderx2t99_ls01,(thunderx2t99_ls01+thunderx2t99_sd),thunderx2t99_sd")
 
+(define_insn_reservation "thunderx2t99_fp_storepair_automod" 0
+  (and (eq_attr "tune" "thunderx2t99")
+       (eq_attr "type" "neon_store1_2reg")
+       (match_test "aarch64_mem_type_p (insn, AARCH64_ADDR_REG_WB)"))
+  "thunderx2t99_ls01,(thunderx2t99_ls01+thunderx2t99_sd+thunderx2t99_i012),thunderx2t99_sd")
+
 ;; ASIMD integer instructions.
 
 (define_insn_reservation "thunderx2t99_asimd_int" 7
@@ -443,6 +549,16 @@
        (eq_attr "type" "neon_store2_one_lane,neon_store2_one_lane_q"))
   "thunderx2t99_ls01,thunderx2t99_f01")
 
+;; Bypasses for automodify load insns.
+
+; For automodify loads, the address should be available before the loaded data.
+
+(define_bypass 1
+  "thunderx2t99_load_automod,thunderx2t99_fp_load_automod,\
+   thunderx2t99_fp_loadpair_automod"
+  "thunderx2t99_*"
+  "aarch64_automod_addr_only_dep")
+
 ;; Crypto extensions.
 
 ; FIXME: Forwarding path for aese/aesmc or aesd/aesimc pairs?

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]