[PING][PATCH, AArch64] Disable reg offset in quad-word store for Falkor

Siddhesh Poyarekar siddhesh@gotplt.org
Wed Jan 17 20:46:00 GMT 2018


On Thursday 18 January 2018 01:11 AM, Jim Wilson wrote:
> This is the only solution I found that worked.

I tried a few things and ended up with pretty much the same fix you have
except with the check in a slightly different place.  That is, I used
aarch64_classify_address to gate the change because I intend to use that
to gate pre/post-increments for stores as well for falkor.

I also fixed up my submission to incorporate your more recent change,
which was to add an extra tuning option instead of just checking for falkor.

Siddhesh

2018-xx-xx  Jim Wilson  <jim.wilson@linaro.org>
	    Kugan Vivekanandarajah  <kugan.vivekanandarajah@linaro.org>
	    Siddhesh Poyarekar  <siddhesh@sourceware.org>

	gcc/
	* gcc/config/aarch64/aarch64-protos.h (aarch64_addr_query_type):
	New member ADDR_QUERY_STR.
	* gcc/config/aarch64/aarch64-tuning-flags.def
	(SLOW_REGOFFSET_QUADWORD_STORE): New.
	* gcc/config/aarch64/aarch64.c (qdf24xx_tunings): Add
	SLOW_REGOFFSET_QUADWORD_STORE to tuning flags.
	(aarch64_classify_address): Avoid register indexing for quad
	mode stores when SLOW_REGOFFSET_QUADWORD_STORE is set.
	* gcc/config/aarch64/constraints.md (Uts): New constraint.
	* gcc/config/aarch64/aarch64.md (movti_aarch64, movtf_aarch64):
	Use it.
	* gcc/config/aarch64/aarch64-simd.md (aarch64_simd_mov<mode>):
	Likewise.

	gcc/testsuite/
	* gcc/testsuite/gcc.target/aarch64/pr82533.c: New test case.
-------------- next part --------------
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index 159bc6aee7e..15924fc3f58 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -120,6 +120,9 @@ enum aarch64_symbol_type
    ADDR_QUERY_LDP_STP
       Query what is valid for a load/store pair.
 
+   ADDR_QUERY_STR
+      Query what is valid for a store.
+
    ADDR_QUERY_ANY
       Query what is valid for at least one memory constraint, which may
       allow things that "m" doesn't.  For example, the SVE LDR and STR
@@ -128,6 +131,7 @@ enum aarch64_symbol_type
 enum aarch64_addr_query_type {
   ADDR_QUERY_M,
   ADDR_QUERY_LDP_STP,
+  ADDR_QUERY_STR,
   ADDR_QUERY_ANY
 };
 
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 3d1f6a01cb7..48d92702723 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -131,9 +131,9 @@
 
 (define_insn "*aarch64_simd_mov<VQ:mode>"
   [(set (match_operand:VQ 0 "nonimmediate_operand"
-		"=w, Umq,  m,  w, ?r, ?w, ?r, w")
+		"=w, Umq, Uts,  w, ?r, ?w, ?r, w")
 	(match_operand:VQ 1 "general_operand"
-		"m,  Dz, w,  w,  w,  r,  r, Dn"))]
+		"m,  Dz,    w,  w,  w,  r,  r, Dn"))]
   "TARGET_SIMD
    && (register_operand (operands[0], <MODE>mode)
        || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
diff --git a/gcc/config/aarch64/aarch64-tuning-flags.def b/gcc/config/aarch64/aarch64-tuning-flags.def
index ea9ead234cb..04baf5b6de6 100644
--- a/gcc/config/aarch64/aarch64-tuning-flags.def
+++ b/gcc/config/aarch64/aarch64-tuning-flags.def
@@ -41,4 +41,8 @@ AARCH64_EXTRA_TUNING_OPTION ("slow_unaligned_ldpw", SLOW_UNALIGNED_LDPW)
    are not considered cheap.  */
 AARCH64_EXTRA_TUNING_OPTION ("cheap_shift_extend", CHEAP_SHIFT_EXTEND)
 
+/* Don't use a register offset in a memory address for a quad-word store.  */
+AARCH64_EXTRA_TUNING_OPTION ("slow_regoffset_quadword_store",
+			     SLOW_REGOFFSET_QUADWORD_STORE)
+
 #undef AARCH64_EXTRA_TUNING_OPTION
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 0599a79bfeb..d17f3b70271 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -894,7 +894,7 @@ static const struct tune_params qdf24xx_tunings =
   2,	/* min_div_recip_mul_df.  */
   0,	/* max_case_values.  */
   tune_params::AUTOPREFETCHER_WEAK,	/* autoprefetcher_model.  */
-  (AARCH64_EXTRA_TUNE_NONE),		/* tune_flags.  */
+  (AARCH64_EXTRA_TUNE_SLOW_REGOFFSET_QUADWORD_STORE),	/* tune_flags.  */
   &qdf24xx_prefetch_tune
 };
 
@@ -5531,6 +5531,16 @@ aarch64_classify_address (struct aarch64_address_info *info,
 				|| vec_flags == VEC_ADVSIMD
 				|| vec_flags == VEC_SVE_DATA));
 
+  /* Avoid register indexing for 128-bit stores when the
+     AARCH64_EXTRA_TUNE_SLOW_REGOFFSET_QUADWORD_STORE option is set.  */
+  if (!optimize_size
+      && type == ADDR_QUERY_STR
+      && (aarch64_tune_params.extra_tuning_flags
+	  & AARCH64_EXTRA_TUNE_SLOW_REGOFFSET_QUADWORD_STORE)
+      && (mode == TImode || mode == TFmode
+	  || aarch64_vector_data_mode_p (mode)))
+    allow_reg_index_p = false;
+
   /* For SVE, only accept [Rn], [Rn, Rm, LSL #shift] and
      [Rn, #offset, MUL VL].  */
   if ((vec_flags & (VEC_SVE_DATA | VEC_SVE_PRED)) != 0
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index edb6a758333..348b867ff7f 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -1079,7 +1079,7 @@
 
 (define_insn "*movti_aarch64"
   [(set (match_operand:TI 0
-	 "nonimmediate_operand"  "=r, w,r,w,r,m,m,w,m")
+	 "nonimmediate_operand"  "=r,w,r,w,r,m,m,w,Uts")
 	(match_operand:TI 1
 	 "aarch64_movti_operand" " rn,r,w,w,m,r,Z,m,w"))]
   "(register_operand (operands[0], TImode)
@@ -1226,9 +1226,9 @@
 
 (define_insn "*movtf_aarch64"
   [(set (match_operand:TF 0
-	 "nonimmediate_operand" "=w,?&r,w ,?r,w,?w,w,m,?r,m ,m")
+	 "nonimmediate_operand" "=w,?&r,w ,?r,w,?w,w,Uts,?r,m ,m")
 	(match_operand:TF 1
-	 "general_operand"      " w,?r, ?r,w ,Y,Y ,m,w,m ,?r,Y"))]
+	 "general_operand"      " w,?r, ?r,w ,Y,Y ,m,w  ,m ,?r,Y"))]
   "TARGET_FLOAT && (register_operand (operands[0], TFmode)
     || aarch64_reg_or_fp_zero (operands[1], TFmode))"
   "@
diff --git a/gcc/config/aarch64/constraints.md b/gcc/config/aarch64/constraints.md
index 6cc4cadfd10..97cfccadadd 100644
--- a/gcc/config/aarch64/constraints.md
+++ b/gcc/config/aarch64/constraints.md
@@ -243,6 +243,13 @@
        (match_test "aarch64_legitimate_address_p (V2DImode,
 						  XEXP (op, 0), 1)")))
 
+(define_memory_constraint "Uts"
+  "@internal
+   An address valid for storing a 128-bit AdvSIMD register"
+  (and (match_code "mem")
+       (match_test "aarch64_legitimate_address_p (GET_MODE (op), XEXP (op, 0),
+						  ADDR_QUERY_STR)")))
+
 (define_memory_constraint "Uty"
   "@internal
    An address valid for SVE LD1Rs."
diff --git a/gcc/testsuite/gcc.target/aarch64/pr82533.c b/gcc/testsuite/gcc.target/aarch64/pr82533.c
new file mode 100644
index 00000000000..fa28ffac03a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/pr82533.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "-mcpu=falkor -O2 -ftree-vectorize" } */
+
+void
+copy (int N, double *c, double *a)
+{
+  for (int i = 0; i < N; ++i)
+    c[i] = a[i];
+}
+
+/* { dg-final { scan-assembler-not "str\tq\[0-9\]+, \\\[x\[0-9\]+, x\[0-9\]+\\\]" } } */


More information about the Gcc-patches mailing list