This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[PATCH, AArch64 v2 04/11] aarch64: Improve atomic-op lse generation
- From: Richard Henderson <richard dot henderson at linaro dot org>
- To: gcc-patches at gcc dot gnu dot org
- Cc: ramana dot radhakrishnan at arm dot com, agraf at suse dot de, marcus dot shawcroft at arm dot com, james dot greenhalgh at arm dot com, richard dot earnshaw at arm dot com
- Date: Tue, 2 Oct 2018 11:19:08 -0500
- Subject: [PATCH, AArch64 v2 04/11] aarch64: Improve atomic-op lse generation
- References: <20181002161915.18843-1-richard.henderson@linaro.org>
Fix constraints; avoid unnecessary split. Drop the use of the atomic_op
iterator in favor of the ATOMIC_LDOP iterator; this is simpler and more
logical for ldclr aka bic.
* config/aarch64/aarch64.c (aarch64_emit_bic): Remove.
(aarch64_atomic_ldop_supported_p): Remove.
(aarch64_gen_atomic_ldop): Remove.
* config/aarch64/atomics.md (atomic_<atomic_optab><ALLI>):
Fully expand LSE operations here.
(atomic_fetch_<atomic_optab><ALLI>): Likewise.
(atomic_<atomic_optab>_fetch<ALLI>): Likewise.
(aarch64_atomic_<ATOMIC_LDOP><ALLI>_lse): Drop atomic_op iterator
and use ATOMIC_LDOP instead; use register_operand for the input;
drop the split and emit insns directly.
(aarch64_atomic_fetch_<ATOMIC_LDOP><ALLI>_lse): Likewise.
(aarch64_atomic_<atomic_op>_fetch<ALLI>_lse): Remove.
(@aarch64_atomic_load<ATOMIC_LDOP><ALLI>): Remove.
---
gcc/config/aarch64/aarch64-protos.h | 2 -
gcc/config/aarch64/aarch64.c | 176 -------------------------
gcc/config/aarch64/atomics.md | 197 +++++++++++++++-------------
gcc/config/aarch64/iterators.md | 5 +-
4 files changed, 108 insertions(+), 272 deletions(-)
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index 3d045cf43be..1d2f8487d1a 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -563,8 +563,6 @@ rtx aarch64_load_tp (rtx);
void aarch64_expand_compare_and_swap (rtx op[]);
void aarch64_split_compare_and_swap (rtx op[]);
-bool aarch64_atomic_ldop_supported_p (enum rtx_code);
-void aarch64_gen_atomic_ldop (enum rtx_code, rtx, rtx, rtx, rtx, rtx);
void aarch64_split_atomic_op (enum rtx_code, rtx, rtx, rtx, rtx, rtx, rtx);
bool aarch64_gen_adjusted_ldpstp (rtx *, bool, scalar_mode, RTX_CODE);
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index f7b0af2589e..867759f7e80 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -14226,32 +14226,6 @@ aarch64_expand_compare_and_swap (rtx operands[])
emit_insn (gen_rtx_SET (bval, x));
}
-/* Test whether the target supports using a atomic load-operate instruction.
- CODE is the operation and AFTER is TRUE if the data in memory after the
- operation should be returned and FALSE if the data before the operation
- should be returned. Returns FALSE if the operation isn't supported by the
- architecture. */
-
-bool
-aarch64_atomic_ldop_supported_p (enum rtx_code code)
-{
- if (!TARGET_LSE)
- return false;
-
- switch (code)
- {
- case SET:
- case AND:
- case IOR:
- case XOR:
- case MINUS:
- case PLUS:
- return true;
- default:
- return false;
- }
-}
-
/* Emit a barrier, that is appropriate for memory model MODEL, at the end of a
sequence implementing an atomic operation. */
@@ -14384,156 +14358,6 @@ aarch64_split_compare_and_swap (rtx operands[])
aarch64_emit_post_barrier (model);
}
-/* Emit a BIC instruction. */
-
-static void
-aarch64_emit_bic (machine_mode mode, rtx dst, rtx s1, rtx s2, int shift)
-{
- rtx shift_rtx = GEN_INT (shift);
- rtx (*gen) (rtx, rtx, rtx, rtx);
-
- switch (mode)
- {
- case E_SImode: gen = gen_and_one_cmpl_lshrsi3; break;
- case E_DImode: gen = gen_and_one_cmpl_lshrdi3; break;
- default:
- gcc_unreachable ();
- }
-
- emit_insn (gen (dst, s2, shift_rtx, s1));
-}
-
-/* Emit an atomic load+operate. CODE is the operation. OUT_DATA is the
- location to store the data read from memory. OUT_RESULT is the location to
- store the result of the operation. MEM is the memory location to read and
- modify. MODEL_RTX is the memory ordering to use. VALUE is the second
- operand for the operation. Either OUT_DATA or OUT_RESULT, but not both, can
- be NULL. */
-
-void
-aarch64_gen_atomic_ldop (enum rtx_code code, rtx out_data, rtx out_result,
- rtx mem, rtx value, rtx model_rtx)
-{
- machine_mode mode = GET_MODE (mem);
- machine_mode wmode = (mode == DImode ? DImode : SImode);
- const bool short_mode = (mode < SImode);
- int ldop_code;
- rtx src;
- rtx x;
-
- if (out_data)
- out_data = gen_lowpart (mode, out_data);
-
- if (out_result)
- out_result = gen_lowpart (mode, out_result);
-
- /* Make sure the value is in a register, putting it into a destination
- register if it needs to be manipulated. */
- if (!register_operand (value, mode)
- || code == AND || code == MINUS)
- {
- src = out_result ? out_result : out_data;
- emit_move_insn (src, gen_lowpart (mode, value));
- }
- else
- src = value;
- gcc_assert (register_operand (src, mode));
-
- /* Preprocess the data for the operation as necessary. If the operation is
- a SET then emit a swap instruction and finish. */
- switch (code)
- {
- case MINUS:
- /* Negate the value and treat it as a PLUS. */
- {
- rtx neg_src;
-
- /* Resize the value if necessary. */
- if (short_mode)
- src = gen_lowpart (wmode, src);
-
- neg_src = gen_rtx_NEG (wmode, src);
- emit_insn (gen_rtx_SET (src, neg_src));
-
- if (short_mode)
- src = gen_lowpart (mode, src);
- }
- /* Fall-through. */
- case PLUS:
- ldop_code = UNSPECV_ATOMIC_LDOP_PLUS;
- break;
-
- case IOR:
- ldop_code = UNSPECV_ATOMIC_LDOP_OR;
- break;
-
- case XOR:
- ldop_code = UNSPECV_ATOMIC_LDOP_XOR;
- break;
-
- case AND:
- {
- rtx not_src;
-
- /* Resize the value if necessary. */
- if (short_mode)
- src = gen_lowpart (wmode, src);
-
- not_src = gen_rtx_NOT (wmode, src);
- emit_insn (gen_rtx_SET (src, not_src));
-
- if (short_mode)
- src = gen_lowpart (mode, src);
- }
- ldop_code = UNSPECV_ATOMIC_LDOP_BIC;
- break;
-
- default:
- /* The operation can't be done with atomic instructions. */
- gcc_unreachable ();
- }
-
- emit_insn (gen_aarch64_atomic_load (ldop_code, mode,
- out_data, mem, src, model_rtx));
-
- /* If necessary, calculate the data in memory after the update by redoing the
- operation from values in registers. */
- if (!out_result)
- return;
-
- if (short_mode)
- {
- src = gen_lowpart (wmode, src);
- out_data = gen_lowpart (wmode, out_data);
- out_result = gen_lowpart (wmode, out_result);
- }
-
- x = NULL_RTX;
-
- switch (code)
- {
- case MINUS:
- case PLUS:
- x = gen_rtx_PLUS (wmode, out_data, src);
- break;
- case IOR:
- x = gen_rtx_IOR (wmode, out_data, src);
- break;
- case XOR:
- x = gen_rtx_XOR (wmode, out_data, src);
- break;
- case AND:
- aarch64_emit_bic (wmode, out_result, out_data, src, 0);
- return;
- default:
- gcc_unreachable ();
- }
-
- emit_set_insn (out_result, x);
-
- return;
-}
-
/* Split an atomic operation. */
void
diff --git a/gcc/config/aarch64/atomics.md b/gcc/config/aarch64/atomics.md
index bc9e396dc96..2198649b1be 100644
--- a/gcc/config/aarch64/atomics.md
+++ b/gcc/config/aarch64/atomics.md
@@ -207,13 +207,37 @@
rtx (*gen) (rtx, rtx, rtx);
/* Use an atomic load-operate instruction when possible. */
- if (aarch64_atomic_ldop_supported_p (<CODE>))
- gen = gen_aarch64_atomic_<atomic_optab><mode>_lse;
+ if (TARGET_LSE)
+ {
+ switch (<CODE>)
+ {
+ case MINUS:
+ operands[1] = expand_simple_unop (<MODE>mode, NEG, operands[1],
+ NULL, 1);
+ /* fallthru */
+ case PLUS:
+ gen = gen_aarch64_atomic_add<mode>_lse;
+ break;
+ case IOR:
+ gen = gen_aarch64_atomic_ior<mode>_lse;
+ break;
+ case XOR:
+ gen = gen_aarch64_atomic_xor<mode>_lse;
+ break;
+ case AND:
+ operands[1] = expand_simple_unop (<MODE>mode, NOT, operands[1],
+ NULL, 1);
+ gen = gen_aarch64_atomic_bic<mode>_lse;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ operands[1] = force_reg (<MODE>mode, operands[1]);
+ }
else
gen = gen_aarch64_atomic_<atomic_optab><mode>;
emit_insn (gen (operands[0], operands[1], operands[2]));
-
DONE;
}
)
@@ -239,22 +263,25 @@
}
)
-(define_insn_and_split "aarch64_atomic_<atomic_optab><mode>_lse"
+(define_insn "aarch64_atomic_<atomic_ldoptab><mode>_lse"
[(set (match_operand:ALLI 0 "aarch64_sync_memory_operand" "+Q")
- (unspec_volatile:ALLI
- [(atomic_op:ALLI (match_dup 0)
- (match_operand:ALLI 1 "<atomic_op_operand>" "r<const_atomic>"))
- (match_operand:SI 2 "const_int_operand")]
- UNSPECV_ATOMIC_OP))
+ (unspec_volatile:ALLI
+ [(match_dup 0)
+ (match_operand:ALLI 1 "register_operand" "r")
+ (match_operand:SI 2 "const_int_operand")]
+ ATOMIC_LDOP))
(clobber (match_scratch:ALLI 3 "=&r"))]
"TARGET_LSE"
- "#"
- "&& reload_completed"
- [(const_int 0)]
{
- aarch64_gen_atomic_ldop (<CODE>, operands[3], NULL, operands[0],
- operands[1], operands[2]);
- DONE;
+ enum memmodel model = memmodel_from_int (INTVAL (operands[2]));
+ if (is_mm_relaxed (model))
+ return "ld<atomic_ldop><atomic_sfx>\t%<w>1, %<w>3, %0";
+ else if (is_mm_release (model))
+ return "ld<atomic_ldop>l<atomic_sfx>\t%<w>1, %<w>3, %0";
+ else if (is_mm_acquire (model) || is_mm_consume (model))
+ return "ld<atomic_ldop>a<atomic_sfx>\t%<w>1, %<w>3, %0";
+ else
+ return "ld<atomic_ldop>al<atomic_sfx>\t%<w>1, %<w>3, %0";
}
)
@@ -280,7 +307,7 @@
}
)
-;; Load-operate-store, returning the updated memory data.
+;; Load-operate-store, returning the original memory data.
(define_expand "atomic_fetch_<atomic_optab><mode>"
[(match_operand:ALLI 0 "register_operand" "")
@@ -293,13 +320,37 @@
rtx (*gen) (rtx, rtx, rtx, rtx);
/* Use an atomic load-operate instruction when possible. */
- if (aarch64_atomic_ldop_supported_p (<CODE>))
- gen = gen_aarch64_atomic_fetch_<atomic_optab><mode>_lse;
+ if (TARGET_LSE)
+ {
+ switch (<CODE>)
+ {
+ case MINUS:
+ operands[2] = expand_simple_unop (<MODE>mode, NEG, operands[2],
+ NULL, 1);
+ /* fallthru */
+ case PLUS:
+ gen = gen_aarch64_atomic_fetch_add<mode>_lse;
+ break;
+ case IOR:
+ gen = gen_aarch64_atomic_fetch_ior<mode>_lse;
+ break;
+ case XOR:
+ gen = gen_aarch64_atomic_fetch_xor<mode>_lse;
+ break;
+ case AND:
+ operands[2] = expand_simple_unop (<MODE>mode, NOT, operands[2],
+ NULL, 1);
+ gen = gen_aarch64_atomic_fetch_bic<mode>_lse;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ operands[2] = force_reg (<MODE>mode, operands[2]);
+ }
else
gen = gen_aarch64_atomic_fetch_<atomic_optab><mode>;
emit_insn (gen (operands[0], operands[1], operands[2], operands[3]));
-
DONE;
})
@@ -326,23 +377,26 @@
}
)
-(define_insn_and_split "aarch64_atomic_fetch_<atomic_optab><mode>_lse"
- [(set (match_operand:ALLI 0 "register_operand" "=&r")
- (match_operand:ALLI 1 "aarch64_sync_memory_operand" "+Q"))
+(define_insn "aarch64_atomic_fetch_<atomic_ldoptab><mode>_lse"
+ [(set (match_operand:ALLI 0 "register_operand" "=r")
+ (match_operand:ALLI 1 "aarch64_sync_memory_operand" "+Q"))
(set (match_dup 1)
- (unspec_volatile:ALLI
- [(atomic_op:ALLI (match_dup 1)
- (match_operand:ALLI 2 "<atomic_op_operand>" "r<const_atomic>"))
- (match_operand:SI 3 "const_int_operand")]
- UNSPECV_ATOMIC_LDOP))]
+ (unspec_volatile:ALLI
+ [(match_dup 1)
+ (match_operand:ALLI 2 "register_operand" "r")
+ (match_operand:SI 3 "const_int_operand")]
+ ATOMIC_LDOP))]
"TARGET_LSE"
- "#"
- "&& reload_completed"
- [(const_int 0)]
{
- aarch64_gen_atomic_ldop (<CODE>, operands[0], NULL, operands[1],
- operands[2], operands[3]);
- DONE;
+ enum memmodel model = memmodel_from_int (INTVAL (operands[3]));
+ if (is_mm_relaxed (model))
+ return "ld<atomic_ldop><atomic_sfx>\t%<w>2, %<w>0, %1";
+ else if (is_mm_acquire (model) || is_mm_consume (model))
+ return "ld<atomic_ldop>a<atomic_sfx>\t%<w>2, %<w>0, %1";
+ else if (is_mm_release (model))
+ return "ld<atomic_ldop>l<atomic_sfx>\t%<w>2, %<w>0, %1";
+ else
+ return "ld<atomic_ldop>al<atomic_sfx>\t%<w>2, %<w>0, %1";
}
)
@@ -370,7 +424,7 @@
}
)
-;; Load-operate-store, returning the original memory data.
+;; Load-operate-store, returning the updated memory data.
(define_expand "atomic_<atomic_optab>_fetch<mode>"
[(match_operand:ALLI 0 "register_operand" "")
@@ -380,17 +434,23 @@
(match_operand:SI 3 "const_int_operand")]
""
{
- rtx (*gen) (rtx, rtx, rtx, rtx);
- rtx value = operands[2];
-
- /* Use an atomic load-operate instruction when possible. */
- if (aarch64_atomic_ldop_supported_p (<CODE>))
- gen = gen_aarch64_atomic_<atomic_optab>_fetch<mode>_lse;
+ /* Use an atomic load-operate instruction when possible. In this case
+ we will re-compute the result from the original mem value. */
+ if (TARGET_LSE)
+ {
+ rtx tmp = gen_reg_rtx (<MODE>mode);
+ operands[2] = force_reg (<MODE>mode, operands[2]);
+ emit_insn (gen_atomic_fetch_<atomic_optab><mode>
+ (tmp, operands[1], operands[2], operands[3]));
+ tmp = expand_simple_binop (<MODE>mode, <CODE>, tmp, operands[2],
+ operands[0], 1, OPTAB_WIDEN);
+ emit_move_insn (operands[0], tmp);
+ }
else
- gen = gen_aarch64_atomic_<atomic_optab>_fetch<mode>;
-
- emit_insn (gen (operands[0], operands[1], value, operands[3]));
-
+ {
+ emit_insn (gen_aarch64_atomic_<atomic_optab>_fetch<mode>
+ (operands[0], operands[1], operands[2], operands[3]));
+ }
DONE;
})
@@ -417,29 +477,6 @@
}
)
-(define_insn_and_split "aarch64_atomic_<atomic_optab>_fetch<mode>_lse"
- [(set (match_operand:ALLI 0 "register_operand" "=&r")
- (atomic_op:ALLI
- (match_operand:ALLI 1 "aarch64_sync_memory_operand" "+Q")
- (match_operand:ALLI 2 "<atomic_op_operand>" "r<const_atomic>")))
- (set (match_dup 1)
- (unspec_volatile:ALLI
- [(match_dup 1)
- (match_dup 2)
- (match_operand:SI 3 "const_int_operand")]
- UNSPECV_ATOMIC_LDOP))
- (clobber (match_scratch:ALLI 4 "=&r"))]
- "TARGET_LSE"
- "#"
- "&& reload_completed"
- [(const_int 0)]
- {
- aarch64_gen_atomic_ldop (<CODE>, operands[4], operands[0], operands[1],
- operands[2], operands[3]);
- DONE;
- }
-)
-
(define_insn_and_split "atomic_nand_fetch<mode>"
[(set (match_operand:ALLI 0 "register_operand" "=&r")
(not:ALLI
@@ -585,29 +622,3 @@
return "dmb\\tish";
}
)
-
-;; ARMv8.1-A LSE instructions.
-
-;; Atomic load-op: Load data, operate, store result, keep data.
-
-(define_insn "@aarch64_atomic_load<atomic_ldop><mode>"
- [(set (match_operand:ALLI 0 "register_operand" "=r")
- (match_operand:ALLI 1 "aarch64_sync_memory_operand" "+Q"))
- (set (match_dup 1)
- (unspec_volatile:ALLI
- [(match_dup 1)
- (match_operand:ALLI 2 "register_operand")
- (match_operand:SI 3 "const_int_operand")]
- ATOMIC_LDOP))]
- "TARGET_LSE && reload_completed"
- {
- enum memmodel model = memmodel_from_int (INTVAL (operands[3]));
- if (is_mm_relaxed (model))
- return "ld<atomic_ldop><atomic_sfx>\t%<w>2, %<w>0, %1";
- else if (is_mm_acquire (model) || is_mm_consume (model))
- return "ld<atomic_ldop>a<atomic_sfx>\t%<w>2, %<w>0, %1";
- else if (is_mm_release (model))
- return "ld<atomic_ldop>l<atomic_sfx>\t%<w>2, %<w>0, %1";
- else
- return "ld<atomic_ldop>al<atomic_sfx>\t%<w>2, %<w>0, %1";
- })
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index a43956054e8..524e4e6929b 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -503,7 +503,6 @@
UNSPECV_ATOMIC_CAS ; Represent an atomic CAS.
UNSPECV_ATOMIC_SWP ; Represent an atomic SWP.
UNSPECV_ATOMIC_OP ; Represent an atomic operation.
- UNSPECV_ATOMIC_LDOP ; Represent an atomic load-operation
UNSPECV_ATOMIC_LDOP_OR ; Represent an atomic load-or
UNSPECV_ATOMIC_LDOP_BIC ; Represent an atomic load-bic
UNSPECV_ATOMIC_LDOP_XOR ; Represent an atomic load-xor
@@ -1591,6 +1590,10 @@
[(UNSPECV_ATOMIC_LDOP_OR "set") (UNSPECV_ATOMIC_LDOP_BIC "clr")
(UNSPECV_ATOMIC_LDOP_XOR "eor") (UNSPECV_ATOMIC_LDOP_PLUS "add")])
+(define_int_attr atomic_ldoptab
+ [(UNSPECV_ATOMIC_LDOP_OR "ior") (UNSPECV_ATOMIC_LDOP_BIC "bic")
+ (UNSPECV_ATOMIC_LDOP_XOR "xor") (UNSPECV_ATOMIC_LDOP_PLUS "add")])
+
;; -------------------------------------------------------------------
;; Int Iterators Attributes.
;; -------------------------------------------------------------------
--
2.17.1