[PATCH, AArch64 v2 10/11] aarch64: Implement TImode compare-and-swap
- From: Richard Henderson <richard.henderson@linaro.org>
- To: gcc-patches@gcc.gnu.org
- Cc: ramana.radhakrishnan@arm.com, agraf@suse.de, marcus.shawcroft@arm.com, james.greenhalgh@arm.com, richard.earnshaw@arm.com
- Date: Tue, 2 Oct 2018 11:19:14 -0500
- Subject: [PATCH, AArch64 v2 10/11] aarch64: Implement TImode compare-and-swap
- References: <20181002161915.18843-1-richard.henderson@linaro.org>
This pattern will only be used with the __sync functions, because
we do not yet have a bare TImode atomic load.
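
For orientation, here is the kind of C that reaches the new pattern; the function below is just an illustration, not part of the patch. Depending on how LSE is enabled, it should become either a CASP instruction or an LDXP/STXP loop, rather than a call into libatomic:

    /* Illustration only: a 16-byte __sync compare-and-swap.  */
    _Bool
    cas16 (unsigned __int128 *ptr, unsigned __int128 expect,
           unsigned __int128 desire)
    {
      return __sync_bool_compare_and_swap (ptr, expect, desire);
    }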
gcc/
	* config/aarch64/aarch64-protos.h (atomic_ool_names): Grow the
	name array from 4 to 5 sizes.
	* config/aarch64/aarch64.c (aarch64_gen_compare_reg): Add support
	for NE comparison of TImode values.
	(aarch64_print_operand): Extend %R to handle general registers.
	(aarch64_emit_load_exclusive): Add support for TImode.
	(aarch64_emit_store_exclusive): Likewise.
	(aarch64_atomic_ool_func): Likewise.
	(aarch64_ool_cas_names): Likewise.
	* config/aarch64/atomics.md (@atomic_compare_and_swap<ALLI_TI>):
	Change iterator from ALLI to ALLI_TI.
	(@aarch64_compare_and_swap<JUST_TI>): New.
	(@aarch64_compare_and_swap<JUST_TI>_lse): New.
	(aarch64_load_exclusive_pair): New.
	(aarch64_store_exclusive_pair): New.
	* config/aarch64/iterators.md (JUST_TI): New.

libgcc/
	* config/aarch64/lse.c (cas): Add support for SIZE == 16.
	* config/aarch64/t-lse (S0, O0): Split out cas.
	(LSE_OBJS): Include $(O0).
---
 gcc/config/aarch64/aarch64-protos.h |  2 +-
 gcc/config/aarch64/aarch64.c        | 72 ++++++++++++++++++-----
 libgcc/config/aarch64/lse.c         | 48 ++++++++++-----
 gcc/config/aarch64/atomics.md       | 91 +++++++++++++++++++++++++++--
 gcc/config/aarch64/iterators.md     |  3 +
 libgcc/config/aarch64/t-lse         | 10 +++-
 6 files changed, 189 insertions(+), 37 deletions(-)
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index c7b96b12bbe..f735c4e5ad8 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -626,7 +626,7 @@ bool aarch64_high_bits_all_ones_p (HOST_WIDE_INT);
struct atomic_ool_names
{
- const char *str[4][4];
+ const char *str[5][4];
};
rtx aarch64_atomic_ool_func(machine_mode mode, rtx model_rtx,
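A note on the array growth above: str is indexed first by size class (1, 2, 4, 8, and now 16 bytes) and then by memory model, so the lookup in aarch64_atomic_ool_func below amounts to something like this (model_idx is my name for the model-derived index, not the patch's):

    /* Sketch of the table lookup; see the mode_idx switch below.  */
    const char *name = names->str[mode_idx][model_idx];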
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index ce4d7e51d00..ac2f055a09e 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -1610,10 +1610,33 @@ emit_set_insn (rtx x, rtx y)
rtx
aarch64_gen_compare_reg (RTX_CODE code, rtx x, rtx y)
{
- machine_mode mode = SELECT_CC_MODE (code, x, y);
- rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);
+ machine_mode cmp_mode = GET_MODE (x);
+ machine_mode cc_mode;
+ rtx cc_reg;
- emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
+ if (cmp_mode == E_TImode)
+ {
+ gcc_assert (code == NE);
+
+ cc_mode = E_CCmode;
+ cc_reg = gen_rtx_REG (cc_mode, CC_REGNUM);
+
+ rtx x_lo = operand_subword (x, 0, 0, TImode);
+ rtx y_lo = operand_subword (y, 0, 0, TImode);
+ emit_set_insn (cc_reg, gen_rtx_COMPARE (cc_mode, x_lo, y_lo));
+
+ rtx x_hi = operand_subword (x, 1, 0, TImode);
+ rtx y_hi = operand_subword (y, 1, 0, TImode);
+ emit_insn (gen_ccmpdi (cc_reg, cc_reg, x_hi, y_hi,
+ gen_rtx_EQ (cc_mode, cc_reg, const0_rtx),
+ GEN_INT (AARCH64_EQ)));
+ }
+ else
+ {
+ cc_mode = SELECT_CC_MODE (code, x, y);
+ cc_reg = gen_rtx_REG (cc_mode, CC_REGNUM);
+ emit_set_insn (cc_reg, gen_rtx_COMPARE (cc_mode, x, y));
+ }
return cc_reg;
}
@@ -6693,7 +6716,7 @@ sizetochar (int size)
'S/T/U/V': Print a FP/SIMD register name for a register list.
The register printed is the FP/SIMD register name
of X + 0/1/2/3 for S/T/U/V.
- 'R': Print a scalar FP/SIMD register name + 1.
+ 'R': Print a scalar Integer/FP/SIMD register name + 1.
'X': Print bottom 16 bits of integer constant in hex.
'w/x': Print a general register name or the zero register
(32-bit or 64-bit).
@@ -6885,12 +6908,13 @@ aarch64_print_operand (FILE *f, rtx x, int code)
break;
case 'R':
- if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
- {
- output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
- return;
- }
- asm_fprintf (f, "q%d", REGNO (x) - V0_REGNUM + 1);
+ if (REG_P (x) && FP_REGNUM_P (REGNO (x)))
+ asm_fprintf (f, "q%d", REGNO (x) - V0_REGNUM + 1);
+ else if (REG_P (x) && GP_REGNUM_P (REGNO (x)))
+ asm_fprintf (f, "x%d", REGNO (x) - R0_REGNUM + 1);
+ else
+ output_operand_lossage ("incompatible register operand for '%%%c'",
+ code);
break;
case 'X':
@@ -14143,16 +14167,26 @@ static void
aarch64_emit_load_exclusive (machine_mode mode, rtx rval,
rtx mem, rtx model_rtx)
{
- emit_insn (gen_aarch64_load_exclusive (mode, rval, mem, model_rtx));
+ if (mode == E_TImode)
+ emit_insn (gen_aarch64_load_exclusive_pair (gen_lowpart (DImode, rval),
+ gen_highpart (DImode, rval),
+ mem, model_rtx));
+ else
+ emit_insn (gen_aarch64_load_exclusive (mode, rval, mem, model_rtx));
}
/* Emit store exclusive. */
static void
aarch64_emit_store_exclusive (machine_mode mode, rtx bval,
- rtx rval, rtx mem, rtx model_rtx)
+ rtx mem, rtx val, rtx model_rtx)
{
- emit_insn (gen_aarch64_store_exclusive (mode, bval, rval, mem, model_rtx));
+ if (mode == E_TImode)
+ emit_insn (gen_aarch64_store_exclusive_pair
+ (bval, mem, operand_subword (val, 0, 0, TImode),
+ operand_subword (val, 1, 0, TImode), model_rtx));
+ else
+ emit_insn (gen_aarch64_store_exclusive (mode, bval, mem, val, model_rtx));
}
/* Mark the previous jump instruction as unlikely. */
@@ -14164,7 +14198,7 @@ aarch64_emit_unlikely_jump (rtx insn)
add_reg_br_prob_note (jump, profile_probability::very_unlikely ());
}
-/* We store the names of the various atomic helpers in a 4x4 array.
+/* We store the names of the various atomic helpers in a 5x4 array.
Return the libcall function given MODE, MODEL and NAMES. */
rtx
@@ -14188,6 +14222,9 @@ aarch64_atomic_ool_func(machine_mode mode, rtx model_rtx,
case E_DImode:
mode_idx = 3;
break;
+ case E_TImode:
+ mode_idx = 4;
+ break;
default:
gcc_unreachable ();
}
@@ -14222,9 +14259,11 @@ aarch64_atomic_ool_func(machine_mode mode, rtx model_rtx,
"__aa64_" #B #N "_rel", \
"__aa64_" #B #N "_acq_rel" }
-#define DEF4(B) DEF0(B, 1), DEF0(B, 2), DEF0(B, 4), DEF0(B, 8)
+#define DEF4(B) DEF0(B, 1), DEF0(B, 2), DEF0(B, 4), DEF0(B, 8), \
+ { NULL, NULL, NULL, NULL }
+#define DEF5(B) DEF0(B, 1), DEF0(B, 2), DEF0(B, 4), DEF0(B, 8), DEF0(B, 16)
-static const atomic_ool_names aarch64_ool_cas_names = { { DEF4(cas) } };
+static const atomic_ool_names aarch64_ool_cas_names = { { DEF5(cas) } };
const atomic_ool_names aarch64_ool_swp_names = { { DEF4(swp) } };
const atomic_ool_names aarch64_ool_ldadd_names = { { DEF4(ldadd) } };
const atomic_ool_names aarch64_ool_ldset_names = { { DEF4(ldset) } };
@@ -14247,6 +14286,7 @@ const atomic_ool_names aarch64_ool_steor_names = { { DEF4(eor) } };
#undef DEF0
#undef DEF4
+#undef DEF5
/* Expand a compare and swap pattern. */
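As a reader's note on the aarch64_gen_compare_reg change above: the TImode NE test is a 64-bit CMP of the low halves followed by a CCMP of the high halves, so the generated sequence looks roughly like this (register numbers assumed):

    cmp   x0, x2            // low halves
    ccmp  x1, x3, #0, eq    // high halves, but only if the lows matched;
                            // otherwise NZCV is set to 0, i.e. "not equal"
    b.ne  1f                // branch if either half differed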
diff --git a/libgcc/config/aarch64/lse.c b/libgcc/config/aarch64/lse.c
index 68ca7df667b..f6114add71a 100644
--- a/libgcc/config/aarch64/lse.c
+++ b/libgcc/config/aarch64/lse.c
@@ -91,6 +91,7 @@ asm(".arch armv8-a+lse");
#elif SIZE == 4 || SIZE == 8
# define S ""
# define MASK ""
+#elif SIZE == 16
#else
# error
#endif
@@ -98,9 +99,11 @@ asm(".arch armv8-a+lse");
#if SIZE < 8
# define T unsigned int
# define W "w"
-#else
+#elif SIZE == 8
# define T unsigned long long
# define W ""
+#else
+# define T unsigned __int128
#endif
#if MODEL == 1
@@ -138,19 +141,38 @@ T NAME(cas) (T cmp, T new, T *ptr)
unsigned tmp;
if (have_atomics)
- __asm__("cas" A L S " %"W"0, %"W"2, %1"
- : "=r"(old), "+m"(*ptr) : "r"(new), "0"(cmp));
+ {
+#if SIZE == 16
+ __asm__("casp" A L " %0, %R0, %2, %R2, %1"
+ : "=r"(old), "+m"(*ptr) : "r"(new), "0"(cmp));
+#else
+ __asm__("cas" A L S " %"W"0, %"W"2, %1"
+ : "=r"(old), "+m"(*ptr) : "r"(new), "0"(cmp));
+#endif
+ }
else
- __asm__(
- "0: "
- "ld" A "xr"S" %"W"0, %1\n\t"
- "cmp %"W"0, %"W"4" MASK "\n\t"
- "bne 1f\n\t"
- "st" L "xr"S" %w2, %"W"3, %1\n\t"
- "cbnz %w2, 0b\n"
- "1:"
- : "=&r"(old), "+m"(*ptr), "=&r"(tmp) : "r"(new), "r"(cmp));
-
+ {
+#if SIZE == 16
+ __asm__("0: "
+ "ld" A "xp %0, %R0, %1\n\t"
+ "cmp %0, %4\n\t"
+ "ccmp %R0, %R4, #0, eq\n\t"
+ "bne 1f\n\t"
+ "st" L "xp %w2, %3, %R3, %1\n\t"
+ "cbnz %w2, 0b\n"
+ "1:"
+ : "=&r"(old), "+m"(*ptr), "=&r"(tmp) : "r"(new), "r"(cmp));
+#else
+ __asm__("0: "
+ "ld" A "xr"S" %"W"0, %1\n\t"
+ "cmp %"W"0, %"W"4" MASK "\n\t"
+ "bne 1f\n\t"
+ "st" L "xr"S" %w2, %"W"3, %1\n\t"
+ "cbnz %w2, 0b\n"
+ "1:"
+ : "=&r"(old), "+m"(*ptr), "=&r"(tmp) : "r"(new), "r"(cmp));
+#endif
+ }
return old;
}
#endif
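For the SIZE == 16 build, T above is unsigned __int128, so each of the four new out-of-line helpers has a shape like the following declaration (the name is formed by the DEF0 macro in the aarch64.c hunk; this is an illustration, not code from the patch):

    /* 16-byte out-of-line CAS, acquire-release model.  Returns the value
       observed in *ptr; the caller compares it with cmp for the boolean
       result.  */
    unsigned __int128 __aa64_cas16_acq_rel (unsigned __int128 cmp,
                                            unsigned __int128 new,
                                            unsigned __int128 *ptr);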
diff --git a/gcc/config/aarch64/atomics.md b/gcc/config/aarch64/atomics.md
index 72f9962fe55..fe604606bdd 100644
--- a/gcc/config/aarch64/atomics.md
+++ b/gcc/config/aarch64/atomics.md
@@ -22,10 +22,10 @@
(define_expand "@atomic_compare_and_swap<mode>"
[(match_operand:SI 0 "register_operand" "") ;; bool out
- (match_operand:ALLI 1 "register_operand" "") ;; val out
- (match_operand:ALLI 2 "aarch64_sync_memory_operand" "") ;; memory
- (match_operand:ALLI 3 "nonmemory_operand" "") ;; expected
- (match_operand:ALLI 4 "aarch64_reg_or_zero" "") ;; desired
+ (match_operand:ALLI_TI 1 "register_operand" "") ;; val out
+ (match_operand:ALLI_TI 2 "aarch64_sync_memory_operand" "") ;; memory
+ (match_operand:ALLI_TI 3 "nonmemory_operand" "") ;; expected
+ (match_operand:ALLI_TI 4 "aarch64_reg_or_zero" "") ;; desired
(match_operand:SI 5 "const_int_operand") ;; is_weak
(match_operand:SI 6 "const_int_operand") ;; mod_s
(match_operand:SI 7 "const_int_operand")] ;; mod_f
@@ -88,6 +88,30 @@
}
)
+(define_insn_and_split "@aarch64_compare_and_swap<mode>"
+ [(set (reg:CC CC_REGNUM) ;; bool out
+ (unspec_volatile:CC [(const_int 0)] UNSPECV_ATOMIC_CMPSW))
+ (set (match_operand:JUST_TI 0 "register_operand" "=&r") ;; val out
+ (match_operand:JUST_TI 1 "aarch64_sync_memory_operand" "+Q")) ;; memory
+ (set (match_dup 1)
+ (unspec_volatile:JUST_TI
+ [(match_operand:JUST_TI 2 "register_operand" "r") ;; expect
+ (match_operand:JUST_TI 3 "aarch64_reg_or_zero" "rZ") ;; desired
+ (match_operand:SI 4 "const_int_operand") ;; is_weak
+ (match_operand:SI 5 "const_int_operand") ;; mod_s
+ (match_operand:SI 6 "const_int_operand")] ;; mod_f
+ UNSPECV_ATOMIC_CMPSW))
+ (clobber (match_scratch:SI 7 "=&r"))]
+ ""
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+ {
+ aarch64_split_compare_and_swap (operands);
+ DONE;
+ }
+)
+
(define_insn "@aarch64_compare_and_swap<mode>_lse"
[(set (match_operand:SI 0 "register_operand" "+r") ;; val out
(zero_extend:SI
@@ -133,6 +157,28 @@
return "casal<atomic_sfx>\t%<w>0, %<w>2, %1";
})
+(define_insn "@aarch64_compare_and_swap<mode>_lse"
+ [(set (match_operand:JUST_TI 0 "register_operand" "+r") ;; val out
+ (match_operand:JUST_TI 1 "aarch64_sync_memory_operand" "+Q")) ;; memory
+ (set (match_dup 1)
+ (unspec_volatile:JUST_TI
+ [(match_dup 0) ;; expect
+ (match_operand:JUST_TI 2 "register_operand" "r") ;; desired
+ (match_operand:SI 3 "const_int_operand")] ;; mod_s
+ UNSPECV_ATOMIC_CMPSW))]
+ "TARGET_LSE"
+{
+ enum memmodel model = memmodel_from_int (INTVAL (operands[3]));
+ if (is_mm_relaxed (model))
+ return "casp\t%0, %R0, %2, %R2, %1";
+ else if (is_mm_acquire (model) || is_mm_consume (model))
+ return "caspa\t%0, %R0, %2, %R2, %1";
+ else if (is_mm_release (model))
+ return "caspl\t%0, %R0, %2, %R2, %1";
+ else
+ return "caspal\t%0, %R0, %2, %R2, %1";
+})
+
(define_expand "atomic_exchange<mode>"
[(match_operand:ALLI 0 "register_operand" "")
(match_operand:ALLI 1 "aarch64_sync_memory_operand" "")
@@ -650,6 +696,24 @@
}
)
+(define_insn "aarch64_load_exclusive_pair"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (unspec_volatile:DI
+ [(match_operand:TI 2 "aarch64_sync_memory_operand" "Q")
+ (match_operand:SI 3 "const_int_operand")]
+ UNSPECV_LX))
+ (set (match_operand:DI 1 "register_operand" "=r")
+ (unspec_volatile:DI [(match_dup 2) (match_dup 3)] UNSPECV_LX))]
+ ""
+ {
+ enum memmodel model = memmodel_from_int (INTVAL (operands[3]));
+ if (is_mm_relaxed (model) || is_mm_consume (model) || is_mm_release (model))
+ return "ldxp\t%0, %1, %2";
+ else
+ return "ldaxp\t%0, %1, %2";
+ }
+)
+
(define_insn "@aarch64_store_exclusive<mode>"
[(set (match_operand:SI 0 "register_operand" "=&r")
(unspec_volatile:SI [(const_int 0)] UNSPECV_SX))
@@ -668,6 +732,25 @@
}
)
+(define_insn "aarch64_store_exclusive_pair"
+ [(set (match_operand:SI 0 "register_operand" "=&r")
+ (unspec_volatile:SI [(const_int 0)] UNSPECV_SX))
+ (set (match_operand:TI 1 "aarch64_sync_memory_operand" "=Q")
+ (unspec_volatile:TI
+ [(match_operand:DI 2 "aarch64_reg_or_zero" "rZ")
+ (match_operand:DI 3 "aarch64_reg_or_zero" "rZ")
+ (match_operand:SI 4 "const_int_operand")]
+ UNSPECV_SX))]
+ ""
+ {
+ enum memmodel model = memmodel_from_int (INTVAL (operands[4]));
+ if (is_mm_relaxed (model) || is_mm_consume (model) || is_mm_acquire (model))
+ return "stxp\t%w0, %x2, %x3, %1";
+ else
+ return "stlxp\t%w0, %x2, %x3, %1";
+ }
+)
+
(define_expand "mem_thread_fence"
[(match_operand:SI 0 "const_int_operand" "")]
""
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 524e4e6929b..dd26bdbbc6b 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -29,6 +29,9 @@
;; Iterator for HI, SI, DI, some instructions can only work on these modes.
(define_mode_iterator GPI_I16 [(HI "AARCH64_ISA_F16") SI DI])
+;; "Iterator" for just TI -- features like @pattern only work with iterators.
+(define_mode_iterator JUST_TI [TI])
+
;; Iterator for QI and HI modes
(define_mode_iterator SHORT [QI HI])
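On the comment above: naming a pattern with a leading '@' makes genemit provide overloaded generator functions that take the mode as an ordinary argument, which is why even a single-mode pattern needs an iterator. The expander can then be driven like this (operand rtxes assumed to be built by the caller):

    /* Operands follow @atomic_compare_and_swap<mode>: bool out, val out,
       memory, expected, desired, is_weak, mod_s, mod_f.  */
    emit_insn (gen_atomic_compare_and_swap (TImode, bval, rval, mem,
                                            expected, desired, is_weak,
                                            mod_s, mod_f));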
diff --git a/libgcc/config/aarch64/t-lse b/libgcc/config/aarch64/t-lse
index e862b0c2448..534ff6efea8 100644
--- a/libgcc/config/aarch64/t-lse
+++ b/libgcc/config/aarch64/t-lse
@@ -18,15 +18,19 @@
# along with GCC; see the file COPYING3. If not see
# <http://www.gnu.org/licenses/>.
-# CAS, Swap, Load-and-operate have 4 sizes and 4 memory models
-S1 := $(foreach s, 1 2 4 8, $(addsuffix _$(s), cas swp ldadd ldclr ldeor ldset))
+# Compare-and-swap has 5 sizes and 4 memory models.
+S0 := $(foreach s, 1 2 4 8 16, $(addsuffix _$(s), cas))
+O0 := $(foreach m, 1 2 3 4, $(addsuffix _$(m)$(objext), $(S0)))
+
+# Swap, Load-and-operate have 4 sizes and 4 memory models
+S1 := $(foreach s, 1 2 4 8, $(addsuffix _$(s), swp ldadd ldclr ldeor ldset))
O1 := $(foreach m, 1 2 3 4, $(addsuffix _$(m)$(objext), $(S1)))
# Store-and-operate has 4 sizes but only 2 memory models (relaxed, release).
S2 := $(foreach s, 1 2 4 8, $(addsuffix _$(s), stadd stclr steor stset))
O2 := $(foreach m, 1 3, $(addsuffix _$(m)$(objext), $(S2)))
-LSE_OBJS := $(O1) $(O2)
+LSE_OBJS := $(O0) $(O1) $(O2)
libgcc-objects += $(LSE_OBJS) have_atomic$(objext)
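As a sanity check on the Makefile change, the new variables expand as follows (assuming the usual $(objext) of .o):

    # S0 = cas_1 cas_2 cas_4 cas_8 cas_16
    # O0 = cas_1_1.o cas_1_2.o ... cas_16_3.o cas_16_4.o   (5 sizes x 4 models = 20 objects)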
--
2.17.1