<http://www.gnu.org/licenses/>. */
+/* AArch64 128-bit lock-free atomic implementation.
+
+ 128-bit atomics are now lock-free for all AArch64 architecture versions.
+ This is backwards compatible with existing binaries (as we swap all uses
+ of 128-bit atomics via an ifunc) and gives better performance than locking
+ atomics.
+
+   128-bit atomic loads use an exclusive loop if LSE2 is not supported.
+ This results in an implicit store which is invisible to software as long
+ as the given address is writeable. Since all other atomics have explicit
+ writes, this will be true when using atomics in actual code.
+
+ The libat_<op>_16 entry points are ARMv8.0.
+ The libat_<op>_16_i1 entry points are used when LSE2 is available. */
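+
+/* For illustration only (hypothetical caller, not part of this patch): a
+   16-byte atomic access in C such as
+
+     __int128 v = __atomic_load_n (p, __ATOMIC_ACQUIRE);
+
+   compiles to a libatomic call, which the ifunc mechanism described above
+   resolves at runtime to libat_load_16 (baseline ARMv8.0) or to
+   libat_load_16_i1 (LSE2).  */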
+
+
.arch armv8-a+lse
#define ENTRY(name)	\
	.global name;	\
	.hidden name;	\
	.type name,%function;	\
	.p2align 4;	\
name:	\
	.cfi_startproc

#define END(name)	\
	.cfi_endproc;	\
	.size name, .-name;
+#define ALIAS(alias,name) \
+ .global alias; \
+ .set alias, name;
+
#define res0 x0
#define res1 x1
#define in0 x2
#define in1 x3
#define tmp0 x6
#define tmp1 x7
#define exp0 x8
#define exp1 x9

#define RELAXED 0
#define CONSUME 1
#define ACQUIRE 2
#define RELEASE 3
#define ACQ_REL 4
#define SEQ_CST 5
+ENTRY (libat_load_16)
+ mov x5, x0
+ cbnz w1, 2f
+
+ /* RELAXED. */
+1: ldxp res0, res1, [x5]
+ stxp w4, res0, res1, [x5]
+ cbnz w4, 1b
+ ret
+
+ /* ACQUIRE/CONSUME/SEQ_CST. */
+2: ldaxp res0, res1, [x5]
+ stxp w4, res0, res1, [x5]
+ cbnz w4, 2b
+ ret
+END (libat_load_16)
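+
+/* Note: the exclusive loops above store back the value just loaded, so a
+   128-bit atomic load from a read-only mapping would fault.  A hypothetical
+   example (not from this patch):
+
+     static const __int128 c = 42;   /* placed in .rodata  */
+     __int128 v = __atomic_load_n (&c, __ATOMIC_RELAXED);   /* would fault  */
+
+   This is the writeable-address requirement stated in the header comment.  */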
+
+
ENTRY (libat_load_16_i1)
	cbnz	w1, 1f

	/* RELAXED.  */
	ldp	res0, res1, [x0]
	ret
1:
	cmp	w1, SEQ_CST
	b.eq	2f

	/* ACQUIRE/CONSUME.  */
	ldp	res0, res1, [x0]
	dmb	ishld
	ret

	/* SEQ_CST.  */
2:	ldar	tmp0, [x0]	/* Block reordering with Store-Release instr.  */
	ldp	res0, res1, [x0]
	dmb	ishld
	ret
END (libat_load_16_i1)
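+
+/* FEAT_LSE2 guarantees that ldp/stp to a 16-byte aligned address is
+   single-copy atomic, which is why the _i1 variants need no exclusive
+   loop, only barriers for ordering.  */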
+ENTRY (libat_store_16)
+ cbnz w4, 2f
+
+ /* RELAXED. */
+1: ldxp xzr, tmp0, [x0]
+ stxp w4, in0, in1, [x0]
+ cbnz w4, 1b
+ ret
+
+ /* RELEASE/SEQ_CST. */
+2: ldxp xzr, tmp0, [x0]
+ stlxp w4, in0, in1, [x0]
+ cbnz w4, 2b
+ ret
+END (libat_store_16)
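+
+/* The ldxp before each store above only gains exclusive access to the
+   location; its results are discarded (the first into xzr) because the
+   loaded value itself is not needed.  */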
+
+
ENTRY (libat_store_16_i1)
	cbnz	w4, 1f

	/* RELAXED.  */
	stp	in0, in1, [x0]
	ret
/* RELEASE/SEQ_CST. */
-1: ldaxp xzr, tmp0, [x0]
+1: ldxp xzr, tmp0, [x0]
stlxp w4, in0, in1, [x0]
cbnz w4, 1b
ret
END (libat_store_16_i1)
-ENTRY (libat_exchange_16_i1)
+ENTRY (libat_exchange_16)
mov x5, x0
	cbnz	w4, 2f

	/* RELAXED.  */
1:	ldxp	res0, res1, [x5]
	stxp	w4, in0, in1, [x5]
	cbnz	w4, 1b
	ret

2:	cmp	w4, ACQUIRE
	b.hi	4f

	/* ACQUIRE/CONSUME.  */
3:	ldaxp	res0, res1, [x5]
	stxp	w4, in0, in1, [x5]
	cbnz	w4, 3b
	ret
-4:
- cmp w4, RELEASE
- b.ne 6f
- /* RELEASE. */
-5: ldxp res0, res1, [x5]
+ /* RELEASE/ACQ_REL/SEQ_CST. */
+4: ldaxp res0, res1, [x5]
stlxp w4, in0, in1, [x5]
- cbnz w4, 5b
+ cbnz w4, 4b
ret
+END (libat_exchange_16)
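+
+/* Register mapping for reference (per AAPCS64, an observation rather than
+   part of the patch): __atomic_exchange_n (p, v, order) arrives with p in
+   x0, the __int128 v in the even-aligned pair x2/x3 (in0/in1), order in w4,
+   and returns the previous value in x0/x1 (res0/res1).  */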
- /* ACQ_REL/SEQ_CST. */
-6: ldaxp res0, res1, [x5]
- stlxp w4, in0, in1, [x5]
- cbnz w4, 6b
+
+ENTRY (libat_compare_exchange_16)
+ ldp exp0, exp1, [x1]
+ cbz w4, 3f
+ cmp w4, RELEASE
+ b.hs 5f
+
+ /* ACQUIRE/CONSUME. */
+1: ldaxp tmp0, tmp1, [x0]
+ cmp tmp0, exp0
+ ccmp tmp1, exp1, 0, eq
+ csel tmp0, in0, tmp0, eq
+ csel tmp1, in1, tmp1, eq
+ stxp w4, tmp0, tmp1, [x0]
+ cbnz w4, 1b
+ beq 2f
+ stp tmp0, tmp1, [x1]
+2: cset x0, eq
+ ret
+
+ /* RELAXED. */
+3: ldxp tmp0, tmp1, [x0]
+ cmp tmp0, exp0
+ ccmp tmp1, exp1, 0, eq
+ csel tmp0, in0, tmp0, eq
+ csel tmp1, in1, tmp1, eq
+ stxp w4, tmp0, tmp1, [x0]
+ cbnz w4, 3b
+ beq 4f
+ stp tmp0, tmp1, [x1]
+4: cset x0, eq
+ ret
+
+ /* RELEASE/ACQ_REL/SEQ_CST. */
+5: ldaxp tmp0, tmp1, [x0]
+ cmp tmp0, exp0
+ ccmp tmp1, exp1, 0, eq
+ csel tmp0, in0, tmp0, eq
+ csel tmp1, in1, tmp1, eq
+ stlxp w4, tmp0, tmp1, [x0]
+ cbnz w4, 5b
+ beq 6f
+ stp tmp0, tmp1, [x1]
+6: cset x0, eq
ret
-END (libat_exchange_16_i1)
+END (libat_compare_exchange_16)
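+
+/* Illustrative C-level call (hypothetical, not from this patch):
+
+     bool ok = __atomic_compare_exchange_n (p, &expected, desired, 0,
+                                            __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
+
+   maps to x0 = p, x1 = &expected, desired in in0/in1, success order in w4;
+   the failure order (w5) is not needed above.  On mismatch the observed
+   value is stored back through x1 before returning 0.  */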
ENTRY (libat_compare_exchange_16_i1)
END (libat_compare_exchange_16_i1)
-ENTRY (libat_fetch_add_16_i1)
+ENTRY (libat_fetch_add_16)
	mov	x5, x0
	cbnz	w4, 2f

	/* RELAXED.  */
1:	ldxp	res0, res1, [x5]
	adds	tmp0, res0, in0
	adc	tmp1, res1, in1
	stxp	w4, tmp0, tmp1, [x5]
	cbnz	w4, 1b
	ret

	/* ACQUIRE/RELEASE/ACQ_REL/SEQ_CST.  */
2:	ldaxp	res0, res1, [x5]
	adds	tmp0, res0, in0
	adc	tmp1, res1, in1
	stlxp	w4, tmp0, tmp1, [x5]
	cbnz	w4, 2b
	ret
-END (libat_fetch_add_16_i1)
+END (libat_fetch_add_16)
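+
+/* The 128-bit addition is a two-register carry chain (adds/adc); the old
+   value is returned in res0/res1 while tmp0/tmp1 hold the sum that is
+   stored back.  */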
-ENTRY (libat_add_fetch_16_i1)
+ENTRY (libat_add_fetch_16)
	mov	x5, x0
	cbnz	w4, 2f

	/* RELAXED.  */
1:	ldxp	res0, res1, [x5]
	adds	res0, res0, in0
	adc	res1, res1, in1
	stxp	w4, res0, res1, [x5]
	cbnz	w4, 1b
	ret

	/* ACQUIRE/RELEASE/ACQ_REL/SEQ_CST.  */
2:	ldaxp	res0, res1, [x5]
	adds	res0, res0, in0
	adc	res1, res1, in1
	stlxp	w4, res0, res1, [x5]
	cbnz	w4, 2b
	ret
-END (libat_add_fetch_16_i1)
+END (libat_add_fetch_16)
-ENTRY (libat_fetch_sub_16_i1)
+ENTRY (libat_fetch_sub_16)
	mov	x5, x0
	cbnz	w4, 2f

	/* RELAXED.  */
1:	ldxp	res0, res1, [x5]
	subs	tmp0, res0, in0
	sbc	tmp1, res1, in1
	stxp	w4, tmp0, tmp1, [x5]
	cbnz	w4, 1b
	ret

	/* ACQUIRE/RELEASE/ACQ_REL/SEQ_CST.  */
2:	ldaxp	res0, res1, [x5]
	subs	tmp0, res0, in0
	sbc	tmp1, res1, in1
	stlxp	w4, tmp0, tmp1, [x5]
	cbnz	w4, 2b
	ret
-END (libat_fetch_sub_16_i1)
+END (libat_fetch_sub_16)
-ENTRY (libat_sub_fetch_16_i1)
+ENTRY (libat_sub_fetch_16)
	mov	x5, x0
	cbnz	w4, 2f

	/* RELAXED.  */
1:	ldxp	res0, res1, [x5]
	subs	res0, res0, in0
	sbc	res1, res1, in1
	stxp	w4, res0, res1, [x5]
	cbnz	w4, 1b
	ret

	/* ACQUIRE/RELEASE/ACQ_REL/SEQ_CST.  */
2:	ldaxp	res0, res1, [x5]
	subs	res0, res0, in0
	sbc	res1, res1, in1
	stlxp	w4, res0, res1, [x5]
	cbnz	w4, 2b
	ret
-END (libat_sub_fetch_16_i1)
+END (libat_sub_fetch_16)
-ENTRY (libat_fetch_or_16_i1)
+ENTRY (libat_fetch_or_16)
	mov	x5, x0
	cbnz	w4, 2f

	/* RELAXED.  */
1:	ldxp	res0, res1, [x5]
	orr	tmp0, res0, in0
	orr	tmp1, res1, in1
	stxp	w4, tmp0, tmp1, [x5]
	cbnz	w4, 1b
	ret

	/* ACQUIRE/RELEASE/ACQ_REL/SEQ_CST.  */
2:	ldaxp	res0, res1, [x5]
	orr	tmp0, res0, in0
	orr	tmp1, res1, in1
	stlxp	w4, tmp0, tmp1, [x5]
	cbnz	w4, 2b
	ret
-END (libat_fetch_or_16_i1)
+END (libat_fetch_or_16)
-ENTRY (libat_or_fetch_16_i1)
+ENTRY (libat_or_fetch_16)
	mov	x5, x0
	cbnz	w4, 2f

	/* RELAXED.  */
1:	ldxp	res0, res1, [x5]
	orr	res0, res0, in0
	orr	res1, res1, in1
	stxp	w4, res0, res1, [x5]
	cbnz	w4, 1b
	ret

	/* ACQUIRE/RELEASE/ACQ_REL/SEQ_CST.  */
2:	ldaxp	res0, res1, [x5]
	orr	res0, res0, in0
	orr	res1, res1, in1
	stlxp	w4, res0, res1, [x5]
	cbnz	w4, 2b
	ret
-END (libat_or_fetch_16_i1)
+END (libat_or_fetch_16)
-ENTRY (libat_fetch_and_16_i1)
+ENTRY (libat_fetch_and_16)
	mov	x5, x0
	cbnz	w4, 2f

	/* RELAXED.  */
1:	ldxp	res0, res1, [x5]
	and	tmp0, res0, in0
	and	tmp1, res1, in1
	stxp	w4, tmp0, tmp1, [x5]
	cbnz	w4, 1b
	ret

	/* ACQUIRE/RELEASE/ACQ_REL/SEQ_CST.  */
2:	ldaxp	res0, res1, [x5]
	and	tmp0, res0, in0
	and	tmp1, res1, in1
	stlxp	w4, tmp0, tmp1, [x5]
	cbnz	w4, 2b
	ret
-END (libat_fetch_and_16_i1)
+END (libat_fetch_and_16)
-ENTRY (libat_and_fetch_16_i1)
+ENTRY (libat_and_fetch_16)
	mov	x5, x0
	cbnz	w4, 2f

	/* RELAXED.  */
1:	ldxp	res0, res1, [x5]
	and	res0, res0, in0
	and	res1, res1, in1
	stxp	w4, res0, res1, [x5]
	cbnz	w4, 1b
	ret

	/* ACQUIRE/RELEASE/ACQ_REL/SEQ_CST.  */
2:	ldaxp	res0, res1, [x5]
	and	res0, res0, in0
	and	res1, res1, in1
	stlxp	w4, res0, res1, [x5]
	cbnz	w4, 2b
	ret
-END (libat_and_fetch_16_i1)
+END (libat_and_fetch_16)
-ENTRY (libat_fetch_xor_16_i1)
+ENTRY (libat_fetch_xor_16)
	mov	x5, x0
	cbnz	w4, 2f

	/* RELAXED.  */
1:	ldxp	res0, res1, [x5]
	eor	tmp0, res0, in0
	eor	tmp1, res1, in1
	stxp	w4, tmp0, tmp1, [x5]
	cbnz	w4, 1b
	ret

	/* ACQUIRE/RELEASE/ACQ_REL/SEQ_CST.  */
2:	ldaxp	res0, res1, [x5]
	eor	tmp0, res0, in0
	eor	tmp1, res1, in1
	stlxp	w4, tmp0, tmp1, [x5]
	cbnz	w4, 2b
	ret
-END (libat_fetch_xor_16_i1)
+END (libat_fetch_xor_16)
-ENTRY (libat_xor_fetch_16_i1)
+ENTRY (libat_xor_fetch_16)
	mov	x5, x0
	cbnz	w4, 2f

	/* RELAXED.  */
1:	ldxp	res0, res1, [x5]
	eor	res0, res0, in0
	eor	res1, res1, in1
	stxp	w4, res0, res1, [x5]
	cbnz	w4, 1b
	ret

	/* ACQUIRE/RELEASE/ACQ_REL/SEQ_CST.  */
2:	ldaxp	res0, res1, [x5]
	eor	res0, res0, in0
	eor	res1, res1, in1
	stlxp	w4, res0, res1, [x5]
	cbnz	w4, 2b
	ret
-END (libat_xor_fetch_16_i1)
+END (libat_xor_fetch_16)
-ENTRY (libat_fetch_nand_16_i1)
+ENTRY (libat_fetch_nand_16)
	mov	x5, x0
	mvn	in0, in0
	mvn	in1, in1
	cbnz	w4, 2f

	/* RELAXED.  */
1:	ldxp	res0, res1, [x5]
	orn	tmp0, in0, res0
	orn	tmp1, in1, res1
	stxp	w4, tmp0, tmp1, [x5]
	cbnz	w4, 1b
	ret

	/* ACQUIRE/RELEASE/ACQ_REL/SEQ_CST.  */
2:	ldaxp	res0, res1, [x5]
	orn	tmp0, in0, res0
	orn	tmp1, in1, res1
	stlxp	w4, tmp0, tmp1, [x5]
	cbnz	w4, 2b
	ret
-END (libat_fetch_nand_16_i1)
+END (libat_fetch_nand_16)
-ENTRY (libat_nand_fetch_16_i1)
+ENTRY (libat_nand_fetch_16)
	mov	x5, x0
	mvn	in0, in0
	mvn	in1, in1
	cbnz	w4, 2f

	/* RELAXED.  */
1:	ldxp	res0, res1, [x5]
	orn	res0, in0, res0
	orn	res1, in1, res1
	stxp	w4, res0, res1, [x5]
	cbnz	w4, 1b
	ret

	/* ACQUIRE/RELEASE/ACQ_REL/SEQ_CST.  */
2:	ldaxp	res0, res1, [x5]
	orn	res0, in0, res0
	orn	res1, in1, res1
	stlxp	w4, res0, res1, [x5]
	cbnz	w4, 2b
	ret
-END (libat_nand_fetch_16_i1)
+END (libat_nand_fetch_16)
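+
+/* The nand entry points negate the inputs once up front (mvn) and combine
+   them with orn inside the loop, using De Morgan's law:
+   ~(res & in) == ~in | ~res.  */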
-ENTRY (libat_test_and_set_16_i1)
- mov w2, 1
- cbnz w1, 2f
-
- /* RELAXED. */
- swpb w0, w2, [x0]
- ret
+/* __atomic_test_and_set is always inlined, so this entry is unused and
+ only required for completeness. */
+ENTRY (libat_test_and_set_16)
- /* ACQUIRE/CONSUME/RELEASE/ACQ_REL/SEQ_CST. */
-2: swpalb w0, w2, [x0]
+ /* RELAXED/ACQUIRE/CONSUME/RELEASE/ACQ_REL/SEQ_CST. */
+	mov	x5, x0
+	mov	w2, 1
+1: ldaxrb w0, [x5]
+ stlxrb w4, w2, [x5]
+ cbnz w4, 1b
ret
-END (libat_test_and_set_16_i1)
+END (libat_test_and_set_16)
+
+
+/* Alias entry points which are the same in baseline and LSE2. */
+
+ALIAS (libat_exchange_16_i1, libat_exchange_16)
+ALIAS (libat_fetch_add_16_i1, libat_fetch_add_16)
+ALIAS (libat_add_fetch_16_i1, libat_add_fetch_16)
+ALIAS (libat_fetch_sub_16_i1, libat_fetch_sub_16)
+ALIAS (libat_sub_fetch_16_i1, libat_sub_fetch_16)
+ALIAS (libat_fetch_or_16_i1, libat_fetch_or_16)
+ALIAS (libat_or_fetch_16_i1, libat_or_fetch_16)
+ALIAS (libat_fetch_and_16_i1, libat_fetch_and_16)
+ALIAS (libat_and_fetch_16_i1, libat_and_fetch_16)
+ALIAS (libat_fetch_xor_16_i1, libat_fetch_xor_16)
+ALIAS (libat_xor_fetch_16_i1, libat_xor_fetch_16)
+ALIAS (libat_fetch_nand_16_i1, libat_fetch_nand_16)
+ALIAS (libat_nand_fetch_16_i1, libat_nand_fetch_16)
+ALIAS (libat_test_and_set_16_i1, libat_test_and_set_16)
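+
+/* Apart from the CASP family (presumably usable by compare_exchange_16_i1),
+   LSE adds no 128-bit read-modify-write instructions, so for these
+   operations the baseline exclusive-loop code is also used when LSE2 is
+   available.  */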
/* GNU_PROPERTY_AARCH64_* macros from elf.h for use in asm code. */