[PATCH 4/5] aarch64: Add backend support for atomic fetch min/max operations
soumyaa@nvidia.com
soumyaa@nvidia.com
Mon Jan 19 16:23:39 GMT 2026
From: Soumya AR <soumyaa@nvidia.com>
This patch adds support for atomic min/max instructions offered by aarch64 under
LSE.
The implementation provides three execution paths:
1. When LSE is available at compile time (-march=armv8.1-a or later):
Emits inline LSE atomic min/max instructions (ldsmin, ldsmax, ldumin, ldumax).
2. When LSE availability is unknown at compile time (default):
Uses outline atomics - calls to libgcc functions that perform runtime
detection for LSE and dispatch to either LSE instructions or LL/SC sequences.
3. When outline atomics are explicitly disabled (-mno-outline-atomics) on
non-LSE targets: Emits inline LL/SC sequences (LDXR, STXR, etc.)
using conditional select instructions for min/max.
----
For op_fetch variants, we first generate the appropriate fetch_op variant, then
use aarch64_split_atomic_op to generate the same operation (non-atomically) to
return the updated value. This function is extended to handle the min/max
operations. We have to be careful about QI/HI modes: ldxr and its variants
do a zero-extended load, so for the signed operations (smin/smax) both operands
must be explicitly sign-extended before they are compared.
----
lse.S is responsible for emitting the appropriate LSE or non-LSE sequence. For
min/max on non-LSE systems, this is done using a conditional select.
There is, however, one special case: systems with the CSSC extension also have
native min/max instructions, and there it would be preferable to emit the LL/SC
sequence using those instructions instead of compare + conditional select.
This would only matter on targets with CSSC but without LSE, which is quite
improbable, so I haven't added special handling for the CSSC feature.
----
Bootstrapped and regression tested on aarch64-linux-gnu and x86_64-linux-gnu.
Cross-compiled and regression tested for arm-linux-gnueabihf-armv7-a and
aarch64-linux-gnu without LSE.
Signed-off-by: Soumya AR <soumyaa@nvidia.com>
gcc/ChangeLog:
* config/aarch64/aarch64-protos.h: Add declarations for new
outline atomic min/max name structures.
* config/aarch64/aarch64.cc (DEF4): Define names for outline
atomic min/max functions.
(aarch64_ool_ldsmin_names, aarch64_ool_ldsmax_names,
aarch64_ool_ldumin_names, aarch64_ool_ldumax_names): New.
(aarch64_split_atomic_op): Add support for SMIN, SMAX, UMIN,
UMAX operations with sign extension for QI/HI modes.
* config/aarch64/atomics.md: Add LSE and outline atomics
support for atomic fetch min/max operations.
* config/aarch64/iterators.md: Add min/max iterators.
libgcc/ChangeLog:
* config/aarch64/lse.S: Implement outline atomic min/max
functions.
* config/aarch64/t-lse: Add min/max function entries.
gcc/testsuite/ChangeLog:
* gcc.target/aarch64/atomic-minmax-lse.c: New test.
* gcc.target/aarch64/atomic-minmax-nolse.c: New test.
* gcc.target/aarch64/atomic-minmax.c: New test.
* gcc.target/aarch64/atomic-minmax.x: New test.
---
gcc/config/aarch64/aarch64-protos.h | 4 +
gcc/config/aarch64/aarch64.cc | 51 +++++
gcc/config/aarch64/atomics.md | 54 ++++-
gcc/config/aarch64/iterators.md | 30 ++-
.../gcc.target/aarch64/atomic-minmax-lse.c | 122 +++++++++++
.../gcc.target/aarch64/atomic-minmax-nolse.c | 196 ++++++++++++++++++
.../gcc.target/aarch64/atomic-minmax.c | 128 ++++++++++++
.../gcc.target/aarch64/atomic-minmax.x | 185 +++++++++++++++++
libgcc/config/aarch64/lse.S | 62 +++++-
libgcc/config/aarch64/t-lse | 3 +-
10 files changed, 825 insertions(+), 10 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/aarch64/atomic-minmax-lse.c
create mode 100644 gcc/testsuite/gcc.target/aarch64/atomic-minmax-nolse.c
create mode 100644 gcc/testsuite/gcc.target/aarch64/atomic-minmax.c
create mode 100644 gcc/testsuite/gcc.target/aarch64/atomic-minmax.x
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index 48d3a3de235..4df2d37e253 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -1271,6 +1271,10 @@ extern const atomic_ool_names aarch64_ool_ldadd_names;
extern const atomic_ool_names aarch64_ool_ldset_names;
extern const atomic_ool_names aarch64_ool_ldclr_names;
extern const atomic_ool_names aarch64_ool_ldeor_names;
+extern const atomic_ool_names aarch64_ool_ldsmin_names;
+extern const atomic_ool_names aarch64_ool_ldsmax_names;
+extern const atomic_ool_names aarch64_ool_ldumin_names;
+extern const atomic_ool_names aarch64_ool_ldumax_names;
tree aarch64_resolve_overloaded_builtin_general (location_t, tree, void *);
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 293afa52b3b..5d2f96b7f20 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -26518,6 +26518,10 @@ const atomic_ool_names aarch64_ool_ldadd_names = { { DEF4(ldadd) } };
const atomic_ool_names aarch64_ool_ldset_names = { { DEF4(ldset) } };
const atomic_ool_names aarch64_ool_ldclr_names = { { DEF4(ldclr) } };
const atomic_ool_names aarch64_ool_ldeor_names = { { DEF4(ldeor) } };
+const atomic_ool_names aarch64_ool_ldsmin_names = { { DEF4(ldsmin) } };
+const atomic_ool_names aarch64_ool_ldsmax_names = { { DEF4(ldsmax) } };
+const atomic_ool_names aarch64_ool_ldumin_names = { { DEF4(ldumin) } };
+const atomic_ool_names aarch64_ool_ldumax_names = { { DEF4(ldumax) } };
#undef DEF0
#undef DEF4
@@ -26770,6 +26774,53 @@ aarch64_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
emit_insn (gen_rtx_SET (new_out, x));
break;
+ case SMIN:
+ case SMAX:
+ case UMIN:
+ case UMAX:
+ {
+ rtx_code cmp_code;
+ switch (code)
+ {
+ case SMIN:
+ cmp_code = LT;
+ break;
+ case SMAX:
+ cmp_code = GT;
+ break;
+ case UMIN:
+ cmp_code = LTU;
+ break;
+ case UMAX:
+ cmp_code = GTU;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ if ((code == SMIN || code == SMAX) && (mode == QImode || mode == HImode))
+ {
+ rtx old_extended = gen_rtx_REG (wmode, REGNO (old_out));
+ emit_insn (
+ gen_rtx_SET (old_extended,
+ gen_rtx_SIGN_EXTEND (wmode,
+ gen_lowpart (mode, old_out))));
+ old_out = old_extended;
+
+ rtx value_extended = gen_rtx_REG (wmode, REGNO (value));
+ emit_insn (
+ gen_rtx_SET (value_extended,
+ gen_rtx_SIGN_EXTEND (wmode,
+ gen_lowpart (mode, value))));
+ value = value_extended;
+ }
+ rtx cc_reg = aarch64_gen_compare_reg (cmp_code, old_out, value);
+ rtx cond = gen_rtx_fmt_ee (cmp_code, VOIDmode, cc_reg, const0_rtx);
+ x = gen_rtx_IF_THEN_ELSE (wmode, cond, old_out, value);
+ emit_insn (gen_rtx_SET (new_out, x));
+ break;
+ }
+
case MINUS:
if (CONST_INT_P (value))
{
diff --git a/gcc/config/aarch64/atomics.md b/gcc/config/aarch64/atomics.md
index c9534d43c0f..8dc0e3ffdac 100644
--- a/gcc/config/aarch64/atomics.md
+++ b/gcc/config/aarch64/atomics.md
@@ -284,6 +284,18 @@
case XOR:
gen = gen_aarch64_atomic_xor<mode>_lse;
break;
+ case SMAX:
+ gen = gen_aarch64_atomic_smax<mode>_lse;
+ break;
+ case SMIN:
+ gen = gen_aarch64_atomic_smin<mode>_lse;
+ break;
+ case UMAX:
+ gen = gen_aarch64_atomic_umax<mode>_lse;
+ break;
+ case UMIN:
+ gen = gen_aarch64_atomic_umin<mode>_lse;
+ break;
case AND:
operands[1] = expand_simple_unop (<MODE>mode, NOT, operands[1],
NULL, 1);
@@ -317,6 +329,18 @@
NULL, 1);
names = &aarch64_ool_ldclr_names;
break;
+ case SMIN:
+ names = &aarch64_ool_ldsmin_names;
+ break;
+ case SMAX:
+ names = &aarch64_ool_ldsmax_names;
+ break;
+ case UMIN:
+ names = &aarch64_ool_ldumin_names;
+ break;
+ case UMAX:
+ names = &aarch64_ool_ldumax_names;
+ break;
default:
gcc_unreachable ();
}
@@ -442,6 +466,18 @@
case XOR:
gen = gen_aarch64_atomic_fetch_xor<mode>_lse;
break;
+ case SMAX:
+ gen = gen_aarch64_atomic_fetch_smax<mode>_lse;
+ break;
+ case SMIN:
+ gen = gen_aarch64_atomic_fetch_smin<mode>_lse;
+ break;
+ case UMAX:
+ gen = gen_aarch64_atomic_fetch_umax<mode>_lse;
+ break;
+ case UMIN:
+ gen = gen_aarch64_atomic_fetch_umin<mode>_lse;
+ break;
case AND:
operands[2] = expand_simple_unop (<MODE>mode, NOT, operands[2],
NULL, 1);
@@ -475,6 +511,18 @@
NULL, 1);
names = &aarch64_ool_ldclr_names;
break;
+ case SMIN:
+ names = &aarch64_ool_ldsmin_names;
+ break;
+ case SMAX:
+ names = &aarch64_ool_ldsmax_names;
+ break;
+ case UMIN:
+ names = &aarch64_ool_ldumin_names;
+ break;
+ case UMAX:
+ names = &aarch64_ool_ldumax_names;
+ break;
default:
gcc_unreachable ();
}
@@ -581,7 +629,11 @@
operands[2] = force_reg (<MODE>mode, operands[2]);
emit_insn (gen_atomic_fetch_<atomic_optab><mode>
(tmp, operands[1], operands[2], operands[3]));
- tmp = expand_simple_binop (<MODE>mode, <CODE>, tmp, operands[2],
+ if (<CODE> == SMIN || <CODE> == SMAX)
+ tmp = expand_simple_binop (<MODE>mode, <CODE>, tmp, operands[2],
+ operands[0], 0, OPTAB_WIDEN);
+ else
+ tmp = expand_simple_binop (<MODE>mode, <CODE>, tmp, operands[2],
operands[0], 1, OPTAB_WIDEN);
emit_move_insn (operands[0], tmp);
}
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index b425b0ed2ca..e4c1c8844bb 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -1354,6 +1354,10 @@
UNSPECV_ATOMIC_LDOP_BIC ; Represent an atomic load-bic
UNSPECV_ATOMIC_LDOP_XOR ; Represent an atomic load-xor
UNSPECV_ATOMIC_LDOP_PLUS ; Represent an atomic load-add
+ UNSPECV_ATOMIC_LDOP_SMAX ; Represent an atomic load-smax
+ UNSPECV_ATOMIC_LDOP_SMIN ; Represent an atomic load-smin
+ UNSPECV_ATOMIC_LDOP_UMAX ; Represent an atomic load-umax
+ UNSPECV_ATOMIC_LDOP_UMIN ; Represent an atomic load-umin
])
;; -------------------------------------------------------------------
@@ -2898,7 +2902,7 @@
;; Iterator for __sync_<op> operations that where the operation can be
;; represented directly RTL. This is all of the sync operations bar
;; nand.
-(define_code_iterator atomic_op [plus minus ior xor and])
+(define_code_iterator atomic_op [plus minus ior xor and smin smax umin umax])
;; Iterator for integer conversions
(define_code_iterator FIXUORS [fix unsigned_fix])
@@ -3215,21 +3219,27 @@
;; Atomic operations
(define_code_attr atomic_optab
- [(ior "or") (xor "xor") (and "and") (plus "add") (minus "sub")])
+ [(ior "or") (xor "xor") (and "and") (plus "add") (minus "sub")
+ (smin "smin") (smax "smax") (umin "umin") (umax "umax")])
(define_code_attr atomic_op_operand
[(ior "aarch64_logical_operand")
(xor "aarch64_logical_operand")
(and "aarch64_logical_operand")
(plus "aarch64_plus_operand")
- (minus "aarch64_plus_operand")])
+ (minus "aarch64_plus_operand")
+ (smin "aarch64_sminmax_operand")
+ (smax "aarch64_sminmax_operand")
+ (umin "aarch64_uminmax_operand")
+ (umax "aarch64_uminmax_operand")])
;; Constants acceptable for atomic operations.
;; This definition must appear in this file before the iterators it refers to.
(define_code_attr const_atomic
[(plus "IJ") (minus "IJ")
(xor "<lconst_atomic>") (ior "<lconst_atomic>")
- (and "<lconst_atomic>")])
+ (and "<lconst_atomic>")
+ (smin "") (smax "") (umin "") (umax "")])
;; Attribute to describe constants acceptable in atomic logical operations
(define_mode_attr lconst_atomic [(QI "K") (HI "K") (SI "K") (DI "L")])
@@ -4096,7 +4106,9 @@
(define_int_iterator ATOMIC_LDOP
[UNSPECV_ATOMIC_LDOP_OR UNSPECV_ATOMIC_LDOP_BIC
- UNSPECV_ATOMIC_LDOP_XOR UNSPECV_ATOMIC_LDOP_PLUS])
+ UNSPECV_ATOMIC_LDOP_XOR UNSPECV_ATOMIC_LDOP_PLUS
+ UNSPECV_ATOMIC_LDOP_SMAX UNSPECV_ATOMIC_LDOP_SMIN
+ UNSPECV_ATOMIC_LDOP_UMAX UNSPECV_ATOMIC_LDOP_UMIN])
(define_int_iterator SUBDI_BITS [8 16 32])
@@ -5255,11 +5267,15 @@
(define_int_attr atomic_ldop
[(UNSPECV_ATOMIC_LDOP_OR "set") (UNSPECV_ATOMIC_LDOP_BIC "clr")
- (UNSPECV_ATOMIC_LDOP_XOR "eor") (UNSPECV_ATOMIC_LDOP_PLUS "add")])
+ (UNSPECV_ATOMIC_LDOP_XOR "eor") (UNSPECV_ATOMIC_LDOP_PLUS "add")
+ (UNSPECV_ATOMIC_LDOP_SMAX "smax") (UNSPECV_ATOMIC_LDOP_SMIN "smin")
+ (UNSPECV_ATOMIC_LDOP_UMAX "umax") (UNSPECV_ATOMIC_LDOP_UMIN "umin")])
(define_int_attr atomic_ldoptab
[(UNSPECV_ATOMIC_LDOP_OR "ior") (UNSPECV_ATOMIC_LDOP_BIC "bic")
- (UNSPECV_ATOMIC_LDOP_XOR "xor") (UNSPECV_ATOMIC_LDOP_PLUS "add")])
+ (UNSPECV_ATOMIC_LDOP_XOR "xor") (UNSPECV_ATOMIC_LDOP_PLUS "add")
+ (UNSPECV_ATOMIC_LDOP_SMAX "smax") (UNSPECV_ATOMIC_LDOP_SMIN "smin")
+ (UNSPECV_ATOMIC_LDOP_UMAX "umax") (UNSPECV_ATOMIC_LDOP_UMIN "umin")])
(define_int_attr fp8_cvt_uns_op
[(UNSPEC_F1CVT "f1cvt")
diff --git a/gcc/testsuite/gcc.target/aarch64/atomic-minmax-lse.c b/gcc/testsuite/gcc.target/aarch64/atomic-minmax-lse.c
new file mode 100644
index 00000000000..6d579f8360a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/atomic-minmax-lse.c
@@ -0,0 +1,122 @@
+/* { dg-do compile } */
+/* { dg-options "-march=armv8-a+lse" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "atomic-minmax.x"
+
+/* { dg-final { scan-assembler-not "\tldxr" } } */
+/* { dg-final { scan-assembler-not "\tldaxr" } } */
+/* { dg-final { scan-assembler-not "\tstxr" } } */
+/* { dg-final { scan-assembler-not "\tstlxr" } } */
+
+/*
+** test_smin_s8:
+** ...
+** ldsminb w[0-9]+, w[0-9]+, \[x[0-9]+\]
+** ...
+*/
+
+/*
+** test_smax_s8:
+** ...
+** ldsmaxlb w[0-9]+, w[0-9]+, \[x[0-9]+\]
+** ...
+*/
+
+/*
+** test_smin_s16:
+** ...
+** ldsminah w[0-9]+, w[0-9]+, \[x[0-9]+\]
+** ...
+*/
+
+/*
+** test_smax_s16:
+** ...
+** ldsmaxalh w[0-9]+, w[0-9]+, \[x[0-9]+\]
+** ...
+*/
+
+/*
+** test_smin_s32:
+** ...
+** ldsmin w[0-9]+, w[0-9]+, \[x[0-9]+\]
+** ...
+*/
+
+/*
+** test_smax_s32:
+** ...
+** ldsmaxal w[0-9]+, w[0-9]+, \[x[0-9]+\]
+** ...
+*/
+
+/*
+** test_smin_s64:
+** ...
+** ldsmina x[0-9]+, x[0-9]+, \[x[0-9]+\]
+** ...
+*/
+
+/*
+** test_smax_s64:
+** ...
+** ldsmax x[0-9]+, x[0-9]+, \[x[0-9]+\]
+** ...
+*/
+
+/*
+** test_umin_u8:
+** ...
+** lduminb w[0-9]+, w[0-9]+, \[x[0-9]+\]
+** ...
+*/
+
+/*
+** test_umax_u8:
+** ...
+** ldumaxab w[0-9]+, w[0-9]+, \[x[0-9]+\]
+** ...
+*/
+
+/*
+** test_umin_u16:
+** ...
+** lduminah w[0-9]+, w[0-9]+, \[x[0-9]+\]
+** ...
+*/
+
+/*
+** test_umax_u16:
+** ...
+** ldumaxlh w[0-9]+, w[0-9]+, \[x[0-9]+\]
+** ...
+*/
+
+/*
+** test_umin_u32:
+** ...
+** lduminal w[0-9]+, w[0-9]+, \[x[0-9]+\]
+** ...
+*/
+
+/*
+** test_umax_u32:
+** ...
+** ldumax w[0-9]+, w[0-9]+, \[x[0-9]+\]
+** ...
+*/
+
+/*
+** test_umin_u64:
+** ...
+** ldumin x[0-9]+, x[0-9]+, \[x[0-9]+\]
+** ...
+*/
+
+/*
+** test_umax_u64:
+** ...
+** ldumaxal x[0-9]+, x[0-9]+, \[x[0-9]+\]
+** ...
+*/
\ No newline at end of file
diff --git a/gcc/testsuite/gcc.target/aarch64/atomic-minmax-nolse.c b/gcc/testsuite/gcc.target/aarch64/atomic-minmax-nolse.c
new file mode 100644
index 00000000000..e4962974ea3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/atomic-minmax-nolse.c
@@ -0,0 +1,196 @@
+/* { dg-do compile } */
+/* { dg-options "-march=armv8-a+nolse -mno-outline-atomics" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "atomic-minmax.x"
+
+/* { dg-final { scan-assembler-not "\tldsmin" } } */
+/* { dg-final { scan-assembler-not "\tldsmax" } } */
+/* { dg-final { scan-assembler-not "\tldumin" } } */
+/* { dg-final { scan-assembler-not "\tldumax" } } */
+
+/* { dg-final { scan-assembler-not "__aarch64_" } } */
+
+/*
+** test_smin_s8:
+** ...
+** ldxrb w[0-9]+, \[x[0-9]+\]
+** sxtb w[0-9]+, w[0-9]+
+** sxtb w[0-9]+, w[0-9]+
+** cmp w[0-9]+, w[0-9]+
+** csel w[0-9]+, w[0-9]+, w[0-9]+, lt
+** stxrb w[0-9]+, w[0-9]+, \[x[0-9]+\]
+** cbnz w[0-9]+, .*
+** ...
+*/
+
+/*
+** test_smax_s8:
+** ...
+** ldxrb w[0-9]+, \[x[0-9]+\]
+** sxtb w[0-9]+, w[0-9]+
+** sxtb w[0-9]+, w[0-9]+
+** cmp w[0-9]+, w[0-9]+
+** csel w[0-9]+, w[0-9]+, w[0-9]+, gt
+** stlxrb w[0-9]+, w[0-9]+, \[x[0-9]+\]
+** cbnz w[0-9]+, .*
+** ...
+*/
+
+/*
+** test_smin_s16:
+** ...
+** ldaxrh w[0-9]+, \[x[0-9]+\]
+** sxth w[0-9]+, w[0-9]+
+** sxth w[0-9]+, w[0-9]+
+** cmp w[0-9]+, w[0-9]+
+** csel w[0-9]+, w[0-9]+, w[0-9]+, lt
+** stxrh w[0-9]+, w[0-9]+, \[x[0-9]+\]
+** cbnz w[0-9]+, .*
+** ...
+*/
+
+/*
+** test_smax_s16:
+** ...
+** ldaxrh w[0-9]+, \[x[0-9]+\]
+** sxth w[0-9]+, w[0-9]+
+** sxth w[0-9]+, w[0-9]+
+** cmp w[0-9]+, w[0-9]+
+** csel w[0-9]+, w[0-9]+, w[0-9]+, gt
+** stlxrh w[0-9]+, w[0-9]+, \[x[0-9]+\]
+** cbnz w[0-9]+, .*
+** ...
+*/
+
+/*
+** test_smin_s32:
+** ...
+** ldxr w[0-9]+, \[x[0-9]+\]
+** cmp w[0-9]+, w[0-9]+
+** csel w[0-9]+, w[0-9]+, w[0-9]+, lt
+** stxr w[0-9]+, w[0-9]+, \[x[0-9]+\]
+** cbnz w[0-9]+, .*
+** ...
+*/
+
+/*
+** test_smax_s32:
+** ...
+** ldaxr w[0-9]+, \[x[0-9]+\]
+** cmp w[0-9]+, w[0-9]+
+** csel w[0-9]+, w[0-9]+, w[0-9]+, gt
+** stlxr w[0-9]+, w[0-9]+, \[x[0-9]+\]
+** cbnz w[0-9]+, .*
+** ...
+*/
+
+/*
+** test_smin_s64:
+** ...
+** ldaxr x[0-9]+, \[x[0-9]+\]
+** cmp x[0-9]+, x[0-9]+
+** csel x[0-9]+, x[0-9]+, x[0-9]+, lt
+** stxr w[0-9]+, x[0-9]+, \[x[0-9]+\]
+** cbnz w[0-9]+, .*
+** ...
+*/
+
+/*
+** test_smax_s64:
+** ...
+** ldxr x[0-9]+, \[x[0-9]+\]
+** cmp x[0-9]+, x[0-9]+
+** csel x[0-9]+, x[0-9]+, x[0-9]+, gt
+** stxr w[0-9]+, x[0-9]+, \[x[0-9]+\]
+** cbnz w[0-9]+, .*
+** ...
+*/
+
+/*
+** test_umin_u8:
+** ...
+** ldxrb w[0-9]+, \[x[0-9]+\]
+** cmp w[0-9]+, w[0-9]+
+** csel w[0-9]+, w[0-9]+, w[0-9]+, cc
+** stxrb w[0-9]+, w[0-9]+, \[x[0-9]+\]
+** cbnz w[0-9]+, .*
+** ...
+*/
+
+/*
+** test_umax_u8:
+** ...
+** ldaxrb w[0-9]+, \[x[0-9]+\]
+** cmp w[0-9]+, w[0-9]+
+** csel w[0-9]+, w[0-9]+, w[0-9]+, hi
+** stxrb w[0-9]+, w[0-9]+, \[x[0-9]+\]
+** cbnz w[0-9]+, .*
+** ...
+*/
+
+/*
+** test_umin_u16:
+** ...
+** ldaxrh w[0-9]+, \[x[0-9]+\]
+** cmp w[0-9]+, w[0-9]+
+** csel w[0-9]+, w[0-9]+, w[0-9]+, cc
+** stxrh w[0-9]+, w[0-9]+, \[x[0-9]+\]
+** cbnz w[0-9]+, .*
+** ...
+*/
+
+/*
+** test_umax_u16:
+** ...
+** ldxrh w[0-9]+, \[x[0-9]+\]
+** cmp w[0-9]+, w[0-9]+
+** csel w[0-9]+, w[0-9]+, w[0-9]+, hi
+** stlxrh w[0-9]+, w[0-9]+, \[x[0-9]+\]
+** cbnz w[0-9]+, .*
+** ...
+*/
+
+/*
+** test_umin_u32:
+** ...
+** ldaxr w[0-9]+, \[x[0-9]+\]
+** cmp w[0-9]+, w[0-9]+
+** csel w[0-9]+, w[0-9]+, w[0-9]+, cc
+** stlxr w[0-9]+, w[0-9]+, \[x[0-9]+\]
+** cbnz w[0-9]+, .*
+** ...
+*/
+
+/*
+** test_umax_u32:
+** ...
+** ldxr w[0-9]+, \[x[0-9]+\]
+** cmp w[0-9]+, w[0-9]+
+** csel w[0-9]+, w[0-9]+, w[0-9]+, hi
+** stxr w[0-9]+, w[0-9]+, \[x[0-9]+\]
+** cbnz w[0-9]+, .*
+** ...
+*/
+
+/*
+** test_umin_u64:
+** ...
+** ldxr x[0-9]+, \[x[0-9]+\]
+** cmp x[0-9]+, x[0-9]+
+** csel x[0-9]+, x[0-9]+, x[0-9]+, cc
+** stxr w[0-9]+, x[0-9]+, \[x[0-9]+\]
+** cbnz w[0-9]+, .*
+** ...
+*/
+
+/*
+** test_umax_u64:
+** ...
+** ldaxr x[0-9]+, \[x[0-9]+\]
+** cmp x[0-9]+, x[0-9]+
+** csel x[0-9]+, x[0-9]+, x[0-9]+, hi
+** stlxr w[0-9]+, x[0-9]+, \[x[0-9]+\]
+** cbnz w[0-9]+, .*
+** ...
+*/
\ No newline at end of file
diff --git a/gcc/testsuite/gcc.target/aarch64/atomic-minmax.c b/gcc/testsuite/gcc.target/aarch64/atomic-minmax.c
new file mode 100644
index 00000000000..225816b63bd
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/atomic-minmax.c
@@ -0,0 +1,128 @@
+/* { dg-do run } */
+/* { dg-options "--save-temps" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "atomic-minmax.x"
+
+int main ()
+{
+ run_tests();
+ return 0;
+}
+
+/* { dg-final { scan-assembler-not "\tldsmin" } } */
+/* { dg-final { scan-assembler-not "\tldsmax" } } */
+/* { dg-final { scan-assembler-not "\tldumin" } } */
+/* { dg-final { scan-assembler-not "\tldumax" } } */
+
+/*
+** test_smin_s8:
+** ...
+** bl __aarch64_ldsmin1_relax
+** ...
+*/
+
+/*
+** test_smax_s8:
+** ...
+** bl __aarch64_ldsmax1_rel
+** ...
+*/
+
+/*
+** test_smin_s16:
+** ...
+** bl __aarch64_ldsmin2_acq
+** ...
+*/
+
+/*
+** test_smax_s16:
+** ...
+** bl __aarch64_ldsmax2_acq_rel
+** ...
+*/
+
+/*
+** test_smin_s32:
+** ...
+** bl __aarch64_ldsmin4_relax
+** ...
+*/
+
+/*
+** test_smax_s32:
+** ...
+** bl __aarch64_ldsmax4_acq_rel
+** ...
+*/
+
+/*
+** test_smin_s64:
+** ...
+** bl __aarch64_ldsmin8_acq
+** ...
+*/
+
+/*
+** test_smax_s64:
+** ...
+** bl __aarch64_ldsmax8_relax
+** ...
+*/
+
+/*
+** test_umin_u8:
+** ...
+** bl __aarch64_ldumin1_relax
+** ...
+*/
+
+/*
+** test_umax_u8:
+** ...
+** bl __aarch64_ldumax1_acq
+** ...
+*/
+
+/*
+** test_umin_u16:
+** ...
+** bl __aarch64_ldumin2_acq
+** ...
+*/
+
+/*
+** test_umax_u16:
+** ...
+** bl __aarch64_ldumax2_rel
+** ...
+*/
+
+/*
+** test_umin_u32:
+** ...
+** bl __aarch64_ldumin4_acq_rel
+** ...
+*/
+
+/*
+** test_umax_u32:
+** ...
+** bl __aarch64_ldumax4_relax
+** ...
+*/
+
+/*
+** test_umin_u64:
+** ...
+** bl __aarch64_ldumin8_relax
+** ...
+*/
+
+/*
+** test_umax_u64:
+** ...
+** bl __aarch64_ldumax8_acq_rel
+** ...
+*/
diff --git a/gcc/testsuite/gcc.target/aarch64/atomic-minmax.x b/gcc/testsuite/gcc.target/aarch64/atomic-minmax.x
new file mode 100644
index 00000000000..e9e21d3f2db
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/atomic-minmax.x
@@ -0,0 +1,185 @@
+#include <stdint.h>
+
+extern void abort(void);
+
+#define TEST_FETCH_OP(TYPE, VAR, OP, INIT, ARG, EXPECTED_OLD, EXPECTED_NEW, MEM_ORDER) \
+ do { \
+ VAR = INIT; \
+ TYPE old_val = __atomic_fetch_##OP(&VAR, ARG, MEM_ORDER); \
+ TYPE new_val = VAR; \
+ if (old_val != EXPECTED_OLD) { \
+ abort(); \
+ } \
+ if (new_val != EXPECTED_NEW) { \
+ abort(); \
+ } \
+ } while (0)
+
+#define TEST_OP_FETCH(TYPE, VAR, OP, INIT, ARG, EXPECTED_OLD, EXPECTED_NEW, MEM_ORDER) \
+ do { \
+ VAR = INIT; \
+ TYPE result = __atomic_##OP##_fetch(&VAR, ARG, MEM_ORDER); \
+ TYPE new_val = VAR; \
+ if (result != EXPECTED_NEW) { \
+ abort(); \
+ } \
+ if (new_val != EXPECTED_NEW) { \
+ abort(); \
+ } \
+ } while (0)
+
+#define GEN_TYPE_TESTS(TYPE, VAR, SUFFIX, IS_SIGNED) \
+ TYPE VAR; \
+ void test_##SUFFIX() { \
+ TEST_FETCH_OP(TYPE, VAR, min, 10, 5, 10, 5, __ATOMIC_RELAXED); \
+ TEST_FETCH_OP(TYPE, VAR, min, 10, 20, 10, 10, __ATOMIC_SEQ_CST); \
+ TEST_FETCH_OP(TYPE, VAR, max, 10, 20, 10, 20, __ATOMIC_ACQUIRE); \
+ TEST_FETCH_OP(TYPE, VAR, max, 10, 5, 10, 10, __ATOMIC_RELEASE); \
+ \
+ TEST_OP_FETCH(TYPE, VAR, min, 10, 5, 10, 5, __ATOMIC_CONSUME); \
+ TEST_OP_FETCH(TYPE, VAR, min, 10, 20, 10, 10, __ATOMIC_ACQ_REL); \
+ TEST_OP_FETCH(TYPE, VAR, max, 10, 20, 10, 20, __ATOMIC_RELAXED); \
+ TEST_OP_FETCH(TYPE, VAR, max, 10, 5, 10, 10, __ATOMIC_SEQ_CST); \
+ \
+ if (IS_SIGNED) { \
+ TEST_FETCH_OP(TYPE, VAR, min, -10, -20, -10, -20, __ATOMIC_ACQUIRE); \
+ TEST_FETCH_OP(TYPE, VAR, max, -10, 5, -10, 5, __ATOMIC_RELEASE); \
+ TEST_FETCH_OP(TYPE, VAR, min, -5, -3, -5, -5, __ATOMIC_RELAXED); \
+ TEST_FETCH_OP(TYPE, VAR, max, -20, -10, -20, -10, __ATOMIC_SEQ_CST); \
+ TEST_OP_FETCH(TYPE, VAR, min, -100, 50, -100, -100, __ATOMIC_ACQ_REL); \
+ TEST_OP_FETCH(TYPE, VAR, max, -50, -60, -50, -50, __ATOMIC_CONSUME); \
+ } \
+ }
+
+GEN_TYPE_TESTS(int8_t, s8_var_test, s8, 1)
+GEN_TYPE_TESTS(int16_t, s16_var_test, s16, 1)
+GEN_TYPE_TESTS(int32_t, s32_var_test, s32, 1)
+GEN_TYPE_TESTS(int64_t, s64_var_test, s64, 1)
+
+GEN_TYPE_TESTS(uint8_t, u8_var_test, u8, 0)
+GEN_TYPE_TESTS(uint16_t, u16_var_test, u16, 0)
+GEN_TYPE_TESTS(uint32_t, u32_var_test, u32, 0)
+GEN_TYPE_TESTS(uint64_t, u64_var_test, u64, 0)
+
+void run_tests() {
+ test_s8();
+ test_s16();
+ test_s32();
+ test_s64();
+ test_u8();
+ test_u16();
+ test_u32();
+ test_u64();
+}
+
+int8_t s8_var = 0;
+
+int8_t
+test_smin_s8 (int8_t a)
+{
+ return __atomic_fetch_min (&s8_var, a, __ATOMIC_RELAXED);
+}
+
+int8_t
+test_smax_s8 (int8_t a)
+{
+ return __atomic_fetch_max (&s8_var, a, __ATOMIC_RELEASE);
+}
+
+int16_t s16_var = 0;
+
+int16_t
+test_smin_s16 (int16_t a)
+{
+ return __atomic_fetch_min (&s16_var, a, __ATOMIC_ACQUIRE);
+}
+
+int16_t
+test_smax_s16 (int16_t a)
+{
+ return __atomic_fetch_max (&s16_var, a, __ATOMIC_ACQ_REL);
+}
+
+int32_t s32_var = 0;
+
+int32_t
+test_smin_s32 (int32_t a)
+{
+ return __atomic_fetch_min (&s32_var, a, __ATOMIC_RELAXED);
+}
+
+int32_t
+test_smax_s32 (int32_t a)
+{
+ return __atomic_fetch_max (&s32_var, a, __ATOMIC_SEQ_CST);
+}
+
+int64_t s64_var = 0;
+
+int64_t
+test_smin_s64 (int64_t a)
+{
+ return __atomic_fetch_min (&s64_var, a, __ATOMIC_ACQUIRE);
+}
+
+int64_t
+test_smax_s64 (int64_t a)
+{
+ return __atomic_fetch_max (&s64_var, a, __ATOMIC_RELAXED);
+}
+
+uint8_t u8_var = 0;
+
+uint8_t
+test_umin_u8 (uint8_t a)
+{
+ return __atomic_fetch_min (&u8_var, a, __ATOMIC_RELAXED);
+}
+
+uint8_t
+test_umax_u8 (uint8_t a)
+{
+ return __atomic_fetch_max (&u8_var, a, __ATOMIC_CONSUME);
+}
+
+uint16_t u16_var = 0;
+
+uint16_t
+test_umin_u16 (uint16_t a)
+{
+ return __atomic_fetch_min (&u16_var, a, __ATOMIC_ACQUIRE);
+}
+
+uint16_t
+test_umax_u16 (uint16_t a)
+{
+ return __atomic_fetch_max (&u16_var, a, __ATOMIC_RELEASE);
+}
+
+uint32_t u32_var = 0;
+
+uint32_t
+test_umin_u32 (uint32_t a)
+{
+ return __atomic_fetch_min (&u32_var, a, __ATOMIC_ACQ_REL);
+}
+
+uint32_t
+test_umax_u32 (uint32_t a)
+{
+ return __atomic_fetch_max (&u32_var, a, __ATOMIC_RELAXED);
+}
+
+uint64_t u64_var = 0;
+
+uint64_t
+test_umin_u64 (uint64_t a)
+{
+ return __atomic_fetch_min (&u64_var, a, __ATOMIC_RELAXED);
+}
+
+uint64_t
+test_umax_u64 (uint64_t a)
+{
+ return __atomic_fetch_max (&u64_var, a, __ATOMIC_ACQ_REL);
+}
\ No newline at end of file
diff --git a/libgcc/config/aarch64/lse.S b/libgcc/config/aarch64/lse.S
index e31ffa41514..f098cb278ec 100644
--- a/libgcc/config/aarch64/lse.S
+++ b/libgcc/config/aarch64/lse.S
@@ -276,7 +276,9 @@ ENDFN NAME(swp)
#endif
#if defined(L_ldadd) || defined(L_ldclr) \
- || defined(L_ldeor) || defined(L_ldset)
+ || defined(L_ldeor) || defined(L_ldset) \
+ || defined(L_ldsmin) || defined(L_ldsmax) \
+ || defined(L_ldumin) || defined(L_ldumax)
#ifdef L_ldadd
#define LDNM ldadd
@@ -294,6 +296,26 @@ ENDFN NAME(swp)
#define LDNM ldset
#define OP orr
#define OPN 0x3000
+#elif defined(L_ldsmin)
+#define LDNM ldsmin
+#define OP smin
+#define OPN 0x5000
+#define IS_MINMAX 1
+#elif defined(L_ldsmax)
+#define LDNM ldsmax
+#define OP smax
+#define OPN 0x4000
+#define IS_MINMAX 1
+#elif defined(L_ldumin)
+#define LDNM ldumin
+#define OP umin
+#define OPN 0x7000
+#define IS_MINMAX 1
+#elif defined(L_ldumax)
+#define LDNM ldumax
+#define OP umax
+#define OPN 0x6000
+#define IS_MINMAX 1
#else
#error
#endif
@@ -311,7 +333,45 @@ STARTFN NAME(LDNM)
8: mov s(tmp0), s(0)
0: LDXR s(0), [x1]
+#ifdef IS_MINMAX
+ /* For min/max, extend if needed, compare, and select. */
+#if SIZE < 4
+ #if defined(L_ldsmin) || defined(L_ldsmax)
+ /* Sign extend for signed comparisons. */
+ #if SIZE == 1
+ sxtb w(tmp1), w(0)
+ sxtb w(tmp3), w(tmp0)
+ #else /* SIZE == 2 */
+ sxth w(tmp1), w(0)
+ sxth w(tmp3), w(tmp0)
+ #endif
+ #else /* L_ldumin || L_ldumax */
+ /* Zero extend for unsigned comparisons. */
+ #if SIZE == 1
+ uxtb w(tmp1), w(0)
+ uxtb w(tmp3), w(tmp0)
+ #else /* SIZE == 2 */
+ uxth w(tmp1), w(0)
+ uxth w(tmp3), w(tmp0)
+ #endif
+ #endif
+ cmp w(tmp3), w(tmp1)
+#else /* SIZE >= 4 */
+ cmp s(tmp0), s(0)
+#endif
+ /* Select based on condition. */
+ #if defined(L_ldsmin)
+ csel s(tmp1), s(tmp0), s(0), lt
+ #elif defined(L_ldsmax)
+ csel s(tmp1), s(tmp0), s(0), gt
+ #elif defined(L_ldumin)
+ csel s(tmp1), s(tmp0), s(0), lo
+ #elif defined(L_ldumax)
+ csel s(tmp1), s(tmp0), s(0), hi
+ #endif
+#else /* Not IS_MINMAX */
OP s(tmp1), s(0), s(tmp0)
+#endif /* IS_MINMAX */
STXR w(tmp2), s(tmp1), [x1]
cbnz w(tmp2), 0b
BARRIER
diff --git a/libgcc/config/aarch64/t-lse b/libgcc/config/aarch64/t-lse
index c58f003114f..955243341b3 100644
--- a/libgcc/config/aarch64/t-lse
+++ b/libgcc/config/aarch64/t-lse
@@ -23,7 +23,8 @@ S0 := $(foreach s, 1 2 4 8 16, $(addsuffix _$(s), cas))
O0 := $(foreach m, 1 2 3 4 5, $(addsuffix _$(m)$(objext), $(S0)))
# Swap, Load-and-operate have 4 sizes and 5 memory models
-S1 := $(foreach s, 1 2 4 8, $(addsuffix _$(s), swp ldadd ldclr ldeor ldset))
+S1 := $(foreach s, 1 2 4 8, $(addsuffix _$(s), swp ldadd ldclr ldeor ldset \
+ ldsmin ldsmax ldumin ldumax))
O1 := $(foreach m, 1 2 3 4 5, $(addsuffix _$(m)$(objext), $(S1)))
LSE_OBJS := $(O0) $(O1)
--
2.43.0
More information about the Gcc-patches
mailing list