[committed v3] libsupc++: Change _Unordered comparison value to minimum value of signed char.
Jakub Jelinek
jakub@redhat.com
Thu Aug 28 10:10:19 GMT 2025
On Thu, Aug 28, 2025 at 09:28:57AM +0200, Tomasz Kaminski wrote:
> I have no experience with backend optimization, so I would need help to fix
> the above.
> From my side I could only offer reverting the change, but I do not think we
> should do so,
> see below.
I'll look at it momentarily.
That said, to look at the generated code before and after your changes (i.e.
the same compiler for the actual compilation, with the source preprocessed by
GCC 15 and by latest trunk respectively), I've used my and Jonathan's
testcases combined, compiled with
g++ -S -O2 -std=c++23
#include <compare>
bool f1 (std::partial_ordering x) { return x <= 0; }
bool f2 (std::partial_ordering x) { return x >= 0; }
bool f3 (std::partial_ordering x) { return 0 <= x; }
bool f4 (std::partial_ordering x) { return 0 >= x; }
auto f5 (std::partial_ordering x) { return 0 <=> x; }
struct S { friend std::partial_ordering operator<=>(S, int); };
bool f6 (S x) { return x <= 0; }
bool f7 (S x) { return x >= 0; }
bool f8 (S x) { return 0 <= x; }
bool f9 (S x) { return 0 >= x; }
auto f10 (S x) { return 0 <=> x; }
I haven't done any benchmarks on this, so judging purely from the instruction
count changes: on x86_64 it is in most cases a wash (though in some cases the
insns are smaller size-wise) and in the remaining cases fewer insns; on ia32
(but who cares about that) it is mostly a regression or a wash, with 2
routines showing significant improvements; on ppc64le and aarch64 it is
always a win.  The diffs below are diff -U200 output, edited by script and by
hand to leave out unneeded labels and anything outside the actual functions.
insn count change (number of routines per target):
change   x86_64   ia32   ppc64le   aarch64
  +1        0       4        0         0
   0        6       4        0         0
  -1        2       0        0         8
  -2        1       0       10         0
  -3        1       0        0         0
  -4        0       0        0         2
  -5        0       1        0         0
  -8        0       0        0         1
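For reference, the semantics those routines have to implement (and which
constrain the choice of the _Unordered representation value) can be written
down as a small self-contained sketch; this is just an illustration, not part
of the patch:
#include <compare>
int main ()
{
  constexpr auto lt = std::partial_ordering::less;
  constexpr auto eq = std::partial_ordering::equivalent;
  constexpr auto gt = std::partial_ordering::greater;
  constexpr auto un = std::partial_ordering::unordered;

  // less/equivalent/greater compare against 0 the way -1/0/1 would.
  static_assert (lt <= 0 && !(lt >= 0));
  static_assert (eq <= 0 && eq >= 0);
  static_assert (!(gt <= 0) && gt >= 0);

  // For unordered every relational comparison against 0 is false, so
  // whatever value represents it must make all of these come out false.
  static_assert (!(un < 0) && !(un <= 0) && !(un >= 0) && !(un > 0));

  // Reversing the operands (0 <=> x) swaps less and greater and keeps
  // equivalent and unordered as they are.
  static_assert ((0 <=> lt) == gt && (0 <=> gt) == lt);
  static_assert ((0 <=> eq) == eq && (0 <=> un) == un);
}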
===x86_64===
_Z2f1St16partial_ordering:
- testb %dil, %dil
- setle %al
+ leal -1(%rdi), %eax
+ shrb $7, %al
ret
_Z2f2St16partial_ordering:
- movsbl %dil, %edi
- cmpl $1, %edi
- setbe %al
+ movl %edi, %eax
+ notl %eax
+ shrb $7, %al
ret
_Z2f3St16partial_ordering:
- movsbl %dil, %edi
- cmpl $1, %edi
- setbe %al
+ movl %edi, %eax
+ notl %eax
+ shrb $7, %al
ret
_Z2f4St16partial_ordering:
- testb %dil, %dil
- setle %al
+ leal -1(%rdi), %eax
+ shrb $7, %al
ret
_Z2f5St16partial_ordering:
movl %edi, %eax
negl %eax
- testb $1, %dil
- cmove %edi, %eax
ret
_Z2f61S:
subq $8, %rsp
xorl %edi, %edi
call _Zss1Si
- testb %al, %al
- setle %al
addq $8, %rsp
+ subl $1, %eax
+ shrb $7, %al
ret
_Z2f71S:
subq $8, %rsp
xorl %edi, %edi
call _Zss1Si
- movsbl %al, %eax
- cmpl $1, %eax
- setbe %al
addq $8, %rsp
+ notl %eax
+ shrb $7, %al
ret
_Z2f81S:
subq $8, %rsp
xorl %edi, %edi
call _Zss1Si
- movsbl %al, %eax
- cmpl $1, %eax
- setbe %al
addq $8, %rsp
+ notl %eax
+ shrb $7, %al
ret
_Z2f91S:
subq $8, %rsp
xorl %edi, %edi
call _Zss1Si
- testb %al, %al
- setle %al
addq $8, %rsp
+ subl $1, %eax
+ shrb $7, %al
ret
_Z3f101S:
subq $8, %rsp
xorl %edi, %edi
call _Zss1Si
- movl %eax, %edx
- negl %eax
- testb $1, %dl
- cmove %edx, %eax
addq $8, %rsp
+ negl %eax
ret
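The new sequences above (and the analogous ones for the other targets below)
all come down to two byte-level tricks plus a plain negation.  A minimal
standalone sketch of the derived forms, assuming less/equivalent/greater stay
-1/0/1 and _Unordered is now the minimum value of signed char (an
illustration only, not the <compare> implementation):
#include <climits>

// The four representation values assumed here: the old _Unordered value was
// positive (which is why the old code could use a plain signed <= 0 or an
// unsigned <= 1 check), the new one is the minimum of signed char.
constexpr signed char Less = -1, Eq = 0, Gt = 1, Unord = SCHAR_MIN;

// x <= 0 must hold exactly for less and equivalent.  In 8-bit arithmetic
// that is the sign bit of (v - 1): -1-1 = 0xfe and 0-1 = 0xff have bit 7
// set, while 1-1 = 0x00 and -128-1 (which wraps to 0x7f) do not.  This is
// the leal -1/shrb $7, sub/ubfx and addi -1/rldicl form above and below.
constexpr bool le0 (signed char v)
{ return static_cast<unsigned char> (v - 1) >> 7; }

// x >= 0 must hold exactly for equivalent and greater: the sign bit of ~v.
// This is the notl/shrb $7, mvn/ubfx and not/rldicl form.
constexpr bool ge0 (signed char v)
{ return static_cast<unsigned char> (~v) >> 7; }

// 0 <=> x can now just negate the value: negating -128 and truncating back
// to signed char gives -128 again, so unordered stays unordered and the old
// testb $1/cmove (resp. beqlr/csel) guarding the negation is not needed.
constexpr signed char rev (signed char v)
{ return static_cast<signed char> (-static_cast<int> (v)); }

static_assert (le0 (Less) && le0 (Eq) && !le0 (Gt) && !le0 (Unord));
static_assert (!ge0 (Less) && ge0 (Eq) && ge0 (Gt) && !ge0 (Unord));
static_assert (rev (Less) == Gt && rev (Gt) == Less
               && rev (Eq) == Eq && rev (Unord) == Unord);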
===ia32===
_Z2f1St16partial_ordering:
- cmpb $0, 4(%esp)
- setle %al
+ movzbl 4(%esp), %eax
+ subl $1, %eax
+ shrb $7, %al
ret
_Z2f2St16partial_ordering:
- movsbl 4(%esp), %eax
- cmpl $1, %eax
- setbe %al
+ movzbl 4(%esp), %eax
+ notl %eax
+ shrb $7, %al
ret
_Z2f3St16partial_ordering:
- movsbl 4(%esp), %eax
- cmpl $1, %eax
- setbe %al
+ movzbl 4(%esp), %eax
+ notl %eax
+ shrb $7, %al
ret
_Z2f4St16partial_ordering:
- cmpb $0, 4(%esp)
- setle %al
+ movzbl 4(%esp), %eax
+ subl $1, %eax
+ shrb $7, %al
ret
_Z2f5St16partial_ordering:
movzbl 8(%esp), %eax
movl 4(%esp), %edx
- testb $1, %al
- je .L7
negl %eax
movb %al, (%edx)
movl %edx, %eax
ret $4
-.L7:
- movb %al, (%edx)
- movl %edx, %eax
- ret $4
_Z2f61S:
subl $28, %esp
leal 15(%esp), %eax
subl $4, %esp
pushl $0
pushl $0
pushl %eax
call _Zss1Si
- cmpb $0, 27(%esp)
- setle %al
+ movzbl 27(%esp), %eax
addl $40, %esp
+ subl $1, %eax
+ shrb $7, %al
ret
_Z2f71S:
subl $28, %esp
leal 15(%esp), %eax
subl $4, %esp
pushl $0
pushl $0
pushl %eax
call _Zss1Si
- movsbl 27(%esp), %eax
- cmpl $1, %eax
- setbe %al
+ movzbl 27(%esp), %eax
addl $40, %esp
+ notl %eax
+ shrb $7, %al
ret
_Z2f81S:
subl $28, %esp
leal 15(%esp), %eax
subl $4, %esp
pushl $0
pushl $0
pushl %eax
call _Zss1Si
- movsbl 27(%esp), %eax
- cmpl $1, %eax
- setbe %al
+ movzbl 27(%esp), %eax
addl $40, %esp
+ notl %eax
+ shrb $7, %al
ret
_Z2f91S:
subl $28, %esp
leal 15(%esp), %eax
subl $4, %esp
pushl $0
pushl $0
pushl %eax
call _Zss1Si
- cmpb $0, 27(%esp)
- setle %al
+ movzbl 27(%esp), %eax
addl $40, %esp
+ subl $1, %eax
+ shrb $7, %al
ret
_Z3f101S:
subl $28, %esp
leal 15(%esp), %eax
subl $4, %esp
pushl $0
pushl $0
pushl %eax
call _Zss1Si
movzbl 27(%esp), %eax
- addl $12, %esp
- testb $1, %al
- je .L18
- movl 32(%esp), %edx
+ movl 44(%esp), %edx
negl %eax
movb %al, (%edx)
- movl 32(%esp), %eax
- addl $28, %esp
- ret $4
-.L18:
- movl 32(%esp), %ecx
- movb %al, (%ecx)
- movl 32(%esp), %eax
- addl $28, %esp
+ movl %edx, %eax
+ addl $40, %esp
ret $4
===ppc64le===
_Z2f1St16partial_ordering:
- extsb 3,3
- neg 3,3
- srdi 3,3,63
- xori 3,3,0x1
+ addi 3,3,-1
+ rldicl 3,3,57,63
blr
_Z2f2St16partial_ordering:
- rlwinm 3,3,0,0xff
- subfic 3,3,1
- srdi 3,3,63
- xori 3,3,0x1
+ not 3,3
+ rldicl 3,3,57,63
blr
_Z2f3St16partial_ordering:
- rlwinm 3,3,0,0xff
- subfic 3,3,1
- srdi 3,3,63
- xori 3,3,0x1
+ not 3,3
+ rldicl 3,3,57,63
blr
_Z2f4St16partial_ordering:
- extsb 3,3
- neg 3,3
- srdi 3,3,63
- xori 3,3,0x1
+ addi 3,3,-1
+ rldicl 3,3,57,63
blr
_Z2f5St16partial_ordering:
- andi. 9,3,0x1
- beqlr 0
neg 3,3
blr
_Z2f61S:
0: addis 2,12,.TOC.-.LCF5@ha
addi 2,2,.TOC.-.LCF5@l
.localentry _Z2f61S,.-_Z2f61S
mflr 0
li 4,0
li 3,0
std 0,16(1)
stdu 1,-32(1)
bl _Zss1Si
nop
addi 1,1,32
- extsb 3,3
ld 0,16(1)
- neg 3,3
- srdi 3,3,63
+ addi 3,3,-1
+ rldicl 3,3,57,63
mtlr 0
- xori 3,3,0x1
blr
_Z2f71S:
0: addis 2,12,.TOC.-.LCF6@ha
addi 2,2,.TOC.-.LCF6@l
.localentry _Z2f71S,.-_Z2f71S
mflr 0
li 4,0
li 3,0
std 0,16(1)
stdu 1,-32(1)
bl _Zss1Si
nop
addi 1,1,32
- rlwinm 3,3,0,0xff
ld 0,16(1)
- subfic 3,3,1
- srdi 3,3,63
+ not 3,3
+ rldicl 3,3,57,63
mtlr 0
- xori 3,3,0x1
blr
_Z2f81S:
0: addis 2,12,.TOC.-.LCF7@ha
addi 2,2,.TOC.-.LCF7@l
.localentry _Z2f81S,.-_Z2f81S
mflr 0
li 4,0
li 3,0
std 0,16(1)
stdu 1,-32(1)
bl _Zss1Si
nop
addi 1,1,32
- rlwinm 3,3,0,0xff
ld 0,16(1)
- subfic 3,3,1
- srdi 3,3,63
+ not 3,3
+ rldicl 3,3,57,63
mtlr 0
- xori 3,3,0x1
blr
_Z2f91S:
0: addis 2,12,.TOC.-.LCF8@ha
addi 2,2,.TOC.-.LCF8@l
.localentry _Z2f91S,.-_Z2f91S
mflr 0
li 4,0
li 3,0
std 0,16(1)
stdu 1,-32(1)
bl _Zss1Si
nop
addi 1,1,32
- extsb 3,3
ld 0,16(1)
- neg 3,3
- srdi 3,3,63
+ addi 3,3,-1
+ rldicl 3,3,57,63
mtlr 0
- xori 3,3,0x1
blr
_Z3f101S:
0: addis 2,12,.TOC.-.LCF9@ha
addi 2,2,.TOC.-.LCF9@l
.localentry _Z3f101S,.-_Z3f101S
mflr 0
li 4,0
li 3,0
std 0,16(1)
stdu 1,-32(1)
bl _Zss1Si
nop
- andi. 9,3,0x1
- beq 0,.L20
- neg 3,3
-.L20:
addi 1,1,32
ld 0,16(1)
+ neg 3,3
mtlr 0
blr
===aarch64===
_Z2f1St16partial_ordering:
- sxtb w0, w0
- cmp w0, 0
- cset w0, le
+ sub w0, w0, #1
+ ubfx w0, w0, 7, 1
ret
_Z2f2St16partial_ordering:
- and w0, w0, 255
- cmp w0, 1
- cset w0, ls
+ mvn w0, w0
+ ubfx w0, w0, 7, 1
ret
_Z2f3St16partial_ordering:
- and w0, w0, 255
- cmp w0, 1
- cset w0, ls
+ mvn w0, w0
+ ubfx w0, w0, 7, 1
ret
_Z2f4St16partial_ordering:
- sxtb w0, w0
- cmp w0, 0
- cset w0, le
+ sub w0, w0, #1
+ ubfx w0, w0, 7, 1
ret
_Z2f5St16partial_ordering:
- sxtb w1, w0
- tst x0, 1
- neg w0, w1
- sxtb w0, w0
- csel w0, w1, w0, eq
+ neg w0, w0
ret
_Z2f61S:
stp x29, x30, [sp, -16]!
mov w1, 0
mov w0, 0
mov x29, sp
bl _Zss1Si
- sxtb w0, w0
- cmp w0, 0
- cset w0, le
+ sub w0, w0, #1
ldp x29, x30, [sp], 16
+ ubfx w0, w0, 7, 1
ret
_Z2f71S:
stp x29, x30, [sp, -16]!
mov w1, 0
mov w0, 0
mov x29, sp
bl _Zss1Si
- and w0, w0, 255
- cmp w0, 1
- cset w0, ls
+ mvn w0, w0
ldp x29, x30, [sp], 16
+ ubfx w0, w0, 7, 1
ret
_Z2f81S:
stp x29, x30, [sp, -16]!
mov w1, 0
mov w0, 0
mov x29, sp
bl _Zss1Si
- and w0, w0, 255
- cmp w0, 1
- cset w0, ls
+ mvn w0, w0
ldp x29, x30, [sp], 16
+ ubfx w0, w0, 7, 1
ret
_Z2f91S:
stp x29, x30, [sp, -16]!
mov w1, 0
mov w0, 0
mov x29, sp
bl _Zss1Si
- sxtb w0, w0
- cmp w0, 0
- cset w0, le
+ sub w0, w0, #1
ldp x29, x30, [sp], 16
+ ubfx w0, w0, 7, 1
ret
_Z3f101S:
stp x29, x30, [sp, -16]!
mov w1, 0
mov w0, 0
mov x29, sp
bl _Zss1Si
- sxtb w1, w0
- tst x0, 1
- neg w0, w1
ldp x29, x30, [sp], 16
- sxtb w0, w0
- csel w0, w1, w0, eq
+ neg w0, w0
ret
Jakub