[committed v3] libsupc++: Change _Unordered comparison value to minimum value of signed char.
Jakub Jelinek
jakub@redhat.com
Thu Aug 28 10:10:19 GMT 2025
On Thu, Aug 28, 2025 at 09:28:57AM +0200, Tomasz Kaminski wrote:
> I have no experience with backend optimization, so I would need help to fix
> the above.
> From my side I could only offer reverting the change, but I do not think we
> should do so,
> see below.
I'll look at it momentarily.
That said, to look at the generated code before and after your changes (i.e.
the same compiler for the actual compilation, with the source preprocessed by
GCC 15 and by latest trunk respectively), I've used my and Jonathan's
testcases combined, compiled with
g++ -S -O2 -std=c++23
#include <compare>
bool f1 (std::partial_ordering x) { return x <= 0; }
bool f2 (std::partial_ordering x) { return x >= 0; }
bool f3 (std::partial_ordering x) { return 0 <= x; }
bool f4 (std::partial_ordering x) { return 0 >= x; }
auto f5 (std::partial_ordering x) { return 0 <=> x; }
struct S { friend std::partial_ordering operator<=>(S, int); };
bool f6 (S x) { return x <= 0; }
bool f7 (S x) { return x >= 0; }
bool f8 (S x) { return 0 <= x; }
bool f9 (S x) { return 0 >= x; }
auto f10 (S x) { return 0 <=> x; }
I haven't done any benchmarks on this, so judging purely from the instruction
count changes: on x86_64 it is in most cases a wash (though in some cases the
insns are smaller size-wise) and in the remaining cases fewer insns; on ia32
(but who cares about that) it is mostly a regression or a wash, with 2
routines showing significant improvements; on ppc64le and aarch64 it is
always a win.  The diffs below are diff -U200 output, edited by script and by
hand to leave out unneeded labels and anything outside the actual functions.
insn count change (number of routines per target):
change   x86_64   ia32   ppc64le   aarch64
  +1        0       4        0         0
   0        6       4        0         0
  -1        2       0        0         8
  -2        1       0       10         0
  -3        1       0        0         0
  -4        0       0        0         2
  -5        0       1        0         0
  -8        0       0        0         1
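For reference, the semantics those routines have to implement (and which
constrain the choice of the _Unordered representation value) can be written
down as a small self-contained sketch; this is just an illustration, not part
of the patch:
#include <compare>
int main ()
{
  constexpr auto lt = std::partial_ordering::less;
  constexpr auto eq = std::partial_ordering::equivalent;
  constexpr auto gt = std::partial_ordering::greater;
  constexpr auto un = std::partial_ordering::unordered;

  // less/equivalent/greater compare against 0 the way -1/0/1 would.
  static_assert (lt <= 0 && !(lt >= 0));
  static_assert (eq <= 0 && eq >= 0);
  static_assert (!(gt <= 0) && gt >= 0);

  // For unordered every relational comparison against 0 is false, so
  // whatever value represents it must make all of these come out false.
  static_assert (!(un < 0) && !(un <= 0) && !(un >= 0) && !(un > 0));

  // Reversing the operands (0 <=> x) swaps less and greater and keeps
  // equivalent and unordered as they are.
  static_assert ((0 <=> lt) == gt && (0 <=> gt) == lt);
  static_assert ((0 <=> eq) == eq && (0 <=> un) == un);
}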
===x86_64===
_Z2f1St16partial_ordering:
- testb %dil, %dil
- setle %al
+ leal -1(%rdi), %eax
+ shrb $7, %al
ret
_Z2f2St16partial_ordering:
- movsbl %dil, %edi
- cmpl $1, %edi
- setbe %al
+ movl %edi, %eax
+ notl %eax
+ shrb $7, %al
ret
_Z2f3St16partial_ordering:
- movsbl %dil, %edi
- cmpl $1, %edi
- setbe %al
+ movl %edi, %eax
+ notl %eax
+ shrb $7, %al
ret
_Z2f4St16partial_ordering:
- testb %dil, %dil
- setle %al
+ leal -1(%rdi), %eax
+ shrb $7, %al
ret
_Z2f5St16partial_ordering:
movl %edi, %eax
negl %eax
- testb $1, %dil
- cmove %edi, %eax
ret
_Z2f61S:
subq $8, %rsp
xorl %edi, %edi
call _Zss1Si
- testb %al, %al
- setle %al
addq $8, %rsp
+ subl $1, %eax
+ shrb $7, %al
ret
_Z2f71S:
subq $8, %rsp
xorl %edi, %edi
call _Zss1Si
- movsbl %al, %eax
- cmpl $1, %eax
- setbe %al
addq $8, %rsp
+ notl %eax
+ shrb $7, %al
ret
_Z2f81S:
subq $8, %rsp
xorl %edi, %edi
call _Zss1Si
- movsbl %al, %eax
- cmpl $1, %eax
- setbe %al
addq $8, %rsp
+ notl %eax
+ shrb $7, %al
ret
_Z2f91S:
subq $8, %rsp
xorl %edi, %edi
call _Zss1Si
- testb %al, %al
- setle %al
addq $8, %rsp
+ subl $1, %eax
+ shrb $7, %al
ret
_Z3f101S:
subq $8, %rsp
xorl %edi, %edi
call _Zss1Si
- movl %eax, %edx
- negl %eax
- testb $1, %dl
- cmove %edx, %eax
addq $8, %rsp
+ negl %eax
ret
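The new sequences above (and the analogous ones for the other targets below)
all come down to two byte-level tricks plus a plain negation.  A minimal
standalone sketch of the derived forms, assuming less/equivalent/greater stay
-1/0/1 and _Unordered is now the minimum value of signed char (an
illustration only, not the <compare> implementation):
#include <climits>

// The four representation values assumed here: the old _Unordered value was
// positive (which is why the old code could use a plain signed <= 0 or an
// unsigned <= 1 check), the new one is the minimum of signed char.
constexpr signed char Less = -1, Eq = 0, Gt = 1, Unord = SCHAR_MIN;

// x <= 0 must hold exactly for less and equivalent.  In 8-bit arithmetic
// that is the sign bit of (v - 1): -1-1 = 0xfe and 0-1 = 0xff have bit 7
// set, while 1-1 = 0x00 and -128-1 (which wraps to 0x7f) do not.  This is
// the leal -1/shrb $7, sub/ubfx and addi -1/rldicl form above and below.
constexpr bool le0 (signed char v)
{ return static_cast<unsigned char> (v - 1) >> 7; }

// x >= 0 must hold exactly for equivalent and greater: the sign bit of ~v.
// This is the notl/shrb $7, mvn/ubfx and not/rldicl form.
constexpr bool ge0 (signed char v)
{ return static_cast<unsigned char> (~v) >> 7; }

// 0 <=> x can now just negate the value: negating -128 and truncating back
// to signed char gives -128 again, so unordered stays unordered and the old
// testb $1/cmove (resp. beqlr/csel) guarding the negation is not needed.
constexpr signed char rev (signed char v)
{ return static_cast<signed char> (-static_cast<int> (v)); }

static_assert (le0 (Less) && le0 (Eq) && !le0 (Gt) && !le0 (Unord));
static_assert (!ge0 (Less) && ge0 (Eq) && ge0 (Gt) && !ge0 (Unord));
static_assert (rev (Less) == Gt && rev (Gt) == Less
               && rev (Eq) == Eq && rev (Unord) == Unord);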
===ia32===
_Z2f1St16partial_ordering:
- cmpb $0, 4(%esp)
- setle %al
+ movzbl 4(%esp), %eax
+ subl $1, %eax
+ shrb $7, %al
ret
_Z2f2St16partial_ordering:
- movsbl 4(%esp), %eax
- cmpl $1, %eax
- setbe %al
+ movzbl 4(%esp), %eax
+ notl %eax
+ shrb $7, %al
ret
_Z2f3St16partial_ordering:
- movsbl 4(%esp), %eax
- cmpl $1, %eax
- setbe %al
+ movzbl 4(%esp), %eax
+ notl %eax
+ shrb $7, %al
ret
_Z2f4St16partial_ordering:
- cmpb $0, 4(%esp)
- setle %al
+ movzbl 4(%esp), %eax
+ subl $1, %eax
+ shrb $7, %al
ret
_Z2f5St16partial_ordering:
movzbl 8(%esp), %eax
movl 4(%esp), %edx
- testb $1, %al
- je .L7
negl %eax
movb %al, (%edx)
movl %edx, %eax
ret $4
-.L7:
- movb %al, (%edx)
- movl %edx, %eax
- ret $4
_Z2f61S:
subl $28, %esp
leal 15(%esp), %eax
subl $4, %esp
pushl $0
pushl $0
pushl %eax
call _Zss1Si
- cmpb $0, 27(%esp)
- setle %al
+ movzbl 27(%esp), %eax
addl $40, %esp
+ subl $1, %eax
+ shrb $7, %al
ret
_Z2f71S:
subl $28, %esp
leal 15(%esp), %eax
subl $4, %esp
pushl $0
pushl $0
pushl %eax
call _Zss1Si
- movsbl 27(%esp), %eax
- cmpl $1, %eax
- setbe %al
+ movzbl 27(%esp), %eax
addl $40, %esp
+ notl %eax
+ shrb $7, %al
ret
_Z2f81S:
subl $28, %esp
leal 15(%esp), %eax
subl $4, %esp
pushl $0
pushl $0
pushl %eax
call _Zss1Si
- movsbl 27(%esp), %eax
- cmpl $1, %eax
- setbe %al
+ movzbl 27(%esp), %eax
addl $40, %esp
+ notl %eax
+ shrb $7, %al
ret
_Z2f91S:
subl $28, %esp
leal 15(%esp), %eax
subl $4, %esp
pushl $0
pushl $0
pushl %eax
call _Zss1Si
- cmpb $0, 27(%esp)
- setle %al
+ movzbl 27(%esp), %eax
addl $40, %esp
+ subl $1, %eax
+ shrb $7, %al
ret
_Z3f101S:
subl $28, %esp
leal 15(%esp), %eax
subl $4, %esp
pushl $0
pushl $0
pushl %eax
call _Zss1Si
movzbl 27(%esp), %eax
- addl $12, %esp
- testb $1, %al
- je .L18
- movl 32(%esp), %edx
+ movl 44(%esp), %edx
negl %eax
movb %al, (%edx)
- movl 32(%esp), %eax
- addl $28, %esp
- ret $4
-.L18:
- movl 32(%esp), %ecx
- movb %al, (%ecx)
- movl 32(%esp), %eax
- addl $28, %esp
+ movl %edx, %eax
+ addl $40, %esp
ret $4
===ppc64le===
_Z2f1St16partial_ordering:
- extsb 3,3
- neg 3,3
- srdi 3,3,63
- xori 3,3,0x1
+ addi 3,3,-1
+ rldicl 3,3,57,63
blr
_Z2f2St16partial_ordering:
- rlwinm 3,3,0,0xff
- subfic 3,3,1
- srdi 3,3,63
- xori 3,3,0x1
+ not 3,3
+ rldicl 3,3,57,63
blr
_Z2f3St16partial_ordering:
- rlwinm 3,3,0,0xff
- subfic 3,3,1
- srdi 3,3,63
- xori 3,3,0x1
+ not 3,3
+ rldicl 3,3,57,63
blr
_Z2f4St16partial_ordering:
- extsb 3,3
- neg 3,3
- srdi 3,3,63
- xori 3,3,0x1
+ addi 3,3,-1
+ rldicl 3,3,57,63
blr
_Z2f5St16partial_ordering:
- andi. 9,3,0x1
- beqlr 0
neg 3,3
blr
_Z2f61S:
0: addis 2,12,.TOC.-.LCF5@ha
addi 2,2,.TOC.-.LCF5@l
.localentry _Z2f61S,.-_Z2f61S
mflr 0
li 4,0
li 3,0
std 0,16(1)
stdu 1,-32(1)
bl _Zss1Si
nop
addi 1,1,32
- extsb 3,3
ld 0,16(1)
- neg 3,3
- srdi 3,3,63
+ addi 3,3,-1
+ rldicl 3,3,57,63
mtlr 0
- xori 3,3,0x1
blr
_Z2f71S:
0: addis 2,12,.TOC.-.LCF6@ha
addi 2,2,.TOC.-.LCF6@l
.localentry _Z2f71S,.-_Z2f71S
mflr 0
li 4,0
li 3,0
std 0,16(1)
stdu 1,-32(1)
bl _Zss1Si
nop
addi 1,1,32
- rlwinm 3,3,0,0xff
ld 0,16(1)
- subfic 3,3,1
- srdi 3,3,63
+ not 3,3
+ rldicl 3,3,57,63
mtlr 0
- xori 3,3,0x1
blr
_Z2f81S:
0: addis 2,12,.TOC.-.LCF7@ha
addi 2,2,.TOC.-.LCF7@l
.localentry _Z2f81S,.-_Z2f81S
mflr 0
li 4,0
li 3,0
std 0,16(1)
stdu 1,-32(1)
bl _Zss1Si
nop
addi 1,1,32
- rlwinm 3,3,0,0xff
ld 0,16(1)
- subfic 3,3,1
- srdi 3,3,63
+ not 3,3
+ rldicl 3,3,57,63
mtlr 0
- xori 3,3,0x1
blr
_Z2f91S:
0: addis 2,12,.TOC.-.LCF8@ha
addi 2,2,.TOC.-.LCF8@l
.localentry _Z2f91S,.-_Z2f91S
mflr 0
li 4,0
li 3,0
std 0,16(1)
stdu 1,-32(1)
bl _Zss1Si
nop
addi 1,1,32
- extsb 3,3
ld 0,16(1)
- neg 3,3
- srdi 3,3,63
+ addi 3,3,-1
+ rldicl 3,3,57,63
mtlr 0
- xori 3,3,0x1
blr
_Z3f101S:
0: addis 2,12,.TOC.-.LCF9@ha
addi 2,2,.TOC.-.LCF9@l
.localentry _Z3f101S,.-_Z3f101S
mflr 0
li 4,0
li 3,0
std 0,16(1)
stdu 1,-32(1)
bl _Zss1Si
nop
- andi. 9,3,0x1
- beq 0,.L20
- neg 3,3
-.L20:
addi 1,1,32
ld 0,16(1)
+ neg 3,3
mtlr 0
blr
===aarch64===
_Z2f1St16partial_ordering:
- sxtb w0, w0
- cmp w0, 0
- cset w0, le
+ sub w0, w0, #1
+ ubfx w0, w0, 7, 1
ret
_Z2f2St16partial_ordering:
- and w0, w0, 255
- cmp w0, 1
- cset w0, ls
+ mvn w0, w0
+ ubfx w0, w0, 7, 1
ret
_Z2f3St16partial_ordering:
- and w0, w0, 255
- cmp w0, 1
- cset w0, ls
+ mvn w0, w0
+ ubfx w0, w0, 7, 1
ret
_Z2f4St16partial_ordering:
- sxtb w0, w0
- cmp w0, 0
- cset w0, le
+ sub w0, w0, #1
+ ubfx w0, w0, 7, 1
ret
_Z2f5St16partial_ordering:
- sxtb w1, w0
- tst x0, 1
- neg w0, w1
- sxtb w0, w0
- csel w0, w1, w0, eq
+ neg w0, w0
ret
_Z2f61S:
stp x29, x30, [sp, -16]!
mov w1, 0
mov w0, 0
mov x29, sp
bl _Zss1Si
- sxtb w0, w0
- cmp w0, 0
- cset w0, le
+ sub w0, w0, #1
ldp x29, x30, [sp], 16
+ ubfx w0, w0, 7, 1
ret
_Z2f71S:
stp x29, x30, [sp, -16]!
mov w1, 0
mov w0, 0
mov x29, sp
bl _Zss1Si
- and w0, w0, 255
- cmp w0, 1
- cset w0, ls
+ mvn w0, w0
ldp x29, x30, [sp], 16
+ ubfx w0, w0, 7, 1
ret
_Z2f81S:
stp x29, x30, [sp, -16]!
mov w1, 0
mov w0, 0
mov x29, sp
bl _Zss1Si
- and w0, w0, 255
- cmp w0, 1
- cset w0, ls
+ mvn w0, w0
ldp x29, x30, [sp], 16
+ ubfx w0, w0, 7, 1
ret
_Z2f91S:
stp x29, x30, [sp, -16]!
mov w1, 0
mov w0, 0
mov x29, sp
bl _Zss1Si
- sxtb w0, w0
- cmp w0, 0
- cset w0, le
+ sub w0, w0, #1
ldp x29, x30, [sp], 16
+ ubfx w0, w0, 7, 1
ret
_Z3f101S:
stp x29, x30, [sp, -16]!
mov w1, 0
mov w0, 0
mov x29, sp
bl _Zss1Si
- sxtb w1, w0
- tst x0, 1
- neg w0, w1
ldp x29, x30, [sp], 16
- sxtb w0, w0
- csel w0, w1, w0, eq
+ neg w0, w0
ret
Jakub