[Bug target/91681] New: Missed optimization for 128 bit arithmetic operations
antoshkka at gmail dot com
gcc-bugzilla@gcc.gnu.org
Fri Sep 6 10:11:00 GMT 2019
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=91681
Bug ID: 91681
Summary: Missed optimization for 128 bit arithmetic operations
Product: gcc
Version: 10.0
Status: UNCONFIRMED
Keywords: missed-optimization
Severity: normal
Priority: P3
Component: target
Assignee: unassigned at gcc dot gnu.org
Reporter: antoshkka at gmail dot com
Target Milestone: ---
Consider the function:
// Reproducer: computes two 128-bit values from four unsigned long inputs
// (64-bit unsigned long assumed, as on x86-64 Linux) and stores them in o:
//   o[0] = a*c + ((b*c) >> 64)
//   o[1] = a*d + ((b*d) >> 64)
// Arithmetically, o[0] equals ((a * 2^64 + b) * c) >> 64, and likewise o[1]
// with d in place of c. Neither sum can overflow 128 bits.
void multiply128x64x2_3 (
const unsigned long a,
const unsigned long b,
const unsigned long c,
const unsigned long d,
__uint128_t o[2]
) noexcept
{
// Widening one operand to __uint128_t first makes each product a full
// 64x64 -> 128-bit multiplication rather than a truncated 64-bit one.
__uint128_t B0 = __uint128_t{ b } * c;
__uint128_t B2 = __uint128_t{ a } * c;
__uint128_t B1 = __uint128_t{ b } * d;
__uint128_t B3 = __uint128_t{ a } * d;
// Only the high 64 bits of B0 / B1 contribute, so the high half of each
// shifted addend is always zero.
o[0] = B2 + (B0 >> 64);
o[1] = B3 + (B1 >> 64);
}
With compilation flags "-O2 -std=c++17 -mavx" the following assembly is
produced:
; Assembly as emitted by GCC (Intel syntax). Annotations added for review only.
; Arguments, System V AMD64 order: rdi = a, rsi = b, rdx = c, rcx = d, r8 = o.
; B0 = b*c, B2 = a*c, B1 = b*d, B3 = a*d, as named in the C++ source.
multiply128x64x2_3(unsigned long, unsigned long, unsigned long, unsigned long,
unsigned __int128*):
mov rax, rdx                    ; rax = c (mul takes one factor in rax)
push rbx                        ; save callee-saved rbx: the stack access the report objects to
mov rbx, rdx                    ; rbx = c, kept because mul overwrites rdx
mov r9, rdi                     ; r9 = a, kept because rdi is zeroed further down
mul rsi                         ; rdx:rax = c * b = B0
mov rax, rdx                    ; rax = B0 >> 64 (low half of the shifted value)
xor edx, edx                    ; rdx = 0 (high half of the shifted value)
mov r10, rax                    ; r10 = low half of (B0 >> 64)
mov rax, rbx                    ; rax = c
mov r11, rdx                    ; r11 = 0, high half of (B0 >> 64)
pop rbx                         ; restore rbx
mul rdi                         ; rdx:rax = c * a = B2
add rax, r10                    ; low(B2) + (B0 >> 64), carry out in CF
adc rdx, r11                    ; high(B2) + 0 + carry
mov QWORD PTR [r8], rax         ; o[0], low 64 bits
mov rax, rsi                    ; rax = b
xor edi, edi                    ; rdi = 0, high half of (B1 >> 64)
mov QWORD PTR [r8+8], rdx       ; o[0], high 64 bits
mul rcx                         ; rdx:rax = b * d = B1
mov rax, rcx                    ; rax = d
mov rsi, rdx                    ; rsi = B1 >> 64
mul r9                          ; rdx:rax = d * a = B3
add rsi, rax                    ; (B1 >> 64) + low(B3), carry out in CF
adc rdi, rdx                    ; 0 + high(B3) + carry
mov QWORD PTR [r8+16], rsi      ; o[1], low 64 bits
mov QWORD PTR [r8+24], rdi      ; o[1], high 64 bits
ret
However, this is sub-optimal: touching the stack (the push/pop of rbx) is not
necessary, and the same result could be achieved with fewer instructions:
; Proposed sequence (Intel syntax). Annotations added for review only.
; Arguments, System V AMD64 order: rdi = a, rsi = b, rdx = c, rcx = d, r8 = o.
; Uses only caller-saved scratch registers (r8-r11), so nothing is pushed,
; and folds the known-zero high halves into "adc rdx, 0".
multiply128x64x2_3(unsigned long, unsigned long, unsigned long, unsigned long,
unsigned __int128*):
mov r9, r8                      ; r9 = o, freeing r8
mov r8, rdx                     ; r8 = c, kept because mul overwrites rdx
mov rax, rsi                    ; rax = b
mul r8                          ; rdx:rax = b * c
mov rax, r8                     ; rax = c
mov r10, rdx                    ; r10 = (b*c) >> 64
mul rdi                         ; rdx:rax = c * a
add r10, rax                    ; r10 = low(a*c) + ((b*c) >> 64), carry out in CF
mov rax, rsi                    ; rax = b for the next product; mov leaves CF intact
mov QWORD PTR [r9], r10         ; o[0], low 64 bits; mov leaves CF intact
adc rdx, 0                      ; high(a*c) + carry from the add above
mov QWORD PTR [8+r9], rdx       ; o[0], high 64 bits
mul rcx                         ; rdx:rax = b * d
mov rax, rdi                    ; rax = a
mov r11, rdx                    ; r11 = (b*d) >> 64
mul rcx                         ; rdx:rax = a * d
add r11, rax                    ; r11 = low(a*d) + ((b*d) >> 64), carry out in CF
mov QWORD PTR [16+r9], r11      ; o[1], low 64 bits; mov leaves CF intact
adc rdx, 0                      ; high(a*d) + carry from the add above
mov QWORD PTR [24+r9], rdx      ; o[1], high 64 bits
ret
More information about the Gcc-bugs
mailing list