[Bug rtl-optimization/98782] IRA artificially creating spills due to BB frequencies
fxue at os dot amperecomputing.com
gcc-bugzilla@gcc.gnu.org
Fri Jan 22 10:12:30 GMT 2021
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=98782
Feng Xue <fxue at os dot amperecomputing.com> changed:
What |Removed |Added
----------------------------------------------------------------------------
CC| |fxue at os dot amperecomputing.com
--- Comment #1 from Feng Xue <fxue at os dot amperecomputing.com> ---
The value "foo + 1024" is spilled in both cases, but in different ways. In the bad
case, it is spilled once outside the loop and only reloaded inside it. In the good case,
a spill/reload pair occurs around the call to "bar", which might also take into account
the extra cost of using caller-saved registers. It seems that IRA has two different
mechanisms for handling spilling.
[Bad case]
foo:
stp x29, x30, [sp, -80]!
mov w5, 753
mov x29, sp
stp x19, x20, [sp, 16]
mov x19, x1
mul w1, w0, w5
stp x21, x22, [sp, 32]
mov w22, 5271
add w2, w1, 7
mov w21, w5
mul w3, w0, w22
mov w20, 760
mov w22, 0
str w0, [sp, 76]
add x0, x19, 1024
str x0, [sp, 64] // Spill (foo + 1024)
.p2align 3,,7
.L5:
ldrb w0, [x19]
cbz w0, .L2
ldr w0, [sp, 76]
stp w1, w2, [sp, 56]
str w3, [sp, 72]
bl bar
ldrb w0, [x19, 1]!
ldp w1, w2, [sp, 56]
add w21, w21, w0
ldr w3, [sp, 72]
mul w20, w20, w0
ldr x0, [sp, 64] // Reload (foo + 1024)
add w22, w22, w20
cmp x19, x0
bne .L5
b .L4
.p2align 2,,3
.L2:
ldrb w0, [x19, 1]!
add w21, w21, w0
mul w20, w20, w0
ldr x0, [sp, 64] // Reload (foo + 1024)
add w22, w22, w20
cmp x0, x19
bne .L5
.L4:
add w0, w20, w21
add w0, w0, w22
ldp x19, x20, [sp, 16]
ldp x21, x22, [sp, 32]
ldp x29, x30, [sp], 80
ret
[Good case]
foo:
stp x29, x30, [sp, -80]!
mov w5, 753
add x7, x1, 1024
mul w2, w0, w5
mov x29, sp
stp x21, x22, [sp, 32]
mov w21, 5271
mov w22, w5
stp x19, x20, [sp, 16]
mov x19, x1
mul w3, w0, w21
stp w2, w0, [sp, 72] // Spill x(%w0)
add w2, w2, 7 // t2(%w2)
mov w21, 0
mov w20, 760
.p2align 3,,7
.L5:
ldrb w0, [x19]
cbz w0, .L2
ldp w1, w0, [sp, 72] // Reload x
stp w2, w3, [sp, 56] // Spill t2
str x7, [sp, 64] // Spill (foo + 1024)
bl bar
ldrb w0, [x19, 1]!
ldr x7, [sp, 64] // Reload (foo + 1024)
add w22, w22, w0
ldp w2, w3, [sp, 56] // Reload t2
mul w20, w20, w0
add w21, w21, w20
cmp x19, x7
bne .L5
b .L4
.p2align 2,,3
.L2:
ldrb w0, [x19, 1]!
add w22, w22, w0
mul w20, w20, w0
add w21, w21, w20
cmp x7, x19
bne .L5
.L4:
add w0, w20, w22
add w0, w0, w21
ldp x19, x20, [sp, 16]
ldp x21, x22, [sp, 32]
ldp x29, x30, [sp], 80
ret
Even in the good case, we could expect better spill/reload generation. As the
comments above show, "x" and "t2" are similar, both being loop invariant, but they are
handled differently. Spilling "t2" inside the loop is worse than spilling it outside the
loop, which is what IRA does for "x".
Both issues could be related to the same underlying cause.
More information about the Gcc-bugs
mailing list