[Bug middle-end/118188] New: [15 regression] aarch64: 30% regression in TSVC s4115 since r15-5565-gdbc38dd9e96a99
dhruvc at nvidia dot com
gcc-bugzilla@gcc.gnu.org
Tue Dec 24 06:03:15 GMT 2024
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=118188
Bug ID: 118188
Summary: [15 regression] aarch64: 30% regression in TSVC s4115
since r15-5565-gdbc38dd9e96a99
Product: gcc
Version: 15.0
Status: UNCONFIRMED
Severity: normal
Priority: P3
Component: middle-end
Assignee: unassigned at gcc dot gnu.org
Reporter: dhruvc at nvidia dot com
Target Milestone: ---
Testcase:
===
/* Outer repeat count: s4115 re-runs its inner reduction loop this many times. */
#define iterations 100000
/* Element count of each global array and trip count of s4115's inner loop. */
#define LEN_1D 32000
/* Not referenced anywhere in this reduced testcase. */
#define LEN_2D 256
/* Byte alignment requested for the global arrays via __attribute__((aligned)). */
#define ARRAY_ALIGNMENT 64
#include <sys/time.h>
/*
 * Argument bundle passed to the kernel.  s4115 reads only arg_info;
 * t1 and t2 are never touched by the code in this testcase.
 */
struct args_t {
struct timeval t1;
struct timeval t2;
/* Opaque kernel-specific pointer; s4115 converts it to an int * index array. */
void * __restrict__ arg_info;
};
/* Element type used by the arrays and by the reduction in s4115. */
typedef float real_t;
/*
 * Global arrays of LEN_1D elements, declared with ARRAY_ALIGNMENT-byte
 * alignment.  s4115 reads only a and b; c, d and e are unused in this
 * reduced testcase.
 */
__attribute__((aligned(ARRAY_ALIGNMENT))) real_t
a[LEN_1D],b[LEN_1D],c[LEN_1D],d[LEN_1D],e[LEN_1D];
/*
 * Indirect-addressing dot product: sums a[i] * b[ip[i]] for
 * i in [0, LEN_1D), where ip is the int array that func_args->arg_info
 * points to.
 *
 * The reduction is repeated `iterations` times and sum is reset at the top
 * of every repetition, so the value returned is the result of the final
 * pass only.
 *
 * The values in ip are used as indexes into b without any range check.
 */
real_t s4115(struct args_t * func_args)
{
/* Index array; the void * converts to int * implicitly in C. */
int * __restrict__ ip = func_args->arg_info;
real_t sum;
for (int nl = 0; nl < iterations; nl++) {
/* Restart the reduction on every outer repetition. */
sum = 0.;
for (int i = 0; i < LEN_1D; i++) {
/* Contiguous read of a, indexed (gather-style) read of b through ip. */
sum += a[i] * b[ip[i]];
}
}
return sum;
}
===
Before:
===
// Excerpt of the loop nest as compiled before the regression (SVE code).
// x1-x6 and p15 are initialised outside this excerpt; the roles noted for
// them below are inferred from how the excerpt uses them.
.L2:
// Outer-loop head (branch target of "bne .L2" below).
mov x0, 0			// element index = 0
mov p7.b, p15.b			// reload loop predicate from p15 (presumably the first-iteration mask)
movi d0, #0			// zero the register that fmla accumulates into (as z0)
.p2align 5,,15
.L3:
// Inner loop: one predicated vector of 32-bit elements per pass.
ld1w z29.s, p7/z, [x5, x0, lsl 2]	// contiguous load; z29 is used below as the gather index (ip[i] in the testcase)
ld1w z31.s, p7/z, [x3, x0, lsl 2]	// contiguous load of the multiplicand (a[i] in the testcase)
ld1w z30.s, p7/z, [x2, z29.s, sxtw 2]	// gather load: x2 + sign-extended z29 lanes * 4 (b[ip[i]])
add x0, x0, x4			// advance index by x4 (presumably elements per vector)
fmla z0.s, p7/m, z31.s, z30.s	// z0 += z31 * z30 on active lanes
whilelo p7.s, w0, w1		// predicate for the next pass: lanes whose index is below w1
b.any .L3			// continue while any lane is still active
subs w6, w6, #1			// decrement outer counter
bne .L2				// repeat outer loop until it reaches zero
ptrue p7.b, all
faddv s0, p7, z0.s		// horizontal add of all z0 lanes into s0, the float return register
ret
===
After:
===
// Excerpt of the loop nest as compiled after the regression (Advanced SIMD
// code).  x1, x3, x7, x8 and w9 are initialised outside this excerpt; the
// roles noted for them below are inferred from how the excerpt uses them.
.L2:
// Outer-loop head (branch target of "bne .L2" below).
movi v0.4s, 0			// zero the 4-lane accumulator
mov x0, 0			// byte offset = 0
.p2align 5,,15
.L3:
// Inner loop: four elements (16 bytes) per pass.
add x2, x3, x0			// x2 = address of the current group of four indexes
ldrsw x4, [x3, x0]		// index 0, sign-extended to 64 bits
ldrsw x6, [x2, 4]		// index 1
ldpsw x2, x5, [x2, 8]		// index 2 -> x2, index 3 -> x5
ldr s1, [x1, x4, lsl 2]		// scalar load of element [index 0] from base x1
ldr s30, [x1, x6, lsl 2]	// element [index 1]
ldr s31, [x1, x5, lsl 2]	// element [index 3]
ldr s29, [x1, x2, lsl 2]	// element [index 2]
uzp1 v30.2s, v30.2s, v31.2s	// v30 = { elem1, elem3 }
ldr q31, [x7, x0]		// contiguous 16-byte load of four multiplicands
add x0, x0, 16			// advance byte offset by one 4 x 32-bit vector
uzp1 v1.2s, v1.2s, v29.2s	// v1 = { elem0, elem2 }
zip1 v30.4s, v1.4s, v30.4s	// interleave -> { elem0, elem1, elem2, elem3 }
fmla v0.4s, v31.4s, v30.4s	// v0 += v31 * v30
cmp x0, x8			// x8 is the end byte offset (set outside the excerpt)
bne .L3
subs w9, w9, #1			// decrement outer counter
bne .L2				// repeat outer loop until it reaches zero
faddp v0.4s, v0.4s, v0.4s	// pairwise add: lane 0 = v0[0] + v0[1], lane 1 = v0[2] + v0[3]
faddp v0.4s, v0.4s, v0.4s	// second pairwise add: lane 0 (s0) = sum of all four lanes
ret
===
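I think the scalar loads are causing the slowdown: the "Before" loop fetches
b[ip[i]] with a single SVE gather load (ld1w with a vector index), whereas the
"After" loop loads each index and each b element with scalar instructions
(ldrsw/ldpsw and ldr) and then reassembles the vector with uzp1/zip1. This
appears to have been an optimization in GCC 15 that has regressed again.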
Command line:
gcc -std=c99 -march=native -Ofast -fstrict-aliasing -fivopts -ftree-vectorize
-S -mcpu=grace -c src/tsvc.c
More information about the Gcc-bugs
mailing list