[Bug tree-optimization/100922] CSE leads to fully redundant (back to back) zero-extending loads of the same thing in a loop, or a register copy
peter at cordes dot ca
gcc-bugzilla@gcc.gnu.org
Sat Jun 5 08:35:52 GMT 2021
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=100922
--- Comment #2 from Peter Cordes <peter at cordes dot ca> ---
Possibly also related:
With different surrounding code, this loop can compile at -O2 to asm that has
two useless register copies (a movzbl and a movl) inside the loop
(https://godbolt.org/z/PTcqzM6q7). (They set up for entry into the next loop in
an over-complicated way; doing that work inside the loop is unnecessary.)
// Advance str past every leading character whose lut entry is 0.
// Excerpt of the loop whose generated asm is under discussion; it relies on
// the enclosing function having already made lut[0] non-zero.
while( lut[(unsigned char)*str] == 0 ){ // also catches terminating 0
str++;
}
# Compiler output for the skip loop (AT&T syntax); %rdi appears to hold str.
.L19:
movzbl 1(%rdi), %edx            # load the next byte (at %rdi + 1), zero-extended into %edx
addq $1, %rdi                   # advance the pointer by one (str++)
movzbl %dl, %ecx                # redundant: %edx is already zero-extended by the load above
movl %edx, %eax                 # extra copy; %eax is not read anywhere inside this loop
cmpb $0, -120(%rsp,%rcx)        # compare the byte at -120(%rsp) + %rcx with 0 (presumably lut[byte])
je .L19                         # entry is 0: keep scanning
compiled from this source:
/*
 * Delete from the NUL-terminated string `str`, in place, every character
 * that occurs in the NUL-terminated set `remove`.
 *
 * str    - string to filter; modified in place, never grows.
 * remove - set of characters to drop; must not alias `str` (restrict).
 *
 * The compaction loop is branchless: each input character is conditionally
 * stored and the output pointer advances by 0 or 1 depending on the table.
 */
void remove_chars(char *restrict str, const char *restrict remove)
{
    /*
     * lut[c] is -1 when c must be dropped and 0 when it is kept.
     * It has to be `signed char`: plain `char` may be unsigned, in which
     * case -1 would read back as 255 and `mask + 1` below would be 256
     * instead of 0, sending `out` far out of bounds.
     */
    signed char lut[256] = {0};

    /* do/while so that the terminating 0 of `remove` is marked as well. */
    do {
        lut[(unsigned char)*remove] = -1;
    } while (*remove++);

    /******* Over complicated asm in this loop *************/
    while (lut[(unsigned char)*str] == 0) { // also catches terminating 0
        str++;
    }
    // str points at first char to *not* keep (or the terminating 0)

    const char *in = str;
    char *out = str;
    while (*in) {
        signed char mask = lut[(unsigned char)*in];
        unsigned char cin = *in, cout = *out;

        /* Dropped char: rewrite *out with itself; kept char: store it. */
        *out = mask ? cout : cin;
        out += mask + 1;        /* -1 + 1 == 0 (dropped), 0 + 1 == 1 (kept) */
        in++;
    }
    *out = *in;                 /* copy the terminating 0 */
}
More information about the Gcc-bugs
mailing list