Totally bogus string optimizations
Joseph S. Myers
jsm28@cam.ac.uk
Sat Oct 28 12:20:00 GMT 2000
[Moved onto gcc-bugs as well as libc-alpha - there is a regression here in
how GCC handles glibc's string macros, as well as scope for GCC to handle
more of the optimisations itself.]
On 28 Oct 2000, Ulrich Drepper wrote:
> And no, the compiler is not able to optimize as good. If the compiler
> would work correctly an strcpy() like your example would be
> transformed into something like this
>
>
> ...get dest address in %eax...
> movl $0x626f6f46, (%eax)
> movw $0x7261, 4(%eax)
> movb $0, 6(%eax)
>
>
> which is much better since we have no loading, no relocation (this is
> a data relocation which always has to be performed), and not another
> string constant.
emit_block_move in the compiler needs to be improved so that it will take
data from (at least) non-wide string constants at compile time (and
ideally from all string constants and constant arrays). As is, CVS GCC
produces the following from its builtin strcpy:
# Output of CVS GCC's builtin strcpy for the "Foobar" example: the 7 bytes
# (6 characters plus the terminating NUL) are copied at run time from the
# string constant .LC0 as one 4-byte, one 2-byte and one 1-byte load/store
# pair.
.LC0:
.string "Foobar"
.text
.align 16
.globl foo
.type foo,@function
foo:
# %edx = destination pointer loaded from p
movl p, %edx
# bytes 0-3: load from .LC0, store to the destination
movl .LC0, %eax
movl %eax, (%edx)
# bytes 4-5
movzwl .LC0+4, %eax
movw %ax, 4(%edx)
# byte 6
movzbl .LC0+6, %eax
movb %al, 6(%edx)
ret
If this gets improved in GCC, then the strcpy macro would best be
disabled for GCC 2.97 and later, because the code the macro generates with
GCC CVS is terrible:
# Output of GCC CVS for the glibc strcpy macro: both a memcpy call and an
# immediate-store sequence are emitted, selected by a run-time comparison.
.LC0:
.string "Foobar"
.text
.align 16
.globl foo
.type foo,@function
foo:
movl $.LC0+1, %eax
subl $12, %esp
# %eax is compared with itself, so the je below is always taken
cmpl %eax, %eax
je .L158
# fall-through path: memcpy (p, .LC0, 7), arguments pushed in reverse order
subl $4, %esp
movl p, %eax
pushl $7
pushl $.LC0
pushl %eax
call memcpy
# drop the 4 bytes reserved just above plus the three 4-byte arguments
addl $16, %esp
.L143:
# common exit: release the 12 bytes reserved on entry
addl $12, %esp
ret
.p2align 4,,7
.L158:
# immediate stores: 1651470150 = 0x626f6f46, 29281 = 0x7261, then the NUL
movl p, %eax
movl $1651470150, (%eax)
movw $29281, 4(%eax)
movb $0, 6(%eax)
jmp .L143
whereas GCC 2.95.2, given the same macro, generates the code we want, apart
from leaving the string constant in the output (so there *is* a regression
in GCC):
# Output of GCC 2.95.2 for the glibc strcpy macro: three immediate stores
# through the pointer loaded from p.  The string constant .LC0 is still
# emitted although no instruction below refers to it.
.LC0:
.string "Foobar"
.text
.align 4
.globl foo
.type foo,@function
foo:
# %eax = destination pointer loaded from p
movl p,%eax
# 1651470150 = 0x626f6f46, 29281 = 0x7261, then the terminating NUL
movl $1651470150,(%eax)
movw $29281,4(%eax)
movb $0,6(%eax)
ret
(All examples on x86 with -O2 -fomit-frame-pointer.)
Every nested use of the strcpy macro increases the preprocessed size
20-fold, so an innocent line
strcpy(D1, strcpy(D2, strcpy(D3, strcpy(D4, S))))
expands to 10 Mbyte of source when glibc's macros are in use. This is a
strong reason to disable the glibc optimisations when the compiler version
in use is known to perform them itself.
Here is the preprocessed code which GCC CVS handles much worse than GCC
2.95.2:
/* Declarations as they appear in the preprocessed test case; size_t is
   unsigned int here.  */
typedef unsigned int size_t;
/* Library routines referenced by the expanded strcpy macro in foo.  */
extern void *memcpy (void *__restrict __dest,
__const void *__restrict __src, size_t __n);
extern char *strcpy (char *__restrict __dest, __const char *__restrict __src);
/* strlen is declared with the pure attribute.  */
extern size_t strlen (__const char *__s) __attribute__ ((__pure__));
/* Integer types used by __strcpy_small for its stores; the names suggest
   16 and 32 bits -- NOTE(review): widths depend on the target.  */
typedef unsigned short int __uint16_t;
typedef unsigned int __uint32_t;
/* Store a short string into __dest using a few fixed-size stores and
   return __dest.

   __dest    destination buffer.
   __src0_2  value written through the __uint16_t member at offset 0.
   __src4_2  value written through the __uint16_t member at offset 4.
   __src0_4  value written through the __uint32_t member at offset 0.
   __src4_4  value written through the __uint32_t member at offset 4.
   __srclen  selects the store sequence; only 1..8 are handled.

   The caller in foo builds each __src* value from the string's bytes with
   the lowest-indexed byte in the least significant position, and passes
   strlen + 1 as __srclen.  For any other __srclen nothing is stored, since
   the switch has no default.
   NOTE(review): the stores go through a union pointer cast from char *, so
   this presumably relies on the target accepting unaligned stores --
   confirm.  */
extern __inline char *__strcpy_small (char *, __uint16_t, __uint16_t,
__uint32_t, __uint32_t, size_t);
extern __inline char *
__strcpy_small (char *__dest,
__uint16_t __src0_2, __uint16_t __src4_2,
__uint32_t __src0_4, __uint32_t __src4_4,
size_t __srclen)
{
/* View of the destination that allows 1-, 2- and 4-byte-wide stores.  */
union {
__uint32_t __ui;
__uint16_t __usi;
unsigned char __uc;
} *__u = (void *) __dest;
switch ((unsigned int) __srclen)
{
case 1:
/* Empty string: just the terminating NUL.  */
__u->__uc = '\0';
break;
case 2:
/* One __uint16_t store; the terminator is expected to be part of the
   packed value.  */
__u->__usi = __src0_2;
break;
case 3:
/* __uint16_t store, then an explicit NUL at offset 2.  */
__u->__usi = __src0_2;
__u = (void *) ((char *) __u + 2);
__u->__uc = '\0';
break;
case 4:
/* One __uint32_t store.  */
__u->__ui = __src0_4;
break;
case 5:
/* __uint32_t store, then an explicit NUL at offset 4.  */
__u->__ui = __src0_4;
__u = (void *) ((char *) __u + 4);
__u->__uc = '\0';
break;
case 6:
/* __uint32_t store at offset 0, __uint16_t store at offset 4.  */
__u->__ui = __src0_4;
__u = (void *) ((char *) __u + 4);
__u->__usi = __src4_2;
break;
case 7:
/* __uint32_t store, __uint16_t store at offset 4, explicit NUL at
   offset 6.  This is the case taken for "Foobar".  */
__u->__ui = __src0_4;
__u = (void *) ((char *) __u + 4);
__u->__usi = __src4_2;
__u = (void *) ((char *) __u + 2);
__u->__uc = '\0';
break;
case 8:
/* Two __uint32_t stores, at offsets 0 and 4.  */
__u->__ui = __src0_4;
__u = (void *) ((char *) __u + 4);
__u->__ui = __src4_4;
break;
}
return __dest;
}
/* Destination pointer used by the test case.  */
char *p;
/* Test function whose body is the preprocessed expansion of the glibc
   strcpy macro applied to p and "Foobar".  The expression has three arms:

   - if __builtin_constant_p ("Foobar") is false: a plain strcpy call;
   - otherwise, if the address of "Foobar" + 1 exceeds the address of
     "Foobar" by exactly 1 and strlen ("Foobar") + 1 <= 8: a call to
     __strcpy_small with the string's bytes packed into 16- and 32-bit
     values (lowest-indexed byte least significant);
   - otherwise: memcpy of strlen ("Foobar") + 1 bytes.

   The result of the expression is discarded.  */
void
foo (void)
{
(__extension__ (__builtin_constant_p ("Foobar") ? (((size_t)(const
void *)(("Foobar") + 1) - (size_t)(const void *)("Foobar") == 1) &&
strlen ("Foobar") + 1 <= 8 ? __strcpy_small (p, __extension__
(((__const unsigned char *) (__const char *) ("Foobar"))[0 + 1] << 8
| ((__const unsigned char *) (__const char *) ("Foobar"))[0]),
__extension__ (((__const unsigned char *) (__const char *)
("Foobar"))[4 + 1] << 8 | ((__const unsigned char *) (__const char
*) ("Foobar"))[4]), __extension__ (((((__const unsigned char *)
(__const char *) ("Foobar"))[0 + 3] << 8 | ((__const unsigned char
*) (__const char *) ("Foobar"))[0 + 2]) << 8 | ((__const unsigned
char *) (__const char *) ("Foobar"))[0 + 1]) << 8 | ((__const
unsigned char *) (__const char *) ("Foobar"))[0]), __extension__
(((((__const unsigned char *) (__const char *) ("Foobar"))[4 + 3] <<
8 | ((__const unsigned char *) (__const char *) ("Foobar"))[4 + 2])
<< 8 | ((__const unsigned char *) (__const char *) ("Foobar"))[4 +
1]) << 8 | ((__const unsigned char *) (__const char *)
("Foobar"))[4]), strlen ("Foobar") + 1) : (char *) memcpy (p,
"Foobar", strlen ("Foobar") + 1)) : strcpy (p, "Foobar")));
}
--
Joseph S. Myers
jsm28@cam.ac.uk
More information about the Gcc-bugs mailing list