This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[3.1?] fix memset builtin for i386/-minline-all-stringops
- From: Jan Hubicka <jh at suse dot cz>
- To: gcc-patches at gcc dot gnu dot org, rth at cygnus dot com, mark at codesourcery dot com,patches at x86-64 dot org
- Date: Fri, 3 May 2002 15:28:12 +0200
- Subject: [3.1?] fix memset builtin for i386/-minline-all-stringops
Hi,
this patch fixes the memset failure (PR target/6456).
You need to use -minline-all-stringops to reproduce the failure, so it is
not 100% a regression. On the other hand, the modified code is executed if
and only if that option is used, so I think the patch is safe even for 3.1.
The problem was that the code in front of the string operation attempted to
reach 8-byte alignment for PentiumPro, possibly copying more bytes of memory
than needed.
Honza
Fri May 3 15:26:07 CEST 2002 Jan Hubicka <jh@suse.cz>
* i386.c (ix86_expand_movstr, ix86_expand_clrstr): Fix
inline-all-stringops sequence.
Index: config/i386/i386.c
===================================================================
RCS file: /cvs/gcc/egcs/gcc/config/i386/i386.c,v
retrieving revision 1.389
diff -c -3 -p -r1.389 i386.c
*** config/i386/i386.c 29 Apr 2002 18:40:46 -0000 1.389
--- config/i386/i386.c 3 May 2002 13:23:40 -0000
*************** ix86_expand_movstr (dst, src, count_exp,
*** 9242,9247 ****
--- 9242,9250 ----
{
rtx countreg2;
rtx label = NULL;
+ int desired_alignment = (TARGET_PENTIUMPRO
+ && (count == 0 || count >= (unsigned int) 260)
+ ? 8 : UNITS_PER_WORD);
/* In case we don't know anything about the alignment, default to
library version, since it is usually equally fast and result in
*************** ix86_expand_movstr (dst, src, count_exp,
*** 9271,9280 ****
This is quite costy. Maybe we can revisit this decision later or
add some customizability to this code. */
! if (count == 0
! && align < (TARGET_PENTIUMPRO && (count == 0
! || count >= (unsigned int) 260)
! ? 8 : UNITS_PER_WORD))
{
label = gen_label_rtx ();
emit_cmp_and_jump_insns (countreg, GEN_INT (UNITS_PER_WORD - 1),
--- 9274,9280 ----
This is quite costy. Maybe we can revisit this decision later or
add some customizability to this code. */
! if (count == 0 && align < desired_alignment)
{
label = gen_label_rtx ();
emit_cmp_and_jump_insns (countreg, GEN_INT (UNITS_PER_WORD - 1),
*************** ix86_expand_movstr (dst, src, count_exp,
*** 9296,9305 ****
emit_label (label);
LABEL_NUSES (label) = 1;
}
! if (align <= 4
! && ((TARGET_PENTIUMPRO && (count == 0
! || count >= (unsigned int) 260))
! || TARGET_64BIT))
{
rtx label = ix86_expand_aligntest (destreg, 4);
emit_insn (gen_strmovsi (destreg, srcreg));
--- 9296,9302 ----
emit_label (label);
LABEL_NUSES (label) = 1;
}
! if (align <= 4 && desired_alignment > 4)
{
rtx label = ix86_expand_aligntest (destreg, 4);
emit_insn (gen_strmovsi (destreg, srcreg));
*************** ix86_expand_movstr (dst, src, count_exp,
*** 9308,9313 ****
--- 9305,9316 ----
LABEL_NUSES (label) = 1;
}
+ if (label && desired_alignment > 4 && !TARGET_64BIT)
+ {
+ emit_label (label);
+ LABEL_NUSES (label) = 1;
+ label = NULL_RTX;
+ }
if (!TARGET_SINGLE_STRINGOP)
emit_insn (gen_cld ());
if (TARGET_64BIT)
*************** ix86_expand_clrstr (src, count_exp, alig
*** 9453,9458 ****
--- 9456,9465 ----
{
rtx countreg2;
rtx label = NULL;
+ /* Compute desired alignment of the string operation. */
+ int desired_alignment = (TARGET_PENTIUMPRO
+ && (count == 0 || count >= (unsigned int) 260)
+ ? 8 : UNITS_PER_WORD);
/* In case we don't know anything about the alignment, default to
library version, since it is usually equally fast and result in
*************** ix86_expand_clrstr (src, count_exp, alig
*** 9467,9479 ****
countreg = copy_to_mode_reg (counter_mode, count_exp);
zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
! if (count == 0
! && align < (TARGET_PENTIUMPRO && (count == 0
! || count >= (unsigned int) 260)
! ? 8 : UNITS_PER_WORD))
{
label = gen_label_rtx ();
! emit_cmp_and_jump_insns (countreg, GEN_INT (UNITS_PER_WORD - 1),
LEU, 0, counter_mode, 1, label);
}
if (align <= 1)
--- 9474,9483 ----
countreg = copy_to_mode_reg (counter_mode, count_exp);
zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
! if (count == 0 && align < desired_alignment)
{
label = gen_label_rtx ();
! emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
LEU, 0, counter_mode, 1, label);
}
if (align <= 1)
*************** ix86_expand_clrstr (src, count_exp, alig
*** 9494,9501 ****
emit_label (label);
LABEL_NUSES (label) = 1;
}
! if (align <= 4 && TARGET_PENTIUMPRO && (count == 0
! || count >= (unsigned int) 260))
{
rtx label = ix86_expand_aligntest (destreg, 4);
emit_insn (gen_strsetsi (destreg, (TARGET_64BIT
--- 9498,9504 ----
emit_label (label);
LABEL_NUSES (label) = 1;
}
! if (align <= 4 && desired_alignment > 4)
{
rtx label = ix86_expand_aligntest (destreg, 4);
emit_insn (gen_strsetsi (destreg, (TARGET_64BIT
*************** ix86_expand_clrstr (src, count_exp, alig
*** 9506,9511 ****
--- 9509,9521 ----
LABEL_NUSES (label) = 1;
}
+ if (label && desired_alignment > 4 && !TARGET_64BIT)
+ {
+ emit_label (label);
+ LABEL_NUSES (label) = 1;
+ label = NULL_RTX;
+ }
+
if (!TARGET_SINGLE_STRINGOP)
emit_insn (gen_cld ());
if (TARGET_64BIT)