This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[PATCH, x86_64]: Fix PR target/30778
- From: Uros Bizjak <ubizjak at gmail dot com>
- To: GCC Patches <gcc-patches at gcc dot gnu dot org>
- Cc: Jan Hubicka <jh at suse dot cz>
- Date: Sat, 24 Feb 2007 17:56:39 +0100
- Subject: [PATCH, x86_64]: Fix PR target/30778
- Dkim-signature: a=rsa-sha1; c=relaxed/relaxed; d=gmail.com; s=beta; h=domainkey-signature:received:received:message-id:date:from:user-agent:mime-version:to:cc:subject:content-type; b=EfqL0QW/Dgwn4sdAQkA5Xdm7ldbdxO1uoGeEySU+sYorJnfpte0KWll4B4DcjNw4aVOcJ6HiIkajWiPTznccRxlvVVBhP+Yx241fP2p3w6CgxSOxAxMGuJG1mnYM2PS/i5lLB366762+McmRtkw8+z0HHFHi4ArO0RjNZJuwNUc=
Hello!
This patch fixes the failure described in PR target/30778. The problem
was that the new stringop code expanded the copy loop even when the expected
size was less than the minimum size that the loop can handle. The copy loop
was still generated, but memory was then set again in the stringop prologue expansion.
The patch fixes the case of zero iterations by exiting from the
function early. It also implements a small optimization for the case of
a single iteration: the compare and jump are not needed in this case.
Unfortunately, this fix is not enough to fix PR target/30770, where
BOOT_CFLAGS="-O2 -mtune=nocona". Nocona and k8 are heavy users of
expand_set_or_movmem_via_loop() for small structures, so something is
still wrong there.
The patch was bootstrapped (default BOOT_CFLAGS) on x86_64-linux-gnu and
regression tested for all default languages. Bootstrap still fails with
BOOT_CFLAGS="-O2 -mtune=nocona", but the patch is IMO a step in the right
direction. OK for mainline?
2007-02-24 Uros Bizjak <ubizjak@gmail.com>
PR target/30778
* config/i386/i386.c (expand_set_or_movmem_via_loop): Return if
expected_size is less than GET_MODE_SIZE (mode) * unroll.
Do not emit compare and jump if number of iterations is less than 2.
testsuite/ChangeLog:
2007-02-24 Uros Bizjak <ubizjak@gmail.com>
PR target/30778
* gcc.target/i386/pr30778.c: New test.
Uros.
Index: testsuite/gcc.target/i386/pr30778.c
===================================================================
--- testsuite/gcc.target/i386/pr30778.c (revision 0)
+++ testsuite/gcc.target/i386/pr30778.c (revision 0)
@@ -0,0 +1,38 @@
+/* { dg-do run { target i?86-*-* x86_64-*-* } } */
+/* { dg-options "-O2 -mtune=k8" } */
+
+extern void *memset (void *, int, unsigned long);
+extern void abort (void);
+
+struct reg_stat {
+ void *last_death;
+ void *last_set;
+ void *last_set_value;
+ int last_set_label;
+ char last_set_sign_bit_copies;
+ int last_set_mode : 8;
+ char last_set_invalid;
+ char sign_bit_copies;
+ long nonzero_bits;
+};
+
+static struct reg_stat *reg_stat;
+
+void __attribute__((noinline))
+init_reg_last (void)
+{
+ memset (reg_stat, 0, __builtin_offsetof (struct reg_stat, sign_bit_copies));
+}
+
+int main (void)
+{
+ struct reg_stat r;
+
+ reg_stat = &r;
+ r.nonzero_bits = -1;
+ init_reg_last ();
+ if (r.nonzero_bits != -1)
+ abort ();
+ return 0;
+}
+
Index: config/i386/i386.c
===================================================================
--- config/i386/i386.c (revision 122286)
+++ config/i386/i386.c (working copy)
@@ -13315,13 +13315,25 @@
{
rtx out_label, top_label, iter, tmp;
enum machine_mode iter_mode;
- rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
- rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
+ HOST_WIDE_INT num_iter;
+ HOST_WIDE_INT min_size = GET_MODE_SIZE (mode) * unroll;
+ rtx piece_size = GEN_INT (min_size);
+ rtx piece_size_mask = GEN_INT (~(min_size - 1));
rtx size;
rtx x_addr;
rtx y_addr;
int i;
+ if (expected_size != -1)
+ num_iter = expected_size / min_size;
+ else
+ num_iter = 2;
+
+ /* Bail out if expected size is less than minimum size
+ that can be emitted. */
+ if (!num_iter)
+ return;
+
iter_mode = GET_MODE (count);
if (iter_mode == VOIDmode)
iter_mode = word_mode;
@@ -13341,16 +13353,27 @@
}
emit_move_insn (iter, const0_rtx);
- emit_label (top_label);
-
tmp = convert_modes (Pmode, iter_mode, iter, true);
- x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
- destmem = change_address (destmem, mode, x_addr);
+ if (num_iter > 1)
+ {
+ emit_label (top_label);
+
+ x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
+ destmem = change_address (destmem, mode, x_addr);
+ }
+ else
+ destmem = change_address (destmem, mode, destptr);
+
if (srcmem)
{
- y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
- srcmem = change_address (srcmem, mode, y_addr);
+ if (num_iter > 1)
+ {
+ y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
+ srcmem = change_address (srcmem, mode, y_addr);
+ }
+ else
+ srcmem = change_address (srcmem, mode, srcptr);
/* When unrolling for chips that reorder memory reads and writes,
we can save registers by using single temporary.
@@ -13408,20 +13431,24 @@
if (tmp != iter)
emit_move_insn (iter, tmp);
- emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
- true, top_label);
- if (expected_size != -1)
+ if (num_iter > 1)
{
- expected_size /= GET_MODE_SIZE (mode) * unroll;
- if (expected_size == 0)
- predict_jump (0);
- else if (expected_size > REG_BR_PROB_BASE)
- predict_jump (REG_BR_PROB_BASE - 1);
+ emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
+ true, top_label);
+ if (expected_size != -1)
+ {
+ expected_size /= GET_MODE_SIZE (mode) * unroll;
+ if (expected_size == 0)
+ predict_jump (0);
+ else if (expected_size > REG_BR_PROB_BASE)
+ predict_jump (REG_BR_PROB_BASE - 1);
+ else
+ predict_jump (REG_BR_PROB_BASE
+ - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
+ }
else
- predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
+ predict_jump (REG_BR_PROB_BASE * 80 / 100);
}
- else
- predict_jump (REG_BR_PROB_BASE * 80 / 100);
iter = ix86_zero_extend_to_Pmode (iter);
tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
true, OPTAB_LIB_WIDEN);