This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH, x86_64]: Fix PR target/30778


Hello!

This patch fixes the failure described in PR target/30778. The problem was that the new stringop code expanded the copy loop even when the expected size was less than the minimum size that the loop can handle. The copy loop was still generated, but the memory was then set again in the stringop prologue expansion.

The patch fixes the case of zero iterations, where we now exit from the function early. It also implements a small optimization for the case of a single iteration: the compare and jump are not needed in this case.

Unfortunately, this fix is not enough to fix PR target/30770, where BOOT_CFLAGS="-O2 -mtune=nocona". Nocona and k8 are heavy users of expand_set_or_movmem_via_loop() for small structures, so there is still something wrong there.

The patch was bootstrapped (default BOOT_CFLAGS) on x86_64-linux-gnu and regression tested for all default languages. Bootstrap still fails with BOOT_CFLAGS="-O2 -mtune=nocona", but the patch is IMO a step in the right direction. OK for mainline?

2007-02-24 Uros Bizjak <ubizjak@gmail.com>

       PR target/30778
       * config/i386/i386.c (expand_set_or_movmem_via_loop): Return if
       expected_size is less than GET_MODE_SIZE (mode) * unroll.
       Do not emit compare and jump if number of iterations is less than 2.

testsuite/ChangeLog:

2007-02-24 Uros Bizjak <ubizjak@gmail.com>

       PR target/30778
       * gcc.target/i386/pr30778.c: New test.

Uros.
Index: testsuite/gcc.target/i386/pr30778.c
===================================================================
--- testsuite/gcc.target/i386/pr30778.c	(revision 0)
+++ testsuite/gcc.target/i386/pr30778.c	(revision 0)
@@ -0,0 +1,38 @@
+/* { dg-do run { target i?86-*-* x86_64-*-* } } */
+/* { dg-options "-O2 -mtune=k8" } */
+
+extern void *memset (void *, int, unsigned long);
+extern void abort (void);
+
+struct reg_stat {
+  void *last_death;
+  void *last_set;
+  void *last_set_value;
+  int   last_set_label;
+  char  last_set_sign_bit_copies;
+  int   last_set_mode : 8;
+  char  last_set_invalid;
+  char sign_bit_copies;
+  long nonzero_bits;
+};
+
+static struct reg_stat *reg_stat;
+
+void __attribute__((noinline))
+init_reg_last (void)
+{
+  memset (reg_stat, 0, __builtin_offsetof (struct reg_stat, sign_bit_copies));
+}
+
+int main (void)
+{
+  struct reg_stat r;
+
+  reg_stat = &r;
+  r.nonzero_bits = -1;
+  init_reg_last ();
+  if (r.nonzero_bits != -1)
+    abort ();
+  return 0;
+}
+
Index: config/i386/i386.c
===================================================================
--- config/i386/i386.c	(revision 122286)
+++ config/i386/i386.c	(working copy)
@@ -13315,13 +13315,25 @@
 {
   rtx out_label, top_label, iter, tmp;
   enum machine_mode iter_mode;
-  rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
-  rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
+  HOST_WIDE_INT num_iter;
+  HOST_WIDE_INT min_size = GET_MODE_SIZE (mode) * unroll;
+  rtx piece_size = GEN_INT (min_size);
+  rtx piece_size_mask = GEN_INT (~(min_size - 1));
   rtx size;
   rtx x_addr;
   rtx y_addr;
   int i;
 
+  if (expected_size != -1)
+    num_iter = expected_size / min_size;
+  else
+    num_iter = 2;
+
+  /* Bail out if expected size is less than minimum size
+     that can be emitted.  */
+  if (!num_iter)
+    return;
+
   iter_mode = GET_MODE (count);
   if (iter_mode == VOIDmode)
     iter_mode = word_mode;
@@ -13341,16 +13353,27 @@
     }
   emit_move_insn (iter, const0_rtx);
 
-  emit_label (top_label);
-
   tmp = convert_modes (Pmode, iter_mode, iter, true);
-  x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
-  destmem = change_address (destmem, mode, x_addr);
 
+  if (num_iter > 1)
+    {
+      emit_label (top_label);
+
+      x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);  
+      destmem = change_address (destmem, mode, x_addr);
+    }
+  else
+    destmem = change_address (destmem, mode, destptr);
+
   if (srcmem)
     {
-      y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
-      srcmem = change_address (srcmem, mode, y_addr);
+      if (num_iter > 1)
+	{
+	  y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
+	  srcmem = change_address (srcmem, mode, y_addr);
+	}
+      else
+	srcmem = change_address (srcmem, mode, srcptr);
 
       /* When unrolling for chips that reorder memory reads and writes,
 	 we can save registers by using single temporary.  
@@ -13408,20 +13431,24 @@
   if (tmp != iter)
     emit_move_insn (iter, tmp);
 
-  emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
-			   true, top_label);
-  if (expected_size != -1)
+  if (num_iter > 1)
     {
-      expected_size /= GET_MODE_SIZE (mode) * unroll;
-      if (expected_size == 0)
-	predict_jump (0);
-      else if (expected_size > REG_BR_PROB_BASE)
-	predict_jump (REG_BR_PROB_BASE - 1);
+      emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
+			       true, top_label);
+      if (expected_size != -1)
+	{
+	  expected_size /= GET_MODE_SIZE (mode) * unroll;
+	  if (expected_size == 0)
+	    predict_jump (0);
+	  else if (expected_size > REG_BR_PROB_BASE)
+	    predict_jump (REG_BR_PROB_BASE - 1);
+	  else
+	    predict_jump (REG_BR_PROB_BASE
+			  - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
+	}
       else
-        predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
+	predict_jump (REG_BR_PROB_BASE * 80 / 100);
     }
-  else
-    predict_jump (REG_BR_PROB_BASE * 80 / 100);
   iter = ix86_zero_extend_to_Pmode (iter);
   tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
 			     true, OPTAB_LIB_WIDEN);

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]