This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[3.4 PATCH] Fix PR rtl-optimization/16968


Hi,

This is fallout from the stopgap that was added just before the 3.4.0 release 
to mitigate the RTX_UNCHANGING_P problems; it is a regression at -O2 present 
on the 3.4 branch.  The problem here is that the loop optimizer happily 
bypasses the optimization barrier.

We have in .09.gcse:

(note 18 251 201 NOTE_INSN_LOOP_BEG)

(code_label 201 18 225 1 22 "" [1 uses])

[...]

(insn 42 41 44 2 pr16968.c:55 (set (mem/s:SI (plus:SI (reg/f:SI 31 31)
                (const_int 16 [0x10])) [9 t+0 S4 A128])
        (reg:SI 131)) 249 {*movsi_internal1} (nil)
    (expr_list:REG_EQUAL (const_int 0 [0x0])
        (nil)))

(insn 44 42 46 2 pr16968.c:55 (set (mem/s:SI (plus:SI (reg/f:SI 31 31)
                (const_int 20 [0x14])) [9 t+4 S4 A32])
        (reg:SI 131)) 249 {*movsi_internal1} (nil)
    (expr_list:REG_EQUAL (const_int 0 [0x0])
        (nil)))

(insn 46 44 48 2 pr16968.c:55 (set (mem/s:SI (plus:SI (reg/f:SI 31 31)
                (const_int 24 [0x18])) [9 t+8 S4 A64])
        (reg:SI 131)) 249 {*movsi_internal1} (nil)
    (expr_list:REG_EQUAL (const_int 0 [0x0])
        (nil)))

(insn 48 46 49 2 pr16968.c:55 (set (mem/s:SI (plus:SI (reg/f:SI 31 31)
                (const_int 28 [0x1c])) [9 t+12 S4 A32])
        (reg:SI 131)) 249 {*movsi_internal1} (nil)
    (expr_list:REG_EQUAL (const_int 0 [0x0])
        (nil)))

(insn 49 48 50 2 pr16968.c:55 (asm_input ("")) -1 (nil)
    (nil))

[...]

(insn 88 87 90 4 pr16968.c:65 (set (reg:SI 142 [ t ])
        (mem/s/u:SI (plus:SI (reg/f:SI 31 31)
                (const_int 16 [0x10])) [9 t+0 S4 A128])) 249 {*movsi_internal1} (nil)
    (nil))

(insn 90 88 92 4 pr16968.c:65 (set (reg:SI 143 [ t+4 ])
        (mem/s/u:SI (plus:SI (reg/f:SI 31 31)
                (const_int 20 [0x14])) [9 t+4 S4 A32])) 249 {*movsi_internal1} (nil)
    (nil))

(insn 92 90 94 4 pr16968.c:65 (set (reg:SI 144 [ t+8 ])
        (mem/s/u:SI (plus:SI (reg/f:SI 31 31)
                (const_int 24 [0x18])) [9 t+8 S4 A64])) 249 {*movsi_internal1} (nil)
    (nil))

(insn 94 92 89 4 pr16968.c:65 (set (reg:SI 145 [ t+12 ])
        (mem/s/u:SI (plus:SI (reg/f:SI 31 31)
                (const_int 28 [0x1c])) [9 t+12 S4 A32])) 249 {*movsi_internal1} (nil)
    (nil))


Note that the 4 MEMs are written to without /u but read from with /u.  Then 
the loop optimizer comes into play and hoists the last 4 insns:

Loop from 18 to 206: 65 real insns.
Continue at insn 198.
Insn 88: regno 142 (life 9), savings 1  moved to 262
Insn 90: regno 143 (life 9), savings 1  moved to 263
Insn 92: regno 144 (life 9), savings 1  moved to 264
Insn 94: regno 145 (life 9), savings 1  moved to 265

because the loop_invariant_p predicate is computed solely based on the result 
of invoking true_dependence on the list of stores.


It appears that the optimizer is not prepared to handle optimization barriers, 
probably because they were far less frequent before 3.4.0.  I'm not sure we 
should fix it "en masse" so I've settled for a minimal fix: the scanning for 
movable insns in scan_loop stops as soon as it encounters a barrier.

Bootstrapped/regtested on amd64-mandrake-linux-gnu.


2004-12-17  Eric Botcazou  <ebotcazou@libertysurf.fr>

	PR rtl-optimization/16968
	* loop.c (scan_loop): Stop scanning the loop for movable
	insns as soon as an optimization barrier is encountered.


2004-12-17  Jakub Jelinek  <jakub@redhat.com>

	* gcc.c-torture/execute/20041217-1.c: New test.



-- 
Eric Botcazou
Index: loop.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/loop.c,v
retrieving revision 1.488.2.5
diff -u -p -r1.488.2.5 loop.c
--- loop.c	13 Jul 2004 15:29:08 -0000	1.488.2.5
+++ loop.c	17 Dec 2004 17:06:15 -0000
@@ -765,6 +765,9 @@ scan_loop (struct loop *loop, int flags)
 	in_libcall--;
       if (GET_CODE (p) == INSN)
 	{
+	  /* Do not scan past an optimization barrier.  */
+	  if (GET_CODE (PATTERN (p)) == ASM_INPUT)
+	    break;
 	  temp = find_reg_note (p, REG_LIBCALL, NULL_RTX);
 	  if (temp)
 	    in_libcall++;
/* PR rtl-optimization/16968 */
/* Testcase by Jakub Jelinek  <jakub@redhat.com> */

/* Innermost aggregate of the testcase.  bar () assigns a zeroed const
   instance of it to two nested members, and check () verifies that
   every field (b, c, d and e) reads back as zero.  */
struct T
{
  unsigned int b, c, *d;
  unsigned char e;
};
/* A scalar followed by an embedded struct T.  This is the unit that
   check () inspects.  */
struct S
{
  unsigned int a;
  struct T f;
};
/* Two struct S objects, g and h; bar () fills in and checks both.  */
struct U
{
  struct S g, h;
};
/* Outermost object: a scalar i plus the nested struct U.  baz () and
   bar () return pointers to this type.  */
struct V
{
  unsigned int i;
  struct U j;
};

extern void exit (int);
extern void abort (void);

/* Ignore X and return the address of an empty string literal.  The
   result is never null, so the null test on it in bar () does not
   take the early return.  */
void *
dummy1 (void *x)
{
  return "";
}

/* Terminate the program successfully via exit (0); X and Y are unused.
   There is deliberately no return statement, as exit does not return.
   Being called at all is the passing outcome of the test: main ()
   aborts if control ever comes back from foo ().  */
void *
dummy2 (void *x, void *y)
{
  exit (0);
}

/* Return the address of a function-local static struct V after filling
   every byte of it with 0x55; X is unused.  Because the object starts
   out entirely non-zero, check () aborts on any field that the caller
   failed to overwrite with zero.  */
struct V *
baz (unsigned int x)
{
  static struct V v;
  __builtin_memset (&v, 0x55, sizeof (v));
  return &v;
}

/* Abort if any of Y->a, Y->f.b, Y->f.c, Y->f.d or Y->f.e is non-zero;
   otherwise return 1.  X is unused.  */
int
check (void *x, struct S *y)
{
  if (y->a || y->f.b || y->f.c || y->f.d || y->f.e)
    abort ();
  return 1;
}

/* Build the object under test.  Returns a null pointer if dummy1 ()
   yields null or if either check () call returns zero; otherwise
   returns the struct V obtained from baz () with i set to X and both
   nested struct S members zeroed.

   NOTE(review): per the PR 16968 report, the accesses involved in the
   miscompilation are the stores that initialize the const local T and
   the later reads of it for the aggregate copies.  Keep the statement
   order here intact.  */
static struct V *
bar (unsigned int x, void *y)
{
  /* All-zero const local; the source of both aggregate copies below.  */
  const struct T t = { 0, 0, (void *) 0, 0 };
  struct V *u;
  void *v;
  v = dummy1 (y);
  if (!v)
    return (void *) 0;

  /* baz () hands back an object filled with 0x55 bytes, so each field
     check () looks at must be explicitly zeroed here.  */
  u = baz (sizeof (struct V));
  u->i = x;
  u->j.g.a = 0;
  u->j.g.f = t;
  u->j.h.a = 0;
  u->j.h.f = t;

  /* check () aborts if a zero store above did not take effect.  */
  if (!check (v, &u->j.g) || !check (v, &u->j.h))
    return (void *) 0;
  return u;
}

/* Loop driver for the test.  Sets *Z to null, then iterates up to Y
   times: reads *X, calls bar () when the value is 1 and otherwise
   treats the result as null.  A non-null result is passed to
   dummy2 (), which exits the program; a null result returns 1
   immediately.  If the loop finishes, stores the last dummy2 () result
   (or null) in *Z and returns 0.

   NOTE(review): bar () is static and is called from inside this loop;
   the PR 16968 report describes the loop optimizer hoisting loads out
   of a loop, so the loop and switch shape should be left as is.  */
int
foo (unsigned int *x, unsigned int y, void **z)
{
  void *v;
  unsigned int i, j;

  *z = v = (void *) 0;

  for (i = 0; i < y; i++)
    {
      struct V *c;

      j = *x;

      switch (j)
	{
	case 1:
	  c = bar (j, x);
	  break;
	default:
	  c = 0;
	  break;
	}
      /* With a non-null C, dummy2 () ends the program via exit (0).  */
      if (c)
	v = dummy2 (v, c);
      else
        return 1;
    }

  *z = v;
  return 0;
}

/* Run foo () for a single iteration with *x == 1.  The test passes by
   terminating through exit (0) inside dummy2 (); if foo () returns,
   the abort () below reports failure.  */
int
main (void)
{
  unsigned int one = 1;
  void *p;
  foo (&one, 1, &p);
  abort ();
}

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]