Bug 69691 - [6 Regression] wrong code at -O2 on x86_64-linux-gnu
Summary: [6 Regression] wrong code at -O2 on x86_64-linux-gnu
Status: RESOLVED FIXED
Alias: None
Product: gcc
Classification: Unclassified
Component: rtl-optimization (show other bugs)
Version: 6.0
: P1 normal
Target Milestone: 6.0
Assignee: Jakub Jelinek
URL:
Keywords:
Depends on:
Blocks:
 
Reported: 2016-02-05 13:26 UTC by Marek Polacek
Modified: 2016-02-05 21:15 UTC (History)
2 users (show)

See Also:
Host:
Target:
Build:
Known to work: 5.3.0
Known to fail: 6.0
Last reconfirmed: 2016-02-05 00:00:00


Attachments
gcc6-pr69691.patch (1.23 KB, patch)
2016-02-05 16:11 UTC, Jakub Jelinek
Details | Diff

Note You need to log in before you can comment on or make changes to this bug.
Description Marek Polacek 2016-02-05 13:26:08 UTC
$ xgcc -O2 zz.c; ./a.out
Aborted (core dumped)

extern int tolower (int) __attribute__ ((leaf));

char *USHyphens[] = { ".ach4", 0 };

#define LCNT (26 + 1 + 4)
#define SEPCNT 10
typedef struct sHyphenNode
{
  char sepcnts[SEPCNT];
  struct sHyphenNode *Daughters[LCNT];
} THyphenNode, *PHyphenNode;

static PHyphenNode HyphenRoot = 0;

static int
GetIndex (char ch)
{
  if (ch == 0)
    __builtin_abort ();
  if ((tolower (ch) >= 'a' - 1) && (tolower (ch) <= 'z'))
    return (tolower (ch) - ('a' - 1));
  else if (ch == '.')
    return 0;
  else
    {
      __builtin_printf ("unallowed character %d\n", ch);
      return -1;
    }
}

static void
InitHyphenNode (PHyphenNode Node)
{
  int z;

  for (z = 0; z < LCNT; Node->Daughters[z++] = 0);
  for (z = 0; z < SEPCNT; Node->sepcnts[z++] = 0);
}

void
BuildTree (char **Patterns)
{
  char **run, ch, *pos, sing[500], *rrun;
  char RunCnts[SEPCNT];
  int z, l, rc, index;
  PHyphenNode Lauf;

  HyphenRoot = (PHyphenNode) __builtin_malloc (sizeof (THyphenNode));
  InitHyphenNode (HyphenRoot);

  for (run = Patterns; *run != 0; run++)
    {
      __builtin_strcpy (sing, *run);
      rrun = sing;
      do
	{
	  pos = __builtin_strchr (rrun, ' ');
	  if (pos)
	    *pos = '\0';
	  l = __builtin_strlen (rrun);
	  rc = 0;
	  Lauf = HyphenRoot;
	  for (z = 0; z < SEPCNT; RunCnts[z++] = 0);
	  for (z = 0; z < l; z++)
	    {
	      ch = rrun[z];
	      if ((ch >= '0') && (ch <= '9'))
		RunCnts[rc] = ch - '0';
	      else
		{
		  index = GetIndex (ch);
		  if (!Lauf->Daughters[index])
		    {
		      Lauf->Daughters[index] =
			(PHyphenNode) __builtin_malloc (sizeof (THyphenNode));
		      InitHyphenNode (Lauf->Daughters[index]);
		    }
		  Lauf = Lauf->Daughters[index];
		  rc++;
		}
	    }
	  __builtin_memcpy (Lauf->sepcnts, RunCnts, SEPCNT);
	  if (pos)
	    rrun = pos + 1;
	}
      while (pos);
    }
}

void
DoHyphens (char *word)
{
  char Field[300];
  char Res[300];
  int z, z2, z3, l;
  PHyphenNode Lauf;

  l = __builtin_strlen (word);
  *Field = 'a' - 1;
  for (z = 0; z < l; z++)
    {
      Field[z + 1] = tolower ((unsigned int) word[z]);
      if (GetIndex (Field[z + 1]) <= 0)
	return;
    }
  Field[l + 1] = 'a' - 1;
  l += 2;
  for (z = 0; z <= l + 1; Res[z++] = 0);

  if (!HyphenRoot)
    return;

  for (z = 0; z < l; z++)
    {
      Lauf = HyphenRoot;
      for (z2 = z; z2 < l; z2++)
	{
	  Lauf = Lauf->Daughters[GetIndex (Field[z2])];
	  if (!Lauf)
	    break;
	  for (z3 = 0; z3 <= z2 - z + 2; z3++)
	    if (Lauf->sepcnts[z3] > Res[z + z3])
	      Res[z + z3] = Lauf->sepcnts[z3];
	}
    }
  for (z = 3; z < l - 2; z++)
    if ((Res[z] & 1) == 1)
     asm ("");
}

int
main ()
{
  BuildTree (USHyphens);
  DoHyphens ("aaaaa");
}
Comment 1 Marek Polacek 2016-02-05 13:27:32 UTC
The testcase ICEd since r230150 and is miscompiled since r231300.
Comment 2 Jakub Jelinek 2016-02-05 14:16:09 UTC
Adjusted testcase, so that it doesn't depend on ASCII ordering, tolower etc.

char u[] = { 46, 97, 99, 104, 52, 0 };
char *v[] = { u, 0 };
static struct S { char a[10]; struct S *b[31]; } *w = 0;

__attribute__((noinline, noclone)) int
fn (int x)
{
  if (__builtin_strchr (u, x) || x == 96)
    return x;
  __builtin_abort ();
}

__attribute__((noinline, noclone)) int
foo (char x)
{
  if (x == 0)
    __builtin_abort ();
  if (fn (x) >= 96 && fn (x) <= 122)
    return (fn (x) - 96);
  else if (x == 46)
    return 0;
  else
    {
      __builtin_printf ("foo %d\n", x);
      return -1;
    }
}

__attribute__((noinline, noclone)) void
bar (char **x)
{
  char **b, c, *d, e[500], *f, g[10];
  int z, l, h, i;
  struct S *s;

  w = __builtin_calloc (1, sizeof (struct S));
  for (b = x; *b; b++)
    {
      __builtin_strcpy (e, *b);
      f = e;
      do
	{
	  d = __builtin_strchr (f, 32);
	  if (d)
	    *d = 0;
	  l = __builtin_strlen (f);
	  h = 0;
	  s = w;
	  __builtin_memset (g, 0, sizeof (g));
	  for (z = 0; z < l; z++)
	    {
	      c = f[z];
	      if (c >= 48 && c <= 57)
		g[h] = c - 48;
	      else
		{
		  i = foo (c);
		  if (!s->b[i])
		    s->b[i] = __builtin_calloc (1, sizeof (struct S));
		  s = s->b[i];
		  h++;
		}
	    }
	  __builtin_memcpy (s->a, g, 10);
	  if (d)
	    f = d + 1;
	}
      while (d);
    }
}

__attribute__((noinline, noclone)) void
baz (char *x)
{
  char a[300], b[300];
  int z, y, t, l;
  struct S *s;

  l = __builtin_strlen (x);
  *a = 96;
  for (z = 0; z < l; z++)
    {
      a[z + 1] = fn ((unsigned int) x[z]);
      if (foo (a[z + 1]) <= 0)
	return;
    }
  a[l + 1] = 96;
  l += 2;
  __builtin_memset (b, 0, l + 2);

  if (!w)
    return;

  for (z = 0; z < l; z++)
    {
      s = w;
      for (y = z; y < l; y++)
	{
	  s = s->b[foo (a[y])];
	  if (!s)
	    break;
	  for (t = 0; t <= y - z + 2; t++)
	    if (s->a[t] > b[z + t])
	      b[z + t] = s->a[t];
	}
    }
  for (z = 3; z < l - 2; z++)
    if ((b[z] & 1) == 1)
     asm ("");
}

int
main ()
{
  bar (v);
  char c[] = { 97, 97, 97, 97, 97, 0 };
  baz (c);
  return 0;
}
Comment 3 Jakub Jelinek 2016-02-05 14:31:45 UTC
This looks like a LRA bug.
We have:
(insn 153 152 276 18 (parallel [
            (set (reg/f:DI 183 [ ivtmp.58 ])
                (plus:DI (reg/f:DI 20 frame)
                    (const_int -608 [0xfffffffffffffda0])))
            (clobber (reg:CC 17 flags))
        ]) 219 {*adddi_1}
     (expr_list:REG_UNUSED (reg:CC 17 flags)
        (expr_list:REG_EQUIV (plus:DI (reg/f:DI 20 frame)
                (const_int -608 [0xfffffffffffffda0]))
            (nil))))
...
(insn 64 159 185 18 (set (reg:DI 182 [ ivtmp.58 ])
        (reg/f:DI 183 [ ivtmp.58 ])) pr69691.c:94 85 {*movdi_internal}
     (expr_list:REG_EQUAL (plus:DI (reg/f:DI 20 frame)
            (const_int -608 [0xfffffffffffffda0]))
        (nil)))
(insn 186 185 323 18 (set (reg:SI 247)
        (plus:SI (plus:SI (subreg:SI (reg/f:DI 183 [ ivtmp.58 ]) 0)
                (subreg:SI (reg:DI 202) 0))
            (const_int 1 [0x1]))) 214 {*leasi}
     (nil))
in *.ira, and LRA creates:
(note 153 152 349 18 NOTE_INSN_DELETED)
...
(insn 64 350 185 18 (set (reg:DI 3 bx [orig:182 ivtmp.58 ] [182])
        (plus:DI (reg/f:DI 7 sp)
            (const_int 32 [0x20]))) pr69691.c:94 215 {*leadi}
     (expr_list:REG_EQUAL (plus:DI (reg/f:DI 20 frame)
            (const_int -608 [0xfffffffffffffda0]))
        (nil)))
(note 185 64 186 18 NOTE_INSN_DELETED)
(insn 186 185 351 18 (set (reg:SI 0 ax [247])
        (plus:SI (plus:SI (reg:SI 7 sp)
                (reg:SI 42 r13 [202]))
            (const_int 65 [0x41]))) 214 {*leasi}
     (nil))

out of this.  Note, frame-608 is eliminated to sp+32 (looks correct), but in
insn 186 which is supposed to be (int)(frame-608) + (int)r13 + 1 the used offset is 65, instead of 32+1, so it seems that the 32 from sp+32 got added twice there during LRA.  Vlad, can you please have a look?

That said, on the generated assembly, I find it weird that nothing post reload CSEs simple stuff like:
        leaq    32(%rsp), %rdi
        leaq    32(%rsp), %r14
or
        leaq    32(%rsp), %rax
        movl    %esi, %esi
        leaq    336(%rsp), %r14
        leaq    32(%rsp), %rbx
Wouldn't it be smaller and better to use movq %rdi, %r14 or movq %rax, %rbx?
Smaller for sure (movq is 2 bytes smaller than leaq with 1 byte immediate).
Comment 4 Jakub Jelinek 2016-02-05 15:49:24 UTC
I think the bug is in move_plus_up, as that function transforms:
(subreg:SI (plus:DI (reg/f:DI 20 frame)
        (const_int 16 [0x10])) 0)
into:
(plus:SI (plus:SI (subreg:SI (reg/f:DI 20 frame) 0)
        (const_int 16 [0x10]))
    (const_int 16 [0x10]))
which looks just wrong, it should have been
(plus:SI (subreg:SI (reg/f:DI 20 frame) 0)
    (const_int 16 [0x10])))
Comment 5 Jakub Jelinek 2016-02-05 16:11:39 UTC
Created attachment 37597 [details]
gcc6-pr69691.patch

Untested fix.
Comment 6 Jakub Jelinek 2016-02-05 21:14:14 UTC
Author: jakub
Date: Fri Feb  5 21:13:43 2016
New Revision: 233187

URL: https://gcc.gnu.org/viewcvs?rev=233187&root=gcc&view=rev
Log:
	PR rtl-optimization/69691
	* lra-eliminations.c (move_plus_up): Don't add the addend twice.

	* gcc.c-torture/execute/pr69691.c: New test.

Added:
    trunk/gcc/testsuite/gcc.c-torture/execute/pr69691.c
Modified:
    trunk/gcc/ChangeLog
    trunk/gcc/lra-eliminations.c
    trunk/gcc/testsuite/ChangeLog
Comment 7 Jakub Jelinek 2016-02-05 21:15:56 UTC
Fixed.