Bug 19790 - equality not noticed when signedness differs.
Summary: equality not noticed when signedness differs.
Status: RESOLVED FIXED
Alias: None
Product: gcc
Classification: Unclassified
Component: tree-optimization (show other bugs)
Version: unknown
: P2 enhancement
Target Milestone: ---
Assignee: Not yet assigned to anyone
URL:
Keywords: missed-optimization, TREE
Depends on: 15459
Blocks: 19721
  Show dependency treegraph
 
Reported: 2005-02-06 13:28 UTC by Kazu Hirata
Modified: 2017-12-11 16:30 UTC (History)
5 users (show)

See Also:
Host:
Target:
Build:
Known to work:
Known to fail:
Last reconfirmed: 2008-07-06 09:37:27


Attachments

Note You need to log in before you can comment on or make changes to this bug.
Description Kazu Hirata 2005-02-06 13:28:52 UTC
Consider:

extern void bar (int);

foo (int *array)
{
  int i;

  for (i = 0; i <= 10; i++)
    {
      bar (i + 1);
      array[i / ((unsigned) 32)] |= ((unsigned long) 1) << (i % ((unsigned) (32)));
    }
}

The last tree SSA dump looks like so:

foo (array)
{
  unsigned int D.1154;
  unsigned int D.1152;
  unsigned int D.1153;
  unsigned int ivtmp.5;
  int pretmp.4;
  unsigned int pretmp.3;
  int pretmp.2;
  int i;
  int D.1136;
  long unsigned int D.1135;
  long unsigned int D.1134;
  int D.1133;
  long unsigned int D.1132;
  int D.1131;
  int * D.1130;
  int * D.1129;
  unsigned int D.1128;
  unsigned int D.1127;
  unsigned int i.0;
  int D.1125;

<bb 0>:

  # i_24 = PHI <i_1(1), 0(0)>;
<L0>:;
  D.1152_7 = (unsigned int) i_24;
  D.1153_20 = D.1152_7 + 1;
  i_1 = (int) D.1153_20;
  bar (i_1);
  D.1154_27 = (unsigned int) i_1;
  i.0_28 = D.1154_27 + 0ffffffff;
  D.1127_5 = i.0_28 >> 5;
  D.1128_10 = D.1127_5 * 4;
  D.1129_11 = (int *) D.1128_10;
  D.1130_12 = array_8 + D.1129_11;
  D.1131_13 = *D.1130_12;
  D.1132_14 = (long unsigned int) D.1131_13;
  D.1133_15 = i_24 & 31;
  D.1134_16 = 1 << D.1133_15;
  D.1135_17 = D.1132_14 | D.1134_16;
  D.1136_18 = (int) D.1135_17;
  *D.1130_12 = D.1136_18;
  if (D.1153_20 != 11) goto <L0>; else goto <L2>;

<L2>:;
  return;

}

Note that D.1152_7 == i.0_28, but this equality is not noticed
at tree level due to signedness changes in between.

We should replace the definition of i.0_28 as

  i.0_28 = D.1152_7;

and the copy propagation should take care of the rest.

The rtl optimizers catch this opportunity.
Comment 1 Andrew Pinski 2005-02-06 16:12:50 UTC
Confirmed but guess what my tree combiner fixes the problem:
  # i_24 = PHI <i_9(1), 0(0)>;
<L0>:;
  D.1165_26 = (unsigned int) i_24;
  D.1166_25 = D.1165_26 + 1;
  i_9 = (int) D.1166_25;
  bar (i_9);
  D.1121_5 = D.1165_26 >> 5;
  D.1122_10 = D.1121_5 * 4;
  D.1123_11 = (int *) D.1122_10;
  D.1124_12 = array_8 + D.1123_11;
  D.1125_13 = *D.1124_12;
  D.1126_14 = (long unsigned int) D.1125_13;
  D.1127_15 = i_24 & 31;
  D.1128_16 = 1 << D.1127_15;
  D.1129_17 = D.1126_14 | D.1128_16;
  D.1130_18 = (int) D.1129_17;
  *D.1124_12 = D.1130_18;
  if (D.1166_25 != 11) goto <L0>; else goto <L2>;

Guess that means I need to work more on it.
Comment 2 Andrew Pinski 2005-05-04 20:31:35 UTC
We get:
  i_3 = i_6 + 1;
  bar (i_3);
  D.1284_20 = (unsigned int) i_3;
  i.0_25 = D.1284_20 + 0ffffffff;
  D.1241_5 = i.0_25 >> 5;
  D.1242_10 = D.1241_5 * 4;
  D.1243_11 = (int *) D.1242_10;
  D.1244_12 = array_8 + D.1243_11;
  D.1245_13 = *D.1244_12;
  D.1246_14 = (long unsigned int) D.1245_13;
  D.1247_15 = i_6 & 31;
  D.1248_16 = 1 << D.1247_15;
  D.1249_17 = D.1246_14 | D.1248_16;
  D.1250_18 = (int) D.1249_17;
  *D.1244_12 = D.1250_18;


Now, which is semi better in that there is no extra cast but there is still a -1 issue, i.0_25 = (unsigned) 
i_6
Comment 3 Andrew Pinski 2005-09-21 01:57:37 UTC
We now get:
  # i_7 = PHI <i_3(1), 0(0)>;
<L0>:;
  i_3 = i_7 + 1;
  bar (i_3);
  D.1284_13 = *array_8;
  D.1285_14 = (long unsigned int) D.1284_13;
  D.1286_15 = i_7 & 31;
  D.1287_16 = 1 << D.1286_15;
  D.1288_17 = D.1285_14 | D.1287_16;
  D.1289_18 = (int) D.1288_17;
  *array_8 = D.1289_18;
  if (i_3 != 11) goto <L0>; else goto <L2>;

This is semi fixed for bounds less than 32.
trying with 37, we get the same old crap.
Comment 4 Steven Bosscher 2008-07-06 09:37:27 UTC
Still doesn't work.  You need to replace one line for the test case of comment #0 though, because the tree optimizers are now smart enough to see that (i/32) is always 0.  So replace 

   for (i = 0; i <= 10; i++)

with

   for (i = 0; i <= 320; i++)

and you still get the missed optimization.
Comment 5 Richard Biener 2008-07-06 12:20:51 UTC
So we can optimize

void __attribute__((noinline))
bar (int i)
{
  asm ("");
}

extern void link_error (void);
void
foo (void)
{
  int i;
  for (i = 0; i <= 320; i++)
    {
      bar (i);
      if (i > 320)
        link_error ();
    }
}

but not

void __attribute__((noinline))
bar (int i)
{
  asm ("");
}

extern void link_error (void);
void
foo (void)
{
  int i, j = 2;
  for (i = 0; i <= 320; i++)
    {
      j++;
      bar (j);
      if (j > 322)
        link_error ();
    }
}

even with the niter computation hunk restored from the PR34244 patch.
Comment 6 Andrew Pinski 2008-09-09 01:51:54 UTC
The testcase in comment #5 looks like the same issue are referenced in PR 32200.
Comment 7 Jeffrey A. Law 2017-12-11 16:30:06 UTC
The original issue reported by Kazu has been fixed.  We no longer have to increment a temporary holding i - 1 to retrieve i for second statement in the loop.

Given the age of this BZ, I didn't bisect it to figure out exactly when that happened.