[Bug tree-optimization/41026] invariant address load inside loop with -Os.

rahul at icerasemi dot com gcc-bugzilla@gcc.gnu.org
Fri Sep 11 10:04:00 GMT 2009



------- Comment #6 from rahul at icerasemi dot com  2009-09-11 10:03 -------
An interesting regression arises as a side effect of loop header copying (this
occurs even at vanilla -O2). Suppose I modify my original test case as follows:

struct struct_t {
  int* data;
};

void testAddr (struct struct_t* sp, int len)
{
    short i;
    for (i = 0; i < len; i++)
      {
        sp->data[len-i-1] = 0;
      }
}

The index is now a short, and I have purposefully added an int to form the
final induction variable.

With gcc -S -O2 -fdump-tree-all, I get the following SSA

  short int i;
  int * D.1220;
  long unsigned int D.1219;
  long unsigned int D.1218;
  long unsigned int D.1217;
  int D.1216;
  int D.1215;
  int * D.1214;

<bb 2>:
  goto <bb 4>;

<bb 3>:
  D.1214_6 = sp_5(D)->data;
  D.1215_7 = (int) i_1;
  D.1216_8 = len_4(D) - D.1215_7;
  D.1217_9 = (long unsigned int) D.1216_8;
  D.1218_10 = D.1217_9 + -1;
  D.1219_11 = D.1218_10 * 4;
  D.1220_12 = D.1214_6 + D.1219_11;
  *D.1220_12 ={v} 0;
  i_13 = i_1 + 1;

<bb 4>:
  # i_1 = PHI <0(2), i_13(3)>
  D.1215_3 = (int) i_1;
  if (D.1215_3 < len_4(D))
    goto <bb 3>;
  else
    goto <bb 5>;

<bb 5>:
  return;

The subsequent copy propagation and/or FRE passes identify D.1215_7 as a copy
of D.1215_3, and we get:

<bb 3>:
  D.1214_6 = sp_5(D)->data;
  D.1216_8 = len_4(D) - D.1215_3;
  D.1217_9 = (long unsigned int) D.1216_8;
  D.1218_10 = D.1217_9 + -1;
  D.1219_11 = D.1218_10 * 4;
  D.1220_12 = D.1214_6 + D.1219_11;
  *D.1220_12 = 0;
  i_13 = i_1 + 1;

Loop header copying introduces a PHI for D.1215

<bb 2>:
  D.1215_19 = 0;
  if (D.1215_19 < len_4(D))
    goto <bb 3>;
  else
    goto <bb 4>;

<bb 3>:
  # i_20 = PHI <i_13(3), 0(2)>
  # D.1215_21 = PHI <D.1215_3(3), D.1215_19(2)>
  D.1214_6 = sp_5(D)->data;
  D.1216_8 = len_4(D) - D.1215_21;
  D.1217_9 = (long unsigned int) D.1216_8;
  D.1218_10 = D.1217_9 + -1;
  D.1219_11 = D.1218_10 * 4;
  D.1220_12 = D.1214_6 + D.1219_11;
  *D.1220_12 = 0;
  i_13 = i_20 + 1;
  D.1215_3 = (int) i_13;
  if (D.1215_3 < len_4(D))
    goto <bb 3>;
  else
    goto <bb 4>;

This causes IVOpts (shown below) and all subsequent optimisations to fall over.

<bb 3>:
  D.1214_6 = sp_5(D)->data;
  D.1238_7 = (unsigned int) len_4(D);
  D.1239_1 = D.1238_7 + 0x0ffffffff;
  __builtin_loop_start (1, D.1239_1);
  D.1241_24 = (unsigned int) len_4(D);

<bb 4>:
  # D.1215_21 = PHI <0(3), D.1215_3(5)>
  # ivtmp.13_14 = PHI <0(3), ivtmp.13_18(5)>
  __builtin_loop_iteration (1);
  D.1216_8 = len_4(D) - D.1215_21;
  D.1217_9 = (long unsigned int) D.1216_8;
  D.1218_10 = D.1217_9 + -1;
  D.1219_11 = D.1218_10 * 4;
  D.1220_12 = D.1214_6 + D.1219_11;
  *D.1220_12 = 0;
  D.1240_19 = ivtmp.13_14 + 1;
  D.1215_23 = (int) D.1240_19;
  D.1215_3 = D.1215_23;
  ivtmp.13_18 = ivtmp.13_14 + 1;
  if (ivtmp.13_18 != D.1241_24)
    goto <bb 5>;
  else
    goto <bb 6>;

On this test, using -fno-tree-copy-prop -fno-tree-pre results in better
optimizations, implying that copy propagating (across blocks) / FREing
potential induction variables is undesirable. Alternatively, a less ideal
solution is to disable loop header copying when dealing with type-promoted
loop indices.


-- 


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=41026



More information about the Gcc-bugs mailing list