This is the mail archive of the gcc@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: Bug in loop invariant motion with trivial test case


On Thursday 12 August 2004 12:18, Richard Kenner wrote:
> This is based on ACATS test ca11012.
>
> Consider:
>
> struct foo {int a; int b;};
>
> extern struct foo sub2 (struct foo, struct foo);
>
> struct foo
> sub1 (struct foo a)
> {
>   struct foo result = {0, 0};
>   int i;
>
>   for (i = 0; i < 3; i++)
>     result = sub2 (result, a);
>
>   return result;
> }
>
> Look at the .vars file and notice that each call to sub2 is passed the
> same operands, not the one obtained by calling the function the
> previous time:
>
> sub1 (a)
> {
>   struct foo lsm_tmp.2;
>   struct foo lsm_tmp.1;
>   int i;
>   struct foo result;
>   struct foo T.0;
>
> <bb 0>:
>   result.a = 0;
>   result.b = 0;
>   lsm_tmp.1 = result;
>   i = 0;
>
> <L0>:;
>   lsm_tmp.2 = sub2 (result, a);
>   i = i + 1;
>   if (i <= 2) goto <L0>; else goto <L7>;
>
> <L7>:;
>   result = lsm_tmp.1;
>   T.0 = result;
>   return T.0;
>
> }

Interesting bug.  Loop invariant motion detects that result is a store
that can be sunk in the following loop:

  # BLOCK 1
  # PRED: 3 [100.0%]  (fallthru) 0 [100.0%]  (fallthru,exec)
  # .GLOBAL_VAR<D1488>_18 = PHI <.GLOBAL_VAR<D1488>_13(0), .GLOBAL_VAR<D1488>_14(3)>;
  # i<D1474>_19 = PHI <0(0), i<D1474>_11(3)>;
  # result<D1473>_20 = PHI <result<D1473>_5(0), result<D1473>_10(3)>;
<L0>:;
  #   .GLOBAL_VAR<D1488>_14 = V_MAY_DEF <.GLOBAL_VAR<D1488>_18>;
  #   V_MUST_DEF <result<D1473>_10>;
  #   VUSE <result<D1473>_20>;
  #   VUSE <a<D1470>_9>;
  result<D1473> = sub2 (result<D1473>, a<D1470>);
  i<D1474>_11 = i<D1474>_19 + 1;
  if (i<D1474>_11 <= 2) goto <L6>; else goto <L2>;
  # SUCC: 2 [11.0%]  (false,exec) 3 [89.0%]  (dfs_back,true,exec)

And it turns it into this:

  # BLOCK 1
  # PRED: 3 [100.0%]  (fallthru) 0 [100.0%]  (fallthru,exec)
  # .GLOBAL_VAR<D1488>_23 = PHI <.GLOBAL_VAR<D1488>_13(0), .GLOBAL_VAR<D1488>_14(3)>;
  # i<D1474>_19 = PHI <0(0), i<D1474>_11(3)>;
<L0>:;
  #   .GLOBAL_VAR<D1488>_14 = V_MAY_DEF <.GLOBAL_VAR<D1488>_23>;
  #   V_MUST_DEF <lsm_tmp.2<D1493>_17>;
  #   VUSE <result<D1473>_5>;
  #   VUSE <a<D1470>_9>;
  lsm_tmp.2<D1493> = sub2 (result<D1473>, a<D1470>);
  i<D1474>_11 = i<D1474>_19 + 1;
  if (i<D1474>_11 <= 2) goto <L6>; else goto <L7>;
  # SUCC: 4 [11.0%]  (false,exec) 3 [89.0%]  (dfs_back,true,exec)

  # BLOCK 3
  # PRED: 1 [89.0%]  (dfs_back,true,exec)
<L6>:;
  goto <bb 1> (<L0>);
  # SUCC: 1 [100.0%]  (fallthru)

  # BLOCK 4
  # PRED: 1 [11.0%]  (false,exec)
  # lsm_tmp.2<D1493>_16 = PHI <lsm_tmp.2<D1493>_17(1)>;
<L7>:;
  #   V_MUST_DEF <result<D1473>_22>;
  #   VUSE <lsm_tmp.1<D1492>_20>;
  result<D1473> = lsm_tmp.1<D1492>;
  #   V_MUST_DEF <lsm_tmp.1<D1492>_21>;
  #   VUSE <lsm_tmp.2<D1493>_16>;
  lsm_tmp.1<D1492> = lsm_tmp.2<D1493>;
  # SUCC: 2 [100.0%]  (fallthru)

I'm wondering if we really want to apply this optimization to
aggregate types if they appear as an lvalue, because the
replacement temporary is itself also an aggregate, and it can
never be a register.  So perhaps something like this,

Index: tree-ssa-loop-im.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/tree-ssa-loop-im.c,v
retrieving revision 2.5
diff -u -3 -p -r2.5 tree-ssa-loop-im.c
--- tree-ssa-loop-im.c  10 Aug 2004 18:31:26 -0000      2.5
+++ tree-ssa-loop-im.c  12 Aug 2004 20:35:48 -0000
@@ -1045,6 +1045,9 @@ determine_lsm_reg (struct loop *loop, ed
   if (!ref)
     return;

+  if (AGGREGATE_TYPE_P (TREE_TYPE (ref)))
+    return;
+
   if (!for_each_index (&ref, may_move_till, loop))
     {
       free_mem_refs (mem_refs);

But as Diego pointed out on irc, 
  #   V_MUST_DEF <lsm_tmp.2<D1493>_17>;
  #   VUSE <result<D1473>_5>;
should be enough for tree-ssa-loop-im to tell that result is
not even loop invariant...

What's also interesting is what SRA does:

  struct foo<D1909> <D1938>;

  # BLOCK 1
  # PRED: 3 [100.0%]  (fallthru) 0 [100.0%]  (fallthru,exec)
  # result$a<D1956>_4 = PHI <0(0), result$a<D1956>_1(3)>;
  # result$b<D1955>_21 = PHI <0(0), result$b<D1955>_29(3)>;
  # <D1936>_22 = PHI <<D1936>_30(0), <D1936>_35(3)>;
  # <D1937>_25 = PHI <<D1937>_31(0), <D1937>_33(3)>;
  # TMT.2<D1951>_18 = PHI <TMT.2<D1951>_8(0), TMT.2<D1951>_28(3)>;
  # <D1938>_23 = PHI <<D1938>_9(0), <D1938>_17(3)>;
  # i<D1920>_24 = PHI <0(0), i<D1920>_20(3)>;
<L0>:;
  #   <D1937>_32 = V_MAY_DEF <<D1937>_25>;
  <D1937>.b<D1912> = a$b<D1958>_3;
  #   <D1937>_33 = V_MAY_DEF <<D1937>_32>;
  <D1937>.a<D1911> = a$a<D1957>_2;
  #   <D1936>_34 = V_MAY_DEF <<D1936>_22>;
  <D1936>.b<D1912> = result$b<D1955>_21;
  #   <D1936>_35 = V_MAY_DEF <<D1936>_34>;
  <D1936>.a<D1911> = result$a<D1956>_4;
  #   TMT.2<D1951>_28 = V_MAY_DEF <TMT.2<D1951>_18>;
  #   NMT.3<D1952>_26 = V_MAY_DEF <NMT.3<D1952>_27>;
  #   <D1938>_17 = V_MAY_DEF <<D1938>_23>;
  #   VUSE <<D1936>_35>;
  #   VUSE <<D1937>_33>;
  <D1938> = sub2 (<D1936>, <D1937>);
  #   VUSE <<D1938>_17>;
  result$b<D1955>_29 = <D1938>.b<D1912>;
  #   VUSE <<D1938>_17>;
  result$a<D1956>_1 = <D1938>.a<D1911>;
  i<D1920>_20 = i<D1920>_24 + 1;
  if (i<D1920>_20 <= 2) goto <L6>; else goto <L2>;
  # SUCC: 2 [11.0%]  (false,exec) 3 [89.0%]  (dfs_back,true,exec)

That seems like a pointless transformation, and it's basically
the same problem...

Gr.
Steven



Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]