This is the mail archive of the
gcc@gcc.gnu.org
mailing list for the GCC project.
Re: Bug in loop invariant motion with trivial test case
- From: Steven Bosscher <stevenb at suse dot de>
- To: kenner at vlsi1 dot ultra dot nyu dot edu (Richard Kenner),rakdver at atrey dot karlin dot mff dot cuni dot cz
- Cc: gcc at gcc dot gnu dot org
- Date: Thu, 12 Aug 2004 22:40:08 +0200
- Subject: Re: Bug in loop invariant motion with trivial test case
- Organization: SUSE Labs
- References: <10408121018.AA06579@vlsi1.ultra.nyu.edu>
On Thursday 12 August 2004 12:18, Richard Kenner wrote:
> This is based on ACATS test ca11012.
>
> Consider:
>
> struct foo {int a; int b;};
>
> extern struct foo sub2 (struct foo, struct foo);
>
> struct foo
> sub1 (struct foo a)
> {
> struct foo result = {0, 0};
> int i;
>
> for (i = 0; i < 3; i++)
> result = sub2 (result, a);
>
> return result;
> }
>
> Look at the .vars file and notice that each call to sub2 is passed the
> same operands, not the one obtained by calling the function the
> previous time:
>
> sub1 (a)
> {
> struct foo lsm_tmp.2;
> struct foo lsm_tmp.1;
> int i;
> struct foo result;
> struct foo T.0;
>
> <bb 0>:
> result.a = 0;
> result.b = 0;
> lsm_tmp.1 = result;
> i = 0;
>
> <L0>:;
> lsm_tmp.2 = sub2 (result, a);
> i = i + 1;
> if (i <= 2) goto <L0>; else goto <L7>;
>
> <L7>:;
> result = lsm_tmp.1;
> T.0 = result;
> return T.0;
>
> }
Interesting bug. Loop invariant motion detects that result is a store
that can be sunk in the following loop:
# BLOCK 1
# PRED: 3 [100.0%] (fallthru) 0 [100.0%] (fallthru,exec)
# .GLOBAL_VAR<D1488>_18 = PHI <.GLOBAL_VAR<D1488>_13(0), .GLOBAL_VAR<D1488>_14(3)>;
# i<D1474>_19 = PHI <0(0), i<D1474>_11(3)>;
# result<D1473>_20 = PHI <result<D1473>_5(0), result<D1473>_10(3)>;
<L0>:;
# .GLOBAL_VAR<D1488>_14 = V_MAY_DEF <.GLOBAL_VAR<D1488>_18>;
# V_MUST_DEF <result<D1473>_10>;
# VUSE <result<D1473>_20>;
# VUSE <a<D1470>_9>;
result<D1473> = sub2 (result<D1473>, a<D1470>);
i<D1474>_11 = i<D1474>_19 + 1;
if (i<D1474>_11 <= 2) goto <L6>; else goto <L2>;
# SUCC: 2 [11.0%] (false,exec) 3 [89.0%] (dfs_back,true,exec)
And it turns it into this:
# BLOCK 1
# PRED: 3 [100.0%] (fallthru) 0 [100.0%] (fallthru,exec)
# .GLOBAL_VAR<D1488>_23 = PHI <.GLOBAL_VAR<D1488>_13(0), .GLOBAL_VAR<D1488>_14(3)>;
# i<D1474>_19 = PHI <0(0), i<D1474>_11(3)>;
<L0>:;
# .GLOBAL_VAR<D1488>_14 = V_MAY_DEF <.GLOBAL_VAR<D1488>_23>;
# V_MUST_DEF <lsm_tmp.2<D1493>_17>;
# VUSE <result<D1473>_5>;
# VUSE <a<D1470>_9>;
lsm_tmp.2<D1493> = sub2 (result<D1473>, a<D1470>);
i<D1474>_11 = i<D1474>_19 + 1;
if (i<D1474>_11 <= 2) goto <L6>; else goto <L7>;
# SUCC: 4 [11.0%] (false,exec) 3 [89.0%] (dfs_back,true,exec)
# BLOCK 3
# PRED: 1 [89.0%] (dfs_back,true,exec)
<L6>:;
goto <bb 1> (<L0>);
# SUCC: 1 [100.0%] (fallthru)
# BLOCK 4
# PRED: 1 [11.0%] (false,exec)
# lsm_tmp.2<D1493>_16 = PHI <lsm_tmp.2<D1493>_17(1)>;
<L7>:;
# V_MUST_DEF <result<D1473>_22>;
# VUSE <lsm_tmp.1<D1492>_20>;
result<D1473> = lsm_tmp.1<D1492>;
# V_MUST_DEF <lsm_tmp.1<D1492>_21>;
# VUSE <lsm_tmp.2<D1493>_16>;
lsm_tmp.1<D1492> = lsm_tmp.2<D1493>;
# SUCC: 2 [100.0%] (fallthru)
I'm wondering if we really want to apply this optimization to
aggregate types if they appear as an lvalue, because the
replacement temporary is itself also an aggregate, and it can
never be a register. So perhaps something like this,
Index: tree-ssa-loop-im.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/tree-ssa-loop-im.c,v
retrieving revision 2.5
diff -u -3 -p -r2.5 tree-ssa-loop-im.c
--- tree-ssa-loop-im.c 10 Aug 2004 18:31:26 -0000 2.5
+++ tree-ssa-loop-im.c 12 Aug 2004 20:35:48 -0000
@@ -1045,6 +1045,9 @@ determine_lsm_reg (struct loop *loop, ed
if (!ref)
return;
+ if (AGGREGATE_TYPE_P (TREE_TYPE (ref)))
+ return;
+
if (!for_each_index (&ref, may_move_till, loop))
{
free_mem_refs (mem_refs);
But as Diego pointed out on irc,
# V_MUST_DEF <lsm_tmp.2<D1493>_17>;
# VUSE <result<D1473>_5>;
should be enough for tree-ssa-loop-im to tell that result is
not even loop invariant...
What's also interesting is what SRA does:
struct foo<D1909> <D1938>;
# BLOCK 1
# PRED: 3 [100.0%] (fallthru) 0 [100.0%] (fallthru,exec)
# result$a<D1956>_4 = PHI <0(0), result$a<D1956>_1(3)>;
# result$b<D1955>_21 = PHI <0(0), result$b<D1955>_29(3)>;
# <D1936>_22 = PHI <<D1936>_30(0), <D1936>_35(3)>;
# <D1937>_25 = PHI <<D1937>_31(0), <D1937>_33(3)>;
# TMT.2<D1951>_18 = PHI <TMT.2<D1951>_8(0), TMT.2<D1951>_28(3)>;
# <D1938>_23 = PHI <<D1938>_9(0), <D1938>_17(3)>;
# i<D1920>_24 = PHI <0(0), i<D1920>_20(3)>;
<L0>:;
# <D1937>_32 = V_MAY_DEF <<D1937>_25>;
<D1937>.b<D1912> = a$b<D1958>_3;
# <D1937>_33 = V_MAY_DEF <<D1937>_32>;
<D1937>.a<D1911> = a$a<D1957>_2;
# <D1936>_34 = V_MAY_DEF <<D1936>_22>;
<D1936>.b<D1912> = result$b<D1955>_21;
# <D1936>_35 = V_MAY_DEF <<D1936>_34>;
<D1936>.a<D1911> = result$a<D1956>_4;
# TMT.2<D1951>_28 = V_MAY_DEF <TMT.2<D1951>_18>;
# NMT.3<D1952>_26 = V_MAY_DEF <NMT.3<D1952>_27>;
# <D1938>_17 = V_MAY_DEF <<D1938>_23>;
# VUSE <<D1936>_35>;
# VUSE <<D1937>_33>;
<D1938> = sub2 (<D1936>, <D1937>);
# VUSE <<D1938>_17>;
result$b<D1955>_29 = <D1938>.b<D1912>;
# VUSE <<D1938>_17>;
result$a<D1956>_1 = <D1938>.a<D1911>;
i<D1920>_20 = i<D1920>_24 + 1;
if (i<D1920>_20 <= 2) goto <L6>; else goto <L2>;
# SUCC: 2 [11.0%] (false,exec) 3 [89.0%] (dfs_back,true,exec)
That seems like a pointless transformation, and it's basically
the same problem...
Gr.
Steven