This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[RFC] Run copyrename one extra time before out-of-ssa


Hi,

Consider the following test case from PR19464:
---------------------------------------------------------------------
int r0, r1, r2, r3, r4, r5;

void f (int n)
{
  while (-- n)
    {
      r1 += r0;
      r2 += r1;
      r3 += r2;
      r4 += r3;
      r5 += r4;
      r0 += r5;
    }
}
---------------------------------------------------------------------

We currently produce the following .optimized dump for this:

<L0>:;
  D.1461 = lsm_tmp.10 + lsm_tmp.11;
  D.1463 = lsm_tmp.9 + D.1461;
  D.1465 = lsm_tmp.8 + D.1463;
  D.1467 = lsm_tmp.7 + D.1465;
  D.1469 = lsm_tmp.6 + D.1467;
  D.1470 = lsm_tmp.10 + D.1469;
  ivtmp.12 = ivtmp.12 + 1;
  lsm_tmp.11 = D.1461;
  lsm_tmp.10 = D.1470;
  lsm_tmp.9 = D.1463;
  lsm_tmp.8 = D.1465;
  lsm_tmp.7 = D.1467;
  lsm_tmp.6 = D.1469;
  if ((unsigned int) n.18 - ivtmp.12 != 0) goto <L0>; else goto <L9>;

which results in the dreadful assembly output for AMD64 of:

.L4:
        leal    (%rcx,%r10), %r9d
        incl    %r11d
        cmpl    %r11d, %r14d
        leal    (%rdi,%r9), %r8d
        movl    %r9d, %r10d
        leal    (%rbx,%r8), %esi
        movl    %r8d, %edi
        leal    (%rbp,%rsi), %edx
        movl    %esi, %ebx
        leal    (%r12,%rdx), %eax
        movl    %edx, %ebp
        leal    (%rcx,%rax), %r13d
        movl    %eax, %r12d
        movl    %r13d, %ecx
        jne     .L4

The problem is that we cannot coalesce the D.* and the lsm_tmp.*
temporaries because they don't have the same root variable.  This is
a problem in the out-of-ssa pass, which also caused PR19038 and maybe
some other performance bugs.  But in this case the fix is quite easy.
We can just rerun the copyrename pass, which results in the following
code:

<L0>:;
  lsm_tmp.11 = lsm_tmp.10 + lsm_tmp.11;
  lsm_tmp.9 = lsm_tmp.9 + lsm_tmp.11;
  lsm_tmp.8 = lsm_tmp.8 + lsm_tmp.9;
  lsm_tmp.7 = lsm_tmp.7 + lsm_tmp.8;
  lsm_tmp.6 = lsm_tmp.6 + lsm_tmp.7;
  lsm_tmp.10 = lsm_tmp.10 + lsm_tmp.6;
  ivtmp.12 = ivtmp.12 + 1;
  if ((unsigned int) n.18 - ivtmp.12 != 0) goto <L0>; else goto <L9>;

and

.L4:
        addl    %edx, %eax
        incl    %r10d
        addl    %eax, %ecx
        addl    %ecx, %esi
        addl    %esi, %r8d
        addl    %r8d, %r9d
        addl    %r9d, %edx
        cmpl    %r10d, %edi
        jne     .L4


For i686 the effect is even more dramatic.

From this:
.L4:
        movl    -24(%ebp), %eax
        movl    -28(%ebp), %esi
        movl    -32(%ebp), %ebx
        movl    -36(%ebp), %edx
        incl    -20(%ebp)
        leal    (%ecx,%eax), %edi
        movl    -40(%ebp), %eax
        addl    %edi, %esi
        movl    %edi, -24(%ebp)
        addl    %esi, %ebx
        movl    %esi, -28(%ebp)
        addl    %ebx, %edx
        movl    %ebx, -32(%ebp)
        addl    %edx, %eax
        movl    %edx, -36(%ebp)
        addl    %eax, %ecx
        movl    %eax, -48(%ebp)
        movl    %eax, -40(%ebp)
        movl    -20(%ebp), %eax
        cmpl    %eax, -16(%ebp)
        movl    %ecx, -44(%ebp)
        jne     .L4

to this:
.L4:
        addl    %edx, -24(%ebp)
        incl    -20(%ebp)
        addl    -24(%ebp), %ecx
        movl    -20(%ebp), %eax
        addl    %ecx, %ebx
        addl    %ebx, %esi
        addl    %esi, %edi
        addl    %edi, %edx
        cmpl    %eax, -16(%ebp)
        jne     .L4

I am not sure why copyrename can coalesce what out-of-ssa apparently
can't, but the copyrename pass is really cheap anyway, and it seems too
late to do anything about out-of-ssa at this stage.  On the other
hand, adding more passes is maybe something we don't want to do.  Any
thoughts?

I still have to bootstrap and test this patch, btw ;-)

Gr.
Steven

	PR rtl-optimization/19464
	* tree-optimize.c (init_tree_optimization_passes): Add one more
	copyrename pass just before out-of-ssa.

Index: tree-optimize.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/tree-optimize.c,v
retrieving revision 2.69
diff -u -p -r2.69 tree-optimize.c
--- tree-optimize.c	20 Jan 2005 19:20:24 -0000	2.69
+++ tree-optimize.c	21 Jan 2005 12:19:23 -0000
@@ -397,6 +397,7 @@ init_tree_optimization_passes (void)
   NEXT_PASS (pass_forwprop);
   NEXT_PASS (pass_phiopt);
   NEXT_PASS (pass_tail_calls);
+  NEXT_PASS (pass_rename_ssa_copies);
   NEXT_PASS (pass_del_ssa);
   NEXT_PASS (pass_nrv);
   NEXT_PASS (pass_remove_useless_vars);


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]