Bug 77668 - register allocation should not occupy register for return value early
Summary: register allocation should not occupy register for return value early
Status: UNCONFIRMED
Alias: None
Product: gcc
Classification: Unclassified
Component: rtl-optimization (show other bugs)
Version: 7.0
: P3 normal
Target Milestone: ---
Assignee: Not yet assigned to anyone
URL:
Keywords: missed-optimization, ra
Depends on:
Blocks:
 
Reported: 2016-09-20 18:22 UTC by yumeyao
Modified: 2022-02-06 09:39 UTC (History)
0 users

See Also:
Host:
Target:
Build:
Known to work:
Known to fail:
Last reconfirmed:


Attachments

Note You need to log in before you can comment on or make changes to this bug.
Description yumeyao 2016-09-20 18:22:00 UTC
When a return value is declared (or when RVO introduces a "this" pointer for the returned object), the register allocator (RA) seems to be very greedy about occupying r0 (eax/rax) for the return value, even when the return value is not used yet.

With such an RA strategy, the quick register reserved for the return value is no longer usable for temporary variables.

Here is just one simple example demonstrating this issue:
Compile with -m32 -O[any level], targeting Intel x86.
It is always reproducible on gcc 4.x through 6.2.0 and on 7.0 snapshots.

getA() is where RVO is applied, resulting in the "this" pointer (used for returning) occupying one register.
In total, 4 registers are used. The lifetimes of the "this" pointer and one tmp variable don't overlap, so 1 register should be enough for both, but 2 registers are allocated.

getA_RVO(), with its more complex syntax, is where we 'manually' do the RVO work to make the function prototype identical to the RVO'ed getA() (the calling convention for popping the stack on return is not the same, though).
In getA_RVO(), I just show that we can use the same register as ret (eax) as a temp register before we actually start using it for ret, so only 3 registers are used.

getA_RVO2() is just a simple test to clarify that the problem is not caused by RVO: here we manually declare the return variable and assign to it, yet the return variable's lifetime is still extended. So the problem is likely caused by RA being greedy with the return register.


// Aggregate of three ints used by the reproducer.  The constructor derives
// b and c from i through an inline-asm sequence and copies j into a.
struct A {
  int a;
  int b;
  int c;
   
  // Sets b = (i | 123), c = (i | 123) + 20 and a = j.
  // tmp is written by the asm (h ^ 234) but is not read afterwards.
  A(int i, int j) {
    int tmp;
    int h = i;
    int e;
    __asm__ __volatile__( //do something in assembly
      "or $123, %0;"      // h |= 123
      "mov %0, %1;"       // tmp = h
      "xor $234, %1;"     // tmp ^= 234
      "lea 20(%0), %2;"   // e = h + 20
      // Operands: %0 = h (read/write), %1 = tmp (output), %2 = e (output).
      : "+r"(h), "=r"(tmp), "=r"(e)
    );
    b = h;
    c = e;
    a = j;
  }
};


// Returns an A constructed from (i, j) by value.  The report describes this
// as the function where RVO is applied.
A getA(int i, int j) {
  return A(i, j);
}

getA(int, int):
 push   %ebx
 mov    0x8(%esp),%eax
 mov    0xc(%esp),%edx
 or     $0x7b,%edx
 mov    %edx,%ebx
 xor    $0xea,%ebx
 lea    0x14(%edx),%ecx
 mov    %edx,0x4(%eax)
 mov    0x10(%esp),%edx
 mov    %ecx,0x8(%eax)
 mov    %edx,(%eax)
 pop    %ebx
 ret    $0x4

// Hand-written counterpart of getA(): the caller passes the destination
// object as src, the function fills it in and returns the same pointer.
// Stores b = (i | 123), c = (i | 123) + 20 and a = j through the returned
// pointer.
A* getA_RVO(A* src, int i, int j) {
  A* ret;
  int h = i;
  int e;
  __asm__ __volatile__(
    "or $123, %0;"      // h |= 123
    "mov %0, %1;"       // ret's register is used as a scratch copy of h
    "xor $234, %1;"     // scratch ^= 234
    "lea 20(%0), %2;"   // e = h + 20
    "mov %3, %1;"  //switch this line with ret = src
    // Operands: %0 = h (read/write), %1 = ret (output), %2 = e (output),
    // %3 = src (input, register or memory).
    // NOTE(review): %1 and %2 are written before %3 is read, yet neither is
    // marked earlyclobber ("=&r").  GCC's extended-asm documentation says
    // such outputs may be placed in the same register as an input; confirm
    // whether that matters for this reproducer.
    : "+r"(h), "=r"(ret), "=r"(e)
    : "rm"(src)
  );
  // Alternative to the final "mov %3, %1" above; enabling this line instead
  // of that asm line gives the getA_RVO2 variant.
  //ret = src;
  ret->b = h;
  ret->c = e;
  ret->a = j;
  return ret;
}

getA_RVO(A*, int, int):
 mov    0x8(%esp),%edx
 or     $0x7b,%edx
 mov    %edx,%eax
 xor    $0xea,%eax
 lea    0x14(%edx),%ecx
 mov    0x4(%esp),%eax
 mov    %edx,0x4(%eax)
 mov    0xc(%esp),%edx
 mov    %ecx,0x8(%eax)
 mov    %edx,(%eax)
 ret    

//switch the commented lines in getA_RVO() to get getA_RVO2

getA_RVO2(A*, int, int):
 push   %ebx
 mov    0x8(%esp),%eax
 mov    0xc(%esp),%edx
 or     $0x7b,%edx
 mov    %edx,%ebx
 xor    $0xea,%ebx
 lea    0x14(%edx),%ecx
 mov    %edx,0x4(%eax)
 mov    0x10(%esp),%edx
 mov    %ecx,0x8(%eax)
 mov    %edx,(%eax)
 pop    %ebx
 ret