Bug 111373 - conditional selection gives bad code generation
Summary: conditional selection gives bad code generation
Status: NEW
Alias: None
Product: gcc
Classification: Unclassified
Component: middle-end (show other bugs)
Version: 14.0
: P3 enhancement
Target Milestone: ---
Assignee: Not yet assigned to anyone
URL:
Keywords: missed-optimization
Depends on:
Blocks:
 
Reported: 2023-09-11 16:38 UTC by Thomas Koenig
Modified: 2024-01-12 18:44 UTC (History)
1 user (show)

See Also:
Host:
Target:
Build:
Known to work:
Known to fail:
Last reconfirmed:


Attachments

Note You need to log in before you can comment on or make changes to this bug.
Description Thomas Koenig 2023-09-11 16:38:57 UTC
The code

#define SWAP(i,j) do { \
  if (v[i] > v[j]) { \
    tmp_v = v[i]; v[i] = v[j]; v[j] = tmp_v;	\
    tmp_p = a[i]; a[i] = a[j]; a[j] = tmp_p;	\
    }						\
  } while(0)

void s3 (long int *p[3])
{
  long int v[3];
  long int *a[3];
  long int tmp_v;
  long int *tmp_p;
  a[0] = p[0];
  v[0] = *p[0];
  a[1] = p[1];
  v[1] = *p[1];
  a[2] = p[2];
  v[2] = *p[2];
  SWAP (0,1);
  SWAP (0,2);
  SWAP (1,2);
  p[0] = a[0];
  p[1] = a[1];
  p[2] = a[2];
}

yields, with reasonably recent trunk with -O3, code where there are
register moves right before the results are stored, for example on x86_64:

s3:
.LFB0:
        .cfi_startproc
        movq    (%rdi), %rax
        movq    8(%rdi), %rcx
        movq    16(%rdi), %rdx
        movq    (%rax), %r8
        movq    (%rcx), %rsi
        movq    (%rdx), %r9
        cmpq    %rsi, %r8
        jg      .L2
        cmpq    %r9, %r8
        jle     .L3
        movq    %rax, %r9
        movq    %rdx, %rax
        movq    %r9, %rdx
        movq    %r8, %r9
.L3:
        cmpq    %rsi, %r9
        jl      .L10
.L4:
        movq    %rax, (%rdi)
        movq    %rcx, 8(%rdi)
        movq    %rdx, 16(%rdi)
        ret
        .p2align 4,,10
        .p2align 3
.L2:
        cmpq    %r9, %rsi
        jle     .L11
        movq    %rdx, %rsi
        movq    %rax, %rdx
        movq    %rcx, 8(%rdi)
        movq    %rsi, %rax
        movq    %rdx, 16(%rdi)
        movq    %rax, (%rdi)
        ret
        .p2align 4,,10
        .p2align 3
.L11:
        movq    %r8, %rsi
        movq    %rax, %r8
        movq    %rcx, %rax
        movq    %r8, %rcx
        cmpq    %rsi, %r9
        jge     .L4
.L10:
        movq    %rcx, %rsi
        movq    %rdx, %rcx
        movq    %rax, (%rdi)
        movq    %rsi, %rdx
        movq    %rcx, 8(%rdi)
        movq    %rdx, 16(%rdi)
        ret

This seems to be a general phenomenon, see https://godbolt.org/z/xW9x75qbf for
RISC-V (POWER is similar).
Comment 1 Andrew Pinski 2023-09-11 16:54:54 UTC Comment hidden (obsolete)
Comment 2 Andrew Pinski 2023-09-11 16:55:17 UTC
In this case we have this right before expand:
  if (v$2_9 < v$1_25)
    goto <bb 7>; [INV]
  else
    goto <bb 8>; [INV]

  <bb 7> :

  <bb 8> :
  # a$1_65 = PHI <a$1_64(6), a$2_66(7)>
  # a$2_67 = PHI <a$2_66(6), a$1_64(7)>

But this is a conditional move.
The biggest issue here is that GCC is not detecting this as a conditional move though.

And then make really bad register allocation decisions due to that.
Comment 3 Andrew Pinski 2023-09-11 16:56:47 UTC
Note this has nothing to do with returns in this case really, as there are no return value for this function (returns register allocation has been improved in recent years though).
Comment 4 Andrew Pinski 2023-09-11 16:59:57 UTC
Actually this is what we have before expand:
```
  if (_2 > _5)
    goto <bb 8>; [50.00%]
  else
    goto <bb 3>; [50.00%]
...
  if (v$2_7 < v$1_28)
    goto <bb 7>; [33.33%]
  else
    goto <bb 6>; [66.67%]

  <bb 6> [local count: 536870910]:

  <bb 7> [local count: 1073741824]:
  # a$1_18 = PHI <a$1_30(6), a$2_19(5), _3(8)>
  # a$2_20 = PHI <a$2_19(6), a$1_30(5), _1(8)>
  # a$0_29 = PHI <a$0_16(6), a$0_16(5), _6(8)>
  *p_12(D) = a$0_29;
  MEM[(long int * *)p_12(D) + 8B] = a$1_18;
  MEM[(long int * *)p_12(D) + 16B] = a$2_20;
  return;

  <bb 8> [local count: 536870912]:
  if (_5 > _8)
    goto <bb 7>; [50.00%]
  else
    goto <bb 5>; [50.00%]
```

Anyways there is another bug dealing with phi and expansion somewhere