Bug 114169 - miss optimization of repeat load&store in place
Summary: miss optimization of repeat load&store in place
Status: NEW
Alias: None
Product: gcc
Classification: Unclassified
Component: middle-end (show other bugs)
Version: 13.2.0
: P3 enhancement
Target Milestone: ---
Assignee: Not yet assigned to anyone
URL:
Keywords: missed-optimization
Depends on:
Blocks:
 
Reported: 2024-02-29 15:35 UTC by absoler
Modified: 2024-03-07 08:46 UTC (History)
0 users

See Also:
Host:
Target:
Build:
Known to work:
Known to fail:
Last reconfirmed: 2024-03-07 00:00:00


Attachments

Note You need to log in before you can comment on or make changes to this bug.
Description absoler 2024-02-29 15:35:46 UTC
Hi, here's the code:

```
/* Fixed-width integer names spelled out by hand; no <stdint.h> include is
 * visible in this testcase.
 * NOTE(review): the 64-bit names assume `long` is 64 bits wide on the
 * target (e.g. LP64) -- confirm before reusing elsewhere. */
typedef signed char int8_t;
typedef unsigned char uint8_t;
typedef signed short int int16_t;
typedef unsigned short int uint16_t;
typedef signed int int32_t;
typedef unsigned int uint32_t;
typedef signed long int int64_t;
typedef unsigned long int uint64_t;

#include<stdlib.h>
#include<signal.h>
#include<stdio.h>
#include<string.h>
/* --- Struct/Union Declarations --- */

/* Aggregate copied between the globals g_2 and g_4 in this testcase.
 * It has eight members, f0..f7, alternating between int32_t and
 * (u)int16_t types. Only f0 is accessed individually (in func_10);
 * everything else is moved by whole-struct assignment. */
struct S0 {
   int32_t  f0;
   uint16_t  f1;
   int32_t  f2;
   int32_t  f3;
   uint16_t  f4;
   uint16_t  f5;
   int32_t  f6;
   int16_t  f7;
};

/* --- GLOBAL VARIABLES --- */
/* g_2 and g_4 are the two struct objects the testcase copies between.
 * Initializer values are positional, in member order f0..f7. */
struct S0 g_2 = {1L,0xF8C7L,0x5C6EFF3DL,0x0369BD69L,65535UL,0x0CA9L,-9L,0x9C92L};
struct S0 g_4 = {-1L,0UL,0x314A5EA9L,0x4A90C6D2L,0xCD43L,65528UL,0x2E40C18AL,0x9C27L};
/* g_17, g_18 and g_19 are not referenced by func_1 or func_10 below. */
int16_t g_17 = 0xF85AL;
uint16_t g_18 = 0xA88AL;
const uint64_t g_19 = 0UL;

/* --- FORWARD DECLARATIONS --- */
/* Prototypes for the two functions defined below. The prototype of
 * func_10 names its parameter p_13; the definition names it b. */
struct S0  func_1(void);
void  func_10(struct S0  p_13);

/* Entry point of the testcase: copies g_2 into g_4, then passes g_4 by
 * value to func_10.
 *
 * NOTE(review): the function is declared to return struct S0 but has no
 * return statement, and the local `a` is never used. Both are left as-is
 * because the disassembly in this report was generated from this exact
 * source. */
struct S0 func_1() {
  int32_t a;
  g_4 = g_2;
  func_10(g_4);
}
/* Stores 0 to g_4.f0 and then copies the by-value argument b into g_2.
 * Both writes go through local pointers (d and e) rather than naming the
 * globals directly.
 *
 * When called from func_1, b is a copy of g_4, which was itself just
 * assigned from g_2, so the final store writes g_2's own contents back. */
void func_10(struct S0 b) {
  int32_t c = 0;
  int32_t *d = &g_4.f0;   /* points at the first member of g_4 */
  struct S0 *e = &g_2;    /* points at the whole of g_2 */
  *d = c;                 /* g_4.f0 = 0 */
  *e = b;                 /* g_2 = b */
}
```

When compiled with gcc-13.2.0 -O3, it generates the following code:
https://godbolt.org/z/4d9roGWTz
```
0000000000401630 <func_1>:
func_1():
/root/loadtest3/test/output2.c:49
  401630:	movdqa 0x2a58(%rip),%xmm0        # 404090 <g_2>
  401638:	mov    0x2a52(%rip),%eax        # 404090 <g_2>    # load
  40163e:	movdqu 0x2a56(%rip),%xmm1        # 40409c <g_2+0xc>
  401646:	movaps %xmm0,0x2a23(%rip)        # 404070 <g_4>
func_10():
/root/loadtest3/test/output2.c:57
  40164d:	mov    %eax,0x2a3d(%rip)        # 404090 <g_2>   # store
  401653:	movzwl 0x2a1a(%rip),%eax        # 404074 <g_4+0x4>
func_1():
/root/loadtest3/test/output2.c:49
  40165a:	movups %xmm1,0x2a1b(%rip)        # 40407c <g_4+0xc>
func_10():
/root/loadtest3/test/output2.c:57
  401661:	mov    %ax,0x2a2c(%rip)        # 404094 <g_2+0x4>
  401668:	mov    0x2a09(%rip),%rax        # 404078 <g_4+0x8>
/root/loadtest3/test/output2.c:56
  40166f:	movl   $0x0,0x29f7(%rip)        # 404070 <g_4>
/root/loadtest3/test/output2.c:57
  401679:	mov    %rax,0x2a18(%rip)        # 404098 <g_2+0x8>
  401680:	mov    0x29f9(%rip),%rax        # 404080 <g_4+0x10>
  401687:	mov    %rax,0x2a12(%rip)        # 4040a0 <g_2+0x10>
  40168e:	movzwl 0x29f3(%rip),%eax        # 404088 <g_4+0x18>
  401695:	mov    %ax,0x2a0c(%rip)        # 4040a8 <g_2+0x18>
func_1():
/root/loadtest3/test/output2.c:51
  40169c:	mov    %rdi,%rax
  40169f:	retq   
```
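We can see that the load/store pair at addresses 0x401638 and 0x40164d is unnecessary: it loads the first 4 bytes of g_2 into %eax and then stores the same value back to the same address.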
Comment 1 Andrew Pinski 2024-03-07 08:46:16 UTC
Confirmed.

Reduced testcase:
```
// Reduced aggregate: one int member (f0), which the functions below
// write individually, followed by an int array of 3+4 = 7 elements.
struct S0 {
   int  f0;
   int t[3+4];
};

// The two global objects copied between by func_1 and func_2.
// Neither has an explicit initializer.
struct S0 g_2;
struct S0 g_4;

// Reduced form of the original testcase: copy g_2 into g_4, snapshot g_4
// into the local b, zero g_4.f0, then write b back to g_2.
// b reaches g_2 via g_4, so the final store writes g_2's own value back.
void func_1() {
  //struct S0 b = g_2;
  g_4 = g_2;
  struct S0 b = g_4;
  g_4.f0 = 0;
  g_2 = b;
}

// Variant of func_1 in which b is loaded directly from g_2 rather than
// from g_4: copy g_2 into b, copy b into g_4, zero g_4.f0, then write b
// back to g_2.
void func_2() {
  struct S0 b = g_2;
  g_4 = b;
  g_4.f0 = 0;
  g_2 = b;
}
```

We still get an extra load/store with func_2, but at least it is not a dependent load/store ...

Basically GCC doesn't notice that g_2 = b = g_2 was a no-op.