Bug 114647 - missing DSE when looping over a VLA
Summary: missing DSE when looping over a VLA
Status: ASSIGNED
Alias: None
Product: gcc
Classification: Unclassified
Component: tree-optimization (show other bugs)
Version: 14.0
Importance: P3 enhancement
Target Milestone: ---
Assignee: Richard Biener
URL:
Keywords: missed-optimization
Depends on:
Blocks:
 
Reported: 2024-04-08 21:37 UTC by Franck Behaghel
Modified: 2024-05-10 18:43 UTC (History)
2 users (show)

See Also:
Host:
Target:
Build:
Known to work:
Known to fail:
Last reconfirmed: 2024-04-08 00:00:00


Attachments

Note You need to log in before you can comment on or make changes to this bug.
Description Franck Behaghel 2024-04-08 21:37:14 UTC
$cat foo.cpp

#include "stdint.h"
#include "stdio.h"

/* Reproducer: fills a variable-length array that is never read afterwards,
   then prints a single space.  */
void foo(int n)
{

   /* Variable-length array of n 64-bit elements.  */
   uint64_t a[n];
   /* Nothing in this function reads a[] after these stores.  */
   for (uint32_t i=0;i<n;i++) a[i] = i;

   printf(" ");
}


At -O3, GCC vectorizes the loop but does not remove it:

# GCC -O3 output for foo(int) (x86-64, Intel syntax). edi holds n on entry.
foo(int):
        push    rbp
        # rax = (n*8 + 15) & -16: byte size of a[] rounded up to a multiple of 16.
        movsx   rax, edi
        lea     rax, [15+rax*8]
        and     rax, -16
        mov     rbp, rsp
        # Reserve the stack space for the array.
        sub     rsp, rax
        # Skip the loop entirely when n == 0.
        test    edi, edi
        je      .L2
        # Skip the vector loop when (unsigned)(n-1) <= 2.
        lea     edx, [rdi-1]
        cmp     edx, 2
        jbe     .L2
        # edi = n >> 2: number of 4-element vector iterations.
        shr     edi, 2
        mov     ecx, 4
        # xmm2 = 0, used below to widen 32-bit lanes to 64 bits.
        pxor    xmm2, xmm2
        # rax = store cursor, starting at the bottom of the reserved area.
        mov     rax, rsp
        mov     edx, edi
        # xmm1 = initial index vector loaded from .LC0 (constant not shown in
        # this listing; presumably {0,1,2,3}).
        movdqa  xmm1, XMMWORD PTR .LC0[rip]
        movd    xmm4, ecx
        # rdx = (n >> 2) * 32 bytes.
        sal     rdx, 5
        # Broadcast the value 4 into all four dword lanes of xmm4.
        pshufd  xmm4, xmm4, 0
        # rdx = end pointer for the loop.
        add     rdx, rsp
.L4:
        # Each iteration stores four 64-bit elements (32 bytes).
        movdqa  xmm0, xmm1
        add     rax, 32
        # Advance every index lane by 4 for the next iteration.
        paddd   xmm1, xmm4
        movdqa  xmm3, xmm0
        # Interleave the high/low dword pairs with zero, widening each
        # 32-bit index to 64 bits.
        punpckhdq       xmm0, xmm2
        punpckldq       xmm3, xmm2
        # Store the upper pair, then the lower pair, of widened indices.
        movups  XMMWORD PTR [rax-16], xmm0
        movups  XMMWORD PTR [rax-32], xmm3
        # Loop until the cursor reaches the end pointer.
        cmp     rdx, rax
        jne     .L4
.L2:
        # putchar(32), i.e. putchar(' ').
        mov     edi, 32
        call    putchar
        leave
        ret


clang (C or C++) does remove the loop:

# clang output for foo: no stack allocation or stores, only a tail call
# to putchar with argument 32 (' ').
foo:                                    # @foo
        mov     edi, 32
        jmp     putchar@PLT                     # TAILCALL
Comment 1 Andrew Pinski 2024-04-08 21:46:30 UTC
GCC also does not delete the allocated memory when it is used in a loop:
```
/* Variant reproducer: a heap buffer is allocated, filled in a loop, and
   freed without ever being read.  */
void foo(int n)
{
   {
     /* Buffer of n ints; only written below, then released.  */
     int *a = __builtin_malloc(n*sizeof(int));
     for (int i=0;i<n;i++) a[i] = i;
     __builtin_free(a);
   }
   printf(" ");
}
```

Let me find the dup.
Comment 2 Richard Biener 2024-04-09 07:12:50 UTC
                      /* If we visit this PHI by following a backedge then we
                         have to make sure ref->ref only refers to SSA names
                         that are invariant with respect to the loop
                         represented by this PHI node.  */
                      if (dominated_by_p (CDI_DOMINATORS, gimple_bb (stmt),
                                          gimple_bb (use_stmt))
                          && !for_each_index (ref->ref ? &ref->ref : &ref->base,
                                              check_name, gimple_bb (use_stmt)))
                        return DSE_STORE_LIVE;

We could make this bail-out "delayed" until we hit the next possible use in
the loop (of which there is none here).