$ cat foo.cpp #include "stdint.h" #include "stdio.h" void foo(int n) { uint64_t a[n]; for (uint32_t i=0;i<n;i++) a[i] = i; printf(" "); } At -O3, GCC vectorizes the loop but does not remove it: foo(int): push rbp movsx rax, edi lea rax, [15+rax*8] and rax, -16 mov rbp, rsp sub rsp, rax test edi, edi je .L2 lea edx, [rdi-1] cmp edx, 2 jbe .L2 shr edi, 2 mov ecx, 4 pxor xmm2, xmm2 mov rax, rsp mov edx, edi movdqa xmm1, XMMWORD PTR .LC0[rip] movd xmm4, ecx sal rdx, 5 pshufd xmm4, xmm4, 0 add rdx, rsp .L4: movdqa xmm0, xmm1 add rax, 32 paddd xmm1, xmm4 movdqa xmm3, xmm0 punpckhdq xmm0, xmm2 punpckldq xmm3, xmm2 movups XMMWORD PTR [rax-16], xmm0 movups XMMWORD PTR [rax-32], xmm3 cmp rdx, rax jne .L4 .L2: mov edi, 32 call putchar leave ret Clang (compiling as C or C++) does remove the loop: foo: # @foo mov edi, 32 jmp putchar@PLT # TAILCALL
GCC also does not eliminate a malloc/free pair when the allocated memory is only written to in a loop: ``` void foo(int n) { { int *a = __builtin_malloc(n*sizeof(int)); for (int i=0;i<n;i++) a[i] = i; __builtin_free(a); } printf(" "); } ``` Let me find the duplicate.
The store is kept live because of this check: /* If we visit this PHI by following a backedge then we have to make sure ref->ref only refers to SSA names that are invariant with respect to the loop represented by this PHI node. */ if (dominated_by_p (CDI_DOMINATORS, gimple_bb (stmt), gimple_bb (use_stmt)) && !for_each_index (ref->ref ? &ref->ref : &ref->base, check_name, gimple_bb (use_stmt))) return DSE_STORE_LIVE; We could make this bail-out "delayed" until we hit the next possible use in the loop (of which there is none in this test case).