[Bug rtl-optimization/108318] New: Floating point calculation moved out of loop despite fesetround

Fri Jan 6 14:56:47 GMT 2023

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108318

            Bug ID: 108318
           Summary: Floating point calculation moved out of loop despite
                    fesetround
           Product: gcc
           Version: unknown
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: rtl-optimization
          Assignee: unassigned at gcc dot gnu.org
          Reporter: tkoenig at gcc dot gnu.org
  Target Milestone: ---

#include <fenv.h>
/* Reproducer: evaluate a + b once under each of the four rounding modes
   listed in rm[] and store the i-th result in res[i].  On return the
   rounding mode has been set to FE_TONEAREST.

   NOTE(review): no "#pragma STDC FENV_ACCESS ON" (or -frounding-math) is
   visible in this snippet -- confirm whether the report assumes one.  */
void
foo (double res[4], double a, double b)
{
  /* Rounding modes to apply, one per loop iteration.  */
  static const int rm[4]
      = { FE_DOWNWARD, FE_TONEAREST, FE_TOWARDZERO, FE_UPWARD };
  for (int i = 0; i < 4; ++i)
    {
      /* Select the mode first; the addition on the next line is the
         operation whose placement relative to this call is at issue.  */
      fesetround (rm[i]);
      res[i] = a + b;
    }
  fesetround (FE_TONEAREST); // restore default
}

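When compiled with recent trunk and -O3, this yields the following code, in
which the addsd is executed once before the loop and the same value is then
stored on every iteration: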

        addsd   %xmm1, %xmm0
        pushq   %r14
        .cfi_def_cfa_offset 16
        .cfi_offset 14, -16
        pushq   %rbp
        .cfi_def_cfa_offset 24
        .cfi_offset 6, -24
        movq    %rdi, %rbp
        pushq   %rbx
        .cfi_def_cfa_offset 32
        .cfi_offset 3, -32
        xorl    %ebx, %ebx
        movq    %xmm0, %r14
.L2:
        movl    rm.0(,%rbx,4), %edi
        call    fesetround
        movq    %r14, 0(%rbp,%rbx,8)
        addq    $1, %rbx
        cmpq    $4, %rbx
        jne     .L2
        popq    %rbx
        .cfi_def_cfa_offset 24
        xorl    %edi, %edi
        popq    %rbp
        .cfi_def_cfa_offset 16
        popq    %r14
        .cfi_def_cfa_offset 8
        jmp     fesetround
        .cfi_endproc

Things seem all right after tree optimization; the *.optimized dump looks OK,
with the addition still inside the loop after the fesetround call:

 <bb 3> [local count: 858993457]:
  # ivtmp.5_16 = PHI <ivtmp.5_7(3), 0(2)>
  _1 = MEM[(int *)&rm + ivtmp.5_16 * 4];
  fesetround (_1);
  _5 = a_12(D) + b_13(D);
  MEM[(double *)res_11(D) + ivtmp.5_16 * 8] = _5;
  ivtmp.5_7 = ivtmp.5_16 + 1;
  if (ivtmp.5_7 != 4)
    goto <bb 3>; [80.00%]
  else
    goto <bb 4>; [20.00%]

  <bb 4> [local count: 214748368]:
  fesetround (0); [tail call]
  return;

This does not seem to be a recent regression; it goes back to at
least gcc 4.1.2.

Noted by Michael S on comp.arch, on
https://groups.google.com/g/comp.arch/c/Izheu-k00Nw/m/oljg70SBBwAJ .