This is the mail archive of the gcc-bugs@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[Bug tree-optimization/86855] New: REGRESSION: [8.0] -Ofast optimizes away _mm_set_ps(0.0f,0.0f,-0.0f,0.0f);


https://gcc.gnu.org/bugzilla/show_bug.cgi?id=86855

            Bug ID: 86855
           Summary: REGRESSION: [8.0] -Ofast optimizes away
                    _mm_set_ps(0.0f,0.0f,-0.0f,0.0f);
           Product: gcc
           Version: 9.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: tree-optimization
          Assignee: unassigned at gcc dot gnu.org
          Reporter: vincenzo.innocente at cern dot ch
  Target Milestone: ---

This function:
/*
 * 3-component cross product v1 x v2 using SSE intrinsics.
 *
 * Lanes 0, 1, 2 of each argument are read as x, y, z; lane 3 is called w
 * below. Lane 3 of the result is w1*w2 - w1*w2.
 *
 * The subtraction produces lane 1 as x1*z2 - z1*x2, which is the negation of
 * the cross product's y component (z1*x2 - x1*z2), so the final XOR flips
 * the sign bit of lane 1 only.
 */
__m128 _mm_cross_ps(__m128 v1, __m128 v2) {
     // _mm_shuffle_ps(a, b, _MM_SHUFFLE(i3, i2, i1, i0)) returns
     // { a[i0], a[i1], b[i2], b[i3] } in lanes 0..3.

     // lanes 0..3: { z2, z2, x1, w1 }
     __m128 v3 = _mm_shuffle_ps(v2, v1, _MM_SHUFFLE(3, 0, 2, 2));
     // lanes 0..3: { y1, x1, y2, w2 }
     __m128 v4 = _mm_shuffle_ps(v1, v2, _MM_SHUFFLE(3, 1, 0, 1));

     // lanes 0..3: { y1*z2, x1*z2, x1*y2, w1*w2 }
     __m128 v5 = _mm_mul_ps(v3, v4);

     // lanes 0..3: { z1, z1, x2, w2 }
     v3 = _mm_shuffle_ps(v1, v2, _MM_SHUFFLE(3, 0, 2, 2));
     // lanes 0..3: { y2, x2, y1, w1 }
     v4 = _mm_shuffle_ps(v2, v1, _MM_SHUFFLE(3, 1, 0, 1));

     // lanes 0..3: { z1*y2, z1*x2, x2*y1, w2*w1 }
     v3 = _mm_mul_ps(v3, v4);

     // _mm_set_ps takes its arguments from lane 3 down to lane 0, so the
     // -0.0f (sign bit only) ends up in lane 1.
     const  __m128 neg = _mm_set_ps(0.0f,0.0f,-0.0f,0.0f);

     // v5 - v3 = { y1*z2 - z1*y2, x1*z2 - z1*x2, x1*y2 - x2*y1, 0 };
     // XOR with the mask negates lane 1 to give z1*x2 - x1*z2.
     return _mm_xor_ps(_mm_sub_ps(v5, v3), neg);
}

used to compile to more or less the following:
# Compiler listing for the C cross-product function quoted above
# (AT&T syntax: "op source, destination").
# Register roles, inferred by matching each shufps against the C shuffles:
#   %xmm0 = v1 on entry and the returned vector, %xmm1 = v2 on entry.
# Immediates: $202 == _MM_SHUFFLE(3,0,2,2), $209 == _MM_SHUFFLE(3,1,0,1).
mm_cross_ps(float __vector(4), float __vector(4)):
  movaps %xmm1, %xmm2            # xmm2 = copy of v2
  movaps %xmm0, %xmm4            # xmm4 = copy of v1
  movaps %xmm0, %xmm3            # xmm3 = copy of v1
  shufps $202, %xmm0, %xmm2      # xmm2 = shuffle(v2, v1, 3,0,2,2)  -- first v3
  shufps $209, %xmm1, %xmm4      # xmm4 = shuffle(v1, v2, 3,1,0,1)  -- first v4
  shufps $202, %xmm1, %xmm3      # xmm3 = shuffle(v1, v2, 3,0,2,2)  -- second v3
  shufps $209, %xmm0, %xmm1      # xmm1 = shuffle(v2, v1, 3,1,0,1)  -- second v4
  mulps %xmm4, %xmm2             # xmm2 = first v3 * first v4       -- v5
  mulps %xmm3, %xmm1             # xmm1 = second v3 * second v4
  movaps %xmm2, %xmm0            # xmm0 = v5
  subps %xmm1, %xmm0             # xmm0 = v5 - second product
  xorps .LC0(%rip), %xmm0        # flip the sign bit of lane 1 using the mask below
  ret
# 16-byte sign mask: the materialized _mm_set_ps(0.0f,0.0f,-0.0f,0.0f) constant.
.LC0:
  .long 0                        # lane 0: no bits set
  .long 2147483648               # lane 1: 0x80000000, the float sign bit (-0.0f)
  .long 0                        # lane 2: no bits set
  .long 0                        # lane 3: no bits set

According to godbolt, since GCC 8.1 the xor is optimized away with -Ofast, and the function compiles to:
# Compiler listing for the same C function as reported for GCC 8.1 with -Ofast
# (AT&T syntax: "op source, destination").
# Register roles, inferred by matching each shufps against the C shuffles:
#   %xmm0 = v1 on entry and the returned vector, %xmm1 = v2 on entry.
# Immediates: $202 == _MM_SHUFFLE(3,0,2,2), $209 == _MM_SHUFFLE(3,1,0,1).
mm_cross_ps(float __vector(4), float __vector(4)):
  movaps %xmm1, %xmm2            # xmm2 = copy of v2
  movaps %xmm0, %xmm4            # xmm4 = copy of v1
  movaps %xmm0, %xmm3            # xmm3 = copy of v1
  shufps $209, %xmm1, %xmm4      # xmm4 = shuffle(v1, v2, 3,1,0,1)  -- first v4
  shufps $202, %xmm0, %xmm2      # xmm2 = shuffle(v2, v1, 3,0,2,2)  -- first v3
  mulps %xmm4, %xmm2             # xmm2 = first v3 * first v4       -- v5
  shufps $202, %xmm1, %xmm3      # xmm3 = shuffle(v1, v2, 3,0,2,2)  -- second v3
  shufps $209, %xmm0, %xmm1      # xmm1 = shuffle(v2, v1, 3,1,0,1)  -- second v4
  mulps %xmm3, %xmm1             # xmm1 = second v3 * second v4
  movaps %xmm2, %xmm0            # xmm0 = v5
  subps %xmm1, %xmm0             # xmm0 = v5 - second product
  # No xorps and no sign-mask constant follow the subps in this listing,
  # so lane 1 is returned without the sign flip the C source requests.
  ret

Is this intended?

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]