[Bug rtl-optimization/65078] [5.0 Regression] 4.9 and 5.0 generate more spill-fill in comparison with 4.8.2

rguenth at gcc dot gnu.org gcc-bugzilla@gcc.gnu.org
Mon Feb 16 14:38:00 GMT 2015


https://gcc.gnu.org/bugzilla/show_bug.cgi?id=65078

Richard Biener <rguenth at gcc dot gnu.org> changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
             Status|UNCONFIRMED                 |NEW
   Last reconfirmed|                            |2015-02-16
     Ever confirmed|0                           |1

--- Comment #2 from Richard Biener <rguenth at gcc dot gnu.org> ---
Confirmed.

4.8 has

  _62 = MEM[(__m64 * {ref-all})dest_284];
  _63 = VIEW_CONVERT_EXPR<long long int>(_62);
  _64 = {_63, 0};
  _65 = VIEW_CONVERT_EXPR<vector(16) char>(_64);
  _66 = __builtin_ia32_punpcklbw128 (_65, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0 });
  _67 = VIEW_CONVERT_EXPR<__m128i>(_66);
  _68 = VIEW_CONVERT_EXPR<vector(8) short int>(_67);
  _70 = __builtin_ia32_paddw128 (pretmp_327, _68);
  _71 = __builtin_ia32_packuswb128 (_70, _70);
  _72 = VIEW_CONVERT_EXPR<__m128i>(_71);
  _73 = __builtin_ia32_vec_ext_v2di (_72, 0);
  MEM[(long long int *)dest_284] = _73;

while 5 has

  _79 = MEM[(__m64 * {ref-all})dest_268];
  _78 = VIEW_CONVERT_EXPR<long long int>(_79);
  _77 = {_78, 0};
  _74 = VIEW_CONVERT_EXPR<vector(16) char>(_77);
  _73 = __builtin_ia32_punpcklbw128 (_74, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0 });
  _69 = VIEW_CONVERT_EXPR<vector(8) short unsigned int>(_73);
  _68 = _69 + pretmp_312;
  _67 = VIEW_CONVERT_EXPR<vector(8) short int>(_68);
  _64 = __builtin_ia32_packuswb128 (_67, _67);
  _63 = VIEW_CONVERT_EXPR<__m128i>(_64);
  _62 = BIT_FIELD_REF <_63, 64, 0>;
  MEM[(long long int *)dest_268] = _62;

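so some intrinsics are no longer builtins (the paddw128 call is now a plain
vector addition and the vec_ext_v2di call is now a BIT_FIELD_REF).  But the
real difference is the following weird store sequence generated by 5, which
spills the vector to the stack and copies it out in two 32-bit halves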

        packuswb        %xmm1, %xmm2
        movaps  %xmm2, (%esp)
        movl    (%esp), %esi
        movl    4(%esp), %edi
        movl    %esi, (%eax)
        movl    %edi, 4(%eax)

compared to just the following single 64-bit store from 4.8

        packuswb        %xmm1, %xmm1
        movq    %xmm1, (%edx)



More information about the Gcc-bugs mailing list