SSE fix 4 (PR target/7386)

Jan Hubicka jh@suse.cz
Tue Oct 15 13:05:00 GMT 2002


Hi,
this patch fixes a somewhat nasty situation:

SSE supports just LT/LE comparisons and the code requests GT.  This is done
by swapping the operands, but the builtin is defined to overwrite the
first word of output and keep the rest untouched.  That means that we
need to not only swap the operands, but also to merge the result into the
first operand afterward.  I don't see a reason why we have a builtin for
this, so I did it in the header file instead.
In case the patch is acceptable I will kill the other "swapped comparison"
builtins.

Also, did the SSE2 builtins documentation get lost somewhere on the
branch?

OK for mainline and 3.2 branch (in SSE1 strip)?


#include <xmmintrin.h>
 
/* Overlay that lets the test inspect one __m128 value element by
   element under several interpretations of the same storage.  */
typedef union
{
  __m128 m;     /* The value as a single SSE vector.  */
  float f[4];   /* Same storage viewed as four floats.  */
  double d[2];  /* Same storage viewed as two doubles.  */
  int i[4];     /* Same storage viewed as four ints.  */
} munion;
 
/* abort is called below, but only <xmmintrin.h> is included; declare it
   explicitly so the calls are not implicit function declarations.  */
extern void abort (void);

/* Regression check for PR target/7386.

   _mm_cmpgt_ss must write the comparison mask into element 0 of the
   result and leave elements 1..3 equal to those of its FIRST operand,
   even though the comparison is implemented by swapping the operands.

   Returns 0 when every check passes; calls abort () on the first
   mismatch.  */
int
test_it (void)
{
  munion m1, m2;
  munion res;

  /* As the checks below rely on, the last argument of _mm_set_ps lands
     in element 0 and the first argument in element 3.  */
  m1.m = _mm_set_ps (1.0f, 4.0f, 3.0f, 8.0f);
  m2.m = _mm_set_ps (1.0f, 2.0f, 6.0f, 4.0f);

  res.m = _mm_cmpgt_ss (m1.m, m2.m);

  /* Element 0 is the mask for 8.0 > 4.0, which is true: all bits set.  */
  if (res.i[0] != -1)
    abort ();

  /* Elements 1..3 must come from m1 (3.0, 4.0, 1.0), not from m2
     (6.0, 2.0, 1.0).  */
  if (res.f[1] != 3.0f)
    abort ();
  if (res.f[2] != 4.0f)
    abort ();
  if (res.f[3] != 1.0f)
    abort ();

  return 0;
}
 
/* Program entry point: the exit status is whatever test_it returns.  */
int
main (void)
{
  int status = test_it ();

  return status;
}

Tue Oct 15 21:47:05 CEST 2002  Jan Hubicka  <jh@suse.cz>
	PR target/7386
	* i386.c (builtin_description): Drop cmp[n]g[te]s[sd].
	* xmmintrin.h (_mm_cmp[n]g[te]_s[sd]): Rewrite using
	swapped alternative.

Index: config/i386/i386.c
===================================================================
RCS file: /cvsroot/gcc/egcs/gcc/config/i386/i386.c,v
retrieving revision 1.469
diff -c -3 -p -r1.469 i386.c
*** config/i386/i386.c	15 Oct 2002 08:24:34 -0000	1.469
--- config/i386/i386.c	15 Oct 2002 20:02:15 -0000
*************** static const struct builtin_description 
*** 11794,11807 ****
    { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
    { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
    { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
-   { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgtss", IX86_BUILTIN_CMPGTSS, LT, 1 },
-   { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgess", IX86_BUILTIN_CMPGESS, LE, 1 },
    { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
    { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
    { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
    { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
-   { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, LT, 1 },
-   { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, LE, 1 },
    { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
  
    { MASK_SSE1, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
--- 11794,11803 ----
*************** static const struct builtin_description 
*** 11923,11936 ****
    { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
    { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
    { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
-   { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpgtsd", IX86_BUILTIN_CMPGTSD, LT, 1 },
-   { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpgesd", IX86_BUILTIN_CMPGESD, LE, 1 },
    { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
    { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, EQ, 0 },
    { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, LT, 0 },
    { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, LE, 0 },
-   { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpngtsd", IX86_BUILTIN_CMPNGTSD, LT, 1 },
-   { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpngesd", IX86_BUILTIN_CMPNGESD, LE, 1 },
    { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, UNORDERED, 0 },
  
    { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
--- 11919,11928 ----
Index: config/i386/i386.h
===================================================================
RCS file: /cvsroot/gcc/egcs/gcc/config/i386/i386.h,v
retrieving revision 1.294
diff -c -3 -p -r1.294 i386.h
*** config/i386/i386.h	14 Oct 2002 10:07:58 -0000	1.294
--- config/i386/i386.h	15 Oct 2002 20:02:17 -0000
*************** enum ix86_builtins
*** 2017,2029 ****
    IX86_BUILTIN_CMPEQSS,
    IX86_BUILTIN_CMPLTSS,
    IX86_BUILTIN_CMPLESS,
-   IX86_BUILTIN_CMPGTSS,
-   IX86_BUILTIN_CMPGESS,
    IX86_BUILTIN_CMPNEQSS,
    IX86_BUILTIN_CMPNLTSS,
    IX86_BUILTIN_CMPNLESS,
-   IX86_BUILTIN_CMPNGTSS,
-   IX86_BUILTIN_CMPNGESS,
    IX86_BUILTIN_CMPORDSS,
    IX86_BUILTIN_CMPUNORDSS,
    IX86_BUILTIN_CMPNESS,
--- 2017,2025 ----
*************** enum ix86_builtins
*** 2231,2243 ****
    IX86_BUILTIN_CMPEQSD,
    IX86_BUILTIN_CMPLTSD,
    IX86_BUILTIN_CMPLESD,
-   IX86_BUILTIN_CMPGTSD,
-   IX86_BUILTIN_CMPGESD,
    IX86_BUILTIN_CMPNEQSD,
    IX86_BUILTIN_CMPNLTSD,
    IX86_BUILTIN_CMPNLESD,
-   IX86_BUILTIN_CMPNGTSD,
-   IX86_BUILTIN_CMPNGESD,
    IX86_BUILTIN_CMPORDSD,
    IX86_BUILTIN_CMPUNORDSD,
    IX86_BUILTIN_CMPNESD,
--- 2227,2235 ----
Index: config/i386/xmmintrin.h
===================================================================
RCS file: /cvsroot/gcc/egcs/gcc/config/i386/xmmintrin.h,v
retrieving revision 1.7
diff -c -3 -p -r1.7 xmmintrin.h
*** config/i386/xmmintrin.h	15 Oct 2002 08:24:35 -0000	1.7
--- config/i386/xmmintrin.h	15 Oct 2002 20:02:17 -0000
*************** _mm_cmple_ss (__m128 __A, __m128 __B)
*** 245,257 ****
  static __inline __m128
  _mm_cmpgt_ss (__m128 __A, __m128 __B)
  {
!   return (__m128) __builtin_ia32_cmpgtss ((__v4sf)__A, (__v4sf)__B);
  }
  
  static __inline __m128
  _mm_cmpge_ss (__m128 __A, __m128 __B)
  {
!   return (__m128) __builtin_ia32_cmpgess ((__v4sf)__A, (__v4sf)__B);
  }
  
  static __inline __m128
--- 245,265 ----
  static __inline __m128
  _mm_cmpgt_ss (__m128 __A, __m128 __B)
  {
!   return (__m128) __builtin_ia32_movss ((__v4sf) __A,
! 					(__v4sf)
! 					__builtin_ia32_cmpltss ((__v4sf) __B,
! 								(__v4sf)
! 								__A));
  }
  
  static __inline __m128
  _mm_cmpge_ss (__m128 __A, __m128 __B)
  {
!   return (__m128) __builtin_ia32_movss ((__v4sf) __A,
! 					(__v4sf)
! 					__builtin_ia32_cmpless ((__v4sf) __B,
! 								(__v4sf)
! 								__A));
  }
  
  static __inline __m128
*************** _mm_cmpnle_ss (__m128 __A, __m128 __B)
*** 275,287 ****
  static __inline __m128
  _mm_cmpngt_ss (__m128 __A, __m128 __B)
  {
!   return (__m128) __builtin_ia32_cmpngtss ((__v4sf)__A, (__v4sf)__B);
  }
  
  static __inline __m128
  _mm_cmpnge_ss (__m128 __A, __m128 __B)
  {
!   return (__m128) __builtin_ia32_cmpngess ((__v4sf)__A, (__v4sf)__B);
  }
  
  static __inline __m128
--- 283,303 ----
  static __inline __m128
  _mm_cmpngt_ss (__m128 __A, __m128 __B)
  {
!   return (__m128) __builtin_ia32_movss ((__v4sf) __A,
! 					(__v4sf)
! 					__builtin_ia32_cmpnltss ((__v4sf) __B,
! 								 (__v4sf)
! 								 __A));
  }
  
  static __inline __m128
  _mm_cmpnge_ss (__m128 __A, __m128 __B)
  {
!   return (__m128) __builtin_ia32_movss ((__v4sf) __A,
! 					(__v4sf)
! 					__builtin_ia32_cmpnless ((__v4sf) __B,
! 								 (__v4sf)
! 								 __A));
  }
  
  static __inline __m128
*************** _mm_cmple_sd (__m128d __A, __m128d __B)
*** 1270,1282 ****
  static __inline __m128d
  _mm_cmpgt_sd (__m128d __A, __m128d __B)
  {
!   return (__m128d)__builtin_ia32_cmpgtsd ((__v2df)__A, (__v2df)__B);
  }
  
  static __inline __m128d
  _mm_cmpge_sd (__m128d __A, __m128d __B)
  {
!   return (__m128d)__builtin_ia32_cmpgesd ((__v2df)__A, (__v2df)__B);
  }
  
  static __inline __m128d
--- 1286,1306 ----
  static __inline __m128d
  _mm_cmpgt_sd (__m128d __A, __m128d __B)
  {
!   return (__m128d) __builtin_ia32_movsd ((__v2df) __A,
! 					 (__v2df)
! 					 __builtin_ia32_cmpltsd ((__v2df) __B,
! 								 (__v2df)
! 								 __A));
  }
  
  static __inline __m128d
  _mm_cmpge_sd (__m128d __A, __m128d __B)
  {
!   return (__m128d) __builtin_ia32_movsd ((__v2df) __A,
! 					 (__v2df)
! 					 __builtin_ia32_cmplesd ((__v2df) __B,
! 								 (__v2df)
! 								 __A));
  }
  
  static __inline __m128d
*************** _mm_cmpnle_sd (__m128d __A, __m128d __B)
*** 1300,1312 ****
  static __inline __m128d
  _mm_cmpngt_sd (__m128d __A, __m128d __B)
  {
!   return (__m128d)__builtin_ia32_cmpngtsd ((__v2df)__A, (__v2df)__B);
  }
  
  static __inline __m128d
  _mm_cmpnge_sd (__m128d __A, __m128d __B)
  {
!   return (__m128d)__builtin_ia32_cmpngesd ((__v2df)__A, (__v2df)__B);
  }
  
  static __inline __m128d
--- 1324,1344 ----
  static __inline __m128d
  _mm_cmpngt_sd (__m128d __A, __m128d __B)
  {
!   return (__m128d) __builtin_ia32_movsd ((__v2df) __A,
! 					 (__v2df)
! 					 __builtin_ia32_cmpnltsd ((__v2df) __B,
! 								  (__v2df)
! 								  __A));
  }
  
  static __inline __m128d
  _mm_cmpnge_sd (__m128d __A, __m128d __B)
  {
!   return (__m128d) __builtin_ia32_movsd ((__v2df) __A,
! 					 (__v2df)
! 					 __builtin_ia32_cmpnlesd ((__v2df) __B,
! 								  (__v2df)
! 								  __A));
  }
  
  static __inline __m128d



More information about the Gcc-patches mailing list