This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

x86-64 fixes for recent sse changes


Blah.  I should have tested on x86-64 the first time around.  Testing there
found things that ought to have shown up on the p4 machine, but didn't.

There's still one more outstanding failure, but this fixes a half-dozen,
so I'm going ahead and checking it in.


r~


        * config/i386/i386.c (IX86_BUILTIN_MOVQ, IX86_BUILTIN_LOADD,
        IX86_BUILTIN_STORED, IX86_BUILTIN_MOVQ2DQ,
        IX86_BUILTIN_MOVDQ2Q): Remove.
        (IX86_BUILTIN_VEC_EXT_V4SI): New.
        (ix86_init_mmx_sse_builtins): Update to match.
        (ix86_expand_builtin): Likewise.  Use gcc_unreachable for an
        unhandled builtin code instead of returning 0.
        (ix86_expand_vector_extract): For V4S[FI], extract element 0 after
        shuffling.
        * config/i386/sse.md (sse_concatv2sf): Accept zero operand 2.
        (sse2_pextrw): Fix immediate constraint.
        (sse2_loadq, sse2_loadq_rex64): Remove.
        * config/i386/emmintrin.h (_mm_cvtsi128_si32, _mm_cvtsi128_si64x):
        Use __builtin_ia32_vec_ext_<size>.
        (_mm_cvtsi32_si128, _mm_cvtsi64x_si128): Use _mm_set_epi<size>.

Index: gcc/config/i386/emmintrin.h
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/i386/emmintrin.h,v
retrieving revision 1.10
diff -c -p -d -r1.10 emmintrin.h
*** gcc/config/i386/emmintrin.h	11 Jan 2005 21:33:11 -0000	1.10
--- gcc/config/i386/emmintrin.h	13 Jan 2005 17:45:44 -0000
*************** _mm_storer_pd (double *__P, __m128d __A)
*** 195,214 ****
  static __inline int
  _mm_cvtsi128_si32 (__m128i __A)
  {
!   int __tmp;
!   __builtin_ia32_stored (&__tmp, (__v4si)__A);
!   return __tmp;
  }
  
  #ifdef __x86_64__
  static __inline long long
  _mm_cvtsi128_si64x (__m128i __A)
  {
!   return __builtin_ia32_movdq2q ((__v2di)__A);
  }
  #endif
  
- 
  static __inline __m128d
  _mm_add_pd (__m128d __A, __m128d __B)
  {
--- 195,211 ----
  static __inline int
  _mm_cvtsi128_si32 (__m128i __A)
  {
!   return __builtin_ia32_vec_ext_v4si ((__v4si)__A, 0);
  }
  
  #ifdef __x86_64__
  static __inline long long
  _mm_cvtsi128_si64x (__m128i __A)
  {
!   return __builtin_ia32_vec_ext_v2di ((__v2di)__A, 0);
  }
  #endif
  
  static __inline __m128d
  _mm_add_pd (__m128d __A, __m128d __B)
  {
*************** _mm_mfence (void)
*** 1377,1390 ****
  static __inline __m128i
  _mm_cvtsi32_si128 (int __A)
  {
!   return (__m128i) __builtin_ia32_loadd (&__A);
  }
  
  #ifdef __x86_64__
  static __inline __m128i
  _mm_cvtsi64x_si128 (long long __A)
  {
!   return (__m128i) __builtin_ia32_movq2dq (__A);
  }
  #endif
  
--- 1374,1387 ----
  static __inline __m128i
  _mm_cvtsi32_si128 (int __A)
  {
!   return _mm_set_epi32 (0, 0, 0, __A);
  }
  
  #ifdef __x86_64__
  static __inline __m128i
  _mm_cvtsi64x_si128 (long long __A)
  {
!   return _mm_set_epi64x (0, __A);
  }
  #endif
  
Index: gcc/config/i386/i386.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/i386/i386.c,v
retrieving revision 1.774
diff -c -p -d -r1.774 i386.c
*** gcc/config/i386/i386.c	11 Jan 2005 21:33:11 -0000	1.774
--- gcc/config/i386/i386.c	13 Jan 2005 17:45:47 -0000
*************** enum ix86_builtins
*** 12265,12273 ****
  
    IX86_BUILTIN_LOADDQU,
    IX86_BUILTIN_STOREDQU,
-   IX86_BUILTIN_MOVQ,
-   IX86_BUILTIN_LOADD,
-   IX86_BUILTIN_STORED,
  
    IX86_BUILTIN_PACKSSWB,
    IX86_BUILTIN_PACKSSDW,
--- 12265,12270 ----
*************** enum ix86_builtins
*** 12498,12505 ****
    IX86_BUILTIN_MASKMOVDQU,
    IX86_BUILTIN_MOVMSKPD,
    IX86_BUILTIN_PMOVMSKB128,
-   IX86_BUILTIN_MOVQ2DQ,
-   IX86_BUILTIN_MOVDQ2Q,
  
    IX86_BUILTIN_PACKSSWB128,
    IX86_BUILTIN_PACKSSDW128,
--- 12495,12500 ----
*************** enum ix86_builtins
*** 12607,12612 ****
--- 12602,12608 ----
    IX86_BUILTIN_VEC_EXT_V2DF,
    IX86_BUILTIN_VEC_EXT_V2DI,
    IX86_BUILTIN_VEC_EXT_V4SF,
+   IX86_BUILTIN_VEC_EXT_V4SI,
    IX86_BUILTIN_VEC_EXT_V8HI,
    IX86_BUILTIN_VEC_EXT_V4HI,
    IX86_BUILTIN_VEC_SET_V8HI,
*************** ix86_init_mmx_sse_builtins (void)
*** 13154,13161 ****
      = build_function_type_list (V2SI_type_node,
  				V2SF_type_node, V2SF_type_node, NULL_TREE);
    tree pint_type_node    = build_pointer_type (integer_type_node);
-   tree pcint_type_node = build_pointer_type (
- 			     build_type_variant (integer_type_node, 1, 0));
    tree pdouble_type_node = build_pointer_type (double_type_node);
    tree pcdouble_type_node = build_pointer_type (
  				build_type_variant (double_type_node, 1, 0));
--- 13150,13155 ----
*************** ix86_init_mmx_sse_builtins (void)
*** 13168,13179 ****
  				intTI_type_node, intTI_type_node, NULL_TREE);
    tree void_ftype_pcvoid
      = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
-   tree v2di_ftype_di
-     = build_function_type_list (V2DI_type_node,
- 				long_long_unsigned_type_node, NULL_TREE);
-   tree di_ftype_v2di
-     = build_function_type_list (long_long_unsigned_type_node,
- 				V2DI_type_node, NULL_TREE);
    tree v4sf_ftype_v4si
      = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
    tree v4si_ftype_v4sf
--- 13162,13167 ----
*************** ix86_init_mmx_sse_builtins (void)
*** 13285,13297 ****
    tree void_ftype_pchar_v16qi
      = build_function_type_list (void_type_node,
  			        pchar_type_node, V16QI_type_node, NULL_TREE);
-   tree v4si_ftype_pcint
-     = build_function_type_list (V4SI_type_node, pcint_type_node, NULL_TREE);
-   tree void_ftype_pcint_v4si
-     = build_function_type_list (void_type_node,
- 			        pcint_type_node, V4SI_type_node, NULL_TREE);
-   tree v2di_ftype_v2di
-     = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
  
    tree float80_type;
    tree float128_type;
--- 13273,13278 ----
*************** ix86_init_mmx_sse_builtins (void)
*** 13479,13486 ****
  
    /* SSE2 */
    def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
-   def_builtin (MASK_SSE2, "__builtin_ia32_movq2dq", v2di_ftype_di, IX86_BUILTIN_MOVQ2DQ);
-   def_builtin (MASK_SSE2, "__builtin_ia32_movdq2q", di_ftype_v2di, IX86_BUILTIN_MOVDQ2Q);
  
    def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
    def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
--- 13460,13465 ----
*************** ix86_init_mmx_sse_builtins (void)
*** 13534,13543 ****
    def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
  
    def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
-   def_builtin (MASK_SSE2, "__builtin_ia32_loadd", v4si_ftype_pcint, IX86_BUILTIN_LOADD);
    def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
-   def_builtin (MASK_SSE2, "__builtin_ia32_stored", void_ftype_pcint_v4si, IX86_BUILTIN_STORED);
-   def_builtin (MASK_SSE2, "__builtin_ia32_movq", v2di_ftype_v2di, IX86_BUILTIN_MOVQ);
  
    def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si, IX86_BUILTIN_PMULUDQ);
    def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULUDQ128);
--- 13513,13519 ----
*************** ix86_init_mmx_sse_builtins (void)
*** 13622,13627 ****
--- 13598,13608 ----
    def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4sf",
  	       ftype, IX86_BUILTIN_VEC_EXT_V4SF);
  
+   ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
+ 				    integer_type_node, NULL_TREE);
+   def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4si",
+ 	       ftype, IX86_BUILTIN_VEC_EXT_V4SI);
+ 
    ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
  				    integer_type_node, NULL_TREE);
    def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v8hi",
*************** ix86_expand_builtin (tree exp, rtx targe
*** 14399,14411 ****
  
      case IX86_BUILTIN_LOADDQU:
        return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
-     case IX86_BUILTIN_LOADD:
-       return ix86_expand_unop_builtin (CODE_FOR_sse2_loadd, arglist, target, 1);
- 
      case IX86_BUILTIN_STOREDQU:
        return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
-     case IX86_BUILTIN_STORED:
-       return ix86_expand_store_builtin (CODE_FOR_sse2_stored, arglist);
  
      case IX86_BUILTIN_MONITOR:
        arg0 = TREE_VALUE (arglist);
--- 14380,14387 ----
*************** ix86_expand_builtin (tree exp, rtx targe
*** 14447,14452 ****
--- 14423,14429 ----
      case IX86_BUILTIN_VEC_EXT_V2DF:
      case IX86_BUILTIN_VEC_EXT_V2DI:
      case IX86_BUILTIN_VEC_EXT_V4SF:
+     case IX86_BUILTIN_VEC_EXT_V4SI:
      case IX86_BUILTIN_VEC_EXT_V8HI:
      case IX86_BUILTIN_VEC_EXT_V4HI:
        return ix86_expand_vec_ext_builtin (arglist, target);
*************** ix86_expand_builtin (tree exp, rtx targe
*** 14480,14487 ****
      if (d->code == fcode)
        return ix86_expand_sse_comi (d, arglist, target);
  
!   /* @@@ Should really do something sensible here.  */
!   return 0;
  }
  
  /* Store OPERAND to the memory after reload is completed.  This means
--- 14457,14463 ----
      if (d->code == fcode)
        return ix86_expand_sse_comi (d, arglist, target);
  
!   gcc_unreachable ();
  }
  
  /* Store OPERAND to the memory after reload is completed.  This means
*************** ix86_expand_vector_extract (bool mmx_ok,
*** 16402,16407 ****
--- 16378,16384 ----
  	}
        vec = tmp;
        use_vec_extr = true;
+       elt = 0;
        break;
  
      case V4SImode:
*************** ix86_expand_vector_extract (bool mmx_ok,
*** 16431,16436 ****
--- 16408,16414 ----
  	    }
  	  vec = tmp;
  	  use_vec_extr = true;
+ 	  elt = 0;
  	}
        else
  	{
Index: gcc/config/i386/sse.md
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/i386/sse.md,v
retrieving revision 1.3
diff -c -p -d -r1.3 sse.md
*** gcc/config/i386/sse.md	11 Jan 2005 21:33:14 -0000	1.3
--- gcc/config/i386/sse.md	13 Jan 2005 17:45:48 -0000
***************
*** 1067,1082 ****
  ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
  ;; alternatives pretty much forces the MMX alternative to be chosen.
  (define_insn "*sse_concatv2sf"
!   [(set (match_operand:V2SF 0 "register_operand" "=x,*y")
  	(vec_concat:V2SF
! 	  (match_operand:SF 1 "register_operand" " 0, 0")
! 	  (match_operand:SF 2 "register_operand" " x,*y")))]
    "TARGET_SSE"
    "@
     unpcklps\t{%2, %0|%0, %2}
!    punpckldq\t{%2, %0|%0, %2}"
!   [(set_attr "type" "sselog,mmxcvt")
!    (set_attr "mode" "V4SF,DI")])
  
  (define_insn "*sse_concatv4sf"
    [(set (match_operand:V4SF 0 "register_operand"   "=x,x")
--- 1067,1084 ----
  ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
  ;; alternatives pretty much forces the MMX alternative to be chosen.
  (define_insn "*sse_concatv2sf"
!   [(set (match_operand:V2SF 0 "register_operand"     "=x,x,*y,*y")
  	(vec_concat:V2SF
! 	  (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
! 	  (match_operand:SF 2 "vector_move_operand"  " x,C,*y, C")))]
    "TARGET_SSE"
    "@
     unpcklps\t{%2, %0|%0, %2}
!    movss\t{%1, %0|%0, %1}
!    punpckldq\t{%2, %0|%0, %2}
!    movd\t{%1, %0|%0, %1}"
!   [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
!    (set_attr "mode" "V4SF,SF,DI,DI")])
  
  (define_insn "*sse_concatv4sf"
    [(set (match_operand:V4SF 0 "register_operand"   "=x,x")
***************
*** 2671,2677 ****
  	(zero_extend:SI
  	  (vec_select:HI
  	    (match_operand:V8HI 1 "register_operand" "x")
! 	    (parallel [(match_operand:SI 2 "const_0_to_7_operand" "0")]))))]
    "TARGET_SSE2"
    "pextrw\t{%2, %1, %0|%0, %1, %2}"
    [(set_attr "type" "sselog")
--- 2673,2679 ----
  	(zero_extend:SI
  	  (vec_select:HI
  	    (match_operand:V8HI 1 "register_operand" "x")
! 	    (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")]))))]
    "TARGET_SSE2"
    "pextrw\t{%2, %1, %0|%0, %1, %2}"
    [(set_attr "type" "sselog")
***************
*** 2865,2912 ****
    operands[1] = gen_rtx_REG (DImode, REGNO (operands[1]));
  })
  
- (define_expand "sse2_loadq"
-   [(set (match_operand:V2DI 0 "register_operand" "")
- 	(vec_merge:V2DI
- 	  (vec_duplicate:V2DI
- 	    (match_operand:DI 1 "nonimmediate_operand" ""))
- 	  (match_dup 2)
- 	  (const_int 1)))]	  
-   "TARGET_SSE"
-   "operands[2] = CONST0_RTX (V2DImode);")
- 
- (define_insn "*sse2_loadq"
-   [(set (match_operand:V2DI 0 "register_operand"       "=Y,?Y,Y,x")
- 	(vec_merge:V2DI
- 	  (vec_duplicate:V2DI
- 	    (match_operand:DI 1 "nonimmediate_operand" " m,*y,Y,0"))
- 	  (match_operand:V2DI 2 "vector_move_operand"  " C, C,0,x")
- 	  (const_int 1)))]
-   "TARGET_SSE && !TARGET_64BIT"
-   "@
-    movq\t{%1, %0|%0, %1}
-    movq2dq\t{%1, %0|%0, %1}
-    movq\t{%1, %0|%0, %1}
-    shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}"
-   [(set_attr "type" "ssemov,ssemov,ssemov,sselog")
-    (set_attr "mode" "TI,TI,TI,V4SF")])
- 
- (define_insn "*sse2_loadq_rex64"
-   [(set (match_operand:V2DI 0 "register_operand"       "=x,?x,?x,x")
- 	(vec_merge:V2DI
- 	  (vec_duplicate:V2DI
- 	    (match_operand:DI 1 "nonimmediate_operand" " m,*y, r,x"))
- 	  (match_operand:V2DI 2 "vector_move_operand"  " C, C, C,0")
- 	  (const_int 1)))]
-   "TARGET_SSE2 && TARGET_64BIT"
-   "@
-    movq\t{%1, %0|%0, %1}
-    movq2dq\t{%1, %0|%0, %1}
-    movd\t{%1, %0|%0, %1}
-    movq\t{%1, %0|%0, %1}"
-   [(set_attr "type" "ssemov")
-    (set_attr "mode" "TI")])
- 
  (define_insn "*vec_dupv4si"
    [(set (match_operand:V4SI 0 "register_operand" "=Y,x")
  	(vec_duplicate:V4SI
--- 2867,2872 ----


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]