This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

SSE fix 2


Hi,
I've found that the logicals on SSE are almost entirely broken, at least
with regard to SSE2.  Almost all builtins result in an integer opcode
being generated, which incurs a reformatting penalty.

I've ended up with a somewhat longish patch to reorganize the code.  The
main problem was that most patterns were noncanonical, so combine
rearranged the instructions to match the integral patterns instead.
Also, there were about twice as many of them as we really need.

I also made testcases checking that the proper opcode is generated and
that the memory operand is combined.

OK for mainline?
Honza

/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
/* { dg-options "-O2 -msse2 -march=athlon" } */
/* { dg-final { scan-assembler "andpd.*\[bs\]p" } } */
/* { dg-final { scan-assembler "andnpd.*\[bs\]p" } } */
/* { dg-final { scan-assembler "xorpd.*\[bs\]p" } } */
/* { dg-final { scan-assembler "orpd.*\[bs\]p" } } */
/* { dg-final { scan-assembler-not "movdqa" } } */
/* { dg-final { scan-assembler "movapd.*\[bs\]p" } } */

/* Verify that we generate proper instruction with memory operand.

   Each scan-assembler pattern requires the floating point mnemonic to be
   followed, later on the same line, by "bp" or "sp" -- presumably the
   frame or stack pointer used to address an argument kept in memory.
   The integer move "movdqa" must not appear at all.

   The packed-double OR mnemonic is "orpd"; "iorpd" is not an instruction,
   so a scan for it can never match.  */

#include <xmmintrin.h>

/* Bitwise AND of two packed-double vectors.  */
__m128d
t1 (__m128d a, __m128d b)
{
  return _mm_and_pd (a, b);
}

/* Bitwise AND-NOT of two packed-double vectors.  */
__m128d
t2 (__m128d a, __m128d b)
{
  return _mm_andnot_pd (a, b);
}

/* Bitwise OR of two packed-double vectors.  */
__m128d
t3 (__m128d a, __m128d b)
{
  return _mm_or_pd (a, b);
}

/* Bitwise XOR of two packed-double vectors.  */
__m128d
t4 (__m128d a, __m128d b)
{
  return _mm_xor_pd (a, b);
}


/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
/* { dg-options "-O2 -msse2 -march=athlon" } */
/* { dg-final { scan-assembler "andpd" } } */
/* { dg-final { scan-assembler "andnpd" } } */
/* { dg-final { scan-assembler "xorpd" } } */
/* { dg-final { scan-assembler "orpd" } } */
/* { dg-final { scan-assembler-not "movdqa" } } */
/* { dg-final { scan-assembler "movapd" } } */

/* Verify that we generate proper instruction without memory operand.

   Both arguments go through _mm_sqrt_pd before the logical operation;
   presumably this forces the operands of the logical to be computed
   values rather than incoming arguments read directly from memory.

   The packed-double OR mnemonic is "orpd"; "iorpd" is not an instruction,
   so a scan for it can never match.  */

#include <xmmintrin.h>

/* Bitwise AND of the square roots of A and B.  */
__m128d
t1 (__m128d a, __m128d b)
{
  a = _mm_sqrt_pd (a);
  b = _mm_sqrt_pd (b);
  return _mm_and_pd (a, b);
}

/* Bitwise AND-NOT of the square roots of A and B.  */
__m128d
t2 (__m128d a, __m128d b)
{
  a = _mm_sqrt_pd (a);
  b = _mm_sqrt_pd (b);
  return _mm_andnot_pd (a, b);
}

/* Bitwise OR of the square roots of A and B.  */
__m128d
t3 (__m128d a, __m128d b)
{
  a = _mm_sqrt_pd (a);
  b = _mm_sqrt_pd (b);
  return _mm_or_pd (a, b);
}

/* Bitwise XOR of the square roots of A and B.  */
__m128d
t4 (__m128d a, __m128d b)
{
  a = _mm_sqrt_pd (a);
  b = _mm_sqrt_pd (b);
  return _mm_xor_pd (a, b);
}


/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
/* { dg-options "-O2 -msse2 -march=athlon" } */
/* { dg-final { scan-assembler "andps.*\[bs]p" } } */
/* { dg-final { scan-assembler "andnps.*\[bs]p" } } */
/* { dg-final { scan-assembler "xorps.*\[bs]p" } } */
/* { dg-final { scan-assembler "orps.*\[bs]p" } } */
/* { dg-final { scan-assembler-not "movdqa" } } */
/* { dg-final { scan-assembler "movaps.*\[bs]p" } } */

/* Verify that we generate proper instruction with memory operand.

   Each scan-assembler pattern requires the floating point mnemonic to be
   followed, later on the same line, by "bp" or "sp" -- presumably the
   frame or stack pointer used to address an argument kept in memory.
   The integer move "movdqa" must not appear at all.

   The "orps" pattern uses the same ".*\[bs]p" tail as the other
   mnemonics.  */

#include <xmmintrin.h>

/* Bitwise AND of two packed-single vectors.  */
__m128
t1 (__m128 a, __m128 b)
{
  return _mm_and_ps (a, b);
}

/* Bitwise AND-NOT of two packed-single vectors.  */
__m128
t2 (__m128 a, __m128 b)
{
  return _mm_andnot_ps (a, b);
}

/* Bitwise OR of two packed-single vectors.  */
__m128
t3 (__m128 a, __m128 b)
{
  return _mm_or_ps (a, b);
}

/* Bitwise XOR of two packed-single vectors.  */
__m128
t4 (__m128 a, __m128 b)
{
  return _mm_xor_ps (a, b);
}

/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
/* { dg-options "-O2 -msse2 -march=athlon" } */
/* { dg-final { scan-assembler "andps" } } */
/* { dg-final { scan-assembler "andnps" } } */
/* { dg-final { scan-assembler "xorps" } } */
/* { dg-final { scan-assembler "orps" } } */

/* Check that the single precision logical instructions are emitted when
   neither input is a memory operand: both arguments go through
   _mm_sqrt_ps first, so the logical works on computed values.  */

#include <xmmintrin.h>

/* AND of the element-wise square roots of A and B.  */
__m128
t1 (__m128 a, __m128 b)
{
  const __m128 root_a = _mm_sqrt_ps (a);
  const __m128 root_b = _mm_sqrt_ps (b);

  return _mm_and_ps (root_a, root_b);
}

/* AND-NOT of the element-wise square roots of A and B.  */
__m128
t2 (__m128 a, __m128 b)
{
  const __m128 root_a = _mm_sqrt_ps (a);
  const __m128 root_b = _mm_sqrt_ps (b);

  return _mm_andnot_ps (root_a, root_b);
}

/* OR of the element-wise square roots of A and B.  */
__m128
t3 (__m128 a, __m128 b)
{
  const __m128 root_a = _mm_sqrt_ps (a);
  const __m128 root_b = _mm_sqrt_ps (b);

  return _mm_or_ps (root_a, root_b);
}

/* XOR of the element-wise square roots of A and B.  */
__m128
t4 (__m128 a, __m128 b)
{
  const __m128 root_a = _mm_sqrt_ps (a);
  const __m128 root_b = _mm_sqrt_ps (b);

  return _mm_xor_ps (root_a, root_b);
}
Mon Oct 14 18:02:13 CEST 2002  Jan Hubicka  <jh@suse.cz>
	* i386.c (ix86_expand_timode_binop_builtin): Delete.
	(builtin_description): Add SSE1 logicals; rename SSE2 logicals.
	(ix86_init_mmx_sse_builtins): Kill SSE1 logicals.
	(ix86_expand_builtin): Likewise.
	* i386.md (*sse_andti3_df_1, *sse_andti3_df_2, *sse_andti3_sf_1,
	*sse_andti3_sf_2, sse_andti3,
	*sse_nandti3_sf, sse_nandti3,
	*sse_iorti3_df_1, *sse_iorti3_df_2, *sse_iorti3_sf_1, *sse_iorti3_sf_2,
	sse_iorti3,
	*sse_xorti3_df_1, *sse_xorti3_df_2, *sse_xorti3_sf_1, *sse_xorti3_sf_2,
	sse_xorti3): Kill.
	(sse_andv4sf3, sse_nandv4sf3, sse_iorv4sf3, sse_xorv4sf3, sse2_andv2df3,
	sse2_nandv2df3, sse2_iorv2df3, sse2_xorv2df3): New expanders.
	(*sse_andv4sf3, *sse_nandv4sf3, *sse_iorv4sf3, *sse_xorv4sf3,
	*sse2_andv2df3, *sse2_nandv2df3, *sse2_iorv2df3, *sse2_xorv2df3): New
	patterns.
	(*sse_andsf3, *sse_nandsf3, *sse_iorsf3, *sse2_iordf3, *sse2_xordf3):
	New patterns.
	(*sse_nandti3_df): Take TImode input operands.
	
*** i386.c	Thu Oct 10 11:08:12 2002
--- /p1/ssediv/egcs/gcc/config/i386/i386.c	Mon Oct 14 17:56:23 2002
*************** static rtx ix86_expand_sse_compare PARAM
*** 738,745 ****
  static rtx ix86_expand_unop1_builtin PARAMS ((enum insn_code, tree, rtx));
  static rtx ix86_expand_unop_builtin PARAMS ((enum insn_code, tree, rtx, int));
  static rtx ix86_expand_binop_builtin PARAMS ((enum insn_code, tree, rtx));
- static rtx ix86_expand_timode_binop_builtin PARAMS ((enum insn_code,
- 						     tree, rtx));
  static rtx ix86_expand_store_builtin PARAMS ((enum insn_code, tree));
  static rtx safe_vector_operand PARAMS ((rtx, enum machine_mode));
  static enum rtx_code ix86_fp_compare_code_to_integer PARAMS ((enum rtx_code));
--- 738,743 ----
*************** static const struct builtin_description 
*** 11789,11794 ****
--- 11787,11797 ----
    { MASK_SSE1, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
    { MASK_SSE1, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
  
+   { MASK_SSE1, CODE_FOR_sse_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
+   { MASK_SSE1, CODE_FOR_sse_nandv4sf3,  "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
+   { MASK_SSE1, CODE_FOR_sse_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
+   { MASK_SSE1, CODE_FOR_sse_xorv4sf3,  "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
+ 
    { MASK_SSE1, CODE_FOR_sse_movss,  "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
    { MASK_SSE1, CODE_FOR_sse_movhlps,  "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
    { MASK_SSE1, CODE_FOR_sse_movlhps,  "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
*************** static const struct builtin_description 
*** 11913,11922 ****
    { MASK_SSE2, CODE_FOR_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
    { MASK_SSE2, CODE_FOR_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
  
!   { MASK_SSE2, CODE_FOR_sse2_anddf3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
!   { MASK_SSE2, CODE_FOR_sse2_nanddf3,  "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
!   { MASK_SSE2, CODE_FOR_sse2_iordf3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
!   { MASK_SSE2, CODE_FOR_sse2_xordf3,  "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
  
    { MASK_SSE2, CODE_FOR_sse2_movsd,  "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
    { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
--- 11916,11925 ----
    { MASK_SSE2, CODE_FOR_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
    { MASK_SSE2, CODE_FOR_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
  
!   { MASK_SSE2, CODE_FOR_sse2_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
!   { MASK_SSE2, CODE_FOR_sse2_nandv2df3,  "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
!   { MASK_SSE2, CODE_FOR_sse2_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
!   { MASK_SSE2, CODE_FOR_sse2_xorv2df3,  "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
  
    { MASK_SSE2, CODE_FOR_sse2_movsd,  "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
    { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
*************** ix86_init_mmx_sse_builtins ()
*** 12421,12431 ****
    def_builtin (MASK_SSE1, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
    def_builtin (MASK_SSE1, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
  
-   def_builtin (MASK_SSE1, "__builtin_ia32_andps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ANDPS);
-   def_builtin (MASK_SSE1, "__builtin_ia32_andnps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ANDNPS);
-   def_builtin (MASK_SSE1, "__builtin_ia32_orps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ORPS);
-   def_builtin (MASK_SSE1, "__builtin_ia32_xorps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_XORPS);
- 
    def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
    def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
  
--- 12424,12429 ----
*************** ix86_expand_binop_builtin (icode, arglis
*** 12659,12703 ****
    return target;
  }
  
- /* In type_for_mode we restrict the ability to create TImode types
-    to hosts with 64-bit H_W_I.  So we've defined the SSE logicals
-    to have a V4SFmode signature.  Convert them in-place to TImode.  */
- 
- static rtx
- ix86_expand_timode_binop_builtin (icode, arglist, target)
-      enum insn_code icode;
-      tree arglist;
-      rtx target;
- {
-   rtx pat;
-   tree arg0 = TREE_VALUE (arglist);
-   tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
-   rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
-   rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
- 
-   op0 = gen_lowpart (TImode, op0);
-   op1 = gen_lowpart (TImode, op1);
-   target = gen_reg_rtx (TImode);
- 
-   if (! (*insn_data[icode].operand[1].predicate) (op0, TImode))
-     op0 = copy_to_mode_reg (TImode, op0);
-   if (! (*insn_data[icode].operand[2].predicate) (op1, TImode))
-     op1 = copy_to_mode_reg (TImode, op1);
- 
-   /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
-      yet one of the two must not be a memory.  This is normally enforced
-      by expanders, but we didn't bother to create one here.  */
-   if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
-     op0 = copy_to_mode_reg (TImode, op0);
- 
-   pat = GEN_FCN (icode) (target, op0, op1);
-   if (! pat)
-     return 0;
-   emit_insn (pat);
- 
-   return gen_lowpart (V4SFmode, target);
- }
- 
  /* Subroutine of ix86_expand_builtin to take care of stores.  */
  
  static rtx
--- 12657,12662 ----
*************** ix86_expand_builtin (exp, target, subtar
*** 13043,13061 ****
      case IX86_BUILTIN_RCPSS:
        return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);
  
-     case IX86_BUILTIN_ANDPS:
-       return ix86_expand_timode_binop_builtin (CODE_FOR_sse_andti3,
- 					       arglist, target);
-     case IX86_BUILTIN_ANDNPS:
-       return ix86_expand_timode_binop_builtin (CODE_FOR_sse_nandti3,
- 					       arglist, target);
-     case IX86_BUILTIN_ORPS:
-       return ix86_expand_timode_binop_builtin (CODE_FOR_sse_iorti3,
- 					       arglist, target);
-     case IX86_BUILTIN_XORPS:
-       return ix86_expand_timode_binop_builtin (CODE_FOR_sse_xorti3,
- 					       arglist, target);
- 
      case IX86_BUILTIN_LOADAPS:
        return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);
  
--- 13002,13007 ----
*** i386.md	Mon Oct 14 18:15:53 2002
--- /p1/ssediv/egcs/gcc/config/i386/i386.md	Mon Oct 14 18:24:01 2002
***************
*** 18964,19199 ****
  
  ;; SSE logical operations.
  
  ;; These are not called andti3 etc. because we really really don't want
  ;; the compiler to widen DImode ands to TImode ands and then try to move
  ;; into DImode subregs of SSE registers, and them together, and move out
  ;; of DImode subregs again!
  
! (define_insn "*sse_andti3_df_1"
!   [(set (subreg:TI (match_operand:DF 0 "register_operand" "=Y") 0)
!         (and:TI (subreg:TI (match_operand:DF 1 "register_operand" "%0") 0)
! 		(subreg:TI (match_operand:DF 2 "register_operand" "Y") 0)))]
!   "TARGET_SSE2"
!   "andpd\t{%2, %0|%0, %2}"
    [(set_attr "type" "sselog")
!    (set_attr "mode" "V2DF")])
  
! (define_insn "*sse_andti3_df_2"
!   [(set (subreg:TI (match_operand:DF 0 "register_operand" "=Y") 0)
!         (and:TI (subreg:TI (match_operand:DF 1 "register_operand" "0") 0)
! 		(match_operand:TI 2 "nonimmediate_operand" "Ym")))]
!   "TARGET_SSE2"
!   "andpd\t{%2, %0|%0, %2}"
    [(set_attr "type" "sselog")
!    (set_attr "mode" "V2DF")])
  
! (define_insn "*sse_andti3_sf_1"
!   [(set (subreg:TI (match_operand:SF 0 "register_operand" "=x") 0)
!         (and:TI (subreg:TI (match_operand:SF 1 "register_operand" "%0") 0)
! 		(subreg:TI (match_operand:SF 2 "register_operand" "x") 0)))]
    "TARGET_SSE"
!   "andps\t{%2, %0|%0, %2}"
    [(set_attr "type" "sselog")
     (set_attr "mode" "V4SF")])
  
! (define_insn "*sse_andti3_sf_2"
    [(set (subreg:TI (match_operand:SF 0 "register_operand" "=x") 0)
!         (and:TI (subreg:TI (match_operand:SF 1 "register_operand" "0") 0)
! 		(match_operand:TI 2 "nonimmediate_operand" "xm")))]
    "TARGET_SSE"
!   "andps\t{%2, %0|%0, %2}"
    [(set_attr "type" "sselog")
     (set_attr "mode" "V4SF")])
  
! (define_insn "sse_andti3"
!   [(set (match_operand:TI 0 "register_operand" "=x")
!         (and:TI (match_operand:TI 1 "nonimmediate_operand" "%0")
  		(match_operand:TI 2 "nonimmediate_operand" "xm")))]
!   "TARGET_SSE && !TARGET_SSE2
     && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
!   "andps\t{%2, %0|%0, %2}"
    [(set_attr "type" "sselog")
     (set_attr "mode" "V4SF")])
  
! (define_insn "sse2_andti3"
!   [(set (match_operand:TI 0 "register_operand" "=x")
!         (and:TI (match_operand:TI 1 "nonimmediate_operand" "%0")
  		(match_operand:TI 2 "nonimmediate_operand" "xm")))]
!   "TARGET_SSE2
     && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
!   "pand\t{%2, %0|%0, %2}"
    [(set_attr "type" "sselog")
!    (set_attr "mode" "TI")])
  
! (define_insn "sse2_andv2di3"
!   [(set (match_operand:V2DI 0 "register_operand" "=x")
!         (and:V2DI (match_operand:V2DI 1 "nonimmediate_operand" "%0")
! 		  (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
!   "TARGET_SSE2
     && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
!   "pand\t{%2, %0|%0, %2}"
    [(set_attr "type" "sselog")
!    (set_attr "mode" "TI")])
  
! (define_insn "*sse_nandti3_df"
!   [(set (subreg:TI (match_operand:DF 0 "register_operand" "=Y") 0)
!         (and:TI (not:TI (subreg:TI (match_operand:DF 1 "register_operand" "0") 0))
! 		(match_operand:TI 2 "nonimmediate_operand" "Ym")))]
    "TARGET_SSE2"
!   "andnpd\t{%2, %0|%0, %2}"
    [(set_attr "type" "sselog")
     (set_attr "mode" "V2DF")])
  
! (define_insn "*sse_nandti3_sf"
!   [(set (subreg:TI (match_operand:SF 0 "register_operand" "=x") 0)
!         (and:TI (not:TI (subreg:TI (match_operand:SF 1 "register_operand" "0") 0))
  		(match_operand:TI 2 "nonimmediate_operand" "xm")))]
!   "TARGET_SSE"
!   "andnps\t{%2, %0|%0, %2}"
    [(set_attr "type" "sselog")
!    (set_attr "mode" "V4SF")])
  
! (define_insn "sse_nandti3"
!   [(set (match_operand:TI 0 "register_operand" "=x")
          (and:TI (not:TI (match_operand:TI 1 "register_operand" "0"))
! 		(match_operand:TI 2 "nonimmediate_operand" "xm")))]
!   "TARGET_SSE && !TARGET_SSE2"
!   "andnps\t{%2, %0|%0, %2}"
    [(set_attr "type" "sselog")
!    (set_attr "mode" "V4SF")])
  
! (define_insn "sse2_nandti3"
!   [(set (match_operand:TI 0 "register_operand" "=x")
          (and:TI (not:TI (match_operand:TI 1 "register_operand" "0"))
! 		(match_operand:TI 2 "nonimmediate_operand" "xm")))]
    "TARGET_SSE2"
!   "pandn\t{%2, %0|%0, %2}"
    [(set_attr "type" "sselog")
!    (set_attr "mode" "TI")])
  
! (define_insn "sse2_nandv2di3"
!   [(set (match_operand:V2DI 0 "register_operand" "=x")
!         (and:V2DI (not:V2DI (match_operand:V2DI 1 "nonimmediate_operand" "%0"))
! 		  (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
    "TARGET_SSE2
     && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
-   "pandn\t{%2, %0|%0, %2}"
-   [(set_attr "type" "sselog")
-    (set_attr "mode" "TI")])
- 
- (define_insn "*sse_iorti3_df_1"
-   [(set (subreg:TI (match_operand:DF 0 "register_operand" "=Y") 0)
-         (ior:TI (subreg:TI (match_operand:DF 1 "register_operand" "%0") 0)
- 		(subreg:TI (match_operand:DF 2 "register_operand" "Y") 0)))]
-   "TARGET_SSE2"
    "orpd\t{%2, %0|%0, %2}"
    [(set_attr "type" "sselog")
     (set_attr "mode" "V2DF")])
  
! (define_insn "*sse_iorti3_df_2"
!   [(set (subreg:TI (match_operand:DF 0 "register_operand" "=Y") 0)
!         (ior:TI (subreg:TI (match_operand:DF 1 "register_operand" "0") 0)
! 		(match_operand:TI 2 "nonimmediate_operand" "Ym")))]
!   "TARGET_SSE2"
    "orpd\t{%2, %0|%0, %2}"
    [(set_attr "type" "sselog")
     (set_attr "mode" "V2DF")])
  
! (define_insn "*sse_iorti3_sf_1"
!   [(set (subreg:TI (match_operand:SF 0 "register_operand" "=x") 0)
!         (ior:TI (subreg:TI (match_operand:SF 1 "register_operand" "%0") 0)
! 		(subreg:TI (match_operand:SF 2 "register_operand" "x") 0)))]
!   "TARGET_SSE"
!   "orps\t{%2, %0|%0, %2}"
!   [(set_attr "type" "sselog")
!    (set_attr "mode" "V4SF")])
  
! (define_insn "*sse_iorti3_sf_2"
!   [(set (subreg:TI (match_operand:SF 0 "register_operand" "=x") 0)
!         (ior:TI (subreg:TI (match_operand:SF 1 "register_operand" "0") 0)
  		(match_operand:TI 2 "nonimmediate_operand" "xm")))]
!   "TARGET_SSE"
!   "orps\t{%2, %0|%0, %2}"
    [(set_attr "type" "sselog")
!    (set_attr "mode" "V4SF")])
  
! (define_insn "sse_iorti3"
!   [(set (match_operand:TI 0 "register_operand" "=x")
!         (ior:TI (match_operand:TI 1 "nonimmediate_operand" "%0")
  		(match_operand:TI 2 "nonimmediate_operand" "xm")))]
!   "TARGET_SSE && !TARGET_SSE2
     && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
!   "orps\t{%2, %0|%0, %2}"
    [(set_attr "type" "sselog")
!    (set_attr "mode" "V4SF")])
  
! (define_insn "sse2_iorti3"
    [(set (match_operand:TI 0 "register_operand" "=x")
!         (ior:TI (match_operand:TI 1 "nonimmediate_operand" "%0")
  		(match_operand:TI 2 "nonimmediate_operand" "xm")))]
    "TARGET_SSE2
     && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
!   "por\t{%2, %0|%0, %2}"
    [(set_attr "type" "sselog")
     (set_attr "mode" "TI")])
  
! (define_insn "sse2_iorv2di3"
    [(set (match_operand:V2DI 0 "register_operand" "=x")
!         (ior:V2DI (match_operand:V2DI 1 "nonimmediate_operand" "%0")
  		  (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
    "TARGET_SSE2
     && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
!   "por\t{%2, %0|%0, %2}"
    [(set_attr "type" "sselog")
     (set_attr "mode" "TI")])
  
! (define_insn "*sse_xorti3_df_1"
!   [(set (subreg:TI (match_operand:DF 0 "register_operand" "=Y") 0)
!         (xor:TI (subreg:TI (match_operand:DF 1 "register_operand" "%0") 0)
! 		(subreg:TI (match_operand:DF 2 "register_operand" "Y") 0)))]
!   "TARGET_SSE2"
!   "xorpd\t{%2, %0|%0, %2}"
!   [(set_attr "type" "sselog")
!    (set_attr "mode" "V2DF")])
! 
! (define_insn "*sse_xorti3_df_2"
!   [(set (subreg:TI (match_operand:DF 0 "register_operand" "=Y") 0)
!         (xor:TI (subreg:TI (match_operand:DF 1 "register_operand" "0") 0)
! 		(match_operand:TI 2 "nonimmediate_operand" "Ym")))]
    "TARGET_SSE2"
!   "xorpd\t{%2, %0|%0, %2}"
    [(set_attr "type" "sselog")
!    (set_attr "mode" "V2DF")])
  
! (define_insn "*sse_xorti3_sf_1"
!   [(set (subreg:TI (match_operand:SF 0 "register_operand" "=x") 0)
!         (xor:TI (subreg:TI (match_operand:SF 1 "register_operand" "%0") 0)
! 		(subreg:TI (match_operand:SF 2 "register_operand" "x") 0)))]
!   "TARGET_SSE"
!   "xorps\t{%2, %0|%0, %2}"
    [(set_attr "type" "sselog")
!    (set_attr "mode" "V4SF")])
  
! (define_insn "*sse_xorti3_sf_2"
!   [(set (subreg:TI (match_operand:SF 0 "register_operand" "=x") 0)
!         (xor:TI (subreg:TI (match_operand:SF 1 "register_operand" "0") 0)
  		(match_operand:TI 2 "nonimmediate_operand" "xm")))]
!   "TARGET_SSE"
!   "xorps\t{%2, %0|%0, %2}"
    [(set_attr "type" "sselog")
!    (set_attr "mode" "V4SF")])
  
! (define_insn "sse_xorti3"
!   [(set (match_operand:TI 0 "register_operand" "=x")
!         (xor:TI (match_operand:TI 1 "nonimmediate_operand" "%0")
! 		(match_operand:TI 2 "nonimmediate_operand" "xm")))]
!   "TARGET_SSE && !TARGET_SSE2
     && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
!   "xorps\t{%2, %0|%0, %2}"
    [(set_attr "type" "sselog")
!    (set_attr "mode" "V4SF")])
  
! (define_insn "sse2_xorti3"
    [(set (match_operand:TI 0 "register_operand" "=x")
          (xor:TI (match_operand:TI 1 "nonimmediate_operand" "%0")
  		(match_operand:TI 2 "nonimmediate_operand" "xm")))]
--- 19043,19345 ----
  
  ;; SSE logical operations.
  
+ ;; SSE defines logical operations on floating point values.  This poses an
+ ;; interesting challenge to the RTL representation, where logicals are only
+ ;; valid on integral types.  We deal with this by representing the floating
+ ;; point logical as a logical on arguments cast to TImode, as this is what the
+ ;; hardware really does.  Unfortunately the hardware requires the type
+ ;; information to be present, and thus we must prevent subregs from being
+ ;; simplified and eliminated in later compilation phases.
+ ;;
+ ;; We have following variants from each instruction:
+ ;; sse_andsf3 - the operation taking V4SF vector operands
+ ;;              and doing TImode cast on them
+ ;; *sse_andsf3_memory - the operation taking one memory operand cast to
+ ;;                      TImode, since the backend insists on eliminating
+ ;;                      casts on memory operands
+ ;; sse_andti3_sf_1 - the operation taking SF scalar operands.
+ ;;                   We cannot accept a memory operand here as the instruction
+ ;;		     reads whole scalar.  This is generated only post reload by
+ ;;		     GCC scalar float operations that expand to logicals (fabs)
+ ;; sse_andti3_sf_2 - the operation taking SF scalar input and TImode
+ ;;		     memory operand.  Eventually combine may be able
+ ;;		     to synthesize these using a splitter.
+ ;; sse2_anddf3, *sse2_anddf3_memory
+ ;;              
+ ;; 
  ;; These are not called andti3 etc. because we really really don't want
  ;; the compiler to widen DImode ands to TImode ands and then try to move
  ;; into DImode subregs of SSE registers, and them together, and move out
  ;; of DImode subregs again!
+ ;; SSE1 single precision floating point logical operation
+ (define_expand "sse_andv4sf3"
+   [(set (subreg:TI (match_operand:V4SF 0 "register_operand" "") 0)
+         (and:TI (subreg:TI (match_operand:V4SF 1 "register_operand" "") 0)
+ 		(subreg:TI (match_operand:V4SF 2 "nonimmediate_operand" "") 0)))]
+   "TARGET_SSE"
+   "")
  
! (define_insn "*sse_andv4sf3"
!   [(set (subreg:TI (match_operand:V4SF 0 "register_operand" "=x") 0)
!         (and:TI (match_operand:TI 1 "nonimmediate_operand" "%0")
! 		(match_operand:TI 2 "nonimmediate_operand" "xm")))]
!   "TARGET_SSE
!    && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
!   "andps\t{%2, %0|%0, %2}"
    [(set_attr "type" "sselog")
!    (set_attr "mode" "V4SF")])
  
! (define_insn "*sse_andsf3"
!   [(set (subreg:TI (match_operand:SF 0 "register_operand" "=x") 0)
!         (and:TI (match_operand:TI 1 "nonimmediate_operand" "%0")
! 		(match_operand:TI 2 "nonimmediate_operand" "xm")))]
!   "TARGET_SSE
!    && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
!   "andps\t{%2, %0|%0, %2}"
    [(set_attr "type" "sselog")
!    (set_attr "mode" "V4SF")])
  
! (define_expand "sse_nandv4sf3"
!   [(set (subreg:TI (match_operand:V4SF 0 "register_operand" "") 0)
!         (and:TI (not:TI (subreg:TI (match_operand:V4SF 1 "register_operand" "") 0))
! 	        (subreg:TI (match_operand:V4SF 2 "nonimmediate_operand" "") 0)))]
    "TARGET_SSE"
!   "")
! 
! (define_insn "*sse_nandv4sf3"
!   [(set (subreg:TI (match_operand:V4SF 0 "register_operand" "=x") 0)
!         (and:TI (not:TI (match_operand:TI 1 "register_operand" "0"))
! 	        (match_operand:TI 2 "nonimmediate_operand" "xm")))]
!   "TARGET_SSE"
!   "andnps\t{%2, %0|%0, %2}"
    [(set_attr "type" "sselog")
     (set_attr "mode" "V4SF")])
  
! (define_insn "*sse_nandsf3"
    [(set (subreg:TI (match_operand:SF 0 "register_operand" "=x") 0)
!         (and:TI (not:TI (match_operand:TI 1 "register_operand" "0"))
! 	        (match_operand:TI 2 "nonimmediate_operand" "xm")))]
    "TARGET_SSE"
!   "andnps\t{%2, %0|%0, %2}"
    [(set_attr "type" "sselog")
     (set_attr "mode" "V4SF")])
  
! (define_expand "sse_iorv4sf3"
!   [(set (subreg:TI (match_operand:V4SF 0 "register_operand" "") 0)
!         (ior:TI (subreg:TI (match_operand:V4SF 1 "register_operand" "") 0)
! 		(subreg:TI (match_operand:V4SF 2 "nonimmediate_operand" "") 0)))]
!   "TARGET_SSE"
!   "")
! 
! (define_insn "*sse_iorv4sf3"
!   [(set (subreg:TI (match_operand:V4SF 0 "register_operand" "=x") 0)
!         (ior:TI (match_operand:TI 1 "nonimmediate_operand" "%0")
  		(match_operand:TI 2 "nonimmediate_operand" "xm")))]
!   "TARGET_SSE
     && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
!   "orps\t{%2, %0|%0, %2}"
    [(set_attr "type" "sselog")
     (set_attr "mode" "V4SF")])
  
! (define_insn "*sse_iorsf3"
!   [(set (subreg:TI (match_operand:SF 0 "register_operand" "=x") 0)
!         (ior:TI (match_operand:TI 1 "nonimmediate_operand" "%0")
  		(match_operand:TI 2 "nonimmediate_operand" "xm")))]
!   "TARGET_SSE
     && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
!   "orps\t{%2, %0|%0, %2}"
    [(set_attr "type" "sselog")
!    (set_attr "mode" "V4SF")])
  
! (define_expand "sse_xorv4sf3"
!   [(set (subreg:TI (match_operand:V4SF 0 "register_operand" "") 0)
!         (xor:TI (subreg:TI (match_operand:V4SF 1 "register_operand" "") 0)
! 		(subreg:TI (match_operand:V4SF 2 "nonimmediate_operand" "") 0)))]
!   "TARGET_SSE
     && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
!   "")
! 
! (define_insn "*sse_xorv4sf3"
!   [(set (subreg:TI (match_operand:V4SF 0 "register_operand" "=x") 0)
!         (xor:TI (match_operand:TI 1 "nonimmediate_operand" "%0")
! 		(match_operand:TI 2 "nonimmediate_operand" "xm")))]
!   "TARGET_SSE
!    && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
!   "xorps\t{%2, %0|%0, %2}"
    [(set_attr "type" "sselog")
!    (set_attr "mode" "V4SF")])
  
! ;; SSE2 double precision floating point logical operation
! 
! (define_expand "sse2_andv2df3"
!   [(set (subreg:TI (match_operand:V2DF 0 "register_operand" "") 0)
!         (and:TI (subreg:TI (match_operand:V2DF 1 "register_operand" "") 0)
! 	        (subreg:TI (match_operand:V2DF 2 "nonimmediate_operand" "") 0)))]
    "TARGET_SSE2"
!   "")
! 
! (define_insn "*sse2_andv2df3"
!   [(set (subreg:TI (match_operand:V2DF 0 "register_operand" "=x") 0)
!         (and:TI (match_operand:TI 1 "nonimmediate_operand" "%0")
! 		(match_operand:TI 2 "nonimmediate_operand" "xm")))]
!   "TARGET_SSE2
!    && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
!   "andpd\t{%2, %0|%0, %2}"
    [(set_attr "type" "sselog")
     (set_attr "mode" "V2DF")])
  
! (define_insn "*sse2_andv2df3"
!   [(set (subreg:TI (match_operand:DF 0 "register_operand" "=x") 0)
!         (and:TI (match_operand:TI 1 "nonimmediate_operand" "%0")
  		(match_operand:TI 2 "nonimmediate_operand" "xm")))]
!   "TARGET_SSE2
!    && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
!   "andpd\t{%2, %0|%0, %2}"
    [(set_attr "type" "sselog")
!    (set_attr "mode" "V2DF")])
  
! (define_expand "sse2_nandv2df3"
!   [(set (subreg:TI (match_operand:V2DF 0 "register_operand" "") 0)
!         (and:TI (not:TI (subreg:TI (match_operand:V2DF 1 "register_operand" "") 0))
! 	        (subreg:TI (match_operand:V2DF 2 "nonimmediate_operand" "") 0)))]
!   "TARGET_SSE2"
!   "")
! 
! (define_insn "*sse2_nandv2df3"
!   [(set (subreg:TI (match_operand:V2DF 0 "register_operand" "=x") 0)
          (and:TI (not:TI (match_operand:TI 1 "register_operand" "0"))
! 	        (match_operand:TI 2 "nonimmediate_operand" "xm")))]
!   "TARGET_SSE2"
!   "andnpd\t{%2, %0|%0, %2}"
    [(set_attr "type" "sselog")
!    (set_attr "mode" "V2DF")])
  
! (define_insn "*sse_nandti3_df"
!   [(set (subreg:TI (match_operand:DF 0 "register_operand" "=Y") 0)
          (and:TI (not:TI (match_operand:TI 1 "register_operand" "0"))
! 		(match_operand:TI 2 "nonimmediate_operand" "Ym")))]
    "TARGET_SSE2"
!   "andnpd\t{%2, %0|%0, %2}"
    [(set_attr "type" "sselog")
!    (set_attr "mode" "V2DF")])
  
! (define_expand "sse2_iorv2df3"
!   [(set (subreg:TI (match_operand:V2DF 0 "register_operand" "") 0)
!         (ior:TI (subreg:TI (match_operand:V2DF 1 "register_operand" "") 0)
! 		(subreg:TI (match_operand:V2DF 2 "nonimmediate_operand" "") 0)))]
!   "TARGET_SSE2"
!   "")
! 
! (define_insn "*sse2_iorv2df3"
!   [(set (subreg:TI (match_operand:V2DF 0 "register_operand" "=x") 0)
!         (ior:TI (match_operand:TI 1 "nonimmediate_operand" "%0")
! 		(match_operand:TI 2 "nonimmediate_operand" "xm")))]
    "TARGET_SSE2
     && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
    "orpd\t{%2, %0|%0, %2}"
    [(set_attr "type" "sselog")
     (set_attr "mode" "V2DF")])
  
! (define_insn "*sse2_iordf3"
!   [(set (subreg:TI (match_operand:DF 0 "register_operand" "=x") 0)
!         (ior:TI (match_operand:TI 1 "nonimmediate_operand" "%0")
! 		(match_operand:TI 2 "nonimmediate_operand" "xm")))]
!   "TARGET_SSE2
!    && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
    "orpd\t{%2, %0|%0, %2}"
    [(set_attr "type" "sselog")
     (set_attr "mode" "V2DF")])
  
! (define_expand "sse2_xorv2df3"
!   [(set (subreg:TI (match_operand:V2DF 0 "register_operand" "") 0)
!         (xor:TI (subreg:TI (match_operand:V2DF 1 "nonimmediate_operand" "") 0)
! 		(subreg:TI (match_operand:V2DF 2 "nonimmediate_operand" "") 0)))]
!   "TARGET_SSE2"
!   "")
  
! (define_insn "*sse2_xorv2df3"
!   [(set (subreg:TI (match_operand:V2DF 0 "register_operand" "=x") 0)
!         (xor:TI (match_operand:TI 1 "nonimmediate_operand" "%0")
  		(match_operand:TI 2 "nonimmediate_operand" "xm")))]
!   "TARGET_SSE2
!    && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
!   "xorpd\t{%2, %0|%0, %2}"
    [(set_attr "type" "sselog")
!    (set_attr "mode" "V2DF")])
  
! (define_insn "*sse2_xordf3"
!   [(set (subreg:TI (match_operand:DF 0 "register_operand" "=x") 0)
!         (xor:TI (match_operand:TI 1 "nonimmediate_operand" "%0")
  		(match_operand:TI 2 "nonimmediate_operand" "xm")))]
!   "TARGET_SSE2
     && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
!   "xorpd\t{%2, %0|%0, %2}"
    [(set_attr "type" "sselog")
!    (set_attr "mode" "V2DF")])
  
! ;; SSE2 integral logicals.  These patterns must always come after floating
! ;; point ones since we don't want compiler to use integer opcodes on floating
! ;; point SSE values to avoid matching of subregs in the match_operand.
! (define_insn "*sse2_andti3"
    [(set (match_operand:TI 0 "register_operand" "=x")
!         (and:TI (match_operand:TI 1 "nonimmediate_operand" "%0")
  		(match_operand:TI 2 "nonimmediate_operand" "xm")))]
    "TARGET_SSE2
     && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
!   "pand\t{%2, %0|%0, %2}"
    [(set_attr "type" "sselog")
     (set_attr "mode" "TI")])
  
! (define_insn "sse2_andv2di3"
    [(set (match_operand:V2DI 0 "register_operand" "=x")
!         (and:V2DI (match_operand:V2DI 1 "nonimmediate_operand" "%0")
  		  (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
    "TARGET_SSE2
     && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
!   "pand\t{%2, %0|%0, %2}"
    [(set_attr "type" "sselog")
     (set_attr "mode" "TI")])
  
! (define_insn "*sse2_nandti3"
!   [(set (match_operand:TI 0 "register_operand" "=x")
!         (and:TI (not:TI (match_operand:TI 1 "register_operand" "0"))
! 		(match_operand:TI 2 "nonimmediate_operand" "xm")))]
    "TARGET_SSE2"
!   "pandn\t{%2, %0|%0, %2}"
    [(set_attr "type" "sselog")
!    (set_attr "mode" "TI")])
  
! (define_insn "sse2_nandv2di3"	; (~op1) & op2 is not commutative: no "%"
!   [(set (match_operand:V2DI 0 "register_operand" "=x")
!         (and:V2DI (not:V2DI (match_operand:V2DI 1 "nonimmediate_operand" "0"))
! 		  (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
!   "TARGET_SSE2
!    && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
!   "pandn\t{%2, %0|%0, %2}"
    [(set_attr "type" "sselog")
!    (set_attr "mode" "TI")])
  
! (define_insn "*sse2_iorti3"
!   [(set (match_operand:TI 0 "register_operand" "=x")
!         (ior:TI (match_operand:TI 1 "nonimmediate_operand" "%0")
  		(match_operand:TI 2 "nonimmediate_operand" "xm")))]
!   "TARGET_SSE2
!    && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
!   "por\t{%2, %0|%0, %2}"
    [(set_attr "type" "sselog")
!    (set_attr "mode" "TI")])
  
! (define_insn "sse2_iorv2di3"
!   [(set (match_operand:V2DI 0 "register_operand" "=x")
!         (ior:V2DI (match_operand:V2DI 1 "nonimmediate_operand" "%0")
! 		  (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
!   "TARGET_SSE2
     && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
!   "por\t{%2, %0|%0, %2}"
    [(set_attr "type" "sselog")
!    (set_attr "mode" "TI")])
  
! (define_insn "*sse2_xorti3"
    [(set (match_operand:TI 0 "register_operand" "=x")
          (xor:TI (match_operand:TI 1 "nonimmediate_operand" "%0")
  		(match_operand:TI 2 "nonimmediate_operand" "xm")))]
***************
*** 19412,19418 ****
    [(set_attr "type" "sse")
     (set_attr "mode" "SF")])
  
- 
  ;; SSE <-> integer/MMX conversions
  
  (define_insn "cvtpi2ps"
--- 19558,19563 ----
***************
*** 20799,20840 ****
    "minsd\t{%2, %0|%0, %2}"
    [(set_attr "type" "sseadd")
     (set_attr "mode" "DF")])
- 
- (define_insn "sse2_anddf3"
-   [(set (match_operand:V2DF 0 "register_operand" "=x")
-         (subreg:V2DF (and:TI (subreg:TI (match_operand:V2DF 1 "register_operand" "%0") 0)
- 			     (subreg:TI (match_operand:V2DF 2 "nonimmediate_operand" "xm") 0)) 0))]
-   "TARGET_SSE2"
-   "andpd\t{%2, %0|%0, %2}"
-   [(set_attr "type" "sselog")
-    (set_attr "mode" "V2DF")])
- 
- (define_insn "sse2_nanddf3"
-   [(set (match_operand:V2DF 0 "register_operand" "=x")
-         (subreg:V2DF (and:TI (not:TI (subreg:TI (match_operand:V2DF 1 "register_operand" "0") 0))
- 			     (subreg:TI (match_operand:V2DF 2 "nonimmediate_operand" "xm") 0)) 0))]
-   "TARGET_SSE2"
-   "andnpd\t{%2, %0|%0, %2}"
-   [(set_attr "type" "sselog")
-    (set_attr "mode" "V2DF")])
- 
- (define_insn "sse2_iordf3"
-   [(set (match_operand:V2DF 0 "register_operand" "=x")
-         (subreg:V2DF (ior:TI (subreg:TI (match_operand:V2DF 1 "register_operand" "%0") 0)
- 			     (subreg:TI (match_operand:V2DF 2 "nonimmediate_operand" "xm") 0)) 0))]
-   "TARGET_SSE2"
-   "orpd\t{%2, %0|%0, %2}"
-   [(set_attr "type" "sselog")
-    (set_attr "mode" "V2DF")])
- 
- (define_insn "sse2_xordf3"
-   [(set (match_operand:V2DF 0 "register_operand" "=x")
-         (subreg:V2DF (xor:TI (subreg:TI (match_operand:V2DF 1 "register_operand" "%0") 0)
- 			     (subreg:TI (match_operand:V2DF 2 "nonimmediate_operand" "xm") 0)) 0))]
-   "TARGET_SSE2"
-   "xorpd\t{%2, %0|%0, %2}"
-   [(set_attr "type" "sselog")
-    (set_attr "mode" "V2DF")])
  ;; SSE2 square root.  There doesn't appear to be an extension for the
  ;; reciprocal/rsqrt instructions if the Intel manual is to be believed.
  
--- 20944,20949 ----


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]