This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Re: mmx/sse support headers
- From: Richard Henderson <rth at redhat dot com>
- To: gcc-patches at gcc dot gnu dot org
- Date: Sat, 12 Jan 2002 02:12:38 -0800
- Subject: Re: mmx/sse support headers
- References: <20020111234931.A1825@redhat.com>
On Fri, Jan 11, 2002 at 11:49:31PM -0800, Richard Henderson wrote:
> Expect the SSE tests to fail at the moment. I tested this with type_for_mode
> hacked to ignore the HOST_BITS_PER_WIDE_INT so that we can create TImode
> registers. We need to find a proper solution here that does not involve
> forcing the use of long long throughout the compiler.
Of course, the easiest answer is to not use TImode for __m128.
Using V4SFmode is somewhat less than clean when it comes time
to add the SSE2 V2DFmode stuff, but what the hey it works.
Also:
Removed a bunch of builtins that we don't use. All the composite
stuff is done just as well in xmmintrin.h in C instead of inside
the compiler.
Removed the x86 private prefetch builtins. The generic prefetch
builtin is a superset, and I suspect that the number of folks that
are currently using __builtin_ia32_prefetch approximates zero.
Fixed a bunch of predicates that didn't match their constraints.
Fixed two typos in xmmintrin.h.
r~
* config/i386/i386.c (override_options): If SSE, enable sse prefetch.
(ix86_expand_vector_move): New.
(bdesc_2arg): Remove andps, andnps, orps, xorps.
(ix86_init_mmx_sse_builtins): Make static. Remove composite builtins.
Remove old prefetch builtins. Special case the logicals removed above.
(ix86_expand_builtin): Likewise.
(safe_vector_operand): Use V4SFmode, not TImode.
(ix86_expand_store_builtin): Remove shuffle arg. Update callers.
(ix86_expand_timode_binop_builtin): New.
* config/i386/i386-protos.h: Update.
* config/i386/i386.h (enum ix86_builtins): Update.
* config/i386/i386.md: Correct predicates on MMX/SSE patterns.
Use ix86_expand_vector_move in vector move expanders.
(movti_internal, movti_rex64): Add xorps alternative.
(sse_clrv4sf): Rename and adjust from sse_clrti.
(prefetch): Don't work so hard.
(prefetch_sse, prefetch_3dnow): Use PREFETCH rtx, not UNSPEC.
* config/i386/xmmintrin.h (__m128): Use V4SFmode.
(_mm_getcsr, _mm_setcsr): Fix typo in builtin name.
Index: config/i386/i386-protos.h
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/i386/i386-protos.h,v
retrieving revision 1.66
diff -c -p -d -r1.66 i386-protos.h
*** i386-protos.h 2001/10/31 14:32:27 1.66
--- i386-protos.h 2002/01/12 09:50:07
*************** extern rtx i386_simplify_dwarf_addr PARA
*** 108,113 ****
--- 108,114 ----
extern void ix86_expand_clear PARAMS ((rtx));
extern void ix86_expand_move PARAMS ((enum machine_mode, rtx[]));
+ extern void ix86_expand_vector_move PARAMS ((enum machine_mode, rtx[]));
extern void ix86_expand_binary_operator PARAMS ((enum rtx_code,
enum machine_mode, rtx[]));
extern int ix86_binary_operator_ok PARAMS ((enum rtx_code, enum machine_mode,
*************** extern void function_arg_advance PARAMS
*** 177,183 ****
tree, int));
extern rtx ix86_function_value PARAMS ((tree));
extern void ix86_init_builtins PARAMS ((void));
- extern void ix86_init_mmx_sse_builtins PARAMS ((void));
extern rtx ix86_expand_builtin PARAMS ((tree, rtx, rtx, enum machine_mode, int));
#endif
--- 178,183 ----
Index: config/i386/i386.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/i386/i386.c,v
retrieving revision 1.355
diff -c -p -d -r1.355 i386.c
*** i386.c 2002/01/12 07:38:48 1.355
--- i386.c 2002/01/12 09:50:07
*************** static int ix86_adjust_cost PARAMS ((rtx
*** 684,689 ****
--- 684,690 ----
static void ix86_sched_init PARAMS ((FILE *, int, int));
static int ix86_sched_reorder PARAMS ((FILE *, int, rtx *, int *, int));
static int ix86_variable_issue PARAMS ((FILE *, int, rtx, int));
+ static void ix86_init_mmx_sse_builtins PARAMS ((void));
struct ix86_address
{
*************** static rtx ix86_expand_sse_compare PARAM
*** 701,707 ****
static rtx ix86_expand_unop1_builtin PARAMS ((enum insn_code, tree, rtx));
static rtx ix86_expand_unop_builtin PARAMS ((enum insn_code, tree, rtx, int));
static rtx ix86_expand_binop_builtin PARAMS ((enum insn_code, tree, rtx));
! static rtx ix86_expand_store_builtin PARAMS ((enum insn_code, tree, int));
static rtx safe_vector_operand PARAMS ((rtx, enum machine_mode));
static enum rtx_code ix86_fp_compare_code_to_integer PARAMS ((enum rtx_code));
static void ix86_fp_comparison_codes PARAMS ((enum rtx_code code,
--- 702,710 ----
static rtx ix86_expand_unop1_builtin PARAMS ((enum insn_code, tree, rtx));
static rtx ix86_expand_unop_builtin PARAMS ((enum insn_code, tree, rtx, int));
static rtx ix86_expand_binop_builtin PARAMS ((enum insn_code, tree, rtx));
! static rtx ix86_expand_timode_binop_builtin PARAMS ((enum insn_code,
! tree, rtx));
! static rtx ix86_expand_store_builtin PARAMS ((enum insn_code, tree));
static rtx safe_vector_operand PARAMS ((rtx, enum machine_mode));
static enum rtx_code ix86_fp_compare_code_to_integer PARAMS ((enum rtx_code));
static void ix86_fp_comparison_codes PARAMS ((enum rtx_code code,
*************** override_options ()
*** 1164,1170 ****
/* It makes no sense to ask for just SSE builtins, so MMX is also turned
on by -msse. */
if (TARGET_SSE)
! target_flags |= MASK_MMX;
/* If it has 3DNow! it also has MMX so MMX is also turned on by -m3dnow */
if (TARGET_3DNOW)
--- 1167,1176 ----
/* It makes no sense to ask for just SSE builtins, so MMX is also turned
on by -msse. */
if (TARGET_SSE)
! {
! target_flags |= MASK_MMX;
! x86_prefetch_sse = true;
! }
/* If it has 3DNow! it also has MMX so MMX is also turned on by -m3dnow */
if (TARGET_3DNOW)
*************** ix86_expand_move (mode, operands)
*** 6661,6666 ****
--- 6667,6704 ----
emit_insn (insn);
}
+ void
+ ix86_expand_vector_move (mode, operands)
+ enum machine_mode mode;
+ rtx operands[];
+ {
+ /* Force constants other than zero into memory. We do not know how
+ the instructions used to build constants modify the upper 64 bits
+ of the register, once we have that information we may be able
+ to handle some of them more efficiently. */
+ if ((reload_in_progress | reload_completed) == 0
+ && register_operand (operands[0], mode)
+ && CONSTANT_P (operands[1]))
+ {
+ rtx addr = gen_reg_rtx (Pmode);
+ emit_move_insn (addr, XEXP (force_const_mem (mode, operands[1]), 0));
+ operands[1] = gen_rtx_MEM (mode, addr);
+ }
+
+ /* Make operand1 a register if it isn't already. */
+ if ((reload_in_progress | reload_completed) == 0
+ && !register_operand (operands[0], mode)
+ && !register_operand (operands[1], mode)
+ && operands[1] != CONST0_RTX (mode))
+ {
+ rtx temp = force_reg (TImode, operands[1]);
+ emit_move_insn (operands[0], temp);
+ return;
+ }
+
+ emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
+ }
+
/* Attempt to expand a binary operator. Make the expansion closer to the
actual machine, then just general_operand, which will allow 3 separate
memory references (one output, two input) in a single insn. */
*************** static const struct builtin_description
*** 10748,10758 ****
{ MASK_SSE, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
{ MASK_SSE, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
- { MASK_SSE, CODE_FOR_sse_andti3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
- { MASK_SSE, CODE_FOR_sse_nandti3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
- { MASK_SSE, CODE_FOR_sse_iorti3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
- { MASK_SSE, CODE_FOR_sse_xorti3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
-
{ MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
{ MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
{ MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
--- 10786,10791 ----
*************** ix86_init_builtins ()
*** 10865,10871 ****
/* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
builtins. */
! void
ix86_init_mmx_sse_builtins ()
{
const struct builtin_description * d;
--- 10898,10904 ----
/* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
builtins. */
! static void
ix86_init_mmx_sse_builtins ()
{
const struct builtin_description * d;
*************** ix86_init_mmx_sse_builtins ()
*** 10899,10912 ****
= build_function_type (integer_type_node,
tree_cons (NULL_TREE, V8QI_type_node,
endlink));
- tree int_ftype_v2si
- = build_function_type (integer_type_node,
- tree_cons (NULL_TREE, V2SI_type_node,
- endlink));
- tree v2si_ftype_int
- = build_function_type (V2SI_type_node,
- tree_cons (NULL_TREE, integer_type_node,
- endlink));
tree v4sf_ftype_v4sf_int
= build_function_type (V4SF_type_node,
tree_cons (NULL_TREE, V4SF_type_node,
--- 10932,10937 ----
*************** ix86_init_mmx_sse_builtins ()
*** 10976,10986 ****
endlink)));
tree void_ftype_void
= build_function_type (void_type_node, endlink);
- tree void_ftype_pchar_int
- = build_function_type (void_type_node,
- tree_cons (NULL_TREE, pchar_type_node,
- tree_cons (NULL_TREE, integer_type_node,
- endlink)));
tree void_ftype_unsigned
= build_function_type (void_type_node,
tree_cons (NULL_TREE, unsigned_type_node,
--- 11001,11006 ----
*************** ix86_init_mmx_sse_builtins ()
*** 10989,10996 ****
= build_function_type (unsigned_type_node, endlink);
tree di_ftype_void
= build_function_type (long_long_unsigned_type_node, endlink);
! tree ti_ftype_void
! = build_function_type (intTI_type_node, endlink);
tree v2si_ftype_v4sf
= build_function_type (V2SI_type_node,
tree_cons (NULL_TREE, V4SF_type_node,
--- 11009,11016 ----
= build_function_type (unsigned_type_node, endlink);
tree di_ftype_void
= build_function_type (long_long_unsigned_type_node, endlink);
! tree v4sf_ftype_void
! = build_function_type (V4SF_type_node, endlink);
tree v2si_ftype_v4sf
= build_function_type (V2SI_type_node,
tree_cons (NULL_TREE, V4SF_type_node,
*************** ix86_init_mmx_sse_builtins ()
*** 11007,11025 ****
= build_function_type (V4SF_type_node,
tree_cons (NULL_TREE, pfloat_type_node,
endlink));
- tree v4sf_ftype_float
- = build_function_type (V4SF_type_node,
- tree_cons (NULL_TREE, float_type_node,
- endlink));
- tree v4sf_ftype_float_float_float_float
- = build_function_type (V4SF_type_node,
- tree_cons (NULL_TREE, float_type_node,
- tree_cons (NULL_TREE, float_type_node,
- tree_cons (NULL_TREE,
- float_type_node,
- tree_cons (NULL_TREE,
- float_type_node,
- endlink)))));
/* @@@ the type is bogus */
tree v4sf_ftype_v4sf_pv2si
= build_function_type (V4SF_type_node,
--- 11027,11032 ----
*************** ix86_init_mmx_sse_builtins ()
*** 11069,11079 ****
tree_cons (NULL_TREE, V2SI_type_node,
tree_cons (NULL_TREE, V2SI_type_node,
endlink)));
- tree ti_ftype_ti_ti
- = build_function_type (intTI_type_node,
- tree_cons (NULL_TREE, intTI_type_node,
- tree_cons (NULL_TREE, intTI_type_node,
- endlink)));
tree di_ftype_di_di
= build_function_type (long_long_unsigned_type_node,
tree_cons (NULL_TREE, long_long_unsigned_type_node,
--- 11076,11081 ----
*************** ix86_init_mmx_sse_builtins ()
*** 11110,11120 ****
V2SF_type_node,
endlink)));
- tree void_ftype_pchar
- = build_function_type (void_type_node,
- tree_cons (NULL_TREE, pchar_type_node,
- endlink));
-
/* Add all builtins that are more or less simple operations on two
operands. */
for (i = 0, d = bdesc_2arg; i < sizeof (bdesc_2arg) / sizeof *d; i++, d++)
--- 11112,11117 ----
*************** ix86_init_mmx_sse_builtins ()
*** 11142,11150 ****
case V2SImode:
type = v2si_ftype_v2si_v2si;
break;
- case TImode:
- type = ti_ftype_ti_ti;
- break;
case DImode:
type = di_ftype_di_di;
break;
--- 11139,11144 ----
*************** ix86_init_mmx_sse_builtins ()
*** 11164,11171 ****
}
/* Add the remaining MMX insns with somewhat more complicated types. */
- def_builtin (MASK_MMX, "__builtin_ia32_m_from_int", v2si_ftype_int, IX86_BUILTIN_M_FROM_INT);
- def_builtin (MASK_MMX, "__builtin_ia32_m_to_int", int_ftype_v2si, IX86_BUILTIN_M_TO_INT);
def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
def_builtin (MASK_MMX, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
--- 11158,11163 ----
*************** ix86_init_mmx_sse_builtins ()
*** 11199,11204 ****
--- 11191,11201 ----
def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
+ def_builtin (MASK_SSE, "__builtin_ia32_andps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ANDPS);
+ def_builtin (MASK_SSE, "__builtin_ia32_andnps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ANDNPS);
+ def_builtin (MASK_SSE, "__builtin_ia32_orps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ORPS);
+ def_builtin (MASK_SSE, "__builtin_ia32_xorps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_XORPS);
+
def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
*************** ix86_init_mmx_sse_builtins ()
*** 11222,11228 ****
def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
- def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_prefetch", void_ftype_pchar_int, IX86_BUILTIN_PREFETCH);
def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", v4hi_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
--- 11219,11224 ----
*************** ix86_init_mmx_sse_builtins ()
*** 11256,11263 ****
def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
- def_builtin (MASK_3DNOW, "__builtin_ia32_prefetch_3dnow", void_ftype_pchar, IX86_BUILTIN_PREFETCH_3DNOW);
- def_builtin (MASK_3DNOW, "__builtin_ia32_prefetchw", void_ftype_pchar, IX86_BUILTIN_PREFETCHW);
/* 3DNow! extension as used in the Athlon CPU. */
def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
--- 11252,11257 ----
*************** ix86_init_mmx_sse_builtins ()
*** 11267,11280 ****
def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
! /* Composite intrinsics. */
! def_builtin (MASK_SSE, "__builtin_ia32_setps1", v4sf_ftype_float, IX86_BUILTIN_SETPS1);
! def_builtin (MASK_SSE, "__builtin_ia32_setps", v4sf_ftype_float_float_float_float, IX86_BUILTIN_SETPS);
! def_builtin (MASK_SSE, "__builtin_ia32_setzerops", ti_ftype_void, IX86_BUILTIN_CLRPS);
! def_builtin (MASK_SSE, "__builtin_ia32_loadps1", v4sf_ftype_pfloat, IX86_BUILTIN_LOADPS1);
! def_builtin (MASK_SSE, "__builtin_ia32_loadrps", v4sf_ftype_pfloat, IX86_BUILTIN_LOADRPS);
! def_builtin (MASK_SSE, "__builtin_ia32_storeps1", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREPS1);
! def_builtin (MASK_SSE, "__builtin_ia32_storerps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORERPS);
}
/* Errors in the source file can cause expand_expr to return const0_rtx
--- 11261,11267 ----
def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
! def_builtin (MASK_SSE, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO);
}
/* Errors in the source file can cause expand_expr to return const0_rtx
*************** safe_vector_operand (x, mode)
*** 11293,11300 ****
emit_insn (gen_mmx_clrdi (mode == DImode ? x
: gen_rtx_SUBREG (DImode, x, 0)));
else
! emit_insn (gen_sse_clrti (mode == TImode ? x
! : gen_rtx_SUBREG (TImode, x, 0)));
return x;
}
--- 11280,11287 ----
emit_insn (gen_mmx_clrdi (mode == DImode ? x
: gen_rtx_SUBREG (DImode, x, 0)));
else
! emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? x
! : gen_rtx_SUBREG (V4SFmode, x, 0)));
return x;
}
*************** ix86_expand_binop_builtin (icode, arglis
*** 11342,11354 ****
return target;
}
/* Subroutine of ix86_expand_builtin to take care of stores. */
static rtx
! ix86_expand_store_builtin (icode, arglist, shuffle)
enum insn_code icode;
tree arglist;
- int shuffle;
{
rtx pat;
tree arg0 = TREE_VALUE (arglist);
--- 11329,11373 ----
return target;
}
+ /* In type_for_mode we restrict the ability to create TImode types
+ to hosts with 64-bit H_W_I. So we've defined the SSE logicals
+ to have a V4SFmode signature. Convert them in-place to TImode. */
+
+ static rtx
+ ix86_expand_timode_binop_builtin (icode, arglist, target)
+ enum insn_code icode;
+ tree arglist;
+ rtx target;
+ {
+ rtx pat;
+ tree arg0 = TREE_VALUE (arglist);
+ tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
+ rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
+ rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
+
+ op0 = gen_lowpart (TImode, op0);
+ op1 = gen_lowpart (TImode, op1);
+ target = gen_reg_rtx (TImode);
+
+ if (! (*insn_data[icode].operand[1].predicate) (op0, TImode))
+ op0 = copy_to_mode_reg (TImode, op0);
+ if (! (*insn_data[icode].operand[2].predicate) (op1, TImode))
+ op1 = copy_to_mode_reg (TImode, op1);
+
+ pat = GEN_FCN (icode) (target, op0, op1);
+ if (! pat)
+ return 0;
+ emit_insn (pat);
+
+ return gen_lowpart (V4SFmode, target);
+ }
+
/* Subroutine of ix86_expand_builtin to take care of stores. */
static rtx
! ix86_expand_store_builtin (icode, arglist)
enum insn_code icode;
tree arglist;
{
rtx pat;
tree arg0 = TREE_VALUE (arglist);
*************** ix86_expand_store_builtin (icode, arglis
*** 11362,11371 ****
op1 = safe_vector_operand (op1, mode1);
op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
- if (shuffle >= 0 || ! (*insn_data[icode].operand[1].predicate) (op1, mode1))
- op1 = copy_to_mode_reg (mode1, op1);
- if (shuffle >= 0)
- emit_insn (gen_sse_shufps (op1, op1, op1, GEN_INT (shuffle)));
pat = GEN_FCN (icode) (op0, op1);
if (pat)
emit_insn (pat);
--- 11381,11386 ----
*************** ix86_expand_builtin (exp, target, subtar
*** 11568,11574 ****
enum insn_code icode;
tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
tree arglist = TREE_OPERAND (exp, 1);
! tree arg0, arg1, arg2, arg3;
rtx op0, op1, op2, pat;
enum machine_mode tmode, mode0, mode1, mode2;
unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
--- 11583,11589 ----
enum insn_code icode;
tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
tree arglist = TREE_OPERAND (exp, 1);
! tree arg0, arg1, arg2;
rtx op0, op1, op2, pat;
enum machine_mode tmode, mode0, mode1, mode2;
unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
*************** ix86_expand_builtin (exp, target, subtar
*** 11583,11601 ****
emit_insn (gen_sfence ());
return 0;
- case IX86_BUILTIN_M_FROM_INT:
- target = gen_reg_rtx (DImode);
- op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
- emit_move_insn (gen_rtx_SUBREG (SImode, target, 0), op0);
- return target;
-
- case IX86_BUILTIN_M_TO_INT:
- op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
- op0 = copy_to_mode_reg (DImode, op0);
- target = gen_reg_rtx (SImode);
- emit_move_insn (target, gen_rtx_SUBREG (SImode, op0, 0));
- return target;
-
case IX86_BUILTIN_PEXTRW:
icode = CODE_FOR_mmx_pextrw;
arg0 = TREE_VALUE (arglist);
--- 11598,11603 ----
*************** ix86_expand_builtin (exp, target, subtar
*** 11689,11694 ****
--- 11691,11709 ----
case IX86_BUILTIN_RCPSS:
return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);
+ case IX86_BUILTIN_ANDPS:
+ return ix86_expand_timode_binop_builtin (CODE_FOR_sse_andti3,
+ arglist, target);
+ case IX86_BUILTIN_ANDNPS:
+ return ix86_expand_timode_binop_builtin (CODE_FOR_sse_nandti3,
+ arglist, target);
+ case IX86_BUILTIN_ORPS:
+ return ix86_expand_timode_binop_builtin (CODE_FOR_sse_iorti3,
+ arglist, target);
+ case IX86_BUILTIN_XORPS:
+ return ix86_expand_timode_binop_builtin (CODE_FOR_sse_xorti3,
+ arglist, target);
+
case IX86_BUILTIN_LOADAPS:
return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);
*************** ix86_expand_builtin (exp, target, subtar
*** 11696,11710 ****
return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
case IX86_BUILTIN_STOREAPS:
! return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist, -1);
case IX86_BUILTIN_STOREUPS:
! return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist, -1);
case IX86_BUILTIN_LOADSS:
return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);
case IX86_BUILTIN_STORESS:
! return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist, -1);
case IX86_BUILTIN_LOADHPS:
case IX86_BUILTIN_LOADLPS:
--- 11711,11725 ----
return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
case IX86_BUILTIN_STOREAPS:
! return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist);
case IX86_BUILTIN_STOREUPS:
! return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
case IX86_BUILTIN_LOADSS:
return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);
case IX86_BUILTIN_STORESS:
! return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist);
case IX86_BUILTIN_LOADHPS:
case IX86_BUILTIN_LOADLPS:
*************** ix86_expand_builtin (exp, target, subtar
*** 11753,11761 ****
return 0;
case IX86_BUILTIN_MOVNTPS:
! return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist, -1);
case IX86_BUILTIN_MOVNTQ:
! return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist, -1);
case IX86_BUILTIN_LDMXCSR:
op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
--- 11768,11776 ----
return 0;
case IX86_BUILTIN_MOVNTPS:
! return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
case IX86_BUILTIN_MOVNTQ:
! return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
case IX86_BUILTIN_LDMXCSR:
op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
*************** ix86_expand_builtin (exp, target, subtar
*** 11769,11797 ****
emit_insn (gen_stmxcsr (target));
return copy_to_mode_reg (SImode, target);
- case IX86_BUILTIN_PREFETCH:
- icode = CODE_FOR_prefetch_sse;
- arg0 = TREE_VALUE (arglist);
- arg1 = TREE_VALUE (TREE_CHAIN (arglist));
- op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
- op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
- mode0 = insn_data[icode].operand[0].mode;
- mode1 = insn_data[icode].operand[1].mode;
-
- if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
- {
- /* @@@ better error message */
- error ("selector must be an immediate");
- return const0_rtx;
- }
-
- op0 = copy_to_mode_reg (Pmode, op0);
- pat = GEN_FCN (icode) (op0, op1);
- if (! pat)
- return 0;
- emit_insn (pat);
- return target;
-
case IX86_BUILTIN_SHUFPS:
icode = CODE_FOR_sse_shufps;
arg0 = TREE_VALUE (arglist);
--- 11784,11789 ----
*************** ix86_expand_builtin (exp, target, subtar
*** 11914,11932 ****
case IX86_BUILTIN_PMULHRW:
return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);
- case IX86_BUILTIN_PREFETCH_3DNOW:
- case IX86_BUILTIN_PREFETCHW:
- icode = CODE_FOR_prefetch_3dnow;
- arg0 = TREE_VALUE (arglist);
- op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
- op1 = (fcode == IX86_BUILTIN_PREFETCH_3DNOW ? const0_rtx : const1_rtx);
- mode0 = insn_data[icode].operand[0].mode;
- pat = GEN_FCN (icode) (copy_to_mode_reg (Pmode, op0), op1);
- if (! pat)
- return NULL_RTX;
- emit_insn (pat);
- return NULL_RTX;
-
case IX86_BUILTIN_PF2IW:
return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);
--- 11906,11911 ----
*************** ix86_expand_builtin (exp, target, subtar
*** 11944,12000 ****
case IX86_BUILTIN_PSWAPDSF:
return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);
-
- /* Composite intrinsics. */
- case IX86_BUILTIN_SETPS1:
- target = assign_386_stack_local (SFmode, 0);
- arg0 = TREE_VALUE (arglist);
- emit_move_insn (adjust_address (target, SFmode, 0),
- expand_expr (arg0, NULL_RTX, VOIDmode, 0));
- op0 = gen_reg_rtx (V4SFmode);
- emit_insn (gen_sse_loadss (op0, adjust_address (target, V4SFmode, 0)));
- emit_insn (gen_sse_shufps (op0, op0, op0, GEN_INT (0)));
- return op0;
-
- case IX86_BUILTIN_SETPS:
- target = assign_386_stack_local (V4SFmode, 0);
- arg0 = TREE_VALUE (arglist);
- arg1 = TREE_VALUE (TREE_CHAIN (arglist));
- arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
- arg3 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
- emit_move_insn (adjust_address (target, SFmode, 0),
- expand_expr (arg0, NULL_RTX, VOIDmode, 0));
- emit_move_insn (adjust_address (target, SFmode, 4),
- expand_expr (arg1, NULL_RTX, VOIDmode, 0));
- emit_move_insn (adjust_address (target, SFmode, 8),
- expand_expr (arg2, NULL_RTX, VOIDmode, 0));
- emit_move_insn (adjust_address (target, SFmode, 12),
- expand_expr (arg3, NULL_RTX, VOIDmode, 0));
- op0 = gen_reg_rtx (V4SFmode);
- emit_insn (gen_sse_movaps (op0, target));
- return op0;
! case IX86_BUILTIN_CLRPS:
! target = gen_reg_rtx (TImode);
! emit_insn (gen_sse_clrti (target));
! return target;
!
! case IX86_BUILTIN_LOADRPS:
! target = ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist,
! gen_reg_rtx (V4SFmode), 1);
! emit_insn (gen_sse_shufps (target, target, target, GEN_INT (0x1b)));
! return target;
!
! case IX86_BUILTIN_LOADPS1:
! target = ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist,
! gen_reg_rtx (V4SFmode), 1);
! emit_insn (gen_sse_shufps (target, target, target, const0_rtx));
return target;
-
- case IX86_BUILTIN_STOREPS1:
- return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist, 0);
- case IX86_BUILTIN_STORERPS:
- return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist, 0x1B);
case IX86_BUILTIN_MMX_ZERO:
target = gen_reg_rtx (DImode);
--- 11923,11933 ----
case IX86_BUILTIN_PSWAPDSF:
return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);
! case IX86_BUILTIN_SSE_ZERO:
! target = gen_reg_rtx (V4SFmode);
! emit_insn (gen_sse_clrv4sf (target));
return target;
case IX86_BUILTIN_MMX_ZERO:
target = gen_reg_rtx (DImode);
Index: config/i386/i386.h
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/i386/i386.h,v
retrieving revision 1.236
diff -c -p -d -r1.236 i386.h
*** i386.h 2002/01/11 13:01:21 1.236
--- i386.h 2002/01/12 09:50:08
*************** enum ix86_builtins
*** 2089,2096 ****
IX86_BUILTIN_CVTSS2SI,
IX86_BUILTIN_CVTTPS2PI,
IX86_BUILTIN_CVTTSS2SI,
- IX86_BUILTIN_M_FROM_INT,
- IX86_BUILTIN_M_TO_INT,
IX86_BUILTIN_MAXPS,
IX86_BUILTIN_MAXSS,
--- 2089,2094 ----
*************** enum ix86_builtins
*** 2215,2221 ****
IX86_BUILTIN_LDMXCSR,
IX86_BUILTIN_STMXCSR,
IX86_BUILTIN_SFENCE,
- IX86_BUILTIN_PREFETCH,
/* 3DNow! Original */
IX86_BUILTIN_FEMMS,
--- 2213,2218 ----
*************** enum ix86_builtins
*** 2238,2245 ****
IX86_BUILTIN_PFSUBR,
IX86_BUILTIN_PI2FD,
IX86_BUILTIN_PMULHRW,
- IX86_BUILTIN_PREFETCH_3DNOW, /* PREFETCH already used */
- IX86_BUILTIN_PREFETCHW,
/* 3DNow! Athlon Extensions */
IX86_BUILTIN_PF2IW,
--- 2235,2240 ----
*************** enum ix86_builtins
*** 2248,2264 ****
IX86_BUILTIN_PI2FW,
IX86_BUILTIN_PSWAPDSI,
IX86_BUILTIN_PSWAPDSF,
-
- /* Composite builtins, expand to more than one insn. */
- IX86_BUILTIN_SETPS1,
- IX86_BUILTIN_SETPS,
- IX86_BUILTIN_CLRPS,
- IX86_BUILTIN_SETRPS,
- IX86_BUILTIN_LOADPS1,
- IX86_BUILTIN_LOADRPS,
- IX86_BUILTIN_STOREPS1,
- IX86_BUILTIN_STORERPS,
IX86_BUILTIN_MMX_ZERO,
IX86_BUILTIN_MAX
--- 2243,2250 ----
IX86_BUILTIN_PI2FW,
IX86_BUILTIN_PSWAPDSI,
IX86_BUILTIN_PSWAPDSF,
+ IX86_BUILTIN_SSE_ZERO,
IX86_BUILTIN_MMX_ZERO,
IX86_BUILTIN_MAX
Index: config/i386/i386.md
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/i386/i386.md,v
retrieving revision 1.328
diff -c -p -d -r1.328 i386.md
*** i386.md 2002/01/12 07:38:48 1.328
--- i386.md 2002/01/12 09:50:08
***************
*** 81,87 ****
;; 32 This is a `maskmov' operation.
;; 33 This is a `movmsk' operation.
;; 34 This is a `non-temporal' move.
- ;; 35 This is a `prefetch' (SSE) operation.
;; 36 This is used to distinguish COMISS from UCOMISS.
;; 37 This is a `ldmxcsr' operation.
;; 38 This is a forced `movaps' instruction (rather than whatever movti does)
--- 81,86 ----
***************
*** 17686,17692 ****
(define_insn "movv4sf_internal"
[(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m")
! (match_operand:V4SF 1 "general_operand" "xm,x"))]
"TARGET_SSE"
;; @@@ let's try to use movaps here.
"movaps\t{%1, %0|%0, %1}"
--- 17685,17691 ----
(define_insn "movv4sf_internal"
[(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m")
! (match_operand:V4SF 1 "nonimmediate_operand" "xm,x"))]
"TARGET_SSE"
;; @@@ let's try to use movaps here.
"movaps\t{%1, %0|%0, %1}"
***************
*** 17694,17700 ****
(define_insn "movv4si_internal"
[(set (match_operand:V4SI 0 "nonimmediate_operand" "=x,m")
! (match_operand:V4SI 1 "general_operand" "xm,x"))]
"TARGET_SSE"
;; @@@ let's try to use movaps here.
"movaps\t{%1, %0|%0, %1}"
--- 17693,17699 ----
(define_insn "movv4si_internal"
[(set (match_operand:V4SI 0 "nonimmediate_operand" "=x,m")
! (match_operand:V4SI 1 "nonimmediate_operand" "xm,x"))]
"TARGET_SSE"
;; @@@ let's try to use movaps here.
"movaps\t{%1, %0|%0, %1}"
***************
*** 17702,17729 ****
(define_insn "movv8qi_internal"
[(set (match_operand:V8QI 0 "nonimmediate_operand" "=y,m")
! (match_operand:V8QI 1 "general_operand" "ym,y"))]
"TARGET_MMX"
"movq\t{%1, %0|%0, %1}"
[(set_attr "type" "mmx")])
(define_insn "movv4hi_internal"
[(set (match_operand:V4HI 0 "nonimmediate_operand" "=y,m")
! (match_operand:V4HI 1 "general_operand" "ym,y"))]
"TARGET_MMX"
"movq\t{%1, %0|%0, %1}"
[(set_attr "type" "mmx")])
(define_insn "movv2si_internal"
[(set (match_operand:V2SI 0 "nonimmediate_operand" "=y,m")
! (match_operand:V2SI 1 "general_operand" "ym,y"))]
"TARGET_MMX"
"movq\t{%1, %0|%0, %1}"
[(set_attr "type" "mmx")])
(define_insn "movv2sf_internal"
[(set (match_operand:V2SF 0 "nonimmediate_operand" "=y,m")
! (match_operand:V2SF 1 "general_operand" "ym,y"))]
"TARGET_3DNOW"
"movq\\t{%1, %0|%0, %1}"
[(set_attr "type" "mmx")])
--- 17701,17728 ----
(define_insn "movv8qi_internal"
[(set (match_operand:V8QI 0 "nonimmediate_operand" "=y,m")
! (match_operand:V8QI 1 "nonimmediate_operand" "ym,y"))]
"TARGET_MMX"
"movq\t{%1, %0|%0, %1}"
[(set_attr "type" "mmx")])
(define_insn "movv4hi_internal"
[(set (match_operand:V4HI 0 "nonimmediate_operand" "=y,m")
! (match_operand:V4HI 1 "nonimmediate_operand" "ym,y"))]
"TARGET_MMX"
"movq\t{%1, %0|%0, %1}"
[(set_attr "type" "mmx")])
(define_insn "movv2si_internal"
[(set (match_operand:V2SI 0 "nonimmediate_operand" "=y,m")
! (match_operand:V2SI 1 "nonimmediate_operand" "ym,y"))]
"TARGET_MMX"
"movq\t{%1, %0|%0, %1}"
[(set_attr "type" "mmx")])
(define_insn "movv2sf_internal"
[(set (match_operand:V2SF 0 "nonimmediate_operand" "=y,m")
! (match_operand:V2SF 1 "nonimmediate_operand" "ym,y"))]
"TARGET_3DNOW"
"movq\\t{%1, %0|%0, %1}"
[(set_attr "type" "mmx")])
***************
*** 17734,17767 ****
"TARGET_SSE || TARGET_64BIT"
{
if (TARGET_64BIT)
! {
! ix86_expand_move (TImode, operands);
! DONE;
! }
! /* For constants other than zero into memory. We do not know how the
! instructions used to build constants modify the upper 64 bits
! of the register, once we have that information we may be able
! to handle some of them more efficiently. */
! if ((reload_in_progress | reload_completed) == 0
! && register_operand (operands[0], TImode)
! && CONSTANT_P (operands[1]))
! {
! rtx addr = gen_reg_rtx (Pmode);
!
! emit_move_insn (addr, XEXP (force_const_mem (TImode, operands[1]), 0));
! operands[1] = gen_rtx_MEM (TImode, addr);
! }
!
! /* Make operand1 a register if it isn't already. */
! if ((reload_in_progress | reload_completed) == 0
! && !register_operand (operands[0], TImode)
! && !register_operand (operands[1], TImode)
! && operands[1] != CONST0_RTX (TImode))
! {
! rtx temp = force_reg (TImode, operands[1]);
! emit_move_insn (operands[0], temp);
! DONE;
! }
})
(define_expand "movv4sf"
--- 17733,17742 ----
"TARGET_SSE || TARGET_64BIT"
{
if (TARGET_64BIT)
! ix86_expand_move (TImode, operands);
! else
! ix86_expand_vector_move (TImode, operands);
! DONE;
})
(define_expand "movv4sf"
***************
*** 17769,17798 ****
(match_operand:V4SF 1 "general_operand" ""))]
"TARGET_SSE"
{
! /* For constants other than zero into memory. We do not know how the
! instructions used to build constants modify the upper 64 bits
! of the register, once we have that information we may be able
! to handle some of them more efficiently. */
! if ((reload_in_progress | reload_completed) == 0
! && register_operand (operands[0], V4SFmode)
! && CONSTANT_P (operands[1]))
! {
! rtx addr = gen_reg_rtx (Pmode);
!
! emit_move_insn (addr, XEXP (force_const_mem (V4SFmode, operands[1]), 0));
! operands[1] = gen_rtx_MEM (V4SFmode, addr);
! }
!
! /* Make operand1 a register if it isn't already. */
! if ((reload_in_progress | reload_completed) == 0
! && !register_operand (operands[0], V4SFmode)
! && !register_operand (operands[1], V4SFmode)
! && operands[1] != CONST0_RTX (V4SFmode))
! {
! rtx temp = force_reg (V4SFmode, operands[1]);
! emit_move_insn (operands[0], temp);
! DONE;
! }
})
(define_expand "movv4si"
--- 17744,17751 ----
(match_operand:V4SF 1 "general_operand" ""))]
"TARGET_SSE"
{
! ix86_expand_vector_move (V4SFmode, operands);
! DONE;
})
(define_expand "movv4si"
***************
*** 17800,17829 ****
(match_operand:V4SI 1 "general_operand" ""))]
"TARGET_MMX"
{
! /* For constants other than zero into memory. We do not know how the
! instructions used to build constants modify the upper 64 bits
! of the register, once we have that information we may be able
! to handle some of them more efficiently. */
! if ((reload_in_progress | reload_completed) == 0
! && register_operand (operands[0], V4SImode)
! && CONSTANT_P (operands[1]))
! {
! rtx addr = gen_reg_rtx (Pmode);
!
! emit_move_insn (addr, XEXP (force_const_mem (V4SImode, operands[1]), 0));
! operands[1] = gen_rtx_MEM (V4SImode, addr);
! }
!
! /* Make operand1 a register if it isn't already. */
! if ((reload_in_progress | reload_completed) == 0
! && !register_operand (operands[0], V4SImode)
! && !register_operand (operands[1], V4SImode)
! && operands[1] != CONST0_RTX (V4SImode))
! {
! rtx temp = force_reg (V4SImode, operands[1]);
! emit_move_insn (operands[0], temp);
! DONE;
! }
})
(define_expand "movv2si"
--- 17753,17760 ----
(match_operand:V4SI 1 "general_operand" ""))]
"TARGET_MMX"
{
! ix86_expand_vector_move (V4SImode, operands);
! DONE;
})
(define_expand "movv2si"
***************
*** 17831,17860 ****
(match_operand:V2SI 1 "general_operand" ""))]
"TARGET_MMX"
{
! /* For constants other than zero into memory. We do not know how the
! instructions used to build constants modify the upper 64 bits
! of the register, once we have that information we may be able
! to handle some of them more efficiently. */
! if ((reload_in_progress | reload_completed) == 0
! && register_operand (operands[0], V2SImode)
! && CONSTANT_P (operands[1]))
! {
! rtx addr = gen_reg_rtx (Pmode);
!
! emit_move_insn (addr, XEXP (force_const_mem (V2SImode, operands[1]), 0));
! operands[1] = gen_rtx_MEM (V2SImode, addr);
! }
!
! /* Make operand1 a register if it isn't already. */
! if ((reload_in_progress | reload_completed) == 0
! && !register_operand (operands[0], V2SImode)
! && !register_operand (operands[1], V2SImode)
! && operands[1] != CONST0_RTX (V2SImode))
! {
! rtx temp = force_reg (V2SImode, operands[1]);
! emit_move_insn (operands[0], temp);
! DONE;
! }
})
(define_expand "movv4hi"
--- 17762,17769 ----
(match_operand:V2SI 1 "general_operand" ""))]
"TARGET_MMX"
{
! ix86_expand_vector_move (V2SImode, operands);
! DONE;
})
(define_expand "movv4hi"
***************
*** 17862,17891 ****
(match_operand:V4HI 1 "general_operand" ""))]
"TARGET_MMX"
{
! /* For constants other than zero into memory. We do not know how the
! instructions used to build constants modify the upper 64 bits
! of the register, once we have that information we may be able
! to handle some of them more efficiently. */
! if ((reload_in_progress | reload_completed) == 0
! && register_operand (operands[0], V4HImode)
! && CONSTANT_P (operands[1]))
! {
! rtx addr = gen_reg_rtx (Pmode);
!
! emit_move_insn (addr, XEXP (force_const_mem (V4HImode, operands[1]), 0));
! operands[1] = gen_rtx_MEM (V4HImode, addr);
! }
!
! /* Make operand1 a register if it isn't already. */
! if ((reload_in_progress | reload_completed) == 0
! && !register_operand (operands[0], V4HImode)
! && !register_operand (operands[1], V4HImode)
! && operands[1] != CONST0_RTX (V4HImode))
! {
! rtx temp = force_reg (V4HImode, operands[1]);
! emit_move_insn (operands[0], temp);
! DONE;
! }
})
(define_expand "movv8qi"
--- 17771,17778 ----
(match_operand:V4HI 1 "general_operand" ""))]
"TARGET_MMX"
{
! ix86_expand_vector_move (V4HImode, operands);
! DONE;
})
(define_expand "movv8qi"
***************
*** 17893,17957 ****
(match_operand:V8QI 1 "general_operand" ""))]
"TARGET_MMX"
{
! /* For constants other than zero into memory. We do not know how the
! instructions used to build constants modify the upper 64 bits
! of the register, once we have that information we may be able
! to handle some of them more efficiently. */
! if ((reload_in_progress | reload_completed) == 0
! && register_operand (operands[0], V8QImode)
! && CONSTANT_P (operands[1]))
! {
! rtx addr = gen_reg_rtx (Pmode);
!
! emit_move_insn (addr, XEXP (force_const_mem (V8QImode, operands[1]), 0));
! operands[1] = gen_rtx_MEM (V8QImode, addr);
! }
!
! /* Make operand1 a register if it isn't already. */
! if ((reload_in_progress | reload_completed) == 0
! && !register_operand (operands[0], V8QImode)
! && !register_operand (operands[1], V8QImode)
! && operands[1] != CONST0_RTX (V8QImode))
! {
! rtx temp = force_reg (V8QImode, operands[1]);
! emit_move_insn (operands[0], temp);
! DONE;
! }
})
(define_expand "movv2sf"
[(set (match_operand:V2SF 0 "general_operand" "")
(match_operand:V2SF 1 "general_operand" ""))]
"TARGET_3DNOW"
- "
{
! /* For constants other than zero into memory. We do not know how the
! instructions used to build constants modify the upper 64 bits
! of the register, once we have that information we may be able
! to handle some of them more efficiently. */
! if ((reload_in_progress | reload_completed) == 0
! && register_operand (operands[0], V2SFmode)
! && CONSTANT_P (operands[1]))
! {
! rtx addr = gen_reg_rtx (Pmode);
!
! emit_move_insn (addr,
! XEXP (force_const_mem (V2SFmode, operands[1]), 0));
! operands[1] = gen_rtx_MEM (V2SFmode, addr);
! }
!
! /* Make operand1 a register is it isn't already. */
! if ((reload_in_progress | reload_completed) == 0
! && !register_operand (operands[0], V2SFmode)
! && !register_operand (operands[1], V2SFmode)
! && (GET_CODE (operands[1]) != CONST_INT || INTVAL (operands[1]) != 0)
! && operands[1] != CONST0_RTX (V2SFmode))
! {
! rtx temp = force_reg (V2SFmode, operands[1]);
! emit_move_insn (operands[0], temp);
! DONE;
! }
! }")
(define_insn_and_split "*pushti"
[(set (match_operand:TI 0 "push_operand" "=<")
--- 17780,17797 ----
(match_operand:V8QI 1 "general_operand" ""))]
"TARGET_MMX"
{
! ix86_expand_vector_move (V8QImode, operands);
! DONE;
})
(define_expand "movv2sf"
[(set (match_operand:V2SF 0 "general_operand" "")
(match_operand:V2SF 1 "general_operand" ""))]
"TARGET_3DNOW"
{
! ix86_expand_vector_move (V2SFmode, operands);
! DONE;
! })
(define_insn_and_split "*pushti"
[(set (match_operand:TI 0 "push_operand" "=<")
***************
*** 18031,18055 ****
[(set_attr "type" "mmx")])
(define_insn "movti_internal"
! [(set (match_operand:TI 0 "nonimmediate_operand" "=x,m")
! (match_operand:TI 1 "general_operand" "xm,x"))]
"TARGET_SSE && !TARGET_64BIT"
"@
movaps\t{%1, %0|%0, %1}
movaps\t{%1, %0|%0, %1}"
[(set_attr "type" "sse")])
(define_insn "*movti_rex64"
! [(set (match_operand:TI 0 "nonimmediate_operand" "=r,o,mx,x")
! (match_operand:TI 1 "general_operand" "riFo,riF,x,m"))]
"TARGET_64BIT
&& (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
"@
#
#
movaps\\t{%1, %0|%0, %1}
movaps\\t{%1, %0|%0, %1}"
! [(set_attr "type" "*,*,sse,sse")
(set_attr "mode" "TI")])
(define_split
--- 17871,17897 ----
[(set_attr "type" "mmx")])
(define_insn "movti_internal"
! [(set (match_operand:TI 0 "nonimmediate_operand" "=x,x,m")
! (match_operand:TI 1 "general_operand" "O,xm,x"))]
"TARGET_SSE && !TARGET_64BIT"
"@
+ xorps\t%0, %0
movaps\t{%1, %0|%0, %1}
movaps\t{%1, %0|%0, %1}"
[(set_attr "type" "sse")])
(define_insn "*movti_rex64"
! [(set (match_operand:TI 0 "nonimmediate_operand" "=r,o,x,mx,x")
! (match_operand:TI 1 "general_operand" "riFo,riF,O,x,m"))]
"TARGET_64BIT
&& (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
"@
#
#
+ xorps\t%0, %0
movaps\\t{%1, %0|%0, %1}
movaps\\t{%1, %0|%0, %1}"
! [(set_attr "type" "*,*,sse,sse,sse")
(set_attr "mode" "TI")])
(define_split
***************
*** 18064,18070 ****
;; movaps or movups
(define_insn "sse_movaps"
[(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m")
! (unspec:V4SF [(match_operand:V4SF 1 "general_operand" "xm,x")] 38))]
"TARGET_SSE"
"@
movaps\t{%1, %0|%0, %1}
--- 17906,17913 ----
;; movaps or movups
(define_insn "sse_movaps"
[(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m")
! (unspec:V4SF
! [(match_operand:V4SF 1 "nonimmediate_operand" "xm,x")] 38))]
"TARGET_SSE"
"@
movaps\t{%1, %0|%0, %1}
***************
*** 18073,18079 ****
(define_insn "sse_movups"
[(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m")
! (unspec:V4SF [(match_operand:V4SF 1 "general_operand" "xm,x")] 39))]
"TARGET_SSE"
"@
movups\t{%1, %0|%0, %1}
--- 17916,17923 ----
(define_insn "sse_movups"
[(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m")
! (unspec:V4SF
! [(match_operand:V4SF 1 "nonimmediate_operand" "xm,x")] 39))]
"TARGET_SSE"
"@
movups\t{%1, %0|%0, %1}
***************
*** 18154,18160 ****
(match_operand:V4SF 1 "nonimmediate_operand" "0,0")
(match_operand:V4SF 2 "nonimmediate_operand" "m,x")
(const_int 12)))]
! "TARGET_SSE && (GET_CODE (operands[1]) == MEM || GET_CODE (operands[2]) == MEM)"
"movhps\t{%2, %0|%0, %2}"
[(set_attr "type" "sse")])
--- 17998,18005 ----
(match_operand:V4SF 1 "nonimmediate_operand" "0,0")
(match_operand:V4SF 2 "nonimmediate_operand" "m,x")
(const_int 12)))]
! "TARGET_SSE
! && (GET_CODE (operands[1]) == MEM || GET_CODE (operands[2]) == MEM)"
"movhps\t{%2, %0|%0, %2}"
[(set_attr "type" "sse")])
***************
*** 18164,18170 ****
(match_operand:V4SF 1 "nonimmediate_operand" "0,0")
(match_operand:V4SF 2 "nonimmediate_operand" "m,x")
(const_int 3)))]
! "TARGET_SSE && (GET_CODE (operands[1]) == MEM || GET_CODE (operands[2]) == MEM)"
"movlps\t{%2, %0|%0, %2}"
[(set_attr "type" "sse")])
--- 18009,18016 ----
(match_operand:V4SF 1 "nonimmediate_operand" "0,0")
(match_operand:V4SF 2 "nonimmediate_operand" "m,x")
(const_int 3)))]
! "TARGET_SSE
! && (GET_CODE (operands[1]) == MEM || GET_CODE (operands[2]) == MEM)"
"movlps\t{%2, %0|%0, %2}"
[(set_attr "type" "sse")])
***************
*** 18220,18229 ****
(define_insn "vmaddv4sf3"
[(set (match_operand:V4SF 0 "register_operand" "=x")
! (vec_merge:V4SF (plus:V4SF (match_operand:V4SF 1 "register_operand" "0")
! (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
! (match_dup 1)
! (const_int 1)))]
"TARGET_SSE"
"addss\t{%2, %0|%0, %2}"
[(set_attr "type" "sse")])
--- 18066,18076 ----
(define_insn "vmaddv4sf3"
[(set (match_operand:V4SF 0 "register_operand" "=x")
! (vec_merge:V4SF
! (plus:V4SF (match_operand:V4SF 1 "register_operand" "0")
! (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
! (match_dup 1)
! (const_int 1)))]
"TARGET_SSE"
"addss\t{%2, %0|%0, %2}"
[(set_attr "type" "sse")])
***************
*** 18231,18247 ****
(define_insn "subv4sf3"
[(set (match_operand:V4SF 0 "register_operand" "=x")
(minus:V4SF (match_operand:V4SF 1 "register_operand" "0")
! (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
"TARGET_SSE"
"subps\t{%2, %0|%0, %2}"
[(set_attr "type" "sse")])
(define_insn "vmsubv4sf3"
[(set (match_operand:V4SF 0 "register_operand" "=x")
! (vec_merge:V4SF (minus:V4SF (match_operand:V4SF 1 "register_operand" "0")
! (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
! (match_dup 1)
! (const_int 1)))]
"TARGET_SSE"
"subss\t{%2, %0|%0, %2}"
[(set_attr "type" "sse")])
--- 18078,18095 ----
(define_insn "subv4sf3"
[(set (match_operand:V4SF 0 "register_operand" "=x")
(minus:V4SF (match_operand:V4SF 1 "register_operand" "0")
! (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
"TARGET_SSE"
"subps\t{%2, %0|%0, %2}"
[(set_attr "type" "sse")])
(define_insn "vmsubv4sf3"
[(set (match_operand:V4SF 0 "register_operand" "=x")
! (vec_merge:V4SF
! (minus:V4SF (match_operand:V4SF 1 "register_operand" "0")
! (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
! (match_dup 1)
! (const_int 1)))]
"TARGET_SSE"
"subss\t{%2, %0|%0, %2}"
[(set_attr "type" "sse")])
***************
*** 18256,18265 ****
(define_insn "vmmulv4sf3"
[(set (match_operand:V4SF 0 "register_operand" "=x")
! (vec_merge:V4SF (mult:V4SF (match_operand:V4SF 1 "register_operand" "0")
! (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
! (match_dup 1)
! (const_int 1)))]
"TARGET_SSE"
"mulss\t{%2, %0|%0, %2}"
[(set_attr "type" "sse")])
--- 18104,18114 ----
(define_insn "vmmulv4sf3"
[(set (match_operand:V4SF 0 "register_operand" "=x")
! (vec_merge:V4SF
! (mult:V4SF (match_operand:V4SF 1 "register_operand" "0")
! (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
! (match_dup 1)
! (const_int 1)))]
"TARGET_SSE"
"mulss\t{%2, %0|%0, %2}"
[(set_attr "type" "sse")])
***************
*** 18274,18283 ****
(define_insn "vmdivv4sf3"
[(set (match_operand:V4SF 0 "register_operand" "=x")
! (vec_merge:V4SF (div:V4SF (match_operand:V4SF 1 "register_operand" "0")
! (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
! (match_dup 1)
! (const_int 1)))]
"TARGET_SSE"
"divss\t{%2, %0|%0, %2}"
[(set_attr "type" "sse")])
--- 18123,18133 ----
(define_insn "vmdivv4sf3"
[(set (match_operand:V4SF 0 "register_operand" "=x")
! (vec_merge:V4SF
! (div:V4SF (match_operand:V4SF 1 "register_operand" "0")
! (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
! (match_dup 1)
! (const_int 1)))]
"TARGET_SSE"
"divss\t{%2, %0|%0, %2}"
[(set_attr "type" "sse")])
***************
*** 18287,18339 ****
(define_insn "rcpv4sf2"
[(set (match_operand:V4SF 0 "register_operand" "=x")
! (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "xm")] 42))]
"TARGET_SSE"
"rcpps\t{%1, %0|%0, %1}"
[(set_attr "type" "sse")])
(define_insn "vmrcpv4sf2"
[(set (match_operand:V4SF 0 "register_operand" "=x")
! (vec_merge:V4SF (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "xm")] 42)
! (match_operand:V4SF 2 "register_operand" "0")
! (const_int 1)))]
"TARGET_SSE"
"rcpss\t{%1, %0|%0, %1}"
[(set_attr "type" "sse")])
(define_insn "rsqrtv4sf2"
[(set (match_operand:V4SF 0 "register_operand" "=x")
! (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "xm")] 43))]
"TARGET_SSE"
"rsqrtps\t{%1, %0|%0, %1}"
[(set_attr "type" "sse")])
(define_insn "vmrsqrtv4sf2"
[(set (match_operand:V4SF 0 "register_operand" "=x")
! (vec_merge:V4SF (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "xm")] 43)
! (match_operand:V4SF 2 "register_operand" "0")
! (const_int 1)))]
"TARGET_SSE"
"rsqrtss\t{%1, %0|%0, %1}"
[(set_attr "type" "sse")])
(define_insn "sqrtv4sf2"
[(set (match_operand:V4SF 0 "register_operand" "=x")
! (sqrt:V4SF (match_operand:V4SF 1 "register_operand" "xm")))]
"TARGET_SSE"
"sqrtps\t{%1, %0|%0, %1}"
[(set_attr "type" "sse")])
(define_insn "vmsqrtv4sf2"
[(set (match_operand:V4SF 0 "register_operand" "=x")
! (vec_merge:V4SF (sqrt:V4SF (match_operand:V4SF 1 "register_operand" "xm"))
! (match_operand:V4SF 2 "register_operand" "0")
! (const_int 1)))]
"TARGET_SSE"
"sqrtss\t{%1, %0|%0, %1}"
[(set_attr "type" "sse")])
-
;; SSE logical operations.
;; These are not called andti3 etc. because we really really don't want
--- 18137,18193 ----
(define_insn "rcpv4sf2"
[(set (match_operand:V4SF 0 "register_operand" "=x")
! (unspec:V4SF
! [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] 42))]
"TARGET_SSE"
"rcpps\t{%1, %0|%0, %1}"
[(set_attr "type" "sse")])
(define_insn "vmrcpv4sf2"
[(set (match_operand:V4SF 0 "register_operand" "=x")
! (vec_merge:V4SF
! (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] 42)
! (match_operand:V4SF 2 "register_operand" "0")
! (const_int 1)))]
"TARGET_SSE"
"rcpss\t{%1, %0|%0, %1}"
[(set_attr "type" "sse")])
(define_insn "rsqrtv4sf2"
[(set (match_operand:V4SF 0 "register_operand" "=x")
! (unspec:V4SF
! [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] 43))]
"TARGET_SSE"
"rsqrtps\t{%1, %0|%0, %1}"
[(set_attr "type" "sse")])
(define_insn "vmrsqrtv4sf2"
[(set (match_operand:V4SF 0 "register_operand" "=x")
! (vec_merge:V4SF
! (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] 43)
! (match_operand:V4SF 2 "register_operand" "0")
! (const_int 1)))]
"TARGET_SSE"
"rsqrtss\t{%1, %0|%0, %1}"
[(set_attr "type" "sse")])
(define_insn "sqrtv4sf2"
[(set (match_operand:V4SF 0 "register_operand" "=x")
! (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
"TARGET_SSE"
"sqrtps\t{%1, %0|%0, %1}"
[(set_attr "type" "sse")])
(define_insn "vmsqrtv4sf2"
[(set (match_operand:V4SF 0 "register_operand" "=x")
! (vec_merge:V4SF
! (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
! (match_operand:V4SF 2 "register_operand" "0")
! (const_int 1)))]
"TARGET_SSE"
"sqrtss\t{%1, %0|%0, %1}"
[(set_attr "type" "sse")])
;; SSE logical operations.
;; These are not called andti3 etc. because we really really don't want
***************
*** 18519,18527 ****
;; Use xor, but don't show input operands so they aren't live before
;; this insn.
! (define_insn "sse_clrti"
! [(set (match_operand:TI 0 "register_operand" "=x")
! (unspec:TI [(const_int 0)] 45))]
"TARGET_SSE"
"xorps\t{%0, %0|%0, %0}"
[(set_attr "type" "sse")
--- 18373,18381 ----
;; Use xor, but don't show input operands so they aren't live before
;; this insn.
! (define_insn "sse_clrv4sf"
! [(set (match_operand:V4SF 0 "register_operand" "=x")
! (unspec:V4SF [(const_int 0)] 45))]
"TARGET_SSE"
"xorps\t{%0, %0|%0, %0}"
[(set_attr "type" "sse")
***************
*** 18532,18539 ****
(define_insn "maskcmpv4sf3"
[(set (match_operand:V4SI 0 "register_operand" "=x")
(match_operator:V4SI 3 "sse_comparison_operator"
! [(match_operand:V4SF 1 "register_operand" "0")
! (match_operand:V4SF 2 "nonimmediate_operand" "x")]))]
"TARGET_SSE"
"cmp%D3ps\t{%2, %0|%0, %2}"
[(set_attr "type" "sse")])
--- 18386,18393 ----
(define_insn "maskcmpv4sf3"
[(set (match_operand:V4SI 0 "register_operand" "=x")
(match_operator:V4SI 3 "sse_comparison_operator"
! [(match_operand:V4SF 1 "register_operand" "0")
! (match_operand:V4SF 2 "register_operand" "x")]))]
"TARGET_SSE"
"cmp%D3ps\t{%2, %0|%0, %2}"
[(set_attr "type" "sse")])
***************
*** 18542,18565 ****
[(set (match_operand:V4SI 0 "register_operand" "=x")
(not:V4SI
(match_operator:V4SI 3 "sse_comparison_operator"
! [(match_operand:V4SF 1 "register_operand" "0")
! (match_operand:V4SF 2 "nonimmediate_operand" "x")])))]
"TARGET_SSE"
- "*
{
if (GET_CODE (operands[3]) == UNORDERED)
! return \"cmpordps\t{%2, %0|%0, %2}\";
!
! return \"cmpn%D3ps\t{%2, %0|%0, %2}\";
! }"
[(set_attr "type" "sse")])
(define_insn "vmmaskcmpv4sf3"
[(set (match_operand:V4SI 0 "register_operand" "=x")
(vec_merge:V4SI
(match_operator:V4SI 3 "sse_comparison_operator"
! [(match_operand:V4SF 1 "register_operand" "0")
! (match_operand:V4SF 2 "nonimmediate_operand" "x")])
(match_dup 1)
(const_int 1)))]
"TARGET_SSE"
--- 18396,18418 ----
[(set (match_operand:V4SI 0 "register_operand" "=x")
(not:V4SI
(match_operator:V4SI 3 "sse_comparison_operator"
! [(match_operand:V4SF 1 "register_operand" "0")
! (match_operand:V4SF 2 "register_operand" "x")])))]
"TARGET_SSE"
{
if (GET_CODE (operands[3]) == UNORDERED)
! return "cmpordps\t{%2, %0|%0, %2}";
! else
! return "cmpn%D3ps\t{%2, %0|%0, %2}";
! }
[(set_attr "type" "sse")])
(define_insn "vmmaskcmpv4sf3"
[(set (match_operand:V4SI 0 "register_operand" "=x")
(vec_merge:V4SI
(match_operator:V4SI 3 "sse_comparison_operator"
! [(match_operand:V4SF 1 "register_operand" "0")
! (match_operand:V4SF 2 "register_operand" "x")])
(match_dup 1)
(const_int 1)))]
"TARGET_SSE"
***************
*** 18571,18588 ****
(vec_merge:V4SI
(not:V4SI
(match_operator:V4SI 3 "sse_comparison_operator"
! [(match_operand:V4SF 1 "register_operand" "0")
! (match_operand:V4SF 2 "nonimmediate_operand" "x")]))
(subreg:V4SI (match_dup 1) 0)
(const_int 1)))]
"TARGET_SSE"
- "*
{
if (GET_CODE (operands[3]) == UNORDERED)
! return \"cmpordss\t{%2, %0|%0, %2}\";
!
! return \"cmpn%D3ss\t{%2, %0|%0, %2}\";
! }"
[(set_attr "type" "sse")])
(define_insn "sse_comi"
--- 18424,18440 ----
(vec_merge:V4SI
(not:V4SI
(match_operator:V4SI 3 "sse_comparison_operator"
! [(match_operand:V4SF 1 "register_operand" "0")
! (match_operand:V4SF 2 "register_operand" "x")]))
(subreg:V4SI (match_dup 1) 0)
(const_int 1)))]
"TARGET_SSE"
{
if (GET_CODE (operands[3]) == UNORDERED)
! return "cmpordss\t{%2, %0|%0, %2}";
! else
! return "cmpn%D3ss\t{%2, %0|%0, %2}";
! }
[(set_attr "type" "sse")])
(define_insn "sse_comi"
***************
*** 18663,18672 ****
(define_insn "vmsmaxv4sf3"
[(set (match_operand:V4SF 0 "register_operand" "=x")
! (vec_merge:V4SF (smax:V4SF (match_operand:V4SF 1 "register_operand" "0")
! (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
! (match_dup 1)
! (const_int 1)))]
"TARGET_SSE"
"maxss\t{%2, %0|%0, %2}"
[(set_attr "type" "sse")])
--- 18515,18525 ----
(define_insn "vmsmaxv4sf3"
[(set (match_operand:V4SF 0 "register_operand" "=x")
! (vec_merge:V4SF
! (smax:V4SF (match_operand:V4SF 1 "register_operand" "0")
! (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
! (match_dup 1)
! (const_int 1)))]
"TARGET_SSE"
"maxss\t{%2, %0|%0, %2}"
[(set_attr "type" "sse")])
***************
*** 18681,18690 ****
(define_insn "vmsminv4sf3"
[(set (match_operand:V4SF 0 "register_operand" "=x")
! (vec_merge:V4SF (smin:V4SF (match_operand:V4SF 1 "register_operand" "0")
! (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
! (match_dup 1)
! (const_int 1)))]
"TARGET_SSE"
"minss\t{%2, %0|%0, %2}"
[(set_attr "type" "sse")])
--- 18534,18544 ----
(define_insn "vmsminv4sf3"
[(set (match_operand:V4SF 0 "register_operand" "=x")
! (vec_merge:V4SF
! (smin:V4SF (match_operand:V4SF 1 "register_operand" "0")
! (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
! (match_dup 1)
! (const_int 1)))]
"TARGET_SSE"
"minss\t{%2, %0|%0, %2}"
[(set_attr "type" "sse")])
***************
*** 18694,18749 ****
(define_insn "cvtpi2ps"
[(set (match_operand:V4SF 0 "register_operand" "=x")
! (vec_merge:V4SF (match_operand:V4SF 1 "register_operand" "0")
! (vec_duplicate:V4SF
! (float:V2SF (match_operand:V2SI 2 "register_operand" "ym")))
! (const_int 12)))]
"TARGET_SSE"
"cvtpi2ps\t{%2, %0|%0, %2}"
[(set_attr "type" "sse")])
(define_insn "cvtps2pi"
[(set (match_operand:V2SI 0 "register_operand" "=y")
! (vec_select:V2SI (fix:V4SI (match_operand:V4SF 1 "register_operand" "xm"))
! (parallel
! [(const_int 0)
! (const_int 1)])))]
"TARGET_SSE"
"cvtps2pi\t{%1, %0|%0, %1}"
[(set_attr "type" "sse")])
(define_insn "cvttps2pi"
[(set (match_operand:V2SI 0 "register_operand" "=y")
! (vec_select:V2SI (unspec:V4SI [(match_operand:V4SF 1 "register_operand" "xm")] 30)
! (parallel
! [(const_int 0)
! (const_int 1)])))]
"TARGET_SSE"
"cvttps2pi\t{%1, %0|%0, %1}"
[(set_attr "type" "sse")])
(define_insn "cvtsi2ss"
[(set (match_operand:V4SF 0 "register_operand" "=x")
! (vec_merge:V4SF (match_operand:V4SF 1 "register_operand" "0")
! (vec_duplicate:V4SF
! (float:SF (match_operand:SI 2 "register_operand" "rm")))
! (const_int 14)))]
"TARGET_SSE"
"cvtsi2ss\t{%2, %0|%0, %2}"
[(set_attr "type" "sse")])
(define_insn "cvtss2si"
[(set (match_operand:SI 0 "register_operand" "=r")
! (vec_select:SI (fix:V4SI (match_operand:V4SF 1 "register_operand" "xm"))
! (parallel [(const_int 0)])))]
"TARGET_SSE"
"cvtss2si\t{%1, %0|%0, %1}"
[(set_attr "type" "sse")])
(define_insn "cvttss2si"
[(set (match_operand:SI 0 "register_operand" "=r")
! (vec_select:SI (unspec:V4SI [(match_operand:V4SF 1 "register_operand" "xm")] 30)
! (parallel [(const_int 0)])))]
"TARGET_SSE"
"cvttss2si\t{%1, %0|%0, %1}"
[(set_attr "type" "sse")])
--- 18548,18605 ----
(define_insn "cvtpi2ps"
[(set (match_operand:V4SF 0 "register_operand" "=x")
! (vec_merge:V4SF
! (match_operand:V4SF 1 "register_operand" "0")
! (vec_duplicate:V4SF
! (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
! (const_int 12)))]
"TARGET_SSE"
"cvtpi2ps\t{%2, %0|%0, %2}"
[(set_attr "type" "sse")])
(define_insn "cvtps2pi"
[(set (match_operand:V2SI 0 "register_operand" "=y")
! (vec_select:V2SI
! (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
! (parallel [(const_int 0) (const_int 1)])))]
"TARGET_SSE"
"cvtps2pi\t{%1, %0|%0, %1}"
[(set_attr "type" "sse")])
(define_insn "cvttps2pi"
[(set (match_operand:V2SI 0 "register_operand" "=y")
! (vec_select:V2SI
! (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] 30)
! (parallel [(const_int 0) (const_int 1)])))]
"TARGET_SSE"
"cvttps2pi\t{%1, %0|%0, %1}"
[(set_attr "type" "sse")])
(define_insn "cvtsi2ss"
[(set (match_operand:V4SF 0 "register_operand" "=x")
! (vec_merge:V4SF
! (match_operand:V4SF 1 "register_operand" "0")
! (vec_duplicate:V4SF
! (float:SF (match_operand:SI 2 "nonimmediate_operand" "rm")))
! (const_int 14)))]
"TARGET_SSE"
"cvtsi2ss\t{%2, %0|%0, %2}"
[(set_attr "type" "sse")])
(define_insn "cvtss2si"
[(set (match_operand:SI 0 "register_operand" "=r")
! (vec_select:SI
! (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
! (parallel [(const_int 0)])))]
"TARGET_SSE"
"cvtss2si\t{%1, %0|%0, %1}"
[(set_attr "type" "sse")])
(define_insn "cvttss2si"
[(set (match_operand:SI 0 "register_operand" "=r")
! (vec_select:SI
! (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] 30)
! (parallel [(const_int 0)])))]
"TARGET_SSE"
"cvttss2si\t{%1, %0|%0, %1}"
[(set_attr "type" "sse")])
***************
*** 18877,18884 ****
[(set (match_operand:V4HI 0 "register_operand" "=y")
(truncate:V4HI
(lshiftrt:V4SI
! (mult:V4SI (sign_extend:V4SI (match_operand:V4HI 1 "register_operand" "0"))
! (sign_extend:V4SI (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
(const_int 16))))]
"TARGET_MMX"
"pmulhw\t{%2, %0|%0, %2}"
--- 18733,18742 ----
[(set (match_operand:V4HI 0 "register_operand" "=y")
(truncate:V4HI
(lshiftrt:V4SI
! (mult:V4SI (sign_extend:V4SI
! (match_operand:V4HI 1 "register_operand" "0"))
! (sign_extend:V4SI
! (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
(const_int 16))))]
"TARGET_MMX"
"pmulhw\t{%2, %0|%0, %2}"
***************
*** 18888,18895 ****
[(set (match_operand:V4HI 0 "register_operand" "=y")
(truncate:V4HI
(lshiftrt:V4SI
! (mult:V4SI (zero_extend:V4SI (match_operand:V4HI 1 "register_operand" "0"))
! (zero_extend:V4SI (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
(const_int 16))))]
"TARGET_SSE || TARGET_3DNOW_A"
"pmulhuw\t{%2, %0|%0, %2}"
--- 18746,18755 ----
[(set (match_operand:V4HI 0 "register_operand" "=y")
(truncate:V4HI
(lshiftrt:V4SI
! (mult:V4SI (zero_extend:V4SI
! (match_operand:V4HI 1 "register_operand" "0"))
! (zero_extend:V4SI
! (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
(const_int 16))))]
"TARGET_SSE || TARGET_3DNOW_A"
"pmulhuw\t{%2, %0|%0, %2}"
***************
*** 18899,18910 ****
[(set (match_operand:V2SI 0 "register_operand" "=y")
(plus:V2SI
(mult:V2SI
! (sign_extend:V2SI (vec_select:V2HI (match_operand:V4HI 1 "register_operand" "0")
! (parallel [(const_int 0)
! (const_int 2)])))
! (sign_extend:V2SI (vec_select:V2HI (match_operand:V4HI 2 "nonimmediate_operand" "ym")
! (parallel [(const_int 0)
! (const_int 2)]))))
(mult:V2SI
(sign_extend:V2SI (vec_select:V2HI (match_dup 1)
(parallel [(const_int 1)
--- 18759,18770 ----
[(set (match_operand:V2SI 0 "register_operand" "=y")
(plus:V2SI
(mult:V2SI
! (sign_extend:V2SI
! (vec_select:V2HI (match_operand:V4HI 1 "register_operand" "0")
! (parallel [(const_int 0) (const_int 2)])))
! (sign_extend:V2SI
! (vec_select:V2HI (match_operand:V4HI 2 "nonimmediate_operand" "ym")
! (parallel [(const_int 0) (const_int 2)]))))
(mult:V2SI
(sign_extend:V2SI (vec_select:V2HI (match_dup 1)
(parallel [(const_int 1)
***************
*** 19404,19478 ****
[(set_attr "type" "sse")
(set_attr "memory" "unknown")])
- (define_expand "prefetch"
- [(prefetch (match_operand:SI 0 "address_operand" "p")
- (match_operand:SI 1 "const_int_operand" "n")
- (match_operand:SI 2 "const_int_operand" "n"))]
- "TARGET_PREFETCH_SSE || TARGET_3DNOW"
- "
- {
- int rw = INTVAL (operands[1]);
- int locality = INTVAL (operands[2]);
- if (rw != 0 && rw != 1)
- abort ();
- if (locality < 0 || locality > 3)
- abort ();
- /* Use 3dNOW prefetch in case we are asking for write prefetch not
- suported by SSE counterpart or the SSE prefetch is not available
- (K6 machines). Otherwise use SSE prefetch as it allows specifying
- of locality. */
- if (TARGET_3DNOW
- && (!TARGET_PREFETCH_SSE || rw))
- {
- emit_insn (gen_prefetch_3dnow (operands[0], operands[1]));
- }
- else
- {
- int i;
- switch (locality)
- {
- case 0: /* No temporal locality. */
- i = 0;
- break;
- case 1: /* Lowest level of temporal locality. */
- i = 3;
- break;
- case 2: /* Moderate level of temporal locality. */
- i = 2;
- break;
- case 3: /* Highest level of temporal locality. */
- i = 1;
- break;
- default:
- abort (); /* We already checked for valid values above. */
- break;
- }
- emit_insn (gen_prefetch_sse (operands[0], GEN_INT (i)));
- }
- DONE;
- }")
-
- (define_insn "prefetch_sse"
- [(unspec [(match_operand:SI 0 "address_operand" "p")
- (match_operand:SI 1 "immediate_operand" "n")] 35)]
- "TARGET_PREFETCH_SSE"
- {
- switch (INTVAL (operands[1]))
- {
- case 0:
- return "prefetchnta\t%a0";
- case 1:
- return "prefetcht0\t%a0";
- case 2:
- return "prefetcht1\t%a0";
- case 3:
- return "prefetcht2\t%a0";
- default:
- abort ();
- }
- }
- [(set_attr "type" "sse")])
-
(define_expand "sse_prologue_save"
[(parallel [(set (match_operand:BLK 0 "" "")
(unspec:BLK [(reg:DI 21)
--- 19264,19269 ----
***************
*** 19630,19648 ****
"femms"
[(set_attr "type" "mmx")])
- (define_insn "prefetch_3dnow"
- [(prefetch (match_operand:SI 0 "address_operand" "p")
- (match_operand:SI 1 "const_int_operand" "n")
- (const_int 0))]
- "TARGET_3DNOW"
- {
- if (INTVAL (operands[1]) == 0)
- return "prefetch\t%a0";
- else
- return "prefetchw\t%a0";
- }
- [(set_attr "type" "mmx")])
-
(define_insn "pf2id"
[(set (match_operand:V2SI 0 "register_operand" "=y")
(fix:V2SI (match_operand:V2SF 1 "nonimmediate_operand" "ym")))]
--- 19421,19426 ----
***************
*** 19819,19822 ****
--- 19597,19658 ----
(parallel [(const_int 1) (const_int 0)])))]
"TARGET_3DNOW_A"
"pswapd\\t{%1, %0|%0, %1}"
+ [(set_attr "type" "mmx")])
+
+ (define_expand "prefetch"
+ [(prefetch (match_operand:SI 0 "address_operand" "")
+ (match_operand:SI 1 "const_int_operand" "")
+ (match_operand:SI 2 "const_int_operand" ""))]
+ "TARGET_PREFETCH_SSE || TARGET_3DNOW"
+ {
+ int rw = INTVAL (operands[1]);
+ int locality = INTVAL (operands[2]);
+ if (rw != 0 && rw != 1)
+ abort ();
+ if (locality < 0 || locality > 3)
+ abort ();
+
+ /* Use 3dNOW prefetch in case we are asking for write prefetch not
+ supported by SSE counterpart or the SSE prefetch is not available
+ (K6 machines). Otherwise use SSE prefetch as it allows specifying
+ of locality. */
+ if (TARGET_3DNOW && (!TARGET_PREFETCH_SSE || rw))
+ {
+ operands[2] = GEN_INT (3);
+ }
+ else
+ {
+ operands[1] = const0_rtx;
+ }
+ })
+
+ (define_insn "*prefetch_sse"
+ [(prefetch (match_operand:SI 0 "address_operand" "")
+ (const_int 0)
+ (match_operand:SI 1 "const_int_operand" ""))]
+ "TARGET_PREFETCH_SSE"
+ {
+ static const char * const patterns[4] = {
+ "prefetchnta\t%a0", "prefetcht2\t%a0", "prefetcht1\t%a0", "prefetcht0\t%a0"
+ };
+
+ int locality = INTVAL (operands[1]);
+ if (locality < 0 || locality > 3)
+ abort ();
+
+ return patterns[locality];
+ }
+ [(set_attr "type" "sse")])
+
+ (define_insn "*prefetch_3dnow"
+ [(prefetch (match_operand:SI 0 "address_operand" "p")
+ (match_operand:SI 1 "const_int_operand" "n")
+ (const_int 0))]
+ "TARGET_3DNOW"
+ {
+ if (INTVAL (operands[1]) == 0)
+ return "prefetch\t%a0";
+ else
+ return "prefetchw\t%a0";
+ }
[(set_attr "type" "mmx")])
Index: config/i386/xmmintrin.h
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/i386/xmmintrin.h,v
retrieving revision 1.1
diff -c -p -d -r1.1 xmmintrin.h
*** xmmintrin.h 2002/01/12 07:38:49 1.1
--- xmmintrin.h 2002/01/12 09:50:08
***************
*** 34,44 ****
#include <mmintrin.h>
/* The data type indended for user use. */
! typedef int __m128 __attribute__ ((mode (TI)));
/* Internal data types for implementing the instrinsics. */
! typedef int __v4sf __attribute__ ((mode (V4SF)));
! typedef int __v4si __attribute__ ((mode (V4SI)));
/* Create a selector for use with the SHUFPS instruction. */
#define _MM_SHUFFLE(fp3,fp2,fp1,fp0) \
--- 34,44 ----
#include <mmintrin.h>
/* The data type indended for user use. */
! typedef int __m128 __attribute__ ((__mode__(__V4SF__)));
/* Internal data types for implementing the instrinsics. */
! typedef int __v4sf __attribute__ ((__mode__(__V4SF__)));
! typedef int __v4si __attribute__ ((__mode__(__V4SI__)));
/* Create a selector for use with the SHUFPS instruction. */
#define _MM_SHUFFLE(fp3,fp2,fp1,fp0) \
*************** _mm_movemask_ps (__m128 __A)
*** 680,686 ****
static __inline unsigned int
_mm_getcsr (void)
{
! return __builtin_ia32_getmxcsr ();
}
/* Read exception bits from the control register. */
--- 680,686 ----
static __inline unsigned int
_mm_getcsr (void)
{
! return __builtin_ia32_stmxcsr ();
}
/* Read exception bits from the control register. */
*************** _MM_GET_FLUSH_ZERO_MODE (void)
*** 712,718 ****
static __inline void
_mm_setcsr (unsigned int __I)
{
! __builtin_ia32_setmxcsr (__I);
}
/* Set exception bits in the control register. */
--- 712,718 ----
static __inline void
_mm_setcsr (unsigned int __I)
{
! __builtin_ia32_ldmxcsr (__I);
}
/* Set exception bits in the control register. */