This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Re: PATCH: Enable TFmode for SSE2
On Sat, Jun 28, 2008 at 08:29:41PM +0200, Uros Bizjak wrote:
> H.J. Lu wrote:
>
>> Here is the first patch to enable TFmode for ia32. The current
>> __builtin_fabsq and __builtin_copysignq don't work with TARGET_64BIT
>> when cross-compiling from Linux/ia32 to Linux/x86-64 since they
>> need HOST_BITS_PER_WIDE_INT >= 64:
>>
>> case TImode:
>> case TFmode:
>> imode = TImode;
>> vec_mode = VOIDmode;
>> gcc_assert (HOST_BITS_PER_WIDE_INT >= 64);
>> lo = 0, hi = (HOST_WIDE_INT)1 << shift;
>> break;
>>
>> It is the same problem with TImode.
> You can create a TImode/TFmode sign bitmask via CONST_VECTOR, just look
> at ix86_build_const_vector for an example. TFmode mask is currently
> created as CONST_DOUBLE just because 128bit modes were enabled for 64bit
> targets only and this was the simplest way ;)
I will take a look at it after __float128 is enabled for ia32. It
is hard to test it when your workstation is 64bit :-).
>
> And we will again avoid 64bit HWI.
>
>> Also __float80 and __float128
>> don't depend on MMX nor SSE. I moved __float80 and __float128
>> to ix86_init_builtins. I enabled __builtin_fabsq and
>> __builtin_copysignq only if HOST_BITS_PER_WIDE_INT >= 64 and SSE2
>>
>
> If SSE2 is not a requirement, then you also need to introduce TFmode "r"
> absneg splitter using AND and OR operations, similar to XFmode absneg
> splitter. This leads to an interesting problem that TFmode values
> require 4 consecutive integer registers - i.e. eax, edx, ecx and ebx, so
> this will definitively put some stress on RA.
It may be too much for RA when you consider stack alignment, nested
function and PIC. I think we should leave it for libgcc just like
_Decimal128.
>
>> is available. Is there a way to generate library call when builtin
>> pattern isn't available?
>>
> Ugh, please don't do that.
>
>> OK for trunk?
>>
>
> The patch is otherwise OK, but please enhance the patch with the fix for
> ix86_build_signbit_mask and add "r" TFmode splitter.
I think "r" TFmode splitter will create more problems than it solves.
I will take a look at ix86_build_signbit_mask in the next step.
>
> BTW: Do we have any special alignment requirements for TFmode for 32bit
> x86? Can we just inherit _Decimal128 requirements also for TFmode?
>
Yes, TFmode should be treated the same as TDmode. Here is the updated
patch.
OK for trunk?
Thanks.
H.J.
----
2008-06-28 H.J. Lu <hongjiu.lu@intel.com>
* config/i386/i386.c (contains_aligned_value_p): Return true
for __float128.
(ix86_function_arg_boundary): Return its natural boundary
for __float128.
(return_in_memory_32): Don't check TDmode.
(ix86_split_to_parts): Support splitting
into 4 parts and support TFmode for 32bit target.
(ix86_split_long_move): Support splitting into 4 parts.
(bdesc_args): Enable IX86_BUILTIN_FABSQ and IX86_BUILTIN_COPYSIGNQ
for SSE2.
(ix86_init_mmx_sse_builtins): Move __float80 and __float128
to ...
(ix86_init_builtins): Here.
(ix86_scalar_mode_supported_p): Always return true for TFmode.
(ix86_c_mode_for_suffix): Always return 'q' for TFmode.
* config/i386/i386.md (movtf): Check TARGET_SSE2 instead of
TARGET_64BIT.
(movtf_internal): Likewise.
(<code>tf2): Likewise.
(*absnegtf2_sse): Likewise.
(copysign<mode>3): Likewise.
(copysign<mode>3_const): Likewise.
(copysign<mode>3_var): Likewise.
(define_split UNSPEC_COPYSIGN): Likewise.
* config/i386/sse.md (*nandtf3): Likewise.
(<code>tf3): Likewise.
(*<code>tf3): Likewise.
--- gcc/config/i386/i386.c.float128 2008-06-27 18:43:05.000000000 -0700
+++ gcc/config/i386/i386.c 2008-06-28 15:40:33.000000000 -0700
@@ -4744,7 +4744,9 @@ static bool
contains_aligned_value_p (tree type)
{
enum machine_mode mode = TYPE_MODE (type);
- if (((TARGET_SSE && SSE_REG_MODE_P (mode)) || mode == TDmode)
+ if (((TARGET_SSE && SSE_REG_MODE_P (mode))
+ || mode == TDmode
+ || mode == TFmode)
&& (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
return true;
if (TYPE_ALIGN (type) < 128)
@@ -4803,8 +4805,9 @@ ix86_function_arg_boundary (enum machine
align = GET_MODE_ALIGNMENT (mode);
if (align < PARM_BOUNDARY)
align = PARM_BOUNDARY;
- /* In 32bit, only _Decimal128 is aligned to its natural boundary. */
- if (!TARGET_64BIT && mode != TDmode)
+ /* In 32bit, only _Decimal128 and __float128 are aligned to their
+ natural boundaries. */
+ if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
{
/* i386 ABI defines all arguments to be 4 byte aligned. We have to
make an exception for SSE modes since these require 128bit
@@ -4815,7 +4818,7 @@ ix86_function_arg_boundary (enum machine
to 8 byte boundaries. */
if (!type)
{
- if (!(TARGET_SSE && SSE_REG_MODE_P (mode)) && mode != TDmode)
+ if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
align = PARM_BOUNDARY;
}
else
@@ -5041,9 +5044,6 @@ return_in_memory_32 (const_tree type, en
if (mode == XFmode)
return 0;
- if (mode == TDmode)
- return 1;
-
if (size > 12)
return 1;
return 0;
@@ -14122,7 +14122,7 @@ ix86_split_to_parts (rtx operand, rtx *p
size = (GET_MODE_SIZE (mode) + 4) / 8;
gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
- gcc_assert (size >= 2 && size <= 3);
+ gcc_assert (size >= 2 && size <= 4);
/* Optimize constant pool reference to immediates. This is used by fp
moves, that force all constants to memory to allow combining. */
@@ -14142,7 +14142,7 @@ ix86_split_to_parts (rtx operand, rtx *p
operand = copy_rtx (operand);
PUT_MODE (operand, Pmode);
- parts[0] = parts[1] = parts[2] = operand;
+ parts[0] = parts[1] = parts[2] = parts[3] = operand;
return size;
}
@@ -14163,21 +14163,20 @@ ix86_split_to_parts (rtx operand, rtx *p
split_di (&operand, 1, &parts[0], &parts[1]);
else
{
+ int i;
+
if (REG_P (operand))
{
gcc_assert (reload_completed);
- parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
- parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
- if (size == 3)
- parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
+ for (i = 0; i < size; i++)
+ parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
}
else if (offsettable_memref_p (operand))
{
operand = adjust_address (operand, SImode, 0);
parts[0] = operand;
- parts[1] = adjust_address (operand, SImode, 4);
- if (size == 3)
- parts[2] = adjust_address (operand, SImode, 8);
+ for (i = 1; i < size; i++)
+ parts[i] = adjust_address (operand, SImode, 4 * i);
}
else if (GET_CODE (operand) == CONST_DOUBLE)
{
@@ -14187,6 +14186,11 @@ ix86_split_to_parts (rtx operand, rtx *p
REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
switch (mode)
{
+ case TFmode:
+ real_to_target (l, &r, mode);
+ parts[3] = gen_int_mode (l[3], SImode);
+ parts[2] = gen_int_mode (l[2], SImode);
+ break;
case XFmode:
REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
parts[2] = gen_int_mode (l[2], SImode);
@@ -14260,7 +14264,7 @@ ix86_split_to_parts (rtx operand, rtx *p
return size;
}
-/* Emit insns to perform a move or push of DI, DF, and XF values.
+/* Emit insns to perform a move or push of DI, DF, XF, and TF values.
Return false when normal moves are needed; true when all required
insns have been emitted. Operands 2-4 contain the input values
int the correct order; operands 5-7 contain the output values. */
@@ -14268,11 +14272,12 @@ ix86_split_to_parts (rtx operand, rtx *p
void
ix86_split_long_move (rtx operands[])
{
- rtx part[2][3];
- int nparts;
+ rtx part[2][4];
+ int nparts, i, j;
int push = 0;
int collisions = 0;
enum machine_mode mode = GET_MODE (operands[0]);
+ bool collisionparts[4];
/* The DFmode expanders may ask us to move double.
For 64bit target this is single move. By hiding the fact
@@ -14311,34 +14316,46 @@ ix86_split_long_move (rtx operands[])
/* When emitting push, take care for source operands on the stack. */
if (push && MEM_P (operands[1])
&& reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
- {
- if (nparts == 3)
- part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
- XEXP (part[1][2], 0));
- part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
- XEXP (part[1][1], 0));
- }
+ for (i = 0; i < nparts - 1; i++)
+ part[1][i] = change_address (part[1][i],
+ GET_MODE (part[1][i]),
+ XEXP (part[1][i + 1], 0));
/* We need to do copy in the right order in case an address register
of the source overlaps the destination. */
if (REG_P (part[0][0]) && MEM_P (part[1][0]))
{
- if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
- collisions++;
- if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
- collisions++;
- if (nparts == 3
- && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
- collisions++;
+ rtx tmp;
+
+ for (i = 0; i < nparts; i++)
+ {
+ collisionparts[i]
+ = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
+ if (collisionparts[i])
+ collisions++;
+ }
/* Collision in the middle part can be handled by reordering. */
- if (collisions == 1 && nparts == 3
- && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
+ if (collisions == 1 && nparts == 3 && collisionparts [1])
{
- rtx tmp;
tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
}
+ else if (collisions == 1
+ && nparts == 4
+ && (collisionparts [1] || collisionparts [2]))
+ {
+ if (collisionparts [1])
+ {
+ tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
+ tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
+ }
+ else
+ {
+ tmp = part[0][2]; part[0][2] = part[0][3]; part[0][3] = tmp;
+ tmp = part[1][2]; part[1][2] = part[1][3]; part[1][3] = tmp;
+ }
+ }
/* If there are more collisions, we can't handle it by reordering.
Do an lea to the last part and use only one colliding move. */
@@ -14357,11 +14374,11 @@ ix86_split_long_move (rtx operands[])
emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
part[1][0] = replace_equiv_address (part[1][0], base);
- part[1][1] = replace_equiv_address (part[1][1],
- plus_constant (base, UNITS_PER_WORD));
- if (nparts == 3)
- part[1][2] = replace_equiv_address (part[1][2],
- plus_constant (base, 8));
+ for (i = 1; i < nparts; i++)
+ {
+ tmp = plus_constant (base, UNITS_PER_WORD * i);
+ part[1][i] = replace_equiv_address (part[1][i], tmp);
+ }
}
}
@@ -14375,6 +14392,11 @@ ix86_split_long_move (rtx operands[])
emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
emit_move_insn (part[0][2], part[1][2]);
}
+ else if (nparts == 4)
+ {
+ emit_move_insn (part[0][3], part[1][3]);
+ emit_move_insn (part[0][2], part[1][2]);
+ }
}
else
{
@@ -14412,77 +14434,42 @@ ix86_split_long_move (rtx operands[])
&& REG_P (part[1][1])
&& (REGNO (part[0][0]) == REGNO (part[1][1])
|| (nparts == 3
- && REGNO (part[0][0]) == REGNO (part[1][2]))))
+ && REGNO (part[0][0]) == REGNO (part[1][2]))
+ || (nparts == 4
+ && REGNO (part[0][0]) == REGNO (part[1][3]))))
|| (collisions > 0
&& reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
{
- if (nparts == 3)
- {
- operands[2] = part[0][2];
- operands[3] = part[0][1];
- operands[4] = part[0][0];
- operands[5] = part[1][2];
- operands[6] = part[1][1];
- operands[7] = part[1][0];
- }
- else
+ for (i = 0, j = nparts - 1; i < nparts; i++, j--)
{
- operands[2] = part[0][1];
- operands[3] = part[0][0];
- operands[5] = part[1][1];
- operands[6] = part[1][0];
+ operands[2 + i] = part[0][j];
+ operands[6 + i] = part[1][j];
}
}
else
{
- if (nparts == 3)
- {
- operands[2] = part[0][0];
- operands[3] = part[0][1];
- operands[4] = part[0][2];
- operands[5] = part[1][0];
- operands[6] = part[1][1];
- operands[7] = part[1][2];
- }
- else
+ for (i = 0; i < nparts; i++)
{
- operands[2] = part[0][0];
- operands[3] = part[0][1];
- operands[5] = part[1][0];
- operands[6] = part[1][1];
+ operands[2 + i] = part[0][i];
+ operands[6 + i] = part[1][i];
}
}
/* If optimizing for size, attempt to locally unCSE nonzero constants. */
if (optimize_size)
{
- if (CONST_INT_P (operands[5])
- && operands[5] != const0_rtx
- && REG_P (operands[2]))
- {
- if (CONST_INT_P (operands[6])
- && INTVAL (operands[6]) == INTVAL (operands[5]))
- operands[6] = operands[2];
-
- if (nparts == 3
- && CONST_INT_P (operands[7])
- && INTVAL (operands[7]) == INTVAL (operands[5]))
- operands[7] = operands[2];
- }
-
- if (nparts == 3
- && CONST_INT_P (operands[6])
- && operands[6] != const0_rtx
- && REG_P (operands[3])
- && CONST_INT_P (operands[7])
- && INTVAL (operands[7]) == INTVAL (operands[6]))
- operands[7] = operands[3];
- }
-
- emit_move_insn (operands[2], operands[5]);
- emit_move_insn (operands[3], operands[6]);
- if (nparts == 3)
- emit_move_insn (operands[4], operands[7]);
+ for (j = 0; j < nparts - 1; j++)
+ if (CONST_INT_P (operands[6 + j])
+ && operands[6 + j] != const0_rtx
+ && REG_P (operands[2 + j]))
+ for (i = j; i < nparts - 1; i++)
+ if (CONST_INT_P (operands[7 + i])
+ && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
+ operands[7 + i] = operands[2 + j];
+ }
+
+ for (i = 0; i < nparts; i++)
+ emit_move_insn (operands[2 + i], operands[6 + i]);
return;
}
@@ -18674,6 +18661,9 @@ static const struct builtin_description
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
+
/* SSE2 MMX */
{ OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
@@ -18799,10 +18789,6 @@ static const struct builtin_description
/* PCLMUL */
{ OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
-
- /* 64bit */
- { OPTION_MASK_ISA_64BIT, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
- { OPTION_MASK_ISA_64BIT, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
};
/* SSE5 */
@@ -19600,47 +19586,6 @@ ix86_init_mmx_sse_builtins (void)
tree ftype;
- /* The __float80 type. */
- if (TYPE_MODE (long_double_type_node) == XFmode)
- (*lang_hooks.types.register_builtin_type) (long_double_type_node,
- "__float80");
- else
- {
- /* The __float80 type. */
- tree float80_type_node = make_node (REAL_TYPE);
-
- TYPE_PRECISION (float80_type_node) = 80;
- layout_type (float80_type_node);
- (*lang_hooks.types.register_builtin_type) (float80_type_node,
- "__float80");
- }
-
- if (TARGET_64BIT)
- {
- tree float128_type_node = make_node (REAL_TYPE);
-
- TYPE_PRECISION (float128_type_node) = 128;
- layout_type (float128_type_node);
- (*lang_hooks.types.register_builtin_type) (float128_type_node,
- "__float128");
-
- /* TFmode support builtins. */
- ftype = build_function_type (float128_type_node,
- void_list_node);
- def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_infq", ftype, IX86_BUILTIN_INFQ);
-
- ftype = build_function_type_list (float128_type_node,
- float128_type_node,
- NULL_TREE);
- def_builtin_const (OPTION_MASK_ISA_64BIT, "__builtin_fabsq", ftype, IX86_BUILTIN_FABSQ);
-
- ftype = build_function_type_list (float128_type_node,
- float128_type_node,
- float128_type_node,
- NULL_TREE);
- def_builtin_const (OPTION_MASK_ISA_64BIT, "__builtin_copysignq", ftype, IX86_BUILTIN_COPYSIGNQ);
- }
-
/* Add all special builtins with variable number of operands. */
for (i = 0, d = bdesc_special_args;
i < ARRAY_SIZE (bdesc_special_args);
@@ -20246,6 +20191,52 @@ ix86_init_mmx_sse_builtins (void)
static void
ix86_init_builtins (void)
{
+ tree float128_type_node = make_node (REAL_TYPE);
+ tree ftype, decl;
+
+ /* The __float80 type. */
+ if (TYPE_MODE (long_double_type_node) == XFmode)
+ (*lang_hooks.types.register_builtin_type) (long_double_type_node,
+ "__float80");
+ else
+ {
+ /* The __float80 type. */
+ tree float80_type_node = make_node (REAL_TYPE);
+
+ TYPE_PRECISION (float80_type_node) = 80;
+ layout_type (float80_type_node);
+ (*lang_hooks.types.register_builtin_type) (float80_type_node,
+ "__float80");
+ }
+
+ /* The __float128 type. */
+ TYPE_PRECISION (float128_type_node) = 128;
+ layout_type (float128_type_node);
+ (*lang_hooks.types.register_builtin_type) (float128_type_node,
+ "__float128");
+
+ /* TFmode support builtins. */
+ ftype = build_function_type (float128_type_node, void_list_node);
+ decl = add_builtin_function ("__builtin_infq", ftype,
+ IX86_BUILTIN_INFQ, BUILT_IN_MD,
+ NULL, NULL_TREE);
+ ix86_builtins[(int) IX86_BUILTIN_INFQ] = decl;
+
+ if (HOST_BITS_PER_WIDE_INT >= 64)
+ {
+ /* Those builtins need TImode to compile. */
+ ftype = build_function_type_list (float128_type_node,
+ float128_type_node,
+ NULL_TREE);
+ def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_fabsq", ftype, IX86_BUILTIN_FABSQ);
+
+ ftype = build_function_type_list (float128_type_node,
+ float128_type_node,
+ float128_type_node,
+ NULL_TREE);
+ def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_copysignq", ftype, IX86_BUILTIN_COPYSIGNQ);
+ }
+
if (TARGET_MMX)
ix86_init_mmx_sse_builtins ();
}
@@ -24702,7 +24693,7 @@ ix86_scalar_mode_supported_p (enum machi
if (DECIMAL_FLOAT_MODE_P (mode))
return true;
else if (mode == TFmode)
- return TARGET_64BIT;
+ return true;
else
return default_scalar_mode_supported_p (mode);
}
@@ -24726,7 +24717,7 @@ ix86_vector_mode_supported_p (enum machi
static enum machine_mode
ix86_c_mode_for_suffix (char suffix)
{
- if (TARGET_64BIT && suffix == 'q')
+ if (suffix == 'q')
return TFmode;
if (TARGET_MMX && suffix == 'w')
return XFmode;
--- gcc/config/i386/i386.md.float128 2008-06-27 18:43:05.000000000 -0700
+++ gcc/config/i386/i386.md 2008-06-28 08:21:28.000000000 -0700
@@ -3261,7 +3261,7 @@
(define_expand "movtf"
[(set (match_operand:TF 0 "nonimmediate_operand" "")
(match_operand:TF 1 "nonimmediate_operand" ""))]
- "TARGET_64BIT"
+ "TARGET_SSE2"
{
ix86_expand_move (TFmode, operands);
DONE;
@@ -3270,7 +3270,7 @@
(define_insn "*movtf_internal"
[(set (match_operand:TF 0 "nonimmediate_operand" "=x,m,x,?r,?o")
(match_operand:TF 1 "general_operand" "xm,x,C,roF,Fr"))]
- "TARGET_64BIT
+ "TARGET_SSE2
&& !(MEM_P (operands[0]) && MEM_P (operands[1]))"
{
switch (which_alternative)
@@ -10375,7 +10375,7 @@
(define_expand "<code>tf2"
[(set (match_operand:TF 0 "register_operand" "")
(absneg:TF (match_operand:TF 1 "register_operand" "")))]
- "TARGET_64BIT"
+ "TARGET_SSE2"
"ix86_expand_fp_absneg_operator (<CODE>, TFmode, operands); DONE;")
(define_insn "*absnegtf2_sse"
@@ -10384,7 +10384,7 @@
[(match_operand:TF 1 "register_operand" "0,x")]))
(use (match_operand:TF 2 "nonimmediate_operand" "xm,0"))
(clobber (reg:CC FLAGS_REG))]
- "TARGET_64BIT"
+ "TARGET_SSE2"
"#")
;; Splitters for fp abs and neg.
@@ -10563,7 +10563,7 @@
(match_operand:CSGNMODE 1 "nonmemory_operand" "")
(match_operand:CSGNMODE 2 "register_operand" "")]
"(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
- || (TARGET_64BIT && (<MODE>mode == TFmode))"
+ || (TARGET_SSE2 && (<MODE>mode == TFmode))"
{
ix86_expand_copysign (operands);
DONE;
@@ -10577,7 +10577,7 @@
(match_operand:<CSGNVMODE> 3 "nonimmediate_operand" "xm")]
UNSPEC_COPYSIGN))]
"(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
- || (TARGET_64BIT && (<MODE>mode == TFmode))"
+ || (TARGET_SSE2 && (<MODE>mode == TFmode))"
"#"
"&& reload_completed"
[(const_int 0)]
@@ -10596,7 +10596,7 @@
UNSPEC_COPYSIGN))
(clobber (match_scratch:<CSGNVMODE> 1 "=x,x,x,x,x"))]
"(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
- || (TARGET_64BIT && (<MODE>mode == TFmode))"
+ || (TARGET_SSE2 && (<MODE>mode == TFmode))"
"#")
(define_split
@@ -10609,7 +10609,7 @@
UNSPEC_COPYSIGN))
(clobber (match_scratch:<CSGNVMODE> 1 ""))]
"((SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
- || (TARGET_64BIT && (<MODE>mode == TFmode)))
+ || (TARGET_SSE2 && (<MODE>mode == TFmode)))
&& reload_completed"
[(const_int 0)]
{
--- gcc/config/i386/sse.md.float128 2008-05-21 22:30:20.000000000 -0700
+++ gcc/config/i386/sse.md 2008-06-28 08:21:28.000000000 -0700
@@ -3895,7 +3895,7 @@
(and:TF
(not:TF (match_operand:TF 1 "register_operand" "0"))
(match_operand:TF 2 "nonimmediate_operand" "xm")))]
- "TARGET_64BIT"
+ "TARGET_SSE2"
"pandn\t{%2, %0|%0, %2}"
[(set_attr "type" "sselog")
(set_attr "prefix_data16" "1")
@@ -3936,7 +3936,7 @@
(plogic:TF
(match_operand:TF 1 "nonimmediate_operand" "")
(match_operand:TF 2 "nonimmediate_operand" "")))]
- "TARGET_64BIT"
+ "TARGET_SSE2"
"ix86_fixup_binary_operands_no_copy (<CODE>, TFmode, operands);")
(define_insn "*<code>tf3"
@@ -3944,7 +3944,7 @@
(plogic:TF
(match_operand:TF 1 "nonimmediate_operand" "%0")
(match_operand:TF 2 "nonimmediate_operand" "xm")))]
- "TARGET_64BIT && ix86_binary_operator_ok (<CODE>, TFmode, operands)"
+ "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, TFmode, operands)"
"p<plogicprefix>\t{%2, %0|%0, %2}"
[(set_attr "type" "sselog")
(set_attr "prefix_data16" "1")