This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[PATCH, i386]: Committed; Use "z" register constraint in SSE4.1 variable blend instructions
- From: Uros Bizjak <ubizjak at gmail dot com>
- To: GCC Patches <gcc-patches at gcc dot gnu dot org>
- Date: Sat, 02 Jun 2007 14:49:03 +0200
- Subject: [PATCH, i386]: Committed; Use "z" register constraint in SSE4.1 variable blend instructions
Hello!
SSE4.1 variable blend instructions can now benefit from the "z" register
constraint, too. The attached patch doesn't force the third argument into
xmm0, but lets reload do its job. Additionally, a safe_vector_operand()
guard was added for VECTOR_MODE_P operands.
Also, there is no need to check inputs with "register_operand" if the insn
operand predicate is already "register_operand".
Patch was bootstrapped on x86_64-pc-linux-gnu and regression tested for
all default languages. Patch is committed to SVN.
2007-06-02 Uros Bizjak <ubizjak@gmail.com>
* config/i386/sse.md ("sse4_1_blendvpd"): Require "z" class XMM
register for operand[3]. Adjust asm template.
("sse4_1_blendvps"): Ditto.
("sse4_1_pblendvb"): Ditto.
* config/i386/i386.c (ix86_expand_sse_4_operands_builtin): Call
safe_vector_operand() if input operand is VECTOR_MODE_P operand.
Do not force operands[3] into xmm0 register for variable blend
instructions.
(ix86_expand_sse_pcmpestr): Do not check operands for
"register_operand", when insn operand predicate is
"register_operand".
(ix86_expand_sse_pcmpistr): Ditto.
Uros.
Index: sse.md
===================================================================
--- sse.md (revision 125279)
+++ sse.md (working copy)
@@ -5844,10 +5844,10 @@
[(set (match_operand:V2DF 0 "register_operand" "=x")
(unspec:V2DF [(match_operand:V2DF 1 "register_operand" "0")
(match_operand:V2DF 2 "nonimmediate_operand" "xm")
- (reg:V2DF 21)]
+ (match_operand:V2DF 3 "register_operand" "z")]
UNSPEC_BLENDV))]
"TARGET_SSE4_1"
- "blendvpd\t{%%xmm0, %2, %0|%0, %2, %%xmm0}"
+ "blendvpd\t{%3, %2, %0|%0, %2, %3}"
[(set_attr "type" "ssemov")
(set_attr "prefix_extra" "1")
(set_attr "mode" "V2DF")])
@@ -5856,10 +5856,10 @@
[(set (match_operand:V4SF 0 "register_operand" "=x")
(unspec:V4SF [(match_operand:V4SF 1 "register_operand" "0")
(match_operand:V4SF 2 "nonimmediate_operand" "xm")
- (reg:V4SF 21)]
+ (match_operand:V4SF 3 "register_operand" "z")]
UNSPEC_BLENDV))]
"TARGET_SSE4_1"
- "blendvps\t{%%xmm0, %2, %0|%0, %2, %%xmm0}"
+ "blendvps\t{%3, %2, %0|%0, %2, %3}"
[(set_attr "type" "ssemov")
(set_attr "prefix_extra" "1")
(set_attr "mode" "V4SF")])
@@ -5927,10 +5927,10 @@
[(set (match_operand:V16QI 0 "register_operand" "=x")
(unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
(match_operand:V16QI 2 "nonimmediate_operand" "xm")
- (reg:V16QI 21)]
+ (match_operand:V16QI 3 "register_operand" "z")]
UNSPEC_BLENDV))]
"TARGET_SSE4_1"
- "pblendvb\t{%%xmm0, %2, %0|%0, %2, %%xmm0}"
+ "pblendvb\t{%3, %2, %0|%0, %2, %3}"
[(set_attr "type" "ssemov")
(set_attr "prefix_extra" "1")
(set_attr "mode" "TI")])
Index: i386.c
===================================================================
--- i386.c (revision 125279)
+++ i386.c (working copy)
@@ -16929,8 +16929,7 @@ static const struct builtin_description
{ OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32di, 0, IX86_BUILTIN_CRC32DI, 0, 0 },
};
-/* SSE builtins with 3 arguments and the last argument must be a 8 bit
- constant or xmm0. */
+/* SSE builtins with 3 arguments and the last argument must be an immediate or xmm0. */
static const struct builtin_description bdesc_sse_3arg[] =
{
/* SSE4.1 */
@@ -18279,51 +18278,48 @@ ix86_expand_sse_4_operands_builtin (enum
rtx op1 = expand_normal (arg1);
rtx op2 = expand_normal (arg2);
enum machine_mode tmode = insn_data[icode].operand[0].mode;
- enum machine_mode mode0 = insn_data[icode].operand[1].mode;
- enum machine_mode mode1 = insn_data[icode].operand[2].mode;
- enum machine_mode mode2;
- rtx xmm0;
-
- if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
- op0 = copy_to_mode_reg (mode0, op0);
- if ((optimize && !register_operand (op1, mode1))
- || !(*insn_data[icode].operand[2].predicate) (op1, mode1))
- op1 = copy_to_mode_reg (mode1, op1);
+ enum machine_mode mode1 = insn_data[icode].operand[1].mode;
+ enum machine_mode mode2 = insn_data[icode].operand[2].mode;
+ enum machine_mode mode3 = insn_data[icode].operand[3].mode;
- switch (icode)
- {
- case CODE_FOR_sse4_1_blendvpd:
- case CODE_FOR_sse4_1_blendvps:
- case CODE_FOR_sse4_1_pblendvb:
- /* The third argument of variable blends must be xmm0. */
- xmm0 = gen_rtx_REG (tmode, FIRST_SSE_REG);
- emit_move_insn (xmm0, op2);
- op2 = xmm0;
- break;
- default:
- mode2 = insn_data[icode].operand[2].mode;
- if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
- {
- switch (icode)
- {
- case CODE_FOR_sse4_1_roundsd:
- case CODE_FOR_sse4_1_roundss:
- error ("the third argument must be a 4-bit immediate");
- break;
- default:
- error ("the third argument must be a 8-bit immediate");
- break;
- }
- return const0_rtx;
- }
- break;
- }
+ if (VECTOR_MODE_P (mode1))
+ op0 = safe_vector_operand (op0, mode1);
+ if (VECTOR_MODE_P (mode2))
+ op1 = safe_vector_operand (op1, mode2);
+ if (VECTOR_MODE_P (mode3))
+ op2 = safe_vector_operand (op2, mode3);
if (optimize
|| target == 0
|| GET_MODE (target) != tmode
|| ! (*insn_data[icode].operand[0].predicate) (target, tmode))
target = gen_reg_rtx (tmode);
+
+ if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
+ op0 = copy_to_mode_reg (mode1, op0);
+ if ((optimize && !register_operand (op1, mode2))
+ || !(*insn_data[icode].operand[2].predicate) (op1, mode2))
+ op1 = copy_to_mode_reg (mode2, op1);
+
+ if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
+ switch (icode)
+ {
+ case CODE_FOR_sse4_1_blendvpd:
+ case CODE_FOR_sse4_1_blendvps:
+ case CODE_FOR_sse4_1_pblendvb:
+ op2 = copy_to_mode_reg (mode3, op2);
+ break;
+
+ case CODE_FOR_sse4_1_roundsd:
+ case CODE_FOR_sse4_1_roundss:
+ error ("the third argument must be a 4-bit immediate");
+ return const0_rtx;
+
+ default:
+ error ("the third argument must be an 8-bit immediate");
+ return const0_rtx;
+ }
+
pat = GEN_FCN (icode) (target, op0, op1, op2);
if (! pat)
return 0;
@@ -18732,17 +18728,14 @@ ix86_expand_sse_pcmpestr (const struct b
if (VECTOR_MODE_P (modev4))
op2 = safe_vector_operand (op2, modev4);
- if ((optimize && !register_operand (op0, modev2))
- || !(*insn_data[d->icode].operand[2].predicate) (op0, modev2))
+ if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
op0 = copy_to_mode_reg (modev2, op0);
- if ((optimize && !register_operand (op1, modei3))
- || !(*insn_data[d->icode].operand[3].predicate) (op1, modei3))
+ if (! (*insn_data[d->icode].operand[3].predicate) (op1, modei3))
op1 = copy_to_mode_reg (modei3, op1);
if ((optimize && !register_operand (op2, modev4))
|| !(*insn_data[d->icode].operand[4].predicate) (op2, modev4))
op2 = copy_to_mode_reg (modev4, op2);
- if ((optimize && !register_operand (op3, modei5))
- || !(*insn_data[d->icode].operand[5].predicate) (op3, modei5))
+ if (! (*insn_data[d->icode].operand[5].predicate) (op3, modei5))
op3 = copy_to_mode_reg (modei5, op3);
if (! (*insn_data[d->icode].operand[6].predicate) (op4, modeimm))
@@ -18833,8 +18826,7 @@ ix86_expand_sse_pcmpistr (const struct b
if (VECTOR_MODE_P (modev3))
op1 = safe_vector_operand (op1, modev3);
- if ((optimize && !register_operand (op0, modev2))
- || !(*insn_data[d->icode].operand[2].predicate) (op0, modev2))
+ if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
op0 = copy_to_mode_reg (modev2, op0);
if ((optimize && !register_operand (op1, modev3))
|| !(*insn_data[d->icode].operand[3].predicate) (op1, modev3))