[PATCH 03/43] i386: Emulate MMX punpcklXX/punpckhXX with SSE punpcklXX
H.J. Lu
hjl.tools@gmail.com
Sat Feb 9 13:24:00 GMT 2019
Emulate MMX punpcklXX/punpckhXX with SSE punpcklXX. For MMX punpckhXX,
the result is left in bits 64:127 of the SSE register, so move bits
64:127 to bits 0:63 afterwards. Only SSE register source operands are
allowed.
PR target/89021
* config/i386/i386-protos.h (ix86_split_mmx_punpck): New
prototype.
* config/i386/i386.c (ix86_split_mmx_punpck): New function.
* config/i386/mmx.md (mmx_punpckhbw): Changed to
define_insn_and_split to support SSE emulation.
(mmx_punpcklbw): Likewise.
(mmx_punpckhwd): Likewise.
(mmx_punpcklwd): Likewise.
(mmx_punpckhdq): Likewise.
(mmx_punpckldq): Likewise.
---
gcc/config/i386/i386-protos.h | 1 +
gcc/config/i386/i386.c | 77 +++++++++++++++++++
gcc/config/i386/mmx.md | 138 ++++++++++++++++++++++------------
3 files changed, 168 insertions(+), 48 deletions(-)
diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h
index bb96a420a85..dc7fc38d8e4 100644
--- a/gcc/config/i386/i386-protos.h
+++ b/gcc/config/i386/i386-protos.h
@@ -202,6 +202,7 @@ extern rtx ix86_split_stack_guard (void);
extern void ix86_move_vector_high_sse_to_mmx (rtx);
extern void ix86_split_mmx_pack (rtx[], enum rtx_code);
+extern void ix86_split_mmx_punpck (rtx[], bool);
#ifdef TREE_CODE
extern void init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, tree, int);
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index cab35bb2242..6e67ac346dd 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -20009,6 +20009,83 @@ ix86_split_mmx_pack (rtx operands[], enum rtx_code code)
ix86_move_vector_high_sse_to_mmx (op0);
}
+/* Split MMX punpcklXX/punpckhXX with SSE punpcklXX. OPERANDS[0] is the destination and OPERANDS[1]/OPERANDS[2] are the sources, all accessed as registers; HIGH_P is true for the punpckhXX form. */
+
+void
+ix86_split_mmx_punpck (rtx operands[], bool high_p)
+{
+ rtx op0 = operands[0];
+ rtx op1 = operands[1];
+ rtx op2 = operands[2];
+ machine_mode mode = GET_MODE (op0); /* The MMX vector mode being split. */
+ rtx mask;
+ /* The corresponding SSE mode and the mode of the two-source concatenation. */
+ machine_mode sse_mode, double_sse_mode;
+
+ switch (mode) /* Choose the SSE modes and the interleave selector for MODE. */
+ {
+ case E_V8QImode:
+ sse_mode = V16QImode;
+ double_sse_mode = V32QImode;
+ mask = gen_rtx_PARALLEL (VOIDmode, /* Indices 0-15 pick from the first source, 16-31 from the second. */
+ gen_rtvec (16,
+ GEN_INT (0), GEN_INT (16),
+ GEN_INT (1), GEN_INT (17),
+ GEN_INT (2), GEN_INT (18),
+ GEN_INT (3), GEN_INT (19),
+ GEN_INT (4), GEN_INT (20),
+ GEN_INT (5), GEN_INT (21),
+ GEN_INT (6), GEN_INT (22),
+ GEN_INT (7), GEN_INT (23)));
+ break;
+
+ case E_V4HImode:
+ sse_mode = V8HImode;
+ double_sse_mode = V16HImode;
+ mask = gen_rtx_PARALLEL (VOIDmode, /* Indices 0-7 pick from the first source, 8-15 from the second. */
+ gen_rtvec (8,
+ GEN_INT (0), GEN_INT (8),
+ GEN_INT (1), GEN_INT (9),
+ GEN_INT (2), GEN_INT (10),
+ GEN_INT (3), GEN_INT (11)));
+ break;
+
+ case E_V2SImode:
+ sse_mode = V4SImode;
+ double_sse_mode = V8SImode;
+ mask = gen_rtx_PARALLEL (VOIDmode, /* Indices 0-3 pick from the first source, 4-7 from the second. */
+ gen_rtvec (4,
+ GEN_INT (0), GEN_INT (4),
+ GEN_INT (1), GEN_INT (5)));
+ break;
+
+ default:
+ gcc_unreachable (); /* Only the three MMX integer vector modes above are handled. */
+ }
+
+ /* Generate SSE punpcklXX on the same register numbers, viewed in SSE_MODE. */
+ rtx dest = gen_rtx_REG (sse_mode, REGNO (op0));
+ op1 = gen_rtx_REG (sse_mode, REGNO (op1));
+ op2 = gen_rtx_REG (sse_mode, REGNO (op2));
+
+ op1 = gen_rtx_VEC_CONCAT (double_sse_mode, op1, op2); /* Both sources side by side. */
+ op2 = gen_rtx_VEC_SELECT (sse_mode, op1, mask); /* Interleave their low elements. */
+ rtx insn = gen_rtx_SET (dest, op2);
+ emit_insn (insn);
+
+ if (high_p)
+ {
+ /* Move bits 64:127 to bits 0:63: V4SI elements 2 and 3 become elements 0 and 1; the upper two result elements are copies of element 0. */
+ mask = gen_rtx_PARALLEL (VOIDmode,
+ gen_rtvec (4, GEN_INT (2), GEN_INT (3),
+ GEN_INT (0), GEN_INT (0)));
+ dest = gen_rtx_REG (V4SImode, REGNO (dest)); /* Same register, viewed as V4SI for the shuffle. */
+ op1 = gen_rtx_VEC_SELECT (V4SImode, dest, mask);
+ insn = gen_rtx_SET (dest, op1);
+ emit_insn (insn);
+ }
+}
+
/* Helper function of ix86_fixup_binary_operands to canonicalize
operand order. Returns true if the operands should be swapped. */
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 5c28d935e82..1d5ed83e7b2 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -1089,87 +1089,129 @@
(set_attr "type" "mmxshft,sselog,sselog")
(set_attr "mode" "DI,TI,TI")])
-(define_insn "mmx_punpckhbw"
- [(set (match_operand:V8QI 0 "register_operand" "=y")
+(define_insn_and_split "mmx_punpckhbw"
+ [(set (match_operand:V8QI 0 "register_operand" "=y,Yx,Yy")
(vec_select:V8QI
(vec_concat:V16QI
- (match_operand:V8QI 1 "register_operand" "0")
- (match_operand:V8QI 2 "nonimmediate_operand" "ym"))
+ (match_operand:V8QI 1 "register_operand" "0,0,Yy")
+ (match_operand:V8QI 2 "nonimmediate_operand" "ym,Yx,Yy"))
(parallel [(const_int 4) (const_int 12)
(const_int 5) (const_int 13)
(const_int 6) (const_int 14)
(const_int 7) (const_int 15)])))]
- "TARGET_MMX"
- "punpckhbw\t{%2, %0|%0, %2}"
- [(set_attr "type" "mmxcvt")
- (set_attr "mode" "DI")])
+ "TARGET_MMX || TARGET_MMX_WITH_SSE"
+ "@
+ punpckhbw\t{%2, %0|%0, %2}
+ #
+ #"
+ "&& reload_completed && TARGET_MMX_WITH_SSE"
+ [(const_int 0)]
+ "ix86_split_mmx_punpck (operands, true);"
+ [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+ (set_attr "type" "mmxcvt,sselog,sselog")
+ (set_attr "mode" "DI,TI,TI")])
-(define_insn "mmx_punpcklbw"
- [(set (match_operand:V8QI 0 "register_operand" "=y")
+(define_insn_and_split "mmx_punpcklbw"
+ [(set (match_operand:V8QI 0 "register_operand" "=y,Yx,Yy")
(vec_select:V8QI
(vec_concat:V16QI
- (match_operand:V8QI 1 "register_operand" "0")
- (match_operand:V8QI 2 "nonimmediate_operand" "ym"))
+ (match_operand:V8QI 1 "register_operand" "0,0,Yy")
+ (match_operand:V8QI 2 "nonimmediate_operand" "ym,Yx,Yy"))
(parallel [(const_int 0) (const_int 8)
(const_int 1) (const_int 9)
(const_int 2) (const_int 10)
(const_int 3) (const_int 11)])))]
- "TARGET_MMX"
- "punpcklbw\t{%2, %0|%0, %k2}"
- [(set_attr "type" "mmxcvt")
- (set_attr "mode" "DI")])
+ "TARGET_MMX || TARGET_MMX_WITH_SSE"
+ "@
+ punpcklbw\t{%2, %0|%0, %k2}
+ #
+ #"
+ "&& reload_completed && TARGET_MMX_WITH_SSE"
+ [(const_int 0)]
+ "ix86_split_mmx_punpck (operands, false);"
+ [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+ (set_attr "type" "mmxcvt,sselog,sselog")
+ (set_attr "mode" "DI,TI,TI")])
-(define_insn "mmx_punpckhwd"
- [(set (match_operand:V4HI 0 "register_operand" "=y")
+(define_insn_and_split "mmx_punpckhwd"
+ [(set (match_operand:V4HI 0 "register_operand" "=y,Yx,Yy")
(vec_select:V4HI
(vec_concat:V8HI
- (match_operand:V4HI 1 "register_operand" "0")
- (match_operand:V4HI 2 "nonimmediate_operand" "ym"))
+ (match_operand:V4HI 1 "register_operand" "0,0,Yy")
+ (match_operand:V4HI 2 "nonimmediate_operand" "ym,Yx,Yy"))
(parallel [(const_int 2) (const_int 6)
(const_int 3) (const_int 7)])))]
- "TARGET_MMX"
- "punpckhwd\t{%2, %0|%0, %2}"
- [(set_attr "type" "mmxcvt")
- (set_attr "mode" "DI")])
+ "TARGET_MMX || TARGET_MMX_WITH_SSE"
+ "@
+ punpckhwd\t{%2, %0|%0, %2}
+ #
+ #"
+ "&& reload_completed && TARGET_MMX_WITH_SSE"
+ [(const_int 0)]
+ "ix86_split_mmx_punpck (operands, true);"
+ [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+ (set_attr "type" "mmxcvt,sselog,sselog")
+ (set_attr "mode" "DI,TI,TI")])
-(define_insn "mmx_punpcklwd"
- [(set (match_operand:V4HI 0 "register_operand" "=y")
+(define_insn_and_split "mmx_punpcklwd"
+ [(set (match_operand:V4HI 0 "register_operand" "=y,Yx,Yy")
(vec_select:V4HI
(vec_concat:V8HI
- (match_operand:V4HI 1 "register_operand" "0")
- (match_operand:V4HI 2 "nonimmediate_operand" "ym"))
+ (match_operand:V4HI 1 "register_operand" "0,0,Yy")
+ (match_operand:V4HI 2 "nonimmediate_operand" "ym,Yx,Yy"))
(parallel [(const_int 0) (const_int 4)
(const_int 1) (const_int 5)])))]
- "TARGET_MMX"
- "punpcklwd\t{%2, %0|%0, %k2}"
- [(set_attr "type" "mmxcvt")
- (set_attr "mode" "DI")])
+ "TARGET_MMX || TARGET_MMX_WITH_SSE"
+ "@
+ punpcklwd\t{%2, %0|%0, %k2}
+ #
+ #"
+ "&& reload_completed && TARGET_MMX_WITH_SSE"
+ [(const_int 0)]
+ "ix86_split_mmx_punpck (operands, false);"
+ [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+ (set_attr "type" "mmxcvt,sselog,sselog")
+ (set_attr "mode" "DI,TI,TI")])
-(define_insn "mmx_punpckhdq"
- [(set (match_operand:V2SI 0 "register_operand" "=y")
+(define_insn_and_split "mmx_punpckhdq"
+ [(set (match_operand:V2SI 0 "register_operand" "=y,Yx,Yy")
(vec_select:V2SI
(vec_concat:V4SI
- (match_operand:V2SI 1 "register_operand" "0")
- (match_operand:V2SI 2 "nonimmediate_operand" "ym"))
+ (match_operand:V2SI 1 "register_operand" "0,0,Yy")
+ (match_operand:V2SI 2 "nonimmediate_operand" "ym,Yx,Yy"))
(parallel [(const_int 1)
(const_int 3)])))]
- "TARGET_MMX"
- "punpckhdq\t{%2, %0|%0, %2}"
- [(set_attr "type" "mmxcvt")
- (set_attr "mode" "DI")])
+ "TARGET_MMX || TARGET_MMX_WITH_SSE"
+ "@
+ punpckhdq\t{%2, %0|%0, %2}
+ #
+ #"
+ "&& reload_completed && TARGET_MMX_WITH_SSE"
+ [(const_int 0)]
+ "ix86_split_mmx_punpck (operands, true);"
+ [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+ (set_attr "type" "mmxcvt,sselog,sselog")
+ (set_attr "mode" "DI,TI,TI")])
-(define_insn "mmx_punpckldq"
- [(set (match_operand:V2SI 0 "register_operand" "=y")
+(define_insn_and_split "mmx_punpckldq"
+ [(set (match_operand:V2SI 0 "register_operand" "=y,Yx,Yy")
(vec_select:V2SI
(vec_concat:V4SI
- (match_operand:V2SI 1 "register_operand" "0")
- (match_operand:V2SI 2 "nonimmediate_operand" "ym"))
+ (match_operand:V2SI 1 "register_operand" "0,0,Yy")
+ (match_operand:V2SI 2 "nonimmediate_operand" "ym,Yx,Yy"))
(parallel [(const_int 0)
(const_int 2)])))]
- "TARGET_MMX"
- "punpckldq\t{%2, %0|%0, %k2}"
- [(set_attr "type" "mmxcvt")
- (set_attr "mode" "DI")])
+ "TARGET_MMX || TARGET_MMX_WITH_SSE"
+ "@
+ punpckldq\t{%2, %0|%0, %k2}
+ #
+ #"
+ "&& reload_completed && TARGET_MMX_WITH_SSE"
+ [(const_int 0)]
+ "ix86_split_mmx_punpck (operands, false);"
+ [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+ (set_attr "type" "mmxcvt,sselog,sselog")
+ (set_attr "mode" "DI,TI,TI")])
(define_expand "mmx_pinsrw"
[(set (match_operand:V4HI 0 "register_operand")
--
2.20.1
More information about the Gcc-patches
mailing list