PATCH: PR target/37157: [4.4 Regression] Wrong insn for _mm_unpackhi_epi64
H.J. Lu
hjl.tools@gmail.com
Tue Aug 19 06:03:00 GMT 2008
This patch moves punpckhqdq/punpcklqdq before shufpd so that we
will generate punpckhqdq/punpcklqdq, which is 1 byte shorter, instead
of shufpd. OK for trunk?
Thanks.
---
gcc/
2008-08-18 H.J. Lu <hongjiu.lu@intel.com>
PR target/37157
* config/i386/sse.md (sse2_punpckhqdq, sse2_punpcklqdq): Moved
before (sse2_shufpd_<mode>).
gcc/testsuite/
2008-08-18 H.J. Lu <hongjiu.lu@intel.com>
PR target/37157
* gcc.target/i386/sse2-unpack-1.c: New.
--- gcc/config/i386/sse.md.unpack 2008-08-15 19:41:29.000000000 -0700
+++ gcc/config/i386/sse.md 2008-08-18 20:39:32.000000000 -0700
@@ -2610,6 +2610,35 @@
(const_int 3)])))]
"TARGET_SSE2")
+;; punpcklqdq and punpckhqdq are shorter than shufpd.
+(define_insn "sse2_punpckhqdq"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (vec_select:V2DI
+ (vec_concat:V4DI
+ (match_operand:V2DI 1 "register_operand" "0")
+ (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
+ (parallel [(const_int 1)
+ (const_int 3)])))]
+ "TARGET_SSE2"
+ "punpckhqdq\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_data16" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse2_punpcklqdq"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (vec_select:V2DI
+ (vec_concat:V4DI
+ (match_operand:V2DI 1 "register_operand" "0")
+ (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
+ (parallel [(const_int 0)
+ (const_int 2)])))]
+ "TARGET_SSE2"
+ "punpcklqdq\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_data16" "1")
+ (set_attr "mode" "TI")])
+
(define_insn "sse2_shufpd_<mode>"
[(set (match_operand:SSEMODE2D 0 "register_operand" "=x")
(vec_select:SSEMODE2D
@@ -4438,34 +4467,6 @@
(set_attr "prefix_data16" "1")
(set_attr "mode" "TI")])
-(define_insn "sse2_punpckhqdq"
- [(set (match_operand:V2DI 0 "register_operand" "=x")
- (vec_select:V2DI
- (vec_concat:V4DI
- (match_operand:V2DI 1 "register_operand" "0")
- (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
- (parallel [(const_int 1)
- (const_int 3)])))]
- "TARGET_SSE2"
- "punpckhqdq\t{%2, %0|%0, %2}"
- [(set_attr "type" "sselog")
- (set_attr "prefix_data16" "1")
- (set_attr "mode" "TI")])
-
-(define_insn "sse2_punpcklqdq"
- [(set (match_operand:V2DI 0 "register_operand" "=x")
- (vec_select:V2DI
- (vec_concat:V4DI
- (match_operand:V2DI 1 "register_operand" "0")
- (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
- (parallel [(const_int 0)
- (const_int 2)])))]
- "TARGET_SSE2"
- "punpcklqdq\t{%2, %0|%0, %2}"
- [(set_attr "type" "sselog")
- (set_attr "prefix_data16" "1")
- (set_attr "mode" "TI")])
-
(define_insn "*sse4_1_pinsrb"
[(set (match_operand:V16QI 0 "register_operand" "=x")
(vec_merge:V16QI
--- gcc/testsuite/gcc.target/i386/sse2-unpack-1.c.unpack 2008-08-18 20:41:19.000000000 -0700
+++ gcc/testsuite/gcc.target/i386/sse2-unpack-1.c 2008-08-18 20:34:38.000000000 -0700
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse2" } */
+
+#include <emmintrin.h>
+
+__m128i
+foo1 (__m128i s1, __m128i s2)
+{
+ return _mm_unpackhi_epi64 (s1, s2);
+}
+
+__m128i
+foo2 (__m128i s1, __m128i s2)
+{
+ return _mm_unpacklo_epi64 (s1, s2);
+}
+
+/* { dg-final { scan-assembler "punpcklqdq" } } */
+/* { dg-final { scan-assembler "punpckhqdq" } } */
More information about the Gcc-patches
mailing list