PATCH: PR target/37157: [4.4 Regression] Wrong insn for _mm_unpackhi_epi64

H.J. Lu hjl.tools@gmail.com
Tue Aug 19 06:03:00 GMT 2008


This patch moves the punpckhqdq/punpcklqdq patterns before the shufpd
pattern in sse.md.  Since the first matching insn pattern is the one
used, we will then generate punpckhqdq/punpcklqdq, each of which is
1 byte shorter, instead of shufpd.  OK for trunk?

Thanks.


---
gcc/

2008-08-18  H.J. Lu  <hongjiu.lu@intel.com>

	PR target/37157
	* config/i386/sse.md (sse2_punpckhqdq, sse2_punpcklqdq): Moved
	before (sse2_shufpd_<mode>).

gcc/testsuite/

2008-08-18  H.J. Lu  <hongjiu.lu@intel.com>

	PR target/37157
	* gcc.target/i386/sse2-unpack-1.c: New.

--- gcc/config/i386/sse.md.unpack	2008-08-15 19:41:29.000000000 -0700
+++ gcc/config/i386/sse.md	2008-08-18 20:39:32.000000000 -0700
@@ -2610,6 +2610,35 @@
 	  	     (const_int 3)])))]
   "TARGET_SSE2")
 
+;; punpcklqdq and punpckhqdq are shorter than shufpd.
+(define_insn "sse2_punpckhqdq"
+  [(set (match_operand:V2DI 0 "register_operand" "=x")
+	(vec_select:V2DI
+	  (vec_concat:V4DI
+	    (match_operand:V2DI 1 "register_operand" "0")
+	    (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
+	  (parallel [(const_int 1)
+		     (const_int 3)])))]
+  "TARGET_SSE2"
+  "punpckhqdq\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sselog")
+   (set_attr "prefix_data16" "1")
+   (set_attr "mode" "TI")])
+
+(define_insn "sse2_punpcklqdq"
+  [(set (match_operand:V2DI 0 "register_operand" "=x")
+	(vec_select:V2DI
+	  (vec_concat:V4DI
+	    (match_operand:V2DI 1 "register_operand" "0")
+	    (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
+	  (parallel [(const_int 0)
+		     (const_int 2)])))]
+  "TARGET_SSE2"
+  "punpcklqdq\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sselog")
+   (set_attr "prefix_data16" "1")
+   (set_attr "mode" "TI")])
+
 (define_insn "sse2_shufpd_<mode>"
   [(set (match_operand:SSEMODE2D 0 "register_operand" "=x")
 	(vec_select:SSEMODE2D
@@ -4438,34 +4467,6 @@
    (set_attr "prefix_data16" "1")
    (set_attr "mode" "TI")])
 
-(define_insn "sse2_punpckhqdq"
-  [(set (match_operand:V2DI 0 "register_operand" "=x")
-	(vec_select:V2DI
-	  (vec_concat:V4DI
-	    (match_operand:V2DI 1 "register_operand" "0")
-	    (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
-	  (parallel [(const_int 1)
-		     (const_int 3)])))]
-  "TARGET_SSE2"
-  "punpckhqdq\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sselog")
-   (set_attr "prefix_data16" "1")
-   (set_attr "mode" "TI")])
-
-(define_insn "sse2_punpcklqdq"
-  [(set (match_operand:V2DI 0 "register_operand" "=x")
-	(vec_select:V2DI
-	  (vec_concat:V4DI
-	    (match_operand:V2DI 1 "register_operand" "0")
-	    (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
-	  (parallel [(const_int 0)
-		     (const_int 2)])))]
-  "TARGET_SSE2"
-  "punpcklqdq\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sselog")
-   (set_attr "prefix_data16" "1")
-   (set_attr "mode" "TI")])
-
 (define_insn "*sse4_1_pinsrb"
   [(set (match_operand:V16QI 0 "register_operand" "=x")
 	(vec_merge:V16QI
--- gcc/testsuite/gcc.target/i386/sse2-unpack-1.c.unpack	2008-08-18 20:41:19.000000000 -0700
+++ gcc/testsuite/gcc.target/i386/sse2-unpack-1.c	2008-08-18 20:34:38.000000000 -0700
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse2" } */
+
+#include <emmintrin.h>
+
+__m128i
+foo1 (__m128i s1, __m128i s2)
+{
+  return _mm_unpackhi_epi64 (s1, s2); 
+}
+
+__m128i
+foo2 (__m128i s1, __m128i s2)
+{
+  return _mm_unpacklo_epi64 (s1, s2); 
+}
+
+/* { dg-final { scan-assembler "punpcklqdq" } } */
+/* { dg-final { scan-assembler "punpckhqdq" } } */



More information about the Gcc-patches mailing list