PATCH: PR target/37157: [4.4 Regression] Wrong insn for _mm_unpackhi_epi64

Richard Guenther richard.guenther@gmail.com
Tue Aug 19 09:59:00 GMT 2008


On Tue, Aug 19, 2008 at 5:48 AM, H.J. Lu <hjl.tools@gmail.com> wrote:
> This patch moves the punpckhqdq/punpcklqdq patterns before the shufpd
> pattern in sse.md so that we generate punpckhqdq/punpcklqdq, each of
> which is 1 byte shorter, instead of shufpd.  OK for trunk?

Ok.

Thanks,
Richard.

> Thanks.
>
>
> ---
> gcc/
>
> 2008-08-18  H.J. Lu  <hongjiu.lu@intel.com>
>
>        PR target/37157
>        * config/i386/sse.md (sse2_punpckhqdq, sse2_punpcklqdq): Moved
>        before (sse2_shufpd_<mode>).
>
> gcc/testsuite/
>
> 2008-08-18  H.J. Lu  <hongjiu.lu@intel.com>
>
>        PR target/37157
>        * gcc.target/i386/sse2-unpack-1.c: New.
>
> --- gcc/config/i386/sse.md.unpack       2008-08-15 19:41:29.000000000 -0700
> +++ gcc/config/i386/sse.md      2008-08-18 20:39:32.000000000 -0700
> @@ -2610,6 +2610,35 @@
>                     (const_int 3)])))]
>   "TARGET_SSE2")
>
> +;; punpcklqdq and punpckhqdq are shorter than shufpd.
> +(define_insn "sse2_punpckhqdq"
> +  [(set (match_operand:V2DI 0 "register_operand" "=x")
> +       (vec_select:V2DI
> +         (vec_concat:V4DI
> +           (match_operand:V2DI 1 "register_operand" "0")
> +           (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
> +         (parallel [(const_int 1)
> +                    (const_int 3)])))]
> +  "TARGET_SSE2"
> +  "punpckhqdq\t{%2, %0|%0, %2}"
> +  [(set_attr "type" "sselog")
> +   (set_attr "prefix_data16" "1")
> +   (set_attr "mode" "TI")])
> +
> +(define_insn "sse2_punpcklqdq"
> +  [(set (match_operand:V2DI 0 "register_operand" "=x")
> +       (vec_select:V2DI
> +         (vec_concat:V4DI
> +           (match_operand:V2DI 1 "register_operand" "0")
> +           (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
> +         (parallel [(const_int 0)
> +                    (const_int 2)])))]
> +  "TARGET_SSE2"
> +  "punpcklqdq\t{%2, %0|%0, %2}"
> +  [(set_attr "type" "sselog")
> +   (set_attr "prefix_data16" "1")
> +   (set_attr "mode" "TI")])
> +
>  (define_insn "sse2_shufpd_<mode>"
>   [(set (match_operand:SSEMODE2D 0 "register_operand" "=x")
>        (vec_select:SSEMODE2D
> @@ -4438,34 +4467,6 @@
>    (set_attr "prefix_data16" "1")
>    (set_attr "mode" "TI")])
>
> -(define_insn "sse2_punpckhqdq"
> -  [(set (match_operand:V2DI 0 "register_operand" "=x")
> -       (vec_select:V2DI
> -         (vec_concat:V4DI
> -           (match_operand:V2DI 1 "register_operand" "0")
> -           (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
> -         (parallel [(const_int 1)
> -                    (const_int 3)])))]
> -  "TARGET_SSE2"
> -  "punpckhqdq\t{%2, %0|%0, %2}"
> -  [(set_attr "type" "sselog")
> -   (set_attr "prefix_data16" "1")
> -   (set_attr "mode" "TI")])
> -
> -(define_insn "sse2_punpcklqdq"
> -  [(set (match_operand:V2DI 0 "register_operand" "=x")
> -       (vec_select:V2DI
> -         (vec_concat:V4DI
> -           (match_operand:V2DI 1 "register_operand" "0")
> -           (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
> -         (parallel [(const_int 0)
> -                    (const_int 2)])))]
> -  "TARGET_SSE2"
> -  "punpcklqdq\t{%2, %0|%0, %2}"
> -  [(set_attr "type" "sselog")
> -   (set_attr "prefix_data16" "1")
> -   (set_attr "mode" "TI")])
> -
>  (define_insn "*sse4_1_pinsrb"
>   [(set (match_operand:V16QI 0 "register_operand" "=x")
>        (vec_merge:V16QI
> --- gcc/testsuite/gcc.target/i386/sse2-unpack-1.c.unpack        2008-08-18 20:41:19.000000000 -0700
> +++ gcc/testsuite/gcc.target/i386/sse2-unpack-1.c       2008-08-18 20:34:38.000000000 -0700
> @@ -0,0 +1,19 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -msse2" } */
> +
> +#include <emmintrin.h>
> +
> +__m128i
> +foo1 (__m128i s1, __m128i s2)
> +{
> +  return _mm_unpackhi_epi64 (s1, s2);
> +}
> +
> +__m128i
> +foo2 (__m128i s1, __m128i s2)
> +{
> +  return _mm_unpacklo_epi64 (s1, s2);
> +}
> +
> +/* { dg-final { scan-assembler "punpcklqdq" } } */
> +/* { dg-final { scan-assembler "punpckhqdq" } } */
>



More information about the Gcc-patches mailing list