This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH, i386]: Fix (part of) PR target/36222


Hello!

The attached patch implements a small improvement to the code generated for the testcase from the PR. Currently, the following testcase:

--cut here--
typedef long long __m128i __attribute__ ((__vector_size__ (16), __may_alias__));
typedef int __v4si __attribute__ ((__vector_size__ (16)));


__m128i _mm_set_epi32 (int __q3, int __q2, int __q1, int __q0)
{
 return (__m128i)(__v4si){ __q0, __q1, __q2, __q3 };
}
--cut here--

compiles to (-O2 -msse2):

       movq    %rcx, -8(%rsp)
       movq    -8(%rsp), %xmm1
       movq    %rdx, -8(%rsp)
       movq    -8(%rsp), %xmm0
       movq    %rsi, -8(%rsp)
       punpckldq       %xmm0, %xmm1
       movq    -8(%rsp), %xmm0
       movq    %rdi, -8(%rsp)
       movq    -8(%rsp), %xmm2
       punpckldq       %xmm2, %xmm0
       movq    %xmm1, %xmm2
       punpcklqdq      %xmm0, %xmm2
       movdqa  %xmm2, %xmm0
       ret

The patched gcc generates a couple fewer move insns:

       movq    %rsi, -8(%rsp)
       movq    -8(%rsp), %xmm1
       movq    %rdi, -8(%rsp)
       movq    -8(%rsp), %xmm0
       movq    %rcx, -8(%rsp)
       punpckldq       %xmm0, %xmm1
       movq    -8(%rsp), %xmm0
       movq    %rdx, -8(%rsp)
       movq    -8(%rsp), %xmm2
       punpckldq       %xmm2, %xmm0
       punpcklqdq      %xmm1, %xmm0
       ret

or with -march=core2:

       movd    %edi, %xmm0
       movd    %esi, %xmm1
       movd    %edx, %xmm2
       punpckldq       %xmm0, %xmm1
       movd    %ecx, %xmm0
       punpckldq       %xmm2, %xmm0
       punpcklqdq      %xmm1, %xmm0
       ret

BTW: IMO, there is no reason to use movq for !TARGET_INTER_UNIT_MOVES targets. movl/movd would also satisfy SImode moves.

2008-05-13 Uros Bizjak <ubizjak@gmail.com>

PR target/36222
* config/i386/i386.c (ix86_expand_vector_init_general): Rearrange op0
and op1 expansion before vector concat to have less live pseudos.


testsuite/ChangeLog

2008-05-13 Uros Bizjak <ubizjak@gmail.com>

       PR target/36222
       * gcc.target/i386/pr36222-1.c: New test.

The patch was bootstrapped and regression tested on x86_64-pc-linux-gnu {,-m32}, and has been committed to mainline.

Uros.
Index: config/i386/i386.c
===================================================================
--- config/i386/i386.c	(revision 135265)
+++ config/i386/i386.c	(working copy)
@@ -23859,14 +23859,14 @@ ix86_expand_vector_init_general (bool mm
 	/* For V4SF and V4SI, we implement a concat of two V2 vectors.
 	   Recurse to load the two halves.  */
 
-	op0 = gen_reg_rtx (half_mode);
-	v = gen_rtvec (2, XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1));
-	ix86_expand_vector_init (false, op0, gen_rtx_PARALLEL (half_mode, v));
-
 	op1 = gen_reg_rtx (half_mode);
 	v = gen_rtvec (2, XVECEXP (vals, 0, 2), XVECEXP (vals, 0, 3));
 	ix86_expand_vector_init (false, op1, gen_rtx_PARALLEL (half_mode, v));
 
+	op0 = gen_reg_rtx (half_mode);
+	v = gen_rtvec (2, XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1));
+	ix86_expand_vector_init (false, op0, gen_rtx_PARALLEL (half_mode, v));
+
 	use_vec_concat = true;
       }
       break;
@@ -23883,10 +23883,10 @@ ix86_expand_vector_init_general (bool mm
 
   if (use_vec_concat)
     {
-      if (!register_operand (op0, half_mode))
-	op0 = force_reg (half_mode, op0);
       if (!register_operand (op1, half_mode))
 	op1 = force_reg (half_mode, op1);
+      if (!register_operand (op0, half_mode))
+	op0 = force_reg (half_mode, op0);
 
       emit_insn (gen_rtx_SET (VOIDmode, target,
 			      gen_rtx_VEC_CONCAT (mode, op0, op1)));
Index: testsuite/gcc.target/i386/pr36222-1.c
===================================================================
--- testsuite/gcc.target/i386/pr36222-1.c	(revision 0)
+++ testsuite/gcc.target/i386/pr36222-1.c	(revision 0)
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse2" } */
+
+typedef long long __m128i __attribute__ ((__vector_size__ (16), __may_alias__));
+typedef int __v4si __attribute__ ((__vector_size__ (16)));
+
+__m128i _mm_set_epi32 (int __q3, int __q2, int __q1, int __q0)
+{
+  return (__m128i)(__v4si){ __q0, __q1, __q2, __q3 };
+}
+
+/* { dg-final { scan-assembler-not "movdqa" } } */

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]