This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[PATCH, i386]: Fix (part of) PR target/36222
- From: Uros Bizjak <ubizjak at gmail dot com>
- To: GCC Patches <gcc-patches at gcc dot gnu dot org>
- Date: Tue, 13 May 2008 23:33:27 +0200
- Subject: [PATCH, i386]: Fix (part of) PR target/36222
Hello!
The attached patch implements a small improvement to the code generated
for the testcase from the PR. Currently, the following testcase:
--cut here--
typedef long long __m128i __attribute__ ((__vector_size__ (16),
__may_alias__));
typedef int __v4si __attribute__ ((__vector_size__ (16)));
__m128i _mm_set_epi32 (int __q3, int __q2, int __q1, int __q0)
{
return (__m128i)(__v4si){ __q0, __q1, __q2, __q3 };
}
--cut here--
compiles to (-O2 -msse2):
movq %rcx, -8(%rsp)
movq -8(%rsp), %xmm1
movq %rdx, -8(%rsp)
movq -8(%rsp), %xmm0
movq %rsi, -8(%rsp)
punpckldq %xmm0, %xmm1
movq -8(%rsp), %xmm0
movq %rdi, -8(%rsp)
movq -8(%rsp), %xmm2
punpckldq %xmm2, %xmm0
movq %xmm1, %xmm2
punpcklqdq %xmm0, %xmm2
movdqa %xmm2, %xmm0
ret
Patched gcc generates a couple fewer move insns:
movq %rsi, -8(%rsp)
movq -8(%rsp), %xmm1
movq %rdi, -8(%rsp)
movq -8(%rsp), %xmm0
movq %rcx, -8(%rsp)
punpckldq %xmm0, %xmm1
movq -8(%rsp), %xmm0
movq %rdx, -8(%rsp)
movq -8(%rsp), %xmm2
punpckldq %xmm2, %xmm0
punpcklqdq %xmm1, %xmm0
ret
or with -march=core2:
movd %edi, %xmm0
movd %esi, %xmm1
movd %edx, %xmm2
punpckldq %xmm0, %xmm1
movd %ecx, %xmm0
punpckldq %xmm2, %xmm0
punpcklqdq %xmm1, %xmm0
ret
BTW: IMO, there is no reason to use movq for !TARGET_INTER_UNIT_MOVES
targets. movl/movd would also satisfy SImode moves.
2008-05-13 Uros Bizjak <ubizjak@gmail.com>
PR target/36222
* config/i386/i386.c (ix86_expand_vector_init_general): Rearrange op0
and op1 expansion before vector concat to have fewer live pseudos.
testsuite/ChangeLog
2008-05-13 Uros Bizjak <ubizjak@gmail.com>
PR target/36222
* gcc.target/i386/pr36222-1.c: New test.
The patch was bootstrapped and regression tested on x86_64-pc-linux-gnu
{,-m32}. The patch has been committed to mainline.
Uros.
Index: config/i386/i386.c
===================================================================
--- config/i386/i386.c (revision 135265)
+++ config/i386/i386.c (working copy)
@@ -23859,14 +23859,14 @@ ix86_expand_vector_init_general (bool mm
/* For V4SF and V4SI, we implement a concat of two V2 vectors.
Recurse to load the two halves. */
- op0 = gen_reg_rtx (half_mode);
- v = gen_rtvec (2, XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1));
- ix86_expand_vector_init (false, op0, gen_rtx_PARALLEL (half_mode, v));
-
op1 = gen_reg_rtx (half_mode);
v = gen_rtvec (2, XVECEXP (vals, 0, 2), XVECEXP (vals, 0, 3));
ix86_expand_vector_init (false, op1, gen_rtx_PARALLEL (half_mode, v));
+ op0 = gen_reg_rtx (half_mode);
+ v = gen_rtvec (2, XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1));
+ ix86_expand_vector_init (false, op0, gen_rtx_PARALLEL (half_mode, v));
+
use_vec_concat = true;
}
break;
@@ -23883,10 +23883,10 @@ ix86_expand_vector_init_general (bool mm
if (use_vec_concat)
{
- if (!register_operand (op0, half_mode))
- op0 = force_reg (half_mode, op0);
if (!register_operand (op1, half_mode))
op1 = force_reg (half_mode, op1);
+ if (!register_operand (op0, half_mode))
+ op0 = force_reg (half_mode, op0);
emit_insn (gen_rtx_SET (VOIDmode, target,
gen_rtx_VEC_CONCAT (mode, op0, op1)));
Index: testsuite/gcc.target/i386/pr36222-1.c
===================================================================
--- testsuite/gcc.target/i386/pr36222-1.c (revision 0)
+++ testsuite/gcc.target/i386/pr36222-1.c (revision 0)
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse2" } */
+
+typedef long long __m128i __attribute__ ((__vector_size__ (16), __may_alias__));
+typedef int __v4si __attribute__ ((__vector_size__ (16)));
+
+__m128i _mm_set_epi32 (int __q3, int __q2, int __q1, int __q0)
+{
+ return (__m128i)(__v4si){ __q0, __q1, __q2, __q3 };
+}
+
+/* { dg-final { scan-assembler-not "movdqa" } } */