This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[PATCH] PR target/70708: Suboptimal code generated when using _mm_set_sd (X64)
- From: "H.J. Lu" <hongjiu dot lu at intel dot com>
- To: gcc-patches at gcc dot gnu dot org
- Cc: Uros Bizjak <ubizjak at gmail dot com>
- Date: Mon, 18 Apr 2016 09:07:50 -0700
- Subject: [PATCH] PR target/70708: Suboptimal code generated when using _mm_set_sd (X64)
- Authentication-results: sourceware.org; auth=none
- Reply-to: "H.J. Lu" <hjl dot tools at gmail dot com>
"movq" should be used to load a double into an xmm register with zero_extend:
(set (reg:V2DF 90)
(vec_concat:V2DF (reg/v:DF 88 [ d ])
(const_double:DF 0.0 [0x0.0p+0])))
Unlike "movsd", which zero-extends the upper half only when loading from
memory, "movq" zero-extends with both memory and xmm register sources.
OK for trunk if there is no regression?
H.J.
--
gcc/
PR target/70708
* config/i386/sse.md (sse2_loadlpd): Accept load from "xm" and
replace "%vmovsd" with "%vmovq".
(vec_concatv2df): Likewise.
gcc/testsuite/
PR target/70708
* gcc.target/i386/pr70708.c: New test.
---
gcc/config/i386/sse.md | 12 ++++++------
gcc/testsuite/gcc.target/i386/pr70708.c | 14 ++++++++++++++
2 files changed, 20 insertions(+), 6 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/i386/pr70708.c
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 1ffb3b9..845ef56 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -8863,14 +8863,14 @@
"=x,x,x,x,x,x,x,x,m,m ,m")
(vec_concat:V2DF
(match_operand:DF 2 "nonimmediate_operand"
- " m,m,m,x,x,0,0,x,x,*f,r")
+ "xm,m,m,x,x,0,0,x,x,*f,r")
(vec_select:DF
(match_operand:V2DF 1 "vector_move_operand"
" C,0,x,0,x,x,o,o,0,0 ,0")
(parallel [(const_int 1)]))))]
"TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
"@
- %vmovsd\t{%2, %0|%0, %2}
+ %vmovq\t{%2, %0|%0, %2}
movlpd\t{%2, %0|%0, %2}
vmovlpd\t{%2, %1, %0|%0, %1, %2}
movsd\t{%2, %0|%0, %2}
@@ -8955,10 +8955,10 @@
(set_attr "mode" "V2DF,DF,DF")])
(define_insn "vec_concatv2df"
- [(set (match_operand:V2DF 0 "register_operand" "=x,x,v,x,v,x,x,v,x,x")
+ [(set (match_operand:V2DF 0 "register_operand" "=x,x,v,x,v,x,x, v,x,x")
(vec_concat:V2DF
- (match_operand:DF 1 "nonimmediate_operand" " 0,x,v,m,m,0,x,m,0,0")
- (match_operand:DF 2 "vector_move_operand" " x,x,v,1,1,m,m,C,x,m")))]
+ (match_operand:DF 1 "nonimmediate_operand" " 0,x,v,m,m,0,x,xm,0,0")
+ (match_operand:DF 2 "vector_move_operand" " x,x,v,1,1,m,m, C,x,m")))]
"TARGET_SSE
&& (!(MEM_P (operands[1]) && MEM_P (operands[2]))
|| (TARGET_SSE3 && rtx_equal_p (operands[1], operands[2])))"
@@ -8970,7 +8970,7 @@
vmovddup\t{%1, %0|%0, %1}
movhpd\t{%2, %0|%0, %2}
vmovhpd\t{%2, %1, %0|%0, %1, %2}
- %vmovsd\t{%1, %0|%0, %1}
+ %vmovq\t{%1, %0|%0, %1}
movlhps\t{%2, %0|%0, %2}
movhps\t{%2, %0|%0, %2}"
[(set_attr "isa" "sse2_noavx,avx,avx512vl,sse3,avx512vl,sse2_noavx,avx,sse2,noavx,noavx")
diff --git a/gcc/testsuite/gcc.target/i386/pr70708.c b/gcc/testsuite/gcc.target/i386/pr70708.c
new file mode 100644
index 0000000..2219e61
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr70708.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse2" } */
+
+typedef double __m128d __attribute__ ((__vector_size__ (16), __may_alias__));
+
+__m128d
+foo (double d)
+{
+ return __extension__ (__m128d){ d, 0.0 };
+}
+
+/* { dg-final { scan-assembler-times "movq\[ \\t\]+\[^\n\]*%xmm" 1 } } */
+/* { dg-final { scan-assembler-not "movsd\[ \\t\]+\[^\n\]*%xmm" } } */
+/* { dg-final { scan-assembler-not "\\(%\[er\]sp\\)" { target { ! ia32 } }} } */
--
2.5.5