This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Re: [i386] recognize haddpd
- From: Marc Glisse <marc dot glisse at inria dot fr>
- To: Uros Bizjak <ubizjak at gmail dot com>
- Cc: gcc-patches at gcc dot gnu dot org
- Date: Mon, 8 Oct 2012 16:40:34 +0200 (CEST)
- Subject: Re: [i386] recognize haddpd
- References: <alpine.DEB.2.02.1209021730070.12824@stedding.saclay.inria.fr> <alpine.DEB.2.02.1209111327570.3705@laptop-mg.saclay.inria.fr> <alpine.DEB.2.02.1209261712590.3773@laptop-mg.saclay.inria.fr> <CAFULd4Z7axhjgfx=4YQZNK=P1mnLerjkQ6TuL6unKjgM4RnnQg@mail.gmail.com>
On Fri, 28 Sep 2012, Uros Bizjak wrote:
>> 2) {v[0]-v[1], v[0]-v[1]} is not recognized as a hsubpd because
>> vec_duplicate doesn't match vec_concat. Do we really need to duplicate (no
>> pun intended) the pattern?
> You can add this transformation to simplify-rtx.c. Probably vec_concat
> with two equal operands can be canonicalized as vec_duplicate.
Actually, it is replacing vec_duplicate with vec_concat that would help.
Well, I'll see about that later.
Here is what I came up with, trying to follow your other advice (thanks a
lot!).
Passes bootstrap+testsuite.
2012-10-08 Marc Glisse <marc.glisse@inria.fr>
gcc/
PR target/54400
* config/i386/i386.md (type attribute): Add sseadd1.
(unit attribute): Add support for sseadd1.
* config/i386/sse.md (sse3_h<plusminus_insn>v2df3): Split into...
(sse3_haddv2df3): ... expander.
(*sse3_haddv2df3): ... define_insn. Accept permuted operands.
(sse3_hsubv2df3): ... define_insn.
(*sse3_haddv2df3_low): New define_insn.
(*sse3_hsubv2df3_low): New define_insn.
gcc/testsuite/
PR target/54400
* gcc.target/i386/pr54400.c: New testcase.
--
Marc Glisse
Index: gcc/testsuite/gcc.target/i386/pr54400.c
===================================================================
--- gcc/testsuite/gcc.target/i386/pr54400.c (revision 0)
+++ gcc/testsuite/gcc.target/i386/pr54400.c (revision 0)
@@ -0,0 +1,53 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse3 -mfpmath=sse" } */
+
+#include <x86intrin.h>
+
+double f (__m128d p)
+{
+ return p[0] - p[1];
+}
+
+double g1 (__m128d p)
+{
+ return p[0] + p[1];
+}
+
+double g2 (__m128d p)
+{
+ return p[1] + p[0];
+}
+
+__m128d h (__m128d p, __m128d q)
+{
+ __m128d r = { p[0] - p[1], q[0] - q[1] };
+ return r;
+}
+
+__m128d i1 (__m128d p, __m128d q)
+{
+ __m128d r = { p[0] + p[1], q[0] + q[1] };
+ return r;
+}
+
+__m128d i2 (__m128d p, __m128d q)
+{
+ __m128d r = { p[0] + p[1], q[1] + q[0] };
+ return r;
+}
+
+__m128d i3 (__m128d p, __m128d q)
+{
+ __m128d r = { p[1] + p[0], q[0] + q[1] };
+ return r;
+}
+
+__m128d i4 (__m128d p, __m128d q)
+{
+ __m128d r = { p[1] + p[0], q[1] + q[0] };
+ return r;
+}
+
+/* { dg-final { scan-assembler-times "hsubpd" 2 } } */
+/* { dg-final { scan-assembler-times "haddpd" 6 } } */
+/* { dg-final { scan-assembler-not "unpck" } } */
Property changes on: gcc/testsuite/gcc.target/i386/pr54400.c
___________________________________________________________________
Added: svn:keywords
+ Author Date Id Revision URL
Added: svn:eol-style
+ native
Index: gcc/config/i386/i386.md
===================================================================
--- gcc/config/i386/i386.md (revision 192206)
+++ gcc/config/i386/i386.md (working copy)
@@ -320,36 +320,36 @@
;; provided in other attributes.
(define_attr "type"
"other,multi,
alu,alu1,negnot,imov,imovx,lea,
incdec,ishift,ishiftx,ishift1,rotate,rotatex,rotate1,imul,imulx,idiv,
icmp,test,ibr,setcc,icmov,
push,pop,call,callv,leave,
str,bitmanip,
fmov,fop,fsgn,fmul,fdiv,fpspc,fcmov,fcmp,fxch,fistp,fisttp,frndint,
sselog,sselog1,sseiadd,sseiadd1,sseishft,sseishft1,sseimul,
- sse,ssemov,sseadd,ssemul,ssecmp,ssecomi,ssecvt,ssecvt1,sseicvt,ssediv,sseins,
- ssemuladd,sse4arg,lwp,
+ sse,ssemov,sseadd,sseadd1,ssemul,ssecmp,ssecomi,ssecvt,ssecvt1,sseicvt,
+ ssediv,sseins,ssemuladd,sse4arg,lwp,
mmx,mmxmov,mmxadd,mmxmul,mmxcmp,mmxcvt,mmxshft"
(const_string "other"))
;; Main data type used by the insn
(define_attr "mode"
"unknown,none,QI,HI,SI,DI,TI,OI,SF,DF,XF,TF,V8SF,V4DF,V4SF,V2DF,V2SF,V1DF"
(const_string "unknown"))
;; The CPU unit operations uses.
(define_attr "unit" "integer,i387,sse,mmx,unknown"
(cond [(eq_attr "type" "fmov,fop,fsgn,fmul,fdiv,fpspc,fcmov,fcmp,fxch,fistp,fisttp,frndint")
(const_string "i387")
(eq_attr "type" "sselog,sselog1,sseiadd,sseiadd1,sseishft,sseishft1,sseimul,
- sse,ssemov,sseadd,ssemul,ssecmp,ssecomi,ssecvt,
+ sse,ssemov,sseadd,sseadd1,ssemul,ssecmp,ssecomi,ssecvt,
ssecvt1,sseicvt,ssediv,sseins,ssemuladd,sse4arg")
(const_string "sse")
(eq_attr "type" "mmx,mmxmov,mmxadd,mmxmul,mmxcmp,mmxcvt,mmxshft")
(const_string "mmx")
(eq_attr "type" "other")
(const_string "unknown")]
(const_string "integer")))
;; The (bounding maximum) length of an instruction immediate.
(define_attr "length_immediate" ""
Index: gcc/config/i386/sse.md
===================================================================
--- gcc/config/i386/sse.md (revision 192206)
+++ gcc/config/i386/sse.md (working copy)
@@ -1209,42 +1209,120 @@
(vec_select:DF (match_dup 1) (parallel [(const_int 3)])))
(plusminus:DF
(vec_select:DF (match_dup 2) (parallel [(const_int 2)]))
(vec_select:DF (match_dup 2) (parallel [(const_int 3)]))))))]
"TARGET_AVX"
"vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "type" "sseadd")
(set_attr "prefix" "vex")
(set_attr "mode" "V4DF")])
-(define_insn "sse3_h<plusminus_insn>v2df3"
+(define_expand "sse3_haddv2df3"
+ [(set (match_operand:V2DF 0 "register_operand")
+ (vec_concat:V2DF
+ (plus:DF
+ (vec_select:DF
+ (match_operand:V2DF 1 "register_operand")
+ (parallel [(const_int 0)]))
+ (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
+ (plus:DF
+ (vec_select:DF
+ (match_operand:V2DF 2 "nonimmediate_operand")
+ (parallel [(const_int 0)]))
+ (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
+ "TARGET_SSE3")
+
+(define_insn "*sse3_haddv2df3"
[(set (match_operand:V2DF 0 "register_operand" "=x,x")
(vec_concat:V2DF
- (plusminus:DF
+ (plus:DF
+ (vec_select:DF
+ (match_operand:V2DF 1 "register_operand" "0,x")
+ (parallel [(match_operand:SI 3 "const_0_to_1_operand")]))
+ (vec_select:DF
+ (match_dup 1)
+ (parallel [(match_operand:SI 4 "const_0_to_1_operand")])))
+ (plus:DF
+ (vec_select:DF
+ (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm")
+ (parallel [(match_operand:SI 5 "const_0_to_1_operand")]))
+ (vec_select:DF
+ (match_dup 2)
+ (parallel [(match_operand:SI 6 "const_0_to_1_operand")])))))]
+ "TARGET_SSE3 && INTVAL (operands[3]) != INTVAL (operands[4])
+ && INTVAL (operands[5]) != INTVAL (operands[6])"
+ "@
+ haddpd\t{%2, %0|%0, %2}
+ vhaddpd\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "isa" "noavx,avx")
+ (set_attr "type" "sseadd")
+ (set_attr "prefix" "orig,vex")
+ (set_attr "mode" "V2DF")])
+
+(define_insn "sse3_hsubv2df3"
+ [(set (match_operand:V2DF 0 "register_operand" "=x,x")
+ (vec_concat:V2DF
+ (minus:DF
(vec_select:DF
(match_operand:V2DF 1 "register_operand" "0,x")
(parallel [(const_int 0)]))
(vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
- (plusminus:DF
+ (minus:DF
(vec_select:DF
(match_operand:V2DF 2 "nonimmediate_operand" "xm,xm")
(parallel [(const_int 0)]))
(vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
"TARGET_SSE3"
"@
- h<plusminus_mnemonic>pd\t{%2, %0|%0, %2}
- vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
+ hsubpd\t{%2, %0|%0, %2}
+ vhsubpd\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "isa" "noavx,avx")
(set_attr "type" "sseadd")
(set_attr "prefix" "orig,vex")
(set_attr "mode" "V2DF")])
+(define_insn "*sse3_haddv2df3_low"
+ [(set (match_operand:DF 0 "register_operand" "=x,x")
+ (plus:DF
+ (vec_select:DF
+ (match_operand:V2DF 1 "register_operand" "0,x")
+ (parallel [(match_operand:SI 2 "const_0_to_1_operand")]))
+ (vec_select:DF
+ (match_dup 1)
+ (parallel [(match_operand:SI 3 "const_0_to_1_operand")]))))]
+ "TARGET_SSE3 && INTVAL (operands[2]) != INTVAL (operands[3])"
+ "@
+ haddpd\t{%0, %0|%0, %0}
+ vhaddpd\t{%1, %1, %0|%0, %1, %1}"
+ [(set_attr "isa" "noavx,avx")
+ (set_attr "type" "sseadd1")
+ (set_attr "prefix" "orig,vex")
+ (set_attr "mode" "V2DF")])
+
+(define_insn "*sse3_hsubv2df3_low"
+ [(set (match_operand:DF 0 "register_operand" "=x,x")
+ (minus:DF
+ (vec_select:DF
+ (match_operand:V2DF 1 "register_operand" "0,x")
+ (parallel [(const_int 0)]))
+ (vec_select:DF
+ (match_dup 1)
+ (parallel [(const_int 1)]))))]
+ "TARGET_SSE3"
+ "@
+ hsubpd\t{%0, %0|%0, %0}
+ vhsubpd\t{%1, %1, %0|%0, %1, %1}"
+ [(set_attr "isa" "noavx,avx")
+ (set_attr "type" "sseadd1")
+ (set_attr "prefix" "orig,vex")
+ (set_attr "mode" "V2DF")])
+
(define_insn "avx_h<plusminus_insn>v8sf3"
[(set (match_operand:V8SF 0 "register_operand" "=x")
(vec_concat:V8SF
(vec_concat:V4SF
(vec_concat:V2SF
(plusminus:SF
(vec_select:SF
(match_operand:V8SF 1 "register_operand" "x")
(parallel [(const_int 0)]))
(vec_select:SF (match_dup 1) (parallel [(const_int 1)])))