This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH] Optimize integral lt + blend into just blend (PR target/54700)


On Thu, Nov 29, 2018 at 05:41:59PM +0100, Uros Bizjak wrote:
> On Thu, Nov 29, 2018 at 5:28 PM Jakub Jelinek <jakub@redhat.com> wrote:
> >
> > On Thu, Nov 29, 2018 at 05:21:53PM +0100, Uros Bizjak wrote:
> > > > >         * g++.target/i386/avx2-check.h: New file.
> > > > >         * g++.target/i386/m128-check.h: New file.
> > > > >         * g++.target/i386/m256-check.h: New file.
> > > > >         * g++.target/i386/avx-os-support.h: New file.
> > > >
> > > > OK.
> > >
> > > On second thought, should we rather use (pre-reload?)
> > > define_insn_and_split to split the combination to the blend insn?
> >
> > I've already committed it.  But I can work on a patch that does that tomorrow.
> 
> Thanks. You will probably need to split it after reload, since a
> change from intvec->FPvec is needed.

Like this?  Bootstrapped/regtested on x86_64-linux and i686-linux.

2018-11-29  Jakub Jelinek  <jakub@redhat.com>

	PR target/54700
	* config/i386/sse.md
	(*<sse4_1>_blendv<ssemodesuffix><avxsizesuffix>_lt,
	*<sse4_1>_blendv<ssefltmodesuffix><avxsizesuffix>_ltint,
	*<sse4_1_avx2>_pblendvb_lt): Change define_insn into
	define_insn_and_split.

--- gcc/config/i386/sse.md.jj	2018-11-29 15:32:27.597301378 +0100
+++ gcc/config/i386/sse.md	2018-11-29 18:52:42.747904630 +0100
@@ -15682,7 +15682,7 @@ (define_insn "sse4_1_blendv<ssemodesuffi
 	       ]
 	       (const_string "<ssevecmode>")))])
 
-(define_insn "*<sse4_1>_blendv<ssemodesuffix><avxsizesuffix>_lt"
+(define_insn_and_split "*<sse4_1>_blendv<ssemodesuffix><avxsizesuffix>_lt"
   [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x")
 	(unspec:VF_128_256
 	  [(match_operand:VF_128_256 1 "register_operand" "0,0,x")
@@ -15693,10 +15693,12 @@ (define_insn "*<sse4_1>_blendv<ssemodesu
 	       (match_operand:<sseintvecmode> 4 "const0_operand" "C,C,C")) 0)]
 	  UNSPEC_BLENDV))]
   "TARGET_SSE4_1"
-  "@
-   blendv<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
-   blendv<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
-   vblendv<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+  "#"
+  "&& reload_completed"
+  [(set (match_dup 0)
+	(unspec:VF_128_256
+	 [(match_dup 1) (match_dup 2) (match_dup 3)] UNSPEC_BLENDV))]
+  "operands[3] = gen_lowpart (<MODE>mode, operands[3]);"
   [(set_attr "isa" "noavx,noavx,avx")
    (set_attr "type" "ssemov")
    (set_attr "length_immediate" "1")
@@ -15712,7 +15714,7 @@ (define_mode_attr ssefltmodesuffix
 (define_mode_attr ssefltvecmode
   [(V2DI "V2DF") (V4DI "V4DF") (V4SI "V4SF") (V8SI "V8SF")])
 
-(define_insn "*<sse4_1>_blendv<ssefltmodesuffix><avxsizesuffix>_ltint"
+(define_insn_and_split "*<sse4_1>_blendv<ssefltmodesuffix><avxsizesuffix>_ltint"
   [(set (match_operand:<ssebytemode> 0 "register_operand" "=Yr,*x,x")
 	(unspec:<ssebytemode>
 	  [(match_operand:<ssebytemode> 1 "register_operand" "0,0,x")
@@ -15723,10 +15725,17 @@ (define_insn "*<sse4_1>_blendv<ssefltmod
 	       (match_operand:VI48_AVX 4 "const0_operand" "C,C,C")) 0)]
 	  UNSPEC_BLENDV))]
   "TARGET_SSE4_1"
-  "@
-   blendv<ssefltmodesuffix>\t{%3, %2, %0|%0, %2, %3}
-   blendv<ssefltmodesuffix>\t{%3, %2, %0|%0, %2, %3}
-   vblendv<ssefltmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+  "#"
+  "&& reload_completed"
+  [(set (match_dup 0)
+	(unspec:<ssefltvecmode>
+	 [(match_dup 1) (match_dup 2) (match_dup 3)] UNSPEC_BLENDV))]
+{
+  operands[0] = gen_lowpart (<ssefltvecmode>mode, operands[0]);
+  operands[1] = gen_lowpart (<ssefltvecmode>mode, operands[1]);
+  operands[2] = gen_lowpart (<ssefltvecmode>mode, operands[2]);
+  operands[3] = gen_lowpart (<ssefltvecmode>mode, operands[3]);
+}
   [(set_attr "isa" "noavx,noavx,avx")
    (set_attr "type" "ssemov")
    (set_attr "length_immediate" "1")
@@ -15834,7 +15843,7 @@ (define_insn "<sse4_1_avx2>_pblendvb"
    (set_attr "btver2_decode" "vector,vector,vector")
    (set_attr "mode" "<sseinsnmode>")])
 
-(define_insn "*<sse4_1_avx2>_pblendvb_lt"
+(define_insn_and_split "*<sse4_1_avx2>_pblendvb_lt"
   [(set (match_operand:VI1_AVX2 0 "register_operand" "=Yr,*x,x")
 	(unspec:VI1_AVX2
 	  [(match_operand:VI1_AVX2 1 "register_operand"  "0,0,x")
@@ -15843,10 +15852,12 @@ (define_insn "*<sse4_1_avx2>_pblendvb_lt
 			(match_operand:VI1_AVX2 4 "const0_operand" "C,C,C"))]
 	  UNSPEC_BLENDV))]
   "TARGET_SSE4_1"
-  "@
-   pblendvb\t{%3, %2, %0|%0, %2, %3}
-   pblendvb\t{%3, %2, %0|%0, %2, %3}
-   vpblendvb\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+  "#"
+  ""
+  [(set (match_dup 0)
+	(unspec:VI1_AVX2
+	 [(match_dup 1) (match_dup 2) (match_dup 3)] UNSPEC_BLENDV))]
+  ""
   [(set_attr "isa" "noavx,noavx,avx")
    (set_attr "type" "ssemov")
    (set_attr "prefix_extra" "1")


	Jakub


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]