[PATCH][RFC] Add x86 subadd SLP pattern

Richard Biener rguenther@suse.de
Thu Jun 17 10:00:47 GMT 2021


On Thu, 17 Jun 2021, Uros Bizjak wrote:

> On Thu, Jun 17, 2021 at 11:44 AM Richard Biener <rguenther@suse.de> wrote:
> >
> > This addds SLP pattern recognition for the SSE3/AVX [v]addsubp{ds} v0, v1
> > instructions which compute { v0[0] - v1[0], v0[1], + v1[1], ... }
> > thus subtract, add alternating on lanes, starting with subtract.
> >
> > It adds a corresponding optab and direct internal function,
> > vec_subadd$a3 and at the moment to make the i386 backend changes
> > "obvious", duplicates the existing avx_addsubv4df3 pattern with
> > the new canonical name (CODE_FOR_* and gen_* are used throughout the
> > intrinsic code, so the actual change to rename all existing patterns
> > will be quite a bit bigger).  I expect some bike-shedding on
> > subadd vs. addsub so I delay that change ;)
> >
> > ARC seems to have both addsub{V2HI,V2SI,V4HI} and
> > subadd{V2HI,V2SI,V4HI}.  ARM iwmmxt has wsubaddx on V4HI but
> > I didn't dare to decipher whether it's -, + or +, -.  bfin
> > has both variants as well plus a saturating variant of both (on v2hi).
> > But none of the major vector ISAs besides x86 seem to have it.
> > Still some targets having both and opting for the naming I choose
> > would make that a better fit IMHO.
> >
> > Uros/Hontao - would mass-renaming of the x86 addsub patterns
> > to subadd be OK?  (not touching the FMA ones which have both)
> 
> We can add define_expand (this is how we approached the problem in the
> past for x86). If there are not too many of them, then this is the
> preferred approach (see below) ...

OK.

> ... until someone comes along and mass-renames everything to a generic name.

Though now that I tried it isn't so bad for the addsub case (see
patch below).  Would that be prefered or would it be an incomplete
renaming and thus the define_expand prefered?

Thanks,
Richard.

diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def
index 80c2a2c0294..3222eaedbd0 100644
--- a/gcc/config/i386/i386-builtin.def
+++ b/gcc/config/i386/i386-builtin.def
@@ -855,8 +855,8 @@ BDESC (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_subv1di3, "__
 BDESC (OPTION_MASK_ISA_SSE3, 0, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF)
 BDESC (OPTION_MASK_ISA_SSE3, 0, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF)
 
-BDESC (OPTION_MASK_ISA_SSE3, 0, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF)
-BDESC (OPTION_MASK_ISA_SSE3, 0, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF)
+BDESC (OPTION_MASK_ISA_SSE3, 0, CODE_FOR_vec_subaddv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF)
+BDESC (OPTION_MASK_ISA_SSE3, 0, CODE_FOR_vec_subaddv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF)
 BDESC (OPTION_MASK_ISA_SSE3, 0, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF)
 BDESC (OPTION_MASK_ISA_SSE3, 0, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF)
 BDESC (OPTION_MASK_ISA_SSE3, 0, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF)
@@ -996,8 +996,8 @@ BDESC (OPTION_MASK_ISA_SSE2, 0, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128
 /* AVX */
 BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF)
 BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF)
-BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF)
-BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF)
+BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_vec_subaddv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF)
+BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_vec_subaddv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF)
 BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF)
 BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF)
 BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF)
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 73238c9874a..48818dcf8cb 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -2396,8 +2396,6 @@
    (set_attr "prefix" "<round_saeonly_scalar_prefix>")
    (set_attr "mode" "<ssescalarmode>")])
 
-/* ???  Rename avx_addsubv4df3, but its name is used throughout the backed,
-   so avoid this churn for the moment.  */
 (define_insn "vec_subaddv4df3"
   [(set (match_operand:V4DF 0 "register_operand" "=x")
 	(vec_merge:V4DF
@@ -2412,21 +2410,7 @@
    (set_attr "prefix" "vex")
    (set_attr "mode" "V4DF")])
 
-(define_insn "avx_addsubv4df3"
-  [(set (match_operand:V4DF 0 "register_operand" "=x")
-	(vec_merge:V4DF
-	  (minus:V4DF
-	    (match_operand:V4DF 1 "register_operand" "x")
-	    (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
-	  (plus:V4DF (match_dup 1) (match_dup 2))
-	  (const_int 5)))]
-  "TARGET_AVX"
-  "vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
-  [(set_attr "type" "sseadd")
-   (set_attr "prefix" "vex")
-   (set_attr "mode" "V4DF")])
-
-(define_insn "sse3_addsubv2df3"
+(define_insn "vec_subaddv2df3"
   [(set (match_operand:V2DF 0 "register_operand" "=x,x")
 	(vec_merge:V2DF
 	  (minus:V2DF
@@ -2444,7 +2428,7 @@
    (set_attr "prefix" "orig,vex")
    (set_attr "mode" "V2DF")])
 
-(define_insn "avx_addsubv8sf3"
+(define_insn "vec_subaddv8sf3"
   [(set (match_operand:V8SF 0 "register_operand" "=x")
 	(vec_merge:V8SF
 	  (minus:V8SF
@@ -2458,7 +2442,7 @@
    (set_attr "prefix" "vex")
    (set_attr "mode" "V8SF")])
 
-(define_insn "sse3_addsubv4sf3"
+(define_insn "vec_subaddv4sf3"
   [(set (match_operand:V4SF 0 "register_operand" "=x,x")
 	(vec_merge:V4SF
 	  (minus:V4SF


More information about the Gcc-patches mailing list