[PATCH i386 AVX512] [75/n] Update vec_init.

Kirill Yukhin kirill.yukhin@gmail.com
Fri Oct 17 12:30:00 GMT 2014


Hello Jakub,
On 15 Oct 18:23, Jakub Jelinek wrote:
> On Thu, Oct 09, 2014 at 04:13:25PM +0400, Kirill Yukhin wrote:
> > --- a/gcc/config/i386/i386.c
> > +++ b/gcc/config/i386/i386.c
> > @@ -39821,6 +39823,9 @@ ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
> >        goto widen;
> >  
> >      case V8HImode:
> > +      if (TARGET_AVX512VL)
> > +        return ix86_vector_duplicate_value (mode, target, val);
> > +
> 
> Shouldn't that be TARGET_AVX512VL && TARGET_AVX512BW ?
Nice catch! Fixed.
 
> >        if (TARGET_SSE2)
> >  	{
> >  	  struct expand_vec_perm_d dperm;
> > @@ -39851,6 +39856,9 @@ ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
> >        goto widen;
> >  
> >      case V16QImode:
> > +      if (TARGET_AVX512VL)
> > +        return ix86_vector_duplicate_value (mode, target, val);
> > +
> 
> Ditto.
Ditto.

> >        if (TARGET_SSE2)
> >  	goto permute;
> >        goto widen;
> > @@ -39880,16 +39888,19 @@ ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
> >  
> >      case V16HImode:
> >      case V32QImode:
> > -      {
> > -	enum machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
> > -	rtx x = gen_reg_rtx (hvmode);
> > +      if (TARGET_AVX512VL)
> > +        return ix86_vector_duplicate_value (mode, target, val);
> 
> Ditto.
Ditto.
 
> > @@ -40503,6 +40515,42 @@ half:
> >  			      gen_rtx_VEC_CONCAT (mode, op0, op1)));
> >        return;
> >  
> > +    case V64QImode:
> > +      quarter_mode = V16QImode;
> > +      half_mode = V32QImode;
> > +      goto quarter;
> > +
> > +    case V32HImode:
> > +      quarter_mode = V8HImode;
> > +      half_mode = V16HImode;
> > +      goto quarter;
 
> I wonder whether for these modes it can ever be beneficial to build them
> through interleaves/concatenations etc., if it wouldn't be better to build
> them by storing all values into memory and just reading it back.
I've tried this example:
#include <immintrin.h>

unsigned char a0, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14,
  a15, a16, a17, a18, a19, a20, a21, a22, a23, a24, a25, a26, a27, a28, a29,
  a30, a31, a32, a33, a34, a35, a36, a37, a38, a39, a40, a41, a42, a43, a44,
  a45, a46, a47, a48, a49, a50, a51, a52, a53, a54, a55, a56, a57, a58, a59,
  a60, a61, a62, a63;

__m512i foo ()
{
  return __extension__ (__m512i)(__v64qi){
    a0, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14,
      a15, a16, a17, a18, a19, a20, a21, a22, a23, a24, a25, a26, a27, a28, a29,
      a30, a31, a32, a33, a34, a35, a36, a37, a38, a39, a40, a41, a42, a43, a44,
      a45, a46, a47, a48, a49, a50, a51, a52, a53, a54, a55, a56, a57, a58, a59,
      a60, a61, a62, a63 };
}

w/ and w/o -mavx512bw (and always -mavx512f).

When, this code works, we've got 127 lines of assembly to do this init.
W/o AVX-512BW we've got > 300 lines of code (mostly on GPRs, using sal, and etc.)

Then I've looked into actual assembly w/ -mavx512bw and it turns out that no
AVX-512BW insn were generated, only AVX-512F (and below). Fixed iterator.

 
> > -(define_mode_iterator VI48F_512 [V16SI V16SF V8DI V8DF])
> > +(define_mode_iterator VI48F_I12_AVX512BW
> > +  [V16SI V16SF V8DI V8DF
> > +  (V32HI "TARGET_AVX512BW") (V64QI "TARGET_AVX512BW")])
> 
> What does the I12 stand for?  Wasn't it meant to be VI48F_512_AVX512BW
> or I512?
Actually, I am not awere of any name convention for iterators.
As far as I understand, name [more or less] for vector mode
should reflect:
  - Type family of the unit: float or int
  - Size of the unit: 1, 2, 4 etc. bytes
  - If possible, target predicates to enable certain modes in
    given iterator.

The name is:
  - Vector (V)
  - I48F - contains both ints and floats of size 4 and 8
  - I12 - contains ints of size 1 and 2
  - AVX512BW - affected by the target (according to previous note - to be removed)

Maybe it'll be better to name it: VF48_I1248?

--
Thanks, K

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index baf0d3d..c3202c4 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -39760,6 +39760,8 @@ ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
     case V8SFmode:
     case V8SImode:
     case V2DFmode:
+    case V64QImode:
+    case V32HImode:
     case V2DImode:
     case V4SFmode:
     case V4SImode:
@@ -39790,6 +39792,9 @@ ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
       goto widen;
 
     case V8HImode:
+      if (TARGET_AVX512VL && TARGET_AVX512BW)
+        return ix86_vector_duplicate_value (mode, target, val);
+
       if (TARGET_SSE2)
 	{
 	  struct expand_vec_perm_d dperm;
@@ -39820,6 +39825,9 @@ ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
       goto widen;
 
     case V16QImode:
+      if (TARGET_AVX512VL && TARGET_AVX512BW)
+        return ix86_vector_duplicate_value (mode, target, val);
+
       if (TARGET_SSE2)
 	goto permute;
       goto widen;
@@ -39849,16 +39857,19 @@ ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
 
     case V16HImode:
     case V32QImode:
-      {
-	enum machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
-	rtx x = gen_reg_rtx (hvmode);
+      if (TARGET_AVX512VL && TARGET_AVX512BW)
+        return ix86_vector_duplicate_value (mode, target, val);
+      else
+	{
+	  enum machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
+	  rtx x = gen_reg_rtx (hvmode);
 
-	ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
-	gcc_assert (ok);
+	  ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
+	  gcc_assert (ok);
 
-	x = gen_rtx_VEC_CONCAT (mode, x, x);
-	emit_insn (gen_rtx_SET (VOIDmode, target, x));
-      }
+	  x = gen_rtx_VEC_CONCAT (mode, x, x);
+	  emit_insn (gen_rtx_SET (VOIDmode, target, x));
+	}
       return true;
 
     default:
@@ -40420,8 +40431,9 @@ static void
 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
 				 rtx target, rtx vals)
 {
-  rtx ops[64], op0, op1;
+  rtx ops[64], op0, op1, op2, op3, op4, op5;
   enum machine_mode half_mode = VOIDmode;
+  enum machine_mode quarter_mode = VOIDmode;
   int n, i;
 
   switch (mode)
@@ -40472,6 +40484,42 @@ half:
 			      gen_rtx_VEC_CONCAT (mode, op0, op1)));
       return;
 
+    case V64QImode:
+      quarter_mode = V16QImode;
+      half_mode = V32QImode;
+      goto quarter;
+
+    case V32HImode:
+      quarter_mode = V8HImode;
+      half_mode = V16HImode;
+      goto quarter;
+
+quarter:
+      n = GET_MODE_NUNITS (mode);
+      for (i = 0; i < n; i++)
+	ops[i] = XVECEXP (vals, 0, i);
+      op0 = gen_reg_rtx (quarter_mode);
+      op1 = gen_reg_rtx (quarter_mode);
+      op2 = gen_reg_rtx (quarter_mode);
+      op3 = gen_reg_rtx (quarter_mode);
+      op4 = gen_reg_rtx (half_mode);
+      op5 = gen_reg_rtx (half_mode);
+      ix86_expand_vector_init_interleave (quarter_mode, op0, ops,
+					  n >> 3);
+      ix86_expand_vector_init_interleave (quarter_mode, op1,
+					  &ops [n >> 2], n >> 3);
+      ix86_expand_vector_init_interleave (quarter_mode, op2,
+					  &ops [n >> 1], n >> 3);
+      ix86_expand_vector_init_interleave (quarter_mode, op3,
+					  &ops [(n >> 1) | (n >> 2)], n >> 3);
+      emit_insn (gen_rtx_SET (VOIDmode, op4,
+			      gen_rtx_VEC_CONCAT (half_mode, op0, op1)));
+      emit_insn (gen_rtx_SET (VOIDmode, op5,
+			      gen_rtx_VEC_CONCAT (half_mode, op2, op3)));
+      emit_insn (gen_rtx_SET (VOIDmode, target,
+			      gen_rtx_VEC_CONCAT (mode, op4, op5)));
+      return;
+
     case V16QImode:
       if (!TARGET_SSE4_1)
 	break;
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index dcb53df..4dfdb69 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -524,7 +524,8 @@
   (V16SI "TARGET_AVX512F") (V16SF "TARGET_AVX512F")
   (V8DI  "TARGET_AVX512F") (V8DF  "TARGET_AVX512F")
   (V4DI  "TARGET_AVX512VL") (V4DF  "TARGET_AVX512VL")])
-(define_mode_iterator VI48F_512 [V16SI V16SF V8DI V8DF])
+(define_mode_iterator VI48F_I12
+  [V16SI V16SF V8DI V8DF V32HI V64QI])
 (define_mode_iterator VI48F
   [V16SI V16SF V8DI V8DF
    (V8SI "TARGET_AVX512VL") (V8SF "TARGET_AVX512VL")
@@ -17475,7 +17476,7 @@
 })
 
 (define_expand "vec_init<mode>"
-  [(match_operand:VI48F_512 0 "register_operand")
+  [(match_operand:VI48F_I12 0 "register_operand")
    (match_operand 1)]
   "TARGET_AVX512F"
 {



More information about the Gcc-patches mailing list