This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [committed] OpenMP declare simd ABI changes on x86_64/i686


On Wed, Apr 06, 2016 at 02:54:31PM +0200, Jakub Jelinek wrote:
> Bootstrapped/regtested on x86_64-linux and i686-linux, committed to trunk.
> I'll try to coordinate with Intel about 3) as well as the default alignment
> if aligned clause is used on declare simd without any explicit alignment.

I had missed in the ABI spec that, unlike the SSE2/AVX/AVX2 entrypoints, the
AVX512F entrypoints in the masked case (i.e. _ZGVeM*) need to pass the mask in
integer registers rather than in vectors of the characteristic type, either as
unsigned int or unsigned long long (the latter only for a QImode
characteristic type), and sometimes in more than one of these (the rule is
that there should be as many mask parameters as there are vectors of the
characteristic type).

This doesn't generate perfect code right now; e.g. even for a simple
#pragma omp declare simd
int bar (int a, int b)
{
  return a + b;
}
we generate for _ZGVeM16*:
        leaq    8(%rsp), %r10
        andq    $-64, %rsp
        vpbroadcastd    %edi, %zmm2
        vpaddd  %zmm1, %zmm0, %zmm0
        vpxord  %zmm1, %zmm1, %zmm1
        pushq   -8(%r10)
        pushq   %rbp
        movq    %rsp, %rbp
        pushq   %r10
        subq    $112, %rsp
        vpsrlvd .LC0(%rip), %zmm2, %zmm2
        vpandd  .LC1(%rip), %zmm2, %zmm2
        vpcmpd  $4, %zmm1, %zmm2, %k1
        kmovw   %k1, %eax
        testw   %ax, %ax
        je      .L65
        vmovdqa32       %zmm0, -112(%rbp){%k1}
        vmovdqa64       -112(%rbp), %zmm0
        addq    $112, %rsp
        popq    %r10
        popq    %rbp
        leaq    -8(%r10), %rsp
where it really should do:
	kmovw	%edi, %k1
	vpaddd	%zmm1, %zmm0, %zmm0{z}{%k1}
or so, but perhaps we should recognize in the vectorizer
  vect_cst__50 = {mask.321_7(D), mask.321_7(D), mask.321_7(D), mask.321_7(D), mask.321_7(D), mask.321_7(D), mask.321_7(D), mask.321_7(D), mask.321_7(D), mask.321_7(D), mask.321_7(D), mask.321_7(D), mask.321_7(D), mask.321_7(D), mask.321_7(D), mask.321_7(D)};
  vect__8.627_51 = vect_cst__50 >> { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 };
  vect__9.628_53 = vect__8.627_51 & { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 };
  mask__36.631_57 = vect__9.628_53 != { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
for VECTOR_BOOLEAN_P mask__36.631 as
  mask__36.631_57 = VIEW_CONVERT_EXPR <mask.321_7(D)>;
and eventually handle even more complex cases.  That said, we still don't use the
masked clones in the vectorizer (I thought that support went in, but apparently it
didn't; I will need to look for the discussions), so this is not a top priority
right now; what is important is to get the ABI right.

Thus, I've committed the following fix after bootstrapping/regtesting on
x86_64-linux and i686-linux:

2016-04-07  Jakub Jelinek  <jakub@redhat.com>

	* cgraph.h (struct cgraph_simd_clone): Add mask_mode field.
	* omp-low.c (simd_clone_init_simd_arrays, simd_clone_adjust): Handle
	node->simdclone->mask_mode != VOIDmode masks.
	(simd_clone_adjust_argument_types): Likewise.  Move sc var definition
	earlier, use it instead of node->simdclone.
	* config/i386/i386.c (ix86_simd_clone_compute_vecsize_and_simdlen):
	Set clonei->mask_mode.

	* c-c++-common/attr-simd.c: Add scan-assembler* directives for AVX512F
	clones.
	* c-c++-common/attr-simd-2.c: Likewise.
	* c-c++-common/attr-simd-4.c: Likewise.
	* gcc.dg/gomp/simd-clones-2.c: Likewise.
	* gcc.dg/gomp/simd-clones-3.c: Likewise.

--- gcc/cgraph.h.jj	2016-04-04 12:28:41.000000000 +0200
+++ gcc/cgraph.h	2016-04-07 10:56:36.534410726 +0200
@@ -766,6 +766,11 @@ struct GTY(()) cgraph_simd_clone {
   /* Max hardware vector size in bits for floating point vectors.  */
   unsigned int vecsize_float;
 
+  /* Machine mode of the mask argument(s), if they are to be passed
+     as bitmasks in integer argument(s).  VOIDmode if masks are passed
+     as vectors of characteristic type.  */
+  machine_mode mask_mode;
+
   /* The mangling character for a given vector size.  This is used
      to determine the ISA mangling bit as specified in the Intel
      Vector ABI.  */
--- gcc/omp-low.c.jj	2016-04-06 14:40:57.000000000 +0200
+++ gcc/omp-low.c	2016-04-07 21:32:47.633630411 +0200
@@ -18916,7 +18916,9 @@ simd_clone_adjust_argument_types (struct
   adjustments.create (args.length ());
   unsigned i, j, veclen;
   struct ipa_parm_adjustment adj;
-  for (i = 0; i < node->simdclone->nargs; ++i)
+  struct cgraph_simd_clone *sc = node->simdclone;
+
+  for (i = 0; i < sc->nargs; ++i)
     {
       memset (&adj, 0, sizeof (adj));
       tree parm = args[i];
@@ -18924,10 +18926,10 @@ simd_clone_adjust_argument_types (struct
       adj.base_index = i;
       adj.base = parm;
 
-      node->simdclone->args[i].orig_arg = node->definition ? parm : NULL_TREE;
-      node->simdclone->args[i].orig_type = parm_type;
+      sc->args[i].orig_arg = node->definition ? parm : NULL_TREE;
+      sc->args[i].orig_type = parm_type;
 
-      switch (node->simdclone->args[i].arg_type)
+      switch (sc->args[i].arg_type)
 	{
 	default:
 	  /* No adjustment necessary for scalar arguments.  */
@@ -18936,29 +18938,29 @@ simd_clone_adjust_argument_types (struct
 	case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
 	case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
 	  if (node->definition)
-	    node->simdclone->args[i].simd_array
+	    sc->args[i].simd_array
 	      = create_tmp_simd_array (IDENTIFIER_POINTER (DECL_NAME (parm)),
 				       TREE_TYPE (parm_type),
-				       node->simdclone->simdlen);
+				       sc->simdlen);
 	  adj.op = IPA_PARM_OP_COPY;
 	  break;
 	case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
 	case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
 	case SIMD_CLONE_ARG_TYPE_VECTOR:
 	  if (INTEGRAL_TYPE_P (parm_type) || POINTER_TYPE_P (parm_type))
-	    veclen = node->simdclone->vecsize_int;
+	    veclen = sc->vecsize_int;
 	  else
-	    veclen = node->simdclone->vecsize_float;
+	    veclen = sc->vecsize_float;
 	  veclen /= GET_MODE_BITSIZE (TYPE_MODE (parm_type));
-	  if (veclen > node->simdclone->simdlen)
-	    veclen = node->simdclone->simdlen;
+	  if (veclen > sc->simdlen)
+	    veclen = sc->simdlen;
 	  adj.arg_prefix = "simd";
 	  if (POINTER_TYPE_P (parm_type))
 	    adj.type = build_vector_type (pointer_sized_int_node, veclen);
 	  else
 	    adj.type = build_vector_type (parm_type, veclen);
-	  node->simdclone->args[i].vector_type = adj.type;
-	  for (j = veclen; j < node->simdclone->simdlen; j += veclen)
+	  sc->args[i].vector_type = adj.type;
+	  for (j = veclen; j < sc->simdlen; j += veclen)
 	    {
 	      adjustments.safe_push (adj);
 	      if (j == veclen)
@@ -18967,23 +18969,21 @@ simd_clone_adjust_argument_types (struct
 		  adj.op = IPA_PARM_OP_NEW;
 		  adj.arg_prefix = "simd";
 		  adj.base_index = i;
-		  adj.type = node->simdclone->args[i].vector_type;
+		  adj.type = sc->args[i].vector_type;
 		}
 	    }
 
 	  if (node->definition)
-	    node->simdclone->args[i].simd_array
+	    sc->args[i].simd_array
 	      = create_tmp_simd_array (IDENTIFIER_POINTER (DECL_NAME (parm)),
-				       parm_type, node->simdclone->simdlen);
+				       parm_type, sc->simdlen);
 	}
       adjustments.safe_push (adj);
     }
 
-  if (node->simdclone->inbranch)
+  if (sc->inbranch)
     {
-      tree base_type
-	= simd_clone_compute_base_data_type (node->simdclone->origin,
-					     node->simdclone);
+      tree base_type = simd_clone_compute_base_data_type (sc->origin, sc);
 
       memset (&adj, 0, sizeof (adj));
       adj.op = IPA_PARM_OP_NEW;
@@ -18991,31 +18991,41 @@ simd_clone_adjust_argument_types (struct
 
       adj.base_index = i;
       if (INTEGRAL_TYPE_P (base_type) || POINTER_TYPE_P (base_type))
-	veclen = node->simdclone->vecsize_int;
+	veclen = sc->vecsize_int;
       else
-	veclen = node->simdclone->vecsize_float;
+	veclen = sc->vecsize_float;
       veclen /= GET_MODE_BITSIZE (TYPE_MODE (base_type));
-      if (veclen > node->simdclone->simdlen)
-	veclen = node->simdclone->simdlen;
-      if (POINTER_TYPE_P (base_type))
+      if (veclen > sc->simdlen)
+	veclen = sc->simdlen;
+      if (sc->mask_mode != VOIDmode)
+	adj.type
+	  = lang_hooks.types.type_for_mode (sc->mask_mode, 1);
+      else if (POINTER_TYPE_P (base_type))
 	adj.type = build_vector_type (pointer_sized_int_node, veclen);
       else
 	adj.type = build_vector_type (base_type, veclen);
       adjustments.safe_push (adj);
 
-      for (j = veclen; j < node->simdclone->simdlen; j += veclen)
+      for (j = veclen; j < sc->simdlen; j += veclen)
 	adjustments.safe_push (adj);
 
       /* We have previously allocated one extra entry for the mask.  Use
 	 it and fill it.  */
-      struct cgraph_simd_clone *sc = node->simdclone;
       sc->nargs++;
+      if (sc->mask_mode != VOIDmode)
+	base_type = boolean_type_node;
       if (node->definition)
 	{
 	  sc->args[i].orig_arg
 	    = build_decl (UNKNOWN_LOCATION, PARM_DECL, NULL, base_type);
-	  sc->args[i].simd_array
-	    = create_tmp_simd_array ("mask", base_type, sc->simdlen);
+	  if (sc->mask_mode == VOIDmode)
+	    sc->args[i].simd_array
+	      = create_tmp_simd_array ("mask", base_type, sc->simdlen);
+	  else if (veclen < sc->simdlen)
+	    sc->args[i].simd_array
+	      = create_tmp_simd_array ("mask", adj.type, sc->simdlen / veclen);
+	  else
+	    sc->args[i].simd_array = NULL_TREE;
 	}
       sc->args[i].orig_type = base_type;
       sc->args[i].arg_type = SIMD_CLONE_ARG_TYPE_MASK;
@@ -19083,6 +19093,27 @@ simd_clone_init_simd_arrays (struct cgra
       node->simdclone->args[i].vector_arg = arg;
 
       tree array = node->simdclone->args[i].simd_array;
+      if (node->simdclone->mask_mode != VOIDmode
+	  && node->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_MASK)
+	{
+	  if (array == NULL_TREE)
+	    continue;
+	  unsigned int l
+	    = tree_to_uhwi (TYPE_MAX_VALUE (TYPE_DOMAIN (TREE_TYPE (array))));
+	  for (k = 0; k <= l; k++)
+	    {
+	      if (k)
+		{
+		  arg = DECL_CHAIN (arg);
+		  j++;
+		}
+	      tree t = build4 (ARRAY_REF, TREE_TYPE (TREE_TYPE (array)),
+			       array, size_int (k), NULL, NULL);
+	      t = build2 (MODIFY_EXPR, TREE_TYPE (t), t, arg);
+	      gimplify_and_add (t, &seq);
+	    }
+	  continue;
+	}
       if (TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg)) == node->simdclone->simdlen)
 	{
 	  tree ptype = build_pointer_type (TREE_TYPE (TREE_TYPE (array)));
@@ -19453,7 +19484,7 @@ simd_clone_adjust (struct cgraph_node *n
   e->probability = REG_BR_PROB_BASE;
   gsi = gsi_last_bb (incr_bb);
   gimple *g = gimple_build_assign (iter2, PLUS_EXPR, iter1,
-				  build_int_cst (unsigned_type_node, 1));
+				   build_int_cst (unsigned_type_node, 1));
   gsi_insert_after (&gsi, g, GSI_CONTINUE_LINKING);
 
   /* Mostly annotate the loop for the vectorizer (the rest is done below).  */
@@ -19469,21 +19500,68 @@ simd_clone_adjust (struct cgraph_node *n
       gimple_stmt_iterator gsi = gsi_last_bb (loop->header);
       tree mask_array
 	= node->simdclone->args[node->simdclone->nargs - 1].simd_array;
-      tree mask = make_ssa_name (TREE_TYPE (TREE_TYPE (mask_array)));
-      tree aref = build4 (ARRAY_REF,
-			  TREE_TYPE (TREE_TYPE (mask_array)),
-			  mask_array, iter1,
-			  NULL, NULL);
-      g = gimple_build_assign (mask, aref);
-      gsi_insert_after (&gsi, g, GSI_CONTINUE_LINKING);
-      int bitsize = GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (aref)));
-      if (!INTEGRAL_TYPE_P (TREE_TYPE (aref)))
-	{
-	  aref = build1 (VIEW_CONVERT_EXPR,
-			 build_nonstandard_integer_type (bitsize, 0), mask);
-	  mask = make_ssa_name (TREE_TYPE (aref));
+      tree mask;
+      if (node->simdclone->mask_mode != VOIDmode)
+	{
+	  tree shift_cnt;
+	  if (mask_array == NULL_TREE)
+	    {
+	      tree arg = node->simdclone->args[node->simdclone->nargs
+					       - 1].vector_arg;
+	      mask = get_or_create_ssa_default_def (cfun, arg);
+	      shift_cnt = iter1;
+	    }
+	  else
+	    {
+	      tree maskt = TREE_TYPE (mask_array);
+	      int c = tree_to_uhwi (TYPE_MAX_VALUE (TYPE_DOMAIN (maskt)));
+	      c = node->simdclone->simdlen / (c + 1);
+	      int s = exact_log2 (c);
+	      gcc_assert (s > 0);
+	      c--;
+	      tree idx = make_ssa_name (TREE_TYPE (iter1));
+	      g = gimple_build_assign (idx, RSHIFT_EXPR, iter1,
+				       build_int_cst (NULL_TREE, s));
+	      gsi_insert_after (&gsi, g, GSI_CONTINUE_LINKING);
+	      mask = make_ssa_name (TREE_TYPE (TREE_TYPE (mask_array)));
+	      tree aref = build4 (ARRAY_REF,
+				  TREE_TYPE (TREE_TYPE (mask_array)),
+				  mask_array, idx, NULL, NULL);
+	      g = gimple_build_assign (mask, aref);
+	      gsi_insert_after (&gsi, g, GSI_CONTINUE_LINKING);
+	      shift_cnt = make_ssa_name (TREE_TYPE (iter1));
+	      g = gimple_build_assign (shift_cnt, BIT_AND_EXPR, iter1,
+				       build_int_cst (TREE_TYPE (iter1), c));
+	      gsi_insert_after (&gsi, g, GSI_CONTINUE_LINKING);
+	    }
+	  g = gimple_build_assign (make_ssa_name (TREE_TYPE (mask)),
+				   RSHIFT_EXPR, mask, shift_cnt);
+	  gsi_insert_after (&gsi, g, GSI_CONTINUE_LINKING);
+	  mask = gimple_assign_lhs (g);
+	  g = gimple_build_assign (make_ssa_name (TREE_TYPE (mask)),
+				   BIT_AND_EXPR, mask,
+				   build_int_cst (TREE_TYPE (mask), 1));
+	  gsi_insert_after (&gsi, g, GSI_CONTINUE_LINKING);
+	  mask = gimple_assign_lhs (g);
+	}
+      else
+	{
+	  mask = make_ssa_name (TREE_TYPE (TREE_TYPE (mask_array)));
+	  tree aref = build4 (ARRAY_REF,
+			      TREE_TYPE (TREE_TYPE (mask_array)),
+			      mask_array, iter1, NULL, NULL);
 	  g = gimple_build_assign (mask, aref);
 	  gsi_insert_after (&gsi, g, GSI_CONTINUE_LINKING);
+	  int bitsize = GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (aref)));
+	  if (!INTEGRAL_TYPE_P (TREE_TYPE (aref)))
+	    {
+	      aref = build1 (VIEW_CONVERT_EXPR,
+			     build_nonstandard_integer_type (bitsize, 0),
+							     mask);
+	      mask = make_ssa_name (TREE_TYPE (aref));
+	      g = gimple_build_assign (mask, aref);
+	      gsi_insert_after (&gsi, g, GSI_CONTINUE_LINKING);
+	    }
 	}
 
       g = gimple_build_cond (EQ_EXPR, mask, build_zero_cst (TREE_TYPE (mask)),
--- gcc/config/i386/i386.c.jj	2016-04-06 14:46:29.000000000 +0200
+++ gcc/config/i386/i386.c	2016-04-07 11:30:26.988067880 +0200
@@ -53747,7 +53747,7 @@ ix86_memmodel_check (unsigned HOST_WIDE_
   return val;
 }
 
-/* Set CLONEI->vecsize_mangle, CLONEI->vecsize_int,
+/* Set CLONEI->vecsize_mangle, CLONEI->mask_mode, CLONEI->vecsize_int,
    CLONEI->vecsize_float and if CLONEI->simdlen is 0, also
    CLONEI->simdlen.  Return 0 if SIMD clones shouldn't be emitted,
    or number of vecsize_mangle variants that should be emitted.  */
@@ -53834,6 +53834,7 @@ ix86_simd_clone_compute_vecsize_and_simd
       clonei->vecsize_mangle = "bcde"[num];
       ret = 4;
     }
+  clonei->mask_mode = VOIDmode;
   switch (clonei->vecsize_mangle)
     {
     case 'b':
@@ -53851,6 +53852,10 @@ ix86_simd_clone_compute_vecsize_and_simd
     case 'e':
       clonei->vecsize_int = 512;
       clonei->vecsize_float = 512;
+      if (TYPE_MODE (base_type) == QImode)
+	clonei->mask_mode = DImode;
+      else
+	clonei->mask_mode = SImode;
       break;
     }
   if (clonei->simdlen == 0)
--- gcc/testsuite/c-c++-common/attr-simd.c.jj	2015-11-20 08:17:52.000000000 +0100
+++ gcc/testsuite/c-c++-common/attr-simd.c	2016-04-07 21:34:35.796149182 +0200
@@ -18,6 +18,8 @@ int simd_attr (void)
 /* { dg-final { scan-assembler-times "_ZGVcM4_simd_attr:" 1 { target { i?86-*-* x86_64-*-* } } } } */
 /* { dg-final { scan-assembler-times "_ZGVdN8_simd_attr:" 1 { target { i?86-*-* x86_64-*-* } } } } */
 /* { dg-final { scan-assembler-times "_ZGVdM8_simd_attr:" 1 { target { i?86-*-* x86_64-*-* } } } } */
+/* { dg-final { scan-assembler-times "_ZGVeN16_simd_attr:" 1 { target { i?86-*-* x86_64-*-* } } } } */
+/* { dg-final { scan-assembler-times "_ZGVeM16_simd_attr:" 1 { target { i?86-*-* x86_64-*-* } } } } */
 
 extern
 #ifdef __cplusplus
@@ -36,3 +38,5 @@ int simd_attr2 (void)
 /* { dg-final { scan-assembler-times "_ZGVcM4_simd_attr2:" 1 { target { i?86-*-* x86_64-*-* } } } } */
 /* { dg-final { scan-assembler-times "_ZGVdN8_simd_attr2:" 1 { target { i?86-*-* x86_64-*-* } } } } */
 /* { dg-final { scan-assembler-times "_ZGVdM8_simd_attr2:" 1 { target { i?86-*-* x86_64-*-* } } } } */
+/* { dg-final { scan-assembler-times "_ZGVeN16_simd_attr2:" 1 { target { i?86-*-* x86_64-*-* } } } } */
+/* { dg-final { scan-assembler-times "_ZGVeM16_simd_attr2:" 1 { target { i?86-*-* x86_64-*-* } } } } */
--- gcc/testsuite/c-c++-common/attr-simd-2.c.jj	2015-11-18 11:19:20.000000000 +0100
+++ gcc/testsuite/c-c++-common/attr-simd-2.c	2016-04-07 21:34:54.529892634 +0200
@@ -19,3 +19,5 @@ int simd_attr (void)
 /* { dg-final { scan-assembler-times "_ZGVcM4_simd_attr:" 1 { target { i?86-*-* x86_64-*-* } } } } */
 /* { dg-final { scan-assembler-times "_ZGVdN8_simd_attr:" 1 { target { i?86-*-* x86_64-*-* } } } } */
 /* { dg-final { scan-assembler-times "_ZGVdM8_simd_attr:" 1 { target { i?86-*-* x86_64-*-* } } } } */
+/* { dg-final { scan-assembler-times "_ZGVeN16_simd_attr:" 1 { target { i?86-*-* x86_64-*-* } } } } */
+/* { dg-final { scan-assembler-times "_ZGVeM16_simd_attr:" 1 { target { i?86-*-* x86_64-*-* } } } } */
--- gcc/testsuite/c-c++-common/attr-simd-4.c.jj	2015-12-07 12:17:55.000000000 +0100
+++ gcc/testsuite/c-c++-common/attr-simd-4.c	2016-04-07 21:36:30.975570536 +0200
@@ -15,9 +15,11 @@ int simd_attr (void)
 /* { dg-final { scan-assembler-times "_ZGVbN4_simd_attr:" 1 { target { i?86-*-* x86_64-*-* } } } } */
 /* { dg-final { scan-assembler-times "_ZGVcN4_simd_attr:" 1 { target { i?86-*-* x86_64-*-* } } } } */
 /* { dg-final { scan-assembler-times "_ZGVdN8_simd_attr:" 1 { target { i?86-*-* x86_64-*-* } } } } */
+/* { dg-final { scan-assembler-times "_ZGVeN16_simd_attr:" 1 { target { i?86-*-* x86_64-*-* } } } } */
 /* { dg-final { scan-assembler-not "_ZGVbM4_simd_attr:" { target { i?86-*-* x86_64-*-* } } } } */
 /* { dg-final { scan-assembler-not "_ZGVcM4_simd_attr:" { target { i?86-*-* x86_64-*-* } } } } */
 /* { dg-final { scan-assembler-not "_ZGVdM8_simd_attr:" { target { i?86-*-* x86_64-*-* } } } } */
+/* { dg-final { scan-assembler-not "_ZGVeM16_simd_attr:" { target { i?86-*-* x86_64-*-* } } } } */
 
 extern
 #ifdef __cplusplus
@@ -33,6 +35,8 @@ int simd_attr2 (void)
 /* { dg-final { scan-assembler-not "_ZGVbN4_simd_attr2:" { target { i?86-*-* x86_64-*-* } } } } */
 /* { dg-final { scan-assembler-not "_ZGVcN4_simd_attr2:" { target { i?86-*-* x86_64-*-* } } } } */
 /* { dg-final { scan-assembler-not "_ZGVdN8_simd_attr2:" { target { i?86-*-* x86_64-*-* } } } } */
+/* { dg-final { scan-assembler-not "_ZGVeN16_simd_attr2:" { target { i?86-*-* x86_64-*-* } } } } */
 /* { dg-final { scan-assembler-times "_ZGVbM4_simd_attr2:" 1 { target { i?86-*-* x86_64-*-* } } } } */
 /* { dg-final { scan-assembler-times "_ZGVcM4_simd_attr2:" 1 { target { i?86-*-* x86_64-*-* } } } } */
 /* { dg-final { scan-assembler-times "_ZGVdM8_simd_attr2:" 1 { target { i?86-*-* x86_64-*-* } } } } */
+/* { dg-final { scan-assembler-times "_ZGVeM16_simd_attr2:" 1 { target { i?86-*-* x86_64-*-* } } } } */
--- gcc/testsuite/gcc.dg/gomp/simd-clones-2.c.jj	2015-05-29 15:03:14.000000000 +0200
+++ gcc/testsuite/gcc.dg/gomp/simd-clones-2.c	2016-04-07 21:37:27.034801725 +0200
@@ -23,3 +23,6 @@ float setArray(float *a, float x, int k)
 /* { dg-final { scan-tree-dump "_ZGVdN8ua32vl_setArray" "optimized" { target i?86-*-* x86_64-*-* } } } */
 /* { dg-final { scan-tree-dump "_ZGVdN8vvva32_addit" "optimized" { target i?86-*-* x86_64-*-* } } } */
 /* { dg-final { scan-tree-dump "_ZGVdM8vl66u_addit" "optimized" { target i?86-*-* x86_64-*-* } } } */
+/* { dg-final { scan-tree-dump "_ZGVeN16ua32vl_setArray" "optimized" { target i?86-*-* x86_64-*-* } } } */
+/* { dg-final { scan-tree-dump "_ZGVeN16vvva32_addit" "optimized" { target i?86-*-* x86_64-*-* } } } */
+/* { dg-final { scan-tree-dump "_ZGVeM16vl66u_addit" "optimized" { target i?86-*-* x86_64-*-* } } } */
--- gcc/testsuite/gcc.dg/gomp/simd-clones-3.c.jj	2015-05-29 15:03:14.000000000 +0200
+++ gcc/testsuite/gcc.dg/gomp/simd-clones-3.c	2016-04-07 21:38:03.712298720 +0200
@@ -15,3 +15,5 @@ int addit(int a, int b, int c)
 /* { dg-final { scan-tree-dump "_ZGVcM4vvv_addit" "optimized" { target i?86-*-* x86_64-*-* } } } */
 /* { dg-final { scan-tree-dump "_ZGVdN8vvv_addit" "optimized" { target i?86-*-* x86_64-*-* } } } */
 /* { dg-final { scan-tree-dump "_ZGVdM8vvv_addit" "optimized" { target i?86-*-* x86_64-*-* } } } */
+/* { dg-final { scan-tree-dump "_ZGVeN16vvv_addit" "optimized" { target i?86-*-* x86_64-*-* } } } */
+/* { dg-final { scan-tree-dump "_ZGVeM16vvv_addit" "optimized" { target i?86-*-* x86_64-*-* } } } */


	Jakub


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]