This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH] Improve vector equality comparisons with SSE4.1 (PR target/78102)


On October 25, 2016 3:48:41 PM GMT+02:00, Jakub Jelinek <jakub@redhat.com> wrote:
>Hi!
>
>SSE4.1 added PCMPEQQ instruction, but only SSE4.2 added PCMPGTQ, and
>we've switched _mm_cmpeq_epi64 in r217608 from using __builtin_ia32_*
>to generic vector comparison, which works fine for SSE4.2, but not for
>SSE4.1.  The following patch adds optabs etc. so that we can support
>vector equality/non-equality integer comparisons even when other
>vector comparisons aren't supported.
>
>Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

OK.

Thanks,
Richard.

>2016-10-25  Jakub Jelinek  <jakub@redhat.com>
>
>	PR target/78102
>	* optabs.def (vcondeq_optab, vec_cmpeq_optab): New optabs.
>	* optabs.c (expand_vec_cond_expr): For comparison codes
>	EQ_EXPR and NE_EXPR, attempt vcondeq_optab as fallback.
>	(expand_vec_cmp_expr): For comparison codes
>	EQ_EXPR and NE_EXPR, attempt vec_cmpeq_optab as fallback.
>	* optabs-tree.h (expand_vec_cmp_expr_p, expand_vec_cond_expr_p):
>	Add enum tree_code argument.
>	* optabs-query.h (get_vec_cmp_eq_icode, get_vcond_eq_icode): New
>	inline functions.
>	* optabs-tree.c (expand_vec_cmp_expr_p): Add CODE argument.  For
>	CODE EQ_EXPR or NE_EXPR, attempt to use vec_cmpeq_optab as
>	fallback.
>	(expand_vec_cond_expr_p): Add CODE argument.  For CODE EQ_EXPR or
>	NE_EXPR, attempt to use vcondeq_optab as fallback.
>	* tree-vect-generic.c (expand_vector_comparison,
>	expand_vector_divmod, expand_vector_condition): Adjust
>	expand_vec_cmp_expr_p and expand_vec_cond_expr_p callers.
>	* tree-vect-stmts.c (vectorizable_condition,
>	vectorizable_comparison): Likewise.
>	* tree-vect-patterns.c (vect_recog_mixed_size_cond_pattern,
>	check_bool_pattern, search_type_for_mask_1): Likewise.
>	* expr.c (do_store_flag): Likewise.
>	* doc/md.texi (@code{vec_cmpeq@var{m}@var{n}},
>	@code{vcondeq@var{m}@var{n}}): Document.
>	* config/i386/sse.md (vec_cmpeqv2div2di, vcondeq<VI8F_128:mode>v2di):
>	New expanders.
>testsuite/
>	* gcc.target/i386/pr78102.c: New test.
>
>--- gcc/optabs.def.jj	2016-10-14 12:31:49.000000000 +0200
>+++ gcc/optabs.def	2016-10-25 11:30:13.497467507 +0200
>@@ -82,9 +82,11 @@ OPTAB_CD(vec_load_lanes_optab, "vec_load
> OPTAB_CD(vec_store_lanes_optab, "vec_store_lanes$a$b")
> OPTAB_CD(vcond_optab, "vcond$a$b")
> OPTAB_CD(vcondu_optab, "vcondu$a$b")
>+OPTAB_CD(vcondeq_optab, "vcondeq$a$b")
> OPTAB_CD(vcond_mask_optab, "vcond_mask_$a$b")
> OPTAB_CD(vec_cmp_optab, "vec_cmp$a$b")
> OPTAB_CD(vec_cmpu_optab, "vec_cmpu$a$b")
>+OPTAB_CD(vec_cmpeq_optab, "vec_cmpeq$a$b")
> OPTAB_CD(maskload_optab, "maskload$a$b")
> OPTAB_CD(maskstore_optab, "maskstore$a$b")
> 
>--- gcc/optabs.c.jj	2016-10-17 08:42:34.000000000 +0200
>+++ gcc/optabs.c	2016-10-25 11:49:02.800334415 +0200
>@@ -5636,7 +5636,12 @@ expand_vec_cond_expr (tree vec_cond_type
> 
>   icode = get_vcond_icode (mode, cmp_op_mode, unsignedp);
>   if (icode == CODE_FOR_nothing)
>-    return 0;
>+    {
>+      if (tcode == EQ_EXPR || tcode == NE_EXPR)
>+	icode = get_vcond_eq_icode (mode, cmp_op_mode);
>+      if (icode == CODE_FOR_nothing)
>+	return 0;
>+    }
> 
>comparison = vector_compare_rtx (tcode, op0a, op0b, unsignedp, icode,
>4);
>   rtx_op1 = expand_normal (op1);
>@@ -5675,7 +5680,12 @@ expand_vec_cmp_expr (tree type, tree exp
> 
>   icode = get_vec_cmp_icode (vmode, mask_mode, unsignedp);
>   if (icode == CODE_FOR_nothing)
>-    return 0;
>+    {
>+      if (tcode == EQ_EXPR || tcode == NE_EXPR)
>+	icode = get_vec_cmp_eq_icode (vmode, mask_mode);
>+      if (icode == CODE_FOR_nothing)
>+	return 0;
>+    }
> 
>comparison = vector_compare_rtx (tcode, op0a, op0b, unsignedp, icode,
>2);
>   create_output_operand (&ops[0], target, mask_mode);
>--- gcc/optabs-tree.h.jj	2016-01-14 19:57:53.000000000 +0100
>+++ gcc/optabs-tree.h	2016-10-25 11:34:29.605262354 +0200
>@@ -38,8 +38,8 @@ enum optab_subtype
>optab optab_for_tree_code (enum tree_code, const_tree, enum
>optab_subtype);
>bool supportable_convert_operation (enum tree_code, tree, tree, tree *,
> 				    enum tree_code *);
>-bool expand_vec_cmp_expr_p (tree, tree);
>-bool expand_vec_cond_expr_p (tree, tree);
>+bool expand_vec_cmp_expr_p (tree, tree, enum tree_code);
>+bool expand_vec_cond_expr_p (tree, tree, enum tree_code);
> void init_tree_optimization_optabs (tree);
> 
> #endif
>--- gcc/optabs-query.h.jj	2016-01-04 14:55:51.000000000 +0100
>+++ gcc/optabs-query.h	2016-10-25 11:50:12.448462773 +0200
>@@ -90,6 +90,15 @@ get_vec_cmp_icode (machine_mode vmode, m
>   return convert_optab_handler (tab, vmode, mask_mode);
> }
> 
>+/* Return insn code for a comparison operator with VMODE
>+   resulting in MASK_MODE (only for EQ/NE).  */
>+
>+static inline enum insn_code
>+get_vec_cmp_eq_icode (machine_mode vmode, machine_mode mask_mode)
>+{
>+  return convert_optab_handler (vec_cmpeq_optab, vmode, mask_mode);
>+}
>+
> /* Return insn code for a conditional operator with a comparison in
>    mode CMODE, unsigned if UNS is true, resulting in a value of mode VMODE.  */
> 
>@@ -113,6 +122,15 @@ get_vcond_mask_icode (machine_mode vmode
>   return convert_optab_handler (vcond_mask_optab, vmode, mmode);
> }
> 
>+/* Return insn code for a conditional operator with a comparison in
>+   mode CMODE (only EQ/NE), resulting in a value of mode VMODE.  */
>+
>+static inline enum insn_code
>+get_vcond_eq_icode (machine_mode vmode, machine_mode cmode)
>+{
>+  return convert_optab_handler (vcondeq_optab, vmode, cmode);
>+}
>+
> /* Enumerates the possible extraction_insn operations.  */
> enum extraction_pattern { EP_insv, EP_extv, EP_extzv };
> 
>--- gcc/optabs-tree.c.jj	2016-02-10 16:01:58.000000000 +0100
>+++ gcc/optabs-tree.c	2016-10-25 11:39:10.498747012 +0200
>@@ -305,12 +305,16 @@ supportable_convert_operation (enum tree
>    and resulting mask with MASK_TYPE.  */
> 
> bool
>-expand_vec_cmp_expr_p (tree value_type, tree mask_type)
>+expand_vec_cmp_expr_p (tree value_type, tree mask_type, enum tree_code
>code)
> {
>-  enum insn_code icode = get_vec_cmp_icode (TYPE_MODE (value_type),
>-					    TYPE_MODE (mask_type),
>-					    TYPE_UNSIGNED (value_type));
>-  return (icode != CODE_FOR_nothing);
>+  if (get_vec_cmp_icode (TYPE_MODE (value_type), TYPE_MODE
>(mask_type),
>+			 TYPE_UNSIGNED (value_type)) != CODE_FOR_nothing)
>+    return true;
>+  if ((code == EQ_EXPR || code == NE_EXPR)
>+      && (get_vec_cmp_eq_icode (TYPE_MODE (value_type), TYPE_MODE
>(mask_type))
>+	  != CODE_FOR_nothing))
>+    return true;
>+  return false;
> }
> 
> /* Return TRUE iff, appropriate vector insns are available
>@@ -318,7 +322,7 @@ expand_vec_cmp_expr_p (tree value_type,
>    with operand vector types in CMP_OP_TYPE.  */
> 
> bool
>-expand_vec_cond_expr_p (tree value_type, tree cmp_op_type)
>+expand_vec_cond_expr_p (tree value_type, tree cmp_op_type, enum
>tree_code code)
> {
>   machine_mode value_mode = TYPE_MODE (value_type);
>   machine_mode cmp_op_mode = TYPE_MODE (cmp_op_type);
>@@ -328,10 +332,16 @@ expand_vec_cond_expr_p (tree value_type,
>     return true;
> 
>   if (GET_MODE_SIZE (value_mode) != GET_MODE_SIZE (cmp_op_mode)
>-      || GET_MODE_NUNITS (value_mode) != GET_MODE_NUNITS (cmp_op_mode)
>-      || get_vcond_icode (TYPE_MODE (value_type), TYPE_MODE
>(cmp_op_type),
>-			  TYPE_UNSIGNED (cmp_op_type)) == CODE_FOR_nothing)
>+      || GET_MODE_NUNITS (value_mode) != GET_MODE_NUNITS
>(cmp_op_mode))
>     return false;
>+
>+  if (get_vcond_icode (TYPE_MODE (value_type), TYPE_MODE
>(cmp_op_type),
>+		       TYPE_UNSIGNED (cmp_op_type)) == CODE_FOR_nothing
>+      && ((code != EQ_EXPR && code != NE_EXPR)
>+	  || get_vcond_eq_icode (TYPE_MODE (value_type),
>+				 TYPE_MODE (cmp_op_type)) == CODE_FOR_nothing))
>+    return false;
>+
>   return true;
> }
> 
>--- gcc/tree-vect-generic.c.jj	2016-09-16 22:19:42.000000000 +0200
>+++ gcc/tree-vect-generic.c	2016-10-25 11:43:56.389169130 +0200
>@@ -356,8 +356,8 @@ expand_vector_comparison (gimple_stmt_it
>                           tree op1, enum tree_code code)
> {
>   tree t;
>-  if (!expand_vec_cmp_expr_p (TREE_TYPE (op0), type)
>-      && !expand_vec_cond_expr_p (type, TREE_TYPE (op0)))
>+  if (!expand_vec_cmp_expr_p (TREE_TYPE (op0), type, code)
>+      && !expand_vec_cond_expr_p (type, TREE_TYPE (op0), code))
>     t = expand_vector_piecewise (gsi, do_compare, type,
> 				 TREE_TYPE (TREE_TYPE (op0)), op0, op1, code);
>   else
>@@ -630,7 +630,7 @@ expand_vector_divmod (gimple_stmt_iterat
> 		}
> 	    }
> 	  if (addend == NULL_TREE
>-	      && expand_vec_cond_expr_p (type, type))
>+	      && expand_vec_cond_expr_p (type, type, LT_EXPR))
> 	    {
> 	      tree zero, cst, cond, mask_type;
> 	      gimple *stmt;
>@@ -878,7 +878,7 @@ expand_vector_condition (gimple_stmt_ite
>       comp_inner_type = TREE_TYPE (TREE_TYPE (a1));
>     }
> 
>-  if (expand_vec_cond_expr_p (type, TREE_TYPE (a1)))
>+  if (expand_vec_cond_expr_p (type, TREE_TYPE (a1), TREE_CODE (a)))
>     return;
> 
>   /* TODO: try and find a smaller vector type.  */
>--- gcc/tree-vect-stmts.c.jj	2016-09-29 22:53:15.000000000 +0200
>+++ gcc/tree-vect-stmts.c	2016-10-25 11:45:35.848924398 +0200
>@@ -7710,7 +7710,8 @@ vectorizable_condition (gimple *stmt, gi
>   if (!vec_stmt)
>     {
>       STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
>-      return expand_vec_cond_expr_p (vectype, comp_vectype);
>+      return expand_vec_cond_expr_p (vectype, comp_vectype,
>+				     TREE_CODE (cond_expr));
>     }
> 
>   /* Transform.  */
>@@ -8013,7 +8014,7 @@ vectorizable_comparison (gimple *stmt, g
>vect_model_simple_cost (stmt_info, ncopies * (1 + (bitop2 !=
>NOP_EXPR)),
> 			      dts, NULL, NULL);
>       if (bitop1 == NOP_EXPR)
>-	return expand_vec_cmp_expr_p (vectype, mask_type);
>+	return expand_vec_cmp_expr_p (vectype, mask_type, code);
>       else
> 	{
> 	  machine_mode mode = TYPE_MODE (vectype);
>--- gcc/tree-vect-patterns.c.jj	2016-09-16 22:19:42.000000000 +0200
>+++ gcc/tree-vect-patterns.c	2016-10-25 11:45:07.847274837 +0200
>@@ -3073,7 +3073,7 @@ vect_recog_mixed_size_cond_pattern (vec<
>   if (vectype == NULL_TREE)
>     return NULL;
> 
>-  if (expand_vec_cond_expr_p (vectype, comp_vectype))
>+  if (expand_vec_cond_expr_p (vectype, comp_vectype, TREE_CODE
>(cond_expr)))
>     return NULL;
> 
>   if (itype == NULL_TREE)
>@@ -3088,7 +3088,7 @@ vect_recog_mixed_size_cond_pattern (vec<
>   if (vecitype == NULL_TREE)
>     return NULL;
> 
>-  if (!expand_vec_cond_expr_p (vecitype, comp_vectype))
>+  if (!expand_vec_cond_expr_p (vecitype, comp_vectype, TREE_CODE
>(cond_expr)))
>     return NULL;
> 
>   if (GET_MODE_BITSIZE (TYPE_MODE (type)) > cmp_mode_size)
>@@ -3195,7 +3195,7 @@ check_bool_pattern (tree var, vec_info *
> 
> 	  tree mask_type = get_mask_type_for_scalar_type (TREE_TYPE (rhs1));
> 	  if (mask_type
>-	      && expand_vec_cmp_expr_p (comp_vectype, mask_type))
>+	      && expand_vec_cmp_expr_p (comp_vectype, mask_type, rhs_code))
> 	    return false;
> 
> 	  if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
>@@ -3209,7 +3209,7 @@ check_bool_pattern (tree var, vec_info *
> 	    }
> 	  else
> 	    vecitype = comp_vectype;
>-	  if (! expand_vec_cond_expr_p (vecitype, comp_vectype))
>+	  if (! expand_vec_cond_expr_p (vecitype, comp_vectype, rhs_code))
> 	    return false;
> 	}
>       else
>@@ -3537,7 +3537,7 @@ search_type_for_mask_1 (tree var, vec_in
> 
> 	  mask_type = get_mask_type_for_scalar_type (TREE_TYPE (rhs1));
> 	  if (!mask_type
>-	      || !expand_vec_cmp_expr_p (comp_vectype, mask_type))
>+	      || !expand_vec_cmp_expr_p (comp_vectype, mask_type, rhs_code))
> 	    {
> 	      res = NULL_TREE;
> 	      break;
>--- gcc/expr.c.jj	2016-10-14 21:36:10.000000000 +0200
>+++ gcc/expr.c	2016-10-25 11:40:07.549033035 +0200
>@@ -11286,7 +11286,7 @@ do_store_flag (sepops ops, rtx target, m
>     {
>       tree ifexp = build2 (ops->code, ops->type, arg0, arg1);
>       if (VECTOR_BOOLEAN_TYPE_P (ops->type)
>-	  && expand_vec_cmp_expr_p (TREE_TYPE (arg0), ops->type))
>+	  && expand_vec_cmp_expr_p (TREE_TYPE (arg0), ops->type, ops->code))
> 	return expand_vec_cmp_expr (ops->type, ifexp, target);
>       else
> 	{
>--- gcc/doc/md.texi.jj	2016-09-27 20:11:12.000000000 +0200
>+++ gcc/doc/md.texi	2016-10-25 12:15:13.039668617 +0200
>@@ -4730,6 +4730,14 @@ value of all-zeros.
> @item @samp{vec_cmpu@var{m}@var{n}}
> Similar to @code{vec_cmp@var{m}@var{n}} but perform unsigned vector comparison.
> 
>+@cindex @code{vec_cmpeq@var{m}@var{n}} instruction pattern
>+@item @samp{vec_cmpeq@var{m}@var{n}}
>+Similar to @code{vec_cmp@var{m}@var{n}} but perform equality or non-equality
>+vector comparison only.  If @code{vec_cmp@var{m}@var{n}}
>+or @code{vec_cmpu@var{m}@var{n}} instruction pattern is supported,
>+it will be preferred over @code{vec_cmpeq@var{m}@var{n}}, so there is
>+no need to define this instruction pattern if the others are supported.
>+
> @cindex @code{vcond@var{m}@var{n}} instruction pattern
> @item @samp{vcond@var{m}@var{n}}
> Output a conditional vector move.  Operand 0 is the destination to
>@@ -4746,6 +4754,14 @@ comparison with a truth value of all-one
> Similar to @code{vcond@var{m}@var{n}} but performs unsigned vector
> comparison.
> 
>+@cindex @code{vcondeq@var{m}@var{n}} instruction pattern
>+@item @samp{vcondeq@var{m}@var{n}}
>+Similar to @code{vcond@var{m}@var{n}} but performs equality or
>+non-equality vector comparison only.  If @code{vcond@var{m}@var{n}}
>+or @code{vcondu@var{m}@var{n}} instruction pattern is supported,
>+it will be preferred over @code{vcondeq@var{m}@var{n}}, so there is
>+no need to define this instruction pattern if the others are supported.
>+
> @cindex @code{vcond_mask_@var{m}@var{n}} instruction pattern
> @item @samp{vcond_mask_@var{m}@var{n}}
> Similar to @code{vcond@var{m}@var{n}} but operand 3 holds a pre-computed
>--- gcc/config/i386/sse.md.jj	2016-10-25 12:10:06.142514449 +0200
>+++ gcc/config/i386/sse.md	2016-10-25 12:10:27.236250116 +0200
>@@ -2652,6 +2652,18 @@ (define_expand "vec_cmpuv2div2di"
>   DONE;
> })
> 
>+(define_expand "vec_cmpeqv2div2di"
>+  [(set (match_operand:V2DI 0 "register_operand")
>+	(match_operator:V2DI 1 ""
>+	  [(match_operand:V2DI 2 "register_operand")
>+	   (match_operand:V2DI 3 "vector_operand")]))]
>+  "TARGET_SSE4_1"
>+{
>+  bool ok = ix86_expand_int_vec_cmp (operands);
>+  gcc_assert (ok);
>+  DONE;
>+})
>+
> (define_expand "vcond<V_512:mode><VF_512:mode>"
>   [(set (match_operand:V_512 0 "register_operand")
> 	(if_then_else:V_512
>@@ -11156,6 +11168,21 @@ (define_expand "vcondu<VI8F_128:mode>v2d
> {
>   bool ok = ix86_expand_int_vcond (operands);
>   gcc_assert (ok);
>+  DONE;
>+})
>+
>+(define_expand "vcondeq<VI8F_128:mode>v2di"
>+  [(set (match_operand:VI8F_128 0 "register_operand")
>+	(if_then_else:VI8F_128
>+	  (match_operator 3 ""
>+	    [(match_operand:V2DI 4 "vector_operand")
>+	     (match_operand:V2DI 5 "general_operand")])
>+	  (match_operand:VI8F_128 1)
>+	  (match_operand:VI8F_128 2)))]
>+  "TARGET_SSE4_1"
>+{
>+  bool ok = ix86_expand_int_vcond (operands);
>+  gcc_assert (ok);
>   DONE;
> })
> 
>--- gcc/testsuite/gcc.target/i386/pr78102.c.jj	2016-10-25 12:28:37.868587498 +0200
>+++ gcc/testsuite/gcc.target/i386/pr78102.c	2016-10-25 12:26:37.000000000 +0200
>@@ -0,0 +1,24 @@
>+/* PR target/78102 */
>+/* { dg-do compile } */
>+/* { dg-options "-O2 -mno-sse4.2 -msse4.1" } */
>+/* { dg-final { scan-assembler-times "pcmpeqq" 3 } } */
>+
>+#include <x86intrin.h>
>+
>+__m128i
>+foo (const __m128i x, const __m128i y)
>+{
>+  return _mm_cmpeq_epi64 (x, y);
>+}
>+
>+__v2di
>+bar (const __v2di x, const __v2di y)
>+{
>+  return x == y;
>+}
>+
>+__v2di
>+baz (const __v2di x, const __v2di y)
>+{
>+  return x != y;
>+}
>
>	Jakub



Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]