[gcc(refs/users/clyon/heads/mve-autovec)] arm: Add support for MVE vector compare with GCC vectors

Christophe Lyon <clyon@gcc.gnu.org>
Mon Mar 1 18:20:05 GMT 2021


https://gcc.gnu.org/g:7c240fe8e899e105f40dc6c1191067a27d090cf6

commit 7c240fe8e899e105f40dc6c1191067a27d090cf6
Author: Christophe Lyon <christophe.lyon@linaro.org>
Date:   Fri Feb 26 16:33:59 2021 +0100

    arm: Add support for MVE vector compare with GCC vectors
    
    Since MVE has a different set of vector comparison operators from
    Neon, we have to update the expansion to take the new ones into
    account: for instance 'NE', for which MVE does not require using
    'EQ' with the inverted condition.
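
    As a minimal illustration (the function name is hypothetical, in the
    style of the new test below), an integer 'NE' such as:

    typedef long int vs32 __attribute__((vector_size(16)));
    vs32 cmp_ne_vs32_reg (vs32 a, vs32 b) { return a != b; }

    can now be expanded into a single vcmp.i32 ne on MVE, whereas the
    Neon path has to emit the EQ comparison and invert its result.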
    
    Conversely, Neon supports comparisons with #0, while MVE does not.
    
    For:
    typedef long int vs32 __attribute__((vector_size(16)));
    vs32 cmp_eq_vs32_reg (vs32 a, vs32 b) { return a == b; }
    
    we now generate:
    cmp_eq_vs32_reg:
            vldr.64 d4, .L123       @ 8     [c=8 l=4]  *mve_movv4si/8
            vldr.64 d5, .L123+8
            vldr.64 d6, .L123+16    @ 9     [c=8 l=4]  *mve_movv4si/8
            vldr.64 d7, .L123+24
            vcmp.i32  eq, q0, q1    @ 7     [c=16 l=4]  mve_vcmpeqq_v4si
            vpsel q0, q3, q2        @ 15    [c=8 l=4]  mve_vpselq_sv4si
            bx      lr      @ 26    [c=8 l=4]  *thumb2_return
    .L124:
            .align  3
    .L123:
            .word   0
            .word   0
            .word   0
            .word   0
            .word   1
            .word   1
            .word   1
            .word   1
    
    For some reason, emit_move_insn (zero, CONST0_RTX (cmp_mode)) produces
    a pair of vldr instructions instead of a single vmov.i32 qX, #0.
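
    One would instead expect a single immediate move per constant, along
    the lines of (a sketch; register numbers are arbitrary):

    vmov.i32  q2, #0    @ each 32-bit lane = 0
    vmov.i32  q3, #1    @ each 32-bit lane = 1

    rather than the four vldr loads from the literal pool shown above.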
    
    2021-03-01  Christophe Lyon  <christophe.lyon@linaro.org>
    
            gcc/
            * config/arm/arm.c (arm_expand_vector_compare): Add support for
            MVE.
            * config/arm/mve.md (@mve_vcmp<mve_cmp_op>q_<mode>): Add '@' prefix.
            (@mve_vpselq_<supf><mode>): Likewise.
            * config/arm/neon.md (vec_cmp<mode><v_cmp_result>): Enable for MVE
            and move to vec-common.md.
            (vec_cmpu<mode><mode>): Likewise.
            (vcond<mode><mode>): Likewise.
            (vcondu<mode><v_cmp_result>): Likewise.
            * config/arm/vec-common.md (vec_cmp<mode><v_cmp_result>): Moved
            from neon.md.
            (vec_cmpu<mode><mode>): Likewise.
            (vcond<mode><mode>): Likewise.
            (vcondu<mode><v_cmp_result>): Likewise.
    
            gcc/testsuite/
            * gcc.target/arm/simd/mve-compare-1.c: New test.

Diff:
---
 gcc/config/arm/arm.c                              | 121 ++++++++++++++--------
 gcc/config/arm/mve.md                             |   4 +-
 gcc/config/arm/neon.md                            |  53 +---------
 gcc/config/arm/vec-common.md                      |  61 +++++++++++
 gcc/testsuite/gcc.target/arm/simd/mve-compare-1.c |  78 ++++++++++++++
 5 files changed, 222 insertions(+), 95 deletions(-)

diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index e22396dbcd5..f3424800f24 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -30898,55 +30898,74 @@ arm_expand_vector_compare (rtx target, rtx_code code, rtx op0, rtx op1,
   machine_mode cmp_mode = GET_MODE (op0);
 
   bool inverted;
-  switch (code)
-    {
-    /* For these we need to compute the inverse of the requested
-       comparison.  */
-    case UNORDERED:
-    case UNLT:
-    case UNLE:
-    case UNGT:
-    case UNGE:
-    case UNEQ:
-    case NE:
-      code = reverse_condition_maybe_unordered (code);
-      if (!can_invert)
-	{
-	  /* Recursively emit the inverted comparison into a temporary
-	     and then store its inverse in TARGET.  This avoids reusing
-	     TARGET (which for integer NE could be one of the inputs).  */
-	  rtx tmp = gen_reg_rtx (cmp_result_mode);
-	  if (arm_expand_vector_compare (tmp, code, op0, op1, true))
-	    gcc_unreachable ();
-	  emit_insn (gen_rtx_SET (target, gen_rtx_NOT (cmp_result_mode, tmp)));
-	  return false;
-	}
-      inverted = true;
-      break;
 
-    default:
+  /* MVE supports more comparisons than Neon.  */
+  if (TARGET_HAVE_MVE)
       inverted = false;
-      break;
-    }
+  else
+    switch (code)
+      {
+	/* For these we need to compute the inverse of the requested
+	   comparison.  */
+      case UNORDERED:
+      case UNLT:
+      case UNLE:
+      case UNGT:
+      case UNGE:
+      case UNEQ:
+      case NE:
+	code = reverse_condition_maybe_unordered (code);
+	if (!can_invert)
+	  {
+	    /* Recursively emit the inverted comparison into a temporary
+	       and then store its inverse in TARGET.  This avoids reusing
+	       TARGET (which for integer NE could be one of the inputs).  */
+	    rtx tmp = gen_reg_rtx (cmp_result_mode);
+	    if (arm_expand_vector_compare (tmp, code, op0, op1, true))
+	      gcc_unreachable ();
+	    emit_insn (gen_rtx_SET (target, gen_rtx_NOT (cmp_result_mode, tmp)));
+	    return false;
+	  }
+	inverted = true;
+	break;
+
+      default:
+	inverted = false;
+	break;
+      }
 
   switch (code)
     {
-    /* These are natively supported for zero comparisons, but otherwise
-       require the operands to be swapped.  */
+    /* These are natively supported by Neon for zero comparisons, but
+       otherwise require the operands to be swapped.  For MVE, we can only
+       compare registers.  */
     case LE:
     case LT:
-      if (op1 != CONST0_RTX (cmp_mode))
-	{
-	  code = swap_condition (code);
-	  std::swap (op0, op1);
-	}
+      if (!TARGET_HAVE_MVE)
+	if (op1 != CONST0_RTX (cmp_mode))
+	  {
+	    code = swap_condition (code);
+	    std::swap (op0, op1);
+	  }
       /* Fall through.  */
 
-    /* These are natively supported for both register and zero operands.  */
+    /* These are natively supported by Neon for both register and zero
+       operands.  MVE supports registers only.  */
     case EQ:
     case GE:
     case GT:
-      emit_insn (gen_neon_vc (code, cmp_mode, target, op0, op1));
+    case NE:
+      if (TARGET_HAVE_MVE) {
+	rtx vpr_p0 = gen_reg_rtx (HImode);
+	emit_insn (gen_mve_vcmpq (code, cmp_mode, vpr_p0, op0, force_reg (cmp_mode, op1)));
+	rtx zero = gen_reg_rtx (cmp_mode);
+	rtx one = gen_reg_rtx (cmp_mode);
+	emit_move_insn (zero, CONST0_RTX (cmp_mode));
+	emit_move_insn (one, CONST1_RTX (cmp_mode));
+	emit_insn (gen_mve_vpselq (VPSELQ_S, cmp_result_mode, target, one, zero, vpr_p0));
+      }
+      else
+	emit_insn (gen_neon_vc (code, cmp_mode, target, op0, op1));
       return inverted;
 
     /* These are natively supported for register operands only.
@@ -30954,16 +30973,36 @@ arm_expand_vector_compare (rtx target, rtx_code code, rtx op0, rtx op1,
        or canonicalized by target-independent code.  */
     case GEU:
     case GTU:
-      emit_insn (gen_neon_vc (code, cmp_mode, target,
-			      op0, force_reg (cmp_mode, op1)));
+      if (TARGET_HAVE_MVE) {
+	rtx vpr_p0 = gen_reg_rtx (HImode);
+	emit_insn (gen_mve_vcmpq (code, cmp_mode, vpr_p0, op0, force_reg (cmp_mode, op1)));
+	rtx zero = gen_reg_rtx (cmp_mode);
+	rtx one = gen_reg_rtx (cmp_mode);
+	emit_move_insn (zero, CONST0_RTX (cmp_mode));
+	emit_move_insn (one, CONST1_RTX (cmp_mode));
+	emit_insn (gen_mve_vpselq (VPSELQ_S, cmp_result_mode, target, one, zero, vpr_p0));
+      }
+      else
+	emit_insn (gen_neon_vc (code, cmp_mode, target,
+				op0, force_reg (cmp_mode, op1)));
       return inverted;
 
     /* These require the operands to be swapped and likewise do not
        support comparisons with zero.  */
     case LEU:
     case LTU:
-      emit_insn (gen_neon_vc (swap_condition (code), cmp_mode,
-			      target, force_reg (cmp_mode, op1), op0));
+      if (TARGET_HAVE_MVE) {
+	rtx vpr_p0 = gen_reg_rtx (HImode);
+	emit_insn (gen_mve_vcmpq (swap_condition (code), cmp_mode, vpr_p0, force_reg (cmp_mode, op1), op0));
+	rtx zero = gen_reg_rtx (cmp_mode);
+	rtx one = gen_reg_rtx (cmp_mode);
+	emit_move_insn (zero, CONST0_RTX (cmp_mode));
+	emit_move_insn (one, CONST1_RTX (cmp_mode));
+	emit_insn (gen_mve_vpselq (VPSELQ_S, cmp_result_mode, target, one, zero, vpr_p0));
+      }
+      else
+	emit_insn (gen_neon_vc (swap_condition (code), cmp_mode,
+				target, force_reg (cmp_mode, op1), op0));
       return inverted;
 
     /* These need a combination of two comparisons.  */
diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md
index c3f9d81283f..7eb9a470de4 100644
--- a/gcc/config/arm/mve.md
+++ b/gcc/config/arm/mve.md
@@ -842,7 +842,7 @@
 ;;
 ;; [vcmpneq_, vcmpcsq_, vcmpeqq_, vcmpgeq_, vcmpgtq_, vcmphiq_, vcmpleq_, vcmpltq_])
 ;;
-(define_insn "mve_vcmp<mve_cmp_op>q_<mode>"
+(define_insn "@mve_vcmp<mve_cmp_op>q_<mode>"
   [
    (set (match_operand:HI 0 "vpr_register_operand" "=Up")
 	(MVE_COMPARISONS:HI (match_operand:MVE_2 1 "s_register_operand" "w")
@@ -3463,7 +3463,7 @@
 ;;
 ;; [vpselq_u, vpselq_s])
 ;;
-(define_insn "mve_vpselq_<supf><mode>"
+(define_insn "@mve_vpselq_<supf><mode>"
   [
    (set (match_operand:MVE_1 0 "s_register_operand" "=w")
 	(unspec:MVE_1 [(match_operand:MVE_1 1 "s_register_operand" "w")
diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md
index fec2cc91d24..903f4b2d0eb 100644
--- a/gcc/config/arm/neon.md
+++ b/gcc/config/arm/neon.md
@@ -1416,30 +1416,6 @@
   [(set_attr "type" "neon_qsub<q>")]
 )
 
-(define_expand "vec_cmp<mode><v_cmp_result>"
-  [(set (match_operand:<V_cmp_result> 0 "s_register_operand")
-	(match_operator:<V_cmp_result> 1 "comparison_operator"
-	  [(match_operand:VDQW 2 "s_register_operand")
-	   (match_operand:VDQW 3 "reg_or_zero_operand")]))]
-  "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
-{
-  arm_expand_vector_compare (operands[0], GET_CODE (operands[1]),
-			     operands[2], operands[3], false);
-  DONE;
-})
-
-(define_expand "vec_cmpu<mode><mode>"
-  [(set (match_operand:VDQIW 0 "s_register_operand")
-	(match_operator:VDQIW 1 "comparison_operator"
-	  [(match_operand:VDQIW 2 "s_register_operand")
-	   (match_operand:VDQIW 3 "reg_or_zero_operand")]))]
-  "TARGET_NEON"
-{
-  arm_expand_vector_compare (operands[0], GET_CODE (operands[1]),
-			     operands[2], operands[3], false);
-  DONE;
-})
-
 ;; Conditional instructions.  These are comparisons with conditional moves for
 ;; vectors.  They perform the assignment:
 ;;   
@@ -1447,20 +1423,7 @@
 ;;
 ;; where op3 is <, <=, ==, !=, >= or >.  Operations are performed
 ;; element-wise.
-
-(define_expand "vcond<mode><mode>"
-  [(set (match_operand:VDQW 0 "s_register_operand")
-	(if_then_else:VDQW
-	  (match_operator 3 "comparison_operator"
-	    [(match_operand:VDQW 4 "s_register_operand")
-	     (match_operand:VDQW 5 "reg_or_zero_operand")])
-	  (match_operand:VDQW 1 "s_register_operand")
-	  (match_operand:VDQW 2 "s_register_operand")))]
-  "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
-{
-  arm_expand_vcond (operands, <V_cmp_result>mode);
-  DONE;
-})
+;; See also vec-common.md.
 
 (define_expand "vcond<V_cvtto><mode>"
   [(set (match_operand:<V_CVTTO> 0 "s_register_operand")
@@ -1476,20 +1439,6 @@
   DONE;
 })
 
-(define_expand "vcondu<mode><v_cmp_result>"
-  [(set (match_operand:VDQW 0 "s_register_operand")
-	(if_then_else:VDQW
-	  (match_operator 3 "arm_comparison_operator"
-	    [(match_operand:<V_cmp_result> 4 "s_register_operand")
-	     (match_operand:<V_cmp_result> 5 "reg_or_zero_operand")])
-	  (match_operand:VDQW 1 "s_register_operand")
-	  (match_operand:VDQW 2 "s_register_operand")))]
-  "TARGET_NEON"
-{
-  arm_expand_vcond (operands, <V_cmp_result>mode);
-  DONE;
-})
-
 (define_expand "vcond_mask_<mode><v_cmp_result>"
   [(set (match_operand:VDQW 0 "s_register_operand")
 	(if_then_else:VDQW
diff --git a/gcc/config/arm/vec-common.md b/gcc/config/arm/vec-common.md
index 01864fb05de..96c2d578051 100644
--- a/gcc/config/arm/vec-common.md
+++ b/gcc/config/arm/vec-common.md
@@ -361,3 +361,64 @@
       DONE;
     }
 })
+
+(define_expand "vec_cmp<mode><v_cmp_result>"
+  [(set (match_operand:<V_cmp_result> 0 "s_register_operand")
+	(match_operator:<V_cmp_result> 1 "comparison_operator"
+	  [(match_operand:VDQW 2 "s_register_operand")
+	   (match_operand:VDQW 3 "reg_or_zero_operand")]))]
+  "(TARGET_NEON || TARGET_HAVE_MVE) && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
+{
+  arm_expand_vector_compare (operands[0], GET_CODE (operands[1]),
+			     operands[2], operands[3], false);
+  DONE;
+})
+
+(define_expand "vec_cmpu<mode><mode>"
+  [(set (match_operand:VDQIW 0 "s_register_operand")
+	(match_operator:VDQIW 1 "comparison_operator"
+	  [(match_operand:VDQIW 2 "s_register_operand")
+	   (match_operand:VDQIW 3 "reg_or_zero_operand")]))]
+  "TARGET_NEON || TARGET_HAVE_MVE"
+{
+  arm_expand_vector_compare (operands[0], GET_CODE (operands[1]),
+			     operands[2], operands[3], false);
+  DONE;
+})
+
+;; Conditional instructions.  These are comparisons with conditional moves for
+;; vectors.  They perform the assignment:
+;;   
+;;     Vop0 = (Vop4 <op3> Vop5) ? Vop1 : Vop2;
+;;
+;; where op3 is <, <=, ==, !=, >= or >.  Operations are performed
+;; element-wise.
+
+(define_expand "vcond<mode><mode>"
+  [(set (match_operand:VDQW 0 "s_register_operand")
+	(if_then_else:VDQW
+	  (match_operator 3 "comparison_operator"
+	    [(match_operand:VDQW 4 "s_register_operand")
+	     (match_operand:VDQW 5 "reg_or_zero_operand")])
+	  (match_operand:VDQW 1 "s_register_operand")
+	  (match_operand:VDQW 2 "s_register_operand")))]
+  "(TARGET_NEON || TARGET_HAVE_MVE) && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
+{
+  arm_expand_vcond (operands, <V_cmp_result>mode);
+  DONE;
+})
+
+(define_expand "vcondu<mode><v_cmp_result>"
+  [(set (match_operand:VDQW 0 "s_register_operand")
+	(if_then_else:VDQW
+	  (match_operator 3 "arm_comparison_operator"
+	    [(match_operand:<V_cmp_result> 4 "s_register_operand")
+	     (match_operand:<V_cmp_result> 5 "reg_or_zero_operand")])
+	  (match_operand:VDQW 1 "s_register_operand")
+	  (match_operand:VDQW 2 "s_register_operand")))]
+  "TARGET_NEON || TARGET_HAVE_MVE"
+{
+  arm_expand_vcond (operands, <V_cmp_result>mode);
+  DONE;
+})
+
diff --git a/gcc/testsuite/gcc.target/arm/simd/mve-compare-1.c b/gcc/testsuite/gcc.target/arm/simd/mve-compare-1.c
new file mode 100644
index 00000000000..e8516bfd3b8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/simd/mve-compare-1.c
@@ -0,0 +1,78 @@
+/* { dg-do assemble } */
+/* { dg-require-effective-target arm_v8_1m_mve_ok } */
+/* { dg-add-options arm_v8_1m_mve } */
+/* { dg-additional-options "-O3" } */
+
+#define COMPARE_REG(NAME, OP, TYPE) \
+  TYPE \
+  cmp_##NAME##_##TYPE##_reg (TYPE a, TYPE b) \
+  { \
+    return a OP b; \
+  }
+
+#define COMPARE_REG_AND_ZERO(NAME, OP, TYPE) \
+  COMPARE_REG (NAME, OP, TYPE) \
+  \
+  TYPE \
+  cmp_##NAME##_##TYPE##_zero (TYPE a) \
+  { \
+    return a OP (TYPE) {}; \
+  }
+
+#define COMPARE_TYPE(TYPE, COMPARE_ORDERED) \
+  COMPARE_REG_AND_ZERO (eq, ==, TYPE) \
+  COMPARE_REG_AND_ZERO (ne, !=, TYPE) \
+  COMPARE_ORDERED (lt, <, TYPE) \
+  COMPARE_ORDERED (le, <=, TYPE) \
+  COMPARE_ORDERED (gt, >, TYPE) \
+  COMPARE_ORDERED (ge, >=, TYPE)
+
+#define TEST_TYPE(NAME, ELEM, COMPARE_ORDERED, SIZE)  \
+  typedef ELEM NAME##SIZE __attribute__((vector_size(SIZE))); \
+  COMPARE_TYPE (NAME##SIZE, COMPARE_ORDERED)
+
+/* 64-bit vectors, not vectorized.  */
+TEST_TYPE (vs8, __INT8_TYPE__, COMPARE_REG_AND_ZERO, 8)
+TEST_TYPE (vu8, __UINT8_TYPE__, COMPARE_REG, 8)
+TEST_TYPE (vs16, __INT16_TYPE__, COMPARE_REG_AND_ZERO, 8)
+TEST_TYPE (vu16, __UINT16_TYPE__, COMPARE_REG, 8)
+TEST_TYPE (vs32, __INT32_TYPE__, COMPARE_REG_AND_ZERO, 8)
+TEST_TYPE (vu32, __UINT32_TYPE__, COMPARE_REG, 8)
+
+/* 128-bit vectors.  */
+TEST_TYPE (vs8, __INT8_TYPE__, COMPARE_REG_AND_ZERO, 16)
+TEST_TYPE (vu8, __UINT8_TYPE__, COMPARE_REG, 16)
+TEST_TYPE (vs16, __INT16_TYPE__, COMPARE_REG_AND_ZERO, 16)
+TEST_TYPE (vu16, __UINT16_TYPE__, COMPARE_REG, 16)
+TEST_TYPE (vs32, __INT32_TYPE__, COMPARE_REG_AND_ZERO, 16)
+TEST_TYPE (vu32, __UINT32_TYPE__, COMPARE_REG, 16)
+
+/* { 8 bits } x { eq, ne, lt, le, gt, ge, hi, cs }.  */
+/* { dg-final { scan-assembler-times {\tvcmp.i8  eq, q[0-9]+, q[0-9]+\n} 4 } } */
+/* { dg-final { scan-assembler-times {\tvcmp.i8  ne, q[0-9]+, q[0-9]+\n} 4 } } */
+/* { dg-final { scan-assembler-times {\tvcmp.s8  lt, q[0-9]+, q[0-9]+\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tvcmp.s8  le, q[0-9]+, q[0-9]+\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tvcmp.s8  gt, q[0-9]+, q[0-9]+\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tvcmp.s8  ge, q[0-9]+, q[0-9]+\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tvcmp.u8  hi, q[0-9]+, q[0-9]+\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tvcmp.u8  cs, q[0-9]+, q[0-9]+\n} 2 } } */
+
+/* { 16 bits } x { eq, ne, lt, le, gt, ge, hi, cs }.  */
+/* { dg-final { scan-assembler-times {\tvcmp.i16  eq, q[0-9]+, q[0-9]+\n} 4 } } */
+/* { dg-final { scan-assembler-times {\tvcmp.i16  ne, q[0-9]+, q[0-9]+\n} 4 } } */
+/* { dg-final { scan-assembler-times {\tvcmp.s16  lt, q[0-9]+, q[0-9]+\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tvcmp.s16  le, q[0-9]+, q[0-9]+\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tvcmp.s16  gt, q[0-9]+, q[0-9]+\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tvcmp.s16  ge, q[0-9]+, q[0-9]+\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tvcmp.u16  hi, q[0-9]+, q[0-9]+\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tvcmp.u16  cs, q[0-9]+, q[0-9]+\n} 2 } } */
+
+/* { 32 bits } x { eq, ne, lt, le, gt, ge, hi, cs }.  */
+/* { dg-final { scan-assembler-times {\tvcmp.i32  eq, q[0-9]+, q[0-9]+\n} 4 } } */
+/* { dg-final { scan-assembler-times {\tvcmp.i32  ne, q[0-9]+, q[0-9]+\n} 4 } } */
+/* { dg-final { scan-assembler-times {\tvcmp.s32  lt, q[0-9]+, q[0-9]+\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tvcmp.s32  le, q[0-9]+, q[0-9]+\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tvcmp.s32  gt, q[0-9]+, q[0-9]+\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tvcmp.s32  ge, q[0-9]+, q[0-9]+\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tvcmp.u32  hi, q[0-9]+, q[0-9]+\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tvcmp.u32  cs, q[0-9]+, q[0-9]+\n} 2 } } */
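
To reproduce the scanned sequences by hand, the test can be compiled along
these lines (a sketch: the exact options injected by arm_v8_1m_mve depend
on the toolchain configuration):

    arm-none-eabi-gcc -march=armv8.1-m.main+mve -mfloat-abi=hard -O3 -S mve-compare-1.c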

