]> gcc.gnu.org Git - gcc.git/commitdiff
RISC-V: Fixed failed rvv combine testcases
authorLehua Ding <lehua.ding@rivai.ai>
Tue, 7 Nov 2023 07:33:20 +0000 (15:33 +0800)
committerLehua Ding <lehua.ding@rivai.ai>
Tue, 7 Nov 2023 07:53:53 +0000 (15:53 +0800)
This patch fixes the following failing testcases on the trunk:
FAIL: gcc.target/riscv/rvv/autovec/cond/cond_widen_reduc-2.c scan-assembler-times \\tvfwredusum\\.vs\\tv[0-9]+,v[0-9]+,v[0-9]+,v0\\.t 2
...
FAIL: gcc.target/riscv/rvv/autovec/cond/cond_widen_reduc-2.c scan-assembler-times \\tvwredsumu\\.vs\\tv[0-9]+,v[0-9]+,v[0-9]+,v0\\.t 3
...

These testcases fail because .VCOND_MASK_LEN was introduced in the midend
for another bugfix, which leads to a new vcond_mask_len rtl pattern after
expand. So we need to add new combine patterns to handle this case.

Consider this code:

int16_t foo (int8_t *restrict a, int8_t *restrict pred)
{
  int16_t sum = 0;
  for (int i = 0; i < 16; i += 1)
    if (pred[i])
      sum += a[i];
  return sum;
}

Before this patch:
foo:
        vsetivli        zero,16,e8,m1,ta,ma
        vle8.v  v0,0(a1)
        vsetvli a5,zero,e8,m1,ta,ma
        vmsne.vi        v0,v0,0
        vsetvli zero,zero,e16,m2,ta,ma
        li      a3,0
        vmv.v.i v2,0
        vsetivli        zero,16,e16,m2,ta,ma
        vle8.v  v6,0(a0),v0.t
        vmv.s.x v1,a3
        vsetvli a5,zero,e16,m2,ta,ma
        vsext.vf2       v4,v6
        vsetivli        zero,16,e16,m2,tu,ma
        vmerge.vvm      v2,v2,v4,v0
        vsetvli a5,zero,e16,m2,ta,ma
        vredsum.vs      v2,v2,v1
        vmv.x.s a0,v2
        slliw   a0,a0,16
        sraiw   a0,a0,16
        ret

After this patch:
foo:
vsetivli zero,16,e16,m2,ta,ma
li a5,0
vle8.v v0,0(a1)
vmv.s.x v1,a5
vsetvli zero,zero,e8,m1,ta,ma
vmsne.vi v0,v0,0
vle8.v v2,0(a0),v0.t
vwredsum.vs v1,v2,v1,v0.t
vsetvli zero,zero,e16,m1,ta,ma
vmv.x.s a0,v1
slliw a0,a0,16
sraiw a0,a0,16
ret

Combine the vsext.vf2, vmerge.vvm, and vredsum.vs instructions while
reducing the corresponding vsetvl instructions.

gcc/ChangeLog:

* config/riscv/autovec-opt.md (*cond_len_<optab><v_double_trunc><mode>):
New combine pattern.
(*cond_len_<optab><v_quad_trunc><mode>): Ditto.
(*cond_len_<optab><v_oct_trunc><mode>): Ditto.
(*cond_len_extend<v_double_trunc><mode>): Ditto.
(*cond_len_widen_reduc_plus_scal_<mode>): Ditto.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/cond/cond_widen_reduc-1.c:
* gcc.target/riscv/rvv/autovec/cond/cond_widen_reduc-2.c:

gcc/config/riscv/autovec-opt.md
gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_widen_reduc-1.c
gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_widen_reduc-2.c

index d0f8b3cde4ec06b167ab34ff336d9692fbf3e56b..3c87e66ea491dac2ce8fc7b37a6a93a54b13dd61 100644 (file)
 }
 [(set_attr "type" "vector")])
 
+;; Combine sign_extend/zero_extend(vf2) and vcond_mask_len
+;; Operands: 0 is the destination, 1 the merge value (reused as the else
+;; arm of the if_then_else through match_dup), 2 the <V_DOUBLE_TRUNC>
+;; source being extended, 3 the <VM> mask, 4 the vector length and 5/6 the
+;; const_int operands of the UNSPEC_VPREDICATE.
+;; The output template is "#" and the split condition is "&& 1", so the
+;; matched insn is always split into pred_<optab><mode>_vf2, which takes
+;; the mask (operand 3) before the merge value and the source.
+(define_insn_and_split "*cond_len_<optab><v_double_trunc><mode>"
+  [(set (match_operand:VWEXTI 0 "register_operand")
+    (if_then_else:VWEXTI
+      (unspec:<VM>
+        [(match_operand 4 "vector_length_operand")
+         (match_operand 5 "const_int_operand")
+         (match_operand 6 "const_int_operand")
+         (reg:SI VL_REGNUM)
+         (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+      (vec_merge:VWEXTI
+        (any_extend:VWEXTI (match_operand:<V_DOUBLE_TRUNC> 2 "register_operand"))
+        (match_operand:VWEXTI 1 "vector_merge_operand")
+       (match_operand:<VM> 3 "register_operand"))
+      (match_dup 1)))]
+  "TARGET_VECTOR"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+{
+  emit_insn (gen_pred_<optab><mode>_vf2 (operands[0], operands[3], operands[1], operands[2],
+                                         operands[4], operands[5], operands[6], CONST0_RTX (Pmode)));
+  DONE;
+}
+[(set_attr "type" "vector")])
+
+;; Combine sign_extend/zero_extend(vf4) and vcond_mask_len
+;; Operands: 0 is the destination, 1 the merge value (reused as the else
+;; arm of the if_then_else through match_dup), 2 the <V_QUAD_TRUNC>
+;; source being extended, 3 the <VM> mask, 4 the vector length and 5/6 the
+;; const_int operands of the UNSPEC_VPREDICATE.
+;; The output template is "#" and the split condition is "&& 1", so the
+;; matched insn is always split into pred_<optab><mode>_vf4, which takes
+;; the mask (operand 3) before the merge value and the source.
+(define_insn_and_split "*cond_len_<optab><v_quad_trunc><mode>"
+  [(set (match_operand:VQEXTI 0 "register_operand")
+    (if_then_else:VQEXTI
+      (unspec:<VM>
+        [(match_operand 4 "vector_length_operand")
+         (match_operand 5 "const_int_operand")
+         (match_operand 6 "const_int_operand")
+         (reg:SI VL_REGNUM)
+         (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+      (vec_merge:VQEXTI
+        (any_extend:VQEXTI (match_operand:<V_QUAD_TRUNC> 2 "register_operand"))
+        (match_operand:VQEXTI 1 "vector_merge_operand")
+       (match_operand:<VM> 3 "register_operand"))
+      (match_dup 1)))]
+  "TARGET_VECTOR"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+{
+  emit_insn (gen_pred_<optab><mode>_vf4 (operands[0], operands[3], operands[1], operands[2],
+                                         operands[4], operands[5], operands[6], CONST0_RTX (Pmode)));
+  DONE;
+}
+[(set_attr "type" "vector")])
+
+;; Combine sign_extend/zero_extend(vf8) and vcond_mask_len
+;; Operands: 0 is the destination, 1 the merge value (reused as the else
+;; arm of the if_then_else through match_dup), 2 the <V_OCT_TRUNC>
+;; source being extended, 3 the <VM> mask, 4 the vector length and 5/6 the
+;; const_int operands of the UNSPEC_VPREDICATE.
+;; The output template is "#" and the split condition is "&& 1", so the
+;; matched insn is always split into pred_<optab><mode>_vf8, which takes
+;; the mask (operand 3) before the merge value and the source.
+(define_insn_and_split "*cond_len_<optab><v_oct_trunc><mode>"
+  [(set (match_operand:VOEXTI 0 "register_operand")
+    (if_then_else:VOEXTI
+      (unspec:<VM>
+        [(match_operand 4 "vector_length_operand")
+         (match_operand 5 "const_int_operand")
+         (match_operand 6 "const_int_operand")
+         (reg:SI VL_REGNUM)
+         (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+      (vec_merge:VOEXTI
+        (any_extend:VOEXTI (match_operand:<V_OCT_TRUNC> 2 "register_operand"))
+        (match_operand:VOEXTI 1 "vector_merge_operand")
+       (match_operand:<VM> 3 "register_operand"))
+      (match_dup 1)))]
+  "TARGET_VECTOR"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+{
+  emit_insn (gen_pred_<optab><mode>_vf8 (operands[0], operands[3], operands[1], operands[2],
+                                         operands[4], operands[5], operands[6], CONST0_RTX (Pmode)));
+  DONE;
+}
+[(set_attr "type" "vector")])
+
 ;; Combine trunc(vf2) + vcond_mask
 (define_insn_and_split "*cond_trunc<mode><v_double_trunc>"
   [(set (match_operand:<V_DOUBLE_TRUNC> 0 "register_operand")
 }
 [(set_attr "type" "vector")])
 
+;; Combine FP extend(vf2) and vcond_mask_len
+;; Operands: 0 is the destination, 1 the merge value (reused as the else
+;; arm of the if_then_else through match_dup), 2 the <V_DOUBLE_TRUNC>
+;; source fed to float_extend, 3 the <VM> mask, 4 the vector length and
+;; 5/6 the const_int operands of the UNSPEC_VPREDICATE.
+;; The output template is "#" and the split condition is "&& 1", so the
+;; matched insn is always split into pred_extend<mode>, which takes the
+;; mask (operand 3) before the merge value and the source.
+(define_insn_and_split "*cond_len_extend<v_double_trunc><mode>"
+  [(set (match_operand:VWEXTF_ZVFHMIN 0 "register_operand")
+    (if_then_else:VWEXTF_ZVFHMIN
+      (unspec:<VM>
+        [(match_operand 4 "vector_length_operand")
+         (match_operand 5 "const_int_operand")
+         (match_operand 6 "const_int_operand")
+         (reg:SI VL_REGNUM)
+         (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+      (vec_merge:VWEXTF_ZVFHMIN
+        (float_extend:VWEXTF_ZVFHMIN (match_operand:<V_DOUBLE_TRUNC> 2 "register_operand"))
+        (match_operand:VWEXTF_ZVFHMIN 1 "vector_merge_operand")
+       (match_operand:<VM> 3 "register_operand"))
+      (match_dup 1)))]
+  "TARGET_VECTOR"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+{
+  emit_insn (gen_pred_extend<mode> (operands[0], operands[3], operands[1], operands[2],
+                                    operands[4], operands[5], operands[6], CONST0_RTX (Pmode)));
+  DONE;
+}
+[(set_attr "type" "vector")])
+
 ;; Combine FP trunc(vf2) + vcond_mask
 (define_insn_and_split "*cond_trunc<mode><v_double_trunc>"
   [(set (match_operand:<V_DOUBLE_TRUNC> 0 "register_operand")
 }
 [(set_attr "type" "vector")])
 
+;; Combine mask_len_extend + vredsum to mask_vwredsum[u]
+;; where the merge of mask_len_extend is vector const 0
+;; Operands: 0 is the <V_DOUBLE_EXTEND_VEL> destination, 1 the <VM> mask,
+;; 2 the vector length of the outer predicate and 3 the VI_QHS_NO_M8
+;; source being sign/zero extended.  Operands 4-8 form the predicate of
+;; the inner if_then_else (an all-trues mask, a vector length, two
+;; const_ints and a const 1 or 2), 9 is the vector const 0 and 10 its
+;; merge operand; the same inner expression must appear both as the
+;; vec_merge fallback and as the outer else arm (match_dup 4-10).
+;; The split forwards only operands 0, 3, 1 and 2 to expand_reduction,
+;; together with a zero of <V_DOUBLE_EXTEND_VEL>mode; operands 4-10 are
+;; matched but not passed on.
+(define_insn_and_split "*cond_len_widen_reduc_plus_scal_<mode>"
+  [(set (match_operand:<V_DOUBLE_EXTEND_VEL> 0 "register_operand")
+        (unspec:<V_DOUBLE_EXTEND_VEL> [
+          (if_then_else:<V_DOUBLE_EXTEND>
+            (unspec:<VM> [
+              (match_operand 2 "vector_length_operand")
+              (const_int 0)
+              (const_int 0)
+              (reg:SI VL_REGNUM)
+              (reg:SI VTYPE_REGNUM)
+            ] UNSPEC_VPREDICATE)
+            (vec_merge:<V_DOUBLE_EXTEND>
+              (any_extend:<V_DOUBLE_EXTEND>
+                (match_operand:VI_QHS_NO_M8 3 "register_operand"))
+              (if_then_else:<V_DOUBLE_EXTEND>
+                (unspec:<VM> [
+                  (match_operand:<VM> 4 "vector_all_trues_mask_operand")
+                  (match_operand 5 "vector_length_operand")
+                  (match_operand 6 "const_int_operand")
+                  (match_operand 7 "const_int_operand")
+                  (match_operand 8 "const_1_or_2_operand")
+                  (reg:SI VL_REGNUM)
+                  (reg:SI VTYPE_REGNUM)
+                ] UNSPEC_VPREDICATE)
+                (match_operand:<V_DOUBLE_EXTEND> 9 "vector_const_0_operand")
+                (match_operand:<V_DOUBLE_EXTEND> 10 "vector_merge_operand"))
+              (match_operand:<VM> 1 "register_operand"))
+            (if_then_else:<V_DOUBLE_EXTEND>
+              (unspec:<VM> [
+                (match_dup 4)
+                (match_dup 5)
+                (match_dup 6)
+                (match_dup 7)
+                (match_dup 8)
+                (reg:SI VL_REGNUM)
+                (reg:SI VTYPE_REGNUM)
+              ] UNSPEC_VPREDICATE)
+              (match_dup 9)
+              (match_dup 10)))
+        ] UNSPEC_REDUC_SUM))]
+  "TARGET_VECTOR && can_create_pseudo_p ()"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+{
+  rtx ops[] = {operands[0], operands[3], operands[1], operands[2]};
+  riscv_vector::expand_reduction (<WREDUC_UNSPEC>,
+                                  riscv_vector::REDUCE_OP_M,
+                                  ops, CONST0_RTX (<V_DOUBLE_EXTEND_VEL>mode));
+  DONE;
+}
+[(set_attr "type" "vector")])
+
 ;; Combine mask_extend + vfredsum to mask_vfwredusum
 ;; where the mrege of mask_extend is vector const 0
 (define_insn_and_split "*cond_widen_reduc_plus_scal_<mode>"
 }
 [(set_attr "type" "vector")])
 
+;; Combine mask_len_extend + vfredusum to mask_vfwredusum
+;; where the merge of mask_len_extend is vector const 0
+;; Operands: 0 is the <V_DOUBLE_EXTEND_VEL> destination, 1 the <VM> mask,
+;; 2 the vector length of the outer predicate and 3 the VF_HS_NO_M8
+;; source fed to float_extend.  Operands 4-8 form the predicate of the
+;; inner if_then_else (an all-trues mask, a vector length, two const_ints
+;; and a const 1 or 2), 9 is the vector const 0 and 10 its merge operand;
+;; the same inner expression must appear both as the vec_merge fallback
+;; and as the outer else arm (match_dup 4-10).
+;; The split forwards only operands 0, 3, 1 and 2 to expand_reduction
+;; (UNSPEC_WREDUC_SUM_UNORDERED, REDUCE_OP_M_FRM_DYN), together with a
+;; zero of <V_DOUBLE_EXTEND_VEL>mode; operands 4-10 are matched but not
+;; passed on.
+(define_insn_and_split "*cond_len_widen_reduc_plus_scal_<mode>"
+  [(set (match_operand:<V_DOUBLE_EXTEND_VEL> 0 "register_operand")
+        (unspec:<V_DOUBLE_EXTEND_VEL> [
+          (if_then_else:<V_DOUBLE_EXTEND>
+            (unspec:<VM> [
+              (match_operand 2 "vector_length_operand")
+              (const_int 0)
+              (const_int 0)
+              (reg:SI VL_REGNUM)
+              (reg:SI VTYPE_REGNUM)
+            ] UNSPEC_VPREDICATE)
+            (vec_merge:<V_DOUBLE_EXTEND>
+              (float_extend:<V_DOUBLE_EXTEND>
+                (match_operand:VF_HS_NO_M8 3 "register_operand"))
+              (if_then_else:<V_DOUBLE_EXTEND>
+                (unspec:<VM> [
+                  (match_operand:<VM> 4 "vector_all_trues_mask_operand")
+                  (match_operand 5 "vector_length_operand")
+                  (match_operand 6 "const_int_operand")
+                  (match_operand 7 "const_int_operand")
+                  (match_operand 8 "const_1_or_2_operand")
+                  (reg:SI VL_REGNUM)
+                  (reg:SI VTYPE_REGNUM)
+                ] UNSPEC_VPREDICATE)
+                (match_operand:<V_DOUBLE_EXTEND> 9 "vector_const_0_operand")
+                (match_operand:<V_DOUBLE_EXTEND> 10 "vector_merge_operand"))
+              (match_operand:<VM> 1 "register_operand"))
+            (if_then_else:<V_DOUBLE_EXTEND>
+              (unspec:<VM> [
+                (match_dup 4)
+                (match_dup 5)
+                (match_dup 6)
+                (match_dup 7)
+                (match_dup 8)
+                (reg:SI VL_REGNUM)
+                (reg:SI VTYPE_REGNUM)
+              ] UNSPEC_VPREDICATE)
+              (match_dup 9)
+              (match_dup 10)))
+        ] UNSPEC_REDUC_SUM_UNORDERED))]
+  "TARGET_VECTOR && can_create_pseudo_p ()"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+{
+  rtx ops[] = {operands[0], operands[3], operands[1], operands[2]};
+  riscv_vector::expand_reduction (UNSPEC_WREDUC_SUM_UNORDERED,
+                                  riscv_vector::REDUCE_OP_M_FRM_DYN,
+                                  ops, CONST0_RTX (<V_DOUBLE_EXTEND_VEL>mode));
+  DONE;
+}
+[(set_attr "type" "vector")])
+
 ;; =============================================================================
 ;; Misc combine patterns
 ;; =============================================================================
index 22a71048684e1efab0ed9e0f290740da6cdb8e99..47889f3a1cd813ae99394dfb1d800086d871221e 100644 (file)
 
 #define TEST_ALL(TEST)                                                         \
   TEST (int16_t, int8_t, 16)                                                   \
+  TEST (int32_t, int8_t, 8)                                                    \
   TEST (int32_t, int16_t, 8)                                                   \
+  TEST (int64_t, int8_t, 4)                                                    \
+  TEST (int64_t, int16_t, 4)                                                   \
   TEST (int64_t, int32_t, 4)                                                   \
   TEST (uint16_t, uint8_t, 16)                                                 \
+  TEST (uint32_t, uint8_t, 8)                                                  \
   TEST (uint32_t, uint16_t, 8)                                                 \
+  TEST (uint64_t, uint8_t, 4)                                                  \
+  TEST (uint64_t, uint16_t, 4)                                                 \
   TEST (uint64_t, uint32_t, 4)                                                 \
   TEST (float, _Float16, 8)                                                    \
+  TEST (double, _Float16, 4)                                                   \
   TEST (double, float, 4)
 
 TEST_ALL (TEST_TYPE)
 
-/* { dg-final { scan-assembler-times {\tvfwredusum\.vs\tv[0-9]+,v[0-9]+,v[0-9]+,v0\.t} 2 } } */
+/* { dg-final { scan-assembler-times {\tvfwredusum\.vs\tv[0-9]+,v[0-9]+,v[0-9]+,v0\.t} 3 } } */
 /* { dg-final { scan-assembler-times {\tvwredsum\.vs\tv[0-9]+,v[0-9]+,v[0-9]+,v0\.t} 3 } } */
 /* { dg-final { scan-assembler-times {\tvwredsumu\.vs\tv[0-9]+,v[0-9]+,v[0-9]+,v0\.t} 3 } } */
+/* { dg-final { scan-assembler-times {\tvsext\.vf4\tv[0-9]+,v[0-9]+,v0\.t} 2 } } */
+/* { dg-final { scan-assembler-times {\tvsext\.vf8\tv[0-9]+,v[0-9]+,v0\.t} 1 } } */
+/* { dg-final { scan-assembler-times {\tvzext\.vf4\tv[0-9]+,v[0-9]+,v0\.t} 2 } } */
+/* { dg-final { scan-assembler-times {\tvzext\.vf8\tv[0-9]+,v[0-9]+,v0\.t} 1 } } */
index 7c8fedd072b234a31b8f096a01d76b3d9d84093c..662d1351215703c241605ebd6371e39794bb2067 100644 (file)
@@ -1,30 +1,12 @@
 /* { dg-do compile } */
 /* { dg-additional-options "-march=rv64gcv_zvfh_zvl128b -mabi=lp64d --param riscv-autovec-preference=scalable --param riscv-autovec-lmul=m2 -fno-vect-cost-model -ffast-math" } */
-#include <stdint-gcc.h>
 
-#define TEST_TYPE(TYPE1, TYPE2, N)                                             \
-  __attribute__ ((noipa))                                                      \
-  TYPE1 reduc_##TYPE1##_##TYPE2 (TYPE2 *restrict a, TYPE2 *restrict pred)      \
-  {                                                                            \
-    TYPE1 sum = 0;                                                             \
-    for (int i = 0; i < N; i += 1)                                             \
-      if (pred[i])                                                             \
-       sum += a[i];                                                           \
-    return sum;                                                                \
-  }
+#include "cond_widen_reduc-1.c"
 
-#define TEST_ALL(TEST)                                                         \
-  TEST (int16_t, int8_t, 16)                                                   \
-  TEST (int32_t, int16_t, 8)                                                   \
-  TEST (int64_t, int32_t, 4)                                                   \
-  TEST (uint16_t, uint8_t, 16)                                                 \
-  TEST (uint32_t, uint16_t, 8)                                                 \
-  TEST (uint64_t, uint32_t, 4)                                                 \
-  TEST (float, _Float16, 8)                                                    \
-  TEST (double, float, 4)
-
-TEST_ALL (TEST_TYPE)
-
-/* { dg-final { scan-assembler-times {\tvfwredusum\.vs\tv[0-9]+,v[0-9]+,v[0-9]+,v0\.t} 2 } } */
+/* { dg-final { scan-assembler-times {\tvfwredusum\.vs\tv[0-9]+,v[0-9]+,v[0-9]+,v0\.t} 3 } } */
 /* { dg-final { scan-assembler-times {\tvwredsum\.vs\tv[0-9]+,v[0-9]+,v[0-9]+,v0\.t} 3 } } */
 /* { dg-final { scan-assembler-times {\tvwredsumu\.vs\tv[0-9]+,v[0-9]+,v[0-9]+,v0\.t} 3 } } */
+/* { dg-final { scan-assembler-times {\tvsext\.vf4\tv[0-9]+,v[0-9]+,v0\.t} 2 } } */
+/* { dg-final { scan-assembler-times {\tvsext\.vf8\tv[0-9]+,v[0-9]+,v0\.t} 1 } } */
+/* { dg-final { scan-assembler-times {\tvzext\.vf4\tv[0-9]+,v[0-9]+,v0\.t} 2 } } */
+/* { dg-final { scan-assembler-times {\tvzext\.vf8\tv[0-9]+,v[0-9]+,v0\.t} 1 } } */
This page took 0.076479 seconds and 5 git commands to generate.