"TARGET_VECTOR"
{})
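+;; Special case: the destination, the mask operand and the merge operand are
+;; all tied to the same register (v0), so the masked compare can write its
+;; result directly into v0 and no extra vmv copy of the mask is needed.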
+(define_insn "*pred_cmp<mode>_merge_tie_mask"
+ [(set (match_operand:<VM> 0 "register_operand" "=vm")
+ (if_then_else:<VM>
+ (unspec:<VM>
+ [(match_operand:<VM> 1 "register_operand" " 0")
+ (match_operand 5 "vector_length_operand" " rK")
+ (match_operand 6 "const_int_operand" " i")
+ (match_operand 7 "const_int_operand" " i")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (match_operator:<VM> 2 "comparison_except_ltge_operator"
+ [(match_operand:VI 3 "register_operand" " vr")
+ (match_operand:VI 4 "vector_arith_operand" "vrvi")])
+ (match_dup 1)))]
+ "TARGET_VECTOR"
+ "vms%B2.v%o4\t%0,%3,%v4,v0.t"
+ [(set_attr "type" "vicmp")
+ (set_attr "mode" "<MODE>")
+ (set_attr "merge_op_idx" "1")
+ (set_attr "vl_op_idx" "5")
+ (set (attr "ma") (symbol_ref "riscv_vector::get_ma(operands[6])"))
+ (set (attr "avl_type") (symbol_ref "INTVAL (operands[7])"))])
+
;; We don't use early-clobber for LMUL <= 1 to get better codegen.
(define_insn "*pred_cmp<mode>"
[(set (match_operand:<VM> 0 "register_operand" "=vr, vr, vr, vr")
;; We use early-clobber for source LMUL > dest LMUL.
(define_insn "*pred_cmp<mode>_narrow"
- [(set (match_operand:<VM> 0 "register_operand" "=&vr, &vr, &vr, &vr")
+ [(set (match_operand:<VM> 0 "register_operand" "=vm, vr, vr, vr, vr, vr, vr, &vr, &vr")
(if_then_else:<VM>
(unspec:<VM>
- [(match_operand:<VM> 1 "vector_mask_operand" "vmWc1,vmWc1,vmWc1,vmWc1")
- (match_operand 6 "vector_length_operand" " rK, rK, rK, rK")
- (match_operand 7 "const_int_operand" " i, i, i, i")
- (match_operand 8 "const_int_operand" " i, i, i, i")
+ [(match_operand:<VM> 1 "vector_mask_operand" " 0,vmWc1,vmWc1,vmWc1,vmWc1,vmWc1,vmWc1,vmWc1,vmWc1")
+ (match_operand 6 "vector_length_operand" " rK, rK, rK, rK, rK, rK, rK, rK, rK")
+ (match_operand 7 "const_int_operand" " i, i, i, i, i, i, i, i, i")
+ (match_operand 8 "const_int_operand" " i, i, i, i, i, i, i, i, i")
(reg:SI VL_REGNUM)
(reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
(match_operator:<VM> 3 "comparison_except_ltge_operator"
- [(match_operand:VI 4 "register_operand" " vr, vr, vr, vr")
- (match_operand:VI 5 "vector_arith_operand" " vr, vr, vi, vi")])
- (match_operand:<VM> 2 "vector_merge_operand" " vu, 0, vu, 0")))]
+ [(match_operand:VI 4 "register_operand" " vr, 0, vr, 0, 0, vr, 0, vr, vr")
+ (match_operand:VI 5 "vector_arith_operand" " vrvi, vrvi, 0, 0, vrvi, 0, 0, vrvi, vrvi")])
+ (match_operand:<VM> 2 "vector_merge_operand" " vu, vu, vu, vu, 0, 0, 0, vu, 0")))]
"TARGET_VECTOR && known_gt (GET_MODE_SIZE (<MODE>mode), BYTES_PER_RISCV_VECTOR)"
"vms%B3.v%o5\t%0,%4,%v5%p1"
[(set_attr "type" "vicmp")
"TARGET_VECTOR"
{})
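+;; Likewise for the lt/ge compares.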
+(define_insn "*pred_ltge<mode>_merge_tie_mask"
+ [(set (match_operand:<VM> 0 "register_operand" "=vm")
+ (if_then_else:<VM>
+ (unspec:<VM>
+ [(match_operand:<VM> 1 "register_operand" " 0")
+ (match_operand 5 "vector_length_operand" " rK")
+ (match_operand 6 "const_int_operand" " i")
+ (match_operand 7 "const_int_operand" " i")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (match_operator:<VM> 2 "ltge_operator"
+ [(match_operand:VI 3 "register_operand" " vr")
+ (match_operand:VI 4 "vector_neg_arith_operand" "vrvj")])
+ (match_dup 1)))]
+ "TARGET_VECTOR"
+ "vms%B2.v%o4\t%0,%3,%v4,v0.t"
+ [(set_attr "type" "vicmp")
+ (set_attr "mode" "<MODE>")
+ (set_attr "merge_op_idx" "1")
+ (set_attr "vl_op_idx" "5")
+ (set (attr "ma") (symbol_ref "riscv_vector::get_ma(operands[6])"))
+ (set (attr "avl_type") (symbol_ref "INTVAL (operands[7])"))])
+
;; We don't use early-clobber for LMUL <= 1 to get better codegen.
(define_insn "*pred_ltge<mode>"
[(set (match_operand:<VM> 0 "register_operand" "=vr, vr, vr, vr")
;; We use early-clobber for source LMUL > dest LMUL.
(define_insn "*pred_ltge<mode>_narrow"
- [(set (match_operand:<VM> 0 "register_operand" "=&vr, &vr, &vr, &vr")
+ [(set (match_operand:<VM> 0 "register_operand" "=vm, vr, vr, vr, vr, vr, vr, &vr, &vr")
(if_then_else:<VM>
(unspec:<VM>
- [(match_operand:<VM> 1 "vector_mask_operand" "vmWc1,vmWc1,vmWc1,vmWc1")
- (match_operand 6 "vector_length_operand" " rK, rK, rK, rK")
- (match_operand 7 "const_int_operand" " i, i, i, i")
- (match_operand 8 "const_int_operand" " i, i, i, i")
+ [(match_operand:<VM> 1 "vector_mask_operand" " 0,vmWc1,vmWc1,vmWc1,vmWc1,vmWc1,vmWc1,vmWc1,vmWc1")
+ (match_operand 6 "vector_length_operand" " rK, rK, rK, rK, rK, rK, rK, rK, rK")
+ (match_operand 7 "const_int_operand" " i, i, i, i, i, i, i, i, i")
+ (match_operand 8 "const_int_operand" " i, i, i, i, i, i, i, i, i")
(reg:SI VL_REGNUM)
(reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
(match_operator:<VM> 3 "ltge_operator"
- [(match_operand:VI 4 "register_operand" " vr, vr, vr, vr")
- (match_operand:VI 5 "vector_neg_arith_operand" " vr, vr, vj, vj")])
- (match_operand:<VM> 2 "vector_merge_operand" " vu, 0, vu, 0")))]
+ [(match_operand:VI 4 "register_operand" " vr, 0, vr, 0, 0, vr, 0, vr, vr")
+ (match_operand:VI 5 "vector_neg_arith_operand" " vrvj, vrvj, 0, 0, vrvj, 0, 0, vrvj, vrvj")])
+ (match_operand:<VM> 2 "vector_merge_operand" " vu, vu, vu, vu, 0, 0, 0, vu, 0")))]
"TARGET_VECTOR && known_gt (GET_MODE_SIZE (<MODE>mode), BYTES_PER_RISCV_VECTOR)"
"vms%B3.v%o5\t%0,%4,%v5%p1"
[(set_attr "type" "vicmp")
"TARGET_VECTOR"
{})
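+;; Likewise for the vector-scalar (.vx) compares on 8/16/32-bit elements.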
+(define_insn "*pred_cmp<mode>_scalar_merge_tie_mask"
+ [(set (match_operand:<VM> 0 "register_operand" "=vm")
+ (if_then_else:<VM>
+ (unspec:<VM>
+ [(match_operand:<VM> 1 "register_operand" " 0")
+ (match_operand 5 "vector_length_operand" " rK")
+ (match_operand 6 "const_int_operand" " i")
+ (match_operand 7 "const_int_operand" " i")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (match_operator:<VM> 2 "comparison_except_eqge_operator"
+ [(match_operand:VI_QHS 3 "register_operand" " vr")
+ (vec_duplicate:VI_QHS
+ (match_operand:<VEL> 4 "register_operand" " r"))])
+ (match_dup 1)))]
+ "TARGET_VECTOR"
+ "vms%B2.vx\t%0,%3,%4,v0.t"
+ [(set_attr "type" "vicmp")
+ (set_attr "mode" "<MODE>")
+ (set_attr "merge_op_idx" "1")
+ (set_attr "vl_op_idx" "5")
+ (set (attr "ma") (symbol_ref "riscv_vector::get_ma(operands[6])"))
+ (set (attr "avl_type") (symbol_ref "INTVAL (operands[7])"))])
+
;; We don't use early-clobber for LMUL <= 1 to get better codegen.
(define_insn "*pred_cmp<mode>_scalar"
[(set (match_operand:<VM> 0 "register_operand" "=vr, vr")
;; We use early-clobber for source LMUL > dest LMUL.
(define_insn "*pred_cmp<mode>_scalar_narrow"
- [(set (match_operand:<VM> 0 "register_operand" "=&vr, &vr")
+ [(set (match_operand:<VM> 0 "register_operand" "=vm, vr, vr, &vr, &vr")
(if_then_else:<VM>
(unspec:<VM>
- [(match_operand:<VM> 1 "vector_mask_operand" "vmWc1,vmWc1")
- (match_operand 6 "vector_length_operand" " rK, rK")
- (match_operand 7 "const_int_operand" " i, i")
- (match_operand 8 "const_int_operand" " i, i")
+ [(match_operand:<VM> 1 "vector_mask_operand" " 0,vmWc1,vmWc1,vmWc1,vmWc1")
+ (match_operand 6 "vector_length_operand" " rK, rK, rK, rK, rK")
+ (match_operand 7 "const_int_operand" " i, i, i, i, i")
+ (match_operand 8 "const_int_operand" " i, i, i, i, i")
(reg:SI VL_REGNUM)
(reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
(match_operator:<VM> 3 "comparison_except_eqge_operator"
- [(match_operand:VI_QHS 4 "register_operand" " vr, vr")
+ [(match_operand:VI_QHS 4 "register_operand" " vr, 0, 0, vr, vr")
(vec_duplicate:VI_QHS
- (match_operand:<VEL> 5 "register_operand" " r, r"))])
- (match_operand:<VM> 2 "vector_merge_operand" " vu, 0")))]
+ (match_operand:<VEL> 5 "register_operand" " r, r, r, r, r"))])
+ (match_operand:<VM> 2 "vector_merge_operand" " vu, vu, 0, vu, 0")))]
"TARGET_VECTOR && known_gt (GET_MODE_SIZE (<MODE>mode), BYTES_PER_RISCV_VECTOR)"
"vms%B3.vx\t%0,%4,%5%p1"
[(set_attr "type" "vicmp")
"TARGET_VECTOR"
{})
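+;; Likewise for the vector-scalar eq/ne compares.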
+(define_insn "*pred_eqne<mode>_scalar_merge_tie_mask"
+ [(set (match_operand:<VM> 0 "register_operand" "=vm")
+ (if_then_else:<VM>
+ (unspec:<VM>
+ [(match_operand:<VM> 1 "register_operand" " 0")
+ (match_operand 5 "vector_length_operand" " rK")
+ (match_operand 6 "const_int_operand" " i")
+ (match_operand 7 "const_int_operand" " i")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (match_operator:<VM> 2 "equality_operator"
+ [(vec_duplicate:VI_QHS
+ (match_operand:<VEL> 4 "register_operand" " r"))
+ (match_operand:VI_QHS 3 "register_operand" " vr")])
+ (match_dup 1)))]
+ "TARGET_VECTOR"
+ "vms%B2.vx\t%0,%3,%4,v0.t"
+ [(set_attr "type" "vicmp")
+ (set_attr "mode" "<MODE>")
+ (set_attr "merge_op_idx" "1")
+ (set_attr "vl_op_idx" "5")
+ (set (attr "ma") (symbol_ref "riscv_vector::get_ma(operands[6])"))
+ (set (attr "avl_type") (symbol_ref "INTVAL (operands[7])"))])
+
;; We don't use early-clobber for LMUL <= 1 to get better codegen.
(define_insn "*pred_eqne<mode>_scalar"
[(set (match_operand:<VM> 0 "register_operand" "=vr, vr")
;; We use early-clobber for source LMUL > dest LMUL.
(define_insn "*pred_eqne<mode>_scalar_narrow"
- [(set (match_operand:<VM> 0 "register_operand" "=&vr, &vr")
+ [(set (match_operand:<VM> 0 "register_operand" "=vm, vr, vr, &vr, &vr")
(if_then_else:<VM>
(unspec:<VM>
- [(match_operand:<VM> 1 "vector_mask_operand" "vmWc1,vmWc1")
- (match_operand 6 "vector_length_operand" " rK, rK")
- (match_operand 7 "const_int_operand" " i, i")
- (match_operand 8 "const_int_operand" " i, i")
+ [(match_operand:<VM> 1 "vector_mask_operand" " 0,vmWc1,vmWc1,vmWc1,vmWc1")
+ (match_operand 6 "vector_length_operand" " rK, rK, rK, rK, rK")
+ (match_operand 7 "const_int_operand" " i, i, i, i, i")
+ (match_operand 8 "const_int_operand" " i, i, i, i, i")
(reg:SI VL_REGNUM)
(reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
(match_operator:<VM> 3 "equality_operator"
[(vec_duplicate:VI_QHS
- (match_operand:<VEL> 5 "register_operand" " r, r"))
- (match_operand:VI_QHS 4 "register_operand" " vr, vr")])
- (match_operand:<VM> 2 "vector_merge_operand" " vu, 0")))]
+ (match_operand:<VEL> 5 "register_operand" " r, r, r, r, r"))
+ (match_operand:VI_QHS 4 "register_operand" " vr, 0, 0, vr, vr")])
+ (match_operand:<VM> 2 "vector_merge_operand" " vu, vu, 0, vu, 0")))]
"TARGET_VECTOR && known_gt (GET_MODE_SIZE (<MODE>mode), BYTES_PER_RISCV_VECTOR)"
"vms%B3.vx\t%0,%4,%5%p1"
[(set_attr "type" "vicmp")
DONE;
})
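+;; Likewise for the vector-scalar compares on 64-bit elements.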
+(define_insn "*pred_cmp<mode>_scalar_merge_tie_mask"
+ [(set (match_operand:<VM> 0 "register_operand" "=vm")
+ (if_then_else:<VM>
+ (unspec:<VM>
+ [(match_operand:<VM> 1 "register_operand" " 0")
+ (match_operand 5 "vector_length_operand" " rK")
+ (match_operand 6 "const_int_operand" " i")
+ (match_operand 7 "const_int_operand" " i")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (match_operator:<VM> 2 "comparison_except_eqge_operator"
+ [(match_operand:VI_D 3 "register_operand" " vr")
+ (vec_duplicate:VI_D
+ (match_operand:<VEL> 4 "register_operand" " r"))])
+ (match_dup 1)))]
+ "TARGET_VECTOR"
+ "vms%B2.vx\t%0,%3,%4,v0.t"
+ [(set_attr "type" "vicmp")
+ (set_attr "mode" "<MODE>")
+ (set_attr "merge_op_idx" "1")
+ (set_attr "vl_op_idx" "5")
+ (set (attr "ma") (symbol_ref "riscv_vector::get_ma(operands[6])"))
+ (set (attr "avl_type") (symbol_ref "INTVAL (operands[7])"))])
+
+(define_insn "*pred_eqne<mode>_scalar_merge_tie_mask"
+ [(set (match_operand:<VM> 0 "register_operand" "=vm")
+ (if_then_else:<VM>
+ (unspec:<VM>
+ [(match_operand:<VM> 1 "register_operand" " 0")
+ (match_operand 5 "vector_length_operand" " rK")
+ (match_operand 6 "const_int_operand" " i")
+ (match_operand 7 "const_int_operand" " i")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (match_operator:<VM> 2 "equality_operator"
+ [(vec_duplicate:VI_D
+ (match_operand:<VEL> 4 "register_operand" " r"))
+ (match_operand:VI_D 3 "register_operand" " vr")])
+ (match_dup 1)))]
+ "TARGET_VECTOR"
+ "vms%B2.vx\t%0,%3,%4,v0.t"
+ [(set_attr "type" "vicmp")
+ (set_attr "mode" "<MODE>")
+ (set_attr "merge_op_idx" "1")
+ (set_attr "vl_op_idx" "5")
+ (set (attr "ma") (symbol_ref "riscv_vector::get_ma(operands[6])"))
+ (set (attr "avl_type") (symbol_ref "INTVAL (operands[7])"))])
+
;; We don't use early-clobber for LMUL <= 1 to get better codegen.
(define_insn "*pred_cmp<mode>_scalar"
[(set (match_operand:<VM> 0 "register_operand" "=vr, vr")
;; We use early-clobber for source LMUL > dest LMUL.
(define_insn "*pred_cmp<mode>_scalar_narrow"
- [(set (match_operand:<VM> 0 "register_operand" "=&vr, &vr")
+ [(set (match_operand:<VM> 0 "register_operand" "=vm, vr, vr, &vr, &vr")
(if_then_else:<VM>
(unspec:<VM>
- [(match_operand:<VM> 1 "vector_mask_operand" "vmWc1,vmWc1")
- (match_operand 6 "vector_length_operand" " rK, rK")
- (match_operand 7 "const_int_operand" " i, i")
- (match_operand 8 "const_int_operand" " i, i")
+ [(match_operand:<VM> 1 "vector_mask_operand" " 0,vmWc1,vmWc1,vmWc1,vmWc1")
+ (match_operand 6 "vector_length_operand" " rK, rK, rK, rK, rK")
+ (match_operand 7 "const_int_operand" " i, i, i, i, i")
+ (match_operand 8 "const_int_operand" " i, i, i, i, i")
(reg:SI VL_REGNUM)
(reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
(match_operator:<VM> 3 "comparison_except_eqge_operator"
- [(match_operand:VI_D 4 "register_operand" " vr, vr")
+ [(match_operand:VI_D 4 "register_operand" " vr, 0, 0, vr, vr")
(vec_duplicate:VI_D
- (match_operand:<VEL> 5 "register_operand" " r, r"))])
- (match_operand:<VM> 2 "vector_merge_operand" " vu, 0")))]
+ (match_operand:<VEL> 5 "register_operand" " r, r, r, r, r"))])
+ (match_operand:<VM> 2 "vector_merge_operand" " vu, vu, 0, vu, 0")))]
"TARGET_VECTOR && known_gt (GET_MODE_SIZE (<MODE>mode), BYTES_PER_RISCV_VECTOR)"
"vms%B3.vx\t%0,%4,%5%p1"
[(set_attr "type" "vicmp")
;; We use early-clobber for source LMUL > dest LMUL.
(define_insn "*pred_eqne<mode>_scalar_narrow"
- [(set (match_operand:<VM> 0 "register_operand" "=&vr, &vr")
+ [(set (match_operand:<VM> 0 "register_operand" "=vm, vr, vr, &vr, &vr")
(if_then_else:<VM>
(unspec:<VM>
- [(match_operand:<VM> 1 "vector_mask_operand" "vmWc1,vmWc1")
- (match_operand 6 "vector_length_operand" " rK, rK")
- (match_operand 7 "const_int_operand" " i, i")
- (match_operand 8 "const_int_operand" " i, i")
+ [(match_operand:<VM> 1 "vector_mask_operand" " 0,vmWc1,vmWc1,vmWc1,vmWc1")
+ (match_operand 6 "vector_length_operand" " rK, rK, rK, rK, rK")
+ (match_operand 7 "const_int_operand" " i, i, i, i, i")
+ (match_operand 8 "const_int_operand" " i, i, i, i, i")
(reg:SI VL_REGNUM)
(reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
(match_operator:<VM> 3 "equality_operator"
[(vec_duplicate:VI_D
- (match_operand:<VEL> 5 "register_operand" " r, r"))
- (match_operand:VI_D 4 "register_operand" " vr, vr")])
- (match_operand:<VM> 2 "vector_merge_operand" " vu, 0")))]
+ (match_operand:<VEL> 5 "register_operand" " r, r, r, r, r"))
+ (match_operand:VI_D 4 "register_operand" " vr, 0, 0, vr, vr")])
+ (match_operand:<VM> 2 "vector_merge_operand" " vu, vu, 0, vu, 0")))]
"TARGET_VECTOR && known_gt (GET_MODE_SIZE (<MODE>mode), BYTES_PER_RISCV_VECTOR)"
"vms%B3.vx\t%0,%4,%5%p1"
[(set_attr "type" "vicmp")
(set_attr "mode" "<MODE>")])
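+;; Likewise when the scalar operand is sign-extended to the 64-bit element width.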
+(define_insn "*pred_cmp<mode>_extended_scalar_merge_tie_mask"
+ [(set (match_operand:<VM> 0 "register_operand" "=vm")
+ (if_then_else:<VM>
+ (unspec:<VM>
+ [(match_operand:<VM> 1 "register_operand" " 0")
+ (match_operand 5 "vector_length_operand" " rK")
+ (match_operand 6 "const_int_operand" " i")
+ (match_operand 7 "const_int_operand" " i")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (match_operator:<VM> 2 "comparison_except_eqge_operator"
+ [(match_operand:VI_D 3 "register_operand" " vr")
+ (vec_duplicate:VI_D
+ (sign_extend:<VEL>
+ (match_operand:<VSUBEL> 4 "register_operand" " r")))])
+ (match_dup 1)))]
+ "TARGET_VECTOR"
+ "vms%B2.vx\t%0,%3,%4,v0.t"
+ [(set_attr "type" "vicmp")
+ (set_attr "mode" "<MODE>")
+ (set_attr "merge_op_idx" "1")
+ (set_attr "vl_op_idx" "5")
+ (set (attr "ma") (symbol_ref "riscv_vector::get_ma(operands[6])"))
+ (set (attr "avl_type") (symbol_ref "INTVAL (operands[7])"))])
+
;; We don't use early-clobber for LMUL <= 1 to get better codegen.
(define_insn "*pred_cmp<mode>_extended_scalar"
[(set (match_operand:<VM> 0 "register_operand" "=vr, vr")
(set_attr "mode" "<MODE>")])
(define_insn "*pred_cmp<mode>_extended_scalar_narrow"
- [(set (match_operand:<VM> 0 "register_operand" "=&vr, &vr")
+ [(set (match_operand:<VM> 0 "register_operand" "=vm, vr, vr, &vr, &vr")
(if_then_else:<VM>
(unspec:<VM>
- [(match_operand:<VM> 1 "vector_mask_operand" "vmWc1,vmWc1")
- (match_operand 6 "vector_length_operand" " rK, rK")
- (match_operand 7 "const_int_operand" " i, i")
- (match_operand 8 "const_int_operand" " i, i")
+ [(match_operand:<VM> 1 "vector_mask_operand" " 0,vmWc1,vmWc1,vmWc1,vmWc1")
+ (match_operand 6 "vector_length_operand" " rK, rK, rK, rK, rK")
+ (match_operand 7 "const_int_operand" " i, i, i, i, i")
+ (match_operand 8 "const_int_operand" " i, i, i, i, i")
(reg:SI VL_REGNUM)
(reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
(match_operator:<VM> 3 "comparison_except_eqge_operator"
- [(match_operand:VI_D 4 "register_operand" " vr, vr")
+ [(match_operand:VI_D 4 "register_operand" " vr, 0, 0, vr, vr")
(vec_duplicate:VI_D
(sign_extend:<VEL>
- (match_operand:<VSUBEL> 5 "register_operand" " r, r")))])
- (match_operand:<VM> 2 "vector_merge_operand" " vu, 0")))]
+ (match_operand:<VSUBEL> 5 "register_operand" " r, r, r, r, r")))])
+ (match_operand:<VM> 2 "vector_merge_operand" " vu, vu, 0, vu, 0")))]
"TARGET_VECTOR && known_gt (GET_MODE_SIZE (<MODE>mode), BYTES_PER_RISCV_VECTOR)"
"vms%B3.vx\t%0,%4,%5%p1"
[(set_attr "type" "vicmp")
(set_attr "mode" "<MODE>")])
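+;; Likewise for eq/ne with a sign-extended scalar operand.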
+(define_insn "*pred_eqne<mode>_extended_scalar_merge_tie_mask"
+ [(set (match_operand:<VM> 0 "register_operand" "=vm")
+ (if_then_else:<VM>
+ (unspec:<VM>
+ [(match_operand:<VM> 1 "register_operand" " 0")
+ (match_operand 5 "vector_length_operand" " rK")
+ (match_operand 6 "const_int_operand" " i")
+ (match_operand 7 "const_int_operand" " i")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (match_operator:<VM> 2 "equality_operator"
+ [(vec_duplicate:VI_D
+ (sign_extend:<VEL>
+ (match_operand:<VSUBEL> 4 "register_operand" " r")))
+ (match_operand:VI_D 3 "register_operand" " vr")])
+ (match_dup 1)))]
+ "TARGET_VECTOR"
+ "vms%B2.vx\t%0,%3,%4,v0.t"
+ [(set_attr "type" "vicmp")
+ (set_attr "mode" "<MODE>")
+ (set_attr "merge_op_idx" "1")
+ (set_attr "vl_op_idx" "5")
+ (set (attr "ma") (symbol_ref "riscv_vector::get_ma(operands[6])"))
+ (set (attr "avl_type") (symbol_ref "INTVAL (operands[7])"))])
+
;; We don't use early-clobber for LMUL <= 1 to get better codegen.
(define_insn "*pred_eqne<mode>_extended_scalar"
[(set (match_operand:<VM> 0 "register_operand" "=vr, vr")
(set_attr "mode" "<MODE>")])
(define_insn "*pred_eqne<mode>_extended_scalar_narrow"
- [(set (match_operand:<VM> 0 "register_operand" "=&vr, &vr")
+ [(set (match_operand:<VM> 0 "register_operand" "=vm, vr, vr, &vr, &vr")
(if_then_else:<VM>
(unspec:<VM>
- [(match_operand:<VM> 1 "vector_mask_operand" "vmWc1,vmWc1")
- (match_operand 6 "vector_length_operand" " rK, rK")
- (match_operand 7 "const_int_operand" " i, i")
- (match_operand 8 "const_int_operand" " i, i")
+ [(match_operand:<VM> 1 "vector_mask_operand" " 0,vmWc1,vmWc1,vmWc1,vmWc1")
+ (match_operand 6 "vector_length_operand" " rK, rK, rK, rK, rK")
+ (match_operand 7 "const_int_operand" " i, i, i, i, i")
+ (match_operand 8 "const_int_operand" " i, i, i, i, i")
(reg:SI VL_REGNUM)
(reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
(match_operator:<VM> 3 "equality_operator"
[(vec_duplicate:VI_D
(sign_extend:<VEL>
- (match_operand:<VSUBEL> 5 "register_operand" " r, r")))
- (match_operand:VI_D 4 "register_operand" " vr, vr")])
- (match_operand:<VM> 2 "vector_merge_operand" " vu, 0")))]
+ (match_operand:<VSUBEL> 5 "register_operand" " r, r, r, r, r")))
+ (match_operand:VI_D 4 "register_operand" " vr, 0, 0, vr, vr")])
+ (match_operand:<VM> 2 "vector_merge_operand" " vu, vu, 0, vu, 0")))]
"TARGET_VECTOR && known_gt (GET_MODE_SIZE (<MODE>mode), BYTES_PER_RISCV_VECTOR)"
"vms%B3.vx\t%0,%4,%5%p1"
[(set_attr "type" "vicmp")
[(set_attr "type" "vfcmp")
(set_attr "mode" "<MODE>")])
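+;; Likewise for the floating-point vector-vector compares.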
+(define_insn "*pred_cmp<mode>_narrow_merge_tie_mask"
+ [(set (match_operand:<VM> 0 "register_operand" "=vm")
+ (if_then_else:<VM>
+ (unspec:<VM>
+ [(match_operand:<VM> 1 "register_operand" " 0")
+ (match_operand 5 "vector_length_operand" " rK")
+ (match_operand 6 "const_int_operand" " i")
+ (match_operand 7 "const_int_operand" " i")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (match_operator:<VM> 2 "signed_order_operator"
+ [(match_operand:VF 3 "register_operand" " vr")
+ (match_operand:VF 4 "register_operand" " vr")])
+ (match_dup 1)))]
+ "TARGET_VECTOR"
+ "vmf%B2.vv\t%0,%3,%4,v0.t"
+ [(set_attr "type" "vfcmp")
+ (set_attr "mode" "<MODE>")
+ (set_attr "merge_op_idx" "1")
+ (set_attr "vl_op_idx" "5")
+ (set (attr "ma") (symbol_ref "riscv_vector::get_ma(operands[6])"))
+ (set (attr "avl_type") (symbol_ref "INTVAL (operands[7])"))])
+
;; We use early-clobber for source LMUL > dest LMUL.
(define_insn "*pred_cmp<mode>_narrow"
- [(set (match_operand:<VM> 0 "register_operand" "=&vr, &vr")
+ [(set (match_operand:<VM> 0 "register_operand" "=vm, vr, vr, vr, vr, vr, vr, &vr, &vr")
(if_then_else:<VM>
(unspec:<VM>
- [(match_operand:<VM> 1 "vector_mask_operand" "vmWc1,vmWc1")
- (match_operand 6 "vector_length_operand" " rK, rK")
- (match_operand 7 "const_int_operand" " i, i")
- (match_operand 8 "const_int_operand" " i, i")
+ [(match_operand:<VM> 1 "vector_mask_operand" " 0,vmWc1,vmWc1,vmWc1,vmWc1,vmWc1,vmWc1,vmWc1,vmWc1")
+ (match_operand 6 "vector_length_operand" " rK, rK, rK, rK, rK, rK, rK, rK, rK")
+ (match_operand 7 "const_int_operand" " i, i, i, i, i, i, i, i, i")
+ (match_operand 8 "const_int_operand" " i, i, i, i, i, i, i, i, i")
(reg:SI VL_REGNUM)
(reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
(match_operator:<VM> 3 "signed_order_operator"
- [(match_operand:VF 4 "register_operand" " vr, vr")
- (match_operand:VF 5 "register_operand" " vr, vr")])
- (match_operand:<VM> 2 "vector_merge_operand" " vu, 0")))]
+ [(match_operand:VF 4 "register_operand" " vr, 0, vr, 0, 0, vr, 0, vr, vr")
+ (match_operand:VF 5 "register_operand" " vr, vr, 0, 0, vr, 0, 0, vr, vr")])
+ (match_operand:<VM> 2 "vector_merge_operand" " vu, vu, vu, vu, 0, 0, 0, vu, 0")))]
"TARGET_VECTOR && known_gt (GET_MODE_SIZE (<MODE>mode), BYTES_PER_RISCV_VECTOR)"
"vmf%B3.vv\t%0,%4,%5%p1"
[(set_attr "type" "vfcmp")
"TARGET_VECTOR"
{})
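+;; Likewise for the floating-point vector-scalar (.vf) compares.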
+(define_insn "*pred_cmp<mode>_scalar_merge_tie_mask"
+ [(set (match_operand:<VM> 0 "register_operand" "=vm")
+ (if_then_else:<VM>
+ (unspec:<VM>
+ [(match_operand:<VM> 1 "register_operand" " 0")
+ (match_operand 5 "vector_length_operand" " rK")
+ (match_operand 6 "const_int_operand" " i")
+ (match_operand 7 "const_int_operand" " i")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (match_operator:<VM> 2 "signed_order_operator"
+ [(match_operand:VF 3 "register_operand" " vr")
+ (vec_duplicate:VF
+ (match_operand:<VEL> 4 "register_operand" " f"))])
+ (match_dup 1)))]
+ "TARGET_VECTOR"
+ "vmf%B2.vf\t%0,%3,%4,v0.t"
+ [(set_attr "type" "vfcmp")
+ (set_attr "mode" "<MODE>")
+ (set_attr "merge_op_idx" "1")
+ (set_attr "vl_op_idx" "5")
+ (set (attr "ma") (symbol_ref "riscv_vector::get_ma(operands[6])"))
+ (set (attr "avl_type") (symbol_ref "INTVAL (operands[7])"))])
+
;; We don't use early-clobber for LMUL <= 1 to get better codegen.
(define_insn "*pred_cmp<mode>_scalar"
[(set (match_operand:<VM> 0 "register_operand" "=vr, vr")
;; We use early-clobber for source LMUL > dest LMUL.
(define_insn "*pred_cmp<mode>_scalar_narrow"
- [(set (match_operand:<VM> 0 "register_operand" "=&vr, &vr")
+ [(set (match_operand:<VM> 0 "register_operand" "=vm, vr, vr, &vr, &vr")
(if_then_else:<VM>
(unspec:<VM>
- [(match_operand:<VM> 1 "vector_mask_operand" "vmWc1,vmWc1")
- (match_operand 6 "vector_length_operand" " rK, rK")
- (match_operand 7 "const_int_operand" " i, i")
- (match_operand 8 "const_int_operand" " i, i")
+ [(match_operand:<VM> 1 "vector_mask_operand" " 0,vmWc1,vmWc1,vmWc1,vmWc1")
+ (match_operand 6 "vector_length_operand" " rK, rK, rK, rK, rK")
+ (match_operand 7 "const_int_operand" " i, i, i, i, i")
+ (match_operand 8 "const_int_operand" " i, i, i, i, i")
(reg:SI VL_REGNUM)
(reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
(match_operator:<VM> 3 "signed_order_operator"
- [(match_operand:VF 4 "register_operand" " vr, vr")
+ [(match_operand:VF 4 "register_operand" " vr, 0, 0, vr, vr")
(vec_duplicate:VF
- (match_operand:<VEL> 5 "register_operand" " f, f"))])
- (match_operand:<VM> 2 "vector_merge_operand" " vu, 0")))]
+ (match_operand:<VEL> 5 "register_operand" " f, f, f, f, f"))])
+ (match_operand:<VM> 2 "vector_merge_operand" " vu, vu, 0, vu, 0")))]
"TARGET_VECTOR && known_gt (GET_MODE_SIZE (<MODE>mode), BYTES_PER_RISCV_VECTOR)"
"vmf%B3.vf\t%0,%4,%5%p1"
[(set_attr "type" "vfcmp")
"TARGET_VECTOR"
{})
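+;; Likewise for the floating-point vector-scalar eq/ne compares.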
+(define_insn "*pred_eqne<mode>_scalar_merge_tie_mask"
+ [(set (match_operand:<VM> 0 "register_operand" "=vm")
+ (if_then_else:<VM>
+ (unspec:<VM>
+ [(match_operand:<VM> 1 "register_operand" " 0")
+ (match_operand 5 "vector_length_operand" " rK")
+ (match_operand 6 "const_int_operand" " i")
+ (match_operand 7 "const_int_operand" " i")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (match_operator:<VM> 2 "equality_operator"
+ [(vec_duplicate:VF
+ (match_operand:<VEL> 4 "register_operand" " f"))
+ (match_operand:VF 3 "register_operand" " vr")])
+ (match_dup 1)))]
+ "TARGET_VECTOR"
+ "vmf%B2.vf\t%0,%3,%4,v0.t"
+ [(set_attr "type" "vfcmp")
+ (set_attr "mode" "<MODE>")
+ (set_attr "merge_op_idx" "1")
+ (set_attr "vl_op_idx" "5")
+ (set (attr "ma") (symbol_ref "riscv_vector::get_ma(operands[6])"))
+ (set (attr "avl_type") (symbol_ref "INTVAL (operands[7])"))])
+
;; We don't use early-clobber for LMUL <= 1 to get better codegen.
(define_insn "*pred_eqne<mode>_scalar"
[(set (match_operand:<VM> 0 "register_operand" "=vr, vr")
;; We use early-clobber for source LMUL > dest LMUL.
(define_insn "*pred_eqne<mode>_scalar_narrow"
- [(set (match_operand:<VM> 0 "register_operand" "=&vr, &vr")
+ [(set (match_operand:<VM> 0 "register_operand" "=vm, vr, vr, &vr, &vr")
(if_then_else:<VM>
(unspec:<VM>
- [(match_operand:<VM> 1 "vector_mask_operand" "vmWc1,vmWc1")
- (match_operand 6 "vector_length_operand" " rK, rK")
- (match_operand 7 "const_int_operand" " i, i")
- (match_operand 8 "const_int_operand" " i, i")
+ [(match_operand:<VM> 1 "vector_mask_operand" " 0,vmWc1,vmWc1,vmWc1,vmWc1")
+ (match_operand 6 "vector_length_operand" " rK, rK, rK, rK, rK")
+ (match_operand 7 "const_int_operand" " i, i, i, i, i")
+ (match_operand 8 "const_int_operand" " i, i, i, i, i")
(reg:SI VL_REGNUM)
(reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
(match_operator:<VM> 3 "equality_operator"
[(vec_duplicate:VF
- (match_operand:<VEL> 5 "register_operand" " f, f"))
- (match_operand:VF 4 "register_operand" " vr, vr")])
- (match_operand:<VM> 2 "vector_merge_operand" " vu, 0")))]
+ (match_operand:<VEL> 5 "register_operand" " f, f, f, f, f"))
+ (match_operand:VF 4 "register_operand" " vr, 0, 0, vr, vr")])
+ (match_operand:<VM> 2 "vector_merge_operand" " vu, vu, 0, vu, 0")))]
"TARGET_VECTOR && known_gt (GET_MODE_SIZE (<MODE>mode), BYTES_PER_RISCV_VECTOR)"
"vmf%B3.vf\t%0,%4,%5%p1"
[(set_attr "type" "vfcmp")
__riscv_vsm_v_b32 (out, m4, 4);
}
-/* { dg-final { scan-assembler-times {vmv} 2 } } */
+/* { dg-final { scan-assembler-not {vmv} } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32d -O3" } */
+
+#include "riscv_vector.h"
+
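+/* The asm statements clobber most of the vector registers to put pressure on
+   the register allocator around the masked compares.  The scans at the end of
+   the file check that this does not result in any vmv copies or csrr
+   instructions being emitted.  */
+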
+void f0 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, size_t shift)
+{
+ vuint16m8_t v1 = __riscv_vle16_v_u16m8 (base1, vl);
+ vuint16m8_t v2 = __riscv_vle16_v_u16m8 (base2, vl);
+ vbool2_t m1 = __riscv_vlm_v_b2 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v24","v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool2_t v = __riscv_vmseq_vv_u16m8_b2_m(m1,v1,v2,vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b2 (out,v,vl);
+}
+
+void f1 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, size_t shift)
+{
+ vuint16m8_t v1 = __riscv_vle16_v_u16m8 (base1, vl);
+ vuint16m8_t v2 = __riscv_vle16_v_u16m8 (base2, vl);
+ vbool2_t m1 = __riscv_vlm_v_b2 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v24","v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool2_t v = __riscv_vmseq_vv_u16m8_b2_mu(m1,m1,v1,v2,vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b2 (out,v,vl);
+}
+
+void f2 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, size_t shift)
+{
+ vuint16m8_t v1 = __riscv_vle16_v_u16m8 (base1, vl);
+ vuint16m8_t v2 = __riscv_vle16_v_u16m8 (base2, vl);
+ vbool2_t m1 = __riscv_vlm_v_b2 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v24","v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool2_t v = __riscv_vmseq_vv_u16m8_b2_m(m1,v1,v2,vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b2 (out,v,vl);
+}
+
+void f3 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, size_t shift)
+{
+ vuint16m8_t v1 = __riscv_vle16_v_u16m8 (base1, vl);
+ vuint16m8_t v2 = __riscv_vle16_v_u16m8 (base2, vl);
+ vbool2_t m1 = __riscv_vlm_v_b2 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v24","v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool2_t v = __riscv_vmseq_vv_u16m8_b2_m(m1,v1,v2,vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b2 (out,v,vl);
+}
+
+void f4 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, size_t shift)
+{
+ vuint16m8_t v1 = __riscv_vle16_v_u16m8 (base1, vl);
+ vbool2_t m1 = __riscv_vlm_v_b2 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool2_t v = __riscv_vmseq_vv_u16m8_b2_m(m1,v1,v1,vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b2 (out,v,vl);
+}
+
+void f5 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, size_t shift)
+{
+ vuint16m8_t v1 = __riscv_vle16_v_u16m8 (base1, vl);
+ vuint16m8_t v2 = __riscv_vle16_v_u16m8 (base2, vl);
+ vbool2_t m1 = __riscv_vlm_v_b2 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v24","v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool2_t v = __riscv_vmseq_vv_u16m8_b2_m(m1,v1,v2,vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b2 (out,v,vl);
+}
+
+void f6 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, size_t shift)
+{
+ vuint16m8_t v1 = __riscv_vle16_v_u16m8 (base1, vl);
+ vuint16m8_t v2 = __riscv_vle16_v_u16m8 (base2, vl);
+ vbool2_t m1 = __riscv_vlm_v_b2 (base3, vl);
+ vbool2_t m2 = __riscv_vlm_v_b2 (base4, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v24","v25",
+ "v26", "v28", "v29", "v30", "v31");
+
+ vbool2_t v = __riscv_vmseq_vv_u16m8_b2_mu(m1,m2,v1,v2,vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b2 (out,v,vl);
+}
+
+void f7 (void * in, void *out, int32_t x)
+{
+ vbool4_t mask = *(vbool4_t*)in;
+ asm volatile ("":::"memory");
+ vint32m8_t v = __riscv_vle32_v_i32m8 (in, 4);
+ vint32m8_t v2 = __riscv_vle32_v_i32m8_m (mask, in, 4);
+ vbool4_t m3 = __riscv_vmseq_vv_i32m8_b4 (v, v, 4);
+ vbool4_t m4 = __riscv_vmseq_vv_i32m8_b4_mu (m3, m3, v2, v2, 4);
+ __riscv_vsm_v_b4 (out, m4, 4);
+}
+
+void f8 (void * in, void *out, int32_t x)
+{
+ vbool4_t mask = *(vbool4_t*)in;
+ asm volatile ("":::"memory");
+ vint32m8_t v = __riscv_vle32_v_i32m8 (in, 4);
+ vint32m8_t v2 = __riscv_vle32_v_i32m8_m (mask, in, 4);
+ vbool4_t m3 = __riscv_vmseq_vv_i32m8_b4 (v, v, 4);
+ vbool4_t m4 = __riscv_vmseq_vv_i32m8_b4_m (m3, v2, v2, 4);
+ __riscv_vsm_v_b4 (out, m3, 4);
+ __riscv_vsm_v_b4 (out, m4, 4);
+}
+
+void f9 (void * in, void *out, int32_t x)
+{
+ vbool4_t mask = *(vbool4_t*)in;
+ asm volatile ("":::"memory");
+ vint32m8_t v = __riscv_vle32_v_i32m8 (in, 4);
+ vint32m8_t v2 = __riscv_vle32_v_i32m8_m (mask, in, 4);
+ vbool4_t m3 = __riscv_vmseq_vv_i32m8_b4 (v, v, 4);
+ vbool4_t m4 = __riscv_vmseq_vv_i32m8_b4_m (m3, v2, v2, 4);
+ __riscv_vsm_v_b4 (out, m4, 4);
+ vbool4_t m5 = __riscv_vmseq_vv_i32m8_b4_mu (m3, m4, v2, v2, 4);
+ __riscv_vsm_v_b4 (out, m3, 4);
+ __riscv_vsm_v_b4 (out, m5, 4);
+}
+
+void f10 (void * in, void *out, int32_t x)
+{
+ vbool4_t mask = *(vbool4_t*)in;
+ asm volatile ("":::"memory");
+ vint32m8_t v = __riscv_vle32_v_i32m8 (in, 4);
+ vint32m8_t v2 = __riscv_vle32_v_i32m8_m (mask, in, 4);
+ vbool4_t m3 = __riscv_vmseq_vv_i32m8_b4 (v, v2, 4);
+ vbool4_t m4 = __riscv_vmseq_vv_i32m8_b4_mu (m3, m3, v2, v, 4);
+ __riscv_vsm_v_b4 (out, m4, 4);
+}
+
+void f11 (void * in, void *out, int32_t x)
+{
+ vbool4_t mask = *(vbool4_t*)in;
+ asm volatile ("":::"memory");
+ vint32m8_t v = __riscv_vle32_v_i32m8 (in, 4);
+ vint32m8_t v2 = __riscv_vle32_v_i32m8_m (mask, in, 4);
+ vbool4_t m3 = __riscv_vmseq_vv_i32m8_b4 (v, v2, 4);
+ vbool4_t m4 = __riscv_vmseq_vv_i32m8_b4_m (m3, v2, v, 4);
+ __riscv_vsm_v_b4 (out, m3, 4);
+ __riscv_vsm_v_b4 (out, m4, 4);
+}
+
+void f12 (void* base1,void* base2,void* out,int n)
+{
+ vbool4_t mask = *(vbool4_t*)base1;
+ vint32m8_t v = __riscv_vle32_v_i32m8 (base1, 4);
+ vint32m8_t v2 = __riscv_vle32_v_i32m8_m (mask, base1, 4);
+ mask = __riscv_vmseq_vv_i32m8_b4 (v, v2, 4);
+ for (int i = 0; i < n; i++){
+ vint32m8_t v3 = __riscv_vle32_v_i32m8 (base1 + i, 4);
+ vint32m8_t v4 = __riscv_vle32_v_i32m8_m (mask, base1 + i * 2, 4);
+ mask = __riscv_vmseq_vv_i32m8_b4_m (mask, v3, v4,32);
+ mask = __riscv_vmseq_vv_i32m8_b4_mu (mask, mask, v4, v4, 32);
+ }
+ __riscv_vsm_v_b4 (out, mask, 32);
+}
+
+void f13 (void* base1,void* base2,void* out,int n)
+{
+ vbool32_t mask = *(vbool32_t*)base1;
+ vint32m1_t v = __riscv_vle32_v_i32m1 (base1, 4);
+ vint32m1_t v2 = __riscv_vle32_v_i32m1_m (mask, base1, 4);
+ mask = __riscv_vmseq_vv_i32m1_b32 (v, v2, 4);
+ for (int i = 0; i < n; i++){
+ vint32m1_t v3 = __riscv_vle32_v_i32m1 (base1 + i, 4);
+ vint32m1_t v4 = __riscv_vle32_v_i32m1_m (mask, base1 + i * 2, 4);
+ mask = __riscv_vmseq_vv_i32m1_b32_m (mask, v3, v4,32);
+ mask = __riscv_vmseq_vv_i32m1_b32_mu (mask, mask, v4, v4, 32);
+ }
+ __riscv_vsm_v_b32 (out, mask, 32);
+}
+
+/* { dg-final { scan-assembler-not {vmv} } } */
+/* { dg-final { scan-assembler-not {csrr} } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32d -O3" } */
+
+#include "riscv_vector.h"
+
+void f0 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, size_t shift)
+{
+ vuint16m8_t v1 = __riscv_vle16_v_u16m8 (base1, vl);
+ vuint16m8_t v2 = __riscv_vle16_v_u16m8 (base2, vl);
+ vbool2_t m1 = __riscv_vlm_v_b2 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v24","v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool2_t v = __riscv_vmsltu_vv_u16m8_b2_m(m1,v1,v2,vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b2 (out,v,vl);
+}
+
+void f1 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, size_t shift)
+{
+ vuint16m8_t v1 = __riscv_vle16_v_u16m8 (base1, vl);
+ vuint16m8_t v2 = __riscv_vle16_v_u16m8 (base2, vl);
+ vbool2_t m1 = __riscv_vlm_v_b2 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v24","v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool2_t v = __riscv_vmsltu_vv_u16m8_b2_mu(m1,m1,v1,v2,vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b2 (out,v,vl);
+}
+
+void f2 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, size_t shift)
+{
+ vuint16m8_t v1 = __riscv_vle16_v_u16m8 (base1, vl);
+ vuint16m8_t v2 = __riscv_vle16_v_u16m8 (base2, vl);
+ vbool2_t m1 = __riscv_vlm_v_b2 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v24","v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool2_t v = __riscv_vmsltu_vv_u16m8_b2_m(m1,v1,v2,vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b2 (out,v,vl);
+}
+
+void f3 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, size_t shift)
+{
+ vuint16m8_t v1 = __riscv_vle16_v_u16m8 (base1, vl);
+ vuint16m8_t v2 = __riscv_vle16_v_u16m8 (base2, vl);
+ vbool2_t m1 = __riscv_vlm_v_b2 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v24","v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool2_t v = __riscv_vmsltu_vv_u16m8_b2_m(m1,v1,v2,vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b2 (out,v,vl);
+}
+
+void f4 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, size_t shift)
+{
+ vuint16m8_t v1 = __riscv_vle16_v_u16m8 (base1, vl);
+ vbool2_t m1 = __riscv_vlm_v_b2 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool2_t v = __riscv_vmsltu_vv_u16m8_b2_m(m1,v1,v1,vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b2 (out,v,vl);
+}
+
+void f5 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, size_t shift)
+{
+ vuint16m8_t v1 = __riscv_vle16_v_u16m8 (base1, vl);
+ vuint16m8_t v2 = __riscv_vle16_v_u16m8 (base2, vl);
+ vbool2_t m1 = __riscv_vlm_v_b2 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v24","v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool2_t v = __riscv_vmsltu_vv_u16m8_b2_m(m1,v1,v2,vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b2 (out,v,vl);
+}
+
+void f6 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, size_t shift)
+{
+ vuint16m8_t v1 = __riscv_vle16_v_u16m8 (base1, vl);
+ vuint16m8_t v2 = __riscv_vle16_v_u16m8 (base2, vl);
+ vbool2_t m1 = __riscv_vlm_v_b2 (base3, vl);
+ vbool2_t m2 = __riscv_vlm_v_b2 (base4, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v24","v25",
+ "v26", "v28", "v29", "v30", "v31");
+
+ vbool2_t v = __riscv_vmsltu_vv_u16m8_b2_mu(m1,m2,v1,v2,vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b2 (out,v,vl);
+}
+
+void f7 (void * in, void *out, int32_t x)
+{
+ vbool4_t mask = *(vbool4_t*)in;
+ asm volatile ("":::"memory");
+ vint32m8_t v = __riscv_vle32_v_i32m8 (in, 4);
+ vint32m8_t v2 = __riscv_vle32_v_i32m8_m (mask, in, 4);
+ vbool4_t m3 = __riscv_vmslt_vv_i32m8_b4 (v, v, 4);
+ vbool4_t m4 = __riscv_vmslt_vv_i32m8_b4_mu (m3, m3, v2, v2, 4);
+ __riscv_vsm_v_b4 (out, m4, 4);
+}
+
+void f8 (void * in, void *out, int32_t x)
+{
+ vbool4_t mask = *(vbool4_t*)in;
+ asm volatile ("":::"memory");
+ vint32m8_t v = __riscv_vle32_v_i32m8 (in, 4);
+ vint32m8_t v2 = __riscv_vle32_v_i32m8_m (mask, in, 4);
+ vbool4_t m3 = __riscv_vmslt_vv_i32m8_b4 (v, v, 4);
+ vbool4_t m4 = __riscv_vmslt_vv_i32m8_b4_m (m3, v2, v2, 4);
+ __riscv_vsm_v_b4 (out, m3, 4);
+ __riscv_vsm_v_b4 (out, m4, 4);
+}
+
+void f9 (void * in, void *out, int32_t x)
+{
+ vbool4_t mask = *(vbool4_t*)in;
+ asm volatile ("":::"memory");
+ vint32m8_t v = __riscv_vle32_v_i32m8 (in, 4);
+ vint32m8_t v2 = __riscv_vle32_v_i32m8_m (mask, in, 4);
+ vbool4_t m3 = __riscv_vmslt_vv_i32m8_b4 (v, v, 4);
+ vbool4_t m4 = __riscv_vmslt_vv_i32m8_b4_m (m3, v2, v2, 4);
+ __riscv_vsm_v_b4 (out, m4, 4);
+ vbool4_t m5 = __riscv_vmslt_vv_i32m8_b4_mu (m3, m4, v2, v2, 4);
+ __riscv_vsm_v_b4 (out, m3, 4);
+ __riscv_vsm_v_b4 (out, m5, 4);
+}
+
+void f10 (void * in, void *out, int32_t x)
+{
+ vbool4_t mask = *(vbool4_t*)in;
+ asm volatile ("":::"memory");
+ vint32m8_t v = __riscv_vle32_v_i32m8 (in, 4);
+ vint32m8_t v2 = __riscv_vle32_v_i32m8_m (mask, in, 4);
+ vbool4_t m3 = __riscv_vmslt_vv_i32m8_b4 (v, v2, 4);
+ vbool4_t m4 = __riscv_vmslt_vv_i32m8_b4_mu (m3, m3, v2, v, 4);
+ __riscv_vsm_v_b4 (out, m4, 4);
+}
+
+void f11 (void * in, void *out, int32_t x)
+{
+ vbool4_t mask = *(vbool4_t*)in;
+ asm volatile ("":::"memory");
+ vint32m8_t v = __riscv_vle32_v_i32m8 (in, 4);
+ vint32m8_t v2 = __riscv_vle32_v_i32m8_m (mask, in, 4);
+ vbool4_t m3 = __riscv_vmslt_vv_i32m8_b4 (v, v2, 4);
+ vbool4_t m4 = __riscv_vmslt_vv_i32m8_b4_m (m3, v2, v, 4);
+ __riscv_vsm_v_b4 (out, m3, 4);
+ __riscv_vsm_v_b4 (out, m4, 4);
+}
+
+void f12 (void* base1,void* base2,void* out,int n)
+{
+ vbool4_t mask = *(vbool4_t*)base1;
+ vint32m8_t v = __riscv_vle32_v_i32m8 (base1, 4);
+ vint32m8_t v2 = __riscv_vle32_v_i32m8_m (mask, base1, 4);
+ mask = __riscv_vmslt_vv_i32m8_b4 (v, v2, 4);
+ for (int i = 0; i < n; i++){
+ vint32m8_t v3 = __riscv_vle32_v_i32m8 (base1 + i, 4);
+ vint32m8_t v4 = __riscv_vle32_v_i32m8_m (mask, base1 + i * 2, 4);
+ mask = __riscv_vmslt_vv_i32m8_b4_m (mask, v3, v4,32);
+ mask = __riscv_vmslt_vv_i32m8_b4_mu (mask, mask, v4, v4, 32);
+ }
+ __riscv_vsm_v_b4 (out, mask, 32);
+}
+
+void f13 (void* base1,void* base2,void* out,int n)
+{
+ vbool32_t mask = *(vbool32_t*)base1;
+ vint32m1_t v = __riscv_vle32_v_i32m1 (base1, 4);
+ vint32m1_t v2 = __riscv_vle32_v_i32m1_m (mask, base1, 4);
+ mask = __riscv_vmslt_vv_i32m1_b32 (v, v2, 4);
+ for (int i = 0; i < n; i++){
+ vint32m1_t v3 = __riscv_vle32_v_i32m1 (base1 + i, 4);
+ vint32m1_t v4 = __riscv_vle32_v_i32m1_m (mask, base1 + i * 2, 4);
+ mask = __riscv_vmslt_vv_i32m1_b32_m (mask, v3, v4,32);
+ mask = __riscv_vmslt_vv_i32m1_b32_mu (mask, mask, v4, v4, 32);
+ }
+ __riscv_vsm_v_b32 (out, mask, 32);
+}
+
+/* { dg-final { scan-assembler-not {vmv} } } */
+/* { dg-final { scan-assembler-not {csrr} } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32d -O3" } */
+
+#include "riscv_vector.h"
+
+void f0 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, uint16_t x)
+{
+ vuint16m8_t v1 = __riscv_vle16_v_u16m8 (base1, vl);
+ vbool2_t m1 = __riscv_vlm_v_b2 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23","v24","v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool2_t v = __riscv_vmseq_vx_u16m8_b2_m(m1,v1,x,vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b2 (out,v,vl);
+}
+
+void f1 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, uint16_t x)
+{
+ vuint16m8_t v1 = __riscv_vle16_v_u16m8 (base1, vl);
+ vbool2_t m1 = __riscv_vlm_v_b2 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23","v24","v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool2_t v = __riscv_vmseq_vx_u16m8_b2_mu(m1,m1,v1,x,vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b2 (out,v,vl);
+}
+
+void f2 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, uint16_t x)
+{
+ vuint16m8_t v1 = __riscv_vle16_v_u16m8 (base1, vl);
+ vbool2_t m1 = __riscv_vlm_v_b2 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23","v24","v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool2_t v = __riscv_vmseq_vx_u16m8_b2_m(m1,v1,x,vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b2 (out,v,vl);
+}
+
+void f3 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, uint16_t x)
+{
+ vuint16m8_t v1 = __riscv_vle16_v_u16m8 (base1, vl);
+ vbool2_t m1 = __riscv_vlm_v_b2 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23","v24","v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool2_t v = __riscv_vmseq_vx_u16m8_b2_m(m1,v1,x,vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b2 (out,v,vl);
+}
+
+void f4 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, uint16_t x)
+{
+ vuint16m8_t v1 = __riscv_vle16_v_u16m8 (base1, vl);
+ vbool2_t m1 = __riscv_vlm_v_b2 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool2_t v = __riscv_vmseq_vx_u16m8_b2_m(m1,v1,x,vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b2 (out,v,vl);
+}
+
+void f5 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, uint16_t x)
+{
+ vuint16m8_t v1 = __riscv_vle16_v_u16m8 (base1, vl);
+ vbool2_t m1 = __riscv_vlm_v_b2 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23","v24","v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool2_t v = __riscv_vmseq_vx_u16m8_b2_m(m1,v1,x,vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b2 (out,v,vl);
+}
+
+void f6 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, uint16_t x)
+{
+ vuint16m8_t v1 = __riscv_vle16_v_u16m8 (base1, vl);
+ vbool2_t m1 = __riscv_vlm_v_b2 (base3, vl);
+ vbool2_t m2 = __riscv_vlm_v_b2 (base4, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23","v24","v25",
+ "v26", "v28", "v29", "v30", "v31");
+
+ vbool2_t v = __riscv_vmseq_vx_u16m8_b2_mu(m1,m2,v1,x,vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b2 (out,v,vl);
+}
+
+void f7 (void * in, void *out, int32_t x)
+{
+ vbool4_t mask = *(vbool4_t*)in;
+ asm volatile ("":::"memory");
+ vint32m8_t v = __riscv_vle32_v_i32m8 (in, 4);
+ vint32m8_t v2 = __riscv_vle32_v_i32m8_m (mask, in, 4);
+ vbool4_t m3 = __riscv_vmseq_vx_i32m8_b4 (v, x, 4);
+ vbool4_t m4 = __riscv_vmseq_vx_i32m8_b4_mu (m3, m3, v2, x, 4);
+ __riscv_vsm_v_b4 (out, m4, 4);
+}
+
+void f8 (void * in, void *out, int32_t x)
+{
+ vbool4_t mask = *(vbool4_t*)in;
+ asm volatile ("":::"memory");
+ vint32m8_t v = __riscv_vle32_v_i32m8 (in, 4);
+ vint32m8_t v2 = __riscv_vle32_v_i32m8_m (mask, in, 4);
+ vbool4_t m3 = __riscv_vmseq_vx_i32m8_b4 (v, x, 4);
+ vbool4_t m4 = __riscv_vmseq_vx_i32m8_b4_m (m3, v2, x, 4);
+ __riscv_vsm_v_b4 (out, m3, 4);
+ __riscv_vsm_v_b4 (out, m4, 4);
+}
+
+void f9 (void * in, void *out, int32_t x)
+{
+ vbool4_t mask = *(vbool4_t*)in;
+ asm volatile ("":::"memory");
+ vint32m8_t v = __riscv_vle32_v_i32m8 (in, 4);
+ vint32m8_t v2 = __riscv_vle32_v_i32m8_m (mask, in, 4);
+ vbool4_t m3 = __riscv_vmseq_vx_i32m8_b4 (v, x, 4);
+ vbool4_t m4 = __riscv_vmseq_vx_i32m8_b4_m (m3, v2, x, 4);
+ __riscv_vsm_v_b4 (out, m4, 4);
+ vbool4_t m5 = __riscv_vmseq_vx_i32m8_b4_mu (m3, m4, v2, x, 4);
+ __riscv_vsm_v_b4 (out, m3, 4);
+ __riscv_vsm_v_b4 (out, m5, 4);
+}
+
+void f10 (void * in, void *out, int32_t x)
+{
+ vbool4_t mask = *(vbool4_t*)in;
+ asm volatile ("":::"memory");
+ vint32m8_t v = __riscv_vle32_v_i32m8 (in, 4);
+ vint32m8_t v2 = __riscv_vle32_v_i32m8_m (mask, in, 4);
+ vbool4_t m3 = __riscv_vmseq_vx_i32m8_b4 (v, x, 4);
+ vbool4_t m4 = __riscv_vmseq_vx_i32m8_b4_mu (m3, m3, v2, x, 4);
+ __riscv_vsm_v_b4 (out, m4, 4);
+}
+
+void f11 (void * in, void *out, int32_t x)
+{
+ vbool4_t mask = *(vbool4_t*)in;
+ asm volatile ("":::"memory");
+ vint32m8_t v = __riscv_vle32_v_i32m8 (in, 4);
+ vint32m8_t v2 = __riscv_vle32_v_i32m8_m (mask, in, 4);
+ vbool4_t m3 = __riscv_vmseq_vx_i32m8_b4 (v, x, 4);
+ vbool4_t m4 = __riscv_vmseq_vx_i32m8_b4_m (m3, v2, x, 4);
+ __riscv_vsm_v_b4 (out, m3, 4);
+ __riscv_vsm_v_b4 (out, m4, 4);
+}
+
+void f12 (void* base1,void* base2,void* out,int n, int32_t x)
+{
+ vbool4_t mask = *(vbool4_t*)base1;
+ vint32m8_t v = __riscv_vle32_v_i32m8 (base1, 4);
+ vint32m8_t v2 = __riscv_vle32_v_i32m8_m (mask, base1, 4);
+ mask = __riscv_vmseq_vx_i32m8_b4 (v, x, 4);
+ for (int i = 0; i < n; i++){
+ vint32m8_t v3 = __riscv_vle32_v_i32m8 (base1 + i, 4);
+ vint32m8_t v4 = __riscv_vle32_v_i32m8_m (mask, base1 + i * 2, 4);
+ mask = __riscv_vmseq_vx_i32m8_b4_m (mask, v3, x,32);
+ mask = __riscv_vmseq_vx_i32m8_b4_mu (mask, mask, v4, x, 32);
+ }
+ __riscv_vsm_v_b4 (out, mask, 32);
+}
+
+void f13 (void* base1,void* base2,void* out,int n, int32_t x)
+{
+ vbool32_t mask = *(vbool32_t*)base1;
+ vint32m1_t v = __riscv_vle32_v_i32m1 (base1, 4);
+ vint32m1_t v2 = __riscv_vle32_v_i32m1_m (mask, base1, 4);
+ mask = __riscv_vmseq_vx_i32m1_b32 (v, x, 4);
+ for (int i = 0; i < n; i++){
+ vint32m1_t v3 = __riscv_vle32_v_i32m1 (base1 + i, 4);
+ vint32m1_t v4 = __riscv_vle32_v_i32m1_m (mask, base1 + i * 2, 4);
+ mask = __riscv_vmseq_vx_i32m1_b32_m (mask, v3, x,32);
+ mask = __riscv_vmseq_vx_i32m1_b32_mu (mask, mask, v4, x, 32);
+ }
+ __riscv_vsm_v_b32 (out, mask, 32);
+}
+
+/* { dg-final { scan-assembler-not {vmv} } } */
+/* { dg-final { scan-assembler-not {csrr} } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32d -O3" } */
+
+#include "riscv_vector.h"
+
+void f0 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, uint16_t x)
+{
+ vuint16m8_t v1 = __riscv_vle16_v_u16m8 (base1, vl);
+ vbool2_t m1 = __riscv_vlm_v_b2 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23","v24","v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool2_t v = __riscv_vmsltu_vx_u16m8_b2_m(m1,v1,x,vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b2 (out,v,vl);
+}
+
+void f1 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, uint16_t x)
+{
+ vuint16m8_t v1 = __riscv_vle16_v_u16m8 (base1, vl);
+ vbool2_t m1 = __riscv_vlm_v_b2 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23","v24","v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool2_t v = __riscv_vmsltu_vx_u16m8_b2_mu(m1,m1,v1,x,vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b2 (out,v,vl);
+}
+
+void f2 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, uint16_t x)
+{
+ vuint16m8_t v1 = __riscv_vle16_v_u16m8 (base1, vl);
+ vbool2_t m1 = __riscv_vlm_v_b2 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23","v24","v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool2_t v = __riscv_vmsltu_vx_u16m8_b2_m(m1,v1,x,vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b2 (out,v,vl);
+}
+
+void f3 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, uint16_t x)
+{
+ vuint16m8_t v1 = __riscv_vle16_v_u16m8 (base1, vl);
+ vbool2_t m1 = __riscv_vlm_v_b2 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23","v24","v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool2_t v = __riscv_vmsltu_vx_u16m8_b2_m(m1,v1,x,vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b2 (out,v,vl);
+}
+
+void f4 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, uint16_t x)
+{
+ vuint16m8_t v1 = __riscv_vle16_v_u16m8 (base1, vl);
+ vbool2_t m1 = __riscv_vlm_v_b2 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool2_t v = __riscv_vmsltu_vx_u16m8_b2_m(m1,v1,x,vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b2 (out,v,vl);
+}
+
+void f5 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, uint16_t x)
+{
+ vuint16m8_t v1 = __riscv_vle16_v_u16m8 (base1, vl);
+ vbool2_t m1 = __riscv_vlm_v_b2 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23","v24","v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool2_t v = __riscv_vmsltu_vx_u16m8_b2_m(m1,v1,x,vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b2 (out,v,vl);
+}
+
+void f6 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, uint16_t x)
+{
+ vuint16m8_t v1 = __riscv_vle16_v_u16m8 (base1, vl);
+ vbool2_t m1 = __riscv_vlm_v_b2 (base3, vl);
+ vbool2_t m2 = __riscv_vlm_v_b2 (base4, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23","v24","v25",
+ "v26", "v28", "v29", "v30", "v31");
+
+ vbool2_t v = __riscv_vmsltu_vx_u16m8_b2_mu(m1,m2,v1,x,vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b2 (out,v,vl);
+}
+
+void f7 (void * in, void *out, int32_t x)
+{
+ vbool4_t mask = *(vbool4_t*)in;
+ asm volatile ("":::"memory");
+ vint32m8_t v = __riscv_vle32_v_i32m8 (in, 4);
+ vint32m8_t v2 = __riscv_vle32_v_i32m8_m (mask, in, 4);
+ vbool4_t m3 = __riscv_vmslt_vx_i32m8_b4 (v, x, 4);
+ vbool4_t m4 = __riscv_vmslt_vx_i32m8_b4_mu (m3, m3, v2, x, 4);
+ __riscv_vsm_v_b4 (out, m4, 4);
+}
+
+void f8 (void * in, void *out, int32_t x)
+{
+ vbool4_t mask = *(vbool4_t*)in;
+ asm volatile ("":::"memory");
+ vint32m8_t v = __riscv_vle32_v_i32m8 (in, 4);
+ vint32m8_t v2 = __riscv_vle32_v_i32m8_m (mask, in, 4);
+ vbool4_t m3 = __riscv_vmslt_vx_i32m8_b4 (v, x, 4);
+ vbool4_t m4 = __riscv_vmslt_vx_i32m8_b4_m (m3, v2, x, 4);
+ __riscv_vsm_v_b4 (out, m3, 4);
+ __riscv_vsm_v_b4 (out, m4, 4);
+}
+
+void f9 (void * in, void *out, int32_t x)
+{
+ vbool4_t mask = *(vbool4_t*)in;
+ asm volatile ("":::"memory");
+ vint32m8_t v = __riscv_vle32_v_i32m8 (in, 4);
+ vint32m8_t v2 = __riscv_vle32_v_i32m8_m (mask, in, 4);
+ vbool4_t m3 = __riscv_vmslt_vx_i32m8_b4 (v, x, 4);
+ vbool4_t m4 = __riscv_vmslt_vx_i32m8_b4_m (m3, v2, x, 4);
+ __riscv_vsm_v_b4 (out, m4, 4);
+ vbool4_t m5 = __riscv_vmslt_vx_i32m8_b4_mu (m3, m4, v2, x, 4);
+ __riscv_vsm_v_b4 (out, m3, 4);
+ __riscv_vsm_v_b4 (out, m5, 4);
+}
+
+void f10 (void * in, void *out, int32_t x)
+{
+ vbool4_t mask = *(vbool4_t*)in;
+ asm volatile ("":::"memory");
+ vint32m8_t v = __riscv_vle32_v_i32m8 (in, 4);
+ vint32m8_t v2 = __riscv_vle32_v_i32m8_m (mask, in, 4);
+ vbool4_t m3 = __riscv_vmslt_vx_i32m8_b4 (v, x, 4);
+ vbool4_t m4 = __riscv_vmslt_vx_i32m8_b4_mu (m3, m3, v2, x, 4);
+ __riscv_vsm_v_b4 (out, m4, 4);
+}
+
+void f11 (void * in, void *out, int32_t x)
+{
+ vbool4_t mask = *(vbool4_t*)in;
+ asm volatile ("":::"memory");
+ vint32m8_t v = __riscv_vle32_v_i32m8 (in, 4);
+ vint32m8_t v2 = __riscv_vle32_v_i32m8_m (mask, in, 4);
+ vbool4_t m3 = __riscv_vmslt_vx_i32m8_b4 (v, x, 4);
+ vbool4_t m4 = __riscv_vmslt_vx_i32m8_b4_m (m3, v2, x, 4);
+ __riscv_vsm_v_b4 (out, m3, 4);
+ __riscv_vsm_v_b4 (out, m4, 4);
+}
+
+void f12 (void* base1,void* base2,void* out,int n, int32_t x)
+{
+ vbool4_t mask = *(vbool4_t*)base1;
+ vint32m8_t v = __riscv_vle32_v_i32m8 (base1, 4);
+ vint32m8_t v2 = __riscv_vle32_v_i32m8_m (mask, base1, 4);
+ mask = __riscv_vmslt_vx_i32m8_b4 (v, x, 4);
+ for (int i = 0; i < n; i++){
+ vint32m8_t v3 = __riscv_vle32_v_i32m8 (base1 + i, 4);
+ vint32m8_t v4 = __riscv_vle32_v_i32m8_m (mask, base1 + i * 2, 4);
+ mask = __riscv_vmslt_vx_i32m8_b4_m (mask, v3, x,32);
+ mask = __riscv_vmslt_vx_i32m8_b4_mu (mask, mask, v4, x, 32);
+ }
+ __riscv_vsm_v_b4 (out, mask, 32);
+}
+
+void f13 (void* base1,void* base2,void* out,int n, int32_t x)
+{
+ vbool32_t mask = *(vbool32_t*)base1;
+ vint32m1_t v = __riscv_vle32_v_i32m1 (base1, 4);
+ vint32m1_t v2 = __riscv_vle32_v_i32m1_m (mask, base1, 4);
+ mask = __riscv_vmslt_vx_i32m1_b32 (v, x, 4);
+ for (int i = 0; i < n; i++){
+ vint32m1_t v3 = __riscv_vle32_v_i32m1 (base1 + i, 4);
+ vint32m1_t v4 = __riscv_vle32_v_i32m1_m (mask, base1 + i * 2, 4);
+ mask = __riscv_vmslt_vx_i32m1_b32_m (mask, v3, x,32);
+ mask = __riscv_vmslt_vx_i32m1_b32_mu (mask, mask, v4, x, 32);
+ }
+ __riscv_vsm_v_b32 (out, mask, 32);
+}
+
+/* { dg-final { scan-assembler-not {vmv} } } */
+/* { dg-final { scan-assembler-not {csrr} } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32d -O3" } */
+
+#include "riscv_vector.h"
+
+void f0 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, uint16_t x)
+{
+ vuint16m8_t v1 = __riscv_vle16_v_u16m8 (base1, vl);
+ vbool2_t m1 = __riscv_vlm_v_b2 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23","v24","v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool2_t v = __riscv_vmseq_vx_u16m8_b2_m(m1,v1, -16,vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b2 (out,v,vl);
+}
+
+void f1 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, uint16_t x)
+{
+ vuint16m8_t v1 = __riscv_vle16_v_u16m8 (base1, vl);
+ vbool2_t m1 = __riscv_vlm_v_b2 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23","v24","v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool2_t v = __riscv_vmseq_vx_u16m8_b2_mu(m1,m1,v1, -16,vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b2 (out,v,vl);
+}
+
+void f2 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, uint16_t x)
+{
+ vuint16m8_t v1 = __riscv_vle16_v_u16m8 (base1, vl);
+ vbool2_t m1 = __riscv_vlm_v_b2 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23","v24","v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool2_t v = __riscv_vmseq_vx_u16m8_b2_m(m1,v1, -16,vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b2 (out,v,vl);
+}
+
+void f3 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, uint16_t x)
+{
+ vuint16m8_t v1 = __riscv_vle16_v_u16m8 (base1, vl);
+ vbool2_t m1 = __riscv_vlm_v_b2 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23","v24","v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool2_t v = __riscv_vmseq_vx_u16m8_b2_m(m1,v1, -16,vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b2 (out,v,vl);
+}
+
+void f4 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, uint16_t x)
+{
+ vuint16m8_t v1 = __riscv_vle16_v_u16m8 (base1, vl);
+ vbool2_t m1 = __riscv_vlm_v_b2 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool2_t v = __riscv_vmseq_vx_u16m8_b2_m(m1,v1, -16,vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b2 (out,v,vl);
+}
+
+void f5 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, uint16_t x)
+{
+ vuint16m8_t v1 = __riscv_vle16_v_u16m8 (base1, vl);
+ vbool2_t m1 = __riscv_vlm_v_b2 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23","v24","v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool2_t v = __riscv_vmseq_vx_u16m8_b2_m(m1,v1, -16,vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b2 (out,v,vl);
+}
+
+void f6 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, uint16_t x)
+{
+ vuint16m8_t v1 = __riscv_vle16_v_u16m8 (base1, vl);
+ vbool2_t m1 = __riscv_vlm_v_b2 (base3, vl);
+ vbool2_t m2 = __riscv_vlm_v_b2 (base4, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23","v24","v25",
+ "v26", "v28", "v29", "v30", "v31");
+
+ vbool2_t v = __riscv_vmseq_vx_u16m8_b2_mu(m1,m2,v1, -16,vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b2 (out,v,vl);
+}
+
+void f7 (void * in, void *out, int32_t x)
+{
+ vbool4_t mask = *(vbool4_t*)in;
+ asm volatile ("":::"memory");
+ vint32m8_t v = __riscv_vle32_v_i32m8 (in, 4);
+ vint32m8_t v2 = __riscv_vle32_v_i32m8_m (mask, in, 4);
+ vbool4_t m3 = __riscv_vmseq_vx_i32m8_b4 (v, -16, 4);
+ vbool4_t m4 = __riscv_vmseq_vx_i32m8_b4_mu (m3, m3, v2, -16, 4);
+ __riscv_vsm_v_b4 (out, m4, 4);
+}
+
+void f8 (void * in, void *out, int32_t x)
+{
+ vbool4_t mask = *(vbool4_t*)in;
+ asm volatile ("":::"memory");
+ vint32m8_t v = __riscv_vle32_v_i32m8 (in, 4);
+ vint32m8_t v2 = __riscv_vle32_v_i32m8_m (mask, in, 4);
+ vbool4_t m3 = __riscv_vmseq_vx_i32m8_b4 (v, -16, 4);
+ vbool4_t m4 = __riscv_vmseq_vx_i32m8_b4_m (m3, v2, -16, 4);
+ __riscv_vsm_v_b4 (out, m3, 4);
+ __riscv_vsm_v_b4 (out, m4, 4);
+}
+
+void f9 (void * in, void *out, int32_t x)
+{
+ vbool4_t mask = *(vbool4_t*)in;
+ asm volatile ("":::"memory");
+ vint32m8_t v = __riscv_vle32_v_i32m8 (in, 4);
+ vint32m8_t v2 = __riscv_vle32_v_i32m8_m (mask, in, 4);
+ vbool4_t m3 = __riscv_vmseq_vx_i32m8_b4 (v, -16, 4);
+ vbool4_t m4 = __riscv_vmseq_vx_i32m8_b4_m (m3, v2, -16, 4);
+ __riscv_vsm_v_b4 (out, m4, 4);
+ vbool4_t m5 = __riscv_vmseq_vx_i32m8_b4_mu (m3, m4, v2, -16, 4);
+ __riscv_vsm_v_b4 (out, m3, 4);
+ __riscv_vsm_v_b4 (out, m5, 4);
+}
+
+void f10 (void * in, void *out, int32_t x)
+{
+ vbool4_t mask = *(vbool4_t*)in;
+ asm volatile ("":::"memory");
+ vint32m8_t v = __riscv_vle32_v_i32m8 (in, 4);
+ vint32m8_t v2 = __riscv_vle32_v_i32m8_m (mask, in, 4);
+ vbool4_t m3 = __riscv_vmseq_vx_i32m8_b4 (v, -16, 4);
+ vbool4_t m4 = __riscv_vmseq_vx_i32m8_b4_mu (m3, m3, v2, -16, 4);
+ __riscv_vsm_v_b4 (out, m4, 4);
+}
+
+void f11 (void * in, void *out, int32_t x)
+{
+ vbool4_t mask = *(vbool4_t*)in;
+ asm volatile ("":::"memory");
+ vint32m8_t v = __riscv_vle32_v_i32m8 (in, 4);
+ vint32m8_t v2 = __riscv_vle32_v_i32m8_m (mask, in, 4);
+ vbool4_t m3 = __riscv_vmseq_vx_i32m8_b4 (v, -16, 4);
+ vbool4_t m4 = __riscv_vmseq_vx_i32m8_b4_m (m3, v2, -16, 4);
+ __riscv_vsm_v_b4 (out, m3, 4);
+ __riscv_vsm_v_b4 (out, m4, 4);
+}
+
+void f12 (void* base1,void* base2,void* out,int n, int32_t x)
+{
+ vbool4_t mask = *(vbool4_t*)base1;
+ vint32m8_t v = __riscv_vle32_v_i32m8 (base1, 4);
+ vint32m8_t v2 = __riscv_vle32_v_i32m8_m (mask, base1, 4);
+ mask = __riscv_vmseq_vx_i32m8_b4 (v, -16, 4);
+ for (int i = 0; i < n; i++){
+ vint32m8_t v3 = __riscv_vle32_v_i32m8 (base1 + i, 4);
+ vint32m8_t v4 = __riscv_vle32_v_i32m8_m (mask, base1 + i * 2, 4);
+ mask = __riscv_vmseq_vx_i32m8_b4_m (mask, v3, -16,32);
+ mask = __riscv_vmseq_vx_i32m8_b4_mu (mask, mask, v4, -16, 32);
+ }
+ __riscv_vsm_v_b4 (out, mask, 32);
+}
+
+void f13 (void* base1,void* base2,void* out,int n, int32_t x)
+{
+ vbool32_t mask = *(vbool32_t*)base1;
+ vint32m1_t v = __riscv_vle32_v_i32m1 (base1, 4);
+ vint32m1_t v2 = __riscv_vle32_v_i32m1_m (mask, base1, 4);
+ mask = __riscv_vmseq_vx_i32m1_b32 (v, -16, 4);
+ for (int i = 0; i < n; i++){
+ vint32m1_t v3 = __riscv_vle32_v_i32m1 (base1 + i, 4);
+ vint32m1_t v4 = __riscv_vle32_v_i32m1_m (mask, base1 + i * 2, 4);
+ mask = __riscv_vmseq_vx_i32m1_b32_m (mask, v3, -16,32);
+ mask = __riscv_vmseq_vx_i32m1_b32_mu (mask, mask, v4, -16, 32);
+ }
+ __riscv_vsm_v_b32 (out, mask, 32);
+}
+
+/* { dg-final { scan-assembler-not {vmv} } } */
+/* { dg-final { scan-assembler-not {csrr} } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32d -O3" } */
+
+#include "riscv_vector.h"
+
+void f0 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, uint16_t x)
+{
+ vuint16m8_t v1 = __riscv_vle16_v_u16m8 (base1, vl);
+ vbool2_t m1 = __riscv_vlm_v_b2 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23","v24","v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool2_t v = __riscv_vmsltu_vx_u16m8_b2_m(m1,v1, -15,vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b2 (out,v,vl);
+}
+
+void f1 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, uint16_t x)
+{
+ vuint16m8_t v1 = __riscv_vle16_v_u16m8 (base1, vl);
+ vbool2_t m1 = __riscv_vlm_v_b2 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23","v24","v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool2_t v = __riscv_vmsltu_vx_u16m8_b2_mu(m1,m1,v1, -15,vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b2 (out,v,vl);
+}
+
+void f2 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, uint16_t x)
+{
+ vuint16m8_t v1 = __riscv_vle16_v_u16m8 (base1, vl);
+ vbool2_t m1 = __riscv_vlm_v_b2 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23","v24","v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool2_t v = __riscv_vmsltu_vx_u16m8_b2_m(m1,v1, -15,vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b2 (out,v,vl);
+}
+
+void f3 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, uint16_t x)
+{
+ vuint16m8_t v1 = __riscv_vle16_v_u16m8 (base1, vl);
+ vbool2_t m1 = __riscv_vlm_v_b2 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23","v24","v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool2_t v = __riscv_vmsltu_vx_u16m8_b2_m(m1,v1, -15,vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b2 (out,v,vl);
+}
+
+void f4 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, uint16_t x)
+{
+ vuint16m8_t v1 = __riscv_vle16_v_u16m8 (base1, vl);
+ vbool2_t m1 = __riscv_vlm_v_b2 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool2_t v = __riscv_vmsltu_vx_u16m8_b2_m(m1,v1, -15,vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b2 (out,v,vl);
+}
+
+void f5 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, uint16_t x)
+{
+ vuint16m8_t v1 = __riscv_vle16_v_u16m8 (base1, vl);
+ vbool2_t m1 = __riscv_vlm_v_b2 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23","v24","v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool2_t v = __riscv_vmsltu_vx_u16m8_b2_m(m1,v1, -15,vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b2 (out,v,vl);
+}
+
+void f6 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, uint16_t x)
+{
+ vuint16m8_t v1 = __riscv_vle16_v_u16m8 (base1, vl);
+ vbool2_t m1 = __riscv_vlm_v_b2 (base3, vl);
+ vbool2_t m2 = __riscv_vlm_v_b2 (base4, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23","v24","v25",
+ "v26", "v28", "v29", "v30", "v31");
+
+ vbool2_t v = __riscv_vmsltu_vx_u16m8_b2_mu(m1,m2,v1, -15,vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b2 (out,v,vl);
+}
+
+void f7 (void * in, void *out, int32_t x)
+{
+ vbool4_t mask = *(vbool4_t*)in;
+ asm volatile ("":::"memory");
+ vint32m8_t v = __riscv_vle32_v_i32m8 (in, 4);
+ vint32m8_t v2 = __riscv_vle32_v_i32m8_m (mask, in, 4);
+ vbool4_t m3 = __riscv_vmslt_vx_i32m8_b4 (v, -15,4);
+ vbool4_t m4 = __riscv_vmslt_vx_i32m8_b4_mu (m3, m3, v2, -15,4);
+ __riscv_vsm_v_b4 (out, m4, 4);
+}
+
+void f8 (void * in, void *out, int32_t x)
+{
+ vbool4_t mask = *(vbool4_t*)in;
+ asm volatile ("":::"memory");
+ vint32m8_t v = __riscv_vle32_v_i32m8 (in, 4);
+ vint32m8_t v2 = __riscv_vle32_v_i32m8_m (mask, in, 4);
+ vbool4_t m3 = __riscv_vmslt_vx_i32m8_b4 (v, -15,4);
+ vbool4_t m4 = __riscv_vmslt_vx_i32m8_b4_m (m3, v2, -15,4);
+ __riscv_vsm_v_b4 (out, m3, 4);
+ __riscv_vsm_v_b4 (out, m4, 4);
+}
+
+void f9 (void * in, void *out, int32_t x)
+{
+ vbool4_t mask = *(vbool4_t*)in;
+ asm volatile ("":::"memory");
+ vint32m8_t v = __riscv_vle32_v_i32m8 (in, 4);
+ vint32m8_t v2 = __riscv_vle32_v_i32m8_m (mask, in, 4);
+ vbool4_t m3 = __riscv_vmslt_vx_i32m8_b4 (v, -15,4);
+ vbool4_t m4 = __riscv_vmslt_vx_i32m8_b4_m (m3, v2, -15,4);
+ __riscv_vsm_v_b4 (out, m4, 4);
+ vbool4_t m5 = __riscv_vmslt_vx_i32m8_b4_mu (m3, m4, v2, -15,4);
+ __riscv_vsm_v_b4 (out, m3, 4);
+ __riscv_vsm_v_b4 (out, m5, 4);
+}
+
+void f10 (void * in, void *out, int32_t x)
+{
+ vbool4_t mask = *(vbool4_t*)in;
+ asm volatile ("":::"memory");
+ vint32m8_t v = __riscv_vle32_v_i32m8 (in, 4);
+ vint32m8_t v2 = __riscv_vle32_v_i32m8_m (mask, in, 4);
+ vbool4_t m3 = __riscv_vmslt_vx_i32m8_b4 (v, -15,4);
+ vbool4_t m4 = __riscv_vmslt_vx_i32m8_b4_mu (m3, m3, v2, -15,4);
+ __riscv_vsm_v_b4 (out, m4, 4);
+}
+
+void f11 (void * in, void *out, int32_t x)
+{
+ vbool4_t mask = *(vbool4_t*)in;
+ asm volatile ("":::"memory");
+ vint32m8_t v = __riscv_vle32_v_i32m8 (in, 4);
+ vint32m8_t v2 = __riscv_vle32_v_i32m8_m (mask, in, 4);
+ vbool4_t m3 = __riscv_vmslt_vx_i32m8_b4 (v, -15,4);
+ vbool4_t m4 = __riscv_vmslt_vx_i32m8_b4_m (m3, v2, -15,4);
+ __riscv_vsm_v_b4 (out, m3, 4);
+ __riscv_vsm_v_b4 (out, m4, 4);
+}
+
+void f12 (void* base1,void* base2,void* out,int n, int32_t x)
+{
+ vbool4_t mask = *(vbool4_t*)base1;
+ vint32m8_t v = __riscv_vle32_v_i32m8 (base1, 4);
+ vint32m8_t v2 = __riscv_vle32_v_i32m8_m (mask, base1, 4);
+ mask = __riscv_vmslt_vx_i32m8_b4 (v, -15,4);
+ for (int i = 0; i < n; i++){
+ vint32m8_t v3 = __riscv_vle32_v_i32m8 (base1 + i, 4);
+ vint32m8_t v4 = __riscv_vle32_v_i32m8_m (mask, base1 + i * 2, 4);
+ mask = __riscv_vmslt_vx_i32m8_b4_m (mask, v3, -15,32);
+ mask = __riscv_vmslt_vx_i32m8_b4_mu (mask, mask, v4, -15,32);
+ }
+ __riscv_vsm_v_b4 (out, mask, 32);
+}
+
+void f13 (void* base1,void* base2,void* out,int n, int32_t x)
+{
+ vbool32_t mask = *(vbool32_t*)base1;
+ vint32m1_t v = __riscv_vle32_v_i32m1 (base1, 4);
+ vint32m1_t v2 = __riscv_vle32_v_i32m1_m (mask, base1, 4);
+ mask = __riscv_vmslt_vx_i32m1_b32 (v, -15,4);
+ for (int i = 0; i < n; i++){
+ vint32m1_t v3 = __riscv_vle32_v_i32m1 (base1 + i, 4);
+ vint32m1_t v4 = __riscv_vle32_v_i32m1_m (mask, base1 + i * 2, 4);
+ mask = __riscv_vmslt_vx_i32m1_b32_m (mask, v3, -15,32);
+ mask = __riscv_vmslt_vx_i32m1_b32_mu (mask, mask, v4, -15,32);
+ }
+ __riscv_vsm_v_b32 (out, mask, 32);
+}
+
+/* { dg-final { scan-assembler-not {vmv} } } */
+/* { dg-final { scan-assembler-not {csrr} } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */
+
+#include "riscv_vector.h"
+
+void f0 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, uint16_t x)
+{
+ vuint64m8_t v1 = __riscv_vle64_v_u64m8 (base1, vl);
+ vbool8_t m1 = __riscv_vlm_v_b8 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23","v24","v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool8_t v = __riscv_vmseq_vx_u64m8_b8_m(m1,v1,x,vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b8 (out,v,vl);
+}
+
+void f1 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, uint16_t x)
+{
+ vuint64m8_t v1 = __riscv_vle64_v_u64m8 (base1, vl);
+ vbool8_t m1 = __riscv_vlm_v_b8 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23","v24","v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool8_t v = __riscv_vmseq_vx_u64m8_b8_mu(m1,m1,v1,x,vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b8 (out,v,vl);
+}
+
+void f2 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, uint16_t x)
+{
+ vuint64m8_t v1 = __riscv_vle64_v_u64m8 (base1, vl);
+ vbool8_t m1 = __riscv_vlm_v_b8 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23","v24","v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool8_t v = __riscv_vmseq_vx_u64m8_b8_m(m1,v1,x,vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b8 (out,v,vl);
+}
+
+void f3 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, uint16_t x)
+{
+ vuint64m8_t v1 = __riscv_vle64_v_u64m8 (base1, vl);
+ vbool8_t m1 = __riscv_vlm_v_b8 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23","v24","v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool8_t v = __riscv_vmseq_vx_u64m8_b8_m(m1,v1,x,vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b8 (out,v,vl);
+}
+
+void f4 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, uint16_t x)
+{
+ vuint64m8_t v1 = __riscv_vle64_v_u64m8 (base1, vl);
+ vbool8_t m1 = __riscv_vlm_v_b8 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool8_t v = __riscv_vmseq_vx_u64m8_b8_m(m1,v1,x,vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b8 (out,v,vl);
+}
+
+void f5 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, uint16_t x)
+{
+ vuint64m8_t v1 = __riscv_vle64_v_u64m8 (base1, vl);
+ vbool8_t m1 = __riscv_vlm_v_b8 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23","v24","v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool8_t v = __riscv_vmseq_vx_u64m8_b8_m(m1,v1,x,vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b8 (out,v,vl);
+}
+
+void f6 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, uint16_t x)
+{
+ vuint64m8_t v1 = __riscv_vle64_v_u64m8 (base1, vl);
+ vbool8_t m1 = __riscv_vlm_v_b8 (base3, vl);
+ vbool8_t m2 = __riscv_vlm_v_b8 (base4, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23","v24","v25",
+ "v26", "v28", "v29", "v30", "v31");
+
+ vbool8_t v = __riscv_vmseq_vx_u64m8_b8_mu(m1,m2,v1,x,vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b8 (out,v,vl);
+}
+
+void f7 (void * in, void *out, int32_t x)
+{
+ vbool8_t mask = *(vbool8_t*)in;
+ asm volatile ("":::"memory");
+ vint64m8_t v = __riscv_vle64_v_i64m8 (in, 4);
+ vint64m8_t v2 = __riscv_vle64_v_i64m8_m (mask, in, 4);
+ vbool8_t m3 = __riscv_vmseq_vx_i64m8_b8 (v, x, 4);
+ vbool8_t m4 = __riscv_vmseq_vx_i64m8_b8_mu (m3, m3, v2, x, 4);
+ __riscv_vsm_v_b8 (out, m4, 4);
+}
+
+void f8 (void * in, void *out, int32_t x)
+{
+ vbool8_t mask = *(vbool8_t*)in;
+ asm volatile ("":::"memory");
+ vint64m8_t v = __riscv_vle64_v_i64m8 (in, 4);
+ vint64m8_t v2 = __riscv_vle64_v_i64m8_m (mask, in, 4);
+ vbool8_t m3 = __riscv_vmseq_vx_i64m8_b8 (v, x, 4);
+ vbool8_t m4 = __riscv_vmseq_vx_i64m8_b8_m (m3, v2, x, 4);
+ __riscv_vsm_v_b8 (out, m3, 4);
+ __riscv_vsm_v_b8 (out, m4, 4);
+}
+
+void f9 (void * in, void *out, int32_t x)
+{
+ vbool8_t mask = *(vbool8_t*)in;
+ asm volatile ("":::"memory");
+ vint64m8_t v = __riscv_vle64_v_i64m8 (in, 4);
+ vint64m8_t v2 = __riscv_vle64_v_i64m8_m (mask, in, 4);
+ vbool8_t m3 = __riscv_vmseq_vx_i64m8_b8 (v, x, 4);
+ vbool8_t m4 = __riscv_vmseq_vx_i64m8_b8_m (m3, v2, x, 4);
+ __riscv_vsm_v_b8 (out, m4, 4);
+ vbool8_t m5 = __riscv_vmseq_vx_i64m8_b8_mu (m3, m4, v2, x, 4);
+ __riscv_vsm_v_b8 (out, m3, 4);
+ __riscv_vsm_v_b8 (out, m5, 4);
+}
+
+void f10 (void * in, void *out, int32_t x)
+{
+ vbool8_t mask = *(vbool8_t*)in;
+ asm volatile ("":::"memory");
+ vint64m8_t v = __riscv_vle64_v_i64m8 (in, 4);
+ vint64m8_t v2 = __riscv_vle64_v_i64m8_m (mask, in, 4);
+ vbool8_t m3 = __riscv_vmseq_vx_i64m8_b8 (v, x, 4);
+ vbool8_t m4 = __riscv_vmseq_vx_i64m8_b8_mu (m3, m3, v2, x, 4);
+ __riscv_vsm_v_b8 (out, m4, 4);
+}
+
+void f11 (void * in, void *out, int32_t x)
+{
+ vbool8_t mask = *(vbool8_t*)in;
+ asm volatile ("":::"memory");
+ vint64m8_t v = __riscv_vle64_v_i64m8 (in, 4);
+ vint64m8_t v2 = __riscv_vle64_v_i64m8_m (mask, in, 4);
+ vbool8_t m3 = __riscv_vmseq_vx_i64m8_b8 (v, x, 4);
+ vbool8_t m4 = __riscv_vmseq_vx_i64m8_b8_m (m3, v2, x, 4);
+ __riscv_vsm_v_b8 (out, m3, 4);
+ __riscv_vsm_v_b8 (out, m4, 4);
+}
+
+void f12 (void* base1,void* base2,void* out,int n, int32_t x)
+{
+ vbool8_t mask = *(vbool8_t*)base1;
+ vint64m8_t v = __riscv_vle64_v_i64m8 (base1, 4);
+ vint64m8_t v2 = __riscv_vle64_v_i64m8_m (mask, base1, 4);
+ mask = __riscv_vmseq_vx_i64m8_b8 (v, x, 4);
+ for (int i = 0; i < n; i++){
+ vint64m8_t v3 = __riscv_vle64_v_i64m8 (base1 + i, 4);
+ vint64m8_t v4 = __riscv_vle64_v_i64m8_m (mask, base1 + i * 2, 4);
+ mask = __riscv_vmseq_vx_i64m8_b8_m (mask, v3, x,32);
+ mask = __riscv_vmseq_vx_i64m8_b8_mu (mask, mask, v4, x, 32);
+ }
+ __riscv_vsm_v_b8 (out, mask, 32);
+}
+
+void f13 (void* base1,void* base2,void* out,int n, int32_t x)
+{
+ vbool64_t mask = *(vbool64_t*)base1;
+ vint64m1_t v = __riscv_vle64_v_i64m1 (base1, 4);
+ vint64m1_t v2 = __riscv_vle64_v_i64m1_m (mask, base1, 4);
+ mask = __riscv_vmseq_vx_i64m1_b64 (v, x, 4);
+ for (int i = 0; i < n; i++){
+ vint64m1_t v3 = __riscv_vle64_v_i64m1 (base1 + i, 4);
+ vint64m1_t v4 = __riscv_vle64_v_i64m1_m (mask, base1 + i * 2, 4);
+ mask = __riscv_vmseq_vx_i64m1_b64_m (mask, v3, x,32);
+ mask = __riscv_vmseq_vx_i64m1_b64_mu (mask, mask, v4, x, 32);
+ }
+ __riscv_vsm_v_b64 (out, mask, 32);
+}
+
+/* { dg-final { scan-assembler-not {vmv} } } */
+/* { dg-final { scan-assembler-not {csrr} } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */
+
+#include "riscv_vector.h"
+
+void f0 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, uint16_t x)
+{
+ vuint64m8_t v1 = __riscv_vle64_v_u64m8 (base1, vl);
+ vbool8_t m1 = __riscv_vlm_v_b8 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23","v24","v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool8_t v = __riscv_vmsltu_vx_u64m8_b8_m(m1,v1,x,vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b8 (out,v,vl);
+}
+
+void f1 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, uint16_t x)
+{
+ vuint64m8_t v1 = __riscv_vle64_v_u64m8 (base1, vl);
+ vbool8_t m1 = __riscv_vlm_v_b8 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23","v24","v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool8_t v = __riscv_vmsltu_vx_u64m8_b8_mu(m1,m1,v1,x,vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b8 (out,v,vl);
+}
+
+void f2 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, uint16_t x)
+{
+ vuint64m8_t v1 = __riscv_vle64_v_u64m8 (base1, vl);
+ vbool8_t m1 = __riscv_vlm_v_b8 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23","v24","v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool8_t v = __riscv_vmsltu_vx_u64m8_b8_m(m1,v1,x,vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b8 (out,v,vl);
+}
+
+void f3 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, uint16_t x)
+{
+ vuint64m8_t v1 = __riscv_vle64_v_u64m8 (base1, vl);
+ vbool8_t m1 = __riscv_vlm_v_b8 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23","v24","v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool8_t v = __riscv_vmsltu_vx_u64m8_b8_m(m1,v1,x,vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b8 (out,v,vl);
+}
+
+void f4 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, uint16_t x)
+{
+ vuint64m8_t v1 = __riscv_vle64_v_u64m8 (base1, vl);
+ vbool8_t m1 = __riscv_vlm_v_b8 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool8_t v = __riscv_vmsltu_vx_u64m8_b8_m(m1,v1,x,vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b8 (out,v,vl);
+}
+
+void f5 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, uint16_t x)
+{
+ vuint64m8_t v1 = __riscv_vle64_v_u64m8 (base1, vl);
+ vbool8_t m1 = __riscv_vlm_v_b8 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23","v24","v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool8_t v = __riscv_vmsltu_vx_u64m8_b8_m(m1,v1,x,vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b8 (out,v,vl);
+}
+
+void f6 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, uint16_t x)
+{
+ vuint64m8_t v1 = __riscv_vle64_v_u64m8 (base1, vl);
+ vbool8_t m1 = __riscv_vlm_v_b8 (base3, vl);
+ vbool8_t m2 = __riscv_vlm_v_b8 (base4, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23","v24","v25",
+ "v26", "v28", "v29", "v30", "v31");
+
+ vbool8_t v = __riscv_vmsltu_vx_u64m8_b8_mu(m1,m2,v1,x,vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b8 (out,v,vl);
+}
+
+void f7 (void * in, void *out, int32_t x)
+{
+ vbool8_t mask = *(vbool8_t*)in;
+ asm volatile ("":::"memory");
+ vint64m8_t v = __riscv_vle64_v_i64m8 (in, 4);
+ vint64m8_t v2 = __riscv_vle64_v_i64m8_m (mask, in, 4);
+ vbool8_t m3 = __riscv_vmslt_vx_i64m8_b8 (v, x, 4);
+ vbool8_t m4 = __riscv_vmslt_vx_i64m8_b8_mu (m3, m3, v2, x, 4);
+ __riscv_vsm_v_b8 (out, m4, 4);
+}
+
+void f8 (void * in, void *out, int32_t x)
+{
+ vbool8_t mask = *(vbool8_t*)in;
+ asm volatile ("":::"memory");
+ vint64m8_t v = __riscv_vle64_v_i64m8 (in, 4);
+ vint64m8_t v2 = __riscv_vle64_v_i64m8_m (mask, in, 4);
+ vbool8_t m3 = __riscv_vmslt_vx_i64m8_b8 (v, x, 4);
+ vbool8_t m4 = __riscv_vmslt_vx_i64m8_b8_m (m3, v2, x, 4);
+ __riscv_vsm_v_b8 (out, m3, 4);
+ __riscv_vsm_v_b8 (out, m4, 4);
+}
+
+void f9 (void * in, void *out, int32_t x)
+{
+ vbool8_t mask = *(vbool8_t*)in;
+ asm volatile ("":::"memory");
+ vint64m8_t v = __riscv_vle64_v_i64m8 (in, 4);
+ vint64m8_t v2 = __riscv_vle64_v_i64m8_m (mask, in, 4);
+ vbool8_t m3 = __riscv_vmslt_vx_i64m8_b8 (v, x, 4);
+ vbool8_t m4 = __riscv_vmslt_vx_i64m8_b8_m (m3, v2, x, 4);
+ __riscv_vsm_v_b8 (out, m4, 4);
+ vbool8_t m5 = __riscv_vmslt_vx_i64m8_b8_mu (m3, m4, v2, x, 4);
+ __riscv_vsm_v_b8 (out, m3, 4);
+ __riscv_vsm_v_b8 (out, m5, 4);
+}
+
+void f10 (void * in, void *out, int32_t x)
+{
+ vbool8_t mask = *(vbool8_t*)in;
+ asm volatile ("":::"memory");
+ vint64m8_t v = __riscv_vle64_v_i64m8 (in, 4);
+ vint64m8_t v2 = __riscv_vle64_v_i64m8_m (mask, in, 4);
+ vbool8_t m3 = __riscv_vmslt_vx_i64m8_b8 (v, x, 4);
+ vbool8_t m4 = __riscv_vmslt_vx_i64m8_b8_mu (m3, m3, v2, x, 4);
+ __riscv_vsm_v_b8 (out, m4, 4);
+}
+
+void f11 (void * in, void *out, int32_t x)
+{
+ vbool8_t mask = *(vbool8_t*)in;
+ asm volatile ("":::"memory");
+ vint64m8_t v = __riscv_vle64_v_i64m8 (in, 4);
+ vint64m8_t v2 = __riscv_vle64_v_i64m8_m (mask, in, 4);
+ vbool8_t m3 = __riscv_vmslt_vx_i64m8_b8 (v, x, 4);
+ vbool8_t m4 = __riscv_vmslt_vx_i64m8_b8_m (m3, v2, x, 4);
+ __riscv_vsm_v_b8 (out, m3, 4);
+ __riscv_vsm_v_b8 (out, m4, 4);
+}
+
+void f12 (void* base1,void* base2,void* out,int n, int32_t x)
+{
+ vbool8_t mask = *(vbool8_t*)base1;
+ vint64m8_t v = __riscv_vle64_v_i64m8 (base1, 4);
+ vint64m8_t v2 = __riscv_vle64_v_i64m8_m (mask, base1, 4);
+ mask = __riscv_vmslt_vx_i64m8_b8 (v, x, 4);
+ for (int i = 0; i < n; i++){
+ vint64m8_t v3 = __riscv_vle64_v_i64m8 (base1 + i, 4);
+ vint64m8_t v4 = __riscv_vle64_v_i64m8_m (mask, base1 + i * 2, 4);
+ mask = __riscv_vmslt_vx_i64m8_b8_m (mask, v3, x,32);
+ mask = __riscv_vmslt_vx_i64m8_b8_mu (mask, mask, v4, x, 32);
+ }
+ __riscv_vsm_v_b8 (out, mask, 32);
+}
+
+void f13 (void* base1,void* base2,void* out,int n, int32_t x)
+{
+ vbool64_t mask = *(vbool64_t*)base1;
+ vint64m1_t v = __riscv_vle64_v_i64m1 (base1, 4);
+ vint64m1_t v2 = __riscv_vle64_v_i64m1_m (mask, base1, 4);
+ mask = __riscv_vmslt_vx_i64m1_b64 (v, x, 4);
+ for (int i = 0; i < n; i++){
+ vint64m1_t v3 = __riscv_vle64_v_i64m1 (base1 + i, 4);
+ vint64m1_t v4 = __riscv_vle64_v_i64m1_m (mask, base1 + i * 2, 4);
+ mask = __riscv_vmslt_vx_i64m1_b64_m (mask, v3, x,32);
+ mask = __riscv_vmslt_vx_i64m1_b64_mu (mask, mask, v4, x, 32);
+ }
+ __riscv_vsm_v_b64 (out, mask, 32);
+}
+
+/* { dg-final { scan-assembler-not {vmv} } } */
+/* { dg-final { scan-assembler-not {csrr} } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */
+
+#include "riscv_vector.h"
+
+void f0 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl)
+{
+ vuint64m8_t v1 = __riscv_vle64_v_u64m8 (base1, vl);
+ vbool8_t m1 = __riscv_vlm_v_b8 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23","v24","v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool8_t v = __riscv_vmseq_vx_u64m8_b8_m(m1,v1,-16,vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b8 (out,v,vl);
+}
+
+void f1 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl)
+{
+ vuint64m8_t v1 = __riscv_vle64_v_u64m8 (base1, vl);
+ vbool8_t m1 = __riscv_vlm_v_b8 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23","v24","v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool8_t v = __riscv_vmseq_vx_u64m8_b8_mu(m1,m1,v1,-16,vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b8 (out,v,vl);
+}
+
+void f2 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl)
+{
+ vuint64m8_t v1 = __riscv_vle64_v_u64m8 (base1, vl);
+ vbool8_t m1 = __riscv_vlm_v_b8 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23","v24","v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool8_t v = __riscv_vmseq_vx_u64m8_b8_m(m1,v1,-16,vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b8 (out,v,vl);
+}
+
+void f3 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl)
+{
+ vuint64m8_t v1 = __riscv_vle64_v_u64m8 (base1, vl);
+ vbool8_t m1 = __riscv_vlm_v_b8 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23","v24","v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool8_t v = __riscv_vmseq_vx_u64m8_b8_m(m1,v1,-16,vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b8 (out,v,vl);
+}
+
+void f4 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl)
+{
+ vuint64m8_t v1 = __riscv_vle64_v_u64m8 (base1, vl);
+ vbool8_t m1 = __riscv_vlm_v_b8 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool8_t v = __riscv_vmseq_vx_u64m8_b8_m(m1,v1,-16,vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b8 (out,v,vl);
+}
+
+void f5 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl)
+{
+ vuint64m8_t v1 = __riscv_vle64_v_u64m8 (base1, vl);
+ vbool8_t m1 = __riscv_vlm_v_b8 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23","v24","v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool8_t v = __riscv_vmseq_vx_u64m8_b8_m(m1,v1,-16,vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b8 (out,v,vl);
+}
+
+void f6 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl)
+{
+ vuint64m8_t v1 = __riscv_vle64_v_u64m8 (base1, vl);
+ vbool8_t m1 = __riscv_vlm_v_b8 (base3, vl);
+ vbool8_t m2 = __riscv_vlm_v_b8 (base4, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23","v24","v25",
+ "v26", "v28", "v29", "v30", "v31");
+
+ vbool8_t v = __riscv_vmseq_vx_u64m8_b8_mu(m1,m2,v1,-16,vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b8 (out,v,vl);
+}
+
+void f7 (void * in, void *out)
+{
+ vbool8_t mask = *(vbool8_t*)in;
+ asm volatile ("":::"memory");
+ vint64m8_t v = __riscv_vle64_v_i64m8 (in, 4);
+ vint64m8_t v2 = __riscv_vle64_v_i64m8_m (mask, in, 4);
+ vbool8_t m3 = __riscv_vmseq_vx_i64m8_b8 (v, -16, 4);
+ vbool8_t m4 = __riscv_vmseq_vx_i64m8_b8_mu (m3, m3, v2, -16, 4);
+ __riscv_vsm_v_b8 (out, m4, 4);
+}
+
+void f8 (void * in, void *out)
+{
+ vbool8_t mask = *(vbool8_t*)in;
+ asm volatile ("":::"memory");
+ vint64m8_t v = __riscv_vle64_v_i64m8 (in, 4);
+ vint64m8_t v2 = __riscv_vle64_v_i64m8_m (mask, in, 4);
+ vbool8_t m3 = __riscv_vmseq_vx_i64m8_b8 (v, -16, 4);
+ vbool8_t m4 = __riscv_vmseq_vx_i64m8_b8_m (m3, v2, -16, 4);
+ __riscv_vsm_v_b8 (out, m3, 4);
+ __riscv_vsm_v_b8 (out, m4, 4);
+}
+
+void f9 (void * in, void *out)
+{
+ vbool8_t mask = *(vbool8_t*)in;
+ asm volatile ("":::"memory");
+ vint64m8_t v = __riscv_vle64_v_i64m8 (in, 4);
+ vint64m8_t v2 = __riscv_vle64_v_i64m8_m (mask, in, 4);
+ vbool8_t m3 = __riscv_vmseq_vx_i64m8_b8 (v, -16, 4);
+ vbool8_t m4 = __riscv_vmseq_vx_i64m8_b8_m (m3, v2, -16, 4);
+ __riscv_vsm_v_b8 (out, m4, 4);
+ vbool8_t m5 = __riscv_vmseq_vx_i64m8_b8_mu (m3, m4, v2, -16, 4);
+ __riscv_vsm_v_b8 (out, m3, 4);
+ __riscv_vsm_v_b8 (out, m5, 4);
+}
+
+void f10 (void * in, void *out)
+{
+ vbool8_t mask = *(vbool8_t*)in;
+ asm volatile ("":::"memory");
+ vint64m8_t v = __riscv_vle64_v_i64m8 (in, 4);
+ vint64m8_t v2 = __riscv_vle64_v_i64m8_m (mask, in, 4);
+ vbool8_t m3 = __riscv_vmseq_vx_i64m8_b8 (v, -16, 4);
+ vbool8_t m4 = __riscv_vmseq_vx_i64m8_b8_mu (m3, m3, v2, -16, 4);
+ __riscv_vsm_v_b8 (out, m4, 4);
+}
+
+void f11 (void * in, void *out)
+{
+ vbool8_t mask = *(vbool8_t*)in;
+ asm volatile ("":::"memory");
+ vint64m8_t v = __riscv_vle64_v_i64m8 (in, 4);
+ vint64m8_t v2 = __riscv_vle64_v_i64m8_m (mask, in, 4);
+ vbool8_t m3 = __riscv_vmseq_vx_i64m8_b8 (v, -16, 4);
+ vbool8_t m4 = __riscv_vmseq_vx_i64m8_b8_m (m3, v2, -16, 4);
+ __riscv_vsm_v_b8 (out, m3, 4);
+ __riscv_vsm_v_b8 (out, m4, 4);
+}
+
+void f12 (void* base1,void* base2,void* out,int n)
+{
+ vbool8_t mask = *(vbool8_t*)base1;
+ vint64m8_t v = __riscv_vle64_v_i64m8 (base1, 4);
+ vint64m8_t v2 = __riscv_vle64_v_i64m8_m (mask, base1, 4);
+ mask = __riscv_vmseq_vx_i64m8_b8 (v, -16, 4);
+ for (int i = 0; i < n; i++){
+ vint64m8_t v3 = __riscv_vle64_v_i64m8 (base1 + i, 4);
+ vint64m8_t v4 = __riscv_vle64_v_i64m8_m (mask, base1 + i * 2, 4);
+ mask = __riscv_vmseq_vx_i64m8_b8_m (mask, v3, -16,32);
+ mask = __riscv_vmseq_vx_i64m8_b8_mu (mask, mask, v4, -16, 32);
+ }
+ __riscv_vsm_v_b8 (out, mask, 32);
+}
+
+void f13 (void* base1,void* base2,void* out,int n)
+{
+ vbool64_t mask = *(vbool64_t*)base1;
+ vint64m1_t v = __riscv_vle64_v_i64m1 (base1, 4);
+ vint64m1_t v2 = __riscv_vle64_v_i64m1_m (mask, base1, 4);
+ mask = __riscv_vmseq_vx_i64m1_b64 (v, -16, 4);
+ for (int i = 0; i < n; i++){
+ vint64m1_t v3 = __riscv_vle64_v_i64m1 (base1 + i, 4);
+ vint64m1_t v4 = __riscv_vle64_v_i64m1_m (mask, base1 + i * 2, 4);
+ mask = __riscv_vmseq_vx_i64m1_b64_m (mask, v3, -16,32);
+ mask = __riscv_vmseq_vx_i64m1_b64_mu (mask, mask, v4, -16, 32);
+ }
+ __riscv_vsm_v_b64 (out, mask, 32);
+}
+
+/* { dg-final { scan-assembler-not {vmv} } } */
+/* { dg-final { scan-assembler-not {csrr} } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */
+
+#include "riscv_vector.h"
+
+void f0 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl)
+{
+ vuint64m8_t v1 = __riscv_vle64_v_u64m8 (base1, vl);
+ vbool8_t m1 = __riscv_vlm_v_b8 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23","v24","v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool8_t v = __riscv_vmsltu_vx_u64m8_b8_m(m1,v1,-15,vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b8 (out,v,vl);
+}
+
+void f1 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl)
+{
+ vuint64m8_t v1 = __riscv_vle64_v_u64m8 (base1, vl);
+ vbool8_t m1 = __riscv_vlm_v_b8 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23","v24","v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool8_t v = __riscv_vmsltu_vx_u64m8_b8_mu(m1,m1,v1,-15,vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b8 (out,v,vl);
+}
+
+void f2 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl)
+{
+ vuint64m8_t v1 = __riscv_vle64_v_u64m8 (base1, vl);
+ vbool8_t m1 = __riscv_vlm_v_b8 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23","v24","v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool8_t v = __riscv_vmsltu_vx_u64m8_b8_m(m1,v1,-15,vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b8 (out,v,vl);
+}
+
+void f3 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl)
+{
+ vuint64m8_t v1 = __riscv_vle64_v_u64m8 (base1, vl);
+ vbool8_t m1 = __riscv_vlm_v_b8 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23","v24","v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool8_t v = __riscv_vmsltu_vx_u64m8_b8_m(m1,v1,-15,vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b8 (out,v,vl);
+}
+
+void f4 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl)
+{
+ vuint64m8_t v1 = __riscv_vle64_v_u64m8 (base1, vl);
+ vbool8_t m1 = __riscv_vlm_v_b8 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool8_t v = __riscv_vmsltu_vx_u64m8_b8_m(m1,v1,-15,vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b8 (out,v,vl);
+}
+
+void f5 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl)
+{
+ vuint64m8_t v1 = __riscv_vle64_v_u64m8 (base1, vl);
+ vbool8_t m1 = __riscv_vlm_v_b8 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23","v24","v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool8_t v = __riscv_vmsltu_vx_u64m8_b8_m(m1,v1,-15,vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b8 (out,v,vl);
+}
+
+void f6 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl)
+{
+ vuint64m8_t v1 = __riscv_vle64_v_u64m8 (base1, vl);
+ vbool8_t m1 = __riscv_vlm_v_b8 (base3, vl);
+ vbool8_t m2 = __riscv_vlm_v_b8 (base4, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23","v24","v25",
+ "v26", "v28", "v29", "v30", "v31");
+
+ vbool8_t v = __riscv_vmsltu_vx_u64m8_b8_mu(m1,m2,v1,-15,vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b8 (out,v,vl);
+}
+
+void f7 (void * in, void *out)
+{
+ vbool8_t mask = *(vbool8_t*)in;
+ asm volatile ("":::"memory");
+ vint64m8_t v = __riscv_vle64_v_i64m8 (in, 4);
+ vint64m8_t v2 = __riscv_vle64_v_i64m8_m (mask, in, 4);
+ vbool8_t m3 = __riscv_vmslt_vx_i64m8_b8 (v, -15, 4);
+ vbool8_t m4 = __riscv_vmslt_vx_i64m8_b8_mu (m3, m3, v2, -15, 4);
+ __riscv_vsm_v_b8 (out, m4, 4);
+}
+
+void f8 (void * in, void *out)
+{
+ vbool8_t mask = *(vbool8_t*)in;
+ asm volatile ("":::"memory");
+ vint64m8_t v = __riscv_vle64_v_i64m8 (in, 4);
+ vint64m8_t v2 = __riscv_vle64_v_i64m8_m (mask, in, 4);
+ vbool8_t m3 = __riscv_vmslt_vx_i64m8_b8 (v, -15, 4);
+ vbool8_t m4 = __riscv_vmslt_vx_i64m8_b8_m (m3, v2, -15, 4);
+ __riscv_vsm_v_b8 (out, m3, 4);
+ __riscv_vsm_v_b8 (out, m4, 4);
+}
+
+void f9 (void * in, void *out)
+{
+ vbool8_t mask = *(vbool8_t*)in;
+ asm volatile ("":::"memory");
+ vint64m8_t v = __riscv_vle64_v_i64m8 (in, 4);
+ vint64m8_t v2 = __riscv_vle64_v_i64m8_m (mask, in, 4);
+ vbool8_t m3 = __riscv_vmslt_vx_i64m8_b8 (v, -15, 4);
+ vbool8_t m4 = __riscv_vmslt_vx_i64m8_b8_m (m3, v2, -15, 4);
+ __riscv_vsm_v_b8 (out, m4, 4);
+ vbool8_t m5 = __riscv_vmslt_vx_i64m8_b8_mu (m3, m4, v2, -15, 4);
+ __riscv_vsm_v_b8 (out, m3, 4);
+ __riscv_vsm_v_b8 (out, m5, 4);
+}
+
+void f10 (void * in, void *out)
+{
+ vbool8_t mask = *(vbool8_t*)in;
+ asm volatile ("":::"memory");
+ vint64m8_t v = __riscv_vle64_v_i64m8 (in, 4);
+ vint64m8_t v2 = __riscv_vle64_v_i64m8_m (mask, in, 4);
+ vbool8_t m3 = __riscv_vmslt_vx_i64m8_b8 (v, -15, 4);
+ vbool8_t m4 = __riscv_vmslt_vx_i64m8_b8_mu (m3, m3, v2, -15, 4);
+ __riscv_vsm_v_b8 (out, m4, 4);
+}
+
+void f11 (void * in, void *out)
+{
+ vbool8_t mask = *(vbool8_t*)in;
+ asm volatile ("":::"memory");
+ vint64m8_t v = __riscv_vle64_v_i64m8 (in, 4);
+ vint64m8_t v2 = __riscv_vle64_v_i64m8_m (mask, in, 4);
+ vbool8_t m3 = __riscv_vmslt_vx_i64m8_b8 (v, -15, 4);
+ vbool8_t m4 = __riscv_vmslt_vx_i64m8_b8_m (m3, v2, -15, 4);
+ __riscv_vsm_v_b8 (out, m3, 4);
+ __riscv_vsm_v_b8 (out, m4, 4);
+}
+
+void f12 (void* base1,void* base2,void* out,int n)
+{
+ vbool8_t mask = *(vbool8_t*)base1;
+ vint64m8_t v = __riscv_vle64_v_i64m8 (base1, 4);
+ vint64m8_t v2 = __riscv_vle64_v_i64m8_m (mask, base1, 4);
+ mask = __riscv_vmslt_vx_i64m8_b8 (v, -15, 4);
+ for (int i = 0; i < n; i++){
+ vint64m8_t v3 = __riscv_vle64_v_i64m8 (base1 + i, 4);
+ vint64m8_t v4 = __riscv_vle64_v_i64m8_m (mask, base1 + i * 2, 4);
+ mask = __riscv_vmslt_vx_i64m8_b8_m (mask, v3, -15,32);
+ mask = __riscv_vmslt_vx_i64m8_b8_mu (mask, mask, v4, -15, 32);
+ }
+ __riscv_vsm_v_b8 (out, mask, 32);
+}
+
+void f13 (void* base1,void* base2,void* out,int n)
+{
+ vbool64_t mask = *(vbool64_t*)base1;
+ vint64m1_t v = __riscv_vle64_v_i64m1 (base1, 4);
+ vint64m1_t v2 = __riscv_vle64_v_i64m1_m (mask, base1, 4);
+ mask = __riscv_vmslt_vx_i64m1_b64 (v, -15, 4);
+ for (int i = 0; i < n; i++){
+ vint64m1_t v3 = __riscv_vle64_v_i64m1 (base1 + i, 4);
+ vint64m1_t v4 = __riscv_vle64_v_i64m1_m (mask, base1 + i * 2, 4);
+ mask = __riscv_vmslt_vx_i64m1_b64_m (mask, v3, -15,32);
+ mask = __riscv_vmslt_vx_i64m1_b64_mu (mask, mask, v4, -15, 32);
+ }
+ __riscv_vsm_v_b64 (out, mask, 32);
+}
+
+/* { dg-final { scan-assembler-not {vmv} } } */
+/* { dg-final { scan-assembler-not {csrr} } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32d -O3" } */
+
+#include "riscv_vector.h"
+
+void f0 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl)
+{
+ vuint64m8_t v1 = __riscv_vle64_v_u64m8 (base1, vl);
+ vbool8_t m1 = __riscv_vlm_v_b8 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23","v24","v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool8_t v = __riscv_vmseq_vx_u64m8_b8_m(m1,v1,0xAAAA,vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b8 (out,v,vl);
+}
+
+void f1 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl)
+{
+ vuint64m8_t v1 = __riscv_vle64_v_u64m8 (base1, vl);
+ vbool8_t m1 = __riscv_vlm_v_b8 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23","v24","v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool8_t v = __riscv_vmseq_vx_u64m8_b8_mu(m1,m1,v1,0xAAAA,vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b8 (out,v,vl);
+}
+
+void f2 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl)
+{
+ vuint64m8_t v1 = __riscv_vle64_v_u64m8 (base1, vl);
+ vbool8_t m1 = __riscv_vlm_v_b8 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23","v24","v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool8_t v = __riscv_vmseq_vx_u64m8_b8_m(m1,v1,0xAAAA,vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b8 (out,v,vl);
+}
+
+void f3 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl)
+{
+ vuint64m8_t v1 = __riscv_vle64_v_u64m8 (base1, vl);
+ vbool8_t m1 = __riscv_vlm_v_b8 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23","v24","v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool8_t v = __riscv_vmseq_vx_u64m8_b8_m(m1,v1,0xAAAA,vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b8 (out,v,vl);
+}
+
+void f4 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl)
+{
+ vuint64m8_t v1 = __riscv_vle64_v_u64m8 (base1, vl);
+ vbool8_t m1 = __riscv_vlm_v_b8 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool8_t v = __riscv_vmseq_vx_u64m8_b8_m(m1,v1,0xAAAA,vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b8 (out,v,vl);
+}
+
+void f5 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl)
+{
+ vuint64m8_t v1 = __riscv_vle64_v_u64m8 (base1, vl);
+ vbool8_t m1 = __riscv_vlm_v_b8 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23","v24","v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool8_t v = __riscv_vmseq_vx_u64m8_b8_m(m1,v1,0xAAAA,vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b8 (out,v,vl);
+}
+
+void f6 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl)
+{
+ vuint64m8_t v1 = __riscv_vle64_v_u64m8 (base1, vl);
+ vbool8_t m1 = __riscv_vlm_v_b8 (base3, vl);
+ vbool8_t m2 = __riscv_vlm_v_b8 (base4, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23","v24","v25",
+ "v26", "v28", "v29", "v30", "v31");
+
+ vbool8_t v = __riscv_vmseq_vx_u64m8_b8_mu(m1,m2,v1,0xAAAA,vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b8 (out,v,vl);
+}
+
+void f7 (void * in, void *out)
+{
+ vbool8_t mask = *(vbool8_t*)in;
+ asm volatile ("":::"memory");
+ vint64m8_t v = __riscv_vle64_v_i64m8 (in, 4);
+ vint64m8_t v2 = __riscv_vle64_v_i64m8_m (mask, in, 4);
+ vbool8_t m3 = __riscv_vmseq_vx_i64m8_b8 (v, 0xAAAA, 4);
+ vbool8_t m4 = __riscv_vmseq_vx_i64m8_b8_mu (m3, m3, v2, 0xAAAA, 4);
+ __riscv_vsm_v_b8 (out, m4, 4);
+}
+
+void f8 (void * in, void *out)
+{
+ vbool8_t mask = *(vbool8_t*)in;
+ asm volatile ("":::"memory");
+ vint64m8_t v = __riscv_vle64_v_i64m8 (in, 4);
+ vint64m8_t v2 = __riscv_vle64_v_i64m8_m (mask, in, 4);
+ vbool8_t m3 = __riscv_vmseq_vx_i64m8_b8 (v, 0xAAAA, 4);
+ vbool8_t m4 = __riscv_vmseq_vx_i64m8_b8_m (m3, v2, 0xAAAA, 4);
+ __riscv_vsm_v_b8 (out, m3, 4);
+ __riscv_vsm_v_b8 (out, m4, 4);
+}
+
+void f9 (void * in, void *out)
+{
+ vbool8_t mask = *(vbool8_t*)in;
+ asm volatile ("":::"memory");
+ vint64m8_t v = __riscv_vle64_v_i64m8 (in, 4);
+ vint64m8_t v2 = __riscv_vle64_v_i64m8_m (mask, in, 4);
+ vbool8_t m3 = __riscv_vmseq_vx_i64m8_b8 (v, 0xAAAA, 4);
+ vbool8_t m4 = __riscv_vmseq_vx_i64m8_b8_m (m3, v2, 0xAAAA, 4);
+ __riscv_vsm_v_b8 (out, m4, 4);
+ vbool8_t m5 = __riscv_vmseq_vx_i64m8_b8_mu (m3, m4, v2, 0xAAAA, 4);
+ __riscv_vsm_v_b8 (out, m3, 4);
+ __riscv_vsm_v_b8 (out, m5, 4);
+}
+
+void f10 (void * in, void *out)
+{
+ vbool8_t mask = *(vbool8_t*)in;
+ asm volatile ("":::"memory");
+ vint64m8_t v = __riscv_vle64_v_i64m8 (in, 4);
+ vint64m8_t v2 = __riscv_vle64_v_i64m8_m (mask, in, 4);
+ vbool8_t m3 = __riscv_vmseq_vx_i64m8_b8 (v, 0xAAAA, 4);
+ vbool8_t m4 = __riscv_vmseq_vx_i64m8_b8_mu (m3, m3, v2, 0xAAAA, 4);
+ __riscv_vsm_v_b8 (out, m4, 4);
+}
+
+void f11 (void * in, void *out)
+{
+ vbool8_t mask = *(vbool8_t*)in;
+ asm volatile ("":::"memory");
+ vint64m8_t v = __riscv_vle64_v_i64m8 (in, 4);
+ vint64m8_t v2 = __riscv_vle64_v_i64m8_m (mask, in, 4);
+ vbool8_t m3 = __riscv_vmseq_vx_i64m8_b8 (v, 0xAAAA, 4);
+ vbool8_t m4 = __riscv_vmseq_vx_i64m8_b8_m (m3, v2, 0xAAAA, 4);
+ __riscv_vsm_v_b8 (out, m3, 4);
+ __riscv_vsm_v_b8 (out, m4, 4);
+}
+
+void f12 (void* base1,void* base2,void* out,int n)
+{
+ vbool8_t mask = *(vbool8_t*)base1;
+ vint64m8_t v = __riscv_vle64_v_i64m8 (base1, 4);
+ vint64m8_t v2 = __riscv_vle64_v_i64m8_m (mask, base1, 4);
+ mask = __riscv_vmseq_vx_i64m8_b8 (v, 0xAAAA, 4);
+ for (int i = 0; i < n; i++){
+ vint64m8_t v3 = __riscv_vle64_v_i64m8 (base1 + i, 4);
+ vint64m8_t v4 = __riscv_vle64_v_i64m8_m (mask, base1 + i * 2, 4);
+ mask = __riscv_vmseq_vx_i64m8_b8_m (mask, v3, 0xAAAA, 32);
+ mask = __riscv_vmseq_vx_i64m8_b8_mu (mask, mask, v4, 0xAAAA, 32);
+ }
+ __riscv_vsm_v_b8 (out, mask, 32);
+}
+
+void f13 (void* base1,void* base2,void* out,int n)
+{
+ vbool64_t mask = *(vbool64_t*)base1;
+ vint64m1_t v = __riscv_vle64_v_i64m1 (base1, 4);
+ vint64m1_t v2 = __riscv_vle64_v_i64m1_m (mask, base1, 4);
+ mask = __riscv_vmseq_vx_i64m1_b64 (v, 0xAAAA, 4);
+ for (int i = 0; i < n; i++){
+ vint64m1_t v3 = __riscv_vle64_v_i64m1 (base1 + i, 4);
+ vint64m1_t v4 = __riscv_vle64_v_i64m1_m (mask, base1 + i * 2, 4);
+ mask = __riscv_vmseq_vx_i64m1_b64_m (mask, v3, 0xAAAA, 32);
+ mask = __riscv_vmseq_vx_i64m1_b64_mu (mask, mask, v4, 0xAAAA, 32);
+ }
+ __riscv_vsm_v_b64 (out, mask, 32);
+}
+
+/* { dg-final { scan-assembler-not {vmv} } } */
+/* { dg-final { scan-assembler-not {csrr} } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32d -O3" } */
+
+#include "riscv_vector.h"
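+
+/* Same structure for masked vmsltu.vx (f0-f6) and vmslt.vx (f7-f13)
+ compares; again no vmv or csrr instructions should be emitted. */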
+
+void f0 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl)
+{
+ vuint64m8_t v1 = __riscv_vle64_v_u64m8 (base1, vl);
+ vbool8_t m1 = __riscv_vlm_v_b8 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23","v24","v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool8_t v = __riscv_vmsltu_vx_u64m8_b8_m(m1,v1,0xAAAA,vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b8 (out,v,vl);
+}
+
+void f1 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl)
+{
+ vuint64m8_t v1 = __riscv_vle64_v_u64m8 (base1, vl);
+ vbool8_t m1 = __riscv_vlm_v_b8 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23","v24","v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool8_t v = __riscv_vmsltu_vx_u64m8_b8_mu(m1,m1,v1,0xAAAA,vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b8 (out,v,vl);
+}
+
+void f2 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl)
+{
+ vuint64m8_t v1 = __riscv_vle64_v_u64m8 (base1, vl);
+ vbool8_t m1 = __riscv_vlm_v_b8 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23","v24","v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool8_t v = __riscv_vmsltu_vx_u64m8_b8_m(m1,v1,0xAAAA,vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b8 (out,v,vl);
+}
+
+void f3 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl)
+{
+ vuint64m8_t v1 = __riscv_vle64_v_u64m8 (base1, vl);
+ vbool8_t m1 = __riscv_vlm_v_b8 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23","v24","v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool8_t v = __riscv_vmsltu_vx_u64m8_b8_m(m1,v1,0xAAAA,vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b8 (out,v,vl);
+}
+
+void f4 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl)
+{
+ vuint64m8_t v1 = __riscv_vle64_v_u64m8 (base1, vl);
+ vbool8_t m1 = __riscv_vlm_v_b8 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool8_t v = __riscv_vmsltu_vx_u64m8_b8_m(m1,v1,0xAAAA,vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b8 (out,v,vl);
+}
+
+void f5 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl)
+{
+ vuint64m8_t v1 = __riscv_vle64_v_u64m8 (base1, vl);
+ vbool8_t m1 = __riscv_vlm_v_b8 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23","v24","v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool8_t v = __riscv_vmsltu_vx_u64m8_b8_m(m1,v1,0xAAAA,vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b8 (out,v,vl);
+}
+
+void f6 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl)
+{
+ vuint64m8_t v1 = __riscv_vle64_v_u64m8 (base1, vl);
+ vbool8_t m1 = __riscv_vlm_v_b8 (base3, vl);
+ vbool8_t m2 = __riscv_vlm_v_b8 (base4, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23","v24","v25",
+ "v26", "v28", "v29", "v30", "v31");
+
+ vbool8_t v = __riscv_vmsltu_vx_u64m8_b8_mu(m1,m2,v1,0xAAAA,vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b8 (out,v,vl);
+}
+
+void f7 (void * in, void *out)
+{
+ vbool8_t mask = *(vbool8_t*)in;
+ asm volatile ("":::"memory");
+ vint64m8_t v = __riscv_vle64_v_i64m8 (in, 4);
+ vint64m8_t v2 = __riscv_vle64_v_i64m8_m (mask, in, 4);
+ vbool8_t m3 = __riscv_vmslt_vx_i64m8_b8 (v, 0xAAAA, 4);
+ vbool8_t m4 = __riscv_vmslt_vx_i64m8_b8_mu (m3, m3, v2, 0xAAAA, 4);
+ __riscv_vsm_v_b8 (out, m4, 4);
+}
+
+void f8 (void * in, void *out)
+{
+ vbool8_t mask = *(vbool8_t*)in;
+ asm volatile ("":::"memory");
+ vint64m8_t v = __riscv_vle64_v_i64m8 (in, 4);
+ vint64m8_t v2 = __riscv_vle64_v_i64m8_m (mask, in, 4);
+ vbool8_t m3 = __riscv_vmslt_vx_i64m8_b8 (v, 0xAAAA, 4);
+ vbool8_t m4 = __riscv_vmslt_vx_i64m8_b8_m (m3, v2, 0xAAAA, 4);
+ __riscv_vsm_v_b8 (out, m3, 4);
+ __riscv_vsm_v_b8 (out, m4, 4);
+}
+
+void f9 (void * in, void *out)
+{
+ vbool8_t mask = *(vbool8_t*)in;
+ asm volatile ("":::"memory");
+ vint64m8_t v = __riscv_vle64_v_i64m8 (in, 4);
+ vint64m8_t v2 = __riscv_vle64_v_i64m8_m (mask, in, 4);
+ vbool8_t m3 = __riscv_vmslt_vx_i64m8_b8 (v, 0xAAAA, 4);
+ vbool8_t m4 = __riscv_vmslt_vx_i64m8_b8_m (m3, v2, 0xAAAA, 4);
+ __riscv_vsm_v_b8 (out, m4, 4);
+ vbool8_t m5 = __riscv_vmslt_vx_i64m8_b8_mu (m3, m4, v2, 0xAAAA, 4);
+ __riscv_vsm_v_b8 (out, m3, 4);
+ __riscv_vsm_v_b8 (out, m5, 4);
+}
+
+void f10 (void * in, void *out)
+{
+ vbool8_t mask = *(vbool8_t*)in;
+ asm volatile ("":::"memory");
+ vint64m8_t v = __riscv_vle64_v_i64m8 (in, 4);
+ vint64m8_t v2 = __riscv_vle64_v_i64m8_m (mask, in, 4);
+ vbool8_t m3 = __riscv_vmslt_vx_i64m8_b8 (v, 0xAAAA, 4);
+ vbool8_t m4 = __riscv_vmslt_vx_i64m8_b8_mu (m3, m3, v2, 0xAAAA, 4);
+ __riscv_vsm_v_b8 (out, m4, 4);
+}
+
+void f11 (void * in, void *out)
+{
+ vbool8_t mask = *(vbool8_t*)in;
+ asm volatile ("":::"memory");
+ vint64m8_t v = __riscv_vle64_v_i64m8 (in, 4);
+ vint64m8_t v2 = __riscv_vle64_v_i64m8_m (mask, in, 4);
+ vbool8_t m3 = __riscv_vmslt_vx_i64m8_b8 (v, 0xAAAA, 4);
+ vbool8_t m4 = __riscv_vmslt_vx_i64m8_b8_m (m3, v2, 0xAAAA, 4);
+ __riscv_vsm_v_b8 (out, m3, 4);
+ __riscv_vsm_v_b8 (out, m4, 4);
+}
+
+void f12 (void* base1,void* base2,void* out,int n)
+{
+ vbool8_t mask = *(vbool8_t*)base1;
+ vint64m8_t v = __riscv_vle64_v_i64m8 (base1, 4);
+ vint64m8_t v2 = __riscv_vle64_v_i64m8_m (mask, base1, 4);
+ mask = __riscv_vmslt_vx_i64m8_b8 (v, 0xAAAA, 4);
+ for (int i = 0; i < n; i++){
+ vint64m8_t v3 = __riscv_vle64_v_i64m8 (base1 + i, 4);
+ vint64m8_t v4 = __riscv_vle64_v_i64m8_m (mask, base1 + i * 2, 4);
+ mask = __riscv_vmslt_vx_i64m8_b8_m (mask, v3, 0xAAAA, 32);
+ mask = __riscv_vmslt_vx_i64m8_b8_mu (mask, mask, v4, 0xAAAA, 32);
+ }
+ __riscv_vsm_v_b8 (out, mask, 32);
+}
+
+void f13 (void* base1,void* base2,void* out,int n)
+{
+ vbool64_t mask = *(vbool64_t*)base1;
+ vint64m1_t v = __riscv_vle64_v_i64m1 (base1, 4);
+ vint64m1_t v2 = __riscv_vle64_v_i64m1_m (mask, base1, 4);
+ mask = __riscv_vmslt_vx_i64m1_b64 (v, 0xAAAA, 4);
+ for (int i = 0; i < n; i++){
+ vint64m1_t v3 = __riscv_vle64_v_i64m1 (base1 + i, 4);
+ vint64m1_t v4 = __riscv_vle64_v_i64m1_m (mask, base1 + i * 2, 4);
+ mask = __riscv_vmslt_vx_i64m1_b64_m (mask, v3, 0xAAAA, 32);
+ mask = __riscv_vmslt_vx_i64m1_b64_mu (mask, mask, v4, 0xAAAA, 32);
+ }
+ __riscv_vsm_v_b64 (out, mask, 32);
+}
+
+/* { dg-final { scan-assembler-not {vmv} } } */
+/* { dg-final { scan-assembler-not {csrr} } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32d -O3" } */
+
+#include "riscv_vector.h"
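+
+/* Same structure for masked vmfeq.vv compares on f32m8/b4 (f32m1/b32 in
+ f13); no vmv or csrr instructions should be emitted. */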
+
+void f0 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, size_t shift)
+{
+ vfloat32m8_t v1 = __riscv_vle32_v_f32m8 (base1, vl);
+ vfloat32m8_t v2 = __riscv_vle32_v_f32m8 (base2, vl);
+ vbool4_t m1 = __riscv_vlm_v_b4 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v24","v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool4_t v = __riscv_vmfeq_vv_f32m8_b4_m(m1,v1,v2,vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b4 (out,v,vl);
+}
+
+void f1 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, size_t shift)
+{
+ vfloat32m8_t v1 = __riscv_vle32_v_f32m8 (base1, vl);
+ vfloat32m8_t v2 = __riscv_vle32_v_f32m8 (base2, vl);
+ vbool4_t m1 = __riscv_vlm_v_b4 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v24","v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool4_t v = __riscv_vmfeq_vv_f32m8_b4_mu(m1,m1,v1,v2,vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b4 (out,v,vl);
+}
+
+void f2 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, size_t shift)
+{
+ vfloat32m8_t v1 = __riscv_vle32_v_f32m8 (base1, vl);
+ vfloat32m8_t v2 = __riscv_vle32_v_f32m8 (base2, vl);
+ vbool4_t m1 = __riscv_vlm_v_b4 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v24","v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool4_t v = __riscv_vmfeq_vv_f32m8_b4_m(m1,v1,v2,vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b4 (out,v,vl);
+}
+
+void f3 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, size_t shift)
+{
+ vfloat32m8_t v1 = __riscv_vle32_v_f32m8 (base1, vl);
+ vfloat32m8_t v2 = __riscv_vle32_v_f32m8 (base2, vl);
+ vbool4_t m1 = __riscv_vlm_v_b4 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v24","v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool4_t v = __riscv_vmfeq_vv_f32m8_b4_m(m1,v1,v2,vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b4 (out,v,vl);
+}
+
+void f4 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, size_t shift)
+{
+ vfloat32m8_t v1 = __riscv_vle32_v_f32m8 (base1, vl);
+ vbool4_t m1 = __riscv_vlm_v_b4 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool4_t v = __riscv_vmfeq_vv_f32m8_b4_m(m1,v1,v1,vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b4 (out,v,vl);
+}
+
+void f5 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, size_t shift)
+{
+ vfloat32m8_t v1 = __riscv_vle32_v_f32m8 (base1, vl);
+ vfloat32m8_t v2 = __riscv_vle32_v_f32m8 (base2, vl);
+ vbool4_t m1 = __riscv_vlm_v_b4 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v24","v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool4_t v = __riscv_vmfeq_vv_f32m8_b4_m(m1,v1,v2,vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b4 (out,v,vl);
+}
+
+void f6 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, size_t shift)
+{
+ vfloat32m8_t v1 = __riscv_vle32_v_f32m8 (base1, vl);
+ vfloat32m8_t v2 = __riscv_vle32_v_f32m8 (base2, vl);
+ vbool4_t m1 = __riscv_vlm_v_b4 (base3, vl);
+ vbool4_t m2 = __riscv_vlm_v_b4 (base4, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v24","v25",
+ "v26", "v28", "v29", "v30", "v31");
+
+ vbool4_t v = __riscv_vmfeq_vv_f32m8_b4_mu(m1,m2,v1,v2,vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b4 (out,v,vl);
+}
+
+void f7 (void * in, void *out, int32_t x)
+{
+ vbool4_t mask = *(vbool4_t*)in;
+ asm volatile ("":::"memory");
+ vfloat32m8_t v = __riscv_vle32_v_f32m8 (in, 4);
+ vfloat32m8_t v2 = __riscv_vle32_v_f32m8_m (mask, in, 4);
+ vbool4_t m3 = __riscv_vmfeq_vv_f32m8_b4 (v, v, 4);
+ vbool4_t m4 = __riscv_vmfeq_vv_f32m8_b4_mu (m3, m3, v2, v2, 4);
+ __riscv_vsm_v_b4 (out, m4, 4);
+}
+
+void f8 (void * in, void *out, int32_t x)
+{
+ vbool4_t mask = *(vbool4_t*)in;
+ asm volatile ("":::"memory");
+ vfloat32m8_t v = __riscv_vle32_v_f32m8 (in, 4);
+ vfloat32m8_t v2 = __riscv_vle32_v_f32m8_m (mask, in, 4);
+ vbool4_t m3 = __riscv_vmfeq_vv_f32m8_b4 (v, v, 4);
+ vbool4_t m4 = __riscv_vmfeq_vv_f32m8_b4_m (m3, v2, v2, 4);
+ __riscv_vsm_v_b4 (out, m3, 4);
+ __riscv_vsm_v_b4 (out, m4, 4);
+}
+
+void f9 (void * in, void *out, int32_t x)
+{
+ vbool4_t mask = *(vbool4_t*)in;
+ asm volatile ("":::"memory");
+ vfloat32m8_t v = __riscv_vle32_v_f32m8 (in, 4);
+ vfloat32m8_t v2 = __riscv_vle32_v_f32m8_m (mask, in, 4);
+ vbool4_t m3 = __riscv_vmfeq_vv_f32m8_b4 (v, v, 4);
+ vbool4_t m4 = __riscv_vmfeq_vv_f32m8_b4_m (m3, v2, v2, 4);
+ __riscv_vsm_v_b4 (out, m4, 4);
+ vbool4_t m5 = __riscv_vmfeq_vv_f32m8_b4_mu (m3, m4, v2, v2, 4);
+ __riscv_vsm_v_b4 (out, m3, 4);
+ __riscv_vsm_v_b4 (out, m5, 4);
+}
+
+void f10 (void * in, void *out, int32_t x)
+{
+ vbool4_t mask = *(vbool4_t*)in;
+ asm volatile ("":::"memory");
+ vfloat32m8_t v = __riscv_vle32_v_f32m8 (in, 4);
+ vfloat32m8_t v2 = __riscv_vle32_v_f32m8_m (mask, in, 4);
+ vbool4_t m3 = __riscv_vmfeq_vv_f32m8_b4 (v, v2, 4);
+ vbool4_t m4 = __riscv_vmfeq_vv_f32m8_b4_mu (m3, m3, v2, v, 4);
+ __riscv_vsm_v_b4 (out, m4, 4);
+}
+
+void f11 (void * in, void *out, int32_t x)
+{
+ vbool4_t mask = *(vbool4_t*)in;
+ asm volatile ("":::"memory");
+ vfloat32m8_t v = __riscv_vle32_v_f32m8 (in, 4);
+ vfloat32m8_t v2 = __riscv_vle32_v_f32m8_m (mask, in, 4);
+ vbool4_t m3 = __riscv_vmfeq_vv_f32m8_b4 (v, v2, 4);
+ vbool4_t m4 = __riscv_vmfeq_vv_f32m8_b4_m (m3, v2, v, 4);
+ __riscv_vsm_v_b4 (out, m3, 4);
+ __riscv_vsm_v_b4 (out, m4, 4);
+}
+
+void f12 (void* base1,void* base2,void* out,int n)
+{
+ vbool4_t mask = *(vbool4_t*)base1;
+ vfloat32m8_t v = __riscv_vle32_v_f32m8 (base1, 4);
+ vfloat32m8_t v2 = __riscv_vle32_v_f32m8_m (mask, base1, 4);
+ mask = __riscv_vmfeq_vv_f32m8_b4 (v, v2, 4);
+ for (int i = 0; i < n; i++){
+ vfloat32m8_t v3 = __riscv_vle32_v_f32m8 (base1 + i, 4);
+ vfloat32m8_t v4 = __riscv_vle32_v_f32m8_m (mask, base1 + i * 2, 4);
+ mask = __riscv_vmfeq_vv_f32m8_b4_m (mask, v3, v4, 32);
+ mask = __riscv_vmfeq_vv_f32m8_b4_mu (mask, mask, v4, v4, 32);
+ }
+ __riscv_vsm_v_b4 (out, mask, 32);
+}
+
+void f13 (void* base1,void* base2,void* out,int n)
+{
+ vbool32_t mask = *(vbool32_t*)base1;
+ vfloat32m1_t v = __riscv_vle32_v_f32m1 (base1, 4);
+ vfloat32m1_t v2 = __riscv_vle32_v_f32m1_m (mask, base1, 4);
+ mask = __riscv_vmfeq_vv_f32m1_b32 (v, v2, 4);
+ for (int i = 0; i < n; i++){
+ vfloat32m1_t v3 = __riscv_vle32_v_f32m1 (base1 + i, 4);
+ vfloat32m1_t v4 = __riscv_vle32_v_f32m1_m (mask, base1 + i * 2, 4);
+ mask = __riscv_vmfeq_vv_f32m1_b32_m (mask, v3, v4, 32);
+ mask = __riscv_vmfeq_vv_f32m1_b32_mu (mask, mask, v4, v4, 32);
+ }
+ __riscv_vsm_v_b32 (out, mask, 32);
+}
+
+/* { dg-final { scan-assembler-not {vmv} } } */
+/* { dg-final { scan-assembler-not {csrr} } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32d -O3" } */
+
+#include "riscv_vector.h"
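+
+/* Same structure for masked vmfeq.vf compares against a scalar float
+ operand; no vmv or csrr instructions should be emitted. */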
+
+void f0 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, float x)
+{
+ vfloat32m8_t v1 = __riscv_vle32_v_f32m8 (base1, vl);
+ vbool4_t m1 = __riscv_vlm_v_b4 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23","v24","v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool4_t v = __riscv_vmfeq_vf_f32m8_b4_m(m1,v1,x,vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b4 (out,v,vl);
+}
+
+void f1 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, float x)
+{
+ vfloat32m8_t v1 = __riscv_vle32_v_f32m8 (base1, vl);
+ vbool4_t m1 = __riscv_vlm_v_b4 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23","v24","v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool4_t v = __riscv_vmfeq_vf_f32m8_b4_mu(m1,m1,v1,x,vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b4 (out,v,vl);
+}
+
+void f2 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, float x)
+{
+ vfloat32m8_t v1 = __riscv_vle32_v_f32m8 (base1, vl);
+ vbool4_t m1 = __riscv_vlm_v_b4 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23","v24","v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool4_t v = __riscv_vmfeq_vf_f32m8_b4_m(m1,v1,x,vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b4 (out,v,vl);
+}
+
+void f3 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, float x)
+{
+ vfloat32m8_t v1 = __riscv_vle32_v_f32m8 (base1, vl);
+ vbool4_t m1 = __riscv_vlm_v_b4 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23","v24","v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool4_t v = __riscv_vmfeq_vf_f32m8_b4_m(m1,v1,x,vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b4 (out,v,vl);
+}
+
+void f4 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, float x)
+{
+ vfloat32m8_t v1 = __riscv_vle32_v_f32m8 (base1, vl);
+ vbool4_t m1 = __riscv_vlm_v_b4 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool4_t v = __riscv_vmfeq_vf_f32m8_b4_m(m1,v1,x,vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b4 (out,v,vl);
+}
+
+void f5 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, float x)
+{
+ vfloat32m8_t v1 = __riscv_vle32_v_f32m8 (base1, vl);
+ vbool4_t m1 = __riscv_vlm_v_b4 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23","v24","v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool4_t v = __riscv_vmfeq_vf_f32m8_b4_m(m1,v1,x,vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b4 (out,v,vl);
+}
+
+void f6 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, float x)
+{
+ vfloat32m8_t v1 = __riscv_vle32_v_f32m8 (base1, vl);
+ vbool4_t m1 = __riscv_vlm_v_b4 (base3, vl);
+ vbool4_t m2 = __riscv_vlm_v_b4 (base4, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23","v24","v25",
+ "v26", "v28", "v29", "v30", "v31");
+
+ vbool4_t v = __riscv_vmfeq_vf_f32m8_b4_mu(m1,m2,v1,x,vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b4 (out,v,vl);
+}
+
+void f7 (void * in, void *out, float x)
+{
+ vbool4_t mask = *(vbool4_t*)in;
+ asm volatile ("":::"memory");
+ vfloat32m8_t v = __riscv_vle32_v_f32m8 (in, 4);
+ vfloat32m8_t v2 = __riscv_vle32_v_f32m8_m (mask, in, 4);
+ vbool4_t m3 = __riscv_vmfeq_vf_f32m8_b4 (v, x, 4);
+ vbool4_t m4 = __riscv_vmfeq_vf_f32m8_b4_mu (m3, m3, v2, x, 4);
+ __riscv_vsm_v_b4 (out, m4, 4);
+}
+
+void f8 (void * in, void *out, float x)
+{
+ vbool4_t mask = *(vbool4_t*)in;
+ asm volatile ("":::"memory");
+ vfloat32m8_t v = __riscv_vle32_v_f32m8 (in, 4);
+ vfloat32m8_t v2 = __riscv_vle32_v_f32m8_m (mask, in, 4);
+ vbool4_t m3 = __riscv_vmfeq_vf_f32m8_b4 (v, x, 4);
+ vbool4_t m4 = __riscv_vmfeq_vf_f32m8_b4_m (m3, v2, x, 4);
+ __riscv_vsm_v_b4 (out, m3, 4);
+ __riscv_vsm_v_b4 (out, m4, 4);
+}
+
+void f9 (void * in, void *out, float x)
+{
+ vbool4_t mask = *(vbool4_t*)in;
+ asm volatile ("":::"memory");
+ vfloat32m8_t v = __riscv_vle32_v_f32m8 (in, 4);
+ vfloat32m8_t v2 = __riscv_vle32_v_f32m8_m (mask, in, 4);
+ vbool4_t m3 = __riscv_vmfeq_vf_f32m8_b4 (v, x, 4);
+ vbool4_t m4 = __riscv_vmfeq_vf_f32m8_b4_m (m3, v2, x, 4);
+ __riscv_vsm_v_b4 (out, m4, 4);
+ vbool4_t m5 = __riscv_vmfeq_vf_f32m8_b4_mu (m3, m4, v2, x, 4);
+ __riscv_vsm_v_b4 (out, m3, 4);
+ __riscv_vsm_v_b4 (out, m5, 4);
+}
+
+void f10 (void * in, void *out, float x)
+{
+ vbool4_t mask = *(vbool4_t*)in;
+ asm volatile ("":::"memory");
+ vfloat32m8_t v = __riscv_vle32_v_f32m8 (in, 4);
+ vfloat32m8_t v2 = __riscv_vle32_v_f32m8_m (mask, in, 4);
+ vbool4_t m3 = __riscv_vmfeq_vf_f32m8_b4 (v, x, 4);
+ vbool4_t m4 = __riscv_vmfeq_vf_f32m8_b4_mu (m3, m3, v2, x, 4);
+ __riscv_vsm_v_b4 (out, m4, 4);
+}
+
+void f11 (void * in, void *out, float x)
+{
+ vbool4_t mask = *(vbool4_t*)in;
+ asm volatile ("":::"memory");
+ vfloat32m8_t v = __riscv_vle32_v_f32m8 (in, 4);
+ vfloat32m8_t v2 = __riscv_vle32_v_f32m8_m (mask, in, 4);
+ vbool4_t m3 = __riscv_vmfeq_vf_f32m8_b4 (v, x, 4);
+ vbool4_t m4 = __riscv_vmfeq_vf_f32m8_b4_m (m3, v2, x, 4);
+ __riscv_vsm_v_b4 (out, m3, 4);
+ __riscv_vsm_v_b4 (out, m4, 4);
+}
+
+void f12 (void* base1,void* base2,void* out,int n, float x)
+{
+ vbool4_t mask = *(vbool4_t*)base1;
+ vfloat32m8_t v = __riscv_vle32_v_f32m8 (base1, 4);
+ vfloat32m8_t v2 = __riscv_vle32_v_f32m8_m (mask, base1, 4);
+ mask = __riscv_vmfeq_vf_f32m8_b4 (v, x, 4);
+ for (int i = 0; i < n; i++){
+ vfloat32m8_t v3 = __riscv_vle32_v_f32m8 (base1 + i, 4);
+ vfloat32m8_t v4 = __riscv_vle32_v_f32m8_m (mask, base1 + i * 2, 4);
+ mask = __riscv_vmfeq_vf_f32m8_b4_m (mask, v3, x, 32);
+ mask = __riscv_vmfeq_vf_f32m8_b4_mu (mask, mask, v4, x, 32);
+ }
+ __riscv_vsm_v_b4 (out, mask, 32);
+}
+
+void f13 (void* base1,void* base2,void* out,int n, float x)
+{
+ vbool32_t mask = *(vbool32_t*)base1;
+ vfloat32m1_t v = __riscv_vle32_v_f32m1 (base1, 4);
+ vfloat32m1_t v2 = __riscv_vle32_v_f32m1_m (mask, base1, 4);
+ mask = __riscv_vmfeq_vf_f32m1_b32 (v, x, 4);
+ for (int i = 0; i < n; i++){
+ vfloat32m1_t v3 = __riscv_vle32_v_f32m1 (base1 + i, 4);
+ vfloat32m1_t v4 = __riscv_vle32_v_f32m1_m (mask, base1 + i * 2, 4);
+ mask = __riscv_vmfeq_vf_f32m1_b32_m (mask, v3, x, 32);
+ mask = __riscv_vmfeq_vf_f32m1_b32_mu (mask, mask, v4, x, 32);
+ }
+ __riscv_vsm_v_b32 (out, mask, 32);
+}
+
+/* { dg-final { scan-assembler-not {vmv} } } */
+/* { dg-final { scan-assembler-not {csrr} } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32d -O3" } */
+
+#include "riscv_vector.h"
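+
+/* Same structure for masked vmflt.vf compares against a scalar float
+ operand; no vmv or csrr instructions should be emitted. */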
+
+void f0 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, float x)
+{
+ vfloat32m8_t v1 = __riscv_vle32_v_f32m8 (base1, vl);
+ vbool4_t m1 = __riscv_vlm_v_b4 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23","v24","v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool4_t v = __riscv_vmflt_vf_f32m8_b4_m(m1,v1,x,vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b4 (out,v,vl);
+}
+
+void f1 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, float x)
+{
+ vfloat32m8_t v1 = __riscv_vle32_v_f32m8 (base1, vl);
+ vbool4_t m1 = __riscv_vlm_v_b4 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23","v24","v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool4_t v = __riscv_vmflt_vf_f32m8_b4_mu(m1,m1,v1,x,vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b4 (out,v,vl);
+}
+
+void f2 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, float x)
+{
+ vfloat32m8_t v1 = __riscv_vle32_v_f32m8 (base1, vl);
+ vbool4_t m1 = __riscv_vlm_v_b4 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23","v24","v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool4_t v = __riscv_vmflt_vf_f32m8_b4_m(m1,v1,x,vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b4 (out,v,vl);
+}
+
+void f3 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, float x)
+{
+ vfloat32m8_t v1 = __riscv_vle32_v_f32m8 (base1, vl);
+ vbool4_t m1 = __riscv_vlm_v_b4 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23","v24","v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool4_t v = __riscv_vmflt_vf_f32m8_b4_m(m1,v1,x,vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b4 (out,v,vl);
+}
+
+void f4 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, float x)
+{
+ vfloat32m8_t v1 = __riscv_vle32_v_f32m8 (base1, vl);
+ vbool4_t m1 = __riscv_vlm_v_b4 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool4_t v = __riscv_vmflt_vf_f32m8_b4_m(m1,v1,x,vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b4 (out,v,vl);
+}
+
+void f5 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, float x)
+{
+ vfloat32m8_t v1 = __riscv_vle32_v_f32m8 (base1, vl);
+ vbool4_t m1 = __riscv_vlm_v_b4 (base3, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23","v24","v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+ vbool4_t v = __riscv_vmflt_vf_f32m8_b4_m(m1,v1,x,vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b4 (out,v,vl);
+}
+
+void f6 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, float x)
+{
+ vfloat32m8_t v1 = __riscv_vle32_v_f32m8 (base1, vl);
+ vbool4_t m1 = __riscv_vlm_v_b4 (base3, vl);
+ vbool4_t m2 = __riscv_vlm_v_b4 (base4, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23","v24","v25",
+ "v26", "v28", "v29", "v30", "v31");
+
+ vbool4_t v = __riscv_vmflt_vf_f32m8_b4_mu(m1,m2,v1,x,vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v28", "v29", "v30", "v31");
+
+ __riscv_vsm_v_b4 (out,v,vl);
+}
+
+void f7 (void * in, void *out, float x)
+{
+ vbool4_t mask = *(vbool4_t*)in;
+ asm volatile ("":::"memory");
+ vfloat32m8_t v = __riscv_vle32_v_f32m8 (in, 4);
+ vfloat32m8_t v2 = __riscv_vle32_v_f32m8_m (mask, in, 4);
+ vbool4_t m3 = __riscv_vmflt_vf_f32m8_b4 (v, x, 4);
+ vbool4_t m4 = __riscv_vmflt_vf_f32m8_b4_mu (m3, m3, v2, x, 4);
+ __riscv_vsm_v_b4 (out, m4, 4);
+}
+
+void f8 (void * in, void *out, float x)
+{
+ vbool4_t mask = *(vbool4_t*)in;
+ asm volatile ("":::"memory");
+ vfloat32m8_t v = __riscv_vle32_v_f32m8 (in, 4);
+ vfloat32m8_t v2 = __riscv_vle32_v_f32m8_m (mask, in, 4);
+ vbool4_t m3 = __riscv_vmflt_vf_f32m8_b4 (v, x, 4);
+ vbool4_t m4 = __riscv_vmflt_vf_f32m8_b4_m (m3, v2, x, 4);
+ __riscv_vsm_v_b4 (out, m3, 4);
+ __riscv_vsm_v_b4 (out, m4, 4);
+}
+
+void f9 (void * in, void *out, float x)
+{
+ vbool4_t mask = *(vbool4_t*)in;
+ asm volatile ("":::"memory");
+ vfloat32m8_t v = __riscv_vle32_v_f32m8 (in, 4);
+ vfloat32m8_t v2 = __riscv_vle32_v_f32m8_m (mask, in, 4);
+ vbool4_t m3 = __riscv_vmflt_vf_f32m8_b4 (v, x, 4);
+ vbool4_t m4 = __riscv_vmflt_vf_f32m8_b4_m (m3, v2, x, 4);
+ __riscv_vsm_v_b4 (out, m4, 4);
+ vbool4_t m5 = __riscv_vmflt_vf_f32m8_b4_mu (m3, m4, v2, x, 4);
+ __riscv_vsm_v_b4 (out, m3, 4);
+ __riscv_vsm_v_b4 (out, m5, 4);
+}
+
+void f10 (void * in, void *out, float x)
+{
+ vbool4_t mask = *(vbool4_t*)in;
+ asm volatile ("":::"memory");
+ vfloat32m8_t v = __riscv_vle32_v_f32m8 (in, 4);
+ vfloat32m8_t v2 = __riscv_vle32_v_f32m8_m (mask, in, 4);
+ vbool4_t m3 = __riscv_vmflt_vf_f32m8_b4 (v, x, 4);
+ vbool4_t m4 = __riscv_vmflt_vf_f32m8_b4_mu (m3, m3, v2, x, 4);
+ __riscv_vsm_v_b4 (out, m4, 4);
+}
+
+void f11 (void * in, void *out, float x)
+{
+ vbool4_t mask = *(vbool4_t*)in;
+ asm volatile ("":::"memory");
+ vfloat32m8_t v = __riscv_vle32_v_f32m8 (in, 4);
+ vfloat32m8_t v2 = __riscv_vle32_v_f32m8_m (mask, in, 4);
+ vbool4_t m3 = __riscv_vmflt_vf_f32m8_b4 (v, x, 4);
+ vbool4_t m4 = __riscv_vmflt_vf_f32m8_b4_m (m3, v2, x, 4);
+ __riscv_vsm_v_b4 (out, m3, 4);
+ __riscv_vsm_v_b4 (out, m4, 4);
+}
+
+void f12 (void* base1,void* base2,void* out,int n, float x)
+{
+ vbool4_t mask = *(vbool4_t*)base1;
+ vfloat32m8_t v = __riscv_vle32_v_f32m8 (base1, 4);
+ vfloat32m8_t v2 = __riscv_vle32_v_f32m8_m (mask, base1, 4);
+ mask = __riscv_vmflt_vf_f32m8_b4 (v, x, 4);
+ for (int i = 0; i < n; i++){
+ vfloat32m8_t v3 = __riscv_vle32_v_f32m8 (base1 + i, 4);
+ vfloat32m8_t v4 = __riscv_vle32_v_f32m8_m (mask, base1 + i * 2, 4);
+ mask = __riscv_vmflt_vf_f32m8_b4_m (mask, v3, x, 32);
+ mask = __riscv_vmflt_vf_f32m8_b4_mu (mask, mask, v4, x, 32);
+ }
+ __riscv_vsm_v_b4 (out, mask, 32);
+}
+
+void f13 (void* base1,void* base2,void* out,int n, float x)
+{
+ vbool32_t mask = *(vbool32_t*)base1;
+ vfloat32m1_t v = __riscv_vle32_v_f32m1 (base1, 4);
+ vfloat32m1_t v2 = __riscv_vle32_v_f32m1_m (mask, base1, 4);
+ mask = __riscv_vmflt_vf_f32m1_b32 (v, x, 4);
+ for (int i = 0; i < n; i++){
+ vfloat32m1_t v3 = __riscv_vle32_v_f32m1 (base1 + i, 4);
+ vfloat32m1_t v4 = __riscv_vle32_v_f32m1_m (mask, base1 + i * 2, 4);
+ mask = __riscv_vmflt_vf_f32m1_b32_m (mask, v3, x, 32);
+ mask = __riscv_vmflt_vf_f32m1_b32_mu (mask, mask, v4, x, 32);
+ }
+ __riscv_vsm_v_b32 (out, mask, 32);
+}
+
+/* { dg-final { scan-assembler-not {vmv} } } */
+/* { dg-final { scan-assembler-not {csrr} } } */