[PATCH V3] RISC-V: Using merge approach to optimize repeating sequence in vec_init
juzhe.zhong@rivai.ai
Fri May 12 15:38:28 GMT 2023
From: Juzhe-Zhong <juzhe.zhong@rivai.ai>
Address comments from Kito.
Consider the following case:
typedef int64_t vnx32di __attribute__ ((vector_size (256)));
__attribute__ ((noipa)) void
f_vnx32di (int64_t a, int64_t b, int64_t *out)
{
vnx32di v
= {a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b};
*(vnx32di *) out = v;
}
Since we don't have SEW = 128 in vec_duplicate, we can't combine "a" and "b"
into a single SEW = 128 element and then broadcast that wide element.
This patch optimizes the case above.
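The approach is to broadcast the first pattern element and then vmerge each remaining
pattern element under a precomputed bit mask. A rough scalar model of what the emitted
code computes for the two-pattern case above (illustration only: the helper name is made
up for this sketch, and the single 64-bit mask only covers vectors of at most 64 elements):

#include <stdint.h>

/* Model of "broadcast a, then merge b under an odd-bit mask", instead of
   emitting nelts slide1down instructions.  */
static void
init_repeating_pair (int64_t *v, unsigned nelts, int64_t a, int64_t b)
{
  uint64_t mask = 0xAAAAAAAAAAAAAAAAULL;  /* bit i set => lane i takes b.  */
  for (unsigned i = 0; i < nelts; i++)    /* vmv.v.x v8, a0  */
    v[i] = a;
  for (unsigned i = 0; i < nelts; i++)    /* vmerge.vxm v8, v8, a1, v0  */
    if ((mask >> i) & 1)
      v[i] = b;
}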
Compiled with -march=rv64gcv_zvl256b --param riscv-autovec-preference=fixed-vlmax:
Before this patch:
..
vslide1down.vx (x31 times)
..
After this patch:
li a5,-1431654400
addi a5,a5,-1365
li a3,-1431654400
addi a3,a3,-1366
slli a5,a5,32
add a5,a5,a3
vsetvli a4,zero,e64,m8,ta,ma
vmv.v.x v8,a0
vmv.s.x v0,a5
vmerge.vxm v8,v8,a1,v0
vs8r.v v8,0(a2)
ret
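For reference, the li/addi/slli/add sequence above materializes the 64-bit constant
0xAAAAAAAAAAAAAAAA in a5; every odd bit is set, so once it is moved into v0 the
vmerge.vxm overwrites exactly the odd-numbered lanes with b (a1). A standalone sketch
of how that mask is derived for pattern index 1 with npatterns = 2, mirroring the logic
added in get_merge_mask_bitfield (the function below is illustrative, not the patch code):

#include <stdint.h>

/* Build the merge mask for the given pattern index: one bit per lane,
   repeating every npatterns lanes.  */
static uint64_t
merge_mask (unsigned index, unsigned npatterns)
{
  uint64_t mask = 0;
  for (unsigned i = 0; i < 64 / npatterns; i++)
    mask |= (1ULL << index) << (i * npatterns);
  return mask;  /* merge_mask (1, 2) == 0xAAAAAAAAAAAAAAAA.  */
}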
gcc/ChangeLog:
* config/riscv/riscv-v.cc (rvv_builder::can_duplicate_repeating_sequence_p): New function.
(rvv_builder::get_merged_repeating_sequence): Ditto.
(rvv_builder::repeating_sequence_use_merge_profitable_p): Ditto.
(rvv_builder::get_merge_mask_bitfield): Ditto.
(emit_scalar_move_op): Ditto.
(emit_merge_op): Ditto.
(expand_vector_init_merge_repeating_sequence): Ditto.
(expand_vec_init): Add merge approach for repeating sequence.
gcc/testsuite/ChangeLog:
* gcc.target/riscv/rvv/autovec/vls-vlmax/repeat-10.c: New test.
* gcc.target/riscv/rvv/autovec/vls-vlmax/repeat-11.c: New test.
* gcc.target/riscv/rvv/autovec/vls-vlmax/repeat-7.c: New test.
* gcc.target/riscv/rvv/autovec/vls-vlmax/repeat-8.c: New test.
* gcc.target/riscv/rvv/autovec/vls-vlmax/repeat-9.c: New test.
* gcc.target/riscv/rvv/autovec/vls-vlmax/repeat_run-11.c: New test.
* gcc.target/riscv/rvv/autovec/vls-vlmax/repeat_run-7.c: New test.
* gcc.target/riscv/rvv/autovec/vls-vlmax/repeat_run-8.c: New test.
---
gcc/config/riscv/riscv-v.cc | 245 ++++++++++++++++--
.../riscv/rvv/autovec/vls-vlmax/repeat-10.c | 19 ++
.../riscv/rvv/autovec/vls-vlmax/repeat-11.c | 25 ++
.../riscv/rvv/autovec/vls-vlmax/repeat-7.c | 25 ++
.../riscv/rvv/autovec/vls-vlmax/repeat-8.c | 15 ++
.../riscv/rvv/autovec/vls-vlmax/repeat-9.c | 16 ++
.../rvv/autovec/vls-vlmax/repeat_run-11.c | 45 ++++
.../rvv/autovec/vls-vlmax/repeat_run-7.c | 45 ++++
.../rvv/autovec/vls-vlmax/repeat_run-8.c | 41 +++
9 files changed, 453 insertions(+), 23 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/repeat-10.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/repeat-11.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/repeat-7.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/repeat-8.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/repeat-9.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/repeat_run-11.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/repeat_run-7.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/repeat_run-8.c
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index b8dc333f54e..b336b11228b 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -72,11 +72,14 @@ public:
{
add_input_operand (RVV_VUNDEF (mode), mode);
}
- void add_policy_operand (enum tail_policy vta, enum mask_policy vma)
+ void add_policy_operand (enum tail_policy vta)
{
rtx tail_policy_rtx = gen_int_mode (vta, Pmode);
- rtx mask_policy_rtx = gen_int_mode (vma, Pmode);
add_input_operand (tail_policy_rtx, Pmode);
+ }
+ void add_policy_operand (enum mask_policy vma)
+ {
+ rtx mask_policy_rtx = gen_int_mode (vma, Pmode);
add_input_operand (mask_policy_rtx, Pmode);
}
void add_avl_type_operand (avl_type type)
@@ -99,25 +102,36 @@ public:
add_vundef_operand (dest_mode);
}
- void set_len_and_policy (rtx len, bool force_vlmax = false)
- {
- bool vlmax_p = force_vlmax;
- gcc_assert (has_dest);
+ void set_dest_merge (rtx dest)
+ {
+ dest_mode = GET_MODE (dest);
+ has_dest = true;
+ add_output_operand (dest, dest_mode);
+ add_vundef_operand (dest_mode);
+ }
- if (!len)
- {
- vlmax_p = true;
- len = gen_reg_rtx (Pmode);
- emit_vlmax_vsetvl (dest_mode, len);
- }
+ void set_len_and_policy (rtx len, bool force_vlmax = false, bool ta_p = true,
+ bool ma_p = true)
+ {
+ bool vlmax_p = force_vlmax;
+ gcc_assert (has_dest);
- add_input_operand (len, Pmode);
+ if (!len)
+ {
+ vlmax_p = true;
+ len = gen_reg_rtx (Pmode);
+ emit_vlmax_vsetvl (dest_mode, len);
+ }
- if (GET_MODE_CLASS (dest_mode) != MODE_VECTOR_BOOL)
- add_policy_operand (get_prefer_tail_policy (), get_prefer_mask_policy ());
+ add_input_operand (len, Pmode);
- add_avl_type_operand (vlmax_p ? avl_type::VLMAX : avl_type::NONVLMAX);
- }
+ if (ta_p && GET_MODE_CLASS (dest_mode) != MODE_VECTOR_BOOL)
+ add_policy_operand (get_prefer_tail_policy ());
+ if (ma_p && GET_MODE_CLASS (dest_mode) != MODE_VECTOR_BOOL)
+ add_policy_operand (get_prefer_mask_policy ());
+
+ add_avl_type_operand (vlmax_p ? avl_type::VLMAX : avl_type::NONVLMAX);
+ }
void expand (enum insn_code icode, bool temporary_volatile_p = false)
{
@@ -1140,19 +1154,31 @@ public:
: rtx_vector_builder (mode, npatterns, nelts_per_pattern)
{
m_inner_mode = GET_MODE_INNER (mode);
- m_inner_size = GET_MODE_BITSIZE (m_inner_mode).to_constant ();
+ m_inner_size_bits = GET_MODE_BITSIZE (m_inner_mode);
+ m_inner_size_bytes = GET_MODE_SIZE (m_inner_mode);
+ gcc_assert (
+ int_mode_for_size (inner_size (), 0).exists (&m_inner_int_mode));
}
bool can_duplicate_repeating_sequence_p ();
rtx get_merged_repeating_sequence ();
+ bool repeating_sequence_use_merge_profitable_p ();
+ rtx get_merge_mask_bitfield (unsigned int) const;
+
machine_mode new_mode () const { return m_new_mode; }
+ scalar_mode inner_mode () const { return m_inner_mode; }
+ scalar_int_mode inner_int_mode () const { return m_inner_int_mode; }
+ unsigned int inner_units () const { return m_inner_size_bytes; }
+ unsigned int inner_size () const { return m_inner_size_bits; }
private:
- machine_mode m_inner_mode;
+ scalar_mode m_inner_mode;
+ scalar_int_mode m_inner_int_mode;
machine_mode m_new_mode;
scalar_int_mode m_new_inner_mode;
- unsigned int m_inner_size;
+ unsigned int m_inner_size_bits;
+ unsigned int m_inner_size_bytes;
};
/* Return true if the vector duplicated by a super element which is the fusion
@@ -1163,7 +1189,7 @@ bool
rvv_builder::can_duplicate_repeating_sequence_p ()
{
poly_uint64 new_size = exact_div (full_nelts (), npatterns ());
- unsigned int new_inner_size = m_inner_size * npatterns ();
+ unsigned int new_inner_size = m_inner_size_bits * npatterns ();
if (!int_mode_for_size (new_inner_size, 0).exists (&m_new_inner_mode)
|| GET_MODE_SIZE (m_new_inner_mode) > UNITS_PER_WORD
|| !get_vector_mode (m_new_inner_mode, new_size).exists (&m_new_mode))
@@ -1178,11 +1204,11 @@ rvv_builder::get_merged_repeating_sequence ()
scalar_int_mode mode = Pmode;
rtx target = gen_reg_rtx (mode);
emit_move_insn (target, const0_rtx);
- rtx imm = gen_int_mode ((1ULL << m_inner_size) - 1, mode);
+ rtx imm = gen_int_mode ((1ULL << m_inner_size_bits) - 1, mode);
/* { a, b, a, b }: Generate duplicate element = b << bits | a. */
for (unsigned int i = 0; i < npatterns (); i++)
{
- unsigned int loc = m_inner_size * i;
+ unsigned int loc = m_inner_size_bits * i;
rtx shift = gen_int_mode (loc, mode);
rtx ele = gen_lowpart (mode, elt (i));
rtx tmp = expand_simple_binop (mode, AND, ele, imm, NULL_RTX, false,
@@ -1198,6 +1224,61 @@ rvv_builder::get_merged_repeating_sequence ()
return target;
}
+/* Return true if it is a repeating sequence for which the merge
+ approach has better codegen than the default approach
+ (slide1down).
+
+ Sequence A:
+ {a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b}
+
+ nelts = 16
+ npatterns = 2
+
+ for merging a we need mask 101010....
+ for merging b we need mask 010101....
+ Building a mask in a scalar register needs 3 instructions (COST = 3),
+ consisting of 2 scalar instructions + 1 scalar move.
+ Then we need a vector merge to merge them.
+
+ So the overall (rough) COST of Sequence A = (3 + 1) * npatterns = 8.
+ If we use slide1down, the COST = nelts = 16 > 8 (COST of merge).
+ So return true in this case as the merge approach is profitable.
+
+ Sequence B:
+ {a, b, c, d, e, f, g, h, a, b, c, d, e, f, g, h}
+
+ nelts = 16
+ npatterns = 8
+
+ COST of merge approach = (3 + 1) * npatterns = 24
+ COST of slide1down approach = nelts = 16
+ Return false in this case as the merge approach is NOT profitable.
+*/
+bool
+rvv_builder::repeating_sequence_use_merge_profitable_p ()
+{
+ return repeating_sequence_p (0, full_nelts ().to_constant (), npatterns ())
+ && inner_units () <= UNITS_PER_WORD
+ && 3 * npatterns () < full_nelts ().to_constant ();
+}
+
+/* Get the mask for the merge approach.
+
+ Consider the following case:
+ {a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b}
+ To merge "a", the mask should be 1010....
+ To merge "b", the mask should be 0101....
+*/
+rtx
+rvv_builder::get_merge_mask_bitfield (unsigned int index) const
+{
+ uint64_t base_mask = (1ULL << index);
+ uint64_t mask = 0;
+ for (unsigned int i = 0; i < (64 / npatterns ()); i++)
+ mask |= base_mask << (i * npatterns ());
+ return gen_int_mode (mask, inner_int_mode ());
+}
+
/* Subroutine of riscv_vector_expand_vector_init.
Works as follows:
(a) Initialize TARGET by broadcasting element NELTS_REQD - 1 of BUILDER.
@@ -1226,6 +1307,109 @@ expand_vector_init_insert_elems (rtx target, const rvv_builder &builder,
}
}
+/* Emit vmv.s.x instruction. */
+
+static void
+emit_scalar_move_op (rtx dest, rtx src, machine_mode mask_mode)
+{
+ insn_expander<8> e;
+ machine_mode mode = GET_MODE (dest);
+ rtx scalar_move_mask = gen_scalar_move_mask (mode);
+ e.set_dest_and_mask (scalar_move_mask, dest, mask_mode);
+ e.add_input_operand (src, GET_MODE_INNER (mode));
+ e.set_len_and_policy (const1_rtx, false);
+ e.expand (code_for_pred_broadcast (mode), false);
+}
+
+/* Emit merge instruction. */
+
+static void
+emit_merge_op (rtx dest, rtx src1, rtx src2, rtx mask)
+{
+ insn_expander<8> e;
+ machine_mode mode = GET_MODE (dest);
+ e.set_dest_merge (dest);
+ e.add_input_operand (src1, mode);
+ if (VECTOR_MODE_P (GET_MODE (src2)))
+ e.add_input_operand (src2, mode);
+ else
+ e.add_input_operand (src2, GET_MODE_INNER (mode));
+
+ e.add_input_operand (mask, GET_MODE (mask));
+ e.set_len_and_policy (NULL_RTX, true, true, false);
+ if (VECTOR_MODE_P (GET_MODE (src2)))
+ e.expand (code_for_pred_merge (mode), false);
+ else
+ e.expand (code_for_pred_merge_scalar (mode), false);
+}
+
+/* Use the merge approach to initialize the vector with a repeating sequence.
+ v = {a, b, a, b, a, b, a, b}.
+
+ v = broadcast (a).
+ mask = 0b01010101....
+ v = merge (v, b, mask)
+*/
+static void
+expand_vector_init_merge_repeating_sequence (rtx target,
+ const rvv_builder &builder)
+{
+ machine_mode mask_mode;
+ gcc_assert (get_mask_mode (builder.mode ()).exists (&mask_mode));
+
+ machine_mode dup_mode = builder.mode ();
+ if (known_gt (GET_MODE_SIZE (dup_mode), BYTES_PER_RISCV_VECTOR))
+ {
+ poly_uint64 nunits
+ = exact_div (BYTES_PER_RISCV_VECTOR, builder.inner_units ());
+ gcc_assert (
+ get_vector_mode (builder.inner_int_mode (), nunits).exists (&dup_mode));
+ }
+ else
+ {
+ if (FLOAT_MODE_P (dup_mode))
+ gcc_assert (get_vector_mode (builder.inner_int_mode (),
+ GET_MODE_NUNITS (dup_mode))
+ .exists (&dup_mode));
+ }
+
+ machine_mode dup_mask_mode;
+ gcc_assert (get_mask_mode (dup_mode).exists (&dup_mask_mode));
+
+ /* Step 1: Broadcast the 1st-pattern. */
+ emit_len_op (code_for_pred_broadcast (builder.mode ()), target,
+ force_reg (builder.inner_mode (), builder.elt (0)), NULL_RTX,
+ mask_mode);
+
+ /* Step 2: Merge each non-first pattern. */
+ for (unsigned int i = 1; i < builder.npatterns (); i++)
+ {
+ /* Step 2-1: Generate mask register v0 for each merge. */
+ rtx mask_bitfield = builder.get_merge_mask_bitfield (i);
+ rtx mask = gen_reg_rtx (mask_mode);
+ rtx dup = gen_reg_rtx (dup_mode);
+ if (builder.inner_size () >= builder.full_nelts ().to_constant ())
+ {
+ /* Use vmv.s.x. */
+ emit_scalar_move_op (dup, mask_bitfield, dup_mask_mode);
+ }
+ else
+ {
+ /* Use vmv.v.x. */
+ unsigned int mask_num = CEIL (builder.full_nelts ().to_constant (),
+ builder.inner_size ());
+ rtx vl = gen_int_mode (mask_num, Pmode);
+ emit_len_op (code_for_pred_broadcast (dup_mode), dup,
+ force_reg (GET_MODE_INNER (dup_mode), mask_bitfield), vl,
+ dup_mask_mode);
+ }
+ emit_move_insn (mask, gen_lowpart (mask_mode, dup));
+
+ /* Step 2-2: Merge pattern according to the mask. */
+ emit_merge_op (target, target, builder.elt (i), mask);
+ }
+}
+
/* Initialize register TARGET from the elements in PARALLEL rtx VALS. */
void
@@ -1249,6 +1433,21 @@ expand_vec_init (rtx target, rtx vals)
emit_move_insn (target, gen_lowpart (mode, dup));
return;
}
+
+ /* Case 2: Optimize repeating sequence cases that Case 1 cannot
+ handle, when doing so is profitable.
+
+ For example:
+ ELEMENT BITSIZE = 64.
+ v = {a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b}.
+ We can't find a vector mode for "ab" which will be combined into
+ 128-bit element to duplicate. */
+ if (v.repeating_sequence_use_merge_profitable_p ())
+ {
+ expand_vector_init_merge_repeating_sequence (target, v);
+ return;
+ }
+
/* TODO: We will support more Initialization of vector in the future. */
}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/repeat-10.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/repeat-10.c
new file mode 100644
index 00000000000..f66744ef4d7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/repeat-10.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-march=rv64gcv_zvl1024b -mabi=lp64d" } */
+
+#include <stdint-gcc.h>
+
+typedef int64_t vnx16di __attribute__ ((vector_size (1024)));
+
+__attribute__ ((noipa)) void
+f_vnx16di (int64_t a, int64_t b, int64_t *out)
+{
+ vnx16di v = {a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b,
+ a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b,
+ a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b,
+ a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b,};
+ *(vnx16di *) out = v;
+}
+
+/* { dg-final { scan-assembler-times {vmv\.v\.x\tv[0-9]+,\s*[a-x0-9]+} 2 } } */
+/* { dg-final { scan-assembler-times {vmerge\.vxm\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+,\s*v0} 1 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/repeat-11.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/repeat-11.c
new file mode 100644
index 00000000000..e1ced05d1fe
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/repeat-11.c
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-march=rv64gcv -mabi=lp64d" } */
+
+#include <stdint-gcc.h>
+
+typedef double vnx8df __attribute__ ((vector_size (64)));
+typedef double vnx16df __attribute__ ((vector_size (128)));
+
+__attribute__ ((noipa)) void
+f_vnx8df (double a, double b, double *out)
+{
+ vnx8df v = {a, b, a, b, a, b, a, b};
+ *(vnx8df *) out = v;
+}
+
+__attribute__ ((noipa)) void
+f_vnx16df (double a, double b, double *out)
+{
+ vnx16df v = {a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b};
+ *(vnx16df *) out = v;
+}
+
+
+/* { dg-final { scan-assembler-times {vmv\.s\.x\tv[0-9]+,\s*[a-x0-9]+} 2 } } */
+/* { dg-final { scan-assembler-times {vfmerge\.vfm\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+,\s*v0} 2 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/repeat-7.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/repeat-7.c
new file mode 100644
index 00000000000..8e791b29466
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/repeat-7.c
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-march=rv64gcv -mabi=lp64d" } */
+
+#include <stdint-gcc.h>
+
+typedef int64_t vnx8di __attribute__ ((vector_size (64)));
+typedef int64_t vnx16di __attribute__ ((vector_size (128)));
+
+__attribute__ ((noipa)) void
+f_vnx8di (int64_t a, int64_t b, int64_t *out)
+{
+ vnx8di v = {a, b, a, b, a, b, a, b};
+ *(vnx8di *) out = v;
+}
+
+__attribute__ ((noipa)) void
+f_vnx16di (int64_t a, int64_t b, int64_t *out)
+{
+ vnx16di v = {a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b};
+ *(vnx16di *) out = v;
+}
+
+
+/* { dg-final { scan-assembler-times {vmv\.s\.x\tv[0-9]+,\s*[a-x0-9]+} 2 } } */
+/* { dg-final { scan-assembler-times {vmerge\.vxm\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+,\s*v0} 2 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/repeat-8.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/repeat-8.c
new file mode 100644
index 00000000000..2f61465e84f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/repeat-8.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-march=rv64gcv -mabi=lp64d" } */
+
+#include <stdint-gcc.h>
+
+typedef int64_t vnx8di __attribute__ ((vector_size (64)));
+
+__attribute__ ((noipa)) void
+f_vnx8di (int64_t a, int64_t b, int64_t c, int64_t d, int64_t *out)
+{
+ vnx8di v = {a, b, c, d, a, b, c, d};
+ *(vnx8di *) out = v;
+}
+
+/* { dg-final { scan-assembler-times {vslide1down\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+} 7 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/repeat-9.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/repeat-9.c
new file mode 100644
index 00000000000..18e67d0e938
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/repeat-9.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-march=rv64gcv -mabi=lp64d" } */
+
+#include <stdint-gcc.h>
+
+typedef int64_t vnx16di __attribute__ ((vector_size (128)));
+
+__attribute__ ((noipa)) void
+f_vnx16di (int64_t a, int64_t b, int64_t c, int64_t d, int64_t *out)
+{
+ vnx16di v = {a, b, c, d, a, b, c, d, a, b, c, d, a, b, c, d};
+ *(vnx16di *) out = v;
+}
+
+/* { dg-final { scan-assembler-times {vmv\.s\.x\tv[0-9]+,\s*[a-x0-9]+} 3 } } */
+/* { dg-final { scan-assembler-times {vmerge\.vxm\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+,\s*v0} 3 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/repeat_run-11.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/repeat_run-11.c
new file mode 100644
index 00000000000..38d096791bf
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/repeat_run-11.c
@@ -0,0 +1,45 @@
+/* { dg-do run { target { riscv_vector } } } */
+/* { dg-options "--param riscv-autovec-preference=fixed-vlmax -O3" } */
+
+#include "repeat-11.c"
+
+int
+main ()
+{
+ double a = -1789089.23423;
+ double b = -8916156.45644;
+
+ double v_vnx8df[sizeof (vnx8df) / sizeof (double)];
+ f_vnx8df (a, b, v_vnx8df);
+ for (int i = 0; i < sizeof (vnx8df) / sizeof (double); i++)
+ {
+ if (i % 2 == 0)
+ {
+ if (v_vnx8df[i] != a)
+ __builtin_abort ();
+ }
+ else
+ {
+ if (v_vnx8df[i] != b)
+ __builtin_abort ();
+ }
+ }
+
+ double v_vnx16df[sizeof (vnx16df) / sizeof (double)];
+ f_vnx16df (a, b, v_vnx16df);
+ for (int i = 0; i < sizeof (vnx16df) / sizeof (double); i++)
+ {
+ if (i % 2 == 0)
+ {
+ if (v_vnx16df[i] != a)
+ __builtin_abort ();
+ }
+ else
+ {
+ if (v_vnx16df[i] != b)
+ __builtin_abort ();
+ }
+ }
+
+ return 0;
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/repeat_run-7.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/repeat_run-7.c
new file mode 100644
index 00000000000..87e59b6b8e3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/repeat_run-7.c
@@ -0,0 +1,45 @@
+/* { dg-do run { target { riscv_vector } } } */
+/* { dg-options "--param riscv-autovec-preference=fixed-vlmax -O3" } */
+
+#include "repeat-7.c"
+
+int
+main ()
+{
+ int64_t a = -178908923423;
+ int64_t b = -891615645644;
+
+ int64_t v_vnx8di[sizeof (vnx8di) / sizeof (int64_t)];
+ f_vnx8di (a, b, v_vnx8di);
+ for (int i = 0; i < sizeof (vnx8di) / sizeof (int64_t); i++)
+ {
+ if (i % 2 == 0)
+ {
+ if (v_vnx8di[i] != a)
+ __builtin_abort ();
+ }
+ else
+ {
+ if (v_vnx8di[i] != b)
+ __builtin_abort ();
+ }
+ }
+
+ int64_t v_vnx16di[sizeof (vnx16di) / sizeof (int64_t)];
+ f_vnx16di (a, b, v_vnx16di);
+ for (int i = 0; i < sizeof (vnx16di) / sizeof (int64_t); i++)
+ {
+ if (i % 2 == 0)
+ {
+ if (v_vnx16di[i] != a)
+ __builtin_abort ();
+ }
+ else
+ {
+ if (v_vnx16di[i] != b)
+ __builtin_abort ();
+ }
+ }
+
+ return 0;
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/repeat_run-8.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/repeat_run-8.c
new file mode 100644
index 00000000000..eeb2cada1b7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/repeat_run-8.c
@@ -0,0 +1,41 @@
+/* { dg-do run { target { riscv_vector } } } */
+/* { dg-options "--param riscv-autovec-preference=fixed-vlmax -O3" } */
+
+#include "repeat-9.c"
+
+int
+main ()
+{
+ int64_t a = -178908923423;
+ int64_t b = -891615645644;
+ int64_t c = 78908923423;
+ int64_t d = 81615645644;
+
+ int64_t v_vnx16di[sizeof (vnx16di) / sizeof (int64_t)];
+ f_vnx16di (a, b, c, d, v_vnx16di);
+ for (int i = 0; i < sizeof (vnx16di) / sizeof (int64_t); i++)
+ {
+ if (i % 4 == 0)
+ {
+ if (v_vnx16di[i] != a)
+ __builtin_abort ();
+ }
+ else if (i % 4 == 1)
+ {
+ if (v_vnx16di[i] != b)
+ __builtin_abort ();
+ }
+ else if (i % 4 == 2)
+ {
+ if (v_vnx16di[i] != c)
+ __builtin_abort ();
+ }
+ else
+ {
+ if (v_vnx16di[i] != d)
+ __builtin_abort ();
+ }
+ }
+
+ return 0;
+}
--
2.36.3