[PATCH] RISC-V: Allow unaligned accesses in cpymemsi expansion
Christoph Muellner
cmuellner@gcc.gnu.org
Thu Jul 29 14:33:56 GMT 2021
The RISC-V cpymemsi expansion is called whenever the by-pieces
infrastructure does not take care of the builtin expansion.
Currently, by-pieces handles e.g. memcpy() with n <= 24 bytes.
The code emitted by the by-pieces infrastructure performs
unaligned accesses if the target's
riscv_slow_unaligned_access_p is false (and n is not 1).
If n > 24, then the RISC-V cpymemsi expansion is called, which is
implemented in riscv_expand_block_move(). The current implementation
does not check riscv_slow_unaligned_access_p and never emits unaligned
accesses.
Since by-pieces emits unaligned accesses, it is reasonable to implement
the same behaviour in the cpymemsi expansion. And that's what this patch
is doing.
The patch checks riscv_slow_unaligned_access_p at the entry and sets
the allowed alignment accordingly. This alignment is then propagated
down to the routines that emit the actual instructions.
Without the patch, a memcpy() with n==25 will be expanded only
if the given pointers are aligned. With the patch, unaligned
pointers are also accepted if riscv_slow_unaligned_access_p is false.
gcc/ChangeLog:
* config/riscv/riscv.c (riscv_block_move_straight): Add
parameter align.
(riscv_adjust_block_mem): Replace parameter length by parameter
align.
(riscv_block_move_loop): Add parameter align.
(riscv_expand_block_move): Set alignment properly if the target
has fast unaligned access.
gcc/testsuite/ChangeLog:
* gcc.target/riscv/builtins-strict-align.c: New test.
* gcc.target/riscv/builtins-unaligned-1.c: New test.
* gcc.target/riscv/builtins-unaligned-2.c: New test.
* gcc.target/riscv/builtins-unaligned-3.c: New test.
* gcc.target/riscv/builtins-unaligned-4.c: New test.
* gcc.target/riscv/builtins.h: New test.
Signed-off-by: Christoph Muellner <cmuellner@gcc.gnu.org>
---
gcc/config/riscv/riscv.c | 53 +++++++++++--------
.../gcc.target/riscv/builtins-strict-align.c | 13 +++++
.../gcc.target/riscv/builtins-unaligned-1.c | 15 ++++++
.../gcc.target/riscv/builtins-unaligned-2.c | 15 ++++++
.../gcc.target/riscv/builtins-unaligned-3.c | 15 ++++++
.../gcc.target/riscv/builtins-unaligned-4.c | 15 ++++++
gcc/testsuite/gcc.target/riscv/builtins.h | 10 ++++
7 files changed, 115 insertions(+), 21 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/riscv/builtins-strict-align.c
create mode 100644 gcc/testsuite/gcc.target/riscv/builtins-unaligned-1.c
create mode 100644 gcc/testsuite/gcc.target/riscv/builtins-unaligned-2.c
create mode 100644 gcc/testsuite/gcc.target/riscv/builtins-unaligned-3.c
create mode 100644 gcc/testsuite/gcc.target/riscv/builtins-unaligned-4.c
create mode 100644 gcc/testsuite/gcc.target/riscv/builtins.h
diff --git a/gcc/config/riscv/riscv.c b/gcc/config/riscv/riscv.c
index 576960bb37c..0596a9ff1b6 100644
--- a/gcc/config/riscv/riscv.c
+++ b/gcc/config/riscv/riscv.c
@@ -3173,11 +3173,13 @@ riscv_legitimize_call_address (rtx addr)
return addr;
}
-/* Emit straight-line code to move LENGTH bytes from SRC to DEST.
+/* Emit straight-line code to move LENGTH bytes from SRC to DEST
+ with accesses that are ALIGN bytes aligned.
Assume that the areas do not overlap. */
static void
-riscv_block_move_straight (rtx dest, rtx src, unsigned HOST_WIDE_INT length)
+riscv_block_move_straight (rtx dest, rtx src, unsigned HOST_WIDE_INT length,
+ unsigned HOST_WIDE_INT align)
{
unsigned HOST_WIDE_INT offset, delta;
unsigned HOST_WIDE_INT bits;
@@ -3185,8 +3187,7 @@ riscv_block_move_straight (rtx dest, rtx src, unsigned HOST_WIDE_INT length)
enum machine_mode mode;
rtx *regs;
- bits = MAX (BITS_PER_UNIT,
- MIN (BITS_PER_WORD, MIN (MEM_ALIGN (src), MEM_ALIGN (dest))));
+ bits = MAX (BITS_PER_UNIT, MIN (BITS_PER_WORD, align));
mode = mode_for_size (bits, MODE_INT, 0).require ();
delta = bits / BITS_PER_UNIT;
@@ -3211,21 +3212,20 @@ riscv_block_move_straight (rtx dest, rtx src, unsigned HOST_WIDE_INT length)
{
src = adjust_address (src, BLKmode, offset);
dest = adjust_address (dest, BLKmode, offset);
- move_by_pieces (dest, src, length - offset,
- MIN (MEM_ALIGN (src), MEM_ALIGN (dest)), RETURN_BEGIN);
+ move_by_pieces (dest, src, length - offset, align, RETURN_BEGIN);
}
}
/* Helper function for doing a loop-based block operation on memory
- reference MEM. Each iteration of the loop will operate on LENGTH
- bytes of MEM.
+ reference MEM.
Create a new base register for use within the loop and point it to
the start of MEM. Create a new memory reference that uses this
- register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
+ register and has an alignment of ALIGN. Store them in *LOOP_REG
+ and *LOOP_MEM respectively. */
static void
-riscv_adjust_block_mem (rtx mem, unsigned HOST_WIDE_INT length,
+riscv_adjust_block_mem (rtx mem, unsigned HOST_WIDE_INT align,
rtx *loop_reg, rtx *loop_mem)
{
*loop_reg = copy_addr_to_reg (XEXP (mem, 0));
@@ -3233,15 +3233,17 @@ riscv_adjust_block_mem (rtx mem, unsigned HOST_WIDE_INT length,
/* Although the new mem does not refer to a known location,
it does keep up to LENGTH bytes of alignment. */
*loop_mem = change_address (mem, BLKmode, *loop_reg);
- set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
+ set_mem_align (*loop_mem, align);
}
/* Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
- bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
- the memory regions do not overlap. */
+ bytes at a time. LENGTH must be at least BYTES_PER_ITER. The alignment
+ of the access can be set by ALIGN. Assume that the memory regions do not
+ overlap. */
static void
riscv_block_move_loop (rtx dest, rtx src, unsigned HOST_WIDE_INT length,
+ unsigned HOST_WIDE_INT align,
unsigned HOST_WIDE_INT bytes_per_iter)
{
rtx label, src_reg, dest_reg, final_src, test;
@@ -3251,8 +3253,8 @@ riscv_block_move_loop (rtx dest, rtx src, unsigned HOST_WIDE_INT length,
length -= leftover;
/* Create registers and memory references for use within the loop. */
- riscv_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
- riscv_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
+ riscv_adjust_block_mem (src, align, &src_reg, &src);
+ riscv_adjust_block_mem (dest, align, &dest_reg, &dest);
/* Calculate the value that SRC_REG should have after the last iteration
of the loop. */
@@ -3264,7 +3266,7 @@ riscv_block_move_loop (rtx dest, rtx src, unsigned HOST_WIDE_INT length,
emit_label (label);
/* Emit the loop body. */
- riscv_block_move_straight (dest, src, bytes_per_iter);
+ riscv_block_move_straight (dest, src, bytes_per_iter, align);
/* Move on to the next block. */
riscv_emit_move (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
@@ -3276,7 +3278,7 @@ riscv_block_move_loop (rtx dest, rtx src, unsigned HOST_WIDE_INT length,
/* Mop up any left-over bytes. */
if (leftover)
- riscv_block_move_straight (dest, src, leftover);
+ riscv_block_move_straight (dest, src, leftover, align);
else
emit_insn(gen_nop ());
}
@@ -3292,8 +3294,17 @@ riscv_expand_block_move (rtx dest, rtx src, rtx length)
unsigned HOST_WIDE_INT hwi_length = UINTVAL (length);
unsigned HOST_WIDE_INT factor, align;
- align = MIN (MIN (MEM_ALIGN (src), MEM_ALIGN (dest)), BITS_PER_WORD);
- factor = BITS_PER_WORD / align;
+ if (riscv_slow_unaligned_access_p)
+ {
+ align = MIN (MIN (MEM_ALIGN (src), MEM_ALIGN (dest)), BITS_PER_WORD);
+ factor = BITS_PER_WORD / align;
+ }
+ else
+ {
+ /* Assume data to be aligned. */
+ align = hwi_length * BITS_PER_UNIT;
+ factor = 1;
+ }
if (optimize_function_for_size_p (cfun)
&& hwi_length * factor * UNITS_PER_WORD > MOVE_RATIO (false))
@@ -3301,7 +3312,7 @@ riscv_expand_block_move (rtx dest, rtx src, rtx length)
if (hwi_length <= (RISCV_MAX_MOVE_BYTES_STRAIGHT / factor))
{
- riscv_block_move_straight (dest, src, INTVAL (length));
+ riscv_block_move_straight (dest, src, hwi_length, align);
return true;
}
else if (optimize && align >= BITS_PER_WORD)
@@ -3321,7 +3332,7 @@ riscv_expand_block_move (rtx dest, rtx src, rtx length)
iter_words = i;
}
- riscv_block_move_loop (dest, src, bytes, iter_words * UNITS_PER_WORD);
+ riscv_block_move_loop (dest, src, bytes, align, iter_words * UNITS_PER_WORD);
return true;
}
}
diff --git a/gcc/testsuite/gcc.target/riscv/builtins-strict-align.c b/gcc/testsuite/gcc.target/riscv/builtins-strict-align.c
new file mode 100644
index 00000000000..6f7b1f324de
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/builtins-strict-align.c
@@ -0,0 +1,13 @@
+/* { dg-options "-O2 -mtune=thead-c906 -march=rv64gc -mabi=lp64 -mstrict-align" } */
+/* { dg-do compile } */
+
+#include "builtins.h"
+
+DO_MEMCPY_N(12)
+
+/* { dg-final { scan-assembler-times "lbu" 12 } } */
+/* { dg-final { scan-assembler-times "sb" 12 } } */
+/* { dg-final { scan-assembler-not "lw" } } */
+/* { dg-final { scan-assembler-not "sw" } } */
+/* { dg-final { scan-assembler-not "ld" } } */
+/* { dg-final { scan-assembler-not "sd" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/builtins-unaligned-1.c b/gcc/testsuite/gcc.target/riscv/builtins-unaligned-1.c
new file mode 100644
index 00000000000..f97d60a35d4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/builtins-unaligned-1.c
@@ -0,0 +1,15 @@
+/* { dg-options "-O2 -mtune=thead-c906 -march=rv64gc -mabi=lp64" } */
+/* { dg-do compile } */
+
+#include "builtins.h"
+
+DO_MEMCPY_N(7)
+
+/* { dg-final { scan-assembler-not "ld" } } */
+/* { dg-final { scan-assembler-not "sd" } } */
+/* { dg-final { scan-assembler-times "lw" 1 } } */
+/* { dg-final { scan-assembler-times "sw" 1 } } */
+/* { dg-final { scan-assembler-times "lh" 1 } } */
+/* { dg-final { scan-assembler-times "sh" 1 } } */
+/* { dg-final { scan-assembler-times "lbu" 1 } } */
+/* { dg-final { scan-assembler-times "sb" 1 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/builtins-unaligned-2.c b/gcc/testsuite/gcc.target/riscv/builtins-unaligned-2.c
new file mode 100644
index 00000000000..b373651d241
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/builtins-unaligned-2.c
@@ -0,0 +1,15 @@
+/* { dg-options "-O2 -mtune=thead-c906 -march=rv64gc -mabi=lp64" } */
+/* { dg-do compile } */
+
+#include "builtins.h"
+
+DO_MEMCPY_N(8)
+
+/* { dg-final { scan-assembler-times "ld" 1 } } */
+/* { dg-final { scan-assembler-times "sd" 1 } } */
+/* { dg-final { scan-assembler-not "lw" } } */
+/* { dg-final { scan-assembler-not "sw" } } */
+/* { dg-final { scan-assembler-not "lh" } } */
+/* { dg-final { scan-assembler-not "sh" } } */
+/* { dg-final { scan-assembler-not "lbu" } } */
+/* { dg-final { scan-assembler-not "sb" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/builtins-unaligned-3.c b/gcc/testsuite/gcc.target/riscv/builtins-unaligned-3.c
new file mode 100644
index 00000000000..3f4a6b9630b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/builtins-unaligned-3.c
@@ -0,0 +1,15 @@
+/* { dg-options "-O2 -mtune=thead-c906 -march=rv64gc -mabi=lp64" } */
+/* { dg-do compile } */
+
+#include "builtins.h"
+
+DO_MEMCPY_N(31)
+
+/* { dg-final { scan-assembler-times "ld" 3 } } */
+/* { dg-final { scan-assembler-times "sd" 3 } } */
+/* { dg-final { scan-assembler-times "lw" 1 } } */
+/* { dg-final { scan-assembler-times "sw" 1 } } */
+/* { dg-final { scan-assembler-times "lh" 1 } } */
+/* { dg-final { scan-assembler-times "sh" 1 } } */
+/* { dg-final { scan-assembler-times "lbu" 1 } } */
+/* { dg-final { scan-assembler-times "sb" 1 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/builtins-unaligned-4.c b/gcc/testsuite/gcc.target/riscv/builtins-unaligned-4.c
new file mode 100644
index 00000000000..26fcb7a71a7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/builtins-unaligned-4.c
@@ -0,0 +1,15 @@
+/* { dg-options "-O2 -mtune=thead-c906 -march=rv64gc -mabi=lp64" } */
+/* { dg-do compile } */
+
+#include "builtins.h"
+
+DO_MEMCPY_N(63)
+
+/* { dg-final { scan-assembler-times "ld" 7 } } */
+/* { dg-final { scan-assembler-times "sd" 7 } } */
+/* { dg-final { scan-assembler-times "lw" 1 } } */
+/* { dg-final { scan-assembler-times "sw" 1 } } */
+/* { dg-final { scan-assembler-times "lh" 1 } } */
+/* { dg-final { scan-assembler-times "sh" 1 } } */
+/* { dg-final { scan-assembler-times "lbu" 1 } } */
+/* { dg-final { scan-assembler-times "sb" 1 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/builtins.h b/gcc/testsuite/gcc.target/riscv/builtins.h
new file mode 100644
index 00000000000..5cad5fe194b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/builtins.h
@@ -0,0 +1,10 @@
+#ifndef BUILTINS_H
+#define BUILTINS_H
+
+#define DO_MEMCPY_N(N) \
+void do_memcpy_##N (void *d, const void *s) \
+{ \
+ __builtin_memcpy (d, s, N); \
+}
+
+#endif /* BUILTINS_H */
--
2.31.1
More information about the Gcc-patches
mailing list