[PATCH] rs6000: Use REAL_TYPE to copy when block move array in structure[PR65421]
Xionghu Luo
luoxhu@linux.ibm.com
Tue Jun 2 09:41:50 GMT 2020
A double array in a structure that is used as a function argument or return
value is accessed in BLKmode; it is stored to the stack and loaded back from
the stack with redundant DF->DI->DF conversions. This patch checks for a
homogeneous type and uses the actual element type to do the block move,
bypassing the conversions.
gcc/ChangeLog:
2020-06-02 Xionghu Luo <luoxhu@linux.ibm.com>
PR target/65421
* config/rs6000/rs6000-string.c (expand_block_move): Use
elt_mode to copy when homogeneous REAL_TYPE.
gcc/testsuite/ChangeLog:
2020-06-02 Xionghu Luo <luoxhu@linux.ibm.com>
PR target/65421
* gcc.target/powerpc/pr65421.c: New test.
---
gcc/config/rs6000/rs6000-string.c | 15 ++++++++++++++-
gcc/testsuite/gcc.target/powerpc/pr65421.c | 17 +++++++++++++++++
2 files changed, 31 insertions(+), 1 deletion(-)
create mode 100644 gcc/testsuite/gcc.target/powerpc/pr65421.c
diff --git a/gcc/config/rs6000/rs6000-string.c b/gcc/config/rs6000/rs6000-string.c
index fe7177f10fd..ea217840d88 100644
--- a/gcc/config/rs6000/rs6000-string.c
+++ b/gcc/config/rs6000/rs6000-string.c
@@ -37,6 +37,7 @@
#include "target.h"
#include "profile-count.h"
#include "predict.h"
+#include "rs6000-internal.h"
/* Expand a block clear operation, and return 1 if successful. Return 0
if we should let the compiler generate normal code.
@@ -2733,6 +2734,7 @@ expand_block_move (rtx operands[], bool might_overlap)
rtx loads[MAX_MOVE_REG];
rtx stores[MAX_MOVE_REG];
int num_reg = 0;
+ machine_mode elt_mode = DImode;
/* If this is not a fixed size move, just call memcpy */
if (! constp)
@@ -2750,6 +2752,17 @@ expand_block_move (rtx operands[], bool might_overlap)
if (bytes > rs6000_block_move_inline_limit)
return 0;
+ tree type = TREE_TYPE (MEM_EXPR (orig_dest));
+ if (TREE_CODE (type) == RECORD_TYPE
+ && rs6000_discover_homogeneous_aggregate (TYPE_MODE (type), type, NULL,
+ NULL))
+ {
+ tree field_type = TREE_TYPE (first_field (type));
+ if (field_type && TREE_CODE (field_type) == ARRAY_TYPE
+ && TREE_CODE (TREE_TYPE (field_type)) == REAL_TYPE)
+ elt_mode = TYPE_MODE (TREE_TYPE (field_type));
+ }
+
for (offset = 0; bytes > 0; offset += move_bytes, bytes -= move_bytes)
{
union {
@@ -2771,7 +2784,7 @@ expand_block_move (rtx operands[], bool might_overlap)
&& (align >= 64 || !STRICT_ALIGNMENT))
{
move_bytes = 8;
- mode = DImode;
+ mode = elt_mode;
gen_func.mov = gen_movdi;
if (offset == 0 && align < 64)
{
diff --git a/gcc/testsuite/gcc.target/powerpc/pr65421.c b/gcc/testsuite/gcc.target/powerpc/pr65421.c
new file mode 100644
index 00000000000..ec8f4824de5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr65421.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O3" } */
+
+typedef struct
+{
+ double a[4];
+} A;
+
+A
+foo (const A *a)
+{
+ return *a;
+}
+
+/* { dg-final { scan-assembler-not {\mld\M} } } */
+/* { dg-final { scan-assembler-not {\mstd\M} } } */
+/* { dg-final { scan-assembler-times {\mlfd\M} 4 } } */
--
2.21.0.777.g83232e3864
More information about the Gcc-patches
mailing list