This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Re: [PATCH][GCC][AARCH64]Bad code-gen for structure/block/unaligned memory access
- From: Tamar Christina <Tamar dot Christina at arm dot com>
- To: James Greenhalgh <james dot greenhalgh at arm dot com>
- Cc: nd at arm dot com, richard dot sandiford at linaro dot org, gcc-patches at gcc dot gnu dot org, Richard dot Earnshaw at arm dot com, Marcus dot Shawcroft at arm dot com
- Date: Tue, 14 Nov 2017 18:43:38 +0000
- Subject: Re: [PATCH][GCC][AARCH64]Bad code-gen for structure/block/unaligned memory access
- Authentication-results: sourceware.org; auth=none
- Authentication-results: spf=none (sender IP is ) smtp.mailfrom=Tamar dot Christina at arm dot com;
- Nodisclaimer: True
- References: <VI1PR0801MB20313B74A43EAC4756E8C8BDFFC80@VI1PR0801MB2031.eurprd08.prod.outlook.com> <87a85ic3i1.fsf@linaro.org> <HE1PR0801MB202735017D77BF86B97A82F2FFCE0@HE1PR0801MB2027.eurprd08.prod.outlook.com> <VI1PR0801MB2031C1DB43DDAD286965D8BFFFD60@VI1PR0801MB2031.eurprd08.prod.outlook.com> <20170830155619.GA30919@arm.com> <HE1PR08MB050739ADF44BE7FE11E826FBFF280@HE1PR08MB0507.eurprd08.prod.outlook.com> <20171114174803.GA16434@arm.com>
- Spamdiagnosticmetadata: NSPM
- Spamdiagnosticoutput: 1:99
Th 11/14/2017 17:48, James Greenhalgh wrote:
> On Tue, Nov 14, 2017 at 04:05:12PM +0000, Tamar Christina wrote:
> > Hi James,
> >
> > I have split off the aarch64 bit off from the generic parts and processed your feedback.
> >
> > Attached is the reworked patch.
> >
> > Ok for Tunk?
>
> Thanks for the respin, I'm a bit confused by this comment.
Sorry, the comment was a bit nonsensical. I update the last part but didn't re-read the comment
as a whole.
I've respun the patch again.
Thanks,
Tamar
>
> > diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
> > index e89c8156976cecf200cd67c1e938c8156c1240c4..bc197cc973a2bbc2c263809c7e8ccfbc86309c26 100644
> > --- a/gcc/config/aarch64/aarch64.c
> > +++ b/gcc/config/aarch64/aarch64.c
> > @@ -13974,6 +13974,28 @@ aarch64_expand_movmem (rtx *operands)
> > base = copy_to_mode_reg (Pmode, XEXP (src, 0));
> > src = adjust_automodify_address (src, VOIDmode, base, 0);
> >
> > + /* Optimize routines for MEM to REG copies.
> > + This particular function only handles copying to two
> > + destination types: 1) a regular register and 2) the stack.
> > + When writing to a regular register we may end up writting too much in cases
> > + where the register already contains a live value or when no data padding is
> > + happening so we disallow regular registers to use this new code path. */
>
> I'm struggling to understand when you'd end up with a struct held in
> a partial register going through this code path. Maybe the restriction
> makes sense, can you write a testcase, or show some sample code where
> this goes wrong (or simplify the comment).
>
> Thanks,
> James
>
--
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index e89c8156976cecf200cd67c1e938c8156c1240c4..2597ec70ff2c49c8276622d3f9d6fe394a1f80c1 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -13974,6 +13974,26 @@ aarch64_expand_movmem (rtx *operands)
base = copy_to_mode_reg (Pmode, XEXP (src, 0));
src = adjust_automodify_address (src, VOIDmode, base, 0);
+ /* Optimize routines for MEM to REG copies.
+ This particular function only handles copying to two
+ destination types: 1) a regular register and 2) the stack and only when
+ the amount to copy fits in less than 8 bytes. */
+ if (n < 8 && !REG_P (XEXP (operands[0], 0)))
+ {
+ machine_mode dest_mode
+ = smallest_mode_for_size (n * BITS_PER_UNIT, MODE_INT);
+ rtx result = gen_reg_rtx (dest_mode);
+ emit_insn (gen_move_insn (result, GEN_INT (0)));
+
+ src = adjust_address (src, dest_mode, 0);
+ emit_insn (gen_move_insn (result, src));
+ src = aarch64_progress_pointer (src);
+
+ dst = adjust_address (dst, dest_mode, 0);
+ emit_insn (gen_move_insn (dst, result));
+ return true;
+ }
+
/* Simple cases. Copy 0-3 bytes, as (if applicable) a 2-byte, then a
1-byte chunk. */
if (n < 4)
diff --git a/gcc/testsuite/gcc.target/aarch64/structs.c b/gcc/testsuite/gcc.target/aarch64/structs.c
new file mode 100644
index 0000000000000000000000000000000000000000..2be8fae9479500cdbd5f22c6ab4f6adcdd7fcbce
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/structs.c
@@ -0,0 +1,226 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-O2" } */
+/* Copyright 1996, 1999, 2007 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>.
+
+ Please email any bugs, comments, and/or additions to this file to:
+ bug-gdb@prep.ai.mit.edu */
+
+struct struct1 { char a;};
+struct struct2 { char a, b;};
+struct struct3 { char a, b, c; };
+struct struct4 { char a, b, c, d; };
+struct struct5 { char a, b, c, d, e; };
+struct struct6 { char a, b, c, d, e, f; };
+struct struct7 { char a, b, c, d, e, f, g; };
+struct struct8 { char a, b, c, d, e, f, g, h; };
+struct struct9 { char a, b, c, d, e, f, g, h, i; };
+struct struct10 { char a, b, c, d, e, f, g, h, i, j; };
+struct struct11 { char a, b, c, d, e, f, g, h, i, j, k; };
+struct struct12 { char a, b, c, d, e, f, g, h, i, j, k, l; };
+struct struct16 { char a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p; };
+
+struct struct1 foo1 = {'1'}, L1;
+struct struct2 foo2 = { 'a', 'b'}, L2;
+struct struct3 foo3 = { 'A', 'B', 'C'}, L3;
+struct struct4 foo4 = {'1', '2', '3', '4'}, L4;
+struct struct5 foo5 = {'a', 'b', 'c', 'd', 'e'}, L5;
+struct struct6 foo6 = {'A', 'B', 'C', 'D', 'E', 'F'}, L6;
+struct struct7 foo7 = {'1', '2', '3', '4', '5', '6', '7'}, L7;
+struct struct8 foo8 = {'1', '2', '3', '4', '5', '6', '7', '8'}, L8;
+struct struct9 foo9 = {'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i'}, L9;
+struct struct10 foo10 = {
+ 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J'}, L10;
+struct struct11 foo11 = {
+ '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B'}, L11;
+struct struct12 foo12 = {
+ 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L'}, L12;
+struct struct16 foo16 = {
+ 'a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p'}, L16;
+
+struct struct1 fun1()
+{
+ return foo1;
+}
+struct struct2 fun2()
+{
+ return foo2;
+}
+struct struct3 fun3()
+{
+ return foo3;
+}
+struct struct4 fun4()
+{
+ return foo4;
+}
+struct struct5 fun5()
+{
+ return foo5;
+}
+struct struct6 fun6()
+{
+ return foo6;
+}
+struct struct7 fun7()
+{
+ return foo7;
+}
+struct struct8 fun8()
+{
+ return foo8;
+}
+struct struct9 fun9()
+{
+ return foo9;
+}
+struct struct10 fun10()
+{
+ return foo10;
+}
+struct struct11 fun11()
+{
+ return foo11;
+}
+struct struct12 fun12()
+{
+ return foo12;
+}
+struct struct16 fun16()
+{
+ return foo16;
+}
+
+#ifdef PROTOTYPES
+void Fun1(struct struct1 foo1)
+#else
+void Fun1(foo1)
+ struct struct1 foo1;
+#endif
+{
+ L1 = foo1;
+}
+#ifdef PROTOTYPES
+void Fun2(struct struct2 foo2)
+#else
+void Fun2(foo2)
+ struct struct2 foo2;
+#endif
+{
+ L2 = foo2;
+}
+#ifdef PROTOTYPES
+void Fun3(struct struct3 foo3)
+#else
+void Fun3(foo3)
+ struct struct3 foo3;
+#endif
+{
+ L3 = foo3;
+}
+#ifdef PROTOTYPES
+void Fun4(struct struct4 foo4)
+#else
+void Fun4(foo4)
+ struct struct4 foo4;
+#endif
+{
+ L4 = foo4;
+}
+#ifdef PROTOTYPES
+void Fun5(struct struct5 foo5)
+#else
+void Fun5(foo5)
+ struct struct5 foo5;
+#endif
+{
+ L5 = foo5;
+}
+#ifdef PROTOTYPES
+void Fun6(struct struct6 foo6)
+#else
+void Fun6(foo6)
+ struct struct6 foo6;
+#endif
+{
+ L6 = foo6;
+}
+#ifdef PROTOTYPES
+void Fun7(struct struct7 foo7)
+#else
+void Fun7(foo7)
+ struct struct7 foo7;
+#endif
+{
+ L7 = foo7;
+}
+#ifdef PROTOTYPES
+void Fun8(struct struct8 foo8)
+#else
+void Fun8(foo8)
+ struct struct8 foo8;
+#endif
+{
+ L8 = foo8;
+}
+#ifdef PROTOTYPES
+void Fun9(struct struct9 foo9)
+#else
+void Fun9(foo9)
+ struct struct9 foo9;
+#endif
+{
+ L9 = foo9;
+}
+#ifdef PROTOTYPES
+void Fun10(struct struct10 foo10)
+#else
+void Fun10(foo10)
+ struct struct10 foo10;
+#endif
+{
+ L10 = foo10;
+}
+#ifdef PROTOTYPES
+void Fun11(struct struct11 foo11)
+#else
+void Fun11(foo11)
+ struct struct11 foo11;
+#endif
+{
+ L11 = foo11;
+}
+#ifdef PROTOTYPES
+void Fun12(struct struct12 foo12)
+#else
+void Fun12(foo12)
+ struct struct12 foo12;
+#endif
+{
+ L12 = foo12;
+}
+#ifdef PROTOTYPES
+void Fun16(struct struct16 foo16)
+#else
+void Fun16(foo16)
+ struct struct16 foo16;
+#endif
+{
+ L16 = foo16;
+}
+
+/* { dg-final { scan-assembler-not {bfi} } } */
+/* { dg-final { scan-assembler-times {bfx} 5 } } */