[Patch ARM 2/6] Fix Large struct mode splitters for cases where registers are not TImode.
Ramana Radhakrishnan
ramana.radhakrishnan@linaro.org
Mon Jul 30 11:46:00 GMT 2012
> Patch 2 is a bug fix that fixes up the splitters so that they take
> into account the right register for the right mode. For instance, a
> register not fit for a TImode value shouldn't be put in one even if
> the larger mode allows a different register. This is possible for
> OImode values or indeed HFA style values being passed around as
> parameters and is potentially an issue for folks building hard-float
> systems with neon and using some of the large structures.
,
The large struct mode splitters don't take into account whether
a TImode value can be generated from a value that is in an appropriate
neon register for that value. This is possible in cases where you have
an EImode, OImode, CImode or TImode value in the appropriate registers
as these could be passed in their corresponding neon D registers.
This was exposed by the tests for v{ld/st/tbl/tbx}2/3/4{lane/}* and
friends in the new set of tests that follow at the end of this patch
series.
This is a problem for folks using the new hard-float ABI and passing
such values in registers, so it might not show up that much in practice,
but it's certainly worth backporting after sitting in trunk for a few
days. It certainly is not a regression, since this bug has always been
there, but it is a fundamental correctness issue in the backend with respect
to such splits, so I'd like some more consensus on whether this can be
safely backported.
regards,
Ramana
2012-07-27 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org>
PR target/
* config/arm/arm-protos.h (arm_split_eimoves): Declare.
(arm_split_tocx_imoves): Declare.
* config/arm/iterators.md (TOCXI): New.
* config/arm/neon.md (EI TI OI CI XI mode splitters): Unify
and use iterator. Simplify EImode splitter. Move logic to ...
* config/arm/arm.c (arm_split_eimoves): ... here.  Handle
case for EImode values in registers not suitable for splits
into TImode values.
(arm_split_tocx_imoves): Likewise.
---
gcc/config/arm/arm-protos.h | 3 +
gcc/config/arm/arm.c | 91 +++++++++++++++++++++++++++++++++++++++++++
gcc/config/arm/iterators.md | 3 +
gcc/config/arm/neon.md | 84 +++++-----------------------------------
4 files changed, 107 insertions(+), 74 deletions(-)
diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h
index c590ef4..dc93c5d 100644
--- a/gcc/config/arm/arm-protos.h
+++ b/gcc/config/arm/arm-protos.h
@@ -248,6 +248,9 @@ extern int vfp3_const_double_for_fract_bits (rtx);
extern void arm_emit_coreregs_64bit_shift (enum rtx_code, rtx, rtx, rtx, rtx,
rtx);
extern bool arm_validize_comparison (rtx *, rtx *, rtx *);
+extern void arm_split_tocx_imoves (rtx *, enum machine_mode);
+extern void arm_split_eimoves (rtx *);
+
#endif /* RTX_CODE */
extern void arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel);
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index 1f3f9b3..b281485 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -26410,4 +26410,95 @@ arm_validize_comparison (rtx *comparison, rtx
* op1, rtx * op2)
}
+/* EImode values are usually in 3 DImode registers. This could be suitably
+ split into TImode moves and DImode moves. */
+void
+arm_split_eimoves (rtx *operands)
+{
+ int rdest = REGNO (operands[0]);
+ int rsrc = REGNO (operands[1]);
+ int count = 0;
+ int increment = 0;
+ rtx dest[3], src[3];
+ int i, j;
+
+ if (NEON_REGNO_OK_FOR_QUAD (rdest) && NEON_REGNO_OK_FOR_QUAD (rsrc))
+ {
+ dest[0] = gen_rtx_REG (TImode, rdest);
+ src[0] = gen_rtx_REG (TImode, rsrc);
+ count = 2;
+ increment = 4;
+ }
+ else
+ {
+ dest[0] = gen_rtx_REG (DImode, rdest);
+ src[0] = gen_rtx_REG (DImode, rsrc);
+ dest[1] = gen_rtx_REG (DImode, rdest + 2);
+ src[1] = gen_rtx_REG (DImode, rsrc + 2);
+ count = 3;
+ increment = 2;
+ }
+
+ dest[count - 1] = gen_rtx_REG (DImode, rdest + 4);
+ src[count - 1] = gen_rtx_REG (DImode, rsrc + 4);
+
+ neon_disambiguate_copy (operands, dest, src, count);
+
+ for (i = 0, j = 0 ; j < count ; i = i + 2, j++)
+ emit_move_insn (operands[i], operands[i + 1]);
+
+ return;
+}
+
+/* Split TI, CI, OI and XImode moves into appropriate smaller
+ forms. */
+void
+arm_split_tocx_imoves (rtx *operands, enum machine_mode mode)
+{
+ int rdest = REGNO (operands[0]);
+ int rsrc = REGNO (operands[1]);
+ enum machine_mode split_mode;
+ int count = 0;
+ int factor = 0;
+ int j;
+ /* We never should need more than 8 DImode registers in the worst case. */
+ rtx dest[8], src[8];
+ int i;
+
+ if (NEON_REGNO_OK_FOR_QUAD (rdest) && NEON_REGNO_OK_FOR_QUAD (rsrc))
+ {
+ split_mode = TImode;
+ if (dump_file)
+ fprintf (dump_file, "split_mode is TImode\n");
+ }
+ else
+ {
+ split_mode = DImode;
+ if (dump_file)
+ fprintf (dump_file, "split_mode is DImode\n");
+ }
+
+
+ count = GET_MODE_SIZE (mode) / GET_MODE_SIZE (split_mode);
+ factor = GET_MODE_SIZE (split_mode) / UNITS_PER_WORD;
+
+ if (dump_file)
+ fprintf (dump_file, "count %d factor %d\n", count, factor);
+
+ for (i = 0 ; i < count; i++)
+ {
+ dest[i] = gen_rtx_REG (split_mode, rdest + i * factor );
+ src[i] = gen_rtx_REG (split_mode, rsrc + i * factor);
+ }
+
+ neon_disambiguate_copy (operands, dest, src, count);
+ for (j = 0, i = 0 ; j < count ; j++, i = i + 2)
+ {
+ emit_move_insn (operands[i], operands[i + 1]);
+ }
+
+ return;
+
+}
+
#include "gt-arm.h"
diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md
index def8d9f..3474d16 100644
--- a/gcc/config/arm/iterators.md
+++ b/gcc/config/arm/iterators.md
@@ -89,6 +89,9 @@
;; Opaque structure types wider than TImode.
(define_mode_iterator VSTRUCT [EI OI CI XI])
+;; Opaque structure types other than EImode.
+(define_mode_iterator TOCXI [TI OI CI XI])
+
;; Opaque structure types used in table lookups (except vtbl1/vtbx1).
(define_mode_iterator VTAB [TI EI OI])
diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md
index 1ffbb7d..7434625 100644
--- a/gcc/config/arm/neon.md
+++ b/gcc/config/arm/neon.md
@@ -293,85 +293,21 @@
[(set (match_operand:EI 0 "s_register_operand" "")
(match_operand:EI 1 "s_register_operand" ""))]
"TARGET_NEON && reload_completed"
- [(set (match_dup 0) (match_dup 1))
- (set (match_dup 2) (match_dup 3))]
+ [(const_int 0)]
{
- int rdest = REGNO (operands[0]);
- int rsrc = REGNO (operands[1]);
- rtx dest[2], src[2];
-
- dest[0] = gen_rtx_REG (TImode, rdest);
- src[0] = gen_rtx_REG (TImode, rsrc);
- dest[1] = gen_rtx_REG (DImode, rdest + 4);
- src[1] = gen_rtx_REG (DImode, rsrc + 4);
-
- neon_disambiguate_copy (operands, dest, src, 2);
+ arm_split_eimoves (operands);
+ DONE;
})
-(define_split
- [(set (match_operand:OI 0 "s_register_operand" "")
- (match_operand:OI 1 "s_register_operand" ""))]
+;; Splitter for TI, OI, CI and XI modes.
+(define_split ;; TI, OI, CI and XImode move split.
+ [(set (match_operand:TOCXI 0 "s_register_operand" "")
+ (match_operand:TOCXI 1 "s_register_operand" ""))]
"TARGET_NEON && reload_completed"
- [(set (match_dup 0) (match_dup 1))
- (set (match_dup 2) (match_dup 3))]
+ [(const_int 0)]
{
- int rdest = REGNO (operands[0]);
- int rsrc = REGNO (operands[1]);
- rtx dest[2], src[2];
-
- dest[0] = gen_rtx_REG (TImode, rdest);
- src[0] = gen_rtx_REG (TImode, rsrc);
- dest[1] = gen_rtx_REG (TImode, rdest + 4);
- src[1] = gen_rtx_REG (TImode, rsrc + 4);
-
- neon_disambiguate_copy (operands, dest, src, 2);
-})
-
-(define_split
- [(set (match_operand:CI 0 "s_register_operand" "")
- (match_operand:CI 1 "s_register_operand" ""))]
- "TARGET_NEON && reload_completed"
- [(set (match_dup 0) (match_dup 1))
- (set (match_dup 2) (match_dup 3))
- (set (match_dup 4) (match_dup 5))]
-{
- int rdest = REGNO (operands[0]);
- int rsrc = REGNO (operands[1]);
- rtx dest[3], src[3];
-
- dest[0] = gen_rtx_REG (TImode, rdest);
- src[0] = gen_rtx_REG (TImode, rsrc);
- dest[1] = gen_rtx_REG (TImode, rdest + 4);
- src[1] = gen_rtx_REG (TImode, rsrc + 4);
- dest[2] = gen_rtx_REG (TImode, rdest + 8);
- src[2] = gen_rtx_REG (TImode, rsrc + 8);
-
- neon_disambiguate_copy (operands, dest, src, 3);
-})
-
-(define_split
- [(set (match_operand:XI 0 "s_register_operand" "")
- (match_operand:XI 1 "s_register_operand" ""))]
- "TARGET_NEON && reload_completed"
- [(set (match_dup 0) (match_dup 1))
- (set (match_dup 2) (match_dup 3))
- (set (match_dup 4) (match_dup 5))
- (set (match_dup 6) (match_dup 7))]
-{
- int rdest = REGNO (operands[0]);
- int rsrc = REGNO (operands[1]);
- rtx dest[4], src[4];
-
- dest[0] = gen_rtx_REG (TImode, rdest);
- src[0] = gen_rtx_REG (TImode, rsrc);
- dest[1] = gen_rtx_REG (TImode, rdest + 4);
- src[1] = gen_rtx_REG (TImode, rsrc + 4);
- dest[2] = gen_rtx_REG (TImode, rdest + 8);
- src[2] = gen_rtx_REG (TImode, rsrc + 8);
- dest[3] = gen_rtx_REG (TImode, rdest + 12);
- src[3] = gen_rtx_REG (TImode, rsrc + 12);
-
- neon_disambiguate_copy (operands, dest, src, 4);
+ arm_split_tocx_imoves (operands, <MODE>mode);
+ DONE;
})
(define_expand "movmisalign<mode>"
--
1.7.4.1
More information about the Gcc-patches
mailing list