This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[PATCH] ARM half-precision floating-point, 1/8 (NEON vld1/vst1 support)
- From: Sandra Loosemore <sandra at codesourcery dot com>
- To: GCC Patches <gcc-patches at gcc dot gnu dot org>
- Cc: Paul Brook <paul at codesourcery dot com>
- Date: Wed, 15 Apr 2009 18:15:48 -0400
- Subject: [PATCH] ARM half-precision floating-point, 1/8 (NEON vld1/vst1 support)
This patch adds support for NEON vld1/vst1 instructions. We've had
this sitting around in our local source tree for a while, originally
added to solve another problem. Although these instructions aren't
specific to half-precision float, I'm using them for HFmode loads and
stores between memory and S registers.... so, I'm submitting this now,
on Paul's behalf.
-Sandra
2009-04-15 Paul Brook <paul@codesourcery.com>
Sandra Loosemore <sandra@codesourcery.com>
gcc/
* config/arm/arm.c (neon_vector_mem_operand): Handle element/structure
loads. Allow PRE_DEC.
(output_move_neon): Handle PRE_DEC.
(arm_print_operand): Add 'A' for neon structure loads.
* config/arm/arm-protos.h (neon_vector_mem_operand): Update prototype.
* config/arm/neon.md (neon_mov): Update comment.
* config/arm/constraints.md (Un, Us): Update neon_vector_mem_operand
calls.
(Um): New constraint.
Index: gcc/config/arm/arm.c
===================================================================
--- gcc/config/arm/arm.c (revision 146010)
+++ gcc/config/arm/arm.c (working copy)
@@ -6915,10 +6915,13 @@ arm_coproc_mem_operand (rtx op, bool wb)
}
/* Return TRUE if OP is a memory operand which we can load or store a vector
- to/from. If CORE is true, we're moving from ARM registers not Neon
- registers. */
+ to/from. TYPE is one of the following values:
+ 0 - Vector load/stor (vldr)
+ 1 - Core registers (ldm)
+ 2 - Element/structure loads (vld1)
+ */
int
-neon_vector_mem_operand (rtx op, bool core)
+neon_vector_mem_operand (rtx op, int type)
{
rtx ind;
@@ -6951,23 +6954,15 @@ neon_vector_mem_operand (rtx op, bool co
return arm_address_register_rtx_p (ind, 0);
/* Allow post-increment with Neon registers. */
- if (!core && GET_CODE (ind) == POST_INC)
+ if (type != 1 && (GET_CODE (ind) == POST_INC || GET_CODE (ind) == PRE_DEC))
return arm_address_register_rtx_p (XEXP (ind, 0), 0);
-#if 0
- /* FIXME: We can support this too if we use VLD1/VST1. */
- if (!core
- && GET_CODE (ind) == POST_MODIFY
- && arm_address_register_rtx_p (XEXP (ind, 0), 0)
- && GET_CODE (XEXP (ind, 1)) == PLUS
- && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
- ind = XEXP (ind, 1);
-#endif
+ /* FIXME: vld1 allows register post-modify. */
/* Match:
(plus (reg)
(const)). */
- if (!core
+ if (type == 0
&& GET_CODE (ind) == PLUS
&& GET_CODE (XEXP (ind, 0)) == REG
&& REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
@@ -7037,7 +7032,7 @@ coproc_secondary_reload_class (enum mach
if (TARGET_NEON
&& (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
|| GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
- && neon_vector_mem_operand (x, FALSE))
+ && neon_vector_mem_operand (x, 0))
return NO_REGS;
if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
@@ -10754,7 +10749,7 @@ output_move_double (rtx *operands)
}
/* Output a move, load or store for quad-word vectors in ARM registers. Only
- handles MEMs accepted by neon_vector_mem_operand with CORE=true. */
+ handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
const char *
output_move_quad (rtx *operands)
@@ -10950,6 +10945,13 @@ output_move_neon (rtx *operands)
ops[1] = reg;
break;
+ case PRE_DEC:
+ /* FIXME: We should be using vld1/vst1 here in BE mode? */
+ templ = "v%smdb%%?\t%%0!, %%h1";
+ ops[0] = XEXP (addr, 0);
+ ops[1] = reg;
+ break;
+
case POST_MODIFY:
/* FIXME: Not currently enabled in neon_vector_mem_operand. */
gcc_unreachable ();
@@ -13817,6 +13819,24 @@ arm_print_operand (FILE *stream, rtx x,
}
return;
+ /* Memory operand for vld1/vst1 instruction. */
+ case 'A':
+ {
+ rtx addr;
+ bool postinc = FALSE;
+ gcc_assert (GET_CODE (x) == MEM);
+ addr = XEXP (x, 0);
+ if (GET_CODE (addr) == POST_INC)
+ {
+ postinc = 1;
+ addr = XEXP (addr, 0);
+ }
+ asm_fprintf (stream, "[%r]", REGNO (addr));
+ if (postinc)
+ fputs("!", stream);
+ }
+ return;
+
default:
if (x == 0)
{
Index: gcc/config/arm/arm-protos.h
===================================================================
--- gcc/config/arm/arm-protos.h (revision 146010)
+++ gcc/config/arm/arm-protos.h (working copy)
@@ -88,7 +88,7 @@ extern bool arm_cannot_force_const_mem (
extern int cirrus_memory_offset (rtx);
extern int arm_coproc_mem_operand (rtx, bool);
-extern int neon_vector_mem_operand (rtx, bool);
+extern int neon_vector_mem_operand (rtx, int);
extern int neon_struct_mem_operand (rtx);
extern int arm_no_early_store_addr_dep (rtx, rtx);
extern int arm_no_early_alu_shift_dep (rtx, rtx);
Index: gcc/config/arm/neon.md
===================================================================
--- gcc/config/arm/neon.md (revision 146010)
+++ gcc/config/arm/neon.md (working copy)
@@ -481,7 +481,7 @@
/* FIXME: If the memory layout is changed in big-endian mode, output_move_vfp
below must be changed to output_move_neon (which will use the
- element/structure loads/stores), and the constraint changed to 'Un' instead
+ element/structure loads/stores), and the constraint changed to 'Um' instead
of 'Uv'. */
switch (which_alternative)
Index: gcc/config/arm/constraints.md
===================================================================
--- gcc/config/arm/constraints.md (revision 146010)
+++ gcc/config/arm/constraints.md (working copy)
@@ -32,7 +32,7 @@
;; in ARM/Thumb-2 state: Da, Db, Dc, Dn, Dl, DL, Dv
;; The following memory constraints have been used:
-;; in ARM/Thumb-2 state: Q, Ut, Uv, Uy, Un, Us
+;; in ARM/Thumb-2 state: Q, Ut, Uv, Uy, Un, Um, Us
;; in ARM state: Uq
@@ -214,17 +214,24 @@
(define_memory_constraint "Un"
"@internal
+ In ARM/Thumb-2 state a valid address for Neon doubleword vector
+ load/store instructions."
+ (and (match_code "mem")
+ (match_test "TARGET_32BIT && neon_vector_mem_operand (op, 0)")))
+
+(define_memory_constraint "Um"
+ "@internal
In ARM/Thumb-2 state a valid address for Neon element and structure
load/store instructions."
(and (match_code "mem")
- (match_test "TARGET_32BIT && neon_vector_mem_operand (op, FALSE)")))
+ (match_test "TARGET_32BIT && neon_vector_mem_operand (op, 2)")))
(define_memory_constraint "Us"
"@internal
In ARM/Thumb-2 state a valid address for non-offset loads/stores of
quad-word values in four ARM registers."
(and (match_code "mem")
- (match_test "TARGET_32BIT && neon_vector_mem_operand (op, TRUE)")))
+ (match_test "TARGET_32BIT && neon_vector_mem_operand (op, 1)")))
(define_memory_constraint "Uq"
"@internal