[PATCH 10/13] libffi: Rewrite aarch64
- From: Richard Henderson <rth at redhat dot com>
- To: gcc-patches at gcc dot gnu dot org
- Cc: libffi-discuss at sourceware dot org, gofrontend-dev at googlegroups dot com
- Date: Fri, 10 Oct 2014 13:42:50 -0700
- Subject: [PATCH 10/13] libffi: Rewrite aarch64
- References: <1412973773-3942-1-git-send-email-rth at redhat dot com>
(1) Invent a new "internal.h" rather than polluting the public ffitarget.h
with stuff that ought not be exposed.
(2) Rewrite is_hfa to not be so horribly computationally expensive, and,
more to the point, to not require us to _re_ compute the same stuff in
order to actually do anything with the type.
(3) Don't use the outdated prep_args callback form for ffi_call.
The x86_64 port has for years shown how to do this with a single alloca,
but new ports keep copying i386 which still does it the inefficient way.
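As a minimal sketch of point (2)'s new interface (an editor's illustration,
not itself part of the patch): is_hfa now returns everything a caller needs
in one encoded value, so the base type and element count fall out of a mask
and a shift instead of a second walk over the type tree:

    /* Sketch: consuming the encoded is_hfa result (see ffi.c below).  */
    int h = is_hfa (ty);
    if (h)
      {
        int base_type = h & 0xff;   /* FFI_TYPE_FLOAT/DOUBLE/LONGDOUBLE */
        int ele_count = h >> 8;     /* 1 to 4 */
        /* ... allocate ele_count registers of base_type ...  */
      }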
---
libffi/src/aarch64/ffi.c | 1362 +++++++++++++++-------------------------
libffi/src/aarch64/ffitarget.h | 17 +-
libffi/src/aarch64/internal.h | 43 ++
libffi/src/aarch64/sysv.S | 499 ++++++++-------
4 files changed, 816 insertions(+), 1105 deletions(-)
create mode 100644 libffi/src/aarch64/internal.h
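
As a minimal sketch of point (3) (an editor's illustration condensed from
the new ffi_call below, not itself part of the patch): a single alloca
carries the register context, the outgoing stack arguments, the four-slot
frame used by ffi_call_SYSV, and, when needed, a temporary return buffer:

    /* Sketch: one allocation, carved into four regions.  */
    context = alloca (sizeof (struct call_context)  /* x0-x7 and v0-v7 */
                      + stack_space                 /* stacked arguments */
                      + 4 * sizeof (UINT64)         /* fp/lr, flags, rvalue */
                      + ret_space);                 /* temporary rvalue */
    stack = (UINT64 *) (context + 1);
    frame = (char *) stack + stack_space;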
diff --git a/libffi/src/aarch64/ffi.c b/libffi/src/aarch64/ffi.c
index 1405665..c409c0c 100644
--- a/libffi/src/aarch64/ffi.c
+++ b/libffi/src/aarch64/ffi.c
@@ -20,42 +20,37 @@ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
#include <stdio.h>
-
+#include <stdlib.h>
+#include <stdint.h>
#include <ffi.h>
#include <ffi_common.h>
+#include "internal.h"
-#include <stdlib.h>
-
-/* Stack alignment requirement in bytes */
+/* Stack alignment requirement in bytes. */
#define AARCH64_STACK_ALIGN 16
+/* Number of X and V argument registers. */
#define N_X_ARG_REG 8
#define N_V_ARG_REG 8
-#define AARCH64_FFI_WITH_V (1 << AARCH64_FFI_WITH_V_BIT)
-
union _d
{
UINT64 d;
UINT32 s[2];
};
-struct call_context
+struct _v
{
- UINT64 x [AARCH64_N_XREG];
- struct
- {
- union _d d[2];
- } v [AARCH64_N_VREG];
+ union _d d[2] __attribute__((aligned(16)));
};
-static void *
-get_x_addr (struct call_context *context, unsigned n)
+struct call_context
{
- return &context->x[n];
-}
+ UINT64 x[N_X_ARG_REG];
+ struct _v v[N_V_ARG_REG];
+};
-static void *
+static inline UINT32 *
get_s_addr (struct call_context *context, unsigned n)
{
#if defined __AARCH64EB__
@@ -65,557 +60,371 @@ get_s_addr (struct call_context *context, unsigned n)
#endif
}
-static void *
+static inline UINT64 *
get_d_addr (struct call_context *context, unsigned n)
{
#if defined __AARCH64EB__
- return &context->v[n].d[1];
+ return &context->v[n].d[1].d;
#else
- return &context->v[n].d[0];
+ return &context->v[n].d[0].d;
#endif
}
-static void *
-get_v_addr (struct call_context *context, unsigned n)
-{
- return &context->v[n];
-}
-
-/* Return the memory location at which a basic type would reside
- were it to have been stored in register n. */
-
-static void *
-get_basic_type_addr (unsigned short type, struct call_context *context,
- unsigned n)
-{
- switch (type)
- {
- case FFI_TYPE_FLOAT:
- return get_s_addr (context, n);
- case FFI_TYPE_DOUBLE:
- return get_d_addr (context, n);
- case FFI_TYPE_LONGDOUBLE:
- return get_v_addr (context, n);
- case FFI_TYPE_UINT8:
- case FFI_TYPE_SINT8:
- case FFI_TYPE_UINT16:
- case FFI_TYPE_SINT16:
- case FFI_TYPE_UINT32:
- case FFI_TYPE_SINT32:
- case FFI_TYPE_INT:
- case FFI_TYPE_POINTER:
- case FFI_TYPE_UINT64:
- case FFI_TYPE_SINT64:
- return get_x_addr (context, n);
- default:
- FFI_ASSERT (0);
- return NULL;
- }
-}
-
-/* Return the alignment width for each of the basic types. */
-
-static size_t
-get_basic_type_alignment (unsigned short type)
-{
- switch (type)
- {
- case FFI_TYPE_FLOAT:
- case FFI_TYPE_DOUBLE:
- return sizeof (UINT64);
- case FFI_TYPE_LONGDOUBLE:
- return sizeof (long double);
- case FFI_TYPE_UINT8:
- case FFI_TYPE_SINT8:
- case FFI_TYPE_UINT16:
- case FFI_TYPE_SINT16:
- case FFI_TYPE_UINT32:
- case FFI_TYPE_INT:
- case FFI_TYPE_SINT32:
- case FFI_TYPE_POINTER:
- case FFI_TYPE_UINT64:
- case FFI_TYPE_SINT64:
- return sizeof (UINT64);
+extern void ffi_call_SYSV (void *frame, void *rvalue,
+ struct call_context *context,
+ unsigned flags, void (*fn)(void)) FFI_HIDDEN;
- default:
- FFI_ASSERT (0);
- return 0;
- }
-}
+extern void ffi_closure_SYSV (void) FFI_HIDDEN;
+extern void ffi_closure_SYSV_V (void) FFI_HIDDEN;
-/* Return the size in bytes for each of the basic types. */
+/* A subroutine of is_hfa. Given a structure type, return the type code
+ of the first non-structure element. Recurse for structure elements.
+ Return -1 if the structure is in fact empty, i.e. no nested elements. */
-static size_t
-get_basic_type_size (unsigned short type)
+static int
+is_hfa0 (const ffi_type *ty)
{
- switch (type)
- {
- case FFI_TYPE_FLOAT:
- return sizeof (UINT32);
- case FFI_TYPE_DOUBLE:
- return sizeof (UINT64);
- case FFI_TYPE_LONGDOUBLE:
- return sizeof (long double);
- case FFI_TYPE_UINT8:
- return sizeof (UINT8);
- case FFI_TYPE_SINT8:
- return sizeof (SINT8);
- case FFI_TYPE_UINT16:
- return sizeof (UINT16);
- case FFI_TYPE_SINT16:
- return sizeof (SINT16);
- case FFI_TYPE_UINT32:
- return sizeof (UINT32);
- case FFI_TYPE_INT:
- case FFI_TYPE_SINT32:
- return sizeof (SINT32);
- case FFI_TYPE_POINTER:
- case FFI_TYPE_UINT64:
- return sizeof (UINT64);
- case FFI_TYPE_SINT64:
- return sizeof (SINT64);
-
- default:
- FFI_ASSERT (0);
- return 0;
- }
-}
+ ffi_type **elements = ty->elements;
+ int i, ret = -1;
-extern void
-ffi_call_SYSV (unsigned (*)(struct call_context *context, unsigned char *,
- extended_cif *),
- struct call_context *context,
- extended_cif *,
- unsigned,
- void (*fn)(void));
-
-extern void
-ffi_closure_SYSV (ffi_closure *);
-
-/* Test for an FFI floating point representation. */
+ if (elements != NULL)
+ for (i = 0; elements[i]; ++i)
+ {
+ ret = elements[i]->type;
+ if (ret == FFI_TYPE_STRUCT)
+ {
+ ret = is_hfa0 (elements[i]);
+ if (ret < 0)
+ continue;
+ }
+ break;
+ }
-static unsigned
-is_floating_type (unsigned short type)
-{
- return (type == FFI_TYPE_FLOAT || type == FFI_TYPE_DOUBLE
- || type == FFI_TYPE_LONGDOUBLE);
+ return ret;
}
-/* Test for a homogeneous structure. */
+/* A subroutine of is_hfa. Given a structure type, return true if all
+ of the non-structure elements are the same as CANDIDATE. */
-static unsigned short
-get_homogeneous_type (ffi_type *ty)
+static int
+is_hfa1 (const ffi_type *ty, int candidate)
{
- if (ty->type == FFI_TYPE_STRUCT && ty->elements)
- {
- unsigned i;
- unsigned short candidate_type
- = get_homogeneous_type (ty->elements[0]);
- for (i =1; ty->elements[i]; i++)
- {
- unsigned short iteration_type = 0;
- /* If we have a nested struct, we must find its homogeneous type.
- If that fits with our candidate type, we are still
- homogeneous. */
- if (ty->elements[i]->type == FFI_TYPE_STRUCT
- && ty->elements[i]->elements)
- {
- iteration_type = get_homogeneous_type (ty->elements[i]);
- }
- else
- {
- iteration_type = ty->elements[i]->type;
- }
+ ffi_type **elements = ty->elements;
+ int i;
- /* If we are not homogeneous, return FFI_TYPE_STRUCT. */
- if (candidate_type != iteration_type)
- return FFI_TYPE_STRUCT;
- }
- return candidate_type;
- }
+ if (elements != NULL)
+ for (i = 0; elements[i]; ++i)
+ {
+ int t = elements[i]->type;
+ if (t == FFI_TYPE_STRUCT)
+ {
+ if (!is_hfa1 (elements[i], candidate))
+ return 0;
+ }
+ else if (t != candidate)
+ return 0;
+ }
- /* Base case, we have no more levels of nesting, so we
- are a basic type, and so, trivially homogeneous in that type. */
- return ty->type;
+ return 1;
}
-/* Determine the number of elements within a STRUCT.
+/* Determine if TY is a homogeneous floating point aggregate (HFA).
+ That is, a structure consisting of 1 to 4 members of all the same type,
+ where that type is a floating point scalar.
- Note, we must handle nested structs.
+ Returns non-zero iff TY is an HFA. The result is an encoded value where
+ bits 0-7 contain the type code, and bits 8-10 contain the element count. */
- If ty is not a STRUCT this function will return 0. */
-
-static unsigned
-element_count (ffi_type *ty)
+static int
+is_hfa (const ffi_type *ty)
{
- if (ty->type == FFI_TYPE_STRUCT && ty->elements)
- {
- unsigned n;
- unsigned elems = 0;
- for (n = 0; ty->elements[n]; n++)
- {
- if (ty->elements[n]->type == FFI_TYPE_STRUCT
- && ty->elements[n]->elements)
- elems += element_count (ty->elements[n]);
- else
- elems++;
- }
- return elems;
- }
- return 0;
-}
+ ffi_type **elements;
+ int candidate, i;
+ size_t size, ele_count;
-/* Test for a homogeneous floating point aggregate.
+ /* Quickest tests first. */
+ if (ty->type != FFI_TYPE_STRUCT)
+ return 0;
- A homogeneous floating point aggregate is a homogeneous aggregate of
- a half- single- or double- precision floating point type with one
- to four elements. Note that this includes nested structs of the
- basic type. */
+ /* No HFA types are smaller than 4 bytes, or larger than 64 bytes. */
+ size = ty->size;
+ if (size < 4 || size > 64)
+ return 0;
-static int
-is_hfa (ffi_type *ty)
-{
- if (ty->type == FFI_TYPE_STRUCT
- && ty->elements[0]
- && is_floating_type (get_homogeneous_type (ty)))
+ /* Find the type of the first non-structure member. */
+ elements = ty->elements;
+ candidate = elements[0]->type;
+ if (candidate == FFI_TYPE_STRUCT)
{
- unsigned n = element_count (ty);
- return n >= 1 && n <= 4;
+ for (i = 0; ; ++i)
+ {
+ candidate = is_hfa0 (elements[i]);
+ if (candidate >= 0)
+ break;
+ }
}
- return 0;
-}
-
-/* Test if an ffi_type is a candidate for passing in a register.
-
- This test does not check that sufficient registers of the
- appropriate class are actually available, merely that IFF
- sufficient registers are available then the argument will be passed
- in register(s).
-
- Note that an ffi_type that is deemed to be a register candidate
- will always be returned in registers.
- Returns 1 if a register candidate else 0. */
-
-static int
-is_register_candidate (ffi_type *ty)
-{
- switch (ty->type)
+ /* If the first member is not a floating point type, it's not an HFA.
+ Also quickly re-check the size of the structure. */
+ switch (candidate)
{
- case FFI_TYPE_VOID:
case FFI_TYPE_FLOAT:
+ ele_count = size / sizeof(float);
+ if (size != ele_count * sizeof(float))
+ return 0;
+ break;
case FFI_TYPE_DOUBLE:
+ ele_count = size / sizeof(double);
+ if (size != ele_count * sizeof(double))
+ return 0;
+ break;
case FFI_TYPE_LONGDOUBLE:
- case FFI_TYPE_UINT8:
- case FFI_TYPE_UINT16:
- case FFI_TYPE_UINT32:
- case FFI_TYPE_UINT64:
- case FFI_TYPE_POINTER:
- case FFI_TYPE_SINT8:
- case FFI_TYPE_SINT16:
- case FFI_TYPE_SINT32:
- case FFI_TYPE_INT:
- case FFI_TYPE_SINT64:
- return 1;
-
- case FFI_TYPE_STRUCT:
- if (is_hfa (ty))
- {
- return 1;
- }
- else if (ty->size > 16)
- {
- /* Too large. Will be replaced with a pointer to memory. The
- pointer MAY be passed in a register, but the value will
- not. This test specifically fails since the argument will
- never be passed by value in registers. */
- return 0;
- }
- else
- {
- /* Might be passed in registers depending on the number of
- registers required. */
- return (ty->size + 7) / 8 < N_X_ARG_REG;
- }
+ ele_count = size / sizeof(long double);
+ if (size != ele_count * sizeof(long double))
+ return 0;
break;
-
default:
- FFI_ASSERT (0);
- break;
+ return 0;
}
+ if (ele_count > 4)
+ return 0;
- return 0;
-}
-
-/* Test if an ffi_type argument or result is a candidate for a vector
- register. */
-
-static int
-is_v_register_candidate (ffi_type *ty)
-{
- return is_floating_type (ty->type)
- || (ty->type == FFI_TYPE_STRUCT && is_hfa (ty));
-}
-
-/* Representation of the procedure call argument marshalling
- state.
-
- The terse state variable names match the names used in the AARCH64
- PCS. */
-
-struct arg_state
-{
- unsigned ngrn; /* Next general-purpose register number. */
- unsigned nsrn; /* Next vector register number. */
- unsigned nsaa; /* Next stack offset. */
-};
-
-/* Initialize a procedure call argument marshalling state. */
-static void
-arg_init (struct arg_state *state, unsigned call_frame_size)
-{
- state->ngrn = 0;
- state->nsrn = 0;
- state->nsaa = 0;
-}
-
-/* Return the number of available consecutive core argument
- registers. */
-
-static unsigned
-available_x (struct arg_state *state)
-{
- return N_X_ARG_REG - state->ngrn;
-}
-
-/* Return the number of available consecutive vector argument
- registers. */
-
-static unsigned
-available_v (struct arg_state *state)
-{
- return N_V_ARG_REG - state->nsrn;
-}
-
-static void *
-allocate_to_x (struct call_context *context, struct arg_state *state)
-{
- FFI_ASSERT (state->ngrn < N_X_ARG_REG)
- return get_x_addr (context, (state->ngrn)++);
-}
-
-static void *
-allocate_to_s (struct call_context *context, struct arg_state *state)
-{
- FFI_ASSERT (state->nsrn < N_V_ARG_REG)
- return get_s_addr (context, (state->nsrn)++);
-}
-
-static void *
-allocate_to_d (struct call_context *context, struct arg_state *state)
-{
- FFI_ASSERT (state->nsrn < N_V_ARG_REG)
- return get_d_addr (context, (state->nsrn)++);
-}
-
-static void *
-allocate_to_v (struct call_context *context, struct arg_state *state)
-{
- FFI_ASSERT (state->nsrn < N_V_ARG_REG)
- return get_v_addr (context, (state->nsrn)++);
-}
-
-/* Allocate an aligned slot on the stack and return a pointer to it. */
-static void *
-allocate_to_stack (struct arg_state *state, void *stack, unsigned alignment,
- unsigned size)
-{
- void *allocation;
-
- /* Round up the NSAA to the larger of 8 or the natural
- alignment of the argument's type. */
- state->nsaa = ALIGN (state->nsaa, alignment);
- state->nsaa = ALIGN (state->nsaa, alignment);
- state->nsaa = ALIGN (state->nsaa, 8);
-
- allocation = stack + state->nsaa;
+ /* Finally, make sure that all scalar elements are the same type. */
+ for (i = 0; elements[i]; ++i)
+ {
+ if (elements[i]->type == FFI_TYPE_STRUCT)
+ {
+ if (!is_hfa1 (elements[i], candidate))
+ return 0;
+ }
+ else if (elements[i]->type != candidate)
+ return 0;
+ }
- state->nsaa += size;
- return allocation;
+ /* All tests succeeded. Encode the result. */
+ return (ele_count << 8) | candidate;
}
-static void
-copy_basic_type (void *dest, void *source, unsigned short type)
+/* Extend a basic type to fill a 64-bit slot. */
+static UINT64
+extend_basic_type (UINT64 ret, unsigned short type)
{
- /* This is neccessary to ensure that basic types are copied
- sign extended to 64-bits as libffi expects. */
switch (type)
{
case FFI_TYPE_FLOAT:
- *(float *) dest = *(float *) source;
- break;
- case FFI_TYPE_DOUBLE:
- *(double *) dest = *(double *) source;
- break;
- case FFI_TYPE_LONGDOUBLE:
- *(long double *) dest = *(long double *) source;
+ ret = (UINT32)ret;
+#if defined __AARCH64EB__
+ ret <<= 32;
+#endif
break;
case FFI_TYPE_UINT8:
- *(ffi_arg *) dest = *(UINT8 *) source;
+ ret = (UINT8)ret;
break;
case FFI_TYPE_SINT8:
- *(ffi_sarg *) dest = *(SINT8 *) source;
+ ret = (SINT8)ret;
break;
case FFI_TYPE_UINT16:
- *(ffi_arg *) dest = *(UINT16 *) source;
+ ret = (UINT16)ret;
break;
case FFI_TYPE_SINT16:
- *(ffi_sarg *) dest = *(SINT16 *) source;
+ ret = (SINT16)ret;
break;
case FFI_TYPE_UINT32:
- *(ffi_arg *) dest = *(UINT32 *) source;
+ ret = (UINT32)ret;
break;
case FFI_TYPE_INT:
case FFI_TYPE_SINT32:
- *(ffi_sarg *) dest = *(SINT32 *) source;
+ ret = (SINT32)ret;
break;
- case FFI_TYPE_POINTER:
+ case FFI_TYPE_DOUBLE:
case FFI_TYPE_UINT64:
- *(ffi_arg *) dest = *(UINT64 *) source;
- break;
case FFI_TYPE_SINT64:
- *(ffi_sarg *) dest = *(SINT64 *) source;
break;
-
+ case FFI_TYPE_POINTER:
+ ret = (uintptr_t)ret;
+ break;
default:
- FFI_ASSERT (0);
+ abort ();
}
+ return ret;
}
-static void
-copy_hfa_to_reg_or_stack (void *memory,
- ffi_type *ty,
- struct call_context *context,
- unsigned char *stack,
- struct arg_state *state)
+ffi_status FFI_HIDDEN
+ffi_prep_cif_machdep (ffi_cif *cif)
{
- unsigned elems = element_count (ty);
- if (available_v (state) < elems)
- {
- /* There are insufficient V registers. Further V register allocations
- are prevented, the NSAA is adjusted (by allocate_to_stack ())
- and the argument is copied to memory at the adjusted NSAA. */
- state->nsrn = N_V_ARG_REG;
- memcpy (allocate_to_stack (state, stack, ty->alignment, ty->size),
- memory,
- ty->size);
- }
- else
- {
- int i;
- unsigned short type = get_homogeneous_type (ty);
- unsigned elems = element_count (ty);
- for (i = 0; i < elems; i++)
- {
- void *reg = allocate_to_v (context, state);
- copy_basic_type (reg, memory, type);
- memory += get_basic_type_size (type);
- }
- }
-}
+ int flags, h, i;
+ ffi_type *rtype;
-/* Either allocate an appropriate register for the argument type, or if
- none are available, allocate a stack slot and return a pointer
- to the allocated space. */
+ /* Round the stack up to a multiple of the stack alignment requirement. */
+ cif->bytes = ALIGN (cif->bytes, AARCH64_STACK_ALIGN);
-static void *
-allocate_to_register_or_stack (struct call_context *context,
- unsigned char *stack,
- struct arg_state *state,
- unsigned short type)
-{
- size_t alignment = get_basic_type_alignment (type);
- size_t size = alignment;
- switch (type)
+ rtype = cif->rtype;
+ switch (rtype->type)
{
- case FFI_TYPE_FLOAT:
- /* This is the only case for which the allocated stack size
- should not match the alignment of the type. */
- size = sizeof (UINT32);
- /* Fall through. */
- case FFI_TYPE_DOUBLE:
- if (state->nsrn < N_V_ARG_REG)
- return allocate_to_d (context, state);
- state->nsrn = N_V_ARG_REG;
- break;
- case FFI_TYPE_LONGDOUBLE:
- if (state->nsrn < N_V_ARG_REG)
- return allocate_to_v (context, state);
- state->nsrn = N_V_ARG_REG;
+ case FFI_TYPE_VOID:
+ flags = AARCH64_RET_VOID;
break;
case FFI_TYPE_UINT8:
- case FFI_TYPE_SINT8:
case FFI_TYPE_UINT16:
- case FFI_TYPE_SINT16:
case FFI_TYPE_UINT32:
- case FFI_TYPE_SINT32:
+ flags = AARCH64_RET_UINT32;
+ break;
case FFI_TYPE_INT:
- case FFI_TYPE_POINTER:
+ case FFI_TYPE_SINT8:
+ case FFI_TYPE_SINT16:
+ case FFI_TYPE_SINT32:
+ flags = AARCH64_RET_SINT32;
+ break;
case FFI_TYPE_UINT64:
case FFI_TYPE_SINT64:
- if (state->ngrn < N_X_ARG_REG)
- return allocate_to_x (context, state);
- state->ngrn = N_X_ARG_REG;
+ flags = AARCH64_RET_INT64;
+ break;
+ case FFI_TYPE_POINTER:
+ flags = (sizeof(void *) == 8 ? AARCH64_RET_INT64 : AARCH64_RET_UINT32);
+ break;
+ case FFI_TYPE_FLOAT:
+ flags = AARCH64_RET_FLOAT;
+ break;
+ case FFI_TYPE_DOUBLE:
+ flags = AARCH64_RET_DOUBLE;
+ break;
+ case FFI_TYPE_LONGDOUBLE:
+ flags = AARCH64_RET_LDOUBLE;
+ break;
+ case FFI_TYPE_STRUCT:
+ h = is_hfa (rtype);
+ switch (h & 0xff)
+ {
+ case FFI_TYPE_FLOAT:
+ flags = AARCH64_RET_HFA_FLOAT;
+ break;
+ case FFI_TYPE_DOUBLE:
+ flags = AARCH64_RET_HFA_DOUBLE;
+ break;
+ case FFI_TYPE_LONGDOUBLE:
+ flags = AARCH64_RET_HFA_LDOUBLE;
+ break;
+ default:
+ flags = (rtype->size > 16
+ ? AARCH64_RET_LG_STRUCT
+ : AARCH64_RET_SM_STRUCT);
+ break;
+ }
break;
default:
- FFI_ASSERT (0);
+ abort ();
}
- return allocate_to_stack (state, stack, alignment, size);
-}
-
-/* Copy a value to an appropriate register, or if none are
- available, to the stack. */
+ /* Note if any argument requires fp registers. */
+ for (i = 0; i < cif->nargs; i++)
+ {
+ ffi_type *ty = cif->arg_types[i];
+ int tt = ty->type;
+ if (tt == FFI_TYPE_FLOAT
+ || tt == FFI_TYPE_DOUBLE
+ || tt == FFI_TYPE_LONGDOUBLE
+ || is_hfa (ty))
+ {
+ flags |= AARCH64_FLAG_ARG_V;
+ break;
+ }
+ }
-static void
-copy_to_register_or_stack (struct call_context *context,
- unsigned char *stack,
- struct arg_state *state,
- void *value,
- unsigned short type)
-{
- copy_basic_type (
- allocate_to_register_or_stack (context, stack, state, type),
- value,
- type);
+ cif->flags = flags;
+ return FFI_OK;
}
-/* Marshall the arguments from FFI representation to procedure call
- context and stack. */
-
-static unsigned
-aarch64_prep_args (struct call_context *context, unsigned char *stack,
- extended_cif *ecif)
+/* Call a function with the provided arguments and capture the return
+ value. */
+void
+ffi_call (ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
{
- int i;
- struct arg_state state;
+ struct call_context *context;
+ UINT64 *stack, *slot;
+ void *frame, *local_rvalue;
+ ffi_type **arg_types;
+ int i, h, nargs, ngrn, nsrn, nsaa;
+ size_t size, stack_space, ret_space;
- arg_init (&state, ALIGN(ecif->cif->bytes, 16));
+ FFI_ASSERT (cif->abi == FFI_SYSV);
- for (i = 0; i < ecif->cif->nargs; i++)
+ ret_space = 0;
+ h = cif->flags & AARCH64_FLAG_RET_MASK;
+ switch (h)
{
- ffi_type *ty = ecif->cif->arg_types[i];
- switch (ty->type)
- {
- case FFI_TYPE_VOID:
- FFI_ASSERT (0);
- break;
+ case AARCH64_RET_HFA_FLOAT:
+ /* The assembly always writes 4 elements. */
+ ret_space = 4 * sizeof(float);
+ break;
+ case AARCH64_RET_HFA_DOUBLE:
+ ret_space = 4 * sizeof(double);
+ break;
+ case AARCH64_RET_HFA_LDOUBLE:
+ ret_space = 4 * sizeof(long double);
+ break;
+ case AARCH64_RET_SM_STRUCT:
+ ret_space = 16;
+ break;
+ case AARCH64_RET_LG_STRUCT:
+ if (rvalue == NULL)
+ ret_space = cif->rtype->size;
+ break;
+ }
+ /* Allocate the space for all of the arguments, the context, the local
+ stack frame for ffi_call_SYSV, and (possibly) the return value. */
+ stack_space = ALIGN (cif->bytes, 16);
+ context = alloca (sizeof(struct call_context)
+ + stack_space
+ + 4 * sizeof(UINT64)
+ + ret_space);
+ stack = (UINT64 *)(context + 1);
+ frame = (char *)stack + stack_space;
+
+ local_rvalue = rvalue;
+ if (ret_space)
+ local_rvalue = (char *)frame + 4 * sizeof(UINT64);
+
+ ngrn = nsrn = nsaa = 0;
+ arg_types = cif->arg_types;
+ nargs = cif->nargs;
+
+ for (i = 0; i < nargs; i++)
+ {
+ ffi_type *ty = arg_types[i];
+ unsigned short t = ty->type;
+
+ switch (t)
+ {
/* If the argument is a basic type the argument is allocated to an
appropriate register, or if none are available, to the stack. */
case FFI_TYPE_FLOAT:
+ if (nsrn < N_V_ARG_REG)
+ slot = get_d_addr (context, nsrn++);
+ else
+ slot = &stack[nsaa++];
+ *slot = extend_basic_type (*(UINT32 *)avalue[i], t);
+ break;
case FFI_TYPE_DOUBLE:
+ if (nsrn < N_V_ARG_REG)
+ slot = get_d_addr (context, nsrn++);
+ else
+ slot = &stack[nsaa++];
+ *slot = extend_basic_type (*(UINT64 *)avalue[i], t);
+ break;
+
case FFI_TYPE_LONGDOUBLE:
+ if (nsrn < N_V_ARG_REG)
+ slot = &context->v[nsrn++].d[0].d;
+ else
+ {
+ nsaa = ALIGN (nsaa, 2);
+ slot = &stack[nsaa];
+ nsaa += 2;
+ }
+ memcpy (slot, avalue[i], sizeof(long double));
+ break;
+
case FFI_TYPE_UINT8:
case FFI_TYPE_SINT8:
case FFI_TYPE_UINT16:
@@ -626,207 +435,111 @@ aarch64_prep_args (struct call_context *context, unsigned char *stack,
case FFI_TYPE_POINTER:
case FFI_TYPE_UINT64:
case FFI_TYPE_SINT64:
- copy_to_register_or_stack (context, stack, &state,
- ecif->avalue[i], ty->type);
+ if (ngrn < N_X_ARG_REG)
+ slot = &context->x[ngrn++];
+ else
+ slot = &stack[nsaa++];
+ *slot = extend_basic_type (*(ffi_arg *)avalue[i], t);
break;
+ case FFI_TYPE_VOID:
+ /* Note that libgo passes void as a parameter for a
+ struct with no fields. */
case FFI_TYPE_STRUCT:
- if (is_hfa (ty))
- {
- copy_hfa_to_reg_or_stack (ecif->avalue[i], ty, context,
- stack, &state);
- }
- else if (ty->size > 16)
- {
- /* If the argument is a composite type that is larger than 16
- bytes, then the argument has been copied to memory, and
- the argument is replaced by a pointer to the copy. */
+ {
+ size_t slot_count;
- copy_to_register_or_stack (context, stack, &state,
- &(ecif->avalue[i]), FFI_TYPE_POINTER);
- }
- else if (available_x (&state) >= (ty->size + 7) / 8)
- {
- /* If the argument is a composite type and the size in
- double-words is not more than the number of available
- X registers, then the argument is copied into consecutive
- X registers. */
- int j;
- for (j = 0; j < (ty->size + 7) / 8; j++)
- {
- memcpy (allocate_to_x (context, &state),
- &(((UINT64 *) ecif->avalue[i])[j]),
- sizeof (UINT64));
- }
- }
- else
- {
- /* Otherwise, there are insufficient X registers. Further X
- register allocations are prevented, the NSAA is adjusted
- (by allocate_to_stack ()) and the argument is copied to
- memory at the adjusted NSAA. */
- state.ngrn = N_X_ARG_REG;
-
- memcpy (allocate_to_stack (&state, stack, ty->alignment,
- ty->size), ecif->avalue + i, ty->size);
- }
+ size = ty->size;
+ slot_count = (size + 7) / 8;
+ h = is_hfa (ty);
+ if (h)
+ {
+ int j, reg_count = h >> 8, tt = h & 0xff;
+
+ if (nsrn + reg_count <= N_V_ARG_REG)
+ {
+ switch (tt)
+ {
+ case FFI_TYPE_FLOAT:
+ {
+ UINT32 *src = avalue[i];
+ for (j = 0; j < reg_count; ++j)
+ *get_s_addr (context, nsrn + j) = src[j];
+ }
+ break;
+
+ case FFI_TYPE_DOUBLE:
+ {
+ UINT64 *src = avalue[i];
+ for (j = 0; j < reg_count; ++j)
+ *get_d_addr (context, nsrn + j) = src[j];
+ }
+ break;
+
+ case FFI_TYPE_LONGDOUBLE:
+ memcpy(&context->v[nsrn], avalue[i], size);
+ break;
+
+ default:
+ abort ();
+ }
+ nsrn += reg_count;
+ break;
+ }
+ /* All out of fp registers. Copy to the stack. */
+ nsrn = N_V_ARG_REG;
+ }
+ else if (size > 16)
+ {
+ /* If the argument is a composite type that is larger than
+ 16 bytes, then the argument has been copied to memory,
+ and the argument is replaced by a pointer. */
+ if (ngrn < N_X_ARG_REG)
+ slot = &context->x[ngrn++];
+ else
+ slot = &stack[nsaa++];
+ *slot = (uintptr_t)avalue[i];
+ break;
+ }
+ else
+ {
+ if (ty->alignment == 16)
+ ngrn = ALIGN (ngrn, 2);
+
+ if (ngrn + slot_count <= N_X_ARG_REG)
+ {
+ slot = &context->x[ngrn];
+ ngrn += slot_count;
+ memcpy (slot, avalue[i], size);
+ break;
+ }
+ /* All out of general registers. Copy to the stack. */
+ ngrn = N_X_ARG_REG;
+ }
+ if (ty->alignment > 8)
+ {
+ int a = ty->alignment / 8;
+ nsaa = ALIGN (nsaa, a);
+ }
+ memcpy (&stack[nsaa], avalue[i], size);
+ nsaa += slot_count;
+ }
break;
default:
- FFI_ASSERT (0);
+ abort ();
break;
}
}
- return ecif->cif->aarch64_flags;
+ size = cif->rtype->size;
+ ffi_call_SYSV (frame, local_rvalue, context, cif->flags, fn);
+ if (local_rvalue != rvalue && rvalue != NULL)
+ memcpy (rvalue, local_rvalue, size);
}
-ffi_status
-ffi_prep_cif_machdep (ffi_cif *cif)
-{
- /* Round the stack up to a multiple of the stack alignment requirement. */
- cif->bytes =
- (cif->bytes + (AARCH64_STACK_ALIGN - 1)) & ~ (AARCH64_STACK_ALIGN - 1);
-
- /* Initialize our flags. We are interested if this CIF will touch a
- vector register, if so we will enable context save and load to
- those registers, otherwise not. This is intended to be friendly
- to lazy float context switching in the kernel. */
- cif->aarch64_flags = 0;
-
- if (is_v_register_candidate (cif->rtype))
- {
- cif->aarch64_flags |= AARCH64_FFI_WITH_V;
- }
- else
- {
- int i;
- for (i = 0; i < cif->nargs; i++)
- if (is_v_register_candidate (cif->arg_types[i]))
- {
- cif->aarch64_flags |= AARCH64_FFI_WITH_V;
- break;
- }
- }
-
- return FFI_OK;
-}
-
-/* Call a function with the provided arguments and capture the return
- value. */
-void
-ffi_call (ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
-{
- extended_cif ecif;
-
- ecif.cif = cif;
- ecif.avalue = avalue;
- ecif.rvalue = rvalue;
-
- switch (cif->abi)
- {
- case FFI_SYSV:
- {
- struct call_context context;
- unsigned stack_bytes;
-
- /* Figure out the total amount of stack space we need, the
- above call frame space needs to be 16 bytes aligned to
- ensure correct alignment of the first object inserted in
- that space hence the ALIGN applied to cif->bytes.*/
- stack_bytes = ALIGN(cif->bytes, 16);
-
- memset (&context, 0, sizeof (context));
- if (is_register_candidate (cif->rtype))
- {
- ffi_call_SYSV (aarch64_prep_args, &context, &ecif, stack_bytes, fn);
- switch (cif->rtype->type)
- {
- case FFI_TYPE_VOID:
- case FFI_TYPE_FLOAT:
- case FFI_TYPE_DOUBLE:
- case FFI_TYPE_LONGDOUBLE:
- case FFI_TYPE_UINT8:
- case FFI_TYPE_SINT8:
- case FFI_TYPE_UINT16:
- case FFI_TYPE_SINT16:
- case FFI_TYPE_UINT32:
- case FFI_TYPE_SINT32:
- case FFI_TYPE_POINTER:
- case FFI_TYPE_UINT64:
- case FFI_TYPE_INT:
- case FFI_TYPE_SINT64:
- {
- void *addr = get_basic_type_addr (cif->rtype->type,
- &context, 0);
- copy_basic_type (rvalue, addr, cif->rtype->type);
- break;
- }
-
- case FFI_TYPE_STRUCT:
- if (is_hfa (cif->rtype))
- {
- int j;
- unsigned short type = get_homogeneous_type (cif->rtype);
- unsigned elems = element_count (cif->rtype);
- for (j = 0; j < elems; j++)
- {
- void *reg = get_basic_type_addr (type, &context, j);
- copy_basic_type (rvalue, reg, type);
- rvalue += get_basic_type_size (type);
- }
- }
- else if ((cif->rtype->size + 7) / 8 < N_X_ARG_REG)
- {
- unsigned size = ALIGN (cif->rtype->size, sizeof (UINT64));
- memcpy (rvalue, get_x_addr (&context, 0), size);
- }
- else
- {
- FFI_ASSERT (0);
- }
- break;
-
- default:
- FFI_ASSERT (0);
- break;
- }
- }
- else
- {
- memcpy (get_x_addr (&context, 8), &rvalue, sizeof (UINT64));
- ffi_call_SYSV (aarch64_prep_args, &context, &ecif,
- stack_bytes, fn);
- }
- break;
- }
-
- default:
- FFI_ASSERT (0);
- break;
- }
-}
-
-static unsigned char trampoline [] =
-{ 0x70, 0x00, 0x00, 0x58, /* ldr x16, 1f */
- 0x91, 0x00, 0x00, 0x10, /* adr x17, 2f */
- 0x00, 0x02, 0x1f, 0xd6 /* br x16 */
-};
-
/* Build a trampoline. */
-#define FFI_INIT_TRAMPOLINE(TRAMP,FUN,CTX,FLAGS) \
- ({unsigned char *__tramp = (unsigned char*)(TRAMP); \
- UINT64 __fun = (UINT64)(FUN); \
- UINT64 __ctx = (UINT64)(CTX); \
- UINT64 __flags = (UINT64)(FLAGS); \
- memcpy (__tramp, trampoline, sizeof (trampoline)); \
- memcpy (__tramp + 12, &__fun, sizeof (__fun)); \
- memcpy (__tramp + 20, &__ctx, sizeof (__ctx)); \
- memcpy (__tramp + 28, &__flags, sizeof (__flags)); \
- __clear_cache(__tramp, __tramp + FFI_TRAMPOLINE_SIZE); \
- })
-
ffi_status
ffi_prep_closure_loc (ffi_closure* closure,
ffi_cif* cif,
@@ -834,15 +547,29 @@ ffi_prep_closure_loc (ffi_closure* closure,
void *user_data,
void *codeloc)
{
+ static const unsigned char trampoline[16] = {
+ 0x90, 0x00, 0x00, 0x58, /* ldr x16, 16 */
+ 0xf1, 0xff, 0xff, 0x10, /* adr x17, 0 */
+ 0x00, 0x02, 0x1f, 0xd6, /* br x16 */
+ };
+ char *tramp = &closure->tramp[0];
+ void (*entry)(void);
+
if (cif->abi != FFI_SYSV)
return FFI_BAD_ABI;
- FFI_INIT_TRAMPOLINE (&closure->tramp[0], &ffi_closure_SYSV, codeloc,
- cif->aarch64_flags);
+ entry = (cif->flags & AARCH64_FLAG_ARG_V
+ ? ffi_closure_SYSV_V : ffi_closure_SYSV);
+
+ memcpy (tramp, trampoline, sizeof(trampoline));
+
+ *(UINT64 *)(tramp + 16) = (uintptr_t)entry;
- closure->cif = cif;
+ closure->cif = cif;
+ closure->fun = fun;
closure->user_data = user_data;
- closure->fun = fun;
+
+ __clear_cache (tramp, tramp + sizeof(trampoline));
return FFI_OK;
}
@@ -863,26 +590,33 @@ ffi_prep_closure_loc (ffi_closure* closure,
descriptors, invokes the wrapped function, then marshals the return
value back into the call context. */
-void
-ffi_closure_SYSV_inner (ffi_closure *closure, struct call_context *context,
- void *stack)
+UINT64 FFI_HIDDEN
+ffi_closure_SYSV_inner (ffi_cif *cif,
+ void (*fun)(ffi_cif*,void*,void**,void*),
+ void *user_data,
+ struct call_context *context,
+ UINT64 *stack, void *rvalue)
{
- ffi_cif *cif = closure->cif;
void **avalue = (void**) alloca (cif->nargs * sizeof (void*));
- void *rvalue = NULL;
- int i;
- struct arg_state state;
-
- arg_init (&state, ALIGN(cif->bytes, 16));
-
- for (i = 0; i < cif->nargs; i++)
+ ffi_type **arg_types;
+ int i, nargs, h, ngrn, nsrn, nsaa;
+ size_t size;
+
+ ngrn = nsrn = nsaa = 0;
+ arg_types = cif->arg_types;
+ nargs = cif->nargs;
+
+ for (i = 0; i < nargs; i++)
{
- ffi_type *ty = cif->arg_types[i];
+ ffi_type *ty = arg_types[i];
+ int t = ty->type;
+ void *slot;
- switch (ty->type)
+ switch (t)
{
case FFI_TYPE_VOID:
- FFI_ASSERT (0);
+ /* ??? abort */
+ slot = NULL;
break;
case FFI_TYPE_UINT8:
@@ -895,182 +629,128 @@ ffi_closure_SYSV_inner (ffi_closure *closure, struct call_context *context,
case FFI_TYPE_POINTER:
case FFI_TYPE_UINT64:
case FFI_TYPE_SINT64:
- case FFI_TYPE_FLOAT:
- case FFI_TYPE_DOUBLE:
- case FFI_TYPE_LONGDOUBLE:
- avalue[i] = allocate_to_register_or_stack (context, stack,
- &state, ty->type);
+ if (ngrn < N_X_ARG_REG)
+ slot = &context->x[ngrn++];
+ else
+ slot = &stack[nsaa++];
+ *(ffi_arg *)slot = extend_basic_type (*(UINT64 *)slot, t);
break;
- case FFI_TYPE_STRUCT:
- if (is_hfa (ty))
- {
- unsigned n = element_count (ty);
- if (available_v (&state) < n)
- {
- state.nsrn = N_V_ARG_REG;
- avalue[i] = allocate_to_stack (&state, stack, ty->alignment,
- ty->size);
- }
- else
- {
- switch (get_homogeneous_type (ty))
- {
- case FFI_TYPE_FLOAT:
- {
- /* Eeek! We need a pointer to the structure,
- however the homogeneous float elements are
- being passed in individual S registers,
- therefore the structure is not represented as
- a contiguous sequence of bytes in our saved
- register context. We need to fake up a copy
- of the structure layed out in memory
- correctly. The fake can be tossed once the
- closure function has returned hence alloca()
- is sufficient. */
- int j;
- UINT32 *p = avalue[i] = alloca (ty->size);
- for (j = 0; j < element_count (ty); j++)
- memcpy (&p[j],
- allocate_to_s (context, &state),
- sizeof (*p));
- break;
- }
-
- case FFI_TYPE_DOUBLE:
- {
- /* Eeek! We need a pointer to the structure,
- however the homogeneous float elements are
- being passed in individual S registers,
- therefore the structure is not represented as
- a contiguous sequence of bytes in our saved
- register context. We need to fake up a copy
- of the structure layed out in memory
- correctly. The fake can be tossed once the
- closure function has returned hence alloca()
- is sufficient. */
- int j;
- UINT64 *p = avalue[i] = alloca (ty->size);
- for (j = 0; j < element_count (ty); j++)
- memcpy (&p[j],
- allocate_to_d (context, &state),
- sizeof (*p));
- break;
- }
+ case FFI_TYPE_FLOAT:
+ if (nsrn < N_V_ARG_REG)
+ slot = get_s_addr (context, nsrn++);
+ else
+ slot = &stack[nsaa++];
+ break;
- case FFI_TYPE_LONGDOUBLE:
- memcpy (&avalue[i],
- allocate_to_v (context, &state),
- sizeof (*avalue));
- break;
+ case FFI_TYPE_DOUBLE:
+ if (nsrn < N_V_ARG_REG)
+ slot = get_d_addr (context, nsrn++);
+ else
+ slot = &stack[nsaa++];
+ break;
- default:
- FFI_ASSERT (0);
- break;
- }
- }
- }
- else if (ty->size > 16)
- {
- /* Replace Composite type of size greater than 16 with a
- pointer. */
- memcpy (&avalue[i],
- allocate_to_register_or_stack (context, stack,
- &state, FFI_TYPE_POINTER),
- sizeof (avalue[i]));
- }
- else if (available_x (&state) >= (ty->size + 7) / 8)
- {
- avalue[i] = get_x_addr (context, state.ngrn);
- state.ngrn += (ty->size + 7) / 8;
- }
+ case FFI_TYPE_LONGDOUBLE:
+ if (nsrn < N_V_ARG_REG)
+ slot = &context->v[nsrn++];
else
{
- state.ngrn = N_X_ARG_REG;
-
- avalue[i] = allocate_to_stack (&state, stack, ty->alignment,
- ty->size);
+ nsaa = ALIGN (nsaa, 2);
+ slot = &stack[nsaa];
+ nsaa += 2;
}
break;
- default:
- FFI_ASSERT (0);
+ case FFI_TYPE_STRUCT:
+ {
+ size_t slot_count;
+
+ size = ty->size;
+ slot_count = (size + 7) / 8;
+ h = is_hfa (ty);
+ if (h)
+ {
+ int reg_count = h >> 8;
+ int tt = h & 0xff;
+ int j;
+
+ if (nsrn + reg_count <= N_V_ARG_REG)
+ {
+ switch (tt)
+ {
+ case FFI_TYPE_FLOAT:
+ {
+ UINT32 *dst = alloca (size);
+ for (j = 0; j < reg_count; ++j)
+ dst[j] = *get_s_addr(context, nsrn + j);
+ slot = dst;
+ }
+ break;
+ case FFI_TYPE_DOUBLE:
+ {
+ UINT64 *dst = alloca (size);
+ for (j = 0; j < reg_count; ++j)
+ dst[j] = *get_d_addr(context, nsrn + j);
+ slot = dst;
+ }
+ break;
+ case FFI_TYPE_LONGDOUBLE:
+ slot = &context->v[nsrn];
+ break;
+ default:
+ abort ();
+ }
+ nsrn += reg_count;
+ break;
+ }
+ /* All out of fp registers. It's on the stack. */
+ nsrn = N_V_ARG_REG;
+ }
+ else if (size > 16)
+ {
+ /* The argument is passed by indirection. */
+ if (ngrn < N_X_ARG_REG)
+ slot = (void *)(uintptr_t)context->x[ngrn++];
+ else
+ slot = (void *)(uintptr_t)stack[nsaa++];
+ break;
+ }
+ else
+ {
+ if (ty->alignment == 16)
+ ngrn = ALIGN (ngrn, 2);
+
+ if (ngrn + slot_count <= N_X_ARG_REG)
+ {
+ slot = &context->x[ngrn];
+ ngrn += slot_count;
+ break;
+ }
+ /* All out of general registers. Copy to the stack. */
+ ngrn = N_X_ARG_REG;
+ }
+ if (ty->alignment > 8)
+ {
+ int a = ty->alignment / 8;
+ nsaa = ALIGN (nsaa, a);
+ }
+ slot = &stack[nsaa];
+ nsaa += slot_count;
+ }
break;
+
+ default:
+ abort ();
}
+
+ avalue[i] = slot;
}
- /* Figure out where the return value will be passed, either in
- registers or in a memory block allocated by the caller and passed
- in x8. */
+ h = cif->flags & AARCH64_FLAG_RET_MASK;
+ if (h != AARCH64_RET_LG_STRUCT)
+ rvalue = context + 1;
- if (is_register_candidate (cif->rtype))
- {
- /* Register candidates are *always* returned in registers. */
-
- /* Allocate a scratchpad for the return value, we will let the
- callee scrible the result into the scratch pad then move the
- contents into the appropriate return value location for the
- call convention. */
- rvalue = alloca (cif->rtype->size);
- (closure->fun) (cif, rvalue, avalue, closure->user_data);
-
- /* Copy the return value into the call context so that it is returned
- as expected to our caller. */
- switch (cif->rtype->type)
- {
- case FFI_TYPE_VOID:
- break;
+ fun (cif, rvalue, avalue, user_data);
- case FFI_TYPE_UINT8:
- case FFI_TYPE_UINT16:
- case FFI_TYPE_UINT32:
- case FFI_TYPE_POINTER:
- case FFI_TYPE_UINT64:
- case FFI_TYPE_SINT8:
- case FFI_TYPE_SINT16:
- case FFI_TYPE_INT:
- case FFI_TYPE_SINT32:
- case FFI_TYPE_SINT64:
- case FFI_TYPE_FLOAT:
- case FFI_TYPE_DOUBLE:
- case FFI_TYPE_LONGDOUBLE:
- {
- void *addr = get_basic_type_addr (cif->rtype->type, context, 0);
- copy_basic_type (addr, rvalue, cif->rtype->type);
- break;
- }
- case FFI_TYPE_STRUCT:
- if (is_hfa (cif->rtype))
- {
- int i;
- unsigned short type = get_homogeneous_type (cif->rtype);
- unsigned elems = element_count (cif->rtype);
- for (i = 0; i < elems; i++)
- {
- void *reg = get_basic_type_addr (type, context, i);
- copy_basic_type (reg, rvalue, type);
- rvalue += get_basic_type_size (type);
- }
- }
- else if ((cif->rtype->size + 7) / 8 < N_X_ARG_REG)
- {
- unsigned size = ALIGN (cif->rtype->size, sizeof (UINT64)) ;
- memcpy (get_x_addr (context, 0), rvalue, size);
- }
- else
- {
- FFI_ASSERT (0);
- }
- break;
- default:
- FFI_ASSERT (0);
- break;
- }
- }
- else
- {
- memcpy (&rvalue, get_x_addr (context, 8), sizeof (UINT64));
- (closure->fun) (cif, rvalue, avalue, closure->user_data);
- }
+ return h;
}
-
diff --git a/libffi/src/aarch64/ffitarget.h b/libffi/src/aarch64/ffitarget.h
index 6f1a348..ecfa159 100644
--- a/libffi/src/aarch64/ffitarget.h
+++ b/libffi/src/aarch64/ffitarget.h
@@ -27,8 +27,8 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
#endif
#ifndef LIBFFI_ASM
-typedef unsigned long ffi_arg;
-typedef signed long ffi_sarg;
+typedef unsigned long long ffi_arg;
+typedef signed long long ffi_sarg;
typedef enum ffi_abi
{
@@ -42,18 +42,7 @@ typedef enum ffi_abi
/* ---- Definitions for closures ----------------------------------------- */
#define FFI_CLOSURES 1
-#define FFI_TRAMPOLINE_SIZE 36
+#define FFI_TRAMPOLINE_SIZE 24
#define FFI_NATIVE_RAW_API 0
-/* ---- Internal ---- */
-
-
-#define FFI_EXTRA_CIF_FIELDS unsigned aarch64_flags
-
-#define AARCH64_FFI_WITH_V_BIT 0
-
-#define AARCH64_N_XREG 32
-#define AARCH64_N_VREG 32
-#define AARCH64_CALL_CONTEXT_SIZE (AARCH64_N_XREG * 8 + AARCH64_N_VREG * 16)
-
#endif
diff --git a/libffi/src/aarch64/internal.h b/libffi/src/aarch64/internal.h
new file mode 100644
index 0000000..63cf683
--- /dev/null
+++ b/libffi/src/aarch64/internal.h
@@ -0,0 +1,43 @@
+/*
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+``Software''), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+/* ---- Internal ---- */
+
+#define AARCH64_RET_UINT32 0
+#define AARCH64_RET_SINT32 1
+#define AARCH64_RET_INT64 2
+#define AARCH64_RET_SM_STRUCT 3
+#define AARCH64_RET_FLOAT 4
+#define AARCH64_RET_DOUBLE 5
+#define AARCH64_RET_LDOUBLE 6
+#define AARCH64_RET_HFA_FLOAT 7
+#define AARCH64_RET_HFA_DOUBLE 8
+#define AARCH64_RET_HFA_LDOUBLE 13
+#define AARCH64_RET_LG_STRUCT 14
+#define AARCH64_RET_VOID 15
+#define AARCH64_FLAG_RET_MASK 15
+
+#define AARCH64_FLAG_ARG_V_BIT 4
+#define AARCH64_FLAG_ARG_V (1 << AARCH64_FLAG_ARG_V_BIT)
+
+#define AARCH64_N_VREG 8
+#define AARCH64_N_XREG 8
+#define AARCH64_CALL_CONTEXT_SIZE (AARCH64_N_VREG * 16 + AARCH64_N_XREG * 8)
+
diff --git a/libffi/src/aarch64/sysv.S b/libffi/src/aarch64/sysv.S
index ffb16f8..126c527 100644
--- a/libffi/src/aarch64/sysv.S
+++ b/libffi/src/aarch64/sysv.S
@@ -22,286 +22,285 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
#define LIBFFI_ASM
#include <fficonfig.h>
#include <ffi.h>
+#include "internal.h"
-#define cfi_adjust_cfa_offset(off) .cfi_adjust_cfa_offset off
-#define cfi_rel_offset(reg, off) .cfi_rel_offset reg, off
-#define cfi_restore(reg) .cfi_restore reg
-#define cfi_def_cfa_register(reg) .cfi_def_cfa_register reg
-
- .text
- .globl ffi_call_SYSV
- .type ffi_call_SYSV, #function
+ .text
/* ffi_call_SYSV()
- Create a stack frame, setup an argument context, call the callee
- and extract the result.
-
- The maximum required argument stack size is provided,
- ffi_call_SYSV() allocates that stack space then calls the
- prepare_fn to populate register context and stack. The
- argument passing registers are loaded from the register
- context and the callee called, on return the register passing
- register are saved back to the context. Our caller will
- extract the return value from the final state of the saved
- register context.
+ Install an argument context and a stack frame.
+ Call the callee and extract the result.
Prototype:
- extern unsigned
- ffi_call_SYSV (void (*)(struct call_context *context, unsigned char *,
- extended_cif *),
- struct call_context *context,
- extended_cif *,
- unsigned required_stack_size,
- void (*fn)(void));
-
- Therefore on entry we have:
-
- x0 prepare_fn
- x1 &context
- x2 &ecif
- x3 bytes
- x4 fn
-
- This function uses the following stack frame layout:
-
- ==
- saved x30(lr)
- x29(fp)-> saved x29(fp)
- saved x24
- saved x23
- saved x22
- sp' -> saved x21
- ...
- sp -> (constructed callee stack arguments)
- ==
+ extern void
+ ffi_call_SYSV (void *frame, void *rvalue, struct call_context *context,
+ unsigned flags, void (*fn)(void))
- Voila! */
+ This function uses an unusual stack layout. Our local frame has
+ been allocated by the caller in FRAME with the outgoing arguments
+ in CONTEXT, and the outgoing stack arguments above CONTEXT. */
-#define ffi_call_SYSV_FS (8 * 4)
+ .globl ffi_call_SYSV
+ .hidden ffi_call_SYSV
+ .type ffi_call_SYSV, %function
+ .balign 32
- .cfi_startproc
ffi_call_SYSV:
- stp x29, x30, [sp, #-16]!
- cfi_adjust_cfa_offset (16)
- cfi_rel_offset (x29, 0)
- cfi_rel_offset (x30, 8)
-
- mov x29, sp
- cfi_def_cfa_register (x29)
- sub sp, sp, #ffi_call_SYSV_FS
-
- stp x21, x22, [sp, 0]
- cfi_rel_offset (x21, 0 - ffi_call_SYSV_FS)
- cfi_rel_offset (x22, 8 - ffi_call_SYSV_FS)
-
- stp x23, x24, [sp, 16]
- cfi_rel_offset (x23, 16 - ffi_call_SYSV_FS)
- cfi_rel_offset (x24, 24 - ffi_call_SYSV_FS)
-
- mov x21, x1
- mov x22, x2
- mov x24, x4
-
- /* Allocate the stack space for the actual arguments, many
- arguments will be passed in registers, but we assume
- worst case and allocate sufficient stack for ALL of
- the arguments. */
- sub sp, sp, x3
-
- /* unsigned (*prepare_fn) (struct call_context *context,
- unsigned char *stack, extended_cif *ecif);
- */
- mov x23, x0
- mov x0, x1
- mov x1, sp
- /* x2 already in place */
- blr x23
-
- /* Preserve the flags returned. */
- mov x23, x0
-
- /* Figure out if we should touch the vector registers. */
- tbz x23, #AARCH64_FFI_WITH_V_BIT, 1f
-
- /* Load the vector argument passing registers. */
- ldp q0, q1, [x21, #8*32 + 0]
- ldp q2, q3, [x21, #8*32 + 32]
- ldp q4, q5, [x21, #8*32 + 64]
- ldp q6, q7, [x21, #8*32 + 96]
-1:
- /* Load the core argument passing registers. */
- ldp x0, x1, [x21, #0]
- ldp x2, x3, [x21, #16]
- ldp x4, x5, [x21, #32]
- ldp x6, x7, [x21, #48]
-
- /* Don't forget x8 which may be holding the address of a return buffer.
- */
- ldr x8, [x21, #8*8]
-
- blr x24
-
- /* Save the core argument passing registers. */
- stp x0, x1, [x21, #0]
- stp x2, x3, [x21, #16]
- stp x4, x5, [x21, #32]
- stp x6, x7, [x21, #48]
-
- /* Note nothing useful ever comes back in x8! */
-
- /* Figure out if we should touch the vector registers. */
- tbz x23, #AARCH64_FFI_WITH_V_BIT, 1f
-
- /* Save the vector argument passing registers. */
- stp q0, q1, [x21, #8*32 + 0]
- stp q2, q3, [x21, #8*32 + 32]
- stp q4, q5, [x21, #8*32 + 64]
- stp q6, q7, [x21, #8*32 + 96]
+ .cfi_startproc
+ .cfi_def_cfa x0, 32
+ stp x29, x30, [x0] /* Save fp, lr in our frame. */
+ mov x29, x0 /* Set up our new frame. */
+ .cfi_def_cfa_register x29
+ .cfi_rel_offset x29, 0
+ .cfi_rel_offset x30, 8
+
+ /* Move parameters out of the way. */
+ stp x3, x1, [x0, #16] /* flags, rvalue */
+ mov x8, x1 /* rvalue into place */
+ mov x10, x2 /* context */
+ mov x11, x4 /* fn */
+
+ /* Load the vector argument passing registers, if needed. */
+ tbz w3, #AARCH64_FLAG_ARG_V_BIT, 1f
+ ldp q0, q1, [x10, #8*AARCH64_N_XREG + 0]
+ ldp q2, q3, [x10, #8*AARCH64_N_XREG + 32]
+ ldp q4, q5, [x10, #8*AARCH64_N_XREG + 64]
+ ldp q6, q7, [x10, #8*AARCH64_N_XREG + 96]
1:
- /* All done, unwind our stack frame. */
- ldp x21, x22, [x29, # - ffi_call_SYSV_FS]
- cfi_restore (x21)
- cfi_restore (x22)
-
- ldp x23, x24, [x29, # - ffi_call_SYSV_FS + 16]
- cfi_restore (x23)
- cfi_restore (x24)
-
- mov sp, x29
- cfi_def_cfa_register (sp)
-
- ldp x29, x30, [sp], #16
- cfi_adjust_cfa_offset (-16)
- cfi_restore (x29)
- cfi_restore (x30)
-
- ret
-
- .cfi_endproc
- .size ffi_call_SYSV, .-ffi_call_SYSV
-
-#define ffi_closure_SYSV_FS (8 * 2 + AARCH64_CALL_CONTEXT_SIZE)
+ /* Load the core argument passing registers. */
+ ldp x0, x1, [x10, #16*0]
+ ldp x2, x3, [x10, #16*1]
+ ldp x4, x5, [x10, #16*2]
+ ldp x6, x7, [x10, #16*3]
+
+ /* Setup SP for the stacked arguments. */
+ add sp, x10, #AARCH64_CALL_CONTEXT_SIZE
+
+ /* Call fn. */
+ blr x11
+
+ /* Recover the flags value and result address. */
+ ldp x3, x8, [x29, #16]
+
+ /* Store the return value as directed by the return type.
+ Each case uses 8 bytes, so compute the address directly. */
+ adr x2, 3f
+ and w3, w3, #AARCH64_FLAG_RET_MASK
+ add x2, x2, x3, lsl #3
+ br x2
+
+ /* Store results into the rvalue. Note that for most integer
+ cases this is actually ffi_arg, aka a 64-bit result.
+ For the HFA cases, and the (small) struct case, we've arranged
+ for temporary storage, so store the largest possible.
+ For the large struct case, we've remapped to VOID, since
+ the callee has already done the store via x8. */
+ .balign 8
+/* 0: AARCH64_RET_UINT32 */
+3: mov w0, w0
+ b 4f
+/* 1: AARCH64_RET_SINT32 */
+ sxtw x0, w0
+ nop
+/* 2: AARCH64_RET_INT64 */
+4: str x0, [x8]
+ b 9f
+/* 3: AARCH64_RET_SM_STRUCT */
+ stp x0, x1, [x8]
+ b 9f
+/* 4: AARCH64_RET_FLOAT */
+ str s0, [x8]
+ b 9f
+/* 5: AARCH64_RET_DOUBLE */
+ str d0, [x8]
+ b 9f
+/* 6: AARCH64_RET_LDOUBLE */
+ str q0, [x8]
+ b 9f
+/* 7: AARCH64_RET_HFA_FLOAT */
+ st4 { v0.s, v1.s, v2.s, v3.s }[0], [x8]
+ b 9f
+/* 8: AARCH64_RET_HFA_DOUBLE */
+ st4 { v0.d, v1.d, v2.d, v3.d }[0], [x8]
+ b 9f
+/* 9: invalid */
+ brk #1000
+ nop
+/* A: invalid */
+ brk #1000
+ nop
+/* B: invalid */
+ brk #1000
+ nop
+/* C: invalid */
+ brk #1000
+ nop
+/* D: AARCH64_RET_HFA_LDOUBLE */
+ stp q0, q1, [x8]
+ stp q2, q3, [x8, #32]
+/* E: AARCH64_RET_LG_STRUCT */
+ nop
+ nop
+/* F: AARCH64_RET_VOID */
+9: ldp x29, x30, [x29]
+ .cfi_def_cfa sp, 0
+ .cfi_restore x29
+ .cfi_restore x30
+ ret
+ .cfi_endproc
+ .size ffi_call_SYSV, .-ffi_call_SYSV
/* ffi_closure_SYSV
Closure invocation glue. This is the low level code invoked directly by
the closure trampoline to setup and call a closure.
- On entry x17 points to a struct trampoline_data, x16 has been clobbered
- all other registers are preserved.
+ On entry x17 points to a ffi_closure, x16 has been clobbered,
+ and all other registers are preserved.
We allocate a call context and save the argument passing registers,
then invoke the generic C ffi_closure_SYSV_inner() function to do all
the real work, on return we load the result passing registers back from
the call context.
- On entry
-
- extern void
- ffi_closure_SYSV (struct trampoline_data *);
-
- struct trampoline_data
- {
- UINT64 *ffi_closure;
- UINT64 flags;
- };
+ We use two separate entry points, depending on whether there are
+ any vector argument registers.
This function uses the following stack frame layout:
==
- saved x30(lr)
- x29(fp)-> saved x29(fp)
- saved x22
- saved x21
- ...
- sp -> call_context
+ temporary return slot
+ call_context
+ saved x30(lr)
+ sp, x29-> saved x29(fp)
==
Voila! */
- .text
- .globl ffi_closure_SYSV
- .cfi_startproc
+#define ffi_closure_FS (16 + AARCH64_CALL_CONTEXT_SIZE + 64)
+
+ .globl ffi_closure_SYSV_V
+ .hidden ffi_closure_SYSV_V
+ .type ffi_closure_SYSV_V, %function
+ .balign 32
+
+ffi_closure_SYSV_V:
+ .cfi_startproc
+ stp x29, x30, [sp, #-ffi_closure_FS]!
+ .cfi_adjust_cfa_offset ffi_closure_FS
+ .cfi_rel_offset x29, 0
+ .cfi_rel_offset x30, 8
+ mov x29, sp
+
+ /* Save the argument passing vector registers. */
+ stp q0, q1, [sp, #16 + 8*AARCH64_N_XREG + 0]
+ stp q2, q3, [sp, #16 + 8*AARCH64_N_XREG + 32]
+ stp q4, q5, [sp, #16 + 8*AARCH64_N_XREG + 64]
+ stp q6, q7, [sp, #16 + 8*AARCH64_N_XREG + 96]
+ b 0f
+
+ .cfi_endproc
+ .size ffi_closure_SYSV_V, . - ffi_closure_SYSV_V
+
+ .globl ffi_closure_SYSV
+ .hidden ffi_closure_SYSV
+ .type ffi_closure_SYSV, %function
+ .balign 32
+
ffi_closure_SYSV:
- stp x29, x30, [sp, #-16]!
- cfi_adjust_cfa_offset (16)
- cfi_rel_offset (x29, 0)
- cfi_rel_offset (x30, 8)
-
- mov x29, sp
- cfi_def_cfa_register (x29)
-
- sub sp, sp, #ffi_closure_SYSV_FS
-
- stp x21, x22, [x29, #-16]
- cfi_rel_offset (x21, -16)
- cfi_rel_offset (x22, -8)
-
- /* Load x21 with &call_context. */
- mov x21, sp
- /* Preserve our struct trampoline_data * */
- mov x22, x17
-
- /* Save the rest of the argument passing registers. */
- stp x0, x1, [x21, #0]
- stp x2, x3, [x21, #16]
- stp x4, x5, [x21, #32]
- stp x6, x7, [x21, #48]
- /* Don't forget we may have been given a result scratch pad address.
- */
- str x8, [x21, #64]
-
- /* Figure out if we should touch the vector registers. */
- ldr x0, [x22, #8]
- tbz x0, #AARCH64_FFI_WITH_V_BIT, 1f
-
- /* Save the argument passing vector registers. */
- stp q0, q1, [x21, #8*32 + 0]
- stp q2, q3, [x21, #8*32 + 32]
- stp q4, q5, [x21, #8*32 + 64]
- stp q6, q7, [x21, #8*32 + 96]
-1:
- /* Load &ffi_closure.. */
- ldr x0, [x22, #0]
- mov x1, x21
- /* Compute the location of the stack at the point that the
- trampoline was called. */
- add x2, x29, #16
-
- bl ffi_closure_SYSV_inner
-
- /* Figure out if we should touch the vector registers. */
- ldr x0, [x22, #8]
- tbz x0, #AARCH64_FFI_WITH_V_BIT, 1f
-
- /* Load the result passing vector registers. */
- ldp q0, q1, [x21, #8*32 + 0]
- ldp q2, q3, [x21, #8*32 + 32]
- ldp q4, q5, [x21, #8*32 + 64]
- ldp q6, q7, [x21, #8*32 + 96]
-1:
- /* Load the result passing core registers. */
- ldp x0, x1, [x21, #0]
- ldp x2, x3, [x21, #16]
- ldp x4, x5, [x21, #32]
- ldp x6, x7, [x21, #48]
- /* Note nothing usefull is returned in x8. */
-
- /* We are done, unwind our frame. */
- ldp x21, x22, [x29, #-16]
- cfi_restore (x21)
- cfi_restore (x22)
-
- mov sp, x29
- cfi_def_cfa_register (sp)
-
- ldp x29, x30, [sp], #16
- cfi_adjust_cfa_offset (-16)
- cfi_restore (x29)
- cfi_restore (x30)
-
- ret
- .cfi_endproc
- .size ffi_closure_SYSV, .-ffi_closure_SYSV
+ .cfi_startproc
+ stp x29, x30, [sp, #-ffi_closure_FS]!
+ .cfi_adjust_cfa_offset ffi_closure_FS
+ .cfi_rel_offset x29, 0
+ .cfi_rel_offset x30, 8
+ mov x29, sp
+
+ /* Save the argument passing core registers. */
+0: stp x0, x1, [sp, #16 + 0]
+ stp x2, x3, [sp, #16 + 16]
+ stp x4, x5, [sp, #16 + 32]
+ stp x6, x7, [sp, #16 + 48]
+
+ ldp x0, x1, [x17, #FFI_TRAMPOLINE_SIZE] /* Load cif, fun */
+ ldr x2, [x17, #FFI_TRAMPOLINE_SIZE + 16] /* Load user_data */
+
+.Ldo_closure:
+ add x3, sp, #16 /* Load &call_context. */
+ add x4, sp, #ffi_closure_FS /* Load incoming sp value. */
+ mov x5, x8 /* Load struct return. */
+ bl ffi_closure_SYSV_inner
+
+ /* Load the return type. Each case uses 8 bytes, so compute it
+ directly. Load x8 with address of the temporary return slot. */
+ adr x1, 3f
+ and w0, w0, #AARCH64_FLAG_RET_MASK
+ add x1, x1, x0, lsl #3
+ add x8, sp, #16 + AARCH64_CALL_CONTEXT_SIZE
+ br x1
+
+ /* Load results from temporary storage. Note that for most integer
+ cases this is actually ffi_arg, aka a 64-bit result. For the HFA
+ cases and the (small) struct case, we can load the maximum width.
+ For the large struct case, we've remapped to VOID. */
+#if defined __AARCH64EB__
+# define INT32OFS 4
+#else
+# define INT32OFS 0
+#endif
+
+ .balign 8
+/* 0: AARCH64_RET_UINT32 */
+3: ldr w0, [x8, #INT32OFS]
+ b 9f
+/* 1: AARCH64_RET_SINT32 */
+ ldrsw x0, [x8, #INT32OFS]
+ b 9f
+/* 2: AARCH64_RET_INT64 */
+ ldr x0, [x8]
+ b 9f
+/* 3: AARCH64_RET_SM_STRUCT */
+ ldp x0, x1, [x8]
+ b 9f
+/* 4: AARCH64_RET_FLOAT */
+ ldr s0, [x8]
+ b 9f
+/* 5: AARCH64_RET_DOUBLE */
+ ldr d0, [x8]
+ b 9f
+/* 6: AARCH64_RET_LDOUBLE */
+ ldr q0, [x8]
+ b 9f
+/* 7: AARCH64_RET_HFA_FLOAT */
+ ld4 { v0.s, v1.s, v2.s, v3.s }[0], [x8]
+ b 9f
+/* 8: AARCH64_RET_HFA_DOUBLE */
+ ld1 { v0.1d, v1.1d, v2.1d, v3.1d }, [x8]
+ b 9f
+/* 9: invalid */
+ brk #1000
+ nop
+/* A: invalid */
+ brk #1000
+ nop
+/* B: invalid */
+ brk #1000
+ nop
+/* C: invalid */
+ brk #1000
+ nop
+/* D: AARCH64_RET_HFA_LDOUBLE */
+ ldp q0, q1, [x8]
+ ldp q2, q3, [x8, #32]
+/* E: AARCH64_RET_LG_STRUCT */
+ nop
+ nop
+/* F: AARCH64_RET_VOID */
+9: /* We are done, unwind our frame. */
+ ldp x29, x30, [sp], #ffi_closure_FS
+ .cfi_adjust_cfa_offset -ffi_closure_FS
+ .cfi_restore x29
+ .cfi_restore x30
+ ret
+ .cfi_endproc
+ .size ffi_closure_SYSV, .-ffi_closure_SYSV
--
1.9.3