aboutsummaryrefslogtreecommitdiff
path: root/libffi/src/ia64
diff options
context:
space:
mode:
authorRichard Henderson <rth@redhat.com>2004-12-31 12:11:17 -0800
committerRichard Henderson <rth@gcc.gnu.org>2004-12-31 12:11:17 -0800
commit81a69b13339a5a975ab17eef54490148addaf531 (patch)
tree9eac59f95be2aa7a36f95f2c7c9553e325370df0 /libffi/src/ia64
parentbdaa445236ad169bf757aa9d81c26d7e09a28e0c (diff)
downloadgcc-81a69b13339a5a975ab17eef54490148addaf531.zip
gcc-81a69b13339a5a975ab17eef54490148addaf531.tar.gz
gcc-81a69b13339a5a975ab17eef54490148addaf531.tar.bz2
types.c (FFI_TYPE_POINTER): Define with sizeof.
* src/types.c (FFI_TYPE_POINTER): Define with sizeof. (FFI_TYPE_LONGDOUBLE): Fix for ia64. * src/ia64/ffitarget.h (struct ffi_ia64_trampoline_struct): Move into ffi_prep_closure. * src/ia64/ia64_flags.h, src/ia64/ffi.c, src/ia64/unix.S: Rewrite from scratch. From-SVN: r92774
Diffstat (limited to 'libffi/src/ia64')
-rw-r--r--libffi/src/ia64/ffi.c947
-rw-r--r--libffi/src/ia64/ffitarget.h9
-rw-r--r--libffi/src/ia64/ia64_flags.h47
-rw-r--r--libffi/src/ia64/unix.S781
4 files changed, 945 insertions, 839 deletions
diff --git a/libffi/src/ia64/ffi.c b/libffi/src/ia64/ffi.c
index 1dc27db..e810827 100644
--- a/libffi/src/ia64/ffi.c
+++ b/libffi/src/ia64/ffi.c
@@ -29,622 +29,365 @@
#include <stdlib.h>
#include <stdbool.h>
+#include <float.h>
#include "ia64_flags.h"
-/* Memory image of fp register contents. Should eventually be an fp */
-/* type long enough to hold an entire register. For now we use double. */
-typedef double float80;
-
-/* The stack layout at call to ffi_prep_args. Other_args will remain */
-/* on the stack for the actual call. Everything else we be transferred */
-/* to registers and popped by the assembly code. */
-
-struct ia64_args {
- long scratch[2]; /* Two scratch words at top of stack. */
- /* Allows sp to be passed as arg pointer. */
- void * r8_contents; /* Value to be passed in r8 */
- long spare; /* Not used. */
- float80 fp_regs[8]; /* Contents of 8 floating point argument */
- /* registers. */
- long out_regs[8]; /* Contents of the 8 out registers used */
- /* for integer parameters. */
- long other_args[0]; /* Arguments passed on stack, variable size */
- /* Treated as continuation of out_regs. */
+/* A 64-bit pointer value. In LP64 mode, this is effectively a plain
+ pointer. In ILP32 mode, it's a pointer that's been extended to
+ 64 bits by "addp4". */
+typedef void *PTR64 __attribute__((mode(DI)));
+
+/* Memory image of fp register contents. This is the implementation
+ specific format used by ldf.fill/stf.spill. All we care about is
+ that it wants a 16 byte aligned slot. */
+typedef struct
+{
+ UINT64 x[2] __attribute__((aligned(16)));
+} fpreg;
+
+
+/* The stack layout given to ffi_call_unix and ffi_closure_unix_inner. */
+
+struct ia64_args
+{
+ fpreg fp_regs[8]; /* Contents of 8 fp arg registers. */
+ UINT64 gp_regs[8]; /* Contents of 8 gp arg registers. */
+ UINT64 other_args[]; /* Arguments passed on stack, variable size. */
};
-static size_t float_type_size(unsigned short tp)
+
+/* Adjust ADDR, a pointer to an 8 byte slot, to point to the low LEN bytes. */
+
+static inline void *
+endian_adjust (void *addr, size_t len)
{
- switch(tp) {
- case FFI_TYPE_FLOAT:
- return sizeof(float);
- case FFI_TYPE_DOUBLE:
- return sizeof(double);
-#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
- case FFI_TYPE_LONGDOUBLE:
- return sizeof(long double);
+#ifdef __BIG_ENDIAN__
+ return addr + (8 - len);
+#else
+ return addr;
#endif
- default:
- FFI_ASSERT(0);
- }
}
-/*
- * Is type a struct containing at most n floats, doubles, or extended
- * doubles, all of the same fp type?
- * If so, set *element_type to the fp type.
- */
-static bool is_homogeneous_fp_aggregate(ffi_type * type, int n,
- unsigned short * element_type)
+/* Store VALUE to ADDR in the current cpu implementation's fp spill format. */
+
+static inline void
+stf_spill(fpreg *addr, __float80 value)
{
- ffi_type **ptr;
- unsigned short element, struct_element;
+ asm ("stf.spill %0 = %1%P0" : "=m" (*addr) : "f"(value));
+}
+
+/* Load a value from ADDR, which is in the current cpu implementation's
+ fp spill format. */
- int type_set = 0;
+static inline __float80
+ldf_fill(fpreg *addr)
+{
+ __float80 ret;
+ asm ("ldf.fill %0 = %1%P1" : "=f"(ret) : "m"(*addr));
+ return ret;
+}
- FFI_ASSERT(type != NULL);
+/* Return the size of the C type associated with with TYPE. Which will
+ be one of the FFI_IA64_TYPE_HFA_* values. */
- FFI_ASSERT(type->elements != NULL);
+static size_t
+hfa_type_size (int type)
+{
+ switch (type)
+ {
+ case FFI_IA64_TYPE_HFA_FLOAT:
+ return sizeof(float);
+ case FFI_IA64_TYPE_HFA_DOUBLE:
+ return sizeof(double);
+ case FFI_IA64_TYPE_HFA_LDOUBLE:
+ return sizeof(__float80);
+ default:
+ abort ();
+ }
+}
- ptr = &(type->elements[0]);
+/* Load from ADDR a value indicated by TYPE. Which will be one of
+ the FFI_IA64_TYPE_HFA_* values. */
- while ((*ptr) != NULL)
+static __float80
+hfa_type_load (int type, void *addr)
+{
+ switch (type)
{
- switch((*ptr) -> type) {
- case FFI_TYPE_FLOAT:
- if (type_set && element != FFI_TYPE_FLOAT) return 0;
- if (--n < 0) return false;
- type_set = 1;
- element = FFI_TYPE_FLOAT;
- break;
- case FFI_TYPE_DOUBLE:
- if (type_set && element != FFI_TYPE_DOUBLE) return 0;
- if (--n < 0) return false;
- type_set = 1;
- element = FFI_TYPE_DOUBLE;
- break;
- case FFI_TYPE_STRUCT:
- if (!is_homogeneous_fp_aggregate(type, n, &struct_element))
- return false;
- if (type_set && struct_element != element) return false;
- n -= (type -> size)/float_type_size(element);
- element = struct_element;
- if (n < 0) return false;
- break;
- /* case FFI_TYPE_LONGDOUBLE:
- Not yet implemented. */
- default:
- return false;
- }
- ptr++;
+ case FFI_IA64_TYPE_HFA_FLOAT:
+ return *(float *) addr;
+ case FFI_IA64_TYPE_HFA_DOUBLE:
+ return *(double *) addr;
+ case FFI_IA64_TYPE_HFA_LDOUBLE:
+ return *(__float80 *) addr;
+ default:
+ abort ();
}
- *element_type = element;
- return true;
-
-}
+}
-/* ffi_prep_args is called by the assembly routine once stack space
- has been allocated for the function's arguments. It fills in
- the arguments in the structure referenced by stack. Returns nonzero
- if fp registers are used for arguments. */
+/* Load VALUE into ADDR as indicated by TYPE. Which will be one of
+ the FFI_IA64_TYPE_HFA_* values. */
-static bool
-ffi_prep_args(struct ia64_args *stack, extended_cif *ecif, int bytes)
+static void
+hfa_type_store (int type, void *addr, __float80 value)
{
- register long i, avn;
- register void **p_argv;
- register long *argp = stack -> out_regs;
- register float80 *fp_argp = stack -> fp_regs;
- register ffi_type **p_arg;
-
- /* For big return structs, r8 needs to contain the target address. */
- /* Since r8 is otherwise dead, we set it unconditionally. */
- stack -> r8_contents = ecif -> rvalue;
- i = 0;
- avn = ecif->cif->nargs;
- p_arg = ecif->cif->arg_types;
- p_argv = ecif->avalue;
- while (i < avn)
+ switch (type)
{
- size_t z; /* z is in units of arg slots or words, not bytes. */
+ case FFI_IA64_TYPE_HFA_FLOAT:
+ *(float *) addr = value;
+ break;
+ case FFI_IA64_TYPE_HFA_DOUBLE:
+ *(double *) addr = value;
+ break;
+ case FFI_IA64_TYPE_HFA_LDOUBLE:
+ *(__float80 *) addr = value;
+ break;
+ default:
+ abort ();
+ }
+}
- switch ((*p_arg)->type)
- {
- case FFI_TYPE_SINT8:
- z = 1;
- *(SINT64 *) argp = *(SINT8 *)(* p_argv);
- break;
-
- case FFI_TYPE_UINT8:
- z = 1;
- *(UINT64 *) argp = *(UINT8 *)(* p_argv);
- break;
-
- case FFI_TYPE_SINT16:
- z = 1;
- *(SINT64 *) argp = *(SINT16 *)(* p_argv);
- break;
-
- case FFI_TYPE_UINT16:
- z = 1;
- *(UINT64 *) argp = *(UINT16 *)(* p_argv);
- break;
-
- case FFI_TYPE_SINT32:
- z = 1;
- *(SINT64 *) argp = *(SINT32 *)(* p_argv);
- break;
-
- case FFI_TYPE_UINT32:
- z = 1;
- *(UINT64 *) argp = *(UINT32 *)(* p_argv);
- break;
+/* Is TYPE a struct containing floats, doubles, or extended doubles,
+ all of the same fp type? If so, return the element type. Return
+ FFI_TYPE_VOID if not. */
- case FFI_TYPE_SINT64:
- case FFI_TYPE_UINT64:
- case FFI_TYPE_POINTER:
- z = 1;
- *(UINT64 *) argp = *(UINT64 *)(* p_argv);
- break;
+static int
+hfa_element_type (ffi_type *type, int nested)
+{
+ int element = FFI_TYPE_VOID;
- case FFI_TYPE_FLOAT:
- z = 1;
- if (fp_argp - stack->fp_regs < 8)
- {
- /* Note the conversion -- all the fp regs are loaded as
- doubles. */
- *fp_argp++ = *(float *)(* p_argv);
- }
- /* Also put it into the integer registers or memory: */
- *(UINT64 *) argp = *(UINT32 *)(* p_argv);
- break;
+ switch (type->type)
+ {
+ case FFI_TYPE_FLOAT:
+ /* We want to return VOID for raw floating-point types, but the
+ synthetic HFA type if we're nested within an aggregate. */
+ if (nested)
+ element = FFI_IA64_TYPE_HFA_FLOAT;
+ break;
- case FFI_TYPE_DOUBLE:
- z = 1;
- if (fp_argp - stack->fp_regs < 8)
- *fp_argp++ = *(double *)(* p_argv);
- /* Also put it into the integer registers or memory: */
- *(double *) argp = *(double *)(* p_argv);
- break;
+ case FFI_TYPE_DOUBLE:
+ /* Similarly. */
+ if (nested)
+ element = FFI_IA64_TYPE_HFA_DOUBLE;
+ break;
- case FFI_TYPE_STRUCT:
+ case FFI_TYPE_LONGDOUBLE:
+ /* Similarly, except that that HFA is true for double extended,
+ but not quad precision. Both have sizeof == 16, so tell the
+ difference based on the precision. */
+ if (LDBL_MANT_DIG == 64 && nested)
+ element = FFI_IA64_TYPE_HFA_LDOUBLE;
+ break;
+
+ case FFI_TYPE_STRUCT:
+ {
+ ffi_type **ptr = &type->elements[0];
+
+ for (ptr = &type->elements[0]; *ptr ; ptr++)
{
- size_t sz = (*p_arg)->size;
- unsigned short element_type;
- z = ((*p_arg)->size + FFI_SIZEOF_ARG - 1)/FFI_SIZEOF_ARG;
- if (is_homogeneous_fp_aggregate(*p_arg, 8, &element_type)) {
- int i;
- int nelements = sz/float_type_size(element_type);
- for (i = 0; i < nelements; ++i) {
- switch (element_type) {
- case FFI_TYPE_FLOAT:
- if (fp_argp - stack->fp_regs < 8)
- *fp_argp++ = ((float *)(* p_argv))[i];
- break;
- case FFI_TYPE_DOUBLE:
- if (fp_argp - stack->fp_regs < 8)
- *fp_argp++ = ((double *)(* p_argv))[i];
- break;
- default:
- /* Extended precision not yet implemented. */
- abort();
- }
- }
- }
- /* And pass it in integer registers as a struct, with */
- /* its actual field sizes packed into registers. */
- memcpy(argp, *p_argv, (*p_arg)->size);
+ int sub_element = hfa_element_type (*ptr, 1);
+ if (sub_element == FFI_TYPE_VOID)
+ return FFI_TYPE_VOID;
+
+ if (element == FFI_TYPE_VOID)
+ element = sub_element;
+ else if (element != sub_element)
+ return FFI_TYPE_VOID;
}
- break;
-
- default:
- FFI_ASSERT(0);
- }
+ }
+ break;
- argp += z;
- i++, p_arg++, p_argv++;
+ default:
+ return FFI_TYPE_VOID;
}
- return (fp_argp != stack -> fp_regs);
+
+ return element;
}
-/* Perform machine dependent cif processing */
+
+/* Perform machine dependent cif processing. */
+
ffi_status
ffi_prep_cif_machdep(ffi_cif *cif)
{
- long i, avn;
- bool is_simple = true;
- long simple_flag = FFI_SIMPLE_V;
- /* Adjust cif->bytes to include space for the 2 scratch words,
- r8 register contents, spare word,
- the 8 fp register contents, and all 8 integer register contents.
- This will be removed before the call, though 2 scratch words must
- remain. */
-
- cif->bytes += 4*sizeof(long) + 8 *sizeof(float80);
+ int flags;
+
+ /* Adjust cif->bytes to include space for the bits of the ia64_args frame
+ that preceeds the integer register portion. The estimate that the
+ generic bits did for the argument space required is good enough for the
+ integer component. */
+ cif->bytes += offsetof(struct ia64_args, gp_regs[0]);
if (cif->bytes < sizeof(struct ia64_args))
cif->bytes = sizeof(struct ia64_args);
- /* The stack must be double word aligned, so round bytes up
- appropriately. */
-
- cif->bytes = ALIGN(cif->bytes, 2*sizeof(void*));
-
- avn = cif->nargs;
- if (avn <= 2) {
- for (i = 0; i < avn; ++i) {
- switch(cif -> arg_types[i] -> type) {
- case FFI_TYPE_SINT32:
- simple_flag = FFI_ADD_INT_ARG(simple_flag);
- break;
- case FFI_TYPE_SINT64:
- case FFI_TYPE_UINT64:
- case FFI_TYPE_POINTER:
- simple_flag = FFI_ADD_LONG_ARG(simple_flag);
- break;
- default:
- is_simple = false;
- }
- }
- } else {
- is_simple = false;
- }
-
- /* Set the return type flag */
+ /* Set the return type flag. */
+ flags = cif->rtype->type;
switch (cif->rtype->type)
{
- case FFI_TYPE_VOID:
- cif->flags = FFI_TYPE_VOID;
+ case FFI_TYPE_LONGDOUBLE:
+ /* Leave FFI_TYPE_LONGDOUBLE as meaning double extended precision,
+ and encode quad precision as a two-word integer structure. */
+ if (LDBL_MANT_DIG != 64)
+ flags = FFI_IA64_TYPE_SMALL_STRUCT | (16 << 8);
break;
case FFI_TYPE_STRUCT:
{
- size_t sz = cif -> rtype -> size;
- unsigned short element_type;
-
- is_simple = false;
- if (is_homogeneous_fp_aggregate(cif -> rtype, 8, &element_type)) {
- int nelements = sz/float_type_size(element_type);
- if (nelements <= 1) {
- if (0 == nelements) {
- cif -> flags = FFI_TYPE_VOID;
- } else {
- cif -> flags = element_type;
- }
- } else {
- switch(element_type) {
- case FFI_TYPE_FLOAT:
- cif -> flags = FFI_IS_FLOAT_FP_AGGREGATE | nelements;
- break;
- case FFI_TYPE_DOUBLE:
- cif -> flags = FFI_IS_DOUBLE_FP_AGGREGATE | nelements;
- break;
- default:
- /* long double NYI */
- abort();
- }
+ size_t size = cif->rtype->size;
+ int hfa_type = hfa_element_type (cif->rtype, 0);
+
+ if (hfa_type != FFI_TYPE_VOID)
+ {
+ size_t nelts = size / hfa_type_size (hfa_type);
+ if (nelts <= 8)
+ flags = hfa_type | (size << 8);
}
- break;
- }
- if (sz <= 32) {
- if (sz <= 8) {
- cif->flags = FFI_TYPE_INT;
- } else if (sz <= 16) {
- cif->flags = FFI_IS_SMALL_STRUCT2;
- } else if (sz <= 24) {
- cif->flags = FFI_IS_SMALL_STRUCT3;
- } else {
- cif->flags = FFI_IS_SMALL_STRUCT4;
+ else
+ {
+ if (size <= 32)
+ flags = FFI_IA64_TYPE_SMALL_STRUCT | (size << 8);
}
- } else {
- cif->flags = FFI_TYPE_STRUCT;
- }
}
break;
- case FFI_TYPE_FLOAT:
- is_simple = false;
- cif->flags = FFI_TYPE_FLOAT;
- break;
-
- case FFI_TYPE_DOUBLE:
- is_simple = false;
- cif->flags = FFI_TYPE_DOUBLE;
- break;
-
default:
- cif->flags = FFI_TYPE_INT;
- /* This seems to depend on little endian mode, and the fact that */
- /* the return pointer always points to at least 8 bytes. But */
- /* that also seems to be true for other platforms. */
break;
}
-
- if (is_simple) cif -> flags |= simple_flag;
+ cif->flags = flags;
+
return FFI_OK;
}
-extern int ffi_call_unix(bool (*)(struct ia64_args *, extended_cif *, int),
- extended_cif *, unsigned,
- unsigned, unsigned *, void (*)());
+extern int ffi_call_unix (struct ia64_args *, PTR64, void (*)(), UINT64);
void
ffi_call(ffi_cif *cif, void (*fn)(), void *rvalue, void **avalue)
{
- extended_cif ecif;
- long simple = cif -> flags & FFI_SIMPLE;
-
- /* Should this also check for Unix ABI? */
- /* This is almost, but not quite, machine independent. Note that */
- /* we can get away with not caring about length of the result because */
- /* we assume we are little endian, and the result buffer is large */
- /* enough. */
- /* This needs work for HP/UX. */
- if (simple) {
- long (*lfn)() = (long (*)())fn;
- long result;
- switch(simple) {
- case FFI_SIMPLE_V:
- result = lfn();
- break;
- case FFI_SIMPLE_I:
- result = lfn(*(int *)avalue[0]);
- break;
- case FFI_SIMPLE_L:
- result = lfn(*(long *)avalue[0]);
- break;
- case FFI_SIMPLE_II:
- result = lfn(*(int *)avalue[0], *(int *)avalue[1]);
- break;
- case FFI_SIMPLE_IL:
- result = lfn(*(int *)avalue[0], *(long *)avalue[1]);
- break;
- case FFI_SIMPLE_LI:
- result = lfn(*(long *)avalue[0], *(int *)avalue[1]);
- break;
- case FFI_SIMPLE_LL:
- result = lfn(*(long *)avalue[0], *(long *)avalue[1]);
- break;
- }
- if ((cif->flags & ~FFI_SIMPLE) != FFI_TYPE_VOID && 0 != rvalue) {
- * (long *)rvalue = result;
- }
- return;
- }
- ecif.cif = cif;
- ecif.avalue = avalue;
-
- /* If the return value is a struct and we don't have a return
- value address then we need to make one. */
-
- if (rvalue == NULL && cif->rtype->type == FFI_TYPE_STRUCT)
- ecif.rvalue = alloca(cif->rtype->size);
- else
- ecif.rvalue = rvalue;
-
- switch (cif->abi)
- {
- case FFI_UNIX:
- ffi_call_unix(ffi_prep_args, &ecif, cif->bytes,
- cif->flags, rvalue, fn);
- break;
+ struct ia64_args *stack;
+ long i, avn, gpcount, fpcount;
+ ffi_type **p_arg;
- default:
- FFI_ASSERT(0);
- break;
- }
-}
-
-/*
- * Closures represent a pair consisting of a function pointer, and
- * some user data. A closure is invoked by reinterpreting the closure
- * as a function pointer, and branching to it. Thus we can make an
- * interpreted function callable as a C function: We turn the interpreter
- * itself, together with a pointer specifying the interpreted procedure,
- * into a closure.
- * On X86, the first few words of the closure structure actually contain code,
- * which will do the right thing. On most other architectures, this
- * would raise some Icache/Dcache coherence issues (which can be solved, but
- * often not cheaply).
- * For IA64, function pointer are already pairs consisting of a code
- * pointer, and a gp pointer. The latter is needed to access global variables.
- * Here we set up such a pair as the first two words of the closure (in
- * the "trampoline" area), but we replace the gp pointer with a pointer
- * to the closure itself. We also add the real gp pointer to the
- * closure. This allows the function entry code to both retrieve the
- * user data, and to restire the correct gp pointer.
- */
-
-static void
-ffi_prep_incoming_args_UNIX(struct ia64_args *args, void **rvalue,
- void **avalue, ffi_cif *cif);
-
-/* This function is entered with the doctored gp (r1) value.
- * This code is extremely gcc specific. There is some argument that
- * it should really be written in assembly code, since it depends on
- * gcc properties that might change over time.
- */
-
-/* ffi_closure_UNIX is an assembly routine, which copies the register */
-/* state into a struct ia64_args, and then invokes */
-/* ffi_closure_UNIX_inner. It also recovers the closure pointer */
-/* from its fake gp pointer. */
-void ffi_closure_UNIX();
-
-#ifndef __GNUC__
-# error This requires gcc
-#endif
-void
-ffi_closure_UNIX_inner (ffi_closure *closure, struct ia64_args * args)
-/* Hopefully declaring this as a varargs function will force all args */
-/* to memory. */
-{
- // this is our return value storage
- long double res;
-
- // our various things...
- ffi_cif *cif;
- unsigned short rtype;
- void *resp;
- void **arg_area;
-
- resp = (void*)&res;
- cif = closure->cif;
- arg_area = (void**) alloca (cif->nargs * sizeof (void*));
-
- /* this call will initialize ARG_AREA, such that each
- * element in that array points to the corresponding
- * value on the stack; and if the function returns
- * a structure, it will re-set RESP to point to the
- * structure return address. */
-
- ffi_prep_incoming_args_UNIX(args, (void**)&resp, arg_area, cif);
-
- (closure->fun) (cif, resp, arg_area, closure->user_data);
-
- rtype = cif->flags;
-
- /* now, do a generic return based on the value of rtype */
- if (rtype == FFI_TYPE_INT)
- {
- asm volatile ("ld8 r8=[%0]" : : "r" (resp) : "r8");
- }
- else if (rtype == FFI_TYPE_FLOAT)
- {
- asm volatile ("ldfs f8=[%0]" : : "r" (resp) : "f8");
- }
- else if (rtype == FFI_TYPE_DOUBLE)
- {
- asm volatile ("ldfd f8=[%0]" : : "r" (resp) : "f8");
- }
- else if (rtype == FFI_IS_SMALL_STRUCT2)
- {
- asm volatile ("ld8 r8=[%0]; ld8 r9=[%1]"
- : : "r" (resp), "r" (resp+8) : "r8","r9");
- }
- else if (rtype == FFI_IS_SMALL_STRUCT3)
- {
- asm volatile ("ld8 r8=[%0]; ld8 r9=[%1]; ld8 r10=[%2]"
- : : "r" (resp), "r" (resp+8), "r" (resp+16)
- : "r8","r9","r10");
- }
- else if (rtype == FFI_IS_SMALL_STRUCT4)
- {
- asm volatile ("ld8 r8=[%0]; ld8 r9=[%1]; ld8 r10=[%2]; ld8 r11=[%3]"
- : : "r" (resp), "r" (resp+8), "r" (resp+16), "r" (resp+24)
- : "r8","r9","r10","r11");
- }
- else if (rtype != FFI_TYPE_VOID && rtype != FFI_TYPE_STRUCT)
- {
- /* Can only happen for homogeneous FP aggregates? */
- abort();
- }
-}
+ FFI_ASSERT (cif->abi == FFI_UNIX);
-static void
-ffi_prep_incoming_args_UNIX(struct ia64_args *args, void **rvalue,
- void **avalue, ffi_cif *cif)
-{
- register unsigned int i;
- register unsigned int avn;
- register void **p_argv;
- register long *argp = args -> out_regs;
- unsigned fp_reg_num = 0;
- register ffi_type **p_arg;
+ /* If we have no spot for a return value, make one. */
+ if (rvalue == NULL && cif->rtype->type != FFI_TYPE_VOID)
+ rvalue = alloca (cif->rtype->size);
+
+ /* Allocate the stack frame. */
+ stack = alloca (cif->bytes);
+ gpcount = fpcount = 0;
avn = cif->nargs;
- p_argv = avalue;
-
- for (i = cif->nargs, p_arg = cif->arg_types; i != 0; i--, p_arg++)
+ for (i = 0, p_arg = cif->arg_types; i < avn; i++, p_arg++)
{
- size_t z; /* In units of words or argument slots. */
-
switch ((*p_arg)->type)
{
case FFI_TYPE_SINT8:
+ stack->gp_regs[gpcount++] = *(SINT8 *)avalue[i];
+ break;
case FFI_TYPE_UINT8:
+ stack->gp_regs[gpcount++] = *(UINT8 *)avalue[i];
+ break;
case FFI_TYPE_SINT16:
+ stack->gp_regs[gpcount++] = *(SINT16 *)avalue[i];
+ break;
case FFI_TYPE_UINT16:
+ stack->gp_regs[gpcount++] = *(UINT16 *)avalue[i];
+ break;
case FFI_TYPE_SINT32:
+ stack->gp_regs[gpcount++] = *(SINT32 *)avalue[i];
+ break;
case FFI_TYPE_UINT32:
+ stack->gp_regs[gpcount++] = *(UINT32 *)avalue[i];
+ break;
case FFI_TYPE_SINT64:
case FFI_TYPE_UINT64:
+ stack->gp_regs[gpcount++] = *(UINT64 *)avalue[i];
+ break;
+
case FFI_TYPE_POINTER:
- z = 1;
- *p_argv = (void *)argp;
+ stack->gp_regs[gpcount++] = (UINT64)(PTR64) *(void **)avalue[i];
break;
-
+
case FFI_TYPE_FLOAT:
- z = 1;
- /* Convert argument back to float in place from the saved value */
- if (argp - args->out_regs < 8 && fp_reg_num < 8) {
- *(float *)argp = args -> fp_regs[fp_reg_num++];
- }
- *p_argv = (void *)argp;
+ if (gpcount < 8 && fpcount < 8)
+ stf_spill (&stack->fp_regs[fpcount++], *(float *)avalue[i]);
+ stack->gp_regs[gpcount++] = *(UINT32 *)avalue[i];
break;
case FFI_TYPE_DOUBLE:
- z = 1;
- if (argp - args->out_regs < 8 && fp_reg_num < 8) {
- *p_argv = args -> fp_regs + fp_reg_num++;
- } else {
- *p_argv = (void *)argp;
- }
+ if (gpcount < 8 && fpcount < 8)
+ stf_spill (&stack->fp_regs[fpcount++], *(double *)avalue[i]);
+ stack->gp_regs[gpcount++] = *(UINT64 *)avalue[i];
+ break;
+
+ case FFI_TYPE_LONGDOUBLE:
+ if (gpcount & 1)
+ gpcount++;
+ if (LDBL_MANT_DIG == 64 && gpcount < 8 && fpcount < 8)
+ stf_spill (&stack->fp_regs[fpcount++], *(__float80 *)avalue[i]);
+ memcpy (&stack->gp_regs[gpcount], avalue[i], 16);
+ gpcount += 2;
break;
case FFI_TYPE_STRUCT:
{
- size_t sz = (*p_arg)->size;
- unsigned short element_type;
- z = ((*p_arg)->size + FFI_SIZEOF_ARG - 1)/FFI_SIZEOF_ARG;
- if (argp - args->out_regs < 8
- && is_homogeneous_fp_aggregate(*p_arg, 8, &element_type)) {
- int nelements = sz/float_type_size(element_type);
- if (nelements + fp_reg_num >= 8) {
- /* hard case NYI. */
- abort();
- }
- if (element_type == FFI_TYPE_DOUBLE) {
- *p_argv = args -> fp_regs + fp_reg_num;
- fp_reg_num += nelements;
- break;
- }
- if (element_type == FFI_TYPE_FLOAT) {
- int j;
- for (j = 0; j < nelements; ++ j) {
- ((float *)argp)[j] = args -> fp_regs[fp_reg_num + j];
+ size_t size = (*p_arg)->size;
+ size_t align = (*p_arg)->alignment;
+ int hfa_type = hfa_element_type (*p_arg, 0);
+
+ FFI_ASSERT (align <= 16);
+ if (align == 16 && (gpcount & 1))
+ gpcount++;
+
+ if (hfa_type != FFI_TYPE_VOID)
+ {
+ size_t hfa_size = hfa_type_size (hfa_type);
+ size_t offset = 0;
+ size_t gp_offset = gpcount * 8;
+
+ while (fpcount < 8
+ && offset < size
+ && gp_offset < 8 * 8)
+ {
+ stf_spill (&stack->fp_regs[fpcount],
+ hfa_type_load (hfa_type, avalue[i] + offset));
+ offset += hfa_size;
+ gp_offset += hfa_size;
+ fpcount += 1;
}
- *p_argv = (void *)argp;
- fp_reg_num += nelements;
- break;
- }
- abort(); /* Other fp types NYI */
}
+
+ memcpy (&stack->gp_regs[gpcount], avalue[i], size);
+ gpcount += (size + 7) / 8;
}
break;
default:
- FFI_ASSERT(0);
+ abort ();
}
-
- argp += z;
- p_argv++;
-
}
-
- return;
+
+ ffi_call_unix (stack, rvalue, fn, cif->flags);
}
+/* Closures represent a pair consisting of a function pointer, and
+ some user data. A closure is invoked by reinterpreting the closure
+ as a function pointer, and branching to it. Thus we can make an
+ interpreted function callable as a C function: We turn the
+ interpreter itself, together with a pointer specifying the
+ interpreted procedure, into a closure.
-/* Fill in a closure to refer to the specified fun and user_data. */
-/* cif specifies the argument and result types for fun. */
-/* the cif must already be prep'ed */
+ For IA64, function pointer are already pairs consisting of a code
+ pointer, and a gp pointer. The latter is needed to access global
+ variables. Here we set up such a pair as the first two words of
+ the closure (in the "trampoline" area), but we replace the gp
+ pointer with a pointer to the closure itself. We also add the real
+ gp pointer to the closure. This allows the function entry code to
+ both retrieve the user data, and to restire the correct gp pointer. */
-/* The layout of a function descriptor. A C function pointer really */
-/* points to one of these. */
-typedef struct ia64_fd_struct {
- void *code_pointer;
- void *gp;
-} ia64_fd;
+extern void ffi_closure_unix ();
ffi_status
ffi_prep_closure (ffi_closure* closure,
@@ -652,20 +395,168 @@ ffi_prep_closure (ffi_closure* closure,
void (*fun)(ffi_cif*,void*,void**,void*),
void *user_data)
{
- struct ffi_ia64_trampoline_struct *tramp =
- (struct ffi_ia64_trampoline_struct *) (closure -> tramp);
- ia64_fd *fd = (ia64_fd *)(void *)ffi_closure_UNIX;
+ /* The layout of a function descriptor. A C function pointer really
+ points to one of these. */
+ struct ia64_fd
+ {
+ UINT64 code_pointer;
+ UINT64 gp;
+ };
+
+ struct ffi_ia64_trampoline_struct
+ {
+ UINT64 code_pointer; /* Pointer to ffi_closure_unix. */
+ UINT64 fake_gp; /* Pointer to closure, installed as gp. */
+ UINT64 real_gp; /* Real gp value. */
+ };
+
+ struct ffi_ia64_trampoline_struct *tramp;
+ struct ia64_fd *fd;
FFI_ASSERT (cif->abi == FFI_UNIX);
- tramp -> code_pointer = fd -> code_pointer;
- tramp -> real_gp = fd -> gp;
- tramp -> fake_gp = closure;
- closure->cif = cif;
+ tramp = (struct ffi_ia64_trampoline_struct *)closure->tramp;
+ fd = (struct ia64_fd *)(void *)ffi_closure_unix;
+
+ tramp->code_pointer = fd->code_pointer;
+ tramp->real_gp = fd->gp;
+ tramp->fake_gp = (UINT64)(PTR64)closure;
+ closure->cif = cif;
closure->user_data = user_data;
- closure->fun = fun;
+ closure->fun = fun;
return FFI_OK;
}
+UINT64
+ffi_closure_unix_inner (ffi_closure *closure, struct ia64_args *stack,
+ void *rvalue, void *r8)
+{
+ ffi_cif *cif;
+ void **avalue;
+ ffi_type **p_arg;
+ long i, avn, gpcount, fpcount;
+
+ cif = closure->cif;
+ avn = cif->nargs;
+ avalue = alloca (avn * sizeof (void *));
+
+ /* If the structure return value is passed in memory get that location
+ from r8 so as to pass the value directly back to the caller. */
+ if (cif->flags == FFI_TYPE_STRUCT)
+ rvalue = r8;
+
+ gpcount = fpcount = 0;
+ for (i = 0, p_arg = cif->arg_types; i < avn; i++, p_arg++)
+ {
+ switch ((*p_arg)->type)
+ {
+ case FFI_TYPE_SINT8:
+ case FFI_TYPE_UINT8:
+ avalue[i] = endian_adjust(&stack->gp_regs[gpcount++], 1);
+ break;
+ case FFI_TYPE_SINT16:
+ case FFI_TYPE_UINT16:
+ avalue[i] = endian_adjust(&stack->gp_regs[gpcount++], 2);
+ break;
+ case FFI_TYPE_SINT32:
+ case FFI_TYPE_UINT32:
+ avalue[i] = endian_adjust(&stack->gp_regs[gpcount++], 4);
+ break;
+ case FFI_TYPE_SINT64:
+ case FFI_TYPE_UINT64:
+ avalue[i] = &stack->gp_regs[gpcount++];
+ break;
+ case FFI_TYPE_POINTER:
+ avalue[i] = endian_adjust(&stack->gp_regs[gpcount++], sizeof(void*));
+ break;
+
+ case FFI_TYPE_FLOAT:
+ if (gpcount < 8 && fpcount < 8)
+ {
+ void *addr = &stack->fp_regs[fpcount++];
+ avalue[i] = addr;
+ *(float *)addr = ldf_fill (addr);
+ }
+ else
+ avalue[i] = endian_adjust(&stack->gp_regs[gpcount], 4);
+ gpcount++;
+ break;
+
+ case FFI_TYPE_DOUBLE:
+ if (gpcount < 8 && fpcount < 8)
+ {
+ void *addr = &stack->fp_regs[fpcount++];
+ avalue[i] = addr;
+ *(double *)addr = ldf_fill (addr);
+ }
+ else
+ avalue[i] = &stack->gp_regs[gpcount];
+ gpcount++;
+ break;
+
+ case FFI_TYPE_LONGDOUBLE:
+ if (gpcount & 1)
+ gpcount++;
+ if (LDBL_MANT_DIG == 64 && gpcount < 8 && fpcount < 8)
+ {
+ void *addr = &stack->fp_regs[fpcount++];
+ avalue[i] = addr;
+ *(__float80 *)addr = ldf_fill (addr);
+ }
+ else
+ avalue[i] = &stack->gp_regs[gpcount];
+ gpcount += 2;
+ break;
+
+ case FFI_TYPE_STRUCT:
+ {
+ size_t size = (*p_arg)->size;
+ size_t align = (*p_arg)->alignment;
+ int hfa_type = hfa_element_type (*p_arg, 0);
+
+ FFI_ASSERT (align <= 16);
+ if (align == 16 && (gpcount & 1))
+ gpcount++;
+
+ if (hfa_type != FFI_TYPE_VOID)
+ {
+ size_t hfa_size = hfa_type_size (hfa_type);
+ size_t offset = 0;
+ size_t gp_offset = gpcount * 8;
+ void *addr = alloca (size);
+
+ avalue[i] = addr;
+
+ while (fpcount < 8
+ && offset < size
+ && gp_offset < 8 * 8)
+ {
+ hfa_type_store (hfa_type, addr + offset,
+ ldf_fill (&stack->fp_regs[fpcount]));
+ offset += hfa_size;
+ gp_offset += hfa_size;
+ fpcount += 1;
+ }
+
+ if (offset < size)
+ memcpy (addr + offset, (char *)stack->gp_regs + gp_offset,
+ size - offset);
+ }
+ else
+ avalue[i] = &stack->gp_regs[gpcount];
+
+ gpcount += (size + 7) / 8;
+ }
+ break;
+
+ default:
+ abort ();
+ }
+ }
+
+ closure->fun (cif, rvalue, avalue, closure->user_data);
+
+ return cif->flags;
+}
diff --git a/libffi/src/ia64/ffitarget.h b/libffi/src/ia64/ffitarget.h
index 3b78654..2f98d51 100644
--- a/libffi/src/ia64/ffitarget.h
+++ b/libffi/src/ia64/ffitarget.h
@@ -45,14 +45,5 @@ typedef enum ffi_abi {
/* can be interpreted as a C function */
/* descriptor: */
-#ifndef LIBFFI_ASM
-struct ffi_ia64_trampoline_struct {
- void * code_pointer; /* Pointer to ffi_closure_UNIX */
- void * fake_gp; /* Pointer to closure, installed as gp */
- void * real_gp; /* Real gp value, reinstalled by */
- /* ffi_closure_UNIX. */
-};
-#endif
-
#endif
diff --git a/libffi/src/ia64/ia64_flags.h b/libffi/src/ia64/ia64_flags.h
index 23dbd3e..1dd6d7e 100644
--- a/libffi/src/ia64/ia64_flags.h
+++ b/libffi/src/ia64/ia64_flags.h
@@ -25,38 +25,15 @@
OTHER DEALINGS IN THE SOFTWARE.
----------------------------------------------------------------------- */
-
-/* Homogeneous Floating Point Aggregates (HFAs) which are returned */
-/* in FP registers. The least significant bits specify the size in */
-/* words. */
-#define FFI_IS_FLOAT_FP_AGGREGATE 0x1000
-#define FFI_IS_DOUBLE_FP_AGGREGATE 0x0800
-#define FLOAT_FP_AGGREGATE_BIT 12
-#define DOUBLE_FP_AGGREGATE_BIT 11
-
-/* Small structures containing N words. If N=1, they are returned */
-/* as though they were integers. */
-#define FFI_IS_SMALL_STRUCT2 0x40 /* Struct > 8, <=16 bytes */
-#define FFI_IS_SMALL_STRUCT3 0x41 /* Struct > 16 <= 24 bytes */
-#define FFI_IS_SMALL_STRUCT4 0x42 /* Struct > 24, <=32 bytes */
-
-/* Flag values identifying particularly simple cases, which are */
-/* handled specially. We treat functions as simple if they take all */
-/* arguments can be passed as 32 or 64 bit integer quantities, there is */
-/* either no return value or it can be treated as a 64bit integer, and */
-/* if there are at most 2 arguments. */
-/* This is OR'ed with the normal flag values. */
-#define FFI_SIMPLE_V 0x10000 /* () -> X */
-#define FFI_SIMPLE_I 0x20000 /* (int) -> X */
-#define FFI_SIMPLE_L 0x30000 /* (long) -> X */
-#define FFI_SIMPLE_II 0x40000 /* (int,int) -> X */
-#define FFI_SIMPLE_IL 0x50000 /* (int,long) -> X */
-#define FFI_SIMPLE_LI 0x60000 /* (long,int) -> X */
-#define FFI_SIMPLE_LL 0x70000 /* (long,long) -> X */
-
-/* Mask for all of the FFI_SIMPLE bits: */
-#define FFI_SIMPLE 0xf0000
-
-/* An easy way to build FFI_SIMPLE flags from FFI_SIMPLE_V: */
-#define FFI_ADD_LONG_ARG(flag) (((flag) << 1) | 0x10000)
-#define FFI_ADD_INT_ARG(flag) ((flag) << 1)
+/* "Type" codes used between assembly and C. When used as a part of
+ a cfi->flags value, the low byte will be these extra type codes,
+ and bits 8-31 will be the actual size of the type. */
+
+/* Small structures containing N words in integer registers. */
+#define FFI_IA64_TYPE_SMALL_STRUCT (FFI_TYPE_LAST + 1)
+
+/* Homogeneous Floating Point Aggregates (HFAs) which are returned
+ in FP registers. */
+#define FFI_IA64_TYPE_HFA_FLOAT (FFI_TYPE_LAST + 2)
+#define FFI_IA64_TYPE_HFA_DOUBLE (FFI_TYPE_LAST + 3)
+#define FFI_IA64_TYPE_HFA_LDOUBLE (FFI_TYPE_LAST + 4)
diff --git a/libffi/src/ia64/unix.S b/libffi/src/ia64/unix.S
index be267f6..7c68b2d 100644
--- a/libffi/src/ia64/unix.S
+++ b/libffi/src/ia64/unix.S
@@ -33,295 +33,542 @@
#include <ffi.h>
#include "ia64_flags.h"
-/* parameters: */
-#define callback in0
-#define ecifp in1
-#define bytes in2
-#define flags in3
-#define raddr in4
-#define fn in5
-
-#define FLOAT_SZ 8 /* in-memory size of fp operands */
-
-/* Allocate an ia64_args structure on the stack; call ffi_prep_args */
-/* to fill it in with argument values; copy those to the real */
-/* registers, leaving overflow arguments on the stack. Then call fn */
-/* and move the result from registers into *raddr. */
.pred.safe_across_calls p1-p5,p16-p63
.text
+
+/* int ffi_call_unix (struct ia64_args *stack, PTR64 rvalue,
+ void (*fn)(), int flags);
+ */
+
.align 16
- .global ffi_call_unix
- .proc ffi_call_unix
+ .global ffi_call_unix
+ .proc ffi_call_unix
ffi_call_unix:
.prologue
- .save ar.pfs,r38 /* loc0 */
- alloc loc0=ar.pfs,6,6,8,0
- .save rp,loc1
- mov loc1=b0;
- .vframe loc5
- mov loc5=sp;
+ /* Bit o trickiness. We actually share a stack frame with ffi_call.
+ Rely on the fact that ffi_call uses a vframe and don't bother
+ tracking one here at all. */
+ .fframe 0
+ .save ar.pfs, r36 // loc0
+ alloc loc0 = ar.pfs, 4, 3, 8, 0
+ .save rp, loc1
+ mov loc1 = b0
.body
- sub sp=sp,bytes
- mov loc4=r1 /* Save gp */
- ld8 r8=[callback],8 /* code address of callback */
- ;;
- mov out0=sp
- mov out1=ecifp
- mov out2=bytes
- ld8 r1=[callback] /* Set up gp for callback. Unnecessary? */
- mov b6=r8
- ;;
- br.call.sptk.many b0 = b6 /* call ffi_prep_args */
- cmp.eq p6,p0=0,r8 /* r8 nonzero ==> need fp regs */
- ;;
-(p6) add loc2=32+8*FLOAT_SZ,sp
-(p6) br.cond.dptk.many fp_done
- ;; /* Quiets warning; needed? */
- add loc2=32,sp
- add loc3=32+FLOAT_SZ,sp
- ;;
- ldfd f8=[loc2],2*FLOAT_SZ
- ldfd f9=[loc3],2*FLOAT_SZ
- ;;
- ldfd f10=[loc2],2*FLOAT_SZ
- ldfd f11=[loc3],2*FLOAT_SZ
- ;;
- ldfd f12=[loc2],2*FLOAT_SZ
- ldfd f13=[loc3],2*FLOAT_SZ
- ;;
- ldfd f14=[loc2],2*FLOAT_SZ
- ldfd f15=[loc3]
- ;;
-fp_done:
- add r9=16,sp /* Pointer to r8_contents */
- /* loc2 points at first integer register value. */
- add loc3=8,loc2
- ;;
- ld8 r8=[r9] /* Just in case we return large struct */
- ld8 out0=[loc2],16
- ld8 out1=[loc3],16
- ;;
- ld8 out2=[loc2],16
- ld8 out3=[loc3],16
- ;;
- ld8 out4=[loc2],16
- ld8 out5=[loc3],16
- ;;
- ld8 out6=[loc2]
- ld8 out7=[loc3]
- /* Set sp to 16 bytes below the first stack parameter. This */
- /* is the value currently in loc2. */
- mov sp=loc2
-
- ld8 r8=[fn],8
- ;;
- ld8 r1=[fn] /* Set up gp */
- mov b6=r8;;
- br.call.sptk.many b0 = b6 /* call fn */
-
- /* Handle return value. */
- cmp.eq p6,p0=0,raddr
- cmp.eq p7,p0=FFI_TYPE_INT,flags
- cmp.eq p10,p0=FFI_IS_SMALL_STRUCT2,flags
- cmp.eq p11,p0=FFI_IS_SMALL_STRUCT3,flags
- cmp.eq p12,p0=FFI_IS_SMALL_STRUCT4,flags
- ;;
-(p6) br.cond.dpnt.few done /* Dont copy ret values if raddr = 0 */
-(p7) br.cond.dptk.few copy1
-(p10) br.cond.dpnt.few copy2
-(p11) br.cond.dpnt.few copy3
-(p12) br.cond.dpnt.few copy4
- cmp.eq p8,p0=FFI_TYPE_FLOAT,flags
- cmp.eq p9,p0=FFI_TYPE_DOUBLE,flags
- tbit.nz p6,p0=flags,FLOAT_FP_AGGREGATE_BIT
- tbit.nz p7,p0=flags,DOUBLE_FP_AGGREGATE_BIT
- ;;
-(p8) stfs [raddr]=f8
-(p9) stfd [raddr]=f8
+ add r16 = 16, in0
+ mov loc2 = gp
+ mov r8 = in1
+ ;;
+
+ /* Load up all of the argument registers. */
+ ldf.fill f8 = [in0], 32
+ ldf.fill f9 = [r16], 32
+ ;;
+ ldf.fill f10 = [in0], 32
+ ldf.fill f11 = [r16], 32
+ ;;
+ ldf.fill f12 = [in0], 32
+ ldf.fill f13 = [r16], 32
+ ;;
+ ldf.fill f14 = [in0], 32
+ ldf.fill f15 = [r16], 24
+ ;;
+ ld8 out0 = [in0], 16
+ ld8 out1 = [r16], 16
+ ;;
+ ld8 out2 = [in0], 16
+ ld8 out3 = [r16], 16
+ ;;
+ ld8 out4 = [in0], 16
+ ld8 out5 = [r16], 16
+ ;;
+ ld8 out6 = [in0]
+ ld8 out7 = [r16]
+ ;;
+
+ /* Deallocate the register save area from the stack frame. */
+ mov sp = in0
+
+ /* Call the target function. */
+ ld8 r16 = [in2], 8
+ ;;
+ ld8 gp = [in2]
+ mov b6 = r16
+ br.call.sptk.many b0 = b6
+ ;;
+
+ /* Dispatch to handle return value. */
+ mov gp = loc2
+ zxt1 r16 = in3
+ ;;
+ mov ar.pfs = loc0
+ addl r18 = @ltoffx(.Lst_table), gp
+ ;;
+ ld8.mov r18 = [r18], .Lst_table
+ mov b0 = loc1
+ ;;
+ shladd r18 = r16, 3, r18
+ ;;
+ ld8 r17 = [r18]
+ shr in3 = in3, 8
+ ;;
+ add r17 = r17, r18
+ ;;
+ mov b6 = r17
+ br b6
+ ;;
+
+.Lst_void:
+ br.ret.sptk.many b0
+ ;;
+.Lst_uint8:
+ zxt1 r8 = r8
+ ;;
+ st8 [in1] = r8
+ br.ret.sptk.many b0
+ ;;
+.Lst_sint8:
+ sxt1 r8 = r8
+ ;;
+ st8 [in1] = r8
+ br.ret.sptk.many b0
+ ;;
+.Lst_uint16:
+ zxt2 r8 = r8
+ ;;
+ st8 [in1] = r8
+ br.ret.sptk.many b0
+ ;;
+.Lst_sint16:
+ sxt2 r8 = r8
+ ;;
+ st8 [in1] = r8
+ br.ret.sptk.many b0
+ ;;
+.Lst_uint32:
+ zxt4 r8 = r8
+ ;;
+ st8 [in1] = r8
+ br.ret.sptk.many b0
+ ;;
+.Lst_sint32:
+ sxt4 r8 = r8
+ ;;
+ st8 [in1] = r8
+ br.ret.sptk.many b0
+ ;;
+.Lst_int64:
+ st8 [in1] = r8
+ br.ret.sptk.many b0
+ ;;
+.Lst_float:
+ stfs [in1] = f8
+ br.ret.sptk.many b0
+ ;;
+.Lst_double:
+ stfd [in1] = f8
+ br.ret.sptk.many b0
+ ;;
+.Lst_ldouble:
+ stfe [in1] = f8
+ br.ret.sptk.many b0
;;
- .label_state 1
-(p6) br.cond.dpnt.few handle_float_hfa
-(p7) br.cond.dpnt.few handle_double_hfa
- br done
-copy4:
- add loc3=24,raddr
+.Lst_small_struct:
+ add sp = -16, sp
+ cmp.lt p6, p0 = 8, in3
+ cmp.lt p7, p0 = 16, in3
+ cmp.lt p8, p0 = 24, in3
+ ;;
+ add r16 = 8, sp
+ add r17 = 16, sp
+ add r18 = 24, sp
+ ;;
+ st8 [sp] = r8
+(p6) st8 [r16] = r9
+ mov out0 = in1
+(p7) st8 [r17] = r10
+(p8) st8 [r18] = r11
+ mov out1 = sp
+ mov out2 = in3
+ br.call.sptk.many b0 = memcpy#
;;
- st8 [loc3]=r11
-copy3:
- add loc3=16,raddr
+ mov ar.pfs = loc0
+ mov b0 = loc1
+ mov gp = loc2
+ br.ret.sptk.many b0
+
+.Lst_hfa_float:
+ add r16 = 4, in1
+ cmp.lt p6, p0 = 4, in3
+ ;;
+ stfs [in1] = f8, 8
+(p6) stfs [r16] = f9, 8
+ cmp.lt p7, p0 = 8, in3
+ cmp.lt p8, p0 = 12, in3
+ ;;
+(p7) stfs [in1] = f10, 8
+(p8) stfs [r16] = f11, 8
+ cmp.lt p9, p0 = 16, in3
+ cmp.lt p10, p0 = 20, in3
+ ;;
+(p9) stfs [in1] = f12, 8
+(p10) stfs [r16] = f13, 8
+ cmp.lt p6, p0 = 24, in3
+ cmp.lt p7, p0 = 28, in3
+ ;;
+(p6) stfs [in1] = f14
+(p7) stfs [r16] = f15
+ br.ret.sptk.many b0
+ ;;
+
+.Lst_hfa_double:
+ add r16 = 8, in1
+ cmp.lt p6, p0 = 8, in3
+ ;;
+ stfd [in1] = f8, 16
+(p6) stfd [r16] = f9, 16
+ cmp.lt p7, p0 = 16, in3
+ cmp.lt p8, p0 = 24, in3
+ ;;
+(p7) stfd [in1] = f10, 16
+(p8) stfd [r16] = f11, 16
+ cmp.lt p9, p0 = 32, in3
+ cmp.lt p10, p0 = 40, in3
+ ;;
+(p9) stfd [in1] = f12, 16
+(p10) stfd [r16] = f13, 16
+ cmp.lt p6, p0 = 48, in3
+ cmp.lt p7, p0 = 56, in3
+ ;;
+(p6) stfd [in1] = f14
+(p7) stfd [r16] = f15
+ br.ret.sptk.many b0
+ ;;
+
+.Lst_hfa_ldouble:
+ add r16 = 16, in1
+ cmp.lt p6, p0 = 16, in3
+ ;;
+ stfe [in1] = f8, 32
+(p6) stfe [r16] = f9, 32
+ cmp.lt p7, p0 = 32, in3
+ cmp.lt p8, p0 = 48, in3
+ ;;
+(p7) stfe [in1] = f10, 32
+(p8) stfe [r16] = f11, 32
+ cmp.lt p9, p0 = 64, in3
+ cmp.lt p10, p0 = 80, in3
+ ;;
+(p9) stfe [in1] = f12, 32
+(p10) stfe [r16] = f13, 32
+ cmp.lt p6, p0 = 96, in3
+ cmp.lt p7, p0 = 112, in3
+ ;;
+(p6) stfe [in1] = f14
+(p7) stfe [r16] = f15
+ br.ret.sptk.many b0
+ ;;
+
+ .endp ffi_call_unix
+
+ .align 16
+ .global ffi_closure_unix
+ .proc ffi_closure_unix
+
+#define FRAME_SIZE (8*16 + 8*8 + 8*16)
+
+ffi_closure_unix:
+ .prologue
+ .save ar.pfs, r40 // loc0
+ alloc loc0 = ar.pfs, 8, 4, 4, 0
+ .fframe FRAME_SIZE
+ add r12 = -FRAME_SIZE, r12
+ .save rp, loc1
+ mov loc1 = b0
+ .save ar.unat, loc2
+ mov loc2 = ar.unat
+ .body
+
+ /* Retrieve closure pointer and real gp. */
+ mov out0 = gp
+ add gp = 16, gp
;;
- st8 [loc3]=r10
-copy2:
- add loc3=8,raddr
+ ld8 gp = [gp]
+
+ /* Spill all of the possible argument registers. */
+ add r16 = 16 + 8*16, sp
+ add r17 = 16 + 8*16 + 16, sp
+ ;;
+ stf.spill [r16] = f8, 32
+ stf.spill [r17] = f9, 32
+ mov loc3 = gp
+ ;;
+ stf.spill [r16] = f10, 32
+ stf.spill [r17] = f11, 32
+ ;;
+ stf.spill [r16] = f12, 32
+ stf.spill [r17] = f13, 32
+ ;;
+ stf.spill [r16] = f14, 32
+ stf.spill [r17] = f15, 24
+ ;;
+ .mem.offset 0, 0
+ st8.spill [r16] = in0, 16
+ .mem.offset 8, 0
+ st8.spill [r17] = in1, 16
+ add out1 = 16 + 8*16, sp
+ ;;
+ .mem.offset 0, 0
+ st8.spill [r16] = in2, 16
+ .mem.offset 8, 0
+ st8.spill [r17] = in3, 16
+ add out2 = 16, sp
+ ;;
+ .mem.offset 0, 0
+ st8.spill [r16] = in4, 16
+ .mem.offset 8, 0
+ st8.spill [r17] = in5, 16
+ mov out3 = r8
+ ;;
+ .mem.offset 0, 0
+ st8.spill [r16] = in6
+ .mem.offset 8, 0
+ st8.spill [r17] = in7
+
+ /* Invoke ffi_closure_unix_inner for the hard work. */
+ br.call.sptk.many b0 = ffi_closure_unix_inner
;;
- st8 [loc3]=r9
-copy1:
- st8 [raddr]=r8
- /* In the big struct case, raddr was passed as an argument. */
- /* In the void case there was nothing to do. */
-done:
- mov r1=loc4 /* Restore gp */
+ /* Dispatch to handle return value. */
+ mov gp = loc3
+ zxt1 r16 = r8
+ ;;
+ addl r18 = @ltoffx(.Lld_table), gp
mov ar.pfs = loc0
+ ;;
+ ld8.mov r18 = [r18], .Lld_table
mov b0 = loc1
+ ;;
+ shladd r18 = r16, 3, r18
+ mov ar.unat = loc2
+ ;;
+ ld8 r17 = [r18]
+ shr r8 = r8, 8
+ ;;
+ add r17 = r17, r18
+ add r16 = 16, sp
+ ;;
+ mov b6 = r17
+ br b6
+ ;;
+ .label_state 1
+
+.Lld_void:
+ .restore sp
+ add sp = FRAME_SIZE, sp
+ br.ret.sptk.many b0
+ ;;
+.Lld_int8:
+ .body
+ .copy_state 1
+ ld1 r8 = [r16]
+ .restore sp
+ add sp = FRAME_SIZE, sp
+ br.ret.sptk.many b0
+ ;;
+.Lld_int16:
+ .body
+ .copy_state 1
+ ld2 r8 = [r16]
+ .restore sp
+ add sp = FRAME_SIZE, sp
+ br.ret.sptk.many b0
+ ;;
+.Lld_int32:
+ .body
+ .copy_state 1
+ ld4 r8 = [r16]
+ .restore sp
+ add sp = FRAME_SIZE, sp
+ br.ret.sptk.many b0
+ ;;
+.Lld_int64:
+ .body
+ .copy_state 1
+ ld8 r8 = [r16]
+ .restore sp
+ add sp = FRAME_SIZE, sp
+ br.ret.sptk.many b0
+ ;;
+.Lld_float:
+ .body
+ .copy_state 1
+ ldfs f8 = [r16]
+ .restore sp
+ add sp = FRAME_SIZE, sp
+ br.ret.sptk.many b0
+ ;;
+.Lld_double:
+ .body
+ .copy_state 1
+ ldfd f8 = [r16]
.restore sp
- mov sp = loc5
+ add sp = FRAME_SIZE, sp
br.ret.sptk.many b0
+ ;;
+.Lld_ldouble:
+ .body
+ .copy_state 1
+ ldfe f8 = [r16]
+ .restore sp
+ add sp = FRAME_SIZE, sp
+ br.ret.sptk.many b0
+ ;;
-handle_double_hfa:
+.Lld_small_struct:
.body
.copy_state 1
- /* Homogeneous floating point array of doubles is returned in */
- /* registers f8-f15. Save one at a time to return area. */
- and flags=0xf,flags /* Retrieve size */
- ;;
- cmp.eq p6,p0=2,flags
- cmp.eq p7,p0=3,flags
- cmp.eq p8,p0=4,flags
- cmp.eq p9,p0=5,flags
- cmp.eq p10,p0=6,flags
- cmp.eq p11,p0=7,flags
- cmp.eq p12,p0=8,flags
- ;;
-(p6) br.cond.dptk.few dhfa2
-(p7) br.cond.dptk.few dhfa3
-(p8) br.cond.dptk.few dhfa4
-(p9) br.cond.dptk.few dhfa5
-(p10) br.cond.dptk.few dhfa6
-(p11) br.cond.dptk.few dhfa7
-dhfa8: add loc3=7*8,raddr
- ;;
- stfd [loc3]=f15
-dhfa7: add loc3=6*8,raddr
- ;;
- stfd [loc3]=f14
-dhfa6: add loc3=5*8,raddr
- ;;
- stfd [loc3]=f13
-dhfa5: add loc3=4*8,raddr
- ;;
- stfd [loc3]=f12
-dhfa4: add loc3=3*8,raddr
- ;;
- stfd [loc3]=f11
-dhfa3: add loc3=2*8,raddr
- ;;
- stfd [loc3]=f10
-dhfa2: add loc3=1*8,raddr
- ;;
- stfd [loc3]=f9
- stfd [raddr]=f8
- br done
-
-handle_float_hfa:
- /* Homogeneous floating point array of floats is returned in */
- /* registers f8-f15. Save one at a time to return area. */
- and flags=0xf,flags /* Retrieve size */
- ;;
- cmp.eq p6,p0=2,flags
- cmp.eq p7,p0=3,flags
- cmp.eq p8,p0=4,flags
- cmp.eq p9,p0=5,flags
- cmp.eq p10,p0=6,flags
- cmp.eq p11,p0=7,flags
- cmp.eq p12,p0=8,flags
- ;;
-(p6) br.cond.dptk.few shfa2
-(p7) br.cond.dptk.few shfa3
-(p8) br.cond.dptk.few shfa4
-(p9) br.cond.dptk.few shfa5
-(p10) br.cond.dptk.few shfa6
-(p11) br.cond.dptk.few shfa7
-shfa8: add loc3=7*4,raddr
- ;;
- stfd [loc3]=f15
-shfa7: add loc3=6*4,raddr
- ;;
- stfd [loc3]=f14
-shfa6: add loc3=5*4,raddr
- ;;
- stfd [loc3]=f13
-shfa5: add loc3=4*4,raddr
- ;;
- stfd [loc3]=f12
-shfa4: add loc3=3*4,raddr
- ;;
- stfd [loc3]=f11
-shfa3: add loc3=2*4,raddr
- ;;
- stfd [loc3]=f10
-shfa2: add loc3=1*4,raddr
- ;;
- stfd [loc3]=f9
- stfd [raddr]=f8
- br done
+ add r17 = 8, r16
+ cmp.lt p6, p0 = 8, r8
+ cmp.lt p7, p0 = 16, r8
+ cmp.lt p8, p0 = 24, r8
+ ;;
+ ld8 r8 = [r16], 16
+(p6) ld8 r9 = [r17], 16
+ ;;
+(p7) ld8 r10 = [r16]
+(p8) ld8 r11 = [r17]
+ .restore sp
+ add sp = FRAME_SIZE, sp
+ br.ret.sptk.many b0
+ ;;
- .endp ffi_call_unix
+.Lld_hfa_float:
+ .body
+ .copy_state 1
+ add r17 = 4, r16
+ cmp.lt p6, p0 = 4, r8
+ ;;
+ ldfs f8 = [r16], 8
+(p6) ldfs f9 = [r17], 8
+ cmp.lt p7, p0 = 8, r8
+ cmp.lt p8, p0 = 12, r8
+ ;;
+(p7) ldfs f10 = [r16], 8
+(p8) ldfs f11 = [r17], 8
+ cmp.lt p9, p0 = 16, r8
+ cmp.lt p10, p0 = 20, r8
+ ;;
+(p9) ldfs f12 = [r16], 8
+(p10) ldfs f13 = [r17], 8
+ cmp.lt p6, p0 = 24, r8
+ cmp.lt p7, p0 = 28, r8
+ ;;
+(p6) ldfs f14 = [r16]
+(p7) ldfs f15 = [r17]
+ .restore sp
+ add sp = FRAME_SIZE, sp
+ br.ret.sptk.many b0
+ ;;
+.Lld_hfa_double:
+ .body
+ .copy_state 1
+ add r17 = 8, r16
+ cmp.lt p6, p0 = 8, r8
+ ;;
+ ldfd f8 = [r16], 16
+(p6) ldfd f9 = [r17], 16
+ cmp.lt p7, p0 = 16, r8
+ cmp.lt p8, p0 = 24, r8
+ ;;
+(p7) ldfd f10 = [r16], 16
+(p8) ldfd f11 = [r17], 16
+ cmp.lt p9, p0 = 32, r8
+ cmp.lt p10, p0 = 40, r8
+ ;;
+(p9) ldfd f12 = [r16], 16
+(p10) ldfd f13 = [r17], 16
+ cmp.lt p6, p0 = 48, r8
+ cmp.lt p7, p0 = 56, r8
+ ;;
+(p6) ldfd f14 = [r16]
+(p7) ldfd f15 = [r17]
+ .restore sp
+ add sp = FRAME_SIZE, sp
+ br.ret.sptk.many b0
+ ;;
- .pred.safe_across_calls p1-p5,p16-p63
-.text
- .align 16
- .global ffi_closure_UNIX
- .proc ffi_closure_UNIX
-ffi_closure_UNIX:
- .prologue
- .save ar.pfs,r40 /* loc0 */
- alloc loc0=ar.pfs,8,3,2,0
- .save rp,loc1
- mov loc1=b0
- .vframe loc2
- mov loc2=sp
- /* Retrieve closure pointer and real gp. */
- mov out0=gp
- add gp=16,gp
- ;;
- ld8 gp=[gp]
- /* Reserve a structia64_args on the stack such that arguments */
- /* past the first 8 are automatically placed in the right */
- /* slot. Note that when we start the sp points at 2 8-byte */
- /* scratch words, followed by the extra arguments. */
-# define BASIC_ARGS_SZ (8*FLOAT_SZ+8*8+2*8)
-# define FIRST_FP_OFFSET (4*8)
- add r14=-(BASIC_ARGS_SZ-FIRST_FP_OFFSET),sp
- add r15=-(BASIC_ARGS_SZ-FIRST_FP_OFFSET-FLOAT_SZ),sp
- add sp=-BASIC_ARGS_SZ,sp
- /* r14 points to fp_regs[0], r15 points to fp_regs[1] */
- ;;
- stfd [r14]=f8,2*FLOAT_SZ
- stfd [r15]=f9,2*FLOAT_SZ
- ;;
- stfd [r14]=f10,2*FLOAT_SZ
- stfd [r15]=f11,2*FLOAT_SZ
- ;;
- stfd [r14]=f12,2*FLOAT_SZ
- stfd [r15]=f13,2*FLOAT_SZ
- ;;
- stfd [r14]=f14,2*FLOAT_SZ
- stfd [r15]=f15,FLOAT_SZ+8
- ;;
- /* r14 points to first parameter register area, r15 to second. */
- st8 [r14]=in0,2*8
- st8 [r15]=in1,2*8
- ;;
- st8 [r14]=in2,2*8
- st8 [r15]=in3,2*8
- ;;
- st8 [r14]=in4,2*8
- st8 [r15]=in5,2*8
- ;;
- st8 [r14]=in6,2*8
- st8 [r15]=in7,2*8
- /* Call ffi_closure_UNIX_inner */
- mov out1=sp
- br.call.sptk.many b0=ffi_closure_UNIX_inner
- ;;
- mov b0=loc1
- mov ar.pfs=loc0
+.Lld_hfa_ldouble:
+ .body
+ .copy_state 1
+ add r17 = 16, r16
+ cmp.lt p6, p0 = 16, r8
+ ;;
+ ldfe f8 = [r16], 32
+(p6) ldfe f9 = [r17], 32
+ cmp.lt p7, p0 = 32, r8
+ cmp.lt p8, p0 = 48, r8
+ ;;
+(p7) ldfe f10 = [r16], 32
+(p8) ldfe f11 = [r17], 32
+ cmp.lt p9, p0 = 64, r8
+ cmp.lt p10, p0 = 80, r8
+ ;;
+(p9) ldfe f12 = [r16], 32
+(p10) ldfe f13 = [r17], 32
+ cmp.lt p6, p0 = 96, r8
+ cmp.lt p7, p0 = 112, r8
+ ;;
+(p6) ldfe f14 = [r16]
+(p7) ldfe f15 = [r17]
.restore sp
- mov sp=loc2
+ add sp = FRAME_SIZE, sp
br.ret.sptk.many b0
- .endp ffi_closure_UNIX
-
+ ;;
+
+ .endp ffi_closure_unix
+
+ .section .rodata
+ .align 8
+.Lst_table:
+ data8 @pcrel(.Lst_void) // FFI_TYPE_VOID
+ data8 @pcrel(.Lst_sint32) // FFI_TYPE_INT
+ data8 @pcrel(.Lst_float) // FFI_TYPE_FLOAT
+ data8 @pcrel(.Lst_double) // FFI_TYPE_DOUBLE
+ data8 @pcrel(.Lst_ldouble) // FFI_TYPE_LONGDOUBLE
+ data8 @pcrel(.Lst_uint8) // FFI_TYPE_UINT8
+ data8 @pcrel(.Lst_sint8) // FFI_TYPE_SINT8
+ data8 @pcrel(.Lst_uint16) // FFI_TYPE_UINT16
+ data8 @pcrel(.Lst_sint16) // FFI_TYPE_SINT16
+ data8 @pcrel(.Lst_uint32) // FFI_TYPE_UINT32
+ data8 @pcrel(.Lst_sint32) // FFI_TYPE_SINT32
+ data8 @pcrel(.Lst_int64) // FFI_TYPE_UINT64
+ data8 @pcrel(.Lst_int64) // FFI_TYPE_SINT64
+ data8 @pcrel(.Lst_void) // FFI_TYPE_STRUCT
+ data8 @pcrel(.Lst_int64) // FFI_TYPE_POINTER
+ data8 @pcrel(.Lst_small_struct) // FFI_IA64_TYPE_SMALL_STRUCT
+ data8 @pcrel(.Lst_hfa_float) // FFI_IA64_TYPE_HFA_FLOAT
+ data8 @pcrel(.Lst_hfa_double) // FFI_IA64_TYPE_HFA_DOUBLE
+ data8 @pcrel(.Lst_hfa_ldouble) // FFI_IA64_TYPE_HFA_LDOUBLE
+.Lld_table:
+ data8 @pcrel(.Lld_void) // FFI_TYPE_VOID
+ data8 @pcrel(.Lld_int32) // FFI_TYPE_INT
+ data8 @pcrel(.Lld_float) // FFI_TYPE_FLOAT
+ data8 @pcrel(.Lld_double) // FFI_TYPE_DOUBLE
+ data8 @pcrel(.Lld_ldouble) // FFI_TYPE_LONGDOUBLE
+ data8 @pcrel(.Lld_int8) // FFI_TYPE_UINT8
+ data8 @pcrel(.Lld_int8) // FFI_TYPE_SINT8
+ data8 @pcrel(.Lld_int16) // FFI_TYPE_UINT16
+ data8 @pcrel(.Lld_int16) // FFI_TYPE_SINT16
+ data8 @pcrel(.Lld_int32) // FFI_TYPE_UINT32
+ data8 @pcrel(.Lld_int32) // FFI_TYPE_SINT32
+ data8 @pcrel(.Lld_int64) // FFI_TYPE_UINT64
+ data8 @pcrel(.Lld_int64) // FFI_TYPE_SINT64
+ data8 @pcrel(.Lld_void) // FFI_TYPE_STRUCT
+ data8 @pcrel(.Lld_int64) // FFI_TYPE_POINTER
+ data8 @pcrel(.Lld_small_struct) // FFI_IA64_TYPE_SMALL_STRUCT
+ data8 @pcrel(.Lld_hfa_float) // FFI_IA64_TYPE_HFA_FLOAT
+ data8 @pcrel(.Lld_hfa_double) // FFI_IA64_TYPE_HFA_DOUBLE
+ data8 @pcrel(.Lld_hfa_ldouble) // FFI_IA64_TYPE_HFA_LDOUBLE