diff options
author | Richard Henderson <rth@redhat.com> | 2004-12-31 12:11:17 -0800 |
---|---|---|
committer | Richard Henderson <rth@gcc.gnu.org> | 2004-12-31 12:11:17 -0800 |
commit | 81a69b13339a5a975ab17eef54490148addaf531 (patch) | |
tree | 9eac59f95be2aa7a36f95f2c7c9553e325370df0 /libffi/src/ia64 | |
parent | bdaa445236ad169bf757aa9d81c26d7e09a28e0c (diff) | |
download | gcc-81a69b13339a5a975ab17eef54490148addaf531.zip gcc-81a69b13339a5a975ab17eef54490148addaf531.tar.gz gcc-81a69b13339a5a975ab17eef54490148addaf531.tar.bz2 |
types.c (FFI_TYPE_POINTER): Define with sizeof.
* src/types.c (FFI_TYPE_POINTER): Define with sizeof.
(FFI_TYPE_LONGDOUBLE): Fix for ia64.
* src/ia64/ffitarget.h (struct ffi_ia64_trampoline_struct): Move
into ffi_prep_closure.
* src/ia64/ia64_flags.h, src/ia64/ffi.c, src/ia64/unix.S: Rewrite
from scratch.
From-SVN: r92774
Diffstat (limited to 'libffi/src/ia64')
-rw-r--r-- | libffi/src/ia64/ffi.c | 947 | ||||
-rw-r--r-- | libffi/src/ia64/ffitarget.h | 9 | ||||
-rw-r--r-- | libffi/src/ia64/ia64_flags.h | 47 | ||||
-rw-r--r-- | libffi/src/ia64/unix.S | 781 |
4 files changed, 945 insertions, 839 deletions
diff --git a/libffi/src/ia64/ffi.c b/libffi/src/ia64/ffi.c index 1dc27db..e810827 100644 --- a/libffi/src/ia64/ffi.c +++ b/libffi/src/ia64/ffi.c @@ -29,622 +29,365 @@ #include <stdlib.h> #include <stdbool.h> +#include <float.h> #include "ia64_flags.h" -/* Memory image of fp register contents. Should eventually be an fp */ -/* type long enough to hold an entire register. For now we use double. */ -typedef double float80; - -/* The stack layout at call to ffi_prep_args. Other_args will remain */ -/* on the stack for the actual call. Everything else we be transferred */ -/* to registers and popped by the assembly code. */ - -struct ia64_args { - long scratch[2]; /* Two scratch words at top of stack. */ - /* Allows sp to be passed as arg pointer. */ - void * r8_contents; /* Value to be passed in r8 */ - long spare; /* Not used. */ - float80 fp_regs[8]; /* Contents of 8 floating point argument */ - /* registers. */ - long out_regs[8]; /* Contents of the 8 out registers used */ - /* for integer parameters. */ - long other_args[0]; /* Arguments passed on stack, variable size */ - /* Treated as continuation of out_regs. */ +/* A 64-bit pointer value. In LP64 mode, this is effectively a plain + pointer. In ILP32 mode, it's a pointer that's been extended to + 64 bits by "addp4". */ +typedef void *PTR64 __attribute__((mode(DI))); + +/* Memory image of fp register contents. This is the implementation + specific format used by ldf.fill/stf.spill. All we care about is + that it wants a 16 byte aligned slot. */ +typedef struct +{ + UINT64 x[2] __attribute__((aligned(16))); +} fpreg; + + +/* The stack layout given to ffi_call_unix and ffi_closure_unix_inner. */ + +struct ia64_args +{ + fpreg fp_regs[8]; /* Contents of 8 fp arg registers. */ + UINT64 gp_regs[8]; /* Contents of 8 gp arg registers. */ + UINT64 other_args[]; /* Arguments passed on stack, variable size. 
*/ }; -static size_t float_type_size(unsigned short tp) + +/* Adjust ADDR, a pointer to an 8 byte slot, to point to the low LEN bytes. */ + +static inline void * +endian_adjust (void *addr, size_t len) { - switch(tp) { - case FFI_TYPE_FLOAT: - return sizeof(float); - case FFI_TYPE_DOUBLE: - return sizeof(double); -#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE - case FFI_TYPE_LONGDOUBLE: - return sizeof(long double); +#ifdef __BIG_ENDIAN__ + return addr + (8 - len); +#else + return addr; #endif - default: - FFI_ASSERT(0); - } } -/* - * Is type a struct containing at most n floats, doubles, or extended - * doubles, all of the same fp type? - * If so, set *element_type to the fp type. - */ -static bool is_homogeneous_fp_aggregate(ffi_type * type, int n, - unsigned short * element_type) +/* Store VALUE to ADDR in the current cpu implementation's fp spill format. */ + +static inline void +stf_spill(fpreg *addr, __float80 value) { - ffi_type **ptr; - unsigned short element, struct_element; + asm ("stf.spill %0 = %1%P0" : "=m" (*addr) : "f"(value)); +} + +/* Load a value from ADDR, which is in the current cpu implementation's + fp spill format. */ - int type_set = 0; +static inline __float80 +ldf_fill(fpreg *addr) +{ + __float80 ret; + asm ("ldf.fill %0 = %1%P1" : "=f"(ret) : "m"(*addr)); + return ret; +} - FFI_ASSERT(type != NULL); +/* Return the size of the C type associated with with TYPE. Which will + be one of the FFI_IA64_TYPE_HFA_* values. */ - FFI_ASSERT(type->elements != NULL); +static size_t +hfa_type_size (int type) +{ + switch (type) + { + case FFI_IA64_TYPE_HFA_FLOAT: + return sizeof(float); + case FFI_IA64_TYPE_HFA_DOUBLE: + return sizeof(double); + case FFI_IA64_TYPE_HFA_LDOUBLE: + return sizeof(__float80); + default: + abort (); + } +} - ptr = &(type->elements[0]); +/* Load from ADDR a value indicated by TYPE. Which will be one of + the FFI_IA64_TYPE_HFA_* values. 
*/ - while ((*ptr) != NULL) +static __float80 +hfa_type_load (int type, void *addr) +{ + switch (type) { - switch((*ptr) -> type) { - case FFI_TYPE_FLOAT: - if (type_set && element != FFI_TYPE_FLOAT) return 0; - if (--n < 0) return false; - type_set = 1; - element = FFI_TYPE_FLOAT; - break; - case FFI_TYPE_DOUBLE: - if (type_set && element != FFI_TYPE_DOUBLE) return 0; - if (--n < 0) return false; - type_set = 1; - element = FFI_TYPE_DOUBLE; - break; - case FFI_TYPE_STRUCT: - if (!is_homogeneous_fp_aggregate(type, n, &struct_element)) - return false; - if (type_set && struct_element != element) return false; - n -= (type -> size)/float_type_size(element); - element = struct_element; - if (n < 0) return false; - break; - /* case FFI_TYPE_LONGDOUBLE: - Not yet implemented. */ - default: - return false; - } - ptr++; + case FFI_IA64_TYPE_HFA_FLOAT: + return *(float *) addr; + case FFI_IA64_TYPE_HFA_DOUBLE: + return *(double *) addr; + case FFI_IA64_TYPE_HFA_LDOUBLE: + return *(__float80 *) addr; + default: + abort (); } - *element_type = element; - return true; - -} +} -/* ffi_prep_args is called by the assembly routine once stack space - has been allocated for the function's arguments. It fills in - the arguments in the structure referenced by stack. Returns nonzero - if fp registers are used for arguments. */ +/* Load VALUE into ADDR as indicated by TYPE. Which will be one of + the FFI_IA64_TYPE_HFA_* values. */ -static bool -ffi_prep_args(struct ia64_args *stack, extended_cif *ecif, int bytes) +static void +hfa_type_store (int type, void *addr, __float80 value) { - register long i, avn; - register void **p_argv; - register long *argp = stack -> out_regs; - register float80 *fp_argp = stack -> fp_regs; - register ffi_type **p_arg; - - /* For big return structs, r8 needs to contain the target address. */ - /* Since r8 is otherwise dead, we set it unconditionally. 
*/ - stack -> r8_contents = ecif -> rvalue; - i = 0; - avn = ecif->cif->nargs; - p_arg = ecif->cif->arg_types; - p_argv = ecif->avalue; - while (i < avn) + switch (type) { - size_t z; /* z is in units of arg slots or words, not bytes. */ + case FFI_IA64_TYPE_HFA_FLOAT: + *(float *) addr = value; + break; + case FFI_IA64_TYPE_HFA_DOUBLE: + *(double *) addr = value; + break; + case FFI_IA64_TYPE_HFA_LDOUBLE: + *(__float80 *) addr = value; + break; + default: + abort (); + } +} - switch ((*p_arg)->type) - { - case FFI_TYPE_SINT8: - z = 1; - *(SINT64 *) argp = *(SINT8 *)(* p_argv); - break; - - case FFI_TYPE_UINT8: - z = 1; - *(UINT64 *) argp = *(UINT8 *)(* p_argv); - break; - - case FFI_TYPE_SINT16: - z = 1; - *(SINT64 *) argp = *(SINT16 *)(* p_argv); - break; - - case FFI_TYPE_UINT16: - z = 1; - *(UINT64 *) argp = *(UINT16 *)(* p_argv); - break; - - case FFI_TYPE_SINT32: - z = 1; - *(SINT64 *) argp = *(SINT32 *)(* p_argv); - break; - - case FFI_TYPE_UINT32: - z = 1; - *(UINT64 *) argp = *(UINT32 *)(* p_argv); - break; +/* Is TYPE a struct containing floats, doubles, or extended doubles, + all of the same fp type? If so, return the element type. Return + FFI_TYPE_VOID if not. */ - case FFI_TYPE_SINT64: - case FFI_TYPE_UINT64: - case FFI_TYPE_POINTER: - z = 1; - *(UINT64 *) argp = *(UINT64 *)(* p_argv); - break; +static int +hfa_element_type (ffi_type *type, int nested) +{ + int element = FFI_TYPE_VOID; - case FFI_TYPE_FLOAT: - z = 1; - if (fp_argp - stack->fp_regs < 8) - { - /* Note the conversion -- all the fp regs are loaded as - doubles. */ - *fp_argp++ = *(float *)(* p_argv); - } - /* Also put it into the integer registers or memory: */ - *(UINT64 *) argp = *(UINT32 *)(* p_argv); - break; + switch (type->type) + { + case FFI_TYPE_FLOAT: + /* We want to return VOID for raw floating-point types, but the + synthetic HFA type if we're nested within an aggregate. 
*/ + if (nested) + element = FFI_IA64_TYPE_HFA_FLOAT; + break; - case FFI_TYPE_DOUBLE: - z = 1; - if (fp_argp - stack->fp_regs < 8) - *fp_argp++ = *(double *)(* p_argv); - /* Also put it into the integer registers or memory: */ - *(double *) argp = *(double *)(* p_argv); - break; + case FFI_TYPE_DOUBLE: + /* Similarly. */ + if (nested) + element = FFI_IA64_TYPE_HFA_DOUBLE; + break; - case FFI_TYPE_STRUCT: + case FFI_TYPE_LONGDOUBLE: + /* Similarly, except that that HFA is true for double extended, + but not quad precision. Both have sizeof == 16, so tell the + difference based on the precision. */ + if (LDBL_MANT_DIG == 64 && nested) + element = FFI_IA64_TYPE_HFA_LDOUBLE; + break; + + case FFI_TYPE_STRUCT: + { + ffi_type **ptr = &type->elements[0]; + + for (ptr = &type->elements[0]; *ptr ; ptr++) { - size_t sz = (*p_arg)->size; - unsigned short element_type; - z = ((*p_arg)->size + FFI_SIZEOF_ARG - 1)/FFI_SIZEOF_ARG; - if (is_homogeneous_fp_aggregate(*p_arg, 8, &element_type)) { - int i; - int nelements = sz/float_type_size(element_type); - for (i = 0; i < nelements; ++i) { - switch (element_type) { - case FFI_TYPE_FLOAT: - if (fp_argp - stack->fp_regs < 8) - *fp_argp++ = ((float *)(* p_argv))[i]; - break; - case FFI_TYPE_DOUBLE: - if (fp_argp - stack->fp_regs < 8) - *fp_argp++ = ((double *)(* p_argv))[i]; - break; - default: - /* Extended precision not yet implemented. */ - abort(); - } - } - } - /* And pass it in integer registers as a struct, with */ - /* its actual field sizes packed into registers. 
*/ - memcpy(argp, *p_argv, (*p_arg)->size); + int sub_element = hfa_element_type (*ptr, 1); + if (sub_element == FFI_TYPE_VOID) + return FFI_TYPE_VOID; + + if (element == FFI_TYPE_VOID) + element = sub_element; + else if (element != sub_element) + return FFI_TYPE_VOID; } - break; - - default: - FFI_ASSERT(0); - } + } + break; - argp += z; - i++, p_arg++, p_argv++; + default: + return FFI_TYPE_VOID; } - return (fp_argp != stack -> fp_regs); + + return element; } -/* Perform machine dependent cif processing */ + +/* Perform machine dependent cif processing. */ + ffi_status ffi_prep_cif_machdep(ffi_cif *cif) { - long i, avn; - bool is_simple = true; - long simple_flag = FFI_SIMPLE_V; - /* Adjust cif->bytes to include space for the 2 scratch words, - r8 register contents, spare word, - the 8 fp register contents, and all 8 integer register contents. - This will be removed before the call, though 2 scratch words must - remain. */ - - cif->bytes += 4*sizeof(long) + 8 *sizeof(float80); + int flags; + + /* Adjust cif->bytes to include space for the bits of the ia64_args frame + that preceeds the integer register portion. The estimate that the + generic bits did for the argument space required is good enough for the + integer component. */ + cif->bytes += offsetof(struct ia64_args, gp_regs[0]); if (cif->bytes < sizeof(struct ia64_args)) cif->bytes = sizeof(struct ia64_args); - /* The stack must be double word aligned, so round bytes up - appropriately. */ - - cif->bytes = ALIGN(cif->bytes, 2*sizeof(void*)); - - avn = cif->nargs; - if (avn <= 2) { - for (i = 0; i < avn; ++i) { - switch(cif -> arg_types[i] -> type) { - case FFI_TYPE_SINT32: - simple_flag = FFI_ADD_INT_ARG(simple_flag); - break; - case FFI_TYPE_SINT64: - case FFI_TYPE_UINT64: - case FFI_TYPE_POINTER: - simple_flag = FFI_ADD_LONG_ARG(simple_flag); - break; - default: - is_simple = false; - } - } - } else { - is_simple = false; - } - - /* Set the return type flag */ + /* Set the return type flag. 
*/ + flags = cif->rtype->type; switch (cif->rtype->type) { - case FFI_TYPE_VOID: - cif->flags = FFI_TYPE_VOID; + case FFI_TYPE_LONGDOUBLE: + /* Leave FFI_TYPE_LONGDOUBLE as meaning double extended precision, + and encode quad precision as a two-word integer structure. */ + if (LDBL_MANT_DIG != 64) + flags = FFI_IA64_TYPE_SMALL_STRUCT | (16 << 8); break; case FFI_TYPE_STRUCT: { - size_t sz = cif -> rtype -> size; - unsigned short element_type; - - is_simple = false; - if (is_homogeneous_fp_aggregate(cif -> rtype, 8, &element_type)) { - int nelements = sz/float_type_size(element_type); - if (nelements <= 1) { - if (0 == nelements) { - cif -> flags = FFI_TYPE_VOID; - } else { - cif -> flags = element_type; - } - } else { - switch(element_type) { - case FFI_TYPE_FLOAT: - cif -> flags = FFI_IS_FLOAT_FP_AGGREGATE | nelements; - break; - case FFI_TYPE_DOUBLE: - cif -> flags = FFI_IS_DOUBLE_FP_AGGREGATE | nelements; - break; - default: - /* long double NYI */ - abort(); - } + size_t size = cif->rtype->size; + int hfa_type = hfa_element_type (cif->rtype, 0); + + if (hfa_type != FFI_TYPE_VOID) + { + size_t nelts = size / hfa_type_size (hfa_type); + if (nelts <= 8) + flags = hfa_type | (size << 8); } - break; - } - if (sz <= 32) { - if (sz <= 8) { - cif->flags = FFI_TYPE_INT; - } else if (sz <= 16) { - cif->flags = FFI_IS_SMALL_STRUCT2; - } else if (sz <= 24) { - cif->flags = FFI_IS_SMALL_STRUCT3; - } else { - cif->flags = FFI_IS_SMALL_STRUCT4; + else + { + if (size <= 32) + flags = FFI_IA64_TYPE_SMALL_STRUCT | (size << 8); } - } else { - cif->flags = FFI_TYPE_STRUCT; - } } break; - case FFI_TYPE_FLOAT: - is_simple = false; - cif->flags = FFI_TYPE_FLOAT; - break; - - case FFI_TYPE_DOUBLE: - is_simple = false; - cif->flags = FFI_TYPE_DOUBLE; - break; - default: - cif->flags = FFI_TYPE_INT; - /* This seems to depend on little endian mode, and the fact that */ - /* the return pointer always points to at least 8 bytes. But */ - /* that also seems to be true for other platforms. 
*/ break; } - - if (is_simple) cif -> flags |= simple_flag; + cif->flags = flags; + return FFI_OK; } -extern int ffi_call_unix(bool (*)(struct ia64_args *, extended_cif *, int), - extended_cif *, unsigned, - unsigned, unsigned *, void (*)()); +extern int ffi_call_unix (struct ia64_args *, PTR64, void (*)(), UINT64); void ffi_call(ffi_cif *cif, void (*fn)(), void *rvalue, void **avalue) { - extended_cif ecif; - long simple = cif -> flags & FFI_SIMPLE; - - /* Should this also check for Unix ABI? */ - /* This is almost, but not quite, machine independent. Note that */ - /* we can get away with not caring about length of the result because */ - /* we assume we are little endian, and the result buffer is large */ - /* enough. */ - /* This needs work for HP/UX. */ - if (simple) { - long (*lfn)() = (long (*)())fn; - long result; - switch(simple) { - case FFI_SIMPLE_V: - result = lfn(); - break; - case FFI_SIMPLE_I: - result = lfn(*(int *)avalue[0]); - break; - case FFI_SIMPLE_L: - result = lfn(*(long *)avalue[0]); - break; - case FFI_SIMPLE_II: - result = lfn(*(int *)avalue[0], *(int *)avalue[1]); - break; - case FFI_SIMPLE_IL: - result = lfn(*(int *)avalue[0], *(long *)avalue[1]); - break; - case FFI_SIMPLE_LI: - result = lfn(*(long *)avalue[0], *(int *)avalue[1]); - break; - case FFI_SIMPLE_LL: - result = lfn(*(long *)avalue[0], *(long *)avalue[1]); - break; - } - if ((cif->flags & ~FFI_SIMPLE) != FFI_TYPE_VOID && 0 != rvalue) { - * (long *)rvalue = result; - } - return; - } - ecif.cif = cif; - ecif.avalue = avalue; - - /* If the return value is a struct and we don't have a return - value address then we need to make one. 
*/ - - if (rvalue == NULL && cif->rtype->type == FFI_TYPE_STRUCT) - ecif.rvalue = alloca(cif->rtype->size); - else - ecif.rvalue = rvalue; - - switch (cif->abi) - { - case FFI_UNIX: - ffi_call_unix(ffi_prep_args, &ecif, cif->bytes, - cif->flags, rvalue, fn); - break; + struct ia64_args *stack; + long i, avn, gpcount, fpcount; + ffi_type **p_arg; - default: - FFI_ASSERT(0); - break; - } -} - -/* - * Closures represent a pair consisting of a function pointer, and - * some user data. A closure is invoked by reinterpreting the closure - * as a function pointer, and branching to it. Thus we can make an - * interpreted function callable as a C function: We turn the interpreter - * itself, together with a pointer specifying the interpreted procedure, - * into a closure. - * On X86, the first few words of the closure structure actually contain code, - * which will do the right thing. On most other architectures, this - * would raise some Icache/Dcache coherence issues (which can be solved, but - * often not cheaply). - * For IA64, function pointer are already pairs consisting of a code - * pointer, and a gp pointer. The latter is needed to access global variables. - * Here we set up such a pair as the first two words of the closure (in - * the "trampoline" area), but we replace the gp pointer with a pointer - * to the closure itself. We also add the real gp pointer to the - * closure. This allows the function entry code to both retrieve the - * user data, and to restire the correct gp pointer. - */ - -static void -ffi_prep_incoming_args_UNIX(struct ia64_args *args, void **rvalue, - void **avalue, ffi_cif *cif); - -/* This function is entered with the doctored gp (r1) value. - * This code is extremely gcc specific. There is some argument that - * it should really be written in assembly code, since it depends on - * gcc properties that might change over time. 
- */ - -/* ffi_closure_UNIX is an assembly routine, which copies the register */ -/* state into a struct ia64_args, and then invokes */ -/* ffi_closure_UNIX_inner. It also recovers the closure pointer */ -/* from its fake gp pointer. */ -void ffi_closure_UNIX(); - -#ifndef __GNUC__ -# error This requires gcc -#endif -void -ffi_closure_UNIX_inner (ffi_closure *closure, struct ia64_args * args) -/* Hopefully declaring this as a varargs function will force all args */ -/* to memory. */ -{ - // this is our return value storage - long double res; - - // our various things... - ffi_cif *cif; - unsigned short rtype; - void *resp; - void **arg_area; - - resp = (void*)&res; - cif = closure->cif; - arg_area = (void**) alloca (cif->nargs * sizeof (void*)); - - /* this call will initialize ARG_AREA, such that each - * element in that array points to the corresponding - * value on the stack; and if the function returns - * a structure, it will re-set RESP to point to the - * structure return address. 
*/ - - ffi_prep_incoming_args_UNIX(args, (void**)&resp, arg_area, cif); - - (closure->fun) (cif, resp, arg_area, closure->user_data); - - rtype = cif->flags; - - /* now, do a generic return based on the value of rtype */ - if (rtype == FFI_TYPE_INT) - { - asm volatile ("ld8 r8=[%0]" : : "r" (resp) : "r8"); - } - else if (rtype == FFI_TYPE_FLOAT) - { - asm volatile ("ldfs f8=[%0]" : : "r" (resp) : "f8"); - } - else if (rtype == FFI_TYPE_DOUBLE) - { - asm volatile ("ldfd f8=[%0]" : : "r" (resp) : "f8"); - } - else if (rtype == FFI_IS_SMALL_STRUCT2) - { - asm volatile ("ld8 r8=[%0]; ld8 r9=[%1]" - : : "r" (resp), "r" (resp+8) : "r8","r9"); - } - else if (rtype == FFI_IS_SMALL_STRUCT3) - { - asm volatile ("ld8 r8=[%0]; ld8 r9=[%1]; ld8 r10=[%2]" - : : "r" (resp), "r" (resp+8), "r" (resp+16) - : "r8","r9","r10"); - } - else if (rtype == FFI_IS_SMALL_STRUCT4) - { - asm volatile ("ld8 r8=[%0]; ld8 r9=[%1]; ld8 r10=[%2]; ld8 r11=[%3]" - : : "r" (resp), "r" (resp+8), "r" (resp+16), "r" (resp+24) - : "r8","r9","r10","r11"); - } - else if (rtype != FFI_TYPE_VOID && rtype != FFI_TYPE_STRUCT) - { - /* Can only happen for homogeneous FP aggregates? */ - abort(); - } -} + FFI_ASSERT (cif->abi == FFI_UNIX); -static void -ffi_prep_incoming_args_UNIX(struct ia64_args *args, void **rvalue, - void **avalue, ffi_cif *cif) -{ - register unsigned int i; - register unsigned int avn; - register void **p_argv; - register long *argp = args -> out_regs; - unsigned fp_reg_num = 0; - register ffi_type **p_arg; + /* If we have no spot for a return value, make one. */ + if (rvalue == NULL && cif->rtype->type != FFI_TYPE_VOID) + rvalue = alloca (cif->rtype->size); + + /* Allocate the stack frame. */ + stack = alloca (cif->bytes); + gpcount = fpcount = 0; avn = cif->nargs; - p_argv = avalue; - - for (i = cif->nargs, p_arg = cif->arg_types; i != 0; i--, p_arg++) + for (i = 0, p_arg = cif->arg_types; i < avn; i++, p_arg++) { - size_t z; /* In units of words or argument slots. 
*/ - switch ((*p_arg)->type) { case FFI_TYPE_SINT8: + stack->gp_regs[gpcount++] = *(SINT8 *)avalue[i]; + break; case FFI_TYPE_UINT8: + stack->gp_regs[gpcount++] = *(UINT8 *)avalue[i]; + break; case FFI_TYPE_SINT16: + stack->gp_regs[gpcount++] = *(SINT16 *)avalue[i]; + break; case FFI_TYPE_UINT16: + stack->gp_regs[gpcount++] = *(UINT16 *)avalue[i]; + break; case FFI_TYPE_SINT32: + stack->gp_regs[gpcount++] = *(SINT32 *)avalue[i]; + break; case FFI_TYPE_UINT32: + stack->gp_regs[gpcount++] = *(UINT32 *)avalue[i]; + break; case FFI_TYPE_SINT64: case FFI_TYPE_UINT64: + stack->gp_regs[gpcount++] = *(UINT64 *)avalue[i]; + break; + case FFI_TYPE_POINTER: - z = 1; - *p_argv = (void *)argp; + stack->gp_regs[gpcount++] = (UINT64)(PTR64) *(void **)avalue[i]; break; - + case FFI_TYPE_FLOAT: - z = 1; - /* Convert argument back to float in place from the saved value */ - if (argp - args->out_regs < 8 && fp_reg_num < 8) { - *(float *)argp = args -> fp_regs[fp_reg_num++]; - } - *p_argv = (void *)argp; + if (gpcount < 8 && fpcount < 8) + stf_spill (&stack->fp_regs[fpcount++], *(float *)avalue[i]); + stack->gp_regs[gpcount++] = *(UINT32 *)avalue[i]; break; case FFI_TYPE_DOUBLE: - z = 1; - if (argp - args->out_regs < 8 && fp_reg_num < 8) { - *p_argv = args -> fp_regs + fp_reg_num++; - } else { - *p_argv = (void *)argp; - } + if (gpcount < 8 && fpcount < 8) + stf_spill (&stack->fp_regs[fpcount++], *(double *)avalue[i]); + stack->gp_regs[gpcount++] = *(UINT64 *)avalue[i]; + break; + + case FFI_TYPE_LONGDOUBLE: + if (gpcount & 1) + gpcount++; + if (LDBL_MANT_DIG == 64 && gpcount < 8 && fpcount < 8) + stf_spill (&stack->fp_regs[fpcount++], *(__float80 *)avalue[i]); + memcpy (&stack->gp_regs[gpcount], avalue[i], 16); + gpcount += 2; break; case FFI_TYPE_STRUCT: { - size_t sz = (*p_arg)->size; - unsigned short element_type; - z = ((*p_arg)->size + FFI_SIZEOF_ARG - 1)/FFI_SIZEOF_ARG; - if (argp - args->out_regs < 8 - && is_homogeneous_fp_aggregate(*p_arg, 8, &element_type)) { - int nelements 
= sz/float_type_size(element_type); - if (nelements + fp_reg_num >= 8) { - /* hard case NYI. */ - abort(); - } - if (element_type == FFI_TYPE_DOUBLE) { - *p_argv = args -> fp_regs + fp_reg_num; - fp_reg_num += nelements; - break; - } - if (element_type == FFI_TYPE_FLOAT) { - int j; - for (j = 0; j < nelements; ++ j) { - ((float *)argp)[j] = args -> fp_regs[fp_reg_num + j]; + size_t size = (*p_arg)->size; + size_t align = (*p_arg)->alignment; + int hfa_type = hfa_element_type (*p_arg, 0); + + FFI_ASSERT (align <= 16); + if (align == 16 && (gpcount & 1)) + gpcount++; + + if (hfa_type != FFI_TYPE_VOID) + { + size_t hfa_size = hfa_type_size (hfa_type); + size_t offset = 0; + size_t gp_offset = gpcount * 8; + + while (fpcount < 8 + && offset < size + && gp_offset < 8 * 8) + { + stf_spill (&stack->fp_regs[fpcount], + hfa_type_load (hfa_type, avalue[i] + offset)); + offset += hfa_size; + gp_offset += hfa_size; + fpcount += 1; } - *p_argv = (void *)argp; - fp_reg_num += nelements; - break; - } - abort(); /* Other fp types NYI */ } + + memcpy (&stack->gp_regs[gpcount], avalue[i], size); + gpcount += (size + 7) / 8; } break; default: - FFI_ASSERT(0); + abort (); } - - argp += z; - p_argv++; - } - - return; + + ffi_call_unix (stack, rvalue, fn, cif->flags); } +/* Closures represent a pair consisting of a function pointer, and + some user data. A closure is invoked by reinterpreting the closure + as a function pointer, and branching to it. Thus we can make an + interpreted function callable as a C function: We turn the + interpreter itself, together with a pointer specifying the + interpreted procedure, into a closure. -/* Fill in a closure to refer to the specified fun and user_data. */ -/* cif specifies the argument and result types for fun. */ -/* the cif must already be prep'ed */ + For IA64, function pointer are already pairs consisting of a code + pointer, and a gp pointer. The latter is needed to access global + variables. 
Here we set up such a pair as the first two words of + the closure (in the "trampoline" area), but we replace the gp + pointer with a pointer to the closure itself. We also add the real + gp pointer to the closure. This allows the function entry code to + both retrieve the user data, and to restore the correct gp pointer. */ -/* The layout of a function descriptor. A C function pointer really */ -/* points to one of these. */ -typedef struct ia64_fd_struct { - void *code_pointer; - void *gp; -} ia64_fd; +extern void ffi_closure_unix (); ffi_status ffi_prep_closure (ffi_closure* closure, @@ -652,20 +395,168 @@ ffi_prep_closure (ffi_closure* closure, void (*fun)(ffi_cif*,void*,void**,void*), void *user_data) { - struct ffi_ia64_trampoline_struct *tramp = - (struct ffi_ia64_trampoline_struct *) (closure -> tramp); - ia64_fd *fd = (ia64_fd *)(void *)ffi_closure_UNIX; + /* The layout of a function descriptor. A C function pointer really + points to one of these. */ + struct ia64_fd + { + UINT64 code_pointer; + UINT64 gp; + }; + + struct ffi_ia64_trampoline_struct + { + UINT64 code_pointer; /* Pointer to ffi_closure_unix. */ + UINT64 fake_gp; /* Pointer to closure, installed as gp. */ + UINT64 real_gp; /* Real gp value. 
*/ + }; + + struct ffi_ia64_trampoline_struct *tramp; + struct ia64_fd *fd; FFI_ASSERT (cif->abi == FFI_UNIX); - tramp -> code_pointer = fd -> code_pointer; - tramp -> real_gp = fd -> gp; - tramp -> fake_gp = closure; - closure->cif = cif; + tramp = (struct ffi_ia64_trampoline_struct *)closure->tramp; + fd = (struct ia64_fd *)(void *)ffi_closure_unix; + + tramp->code_pointer = fd->code_pointer; + tramp->real_gp = fd->gp; + tramp->fake_gp = (UINT64)(PTR64)closure; + closure->cif = cif; closure->user_data = user_data; - closure->fun = fun; + closure->fun = fun; return FFI_OK; } +UINT64 +ffi_closure_unix_inner (ffi_closure *closure, struct ia64_args *stack, + void *rvalue, void *r8) +{ + ffi_cif *cif; + void **avalue; + ffi_type **p_arg; + long i, avn, gpcount, fpcount; + + cif = closure->cif; + avn = cif->nargs; + avalue = alloca (avn * sizeof (void *)); + + /* If the structure return value is passed in memory get that location + from r8 so as to pass the value directly back to the caller. 
*/ + if (cif->flags == FFI_TYPE_STRUCT) + rvalue = r8; + + gpcount = fpcount = 0; + for (i = 0, p_arg = cif->arg_types; i < avn; i++, p_arg++) + { + switch ((*p_arg)->type) + { + case FFI_TYPE_SINT8: + case FFI_TYPE_UINT8: + avalue[i] = endian_adjust(&stack->gp_regs[gpcount++], 1); + break; + case FFI_TYPE_SINT16: + case FFI_TYPE_UINT16: + avalue[i] = endian_adjust(&stack->gp_regs[gpcount++], 2); + break; + case FFI_TYPE_SINT32: + case FFI_TYPE_UINT32: + avalue[i] = endian_adjust(&stack->gp_regs[gpcount++], 4); + break; + case FFI_TYPE_SINT64: + case FFI_TYPE_UINT64: + avalue[i] = &stack->gp_regs[gpcount++]; + break; + case FFI_TYPE_POINTER: + avalue[i] = endian_adjust(&stack->gp_regs[gpcount++], sizeof(void*)); + break; + + case FFI_TYPE_FLOAT: + if (gpcount < 8 && fpcount < 8) + { + void *addr = &stack->fp_regs[fpcount++]; + avalue[i] = addr; + *(float *)addr = ldf_fill (addr); + } + else + avalue[i] = endian_adjust(&stack->gp_regs[gpcount], 4); + gpcount++; + break; + + case FFI_TYPE_DOUBLE: + if (gpcount < 8 && fpcount < 8) + { + void *addr = &stack->fp_regs[fpcount++]; + avalue[i] = addr; + *(double *)addr = ldf_fill (addr); + } + else + avalue[i] = &stack->gp_regs[gpcount]; + gpcount++; + break; + + case FFI_TYPE_LONGDOUBLE: + if (gpcount & 1) + gpcount++; + if (LDBL_MANT_DIG == 64 && gpcount < 8 && fpcount < 8) + { + void *addr = &stack->fp_regs[fpcount++]; + avalue[i] = addr; + *(__float80 *)addr = ldf_fill (addr); + } + else + avalue[i] = &stack->gp_regs[gpcount]; + gpcount += 2; + break; + + case FFI_TYPE_STRUCT: + { + size_t size = (*p_arg)->size; + size_t align = (*p_arg)->alignment; + int hfa_type = hfa_element_type (*p_arg, 0); + + FFI_ASSERT (align <= 16); + if (align == 16 && (gpcount & 1)) + gpcount++; + + if (hfa_type != FFI_TYPE_VOID) + { + size_t hfa_size = hfa_type_size (hfa_type); + size_t offset = 0; + size_t gp_offset = gpcount * 8; + void *addr = alloca (size); + + avalue[i] = addr; + + while (fpcount < 8 + && offset < size + && gp_offset < 
8 * 8) + { + hfa_type_store (hfa_type, addr + offset, + ldf_fill (&stack->fp_regs[fpcount])); + offset += hfa_size; + gp_offset += hfa_size; + fpcount += 1; + } + + if (offset < size) + memcpy (addr + offset, (char *)stack->gp_regs + gp_offset, + size - offset); + } + else + avalue[i] = &stack->gp_regs[gpcount]; + + gpcount += (size + 7) / 8; + } + break; + + default: + abort (); + } + } + + closure->fun (cif, rvalue, avalue, closure->user_data); + + return cif->flags; +} diff --git a/libffi/src/ia64/ffitarget.h b/libffi/src/ia64/ffitarget.h index 3b78654..2f98d51 100644 --- a/libffi/src/ia64/ffitarget.h +++ b/libffi/src/ia64/ffitarget.h @@ -45,14 +45,5 @@ typedef enum ffi_abi { /* can be interpreted as a C function */ /* descriptor: */ -#ifndef LIBFFI_ASM -struct ffi_ia64_trampoline_struct { - void * code_pointer; /* Pointer to ffi_closure_UNIX */ - void * fake_gp; /* Pointer to closure, installed as gp */ - void * real_gp; /* Real gp value, reinstalled by */ - /* ffi_closure_UNIX. */ -}; -#endif - #endif diff --git a/libffi/src/ia64/ia64_flags.h b/libffi/src/ia64/ia64_flags.h index 23dbd3e..1dd6d7e 100644 --- a/libffi/src/ia64/ia64_flags.h +++ b/libffi/src/ia64/ia64_flags.h @@ -25,38 +25,15 @@ OTHER DEALINGS IN THE SOFTWARE. ----------------------------------------------------------------------- */ - -/* Homogeneous Floating Point Aggregates (HFAs) which are returned */ -/* in FP registers. The least significant bits specify the size in */ -/* words. */ -#define FFI_IS_FLOAT_FP_AGGREGATE 0x1000 -#define FFI_IS_DOUBLE_FP_AGGREGATE 0x0800 -#define FLOAT_FP_AGGREGATE_BIT 12 -#define DOUBLE_FP_AGGREGATE_BIT 11 - -/* Small structures containing N words. If N=1, they are returned */ -/* as though they were integers. 
*/ -#define FFI_IS_SMALL_STRUCT2 0x40 /* Struct > 8, <=16 bytes */ -#define FFI_IS_SMALL_STRUCT3 0x41 /* Struct > 16 <= 24 bytes */ -#define FFI_IS_SMALL_STRUCT4 0x42 /* Struct > 24, <=32 bytes */ - -/* Flag values identifying particularly simple cases, which are */ -/* handled specially. We treat functions as simple if they take all */ -/* arguments can be passed as 32 or 64 bit integer quantities, there is */ -/* either no return value or it can be treated as a 64bit integer, and */ -/* if there are at most 2 arguments. */ -/* This is OR'ed with the normal flag values. */ -#define FFI_SIMPLE_V 0x10000 /* () -> X */ -#define FFI_SIMPLE_I 0x20000 /* (int) -> X */ -#define FFI_SIMPLE_L 0x30000 /* (long) -> X */ -#define FFI_SIMPLE_II 0x40000 /* (int,int) -> X */ -#define FFI_SIMPLE_IL 0x50000 /* (int,long) -> X */ -#define FFI_SIMPLE_LI 0x60000 /* (long,int) -> X */ -#define FFI_SIMPLE_LL 0x70000 /* (long,long) -> X */ - -/* Mask for all of the FFI_SIMPLE bits: */ -#define FFI_SIMPLE 0xf0000 - -/* An easy way to build FFI_SIMPLE flags from FFI_SIMPLE_V: */ -#define FFI_ADD_LONG_ARG(flag) (((flag) << 1) | 0x10000) -#define FFI_ADD_INT_ARG(flag) ((flag) << 1) +/* "Type" codes used between assembly and C. When used as a part of + a cfi->flags value, the low byte will be these extra type codes, + and bits 8-31 will be the actual size of the type. */ + +/* Small structures containing N words in integer registers. */ +#define FFI_IA64_TYPE_SMALL_STRUCT (FFI_TYPE_LAST + 1) + +/* Homogeneous Floating Point Aggregates (HFAs) which are returned + in FP registers. 
*/ +#define FFI_IA64_TYPE_HFA_FLOAT (FFI_TYPE_LAST + 2) +#define FFI_IA64_TYPE_HFA_DOUBLE (FFI_TYPE_LAST + 3) +#define FFI_IA64_TYPE_HFA_LDOUBLE (FFI_TYPE_LAST + 4) diff --git a/libffi/src/ia64/unix.S b/libffi/src/ia64/unix.S index be267f6..7c68b2d 100644 --- a/libffi/src/ia64/unix.S +++ b/libffi/src/ia64/unix.S @@ -33,295 +33,542 @@ #include <ffi.h> #include "ia64_flags.h" -/* parameters: */ -#define callback in0 -#define ecifp in1 -#define bytes in2 -#define flags in3 -#define raddr in4 -#define fn in5 - -#define FLOAT_SZ 8 /* in-memory size of fp operands */ - -/* Allocate an ia64_args structure on the stack; call ffi_prep_args */ -/* to fill it in with argument values; copy those to the real */ -/* registers, leaving overflow arguments on the stack. Then call fn */ -/* and move the result from registers into *raddr. */ .pred.safe_across_calls p1-p5,p16-p63 .text + +/* int ffi_call_unix (struct ia64_args *stack, PTR64 rvalue, + void (*fn)(), int flags); + */ + .align 16 - .global ffi_call_unix - .proc ffi_call_unix + .global ffi_call_unix + .proc ffi_call_unix ffi_call_unix: .prologue - .save ar.pfs,r38 /* loc0 */ - alloc loc0=ar.pfs,6,6,8,0 - .save rp,loc1 - mov loc1=b0; - .vframe loc5 - mov loc5=sp; + /* Bit o trickiness. We actually share a stack frame with ffi_call. + Rely on the fact that ffi_call uses a vframe and don't bother + tracking one here at all. */ + .fframe 0 + .save ar.pfs, r36 // loc0 + alloc loc0 = ar.pfs, 4, 3, 8, 0 + .save rp, loc1 + mov loc1 = b0 .body - sub sp=sp,bytes - mov loc4=r1 /* Save gp */ - ld8 r8=[callback],8 /* code address of callback */ - ;; - mov out0=sp - mov out1=ecifp - mov out2=bytes - ld8 r1=[callback] /* Set up gp for callback. Unnecessary? */ - mov b6=r8 - ;; - br.call.sptk.many b0 = b6 /* call ffi_prep_args */ - cmp.eq p6,p0=0,r8 /* r8 nonzero ==> need fp regs */ - ;; -(p6) add loc2=32+8*FLOAT_SZ,sp -(p6) br.cond.dptk.many fp_done - ;; /* Quiets warning; needed? 
*/ - add loc2=32,sp - add loc3=32+FLOAT_SZ,sp - ;; - ldfd f8=[loc2],2*FLOAT_SZ - ldfd f9=[loc3],2*FLOAT_SZ - ;; - ldfd f10=[loc2],2*FLOAT_SZ - ldfd f11=[loc3],2*FLOAT_SZ - ;; - ldfd f12=[loc2],2*FLOAT_SZ - ldfd f13=[loc3],2*FLOAT_SZ - ;; - ldfd f14=[loc2],2*FLOAT_SZ - ldfd f15=[loc3] - ;; -fp_done: - add r9=16,sp /* Pointer to r8_contents */ - /* loc2 points at first integer register value. */ - add loc3=8,loc2 - ;; - ld8 r8=[r9] /* Just in case we return large struct */ - ld8 out0=[loc2],16 - ld8 out1=[loc3],16 - ;; - ld8 out2=[loc2],16 - ld8 out3=[loc3],16 - ;; - ld8 out4=[loc2],16 - ld8 out5=[loc3],16 - ;; - ld8 out6=[loc2] - ld8 out7=[loc3] - /* Set sp to 16 bytes below the first stack parameter. This */ - /* is the value currently in loc2. */ - mov sp=loc2 - - ld8 r8=[fn],8 - ;; - ld8 r1=[fn] /* Set up gp */ - mov b6=r8;; - br.call.sptk.many b0 = b6 /* call fn */ - - /* Handle return value. */ - cmp.eq p6,p0=0,raddr - cmp.eq p7,p0=FFI_TYPE_INT,flags - cmp.eq p10,p0=FFI_IS_SMALL_STRUCT2,flags - cmp.eq p11,p0=FFI_IS_SMALL_STRUCT3,flags - cmp.eq p12,p0=FFI_IS_SMALL_STRUCT4,flags - ;; -(p6) br.cond.dpnt.few done /* Dont copy ret values if raddr = 0 */ -(p7) br.cond.dptk.few copy1 -(p10) br.cond.dpnt.few copy2 -(p11) br.cond.dpnt.few copy3 -(p12) br.cond.dpnt.few copy4 - cmp.eq p8,p0=FFI_TYPE_FLOAT,flags - cmp.eq p9,p0=FFI_TYPE_DOUBLE,flags - tbit.nz p6,p0=flags,FLOAT_FP_AGGREGATE_BIT - tbit.nz p7,p0=flags,DOUBLE_FP_AGGREGATE_BIT - ;; -(p8) stfs [raddr]=f8 -(p9) stfd [raddr]=f8 + add r16 = 16, in0 + mov loc2 = gp + mov r8 = in1 + ;; + + /* Load up all of the argument registers. 
*/ + ldf.fill f8 = [in0], 32 + ldf.fill f9 = [r16], 32 + ;; + ldf.fill f10 = [in0], 32 + ldf.fill f11 = [r16], 32 + ;; + ldf.fill f12 = [in0], 32 + ldf.fill f13 = [r16], 32 + ;; + ldf.fill f14 = [in0], 32 + ldf.fill f15 = [r16], 24 + ;; + ld8 out0 = [in0], 16 + ld8 out1 = [r16], 16 + ;; + ld8 out2 = [in0], 16 + ld8 out3 = [r16], 16 + ;; + ld8 out4 = [in0], 16 + ld8 out5 = [r16], 16 + ;; + ld8 out6 = [in0] + ld8 out7 = [r16] + ;; + + /* Deallocate the register save area from the stack frame. */ + mov sp = in0 + + /* Call the target function. */ + ld8 r16 = [in2], 8 + ;; + ld8 gp = [in2] + mov b6 = r16 + br.call.sptk.many b0 = b6 + ;; + + /* Dispatch to handle return value. */ + mov gp = loc2 + zxt1 r16 = in3 + ;; + mov ar.pfs = loc0 + addl r18 = @ltoffx(.Lst_table), gp + ;; + ld8.mov r18 = [r18], .Lst_table + mov b0 = loc1 + ;; + shladd r18 = r16, 3, r18 + ;; + ld8 r17 = [r18] + shr in3 = in3, 8 + ;; + add r17 = r17, r18 + ;; + mov b6 = r17 + br b6 + ;; + +.Lst_void: + br.ret.sptk.many b0 + ;; +.Lst_uint8: + zxt1 r8 = r8 + ;; + st8 [in1] = r8 + br.ret.sptk.many b0 + ;; +.Lst_sint8: + sxt1 r8 = r8 + ;; + st8 [in1] = r8 + br.ret.sptk.many b0 + ;; +.Lst_uint16: + zxt2 r8 = r8 + ;; + st8 [in1] = r8 + br.ret.sptk.many b0 + ;; +.Lst_sint16: + sxt2 r8 = r8 + ;; + st8 [in1] = r8 + br.ret.sptk.many b0 + ;; +.Lst_uint32: + zxt4 r8 = r8 + ;; + st8 [in1] = r8 + br.ret.sptk.many b0 + ;; +.Lst_sint32: + sxt4 r8 = r8 + ;; + st8 [in1] = r8 + br.ret.sptk.many b0 + ;; +.Lst_int64: + st8 [in1] = r8 + br.ret.sptk.many b0 + ;; +.Lst_float: + stfs [in1] = f8 + br.ret.sptk.many b0 + ;; +.Lst_double: + stfd [in1] = f8 + br.ret.sptk.many b0 + ;; +.Lst_ldouble: + stfe [in1] = f8 + br.ret.sptk.many b0 ;; - .label_state 1 -(p6) br.cond.dpnt.few handle_float_hfa -(p7) br.cond.dpnt.few handle_double_hfa - br done -copy4: - add loc3=24,raddr +.Lst_small_struct: + add sp = -16, sp + cmp.lt p6, p0 = 8, in3 + cmp.lt p7, p0 = 16, in3 + cmp.lt p8, p0 = 24, in3 + ;; + add r16 = 8, sp + add r17 = 16, sp 
+ add r18 = 24, sp + ;; + st8 [sp] = r8 +(p6) st8 [r16] = r9 + mov out0 = in1 +(p7) st8 [r17] = r10 +(p8) st8 [r18] = r11 + mov out1 = sp + mov out2 = in3 + br.call.sptk.many b0 = memcpy# ;; - st8 [loc3]=r11 -copy3: - add loc3=16,raddr + mov ar.pfs = loc0 + mov b0 = loc1 + mov gp = loc2 + br.ret.sptk.many b0 + +.Lst_hfa_float: + add r16 = 4, in1 + cmp.lt p6, p0 = 4, in3 + ;; + stfs [in1] = f8, 8 +(p6) stfs [r16] = f9, 8 + cmp.lt p7, p0 = 8, in3 + cmp.lt p8, p0 = 12, in3 + ;; +(p7) stfs [in1] = f10, 8 +(p8) stfs [r16] = f11, 8 + cmp.lt p9, p0 = 16, in3 + cmp.lt p10, p0 = 20, in3 + ;; +(p9) stfs [in1] = f12, 8 +(p10) stfs [r16] = f13, 8 + cmp.lt p6, p0 = 24, in3 + cmp.lt p7, p0 = 28, in3 + ;; +(p6) stfs [in1] = f14 +(p7) stfs [r16] = f15 + br.ret.sptk.many b0 + ;; + +.Lst_hfa_double: + add r16 = 8, in1 + cmp.lt p6, p0 = 8, in3 + ;; + stfd [in1] = f8, 16 +(p6) stfd [r16] = f9, 16 + cmp.lt p7, p0 = 16, in3 + cmp.lt p8, p0 = 24, in3 + ;; +(p7) stfd [in1] = f10, 16 +(p8) stfd [r16] = f11, 16 + cmp.lt p9, p0 = 32, in3 + cmp.lt p10, p0 = 40, in3 + ;; +(p9) stfd [in1] = f12, 16 +(p10) stfd [r16] = f13, 16 + cmp.lt p6, p0 = 48, in3 + cmp.lt p7, p0 = 56, in3 + ;; +(p6) stfd [in1] = f14 +(p7) stfd [r16] = f15 + br.ret.sptk.many b0 + ;; + +.Lst_hfa_ldouble: + add r16 = 16, in1 + cmp.lt p6, p0 = 16, in3 + ;; + stfe [in1] = f8, 32 +(p6) stfe [r16] = f9, 32 + cmp.lt p7, p0 = 32, in3 + cmp.lt p8, p0 = 48, in3 + ;; +(p7) stfe [in1] = f10, 32 +(p8) stfe [r16] = f11, 32 + cmp.lt p9, p0 = 64, in3 + cmp.lt p10, p0 = 80, in3 + ;; +(p9) stfe [in1] = f12, 32 +(p10) stfe [r16] = f13, 32 + cmp.lt p6, p0 = 96, in3 + cmp.lt p7, p0 = 112, in3 + ;; +(p6) stfe [in1] = f14 +(p7) stfe [r16] = f15 + br.ret.sptk.many b0 + ;; + + .endp ffi_call_unix + + .align 16 + .global ffi_closure_unix + .proc ffi_closure_unix + +#define FRAME_SIZE (8*16 + 8*8 + 8*16) + +ffi_closure_unix: + .prologue + .save ar.pfs, r40 // loc0 + alloc loc0 = ar.pfs, 8, 4, 4, 0 + .fframe FRAME_SIZE + add r12 = -FRAME_SIZE, r12 + 
.save rp, loc1 + mov loc1 = b0 + .save ar.unat, loc2 + mov loc2 = ar.unat + .body + + /* Retrieve closure pointer and real gp. */ + mov out0 = gp + add gp = 16, gp ;; - st8 [loc3]=r10 -copy2: - add loc3=8,raddr + ld8 gp = [gp] + + /* Spill all of the possible argument registers. */ + add r16 = 16 + 8*16, sp + add r17 = 16 + 8*16 + 16, sp + ;; + stf.spill [r16] = f8, 32 + stf.spill [r17] = f9, 32 + mov loc3 = gp + ;; + stf.spill [r16] = f10, 32 + stf.spill [r17] = f11, 32 + ;; + stf.spill [r16] = f12, 32 + stf.spill [r17] = f13, 32 + ;; + stf.spill [r16] = f14, 32 + stf.spill [r17] = f15, 24 + ;; + .mem.offset 0, 0 + st8.spill [r16] = in0, 16 + .mem.offset 8, 0 + st8.spill [r17] = in1, 16 + add out1 = 16 + 8*16, sp + ;; + .mem.offset 0, 0 + st8.spill [r16] = in2, 16 + .mem.offset 8, 0 + st8.spill [r17] = in3, 16 + add out2 = 16, sp + ;; + .mem.offset 0, 0 + st8.spill [r16] = in4, 16 + .mem.offset 8, 0 + st8.spill [r17] = in5, 16 + mov out3 = r8 + ;; + .mem.offset 0, 0 + st8.spill [r16] = in6 + .mem.offset 8, 0 + st8.spill [r17] = in7 + + /* Invoke ffi_closure_unix_inner for the hard work. */ + br.call.sptk.many b0 = ffi_closure_unix_inner ;; - st8 [loc3]=r9 -copy1: - st8 [raddr]=r8 - /* In the big struct case, raddr was passed as an argument. */ - /* In the void case there was nothing to do. */ -done: - mov r1=loc4 /* Restore gp */ + /* Dispatch to handle return value. 
*/ + mov gp = loc3 + zxt1 r16 = r8 + ;; + addl r18 = @ltoffx(.Lld_table), gp mov ar.pfs = loc0 + ;; + ld8.mov r18 = [r18], .Lld_table mov b0 = loc1 + ;; + shladd r18 = r16, 3, r18 + mov ar.unat = loc2 + ;; + ld8 r17 = [r18] + shr r8 = r8, 8 + ;; + add r17 = r17, r18 + add r16 = 16, sp + ;; + mov b6 = r17 + br b6 + ;; + .label_state 1 + +.Lld_void: + .restore sp + add sp = FRAME_SIZE, sp + br.ret.sptk.many b0 + ;; +.Lld_int8: + .body + .copy_state 1 + ld1 r8 = [r16] + .restore sp + add sp = FRAME_SIZE, sp + br.ret.sptk.many b0 + ;; +.Lld_int16: + .body + .copy_state 1 + ld2 r8 = [r16] + .restore sp + add sp = FRAME_SIZE, sp + br.ret.sptk.many b0 + ;; +.Lld_int32: + .body + .copy_state 1 + ld4 r8 = [r16] + .restore sp + add sp = FRAME_SIZE, sp + br.ret.sptk.many b0 + ;; +.Lld_int64: + .body + .copy_state 1 + ld8 r8 = [r16] + .restore sp + add sp = FRAME_SIZE, sp + br.ret.sptk.many b0 + ;; +.Lld_float: + .body + .copy_state 1 + ldfs f8 = [r16] + .restore sp + add sp = FRAME_SIZE, sp + br.ret.sptk.many b0 + ;; +.Lld_double: + .body + .copy_state 1 + ldfd f8 = [r16] .restore sp - mov sp = loc5 + add sp = FRAME_SIZE, sp br.ret.sptk.many b0 + ;; +.Lld_ldouble: + .body + .copy_state 1 + ldfe f8 = [r16] + .restore sp + add sp = FRAME_SIZE, sp + br.ret.sptk.many b0 + ;; -handle_double_hfa: +.Lld_small_struct: .body .copy_state 1 - /* Homogeneous floating point array of doubles is returned in */ - /* registers f8-f15. Save one at a time to return area. 
*/ - and flags=0xf,flags /* Retrieve size */ - ;; - cmp.eq p6,p0=2,flags - cmp.eq p7,p0=3,flags - cmp.eq p8,p0=4,flags - cmp.eq p9,p0=5,flags - cmp.eq p10,p0=6,flags - cmp.eq p11,p0=7,flags - cmp.eq p12,p0=8,flags - ;; -(p6) br.cond.dptk.few dhfa2 -(p7) br.cond.dptk.few dhfa3 -(p8) br.cond.dptk.few dhfa4 -(p9) br.cond.dptk.few dhfa5 -(p10) br.cond.dptk.few dhfa6 -(p11) br.cond.dptk.few dhfa7 -dhfa8: add loc3=7*8,raddr - ;; - stfd [loc3]=f15 -dhfa7: add loc3=6*8,raddr - ;; - stfd [loc3]=f14 -dhfa6: add loc3=5*8,raddr - ;; - stfd [loc3]=f13 -dhfa5: add loc3=4*8,raddr - ;; - stfd [loc3]=f12 -dhfa4: add loc3=3*8,raddr - ;; - stfd [loc3]=f11 -dhfa3: add loc3=2*8,raddr - ;; - stfd [loc3]=f10 -dhfa2: add loc3=1*8,raddr - ;; - stfd [loc3]=f9 - stfd [raddr]=f8 - br done - -handle_float_hfa: - /* Homogeneous floating point array of floats is returned in */ - /* registers f8-f15. Save one at a time to return area. */ - and flags=0xf,flags /* Retrieve size */ - ;; - cmp.eq p6,p0=2,flags - cmp.eq p7,p0=3,flags - cmp.eq p8,p0=4,flags - cmp.eq p9,p0=5,flags - cmp.eq p10,p0=6,flags - cmp.eq p11,p0=7,flags - cmp.eq p12,p0=8,flags - ;; -(p6) br.cond.dptk.few shfa2 -(p7) br.cond.dptk.few shfa3 -(p8) br.cond.dptk.few shfa4 -(p9) br.cond.dptk.few shfa5 -(p10) br.cond.dptk.few shfa6 -(p11) br.cond.dptk.few shfa7 -shfa8: add loc3=7*4,raddr - ;; - stfd [loc3]=f15 -shfa7: add loc3=6*4,raddr - ;; - stfd [loc3]=f14 -shfa6: add loc3=5*4,raddr - ;; - stfd [loc3]=f13 -shfa5: add loc3=4*4,raddr - ;; - stfd [loc3]=f12 -shfa4: add loc3=3*4,raddr - ;; - stfd [loc3]=f11 -shfa3: add loc3=2*4,raddr - ;; - stfd [loc3]=f10 -shfa2: add loc3=1*4,raddr - ;; - stfd [loc3]=f9 - stfd [raddr]=f8 - br done + add r17 = 8, r16 + cmp.lt p6, p0 = 8, r8 + cmp.lt p7, p0 = 16, r8 + cmp.lt p8, p0 = 24, r8 + ;; + ld8 r8 = [r16], 16 +(p6) ld8 r9 = [r17], 16 + ;; +(p7) ld8 r10 = [r16] +(p8) ld8 r11 = [r17] + .restore sp + add sp = FRAME_SIZE, sp + br.ret.sptk.many b0 + ;; - .endp ffi_call_unix +.Lld_hfa_float: + .body + 
.copy_state 1 + add r17 = 4, r16 + cmp.lt p6, p0 = 4, r8 + ;; + ldfs f8 = [r16], 8 +(p6) ldfs f9 = [r17], 8 + cmp.lt p7, p0 = 8, r8 + cmp.lt p8, p0 = 12, r8 + ;; +(p7) ldfs f10 = [r16], 8 +(p8) ldfs f11 = [r17], 8 + cmp.lt p9, p0 = 16, r8 + cmp.lt p10, p0 = 20, r8 + ;; +(p9) ldfs f12 = [r16], 8 +(p10) ldfs f13 = [r17], 8 + cmp.lt p6, p0 = 24, r8 + cmp.lt p7, p0 = 28, r8 + ;; +(p6) ldfs f14 = [r16] +(p7) ldfs f15 = [r17] + .restore sp + add sp = FRAME_SIZE, sp + br.ret.sptk.many b0 + ;; +.Lld_hfa_double: + .body + .copy_state 1 + add r17 = 8, r16 + cmp.lt p6, p0 = 8, r8 + ;; + ldfd f8 = [r16], 16 +(p6) ldfd f9 = [r17], 16 + cmp.lt p7, p0 = 16, r8 + cmp.lt p8, p0 = 24, r8 + ;; +(p7) ldfd f10 = [r16], 16 +(p8) ldfd f11 = [r17], 16 + cmp.lt p9, p0 = 32, r8 + cmp.lt p10, p0 = 40, r8 + ;; +(p9) ldfd f12 = [r16], 16 +(p10) ldfd f13 = [r17], 16 + cmp.lt p6, p0 = 48, r8 + cmp.lt p7, p0 = 56, r8 + ;; +(p6) ldfd f14 = [r16] +(p7) ldfd f15 = [r17] + .restore sp + add sp = FRAME_SIZE, sp + br.ret.sptk.many b0 + ;; - .pred.safe_across_calls p1-p5,p16-p63 -.text - .align 16 - .global ffi_closure_UNIX - .proc ffi_closure_UNIX -ffi_closure_UNIX: - .prologue - .save ar.pfs,r40 /* loc0 */ - alloc loc0=ar.pfs,8,3,2,0 - .save rp,loc1 - mov loc1=b0 - .vframe loc2 - mov loc2=sp - /* Retrieve closure pointer and real gp. */ - mov out0=gp - add gp=16,gp - ;; - ld8 gp=[gp] - /* Reserve a structia64_args on the stack such that arguments */ - /* past the first 8 are automatically placed in the right */ - /* slot. Note that when we start the sp points at 2 8-byte */ - /* scratch words, followed by the extra arguments. 
*/ -# define BASIC_ARGS_SZ (8*FLOAT_SZ+8*8+2*8) -# define FIRST_FP_OFFSET (4*8) - add r14=-(BASIC_ARGS_SZ-FIRST_FP_OFFSET),sp - add r15=-(BASIC_ARGS_SZ-FIRST_FP_OFFSET-FLOAT_SZ),sp - add sp=-BASIC_ARGS_SZ,sp - /* r14 points to fp_regs[0], r15 points to fp_regs[1] */ - ;; - stfd [r14]=f8,2*FLOAT_SZ - stfd [r15]=f9,2*FLOAT_SZ - ;; - stfd [r14]=f10,2*FLOAT_SZ - stfd [r15]=f11,2*FLOAT_SZ - ;; - stfd [r14]=f12,2*FLOAT_SZ - stfd [r15]=f13,2*FLOAT_SZ - ;; - stfd [r14]=f14,2*FLOAT_SZ - stfd [r15]=f15,FLOAT_SZ+8 - ;; - /* r14 points to first parameter register area, r15 to second. */ - st8 [r14]=in0,2*8 - st8 [r15]=in1,2*8 - ;; - st8 [r14]=in2,2*8 - st8 [r15]=in3,2*8 - ;; - st8 [r14]=in4,2*8 - st8 [r15]=in5,2*8 - ;; - st8 [r14]=in6,2*8 - st8 [r15]=in7,2*8 - /* Call ffi_closure_UNIX_inner */ - mov out1=sp - br.call.sptk.many b0=ffi_closure_UNIX_inner - ;; - mov b0=loc1 - mov ar.pfs=loc0 +.Lld_hfa_ldouble: + .body + .copy_state 1 + add r17 = 16, r16 + cmp.lt p6, p0 = 16, r8 + ;; + ldfe f8 = [r16], 32 +(p6) ldfe f9 = [r17], 32 + cmp.lt p7, p0 = 32, r8 + cmp.lt p8, p0 = 48, r8 + ;; +(p7) ldfe f10 = [r16], 32 +(p8) ldfe f11 = [r17], 32 + cmp.lt p9, p0 = 64, r8 + cmp.lt p10, p0 = 80, r8 + ;; +(p9) ldfe f12 = [r16], 32 +(p10) ldfe f13 = [r17], 32 + cmp.lt p6, p0 = 96, r8 + cmp.lt p7, p0 = 112, r8 + ;; +(p6) ldfe f14 = [r16] +(p7) ldfe f15 = [r17] .restore sp - mov sp=loc2 + add sp = FRAME_SIZE, sp br.ret.sptk.many b0 - .endp ffi_closure_UNIX - + ;; + + .endp ffi_closure_unix + + .section .rodata + .align 8 +.Lst_table: + data8 @pcrel(.Lst_void) // FFI_TYPE_VOID + data8 @pcrel(.Lst_sint32) // FFI_TYPE_INT + data8 @pcrel(.Lst_float) // FFI_TYPE_FLOAT + data8 @pcrel(.Lst_double) // FFI_TYPE_DOUBLE + data8 @pcrel(.Lst_ldouble) // FFI_TYPE_LONGDOUBLE + data8 @pcrel(.Lst_uint8) // FFI_TYPE_UINT8 + data8 @pcrel(.Lst_sint8) // FFI_TYPE_SINT8 + data8 @pcrel(.Lst_uint16) // FFI_TYPE_UINT16 + data8 @pcrel(.Lst_sint16) // FFI_TYPE_SINT16 + data8 @pcrel(.Lst_uint32) // FFI_TYPE_UINT32 + data8 
@pcrel(.Lst_sint32) // FFI_TYPE_SINT32 + data8 @pcrel(.Lst_int64) // FFI_TYPE_UINT64 + data8 @pcrel(.Lst_int64) // FFI_TYPE_SINT64 + data8 @pcrel(.Lst_void) // FFI_TYPE_STRUCT + data8 @pcrel(.Lst_int64) // FFI_TYPE_POINTER + data8 @pcrel(.Lst_small_struct) // FFI_IA64_TYPE_SMALL_STRUCT + data8 @pcrel(.Lst_hfa_float) // FFI_IA64_TYPE_HFA_FLOAT + data8 @pcrel(.Lst_hfa_double) // FFI_IA64_TYPE_HFA_DOUBLE + data8 @pcrel(.Lst_hfa_ldouble) // FFI_IA64_TYPE_HFA_LDOUBLE +.Lld_table: + data8 @pcrel(.Lld_void) // FFI_TYPE_VOID + data8 @pcrel(.Lld_int32) // FFI_TYPE_INT + data8 @pcrel(.Lld_float) // FFI_TYPE_FLOAT + data8 @pcrel(.Lld_double) // FFI_TYPE_DOUBLE + data8 @pcrel(.Lld_ldouble) // FFI_TYPE_LONGDOUBLE + data8 @pcrel(.Lld_int8) // FFI_TYPE_UINT8 + data8 @pcrel(.Lld_int8) // FFI_TYPE_SINT8 + data8 @pcrel(.Lld_int16) // FFI_TYPE_UINT16 + data8 @pcrel(.Lld_int16) // FFI_TYPE_SINT16 + data8 @pcrel(.Lld_int32) // FFI_TYPE_UINT32 + data8 @pcrel(.Lld_int32) // FFI_TYPE_SINT32 + data8 @pcrel(.Lld_int64) // FFI_TYPE_UINT64 + data8 @pcrel(.Lld_int64) // FFI_TYPE_SINT64 + data8 @pcrel(.Lld_void) // FFI_TYPE_STRUCT + data8 @pcrel(.Lld_int64) // FFI_TYPE_POINTER + data8 @pcrel(.Lld_small_struct) // FFI_IA64_TYPE_SMALL_STRUCT + data8 @pcrel(.Lld_hfa_float) // FFI_IA64_TYPE_HFA_FLOAT + data8 @pcrel(.Lld_hfa_double) // FFI_IA64_TYPE_HFA_DOUBLE + data8 @pcrel(.Lld_hfa_ldouble) // FFI_IA64_TYPE_HFA_LDOUBLE |