aboutsummaryrefslogtreecommitdiff
path: root/tcg/tci.c
diff options
context:
space:
mode:
authorRichard Henderson <richard.henderson@linaro.org>2021-01-30 14:24:25 -0800
committerRichard Henderson <richard.henderson@linaro.org>2021-06-19 08:51:11 -0700
commit7b7d8b2d9a7fd68de821f96267e224c1a6256af1 (patch)
tree0de572bc83a96bf0793c3b27605c44c7c8704f0d /tcg/tci.c
parentbcb81061dc8173076d9669f969d132b998cd2af9 (diff)
downloadqemu-7b7d8b2d9a7fd68de821f96267e224c1a6256af1.zip
qemu-7b7d8b2d9a7fd68de821f96267e224c1a6256af1.tar.gz
qemu-7b7d8b2d9a7fd68de821f96267e224c1a6256af1.tar.bz2
tcg/tci: Use ffi for calls
This requires adjusting where arguments are stored. Place them on the stack at left-aligned positions. Adjust the stack frame to be at entirely positive offsets. Tested-by: Philippe Mathieu-Daudé <f4bug@amsat.org> Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Diffstat (limited to 'tcg/tci.c')
-rw-r--r--tcg/tci.c138
1 files changed, 79 insertions, 59 deletions
diff --git a/tcg/tci.c b/tcg/tci.c
index d68c5a4..a3d2351 100644
--- a/tcg/tci.c
+++ b/tcg/tci.c
@@ -18,45 +18,26 @@
*/
#include "qemu/osdep.h"
-
-/* Enable TCI assertions only when debugging TCG (and without NDEBUG defined).
- * Without assertions, the interpreter runs much faster. */
-#if defined(CONFIG_DEBUG_TCG)
-# define tci_assert(cond) assert(cond)
-#else
-# define tci_assert(cond) ((void)(cond))
-#endif
-
#include "qemu-common.h"
#include "tcg/tcg.h" /* MAX_OPC_PARAM_IARGS */
#include "exec/cpu_ldst.h"
#include "tcg/tcg-op.h"
#include "qemu/compiler.h"
+#include <ffi.h>
-#if MAX_OPC_PARAM_IARGS != 6
-# error Fix needed, number of supported input arguments changed!
-#endif
-#if TCG_TARGET_REG_BITS == 32
-typedef uint64_t (*helper_function)(tcg_target_ulong, tcg_target_ulong,
- tcg_target_ulong, tcg_target_ulong,
- tcg_target_ulong, tcg_target_ulong,
- tcg_target_ulong, tcg_target_ulong,
- tcg_target_ulong, tcg_target_ulong,
- tcg_target_ulong, tcg_target_ulong);
+
+/*
+ * Enable TCI assertions only when debugging TCG (and without NDEBUG defined).
+ * Without assertions, the interpreter runs much faster.
+ */
+#if defined(CONFIG_DEBUG_TCG)
+# define tci_assert(cond) assert(cond)
#else
-typedef uint64_t (*helper_function)(tcg_target_ulong, tcg_target_ulong,
- tcg_target_ulong, tcg_target_ulong,
- tcg_target_ulong, tcg_target_ulong);
+# define tci_assert(cond) ((void)(cond))
#endif
__thread uintptr_t tci_tb_ptr;
-static tcg_target_ulong tci_read_reg(const tcg_target_ulong *regs, TCGReg index)
-{
- tci_assert(index < TCG_TARGET_NB_REGS);
- return regs[index];
-}
-
static void
tci_write_reg(tcg_target_ulong *regs, TCGReg index, tcg_target_ulong value)
{
@@ -133,6 +114,7 @@ static tcg_target_ulong tci_read_label(const uint8_t **tb_ptr)
* I = immediate (tcg_target_ulong)
* l = label or pointer
* m = immediate (TCGMemOpIdx)
+ * n = immediate (call return length)
* r = register
* s = signed ldst offset
*/
@@ -153,6 +135,18 @@ static void tci_args_l(const uint8_t **tb_ptr, void **l0)
check_size(start, tb_ptr);
}
+static void tci_args_nll(const uint8_t **tb_ptr, uint8_t *n0,
+ void **l1, void **l2)
+{
+ const uint8_t *start = *tb_ptr;
+
+ *n0 = tci_read_b(tb_ptr);
+ *l1 = (void *)tci_read_label(tb_ptr);
+ *l2 = (void *)tci_read_label(tb_ptr);
+
+ check_size(start, tb_ptr);
+}
+
static void tci_args_rr(const uint8_t **tb_ptr,
TCGReg *r0, TCGReg *r1)
{
@@ -487,11 +481,14 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env,
{
const uint8_t *tb_ptr = v_tb_ptr;
tcg_target_ulong regs[TCG_TARGET_NB_REGS];
- long tcg_temps[CPU_TEMP_BUF_NLONGS];
- uintptr_t sp_value = (uintptr_t)(tcg_temps + CPU_TEMP_BUF_NLONGS);
+ uint64_t stack[(TCG_STATIC_CALL_ARGS_SIZE + TCG_STATIC_FRAME_SIZE)
+ / sizeof(uint64_t)];
+ void *call_slots[TCG_STATIC_CALL_ARGS_SIZE / sizeof(uint64_t)];
regs[TCG_AREG0] = (tcg_target_ulong)env;
- regs[TCG_REG_CALL_STACK] = sp_value;
+ regs[TCG_REG_CALL_STACK] = (uintptr_t)stack;
+ /* Other call_slots entries initialized at first use (see below). */
+ call_slots[0] = NULL;
tci_assert(tb_ptr);
for (;;) {
@@ -509,40 +506,58 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env,
#endif
TCGMemOpIdx oi;
int32_t ofs;
- void *ptr;
+ void *ptr, *cif;
/* Skip opcode and size entry. */
tb_ptr += 2;
switch (opc) {
case INDEX_op_call:
- tci_args_l(&tb_ptr, &ptr);
+ /*
+ * Set up the ffi_avalue array once, delayed until now
+ * because many TB's do not make any calls. In tcg_gen_callN,
+ * we arranged for every real argument to be "left-aligned"
+ * in each 64-bit slot.
+ */
+ if (unlikely(call_slots[0] == NULL)) {
+ for (int i = 0; i < ARRAY_SIZE(call_slots); ++i) {
+ call_slots[i] = &stack[i];
+ }
+ }
+
+ tci_args_nll(&tb_ptr, &len, &ptr, &cif);
+
+ /* Helper functions may need to access the "return address" */
tci_tb_ptr = (uintptr_t)tb_ptr;
-#if TCG_TARGET_REG_BITS == 32
- tmp64 = ((helper_function)ptr)(tci_read_reg(regs, TCG_REG_R0),
- tci_read_reg(regs, TCG_REG_R1),
- tci_read_reg(regs, TCG_REG_R2),
- tci_read_reg(regs, TCG_REG_R3),
- tci_read_reg(regs, TCG_REG_R4),
- tci_read_reg(regs, TCG_REG_R5),
- tci_read_reg(regs, TCG_REG_R6),
- tci_read_reg(regs, TCG_REG_R7),
- tci_read_reg(regs, TCG_REG_R8),
- tci_read_reg(regs, TCG_REG_R9),
- tci_read_reg(regs, TCG_REG_R10),
- tci_read_reg(regs, TCG_REG_R11));
- tci_write_reg(regs, TCG_REG_R0, tmp64);
- tci_write_reg(regs, TCG_REG_R1, tmp64 >> 32);
-#else
- tmp64 = ((helper_function)ptr)(tci_read_reg(regs, TCG_REG_R0),
- tci_read_reg(regs, TCG_REG_R1),
- tci_read_reg(regs, TCG_REG_R2),
- tci_read_reg(regs, TCG_REG_R3),
- tci_read_reg(regs, TCG_REG_R4),
- tci_read_reg(regs, TCG_REG_R5));
- tci_write_reg(regs, TCG_REG_R0, tmp64);
-#endif
+
+ ffi_call(cif, ptr, stack, call_slots);
+
+ /* Any result winds up "left-aligned" in the stack[0] slot. */
+ switch (len) {
+ case 0: /* void */
+ break;
+ case 1: /* uint32_t */
+ /*
+ * Note that libffi has an odd special case in that it will
+ * always widen an integral result to ffi_arg.
+ */
+ if (sizeof(ffi_arg) == 4) {
+ regs[TCG_REG_R0] = *(uint32_t *)stack;
+ break;
+ }
+ /* fall through */
+ case 2: /* uint64_t */
+ if (TCG_TARGET_REG_BITS == 32) {
+ tci_write_reg64(regs, TCG_REG_R1, TCG_REG_R0, stack[0]);
+ } else {
+ regs[TCG_REG_R0] = stack[0];
+ }
+ break;
+ default:
+ g_assert_not_reached();
+ }
break;
+
case INDEX_op_br:
tci_args_l(&tb_ptr, &ptr);
tb_ptr = ptr;
@@ -1119,7 +1134,7 @@ int print_insn_tci(bfd_vma addr, disassemble_info *info)
TCGCond c;
TCGMemOpIdx oi;
uint8_t pos, len;
- void *ptr;
+ void *ptr, *cif;
const uint8_t *tb_ptr;
status = info->read_memory_func(addr, buf, 2, info);
@@ -1147,13 +1162,18 @@ int print_insn_tci(bfd_vma addr, disassemble_info *info)
switch (op) {
case INDEX_op_br:
- case INDEX_op_call:
case INDEX_op_exit_tb:
case INDEX_op_goto_tb:
tci_args_l(&tb_ptr, &ptr);
info->fprintf_func(info->stream, "%-12s %p", op_name, ptr);
break;
+ case INDEX_op_call:
+ tci_args_nll(&tb_ptr, &len, &ptr, &cif);
+ info->fprintf_func(info->stream, "%-12s %d, %p, %p",
+ op_name, len, ptr, cif);
+ break;
+
case INDEX_op_brcond_i32:
case INDEX_op_brcond_i64:
tci_args_rrcl(&tb_ptr, &r0, &r1, &c, &ptr);