aboutsummaryrefslogtreecommitdiff
path: root/tcg/ppc/tcg-target.inc.c
diff options
context:
space:
mode:
Diffstat (limited to 'tcg/ppc/tcg-target.inc.c')
-rw-r--r--tcg/ppc/tcg-target.inc.c345
1 files changed, 191 insertions, 154 deletions
diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c
index 1f690df..21d764c 100644
--- a/tcg/ppc/tcg-target.inc.c
+++ b/tcg/ppc/tcg-target.inc.c
@@ -22,7 +22,8 @@
* THE SOFTWARE.
*/
-#include "tcg-be-ldst.h"
+#include "elf.h"
+#include "tcg-pool.inc.c"
#if defined _CALL_DARWIN || defined __APPLE__
#define TCG_TARGET_CALL_DARWIN
@@ -41,29 +42,8 @@
# define TCG_REG_TMP1 TCG_REG_R12
#endif
-/* For the 64-bit target, we don't like the 5 insn sequence needed to build
- full 64-bit addresses. Better to have a base register to which we can
- apply a 32-bit displacement.
-
- There are generally three items of interest:
- (1) helper functions in the main executable,
- (2) TranslationBlock data structures,
- (3) the return address in the epilogue.
-
- For user-only, we USE_STATIC_CODE_GEN_BUFFER, so the code_gen_buffer
- will be inside the main executable, and thus near enough to make a
- pointer to the epilogue be within 2GB of all helper functions.
-
- For softmmu, we'll let the kernel choose the address of code_gen_buffer,
- and odds are it'll be somewhere close to the main malloc arena, and so
- a pointer to the epilogue will be within 2GB of the TranslationBlocks.
-
- For --enable-pie, everything will be kinda near everything else,
- somewhere in high memory.
-
- Thus we choose to keep the return address in a call-saved register. */
-#define TCG_REG_RA TCG_REG_R31
-#define USE_REG_RA (TCG_TARGET_REG_BITS == 64)
+#define TCG_REG_TB TCG_REG_R31
+#define USE_REG_TB (TCG_TARGET_REG_BITS == 64)
/* Shorthand for size of a pointer. Avoid promotion to unsigned. */
#define SZP ((int)sizeof(void *))
@@ -81,8 +61,6 @@
static tcg_insn_unit *tb_ret_addr;
-#include "elf.h"
-
bool have_isa_2_06;
bool have_isa_3_00;
@@ -247,9 +225,12 @@ static inline void tcg_out_bc_noaddr(TCGContext *s, int insn)
static void patch_reloc(tcg_insn_unit *code_ptr, int type,
intptr_t value, intptr_t addend)
{
- tcg_insn_unit *target = (tcg_insn_unit *)value;
+ tcg_insn_unit *target;
+ tcg_insn_unit old;
+
+ value += addend;
+ target = (tcg_insn_unit *)value;
- tcg_debug_assert(addend == 0);
switch (type) {
case R_PPC_REL14:
reloc_pc14(code_ptr, target);
@@ -257,6 +238,12 @@ static void patch_reloc(tcg_insn_unit *code_ptr, int type,
case R_PPC_REL24:
reloc_pc24(code_ptr, target);
break;
+ case R_PPC_ADDR16:
+ assert(value == (int16_t)value);
+ old = *code_ptr;
+ old = deposit32(old, 0, 16, value);
+ *code_ptr = old;
+ break;
default:
tcg_abort();
}
@@ -616,50 +603,114 @@ static inline void tcg_out_shri64(TCGContext *s, TCGReg dst, TCGReg src, int c)
tcg_out_rld(s, RLDICL, dst, src, 64 - c, c);
}
-static void tcg_out_movi32(TCGContext *s, TCGReg ret, int32_t arg)
+/* Emit a move into ret of arg, if it can be done in one insn. */
+static bool tcg_out_movi_one(TCGContext *s, TCGReg ret, tcg_target_long arg)
{
- if (arg == (int16_t) arg) {
+ if (arg == (int16_t)arg) {
tcg_out32(s, ADDI | TAI(ret, 0, arg));
- } else {
+ return true;
+ }
+ if (arg == (int32_t)arg && (arg & 0xffff) == 0) {
tcg_out32(s, ADDIS | TAI(ret, 0, arg >> 16));
- if (arg & 0xffff) {
- tcg_out32(s, ORI | SAI(ret, ret, arg));
- }
+ return true;
}
+ return false;
}
-static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg ret,
- tcg_target_long arg)
+static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret,
+ tcg_target_long arg, bool in_prologue)
{
+ intptr_t tb_diff;
+ tcg_target_long tmp;
+ int shift;
+
tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);
- if (type == TCG_TYPE_I32 || arg == (int32_t)arg) {
- tcg_out_movi32(s, ret, arg);
- } else if (arg == (uint32_t)arg && !(arg & 0x8000)) {
+
+ if (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I32) {
+ arg = (int32_t)arg;
+ }
+
+ /* Load 16-bit immediates with one insn. */
+ if (tcg_out_movi_one(s, ret, arg)) {
+ return;
+ }
+
+ /* Load addresses within the TB with one insn. */
+ tb_diff = arg - (intptr_t)s->code_gen_ptr;
+ if (!in_prologue && USE_REG_TB && tb_diff == (int16_t)tb_diff) {
+ tcg_out32(s, ADDI | TAI(ret, TCG_REG_TB, tb_diff));
+ return;
+ }
+
+ /* Load 32-bit immediates with two insns. Note that we've already
+ eliminated bare ADDIS, so we know both insns are required. */
+ if (TCG_TARGET_REG_BITS == 32 || arg == (int32_t)arg) {
+ tcg_out32(s, ADDIS | TAI(ret, 0, arg >> 16));
+ tcg_out32(s, ORI | SAI(ret, ret, arg));
+ return;
+ }
+ if (arg == (uint32_t)arg && !(arg & 0x8000)) {
tcg_out32(s, ADDI | TAI(ret, 0, arg));
tcg_out32(s, ORIS | SAI(ret, ret, arg >> 16));
- } else {
- int32_t high;
+ return;
+ }
- if (USE_REG_RA) {
- intptr_t diff = arg - (intptr_t)tb_ret_addr;
- if (diff == (int32_t)diff) {
- tcg_out_mem_long(s, ADDI, ADD, ret, TCG_REG_RA, diff);
- return;
- }
+ /* Load masked 16-bit value. */
+ if (arg > 0 && (arg & 0x8000)) {
+ tmp = arg | 0x7fff;
+ if ((tmp & (tmp + 1)) == 0) {
+ int mb = clz64(tmp + 1) + 1;
+ tcg_out32(s, ADDI | TAI(ret, 0, arg));
+ tcg_out_rld(s, RLDICL, ret, ret, 0, mb);
+ return;
}
+ }
- high = arg >> 31 >> 1;
- tcg_out_movi32(s, ret, high);
- if (high) {
- tcg_out_shli64(s, ret, ret, 32);
- }
- if (arg & 0xffff0000) {
- tcg_out32(s, ORIS | SAI(ret, ret, arg >> 16));
- }
- if (arg & 0xffff) {
- tcg_out32(s, ORI | SAI(ret, ret, arg));
- }
+ /* Load common masks with 2 insns. */
+ shift = ctz64(arg);
+ tmp = arg >> shift;
+ if (tmp == (int16_t)tmp) {
+ tcg_out32(s, ADDI | TAI(ret, 0, tmp));
+ tcg_out_shli64(s, ret, ret, shift);
+ return;
+ }
+ shift = clz64(arg);
+ if (tcg_out_movi_one(s, ret, arg << shift)) {
+ tcg_out_shri64(s, ret, ret, shift);
+ return;
+ }
+
+ /* Load addresses within 2GB of TB with 2 (or rarely 3) insns. */
+ if (!in_prologue && USE_REG_TB && tb_diff == (int32_t)tb_diff) {
+ tcg_out_mem_long(s, ADDI, ADD, ret, TCG_REG_TB, tb_diff);
+ return;
+ }
+
+ /* Use the constant pool, if possible. */
+ if (!in_prologue && USE_REG_TB) {
+ new_pool_label(s, arg, R_PPC_ADDR16, s->code_ptr,
+ -(intptr_t)s->code_gen_ptr);
+ tcg_out32(s, LD | TAI(ret, TCG_REG_TB, 0));
+ return;
+ }
+
+ tmp = arg >> 31 >> 1;
+ tcg_out_movi(s, TCG_TYPE_I32, ret, tmp);
+ if (tmp) {
+ tcg_out_shli64(s, ret, ret, 32);
}
+ if (arg & 0xffff0000) {
+ tcg_out32(s, ORIS | SAI(ret, ret, arg >> 16));
+ }
+ if (arg & 0xffff) {
+ tcg_out32(s, ORI | SAI(ret, ret, arg));
+ }
+}
+
+static inline void tcg_out_movi(TCGContext *s, TCGType type, TCGReg ret,
+ tcg_target_long arg)
+{
+ tcg_out_movi_int(s, type, ret, arg, false);
}
static bool mask_operand(uint32_t c, int *mb, int *me)
@@ -1295,47 +1346,43 @@ static void tcg_out_mb(TCGContext *s, TCGArg a0)
tcg_out32(s, insn);
}
-#ifdef __powerpc64__
-void ppc_tb_set_jmp_target(uintptr_t jmp_addr, uintptr_t addr)
+void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_addr,
+ uintptr_t addr)
{
- tcg_insn_unit i1, i2;
- uint64_t pair;
- intptr_t diff = addr - jmp_addr;
-
- if (in_range_b(diff)) {
- i1 = B | (diff & 0x3fffffc);
- i2 = NOP;
- } else if (USE_REG_RA) {
- intptr_t lo, hi;
- diff = addr - (uintptr_t)tb_ret_addr;
- lo = (int16_t)diff;
- hi = (int32_t)(diff - lo);
- tcg_debug_assert(diff == hi + lo);
- i1 = ADDIS | TAI(TCG_REG_TMP1, TCG_REG_RA, hi >> 16);
- i2 = ADDI | TAI(TCG_REG_TMP1, TCG_REG_TMP1, lo);
- } else {
- tcg_debug_assert(TCG_TARGET_REG_BITS == 32 || addr == (int32_t)addr);
- i1 = ADDIS | TAI(TCG_REG_TMP1, 0, addr >> 16);
- i2 = ORI | SAI(TCG_REG_TMP1, TCG_REG_TMP1, addr);
- }
+ if (TCG_TARGET_REG_BITS == 64) {
+ tcg_insn_unit i1, i2;
+ intptr_t tb_diff = addr - tc_ptr;
+ intptr_t br_diff = addr - (jmp_addr + 4);
+ uint64_t pair;
+
+ /* This does not exercise the range of the branch, but we do
+ still need to be able to load the new value of TCG_REG_TB.
+ But this does still happen quite often. */
+ if (tb_diff == (int16_t)tb_diff) {
+ i1 = ADDI | TAI(TCG_REG_TB, TCG_REG_TB, tb_diff);
+ i2 = B | (br_diff & 0x3fffffc);
+ } else {
+ intptr_t lo = (int16_t)tb_diff;
+ intptr_t hi = (int32_t)(tb_diff - lo);
+ assert(tb_diff == hi + lo);
+ i1 = ADDIS | TAI(TCG_REG_TB, TCG_REG_TB, hi >> 16);
+ i2 = ADDI | TAI(TCG_REG_TB, TCG_REG_TB, lo);
+ }
#ifdef HOST_WORDS_BIGENDIAN
- pair = (uint64_t)i1 << 32 | i2;
+ pair = (uint64_t)i1 << 32 | i2;
#else
- pair = (uint64_t)i2 << 32 | i1;
+ pair = (uint64_t)i2 << 32 | i1;
#endif
- atomic_set((uint64_t *)jmp_addr, pair);
- flush_icache_range(jmp_addr, jmp_addr + 8);
-}
-#else
-void ppc_tb_set_jmp_target(uintptr_t jmp_addr, uintptr_t addr)
-{
- intptr_t diff = addr - jmp_addr;
- tcg_debug_assert(in_range_b(diff));
- atomic_set((uint32_t *)jmp_addr, B | (diff & 0x3fffffc));
- flush_icache_range(jmp_addr, jmp_addr + 4);
+ atomic_set((uint64_t *)jmp_addr, pair);
+ flush_icache_range(jmp_addr, jmp_addr + 8);
+ } else {
+ intptr_t diff = addr - jmp_addr;
+ tcg_debug_assert(in_range_b(diff));
+ atomic_set((uint32_t *)jmp_addr, B | (diff & 0x3fffffc));
+ flush_icache_range(jmp_addr, jmp_addr + 4);
+ }
}
-#endif
static void tcg_out_call(TCGContext *s, tcg_insn_unit *target)
{
@@ -1416,6 +1463,8 @@ static const uint32_t qemu_exts_opc[4] = {
};
#if defined (CONFIG_SOFTMMU)
+#include "tcg-ldst.inc.c"
+
/* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr,
* int mmu_idx, uintptr_t ra)
*/
@@ -1827,6 +1876,14 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is_64)
#endif
}
+static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
+{
+ int i;
+ for (i = 0; i < count; ++i) {
+ p[i] = NOP;
+ }
+}
+
/* Parameters for function call generation, used in tcg.c. */
#define TCG_TARGET_STACK_ALIGN 16
#define TCG_TARGET_EXTEND_ARGS 1
@@ -1895,44 +1952,20 @@ static void tcg_target_qemu_prologue(TCGContext *s)
#ifndef CONFIG_SOFTMMU
if (guest_base) {
- tcg_out_movi(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, guest_base);
+ tcg_out_movi_int(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, guest_base, true);
tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG);
}
#endif
tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
tcg_out32(s, MTSPR | RS(tcg_target_call_iarg_regs[1]) | CTR);
-
- if (USE_REG_RA) {
-#ifdef _CALL_AIX
- /* Make the caller load the value as the TOC into R2. */
- tb_ret_addr = s->code_ptr + 2;
- desc[1] = tb_ret_addr;
- tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_RA, TCG_REG_R2);
- tcg_out32(s, BCCTR | BO_ALWAYS);
-#elif defined(_CALL_ELF) && _CALL_ELF == 2
- /* Compute from the incoming R12 value. */
- tb_ret_addr = s->code_ptr + 2;
- tcg_out32(s, ADDI | TAI(TCG_REG_RA, TCG_REG_R12,
- tcg_ptr_byte_diff(tb_ret_addr, s->code_buf)));
- tcg_out32(s, BCCTR | BO_ALWAYS);
-#else
- /* Reserve max 5 insns for the constant load. */
- tb_ret_addr = s->code_ptr + 6;
- tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_RA, (intptr_t)tb_ret_addr);
- tcg_out32(s, BCCTR | BO_ALWAYS);
- while (s->code_ptr < tb_ret_addr) {
- tcg_out32(s, NOP);
- }
-#endif
- } else {
- tcg_out32(s, BCCTR | BO_ALWAYS);
- tb_ret_addr = s->code_ptr;
+ if (USE_REG_TB) {
+ tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_TB, tcg_target_call_iarg_regs[1]);
}
+ tcg_out32(s, BCCTR | BO_ALWAYS);
/* Epilogue */
- tcg_debug_assert(tb_ret_addr == s->code_ptr);
- s->code_gen_epilogue = tb_ret_addr;
+ s->code_gen_epilogue = tb_ret_addr = s->code_ptr;
tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_REG_R1, FRAME_SIZE+LR_OFFSET);
for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i) {
@@ -1952,44 +1985,48 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
switch (opc) {
case INDEX_op_exit_tb:
- if (USE_REG_RA) {
- ptrdiff_t disp = tcg_pcrel_diff(s, tb_ret_addr);
-
- /* Use a direct branch if we can, otherwise use the value in RA.
- Note that the direct branch is always backward, thus we need
- to account for the possibility of 5 insns from the movi. */
- if (!in_range_b(disp - 20)) {
- tcg_out32(s, MTSPR | RS(TCG_REG_RA) | CTR);
- tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R3, args[0]);
- tcg_out32(s, BCCTR | BO_ALWAYS);
- break;
- }
- }
tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R3, args[0]);
tcg_out_b(s, 0, tb_ret_addr);
break;
case INDEX_op_goto_tb:
- tcg_debug_assert(s->tb_jmp_insn_offset);
- /* Direct jump. */
-#ifdef __powerpc64__
- /* Ensure the next insns are 8-byte aligned. */
- if ((uintptr_t)s->code_ptr & 7) {
- tcg_out32(s, NOP);
- }
- s->tb_jmp_insn_offset[args[0]] = tcg_current_code_size(s);
- /* To be replaced by either a branch+nop or a load into TMP1. */
- s->code_ptr += 2;
- tcg_out32(s, MTSPR | RS(TCG_REG_TMP1) | CTR);
+ if (s->tb_jmp_insn_offset) {
+ /* Direct jump. */
+ if (TCG_TARGET_REG_BITS == 64) {
+ /* Ensure the next insns are 8-byte aligned. */
+ if ((uintptr_t)s->code_ptr & 7) {
+ tcg_out32(s, NOP);
+ }
+ s->tb_jmp_insn_offset[args[0]] = tcg_current_code_size(s);
+ tcg_out32(s, ADDIS | TAI(TCG_REG_TB, TCG_REG_TB, 0));
+ tcg_out32(s, ADDI | TAI(TCG_REG_TB, TCG_REG_TB, 0));
+ } else {
+ s->tb_jmp_insn_offset[args[0]] = tcg_current_code_size(s);
+ tcg_out32(s, B);
+ s->tb_jmp_reset_offset[args[0]] = tcg_current_code_size(s);
+ break;
+ }
+ } else {
+ /* Indirect jump. */
+ tcg_debug_assert(s->tb_jmp_insn_offset == NULL);
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TB, 0,
+ (intptr_t)(s->tb_jmp_insn_offset + args[0]));
+ }
+ tcg_out32(s, MTSPR | RS(TCG_REG_TB) | CTR);
tcg_out32(s, BCCTR | BO_ALWAYS);
-#else
- /* To be replaced by a branch. */
- s->code_ptr++;
-#endif
- s->tb_jmp_reset_offset[args[0]] = tcg_current_code_size(s);
+ s->tb_jmp_reset_offset[args[0]] = c = tcg_current_code_size(s);
+ if (USE_REG_TB) {
+ /* For the unlinked case, need to reset TCG_REG_TB. */
+ c = -c;
+ assert(c == (int16_t)c);
+ tcg_out32(s, ADDI | TAI(TCG_REG_TB, TCG_REG_TB, c));
+ }
break;
case INDEX_op_goto_ptr:
tcg_out32(s, MTSPR | RS(args[0]) | CTR);
- tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R3, 0);
+ if (USE_REG_TB) {
+ tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_TB, args[0]);
+ }
+ tcg_out32(s, ADDI | TAI(TCG_REG_R3, 0, 0));
tcg_out32(s, BCCTR | BO_ALWAYS);
break;
case INDEX_op_br:
@@ -2759,8 +2796,8 @@ static void tcg_target_init(TCGContext *s)
tcg_regset_set_reg(s->reserved_regs, TCG_REG_R13); /* thread pointer */
#endif
tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP1); /* mem temp */
- if (USE_REG_RA) {
- tcg_regset_set_reg(s->reserved_regs, TCG_REG_RA); /* return addr */
+ if (USE_REG_TB) {
+ tcg_regset_set_reg(s->reserved_regs, TCG_REG_TB); /* tb->tc_ptr */
}
}