From 7b17a475404c970de1b192d026e00058cea980a6 Mon Sep 17 00:00:00 2001 From: WANG Xuerui Date: Mon, 7 Feb 2022 00:21:06 +0800 Subject: tcg/loongarch64: Fix fallout from recent MO_Q renaming Apparently we were left behind; just renaming MO_Q to MO_UQ is enough. Fixes: fc313c64345453c7 ("exec/memop: Adding signedness to quad definitions") Signed-off-by: WANG Xuerui Message-Id: <20220206162106.1092364-1-i.qemu@xen0n.name> Signed-off-by: Richard Henderson --- tcg/loongarch64/tcg-target.c.inc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tcg') diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc index 9cd46c9..d31a0e5 100644 --- a/tcg/loongarch64/tcg-target.c.inc +++ b/tcg/loongarch64/tcg-target.c.inc @@ -871,7 +871,7 @@ static void tcg_out_qemu_ld_indexed(TCGContext *s, TCGReg rd, TCGReg rj, case MO_SL: tcg_out_opc_ldx_w(s, rd, rj, rk); break; - case MO_Q: + case MO_UQ: tcg_out_opc_ldx_d(s, rd, rj, rk); break; default: -- cgit v1.1 From b1ee3c67253a40f8e62a799eb5b5a0d2954769f4 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 27 Jul 2021 19:42:35 -1000 Subject: tcg/i386: Support raising sigbus for user-only Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson --- tcg/i386/tcg-target.c.inc | 103 +++++++++++++++++++++++++++++++++++++++++++--- tcg/i386/tcg-target.h | 2 - 2 files changed, 98 insertions(+), 7 deletions(-) (limited to 'tcg') diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc index 4dab09f..faa15ee 100644 --- a/tcg/i386/tcg-target.c.inc +++ b/tcg/i386/tcg-target.c.inc @@ -22,6 +22,7 @@ * THE SOFTWARE. */ +#include "../tcg-ldst.c.inc" #include "../tcg-pool.c.inc" #ifdef CONFIG_DEBUG_TCG @@ -421,8 +422,9 @@ static bool tcg_target_const_match(int64_t val, TCGType type, int ct) #define OPC_VZEROUPPER (0x77 | P_EXT) #define OPC_XCHG_ax_r32 (0x90) -#define OPC_GRP3_Ev (0xf7) -#define OPC_GRP5 (0xff) +#define OPC_GRP3_Eb (0xf6) +#define OPC_GRP3_Ev (0xf7) +#define OPC_GRP5 (0xff) #define OPC_GRP14 (0x73 | P_EXT | P_DATA16) /* Group 1 opcode extensions for 0x80-0x83. @@ -444,6 +446,7 @@ static bool tcg_target_const_match(int64_t val, TCGType type, int ct) #define SHIFT_SAR 7 /* Group 3 opcode extensions for 0xf6, 0xf7. To be used with OPC_GRP3. */ +#define EXT3_TESTi 0 #define EXT3_NOT 2 #define EXT3_NEG 3 #define EXT3_MUL 4 @@ -1606,8 +1609,6 @@ static void tcg_out_nopn(TCGContext *s, int n) } #if defined(CONFIG_SOFTMMU) -#include "../tcg-ldst.c.inc" - /* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr, * int mmu_idx, uintptr_t ra) */ @@ -1916,7 +1917,84 @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l) tcg_out_jmp(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]); return true; } -#elif TCG_TARGET_REG_BITS == 32 +#else + +static void tcg_out_test_alignment(TCGContext *s, bool is_ld, TCGReg addrlo, + TCGReg addrhi, unsigned a_bits) +{ + unsigned a_mask = (1 << a_bits) - 1; + TCGLabelQemuLdst *label; + + /* + * We are expecting a_bits to max out at 7, so we can usually use testb. + * For i686, we have to use testl for %esi/%edi. 
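+     * (Background note: without a REX prefix, byte-operand instructions
+     * can only address AL/CL/DL/BL, i.e. register numbers 0..3, which is
+     * exactly what the "addrlo < 4" test below checks on 32-bit hosts.)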
+ */ + if (a_mask <= 0xff && (TCG_TARGET_REG_BITS == 64 || addrlo < 4)) { + tcg_out_modrm(s, OPC_GRP3_Eb | P_REXB_RM, EXT3_TESTi, addrlo); + tcg_out8(s, a_mask); + } else { + tcg_out_modrm(s, OPC_GRP3_Ev, EXT3_TESTi, addrlo); + tcg_out32(s, a_mask); + } + + /* jne slow_path */ + tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0); + + label = new_ldst_label(s); + label->is_ld = is_ld; + label->addrlo_reg = addrlo; + label->addrhi_reg = addrhi; + label->raddr = tcg_splitwx_to_rx(s->code_ptr + 4); + label->label_ptr[0] = s->code_ptr; + + s->code_ptr += 4; +} + +static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l) +{ + /* resolve label address */ + tcg_patch32(l->label_ptr[0], s->code_ptr - l->label_ptr[0] - 4); + + if (TCG_TARGET_REG_BITS == 32) { + int ofs = 0; + + tcg_out_st(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP, ofs); + ofs += 4; + + tcg_out_st(s, TCG_TYPE_I32, l->addrlo_reg, TCG_REG_ESP, ofs); + ofs += 4; + if (TARGET_LONG_BITS == 64) { + tcg_out_st(s, TCG_TYPE_I32, l->addrhi_reg, TCG_REG_ESP, ofs); + ofs += 4; + } + + tcg_out_pushi(s, (uintptr_t)l->raddr); + } else { + tcg_out_mov(s, TCG_TYPE_TL, tcg_target_call_iarg_regs[1], + l->addrlo_reg); + tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0); + + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_RAX, (uintptr_t)l->raddr); + tcg_out_push(s, TCG_REG_RAX); + } + + /* "Tail call" to the helper, with the return address back inline. */ + tcg_out_jmp(s, (const void *)(l->is_ld ? helper_unaligned_ld + : helper_unaligned_st)); + return true; +} + +static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l) +{ + return tcg_out_fail_alignment(s, l); +} + +static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l) +{ + return tcg_out_fail_alignment(s, l); +} + +#if TCG_TARGET_REG_BITS == 32 # define x86_guest_base_seg 0 # define x86_guest_base_index -1 # define x86_guest_base_offset guest_base @@ -1950,6 +2028,7 @@ static inline int setup_guest_base_seg(void) return 0; } # endif +#endif #endif /* SOFTMMU */ static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi, @@ -2059,6 +2138,8 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64) #if defined(CONFIG_SOFTMMU) int mem_index; tcg_insn_unit *label_ptr[2]; +#else + unsigned a_bits; #endif datalo = *args++; @@ -2081,6 +2162,11 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64) add_qemu_ldst_label(s, true, is64, oi, datalo, datahi, addrlo, addrhi, s->code_ptr, label_ptr); #else + a_bits = get_alignment_bits(opc); + if (a_bits) { + tcg_out_test_alignment(s, true, addrlo, addrhi, a_bits); + } + tcg_out_qemu_ld_direct(s, datalo, datahi, addrlo, x86_guest_base_index, x86_guest_base_offset, x86_guest_base_seg, is64, opc); @@ -2148,6 +2234,8 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64) #if defined(CONFIG_SOFTMMU) int mem_index; tcg_insn_unit *label_ptr[2]; +#else + unsigned a_bits; #endif datalo = *args++; @@ -2170,6 +2258,11 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64) add_qemu_ldst_label(s, false, is64, oi, datalo, datahi, addrlo, addrhi, s->code_ptr, label_ptr); #else + a_bits = get_alignment_bits(opc); + if (a_bits) { + tcg_out_test_alignment(s, false, addrlo, addrhi, a_bits); + } + tcg_out_qemu_st_direct(s, datalo, datahi, addrlo, x86_guest_base_index, x86_guest_base_offset, x86_guest_base_seg, opc); #endif diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h index b00a6da..3b2c943 100644 --- 
a/tcg/i386/tcg-target.h +++ b/tcg/i386/tcg-target.h @@ -232,9 +232,7 @@ static inline void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_rx, #define TCG_TARGET_HAS_MEMORY_BSWAP have_movbe -#ifdef CONFIG_SOFTMMU #define TCG_TARGET_NEED_LDST_LABELS -#endif #define TCG_TARGET_NEED_POOL_LABELS #endif -- cgit v1.1 From f85ab3d2e51e45e7fa33b539bc7e1350bdf64dde Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 3 Aug 2021 20:07:57 +0000 Subject: tcg/aarch64: Support raising sigbus for user-only Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson --- tcg/aarch64/tcg-target.c.inc | 91 +++++++++++++++++++++++++++++++++++--------- tcg/aarch64/tcg-target.h | 2 - 2 files changed, 74 insertions(+), 19 deletions(-) (limited to 'tcg') diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc index a8db553..077fc51 100644 --- a/tcg/aarch64/tcg-target.c.inc +++ b/tcg/aarch64/tcg-target.c.inc @@ -10,6 +10,7 @@ * See the COPYING file in the top-level directory for details. */ +#include "../tcg-ldst.c.inc" #include "../tcg-pool.c.inc" #include "qemu/bitops.h" @@ -443,6 +444,7 @@ typedef enum { I3404_ANDI = 0x12000000, I3404_ORRI = 0x32000000, I3404_EORI = 0x52000000, + I3404_ANDSI = 0x72000000, /* Move wide immediate instructions. */ I3405_MOVN = 0x12800000, @@ -1328,8 +1330,9 @@ static void tcg_out_goto_long(TCGContext *s, const tcg_insn_unit *target) if (offset == sextract64(offset, 0, 26)) { tcg_out_insn(s, 3206, B, offset); } else { - tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (intptr_t)target); - tcg_out_insn(s, 3207, BR, TCG_REG_TMP); + /* Choose X9 as a call-clobbered non-LR temporary. */ + tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X9, (intptr_t)target); + tcg_out_insn(s, 3207, BR, TCG_REG_X9); } } @@ -1541,9 +1544,14 @@ static void tcg_out_cltz(TCGContext *s, TCGType ext, TCGReg d, } } -#ifdef CONFIG_SOFTMMU -#include "../tcg-ldst.c.inc" +static void tcg_out_adr(TCGContext *s, TCGReg rd, const void *target) +{ + ptrdiff_t offset = tcg_pcrel_diff(s, target); + tcg_debug_assert(offset == sextract64(offset, 0, 21)); + tcg_out_insn(s, 3406, ADR, rd, offset); +} +#ifdef CONFIG_SOFTMMU /* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr, * MemOpIdx oi, uintptr_t ra) */ @@ -1577,13 +1585,6 @@ static void * const qemu_st_helpers[MO_SIZE + 1] = { #endif }; -static inline void tcg_out_adr(TCGContext *s, TCGReg rd, const void *target) -{ - ptrdiff_t offset = tcg_pcrel_diff(s, target); - tcg_debug_assert(offset == sextract64(offset, 0, 21)); - tcg_out_insn(s, 3406, ADR, rd, offset); -} - static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) { MemOpIdx oi = lb->oi; @@ -1714,15 +1715,58 @@ static void tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, MemOp opc, tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0); } +#else +static void tcg_out_test_alignment(TCGContext *s, bool is_ld, TCGReg addr_reg, + unsigned a_bits) +{ + unsigned a_mask = (1 << a_bits) - 1; + TCGLabelQemuLdst *label = new_ldst_label(s); + + label->is_ld = is_ld; + label->addrlo_reg = addr_reg; + + /* tst addr, #mask */ + tcg_out_logicali(s, I3404_ANDSI, 0, TCG_REG_XZR, addr_reg, a_mask); + + label->label_ptr[0] = s->code_ptr; + + /* b.ne slow_path */ + tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0); + + label->raddr = tcg_splitwx_to_rx(s->code_ptr); +} + +static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l) +{ + if (!reloc_pc19(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) { + return false; + } + + tcg_out_mov(s, TCG_TYPE_TL, TCG_REG_X1, l->addrlo_reg); + 
tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0); + + /* "Tail call" to the helper, with the return address back inline. */ + tcg_out_adr(s, TCG_REG_LR, l->raddr); + tcg_out_goto_long(s, (const void *)(l->is_ld ? helper_unaligned_ld + : helper_unaligned_st)); + return true; +} + +static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l) +{ + return tcg_out_fail_alignment(s, l); +} + +static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l) +{ + return tcg_out_fail_alignment(s, l); +} #endif /* CONFIG_SOFTMMU */ static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp memop, TCGType ext, TCGReg data_r, TCGReg addr_r, TCGType otype, TCGReg off_r) { - /* Byte swapping is left to middle-end expansion. */ - tcg_debug_assert((memop & MO_BSWAP) == 0); - switch (memop & MO_SSIZE) { case MO_UB: tcg_out_ldst_r(s, I3312_LDRB, data_r, addr_r, otype, off_r); @@ -1756,9 +1800,6 @@ static void tcg_out_qemu_st_direct(TCGContext *s, MemOp memop, TCGReg data_r, TCGReg addr_r, TCGType otype, TCGReg off_r) { - /* Byte swapping is left to middle-end expansion. */ - tcg_debug_assert((memop & MO_BSWAP) == 0); - switch (memop & MO_SIZE) { case MO_8: tcg_out_ldst_r(s, I3312_STRB, data_r, addr_r, otype, off_r); @@ -1782,6 +1823,10 @@ static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg, { MemOp memop = get_memop(oi); const TCGType otype = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32; + + /* Byte swapping is left to middle-end expansion. */ + tcg_debug_assert((memop & MO_BSWAP) == 0); + #ifdef CONFIG_SOFTMMU unsigned mem_index = get_mmuidx(oi); tcg_insn_unit *label_ptr; @@ -1792,6 +1837,10 @@ static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg, add_qemu_ldst_label(s, true, oi, ext, data_reg, addr_reg, s->code_ptr, label_ptr); #else /* !CONFIG_SOFTMMU */ + unsigned a_bits = get_alignment_bits(memop); + if (a_bits) { + tcg_out_test_alignment(s, true, addr_reg, a_bits); + } if (USE_GUEST_BASE) { tcg_out_qemu_ld_direct(s, memop, ext, data_reg, TCG_REG_GUEST_BASE, otype, addr_reg); @@ -1807,6 +1856,10 @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg, { MemOp memop = get_memop(oi); const TCGType otype = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32; + + /* Byte swapping is left to middle-end expansion. 
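+     * (This assertion appears to be hoisted from the _direct helpers
+     * earlier in this patch, so the check now also guards the new
+     * user-only alignment path before any code is emitted.)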
*/ + tcg_debug_assert((memop & MO_BSWAP) == 0); + #ifdef CONFIG_SOFTMMU unsigned mem_index = get_mmuidx(oi); tcg_insn_unit *label_ptr; @@ -1817,6 +1870,10 @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg, add_qemu_ldst_label(s, false, oi, (memop & MO_SIZE)== MO_64, data_reg, addr_reg, s->code_ptr, label_ptr); #else /* !CONFIG_SOFTMMU */ + unsigned a_bits = get_alignment_bits(memop); + if (a_bits) { + tcg_out_test_alignment(s, false, addr_reg, a_bits); + } if (USE_GUEST_BASE) { tcg_out_qemu_st_direct(s, memop, data_reg, TCG_REG_GUEST_BASE, otype, addr_reg); diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h index 7a93ac8..876af58 100644 --- a/tcg/aarch64/tcg-target.h +++ b/tcg/aarch64/tcg-target.h @@ -151,9 +151,7 @@ typedef enum { void tb_target_set_jmp_target(uintptr_t, uintptr_t, uintptr_t, uintptr_t); -#ifdef CONFIG_SOFTMMU #define TCG_TARGET_NEED_LDST_LABELS -#endif #define TCG_TARGET_NEED_POOL_LABELS #endif /* AARCH64_TCG_TARGET_H */ -- cgit v1.1 From 8605cbcdeedb5f24a20dd1341a7a08bd9f38be71 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 3 Aug 2021 22:08:55 +0000 Subject: tcg/ppc: Support raising sigbus for user-only Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson --- tcg/ppc/tcg-target.c.inc | 98 ++++++++++++++++++++++++++++++++++++++++++++---- tcg/ppc/tcg-target.h | 2 - 2 files changed, 90 insertions(+), 10 deletions(-) (limited to 'tcg') diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc index 9e79a7e..dea24f2 100644 --- a/tcg/ppc/tcg-target.c.inc +++ b/tcg/ppc/tcg-target.c.inc @@ -24,6 +24,7 @@ #include "elf.h" #include "../tcg-pool.c.inc" +#include "../tcg-ldst.c.inc" /* * Standardize on the _CALL_FOO symbols used by GCC: @@ -1881,7 +1882,8 @@ void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_rx, } } -static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target) +static void tcg_out_call_int(TCGContext *s, int lk, + const tcg_insn_unit *target) { #ifdef _CALL_AIX /* Look through the descriptor. If the branch is in range, and we @@ -1892,7 +1894,7 @@ static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target) if (in_range_b(diff) && toc == (uint32_t)toc) { tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP1, toc); - tcg_out_b(s, LK, tgt); + tcg_out_b(s, lk, tgt); } else { /* Fold the low bits of the constant into the addresses below. 
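     * (ABI background: under _CALL_AIX the call target is a function
     * descriptor holding the code address and TOC pointer, which the
     * two tcg_out_ld calls below read before branching via CTR.)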
*/ intptr_t arg = (intptr_t)target; @@ -1907,7 +1909,7 @@ static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target) tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_REG_TMP1, ofs); tcg_out32(s, MTSPR | RA(TCG_REG_R0) | CTR); tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R2, TCG_REG_TMP1, ofs + SZP); - tcg_out32(s, BCCTR | BO_ALWAYS | LK); + tcg_out32(s, BCCTR | BO_ALWAYS | lk); } #elif defined(_CALL_ELF) && _CALL_ELF == 2 intptr_t diff; @@ -1921,16 +1923,21 @@ static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target) diff = tcg_pcrel_diff(s, target); if (in_range_b(diff)) { - tcg_out_b(s, LK, target); + tcg_out_b(s, lk, target); } else { tcg_out32(s, MTSPR | RS(TCG_REG_R12) | CTR); - tcg_out32(s, BCCTR | BO_ALWAYS | LK); + tcg_out32(s, BCCTR | BO_ALWAYS | lk); } #else - tcg_out_b(s, LK, target); + tcg_out_b(s, lk, target); #endif } +static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target) +{ + tcg_out_call_int(s, LK, target); +} + static const uint32_t qemu_ldx_opc[(MO_SSIZE + MO_BSWAP) + 1] = { [MO_UB] = LBZX, [MO_UW] = LHZX, @@ -1960,8 +1967,6 @@ static const uint32_t qemu_exts_opc[4] = { }; #if defined (CONFIG_SOFTMMU) -#include "../tcg-ldst.c.inc" - /* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr, * int mmu_idx, uintptr_t ra) */ @@ -2227,6 +2232,71 @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) tcg_out_b(s, 0, lb->raddr); return true; } +#else + +static void tcg_out_test_alignment(TCGContext *s, bool is_ld, TCGReg addrlo, + TCGReg addrhi, unsigned a_bits) +{ + unsigned a_mask = (1 << a_bits) - 1; + TCGLabelQemuLdst *label = new_ldst_label(s); + + label->is_ld = is_ld; + label->addrlo_reg = addrlo; + label->addrhi_reg = addrhi; + + /* We are expecting a_bits to max out at 7, much lower than ANDI. */ + tcg_debug_assert(a_bits < 16); + tcg_out32(s, ANDI | SAI(addrlo, TCG_REG_R0, a_mask)); + + label->label_ptr[0] = s->code_ptr; + tcg_out32(s, BC | BI(0, CR_EQ) | BO_COND_FALSE | LK); + + label->raddr = tcg_splitwx_to_rx(s->code_ptr); +} + +static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l) +{ + if (!reloc_pc14(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) { + return false; + } + + if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) { + TCGReg arg = TCG_REG_R4; +#ifdef TCG_TARGET_CALL_ALIGN_ARGS + arg |= 1; +#endif + if (l->addrlo_reg != arg) { + tcg_out_mov(s, TCG_TYPE_I32, arg, l->addrhi_reg); + tcg_out_mov(s, TCG_TYPE_I32, arg + 1, l->addrlo_reg); + } else if (l->addrhi_reg != arg + 1) { + tcg_out_mov(s, TCG_TYPE_I32, arg + 1, l->addrlo_reg); + tcg_out_mov(s, TCG_TYPE_I32, arg, l->addrhi_reg); + } else { + tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R0, arg); + tcg_out_mov(s, TCG_TYPE_I32, arg, arg + 1); + tcg_out_mov(s, TCG_TYPE_I32, arg + 1, TCG_REG_R0); + } + } else { + tcg_out_mov(s, TCG_TYPE_TL, TCG_REG_R4, l->addrlo_reg); + } + tcg_out_mov(s, TCG_TYPE_TL, TCG_REG_R3, TCG_AREG0); + + /* "Tail call" to the helper, with the return address back inline. */ + tcg_out_call_int(s, 0, (const void *)(l->is_ld ? 
helper_unaligned_ld + : helper_unaligned_st)); + return true; +} + +static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l) +{ + return tcg_out_fail_alignment(s, l); +} + +static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l) +{ + return tcg_out_fail_alignment(s, l); +} + #endif /* SOFTMMU */ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is_64) @@ -2238,6 +2308,8 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is_64) #ifdef CONFIG_SOFTMMU int mem_index; tcg_insn_unit *label_ptr; +#else + unsigned a_bits; #endif datalo = *args++; @@ -2258,6 +2330,10 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is_64) rbase = TCG_REG_R3; #else /* !CONFIG_SOFTMMU */ + a_bits = get_alignment_bits(opc); + if (a_bits) { + tcg_out_test_alignment(s, true, addrlo, addrhi, a_bits); + } rbase = guest_base ? TCG_GUEST_BASE_REG : 0; if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) { tcg_out_ext32u(s, TCG_REG_TMP1, addrlo); @@ -2313,6 +2389,8 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is_64) #ifdef CONFIG_SOFTMMU int mem_index; tcg_insn_unit *label_ptr; +#else + unsigned a_bits; #endif datalo = *args++; @@ -2333,6 +2411,10 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is_64) rbase = TCG_REG_R3; #else /* !CONFIG_SOFTMMU */ + a_bits = get_alignment_bits(opc); + if (a_bits) { + tcg_out_test_alignment(s, false, addrlo, addrhi, a_bits); + } rbase = guest_base ? TCG_GUEST_BASE_REG : 0; if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) { tcg_out_ext32u(s, TCG_REG_TMP1, addrlo); diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h index 0943192..c775c97 100644 --- a/tcg/ppc/tcg-target.h +++ b/tcg/ppc/tcg-target.h @@ -182,9 +182,7 @@ void tb_target_set_jmp_target(uintptr_t, uintptr_t, uintptr_t, uintptr_t); #define TCG_TARGET_DEFAULT_MO (0) #define TCG_TARGET_HAS_MEMORY_BSWAP 1 -#ifdef CONFIG_SOFTMMU #define TCG_TARGET_NEED_LDST_LABELS -#endif #define TCG_TARGET_NEED_POOL_LABELS #endif -- cgit v1.1 From a3fb7c99c05659c98f4de301a932c70991382e26 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Thu, 5 Aug 2021 07:20:12 -1000 Subject: tcg/riscv: Support raising sigbus for user-only Signed-off-by: Richard Henderson --- tcg/riscv/tcg-target.c.inc | 63 ++++++++++++++++++++++++++++++++++++++++++++-- tcg/riscv/tcg-target.h | 2 -- 2 files changed, 61 insertions(+), 4 deletions(-) (limited to 'tcg') diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc index e9488f7..6409d9c 100644 --- a/tcg/riscv/tcg-target.c.inc +++ b/tcg/riscv/tcg-target.c.inc @@ -27,6 +27,7 @@ * THE SOFTWARE. */ +#include "../tcg-ldst.c.inc" #include "../tcg-pool.c.inc" #ifdef CONFIG_DEBUG_TCG @@ -847,8 +848,6 @@ static void tcg_out_mb(TCGContext *s, TCGArg a0) */ #if defined(CONFIG_SOFTMMU) -#include "../tcg-ldst.c.inc" - /* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr, * MemOpIdx oi, uintptr_t ra) */ @@ -1053,6 +1052,54 @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l) tcg_out_goto(s, l->raddr); return true; } +#else + +static void tcg_out_test_alignment(TCGContext *s, bool is_ld, TCGReg addr_reg, + unsigned a_bits) +{ + unsigned a_mask = (1 << a_bits) - 1; + TCGLabelQemuLdst *l = new_ldst_label(s); + + l->is_ld = is_ld; + l->addrlo_reg = addr_reg; + + /* We are expecting a_bits to max out at 7, so we can always use andi. 
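+     * (ANDI's immediate is 12 bits, sign-extended, so any a_mask up to
+     * 0x7ff encodes directly; hence the a_bits < 12 assertion below.)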
*/ + tcg_debug_assert(a_bits < 12); + tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_TMP1, addr_reg, a_mask); + + l->label_ptr[0] = s->code_ptr; + tcg_out_opc_branch(s, OPC_BNE, TCG_REG_TMP1, TCG_REG_ZERO, 0); + + l->raddr = tcg_splitwx_to_rx(s->code_ptr); +} + +static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l) +{ + /* resolve label address */ + if (!reloc_sbimm12(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) { + return false; + } + + tcg_out_mov(s, TCG_TYPE_TL, TCG_REG_A1, l->addrlo_reg); + tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_A0, TCG_AREG0); + + /* tail call, with the return address back inline. */ + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_RA, (uintptr_t)l->raddr); + tcg_out_call_int(s, (const void *)(l->is_ld ? helper_unaligned_ld + : helper_unaligned_st), true); + return true; +} + +static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l) +{ + return tcg_out_fail_alignment(s, l); +} + +static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l) +{ + return tcg_out_fail_alignment(s, l); +} + #endif /* CONFIG_SOFTMMU */ static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg lo, TCGReg hi, @@ -1108,6 +1155,8 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is_64) MemOp opc; #if defined(CONFIG_SOFTMMU) tcg_insn_unit *label_ptr[1]; +#else + unsigned a_bits; #endif TCGReg base = TCG_REG_TMP0; @@ -1130,6 +1179,10 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is_64) tcg_out_ext32u(s, base, addr_regl); addr_regl = base; } + a_bits = get_alignment_bits(opc); + if (a_bits) { + tcg_out_test_alignment(s, true, addr_regl, a_bits); + } if (guest_base != 0) { tcg_out_opc_reg(s, OPC_ADD, base, TCG_GUEST_BASE_REG, addr_regl); } @@ -1174,6 +1227,8 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is_64) MemOp opc; #if defined(CONFIG_SOFTMMU) tcg_insn_unit *label_ptr[1]; +#else + unsigned a_bits; #endif TCGReg base = TCG_REG_TMP0; @@ -1196,6 +1251,10 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is_64) tcg_out_ext32u(s, base, addr_regl); addr_regl = base; } + a_bits = get_alignment_bits(opc); + if (a_bits) { + tcg_out_test_alignment(s, false, addr_regl, a_bits); + } if (guest_base != 0) { tcg_out_opc_reg(s, OPC_ADD, base, TCG_GUEST_BASE_REG, addr_regl); } diff --git a/tcg/riscv/tcg-target.h b/tcg/riscv/tcg-target.h index ef78b99..11c9b3e 100644 --- a/tcg/riscv/tcg-target.h +++ b/tcg/riscv/tcg-target.h @@ -165,9 +165,7 @@ void tb_target_set_jmp_target(uintptr_t, uintptr_t, uintptr_t, uintptr_t); #define TCG_TARGET_DEFAULT_MO (0) -#ifdef CONFIG_SOFTMMU #define TCG_TARGET_NEED_LDST_LABELS -#endif #define TCG_TARGET_NEED_POOL_LABELS #define TCG_TARGET_HAS_MEMORY_BSWAP 0 -- cgit v1.1 From 1cd49868d47ceceaa0302582bf024936836392a6 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Wed, 4 Aug 2021 00:08:57 +0000 Subject: tcg/s390x: Support raising sigbus for user-only Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson --- tcg/s390x/tcg-target.c.inc | 59 ++++++++++++++++++++++++++++++++++++++++++++-- tcg/s390x/tcg-target.h | 2 -- 2 files changed, 57 insertions(+), 4 deletions(-) (limited to 'tcg') diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc index b12fbfd..d56c1e5 100644 --- a/tcg/s390x/tcg-target.c.inc +++ b/tcg/s390x/tcg-target.c.inc @@ -29,6 +29,7 @@ #error "unsupported code generation mode" #endif +#include "../tcg-ldst.c.inc" #include "../tcg-pool.c.inc" #include "elf.h" @@ -136,6 +137,7 @@ typedef enum S390Opcode { RI_OIHL = 0xa509, 
RI_OILH = 0xa50a, RI_OILL = 0xa50b, + RI_TMLL = 0xa701, RIE_CGIJ = 0xec7c, RIE_CGRJ = 0xec64, @@ -1804,8 +1806,6 @@ static void tcg_out_qemu_st_direct(TCGContext *s, MemOp opc, TCGReg data, } #if defined(CONFIG_SOFTMMU) -#include "../tcg-ldst.c.inc" - /* We're expecting to use a 20-bit negative offset on the tlb memory ops. */ QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0); QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -(1 << 19)); @@ -1942,6 +1942,53 @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) return true; } #else +static void tcg_out_test_alignment(TCGContext *s, bool is_ld, + TCGReg addrlo, unsigned a_bits) +{ + unsigned a_mask = (1 << a_bits) - 1; + TCGLabelQemuLdst *l = new_ldst_label(s); + + l->is_ld = is_ld; + l->addrlo_reg = addrlo; + + /* We are expecting a_bits to max out at 7, much lower than TMLL. */ + tcg_debug_assert(a_bits < 16); + tcg_out_insn(s, RI, TMLL, addrlo, a_mask); + + tcg_out16(s, RI_BRC | (7 << 4)); /* CC in {1,2,3} */ + l->label_ptr[0] = s->code_ptr; + s->code_ptr += 1; + + l->raddr = tcg_splitwx_to_rx(s->code_ptr); +} + +static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l) +{ + if (!patch_reloc(l->label_ptr[0], R_390_PC16DBL, + (intptr_t)tcg_splitwx_to_rx(s->code_ptr), 2)) { + return false; + } + + tcg_out_mov(s, TCG_TYPE_TL, TCG_REG_R3, l->addrlo_reg); + tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_R2, TCG_AREG0); + + /* "Tail call" to the helper, with the return address back inline. */ + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R14, (uintptr_t)l->raddr); + tgen_gotoi(s, S390_CC_ALWAYS, (const void *)(l->is_ld ? helper_unaligned_ld + : helper_unaligned_st)); + return true; +} + +static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l) +{ + return tcg_out_fail_alignment(s, l); +} + +static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l) +{ + return tcg_out_fail_alignment(s, l); +} + static void tcg_prepare_user_ldst(TCGContext *s, TCGReg *addr_reg, TCGReg *index_reg, tcg_target_long *disp) { @@ -1980,7 +2027,11 @@ static void tcg_out_qemu_ld(TCGContext* s, TCGReg data_reg, TCGReg addr_reg, #else TCGReg index_reg; tcg_target_long disp; + unsigned a_bits = get_alignment_bits(opc); + if (a_bits) { + tcg_out_test_alignment(s, true, addr_reg, a_bits); + } tcg_prepare_user_ldst(s, &addr_reg, &index_reg, &disp); tcg_out_qemu_ld_direct(s, opc, data_reg, addr_reg, index_reg, disp); #endif @@ -2007,7 +2058,11 @@ static void tcg_out_qemu_st(TCGContext* s, TCGReg data_reg, TCGReg addr_reg, #else TCGReg index_reg; tcg_target_long disp; + unsigned a_bits = get_alignment_bits(opc); + if (a_bits) { + tcg_out_test_alignment(s, false, addr_reg, a_bits); + } tcg_prepare_user_ldst(s, &addr_reg, &index_reg, &disp); tcg_out_qemu_st_direct(s, opc, data_reg, addr_reg, index_reg, disp); #endif diff --git a/tcg/s390x/tcg-target.h b/tcg/s390x/tcg-target.h index 527ada0..69217d9 100644 --- a/tcg/s390x/tcg-target.h +++ b/tcg/s390x/tcg-target.h @@ -178,9 +178,7 @@ static inline void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_rx, /* no need to flush icache explicitly */ } -#ifdef CONFIG_SOFTMMU #define TCG_TARGET_NEED_LDST_LABELS -#endif #define TCG_TARGET_NEED_POOL_LABELS #endif -- cgit v1.1 From fe1bee3a0a97585e3b59dc1db154c28bc8005882 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 3 Aug 2021 14:56:51 -1000 Subject: tcg/tci: Support raising sigbus for user-only MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Philippe Mathieu-Daudé 
Signed-off-by: Richard Henderson --- tcg/tci.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) (limited to 'tcg') diff --git a/tcg/tci.c b/tcg/tci.c index 336af59..fe92b5d 100644 --- a/tcg/tci.c +++ b/tcg/tci.c @@ -292,11 +292,11 @@ static bool tci_compare64(uint64_t u0, uint64_t u1, TCGCond condition) static uint64_t tci_qemu_ld(CPUArchState *env, target_ulong taddr, MemOpIdx oi, const void *tb_ptr) { - MemOp mop = get_memop(oi) & (MO_BSWAP | MO_SSIZE); + MemOp mop = get_memop(oi); uintptr_t ra = (uintptr_t)tb_ptr; #ifdef CONFIG_SOFTMMU - switch (mop) { + switch (mop & (MO_BSWAP | MO_SSIZE)) { case MO_UB: return helper_ret_ldub_mmu(env, taddr, oi, ra); case MO_SB: @@ -326,10 +326,14 @@ static uint64_t tci_qemu_ld(CPUArchState *env, target_ulong taddr, } #else void *haddr = g2h(env_cpu(env), taddr); + unsigned a_mask = (1u << get_alignment_bits(mop)) - 1; uint64_t ret; set_helper_retaddr(ra); - switch (mop) { + if (taddr & a_mask) { + helper_unaligned_ld(env, taddr); + } + switch (mop & (MO_BSWAP | MO_SSIZE)) { case MO_UB: ret = ldub_p(haddr); break; @@ -377,11 +381,11 @@ static uint64_t tci_qemu_ld(CPUArchState *env, target_ulong taddr, static void tci_qemu_st(CPUArchState *env, target_ulong taddr, uint64_t val, MemOpIdx oi, const void *tb_ptr) { - MemOp mop = get_memop(oi) & (MO_BSWAP | MO_SSIZE); + MemOp mop = get_memop(oi); uintptr_t ra = (uintptr_t)tb_ptr; #ifdef CONFIG_SOFTMMU - switch (mop) { + switch (mop & (MO_BSWAP | MO_SIZE)) { case MO_UB: helper_ret_stb_mmu(env, taddr, val, oi, ra); break; @@ -408,9 +412,13 @@ static void tci_qemu_st(CPUArchState *env, target_ulong taddr, uint64_t val, } #else void *haddr = g2h(env_cpu(env), taddr); + unsigned a_mask = (1u << get_alignment_bits(mop)) - 1; set_helper_retaddr(ra); - switch (mop) { + if (taddr & a_mask) { + helper_unaligned_st(env, taddr); + } + switch (mop & (MO_BSWAP | MO_SIZE)) { case MO_UB: stb_p(haddr, val); break; -- cgit v1.1 From 6f78c7b0828660f6733528dab7bedf027d4958f6 Mon Sep 17 00:00:00 2001 From: WANG Xuerui Date: Thu, 6 Jan 2022 21:42:38 +0800 Subject: tcg/loongarch64: Support raising sigbus for user-only Signed-off-by: WANG Xuerui Reviewed-by: Richard Henderson Message-Id: <20220106134238.3936163-1-git@xen0n.name> Signed-off-by: Richard Henderson --- tcg/loongarch64/tcg-target.c.inc | 71 ++++++++++++++++++++++++++++++++++++++-- tcg/loongarch64/tcg-target.h | 2 -- 2 files changed, 69 insertions(+), 4 deletions(-) (limited to 'tcg') diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc index d31a0e5..a3debf6 100644 --- a/tcg/loongarch64/tcg-target.c.inc +++ b/tcg/loongarch64/tcg-target.c.inc @@ -29,6 +29,8 @@ * THE SOFTWARE. 
*/ +#include "../tcg-ldst.c.inc" + #ifdef CONFIG_DEBUG_TCG static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = { "zero", @@ -642,8 +644,6 @@ static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val, */ #if defined(CONFIG_SOFTMMU) -#include "../tcg-ldst.c.inc" - /* * helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr, * MemOpIdx oi, uintptr_t ra) @@ -825,6 +825,61 @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l) return tcg_out_goto(s, l->raddr); } +#else + +/* + * Alignment helpers for user-mode emulation + */ + +static void tcg_out_test_alignment(TCGContext *s, bool is_ld, TCGReg addr_reg, + unsigned a_bits) +{ + TCGLabelQemuLdst *l = new_ldst_label(s); + + l->is_ld = is_ld; + l->addrlo_reg = addr_reg; + + /* + * Without micro-architecture details, we don't know which of bstrpick or + * andi is faster, so use bstrpick as it's not constrained by imm field + * width. (Not to say alignments >= 2^12 are going to happen any time + * soon, though) + */ + tcg_out_opc_bstrpick_d(s, TCG_REG_TMP1, addr_reg, 0, a_bits - 1); + + l->label_ptr[0] = s->code_ptr; + tcg_out_opc_bne(s, TCG_REG_TMP1, TCG_REG_ZERO, 0); + + l->raddr = tcg_splitwx_to_rx(s->code_ptr); +} + +static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l) +{ + /* resolve label address */ + if (!reloc_br_sk16(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) { + return false; + } + + tcg_out_mov(s, TCG_TYPE_TL, TCG_REG_A1, l->addrlo_reg); + tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_A0, TCG_AREG0); + + /* tail call, with the return address back inline. */ + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_RA, (uintptr_t)l->raddr); + tcg_out_call_int(s, (const void *)(l->is_ld ? helper_unaligned_ld + : helper_unaligned_st), true); + return true; +} + +static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l) +{ + return tcg_out_fail_alignment(s, l); +} + +static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l) +{ + return tcg_out_fail_alignment(s, l); +} + #endif /* CONFIG_SOFTMMU */ /* @@ -887,6 +942,8 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, TCGType type) MemOp opc; #if defined(CONFIG_SOFTMMU) tcg_insn_unit *label_ptr[1]; +#else + unsigned a_bits; #endif TCGReg base; @@ -903,6 +960,10 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, TCGType type) data_regl, addr_regl, s->code_ptr, label_ptr); #else + a_bits = get_alignment_bits(opc); + if (a_bits) { + tcg_out_test_alignment(s, true, addr_regl, a_bits); + } base = tcg_out_zext_addr_if_32_bit(s, addr_regl, TCG_REG_TMP0); TCGReg guest_base_reg = USE_GUEST_BASE ? TCG_GUEST_BASE_REG : TCG_REG_ZERO; tcg_out_qemu_ld_indexed(s, data_regl, base, guest_base_reg, opc, type); @@ -941,6 +1002,8 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args) MemOp opc; #if defined(CONFIG_SOFTMMU) tcg_insn_unit *label_ptr[1]; +#else + unsigned a_bits; #endif TCGReg base; @@ -958,6 +1021,10 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args) data_regl, addr_regl, s->code_ptr, label_ptr); #else + a_bits = get_alignment_bits(opc); + if (a_bits) { + tcg_out_test_alignment(s, false, addr_regl, a_bits); + } base = tcg_out_zext_addr_if_32_bit(s, addr_regl, TCG_REG_TMP0); TCGReg guest_base_reg = USE_GUEST_BASE ? 
TCG_GUEST_BASE_REG : TCG_REG_ZERO; tcg_out_qemu_st_indexed(s, data_regl, base, guest_base_reg, opc); diff --git a/tcg/loongarch64/tcg-target.h b/tcg/loongarch64/tcg-target.h index 0501080..d58a616 100644 --- a/tcg/loongarch64/tcg-target.h +++ b/tcg/loongarch64/tcg-target.h @@ -171,9 +171,7 @@ void tb_target_set_jmp_target(uintptr_t, uintptr_t, uintptr_t, uintptr_t); #define TCG_TARGET_DEFAULT_MO (0) -#ifdef CONFIG_SOFTMMU #define TCG_TARGET_NEED_LDST_LABELS -#endif #define TCG_TARGET_HAS_MEMORY_BSWAP 0 -- cgit v1.1 From 01dfc0ed7f2c5f8dbab65f31228a2888c7b85a07 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sun, 2 Jan 2022 17:42:07 -0800 Subject: tcg/arm: Drop support for armv4 and armv5 hosts Support for unaligned accesses is difficult for pre-v6 hosts. While debian still builds for armv4, we cannot use a compile time test, so test the architecture at runtime and error out. Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson --- tcg/arm/tcg-target.c.inc | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'tcg') diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc index 5345c4e..29d63e9 100644 --- a/tcg/arm/tcg-target.c.inc +++ b/tcg/arm/tcg-target.c.inc @@ -2474,6 +2474,11 @@ static void tcg_target_init(TCGContext *s) if (pl != NULL && pl[0] == 'v' && pl[1] >= '4' && pl[1] <= '9') { arm_arch = pl[1] - '0'; } + + if (arm_arch < 6) { + error_report("TCG: ARMv%d is unsupported; exiting", arm_arch); + exit(EXIT_FAILURE); + } } tcg_target_available_regs[TCG_TYPE_I32] = ALL_GENERAL_REGS; -- cgit v1.1 From 6cef13940cc57124bdef9dffbf03f25784f7a51c Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sun, 2 Jan 2022 18:30:13 -0800 Subject: tcg/arm: Remove use_armv5t_instructions This is now always true, since we require armv6. Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson --- tcg/arm/tcg-target.c.inc | 35 ++++++----------------------------- tcg/arm/tcg-target.h | 3 +-- 2 files changed, 7 insertions(+), 31 deletions(-) (limited to 'tcg') diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc index 29d63e9..f3b6350 100644 --- a/tcg/arm/tcg-target.c.inc +++ b/tcg/arm/tcg-target.c.inc @@ -596,11 +596,7 @@ static void tcg_out_b_reg(TCGContext *s, ARMCond cond, TCGReg rn) * Unless the C portion of QEMU is compiled as thumb, we don't need * true BX semantics; merely a branch to an address held in a register. */ - if (use_armv5t_instructions) { - tcg_out_bx_reg(s, cond, rn); - } else { - tcg_out_mov_reg(s, cond, TCG_REG_PC, rn); - } + tcg_out_bx_reg(s, cond, rn); } static void tcg_out_dat_imm(TCGContext *s, ARMCond cond, ARMInsn opc, @@ -1247,14 +1243,7 @@ static void tcg_out_goto(TCGContext *s, ARMCond cond, const tcg_insn_unit *addr) } /* LDR is interworking from v5t. */ - if (arm_mode || use_armv5t_instructions) { - tcg_out_movi_pool(s, cond, TCG_REG_PC, addri); - return; - } - - /* else v4t */ - tcg_out_movi32(s, COND_AL, TCG_REG_TMP, addri); - tcg_out_bx_reg(s, COND_AL, TCG_REG_TMP); + tcg_out_movi_pool(s, cond, TCG_REG_PC, addri); } /* @@ -1270,26 +1259,14 @@ static void tcg_out_call(TCGContext *s, const tcg_insn_unit *addr) if (disp - 8 < 0x02000000 && disp - 8 >= -0x02000000) { if (arm_mode) { tcg_out_bl_imm(s, COND_AL, disp); - return; - } - if (use_armv5t_instructions) { + } else { tcg_out_blx_imm(s, disp); - return; } + return; } - if (use_armv5t_instructions) { - tcg_out_movi32(s, COND_AL, TCG_REG_TMP, addri); - tcg_out_blx_reg(s, COND_AL, TCG_REG_TMP); - } else if (arm_mode) { - /* ??? Know that movi_pool emits exactly 1 insn. 
*/ - tcg_out_mov_reg(s, COND_AL, TCG_REG_R14, TCG_REG_PC); - tcg_out_movi_pool(s, COND_AL, TCG_REG_PC, addri); - } else { - tcg_out_movi32(s, COND_AL, TCG_REG_TMP, addri); - tcg_out_mov_reg(s, COND_AL, TCG_REG_R14, TCG_REG_PC); - tcg_out_bx_reg(s, COND_AL, TCG_REG_TMP); - } + tcg_out_movi32(s, COND_AL, TCG_REG_TMP, addri); + tcg_out_blx_reg(s, COND_AL, TCG_REG_TMP); } static void tcg_out_goto_label(TCGContext *s, ARMCond cond, TCGLabel *l) diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h index f41b809..5c9ba5f 100644 --- a/tcg/arm/tcg-target.h +++ b/tcg/arm/tcg-target.h @@ -28,7 +28,6 @@ extern int arm_arch; -#define use_armv5t_instructions (__ARM_ARCH >= 5 || arm_arch >= 5) #define use_armv6_instructions (__ARM_ARCH >= 6 || arm_arch >= 6) #define use_armv7_instructions (__ARM_ARCH >= 7 || arm_arch >= 7) @@ -109,7 +108,7 @@ extern bool use_neon_instructions; #define TCG_TARGET_HAS_eqv_i32 0 #define TCG_TARGET_HAS_nand_i32 0 #define TCG_TARGET_HAS_nor_i32 0 -#define TCG_TARGET_HAS_clz_i32 use_armv5t_instructions +#define TCG_TARGET_HAS_clz_i32 1 #define TCG_TARGET_HAS_ctz_i32 use_armv7_instructions #define TCG_TARGET_HAS_ctpop_i32 0 #define TCG_TARGET_HAS_deposit_i32 use_armv7_instructions -- cgit v1.1 From bde2cdb59bcd3b0965a80597e72c835fcf5ef7f4 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sun, 2 Jan 2022 20:54:57 -0800 Subject: tcg/arm: Remove use_armv6_instructions This is now always true, since we require armv6. Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson --- tcg/arm/tcg-target.c.inc | 192 +++++++---------------------------------------- tcg/arm/tcg-target.h | 1 - 2 files changed, 27 insertions(+), 166 deletions(-) (limited to 'tcg') diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc index f3b6350..9eb4340 100644 --- a/tcg/arm/tcg-target.c.inc +++ b/tcg/arm/tcg-target.c.inc @@ -923,17 +923,6 @@ static void tcg_out_dat_rIN(TCGContext *s, ARMCond cond, ARMInsn opc, static void tcg_out_mul32(TCGContext *s, ARMCond cond, TCGReg rd, TCGReg rn, TCGReg rm) { - /* if ArchVersion() < 6 && d == n then UNPREDICTABLE; */ - if (!use_armv6_instructions && rd == rn) { - if (rd == rm) { - /* rd == rn == rm; copy an input to tmp first. 
*/ - tcg_out_mov_reg(s, cond, TCG_REG_TMP, rn); - rm = rn = TCG_REG_TMP; - } else { - rn = rm; - rm = rd; - } - } /* mul */ tcg_out32(s, (cond << 28) | 0x90 | (rd << 16) | (rm << 8) | rn); } @@ -941,17 +930,6 @@ static void tcg_out_mul32(TCGContext *s, ARMCond cond, TCGReg rd, static void tcg_out_umull32(TCGContext *s, ARMCond cond, TCGReg rd0, TCGReg rd1, TCGReg rn, TCGReg rm) { - /* if ArchVersion() < 6 && (dHi == n || dLo == n) then UNPREDICTABLE; */ - if (!use_armv6_instructions && (rd0 == rn || rd1 == rn)) { - if (rd0 == rm || rd1 == rm) { - tcg_out_mov_reg(s, cond, TCG_REG_TMP, rn); - rn = TCG_REG_TMP; - } else { - TCGReg t = rn; - rn = rm; - rm = t; - } - } /* umull */ tcg_out32(s, (cond << 28) | 0x00800090 | (rd1 << 16) | (rd0 << 12) | (rm << 8) | rn); @@ -960,17 +938,6 @@ static void tcg_out_umull32(TCGContext *s, ARMCond cond, TCGReg rd0, static void tcg_out_smull32(TCGContext *s, ARMCond cond, TCGReg rd0, TCGReg rd1, TCGReg rn, TCGReg rm) { - /* if ArchVersion() < 6 && (dHi == n || dLo == n) then UNPREDICTABLE; */ - if (!use_armv6_instructions && (rd0 == rn || rd1 == rn)) { - if (rd0 == rm || rd1 == rm) { - tcg_out_mov_reg(s, cond, TCG_REG_TMP, rn); - rn = TCG_REG_TMP; - } else { - TCGReg t = rn; - rn = rm; - rm = t; - } - } /* smull */ tcg_out32(s, (cond << 28) | 0x00c00090 | (rd1 << 16) | (rd0 << 12) | (rm << 8) | rn); @@ -990,15 +957,8 @@ static void tcg_out_udiv(TCGContext *s, ARMCond cond, static void tcg_out_ext8s(TCGContext *s, ARMCond cond, TCGReg rd, TCGReg rn) { - if (use_armv6_instructions) { - /* sxtb */ - tcg_out32(s, 0x06af0070 | (cond << 28) | (rd << 12) | rn); - } else { - tcg_out_dat_reg(s, cond, ARITH_MOV, - rd, 0, rn, SHIFT_IMM_LSL(24)); - tcg_out_dat_reg(s, cond, ARITH_MOV, - rd, 0, rd, SHIFT_IMM_ASR(24)); - } + /* sxtb */ + tcg_out32(s, 0x06af0070 | (cond << 28) | (rd << 12) | rn); } static void __attribute__((unused)) @@ -1009,113 +969,37 @@ tcg_out_ext8u(TCGContext *s, ARMCond cond, TCGReg rd, TCGReg rn) static void tcg_out_ext16s(TCGContext *s, ARMCond cond, TCGReg rd, TCGReg rn) { - if (use_armv6_instructions) { - /* sxth */ - tcg_out32(s, 0x06bf0070 | (cond << 28) | (rd << 12) | rn); - } else { - tcg_out_dat_reg(s, cond, ARITH_MOV, - rd, 0, rn, SHIFT_IMM_LSL(16)); - tcg_out_dat_reg(s, cond, ARITH_MOV, - rd, 0, rd, SHIFT_IMM_ASR(16)); - } + /* sxth */ + tcg_out32(s, 0x06bf0070 | (cond << 28) | (rd << 12) | rn); } static void tcg_out_ext16u(TCGContext *s, ARMCond cond, TCGReg rd, TCGReg rn) { - if (use_armv6_instructions) { - /* uxth */ - tcg_out32(s, 0x06ff0070 | (cond << 28) | (rd << 12) | rn); - } else { - tcg_out_dat_reg(s, cond, ARITH_MOV, - rd, 0, rn, SHIFT_IMM_LSL(16)); - tcg_out_dat_reg(s, cond, ARITH_MOV, - rd, 0, rd, SHIFT_IMM_LSR(16)); - } + /* uxth */ + tcg_out32(s, 0x06ff0070 | (cond << 28) | (rd << 12) | rn); } static void tcg_out_bswap16(TCGContext *s, ARMCond cond, TCGReg rd, TCGReg rn, int flags) { - if (use_armv6_instructions) { - if (flags & TCG_BSWAP_OS) { - /* revsh */ - tcg_out32(s, 0x06ff0fb0 | (cond << 28) | (rd << 12) | rn); - return; - } - - /* rev16 */ - tcg_out32(s, 0x06bf0fb0 | (cond << 28) | (rd << 12) | rn); - if ((flags & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) { - /* uxth */ - tcg_out32(s, 0x06ff0070 | (cond << 28) | (rd << 12) | rd); - } - return; - } - - if (flags == 0) { - /* - * For stores, no input or output extension: - * rn = xxAB - * lsr tmp, rn, #8 tmp = 0xxA - * and tmp, tmp, #0xff tmp = 000A - * orr rd, tmp, rn, lsl #8 rd = xABA - */ - tcg_out_dat_reg(s, cond, ARITH_MOV, - TCG_REG_TMP, 0, rn, 
SHIFT_IMM_LSR(8)); - tcg_out_dat_imm(s, cond, ARITH_AND, TCG_REG_TMP, TCG_REG_TMP, 0xff); - tcg_out_dat_reg(s, cond, ARITH_ORR, - rd, TCG_REG_TMP, rn, SHIFT_IMM_LSL(8)); + if (flags & TCG_BSWAP_OS) { + /* revsh */ + tcg_out32(s, 0x06ff0fb0 | (cond << 28) | (rd << 12) | rn); return; } - /* - * Byte swap, leaving the result at the top of the register. - * We will then shift down, zero or sign-extending. - */ - if (flags & TCG_BSWAP_IZ) { - /* - * rn = 00AB - * ror tmp, rn, #8 tmp = B00A - * orr tmp, tmp, tmp, lsl #16 tmp = BA00 - */ - tcg_out_dat_reg(s, cond, ARITH_MOV, - TCG_REG_TMP, 0, rn, SHIFT_IMM_ROR(8)); - tcg_out_dat_reg(s, cond, ARITH_ORR, - TCG_REG_TMP, TCG_REG_TMP, TCG_REG_TMP, - SHIFT_IMM_LSL(16)); - } else { - /* - * rn = xxAB - * and tmp, rn, #0xff00 tmp = 00A0 - * lsl tmp, tmp, #8 tmp = 0A00 - * orr tmp, tmp, rn, lsl #24 tmp = BA00 - */ - tcg_out_dat_rI(s, cond, ARITH_AND, TCG_REG_TMP, rn, 0xff00, 1); - tcg_out_dat_reg(s, cond, ARITH_MOV, - TCG_REG_TMP, 0, TCG_REG_TMP, SHIFT_IMM_LSL(8)); - tcg_out_dat_reg(s, cond, ARITH_ORR, - TCG_REG_TMP, TCG_REG_TMP, rn, SHIFT_IMM_LSL(24)); + /* rev16 */ + tcg_out32(s, 0x06bf0fb0 | (cond << 28) | (rd << 12) | rn); + if ((flags & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) { + /* uxth */ + tcg_out32(s, 0x06ff0070 | (cond << 28) | (rd << 12) | rd); } - tcg_out_dat_reg(s, cond, ARITH_MOV, rd, 0, TCG_REG_TMP, - (flags & TCG_BSWAP_OS - ? SHIFT_IMM_ASR(8) : SHIFT_IMM_LSR(8))); } static void tcg_out_bswap32(TCGContext *s, ARMCond cond, TCGReg rd, TCGReg rn) { - if (use_armv6_instructions) { - /* rev */ - tcg_out32(s, 0x06bf0f30 | (cond << 28) | (rd << 12) | rn); - } else { - tcg_out_dat_reg(s, cond, ARITH_EOR, - TCG_REG_TMP, rn, rn, SHIFT_IMM_ROR(16)); - tcg_out_dat_imm(s, cond, ARITH_BIC, - TCG_REG_TMP, TCG_REG_TMP, 0xff | 0x800); - tcg_out_dat_reg(s, cond, ARITH_MOV, - rd, 0, rn, SHIFT_IMM_ROR(8)); - tcg_out_dat_reg(s, cond, ARITH_EOR, - rd, rd, TCG_REG_TMP, SHIFT_IMM_LSR(8)); - } + /* rev */ + tcg_out32(s, 0x06bf0f30 | (cond << 28) | (rd << 12) | rn); } static void tcg_out_deposit(TCGContext *s, ARMCond cond, TCGReg rd, @@ -1283,7 +1167,7 @@ static void tcg_out_mb(TCGContext *s, TCGArg a0) { if (use_armv7_instructions) { tcg_out32(s, INSN_DMB_ISH); - } else if (use_armv6_instructions) { + } else { tcg_out32(s, INSN_DMB_MCR); } } @@ -1489,8 +1373,7 @@ static TCGReg tcg_out_arg_reg64(TCGContext *s, TCGReg argreg, if (argreg & 1) { argreg++; } - if (use_armv6_instructions && argreg >= 4 - && (arglo & 1) == 0 && arghi == arglo + 1) { + if (argreg >= 4 && (arglo & 1) == 0 && arghi == arglo + 1) { tcg_out_strd_8(s, COND_AL, arglo, TCG_REG_CALL_STACK, (argreg - 4) * 4); return argreg + 2; @@ -1520,8 +1403,6 @@ static TCGReg tcg_out_tlb_read(TCGContext *s, TCGReg addrlo, TCGReg addrhi, int cmp_off = (is_load ? offsetof(CPUTLBEntry, addr_read) : offsetof(CPUTLBEntry, addr_write)); int fast_off = TLB_MASK_TABLE_OFS(mem_index); - int mask_off = fast_off + offsetof(CPUTLBDescFast, mask); - int table_off = fast_off + offsetof(CPUTLBDescFast, table); unsigned s_bits = opc & MO_SIZE; unsigned a_bits = get_alignment_bits(opc); @@ -1534,12 +1415,7 @@ static TCGReg tcg_out_tlb_read(TCGContext *s, TCGReg addrlo, TCGReg addrhi, } /* Load env_tlb(env)->f[mmu_idx].{mask,table} into {r0,r1}. 
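     * (LDRD loads a consecutive register pair with one instruction;
     * with armv6 as the new baseline it can be used unconditionally.)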
*/ - if (use_armv6_instructions) { - tcg_out_ldrd_8(s, COND_AL, TCG_REG_R0, TCG_AREG0, fast_off); - } else { - tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_R0, TCG_AREG0, mask_off); - tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_R1, TCG_AREG0, table_off); - } + tcg_out_ldrd_8(s, COND_AL, TCG_REG_R0, TCG_AREG0, fast_off); /* Extract the tlb index from the address into R0. */ tcg_out_dat_reg(s, COND_AL, ARITH_AND, TCG_REG_R0, TCG_REG_R0, addrlo, @@ -1550,7 +1426,7 @@ static TCGReg tcg_out_tlb_read(TCGContext *s, TCGReg addrlo, TCGReg addrhi, * Load the tlb comparator into R2/R3 and the fast path addend into R1. */ if (cmp_off == 0) { - if (use_armv6_instructions && TARGET_LONG_BITS == 64) { + if (TARGET_LONG_BITS == 64) { tcg_out_ldrd_rwb(s, COND_AL, TCG_REG_R2, TCG_REG_R1, TCG_REG_R0); } else { tcg_out_ld32_rwb(s, COND_AL, TCG_REG_R2, TCG_REG_R1, TCG_REG_R0); @@ -1558,15 +1434,12 @@ static TCGReg tcg_out_tlb_read(TCGContext *s, TCGReg addrlo, TCGReg addrhi, } else { tcg_out_dat_reg(s, COND_AL, ARITH_ADD, TCG_REG_R1, TCG_REG_R1, TCG_REG_R0, 0); - if (use_armv6_instructions && TARGET_LONG_BITS == 64) { + if (TARGET_LONG_BITS == 64) { tcg_out_ldrd_8(s, COND_AL, TCG_REG_R2, TCG_REG_R1, cmp_off); } else { tcg_out_ld32_12(s, COND_AL, TCG_REG_R2, TCG_REG_R1, cmp_off); } } - if (!use_armv6_instructions && TARGET_LONG_BITS == 64) { - tcg_out_ld32_12(s, COND_AL, TCG_REG_R3, TCG_REG_R1, cmp_off + 4); - } /* Load the tlb addend. */ tcg_out_ld32_12(s, COND_AL, TCG_REG_R1, TCG_REG_R1, @@ -1631,7 +1504,6 @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) TCGReg argreg, datalo, datahi; MemOpIdx oi = lb->oi; MemOp opc = get_memop(oi); - void *func; if (!reloc_pc24(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) { return false; @@ -1646,18 +1518,8 @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) argreg = tcg_out_arg_imm32(s, argreg, oi); argreg = tcg_out_arg_reg32(s, argreg, TCG_REG_R14); - /* For armv6 we can use the canonical unsigned helpers and minimize - icache usage. For pre-armv6, use the signed helpers since we do - not have a single insn sign-extend. */ - if (use_armv6_instructions) { - func = qemu_ld_helpers[opc & MO_SIZE]; - } else { - func = qemu_ld_helpers[opc & MO_SSIZE]; - if (opc & MO_SIGN) { - opc = MO_UL; - } - } - tcg_out_call(s, func); + /* Use the canonical unsigned helpers and minimize icache usage. */ + tcg_out_call(s, qemu_ld_helpers[opc & MO_SIZE]); datalo = lb->datalo_reg; datahi = lb->datahi_reg; @@ -1760,7 +1622,7 @@ static void tcg_out_qemu_ld_index(TCGContext *s, MemOp opc, break; case MO_UQ: /* Avoid ldrd for user-only emulation, to handle unaligned. */ - if (USING_SOFTMMU && use_armv6_instructions + if (USING_SOFTMMU && (datalo & 1) == 0 && datahi == datalo + 1) { tcg_out_ldrd_r(s, COND_AL, datalo, addrlo, addend); } else if (datalo != addend) { @@ -1803,7 +1665,7 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp opc, TCGReg datalo, break; case MO_UQ: /* Avoid ldrd for user-only emulation, to handle unaligned. */ - if (USING_SOFTMMU && use_armv6_instructions + if (USING_SOFTMMU && (datalo & 1) == 0 && datahi == datalo + 1) { tcg_out_ldrd_8(s, COND_AL, datalo, addrlo, 0); } else if (datalo == addrlo) { @@ -1880,7 +1742,7 @@ static void tcg_out_qemu_st_index(TCGContext *s, ARMCond cond, MemOp opc, break; case MO_64: /* Avoid strd for user-only emulation, to handle unaligned. 
*/ - if (USING_SOFTMMU && use_armv6_instructions + if (USING_SOFTMMU && (datalo & 1) == 0 && datahi == datalo + 1) { tcg_out_strd_r(s, cond, datalo, addrlo, addend); } else { @@ -1912,7 +1774,7 @@ static void tcg_out_qemu_st_direct(TCGContext *s, MemOp opc, TCGReg datalo, break; case MO_64: /* Avoid strd for user-only emulation, to handle unaligned. */ - if (USING_SOFTMMU && use_armv6_instructions + if (USING_SOFTMMU && (datalo & 1) == 0 && datahi == datalo + 1) { tcg_out_strd_8(s, COND_AL, datalo, addrlo, 0); } else { diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h index 5c9ba5f..1dd4cd5 100644 --- a/tcg/arm/tcg-target.h +++ b/tcg/arm/tcg-target.h @@ -28,7 +28,6 @@ extern int arm_arch; -#define use_armv6_instructions (__ARM_ARCH >= 6 || arm_arch >= 6) #define use_armv7_instructions (__ARM_ARCH >= 7 || arm_arch >= 7) #undef TCG_TARGET_STACK_GROWSUP -- cgit v1.1 From 367d43d85b8a0f6262125ccbad8720d02416265e Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sun, 2 Jan 2022 21:26:17 -0800 Subject: tcg/arm: Check alignment for ldrd and strd We will shortly allow the use of unaligned memory accesses, and these require proper alignment. Use get_alignment_bits to verify and remove USING_SOFTMMU. Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson --- tcg/arm/tcg-target.c.inc | 23 ++++++++--------------- 1 file changed, 8 insertions(+), 15 deletions(-) (limited to 'tcg') diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc index 9eb4340..4b0b4f4 100644 --- a/tcg/arm/tcg-target.c.inc +++ b/tcg/arm/tcg-target.c.inc @@ -34,13 +34,6 @@ bool use_idiv_instructions; bool use_neon_instructions; #endif -/* ??? Ought to think about changing CONFIG_SOFTMMU to always defined. */ -#ifdef CONFIG_SOFTMMU -# define USING_SOFTMMU 1 -#else -# define USING_SOFTMMU 0 -#endif - #ifdef CONFIG_DEBUG_TCG static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = { "%r0", "%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7", @@ -1621,8 +1614,8 @@ static void tcg_out_qemu_ld_index(TCGContext *s, MemOp opc, tcg_out_ld32_r(s, COND_AL, datalo, addrlo, addend); break; case MO_UQ: - /* Avoid ldrd for user-only emulation, to handle unaligned. */ - if (USING_SOFTMMU + /* LDRD requires alignment; double-check that. */ + if (get_alignment_bits(opc) >= MO_64 && (datalo & 1) == 0 && datahi == datalo + 1) { tcg_out_ldrd_r(s, COND_AL, datalo, addrlo, addend); } else if (datalo != addend) { @@ -1664,8 +1657,8 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp opc, TCGReg datalo, tcg_out_ld32_12(s, COND_AL, datalo, addrlo, 0); break; case MO_UQ: - /* Avoid ldrd for user-only emulation, to handle unaligned. */ - if (USING_SOFTMMU + /* LDRD requires alignment; double-check that. */ + if (get_alignment_bits(opc) >= MO_64 && (datalo & 1) == 0 && datahi == datalo + 1) { tcg_out_ldrd_8(s, COND_AL, datalo, addrlo, 0); } else if (datalo == addrlo) { @@ -1741,8 +1734,8 @@ static void tcg_out_qemu_st_index(TCGContext *s, ARMCond cond, MemOp opc, tcg_out_st32_r(s, cond, datalo, addrlo, addend); break; case MO_64: - /* Avoid strd for user-only emulation, to handle unaligned. */ - if (USING_SOFTMMU + /* STRD requires alignment; double-check that. 
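+         * (An access with get_alignment_bits(opc) >= MO_64 is known to
+         * be 8-byte aligned, so the doubleword STRD form remains safe
+         * even once unaligned guest accesses are otherwise permitted.)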
*/ + if (get_alignment_bits(opc) >= MO_64 && (datalo & 1) == 0 && datahi == datalo + 1) { tcg_out_strd_r(s, cond, datalo, addrlo, addend); } else { @@ -1773,8 +1766,8 @@ static void tcg_out_qemu_st_direct(TCGContext *s, MemOp opc, TCGReg datalo, tcg_out_st32_12(s, COND_AL, datalo, addrlo, 0); break; case MO_64: - /* Avoid strd for user-only emulation, to handle unaligned. */ - if (USING_SOFTMMU + /* STRD requires alignment; double-check that. */ + if (get_alignment_bits(opc) >= MO_64 && (datalo & 1) == 0 && datahi == datalo + 1) { tcg_out_strd_8(s, COND_AL, datalo, addrlo, 0); } else { -- cgit v1.1 From 8821ec2323dd8793d840fd455c5e20e144bddc9b Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sat, 7 Aug 2021 18:19:14 -1000 Subject: tcg/arm: Support unaligned access for softmmu From armv6, the architecture supports unaligned accesses. All we need to do is perform the correct alignment check in tcg_out_tlb_read. Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson --- tcg/arm/tcg-target.c.inc | 41 +++++++++++++++++++++-------------------- 1 file changed, 21 insertions(+), 20 deletions(-) (limited to 'tcg') diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc index 4b0b4f4..d290b45 100644 --- a/tcg/arm/tcg-target.c.inc +++ b/tcg/arm/tcg-target.c.inc @@ -1396,16 +1396,9 @@ static TCGReg tcg_out_tlb_read(TCGContext *s, TCGReg addrlo, TCGReg addrhi, int cmp_off = (is_load ? offsetof(CPUTLBEntry, addr_read) : offsetof(CPUTLBEntry, addr_write)); int fast_off = TLB_MASK_TABLE_OFS(mem_index); - unsigned s_bits = opc & MO_SIZE; - unsigned a_bits = get_alignment_bits(opc); - - /* - * We don't support inline unaligned acceses, but we can easily - * support overalignment checks. - */ - if (a_bits < s_bits) { - a_bits = s_bits; - } + unsigned s_mask = (1 << (opc & MO_SIZE)) - 1; + unsigned a_mask = (1 << get_alignment_bits(opc)) - 1; + TCGReg t_addr; /* Load env_tlb(env)->f[mmu_idx].{mask,table} into {r0,r1}. */ tcg_out_ldrd_8(s, COND_AL, TCG_REG_R0, TCG_AREG0, fast_off); @@ -1440,27 +1433,35 @@ static TCGReg tcg_out_tlb_read(TCGContext *s, TCGReg addrlo, TCGReg addrhi, /* * Check alignment, check comparators. - * Do this in no more than 3 insns. Use MOVW for v7, if possible, + * Do this in 2-4 insns. Use MOVW for v7, if possible, * to reduce the number of sequential conditional instructions. * Almost all guests have at least 4k pages, which means that we need * to clear at least 9 bits even for an 8-byte memory, which means it * isn't worth checking for an immediate operand for BIC. + * + * For unaligned accesses, test the page of the last unit of alignment. + * This leaves the least significant alignment bits unchanged, and of + * course must be zero. 
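+     * (Worked example: a 4-byte load with byte alignment adds
+     * s_mask - a_mask = 3 to the address; the page-number bits change
+     * only if the access crosses a page, failing the comparison below.)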
*/ + t_addr = addrlo; + if (a_mask < s_mask) { + t_addr = TCG_REG_R0; + tcg_out_dat_imm(s, COND_AL, ARITH_ADD, t_addr, + addrlo, s_mask - a_mask); + } if (use_armv7_instructions && TARGET_PAGE_BITS <= 16) { - tcg_target_ulong mask = ~(TARGET_PAGE_MASK | ((1 << a_bits) - 1)); - - tcg_out_movi32(s, COND_AL, TCG_REG_TMP, mask); + tcg_out_movi32(s, COND_AL, TCG_REG_TMP, ~(TARGET_PAGE_MASK | a_mask)); tcg_out_dat_reg(s, COND_AL, ARITH_BIC, TCG_REG_TMP, - addrlo, TCG_REG_TMP, 0); + t_addr, TCG_REG_TMP, 0); tcg_out_dat_reg(s, COND_AL, ARITH_CMP, 0, TCG_REG_R2, TCG_REG_TMP, 0); } else { - if (a_bits) { - tcg_out_dat_imm(s, COND_AL, ARITH_TST, 0, addrlo, - (1 << a_bits) - 1); + if (a_mask) { + tcg_debug_assert(a_mask <= 0xff); + tcg_out_dat_imm(s, COND_AL, ARITH_TST, 0, addrlo, a_mask); } - tcg_out_dat_reg(s, COND_AL, ARITH_MOV, TCG_REG_TMP, 0, addrlo, + tcg_out_dat_reg(s, COND_AL, ARITH_MOV, TCG_REG_TMP, 0, t_addr, SHIFT_IMM_LSR(TARGET_PAGE_BITS)); - tcg_out_dat_reg(s, (a_bits ? COND_EQ : COND_AL), ARITH_CMP, + tcg_out_dat_reg(s, (a_mask ? COND_EQ : COND_AL), ARITH_CMP, 0, TCG_REG_R2, TCG_REG_TMP, SHIFT_IMM_LSL(TARGET_PAGE_BITS)); } -- cgit v1.1 From 4bb802073f212b5a7e7cffcd4456484932911b91 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 9 Aug 2021 17:44:50 -1000 Subject: tcg/arm: Reserve a register for guest_base Reserve a register for the guest_base using aarch64 for reference. By doing so, we do not have to recompute it for every memory load. Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson --- tcg/arm/tcg-target.c.inc | 39 ++++++++++++++++++++++++++++----------- 1 file changed, 28 insertions(+), 11 deletions(-) (limited to 'tcg') diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc index d290b45..7eebbfa 100644 --- a/tcg/arm/tcg-target.c.inc +++ b/tcg/arm/tcg-target.c.inc @@ -84,6 +84,9 @@ static const int tcg_target_call_oarg_regs[2] = { #define TCG_REG_TMP TCG_REG_R12 #define TCG_VEC_TMP TCG_REG_Q15 +#ifndef CONFIG_SOFTMMU +#define TCG_REG_GUEST_BASE TCG_REG_R11 +#endif typedef enum { COND_EQ = 0x0, @@ -1593,7 +1596,8 @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) static void tcg_out_qemu_ld_index(TCGContext *s, MemOp opc, TCGReg datalo, TCGReg datahi, - TCGReg addrlo, TCGReg addend) + TCGReg addrlo, TCGReg addend, + bool scratch_addend) { /* Byte swapping is left to middle-end expansion. 
*/ tcg_debug_assert((opc & MO_BSWAP) == 0); @@ -1619,7 +1623,7 @@ static void tcg_out_qemu_ld_index(TCGContext *s, MemOp opc, if (get_alignment_bits(opc) >= MO_64 && (datalo & 1) == 0 && datahi == datalo + 1) { tcg_out_ldrd_r(s, COND_AL, datalo, addrlo, addend); - } else if (datalo != addend) { + } else if (scratch_addend) { tcg_out_ld32_rwb(s, COND_AL, datalo, addend, addrlo); tcg_out_ld32_12(s, COND_AL, datahi, addend, 4); } else { @@ -1703,14 +1707,14 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64) label_ptr = s->code_ptr; tcg_out_bl_imm(s, COND_NE, 0); - tcg_out_qemu_ld_index(s, opc, datalo, datahi, addrlo, addend); + tcg_out_qemu_ld_index(s, opc, datalo, datahi, addrlo, addend, true); add_qemu_ldst_label(s, true, oi, datalo, datahi, addrlo, addrhi, s->code_ptr, label_ptr); #else /* !CONFIG_SOFTMMU */ if (guest_base) { - tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP, guest_base); - tcg_out_qemu_ld_index(s, opc, datalo, datahi, addrlo, TCG_REG_TMP); + tcg_out_qemu_ld_index(s, opc, datalo, datahi, + addrlo, TCG_REG_GUEST_BASE, false); } else { tcg_out_qemu_ld_direct(s, opc, datalo, datahi, addrlo); } @@ -1719,7 +1723,8 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64) static void tcg_out_qemu_st_index(TCGContext *s, ARMCond cond, MemOp opc, TCGReg datalo, TCGReg datahi, - TCGReg addrlo, TCGReg addend) + TCGReg addrlo, TCGReg addend, + bool scratch_addend) { /* Byte swapping is left to middle-end expansion. */ tcg_debug_assert((opc & MO_BSWAP) == 0); @@ -1739,9 +1744,14 @@ static void tcg_out_qemu_st_index(TCGContext *s, ARMCond cond, MemOp opc, if (get_alignment_bits(opc) >= MO_64 && (datalo & 1) == 0 && datahi == datalo + 1) { tcg_out_strd_r(s, cond, datalo, addrlo, addend); - } else { + } else if (scratch_addend) { tcg_out_st32_rwb(s, cond, datalo, addend, addrlo); tcg_out_st32_12(s, cond, datahi, addend, 4); + } else { + tcg_out_dat_reg(s, cond, ARITH_ADD, TCG_REG_TMP, + addend, addrlo, SHIFT_IMM_LSL(0)); + tcg_out_st32_12(s, cond, datalo, TCG_REG_TMP, 0); + tcg_out_st32_12(s, cond, datahi, TCG_REG_TMP, 4); } break; default: @@ -1804,7 +1814,8 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64) mem_index = get_mmuidx(oi); addend = tcg_out_tlb_read(s, addrlo, addrhi, opc, mem_index, 0); - tcg_out_qemu_st_index(s, COND_EQ, opc, datalo, datahi, addrlo, addend); + tcg_out_qemu_st_index(s, COND_EQ, opc, datalo, datahi, + addrlo, addend, true); /* The conditional call must come last, as we're going to return here. 
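On a TLB hit the COND_EQ store above has already executed; on a miss the BLNE below enters the slow path, which returns to the instruction that follows it.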
*/ label_ptr = s->code_ptr; @@ -1814,9 +1825,8 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64) s->code_ptr, label_ptr); #else /* !CONFIG_SOFTMMU */ if (guest_base) { - tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP, guest_base); - tcg_out_qemu_st_index(s, COND_AL, opc, datalo, - datahi, addrlo, TCG_REG_TMP); + tcg_out_qemu_st_index(s, COND_AL, opc, datalo, datahi, + addrlo, TCG_REG_GUEST_BASE, false); } else { tcg_out_qemu_st_direct(s, opc, datalo, datahi, addrlo); } @@ -2958,6 +2968,13 @@ static void tcg_target_qemu_prologue(TCGContext *s) tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]); +#ifndef CONFIG_SOFTMMU + if (guest_base) { + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_GUEST_BASE, guest_base); + tcg_regset_set_reg(s->reserved_regs, TCG_REG_GUEST_BASE); + } +#endif + tcg_out_b_reg(s, COND_AL, tcg_target_call_iarg_regs[1]); /* -- cgit v1.1 From 0c90fa5dce29243c06841d7b07ff2bd97c27c1f4 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 9 Aug 2021 19:18:27 -1000 Subject: tcg/arm: Support raising sigbus for user-only Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson --- tcg/arm/tcg-target.c.inc | 83 ++++++++++++++++++++++++++++++++++++++++++++++-- tcg/arm/tcg-target.h | 2 -- 2 files changed, 81 insertions(+), 4 deletions(-) (limited to 'tcg') diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc index 7eebbfa..e1ea696 100644 --- a/tcg/arm/tcg-target.c.inc +++ b/tcg/arm/tcg-target.c.inc @@ -23,6 +23,7 @@ */ #include "elf.h" +#include "../tcg-ldst.c.inc" #include "../tcg-pool.c.inc" int arm_arch = __ARM_ARCH; @@ -1289,8 +1290,6 @@ static void tcg_out_vldst(TCGContext *s, ARMInsn insn, } #ifdef CONFIG_SOFTMMU -#include "../tcg-ldst.c.inc" - /* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr, * int mmu_idx, uintptr_t ra) */ @@ -1592,6 +1591,74 @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) tcg_out_goto(s, COND_AL, qemu_st_helpers[opc & MO_SIZE]); return true; } +#else + +static void tcg_out_test_alignment(TCGContext *s, bool is_ld, TCGReg addrlo, + TCGReg addrhi, unsigned a_bits) +{ + unsigned a_mask = (1 << a_bits) - 1; + TCGLabelQemuLdst *label = new_ldst_label(s); + + label->is_ld = is_ld; + label->addrlo_reg = addrlo; + label->addrhi_reg = addrhi; + + /* We are expecting a_bits to max out at 7, and can easily support 8. */ + tcg_debug_assert(a_mask <= 0xff); + /* tst addr, #mask */ + tcg_out_dat_imm(s, COND_AL, ARITH_TST, 0, addrlo, a_mask); + + /* blne slow_path */ + label->label_ptr[0] = s->code_ptr; + tcg_out_bl_imm(s, COND_NE, 0); + + label->raddr = tcg_splitwx_to_rx(s->code_ptr); +} + +static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l) +{ + if (!reloc_pc24(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) { + return false; + } + + if (TARGET_LONG_BITS == 64) { + /* 64-bit target address is aligned into R2:R3. 
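+ Move the halves there in an order that never clobbers one before it is read; the fully-swapped case rotates through R1.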
*/ + if (l->addrhi_reg != TCG_REG_R2) { + tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R2, l->addrlo_reg); + tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R3, l->addrhi_reg); + } else if (l->addrlo_reg != TCG_REG_R3) { + tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R3, l->addrhi_reg); + tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R2, l->addrlo_reg); + } else { + tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R1, TCG_REG_R2); + tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R2, TCG_REG_R3); + tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R3, TCG_REG_R1); + } + } else { + tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R1, l->addrlo_reg); + } + tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_AREG0); + + /* + * Tail call to the helper, with the return address back inline, + * just for the clarity of the debugging traceback -- the helper + * cannot return. We have used BLNE to arrive here, so LR is + * already set. + */ + tcg_out_goto(s, COND_AL, (const void *) + (l->is_ld ? helper_unaligned_ld : helper_unaligned_st)); + return true; +} + +static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l) +{ + return tcg_out_fail_alignment(s, l); +} + +static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l) +{ + return tcg_out_fail_alignment(s, l); +} #endif /* SOFTMMU */ static void tcg_out_qemu_ld_index(TCGContext *s, MemOp opc, @@ -1689,6 +1756,8 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64) int mem_index; TCGReg addend; tcg_insn_unit *label_ptr; +#else + unsigned a_bits; #endif datalo = *args++; @@ -1712,6 +1781,10 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64) add_qemu_ldst_label(s, true, oi, datalo, datahi, addrlo, addrhi, s->code_ptr, label_ptr); #else /* !CONFIG_SOFTMMU */ + a_bits = get_alignment_bits(opc); + if (a_bits) { + tcg_out_test_alignment(s, true, addrlo, addrhi, a_bits); + } if (guest_base) { tcg_out_qemu_ld_index(s, opc, datalo, datahi, addrlo, TCG_REG_GUEST_BASE, false); @@ -1801,6 +1874,8 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64) int mem_index; TCGReg addend; tcg_insn_unit *label_ptr; +#else + unsigned a_bits; #endif datalo = *args++; @@ -1824,6 +1899,10 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64) add_qemu_ldst_label(s, false, oi, datalo, datahi, addrlo, addrhi, s->code_ptr, label_ptr); #else /* !CONFIG_SOFTMMU */ + a_bits = get_alignment_bits(opc); + if (a_bits) { + tcg_out_test_alignment(s, false, addrlo, addrhi, a_bits); + } if (guest_base) { tcg_out_qemu_st_index(s, COND_AL, opc, datalo, datahi, addrlo, TCG_REG_GUEST_BASE, false); diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h index 1dd4cd5..27c27a1 100644 --- a/tcg/arm/tcg-target.h +++ b/tcg/arm/tcg-target.h @@ -151,9 +151,7 @@ extern bool use_neon_instructions; /* not defined -- call should be eliminated at compile time */ void tb_target_set_jmp_target(uintptr_t, uintptr_t, uintptr_t, uintptr_t); -#ifdef CONFIG_SOFTMMU #define TCG_TARGET_NEED_LDST_LABELS -#endif #define TCG_TARGET_NEED_POOL_LABELS #endif -- cgit v1.1 From 23a79c113ed2ae693d882d109862f4a759fbf10e Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Fri, 6 Aug 2021 05:49:16 -1000 Subject: tcg/mips: Support unaligned access for user-only MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is kinda sorta the opposite of the other tcg hosts, where we get (normal) alignment checks for free with host SIGBUS and need to add code to support unaligned accesses. 
Fortunately, the ISA contains pairs of instructions that are used to implement unaligned memory accesses. Use them. Tested-by: Jiaxun Yang Reviewed-by: Jiaxun Yang Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- tcg/mips/tcg-target.c.inc | 334 +++++++++++++++++++++++++++++++++++++++++++++- tcg/mips/tcg-target.h | 2 - 2 files changed, 328 insertions(+), 8 deletions(-) (limited to 'tcg') diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc index 27b020e..2c94ac2 100644 --- a/tcg/mips/tcg-target.c.inc +++ b/tcg/mips/tcg-target.c.inc @@ -24,6 +24,8 @@ * THE SOFTWARE. */ +#include "../tcg-ldst.c.inc" + #ifdef HOST_WORDS_BIGENDIAN # define MIPS_BE 1 #else @@ -230,16 +232,26 @@ typedef enum { OPC_ORI = 015 << 26, OPC_XORI = 016 << 26, OPC_LUI = 017 << 26, + OPC_BNEL = 025 << 26, + OPC_BNEZALC_R6 = 030 << 26, OPC_DADDIU = 031 << 26, + OPC_LDL = 032 << 26, + OPC_LDR = 033 << 26, OPC_LB = 040 << 26, OPC_LH = 041 << 26, + OPC_LWL = 042 << 26, OPC_LW = 043 << 26, OPC_LBU = 044 << 26, OPC_LHU = 045 << 26, + OPC_LWR = 046 << 26, OPC_LWU = 047 << 26, OPC_SB = 050 << 26, OPC_SH = 051 << 26, + OPC_SWL = 052 << 26, OPC_SW = 053 << 26, + OPC_SDL = 054 << 26, + OPC_SDR = 055 << 26, + OPC_SWR = 056 << 26, OPC_LD = 067 << 26, OPC_SD = 077 << 26, @@ -1015,8 +1027,6 @@ static void tcg_out_call(TCGContext *s, const tcg_insn_unit *arg) } #if defined(CONFIG_SOFTMMU) -#include "../tcg-ldst.c.inc" - static void * const qemu_ld_helpers[(MO_SSIZE | MO_BSWAP) + 1] = { [MO_UB] = helper_ret_ldub_mmu, [MO_SB] = helper_ret_ldsb_mmu, @@ -1324,7 +1334,82 @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l) tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0); return true; } -#endif + +#else + +static void tcg_out_test_alignment(TCGContext *s, bool is_ld, TCGReg addrlo, + TCGReg addrhi, unsigned a_bits) +{ + unsigned a_mask = (1 << a_bits) - 1; + TCGLabelQemuLdst *l = new_ldst_label(s); + + l->is_ld = is_ld; + l->addrlo_reg = addrlo; + l->addrhi_reg = addrhi; + + /* We are expecting a_bits to max out at 7, much lower than ANDI. */ + tcg_debug_assert(a_bits < 16); + tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP0, addrlo, a_mask); + + l->label_ptr[0] = s->code_ptr; + if (use_mips32r6_instructions) { + tcg_out_opc_br(s, OPC_BNEZALC_R6, TCG_REG_ZERO, TCG_TMP0); + } else { + tcg_out_opc_br(s, OPC_BNEL, TCG_TMP0, TCG_REG_ZERO); + tcg_out_nop(s); + } + + l->raddr = tcg_splitwx_to_rx(s->code_ptr); +} + +static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l) +{ + void *target; + + if (!reloc_pc16(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) { + return false; + } + + if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) { + /* A0 is env, A1 is skipped, A2:A3 is the uint64_t address. */ + TCGReg a2 = MIPS_BE ? l->addrhi_reg : l->addrlo_reg; + TCGReg a3 = MIPS_BE ? l->addrlo_reg : l->addrhi_reg; + + if (a3 != TCG_REG_A2) { + tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_A2, a2); + tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_A3, a3); + } else if (a2 != TCG_REG_A3) { + tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_A3, a3); + tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_A2, a2); + } else { + tcg_out_mov(s, TCG_TYPE_I32, TCG_TMP0, TCG_REG_A2); + tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_A2, TCG_REG_A3); + tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_A3, TCG_TMP0); + } + } else { + tcg_out_mov(s, TCG_TYPE_TL, TCG_REG_A1, l->addrlo_reg); + } + tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_A0, TCG_AREG0); + + /* + * Tail call to the helper, with the return address back inline. 
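+ * The helper raises SIGBUS and cannot return; the return address only serves the debugging traceback.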
+ * We have arrived here via BNEL, so $31 is already set. + */ + target = (l->is_ld ? helper_unaligned_ld : helper_unaligned_st); + tcg_out_call_int(s, target, true); + return true; +} + +static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l) +{ + return tcg_out_fail_alignment(s, l); +} + +static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l) +{ + return tcg_out_fail_alignment(s, l); +} +#endif /* SOFTMMU */ static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg lo, TCGReg hi, TCGReg base, MemOp opc, bool is_64) @@ -1430,6 +1515,127 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg lo, TCGReg hi, } } +static void __attribute__((unused)) +tcg_out_qemu_ld_unalign(TCGContext *s, TCGReg lo, TCGReg hi, + TCGReg base, MemOp opc, bool is_64) +{ + const MIPSInsn lw1 = MIPS_BE ? OPC_LWL : OPC_LWR; + const MIPSInsn lw2 = MIPS_BE ? OPC_LWR : OPC_LWL; + const MIPSInsn ld1 = MIPS_BE ? OPC_LDL : OPC_LDR; + const MIPSInsn ld2 = MIPS_BE ? OPC_LDR : OPC_LDL; + + bool sgn = (opc & MO_SIGN); + + switch (opc & (MO_SSIZE | MO_BSWAP)) { + case MO_SW | MO_BE: + case MO_UW | MO_BE: + tcg_out_opc_imm(s, sgn ? OPC_LB : OPC_LBU, TCG_TMP0, base, 0); + tcg_out_opc_imm(s, OPC_LBU, lo, base, 1); + if (use_mips32r2_instructions) { + tcg_out_opc_bf(s, OPC_INS, lo, TCG_TMP0, 31, 8); + } else { + tcg_out_opc_sa(s, OPC_SLL, TCG_TMP0, TCG_TMP0, 8); + tcg_out_opc_reg(s, OPC_OR, lo, TCG_TMP0, lo); + } + break; + + case MO_SW | MO_LE: + case MO_UW | MO_LE: + if (use_mips32r2_instructions && lo != base) { + tcg_out_opc_imm(s, OPC_LBU, lo, base, 0); + tcg_out_opc_imm(s, sgn ? OPC_LB : OPC_LBU, TCG_TMP0, base, 1); + tcg_out_opc_bf(s, OPC_INS, lo, TCG_TMP0, 31, 8); + } else { + tcg_out_opc_imm(s, OPC_LBU, TCG_TMP0, base, 0); + tcg_out_opc_imm(s, sgn ? OPC_LB : OPC_LBU, TCG_TMP1, base, 1); + tcg_out_opc_sa(s, OPC_SLL, TCG_TMP1, TCG_TMP1, 8); + tcg_out_opc_reg(s, OPC_OR, lo, TCG_TMP0, TCG_TMP1); + } + break; + + case MO_SL: + case MO_UL: + tcg_out_opc_imm(s, lw1, lo, base, 0); + tcg_out_opc_imm(s, lw2, lo, base, 3); + if (TCG_TARGET_REG_BITS == 64 && is_64 && !sgn) { + tcg_out_ext32u(s, lo, lo); + } + break; + + case MO_UL | MO_BSWAP: + case MO_SL | MO_BSWAP: + if (use_mips32r2_instructions) { + tcg_out_opc_imm(s, lw1, lo, base, 0); + tcg_out_opc_imm(s, lw2, lo, base, 3); + tcg_out_bswap32(s, lo, lo, + TCG_TARGET_REG_BITS == 64 && is_64 + ? (sgn ? TCG_BSWAP_OS : TCG_BSWAP_OZ) : 0); + } else { + const tcg_insn_unit *subr = + (TCG_TARGET_REG_BITS == 64 && is_64 && !sgn + ? bswap32u_addr : bswap32_addr); + + tcg_out_opc_imm(s, lw1, TCG_TMP0, base, 0); + tcg_out_bswap_subr(s, subr); + /* delay slot */ + tcg_out_opc_imm(s, lw2, TCG_TMP0, base, 3); + tcg_out_mov(s, is_64 ? TCG_TYPE_I64 : TCG_TYPE_I32, lo, TCG_TMP3); + } + break; + + case MO_UQ: + if (TCG_TARGET_REG_BITS == 64) { + tcg_out_opc_imm(s, ld1, lo, base, 0); + tcg_out_opc_imm(s, ld2, lo, base, 7); + } else { + tcg_out_opc_imm(s, lw1, MIPS_BE ? hi : lo, base, 0 + 0); + tcg_out_opc_imm(s, lw2, MIPS_BE ? hi : lo, base, 0 + 3); + tcg_out_opc_imm(s, lw1, MIPS_BE ? lo : hi, base, 4 + 0); + tcg_out_opc_imm(s, lw2, MIPS_BE ?
lo : hi, base, 4 + 3); + } + break; + + case MO_UQ | MO_BSWAP: + if (TCG_TARGET_REG_BITS == 64) { + if (use_mips32r2_instructions) { + tcg_out_opc_imm(s, ld1, lo, base, 0); + tcg_out_opc_imm(s, ld2, lo, base, 7); + tcg_out_bswap64(s, lo, lo); + } else { + tcg_out_opc_imm(s, ld1, TCG_TMP0, base, 0); + tcg_out_bswap_subr(s, bswap64_addr); + /* delay slot */ + tcg_out_opc_imm(s, ld2, TCG_TMP0, base, 7); + tcg_out_mov(s, TCG_TYPE_I64, lo, TCG_TMP3); + } + } else if (use_mips32r2_instructions) { + tcg_out_opc_imm(s, lw1, TCG_TMP0, base, 0 + 0); + tcg_out_opc_imm(s, lw2, TCG_TMP0, base, 0 + 3); + tcg_out_opc_imm(s, lw1, TCG_TMP1, base, 4 + 0); + tcg_out_opc_imm(s, lw2, TCG_TMP1, base, 4 + 3); + tcg_out_opc_reg(s, OPC_WSBH, TCG_TMP0, 0, TCG_TMP0); + tcg_out_opc_reg(s, OPC_WSBH, TCG_TMP1, 0, TCG_TMP1); + tcg_out_opc_sa(s, OPC_ROTR, MIPS_BE ? lo : hi, TCG_TMP0, 16); + tcg_out_opc_sa(s, OPC_ROTR, MIPS_BE ? hi : lo, TCG_TMP1, 16); + } else { + tcg_out_opc_imm(s, lw1, TCG_TMP0, base, 0 + 0); + tcg_out_bswap_subr(s, bswap32_addr); + /* delay slot */ + tcg_out_opc_imm(s, lw2, TCG_TMP0, base, 0 + 3); + tcg_out_opc_imm(s, lw1, TCG_TMP0, base, 4 + 0); + tcg_out_mov(s, TCG_TYPE_I32, MIPS_BE ? lo : hi, TCG_TMP3); + tcg_out_bswap_subr(s, bswap32_addr); + /* delay slot */ + tcg_out_opc_imm(s, lw2, TCG_TMP0, base, 4 + 3); + tcg_out_mov(s, TCG_TYPE_I32, MIPS_BE ? hi : lo, TCG_TMP3); + } + break; + + default: + g_assert_not_reached(); + } +} + static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is_64) { TCGReg addr_regl, addr_regh __attribute__((unused)); @@ -1438,6 +1644,8 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is_64) MemOp opc; #if defined(CONFIG_SOFTMMU) tcg_insn_unit *label_ptr[2]; +#else + unsigned a_bits, s_bits; #endif TCGReg base = TCG_REG_A0; @@ -1467,7 +1675,27 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is_64) } else { tcg_out_opc_reg(s, ALIAS_PADD, base, TCG_GUEST_BASE_REG, addr_regl); } - tcg_out_qemu_ld_direct(s, data_regl, data_regh, base, opc, is_64); + a_bits = get_alignment_bits(opc); + s_bits = opc & MO_SIZE; + /* + * R6 removes the left/right instructions but requires the + * system to support misaligned memory accesses. + */ + if (use_mips32r6_instructions) { + if (a_bits) { + tcg_out_test_alignment(s, true, addr_regl, addr_regh, a_bits); + } + tcg_out_qemu_ld_direct(s, data_regl, data_regh, base, opc, is_64); + } else { + if (a_bits && a_bits != s_bits) { + tcg_out_test_alignment(s, true, addr_regl, addr_regh, a_bits); + } + if (a_bits >= s_bits) { + tcg_out_qemu_ld_direct(s, data_regl, data_regh, base, opc, is_64); + } else { + tcg_out_qemu_ld_unalign(s, data_regl, data_regh, base, opc, is_64); + } + } #endif } @@ -1532,6 +1760,79 @@ static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg lo, TCGReg hi, } } +static void __attribute__((unused)) +tcg_out_qemu_st_unalign(TCGContext *s, TCGReg lo, TCGReg hi, + TCGReg base, MemOp opc) +{ + const MIPSInsn sw1 = MIPS_BE ? OPC_SWL : OPC_SWR; + const MIPSInsn sw2 = MIPS_BE ? OPC_SWR : OPC_SWL; + const MIPSInsn sd1 = MIPS_BE ? OPC_SDL : OPC_SDR; + const MIPSInsn sd2 = MIPS_BE ? OPC_SDR : OPC_SDL; + + /* Don't clutter the code below with checks to avoid bswapping ZERO. 
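+ Zero byte-swaps to itself, so when both halves are $zero simply drop MO_BSWAP.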
*/ + if ((lo | hi) == 0) { + opc &= ~MO_BSWAP; + } + + switch (opc & (MO_SIZE | MO_BSWAP)) { + case MO_16 | MO_BE: + tcg_out_opc_sa(s, OPC_SRL, TCG_TMP0, lo, 8); + tcg_out_opc_imm(s, OPC_SB, TCG_TMP0, base, 0); + tcg_out_opc_imm(s, OPC_SB, lo, base, 1); + break; + + case MO_16 | MO_LE: + tcg_out_opc_sa(s, OPC_SRL, TCG_TMP0, lo, 8); + tcg_out_opc_imm(s, OPC_SB, lo, base, 0); + tcg_out_opc_imm(s, OPC_SB, TCG_TMP0, base, 1); + break; + + case MO_32 | MO_BSWAP: + tcg_out_bswap32(s, TCG_TMP3, lo, 0); + lo = TCG_TMP3; + /* fall through */ + case MO_32: + tcg_out_opc_imm(s, sw1, lo, base, 0); + tcg_out_opc_imm(s, sw2, lo, base, 3); + break; + + case MO_64 | MO_BSWAP: + if (TCG_TARGET_REG_BITS == 64) { + tcg_out_bswap64(s, TCG_TMP3, lo); + lo = TCG_TMP3; + } else if (use_mips32r2_instructions) { + tcg_out_opc_reg(s, OPC_WSBH, TCG_TMP0, 0, MIPS_BE ? hi : lo); + tcg_out_opc_reg(s, OPC_WSBH, TCG_TMP1, 0, MIPS_BE ? lo : hi); + tcg_out_opc_sa(s, OPC_ROTR, TCG_TMP0, TCG_TMP0, 16); + tcg_out_opc_sa(s, OPC_ROTR, TCG_TMP1, TCG_TMP1, 16); + hi = MIPS_BE ? TCG_TMP0 : TCG_TMP1; + lo = MIPS_BE ? TCG_TMP1 : TCG_TMP0; + } else { + tcg_out_bswap32(s, TCG_TMP3, MIPS_BE ? lo : hi, 0); + tcg_out_opc_imm(s, sw1, TCG_TMP3, base, 0 + 0); + tcg_out_opc_imm(s, sw2, TCG_TMP3, base, 0 + 3); + tcg_out_bswap32(s, TCG_TMP3, MIPS_BE ? hi : lo, 0); + tcg_out_opc_imm(s, sw1, TCG_TMP3, base, 4 + 0); + tcg_out_opc_imm(s, sw2, TCG_TMP3, base, 4 + 3); + break; + } + /* fall through */ + case MO_64: + if (TCG_TARGET_REG_BITS == 64) { + tcg_out_opc_imm(s, sd1, lo, base, 0); + tcg_out_opc_imm(s, sd2, lo, base, 7); + } else { + tcg_out_opc_imm(s, sw1, MIPS_BE ? hi : lo, base, 0 + 0); + tcg_out_opc_imm(s, sw2, MIPS_BE ? hi : lo, base, 0 + 3); + tcg_out_opc_imm(s, sw1, MIPS_BE ? lo : hi, base, 4 + 0); + tcg_out_opc_imm(s, sw2, MIPS_BE ? lo : hi, base, 4 + 3); + } + break; + + default: + tcg_abort(); + } +} static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is_64) { TCGReg addr_regl, addr_regh __attribute__((unused)); @@ -1540,6 +1841,8 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is_64) MemOp opc; #if defined(CONFIG_SOFTMMU) tcg_insn_unit *label_ptr[2]; +#else + unsigned a_bits, s_bits; #endif TCGReg base = TCG_REG_A0; @@ -1558,7 +1861,6 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is_64) data_regl, data_regh, addr_regl, addr_regh, s->code_ptr, label_ptr); #else - base = TCG_REG_A0; if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) { tcg_out_ext32u(s, base, addr_regl); addr_regl = base; @@ -1570,7 +1872,27 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is_64) } else { tcg_out_opc_reg(s, ALIAS_PADD, base, TCG_GUEST_BASE_REG, addr_regl); } - tcg_out_qemu_st_direct(s, data_regl, data_regh, base, opc); + a_bits = get_alignment_bits(opc); + s_bits = opc & MO_SIZE; + /* + * R6 removes the left/right instructions but requires the + * system to support misaligned memory accesses. 
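+ * (R6 dropped LWL/LWR and SWL/SWR; ordinary loads and stores must instead handle misaligned addresses, in hardware or via kernel fixup.)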
+ */ + if (use_mips32r6_instructions) { + if (a_bits) { + tcg_out_test_alignment(s, false, addr_regl, addr_regh, a_bits); + } + tcg_out_qemu_st_direct(s, data_regl, data_regh, base, opc); + } else { + if (a_bits && a_bits != s_bits) { + tcg_out_test_alignment(s, false, addr_regl, addr_regh, a_bits); + } + if (a_bits >= s_bits) { + tcg_out_qemu_st_direct(s, data_regl, data_regh, base, opc); + } else { + tcg_out_qemu_st_unalign(s, data_regl, data_regh, base, opc); + } + } #endif } diff --git a/tcg/mips/tcg-target.h b/tcg/mips/tcg-target.h index c366fdf..7669213 100644 --- a/tcg/mips/tcg-target.h +++ b/tcg/mips/tcg-target.h @@ -207,8 +207,6 @@ extern bool use_mips32r2_instructions; void tb_target_set_jmp_target(uintptr_t, uintptr_t, uintptr_t, uintptr_t) QEMU_ERROR("code path is reachable"); -#ifdef CONFIG_SOFTMMU #define TCG_TARGET_NEED_LDST_LABELS -#endif #endif -- cgit v1.1 From d9e52834656ffa766113f1fbae8efe37aaac1df2 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Fri, 6 Aug 2021 06:55:14 -1000 Subject: tcg/mips: Support unaligned access for softmmu MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We can use the routines just added for user-only to emit unaligned accesses in softmmu mode too. Tested-by: Jiaxun Yang Reviewed-by: Jiaxun Yang Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- tcg/mips/tcg-target.c.inc | 91 ++++++++++++++++++++++++++--------------------- 1 file changed, 51 insertions(+), 40 deletions(-) (limited to 'tcg') diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc index 2c94ac2..993149d 100644 --- a/tcg/mips/tcg-target.c.inc +++ b/tcg/mips/tcg-target.c.inc @@ -1134,8 +1134,10 @@ static void tcg_out_tlb_load(TCGContext *s, TCGReg base, TCGReg addrl, tcg_insn_unit *label_ptr[2], bool is_load) { MemOp opc = get_memop(oi); - unsigned s_bits = opc & MO_SIZE; unsigned a_bits = get_alignment_bits(opc); + unsigned s_bits = opc & MO_SIZE; + unsigned a_mask = (1 << a_bits) - 1; + unsigned s_mask = (1 << s_bits) - 1; int mem_index = get_mmuidx(oi); int fast_off = TLB_MASK_TABLE_OFS(mem_index); int mask_off = fast_off + offsetof(CPUTLBDescFast, mask); @@ -1143,7 +1145,7 @@ static void tcg_out_tlb_load(TCGContext *s, TCGReg base, TCGReg addrl, int add_off = offsetof(CPUTLBEntry, addend); int cmp_off = (is_load ? offsetof(CPUTLBEntry, addr_read) : offsetof(CPUTLBEntry, addr_write)); - target_ulong mask; + target_ulong tlb_mask; /* Load tlb_mask[mmu_idx] and tlb_table[mmu_idx]. */ tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP0, TCG_AREG0, mask_off); @@ -1157,27 +1159,13 @@ static void tcg_out_tlb_load(TCGContext *s, TCGReg base, TCGReg addrl, /* Add the tlb_table pointer, creating the CPUTLBEntry address in TMP3. */ tcg_out_opc_reg(s, ALIAS_PADD, TCG_TMP3, TCG_TMP3, TCG_TMP1); - /* We don't currently support unaligned accesses. - We could do so with mips32r6. */ - if (a_bits < s_bits) { - a_bits = s_bits; - } - - /* Mask the page bits, keeping the alignment bits to compare against. */ - mask = (target_ulong)TARGET_PAGE_MASK | ((1 << a_bits) - 1); - /* Load the (low-half) tlb comparator. */ if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) { - tcg_out_ld(s, TCG_TYPE_I32, TCG_TMP0, TCG_TMP3, cmp_off + LO_OFF); - tcg_out_movi(s, TCG_TYPE_I32, TCG_TMP1, mask); + tcg_out_ldst(s, OPC_LW, TCG_TMP0, TCG_TMP3, cmp_off + LO_OFF); } else { tcg_out_ldst(s, (TARGET_LONG_BITS == 64 ? OPC_LD : TCG_TARGET_REG_BITS == 64 ?
OPC_LWU : OPC_LW), TCG_TMP0, TCG_TMP3, cmp_off); - tcg_out_movi(s, TCG_TYPE_TL, TCG_TMP1, mask); - /* No second compare is required here; - load the tlb addend for the fast path. */ - tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP2, TCG_TMP3, add_off); } /* Zero extend a 32-bit guest address for a 64-bit host. */ @@ -1185,7 +1173,25 @@ static void tcg_out_tlb_load(TCGContext *s, TCGReg base, TCGReg addrl, tcg_out_ext32u(s, base, addrl); addrl = base; } - tcg_out_opc_reg(s, OPC_AND, TCG_TMP1, TCG_TMP1, addrl); + + /* + * Mask the page bits, keeping the alignment bits to compare against. + * For unaligned accesses, compare against the end of the access to + * verify that it does not cross a page boundary. + */ + tlb_mask = (target_ulong)TARGET_PAGE_MASK | a_mask; + tcg_out_movi(s, TCG_TYPE_I32, TCG_TMP1, tlb_mask); + if (a_mask >= s_mask) { + tcg_out_opc_reg(s, OPC_AND, TCG_TMP1, TCG_TMP1, addrl); + } else { + tcg_out_opc_imm(s, ALIAS_PADDI, TCG_TMP2, addrl, s_mask - a_mask); + tcg_out_opc_reg(s, OPC_AND, TCG_TMP1, TCG_TMP1, TCG_TMP2); + } + + if (TCG_TARGET_REG_BITS >= TARGET_LONG_BITS) { + /* Load the tlb addend for the fast path. */ + tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP2, TCG_TMP3, add_off); + } label_ptr[0] = s->code_ptr; tcg_out_opc_br(s, OPC_BNE, TCG_TMP1, TCG_TMP0); @@ -1193,7 +1199,7 @@ static void tcg_out_tlb_load(TCGContext *s, TCGReg base, TCGReg addrl, /* Load and test the high half tlb comparator. */ if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) { /* delay slot */ - tcg_out_ld(s, TCG_TYPE_I32, TCG_TMP0, TCG_TMP3, cmp_off + HI_OFF); + tcg_out_ldst(s, OPC_LW, TCG_TMP0, TCG_TMP3, cmp_off + HI_OFF); /* Load the tlb addend for the fast path. */ tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP2, TCG_TMP3, add_off); @@ -1515,8 +1521,7 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg lo, TCGReg hi, } } -static void __attribute__((unused)) -tcg_out_qemu_ld_unalign(TCGContext *s, TCGReg lo, TCGReg hi, +static void tcg_out_qemu_ld_unalign(TCGContext *s, TCGReg lo, TCGReg hi, TCGReg base, MemOp opc, bool is_64) { const MIPSInsn lw1 = MIPS_BE ? OPC_LWL : OPC_LWR; @@ -1645,8 +1650,8 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is_64) #if defined(CONFIG_SOFTMMU) tcg_insn_unit *label_ptr[2]; #else - unsigned a_bits, s_bits; #endif + unsigned a_bits, s_bits; TCGReg base = TCG_REG_A0; data_regl = *args++; @@ -1655,10 +1660,20 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is_64) addr_regh = (TCG_TARGET_REG_BITS < TARGET_LONG_BITS ? *args++ : 0); oi = *args++; opc = get_memop(oi); + a_bits = get_alignment_bits(opc); + s_bits = opc & MO_SIZE; + /* + * R6 removes the left/right instructions but requires the + * system to support misaligned memory accesses. + */ #if defined(CONFIG_SOFTMMU) tcg_out_tlb_load(s, base, addr_regl, addr_regh, oi, label_ptr, 1); - tcg_out_qemu_ld_direct(s, data_regl, data_regh, base, opc, is_64); + if (use_mips32r6_instructions || a_bits >= s_bits) { + tcg_out_qemu_ld_direct(s, data_regl, data_regh, base, opc, is_64); + } else { + tcg_out_qemu_ld_unalign(s, data_regl, data_regh, base, opc, is_64); + } add_qemu_ldst_label(s, 1, oi, (is_64 ? 
TCG_TYPE_I64 : TCG_TYPE_I32), data_regl, data_regh, addr_regl, addr_regh, @@ -1675,12 +1690,6 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is_64) } else { tcg_out_opc_reg(s, ALIAS_PADD, base, TCG_GUEST_BASE_REG, addr_regl); } - a_bits = get_alignment_bits(opc); - s_bits = opc & MO_SIZE; - /* - * R6 removes the left/right instructions but requires the - * system to support misaligned memory accesses. - */ if (use_mips32r6_instructions) { if (a_bits) { tcg_out_test_alignment(s, true, addr_regl, addr_regh, a_bits); @@ -1760,8 +1769,7 @@ static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg lo, TCGReg hi, } } -static void __attribute__((unused)) -tcg_out_qemu_st_unalign(TCGContext *s, TCGReg lo, TCGReg hi, +static void tcg_out_qemu_st_unalign(TCGContext *s, TCGReg lo, TCGReg hi, TCGReg base, MemOp opc) { const MIPSInsn sw1 = MIPS_BE ? OPC_SWL : OPC_SWR; @@ -1841,9 +1849,8 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is_64) MemOp opc; #if defined(CONFIG_SOFTMMU) tcg_insn_unit *label_ptr[2]; -#else - unsigned a_bits, s_bits; #endif + unsigned a_bits, s_bits; TCGReg base = TCG_REG_A0; data_regl = *args++; @@ -1852,10 +1859,20 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is_64) addr_regh = (TCG_TARGET_REG_BITS < TARGET_LONG_BITS ? *args++ : 0); oi = *args++; opc = get_memop(oi); + a_bits = get_alignment_bits(opc); + s_bits = opc & MO_SIZE; + /* + * R6 removes the left/right instructions but requires the + * system to support misaligned memory accesses. + */ #if defined(CONFIG_SOFTMMU) tcg_out_tlb_load(s, base, addr_regl, addr_regh, oi, label_ptr, 0); - tcg_out_qemu_st_direct(s, data_regl, data_regh, base, opc); + if (use_mips32r6_instructions || a_bits >= s_bits) { + tcg_out_qemu_st_direct(s, data_regl, data_regh, base, opc); + } else { + tcg_out_qemu_st_unalign(s, data_regl, data_regh, base, opc); + } add_qemu_ldst_label(s, 0, oi, (is_64 ? TCG_TYPE_I64 : TCG_TYPE_I32), data_regl, data_regh, addr_regl, addr_regh, @@ -1872,12 +1889,6 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is_64) } else { tcg_out_opc_reg(s, ALIAS_PADD, base, TCG_GUEST_BASE_REG, addr_regl); } - a_bits = get_alignment_bits(opc); - s_bits = opc & MO_SIZE; - /* - * R6 removes the left/right instructions but requires the - * system to support misaligned memory accesses. - */ if (use_mips32r6_instructions) { if (a_bits) { tcg_out_test_alignment(s, false, addr_regl, addr_regh, a_bits); -- cgit v1.1 From 414399b6b8a02f03d3b0cba5cfa9205dff618a9b Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 8 Feb 2022 10:15:58 +1100 Subject: tcg/sparc: Use tcg_out_movi_imm13 in tcg_out_addsub2_i64 When BH is constant, it is constrained to 11 bits for use in MOVCC. For the cases in which we must load the constant BH into a register, we do not need the full logic of tcg_out_movi; we can use the simpler function for emitting a 13-bit constant. This eliminates the only case in which TCG_REG_T2 was passed to tcg_out_movi, which will shortly become invalid.
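(For reference: MOVCC takes a signed 11-bit immediate, [-1024, 1023], so the carry adjustment of +/-1 yields a value in [-1025, 1024], well inside the signed 13-bit range [-4096, 4095] that tcg_out_movi_imm13 emits as a single OR from %g0.)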
Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson --- tcg/sparc/tcg-target.c.inc | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'tcg') diff --git a/tcg/sparc/tcg-target.c.inc b/tcg/sparc/tcg-target.c.inc index 0c062c6..8d5992e 100644 --- a/tcg/sparc/tcg-target.c.inc +++ b/tcg/sparc/tcg-target.c.inc @@ -795,7 +795,7 @@ static void tcg_out_addsub2_i64(TCGContext *s, TCGReg rl, TCGReg rh, if (use_vis3_instructions && !is_sub) { /* Note that ADDXC doesn't accept immediates. */ if (bhconst && bh != 0) { - tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_T2, bh); + tcg_out_movi_imm13(s, TCG_REG_T2, bh); bh = TCG_REG_T2; } tcg_out_arith(s, rh, ah, bh, ARITH_ADDXC); @@ -811,9 +811,13 @@ static void tcg_out_addsub2_i64(TCGContext *s, TCGReg rl, TCGReg rh, tcg_out_movcc(s, TCG_COND_GEU, MOVCC_XCC, rh, ah, 0); } } else { - /* Otherwise adjust BH as if there is carry into T2 ... */ + /* + * Otherwise adjust BH as if there is carry into T2. + * Note that constant BH is constrained to 11 bits for the MOVCC, + * so the adjustment fits 12 bits. + */ if (bhconst) { - tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_T2, bh + (is_sub ? -1 : 1)); + tcg_out_movi_imm13(s, TCG_REG_T2, bh + (is_sub ? -1 : 1)); } else { tcg_out_arithi(s, TCG_REG_T2, bh, 1, is_sub ? ARITH_SUB : ARITH_ADD); -- cgit v1.1 From c71929c345d08fa70edb1eeaa9babaaf5d6a9bf7 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 8 Feb 2022 10:46:40 +1100 Subject: tcg/sparc: Split out tcg_out_movi_imm32 Handle 32-bit constants with a separate function, so that tcg_out_movi_int does not need to recurse. This slightly rearranges the order of tests for small constants, but produces the same output. Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson --- tcg/sparc/tcg-target.c.inc | 36 +++++++++++++++++++++--------------- 1 file changed, 21 insertions(+), 15 deletions(-) (limited to 'tcg') diff --git a/tcg/sparc/tcg-target.c.inc b/tcg/sparc/tcg-target.c.inc index 8d5992e..2f7c8dc 100644 --- a/tcg/sparc/tcg-target.c.inc +++ b/tcg/sparc/tcg-target.c.inc @@ -413,15 +413,30 @@ static void tcg_out_movi_imm13(TCGContext *s, TCGReg ret, int32_t arg) tcg_out_arithi(s, ret, TCG_REG_G0, arg, ARITH_OR); } +static void tcg_out_movi_imm32(TCGContext *s, TCGReg ret, int32_t arg) +{ + if (check_fit_i32(arg, 13)) { + /* A 13-bit constant sign-extended to 64-bits. */ + tcg_out_movi_imm13(s, ret, arg); + } else { + /* A 32-bit constant zero-extended to 64 bits. */ + tcg_out_sethi(s, ret, arg); + if (arg & 0x3ff) { + tcg_out_arithi(s, ret, ret, arg & 0x3ff, ARITH_OR); + } + } +} + static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret, tcg_target_long arg, bool in_prologue) { tcg_target_long hi, lo = (int32_t)arg; tcg_target_long test, lsb; - /* Make sure we test 32-bit constants for imm13 properly. */ - if (type == TCG_TYPE_I32) { - arg = lo; + /* A 32-bit constant, or 32-bit zero-extended to 64-bits. */ + if (type == TCG_TYPE_I32 || arg == (uint32_t)arg) { + tcg_out_movi_imm32(s, ret, arg); + return; } /* A 13-bit constant sign-extended to 64-bits. */ @@ -439,15 +454,6 @@ static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret, } } - /* A 32-bit constant, or 32-bit zero-extended to 64-bits. */ - if (type == TCG_TYPE_I32 || arg == (uint32_t)arg) { - tcg_out_sethi(s, ret, arg); - if (arg & 0x3ff) { - tcg_out_arithi(s, ret, ret, arg & 0x3ff, ARITH_OR); - } - return; - } - /* A 32-bit constant sign-extended to 64-bits. 
*/ if (arg == lo) { tcg_out_sethi(s, ret, ~arg); @@ -471,13 +477,13 @@ static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret, /* A 64-bit constant decomposed into 2 32-bit pieces. */ if (check_fit_i32(lo, 13)) { hi = (arg - lo) >> 32; - tcg_out_movi(s, TCG_TYPE_I32, ret, hi); + tcg_out_movi_imm32(s, ret, hi); tcg_out_arithi(s, ret, ret, 32, SHIFT_SLLX); tcg_out_arithi(s, ret, ret, lo, ARITH_ADD); } else { hi = arg >> 32; - tcg_out_movi(s, TCG_TYPE_I32, ret, hi); - tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_T2, lo); + tcg_out_movi_imm32(s, ret, hi); + tcg_out_movi_imm32(s, TCG_REG_T2, lo); tcg_out_arithi(s, ret, ret, 32, SHIFT_SLLX); tcg_out_arith(s, ret, ret, TCG_REG_T2, ARITH_OR); } -- cgit v1.1 From 92840d06faeed2038b90d5b168c18d73ca3a44b8 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Thu, 5 Aug 2021 02:16:05 +0300 Subject: tcg/sparc: Add scratch argument to tcg_out_movi_int This will allow us to control exactly what scratch register is used for loading the constant. Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson --- tcg/sparc/tcg-target.c.inc | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) (limited to 'tcg') diff --git a/tcg/sparc/tcg-target.c.inc b/tcg/sparc/tcg-target.c.inc index 2f7c8dc..7a8f20e 100644 --- a/tcg/sparc/tcg-target.c.inc +++ b/tcg/sparc/tcg-target.c.inc @@ -428,7 +428,8 @@ static void tcg_out_movi_imm32(TCGContext *s, TCGReg ret, int32_t arg) } static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret, - tcg_target_long arg, bool in_prologue) + tcg_target_long arg, bool in_prologue, + TCGReg scratch) { tcg_target_long hi, lo = (int32_t)arg; tcg_target_long test, lsb; @@ -483,16 +484,17 @@ static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret, } else { hi = arg >> 32; tcg_out_movi_imm32(s, ret, hi); - tcg_out_movi_imm32(s, TCG_REG_T2, lo); + tcg_out_movi_imm32(s, scratch, lo); tcg_out_arithi(s, ret, ret, 32, SHIFT_SLLX); - tcg_out_arith(s, ret, ret, TCG_REG_T2, ARITH_OR); + tcg_out_arith(s, ret, ret, scratch, ARITH_OR); } } static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg ret, tcg_target_long arg) { - tcg_out_movi_int(s, type, ret, arg, false); + tcg_debug_assert(ret != TCG_REG_T2); + tcg_out_movi_int(s, type, ret, arg, false, TCG_REG_T2); } static void tcg_out_ldst_rr(TCGContext *s, TCGReg data, TCGReg a1, @@ -847,7 +849,7 @@ static void tcg_out_call_nodelay(TCGContext *s, const tcg_insn_unit *dest, } else { uintptr_t desti = (uintptr_t)dest; tcg_out_movi_int(s, TCG_TYPE_PTR, TCG_REG_T1, - desti & ~0xfff, in_prologue); + desti & ~0xfff, in_prologue, TCG_REG_O7); tcg_out_arithi(s, TCG_REG_O7, TCG_REG_T1, desti & 0xfff, JMPL); } } @@ -1023,7 +1025,8 @@ static void tcg_target_qemu_prologue(TCGContext *s) #ifndef CONFIG_SOFTMMU if (guest_base != 0) { - tcg_out_movi_int(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, guest_base, true); + tcg_out_movi_int(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, + guest_base, true, TCG_REG_T1); tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG); } #endif -- cgit v1.1 From 684db2a0b04ff024d0a275de85982fe5892185d3 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Thu, 5 Aug 2021 04:09:15 +0300 Subject: tcg/sparc: Improve code gen for shifted 32-bit constants MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We had code for checking for 13 and 21-bit shifted constants, but we can do better and allow 32-bit shifted constants. This is still 2 insns shorter than the full 64-bit sequence. 
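A hypothetical worked example: arg = 0x1234567900 (0x12345679 << 8) gives lsb = 8 and test = 0x12345679; the 21-bit branch is skipped (lsb is not > 10), but test is a valid 32-bit constant, so this path costs sethi+or+sllx = 3 insns, where the 64-bit decomposition would cost 5 (one movi for the high half, sethi+or for the low half, then sllx and or to merge).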
Reviewed-by: Peter Maydell Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- tcg/sparc/tcg-target.c.inc | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'tcg') diff --git a/tcg/sparc/tcg-target.c.inc b/tcg/sparc/tcg-target.c.inc index 7a8f20e..ed2f4ec 100644 --- a/tcg/sparc/tcg-target.c.inc +++ b/tcg/sparc/tcg-target.c.inc @@ -462,17 +462,17 @@ static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret, return; } - /* A 21-bit constant, shifted. */ + /* A 32-bit constant, shifted. */ lsb = ctz64(arg); test = (tcg_target_long)arg >> lsb; - if (check_fit_tl(test, 13)) { - tcg_out_movi_imm13(s, ret, test); - tcg_out_arithi(s, ret, ret, lsb, SHIFT_SLLX); - return; - } else if (lsb > 10 && test == extract64(test, 0, 21)) { + if (lsb > 10 && test == extract64(test, 0, 21)) { tcg_out_sethi(s, ret, test << 10); tcg_out_arithi(s, ret, ret, lsb - 10, SHIFT_SLLX); return; + } else if (test == (uint32_t)test || test == (int32_t)test) { + tcg_out_movi_int(s, TCG_TYPE_I64, ret, test, in_prologue, scratch); + tcg_out_arithi(s, ret, ret, lsb, SHIFT_SLLX); + return; } /* A 64-bit constant decomposed into 2 32-bit pieces. */ -- cgit v1.1 From 6a6bfa3c60ba70e870f8355b9d3508915c90c13a Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sat, 5 Feb 2022 08:12:30 +0300 Subject: tcg/sparc: Convert patch_reloc to return bool MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since 7ecd02a06f8, if patch_reloc fails we restart translation with a smaller TB. SPARC had its function signature changed, but not the logic. Replace assert with return false. Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson --- tcg/sparc/tcg-target.c.inc | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'tcg') diff --git a/tcg/sparc/tcg-target.c.inc b/tcg/sparc/tcg-target.c.inc index ed2f4ec..213aba4 100644 --- a/tcg/sparc/tcg-target.c.inc +++ b/tcg/sparc/tcg-target.c.inc @@ -323,12 +323,16 @@ static bool patch_reloc(tcg_insn_unit *src_rw, int type, switch (type) { case R_SPARC_WDISP16: - assert(check_fit_ptr(pcrel >> 2, 16)); + if (!check_fit_ptr(pcrel >> 2, 16)) { + return false; + } insn &= ~INSN_OFF16(-1); insn |= INSN_OFF16(pcrel); break; case R_SPARC_WDISP19: - assert(check_fit_ptr(pcrel >> 2, 19)); + if (!check_fit_ptr(pcrel >> 2, 19)) { + return false; + } insn &= ~INSN_OFF19(-1); insn |= INSN_OFF19(pcrel); break; -- cgit v1.1 From c834b8d81b0ee597d185a4d8127dfc76230307c8 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Thu, 5 Aug 2021 04:34:20 +0300 Subject: tcg/sparc: Use the constant pool for 64-bit constants Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson --- tcg/sparc/tcg-target.c.inc | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'tcg') diff --git a/tcg/sparc/tcg-target.c.inc b/tcg/sparc/tcg-target.c.inc index 213aba4..e78945d 100644 --- a/tcg/sparc/tcg-target.c.inc +++ b/tcg/sparc/tcg-target.c.inc @@ -336,6 +336,13 @@ static bool patch_reloc(tcg_insn_unit *src_rw, int type, insn &= ~INSN_OFF19(-1); insn |= INSN_OFF19(pcrel); break; + case R_SPARC_13: + if (!check_fit_ptr(value, 13)) { + return false; + } + insn &= ~INSN_IMM13(-1); + insn |= INSN_IMM13(value); + break; default: g_assert_not_reached(); } @@ -479,6 +486,14 @@ static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret, return; } + /* Use the constant pool, if possible. 
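+ This becomes a TB-relative LDX whose 13-bit displacement is patched via the R_SPARC_13 relocation added above, once the pool is laid out.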
*/ + if (!in_prologue && USE_REG_TB) { + new_pool_label(s, arg, R_SPARC_13, s->code_ptr, + tcg_tbrel_diff(s, NULL)); + tcg_out32(s, LDX | INSN_RD(ret) | INSN_RS1(TCG_REG_TB)); + return; + } + /* A 64-bit constant decomposed into 2 32-bit pieces. */ if (check_fit_i32(lo, 13)) { hi = (arg - lo) >> 32; -- cgit v1.1 From e01d60f211251388a5d9ae7e02d0b4500af26966 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Thu, 5 Aug 2021 02:23:05 +0300 Subject: tcg/sparc: Add tcg_out_jmpl_const for better tail calls Due to mapping changes, we now rarely place the code_gen_buffer near the main executable. Which means that direct calls will now rarely be in range. So, always use indirect calls for tail calls, which allows us to avoid clobbering %o7, and therefore we need not save and restore it. Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson --- tcg/sparc/tcg-target.c.inc | 37 +++++++++++++++++++++++-------------- 1 file changed, 23 insertions(+), 14 deletions(-) (limited to 'tcg') diff --git a/tcg/sparc/tcg-target.c.inc b/tcg/sparc/tcg-target.c.inc index e78945d..646bb46 100644 --- a/tcg/sparc/tcg-target.c.inc +++ b/tcg/sparc/tcg-target.c.inc @@ -858,6 +858,19 @@ static void tcg_out_addsub2_i64(TCGContext *s, TCGReg rl, TCGReg rh, tcg_out_mov(s, TCG_TYPE_I64, rl, tmp); } +static void tcg_out_jmpl_const(TCGContext *s, const tcg_insn_unit *dest, + bool in_prologue, bool tail_call) +{ + uintptr_t desti = (uintptr_t)dest; + + /* Be careful not to clobber %o7 for a tail call. */ + tcg_out_movi_int(s, TCG_TYPE_PTR, TCG_REG_T1, + desti & ~0xfff, in_prologue, + tail_call ? TCG_REG_G2 : TCG_REG_O7); + tcg_out_arithi(s, tail_call ? TCG_REG_G0 : TCG_REG_O7, + TCG_REG_T1, desti & 0xfff, JMPL); +} + static void tcg_out_call_nodelay(TCGContext *s, const tcg_insn_unit *dest, bool in_prologue) { @@ -866,10 +879,7 @@ static void tcg_out_call_nodelay(TCGContext *s, const tcg_insn_unit *dest, if (disp == (int32_t)disp) { tcg_out32(s, CALL | (uint32_t)disp >> 2); } else { - uintptr_t desti = (uintptr_t)dest; - tcg_out_movi_int(s, TCG_TYPE_PTR, TCG_REG_T1, - desti & ~0xfff, in_prologue, TCG_REG_O7); - tcg_out_arithi(s, TCG_REG_O7, TCG_REG_T1, desti & 0xfff, JMPL); + tcg_out_jmpl_const(s, dest, in_prologue, false); } } @@ -960,11 +970,10 @@ static void build_trampolines(TCGContext *s) /* Set the retaddr operand. */ tcg_out_mov(s, TCG_TYPE_PTR, ra, TCG_REG_O7); - /* Set the env operand. */ - tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_O0, TCG_AREG0); /* Tail call. */ - tcg_out_call_nodelay(s, qemu_ld_helpers[i], true); - tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_O7, ra); + tcg_out_jmpl_const(s, qemu_ld_helpers[i], true, true); + /* delay slot -- set the env argument */ + tcg_out_mov_delay(s, TCG_REG_O0, TCG_AREG0); } for (i = 0; i < ARRAY_SIZE(qemu_st_helpers); ++i) { @@ -1006,14 +1015,14 @@ static void build_trampolines(TCGContext *s) if (ra >= TCG_REG_O6) { tcg_out_st(s, TCG_TYPE_PTR, TCG_REG_O7, TCG_REG_CALL_STACK, TCG_TARGET_CALL_STACK_OFFSET); - ra = TCG_REG_G1; + } else { + tcg_out_mov(s, TCG_TYPE_PTR, ra, TCG_REG_O7); } - tcg_out_mov(s, TCG_TYPE_PTR, ra, TCG_REG_O7); - /* Set the env operand. */ - tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_O0, TCG_AREG0); + /* Tail call. 
*/ - tcg_out_call_nodelay(s, qemu_st_helpers[i], true); - tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_O7, ra); + tcg_out_jmpl_const(s, qemu_st_helpers[i], true, true); + /* delay slot -- set the env argument */ + tcg_out_mov_delay(s, TCG_REG_O0, TCG_AREG0); } } #endif -- cgit v1.1 From 321dbde33a6aa8e7780a3b6b4746628d215a1fec Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Thu, 5 Aug 2021 03:54:30 +0300 Subject: tcg/sparc: Support unaligned access for user-only This is kinda sorta the opposite of the other tcg hosts, where we get (normal) alignment checks for free with host SIGBUS and need to add code to support unaligned accesses. This inline code expansion is somewhat large, but it takes quite a few instructions to make a function call to a helper anyway. Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson --- tcg/sparc/tcg-target.c.inc | 219 +++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 211 insertions(+), 8 deletions(-) (limited to 'tcg') diff --git a/tcg/sparc/tcg-target.c.inc b/tcg/sparc/tcg-target.c.inc index 646bb46..72d9552 100644 --- a/tcg/sparc/tcg-target.c.inc +++ b/tcg/sparc/tcg-target.c.inc @@ -211,6 +211,7 @@ static const int tcg_target_call_oarg_regs[] = { #define ARITH_ADD (INSN_OP(2) | INSN_OP3(0x00)) #define ARITH_ADDCC (INSN_OP(2) | INSN_OP3(0x10)) #define ARITH_AND (INSN_OP(2) | INSN_OP3(0x01)) +#define ARITH_ANDCC (INSN_OP(2) | INSN_OP3(0x11)) #define ARITH_ANDN (INSN_OP(2) | INSN_OP3(0x05)) #define ARITH_OR (INSN_OP(2) | INSN_OP3(0x02)) #define ARITH_ORCC (INSN_OP(2) | INSN_OP3(0x12)) @@ -1025,6 +1026,38 @@ static void build_trampolines(TCGContext *s) tcg_out_mov_delay(s, TCG_REG_O0, TCG_AREG0); } } +#else +static const tcg_insn_unit *qemu_unalign_ld_trampoline; +static const tcg_insn_unit *qemu_unalign_st_trampoline; + +static void build_trampolines(TCGContext *s) +{ + for (int ld = 0; ld < 2; ++ld) { + void *helper; + + while ((uintptr_t)s->code_ptr & 15) { + tcg_out_nop(s); + } + + if (ld) { + helper = helper_unaligned_ld; + qemu_unalign_ld_trampoline = tcg_splitwx_to_rx(s->code_ptr); + } else { + helper = helper_unaligned_st; + qemu_unalign_st_trampoline = tcg_splitwx_to_rx(s->code_ptr); + } + + if (!SPARC64 && TARGET_LONG_BITS == 64) { + /* Install the high part of the address. */ + tcg_out_arithi(s, TCG_REG_O1, TCG_REG_O2, 32, SHIFT_SRLX); + } + + /* Tail call. 
*/ + tcg_out_jmpl_const(s, helper, true, true); + /* delay slot -- set the env argument */ + tcg_out_mov_delay(s, TCG_REG_O0, TCG_AREG0); + } +} #endif /* Generate global QEMU prologue and epilogue code */ @@ -1075,9 +1108,7 @@ static void tcg_target_qemu_prologue(TCGContext *s) /* delay slot */ tcg_out_movi_imm13(s, TCG_REG_O0, 0); -#ifdef CONFIG_SOFTMMU build_trampolines(s); -#endif } static void tcg_out_nop_fill(tcg_insn_unit *p, int count) @@ -1162,18 +1193,22 @@ static TCGReg tcg_out_tlb_load(TCGContext *s, TCGReg addr, int mem_index, static const int qemu_ld_opc[(MO_SSIZE | MO_BSWAP) + 1] = { [MO_UB] = LDUB, [MO_SB] = LDSB, + [MO_UB | MO_LE] = LDUB, + [MO_SB | MO_LE] = LDSB, [MO_BEUW] = LDUH, [MO_BESW] = LDSH, [MO_BEUL] = LDUW, [MO_BESL] = LDSW, [MO_BEUQ] = LDX, + [MO_BESQ] = LDX, [MO_LEUW] = LDUH_LE, [MO_LESW] = LDSH_LE, [MO_LEUL] = LDUW_LE, [MO_LESL] = LDSW_LE, [MO_LEUQ] = LDX_LE, + [MO_LESQ] = LDX_LE, }; static const int qemu_st_opc[(MO_SIZE | MO_BSWAP) + 1] = { @@ -1192,11 +1227,12 @@ static void tcg_out_qemu_ld(TCGContext *s, TCGReg data, TCGReg addr, MemOpIdx oi, bool is_64) { MemOp memop = get_memop(oi); + tcg_insn_unit *label_ptr; + #ifdef CONFIG_SOFTMMU unsigned memi = get_mmuidx(oi); TCGReg addrz, param; const tcg_insn_unit *func; - tcg_insn_unit *label_ptr; addrz = tcg_out_tlb_load(s, addr, memi, memop, offsetof(CPUTLBEntry, addr_read)); @@ -1260,13 +1296,99 @@ static void tcg_out_qemu_ld(TCGContext *s, TCGReg data, TCGReg addr, *label_ptr |= INSN_OFF19(tcg_ptr_byte_diff(s->code_ptr, label_ptr)); #else + TCGReg index = (guest_base ? TCG_GUEST_BASE_REG : TCG_REG_G0); + unsigned a_bits = get_alignment_bits(memop); + unsigned s_bits = memop & MO_SIZE; + unsigned t_bits; + if (SPARC64 && TARGET_LONG_BITS == 32) { tcg_out_arithi(s, TCG_REG_T1, addr, 0, SHIFT_SRL); addr = TCG_REG_T1; } - tcg_out_ldst_rr(s, data, addr, - (guest_base ? TCG_GUEST_BASE_REG : TCG_REG_G0), + + /* + * Normal case: alignment equal to access size. + */ + if (a_bits == s_bits) { + tcg_out_ldst_rr(s, data, addr, index, + qemu_ld_opc[memop & (MO_BSWAP | MO_SSIZE)]); + return; + } + + /* + * Test for at least natural alignment, and assume most accesses + * will be aligned -- perform a straight load in the delay slot. + * This is required to preserve atomicity for aligned accesses. + */ + t_bits = MAX(a_bits, s_bits); + tcg_debug_assert(t_bits < 13); + tcg_out_arithi(s, TCG_REG_G0, addr, (1u << t_bits) - 1, ARITH_ANDCC); + + /* beq,a,pt %icc, label */ + label_ptr = s->code_ptr; + tcg_out_bpcc0(s, COND_E, BPCC_A | BPCC_PT | BPCC_ICC, 0); + /* delay slot */ + tcg_out_ldst_rr(s, data, addr, index, qemu_ld_opc[memop & (MO_BSWAP | MO_SSIZE)]); + + if (a_bits >= s_bits) { + /* + * Overalignment: A successful alignment test will perform the memory + * operation in the delay slot, and failure need only invoke the + * handler for SIGBUS. + */ + TCGReg arg_low = TCG_REG_O1 + (!SPARC64 && TARGET_LONG_BITS == 64); + tcg_out_call_nodelay(s, qemu_unalign_ld_trampoline, false); + /* delay slot -- move to low part of argument reg */ + tcg_out_mov_delay(s, arg_low, addr); + } else { + /* Underalignment: load by pieces of minimum alignment. */ + int ld_opc, a_size, s_size, i; + + /* + * Force full address into T1 early; avoids problems with + * overlap between @addr and @data. 
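+ * (If @data aliased @addr or @index, the first piecewise load would clobber the address before the remaining pieces were fetched.)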
+ */ + tcg_out_arith(s, TCG_REG_T1, addr, index, ARITH_ADD); + + a_size = 1 << a_bits; + s_size = 1 << s_bits; + if ((memop & MO_BSWAP) == MO_BE) { + ld_opc = qemu_ld_opc[a_bits | MO_BE | (memop & MO_SIGN)]; + tcg_out_ldst(s, data, TCG_REG_T1, 0, ld_opc); + ld_opc = qemu_ld_opc[a_bits | MO_BE]; + for (i = a_size; i < s_size; i += a_size) { + tcg_out_ldst(s, TCG_REG_T2, TCG_REG_T1, i, ld_opc); + tcg_out_arithi(s, data, data, a_size, SHIFT_SLLX); + tcg_out_arith(s, data, data, TCG_REG_T2, ARITH_OR); + } + } else if (a_bits == 0) { + ld_opc = LDUB; + tcg_out_ldst(s, data, TCG_REG_T1, 0, ld_opc); + for (i = a_size; i < s_size; i += a_size) { + if ((memop & MO_SIGN) && i == s_size - a_size) { + ld_opc = LDSB; + } + tcg_out_ldst(s, TCG_REG_T2, TCG_REG_T1, i, ld_opc); + tcg_out_arithi(s, TCG_REG_T2, TCG_REG_T2, i * 8, SHIFT_SLLX); + tcg_out_arith(s, data, data, TCG_REG_T2, ARITH_OR); + } + } else { + ld_opc = qemu_ld_opc[a_bits | MO_LE]; + tcg_out_ldst_rr(s, data, TCG_REG_T1, TCG_REG_G0, ld_opc); + for (i = a_size; i < s_size; i += a_size) { + tcg_out_arithi(s, TCG_REG_T1, TCG_REG_T1, a_size, ARITH_ADD); + if ((memop & MO_SIGN) && i == s_size - a_size) { + ld_opc = qemu_ld_opc[a_bits | MO_LE | MO_SIGN]; + } + tcg_out_ldst_rr(s, TCG_REG_T2, TCG_REG_T1, TCG_REG_G0, ld_opc); + tcg_out_arithi(s, TCG_REG_T2, TCG_REG_T2, i * 8, SHIFT_SLLX); + tcg_out_arith(s, data, data, TCG_REG_T2, ARITH_OR); + } + } + } + + *label_ptr |= INSN_OFF19(tcg_ptr_byte_diff(s->code_ptr, label_ptr)); #endif /* CONFIG_SOFTMMU */ } @@ -1274,11 +1396,12 @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg data, TCGReg addr, MemOpIdx oi) { MemOp memop = get_memop(oi); + tcg_insn_unit *label_ptr; + #ifdef CONFIG_SOFTMMU unsigned memi = get_mmuidx(oi); TCGReg addrz, param; const tcg_insn_unit *func; - tcg_insn_unit *label_ptr; addrz = tcg_out_tlb_load(s, addr, memi, memop, offsetof(CPUTLBEntry, addr_write)); @@ -1315,13 +1438,93 @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg data, TCGReg addr, *label_ptr |= INSN_OFF19(tcg_ptr_byte_diff(s->code_ptr, label_ptr)); #else + TCGReg index = (guest_base ? TCG_GUEST_BASE_REG : TCG_REG_G0); + unsigned a_bits = get_alignment_bits(memop); + unsigned s_bits = memop & MO_SIZE; + unsigned t_bits; + if (SPARC64 && TARGET_LONG_BITS == 32) { tcg_out_arithi(s, TCG_REG_T1, addr, 0, SHIFT_SRL); addr = TCG_REG_T1; } - tcg_out_ldst_rr(s, data, addr, - (guest_base ? TCG_GUEST_BASE_REG : TCG_REG_G0), + + /* + * Normal case: alignment equal to access size. + */ + if (a_bits == s_bits) { + tcg_out_ldst_rr(s, data, addr, index, + qemu_st_opc[memop & (MO_BSWAP | MO_SIZE)]); + return; + } + + /* + * Test for at least natural alignment, and assume most accesses + * will be aligned -- perform a straight store in the delay slot. + * This is required to preserve atomicity for aligned accesses. + */ + t_bits = MAX(a_bits, s_bits); + tcg_debug_assert(t_bits < 13); + tcg_out_arithi(s, TCG_REG_G0, addr, (1u << t_bits) - 1, ARITH_ANDCC); + + /* beq,a,pt %icc, label */ + label_ptr = s->code_ptr; + tcg_out_bpcc0(s, COND_E, BPCC_A | BPCC_PT | BPCC_ICC, 0); + /* delay slot */ + tcg_out_ldst_rr(s, data, addr, index, qemu_st_opc[memop & (MO_BSWAP | MO_SIZE)]); + + if (a_bits >= s_bits) { + /* + * Overalignment: A successful alignment test will perform the memory + * operation in the delay slot, and failure need only invoke the + * handler for SIGBUS. 
+ */ + TCGReg arg_low = TCG_REG_O1 + (!SPARC64 && TARGET_LONG_BITS == 64); + tcg_out_call_nodelay(s, qemu_unalign_st_trampoline, false); + /* delay slot -- move to low part of argument reg */ + tcg_out_mov_delay(s, arg_low, addr); + } else { + /* Underalignment: store by pieces of minimum alignment. */ + int st_opc, a_size, s_size, i; + + /* + * Force full address into T1 early; avoids problems with + * overlap between @addr and @data. + */ + tcg_out_arith(s, TCG_REG_T1, addr, index, ARITH_ADD); + + a_size = 1 << a_bits; + s_size = 1 << s_bits; + if ((memop & MO_BSWAP) == MO_BE) { + st_opc = qemu_st_opc[a_bits | MO_BE]; + for (i = 0; i < s_size; i += a_size) { + TCGReg d = data; + int shift = (s_size - a_size - i) * 8; + if (shift) { + d = TCG_REG_T2; + tcg_out_arithi(s, d, data, shift, SHIFT_SRLX); + } + tcg_out_ldst(s, d, TCG_REG_T1, i, st_opc); + } + } else if (a_bits == 0) { + tcg_out_ldst(s, data, TCG_REG_T1, 0, STB); + for (i = 1; i < s_size; i++) { + tcg_out_arithi(s, TCG_REG_T2, data, i * 8, SHIFT_SRLX); + tcg_out_ldst(s, TCG_REG_T2, TCG_REG_T1, i, STB); + } + } else { + /* Note that ST*A with immediate asi must use indexed address. */ + st_opc = qemu_st_opc[a_bits + MO_LE]; + tcg_out_ldst_rr(s, data, TCG_REG_T1, TCG_REG_G0, st_opc); + for (i = a_size; i < s_size; i += a_size) { + tcg_out_arithi(s, TCG_REG_T2, data, i * 8, SHIFT_SRLX); + tcg_out_arithi(s, TCG_REG_T1, TCG_REG_T1, a_size, ARITH_ADD); + tcg_out_ldst_rr(s, TCG_REG_T2, TCG_REG_T1, TCG_REG_G0, st_opc); + } + } + } + + *label_ptr |= INSN_OFF19(tcg_ptr_byte_diff(s->code_ptr, label_ptr)); #endif /* CONFIG_SOFTMMU */ } -- cgit v1.1
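The user-only slow paths in this series all tail-call helper_unaligned_ld/st, which live outside the tcg/ directory (in accel/tcg/user-exec.c). A minimal sketch of their shape, on the assumption that they simply route into cpu_loop_exit_sigbus as that era of the tree did; this is not shown by the patches above:

#include "qemu/osdep.h"
#include "cpu.h"
#include "exec/exec-all.h"

/* Raise SIGBUS for an unaligned load; does not return. */
void helper_unaligned_ld(CPUArchState *env, target_ulong addr)
{
    cpu_loop_exit_sigbus(env_cpu(env), addr, MMU_DATA_LOAD, GETPC());
}

/* Likewise for an unaligned store. */
void helper_unaligned_st(CPUArchState *env, target_ulong addr)
{
    cpu_loop_exit_sigbus(env_cpu(env), addr, MMU_DATA_STORE, GETPC());
}

This is why each backend only has to reach the helper with the guest address in the argument registers; the generic code takes care of delivering the signal.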