Diffstat (limited to 'tcg')
-rw-r--r--  tcg/README                |  32
-rw-r--r--  tcg/aarch64/tcg-target.c  |  59
-rw-r--r--  tcg/aarch64/tcg-target.h  |   3
-rw-r--r--  tcg/arm/tcg-target.c      |   8
-rw-r--r--  tcg/i386/tcg-target.c     |  49
-rw-r--r--  tcg/i386/tcg-target.h     |   3
-rw-r--r--  tcg/ia64/tcg-target.c     |  29
-rw-r--r--  tcg/ia64/tcg-target.h     |   3
-rw-r--r--  tcg/mips/tcg-target.c     |  16
-rw-r--r--  tcg/optimize.c            | 253
-rw-r--r--  tcg/ppc/tcg-target.c      |  65
-rw-r--r--  tcg/ppc/tcg-target.h      |   3
-rw-r--r--  tcg/s390/tcg-target.c     |  46
-rw-r--r--  tcg/s390/tcg-target.h     |   3
-rw-r--r--  tcg/sparc/tcg-target.c    |  36
-rw-r--r--  tcg/sparc/tcg-target.h    |   3
-rw-r--r--  tcg/tcg-op.c              |  48
-rw-r--r--  tcg/tcg-op.h              |  12
-rw-r--r--  tcg/tcg-opc.h             |  10
-rw-r--r--  tcg/tcg.h                 |   3
-rw-r--r--  tcg/tci/tcg-target.c      |   4
-rw-r--r--  tcg/tci/tcg-target.h      |   3
22 files changed, 391 insertions, 300 deletions
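A quick orientation before the patch body: the series replaces the old `trunc_shr_i32 t0, t1, pos` opcode (which in practice was only used with pos == 0 or pos == 32) by two dedicated size-changing ops, `extrl_i64_i32` and `extrh_i64_i32`. The following minimal sketch in plain C models the semantics the README hunks describe; the function names are stand-ins for the TCG ops, not actual QEMU code:

```c
#include <stdint.h>

/* extrl_i64_i32 t0, t1: extract the low 32 bits of the 64-bit input.
   On most hosts this is a simple register move. */
static uint32_t extrl_i64_i32(uint64_t t1)
{
    return (uint32_t)t1;
}

/* extrh_i64_i32 t0, t1: extract the high 32 bits of the 64-bit input.
   On most hosts this is a 32-bit right shift. */
static uint32_t extrh_i64_i32(uint64_t t1)
{
    return (uint32_t)(t1 >> 32);
}
```

This matches what the sparc hunks below generate: a plain `mov` for `extrl_i64_i32` and a `SRLX` by 32 for `extrh_i64_i32`.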
@@ -314,11 +314,17 @@ This operation would be equivalent to dest = (t1 & ~0x0f00) | ((t2 << 8) & 0x0f00) -* trunc_shr_i32 t0, t1, pos +* extrl_i64_i32 t0, t1 -For 64-bit hosts only, right shift the 64-bit input T1 by POS and -truncate to 32-bit output T0. Depending on the host, this may be -a simple mov/shift, or may require additional canonicalization. +For 64-bit hosts only, extract the low 32-bits of input T1 and place it +into 32-bit output T0. Depending on the host, this may be a simple move, +or may require additional canonicalization. + +* extrh_i64_i32 t0, t1 + +For 64-bit hosts only, extract the high 32-bits of input T1 and place it +into 32-bit output T0. Depending on the host, this may be a simple shift, +or may require additional canonicalization. ********* Conditional moves @@ -466,13 +472,25 @@ On a 32 bit target, all 64 bit operations are converted to 32 bits. A few specific operations must be implemented to allow it (see add2_i32, sub2_i32, brcond2_i32). +On a 64 bit target, the values are transfered between 32 and 64-bit +registers using the following ops: +- trunc_shr_i64_i32 +- ext_i32_i64 +- extu_i32_i64 + +They ensure that the values are correctly truncated or extended when +moved from a 32-bit to a 64-bit register or vice-versa. Note that the +trunc_shr_i64_i32 is an optional op. It is not necessary to implement +it if all the following conditions are met: +- 64-bit registers can hold 32-bit values +- 32-bit values in a 64-bit register do not need to stay zero or + sign extended +- all 32-bit TCG ops ignore the high part of 64-bit registers + Floating point operations are not supported in this version. A previous incarnation of the code generator had full support of them, but it is better to concentrate on integer operations first. -On a 64 bit target, no assumption is made in TCG about the storage of -the 32 bit values in 64 bit registers. - 4.2) Constraints GCC like constraints are used to define the constraints of every diff --git a/tcg/aarch64/tcg-target.c b/tcg/aarch64/tcg-target.c index b7ec4f5..01ae610 100644 --- a/tcg/aarch64/tcg-target.c +++ b/tcg/aarch64/tcg-target.c @@ -30,7 +30,7 @@ static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = { static const int tcg_target_reg_alloc_order[] = { TCG_REG_X20, TCG_REG_X21, TCG_REG_X22, TCG_REG_X23, TCG_REG_X24, TCG_REG_X25, TCG_REG_X26, TCG_REG_X27, - TCG_REG_X28, /* we will reserve this for GUEST_BASE if configured */ + TCG_REG_X28, /* we will reserve this for guest_base if configured */ TCG_REG_X8, TCG_REG_X9, TCG_REG_X10, TCG_REG_X11, TCG_REG_X12, TCG_REG_X13, TCG_REG_X14, TCG_REG_X15, @@ -56,11 +56,7 @@ static const int tcg_target_call_oarg_regs[1] = { #define TCG_REG_TMP TCG_REG_X30 #ifndef CONFIG_SOFTMMU -# ifdef CONFIG_USE_GUEST_BASE -# define TCG_REG_GUEST_BASE TCG_REG_X28 -# else -# define TCG_REG_GUEST_BASE TCG_REG_XZR -# endif +#define TCG_REG_GUEST_BASE TCG_REG_X28 #endif static inline void reloc_pc26(tcg_insn_unit *code_ptr, tcg_insn_unit *target) @@ -1051,14 +1047,29 @@ static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi, slow path for the failure case, which will be patched later when finalizing the slow path. Generated code returns the host addend in X1, clobbers X0,X2,X3,TMP. */ -static void tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, TCGMemOp s_bits, +static void tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, TCGMemOp opc, tcg_insn_unit **label_ptr, int mem_index, bool is_read) { - TCGReg base = TCG_AREG0; int tlb_offset = is_read ? 
offsetof(CPUArchState, tlb_table[mem_index][0].addr_read) : offsetof(CPUArchState, tlb_table[mem_index][0].addr_write); + int s_mask = (1 << (opc & MO_SIZE)) - 1; + TCGReg base = TCG_AREG0, x3; + uint64_t tlb_mask; + + /* For aligned accesses, we check the first byte and include the alignment + bits within the address. For unaligned access, we check that we don't + cross pages using the address of the last byte of the access. */ + if ((opc & MO_AMASK) == MO_ALIGN || s_mask == 0) { + tlb_mask = TARGET_PAGE_MASK | s_mask; + x3 = addr_reg; + } else { + tcg_out_insn(s, 3401, ADDI, TARGET_LONG_BITS == 64, + TCG_REG_X3, addr_reg, s_mask); + tlb_mask = TARGET_PAGE_MASK; + x3 = TCG_REG_X3; + } /* Extract the TLB index from the address into X0. X0<CPU_TLB_BITS:0> = @@ -1066,11 +1077,9 @@ static void tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, TCGMemOp s_bits, tcg_out_ubfm(s, TARGET_LONG_BITS == 64, TCG_REG_X0, addr_reg, TARGET_PAGE_BITS, TARGET_PAGE_BITS + CPU_TLB_BITS); - /* Store the page mask part of the address and the low s_bits into X3. - Later this allows checking for equality and alignment at the same time. - X3 = addr_reg & (PAGE_MASK | ((1 << s_bits) - 1)) */ - tcg_out_logicali(s, I3404_ANDI, TARGET_LONG_BITS == 64, TCG_REG_X3, - addr_reg, TARGET_PAGE_MASK | ((1 << s_bits) - 1)); + /* Store the page mask part of the address into X3. */ + tcg_out_logicali(s, I3404_ANDI, TARGET_LONG_BITS == 64, + TCG_REG_X3, x3, tlb_mask); /* Add any "high bits" from the tlb offset to the env address into X2, to take advantage of the LSL12 form of the ADDI instruction. @@ -1207,17 +1216,16 @@ static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg, const TCGType otype = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32; #ifdef CONFIG_SOFTMMU unsigned mem_index = get_mmuidx(oi); - TCGMemOp s_bits = memop & MO_SIZE; tcg_insn_unit *label_ptr; - tcg_out_tlb_read(s, addr_reg, s_bits, &label_ptr, mem_index, 1); + tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, mem_index, 1); tcg_out_qemu_ld_direct(s, memop, ext, data_reg, TCG_REG_X1, otype, addr_reg); add_qemu_ldst_label(s, true, oi, ext, data_reg, addr_reg, s->code_ptr, label_ptr); #else /* !CONFIG_SOFTMMU */ tcg_out_qemu_ld_direct(s, memop, ext, data_reg, - GUEST_BASE ? TCG_REG_GUEST_BASE : TCG_REG_XZR, + guest_base ? TCG_REG_GUEST_BASE : TCG_REG_XZR, otype, addr_reg); #endif /* CONFIG_SOFTMMU */ } @@ -1229,17 +1237,16 @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg, const TCGType otype = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32; #ifdef CONFIG_SOFTMMU unsigned mem_index = get_mmuidx(oi); - TCGMemOp s_bits = memop & MO_SIZE; tcg_insn_unit *label_ptr; - tcg_out_tlb_read(s, addr_reg, s_bits, &label_ptr, mem_index, 0); + tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, mem_index, 0); tcg_out_qemu_st_direct(s, memop, data_reg, TCG_REG_X1, otype, addr_reg); - add_qemu_ldst_label(s, false, oi, s_bits == MO_64, data_reg, addr_reg, - s->code_ptr, label_ptr); + add_qemu_ldst_label(s, false, oi, (memop & MO_SIZE)== MO_64, + data_reg, addr_reg, s->code_ptr, label_ptr); #else /* !CONFIG_SOFTMMU */ tcg_out_qemu_st_direct(s, memop, data_reg, - GUEST_BASE ? TCG_REG_GUEST_BASE : TCG_REG_XZR, + guest_base ? 
TCG_REG_GUEST_BASE : TCG_REG_XZR, otype, addr_reg); #endif /* CONFIG_SOFTMMU */ } @@ -1556,6 +1563,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, case INDEX_op_ext16s_i32: tcg_out_sxt(s, ext, MO_16, a0, a1); break; + case INDEX_op_ext_i32_i64: case INDEX_op_ext32s_i64: tcg_out_sxt(s, TCG_TYPE_I64, MO_32, a0, a1); break; @@ -1567,6 +1575,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, case INDEX_op_ext16u_i32: tcg_out_uxt(s, MO_16, a0, a1); break; + case INDEX_op_extu_i32_i64: case INDEX_op_ext32u_i64: tcg_out_movr(s, TCG_TYPE_I32, a0, a1); break; @@ -1712,6 +1721,8 @@ static const TCGTargetOpDef aarch64_op_defs[] = { { INDEX_op_ext8u_i64, { "r", "r" } }, { INDEX_op_ext16u_i64, { "r", "r" } }, { INDEX_op_ext32u_i64, { "r", "r" } }, + { INDEX_op_ext_i32_i64, { "r", "r" } }, + { INDEX_op_extu_i32_i64, { "r", "r" } }, { INDEX_op_deposit_i32, { "r", "0", "rZ" } }, { INDEX_op_deposit_i64, { "r", "0", "rZ" } }, @@ -1794,9 +1805,9 @@ static void tcg_target_qemu_prologue(TCGContext *s) tcg_set_frame(s, TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE, CPU_TEMP_BUF_NLONGS * sizeof(long)); -#if defined(CONFIG_USE_GUEST_BASE) - if (GUEST_BASE) { - tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_GUEST_BASE, GUEST_BASE); +#if !defined(CONFIG_SOFTMMU) + if (guest_base) { + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_GUEST_BASE, guest_base); tcg_regset_set_reg(s->reserved_regs, TCG_REG_GUEST_BASE); } #endif diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h index 8aec04d..19a04a6 100644 --- a/tcg/aarch64/tcg-target.h +++ b/tcg/aarch64/tcg-target.h @@ -70,7 +70,8 @@ typedef enum { #define TCG_TARGET_HAS_muls2_i32 0 #define TCG_TARGET_HAS_muluh_i32 0 #define TCG_TARGET_HAS_mulsh_i32 0 -#define TCG_TARGET_HAS_trunc_shr_i32 0 +#define TCG_TARGET_HAS_extrl_i64_i32 0 +#define TCG_TARGET_HAS_extrh_i64_i32 0 #define TCG_TARGET_HAS_div_i64 1 #define TCG_TARGET_HAS_rem_i64 1 diff --git a/tcg/arm/tcg-target.c b/tcg/arm/tcg-target.c index ae2ec7a..3edf6a6 100644 --- a/tcg/arm/tcg-target.c +++ b/tcg/arm/tcg-target.c @@ -1493,8 +1493,8 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64) add_qemu_ldst_label(s, true, oi, datalo, datahi, addrlo, addrhi, s->code_ptr, label_ptr); #else /* !CONFIG_SOFTMMU */ - if (GUEST_BASE) { - tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP, GUEST_BASE); + if (guest_base) { + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP, guest_base); tcg_out_qemu_ld_index(s, opc, datalo, datahi, addrlo, TCG_REG_TMP); } else { tcg_out_qemu_ld_direct(s, opc, datalo, datahi, addrlo); @@ -1623,8 +1623,8 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64) add_qemu_ldst_label(s, false, oi, datalo, datahi, addrlo, addrhi, s->code_ptr, label_ptr); #else /* !CONFIG_SOFTMMU */ - if (GUEST_BASE) { - tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP, GUEST_BASE); + if (guest_base) { + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP, guest_base); tcg_out_qemu_st_index(s, COND_AL, opc, datalo, datahi, addrlo, TCG_REG_TMP); } else { diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c index 887f22f..d2adbc4 100644 --- a/tcg/i386/tcg-target.c +++ b/tcg/i386/tcg-target.c @@ -1172,7 +1172,7 @@ static void * const qemu_st_helpers[16] = { First argument register is clobbered. 
*/ static inline void tcg_out_tlb_load(TCGContext *s, TCGReg addrlo, TCGReg addrhi, - int mem_index, TCGMemOp s_bits, + int mem_index, TCGMemOp opc, tcg_insn_unit **label_ptr, int which) { const TCGReg r0 = TCG_REG_L0; @@ -1180,6 +1180,8 @@ static inline void tcg_out_tlb_load(TCGContext *s, TCGReg addrlo, TCGReg addrhi, TCGType ttype = TCG_TYPE_I32; TCGType htype = TCG_TYPE_I32; int trexw = 0, hrexw = 0; + int s_mask = (1 << (opc & MO_SIZE)) - 1; + bool aligned = (opc & MO_AMASK) == MO_ALIGN || s_mask == 0; if (TCG_TARGET_REG_BITS == 64) { if (TARGET_LONG_BITS == 64) { @@ -1193,13 +1195,19 @@ static inline void tcg_out_tlb_load(TCGContext *s, TCGReg addrlo, TCGReg addrhi, } tcg_out_mov(s, htype, r0, addrlo); - tcg_out_mov(s, ttype, r1, addrlo); + if (aligned) { + tcg_out_mov(s, ttype, r1, addrlo); + } else { + /* For unaligned access check that we don't cross pages using + the page address of the last byte. */ + tcg_out_modrm_offset(s, OPC_LEA + trexw, r1, addrlo, s_mask); + } tcg_out_shifti(s, SHIFT_SHR + hrexw, r0, TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS); tgen_arithi(s, ARITH_AND + trexw, r1, - TARGET_PAGE_MASK | ((1 << s_bits) - 1), 0); + TARGET_PAGE_MASK | (aligned ? s_mask : 0), 0); tgen_arithi(s, ARITH_AND + hrexw, r0, (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS, 0); @@ -1424,7 +1432,7 @@ int arch_prctl(int code, unsigned long addr); static int guest_base_flags; static inline void setup_guest_base_seg(void) { - if (arch_prctl(ARCH_SET_GS, GUEST_BASE) == 0) { + if (arch_prctl(ARCH_SET_GS, guest_base) == 0) { guest_base_flags = P_GS; } } @@ -1545,7 +1553,6 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64) TCGMemOp opc; #if defined(CONFIG_SOFTMMU) int mem_index; - TCGMemOp s_bits; tcg_insn_unit *label_ptr[2]; #endif @@ -1558,9 +1565,8 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64) #if defined(CONFIG_SOFTMMU) mem_index = get_mmuidx(oi); - s_bits = opc & MO_SIZE; - tcg_out_tlb_load(s, addrlo, addrhi, mem_index, s_bits, + tcg_out_tlb_load(s, addrlo, addrhi, mem_index, opc, label_ptr, offsetof(CPUTLBEntry, addr_read)); /* TLB Hit. */ @@ -1571,7 +1577,7 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64) s->code_ptr, label_ptr); #else { - int32_t offset = GUEST_BASE; + int32_t offset = guest_base; TCGReg base = addrlo; int index = -1; int seg = 0; @@ -1580,7 +1586,7 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64) We can do this with the ADDR32 prefix if we're not using a guest base, or when using segmentation. Otherwise we need to zero-extend manually. 
*/ - if (GUEST_BASE == 0 || guest_base_flags) { + if (guest_base == 0 || guest_base_flags) { seg = guest_base_flags; offset = 0; if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) { @@ -1591,8 +1597,8 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64) tcg_out_ext32u(s, TCG_REG_L0, base); base = TCG_REG_L0; } - if (offset != GUEST_BASE) { - tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L1, GUEST_BASE); + if (offset != guest_base) { + tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L1, guest_base); index = TCG_REG_L1; offset = 0; } @@ -1687,7 +1693,6 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64) TCGMemOp opc; #if defined(CONFIG_SOFTMMU) int mem_index; - TCGMemOp s_bits; tcg_insn_unit *label_ptr[2]; #endif @@ -1700,9 +1705,8 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64) #if defined(CONFIG_SOFTMMU) mem_index = get_mmuidx(oi); - s_bits = opc & MO_SIZE; - tcg_out_tlb_load(s, addrlo, addrhi, mem_index, s_bits, + tcg_out_tlb_load(s, addrlo, addrhi, mem_index, opc, label_ptr, offsetof(CPUTLBEntry, addr_write)); /* TLB Hit. */ @@ -1713,12 +1717,12 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64) s->code_ptr, label_ptr); #else { - int32_t offset = GUEST_BASE; + int32_t offset = guest_base; TCGReg base = addrlo; int seg = 0; /* See comment in tcg_out_qemu_ld re zero-extension of addrlo. */ - if (GUEST_BASE == 0 || guest_base_flags) { + if (guest_base == 0 || guest_base_flags) { seg = guest_base_flags; offset = 0; if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) { @@ -1727,12 +1731,12 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64) } else if (TCG_TARGET_REG_BITS == 64) { /* ??? Note that we can't use the same SIB addressing scheme as for loads, since we require L0 free for bswap. */ - if (offset != GUEST_BASE) { + if (offset != guest_base) { if (TARGET_LONG_BITS == 32) { tcg_out_ext32u(s, TCG_REG_L0, base); base = TCG_REG_L0; } - tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L1, GUEST_BASE); + tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L1, guest_base); tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_L1, base); base = TCG_REG_L1; offset = 0; @@ -2064,9 +2068,11 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, case INDEX_op_bswap64_i64: tcg_out_bswap64(s, args[0]); break; + case INDEX_op_extu_i32_i64: case INDEX_op_ext32u_i64: tcg_out_ext32u(s, args[0], args[1]); break; + case INDEX_op_ext_i32_i64: case INDEX_op_ext32s_i64: tcg_out_ext32s(s, args[0], args[1]); break; @@ -2201,6 +2207,9 @@ static const TCGTargetOpDef x86_op_defs[] = { { INDEX_op_ext16u_i64, { "r", "r" } }, { INDEX_op_ext32u_i64, { "r", "r" } }, + { INDEX_op_ext_i32_i64, { "r", "r" } }, + { INDEX_op_extu_i32_i64, { "r", "r" } }, + { INDEX_op_deposit_i64, { "Q", "0", "Q" } }, { INDEX_op_movcond_i64, { "r", "r", "re", "r", "0" } }, @@ -2306,8 +2315,8 @@ static void tcg_target_qemu_prologue(TCGContext *s) tcg_out_opc(s, OPC_RET, 0, 0, 0); #if !defined(CONFIG_SOFTMMU) - /* Try to set up a segment register to point to GUEST_BASE. */ - if (GUEST_BASE) { + /* Try to set up a segment register to point to guest_base. 
*/ + if (guest_base) { setup_guest_base_seg(); } #endif diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h index 25b5133..92be341 100644 --- a/tcg/i386/tcg-target.h +++ b/tcg/i386/tcg-target.h @@ -102,7 +102,8 @@ extern bool have_bmi1; #define TCG_TARGET_HAS_mulsh_i32 0 #if TCG_TARGET_REG_BITS == 64 -#define TCG_TARGET_HAS_trunc_shr_i32 0 +#define TCG_TARGET_HAS_extrl_i64_i32 0 +#define TCG_TARGET_HAS_extrh_i64_i32 0 #define TCG_TARGET_HAS_div2_i64 1 #define TCG_TARGET_HAS_rot_i64 1 #define TCG_TARGET_HAS_ext8s_i64 1 diff --git a/tcg/ia64/tcg-target.c b/tcg/ia64/tcg-target.c index 81cb9f7..3c07017 100644 --- a/tcg/ia64/tcg-target.c +++ b/tcg/ia64/tcg-target.c @@ -40,13 +40,8 @@ static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = { }; #endif -#ifdef CONFIG_USE_GUEST_BASE +#ifndef CONFIG_SOFTMMU #define TCG_GUEST_BASE_REG TCG_REG_R55 -#else -#define TCG_GUEST_BASE_REG TCG_REG_R0 -#endif -#ifndef GUEST_BASE -#define GUEST_BASE 0 #endif /* Branch registers */ @@ -1765,7 +1760,7 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args) bswap = opc & MO_BSWAP; #if TARGET_LONG_BITS == 32 - if (GUEST_BASE != 0) { + if (guest_base != 0) { tcg_out_bundle(s, mII, INSN_NOP_M, tcg_opc_i29(TCG_REG_P0, OPC_ZXT4_I29, @@ -1829,7 +1824,7 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args) } } #else - if (GUEST_BASE != 0) { + if (guest_base != 0) { tcg_out_bundle(s, MmI, tcg_opc_a1 (TCG_REG_P0, OPC_ADD_A1, TCG_REG_R2, TCG_GUEST_BASE_REG, addr_reg), @@ -1889,7 +1884,7 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args) bswap = opc & MO_BSWAP; #if TARGET_LONG_BITS == 32 - if (GUEST_BASE != 0) { + if (guest_base != 0) { tcg_out_bundle(s, mII, INSN_NOP_M, tcg_opc_i29(TCG_REG_P0, OPC_ZXT4_I29, @@ -1935,7 +1930,7 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args) INSN_NOP_M, INSN_NOP_I); #else - if (GUEST_BASE != 0) { + if (guest_base != 0) { add_guest_base = tcg_opc_a1 (TCG_REG_P0, OPC_ADD_A1, TCG_REG_R2, TCG_GUEST_BASE_REG, addr_reg); addr_reg = TCG_REG_R2; @@ -1944,7 +1939,7 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args) } if (!bswap) { - tcg_out_bundle(s, (GUEST_BASE ? MmI : mmI), + tcg_out_bundle(s, (guest_base ? MmI : mmI), add_guest_base, tcg_opc_m4 (TCG_REG_P0, opc_st_m4[s_bits], data_reg, addr_reg), @@ -2148,9 +2143,11 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, case INDEX_op_ext16u_i64: tcg_out_ext(s, OPC_ZXT2_I29, args[0], args[1]); break; + case INDEX_op_ext_i32_i64: case INDEX_op_ext32s_i64: tcg_out_ext(s, OPC_SXT4_I29, args[0], args[1]); break; + case INDEX_op_extu_i32_i64: case INDEX_op_ext32u_i64: tcg_out_ext(s, OPC_ZXT4_I29, args[0], args[1]); break; @@ -2301,6 +2298,8 @@ static const TCGTargetOpDef ia64_op_defs[] = { { INDEX_op_ext16u_i64, { "r", "rZ"} }, { INDEX_op_ext32s_i64, { "r", "rZ"} }, { INDEX_op_ext32u_i64, { "r", "rZ"} }, + { INDEX_op_ext_i32_i64, { "r", "rZ" } }, + { INDEX_op_extu_i32_i64, { "r", "rZ" } }, { INDEX_op_bswap16_i64, { "r", "rZ" } }, { INDEX_op_bswap32_i64, { "r", "rZ" } }, @@ -2349,14 +2348,14 @@ static void tcg_target_qemu_prologue(TCGContext *s) tcg_opc_i21(TCG_REG_P0, OPC_MOV_I21, TCG_REG_B6, TCG_REG_R33, 0)); - /* ??? If GUEST_BASE < 0x200000, we could load the register via + /* ??? If guest_base < 0x200000, we could load the register via an ADDL in the M slot of the next bundle. 
*/ - if (GUEST_BASE != 0) { + if (guest_base != 0) { tcg_out_bundle(s, mlx, INSN_NOP_M, - tcg_opc_l2 (GUEST_BASE), + tcg_opc_l2(guest_base), tcg_opc_x2 (TCG_REG_P0, OPC_MOVL_X2, - TCG_GUEST_BASE_REG, GUEST_BASE)); + TCG_GUEST_BASE_REG, guest_base)); tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG); } diff --git a/tcg/ia64/tcg-target.h b/tcg/ia64/tcg-target.h index a04ed81..ae9b79f 100644 --- a/tcg/ia64/tcg-target.h +++ b/tcg/ia64/tcg-target.h @@ -160,7 +160,8 @@ typedef enum { #define TCG_TARGET_HAS_muluh_i64 0 #define TCG_TARGET_HAS_mulsh_i32 0 #define TCG_TARGET_HAS_mulsh_i64 0 -#define TCG_TARGET_HAS_trunc_shr_i32 0 +#define TCG_TARGET_HAS_extrl_i64_i32 0 +#define TCG_TARGET_HAS_extrh_i64_i32 0 #define TCG_TARGET_deposit_i32_valid(ofs, len) ((len) <= 16) #define TCG_TARGET_deposit_i64_valid(ofs, len) ((len) <= 16) diff --git a/tcg/mips/tcg-target.c b/tcg/mips/tcg-target.c index e97980d..c0ce520 100644 --- a/tcg/mips/tcg-target.c +++ b/tcg/mips/tcg-target.c @@ -1180,12 +1180,12 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is_64) add_qemu_ldst_label(s, 1, oi, data_regl, data_regh, addr_regl, addr_regh, s->code_ptr, label_ptr); #else - if (GUEST_BASE == 0 && data_regl != addr_regl) { + if (guest_base == 0 && data_regl != addr_regl) { base = addr_regl; - } else if (GUEST_BASE == (int16_t)GUEST_BASE) { - tcg_out_opc_imm(s, OPC_ADDIU, base, addr_regl, GUEST_BASE); + } else if (guest_base == (int16_t)guest_base) { + tcg_out_opc_imm(s, OPC_ADDIU, base, addr_regl, guest_base); } else { - tcg_out_movi(s, TCG_TYPE_PTR, base, GUEST_BASE); + tcg_out_movi(s, TCG_TYPE_PTR, base, guest_base); tcg_out_opc_reg(s, OPC_ADDU, base, base, addr_regl); } tcg_out_qemu_ld_direct(s, data_regl, data_regh, base, opc); @@ -1314,14 +1314,14 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is_64) add_qemu_ldst_label(s, 0, oi, data_regl, data_regh, addr_regl, addr_regh, s->code_ptr, label_ptr); #else - if (GUEST_BASE == 0) { + if (guest_base == 0) { base = addr_regl; } else { base = TCG_REG_A0; - if (GUEST_BASE == (int16_t)GUEST_BASE) { - tcg_out_opc_imm(s, OPC_ADDIU, base, addr_regl, GUEST_BASE); + if (guest_base == (int16_t)guest_base) { + tcg_out_opc_imm(s, OPC_ADDIU, base, addr_regl, guest_base); } else { - tcg_out_movi(s, TCG_TYPE_PTR, base, GUEST_BASE); + tcg_out_movi(s, TCG_TYPE_PTR, base, guest_base); tcg_out_opc_reg(s, OPC_ADDU, base, base, addr_regl); } } diff --git a/tcg/optimize.c b/tcg/optimize.c index 18283cf..10795ec 100644 --- a/tcg/optimize.c +++ b/tcg/optimize.c @@ -35,14 +35,8 @@ glue(glue(case INDEX_op_, x), _i32): \ glue(glue(case INDEX_op_, x), _i64) -typedef enum { - TCG_TEMP_UNDEF = 0, - TCG_TEMP_CONST, - TCG_TEMP_COPY, -} tcg_temp_state; - struct tcg_temp_info { - tcg_temp_state state; + bool is_const; uint16_t prev_copy; uint16_t next_copy; tcg_target_ulong val; @@ -50,23 +44,47 @@ struct tcg_temp_info { }; static struct tcg_temp_info temps[TCG_MAX_TEMPS]; +static TCGTempSet temps_used; + +static inline bool temp_is_const(TCGArg arg) +{ + return temps[arg].is_const; +} + +static inline bool temp_is_copy(TCGArg arg) +{ + return temps[arg].next_copy != arg; +} -/* Reset TEMP's state to TCG_TEMP_UNDEF. If TEMP only had one copy, remove - the copy flag from the left temp. */ +/* Reset TEMP's state, possibly removing the temp for the list of copies. 
*/ static void reset_temp(TCGArg temp) { - if (temps[temp].state == TCG_TEMP_COPY) { - if (temps[temp].prev_copy == temps[temp].next_copy) { - temps[temps[temp].next_copy].state = TCG_TEMP_UNDEF; - } else { - temps[temps[temp].next_copy].prev_copy = temps[temp].prev_copy; - temps[temps[temp].prev_copy].next_copy = temps[temp].next_copy; - } - } - temps[temp].state = TCG_TEMP_UNDEF; + temps[temps[temp].next_copy].prev_copy = temps[temp].prev_copy; + temps[temps[temp].prev_copy].next_copy = temps[temp].next_copy; + temps[temp].next_copy = temp; + temps[temp].prev_copy = temp; + temps[temp].is_const = false; temps[temp].mask = -1; } +/* Reset all temporaries, given that there are NB_TEMPS of them. */ +static void reset_all_temps(int nb_temps) +{ + bitmap_zero(temps_used.l, nb_temps); +} + +/* Initialize and activate a temporary. */ +static void init_temp_info(TCGArg temp) +{ + if (!test_bit(temp, temps_used.l)) { + temps[temp].next_copy = temp; + temps[temp].prev_copy = temp; + temps[temp].is_const = false; + temps[temp].mask = -1; + set_bit(temp, temps_used.l); + } +} + static TCGOp *insert_op_before(TCGContext *s, TCGOp *old_op, TCGOpcode opc, int nargs) { @@ -98,16 +116,6 @@ static TCGOp *insert_op_before(TCGContext *s, TCGOp *old_op, return new_op; } -/* Reset all temporaries, given that there are NB_TEMPS of them. */ -static void reset_all_temps(int nb_temps) -{ - int i; - for (i = 0; i < nb_temps; i++) { - temps[i].state = TCG_TEMP_UNDEF; - temps[i].mask = -1; - } -} - static int op_bits(TCGOpcode op) { const TCGOpDef *def = &tcg_op_defs[op]; @@ -179,8 +187,7 @@ static bool temps_are_copies(TCGArg arg1, TCGArg arg2) return true; } - if (temps[arg1].state != TCG_TEMP_COPY - || temps[arg2].state != TCG_TEMP_COPY) { + if (!temp_is_copy(arg1) || !temp_is_copy(arg2)) { return false; } @@ -202,7 +209,7 @@ static void tcg_opt_gen_movi(TCGContext *s, TCGOp *op, TCGArg *args, op->opc = new_op; reset_temp(dst); - temps[dst].state = TCG_TEMP_CONST; + temps[dst].is_const = true; temps[dst].val = val; mask = val; if (TCG_TARGET_REG_BITS > 32 && new_op == INDEX_op_movi_i32) { @@ -223,11 +230,6 @@ static void tcg_opt_gen_mov(TCGContext *s, TCGOp *op, TCGArg *args, return; } - if (temps[src].state == TCG_TEMP_CONST) { - tcg_opt_gen_movi(s, op, args, dst, temps[src].val); - return; - } - TCGOpcode new_op = op_to_mov(op->opc); tcg_target_ulong mask; @@ -241,19 +243,13 @@ static void tcg_opt_gen_mov(TCGContext *s, TCGOp *op, TCGArg *args, } temps[dst].mask = mask; - assert(temps[src].state != TCG_TEMP_CONST); - if (s->temps[src].type == s->temps[dst].type) { - if (temps[src].state != TCG_TEMP_COPY) { - temps[src].state = TCG_TEMP_COPY; - temps[src].next_copy = src; - temps[src].prev_copy = src; - } - temps[dst].state = TCG_TEMP_COPY; temps[dst].next_copy = temps[src].next_copy; temps[dst].prev_copy = src; temps[temps[dst].next_copy].prev_copy = dst; temps[src].next_copy = dst; + temps[dst].is_const = temps[src].is_const; + temps[dst].val = temps[src].val; } args[0] = dst; @@ -292,7 +288,6 @@ static TCGArg do_constant_folding_2(TCGOpcode op, TCGArg x, TCGArg y) case INDEX_op_shr_i32: return (uint32_t)x >> (y & 31); - case INDEX_op_trunc_shr_i32: case INDEX_op_shr_i64: return (uint64_t)x >> (y & 63); @@ -347,12 +342,18 @@ static TCGArg do_constant_folding_2(TCGOpcode op, TCGArg x, TCGArg y) CASE_OP_32_64(ext16u): return (uint16_t)x; + case INDEX_op_ext_i32_i64: case INDEX_op_ext32s_i64: return (int32_t)x; + case INDEX_op_extu_i32_i64: + case INDEX_op_extrl_i64_i32: case INDEX_op_ext32u_i64: return 
(uint32_t)x; + case INDEX_op_extrh_i64_i32: + return (uint64_t)x >> 32; + case INDEX_op_muluh_i32: return ((uint64_t)(uint32_t)x * (uint32_t)y) >> 32; case INDEX_op_mulsh_i32: @@ -395,7 +396,7 @@ static TCGArg do_constant_folding(TCGOpcode op, TCGArg x, TCGArg y) { TCGArg res = do_constant_folding_2(op, x, y); if (op_bits(op) == 32) { - res &= 0xffffffff; + res = (int32_t)res; } return res; } @@ -481,7 +482,7 @@ static bool do_constant_folding_cond_eq(TCGCond c) static TCGArg do_constant_folding_cond(TCGOpcode op, TCGArg x, TCGArg y, TCGCond c) { - if (temps[x].state == TCG_TEMP_CONST && temps[y].state == TCG_TEMP_CONST) { + if (temp_is_const(x) && temp_is_const(y)) { switch (op_bits(op)) { case 32: return do_constant_folding_cond_32(temps[x].val, temps[y].val, c); @@ -492,7 +493,7 @@ static TCGArg do_constant_folding_cond(TCGOpcode op, TCGArg x, } } else if (temps_are_copies(x, y)) { return do_constant_folding_cond_eq(c); - } else if (temps[y].state == TCG_TEMP_CONST && temps[y].val == 0) { + } else if (temp_is_const(y) && temps[y].val == 0) { switch (c) { case TCG_COND_LTU: return 0; @@ -513,12 +514,10 @@ static TCGArg do_constant_folding_cond2(TCGArg *p1, TCGArg *p2, TCGCond c) TCGArg al = p1[0], ah = p1[1]; TCGArg bl = p2[0], bh = p2[1]; - if (temps[bl].state == TCG_TEMP_CONST - && temps[bh].state == TCG_TEMP_CONST) { + if (temp_is_const(bl) && temp_is_const(bh)) { uint64_t b = ((uint64_t)temps[bh].val << 32) | (uint32_t)temps[bl].val; - if (temps[al].state == TCG_TEMP_CONST - && temps[ah].state == TCG_TEMP_CONST) { + if (temp_is_const(al) && temp_is_const(ah)) { uint64_t a; a = ((uint64_t)temps[ah].val << 32) | (uint32_t)temps[al].val; return do_constant_folding_cond_64(a, b, c); @@ -544,8 +543,8 @@ static bool swap_commutative(TCGArg dest, TCGArg *p1, TCGArg *p2) { TCGArg a1 = *p1, a2 = *p2; int sum = 0; - sum += temps[a1].state == TCG_TEMP_CONST; - sum -= temps[a2].state == TCG_TEMP_CONST; + sum += temp_is_const(a1); + sum -= temp_is_const(a2); /* Prefer the constant in second argument, and then the form op a, a, b, which is better handled on non-RISC hosts. 
*/ @@ -560,10 +559,10 @@ static bool swap_commutative(TCGArg dest, TCGArg *p1, TCGArg *p2) static bool swap_commutative2(TCGArg *p1, TCGArg *p2) { int sum = 0; - sum += temps[p1[0]].state == TCG_TEMP_CONST; - sum += temps[p1[1]].state == TCG_TEMP_CONST; - sum -= temps[p2[0]].state == TCG_TEMP_CONST; - sum -= temps[p2[1]].state == TCG_TEMP_CONST; + sum += temp_is_const(p1[0]); + sum += temp_is_const(p1[1]); + sum -= temp_is_const(p2[0]); + sum -= temp_is_const(p2[1]); if (sum > 0) { TCGArg t; t = p1[0], p1[0] = p2[0], p2[0] = t; @@ -598,17 +597,29 @@ void tcg_optimize(TCGContext *s) const TCGOpDef *def = &tcg_op_defs[opc]; oi_next = op->next; + + /* Count the arguments, and initialize the temps that are + going to be used */ if (opc == INDEX_op_call) { nb_oargs = op->callo; nb_iargs = op->calli; + for (i = 0; i < nb_oargs + nb_iargs; i++) { + tmp = args[i]; + if (tmp != TCG_CALL_DUMMY_ARG) { + init_temp_info(tmp); + } + } } else { nb_oargs = def->nb_oargs; nb_iargs = def->nb_iargs; + for (i = 0; i < nb_oargs + nb_iargs; i++) { + init_temp_info(args[i]); + } } /* Do copy propagation */ for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { - if (temps[args[i]].state == TCG_TEMP_COPY) { + if (temp_is_copy(args[i])) { args[i] = find_better_copy(s, args[i]); } } @@ -678,8 +689,7 @@ void tcg_optimize(TCGContext *s) CASE_OP_32_64(sar): CASE_OP_32_64(rotl): CASE_OP_32_64(rotr): - if (temps[args[1]].state == TCG_TEMP_CONST - && temps[args[1]].val == 0) { + if (temp_is_const(args[1]) && temps[args[1]].val == 0) { tcg_opt_gen_movi(s, op, args, args[0], 0); continue; } @@ -689,7 +699,7 @@ void tcg_optimize(TCGContext *s) TCGOpcode neg_op; bool have_neg; - if (temps[args[2]].state == TCG_TEMP_CONST) { + if (temp_is_const(args[2])) { /* Proceed with possible constant folding. 
*/ break; } @@ -703,8 +713,7 @@ void tcg_optimize(TCGContext *s) if (!have_neg) { break; } - if (temps[args[1]].state == TCG_TEMP_CONST - && temps[args[1]].val == 0) { + if (temp_is_const(args[1]) && temps[args[1]].val == 0) { op->opc = neg_op; reset_temp(args[0]); args[1] = args[2]; @@ -714,34 +723,30 @@ void tcg_optimize(TCGContext *s) break; CASE_OP_32_64(xor): CASE_OP_32_64(nand): - if (temps[args[1]].state != TCG_TEMP_CONST - && temps[args[2]].state == TCG_TEMP_CONST - && temps[args[2]].val == -1) { + if (!temp_is_const(args[1]) + && temp_is_const(args[2]) && temps[args[2]].val == -1) { i = 1; goto try_not; } break; CASE_OP_32_64(nor): - if (temps[args[1]].state != TCG_TEMP_CONST - && temps[args[2]].state == TCG_TEMP_CONST - && temps[args[2]].val == 0) { + if (!temp_is_const(args[1]) + && temp_is_const(args[2]) && temps[args[2]].val == 0) { i = 1; goto try_not; } break; CASE_OP_32_64(andc): - if (temps[args[2]].state != TCG_TEMP_CONST - && temps[args[1]].state == TCG_TEMP_CONST - && temps[args[1]].val == -1) { + if (!temp_is_const(args[2]) + && temp_is_const(args[1]) && temps[args[1]].val == -1) { i = 2; goto try_not; } break; CASE_OP_32_64(orc): CASE_OP_32_64(eqv): - if (temps[args[2]].state != TCG_TEMP_CONST - && temps[args[1]].state == TCG_TEMP_CONST - && temps[args[1]].val == 0) { + if (!temp_is_const(args[2]) + && temp_is_const(args[1]) && temps[args[1]].val == 0) { i = 2; goto try_not; } @@ -782,9 +787,8 @@ void tcg_optimize(TCGContext *s) CASE_OP_32_64(or): CASE_OP_32_64(xor): CASE_OP_32_64(andc): - if (temps[args[1]].state != TCG_TEMP_CONST - && temps[args[2]].state == TCG_TEMP_CONST - && temps[args[2]].val == 0) { + if (!temp_is_const(args[1]) + && temp_is_const(args[2]) && temps[args[2]].val == 0) { tcg_opt_gen_mov(s, op, args, args[0], args[1]); continue; } @@ -792,9 +796,8 @@ void tcg_optimize(TCGContext *s) CASE_OP_32_64(and): CASE_OP_32_64(orc): CASE_OP_32_64(eqv): - if (temps[args[1]].state != TCG_TEMP_CONST - && temps[args[2]].state == TCG_TEMP_CONST - && temps[args[2]].val == -1) { + if (!temp_is_const(args[1]) + && temp_is_const(args[2]) && temps[args[2]].val == -1) { tcg_opt_gen_mov(s, op, args, args[0], args[1]); continue; } @@ -832,17 +835,26 @@ void tcg_optimize(TCGContext *s) CASE_OP_32_64(and): mask = temps[args[2]].mask; - if (temps[args[2]].state == TCG_TEMP_CONST) { + if (temp_is_const(args[2])) { and_const: affected = temps[args[1]].mask & ~mask; } mask = temps[args[1]].mask & mask; break; + case INDEX_op_ext_i32_i64: + if ((temps[args[1]].mask & 0x80000000) != 0) { + break; + } + case INDEX_op_extu_i32_i64: + /* We do not compute affected as it is a size changing op. */ + mask = (uint32_t)temps[args[1]].mask; + break; + CASE_OP_32_64(andc): /* Known-zeros does not imply known-ones. Therefore unless args[2] is constant, we can't infer anything from it. 
*/ - if (temps[args[2]].state == TCG_TEMP_CONST) { + if (temp_is_const(args[2])) { mask = ~temps[args[2]].mask; goto and_const; } @@ -851,37 +863,40 @@ void tcg_optimize(TCGContext *s) break; case INDEX_op_sar_i32: - if (temps[args[2]].state == TCG_TEMP_CONST) { + if (temp_is_const(args[2])) { tmp = temps[args[2]].val & 31; mask = (int32_t)temps[args[1]].mask >> tmp; } break; case INDEX_op_sar_i64: - if (temps[args[2]].state == TCG_TEMP_CONST) { + if (temp_is_const(args[2])) { tmp = temps[args[2]].val & 63; mask = (int64_t)temps[args[1]].mask >> tmp; } break; case INDEX_op_shr_i32: - if (temps[args[2]].state == TCG_TEMP_CONST) { + if (temp_is_const(args[2])) { tmp = temps[args[2]].val & 31; mask = (uint32_t)temps[args[1]].mask >> tmp; } break; case INDEX_op_shr_i64: - if (temps[args[2]].state == TCG_TEMP_CONST) { + if (temp_is_const(args[2])) { tmp = temps[args[2]].val & 63; mask = (uint64_t)temps[args[1]].mask >> tmp; } break; - case INDEX_op_trunc_shr_i32: - mask = (uint64_t)temps[args[1]].mask >> args[2]; + case INDEX_op_extrl_i64_i32: + mask = (uint32_t)temps[args[1]].mask; + break; + case INDEX_op_extrh_i64_i32: + mask = (uint64_t)temps[args[1]].mask >> 32; break; CASE_OP_32_64(shl): - if (temps[args[2]].state == TCG_TEMP_CONST) { + if (temp_is_const(args[2])) { tmp = temps[args[2]].val & (TCG_TARGET_REG_BITS - 1); mask = temps[args[1]].mask << tmp; } @@ -962,8 +977,7 @@ void tcg_optimize(TCGContext *s) CASE_OP_32_64(mul): CASE_OP_32_64(muluh): CASE_OP_32_64(mulsh): - if ((temps[args[2]].state == TCG_TEMP_CONST - && temps[args[2]].val == 0)) { + if ((temp_is_const(args[2]) && temps[args[2]].val == 0)) { tcg_opt_gen_movi(s, op, args, args[0], 0); continue; } @@ -1018,21 +1032,17 @@ void tcg_optimize(TCGContext *s) CASE_OP_32_64(ext16u): case INDEX_op_ext32s_i64: case INDEX_op_ext32u_i64: - if (temps[args[1]].state == TCG_TEMP_CONST) { + case INDEX_op_ext_i32_i64: + case INDEX_op_extu_i32_i64: + case INDEX_op_extrl_i64_i32: + case INDEX_op_extrh_i64_i32: + if (temp_is_const(args[1])) { tmp = do_constant_folding(opc, temps[args[1]].val, 0); tcg_opt_gen_movi(s, op, args, args[0], tmp); break; } goto do_default; - case INDEX_op_trunc_shr_i32: - if (temps[args[1]].state == TCG_TEMP_CONST) { - tmp = do_constant_folding(opc, temps[args[1]].val, args[2]); - tcg_opt_gen_movi(s, op, args, args[0], tmp); - break; - } - goto do_default; - CASE_OP_32_64(add): CASE_OP_32_64(sub): CASE_OP_32_64(mul): @@ -1055,8 +1065,7 @@ void tcg_optimize(TCGContext *s) CASE_OP_32_64(divu): CASE_OP_32_64(rem): CASE_OP_32_64(remu): - if (temps[args[1]].state == TCG_TEMP_CONST - && temps[args[2]].state == TCG_TEMP_CONST) { + if (temp_is_const(args[1]) && temp_is_const(args[2])) { tmp = do_constant_folding(opc, temps[args[1]].val, temps[args[2]].val); tcg_opt_gen_movi(s, op, args, args[0], tmp); @@ -1065,8 +1074,7 @@ void tcg_optimize(TCGContext *s) goto do_default; CASE_OP_32_64(deposit): - if (temps[args[1]].state == TCG_TEMP_CONST - && temps[args[2]].state == TCG_TEMP_CONST) { + if (temp_is_const(args[1]) && temp_is_const(args[2])) { tmp = deposit64(temps[args[1]].val, args[3], args[4], temps[args[2]].val); tcg_opt_gen_movi(s, op, args, args[0], tmp); @@ -1106,10 +1114,8 @@ void tcg_optimize(TCGContext *s) case INDEX_op_add2_i32: case INDEX_op_sub2_i32: - if (temps[args[2]].state == TCG_TEMP_CONST - && temps[args[3]].state == TCG_TEMP_CONST - && temps[args[4]].state == TCG_TEMP_CONST - && temps[args[5]].state == TCG_TEMP_CONST) { + if (temp_is_const(args[2]) && temp_is_const(args[3]) + && temp_is_const(args[4]) && 
temp_is_const(args[5])) { uint32_t al = temps[args[2]].val; uint32_t ah = temps[args[3]].val; uint32_t bl = temps[args[4]].val; @@ -1128,8 +1134,8 @@ void tcg_optimize(TCGContext *s) rl = args[0]; rh = args[1]; - tcg_opt_gen_movi(s, op, args, rl, (uint32_t)a); - tcg_opt_gen_movi(s, op2, args2, rh, (uint32_t)(a >> 32)); + tcg_opt_gen_movi(s, op, args, rl, (int32_t)a); + tcg_opt_gen_movi(s, op2, args2, rh, (int32_t)(a >> 32)); /* We've done all we need to do with the movi. Skip it. */ oi_next = op2->next; @@ -1138,8 +1144,7 @@ void tcg_optimize(TCGContext *s) goto do_default; case INDEX_op_mulu2_i32: - if (temps[args[2]].state == TCG_TEMP_CONST - && temps[args[3]].state == TCG_TEMP_CONST) { + if (temp_is_const(args[2]) && temp_is_const(args[3])) { uint32_t a = temps[args[2]].val; uint32_t b = temps[args[3]].val; uint64_t r = (uint64_t)a * b; @@ -1149,8 +1154,8 @@ void tcg_optimize(TCGContext *s) rl = args[0]; rh = args[1]; - tcg_opt_gen_movi(s, op, args, rl, (uint32_t)r); - tcg_opt_gen_movi(s, op2, args2, rh, (uint32_t)(r >> 32)); + tcg_opt_gen_movi(s, op, args, rl, (int32_t)r); + tcg_opt_gen_movi(s, op2, args2, rh, (int32_t)(r >> 32)); /* We've done all we need to do with the movi. Skip it. */ oi_next = op2->next; @@ -1171,10 +1176,8 @@ void tcg_optimize(TCGContext *s) tcg_op_remove(s, op); } } else if ((args[4] == TCG_COND_LT || args[4] == TCG_COND_GE) - && temps[args[2]].state == TCG_TEMP_CONST - && temps[args[3]].state == TCG_TEMP_CONST - && temps[args[2]].val == 0 - && temps[args[3]].val == 0) { + && temp_is_const(args[2]) && temps[args[2]].val == 0 + && temp_is_const(args[3]) && temps[args[3]].val == 0) { /* Simplify LT/GE comparisons vs zero to a single compare vs the high word of the input. */ do_brcond_high: @@ -1236,10 +1239,8 @@ void tcg_optimize(TCGContext *s) do_setcond_const: tcg_opt_gen_movi(s, op, args, args[0], tmp); } else if ((args[5] == TCG_COND_LT || args[5] == TCG_COND_GE) - && temps[args[3]].state == TCG_TEMP_CONST - && temps[args[4]].state == TCG_TEMP_CONST - && temps[args[3]].val == 0 - && temps[args[4]].val == 0) { + && temp_is_const(args[3]) && temps[args[3]].val == 0 + && temp_is_const(args[4]) && temps[args[4]].val == 0) { /* Simplify LT/GE comparisons vs zero to a single compare vs the high word of the input. */ do_setcond_high: @@ -1299,7 +1300,9 @@ void tcg_optimize(TCGContext *s) if (!(args[nb_oargs + nb_iargs + 1] & (TCG_CALL_NO_READ_GLOBALS | TCG_CALL_NO_WRITE_GLOBALS))) { for (i = 0; i < nb_globals; i++) { - reset_temp(i); + if (test_bit(i, temps_used.l)) { + reset_temp(i); + } } } goto do_reset_output; diff --git a/tcg/ppc/tcg-target.c b/tcg/ppc/tcg-target.c index 2b6eafa..92ef719 100644 --- a/tcg/ppc/tcg-target.c +++ b/tcg/ppc/tcg-target.c @@ -80,19 +80,13 @@ static tcg_insn_unit *tb_ret_addr; -#ifndef GUEST_BASE -#define GUEST_BASE 0 -#endif - #include "elf.h" static bool have_isa_2_06; #define HAVE_ISA_2_06 have_isa_2_06 #define HAVE_ISEL have_isa_2_06 -#ifdef CONFIG_USE_GUEST_BASE +#ifndef CONFIG_SOFTMMU #define TCG_GUEST_BASE_REG 30 -#else -#define TCG_GUEST_BASE_REG 0 #endif #ifndef NDEBUG @@ -1361,7 +1355,7 @@ static void * const qemu_st_helpers[16] = { in CR7, loads the addend of the TLB into R3, and returns the register containing the guest address (zero-extended into R4). Clobbers R0 and R2. 
*/ -static TCGReg tcg_out_tlb_read(TCGContext *s, TCGMemOp s_bits, +static TCGReg tcg_out_tlb_read(TCGContext *s, TCGMemOp opc, TCGReg addrlo, TCGReg addrhi, int mem_index, bool is_read) { @@ -1371,6 +1365,7 @@ static TCGReg tcg_out_tlb_read(TCGContext *s, TCGMemOp s_bits, : offsetof(CPUArchState, tlb_table[mem_index][0].addr_write)); int add_off = offsetof(CPUArchState, tlb_table[mem_index][0].addend); TCGReg base = TCG_AREG0; + TCGMemOp s_bits = opc & MO_SIZE; /* Extract the page index, shifted into place for tlb index. */ if (TCG_TARGET_REG_BITS == 64) { @@ -1422,17 +1417,37 @@ static TCGReg tcg_out_tlb_read(TCGContext *s, TCGMemOp s_bits, to minimize any load use delay. */ tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R3, TCG_REG_R3, add_off); - /* Clear the non-page, non-alignment bits from the address. */ + /* Clear the non-page, non-alignment bits from the address */ if (TCG_TARGET_REG_BITS == 32 || TARGET_LONG_BITS == 32) { + /* We don't support unaligned accesses on 32-bits, preserve + * the bottom bits and thus trigger a comparison failure on + * unaligned accesses + */ tcg_out_rlw(s, RLWINM, TCG_REG_R0, addrlo, 0, (32 - s_bits) & 31, 31 - TARGET_PAGE_BITS); - } else if (!s_bits) { - tcg_out_rld(s, RLDICR, TCG_REG_R0, addrlo, - 0, 63 - TARGET_PAGE_BITS); + } else if (s_bits) { + /* > byte access, we need to handle alignment */ + if ((opc & MO_AMASK) == MO_ALIGN) { + /* Alignment required by the front-end, same as 32-bits */ + tcg_out_rld(s, RLDICL, TCG_REG_R0, addrlo, + 64 - TARGET_PAGE_BITS, TARGET_PAGE_BITS - s_bits); + tcg_out_rld(s, RLDICL, TCG_REG_R0, TCG_REG_R0, TARGET_PAGE_BITS, 0); + } else { + /* We support unaligned accesses, we need to make sure we fail + * if we cross a page boundary. The trick is to add the + * access_size-1 to the address before masking the low bits. + * That will make the address overflow to the next page if we + * cross a page boundary which will then force a mismatch of + * the TLB compare since the next page cannot possibly be in + * the same TLB index. + */ + tcg_out32(s, ADDI | TAI(TCG_REG_R0, addrlo, (1 << s_bits) - 1)); + tcg_out_rld(s, RLDICR, TCG_REG_R0, TCG_REG_R0, + 0, 63 - TARGET_PAGE_BITS); + } } else { - tcg_out_rld(s, RLDICL, TCG_REG_R0, addrlo, - 64 - TARGET_PAGE_BITS, TARGET_PAGE_BITS - s_bits); - tcg_out_rld(s, RLDICL, TCG_REG_R0, TCG_REG_R0, TARGET_PAGE_BITS, 0); + /* Byte access, just chop off the bits below the page index */ + tcg_out_rld(s, RLDICR, TCG_REG_R0, addrlo, 0, 63 - TARGET_PAGE_BITS); } if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) { @@ -1592,7 +1607,7 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is_64) #ifdef CONFIG_SOFTMMU mem_index = get_mmuidx(oi); - addrlo = tcg_out_tlb_read(s, s_bits, addrlo, addrhi, mem_index, true); + addrlo = tcg_out_tlb_read(s, opc, addrlo, addrhi, mem_index, true); /* Load a pointer into the current opcode w/conditional branch-link. */ label_ptr = s->code_ptr; @@ -1600,7 +1615,7 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is_64) rbase = TCG_REG_R3; #else /* !CONFIG_SOFTMMU */ - rbase = GUEST_BASE ? TCG_GUEST_BASE_REG : 0; + rbase = guest_base ? 
TCG_GUEST_BASE_REG : 0; if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) { tcg_out_ext32u(s, TCG_REG_TMP1, addrlo); addrlo = TCG_REG_TMP1; @@ -1667,7 +1682,7 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is_64) #ifdef CONFIG_SOFTMMU mem_index = get_mmuidx(oi); - addrlo = tcg_out_tlb_read(s, s_bits, addrlo, addrhi, mem_index, false); + addrlo = tcg_out_tlb_read(s, opc, addrlo, addrhi, mem_index, false); /* Load a pointer into the current opcode w/conditional branch-link. */ label_ptr = s->code_ptr; @@ -1675,7 +1690,7 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is_64) rbase = TCG_REG_R3; #else /* !CONFIG_SOFTMMU */ - rbase = GUEST_BASE ? TCG_GUEST_BASE_REG : 0; + rbase = guest_base ? TCG_GUEST_BASE_REG : 0; if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) { tcg_out_ext32u(s, TCG_REG_TMP1, addrlo); addrlo = TCG_REG_TMP1; @@ -1779,9 +1794,9 @@ static void tcg_target_qemu_prologue(TCGContext *s) } tcg_out_st(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_REG_R1, FRAME_SIZE+LR_OFFSET); -#ifdef CONFIG_USE_GUEST_BASE - if (GUEST_BASE) { - tcg_out_movi(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, GUEST_BASE); +#ifndef CONFIG_SOFTMMU + if (guest_base) { + tcg_out_movi(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, guest_base); tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG); } #endif @@ -2200,12 +2215,16 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, case INDEX_op_ext16s_i64: c = EXTSH; goto gen_ext; + case INDEX_op_ext_i32_i64: case INDEX_op_ext32s_i64: c = EXTSW; goto gen_ext; gen_ext: tcg_out32(s, c | RS(args[1]) | RA(args[0])); break; + case INDEX_op_extu_i32_i64: + tcg_out_ext32u(s, args[0], args[1]); + break; case INDEX_op_setcond_i32: tcg_out_setcond(s, TCG_TYPE_I32, args[3], args[0], args[1], args[2], @@ -2482,6 +2501,8 @@ static const TCGTargetOpDef ppc_op_defs[] = { { INDEX_op_ext8s_i64, { "r", "r" } }, { INDEX_op_ext16s_i64, { "r", "r" } }, { INDEX_op_ext32s_i64, { "r", "r" } }, + { INDEX_op_ext_i32_i64, { "r", "r" } }, + { INDEX_op_extu_i32_i64, { "r", "r" } }, { INDEX_op_bswap16_i64, { "r", "r" } }, { INDEX_op_bswap32_i64, { "r", "r" } }, { INDEX_op_bswap64_i64, { "r", "r" } }, diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h index 7ce7048..b4f0818 100644 --- a/tcg/ppc/tcg-target.h +++ b/tcg/ppc/tcg-target.h @@ -77,7 +77,8 @@ typedef enum { #if TCG_TARGET_REG_BITS == 64 #define TCG_TARGET_HAS_add2_i32 0 #define TCG_TARGET_HAS_sub2_i32 0 -#define TCG_TARGET_HAS_trunc_shr_i32 0 +#define TCG_TARGET_HAS_extrl_i64_i32 0 +#define TCG_TARGET_HAS_extrh_i64_i32 0 #define TCG_TARGET_HAS_div_i64 1 #define TCG_TARGET_HAS_rem_i64 0 #define TCG_TARGET_HAS_rot_i64 1 diff --git a/tcg/s390/tcg-target.c b/tcg/s390/tcg-target.c index aa718ec..ee2e58d 100644 --- a/tcg/s390/tcg-target.c +++ b/tcg/s390/tcg-target.c @@ -51,17 +51,12 @@ /* A scratch register that may be be used throughout the backend. */ #define TCG_TMP0 TCG_REG_R14 -#ifdef CONFIG_USE_GUEST_BASE +#ifndef CONFIG_SOFTMMU #define TCG_GUEST_BASE_REG TCG_REG_R13 #else #define TCG_GUEST_BASE_REG TCG_REG_R0 #endif -#ifndef GUEST_BASE -#define GUEST_BASE 0 -#endif - - /* All of the following instructions are prefixed with their instruction format, and are defined as 8- or 16-bit quantities, even when the two halves of the 16-bit quantity may appear 32 bits apart in the insn. 
@@ -1504,20 +1499,36 @@ QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_table[NB_MMU_MODES - 1][1]) static TCGReg tcg_out_tlb_read(TCGContext* s, TCGReg addr_reg, TCGMemOp opc, int mem_index, bool is_ld) { - TCGMemOp s_bits = opc & MO_SIZE; - uint64_t tlb_mask = TARGET_PAGE_MASK | ((1 << s_bits) - 1); - int ofs; + int s_mask = (1 << (opc & MO_SIZE)) - 1; + int ofs, a_off; + uint64_t tlb_mask; + + /* For aligned accesses, we check the first byte and include the alignment + bits within the address. For unaligned access, we check that we don't + cross pages using the address of the last byte of the access. */ + if ((opc & MO_AMASK) == MO_ALIGN || s_mask == 0) { + a_off = 0; + tlb_mask = TARGET_PAGE_MASK | s_mask; + } else { + a_off = s_mask; + tlb_mask = TARGET_PAGE_MASK; + } if (facilities & FACILITY_GEN_INST_EXT) { tcg_out_risbg(s, TCG_REG_R2, addr_reg, 64 - CPU_TLB_BITS - CPU_TLB_ENTRY_BITS, 63 - CPU_TLB_ENTRY_BITS, 64 + CPU_TLB_ENTRY_BITS - TARGET_PAGE_BITS, 1); - tgen_andi_risbg(s, TCG_REG_R3, addr_reg, tlb_mask); + if (a_off) { + tcg_out_insn(s, RX, LA, TCG_REG_R3, addr_reg, TCG_REG_NONE, a_off); + tgen_andi(s, TCG_TYPE_TL, TCG_REG_R3, tlb_mask); + } else { + tgen_andi_risbg(s, TCG_REG_R3, addr_reg, tlb_mask); + } } else { tcg_out_sh64(s, RSY_SRLG, TCG_REG_R2, addr_reg, TCG_REG_NONE, TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS); - tcg_out_movi(s, TCG_TYPE_TL, TCG_REG_R3, addr_reg); + tcg_out_insn(s, RX, LA, TCG_REG_R3, addr_reg, TCG_REG_NONE, a_off); tgen_andi(s, TCG_TYPE_I64, TCG_REG_R2, (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS); tgen_andi(s, TCG_TYPE_TL, TCG_REG_R3, tlb_mask); @@ -1622,9 +1633,9 @@ static void tcg_prepare_user_ldst(TCGContext *s, TCGReg *addr_reg, tgen_ext32u(s, TCG_TMP0, *addr_reg); *addr_reg = TCG_TMP0; } - if (GUEST_BASE < 0x80000) { + if (guest_base < 0x80000) { *index_reg = TCG_REG_NONE; - *disp = GUEST_BASE; + *disp = guest_base; } else { *index_reg = TCG_GUEST_BASE_REG; *disp = 0; @@ -2090,6 +2101,7 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, case INDEX_op_ext16s_i64: tgen_ext16s(s, TCG_TYPE_I64, args[0], args[1]); break; + case INDEX_op_ext_i32_i64: case INDEX_op_ext32s_i64: tgen_ext32s(s, args[0], args[1]); break; @@ -2099,6 +2111,7 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, case INDEX_op_ext16u_i64: tgen_ext16u(s, TCG_TYPE_I64, args[0], args[1]); break; + case INDEX_op_extu_i32_i64: case INDEX_op_ext32u_i64: tgen_ext32u(s, args[0], args[1]); break; @@ -2251,6 +2264,9 @@ static const TCGTargetOpDef s390_op_defs[] = { { INDEX_op_ext32s_i64, { "r", "r" } }, { INDEX_op_ext32u_i64, { "r", "r" } }, + { INDEX_op_ext_i32_i64, { "r", "r" } }, + { INDEX_op_extu_i32_i64, { "r", "r" } }, + { INDEX_op_bswap16_i64, { "r", "r" } }, { INDEX_op_bswap32_i64, { "r", "r" } }, { INDEX_op_bswap64_i64, { "r", "r" } }, @@ -2328,8 +2344,8 @@ static void tcg_target_qemu_prologue(TCGContext *s) TCG_STATIC_CALL_ARGS_SIZE + TCG_TARGET_CALL_STACK_OFFSET, CPU_TEMP_BUF_NLONGS * sizeof(long)); - if (GUEST_BASE >= 0x80000) { - tcg_out_movi(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, GUEST_BASE); + if (guest_base >= 0x80000) { + tcg_out_movi(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, guest_base); tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG); } diff --git a/tcg/s390/tcg-target.h b/tcg/s390/tcg-target.h index 91576d5..d9dc038 100644 --- a/tcg/s390/tcg-target.h +++ b/tcg/s390/tcg-target.h @@ -72,7 +72,8 @@ typedef enum TCGReg { #define TCG_TARGET_HAS_muls2_i32 0 #define TCG_TARGET_HAS_muluh_i32 0 #define TCG_TARGET_HAS_mulsh_i32 0 -#define 
TCG_TARGET_HAS_trunc_shr_i32 0 +#define TCG_TARGET_HAS_extrl_i64_i32 0 +#define TCG_TARGET_HAS_extrh_i64_i32 0 #define TCG_TARGET_HAS_div2_i64 1 #define TCG_TARGET_HAS_rot_i64 1 diff --git a/tcg/sparc/tcg-target.c b/tcg/sparc/tcg-target.c index 1a870a8..54df1bc 100644 --- a/tcg/sparc/tcg-target.c +++ b/tcg/sparc/tcg-target.c @@ -83,10 +83,8 @@ static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = { #define TCG_REG_T1 TCG_REG_G1 #define TCG_REG_T2 TCG_REG_O7 -#ifdef CONFIG_USE_GUEST_BASE +#ifndef CONFIG_SOFTMMU # define TCG_GUEST_BASE_REG TCG_REG_I5 -#else -# define TCG_GUEST_BASE_REG TCG_REG_G0 #endif static const int tcg_target_reg_alloc_order[] = { @@ -955,9 +953,9 @@ static void tcg_target_qemu_prologue(TCGContext *s) tcg_out32(s, SAVE | INSN_RD(TCG_REG_O6) | INSN_RS1(TCG_REG_O6) | INSN_IMM13(-frame_size)); -#ifdef CONFIG_USE_GUEST_BASE - if (GUEST_BASE != 0) { - tcg_out_movi(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, GUEST_BASE); +#ifndef CONFIG_SOFTMMU + if (guest_base != 0) { + tcg_out_movi(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, guest_base); tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG); } #endif @@ -1146,7 +1144,7 @@ static void tcg_out_qemu_ld(TCGContext *s, TCGReg data, TCGReg addr, addr = TCG_REG_T1; } tcg_out_ldst_rr(s, data, addr, - (GUEST_BASE ? TCG_GUEST_BASE_REG : TCG_REG_G0), + (guest_base ? TCG_GUEST_BASE_REG : TCG_REG_G0), qemu_ld_opc[memop & (MO_BSWAP | MO_SSIZE)]); #endif /* CONFIG_SOFTMMU */ } @@ -1201,7 +1199,7 @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg data, TCGReg addr, addr = TCG_REG_T1; } tcg_out_ldst_rr(s, data, addr, - (GUEST_BASE ? TCG_GUEST_BASE_REG : TCG_REG_G0), + (guest_base ? TCG_GUEST_BASE_REG : TCG_REG_G0), qemu_st_opc[memop & (MO_BSWAP | MO_SIZE)]); #endif /* CONFIG_SOFTMMU */ } @@ -1407,18 +1405,19 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, case INDEX_op_divu_i64: c = ARITH_UDIVX; goto gen_arith; + case INDEX_op_ext_i32_i64: case INDEX_op_ext32s_i64: tcg_out_arithi(s, a0, a1, 0, SHIFT_SRA); break; + case INDEX_op_extu_i32_i64: case INDEX_op_ext32u_i64: tcg_out_arithi(s, a0, a1, 0, SHIFT_SRL); break; - case INDEX_op_trunc_shr_i32: - if (a2 == 0) { - tcg_out_mov(s, TCG_TYPE_I32, a0, a1); - } else { - tcg_out_arithi(s, a0, a1, a2, SHIFT_SRLX); - } + case INDEX_op_extrl_i64_i32: + tcg_out_mov(s, TCG_TYPE_I32, a0, a1); + break; + case INDEX_op_extrh_i64_i32: + tcg_out_arithi(s, a0, a1, 32, SHIFT_SRLX); break; case INDEX_op_brcond_i64: @@ -1531,9 +1530,12 @@ static const TCGTargetOpDef sparc_op_defs[] = { { INDEX_op_neg_i64, { "R", "RJ" } }, { INDEX_op_not_i64, { "R", "RJ" } }, - { INDEX_op_ext32s_i64, { "R", "r" } }, - { INDEX_op_ext32u_i64, { "R", "r" } }, - { INDEX_op_trunc_shr_i32, { "r", "R" } }, + { INDEX_op_ext32s_i64, { "R", "R" } }, + { INDEX_op_ext32u_i64, { "R", "R" } }, + { INDEX_op_ext_i32_i64, { "R", "r" } }, + { INDEX_op_extu_i32_i64, { "R", "r" } }, + { INDEX_op_extrl_i64_i32, { "r", "R" } }, + { INDEX_op_extrh_i64_i32, { "r", "R" } }, { INDEX_op_brcond_i64, { "RZ", "RJ" } }, { INDEX_op_setcond_i64, { "R", "RZ", "RJ" } }, diff --git a/tcg/sparc/tcg-target.h b/tcg/sparc/tcg-target.h index f584de4..2cd72d2 100644 --- a/tcg/sparc/tcg-target.h +++ b/tcg/sparc/tcg-target.h @@ -118,7 +118,8 @@ extern bool use_vis3_instructions; #define TCG_TARGET_HAS_muluh_i32 0 #define TCG_TARGET_HAS_mulsh_i32 0 -#define TCG_TARGET_HAS_trunc_shr_i32 1 +#define TCG_TARGET_HAS_extrl_i64_i32 1 +#define TCG_TARGET_HAS_extrh_i64_i32 1 #define TCG_TARGET_HAS_div_i64 1 #define TCG_TARGET_HAS_rem_i64 0 #define 
TCG_TARGET_HAS_rot_i64 0 diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c index 45098c3..0b9dd8f 100644 --- a/tcg/tcg-op.c +++ b/tcg/tcg-op.c @@ -1737,28 +1737,28 @@ void tcg_gen_muls2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 arg1, TCGv_i64 arg2) /* Size changing operations. */ -void tcg_gen_trunc_shr_i64_i32(TCGv_i32 ret, TCGv_i64 arg, unsigned count) +void tcg_gen_extrl_i64_i32(TCGv_i32 ret, TCGv_i64 arg) { - tcg_debug_assert(count < 64); if (TCG_TARGET_REG_BITS == 32) { - if (count >= 32) { - tcg_gen_shri_i32(ret, TCGV_HIGH(arg), count - 32); - } else if (count == 0) { - tcg_gen_mov_i32(ret, TCGV_LOW(arg)); - } else { - TCGv_i64 t = tcg_temp_new_i64(); - tcg_gen_shri_i64(t, arg, count); - tcg_gen_mov_i32(ret, TCGV_LOW(t)); - tcg_temp_free_i64(t); - } - } else if (TCG_TARGET_HAS_trunc_shr_i32) { - tcg_gen_op3i_i32(INDEX_op_trunc_shr_i32, ret, - MAKE_TCGV_I32(GET_TCGV_I64(arg)), count); - } else if (count == 0) { + tcg_gen_mov_i32(ret, TCGV_LOW(arg)); + } else if (TCG_TARGET_HAS_extrl_i64_i32) { + tcg_gen_op2(&tcg_ctx, INDEX_op_extrl_i64_i32, + GET_TCGV_I32(ret), GET_TCGV_I64(arg)); + } else { tcg_gen_mov_i32(ret, MAKE_TCGV_I32(GET_TCGV_I64(arg))); + } +} + +void tcg_gen_extrh_i64_i32(TCGv_i32 ret, TCGv_i64 arg) +{ + if (TCG_TARGET_REG_BITS == 32) { + tcg_gen_mov_i32(ret, TCGV_HIGH(arg)); + } else if (TCG_TARGET_HAS_extrh_i64_i32) { + tcg_gen_op2(&tcg_ctx, INDEX_op_extrh_i64_i32, + GET_TCGV_I32(ret), GET_TCGV_I64(arg)); } else { TCGv_i64 t = tcg_temp_new_i64(); - tcg_gen_shri_i64(t, arg, count); + tcg_gen_shri_i64(t, arg, 32); tcg_gen_mov_i32(ret, MAKE_TCGV_I32(GET_TCGV_I64(t))); tcg_temp_free_i64(t); } @@ -1770,9 +1770,8 @@ void tcg_gen_extu_i32_i64(TCGv_i64 ret, TCGv_i32 arg) tcg_gen_mov_i32(TCGV_LOW(ret), arg); tcg_gen_movi_i32(TCGV_HIGH(ret), 0); } else { - /* Note: we assume the target supports move between - 32 and 64 bit registers. */ - tcg_gen_ext32u_i64(ret, MAKE_TCGV_I64(GET_TCGV_I32(arg))); + tcg_gen_op2(&tcg_ctx, INDEX_op_extu_i32_i64, + GET_TCGV_I64(ret), GET_TCGV_I32(arg)); } } @@ -1782,9 +1781,8 @@ void tcg_gen_ext_i32_i64(TCGv_i64 ret, TCGv_i32 arg) tcg_gen_mov_i32(TCGV_LOW(ret), arg); tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31); } else { - /* Note: we assume the target supports move between - 32 and 64 bit registers. 
*/ - tcg_gen_ext32s_i64(ret, MAKE_TCGV_I64(GET_TCGV_I32(arg))); + tcg_gen_op2(&tcg_ctx, INDEX_op_ext_i32_i64, + GET_TCGV_I64(ret), GET_TCGV_I32(arg)); } } @@ -1820,8 +1818,8 @@ void tcg_gen_extr_i64_i32(TCGv_i32 lo, TCGv_i32 hi, TCGv_i64 arg) tcg_gen_mov_i32(lo, TCGV_LOW(arg)); tcg_gen_mov_i32(hi, TCGV_HIGH(arg)); } else { - tcg_gen_trunc_shr_i64_i32(lo, arg, 0); - tcg_gen_trunc_shr_i64_i32(hi, arg, 32); + tcg_gen_extrl_i64_i32(lo, arg); + tcg_gen_extrh_i64_i32(hi, arg); } } diff --git a/tcg/tcg-op.h b/tcg/tcg-op.h index d1d763f..6da083a 100644 --- a/tcg/tcg-op.h +++ b/tcg/tcg-op.h @@ -684,7 +684,8 @@ static inline void tcg_gen_neg_i64(TCGv_i64 ret, TCGv_i64 arg) void tcg_gen_extu_i32_i64(TCGv_i64 ret, TCGv_i32 arg); void tcg_gen_ext_i32_i64(TCGv_i64 ret, TCGv_i32 arg); void tcg_gen_concat_i32_i64(TCGv_i64 dest, TCGv_i32 low, TCGv_i32 high); -void tcg_gen_trunc_shr_i64_i32(TCGv_i32 ret, TCGv_i64 arg, unsigned int c); +void tcg_gen_extrl_i64_i32(TCGv_i32 ret, TCGv_i64 arg); +void tcg_gen_extrh_i64_i32(TCGv_i32 ret, TCGv_i64 arg); void tcg_gen_extr_i64_i32(TCGv_i32 lo, TCGv_i32 hi, TCGv_i64 arg); void tcg_gen_extr32_i64(TCGv_i64 lo, TCGv_i64 hi, TCGv_i64 arg); @@ -693,11 +694,6 @@ static inline void tcg_gen_concat32_i64(TCGv_i64 ret, TCGv_i64 lo, TCGv_i64 hi) tcg_gen_deposit_i64(ret, lo, hi, 32, 32); } -static inline void tcg_gen_trunc_i64_i32(TCGv_i32 ret, TCGv_i64 arg) -{ - tcg_gen_trunc_shr_i64_i32(ret, arg, 0); -} - /* QEMU specific operations. */ #ifndef TARGET_LONG_BITS @@ -853,7 +849,7 @@ static inline void tcg_gen_qemu_st64(TCGv_i64 arg, TCGv addr, int mem_index) #define tcg_gen_divu_tl tcg_gen_divu_i64 #define tcg_gen_remu_tl tcg_gen_remu_i64 #define tcg_gen_discard_tl tcg_gen_discard_i64 -#define tcg_gen_trunc_tl_i32 tcg_gen_trunc_i64_i32 +#define tcg_gen_trunc_tl_i32 tcg_gen_extrl_i64_i32 #define tcg_gen_trunc_i64_tl tcg_gen_mov_i64 #define tcg_gen_extu_i32_tl tcg_gen_extu_i32_i64 #define tcg_gen_ext_i32_tl tcg_gen_ext_i32_i64 @@ -932,7 +928,7 @@ static inline void tcg_gen_qemu_st64(TCGv_i64 arg, TCGv addr, int mem_index) #define tcg_gen_remu_tl tcg_gen_remu_i32 #define tcg_gen_discard_tl tcg_gen_discard_i32 #define tcg_gen_trunc_tl_i32 tcg_gen_mov_i32 -#define tcg_gen_trunc_i64_tl tcg_gen_trunc_i64_i32 +#define tcg_gen_trunc_i64_tl tcg_gen_extrl_i64_i32 #define tcg_gen_extu_i32_tl tcg_gen_mov_i32 #define tcg_gen_ext_i32_tl tcg_gen_mov_i32 #define tcg_gen_extu_tl_i64 tcg_gen_extu_i32_i64 diff --git a/tcg/tcg-opc.h b/tcg/tcg-opc.h index 13ccb60..02bbf30 100644 --- a/tcg/tcg-opc.h +++ b/tcg/tcg-opc.h @@ -138,8 +138,14 @@ DEF(rotl_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_rot_i64)) DEF(rotr_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_rot_i64)) DEF(deposit_i64, 1, 2, 2, IMPL64 | IMPL(TCG_TARGET_HAS_deposit_i64)) -DEF(trunc_shr_i32, 1, 1, 1, - IMPL(TCG_TARGET_HAS_trunc_shr_i32) +/* size changing ops */ +DEF(ext_i32_i64, 1, 1, 0, IMPL64) +DEF(extu_i32_i64, 1, 1, 0, IMPL64) +DEF(extrl_i64_i32, 1, 1, 0, + IMPL(TCG_TARGET_HAS_extrl_i64_i32) + | (TCG_TARGET_REG_BITS == 32 ? TCG_OPF_NOT_PRESENT : 0)) +DEF(extrh_i64_i32, 1, 1, 0, + IMPL(TCG_TARGET_HAS_extrh_i64_i32) | (TCG_TARGET_REG_BITS == 32 ? TCG_OPF_NOT_PRESENT : 0)) DEF(brcond_i64, 0, 2, 2, TCG_OPF_BB_END | IMPL64) @@ -66,7 +66,8 @@ typedef uint64_t TCGRegSet; #if TCG_TARGET_REG_BITS == 32 /* Turn some undef macros into false macros. 
*/ -#define TCG_TARGET_HAS_trunc_shr_i32 0 +#define TCG_TARGET_HAS_extrl_i64_i32 0 +#define TCG_TARGET_HAS_extrh_i64_i32 0 #define TCG_TARGET_HAS_div_i64 0 #define TCG_TARGET_HAS_rem_i64 0 #define TCG_TARGET_HAS_div2_i64 0 diff --git a/tcg/tci/tcg-target.c b/tcg/tci/tcg-target.c index 83472db..bbb54d4 100644 --- a/tcg/tci/tcg-target.c +++ b/tcg/tci/tcg-target.c @@ -210,6 +210,8 @@ static const TCGTargetOpDef tcg_target_op_defs[] = { #if TCG_TARGET_HAS_ext32u_i64 { INDEX_op_ext32u_i64, { R, R } }, #endif + { INDEX_op_ext_i32_i64, { R, R } }, + { INDEX_op_extu_i32_i64, { R, R } }, #if TCG_TARGET_HAS_bswap16_i64 { INDEX_op_bswap16_i64, { R, R } }, #endif @@ -701,6 +703,8 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, case INDEX_op_ext16u_i64: /* Optional (TCG_TARGET_HAS_ext16u_i64). */ case INDEX_op_ext32s_i64: /* Optional (TCG_TARGET_HAS_ext32s_i64). */ case INDEX_op_ext32u_i64: /* Optional (TCG_TARGET_HAS_ext32u_i64). */ + case INDEX_op_ext_i32_i64: + case INDEX_op_extu_i32_i64: #endif /* TCG_TARGET_REG_BITS == 64 */ case INDEX_op_neg_i32: /* Optional (TCG_TARGET_HAS_neg_i32). */ case INDEX_op_not_i32: /* Optional (TCG_TARGET_HAS_not_i32). */ diff --git a/tcg/tci/tcg-target.h b/tcg/tci/tcg-target.h index cbf3f9b..77e5952 100644 --- a/tcg/tci/tcg-target.h +++ b/tcg/tci/tcg-target.h @@ -84,7 +84,8 @@ #define TCG_TARGET_HAS_mulsh_i32 0 #if TCG_TARGET_REG_BITS == 64 -#define TCG_TARGET_HAS_trunc_shr_i32 0 +#define TCG_TARGET_HAS_extrl_i64_i32 0 +#define TCG_TARGET_HAS_extrh_i64_i32 0 #define TCG_TARGET_HAS_bswap16_i64 1 #define TCG_TARGET_HAS_bswap32_i64 1 #define TCG_TARGET_HAS_bswap64_i64 1 |
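A closing note on the TLB change shared by the aarch64, i386, ppc and s390 hunks above: the comparison value loaded from the address is now built differently for aligned and unaligned accesses. Below is a host-independent sketch of that logic in plain C, under the assumption of 4 KiB target pages; `tlb_compare_value` is a hypothetical helper invented here for illustration, not a QEMU function:

```c
#include <stdbool.h>
#include <stdint.h>

#define TARGET_PAGE_MASK  ((uint64_t)-1 << 12)   /* assumed 4 KiB pages */

static uint64_t tlb_compare_value(uint64_t addr, unsigned size,
                                  bool align_required)
{
    uint64_t s_mask = size - 1;   /* size is a power of two: 1, 2, 4, 8 */

    if (align_required || s_mask == 0) {
        /* Aligned (or byte) access: check the first byte and keep the
           alignment bits in the value, so a misaligned address can never
           match the TLB entry. */
        return addr & (TARGET_PAGE_MASK | s_mask);
    }
    /* Unaligned access: check the page of the last byte of the access.
       If the access crosses a page boundary, addr + s_mask overflows into
       the next page, the TLB compare fails, and the slow path is taken. */
    return (addr + s_mask) & TARGET_PAGE_MASK;
}
```

The trick, as the ppc comment in the patch explains, is that adding `size - 1` before masking makes a page-crossing access look like an access to the next page, which cannot possibly sit in the same TLB index, so the compare mismatches without any extra branch.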