diff options
author | Peter Maydell <peter.maydell@linaro.org> | 2023-02-03 09:30:45 +0000 |
---|---|---|
committer | Peter Maydell <peter.maydell@linaro.org> | 2023-02-03 09:30:45 +0000 |
commit | bf4460a8d9a86f6cfe05d7a7f470c48e3a93d8b2 (patch) | |
tree | 65cd5cd1f1d20f1375f68dbbed6d4dcf214ef6db /tcg | |
parent | f991d61d35d037ba5e627becb6f99bfd065443bf (diff) | |
parent | 709bcd7da3f6b4655d910634a0d520fa1439df38 (diff) | |
download | qemu-bf4460a8d9a86f6cfe05d7a7f470c48e3a93d8b2.zip qemu-bf4460a8d9a86f6cfe05d7a7f470c48e3a93d8b2.tar.gz qemu-bf4460a8d9a86f6cfe05d7a7f470c48e3a93d8b2.tar.bz2 |
Merge tag 'pull-tcg-20230123' of https://gitlab.com/rth7680/qemu into staging
common-user: Re-enable ppc32 host
tcg: Avoid recursion in tcg_gen_mulu2_i32
tcg: Mark tcg helpers noinline to avoid an issue with LTO
tcg/arm: Use register pair allocation for qemu_{ld,st}_i64
disas: Enable loongarch disassembler, and fixes
tcg/loongarch64: Improve move immediate
tcg/loongarch64: Improve add immediate
tcg/loongarch64: Improve setcond
tcg/loongarch64: Implement movcond
tcg/loongarch64: Use tcg_pcrel_diff in tcg_out_ldst
tcg/loongarch64: Reorg goto_tb implementation
# -----BEGIN PGP SIGNATURE-----
#
# iQFRBAABCgA7FiEEekgeeIaLTbaoWgXAZN846K9+IV8FAmPPO+0dHHJpY2hhcmQu
# aGVuZGVyc29uQGxpbmFyby5vcmcACgkQZN846K9+IV93jwgAhG+H5XHtJqF2isCc
# a6pYuUWRbhsOFL23FmWKx2O41tHlJ2Seort8M9eIHOu21L9DUJFd291O/4ckiMQM
# 13+KH/Kl5fumM+uEkO9YMyplOddmvygdTd5dCi5y349Gi3CgJH3n4HUl0qnioM/7
# Dy3n8JIvYsBp+8jUsLXo1gSl5P1kLMLwJmP68qgy8z8Xly4bDco1Nb2UKb7qKevO
# lMr6L+2/ALbKLZ6OU50erdUrlbgNs0eiQyJAfJ47SQ57RGuqF4pZ09+9yRI2FPZt
# UlSn+srsec1ieYyM2e5krVWbNcXaj6FouV7CkbgFXoUZt29xA1HTXsso+8vLgDPu
# g8vvuw==
# =Up0b
# -----END PGP SIGNATURE-----
# gpg: Signature made Tue 24 Jan 2023 02:01:17 GMT
# gpg: using RSA key 7A481E78868B4DB6A85A05C064DF38E8AF7E215F
# gpg: issuer "richard.henderson@linaro.org"
# gpg: Good signature from "Richard Henderson <richard.henderson@linaro.org>" [full]
# Primary key fingerprint: 7A48 1E78 868B 4DB6 A85A 05C0 64DF 38E8 AF7E 215F
* tag 'pull-tcg-20230123' of https://gitlab.com/rth7680/qemu:
tcg/loongarch64: Reorg goto_tb implementation
tcg/loongarch64: Use tcg_pcrel_diff in tcg_out_ldst
tcg/loongarch64: Implement movcond
tcg/loongarch64: Improve setcond expansion
tcg/loongarch64: Introduce tcg_out_addi
tcg/loongarch64: Update tcg-insn-defs.c.inc
tcg/loongarch64: Optimize immediate loading
target/loongarch: Disassemble pcadd* addresses
target/loongarch: Disassemble jirl properly
target/loongarch: Enable the disassembler for host tcg
tcg: Mark tcg helpers noinline to avoid an issue with LTO
linux-user: Implment host/ppc/host-signal.h
common-user/host/ppc: Implement safe-syscall.inc.S
tcg/arm: Use register pair allocation for qemu_{ld,st}_i64
tcg: Avoid recursion in tcg_gen_mulu2_i32
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Diffstat (limited to 'tcg')
-rw-r--r-- | tcg/arm/tcg-target-con-set.h | 7 | ||||
-rw-r--r-- | tcg/arm/tcg-target-con-str.h | 2 | ||||
-rw-r--r-- | tcg/arm/tcg-target.c.inc | 28 | ||||
-rw-r--r-- | tcg/loongarch64/tcg-insn-defs.c.inc | 10 | ||||
-rw-r--r-- | tcg/loongarch64/tcg-target-con-set.h | 5 | ||||
-rw-r--r-- | tcg/loongarch64/tcg-target-con-str.h | 2 | ||||
-rw-r--r-- | tcg/loongarch64/tcg-target.c.inc | 348 | ||||
-rw-r--r-- | tcg/loongarch64/tcg-target.h | 11 | ||||
-rw-r--r-- | tcg/tcg-op.c | 4 |
9 files changed, 278 insertions, 139 deletions
diff --git a/tcg/arm/tcg-target-con-set.h b/tcg/arm/tcg-target-con-set.h index 3685e17..b8849b2 100644 --- a/tcg/arm/tcg-target-con-set.h +++ b/tcg/arm/tcg-target-con-set.h @@ -15,8 +15,9 @@ C_O0_I2(r, rIN) C_O0_I2(s, s) C_O0_I2(w, r) C_O0_I3(s, s, s) +C_O0_I3(S, p, s) C_O0_I4(r, r, rI, rI) -C_O0_I4(s, s, s, s) +C_O0_I4(S, p, s, s) C_O1_I1(r, l) C_O1_I1(r, r) C_O1_I1(w, r) @@ -38,8 +39,8 @@ C_O1_I2(w, w, wZ) C_O1_I3(w, w, w, w) C_O1_I4(r, r, r, rI, rI) C_O1_I4(r, r, rIN, rIK, 0) -C_O2_I1(r, r, l) -C_O2_I2(r, r, l, l) +C_O2_I1(e, p, l) +C_O2_I2(e, p, l, l) C_O2_I2(r, r, r, r) C_O2_I4(r, r, r, r, rIN, rIK) C_O2_I4(r, r, rI, rI, rIN, rIK) diff --git a/tcg/arm/tcg-target-con-str.h b/tcg/arm/tcg-target-con-str.h index 8f50114..24b4b59 100644 --- a/tcg/arm/tcg-target-con-str.h +++ b/tcg/arm/tcg-target-con-str.h @@ -8,9 +8,11 @@ * Define constraint letters for register sets: * REGS(letter, register_mask) */ +REGS('e', ALL_GENERAL_REGS & 0x5555) /* even regs */ REGS('r', ALL_GENERAL_REGS) REGS('l', ALL_QLOAD_REGS) REGS('s', ALL_QSTORE_REGS) +REGS('S', ALL_QSTORE_REGS & 0x5555) /* even qstore */ REGS('w', ALL_VECTOR_REGS) /* diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc index 6abe941..0f5f9f4 100644 --- a/tcg/arm/tcg-target.c.inc +++ b/tcg/arm/tcg-target.c.inc @@ -1694,9 +1694,11 @@ static void tcg_out_qemu_ld_index(TCGContext *s, MemOp opc, tcg_out_ld32_r(s, COND_AL, datalo, addrlo, addend); break; case MO_UQ: + /* We used pair allocation for datalo, so already should be aligned. */ + tcg_debug_assert((datalo & 1) == 0); + tcg_debug_assert(datahi == datalo + 1); /* LDRD requires alignment; double-check that. */ - if (get_alignment_bits(opc) >= MO_64 - && (datalo & 1) == 0 && datahi == datalo + 1) { + if (get_alignment_bits(opc) >= MO_64) { /* * Rm (the second address op) must not overlap Rt or Rt + 1. * Since datalo is aligned, we can simplify the test via alignment. @@ -1750,9 +1752,11 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp opc, TCGReg datalo, tcg_out_ld32_12(s, COND_AL, datalo, addrlo, 0); break; case MO_UQ: + /* We used pair allocation for datalo, so already should be aligned. */ + tcg_debug_assert((datalo & 1) == 0); + tcg_debug_assert(datahi == datalo + 1); /* LDRD requires alignment; double-check that. */ - if (get_alignment_bits(opc) >= MO_64 - && (datalo & 1) == 0 && datahi == datalo + 1) { + if (get_alignment_bits(opc) >= MO_64) { tcg_out_ldrd_8(s, COND_AL, datalo, addrlo, 0); } else if (datalo == addrlo) { tcg_out_ld32_12(s, COND_AL, datahi, addrlo, 4); @@ -1834,9 +1838,11 @@ static void tcg_out_qemu_st_index(TCGContext *s, ARMCond cond, MemOp opc, tcg_out_st32_r(s, cond, datalo, addrlo, addend); break; case MO_64: + /* We used pair allocation for datalo, so already should be aligned. */ + tcg_debug_assert((datalo & 1) == 0); + tcg_debug_assert(datahi == datalo + 1); /* STRD requires alignment; double-check that. */ - if (get_alignment_bits(opc) >= MO_64 - && (datalo & 1) == 0 && datahi == datalo + 1) { + if (get_alignment_bits(opc) >= MO_64) { tcg_out_strd_r(s, cond, datalo, addrlo, addend); } else if (scratch_addend) { tcg_out_st32_rwb(s, cond, datalo, addend, addrlo); @@ -1871,9 +1877,11 @@ static void tcg_out_qemu_st_direct(TCGContext *s, MemOp opc, TCGReg datalo, tcg_out_st32_12(s, COND_AL, datalo, addrlo, 0); break; case MO_64: + /* We used pair allocation for datalo, so already should be aligned. */ + tcg_debug_assert((datalo & 1) == 0); + tcg_debug_assert(datahi == datalo + 1); /* STRD requires alignment; double-check that. */ - if (get_alignment_bits(opc) >= MO_64 - && (datalo & 1) == 0 && datahi == datalo + 1) { + if (get_alignment_bits(opc) >= MO_64) { tcg_out_strd_8(s, COND_AL, datalo, addrlo, 0); } else { tcg_out_st32_12(s, COND_AL, datalo, addrlo, 0); @@ -2372,11 +2380,11 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op) case INDEX_op_qemu_ld_i32: return TARGET_LONG_BITS == 32 ? C_O1_I1(r, l) : C_O1_I2(r, l, l); case INDEX_op_qemu_ld_i64: - return TARGET_LONG_BITS == 32 ? C_O2_I1(r, r, l) : C_O2_I2(r, r, l, l); + return TARGET_LONG_BITS == 32 ? C_O2_I1(e, p, l) : C_O2_I2(e, p, l, l); case INDEX_op_qemu_st_i32: return TARGET_LONG_BITS == 32 ? C_O0_I2(s, s) : C_O0_I3(s, s, s); case INDEX_op_qemu_st_i64: - return TARGET_LONG_BITS == 32 ? C_O0_I3(s, s, s) : C_O0_I4(s, s, s, s); + return TARGET_LONG_BITS == 32 ? C_O0_I3(S, p, s) : C_O0_I4(S, p, s, s); case INDEX_op_st_vec: return C_O0_I2(w, r); diff --git a/tcg/loongarch64/tcg-insn-defs.c.inc b/tcg/loongarch64/tcg-insn-defs.c.inc index d162571..b5bb0c5 100644 --- a/tcg/loongarch64/tcg-insn-defs.c.inc +++ b/tcg/loongarch64/tcg-insn-defs.c.inc @@ -4,7 +4,7 @@ * * This file is auto-generated by genqemutcgdefs from * https://github.com/loongson-community/loongarch-opcodes, - * from commit 961f0c60f5b63e574d785995600c71ad5413fdc4. + * from commit 25ca7effe9d88101c1cf96c4005423643386d81f. * DO NOT EDIT. */ @@ -74,6 +74,7 @@ typedef enum { OPC_ANDI = 0x03400000, OPC_ORI = 0x03800000, OPC_XORI = 0x03c00000, + OPC_ADDU16I_D = 0x10000000, OPC_LU12I_W = 0x14000000, OPC_CU32I_D = 0x16000000, OPC_PCADDU2I = 0x18000000, @@ -710,6 +711,13 @@ tcg_out_opc_xori(TCGContext *s, TCGReg d, TCGReg j, uint32_t uk12) tcg_out32(s, encode_djuk12_insn(OPC_XORI, d, j, uk12)); } +/* Emits the `addu16i.d d, j, sk16` instruction. */ +static void __attribute__((unused)) +tcg_out_opc_addu16i_d(TCGContext *s, TCGReg d, TCGReg j, int32_t sk16) +{ + tcg_out32(s, encode_djsk16_insn(OPC_ADDU16I_D, d, j, sk16)); +} + /* Emits the `lu12i.w d, sj20` instruction. */ static void __attribute__((unused)) tcg_out_opc_lu12i_w(TCGContext *s, TCGReg d, int32_t sj20) diff --git a/tcg/loongarch64/tcg-target-con-set.h b/tcg/loongarch64/tcg-target-con-set.h index 349c672..172c107 100644 --- a/tcg/loongarch64/tcg-target-con-set.h +++ b/tcg/loongarch64/tcg-target-con-set.h @@ -23,9 +23,12 @@ C_O1_I1(r, L) C_O1_I2(r, r, rC) C_O1_I2(r, r, ri) C_O1_I2(r, r, rI) +C_O1_I2(r, r, rJ) C_O1_I2(r, r, rU) C_O1_I2(r, r, rW) C_O1_I2(r, r, rZ) C_O1_I2(r, 0, rZ) -C_O1_I2(r, rZ, rN) +C_O1_I2(r, rZ, ri) +C_O1_I2(r, rZ, rJ) C_O1_I2(r, rZ, rZ) +C_O1_I4(r, rZ, rJ, rZ, rZ) diff --git a/tcg/loongarch64/tcg-target-con-str.h b/tcg/loongarch64/tcg-target-con-str.h index c3986a4..541ff47 100644 --- a/tcg/loongarch64/tcg-target-con-str.h +++ b/tcg/loongarch64/tcg-target-con-str.h @@ -21,7 +21,7 @@ REGS('L', ALL_GENERAL_REGS & ~SOFTMMU_RESERVE_REGS) * CONST(letter, TCG_CT_CONST_* bit set) */ CONST('I', TCG_CT_CONST_S12) -CONST('N', TCG_CT_CONST_N12) +CONST('J', TCG_CT_CONST_S32) CONST('U', TCG_CT_CONST_U12) CONST('Z', TCG_CT_CONST_ZERO) CONST('C', TCG_CT_CONST_C12) diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc index 3174557..ce4a153 100644 --- a/tcg/loongarch64/tcg-target.c.inc +++ b/tcg/loongarch64/tcg-target.c.inc @@ -126,7 +126,7 @@ static const int tcg_target_call_oarg_regs[] = { #define TCG_CT_CONST_ZERO 0x100 #define TCG_CT_CONST_S12 0x200 -#define TCG_CT_CONST_N12 0x400 +#define TCG_CT_CONST_S32 0x400 #define TCG_CT_CONST_U12 0x800 #define TCG_CT_CONST_C12 0x1000 #define TCG_CT_CONST_WSZ 0x2000 @@ -161,7 +161,7 @@ static bool tcg_target_const_match(int64_t val, TCGType type, int ct) if ((ct & TCG_CT_CONST_S12) && val == sextreg(val, 0, 12)) { return true; } - if ((ct & TCG_CT_CONST_N12) && -val == sextreg(-val, 0, 12)) { + if ((ct & TCG_CT_CONST_S32) && val == (int32_t)val) { return true; } if ((ct & TCG_CT_CONST_U12) && val >= 0 && val <= 0xfff) { @@ -274,16 +274,6 @@ static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg) return true; } -static bool imm_part_needs_loading(bool high_bits_are_ones, - tcg_target_long part) -{ - if (high_bits_are_ones) { - return part != -1; - } else { - return part != 0; - } -} - /* Loads a 32-bit immediate into rd, sign-extended. */ static void tcg_out_movi_i32(TCGContext *s, TCGReg rd, int32_t val) { @@ -291,16 +281,16 @@ static void tcg_out_movi_i32(TCGContext *s, TCGReg rd, int32_t val) tcg_target_long hi12 = sextreg(val, 12, 20); /* Single-instruction cases. */ - if (lo == val) { - /* val fits in simm12: addi.w rd, zero, val */ - tcg_out_opc_addi_w(s, rd, TCG_REG_ZERO, val); - return; - } - if (0x800 <= val && val <= 0xfff) { + if (hi12 == 0) { /* val fits in uimm12: ori rd, zero, val */ tcg_out_opc_ori(s, rd, TCG_REG_ZERO, val); return; } + if (hi12 == sextreg(lo, 12, 20)) { + /* val fits in simm12: addi.w rd, zero, val */ + tcg_out_opc_addi_w(s, rd, TCG_REG_ZERO, val); + return; + } /* High bits must be set; load with lu12i.w + optional ori. */ tcg_out_opc_lu12i_w(s, rd, hi12); @@ -334,8 +324,7 @@ static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd, intptr_t pc_offset; tcg_target_long val_lo, val_hi, pc_hi, offset_hi; - tcg_target_long hi32, hi52; - bool rd_high_bits_are_ones; + tcg_target_long hi12, hi32, hi52; /* Value fits in signed i32. */ if (type == TCG_TYPE_I32 || val == (int32_t)val) { @@ -366,29 +355,68 @@ static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd, return; } + hi12 = sextreg(val, 12, 20); hi32 = sextreg(val, 32, 20); hi52 = sextreg(val, 52, 12); /* Single cu52i.d case. */ - if (ctz64(val) >= 52) { + if ((hi52 != 0) && (ctz64(val) >= 52)) { tcg_out_opc_cu52i_d(s, rd, TCG_REG_ZERO, hi52); return; } /* Slow path. Initialize the low 32 bits, then concat high bits. */ tcg_out_movi_i32(s, rd, val); - rd_high_bits_are_ones = (int32_t)val < 0; - if (imm_part_needs_loading(rd_high_bits_are_ones, hi32)) { + /* Load hi32 and hi52 explicitly when they are unexpected values. */ + if (hi32 != sextreg(hi12, 20, 20)) { tcg_out_opc_cu32i_d(s, rd, hi32); - rd_high_bits_are_ones = hi32 < 0; } - if (imm_part_needs_loading(rd_high_bits_are_ones, hi52)) { + if (hi52 != sextreg(hi32, 20, 12)) { tcg_out_opc_cu52i_d(s, rd, rd, hi52); } } +static void tcg_out_addi(TCGContext *s, TCGType type, TCGReg rd, + TCGReg rs, tcg_target_long imm) +{ + tcg_target_long lo12 = sextreg(imm, 0, 12); + tcg_target_long hi16 = sextreg(imm - lo12, 16, 16); + + /* + * Note that there's a hole in between hi16 and lo12: + * + * 3 2 1 0 + * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 + * ...+-------------------------------+-------+-----------------------+ + * | hi16 | | lo12 | + * ...+-------------------------------+-------+-----------------------+ + * + * For bits within that hole, it's more efficient to use LU12I and ADD. + */ + if (imm == (hi16 << 16) + lo12) { + if (hi16) { + tcg_out_opc_addu16i_d(s, rd, rs, hi16); + rs = rd; + } + if (type == TCG_TYPE_I32) { + tcg_out_opc_addi_w(s, rd, rs, lo12); + } else if (lo12) { + tcg_out_opc_addi_d(s, rd, rs, lo12); + } else { + tcg_out_mov(s, type, rd, rs); + } + } else { + tcg_out_movi(s, type, TCG_REG_TMP0, imm); + if (type == TCG_TYPE_I32) { + tcg_out_opc_add_w(s, rd, rs, TCG_REG_TMP0); + } else { + tcg_out_opc_add_d(s, rd, rs, TCG_REG_TMP0); + } + } +} + static void tcg_out_ext8u(TCGContext *s, TCGReg ret, TCGReg arg) { tcg_out_opc_andi(s, ret, arg, 0xff); @@ -441,64 +469,155 @@ static void tcg_out_clzctz(TCGContext *s, LoongArchInsn opc, tcg_out_opc_or(s, a0, TCG_REG_TMP0, a0); } -static void tcg_out_setcond(TCGContext *s, TCGCond cond, TCGReg ret, - TCGReg arg1, TCGReg arg2, bool c2) +#define SETCOND_INV TCG_TARGET_NB_REGS +#define SETCOND_NEZ (SETCOND_INV << 1) +#define SETCOND_FLAGS (SETCOND_INV | SETCOND_NEZ) + +static int tcg_out_setcond_int(TCGContext *s, TCGCond cond, TCGReg ret, + TCGReg arg1, tcg_target_long arg2, bool c2) { - TCGReg tmp; + int flags = 0; - if (c2) { - tcg_debug_assert(arg2 == 0); + switch (cond) { + case TCG_COND_EQ: /* -> NE */ + case TCG_COND_GE: /* -> LT */ + case TCG_COND_GEU: /* -> LTU */ + case TCG_COND_GT: /* -> LE */ + case TCG_COND_GTU: /* -> LEU */ + cond = tcg_invert_cond(cond); + flags ^= SETCOND_INV; + break; + default: + break; } switch (cond) { - case TCG_COND_EQ: + case TCG_COND_LE: + case TCG_COND_LEU: + /* + * If we have a constant input, the most efficient way to implement + * LE is by adding 1 and using LT. Watch out for wrap around for LEU. + * We don't need to care for this for LE because the constant input + * is still constrained to int32_t, and INT32_MAX+1 is representable + * in the 64-bit temporary register. + */ if (c2) { - tmp = arg1; + if (cond == TCG_COND_LEU) { + /* unsigned <= -1 is true */ + if (arg2 == -1) { + tcg_out_movi(s, TCG_TYPE_REG, ret, !(flags & SETCOND_INV)); + return ret; + } + cond = TCG_COND_LTU; + } else { + cond = TCG_COND_LT; + } + arg2 += 1; } else { - tcg_out_opc_sub_d(s, ret, arg1, arg2); - tmp = ret; + TCGReg tmp = arg2; + arg2 = arg1; + arg1 = tmp; + cond = tcg_swap_cond(cond); /* LE -> GE */ + cond = tcg_invert_cond(cond); /* GE -> LT */ + flags ^= SETCOND_INV; } - tcg_out_opc_sltui(s, ret, tmp, 1); break; + default: + break; + } + + switch (cond) { case TCG_COND_NE: - if (c2) { - tmp = arg1; + flags |= SETCOND_NEZ; + if (!c2) { + tcg_out_opc_xor(s, ret, arg1, arg2); + } else if (arg2 == 0) { + ret = arg1; + } else if (arg2 >= 0 && arg2 <= 0xfff) { + tcg_out_opc_xori(s, ret, arg1, arg2); } else { - tcg_out_opc_sub_d(s, ret, arg1, arg2); - tmp = ret; + tcg_out_addi(s, TCG_TYPE_REG, ret, arg1, -arg2); } - tcg_out_opc_sltu(s, ret, TCG_REG_ZERO, tmp); break; + case TCG_COND_LT: - tcg_out_opc_slt(s, ret, arg1, arg2); - break; - case TCG_COND_GE: - tcg_out_opc_slt(s, ret, arg1, arg2); - tcg_out_opc_xori(s, ret, ret, 1); - break; - case TCG_COND_LE: - tcg_out_setcond(s, TCG_COND_GE, ret, arg2, arg1, false); - break; - case TCG_COND_GT: - tcg_out_setcond(s, TCG_COND_LT, ret, arg2, arg1, false); - break; case TCG_COND_LTU: - tcg_out_opc_sltu(s, ret, arg1, arg2); - break; - case TCG_COND_GEU: - tcg_out_opc_sltu(s, ret, arg1, arg2); - tcg_out_opc_xori(s, ret, ret, 1); - break; - case TCG_COND_LEU: - tcg_out_setcond(s, TCG_COND_GEU, ret, arg2, arg1, false); - break; - case TCG_COND_GTU: - tcg_out_setcond(s, TCG_COND_LTU, ret, arg2, arg1, false); + if (c2) { + if (arg2 >= -0x800 && arg2 <= 0x7ff) { + if (cond == TCG_COND_LT) { + tcg_out_opc_slti(s, ret, arg1, arg2); + } else { + tcg_out_opc_sltui(s, ret, arg1, arg2); + } + break; + } + tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_TMP0, arg2); + arg2 = TCG_REG_TMP0; + } + if (cond == TCG_COND_LT) { + tcg_out_opc_slt(s, ret, arg1, arg2); + } else { + tcg_out_opc_sltu(s, ret, arg1, arg2); + } break; + default: g_assert_not_reached(); break; } + + return ret | flags; +} + +static void tcg_out_setcond(TCGContext *s, TCGCond cond, TCGReg ret, + TCGReg arg1, tcg_target_long arg2, bool c2) +{ + int tmpflags = tcg_out_setcond_int(s, cond, ret, arg1, arg2, c2); + + if (tmpflags != ret) { + TCGReg tmp = tmpflags & ~SETCOND_FLAGS; + + switch (tmpflags & SETCOND_FLAGS) { + case SETCOND_INV: + /* Intermediate result is boolean: simply invert. */ + tcg_out_opc_xori(s, ret, tmp, 1); + break; + case SETCOND_NEZ: + /* Intermediate result is zero/non-zero: test != 0. */ + tcg_out_opc_sltu(s, ret, TCG_REG_ZERO, tmp); + break; + case SETCOND_NEZ | SETCOND_INV: + /* Intermediate result is zero/non-zero: test == 0. */ + tcg_out_opc_sltui(s, ret, tmp, 1); + break; + default: + g_assert_not_reached(); + } + } +} + +static void tcg_out_movcond(TCGContext *s, TCGCond cond, TCGReg ret, + TCGReg c1, tcg_target_long c2, bool const2, + TCGReg v1, TCGReg v2) +{ + int tmpflags = tcg_out_setcond_int(s, cond, TCG_REG_TMP0, c1, c2, const2); + TCGReg t; + + /* Standardize the test below to t != 0. */ + if (tmpflags & SETCOND_INV) { + t = v1, v1 = v2, v2 = t; + } + + t = tmpflags & ~SETCOND_FLAGS; + if (v1 == TCG_REG_ZERO) { + tcg_out_opc_masknez(s, ret, v2, t); + } else if (v2 == TCG_REG_ZERO) { + tcg_out_opc_maskeqz(s, ret, v1, t); + } else { + tcg_out_opc_masknez(s, TCG_REG_TMP2, v2, t); /* t ? 0 : v2 */ + tcg_out_opc_maskeqz(s, TCG_REG_TMP1, v1, t); /* t ? v1 : 0 */ + tcg_out_opc_or(s, ret, TCG_REG_TMP1, TCG_REG_TMP2); + } } /* @@ -583,7 +702,7 @@ static void tcg_out_ldst(TCGContext *s, LoongArchInsn opc, TCGReg data, intptr_t imm12 = sextreg(offset, 0, 12); if (offset != imm12) { - intptr_t diff = offset - (uintptr_t)s->code_ptr; + intptr_t diff = tcg_pcrel_diff(s, (void *)offset); if (addr == TCG_REG_ZERO && diff == (int32_t)diff) { imm12 = sextreg(diff, 0, 12); @@ -1032,37 +1151,6 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args) #endif } -/* LoongArch uses `andi zero, zero, 0` as NOP. */ -#define NOP OPC_ANDI -static void tcg_out_nop(TCGContext *s) -{ - tcg_out32(s, NOP); -} - -void tb_target_set_jmp_target(const TranslationBlock *tb, int n, - uintptr_t jmp_rx, uintptr_t jmp_rw) -{ - tcg_insn_unit i1, i2; - ptrdiff_t upper, lower; - uintptr_t addr = tb->jmp_target_addr[n]; - ptrdiff_t offset = (ptrdiff_t)(addr - jmp_rx) >> 2; - - if (offset == sextreg(offset, 0, 26)) { - i1 = encode_sd10k16_insn(OPC_B, offset); - i2 = NOP; - } else { - tcg_debug_assert(offset == sextreg(offset, 0, 36)); - lower = (int16_t)offset; - upper = (offset - lower) >> 16; - - i1 = encode_dsj20_insn(OPC_PCADDU18I, TCG_REG_TMP0, upper); - i2 = encode_djsk16_insn(OPC_JIRL, TCG_REG_ZERO, TCG_REG_TMP0, lower); - } - uint64_t pair = ((uint64_t)i2 << 32) | i1; - qatomic_set((uint64_t *)jmp_rw, pair); - flush_idcache_range(jmp_rx, jmp_rw, 8); -} - /* * Entry-points */ @@ -1083,22 +1171,43 @@ static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0) static void tcg_out_goto_tb(TCGContext *s, int which) { /* - * Ensure that patch area is 8-byte aligned so that an - * atomic write can be used to patch the target address. + * Direct branch, or load indirect address, to be patched + * by tb_target_set_jmp_target. Check indirect load offset + * in range early, regardless of direct branch distance, + * via assert within tcg_out_opc_pcaddu2i. */ - if ((uintptr_t)s->code_ptr & 7) { - tcg_out_nop(s); - } + uintptr_t i_addr = get_jmp_target_addr(s, which); + intptr_t i_disp = tcg_pcrel_diff(s, (void *)i_addr); + set_jmp_insn_offset(s, which); - /* - * actual branch destination will be patched by - * tb_target_set_jmp_target later - */ - tcg_out_opc_pcaddu18i(s, TCG_REG_TMP0, 0); + tcg_out_opc_pcaddu2i(s, TCG_REG_TMP0, i_disp >> 2); + + /* Finish the load and indirect branch. */ + tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP0, TCG_REG_TMP0, 0); tcg_out_opc_jirl(s, TCG_REG_ZERO, TCG_REG_TMP0, 0); set_jmp_reset_offset(s, which); } +void tb_target_set_jmp_target(const TranslationBlock *tb, int n, + uintptr_t jmp_rx, uintptr_t jmp_rw) +{ + uintptr_t d_addr = tb->jmp_target_addr[n]; + ptrdiff_t d_disp = (ptrdiff_t)(d_addr - jmp_rx) >> 2; + tcg_insn_unit insn; + + /* Either directly branch, or load slot address for indirect branch. */ + if (d_disp == sextreg(d_disp, 0, 26)) { + insn = encode_sd10k16_insn(OPC_B, d_disp); + } else { + uintptr_t i_addr = (uintptr_t)&tb->jmp_target_addr[n]; + intptr_t i_disp = i_addr - jmp_rx; + insn = encode_dsj20_insn(OPC_PCADDU2I, TCG_REG_TMP0, i_disp >> 2); + } + + qatomic_set((tcg_insn_unit *)jmp_rw, insn); + flush_idcache_range(jmp_rx, jmp_rw, 4); +} + static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg args[TCG_MAX_OP_ARGS], const int const_args[TCG_MAX_OP_ARGS]) @@ -1361,14 +1470,14 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, case INDEX_op_add_i32: if (c2) { - tcg_out_opc_addi_w(s, a0, a1, a2); + tcg_out_addi(s, TCG_TYPE_I32, a0, a1, a2); } else { tcg_out_opc_add_w(s, a0, a1, a2); } break; case INDEX_op_add_i64: if (c2) { - tcg_out_opc_addi_d(s, a0, a1, a2); + tcg_out_addi(s, TCG_TYPE_I64, a0, a1, a2); } else { tcg_out_opc_add_d(s, a0, a1, a2); } @@ -1376,14 +1485,14 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, case INDEX_op_sub_i32: if (c2) { - tcg_out_opc_addi_w(s, a0, a1, -a2); + tcg_out_addi(s, TCG_TYPE_I32, a0, a1, -a2); } else { tcg_out_opc_sub_w(s, a0, a1, a2); } break; case INDEX_op_sub_i64: if (c2) { - tcg_out_opc_addi_d(s, a0, a1, -a2); + tcg_out_addi(s, TCG_TYPE_I64, a0, a1, -a2); } else { tcg_out_opc_sub_d(s, a0, a1, a2); } @@ -1443,6 +1552,11 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, tcg_out_setcond(s, args[3], a0, a1, a2, c2); break; + case INDEX_op_movcond_i32: + case INDEX_op_movcond_i64: + tcg_out_movcond(s, args[5], a0, a1, a2, c2, args[3], args[4]); + break; + case INDEX_op_ld8s_i32: case INDEX_op_ld8s_i64: tcg_out_ldst(s, OPC_LD_B, a0, a1, a2); @@ -1597,8 +1711,9 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op) return C_O1_I2(r, r, ri); case INDEX_op_add_i32: + return C_O1_I2(r, r, ri); case INDEX_op_add_i64: - return C_O1_I2(r, r, rI); + return C_O1_I2(r, r, rJ); case INDEX_op_and_i32: case INDEX_op_and_i64: @@ -1617,18 +1732,17 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op) case INDEX_op_ctz_i64: return C_O1_I2(r, r, rW); - case INDEX_op_setcond_i32: - case INDEX_op_setcond_i64: - return C_O1_I2(r, r, rZ); - case INDEX_op_deposit_i32: case INDEX_op_deposit_i64: /* Must deposit into the same register as input */ return C_O1_I2(r, 0, rZ); case INDEX_op_sub_i32: + case INDEX_op_setcond_i32: + return C_O1_I2(r, rZ, ri); case INDEX_op_sub_i64: - return C_O1_I2(r, rZ, rN); + case INDEX_op_setcond_i64: + return C_O1_I2(r, rZ, rJ); case INDEX_op_mul_i32: case INDEX_op_mul_i64: @@ -1646,6 +1760,10 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op) case INDEX_op_remu_i64: return C_O1_I2(r, rZ, rZ); + case INDEX_op_movcond_i32: + case INDEX_op_movcond_i64: + return C_O1_I4(r, rZ, rJ, rZ, rZ); + default: g_assert_not_reached(); } diff --git a/tcg/loongarch64/tcg-target.h b/tcg/loongarch64/tcg-target.h index 1c3e48d..8b151e7 100644 --- a/tcg/loongarch64/tcg-target.h +++ b/tcg/loongarch64/tcg-target.h @@ -42,11 +42,8 @@ #define TCG_TARGET_INSN_UNIT_SIZE 4 #define TCG_TARGET_NB_REGS 32 -/* - * PCADDU18I + JIRL sequence can give 20 + 16 + 2 = 38 bits - * signed offset, which is +/- 128 GiB. - */ -#define MAX_CODE_GEN_BUFFER_SIZE (128 * GiB) + +#define MAX_CODE_GEN_BUFFER_SIZE ((size_t)-1) typedef enum { TCG_REG_ZERO, @@ -97,7 +94,7 @@ typedef enum { #define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_NORMAL /* optional instructions */ -#define TCG_TARGET_HAS_movcond_i32 0 +#define TCG_TARGET_HAS_movcond_i32 1 #define TCG_TARGET_HAS_div_i32 1 #define TCG_TARGET_HAS_rem_i32 1 #define TCG_TARGET_HAS_div2_i32 0 @@ -133,7 +130,7 @@ typedef enum { #define TCG_TARGET_HAS_qemu_st8_i32 0 /* 64-bit operations */ -#define TCG_TARGET_HAS_movcond_i64 0 +#define TCG_TARGET_HAS_movcond_i64 1 #define TCG_TARGET_HAS_div_i64 1 #define TCG_TARGET_HAS_rem_i64 1 #define TCG_TARGET_HAS_div2_i64 0 diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c index 9fa9f1b..326a918 100644 --- a/tcg/tcg-op.c +++ b/tcg/tcg-op.c @@ -874,7 +874,7 @@ void tcg_gen_mulu2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 arg1, TCGv_i32 arg2) tcg_gen_op3_i32(INDEX_op_muluh_i32, rh, arg1, arg2); tcg_gen_mov_i32(rl, t); tcg_temp_free_i32(t); - } else { + } else if (TCG_TARGET_REG_BITS == 64) { TCGv_i64 t0 = tcg_temp_new_i64(); TCGv_i64 t1 = tcg_temp_new_i64(); tcg_gen_extu_i32_i64(t0, arg1); @@ -883,6 +883,8 @@ void tcg_gen_mulu2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 arg1, TCGv_i32 arg2) tcg_gen_extr_i64_i32(rl, rh, t0); tcg_temp_free_i64(t0); tcg_temp_free_i64(t1); + } else { + qemu_build_not_reached(); } } |