-rw-r--r--  tcg/s390/tcg-target.c  | 543
-rw-r--r--  tcg/s390/tcg-target.h  |  26
-rw-r--r--  tcg/tci/tcg-target.c   |  22
-rw-r--r--  tcg/tci/tcg-target.h   |   8
-rw-r--r--  tci.c                  |  57
5 files changed, 364 insertions(+), 292 deletions(-)
diff --git a/tcg/s390/tcg-target.c b/tcg/s390/tcg-target.c
index e12a152..7d6f777 100644
--- a/tcg/s390/tcg-target.c
+++ b/tcg/s390/tcg-target.c
@@ -24,16 +24,18 @@
* THE SOFTWARE.
*/
+/* We only support generating code for 64-bit mode. */
+#if TCG_TARGET_REG_BITS != 64
+#error "unsupported code generation mode"
+#endif
+
/* ??? The translation blocks produced by TCG are generally small enough to
be entirely reachable with a 16-bit displacement. Leaving the option for
a 32-bit displacement here Just In Case. */
#define USE_LONG_BRANCHES 0
#define TCG_CT_CONST_32 0x0100
-#define TCG_CT_CONST_NEG 0x0200
-#define TCG_CT_CONST_ADDI 0x0400
#define TCG_CT_CONST_MULI 0x0800
-#define TCG_CT_CONST_ANDI 0x1000
#define TCG_CT_CONST_ORI 0x2000
#define TCG_CT_CONST_XORI 0x4000
#define TCG_CT_CONST_CMPI 0x8000
@@ -63,6 +65,7 @@
typedef enum S390Opcode {
RIL_AFI = 0xc209,
RIL_AGFI = 0xc208,
+ RIL_ALFI = 0xc20b,
RIL_ALGFI = 0xc20a,
RIL_BRASL = 0xc005,
RIL_BRCL = 0xc004,
@@ -84,6 +87,8 @@ typedef enum S390Opcode {
RIL_NILF = 0xc00b,
RIL_OIHF = 0xc00c,
RIL_OILF = 0xc00d,
+ RIL_SLFI = 0xc205,
+ RIL_SLGFI = 0xc204,
RIL_XIHF = 0xc006,
RIL_XILF = 0xc007,
@@ -118,8 +123,12 @@ typedef enum S390Opcode {
RIE_CLGIJ = 0xec7d,
RIE_CLRJ = 0xec77,
RIE_CRJ = 0xec76,
+ RIE_RISBG = 0xec55,
RRE_AGR = 0xb908,
+ RRE_ALGR = 0xb90a,
+ RRE_ALCR = 0xb998,
+ RRE_ALCGR = 0xb988,
RRE_CGR = 0xb920,
RRE_CLGR = 0xb921,
RRE_DLGR = 0xb987,
@@ -137,14 +146,22 @@ typedef enum S390Opcode {
RRE_LRVR = 0xb91f,
RRE_LRVGR = 0xb90f,
RRE_LTGR = 0xb902,
+ RRE_MLGR = 0xb986,
RRE_MSGR = 0xb90c,
RRE_MSR = 0xb252,
RRE_NGR = 0xb980,
RRE_OGR = 0xb981,
RRE_SGR = 0xb909,
+ RRE_SLGR = 0xb90b,
+ RRE_SLBR = 0xb999,
+ RRE_SLBGR = 0xb989,
RRE_XGR = 0xb982,
+ RRF_LOCR = 0xb9f2,
+ RRF_LOCGR = 0xb9e2,
+
RR_AR = 0x1a,
+ RR_ALR = 0x1e,
RR_BASR = 0x0d,
RR_BCR = 0x07,
RR_CLR = 0x15,
@@ -156,6 +173,7 @@ typedef enum S390Opcode {
RR_NR = 0x14,
RR_OR = 0x16,
RR_SR = 0x1b,
+ RR_SLR = 0x1f,
RR_XR = 0x17,
RSY_RLL = 0xeb1d,
@@ -172,6 +190,7 @@ typedef enum S390Opcode {
RXY_AY = 0xe35a,
RXY_CG = 0xe320,
RXY_CY = 0xe359,
+ RXY_LAY = 0xe371,
RXY_LB = 0xe376,
RXY_LG = 0xe304,
RXY_LGB = 0xe377,
@@ -198,6 +217,7 @@ typedef enum S390Opcode {
RX_A = 0x5a,
RX_C = 0x59,
RX_L = 0x58,
+ RX_LA = 0x41,
RX_LH = 0x48,
RX_ST = 0x50,
RX_STC = 0x42,
@@ -252,9 +272,6 @@ static const int tcg_target_call_iarg_regs[] = {
static const int tcg_target_call_oarg_regs[] = {
TCG_REG_R2,
-#if TCG_TARGET_REG_BITS == 32
- TCG_REG_R3
-#endif
};
#define S390_CC_EQ 8
@@ -329,6 +346,7 @@ static uint8_t *tb_ret_addr;
#define FACILITY_LONG_DISP (1ULL << (63 - 18))
#define FACILITY_EXT_IMM (1ULL << (63 - 21))
#define FACILITY_GEN_INST_EXT (1ULL << (63 - 34))
+#define FACILITY_LOAD_ON_COND (1ULL << (63 - 45))
static uint64_t facilities;
@@ -376,6 +394,7 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
tcg_regset_set32(ct->u.regs, 0, 0xffff);
tcg_regset_reset_reg (ct->u.regs, TCG_REG_R2);
tcg_regset_reset_reg (ct->u.regs, TCG_REG_R3);
+ tcg_regset_reset_reg (ct->u.regs, TCG_REG_R4);
break;
case 'a': /* force R2 for division */
ct->ct |= TCG_CT_REG;
@@ -387,21 +406,12 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
tcg_regset_clear(ct->u.regs);
tcg_regset_set_reg(ct->u.regs, TCG_REG_R3);
break;
- case 'N': /* force immediate negate */
- ct->ct |= TCG_CT_CONST_NEG;
- break;
case 'W': /* force 32-bit ("word") immediate */
ct->ct |= TCG_CT_CONST_32;
break;
- case 'I':
- ct->ct |= TCG_CT_CONST_ADDI;
- break;
case 'K':
ct->ct |= TCG_CT_CONST_MULI;
break;
- case 'A':
- ct->ct |= TCG_CT_CONST_ANDI;
- break;
case 'O':
ct->ct |= TCG_CT_CONST_ORI;
break;
@@ -420,63 +430,6 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
return 0;
}
-/* Immediates to be used with logical AND. This is an optimization only,
- since a full 64-bit immediate AND can always be performed with 4 sequential
- NI[LH][LH] instructions. What we're looking for is immediates that we
- can load efficiently, and the immediate load plus the reg-reg AND is
- smaller than the sequential NI's. */
-
-static int tcg_match_andi(int ct, tcg_target_ulong val)
-{
- int i;
-
- if (facilities & FACILITY_EXT_IMM) {
- if (ct & TCG_CT_CONST_32) {
- /* All 32-bit ANDs can be performed with 1 48-bit insn. */
- return 1;
- }
-
- /* Zero-extensions. */
- if (val == 0xff || val == 0xffff || val == 0xffffffff) {
- return 1;
- }
- } else {
- if (ct & TCG_CT_CONST_32) {
- val = (uint32_t)val;
- } else if (val == 0xffffffff) {
- return 1;
- }
- }
-
- /* Try all 32-bit insns that can perform it in one go. */
- for (i = 0; i < 4; i++) {
- tcg_target_ulong mask = ~(0xffffull << i*16);
- if ((val & mask) == mask) {
- return 1;
- }
- }
-
- /* Look for 16-bit values performing the mask. These are better
- to load with LLI[LH][LH]. */
- for (i = 0; i < 4; i++) {
- tcg_target_ulong mask = 0xffffull << i*16;
- if ((val & mask) == val) {
- return 0;
- }
- }
-
- /* Look for 32-bit values performing the 64-bit mask. These
- are better to load with LLI[LH]F, or if extended immediates
- not available, with a pair of LLI insns. */
- if ((ct & TCG_CT_CONST_32) == 0) {
- if (val <= 0xffffffff || (val & 0xffffffff) == 0) {
- return 0;
- }
- }
-
- return 1;
-}
-
/* Immediates to be used with logical OR. This is an optimization only,
since a full 64-bit immediate OR can always be performed with 4 sequential
OI[LH][LH] instructions. What we're looking for is immediates that we
@@ -571,25 +524,12 @@ static int tcg_target_const_match(tcg_target_long val,
}
/* Handle the modifiers. */
- if (ct & TCG_CT_CONST_NEG) {
- val = -val;
- }
if (ct & TCG_CT_CONST_32) {
val = (int32_t)val;
}
/* The following are mutually exclusive. */
- if (ct & TCG_CT_CONST_ADDI) {
- /* Immediates that may be used with add. If we have the
- extended-immediates facility then we have ADD IMMEDIATE
- with signed and unsigned 32-bit, otherwise we have only
- ADD HALFWORD IMMEDIATE with a signed 16-bit. */
- if (facilities & FACILITY_EXT_IMM) {
- return val == (int32_t)val || val == (uint32_t)val;
- } else {
- return val == (int16_t)val;
- }
- } else if (ct & TCG_CT_CONST_MULI) {
+ if (ct & TCG_CT_CONST_MULI) {
/* Immediates that may be used with multiply. If we have the
general-instruction-extensions, then we have MULTIPLY SINGLE
IMMEDIATE with a signed 32-bit, otherwise we have only
@@ -599,8 +539,6 @@ static int tcg_target_const_match(tcg_target_long val,
} else {
return val == (int16_t)val;
}
- } else if (ct & TCG_CT_CONST_ANDI) {
- return tcg_match_andi(ct, val);
} else if (ct & TCG_CT_CONST_ORI) {
return tcg_match_ori(ct, val);
} else if (ct & TCG_CT_CONST_XORI) {
@@ -625,6 +563,12 @@ static void tcg_out_insn_RRE(TCGContext *s, S390Opcode op,
tcg_out32(s, (op << 16) | (r1 << 4) | r2);
}
+static void tcg_out_insn_RRF(TCGContext *s, S390Opcode op,
+ TCGReg r1, TCGReg r2, int m3)
+{
+ tcg_out32(s, (op << 16) | (m3 << 12) | (r1 << 4) | r2);
+}
+
static void tcg_out_insn_RI(TCGContext *s, S390Opcode op, TCGReg r1, int i2)
{
tcg_out32(s, (op << 16) | (r1 << 20) | (i2 & 0xffff));
@@ -770,7 +714,7 @@ static void tcg_out_movi(TCGContext *s, TCGType type,
/* If we get here, both the high and low parts have non-zero bits. */
/* Recurse to load the lower 32-bits. */
- tcg_out_movi(s, TCG_TYPE_I32, ret, sval);
+ tcg_out_movi(s, TCG_TYPE_I64, ret, uval & 0xffffffff);
/* Insert data into the high 32-bits. */
uval = uval >> 31 >> 1;
@@ -804,10 +748,11 @@ static void tcg_out_mem(TCGContext *s, S390Opcode opc_rx, S390Opcode opc_rxy,
tcg_target_long ofs)
{
if (ofs < -0x80000 || ofs >= 0x80000) {
- /* Combine the low 16 bits of the offset with the actual load insn;
- the high 48 bits must come from an immediate load. */
- tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0, ofs & ~0xffff);
- ofs &= 0xffff;
+ /* Combine the low 20 bits of the offset with the actual load insn;
+ the high 44 bits must come from an immediate load. */
+ tcg_target_long low = ((ofs & 0xfffff) ^ 0x80000) - 0x80000;
+ tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0, ofs - low);
+ ofs = low;
/* If we were already given an index register, add it in. */
if (index != TCG_REG_NONE) {
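[editor's note: the low/high split above depends on sign-extending the 20-bit long-displacement field; a minimal standalone sketch of that xor/subtract trick, with a hypothetical helper name and assuming two's-complement arithmetic, not part of the patch]

#include <assert.h>
#include <stdint.h>

/* Sign-extend the low 20 bits of a displacement, as tcg_out_mem does above. */
static int64_t sext20(int64_t ofs)
{
    return ((ofs & 0xfffff) ^ 0x80000) - 0x80000;
}

int main(void)
{
    assert(sext20(0x0123456) == 0x23456);               /* positive low part */
    assert(sext20(0x01fff80) == -0x80);                 /* negative low part */
    /* The remainder loaded into TCG_TMP0 is always a multiple of 0x100000. */
    assert(0x01fff80 - sext20(0x01fff80) == 0x200000);
    return 0;
}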
@@ -866,6 +811,15 @@ static void tcg_out_ld_abs(TCGContext *s, TCGType type, TCGReg dest, void *abs)
tcg_out_ld(s, type, dest, dest, addr & 0xffff);
}
+static inline void tcg_out_risbg(TCGContext *s, TCGReg dest, TCGReg src,
+ int msb, int lsb, int ofs, int z)
+{
+ /* Format RIE-f */
+ tcg_out16(s, (RIE_RISBG & 0xff00) | (dest << 4) | src);
+ tcg_out16(s, (msb << 8) | (z << 7) | lsb);
+ tcg_out16(s, (ofs << 8) | (RIE_RISBG & 0xff));
+}
+
static void tgen_ext8s(TCGContext *s, TCGType type, TCGReg dest, TCGReg src)
{
if (facilities & FACILITY_EXT_IMM) {
@@ -956,30 +910,37 @@ static inline void tgen_ext32u(TCGContext *s, TCGReg dest, TCGReg src)
tcg_out_insn(s, RRE, LLGFR, dest, src);
}
-static inline void tgen32_addi(TCGContext *s, TCGReg dest, int32_t val)
+/* Accept bit patterns like these:
+ 0....01....1
+ 1....10....0
+ 1..10..01..1
+ 0..01..10..0
+ Copied from gcc sources. */
+static inline bool risbg_mask(uint64_t c)
{
- if (val == (int16_t)val) {
- tcg_out_insn(s, RI, AHI, dest, val);
- } else {
- tcg_out_insn(s, RIL, AFI, dest, val);
- }
-}
-
-static inline void tgen64_addi(TCGContext *s, TCGReg dest, int64_t val)
-{
- if (val == (int16_t)val) {
- tcg_out_insn(s, RI, AGHI, dest, val);
- } else if (val == (int32_t)val) {
- tcg_out_insn(s, RIL, AGFI, dest, val);
- } else if (val == (uint32_t)val) {
- tcg_out_insn(s, RIL, ALGFI, dest, val);
- } else {
- tcg_abort();
- }
-
+ uint64_t lsb;
+ /* We don't change the number of transitions by inverting,
+ so make sure we start with the LSB zero. */
+ if (c & 1) {
+ c = ~c;
+ }
+ /* Reject all zeros or all ones. */
+ if (c == 0) {
+ return false;
+ }
+ /* Find the first transition. */
+ lsb = c & -c;
+ /* Invert to look for a second transition. */
+ c = ~c;
+ /* Erase the first transition. */
+ c &= -lsb;
+ /* Find the second transition, if any. */
+ lsb = c & -c;
+ /* Match if all the bits are 1's, or if c is zero. */
+ return c == -lsb;
}
-static void tgen64_andi(TCGContext *s, TCGReg dest, tcg_target_ulong val)
+static void tgen_andi(TCGContext *s, TCGType type, TCGReg dest, uint64_t val)
{
static const S390Opcode ni_insns[4] = {
RI_NILL, RI_NILH, RI_NIHL, RI_NIHH
@@ -987,63 +948,64 @@ static void tgen64_andi(TCGContext *s, TCGReg dest, tcg_target_ulong val)
static const S390Opcode nif_insns[2] = {
RIL_NILF, RIL_NIHF
};
-
+ uint64_t valid = (type == TCG_TYPE_I32 ? 0xffffffffull : -1ull);
int i;
- /* Look for no-op. */
- if (val == -1) {
- return;
- }
-
/* Look for the zero-extensions. */
- if (val == 0xffffffff) {
+ if ((val & valid) == 0xffffffff) {
tgen_ext32u(s, dest, dest);
return;
}
-
if (facilities & FACILITY_EXT_IMM) {
- if (val == 0xff) {
+ if ((val & valid) == 0xff) {
tgen_ext8u(s, TCG_TYPE_I64, dest, dest);
return;
}
- if (val == 0xffff) {
+ if ((val & valid) == 0xffff) {
tgen_ext16u(s, TCG_TYPE_I64, dest, dest);
return;
}
+ }
- /* Try all 32-bit insns that can perform it in one go. */
- for (i = 0; i < 4; i++) {
- tcg_target_ulong mask = ~(0xffffull << i*16);
- if ((val & mask) == mask) {
- tcg_out_insn_RI(s, ni_insns[i], dest, val >> i*16);
- return;
- }
+ /* Try all 32-bit insns that can perform it in one go. */
+ for (i = 0; i < 4; i++) {
+ tcg_target_ulong mask = ~(0xffffull << i*16);
+ if (((val | ~valid) & mask) == mask) {
+ tcg_out_insn_RI(s, ni_insns[i], dest, val >> i*16);
+ return;
}
+ }
- /* Try all 48-bit insns that can perform it in one go. */
- if (facilities & FACILITY_EXT_IMM) {
- for (i = 0; i < 2; i++) {
- tcg_target_ulong mask = ~(0xffffffffull << i*32);
- if ((val & mask) == mask) {
- tcg_out_insn_RIL(s, nif_insns[i], dest, val >> i*32);
- return;
- }
+ /* Try all 48-bit insns that can perform it in one go. */
+ if (facilities & FACILITY_EXT_IMM) {
+ for (i = 0; i < 2; i++) {
+ tcg_target_ulong mask = ~(0xffffffffull << i*32);
+ if (((val | ~valid) & mask) == mask) {
+ tcg_out_insn_RIL(s, nif_insns[i], dest, val >> i*32);
+ return;
}
}
+ }
+ if ((facilities & FACILITY_GEN_INST_EXT) && risbg_mask(val)) {
+ int msb, lsb;
+ if ((val & 0x8000000000000001ull) == 0x8000000000000001ull) {
+ /* Achieve wraparound by swapping msb and lsb. */
+ msb = 63 - ctz64(~val);
+ lsb = clz64(~val) + 1;
+ } else {
+ msb = clz64(val);
+ lsb = 63 - ctz64(val);
+ }
+ tcg_out_risbg(s, dest, dest, msb, lsb, 0, 1);
+ return;
+ }
- /* Perform the AND via sequential modifications to the high and low
- parts. Do this via recursion to handle 16-bit vs 32-bit masks in
- each half. */
- tgen64_andi(s, dest, val | 0xffffffff00000000ull);
- tgen64_andi(s, dest, val | 0x00000000ffffffffull);
+ /* Fall back to loading the constant. */
+ tcg_out_movi(s, type, TCG_TMP0, val);
+ if (type == TCG_TYPE_I32) {
+ tcg_out_insn(s, RR, NR, dest, TCG_TMP0);
} else {
- /* With no extended-immediate facility, just emit the sequence. */
- for (i = 0; i < 4; i++) {
- tcg_target_ulong mask = 0xffffull << i*16;
- if ((val & mask) != mask) {
- tcg_out_insn_RI(s, ni_insns[i], dest, val >> i*16);
- }
- }
+ tcg_out_insn(s, RRE, NGR, dest, TCG_TMP0);
}
}
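[editor's note: a standalone sketch of the transition-counting test used by risbg_mask above, with a hypothetical test driver that is not part of the patch; it shows that contiguous and wrap-around masks are accepted while split masks are rejected]

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

/* Same transition-counting test as risbg_mask above. */
static bool risbg_mask(uint64_t c)
{
    uint64_t lsb;
    if (c & 1) {
        c = ~c;                 /* start with the LSB zero */
    }
    if (c == 0) {
        return false;           /* reject all zeros or all ones */
    }
    lsb = c & -c;               /* first transition */
    c = ~c;
    c &= -lsb;                  /* erase it */
    lsb = c & -c;               /* second transition, if any */
    return c == -lsb;
}

int main(void)
{
    assert(risbg_mask(0x0000000000ffff00ull));   /* one contiguous run of 1's */
    assert(risbg_mask(0xff0000000000000full));   /* run wrapping around bits 63/0 */
    assert(!risbg_mask(0x0f0full));              /* two separate runs: rejected */
    return 0;
}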
@@ -1156,9 +1118,9 @@ static int tgen_cmp(TCGContext *s, TCGType type, TCGCond c, TCGReg r1,
}
static void tgen_setcond(TCGContext *s, TCGType type, TCGCond c,
- TCGReg dest, TCGReg r1, TCGArg c2, int c2const)
+ TCGReg dest, TCGReg c1, TCGArg c2, int c2const)
{
- int cc = tgen_cmp(s, type, c, r1, c2, c2const);
+ int cc = tgen_cmp(s, type, c, c1, c2, c2const);
/* Emit: r1 = 1; if (cc) goto over; r1 = 0; over: */
tcg_out_movi(s, type, dest, 1);
@@ -1166,6 +1128,36 @@ static void tgen_setcond(TCGContext *s, TCGType type, TCGCond c,
tcg_out_movi(s, type, dest, 0);
}
+static void tgen_movcond(TCGContext *s, TCGType type, TCGCond c, TCGReg dest,
+ TCGReg c1, TCGArg c2, int c2const, TCGReg r3)
+{
+ int cc;
+ if (facilities & FACILITY_LOAD_ON_COND) {
+ cc = tgen_cmp(s, type, c, c1, c2, c2const);
+ tcg_out_insn(s, RRF, LOCGR, dest, r3, cc);
+ } else {
+ c = tcg_invert_cond(c);
+ cc = tgen_cmp(s, type, c, c1, c2, c2const);
+
+ /* Emit: if (cc) goto over; dest = r3; over: */
+ tcg_out_insn(s, RI, BRC, cc, (4 + 4) >> 1);
+ tcg_out_insn(s, RRE, LGR, dest, r3);
+ }
+}
+
+bool tcg_target_deposit_valid(int ofs, int len)
+{
+ return (facilities & FACILITY_GEN_INST_EXT) != 0;
+}
+
+static void tgen_deposit(TCGContext *s, TCGReg dest, TCGReg src,
+ int ofs, int len)
+{
+ int lsb = (63 - ofs);
+ int msb = lsb - (len - 1);
+ tcg_out_risbg(s, dest, src, msb, lsb, ofs, 0);
+}
+
static void tgen_gotoi(TCGContext *s, int cc, tcg_target_long dest)
{
tcg_target_long off = (dest - (tcg_target_long)s->code_ptr) >> 1;
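[editor's note: a cross-check of the msb/lsb mapping in tgen_deposit above — a small software model of the RISBG insert with the zero flag clear, compared against the TCG deposit semantics; a sketch assuming ofs + len <= 64, not part of the patch]

#include <assert.h>
#include <stdint.h>

/* Model of RISBG with rotate amount 'ofs' and zero flag clear: bits msb..lsb
   of dest (S/390 numbering, bit 0 = MSB) are replaced by the corresponding
   bits of src rotated left by ofs; all other dest bits are kept. */
static uint64_t risbg_model(uint64_t dest, uint64_t src, int msb, int lsb, int ofs)
{
    uint64_t rot = ofs ? (src << ofs) | (src >> (64 - ofs)) : src;
    uint64_t field = (~0ull >> msb) & (~0ull << (63 - lsb));
    return (dest & ~field) | (rot & field);
}

int main(void)
{
    uint64_t dest = 0x1122334455667788ull, src = 0xffffffffffffabcdull;
    int ofs = 8, len = 16;
    int lsb = 63 - ofs, msb = lsb - (len - 1);          /* as in tgen_deposit */
    uint64_t m = (len == 64 ? ~0ull : (1ull << len) - 1);
    uint64_t want = (dest & ~(m << ofs)) | ((src & m) << ofs);
    assert(risbg_model(dest, src, msb, lsb, ofs) == want);
    return 0;
}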
@@ -1239,7 +1231,7 @@ static void tgen_brcond(TCGContext *s, TCGType type, TCGCond c,
int cc;
if (facilities & FACILITY_GEN_INST_EXT) {
- bool is_unsigned = (c > TCG_COND_GT);
+ bool is_unsigned = is_unsigned_cond(c);
bool in_range;
S390Opcode opc;
@@ -1405,37 +1397,29 @@ static void tcg_out_qemu_st_direct(TCGContext *s, int opc, TCGReg data,
}
#if defined(CONFIG_SOFTMMU)
-static void tgen64_andi_tmp(TCGContext *s, TCGReg dest, tcg_target_ulong val)
+static TCGReg tcg_prepare_qemu_ldst(TCGContext* s, TCGReg data_reg,
+ TCGReg addr_reg, int mem_index, int opc,
+ uint16_t **label2_ptr_p, int is_store)
{
- if (tcg_match_andi(0, val)) {
- tcg_out_movi(s, TCG_TYPE_I64, TCG_TMP0, val);
- tcg_out_insn(s, RRE, NGR, dest, TCG_TMP0);
- } else {
- tgen64_andi(s, dest, val);
- }
-}
-
-static void tcg_prepare_qemu_ldst(TCGContext* s, TCGReg data_reg,
- TCGReg addr_reg, int mem_index, int opc,
- uint16_t **label2_ptr_p, int is_store)
-{
- const TCGReg arg0 = TCG_REG_R2;
- const TCGReg arg1 = TCG_REG_R3;
+ const TCGReg arg0 = tcg_target_call_iarg_regs[0];
+ const TCGReg arg1 = tcg_target_call_iarg_regs[1];
+ const TCGReg arg2 = tcg_target_call_iarg_regs[2];
+ const TCGReg arg3 = tcg_target_call_iarg_regs[3];
int s_bits = opc & 3;
uint16_t *label1_ptr;
tcg_target_long ofs;
if (TARGET_LONG_BITS == 32) {
- tgen_ext32u(s, arg0, addr_reg);
+ tgen_ext32u(s, arg1, addr_reg);
} else {
- tcg_out_mov(s, TCG_TYPE_I64, arg0, addr_reg);
+ tcg_out_mov(s, TCG_TYPE_I64, arg1, addr_reg);
}
- tcg_out_sh64(s, RSY_SRLG, arg1, addr_reg, TCG_REG_NONE,
+ tcg_out_sh64(s, RSY_SRLG, arg2, addr_reg, TCG_REG_NONE,
TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
- tgen64_andi_tmp(s, arg0, TARGET_PAGE_MASK | ((1 << s_bits) - 1));
- tgen64_andi_tmp(s, arg1, (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS);
+ tgen_andi(s, TCG_TYPE_I64, arg1, TARGET_PAGE_MASK | ((1 << s_bits) - 1));
+ tgen_andi(s, TCG_TYPE_I64, arg2, (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS);
if (is_store) {
ofs = offsetof(CPUArchState, tlb_table[mem_index][0].addr_write);
@@ -1445,15 +1429,15 @@ static void tcg_prepare_qemu_ldst(TCGContext* s, TCGReg data_reg,
assert(ofs < 0x80000);
if (TARGET_LONG_BITS == 32) {
- tcg_out_mem(s, RX_C, RXY_CY, arg0, arg1, TCG_AREG0, ofs);
+ tcg_out_mem(s, RX_C, RXY_CY, arg1, arg2, TCG_AREG0, ofs);
} else {
- tcg_out_mem(s, 0, RXY_CG, arg0, arg1, TCG_AREG0, ofs);
+ tcg_out_mem(s, 0, RXY_CG, arg1, arg2, TCG_AREG0, ofs);
}
if (TARGET_LONG_BITS == 32) {
- tgen_ext32u(s, arg0, addr_reg);
+ tgen_ext32u(s, arg1, addr_reg);
} else {
- tcg_out_mov(s, TCG_TYPE_I64, arg0, addr_reg);
+ tcg_out_mov(s, TCG_TYPE_I64, arg1, addr_reg);
}
label1_ptr = (uint16_t*)s->code_ptr;
@@ -1467,56 +1451,42 @@ static void tcg_prepare_qemu_ldst(TCGContext* s, TCGReg data_reg,
for the calling convention. */
switch (opc) {
case LD_UINT8:
- tgen_ext8u(s, TCG_TYPE_I64, arg1, data_reg);
+ tgen_ext8u(s, TCG_TYPE_I64, arg2, data_reg);
break;
case LD_UINT16:
- tgen_ext16u(s, TCG_TYPE_I64, arg1, data_reg);
+ tgen_ext16u(s, TCG_TYPE_I64, arg2, data_reg);
break;
case LD_UINT32:
- tgen_ext32u(s, arg1, data_reg);
+ tgen_ext32u(s, arg2, data_reg);
break;
case LD_UINT64:
- tcg_out_mov(s, TCG_TYPE_I64, arg1, data_reg);
+ tcg_out_mov(s, TCG_TYPE_I64, arg2, data_reg);
break;
default:
tcg_abort();
}
- tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R4, mem_index);
- /* XXX/FIXME: suboptimal */
- tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[3],
- tcg_target_call_iarg_regs[2]);
- tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[2],
- tcg_target_call_iarg_regs[1]);
- tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[1],
- tcg_target_call_iarg_regs[0]);
- tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[0],
- TCG_AREG0);
+ tcg_out_movi(s, TCG_TYPE_I32, arg3, mem_index);
+ tcg_out_mov(s, TCG_TYPE_I64, arg0, TCG_AREG0);
tgen_calli(s, (tcg_target_ulong)qemu_st_helpers[s_bits]);
} else {
- tcg_out_movi(s, TCG_TYPE_I32, arg1, mem_index);
- /* XXX/FIXME: suboptimal */
- tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[2],
- tcg_target_call_iarg_regs[1]);
- tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[1],
- tcg_target_call_iarg_regs[0]);
- tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[0],
- TCG_AREG0);
+ tcg_out_movi(s, TCG_TYPE_I32, arg2, mem_index);
+ tcg_out_mov(s, TCG_TYPE_I64, arg0, TCG_AREG0);
tgen_calli(s, (tcg_target_ulong)qemu_ld_helpers[s_bits]);
/* sign extension */
switch (opc) {
case LD_INT8:
- tgen_ext8s(s, TCG_TYPE_I64, data_reg, arg0);
+ tgen_ext8s(s, TCG_TYPE_I64, data_reg, TCG_REG_R2);
break;
case LD_INT16:
- tgen_ext16s(s, TCG_TYPE_I64, data_reg, arg0);
+ tgen_ext16s(s, TCG_TYPE_I64, data_reg, TCG_REG_R2);
break;
case LD_INT32:
- tgen_ext32s(s, data_reg, arg0);
+ tgen_ext32s(s, data_reg, TCG_REG_R2);
break;
default:
/* unsigned -> just copy */
- tcg_out_mov(s, TCG_TYPE_I64, data_reg, arg0);
+ tcg_out_mov(s, TCG_TYPE_I64, data_reg, TCG_REG_R2);
break;
}
}
@@ -1533,7 +1503,9 @@ static void tcg_prepare_qemu_ldst(TCGContext* s, TCGReg data_reg,
ofs = offsetof(CPUArchState, tlb_table[mem_index][0].addend);
assert(ofs < 0x80000);
- tcg_out_mem(s, 0, RXY_AG, arg0, arg1, TCG_AREG0, ofs);
+ tcg_out_mem(s, 0, RXY_AG, arg1, arg2, TCG_AREG0, ofs);
+
+ return arg1;
}
static void tcg_finish_qemu_ldst(TCGContext* s, uint16_t *label2_ptr)
@@ -1579,10 +1551,10 @@ static void tcg_out_qemu_ld(TCGContext* s, const TCGArg* args, int opc)
#if defined(CONFIG_SOFTMMU)
mem_index = *args;
- tcg_prepare_qemu_ldst(s, data_reg, addr_reg, mem_index,
- opc, &label2_ptr, 0);
+ addr_reg = tcg_prepare_qemu_ldst(s, data_reg, addr_reg, mem_index,
+ opc, &label2_ptr, 0);
- tcg_out_qemu_ld_direct(s, opc, data_reg, TCG_REG_R2, TCG_REG_NONE, 0);
+ tcg_out_qemu_ld_direct(s, opc, data_reg, addr_reg, TCG_REG_NONE, 0);
tcg_finish_qemu_ldst(s, label2_ptr);
#else
@@ -1608,10 +1580,10 @@ static void tcg_out_qemu_st(TCGContext* s, const TCGArg* args, int opc)
#if defined(CONFIG_SOFTMMU)
mem_index = *args;
- tcg_prepare_qemu_ldst(s, data_reg, addr_reg, mem_index,
- opc, &label2_ptr, 1);
+ addr_reg = tcg_prepare_qemu_ldst(s, data_reg, addr_reg, mem_index,
+ opc, &label2_ptr, 1);
- tcg_out_qemu_st_direct(s, opc, data_reg, TCG_REG_R2, TCG_REG_NONE, 0);
+ tcg_out_qemu_st_direct(s, opc, data_reg, addr_reg, TCG_REG_NONE, 0);
tcg_finish_qemu_ldst(s, label2_ptr);
#else
@@ -1620,19 +1592,15 @@ static void tcg_out_qemu_st(TCGContext* s, const TCGArg* args, int opc)
#endif
}
-#if TCG_TARGET_REG_BITS == 64
# define OP_32_64(x) \
case glue(glue(INDEX_op_,x),_i32): \
case glue(glue(INDEX_op_,x),_i64)
-#else
-# define OP_32_64(x) \
- case glue(glue(INDEX_op_,x),_i32)
-#endif
static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
const TCGArg *args, const int *const_args)
{
S390Opcode op;
+ TCGArg a0, a1, a2;
switch (opc) {
case INDEX_op_exit_tb:
@@ -1708,23 +1676,38 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
break;
case INDEX_op_add_i32:
+ a0 = args[0], a1 = args[1], a2 = (int32_t)args[2];
if (const_args[2]) {
- tgen32_addi(s, args[0], args[2]);
+ do_addi_32:
+ if (a0 == a1) {
+ if (a2 == (int16_t)a2) {
+ tcg_out_insn(s, RI, AHI, a0, a2);
+ break;
+ }
+ if (facilities & FACILITY_EXT_IMM) {
+ tcg_out_insn(s, RIL, AFI, a0, a2);
+ break;
+ }
+ }
+ tcg_out_mem(s, RX_LA, RXY_LAY, a0, a1, TCG_REG_NONE, a2);
+ } else if (a0 == a1) {
+ tcg_out_insn(s, RR, AR, a0, a2);
} else {
- tcg_out_insn(s, RR, AR, args[0], args[2]);
+ tcg_out_insn(s, RX, LA, a0, a1, a2, 0);
}
break;
case INDEX_op_sub_i32:
+ a0 = args[0], a1 = args[1], a2 = (int32_t)args[2];
if (const_args[2]) {
- tgen32_addi(s, args[0], -args[2]);
- } else {
- tcg_out_insn(s, RR, SR, args[0], args[2]);
+ a2 = -a2;
+ goto do_addi_32;
}
+ tcg_out_insn(s, RR, SR, args[0], args[2]);
break;
case INDEX_op_and_i32:
if (const_args[2]) {
- tgen64_andi(s, args[0], args[2] | 0xffffffff00000000ull);
+ tgen_andi(s, TCG_TYPE_I32, args[0], args[2]);
} else {
tcg_out_insn(s, RR, NR, args[0], args[2]);
}
@@ -1824,6 +1807,17 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
tcg_out_insn(s, RRE, LRVR, args[0], args[1]);
break;
+ case INDEX_op_add2_i32:
+ /* ??? Make use of ALFI. */
+ tcg_out_insn(s, RR, ALR, args[0], args[4]);
+ tcg_out_insn(s, RRE, ALCR, args[1], args[5]);
+ break;
+ case INDEX_op_sub2_i32:
+ /* ??? Make use of SLFI. */
+ tcg_out_insn(s, RR, SLR, args[0], args[4]);
+ tcg_out_insn(s, RRE, SLBR, args[1], args[5]);
+ break;
+
case INDEX_op_br:
tgen_branch(s, S390_CC_ALWAYS, args[0]);
break;
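[editor's note: for reference, a minimal model of what the ALR/ALCR pair above computes for add2_i32 — the add-logical sets the carry and the add-logical-with-carry consumes it; hypothetical helper, not part of the patch]

#include <assert.h>
#include <stdint.h>

static void add2_i32_model(uint32_t *lo, uint32_t *hi,
                           uint32_t al, uint32_t ah,
                           uint32_t bl, uint32_t bh)
{
    uint32_t l = al + bl;        /* ALR: low word, produces a carry */
    uint32_t carry = l < al;     /* carry out of the low addition */
    *lo = l;
    *hi = ah + bh + carry;       /* ALCR: high word plus carry in */
}

int main(void)
{
    uint32_t lo, hi;
    add2_i32_model(&lo, &hi, 0xffffffffu, 0, 1, 0);
    assert(lo == 0 && hi == 1);  /* carry propagates into the high word */
    return 0;
}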
@@ -1836,6 +1830,10 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
tgen_setcond(s, TCG_TYPE_I32, args[3], args[0], args[1],
args[2], const_args[2]);
break;
+ case INDEX_op_movcond_i32:
+ tgen_movcond(s, TCG_TYPE_I32, args[5], args[0], args[1],
+ args[2], const_args[2], args[3]);
+ break;
case INDEX_op_qemu_ld8u:
tcg_out_qemu_ld(s, args, LD_UINT8);
@@ -1870,7 +1868,6 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
tcg_out_qemu_st(s, args, LD_UINT64);
break;
-#if TCG_TARGET_REG_BITS == 64
case INDEX_op_mov_i64:
tcg_out_mov(s, TCG_TYPE_I64, args[0], args[1]);
break;
@@ -1899,15 +1896,39 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
break;
case INDEX_op_add_i64:
+ a0 = args[0], a1 = args[1], a2 = args[2];
if (const_args[2]) {
- tgen64_addi(s, args[0], args[2]);
+ do_addi_64:
+ if (a0 == a1) {
+ if (a2 == (int16_t)a2) {
+ tcg_out_insn(s, RI, AGHI, a0, a2);
+ break;
+ }
+ if (facilities & FACILITY_EXT_IMM) {
+ if (a2 == (int32_t)a2) {
+ tcg_out_insn(s, RIL, AGFI, a0, a2);
+ break;
+ } else if (a2 == (uint32_t)a2) {
+ tcg_out_insn(s, RIL, ALGFI, a0, a2);
+ break;
+ } else if (-a2 == (uint32_t)-a2) {
+ tcg_out_insn(s, RIL, SLGFI, a0, -a2);
+ break;
+ }
+ }
+ }
+ tcg_out_mem(s, RX_LA, RXY_LAY, a0, a1, TCG_REG_NONE, a2);
+ } else if (a0 == a1) {
+ tcg_out_insn(s, RRE, AGR, a0, a2);
} else {
- tcg_out_insn(s, RRE, AGR, args[0], args[2]);
+ tcg_out_insn(s, RX, LA, a0, a1, a2, 0);
}
break;
case INDEX_op_sub_i64:
+ a0 = args[0], a1 = args[1], a2 = args[2];
if (const_args[2]) {
- tgen64_addi(s, args[0], -args[2]);
+ a2 = -a2;
+ goto do_addi_64;
} else {
tcg_out_insn(s, RRE, SGR, args[0], args[2]);
}
@@ -1915,7 +1936,7 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
case INDEX_op_and_i64:
if (const_args[2]) {
- tgen64_andi(s, args[0], args[2]);
+ tgen_andi(s, TCG_TYPE_I64, args[0], args[2]);
} else {
tcg_out_insn(s, RRE, NGR, args[0], args[2]);
}
@@ -1964,6 +1985,9 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
case INDEX_op_divu2_i64:
tcg_out_insn(s, RRE, DLGR, TCG_REG_R2, args[4]);
break;
+ case INDEX_op_mulu2_i64:
+ tcg_out_insn(s, RRE, MLGR, TCG_REG_R2, args[3]);
+ break;
case INDEX_op_shl_i64:
op = RSY_SLLG;
@@ -2020,6 +2044,17 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
tgen_ext32u(s, args[0], args[1]);
break;
+ case INDEX_op_add2_i64:
+ /* ??? Make use of ALGFI and SLGFI. */
+ tcg_out_insn(s, RRE, ALGR, args[0], args[4]);
+ tcg_out_insn(s, RRE, ALCGR, args[1], args[5]);
+ break;
+ case INDEX_op_sub2_i64:
+ /* ??? Make use of ALGFI and SLGFI. */
+ tcg_out_insn(s, RRE, SLGR, args[0], args[4]);
+ tcg_out_insn(s, RRE, SLBGR, args[1], args[5]);
+ break;
+
case INDEX_op_brcond_i64:
tgen_brcond(s, TCG_TYPE_I64, args[2], args[0],
args[1], const_args[1], args[3]);
@@ -2028,6 +2063,10 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
tgen_setcond(s, TCG_TYPE_I64, args[3], args[0], args[1],
args[2], const_args[2]);
break;
+ case INDEX_op_movcond_i64:
+ tgen_movcond(s, TCG_TYPE_I64, args[5], args[0], args[1],
+ args[2], const_args[2], args[3]);
+ break;
case INDEX_op_qemu_ld32u:
tcg_out_qemu_ld(s, args, LD_UINT32);
@@ -2035,7 +2074,10 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
case INDEX_op_qemu_ld32s:
tcg_out_qemu_ld(s, args, LD_INT32);
break;
-#endif /* TCG_TARGET_REG_BITS == 64 */
+
+ OP_32_64(deposit):
+ tgen_deposit(s, args[0], args[2], args[3], args[4]);
+ break;
default:
fprintf(stderr,"unimplemented opc 0x%x\n",opc);
@@ -2061,14 +2103,14 @@ static const TCGTargetOpDef s390_op_defs[] = {
{ INDEX_op_st16_i32, { "r", "r" } },
{ INDEX_op_st_i32, { "r", "r" } },
- { INDEX_op_add_i32, { "r", "0", "rWI" } },
- { INDEX_op_sub_i32, { "r", "0", "rWNI" } },
+ { INDEX_op_add_i32, { "r", "r", "ri" } },
+ { INDEX_op_sub_i32, { "r", "0", "ri" } },
{ INDEX_op_mul_i32, { "r", "0", "rK" } },
{ INDEX_op_div2_i32, { "b", "a", "0", "1", "r" } },
{ INDEX_op_divu2_i32, { "b", "a", "0", "1", "r" } },
- { INDEX_op_and_i32, { "r", "0", "rWA" } },
+ { INDEX_op_and_i32, { "r", "0", "ri" } },
{ INDEX_op_or_i32, { "r", "0", "rWO" } },
{ INDEX_op_xor_i32, { "r", "0", "rWX" } },
@@ -2089,8 +2131,13 @@ static const TCGTargetOpDef s390_op_defs[] = {
{ INDEX_op_bswap16_i32, { "r", "r" } },
{ INDEX_op_bswap32_i32, { "r", "r" } },
+ { INDEX_op_add2_i32, { "r", "r", "0", "1", "r", "r" } },
+ { INDEX_op_sub2_i32, { "r", "r", "0", "1", "r", "r" } },
+
{ INDEX_op_brcond_i32, { "r", "rWC" } },
{ INDEX_op_setcond_i32, { "r", "r", "rWC" } },
+ { INDEX_op_movcond_i32, { "r", "r", "rWC", "r", "0" } },
+ { INDEX_op_deposit_i32, { "r", "0", "r" } },
{ INDEX_op_qemu_ld8u, { "r", "L" } },
{ INDEX_op_qemu_ld8s, { "r", "L" } },
@@ -2104,7 +2151,6 @@ static const TCGTargetOpDef s390_op_defs[] = {
{ INDEX_op_qemu_st32, { "L", "L" } },
{ INDEX_op_qemu_st64, { "L", "L" } },
-#if defined(__s390x__)
{ INDEX_op_mov_i64, { "r", "r" } },
{ INDEX_op_movi_i64, { "r" } },
@@ -2121,14 +2167,15 @@ static const TCGTargetOpDef s390_op_defs[] = {
{ INDEX_op_st32_i64, { "r", "r" } },
{ INDEX_op_st_i64, { "r", "r" } },
- { INDEX_op_add_i64, { "r", "0", "rI" } },
- { INDEX_op_sub_i64, { "r", "0", "rNI" } },
+ { INDEX_op_add_i64, { "r", "r", "ri" } },
+ { INDEX_op_sub_i64, { "r", "0", "ri" } },
{ INDEX_op_mul_i64, { "r", "0", "rK" } },
{ INDEX_op_div2_i64, { "b", "a", "0", "1", "r" } },
{ INDEX_op_divu2_i64, { "b", "a", "0", "1", "r" } },
+ { INDEX_op_mulu2_i64, { "b", "a", "0", "r" } },
- { INDEX_op_and_i64, { "r", "0", "rA" } },
+ { INDEX_op_and_i64, { "r", "0", "ri" } },
{ INDEX_op_or_i64, { "r", "0", "rO" } },
{ INDEX_op_xor_i64, { "r", "0", "rX" } },
@@ -2152,12 +2199,16 @@ static const TCGTargetOpDef s390_op_defs[] = {
{ INDEX_op_bswap32_i64, { "r", "r" } },
{ INDEX_op_bswap64_i64, { "r", "r" } },
+ { INDEX_op_add2_i64, { "r", "r", "0", "1", "r", "r" } },
+ { INDEX_op_sub2_i64, { "r", "r", "0", "1", "r", "r" } },
+
{ INDEX_op_brcond_i64, { "r", "rC" } },
{ INDEX_op_setcond_i64, { "r", "r", "rC" } },
+ { INDEX_op_movcond_i64, { "r", "r", "rC", "r", "0" } },
+ { INDEX_op_deposit_i64, { "r", "0", "r" } },
{ INDEX_op_qemu_ld32u, { "r", "L" } },
{ INDEX_op_qemu_ld32s, { "r", "L" } },
-#endif
{ -1 },
};
@@ -2302,17 +2353,24 @@ static void tcg_target_init(TCGContext *s)
tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK);
tcg_add_target_add_op_defs(s390_op_defs);
- tcg_set_frame(s, TCG_AREG0, offsetof(CPUArchState, temp_buf),
- CPU_TEMP_BUF_NLONGS * sizeof(long));
}
static void tcg_target_qemu_prologue(TCGContext *s)
{
+ tcg_target_long frame_size;
+
/* stmg %r6,%r15,48(%r15) (save registers) */
tcg_out_insn(s, RXY, STMG, TCG_REG_R6, TCG_REG_R15, TCG_REG_R15, 48);
- /* aghi %r15,-160 (stack frame) */
- tcg_out_insn(s, RI, AGHI, TCG_REG_R15, -160);
+ /* aghi %r15,-frame_size */
+ frame_size = TCG_TARGET_CALL_STACK_OFFSET;
+ frame_size += TCG_STATIC_CALL_ARGS_SIZE;
+ frame_size += CPU_TEMP_BUF_NLONGS * sizeof(long);
+ tcg_out_insn(s, RI, AGHI, TCG_REG_R15, -frame_size);
+
+ tcg_set_frame(s, TCG_REG_CALL_STACK,
+ TCG_STATIC_CALL_ARGS_SIZE + TCG_TARGET_CALL_STACK_OFFSET,
+ CPU_TEMP_BUF_NLONGS * sizeof(long));
if (GUEST_BASE >= 0x80000) {
tcg_out_movi(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, GUEST_BASE);
@@ -2325,8 +2383,9 @@ static void tcg_target_qemu_prologue(TCGContext *s)
tb_ret_addr = s->code_ptr;
- /* lmg %r6,%r15,208(%r15) (restore registers) */
- tcg_out_insn(s, RXY, LMG, TCG_REG_R6, TCG_REG_R15, TCG_REG_R15, 208);
+ /* lmg %r6,%r15,fs+48(%r15) (restore registers) */
+ tcg_out_insn(s, RXY, LMG, TCG_REG_R6, TCG_REG_R15, TCG_REG_R15,
+ frame_size + 48);
/* br %r14 (return) */
tcg_out_insn(s, RR, BCR, S390_CC_ALWAYS, TCG_REG_R14);
diff --git a/tcg/s390/tcg-target.h b/tcg/s390/tcg-target.h
index 40211e6..42ca36c 100644
--- a/tcg/s390/tcg-target.h
+++ b/tcg/s390/tcg-target.h
@@ -63,14 +63,13 @@ typedef enum TCGReg {
#define TCG_TARGET_HAS_eqv_i32 0
#define TCG_TARGET_HAS_nand_i32 0
#define TCG_TARGET_HAS_nor_i32 0
-#define TCG_TARGET_HAS_deposit_i32 0
-#define TCG_TARGET_HAS_movcond_i32 0
-#define TCG_TARGET_HAS_add2_i32 0
-#define TCG_TARGET_HAS_sub2_i32 0
+#define TCG_TARGET_HAS_deposit_i32 1
+#define TCG_TARGET_HAS_movcond_i32 1
+#define TCG_TARGET_HAS_add2_i32 1
+#define TCG_TARGET_HAS_sub2_i32 1
#define TCG_TARGET_HAS_mulu2_i32 0
#define TCG_TARGET_HAS_muls2_i32 0
-#if TCG_TARGET_REG_BITS == 64
#define TCG_TARGET_HAS_div2_i64 1
#define TCG_TARGET_HAS_rot_i64 1
#define TCG_TARGET_HAS_ext8s_i64 1
@@ -89,18 +88,21 @@ typedef enum TCGReg {
#define TCG_TARGET_HAS_eqv_i64 0
#define TCG_TARGET_HAS_nand_i64 0
#define TCG_TARGET_HAS_nor_i64 0
-#define TCG_TARGET_HAS_deposit_i64 0
-#define TCG_TARGET_HAS_movcond_i64 0
-#define TCG_TARGET_HAS_add2_i64 0
-#define TCG_TARGET_HAS_sub2_i64 0
-#define TCG_TARGET_HAS_mulu2_i64 0
+#define TCG_TARGET_HAS_deposit_i64 1
+#define TCG_TARGET_HAS_movcond_i64 1
+#define TCG_TARGET_HAS_add2_i64 1
+#define TCG_TARGET_HAS_sub2_i64 1
+#define TCG_TARGET_HAS_mulu2_i64 1
#define TCG_TARGET_HAS_muls2_i64 0
-#endif
+
+extern bool tcg_target_deposit_valid(int ofs, int len);
+#define TCG_TARGET_deposit_i32_valid tcg_target_deposit_valid
+#define TCG_TARGET_deposit_i64_valid tcg_target_deposit_valid
/* used for function call generation */
#define TCG_REG_CALL_STACK TCG_REG_R15
#define TCG_TARGET_STACK_ALIGN 8
-#define TCG_TARGET_CALL_STACK_OFFSET 0
+#define TCG_TARGET_CALL_STACK_OFFSET 160
#define TCG_TARGET_EXTEND_ARGS 1
diff --git a/tcg/tci/tcg-target.c b/tcg/tci/tcg-target.c
index 2d561b3..d1241b5 100644
--- a/tcg/tci/tcg-target.c
+++ b/tcg/tci/tcg-target.c
@@ -40,14 +40,6 @@
/* Bitfield n...m (in 32 bit value). */
#define BITS(n, m) (((0xffffffffU << (31 - n)) >> (31 - n + m)) << m)
-/* Used for function call generation. */
-#define TCG_REG_CALL_STACK TCG_REG_R4
-#define TCG_TARGET_STACK_ALIGN 16
-#define TCG_TARGET_CALL_STACK_OFFSET 0
-
-/* TODO: documentation. */
-static uint8_t *tb_ret_addr;
-
/* Macros used in tcg_target_op_defs. */
#define R "r"
#define RI "ri"
@@ -513,7 +505,7 @@ static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
tcg_out_op_t(s, INDEX_op_ld_i64);
tcg_out_r(s, ret);
tcg_out_r(s, arg1);
- assert(arg2 == (uint32_t)arg2);
+ assert(arg2 == (int32_t)arg2);
tcg_out32(s, arg2);
#else
TODO();
@@ -636,7 +628,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
case INDEX_op_st_i64:
tcg_out_r(s, args[0]);
tcg_out_r(s, args[1]);
- assert(args[2] == (uint32_t)args[2]);
+ assert(args[2] == (int32_t)args[2]);
tcg_out32(s, args[2]);
break;
case INDEX_op_add_i32:
@@ -904,15 +896,19 @@ static void tcg_target_init(TCGContext *s)
/* TODO: Which registers should be set here? */
tcg_regset_set32(tcg_target_call_clobber_regs, 0,
BIT(TCG_TARGET_NB_REGS) - 1);
+
tcg_regset_clear(s->reserved_regs);
tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK);
tcg_add_target_add_op_defs(tcg_target_op_defs);
- tcg_set_frame(s, TCG_AREG0, offsetof(CPUArchState, temp_buf),
+
+ /* We use negative offsets from "sp" so that we can distinguish
+ stores that might pretend to be call arguments. */
+ tcg_set_frame(s, TCG_REG_CALL_STACK,
+ -CPU_TEMP_BUF_NLONGS * sizeof(long),
CPU_TEMP_BUF_NLONGS * sizeof(long));
}
/* Generate global QEMU prologue and epilogue code. */
-static void tcg_target_qemu_prologue(TCGContext *s)
+static inline void tcg_target_qemu_prologue(TCGContext *s)
{
- tb_ret_addr = s->code_ptr;
}
diff --git a/tcg/tci/tcg-target.h b/tcg/tci/tcg-target.h
index 1f17576..0395bbb 100644
--- a/tcg/tci/tcg-target.h
+++ b/tcg/tci/tcg-target.h
@@ -127,7 +127,6 @@ typedef enum {
TCG_REG_R5,
TCG_REG_R6,
TCG_REG_R7,
- TCG_AREG0 = TCG_REG_R7,
#if TCG_TARGET_NB_REGS >= 16
TCG_REG_R8,
TCG_REG_R9,
@@ -160,6 +159,13 @@ typedef enum {
TCG_CONST = UINT8_MAX
} TCGReg;
+#define TCG_AREG0 (TCG_TARGET_NB_REGS - 2)
+
+/* Used for function call generation. */
+#define TCG_REG_CALL_STACK (TCG_TARGET_NB_REGS - 1)
+#define TCG_TARGET_CALL_STACK_OFFSET 0
+#define TCG_TARGET_STACK_ALIGN 16
+
void tci_disas(uint8_t opc);
tcg_target_ulong tcg_qemu_tb_exec(CPUArchState *env, uint8_t *tb_ptr);
diff --git a/tci.c b/tci.c
index 2b2c11f..c742c8d 100644
--- a/tci.c
+++ b/tci.c
@@ -51,11 +51,6 @@ typedef uint64_t (*helper_function)(tcg_target_ulong, tcg_target_ulong,
tcg_target_ulong);
#endif
-/* TCI can optionally use a global register variable for env. */
-#if !defined(AREG0)
-CPUArchState *env;
-#endif
-
/* Targets which don't use GETPC also don't need tci_tb_ptr
which makes them a little faster. */
#if defined(GETPC)
@@ -117,6 +112,7 @@ static void tci_write_reg(TCGReg index, tcg_target_ulong value)
{
assert(index < ARRAY_SIZE(tci_reg));
assert(index != TCG_AREG0);
+ assert(index != TCG_REG_CALL_STACK);
tci_reg[index] = value;
}
@@ -182,7 +178,7 @@ static tcg_target_ulong tci_read_i(uint8_t **tb_ptr)
return value;
}
-/* Read constant (32 bit) from bytecode. */
+/* Read unsigned constant (32 bit) from bytecode. */
static uint32_t tci_read_i32(uint8_t **tb_ptr)
{
uint32_t value = *(uint32_t *)(*tb_ptr);
@@ -190,6 +186,14 @@ static uint32_t tci_read_i32(uint8_t **tb_ptr)
return value;
}
+/* Read signed constant (32 bit) from bytecode. */
+static int32_t tci_read_s32(uint8_t **tb_ptr)
+{
+ int32_t value = *(int32_t *)(*tb_ptr);
+ *tb_ptr += sizeof(value);
+ return value;
+}
+
#if TCG_TARGET_REG_BITS == 64
/* Read constant (64 bit) from bytecode. */
static uint64_t tci_read_i64(uint8_t **tb_ptr)
@@ -430,18 +434,17 @@ static bool tci_compare64(uint64_t u0, uint64_t u1, TCGCond condition)
}
/* Interpret pseudo code in tb. */
-tcg_target_ulong tcg_qemu_tb_exec(CPUArchState *cpustate, uint8_t *tb_ptr)
+tcg_target_ulong tcg_qemu_tb_exec(CPUArchState *env, uint8_t *tb_ptr)
{
+ long tcg_temps[CPU_TEMP_BUF_NLONGS];
+ uintptr_t sp_value = (uintptr_t)(tcg_temps + CPU_TEMP_BUF_NLONGS);
tcg_target_ulong next_tb = 0;
- env = cpustate;
tci_reg[TCG_AREG0] = (tcg_target_ulong)env;
+ tci_reg[TCG_REG_CALL_STACK] = sp_value;
assert(tb_ptr);
for (;;) {
-#if defined(GETPC)
- tci_tb_ptr = (uintptr_t)tb_ptr;
-#endif
TCGOpcode opc = tb_ptr[0];
#if !defined(NDEBUG)
uint8_t op_size = tb_ptr[1];
@@ -464,6 +467,10 @@ tcg_target_ulong tcg_qemu_tb_exec(CPUArchState *cpustate, uint8_t *tb_ptr)
uint64_t v64;
#endif
+#if defined(GETPC)
+ tci_tb_ptr = (uintptr_t)tb_ptr;
+#endif
+
/* Skip opcode and size entry. */
tb_ptr += 2;
@@ -550,7 +557,7 @@ tcg_target_ulong tcg_qemu_tb_exec(CPUArchState *cpustate, uint8_t *tb_ptr)
case INDEX_op_ld8u_i32:
t0 = *tb_ptr++;
t1 = tci_read_r(&tb_ptr);
- t2 = tci_read_i32(&tb_ptr);
+ t2 = tci_read_s32(&tb_ptr);
tci_write_reg8(t0, *(uint8_t *)(t1 + t2));
break;
case INDEX_op_ld8s_i32:
@@ -563,25 +570,26 @@ tcg_target_ulong tcg_qemu_tb_exec(CPUArchState *cpustate, uint8_t *tb_ptr)
case INDEX_op_ld_i32:
t0 = *tb_ptr++;
t1 = tci_read_r(&tb_ptr);
- t2 = tci_read_i32(&tb_ptr);
+ t2 = tci_read_s32(&tb_ptr);
tci_write_reg32(t0, *(uint32_t *)(t1 + t2));
break;
case INDEX_op_st8_i32:
t0 = tci_read_r8(&tb_ptr);
t1 = tci_read_r(&tb_ptr);
- t2 = tci_read_i32(&tb_ptr);
+ t2 = tci_read_s32(&tb_ptr);
*(uint8_t *)(t1 + t2) = t0;
break;
case INDEX_op_st16_i32:
t0 = tci_read_r16(&tb_ptr);
t1 = tci_read_r(&tb_ptr);
- t2 = tci_read_i32(&tb_ptr);
+ t2 = tci_read_s32(&tb_ptr);
*(uint16_t *)(t1 + t2) = t0;
break;
case INDEX_op_st_i32:
t0 = tci_read_r32(&tb_ptr);
t1 = tci_read_r(&tb_ptr);
- t2 = tci_read_i32(&tb_ptr);
+ t2 = tci_read_s32(&tb_ptr);
+ assert(t1 != sp_value || (int32_t)t2 < 0);
*(uint32_t *)(t1 + t2) = t0;
break;
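[editor's note: a sketch of the frame layout that makes the assert above hold — since tcg_set_frame now uses negative offsets from TCG_REG_CALL_STACK, every legitimate temp access has a negative displacement; illustrative values only, not part of the patch]

#include <assert.h>
#include <stdint.h>

#define CPU_TEMP_BUF_NLONGS 128   /* illustrative; the real value comes from tcg.h */

int main(void)
{
    long tcg_temps[CPU_TEMP_BUF_NLONGS];
    uintptr_t sp_value = (uintptr_t)(tcg_temps + CPU_TEMP_BUF_NLONGS);

    /* Each temp slot lives below sp_value, so its offset from the fake stack
       pointer is always negative, which is exactly what the
       "t1 != sp_value || (int32_t)t2 < 0" check in the store cases verifies. */
    for (int i = 0; i < CPU_TEMP_BUF_NLONGS; i++) {
        intptr_t ofs = (char *)&tcg_temps[i] - (char *)sp_value;
        assert(ofs < 0);
    }
    return 0;
}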
@@ -818,7 +826,7 @@ tcg_target_ulong tcg_qemu_tb_exec(CPUArchState *cpustate, uint8_t *tb_ptr)
case INDEX_op_ld8u_i64:
t0 = *tb_ptr++;
t1 = tci_read_r(&tb_ptr);
- t2 = tci_read_i32(&tb_ptr);
+ t2 = tci_read_s32(&tb_ptr);
tci_write_reg8(t0, *(uint8_t *)(t1 + t2));
break;
case INDEX_op_ld8s_i64:
@@ -829,43 +837,44 @@ tcg_target_ulong tcg_qemu_tb_exec(CPUArchState *cpustate, uint8_t *tb_ptr)
case INDEX_op_ld32u_i64:
t0 = *tb_ptr++;
t1 = tci_read_r(&tb_ptr);
- t2 = tci_read_i32(&tb_ptr);
+ t2 = tci_read_s32(&tb_ptr);
tci_write_reg32(t0, *(uint32_t *)(t1 + t2));
break;
case INDEX_op_ld32s_i64:
t0 = *tb_ptr++;
t1 = tci_read_r(&tb_ptr);
- t2 = tci_read_i32(&tb_ptr);
+ t2 = tci_read_s32(&tb_ptr);
tci_write_reg32s(t0, *(int32_t *)(t1 + t2));
break;
case INDEX_op_ld_i64:
t0 = *tb_ptr++;
t1 = tci_read_r(&tb_ptr);
- t2 = tci_read_i32(&tb_ptr);
+ t2 = tci_read_s32(&tb_ptr);
tci_write_reg64(t0, *(uint64_t *)(t1 + t2));
break;
case INDEX_op_st8_i64:
t0 = tci_read_r8(&tb_ptr);
t1 = tci_read_r(&tb_ptr);
- t2 = tci_read_i32(&tb_ptr);
+ t2 = tci_read_s32(&tb_ptr);
*(uint8_t *)(t1 + t2) = t0;
break;
case INDEX_op_st16_i64:
t0 = tci_read_r16(&tb_ptr);
t1 = tci_read_r(&tb_ptr);
- t2 = tci_read_i32(&tb_ptr);
+ t2 = tci_read_s32(&tb_ptr);
*(uint16_t *)(t1 + t2) = t0;
break;
case INDEX_op_st32_i64:
t0 = tci_read_r32(&tb_ptr);
t1 = tci_read_r(&tb_ptr);
- t2 = tci_read_i32(&tb_ptr);
+ t2 = tci_read_s32(&tb_ptr);
*(uint32_t *)(t1 + t2) = t0;
break;
case INDEX_op_st_i64:
t0 = tci_read_r64(&tb_ptr);
t1 = tci_read_r(&tb_ptr);
- t2 = tci_read_i32(&tb_ptr);
+ t2 = tci_read_s32(&tb_ptr);
+ assert(t1 != sp_value || (int32_t)t2 < 0);
*(uint64_t *)(t1 + t2) = t0;
break;