From ce151109813e2770fd3cee2f37bfa2cdd01a12b9 Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Tue, 23 Feb 2016 14:49:41 +0000 Subject: tcg: Rename tcg-target.c to tcg-target.inc.c Rename the per-architecture tcg-target.c files to tcg-target.inc.c. This makes it clearer that they are not intended to be standalone C files, but are instead #included into another source file. Reviewed-by: Eric Blake Signed-off-by: Peter Maydell Message-Id: <1456238983-10160-2-git-send-email-peter.maydell@linaro.org> Signed-off-by: Richard Henderson --- qemu-tech.texi | 2 +- tcg/README | 5 +- tcg/aarch64/tcg-target.c | 1893 ----------------------------- tcg/aarch64/tcg-target.inc.c | 1893 +++++++++++++++++++++++++++++ tcg/arm/tcg-target.c | 2129 -------------------------------- tcg/arm/tcg-target.inc.c | 2129 ++++++++++++++++++++++++++++++++ tcg/i386/tcg-target.c | 2464 ------------------------------------- tcg/i386/tcg-target.inc.c | 2464 +++++++++++++++++++++++++++++++++++++ tcg/ia64/tcg-target.c | 2453 ------------------------------------- tcg/ia64/tcg-target.inc.c | 2453 +++++++++++++++++++++++++++++++++++++ tcg/mips/tcg-target.c | 1892 ----------------------------- tcg/mips/tcg-target.inc.c | 1892 +++++++++++++++++++++++++++++ tcg/ppc/tcg-target.c | 2762 ------------------------------------------ tcg/ppc/tcg-target.inc.c | 2762 ++++++++++++++++++++++++++++++++++++++++++ tcg/s390/tcg-target.c | 2410 ------------------------------------ tcg/s390/tcg-target.inc.c | 2410 ++++++++++++++++++++++++++++++++++++ tcg/sparc/tcg-target.c | 1653 ------------------------- tcg/sparc/tcg-target.inc.c | 1653 +++++++++++++++++++++++++ tcg/tcg.c | 7 +- tcg/tcg.h | 2 +- tcg/tci/README | 4 +- tcg/tci/tcg-target.c | 880 -------------- tcg/tci/tcg-target.inc.c | 880 ++++++++++++++ 23 files changed, 18547 insertions(+), 18545 deletions(-) delete mode 100644 tcg/aarch64/tcg-target.c create mode 100644 tcg/aarch64/tcg-target.inc.c delete mode 100644 tcg/arm/tcg-target.c create mode 100644 tcg/arm/tcg-target.inc.c delete mode 100644 tcg/i386/tcg-target.c create mode 100644 tcg/i386/tcg-target.inc.c delete mode 100644 tcg/ia64/tcg-target.c create mode 100644 tcg/ia64/tcg-target.inc.c delete mode 100644 tcg/mips/tcg-target.c create mode 100644 tcg/mips/tcg-target.inc.c delete mode 100644 tcg/ppc/tcg-target.c create mode 100644 tcg/ppc/tcg-target.inc.c delete mode 100644 tcg/s390/tcg-target.c create mode 100644 tcg/s390/tcg-target.inc.c delete mode 100644 tcg/sparc/tcg-target.c create mode 100644 tcg/sparc/tcg-target.inc.c delete mode 100644 tcg/tci/tcg-target.c create mode 100644 tcg/tci/tcg-target.inc.c diff --git a/qemu-tech.texi b/qemu-tech.texi index 022017d..bdb2285 100644 --- a/qemu-tech.texi +++ b/qemu-tech.texi @@ -385,7 +385,7 @@ ops (see @code{target-i386/translate.c}). Some optimizations can be performed at this stage, including liveness analysis and trivial constant expression evaluation. TCG ops are then implemented in the host CPU back end, also known as TCG target (see -@code{tcg/i386/tcg-target.c}). For more information, please take a +@code{tcg/i386/tcg-target.inc.c}). For more information, please take a look at @code{tcg/README}. @node Condition code optimisations diff --git a/tcg/README b/tcg/README index 34c0775..f4a8ac1 100644 --- a/tcg/README +++ b/tcg/README @@ -460,8 +460,9 @@ function tcg_gen_xxx(args). 4) Backend -tcg-target.h contains the target specific definitions. tcg-target.c -contains the target specific code. +tcg-target.h contains the target specific definitions. tcg-target.inc.c +contains the target specific code; it is #included by tcg/tcg.c, rather +than being a standalone C file. 4.1) Assumptions diff --git a/tcg/aarch64/tcg-target.c b/tcg/aarch64/tcg-target.c deleted file mode 100644 index 8467d5d..0000000 --- a/tcg/aarch64/tcg-target.c +++ /dev/null @@ -1,1893 +0,0 @@ -/* - * Initial TCG Implementation for aarch64 - * - * Copyright (c) 2013 Huawei Technologies Duesseldorf GmbH - * Written by Claudio Fontana - * - * This work is licensed under the terms of the GNU GPL, version 2 or - * (at your option) any later version. - * - * See the COPYING file in the top-level directory for details. - */ - -#include "qemu/osdep.h" -#include "tcg-be-ldst.h" -#include "qemu/bitops.h" - -/* We're going to re-use TCGType in setting of the SF bit, which controls - the size of the operation performed. If we know the values match, it - makes things much cleaner. */ -QEMU_BUILD_BUG_ON(TCG_TYPE_I32 != 0 || TCG_TYPE_I64 != 1); - -#ifndef NDEBUG -static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = { - "%x0", "%x1", "%x2", "%x3", "%x4", "%x5", "%x6", "%x7", - "%x8", "%x9", "%x10", "%x11", "%x12", "%x13", "%x14", "%x15", - "%x16", "%x17", "%x18", "%x19", "%x20", "%x21", "%x22", "%x23", - "%x24", "%x25", "%x26", "%x27", "%x28", "%fp", "%x30", "%sp", -}; -#endif /* NDEBUG */ - -static const int tcg_target_reg_alloc_order[] = { - TCG_REG_X20, TCG_REG_X21, TCG_REG_X22, TCG_REG_X23, - TCG_REG_X24, TCG_REG_X25, TCG_REG_X26, TCG_REG_X27, - TCG_REG_X28, /* we will reserve this for guest_base if configured */ - - TCG_REG_X8, TCG_REG_X9, TCG_REG_X10, TCG_REG_X11, - TCG_REG_X12, TCG_REG_X13, TCG_REG_X14, TCG_REG_X15, - TCG_REG_X16, TCG_REG_X17, - - TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3, - TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7, - - /* X18 reserved by system */ - /* X19 reserved for AREG0 */ - /* X29 reserved as fp */ - /* X30 reserved as temporary */ -}; - -static const int tcg_target_call_iarg_regs[8] = { - TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3, - TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7 -}; -static const int tcg_target_call_oarg_regs[1] = { - TCG_REG_X0 -}; - -#define TCG_REG_TMP TCG_REG_X30 - -#ifndef CONFIG_SOFTMMU -/* Note that XZR cannot be encoded in the address base register slot, - as that actaully encodes SP. So if we need to zero-extend the guest - address, via the address index register slot, we need to load even - a zero guest base into a register. */ -#define USE_GUEST_BASE (guest_base != 0 || TARGET_LONG_BITS == 32) -#define TCG_REG_GUEST_BASE TCG_REG_X28 -#endif - -static inline void reloc_pc26(tcg_insn_unit *code_ptr, tcg_insn_unit *target) -{ - ptrdiff_t offset = target - code_ptr; - assert(offset == sextract64(offset, 0, 26)); - /* read instruction, mask away previous PC_REL26 parameter contents, - set the proper offset, then write back the instruction. */ - *code_ptr = deposit32(*code_ptr, 0, 26, offset); -} - -static inline void reloc_pc19(tcg_insn_unit *code_ptr, tcg_insn_unit *target) -{ - ptrdiff_t offset = target - code_ptr; - assert(offset == sextract64(offset, 0, 19)); - *code_ptr = deposit32(*code_ptr, 5, 19, offset); -} - -static inline void patch_reloc(tcg_insn_unit *code_ptr, int type, - intptr_t value, intptr_t addend) -{ - assert(addend == 0); - switch (type) { - case R_AARCH64_JUMP26: - case R_AARCH64_CALL26: - reloc_pc26(code_ptr, (tcg_insn_unit *)value); - break; - case R_AARCH64_CONDBR19: - reloc_pc19(code_ptr, (tcg_insn_unit *)value); - break; - default: - tcg_abort(); - } -} - -#define TCG_CT_CONST_AIMM 0x100 -#define TCG_CT_CONST_LIMM 0x200 -#define TCG_CT_CONST_ZERO 0x400 -#define TCG_CT_CONST_MONE 0x800 - -/* parse target specific constraints */ -static int target_parse_constraint(TCGArgConstraint *ct, - const char **pct_str) -{ - const char *ct_str = *pct_str; - - switch (ct_str[0]) { - case 'r': - ct->ct |= TCG_CT_REG; - tcg_regset_set32(ct->u.regs, 0, (1ULL << TCG_TARGET_NB_REGS) - 1); - break; - case 'l': /* qemu_ld / qemu_st address, data_reg */ - ct->ct |= TCG_CT_REG; - tcg_regset_set32(ct->u.regs, 0, (1ULL << TCG_TARGET_NB_REGS) - 1); -#ifdef CONFIG_SOFTMMU - /* x0 and x1 will be overwritten when reading the tlb entry, - and x2, and x3 for helper args, better to avoid using them. */ - tcg_regset_reset_reg(ct->u.regs, TCG_REG_X0); - tcg_regset_reset_reg(ct->u.regs, TCG_REG_X1); - tcg_regset_reset_reg(ct->u.regs, TCG_REG_X2); - tcg_regset_reset_reg(ct->u.regs, TCG_REG_X3); -#endif - break; - case 'A': /* Valid for arithmetic immediate (positive or negative). */ - ct->ct |= TCG_CT_CONST_AIMM; - break; - case 'L': /* Valid for logical immediate. */ - ct->ct |= TCG_CT_CONST_LIMM; - break; - case 'M': /* minus one */ - ct->ct |= TCG_CT_CONST_MONE; - break; - case 'Z': /* zero */ - ct->ct |= TCG_CT_CONST_ZERO; - break; - default: - return -1; - } - - ct_str++; - *pct_str = ct_str; - return 0; -} - -static inline bool is_aimm(uint64_t val) -{ - return (val & ~0xfff) == 0 || (val & ~0xfff000) == 0; -} - -static inline bool is_limm(uint64_t val) -{ - /* Taking a simplified view of the logical immediates for now, ignoring - the replication that can happen across the field. Match bit patterns - of the forms - 0....01....1 - 0..01..10..0 - and their inverses. */ - - /* Make things easier below, by testing the form with msb clear. */ - if ((int64_t)val < 0) { - val = ~val; - } - if (val == 0) { - return false; - } - val += val & -val; - return (val & (val - 1)) == 0; -} - -static int tcg_target_const_match(tcg_target_long val, TCGType type, - const TCGArgConstraint *arg_ct) -{ - int ct = arg_ct->ct; - - if (ct & TCG_CT_CONST) { - return 1; - } - if (type == TCG_TYPE_I32) { - val = (int32_t)val; - } - if ((ct & TCG_CT_CONST_AIMM) && (is_aimm(val) || is_aimm(-val))) { - return 1; - } - if ((ct & TCG_CT_CONST_LIMM) && is_limm(val)) { - return 1; - } - if ((ct & TCG_CT_CONST_ZERO) && val == 0) { - return 1; - } - if ((ct & TCG_CT_CONST_MONE) && val == -1) { - return 1; - } - - return 0; -} - -enum aarch64_cond_code { - COND_EQ = 0x0, - COND_NE = 0x1, - COND_CS = 0x2, /* Unsigned greater or equal */ - COND_HS = COND_CS, /* ALIAS greater or equal */ - COND_CC = 0x3, /* Unsigned less than */ - COND_LO = COND_CC, /* ALIAS Lower */ - COND_MI = 0x4, /* Negative */ - COND_PL = 0x5, /* Zero or greater */ - COND_VS = 0x6, /* Overflow */ - COND_VC = 0x7, /* No overflow */ - COND_HI = 0x8, /* Unsigned greater than */ - COND_LS = 0x9, /* Unsigned less or equal */ - COND_GE = 0xa, - COND_LT = 0xb, - COND_GT = 0xc, - COND_LE = 0xd, - COND_AL = 0xe, - COND_NV = 0xf, /* behaves like COND_AL here */ -}; - -static const enum aarch64_cond_code tcg_cond_to_aarch64[] = { - [TCG_COND_EQ] = COND_EQ, - [TCG_COND_NE] = COND_NE, - [TCG_COND_LT] = COND_LT, - [TCG_COND_GE] = COND_GE, - [TCG_COND_LE] = COND_LE, - [TCG_COND_GT] = COND_GT, - /* unsigned */ - [TCG_COND_LTU] = COND_LO, - [TCG_COND_GTU] = COND_HI, - [TCG_COND_GEU] = COND_HS, - [TCG_COND_LEU] = COND_LS, -}; - -typedef enum { - LDST_ST = 0, /* store */ - LDST_LD = 1, /* load */ - LDST_LD_S_X = 2, /* load and sign-extend into Xt */ - LDST_LD_S_W = 3, /* load and sign-extend into Wt */ -} AArch64LdstType; - -/* We encode the format of the insn into the beginning of the name, so that - we can have the preprocessor help "typecheck" the insn vs the output - function. Arm didn't provide us with nice names for the formats, so we - use the section number of the architecture reference manual in which the - instruction group is described. */ -typedef enum { - /* Compare and branch (immediate). */ - I3201_CBZ = 0x34000000, - I3201_CBNZ = 0x35000000, - - /* Conditional branch (immediate). */ - I3202_B_C = 0x54000000, - - /* Unconditional branch (immediate). */ - I3206_B = 0x14000000, - I3206_BL = 0x94000000, - - /* Unconditional branch (register). */ - I3207_BR = 0xd61f0000, - I3207_BLR = 0xd63f0000, - I3207_RET = 0xd65f0000, - - /* Load/store register. Described here as 3.3.12, but the helper - that emits them can transform to 3.3.10 or 3.3.13. */ - I3312_STRB = 0x38000000 | LDST_ST << 22 | MO_8 << 30, - I3312_STRH = 0x38000000 | LDST_ST << 22 | MO_16 << 30, - I3312_STRW = 0x38000000 | LDST_ST << 22 | MO_32 << 30, - I3312_STRX = 0x38000000 | LDST_ST << 22 | MO_64 << 30, - - I3312_LDRB = 0x38000000 | LDST_LD << 22 | MO_8 << 30, - I3312_LDRH = 0x38000000 | LDST_LD << 22 | MO_16 << 30, - I3312_LDRW = 0x38000000 | LDST_LD << 22 | MO_32 << 30, - I3312_LDRX = 0x38000000 | LDST_LD << 22 | MO_64 << 30, - - I3312_LDRSBW = 0x38000000 | LDST_LD_S_W << 22 | MO_8 << 30, - I3312_LDRSHW = 0x38000000 | LDST_LD_S_W << 22 | MO_16 << 30, - - I3312_LDRSBX = 0x38000000 | LDST_LD_S_X << 22 | MO_8 << 30, - I3312_LDRSHX = 0x38000000 | LDST_LD_S_X << 22 | MO_16 << 30, - I3312_LDRSWX = 0x38000000 | LDST_LD_S_X << 22 | MO_32 << 30, - - I3312_TO_I3310 = 0x00200800, - I3312_TO_I3313 = 0x01000000, - - /* Load/store register pair instructions. */ - I3314_LDP = 0x28400000, - I3314_STP = 0x28000000, - - /* Add/subtract immediate instructions. */ - I3401_ADDI = 0x11000000, - I3401_ADDSI = 0x31000000, - I3401_SUBI = 0x51000000, - I3401_SUBSI = 0x71000000, - - /* Bitfield instructions. */ - I3402_BFM = 0x33000000, - I3402_SBFM = 0x13000000, - I3402_UBFM = 0x53000000, - - /* Extract instruction. */ - I3403_EXTR = 0x13800000, - - /* Logical immediate instructions. */ - I3404_ANDI = 0x12000000, - I3404_ORRI = 0x32000000, - I3404_EORI = 0x52000000, - - /* Move wide immediate instructions. */ - I3405_MOVN = 0x12800000, - I3405_MOVZ = 0x52800000, - I3405_MOVK = 0x72800000, - - /* PC relative addressing instructions. */ - I3406_ADR = 0x10000000, - I3406_ADRP = 0x90000000, - - /* Add/subtract shifted register instructions (without a shift). */ - I3502_ADD = 0x0b000000, - I3502_ADDS = 0x2b000000, - I3502_SUB = 0x4b000000, - I3502_SUBS = 0x6b000000, - - /* Add/subtract shifted register instructions (with a shift). */ - I3502S_ADD_LSL = I3502_ADD, - - /* Add/subtract with carry instructions. */ - I3503_ADC = 0x1a000000, - I3503_SBC = 0x5a000000, - - /* Conditional select instructions. */ - I3506_CSEL = 0x1a800000, - I3506_CSINC = 0x1a800400, - - /* Data-processing (1 source) instructions. */ - I3507_REV16 = 0x5ac00400, - I3507_REV32 = 0x5ac00800, - I3507_REV64 = 0x5ac00c00, - - /* Data-processing (2 source) instructions. */ - I3508_LSLV = 0x1ac02000, - I3508_LSRV = 0x1ac02400, - I3508_ASRV = 0x1ac02800, - I3508_RORV = 0x1ac02c00, - I3508_SMULH = 0x9b407c00, - I3508_UMULH = 0x9bc07c00, - I3508_UDIV = 0x1ac00800, - I3508_SDIV = 0x1ac00c00, - - /* Data-processing (3 source) instructions. */ - I3509_MADD = 0x1b000000, - I3509_MSUB = 0x1b008000, - - /* Logical shifted register instructions (without a shift). */ - I3510_AND = 0x0a000000, - I3510_BIC = 0x0a200000, - I3510_ORR = 0x2a000000, - I3510_ORN = 0x2a200000, - I3510_EOR = 0x4a000000, - I3510_EON = 0x4a200000, - I3510_ANDS = 0x6a000000, -} AArch64Insn; - -static inline uint32_t tcg_in32(TCGContext *s) -{ - uint32_t v = *(uint32_t *)s->code_ptr; - return v; -} - -/* Emit an opcode with "type-checking" of the format. */ -#define tcg_out_insn(S, FMT, OP, ...) \ - glue(tcg_out_insn_,FMT)(S, glue(glue(glue(I,FMT),_),OP), ## __VA_ARGS__) - -static void tcg_out_insn_3201(TCGContext *s, AArch64Insn insn, TCGType ext, - TCGReg rt, int imm19) -{ - tcg_out32(s, insn | ext << 31 | (imm19 & 0x7ffff) << 5 | rt); -} - -static void tcg_out_insn_3202(TCGContext *s, AArch64Insn insn, - TCGCond c, int imm19) -{ - tcg_out32(s, insn | tcg_cond_to_aarch64[c] | (imm19 & 0x7ffff) << 5); -} - -static void tcg_out_insn_3206(TCGContext *s, AArch64Insn insn, int imm26) -{ - tcg_out32(s, insn | (imm26 & 0x03ffffff)); -} - -static void tcg_out_insn_3207(TCGContext *s, AArch64Insn insn, TCGReg rn) -{ - tcg_out32(s, insn | rn << 5); -} - -static void tcg_out_insn_3314(TCGContext *s, AArch64Insn insn, - TCGReg r1, TCGReg r2, TCGReg rn, - tcg_target_long ofs, bool pre, bool w) -{ - insn |= 1u << 31; /* ext */ - insn |= pre << 24; - insn |= w << 23; - - assert(ofs >= -0x200 && ofs < 0x200 && (ofs & 7) == 0); - insn |= (ofs & (0x7f << 3)) << (15 - 3); - - tcg_out32(s, insn | r2 << 10 | rn << 5 | r1); -} - -static void tcg_out_insn_3401(TCGContext *s, AArch64Insn insn, TCGType ext, - TCGReg rd, TCGReg rn, uint64_t aimm) -{ - if (aimm > 0xfff) { - assert((aimm & 0xfff) == 0); - aimm >>= 12; - assert(aimm <= 0xfff); - aimm |= 1 << 12; /* apply LSL 12 */ - } - tcg_out32(s, insn | ext << 31 | aimm << 10 | rn << 5 | rd); -} - -/* This function can be used for both 3.4.2 (Bitfield) and 3.4.4 - (Logical immediate). Both insn groups have N, IMMR and IMMS fields - that feed the DecodeBitMasks pseudo function. */ -static void tcg_out_insn_3402(TCGContext *s, AArch64Insn insn, TCGType ext, - TCGReg rd, TCGReg rn, int n, int immr, int imms) -{ - tcg_out32(s, insn | ext << 31 | n << 22 | immr << 16 | imms << 10 - | rn << 5 | rd); -} - -#define tcg_out_insn_3404 tcg_out_insn_3402 - -static void tcg_out_insn_3403(TCGContext *s, AArch64Insn insn, TCGType ext, - TCGReg rd, TCGReg rn, TCGReg rm, int imms) -{ - tcg_out32(s, insn | ext << 31 | ext << 22 | rm << 16 | imms << 10 - | rn << 5 | rd); -} - -/* This function is used for the Move (wide immediate) instruction group. - Note that SHIFT is a full shift count, not the 2 bit HW field. */ -static void tcg_out_insn_3405(TCGContext *s, AArch64Insn insn, TCGType ext, - TCGReg rd, uint16_t half, unsigned shift) -{ - assert((shift & ~0x30) == 0); - tcg_out32(s, insn | ext << 31 | shift << (21 - 4) | half << 5 | rd); -} - -static void tcg_out_insn_3406(TCGContext *s, AArch64Insn insn, - TCGReg rd, int64_t disp) -{ - tcg_out32(s, insn | (disp & 3) << 29 | (disp & 0x1ffffc) << (5 - 2) | rd); -} - -/* This function is for both 3.5.2 (Add/Subtract shifted register), for - the rare occasion when we actually want to supply a shift amount. */ -static inline void tcg_out_insn_3502S(TCGContext *s, AArch64Insn insn, - TCGType ext, TCGReg rd, TCGReg rn, - TCGReg rm, int imm6) -{ - tcg_out32(s, insn | ext << 31 | rm << 16 | imm6 << 10 | rn << 5 | rd); -} - -/* This function is for 3.5.2 (Add/subtract shifted register), - and 3.5.10 (Logical shifted register), for the vast majorty of cases - when we don't want to apply a shift. Thus it can also be used for - 3.5.3 (Add/subtract with carry) and 3.5.8 (Data processing 2 source). */ -static void tcg_out_insn_3502(TCGContext *s, AArch64Insn insn, TCGType ext, - TCGReg rd, TCGReg rn, TCGReg rm) -{ - tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd); -} - -#define tcg_out_insn_3503 tcg_out_insn_3502 -#define tcg_out_insn_3508 tcg_out_insn_3502 -#define tcg_out_insn_3510 tcg_out_insn_3502 - -static void tcg_out_insn_3506(TCGContext *s, AArch64Insn insn, TCGType ext, - TCGReg rd, TCGReg rn, TCGReg rm, TCGCond c) -{ - tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd - | tcg_cond_to_aarch64[c] << 12); -} - -static void tcg_out_insn_3507(TCGContext *s, AArch64Insn insn, TCGType ext, - TCGReg rd, TCGReg rn) -{ - tcg_out32(s, insn | ext << 31 | rn << 5 | rd); -} - -static void tcg_out_insn_3509(TCGContext *s, AArch64Insn insn, TCGType ext, - TCGReg rd, TCGReg rn, TCGReg rm, TCGReg ra) -{ - tcg_out32(s, insn | ext << 31 | rm << 16 | ra << 10 | rn << 5 | rd); -} - -static void tcg_out_insn_3310(TCGContext *s, AArch64Insn insn, - TCGReg rd, TCGReg base, TCGType ext, - TCGReg regoff) -{ - /* Note the AArch64Insn constants above are for C3.3.12. Adjust. */ - tcg_out32(s, insn | I3312_TO_I3310 | regoff << 16 | - 0x4000 | ext << 13 | base << 5 | rd); -} - -static void tcg_out_insn_3312(TCGContext *s, AArch64Insn insn, - TCGReg rd, TCGReg rn, intptr_t offset) -{ - tcg_out32(s, insn | (offset & 0x1ff) << 12 | rn << 5 | rd); -} - -static void tcg_out_insn_3313(TCGContext *s, AArch64Insn insn, - TCGReg rd, TCGReg rn, uintptr_t scaled_uimm) -{ - /* Note the AArch64Insn constants above are for C3.3.12. Adjust. */ - tcg_out32(s, insn | I3312_TO_I3313 | scaled_uimm << 10 | rn << 5 | rd); -} - -/* Register to register move using ORR (shifted register with no shift). */ -static void tcg_out_movr(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rm) -{ - tcg_out_insn(s, 3510, ORR, ext, rd, TCG_REG_XZR, rm); -} - -/* Register to register move using ADDI (move to/from SP). */ -static void tcg_out_movr_sp(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rn) -{ - tcg_out_insn(s, 3401, ADDI, ext, rd, rn, 0); -} - -/* This function is used for the Logical (immediate) instruction group. - The value of LIMM must satisfy IS_LIMM. See the comment above about - only supporting simplified logical immediates. */ -static void tcg_out_logicali(TCGContext *s, AArch64Insn insn, TCGType ext, - TCGReg rd, TCGReg rn, uint64_t limm) -{ - unsigned h, l, r, c; - - assert(is_limm(limm)); - - h = clz64(limm); - l = ctz64(limm); - if (l == 0) { - r = 0; /* form 0....01....1 */ - c = ctz64(~limm) - 1; - if (h == 0) { - r = clz64(~limm); /* form 1..10..01..1 */ - c += r; - } - } else { - r = 64 - l; /* form 1....10....0 or 0..01..10..0 */ - c = r - h - 1; - } - if (ext == TCG_TYPE_I32) { - r &= 31; - c &= 31; - } - - tcg_out_insn_3404(s, insn, ext, rd, rn, ext, r, c); -} - -static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd, - tcg_target_long value) -{ - AArch64Insn insn; - int i, wantinv, shift; - tcg_target_long svalue = value; - tcg_target_long ivalue = ~value; - tcg_target_long imask; - - /* For 32-bit values, discard potential garbage in value. For 64-bit - values within [2**31, 2**32-1], we can create smaller sequences by - interpreting this as a negative 32-bit number, while ensuring that - the high 32 bits are cleared by setting SF=0. */ - if (type == TCG_TYPE_I32 || (value & ~0xffffffffull) == 0) { - svalue = (int32_t)value; - value = (uint32_t)value; - ivalue = (uint32_t)ivalue; - type = TCG_TYPE_I32; - } - - /* Speed things up by handling the common case of small positive - and negative values specially. */ - if ((value & ~0xffffull) == 0) { - tcg_out_insn(s, 3405, MOVZ, type, rd, value, 0); - return; - } else if ((ivalue & ~0xffffull) == 0) { - tcg_out_insn(s, 3405, MOVN, type, rd, ivalue, 0); - return; - } - - /* Check for bitfield immediates. For the benefit of 32-bit quantities, - use the sign-extended value. That lets us match rotated values such - as 0xff0000ff with the same 64-bit logic matching 0xffffffffff0000ff. */ - if (is_limm(svalue)) { - tcg_out_logicali(s, I3404_ORRI, type, rd, TCG_REG_XZR, svalue); - return; - } - - /* Look for host pointer values within 4G of the PC. This happens - often when loading pointers to QEMU's own data structures. */ - if (type == TCG_TYPE_I64) { - tcg_target_long disp = (value >> 12) - ((intptr_t)s->code_ptr >> 12); - if (disp == sextract64(disp, 0, 21)) { - tcg_out_insn(s, 3406, ADRP, rd, disp); - if (value & 0xfff) { - tcg_out_insn(s, 3401, ADDI, type, rd, rd, value & 0xfff); - } - return; - } - } - - /* Would it take fewer insns to begin with MOVN? For the value and its - inverse, count the number of 16-bit lanes that are 0. */ - for (i = wantinv = imask = 0; i < 64; i += 16) { - tcg_target_long mask = 0xffffull << i; - if ((value & mask) == 0) { - wantinv -= 1; - } - if ((ivalue & mask) == 0) { - wantinv += 1; - imask |= mask; - } - } - - /* If we had more 0xffff than 0x0000, invert VALUE and use MOVN. */ - insn = I3405_MOVZ; - if (wantinv > 0) { - value = ivalue; - insn = I3405_MOVN; - } - - /* Find the lowest lane that is not 0x0000. */ - shift = ctz64(value) & (63 & -16); - tcg_out_insn_3405(s, insn, type, rd, value >> shift, shift); - - if (wantinv > 0) { - /* Re-invert the value, so MOVK sees non-inverted bits. */ - value = ~value; - /* Clear out all the 0xffff lanes. */ - value ^= imask; - } - /* Clear out the lane that we just set. */ - value &= ~(0xffffUL << shift); - - /* Iterate until all lanes have been set, and thus cleared from VALUE. */ - while (value) { - shift = ctz64(value) & (63 & -16); - tcg_out_insn(s, 3405, MOVK, type, rd, value >> shift, shift); - value &= ~(0xffffUL << shift); - } -} - -/* Define something more legible for general use. */ -#define tcg_out_ldst_r tcg_out_insn_3310 - -static void tcg_out_ldst(TCGContext *s, AArch64Insn insn, - TCGReg rd, TCGReg rn, intptr_t offset) -{ - TCGMemOp size = (uint32_t)insn >> 30; - - /* If the offset is naturally aligned and in range, then we can - use the scaled uimm12 encoding */ - if (offset >= 0 && !(offset & ((1 << size) - 1))) { - uintptr_t scaled_uimm = offset >> size; - if (scaled_uimm <= 0xfff) { - tcg_out_insn_3313(s, insn, rd, rn, scaled_uimm); - return; - } - } - - /* Small signed offsets can use the unscaled encoding. */ - if (offset >= -256 && offset < 256) { - tcg_out_insn_3312(s, insn, rd, rn, offset); - return; - } - - /* Worst-case scenario, move offset to temp register, use reg offset. */ - tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, offset); - tcg_out_ldst_r(s, insn, rd, rn, TCG_TYPE_I64, TCG_REG_TMP); -} - -static inline void tcg_out_mov(TCGContext *s, - TCGType type, TCGReg ret, TCGReg arg) -{ - if (ret != arg) { - tcg_out_movr(s, type, ret, arg); - } -} - -static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg arg, - TCGReg arg1, intptr_t arg2) -{ - tcg_out_ldst(s, type == TCG_TYPE_I32 ? I3312_LDRW : I3312_LDRX, - arg, arg1, arg2); -} - -static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, - TCGReg arg1, intptr_t arg2) -{ - tcg_out_ldst(s, type == TCG_TYPE_I32 ? I3312_STRW : I3312_STRX, - arg, arg1, arg2); -} - -static inline void tcg_out_bfm(TCGContext *s, TCGType ext, TCGReg rd, - TCGReg rn, unsigned int a, unsigned int b) -{ - tcg_out_insn(s, 3402, BFM, ext, rd, rn, ext, a, b); -} - -static inline void tcg_out_ubfm(TCGContext *s, TCGType ext, TCGReg rd, - TCGReg rn, unsigned int a, unsigned int b) -{ - tcg_out_insn(s, 3402, UBFM, ext, rd, rn, ext, a, b); -} - -static inline void tcg_out_sbfm(TCGContext *s, TCGType ext, TCGReg rd, - TCGReg rn, unsigned int a, unsigned int b) -{ - tcg_out_insn(s, 3402, SBFM, ext, rd, rn, ext, a, b); -} - -static inline void tcg_out_extr(TCGContext *s, TCGType ext, TCGReg rd, - TCGReg rn, TCGReg rm, unsigned int a) -{ - tcg_out_insn(s, 3403, EXTR, ext, rd, rn, rm, a); -} - -static inline void tcg_out_shl(TCGContext *s, TCGType ext, - TCGReg rd, TCGReg rn, unsigned int m) -{ - int bits = ext ? 64 : 32; - int max = bits - 1; - tcg_out_ubfm(s, ext, rd, rn, bits - (m & max), max - (m & max)); -} - -static inline void tcg_out_shr(TCGContext *s, TCGType ext, - TCGReg rd, TCGReg rn, unsigned int m) -{ - int max = ext ? 63 : 31; - tcg_out_ubfm(s, ext, rd, rn, m & max, max); -} - -static inline void tcg_out_sar(TCGContext *s, TCGType ext, - TCGReg rd, TCGReg rn, unsigned int m) -{ - int max = ext ? 63 : 31; - tcg_out_sbfm(s, ext, rd, rn, m & max, max); -} - -static inline void tcg_out_rotr(TCGContext *s, TCGType ext, - TCGReg rd, TCGReg rn, unsigned int m) -{ - int max = ext ? 63 : 31; - tcg_out_extr(s, ext, rd, rn, rn, m & max); -} - -static inline void tcg_out_rotl(TCGContext *s, TCGType ext, - TCGReg rd, TCGReg rn, unsigned int m) -{ - int bits = ext ? 64 : 32; - int max = bits - 1; - tcg_out_extr(s, ext, rd, rn, rn, bits - (m & max)); -} - -static inline void tcg_out_dep(TCGContext *s, TCGType ext, TCGReg rd, - TCGReg rn, unsigned lsb, unsigned width) -{ - unsigned size = ext ? 64 : 32; - unsigned a = (size - lsb) & (size - 1); - unsigned b = width - 1; - tcg_out_bfm(s, ext, rd, rn, a, b); -} - -static void tcg_out_cmp(TCGContext *s, TCGType ext, TCGReg a, - tcg_target_long b, bool const_b) -{ - if (const_b) { - /* Using CMP or CMN aliases. */ - if (b >= 0) { - tcg_out_insn(s, 3401, SUBSI, ext, TCG_REG_XZR, a, b); - } else { - tcg_out_insn(s, 3401, ADDSI, ext, TCG_REG_XZR, a, -b); - } - } else { - /* Using CMP alias SUBS wzr, Wn, Wm */ - tcg_out_insn(s, 3502, SUBS, ext, TCG_REG_XZR, a, b); - } -} - -static inline void tcg_out_goto(TCGContext *s, tcg_insn_unit *target) -{ - ptrdiff_t offset = target - s->code_ptr; - assert(offset == sextract64(offset, 0, 26)); - tcg_out_insn(s, 3206, B, offset); -} - -static inline void tcg_out_goto_noaddr(TCGContext *s) -{ - /* We pay attention here to not modify the branch target by reading from - the buffer. This ensure that caches and memory are kept coherent during - retranslation. Mask away possible garbage in the high bits for the - first translation, while keeping the offset bits for retranslation. */ - uint32_t old = tcg_in32(s); - tcg_out_insn(s, 3206, B, old); -} - -static inline void tcg_out_goto_cond_noaddr(TCGContext *s, TCGCond c) -{ - /* See comments in tcg_out_goto_noaddr. */ - uint32_t old = tcg_in32(s) >> 5; - tcg_out_insn(s, 3202, B_C, c, old); -} - -static inline void tcg_out_callr(TCGContext *s, TCGReg reg) -{ - tcg_out_insn(s, 3207, BLR, reg); -} - -static inline void tcg_out_call(TCGContext *s, tcg_insn_unit *target) -{ - ptrdiff_t offset = target - s->code_ptr; - if (offset == sextract64(offset, 0, 26)) { - tcg_out_insn(s, 3206, BL, offset); - } else { - tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (intptr_t)target); - tcg_out_callr(s, TCG_REG_TMP); - } -} - -void aarch64_tb_set_jmp_target(uintptr_t jmp_addr, uintptr_t addr) -{ - tcg_insn_unit *code_ptr = (tcg_insn_unit *)jmp_addr; - tcg_insn_unit *target = (tcg_insn_unit *)addr; - - reloc_pc26(code_ptr, target); - flush_icache_range(jmp_addr, jmp_addr + 4); -} - -static inline void tcg_out_goto_label(TCGContext *s, TCGLabel *l) -{ - if (!l->has_value) { - tcg_out_reloc(s, s->code_ptr, R_AARCH64_JUMP26, l, 0); - tcg_out_goto_noaddr(s); - } else { - tcg_out_goto(s, l->u.value_ptr); - } -} - -static void tcg_out_brcond(TCGContext *s, TCGMemOp ext, TCGCond c, TCGArg a, - TCGArg b, bool b_const, TCGLabel *l) -{ - intptr_t offset; - bool need_cmp; - - if (b_const && b == 0 && (c == TCG_COND_EQ || c == TCG_COND_NE)) { - need_cmp = false; - } else { - need_cmp = true; - tcg_out_cmp(s, ext, a, b, b_const); - } - - if (!l->has_value) { - tcg_out_reloc(s, s->code_ptr, R_AARCH64_CONDBR19, l, 0); - offset = tcg_in32(s) >> 5; - } else { - offset = l->u.value_ptr - s->code_ptr; - assert(offset == sextract64(offset, 0, 19)); - } - - if (need_cmp) { - tcg_out_insn(s, 3202, B_C, c, offset); - } else if (c == TCG_COND_EQ) { - tcg_out_insn(s, 3201, CBZ, ext, a, offset); - } else { - tcg_out_insn(s, 3201, CBNZ, ext, a, offset); - } -} - -static inline void tcg_out_rev64(TCGContext *s, TCGReg rd, TCGReg rn) -{ - tcg_out_insn(s, 3507, REV64, TCG_TYPE_I64, rd, rn); -} - -static inline void tcg_out_rev32(TCGContext *s, TCGReg rd, TCGReg rn) -{ - tcg_out_insn(s, 3507, REV32, TCG_TYPE_I32, rd, rn); -} - -static inline void tcg_out_rev16(TCGContext *s, TCGReg rd, TCGReg rn) -{ - tcg_out_insn(s, 3507, REV16, TCG_TYPE_I32, rd, rn); -} - -static inline void tcg_out_sxt(TCGContext *s, TCGType ext, TCGMemOp s_bits, - TCGReg rd, TCGReg rn) -{ - /* Using ALIASes SXTB, SXTH, SXTW, of SBFM Xd, Xn, #0, #7|15|31 */ - int bits = (8 << s_bits) - 1; - tcg_out_sbfm(s, ext, rd, rn, 0, bits); -} - -static inline void tcg_out_uxt(TCGContext *s, TCGMemOp s_bits, - TCGReg rd, TCGReg rn) -{ - /* Using ALIASes UXTB, UXTH of UBFM Wd, Wn, #0, #7|15 */ - int bits = (8 << s_bits) - 1; - tcg_out_ubfm(s, 0, rd, rn, 0, bits); -} - -static void tcg_out_addsubi(TCGContext *s, int ext, TCGReg rd, - TCGReg rn, int64_t aimm) -{ - if (aimm >= 0) { - tcg_out_insn(s, 3401, ADDI, ext, rd, rn, aimm); - } else { - tcg_out_insn(s, 3401, SUBI, ext, rd, rn, -aimm); - } -} - -static inline void tcg_out_addsub2(TCGContext *s, int ext, TCGReg rl, - TCGReg rh, TCGReg al, TCGReg ah, - tcg_target_long bl, tcg_target_long bh, - bool const_bl, bool const_bh, bool sub) -{ - TCGReg orig_rl = rl; - AArch64Insn insn; - - if (rl == ah || (!const_bh && rl == bh)) { - rl = TCG_REG_TMP; - } - - if (const_bl) { - insn = I3401_ADDSI; - if ((bl < 0) ^ sub) { - insn = I3401_SUBSI; - bl = -bl; - } - tcg_out_insn_3401(s, insn, ext, rl, al, bl); - } else { - tcg_out_insn_3502(s, sub ? I3502_SUBS : I3502_ADDS, ext, rl, al, bl); - } - - insn = I3503_ADC; - if (const_bh) { - /* Note that the only two constants we support are 0 and -1, and - that SBC = rn + ~rm + c, so adc -1 is sbc 0, and vice-versa. */ - if ((bh != 0) ^ sub) { - insn = I3503_SBC; - } - bh = TCG_REG_XZR; - } else if (sub) { - insn = I3503_SBC; - } - tcg_out_insn_3503(s, insn, ext, rh, ah, bh); - - tcg_out_mov(s, ext, orig_rl, rl); -} - -#ifdef CONFIG_SOFTMMU -/* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr, - * TCGMemOpIdx oi, uintptr_t ra) - */ -static void * const qemu_ld_helpers[16] = { - [MO_UB] = helper_ret_ldub_mmu, - [MO_LEUW] = helper_le_lduw_mmu, - [MO_LEUL] = helper_le_ldul_mmu, - [MO_LEQ] = helper_le_ldq_mmu, - [MO_BEUW] = helper_be_lduw_mmu, - [MO_BEUL] = helper_be_ldul_mmu, - [MO_BEQ] = helper_be_ldq_mmu, -}; - -/* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr, - * uintxx_t val, TCGMemOpIdx oi, - * uintptr_t ra) - */ -static void * const qemu_st_helpers[16] = { - [MO_UB] = helper_ret_stb_mmu, - [MO_LEUW] = helper_le_stw_mmu, - [MO_LEUL] = helper_le_stl_mmu, - [MO_LEQ] = helper_le_stq_mmu, - [MO_BEUW] = helper_be_stw_mmu, - [MO_BEUL] = helper_be_stl_mmu, - [MO_BEQ] = helper_be_stq_mmu, -}; - -static inline void tcg_out_adr(TCGContext *s, TCGReg rd, void *target) -{ - ptrdiff_t offset = tcg_pcrel_diff(s, target); - assert(offset == sextract64(offset, 0, 21)); - tcg_out_insn(s, 3406, ADR, rd, offset); -} - -static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) -{ - TCGMemOpIdx oi = lb->oi; - TCGMemOp opc = get_memop(oi); - TCGMemOp size = opc & MO_SIZE; - - reloc_pc19(lb->label_ptr[0], s->code_ptr); - - tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0); - tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg); - tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X2, oi); - tcg_out_adr(s, TCG_REG_X3, lb->raddr); - tcg_out_call(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]); - if (opc & MO_SIGN) { - tcg_out_sxt(s, lb->type, size, lb->datalo_reg, TCG_REG_X0); - } else { - tcg_out_mov(s, size == MO_64, lb->datalo_reg, TCG_REG_X0); - } - - tcg_out_goto(s, lb->raddr); -} - -static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) -{ - TCGMemOpIdx oi = lb->oi; - TCGMemOp opc = get_memop(oi); - TCGMemOp size = opc & MO_SIZE; - - reloc_pc19(lb->label_ptr[0], s->code_ptr); - - tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0); - tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg); - tcg_out_mov(s, size == MO_64, TCG_REG_X2, lb->datalo_reg); - tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X3, oi); - tcg_out_adr(s, TCG_REG_X4, lb->raddr); - tcg_out_call(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]); - tcg_out_goto(s, lb->raddr); -} - -static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi, - TCGType ext, TCGReg data_reg, TCGReg addr_reg, - tcg_insn_unit *raddr, tcg_insn_unit *label_ptr) -{ - TCGLabelQemuLdst *label = new_ldst_label(s); - - label->is_ld = is_ld; - label->oi = oi; - label->type = ext; - label->datalo_reg = data_reg; - label->addrlo_reg = addr_reg; - label->raddr = raddr; - label->label_ptr[0] = label_ptr; -} - -/* Load and compare a TLB entry, emitting the conditional jump to the - slow path for the failure case, which will be patched later when finalizing - the slow path. Generated code returns the host addend in X1, - clobbers X0,X2,X3,TMP. */ -static void tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, TCGMemOp opc, - tcg_insn_unit **label_ptr, int mem_index, - bool is_read) -{ - int tlb_offset = is_read ? - offsetof(CPUArchState, tlb_table[mem_index][0].addr_read) - : offsetof(CPUArchState, tlb_table[mem_index][0].addr_write); - int s_mask = (1 << (opc & MO_SIZE)) - 1; - TCGReg base = TCG_AREG0, x3; - uint64_t tlb_mask; - - /* For aligned accesses, we check the first byte and include the alignment - bits within the address. For unaligned access, we check that we don't - cross pages using the address of the last byte of the access. */ - if ((opc & MO_AMASK) == MO_ALIGN || s_mask == 0) { - tlb_mask = TARGET_PAGE_MASK | s_mask; - x3 = addr_reg; - } else { - tcg_out_insn(s, 3401, ADDI, TARGET_LONG_BITS == 64, - TCG_REG_X3, addr_reg, s_mask); - tlb_mask = TARGET_PAGE_MASK; - x3 = TCG_REG_X3; - } - - /* Extract the TLB index from the address into X0. - X0 = - addr_reg */ - tcg_out_ubfm(s, TARGET_LONG_BITS == 64, TCG_REG_X0, addr_reg, - TARGET_PAGE_BITS, TARGET_PAGE_BITS + CPU_TLB_BITS); - - /* Store the page mask part of the address into X3. */ - tcg_out_logicali(s, I3404_ANDI, TARGET_LONG_BITS == 64, - TCG_REG_X3, x3, tlb_mask); - - /* Add any "high bits" from the tlb offset to the env address into X2, - to take advantage of the LSL12 form of the ADDI instruction. - X2 = env + (tlb_offset & 0xfff000) */ - if (tlb_offset & 0xfff000) { - tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_X2, base, - tlb_offset & 0xfff000); - base = TCG_REG_X2; - } - - /* Merge the tlb index contribution into X2. - X2 = X2 + (X0 << CPU_TLB_ENTRY_BITS) */ - tcg_out_insn(s, 3502S, ADD_LSL, TCG_TYPE_I64, TCG_REG_X2, base, - TCG_REG_X0, CPU_TLB_ENTRY_BITS); - - /* Merge "low bits" from tlb offset, load the tlb comparator into X0. - X0 = load [X2 + (tlb_offset & 0x000fff)] */ - tcg_out_ldst(s, TARGET_LONG_BITS == 32 ? I3312_LDRW : I3312_LDRX, - TCG_REG_X0, TCG_REG_X2, tlb_offset & 0xfff); - - /* Load the tlb addend. Do that early to avoid stalling. - X1 = load [X2 + (tlb_offset & 0xfff) + offsetof(addend)] */ - tcg_out_ldst(s, I3312_LDRX, TCG_REG_X1, TCG_REG_X2, - (tlb_offset & 0xfff) + (offsetof(CPUTLBEntry, addend)) - - (is_read ? offsetof(CPUTLBEntry, addr_read) - : offsetof(CPUTLBEntry, addr_write))); - - /* Perform the address comparison. */ - tcg_out_cmp(s, (TARGET_LONG_BITS == 64), TCG_REG_X0, TCG_REG_X3, 0); - - /* If not equal, we jump to the slow path. */ - *label_ptr = s->code_ptr; - tcg_out_goto_cond_noaddr(s, TCG_COND_NE); -} - -#endif /* CONFIG_SOFTMMU */ - -static void tcg_out_qemu_ld_direct(TCGContext *s, TCGMemOp memop, TCGType ext, - TCGReg data_r, TCGReg addr_r, - TCGType otype, TCGReg off_r) -{ - const TCGMemOp bswap = memop & MO_BSWAP; - - switch (memop & MO_SSIZE) { - case MO_UB: - tcg_out_ldst_r(s, I3312_LDRB, data_r, addr_r, otype, off_r); - break; - case MO_SB: - tcg_out_ldst_r(s, ext ? I3312_LDRSBX : I3312_LDRSBW, - data_r, addr_r, otype, off_r); - break; - case MO_UW: - tcg_out_ldst_r(s, I3312_LDRH, data_r, addr_r, otype, off_r); - if (bswap) { - tcg_out_rev16(s, data_r, data_r); - } - break; - case MO_SW: - if (bswap) { - tcg_out_ldst_r(s, I3312_LDRH, data_r, addr_r, otype, off_r); - tcg_out_rev16(s, data_r, data_r); - tcg_out_sxt(s, ext, MO_16, data_r, data_r); - } else { - tcg_out_ldst_r(s, (ext ? I3312_LDRSHX : I3312_LDRSHW), - data_r, addr_r, otype, off_r); - } - break; - case MO_UL: - tcg_out_ldst_r(s, I3312_LDRW, data_r, addr_r, otype, off_r); - if (bswap) { - tcg_out_rev32(s, data_r, data_r); - } - break; - case MO_SL: - if (bswap) { - tcg_out_ldst_r(s, I3312_LDRW, data_r, addr_r, otype, off_r); - tcg_out_rev32(s, data_r, data_r); - tcg_out_sxt(s, TCG_TYPE_I64, MO_32, data_r, data_r); - } else { - tcg_out_ldst_r(s, I3312_LDRSWX, data_r, addr_r, otype, off_r); - } - break; - case MO_Q: - tcg_out_ldst_r(s, I3312_LDRX, data_r, addr_r, otype, off_r); - if (bswap) { - tcg_out_rev64(s, data_r, data_r); - } - break; - default: - tcg_abort(); - } -} - -static void tcg_out_qemu_st_direct(TCGContext *s, TCGMemOp memop, - TCGReg data_r, TCGReg addr_r, - TCGType otype, TCGReg off_r) -{ - const TCGMemOp bswap = memop & MO_BSWAP; - - switch (memop & MO_SIZE) { - case MO_8: - tcg_out_ldst_r(s, I3312_STRB, data_r, addr_r, otype, off_r); - break; - case MO_16: - if (bswap && data_r != TCG_REG_XZR) { - tcg_out_rev16(s, TCG_REG_TMP, data_r); - data_r = TCG_REG_TMP; - } - tcg_out_ldst_r(s, I3312_STRH, data_r, addr_r, otype, off_r); - break; - case MO_32: - if (bswap && data_r != TCG_REG_XZR) { - tcg_out_rev32(s, TCG_REG_TMP, data_r); - data_r = TCG_REG_TMP; - } - tcg_out_ldst_r(s, I3312_STRW, data_r, addr_r, otype, off_r); - break; - case MO_64: - if (bswap && data_r != TCG_REG_XZR) { - tcg_out_rev64(s, TCG_REG_TMP, data_r); - data_r = TCG_REG_TMP; - } - tcg_out_ldst_r(s, I3312_STRX, data_r, addr_r, otype, off_r); - break; - default: - tcg_abort(); - } -} - -static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg, - TCGMemOpIdx oi, TCGType ext) -{ - TCGMemOp memop = get_memop(oi); - const TCGType otype = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32; -#ifdef CONFIG_SOFTMMU - unsigned mem_index = get_mmuidx(oi); - tcg_insn_unit *label_ptr; - - tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, mem_index, 1); - tcg_out_qemu_ld_direct(s, memop, ext, data_reg, - TCG_REG_X1, otype, addr_reg); - add_qemu_ldst_label(s, true, oi, ext, data_reg, addr_reg, - s->code_ptr, label_ptr); -#else /* !CONFIG_SOFTMMU */ - if (USE_GUEST_BASE) { - tcg_out_qemu_ld_direct(s, memop, ext, data_reg, - TCG_REG_GUEST_BASE, otype, addr_reg); - } else { - tcg_out_qemu_ld_direct(s, memop, ext, data_reg, - addr_reg, TCG_TYPE_I64, TCG_REG_XZR); - } -#endif /* CONFIG_SOFTMMU */ -} - -static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg, - TCGMemOpIdx oi) -{ - TCGMemOp memop = get_memop(oi); - const TCGType otype = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32; -#ifdef CONFIG_SOFTMMU - unsigned mem_index = get_mmuidx(oi); - tcg_insn_unit *label_ptr; - - tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, mem_index, 0); - tcg_out_qemu_st_direct(s, memop, data_reg, - TCG_REG_X1, otype, addr_reg); - add_qemu_ldst_label(s, false, oi, (memop & MO_SIZE)== MO_64, - data_reg, addr_reg, s->code_ptr, label_ptr); -#else /* !CONFIG_SOFTMMU */ - if (USE_GUEST_BASE) { - tcg_out_qemu_st_direct(s, memop, data_reg, - TCG_REG_GUEST_BASE, otype, addr_reg); - } else { - tcg_out_qemu_st_direct(s, memop, data_reg, - addr_reg, TCG_TYPE_I64, TCG_REG_XZR); - } -#endif /* CONFIG_SOFTMMU */ -} - -static tcg_insn_unit *tb_ret_addr; - -static void tcg_out_op(TCGContext *s, TCGOpcode opc, - const TCGArg args[TCG_MAX_OP_ARGS], - const int const_args[TCG_MAX_OP_ARGS]) -{ - /* 99% of the time, we can signal the use of extension registers - by looking to see if the opcode handles 64-bit data. */ - TCGType ext = (tcg_op_defs[opc].flags & TCG_OPF_64BIT) != 0; - - /* Hoist the loads of the most common arguments. */ - TCGArg a0 = args[0]; - TCGArg a1 = args[1]; - TCGArg a2 = args[2]; - int c2 = const_args[2]; - - /* Some operands are defined with "rZ" constraint, a register or - the zero register. These need not actually test args[I] == 0. */ -#define REG0(I) (const_args[I] ? TCG_REG_XZR : (TCGReg)args[I]) - - switch (opc) { - case INDEX_op_exit_tb: - tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, a0); - tcg_out_goto(s, tb_ret_addr); - break; - - case INDEX_op_goto_tb: -#ifndef USE_DIRECT_JUMP -#error "USE_DIRECT_JUMP required for aarch64" -#endif - assert(s->tb_jmp_offset != NULL); /* consistency for USE_DIRECT_JUMP */ - s->tb_jmp_offset[a0] = tcg_current_code_size(s); - /* actual branch destination will be patched by - aarch64_tb_set_jmp_target later, beware retranslation. */ - tcg_out_goto_noaddr(s); - s->tb_next_offset[a0] = tcg_current_code_size(s); - break; - - case INDEX_op_br: - tcg_out_goto_label(s, arg_label(a0)); - break; - - case INDEX_op_ld8u_i32: - case INDEX_op_ld8u_i64: - tcg_out_ldst(s, I3312_LDRB, a0, a1, a2); - break; - case INDEX_op_ld8s_i32: - tcg_out_ldst(s, I3312_LDRSBW, a0, a1, a2); - break; - case INDEX_op_ld8s_i64: - tcg_out_ldst(s, I3312_LDRSBX, a0, a1, a2); - break; - case INDEX_op_ld16u_i32: - case INDEX_op_ld16u_i64: - tcg_out_ldst(s, I3312_LDRH, a0, a1, a2); - break; - case INDEX_op_ld16s_i32: - tcg_out_ldst(s, I3312_LDRSHW, a0, a1, a2); - break; - case INDEX_op_ld16s_i64: - tcg_out_ldst(s, I3312_LDRSHX, a0, a1, a2); - break; - case INDEX_op_ld_i32: - case INDEX_op_ld32u_i64: - tcg_out_ldst(s, I3312_LDRW, a0, a1, a2); - break; - case INDEX_op_ld32s_i64: - tcg_out_ldst(s, I3312_LDRSWX, a0, a1, a2); - break; - case INDEX_op_ld_i64: - tcg_out_ldst(s, I3312_LDRX, a0, a1, a2); - break; - - case INDEX_op_st8_i32: - case INDEX_op_st8_i64: - tcg_out_ldst(s, I3312_STRB, REG0(0), a1, a2); - break; - case INDEX_op_st16_i32: - case INDEX_op_st16_i64: - tcg_out_ldst(s, I3312_STRH, REG0(0), a1, a2); - break; - case INDEX_op_st_i32: - case INDEX_op_st32_i64: - tcg_out_ldst(s, I3312_STRW, REG0(0), a1, a2); - break; - case INDEX_op_st_i64: - tcg_out_ldst(s, I3312_STRX, REG0(0), a1, a2); - break; - - case INDEX_op_add_i32: - a2 = (int32_t)a2; - /* FALLTHRU */ - case INDEX_op_add_i64: - if (c2) { - tcg_out_addsubi(s, ext, a0, a1, a2); - } else { - tcg_out_insn(s, 3502, ADD, ext, a0, a1, a2); - } - break; - - case INDEX_op_sub_i32: - a2 = (int32_t)a2; - /* FALLTHRU */ - case INDEX_op_sub_i64: - if (c2) { - tcg_out_addsubi(s, ext, a0, a1, -a2); - } else { - tcg_out_insn(s, 3502, SUB, ext, a0, a1, a2); - } - break; - - case INDEX_op_neg_i64: - case INDEX_op_neg_i32: - tcg_out_insn(s, 3502, SUB, ext, a0, TCG_REG_XZR, a1); - break; - - case INDEX_op_and_i32: - a2 = (int32_t)a2; - /* FALLTHRU */ - case INDEX_op_and_i64: - if (c2) { - tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, a2); - } else { - tcg_out_insn(s, 3510, AND, ext, a0, a1, a2); - } - break; - - case INDEX_op_andc_i32: - a2 = (int32_t)a2; - /* FALLTHRU */ - case INDEX_op_andc_i64: - if (c2) { - tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, ~a2); - } else { - tcg_out_insn(s, 3510, BIC, ext, a0, a1, a2); - } - break; - - case INDEX_op_or_i32: - a2 = (int32_t)a2; - /* FALLTHRU */ - case INDEX_op_or_i64: - if (c2) { - tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, a2); - } else { - tcg_out_insn(s, 3510, ORR, ext, a0, a1, a2); - } - break; - - case INDEX_op_orc_i32: - a2 = (int32_t)a2; - /* FALLTHRU */ - case INDEX_op_orc_i64: - if (c2) { - tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, ~a2); - } else { - tcg_out_insn(s, 3510, ORN, ext, a0, a1, a2); - } - break; - - case INDEX_op_xor_i32: - a2 = (int32_t)a2; - /* FALLTHRU */ - case INDEX_op_xor_i64: - if (c2) { - tcg_out_logicali(s, I3404_EORI, ext, a0, a1, a2); - } else { - tcg_out_insn(s, 3510, EOR, ext, a0, a1, a2); - } - break; - - case INDEX_op_eqv_i32: - a2 = (int32_t)a2; - /* FALLTHRU */ - case INDEX_op_eqv_i64: - if (c2) { - tcg_out_logicali(s, I3404_EORI, ext, a0, a1, ~a2); - } else { - tcg_out_insn(s, 3510, EON, ext, a0, a1, a2); - } - break; - - case INDEX_op_not_i64: - case INDEX_op_not_i32: - tcg_out_insn(s, 3510, ORN, ext, a0, TCG_REG_XZR, a1); - break; - - case INDEX_op_mul_i64: - case INDEX_op_mul_i32: - tcg_out_insn(s, 3509, MADD, ext, a0, a1, a2, TCG_REG_XZR); - break; - - case INDEX_op_div_i64: - case INDEX_op_div_i32: - tcg_out_insn(s, 3508, SDIV, ext, a0, a1, a2); - break; - case INDEX_op_divu_i64: - case INDEX_op_divu_i32: - tcg_out_insn(s, 3508, UDIV, ext, a0, a1, a2); - break; - - case INDEX_op_rem_i64: - case INDEX_op_rem_i32: - tcg_out_insn(s, 3508, SDIV, ext, TCG_REG_TMP, a1, a2); - tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1); - break; - case INDEX_op_remu_i64: - case INDEX_op_remu_i32: - tcg_out_insn(s, 3508, UDIV, ext, TCG_REG_TMP, a1, a2); - tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1); - break; - - case INDEX_op_shl_i64: - case INDEX_op_shl_i32: - if (c2) { - tcg_out_shl(s, ext, a0, a1, a2); - } else { - tcg_out_insn(s, 3508, LSLV, ext, a0, a1, a2); - } - break; - - case INDEX_op_shr_i64: - case INDEX_op_shr_i32: - if (c2) { - tcg_out_shr(s, ext, a0, a1, a2); - } else { - tcg_out_insn(s, 3508, LSRV, ext, a0, a1, a2); - } - break; - - case INDEX_op_sar_i64: - case INDEX_op_sar_i32: - if (c2) { - tcg_out_sar(s, ext, a0, a1, a2); - } else { - tcg_out_insn(s, 3508, ASRV, ext, a0, a1, a2); - } - break; - - case INDEX_op_rotr_i64: - case INDEX_op_rotr_i32: - if (c2) { - tcg_out_rotr(s, ext, a0, a1, a2); - } else { - tcg_out_insn(s, 3508, RORV, ext, a0, a1, a2); - } - break; - - case INDEX_op_rotl_i64: - case INDEX_op_rotl_i32: - if (c2) { - tcg_out_rotl(s, ext, a0, a1, a2); - } else { - tcg_out_insn(s, 3502, SUB, 0, TCG_REG_TMP, TCG_REG_XZR, a2); - tcg_out_insn(s, 3508, RORV, ext, a0, a1, TCG_REG_TMP); - } - break; - - case INDEX_op_brcond_i32: - a1 = (int32_t)a1; - /* FALLTHRU */ - case INDEX_op_brcond_i64: - tcg_out_brcond(s, ext, a2, a0, a1, const_args[1], arg_label(args[3])); - break; - - case INDEX_op_setcond_i32: - a2 = (int32_t)a2; - /* FALLTHRU */ - case INDEX_op_setcond_i64: - tcg_out_cmp(s, ext, a1, a2, c2); - /* Use CSET alias of CSINC Wd, WZR, WZR, invert(cond). */ - tcg_out_insn(s, 3506, CSINC, TCG_TYPE_I32, a0, TCG_REG_XZR, - TCG_REG_XZR, tcg_invert_cond(args[3])); - break; - - case INDEX_op_movcond_i32: - a2 = (int32_t)a2; - /* FALLTHRU */ - case INDEX_op_movcond_i64: - tcg_out_cmp(s, ext, a1, a2, c2); - tcg_out_insn(s, 3506, CSEL, ext, a0, REG0(3), REG0(4), args[5]); - break; - - case INDEX_op_qemu_ld_i32: - case INDEX_op_qemu_ld_i64: - tcg_out_qemu_ld(s, a0, a1, a2, ext); - break; - case INDEX_op_qemu_st_i32: - case INDEX_op_qemu_st_i64: - tcg_out_qemu_st(s, REG0(0), a1, a2); - break; - - case INDEX_op_bswap64_i64: - tcg_out_rev64(s, a0, a1); - break; - case INDEX_op_bswap32_i64: - case INDEX_op_bswap32_i32: - tcg_out_rev32(s, a0, a1); - break; - case INDEX_op_bswap16_i64: - case INDEX_op_bswap16_i32: - tcg_out_rev16(s, a0, a1); - break; - - case INDEX_op_ext8s_i64: - case INDEX_op_ext8s_i32: - tcg_out_sxt(s, ext, MO_8, a0, a1); - break; - case INDEX_op_ext16s_i64: - case INDEX_op_ext16s_i32: - tcg_out_sxt(s, ext, MO_16, a0, a1); - break; - case INDEX_op_ext_i32_i64: - case INDEX_op_ext32s_i64: - tcg_out_sxt(s, TCG_TYPE_I64, MO_32, a0, a1); - break; - case INDEX_op_ext8u_i64: - case INDEX_op_ext8u_i32: - tcg_out_uxt(s, MO_8, a0, a1); - break; - case INDEX_op_ext16u_i64: - case INDEX_op_ext16u_i32: - tcg_out_uxt(s, MO_16, a0, a1); - break; - case INDEX_op_extu_i32_i64: - case INDEX_op_ext32u_i64: - tcg_out_movr(s, TCG_TYPE_I32, a0, a1); - break; - - case INDEX_op_deposit_i64: - case INDEX_op_deposit_i32: - tcg_out_dep(s, ext, a0, REG0(2), args[3], args[4]); - break; - - case INDEX_op_add2_i32: - tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3), - (int32_t)args[4], args[5], const_args[4], - const_args[5], false); - break; - case INDEX_op_add2_i64: - tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4], - args[5], const_args[4], const_args[5], false); - break; - case INDEX_op_sub2_i32: - tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3), - (int32_t)args[4], args[5], const_args[4], - const_args[5], true); - break; - case INDEX_op_sub2_i64: - tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4], - args[5], const_args[4], const_args[5], true); - break; - - case INDEX_op_muluh_i64: - tcg_out_insn(s, 3508, UMULH, TCG_TYPE_I64, a0, a1, a2); - break; - case INDEX_op_mulsh_i64: - tcg_out_insn(s, 3508, SMULH, TCG_TYPE_I64, a0, a1, a2); - break; - - case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */ - case INDEX_op_mov_i64: - case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi. */ - case INDEX_op_movi_i64: - case INDEX_op_call: /* Always emitted via tcg_out_call. */ - default: - tcg_abort(); - } - -#undef REG0 -} - -static const TCGTargetOpDef aarch64_op_defs[] = { - { INDEX_op_exit_tb, { } }, - { INDEX_op_goto_tb, { } }, - { INDEX_op_br, { } }, - - { INDEX_op_ld8u_i32, { "r", "r" } }, - { INDEX_op_ld8s_i32, { "r", "r" } }, - { INDEX_op_ld16u_i32, { "r", "r" } }, - { INDEX_op_ld16s_i32, { "r", "r" } }, - { INDEX_op_ld_i32, { "r", "r" } }, - { INDEX_op_ld8u_i64, { "r", "r" } }, - { INDEX_op_ld8s_i64, { "r", "r" } }, - { INDEX_op_ld16u_i64, { "r", "r" } }, - { INDEX_op_ld16s_i64, { "r", "r" } }, - { INDEX_op_ld32u_i64, { "r", "r" } }, - { INDEX_op_ld32s_i64, { "r", "r" } }, - { INDEX_op_ld_i64, { "r", "r" } }, - - { INDEX_op_st8_i32, { "rZ", "r" } }, - { INDEX_op_st16_i32, { "rZ", "r" } }, - { INDEX_op_st_i32, { "rZ", "r" } }, - { INDEX_op_st8_i64, { "rZ", "r" } }, - { INDEX_op_st16_i64, { "rZ", "r" } }, - { INDEX_op_st32_i64, { "rZ", "r" } }, - { INDEX_op_st_i64, { "rZ", "r" } }, - - { INDEX_op_add_i32, { "r", "r", "rA" } }, - { INDEX_op_add_i64, { "r", "r", "rA" } }, - { INDEX_op_sub_i32, { "r", "r", "rA" } }, - { INDEX_op_sub_i64, { "r", "r", "rA" } }, - { INDEX_op_mul_i32, { "r", "r", "r" } }, - { INDEX_op_mul_i64, { "r", "r", "r" } }, - { INDEX_op_div_i32, { "r", "r", "r" } }, - { INDEX_op_div_i64, { "r", "r", "r" } }, - { INDEX_op_divu_i32, { "r", "r", "r" } }, - { INDEX_op_divu_i64, { "r", "r", "r" } }, - { INDEX_op_rem_i32, { "r", "r", "r" } }, - { INDEX_op_rem_i64, { "r", "r", "r" } }, - { INDEX_op_remu_i32, { "r", "r", "r" } }, - { INDEX_op_remu_i64, { "r", "r", "r" } }, - { INDEX_op_and_i32, { "r", "r", "rL" } }, - { INDEX_op_and_i64, { "r", "r", "rL" } }, - { INDEX_op_or_i32, { "r", "r", "rL" } }, - { INDEX_op_or_i64, { "r", "r", "rL" } }, - { INDEX_op_xor_i32, { "r", "r", "rL" } }, - { INDEX_op_xor_i64, { "r", "r", "rL" } }, - { INDEX_op_andc_i32, { "r", "r", "rL" } }, - { INDEX_op_andc_i64, { "r", "r", "rL" } }, - { INDEX_op_orc_i32, { "r", "r", "rL" } }, - { INDEX_op_orc_i64, { "r", "r", "rL" } }, - { INDEX_op_eqv_i32, { "r", "r", "rL" } }, - { INDEX_op_eqv_i64, { "r", "r", "rL" } }, - - { INDEX_op_neg_i32, { "r", "r" } }, - { INDEX_op_neg_i64, { "r", "r" } }, - { INDEX_op_not_i32, { "r", "r" } }, - { INDEX_op_not_i64, { "r", "r" } }, - - { INDEX_op_shl_i32, { "r", "r", "ri" } }, - { INDEX_op_shr_i32, { "r", "r", "ri" } }, - { INDEX_op_sar_i32, { "r", "r", "ri" } }, - { INDEX_op_rotl_i32, { "r", "r", "ri" } }, - { INDEX_op_rotr_i32, { "r", "r", "ri" } }, - { INDEX_op_shl_i64, { "r", "r", "ri" } }, - { INDEX_op_shr_i64, { "r", "r", "ri" } }, - { INDEX_op_sar_i64, { "r", "r", "ri" } }, - { INDEX_op_rotl_i64, { "r", "r", "ri" } }, - { INDEX_op_rotr_i64, { "r", "r", "ri" } }, - - { INDEX_op_brcond_i32, { "r", "rA" } }, - { INDEX_op_brcond_i64, { "r", "rA" } }, - { INDEX_op_setcond_i32, { "r", "r", "rA" } }, - { INDEX_op_setcond_i64, { "r", "r", "rA" } }, - { INDEX_op_movcond_i32, { "r", "r", "rA", "rZ", "rZ" } }, - { INDEX_op_movcond_i64, { "r", "r", "rA", "rZ", "rZ" } }, - - { INDEX_op_qemu_ld_i32, { "r", "l" } }, - { INDEX_op_qemu_ld_i64, { "r", "l" } }, - { INDEX_op_qemu_st_i32, { "lZ", "l" } }, - { INDEX_op_qemu_st_i64, { "lZ", "l" } }, - - { INDEX_op_bswap16_i32, { "r", "r" } }, - { INDEX_op_bswap32_i32, { "r", "r" } }, - { INDEX_op_bswap16_i64, { "r", "r" } }, - { INDEX_op_bswap32_i64, { "r", "r" } }, - { INDEX_op_bswap64_i64, { "r", "r" } }, - - { INDEX_op_ext8s_i32, { "r", "r" } }, - { INDEX_op_ext16s_i32, { "r", "r" } }, - { INDEX_op_ext8u_i32, { "r", "r" } }, - { INDEX_op_ext16u_i32, { "r", "r" } }, - - { INDEX_op_ext8s_i64, { "r", "r" } }, - { INDEX_op_ext16s_i64, { "r", "r" } }, - { INDEX_op_ext32s_i64, { "r", "r" } }, - { INDEX_op_ext8u_i64, { "r", "r" } }, - { INDEX_op_ext16u_i64, { "r", "r" } }, - { INDEX_op_ext32u_i64, { "r", "r" } }, - { INDEX_op_ext_i32_i64, { "r", "r" } }, - { INDEX_op_extu_i32_i64, { "r", "r" } }, - - { INDEX_op_deposit_i32, { "r", "0", "rZ" } }, - { INDEX_op_deposit_i64, { "r", "0", "rZ" } }, - - { INDEX_op_add2_i32, { "r", "r", "rZ", "rZ", "rA", "rMZ" } }, - { INDEX_op_add2_i64, { "r", "r", "rZ", "rZ", "rA", "rMZ" } }, - { INDEX_op_sub2_i32, { "r", "r", "rZ", "rZ", "rA", "rMZ" } }, - { INDEX_op_sub2_i64, { "r", "r", "rZ", "rZ", "rA", "rMZ" } }, - - { INDEX_op_muluh_i64, { "r", "r", "r" } }, - { INDEX_op_mulsh_i64, { "r", "r", "r" } }, - - { -1 }, -}; - -static void tcg_target_init(TCGContext *s) -{ - tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffffffff); - tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I64], 0, 0xffffffff); - - tcg_regset_set32(tcg_target_call_clobber_regs, 0, - (1 << TCG_REG_X0) | (1 << TCG_REG_X1) | - (1 << TCG_REG_X2) | (1 << TCG_REG_X3) | - (1 << TCG_REG_X4) | (1 << TCG_REG_X5) | - (1 << TCG_REG_X6) | (1 << TCG_REG_X7) | - (1 << TCG_REG_X8) | (1 << TCG_REG_X9) | - (1 << TCG_REG_X10) | (1 << TCG_REG_X11) | - (1 << TCG_REG_X12) | (1 << TCG_REG_X13) | - (1 << TCG_REG_X14) | (1 << TCG_REG_X15) | - (1 << TCG_REG_X16) | (1 << TCG_REG_X17) | - (1 << TCG_REG_X18) | (1 << TCG_REG_X30)); - - tcg_regset_clear(s->reserved_regs); - tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP); - tcg_regset_set_reg(s->reserved_regs, TCG_REG_FP); - tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP); - tcg_regset_set_reg(s->reserved_regs, TCG_REG_X18); /* platform register */ - - tcg_add_target_add_op_defs(aarch64_op_defs); -} - -/* Saving pairs: (X19, X20) .. (X27, X28), (X29(fp), X30(lr)). */ -#define PUSH_SIZE ((30 - 19 + 1) * 8) - -#define FRAME_SIZE \ - ((PUSH_SIZE \ - + TCG_STATIC_CALL_ARGS_SIZE \ - + CPU_TEMP_BUF_NLONGS * sizeof(long) \ - + TCG_TARGET_STACK_ALIGN - 1) \ - & ~(TCG_TARGET_STACK_ALIGN - 1)) - -/* We're expecting a 2 byte uleb128 encoded value. */ -QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14)); - -/* We're expecting to use a single ADDI insn. */ -QEMU_BUILD_BUG_ON(FRAME_SIZE - PUSH_SIZE > 0xfff); - -static void tcg_target_qemu_prologue(TCGContext *s) -{ - TCGReg r; - - /* Push (FP, LR) and allocate space for all saved registers. */ - tcg_out_insn(s, 3314, STP, TCG_REG_FP, TCG_REG_LR, - TCG_REG_SP, -PUSH_SIZE, 1, 1); - - /* Set up frame pointer for canonical unwinding. */ - tcg_out_movr_sp(s, TCG_TYPE_I64, TCG_REG_FP, TCG_REG_SP); - - /* Store callee-preserved regs x19..x28. */ - for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) { - int ofs = (r - TCG_REG_X19 + 2) * 8; - tcg_out_insn(s, 3314, STP, r, r + 1, TCG_REG_SP, ofs, 1, 0); - } - - /* Make stack space for TCG locals. */ - tcg_out_insn(s, 3401, SUBI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP, - FRAME_SIZE - PUSH_SIZE); - - /* Inform TCG about how to find TCG locals with register, offset, size. */ - tcg_set_frame(s, TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE, - CPU_TEMP_BUF_NLONGS * sizeof(long)); - -#if !defined(CONFIG_SOFTMMU) - if (USE_GUEST_BASE) { - tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_GUEST_BASE, guest_base); - tcg_regset_set_reg(s->reserved_regs, TCG_REG_GUEST_BASE); - } -#endif - - tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]); - tcg_out_insn(s, 3207, BR, tcg_target_call_iarg_regs[1]); - - tb_ret_addr = s->code_ptr; - - /* Remove TCG locals stack space. */ - tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP, - FRAME_SIZE - PUSH_SIZE); - - /* Restore registers x19..x28. */ - for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) { - int ofs = (r - TCG_REG_X19 + 2) * 8; - tcg_out_insn(s, 3314, LDP, r, r + 1, TCG_REG_SP, ofs, 1, 0); - } - - /* Pop (FP, LR), restore SP to previous frame. */ - tcg_out_insn(s, 3314, LDP, TCG_REG_FP, TCG_REG_LR, - TCG_REG_SP, PUSH_SIZE, 0, 1); - tcg_out_insn(s, 3207, RET, TCG_REG_LR); -} - -typedef struct { - DebugFrameHeader h; - uint8_t fde_def_cfa[4]; - uint8_t fde_reg_ofs[24]; -} DebugFrame; - -#define ELF_HOST_MACHINE EM_AARCH64 - -static const DebugFrame debug_frame = { - .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */ - .h.cie.id = -1, - .h.cie.version = 1, - .h.cie.code_align = 1, - .h.cie.data_align = 0x78, /* sleb128 -8 */ - .h.cie.return_column = TCG_REG_LR, - - /* Total FDE size does not include the "len" member. */ - .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset), - - .fde_def_cfa = { - 12, TCG_REG_SP, /* DW_CFA_def_cfa sp, ... */ - (FRAME_SIZE & 0x7f) | 0x80, /* ... uleb128 FRAME_SIZE */ - (FRAME_SIZE >> 7) - }, - .fde_reg_ofs = { - 0x80 + 28, 1, /* DW_CFA_offset, x28, -8 */ - 0x80 + 27, 2, /* DW_CFA_offset, x27, -16 */ - 0x80 + 26, 3, /* DW_CFA_offset, x26, -24 */ - 0x80 + 25, 4, /* DW_CFA_offset, x25, -32 */ - 0x80 + 24, 5, /* DW_CFA_offset, x24, -40 */ - 0x80 + 23, 6, /* DW_CFA_offset, x23, -48 */ - 0x80 + 22, 7, /* DW_CFA_offset, x22, -56 */ - 0x80 + 21, 8, /* DW_CFA_offset, x21, -64 */ - 0x80 + 20, 9, /* DW_CFA_offset, x20, -72 */ - 0x80 + 19, 10, /* DW_CFA_offset, x1p, -80 */ - 0x80 + 30, 11, /* DW_CFA_offset, lr, -88 */ - 0x80 + 29, 12, /* DW_CFA_offset, fp, -96 */ - } -}; - -void tcg_register_jit(void *buf, size_t buf_size) -{ - tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame)); -} diff --git a/tcg/aarch64/tcg-target.inc.c b/tcg/aarch64/tcg-target.inc.c new file mode 100644 index 0000000..8467d5d --- /dev/null +++ b/tcg/aarch64/tcg-target.inc.c @@ -0,0 +1,1893 @@ +/* + * Initial TCG Implementation for aarch64 + * + * Copyright (c) 2013 Huawei Technologies Duesseldorf GmbH + * Written by Claudio Fontana + * + * This work is licensed under the terms of the GNU GPL, version 2 or + * (at your option) any later version. + * + * See the COPYING file in the top-level directory for details. + */ + +#include "qemu/osdep.h" +#include "tcg-be-ldst.h" +#include "qemu/bitops.h" + +/* We're going to re-use TCGType in setting of the SF bit, which controls + the size of the operation performed. If we know the values match, it + makes things much cleaner. */ +QEMU_BUILD_BUG_ON(TCG_TYPE_I32 != 0 || TCG_TYPE_I64 != 1); + +#ifndef NDEBUG +static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = { + "%x0", "%x1", "%x2", "%x3", "%x4", "%x5", "%x6", "%x7", + "%x8", "%x9", "%x10", "%x11", "%x12", "%x13", "%x14", "%x15", + "%x16", "%x17", "%x18", "%x19", "%x20", "%x21", "%x22", "%x23", + "%x24", "%x25", "%x26", "%x27", "%x28", "%fp", "%x30", "%sp", +}; +#endif /* NDEBUG */ + +static const int tcg_target_reg_alloc_order[] = { + TCG_REG_X20, TCG_REG_X21, TCG_REG_X22, TCG_REG_X23, + TCG_REG_X24, TCG_REG_X25, TCG_REG_X26, TCG_REG_X27, + TCG_REG_X28, /* we will reserve this for guest_base if configured */ + + TCG_REG_X8, TCG_REG_X9, TCG_REG_X10, TCG_REG_X11, + TCG_REG_X12, TCG_REG_X13, TCG_REG_X14, TCG_REG_X15, + TCG_REG_X16, TCG_REG_X17, + + TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3, + TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7, + + /* X18 reserved by system */ + /* X19 reserved for AREG0 */ + /* X29 reserved as fp */ + /* X30 reserved as temporary */ +}; + +static const int tcg_target_call_iarg_regs[8] = { + TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3, + TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7 +}; +static const int tcg_target_call_oarg_regs[1] = { + TCG_REG_X0 +}; + +#define TCG_REG_TMP TCG_REG_X30 + +#ifndef CONFIG_SOFTMMU +/* Note that XZR cannot be encoded in the address base register slot, + as that actaully encodes SP. So if we need to zero-extend the guest + address, via the address index register slot, we need to load even + a zero guest base into a register. */ +#define USE_GUEST_BASE (guest_base != 0 || TARGET_LONG_BITS == 32) +#define TCG_REG_GUEST_BASE TCG_REG_X28 +#endif + +static inline void reloc_pc26(tcg_insn_unit *code_ptr, tcg_insn_unit *target) +{ + ptrdiff_t offset = target - code_ptr; + assert(offset == sextract64(offset, 0, 26)); + /* read instruction, mask away previous PC_REL26 parameter contents, + set the proper offset, then write back the instruction. */ + *code_ptr = deposit32(*code_ptr, 0, 26, offset); +} + +static inline void reloc_pc19(tcg_insn_unit *code_ptr, tcg_insn_unit *target) +{ + ptrdiff_t offset = target - code_ptr; + assert(offset == sextract64(offset, 0, 19)); + *code_ptr = deposit32(*code_ptr, 5, 19, offset); +} + +static inline void patch_reloc(tcg_insn_unit *code_ptr, int type, + intptr_t value, intptr_t addend) +{ + assert(addend == 0); + switch (type) { + case R_AARCH64_JUMP26: + case R_AARCH64_CALL26: + reloc_pc26(code_ptr, (tcg_insn_unit *)value); + break; + case R_AARCH64_CONDBR19: + reloc_pc19(code_ptr, (tcg_insn_unit *)value); + break; + default: + tcg_abort(); + } +} + +#define TCG_CT_CONST_AIMM 0x100 +#define TCG_CT_CONST_LIMM 0x200 +#define TCG_CT_CONST_ZERO 0x400 +#define TCG_CT_CONST_MONE 0x800 + +/* parse target specific constraints */ +static int target_parse_constraint(TCGArgConstraint *ct, + const char **pct_str) +{ + const char *ct_str = *pct_str; + + switch (ct_str[0]) { + case 'r': + ct->ct |= TCG_CT_REG; + tcg_regset_set32(ct->u.regs, 0, (1ULL << TCG_TARGET_NB_REGS) - 1); + break; + case 'l': /* qemu_ld / qemu_st address, data_reg */ + ct->ct |= TCG_CT_REG; + tcg_regset_set32(ct->u.regs, 0, (1ULL << TCG_TARGET_NB_REGS) - 1); +#ifdef CONFIG_SOFTMMU + /* x0 and x1 will be overwritten when reading the tlb entry, + and x2, and x3 for helper args, better to avoid using them. */ + tcg_regset_reset_reg(ct->u.regs, TCG_REG_X0); + tcg_regset_reset_reg(ct->u.regs, TCG_REG_X1); + tcg_regset_reset_reg(ct->u.regs, TCG_REG_X2); + tcg_regset_reset_reg(ct->u.regs, TCG_REG_X3); +#endif + break; + case 'A': /* Valid for arithmetic immediate (positive or negative). */ + ct->ct |= TCG_CT_CONST_AIMM; + break; + case 'L': /* Valid for logical immediate. */ + ct->ct |= TCG_CT_CONST_LIMM; + break; + case 'M': /* minus one */ + ct->ct |= TCG_CT_CONST_MONE; + break; + case 'Z': /* zero */ + ct->ct |= TCG_CT_CONST_ZERO; + break; + default: + return -1; + } + + ct_str++; + *pct_str = ct_str; + return 0; +} + +static inline bool is_aimm(uint64_t val) +{ + return (val & ~0xfff) == 0 || (val & ~0xfff000) == 0; +} + +static inline bool is_limm(uint64_t val) +{ + /* Taking a simplified view of the logical immediates for now, ignoring + the replication that can happen across the field. Match bit patterns + of the forms + 0....01....1 + 0..01..10..0 + and their inverses. */ + + /* Make things easier below, by testing the form with msb clear. */ + if ((int64_t)val < 0) { + val = ~val; + } + if (val == 0) { + return false; + } + val += val & -val; + return (val & (val - 1)) == 0; +} + +static int tcg_target_const_match(tcg_target_long val, TCGType type, + const TCGArgConstraint *arg_ct) +{ + int ct = arg_ct->ct; + + if (ct & TCG_CT_CONST) { + return 1; + } + if (type == TCG_TYPE_I32) { + val = (int32_t)val; + } + if ((ct & TCG_CT_CONST_AIMM) && (is_aimm(val) || is_aimm(-val))) { + return 1; + } + if ((ct & TCG_CT_CONST_LIMM) && is_limm(val)) { + return 1; + } + if ((ct & TCG_CT_CONST_ZERO) && val == 0) { + return 1; + } + if ((ct & TCG_CT_CONST_MONE) && val == -1) { + return 1; + } + + return 0; +} + +enum aarch64_cond_code { + COND_EQ = 0x0, + COND_NE = 0x1, + COND_CS = 0x2, /* Unsigned greater or equal */ + COND_HS = COND_CS, /* ALIAS greater or equal */ + COND_CC = 0x3, /* Unsigned less than */ + COND_LO = COND_CC, /* ALIAS Lower */ + COND_MI = 0x4, /* Negative */ + COND_PL = 0x5, /* Zero or greater */ + COND_VS = 0x6, /* Overflow */ + COND_VC = 0x7, /* No overflow */ + COND_HI = 0x8, /* Unsigned greater than */ + COND_LS = 0x9, /* Unsigned less or equal */ + COND_GE = 0xa, + COND_LT = 0xb, + COND_GT = 0xc, + COND_LE = 0xd, + COND_AL = 0xe, + COND_NV = 0xf, /* behaves like COND_AL here */ +}; + +static const enum aarch64_cond_code tcg_cond_to_aarch64[] = { + [TCG_COND_EQ] = COND_EQ, + [TCG_COND_NE] = COND_NE, + [TCG_COND_LT] = COND_LT, + [TCG_COND_GE] = COND_GE, + [TCG_COND_LE] = COND_LE, + [TCG_COND_GT] = COND_GT, + /* unsigned */ + [TCG_COND_LTU] = COND_LO, + [TCG_COND_GTU] = COND_HI, + [TCG_COND_GEU] = COND_HS, + [TCG_COND_LEU] = COND_LS, +}; + +typedef enum { + LDST_ST = 0, /* store */ + LDST_LD = 1, /* load */ + LDST_LD_S_X = 2, /* load and sign-extend into Xt */ + LDST_LD_S_W = 3, /* load and sign-extend into Wt */ +} AArch64LdstType; + +/* We encode the format of the insn into the beginning of the name, so that + we can have the preprocessor help "typecheck" the insn vs the output + function. Arm didn't provide us with nice names for the formats, so we + use the section number of the architecture reference manual in which the + instruction group is described. */ +typedef enum { + /* Compare and branch (immediate). */ + I3201_CBZ = 0x34000000, + I3201_CBNZ = 0x35000000, + + /* Conditional branch (immediate). */ + I3202_B_C = 0x54000000, + + /* Unconditional branch (immediate). */ + I3206_B = 0x14000000, + I3206_BL = 0x94000000, + + /* Unconditional branch (register). */ + I3207_BR = 0xd61f0000, + I3207_BLR = 0xd63f0000, + I3207_RET = 0xd65f0000, + + /* Load/store register. Described here as 3.3.12, but the helper + that emits them can transform to 3.3.10 or 3.3.13. */ + I3312_STRB = 0x38000000 | LDST_ST << 22 | MO_8 << 30, + I3312_STRH = 0x38000000 | LDST_ST << 22 | MO_16 << 30, + I3312_STRW = 0x38000000 | LDST_ST << 22 | MO_32 << 30, + I3312_STRX = 0x38000000 | LDST_ST << 22 | MO_64 << 30, + + I3312_LDRB = 0x38000000 | LDST_LD << 22 | MO_8 << 30, + I3312_LDRH = 0x38000000 | LDST_LD << 22 | MO_16 << 30, + I3312_LDRW = 0x38000000 | LDST_LD << 22 | MO_32 << 30, + I3312_LDRX = 0x38000000 | LDST_LD << 22 | MO_64 << 30, + + I3312_LDRSBW = 0x38000000 | LDST_LD_S_W << 22 | MO_8 << 30, + I3312_LDRSHW = 0x38000000 | LDST_LD_S_W << 22 | MO_16 << 30, + + I3312_LDRSBX = 0x38000000 | LDST_LD_S_X << 22 | MO_8 << 30, + I3312_LDRSHX = 0x38000000 | LDST_LD_S_X << 22 | MO_16 << 30, + I3312_LDRSWX = 0x38000000 | LDST_LD_S_X << 22 | MO_32 << 30, + + I3312_TO_I3310 = 0x00200800, + I3312_TO_I3313 = 0x01000000, + + /* Load/store register pair instructions. */ + I3314_LDP = 0x28400000, + I3314_STP = 0x28000000, + + /* Add/subtract immediate instructions. */ + I3401_ADDI = 0x11000000, + I3401_ADDSI = 0x31000000, + I3401_SUBI = 0x51000000, + I3401_SUBSI = 0x71000000, + + /* Bitfield instructions. */ + I3402_BFM = 0x33000000, + I3402_SBFM = 0x13000000, + I3402_UBFM = 0x53000000, + + /* Extract instruction. */ + I3403_EXTR = 0x13800000, + + /* Logical immediate instructions. */ + I3404_ANDI = 0x12000000, + I3404_ORRI = 0x32000000, + I3404_EORI = 0x52000000, + + /* Move wide immediate instructions. */ + I3405_MOVN = 0x12800000, + I3405_MOVZ = 0x52800000, + I3405_MOVK = 0x72800000, + + /* PC relative addressing instructions. */ + I3406_ADR = 0x10000000, + I3406_ADRP = 0x90000000, + + /* Add/subtract shifted register instructions (without a shift). */ + I3502_ADD = 0x0b000000, + I3502_ADDS = 0x2b000000, + I3502_SUB = 0x4b000000, + I3502_SUBS = 0x6b000000, + + /* Add/subtract shifted register instructions (with a shift). */ + I3502S_ADD_LSL = I3502_ADD, + + /* Add/subtract with carry instructions. */ + I3503_ADC = 0x1a000000, + I3503_SBC = 0x5a000000, + + /* Conditional select instructions. */ + I3506_CSEL = 0x1a800000, + I3506_CSINC = 0x1a800400, + + /* Data-processing (1 source) instructions. */ + I3507_REV16 = 0x5ac00400, + I3507_REV32 = 0x5ac00800, + I3507_REV64 = 0x5ac00c00, + + /* Data-processing (2 source) instructions. */ + I3508_LSLV = 0x1ac02000, + I3508_LSRV = 0x1ac02400, + I3508_ASRV = 0x1ac02800, + I3508_RORV = 0x1ac02c00, + I3508_SMULH = 0x9b407c00, + I3508_UMULH = 0x9bc07c00, + I3508_UDIV = 0x1ac00800, + I3508_SDIV = 0x1ac00c00, + + /* Data-processing (3 source) instructions. */ + I3509_MADD = 0x1b000000, + I3509_MSUB = 0x1b008000, + + /* Logical shifted register instructions (without a shift). */ + I3510_AND = 0x0a000000, + I3510_BIC = 0x0a200000, + I3510_ORR = 0x2a000000, + I3510_ORN = 0x2a200000, + I3510_EOR = 0x4a000000, + I3510_EON = 0x4a200000, + I3510_ANDS = 0x6a000000, +} AArch64Insn; + +static inline uint32_t tcg_in32(TCGContext *s) +{ + uint32_t v = *(uint32_t *)s->code_ptr; + return v; +} + +/* Emit an opcode with "type-checking" of the format. */ +#define tcg_out_insn(S, FMT, OP, ...) \ + glue(tcg_out_insn_,FMT)(S, glue(glue(glue(I,FMT),_),OP), ## __VA_ARGS__) + +static void tcg_out_insn_3201(TCGContext *s, AArch64Insn insn, TCGType ext, + TCGReg rt, int imm19) +{ + tcg_out32(s, insn | ext << 31 | (imm19 & 0x7ffff) << 5 | rt); +} + +static void tcg_out_insn_3202(TCGContext *s, AArch64Insn insn, + TCGCond c, int imm19) +{ + tcg_out32(s, insn | tcg_cond_to_aarch64[c] | (imm19 & 0x7ffff) << 5); +} + +static void tcg_out_insn_3206(TCGContext *s, AArch64Insn insn, int imm26) +{ + tcg_out32(s, insn | (imm26 & 0x03ffffff)); +} + +static void tcg_out_insn_3207(TCGContext *s, AArch64Insn insn, TCGReg rn) +{ + tcg_out32(s, insn | rn << 5); +} + +static void tcg_out_insn_3314(TCGContext *s, AArch64Insn insn, + TCGReg r1, TCGReg r2, TCGReg rn, + tcg_target_long ofs, bool pre, bool w) +{ + insn |= 1u << 31; /* ext */ + insn |= pre << 24; + insn |= w << 23; + + assert(ofs >= -0x200 && ofs < 0x200 && (ofs & 7) == 0); + insn |= (ofs & (0x7f << 3)) << (15 - 3); + + tcg_out32(s, insn | r2 << 10 | rn << 5 | r1); +} + +static void tcg_out_insn_3401(TCGContext *s, AArch64Insn insn, TCGType ext, + TCGReg rd, TCGReg rn, uint64_t aimm) +{ + if (aimm > 0xfff) { + assert((aimm & 0xfff) == 0); + aimm >>= 12; + assert(aimm <= 0xfff); + aimm |= 1 << 12; /* apply LSL 12 */ + } + tcg_out32(s, insn | ext << 31 | aimm << 10 | rn << 5 | rd); +} + +/* This function can be used for both 3.4.2 (Bitfield) and 3.4.4 + (Logical immediate). Both insn groups have N, IMMR and IMMS fields + that feed the DecodeBitMasks pseudo function. */ +static void tcg_out_insn_3402(TCGContext *s, AArch64Insn insn, TCGType ext, + TCGReg rd, TCGReg rn, int n, int immr, int imms) +{ + tcg_out32(s, insn | ext << 31 | n << 22 | immr << 16 | imms << 10 + | rn << 5 | rd); +} + +#define tcg_out_insn_3404 tcg_out_insn_3402 + +static void tcg_out_insn_3403(TCGContext *s, AArch64Insn insn, TCGType ext, + TCGReg rd, TCGReg rn, TCGReg rm, int imms) +{ + tcg_out32(s, insn | ext << 31 | ext << 22 | rm << 16 | imms << 10 + | rn << 5 | rd); +} + +/* This function is used for the Move (wide immediate) instruction group. + Note that SHIFT is a full shift count, not the 2 bit HW field. */ +static void tcg_out_insn_3405(TCGContext *s, AArch64Insn insn, TCGType ext, + TCGReg rd, uint16_t half, unsigned shift) +{ + assert((shift & ~0x30) == 0); + tcg_out32(s, insn | ext << 31 | shift << (21 - 4) | half << 5 | rd); +} + +static void tcg_out_insn_3406(TCGContext *s, AArch64Insn insn, + TCGReg rd, int64_t disp) +{ + tcg_out32(s, insn | (disp & 3) << 29 | (disp & 0x1ffffc) << (5 - 2) | rd); +} + +/* This function is for both 3.5.2 (Add/Subtract shifted register), for + the rare occasion when we actually want to supply a shift amount. */ +static inline void tcg_out_insn_3502S(TCGContext *s, AArch64Insn insn, + TCGType ext, TCGReg rd, TCGReg rn, + TCGReg rm, int imm6) +{ + tcg_out32(s, insn | ext << 31 | rm << 16 | imm6 << 10 | rn << 5 | rd); +} + +/* This function is for 3.5.2 (Add/subtract shifted register), + and 3.5.10 (Logical shifted register), for the vast majorty of cases + when we don't want to apply a shift. Thus it can also be used for + 3.5.3 (Add/subtract with carry) and 3.5.8 (Data processing 2 source). */ +static void tcg_out_insn_3502(TCGContext *s, AArch64Insn insn, TCGType ext, + TCGReg rd, TCGReg rn, TCGReg rm) +{ + tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd); +} + +#define tcg_out_insn_3503 tcg_out_insn_3502 +#define tcg_out_insn_3508 tcg_out_insn_3502 +#define tcg_out_insn_3510 tcg_out_insn_3502 + +static void tcg_out_insn_3506(TCGContext *s, AArch64Insn insn, TCGType ext, + TCGReg rd, TCGReg rn, TCGReg rm, TCGCond c) +{ + tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd + | tcg_cond_to_aarch64[c] << 12); +} + +static void tcg_out_insn_3507(TCGContext *s, AArch64Insn insn, TCGType ext, + TCGReg rd, TCGReg rn) +{ + tcg_out32(s, insn | ext << 31 | rn << 5 | rd); +} + +static void tcg_out_insn_3509(TCGContext *s, AArch64Insn insn, TCGType ext, + TCGReg rd, TCGReg rn, TCGReg rm, TCGReg ra) +{ + tcg_out32(s, insn | ext << 31 | rm << 16 | ra << 10 | rn << 5 | rd); +} + +static void tcg_out_insn_3310(TCGContext *s, AArch64Insn insn, + TCGReg rd, TCGReg base, TCGType ext, + TCGReg regoff) +{ + /* Note the AArch64Insn constants above are for C3.3.12. Adjust. */ + tcg_out32(s, insn | I3312_TO_I3310 | regoff << 16 | + 0x4000 | ext << 13 | base << 5 | rd); +} + +static void tcg_out_insn_3312(TCGContext *s, AArch64Insn insn, + TCGReg rd, TCGReg rn, intptr_t offset) +{ + tcg_out32(s, insn | (offset & 0x1ff) << 12 | rn << 5 | rd); +} + +static void tcg_out_insn_3313(TCGContext *s, AArch64Insn insn, + TCGReg rd, TCGReg rn, uintptr_t scaled_uimm) +{ + /* Note the AArch64Insn constants above are for C3.3.12. Adjust. */ + tcg_out32(s, insn | I3312_TO_I3313 | scaled_uimm << 10 | rn << 5 | rd); +} + +/* Register to register move using ORR (shifted register with no shift). */ +static void tcg_out_movr(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rm) +{ + tcg_out_insn(s, 3510, ORR, ext, rd, TCG_REG_XZR, rm); +} + +/* Register to register move using ADDI (move to/from SP). */ +static void tcg_out_movr_sp(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rn) +{ + tcg_out_insn(s, 3401, ADDI, ext, rd, rn, 0); +} + +/* This function is used for the Logical (immediate) instruction group. + The value of LIMM must satisfy IS_LIMM. See the comment above about + only supporting simplified logical immediates. */ +static void tcg_out_logicali(TCGContext *s, AArch64Insn insn, TCGType ext, + TCGReg rd, TCGReg rn, uint64_t limm) +{ + unsigned h, l, r, c; + + assert(is_limm(limm)); + + h = clz64(limm); + l = ctz64(limm); + if (l == 0) { + r = 0; /* form 0....01....1 */ + c = ctz64(~limm) - 1; + if (h == 0) { + r = clz64(~limm); /* form 1..10..01..1 */ + c += r; + } + } else { + r = 64 - l; /* form 1....10....0 or 0..01..10..0 */ + c = r - h - 1; + } + if (ext == TCG_TYPE_I32) { + r &= 31; + c &= 31; + } + + tcg_out_insn_3404(s, insn, ext, rd, rn, ext, r, c); +} + +static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd, + tcg_target_long value) +{ + AArch64Insn insn; + int i, wantinv, shift; + tcg_target_long svalue = value; + tcg_target_long ivalue = ~value; + tcg_target_long imask; + + /* For 32-bit values, discard potential garbage in value. For 64-bit + values within [2**31, 2**32-1], we can create smaller sequences by + interpreting this as a negative 32-bit number, while ensuring that + the high 32 bits are cleared by setting SF=0. */ + if (type == TCG_TYPE_I32 || (value & ~0xffffffffull) == 0) { + svalue = (int32_t)value; + value = (uint32_t)value; + ivalue = (uint32_t)ivalue; + type = TCG_TYPE_I32; + } + + /* Speed things up by handling the common case of small positive + and negative values specially. */ + if ((value & ~0xffffull) == 0) { + tcg_out_insn(s, 3405, MOVZ, type, rd, value, 0); + return; + } else if ((ivalue & ~0xffffull) == 0) { + tcg_out_insn(s, 3405, MOVN, type, rd, ivalue, 0); + return; + } + + /* Check for bitfield immediates. For the benefit of 32-bit quantities, + use the sign-extended value. That lets us match rotated values such + as 0xff0000ff with the same 64-bit logic matching 0xffffffffff0000ff. */ + if (is_limm(svalue)) { + tcg_out_logicali(s, I3404_ORRI, type, rd, TCG_REG_XZR, svalue); + return; + } + + /* Look for host pointer values within 4G of the PC. This happens + often when loading pointers to QEMU's own data structures. */ + if (type == TCG_TYPE_I64) { + tcg_target_long disp = (value >> 12) - ((intptr_t)s->code_ptr >> 12); + if (disp == sextract64(disp, 0, 21)) { + tcg_out_insn(s, 3406, ADRP, rd, disp); + if (value & 0xfff) { + tcg_out_insn(s, 3401, ADDI, type, rd, rd, value & 0xfff); + } + return; + } + } + + /* Would it take fewer insns to begin with MOVN? For the value and its + inverse, count the number of 16-bit lanes that are 0. */ + for (i = wantinv = imask = 0; i < 64; i += 16) { + tcg_target_long mask = 0xffffull << i; + if ((value & mask) == 0) { + wantinv -= 1; + } + if ((ivalue & mask) == 0) { + wantinv += 1; + imask |= mask; + } + } + + /* If we had more 0xffff than 0x0000, invert VALUE and use MOVN. */ + insn = I3405_MOVZ; + if (wantinv > 0) { + value = ivalue; + insn = I3405_MOVN; + } + + /* Find the lowest lane that is not 0x0000. */ + shift = ctz64(value) & (63 & -16); + tcg_out_insn_3405(s, insn, type, rd, value >> shift, shift); + + if (wantinv > 0) { + /* Re-invert the value, so MOVK sees non-inverted bits. */ + value = ~value; + /* Clear out all the 0xffff lanes. */ + value ^= imask; + } + /* Clear out the lane that we just set. */ + value &= ~(0xffffUL << shift); + + /* Iterate until all lanes have been set, and thus cleared from VALUE. */ + while (value) { + shift = ctz64(value) & (63 & -16); + tcg_out_insn(s, 3405, MOVK, type, rd, value >> shift, shift); + value &= ~(0xffffUL << shift); + } +} + +/* Define something more legible for general use. */ +#define tcg_out_ldst_r tcg_out_insn_3310 + +static void tcg_out_ldst(TCGContext *s, AArch64Insn insn, + TCGReg rd, TCGReg rn, intptr_t offset) +{ + TCGMemOp size = (uint32_t)insn >> 30; + + /* If the offset is naturally aligned and in range, then we can + use the scaled uimm12 encoding */ + if (offset >= 0 && !(offset & ((1 << size) - 1))) { + uintptr_t scaled_uimm = offset >> size; + if (scaled_uimm <= 0xfff) { + tcg_out_insn_3313(s, insn, rd, rn, scaled_uimm); + return; + } + } + + /* Small signed offsets can use the unscaled encoding. */ + if (offset >= -256 && offset < 256) { + tcg_out_insn_3312(s, insn, rd, rn, offset); + return; + } + + /* Worst-case scenario, move offset to temp register, use reg offset. */ + tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, offset); + tcg_out_ldst_r(s, insn, rd, rn, TCG_TYPE_I64, TCG_REG_TMP); +} + +static inline void tcg_out_mov(TCGContext *s, + TCGType type, TCGReg ret, TCGReg arg) +{ + if (ret != arg) { + tcg_out_movr(s, type, ret, arg); + } +} + +static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg arg, + TCGReg arg1, intptr_t arg2) +{ + tcg_out_ldst(s, type == TCG_TYPE_I32 ? I3312_LDRW : I3312_LDRX, + arg, arg1, arg2); +} + +static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, + TCGReg arg1, intptr_t arg2) +{ + tcg_out_ldst(s, type == TCG_TYPE_I32 ? I3312_STRW : I3312_STRX, + arg, arg1, arg2); +} + +static inline void tcg_out_bfm(TCGContext *s, TCGType ext, TCGReg rd, + TCGReg rn, unsigned int a, unsigned int b) +{ + tcg_out_insn(s, 3402, BFM, ext, rd, rn, ext, a, b); +} + +static inline void tcg_out_ubfm(TCGContext *s, TCGType ext, TCGReg rd, + TCGReg rn, unsigned int a, unsigned int b) +{ + tcg_out_insn(s, 3402, UBFM, ext, rd, rn, ext, a, b); +} + +static inline void tcg_out_sbfm(TCGContext *s, TCGType ext, TCGReg rd, + TCGReg rn, unsigned int a, unsigned int b) +{ + tcg_out_insn(s, 3402, SBFM, ext, rd, rn, ext, a, b); +} + +static inline void tcg_out_extr(TCGContext *s, TCGType ext, TCGReg rd, + TCGReg rn, TCGReg rm, unsigned int a) +{ + tcg_out_insn(s, 3403, EXTR, ext, rd, rn, rm, a); +} + +static inline void tcg_out_shl(TCGContext *s, TCGType ext, + TCGReg rd, TCGReg rn, unsigned int m) +{ + int bits = ext ? 64 : 32; + int max = bits - 1; + tcg_out_ubfm(s, ext, rd, rn, bits - (m & max), max - (m & max)); +} + +static inline void tcg_out_shr(TCGContext *s, TCGType ext, + TCGReg rd, TCGReg rn, unsigned int m) +{ + int max = ext ? 63 : 31; + tcg_out_ubfm(s, ext, rd, rn, m & max, max); +} + +static inline void tcg_out_sar(TCGContext *s, TCGType ext, + TCGReg rd, TCGReg rn, unsigned int m) +{ + int max = ext ? 63 : 31; + tcg_out_sbfm(s, ext, rd, rn, m & max, max); +} + +static inline void tcg_out_rotr(TCGContext *s, TCGType ext, + TCGReg rd, TCGReg rn, unsigned int m) +{ + int max = ext ? 63 : 31; + tcg_out_extr(s, ext, rd, rn, rn, m & max); +} + +static inline void tcg_out_rotl(TCGContext *s, TCGType ext, + TCGReg rd, TCGReg rn, unsigned int m) +{ + int bits = ext ? 64 : 32; + int max = bits - 1; + tcg_out_extr(s, ext, rd, rn, rn, bits - (m & max)); +} + +static inline void tcg_out_dep(TCGContext *s, TCGType ext, TCGReg rd, + TCGReg rn, unsigned lsb, unsigned width) +{ + unsigned size = ext ? 64 : 32; + unsigned a = (size - lsb) & (size - 1); + unsigned b = width - 1; + tcg_out_bfm(s, ext, rd, rn, a, b); +} + +static void tcg_out_cmp(TCGContext *s, TCGType ext, TCGReg a, + tcg_target_long b, bool const_b) +{ + if (const_b) { + /* Using CMP or CMN aliases. */ + if (b >= 0) { + tcg_out_insn(s, 3401, SUBSI, ext, TCG_REG_XZR, a, b); + } else { + tcg_out_insn(s, 3401, ADDSI, ext, TCG_REG_XZR, a, -b); + } + } else { + /* Using CMP alias SUBS wzr, Wn, Wm */ + tcg_out_insn(s, 3502, SUBS, ext, TCG_REG_XZR, a, b); + } +} + +static inline void tcg_out_goto(TCGContext *s, tcg_insn_unit *target) +{ + ptrdiff_t offset = target - s->code_ptr; + assert(offset == sextract64(offset, 0, 26)); + tcg_out_insn(s, 3206, B, offset); +} + +static inline void tcg_out_goto_noaddr(TCGContext *s) +{ + /* We pay attention here to not modify the branch target by reading from + the buffer. This ensure that caches and memory are kept coherent during + retranslation. Mask away possible garbage in the high bits for the + first translation, while keeping the offset bits for retranslation. */ + uint32_t old = tcg_in32(s); + tcg_out_insn(s, 3206, B, old); +} + +static inline void tcg_out_goto_cond_noaddr(TCGContext *s, TCGCond c) +{ + /* See comments in tcg_out_goto_noaddr. */ + uint32_t old = tcg_in32(s) >> 5; + tcg_out_insn(s, 3202, B_C, c, old); +} + +static inline void tcg_out_callr(TCGContext *s, TCGReg reg) +{ + tcg_out_insn(s, 3207, BLR, reg); +} + +static inline void tcg_out_call(TCGContext *s, tcg_insn_unit *target) +{ + ptrdiff_t offset = target - s->code_ptr; + if (offset == sextract64(offset, 0, 26)) { + tcg_out_insn(s, 3206, BL, offset); + } else { + tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (intptr_t)target); + tcg_out_callr(s, TCG_REG_TMP); + } +} + +void aarch64_tb_set_jmp_target(uintptr_t jmp_addr, uintptr_t addr) +{ + tcg_insn_unit *code_ptr = (tcg_insn_unit *)jmp_addr; + tcg_insn_unit *target = (tcg_insn_unit *)addr; + + reloc_pc26(code_ptr, target); + flush_icache_range(jmp_addr, jmp_addr + 4); +} + +static inline void tcg_out_goto_label(TCGContext *s, TCGLabel *l) +{ + if (!l->has_value) { + tcg_out_reloc(s, s->code_ptr, R_AARCH64_JUMP26, l, 0); + tcg_out_goto_noaddr(s); + } else { + tcg_out_goto(s, l->u.value_ptr); + } +} + +static void tcg_out_brcond(TCGContext *s, TCGMemOp ext, TCGCond c, TCGArg a, + TCGArg b, bool b_const, TCGLabel *l) +{ + intptr_t offset; + bool need_cmp; + + if (b_const && b == 0 && (c == TCG_COND_EQ || c == TCG_COND_NE)) { + need_cmp = false; + } else { + need_cmp = true; + tcg_out_cmp(s, ext, a, b, b_const); + } + + if (!l->has_value) { + tcg_out_reloc(s, s->code_ptr, R_AARCH64_CONDBR19, l, 0); + offset = tcg_in32(s) >> 5; + } else { + offset = l->u.value_ptr - s->code_ptr; + assert(offset == sextract64(offset, 0, 19)); + } + + if (need_cmp) { + tcg_out_insn(s, 3202, B_C, c, offset); + } else if (c == TCG_COND_EQ) { + tcg_out_insn(s, 3201, CBZ, ext, a, offset); + } else { + tcg_out_insn(s, 3201, CBNZ, ext, a, offset); + } +} + +static inline void tcg_out_rev64(TCGContext *s, TCGReg rd, TCGReg rn) +{ + tcg_out_insn(s, 3507, REV64, TCG_TYPE_I64, rd, rn); +} + +static inline void tcg_out_rev32(TCGContext *s, TCGReg rd, TCGReg rn) +{ + tcg_out_insn(s, 3507, REV32, TCG_TYPE_I32, rd, rn); +} + +static inline void tcg_out_rev16(TCGContext *s, TCGReg rd, TCGReg rn) +{ + tcg_out_insn(s, 3507, REV16, TCG_TYPE_I32, rd, rn); +} + +static inline void tcg_out_sxt(TCGContext *s, TCGType ext, TCGMemOp s_bits, + TCGReg rd, TCGReg rn) +{ + /* Using ALIASes SXTB, SXTH, SXTW, of SBFM Xd, Xn, #0, #7|15|31 */ + int bits = (8 << s_bits) - 1; + tcg_out_sbfm(s, ext, rd, rn, 0, bits); +} + +static inline void tcg_out_uxt(TCGContext *s, TCGMemOp s_bits, + TCGReg rd, TCGReg rn) +{ + /* Using ALIASes UXTB, UXTH of UBFM Wd, Wn, #0, #7|15 */ + int bits = (8 << s_bits) - 1; + tcg_out_ubfm(s, 0, rd, rn, 0, bits); +} + +static void tcg_out_addsubi(TCGContext *s, int ext, TCGReg rd, + TCGReg rn, int64_t aimm) +{ + if (aimm >= 0) { + tcg_out_insn(s, 3401, ADDI, ext, rd, rn, aimm); + } else { + tcg_out_insn(s, 3401, SUBI, ext, rd, rn, -aimm); + } +} + +static inline void tcg_out_addsub2(TCGContext *s, int ext, TCGReg rl, + TCGReg rh, TCGReg al, TCGReg ah, + tcg_target_long bl, tcg_target_long bh, + bool const_bl, bool const_bh, bool sub) +{ + TCGReg orig_rl = rl; + AArch64Insn insn; + + if (rl == ah || (!const_bh && rl == bh)) { + rl = TCG_REG_TMP; + } + + if (const_bl) { + insn = I3401_ADDSI; + if ((bl < 0) ^ sub) { + insn = I3401_SUBSI; + bl = -bl; + } + tcg_out_insn_3401(s, insn, ext, rl, al, bl); + } else { + tcg_out_insn_3502(s, sub ? I3502_SUBS : I3502_ADDS, ext, rl, al, bl); + } + + insn = I3503_ADC; + if (const_bh) { + /* Note that the only two constants we support are 0 and -1, and + that SBC = rn + ~rm + c, so adc -1 is sbc 0, and vice-versa. */ + if ((bh != 0) ^ sub) { + insn = I3503_SBC; + } + bh = TCG_REG_XZR; + } else if (sub) { + insn = I3503_SBC; + } + tcg_out_insn_3503(s, insn, ext, rh, ah, bh); + + tcg_out_mov(s, ext, orig_rl, rl); +} + +#ifdef CONFIG_SOFTMMU +/* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr, + * TCGMemOpIdx oi, uintptr_t ra) + */ +static void * const qemu_ld_helpers[16] = { + [MO_UB] = helper_ret_ldub_mmu, + [MO_LEUW] = helper_le_lduw_mmu, + [MO_LEUL] = helper_le_ldul_mmu, + [MO_LEQ] = helper_le_ldq_mmu, + [MO_BEUW] = helper_be_lduw_mmu, + [MO_BEUL] = helper_be_ldul_mmu, + [MO_BEQ] = helper_be_ldq_mmu, +}; + +/* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr, + * uintxx_t val, TCGMemOpIdx oi, + * uintptr_t ra) + */ +static void * const qemu_st_helpers[16] = { + [MO_UB] = helper_ret_stb_mmu, + [MO_LEUW] = helper_le_stw_mmu, + [MO_LEUL] = helper_le_stl_mmu, + [MO_LEQ] = helper_le_stq_mmu, + [MO_BEUW] = helper_be_stw_mmu, + [MO_BEUL] = helper_be_stl_mmu, + [MO_BEQ] = helper_be_stq_mmu, +}; + +static inline void tcg_out_adr(TCGContext *s, TCGReg rd, void *target) +{ + ptrdiff_t offset = tcg_pcrel_diff(s, target); + assert(offset == sextract64(offset, 0, 21)); + tcg_out_insn(s, 3406, ADR, rd, offset); +} + +static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) +{ + TCGMemOpIdx oi = lb->oi; + TCGMemOp opc = get_memop(oi); + TCGMemOp size = opc & MO_SIZE; + + reloc_pc19(lb->label_ptr[0], s->code_ptr); + + tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0); + tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg); + tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X2, oi); + tcg_out_adr(s, TCG_REG_X3, lb->raddr); + tcg_out_call(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]); + if (opc & MO_SIGN) { + tcg_out_sxt(s, lb->type, size, lb->datalo_reg, TCG_REG_X0); + } else { + tcg_out_mov(s, size == MO_64, lb->datalo_reg, TCG_REG_X0); + } + + tcg_out_goto(s, lb->raddr); +} + +static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) +{ + TCGMemOpIdx oi = lb->oi; + TCGMemOp opc = get_memop(oi); + TCGMemOp size = opc & MO_SIZE; + + reloc_pc19(lb->label_ptr[0], s->code_ptr); + + tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0); + tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg); + tcg_out_mov(s, size == MO_64, TCG_REG_X2, lb->datalo_reg); + tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X3, oi); + tcg_out_adr(s, TCG_REG_X4, lb->raddr); + tcg_out_call(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]); + tcg_out_goto(s, lb->raddr); +} + +static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi, + TCGType ext, TCGReg data_reg, TCGReg addr_reg, + tcg_insn_unit *raddr, tcg_insn_unit *label_ptr) +{ + TCGLabelQemuLdst *label = new_ldst_label(s); + + label->is_ld = is_ld; + label->oi = oi; + label->type = ext; + label->datalo_reg = data_reg; + label->addrlo_reg = addr_reg; + label->raddr = raddr; + label->label_ptr[0] = label_ptr; +} + +/* Load and compare a TLB entry, emitting the conditional jump to the + slow path for the failure case, which will be patched later when finalizing + the slow path. Generated code returns the host addend in X1, + clobbers X0,X2,X3,TMP. */ +static void tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, TCGMemOp opc, + tcg_insn_unit **label_ptr, int mem_index, + bool is_read) +{ + int tlb_offset = is_read ? + offsetof(CPUArchState, tlb_table[mem_index][0].addr_read) + : offsetof(CPUArchState, tlb_table[mem_index][0].addr_write); + int s_mask = (1 << (opc & MO_SIZE)) - 1; + TCGReg base = TCG_AREG0, x3; + uint64_t tlb_mask; + + /* For aligned accesses, we check the first byte and include the alignment + bits within the address. For unaligned access, we check that we don't + cross pages using the address of the last byte of the access. */ + if ((opc & MO_AMASK) == MO_ALIGN || s_mask == 0) { + tlb_mask = TARGET_PAGE_MASK | s_mask; + x3 = addr_reg; + } else { + tcg_out_insn(s, 3401, ADDI, TARGET_LONG_BITS == 64, + TCG_REG_X3, addr_reg, s_mask); + tlb_mask = TARGET_PAGE_MASK; + x3 = TCG_REG_X3; + } + + /* Extract the TLB index from the address into X0. + X0 = + addr_reg */ + tcg_out_ubfm(s, TARGET_LONG_BITS == 64, TCG_REG_X0, addr_reg, + TARGET_PAGE_BITS, TARGET_PAGE_BITS + CPU_TLB_BITS); + + /* Store the page mask part of the address into X3. */ + tcg_out_logicali(s, I3404_ANDI, TARGET_LONG_BITS == 64, + TCG_REG_X3, x3, tlb_mask); + + /* Add any "high bits" from the tlb offset to the env address into X2, + to take advantage of the LSL12 form of the ADDI instruction. + X2 = env + (tlb_offset & 0xfff000) */ + if (tlb_offset & 0xfff000) { + tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_X2, base, + tlb_offset & 0xfff000); + base = TCG_REG_X2; + } + + /* Merge the tlb index contribution into X2. + X2 = X2 + (X0 << CPU_TLB_ENTRY_BITS) */ + tcg_out_insn(s, 3502S, ADD_LSL, TCG_TYPE_I64, TCG_REG_X2, base, + TCG_REG_X0, CPU_TLB_ENTRY_BITS); + + /* Merge "low bits" from tlb offset, load the tlb comparator into X0. + X0 = load [X2 + (tlb_offset & 0x000fff)] */ + tcg_out_ldst(s, TARGET_LONG_BITS == 32 ? I3312_LDRW : I3312_LDRX, + TCG_REG_X0, TCG_REG_X2, tlb_offset & 0xfff); + + /* Load the tlb addend. Do that early to avoid stalling. + X1 = load [X2 + (tlb_offset & 0xfff) + offsetof(addend)] */ + tcg_out_ldst(s, I3312_LDRX, TCG_REG_X1, TCG_REG_X2, + (tlb_offset & 0xfff) + (offsetof(CPUTLBEntry, addend)) - + (is_read ? offsetof(CPUTLBEntry, addr_read) + : offsetof(CPUTLBEntry, addr_write))); + + /* Perform the address comparison. */ + tcg_out_cmp(s, (TARGET_LONG_BITS == 64), TCG_REG_X0, TCG_REG_X3, 0); + + /* If not equal, we jump to the slow path. */ + *label_ptr = s->code_ptr; + tcg_out_goto_cond_noaddr(s, TCG_COND_NE); +} + +#endif /* CONFIG_SOFTMMU */ + +static void tcg_out_qemu_ld_direct(TCGContext *s, TCGMemOp memop, TCGType ext, + TCGReg data_r, TCGReg addr_r, + TCGType otype, TCGReg off_r) +{ + const TCGMemOp bswap = memop & MO_BSWAP; + + switch (memop & MO_SSIZE) { + case MO_UB: + tcg_out_ldst_r(s, I3312_LDRB, data_r, addr_r, otype, off_r); + break; + case MO_SB: + tcg_out_ldst_r(s, ext ? I3312_LDRSBX : I3312_LDRSBW, + data_r, addr_r, otype, off_r); + break; + case MO_UW: + tcg_out_ldst_r(s, I3312_LDRH, data_r, addr_r, otype, off_r); + if (bswap) { + tcg_out_rev16(s, data_r, data_r); + } + break; + case MO_SW: + if (bswap) { + tcg_out_ldst_r(s, I3312_LDRH, data_r, addr_r, otype, off_r); + tcg_out_rev16(s, data_r, data_r); + tcg_out_sxt(s, ext, MO_16, data_r, data_r); + } else { + tcg_out_ldst_r(s, (ext ? I3312_LDRSHX : I3312_LDRSHW), + data_r, addr_r, otype, off_r); + } + break; + case MO_UL: + tcg_out_ldst_r(s, I3312_LDRW, data_r, addr_r, otype, off_r); + if (bswap) { + tcg_out_rev32(s, data_r, data_r); + } + break; + case MO_SL: + if (bswap) { + tcg_out_ldst_r(s, I3312_LDRW, data_r, addr_r, otype, off_r); + tcg_out_rev32(s, data_r, data_r); + tcg_out_sxt(s, TCG_TYPE_I64, MO_32, data_r, data_r); + } else { + tcg_out_ldst_r(s, I3312_LDRSWX, data_r, addr_r, otype, off_r); + } + break; + case MO_Q: + tcg_out_ldst_r(s, I3312_LDRX, data_r, addr_r, otype, off_r); + if (bswap) { + tcg_out_rev64(s, data_r, data_r); + } + break; + default: + tcg_abort(); + } +} + +static void tcg_out_qemu_st_direct(TCGContext *s, TCGMemOp memop, + TCGReg data_r, TCGReg addr_r, + TCGType otype, TCGReg off_r) +{ + const TCGMemOp bswap = memop & MO_BSWAP; + + switch (memop & MO_SIZE) { + case MO_8: + tcg_out_ldst_r(s, I3312_STRB, data_r, addr_r, otype, off_r); + break; + case MO_16: + if (bswap && data_r != TCG_REG_XZR) { + tcg_out_rev16(s, TCG_REG_TMP, data_r); + data_r = TCG_REG_TMP; + } + tcg_out_ldst_r(s, I3312_STRH, data_r, addr_r, otype, off_r); + break; + case MO_32: + if (bswap && data_r != TCG_REG_XZR) { + tcg_out_rev32(s, TCG_REG_TMP, data_r); + data_r = TCG_REG_TMP; + } + tcg_out_ldst_r(s, I3312_STRW, data_r, addr_r, otype, off_r); + break; + case MO_64: + if (bswap && data_r != TCG_REG_XZR) { + tcg_out_rev64(s, TCG_REG_TMP, data_r); + data_r = TCG_REG_TMP; + } + tcg_out_ldst_r(s, I3312_STRX, data_r, addr_r, otype, off_r); + break; + default: + tcg_abort(); + } +} + +static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg, + TCGMemOpIdx oi, TCGType ext) +{ + TCGMemOp memop = get_memop(oi); + const TCGType otype = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32; +#ifdef CONFIG_SOFTMMU + unsigned mem_index = get_mmuidx(oi); + tcg_insn_unit *label_ptr; + + tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, mem_index, 1); + tcg_out_qemu_ld_direct(s, memop, ext, data_reg, + TCG_REG_X1, otype, addr_reg); + add_qemu_ldst_label(s, true, oi, ext, data_reg, addr_reg, + s->code_ptr, label_ptr); +#else /* !CONFIG_SOFTMMU */ + if (USE_GUEST_BASE) { + tcg_out_qemu_ld_direct(s, memop, ext, data_reg, + TCG_REG_GUEST_BASE, otype, addr_reg); + } else { + tcg_out_qemu_ld_direct(s, memop, ext, data_reg, + addr_reg, TCG_TYPE_I64, TCG_REG_XZR); + } +#endif /* CONFIG_SOFTMMU */ +} + +static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg, + TCGMemOpIdx oi) +{ + TCGMemOp memop = get_memop(oi); + const TCGType otype = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32; +#ifdef CONFIG_SOFTMMU + unsigned mem_index = get_mmuidx(oi); + tcg_insn_unit *label_ptr; + + tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, mem_index, 0); + tcg_out_qemu_st_direct(s, memop, data_reg, + TCG_REG_X1, otype, addr_reg); + add_qemu_ldst_label(s, false, oi, (memop & MO_SIZE)== MO_64, + data_reg, addr_reg, s->code_ptr, label_ptr); +#else /* !CONFIG_SOFTMMU */ + if (USE_GUEST_BASE) { + tcg_out_qemu_st_direct(s, memop, data_reg, + TCG_REG_GUEST_BASE, otype, addr_reg); + } else { + tcg_out_qemu_st_direct(s, memop, data_reg, + addr_reg, TCG_TYPE_I64, TCG_REG_XZR); + } +#endif /* CONFIG_SOFTMMU */ +} + +static tcg_insn_unit *tb_ret_addr; + +static void tcg_out_op(TCGContext *s, TCGOpcode opc, + const TCGArg args[TCG_MAX_OP_ARGS], + const int const_args[TCG_MAX_OP_ARGS]) +{ + /* 99% of the time, we can signal the use of extension registers + by looking to see if the opcode handles 64-bit data. */ + TCGType ext = (tcg_op_defs[opc].flags & TCG_OPF_64BIT) != 0; + + /* Hoist the loads of the most common arguments. */ + TCGArg a0 = args[0]; + TCGArg a1 = args[1]; + TCGArg a2 = args[2]; + int c2 = const_args[2]; + + /* Some operands are defined with "rZ" constraint, a register or + the zero register. These need not actually test args[I] == 0. */ +#define REG0(I) (const_args[I] ? TCG_REG_XZR : (TCGReg)args[I]) + + switch (opc) { + case INDEX_op_exit_tb: + tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, a0); + tcg_out_goto(s, tb_ret_addr); + break; + + case INDEX_op_goto_tb: +#ifndef USE_DIRECT_JUMP +#error "USE_DIRECT_JUMP required for aarch64" +#endif + assert(s->tb_jmp_offset != NULL); /* consistency for USE_DIRECT_JUMP */ + s->tb_jmp_offset[a0] = tcg_current_code_size(s); + /* actual branch destination will be patched by + aarch64_tb_set_jmp_target later, beware retranslation. */ + tcg_out_goto_noaddr(s); + s->tb_next_offset[a0] = tcg_current_code_size(s); + break; + + case INDEX_op_br: + tcg_out_goto_label(s, arg_label(a0)); + break; + + case INDEX_op_ld8u_i32: + case INDEX_op_ld8u_i64: + tcg_out_ldst(s, I3312_LDRB, a0, a1, a2); + break; + case INDEX_op_ld8s_i32: + tcg_out_ldst(s, I3312_LDRSBW, a0, a1, a2); + break; + case INDEX_op_ld8s_i64: + tcg_out_ldst(s, I3312_LDRSBX, a0, a1, a2); + break; + case INDEX_op_ld16u_i32: + case INDEX_op_ld16u_i64: + tcg_out_ldst(s, I3312_LDRH, a0, a1, a2); + break; + case INDEX_op_ld16s_i32: + tcg_out_ldst(s, I3312_LDRSHW, a0, a1, a2); + break; + case INDEX_op_ld16s_i64: + tcg_out_ldst(s, I3312_LDRSHX, a0, a1, a2); + break; + case INDEX_op_ld_i32: + case INDEX_op_ld32u_i64: + tcg_out_ldst(s, I3312_LDRW, a0, a1, a2); + break; + case INDEX_op_ld32s_i64: + tcg_out_ldst(s, I3312_LDRSWX, a0, a1, a2); + break; + case INDEX_op_ld_i64: + tcg_out_ldst(s, I3312_LDRX, a0, a1, a2); + break; + + case INDEX_op_st8_i32: + case INDEX_op_st8_i64: + tcg_out_ldst(s, I3312_STRB, REG0(0), a1, a2); + break; + case INDEX_op_st16_i32: + case INDEX_op_st16_i64: + tcg_out_ldst(s, I3312_STRH, REG0(0), a1, a2); + break; + case INDEX_op_st_i32: + case INDEX_op_st32_i64: + tcg_out_ldst(s, I3312_STRW, REG0(0), a1, a2); + break; + case INDEX_op_st_i64: + tcg_out_ldst(s, I3312_STRX, REG0(0), a1, a2); + break; + + case INDEX_op_add_i32: + a2 = (int32_t)a2; + /* FALLTHRU */ + case INDEX_op_add_i64: + if (c2) { + tcg_out_addsubi(s, ext, a0, a1, a2); + } else { + tcg_out_insn(s, 3502, ADD, ext, a0, a1, a2); + } + break; + + case INDEX_op_sub_i32: + a2 = (int32_t)a2; + /* FALLTHRU */ + case INDEX_op_sub_i64: + if (c2) { + tcg_out_addsubi(s, ext, a0, a1, -a2); + } else { + tcg_out_insn(s, 3502, SUB, ext, a0, a1, a2); + } + break; + + case INDEX_op_neg_i64: + case INDEX_op_neg_i32: + tcg_out_insn(s, 3502, SUB, ext, a0, TCG_REG_XZR, a1); + break; + + case INDEX_op_and_i32: + a2 = (int32_t)a2; + /* FALLTHRU */ + case INDEX_op_and_i64: + if (c2) { + tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, a2); + } else { + tcg_out_insn(s, 3510, AND, ext, a0, a1, a2); + } + break; + + case INDEX_op_andc_i32: + a2 = (int32_t)a2; + /* FALLTHRU */ + case INDEX_op_andc_i64: + if (c2) { + tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, ~a2); + } else { + tcg_out_insn(s, 3510, BIC, ext, a0, a1, a2); + } + break; + + case INDEX_op_or_i32: + a2 = (int32_t)a2; + /* FALLTHRU */ + case INDEX_op_or_i64: + if (c2) { + tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, a2); + } else { + tcg_out_insn(s, 3510, ORR, ext, a0, a1, a2); + } + break; + + case INDEX_op_orc_i32: + a2 = (int32_t)a2; + /* FALLTHRU */ + case INDEX_op_orc_i64: + if (c2) { + tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, ~a2); + } else { + tcg_out_insn(s, 3510, ORN, ext, a0, a1, a2); + } + break; + + case INDEX_op_xor_i32: + a2 = (int32_t)a2; + /* FALLTHRU */ + case INDEX_op_xor_i64: + if (c2) { + tcg_out_logicali(s, I3404_EORI, ext, a0, a1, a2); + } else { + tcg_out_insn(s, 3510, EOR, ext, a0, a1, a2); + } + break; + + case INDEX_op_eqv_i32: + a2 = (int32_t)a2; + /* FALLTHRU */ + case INDEX_op_eqv_i64: + if (c2) { + tcg_out_logicali(s, I3404_EORI, ext, a0, a1, ~a2); + } else { + tcg_out_insn(s, 3510, EON, ext, a0, a1, a2); + } + break; + + case INDEX_op_not_i64: + case INDEX_op_not_i32: + tcg_out_insn(s, 3510, ORN, ext, a0, TCG_REG_XZR, a1); + break; + + case INDEX_op_mul_i64: + case INDEX_op_mul_i32: + tcg_out_insn(s, 3509, MADD, ext, a0, a1, a2, TCG_REG_XZR); + break; + + case INDEX_op_div_i64: + case INDEX_op_div_i32: + tcg_out_insn(s, 3508, SDIV, ext, a0, a1, a2); + break; + case INDEX_op_divu_i64: + case INDEX_op_divu_i32: + tcg_out_insn(s, 3508, UDIV, ext, a0, a1, a2); + break; + + case INDEX_op_rem_i64: + case INDEX_op_rem_i32: + tcg_out_insn(s, 3508, SDIV, ext, TCG_REG_TMP, a1, a2); + tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1); + break; + case INDEX_op_remu_i64: + case INDEX_op_remu_i32: + tcg_out_insn(s, 3508, UDIV, ext, TCG_REG_TMP, a1, a2); + tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1); + break; + + case INDEX_op_shl_i64: + case INDEX_op_shl_i32: + if (c2) { + tcg_out_shl(s, ext, a0, a1, a2); + } else { + tcg_out_insn(s, 3508, LSLV, ext, a0, a1, a2); + } + break; + + case INDEX_op_shr_i64: + case INDEX_op_shr_i32: + if (c2) { + tcg_out_shr(s, ext, a0, a1, a2); + } else { + tcg_out_insn(s, 3508, LSRV, ext, a0, a1, a2); + } + break; + + case INDEX_op_sar_i64: + case INDEX_op_sar_i32: + if (c2) { + tcg_out_sar(s, ext, a0, a1, a2); + } else { + tcg_out_insn(s, 3508, ASRV, ext, a0, a1, a2); + } + break; + + case INDEX_op_rotr_i64: + case INDEX_op_rotr_i32: + if (c2) { + tcg_out_rotr(s, ext, a0, a1, a2); + } else { + tcg_out_insn(s, 3508, RORV, ext, a0, a1, a2); + } + break; + + case INDEX_op_rotl_i64: + case INDEX_op_rotl_i32: + if (c2) { + tcg_out_rotl(s, ext, a0, a1, a2); + } else { + tcg_out_insn(s, 3502, SUB, 0, TCG_REG_TMP, TCG_REG_XZR, a2); + tcg_out_insn(s, 3508, RORV, ext, a0, a1, TCG_REG_TMP); + } + break; + + case INDEX_op_brcond_i32: + a1 = (int32_t)a1; + /* FALLTHRU */ + case INDEX_op_brcond_i64: + tcg_out_brcond(s, ext, a2, a0, a1, const_args[1], arg_label(args[3])); + break; + + case INDEX_op_setcond_i32: + a2 = (int32_t)a2; + /* FALLTHRU */ + case INDEX_op_setcond_i64: + tcg_out_cmp(s, ext, a1, a2, c2); + /* Use CSET alias of CSINC Wd, WZR, WZR, invert(cond). */ + tcg_out_insn(s, 3506, CSINC, TCG_TYPE_I32, a0, TCG_REG_XZR, + TCG_REG_XZR, tcg_invert_cond(args[3])); + break; + + case INDEX_op_movcond_i32: + a2 = (int32_t)a2; + /* FALLTHRU */ + case INDEX_op_movcond_i64: + tcg_out_cmp(s, ext, a1, a2, c2); + tcg_out_insn(s, 3506, CSEL, ext, a0, REG0(3), REG0(4), args[5]); + break; + + case INDEX_op_qemu_ld_i32: + case INDEX_op_qemu_ld_i64: + tcg_out_qemu_ld(s, a0, a1, a2, ext); + break; + case INDEX_op_qemu_st_i32: + case INDEX_op_qemu_st_i64: + tcg_out_qemu_st(s, REG0(0), a1, a2); + break; + + case INDEX_op_bswap64_i64: + tcg_out_rev64(s, a0, a1); + break; + case INDEX_op_bswap32_i64: + case INDEX_op_bswap32_i32: + tcg_out_rev32(s, a0, a1); + break; + case INDEX_op_bswap16_i64: + case INDEX_op_bswap16_i32: + tcg_out_rev16(s, a0, a1); + break; + + case INDEX_op_ext8s_i64: + case INDEX_op_ext8s_i32: + tcg_out_sxt(s, ext, MO_8, a0, a1); + break; + case INDEX_op_ext16s_i64: + case INDEX_op_ext16s_i32: + tcg_out_sxt(s, ext, MO_16, a0, a1); + break; + case INDEX_op_ext_i32_i64: + case INDEX_op_ext32s_i64: + tcg_out_sxt(s, TCG_TYPE_I64, MO_32, a0, a1); + break; + case INDEX_op_ext8u_i64: + case INDEX_op_ext8u_i32: + tcg_out_uxt(s, MO_8, a0, a1); + break; + case INDEX_op_ext16u_i64: + case INDEX_op_ext16u_i32: + tcg_out_uxt(s, MO_16, a0, a1); + break; + case INDEX_op_extu_i32_i64: + case INDEX_op_ext32u_i64: + tcg_out_movr(s, TCG_TYPE_I32, a0, a1); + break; + + case INDEX_op_deposit_i64: + case INDEX_op_deposit_i32: + tcg_out_dep(s, ext, a0, REG0(2), args[3], args[4]); + break; + + case INDEX_op_add2_i32: + tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3), + (int32_t)args[4], args[5], const_args[4], + const_args[5], false); + break; + case INDEX_op_add2_i64: + tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4], + args[5], const_args[4], const_args[5], false); + break; + case INDEX_op_sub2_i32: + tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3), + (int32_t)args[4], args[5], const_args[4], + const_args[5], true); + break; + case INDEX_op_sub2_i64: + tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4], + args[5], const_args[4], const_args[5], true); + break; + + case INDEX_op_muluh_i64: + tcg_out_insn(s, 3508, UMULH, TCG_TYPE_I64, a0, a1, a2); + break; + case INDEX_op_mulsh_i64: + tcg_out_insn(s, 3508, SMULH, TCG_TYPE_I64, a0, a1, a2); + break; + + case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */ + case INDEX_op_mov_i64: + case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi. */ + case INDEX_op_movi_i64: + case INDEX_op_call: /* Always emitted via tcg_out_call. */ + default: + tcg_abort(); + } + +#undef REG0 +} + +static const TCGTargetOpDef aarch64_op_defs[] = { + { INDEX_op_exit_tb, { } }, + { INDEX_op_goto_tb, { } }, + { INDEX_op_br, { } }, + + { INDEX_op_ld8u_i32, { "r", "r" } }, + { INDEX_op_ld8s_i32, { "r", "r" } }, + { INDEX_op_ld16u_i32, { "r", "r" } }, + { INDEX_op_ld16s_i32, { "r", "r" } }, + { INDEX_op_ld_i32, { "r", "r" } }, + { INDEX_op_ld8u_i64, { "r", "r" } }, + { INDEX_op_ld8s_i64, { "r", "r" } }, + { INDEX_op_ld16u_i64, { "r", "r" } }, + { INDEX_op_ld16s_i64, { "r", "r" } }, + { INDEX_op_ld32u_i64, { "r", "r" } }, + { INDEX_op_ld32s_i64, { "r", "r" } }, + { INDEX_op_ld_i64, { "r", "r" } }, + + { INDEX_op_st8_i32, { "rZ", "r" } }, + { INDEX_op_st16_i32, { "rZ", "r" } }, + { INDEX_op_st_i32, { "rZ", "r" } }, + { INDEX_op_st8_i64, { "rZ", "r" } }, + { INDEX_op_st16_i64, { "rZ", "r" } }, + { INDEX_op_st32_i64, { "rZ", "r" } }, + { INDEX_op_st_i64, { "rZ", "r" } }, + + { INDEX_op_add_i32, { "r", "r", "rA" } }, + { INDEX_op_add_i64, { "r", "r", "rA" } }, + { INDEX_op_sub_i32, { "r", "r", "rA" } }, + { INDEX_op_sub_i64, { "r", "r", "rA" } }, + { INDEX_op_mul_i32, { "r", "r", "r" } }, + { INDEX_op_mul_i64, { "r", "r", "r" } }, + { INDEX_op_div_i32, { "r", "r", "r" } }, + { INDEX_op_div_i64, { "r", "r", "r" } }, + { INDEX_op_divu_i32, { "r", "r", "r" } }, + { INDEX_op_divu_i64, { "r", "r", "r" } }, + { INDEX_op_rem_i32, { "r", "r", "r" } }, + { INDEX_op_rem_i64, { "r", "r", "r" } }, + { INDEX_op_remu_i32, { "r", "r", "r" } }, + { INDEX_op_remu_i64, { "r", "r", "r" } }, + { INDEX_op_and_i32, { "r", "r", "rL" } }, + { INDEX_op_and_i64, { "r", "r", "rL" } }, + { INDEX_op_or_i32, { "r", "r", "rL" } }, + { INDEX_op_or_i64, { "r", "r", "rL" } }, + { INDEX_op_xor_i32, { "r", "r", "rL" } }, + { INDEX_op_xor_i64, { "r", "r", "rL" } }, + { INDEX_op_andc_i32, { "r", "r", "rL" } }, + { INDEX_op_andc_i64, { "r", "r", "rL" } }, + { INDEX_op_orc_i32, { "r", "r", "rL" } }, + { INDEX_op_orc_i64, { "r", "r", "rL" } }, + { INDEX_op_eqv_i32, { "r", "r", "rL" } }, + { INDEX_op_eqv_i64, { "r", "r", "rL" } }, + + { INDEX_op_neg_i32, { "r", "r" } }, + { INDEX_op_neg_i64, { "r", "r" } }, + { INDEX_op_not_i32, { "r", "r" } }, + { INDEX_op_not_i64, { "r", "r" } }, + + { INDEX_op_shl_i32, { "r", "r", "ri" } }, + { INDEX_op_shr_i32, { "r", "r", "ri" } }, + { INDEX_op_sar_i32, { "r", "r", "ri" } }, + { INDEX_op_rotl_i32, { "r", "r", "ri" } }, + { INDEX_op_rotr_i32, { "r", "r", "ri" } }, + { INDEX_op_shl_i64, { "r", "r", "ri" } }, + { INDEX_op_shr_i64, { "r", "r", "ri" } }, + { INDEX_op_sar_i64, { "r", "r", "ri" } }, + { INDEX_op_rotl_i64, { "r", "r", "ri" } }, + { INDEX_op_rotr_i64, { "r", "r", "ri" } }, + + { INDEX_op_brcond_i32, { "r", "rA" } }, + { INDEX_op_brcond_i64, { "r", "rA" } }, + { INDEX_op_setcond_i32, { "r", "r", "rA" } }, + { INDEX_op_setcond_i64, { "r", "r", "rA" } }, + { INDEX_op_movcond_i32, { "r", "r", "rA", "rZ", "rZ" } }, + { INDEX_op_movcond_i64, { "r", "r", "rA", "rZ", "rZ" } }, + + { INDEX_op_qemu_ld_i32, { "r", "l" } }, + { INDEX_op_qemu_ld_i64, { "r", "l" } }, + { INDEX_op_qemu_st_i32, { "lZ", "l" } }, + { INDEX_op_qemu_st_i64, { "lZ", "l" } }, + + { INDEX_op_bswap16_i32, { "r", "r" } }, + { INDEX_op_bswap32_i32, { "r", "r" } }, + { INDEX_op_bswap16_i64, { "r", "r" } }, + { INDEX_op_bswap32_i64, { "r", "r" } }, + { INDEX_op_bswap64_i64, { "r", "r" } }, + + { INDEX_op_ext8s_i32, { "r", "r" } }, + { INDEX_op_ext16s_i32, { "r", "r" } }, + { INDEX_op_ext8u_i32, { "r", "r" } }, + { INDEX_op_ext16u_i32, { "r", "r" } }, + + { INDEX_op_ext8s_i64, { "r", "r" } }, + { INDEX_op_ext16s_i64, { "r", "r" } }, + { INDEX_op_ext32s_i64, { "r", "r" } }, + { INDEX_op_ext8u_i64, { "r", "r" } }, + { INDEX_op_ext16u_i64, { "r", "r" } }, + { INDEX_op_ext32u_i64, { "r", "r" } }, + { INDEX_op_ext_i32_i64, { "r", "r" } }, + { INDEX_op_extu_i32_i64, { "r", "r" } }, + + { INDEX_op_deposit_i32, { "r", "0", "rZ" } }, + { INDEX_op_deposit_i64, { "r", "0", "rZ" } }, + + { INDEX_op_add2_i32, { "r", "r", "rZ", "rZ", "rA", "rMZ" } }, + { INDEX_op_add2_i64, { "r", "r", "rZ", "rZ", "rA", "rMZ" } }, + { INDEX_op_sub2_i32, { "r", "r", "rZ", "rZ", "rA", "rMZ" } }, + { INDEX_op_sub2_i64, { "r", "r", "rZ", "rZ", "rA", "rMZ" } }, + + { INDEX_op_muluh_i64, { "r", "r", "r" } }, + { INDEX_op_mulsh_i64, { "r", "r", "r" } }, + + { -1 }, +}; + +static void tcg_target_init(TCGContext *s) +{ + tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffffffff); + tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I64], 0, 0xffffffff); + + tcg_regset_set32(tcg_target_call_clobber_regs, 0, + (1 << TCG_REG_X0) | (1 << TCG_REG_X1) | + (1 << TCG_REG_X2) | (1 << TCG_REG_X3) | + (1 << TCG_REG_X4) | (1 << TCG_REG_X5) | + (1 << TCG_REG_X6) | (1 << TCG_REG_X7) | + (1 << TCG_REG_X8) | (1 << TCG_REG_X9) | + (1 << TCG_REG_X10) | (1 << TCG_REG_X11) | + (1 << TCG_REG_X12) | (1 << TCG_REG_X13) | + (1 << TCG_REG_X14) | (1 << TCG_REG_X15) | + (1 << TCG_REG_X16) | (1 << TCG_REG_X17) | + (1 << TCG_REG_X18) | (1 << TCG_REG_X30)); + + tcg_regset_clear(s->reserved_regs); + tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP); + tcg_regset_set_reg(s->reserved_regs, TCG_REG_FP); + tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP); + tcg_regset_set_reg(s->reserved_regs, TCG_REG_X18); /* platform register */ + + tcg_add_target_add_op_defs(aarch64_op_defs); +} + +/* Saving pairs: (X19, X20) .. (X27, X28), (X29(fp), X30(lr)). */ +#define PUSH_SIZE ((30 - 19 + 1) * 8) + +#define FRAME_SIZE \ + ((PUSH_SIZE \ + + TCG_STATIC_CALL_ARGS_SIZE \ + + CPU_TEMP_BUF_NLONGS * sizeof(long) \ + + TCG_TARGET_STACK_ALIGN - 1) \ + & ~(TCG_TARGET_STACK_ALIGN - 1)) + +/* We're expecting a 2 byte uleb128 encoded value. */ +QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14)); + +/* We're expecting to use a single ADDI insn. */ +QEMU_BUILD_BUG_ON(FRAME_SIZE - PUSH_SIZE > 0xfff); + +static void tcg_target_qemu_prologue(TCGContext *s) +{ + TCGReg r; + + /* Push (FP, LR) and allocate space for all saved registers. */ + tcg_out_insn(s, 3314, STP, TCG_REG_FP, TCG_REG_LR, + TCG_REG_SP, -PUSH_SIZE, 1, 1); + + /* Set up frame pointer for canonical unwinding. */ + tcg_out_movr_sp(s, TCG_TYPE_I64, TCG_REG_FP, TCG_REG_SP); + + /* Store callee-preserved regs x19..x28. */ + for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) { + int ofs = (r - TCG_REG_X19 + 2) * 8; + tcg_out_insn(s, 3314, STP, r, r + 1, TCG_REG_SP, ofs, 1, 0); + } + + /* Make stack space for TCG locals. */ + tcg_out_insn(s, 3401, SUBI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP, + FRAME_SIZE - PUSH_SIZE); + + /* Inform TCG about how to find TCG locals with register, offset, size. */ + tcg_set_frame(s, TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE, + CPU_TEMP_BUF_NLONGS * sizeof(long)); + +#if !defined(CONFIG_SOFTMMU) + if (USE_GUEST_BASE) { + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_GUEST_BASE, guest_base); + tcg_regset_set_reg(s->reserved_regs, TCG_REG_GUEST_BASE); + } +#endif + + tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]); + tcg_out_insn(s, 3207, BR, tcg_target_call_iarg_regs[1]); + + tb_ret_addr = s->code_ptr; + + /* Remove TCG locals stack space. */ + tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP, + FRAME_SIZE - PUSH_SIZE); + + /* Restore registers x19..x28. */ + for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) { + int ofs = (r - TCG_REG_X19 + 2) * 8; + tcg_out_insn(s, 3314, LDP, r, r + 1, TCG_REG_SP, ofs, 1, 0); + } + + /* Pop (FP, LR), restore SP to previous frame. */ + tcg_out_insn(s, 3314, LDP, TCG_REG_FP, TCG_REG_LR, + TCG_REG_SP, PUSH_SIZE, 0, 1); + tcg_out_insn(s, 3207, RET, TCG_REG_LR); +} + +typedef struct { + DebugFrameHeader h; + uint8_t fde_def_cfa[4]; + uint8_t fde_reg_ofs[24]; +} DebugFrame; + +#define ELF_HOST_MACHINE EM_AARCH64 + +static const DebugFrame debug_frame = { + .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */ + .h.cie.id = -1, + .h.cie.version = 1, + .h.cie.code_align = 1, + .h.cie.data_align = 0x78, /* sleb128 -8 */ + .h.cie.return_column = TCG_REG_LR, + + /* Total FDE size does not include the "len" member. */ + .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset), + + .fde_def_cfa = { + 12, TCG_REG_SP, /* DW_CFA_def_cfa sp, ... */ + (FRAME_SIZE & 0x7f) | 0x80, /* ... uleb128 FRAME_SIZE */ + (FRAME_SIZE >> 7) + }, + .fde_reg_ofs = { + 0x80 + 28, 1, /* DW_CFA_offset, x28, -8 */ + 0x80 + 27, 2, /* DW_CFA_offset, x27, -16 */ + 0x80 + 26, 3, /* DW_CFA_offset, x26, -24 */ + 0x80 + 25, 4, /* DW_CFA_offset, x25, -32 */ + 0x80 + 24, 5, /* DW_CFA_offset, x24, -40 */ + 0x80 + 23, 6, /* DW_CFA_offset, x23, -48 */ + 0x80 + 22, 7, /* DW_CFA_offset, x22, -56 */ + 0x80 + 21, 8, /* DW_CFA_offset, x21, -64 */ + 0x80 + 20, 9, /* DW_CFA_offset, x20, -72 */ + 0x80 + 19, 10, /* DW_CFA_offset, x1p, -80 */ + 0x80 + 30, 11, /* DW_CFA_offset, lr, -88 */ + 0x80 + 29, 12, /* DW_CFA_offset, fp, -96 */ + } +}; + +void tcg_register_jit(void *buf, size_t buf_size) +{ + tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame)); +} diff --git a/tcg/arm/tcg-target.c b/tcg/arm/tcg-target.c deleted file mode 100644 index 146ac00..0000000 --- a/tcg/arm/tcg-target.c +++ /dev/null @@ -1,2129 +0,0 @@ -/* - * Tiny Code Generator for QEMU - * - * Copyright (c) 2008 Andrzej Zaborowski - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - */ - -#include "qemu/osdep.h" -#include "elf.h" -#include "tcg-be-ldst.h" - -/* The __ARM_ARCH define is provided by gcc 4.8. Construct it otherwise. */ -#ifndef __ARM_ARCH -# if defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) \ - || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) \ - || defined(__ARM_ARCH_7EM__) -# define __ARM_ARCH 7 -# elif defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) \ - || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) \ - || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6T2__) -# define __ARM_ARCH 6 -# elif defined(__ARM_ARCH_5__) || defined(__ARM_ARCH_5E__) \ - || defined(__ARM_ARCH_5T__) || defined(__ARM_ARCH_5TE__) \ - || defined(__ARM_ARCH_5TEJ__) -# define __ARM_ARCH 5 -# else -# define __ARM_ARCH 4 -# endif -#endif - -static int arm_arch = __ARM_ARCH; - -#if defined(__ARM_ARCH_5T__) \ - || defined(__ARM_ARCH_5TE__) || defined(__ARM_ARCH_5TEJ__) -# define use_armv5t_instructions 1 -#else -# define use_armv5t_instructions use_armv6_instructions -#endif - -#define use_armv6_instructions (__ARM_ARCH >= 6 || arm_arch >= 6) -#define use_armv7_instructions (__ARM_ARCH >= 7 || arm_arch >= 7) - -#ifndef use_idiv_instructions -bool use_idiv_instructions; -#endif - -/* ??? Ought to think about changing CONFIG_SOFTMMU to always defined. */ -#ifdef CONFIG_SOFTMMU -# define USING_SOFTMMU 1 -#else -# define USING_SOFTMMU 0 -#endif - -#ifndef NDEBUG -static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = { - "%r0", - "%r1", - "%r2", - "%r3", - "%r4", - "%r5", - "%r6", - "%r7", - "%r8", - "%r9", - "%r10", - "%r11", - "%r12", - "%r13", - "%r14", - "%pc", -}; -#endif - -static const int tcg_target_reg_alloc_order[] = { - TCG_REG_R4, - TCG_REG_R5, - TCG_REG_R6, - TCG_REG_R7, - TCG_REG_R8, - TCG_REG_R9, - TCG_REG_R10, - TCG_REG_R11, - TCG_REG_R13, - TCG_REG_R0, - TCG_REG_R1, - TCG_REG_R2, - TCG_REG_R3, - TCG_REG_R12, - TCG_REG_R14, -}; - -static const int tcg_target_call_iarg_regs[4] = { - TCG_REG_R0, TCG_REG_R1, TCG_REG_R2, TCG_REG_R3 -}; -static const int tcg_target_call_oarg_regs[2] = { - TCG_REG_R0, TCG_REG_R1 -}; - -#define TCG_REG_TMP TCG_REG_R12 - -static inline void reloc_pc24(tcg_insn_unit *code_ptr, tcg_insn_unit *target) -{ - ptrdiff_t offset = (tcg_ptr_byte_diff(target, code_ptr) - 8) >> 2; - *code_ptr = (*code_ptr & ~0xffffff) | (offset & 0xffffff); -} - -static void patch_reloc(tcg_insn_unit *code_ptr, int type, - intptr_t value, intptr_t addend) -{ - assert(type == R_ARM_PC24); - assert(addend == 0); - reloc_pc24(code_ptr, (tcg_insn_unit *)value); -} - -#define TCG_CT_CONST_ARM 0x100 -#define TCG_CT_CONST_INV 0x200 -#define TCG_CT_CONST_NEG 0x400 -#define TCG_CT_CONST_ZERO 0x800 - -/* parse target specific constraints */ -static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str) -{ - const char *ct_str; - - ct_str = *pct_str; - switch (ct_str[0]) { - case 'I': - ct->ct |= TCG_CT_CONST_ARM; - break; - case 'K': - ct->ct |= TCG_CT_CONST_INV; - break; - case 'N': /* The gcc constraint letter is L, already used here. */ - ct->ct |= TCG_CT_CONST_NEG; - break; - case 'Z': - ct->ct |= TCG_CT_CONST_ZERO; - break; - - case 'r': - ct->ct |= TCG_CT_REG; - tcg_regset_set32(ct->u.regs, 0, (1 << TCG_TARGET_NB_REGS) - 1); - break; - - /* qemu_ld address */ - case 'l': - ct->ct |= TCG_CT_REG; - tcg_regset_set32(ct->u.regs, 0, (1 << TCG_TARGET_NB_REGS) - 1); -#ifdef CONFIG_SOFTMMU - /* r0-r2,lr will be overwritten when reading the tlb entry, - so don't use these. */ - tcg_regset_reset_reg(ct->u.regs, TCG_REG_R0); - tcg_regset_reset_reg(ct->u.regs, TCG_REG_R1); - tcg_regset_reset_reg(ct->u.regs, TCG_REG_R2); - tcg_regset_reset_reg(ct->u.regs, TCG_REG_R14); -#endif - break; - - /* qemu_st address & data */ - case 's': - ct->ct |= TCG_CT_REG; - tcg_regset_set32(ct->u.regs, 0, (1 << TCG_TARGET_NB_REGS) - 1); - /* r0-r2 will be overwritten when reading the tlb entry (softmmu only) - and r0-r1 doing the byte swapping, so don't use these. */ - tcg_regset_reset_reg(ct->u.regs, TCG_REG_R0); - tcg_regset_reset_reg(ct->u.regs, TCG_REG_R1); -#if defined(CONFIG_SOFTMMU) - /* Avoid clashes with registers being used for helper args */ - tcg_regset_reset_reg(ct->u.regs, TCG_REG_R2); -#if TARGET_LONG_BITS == 64 - /* Avoid clashes with registers being used for helper args */ - tcg_regset_reset_reg(ct->u.regs, TCG_REG_R3); -#endif - tcg_regset_reset_reg(ct->u.regs, TCG_REG_R14); -#endif - break; - - default: - return -1; - } - ct_str++; - *pct_str = ct_str; - - return 0; -} - -static inline uint32_t rotl(uint32_t val, int n) -{ - return (val << n) | (val >> (32 - n)); -} - -/* ARM immediates for ALU instructions are made of an unsigned 8-bit - right-rotated by an even amount between 0 and 30. */ -static inline int encode_imm(uint32_t imm) -{ - int shift; - - /* simple case, only lower bits */ - if ((imm & ~0xff) == 0) - return 0; - /* then try a simple even shift */ - shift = ctz32(imm) & ~1; - if (((imm >> shift) & ~0xff) == 0) - return 32 - shift; - /* now try harder with rotations */ - if ((rotl(imm, 2) & ~0xff) == 0) - return 2; - if ((rotl(imm, 4) & ~0xff) == 0) - return 4; - if ((rotl(imm, 6) & ~0xff) == 0) - return 6; - /* imm can't be encoded */ - return -1; -} - -static inline int check_fit_imm(uint32_t imm) -{ - return encode_imm(imm) >= 0; -} - -/* Test if a constant matches the constraint. - * TODO: define constraints for: - * - * ldr/str offset: between -0xfff and 0xfff - * ldrh/strh offset: between -0xff and 0xff - * mov operand2: values represented with x << (2 * y), x < 0x100 - * add, sub, eor...: ditto - */ -static inline int tcg_target_const_match(tcg_target_long val, TCGType type, - const TCGArgConstraint *arg_ct) -{ - int ct; - ct = arg_ct->ct; - if (ct & TCG_CT_CONST) { - return 1; - } else if ((ct & TCG_CT_CONST_ARM) && check_fit_imm(val)) { - return 1; - } else if ((ct & TCG_CT_CONST_INV) && check_fit_imm(~val)) { - return 1; - } else if ((ct & TCG_CT_CONST_NEG) && check_fit_imm(-val)) { - return 1; - } else if ((ct & TCG_CT_CONST_ZERO) && val == 0) { - return 1; - } else { - return 0; - } -} - -#define TO_CPSR (1 << 20) - -typedef enum { - ARITH_AND = 0x0 << 21, - ARITH_EOR = 0x1 << 21, - ARITH_SUB = 0x2 << 21, - ARITH_RSB = 0x3 << 21, - ARITH_ADD = 0x4 << 21, - ARITH_ADC = 0x5 << 21, - ARITH_SBC = 0x6 << 21, - ARITH_RSC = 0x7 << 21, - ARITH_TST = 0x8 << 21 | TO_CPSR, - ARITH_CMP = 0xa << 21 | TO_CPSR, - ARITH_CMN = 0xb << 21 | TO_CPSR, - ARITH_ORR = 0xc << 21, - ARITH_MOV = 0xd << 21, - ARITH_BIC = 0xe << 21, - ARITH_MVN = 0xf << 21, - - INSN_LDR_IMM = 0x04100000, - INSN_LDR_REG = 0x06100000, - INSN_STR_IMM = 0x04000000, - INSN_STR_REG = 0x06000000, - - INSN_LDRH_IMM = 0x005000b0, - INSN_LDRH_REG = 0x001000b0, - INSN_LDRSH_IMM = 0x005000f0, - INSN_LDRSH_REG = 0x001000f0, - INSN_STRH_IMM = 0x004000b0, - INSN_STRH_REG = 0x000000b0, - - INSN_LDRB_IMM = 0x04500000, - INSN_LDRB_REG = 0x06500000, - INSN_LDRSB_IMM = 0x005000d0, - INSN_LDRSB_REG = 0x001000d0, - INSN_STRB_IMM = 0x04400000, - INSN_STRB_REG = 0x06400000, - - INSN_LDRD_IMM = 0x004000d0, - INSN_LDRD_REG = 0x000000d0, - INSN_STRD_IMM = 0x004000f0, - INSN_STRD_REG = 0x000000f0, -} ARMInsn; - -#define SHIFT_IMM_LSL(im) (((im) << 7) | 0x00) -#define SHIFT_IMM_LSR(im) (((im) << 7) | 0x20) -#define SHIFT_IMM_ASR(im) (((im) << 7) | 0x40) -#define SHIFT_IMM_ROR(im) (((im) << 7) | 0x60) -#define SHIFT_REG_LSL(rs) (((rs) << 8) | 0x10) -#define SHIFT_REG_LSR(rs) (((rs) << 8) | 0x30) -#define SHIFT_REG_ASR(rs) (((rs) << 8) | 0x50) -#define SHIFT_REG_ROR(rs) (((rs) << 8) | 0x70) - -enum arm_cond_code_e { - COND_EQ = 0x0, - COND_NE = 0x1, - COND_CS = 0x2, /* Unsigned greater or equal */ - COND_CC = 0x3, /* Unsigned less than */ - COND_MI = 0x4, /* Negative */ - COND_PL = 0x5, /* Zero or greater */ - COND_VS = 0x6, /* Overflow */ - COND_VC = 0x7, /* No overflow */ - COND_HI = 0x8, /* Unsigned greater than */ - COND_LS = 0x9, /* Unsigned less or equal */ - COND_GE = 0xa, - COND_LT = 0xb, - COND_GT = 0xc, - COND_LE = 0xd, - COND_AL = 0xe, -}; - -static const uint8_t tcg_cond_to_arm_cond[] = { - [TCG_COND_EQ] = COND_EQ, - [TCG_COND_NE] = COND_NE, - [TCG_COND_LT] = COND_LT, - [TCG_COND_GE] = COND_GE, - [TCG_COND_LE] = COND_LE, - [TCG_COND_GT] = COND_GT, - /* unsigned */ - [TCG_COND_LTU] = COND_CC, - [TCG_COND_GEU] = COND_CS, - [TCG_COND_LEU] = COND_LS, - [TCG_COND_GTU] = COND_HI, -}; - -static inline void tcg_out_bx(TCGContext *s, int cond, int rn) -{ - tcg_out32(s, (cond << 28) | 0x012fff10 | rn); -} - -static inline void tcg_out_b(TCGContext *s, int cond, int32_t offset) -{ - tcg_out32(s, (cond << 28) | 0x0a000000 | - (((offset - 8) >> 2) & 0x00ffffff)); -} - -static inline void tcg_out_b_noaddr(TCGContext *s, int cond) -{ - /* We pay attention here to not modify the branch target by masking - the corresponding bytes. This ensure that caches and memory are - kept coherent during retranslation. */ - tcg_out32(s, deposit32(*s->code_ptr, 24, 8, (cond << 4) | 0x0a)); -} - -static inline void tcg_out_bl_noaddr(TCGContext *s, int cond) -{ - /* We pay attention here to not modify the branch target by masking - the corresponding bytes. This ensure that caches and memory are - kept coherent during retranslation. */ - tcg_out32(s, deposit32(*s->code_ptr, 24, 8, (cond << 4) | 0x0b)); -} - -static inline void tcg_out_bl(TCGContext *s, int cond, int32_t offset) -{ - tcg_out32(s, (cond << 28) | 0x0b000000 | - (((offset - 8) >> 2) & 0x00ffffff)); -} - -static inline void tcg_out_blx(TCGContext *s, int cond, int rn) -{ - tcg_out32(s, (cond << 28) | 0x012fff30 | rn); -} - -static inline void tcg_out_blx_imm(TCGContext *s, int32_t offset) -{ - tcg_out32(s, 0xfa000000 | ((offset & 2) << 23) | - (((offset - 8) >> 2) & 0x00ffffff)); -} - -static inline void tcg_out_dat_reg(TCGContext *s, - int cond, int opc, int rd, int rn, int rm, int shift) -{ - tcg_out32(s, (cond << 28) | (0 << 25) | opc | - (rn << 16) | (rd << 12) | shift | rm); -} - -static inline void tcg_out_nop(TCGContext *s) -{ - if (use_armv7_instructions) { - /* Architected nop introduced in v6k. */ - /* ??? This is an MSR (imm) 0,0,0 insn. Anyone know if this - also Just So Happened to do nothing on pre-v6k so that we - don't need to conditionalize it? */ - tcg_out32(s, 0xe320f000); - } else { - /* Prior to that the assembler uses mov r0, r0. */ - tcg_out_dat_reg(s, COND_AL, ARITH_MOV, 0, 0, 0, SHIFT_IMM_LSL(0)); - } -} - -static inline void tcg_out_mov_reg(TCGContext *s, int cond, int rd, int rm) -{ - /* Simple reg-reg move, optimising out the 'do nothing' case */ - if (rd != rm) { - tcg_out_dat_reg(s, cond, ARITH_MOV, rd, 0, rm, SHIFT_IMM_LSL(0)); - } -} - -static inline void tcg_out_dat_imm(TCGContext *s, - int cond, int opc, int rd, int rn, int im) -{ - tcg_out32(s, (cond << 28) | (1 << 25) | opc | - (rn << 16) | (rd << 12) | im); -} - -static void tcg_out_movi32(TCGContext *s, int cond, int rd, uint32_t arg) -{ - int rot, opc, rn; - - /* For armv7, make sure not to use movw+movt when mov/mvn would do. - Speed things up by only checking when movt would be required. - Prior to armv7, have one go at fully rotated immediates before - doing the decomposition thing below. */ - if (!use_armv7_instructions || (arg & 0xffff0000)) { - rot = encode_imm(arg); - if (rot >= 0) { - tcg_out_dat_imm(s, cond, ARITH_MOV, rd, 0, - rotl(arg, rot) | (rot << 7)); - return; - } - rot = encode_imm(~arg); - if (rot >= 0) { - tcg_out_dat_imm(s, cond, ARITH_MVN, rd, 0, - rotl(~arg, rot) | (rot << 7)); - return; - } - } - - /* Use movw + movt. */ - if (use_armv7_instructions) { - /* movw */ - tcg_out32(s, (cond << 28) | 0x03000000 | (rd << 12) - | ((arg << 4) & 0x000f0000) | (arg & 0xfff)); - if (arg & 0xffff0000) { - /* movt */ - tcg_out32(s, (cond << 28) | 0x03400000 | (rd << 12) - | ((arg >> 12) & 0x000f0000) | ((arg >> 16) & 0xfff)); - } - return; - } - - /* TODO: This is very suboptimal, we can easily have a constant - pool somewhere after all the instructions. */ - opc = ARITH_MOV; - rn = 0; - /* If we have lots of leading 1's, we can shorten the sequence by - beginning with mvn and then clearing higher bits with eor. */ - if (clz32(~arg) > clz32(arg)) { - opc = ARITH_MVN, arg = ~arg; - } - do { - int i = ctz32(arg) & ~1; - rot = ((32 - i) << 7) & 0xf00; - tcg_out_dat_imm(s, cond, opc, rd, rn, ((arg >> i) & 0xff) | rot); - arg &= ~(0xff << i); - - opc = ARITH_EOR; - rn = rd; - } while (arg); -} - -static inline void tcg_out_dat_rI(TCGContext *s, int cond, int opc, TCGArg dst, - TCGArg lhs, TCGArg rhs, int rhs_is_const) -{ - /* Emit either the reg,imm or reg,reg form of a data-processing insn. - * rhs must satisfy the "rI" constraint. - */ - if (rhs_is_const) { - int rot = encode_imm(rhs); - assert(rot >= 0); - tcg_out_dat_imm(s, cond, opc, dst, lhs, rotl(rhs, rot) | (rot << 7)); - } else { - tcg_out_dat_reg(s, cond, opc, dst, lhs, rhs, SHIFT_IMM_LSL(0)); - } -} - -static void tcg_out_dat_rIK(TCGContext *s, int cond, int opc, int opinv, - TCGReg dst, TCGReg lhs, TCGArg rhs, - bool rhs_is_const) -{ - /* Emit either the reg,imm or reg,reg form of a data-processing insn. - * rhs must satisfy the "rIK" constraint. - */ - if (rhs_is_const) { - int rot = encode_imm(rhs); - if (rot < 0) { - rhs = ~rhs; - rot = encode_imm(rhs); - assert(rot >= 0); - opc = opinv; - } - tcg_out_dat_imm(s, cond, opc, dst, lhs, rotl(rhs, rot) | (rot << 7)); - } else { - tcg_out_dat_reg(s, cond, opc, dst, lhs, rhs, SHIFT_IMM_LSL(0)); - } -} - -static void tcg_out_dat_rIN(TCGContext *s, int cond, int opc, int opneg, - TCGArg dst, TCGArg lhs, TCGArg rhs, - bool rhs_is_const) -{ - /* Emit either the reg,imm or reg,reg form of a data-processing insn. - * rhs must satisfy the "rIN" constraint. - */ - if (rhs_is_const) { - int rot = encode_imm(rhs); - if (rot < 0) { - rhs = -rhs; - rot = encode_imm(rhs); - assert(rot >= 0); - opc = opneg; - } - tcg_out_dat_imm(s, cond, opc, dst, lhs, rotl(rhs, rot) | (rot << 7)); - } else { - tcg_out_dat_reg(s, cond, opc, dst, lhs, rhs, SHIFT_IMM_LSL(0)); - } -} - -static inline void tcg_out_mul32(TCGContext *s, int cond, TCGReg rd, - TCGReg rn, TCGReg rm) -{ - /* if ArchVersion() < 6 && d == n then UNPREDICTABLE; */ - if (!use_armv6_instructions && rd == rn) { - if (rd == rm) { - /* rd == rn == rm; copy an input to tmp first. */ - tcg_out_mov_reg(s, cond, TCG_REG_TMP, rn); - rm = rn = TCG_REG_TMP; - } else { - rn = rm; - rm = rd; - } - } - /* mul */ - tcg_out32(s, (cond << 28) | 0x90 | (rd << 16) | (rm << 8) | rn); -} - -static inline void tcg_out_umull32(TCGContext *s, int cond, TCGReg rd0, - TCGReg rd1, TCGReg rn, TCGReg rm) -{ - /* if ArchVersion() < 6 && (dHi == n || dLo == n) then UNPREDICTABLE; */ - if (!use_armv6_instructions && (rd0 == rn || rd1 == rn)) { - if (rd0 == rm || rd1 == rm) { - tcg_out_mov_reg(s, cond, TCG_REG_TMP, rn); - rn = TCG_REG_TMP; - } else { - TCGReg t = rn; - rn = rm; - rm = t; - } - } - /* umull */ - tcg_out32(s, (cond << 28) | 0x00800090 | - (rd1 << 16) | (rd0 << 12) | (rm << 8) | rn); -} - -static inline void tcg_out_smull32(TCGContext *s, int cond, TCGReg rd0, - TCGReg rd1, TCGReg rn, TCGReg rm) -{ - /* if ArchVersion() < 6 && (dHi == n || dLo == n) then UNPREDICTABLE; */ - if (!use_armv6_instructions && (rd0 == rn || rd1 == rn)) { - if (rd0 == rm || rd1 == rm) { - tcg_out_mov_reg(s, cond, TCG_REG_TMP, rn); - rn = TCG_REG_TMP; - } else { - TCGReg t = rn; - rn = rm; - rm = t; - } - } - /* smull */ - tcg_out32(s, (cond << 28) | 0x00c00090 | - (rd1 << 16) | (rd0 << 12) | (rm << 8) | rn); -} - -static inline void tcg_out_sdiv(TCGContext *s, int cond, int rd, int rn, int rm) -{ - tcg_out32(s, 0x0710f010 | (cond << 28) | (rd << 16) | rn | (rm << 8)); -} - -static inline void tcg_out_udiv(TCGContext *s, int cond, int rd, int rn, int rm) -{ - tcg_out32(s, 0x0730f010 | (cond << 28) | (rd << 16) | rn | (rm << 8)); -} - -static inline void tcg_out_ext8s(TCGContext *s, int cond, - int rd, int rn) -{ - if (use_armv6_instructions) { - /* sxtb */ - tcg_out32(s, 0x06af0070 | (cond << 28) | (rd << 12) | rn); - } else { - tcg_out_dat_reg(s, cond, ARITH_MOV, - rd, 0, rn, SHIFT_IMM_LSL(24)); - tcg_out_dat_reg(s, cond, ARITH_MOV, - rd, 0, rd, SHIFT_IMM_ASR(24)); - } -} - -static inline void tcg_out_ext8u(TCGContext *s, int cond, - int rd, int rn) -{ - tcg_out_dat_imm(s, cond, ARITH_AND, rd, rn, 0xff); -} - -static inline void tcg_out_ext16s(TCGContext *s, int cond, - int rd, int rn) -{ - if (use_armv6_instructions) { - /* sxth */ - tcg_out32(s, 0x06bf0070 | (cond << 28) | (rd << 12) | rn); - } else { - tcg_out_dat_reg(s, cond, ARITH_MOV, - rd, 0, rn, SHIFT_IMM_LSL(16)); - tcg_out_dat_reg(s, cond, ARITH_MOV, - rd, 0, rd, SHIFT_IMM_ASR(16)); - } -} - -static inline void tcg_out_ext16u(TCGContext *s, int cond, - int rd, int rn) -{ - if (use_armv6_instructions) { - /* uxth */ - tcg_out32(s, 0x06ff0070 | (cond << 28) | (rd << 12) | rn); - } else { - tcg_out_dat_reg(s, cond, ARITH_MOV, - rd, 0, rn, SHIFT_IMM_LSL(16)); - tcg_out_dat_reg(s, cond, ARITH_MOV, - rd, 0, rd, SHIFT_IMM_LSR(16)); - } -} - -static inline void tcg_out_bswap16s(TCGContext *s, int cond, int rd, int rn) -{ - if (use_armv6_instructions) { - /* revsh */ - tcg_out32(s, 0x06ff0fb0 | (cond << 28) | (rd << 12) | rn); - } else { - tcg_out_dat_reg(s, cond, ARITH_MOV, - TCG_REG_TMP, 0, rn, SHIFT_IMM_LSL(24)); - tcg_out_dat_reg(s, cond, ARITH_MOV, - TCG_REG_TMP, 0, TCG_REG_TMP, SHIFT_IMM_ASR(16)); - tcg_out_dat_reg(s, cond, ARITH_ORR, - rd, TCG_REG_TMP, rn, SHIFT_IMM_LSR(8)); - } -} - -static inline void tcg_out_bswap16(TCGContext *s, int cond, int rd, int rn) -{ - if (use_armv6_instructions) { - /* rev16 */ - tcg_out32(s, 0x06bf0fb0 | (cond << 28) | (rd << 12) | rn); - } else { - tcg_out_dat_reg(s, cond, ARITH_MOV, - TCG_REG_TMP, 0, rn, SHIFT_IMM_LSL(24)); - tcg_out_dat_reg(s, cond, ARITH_MOV, - TCG_REG_TMP, 0, TCG_REG_TMP, SHIFT_IMM_LSR(16)); - tcg_out_dat_reg(s, cond, ARITH_ORR, - rd, TCG_REG_TMP, rn, SHIFT_IMM_LSR(8)); - } -} - -/* swap the two low bytes assuming that the two high input bytes and the - two high output bit can hold any value. */ -static inline void tcg_out_bswap16st(TCGContext *s, int cond, int rd, int rn) -{ - if (use_armv6_instructions) { - /* rev16 */ - tcg_out32(s, 0x06bf0fb0 | (cond << 28) | (rd << 12) | rn); - } else { - tcg_out_dat_reg(s, cond, ARITH_MOV, - TCG_REG_TMP, 0, rn, SHIFT_IMM_LSR(8)); - tcg_out_dat_imm(s, cond, ARITH_AND, TCG_REG_TMP, TCG_REG_TMP, 0xff); - tcg_out_dat_reg(s, cond, ARITH_ORR, - rd, TCG_REG_TMP, rn, SHIFT_IMM_LSL(8)); - } -} - -static inline void tcg_out_bswap32(TCGContext *s, int cond, int rd, int rn) -{ - if (use_armv6_instructions) { - /* rev */ - tcg_out32(s, 0x06bf0f30 | (cond << 28) | (rd << 12) | rn); - } else { - tcg_out_dat_reg(s, cond, ARITH_EOR, - TCG_REG_TMP, rn, rn, SHIFT_IMM_ROR(16)); - tcg_out_dat_imm(s, cond, ARITH_BIC, - TCG_REG_TMP, TCG_REG_TMP, 0xff | 0x800); - tcg_out_dat_reg(s, cond, ARITH_MOV, - rd, 0, rn, SHIFT_IMM_ROR(8)); - tcg_out_dat_reg(s, cond, ARITH_EOR, - rd, rd, TCG_REG_TMP, SHIFT_IMM_LSR(8)); - } -} - -bool tcg_target_deposit_valid(int ofs, int len) -{ - /* ??? Without bfi, we could improve over generic code by combining - the right-shift from a non-zero ofs with the orr. We do run into - problems when rd == rs, and the mask generated from ofs+len doesn't - fit into an immediate. We would have to be careful not to pessimize - wrt the optimizations performed on the expanded code. */ - return use_armv7_instructions; -} - -static inline void tcg_out_deposit(TCGContext *s, int cond, TCGReg rd, - TCGArg a1, int ofs, int len, bool const_a1) -{ - if (const_a1) { - /* bfi becomes bfc with rn == 15. */ - a1 = 15; - } - /* bfi/bfc */ - tcg_out32(s, 0x07c00010 | (cond << 28) | (rd << 12) | a1 - | (ofs << 7) | ((ofs + len - 1) << 16)); -} - -/* Note that this routine is used for both LDR and LDRH formats, so we do - not wish to include an immediate shift at this point. */ -static void tcg_out_memop_r(TCGContext *s, int cond, ARMInsn opc, TCGReg rt, - TCGReg rn, TCGReg rm, bool u, bool p, bool w) -{ - tcg_out32(s, (cond << 28) | opc | (u << 23) | (p << 24) - | (w << 21) | (rn << 16) | (rt << 12) | rm); -} - -static void tcg_out_memop_8(TCGContext *s, int cond, ARMInsn opc, TCGReg rt, - TCGReg rn, int imm8, bool p, bool w) -{ - bool u = 1; - if (imm8 < 0) { - imm8 = -imm8; - u = 0; - } - tcg_out32(s, (cond << 28) | opc | (u << 23) | (p << 24) | (w << 21) | - (rn << 16) | (rt << 12) | ((imm8 & 0xf0) << 4) | (imm8 & 0xf)); -} - -static void tcg_out_memop_12(TCGContext *s, int cond, ARMInsn opc, TCGReg rt, - TCGReg rn, int imm12, bool p, bool w) -{ - bool u = 1; - if (imm12 < 0) { - imm12 = -imm12; - u = 0; - } - tcg_out32(s, (cond << 28) | opc | (u << 23) | (p << 24) | (w << 21) | - (rn << 16) | (rt << 12) | imm12); -} - -static inline void tcg_out_ld32_12(TCGContext *s, int cond, TCGReg rt, - TCGReg rn, int imm12) -{ - tcg_out_memop_12(s, cond, INSN_LDR_IMM, rt, rn, imm12, 1, 0); -} - -static inline void tcg_out_st32_12(TCGContext *s, int cond, TCGReg rt, - TCGReg rn, int imm12) -{ - tcg_out_memop_12(s, cond, INSN_STR_IMM, rt, rn, imm12, 1, 0); -} - -static inline void tcg_out_ld32_r(TCGContext *s, int cond, TCGReg rt, - TCGReg rn, TCGReg rm) -{ - tcg_out_memop_r(s, cond, INSN_LDR_REG, rt, rn, rm, 1, 1, 0); -} - -static inline void tcg_out_st32_r(TCGContext *s, int cond, TCGReg rt, - TCGReg rn, TCGReg rm) -{ - tcg_out_memop_r(s, cond, INSN_STR_REG, rt, rn, rm, 1, 1, 0); -} - -static inline void tcg_out_ldrd_8(TCGContext *s, int cond, TCGReg rt, - TCGReg rn, int imm8) -{ - tcg_out_memop_8(s, cond, INSN_LDRD_IMM, rt, rn, imm8, 1, 0); -} - -static inline void tcg_out_ldrd_r(TCGContext *s, int cond, TCGReg rt, - TCGReg rn, TCGReg rm) -{ - tcg_out_memop_r(s, cond, INSN_LDRD_REG, rt, rn, rm, 1, 1, 0); -} - -static inline void tcg_out_strd_8(TCGContext *s, int cond, TCGReg rt, - TCGReg rn, int imm8) -{ - tcg_out_memop_8(s, cond, INSN_STRD_IMM, rt, rn, imm8, 1, 0); -} - -static inline void tcg_out_strd_r(TCGContext *s, int cond, TCGReg rt, - TCGReg rn, TCGReg rm) -{ - tcg_out_memop_r(s, cond, INSN_STRD_REG, rt, rn, rm, 1, 1, 0); -} - -/* Register pre-increment with base writeback. */ -static inline void tcg_out_ld32_rwb(TCGContext *s, int cond, TCGReg rt, - TCGReg rn, TCGReg rm) -{ - tcg_out_memop_r(s, cond, INSN_LDR_REG, rt, rn, rm, 1, 1, 1); -} - -static inline void tcg_out_st32_rwb(TCGContext *s, int cond, TCGReg rt, - TCGReg rn, TCGReg rm) -{ - tcg_out_memop_r(s, cond, INSN_STR_REG, rt, rn, rm, 1, 1, 1); -} - -static inline void tcg_out_ld16u_8(TCGContext *s, int cond, TCGReg rt, - TCGReg rn, int imm8) -{ - tcg_out_memop_8(s, cond, INSN_LDRH_IMM, rt, rn, imm8, 1, 0); -} - -static inline void tcg_out_st16_8(TCGContext *s, int cond, TCGReg rt, - TCGReg rn, int imm8) -{ - tcg_out_memop_8(s, cond, INSN_STRH_IMM, rt, rn, imm8, 1, 0); -} - -static inline void tcg_out_ld16u_r(TCGContext *s, int cond, TCGReg rt, - TCGReg rn, TCGReg rm) -{ - tcg_out_memop_r(s, cond, INSN_LDRH_REG, rt, rn, rm, 1, 1, 0); -} - -static inline void tcg_out_st16_r(TCGContext *s, int cond, TCGReg rt, - TCGReg rn, TCGReg rm) -{ - tcg_out_memop_r(s, cond, INSN_STRH_REG, rt, rn, rm, 1, 1, 0); -} - -static inline void tcg_out_ld16s_8(TCGContext *s, int cond, TCGReg rt, - TCGReg rn, int imm8) -{ - tcg_out_memop_8(s, cond, INSN_LDRSH_IMM, rt, rn, imm8, 1, 0); -} - -static inline void tcg_out_ld16s_r(TCGContext *s, int cond, TCGReg rt, - TCGReg rn, TCGReg rm) -{ - tcg_out_memop_r(s, cond, INSN_LDRSH_REG, rt, rn, rm, 1, 1, 0); -} - -static inline void tcg_out_ld8_12(TCGContext *s, int cond, TCGReg rt, - TCGReg rn, int imm12) -{ - tcg_out_memop_12(s, cond, INSN_LDRB_IMM, rt, rn, imm12, 1, 0); -} - -static inline void tcg_out_st8_12(TCGContext *s, int cond, TCGReg rt, - TCGReg rn, int imm12) -{ - tcg_out_memop_12(s, cond, INSN_STRB_IMM, rt, rn, imm12, 1, 0); -} - -static inline void tcg_out_ld8_r(TCGContext *s, int cond, TCGReg rt, - TCGReg rn, TCGReg rm) -{ - tcg_out_memop_r(s, cond, INSN_LDRB_REG, rt, rn, rm, 1, 1, 0); -} - -static inline void tcg_out_st8_r(TCGContext *s, int cond, TCGReg rt, - TCGReg rn, TCGReg rm) -{ - tcg_out_memop_r(s, cond, INSN_STRB_REG, rt, rn, rm, 1, 1, 0); -} - -static inline void tcg_out_ld8s_8(TCGContext *s, int cond, TCGReg rt, - TCGReg rn, int imm8) -{ - tcg_out_memop_8(s, cond, INSN_LDRSB_IMM, rt, rn, imm8, 1, 0); -} - -static inline void tcg_out_ld8s_r(TCGContext *s, int cond, TCGReg rt, - TCGReg rn, TCGReg rm) -{ - tcg_out_memop_r(s, cond, INSN_LDRSB_REG, rt, rn, rm, 1, 1, 0); -} - -static inline void tcg_out_ld32u(TCGContext *s, int cond, - int rd, int rn, int32_t offset) -{ - if (offset > 0xfff || offset < -0xfff) { - tcg_out_movi32(s, cond, TCG_REG_TMP, offset); - tcg_out_ld32_r(s, cond, rd, rn, TCG_REG_TMP); - } else - tcg_out_ld32_12(s, cond, rd, rn, offset); -} - -static inline void tcg_out_st32(TCGContext *s, int cond, - int rd, int rn, int32_t offset) -{ - if (offset > 0xfff || offset < -0xfff) { - tcg_out_movi32(s, cond, TCG_REG_TMP, offset); - tcg_out_st32_r(s, cond, rd, rn, TCG_REG_TMP); - } else - tcg_out_st32_12(s, cond, rd, rn, offset); -} - -static inline void tcg_out_ld16u(TCGContext *s, int cond, - int rd, int rn, int32_t offset) -{ - if (offset > 0xff || offset < -0xff) { - tcg_out_movi32(s, cond, TCG_REG_TMP, offset); - tcg_out_ld16u_r(s, cond, rd, rn, TCG_REG_TMP); - } else - tcg_out_ld16u_8(s, cond, rd, rn, offset); -} - -static inline void tcg_out_ld16s(TCGContext *s, int cond, - int rd, int rn, int32_t offset) -{ - if (offset > 0xff || offset < -0xff) { - tcg_out_movi32(s, cond, TCG_REG_TMP, offset); - tcg_out_ld16s_r(s, cond, rd, rn, TCG_REG_TMP); - } else - tcg_out_ld16s_8(s, cond, rd, rn, offset); -} - -static inline void tcg_out_st16(TCGContext *s, int cond, - int rd, int rn, int32_t offset) -{ - if (offset > 0xff || offset < -0xff) { - tcg_out_movi32(s, cond, TCG_REG_TMP, offset); - tcg_out_st16_r(s, cond, rd, rn, TCG_REG_TMP); - } else - tcg_out_st16_8(s, cond, rd, rn, offset); -} - -static inline void tcg_out_ld8u(TCGContext *s, int cond, - int rd, int rn, int32_t offset) -{ - if (offset > 0xfff || offset < -0xfff) { - tcg_out_movi32(s, cond, TCG_REG_TMP, offset); - tcg_out_ld8_r(s, cond, rd, rn, TCG_REG_TMP); - } else - tcg_out_ld8_12(s, cond, rd, rn, offset); -} - -static inline void tcg_out_ld8s(TCGContext *s, int cond, - int rd, int rn, int32_t offset) -{ - if (offset > 0xff || offset < -0xff) { - tcg_out_movi32(s, cond, TCG_REG_TMP, offset); - tcg_out_ld8s_r(s, cond, rd, rn, TCG_REG_TMP); - } else - tcg_out_ld8s_8(s, cond, rd, rn, offset); -} - -static inline void tcg_out_st8(TCGContext *s, int cond, - int rd, int rn, int32_t offset) -{ - if (offset > 0xfff || offset < -0xfff) { - tcg_out_movi32(s, cond, TCG_REG_TMP, offset); - tcg_out_st8_r(s, cond, rd, rn, TCG_REG_TMP); - } else - tcg_out_st8_12(s, cond, rd, rn, offset); -} - -/* The _goto case is normally between TBs within the same code buffer, and - * with the code buffer limited to 16MB we wouldn't need the long case. - * But we also use it for the tail-call to the qemu_ld/st helpers, which does. - */ -static inline void tcg_out_goto(TCGContext *s, int cond, tcg_insn_unit *addr) -{ - intptr_t addri = (intptr_t)addr; - ptrdiff_t disp = tcg_pcrel_diff(s, addr); - - if ((addri & 1) == 0 && disp - 8 < 0x01fffffd && disp - 8 > -0x01fffffd) { - tcg_out_b(s, cond, disp); - return; - } - - tcg_out_movi32(s, cond, TCG_REG_TMP, addri); - if (use_armv5t_instructions) { - tcg_out_bx(s, cond, TCG_REG_TMP); - } else { - if (addri & 1) { - tcg_abort(); - } - tcg_out_mov_reg(s, cond, TCG_REG_PC, TCG_REG_TMP); - } -} - -/* The call case is mostly used for helpers - so it's not unreasonable - * for them to be beyond branch range */ -static void tcg_out_call(TCGContext *s, tcg_insn_unit *addr) -{ - intptr_t addri = (intptr_t)addr; - ptrdiff_t disp = tcg_pcrel_diff(s, addr); - - if (disp - 8 < 0x02000000 && disp - 8 >= -0x02000000) { - if (addri & 1) { - /* Use BLX if the target is in Thumb mode */ - if (!use_armv5t_instructions) { - tcg_abort(); - } - tcg_out_blx_imm(s, disp); - } else { - tcg_out_bl(s, COND_AL, disp); - } - } else if (use_armv7_instructions) { - tcg_out_movi32(s, COND_AL, TCG_REG_TMP, addri); - tcg_out_blx(s, COND_AL, TCG_REG_TMP); - } else { - tcg_out_dat_imm(s, COND_AL, ARITH_ADD, TCG_REG_R14, TCG_REG_PC, 4); - tcg_out_ld32_12(s, COND_AL, TCG_REG_PC, TCG_REG_PC, -4); - tcg_out32(s, addri); - } -} - -static inline void tcg_out_goto_label(TCGContext *s, int cond, TCGLabel *l) -{ - if (l->has_value) { - tcg_out_goto(s, cond, l->u.value_ptr); - } else { - tcg_out_reloc(s, s->code_ptr, R_ARM_PC24, l, 0); - tcg_out_b_noaddr(s, cond); - } -} - -#ifdef CONFIG_SOFTMMU -/* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr, - * int mmu_idx, uintptr_t ra) - */ -static void * const qemu_ld_helpers[16] = { - [MO_UB] = helper_ret_ldub_mmu, - [MO_SB] = helper_ret_ldsb_mmu, - - [MO_LEUW] = helper_le_lduw_mmu, - [MO_LEUL] = helper_le_ldul_mmu, - [MO_LEQ] = helper_le_ldq_mmu, - [MO_LESW] = helper_le_ldsw_mmu, - [MO_LESL] = helper_le_ldul_mmu, - - [MO_BEUW] = helper_be_lduw_mmu, - [MO_BEUL] = helper_be_ldul_mmu, - [MO_BEQ] = helper_be_ldq_mmu, - [MO_BESW] = helper_be_ldsw_mmu, - [MO_BESL] = helper_be_ldul_mmu, -}; - -/* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr, - * uintxx_t val, int mmu_idx, uintptr_t ra) - */ -static void * const qemu_st_helpers[16] = { - [MO_UB] = helper_ret_stb_mmu, - [MO_LEUW] = helper_le_stw_mmu, - [MO_LEUL] = helper_le_stl_mmu, - [MO_LEQ] = helper_le_stq_mmu, - [MO_BEUW] = helper_be_stw_mmu, - [MO_BEUL] = helper_be_stl_mmu, - [MO_BEQ] = helper_be_stq_mmu, -}; - -/* Helper routines for marshalling helper function arguments into - * the correct registers and stack. - * argreg is where we want to put this argument, arg is the argument itself. - * Return value is the updated argreg ready for the next call. - * Note that argreg 0..3 is real registers, 4+ on stack. - * - * We provide routines for arguments which are: immediate, 32 bit - * value in register, 16 and 8 bit values in register (which must be zero - * extended before use) and 64 bit value in a lo:hi register pair. - */ -#define DEFINE_TCG_OUT_ARG(NAME, ARGTYPE, MOV_ARG, EXT_ARG) \ -static TCGReg NAME(TCGContext *s, TCGReg argreg, ARGTYPE arg) \ -{ \ - if (argreg < 4) { \ - MOV_ARG(s, COND_AL, argreg, arg); \ - } else { \ - int ofs = (argreg - 4) * 4; \ - EXT_ARG; \ - assert(ofs + 4 <= TCG_STATIC_CALL_ARGS_SIZE); \ - tcg_out_st32_12(s, COND_AL, arg, TCG_REG_CALL_STACK, ofs); \ - } \ - return argreg + 1; \ -} - -DEFINE_TCG_OUT_ARG(tcg_out_arg_imm32, uint32_t, tcg_out_movi32, - (tcg_out_movi32(s, COND_AL, TCG_REG_TMP, arg), arg = TCG_REG_TMP)) -DEFINE_TCG_OUT_ARG(tcg_out_arg_reg8, TCGReg, tcg_out_ext8u, - (tcg_out_ext8u(s, COND_AL, TCG_REG_TMP, arg), arg = TCG_REG_TMP)) -DEFINE_TCG_OUT_ARG(tcg_out_arg_reg16, TCGReg, tcg_out_ext16u, - (tcg_out_ext16u(s, COND_AL, TCG_REG_TMP, arg), arg = TCG_REG_TMP)) -DEFINE_TCG_OUT_ARG(tcg_out_arg_reg32, TCGReg, tcg_out_mov_reg, ) - -static TCGReg tcg_out_arg_reg64(TCGContext *s, TCGReg argreg, - TCGReg arglo, TCGReg arghi) -{ - /* 64 bit arguments must go in even/odd register pairs - * and in 8-aligned stack slots. - */ - if (argreg & 1) { - argreg++; - } - if (use_armv6_instructions && argreg >= 4 - && (arglo & 1) == 0 && arghi == arglo + 1) { - tcg_out_strd_8(s, COND_AL, arglo, - TCG_REG_CALL_STACK, (argreg - 4) * 4); - return argreg + 2; - } else { - argreg = tcg_out_arg_reg32(s, argreg, arglo); - argreg = tcg_out_arg_reg32(s, argreg, arghi); - return argreg; - } -} - -#define TLB_SHIFT (CPU_TLB_ENTRY_BITS + CPU_TLB_BITS) - -/* We're expecting to use an 8-bit immediate and to mask. */ -QEMU_BUILD_BUG_ON(CPU_TLB_BITS > 8); - -/* We're expecting to use an 8-bit immediate add + 8-bit ldrd offset. - Using the offset of the second entry in the last tlb table ensures - that we can index all of the elements of the first entry. */ -QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_table[NB_MMU_MODES - 1][1]) - > 0xffff); - -/* Load and compare a TLB entry, leaving the flags set. Returns the register - containing the addend of the tlb entry. Clobbers R0, R1, R2, TMP. */ - -static TCGReg tcg_out_tlb_read(TCGContext *s, TCGReg addrlo, TCGReg addrhi, - TCGMemOp s_bits, int mem_index, bool is_load) -{ - TCGReg base = TCG_AREG0; - int cmp_off = - (is_load - ? offsetof(CPUArchState, tlb_table[mem_index][0].addr_read) - : offsetof(CPUArchState, tlb_table[mem_index][0].addr_write)); - int add_off = offsetof(CPUArchState, tlb_table[mem_index][0].addend); - - /* Should generate something like the following: - * shr tmp, addrlo, #TARGET_PAGE_BITS (1) - * add r2, env, #high - * and r0, tmp, #(CPU_TLB_SIZE - 1) (2) - * add r2, r2, r0, lsl #CPU_TLB_ENTRY_BITS (3) - * ldr r0, [r2, #cmp] (4) - * tst addrlo, #s_mask - * ldr r2, [r2, #add] (5) - * cmpeq r0, tmp, lsl #TARGET_PAGE_BITS - */ - tcg_out_dat_reg(s, COND_AL, ARITH_MOV, TCG_REG_TMP, - 0, addrlo, SHIFT_IMM_LSR(TARGET_PAGE_BITS)); - - /* We checked that the offset is contained within 16 bits above. */ - if (add_off > 0xfff || (use_armv6_instructions && cmp_off > 0xff)) { - tcg_out_dat_imm(s, COND_AL, ARITH_ADD, TCG_REG_R2, base, - (24 << 7) | (cmp_off >> 8)); - base = TCG_REG_R2; - add_off -= cmp_off & 0xff00; - cmp_off &= 0xff; - } - - tcg_out_dat_imm(s, COND_AL, ARITH_AND, - TCG_REG_R0, TCG_REG_TMP, CPU_TLB_SIZE - 1); - tcg_out_dat_reg(s, COND_AL, ARITH_ADD, TCG_REG_R2, base, - TCG_REG_R0, SHIFT_IMM_LSL(CPU_TLB_ENTRY_BITS)); - - /* Load the tlb comparator. Use ldrd if needed and available, - but due to how the pointer needs setting up, ldm isn't useful. - Base arm5 doesn't have ldrd, but armv5te does. */ - if (use_armv6_instructions && TARGET_LONG_BITS == 64) { - tcg_out_ldrd_8(s, COND_AL, TCG_REG_R0, TCG_REG_R2, cmp_off); - } else { - tcg_out_ld32_12(s, COND_AL, TCG_REG_R0, TCG_REG_R2, cmp_off); - if (TARGET_LONG_BITS == 64) { - tcg_out_ld32_12(s, COND_AL, TCG_REG_R1, TCG_REG_R2, cmp_off + 4); - } - } - - /* Check alignment. */ - if (s_bits) { - tcg_out_dat_imm(s, COND_AL, ARITH_TST, - 0, addrlo, (1 << s_bits) - 1); - } - - /* Load the tlb addend. */ - tcg_out_ld32_12(s, COND_AL, TCG_REG_R2, TCG_REG_R2, add_off); - - tcg_out_dat_reg(s, (s_bits ? COND_EQ : COND_AL), ARITH_CMP, 0, - TCG_REG_R0, TCG_REG_TMP, SHIFT_IMM_LSL(TARGET_PAGE_BITS)); - - if (TARGET_LONG_BITS == 64) { - tcg_out_dat_reg(s, COND_EQ, ARITH_CMP, 0, - TCG_REG_R1, addrhi, SHIFT_IMM_LSL(0)); - } - - return TCG_REG_R2; -} - -/* Record the context of a call to the out of line helper code for the slow - path for a load or store, so that we can later generate the correct - helper code. */ -static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi, - TCGReg datalo, TCGReg datahi, TCGReg addrlo, - TCGReg addrhi, tcg_insn_unit *raddr, - tcg_insn_unit *label_ptr) -{ - TCGLabelQemuLdst *label = new_ldst_label(s); - - label->is_ld = is_ld; - label->oi = oi; - label->datalo_reg = datalo; - label->datahi_reg = datahi; - label->addrlo_reg = addrlo; - label->addrhi_reg = addrhi; - label->raddr = raddr; - label->label_ptr[0] = label_ptr; -} - -static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) -{ - TCGReg argreg, datalo, datahi; - TCGMemOpIdx oi = lb->oi; - TCGMemOp opc = get_memop(oi); - void *func; - - reloc_pc24(lb->label_ptr[0], s->code_ptr); - - argreg = tcg_out_arg_reg32(s, TCG_REG_R0, TCG_AREG0); - if (TARGET_LONG_BITS == 64) { - argreg = tcg_out_arg_reg64(s, argreg, lb->addrlo_reg, lb->addrhi_reg); - } else { - argreg = tcg_out_arg_reg32(s, argreg, lb->addrlo_reg); - } - argreg = tcg_out_arg_imm32(s, argreg, oi); - argreg = tcg_out_arg_reg32(s, argreg, TCG_REG_R14); - - /* For armv6 we can use the canonical unsigned helpers and minimize - icache usage. For pre-armv6, use the signed helpers since we do - not have a single insn sign-extend. */ - if (use_armv6_instructions) { - func = qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]; - } else { - func = qemu_ld_helpers[opc & (MO_BSWAP | MO_SSIZE)]; - if (opc & MO_SIGN) { - opc = MO_UL; - } - } - tcg_out_call(s, func); - - datalo = lb->datalo_reg; - datahi = lb->datahi_reg; - switch (opc & MO_SSIZE) { - case MO_SB: - tcg_out_ext8s(s, COND_AL, datalo, TCG_REG_R0); - break; - case MO_SW: - tcg_out_ext16s(s, COND_AL, datalo, TCG_REG_R0); - break; - default: - tcg_out_mov_reg(s, COND_AL, datalo, TCG_REG_R0); - break; - case MO_Q: - if (datalo != TCG_REG_R1) { - tcg_out_mov_reg(s, COND_AL, datalo, TCG_REG_R0); - tcg_out_mov_reg(s, COND_AL, datahi, TCG_REG_R1); - } else if (datahi != TCG_REG_R0) { - tcg_out_mov_reg(s, COND_AL, datahi, TCG_REG_R1); - tcg_out_mov_reg(s, COND_AL, datalo, TCG_REG_R0); - } else { - tcg_out_mov_reg(s, COND_AL, TCG_REG_TMP, TCG_REG_R0); - tcg_out_mov_reg(s, COND_AL, datahi, TCG_REG_R1); - tcg_out_mov_reg(s, COND_AL, datalo, TCG_REG_TMP); - } - break; - } - - tcg_out_goto(s, COND_AL, lb->raddr); -} - -static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) -{ - TCGReg argreg, datalo, datahi; - TCGMemOpIdx oi = lb->oi; - TCGMemOp opc = get_memop(oi); - - reloc_pc24(lb->label_ptr[0], s->code_ptr); - - argreg = TCG_REG_R0; - argreg = tcg_out_arg_reg32(s, argreg, TCG_AREG0); - if (TARGET_LONG_BITS == 64) { - argreg = tcg_out_arg_reg64(s, argreg, lb->addrlo_reg, lb->addrhi_reg); - } else { - argreg = tcg_out_arg_reg32(s, argreg, lb->addrlo_reg); - } - - datalo = lb->datalo_reg; - datahi = lb->datahi_reg; - switch (opc & MO_SIZE) { - case MO_8: - argreg = tcg_out_arg_reg8(s, argreg, datalo); - break; - case MO_16: - argreg = tcg_out_arg_reg16(s, argreg, datalo); - break; - case MO_32: - default: - argreg = tcg_out_arg_reg32(s, argreg, datalo); - break; - case MO_64: - argreg = tcg_out_arg_reg64(s, argreg, datalo, datahi); - break; - } - - argreg = tcg_out_arg_imm32(s, argreg, oi); - argreg = tcg_out_arg_reg32(s, argreg, TCG_REG_R14); - - /* Tail-call to the helper, which will return to the fast path. */ - tcg_out_goto(s, COND_AL, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]); -} -#endif /* SOFTMMU */ - -static inline void tcg_out_qemu_ld_index(TCGContext *s, TCGMemOp opc, - TCGReg datalo, TCGReg datahi, - TCGReg addrlo, TCGReg addend) -{ - TCGMemOp bswap = opc & MO_BSWAP; - - switch (opc & MO_SSIZE) { - case MO_UB: - tcg_out_ld8_r(s, COND_AL, datalo, addrlo, addend); - break; - case MO_SB: - tcg_out_ld8s_r(s, COND_AL, datalo, addrlo, addend); - break; - case MO_UW: - tcg_out_ld16u_r(s, COND_AL, datalo, addrlo, addend); - if (bswap) { - tcg_out_bswap16(s, COND_AL, datalo, datalo); - } - break; - case MO_SW: - if (bswap) { - tcg_out_ld16u_r(s, COND_AL, datalo, addrlo, addend); - tcg_out_bswap16s(s, COND_AL, datalo, datalo); - } else { - tcg_out_ld16s_r(s, COND_AL, datalo, addrlo, addend); - } - break; - case MO_UL: - default: - tcg_out_ld32_r(s, COND_AL, datalo, addrlo, addend); - if (bswap) { - tcg_out_bswap32(s, COND_AL, datalo, datalo); - } - break; - case MO_Q: - { - TCGReg dl = (bswap ? datahi : datalo); - TCGReg dh = (bswap ? datalo : datahi); - - /* Avoid ldrd for user-only emulation, to handle unaligned. */ - if (USING_SOFTMMU && use_armv6_instructions - && (dl & 1) == 0 && dh == dl + 1) { - tcg_out_ldrd_r(s, COND_AL, dl, addrlo, addend); - } else if (dl != addend) { - tcg_out_ld32_rwb(s, COND_AL, dl, addend, addrlo); - tcg_out_ld32_12(s, COND_AL, dh, addend, 4); - } else { - tcg_out_dat_reg(s, COND_AL, ARITH_ADD, TCG_REG_TMP, - addend, addrlo, SHIFT_IMM_LSL(0)); - tcg_out_ld32_12(s, COND_AL, dl, TCG_REG_TMP, 0); - tcg_out_ld32_12(s, COND_AL, dh, TCG_REG_TMP, 4); - } - if (bswap) { - tcg_out_bswap32(s, COND_AL, dl, dl); - tcg_out_bswap32(s, COND_AL, dh, dh); - } - } - break; - } -} - -static inline void tcg_out_qemu_ld_direct(TCGContext *s, TCGMemOp opc, - TCGReg datalo, TCGReg datahi, - TCGReg addrlo) -{ - TCGMemOp bswap = opc & MO_BSWAP; - - switch (opc & MO_SSIZE) { - case MO_UB: - tcg_out_ld8_12(s, COND_AL, datalo, addrlo, 0); - break; - case MO_SB: - tcg_out_ld8s_8(s, COND_AL, datalo, addrlo, 0); - break; - case MO_UW: - tcg_out_ld16u_8(s, COND_AL, datalo, addrlo, 0); - if (bswap) { - tcg_out_bswap16(s, COND_AL, datalo, datalo); - } - break; - case MO_SW: - if (bswap) { - tcg_out_ld16u_8(s, COND_AL, datalo, addrlo, 0); - tcg_out_bswap16s(s, COND_AL, datalo, datalo); - } else { - tcg_out_ld16s_8(s, COND_AL, datalo, addrlo, 0); - } - break; - case MO_UL: - default: - tcg_out_ld32_12(s, COND_AL, datalo, addrlo, 0); - if (bswap) { - tcg_out_bswap32(s, COND_AL, datalo, datalo); - } - break; - case MO_Q: - { - TCGReg dl = (bswap ? datahi : datalo); - TCGReg dh = (bswap ? datalo : datahi); - - /* Avoid ldrd for user-only emulation, to handle unaligned. */ - if (USING_SOFTMMU && use_armv6_instructions - && (dl & 1) == 0 && dh == dl + 1) { - tcg_out_ldrd_8(s, COND_AL, dl, addrlo, 0); - } else if (dl == addrlo) { - tcg_out_ld32_12(s, COND_AL, dh, addrlo, bswap ? 0 : 4); - tcg_out_ld32_12(s, COND_AL, dl, addrlo, bswap ? 4 : 0); - } else { - tcg_out_ld32_12(s, COND_AL, dl, addrlo, bswap ? 4 : 0); - tcg_out_ld32_12(s, COND_AL, dh, addrlo, bswap ? 0 : 4); - } - if (bswap) { - tcg_out_bswap32(s, COND_AL, dl, dl); - tcg_out_bswap32(s, COND_AL, dh, dh); - } - } - break; - } -} - -static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64) -{ - TCGReg addrlo, datalo, datahi, addrhi __attribute__((unused)); - TCGMemOpIdx oi; - TCGMemOp opc; -#ifdef CONFIG_SOFTMMU - int mem_index; - TCGReg addend; - tcg_insn_unit *label_ptr; -#endif - - datalo = *args++; - datahi = (is64 ? *args++ : 0); - addrlo = *args++; - addrhi = (TARGET_LONG_BITS == 64 ? *args++ : 0); - oi = *args++; - opc = get_memop(oi); - -#ifdef CONFIG_SOFTMMU - mem_index = get_mmuidx(oi); - addend = tcg_out_tlb_read(s, addrlo, addrhi, opc & MO_SIZE, mem_index, 1); - - /* This a conditional BL only to load a pointer within this opcode into LR - for the slow path. We will not be using the value for a tail call. */ - label_ptr = s->code_ptr; - tcg_out_bl_noaddr(s, COND_NE); - - tcg_out_qemu_ld_index(s, opc, datalo, datahi, addrlo, addend); - - add_qemu_ldst_label(s, true, oi, datalo, datahi, addrlo, addrhi, - s->code_ptr, label_ptr); -#else /* !CONFIG_SOFTMMU */ - if (guest_base) { - tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP, guest_base); - tcg_out_qemu_ld_index(s, opc, datalo, datahi, addrlo, TCG_REG_TMP); - } else { - tcg_out_qemu_ld_direct(s, opc, datalo, datahi, addrlo); - } -#endif -} - -static inline void tcg_out_qemu_st_index(TCGContext *s, int cond, TCGMemOp opc, - TCGReg datalo, TCGReg datahi, - TCGReg addrlo, TCGReg addend) -{ - TCGMemOp bswap = opc & MO_BSWAP; - - switch (opc & MO_SIZE) { - case MO_8: - tcg_out_st8_r(s, cond, datalo, addrlo, addend); - break; - case MO_16: - if (bswap) { - tcg_out_bswap16st(s, cond, TCG_REG_R0, datalo); - tcg_out_st16_r(s, cond, TCG_REG_R0, addrlo, addend); - } else { - tcg_out_st16_r(s, cond, datalo, addrlo, addend); - } - break; - case MO_32: - default: - if (bswap) { - tcg_out_bswap32(s, cond, TCG_REG_R0, datalo); - tcg_out_st32_r(s, cond, TCG_REG_R0, addrlo, addend); - } else { - tcg_out_st32_r(s, cond, datalo, addrlo, addend); - } - break; - case MO_64: - /* Avoid strd for user-only emulation, to handle unaligned. */ - if (bswap) { - tcg_out_bswap32(s, cond, TCG_REG_R0, datahi); - tcg_out_st32_rwb(s, cond, TCG_REG_R0, addend, addrlo); - tcg_out_bswap32(s, cond, TCG_REG_R0, datalo); - tcg_out_st32_12(s, cond, TCG_REG_R0, addend, 4); - } else if (USING_SOFTMMU && use_armv6_instructions - && (datalo & 1) == 0 && datahi == datalo + 1) { - tcg_out_strd_r(s, cond, datalo, addrlo, addend); - } else { - tcg_out_st32_rwb(s, cond, datalo, addend, addrlo); - tcg_out_st32_12(s, cond, datahi, addend, 4); - } - break; - } -} - -static inline void tcg_out_qemu_st_direct(TCGContext *s, TCGMemOp opc, - TCGReg datalo, TCGReg datahi, - TCGReg addrlo) -{ - TCGMemOp bswap = opc & MO_BSWAP; - - switch (opc & MO_SIZE) { - case MO_8: - tcg_out_st8_12(s, COND_AL, datalo, addrlo, 0); - break; - case MO_16: - if (bswap) { - tcg_out_bswap16st(s, COND_AL, TCG_REG_R0, datalo); - tcg_out_st16_8(s, COND_AL, TCG_REG_R0, addrlo, 0); - } else { - tcg_out_st16_8(s, COND_AL, datalo, addrlo, 0); - } - break; - case MO_32: - default: - if (bswap) { - tcg_out_bswap32(s, COND_AL, TCG_REG_R0, datalo); - tcg_out_st32_12(s, COND_AL, TCG_REG_R0, addrlo, 0); - } else { - tcg_out_st32_12(s, COND_AL, datalo, addrlo, 0); - } - break; - case MO_64: - /* Avoid strd for user-only emulation, to handle unaligned. */ - if (bswap) { - tcg_out_bswap32(s, COND_AL, TCG_REG_R0, datahi); - tcg_out_st32_12(s, COND_AL, TCG_REG_R0, addrlo, 0); - tcg_out_bswap32(s, COND_AL, TCG_REG_R0, datalo); - tcg_out_st32_12(s, COND_AL, TCG_REG_R0, addrlo, 4); - } else if (USING_SOFTMMU && use_armv6_instructions - && (datalo & 1) == 0 && datahi == datalo + 1) { - tcg_out_strd_8(s, COND_AL, datalo, addrlo, 0); - } else { - tcg_out_st32_12(s, COND_AL, datalo, addrlo, 0); - tcg_out_st32_12(s, COND_AL, datahi, addrlo, 4); - } - break; - } -} - -static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64) -{ - TCGReg addrlo, datalo, datahi, addrhi __attribute__((unused)); - TCGMemOpIdx oi; - TCGMemOp opc; -#ifdef CONFIG_SOFTMMU - int mem_index; - TCGReg addend; - tcg_insn_unit *label_ptr; -#endif - - datalo = *args++; - datahi = (is64 ? *args++ : 0); - addrlo = *args++; - addrhi = (TARGET_LONG_BITS == 64 ? *args++ : 0); - oi = *args++; - opc = get_memop(oi); - -#ifdef CONFIG_SOFTMMU - mem_index = get_mmuidx(oi); - addend = tcg_out_tlb_read(s, addrlo, addrhi, opc & MO_SIZE, mem_index, 0); - - tcg_out_qemu_st_index(s, COND_EQ, opc, datalo, datahi, addrlo, addend); - - /* The conditional call must come last, as we're going to return here. */ - label_ptr = s->code_ptr; - tcg_out_bl_noaddr(s, COND_NE); - - add_qemu_ldst_label(s, false, oi, datalo, datahi, addrlo, addrhi, - s->code_ptr, label_ptr); -#else /* !CONFIG_SOFTMMU */ - if (guest_base) { - tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP, guest_base); - tcg_out_qemu_st_index(s, COND_AL, opc, datalo, - datahi, addrlo, TCG_REG_TMP); - } else { - tcg_out_qemu_st_direct(s, opc, datalo, datahi, addrlo); - } -#endif -} - -static tcg_insn_unit *tb_ret_addr; - -static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, - const TCGArg *args, const int *const_args) -{ - TCGArg a0, a1, a2, a3, a4, a5; - int c; - - switch (opc) { - case INDEX_op_exit_tb: - tcg_out_movi32(s, COND_AL, TCG_REG_R0, args[0]); - tcg_out_goto(s, COND_AL, tb_ret_addr); - break; - case INDEX_op_goto_tb: - if (s->tb_jmp_offset) { - /* Direct jump method */ - s->tb_jmp_offset[args[0]] = tcg_current_code_size(s); - tcg_out_b_noaddr(s, COND_AL); - } else { - /* Indirect jump method */ - intptr_t ptr = (intptr_t)(s->tb_next + args[0]); - tcg_out_movi32(s, COND_AL, TCG_REG_R0, ptr & ~0xfff); - tcg_out_ld32_12(s, COND_AL, TCG_REG_PC, TCG_REG_R0, ptr & 0xfff); - } - s->tb_next_offset[args[0]] = tcg_current_code_size(s); - break; - case INDEX_op_br: - tcg_out_goto_label(s, COND_AL, arg_label(args[0])); - break; - - case INDEX_op_ld8u_i32: - tcg_out_ld8u(s, COND_AL, args[0], args[1], args[2]); - break; - case INDEX_op_ld8s_i32: - tcg_out_ld8s(s, COND_AL, args[0], args[1], args[2]); - break; - case INDEX_op_ld16u_i32: - tcg_out_ld16u(s, COND_AL, args[0], args[1], args[2]); - break; - case INDEX_op_ld16s_i32: - tcg_out_ld16s(s, COND_AL, args[0], args[1], args[2]); - break; - case INDEX_op_ld_i32: - tcg_out_ld32u(s, COND_AL, args[0], args[1], args[2]); - break; - case INDEX_op_st8_i32: - tcg_out_st8(s, COND_AL, args[0], args[1], args[2]); - break; - case INDEX_op_st16_i32: - tcg_out_st16(s, COND_AL, args[0], args[1], args[2]); - break; - case INDEX_op_st_i32: - tcg_out_st32(s, COND_AL, args[0], args[1], args[2]); - break; - - case INDEX_op_movcond_i32: - /* Constraints mean that v2 is always in the same register as dest, - * so we only need to do "if condition passed, move v1 to dest". - */ - tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0, - args[1], args[2], const_args[2]); - tcg_out_dat_rIK(s, tcg_cond_to_arm_cond[args[5]], ARITH_MOV, - ARITH_MVN, args[0], 0, args[3], const_args[3]); - break; - case INDEX_op_add_i32: - tcg_out_dat_rIN(s, COND_AL, ARITH_ADD, ARITH_SUB, - args[0], args[1], args[2], const_args[2]); - break; - case INDEX_op_sub_i32: - if (const_args[1]) { - if (const_args[2]) { - tcg_out_movi32(s, COND_AL, args[0], args[1] - args[2]); - } else { - tcg_out_dat_rI(s, COND_AL, ARITH_RSB, - args[0], args[2], args[1], 1); - } - } else { - tcg_out_dat_rIN(s, COND_AL, ARITH_SUB, ARITH_ADD, - args[0], args[1], args[2], const_args[2]); - } - break; - case INDEX_op_and_i32: - tcg_out_dat_rIK(s, COND_AL, ARITH_AND, ARITH_BIC, - args[0], args[1], args[2], const_args[2]); - break; - case INDEX_op_andc_i32: - tcg_out_dat_rIK(s, COND_AL, ARITH_BIC, ARITH_AND, - args[0], args[1], args[2], const_args[2]); - break; - case INDEX_op_or_i32: - c = ARITH_ORR; - goto gen_arith; - case INDEX_op_xor_i32: - c = ARITH_EOR; - /* Fall through. */ - gen_arith: - tcg_out_dat_rI(s, COND_AL, c, args[0], args[1], args[2], const_args[2]); - break; - case INDEX_op_add2_i32: - a0 = args[0], a1 = args[1], a2 = args[2]; - a3 = args[3], a4 = args[4], a5 = args[5]; - if (a0 == a3 || (a0 == a5 && !const_args[5])) { - a0 = TCG_REG_TMP; - } - tcg_out_dat_rIN(s, COND_AL, ARITH_ADD | TO_CPSR, ARITH_SUB | TO_CPSR, - a0, a2, a4, const_args[4]); - tcg_out_dat_rIK(s, COND_AL, ARITH_ADC, ARITH_SBC, - a1, a3, a5, const_args[5]); - tcg_out_mov_reg(s, COND_AL, args[0], a0); - break; - case INDEX_op_sub2_i32: - a0 = args[0], a1 = args[1], a2 = args[2]; - a3 = args[3], a4 = args[4], a5 = args[5]; - if ((a0 == a3 && !const_args[3]) || (a0 == a5 && !const_args[5])) { - a0 = TCG_REG_TMP; - } - if (const_args[2]) { - if (const_args[4]) { - tcg_out_movi32(s, COND_AL, a0, a4); - a4 = a0; - } - tcg_out_dat_rI(s, COND_AL, ARITH_RSB | TO_CPSR, a0, a4, a2, 1); - } else { - tcg_out_dat_rIN(s, COND_AL, ARITH_SUB | TO_CPSR, - ARITH_ADD | TO_CPSR, a0, a2, a4, const_args[4]); - } - if (const_args[3]) { - if (const_args[5]) { - tcg_out_movi32(s, COND_AL, a1, a5); - a5 = a1; - } - tcg_out_dat_rI(s, COND_AL, ARITH_RSC, a1, a5, a3, 1); - } else { - tcg_out_dat_rIK(s, COND_AL, ARITH_SBC, ARITH_ADC, - a1, a3, a5, const_args[5]); - } - tcg_out_mov_reg(s, COND_AL, args[0], a0); - break; - case INDEX_op_neg_i32: - tcg_out_dat_imm(s, COND_AL, ARITH_RSB, args[0], args[1], 0); - break; - case INDEX_op_not_i32: - tcg_out_dat_reg(s, COND_AL, - ARITH_MVN, args[0], 0, args[1], SHIFT_IMM_LSL(0)); - break; - case INDEX_op_mul_i32: - tcg_out_mul32(s, COND_AL, args[0], args[1], args[2]); - break; - case INDEX_op_mulu2_i32: - tcg_out_umull32(s, COND_AL, args[0], args[1], args[2], args[3]); - break; - case INDEX_op_muls2_i32: - tcg_out_smull32(s, COND_AL, args[0], args[1], args[2], args[3]); - break; - /* XXX: Perhaps args[2] & 0x1f is wrong */ - case INDEX_op_shl_i32: - c = const_args[2] ? - SHIFT_IMM_LSL(args[2] & 0x1f) : SHIFT_REG_LSL(args[2]); - goto gen_shift32; - case INDEX_op_shr_i32: - c = const_args[2] ? (args[2] & 0x1f) ? SHIFT_IMM_LSR(args[2] & 0x1f) : - SHIFT_IMM_LSL(0) : SHIFT_REG_LSR(args[2]); - goto gen_shift32; - case INDEX_op_sar_i32: - c = const_args[2] ? (args[2] & 0x1f) ? SHIFT_IMM_ASR(args[2] & 0x1f) : - SHIFT_IMM_LSL(0) : SHIFT_REG_ASR(args[2]); - goto gen_shift32; - case INDEX_op_rotr_i32: - c = const_args[2] ? (args[2] & 0x1f) ? SHIFT_IMM_ROR(args[2] & 0x1f) : - SHIFT_IMM_LSL(0) : SHIFT_REG_ROR(args[2]); - /* Fall through. */ - gen_shift32: - tcg_out_dat_reg(s, COND_AL, ARITH_MOV, args[0], 0, args[1], c); - break; - - case INDEX_op_rotl_i32: - if (const_args[2]) { - tcg_out_dat_reg(s, COND_AL, ARITH_MOV, args[0], 0, args[1], - ((0x20 - args[2]) & 0x1f) ? - SHIFT_IMM_ROR((0x20 - args[2]) & 0x1f) : - SHIFT_IMM_LSL(0)); - } else { - tcg_out_dat_imm(s, COND_AL, ARITH_RSB, TCG_REG_TMP, args[2], 0x20); - tcg_out_dat_reg(s, COND_AL, ARITH_MOV, args[0], 0, args[1], - SHIFT_REG_ROR(TCG_REG_TMP)); - } - break; - - case INDEX_op_brcond_i32: - tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0, - args[0], args[1], const_args[1]); - tcg_out_goto_label(s, tcg_cond_to_arm_cond[args[2]], - arg_label(args[3])); - break; - case INDEX_op_brcond2_i32: - /* The resulting conditions are: - * TCG_COND_EQ --> a0 == a2 && a1 == a3, - * TCG_COND_NE --> (a0 != a2 && a1 == a3) || a1 != a3, - * TCG_COND_LT(U) --> (a0 < a2 && a1 == a3) || a1 < a3, - * TCG_COND_GE(U) --> (a0 >= a2 && a1 == a3) || (a1 >= a3 && a1 != a3), - * TCG_COND_LE(U) --> (a0 <= a2 && a1 == a3) || (a1 <= a3 && a1 != a3), - * TCG_COND_GT(U) --> (a0 > a2 && a1 == a3) || a1 > a3, - */ - tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0, - args[1], args[3], const_args[3]); - tcg_out_dat_rIN(s, COND_EQ, ARITH_CMP, ARITH_CMN, 0, - args[0], args[2], const_args[2]); - tcg_out_goto_label(s, tcg_cond_to_arm_cond[args[4]], - arg_label(args[5])); - break; - case INDEX_op_setcond_i32: - tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0, - args[1], args[2], const_args[2]); - tcg_out_dat_imm(s, tcg_cond_to_arm_cond[args[3]], - ARITH_MOV, args[0], 0, 1); - tcg_out_dat_imm(s, tcg_cond_to_arm_cond[tcg_invert_cond(args[3])], - ARITH_MOV, args[0], 0, 0); - break; - case INDEX_op_setcond2_i32: - /* See brcond2_i32 comment */ - tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0, - args[2], args[4], const_args[4]); - tcg_out_dat_rIN(s, COND_EQ, ARITH_CMP, ARITH_CMN, 0, - args[1], args[3], const_args[3]); - tcg_out_dat_imm(s, tcg_cond_to_arm_cond[args[5]], - ARITH_MOV, args[0], 0, 1); - tcg_out_dat_imm(s, tcg_cond_to_arm_cond[tcg_invert_cond(args[5])], - ARITH_MOV, args[0], 0, 0); - break; - - case INDEX_op_qemu_ld_i32: - tcg_out_qemu_ld(s, args, 0); - break; - case INDEX_op_qemu_ld_i64: - tcg_out_qemu_ld(s, args, 1); - break; - case INDEX_op_qemu_st_i32: - tcg_out_qemu_st(s, args, 0); - break; - case INDEX_op_qemu_st_i64: - tcg_out_qemu_st(s, args, 1); - break; - - case INDEX_op_bswap16_i32: - tcg_out_bswap16(s, COND_AL, args[0], args[1]); - break; - case INDEX_op_bswap32_i32: - tcg_out_bswap32(s, COND_AL, args[0], args[1]); - break; - - case INDEX_op_ext8s_i32: - tcg_out_ext8s(s, COND_AL, args[0], args[1]); - break; - case INDEX_op_ext16s_i32: - tcg_out_ext16s(s, COND_AL, args[0], args[1]); - break; - case INDEX_op_ext16u_i32: - tcg_out_ext16u(s, COND_AL, args[0], args[1]); - break; - - case INDEX_op_deposit_i32: - tcg_out_deposit(s, COND_AL, args[0], args[2], - args[3], args[4], const_args[2]); - break; - - case INDEX_op_div_i32: - tcg_out_sdiv(s, COND_AL, args[0], args[1], args[2]); - break; - case INDEX_op_divu_i32: - tcg_out_udiv(s, COND_AL, args[0], args[1], args[2]); - break; - - case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */ - case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi. */ - case INDEX_op_call: /* Always emitted via tcg_out_call. */ - default: - tcg_abort(); - } -} - -static const TCGTargetOpDef arm_op_defs[] = { - { INDEX_op_exit_tb, { } }, - { INDEX_op_goto_tb, { } }, - { INDEX_op_br, { } }, - - { INDEX_op_ld8u_i32, { "r", "r" } }, - { INDEX_op_ld8s_i32, { "r", "r" } }, - { INDEX_op_ld16u_i32, { "r", "r" } }, - { INDEX_op_ld16s_i32, { "r", "r" } }, - { INDEX_op_ld_i32, { "r", "r" } }, - { INDEX_op_st8_i32, { "r", "r" } }, - { INDEX_op_st16_i32, { "r", "r" } }, - { INDEX_op_st_i32, { "r", "r" } }, - - /* TODO: "r", "r", "ri" */ - { INDEX_op_add_i32, { "r", "r", "rIN" } }, - { INDEX_op_sub_i32, { "r", "rI", "rIN" } }, - { INDEX_op_mul_i32, { "r", "r", "r" } }, - { INDEX_op_mulu2_i32, { "r", "r", "r", "r" } }, - { INDEX_op_muls2_i32, { "r", "r", "r", "r" } }, - { INDEX_op_and_i32, { "r", "r", "rIK" } }, - { INDEX_op_andc_i32, { "r", "r", "rIK" } }, - { INDEX_op_or_i32, { "r", "r", "rI" } }, - { INDEX_op_xor_i32, { "r", "r", "rI" } }, - { INDEX_op_neg_i32, { "r", "r" } }, - { INDEX_op_not_i32, { "r", "r" } }, - - { INDEX_op_shl_i32, { "r", "r", "ri" } }, - { INDEX_op_shr_i32, { "r", "r", "ri" } }, - { INDEX_op_sar_i32, { "r", "r", "ri" } }, - { INDEX_op_rotl_i32, { "r", "r", "ri" } }, - { INDEX_op_rotr_i32, { "r", "r", "ri" } }, - - { INDEX_op_brcond_i32, { "r", "rIN" } }, - { INDEX_op_setcond_i32, { "r", "r", "rIN" } }, - { INDEX_op_movcond_i32, { "r", "r", "rIN", "rIK", "0" } }, - - { INDEX_op_add2_i32, { "r", "r", "r", "r", "rIN", "rIK" } }, - { INDEX_op_sub2_i32, { "r", "r", "rI", "rI", "rIN", "rIK" } }, - { INDEX_op_brcond2_i32, { "r", "r", "rIN", "rIN" } }, - { INDEX_op_setcond2_i32, { "r", "r", "r", "rIN", "rIN" } }, - -#if TARGET_LONG_BITS == 32 - { INDEX_op_qemu_ld_i32, { "r", "l" } }, - { INDEX_op_qemu_ld_i64, { "r", "r", "l" } }, - { INDEX_op_qemu_st_i32, { "s", "s" } }, - { INDEX_op_qemu_st_i64, { "s", "s", "s" } }, -#else - { INDEX_op_qemu_ld_i32, { "r", "l", "l" } }, - { INDEX_op_qemu_ld_i64, { "r", "r", "l", "l" } }, - { INDEX_op_qemu_st_i32, { "s", "s", "s" } }, - { INDEX_op_qemu_st_i64, { "s", "s", "s", "s" } }, -#endif - - { INDEX_op_bswap16_i32, { "r", "r" } }, - { INDEX_op_bswap32_i32, { "r", "r" } }, - - { INDEX_op_ext8s_i32, { "r", "r" } }, - { INDEX_op_ext16s_i32, { "r", "r" } }, - { INDEX_op_ext16u_i32, { "r", "r" } }, - - { INDEX_op_deposit_i32, { "r", "0", "rZ" } }, - - { INDEX_op_div_i32, { "r", "r", "r" } }, - { INDEX_op_divu_i32, { "r", "r", "r" } }, - - { -1 }, -}; - -static void tcg_target_init(TCGContext *s) -{ - /* Only probe for the platform and capabilities if we havn't already - determined maximum values at compile time. */ -#ifndef use_idiv_instructions - { - unsigned long hwcap = qemu_getauxval(AT_HWCAP); - use_idiv_instructions = (hwcap & HWCAP_ARM_IDIVA) != 0; - } -#endif - if (__ARM_ARCH < 7) { - const char *pl = (const char *)qemu_getauxval(AT_PLATFORM); - if (pl != NULL && pl[0] == 'v' && pl[1] >= '4' && pl[1] <= '9') { - arm_arch = pl[1] - '0'; - } - } - - tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffff); - tcg_regset_set32(tcg_target_call_clobber_regs, 0, - (1 << TCG_REG_R0) | - (1 << TCG_REG_R1) | - (1 << TCG_REG_R2) | - (1 << TCG_REG_R3) | - (1 << TCG_REG_R12) | - (1 << TCG_REG_R14)); - - tcg_regset_clear(s->reserved_regs); - tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK); - tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP); - tcg_regset_set_reg(s->reserved_regs, TCG_REG_PC); - - tcg_add_target_add_op_defs(arm_op_defs); -} - -static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg arg, - TCGReg arg1, intptr_t arg2) -{ - tcg_out_ld32u(s, COND_AL, arg, arg1, arg2); -} - -static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, - TCGReg arg1, intptr_t arg2) -{ - tcg_out_st32(s, COND_AL, arg, arg1, arg2); -} - -static inline void tcg_out_mov(TCGContext *s, TCGType type, - TCGReg ret, TCGReg arg) -{ - tcg_out_dat_reg(s, COND_AL, ARITH_MOV, ret, 0, arg, SHIFT_IMM_LSL(0)); -} - -static inline void tcg_out_movi(TCGContext *s, TCGType type, - TCGReg ret, tcg_target_long arg) -{ - tcg_out_movi32(s, COND_AL, ret, arg); -} - -/* Compute frame size via macros, to share between tcg_target_qemu_prologue - and tcg_register_jit. */ - -#define PUSH_SIZE ((11 - 4 + 1 + 1) * sizeof(tcg_target_long)) - -#define FRAME_SIZE \ - ((PUSH_SIZE \ - + TCG_STATIC_CALL_ARGS_SIZE \ - + CPU_TEMP_BUF_NLONGS * sizeof(long) \ - + TCG_TARGET_STACK_ALIGN - 1) \ - & -TCG_TARGET_STACK_ALIGN) - -static void tcg_target_qemu_prologue(TCGContext *s) -{ - int stack_addend; - - /* Calling convention requires us to save r4-r11 and lr. */ - /* stmdb sp!, { r4 - r11, lr } */ - tcg_out32(s, (COND_AL << 28) | 0x092d4ff0); - - /* Reserve callee argument and tcg temp space. */ - stack_addend = FRAME_SIZE - PUSH_SIZE; - - tcg_out_dat_rI(s, COND_AL, ARITH_SUB, TCG_REG_CALL_STACK, - TCG_REG_CALL_STACK, stack_addend, 1); - tcg_set_frame(s, TCG_REG_CALL_STACK, TCG_STATIC_CALL_ARGS_SIZE, - CPU_TEMP_BUF_NLONGS * sizeof(long)); - - tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]); - - tcg_out_bx(s, COND_AL, tcg_target_call_iarg_regs[1]); - tb_ret_addr = s->code_ptr; - - /* Epilogue. We branch here via tb_ret_addr. */ - tcg_out_dat_rI(s, COND_AL, ARITH_ADD, TCG_REG_CALL_STACK, - TCG_REG_CALL_STACK, stack_addend, 1); - - /* ldmia sp!, { r4 - r11, pc } */ - tcg_out32(s, (COND_AL << 28) | 0x08bd8ff0); -} - -typedef struct { - DebugFrameHeader h; - uint8_t fde_def_cfa[4]; - uint8_t fde_reg_ofs[18]; -} DebugFrame; - -#define ELF_HOST_MACHINE EM_ARM - -/* We're expecting a 2 byte uleb128 encoded value. */ -QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14)); - -static const DebugFrame debug_frame = { - .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */ - .h.cie.id = -1, - .h.cie.version = 1, - .h.cie.code_align = 1, - .h.cie.data_align = 0x7c, /* sleb128 -4 */ - .h.cie.return_column = 14, - - /* Total FDE size does not include the "len" member. */ - .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset), - - .fde_def_cfa = { - 12, 13, /* DW_CFA_def_cfa sp, ... */ - (FRAME_SIZE & 0x7f) | 0x80, /* ... uleb128 FRAME_SIZE */ - (FRAME_SIZE >> 7) - }, - .fde_reg_ofs = { - /* The following must match the stmdb in the prologue. */ - 0x8e, 1, /* DW_CFA_offset, lr, -4 */ - 0x8b, 2, /* DW_CFA_offset, r11, -8 */ - 0x8a, 3, /* DW_CFA_offset, r10, -12 */ - 0x89, 4, /* DW_CFA_offset, r9, -16 */ - 0x88, 5, /* DW_CFA_offset, r8, -20 */ - 0x87, 6, /* DW_CFA_offset, r7, -24 */ - 0x86, 7, /* DW_CFA_offset, r6, -28 */ - 0x85, 8, /* DW_CFA_offset, r5, -32 */ - 0x84, 9, /* DW_CFA_offset, r4, -36 */ - } -}; - -void tcg_register_jit(void *buf, size_t buf_size) -{ - tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame)); -} diff --git a/tcg/arm/tcg-target.inc.c b/tcg/arm/tcg-target.inc.c new file mode 100644 index 0000000..146ac00 --- /dev/null +++ b/tcg/arm/tcg-target.inc.c @@ -0,0 +1,2129 @@ +/* + * Tiny Code Generator for QEMU + * + * Copyright (c) 2008 Andrzej Zaborowski + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "qemu/osdep.h" +#include "elf.h" +#include "tcg-be-ldst.h" + +/* The __ARM_ARCH define is provided by gcc 4.8. Construct it otherwise. */ +#ifndef __ARM_ARCH +# if defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) \ + || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) \ + || defined(__ARM_ARCH_7EM__) +# define __ARM_ARCH 7 +# elif defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) \ + || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) \ + || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6T2__) +# define __ARM_ARCH 6 +# elif defined(__ARM_ARCH_5__) || defined(__ARM_ARCH_5E__) \ + || defined(__ARM_ARCH_5T__) || defined(__ARM_ARCH_5TE__) \ + || defined(__ARM_ARCH_5TEJ__) +# define __ARM_ARCH 5 +# else +# define __ARM_ARCH 4 +# endif +#endif + +static int arm_arch = __ARM_ARCH; + +#if defined(__ARM_ARCH_5T__) \ + || defined(__ARM_ARCH_5TE__) || defined(__ARM_ARCH_5TEJ__) +# define use_armv5t_instructions 1 +#else +# define use_armv5t_instructions use_armv6_instructions +#endif + +#define use_armv6_instructions (__ARM_ARCH >= 6 || arm_arch >= 6) +#define use_armv7_instructions (__ARM_ARCH >= 7 || arm_arch >= 7) + +#ifndef use_idiv_instructions +bool use_idiv_instructions; +#endif + +/* ??? Ought to think about changing CONFIG_SOFTMMU to always defined. */ +#ifdef CONFIG_SOFTMMU +# define USING_SOFTMMU 1 +#else +# define USING_SOFTMMU 0 +#endif + +#ifndef NDEBUG +static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = { + "%r0", + "%r1", + "%r2", + "%r3", + "%r4", + "%r5", + "%r6", + "%r7", + "%r8", + "%r9", + "%r10", + "%r11", + "%r12", + "%r13", + "%r14", + "%pc", +}; +#endif + +static const int tcg_target_reg_alloc_order[] = { + TCG_REG_R4, + TCG_REG_R5, + TCG_REG_R6, + TCG_REG_R7, + TCG_REG_R8, + TCG_REG_R9, + TCG_REG_R10, + TCG_REG_R11, + TCG_REG_R13, + TCG_REG_R0, + TCG_REG_R1, + TCG_REG_R2, + TCG_REG_R3, + TCG_REG_R12, + TCG_REG_R14, +}; + +static const int tcg_target_call_iarg_regs[4] = { + TCG_REG_R0, TCG_REG_R1, TCG_REG_R2, TCG_REG_R3 +}; +static const int tcg_target_call_oarg_regs[2] = { + TCG_REG_R0, TCG_REG_R1 +}; + +#define TCG_REG_TMP TCG_REG_R12 + +static inline void reloc_pc24(tcg_insn_unit *code_ptr, tcg_insn_unit *target) +{ + ptrdiff_t offset = (tcg_ptr_byte_diff(target, code_ptr) - 8) >> 2; + *code_ptr = (*code_ptr & ~0xffffff) | (offset & 0xffffff); +} + +static void patch_reloc(tcg_insn_unit *code_ptr, int type, + intptr_t value, intptr_t addend) +{ + assert(type == R_ARM_PC24); + assert(addend == 0); + reloc_pc24(code_ptr, (tcg_insn_unit *)value); +} + +#define TCG_CT_CONST_ARM 0x100 +#define TCG_CT_CONST_INV 0x200 +#define TCG_CT_CONST_NEG 0x400 +#define TCG_CT_CONST_ZERO 0x800 + +/* parse target specific constraints */ +static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str) +{ + const char *ct_str; + + ct_str = *pct_str; + switch (ct_str[0]) { + case 'I': + ct->ct |= TCG_CT_CONST_ARM; + break; + case 'K': + ct->ct |= TCG_CT_CONST_INV; + break; + case 'N': /* The gcc constraint letter is L, already used here. */ + ct->ct |= TCG_CT_CONST_NEG; + break; + case 'Z': + ct->ct |= TCG_CT_CONST_ZERO; + break; + + case 'r': + ct->ct |= TCG_CT_REG; + tcg_regset_set32(ct->u.regs, 0, (1 << TCG_TARGET_NB_REGS) - 1); + break; + + /* qemu_ld address */ + case 'l': + ct->ct |= TCG_CT_REG; + tcg_regset_set32(ct->u.regs, 0, (1 << TCG_TARGET_NB_REGS) - 1); +#ifdef CONFIG_SOFTMMU + /* r0-r2,lr will be overwritten when reading the tlb entry, + so don't use these. */ + tcg_regset_reset_reg(ct->u.regs, TCG_REG_R0); + tcg_regset_reset_reg(ct->u.regs, TCG_REG_R1); + tcg_regset_reset_reg(ct->u.regs, TCG_REG_R2); + tcg_regset_reset_reg(ct->u.regs, TCG_REG_R14); +#endif + break; + + /* qemu_st address & data */ + case 's': + ct->ct |= TCG_CT_REG; + tcg_regset_set32(ct->u.regs, 0, (1 << TCG_TARGET_NB_REGS) - 1); + /* r0-r2 will be overwritten when reading the tlb entry (softmmu only) + and r0-r1 doing the byte swapping, so don't use these. */ + tcg_regset_reset_reg(ct->u.regs, TCG_REG_R0); + tcg_regset_reset_reg(ct->u.regs, TCG_REG_R1); +#if defined(CONFIG_SOFTMMU) + /* Avoid clashes with registers being used for helper args */ + tcg_regset_reset_reg(ct->u.regs, TCG_REG_R2); +#if TARGET_LONG_BITS == 64 + /* Avoid clashes with registers being used for helper args */ + tcg_regset_reset_reg(ct->u.regs, TCG_REG_R3); +#endif + tcg_regset_reset_reg(ct->u.regs, TCG_REG_R14); +#endif + break; + + default: + return -1; + } + ct_str++; + *pct_str = ct_str; + + return 0; +} + +static inline uint32_t rotl(uint32_t val, int n) +{ + return (val << n) | (val >> (32 - n)); +} + +/* ARM immediates for ALU instructions are made of an unsigned 8-bit + right-rotated by an even amount between 0 and 30. */ +static inline int encode_imm(uint32_t imm) +{ + int shift; + + /* simple case, only lower bits */ + if ((imm & ~0xff) == 0) + return 0; + /* then try a simple even shift */ + shift = ctz32(imm) & ~1; + if (((imm >> shift) & ~0xff) == 0) + return 32 - shift; + /* now try harder with rotations */ + if ((rotl(imm, 2) & ~0xff) == 0) + return 2; + if ((rotl(imm, 4) & ~0xff) == 0) + return 4; + if ((rotl(imm, 6) & ~0xff) == 0) + return 6; + /* imm can't be encoded */ + return -1; +} + +static inline int check_fit_imm(uint32_t imm) +{ + return encode_imm(imm) >= 0; +} + +/* Test if a constant matches the constraint. + * TODO: define constraints for: + * + * ldr/str offset: between -0xfff and 0xfff + * ldrh/strh offset: between -0xff and 0xff + * mov operand2: values represented with x << (2 * y), x < 0x100 + * add, sub, eor...: ditto + */ +static inline int tcg_target_const_match(tcg_target_long val, TCGType type, + const TCGArgConstraint *arg_ct) +{ + int ct; + ct = arg_ct->ct; + if (ct & TCG_CT_CONST) { + return 1; + } else if ((ct & TCG_CT_CONST_ARM) && check_fit_imm(val)) { + return 1; + } else if ((ct & TCG_CT_CONST_INV) && check_fit_imm(~val)) { + return 1; + } else if ((ct & TCG_CT_CONST_NEG) && check_fit_imm(-val)) { + return 1; + } else if ((ct & TCG_CT_CONST_ZERO) && val == 0) { + return 1; + } else { + return 0; + } +} + +#define TO_CPSR (1 << 20) + +typedef enum { + ARITH_AND = 0x0 << 21, + ARITH_EOR = 0x1 << 21, + ARITH_SUB = 0x2 << 21, + ARITH_RSB = 0x3 << 21, + ARITH_ADD = 0x4 << 21, + ARITH_ADC = 0x5 << 21, + ARITH_SBC = 0x6 << 21, + ARITH_RSC = 0x7 << 21, + ARITH_TST = 0x8 << 21 | TO_CPSR, + ARITH_CMP = 0xa << 21 | TO_CPSR, + ARITH_CMN = 0xb << 21 | TO_CPSR, + ARITH_ORR = 0xc << 21, + ARITH_MOV = 0xd << 21, + ARITH_BIC = 0xe << 21, + ARITH_MVN = 0xf << 21, + + INSN_LDR_IMM = 0x04100000, + INSN_LDR_REG = 0x06100000, + INSN_STR_IMM = 0x04000000, + INSN_STR_REG = 0x06000000, + + INSN_LDRH_IMM = 0x005000b0, + INSN_LDRH_REG = 0x001000b0, + INSN_LDRSH_IMM = 0x005000f0, + INSN_LDRSH_REG = 0x001000f0, + INSN_STRH_IMM = 0x004000b0, + INSN_STRH_REG = 0x000000b0, + + INSN_LDRB_IMM = 0x04500000, + INSN_LDRB_REG = 0x06500000, + INSN_LDRSB_IMM = 0x005000d0, + INSN_LDRSB_REG = 0x001000d0, + INSN_STRB_IMM = 0x04400000, + INSN_STRB_REG = 0x06400000, + + INSN_LDRD_IMM = 0x004000d0, + INSN_LDRD_REG = 0x000000d0, + INSN_STRD_IMM = 0x004000f0, + INSN_STRD_REG = 0x000000f0, +} ARMInsn; + +#define SHIFT_IMM_LSL(im) (((im) << 7) | 0x00) +#define SHIFT_IMM_LSR(im) (((im) << 7) | 0x20) +#define SHIFT_IMM_ASR(im) (((im) << 7) | 0x40) +#define SHIFT_IMM_ROR(im) (((im) << 7) | 0x60) +#define SHIFT_REG_LSL(rs) (((rs) << 8) | 0x10) +#define SHIFT_REG_LSR(rs) (((rs) << 8) | 0x30) +#define SHIFT_REG_ASR(rs) (((rs) << 8) | 0x50) +#define SHIFT_REG_ROR(rs) (((rs) << 8) | 0x70) + +enum arm_cond_code_e { + COND_EQ = 0x0, + COND_NE = 0x1, + COND_CS = 0x2, /* Unsigned greater or equal */ + COND_CC = 0x3, /* Unsigned less than */ + COND_MI = 0x4, /* Negative */ + COND_PL = 0x5, /* Zero or greater */ + COND_VS = 0x6, /* Overflow */ + COND_VC = 0x7, /* No overflow */ + COND_HI = 0x8, /* Unsigned greater than */ + COND_LS = 0x9, /* Unsigned less or equal */ + COND_GE = 0xa, + COND_LT = 0xb, + COND_GT = 0xc, + COND_LE = 0xd, + COND_AL = 0xe, +}; + +static const uint8_t tcg_cond_to_arm_cond[] = { + [TCG_COND_EQ] = COND_EQ, + [TCG_COND_NE] = COND_NE, + [TCG_COND_LT] = COND_LT, + [TCG_COND_GE] = COND_GE, + [TCG_COND_LE] = COND_LE, + [TCG_COND_GT] = COND_GT, + /* unsigned */ + [TCG_COND_LTU] = COND_CC, + [TCG_COND_GEU] = COND_CS, + [TCG_COND_LEU] = COND_LS, + [TCG_COND_GTU] = COND_HI, +}; + +static inline void tcg_out_bx(TCGContext *s, int cond, int rn) +{ + tcg_out32(s, (cond << 28) | 0x012fff10 | rn); +} + +static inline void tcg_out_b(TCGContext *s, int cond, int32_t offset) +{ + tcg_out32(s, (cond << 28) | 0x0a000000 | + (((offset - 8) >> 2) & 0x00ffffff)); +} + +static inline void tcg_out_b_noaddr(TCGContext *s, int cond) +{ + /* We pay attention here to not modify the branch target by masking + the corresponding bytes. This ensure that caches and memory are + kept coherent during retranslation. */ + tcg_out32(s, deposit32(*s->code_ptr, 24, 8, (cond << 4) | 0x0a)); +} + +static inline void tcg_out_bl_noaddr(TCGContext *s, int cond) +{ + /* We pay attention here to not modify the branch target by masking + the corresponding bytes. This ensure that caches and memory are + kept coherent during retranslation. */ + tcg_out32(s, deposit32(*s->code_ptr, 24, 8, (cond << 4) | 0x0b)); +} + +static inline void tcg_out_bl(TCGContext *s, int cond, int32_t offset) +{ + tcg_out32(s, (cond << 28) | 0x0b000000 | + (((offset - 8) >> 2) & 0x00ffffff)); +} + +static inline void tcg_out_blx(TCGContext *s, int cond, int rn) +{ + tcg_out32(s, (cond << 28) | 0x012fff30 | rn); +} + +static inline void tcg_out_blx_imm(TCGContext *s, int32_t offset) +{ + tcg_out32(s, 0xfa000000 | ((offset & 2) << 23) | + (((offset - 8) >> 2) & 0x00ffffff)); +} + +static inline void tcg_out_dat_reg(TCGContext *s, + int cond, int opc, int rd, int rn, int rm, int shift) +{ + tcg_out32(s, (cond << 28) | (0 << 25) | opc | + (rn << 16) | (rd << 12) | shift | rm); +} + +static inline void tcg_out_nop(TCGContext *s) +{ + if (use_armv7_instructions) { + /* Architected nop introduced in v6k. */ + /* ??? This is an MSR (imm) 0,0,0 insn. Anyone know if this + also Just So Happened to do nothing on pre-v6k so that we + don't need to conditionalize it? */ + tcg_out32(s, 0xe320f000); + } else { + /* Prior to that the assembler uses mov r0, r0. */ + tcg_out_dat_reg(s, COND_AL, ARITH_MOV, 0, 0, 0, SHIFT_IMM_LSL(0)); + } +} + +static inline void tcg_out_mov_reg(TCGContext *s, int cond, int rd, int rm) +{ + /* Simple reg-reg move, optimising out the 'do nothing' case */ + if (rd != rm) { + tcg_out_dat_reg(s, cond, ARITH_MOV, rd, 0, rm, SHIFT_IMM_LSL(0)); + } +} + +static inline void tcg_out_dat_imm(TCGContext *s, + int cond, int opc, int rd, int rn, int im) +{ + tcg_out32(s, (cond << 28) | (1 << 25) | opc | + (rn << 16) | (rd << 12) | im); +} + +static void tcg_out_movi32(TCGContext *s, int cond, int rd, uint32_t arg) +{ + int rot, opc, rn; + + /* For armv7, make sure not to use movw+movt when mov/mvn would do. + Speed things up by only checking when movt would be required. + Prior to armv7, have one go at fully rotated immediates before + doing the decomposition thing below. */ + if (!use_armv7_instructions || (arg & 0xffff0000)) { + rot = encode_imm(arg); + if (rot >= 0) { + tcg_out_dat_imm(s, cond, ARITH_MOV, rd, 0, + rotl(arg, rot) | (rot << 7)); + return; + } + rot = encode_imm(~arg); + if (rot >= 0) { + tcg_out_dat_imm(s, cond, ARITH_MVN, rd, 0, + rotl(~arg, rot) | (rot << 7)); + return; + } + } + + /* Use movw + movt. */ + if (use_armv7_instructions) { + /* movw */ + tcg_out32(s, (cond << 28) | 0x03000000 | (rd << 12) + | ((arg << 4) & 0x000f0000) | (arg & 0xfff)); + if (arg & 0xffff0000) { + /* movt */ + tcg_out32(s, (cond << 28) | 0x03400000 | (rd << 12) + | ((arg >> 12) & 0x000f0000) | ((arg >> 16) & 0xfff)); + } + return; + } + + /* TODO: This is very suboptimal, we can easily have a constant + pool somewhere after all the instructions. */ + opc = ARITH_MOV; + rn = 0; + /* If we have lots of leading 1's, we can shorten the sequence by + beginning with mvn and then clearing higher bits with eor. */ + if (clz32(~arg) > clz32(arg)) { + opc = ARITH_MVN, arg = ~arg; + } + do { + int i = ctz32(arg) & ~1; + rot = ((32 - i) << 7) & 0xf00; + tcg_out_dat_imm(s, cond, opc, rd, rn, ((arg >> i) & 0xff) | rot); + arg &= ~(0xff << i); + + opc = ARITH_EOR; + rn = rd; + } while (arg); +} + +static inline void tcg_out_dat_rI(TCGContext *s, int cond, int opc, TCGArg dst, + TCGArg lhs, TCGArg rhs, int rhs_is_const) +{ + /* Emit either the reg,imm or reg,reg form of a data-processing insn. + * rhs must satisfy the "rI" constraint. + */ + if (rhs_is_const) { + int rot = encode_imm(rhs); + assert(rot >= 0); + tcg_out_dat_imm(s, cond, opc, dst, lhs, rotl(rhs, rot) | (rot << 7)); + } else { + tcg_out_dat_reg(s, cond, opc, dst, lhs, rhs, SHIFT_IMM_LSL(0)); + } +} + +static void tcg_out_dat_rIK(TCGContext *s, int cond, int opc, int opinv, + TCGReg dst, TCGReg lhs, TCGArg rhs, + bool rhs_is_const) +{ + /* Emit either the reg,imm or reg,reg form of a data-processing insn. + * rhs must satisfy the "rIK" constraint. + */ + if (rhs_is_const) { + int rot = encode_imm(rhs); + if (rot < 0) { + rhs = ~rhs; + rot = encode_imm(rhs); + assert(rot >= 0); + opc = opinv; + } + tcg_out_dat_imm(s, cond, opc, dst, lhs, rotl(rhs, rot) | (rot << 7)); + } else { + tcg_out_dat_reg(s, cond, opc, dst, lhs, rhs, SHIFT_IMM_LSL(0)); + } +} + +static void tcg_out_dat_rIN(TCGContext *s, int cond, int opc, int opneg, + TCGArg dst, TCGArg lhs, TCGArg rhs, + bool rhs_is_const) +{ + /* Emit either the reg,imm or reg,reg form of a data-processing insn. + * rhs must satisfy the "rIN" constraint. + */ + if (rhs_is_const) { + int rot = encode_imm(rhs); + if (rot < 0) { + rhs = -rhs; + rot = encode_imm(rhs); + assert(rot >= 0); + opc = opneg; + } + tcg_out_dat_imm(s, cond, opc, dst, lhs, rotl(rhs, rot) | (rot << 7)); + } else { + tcg_out_dat_reg(s, cond, opc, dst, lhs, rhs, SHIFT_IMM_LSL(0)); + } +} + +static inline void tcg_out_mul32(TCGContext *s, int cond, TCGReg rd, + TCGReg rn, TCGReg rm) +{ + /* if ArchVersion() < 6 && d == n then UNPREDICTABLE; */ + if (!use_armv6_instructions && rd == rn) { + if (rd == rm) { + /* rd == rn == rm; copy an input to tmp first. */ + tcg_out_mov_reg(s, cond, TCG_REG_TMP, rn); + rm = rn = TCG_REG_TMP; + } else { + rn = rm; + rm = rd; + } + } + /* mul */ + tcg_out32(s, (cond << 28) | 0x90 | (rd << 16) | (rm << 8) | rn); +} + +static inline void tcg_out_umull32(TCGContext *s, int cond, TCGReg rd0, + TCGReg rd1, TCGReg rn, TCGReg rm) +{ + /* if ArchVersion() < 6 && (dHi == n || dLo == n) then UNPREDICTABLE; */ + if (!use_armv6_instructions && (rd0 == rn || rd1 == rn)) { + if (rd0 == rm || rd1 == rm) { + tcg_out_mov_reg(s, cond, TCG_REG_TMP, rn); + rn = TCG_REG_TMP; + } else { + TCGReg t = rn; + rn = rm; + rm = t; + } + } + /* umull */ + tcg_out32(s, (cond << 28) | 0x00800090 | + (rd1 << 16) | (rd0 << 12) | (rm << 8) | rn); +} + +static inline void tcg_out_smull32(TCGContext *s, int cond, TCGReg rd0, + TCGReg rd1, TCGReg rn, TCGReg rm) +{ + /* if ArchVersion() < 6 && (dHi == n || dLo == n) then UNPREDICTABLE; */ + if (!use_armv6_instructions && (rd0 == rn || rd1 == rn)) { + if (rd0 == rm || rd1 == rm) { + tcg_out_mov_reg(s, cond, TCG_REG_TMP, rn); + rn = TCG_REG_TMP; + } else { + TCGReg t = rn; + rn = rm; + rm = t; + } + } + /* smull */ + tcg_out32(s, (cond << 28) | 0x00c00090 | + (rd1 << 16) | (rd0 << 12) | (rm << 8) | rn); +} + +static inline void tcg_out_sdiv(TCGContext *s, int cond, int rd, int rn, int rm) +{ + tcg_out32(s, 0x0710f010 | (cond << 28) | (rd << 16) | rn | (rm << 8)); +} + +static inline void tcg_out_udiv(TCGContext *s, int cond, int rd, int rn, int rm) +{ + tcg_out32(s, 0x0730f010 | (cond << 28) | (rd << 16) | rn | (rm << 8)); +} + +static inline void tcg_out_ext8s(TCGContext *s, int cond, + int rd, int rn) +{ + if (use_armv6_instructions) { + /* sxtb */ + tcg_out32(s, 0x06af0070 | (cond << 28) | (rd << 12) | rn); + } else { + tcg_out_dat_reg(s, cond, ARITH_MOV, + rd, 0, rn, SHIFT_IMM_LSL(24)); + tcg_out_dat_reg(s, cond, ARITH_MOV, + rd, 0, rd, SHIFT_IMM_ASR(24)); + } +} + +static inline void tcg_out_ext8u(TCGContext *s, int cond, + int rd, int rn) +{ + tcg_out_dat_imm(s, cond, ARITH_AND, rd, rn, 0xff); +} + +static inline void tcg_out_ext16s(TCGContext *s, int cond, + int rd, int rn) +{ + if (use_armv6_instructions) { + /* sxth */ + tcg_out32(s, 0x06bf0070 | (cond << 28) | (rd << 12) | rn); + } else { + tcg_out_dat_reg(s, cond, ARITH_MOV, + rd, 0, rn, SHIFT_IMM_LSL(16)); + tcg_out_dat_reg(s, cond, ARITH_MOV, + rd, 0, rd, SHIFT_IMM_ASR(16)); + } +} + +static inline void tcg_out_ext16u(TCGContext *s, int cond, + int rd, int rn) +{ + if (use_armv6_instructions) { + /* uxth */ + tcg_out32(s, 0x06ff0070 | (cond << 28) | (rd << 12) | rn); + } else { + tcg_out_dat_reg(s, cond, ARITH_MOV, + rd, 0, rn, SHIFT_IMM_LSL(16)); + tcg_out_dat_reg(s, cond, ARITH_MOV, + rd, 0, rd, SHIFT_IMM_LSR(16)); + } +} + +static inline void tcg_out_bswap16s(TCGContext *s, int cond, int rd, int rn) +{ + if (use_armv6_instructions) { + /* revsh */ + tcg_out32(s, 0x06ff0fb0 | (cond << 28) | (rd << 12) | rn); + } else { + tcg_out_dat_reg(s, cond, ARITH_MOV, + TCG_REG_TMP, 0, rn, SHIFT_IMM_LSL(24)); + tcg_out_dat_reg(s, cond, ARITH_MOV, + TCG_REG_TMP, 0, TCG_REG_TMP, SHIFT_IMM_ASR(16)); + tcg_out_dat_reg(s, cond, ARITH_ORR, + rd, TCG_REG_TMP, rn, SHIFT_IMM_LSR(8)); + } +} + +static inline void tcg_out_bswap16(TCGContext *s, int cond, int rd, int rn) +{ + if (use_armv6_instructions) { + /* rev16 */ + tcg_out32(s, 0x06bf0fb0 | (cond << 28) | (rd << 12) | rn); + } else { + tcg_out_dat_reg(s, cond, ARITH_MOV, + TCG_REG_TMP, 0, rn, SHIFT_IMM_LSL(24)); + tcg_out_dat_reg(s, cond, ARITH_MOV, + TCG_REG_TMP, 0, TCG_REG_TMP, SHIFT_IMM_LSR(16)); + tcg_out_dat_reg(s, cond, ARITH_ORR, + rd, TCG_REG_TMP, rn, SHIFT_IMM_LSR(8)); + } +} + +/* swap the two low bytes assuming that the two high input bytes and the + two high output bit can hold any value. */ +static inline void tcg_out_bswap16st(TCGContext *s, int cond, int rd, int rn) +{ + if (use_armv6_instructions) { + /* rev16 */ + tcg_out32(s, 0x06bf0fb0 | (cond << 28) | (rd << 12) | rn); + } else { + tcg_out_dat_reg(s, cond, ARITH_MOV, + TCG_REG_TMP, 0, rn, SHIFT_IMM_LSR(8)); + tcg_out_dat_imm(s, cond, ARITH_AND, TCG_REG_TMP, TCG_REG_TMP, 0xff); + tcg_out_dat_reg(s, cond, ARITH_ORR, + rd, TCG_REG_TMP, rn, SHIFT_IMM_LSL(8)); + } +} + +static inline void tcg_out_bswap32(TCGContext *s, int cond, int rd, int rn) +{ + if (use_armv6_instructions) { + /* rev */ + tcg_out32(s, 0x06bf0f30 | (cond << 28) | (rd << 12) | rn); + } else { + tcg_out_dat_reg(s, cond, ARITH_EOR, + TCG_REG_TMP, rn, rn, SHIFT_IMM_ROR(16)); + tcg_out_dat_imm(s, cond, ARITH_BIC, + TCG_REG_TMP, TCG_REG_TMP, 0xff | 0x800); + tcg_out_dat_reg(s, cond, ARITH_MOV, + rd, 0, rn, SHIFT_IMM_ROR(8)); + tcg_out_dat_reg(s, cond, ARITH_EOR, + rd, rd, TCG_REG_TMP, SHIFT_IMM_LSR(8)); + } +} + +bool tcg_target_deposit_valid(int ofs, int len) +{ + /* ??? Without bfi, we could improve over generic code by combining + the right-shift from a non-zero ofs with the orr. We do run into + problems when rd == rs, and the mask generated from ofs+len doesn't + fit into an immediate. We would have to be careful not to pessimize + wrt the optimizations performed on the expanded code. */ + return use_armv7_instructions; +} + +static inline void tcg_out_deposit(TCGContext *s, int cond, TCGReg rd, + TCGArg a1, int ofs, int len, bool const_a1) +{ + if (const_a1) { + /* bfi becomes bfc with rn == 15. */ + a1 = 15; + } + /* bfi/bfc */ + tcg_out32(s, 0x07c00010 | (cond << 28) | (rd << 12) | a1 + | (ofs << 7) | ((ofs + len - 1) << 16)); +} + +/* Note that this routine is used for both LDR and LDRH formats, so we do + not wish to include an immediate shift at this point. */ +static void tcg_out_memop_r(TCGContext *s, int cond, ARMInsn opc, TCGReg rt, + TCGReg rn, TCGReg rm, bool u, bool p, bool w) +{ + tcg_out32(s, (cond << 28) | opc | (u << 23) | (p << 24) + | (w << 21) | (rn << 16) | (rt << 12) | rm); +} + +static void tcg_out_memop_8(TCGContext *s, int cond, ARMInsn opc, TCGReg rt, + TCGReg rn, int imm8, bool p, bool w) +{ + bool u = 1; + if (imm8 < 0) { + imm8 = -imm8; + u = 0; + } + tcg_out32(s, (cond << 28) | opc | (u << 23) | (p << 24) | (w << 21) | + (rn << 16) | (rt << 12) | ((imm8 & 0xf0) << 4) | (imm8 & 0xf)); +} + +static void tcg_out_memop_12(TCGContext *s, int cond, ARMInsn opc, TCGReg rt, + TCGReg rn, int imm12, bool p, bool w) +{ + bool u = 1; + if (imm12 < 0) { + imm12 = -imm12; + u = 0; + } + tcg_out32(s, (cond << 28) | opc | (u << 23) | (p << 24) | (w << 21) | + (rn << 16) | (rt << 12) | imm12); +} + +static inline void tcg_out_ld32_12(TCGContext *s, int cond, TCGReg rt, + TCGReg rn, int imm12) +{ + tcg_out_memop_12(s, cond, INSN_LDR_IMM, rt, rn, imm12, 1, 0); +} + +static inline void tcg_out_st32_12(TCGContext *s, int cond, TCGReg rt, + TCGReg rn, int imm12) +{ + tcg_out_memop_12(s, cond, INSN_STR_IMM, rt, rn, imm12, 1, 0); +} + +static inline void tcg_out_ld32_r(TCGContext *s, int cond, TCGReg rt, + TCGReg rn, TCGReg rm) +{ + tcg_out_memop_r(s, cond, INSN_LDR_REG, rt, rn, rm, 1, 1, 0); +} + +static inline void tcg_out_st32_r(TCGContext *s, int cond, TCGReg rt, + TCGReg rn, TCGReg rm) +{ + tcg_out_memop_r(s, cond, INSN_STR_REG, rt, rn, rm, 1, 1, 0); +} + +static inline void tcg_out_ldrd_8(TCGContext *s, int cond, TCGReg rt, + TCGReg rn, int imm8) +{ + tcg_out_memop_8(s, cond, INSN_LDRD_IMM, rt, rn, imm8, 1, 0); +} + +static inline void tcg_out_ldrd_r(TCGContext *s, int cond, TCGReg rt, + TCGReg rn, TCGReg rm) +{ + tcg_out_memop_r(s, cond, INSN_LDRD_REG, rt, rn, rm, 1, 1, 0); +} + +static inline void tcg_out_strd_8(TCGContext *s, int cond, TCGReg rt, + TCGReg rn, int imm8) +{ + tcg_out_memop_8(s, cond, INSN_STRD_IMM, rt, rn, imm8, 1, 0); +} + +static inline void tcg_out_strd_r(TCGContext *s, int cond, TCGReg rt, + TCGReg rn, TCGReg rm) +{ + tcg_out_memop_r(s, cond, INSN_STRD_REG, rt, rn, rm, 1, 1, 0); +} + +/* Register pre-increment with base writeback. */ +static inline void tcg_out_ld32_rwb(TCGContext *s, int cond, TCGReg rt, + TCGReg rn, TCGReg rm) +{ + tcg_out_memop_r(s, cond, INSN_LDR_REG, rt, rn, rm, 1, 1, 1); +} + +static inline void tcg_out_st32_rwb(TCGContext *s, int cond, TCGReg rt, + TCGReg rn, TCGReg rm) +{ + tcg_out_memop_r(s, cond, INSN_STR_REG, rt, rn, rm, 1, 1, 1); +} + +static inline void tcg_out_ld16u_8(TCGContext *s, int cond, TCGReg rt, + TCGReg rn, int imm8) +{ + tcg_out_memop_8(s, cond, INSN_LDRH_IMM, rt, rn, imm8, 1, 0); +} + +static inline void tcg_out_st16_8(TCGContext *s, int cond, TCGReg rt, + TCGReg rn, int imm8) +{ + tcg_out_memop_8(s, cond, INSN_STRH_IMM, rt, rn, imm8, 1, 0); +} + +static inline void tcg_out_ld16u_r(TCGContext *s, int cond, TCGReg rt, + TCGReg rn, TCGReg rm) +{ + tcg_out_memop_r(s, cond, INSN_LDRH_REG, rt, rn, rm, 1, 1, 0); +} + +static inline void tcg_out_st16_r(TCGContext *s, int cond, TCGReg rt, + TCGReg rn, TCGReg rm) +{ + tcg_out_memop_r(s, cond, INSN_STRH_REG, rt, rn, rm, 1, 1, 0); +} + +static inline void tcg_out_ld16s_8(TCGContext *s, int cond, TCGReg rt, + TCGReg rn, int imm8) +{ + tcg_out_memop_8(s, cond, INSN_LDRSH_IMM, rt, rn, imm8, 1, 0); +} + +static inline void tcg_out_ld16s_r(TCGContext *s, int cond, TCGReg rt, + TCGReg rn, TCGReg rm) +{ + tcg_out_memop_r(s, cond, INSN_LDRSH_REG, rt, rn, rm, 1, 1, 0); +} + +static inline void tcg_out_ld8_12(TCGContext *s, int cond, TCGReg rt, + TCGReg rn, int imm12) +{ + tcg_out_memop_12(s, cond, INSN_LDRB_IMM, rt, rn, imm12, 1, 0); +} + +static inline void tcg_out_st8_12(TCGContext *s, int cond, TCGReg rt, + TCGReg rn, int imm12) +{ + tcg_out_memop_12(s, cond, INSN_STRB_IMM, rt, rn, imm12, 1, 0); +} + +static inline void tcg_out_ld8_r(TCGContext *s, int cond, TCGReg rt, + TCGReg rn, TCGReg rm) +{ + tcg_out_memop_r(s, cond, INSN_LDRB_REG, rt, rn, rm, 1, 1, 0); +} + +static inline void tcg_out_st8_r(TCGContext *s, int cond, TCGReg rt, + TCGReg rn, TCGReg rm) +{ + tcg_out_memop_r(s, cond, INSN_STRB_REG, rt, rn, rm, 1, 1, 0); +} + +static inline void tcg_out_ld8s_8(TCGContext *s, int cond, TCGReg rt, + TCGReg rn, int imm8) +{ + tcg_out_memop_8(s, cond, INSN_LDRSB_IMM, rt, rn, imm8, 1, 0); +} + +static inline void tcg_out_ld8s_r(TCGContext *s, int cond, TCGReg rt, + TCGReg rn, TCGReg rm) +{ + tcg_out_memop_r(s, cond, INSN_LDRSB_REG, rt, rn, rm, 1, 1, 0); +} + +static inline void tcg_out_ld32u(TCGContext *s, int cond, + int rd, int rn, int32_t offset) +{ + if (offset > 0xfff || offset < -0xfff) { + tcg_out_movi32(s, cond, TCG_REG_TMP, offset); + tcg_out_ld32_r(s, cond, rd, rn, TCG_REG_TMP); + } else + tcg_out_ld32_12(s, cond, rd, rn, offset); +} + +static inline void tcg_out_st32(TCGContext *s, int cond, + int rd, int rn, int32_t offset) +{ + if (offset > 0xfff || offset < -0xfff) { + tcg_out_movi32(s, cond, TCG_REG_TMP, offset); + tcg_out_st32_r(s, cond, rd, rn, TCG_REG_TMP); + } else + tcg_out_st32_12(s, cond, rd, rn, offset); +} + +static inline void tcg_out_ld16u(TCGContext *s, int cond, + int rd, int rn, int32_t offset) +{ + if (offset > 0xff || offset < -0xff) { + tcg_out_movi32(s, cond, TCG_REG_TMP, offset); + tcg_out_ld16u_r(s, cond, rd, rn, TCG_REG_TMP); + } else + tcg_out_ld16u_8(s, cond, rd, rn, offset); +} + +static inline void tcg_out_ld16s(TCGContext *s, int cond, + int rd, int rn, int32_t offset) +{ + if (offset > 0xff || offset < -0xff) { + tcg_out_movi32(s, cond, TCG_REG_TMP, offset); + tcg_out_ld16s_r(s, cond, rd, rn, TCG_REG_TMP); + } else + tcg_out_ld16s_8(s, cond, rd, rn, offset); +} + +static inline void tcg_out_st16(TCGContext *s, int cond, + int rd, int rn, int32_t offset) +{ + if (offset > 0xff || offset < -0xff) { + tcg_out_movi32(s, cond, TCG_REG_TMP, offset); + tcg_out_st16_r(s, cond, rd, rn, TCG_REG_TMP); + } else + tcg_out_st16_8(s, cond, rd, rn, offset); +} + +static inline void tcg_out_ld8u(TCGContext *s, int cond, + int rd, int rn, int32_t offset) +{ + if (offset > 0xfff || offset < -0xfff) { + tcg_out_movi32(s, cond, TCG_REG_TMP, offset); + tcg_out_ld8_r(s, cond, rd, rn, TCG_REG_TMP); + } else + tcg_out_ld8_12(s, cond, rd, rn, offset); +} + +static inline void tcg_out_ld8s(TCGContext *s, int cond, + int rd, int rn, int32_t offset) +{ + if (offset > 0xff || offset < -0xff) { + tcg_out_movi32(s, cond, TCG_REG_TMP, offset); + tcg_out_ld8s_r(s, cond, rd, rn, TCG_REG_TMP); + } else + tcg_out_ld8s_8(s, cond, rd, rn, offset); +} + +static inline void tcg_out_st8(TCGContext *s, int cond, + int rd, int rn, int32_t offset) +{ + if (offset > 0xfff || offset < -0xfff) { + tcg_out_movi32(s, cond, TCG_REG_TMP, offset); + tcg_out_st8_r(s, cond, rd, rn, TCG_REG_TMP); + } else + tcg_out_st8_12(s, cond, rd, rn, offset); +} + +/* The _goto case is normally between TBs within the same code buffer, and + * with the code buffer limited to 16MB we wouldn't need the long case. + * But we also use it for the tail-call to the qemu_ld/st helpers, which does. + */ +static inline void tcg_out_goto(TCGContext *s, int cond, tcg_insn_unit *addr) +{ + intptr_t addri = (intptr_t)addr; + ptrdiff_t disp = tcg_pcrel_diff(s, addr); + + if ((addri & 1) == 0 && disp - 8 < 0x01fffffd && disp - 8 > -0x01fffffd) { + tcg_out_b(s, cond, disp); + return; + } + + tcg_out_movi32(s, cond, TCG_REG_TMP, addri); + if (use_armv5t_instructions) { + tcg_out_bx(s, cond, TCG_REG_TMP); + } else { + if (addri & 1) { + tcg_abort(); + } + tcg_out_mov_reg(s, cond, TCG_REG_PC, TCG_REG_TMP); + } +} + +/* The call case is mostly used for helpers - so it's not unreasonable + * for them to be beyond branch range */ +static void tcg_out_call(TCGContext *s, tcg_insn_unit *addr) +{ + intptr_t addri = (intptr_t)addr; + ptrdiff_t disp = tcg_pcrel_diff(s, addr); + + if (disp - 8 < 0x02000000 && disp - 8 >= -0x02000000) { + if (addri & 1) { + /* Use BLX if the target is in Thumb mode */ + if (!use_armv5t_instructions) { + tcg_abort(); + } + tcg_out_blx_imm(s, disp); + } else { + tcg_out_bl(s, COND_AL, disp); + } + } else if (use_armv7_instructions) { + tcg_out_movi32(s, COND_AL, TCG_REG_TMP, addri); + tcg_out_blx(s, COND_AL, TCG_REG_TMP); + } else { + tcg_out_dat_imm(s, COND_AL, ARITH_ADD, TCG_REG_R14, TCG_REG_PC, 4); + tcg_out_ld32_12(s, COND_AL, TCG_REG_PC, TCG_REG_PC, -4); + tcg_out32(s, addri); + } +} + +static inline void tcg_out_goto_label(TCGContext *s, int cond, TCGLabel *l) +{ + if (l->has_value) { + tcg_out_goto(s, cond, l->u.value_ptr); + } else { + tcg_out_reloc(s, s->code_ptr, R_ARM_PC24, l, 0); + tcg_out_b_noaddr(s, cond); + } +} + +#ifdef CONFIG_SOFTMMU +/* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr, + * int mmu_idx, uintptr_t ra) + */ +static void * const qemu_ld_helpers[16] = { + [MO_UB] = helper_ret_ldub_mmu, + [MO_SB] = helper_ret_ldsb_mmu, + + [MO_LEUW] = helper_le_lduw_mmu, + [MO_LEUL] = helper_le_ldul_mmu, + [MO_LEQ] = helper_le_ldq_mmu, + [MO_LESW] = helper_le_ldsw_mmu, + [MO_LESL] = helper_le_ldul_mmu, + + [MO_BEUW] = helper_be_lduw_mmu, + [MO_BEUL] = helper_be_ldul_mmu, + [MO_BEQ] = helper_be_ldq_mmu, + [MO_BESW] = helper_be_ldsw_mmu, + [MO_BESL] = helper_be_ldul_mmu, +}; + +/* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr, + * uintxx_t val, int mmu_idx, uintptr_t ra) + */ +static void * const qemu_st_helpers[16] = { + [MO_UB] = helper_ret_stb_mmu, + [MO_LEUW] = helper_le_stw_mmu, + [MO_LEUL] = helper_le_stl_mmu, + [MO_LEQ] = helper_le_stq_mmu, + [MO_BEUW] = helper_be_stw_mmu, + [MO_BEUL] = helper_be_stl_mmu, + [MO_BEQ] = helper_be_stq_mmu, +}; + +/* Helper routines for marshalling helper function arguments into + * the correct registers and stack. + * argreg is where we want to put this argument, arg is the argument itself. + * Return value is the updated argreg ready for the next call. + * Note that argreg 0..3 is real registers, 4+ on stack. + * + * We provide routines for arguments which are: immediate, 32 bit + * value in register, 16 and 8 bit values in register (which must be zero + * extended before use) and 64 bit value in a lo:hi register pair. + */ +#define DEFINE_TCG_OUT_ARG(NAME, ARGTYPE, MOV_ARG, EXT_ARG) \ +static TCGReg NAME(TCGContext *s, TCGReg argreg, ARGTYPE arg) \ +{ \ + if (argreg < 4) { \ + MOV_ARG(s, COND_AL, argreg, arg); \ + } else { \ + int ofs = (argreg - 4) * 4; \ + EXT_ARG; \ + assert(ofs + 4 <= TCG_STATIC_CALL_ARGS_SIZE); \ + tcg_out_st32_12(s, COND_AL, arg, TCG_REG_CALL_STACK, ofs); \ + } \ + return argreg + 1; \ +} + +DEFINE_TCG_OUT_ARG(tcg_out_arg_imm32, uint32_t, tcg_out_movi32, + (tcg_out_movi32(s, COND_AL, TCG_REG_TMP, arg), arg = TCG_REG_TMP)) +DEFINE_TCG_OUT_ARG(tcg_out_arg_reg8, TCGReg, tcg_out_ext8u, + (tcg_out_ext8u(s, COND_AL, TCG_REG_TMP, arg), arg = TCG_REG_TMP)) +DEFINE_TCG_OUT_ARG(tcg_out_arg_reg16, TCGReg, tcg_out_ext16u, + (tcg_out_ext16u(s, COND_AL, TCG_REG_TMP, arg), arg = TCG_REG_TMP)) +DEFINE_TCG_OUT_ARG(tcg_out_arg_reg32, TCGReg, tcg_out_mov_reg, ) + +static TCGReg tcg_out_arg_reg64(TCGContext *s, TCGReg argreg, + TCGReg arglo, TCGReg arghi) +{ + /* 64 bit arguments must go in even/odd register pairs + * and in 8-aligned stack slots. + */ + if (argreg & 1) { + argreg++; + } + if (use_armv6_instructions && argreg >= 4 + && (arglo & 1) == 0 && arghi == arglo + 1) { + tcg_out_strd_8(s, COND_AL, arglo, + TCG_REG_CALL_STACK, (argreg - 4) * 4); + return argreg + 2; + } else { + argreg = tcg_out_arg_reg32(s, argreg, arglo); + argreg = tcg_out_arg_reg32(s, argreg, arghi); + return argreg; + } +} + +#define TLB_SHIFT (CPU_TLB_ENTRY_BITS + CPU_TLB_BITS) + +/* We're expecting to use an 8-bit immediate and to mask. */ +QEMU_BUILD_BUG_ON(CPU_TLB_BITS > 8); + +/* We're expecting to use an 8-bit immediate add + 8-bit ldrd offset. + Using the offset of the second entry in the last tlb table ensures + that we can index all of the elements of the first entry. */ +QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_table[NB_MMU_MODES - 1][1]) + > 0xffff); + +/* Load and compare a TLB entry, leaving the flags set. Returns the register + containing the addend of the tlb entry. Clobbers R0, R1, R2, TMP. */ + +static TCGReg tcg_out_tlb_read(TCGContext *s, TCGReg addrlo, TCGReg addrhi, + TCGMemOp s_bits, int mem_index, bool is_load) +{ + TCGReg base = TCG_AREG0; + int cmp_off = + (is_load + ? offsetof(CPUArchState, tlb_table[mem_index][0].addr_read) + : offsetof(CPUArchState, tlb_table[mem_index][0].addr_write)); + int add_off = offsetof(CPUArchState, tlb_table[mem_index][0].addend); + + /* Should generate something like the following: + * shr tmp, addrlo, #TARGET_PAGE_BITS (1) + * add r2, env, #high + * and r0, tmp, #(CPU_TLB_SIZE - 1) (2) + * add r2, r2, r0, lsl #CPU_TLB_ENTRY_BITS (3) + * ldr r0, [r2, #cmp] (4) + * tst addrlo, #s_mask + * ldr r2, [r2, #add] (5) + * cmpeq r0, tmp, lsl #TARGET_PAGE_BITS + */ + tcg_out_dat_reg(s, COND_AL, ARITH_MOV, TCG_REG_TMP, + 0, addrlo, SHIFT_IMM_LSR(TARGET_PAGE_BITS)); + + /* We checked that the offset is contained within 16 bits above. */ + if (add_off > 0xfff || (use_armv6_instructions && cmp_off > 0xff)) { + tcg_out_dat_imm(s, COND_AL, ARITH_ADD, TCG_REG_R2, base, + (24 << 7) | (cmp_off >> 8)); + base = TCG_REG_R2; + add_off -= cmp_off & 0xff00; + cmp_off &= 0xff; + } + + tcg_out_dat_imm(s, COND_AL, ARITH_AND, + TCG_REG_R0, TCG_REG_TMP, CPU_TLB_SIZE - 1); + tcg_out_dat_reg(s, COND_AL, ARITH_ADD, TCG_REG_R2, base, + TCG_REG_R0, SHIFT_IMM_LSL(CPU_TLB_ENTRY_BITS)); + + /* Load the tlb comparator. Use ldrd if needed and available, + but due to how the pointer needs setting up, ldm isn't useful. + Base arm5 doesn't have ldrd, but armv5te does. */ + if (use_armv6_instructions && TARGET_LONG_BITS == 64) { + tcg_out_ldrd_8(s, COND_AL, TCG_REG_R0, TCG_REG_R2, cmp_off); + } else { + tcg_out_ld32_12(s, COND_AL, TCG_REG_R0, TCG_REG_R2, cmp_off); + if (TARGET_LONG_BITS == 64) { + tcg_out_ld32_12(s, COND_AL, TCG_REG_R1, TCG_REG_R2, cmp_off + 4); + } + } + + /* Check alignment. */ + if (s_bits) { + tcg_out_dat_imm(s, COND_AL, ARITH_TST, + 0, addrlo, (1 << s_bits) - 1); + } + + /* Load the tlb addend. */ + tcg_out_ld32_12(s, COND_AL, TCG_REG_R2, TCG_REG_R2, add_off); + + tcg_out_dat_reg(s, (s_bits ? COND_EQ : COND_AL), ARITH_CMP, 0, + TCG_REG_R0, TCG_REG_TMP, SHIFT_IMM_LSL(TARGET_PAGE_BITS)); + + if (TARGET_LONG_BITS == 64) { + tcg_out_dat_reg(s, COND_EQ, ARITH_CMP, 0, + TCG_REG_R1, addrhi, SHIFT_IMM_LSL(0)); + } + + return TCG_REG_R2; +} + +/* Record the context of a call to the out of line helper code for the slow + path for a load or store, so that we can later generate the correct + helper code. */ +static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi, + TCGReg datalo, TCGReg datahi, TCGReg addrlo, + TCGReg addrhi, tcg_insn_unit *raddr, + tcg_insn_unit *label_ptr) +{ + TCGLabelQemuLdst *label = new_ldst_label(s); + + label->is_ld = is_ld; + label->oi = oi; + label->datalo_reg = datalo; + label->datahi_reg = datahi; + label->addrlo_reg = addrlo; + label->addrhi_reg = addrhi; + label->raddr = raddr; + label->label_ptr[0] = label_ptr; +} + +static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) +{ + TCGReg argreg, datalo, datahi; + TCGMemOpIdx oi = lb->oi; + TCGMemOp opc = get_memop(oi); + void *func; + + reloc_pc24(lb->label_ptr[0], s->code_ptr); + + argreg = tcg_out_arg_reg32(s, TCG_REG_R0, TCG_AREG0); + if (TARGET_LONG_BITS == 64) { + argreg = tcg_out_arg_reg64(s, argreg, lb->addrlo_reg, lb->addrhi_reg); + } else { + argreg = tcg_out_arg_reg32(s, argreg, lb->addrlo_reg); + } + argreg = tcg_out_arg_imm32(s, argreg, oi); + argreg = tcg_out_arg_reg32(s, argreg, TCG_REG_R14); + + /* For armv6 we can use the canonical unsigned helpers and minimize + icache usage. For pre-armv6, use the signed helpers since we do + not have a single insn sign-extend. */ + if (use_armv6_instructions) { + func = qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]; + } else { + func = qemu_ld_helpers[opc & (MO_BSWAP | MO_SSIZE)]; + if (opc & MO_SIGN) { + opc = MO_UL; + } + } + tcg_out_call(s, func); + + datalo = lb->datalo_reg; + datahi = lb->datahi_reg; + switch (opc & MO_SSIZE) { + case MO_SB: + tcg_out_ext8s(s, COND_AL, datalo, TCG_REG_R0); + break; + case MO_SW: + tcg_out_ext16s(s, COND_AL, datalo, TCG_REG_R0); + break; + default: + tcg_out_mov_reg(s, COND_AL, datalo, TCG_REG_R0); + break; + case MO_Q: + if (datalo != TCG_REG_R1) { + tcg_out_mov_reg(s, COND_AL, datalo, TCG_REG_R0); + tcg_out_mov_reg(s, COND_AL, datahi, TCG_REG_R1); + } else if (datahi != TCG_REG_R0) { + tcg_out_mov_reg(s, COND_AL, datahi, TCG_REG_R1); + tcg_out_mov_reg(s, COND_AL, datalo, TCG_REG_R0); + } else { + tcg_out_mov_reg(s, COND_AL, TCG_REG_TMP, TCG_REG_R0); + tcg_out_mov_reg(s, COND_AL, datahi, TCG_REG_R1); + tcg_out_mov_reg(s, COND_AL, datalo, TCG_REG_TMP); + } + break; + } + + tcg_out_goto(s, COND_AL, lb->raddr); +} + +static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) +{ + TCGReg argreg, datalo, datahi; + TCGMemOpIdx oi = lb->oi; + TCGMemOp opc = get_memop(oi); + + reloc_pc24(lb->label_ptr[0], s->code_ptr); + + argreg = TCG_REG_R0; + argreg = tcg_out_arg_reg32(s, argreg, TCG_AREG0); + if (TARGET_LONG_BITS == 64) { + argreg = tcg_out_arg_reg64(s, argreg, lb->addrlo_reg, lb->addrhi_reg); + } else { + argreg = tcg_out_arg_reg32(s, argreg, lb->addrlo_reg); + } + + datalo = lb->datalo_reg; + datahi = lb->datahi_reg; + switch (opc & MO_SIZE) { + case MO_8: + argreg = tcg_out_arg_reg8(s, argreg, datalo); + break; + case MO_16: + argreg = tcg_out_arg_reg16(s, argreg, datalo); + break; + case MO_32: + default: + argreg = tcg_out_arg_reg32(s, argreg, datalo); + break; + case MO_64: + argreg = tcg_out_arg_reg64(s, argreg, datalo, datahi); + break; + } + + argreg = tcg_out_arg_imm32(s, argreg, oi); + argreg = tcg_out_arg_reg32(s, argreg, TCG_REG_R14); + + /* Tail-call to the helper, which will return to the fast path. */ + tcg_out_goto(s, COND_AL, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]); +} +#endif /* SOFTMMU */ + +static inline void tcg_out_qemu_ld_index(TCGContext *s, TCGMemOp opc, + TCGReg datalo, TCGReg datahi, + TCGReg addrlo, TCGReg addend) +{ + TCGMemOp bswap = opc & MO_BSWAP; + + switch (opc & MO_SSIZE) { + case MO_UB: + tcg_out_ld8_r(s, COND_AL, datalo, addrlo, addend); + break; + case MO_SB: + tcg_out_ld8s_r(s, COND_AL, datalo, addrlo, addend); + break; + case MO_UW: + tcg_out_ld16u_r(s, COND_AL, datalo, addrlo, addend); + if (bswap) { + tcg_out_bswap16(s, COND_AL, datalo, datalo); + } + break; + case MO_SW: + if (bswap) { + tcg_out_ld16u_r(s, COND_AL, datalo, addrlo, addend); + tcg_out_bswap16s(s, COND_AL, datalo, datalo); + } else { + tcg_out_ld16s_r(s, COND_AL, datalo, addrlo, addend); + } + break; + case MO_UL: + default: + tcg_out_ld32_r(s, COND_AL, datalo, addrlo, addend); + if (bswap) { + tcg_out_bswap32(s, COND_AL, datalo, datalo); + } + break; + case MO_Q: + { + TCGReg dl = (bswap ? datahi : datalo); + TCGReg dh = (bswap ? datalo : datahi); + + /* Avoid ldrd for user-only emulation, to handle unaligned. */ + if (USING_SOFTMMU && use_armv6_instructions + && (dl & 1) == 0 && dh == dl + 1) { + tcg_out_ldrd_r(s, COND_AL, dl, addrlo, addend); + } else if (dl != addend) { + tcg_out_ld32_rwb(s, COND_AL, dl, addend, addrlo); + tcg_out_ld32_12(s, COND_AL, dh, addend, 4); + } else { + tcg_out_dat_reg(s, COND_AL, ARITH_ADD, TCG_REG_TMP, + addend, addrlo, SHIFT_IMM_LSL(0)); + tcg_out_ld32_12(s, COND_AL, dl, TCG_REG_TMP, 0); + tcg_out_ld32_12(s, COND_AL, dh, TCG_REG_TMP, 4); + } + if (bswap) { + tcg_out_bswap32(s, COND_AL, dl, dl); + tcg_out_bswap32(s, COND_AL, dh, dh); + } + } + break; + } +} + +static inline void tcg_out_qemu_ld_direct(TCGContext *s, TCGMemOp opc, + TCGReg datalo, TCGReg datahi, + TCGReg addrlo) +{ + TCGMemOp bswap = opc & MO_BSWAP; + + switch (opc & MO_SSIZE) { + case MO_UB: + tcg_out_ld8_12(s, COND_AL, datalo, addrlo, 0); + break; + case MO_SB: + tcg_out_ld8s_8(s, COND_AL, datalo, addrlo, 0); + break; + case MO_UW: + tcg_out_ld16u_8(s, COND_AL, datalo, addrlo, 0); + if (bswap) { + tcg_out_bswap16(s, COND_AL, datalo, datalo); + } + break; + case MO_SW: + if (bswap) { + tcg_out_ld16u_8(s, COND_AL, datalo, addrlo, 0); + tcg_out_bswap16s(s, COND_AL, datalo, datalo); + } else { + tcg_out_ld16s_8(s, COND_AL, datalo, addrlo, 0); + } + break; + case MO_UL: + default: + tcg_out_ld32_12(s, COND_AL, datalo, addrlo, 0); + if (bswap) { + tcg_out_bswap32(s, COND_AL, datalo, datalo); + } + break; + case MO_Q: + { + TCGReg dl = (bswap ? datahi : datalo); + TCGReg dh = (bswap ? datalo : datahi); + + /* Avoid ldrd for user-only emulation, to handle unaligned. */ + if (USING_SOFTMMU && use_armv6_instructions + && (dl & 1) == 0 && dh == dl + 1) { + tcg_out_ldrd_8(s, COND_AL, dl, addrlo, 0); + } else if (dl == addrlo) { + tcg_out_ld32_12(s, COND_AL, dh, addrlo, bswap ? 0 : 4); + tcg_out_ld32_12(s, COND_AL, dl, addrlo, bswap ? 4 : 0); + } else { + tcg_out_ld32_12(s, COND_AL, dl, addrlo, bswap ? 4 : 0); + tcg_out_ld32_12(s, COND_AL, dh, addrlo, bswap ? 0 : 4); + } + if (bswap) { + tcg_out_bswap32(s, COND_AL, dl, dl); + tcg_out_bswap32(s, COND_AL, dh, dh); + } + } + break; + } +} + +static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64) +{ + TCGReg addrlo, datalo, datahi, addrhi __attribute__((unused)); + TCGMemOpIdx oi; + TCGMemOp opc; +#ifdef CONFIG_SOFTMMU + int mem_index; + TCGReg addend; + tcg_insn_unit *label_ptr; +#endif + + datalo = *args++; + datahi = (is64 ? *args++ : 0); + addrlo = *args++; + addrhi = (TARGET_LONG_BITS == 64 ? *args++ : 0); + oi = *args++; + opc = get_memop(oi); + +#ifdef CONFIG_SOFTMMU + mem_index = get_mmuidx(oi); + addend = tcg_out_tlb_read(s, addrlo, addrhi, opc & MO_SIZE, mem_index, 1); + + /* This a conditional BL only to load a pointer within this opcode into LR + for the slow path. We will not be using the value for a tail call. */ + label_ptr = s->code_ptr; + tcg_out_bl_noaddr(s, COND_NE); + + tcg_out_qemu_ld_index(s, opc, datalo, datahi, addrlo, addend); + + add_qemu_ldst_label(s, true, oi, datalo, datahi, addrlo, addrhi, + s->code_ptr, label_ptr); +#else /* !CONFIG_SOFTMMU */ + if (guest_base) { + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP, guest_base); + tcg_out_qemu_ld_index(s, opc, datalo, datahi, addrlo, TCG_REG_TMP); + } else { + tcg_out_qemu_ld_direct(s, opc, datalo, datahi, addrlo); + } +#endif +} + +static inline void tcg_out_qemu_st_index(TCGContext *s, int cond, TCGMemOp opc, + TCGReg datalo, TCGReg datahi, + TCGReg addrlo, TCGReg addend) +{ + TCGMemOp bswap = opc & MO_BSWAP; + + switch (opc & MO_SIZE) { + case MO_8: + tcg_out_st8_r(s, cond, datalo, addrlo, addend); + break; + case MO_16: + if (bswap) { + tcg_out_bswap16st(s, cond, TCG_REG_R0, datalo); + tcg_out_st16_r(s, cond, TCG_REG_R0, addrlo, addend); + } else { + tcg_out_st16_r(s, cond, datalo, addrlo, addend); + } + break; + case MO_32: + default: + if (bswap) { + tcg_out_bswap32(s, cond, TCG_REG_R0, datalo); + tcg_out_st32_r(s, cond, TCG_REG_R0, addrlo, addend); + } else { + tcg_out_st32_r(s, cond, datalo, addrlo, addend); + } + break; + case MO_64: + /* Avoid strd for user-only emulation, to handle unaligned. */ + if (bswap) { + tcg_out_bswap32(s, cond, TCG_REG_R0, datahi); + tcg_out_st32_rwb(s, cond, TCG_REG_R0, addend, addrlo); + tcg_out_bswap32(s, cond, TCG_REG_R0, datalo); + tcg_out_st32_12(s, cond, TCG_REG_R0, addend, 4); + } else if (USING_SOFTMMU && use_armv6_instructions + && (datalo & 1) == 0 && datahi == datalo + 1) { + tcg_out_strd_r(s, cond, datalo, addrlo, addend); + } else { + tcg_out_st32_rwb(s, cond, datalo, addend, addrlo); + tcg_out_st32_12(s, cond, datahi, addend, 4); + } + break; + } +} + +static inline void tcg_out_qemu_st_direct(TCGContext *s, TCGMemOp opc, + TCGReg datalo, TCGReg datahi, + TCGReg addrlo) +{ + TCGMemOp bswap = opc & MO_BSWAP; + + switch (opc & MO_SIZE) { + case MO_8: + tcg_out_st8_12(s, COND_AL, datalo, addrlo, 0); + break; + case MO_16: + if (bswap) { + tcg_out_bswap16st(s, COND_AL, TCG_REG_R0, datalo); + tcg_out_st16_8(s, COND_AL, TCG_REG_R0, addrlo, 0); + } else { + tcg_out_st16_8(s, COND_AL, datalo, addrlo, 0); + } + break; + case MO_32: + default: + if (bswap) { + tcg_out_bswap32(s, COND_AL, TCG_REG_R0, datalo); + tcg_out_st32_12(s, COND_AL, TCG_REG_R0, addrlo, 0); + } else { + tcg_out_st32_12(s, COND_AL, datalo, addrlo, 0); + } + break; + case MO_64: + /* Avoid strd for user-only emulation, to handle unaligned. */ + if (bswap) { + tcg_out_bswap32(s, COND_AL, TCG_REG_R0, datahi); + tcg_out_st32_12(s, COND_AL, TCG_REG_R0, addrlo, 0); + tcg_out_bswap32(s, COND_AL, TCG_REG_R0, datalo); + tcg_out_st32_12(s, COND_AL, TCG_REG_R0, addrlo, 4); + } else if (USING_SOFTMMU && use_armv6_instructions + && (datalo & 1) == 0 && datahi == datalo + 1) { + tcg_out_strd_8(s, COND_AL, datalo, addrlo, 0); + } else { + tcg_out_st32_12(s, COND_AL, datalo, addrlo, 0); + tcg_out_st32_12(s, COND_AL, datahi, addrlo, 4); + } + break; + } +} + +static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64) +{ + TCGReg addrlo, datalo, datahi, addrhi __attribute__((unused)); + TCGMemOpIdx oi; + TCGMemOp opc; +#ifdef CONFIG_SOFTMMU + int mem_index; + TCGReg addend; + tcg_insn_unit *label_ptr; +#endif + + datalo = *args++; + datahi = (is64 ? *args++ : 0); + addrlo = *args++; + addrhi = (TARGET_LONG_BITS == 64 ? *args++ : 0); + oi = *args++; + opc = get_memop(oi); + +#ifdef CONFIG_SOFTMMU + mem_index = get_mmuidx(oi); + addend = tcg_out_tlb_read(s, addrlo, addrhi, opc & MO_SIZE, mem_index, 0); + + tcg_out_qemu_st_index(s, COND_EQ, opc, datalo, datahi, addrlo, addend); + + /* The conditional call must come last, as we're going to return here. */ + label_ptr = s->code_ptr; + tcg_out_bl_noaddr(s, COND_NE); + + add_qemu_ldst_label(s, false, oi, datalo, datahi, addrlo, addrhi, + s->code_ptr, label_ptr); +#else /* !CONFIG_SOFTMMU */ + if (guest_base) { + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP, guest_base); + tcg_out_qemu_st_index(s, COND_AL, opc, datalo, + datahi, addrlo, TCG_REG_TMP); + } else { + tcg_out_qemu_st_direct(s, opc, datalo, datahi, addrlo); + } +#endif +} + +static tcg_insn_unit *tb_ret_addr; + +static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, + const TCGArg *args, const int *const_args) +{ + TCGArg a0, a1, a2, a3, a4, a5; + int c; + + switch (opc) { + case INDEX_op_exit_tb: + tcg_out_movi32(s, COND_AL, TCG_REG_R0, args[0]); + tcg_out_goto(s, COND_AL, tb_ret_addr); + break; + case INDEX_op_goto_tb: + if (s->tb_jmp_offset) { + /* Direct jump method */ + s->tb_jmp_offset[args[0]] = tcg_current_code_size(s); + tcg_out_b_noaddr(s, COND_AL); + } else { + /* Indirect jump method */ + intptr_t ptr = (intptr_t)(s->tb_next + args[0]); + tcg_out_movi32(s, COND_AL, TCG_REG_R0, ptr & ~0xfff); + tcg_out_ld32_12(s, COND_AL, TCG_REG_PC, TCG_REG_R0, ptr & 0xfff); + } + s->tb_next_offset[args[0]] = tcg_current_code_size(s); + break; + case INDEX_op_br: + tcg_out_goto_label(s, COND_AL, arg_label(args[0])); + break; + + case INDEX_op_ld8u_i32: + tcg_out_ld8u(s, COND_AL, args[0], args[1], args[2]); + break; + case INDEX_op_ld8s_i32: + tcg_out_ld8s(s, COND_AL, args[0], args[1], args[2]); + break; + case INDEX_op_ld16u_i32: + tcg_out_ld16u(s, COND_AL, args[0], args[1], args[2]); + break; + case INDEX_op_ld16s_i32: + tcg_out_ld16s(s, COND_AL, args[0], args[1], args[2]); + break; + case INDEX_op_ld_i32: + tcg_out_ld32u(s, COND_AL, args[0], args[1], args[2]); + break; + case INDEX_op_st8_i32: + tcg_out_st8(s, COND_AL, args[0], args[1], args[2]); + break; + case INDEX_op_st16_i32: + tcg_out_st16(s, COND_AL, args[0], args[1], args[2]); + break; + case INDEX_op_st_i32: + tcg_out_st32(s, COND_AL, args[0], args[1], args[2]); + break; + + case INDEX_op_movcond_i32: + /* Constraints mean that v2 is always in the same register as dest, + * so we only need to do "if condition passed, move v1 to dest". + */ + tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0, + args[1], args[2], const_args[2]); + tcg_out_dat_rIK(s, tcg_cond_to_arm_cond[args[5]], ARITH_MOV, + ARITH_MVN, args[0], 0, args[3], const_args[3]); + break; + case INDEX_op_add_i32: + tcg_out_dat_rIN(s, COND_AL, ARITH_ADD, ARITH_SUB, + args[0], args[1], args[2], const_args[2]); + break; + case INDEX_op_sub_i32: + if (const_args[1]) { + if (const_args[2]) { + tcg_out_movi32(s, COND_AL, args[0], args[1] - args[2]); + } else { + tcg_out_dat_rI(s, COND_AL, ARITH_RSB, + args[0], args[2], args[1], 1); + } + } else { + tcg_out_dat_rIN(s, COND_AL, ARITH_SUB, ARITH_ADD, + args[0], args[1], args[2], const_args[2]); + } + break; + case INDEX_op_and_i32: + tcg_out_dat_rIK(s, COND_AL, ARITH_AND, ARITH_BIC, + args[0], args[1], args[2], const_args[2]); + break; + case INDEX_op_andc_i32: + tcg_out_dat_rIK(s, COND_AL, ARITH_BIC, ARITH_AND, + args[0], args[1], args[2], const_args[2]); + break; + case INDEX_op_or_i32: + c = ARITH_ORR; + goto gen_arith; + case INDEX_op_xor_i32: + c = ARITH_EOR; + /* Fall through. */ + gen_arith: + tcg_out_dat_rI(s, COND_AL, c, args[0], args[1], args[2], const_args[2]); + break; + case INDEX_op_add2_i32: + a0 = args[0], a1 = args[1], a2 = args[2]; + a3 = args[3], a4 = args[4], a5 = args[5]; + if (a0 == a3 || (a0 == a5 && !const_args[5])) { + a0 = TCG_REG_TMP; + } + tcg_out_dat_rIN(s, COND_AL, ARITH_ADD | TO_CPSR, ARITH_SUB | TO_CPSR, + a0, a2, a4, const_args[4]); + tcg_out_dat_rIK(s, COND_AL, ARITH_ADC, ARITH_SBC, + a1, a3, a5, const_args[5]); + tcg_out_mov_reg(s, COND_AL, args[0], a0); + break; + case INDEX_op_sub2_i32: + a0 = args[0], a1 = args[1], a2 = args[2]; + a3 = args[3], a4 = args[4], a5 = args[5]; + if ((a0 == a3 && !const_args[3]) || (a0 == a5 && !const_args[5])) { + a0 = TCG_REG_TMP; + } + if (const_args[2]) { + if (const_args[4]) { + tcg_out_movi32(s, COND_AL, a0, a4); + a4 = a0; + } + tcg_out_dat_rI(s, COND_AL, ARITH_RSB | TO_CPSR, a0, a4, a2, 1); + } else { + tcg_out_dat_rIN(s, COND_AL, ARITH_SUB | TO_CPSR, + ARITH_ADD | TO_CPSR, a0, a2, a4, const_args[4]); + } + if (const_args[3]) { + if (const_args[5]) { + tcg_out_movi32(s, COND_AL, a1, a5); + a5 = a1; + } + tcg_out_dat_rI(s, COND_AL, ARITH_RSC, a1, a5, a3, 1); + } else { + tcg_out_dat_rIK(s, COND_AL, ARITH_SBC, ARITH_ADC, + a1, a3, a5, const_args[5]); + } + tcg_out_mov_reg(s, COND_AL, args[0], a0); + break; + case INDEX_op_neg_i32: + tcg_out_dat_imm(s, COND_AL, ARITH_RSB, args[0], args[1], 0); + break; + case INDEX_op_not_i32: + tcg_out_dat_reg(s, COND_AL, + ARITH_MVN, args[0], 0, args[1], SHIFT_IMM_LSL(0)); + break; + case INDEX_op_mul_i32: + tcg_out_mul32(s, COND_AL, args[0], args[1], args[2]); + break; + case INDEX_op_mulu2_i32: + tcg_out_umull32(s, COND_AL, args[0], args[1], args[2], args[3]); + break; + case INDEX_op_muls2_i32: + tcg_out_smull32(s, COND_AL, args[0], args[1], args[2], args[3]); + break; + /* XXX: Perhaps args[2] & 0x1f is wrong */ + case INDEX_op_shl_i32: + c = const_args[2] ? + SHIFT_IMM_LSL(args[2] & 0x1f) : SHIFT_REG_LSL(args[2]); + goto gen_shift32; + case INDEX_op_shr_i32: + c = const_args[2] ? (args[2] & 0x1f) ? SHIFT_IMM_LSR(args[2] & 0x1f) : + SHIFT_IMM_LSL(0) : SHIFT_REG_LSR(args[2]); + goto gen_shift32; + case INDEX_op_sar_i32: + c = const_args[2] ? (args[2] & 0x1f) ? SHIFT_IMM_ASR(args[2] & 0x1f) : + SHIFT_IMM_LSL(0) : SHIFT_REG_ASR(args[2]); + goto gen_shift32; + case INDEX_op_rotr_i32: + c = const_args[2] ? (args[2] & 0x1f) ? SHIFT_IMM_ROR(args[2] & 0x1f) : + SHIFT_IMM_LSL(0) : SHIFT_REG_ROR(args[2]); + /* Fall through. */ + gen_shift32: + tcg_out_dat_reg(s, COND_AL, ARITH_MOV, args[0], 0, args[1], c); + break; + + case INDEX_op_rotl_i32: + if (const_args[2]) { + tcg_out_dat_reg(s, COND_AL, ARITH_MOV, args[0], 0, args[1], + ((0x20 - args[2]) & 0x1f) ? + SHIFT_IMM_ROR((0x20 - args[2]) & 0x1f) : + SHIFT_IMM_LSL(0)); + } else { + tcg_out_dat_imm(s, COND_AL, ARITH_RSB, TCG_REG_TMP, args[2], 0x20); + tcg_out_dat_reg(s, COND_AL, ARITH_MOV, args[0], 0, args[1], + SHIFT_REG_ROR(TCG_REG_TMP)); + } + break; + + case INDEX_op_brcond_i32: + tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0, + args[0], args[1], const_args[1]); + tcg_out_goto_label(s, tcg_cond_to_arm_cond[args[2]], + arg_label(args[3])); + break; + case INDEX_op_brcond2_i32: + /* The resulting conditions are: + * TCG_COND_EQ --> a0 == a2 && a1 == a3, + * TCG_COND_NE --> (a0 != a2 && a1 == a3) || a1 != a3, + * TCG_COND_LT(U) --> (a0 < a2 && a1 == a3) || a1 < a3, + * TCG_COND_GE(U) --> (a0 >= a2 && a1 == a3) || (a1 >= a3 && a1 != a3), + * TCG_COND_LE(U) --> (a0 <= a2 && a1 == a3) || (a1 <= a3 && a1 != a3), + * TCG_COND_GT(U) --> (a0 > a2 && a1 == a3) || a1 > a3, + */ + tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0, + args[1], args[3], const_args[3]); + tcg_out_dat_rIN(s, COND_EQ, ARITH_CMP, ARITH_CMN, 0, + args[0], args[2], const_args[2]); + tcg_out_goto_label(s, tcg_cond_to_arm_cond[args[4]], + arg_label(args[5])); + break; + case INDEX_op_setcond_i32: + tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0, + args[1], args[2], const_args[2]); + tcg_out_dat_imm(s, tcg_cond_to_arm_cond[args[3]], + ARITH_MOV, args[0], 0, 1); + tcg_out_dat_imm(s, tcg_cond_to_arm_cond[tcg_invert_cond(args[3])], + ARITH_MOV, args[0], 0, 0); + break; + case INDEX_op_setcond2_i32: + /* See brcond2_i32 comment */ + tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0, + args[2], args[4], const_args[4]); + tcg_out_dat_rIN(s, COND_EQ, ARITH_CMP, ARITH_CMN, 0, + args[1], args[3], const_args[3]); + tcg_out_dat_imm(s, tcg_cond_to_arm_cond[args[5]], + ARITH_MOV, args[0], 0, 1); + tcg_out_dat_imm(s, tcg_cond_to_arm_cond[tcg_invert_cond(args[5])], + ARITH_MOV, args[0], 0, 0); + break; + + case INDEX_op_qemu_ld_i32: + tcg_out_qemu_ld(s, args, 0); + break; + case INDEX_op_qemu_ld_i64: + tcg_out_qemu_ld(s, args, 1); + break; + case INDEX_op_qemu_st_i32: + tcg_out_qemu_st(s, args, 0); + break; + case INDEX_op_qemu_st_i64: + tcg_out_qemu_st(s, args, 1); + break; + + case INDEX_op_bswap16_i32: + tcg_out_bswap16(s, COND_AL, args[0], args[1]); + break; + case INDEX_op_bswap32_i32: + tcg_out_bswap32(s, COND_AL, args[0], args[1]); + break; + + case INDEX_op_ext8s_i32: + tcg_out_ext8s(s, COND_AL, args[0], args[1]); + break; + case INDEX_op_ext16s_i32: + tcg_out_ext16s(s, COND_AL, args[0], args[1]); + break; + case INDEX_op_ext16u_i32: + tcg_out_ext16u(s, COND_AL, args[0], args[1]); + break; + + case INDEX_op_deposit_i32: + tcg_out_deposit(s, COND_AL, args[0], args[2], + args[3], args[4], const_args[2]); + break; + + case INDEX_op_div_i32: + tcg_out_sdiv(s, COND_AL, args[0], args[1], args[2]); + break; + case INDEX_op_divu_i32: + tcg_out_udiv(s, COND_AL, args[0], args[1], args[2]); + break; + + case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */ + case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi. */ + case INDEX_op_call: /* Always emitted via tcg_out_call. */ + default: + tcg_abort(); + } +} + +static const TCGTargetOpDef arm_op_defs[] = { + { INDEX_op_exit_tb, { } }, + { INDEX_op_goto_tb, { } }, + { INDEX_op_br, { } }, + + { INDEX_op_ld8u_i32, { "r", "r" } }, + { INDEX_op_ld8s_i32, { "r", "r" } }, + { INDEX_op_ld16u_i32, { "r", "r" } }, + { INDEX_op_ld16s_i32, { "r", "r" } }, + { INDEX_op_ld_i32, { "r", "r" } }, + { INDEX_op_st8_i32, { "r", "r" } }, + { INDEX_op_st16_i32, { "r", "r" } }, + { INDEX_op_st_i32, { "r", "r" } }, + + /* TODO: "r", "r", "ri" */ + { INDEX_op_add_i32, { "r", "r", "rIN" } }, + { INDEX_op_sub_i32, { "r", "rI", "rIN" } }, + { INDEX_op_mul_i32, { "r", "r", "r" } }, + { INDEX_op_mulu2_i32, { "r", "r", "r", "r" } }, + { INDEX_op_muls2_i32, { "r", "r", "r", "r" } }, + { INDEX_op_and_i32, { "r", "r", "rIK" } }, + { INDEX_op_andc_i32, { "r", "r", "rIK" } }, + { INDEX_op_or_i32, { "r", "r", "rI" } }, + { INDEX_op_xor_i32, { "r", "r", "rI" } }, + { INDEX_op_neg_i32, { "r", "r" } }, + { INDEX_op_not_i32, { "r", "r" } }, + + { INDEX_op_shl_i32, { "r", "r", "ri" } }, + { INDEX_op_shr_i32, { "r", "r", "ri" } }, + { INDEX_op_sar_i32, { "r", "r", "ri" } }, + { INDEX_op_rotl_i32, { "r", "r", "ri" } }, + { INDEX_op_rotr_i32, { "r", "r", "ri" } }, + + { INDEX_op_brcond_i32, { "r", "rIN" } }, + { INDEX_op_setcond_i32, { "r", "r", "rIN" } }, + { INDEX_op_movcond_i32, { "r", "r", "rIN", "rIK", "0" } }, + + { INDEX_op_add2_i32, { "r", "r", "r", "r", "rIN", "rIK" } }, + { INDEX_op_sub2_i32, { "r", "r", "rI", "rI", "rIN", "rIK" } }, + { INDEX_op_brcond2_i32, { "r", "r", "rIN", "rIN" } }, + { INDEX_op_setcond2_i32, { "r", "r", "r", "rIN", "rIN" } }, + +#if TARGET_LONG_BITS == 32 + { INDEX_op_qemu_ld_i32, { "r", "l" } }, + { INDEX_op_qemu_ld_i64, { "r", "r", "l" } }, + { INDEX_op_qemu_st_i32, { "s", "s" } }, + { INDEX_op_qemu_st_i64, { "s", "s", "s" } }, +#else + { INDEX_op_qemu_ld_i32, { "r", "l", "l" } }, + { INDEX_op_qemu_ld_i64, { "r", "r", "l", "l" } }, + { INDEX_op_qemu_st_i32, { "s", "s", "s" } }, + { INDEX_op_qemu_st_i64, { "s", "s", "s", "s" } }, +#endif + + { INDEX_op_bswap16_i32, { "r", "r" } }, + { INDEX_op_bswap32_i32, { "r", "r" } }, + + { INDEX_op_ext8s_i32, { "r", "r" } }, + { INDEX_op_ext16s_i32, { "r", "r" } }, + { INDEX_op_ext16u_i32, { "r", "r" } }, + + { INDEX_op_deposit_i32, { "r", "0", "rZ" } }, + + { INDEX_op_div_i32, { "r", "r", "r" } }, + { INDEX_op_divu_i32, { "r", "r", "r" } }, + + { -1 }, +}; + +static void tcg_target_init(TCGContext *s) +{ + /* Only probe for the platform and capabilities if we havn't already + determined maximum values at compile time. */ +#ifndef use_idiv_instructions + { + unsigned long hwcap = qemu_getauxval(AT_HWCAP); + use_idiv_instructions = (hwcap & HWCAP_ARM_IDIVA) != 0; + } +#endif + if (__ARM_ARCH < 7) { + const char *pl = (const char *)qemu_getauxval(AT_PLATFORM); + if (pl != NULL && pl[0] == 'v' && pl[1] >= '4' && pl[1] <= '9') { + arm_arch = pl[1] - '0'; + } + } + + tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffff); + tcg_regset_set32(tcg_target_call_clobber_regs, 0, + (1 << TCG_REG_R0) | + (1 << TCG_REG_R1) | + (1 << TCG_REG_R2) | + (1 << TCG_REG_R3) | + (1 << TCG_REG_R12) | + (1 << TCG_REG_R14)); + + tcg_regset_clear(s->reserved_regs); + tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK); + tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP); + tcg_regset_set_reg(s->reserved_regs, TCG_REG_PC); + + tcg_add_target_add_op_defs(arm_op_defs); +} + +static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg arg, + TCGReg arg1, intptr_t arg2) +{ + tcg_out_ld32u(s, COND_AL, arg, arg1, arg2); +} + +static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, + TCGReg arg1, intptr_t arg2) +{ + tcg_out_st32(s, COND_AL, arg, arg1, arg2); +} + +static inline void tcg_out_mov(TCGContext *s, TCGType type, + TCGReg ret, TCGReg arg) +{ + tcg_out_dat_reg(s, COND_AL, ARITH_MOV, ret, 0, arg, SHIFT_IMM_LSL(0)); +} + +static inline void tcg_out_movi(TCGContext *s, TCGType type, + TCGReg ret, tcg_target_long arg) +{ + tcg_out_movi32(s, COND_AL, ret, arg); +} + +/* Compute frame size via macros, to share between tcg_target_qemu_prologue + and tcg_register_jit. */ + +#define PUSH_SIZE ((11 - 4 + 1 + 1) * sizeof(tcg_target_long)) + +#define FRAME_SIZE \ + ((PUSH_SIZE \ + + TCG_STATIC_CALL_ARGS_SIZE \ + + CPU_TEMP_BUF_NLONGS * sizeof(long) \ + + TCG_TARGET_STACK_ALIGN - 1) \ + & -TCG_TARGET_STACK_ALIGN) + +static void tcg_target_qemu_prologue(TCGContext *s) +{ + int stack_addend; + + /* Calling convention requires us to save r4-r11 and lr. */ + /* stmdb sp!, { r4 - r11, lr } */ + tcg_out32(s, (COND_AL << 28) | 0x092d4ff0); + + /* Reserve callee argument and tcg temp space. */ + stack_addend = FRAME_SIZE - PUSH_SIZE; + + tcg_out_dat_rI(s, COND_AL, ARITH_SUB, TCG_REG_CALL_STACK, + TCG_REG_CALL_STACK, stack_addend, 1); + tcg_set_frame(s, TCG_REG_CALL_STACK, TCG_STATIC_CALL_ARGS_SIZE, + CPU_TEMP_BUF_NLONGS * sizeof(long)); + + tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]); + + tcg_out_bx(s, COND_AL, tcg_target_call_iarg_regs[1]); + tb_ret_addr = s->code_ptr; + + /* Epilogue. We branch here via tb_ret_addr. */ + tcg_out_dat_rI(s, COND_AL, ARITH_ADD, TCG_REG_CALL_STACK, + TCG_REG_CALL_STACK, stack_addend, 1); + + /* ldmia sp!, { r4 - r11, pc } */ + tcg_out32(s, (COND_AL << 28) | 0x08bd8ff0); +} + +typedef struct { + DebugFrameHeader h; + uint8_t fde_def_cfa[4]; + uint8_t fde_reg_ofs[18]; +} DebugFrame; + +#define ELF_HOST_MACHINE EM_ARM + +/* We're expecting a 2 byte uleb128 encoded value. */ +QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14)); + +static const DebugFrame debug_frame = { + .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */ + .h.cie.id = -1, + .h.cie.version = 1, + .h.cie.code_align = 1, + .h.cie.data_align = 0x7c, /* sleb128 -4 */ + .h.cie.return_column = 14, + + /* Total FDE size does not include the "len" member. */ + .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset), + + .fde_def_cfa = { + 12, 13, /* DW_CFA_def_cfa sp, ... */ + (FRAME_SIZE & 0x7f) | 0x80, /* ... uleb128 FRAME_SIZE */ + (FRAME_SIZE >> 7) + }, + .fde_reg_ofs = { + /* The following must match the stmdb in the prologue. */ + 0x8e, 1, /* DW_CFA_offset, lr, -4 */ + 0x8b, 2, /* DW_CFA_offset, r11, -8 */ + 0x8a, 3, /* DW_CFA_offset, r10, -12 */ + 0x89, 4, /* DW_CFA_offset, r9, -16 */ + 0x88, 5, /* DW_CFA_offset, r8, -20 */ + 0x87, 6, /* DW_CFA_offset, r7, -24 */ + 0x86, 7, /* DW_CFA_offset, r6, -28 */ + 0x85, 8, /* DW_CFA_offset, r5, -32 */ + 0x84, 9, /* DW_CFA_offset, r4, -36 */ + } +}; + +void tcg_register_jit(void *buf, size_t buf_size) +{ + tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame)); +} diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c deleted file mode 100644 index d90636c..0000000 --- a/tcg/i386/tcg-target.c +++ /dev/null @@ -1,2464 +0,0 @@ -/* - * Tiny Code Generator for QEMU - * - * Copyright (c) 2008 Fabrice Bellard - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - */ - -#include "qemu/osdep.h" -#include "tcg-be-ldst.h" - -#ifndef NDEBUG -static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = { -#if TCG_TARGET_REG_BITS == 64 - "%rax", "%rcx", "%rdx", "%rbx", "%rsp", "%rbp", "%rsi", "%rdi", - "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15", -#else - "%eax", "%ecx", "%edx", "%ebx", "%esp", "%ebp", "%esi", "%edi", -#endif -}; -#endif - -static const int tcg_target_reg_alloc_order[] = { -#if TCG_TARGET_REG_BITS == 64 - TCG_REG_RBP, - TCG_REG_RBX, - TCG_REG_R12, - TCG_REG_R13, - TCG_REG_R14, - TCG_REG_R15, - TCG_REG_R10, - TCG_REG_R11, - TCG_REG_R9, - TCG_REG_R8, - TCG_REG_RCX, - TCG_REG_RDX, - TCG_REG_RSI, - TCG_REG_RDI, - TCG_REG_RAX, -#else - TCG_REG_EBX, - TCG_REG_ESI, - TCG_REG_EDI, - TCG_REG_EBP, - TCG_REG_ECX, - TCG_REG_EDX, - TCG_REG_EAX, -#endif -}; - -static const int tcg_target_call_iarg_regs[] = { -#if TCG_TARGET_REG_BITS == 64 -#if defined(_WIN64) - TCG_REG_RCX, - TCG_REG_RDX, -#else - TCG_REG_RDI, - TCG_REG_RSI, - TCG_REG_RDX, - TCG_REG_RCX, -#endif - TCG_REG_R8, - TCG_REG_R9, -#else - /* 32 bit mode uses stack based calling convention (GCC default). */ -#endif -}; - -static const int tcg_target_call_oarg_regs[] = { - TCG_REG_EAX, -#if TCG_TARGET_REG_BITS == 32 - TCG_REG_EDX -#endif -}; - -/* Constants we accept. */ -#define TCG_CT_CONST_S32 0x100 -#define TCG_CT_CONST_U32 0x200 -#define TCG_CT_CONST_I32 0x400 - -/* Registers used with L constraint, which are the first argument - registers on x86_64, and two random call clobbered registers on - i386. */ -#if TCG_TARGET_REG_BITS == 64 -# define TCG_REG_L0 tcg_target_call_iarg_regs[0] -# define TCG_REG_L1 tcg_target_call_iarg_regs[1] -#else -# define TCG_REG_L0 TCG_REG_EAX -# define TCG_REG_L1 TCG_REG_EDX -#endif - -/* The host compiler should supply to enable runtime features - detection, as we're not going to go so far as our own inline assembly. - If not available, default values will be assumed. */ -#if defined(CONFIG_CPUID_H) -#include -#endif - -/* For 32-bit, we are going to attempt to determine at runtime whether cmov - is available. */ -#if TCG_TARGET_REG_BITS == 64 -# define have_cmov 1 -#elif defined(CONFIG_CPUID_H) && defined(bit_CMOV) -static bool have_cmov; -#else -# define have_cmov 0 -#endif - -/* If bit_MOVBE is defined in cpuid.h (added in GCC version 4.6), we are - going to attempt to determine at runtime whether movbe is available. */ -#if defined(CONFIG_CPUID_H) && defined(bit_MOVBE) -static bool have_movbe; -#else -# define have_movbe 0 -#endif - -/* We need this symbol in tcg-target.h, and we can't properly conditionalize - it there. Therefore we always define the variable. */ -bool have_bmi1; - -#if defined(CONFIG_CPUID_H) && defined(bit_BMI2) -static bool have_bmi2; -#else -# define have_bmi2 0 -#endif - -static tcg_insn_unit *tb_ret_addr; - -static void patch_reloc(tcg_insn_unit *code_ptr, int type, - intptr_t value, intptr_t addend) -{ - value += addend; - switch(type) { - case R_386_PC32: - value -= (uintptr_t)code_ptr; - if (value != (int32_t)value) { - tcg_abort(); - } - tcg_patch32(code_ptr, value); - break; - case R_386_PC8: - value -= (uintptr_t)code_ptr; - if (value != (int8_t)value) { - tcg_abort(); - } - tcg_patch8(code_ptr, value); - break; - default: - tcg_abort(); - } -} - -/* parse target specific constraints */ -static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str) -{ - const char *ct_str; - - ct_str = *pct_str; - switch(ct_str[0]) { - case 'a': - ct->ct |= TCG_CT_REG; - tcg_regset_set_reg(ct->u.regs, TCG_REG_EAX); - break; - case 'b': - ct->ct |= TCG_CT_REG; - tcg_regset_set_reg(ct->u.regs, TCG_REG_EBX); - break; - case 'c': - case_c: - ct->ct |= TCG_CT_REG; - tcg_regset_set_reg(ct->u.regs, TCG_REG_ECX); - break; - case 'd': - ct->ct |= TCG_CT_REG; - tcg_regset_set_reg(ct->u.regs, TCG_REG_EDX); - break; - case 'S': - ct->ct |= TCG_CT_REG; - tcg_regset_set_reg(ct->u.regs, TCG_REG_ESI); - break; - case 'D': - ct->ct |= TCG_CT_REG; - tcg_regset_set_reg(ct->u.regs, TCG_REG_EDI); - break; - case 'q': - ct->ct |= TCG_CT_REG; - if (TCG_TARGET_REG_BITS == 64) { - tcg_regset_set32(ct->u.regs, 0, 0xffff); - } else { - tcg_regset_set32(ct->u.regs, 0, 0xf); - } - break; - case 'Q': - ct->ct |= TCG_CT_REG; - tcg_regset_set32(ct->u.regs, 0, 0xf); - break; - case 'r': - case_r: - ct->ct |= TCG_CT_REG; - if (TCG_TARGET_REG_BITS == 64) { - tcg_regset_set32(ct->u.regs, 0, 0xffff); - } else { - tcg_regset_set32(ct->u.regs, 0, 0xff); - } - break; - case 'C': - /* With SHRX et al, we need not use ECX as shift count register. */ - if (have_bmi2) { - goto case_r; - } else { - goto case_c; - } - - /* qemu_ld/st address constraint */ - case 'L': - ct->ct |= TCG_CT_REG; - if (TCG_TARGET_REG_BITS == 64) { - tcg_regset_set32(ct->u.regs, 0, 0xffff); - } else { - tcg_regset_set32(ct->u.regs, 0, 0xff); - } - tcg_regset_reset_reg(ct->u.regs, TCG_REG_L0); - tcg_regset_reset_reg(ct->u.regs, TCG_REG_L1); - break; - - case 'e': - ct->ct |= TCG_CT_CONST_S32; - break; - case 'Z': - ct->ct |= TCG_CT_CONST_U32; - break; - case 'I': - ct->ct |= TCG_CT_CONST_I32; - break; - - default: - return -1; - } - ct_str++; - *pct_str = ct_str; - return 0; -} - -/* test if a constant matches the constraint */ -static inline int tcg_target_const_match(tcg_target_long val, TCGType type, - const TCGArgConstraint *arg_ct) -{ - int ct = arg_ct->ct; - if (ct & TCG_CT_CONST) { - return 1; - } - if ((ct & TCG_CT_CONST_S32) && val == (int32_t)val) { - return 1; - } - if ((ct & TCG_CT_CONST_U32) && val == (uint32_t)val) { - return 1; - } - if ((ct & TCG_CT_CONST_I32) && ~val == (int32_t)~val) { - return 1; - } - return 0; -} - -#if TCG_TARGET_REG_BITS == 64 -# define LOWREGMASK(x) ((x) & 7) -#else -# define LOWREGMASK(x) (x) -#endif - -#define P_EXT 0x100 /* 0x0f opcode prefix */ -#define P_EXT38 0x200 /* 0x0f 0x38 opcode prefix */ -#define P_DATA16 0x400 /* 0x66 opcode prefix */ -#if TCG_TARGET_REG_BITS == 64 -# define P_ADDR32 0x800 /* 0x67 opcode prefix */ -# define P_REXW 0x1000 /* Set REX.W = 1 */ -# define P_REXB_R 0x2000 /* REG field as byte register */ -# define P_REXB_RM 0x4000 /* R/M field as byte register */ -# define P_GS 0x8000 /* gs segment override */ -#else -# define P_ADDR32 0 -# define P_REXW 0 -# define P_REXB_R 0 -# define P_REXB_RM 0 -# define P_GS 0 -#endif -#define P_SIMDF3 0x10000 /* 0xf3 opcode prefix */ -#define P_SIMDF2 0x20000 /* 0xf2 opcode prefix */ - -#define OPC_ARITH_EvIz (0x81) -#define OPC_ARITH_EvIb (0x83) -#define OPC_ARITH_GvEv (0x03) /* ... plus (ARITH_FOO << 3) */ -#define OPC_ANDN (0xf2 | P_EXT38) -#define OPC_ADD_GvEv (OPC_ARITH_GvEv | (ARITH_ADD << 3)) -#define OPC_BSWAP (0xc8 | P_EXT) -#define OPC_CALL_Jz (0xe8) -#define OPC_CMOVCC (0x40 | P_EXT) /* ... plus condition code */ -#define OPC_CMP_GvEv (OPC_ARITH_GvEv | (ARITH_CMP << 3)) -#define OPC_DEC_r32 (0x48) -#define OPC_IMUL_GvEv (0xaf | P_EXT) -#define OPC_IMUL_GvEvIb (0x6b) -#define OPC_IMUL_GvEvIz (0x69) -#define OPC_INC_r32 (0x40) -#define OPC_JCC_long (0x80 | P_EXT) /* ... plus condition code */ -#define OPC_JCC_short (0x70) /* ... plus condition code */ -#define OPC_JMP_long (0xe9) -#define OPC_JMP_short (0xeb) -#define OPC_LEA (0x8d) -#define OPC_MOVB_EvGv (0x88) /* stores, more or less */ -#define OPC_MOVL_EvGv (0x89) /* stores, more or less */ -#define OPC_MOVL_GvEv (0x8b) /* loads, more or less */ -#define OPC_MOVB_EvIz (0xc6) -#define OPC_MOVL_EvIz (0xc7) -#define OPC_MOVL_Iv (0xb8) -#define OPC_MOVBE_GyMy (0xf0 | P_EXT38) -#define OPC_MOVBE_MyGy (0xf1 | P_EXT38) -#define OPC_MOVSBL (0xbe | P_EXT) -#define OPC_MOVSWL (0xbf | P_EXT) -#define OPC_MOVSLQ (0x63 | P_REXW) -#define OPC_MOVZBL (0xb6 | P_EXT) -#define OPC_MOVZWL (0xb7 | P_EXT) -#define OPC_POP_r32 (0x58) -#define OPC_PUSH_r32 (0x50) -#define OPC_PUSH_Iv (0x68) -#define OPC_PUSH_Ib (0x6a) -#define OPC_RET (0xc3) -#define OPC_SETCC (0x90 | P_EXT | P_REXB_RM) /* ... plus cc */ -#define OPC_SHIFT_1 (0xd1) -#define OPC_SHIFT_Ib (0xc1) -#define OPC_SHIFT_cl (0xd3) -#define OPC_SARX (0xf7 | P_EXT38 | P_SIMDF3) -#define OPC_SHLX (0xf7 | P_EXT38 | P_DATA16) -#define OPC_SHRX (0xf7 | P_EXT38 | P_SIMDF2) -#define OPC_TESTL (0x85) -#define OPC_XCHG_ax_r32 (0x90) - -#define OPC_GRP3_Ev (0xf7) -#define OPC_GRP5 (0xff) - -/* Group 1 opcode extensions for 0x80-0x83. - These are also used as modifiers for OPC_ARITH. */ -#define ARITH_ADD 0 -#define ARITH_OR 1 -#define ARITH_ADC 2 -#define ARITH_SBB 3 -#define ARITH_AND 4 -#define ARITH_SUB 5 -#define ARITH_XOR 6 -#define ARITH_CMP 7 - -/* Group 2 opcode extensions for 0xc0, 0xc1, 0xd0-0xd3. */ -#define SHIFT_ROL 0 -#define SHIFT_ROR 1 -#define SHIFT_SHL 4 -#define SHIFT_SHR 5 -#define SHIFT_SAR 7 - -/* Group 3 opcode extensions for 0xf6, 0xf7. To be used with OPC_GRP3. */ -#define EXT3_NOT 2 -#define EXT3_NEG 3 -#define EXT3_MUL 4 -#define EXT3_IMUL 5 -#define EXT3_DIV 6 -#define EXT3_IDIV 7 - -/* Group 5 opcode extensions for 0xff. To be used with OPC_GRP5. */ -#define EXT5_INC_Ev 0 -#define EXT5_DEC_Ev 1 -#define EXT5_CALLN_Ev 2 -#define EXT5_JMPN_Ev 4 - -/* Condition codes to be added to OPC_JCC_{long,short}. */ -#define JCC_JMP (-1) -#define JCC_JO 0x0 -#define JCC_JNO 0x1 -#define JCC_JB 0x2 -#define JCC_JAE 0x3 -#define JCC_JE 0x4 -#define JCC_JNE 0x5 -#define JCC_JBE 0x6 -#define JCC_JA 0x7 -#define JCC_JS 0x8 -#define JCC_JNS 0x9 -#define JCC_JP 0xa -#define JCC_JNP 0xb -#define JCC_JL 0xc -#define JCC_JGE 0xd -#define JCC_JLE 0xe -#define JCC_JG 0xf - -static const uint8_t tcg_cond_to_jcc[] = { - [TCG_COND_EQ] = JCC_JE, - [TCG_COND_NE] = JCC_JNE, - [TCG_COND_LT] = JCC_JL, - [TCG_COND_GE] = JCC_JGE, - [TCG_COND_LE] = JCC_JLE, - [TCG_COND_GT] = JCC_JG, - [TCG_COND_LTU] = JCC_JB, - [TCG_COND_GEU] = JCC_JAE, - [TCG_COND_LEU] = JCC_JBE, - [TCG_COND_GTU] = JCC_JA, -}; - -#if TCG_TARGET_REG_BITS == 64 -static void tcg_out_opc(TCGContext *s, int opc, int r, int rm, int x) -{ - int rex; - - if (opc & P_GS) { - tcg_out8(s, 0x65); - } - if (opc & P_DATA16) { - /* We should never be asking for both 16 and 64-bit operation. */ - assert((opc & P_REXW) == 0); - tcg_out8(s, 0x66); - } - if (opc & P_ADDR32) { - tcg_out8(s, 0x67); - } - - rex = 0; - rex |= (opc & P_REXW) ? 0x8 : 0x0; /* REX.W */ - rex |= (r & 8) >> 1; /* REX.R */ - rex |= (x & 8) >> 2; /* REX.X */ - rex |= (rm & 8) >> 3; /* REX.B */ - - /* P_REXB_{R,RM} indicates that the given register is the low byte. - For %[abcd]l we need no REX prefix, but for %{si,di,bp,sp}l we do, - as otherwise the encoding indicates %[abcd]h. Note that the values - that are ORed in merely indicate that the REX byte must be present; - those bits get discarded in output. */ - rex |= opc & (r >= 4 ? P_REXB_R : 0); - rex |= opc & (rm >= 4 ? P_REXB_RM : 0); - - if (rex) { - tcg_out8(s, (uint8_t)(rex | 0x40)); - } - - if (opc & (P_EXT | P_EXT38)) { - tcg_out8(s, 0x0f); - if (opc & P_EXT38) { - tcg_out8(s, 0x38); - } - } - - tcg_out8(s, opc); -} -#else -static void tcg_out_opc(TCGContext *s, int opc) -{ - if (opc & P_DATA16) { - tcg_out8(s, 0x66); - } - if (opc & (P_EXT | P_EXT38)) { - tcg_out8(s, 0x0f); - if (opc & P_EXT38) { - tcg_out8(s, 0x38); - } - } - tcg_out8(s, opc); -} -/* Discard the register arguments to tcg_out_opc early, so as not to penalize - the 32-bit compilation paths. This method works with all versions of gcc, - whereas relying on optimization may not be able to exclude them. */ -#define tcg_out_opc(s, opc, r, rm, x) (tcg_out_opc)(s, opc) -#endif - -static void tcg_out_modrm(TCGContext *s, int opc, int r, int rm) -{ - tcg_out_opc(s, opc, r, rm, 0); - tcg_out8(s, 0xc0 | (LOWREGMASK(r) << 3) | LOWREGMASK(rm)); -} - -static void tcg_out_vex_modrm(TCGContext *s, int opc, int r, int v, int rm) -{ - int tmp; - - if ((opc & (P_REXW | P_EXT | P_EXT38)) || (rm & 8)) { - /* Three byte VEX prefix. */ - tcg_out8(s, 0xc4); - - /* VEX.m-mmmm */ - if (opc & P_EXT38) { - tmp = 2; - } else if (opc & P_EXT) { - tmp = 1; - } else { - tcg_abort(); - } - tmp |= 0x40; /* VEX.X */ - tmp |= (r & 8 ? 0 : 0x80); /* VEX.R */ - tmp |= (rm & 8 ? 0 : 0x20); /* VEX.B */ - tcg_out8(s, tmp); - - tmp = (opc & P_REXW ? 0x80 : 0); /* VEX.W */ - } else { - /* Two byte VEX prefix. */ - tcg_out8(s, 0xc5); - - tmp = (r & 8 ? 0 : 0x80); /* VEX.R */ - } - /* VEX.pp */ - if (opc & P_DATA16) { - tmp |= 1; /* 0x66 */ - } else if (opc & P_SIMDF3) { - tmp |= 2; /* 0xf3 */ - } else if (opc & P_SIMDF2) { - tmp |= 3; /* 0xf2 */ - } - tmp |= (~v & 15) << 3; /* VEX.vvvv */ - tcg_out8(s, tmp); - tcg_out8(s, opc); - tcg_out8(s, 0xc0 | (LOWREGMASK(r) << 3) | LOWREGMASK(rm)); -} - -/* Output an opcode with a full "rm + (index<code_ptr + 5 + ~rm; - intptr_t disp = offset - pc; - if (disp == (int32_t)disp) { - tcg_out_opc(s, opc, r, 0, 0); - tcg_out8(s, (LOWREGMASK(r) << 3) | 5); - tcg_out32(s, disp); - return; - } - - /* Try for an absolute address encoding. This requires the - use of the MODRM+SIB encoding and is therefore larger than - rip-relative addressing. */ - if (offset == (int32_t)offset) { - tcg_out_opc(s, opc, r, 0, 0); - tcg_out8(s, (LOWREGMASK(r) << 3) | 4); - tcg_out8(s, (4 << 3) | 5); - tcg_out32(s, offset); - return; - } - - /* ??? The memory isn't directly addressable. */ - tcg_abort(); - } else { - /* Absolute address. */ - tcg_out_opc(s, opc, r, 0, 0); - tcg_out8(s, (r << 3) | 5); - tcg_out32(s, offset); - return; - } - } - - /* Find the length of the immediate addend. Note that the encoding - that would be used for (%ebp) indicates absolute addressing. */ - if (rm < 0) { - mod = 0, len = 4, rm = 5; - } else if (offset == 0 && LOWREGMASK(rm) != TCG_REG_EBP) { - mod = 0, len = 0; - } else if (offset == (int8_t)offset) { - mod = 0x40, len = 1; - } else { - mod = 0x80, len = 4; - } - - /* Use a single byte MODRM format if possible. Note that the encoding - that would be used for %esp is the escape to the two byte form. */ - if (index < 0 && LOWREGMASK(rm) != TCG_REG_ESP) { - /* Single byte MODRM format. */ - tcg_out_opc(s, opc, r, rm, 0); - tcg_out8(s, mod | (LOWREGMASK(r) << 3) | LOWREGMASK(rm)); - } else { - /* Two byte MODRM+SIB format. */ - - /* Note that the encoding that would place %esp into the index - field indicates no index register. In 64-bit mode, the REX.X - bit counts, so %r12 can be used as the index. */ - if (index < 0) { - index = 4; - } else { - assert(index != TCG_REG_ESP); - } - - tcg_out_opc(s, opc, r, rm, index); - tcg_out8(s, mod | (LOWREGMASK(r) << 3) | 4); - tcg_out8(s, (shift << 6) | (LOWREGMASK(index) << 3) | LOWREGMASK(rm)); - } - - if (len == 1) { - tcg_out8(s, offset); - } else if (len == 4) { - tcg_out32(s, offset); - } -} - -/* A simplification of the above with no index or shift. */ -static inline void tcg_out_modrm_offset(TCGContext *s, int opc, int r, - int rm, intptr_t offset) -{ - tcg_out_modrm_sib_offset(s, opc, r, rm, -1, 0, offset); -} - -/* Generate dest op= src. Uses the same ARITH_* codes as tgen_arithi. */ -static inline void tgen_arithr(TCGContext *s, int subop, int dest, int src) -{ - /* Propagate an opcode prefix, such as P_REXW. */ - int ext = subop & ~0x7; - subop &= 0x7; - - tcg_out_modrm(s, OPC_ARITH_GvEv + (subop << 3) + ext, dest, src); -} - -static inline void tcg_out_mov(TCGContext *s, TCGType type, - TCGReg ret, TCGReg arg) -{ - if (arg != ret) { - int opc = OPC_MOVL_GvEv + (type == TCG_TYPE_I64 ? P_REXW : 0); - tcg_out_modrm(s, opc, ret, arg); - } -} - -static void tcg_out_movi(TCGContext *s, TCGType type, - TCGReg ret, tcg_target_long arg) -{ - tcg_target_long diff; - - if (arg == 0) { - tgen_arithr(s, ARITH_XOR, ret, ret); - return; - } - if (arg == (uint32_t)arg || type == TCG_TYPE_I32) { - tcg_out_opc(s, OPC_MOVL_Iv + LOWREGMASK(ret), 0, ret, 0); - tcg_out32(s, arg); - return; - } - if (arg == (int32_t)arg) { - tcg_out_modrm(s, OPC_MOVL_EvIz + P_REXW, 0, ret); - tcg_out32(s, arg); - return; - } - - /* Try a 7 byte pc-relative lea before the 10 byte movq. */ - diff = arg - ((uintptr_t)s->code_ptr + 7); - if (diff == (int32_t)diff) { - tcg_out_opc(s, OPC_LEA | P_REXW, ret, 0, 0); - tcg_out8(s, (LOWREGMASK(ret) << 3) | 5); - tcg_out32(s, diff); - return; - } - - tcg_out_opc(s, OPC_MOVL_Iv + P_REXW + LOWREGMASK(ret), 0, ret, 0); - tcg_out64(s, arg); -} - -static inline void tcg_out_pushi(TCGContext *s, tcg_target_long val) -{ - if (val == (int8_t)val) { - tcg_out_opc(s, OPC_PUSH_Ib, 0, 0, 0); - tcg_out8(s, val); - } else if (val == (int32_t)val) { - tcg_out_opc(s, OPC_PUSH_Iv, 0, 0, 0); - tcg_out32(s, val); - } else { - tcg_abort(); - } -} - -static inline void tcg_out_push(TCGContext *s, int reg) -{ - tcg_out_opc(s, OPC_PUSH_r32 + LOWREGMASK(reg), 0, reg, 0); -} - -static inline void tcg_out_pop(TCGContext *s, int reg) -{ - tcg_out_opc(s, OPC_POP_r32 + LOWREGMASK(reg), 0, reg, 0); -} - -static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, - TCGReg arg1, intptr_t arg2) -{ - int opc = OPC_MOVL_GvEv + (type == TCG_TYPE_I64 ? P_REXW : 0); - tcg_out_modrm_offset(s, opc, ret, arg1, arg2); -} - -static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, - TCGReg arg1, intptr_t arg2) -{ - int opc = OPC_MOVL_EvGv + (type == TCG_TYPE_I64 ? P_REXW : 0); - tcg_out_modrm_offset(s, opc, arg, arg1, arg2); -} - -static inline void tcg_out_sti(TCGContext *s, TCGType type, TCGReg base, - tcg_target_long ofs, tcg_target_long val) -{ - int opc = OPC_MOVL_EvIz + (type == TCG_TYPE_I64 ? P_REXW : 0); - tcg_out_modrm_offset(s, opc, 0, base, ofs); - tcg_out32(s, val); -} - -static void tcg_out_shifti(TCGContext *s, int subopc, int reg, int count) -{ - /* Propagate an opcode prefix, such as P_DATA16. */ - int ext = subopc & ~0x7; - subopc &= 0x7; - - if (count == 1) { - tcg_out_modrm(s, OPC_SHIFT_1 + ext, subopc, reg); - } else { - tcg_out_modrm(s, OPC_SHIFT_Ib + ext, subopc, reg); - tcg_out8(s, count); - } -} - -static inline void tcg_out_bswap32(TCGContext *s, int reg) -{ - tcg_out_opc(s, OPC_BSWAP + LOWREGMASK(reg), 0, reg, 0); -} - -static inline void tcg_out_rolw_8(TCGContext *s, int reg) -{ - tcg_out_shifti(s, SHIFT_ROL + P_DATA16, reg, 8); -} - -static inline void tcg_out_ext8u(TCGContext *s, int dest, int src) -{ - /* movzbl */ - assert(src < 4 || TCG_TARGET_REG_BITS == 64); - tcg_out_modrm(s, OPC_MOVZBL + P_REXB_RM, dest, src); -} - -static void tcg_out_ext8s(TCGContext *s, int dest, int src, int rexw) -{ - /* movsbl */ - assert(src < 4 || TCG_TARGET_REG_BITS == 64); - tcg_out_modrm(s, OPC_MOVSBL + P_REXB_RM + rexw, dest, src); -} - -static inline void tcg_out_ext16u(TCGContext *s, int dest, int src) -{ - /* movzwl */ - tcg_out_modrm(s, OPC_MOVZWL, dest, src); -} - -static inline void tcg_out_ext16s(TCGContext *s, int dest, int src, int rexw) -{ - /* movsw[lq] */ - tcg_out_modrm(s, OPC_MOVSWL + rexw, dest, src); -} - -static inline void tcg_out_ext32u(TCGContext *s, int dest, int src) -{ - /* 32-bit mov zero extends. */ - tcg_out_modrm(s, OPC_MOVL_GvEv, dest, src); -} - -static inline void tcg_out_ext32s(TCGContext *s, int dest, int src) -{ - tcg_out_modrm(s, OPC_MOVSLQ, dest, src); -} - -static inline void tcg_out_bswap64(TCGContext *s, int reg) -{ - tcg_out_opc(s, OPC_BSWAP + P_REXW + LOWREGMASK(reg), 0, reg, 0); -} - -static void tgen_arithi(TCGContext *s, int c, int r0, - tcg_target_long val, int cf) -{ - int rexw = 0; - - if (TCG_TARGET_REG_BITS == 64) { - rexw = c & -8; - c &= 7; - } - - /* ??? While INC is 2 bytes shorter than ADDL $1, they also induce - partial flags update stalls on Pentium4 and are not recommended - by current Intel optimization manuals. */ - if (!cf && (c == ARITH_ADD || c == ARITH_SUB) && (val == 1 || val == -1)) { - int is_inc = (c == ARITH_ADD) ^ (val < 0); - if (TCG_TARGET_REG_BITS == 64) { - /* The single-byte increment encodings are re-tasked as the - REX prefixes. Use the MODRM encoding. */ - tcg_out_modrm(s, OPC_GRP5 + rexw, - (is_inc ? EXT5_INC_Ev : EXT5_DEC_Ev), r0); - } else { - tcg_out8(s, (is_inc ? OPC_INC_r32 : OPC_DEC_r32) + r0); - } - return; - } - - if (c == ARITH_AND) { - if (TCG_TARGET_REG_BITS == 64) { - if (val == 0xffffffffu) { - tcg_out_ext32u(s, r0, r0); - return; - } - if (val == (uint32_t)val) { - /* AND with no high bits set can use a 32-bit operation. */ - rexw = 0; - } - } - if (val == 0xffu && (r0 < 4 || TCG_TARGET_REG_BITS == 64)) { - tcg_out_ext8u(s, r0, r0); - return; - } - if (val == 0xffffu) { - tcg_out_ext16u(s, r0, r0); - return; - } - } - - if (val == (int8_t)val) { - tcg_out_modrm(s, OPC_ARITH_EvIb + rexw, c, r0); - tcg_out8(s, val); - return; - } - if (rexw == 0 || val == (int32_t)val) { - tcg_out_modrm(s, OPC_ARITH_EvIz + rexw, c, r0); - tcg_out32(s, val); - return; - } - - tcg_abort(); -} - -static void tcg_out_addi(TCGContext *s, int reg, tcg_target_long val) -{ - if (val != 0) { - tgen_arithi(s, ARITH_ADD + P_REXW, reg, val, 0); - } -} - -/* Use SMALL != 0 to force a short forward branch. */ -static void tcg_out_jxx(TCGContext *s, int opc, TCGLabel *l, int small) -{ - int32_t val, val1; - - if (l->has_value) { - val = tcg_pcrel_diff(s, l->u.value_ptr); - val1 = val - 2; - if ((int8_t)val1 == val1) { - if (opc == -1) { - tcg_out8(s, OPC_JMP_short); - } else { - tcg_out8(s, OPC_JCC_short + opc); - } - tcg_out8(s, val1); - } else { - if (small) { - tcg_abort(); - } - if (opc == -1) { - tcg_out8(s, OPC_JMP_long); - tcg_out32(s, val - 5); - } else { - tcg_out_opc(s, OPC_JCC_long + opc, 0, 0, 0); - tcg_out32(s, val - 6); - } - } - } else if (small) { - if (opc == -1) { - tcg_out8(s, OPC_JMP_short); - } else { - tcg_out8(s, OPC_JCC_short + opc); - } - tcg_out_reloc(s, s->code_ptr, R_386_PC8, l, -1); - s->code_ptr += 1; - } else { - if (opc == -1) { - tcg_out8(s, OPC_JMP_long); - } else { - tcg_out_opc(s, OPC_JCC_long + opc, 0, 0, 0); - } - tcg_out_reloc(s, s->code_ptr, R_386_PC32, l, -4); - s->code_ptr += 4; - } -} - -static void tcg_out_cmp(TCGContext *s, TCGArg arg1, TCGArg arg2, - int const_arg2, int rexw) -{ - if (const_arg2) { - if (arg2 == 0) { - /* test r, r */ - tcg_out_modrm(s, OPC_TESTL + rexw, arg1, arg1); - } else { - tgen_arithi(s, ARITH_CMP + rexw, arg1, arg2, 0); - } - } else { - tgen_arithr(s, ARITH_CMP + rexw, arg1, arg2); - } -} - -static void tcg_out_brcond32(TCGContext *s, TCGCond cond, - TCGArg arg1, TCGArg arg2, int const_arg2, - TCGLabel *label, int small) -{ - tcg_out_cmp(s, arg1, arg2, const_arg2, 0); - tcg_out_jxx(s, tcg_cond_to_jcc[cond], label, small); -} - -#if TCG_TARGET_REG_BITS == 64 -static void tcg_out_brcond64(TCGContext *s, TCGCond cond, - TCGArg arg1, TCGArg arg2, int const_arg2, - TCGLabel *label, int small) -{ - tcg_out_cmp(s, arg1, arg2, const_arg2, P_REXW); - tcg_out_jxx(s, tcg_cond_to_jcc[cond], label, small); -} -#else -/* XXX: we implement it at the target level to avoid having to - handle cross basic blocks temporaries */ -static void tcg_out_brcond2(TCGContext *s, const TCGArg *args, - const int *const_args, int small) -{ - TCGLabel *label_next = gen_new_label(); - TCGLabel *label_this = arg_label(args[5]); - - switch(args[4]) { - case TCG_COND_EQ: - tcg_out_brcond32(s, TCG_COND_NE, args[0], args[2], const_args[2], - label_next, 1); - tcg_out_brcond32(s, TCG_COND_EQ, args[1], args[3], const_args[3], - label_this, small); - break; - case TCG_COND_NE: - tcg_out_brcond32(s, TCG_COND_NE, args[0], args[2], const_args[2], - label_this, small); - tcg_out_brcond32(s, TCG_COND_NE, args[1], args[3], const_args[3], - label_this, small); - break; - case TCG_COND_LT: - tcg_out_brcond32(s, TCG_COND_LT, args[1], args[3], const_args[3], - label_this, small); - tcg_out_jxx(s, JCC_JNE, label_next, 1); - tcg_out_brcond32(s, TCG_COND_LTU, args[0], args[2], const_args[2], - label_this, small); - break; - case TCG_COND_LE: - tcg_out_brcond32(s, TCG_COND_LT, args[1], args[3], const_args[3], - label_this, small); - tcg_out_jxx(s, JCC_JNE, label_next, 1); - tcg_out_brcond32(s, TCG_COND_LEU, args[0], args[2], const_args[2], - label_this, small); - break; - case TCG_COND_GT: - tcg_out_brcond32(s, TCG_COND_GT, args[1], args[3], const_args[3], - label_this, small); - tcg_out_jxx(s, JCC_JNE, label_next, 1); - tcg_out_brcond32(s, TCG_COND_GTU, args[0], args[2], const_args[2], - label_this, small); - break; - case TCG_COND_GE: - tcg_out_brcond32(s, TCG_COND_GT, args[1], args[3], const_args[3], - label_this, small); - tcg_out_jxx(s, JCC_JNE, label_next, 1); - tcg_out_brcond32(s, TCG_COND_GEU, args[0], args[2], const_args[2], - label_this, small); - break; - case TCG_COND_LTU: - tcg_out_brcond32(s, TCG_COND_LTU, args[1], args[3], const_args[3], - label_this, small); - tcg_out_jxx(s, JCC_JNE, label_next, 1); - tcg_out_brcond32(s, TCG_COND_LTU, args[0], args[2], const_args[2], - label_this, small); - break; - case TCG_COND_LEU: - tcg_out_brcond32(s, TCG_COND_LTU, args[1], args[3], const_args[3], - label_this, small); - tcg_out_jxx(s, JCC_JNE, label_next, 1); - tcg_out_brcond32(s, TCG_COND_LEU, args[0], args[2], const_args[2], - label_this, small); - break; - case TCG_COND_GTU: - tcg_out_brcond32(s, TCG_COND_GTU, args[1], args[3], const_args[3], - label_this, small); - tcg_out_jxx(s, JCC_JNE, label_next, 1); - tcg_out_brcond32(s, TCG_COND_GTU, args[0], args[2], const_args[2], - label_this, small); - break; - case TCG_COND_GEU: - tcg_out_brcond32(s, TCG_COND_GTU, args[1], args[3], const_args[3], - label_this, small); - tcg_out_jxx(s, JCC_JNE, label_next, 1); - tcg_out_brcond32(s, TCG_COND_GEU, args[0], args[2], const_args[2], - label_this, small); - break; - default: - tcg_abort(); - } - tcg_out_label(s, label_next, s->code_ptr); -} -#endif - -static void tcg_out_setcond32(TCGContext *s, TCGCond cond, TCGArg dest, - TCGArg arg1, TCGArg arg2, int const_arg2) -{ - tcg_out_cmp(s, arg1, arg2, const_arg2, 0); - tcg_out_modrm(s, OPC_SETCC | tcg_cond_to_jcc[cond], 0, dest); - tcg_out_ext8u(s, dest, dest); -} - -#if TCG_TARGET_REG_BITS == 64 -static void tcg_out_setcond64(TCGContext *s, TCGCond cond, TCGArg dest, - TCGArg arg1, TCGArg arg2, int const_arg2) -{ - tcg_out_cmp(s, arg1, arg2, const_arg2, P_REXW); - tcg_out_modrm(s, OPC_SETCC | tcg_cond_to_jcc[cond], 0, dest); - tcg_out_ext8u(s, dest, dest); -} -#else -static void tcg_out_setcond2(TCGContext *s, const TCGArg *args, - const int *const_args) -{ - TCGArg new_args[6]; - TCGLabel *label_true, *label_over; - - memcpy(new_args, args+1, 5*sizeof(TCGArg)); - - if (args[0] == args[1] || args[0] == args[2] - || (!const_args[3] && args[0] == args[3]) - || (!const_args[4] && args[0] == args[4])) { - /* When the destination overlaps with one of the argument - registers, don't do anything tricky. */ - label_true = gen_new_label(); - label_over = gen_new_label(); - - new_args[5] = label_arg(label_true); - tcg_out_brcond2(s, new_args, const_args+1, 1); - - tcg_out_movi(s, TCG_TYPE_I32, args[0], 0); - tcg_out_jxx(s, JCC_JMP, label_over, 1); - tcg_out_label(s, label_true, s->code_ptr); - - tcg_out_movi(s, TCG_TYPE_I32, args[0], 1); - tcg_out_label(s, label_over, s->code_ptr); - } else { - /* When the destination does not overlap one of the arguments, - clear the destination first, jump if cond false, and emit an - increment in the true case. This results in smaller code. */ - - tcg_out_movi(s, TCG_TYPE_I32, args[0], 0); - - label_over = gen_new_label(); - new_args[4] = tcg_invert_cond(new_args[4]); - new_args[5] = label_arg(label_over); - tcg_out_brcond2(s, new_args, const_args+1, 1); - - tgen_arithi(s, ARITH_ADD, args[0], 1, 0); - tcg_out_label(s, label_over, s->code_ptr); - } -} -#endif - -static void tcg_out_movcond32(TCGContext *s, TCGCond cond, TCGArg dest, - TCGArg c1, TCGArg c2, int const_c2, - TCGArg v1) -{ - tcg_out_cmp(s, c1, c2, const_c2, 0); - if (have_cmov) { - tcg_out_modrm(s, OPC_CMOVCC | tcg_cond_to_jcc[cond], dest, v1); - } else { - TCGLabel *over = gen_new_label(); - tcg_out_jxx(s, tcg_cond_to_jcc[tcg_invert_cond(cond)], over, 1); - tcg_out_mov(s, TCG_TYPE_I32, dest, v1); - tcg_out_label(s, over, s->code_ptr); - } -} - -#if TCG_TARGET_REG_BITS == 64 -static void tcg_out_movcond64(TCGContext *s, TCGCond cond, TCGArg dest, - TCGArg c1, TCGArg c2, int const_c2, - TCGArg v1) -{ - tcg_out_cmp(s, c1, c2, const_c2, P_REXW); - tcg_out_modrm(s, OPC_CMOVCC | tcg_cond_to_jcc[cond] | P_REXW, dest, v1); -} -#endif - -static void tcg_out_branch(TCGContext *s, int call, tcg_insn_unit *dest) -{ - intptr_t disp = tcg_pcrel_diff(s, dest) - 5; - - if (disp == (int32_t)disp) { - tcg_out_opc(s, call ? OPC_CALL_Jz : OPC_JMP_long, 0, 0, 0); - tcg_out32(s, disp); - } else { - tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R10, (uintptr_t)dest); - tcg_out_modrm(s, OPC_GRP5, - call ? EXT5_CALLN_Ev : EXT5_JMPN_Ev, TCG_REG_R10); - } -} - -static inline void tcg_out_call(TCGContext *s, tcg_insn_unit *dest) -{ - tcg_out_branch(s, 1, dest); -} - -static void tcg_out_jmp(TCGContext *s, tcg_insn_unit *dest) -{ - tcg_out_branch(s, 0, dest); -} - -#if defined(CONFIG_SOFTMMU) -/* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr, - * int mmu_idx, uintptr_t ra) - */ -static void * const qemu_ld_helpers[16] = { - [MO_UB] = helper_ret_ldub_mmu, - [MO_LEUW] = helper_le_lduw_mmu, - [MO_LEUL] = helper_le_ldul_mmu, - [MO_LEQ] = helper_le_ldq_mmu, - [MO_BEUW] = helper_be_lduw_mmu, - [MO_BEUL] = helper_be_ldul_mmu, - [MO_BEQ] = helper_be_ldq_mmu, -}; - -/* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr, - * uintxx_t val, int mmu_idx, uintptr_t ra) - */ -static void * const qemu_st_helpers[16] = { - [MO_UB] = helper_ret_stb_mmu, - [MO_LEUW] = helper_le_stw_mmu, - [MO_LEUL] = helper_le_stl_mmu, - [MO_LEQ] = helper_le_stq_mmu, - [MO_BEUW] = helper_be_stw_mmu, - [MO_BEUL] = helper_be_stl_mmu, - [MO_BEQ] = helper_be_stq_mmu, -}; - -/* Perform the TLB load and compare. - - Inputs: - ADDRLO and ADDRHI contain the low and high part of the address. - - MEM_INDEX and S_BITS are the memory context and log2 size of the load. - - WHICH is the offset into the CPUTLBEntry structure of the slot to read. - This should be offsetof addr_read or addr_write. - - Outputs: - LABEL_PTRS is filled with 1 (32-bit addresses) or 2 (64-bit addresses) - positions of the displacements of forward jumps to the TLB miss case. - - Second argument register is loaded with the low part of the address. - In the TLB hit case, it has been adjusted as indicated by the TLB - and so is a host address. In the TLB miss case, it continues to - hold a guest address. - - First argument register is clobbered. */ - -static inline void tcg_out_tlb_load(TCGContext *s, TCGReg addrlo, TCGReg addrhi, - int mem_index, TCGMemOp opc, - tcg_insn_unit **label_ptr, int which) -{ - const TCGReg r0 = TCG_REG_L0; - const TCGReg r1 = TCG_REG_L1; - TCGType ttype = TCG_TYPE_I32; - TCGType tlbtype = TCG_TYPE_I32; - int trexw = 0, hrexw = 0, tlbrexw = 0; - int s_mask = (1 << (opc & MO_SIZE)) - 1; - bool aligned = (opc & MO_AMASK) == MO_ALIGN || s_mask == 0; - - if (TCG_TARGET_REG_BITS == 64) { - if (TARGET_LONG_BITS == 64) { - ttype = TCG_TYPE_I64; - trexw = P_REXW; - } - if (TCG_TYPE_PTR == TCG_TYPE_I64) { - hrexw = P_REXW; - if (TARGET_PAGE_BITS + CPU_TLB_BITS > 32) { - tlbtype = TCG_TYPE_I64; - tlbrexw = P_REXW; - } - } - } - - tcg_out_mov(s, tlbtype, r0, addrlo); - if (aligned) { - tcg_out_mov(s, ttype, r1, addrlo); - } else { - /* For unaligned access check that we don't cross pages using - the page address of the last byte. */ - tcg_out_modrm_offset(s, OPC_LEA + trexw, r1, addrlo, s_mask); - } - - tcg_out_shifti(s, SHIFT_SHR + tlbrexw, r0, - TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS); - - tgen_arithi(s, ARITH_AND + trexw, r1, - TARGET_PAGE_MASK | (aligned ? s_mask : 0), 0); - tgen_arithi(s, ARITH_AND + tlbrexw, r0, - (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS, 0); - - tcg_out_modrm_sib_offset(s, OPC_LEA + hrexw, r0, TCG_AREG0, r0, 0, - offsetof(CPUArchState, tlb_table[mem_index][0]) - + which); - - /* cmp 0(r0), r1 */ - tcg_out_modrm_offset(s, OPC_CMP_GvEv + trexw, r1, r0, 0); - - /* Prepare for both the fast path add of the tlb addend, and the slow - path function argument setup. There are two cases worth note: - For 32-bit guest and x86_64 host, MOVL zero-extends the guest address - before the fastpath ADDQ below. For 64-bit guest and x32 host, MOVQ - copies the entire guest address for the slow path, while truncation - for the 32-bit host happens with the fastpath ADDL below. */ - tcg_out_mov(s, ttype, r1, addrlo); - - /* jne slow_path */ - tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0); - label_ptr[0] = s->code_ptr; - s->code_ptr += 4; - - if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) { - /* cmp 4(r0), addrhi */ - tcg_out_modrm_offset(s, OPC_CMP_GvEv, addrhi, r0, 4); - - /* jne slow_path */ - tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0); - label_ptr[1] = s->code_ptr; - s->code_ptr += 4; - } - - /* TLB Hit. */ - - /* add addend(r0), r1 */ - tcg_out_modrm_offset(s, OPC_ADD_GvEv + hrexw, r1, r0, - offsetof(CPUTLBEntry, addend) - which); -} - -/* - * Record the context of a call to the out of line helper code for the slow path - * for a load or store, so that we can later generate the correct helper code - */ -static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi, - TCGReg datalo, TCGReg datahi, - TCGReg addrlo, TCGReg addrhi, - tcg_insn_unit *raddr, - tcg_insn_unit **label_ptr) -{ - TCGLabelQemuLdst *label = new_ldst_label(s); - - label->is_ld = is_ld; - label->oi = oi; - label->datalo_reg = datalo; - label->datahi_reg = datahi; - label->addrlo_reg = addrlo; - label->addrhi_reg = addrhi; - label->raddr = raddr; - label->label_ptr[0] = label_ptr[0]; - if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) { - label->label_ptr[1] = label_ptr[1]; - } -} - -/* - * Generate code for the slow path for a load at the end of block - */ -static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l) -{ - TCGMemOpIdx oi = l->oi; - TCGMemOp opc = get_memop(oi); - TCGReg data_reg; - tcg_insn_unit **label_ptr = &l->label_ptr[0]; - - /* resolve label address */ - tcg_patch32(label_ptr[0], s->code_ptr - label_ptr[0] - 4); - if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) { - tcg_patch32(label_ptr[1], s->code_ptr - label_ptr[1] - 4); - } - - if (TCG_TARGET_REG_BITS == 32) { - int ofs = 0; - - tcg_out_st(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP, ofs); - ofs += 4; - - tcg_out_st(s, TCG_TYPE_I32, l->addrlo_reg, TCG_REG_ESP, ofs); - ofs += 4; - - if (TARGET_LONG_BITS == 64) { - tcg_out_st(s, TCG_TYPE_I32, l->addrhi_reg, TCG_REG_ESP, ofs); - ofs += 4; - } - - tcg_out_sti(s, TCG_TYPE_I32, TCG_REG_ESP, ofs, oi); - ofs += 4; - - tcg_out_sti(s, TCG_TYPE_PTR, TCG_REG_ESP, ofs, (uintptr_t)l->raddr); - } else { - tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0); - /* The second argument is already loaded with addrlo. */ - tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[2], oi); - tcg_out_movi(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[3], - (uintptr_t)l->raddr); - } - - tcg_out_call(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]); - - data_reg = l->datalo_reg; - switch (opc & MO_SSIZE) { - case MO_SB: - tcg_out_ext8s(s, data_reg, TCG_REG_EAX, P_REXW); - break; - case MO_SW: - tcg_out_ext16s(s, data_reg, TCG_REG_EAX, P_REXW); - break; -#if TCG_TARGET_REG_BITS == 64 - case MO_SL: - tcg_out_ext32s(s, data_reg, TCG_REG_EAX); - break; -#endif - case MO_UB: - case MO_UW: - /* Note that the helpers have zero-extended to tcg_target_long. */ - case MO_UL: - tcg_out_mov(s, TCG_TYPE_I32, data_reg, TCG_REG_EAX); - break; - case MO_Q: - if (TCG_TARGET_REG_BITS == 64) { - tcg_out_mov(s, TCG_TYPE_I64, data_reg, TCG_REG_RAX); - } else if (data_reg == TCG_REG_EDX) { - /* xchg %edx, %eax */ - tcg_out_opc(s, OPC_XCHG_ax_r32 + TCG_REG_EDX, 0, 0, 0); - tcg_out_mov(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_EAX); - } else { - tcg_out_mov(s, TCG_TYPE_I32, data_reg, TCG_REG_EAX); - tcg_out_mov(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_EDX); - } - break; - default: - tcg_abort(); - } - - /* Jump to the code corresponding to next IR of qemu_st */ - tcg_out_jmp(s, l->raddr); -} - -/* - * Generate code for the slow path for a store at the end of block - */ -static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l) -{ - TCGMemOpIdx oi = l->oi; - TCGMemOp opc = get_memop(oi); - TCGMemOp s_bits = opc & MO_SIZE; - tcg_insn_unit **label_ptr = &l->label_ptr[0]; - TCGReg retaddr; - - /* resolve label address */ - tcg_patch32(label_ptr[0], s->code_ptr - label_ptr[0] - 4); - if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) { - tcg_patch32(label_ptr[1], s->code_ptr - label_ptr[1] - 4); - } - - if (TCG_TARGET_REG_BITS == 32) { - int ofs = 0; - - tcg_out_st(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP, ofs); - ofs += 4; - - tcg_out_st(s, TCG_TYPE_I32, l->addrlo_reg, TCG_REG_ESP, ofs); - ofs += 4; - - if (TARGET_LONG_BITS == 64) { - tcg_out_st(s, TCG_TYPE_I32, l->addrhi_reg, TCG_REG_ESP, ofs); - ofs += 4; - } - - tcg_out_st(s, TCG_TYPE_I32, l->datalo_reg, TCG_REG_ESP, ofs); - ofs += 4; - - if (s_bits == MO_64) { - tcg_out_st(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_ESP, ofs); - ofs += 4; - } - - tcg_out_sti(s, TCG_TYPE_I32, TCG_REG_ESP, ofs, oi); - ofs += 4; - - retaddr = TCG_REG_EAX; - tcg_out_movi(s, TCG_TYPE_PTR, retaddr, (uintptr_t)l->raddr); - tcg_out_st(s, TCG_TYPE_PTR, retaddr, TCG_REG_ESP, ofs); - } else { - tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0); - /* The second argument is already loaded with addrlo. */ - tcg_out_mov(s, (s_bits == MO_64 ? TCG_TYPE_I64 : TCG_TYPE_I32), - tcg_target_call_iarg_regs[2], l->datalo_reg); - tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[3], oi); - - if (ARRAY_SIZE(tcg_target_call_iarg_regs) > 4) { - retaddr = tcg_target_call_iarg_regs[4]; - tcg_out_movi(s, TCG_TYPE_PTR, retaddr, (uintptr_t)l->raddr); - } else { - retaddr = TCG_REG_RAX; - tcg_out_movi(s, TCG_TYPE_PTR, retaddr, (uintptr_t)l->raddr); - tcg_out_st(s, TCG_TYPE_PTR, retaddr, TCG_REG_ESP, - TCG_TARGET_CALL_STACK_OFFSET); - } - } - - /* "Tail call" to the helper, with the return address back inline. */ - tcg_out_push(s, retaddr); - tcg_out_jmp(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]); -} -#elif defined(__x86_64__) && defined(__linux__) -# include -# include - -int arch_prctl(int code, unsigned long addr); - -static int guest_base_flags; -static inline void setup_guest_base_seg(void) -{ - if (arch_prctl(ARCH_SET_GS, guest_base) == 0) { - guest_base_flags = P_GS; - } -} -#else -# define guest_base_flags 0 -static inline void setup_guest_base_seg(void) { } -#endif /* SOFTMMU */ - -static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi, - TCGReg base, int index, intptr_t ofs, - int seg, TCGMemOp memop) -{ - const TCGMemOp real_bswap = memop & MO_BSWAP; - TCGMemOp bswap = real_bswap; - int movop = OPC_MOVL_GvEv; - - if (have_movbe && real_bswap) { - bswap = 0; - movop = OPC_MOVBE_GyMy; - } - - switch (memop & MO_SSIZE) { - case MO_UB: - tcg_out_modrm_sib_offset(s, OPC_MOVZBL + seg, datalo, - base, index, 0, ofs); - break; - case MO_SB: - tcg_out_modrm_sib_offset(s, OPC_MOVSBL + P_REXW + seg, datalo, - base, index, 0, ofs); - break; - case MO_UW: - tcg_out_modrm_sib_offset(s, OPC_MOVZWL + seg, datalo, - base, index, 0, ofs); - if (real_bswap) { - tcg_out_rolw_8(s, datalo); - } - break; - case MO_SW: - if (real_bswap) { - if (have_movbe) { - tcg_out_modrm_sib_offset(s, OPC_MOVBE_GyMy + P_DATA16 + seg, - datalo, base, index, 0, ofs); - } else { - tcg_out_modrm_sib_offset(s, OPC_MOVZWL + seg, datalo, - base, index, 0, ofs); - tcg_out_rolw_8(s, datalo); - } - tcg_out_modrm(s, OPC_MOVSWL + P_REXW, datalo, datalo); - } else { - tcg_out_modrm_sib_offset(s, OPC_MOVSWL + P_REXW + seg, - datalo, base, index, 0, ofs); - } - break; - case MO_UL: - tcg_out_modrm_sib_offset(s, movop + seg, datalo, base, index, 0, ofs); - if (bswap) { - tcg_out_bswap32(s, datalo); - } - break; -#if TCG_TARGET_REG_BITS == 64 - case MO_SL: - if (real_bswap) { - tcg_out_modrm_sib_offset(s, movop + seg, datalo, - base, index, 0, ofs); - if (bswap) { - tcg_out_bswap32(s, datalo); - } - tcg_out_ext32s(s, datalo, datalo); - } else { - tcg_out_modrm_sib_offset(s, OPC_MOVSLQ + seg, datalo, - base, index, 0, ofs); - } - break; -#endif - case MO_Q: - if (TCG_TARGET_REG_BITS == 64) { - tcg_out_modrm_sib_offset(s, movop + P_REXW + seg, datalo, - base, index, 0, ofs); - if (bswap) { - tcg_out_bswap64(s, datalo); - } - } else { - if (real_bswap) { - int t = datalo; - datalo = datahi; - datahi = t; - } - if (base != datalo) { - tcg_out_modrm_sib_offset(s, movop + seg, datalo, - base, index, 0, ofs); - tcg_out_modrm_sib_offset(s, movop + seg, datahi, - base, index, 0, ofs + 4); - } else { - tcg_out_modrm_sib_offset(s, movop + seg, datahi, - base, index, 0, ofs + 4); - tcg_out_modrm_sib_offset(s, movop + seg, datalo, - base, index, 0, ofs); - } - if (bswap) { - tcg_out_bswap32(s, datalo); - tcg_out_bswap32(s, datahi); - } - } - break; - default: - tcg_abort(); - } -} - -/* XXX: qemu_ld and qemu_st could be modified to clobber only EDX and - EAX. It will be useful once fixed registers globals are less - common. */ -static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64) -{ - TCGReg datalo, datahi, addrlo; - TCGReg addrhi __attribute__((unused)); - TCGMemOpIdx oi; - TCGMemOp opc; -#if defined(CONFIG_SOFTMMU) - int mem_index; - tcg_insn_unit *label_ptr[2]; -#endif - - datalo = *args++; - datahi = (TCG_TARGET_REG_BITS == 32 && is64 ? *args++ : 0); - addrlo = *args++; - addrhi = (TARGET_LONG_BITS > TCG_TARGET_REG_BITS ? *args++ : 0); - oi = *args++; - opc = get_memop(oi); - -#if defined(CONFIG_SOFTMMU) - mem_index = get_mmuidx(oi); - - tcg_out_tlb_load(s, addrlo, addrhi, mem_index, opc, - label_ptr, offsetof(CPUTLBEntry, addr_read)); - - /* TLB Hit. */ - tcg_out_qemu_ld_direct(s, datalo, datahi, TCG_REG_L1, -1, 0, 0, opc); - - /* Record the current context of a load into ldst label */ - add_qemu_ldst_label(s, true, oi, datalo, datahi, addrlo, addrhi, - s->code_ptr, label_ptr); -#else - { - int32_t offset = guest_base; - TCGReg base = addrlo; - int index = -1; - int seg = 0; - - /* For a 32-bit guest, the high 32 bits may contain garbage. - We can do this with the ADDR32 prefix if we're not using - a guest base, or when using segmentation. Otherwise we - need to zero-extend manually. */ - if (guest_base == 0 || guest_base_flags) { - seg = guest_base_flags; - offset = 0; - if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) { - seg |= P_ADDR32; - } - } else if (TCG_TARGET_REG_BITS == 64) { - if (TARGET_LONG_BITS == 32) { - tcg_out_ext32u(s, TCG_REG_L0, base); - base = TCG_REG_L0; - } - if (offset != guest_base) { - tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L1, guest_base); - index = TCG_REG_L1; - offset = 0; - } - } - - tcg_out_qemu_ld_direct(s, datalo, datahi, - base, index, offset, seg, opc); - } -#endif -} - -static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi, - TCGReg base, intptr_t ofs, int seg, - TCGMemOp memop) -{ - /* ??? Ideally we wouldn't need a scratch register. For user-only, - we could perform the bswap twice to restore the original value - instead of moving to the scratch. But as it is, the L constraint - means that TCG_REG_L0 is definitely free here. */ - const TCGReg scratch = TCG_REG_L0; - const TCGMemOp real_bswap = memop & MO_BSWAP; - TCGMemOp bswap = real_bswap; - int movop = OPC_MOVL_EvGv; - - if (have_movbe && real_bswap) { - bswap = 0; - movop = OPC_MOVBE_MyGy; - } - - switch (memop & MO_SIZE) { - case MO_8: - /* In 32-bit mode, 8-bit stores can only happen from [abcd]x. - Use the scratch register if necessary. */ - if (TCG_TARGET_REG_BITS == 32 && datalo >= 4) { - tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo); - datalo = scratch; - } - tcg_out_modrm_offset(s, OPC_MOVB_EvGv + P_REXB_R + seg, - datalo, base, ofs); - break; - case MO_16: - if (bswap) { - tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo); - tcg_out_rolw_8(s, scratch); - datalo = scratch; - } - tcg_out_modrm_offset(s, movop + P_DATA16 + seg, datalo, base, ofs); - break; - case MO_32: - if (bswap) { - tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo); - tcg_out_bswap32(s, scratch); - datalo = scratch; - } - tcg_out_modrm_offset(s, movop + seg, datalo, base, ofs); - break; - case MO_64: - if (TCG_TARGET_REG_BITS == 64) { - if (bswap) { - tcg_out_mov(s, TCG_TYPE_I64, scratch, datalo); - tcg_out_bswap64(s, scratch); - datalo = scratch; - } - tcg_out_modrm_offset(s, movop + P_REXW + seg, datalo, base, ofs); - } else if (bswap) { - tcg_out_mov(s, TCG_TYPE_I32, scratch, datahi); - tcg_out_bswap32(s, scratch); - tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, scratch, base, ofs); - tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo); - tcg_out_bswap32(s, scratch); - tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, scratch, base, ofs+4); - } else { - if (real_bswap) { - int t = datalo; - datalo = datahi; - datahi = t; - } - tcg_out_modrm_offset(s, movop + seg, datalo, base, ofs); - tcg_out_modrm_offset(s, movop + seg, datahi, base, ofs+4); - } - break; - default: - tcg_abort(); - } -} - -static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64) -{ - TCGReg datalo, datahi, addrlo; - TCGReg addrhi __attribute__((unused)); - TCGMemOpIdx oi; - TCGMemOp opc; -#if defined(CONFIG_SOFTMMU) - int mem_index; - tcg_insn_unit *label_ptr[2]; -#endif - - datalo = *args++; - datahi = (TCG_TARGET_REG_BITS == 32 && is64 ? *args++ : 0); - addrlo = *args++; - addrhi = (TARGET_LONG_BITS > TCG_TARGET_REG_BITS ? *args++ : 0); - oi = *args++; - opc = get_memop(oi); - -#if defined(CONFIG_SOFTMMU) - mem_index = get_mmuidx(oi); - - tcg_out_tlb_load(s, addrlo, addrhi, mem_index, opc, - label_ptr, offsetof(CPUTLBEntry, addr_write)); - - /* TLB Hit. */ - tcg_out_qemu_st_direct(s, datalo, datahi, TCG_REG_L1, 0, 0, opc); - - /* Record the current context of a store into ldst label */ - add_qemu_ldst_label(s, false, oi, datalo, datahi, addrlo, addrhi, - s->code_ptr, label_ptr); -#else - { - int32_t offset = guest_base; - TCGReg base = addrlo; - int seg = 0; - - /* See comment in tcg_out_qemu_ld re zero-extension of addrlo. */ - if (guest_base == 0 || guest_base_flags) { - seg = guest_base_flags; - offset = 0; - if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) { - seg |= P_ADDR32; - } - } else if (TCG_TARGET_REG_BITS == 64) { - /* ??? Note that we can't use the same SIB addressing scheme - as for loads, since we require L0 free for bswap. */ - if (offset != guest_base) { - if (TARGET_LONG_BITS == 32) { - tcg_out_ext32u(s, TCG_REG_L0, base); - base = TCG_REG_L0; - } - tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L1, guest_base); - tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_L1, base); - base = TCG_REG_L1; - offset = 0; - } else if (TARGET_LONG_BITS == 32) { - tcg_out_ext32u(s, TCG_REG_L1, base); - base = TCG_REG_L1; - } - } - - tcg_out_qemu_st_direct(s, datalo, datahi, base, offset, seg, opc); - } -#endif -} - -static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, - const TCGArg *args, const int *const_args) -{ - int c, vexop, rexw = 0; - -#if TCG_TARGET_REG_BITS == 64 -# define OP_32_64(x) \ - case glue(glue(INDEX_op_, x), _i64): \ - rexw = P_REXW; /* FALLTHRU */ \ - case glue(glue(INDEX_op_, x), _i32) -#else -# define OP_32_64(x) \ - case glue(glue(INDEX_op_, x), _i32) -#endif - - switch(opc) { - case INDEX_op_exit_tb: - tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_EAX, args[0]); - tcg_out_jmp(s, tb_ret_addr); - break; - case INDEX_op_goto_tb: - if (s->tb_jmp_offset) { - /* direct jump method */ - tcg_out8(s, OPC_JMP_long); /* jmp im */ - s->tb_jmp_offset[args[0]] = tcg_current_code_size(s); - tcg_out32(s, 0); - } else { - /* indirect jump method */ - tcg_out_modrm_offset(s, OPC_GRP5, EXT5_JMPN_Ev, -1, - (intptr_t)(s->tb_next + args[0])); - } - s->tb_next_offset[args[0]] = tcg_current_code_size(s); - break; - case INDEX_op_br: - tcg_out_jxx(s, JCC_JMP, arg_label(args[0]), 0); - break; - OP_32_64(ld8u): - /* Note that we can ignore REXW for the zero-extend to 64-bit. */ - tcg_out_modrm_offset(s, OPC_MOVZBL, args[0], args[1], args[2]); - break; - OP_32_64(ld8s): - tcg_out_modrm_offset(s, OPC_MOVSBL + rexw, args[0], args[1], args[2]); - break; - OP_32_64(ld16u): - /* Note that we can ignore REXW for the zero-extend to 64-bit. */ - tcg_out_modrm_offset(s, OPC_MOVZWL, args[0], args[1], args[2]); - break; - OP_32_64(ld16s): - tcg_out_modrm_offset(s, OPC_MOVSWL + rexw, args[0], args[1], args[2]); - break; -#if TCG_TARGET_REG_BITS == 64 - case INDEX_op_ld32u_i64: -#endif - case INDEX_op_ld_i32: - tcg_out_ld(s, TCG_TYPE_I32, args[0], args[1], args[2]); - break; - - OP_32_64(st8): - if (const_args[0]) { - tcg_out_modrm_offset(s, OPC_MOVB_EvIz, - 0, args[1], args[2]); - tcg_out8(s, args[0]); - } else { - tcg_out_modrm_offset(s, OPC_MOVB_EvGv | P_REXB_R, - args[0], args[1], args[2]); - } - break; - OP_32_64(st16): - if (const_args[0]) { - tcg_out_modrm_offset(s, OPC_MOVL_EvIz | P_DATA16, - 0, args[1], args[2]); - tcg_out16(s, args[0]); - } else { - tcg_out_modrm_offset(s, OPC_MOVL_EvGv | P_DATA16, - args[0], args[1], args[2]); - } - break; -#if TCG_TARGET_REG_BITS == 64 - case INDEX_op_st32_i64: -#endif - case INDEX_op_st_i32: - if (const_args[0]) { - tcg_out_modrm_offset(s, OPC_MOVL_EvIz, 0, args[1], args[2]); - tcg_out32(s, args[0]); - } else { - tcg_out_st(s, TCG_TYPE_I32, args[0], args[1], args[2]); - } - break; - - OP_32_64(add): - /* For 3-operand addition, use LEA. */ - if (args[0] != args[1]) { - TCGArg a0 = args[0], a1 = args[1], a2 = args[2], c3 = 0; - - if (const_args[2]) { - c3 = a2, a2 = -1; - } else if (a0 == a2) { - /* Watch out for dest = src + dest, since we've removed - the matching constraint on the add. */ - tgen_arithr(s, ARITH_ADD + rexw, a0, a1); - break; - } - - tcg_out_modrm_sib_offset(s, OPC_LEA + rexw, a0, a1, a2, 0, c3); - break; - } - c = ARITH_ADD; - goto gen_arith; - OP_32_64(sub): - c = ARITH_SUB; - goto gen_arith; - OP_32_64(and): - c = ARITH_AND; - goto gen_arith; - OP_32_64(or): - c = ARITH_OR; - goto gen_arith; - OP_32_64(xor): - c = ARITH_XOR; - goto gen_arith; - gen_arith: - if (const_args[2]) { - tgen_arithi(s, c + rexw, args[0], args[2], 0); - } else { - tgen_arithr(s, c + rexw, args[0], args[2]); - } - break; - - OP_32_64(andc): - if (const_args[2]) { - tcg_out_mov(s, rexw ? TCG_TYPE_I64 : TCG_TYPE_I32, - args[0], args[1]); - tgen_arithi(s, ARITH_AND + rexw, args[0], ~args[2], 0); - } else { - tcg_out_vex_modrm(s, OPC_ANDN + rexw, args[0], args[2], args[1]); - } - break; - - OP_32_64(mul): - if (const_args[2]) { - int32_t val; - val = args[2]; - if (val == (int8_t)val) { - tcg_out_modrm(s, OPC_IMUL_GvEvIb + rexw, args[0], args[0]); - tcg_out8(s, val); - } else { - tcg_out_modrm(s, OPC_IMUL_GvEvIz + rexw, args[0], args[0]); - tcg_out32(s, val); - } - } else { - tcg_out_modrm(s, OPC_IMUL_GvEv + rexw, args[0], args[2]); - } - break; - - OP_32_64(div2): - tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_IDIV, args[4]); - break; - OP_32_64(divu2): - tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_DIV, args[4]); - break; - - OP_32_64(shl): - c = SHIFT_SHL; - vexop = OPC_SHLX; - goto gen_shift_maybe_vex; - OP_32_64(shr): - c = SHIFT_SHR; - vexop = OPC_SHRX; - goto gen_shift_maybe_vex; - OP_32_64(sar): - c = SHIFT_SAR; - vexop = OPC_SARX; - goto gen_shift_maybe_vex; - OP_32_64(rotl): - c = SHIFT_ROL; - goto gen_shift; - OP_32_64(rotr): - c = SHIFT_ROR; - goto gen_shift; - gen_shift_maybe_vex: - if (have_bmi2 && !const_args[2]) { - tcg_out_vex_modrm(s, vexop + rexw, args[0], args[2], args[1]); - break; - } - /* FALLTHRU */ - gen_shift: - if (const_args[2]) { - tcg_out_shifti(s, c + rexw, args[0], args[2]); - } else { - tcg_out_modrm(s, OPC_SHIFT_cl + rexw, c, args[0]); - } - break; - - case INDEX_op_brcond_i32: - tcg_out_brcond32(s, args[2], args[0], args[1], const_args[1], - arg_label(args[3]), 0); - break; - case INDEX_op_setcond_i32: - tcg_out_setcond32(s, args[3], args[0], args[1], - args[2], const_args[2]); - break; - case INDEX_op_movcond_i32: - tcg_out_movcond32(s, args[5], args[0], args[1], - args[2], const_args[2], args[3]); - break; - - OP_32_64(bswap16): - tcg_out_rolw_8(s, args[0]); - break; - OP_32_64(bswap32): - tcg_out_bswap32(s, args[0]); - break; - - OP_32_64(neg): - tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NEG, args[0]); - break; - OP_32_64(not): - tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NOT, args[0]); - break; - - OP_32_64(ext8s): - tcg_out_ext8s(s, args[0], args[1], rexw); - break; - OP_32_64(ext16s): - tcg_out_ext16s(s, args[0], args[1], rexw); - break; - OP_32_64(ext8u): - tcg_out_ext8u(s, args[0], args[1]); - break; - OP_32_64(ext16u): - tcg_out_ext16u(s, args[0], args[1]); - break; - - case INDEX_op_qemu_ld_i32: - tcg_out_qemu_ld(s, args, 0); - break; - case INDEX_op_qemu_ld_i64: - tcg_out_qemu_ld(s, args, 1); - break; - case INDEX_op_qemu_st_i32: - tcg_out_qemu_st(s, args, 0); - break; - case INDEX_op_qemu_st_i64: - tcg_out_qemu_st(s, args, 1); - break; - - OP_32_64(mulu2): - tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_MUL, args[3]); - break; - OP_32_64(muls2): - tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_IMUL, args[3]); - break; - OP_32_64(add2): - if (const_args[4]) { - tgen_arithi(s, ARITH_ADD + rexw, args[0], args[4], 1); - } else { - tgen_arithr(s, ARITH_ADD + rexw, args[0], args[4]); - } - if (const_args[5]) { - tgen_arithi(s, ARITH_ADC + rexw, args[1], args[5], 1); - } else { - tgen_arithr(s, ARITH_ADC + rexw, args[1], args[5]); - } - break; - OP_32_64(sub2): - if (const_args[4]) { - tgen_arithi(s, ARITH_SUB + rexw, args[0], args[4], 1); - } else { - tgen_arithr(s, ARITH_SUB + rexw, args[0], args[4]); - } - if (const_args[5]) { - tgen_arithi(s, ARITH_SBB + rexw, args[1], args[5], 1); - } else { - tgen_arithr(s, ARITH_SBB + rexw, args[1], args[5]); - } - break; - -#if TCG_TARGET_REG_BITS == 32 - case INDEX_op_brcond2_i32: - tcg_out_brcond2(s, args, const_args, 0); - break; - case INDEX_op_setcond2_i32: - tcg_out_setcond2(s, args, const_args); - break; -#else /* TCG_TARGET_REG_BITS == 64 */ - case INDEX_op_ld32s_i64: - tcg_out_modrm_offset(s, OPC_MOVSLQ, args[0], args[1], args[2]); - break; - case INDEX_op_ld_i64: - tcg_out_ld(s, TCG_TYPE_I64, args[0], args[1], args[2]); - break; - case INDEX_op_st_i64: - if (const_args[0]) { - tcg_out_modrm_offset(s, OPC_MOVL_EvIz | P_REXW, - 0, args[1], args[2]); - tcg_out32(s, args[0]); - } else { - tcg_out_st(s, TCG_TYPE_I64, args[0], args[1], args[2]); - } - break; - - case INDEX_op_brcond_i64: - tcg_out_brcond64(s, args[2], args[0], args[1], const_args[1], - arg_label(args[3]), 0); - break; - case INDEX_op_setcond_i64: - tcg_out_setcond64(s, args[3], args[0], args[1], - args[2], const_args[2]); - break; - case INDEX_op_movcond_i64: - tcg_out_movcond64(s, args[5], args[0], args[1], - args[2], const_args[2], args[3]); - break; - - case INDEX_op_bswap64_i64: - tcg_out_bswap64(s, args[0]); - break; - case INDEX_op_extu_i32_i64: - case INDEX_op_ext32u_i64: - tcg_out_ext32u(s, args[0], args[1]); - break; - case INDEX_op_ext_i32_i64: - case INDEX_op_ext32s_i64: - tcg_out_ext32s(s, args[0], args[1]); - break; -#endif - - OP_32_64(deposit): - if (args[3] == 0 && args[4] == 8) { - /* load bits 0..7 */ - tcg_out_modrm(s, OPC_MOVB_EvGv | P_REXB_R | P_REXB_RM, - args[2], args[0]); - } else if (args[3] == 8 && args[4] == 8) { - /* load bits 8..15 */ - tcg_out_modrm(s, OPC_MOVB_EvGv, args[2], args[0] + 4); - } else if (args[3] == 0 && args[4] == 16) { - /* load bits 0..15 */ - tcg_out_modrm(s, OPC_MOVL_EvGv | P_DATA16, args[2], args[0]); - } else { - tcg_abort(); - } - break; - - case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */ - case INDEX_op_mov_i64: - case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi. */ - case INDEX_op_movi_i64: - case INDEX_op_call: /* Always emitted via tcg_out_call. */ - default: - tcg_abort(); - } - -#undef OP_32_64 -} - -static const TCGTargetOpDef x86_op_defs[] = { - { INDEX_op_exit_tb, { } }, - { INDEX_op_goto_tb, { } }, - { INDEX_op_br, { } }, - { INDEX_op_ld8u_i32, { "r", "r" } }, - { INDEX_op_ld8s_i32, { "r", "r" } }, - { INDEX_op_ld16u_i32, { "r", "r" } }, - { INDEX_op_ld16s_i32, { "r", "r" } }, - { INDEX_op_ld_i32, { "r", "r" } }, - { INDEX_op_st8_i32, { "qi", "r" } }, - { INDEX_op_st16_i32, { "ri", "r" } }, - { INDEX_op_st_i32, { "ri", "r" } }, - - { INDEX_op_add_i32, { "r", "r", "ri" } }, - { INDEX_op_sub_i32, { "r", "0", "ri" } }, - { INDEX_op_mul_i32, { "r", "0", "ri" } }, - { INDEX_op_div2_i32, { "a", "d", "0", "1", "r" } }, - { INDEX_op_divu2_i32, { "a", "d", "0", "1", "r" } }, - { INDEX_op_and_i32, { "r", "0", "ri" } }, - { INDEX_op_or_i32, { "r", "0", "ri" } }, - { INDEX_op_xor_i32, { "r", "0", "ri" } }, - { INDEX_op_andc_i32, { "r", "r", "ri" } }, - - { INDEX_op_shl_i32, { "r", "0", "Ci" } }, - { INDEX_op_shr_i32, { "r", "0", "Ci" } }, - { INDEX_op_sar_i32, { "r", "0", "Ci" } }, - { INDEX_op_rotl_i32, { "r", "0", "ci" } }, - { INDEX_op_rotr_i32, { "r", "0", "ci" } }, - - { INDEX_op_brcond_i32, { "r", "ri" } }, - - { INDEX_op_bswap16_i32, { "r", "0" } }, - { INDEX_op_bswap32_i32, { "r", "0" } }, - - { INDEX_op_neg_i32, { "r", "0" } }, - - { INDEX_op_not_i32, { "r", "0" } }, - - { INDEX_op_ext8s_i32, { "r", "q" } }, - { INDEX_op_ext16s_i32, { "r", "r" } }, - { INDEX_op_ext8u_i32, { "r", "q" } }, - { INDEX_op_ext16u_i32, { "r", "r" } }, - - { INDEX_op_setcond_i32, { "q", "r", "ri" } }, - - { INDEX_op_deposit_i32, { "Q", "0", "Q" } }, - { INDEX_op_movcond_i32, { "r", "r", "ri", "r", "0" } }, - - { INDEX_op_mulu2_i32, { "a", "d", "a", "r" } }, - { INDEX_op_muls2_i32, { "a", "d", "a", "r" } }, - { INDEX_op_add2_i32, { "r", "r", "0", "1", "ri", "ri" } }, - { INDEX_op_sub2_i32, { "r", "r", "0", "1", "ri", "ri" } }, - -#if TCG_TARGET_REG_BITS == 32 - { INDEX_op_brcond2_i32, { "r", "r", "ri", "ri" } }, - { INDEX_op_setcond2_i32, { "r", "r", "r", "ri", "ri" } }, -#else - { INDEX_op_ld8u_i64, { "r", "r" } }, - { INDEX_op_ld8s_i64, { "r", "r" } }, - { INDEX_op_ld16u_i64, { "r", "r" } }, - { INDEX_op_ld16s_i64, { "r", "r" } }, - { INDEX_op_ld32u_i64, { "r", "r" } }, - { INDEX_op_ld32s_i64, { "r", "r" } }, - { INDEX_op_ld_i64, { "r", "r" } }, - { INDEX_op_st8_i64, { "ri", "r" } }, - { INDEX_op_st16_i64, { "ri", "r" } }, - { INDEX_op_st32_i64, { "ri", "r" } }, - { INDEX_op_st_i64, { "re", "r" } }, - - { INDEX_op_add_i64, { "r", "r", "re" } }, - { INDEX_op_mul_i64, { "r", "0", "re" } }, - { INDEX_op_div2_i64, { "a", "d", "0", "1", "r" } }, - { INDEX_op_divu2_i64, { "a", "d", "0", "1", "r" } }, - { INDEX_op_sub_i64, { "r", "0", "re" } }, - { INDEX_op_and_i64, { "r", "0", "reZ" } }, - { INDEX_op_or_i64, { "r", "0", "re" } }, - { INDEX_op_xor_i64, { "r", "0", "re" } }, - { INDEX_op_andc_i64, { "r", "r", "rI" } }, - - { INDEX_op_shl_i64, { "r", "0", "Ci" } }, - { INDEX_op_shr_i64, { "r", "0", "Ci" } }, - { INDEX_op_sar_i64, { "r", "0", "Ci" } }, - { INDEX_op_rotl_i64, { "r", "0", "ci" } }, - { INDEX_op_rotr_i64, { "r", "0", "ci" } }, - - { INDEX_op_brcond_i64, { "r", "re" } }, - { INDEX_op_setcond_i64, { "r", "r", "re" } }, - - { INDEX_op_bswap16_i64, { "r", "0" } }, - { INDEX_op_bswap32_i64, { "r", "0" } }, - { INDEX_op_bswap64_i64, { "r", "0" } }, - { INDEX_op_neg_i64, { "r", "0" } }, - { INDEX_op_not_i64, { "r", "0" } }, - - { INDEX_op_ext8s_i64, { "r", "r" } }, - { INDEX_op_ext16s_i64, { "r", "r" } }, - { INDEX_op_ext32s_i64, { "r", "r" } }, - { INDEX_op_ext8u_i64, { "r", "r" } }, - { INDEX_op_ext16u_i64, { "r", "r" } }, - { INDEX_op_ext32u_i64, { "r", "r" } }, - - { INDEX_op_ext_i32_i64, { "r", "r" } }, - { INDEX_op_extu_i32_i64, { "r", "r" } }, - - { INDEX_op_deposit_i64, { "Q", "0", "Q" } }, - { INDEX_op_movcond_i64, { "r", "r", "re", "r", "0" } }, - - { INDEX_op_mulu2_i64, { "a", "d", "a", "r" } }, - { INDEX_op_muls2_i64, { "a", "d", "a", "r" } }, - { INDEX_op_add2_i64, { "r", "r", "0", "1", "re", "re" } }, - { INDEX_op_sub2_i64, { "r", "r", "0", "1", "re", "re" } }, -#endif - -#if TCG_TARGET_REG_BITS == 64 - { INDEX_op_qemu_ld_i32, { "r", "L" } }, - { INDEX_op_qemu_st_i32, { "L", "L" } }, - { INDEX_op_qemu_ld_i64, { "r", "L" } }, - { INDEX_op_qemu_st_i64, { "L", "L" } }, -#elif TARGET_LONG_BITS <= TCG_TARGET_REG_BITS - { INDEX_op_qemu_ld_i32, { "r", "L" } }, - { INDEX_op_qemu_st_i32, { "L", "L" } }, - { INDEX_op_qemu_ld_i64, { "r", "r", "L" } }, - { INDEX_op_qemu_st_i64, { "L", "L", "L" } }, -#else - { INDEX_op_qemu_ld_i32, { "r", "L", "L" } }, - { INDEX_op_qemu_st_i32, { "L", "L", "L" } }, - { INDEX_op_qemu_ld_i64, { "r", "r", "L", "L" } }, - { INDEX_op_qemu_st_i64, { "L", "L", "L", "L" } }, -#endif - { -1 }, -}; - -static int tcg_target_callee_save_regs[] = { -#if TCG_TARGET_REG_BITS == 64 - TCG_REG_RBP, - TCG_REG_RBX, -#if defined(_WIN64) - TCG_REG_RDI, - TCG_REG_RSI, -#endif - TCG_REG_R12, - TCG_REG_R13, - TCG_REG_R14, /* Currently used for the global env. */ - TCG_REG_R15, -#else - TCG_REG_EBP, /* Currently used for the global env. */ - TCG_REG_EBX, - TCG_REG_ESI, - TCG_REG_EDI, -#endif -}; - -/* Compute frame size via macros, to share between tcg_target_qemu_prologue - and tcg_register_jit. */ - -#define PUSH_SIZE \ - ((1 + ARRAY_SIZE(tcg_target_callee_save_regs)) \ - * (TCG_TARGET_REG_BITS / 8)) - -#define FRAME_SIZE \ - ((PUSH_SIZE \ - + TCG_STATIC_CALL_ARGS_SIZE \ - + CPU_TEMP_BUF_NLONGS * sizeof(long) \ - + TCG_TARGET_STACK_ALIGN - 1) \ - & ~(TCG_TARGET_STACK_ALIGN - 1)) - -/* Generate global QEMU prologue and epilogue code */ -static void tcg_target_qemu_prologue(TCGContext *s) -{ - int i, stack_addend; - - /* TB prologue */ - - /* Reserve some stack space, also for TCG temps. */ - stack_addend = FRAME_SIZE - PUSH_SIZE; - tcg_set_frame(s, TCG_REG_CALL_STACK, TCG_STATIC_CALL_ARGS_SIZE, - CPU_TEMP_BUF_NLONGS * sizeof(long)); - - /* Save all callee saved registers. */ - for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); i++) { - tcg_out_push(s, tcg_target_callee_save_regs[i]); - } - -#if TCG_TARGET_REG_BITS == 32 - tcg_out_ld(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP, - (ARRAY_SIZE(tcg_target_callee_save_regs) + 1) * 4); - tcg_out_addi(s, TCG_REG_ESP, -stack_addend); - /* jmp *tb. */ - tcg_out_modrm_offset(s, OPC_GRP5, EXT5_JMPN_Ev, TCG_REG_ESP, - (ARRAY_SIZE(tcg_target_callee_save_regs) + 2) * 4 - + stack_addend); -#else - tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]); - tcg_out_addi(s, TCG_REG_ESP, -stack_addend); - /* jmp *tb. */ - tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, tcg_target_call_iarg_regs[1]); -#endif - - /* TB epilogue */ - tb_ret_addr = s->code_ptr; - - tcg_out_addi(s, TCG_REG_CALL_STACK, stack_addend); - - for (i = ARRAY_SIZE(tcg_target_callee_save_regs) - 1; i >= 0; i--) { - tcg_out_pop(s, tcg_target_callee_save_regs[i]); - } - tcg_out_opc(s, OPC_RET, 0, 0, 0); - -#if !defined(CONFIG_SOFTMMU) - /* Try to set up a segment register to point to guest_base. */ - if (guest_base) { - setup_guest_base_seg(); - } -#endif -} - -static void tcg_target_init(TCGContext *s) -{ -#ifdef CONFIG_CPUID_H - unsigned a, b, c, d; - int max = __get_cpuid_max(0, 0); - - if (max >= 1) { - __cpuid(1, a, b, c, d); -#ifndef have_cmov - /* For 32-bit, 99% certainty that we're running on hardware that - supports cmov, but we still need to check. In case cmov is not - available, we'll use a small forward branch. */ - have_cmov = (d & bit_CMOV) != 0; -#endif -#ifndef have_movbe - /* MOVBE is only available on Intel Atom and Haswell CPUs, so we - need to probe for it. */ - have_movbe = (c & bit_MOVBE) != 0; -#endif - } - - if (max >= 7) { - /* BMI1 is available on AMD Piledriver and Intel Haswell CPUs. */ - __cpuid_count(7, 0, a, b, c, d); -#ifdef bit_BMI - have_bmi1 = (b & bit_BMI) != 0; -#endif -#ifndef have_bmi2 - have_bmi2 = (b & bit_BMI2) != 0; -#endif - } -#endif - - if (TCG_TARGET_REG_BITS == 64) { - tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffff); - tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I64], 0, 0xffff); - } else { - tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xff); - } - - tcg_regset_clear(tcg_target_call_clobber_regs); - tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_EAX); - tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_EDX); - tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_ECX); - if (TCG_TARGET_REG_BITS == 64) { -#if !defined(_WIN64) - tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_RDI); - tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_RSI); -#endif - tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R8); - tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R9); - tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R10); - tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R11); - } - - tcg_regset_clear(s->reserved_regs); - tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK); - - tcg_add_target_add_op_defs(x86_op_defs); -} - -typedef struct { - DebugFrameHeader h; - uint8_t fde_def_cfa[4]; - uint8_t fde_reg_ofs[14]; -} DebugFrame; - -/* We're expecting a 2 byte uleb128 encoded value. */ -QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14)); - -#if !defined(__ELF__) - /* Host machine without ELF. */ -#elif TCG_TARGET_REG_BITS == 64 -#define ELF_HOST_MACHINE EM_X86_64 -static const DebugFrame debug_frame = { - .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */ - .h.cie.id = -1, - .h.cie.version = 1, - .h.cie.code_align = 1, - .h.cie.data_align = 0x78, /* sleb128 -8 */ - .h.cie.return_column = 16, - - /* Total FDE size does not include the "len" member. */ - .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset), - - .fde_def_cfa = { - 12, 7, /* DW_CFA_def_cfa %rsp, ... */ - (FRAME_SIZE & 0x7f) | 0x80, /* ... uleb128 FRAME_SIZE */ - (FRAME_SIZE >> 7) - }, - .fde_reg_ofs = { - 0x90, 1, /* DW_CFA_offset, %rip, -8 */ - /* The following ordering must match tcg_target_callee_save_regs. */ - 0x86, 2, /* DW_CFA_offset, %rbp, -16 */ - 0x83, 3, /* DW_CFA_offset, %rbx, -24 */ - 0x8c, 4, /* DW_CFA_offset, %r12, -32 */ - 0x8d, 5, /* DW_CFA_offset, %r13, -40 */ - 0x8e, 6, /* DW_CFA_offset, %r14, -48 */ - 0x8f, 7, /* DW_CFA_offset, %r15, -56 */ - } -}; -#else -#define ELF_HOST_MACHINE EM_386 -static const DebugFrame debug_frame = { - .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */ - .h.cie.id = -1, - .h.cie.version = 1, - .h.cie.code_align = 1, - .h.cie.data_align = 0x7c, /* sleb128 -4 */ - .h.cie.return_column = 8, - - /* Total FDE size does not include the "len" member. */ - .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset), - - .fde_def_cfa = { - 12, 4, /* DW_CFA_def_cfa %esp, ... */ - (FRAME_SIZE & 0x7f) | 0x80, /* ... uleb128 FRAME_SIZE */ - (FRAME_SIZE >> 7) - }, - .fde_reg_ofs = { - 0x88, 1, /* DW_CFA_offset, %eip, -4 */ - /* The following ordering must match tcg_target_callee_save_regs. */ - 0x85, 2, /* DW_CFA_offset, %ebp, -8 */ - 0x83, 3, /* DW_CFA_offset, %ebx, -12 */ - 0x86, 4, /* DW_CFA_offset, %esi, -16 */ - 0x87, 5, /* DW_CFA_offset, %edi, -20 */ - } -}; -#endif - -#if defined(ELF_HOST_MACHINE) -void tcg_register_jit(void *buf, size_t buf_size) -{ - tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame)); -} -#endif diff --git a/tcg/i386/tcg-target.inc.c b/tcg/i386/tcg-target.inc.c new file mode 100644 index 0000000..d90636c --- /dev/null +++ b/tcg/i386/tcg-target.inc.c @@ -0,0 +1,2464 @@ +/* + * Tiny Code Generator for QEMU + * + * Copyright (c) 2008 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "qemu/osdep.h" +#include "tcg-be-ldst.h" + +#ifndef NDEBUG +static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = { +#if TCG_TARGET_REG_BITS == 64 + "%rax", "%rcx", "%rdx", "%rbx", "%rsp", "%rbp", "%rsi", "%rdi", + "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15", +#else + "%eax", "%ecx", "%edx", "%ebx", "%esp", "%ebp", "%esi", "%edi", +#endif +}; +#endif + +static const int tcg_target_reg_alloc_order[] = { +#if TCG_TARGET_REG_BITS == 64 + TCG_REG_RBP, + TCG_REG_RBX, + TCG_REG_R12, + TCG_REG_R13, + TCG_REG_R14, + TCG_REG_R15, + TCG_REG_R10, + TCG_REG_R11, + TCG_REG_R9, + TCG_REG_R8, + TCG_REG_RCX, + TCG_REG_RDX, + TCG_REG_RSI, + TCG_REG_RDI, + TCG_REG_RAX, +#else + TCG_REG_EBX, + TCG_REG_ESI, + TCG_REG_EDI, + TCG_REG_EBP, + TCG_REG_ECX, + TCG_REG_EDX, + TCG_REG_EAX, +#endif +}; + +static const int tcg_target_call_iarg_regs[] = { +#if TCG_TARGET_REG_BITS == 64 +#if defined(_WIN64) + TCG_REG_RCX, + TCG_REG_RDX, +#else + TCG_REG_RDI, + TCG_REG_RSI, + TCG_REG_RDX, + TCG_REG_RCX, +#endif + TCG_REG_R8, + TCG_REG_R9, +#else + /* 32 bit mode uses stack based calling convention (GCC default). */ +#endif +}; + +static const int tcg_target_call_oarg_regs[] = { + TCG_REG_EAX, +#if TCG_TARGET_REG_BITS == 32 + TCG_REG_EDX +#endif +}; + +/* Constants we accept. */ +#define TCG_CT_CONST_S32 0x100 +#define TCG_CT_CONST_U32 0x200 +#define TCG_CT_CONST_I32 0x400 + +/* Registers used with L constraint, which are the first argument + registers on x86_64, and two random call clobbered registers on + i386. */ +#if TCG_TARGET_REG_BITS == 64 +# define TCG_REG_L0 tcg_target_call_iarg_regs[0] +# define TCG_REG_L1 tcg_target_call_iarg_regs[1] +#else +# define TCG_REG_L0 TCG_REG_EAX +# define TCG_REG_L1 TCG_REG_EDX +#endif + +/* The host compiler should supply to enable runtime features + detection, as we're not going to go so far as our own inline assembly. + If not available, default values will be assumed. */ +#if defined(CONFIG_CPUID_H) +#include +#endif + +/* For 32-bit, we are going to attempt to determine at runtime whether cmov + is available. */ +#if TCG_TARGET_REG_BITS == 64 +# define have_cmov 1 +#elif defined(CONFIG_CPUID_H) && defined(bit_CMOV) +static bool have_cmov; +#else +# define have_cmov 0 +#endif + +/* If bit_MOVBE is defined in cpuid.h (added in GCC version 4.6), we are + going to attempt to determine at runtime whether movbe is available. */ +#if defined(CONFIG_CPUID_H) && defined(bit_MOVBE) +static bool have_movbe; +#else +# define have_movbe 0 +#endif + +/* We need this symbol in tcg-target.h, and we can't properly conditionalize + it there. Therefore we always define the variable. */ +bool have_bmi1; + +#if defined(CONFIG_CPUID_H) && defined(bit_BMI2) +static bool have_bmi2; +#else +# define have_bmi2 0 +#endif + +static tcg_insn_unit *tb_ret_addr; + +static void patch_reloc(tcg_insn_unit *code_ptr, int type, + intptr_t value, intptr_t addend) +{ + value += addend; + switch(type) { + case R_386_PC32: + value -= (uintptr_t)code_ptr; + if (value != (int32_t)value) { + tcg_abort(); + } + tcg_patch32(code_ptr, value); + break; + case R_386_PC8: + value -= (uintptr_t)code_ptr; + if (value != (int8_t)value) { + tcg_abort(); + } + tcg_patch8(code_ptr, value); + break; + default: + tcg_abort(); + } +} + +/* parse target specific constraints */ +static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str) +{ + const char *ct_str; + + ct_str = *pct_str; + switch(ct_str[0]) { + case 'a': + ct->ct |= TCG_CT_REG; + tcg_regset_set_reg(ct->u.regs, TCG_REG_EAX); + break; + case 'b': + ct->ct |= TCG_CT_REG; + tcg_regset_set_reg(ct->u.regs, TCG_REG_EBX); + break; + case 'c': + case_c: + ct->ct |= TCG_CT_REG; + tcg_regset_set_reg(ct->u.regs, TCG_REG_ECX); + break; + case 'd': + ct->ct |= TCG_CT_REG; + tcg_regset_set_reg(ct->u.regs, TCG_REG_EDX); + break; + case 'S': + ct->ct |= TCG_CT_REG; + tcg_regset_set_reg(ct->u.regs, TCG_REG_ESI); + break; + case 'D': + ct->ct |= TCG_CT_REG; + tcg_regset_set_reg(ct->u.regs, TCG_REG_EDI); + break; + case 'q': + ct->ct |= TCG_CT_REG; + if (TCG_TARGET_REG_BITS == 64) { + tcg_regset_set32(ct->u.regs, 0, 0xffff); + } else { + tcg_regset_set32(ct->u.regs, 0, 0xf); + } + break; + case 'Q': + ct->ct |= TCG_CT_REG; + tcg_regset_set32(ct->u.regs, 0, 0xf); + break; + case 'r': + case_r: + ct->ct |= TCG_CT_REG; + if (TCG_TARGET_REG_BITS == 64) { + tcg_regset_set32(ct->u.regs, 0, 0xffff); + } else { + tcg_regset_set32(ct->u.regs, 0, 0xff); + } + break; + case 'C': + /* With SHRX et al, we need not use ECX as shift count register. */ + if (have_bmi2) { + goto case_r; + } else { + goto case_c; + } + + /* qemu_ld/st address constraint */ + case 'L': + ct->ct |= TCG_CT_REG; + if (TCG_TARGET_REG_BITS == 64) { + tcg_regset_set32(ct->u.regs, 0, 0xffff); + } else { + tcg_regset_set32(ct->u.regs, 0, 0xff); + } + tcg_regset_reset_reg(ct->u.regs, TCG_REG_L0); + tcg_regset_reset_reg(ct->u.regs, TCG_REG_L1); + break; + + case 'e': + ct->ct |= TCG_CT_CONST_S32; + break; + case 'Z': + ct->ct |= TCG_CT_CONST_U32; + break; + case 'I': + ct->ct |= TCG_CT_CONST_I32; + break; + + default: + return -1; + } + ct_str++; + *pct_str = ct_str; + return 0; +} + +/* test if a constant matches the constraint */ +static inline int tcg_target_const_match(tcg_target_long val, TCGType type, + const TCGArgConstraint *arg_ct) +{ + int ct = arg_ct->ct; + if (ct & TCG_CT_CONST) { + return 1; + } + if ((ct & TCG_CT_CONST_S32) && val == (int32_t)val) { + return 1; + } + if ((ct & TCG_CT_CONST_U32) && val == (uint32_t)val) { + return 1; + } + if ((ct & TCG_CT_CONST_I32) && ~val == (int32_t)~val) { + return 1; + } + return 0; +} + +#if TCG_TARGET_REG_BITS == 64 +# define LOWREGMASK(x) ((x) & 7) +#else +# define LOWREGMASK(x) (x) +#endif + +#define P_EXT 0x100 /* 0x0f opcode prefix */ +#define P_EXT38 0x200 /* 0x0f 0x38 opcode prefix */ +#define P_DATA16 0x400 /* 0x66 opcode prefix */ +#if TCG_TARGET_REG_BITS == 64 +# define P_ADDR32 0x800 /* 0x67 opcode prefix */ +# define P_REXW 0x1000 /* Set REX.W = 1 */ +# define P_REXB_R 0x2000 /* REG field as byte register */ +# define P_REXB_RM 0x4000 /* R/M field as byte register */ +# define P_GS 0x8000 /* gs segment override */ +#else +# define P_ADDR32 0 +# define P_REXW 0 +# define P_REXB_R 0 +# define P_REXB_RM 0 +# define P_GS 0 +#endif +#define P_SIMDF3 0x10000 /* 0xf3 opcode prefix */ +#define P_SIMDF2 0x20000 /* 0xf2 opcode prefix */ + +#define OPC_ARITH_EvIz (0x81) +#define OPC_ARITH_EvIb (0x83) +#define OPC_ARITH_GvEv (0x03) /* ... plus (ARITH_FOO << 3) */ +#define OPC_ANDN (0xf2 | P_EXT38) +#define OPC_ADD_GvEv (OPC_ARITH_GvEv | (ARITH_ADD << 3)) +#define OPC_BSWAP (0xc8 | P_EXT) +#define OPC_CALL_Jz (0xe8) +#define OPC_CMOVCC (0x40 | P_EXT) /* ... plus condition code */ +#define OPC_CMP_GvEv (OPC_ARITH_GvEv | (ARITH_CMP << 3)) +#define OPC_DEC_r32 (0x48) +#define OPC_IMUL_GvEv (0xaf | P_EXT) +#define OPC_IMUL_GvEvIb (0x6b) +#define OPC_IMUL_GvEvIz (0x69) +#define OPC_INC_r32 (0x40) +#define OPC_JCC_long (0x80 | P_EXT) /* ... plus condition code */ +#define OPC_JCC_short (0x70) /* ... plus condition code */ +#define OPC_JMP_long (0xe9) +#define OPC_JMP_short (0xeb) +#define OPC_LEA (0x8d) +#define OPC_MOVB_EvGv (0x88) /* stores, more or less */ +#define OPC_MOVL_EvGv (0x89) /* stores, more or less */ +#define OPC_MOVL_GvEv (0x8b) /* loads, more or less */ +#define OPC_MOVB_EvIz (0xc6) +#define OPC_MOVL_EvIz (0xc7) +#define OPC_MOVL_Iv (0xb8) +#define OPC_MOVBE_GyMy (0xf0 | P_EXT38) +#define OPC_MOVBE_MyGy (0xf1 | P_EXT38) +#define OPC_MOVSBL (0xbe | P_EXT) +#define OPC_MOVSWL (0xbf | P_EXT) +#define OPC_MOVSLQ (0x63 | P_REXW) +#define OPC_MOVZBL (0xb6 | P_EXT) +#define OPC_MOVZWL (0xb7 | P_EXT) +#define OPC_POP_r32 (0x58) +#define OPC_PUSH_r32 (0x50) +#define OPC_PUSH_Iv (0x68) +#define OPC_PUSH_Ib (0x6a) +#define OPC_RET (0xc3) +#define OPC_SETCC (0x90 | P_EXT | P_REXB_RM) /* ... plus cc */ +#define OPC_SHIFT_1 (0xd1) +#define OPC_SHIFT_Ib (0xc1) +#define OPC_SHIFT_cl (0xd3) +#define OPC_SARX (0xf7 | P_EXT38 | P_SIMDF3) +#define OPC_SHLX (0xf7 | P_EXT38 | P_DATA16) +#define OPC_SHRX (0xf7 | P_EXT38 | P_SIMDF2) +#define OPC_TESTL (0x85) +#define OPC_XCHG_ax_r32 (0x90) + +#define OPC_GRP3_Ev (0xf7) +#define OPC_GRP5 (0xff) + +/* Group 1 opcode extensions for 0x80-0x83. + These are also used as modifiers for OPC_ARITH. */ +#define ARITH_ADD 0 +#define ARITH_OR 1 +#define ARITH_ADC 2 +#define ARITH_SBB 3 +#define ARITH_AND 4 +#define ARITH_SUB 5 +#define ARITH_XOR 6 +#define ARITH_CMP 7 + +/* Group 2 opcode extensions for 0xc0, 0xc1, 0xd0-0xd3. */ +#define SHIFT_ROL 0 +#define SHIFT_ROR 1 +#define SHIFT_SHL 4 +#define SHIFT_SHR 5 +#define SHIFT_SAR 7 + +/* Group 3 opcode extensions for 0xf6, 0xf7. To be used with OPC_GRP3. */ +#define EXT3_NOT 2 +#define EXT3_NEG 3 +#define EXT3_MUL 4 +#define EXT3_IMUL 5 +#define EXT3_DIV 6 +#define EXT3_IDIV 7 + +/* Group 5 opcode extensions for 0xff. To be used with OPC_GRP5. */ +#define EXT5_INC_Ev 0 +#define EXT5_DEC_Ev 1 +#define EXT5_CALLN_Ev 2 +#define EXT5_JMPN_Ev 4 + +/* Condition codes to be added to OPC_JCC_{long,short}. */ +#define JCC_JMP (-1) +#define JCC_JO 0x0 +#define JCC_JNO 0x1 +#define JCC_JB 0x2 +#define JCC_JAE 0x3 +#define JCC_JE 0x4 +#define JCC_JNE 0x5 +#define JCC_JBE 0x6 +#define JCC_JA 0x7 +#define JCC_JS 0x8 +#define JCC_JNS 0x9 +#define JCC_JP 0xa +#define JCC_JNP 0xb +#define JCC_JL 0xc +#define JCC_JGE 0xd +#define JCC_JLE 0xe +#define JCC_JG 0xf + +static const uint8_t tcg_cond_to_jcc[] = { + [TCG_COND_EQ] = JCC_JE, + [TCG_COND_NE] = JCC_JNE, + [TCG_COND_LT] = JCC_JL, + [TCG_COND_GE] = JCC_JGE, + [TCG_COND_LE] = JCC_JLE, + [TCG_COND_GT] = JCC_JG, + [TCG_COND_LTU] = JCC_JB, + [TCG_COND_GEU] = JCC_JAE, + [TCG_COND_LEU] = JCC_JBE, + [TCG_COND_GTU] = JCC_JA, +}; + +#if TCG_TARGET_REG_BITS == 64 +static void tcg_out_opc(TCGContext *s, int opc, int r, int rm, int x) +{ + int rex; + + if (opc & P_GS) { + tcg_out8(s, 0x65); + } + if (opc & P_DATA16) { + /* We should never be asking for both 16 and 64-bit operation. */ + assert((opc & P_REXW) == 0); + tcg_out8(s, 0x66); + } + if (opc & P_ADDR32) { + tcg_out8(s, 0x67); + } + + rex = 0; + rex |= (opc & P_REXW) ? 0x8 : 0x0; /* REX.W */ + rex |= (r & 8) >> 1; /* REX.R */ + rex |= (x & 8) >> 2; /* REX.X */ + rex |= (rm & 8) >> 3; /* REX.B */ + + /* P_REXB_{R,RM} indicates that the given register is the low byte. + For %[abcd]l we need no REX prefix, but for %{si,di,bp,sp}l we do, + as otherwise the encoding indicates %[abcd]h. Note that the values + that are ORed in merely indicate that the REX byte must be present; + those bits get discarded in output. */ + rex |= opc & (r >= 4 ? P_REXB_R : 0); + rex |= opc & (rm >= 4 ? P_REXB_RM : 0); + + if (rex) { + tcg_out8(s, (uint8_t)(rex | 0x40)); + } + + if (opc & (P_EXT | P_EXT38)) { + tcg_out8(s, 0x0f); + if (opc & P_EXT38) { + tcg_out8(s, 0x38); + } + } + + tcg_out8(s, opc); +} +#else +static void tcg_out_opc(TCGContext *s, int opc) +{ + if (opc & P_DATA16) { + tcg_out8(s, 0x66); + } + if (opc & (P_EXT | P_EXT38)) { + tcg_out8(s, 0x0f); + if (opc & P_EXT38) { + tcg_out8(s, 0x38); + } + } + tcg_out8(s, opc); +} +/* Discard the register arguments to tcg_out_opc early, so as not to penalize + the 32-bit compilation paths. This method works with all versions of gcc, + whereas relying on optimization may not be able to exclude them. */ +#define tcg_out_opc(s, opc, r, rm, x) (tcg_out_opc)(s, opc) +#endif + +static void tcg_out_modrm(TCGContext *s, int opc, int r, int rm) +{ + tcg_out_opc(s, opc, r, rm, 0); + tcg_out8(s, 0xc0 | (LOWREGMASK(r) << 3) | LOWREGMASK(rm)); +} + +static void tcg_out_vex_modrm(TCGContext *s, int opc, int r, int v, int rm) +{ + int tmp; + + if ((opc & (P_REXW | P_EXT | P_EXT38)) || (rm & 8)) { + /* Three byte VEX prefix. */ + tcg_out8(s, 0xc4); + + /* VEX.m-mmmm */ + if (opc & P_EXT38) { + tmp = 2; + } else if (opc & P_EXT) { + tmp = 1; + } else { + tcg_abort(); + } + tmp |= 0x40; /* VEX.X */ + tmp |= (r & 8 ? 0 : 0x80); /* VEX.R */ + tmp |= (rm & 8 ? 0 : 0x20); /* VEX.B */ + tcg_out8(s, tmp); + + tmp = (opc & P_REXW ? 0x80 : 0); /* VEX.W */ + } else { + /* Two byte VEX prefix. */ + tcg_out8(s, 0xc5); + + tmp = (r & 8 ? 0 : 0x80); /* VEX.R */ + } + /* VEX.pp */ + if (opc & P_DATA16) { + tmp |= 1; /* 0x66 */ + } else if (opc & P_SIMDF3) { + tmp |= 2; /* 0xf3 */ + } else if (opc & P_SIMDF2) { + tmp |= 3; /* 0xf2 */ + } + tmp |= (~v & 15) << 3; /* VEX.vvvv */ + tcg_out8(s, tmp); + tcg_out8(s, opc); + tcg_out8(s, 0xc0 | (LOWREGMASK(r) << 3) | LOWREGMASK(rm)); +} + +/* Output an opcode with a full "rm + (index<code_ptr + 5 + ~rm; + intptr_t disp = offset - pc; + if (disp == (int32_t)disp) { + tcg_out_opc(s, opc, r, 0, 0); + tcg_out8(s, (LOWREGMASK(r) << 3) | 5); + tcg_out32(s, disp); + return; + } + + /* Try for an absolute address encoding. This requires the + use of the MODRM+SIB encoding and is therefore larger than + rip-relative addressing. */ + if (offset == (int32_t)offset) { + tcg_out_opc(s, opc, r, 0, 0); + tcg_out8(s, (LOWREGMASK(r) << 3) | 4); + tcg_out8(s, (4 << 3) | 5); + tcg_out32(s, offset); + return; + } + + /* ??? The memory isn't directly addressable. */ + tcg_abort(); + } else { + /* Absolute address. */ + tcg_out_opc(s, opc, r, 0, 0); + tcg_out8(s, (r << 3) | 5); + tcg_out32(s, offset); + return; + } + } + + /* Find the length of the immediate addend. Note that the encoding + that would be used for (%ebp) indicates absolute addressing. */ + if (rm < 0) { + mod = 0, len = 4, rm = 5; + } else if (offset == 0 && LOWREGMASK(rm) != TCG_REG_EBP) { + mod = 0, len = 0; + } else if (offset == (int8_t)offset) { + mod = 0x40, len = 1; + } else { + mod = 0x80, len = 4; + } + + /* Use a single byte MODRM format if possible. Note that the encoding + that would be used for %esp is the escape to the two byte form. */ + if (index < 0 && LOWREGMASK(rm) != TCG_REG_ESP) { + /* Single byte MODRM format. */ + tcg_out_opc(s, opc, r, rm, 0); + tcg_out8(s, mod | (LOWREGMASK(r) << 3) | LOWREGMASK(rm)); + } else { + /* Two byte MODRM+SIB format. */ + + /* Note that the encoding that would place %esp into the index + field indicates no index register. In 64-bit mode, the REX.X + bit counts, so %r12 can be used as the index. */ + if (index < 0) { + index = 4; + } else { + assert(index != TCG_REG_ESP); + } + + tcg_out_opc(s, opc, r, rm, index); + tcg_out8(s, mod | (LOWREGMASK(r) << 3) | 4); + tcg_out8(s, (shift << 6) | (LOWREGMASK(index) << 3) | LOWREGMASK(rm)); + } + + if (len == 1) { + tcg_out8(s, offset); + } else if (len == 4) { + tcg_out32(s, offset); + } +} + +/* A simplification of the above with no index or shift. */ +static inline void tcg_out_modrm_offset(TCGContext *s, int opc, int r, + int rm, intptr_t offset) +{ + tcg_out_modrm_sib_offset(s, opc, r, rm, -1, 0, offset); +} + +/* Generate dest op= src. Uses the same ARITH_* codes as tgen_arithi. */ +static inline void tgen_arithr(TCGContext *s, int subop, int dest, int src) +{ + /* Propagate an opcode prefix, such as P_REXW. */ + int ext = subop & ~0x7; + subop &= 0x7; + + tcg_out_modrm(s, OPC_ARITH_GvEv + (subop << 3) + ext, dest, src); +} + +static inline void tcg_out_mov(TCGContext *s, TCGType type, + TCGReg ret, TCGReg arg) +{ + if (arg != ret) { + int opc = OPC_MOVL_GvEv + (type == TCG_TYPE_I64 ? P_REXW : 0); + tcg_out_modrm(s, opc, ret, arg); + } +} + +static void tcg_out_movi(TCGContext *s, TCGType type, + TCGReg ret, tcg_target_long arg) +{ + tcg_target_long diff; + + if (arg == 0) { + tgen_arithr(s, ARITH_XOR, ret, ret); + return; + } + if (arg == (uint32_t)arg || type == TCG_TYPE_I32) { + tcg_out_opc(s, OPC_MOVL_Iv + LOWREGMASK(ret), 0, ret, 0); + tcg_out32(s, arg); + return; + } + if (arg == (int32_t)arg) { + tcg_out_modrm(s, OPC_MOVL_EvIz + P_REXW, 0, ret); + tcg_out32(s, arg); + return; + } + + /* Try a 7 byte pc-relative lea before the 10 byte movq. */ + diff = arg - ((uintptr_t)s->code_ptr + 7); + if (diff == (int32_t)diff) { + tcg_out_opc(s, OPC_LEA | P_REXW, ret, 0, 0); + tcg_out8(s, (LOWREGMASK(ret) << 3) | 5); + tcg_out32(s, diff); + return; + } + + tcg_out_opc(s, OPC_MOVL_Iv + P_REXW + LOWREGMASK(ret), 0, ret, 0); + tcg_out64(s, arg); +} + +static inline void tcg_out_pushi(TCGContext *s, tcg_target_long val) +{ + if (val == (int8_t)val) { + tcg_out_opc(s, OPC_PUSH_Ib, 0, 0, 0); + tcg_out8(s, val); + } else if (val == (int32_t)val) { + tcg_out_opc(s, OPC_PUSH_Iv, 0, 0, 0); + tcg_out32(s, val); + } else { + tcg_abort(); + } +} + +static inline void tcg_out_push(TCGContext *s, int reg) +{ + tcg_out_opc(s, OPC_PUSH_r32 + LOWREGMASK(reg), 0, reg, 0); +} + +static inline void tcg_out_pop(TCGContext *s, int reg) +{ + tcg_out_opc(s, OPC_POP_r32 + LOWREGMASK(reg), 0, reg, 0); +} + +static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, + TCGReg arg1, intptr_t arg2) +{ + int opc = OPC_MOVL_GvEv + (type == TCG_TYPE_I64 ? P_REXW : 0); + tcg_out_modrm_offset(s, opc, ret, arg1, arg2); +} + +static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, + TCGReg arg1, intptr_t arg2) +{ + int opc = OPC_MOVL_EvGv + (type == TCG_TYPE_I64 ? P_REXW : 0); + tcg_out_modrm_offset(s, opc, arg, arg1, arg2); +} + +static inline void tcg_out_sti(TCGContext *s, TCGType type, TCGReg base, + tcg_target_long ofs, tcg_target_long val) +{ + int opc = OPC_MOVL_EvIz + (type == TCG_TYPE_I64 ? P_REXW : 0); + tcg_out_modrm_offset(s, opc, 0, base, ofs); + tcg_out32(s, val); +} + +static void tcg_out_shifti(TCGContext *s, int subopc, int reg, int count) +{ + /* Propagate an opcode prefix, such as P_DATA16. */ + int ext = subopc & ~0x7; + subopc &= 0x7; + + if (count == 1) { + tcg_out_modrm(s, OPC_SHIFT_1 + ext, subopc, reg); + } else { + tcg_out_modrm(s, OPC_SHIFT_Ib + ext, subopc, reg); + tcg_out8(s, count); + } +} + +static inline void tcg_out_bswap32(TCGContext *s, int reg) +{ + tcg_out_opc(s, OPC_BSWAP + LOWREGMASK(reg), 0, reg, 0); +} + +static inline void tcg_out_rolw_8(TCGContext *s, int reg) +{ + tcg_out_shifti(s, SHIFT_ROL + P_DATA16, reg, 8); +} + +static inline void tcg_out_ext8u(TCGContext *s, int dest, int src) +{ + /* movzbl */ + assert(src < 4 || TCG_TARGET_REG_BITS == 64); + tcg_out_modrm(s, OPC_MOVZBL + P_REXB_RM, dest, src); +} + +static void tcg_out_ext8s(TCGContext *s, int dest, int src, int rexw) +{ + /* movsbl */ + assert(src < 4 || TCG_TARGET_REG_BITS == 64); + tcg_out_modrm(s, OPC_MOVSBL + P_REXB_RM + rexw, dest, src); +} + +static inline void tcg_out_ext16u(TCGContext *s, int dest, int src) +{ + /* movzwl */ + tcg_out_modrm(s, OPC_MOVZWL, dest, src); +} + +static inline void tcg_out_ext16s(TCGContext *s, int dest, int src, int rexw) +{ + /* movsw[lq] */ + tcg_out_modrm(s, OPC_MOVSWL + rexw, dest, src); +} + +static inline void tcg_out_ext32u(TCGContext *s, int dest, int src) +{ + /* 32-bit mov zero extends. */ + tcg_out_modrm(s, OPC_MOVL_GvEv, dest, src); +} + +static inline void tcg_out_ext32s(TCGContext *s, int dest, int src) +{ + tcg_out_modrm(s, OPC_MOVSLQ, dest, src); +} + +static inline void tcg_out_bswap64(TCGContext *s, int reg) +{ + tcg_out_opc(s, OPC_BSWAP + P_REXW + LOWREGMASK(reg), 0, reg, 0); +} + +static void tgen_arithi(TCGContext *s, int c, int r0, + tcg_target_long val, int cf) +{ + int rexw = 0; + + if (TCG_TARGET_REG_BITS == 64) { + rexw = c & -8; + c &= 7; + } + + /* ??? While INC is 2 bytes shorter than ADDL $1, they also induce + partial flags update stalls on Pentium4 and are not recommended + by current Intel optimization manuals. */ + if (!cf && (c == ARITH_ADD || c == ARITH_SUB) && (val == 1 || val == -1)) { + int is_inc = (c == ARITH_ADD) ^ (val < 0); + if (TCG_TARGET_REG_BITS == 64) { + /* The single-byte increment encodings are re-tasked as the + REX prefixes. Use the MODRM encoding. */ + tcg_out_modrm(s, OPC_GRP5 + rexw, + (is_inc ? EXT5_INC_Ev : EXT5_DEC_Ev), r0); + } else { + tcg_out8(s, (is_inc ? OPC_INC_r32 : OPC_DEC_r32) + r0); + } + return; + } + + if (c == ARITH_AND) { + if (TCG_TARGET_REG_BITS == 64) { + if (val == 0xffffffffu) { + tcg_out_ext32u(s, r0, r0); + return; + } + if (val == (uint32_t)val) { + /* AND with no high bits set can use a 32-bit operation. */ + rexw = 0; + } + } + if (val == 0xffu && (r0 < 4 || TCG_TARGET_REG_BITS == 64)) { + tcg_out_ext8u(s, r0, r0); + return; + } + if (val == 0xffffu) { + tcg_out_ext16u(s, r0, r0); + return; + } + } + + if (val == (int8_t)val) { + tcg_out_modrm(s, OPC_ARITH_EvIb + rexw, c, r0); + tcg_out8(s, val); + return; + } + if (rexw == 0 || val == (int32_t)val) { + tcg_out_modrm(s, OPC_ARITH_EvIz + rexw, c, r0); + tcg_out32(s, val); + return; + } + + tcg_abort(); +} + +static void tcg_out_addi(TCGContext *s, int reg, tcg_target_long val) +{ + if (val != 0) { + tgen_arithi(s, ARITH_ADD + P_REXW, reg, val, 0); + } +} + +/* Use SMALL != 0 to force a short forward branch. */ +static void tcg_out_jxx(TCGContext *s, int opc, TCGLabel *l, int small) +{ + int32_t val, val1; + + if (l->has_value) { + val = tcg_pcrel_diff(s, l->u.value_ptr); + val1 = val - 2; + if ((int8_t)val1 == val1) { + if (opc == -1) { + tcg_out8(s, OPC_JMP_short); + } else { + tcg_out8(s, OPC_JCC_short + opc); + } + tcg_out8(s, val1); + } else { + if (small) { + tcg_abort(); + } + if (opc == -1) { + tcg_out8(s, OPC_JMP_long); + tcg_out32(s, val - 5); + } else { + tcg_out_opc(s, OPC_JCC_long + opc, 0, 0, 0); + tcg_out32(s, val - 6); + } + } + } else if (small) { + if (opc == -1) { + tcg_out8(s, OPC_JMP_short); + } else { + tcg_out8(s, OPC_JCC_short + opc); + } + tcg_out_reloc(s, s->code_ptr, R_386_PC8, l, -1); + s->code_ptr += 1; + } else { + if (opc == -1) { + tcg_out8(s, OPC_JMP_long); + } else { + tcg_out_opc(s, OPC_JCC_long + opc, 0, 0, 0); + } + tcg_out_reloc(s, s->code_ptr, R_386_PC32, l, -4); + s->code_ptr += 4; + } +} + +static void tcg_out_cmp(TCGContext *s, TCGArg arg1, TCGArg arg2, + int const_arg2, int rexw) +{ + if (const_arg2) { + if (arg2 == 0) { + /* test r, r */ + tcg_out_modrm(s, OPC_TESTL + rexw, arg1, arg1); + } else { + tgen_arithi(s, ARITH_CMP + rexw, arg1, arg2, 0); + } + } else { + tgen_arithr(s, ARITH_CMP + rexw, arg1, arg2); + } +} + +static void tcg_out_brcond32(TCGContext *s, TCGCond cond, + TCGArg arg1, TCGArg arg2, int const_arg2, + TCGLabel *label, int small) +{ + tcg_out_cmp(s, arg1, arg2, const_arg2, 0); + tcg_out_jxx(s, tcg_cond_to_jcc[cond], label, small); +} + +#if TCG_TARGET_REG_BITS == 64 +static void tcg_out_brcond64(TCGContext *s, TCGCond cond, + TCGArg arg1, TCGArg arg2, int const_arg2, + TCGLabel *label, int small) +{ + tcg_out_cmp(s, arg1, arg2, const_arg2, P_REXW); + tcg_out_jxx(s, tcg_cond_to_jcc[cond], label, small); +} +#else +/* XXX: we implement it at the target level to avoid having to + handle cross basic blocks temporaries */ +static void tcg_out_brcond2(TCGContext *s, const TCGArg *args, + const int *const_args, int small) +{ + TCGLabel *label_next = gen_new_label(); + TCGLabel *label_this = arg_label(args[5]); + + switch(args[4]) { + case TCG_COND_EQ: + tcg_out_brcond32(s, TCG_COND_NE, args[0], args[2], const_args[2], + label_next, 1); + tcg_out_brcond32(s, TCG_COND_EQ, args[1], args[3], const_args[3], + label_this, small); + break; + case TCG_COND_NE: + tcg_out_brcond32(s, TCG_COND_NE, args[0], args[2], const_args[2], + label_this, small); + tcg_out_brcond32(s, TCG_COND_NE, args[1], args[3], const_args[3], + label_this, small); + break; + case TCG_COND_LT: + tcg_out_brcond32(s, TCG_COND_LT, args[1], args[3], const_args[3], + label_this, small); + tcg_out_jxx(s, JCC_JNE, label_next, 1); + tcg_out_brcond32(s, TCG_COND_LTU, args[0], args[2], const_args[2], + label_this, small); + break; + case TCG_COND_LE: + tcg_out_brcond32(s, TCG_COND_LT, args[1], args[3], const_args[3], + label_this, small); + tcg_out_jxx(s, JCC_JNE, label_next, 1); + tcg_out_brcond32(s, TCG_COND_LEU, args[0], args[2], const_args[2], + label_this, small); + break; + case TCG_COND_GT: + tcg_out_brcond32(s, TCG_COND_GT, args[1], args[3], const_args[3], + label_this, small); + tcg_out_jxx(s, JCC_JNE, label_next, 1); + tcg_out_brcond32(s, TCG_COND_GTU, args[0], args[2], const_args[2], + label_this, small); + break; + case TCG_COND_GE: + tcg_out_brcond32(s, TCG_COND_GT, args[1], args[3], const_args[3], + label_this, small); + tcg_out_jxx(s, JCC_JNE, label_next, 1); + tcg_out_brcond32(s, TCG_COND_GEU, args[0], args[2], const_args[2], + label_this, small); + break; + case TCG_COND_LTU: + tcg_out_brcond32(s, TCG_COND_LTU, args[1], args[3], const_args[3], + label_this, small); + tcg_out_jxx(s, JCC_JNE, label_next, 1); + tcg_out_brcond32(s, TCG_COND_LTU, args[0], args[2], const_args[2], + label_this, small); + break; + case TCG_COND_LEU: + tcg_out_brcond32(s, TCG_COND_LTU, args[1], args[3], const_args[3], + label_this, small); + tcg_out_jxx(s, JCC_JNE, label_next, 1); + tcg_out_brcond32(s, TCG_COND_LEU, args[0], args[2], const_args[2], + label_this, small); + break; + case TCG_COND_GTU: + tcg_out_brcond32(s, TCG_COND_GTU, args[1], args[3], const_args[3], + label_this, small); + tcg_out_jxx(s, JCC_JNE, label_next, 1); + tcg_out_brcond32(s, TCG_COND_GTU, args[0], args[2], const_args[2], + label_this, small); + break; + case TCG_COND_GEU: + tcg_out_brcond32(s, TCG_COND_GTU, args[1], args[3], const_args[3], + label_this, small); + tcg_out_jxx(s, JCC_JNE, label_next, 1); + tcg_out_brcond32(s, TCG_COND_GEU, args[0], args[2], const_args[2], + label_this, small); + break; + default: + tcg_abort(); + } + tcg_out_label(s, label_next, s->code_ptr); +} +#endif + +static void tcg_out_setcond32(TCGContext *s, TCGCond cond, TCGArg dest, + TCGArg arg1, TCGArg arg2, int const_arg2) +{ + tcg_out_cmp(s, arg1, arg2, const_arg2, 0); + tcg_out_modrm(s, OPC_SETCC | tcg_cond_to_jcc[cond], 0, dest); + tcg_out_ext8u(s, dest, dest); +} + +#if TCG_TARGET_REG_BITS == 64 +static void tcg_out_setcond64(TCGContext *s, TCGCond cond, TCGArg dest, + TCGArg arg1, TCGArg arg2, int const_arg2) +{ + tcg_out_cmp(s, arg1, arg2, const_arg2, P_REXW); + tcg_out_modrm(s, OPC_SETCC | tcg_cond_to_jcc[cond], 0, dest); + tcg_out_ext8u(s, dest, dest); +} +#else +static void tcg_out_setcond2(TCGContext *s, const TCGArg *args, + const int *const_args) +{ + TCGArg new_args[6]; + TCGLabel *label_true, *label_over; + + memcpy(new_args, args+1, 5*sizeof(TCGArg)); + + if (args[0] == args[1] || args[0] == args[2] + || (!const_args[3] && args[0] == args[3]) + || (!const_args[4] && args[0] == args[4])) { + /* When the destination overlaps with one of the argument + registers, don't do anything tricky. */ + label_true = gen_new_label(); + label_over = gen_new_label(); + + new_args[5] = label_arg(label_true); + tcg_out_brcond2(s, new_args, const_args+1, 1); + + tcg_out_movi(s, TCG_TYPE_I32, args[0], 0); + tcg_out_jxx(s, JCC_JMP, label_over, 1); + tcg_out_label(s, label_true, s->code_ptr); + + tcg_out_movi(s, TCG_TYPE_I32, args[0], 1); + tcg_out_label(s, label_over, s->code_ptr); + } else { + /* When the destination does not overlap one of the arguments, + clear the destination first, jump if cond false, and emit an + increment in the true case. This results in smaller code. */ + + tcg_out_movi(s, TCG_TYPE_I32, args[0], 0); + + label_over = gen_new_label(); + new_args[4] = tcg_invert_cond(new_args[4]); + new_args[5] = label_arg(label_over); + tcg_out_brcond2(s, new_args, const_args+1, 1); + + tgen_arithi(s, ARITH_ADD, args[0], 1, 0); + tcg_out_label(s, label_over, s->code_ptr); + } +} +#endif + +static void tcg_out_movcond32(TCGContext *s, TCGCond cond, TCGArg dest, + TCGArg c1, TCGArg c2, int const_c2, + TCGArg v1) +{ + tcg_out_cmp(s, c1, c2, const_c2, 0); + if (have_cmov) { + tcg_out_modrm(s, OPC_CMOVCC | tcg_cond_to_jcc[cond], dest, v1); + } else { + TCGLabel *over = gen_new_label(); + tcg_out_jxx(s, tcg_cond_to_jcc[tcg_invert_cond(cond)], over, 1); + tcg_out_mov(s, TCG_TYPE_I32, dest, v1); + tcg_out_label(s, over, s->code_ptr); + } +} + +#if TCG_TARGET_REG_BITS == 64 +static void tcg_out_movcond64(TCGContext *s, TCGCond cond, TCGArg dest, + TCGArg c1, TCGArg c2, int const_c2, + TCGArg v1) +{ + tcg_out_cmp(s, c1, c2, const_c2, P_REXW); + tcg_out_modrm(s, OPC_CMOVCC | tcg_cond_to_jcc[cond] | P_REXW, dest, v1); +} +#endif + +static void tcg_out_branch(TCGContext *s, int call, tcg_insn_unit *dest) +{ + intptr_t disp = tcg_pcrel_diff(s, dest) - 5; + + if (disp == (int32_t)disp) { + tcg_out_opc(s, call ? OPC_CALL_Jz : OPC_JMP_long, 0, 0, 0); + tcg_out32(s, disp); + } else { + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R10, (uintptr_t)dest); + tcg_out_modrm(s, OPC_GRP5, + call ? EXT5_CALLN_Ev : EXT5_JMPN_Ev, TCG_REG_R10); + } +} + +static inline void tcg_out_call(TCGContext *s, tcg_insn_unit *dest) +{ + tcg_out_branch(s, 1, dest); +} + +static void tcg_out_jmp(TCGContext *s, tcg_insn_unit *dest) +{ + tcg_out_branch(s, 0, dest); +} + +#if defined(CONFIG_SOFTMMU) +/* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr, + * int mmu_idx, uintptr_t ra) + */ +static void * const qemu_ld_helpers[16] = { + [MO_UB] = helper_ret_ldub_mmu, + [MO_LEUW] = helper_le_lduw_mmu, + [MO_LEUL] = helper_le_ldul_mmu, + [MO_LEQ] = helper_le_ldq_mmu, + [MO_BEUW] = helper_be_lduw_mmu, + [MO_BEUL] = helper_be_ldul_mmu, + [MO_BEQ] = helper_be_ldq_mmu, +}; + +/* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr, + * uintxx_t val, int mmu_idx, uintptr_t ra) + */ +static void * const qemu_st_helpers[16] = { + [MO_UB] = helper_ret_stb_mmu, + [MO_LEUW] = helper_le_stw_mmu, + [MO_LEUL] = helper_le_stl_mmu, + [MO_LEQ] = helper_le_stq_mmu, + [MO_BEUW] = helper_be_stw_mmu, + [MO_BEUL] = helper_be_stl_mmu, + [MO_BEQ] = helper_be_stq_mmu, +}; + +/* Perform the TLB load and compare. + + Inputs: + ADDRLO and ADDRHI contain the low and high part of the address. + + MEM_INDEX and S_BITS are the memory context and log2 size of the load. + + WHICH is the offset into the CPUTLBEntry structure of the slot to read. + This should be offsetof addr_read or addr_write. + + Outputs: + LABEL_PTRS is filled with 1 (32-bit addresses) or 2 (64-bit addresses) + positions of the displacements of forward jumps to the TLB miss case. + + Second argument register is loaded with the low part of the address. + In the TLB hit case, it has been adjusted as indicated by the TLB + and so is a host address. In the TLB miss case, it continues to + hold a guest address. + + First argument register is clobbered. */ + +static inline void tcg_out_tlb_load(TCGContext *s, TCGReg addrlo, TCGReg addrhi, + int mem_index, TCGMemOp opc, + tcg_insn_unit **label_ptr, int which) +{ + const TCGReg r0 = TCG_REG_L0; + const TCGReg r1 = TCG_REG_L1; + TCGType ttype = TCG_TYPE_I32; + TCGType tlbtype = TCG_TYPE_I32; + int trexw = 0, hrexw = 0, tlbrexw = 0; + int s_mask = (1 << (opc & MO_SIZE)) - 1; + bool aligned = (opc & MO_AMASK) == MO_ALIGN || s_mask == 0; + + if (TCG_TARGET_REG_BITS == 64) { + if (TARGET_LONG_BITS == 64) { + ttype = TCG_TYPE_I64; + trexw = P_REXW; + } + if (TCG_TYPE_PTR == TCG_TYPE_I64) { + hrexw = P_REXW; + if (TARGET_PAGE_BITS + CPU_TLB_BITS > 32) { + tlbtype = TCG_TYPE_I64; + tlbrexw = P_REXW; + } + } + } + + tcg_out_mov(s, tlbtype, r0, addrlo); + if (aligned) { + tcg_out_mov(s, ttype, r1, addrlo); + } else { + /* For unaligned access check that we don't cross pages using + the page address of the last byte. */ + tcg_out_modrm_offset(s, OPC_LEA + trexw, r1, addrlo, s_mask); + } + + tcg_out_shifti(s, SHIFT_SHR + tlbrexw, r0, + TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS); + + tgen_arithi(s, ARITH_AND + trexw, r1, + TARGET_PAGE_MASK | (aligned ? s_mask : 0), 0); + tgen_arithi(s, ARITH_AND + tlbrexw, r0, + (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS, 0); + + tcg_out_modrm_sib_offset(s, OPC_LEA + hrexw, r0, TCG_AREG0, r0, 0, + offsetof(CPUArchState, tlb_table[mem_index][0]) + + which); + + /* cmp 0(r0), r1 */ + tcg_out_modrm_offset(s, OPC_CMP_GvEv + trexw, r1, r0, 0); + + /* Prepare for both the fast path add of the tlb addend, and the slow + path function argument setup. There are two cases worth note: + For 32-bit guest and x86_64 host, MOVL zero-extends the guest address + before the fastpath ADDQ below. For 64-bit guest and x32 host, MOVQ + copies the entire guest address for the slow path, while truncation + for the 32-bit host happens with the fastpath ADDL below. */ + tcg_out_mov(s, ttype, r1, addrlo); + + /* jne slow_path */ + tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0); + label_ptr[0] = s->code_ptr; + s->code_ptr += 4; + + if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) { + /* cmp 4(r0), addrhi */ + tcg_out_modrm_offset(s, OPC_CMP_GvEv, addrhi, r0, 4); + + /* jne slow_path */ + tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0); + label_ptr[1] = s->code_ptr; + s->code_ptr += 4; + } + + /* TLB Hit. */ + + /* add addend(r0), r1 */ + tcg_out_modrm_offset(s, OPC_ADD_GvEv + hrexw, r1, r0, + offsetof(CPUTLBEntry, addend) - which); +} + +/* + * Record the context of a call to the out of line helper code for the slow path + * for a load or store, so that we can later generate the correct helper code + */ +static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi, + TCGReg datalo, TCGReg datahi, + TCGReg addrlo, TCGReg addrhi, + tcg_insn_unit *raddr, + tcg_insn_unit **label_ptr) +{ + TCGLabelQemuLdst *label = new_ldst_label(s); + + label->is_ld = is_ld; + label->oi = oi; + label->datalo_reg = datalo; + label->datahi_reg = datahi; + label->addrlo_reg = addrlo; + label->addrhi_reg = addrhi; + label->raddr = raddr; + label->label_ptr[0] = label_ptr[0]; + if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) { + label->label_ptr[1] = label_ptr[1]; + } +} + +/* + * Generate code for the slow path for a load at the end of block + */ +static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l) +{ + TCGMemOpIdx oi = l->oi; + TCGMemOp opc = get_memop(oi); + TCGReg data_reg; + tcg_insn_unit **label_ptr = &l->label_ptr[0]; + + /* resolve label address */ + tcg_patch32(label_ptr[0], s->code_ptr - label_ptr[0] - 4); + if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) { + tcg_patch32(label_ptr[1], s->code_ptr - label_ptr[1] - 4); + } + + if (TCG_TARGET_REG_BITS == 32) { + int ofs = 0; + + tcg_out_st(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP, ofs); + ofs += 4; + + tcg_out_st(s, TCG_TYPE_I32, l->addrlo_reg, TCG_REG_ESP, ofs); + ofs += 4; + + if (TARGET_LONG_BITS == 64) { + tcg_out_st(s, TCG_TYPE_I32, l->addrhi_reg, TCG_REG_ESP, ofs); + ofs += 4; + } + + tcg_out_sti(s, TCG_TYPE_I32, TCG_REG_ESP, ofs, oi); + ofs += 4; + + tcg_out_sti(s, TCG_TYPE_PTR, TCG_REG_ESP, ofs, (uintptr_t)l->raddr); + } else { + tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0); + /* The second argument is already loaded with addrlo. */ + tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[2], oi); + tcg_out_movi(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[3], + (uintptr_t)l->raddr); + } + + tcg_out_call(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]); + + data_reg = l->datalo_reg; + switch (opc & MO_SSIZE) { + case MO_SB: + tcg_out_ext8s(s, data_reg, TCG_REG_EAX, P_REXW); + break; + case MO_SW: + tcg_out_ext16s(s, data_reg, TCG_REG_EAX, P_REXW); + break; +#if TCG_TARGET_REG_BITS == 64 + case MO_SL: + tcg_out_ext32s(s, data_reg, TCG_REG_EAX); + break; +#endif + case MO_UB: + case MO_UW: + /* Note that the helpers have zero-extended to tcg_target_long. */ + case MO_UL: + tcg_out_mov(s, TCG_TYPE_I32, data_reg, TCG_REG_EAX); + break; + case MO_Q: + if (TCG_TARGET_REG_BITS == 64) { + tcg_out_mov(s, TCG_TYPE_I64, data_reg, TCG_REG_RAX); + } else if (data_reg == TCG_REG_EDX) { + /* xchg %edx, %eax */ + tcg_out_opc(s, OPC_XCHG_ax_r32 + TCG_REG_EDX, 0, 0, 0); + tcg_out_mov(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_EAX); + } else { + tcg_out_mov(s, TCG_TYPE_I32, data_reg, TCG_REG_EAX); + tcg_out_mov(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_EDX); + } + break; + default: + tcg_abort(); + } + + /* Jump to the code corresponding to next IR of qemu_st */ + tcg_out_jmp(s, l->raddr); +} + +/* + * Generate code for the slow path for a store at the end of block + */ +static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l) +{ + TCGMemOpIdx oi = l->oi; + TCGMemOp opc = get_memop(oi); + TCGMemOp s_bits = opc & MO_SIZE; + tcg_insn_unit **label_ptr = &l->label_ptr[0]; + TCGReg retaddr; + + /* resolve label address */ + tcg_patch32(label_ptr[0], s->code_ptr - label_ptr[0] - 4); + if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) { + tcg_patch32(label_ptr[1], s->code_ptr - label_ptr[1] - 4); + } + + if (TCG_TARGET_REG_BITS == 32) { + int ofs = 0; + + tcg_out_st(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP, ofs); + ofs += 4; + + tcg_out_st(s, TCG_TYPE_I32, l->addrlo_reg, TCG_REG_ESP, ofs); + ofs += 4; + + if (TARGET_LONG_BITS == 64) { + tcg_out_st(s, TCG_TYPE_I32, l->addrhi_reg, TCG_REG_ESP, ofs); + ofs += 4; + } + + tcg_out_st(s, TCG_TYPE_I32, l->datalo_reg, TCG_REG_ESP, ofs); + ofs += 4; + + if (s_bits == MO_64) { + tcg_out_st(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_ESP, ofs); + ofs += 4; + } + + tcg_out_sti(s, TCG_TYPE_I32, TCG_REG_ESP, ofs, oi); + ofs += 4; + + retaddr = TCG_REG_EAX; + tcg_out_movi(s, TCG_TYPE_PTR, retaddr, (uintptr_t)l->raddr); + tcg_out_st(s, TCG_TYPE_PTR, retaddr, TCG_REG_ESP, ofs); + } else { + tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0); + /* The second argument is already loaded with addrlo. */ + tcg_out_mov(s, (s_bits == MO_64 ? TCG_TYPE_I64 : TCG_TYPE_I32), + tcg_target_call_iarg_regs[2], l->datalo_reg); + tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[3], oi); + + if (ARRAY_SIZE(tcg_target_call_iarg_regs) > 4) { + retaddr = tcg_target_call_iarg_regs[4]; + tcg_out_movi(s, TCG_TYPE_PTR, retaddr, (uintptr_t)l->raddr); + } else { + retaddr = TCG_REG_RAX; + tcg_out_movi(s, TCG_TYPE_PTR, retaddr, (uintptr_t)l->raddr); + tcg_out_st(s, TCG_TYPE_PTR, retaddr, TCG_REG_ESP, + TCG_TARGET_CALL_STACK_OFFSET); + } + } + + /* "Tail call" to the helper, with the return address back inline. */ + tcg_out_push(s, retaddr); + tcg_out_jmp(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]); +} +#elif defined(__x86_64__) && defined(__linux__) +# include +# include + +int arch_prctl(int code, unsigned long addr); + +static int guest_base_flags; +static inline void setup_guest_base_seg(void) +{ + if (arch_prctl(ARCH_SET_GS, guest_base) == 0) { + guest_base_flags = P_GS; + } +} +#else +# define guest_base_flags 0 +static inline void setup_guest_base_seg(void) { } +#endif /* SOFTMMU */ + +static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi, + TCGReg base, int index, intptr_t ofs, + int seg, TCGMemOp memop) +{ + const TCGMemOp real_bswap = memop & MO_BSWAP; + TCGMemOp bswap = real_bswap; + int movop = OPC_MOVL_GvEv; + + if (have_movbe && real_bswap) { + bswap = 0; + movop = OPC_MOVBE_GyMy; + } + + switch (memop & MO_SSIZE) { + case MO_UB: + tcg_out_modrm_sib_offset(s, OPC_MOVZBL + seg, datalo, + base, index, 0, ofs); + break; + case MO_SB: + tcg_out_modrm_sib_offset(s, OPC_MOVSBL + P_REXW + seg, datalo, + base, index, 0, ofs); + break; + case MO_UW: + tcg_out_modrm_sib_offset(s, OPC_MOVZWL + seg, datalo, + base, index, 0, ofs); + if (real_bswap) { + tcg_out_rolw_8(s, datalo); + } + break; + case MO_SW: + if (real_bswap) { + if (have_movbe) { + tcg_out_modrm_sib_offset(s, OPC_MOVBE_GyMy + P_DATA16 + seg, + datalo, base, index, 0, ofs); + } else { + tcg_out_modrm_sib_offset(s, OPC_MOVZWL + seg, datalo, + base, index, 0, ofs); + tcg_out_rolw_8(s, datalo); + } + tcg_out_modrm(s, OPC_MOVSWL + P_REXW, datalo, datalo); + } else { + tcg_out_modrm_sib_offset(s, OPC_MOVSWL + P_REXW + seg, + datalo, base, index, 0, ofs); + } + break; + case MO_UL: + tcg_out_modrm_sib_offset(s, movop + seg, datalo, base, index, 0, ofs); + if (bswap) { + tcg_out_bswap32(s, datalo); + } + break; +#if TCG_TARGET_REG_BITS == 64 + case MO_SL: + if (real_bswap) { + tcg_out_modrm_sib_offset(s, movop + seg, datalo, + base, index, 0, ofs); + if (bswap) { + tcg_out_bswap32(s, datalo); + } + tcg_out_ext32s(s, datalo, datalo); + } else { + tcg_out_modrm_sib_offset(s, OPC_MOVSLQ + seg, datalo, + base, index, 0, ofs); + } + break; +#endif + case MO_Q: + if (TCG_TARGET_REG_BITS == 64) { + tcg_out_modrm_sib_offset(s, movop + P_REXW + seg, datalo, + base, index, 0, ofs); + if (bswap) { + tcg_out_bswap64(s, datalo); + } + } else { + if (real_bswap) { + int t = datalo; + datalo = datahi; + datahi = t; + } + if (base != datalo) { + tcg_out_modrm_sib_offset(s, movop + seg, datalo, + base, index, 0, ofs); + tcg_out_modrm_sib_offset(s, movop + seg, datahi, + base, index, 0, ofs + 4); + } else { + tcg_out_modrm_sib_offset(s, movop + seg, datahi, + base, index, 0, ofs + 4); + tcg_out_modrm_sib_offset(s, movop + seg, datalo, + base, index, 0, ofs); + } + if (bswap) { + tcg_out_bswap32(s, datalo); + tcg_out_bswap32(s, datahi); + } + } + break; + default: + tcg_abort(); + } +} + +/* XXX: qemu_ld and qemu_st could be modified to clobber only EDX and + EAX. It will be useful once fixed registers globals are less + common. */ +static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64) +{ + TCGReg datalo, datahi, addrlo; + TCGReg addrhi __attribute__((unused)); + TCGMemOpIdx oi; + TCGMemOp opc; +#if defined(CONFIG_SOFTMMU) + int mem_index; + tcg_insn_unit *label_ptr[2]; +#endif + + datalo = *args++; + datahi = (TCG_TARGET_REG_BITS == 32 && is64 ? *args++ : 0); + addrlo = *args++; + addrhi = (TARGET_LONG_BITS > TCG_TARGET_REG_BITS ? *args++ : 0); + oi = *args++; + opc = get_memop(oi); + +#if defined(CONFIG_SOFTMMU) + mem_index = get_mmuidx(oi); + + tcg_out_tlb_load(s, addrlo, addrhi, mem_index, opc, + label_ptr, offsetof(CPUTLBEntry, addr_read)); + + /* TLB Hit. */ + tcg_out_qemu_ld_direct(s, datalo, datahi, TCG_REG_L1, -1, 0, 0, opc); + + /* Record the current context of a load into ldst label */ + add_qemu_ldst_label(s, true, oi, datalo, datahi, addrlo, addrhi, + s->code_ptr, label_ptr); +#else + { + int32_t offset = guest_base; + TCGReg base = addrlo; + int index = -1; + int seg = 0; + + /* For a 32-bit guest, the high 32 bits may contain garbage. + We can do this with the ADDR32 prefix if we're not using + a guest base, or when using segmentation. Otherwise we + need to zero-extend manually. */ + if (guest_base == 0 || guest_base_flags) { + seg = guest_base_flags; + offset = 0; + if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) { + seg |= P_ADDR32; + } + } else if (TCG_TARGET_REG_BITS == 64) { + if (TARGET_LONG_BITS == 32) { + tcg_out_ext32u(s, TCG_REG_L0, base); + base = TCG_REG_L0; + } + if (offset != guest_base) { + tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L1, guest_base); + index = TCG_REG_L1; + offset = 0; + } + } + + tcg_out_qemu_ld_direct(s, datalo, datahi, + base, index, offset, seg, opc); + } +#endif +} + +static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi, + TCGReg base, intptr_t ofs, int seg, + TCGMemOp memop) +{ + /* ??? Ideally we wouldn't need a scratch register. For user-only, + we could perform the bswap twice to restore the original value + instead of moving to the scratch. But as it is, the L constraint + means that TCG_REG_L0 is definitely free here. */ + const TCGReg scratch = TCG_REG_L0; + const TCGMemOp real_bswap = memop & MO_BSWAP; + TCGMemOp bswap = real_bswap; + int movop = OPC_MOVL_EvGv; + + if (have_movbe && real_bswap) { + bswap = 0; + movop = OPC_MOVBE_MyGy; + } + + switch (memop & MO_SIZE) { + case MO_8: + /* In 32-bit mode, 8-bit stores can only happen from [abcd]x. + Use the scratch register if necessary. */ + if (TCG_TARGET_REG_BITS == 32 && datalo >= 4) { + tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo); + datalo = scratch; + } + tcg_out_modrm_offset(s, OPC_MOVB_EvGv + P_REXB_R + seg, + datalo, base, ofs); + break; + case MO_16: + if (bswap) { + tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo); + tcg_out_rolw_8(s, scratch); + datalo = scratch; + } + tcg_out_modrm_offset(s, movop + P_DATA16 + seg, datalo, base, ofs); + break; + case MO_32: + if (bswap) { + tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo); + tcg_out_bswap32(s, scratch); + datalo = scratch; + } + tcg_out_modrm_offset(s, movop + seg, datalo, base, ofs); + break; + case MO_64: + if (TCG_TARGET_REG_BITS == 64) { + if (bswap) { + tcg_out_mov(s, TCG_TYPE_I64, scratch, datalo); + tcg_out_bswap64(s, scratch); + datalo = scratch; + } + tcg_out_modrm_offset(s, movop + P_REXW + seg, datalo, base, ofs); + } else if (bswap) { + tcg_out_mov(s, TCG_TYPE_I32, scratch, datahi); + tcg_out_bswap32(s, scratch); + tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, scratch, base, ofs); + tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo); + tcg_out_bswap32(s, scratch); + tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, scratch, base, ofs+4); + } else { + if (real_bswap) { + int t = datalo; + datalo = datahi; + datahi = t; + } + tcg_out_modrm_offset(s, movop + seg, datalo, base, ofs); + tcg_out_modrm_offset(s, movop + seg, datahi, base, ofs+4); + } + break; + default: + tcg_abort(); + } +} + +static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64) +{ + TCGReg datalo, datahi, addrlo; + TCGReg addrhi __attribute__((unused)); + TCGMemOpIdx oi; + TCGMemOp opc; +#if defined(CONFIG_SOFTMMU) + int mem_index; + tcg_insn_unit *label_ptr[2]; +#endif + + datalo = *args++; + datahi = (TCG_TARGET_REG_BITS == 32 && is64 ? *args++ : 0); + addrlo = *args++; + addrhi = (TARGET_LONG_BITS > TCG_TARGET_REG_BITS ? *args++ : 0); + oi = *args++; + opc = get_memop(oi); + +#if defined(CONFIG_SOFTMMU) + mem_index = get_mmuidx(oi); + + tcg_out_tlb_load(s, addrlo, addrhi, mem_index, opc, + label_ptr, offsetof(CPUTLBEntry, addr_write)); + + /* TLB Hit. */ + tcg_out_qemu_st_direct(s, datalo, datahi, TCG_REG_L1, 0, 0, opc); + + /* Record the current context of a store into ldst label */ + add_qemu_ldst_label(s, false, oi, datalo, datahi, addrlo, addrhi, + s->code_ptr, label_ptr); +#else + { + int32_t offset = guest_base; + TCGReg base = addrlo; + int seg = 0; + + /* See comment in tcg_out_qemu_ld re zero-extension of addrlo. */ + if (guest_base == 0 || guest_base_flags) { + seg = guest_base_flags; + offset = 0; + if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) { + seg |= P_ADDR32; + } + } else if (TCG_TARGET_REG_BITS == 64) { + /* ??? Note that we can't use the same SIB addressing scheme + as for loads, since we require L0 free for bswap. */ + if (offset != guest_base) { + if (TARGET_LONG_BITS == 32) { + tcg_out_ext32u(s, TCG_REG_L0, base); + base = TCG_REG_L0; + } + tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L1, guest_base); + tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_L1, base); + base = TCG_REG_L1; + offset = 0; + } else if (TARGET_LONG_BITS == 32) { + tcg_out_ext32u(s, TCG_REG_L1, base); + base = TCG_REG_L1; + } + } + + tcg_out_qemu_st_direct(s, datalo, datahi, base, offset, seg, opc); + } +#endif +} + +static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, + const TCGArg *args, const int *const_args) +{ + int c, vexop, rexw = 0; + +#if TCG_TARGET_REG_BITS == 64 +# define OP_32_64(x) \ + case glue(glue(INDEX_op_, x), _i64): \ + rexw = P_REXW; /* FALLTHRU */ \ + case glue(glue(INDEX_op_, x), _i32) +#else +# define OP_32_64(x) \ + case glue(glue(INDEX_op_, x), _i32) +#endif + + switch(opc) { + case INDEX_op_exit_tb: + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_EAX, args[0]); + tcg_out_jmp(s, tb_ret_addr); + break; + case INDEX_op_goto_tb: + if (s->tb_jmp_offset) { + /* direct jump method */ + tcg_out8(s, OPC_JMP_long); /* jmp im */ + s->tb_jmp_offset[args[0]] = tcg_current_code_size(s); + tcg_out32(s, 0); + } else { + /* indirect jump method */ + tcg_out_modrm_offset(s, OPC_GRP5, EXT5_JMPN_Ev, -1, + (intptr_t)(s->tb_next + args[0])); + } + s->tb_next_offset[args[0]] = tcg_current_code_size(s); + break; + case INDEX_op_br: + tcg_out_jxx(s, JCC_JMP, arg_label(args[0]), 0); + break; + OP_32_64(ld8u): + /* Note that we can ignore REXW for the zero-extend to 64-bit. */ + tcg_out_modrm_offset(s, OPC_MOVZBL, args[0], args[1], args[2]); + break; + OP_32_64(ld8s): + tcg_out_modrm_offset(s, OPC_MOVSBL + rexw, args[0], args[1], args[2]); + break; + OP_32_64(ld16u): + /* Note that we can ignore REXW for the zero-extend to 64-bit. */ + tcg_out_modrm_offset(s, OPC_MOVZWL, args[0], args[1], args[2]); + break; + OP_32_64(ld16s): + tcg_out_modrm_offset(s, OPC_MOVSWL + rexw, args[0], args[1], args[2]); + break; +#if TCG_TARGET_REG_BITS == 64 + case INDEX_op_ld32u_i64: +#endif + case INDEX_op_ld_i32: + tcg_out_ld(s, TCG_TYPE_I32, args[0], args[1], args[2]); + break; + + OP_32_64(st8): + if (const_args[0]) { + tcg_out_modrm_offset(s, OPC_MOVB_EvIz, + 0, args[1], args[2]); + tcg_out8(s, args[0]); + } else { + tcg_out_modrm_offset(s, OPC_MOVB_EvGv | P_REXB_R, + args[0], args[1], args[2]); + } + break; + OP_32_64(st16): + if (const_args[0]) { + tcg_out_modrm_offset(s, OPC_MOVL_EvIz | P_DATA16, + 0, args[1], args[2]); + tcg_out16(s, args[0]); + } else { + tcg_out_modrm_offset(s, OPC_MOVL_EvGv | P_DATA16, + args[0], args[1], args[2]); + } + break; +#if TCG_TARGET_REG_BITS == 64 + case INDEX_op_st32_i64: +#endif + case INDEX_op_st_i32: + if (const_args[0]) { + tcg_out_modrm_offset(s, OPC_MOVL_EvIz, 0, args[1], args[2]); + tcg_out32(s, args[0]); + } else { + tcg_out_st(s, TCG_TYPE_I32, args[0], args[1], args[2]); + } + break; + + OP_32_64(add): + /* For 3-operand addition, use LEA. */ + if (args[0] != args[1]) { + TCGArg a0 = args[0], a1 = args[1], a2 = args[2], c3 = 0; + + if (const_args[2]) { + c3 = a2, a2 = -1; + } else if (a0 == a2) { + /* Watch out for dest = src + dest, since we've removed + the matching constraint on the add. */ + tgen_arithr(s, ARITH_ADD + rexw, a0, a1); + break; + } + + tcg_out_modrm_sib_offset(s, OPC_LEA + rexw, a0, a1, a2, 0, c3); + break; + } + c = ARITH_ADD; + goto gen_arith; + OP_32_64(sub): + c = ARITH_SUB; + goto gen_arith; + OP_32_64(and): + c = ARITH_AND; + goto gen_arith; + OP_32_64(or): + c = ARITH_OR; + goto gen_arith; + OP_32_64(xor): + c = ARITH_XOR; + goto gen_arith; + gen_arith: + if (const_args[2]) { + tgen_arithi(s, c + rexw, args[0], args[2], 0); + } else { + tgen_arithr(s, c + rexw, args[0], args[2]); + } + break; + + OP_32_64(andc): + if (const_args[2]) { + tcg_out_mov(s, rexw ? TCG_TYPE_I64 : TCG_TYPE_I32, + args[0], args[1]); + tgen_arithi(s, ARITH_AND + rexw, args[0], ~args[2], 0); + } else { + tcg_out_vex_modrm(s, OPC_ANDN + rexw, args[0], args[2], args[1]); + } + break; + + OP_32_64(mul): + if (const_args[2]) { + int32_t val; + val = args[2]; + if (val == (int8_t)val) { + tcg_out_modrm(s, OPC_IMUL_GvEvIb + rexw, args[0], args[0]); + tcg_out8(s, val); + } else { + tcg_out_modrm(s, OPC_IMUL_GvEvIz + rexw, args[0], args[0]); + tcg_out32(s, val); + } + } else { + tcg_out_modrm(s, OPC_IMUL_GvEv + rexw, args[0], args[2]); + } + break; + + OP_32_64(div2): + tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_IDIV, args[4]); + break; + OP_32_64(divu2): + tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_DIV, args[4]); + break; + + OP_32_64(shl): + c = SHIFT_SHL; + vexop = OPC_SHLX; + goto gen_shift_maybe_vex; + OP_32_64(shr): + c = SHIFT_SHR; + vexop = OPC_SHRX; + goto gen_shift_maybe_vex; + OP_32_64(sar): + c = SHIFT_SAR; + vexop = OPC_SARX; + goto gen_shift_maybe_vex; + OP_32_64(rotl): + c = SHIFT_ROL; + goto gen_shift; + OP_32_64(rotr): + c = SHIFT_ROR; + goto gen_shift; + gen_shift_maybe_vex: + if (have_bmi2 && !const_args[2]) { + tcg_out_vex_modrm(s, vexop + rexw, args[0], args[2], args[1]); + break; + } + /* FALLTHRU */ + gen_shift: + if (const_args[2]) { + tcg_out_shifti(s, c + rexw, args[0], args[2]); + } else { + tcg_out_modrm(s, OPC_SHIFT_cl + rexw, c, args[0]); + } + break; + + case INDEX_op_brcond_i32: + tcg_out_brcond32(s, args[2], args[0], args[1], const_args[1], + arg_label(args[3]), 0); + break; + case INDEX_op_setcond_i32: + tcg_out_setcond32(s, args[3], args[0], args[1], + args[2], const_args[2]); + break; + case INDEX_op_movcond_i32: + tcg_out_movcond32(s, args[5], args[0], args[1], + args[2], const_args[2], args[3]); + break; + + OP_32_64(bswap16): + tcg_out_rolw_8(s, args[0]); + break; + OP_32_64(bswap32): + tcg_out_bswap32(s, args[0]); + break; + + OP_32_64(neg): + tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NEG, args[0]); + break; + OP_32_64(not): + tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NOT, args[0]); + break; + + OP_32_64(ext8s): + tcg_out_ext8s(s, args[0], args[1], rexw); + break; + OP_32_64(ext16s): + tcg_out_ext16s(s, args[0], args[1], rexw); + break; + OP_32_64(ext8u): + tcg_out_ext8u(s, args[0], args[1]); + break; + OP_32_64(ext16u): + tcg_out_ext16u(s, args[0], args[1]); + break; + + case INDEX_op_qemu_ld_i32: + tcg_out_qemu_ld(s, args, 0); + break; + case INDEX_op_qemu_ld_i64: + tcg_out_qemu_ld(s, args, 1); + break; + case INDEX_op_qemu_st_i32: + tcg_out_qemu_st(s, args, 0); + break; + case INDEX_op_qemu_st_i64: + tcg_out_qemu_st(s, args, 1); + break; + + OP_32_64(mulu2): + tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_MUL, args[3]); + break; + OP_32_64(muls2): + tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_IMUL, args[3]); + break; + OP_32_64(add2): + if (const_args[4]) { + tgen_arithi(s, ARITH_ADD + rexw, args[0], args[4], 1); + } else { + tgen_arithr(s, ARITH_ADD + rexw, args[0], args[4]); + } + if (const_args[5]) { + tgen_arithi(s, ARITH_ADC + rexw, args[1], args[5], 1); + } else { + tgen_arithr(s, ARITH_ADC + rexw, args[1], args[5]); + } + break; + OP_32_64(sub2): + if (const_args[4]) { + tgen_arithi(s, ARITH_SUB + rexw, args[0], args[4], 1); + } else { + tgen_arithr(s, ARITH_SUB + rexw, args[0], args[4]); + } + if (const_args[5]) { + tgen_arithi(s, ARITH_SBB + rexw, args[1], args[5], 1); + } else { + tgen_arithr(s, ARITH_SBB + rexw, args[1], args[5]); + } + break; + +#if TCG_TARGET_REG_BITS == 32 + case INDEX_op_brcond2_i32: + tcg_out_brcond2(s, args, const_args, 0); + break; + case INDEX_op_setcond2_i32: + tcg_out_setcond2(s, args, const_args); + break; +#else /* TCG_TARGET_REG_BITS == 64 */ + case INDEX_op_ld32s_i64: + tcg_out_modrm_offset(s, OPC_MOVSLQ, args[0], args[1], args[2]); + break; + case INDEX_op_ld_i64: + tcg_out_ld(s, TCG_TYPE_I64, args[0], args[1], args[2]); + break; + case INDEX_op_st_i64: + if (const_args[0]) { + tcg_out_modrm_offset(s, OPC_MOVL_EvIz | P_REXW, + 0, args[1], args[2]); + tcg_out32(s, args[0]); + } else { + tcg_out_st(s, TCG_TYPE_I64, args[0], args[1], args[2]); + } + break; + + case INDEX_op_brcond_i64: + tcg_out_brcond64(s, args[2], args[0], args[1], const_args[1], + arg_label(args[3]), 0); + break; + case INDEX_op_setcond_i64: + tcg_out_setcond64(s, args[3], args[0], args[1], + args[2], const_args[2]); + break; + case INDEX_op_movcond_i64: + tcg_out_movcond64(s, args[5], args[0], args[1], + args[2], const_args[2], args[3]); + break; + + case INDEX_op_bswap64_i64: + tcg_out_bswap64(s, args[0]); + break; + case INDEX_op_extu_i32_i64: + case INDEX_op_ext32u_i64: + tcg_out_ext32u(s, args[0], args[1]); + break; + case INDEX_op_ext_i32_i64: + case INDEX_op_ext32s_i64: + tcg_out_ext32s(s, args[0], args[1]); + break; +#endif + + OP_32_64(deposit): + if (args[3] == 0 && args[4] == 8) { + /* load bits 0..7 */ + tcg_out_modrm(s, OPC_MOVB_EvGv | P_REXB_R | P_REXB_RM, + args[2], args[0]); + } else if (args[3] == 8 && args[4] == 8) { + /* load bits 8..15 */ + tcg_out_modrm(s, OPC_MOVB_EvGv, args[2], args[0] + 4); + } else if (args[3] == 0 && args[4] == 16) { + /* load bits 0..15 */ + tcg_out_modrm(s, OPC_MOVL_EvGv | P_DATA16, args[2], args[0]); + } else { + tcg_abort(); + } + break; + + case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */ + case INDEX_op_mov_i64: + case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi. */ + case INDEX_op_movi_i64: + case INDEX_op_call: /* Always emitted via tcg_out_call. */ + default: + tcg_abort(); + } + +#undef OP_32_64 +} + +static const TCGTargetOpDef x86_op_defs[] = { + { INDEX_op_exit_tb, { } }, + { INDEX_op_goto_tb, { } }, + { INDEX_op_br, { } }, + { INDEX_op_ld8u_i32, { "r", "r" } }, + { INDEX_op_ld8s_i32, { "r", "r" } }, + { INDEX_op_ld16u_i32, { "r", "r" } }, + { INDEX_op_ld16s_i32, { "r", "r" } }, + { INDEX_op_ld_i32, { "r", "r" } }, + { INDEX_op_st8_i32, { "qi", "r" } }, + { INDEX_op_st16_i32, { "ri", "r" } }, + { INDEX_op_st_i32, { "ri", "r" } }, + + { INDEX_op_add_i32, { "r", "r", "ri" } }, + { INDEX_op_sub_i32, { "r", "0", "ri" } }, + { INDEX_op_mul_i32, { "r", "0", "ri" } }, + { INDEX_op_div2_i32, { "a", "d", "0", "1", "r" } }, + { INDEX_op_divu2_i32, { "a", "d", "0", "1", "r" } }, + { INDEX_op_and_i32, { "r", "0", "ri" } }, + { INDEX_op_or_i32, { "r", "0", "ri" } }, + { INDEX_op_xor_i32, { "r", "0", "ri" } }, + { INDEX_op_andc_i32, { "r", "r", "ri" } }, + + { INDEX_op_shl_i32, { "r", "0", "Ci" } }, + { INDEX_op_shr_i32, { "r", "0", "Ci" } }, + { INDEX_op_sar_i32, { "r", "0", "Ci" } }, + { INDEX_op_rotl_i32, { "r", "0", "ci" } }, + { INDEX_op_rotr_i32, { "r", "0", "ci" } }, + + { INDEX_op_brcond_i32, { "r", "ri" } }, + + { INDEX_op_bswap16_i32, { "r", "0" } }, + { INDEX_op_bswap32_i32, { "r", "0" } }, + + { INDEX_op_neg_i32, { "r", "0" } }, + + { INDEX_op_not_i32, { "r", "0" } }, + + { INDEX_op_ext8s_i32, { "r", "q" } }, + { INDEX_op_ext16s_i32, { "r", "r" } }, + { INDEX_op_ext8u_i32, { "r", "q" } }, + { INDEX_op_ext16u_i32, { "r", "r" } }, + + { INDEX_op_setcond_i32, { "q", "r", "ri" } }, + + { INDEX_op_deposit_i32, { "Q", "0", "Q" } }, + { INDEX_op_movcond_i32, { "r", "r", "ri", "r", "0" } }, + + { INDEX_op_mulu2_i32, { "a", "d", "a", "r" } }, + { INDEX_op_muls2_i32, { "a", "d", "a", "r" } }, + { INDEX_op_add2_i32, { "r", "r", "0", "1", "ri", "ri" } }, + { INDEX_op_sub2_i32, { "r", "r", "0", "1", "ri", "ri" } }, + +#if TCG_TARGET_REG_BITS == 32 + { INDEX_op_brcond2_i32, { "r", "r", "ri", "ri" } }, + { INDEX_op_setcond2_i32, { "r", "r", "r", "ri", "ri" } }, +#else + { INDEX_op_ld8u_i64, { "r", "r" } }, + { INDEX_op_ld8s_i64, { "r", "r" } }, + { INDEX_op_ld16u_i64, { "r", "r" } }, + { INDEX_op_ld16s_i64, { "r", "r" } }, + { INDEX_op_ld32u_i64, { "r", "r" } }, + { INDEX_op_ld32s_i64, { "r", "r" } }, + { INDEX_op_ld_i64, { "r", "r" } }, + { INDEX_op_st8_i64, { "ri", "r" } }, + { INDEX_op_st16_i64, { "ri", "r" } }, + { INDEX_op_st32_i64, { "ri", "r" } }, + { INDEX_op_st_i64, { "re", "r" } }, + + { INDEX_op_add_i64, { "r", "r", "re" } }, + { INDEX_op_mul_i64, { "r", "0", "re" } }, + { INDEX_op_div2_i64, { "a", "d", "0", "1", "r" } }, + { INDEX_op_divu2_i64, { "a", "d", "0", "1", "r" } }, + { INDEX_op_sub_i64, { "r", "0", "re" } }, + { INDEX_op_and_i64, { "r", "0", "reZ" } }, + { INDEX_op_or_i64, { "r", "0", "re" } }, + { INDEX_op_xor_i64, { "r", "0", "re" } }, + { INDEX_op_andc_i64, { "r", "r", "rI" } }, + + { INDEX_op_shl_i64, { "r", "0", "Ci" } }, + { INDEX_op_shr_i64, { "r", "0", "Ci" } }, + { INDEX_op_sar_i64, { "r", "0", "Ci" } }, + { INDEX_op_rotl_i64, { "r", "0", "ci" } }, + { INDEX_op_rotr_i64, { "r", "0", "ci" } }, + + { INDEX_op_brcond_i64, { "r", "re" } }, + { INDEX_op_setcond_i64, { "r", "r", "re" } }, + + { INDEX_op_bswap16_i64, { "r", "0" } }, + { INDEX_op_bswap32_i64, { "r", "0" } }, + { INDEX_op_bswap64_i64, { "r", "0" } }, + { INDEX_op_neg_i64, { "r", "0" } }, + { INDEX_op_not_i64, { "r", "0" } }, + + { INDEX_op_ext8s_i64, { "r", "r" } }, + { INDEX_op_ext16s_i64, { "r", "r" } }, + { INDEX_op_ext32s_i64, { "r", "r" } }, + { INDEX_op_ext8u_i64, { "r", "r" } }, + { INDEX_op_ext16u_i64, { "r", "r" } }, + { INDEX_op_ext32u_i64, { "r", "r" } }, + + { INDEX_op_ext_i32_i64, { "r", "r" } }, + { INDEX_op_extu_i32_i64, { "r", "r" } }, + + { INDEX_op_deposit_i64, { "Q", "0", "Q" } }, + { INDEX_op_movcond_i64, { "r", "r", "re", "r", "0" } }, + + { INDEX_op_mulu2_i64, { "a", "d", "a", "r" } }, + { INDEX_op_muls2_i64, { "a", "d", "a", "r" } }, + { INDEX_op_add2_i64, { "r", "r", "0", "1", "re", "re" } }, + { INDEX_op_sub2_i64, { "r", "r", "0", "1", "re", "re" } }, +#endif + +#if TCG_TARGET_REG_BITS == 64 + { INDEX_op_qemu_ld_i32, { "r", "L" } }, + { INDEX_op_qemu_st_i32, { "L", "L" } }, + { INDEX_op_qemu_ld_i64, { "r", "L" } }, + { INDEX_op_qemu_st_i64, { "L", "L" } }, +#elif TARGET_LONG_BITS <= TCG_TARGET_REG_BITS + { INDEX_op_qemu_ld_i32, { "r", "L" } }, + { INDEX_op_qemu_st_i32, { "L", "L" } }, + { INDEX_op_qemu_ld_i64, { "r", "r", "L" } }, + { INDEX_op_qemu_st_i64, { "L", "L", "L" } }, +#else + { INDEX_op_qemu_ld_i32, { "r", "L", "L" } }, + { INDEX_op_qemu_st_i32, { "L", "L", "L" } }, + { INDEX_op_qemu_ld_i64, { "r", "r", "L", "L" } }, + { INDEX_op_qemu_st_i64, { "L", "L", "L", "L" } }, +#endif + { -1 }, +}; + +static int tcg_target_callee_save_regs[] = { +#if TCG_TARGET_REG_BITS == 64 + TCG_REG_RBP, + TCG_REG_RBX, +#if defined(_WIN64) + TCG_REG_RDI, + TCG_REG_RSI, +#endif + TCG_REG_R12, + TCG_REG_R13, + TCG_REG_R14, /* Currently used for the global env. */ + TCG_REG_R15, +#else + TCG_REG_EBP, /* Currently used for the global env. */ + TCG_REG_EBX, + TCG_REG_ESI, + TCG_REG_EDI, +#endif +}; + +/* Compute frame size via macros, to share between tcg_target_qemu_prologue + and tcg_register_jit. */ + +#define PUSH_SIZE \ + ((1 + ARRAY_SIZE(tcg_target_callee_save_regs)) \ + * (TCG_TARGET_REG_BITS / 8)) + +#define FRAME_SIZE \ + ((PUSH_SIZE \ + + TCG_STATIC_CALL_ARGS_SIZE \ + + CPU_TEMP_BUF_NLONGS * sizeof(long) \ + + TCG_TARGET_STACK_ALIGN - 1) \ + & ~(TCG_TARGET_STACK_ALIGN - 1)) + +/* Generate global QEMU prologue and epilogue code */ +static void tcg_target_qemu_prologue(TCGContext *s) +{ + int i, stack_addend; + + /* TB prologue */ + + /* Reserve some stack space, also for TCG temps. */ + stack_addend = FRAME_SIZE - PUSH_SIZE; + tcg_set_frame(s, TCG_REG_CALL_STACK, TCG_STATIC_CALL_ARGS_SIZE, + CPU_TEMP_BUF_NLONGS * sizeof(long)); + + /* Save all callee saved registers. */ + for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); i++) { + tcg_out_push(s, tcg_target_callee_save_regs[i]); + } + +#if TCG_TARGET_REG_BITS == 32 + tcg_out_ld(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP, + (ARRAY_SIZE(tcg_target_callee_save_regs) + 1) * 4); + tcg_out_addi(s, TCG_REG_ESP, -stack_addend); + /* jmp *tb. */ + tcg_out_modrm_offset(s, OPC_GRP5, EXT5_JMPN_Ev, TCG_REG_ESP, + (ARRAY_SIZE(tcg_target_callee_save_regs) + 2) * 4 + + stack_addend); +#else + tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]); + tcg_out_addi(s, TCG_REG_ESP, -stack_addend); + /* jmp *tb. */ + tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, tcg_target_call_iarg_regs[1]); +#endif + + /* TB epilogue */ + tb_ret_addr = s->code_ptr; + + tcg_out_addi(s, TCG_REG_CALL_STACK, stack_addend); + + for (i = ARRAY_SIZE(tcg_target_callee_save_regs) - 1; i >= 0; i--) { + tcg_out_pop(s, tcg_target_callee_save_regs[i]); + } + tcg_out_opc(s, OPC_RET, 0, 0, 0); + +#if !defined(CONFIG_SOFTMMU) + /* Try to set up a segment register to point to guest_base. */ + if (guest_base) { + setup_guest_base_seg(); + } +#endif +} + +static void tcg_target_init(TCGContext *s) +{ +#ifdef CONFIG_CPUID_H + unsigned a, b, c, d; + int max = __get_cpuid_max(0, 0); + + if (max >= 1) { + __cpuid(1, a, b, c, d); +#ifndef have_cmov + /* For 32-bit, 99% certainty that we're running on hardware that + supports cmov, but we still need to check. In case cmov is not + available, we'll use a small forward branch. */ + have_cmov = (d & bit_CMOV) != 0; +#endif +#ifndef have_movbe + /* MOVBE is only available on Intel Atom and Haswell CPUs, so we + need to probe for it. */ + have_movbe = (c & bit_MOVBE) != 0; +#endif + } + + if (max >= 7) { + /* BMI1 is available on AMD Piledriver and Intel Haswell CPUs. */ + __cpuid_count(7, 0, a, b, c, d); +#ifdef bit_BMI + have_bmi1 = (b & bit_BMI) != 0; +#endif +#ifndef have_bmi2 + have_bmi2 = (b & bit_BMI2) != 0; +#endif + } +#endif + + if (TCG_TARGET_REG_BITS == 64) { + tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffff); + tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I64], 0, 0xffff); + } else { + tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xff); + } + + tcg_regset_clear(tcg_target_call_clobber_regs); + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_EAX); + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_EDX); + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_ECX); + if (TCG_TARGET_REG_BITS == 64) { +#if !defined(_WIN64) + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_RDI); + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_RSI); +#endif + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R8); + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R9); + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R10); + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R11); + } + + tcg_regset_clear(s->reserved_regs); + tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK); + + tcg_add_target_add_op_defs(x86_op_defs); +} + +typedef struct { + DebugFrameHeader h; + uint8_t fde_def_cfa[4]; + uint8_t fde_reg_ofs[14]; +} DebugFrame; + +/* We're expecting a 2 byte uleb128 encoded value. */ +QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14)); + +#if !defined(__ELF__) + /* Host machine without ELF. */ +#elif TCG_TARGET_REG_BITS == 64 +#define ELF_HOST_MACHINE EM_X86_64 +static const DebugFrame debug_frame = { + .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */ + .h.cie.id = -1, + .h.cie.version = 1, + .h.cie.code_align = 1, + .h.cie.data_align = 0x78, /* sleb128 -8 */ + .h.cie.return_column = 16, + + /* Total FDE size does not include the "len" member. */ + .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset), + + .fde_def_cfa = { + 12, 7, /* DW_CFA_def_cfa %rsp, ... */ + (FRAME_SIZE & 0x7f) | 0x80, /* ... uleb128 FRAME_SIZE */ + (FRAME_SIZE >> 7) + }, + .fde_reg_ofs = { + 0x90, 1, /* DW_CFA_offset, %rip, -8 */ + /* The following ordering must match tcg_target_callee_save_regs. */ + 0x86, 2, /* DW_CFA_offset, %rbp, -16 */ + 0x83, 3, /* DW_CFA_offset, %rbx, -24 */ + 0x8c, 4, /* DW_CFA_offset, %r12, -32 */ + 0x8d, 5, /* DW_CFA_offset, %r13, -40 */ + 0x8e, 6, /* DW_CFA_offset, %r14, -48 */ + 0x8f, 7, /* DW_CFA_offset, %r15, -56 */ + } +}; +#else +#define ELF_HOST_MACHINE EM_386 +static const DebugFrame debug_frame = { + .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */ + .h.cie.id = -1, + .h.cie.version = 1, + .h.cie.code_align = 1, + .h.cie.data_align = 0x7c, /* sleb128 -4 */ + .h.cie.return_column = 8, + + /* Total FDE size does not include the "len" member. */ + .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset), + + .fde_def_cfa = { + 12, 4, /* DW_CFA_def_cfa %esp, ... */ + (FRAME_SIZE & 0x7f) | 0x80, /* ... uleb128 FRAME_SIZE */ + (FRAME_SIZE >> 7) + }, + .fde_reg_ofs = { + 0x88, 1, /* DW_CFA_offset, %eip, -4 */ + /* The following ordering must match tcg_target_callee_save_regs. */ + 0x85, 2, /* DW_CFA_offset, %ebp, -8 */ + 0x83, 3, /* DW_CFA_offset, %ebx, -12 */ + 0x86, 4, /* DW_CFA_offset, %esi, -16 */ + 0x87, 5, /* DW_CFA_offset, %edi, -20 */ + } +}; +#endif + +#if defined(ELF_HOST_MACHINE) +void tcg_register_jit(void *buf, size_t buf_size) +{ + tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame)); +} +#endif diff --git a/tcg/ia64/tcg-target.c b/tcg/ia64/tcg-target.c deleted file mode 100644 index 62d6549..0000000 --- a/tcg/ia64/tcg-target.c +++ /dev/null @@ -1,2453 +0,0 @@ -/* - * Tiny Code Generator for QEMU - * - * Copyright (c) 2009-2010 Aurelien Jarno - * Based on i386/tcg-target.c - Copyright (c) 2008 Fabrice Bellard - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - */ - -/* - * Register definitions - */ - -#ifndef NDEBUG -static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = { - "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", - "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", - "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23", - "r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31", - "r32", "r33", "r34", "r35", "r36", "r37", "r38", "r39", - "r40", "r41", "r42", "r43", "r44", "r45", "r46", "r47", - "r48", "r49", "r50", "r51", "r52", "r53", "r54", "r55", - "r56", "r57", "r58", "r59", "r60", "r61", "r62", "r63", -}; -#endif - -#ifndef CONFIG_SOFTMMU -#define TCG_GUEST_BASE_REG TCG_REG_R55 -#endif - -/* Branch registers */ -enum { - TCG_REG_B0 = 0, - TCG_REG_B1, - TCG_REG_B2, - TCG_REG_B3, - TCG_REG_B4, - TCG_REG_B5, - TCG_REG_B6, - TCG_REG_B7, -}; - -/* Floating point registers */ -enum { - TCG_REG_F0 = 0, - TCG_REG_F1, - TCG_REG_F2, - TCG_REG_F3, - TCG_REG_F4, - TCG_REG_F5, - TCG_REG_F6, - TCG_REG_F7, - TCG_REG_F8, - TCG_REG_F9, - TCG_REG_F10, - TCG_REG_F11, - TCG_REG_F12, - TCG_REG_F13, - TCG_REG_F14, - TCG_REG_F15, -}; - -/* Predicate registers */ -enum { - TCG_REG_P0 = 0, - TCG_REG_P1, - TCG_REG_P2, - TCG_REG_P3, - TCG_REG_P4, - TCG_REG_P5, - TCG_REG_P6, - TCG_REG_P7, - TCG_REG_P8, - TCG_REG_P9, - TCG_REG_P10, - TCG_REG_P11, - TCG_REG_P12, - TCG_REG_P13, - TCG_REG_P14, - TCG_REG_P15, -}; - -/* Application registers */ -enum { - TCG_REG_PFS = 64, -}; - -static const int tcg_target_reg_alloc_order[] = { - TCG_REG_R35, - TCG_REG_R36, - TCG_REG_R37, - TCG_REG_R38, - TCG_REG_R39, - TCG_REG_R40, - TCG_REG_R41, - TCG_REG_R42, - TCG_REG_R43, - TCG_REG_R44, - TCG_REG_R45, - TCG_REG_R46, - TCG_REG_R47, - TCG_REG_R48, - TCG_REG_R49, - TCG_REG_R50, - TCG_REG_R51, - TCG_REG_R52, - TCG_REG_R53, - TCG_REG_R54, - TCG_REG_R55, - TCG_REG_R14, - TCG_REG_R15, - TCG_REG_R16, - TCG_REG_R17, - TCG_REG_R18, - TCG_REG_R19, - TCG_REG_R20, - TCG_REG_R21, - TCG_REG_R22, - TCG_REG_R23, - TCG_REG_R24, - TCG_REG_R25, - TCG_REG_R26, - TCG_REG_R27, - TCG_REG_R28, - TCG_REG_R29, - TCG_REG_R30, - TCG_REG_R31, - TCG_REG_R56, - TCG_REG_R57, - TCG_REG_R58, - TCG_REG_R59, - TCG_REG_R60, - TCG_REG_R61, - TCG_REG_R62, - TCG_REG_R63, - TCG_REG_R8, - TCG_REG_R9, - TCG_REG_R10, - TCG_REG_R11 -}; - -static const int tcg_target_call_iarg_regs[8] = { - TCG_REG_R56, - TCG_REG_R57, - TCG_REG_R58, - TCG_REG_R59, - TCG_REG_R60, - TCG_REG_R61, - TCG_REG_R62, - TCG_REG_R63, -}; - -static const int tcg_target_call_oarg_regs[] = { - TCG_REG_R8 -}; - -/* - * opcode formation - */ - -/* bundle templates: stops (double bar in the IA64 manual) are marked with - an uppercase letter. */ -enum { - mii = 0x00, - miI = 0x01, - mIi = 0x02, - mII = 0x03, - mlx = 0x04, - mLX = 0x05, - mmi = 0x08, - mmI = 0x09, - Mmi = 0x0a, - MmI = 0x0b, - mfi = 0x0c, - mfI = 0x0d, - mmf = 0x0e, - mmF = 0x0f, - mib = 0x10, - miB = 0x11, - mbb = 0x12, - mbB = 0x13, - bbb = 0x16, - bbB = 0x17, - mmb = 0x18, - mmB = 0x19, - mfb = 0x1c, - mfB = 0x1d, -}; - -enum { - OPC_ADD_A1 = 0x10000000000ull, - OPC_AND_A1 = 0x10060000000ull, - OPC_AND_A3 = 0x10160000000ull, - OPC_ANDCM_A1 = 0x10068000000ull, - OPC_ANDCM_A3 = 0x10168000000ull, - OPC_ADDS_A4 = 0x10800000000ull, - OPC_ADDL_A5 = 0x12000000000ull, - OPC_ALLOC_M34 = 0x02c00000000ull, - OPC_BR_DPTK_FEW_B1 = 0x08400000000ull, - OPC_BR_SPTK_MANY_B1 = 0x08000001000ull, - OPC_BR_CALL_SPNT_FEW_B3 = 0x0a200000000ull, - OPC_BR_SPTK_MANY_B4 = 0x00100001000ull, - OPC_BR_CALL_SPTK_MANY_B5 = 0x02100001000ull, - OPC_BR_RET_SPTK_MANY_B4 = 0x00108001100ull, - OPC_BRL_SPTK_MANY_X3 = 0x18000001000ull, - OPC_BRL_CALL_SPNT_MANY_X4 = 0x1a200001000ull, - OPC_BRL_CALL_SPTK_MANY_X4 = 0x1a000001000ull, - OPC_CMP_LT_A6 = 0x18000000000ull, - OPC_CMP_LTU_A6 = 0x1a000000000ull, - OPC_CMP_EQ_A6 = 0x1c000000000ull, - OPC_CMP4_LT_A6 = 0x18400000000ull, - OPC_CMP4_LTU_A6 = 0x1a400000000ull, - OPC_CMP4_EQ_A6 = 0x1c400000000ull, - OPC_DEP_I14 = 0x0ae00000000ull, - OPC_DEP_I15 = 0x08000000000ull, - OPC_DEP_Z_I12 = 0x0a600000000ull, - OPC_EXTR_I11 = 0x0a400002000ull, - OPC_EXTR_U_I11 = 0x0a400000000ull, - OPC_FCVT_FX_TRUNC_S1_F10 = 0x004d0000000ull, - OPC_FCVT_FXU_TRUNC_S1_F10 = 0x004d8000000ull, - OPC_FCVT_XF_F11 = 0x000e0000000ull, - OPC_FMA_S1_F1 = 0x10400000000ull, - OPC_FNMA_S1_F1 = 0x18400000000ull, - OPC_FRCPA_S1_F6 = 0x00600000000ull, - OPC_GETF_SIG_M19 = 0x08708000000ull, - OPC_LD1_M1 = 0x08000000000ull, - OPC_LD1_M3 = 0x0a000000000ull, - OPC_LD2_M1 = 0x08040000000ull, - OPC_LD2_M3 = 0x0a040000000ull, - OPC_LD4_M1 = 0x08080000000ull, - OPC_LD4_M3 = 0x0a080000000ull, - OPC_LD8_M1 = 0x080c0000000ull, - OPC_LD8_M3 = 0x0a0c0000000ull, - OPC_MUX1_I3 = 0x0eca0000000ull, - OPC_NOP_B9 = 0x04008000000ull, - OPC_NOP_F16 = 0x00008000000ull, - OPC_NOP_I18 = 0x00008000000ull, - OPC_NOP_M48 = 0x00008000000ull, - OPC_MOV_I21 = 0x00e00100000ull, - OPC_MOV_RET_I21 = 0x00e00500000ull, - OPC_MOV_I22 = 0x00188000000ull, - OPC_MOV_I_I26 = 0x00150000000ull, - OPC_MOVL_X2 = 0x0c000000000ull, - OPC_OR_A1 = 0x10070000000ull, - OPC_OR_A3 = 0x10170000000ull, - OPC_SETF_EXP_M18 = 0x0c748000000ull, - OPC_SETF_SIG_M18 = 0x0c708000000ull, - OPC_SHL_I7 = 0x0f240000000ull, - OPC_SHR_I5 = 0x0f220000000ull, - OPC_SHR_U_I5 = 0x0f200000000ull, - OPC_SHRP_I10 = 0x0ac00000000ull, - OPC_SXT1_I29 = 0x000a0000000ull, - OPC_SXT2_I29 = 0x000a8000000ull, - OPC_SXT4_I29 = 0x000b0000000ull, - OPC_ST1_M4 = 0x08c00000000ull, - OPC_ST2_M4 = 0x08c40000000ull, - OPC_ST4_M4 = 0x08c80000000ull, - OPC_ST8_M4 = 0x08cc0000000ull, - OPC_SUB_A1 = 0x10028000000ull, - OPC_SUB_A3 = 0x10128000000ull, - OPC_UNPACK4_L_I2 = 0x0f860000000ull, - OPC_XMA_L_F2 = 0x1d000000000ull, - OPC_XOR_A1 = 0x10078000000ull, - OPC_XOR_A3 = 0x10178000000ull, - OPC_ZXT1_I29 = 0x00080000000ull, - OPC_ZXT2_I29 = 0x00088000000ull, - OPC_ZXT4_I29 = 0x00090000000ull, - - INSN_NOP_M = OPC_NOP_M48, /* nop.m 0 */ - INSN_NOP_I = OPC_NOP_I18, /* nop.i 0 */ -}; - -static inline uint64_t tcg_opc_a1(int qp, uint64_t opc, int r1, - int r2, int r3) -{ - return opc - | ((r3 & 0x7f) << 20) - | ((r2 & 0x7f) << 13) - | ((r1 & 0x7f) << 6) - | (qp & 0x3f); -} - -static inline uint64_t tcg_opc_a3(int qp, uint64_t opc, int r1, - uint64_t imm, int r3) -{ - return opc - | ((imm & 0x80) << 29) /* s */ - | ((imm & 0x7f) << 13) /* imm7b */ - | ((r3 & 0x7f) << 20) - | ((r1 & 0x7f) << 6) - | (qp & 0x3f); -} - -static inline uint64_t tcg_opc_a4(int qp, uint64_t opc, int r1, - uint64_t imm, int r3) -{ - return opc - | ((imm & 0x2000) << 23) /* s */ - | ((imm & 0x1f80) << 20) /* imm6d */ - | ((imm & 0x007f) << 13) /* imm7b */ - | ((r3 & 0x7f) << 20) - | ((r1 & 0x7f) << 6) - | (qp & 0x3f); -} - -static inline uint64_t tcg_opc_a5(int qp, uint64_t opc, int r1, - uint64_t imm, int r3) -{ - return opc - | ((imm & 0x200000) << 15) /* s */ - | ((imm & 0x1f0000) << 6) /* imm5c */ - | ((imm & 0x00ff80) << 20) /* imm9d */ - | ((imm & 0x00007f) << 13) /* imm7b */ - | ((r3 & 0x03) << 20) - | ((r1 & 0x7f) << 6) - | (qp & 0x3f); -} - -static inline uint64_t tcg_opc_a6(int qp, uint64_t opc, int p1, - int p2, int r2, int r3) -{ - return opc - | ((p2 & 0x3f) << 27) - | ((r3 & 0x7f) << 20) - | ((r2 & 0x7f) << 13) - | ((p1 & 0x3f) << 6) - | (qp & 0x3f); -} - -static inline uint64_t tcg_opc_b1(int qp, uint64_t opc, uint64_t imm) -{ - return opc - | ((imm & 0x100000) << 16) /* s */ - | ((imm & 0x0fffff) << 13) /* imm20b */ - | (qp & 0x3f); -} - -static inline uint64_t tcg_opc_b3(int qp, uint64_t opc, int b1, uint64_t imm) -{ - return opc - | ((imm & 0x100000) << 16) /* s */ - | ((imm & 0x0fffff) << 13) /* imm20b */ - | ((b1 & 0x7) << 6) - | (qp & 0x3f); -} - -static inline uint64_t tcg_opc_b4(int qp, uint64_t opc, int b2) -{ - return opc - | ((b2 & 0x7) << 13) - | (qp & 0x3f); -} - -static inline uint64_t tcg_opc_b5(int qp, uint64_t opc, int b1, int b2) -{ - return opc - | ((b2 & 0x7) << 13) - | ((b1 & 0x7) << 6) - | (qp & 0x3f); -} - - -static inline uint64_t tcg_opc_b9(int qp, uint64_t opc, uint64_t imm) -{ - return opc - | ((imm & 0x100000) << 16) /* i */ - | ((imm & 0x0fffff) << 6) /* imm20a */ - | (qp & 0x3f); -} - -static inline uint64_t tcg_opc_f1(int qp, uint64_t opc, int f1, - int f3, int f4, int f2) -{ - return opc - | ((f4 & 0x7f) << 27) - | ((f3 & 0x7f) << 20) - | ((f2 & 0x7f) << 13) - | ((f1 & 0x7f) << 6) - | (qp & 0x3f); -} - -static inline uint64_t tcg_opc_f2(int qp, uint64_t opc, int f1, - int f3, int f4, int f2) -{ - return opc - | ((f4 & 0x7f) << 27) - | ((f3 & 0x7f) << 20) - | ((f2 & 0x7f) << 13) - | ((f1 & 0x7f) << 6) - | (qp & 0x3f); -} - -static inline uint64_t tcg_opc_f6(int qp, uint64_t opc, int f1, - int p2, int f2, int f3) -{ - return opc - | ((p2 & 0x3f) << 27) - | ((f3 & 0x7f) << 20) - | ((f2 & 0x7f) << 13) - | ((f1 & 0x7f) << 6) - | (qp & 0x3f); -} - -static inline uint64_t tcg_opc_f10(int qp, uint64_t opc, int f1, int f2) -{ - return opc - | ((f2 & 0x7f) << 13) - | ((f1 & 0x7f) << 6) - | (qp & 0x3f); -} - -static inline uint64_t tcg_opc_f11(int qp, uint64_t opc, int f1, int f2) -{ - return opc - | ((f2 & 0x7f) << 13) - | ((f1 & 0x7f) << 6) - | (qp & 0x3f); -} - -static inline uint64_t tcg_opc_f16(int qp, uint64_t opc, uint64_t imm) -{ - return opc - | ((imm & 0x100000) << 16) /* i */ - | ((imm & 0x0fffff) << 6) /* imm20a */ - | (qp & 0x3f); -} - -static inline uint64_t tcg_opc_i2(int qp, uint64_t opc, int r1, - int r2, int r3) -{ - return opc - | ((r3 & 0x7f) << 20) - | ((r2 & 0x7f) << 13) - | ((r1 & 0x7f) << 6) - | (qp & 0x3f); -} - -static inline uint64_t tcg_opc_i3(int qp, uint64_t opc, int r1, - int r2, int mbtype) -{ - return opc - | ((mbtype & 0x0f) << 20) - | ((r2 & 0x7f) << 13) - | ((r1 & 0x7f) << 6) - | (qp & 0x3f); -} - -static inline uint64_t tcg_opc_i5(int qp, uint64_t opc, int r1, - int r3, int r2) -{ - return opc - | ((r3 & 0x7f) << 20) - | ((r2 & 0x7f) << 13) - | ((r1 & 0x7f) << 6) - | (qp & 0x3f); -} - -static inline uint64_t tcg_opc_i7(int qp, uint64_t opc, int r1, - int r2, int r3) -{ - return opc - | ((r3 & 0x7f) << 20) - | ((r2 & 0x7f) << 13) - | ((r1 & 0x7f) << 6) - | (qp & 0x3f); -} - -static inline uint64_t tcg_opc_i10(int qp, uint64_t opc, int r1, - int r2, int r3, uint64_t count) -{ - return opc - | ((count & 0x3f) << 27) - | ((r3 & 0x7f) << 20) - | ((r2 & 0x7f) << 13) - | ((r1 & 0x7f) << 6) - | (qp & 0x3f); -} - -static inline uint64_t tcg_opc_i11(int qp, uint64_t opc, int r1, - int r3, uint64_t pos, uint64_t len) -{ - return opc - | ((len & 0x3f) << 27) - | ((r3 & 0x7f) << 20) - | ((pos & 0x3f) << 14) - | ((r1 & 0x7f) << 6) - | (qp & 0x3f); -} - -static inline uint64_t tcg_opc_i12(int qp, uint64_t opc, int r1, - int r2, uint64_t pos, uint64_t len) -{ - return opc - | ((len & 0x3f) << 27) - | ((pos & 0x3f) << 20) - | ((r2 & 0x7f) << 13) - | ((r1 & 0x7f) << 6) - | (qp & 0x3f); -} - -static inline uint64_t tcg_opc_i14(int qp, uint64_t opc, int r1, uint64_t imm, - int r3, uint64_t pos, uint64_t len) -{ - return opc - | ((imm & 0x01) << 36) - | ((len & 0x3f) << 27) - | ((r3 & 0x7f) << 20) - | ((pos & 0x3f) << 14) - | ((r1 & 0x7f) << 6) - | (qp & 0x3f); -} - -static inline uint64_t tcg_opc_i15(int qp, uint64_t opc, int r1, int r2, - int r3, uint64_t pos, uint64_t len) -{ - return opc - | ((pos & 0x3f) << 31) - | ((len & 0x0f) << 27) - | ((r3 & 0x7f) << 20) - | ((r2 & 0x7f) << 13) - | ((r1 & 0x7f) << 6) - | (qp & 0x3f); -} - -static inline uint64_t tcg_opc_i18(int qp, uint64_t opc, uint64_t imm) -{ - return opc - | ((imm & 0x100000) << 16) /* i */ - | ((imm & 0x0fffff) << 6) /* imm20a */ - | (qp & 0x3f); -} - -static inline uint64_t tcg_opc_i21(int qp, uint64_t opc, int b1, - int r2, uint64_t imm) -{ - return opc - | ((imm & 0x1ff) << 24) - | ((r2 & 0x7f) << 13) - | ((b1 & 0x7) << 6) - | (qp & 0x3f); -} - -static inline uint64_t tcg_opc_i22(int qp, uint64_t opc, int r1, int b2) -{ - return opc - | ((b2 & 0x7) << 13) - | ((r1 & 0x7f) << 6) - | (qp & 0x3f); -} - -static inline uint64_t tcg_opc_i26(int qp, uint64_t opc, int ar3, int r2) -{ - return opc - | ((ar3 & 0x7f) << 20) - | ((r2 & 0x7f) << 13) - | (qp & 0x3f); -} - -static inline uint64_t tcg_opc_i29(int qp, uint64_t opc, int r1, int r3) -{ - return opc - | ((r3 & 0x7f) << 20) - | ((r1 & 0x7f) << 6) - | (qp & 0x3f); -} - -static inline uint64_t tcg_opc_l2(uint64_t imm) -{ - return (imm & 0x7fffffffffc00000ull) >> 22; -} - -static inline uint64_t tcg_opc_l3(uint64_t imm) -{ - return (imm & 0x07fffffffff00000ull) >> 18; -} - -#define tcg_opc_l4 tcg_opc_l3 - -static inline uint64_t tcg_opc_m1(int qp, uint64_t opc, int r1, int r3) -{ - return opc - | ((r3 & 0x7f) << 20) - | ((r1 & 0x7f) << 6) - | (qp & 0x3f); -} - -static inline uint64_t tcg_opc_m3(int qp, uint64_t opc, int r1, - int r3, uint64_t imm) -{ - return opc - | ((imm & 0x100) << 28) /* s */ - | ((imm & 0x080) << 20) /* i */ - | ((imm & 0x07f) << 13) /* imm7b */ - | ((r3 & 0x7f) << 20) - | ((r1 & 0x7f) << 6) - | (qp & 0x3f); -} - -static inline uint64_t tcg_opc_m4(int qp, uint64_t opc, int r2, int r3) -{ - return opc - | ((r3 & 0x7f) << 20) - | ((r2 & 0x7f) << 13) - | (qp & 0x3f); -} - -static inline uint64_t tcg_opc_m18(int qp, uint64_t opc, int f1, int r2) -{ - return opc - | ((r2 & 0x7f) << 13) - | ((f1 & 0x7f) << 6) - | (qp & 0x3f); -} - -static inline uint64_t tcg_opc_m19(int qp, uint64_t opc, int r1, int f2) -{ - return opc - | ((f2 & 0x7f) << 13) - | ((r1 & 0x7f) << 6) - | (qp & 0x3f); -} - -static inline uint64_t tcg_opc_m34(int qp, uint64_t opc, int r1, - int sof, int sol, int sor) -{ - return opc - | ((sor & 0x0f) << 27) - | ((sol & 0x7f) << 20) - | ((sof & 0x7f) << 13) - | ((r1 & 0x7f) << 6) - | (qp & 0x3f); -} - -static inline uint64_t tcg_opc_m48(int qp, uint64_t opc, uint64_t imm) -{ - return opc - | ((imm & 0x100000) << 16) /* i */ - | ((imm & 0x0fffff) << 6) /* imm20a */ - | (qp & 0x3f); -} - -static inline uint64_t tcg_opc_x2(int qp, uint64_t opc, - int r1, uint64_t imm) -{ - return opc - | ((imm & 0x8000000000000000ull) >> 27) /* i */ - | (imm & 0x0000000000200000ull) /* ic */ - | ((imm & 0x00000000001f0000ull) << 6) /* imm5c */ - | ((imm & 0x000000000000ff80ull) << 20) /* imm9d */ - | ((imm & 0x000000000000007full) << 13) /* imm7b */ - | ((r1 & 0x7f) << 6) - | (qp & 0x3f); -} - -static inline uint64_t tcg_opc_x3(int qp, uint64_t opc, uint64_t imm) -{ - return opc - | ((imm & 0x0800000000000000ull) >> 23) /* i */ - | ((imm & 0x00000000000fffffull) << 13) /* imm20b */ - | (qp & 0x3f); -} - -static inline uint64_t tcg_opc_x4(int qp, uint64_t opc, int b1, uint64_t imm) -{ - return opc - | ((imm & 0x0800000000000000ull) >> 23) /* i */ - | ((imm & 0x00000000000fffffull) << 13) /* imm20b */ - | ((b1 & 0x7) << 6) - | (qp & 0x3f); -} - - -/* - * Relocations - Note that we never encode branches elsewhere than slot 2. - */ - -static void reloc_pcrel21b_slot2(tcg_insn_unit *pc, tcg_insn_unit *target) -{ - uint64_t imm = target - pc; - - pc->hi = (pc->hi & 0xf700000fffffffffull) - | ((imm & 0x100000) << 39) /* s */ - | ((imm & 0x0fffff) << 36); /* imm20b */ -} - -static uint64_t get_reloc_pcrel21b_slot2(tcg_insn_unit *pc) -{ - int64_t high = pc->hi; - - return ((high >> 39) & 0x100000) + /* s */ - ((high >> 36) & 0x0fffff); /* imm20b */ -} - -static void patch_reloc(tcg_insn_unit *code_ptr, int type, - intptr_t value, intptr_t addend) -{ - assert(addend == 0); - assert(type == R_IA64_PCREL21B); - reloc_pcrel21b_slot2(code_ptr, (tcg_insn_unit *)value); -} - -/* - * Constraints - */ - -/* parse target specific constraints */ -static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str) -{ - const char *ct_str; - - ct_str = *pct_str; - switch(ct_str[0]) { - case 'r': - ct->ct |= TCG_CT_REG; - tcg_regset_set(ct->u.regs, 0xffffffffffffffffull); - break; - case 'I': - ct->ct |= TCG_CT_CONST_S22; - break; - case 'S': - ct->ct |= TCG_CT_REG; - tcg_regset_set(ct->u.regs, 0xffffffffffffffffull); -#if defined(CONFIG_SOFTMMU) - tcg_regset_reset_reg(ct->u.regs, TCG_REG_R56); - tcg_regset_reset_reg(ct->u.regs, TCG_REG_R57); - tcg_regset_reset_reg(ct->u.regs, TCG_REG_R58); -#endif - break; - case 'Z': - /* We are cheating a bit here, using the fact that the register - r0 is also the register number 0. Hence there is no need - to check for const_args in each instruction. */ - ct->ct |= TCG_CT_CONST_ZERO; - break; - default: - return -1; - } - ct_str++; - *pct_str = ct_str; - return 0; -} - -/* test if a constant matches the constraint */ -static inline int tcg_target_const_match(tcg_target_long val, TCGType type, - const TCGArgConstraint *arg_ct) -{ - int ct; - ct = arg_ct->ct; - if (ct & TCG_CT_CONST) - return 1; - else if ((ct & TCG_CT_CONST_ZERO) && val == 0) - return 1; - else if ((ct & TCG_CT_CONST_S22) && val == ((int32_t)val << 10) >> 10) - return 1; - else - return 0; -} - -/* - * Code generation - */ - -static tcg_insn_unit *tb_ret_addr; - -static inline void tcg_out_bundle(TCGContext *s, int template, - uint64_t slot0, uint64_t slot1, - uint64_t slot2) -{ - template &= 0x1f; /* 5 bits */ - slot0 &= 0x1ffffffffffull; /* 41 bits */ - slot1 &= 0x1ffffffffffull; /* 41 bits */ - slot2 &= 0x1ffffffffffull; /* 41 bits */ - - *s->code_ptr++ = (tcg_insn_unit){ - (slot1 << 46) | (slot0 << 5) | template, - (slot2 << 23) | (slot1 >> 18) - }; -} - -static inline uint64_t tcg_opc_mov_a(int qp, TCGReg dst, TCGReg src) -{ - return tcg_opc_a4(qp, OPC_ADDS_A4, dst, 0, src); -} - -static inline void tcg_out_mov(TCGContext *s, TCGType type, - TCGReg ret, TCGReg arg) -{ - tcg_out_bundle(s, mmI, - INSN_NOP_M, - INSN_NOP_M, - tcg_opc_mov_a(TCG_REG_P0, ret, arg)); -} - -static inline uint64_t tcg_opc_movi_a(int qp, TCGReg dst, int64_t src) -{ - assert(src == sextract64(src, 0, 22)); - return tcg_opc_a5(qp, OPC_ADDL_A5, dst, src, TCG_REG_R0); -} - -static inline void tcg_out_movi(TCGContext *s, TCGType type, - TCGReg reg, tcg_target_long arg) -{ - tcg_out_bundle(s, mLX, - INSN_NOP_M, - tcg_opc_l2 (arg), - tcg_opc_x2 (TCG_REG_P0, OPC_MOVL_X2, reg, arg)); -} - -static void tcg_out_br(TCGContext *s, TCGLabel *l) -{ - uint64_t imm; - - /* We pay attention here to not modify the branch target by reading - the existing value and using it again. This ensure that caches and - memory are kept coherent during retranslation. */ - if (l->has_value) { - imm = l->u.value_ptr - s->code_ptr; - } else { - imm = get_reloc_pcrel21b_slot2(s->code_ptr); - tcg_out_reloc(s, s->code_ptr, R_IA64_PCREL21B, l, 0); - } - - tcg_out_bundle(s, mmB, - INSN_NOP_M, - INSN_NOP_M, - tcg_opc_b1(TCG_REG_P0, OPC_BR_SPTK_MANY_B1, imm)); -} - -static inline void tcg_out_call(TCGContext *s, tcg_insn_unit *desc) -{ - uintptr_t func = desc->lo, gp = desc->hi, disp; - - /* Look through the function descriptor. */ - tcg_out_bundle(s, mlx, - INSN_NOP_M, - tcg_opc_l2 (gp), - tcg_opc_x2 (TCG_REG_P0, OPC_MOVL_X2, TCG_REG_R1, gp)); - disp = (tcg_insn_unit *)func - s->code_ptr; - tcg_out_bundle(s, mLX, - INSN_NOP_M, - tcg_opc_l4 (disp), - tcg_opc_x4 (TCG_REG_P0, OPC_BRL_CALL_SPTK_MANY_X4, - TCG_REG_B0, disp)); -} - -static void tcg_out_exit_tb(TCGContext *s, tcg_target_long arg) -{ - uint64_t imm, opc1; - - /* At least arg == 0 is a common operation. */ - if (arg == sextract64(arg, 0, 22)) { - opc1 = tcg_opc_movi_a(TCG_REG_P0, TCG_REG_R8, arg); - } else { - tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R8, arg); - opc1 = INSN_NOP_M; - } - - imm = tb_ret_addr - s->code_ptr; - - tcg_out_bundle(s, mLX, - opc1, - tcg_opc_l3 (imm), - tcg_opc_x3 (TCG_REG_P0, OPC_BRL_SPTK_MANY_X3, imm)); -} - -static inline void tcg_out_goto_tb(TCGContext *s, TCGArg arg) -{ - if (s->tb_jmp_offset) { - /* direct jump method */ - tcg_abort(); - } else { - /* indirect jump method */ - tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R2, - (tcg_target_long)(s->tb_next + arg)); - tcg_out_bundle(s, MmI, - tcg_opc_m1 (TCG_REG_P0, OPC_LD8_M1, - TCG_REG_R2, TCG_REG_R2), - INSN_NOP_M, - tcg_opc_i21(TCG_REG_P0, OPC_MOV_I21, TCG_REG_B6, - TCG_REG_R2, 0)); - tcg_out_bundle(s, mmB, - INSN_NOP_M, - INSN_NOP_M, - tcg_opc_b4 (TCG_REG_P0, OPC_BR_SPTK_MANY_B4, - TCG_REG_B6)); - } - s->tb_next_offset[arg] = tcg_current_code_size(s); -} - -static inline void tcg_out_jmp(TCGContext *s, TCGArg addr) -{ - tcg_out_bundle(s, mmI, - INSN_NOP_M, - INSN_NOP_M, - tcg_opc_i21(TCG_REG_P0, OPC_MOV_I21, TCG_REG_B6, addr, 0)); - tcg_out_bundle(s, mmB, - INSN_NOP_M, - INSN_NOP_M, - tcg_opc_b4(TCG_REG_P0, OPC_BR_SPTK_MANY_B4, TCG_REG_B6)); -} - -static inline void tcg_out_ld_rel(TCGContext *s, uint64_t opc_m4, TCGArg arg, - TCGArg arg1, tcg_target_long arg2) -{ - if (arg2 == ((int16_t)arg2 >> 2) << 2) { - tcg_out_bundle(s, MmI, - tcg_opc_a4(TCG_REG_P0, OPC_ADDS_A4, - TCG_REG_R2, arg2, arg1), - tcg_opc_m1 (TCG_REG_P0, opc_m4, arg, TCG_REG_R2), - INSN_NOP_I); - } else { - tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R2, arg2); - tcg_out_bundle(s, MmI, - tcg_opc_a1 (TCG_REG_P0, OPC_ADD_A1, - TCG_REG_R2, TCG_REG_R2, arg1), - tcg_opc_m1 (TCG_REG_P0, opc_m4, arg, TCG_REG_R2), - INSN_NOP_I); - } -} - -static inline void tcg_out_st_rel(TCGContext *s, uint64_t opc_m4, TCGArg arg, - TCGArg arg1, tcg_target_long arg2) -{ - if (arg2 == ((int16_t)arg2 >> 2) << 2) { - tcg_out_bundle(s, MmI, - tcg_opc_a4(TCG_REG_P0, OPC_ADDS_A4, - TCG_REG_R2, arg2, arg1), - tcg_opc_m4 (TCG_REG_P0, opc_m4, arg, TCG_REG_R2), - INSN_NOP_I); - } else { - tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R2, arg2); - tcg_out_bundle(s, MmI, - tcg_opc_a1 (TCG_REG_P0, OPC_ADD_A1, - TCG_REG_R2, TCG_REG_R2, arg1), - tcg_opc_m4 (TCG_REG_P0, opc_m4, arg, TCG_REG_R2), - INSN_NOP_I); - } -} - -static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg arg, - TCGReg arg1, intptr_t arg2) -{ - if (type == TCG_TYPE_I32) { - tcg_out_ld_rel(s, OPC_LD4_M1, arg, arg1, arg2); - } else { - tcg_out_ld_rel(s, OPC_LD8_M1, arg, arg1, arg2); - } -} - -static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, - TCGReg arg1, intptr_t arg2) -{ - if (type == TCG_TYPE_I32) { - tcg_out_st_rel(s, OPC_ST4_M4, arg, arg1, arg2); - } else { - tcg_out_st_rel(s, OPC_ST8_M4, arg, arg1, arg2); - } -} - -static inline void tcg_out_alu(TCGContext *s, uint64_t opc_a1, uint64_t opc_a3, - TCGReg ret, TCGArg arg1, int const_arg1, - TCGArg arg2, int const_arg2) -{ - uint64_t opc1 = 0, opc2 = 0, opc3 = 0; - - if (const_arg2 && arg2 != 0) { - opc2 = tcg_opc_movi_a(TCG_REG_P0, TCG_REG_R3, arg2); - arg2 = TCG_REG_R3; - } - if (const_arg1 && arg1 != 0) { - if (opc_a3 && arg1 == (int8_t)arg1) { - opc3 = tcg_opc_a3(TCG_REG_P0, opc_a3, ret, arg1, arg2); - } else { - opc1 = tcg_opc_movi_a(TCG_REG_P0, TCG_REG_R2, arg1); - arg1 = TCG_REG_R2; - } - } - if (opc3 == 0) { - opc3 = tcg_opc_a1(TCG_REG_P0, opc_a1, ret, arg1, arg2); - } - - tcg_out_bundle(s, (opc1 || opc2 ? mII : miI), - opc1 ? opc1 : INSN_NOP_M, - opc2 ? opc2 : INSN_NOP_I, - opc3); -} - -static inline void tcg_out_add(TCGContext *s, TCGReg ret, TCGReg arg1, - TCGArg arg2, int const_arg2) -{ - if (const_arg2 && arg2 == sextract64(arg2, 0, 14)) { - tcg_out_bundle(s, mmI, - INSN_NOP_M, - INSN_NOP_M, - tcg_opc_a4(TCG_REG_P0, OPC_ADDS_A4, ret, arg2, arg1)); - } else { - tcg_out_alu(s, OPC_ADD_A1, 0, ret, arg1, 0, arg2, const_arg2); - } -} - -static inline void tcg_out_sub(TCGContext *s, TCGReg ret, TCGArg arg1, - int const_arg1, TCGArg arg2, int const_arg2) -{ - if (!const_arg1 && const_arg2 && -arg2 == sextract64(-arg2, 0, 14)) { - tcg_out_bundle(s, mmI, - INSN_NOP_M, - INSN_NOP_M, - tcg_opc_a4(TCG_REG_P0, OPC_ADDS_A4, ret, -arg2, arg1)); - } else { - tcg_out_alu(s, OPC_SUB_A1, OPC_SUB_A3, ret, - arg1, const_arg1, arg2, const_arg2); - } -} - -static inline void tcg_out_eqv(TCGContext *s, TCGArg ret, - TCGArg arg1, int const_arg1, - TCGArg arg2, int const_arg2) -{ - tcg_out_bundle(s, mII, - INSN_NOP_M, - tcg_opc_a1 (TCG_REG_P0, OPC_XOR_A1, ret, arg1, arg2), - tcg_opc_a3 (TCG_REG_P0, OPC_ANDCM_A3, ret, -1, ret)); -} - -static inline void tcg_out_nand(TCGContext *s, TCGArg ret, - TCGArg arg1, int const_arg1, - TCGArg arg2, int const_arg2) -{ - tcg_out_bundle(s, mII, - INSN_NOP_M, - tcg_opc_a1 (TCG_REG_P0, OPC_AND_A1, ret, arg1, arg2), - tcg_opc_a3 (TCG_REG_P0, OPC_ANDCM_A3, ret, -1, ret)); -} - -static inline void tcg_out_nor(TCGContext *s, TCGArg ret, - TCGArg arg1, int const_arg1, - TCGArg arg2, int const_arg2) -{ - tcg_out_bundle(s, mII, - INSN_NOP_M, - tcg_opc_a1 (TCG_REG_P0, OPC_OR_A1, ret, arg1, arg2), - tcg_opc_a3 (TCG_REG_P0, OPC_ANDCM_A3, ret, -1, ret)); -} - -static inline void tcg_out_orc(TCGContext *s, TCGArg ret, - TCGArg arg1, int const_arg1, - TCGArg arg2, int const_arg2) -{ - tcg_out_bundle(s, mII, - INSN_NOP_M, - tcg_opc_a3 (TCG_REG_P0, OPC_ANDCM_A3, TCG_REG_R2, -1, arg2), - tcg_opc_a1 (TCG_REG_P0, OPC_OR_A1, ret, arg1, TCG_REG_R2)); -} - -static inline void tcg_out_mul(TCGContext *s, TCGArg ret, - TCGArg arg1, TCGArg arg2) -{ - tcg_out_bundle(s, mmI, - tcg_opc_m18(TCG_REG_P0, OPC_SETF_SIG_M18, TCG_REG_F6, arg1), - tcg_opc_m18(TCG_REG_P0, OPC_SETF_SIG_M18, TCG_REG_F7, arg2), - INSN_NOP_I); - tcg_out_bundle(s, mmF, - INSN_NOP_M, - INSN_NOP_M, - tcg_opc_f2 (TCG_REG_P0, OPC_XMA_L_F2, TCG_REG_F6, TCG_REG_F6, - TCG_REG_F7, TCG_REG_F0)); - tcg_out_bundle(s, miI, - tcg_opc_m19(TCG_REG_P0, OPC_GETF_SIG_M19, ret, TCG_REG_F6), - INSN_NOP_I, - INSN_NOP_I); -} - -static inline void tcg_out_sar_i32(TCGContext *s, TCGArg ret, TCGArg arg1, - TCGArg arg2, int const_arg2) -{ - if (const_arg2) { - tcg_out_bundle(s, miI, - INSN_NOP_M, - INSN_NOP_I, - tcg_opc_i11(TCG_REG_P0, OPC_EXTR_I11, - ret, arg1, arg2, 31 - arg2)); - } else { - tcg_out_bundle(s, mII, - tcg_opc_a3 (TCG_REG_P0, OPC_AND_A3, - TCG_REG_R3, 0x1f, arg2), - tcg_opc_i29(TCG_REG_P0, OPC_SXT4_I29, TCG_REG_R2, arg1), - tcg_opc_i5 (TCG_REG_P0, OPC_SHR_I5, ret, - TCG_REG_R2, TCG_REG_R3)); - } -} - -static inline void tcg_out_sar_i64(TCGContext *s, TCGArg ret, TCGArg arg1, - TCGArg arg2, int const_arg2) -{ - if (const_arg2) { - tcg_out_bundle(s, miI, - INSN_NOP_M, - INSN_NOP_I, - tcg_opc_i11(TCG_REG_P0, OPC_EXTR_I11, - ret, arg1, arg2, 63 - arg2)); - } else { - tcg_out_bundle(s, miI, - INSN_NOP_M, - INSN_NOP_I, - tcg_opc_i5 (TCG_REG_P0, OPC_SHR_I5, ret, arg1, arg2)); - } -} - -static inline void tcg_out_shl_i32(TCGContext *s, TCGArg ret, TCGArg arg1, - TCGArg arg2, int const_arg2) -{ - if (const_arg2) { - tcg_out_bundle(s, miI, - INSN_NOP_M, - INSN_NOP_I, - tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12, ret, - arg1, 63 - arg2, 31 - arg2)); - } else { - tcg_out_bundle(s, mII, - INSN_NOP_M, - tcg_opc_a3 (TCG_REG_P0, OPC_AND_A3, TCG_REG_R2, - 0x1f, arg2), - tcg_opc_i7 (TCG_REG_P0, OPC_SHL_I7, ret, - arg1, TCG_REG_R2)); - } -} - -static inline void tcg_out_shl_i64(TCGContext *s, TCGArg ret, TCGArg arg1, - TCGArg arg2, int const_arg2) -{ - if (const_arg2) { - tcg_out_bundle(s, miI, - INSN_NOP_M, - INSN_NOP_I, - tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12, ret, - arg1, 63 - arg2, 63 - arg2)); - } else { - tcg_out_bundle(s, miI, - INSN_NOP_M, - INSN_NOP_I, - tcg_opc_i7 (TCG_REG_P0, OPC_SHL_I7, ret, - arg1, arg2)); - } -} - -static inline void tcg_out_shr_i32(TCGContext *s, TCGArg ret, TCGArg arg1, - TCGArg arg2, int const_arg2) -{ - if (const_arg2) { - tcg_out_bundle(s, miI, - INSN_NOP_M, - INSN_NOP_I, - tcg_opc_i11(TCG_REG_P0, OPC_EXTR_U_I11, ret, - arg1, arg2, 31 - arg2)); - } else { - tcg_out_bundle(s, mII, - tcg_opc_a3 (TCG_REG_P0, OPC_AND_A3, TCG_REG_R3, - 0x1f, arg2), - tcg_opc_i29(TCG_REG_P0, OPC_ZXT4_I29, TCG_REG_R2, arg1), - tcg_opc_i5 (TCG_REG_P0, OPC_SHR_U_I5, ret, - TCG_REG_R2, TCG_REG_R3)); - } -} - -static inline void tcg_out_shr_i64(TCGContext *s, TCGArg ret, TCGArg arg1, - TCGArg arg2, int const_arg2) -{ - if (const_arg2) { - tcg_out_bundle(s, miI, - INSN_NOP_M, - INSN_NOP_I, - tcg_opc_i11(TCG_REG_P0, OPC_EXTR_U_I11, ret, - arg1, arg2, 63 - arg2)); - } else { - tcg_out_bundle(s, miI, - INSN_NOP_M, - INSN_NOP_I, - tcg_opc_i5 (TCG_REG_P0, OPC_SHR_U_I5, ret, - arg1, arg2)); - } -} - -static inline void tcg_out_rotl_i32(TCGContext *s, TCGArg ret, TCGArg arg1, - TCGArg arg2, int const_arg2) -{ - if (const_arg2) { - tcg_out_bundle(s, mII, - INSN_NOP_M, - tcg_opc_i2 (TCG_REG_P0, OPC_UNPACK4_L_I2, - TCG_REG_R2, arg1, arg1), - tcg_opc_i11(TCG_REG_P0, OPC_EXTR_U_I11, ret, - TCG_REG_R2, 32 - arg2, 31)); - } else { - tcg_out_bundle(s, miI, - INSN_NOP_M, - tcg_opc_i2 (TCG_REG_P0, OPC_UNPACK4_L_I2, - TCG_REG_R2, arg1, arg1), - tcg_opc_a3 (TCG_REG_P0, OPC_AND_A3, TCG_REG_R3, - 0x1f, arg2)); - tcg_out_bundle(s, mII, - INSN_NOP_M, - tcg_opc_a3 (TCG_REG_P0, OPC_SUB_A3, TCG_REG_R3, - 0x20, TCG_REG_R3), - tcg_opc_i5 (TCG_REG_P0, OPC_SHR_U_I5, ret, - TCG_REG_R2, TCG_REG_R3)); - } -} - -static inline void tcg_out_rotl_i64(TCGContext *s, TCGArg ret, TCGArg arg1, - TCGArg arg2, int const_arg2) -{ - if (const_arg2) { - tcg_out_bundle(s, miI, - INSN_NOP_M, - INSN_NOP_I, - tcg_opc_i10(TCG_REG_P0, OPC_SHRP_I10, ret, arg1, - arg1, 0x40 - arg2)); - } else { - tcg_out_bundle(s, mII, - tcg_opc_a3 (TCG_REG_P0, OPC_SUB_A3, TCG_REG_R2, - 0x40, arg2), - tcg_opc_i7 (TCG_REG_P0, OPC_SHL_I7, TCG_REG_R3, - arg1, arg2), - tcg_opc_i5 (TCG_REG_P0, OPC_SHR_U_I5, TCG_REG_R2, - arg1, TCG_REG_R2)); - tcg_out_bundle(s, miI, - INSN_NOP_M, - INSN_NOP_I, - tcg_opc_a1 (TCG_REG_P0, OPC_OR_A1, ret, - TCG_REG_R2, TCG_REG_R3)); - } -} - -static inline void tcg_out_rotr_i32(TCGContext *s, TCGArg ret, TCGArg arg1, - TCGArg arg2, int const_arg2) -{ - if (const_arg2) { - tcg_out_bundle(s, mII, - INSN_NOP_M, - tcg_opc_i2 (TCG_REG_P0, OPC_UNPACK4_L_I2, - TCG_REG_R2, arg1, arg1), - tcg_opc_i11(TCG_REG_P0, OPC_EXTR_U_I11, ret, - TCG_REG_R2, arg2, 31)); - } else { - tcg_out_bundle(s, mII, - tcg_opc_a3 (TCG_REG_P0, OPC_AND_A3, TCG_REG_R3, - 0x1f, arg2), - tcg_opc_i2 (TCG_REG_P0, OPC_UNPACK4_L_I2, - TCG_REG_R2, arg1, arg1), - tcg_opc_i5 (TCG_REG_P0, OPC_SHR_U_I5, ret, - TCG_REG_R2, TCG_REG_R3)); - } -} - -static inline void tcg_out_rotr_i64(TCGContext *s, TCGArg ret, TCGArg arg1, - TCGArg arg2, int const_arg2) -{ - if (const_arg2) { - tcg_out_bundle(s, miI, - INSN_NOP_M, - INSN_NOP_I, - tcg_opc_i10(TCG_REG_P0, OPC_SHRP_I10, ret, arg1, - arg1, arg2)); - } else { - tcg_out_bundle(s, mII, - tcg_opc_a3 (TCG_REG_P0, OPC_SUB_A3, TCG_REG_R2, - 0x40, arg2), - tcg_opc_i5 (TCG_REG_P0, OPC_SHR_U_I5, TCG_REG_R3, - arg1, arg2), - tcg_opc_i7 (TCG_REG_P0, OPC_SHL_I7, TCG_REG_R2, - arg1, TCG_REG_R2)); - tcg_out_bundle(s, miI, - INSN_NOP_M, - INSN_NOP_I, - tcg_opc_a1 (TCG_REG_P0, OPC_OR_A1, ret, - TCG_REG_R2, TCG_REG_R3)); - } -} - -static const uint64_t opc_ext_i29[8] = { - OPC_ZXT1_I29, OPC_ZXT2_I29, OPC_ZXT4_I29, 0, - OPC_SXT1_I29, OPC_SXT2_I29, OPC_SXT4_I29, 0 -}; - -static inline uint64_t tcg_opc_ext_i(int qp, TCGMemOp opc, TCGReg d, TCGReg s) -{ - if ((opc & MO_SIZE) == MO_64) { - return tcg_opc_mov_a(qp, d, s); - } else { - return tcg_opc_i29(qp, opc_ext_i29[opc & MO_SSIZE], d, s); - } -} - -static inline void tcg_out_ext(TCGContext *s, uint64_t opc_i29, - TCGArg ret, TCGArg arg) -{ - tcg_out_bundle(s, miI, - INSN_NOP_M, - INSN_NOP_I, - tcg_opc_i29(TCG_REG_P0, opc_i29, ret, arg)); -} - -static inline uint64_t tcg_opc_bswap64_i(int qp, TCGReg d, TCGReg s) -{ - return tcg_opc_i3(qp, OPC_MUX1_I3, d, s, 0xb); -} - -static inline void tcg_out_bswap16(TCGContext *s, TCGArg ret, TCGArg arg) -{ - tcg_out_bundle(s, mII, - INSN_NOP_M, - tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12, ret, arg, 15, 15), - tcg_opc_bswap64_i(TCG_REG_P0, ret, ret)); -} - -static inline void tcg_out_bswap32(TCGContext *s, TCGArg ret, TCGArg arg) -{ - tcg_out_bundle(s, mII, - INSN_NOP_M, - tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12, ret, arg, 31, 31), - tcg_opc_bswap64_i(TCG_REG_P0, ret, ret)); -} - -static inline void tcg_out_bswap64(TCGContext *s, TCGArg ret, TCGArg arg) -{ - tcg_out_bundle(s, miI, - INSN_NOP_M, - INSN_NOP_I, - tcg_opc_bswap64_i(TCG_REG_P0, ret, arg)); -} - -static inline void tcg_out_deposit(TCGContext *s, TCGArg ret, TCGArg a1, - TCGArg a2, int const_a2, int pos, int len) -{ - uint64_t i1 = 0, i2 = 0; - int cpos = 63 - pos, lm1 = len - 1; - - if (const_a2) { - /* Truncate the value of a constant a2 to the width of the field. */ - int mask = (1u << len) - 1; - a2 &= mask; - - if (a2 == 0 || a2 == mask) { - /* 1-bit signed constant inserted into register. */ - i2 = tcg_opc_i14(TCG_REG_P0, OPC_DEP_I14, ret, a2, a1, cpos, lm1); - } else { - /* Otherwise, load any constant into a temporary. Do this into - the first I slot to help out with cross-unit delays. */ - i1 = tcg_opc_movi_a(TCG_REG_P0, TCG_REG_R2, a2); - a2 = TCG_REG_R2; - } - } - if (i2 == 0) { - i2 = tcg_opc_i15(TCG_REG_P0, OPC_DEP_I15, ret, a2, a1, cpos, lm1); - } - tcg_out_bundle(s, (i1 ? mII : miI), - INSN_NOP_M, - i1 ? i1 : INSN_NOP_I, - i2); -} - -static inline uint64_t tcg_opc_cmp_a(int qp, TCGCond cond, TCGArg arg1, - TCGArg arg2, int cmp4) -{ - uint64_t opc_eq_a6, opc_lt_a6, opc_ltu_a6; - - if (cmp4) { - opc_eq_a6 = OPC_CMP4_EQ_A6; - opc_lt_a6 = OPC_CMP4_LT_A6; - opc_ltu_a6 = OPC_CMP4_LTU_A6; - } else { - opc_eq_a6 = OPC_CMP_EQ_A6; - opc_lt_a6 = OPC_CMP_LT_A6; - opc_ltu_a6 = OPC_CMP_LTU_A6; - } - - switch (cond) { - case TCG_COND_EQ: - return tcg_opc_a6 (qp, opc_eq_a6, TCG_REG_P6, TCG_REG_P7, arg1, arg2); - case TCG_COND_NE: - return tcg_opc_a6 (qp, opc_eq_a6, TCG_REG_P7, TCG_REG_P6, arg1, arg2); - case TCG_COND_LT: - return tcg_opc_a6 (qp, opc_lt_a6, TCG_REG_P6, TCG_REG_P7, arg1, arg2); - case TCG_COND_LTU: - return tcg_opc_a6 (qp, opc_ltu_a6, TCG_REG_P6, TCG_REG_P7, arg1, arg2); - case TCG_COND_GE: - return tcg_opc_a6 (qp, opc_lt_a6, TCG_REG_P7, TCG_REG_P6, arg1, arg2); - case TCG_COND_GEU: - return tcg_opc_a6 (qp, opc_ltu_a6, TCG_REG_P7, TCG_REG_P6, arg1, arg2); - case TCG_COND_LE: - return tcg_opc_a6 (qp, opc_lt_a6, TCG_REG_P7, TCG_REG_P6, arg2, arg1); - case TCG_COND_LEU: - return tcg_opc_a6 (qp, opc_ltu_a6, TCG_REG_P7, TCG_REG_P6, arg2, arg1); - case TCG_COND_GT: - return tcg_opc_a6 (qp, opc_lt_a6, TCG_REG_P6, TCG_REG_P7, arg2, arg1); - case TCG_COND_GTU: - return tcg_opc_a6 (qp, opc_ltu_a6, TCG_REG_P6, TCG_REG_P7, arg2, arg1); - default: - tcg_abort(); - break; - } -} - -static inline void tcg_out_brcond(TCGContext *s, TCGCond cond, TCGReg arg1, - TCGReg arg2, TCGLabel *l, int cmp4) -{ - uint64_t imm; - - /* We pay attention here to not modify the branch target by reading - the existing value and using it again. This ensure that caches and - memory are kept coherent during retranslation. */ - if (l->has_value) { - imm = l->u.value_ptr - s->code_ptr; - } else { - imm = get_reloc_pcrel21b_slot2(s->code_ptr); - tcg_out_reloc(s, s->code_ptr, R_IA64_PCREL21B, l, 0); - } - - tcg_out_bundle(s, miB, - INSN_NOP_M, - tcg_opc_cmp_a(TCG_REG_P0, cond, arg1, arg2, cmp4), - tcg_opc_b1(TCG_REG_P6, OPC_BR_DPTK_FEW_B1, imm)); -} - -static inline void tcg_out_setcond(TCGContext *s, TCGCond cond, TCGArg ret, - TCGArg arg1, TCGArg arg2, int cmp4) -{ - tcg_out_bundle(s, MmI, - tcg_opc_cmp_a(TCG_REG_P0, cond, arg1, arg2, cmp4), - tcg_opc_movi_a(TCG_REG_P6, ret, 1), - tcg_opc_movi_a(TCG_REG_P7, ret, 0)); -} - -static inline void tcg_out_movcond(TCGContext *s, TCGCond cond, TCGArg ret, - TCGArg c1, TCGArg c2, - TCGArg v1, int const_v1, - TCGArg v2, int const_v2, int cmp4) -{ - uint64_t opc1, opc2; - - if (const_v1) { - opc1 = tcg_opc_movi_a(TCG_REG_P6, ret, v1); - } else if (ret == v1) { - opc1 = INSN_NOP_M; - } else { - opc1 = tcg_opc_mov_a(TCG_REG_P6, ret, v1); - } - if (const_v2) { - opc2 = tcg_opc_movi_a(TCG_REG_P7, ret, v2); - } else if (ret == v2) { - opc2 = INSN_NOP_I; - } else { - opc2 = tcg_opc_mov_a(TCG_REG_P7, ret, v2); - } - - tcg_out_bundle(s, MmI, - tcg_opc_cmp_a(TCG_REG_P0, cond, c1, c2, cmp4), - opc1, - opc2); -} - -#if defined(CONFIG_SOFTMMU) -/* We're expecting to use an signed 22-bit immediate add. */ -QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_table[NB_MMU_MODES - 1][1]) - > 0x1fffff) - -/* Load and compare a TLB entry, and return the result in (p6, p7). - R2 is loaded with the addend TLB entry. - R57 is loaded with the address, zero extented on 32-bit targets. - R1, R3 are clobbered, leaving R56 free for... - BSWAP_1, BSWAP_2 and I-slot insns for swapping data for store. */ -static inline void tcg_out_qemu_tlb(TCGContext *s, TCGReg addr_reg, - TCGMemOp s_bits, int off_rw, int off_add, - uint64_t bswap1, uint64_t bswap2) -{ - /* - .mii - mov r2 = off_rw - extr.u r3 = addr_reg, ... # extract tlb page - zxt4 r57 = addr_reg # or mov for 64-bit guest - ;; - .mii - addl r2 = r2, areg0 - shl r3 = r3, cteb # via dep.z - dep r1 = 0, r57, ... # zero page ofs, keep align - ;; - .mmi - add r2 = r2, r3 - ;; - ld4 r3 = [r2], off_add-off_rw # or ld8 for 64-bit guest - nop - ;; - .mmi - nop - cmp.eq p6, p7 = r3, r58 - nop - ;; - */ - tcg_out_bundle(s, miI, - tcg_opc_movi_a(TCG_REG_P0, TCG_REG_R2, off_rw), - tcg_opc_i11(TCG_REG_P0, OPC_EXTR_U_I11, TCG_REG_R3, - addr_reg, TARGET_PAGE_BITS, CPU_TLB_BITS - 1), - tcg_opc_ext_i(TCG_REG_P0, - TARGET_LONG_BITS == 32 ? MO_UL : MO_Q, - TCG_REG_R57, addr_reg)); - tcg_out_bundle(s, miI, - tcg_opc_a1 (TCG_REG_P0, OPC_ADD_A1, TCG_REG_R2, - TCG_REG_R2, TCG_AREG0), - tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12, TCG_REG_R3, - TCG_REG_R3, 63 - CPU_TLB_ENTRY_BITS, - 63 - CPU_TLB_ENTRY_BITS), - tcg_opc_i14(TCG_REG_P0, OPC_DEP_I14, TCG_REG_R1, 0, - TCG_REG_R57, 63 - s_bits, - TARGET_PAGE_BITS - s_bits - 1)); - tcg_out_bundle(s, MmI, - tcg_opc_a1 (TCG_REG_P0, OPC_ADD_A1, - TCG_REG_R2, TCG_REG_R2, TCG_REG_R3), - tcg_opc_m3 (TCG_REG_P0, - (TARGET_LONG_BITS == 32 - ? OPC_LD4_M3 : OPC_LD8_M3), TCG_REG_R3, - TCG_REG_R2, off_add - off_rw), - bswap1); - tcg_out_bundle(s, mmI, - tcg_opc_m1 (TCG_REG_P0, OPC_LD8_M1, TCG_REG_R2, TCG_REG_R2), - tcg_opc_a6 (TCG_REG_P0, OPC_CMP_EQ_A6, TCG_REG_P6, - TCG_REG_P7, TCG_REG_R1, TCG_REG_R3), - bswap2); -} - -typedef struct TCGLabelQemuLdst { - bool is_ld; - TCGMemOp size; - tcg_insn_unit *label_ptr; /* label pointers to be updated */ - struct TCGLabelQemuLdst *next; -} TCGLabelQemuLdst; - -typedef struct TCGBackendData { - TCGLabelQemuLdst *labels; -} TCGBackendData; - -static inline void tcg_out_tb_init(TCGContext *s) -{ - s->be->labels = NULL; -} - -static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOp opc, - tcg_insn_unit *label_ptr) -{ - TCGBackendData *be = s->be; - TCGLabelQemuLdst *l = tcg_malloc(sizeof(*l)); - - l->is_ld = is_ld; - l->size = opc & MO_SIZE; - l->label_ptr = label_ptr; - l->next = be->labels; - be->labels = l; -} - -static bool tcg_out_tb_finalize(TCGContext *s) -{ - static const void * const helpers[8] = { - helper_ret_stb_mmu, - helper_le_stw_mmu, - helper_le_stl_mmu, - helper_le_stq_mmu, - helper_ret_ldub_mmu, - helper_le_lduw_mmu, - helper_le_ldul_mmu, - helper_le_ldq_mmu, - }; - tcg_insn_unit *thunks[8] = { }; - TCGLabelQemuLdst *l; - - for (l = s->be->labels; l != NULL; l = l->next) { - long x = l->is_ld * 4 + l->size; - tcg_insn_unit *dest = thunks[x]; - - /* The out-of-line thunks are all the same; load the return address - from B0, load the GP, and branch to the code. Note that we are - always post-call, so the register window has rolled, so we're - using incoming parameter register numbers, not outgoing. */ - if (dest == NULL) { - uintptr_t *desc = (uintptr_t *)helpers[x]; - uintptr_t func = desc[0], gp = desc[1], disp; - - thunks[x] = dest = s->code_ptr; - - tcg_out_bundle(s, mlx, - INSN_NOP_M, - tcg_opc_l2 (gp), - tcg_opc_x2 (TCG_REG_P0, OPC_MOVL_X2, - TCG_REG_R1, gp)); - tcg_out_bundle(s, mii, - INSN_NOP_M, - INSN_NOP_I, - tcg_opc_i22(TCG_REG_P0, OPC_MOV_I22, - l->is_ld ? TCG_REG_R35 : TCG_REG_R36, - TCG_REG_B0)); - disp = (tcg_insn_unit *)func - s->code_ptr; - tcg_out_bundle(s, mLX, - INSN_NOP_M, - tcg_opc_l3 (disp), - tcg_opc_x3 (TCG_REG_P0, OPC_BRL_SPTK_MANY_X3, disp)); - } - - reloc_pcrel21b_slot2(l->label_ptr, dest); - - /* Test for (pending) buffer overflow. The assumption is that any - one operation beginning below the high water mark cannot overrun - the buffer completely. Thus we can test for overflow after - generating code without having to check during generation. */ - if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) { - return false; - } - } - return true; -} - -static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args) -{ - static const uint64_t opc_ld_m1[4] = { - OPC_LD1_M1, OPC_LD2_M1, OPC_LD4_M1, OPC_LD8_M1 - }; - int addr_reg, data_reg, mem_index; - TCGMemOpIdx oi; - TCGMemOp opc, s_bits; - uint64_t fin1, fin2; - tcg_insn_unit *label_ptr; - - data_reg = args[0]; - addr_reg = args[1]; - oi = args[2]; - opc = get_memop(oi); - mem_index = get_mmuidx(oi); - s_bits = opc & MO_SIZE; - - /* Read the TLB entry */ - tcg_out_qemu_tlb(s, addr_reg, s_bits, - offsetof(CPUArchState, tlb_table[mem_index][0].addr_read), - offsetof(CPUArchState, tlb_table[mem_index][0].addend), - INSN_NOP_I, INSN_NOP_I); - - /* P6 is the fast path, and P7 the slow path */ - - fin2 = 0; - if (opc & MO_BSWAP) { - fin1 = tcg_opc_bswap64_i(TCG_REG_P0, data_reg, TCG_REG_R8); - if (s_bits < MO_64) { - int shift = 64 - (8 << s_bits); - fin2 = (opc & MO_SIGN ? OPC_EXTR_I11 : OPC_EXTR_U_I11); - fin2 = tcg_opc_i11(TCG_REG_P0, fin2, - data_reg, data_reg, shift, 63 - shift); - } - } else { - fin1 = tcg_opc_ext_i(TCG_REG_P0, opc, data_reg, TCG_REG_R8); - } - - tcg_out_bundle(s, mmI, - tcg_opc_mov_a(TCG_REG_P7, TCG_REG_R56, TCG_AREG0), - tcg_opc_a1 (TCG_REG_P6, OPC_ADD_A1, TCG_REG_R2, - TCG_REG_R2, TCG_REG_R57), - tcg_opc_movi_a(TCG_REG_P7, TCG_REG_R58, oi)); - label_ptr = s->code_ptr; - tcg_out_bundle(s, miB, - tcg_opc_m1 (TCG_REG_P6, opc_ld_m1[s_bits], - TCG_REG_R8, TCG_REG_R2), - INSN_NOP_I, - tcg_opc_b3 (TCG_REG_P7, OPC_BR_CALL_SPNT_FEW_B3, TCG_REG_B0, - get_reloc_pcrel21b_slot2(label_ptr))); - - add_qemu_ldst_label(s, 1, opc, label_ptr); - - /* Note that we always use LE helper functions, so the bswap insns - here for the fast path also apply to the slow path. */ - tcg_out_bundle(s, (fin2 ? mII : miI), - INSN_NOP_M, - fin1, - fin2 ? fin2 : INSN_NOP_I); -} - -static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args) -{ - static const uint64_t opc_st_m4[4] = { - OPC_ST1_M4, OPC_ST2_M4, OPC_ST4_M4, OPC_ST8_M4 - }; - TCGReg addr_reg, data_reg; - int mem_index; - uint64_t pre1, pre2; - TCGMemOpIdx oi; - TCGMemOp opc, s_bits; - tcg_insn_unit *label_ptr; - - data_reg = args[0]; - addr_reg = args[1]; - oi = args[2]; - opc = get_memop(oi); - mem_index = get_mmuidx(oi); - s_bits = opc & MO_SIZE; - - /* Note that we always use LE helper functions, so the bswap insns - that are here for the fast path also apply to the slow path, - and move the data into the argument register. */ - pre2 = INSN_NOP_I; - if (opc & MO_BSWAP) { - pre1 = tcg_opc_bswap64_i(TCG_REG_P0, TCG_REG_R58, data_reg); - if (s_bits < MO_64) { - int shift = 64 - (8 << s_bits); - pre2 = tcg_opc_i11(TCG_REG_P0, OPC_EXTR_U_I11, - TCG_REG_R58, TCG_REG_R58, shift, 63 - shift); - } - } else { - /* Just move the data into place for the slow path. */ - pre1 = tcg_opc_ext_i(TCG_REG_P0, opc, TCG_REG_R58, data_reg); - } - - tcg_out_qemu_tlb(s, addr_reg, s_bits, - offsetof(CPUArchState, tlb_table[mem_index][0].addr_write), - offsetof(CPUArchState, tlb_table[mem_index][0].addend), - pre1, pre2); - - /* P6 is the fast path, and P7 the slow path */ - tcg_out_bundle(s, mmI, - tcg_opc_mov_a(TCG_REG_P7, TCG_REG_R56, TCG_AREG0), - tcg_opc_a1 (TCG_REG_P6, OPC_ADD_A1, TCG_REG_R2, - TCG_REG_R2, TCG_REG_R57), - tcg_opc_movi_a(TCG_REG_P7, TCG_REG_R59, oi)); - label_ptr = s->code_ptr; - tcg_out_bundle(s, miB, - tcg_opc_m4 (TCG_REG_P6, opc_st_m4[s_bits], - TCG_REG_R58, TCG_REG_R2), - INSN_NOP_I, - tcg_opc_b3 (TCG_REG_P7, OPC_BR_CALL_SPNT_FEW_B3, TCG_REG_B0, - get_reloc_pcrel21b_slot2(label_ptr))); - - add_qemu_ldst_label(s, 0, opc, label_ptr); -} - -#else /* !CONFIG_SOFTMMU */ -# include "tcg-be-null.h" - -static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args) -{ - static uint64_t const opc_ld_m1[4] = { - OPC_LD1_M1, OPC_LD2_M1, OPC_LD4_M1, OPC_LD8_M1 - }; - int addr_reg, data_reg; - TCGMemOp opc, s_bits, bswap; - - data_reg = args[0]; - addr_reg = args[1]; - opc = args[2]; - s_bits = opc & MO_SIZE; - bswap = opc & MO_BSWAP; - -#if TARGET_LONG_BITS == 32 - if (guest_base != 0) { - tcg_out_bundle(s, mII, - INSN_NOP_M, - tcg_opc_i29(TCG_REG_P0, OPC_ZXT4_I29, - TCG_REG_R3, addr_reg), - tcg_opc_a1 (TCG_REG_P0, OPC_ADD_A1, TCG_REG_R2, - TCG_GUEST_BASE_REG, TCG_REG_R3)); - } else { - tcg_out_bundle(s, miI, - INSN_NOP_M, - tcg_opc_i29(TCG_REG_P0, OPC_ZXT4_I29, - TCG_REG_R2, addr_reg), - INSN_NOP_I); - } - - if (!bswap) { - if (!(opc & MO_SIGN)) { - tcg_out_bundle(s, miI, - tcg_opc_m1 (TCG_REG_P0, opc_ld_m1[s_bits], - data_reg, TCG_REG_R2), - INSN_NOP_I, - INSN_NOP_I); - } else { - tcg_out_bundle(s, mII, - tcg_opc_m1 (TCG_REG_P0, opc_ld_m1[s_bits], - data_reg, TCG_REG_R2), - INSN_NOP_I, - tcg_opc_ext_i(TCG_REG_P0, opc, data_reg, data_reg)); - } - } else if (s_bits == MO_64) { - tcg_out_bundle(s, mII, - tcg_opc_m1 (TCG_REG_P0, opc_ld_m1[s_bits], - data_reg, TCG_REG_R2), - INSN_NOP_I, - tcg_opc_bswap64_i(TCG_REG_P0, data_reg, data_reg)); - } else { - if (s_bits == MO_16) { - tcg_out_bundle(s, mII, - tcg_opc_m1 (TCG_REG_P0, opc_ld_m1[s_bits], - data_reg, TCG_REG_R2), - INSN_NOP_I, - tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12, - data_reg, data_reg, 15, 15)); - } else { - tcg_out_bundle(s, mII, - tcg_opc_m1 (TCG_REG_P0, opc_ld_m1[s_bits], - data_reg, TCG_REG_R2), - INSN_NOP_I, - tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12, - data_reg, data_reg, 31, 31)); - } - if (!(opc & MO_SIGN)) { - tcg_out_bundle(s, miI, - INSN_NOP_M, - INSN_NOP_I, - tcg_opc_bswap64_i(TCG_REG_P0, data_reg, data_reg)); - } else { - tcg_out_bundle(s, mII, - INSN_NOP_M, - tcg_opc_bswap64_i(TCG_REG_P0, data_reg, data_reg), - tcg_opc_ext_i(TCG_REG_P0, opc, data_reg, data_reg)); - } - } -#else - if (guest_base != 0) { - tcg_out_bundle(s, MmI, - tcg_opc_a1 (TCG_REG_P0, OPC_ADD_A1, TCG_REG_R2, - TCG_GUEST_BASE_REG, addr_reg), - tcg_opc_m1 (TCG_REG_P0, opc_ld_m1[s_bits], - data_reg, TCG_REG_R2), - INSN_NOP_I); - } else { - tcg_out_bundle(s, mmI, - INSN_NOP_M, - tcg_opc_m1 (TCG_REG_P0, opc_ld_m1[s_bits], - data_reg, addr_reg), - INSN_NOP_I); - } - - if (bswap && s_bits == MO_16) { - tcg_out_bundle(s, mII, - INSN_NOP_M, - tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12, - data_reg, data_reg, 15, 15), - tcg_opc_bswap64_i(TCG_REG_P0, data_reg, data_reg)); - } else if (bswap && s_bits == MO_32) { - tcg_out_bundle(s, mII, - INSN_NOP_M, - tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12, - data_reg, data_reg, 31, 31), - tcg_opc_bswap64_i(TCG_REG_P0, data_reg, data_reg)); - } else if (bswap && s_bits == MO_64) { - tcg_out_bundle(s, miI, - INSN_NOP_M, - INSN_NOP_I, - tcg_opc_bswap64_i(TCG_REG_P0, data_reg, data_reg)); - } - if (opc & MO_SIGN) { - tcg_out_bundle(s, miI, - INSN_NOP_M, - INSN_NOP_I, - tcg_opc_ext_i(TCG_REG_P0, opc, data_reg, data_reg)); - } -#endif -} - -static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args) -{ - static uint64_t const opc_st_m4[4] = { - OPC_ST1_M4, OPC_ST2_M4, OPC_ST4_M4, OPC_ST8_M4 - }; - int addr_reg, data_reg; -#if TARGET_LONG_BITS == 64 - uint64_t add_guest_base; -#endif - TCGMemOp opc, s_bits, bswap; - - data_reg = args[0]; - addr_reg = args[1]; - opc = args[2]; - s_bits = opc & MO_SIZE; - bswap = opc & MO_BSWAP; - -#if TARGET_LONG_BITS == 32 - if (guest_base != 0) { - tcg_out_bundle(s, mII, - INSN_NOP_M, - tcg_opc_i29(TCG_REG_P0, OPC_ZXT4_I29, - TCG_REG_R3, addr_reg), - tcg_opc_a1 (TCG_REG_P0, OPC_ADD_A1, TCG_REG_R2, - TCG_GUEST_BASE_REG, TCG_REG_R3)); - } else { - tcg_out_bundle(s, miI, - INSN_NOP_M, - tcg_opc_i29(TCG_REG_P0, OPC_ZXT4_I29, - TCG_REG_R2, addr_reg), - INSN_NOP_I); - } - - if (bswap) { - if (s_bits == MO_16) { - tcg_out_bundle(s, mII, - INSN_NOP_M, - tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12, - TCG_REG_R3, data_reg, 15, 15), - tcg_opc_bswap64_i(TCG_REG_P0, - TCG_REG_R3, TCG_REG_R3)); - data_reg = TCG_REG_R3; - } else if (s_bits == MO_32) { - tcg_out_bundle(s, mII, - INSN_NOP_M, - tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12, - TCG_REG_R3, data_reg, 31, 31), - tcg_opc_bswap64_i(TCG_REG_P0, - TCG_REG_R3, TCG_REG_R3)); - data_reg = TCG_REG_R3; - } else if (s_bits == MO_64) { - tcg_out_bundle(s, miI, - INSN_NOP_M, - INSN_NOP_I, - tcg_opc_bswap64_i(TCG_REG_P0, TCG_REG_R3, data_reg)); - data_reg = TCG_REG_R3; - } - } - tcg_out_bundle(s, mmI, - tcg_opc_m4 (TCG_REG_P0, opc_st_m4[s_bits], - data_reg, TCG_REG_R2), - INSN_NOP_M, - INSN_NOP_I); -#else - if (guest_base != 0) { - add_guest_base = tcg_opc_a1 (TCG_REG_P0, OPC_ADD_A1, TCG_REG_R2, - TCG_GUEST_BASE_REG, addr_reg); - addr_reg = TCG_REG_R2; - } else { - add_guest_base = INSN_NOP_M; - } - - if (!bswap) { - tcg_out_bundle(s, (guest_base ? MmI : mmI), - add_guest_base, - tcg_opc_m4 (TCG_REG_P0, opc_st_m4[s_bits], - data_reg, addr_reg), - INSN_NOP_I); - } else { - if (s_bits == MO_16) { - tcg_out_bundle(s, mII, - add_guest_base, - tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12, - TCG_REG_R3, data_reg, 15, 15), - tcg_opc_bswap64_i(TCG_REG_P0, - TCG_REG_R3, TCG_REG_R3)); - data_reg = TCG_REG_R3; - } else if (s_bits == MO_32) { - tcg_out_bundle(s, mII, - add_guest_base, - tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12, - TCG_REG_R3, data_reg, 31, 31), - tcg_opc_bswap64_i(TCG_REG_P0, - TCG_REG_R3, TCG_REG_R3)); - data_reg = TCG_REG_R3; - } else if (s_bits == MO_64) { - tcg_out_bundle(s, miI, - add_guest_base, - INSN_NOP_I, - tcg_opc_bswap64_i(TCG_REG_P0, TCG_REG_R3, data_reg)); - data_reg = TCG_REG_R3; - } - tcg_out_bundle(s, miI, - tcg_opc_m4 (TCG_REG_P0, opc_st_m4[s_bits], - data_reg, addr_reg), - INSN_NOP_I, - INSN_NOP_I); - } -#endif -} - -#endif - -static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, - const TCGArg *args, const int *const_args) -{ - switch(opc) { - case INDEX_op_exit_tb: - tcg_out_exit_tb(s, args[0]); - break; - case INDEX_op_br: - tcg_out_br(s, arg_label(args[0])); - break; - case INDEX_op_goto_tb: - tcg_out_goto_tb(s, args[0]); - break; - - case INDEX_op_ld8u_i32: - case INDEX_op_ld8u_i64: - tcg_out_ld_rel(s, OPC_LD1_M1, args[0], args[1], args[2]); - break; - case INDEX_op_ld8s_i32: - case INDEX_op_ld8s_i64: - tcg_out_ld_rel(s, OPC_LD1_M1, args[0], args[1], args[2]); - tcg_out_ext(s, OPC_SXT1_I29, args[0], args[0]); - break; - case INDEX_op_ld16u_i32: - case INDEX_op_ld16u_i64: - tcg_out_ld_rel(s, OPC_LD2_M1, args[0], args[1], args[2]); - break; - case INDEX_op_ld16s_i32: - case INDEX_op_ld16s_i64: - tcg_out_ld_rel(s, OPC_LD2_M1, args[0], args[1], args[2]); - tcg_out_ext(s, OPC_SXT2_I29, args[0], args[0]); - break; - case INDEX_op_ld_i32: - case INDEX_op_ld32u_i64: - tcg_out_ld_rel(s, OPC_LD4_M1, args[0], args[1], args[2]); - break; - case INDEX_op_ld32s_i64: - tcg_out_ld_rel(s, OPC_LD4_M1, args[0], args[1], args[2]); - tcg_out_ext(s, OPC_SXT4_I29, args[0], args[0]); - break; - case INDEX_op_ld_i64: - tcg_out_ld_rel(s, OPC_LD8_M1, args[0], args[1], args[2]); - break; - case INDEX_op_st8_i32: - case INDEX_op_st8_i64: - tcg_out_st_rel(s, OPC_ST1_M4, args[0], args[1], args[2]); - break; - case INDEX_op_st16_i32: - case INDEX_op_st16_i64: - tcg_out_st_rel(s, OPC_ST2_M4, args[0], args[1], args[2]); - break; - case INDEX_op_st_i32: - case INDEX_op_st32_i64: - tcg_out_st_rel(s, OPC_ST4_M4, args[0], args[1], args[2]); - break; - case INDEX_op_st_i64: - tcg_out_st_rel(s, OPC_ST8_M4, args[0], args[1], args[2]); - break; - - case INDEX_op_add_i32: - case INDEX_op_add_i64: - tcg_out_add(s, args[0], args[1], args[2], const_args[2]); - break; - case INDEX_op_sub_i32: - case INDEX_op_sub_i64: - tcg_out_sub(s, args[0], args[1], const_args[1], args[2], const_args[2]); - break; - - case INDEX_op_and_i32: - case INDEX_op_and_i64: - /* TCG expects arg2 constant; A3 expects arg1 constant. Swap. */ - tcg_out_alu(s, OPC_AND_A1, OPC_AND_A3, args[0], - args[2], const_args[2], args[1], const_args[1]); - break; - case INDEX_op_andc_i32: - case INDEX_op_andc_i64: - tcg_out_alu(s, OPC_ANDCM_A1, OPC_ANDCM_A3, args[0], - args[1], const_args[1], args[2], const_args[2]); - break; - case INDEX_op_eqv_i32: - case INDEX_op_eqv_i64: - tcg_out_eqv(s, args[0], args[1], const_args[1], - args[2], const_args[2]); - break; - case INDEX_op_nand_i32: - case INDEX_op_nand_i64: - tcg_out_nand(s, args[0], args[1], const_args[1], - args[2], const_args[2]); - break; - case INDEX_op_nor_i32: - case INDEX_op_nor_i64: - tcg_out_nor(s, args[0], args[1], const_args[1], - args[2], const_args[2]); - break; - case INDEX_op_or_i32: - case INDEX_op_or_i64: - /* TCG expects arg2 constant; A3 expects arg1 constant. Swap. */ - tcg_out_alu(s, OPC_OR_A1, OPC_OR_A3, args[0], - args[2], const_args[2], args[1], const_args[1]); - break; - case INDEX_op_orc_i32: - case INDEX_op_orc_i64: - tcg_out_orc(s, args[0], args[1], const_args[1], - args[2], const_args[2]); - break; - case INDEX_op_xor_i32: - case INDEX_op_xor_i64: - /* TCG expects arg2 constant; A3 expects arg1 constant. Swap. */ - tcg_out_alu(s, OPC_XOR_A1, OPC_XOR_A3, args[0], - args[2], const_args[2], args[1], const_args[1]); - break; - - case INDEX_op_mul_i32: - case INDEX_op_mul_i64: - tcg_out_mul(s, args[0], args[1], args[2]); - break; - - case INDEX_op_sar_i32: - tcg_out_sar_i32(s, args[0], args[1], args[2], const_args[2]); - break; - case INDEX_op_sar_i64: - tcg_out_sar_i64(s, args[0], args[1], args[2], const_args[2]); - break; - case INDEX_op_shl_i32: - tcg_out_shl_i32(s, args[0], args[1], args[2], const_args[2]); - break; - case INDEX_op_shl_i64: - tcg_out_shl_i64(s, args[0], args[1], args[2], const_args[2]); - break; - case INDEX_op_shr_i32: - tcg_out_shr_i32(s, args[0], args[1], args[2], const_args[2]); - break; - case INDEX_op_shr_i64: - tcg_out_shr_i64(s, args[0], args[1], args[2], const_args[2]); - break; - case INDEX_op_rotl_i32: - tcg_out_rotl_i32(s, args[0], args[1], args[2], const_args[2]); - break; - case INDEX_op_rotl_i64: - tcg_out_rotl_i64(s, args[0], args[1], args[2], const_args[2]); - break; - case INDEX_op_rotr_i32: - tcg_out_rotr_i32(s, args[0], args[1], args[2], const_args[2]); - break; - case INDEX_op_rotr_i64: - tcg_out_rotr_i64(s, args[0], args[1], args[2], const_args[2]); - break; - - case INDEX_op_ext8s_i32: - case INDEX_op_ext8s_i64: - tcg_out_ext(s, OPC_SXT1_I29, args[0], args[1]); - break; - case INDEX_op_ext8u_i32: - case INDEX_op_ext8u_i64: - tcg_out_ext(s, OPC_ZXT1_I29, args[0], args[1]); - break; - case INDEX_op_ext16s_i32: - case INDEX_op_ext16s_i64: - tcg_out_ext(s, OPC_SXT2_I29, args[0], args[1]); - break; - case INDEX_op_ext16u_i32: - case INDEX_op_ext16u_i64: - tcg_out_ext(s, OPC_ZXT2_I29, args[0], args[1]); - break; - case INDEX_op_ext_i32_i64: - case INDEX_op_ext32s_i64: - tcg_out_ext(s, OPC_SXT4_I29, args[0], args[1]); - break; - case INDEX_op_extu_i32_i64: - case INDEX_op_ext32u_i64: - tcg_out_ext(s, OPC_ZXT4_I29, args[0], args[1]); - break; - - case INDEX_op_bswap16_i32: - case INDEX_op_bswap16_i64: - tcg_out_bswap16(s, args[0], args[1]); - break; - case INDEX_op_bswap32_i32: - case INDEX_op_bswap32_i64: - tcg_out_bswap32(s, args[0], args[1]); - break; - case INDEX_op_bswap64_i64: - tcg_out_bswap64(s, args[0], args[1]); - break; - - case INDEX_op_deposit_i32: - case INDEX_op_deposit_i64: - tcg_out_deposit(s, args[0], args[1], args[2], const_args[2], - args[3], args[4]); - break; - - case INDEX_op_brcond_i32: - tcg_out_brcond(s, args[2], args[0], args[1], arg_label(args[3]), 1); - break; - case INDEX_op_brcond_i64: - tcg_out_brcond(s, args[2], args[0], args[1], arg_label(args[3]), 0); - break; - case INDEX_op_setcond_i32: - tcg_out_setcond(s, args[3], args[0], args[1], args[2], 1); - break; - case INDEX_op_setcond_i64: - tcg_out_setcond(s, args[3], args[0], args[1], args[2], 0); - break; - case INDEX_op_movcond_i32: - tcg_out_movcond(s, args[5], args[0], args[1], args[2], - args[3], const_args[3], args[4], const_args[4], 1); - break; - case INDEX_op_movcond_i64: - tcg_out_movcond(s, args[5], args[0], args[1], args[2], - args[3], const_args[3], args[4], const_args[4], 0); - break; - - case INDEX_op_qemu_ld_i32: - tcg_out_qemu_ld(s, args); - break; - case INDEX_op_qemu_ld_i64: - tcg_out_qemu_ld(s, args); - break; - case INDEX_op_qemu_st_i32: - tcg_out_qemu_st(s, args); - break; - case INDEX_op_qemu_st_i64: - tcg_out_qemu_st(s, args); - break; - - case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */ - case INDEX_op_mov_i64: - case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi. */ - case INDEX_op_movi_i64: - case INDEX_op_call: /* Always emitted via tcg_out_call. */ - default: - tcg_abort(); - } -} - -static const TCGTargetOpDef ia64_op_defs[] = { - { INDEX_op_br, { } }, - { INDEX_op_exit_tb, { } }, - { INDEX_op_goto_tb, { } }, - - { INDEX_op_ld8u_i32, { "r", "r" } }, - { INDEX_op_ld8s_i32, { "r", "r" } }, - { INDEX_op_ld16u_i32, { "r", "r" } }, - { INDEX_op_ld16s_i32, { "r", "r" } }, - { INDEX_op_ld_i32, { "r", "r" } }, - { INDEX_op_st8_i32, { "rZ", "r" } }, - { INDEX_op_st16_i32, { "rZ", "r" } }, - { INDEX_op_st_i32, { "rZ", "r" } }, - - { INDEX_op_add_i32, { "r", "rZ", "rI" } }, - { INDEX_op_sub_i32, { "r", "rI", "rI" } }, - - { INDEX_op_and_i32, { "r", "rI", "rI" } }, - { INDEX_op_andc_i32, { "r", "rI", "rI" } }, - { INDEX_op_eqv_i32, { "r", "rZ", "rZ" } }, - { INDEX_op_nand_i32, { "r", "rZ", "rZ" } }, - { INDEX_op_nor_i32, { "r", "rZ", "rZ" } }, - { INDEX_op_or_i32, { "r", "rI", "rI" } }, - { INDEX_op_orc_i32, { "r", "rZ", "rZ" } }, - { INDEX_op_xor_i32, { "r", "rI", "rI" } }, - - { INDEX_op_mul_i32, { "r", "rZ", "rZ" } }, - - { INDEX_op_sar_i32, { "r", "rZ", "ri" } }, - { INDEX_op_shl_i32, { "r", "rZ", "ri" } }, - { INDEX_op_shr_i32, { "r", "rZ", "ri" } }, - { INDEX_op_rotl_i32, { "r", "rZ", "ri" } }, - { INDEX_op_rotr_i32, { "r", "rZ", "ri" } }, - - { INDEX_op_ext8s_i32, { "r", "rZ"} }, - { INDEX_op_ext8u_i32, { "r", "rZ"} }, - { INDEX_op_ext16s_i32, { "r", "rZ"} }, - { INDEX_op_ext16u_i32, { "r", "rZ"} }, - - { INDEX_op_bswap16_i32, { "r", "rZ" } }, - { INDEX_op_bswap32_i32, { "r", "rZ" } }, - - { INDEX_op_brcond_i32, { "rZ", "rZ" } }, - { INDEX_op_setcond_i32, { "r", "rZ", "rZ" } }, - { INDEX_op_movcond_i32, { "r", "rZ", "rZ", "rI", "rI" } }, - - { INDEX_op_ld8u_i64, { "r", "r" } }, - { INDEX_op_ld8s_i64, { "r", "r" } }, - { INDEX_op_ld16u_i64, { "r", "r" } }, - { INDEX_op_ld16s_i64, { "r", "r" } }, - { INDEX_op_ld32u_i64, { "r", "r" } }, - { INDEX_op_ld32s_i64, { "r", "r" } }, - { INDEX_op_ld_i64, { "r", "r" } }, - { INDEX_op_st8_i64, { "rZ", "r" } }, - { INDEX_op_st16_i64, { "rZ", "r" } }, - { INDEX_op_st32_i64, { "rZ", "r" } }, - { INDEX_op_st_i64, { "rZ", "r" } }, - - { INDEX_op_add_i64, { "r", "rZ", "rI" } }, - { INDEX_op_sub_i64, { "r", "rI", "rI" } }, - - { INDEX_op_and_i64, { "r", "rI", "rI" } }, - { INDEX_op_andc_i64, { "r", "rI", "rI" } }, - { INDEX_op_eqv_i64, { "r", "rZ", "rZ" } }, - { INDEX_op_nand_i64, { "r", "rZ", "rZ" } }, - { INDEX_op_nor_i64, { "r", "rZ", "rZ" } }, - { INDEX_op_or_i64, { "r", "rI", "rI" } }, - { INDEX_op_orc_i64, { "r", "rZ", "rZ" } }, - { INDEX_op_xor_i64, { "r", "rI", "rI" } }, - - { INDEX_op_mul_i64, { "r", "rZ", "rZ" } }, - - { INDEX_op_sar_i64, { "r", "rZ", "ri" } }, - { INDEX_op_shl_i64, { "r", "rZ", "ri" } }, - { INDEX_op_shr_i64, { "r", "rZ", "ri" } }, - { INDEX_op_rotl_i64, { "r", "rZ", "ri" } }, - { INDEX_op_rotr_i64, { "r", "rZ", "ri" } }, - - { INDEX_op_ext8s_i64, { "r", "rZ"} }, - { INDEX_op_ext8u_i64, { "r", "rZ"} }, - { INDEX_op_ext16s_i64, { "r", "rZ"} }, - { INDEX_op_ext16u_i64, { "r", "rZ"} }, - { INDEX_op_ext32s_i64, { "r", "rZ"} }, - { INDEX_op_ext32u_i64, { "r", "rZ"} }, - { INDEX_op_ext_i32_i64, { "r", "rZ" } }, - { INDEX_op_extu_i32_i64, { "r", "rZ" } }, - - { INDEX_op_bswap16_i64, { "r", "rZ" } }, - { INDEX_op_bswap32_i64, { "r", "rZ" } }, - { INDEX_op_bswap64_i64, { "r", "rZ" } }, - - { INDEX_op_brcond_i64, { "rZ", "rZ" } }, - { INDEX_op_setcond_i64, { "r", "rZ", "rZ" } }, - { INDEX_op_movcond_i64, { "r", "rZ", "rZ", "rI", "rI" } }, - - { INDEX_op_deposit_i32, { "r", "rZ", "ri" } }, - { INDEX_op_deposit_i64, { "r", "rZ", "ri" } }, - - { INDEX_op_qemu_ld_i32, { "r", "r" } }, - { INDEX_op_qemu_ld_i64, { "r", "r" } }, - { INDEX_op_qemu_st_i32, { "SZ", "r" } }, - { INDEX_op_qemu_st_i64, { "SZ", "r" } }, - - { -1 }, -}; - -/* Generate global QEMU prologue and epilogue code */ -static void tcg_target_qemu_prologue(TCGContext *s) -{ - int frame_size; - - /* reserve some stack space */ - frame_size = TCG_STATIC_CALL_ARGS_SIZE + - CPU_TEMP_BUF_NLONGS * sizeof(long); - frame_size = (frame_size + TCG_TARGET_STACK_ALIGN - 1) & - ~(TCG_TARGET_STACK_ALIGN - 1); - tcg_set_frame(s, TCG_REG_CALL_STACK, TCG_STATIC_CALL_ARGS_SIZE, - CPU_TEMP_BUF_NLONGS * sizeof(long)); - - /* First emit adhoc function descriptor */ - *s->code_ptr = (tcg_insn_unit){ - (uint64_t)(s->code_ptr + 1), /* entry point */ - 0 /* skip gp */ - }; - s->code_ptr++; - - /* prologue */ - tcg_out_bundle(s, miI, - tcg_opc_m34(TCG_REG_P0, OPC_ALLOC_M34, - TCG_REG_R34, 32, 24, 0), - INSN_NOP_I, - tcg_opc_i21(TCG_REG_P0, OPC_MOV_I21, - TCG_REG_B6, TCG_REG_R33, 0)); - - /* ??? If guest_base < 0x200000, we could load the register via - an ADDL in the M slot of the next bundle. */ - if (guest_base != 0) { - tcg_out_bundle(s, mlx, - INSN_NOP_M, - tcg_opc_l2(guest_base), - tcg_opc_x2 (TCG_REG_P0, OPC_MOVL_X2, - TCG_GUEST_BASE_REG, guest_base)); - tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG); - } - - tcg_out_bundle(s, miB, - tcg_opc_a4 (TCG_REG_P0, OPC_ADDS_A4, - TCG_REG_R12, -frame_size, TCG_REG_R12), - tcg_opc_i22(TCG_REG_P0, OPC_MOV_I22, - TCG_REG_R33, TCG_REG_B0), - tcg_opc_b4 (TCG_REG_P0, OPC_BR_SPTK_MANY_B4, TCG_REG_B6)); - - /* epilogue */ - tb_ret_addr = s->code_ptr; - tcg_out_bundle(s, miI, - INSN_NOP_M, - tcg_opc_i21(TCG_REG_P0, OPC_MOV_I21, - TCG_REG_B0, TCG_REG_R33, 0), - tcg_opc_a4 (TCG_REG_P0, OPC_ADDS_A4, - TCG_REG_R12, frame_size, TCG_REG_R12)); - tcg_out_bundle(s, miB, - INSN_NOP_M, - tcg_opc_i26(TCG_REG_P0, OPC_MOV_I_I26, - TCG_REG_PFS, TCG_REG_R34), - tcg_opc_b4 (TCG_REG_P0, OPC_BR_RET_SPTK_MANY_B4, - TCG_REG_B0)); -} - -static void tcg_target_init(TCGContext *s) -{ - tcg_regset_set(tcg_target_available_regs[TCG_TYPE_I32], - 0xffffffffffffffffull); - tcg_regset_set(tcg_target_available_regs[TCG_TYPE_I64], - 0xffffffffffffffffull); - - tcg_regset_clear(tcg_target_call_clobber_regs); - tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R8); - tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R9); - tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R10); - tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R11); - tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R14); - tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R15); - tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R16); - tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R17); - tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R18); - tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R19); - tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R20); - tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R21); - tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R22); - tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R23); - tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R24); - tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R25); - tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R26); - tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R27); - tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R28); - tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R29); - tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R30); - tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R31); - tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R56); - tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R57); - tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R58); - tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R59); - tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R60); - tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R61); - tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R62); - tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R63); - - tcg_regset_clear(s->reserved_regs); - tcg_regset_set_reg(s->reserved_regs, TCG_REG_R0); /* zero register */ - tcg_regset_set_reg(s->reserved_regs, TCG_REG_R1); /* global pointer */ - tcg_regset_set_reg(s->reserved_regs, TCG_REG_R2); /* internal use */ - tcg_regset_set_reg(s->reserved_regs, TCG_REG_R3); /* internal use */ - tcg_regset_set_reg(s->reserved_regs, TCG_REG_R12); /* stack pointer */ - tcg_regset_set_reg(s->reserved_regs, TCG_REG_R13); /* thread pointer */ - tcg_regset_set_reg(s->reserved_regs, TCG_REG_R33); /* return address */ - tcg_regset_set_reg(s->reserved_regs, TCG_REG_R34); /* PFS */ - - /* The following 4 are not in use, are call-saved, but *not* saved - by the prologue. Therefore we cannot use them without modifying - the prologue. There doesn't seem to be any good reason to use - these as opposed to the windowed registers. */ - tcg_regset_set_reg(s->reserved_regs, TCG_REG_R4); - tcg_regset_set_reg(s->reserved_regs, TCG_REG_R5); - tcg_regset_set_reg(s->reserved_regs, TCG_REG_R6); - tcg_regset_set_reg(s->reserved_regs, TCG_REG_R7); - - tcg_add_target_add_op_defs(ia64_op_defs); -} diff --git a/tcg/ia64/tcg-target.inc.c b/tcg/ia64/tcg-target.inc.c new file mode 100644 index 0000000..62d6549 --- /dev/null +++ b/tcg/ia64/tcg-target.inc.c @@ -0,0 +1,2453 @@ +/* + * Tiny Code Generator for QEMU + * + * Copyright (c) 2009-2010 Aurelien Jarno + * Based on i386/tcg-target.c - Copyright (c) 2008 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +/* + * Register definitions + */ + +#ifndef NDEBUG +static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = { + "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", + "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", + "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23", + "r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31", + "r32", "r33", "r34", "r35", "r36", "r37", "r38", "r39", + "r40", "r41", "r42", "r43", "r44", "r45", "r46", "r47", + "r48", "r49", "r50", "r51", "r52", "r53", "r54", "r55", + "r56", "r57", "r58", "r59", "r60", "r61", "r62", "r63", +}; +#endif + +#ifndef CONFIG_SOFTMMU +#define TCG_GUEST_BASE_REG TCG_REG_R55 +#endif + +/* Branch registers */ +enum { + TCG_REG_B0 = 0, + TCG_REG_B1, + TCG_REG_B2, + TCG_REG_B3, + TCG_REG_B4, + TCG_REG_B5, + TCG_REG_B6, + TCG_REG_B7, +}; + +/* Floating point registers */ +enum { + TCG_REG_F0 = 0, + TCG_REG_F1, + TCG_REG_F2, + TCG_REG_F3, + TCG_REG_F4, + TCG_REG_F5, + TCG_REG_F6, + TCG_REG_F7, + TCG_REG_F8, + TCG_REG_F9, + TCG_REG_F10, + TCG_REG_F11, + TCG_REG_F12, + TCG_REG_F13, + TCG_REG_F14, + TCG_REG_F15, +}; + +/* Predicate registers */ +enum { + TCG_REG_P0 = 0, + TCG_REG_P1, + TCG_REG_P2, + TCG_REG_P3, + TCG_REG_P4, + TCG_REG_P5, + TCG_REG_P6, + TCG_REG_P7, + TCG_REG_P8, + TCG_REG_P9, + TCG_REG_P10, + TCG_REG_P11, + TCG_REG_P12, + TCG_REG_P13, + TCG_REG_P14, + TCG_REG_P15, +}; + +/* Application registers */ +enum { + TCG_REG_PFS = 64, +}; + +static const int tcg_target_reg_alloc_order[] = { + TCG_REG_R35, + TCG_REG_R36, + TCG_REG_R37, + TCG_REG_R38, + TCG_REG_R39, + TCG_REG_R40, + TCG_REG_R41, + TCG_REG_R42, + TCG_REG_R43, + TCG_REG_R44, + TCG_REG_R45, + TCG_REG_R46, + TCG_REG_R47, + TCG_REG_R48, + TCG_REG_R49, + TCG_REG_R50, + TCG_REG_R51, + TCG_REG_R52, + TCG_REG_R53, + TCG_REG_R54, + TCG_REG_R55, + TCG_REG_R14, + TCG_REG_R15, + TCG_REG_R16, + TCG_REG_R17, + TCG_REG_R18, + TCG_REG_R19, + TCG_REG_R20, + TCG_REG_R21, + TCG_REG_R22, + TCG_REG_R23, + TCG_REG_R24, + TCG_REG_R25, + TCG_REG_R26, + TCG_REG_R27, + TCG_REG_R28, + TCG_REG_R29, + TCG_REG_R30, + TCG_REG_R31, + TCG_REG_R56, + TCG_REG_R57, + TCG_REG_R58, + TCG_REG_R59, + TCG_REG_R60, + TCG_REG_R61, + TCG_REG_R62, + TCG_REG_R63, + TCG_REG_R8, + TCG_REG_R9, + TCG_REG_R10, + TCG_REG_R11 +}; + +static const int tcg_target_call_iarg_regs[8] = { + TCG_REG_R56, + TCG_REG_R57, + TCG_REG_R58, + TCG_REG_R59, + TCG_REG_R60, + TCG_REG_R61, + TCG_REG_R62, + TCG_REG_R63, +}; + +static const int tcg_target_call_oarg_regs[] = { + TCG_REG_R8 +}; + +/* + * opcode formation + */ + +/* bundle templates: stops (double bar in the IA64 manual) are marked with + an uppercase letter. */ +enum { + mii = 0x00, + miI = 0x01, + mIi = 0x02, + mII = 0x03, + mlx = 0x04, + mLX = 0x05, + mmi = 0x08, + mmI = 0x09, + Mmi = 0x0a, + MmI = 0x0b, + mfi = 0x0c, + mfI = 0x0d, + mmf = 0x0e, + mmF = 0x0f, + mib = 0x10, + miB = 0x11, + mbb = 0x12, + mbB = 0x13, + bbb = 0x16, + bbB = 0x17, + mmb = 0x18, + mmB = 0x19, + mfb = 0x1c, + mfB = 0x1d, +}; + +enum { + OPC_ADD_A1 = 0x10000000000ull, + OPC_AND_A1 = 0x10060000000ull, + OPC_AND_A3 = 0x10160000000ull, + OPC_ANDCM_A1 = 0x10068000000ull, + OPC_ANDCM_A3 = 0x10168000000ull, + OPC_ADDS_A4 = 0x10800000000ull, + OPC_ADDL_A5 = 0x12000000000ull, + OPC_ALLOC_M34 = 0x02c00000000ull, + OPC_BR_DPTK_FEW_B1 = 0x08400000000ull, + OPC_BR_SPTK_MANY_B1 = 0x08000001000ull, + OPC_BR_CALL_SPNT_FEW_B3 = 0x0a200000000ull, + OPC_BR_SPTK_MANY_B4 = 0x00100001000ull, + OPC_BR_CALL_SPTK_MANY_B5 = 0x02100001000ull, + OPC_BR_RET_SPTK_MANY_B4 = 0x00108001100ull, + OPC_BRL_SPTK_MANY_X3 = 0x18000001000ull, + OPC_BRL_CALL_SPNT_MANY_X4 = 0x1a200001000ull, + OPC_BRL_CALL_SPTK_MANY_X4 = 0x1a000001000ull, + OPC_CMP_LT_A6 = 0x18000000000ull, + OPC_CMP_LTU_A6 = 0x1a000000000ull, + OPC_CMP_EQ_A6 = 0x1c000000000ull, + OPC_CMP4_LT_A6 = 0x18400000000ull, + OPC_CMP4_LTU_A6 = 0x1a400000000ull, + OPC_CMP4_EQ_A6 = 0x1c400000000ull, + OPC_DEP_I14 = 0x0ae00000000ull, + OPC_DEP_I15 = 0x08000000000ull, + OPC_DEP_Z_I12 = 0x0a600000000ull, + OPC_EXTR_I11 = 0x0a400002000ull, + OPC_EXTR_U_I11 = 0x0a400000000ull, + OPC_FCVT_FX_TRUNC_S1_F10 = 0x004d0000000ull, + OPC_FCVT_FXU_TRUNC_S1_F10 = 0x004d8000000ull, + OPC_FCVT_XF_F11 = 0x000e0000000ull, + OPC_FMA_S1_F1 = 0x10400000000ull, + OPC_FNMA_S1_F1 = 0x18400000000ull, + OPC_FRCPA_S1_F6 = 0x00600000000ull, + OPC_GETF_SIG_M19 = 0x08708000000ull, + OPC_LD1_M1 = 0x08000000000ull, + OPC_LD1_M3 = 0x0a000000000ull, + OPC_LD2_M1 = 0x08040000000ull, + OPC_LD2_M3 = 0x0a040000000ull, + OPC_LD4_M1 = 0x08080000000ull, + OPC_LD4_M3 = 0x0a080000000ull, + OPC_LD8_M1 = 0x080c0000000ull, + OPC_LD8_M3 = 0x0a0c0000000ull, + OPC_MUX1_I3 = 0x0eca0000000ull, + OPC_NOP_B9 = 0x04008000000ull, + OPC_NOP_F16 = 0x00008000000ull, + OPC_NOP_I18 = 0x00008000000ull, + OPC_NOP_M48 = 0x00008000000ull, + OPC_MOV_I21 = 0x00e00100000ull, + OPC_MOV_RET_I21 = 0x00e00500000ull, + OPC_MOV_I22 = 0x00188000000ull, + OPC_MOV_I_I26 = 0x00150000000ull, + OPC_MOVL_X2 = 0x0c000000000ull, + OPC_OR_A1 = 0x10070000000ull, + OPC_OR_A3 = 0x10170000000ull, + OPC_SETF_EXP_M18 = 0x0c748000000ull, + OPC_SETF_SIG_M18 = 0x0c708000000ull, + OPC_SHL_I7 = 0x0f240000000ull, + OPC_SHR_I5 = 0x0f220000000ull, + OPC_SHR_U_I5 = 0x0f200000000ull, + OPC_SHRP_I10 = 0x0ac00000000ull, + OPC_SXT1_I29 = 0x000a0000000ull, + OPC_SXT2_I29 = 0x000a8000000ull, + OPC_SXT4_I29 = 0x000b0000000ull, + OPC_ST1_M4 = 0x08c00000000ull, + OPC_ST2_M4 = 0x08c40000000ull, + OPC_ST4_M4 = 0x08c80000000ull, + OPC_ST8_M4 = 0x08cc0000000ull, + OPC_SUB_A1 = 0x10028000000ull, + OPC_SUB_A3 = 0x10128000000ull, + OPC_UNPACK4_L_I2 = 0x0f860000000ull, + OPC_XMA_L_F2 = 0x1d000000000ull, + OPC_XOR_A1 = 0x10078000000ull, + OPC_XOR_A3 = 0x10178000000ull, + OPC_ZXT1_I29 = 0x00080000000ull, + OPC_ZXT2_I29 = 0x00088000000ull, + OPC_ZXT4_I29 = 0x00090000000ull, + + INSN_NOP_M = OPC_NOP_M48, /* nop.m 0 */ + INSN_NOP_I = OPC_NOP_I18, /* nop.i 0 */ +}; + +static inline uint64_t tcg_opc_a1(int qp, uint64_t opc, int r1, + int r2, int r3) +{ + return opc + | ((r3 & 0x7f) << 20) + | ((r2 & 0x7f) << 13) + | ((r1 & 0x7f) << 6) + | (qp & 0x3f); +} + +static inline uint64_t tcg_opc_a3(int qp, uint64_t opc, int r1, + uint64_t imm, int r3) +{ + return opc + | ((imm & 0x80) << 29) /* s */ + | ((imm & 0x7f) << 13) /* imm7b */ + | ((r3 & 0x7f) << 20) + | ((r1 & 0x7f) << 6) + | (qp & 0x3f); +} + +static inline uint64_t tcg_opc_a4(int qp, uint64_t opc, int r1, + uint64_t imm, int r3) +{ + return opc + | ((imm & 0x2000) << 23) /* s */ + | ((imm & 0x1f80) << 20) /* imm6d */ + | ((imm & 0x007f) << 13) /* imm7b */ + | ((r3 & 0x7f) << 20) + | ((r1 & 0x7f) << 6) + | (qp & 0x3f); +} + +static inline uint64_t tcg_opc_a5(int qp, uint64_t opc, int r1, + uint64_t imm, int r3) +{ + return opc + | ((imm & 0x200000) << 15) /* s */ + | ((imm & 0x1f0000) << 6) /* imm5c */ + | ((imm & 0x00ff80) << 20) /* imm9d */ + | ((imm & 0x00007f) << 13) /* imm7b */ + | ((r3 & 0x03) << 20) + | ((r1 & 0x7f) << 6) + | (qp & 0x3f); +} + +static inline uint64_t tcg_opc_a6(int qp, uint64_t opc, int p1, + int p2, int r2, int r3) +{ + return opc + | ((p2 & 0x3f) << 27) + | ((r3 & 0x7f) << 20) + | ((r2 & 0x7f) << 13) + | ((p1 & 0x3f) << 6) + | (qp & 0x3f); +} + +static inline uint64_t tcg_opc_b1(int qp, uint64_t opc, uint64_t imm) +{ + return opc + | ((imm & 0x100000) << 16) /* s */ + | ((imm & 0x0fffff) << 13) /* imm20b */ + | (qp & 0x3f); +} + +static inline uint64_t tcg_opc_b3(int qp, uint64_t opc, int b1, uint64_t imm) +{ + return opc + | ((imm & 0x100000) << 16) /* s */ + | ((imm & 0x0fffff) << 13) /* imm20b */ + | ((b1 & 0x7) << 6) + | (qp & 0x3f); +} + +static inline uint64_t tcg_opc_b4(int qp, uint64_t opc, int b2) +{ + return opc + | ((b2 & 0x7) << 13) + | (qp & 0x3f); +} + +static inline uint64_t tcg_opc_b5(int qp, uint64_t opc, int b1, int b2) +{ + return opc + | ((b2 & 0x7) << 13) + | ((b1 & 0x7) << 6) + | (qp & 0x3f); +} + + +static inline uint64_t tcg_opc_b9(int qp, uint64_t opc, uint64_t imm) +{ + return opc + | ((imm & 0x100000) << 16) /* i */ + | ((imm & 0x0fffff) << 6) /* imm20a */ + | (qp & 0x3f); +} + +static inline uint64_t tcg_opc_f1(int qp, uint64_t opc, int f1, + int f3, int f4, int f2) +{ + return opc + | ((f4 & 0x7f) << 27) + | ((f3 & 0x7f) << 20) + | ((f2 & 0x7f) << 13) + | ((f1 & 0x7f) << 6) + | (qp & 0x3f); +} + +static inline uint64_t tcg_opc_f2(int qp, uint64_t opc, int f1, + int f3, int f4, int f2) +{ + return opc + | ((f4 & 0x7f) << 27) + | ((f3 & 0x7f) << 20) + | ((f2 & 0x7f) << 13) + | ((f1 & 0x7f) << 6) + | (qp & 0x3f); +} + +static inline uint64_t tcg_opc_f6(int qp, uint64_t opc, int f1, + int p2, int f2, int f3) +{ + return opc + | ((p2 & 0x3f) << 27) + | ((f3 & 0x7f) << 20) + | ((f2 & 0x7f) << 13) + | ((f1 & 0x7f) << 6) + | (qp & 0x3f); +} + +static inline uint64_t tcg_opc_f10(int qp, uint64_t opc, int f1, int f2) +{ + return opc + | ((f2 & 0x7f) << 13) + | ((f1 & 0x7f) << 6) + | (qp & 0x3f); +} + +static inline uint64_t tcg_opc_f11(int qp, uint64_t opc, int f1, int f2) +{ + return opc + | ((f2 & 0x7f) << 13) + | ((f1 & 0x7f) << 6) + | (qp & 0x3f); +} + +static inline uint64_t tcg_opc_f16(int qp, uint64_t opc, uint64_t imm) +{ + return opc + | ((imm & 0x100000) << 16) /* i */ + | ((imm & 0x0fffff) << 6) /* imm20a */ + | (qp & 0x3f); +} + +static inline uint64_t tcg_opc_i2(int qp, uint64_t opc, int r1, + int r2, int r3) +{ + return opc + | ((r3 & 0x7f) << 20) + | ((r2 & 0x7f) << 13) + | ((r1 & 0x7f) << 6) + | (qp & 0x3f); +} + +static inline uint64_t tcg_opc_i3(int qp, uint64_t opc, int r1, + int r2, int mbtype) +{ + return opc + | ((mbtype & 0x0f) << 20) + | ((r2 & 0x7f) << 13) + | ((r1 & 0x7f) << 6) + | (qp & 0x3f); +} + +static inline uint64_t tcg_opc_i5(int qp, uint64_t opc, int r1, + int r3, int r2) +{ + return opc + | ((r3 & 0x7f) << 20) + | ((r2 & 0x7f) << 13) + | ((r1 & 0x7f) << 6) + | (qp & 0x3f); +} + +static inline uint64_t tcg_opc_i7(int qp, uint64_t opc, int r1, + int r2, int r3) +{ + return opc + | ((r3 & 0x7f) << 20) + | ((r2 & 0x7f) << 13) + | ((r1 & 0x7f) << 6) + | (qp & 0x3f); +} + +static inline uint64_t tcg_opc_i10(int qp, uint64_t opc, int r1, + int r2, int r3, uint64_t count) +{ + return opc + | ((count & 0x3f) << 27) + | ((r3 & 0x7f) << 20) + | ((r2 & 0x7f) << 13) + | ((r1 & 0x7f) << 6) + | (qp & 0x3f); +} + +static inline uint64_t tcg_opc_i11(int qp, uint64_t opc, int r1, + int r3, uint64_t pos, uint64_t len) +{ + return opc + | ((len & 0x3f) << 27) + | ((r3 & 0x7f) << 20) + | ((pos & 0x3f) << 14) + | ((r1 & 0x7f) << 6) + | (qp & 0x3f); +} + +static inline uint64_t tcg_opc_i12(int qp, uint64_t opc, int r1, + int r2, uint64_t pos, uint64_t len) +{ + return opc + | ((len & 0x3f) << 27) + | ((pos & 0x3f) << 20) + | ((r2 & 0x7f) << 13) + | ((r1 & 0x7f) << 6) + | (qp & 0x3f); +} + +static inline uint64_t tcg_opc_i14(int qp, uint64_t opc, int r1, uint64_t imm, + int r3, uint64_t pos, uint64_t len) +{ + return opc + | ((imm & 0x01) << 36) + | ((len & 0x3f) << 27) + | ((r3 & 0x7f) << 20) + | ((pos & 0x3f) << 14) + | ((r1 & 0x7f) << 6) + | (qp & 0x3f); +} + +static inline uint64_t tcg_opc_i15(int qp, uint64_t opc, int r1, int r2, + int r3, uint64_t pos, uint64_t len) +{ + return opc + | ((pos & 0x3f) << 31) + | ((len & 0x0f) << 27) + | ((r3 & 0x7f) << 20) + | ((r2 & 0x7f) << 13) + | ((r1 & 0x7f) << 6) + | (qp & 0x3f); +} + +static inline uint64_t tcg_opc_i18(int qp, uint64_t opc, uint64_t imm) +{ + return opc + | ((imm & 0x100000) << 16) /* i */ + | ((imm & 0x0fffff) << 6) /* imm20a */ + | (qp & 0x3f); +} + +static inline uint64_t tcg_opc_i21(int qp, uint64_t opc, int b1, + int r2, uint64_t imm) +{ + return opc + | ((imm & 0x1ff) << 24) + | ((r2 & 0x7f) << 13) + | ((b1 & 0x7) << 6) + | (qp & 0x3f); +} + +static inline uint64_t tcg_opc_i22(int qp, uint64_t opc, int r1, int b2) +{ + return opc + | ((b2 & 0x7) << 13) + | ((r1 & 0x7f) << 6) + | (qp & 0x3f); +} + +static inline uint64_t tcg_opc_i26(int qp, uint64_t opc, int ar3, int r2) +{ + return opc + | ((ar3 & 0x7f) << 20) + | ((r2 & 0x7f) << 13) + | (qp & 0x3f); +} + +static inline uint64_t tcg_opc_i29(int qp, uint64_t opc, int r1, int r3) +{ + return opc + | ((r3 & 0x7f) << 20) + | ((r1 & 0x7f) << 6) + | (qp & 0x3f); +} + +static inline uint64_t tcg_opc_l2(uint64_t imm) +{ + return (imm & 0x7fffffffffc00000ull) >> 22; +} + +static inline uint64_t tcg_opc_l3(uint64_t imm) +{ + return (imm & 0x07fffffffff00000ull) >> 18; +} + +#define tcg_opc_l4 tcg_opc_l3 + +static inline uint64_t tcg_opc_m1(int qp, uint64_t opc, int r1, int r3) +{ + return opc + | ((r3 & 0x7f) << 20) + | ((r1 & 0x7f) << 6) + | (qp & 0x3f); +} + +static inline uint64_t tcg_opc_m3(int qp, uint64_t opc, int r1, + int r3, uint64_t imm) +{ + return opc + | ((imm & 0x100) << 28) /* s */ + | ((imm & 0x080) << 20) /* i */ + | ((imm & 0x07f) << 13) /* imm7b */ + | ((r3 & 0x7f) << 20) + | ((r1 & 0x7f) << 6) + | (qp & 0x3f); +} + +static inline uint64_t tcg_opc_m4(int qp, uint64_t opc, int r2, int r3) +{ + return opc + | ((r3 & 0x7f) << 20) + | ((r2 & 0x7f) << 13) + | (qp & 0x3f); +} + +static inline uint64_t tcg_opc_m18(int qp, uint64_t opc, int f1, int r2) +{ + return opc + | ((r2 & 0x7f) << 13) + | ((f1 & 0x7f) << 6) + | (qp & 0x3f); +} + +static inline uint64_t tcg_opc_m19(int qp, uint64_t opc, int r1, int f2) +{ + return opc + | ((f2 & 0x7f) << 13) + | ((r1 & 0x7f) << 6) + | (qp & 0x3f); +} + +static inline uint64_t tcg_opc_m34(int qp, uint64_t opc, int r1, + int sof, int sol, int sor) +{ + return opc + | ((sor & 0x0f) << 27) + | ((sol & 0x7f) << 20) + | ((sof & 0x7f) << 13) + | ((r1 & 0x7f) << 6) + | (qp & 0x3f); +} + +static inline uint64_t tcg_opc_m48(int qp, uint64_t opc, uint64_t imm) +{ + return opc + | ((imm & 0x100000) << 16) /* i */ + | ((imm & 0x0fffff) << 6) /* imm20a */ + | (qp & 0x3f); +} + +static inline uint64_t tcg_opc_x2(int qp, uint64_t opc, + int r1, uint64_t imm) +{ + return opc + | ((imm & 0x8000000000000000ull) >> 27) /* i */ + | (imm & 0x0000000000200000ull) /* ic */ + | ((imm & 0x00000000001f0000ull) << 6) /* imm5c */ + | ((imm & 0x000000000000ff80ull) << 20) /* imm9d */ + | ((imm & 0x000000000000007full) << 13) /* imm7b */ + | ((r1 & 0x7f) << 6) + | (qp & 0x3f); +} + +static inline uint64_t tcg_opc_x3(int qp, uint64_t opc, uint64_t imm) +{ + return opc + | ((imm & 0x0800000000000000ull) >> 23) /* i */ + | ((imm & 0x00000000000fffffull) << 13) /* imm20b */ + | (qp & 0x3f); +} + +static inline uint64_t tcg_opc_x4(int qp, uint64_t opc, int b1, uint64_t imm) +{ + return opc + | ((imm & 0x0800000000000000ull) >> 23) /* i */ + | ((imm & 0x00000000000fffffull) << 13) /* imm20b */ + | ((b1 & 0x7) << 6) + | (qp & 0x3f); +} + + +/* + * Relocations - Note that we never encode branches elsewhere than slot 2. + */ + +static void reloc_pcrel21b_slot2(tcg_insn_unit *pc, tcg_insn_unit *target) +{ + uint64_t imm = target - pc; + + pc->hi = (pc->hi & 0xf700000fffffffffull) + | ((imm & 0x100000) << 39) /* s */ + | ((imm & 0x0fffff) << 36); /* imm20b */ +} + +static uint64_t get_reloc_pcrel21b_slot2(tcg_insn_unit *pc) +{ + int64_t high = pc->hi; + + return ((high >> 39) & 0x100000) + /* s */ + ((high >> 36) & 0x0fffff); /* imm20b */ +} + +static void patch_reloc(tcg_insn_unit *code_ptr, int type, + intptr_t value, intptr_t addend) +{ + assert(addend == 0); + assert(type == R_IA64_PCREL21B); + reloc_pcrel21b_slot2(code_ptr, (tcg_insn_unit *)value); +} + +/* + * Constraints + */ + +/* parse target specific constraints */ +static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str) +{ + const char *ct_str; + + ct_str = *pct_str; + switch(ct_str[0]) { + case 'r': + ct->ct |= TCG_CT_REG; + tcg_regset_set(ct->u.regs, 0xffffffffffffffffull); + break; + case 'I': + ct->ct |= TCG_CT_CONST_S22; + break; + case 'S': + ct->ct |= TCG_CT_REG; + tcg_regset_set(ct->u.regs, 0xffffffffffffffffull); +#if defined(CONFIG_SOFTMMU) + tcg_regset_reset_reg(ct->u.regs, TCG_REG_R56); + tcg_regset_reset_reg(ct->u.regs, TCG_REG_R57); + tcg_regset_reset_reg(ct->u.regs, TCG_REG_R58); +#endif + break; + case 'Z': + /* We are cheating a bit here, using the fact that the register + r0 is also the register number 0. Hence there is no need + to check for const_args in each instruction. */ + ct->ct |= TCG_CT_CONST_ZERO; + break; + default: + return -1; + } + ct_str++; + *pct_str = ct_str; + return 0; +} + +/* test if a constant matches the constraint */ +static inline int tcg_target_const_match(tcg_target_long val, TCGType type, + const TCGArgConstraint *arg_ct) +{ + int ct; + ct = arg_ct->ct; + if (ct & TCG_CT_CONST) + return 1; + else if ((ct & TCG_CT_CONST_ZERO) && val == 0) + return 1; + else if ((ct & TCG_CT_CONST_S22) && val == ((int32_t)val << 10) >> 10) + return 1; + else + return 0; +} + +/* + * Code generation + */ + +static tcg_insn_unit *tb_ret_addr; + +static inline void tcg_out_bundle(TCGContext *s, int template, + uint64_t slot0, uint64_t slot1, + uint64_t slot2) +{ + template &= 0x1f; /* 5 bits */ + slot0 &= 0x1ffffffffffull; /* 41 bits */ + slot1 &= 0x1ffffffffffull; /* 41 bits */ + slot2 &= 0x1ffffffffffull; /* 41 bits */ + + *s->code_ptr++ = (tcg_insn_unit){ + (slot1 << 46) | (slot0 << 5) | template, + (slot2 << 23) | (slot1 >> 18) + }; +} + +static inline uint64_t tcg_opc_mov_a(int qp, TCGReg dst, TCGReg src) +{ + return tcg_opc_a4(qp, OPC_ADDS_A4, dst, 0, src); +} + +static inline void tcg_out_mov(TCGContext *s, TCGType type, + TCGReg ret, TCGReg arg) +{ + tcg_out_bundle(s, mmI, + INSN_NOP_M, + INSN_NOP_M, + tcg_opc_mov_a(TCG_REG_P0, ret, arg)); +} + +static inline uint64_t tcg_opc_movi_a(int qp, TCGReg dst, int64_t src) +{ + assert(src == sextract64(src, 0, 22)); + return tcg_opc_a5(qp, OPC_ADDL_A5, dst, src, TCG_REG_R0); +} + +static inline void tcg_out_movi(TCGContext *s, TCGType type, + TCGReg reg, tcg_target_long arg) +{ + tcg_out_bundle(s, mLX, + INSN_NOP_M, + tcg_opc_l2 (arg), + tcg_opc_x2 (TCG_REG_P0, OPC_MOVL_X2, reg, arg)); +} + +static void tcg_out_br(TCGContext *s, TCGLabel *l) +{ + uint64_t imm; + + /* We pay attention here to not modify the branch target by reading + the existing value and using it again. This ensure that caches and + memory are kept coherent during retranslation. */ + if (l->has_value) { + imm = l->u.value_ptr - s->code_ptr; + } else { + imm = get_reloc_pcrel21b_slot2(s->code_ptr); + tcg_out_reloc(s, s->code_ptr, R_IA64_PCREL21B, l, 0); + } + + tcg_out_bundle(s, mmB, + INSN_NOP_M, + INSN_NOP_M, + tcg_opc_b1(TCG_REG_P0, OPC_BR_SPTK_MANY_B1, imm)); +} + +static inline void tcg_out_call(TCGContext *s, tcg_insn_unit *desc) +{ + uintptr_t func = desc->lo, gp = desc->hi, disp; + + /* Look through the function descriptor. */ + tcg_out_bundle(s, mlx, + INSN_NOP_M, + tcg_opc_l2 (gp), + tcg_opc_x2 (TCG_REG_P0, OPC_MOVL_X2, TCG_REG_R1, gp)); + disp = (tcg_insn_unit *)func - s->code_ptr; + tcg_out_bundle(s, mLX, + INSN_NOP_M, + tcg_opc_l4 (disp), + tcg_opc_x4 (TCG_REG_P0, OPC_BRL_CALL_SPTK_MANY_X4, + TCG_REG_B0, disp)); +} + +static void tcg_out_exit_tb(TCGContext *s, tcg_target_long arg) +{ + uint64_t imm, opc1; + + /* At least arg == 0 is a common operation. */ + if (arg == sextract64(arg, 0, 22)) { + opc1 = tcg_opc_movi_a(TCG_REG_P0, TCG_REG_R8, arg); + } else { + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R8, arg); + opc1 = INSN_NOP_M; + } + + imm = tb_ret_addr - s->code_ptr; + + tcg_out_bundle(s, mLX, + opc1, + tcg_opc_l3 (imm), + tcg_opc_x3 (TCG_REG_P0, OPC_BRL_SPTK_MANY_X3, imm)); +} + +static inline void tcg_out_goto_tb(TCGContext *s, TCGArg arg) +{ + if (s->tb_jmp_offset) { + /* direct jump method */ + tcg_abort(); + } else { + /* indirect jump method */ + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R2, + (tcg_target_long)(s->tb_next + arg)); + tcg_out_bundle(s, MmI, + tcg_opc_m1 (TCG_REG_P0, OPC_LD8_M1, + TCG_REG_R2, TCG_REG_R2), + INSN_NOP_M, + tcg_opc_i21(TCG_REG_P0, OPC_MOV_I21, TCG_REG_B6, + TCG_REG_R2, 0)); + tcg_out_bundle(s, mmB, + INSN_NOP_M, + INSN_NOP_M, + tcg_opc_b4 (TCG_REG_P0, OPC_BR_SPTK_MANY_B4, + TCG_REG_B6)); + } + s->tb_next_offset[arg] = tcg_current_code_size(s); +} + +static inline void tcg_out_jmp(TCGContext *s, TCGArg addr) +{ + tcg_out_bundle(s, mmI, + INSN_NOP_M, + INSN_NOP_M, + tcg_opc_i21(TCG_REG_P0, OPC_MOV_I21, TCG_REG_B6, addr, 0)); + tcg_out_bundle(s, mmB, + INSN_NOP_M, + INSN_NOP_M, + tcg_opc_b4(TCG_REG_P0, OPC_BR_SPTK_MANY_B4, TCG_REG_B6)); +} + +static inline void tcg_out_ld_rel(TCGContext *s, uint64_t opc_m4, TCGArg arg, + TCGArg arg1, tcg_target_long arg2) +{ + if (arg2 == ((int16_t)arg2 >> 2) << 2) { + tcg_out_bundle(s, MmI, + tcg_opc_a4(TCG_REG_P0, OPC_ADDS_A4, + TCG_REG_R2, arg2, arg1), + tcg_opc_m1 (TCG_REG_P0, opc_m4, arg, TCG_REG_R2), + INSN_NOP_I); + } else { + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R2, arg2); + tcg_out_bundle(s, MmI, + tcg_opc_a1 (TCG_REG_P0, OPC_ADD_A1, + TCG_REG_R2, TCG_REG_R2, arg1), + tcg_opc_m1 (TCG_REG_P0, opc_m4, arg, TCG_REG_R2), + INSN_NOP_I); + } +} + +static inline void tcg_out_st_rel(TCGContext *s, uint64_t opc_m4, TCGArg arg, + TCGArg arg1, tcg_target_long arg2) +{ + if (arg2 == ((int16_t)arg2 >> 2) << 2) { + tcg_out_bundle(s, MmI, + tcg_opc_a4(TCG_REG_P0, OPC_ADDS_A4, + TCG_REG_R2, arg2, arg1), + tcg_opc_m4 (TCG_REG_P0, opc_m4, arg, TCG_REG_R2), + INSN_NOP_I); + } else { + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R2, arg2); + tcg_out_bundle(s, MmI, + tcg_opc_a1 (TCG_REG_P0, OPC_ADD_A1, + TCG_REG_R2, TCG_REG_R2, arg1), + tcg_opc_m4 (TCG_REG_P0, opc_m4, arg, TCG_REG_R2), + INSN_NOP_I); + } +} + +static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg arg, + TCGReg arg1, intptr_t arg2) +{ + if (type == TCG_TYPE_I32) { + tcg_out_ld_rel(s, OPC_LD4_M1, arg, arg1, arg2); + } else { + tcg_out_ld_rel(s, OPC_LD8_M1, arg, arg1, arg2); + } +} + +static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, + TCGReg arg1, intptr_t arg2) +{ + if (type == TCG_TYPE_I32) { + tcg_out_st_rel(s, OPC_ST4_M4, arg, arg1, arg2); + } else { + tcg_out_st_rel(s, OPC_ST8_M4, arg, arg1, arg2); + } +} + +static inline void tcg_out_alu(TCGContext *s, uint64_t opc_a1, uint64_t opc_a3, + TCGReg ret, TCGArg arg1, int const_arg1, + TCGArg arg2, int const_arg2) +{ + uint64_t opc1 = 0, opc2 = 0, opc3 = 0; + + if (const_arg2 && arg2 != 0) { + opc2 = tcg_opc_movi_a(TCG_REG_P0, TCG_REG_R3, arg2); + arg2 = TCG_REG_R3; + } + if (const_arg1 && arg1 != 0) { + if (opc_a3 && arg1 == (int8_t)arg1) { + opc3 = tcg_opc_a3(TCG_REG_P0, opc_a3, ret, arg1, arg2); + } else { + opc1 = tcg_opc_movi_a(TCG_REG_P0, TCG_REG_R2, arg1); + arg1 = TCG_REG_R2; + } + } + if (opc3 == 0) { + opc3 = tcg_opc_a1(TCG_REG_P0, opc_a1, ret, arg1, arg2); + } + + tcg_out_bundle(s, (opc1 || opc2 ? mII : miI), + opc1 ? opc1 : INSN_NOP_M, + opc2 ? opc2 : INSN_NOP_I, + opc3); +} + +static inline void tcg_out_add(TCGContext *s, TCGReg ret, TCGReg arg1, + TCGArg arg2, int const_arg2) +{ + if (const_arg2 && arg2 == sextract64(arg2, 0, 14)) { + tcg_out_bundle(s, mmI, + INSN_NOP_M, + INSN_NOP_M, + tcg_opc_a4(TCG_REG_P0, OPC_ADDS_A4, ret, arg2, arg1)); + } else { + tcg_out_alu(s, OPC_ADD_A1, 0, ret, arg1, 0, arg2, const_arg2); + } +} + +static inline void tcg_out_sub(TCGContext *s, TCGReg ret, TCGArg arg1, + int const_arg1, TCGArg arg2, int const_arg2) +{ + if (!const_arg1 && const_arg2 && -arg2 == sextract64(-arg2, 0, 14)) { + tcg_out_bundle(s, mmI, + INSN_NOP_M, + INSN_NOP_M, + tcg_opc_a4(TCG_REG_P0, OPC_ADDS_A4, ret, -arg2, arg1)); + } else { + tcg_out_alu(s, OPC_SUB_A1, OPC_SUB_A3, ret, + arg1, const_arg1, arg2, const_arg2); + } +} + +static inline void tcg_out_eqv(TCGContext *s, TCGArg ret, + TCGArg arg1, int const_arg1, + TCGArg arg2, int const_arg2) +{ + tcg_out_bundle(s, mII, + INSN_NOP_M, + tcg_opc_a1 (TCG_REG_P0, OPC_XOR_A1, ret, arg1, arg2), + tcg_opc_a3 (TCG_REG_P0, OPC_ANDCM_A3, ret, -1, ret)); +} + +static inline void tcg_out_nand(TCGContext *s, TCGArg ret, + TCGArg arg1, int const_arg1, + TCGArg arg2, int const_arg2) +{ + tcg_out_bundle(s, mII, + INSN_NOP_M, + tcg_opc_a1 (TCG_REG_P0, OPC_AND_A1, ret, arg1, arg2), + tcg_opc_a3 (TCG_REG_P0, OPC_ANDCM_A3, ret, -1, ret)); +} + +static inline void tcg_out_nor(TCGContext *s, TCGArg ret, + TCGArg arg1, int const_arg1, + TCGArg arg2, int const_arg2) +{ + tcg_out_bundle(s, mII, + INSN_NOP_M, + tcg_opc_a1 (TCG_REG_P0, OPC_OR_A1, ret, arg1, arg2), + tcg_opc_a3 (TCG_REG_P0, OPC_ANDCM_A3, ret, -1, ret)); +} + +static inline void tcg_out_orc(TCGContext *s, TCGArg ret, + TCGArg arg1, int const_arg1, + TCGArg arg2, int const_arg2) +{ + tcg_out_bundle(s, mII, + INSN_NOP_M, + tcg_opc_a3 (TCG_REG_P0, OPC_ANDCM_A3, TCG_REG_R2, -1, arg2), + tcg_opc_a1 (TCG_REG_P0, OPC_OR_A1, ret, arg1, TCG_REG_R2)); +} + +static inline void tcg_out_mul(TCGContext *s, TCGArg ret, + TCGArg arg1, TCGArg arg2) +{ + tcg_out_bundle(s, mmI, + tcg_opc_m18(TCG_REG_P0, OPC_SETF_SIG_M18, TCG_REG_F6, arg1), + tcg_opc_m18(TCG_REG_P0, OPC_SETF_SIG_M18, TCG_REG_F7, arg2), + INSN_NOP_I); + tcg_out_bundle(s, mmF, + INSN_NOP_M, + INSN_NOP_M, + tcg_opc_f2 (TCG_REG_P0, OPC_XMA_L_F2, TCG_REG_F6, TCG_REG_F6, + TCG_REG_F7, TCG_REG_F0)); + tcg_out_bundle(s, miI, + tcg_opc_m19(TCG_REG_P0, OPC_GETF_SIG_M19, ret, TCG_REG_F6), + INSN_NOP_I, + INSN_NOP_I); +} + +static inline void tcg_out_sar_i32(TCGContext *s, TCGArg ret, TCGArg arg1, + TCGArg arg2, int const_arg2) +{ + if (const_arg2) { + tcg_out_bundle(s, miI, + INSN_NOP_M, + INSN_NOP_I, + tcg_opc_i11(TCG_REG_P0, OPC_EXTR_I11, + ret, arg1, arg2, 31 - arg2)); + } else { + tcg_out_bundle(s, mII, + tcg_opc_a3 (TCG_REG_P0, OPC_AND_A3, + TCG_REG_R3, 0x1f, arg2), + tcg_opc_i29(TCG_REG_P0, OPC_SXT4_I29, TCG_REG_R2, arg1), + tcg_opc_i5 (TCG_REG_P0, OPC_SHR_I5, ret, + TCG_REG_R2, TCG_REG_R3)); + } +} + +static inline void tcg_out_sar_i64(TCGContext *s, TCGArg ret, TCGArg arg1, + TCGArg arg2, int const_arg2) +{ + if (const_arg2) { + tcg_out_bundle(s, miI, + INSN_NOP_M, + INSN_NOP_I, + tcg_opc_i11(TCG_REG_P0, OPC_EXTR_I11, + ret, arg1, arg2, 63 - arg2)); + } else { + tcg_out_bundle(s, miI, + INSN_NOP_M, + INSN_NOP_I, + tcg_opc_i5 (TCG_REG_P0, OPC_SHR_I5, ret, arg1, arg2)); + } +} + +static inline void tcg_out_shl_i32(TCGContext *s, TCGArg ret, TCGArg arg1, + TCGArg arg2, int const_arg2) +{ + if (const_arg2) { + tcg_out_bundle(s, miI, + INSN_NOP_M, + INSN_NOP_I, + tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12, ret, + arg1, 63 - arg2, 31 - arg2)); + } else { + tcg_out_bundle(s, mII, + INSN_NOP_M, + tcg_opc_a3 (TCG_REG_P0, OPC_AND_A3, TCG_REG_R2, + 0x1f, arg2), + tcg_opc_i7 (TCG_REG_P0, OPC_SHL_I7, ret, + arg1, TCG_REG_R2)); + } +} + +static inline void tcg_out_shl_i64(TCGContext *s, TCGArg ret, TCGArg arg1, + TCGArg arg2, int const_arg2) +{ + if (const_arg2) { + tcg_out_bundle(s, miI, + INSN_NOP_M, + INSN_NOP_I, + tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12, ret, + arg1, 63 - arg2, 63 - arg2)); + } else { + tcg_out_bundle(s, miI, + INSN_NOP_M, + INSN_NOP_I, + tcg_opc_i7 (TCG_REG_P0, OPC_SHL_I7, ret, + arg1, arg2)); + } +} + +static inline void tcg_out_shr_i32(TCGContext *s, TCGArg ret, TCGArg arg1, + TCGArg arg2, int const_arg2) +{ + if (const_arg2) { + tcg_out_bundle(s, miI, + INSN_NOP_M, + INSN_NOP_I, + tcg_opc_i11(TCG_REG_P0, OPC_EXTR_U_I11, ret, + arg1, arg2, 31 - arg2)); + } else { + tcg_out_bundle(s, mII, + tcg_opc_a3 (TCG_REG_P0, OPC_AND_A3, TCG_REG_R3, + 0x1f, arg2), + tcg_opc_i29(TCG_REG_P0, OPC_ZXT4_I29, TCG_REG_R2, arg1), + tcg_opc_i5 (TCG_REG_P0, OPC_SHR_U_I5, ret, + TCG_REG_R2, TCG_REG_R3)); + } +} + +static inline void tcg_out_shr_i64(TCGContext *s, TCGArg ret, TCGArg arg1, + TCGArg arg2, int const_arg2) +{ + if (const_arg2) { + tcg_out_bundle(s, miI, + INSN_NOP_M, + INSN_NOP_I, + tcg_opc_i11(TCG_REG_P0, OPC_EXTR_U_I11, ret, + arg1, arg2, 63 - arg2)); + } else { + tcg_out_bundle(s, miI, + INSN_NOP_M, + INSN_NOP_I, + tcg_opc_i5 (TCG_REG_P0, OPC_SHR_U_I5, ret, + arg1, arg2)); + } +} + +static inline void tcg_out_rotl_i32(TCGContext *s, TCGArg ret, TCGArg arg1, + TCGArg arg2, int const_arg2) +{ + if (const_arg2) { + tcg_out_bundle(s, mII, + INSN_NOP_M, + tcg_opc_i2 (TCG_REG_P0, OPC_UNPACK4_L_I2, + TCG_REG_R2, arg1, arg1), + tcg_opc_i11(TCG_REG_P0, OPC_EXTR_U_I11, ret, + TCG_REG_R2, 32 - arg2, 31)); + } else { + tcg_out_bundle(s, miI, + INSN_NOP_M, + tcg_opc_i2 (TCG_REG_P0, OPC_UNPACK4_L_I2, + TCG_REG_R2, arg1, arg1), + tcg_opc_a3 (TCG_REG_P0, OPC_AND_A3, TCG_REG_R3, + 0x1f, arg2)); + tcg_out_bundle(s, mII, + INSN_NOP_M, + tcg_opc_a3 (TCG_REG_P0, OPC_SUB_A3, TCG_REG_R3, + 0x20, TCG_REG_R3), + tcg_opc_i5 (TCG_REG_P0, OPC_SHR_U_I5, ret, + TCG_REG_R2, TCG_REG_R3)); + } +} + +static inline void tcg_out_rotl_i64(TCGContext *s, TCGArg ret, TCGArg arg1, + TCGArg arg2, int const_arg2) +{ + if (const_arg2) { + tcg_out_bundle(s, miI, + INSN_NOP_M, + INSN_NOP_I, + tcg_opc_i10(TCG_REG_P0, OPC_SHRP_I10, ret, arg1, + arg1, 0x40 - arg2)); + } else { + tcg_out_bundle(s, mII, + tcg_opc_a3 (TCG_REG_P0, OPC_SUB_A3, TCG_REG_R2, + 0x40, arg2), + tcg_opc_i7 (TCG_REG_P0, OPC_SHL_I7, TCG_REG_R3, + arg1, arg2), + tcg_opc_i5 (TCG_REG_P0, OPC_SHR_U_I5, TCG_REG_R2, + arg1, TCG_REG_R2)); + tcg_out_bundle(s, miI, + INSN_NOP_M, + INSN_NOP_I, + tcg_opc_a1 (TCG_REG_P0, OPC_OR_A1, ret, + TCG_REG_R2, TCG_REG_R3)); + } +} + +static inline void tcg_out_rotr_i32(TCGContext *s, TCGArg ret, TCGArg arg1, + TCGArg arg2, int const_arg2) +{ + if (const_arg2) { + tcg_out_bundle(s, mII, + INSN_NOP_M, + tcg_opc_i2 (TCG_REG_P0, OPC_UNPACK4_L_I2, + TCG_REG_R2, arg1, arg1), + tcg_opc_i11(TCG_REG_P0, OPC_EXTR_U_I11, ret, + TCG_REG_R2, arg2, 31)); + } else { + tcg_out_bundle(s, mII, + tcg_opc_a3 (TCG_REG_P0, OPC_AND_A3, TCG_REG_R3, + 0x1f, arg2), + tcg_opc_i2 (TCG_REG_P0, OPC_UNPACK4_L_I2, + TCG_REG_R2, arg1, arg1), + tcg_opc_i5 (TCG_REG_P0, OPC_SHR_U_I5, ret, + TCG_REG_R2, TCG_REG_R3)); + } +} + +static inline void tcg_out_rotr_i64(TCGContext *s, TCGArg ret, TCGArg arg1, + TCGArg arg2, int const_arg2) +{ + if (const_arg2) { + tcg_out_bundle(s, miI, + INSN_NOP_M, + INSN_NOP_I, + tcg_opc_i10(TCG_REG_P0, OPC_SHRP_I10, ret, arg1, + arg1, arg2)); + } else { + tcg_out_bundle(s, mII, + tcg_opc_a3 (TCG_REG_P0, OPC_SUB_A3, TCG_REG_R2, + 0x40, arg2), + tcg_opc_i5 (TCG_REG_P0, OPC_SHR_U_I5, TCG_REG_R3, + arg1, arg2), + tcg_opc_i7 (TCG_REG_P0, OPC_SHL_I7, TCG_REG_R2, + arg1, TCG_REG_R2)); + tcg_out_bundle(s, miI, + INSN_NOP_M, + INSN_NOP_I, + tcg_opc_a1 (TCG_REG_P0, OPC_OR_A1, ret, + TCG_REG_R2, TCG_REG_R3)); + } +} + +static const uint64_t opc_ext_i29[8] = { + OPC_ZXT1_I29, OPC_ZXT2_I29, OPC_ZXT4_I29, 0, + OPC_SXT1_I29, OPC_SXT2_I29, OPC_SXT4_I29, 0 +}; + +static inline uint64_t tcg_opc_ext_i(int qp, TCGMemOp opc, TCGReg d, TCGReg s) +{ + if ((opc & MO_SIZE) == MO_64) { + return tcg_opc_mov_a(qp, d, s); + } else { + return tcg_opc_i29(qp, opc_ext_i29[opc & MO_SSIZE], d, s); + } +} + +static inline void tcg_out_ext(TCGContext *s, uint64_t opc_i29, + TCGArg ret, TCGArg arg) +{ + tcg_out_bundle(s, miI, + INSN_NOP_M, + INSN_NOP_I, + tcg_opc_i29(TCG_REG_P0, opc_i29, ret, arg)); +} + +static inline uint64_t tcg_opc_bswap64_i(int qp, TCGReg d, TCGReg s) +{ + return tcg_opc_i3(qp, OPC_MUX1_I3, d, s, 0xb); +} + +static inline void tcg_out_bswap16(TCGContext *s, TCGArg ret, TCGArg arg) +{ + tcg_out_bundle(s, mII, + INSN_NOP_M, + tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12, ret, arg, 15, 15), + tcg_opc_bswap64_i(TCG_REG_P0, ret, ret)); +} + +static inline void tcg_out_bswap32(TCGContext *s, TCGArg ret, TCGArg arg) +{ + tcg_out_bundle(s, mII, + INSN_NOP_M, + tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12, ret, arg, 31, 31), + tcg_opc_bswap64_i(TCG_REG_P0, ret, ret)); +} + +static inline void tcg_out_bswap64(TCGContext *s, TCGArg ret, TCGArg arg) +{ + tcg_out_bundle(s, miI, + INSN_NOP_M, + INSN_NOP_I, + tcg_opc_bswap64_i(TCG_REG_P0, ret, arg)); +} + +static inline void tcg_out_deposit(TCGContext *s, TCGArg ret, TCGArg a1, + TCGArg a2, int const_a2, int pos, int len) +{ + uint64_t i1 = 0, i2 = 0; + int cpos = 63 - pos, lm1 = len - 1; + + if (const_a2) { + /* Truncate the value of a constant a2 to the width of the field. */ + int mask = (1u << len) - 1; + a2 &= mask; + + if (a2 == 0 || a2 == mask) { + /* 1-bit signed constant inserted into register. */ + i2 = tcg_opc_i14(TCG_REG_P0, OPC_DEP_I14, ret, a2, a1, cpos, lm1); + } else { + /* Otherwise, load any constant into a temporary. Do this into + the first I slot to help out with cross-unit delays. */ + i1 = tcg_opc_movi_a(TCG_REG_P0, TCG_REG_R2, a2); + a2 = TCG_REG_R2; + } + } + if (i2 == 0) { + i2 = tcg_opc_i15(TCG_REG_P0, OPC_DEP_I15, ret, a2, a1, cpos, lm1); + } + tcg_out_bundle(s, (i1 ? mII : miI), + INSN_NOP_M, + i1 ? i1 : INSN_NOP_I, + i2); +} + +static inline uint64_t tcg_opc_cmp_a(int qp, TCGCond cond, TCGArg arg1, + TCGArg arg2, int cmp4) +{ + uint64_t opc_eq_a6, opc_lt_a6, opc_ltu_a6; + + if (cmp4) { + opc_eq_a6 = OPC_CMP4_EQ_A6; + opc_lt_a6 = OPC_CMP4_LT_A6; + opc_ltu_a6 = OPC_CMP4_LTU_A6; + } else { + opc_eq_a6 = OPC_CMP_EQ_A6; + opc_lt_a6 = OPC_CMP_LT_A6; + opc_ltu_a6 = OPC_CMP_LTU_A6; + } + + switch (cond) { + case TCG_COND_EQ: + return tcg_opc_a6 (qp, opc_eq_a6, TCG_REG_P6, TCG_REG_P7, arg1, arg2); + case TCG_COND_NE: + return tcg_opc_a6 (qp, opc_eq_a6, TCG_REG_P7, TCG_REG_P6, arg1, arg2); + case TCG_COND_LT: + return tcg_opc_a6 (qp, opc_lt_a6, TCG_REG_P6, TCG_REG_P7, arg1, arg2); + case TCG_COND_LTU: + return tcg_opc_a6 (qp, opc_ltu_a6, TCG_REG_P6, TCG_REG_P7, arg1, arg2); + case TCG_COND_GE: + return tcg_opc_a6 (qp, opc_lt_a6, TCG_REG_P7, TCG_REG_P6, arg1, arg2); + case TCG_COND_GEU: + return tcg_opc_a6 (qp, opc_ltu_a6, TCG_REG_P7, TCG_REG_P6, arg1, arg2); + case TCG_COND_LE: + return tcg_opc_a6 (qp, opc_lt_a6, TCG_REG_P7, TCG_REG_P6, arg2, arg1); + case TCG_COND_LEU: + return tcg_opc_a6 (qp, opc_ltu_a6, TCG_REG_P7, TCG_REG_P6, arg2, arg1); + case TCG_COND_GT: + return tcg_opc_a6 (qp, opc_lt_a6, TCG_REG_P6, TCG_REG_P7, arg2, arg1); + case TCG_COND_GTU: + return tcg_opc_a6 (qp, opc_ltu_a6, TCG_REG_P6, TCG_REG_P7, arg2, arg1); + default: + tcg_abort(); + break; + } +} + +static inline void tcg_out_brcond(TCGContext *s, TCGCond cond, TCGReg arg1, + TCGReg arg2, TCGLabel *l, int cmp4) +{ + uint64_t imm; + + /* We pay attention here to not modify the branch target by reading + the existing value and using it again. This ensure that caches and + memory are kept coherent during retranslation. */ + if (l->has_value) { + imm = l->u.value_ptr - s->code_ptr; + } else { + imm = get_reloc_pcrel21b_slot2(s->code_ptr); + tcg_out_reloc(s, s->code_ptr, R_IA64_PCREL21B, l, 0); + } + + tcg_out_bundle(s, miB, + INSN_NOP_M, + tcg_opc_cmp_a(TCG_REG_P0, cond, arg1, arg2, cmp4), + tcg_opc_b1(TCG_REG_P6, OPC_BR_DPTK_FEW_B1, imm)); +} + +static inline void tcg_out_setcond(TCGContext *s, TCGCond cond, TCGArg ret, + TCGArg arg1, TCGArg arg2, int cmp4) +{ + tcg_out_bundle(s, MmI, + tcg_opc_cmp_a(TCG_REG_P0, cond, arg1, arg2, cmp4), + tcg_opc_movi_a(TCG_REG_P6, ret, 1), + tcg_opc_movi_a(TCG_REG_P7, ret, 0)); +} + +static inline void tcg_out_movcond(TCGContext *s, TCGCond cond, TCGArg ret, + TCGArg c1, TCGArg c2, + TCGArg v1, int const_v1, + TCGArg v2, int const_v2, int cmp4) +{ + uint64_t opc1, opc2; + + if (const_v1) { + opc1 = tcg_opc_movi_a(TCG_REG_P6, ret, v1); + } else if (ret == v1) { + opc1 = INSN_NOP_M; + } else { + opc1 = tcg_opc_mov_a(TCG_REG_P6, ret, v1); + } + if (const_v2) { + opc2 = tcg_opc_movi_a(TCG_REG_P7, ret, v2); + } else if (ret == v2) { + opc2 = INSN_NOP_I; + } else { + opc2 = tcg_opc_mov_a(TCG_REG_P7, ret, v2); + } + + tcg_out_bundle(s, MmI, + tcg_opc_cmp_a(TCG_REG_P0, cond, c1, c2, cmp4), + opc1, + opc2); +} + +#if defined(CONFIG_SOFTMMU) +/* We're expecting to use an signed 22-bit immediate add. */ +QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_table[NB_MMU_MODES - 1][1]) + > 0x1fffff) + +/* Load and compare a TLB entry, and return the result in (p6, p7). + R2 is loaded with the addend TLB entry. + R57 is loaded with the address, zero extented on 32-bit targets. + R1, R3 are clobbered, leaving R56 free for... + BSWAP_1, BSWAP_2 and I-slot insns for swapping data for store. */ +static inline void tcg_out_qemu_tlb(TCGContext *s, TCGReg addr_reg, + TCGMemOp s_bits, int off_rw, int off_add, + uint64_t bswap1, uint64_t bswap2) +{ + /* + .mii + mov r2 = off_rw + extr.u r3 = addr_reg, ... # extract tlb page + zxt4 r57 = addr_reg # or mov for 64-bit guest + ;; + .mii + addl r2 = r2, areg0 + shl r3 = r3, cteb # via dep.z + dep r1 = 0, r57, ... # zero page ofs, keep align + ;; + .mmi + add r2 = r2, r3 + ;; + ld4 r3 = [r2], off_add-off_rw # or ld8 for 64-bit guest + nop + ;; + .mmi + nop + cmp.eq p6, p7 = r3, r58 + nop + ;; + */ + tcg_out_bundle(s, miI, + tcg_opc_movi_a(TCG_REG_P0, TCG_REG_R2, off_rw), + tcg_opc_i11(TCG_REG_P0, OPC_EXTR_U_I11, TCG_REG_R3, + addr_reg, TARGET_PAGE_BITS, CPU_TLB_BITS - 1), + tcg_opc_ext_i(TCG_REG_P0, + TARGET_LONG_BITS == 32 ? MO_UL : MO_Q, + TCG_REG_R57, addr_reg)); + tcg_out_bundle(s, miI, + tcg_opc_a1 (TCG_REG_P0, OPC_ADD_A1, TCG_REG_R2, + TCG_REG_R2, TCG_AREG0), + tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12, TCG_REG_R3, + TCG_REG_R3, 63 - CPU_TLB_ENTRY_BITS, + 63 - CPU_TLB_ENTRY_BITS), + tcg_opc_i14(TCG_REG_P0, OPC_DEP_I14, TCG_REG_R1, 0, + TCG_REG_R57, 63 - s_bits, + TARGET_PAGE_BITS - s_bits - 1)); + tcg_out_bundle(s, MmI, + tcg_opc_a1 (TCG_REG_P0, OPC_ADD_A1, + TCG_REG_R2, TCG_REG_R2, TCG_REG_R3), + tcg_opc_m3 (TCG_REG_P0, + (TARGET_LONG_BITS == 32 + ? OPC_LD4_M3 : OPC_LD8_M3), TCG_REG_R3, + TCG_REG_R2, off_add - off_rw), + bswap1); + tcg_out_bundle(s, mmI, + tcg_opc_m1 (TCG_REG_P0, OPC_LD8_M1, TCG_REG_R2, TCG_REG_R2), + tcg_opc_a6 (TCG_REG_P0, OPC_CMP_EQ_A6, TCG_REG_P6, + TCG_REG_P7, TCG_REG_R1, TCG_REG_R3), + bswap2); +} + +typedef struct TCGLabelQemuLdst { + bool is_ld; + TCGMemOp size; + tcg_insn_unit *label_ptr; /* label pointers to be updated */ + struct TCGLabelQemuLdst *next; +} TCGLabelQemuLdst; + +typedef struct TCGBackendData { + TCGLabelQemuLdst *labels; +} TCGBackendData; + +static inline void tcg_out_tb_init(TCGContext *s) +{ + s->be->labels = NULL; +} + +static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOp opc, + tcg_insn_unit *label_ptr) +{ + TCGBackendData *be = s->be; + TCGLabelQemuLdst *l = tcg_malloc(sizeof(*l)); + + l->is_ld = is_ld; + l->size = opc & MO_SIZE; + l->label_ptr = label_ptr; + l->next = be->labels; + be->labels = l; +} + +static bool tcg_out_tb_finalize(TCGContext *s) +{ + static const void * const helpers[8] = { + helper_ret_stb_mmu, + helper_le_stw_mmu, + helper_le_stl_mmu, + helper_le_stq_mmu, + helper_ret_ldub_mmu, + helper_le_lduw_mmu, + helper_le_ldul_mmu, + helper_le_ldq_mmu, + }; + tcg_insn_unit *thunks[8] = { }; + TCGLabelQemuLdst *l; + + for (l = s->be->labels; l != NULL; l = l->next) { + long x = l->is_ld * 4 + l->size; + tcg_insn_unit *dest = thunks[x]; + + /* The out-of-line thunks are all the same; load the return address + from B0, load the GP, and branch to the code. Note that we are + always post-call, so the register window has rolled, so we're + using incoming parameter register numbers, not outgoing. */ + if (dest == NULL) { + uintptr_t *desc = (uintptr_t *)helpers[x]; + uintptr_t func = desc[0], gp = desc[1], disp; + + thunks[x] = dest = s->code_ptr; + + tcg_out_bundle(s, mlx, + INSN_NOP_M, + tcg_opc_l2 (gp), + tcg_opc_x2 (TCG_REG_P0, OPC_MOVL_X2, + TCG_REG_R1, gp)); + tcg_out_bundle(s, mii, + INSN_NOP_M, + INSN_NOP_I, + tcg_opc_i22(TCG_REG_P0, OPC_MOV_I22, + l->is_ld ? TCG_REG_R35 : TCG_REG_R36, + TCG_REG_B0)); + disp = (tcg_insn_unit *)func - s->code_ptr; + tcg_out_bundle(s, mLX, + INSN_NOP_M, + tcg_opc_l3 (disp), + tcg_opc_x3 (TCG_REG_P0, OPC_BRL_SPTK_MANY_X3, disp)); + } + + reloc_pcrel21b_slot2(l->label_ptr, dest); + + /* Test for (pending) buffer overflow. The assumption is that any + one operation beginning below the high water mark cannot overrun + the buffer completely. Thus we can test for overflow after + generating code without having to check during generation. */ + if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) { + return false; + } + } + return true; +} + +static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args) +{ + static const uint64_t opc_ld_m1[4] = { + OPC_LD1_M1, OPC_LD2_M1, OPC_LD4_M1, OPC_LD8_M1 + }; + int addr_reg, data_reg, mem_index; + TCGMemOpIdx oi; + TCGMemOp opc, s_bits; + uint64_t fin1, fin2; + tcg_insn_unit *label_ptr; + + data_reg = args[0]; + addr_reg = args[1]; + oi = args[2]; + opc = get_memop(oi); + mem_index = get_mmuidx(oi); + s_bits = opc & MO_SIZE; + + /* Read the TLB entry */ + tcg_out_qemu_tlb(s, addr_reg, s_bits, + offsetof(CPUArchState, tlb_table[mem_index][0].addr_read), + offsetof(CPUArchState, tlb_table[mem_index][0].addend), + INSN_NOP_I, INSN_NOP_I); + + /* P6 is the fast path, and P7 the slow path */ + + fin2 = 0; + if (opc & MO_BSWAP) { + fin1 = tcg_opc_bswap64_i(TCG_REG_P0, data_reg, TCG_REG_R8); + if (s_bits < MO_64) { + int shift = 64 - (8 << s_bits); + fin2 = (opc & MO_SIGN ? OPC_EXTR_I11 : OPC_EXTR_U_I11); + fin2 = tcg_opc_i11(TCG_REG_P0, fin2, + data_reg, data_reg, shift, 63 - shift); + } + } else { + fin1 = tcg_opc_ext_i(TCG_REG_P0, opc, data_reg, TCG_REG_R8); + } + + tcg_out_bundle(s, mmI, + tcg_opc_mov_a(TCG_REG_P7, TCG_REG_R56, TCG_AREG0), + tcg_opc_a1 (TCG_REG_P6, OPC_ADD_A1, TCG_REG_R2, + TCG_REG_R2, TCG_REG_R57), + tcg_opc_movi_a(TCG_REG_P7, TCG_REG_R58, oi)); + label_ptr = s->code_ptr; + tcg_out_bundle(s, miB, + tcg_opc_m1 (TCG_REG_P6, opc_ld_m1[s_bits], + TCG_REG_R8, TCG_REG_R2), + INSN_NOP_I, + tcg_opc_b3 (TCG_REG_P7, OPC_BR_CALL_SPNT_FEW_B3, TCG_REG_B0, + get_reloc_pcrel21b_slot2(label_ptr))); + + add_qemu_ldst_label(s, 1, opc, label_ptr); + + /* Note that we always use LE helper functions, so the bswap insns + here for the fast path also apply to the slow path. */ + tcg_out_bundle(s, (fin2 ? mII : miI), + INSN_NOP_M, + fin1, + fin2 ? fin2 : INSN_NOP_I); +} + +static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args) +{ + static const uint64_t opc_st_m4[4] = { + OPC_ST1_M4, OPC_ST2_M4, OPC_ST4_M4, OPC_ST8_M4 + }; + TCGReg addr_reg, data_reg; + int mem_index; + uint64_t pre1, pre2; + TCGMemOpIdx oi; + TCGMemOp opc, s_bits; + tcg_insn_unit *label_ptr; + + data_reg = args[0]; + addr_reg = args[1]; + oi = args[2]; + opc = get_memop(oi); + mem_index = get_mmuidx(oi); + s_bits = opc & MO_SIZE; + + /* Note that we always use LE helper functions, so the bswap insns + that are here for the fast path also apply to the slow path, + and move the data into the argument register. */ + pre2 = INSN_NOP_I; + if (opc & MO_BSWAP) { + pre1 = tcg_opc_bswap64_i(TCG_REG_P0, TCG_REG_R58, data_reg); + if (s_bits < MO_64) { + int shift = 64 - (8 << s_bits); + pre2 = tcg_opc_i11(TCG_REG_P0, OPC_EXTR_U_I11, + TCG_REG_R58, TCG_REG_R58, shift, 63 - shift); + } + } else { + /* Just move the data into place for the slow path. */ + pre1 = tcg_opc_ext_i(TCG_REG_P0, opc, TCG_REG_R58, data_reg); + } + + tcg_out_qemu_tlb(s, addr_reg, s_bits, + offsetof(CPUArchState, tlb_table[mem_index][0].addr_write), + offsetof(CPUArchState, tlb_table[mem_index][0].addend), + pre1, pre2); + + /* P6 is the fast path, and P7 the slow path */ + tcg_out_bundle(s, mmI, + tcg_opc_mov_a(TCG_REG_P7, TCG_REG_R56, TCG_AREG0), + tcg_opc_a1 (TCG_REG_P6, OPC_ADD_A1, TCG_REG_R2, + TCG_REG_R2, TCG_REG_R57), + tcg_opc_movi_a(TCG_REG_P7, TCG_REG_R59, oi)); + label_ptr = s->code_ptr; + tcg_out_bundle(s, miB, + tcg_opc_m4 (TCG_REG_P6, opc_st_m4[s_bits], + TCG_REG_R58, TCG_REG_R2), + INSN_NOP_I, + tcg_opc_b3 (TCG_REG_P7, OPC_BR_CALL_SPNT_FEW_B3, TCG_REG_B0, + get_reloc_pcrel21b_slot2(label_ptr))); + + add_qemu_ldst_label(s, 0, opc, label_ptr); +} + +#else /* !CONFIG_SOFTMMU */ +# include "tcg-be-null.h" + +static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args) +{ + static uint64_t const opc_ld_m1[4] = { + OPC_LD1_M1, OPC_LD2_M1, OPC_LD4_M1, OPC_LD8_M1 + }; + int addr_reg, data_reg; + TCGMemOp opc, s_bits, bswap; + + data_reg = args[0]; + addr_reg = args[1]; + opc = args[2]; + s_bits = opc & MO_SIZE; + bswap = opc & MO_BSWAP; + +#if TARGET_LONG_BITS == 32 + if (guest_base != 0) { + tcg_out_bundle(s, mII, + INSN_NOP_M, + tcg_opc_i29(TCG_REG_P0, OPC_ZXT4_I29, + TCG_REG_R3, addr_reg), + tcg_opc_a1 (TCG_REG_P0, OPC_ADD_A1, TCG_REG_R2, + TCG_GUEST_BASE_REG, TCG_REG_R3)); + } else { + tcg_out_bundle(s, miI, + INSN_NOP_M, + tcg_opc_i29(TCG_REG_P0, OPC_ZXT4_I29, + TCG_REG_R2, addr_reg), + INSN_NOP_I); + } + + if (!bswap) { + if (!(opc & MO_SIGN)) { + tcg_out_bundle(s, miI, + tcg_opc_m1 (TCG_REG_P0, opc_ld_m1[s_bits], + data_reg, TCG_REG_R2), + INSN_NOP_I, + INSN_NOP_I); + } else { + tcg_out_bundle(s, mII, + tcg_opc_m1 (TCG_REG_P0, opc_ld_m1[s_bits], + data_reg, TCG_REG_R2), + INSN_NOP_I, + tcg_opc_ext_i(TCG_REG_P0, opc, data_reg, data_reg)); + } + } else if (s_bits == MO_64) { + tcg_out_bundle(s, mII, + tcg_opc_m1 (TCG_REG_P0, opc_ld_m1[s_bits], + data_reg, TCG_REG_R2), + INSN_NOP_I, + tcg_opc_bswap64_i(TCG_REG_P0, data_reg, data_reg)); + } else { + if (s_bits == MO_16) { + tcg_out_bundle(s, mII, + tcg_opc_m1 (TCG_REG_P0, opc_ld_m1[s_bits], + data_reg, TCG_REG_R2), + INSN_NOP_I, + tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12, + data_reg, data_reg, 15, 15)); + } else { + tcg_out_bundle(s, mII, + tcg_opc_m1 (TCG_REG_P0, opc_ld_m1[s_bits], + data_reg, TCG_REG_R2), + INSN_NOP_I, + tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12, + data_reg, data_reg, 31, 31)); + } + if (!(opc & MO_SIGN)) { + tcg_out_bundle(s, miI, + INSN_NOP_M, + INSN_NOP_I, + tcg_opc_bswap64_i(TCG_REG_P0, data_reg, data_reg)); + } else { + tcg_out_bundle(s, mII, + INSN_NOP_M, + tcg_opc_bswap64_i(TCG_REG_P0, data_reg, data_reg), + tcg_opc_ext_i(TCG_REG_P0, opc, data_reg, data_reg)); + } + } +#else + if (guest_base != 0) { + tcg_out_bundle(s, MmI, + tcg_opc_a1 (TCG_REG_P0, OPC_ADD_A1, TCG_REG_R2, + TCG_GUEST_BASE_REG, addr_reg), + tcg_opc_m1 (TCG_REG_P0, opc_ld_m1[s_bits], + data_reg, TCG_REG_R2), + INSN_NOP_I); + } else { + tcg_out_bundle(s, mmI, + INSN_NOP_M, + tcg_opc_m1 (TCG_REG_P0, opc_ld_m1[s_bits], + data_reg, addr_reg), + INSN_NOP_I); + } + + if (bswap && s_bits == MO_16) { + tcg_out_bundle(s, mII, + INSN_NOP_M, + tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12, + data_reg, data_reg, 15, 15), + tcg_opc_bswap64_i(TCG_REG_P0, data_reg, data_reg)); + } else if (bswap && s_bits == MO_32) { + tcg_out_bundle(s, mII, + INSN_NOP_M, + tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12, + data_reg, data_reg, 31, 31), + tcg_opc_bswap64_i(TCG_REG_P0, data_reg, data_reg)); + } else if (bswap && s_bits == MO_64) { + tcg_out_bundle(s, miI, + INSN_NOP_M, + INSN_NOP_I, + tcg_opc_bswap64_i(TCG_REG_P0, data_reg, data_reg)); + } + if (opc & MO_SIGN) { + tcg_out_bundle(s, miI, + INSN_NOP_M, + INSN_NOP_I, + tcg_opc_ext_i(TCG_REG_P0, opc, data_reg, data_reg)); + } +#endif +} + +static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args) +{ + static uint64_t const opc_st_m4[4] = { + OPC_ST1_M4, OPC_ST2_M4, OPC_ST4_M4, OPC_ST8_M4 + }; + int addr_reg, data_reg; +#if TARGET_LONG_BITS == 64 + uint64_t add_guest_base; +#endif + TCGMemOp opc, s_bits, bswap; + + data_reg = args[0]; + addr_reg = args[1]; + opc = args[2]; + s_bits = opc & MO_SIZE; + bswap = opc & MO_BSWAP; + +#if TARGET_LONG_BITS == 32 + if (guest_base != 0) { + tcg_out_bundle(s, mII, + INSN_NOP_M, + tcg_opc_i29(TCG_REG_P0, OPC_ZXT4_I29, + TCG_REG_R3, addr_reg), + tcg_opc_a1 (TCG_REG_P0, OPC_ADD_A1, TCG_REG_R2, + TCG_GUEST_BASE_REG, TCG_REG_R3)); + } else { + tcg_out_bundle(s, miI, + INSN_NOP_M, + tcg_opc_i29(TCG_REG_P0, OPC_ZXT4_I29, + TCG_REG_R2, addr_reg), + INSN_NOP_I); + } + + if (bswap) { + if (s_bits == MO_16) { + tcg_out_bundle(s, mII, + INSN_NOP_M, + tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12, + TCG_REG_R3, data_reg, 15, 15), + tcg_opc_bswap64_i(TCG_REG_P0, + TCG_REG_R3, TCG_REG_R3)); + data_reg = TCG_REG_R3; + } else if (s_bits == MO_32) { + tcg_out_bundle(s, mII, + INSN_NOP_M, + tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12, + TCG_REG_R3, data_reg, 31, 31), + tcg_opc_bswap64_i(TCG_REG_P0, + TCG_REG_R3, TCG_REG_R3)); + data_reg = TCG_REG_R3; + } else if (s_bits == MO_64) { + tcg_out_bundle(s, miI, + INSN_NOP_M, + INSN_NOP_I, + tcg_opc_bswap64_i(TCG_REG_P0, TCG_REG_R3, data_reg)); + data_reg = TCG_REG_R3; + } + } + tcg_out_bundle(s, mmI, + tcg_opc_m4 (TCG_REG_P0, opc_st_m4[s_bits], + data_reg, TCG_REG_R2), + INSN_NOP_M, + INSN_NOP_I); +#else + if (guest_base != 0) { + add_guest_base = tcg_opc_a1 (TCG_REG_P0, OPC_ADD_A1, TCG_REG_R2, + TCG_GUEST_BASE_REG, addr_reg); + addr_reg = TCG_REG_R2; + } else { + add_guest_base = INSN_NOP_M; + } + + if (!bswap) { + tcg_out_bundle(s, (guest_base ? MmI : mmI), + add_guest_base, + tcg_opc_m4 (TCG_REG_P0, opc_st_m4[s_bits], + data_reg, addr_reg), + INSN_NOP_I); + } else { + if (s_bits == MO_16) { + tcg_out_bundle(s, mII, + add_guest_base, + tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12, + TCG_REG_R3, data_reg, 15, 15), + tcg_opc_bswap64_i(TCG_REG_P0, + TCG_REG_R3, TCG_REG_R3)); + data_reg = TCG_REG_R3; + } else if (s_bits == MO_32) { + tcg_out_bundle(s, mII, + add_guest_base, + tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12, + TCG_REG_R3, data_reg, 31, 31), + tcg_opc_bswap64_i(TCG_REG_P0, + TCG_REG_R3, TCG_REG_R3)); + data_reg = TCG_REG_R3; + } else if (s_bits == MO_64) { + tcg_out_bundle(s, miI, + add_guest_base, + INSN_NOP_I, + tcg_opc_bswap64_i(TCG_REG_P0, TCG_REG_R3, data_reg)); + data_reg = TCG_REG_R3; + } + tcg_out_bundle(s, miI, + tcg_opc_m4 (TCG_REG_P0, opc_st_m4[s_bits], + data_reg, addr_reg), + INSN_NOP_I, + INSN_NOP_I); + } +#endif +} + +#endif + +static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, + const TCGArg *args, const int *const_args) +{ + switch(opc) { + case INDEX_op_exit_tb: + tcg_out_exit_tb(s, args[0]); + break; + case INDEX_op_br: + tcg_out_br(s, arg_label(args[0])); + break; + case INDEX_op_goto_tb: + tcg_out_goto_tb(s, args[0]); + break; + + case INDEX_op_ld8u_i32: + case INDEX_op_ld8u_i64: + tcg_out_ld_rel(s, OPC_LD1_M1, args[0], args[1], args[2]); + break; + case INDEX_op_ld8s_i32: + case INDEX_op_ld8s_i64: + tcg_out_ld_rel(s, OPC_LD1_M1, args[0], args[1], args[2]); + tcg_out_ext(s, OPC_SXT1_I29, args[0], args[0]); + break; + case INDEX_op_ld16u_i32: + case INDEX_op_ld16u_i64: + tcg_out_ld_rel(s, OPC_LD2_M1, args[0], args[1], args[2]); + break; + case INDEX_op_ld16s_i32: + case INDEX_op_ld16s_i64: + tcg_out_ld_rel(s, OPC_LD2_M1, args[0], args[1], args[2]); + tcg_out_ext(s, OPC_SXT2_I29, args[0], args[0]); + break; + case INDEX_op_ld_i32: + case INDEX_op_ld32u_i64: + tcg_out_ld_rel(s, OPC_LD4_M1, args[0], args[1], args[2]); + break; + case INDEX_op_ld32s_i64: + tcg_out_ld_rel(s, OPC_LD4_M1, args[0], args[1], args[2]); + tcg_out_ext(s, OPC_SXT4_I29, args[0], args[0]); + break; + case INDEX_op_ld_i64: + tcg_out_ld_rel(s, OPC_LD8_M1, args[0], args[1], args[2]); + break; + case INDEX_op_st8_i32: + case INDEX_op_st8_i64: + tcg_out_st_rel(s, OPC_ST1_M4, args[0], args[1], args[2]); + break; + case INDEX_op_st16_i32: + case INDEX_op_st16_i64: + tcg_out_st_rel(s, OPC_ST2_M4, args[0], args[1], args[2]); + break; + case INDEX_op_st_i32: + case INDEX_op_st32_i64: + tcg_out_st_rel(s, OPC_ST4_M4, args[0], args[1], args[2]); + break; + case INDEX_op_st_i64: + tcg_out_st_rel(s, OPC_ST8_M4, args[0], args[1], args[2]); + break; + + case INDEX_op_add_i32: + case INDEX_op_add_i64: + tcg_out_add(s, args[0], args[1], args[2], const_args[2]); + break; + case INDEX_op_sub_i32: + case INDEX_op_sub_i64: + tcg_out_sub(s, args[0], args[1], const_args[1], args[2], const_args[2]); + break; + + case INDEX_op_and_i32: + case INDEX_op_and_i64: + /* TCG expects arg2 constant; A3 expects arg1 constant. Swap. */ + tcg_out_alu(s, OPC_AND_A1, OPC_AND_A3, args[0], + args[2], const_args[2], args[1], const_args[1]); + break; + case INDEX_op_andc_i32: + case INDEX_op_andc_i64: + tcg_out_alu(s, OPC_ANDCM_A1, OPC_ANDCM_A3, args[0], + args[1], const_args[1], args[2], const_args[2]); + break; + case INDEX_op_eqv_i32: + case INDEX_op_eqv_i64: + tcg_out_eqv(s, args[0], args[1], const_args[1], + args[2], const_args[2]); + break; + case INDEX_op_nand_i32: + case INDEX_op_nand_i64: + tcg_out_nand(s, args[0], args[1], const_args[1], + args[2], const_args[2]); + break; + case INDEX_op_nor_i32: + case INDEX_op_nor_i64: + tcg_out_nor(s, args[0], args[1], const_args[1], + args[2], const_args[2]); + break; + case INDEX_op_or_i32: + case INDEX_op_or_i64: + /* TCG expects arg2 constant; A3 expects arg1 constant. Swap. */ + tcg_out_alu(s, OPC_OR_A1, OPC_OR_A3, args[0], + args[2], const_args[2], args[1], const_args[1]); + break; + case INDEX_op_orc_i32: + case INDEX_op_orc_i64: + tcg_out_orc(s, args[0], args[1], const_args[1], + args[2], const_args[2]); + break; + case INDEX_op_xor_i32: + case INDEX_op_xor_i64: + /* TCG expects arg2 constant; A3 expects arg1 constant. Swap. */ + tcg_out_alu(s, OPC_XOR_A1, OPC_XOR_A3, args[0], + args[2], const_args[2], args[1], const_args[1]); + break; + + case INDEX_op_mul_i32: + case INDEX_op_mul_i64: + tcg_out_mul(s, args[0], args[1], args[2]); + break; + + case INDEX_op_sar_i32: + tcg_out_sar_i32(s, args[0], args[1], args[2], const_args[2]); + break; + case INDEX_op_sar_i64: + tcg_out_sar_i64(s, args[0], args[1], args[2], const_args[2]); + break; + case INDEX_op_shl_i32: + tcg_out_shl_i32(s, args[0], args[1], args[2], const_args[2]); + break; + case INDEX_op_shl_i64: + tcg_out_shl_i64(s, args[0], args[1], args[2], const_args[2]); + break; + case INDEX_op_shr_i32: + tcg_out_shr_i32(s, args[0], args[1], args[2], const_args[2]); + break; + case INDEX_op_shr_i64: + tcg_out_shr_i64(s, args[0], args[1], args[2], const_args[2]); + break; + case INDEX_op_rotl_i32: + tcg_out_rotl_i32(s, args[0], args[1], args[2], const_args[2]); + break; + case INDEX_op_rotl_i64: + tcg_out_rotl_i64(s, args[0], args[1], args[2], const_args[2]); + break; + case INDEX_op_rotr_i32: + tcg_out_rotr_i32(s, args[0], args[1], args[2], const_args[2]); + break; + case INDEX_op_rotr_i64: + tcg_out_rotr_i64(s, args[0], args[1], args[2], const_args[2]); + break; + + case INDEX_op_ext8s_i32: + case INDEX_op_ext8s_i64: + tcg_out_ext(s, OPC_SXT1_I29, args[0], args[1]); + break; + case INDEX_op_ext8u_i32: + case INDEX_op_ext8u_i64: + tcg_out_ext(s, OPC_ZXT1_I29, args[0], args[1]); + break; + case INDEX_op_ext16s_i32: + case INDEX_op_ext16s_i64: + tcg_out_ext(s, OPC_SXT2_I29, args[0], args[1]); + break; + case INDEX_op_ext16u_i32: + case INDEX_op_ext16u_i64: + tcg_out_ext(s, OPC_ZXT2_I29, args[0], args[1]); + break; + case INDEX_op_ext_i32_i64: + case INDEX_op_ext32s_i64: + tcg_out_ext(s, OPC_SXT4_I29, args[0], args[1]); + break; + case INDEX_op_extu_i32_i64: + case INDEX_op_ext32u_i64: + tcg_out_ext(s, OPC_ZXT4_I29, args[0], args[1]); + break; + + case INDEX_op_bswap16_i32: + case INDEX_op_bswap16_i64: + tcg_out_bswap16(s, args[0], args[1]); + break; + case INDEX_op_bswap32_i32: + case INDEX_op_bswap32_i64: + tcg_out_bswap32(s, args[0], args[1]); + break; + case INDEX_op_bswap64_i64: + tcg_out_bswap64(s, args[0], args[1]); + break; + + case INDEX_op_deposit_i32: + case INDEX_op_deposit_i64: + tcg_out_deposit(s, args[0], args[1], args[2], const_args[2], + args[3], args[4]); + break; + + case INDEX_op_brcond_i32: + tcg_out_brcond(s, args[2], args[0], args[1], arg_label(args[3]), 1); + break; + case INDEX_op_brcond_i64: + tcg_out_brcond(s, args[2], args[0], args[1], arg_label(args[3]), 0); + break; + case INDEX_op_setcond_i32: + tcg_out_setcond(s, args[3], args[0], args[1], args[2], 1); + break; + case INDEX_op_setcond_i64: + tcg_out_setcond(s, args[3], args[0], args[1], args[2], 0); + break; + case INDEX_op_movcond_i32: + tcg_out_movcond(s, args[5], args[0], args[1], args[2], + args[3], const_args[3], args[4], const_args[4], 1); + break; + case INDEX_op_movcond_i64: + tcg_out_movcond(s, args[5], args[0], args[1], args[2], + args[3], const_args[3], args[4], const_args[4], 0); + break; + + case INDEX_op_qemu_ld_i32: + tcg_out_qemu_ld(s, args); + break; + case INDEX_op_qemu_ld_i64: + tcg_out_qemu_ld(s, args); + break; + case INDEX_op_qemu_st_i32: + tcg_out_qemu_st(s, args); + break; + case INDEX_op_qemu_st_i64: + tcg_out_qemu_st(s, args); + break; + + case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */ + case INDEX_op_mov_i64: + case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi. */ + case INDEX_op_movi_i64: + case INDEX_op_call: /* Always emitted via tcg_out_call. */ + default: + tcg_abort(); + } +} + +static const TCGTargetOpDef ia64_op_defs[] = { + { INDEX_op_br, { } }, + { INDEX_op_exit_tb, { } }, + { INDEX_op_goto_tb, { } }, + + { INDEX_op_ld8u_i32, { "r", "r" } }, + { INDEX_op_ld8s_i32, { "r", "r" } }, + { INDEX_op_ld16u_i32, { "r", "r" } }, + { INDEX_op_ld16s_i32, { "r", "r" } }, + { INDEX_op_ld_i32, { "r", "r" } }, + { INDEX_op_st8_i32, { "rZ", "r" } }, + { INDEX_op_st16_i32, { "rZ", "r" } }, + { INDEX_op_st_i32, { "rZ", "r" } }, + + { INDEX_op_add_i32, { "r", "rZ", "rI" } }, + { INDEX_op_sub_i32, { "r", "rI", "rI" } }, + + { INDEX_op_and_i32, { "r", "rI", "rI" } }, + { INDEX_op_andc_i32, { "r", "rI", "rI" } }, + { INDEX_op_eqv_i32, { "r", "rZ", "rZ" } }, + { INDEX_op_nand_i32, { "r", "rZ", "rZ" } }, + { INDEX_op_nor_i32, { "r", "rZ", "rZ" } }, + { INDEX_op_or_i32, { "r", "rI", "rI" } }, + { INDEX_op_orc_i32, { "r", "rZ", "rZ" } }, + { INDEX_op_xor_i32, { "r", "rI", "rI" } }, + + { INDEX_op_mul_i32, { "r", "rZ", "rZ" } }, + + { INDEX_op_sar_i32, { "r", "rZ", "ri" } }, + { INDEX_op_shl_i32, { "r", "rZ", "ri" } }, + { INDEX_op_shr_i32, { "r", "rZ", "ri" } }, + { INDEX_op_rotl_i32, { "r", "rZ", "ri" } }, + { INDEX_op_rotr_i32, { "r", "rZ", "ri" } }, + + { INDEX_op_ext8s_i32, { "r", "rZ"} }, + { INDEX_op_ext8u_i32, { "r", "rZ"} }, + { INDEX_op_ext16s_i32, { "r", "rZ"} }, + { INDEX_op_ext16u_i32, { "r", "rZ"} }, + + { INDEX_op_bswap16_i32, { "r", "rZ" } }, + { INDEX_op_bswap32_i32, { "r", "rZ" } }, + + { INDEX_op_brcond_i32, { "rZ", "rZ" } }, + { INDEX_op_setcond_i32, { "r", "rZ", "rZ" } }, + { INDEX_op_movcond_i32, { "r", "rZ", "rZ", "rI", "rI" } }, + + { INDEX_op_ld8u_i64, { "r", "r" } }, + { INDEX_op_ld8s_i64, { "r", "r" } }, + { INDEX_op_ld16u_i64, { "r", "r" } }, + { INDEX_op_ld16s_i64, { "r", "r" } }, + { INDEX_op_ld32u_i64, { "r", "r" } }, + { INDEX_op_ld32s_i64, { "r", "r" } }, + { INDEX_op_ld_i64, { "r", "r" } }, + { INDEX_op_st8_i64, { "rZ", "r" } }, + { INDEX_op_st16_i64, { "rZ", "r" } }, + { INDEX_op_st32_i64, { "rZ", "r" } }, + { INDEX_op_st_i64, { "rZ", "r" } }, + + { INDEX_op_add_i64, { "r", "rZ", "rI" } }, + { INDEX_op_sub_i64, { "r", "rI", "rI" } }, + + { INDEX_op_and_i64, { "r", "rI", "rI" } }, + { INDEX_op_andc_i64, { "r", "rI", "rI" } }, + { INDEX_op_eqv_i64, { "r", "rZ", "rZ" } }, + { INDEX_op_nand_i64, { "r", "rZ", "rZ" } }, + { INDEX_op_nor_i64, { "r", "rZ", "rZ" } }, + { INDEX_op_or_i64, { "r", "rI", "rI" } }, + { INDEX_op_orc_i64, { "r", "rZ", "rZ" } }, + { INDEX_op_xor_i64, { "r", "rI", "rI" } }, + + { INDEX_op_mul_i64, { "r", "rZ", "rZ" } }, + + { INDEX_op_sar_i64, { "r", "rZ", "ri" } }, + { INDEX_op_shl_i64, { "r", "rZ", "ri" } }, + { INDEX_op_shr_i64, { "r", "rZ", "ri" } }, + { INDEX_op_rotl_i64, { "r", "rZ", "ri" } }, + { INDEX_op_rotr_i64, { "r", "rZ", "ri" } }, + + { INDEX_op_ext8s_i64, { "r", "rZ"} }, + { INDEX_op_ext8u_i64, { "r", "rZ"} }, + { INDEX_op_ext16s_i64, { "r", "rZ"} }, + { INDEX_op_ext16u_i64, { "r", "rZ"} }, + { INDEX_op_ext32s_i64, { "r", "rZ"} }, + { INDEX_op_ext32u_i64, { "r", "rZ"} }, + { INDEX_op_ext_i32_i64, { "r", "rZ" } }, + { INDEX_op_extu_i32_i64, { "r", "rZ" } }, + + { INDEX_op_bswap16_i64, { "r", "rZ" } }, + { INDEX_op_bswap32_i64, { "r", "rZ" } }, + { INDEX_op_bswap64_i64, { "r", "rZ" } }, + + { INDEX_op_brcond_i64, { "rZ", "rZ" } }, + { INDEX_op_setcond_i64, { "r", "rZ", "rZ" } }, + { INDEX_op_movcond_i64, { "r", "rZ", "rZ", "rI", "rI" } }, + + { INDEX_op_deposit_i32, { "r", "rZ", "ri" } }, + { INDEX_op_deposit_i64, { "r", "rZ", "ri" } }, + + { INDEX_op_qemu_ld_i32, { "r", "r" } }, + { INDEX_op_qemu_ld_i64, { "r", "r" } }, + { INDEX_op_qemu_st_i32, { "SZ", "r" } }, + { INDEX_op_qemu_st_i64, { "SZ", "r" } }, + + { -1 }, +}; + +/* Generate global QEMU prologue and epilogue code */ +static void tcg_target_qemu_prologue(TCGContext *s) +{ + int frame_size; + + /* reserve some stack space */ + frame_size = TCG_STATIC_CALL_ARGS_SIZE + + CPU_TEMP_BUF_NLONGS * sizeof(long); + frame_size = (frame_size + TCG_TARGET_STACK_ALIGN - 1) & + ~(TCG_TARGET_STACK_ALIGN - 1); + tcg_set_frame(s, TCG_REG_CALL_STACK, TCG_STATIC_CALL_ARGS_SIZE, + CPU_TEMP_BUF_NLONGS * sizeof(long)); + + /* First emit adhoc function descriptor */ + *s->code_ptr = (tcg_insn_unit){ + (uint64_t)(s->code_ptr + 1), /* entry point */ + 0 /* skip gp */ + }; + s->code_ptr++; + + /* prologue */ + tcg_out_bundle(s, miI, + tcg_opc_m34(TCG_REG_P0, OPC_ALLOC_M34, + TCG_REG_R34, 32, 24, 0), + INSN_NOP_I, + tcg_opc_i21(TCG_REG_P0, OPC_MOV_I21, + TCG_REG_B6, TCG_REG_R33, 0)); + + /* ??? If guest_base < 0x200000, we could load the register via + an ADDL in the M slot of the next bundle. */ + if (guest_base != 0) { + tcg_out_bundle(s, mlx, + INSN_NOP_M, + tcg_opc_l2(guest_base), + tcg_opc_x2 (TCG_REG_P0, OPC_MOVL_X2, + TCG_GUEST_BASE_REG, guest_base)); + tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG); + } + + tcg_out_bundle(s, miB, + tcg_opc_a4 (TCG_REG_P0, OPC_ADDS_A4, + TCG_REG_R12, -frame_size, TCG_REG_R12), + tcg_opc_i22(TCG_REG_P0, OPC_MOV_I22, + TCG_REG_R33, TCG_REG_B0), + tcg_opc_b4 (TCG_REG_P0, OPC_BR_SPTK_MANY_B4, TCG_REG_B6)); + + /* epilogue */ + tb_ret_addr = s->code_ptr; + tcg_out_bundle(s, miI, + INSN_NOP_M, + tcg_opc_i21(TCG_REG_P0, OPC_MOV_I21, + TCG_REG_B0, TCG_REG_R33, 0), + tcg_opc_a4 (TCG_REG_P0, OPC_ADDS_A4, + TCG_REG_R12, frame_size, TCG_REG_R12)); + tcg_out_bundle(s, miB, + INSN_NOP_M, + tcg_opc_i26(TCG_REG_P0, OPC_MOV_I_I26, + TCG_REG_PFS, TCG_REG_R34), + tcg_opc_b4 (TCG_REG_P0, OPC_BR_RET_SPTK_MANY_B4, + TCG_REG_B0)); +} + +static void tcg_target_init(TCGContext *s) +{ + tcg_regset_set(tcg_target_available_regs[TCG_TYPE_I32], + 0xffffffffffffffffull); + tcg_regset_set(tcg_target_available_regs[TCG_TYPE_I64], + 0xffffffffffffffffull); + + tcg_regset_clear(tcg_target_call_clobber_regs); + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R8); + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R9); + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R10); + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R11); + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R14); + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R15); + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R16); + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R17); + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R18); + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R19); + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R20); + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R21); + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R22); + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R23); + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R24); + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R25); + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R26); + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R27); + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R28); + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R29); + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R30); + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R31); + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R56); + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R57); + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R58); + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R59); + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R60); + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R61); + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R62); + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R63); + + tcg_regset_clear(s->reserved_regs); + tcg_regset_set_reg(s->reserved_regs, TCG_REG_R0); /* zero register */ + tcg_regset_set_reg(s->reserved_regs, TCG_REG_R1); /* global pointer */ + tcg_regset_set_reg(s->reserved_regs, TCG_REG_R2); /* internal use */ + tcg_regset_set_reg(s->reserved_regs, TCG_REG_R3); /* internal use */ + tcg_regset_set_reg(s->reserved_regs, TCG_REG_R12); /* stack pointer */ + tcg_regset_set_reg(s->reserved_regs, TCG_REG_R13); /* thread pointer */ + tcg_regset_set_reg(s->reserved_regs, TCG_REG_R33); /* return address */ + tcg_regset_set_reg(s->reserved_regs, TCG_REG_R34); /* PFS */ + + /* The following 4 are not in use, are call-saved, but *not* saved + by the prologue. Therefore we cannot use them without modifying + the prologue. There doesn't seem to be any good reason to use + these as opposed to the windowed registers. */ + tcg_regset_set_reg(s->reserved_regs, TCG_REG_R4); + tcg_regset_set_reg(s->reserved_regs, TCG_REG_R5); + tcg_regset_set_reg(s->reserved_regs, TCG_REG_R6); + tcg_regset_set_reg(s->reserved_regs, TCG_REG_R7); + + tcg_add_target_add_op_defs(ia64_op_defs); +} diff --git a/tcg/mips/tcg-target.c b/tcg/mips/tcg-target.c deleted file mode 100644 index 2dc4998..0000000 --- a/tcg/mips/tcg-target.c +++ /dev/null @@ -1,1892 +0,0 @@ -/* - * Tiny Code Generator for QEMU - * - * Copyright (c) 2008-2009 Arnaud Patard - * Copyright (c) 2009 Aurelien Jarno - * Based on i386/tcg-target.c - Copyright (c) 2008 Fabrice Bellard - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - */ - -#include "qemu/osdep.h" -#include "tcg-be-ldst.h" - -#ifdef HOST_WORDS_BIGENDIAN -# define MIPS_BE 1 -#else -# define MIPS_BE 0 -#endif - -#define LO_OFF (MIPS_BE * 4) -#define HI_OFF (4 - LO_OFF) - -#ifndef NDEBUG -static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = { - "zero", - "at", - "v0", - "v1", - "a0", - "a1", - "a2", - "a3", - "t0", - "t1", - "t2", - "t3", - "t4", - "t5", - "t6", - "t7", - "s0", - "s1", - "s2", - "s3", - "s4", - "s5", - "s6", - "s7", - "t8", - "t9", - "k0", - "k1", - "gp", - "sp", - "s8", - "ra", -}; -#endif - -#define TCG_TMP0 TCG_REG_AT -#define TCG_TMP1 TCG_REG_T9 - -/* check if we really need so many registers :P */ -static const TCGReg tcg_target_reg_alloc_order[] = { - /* Call saved registers. */ - TCG_REG_S0, - TCG_REG_S1, - TCG_REG_S2, - TCG_REG_S3, - TCG_REG_S4, - TCG_REG_S5, - TCG_REG_S6, - TCG_REG_S7, - TCG_REG_S8, - - /* Call clobbered registers. */ - TCG_REG_T0, - TCG_REG_T1, - TCG_REG_T2, - TCG_REG_T3, - TCG_REG_T4, - TCG_REG_T5, - TCG_REG_T6, - TCG_REG_T7, - TCG_REG_T8, - TCG_REG_T9, - TCG_REG_V1, - TCG_REG_V0, - - /* Argument registers, opposite order of allocation. */ - TCG_REG_A3, - TCG_REG_A2, - TCG_REG_A1, - TCG_REG_A0, -}; - -static const TCGReg tcg_target_call_iarg_regs[4] = { - TCG_REG_A0, - TCG_REG_A1, - TCG_REG_A2, - TCG_REG_A3 -}; - -static const TCGReg tcg_target_call_oarg_regs[2] = { - TCG_REG_V0, - TCG_REG_V1 -}; - -static tcg_insn_unit *tb_ret_addr; - -static inline uint32_t reloc_pc16_val(tcg_insn_unit *pc, tcg_insn_unit *target) -{ - /* Let the compiler perform the right-shift as part of the arithmetic. */ - ptrdiff_t disp = target - (pc + 1); - assert(disp == (int16_t)disp); - return disp & 0xffff; -} - -static inline void reloc_pc16(tcg_insn_unit *pc, tcg_insn_unit *target) -{ - *pc = deposit32(*pc, 0, 16, reloc_pc16_val(pc, target)); -} - -static inline uint32_t reloc_26_val(tcg_insn_unit *pc, tcg_insn_unit *target) -{ - assert((((uintptr_t)pc ^ (uintptr_t)target) & 0xf0000000) == 0); - return ((uintptr_t)target >> 2) & 0x3ffffff; -} - -static inline void reloc_26(tcg_insn_unit *pc, tcg_insn_unit *target) -{ - *pc = deposit32(*pc, 0, 26, reloc_26_val(pc, target)); -} - -static void patch_reloc(tcg_insn_unit *code_ptr, int type, - intptr_t value, intptr_t addend) -{ - assert(type == R_MIPS_PC16); - assert(addend == 0); - reloc_pc16(code_ptr, (tcg_insn_unit *)value); -} - -#define TCG_CT_CONST_ZERO 0x100 -#define TCG_CT_CONST_U16 0x200 /* Unsigned 16-bit: 0 - 0xffff. */ -#define TCG_CT_CONST_S16 0x400 /* Signed 16-bit: -32768 - 32767 */ -#define TCG_CT_CONST_P2M1 0x800 /* Power of 2 minus 1. */ -#define TCG_CT_CONST_N16 0x1000 /* "Negatable" 16-bit: -32767 - 32767 */ - -static inline bool is_p2m1(tcg_target_long val) -{ - return val && ((val + 1) & val) == 0; -} - -/* parse target specific constraints */ -static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str) -{ - const char *ct_str; - - ct_str = *pct_str; - switch(ct_str[0]) { - case 'r': - ct->ct |= TCG_CT_REG; - tcg_regset_set(ct->u.regs, 0xffffffff); - break; - case 'L': /* qemu_ld output arg constraint */ - ct->ct |= TCG_CT_REG; - tcg_regset_set(ct->u.regs, 0xffffffff); - tcg_regset_reset_reg(ct->u.regs, TCG_REG_V0); - break; - case 'l': /* qemu_ld input arg constraint */ - ct->ct |= TCG_CT_REG; - tcg_regset_set(ct->u.regs, 0xffffffff); - tcg_regset_reset_reg(ct->u.regs, TCG_REG_A0); -#if defined(CONFIG_SOFTMMU) - if (TARGET_LONG_BITS == 64) { - tcg_regset_reset_reg(ct->u.regs, TCG_REG_A2); - } -#endif - break; - case 'S': /* qemu_st constraint */ - ct->ct |= TCG_CT_REG; - tcg_regset_set(ct->u.regs, 0xffffffff); - tcg_regset_reset_reg(ct->u.regs, TCG_REG_A0); -#if defined(CONFIG_SOFTMMU) - if (TARGET_LONG_BITS == 32) { - tcg_regset_reset_reg(ct->u.regs, TCG_REG_A1); - } else { - tcg_regset_reset_reg(ct->u.regs, TCG_REG_A2); - tcg_regset_reset_reg(ct->u.regs, TCG_REG_A3); - } -#endif - break; - case 'I': - ct->ct |= TCG_CT_CONST_U16; - break; - case 'J': - ct->ct |= TCG_CT_CONST_S16; - break; - case 'K': - ct->ct |= TCG_CT_CONST_P2M1; - break; - case 'N': - ct->ct |= TCG_CT_CONST_N16; - break; - case 'Z': - /* We are cheating a bit here, using the fact that the register - ZERO is also the register number 0. Hence there is no need - to check for const_args in each instruction. */ - ct->ct |= TCG_CT_CONST_ZERO; - break; - default: - return -1; - } - ct_str++; - *pct_str = ct_str; - return 0; -} - -/* test if a constant matches the constraint */ -static inline int tcg_target_const_match(tcg_target_long val, TCGType type, - const TCGArgConstraint *arg_ct) -{ - int ct; - ct = arg_ct->ct; - if (ct & TCG_CT_CONST) { - return 1; - } else if ((ct & TCG_CT_CONST_ZERO) && val == 0) { - return 1; - } else if ((ct & TCG_CT_CONST_U16) && val == (uint16_t)val) { - return 1; - } else if ((ct & TCG_CT_CONST_S16) && val == (int16_t)val) { - return 1; - } else if ((ct & TCG_CT_CONST_N16) && val >= -32767 && val <= 32767) { - return 1; - } else if ((ct & TCG_CT_CONST_P2M1) - && use_mips32r2_instructions && is_p2m1(val)) { - return 1; - } - return 0; -} - -/* instruction opcodes */ -typedef enum { - OPC_J = 0x02 << 26, - OPC_JAL = 0x03 << 26, - OPC_BEQ = 0x04 << 26, - OPC_BNE = 0x05 << 26, - OPC_BLEZ = 0x06 << 26, - OPC_BGTZ = 0x07 << 26, - OPC_ADDIU = 0x09 << 26, - OPC_SLTI = 0x0A << 26, - OPC_SLTIU = 0x0B << 26, - OPC_ANDI = 0x0C << 26, - OPC_ORI = 0x0D << 26, - OPC_XORI = 0x0E << 26, - OPC_LUI = 0x0F << 26, - OPC_LB = 0x20 << 26, - OPC_LH = 0x21 << 26, - OPC_LW = 0x23 << 26, - OPC_LBU = 0x24 << 26, - OPC_LHU = 0x25 << 26, - OPC_LWU = 0x27 << 26, - OPC_SB = 0x28 << 26, - OPC_SH = 0x29 << 26, - OPC_SW = 0x2B << 26, - - OPC_SPECIAL = 0x00 << 26, - OPC_SLL = OPC_SPECIAL | 0x00, - OPC_SRL = OPC_SPECIAL | 0x02, - OPC_ROTR = OPC_SPECIAL | (0x01 << 21) | 0x02, - OPC_SRA = OPC_SPECIAL | 0x03, - OPC_SLLV = OPC_SPECIAL | 0x04, - OPC_SRLV = OPC_SPECIAL | 0x06, - OPC_ROTRV = OPC_SPECIAL | (0x01 << 6) | 0x06, - OPC_SRAV = OPC_SPECIAL | 0x07, - OPC_JR_R5 = OPC_SPECIAL | 0x08, - OPC_JALR = OPC_SPECIAL | 0x09, - OPC_MOVZ = OPC_SPECIAL | 0x0A, - OPC_MOVN = OPC_SPECIAL | 0x0B, - OPC_MFHI = OPC_SPECIAL | 0x10, - OPC_MFLO = OPC_SPECIAL | 0x12, - OPC_MULT = OPC_SPECIAL | 0x18, - OPC_MUL_R6 = OPC_SPECIAL | (0x02 << 6) | 0x18, - OPC_MUH = OPC_SPECIAL | (0x03 << 6) | 0x18, - OPC_MULTU = OPC_SPECIAL | 0x19, - OPC_MULU = OPC_SPECIAL | (0x02 << 6) | 0x19, - OPC_MUHU = OPC_SPECIAL | (0x03 << 6) | 0x19, - OPC_DIV = OPC_SPECIAL | 0x1A, - OPC_DIV_R6 = OPC_SPECIAL | (0x02 << 6) | 0x1A, - OPC_MOD = OPC_SPECIAL | (0x03 << 6) | 0x1A, - OPC_DIVU = OPC_SPECIAL | 0x1B, - OPC_DIVU_R6 = OPC_SPECIAL | (0x02 << 6) | 0x1B, - OPC_MODU = OPC_SPECIAL | (0x03 << 6) | 0x1B, - OPC_ADDU = OPC_SPECIAL | 0x21, - OPC_SUBU = OPC_SPECIAL | 0x23, - OPC_AND = OPC_SPECIAL | 0x24, - OPC_OR = OPC_SPECIAL | 0x25, - OPC_XOR = OPC_SPECIAL | 0x26, - OPC_NOR = OPC_SPECIAL | 0x27, - OPC_SLT = OPC_SPECIAL | 0x2A, - OPC_SLTU = OPC_SPECIAL | 0x2B, - OPC_SELEQZ = OPC_SPECIAL | 0x35, - OPC_SELNEZ = OPC_SPECIAL | 0x37, - - OPC_REGIMM = 0x01 << 26, - OPC_BLTZ = OPC_REGIMM | (0x00 << 16), - OPC_BGEZ = OPC_REGIMM | (0x01 << 16), - - OPC_SPECIAL2 = 0x1c << 26, - OPC_MUL_R5 = OPC_SPECIAL2 | 0x002, - - OPC_SPECIAL3 = 0x1f << 26, - OPC_EXT = OPC_SPECIAL3 | 0x000, - OPC_INS = OPC_SPECIAL3 | 0x004, - OPC_WSBH = OPC_SPECIAL3 | 0x0a0, - OPC_SEB = OPC_SPECIAL3 | 0x420, - OPC_SEH = OPC_SPECIAL3 | 0x620, - - /* MIPS r6 doesn't have JR, JALR should be used instead */ - OPC_JR = use_mips32r6_instructions ? OPC_JALR : OPC_JR_R5, - - /* - * MIPS r6 replaces MUL with an alternative encoding which is - * backwards-compatible at the assembly level. - */ - OPC_MUL = use_mips32r6_instructions ? OPC_MUL_R6 : OPC_MUL_R5, -} MIPSInsn; - -/* - * Type reg - */ -static inline void tcg_out_opc_reg(TCGContext *s, MIPSInsn opc, - TCGReg rd, TCGReg rs, TCGReg rt) -{ - int32_t inst; - - inst = opc; - inst |= (rs & 0x1F) << 21; - inst |= (rt & 0x1F) << 16; - inst |= (rd & 0x1F) << 11; - tcg_out32(s, inst); -} - -/* - * Type immediate - */ -static inline void tcg_out_opc_imm(TCGContext *s, MIPSInsn opc, - TCGReg rt, TCGReg rs, TCGArg imm) -{ - int32_t inst; - - inst = opc; - inst |= (rs & 0x1F) << 21; - inst |= (rt & 0x1F) << 16; - inst |= (imm & 0xffff); - tcg_out32(s, inst); -} - -/* - * Type bitfield - */ -static inline void tcg_out_opc_bf(TCGContext *s, MIPSInsn opc, TCGReg rt, - TCGReg rs, int msb, int lsb) -{ - int32_t inst; - - inst = opc; - inst |= (rs & 0x1F) << 21; - inst |= (rt & 0x1F) << 16; - inst |= (msb & 0x1F) << 11; - inst |= (lsb & 0x1F) << 6; - tcg_out32(s, inst); -} - -/* - * Type branch - */ -static inline void tcg_out_opc_br(TCGContext *s, MIPSInsn opc, - TCGReg rt, TCGReg rs) -{ - /* We pay attention here to not modify the branch target by reading - the existing value and using it again. This ensure that caches and - memory are kept coherent during retranslation. */ - uint16_t offset = (uint16_t)*s->code_ptr; - - tcg_out_opc_imm(s, opc, rt, rs, offset); -} - -/* - * Type sa - */ -static inline void tcg_out_opc_sa(TCGContext *s, MIPSInsn opc, - TCGReg rd, TCGReg rt, TCGArg sa) -{ - int32_t inst; - - inst = opc; - inst |= (rt & 0x1F) << 16; - inst |= (rd & 0x1F) << 11; - inst |= (sa & 0x1F) << 6; - tcg_out32(s, inst); - -} - -/* - * Type jump. - * Returns true if the branch was in range and the insn was emitted. - */ -static bool tcg_out_opc_jmp(TCGContext *s, MIPSInsn opc, void *target) -{ - uintptr_t dest = (uintptr_t)target; - uintptr_t from = (uintptr_t)s->code_ptr + 4; - int32_t inst; - - /* The pc-region branch happens within the 256MB region of - the delay slot (thus the +4). */ - if ((from ^ dest) & -(1 << 28)) { - return false; - } - assert((dest & 3) == 0); - - inst = opc; - inst |= (dest >> 2) & 0x3ffffff; - tcg_out32(s, inst); - return true; -} - -static inline void tcg_out_nop(TCGContext *s) -{ - tcg_out32(s, 0); -} - -static inline void tcg_out_mov(TCGContext *s, TCGType type, - TCGReg ret, TCGReg arg) -{ - /* Simple reg-reg move, optimising out the 'do nothing' case */ - if (ret != arg) { - tcg_out_opc_reg(s, OPC_ADDU, ret, arg, TCG_REG_ZERO); - } -} - -static inline void tcg_out_movi(TCGContext *s, TCGType type, - TCGReg reg, tcg_target_long arg) -{ - if (arg == (int16_t)arg) { - tcg_out_opc_imm(s, OPC_ADDIU, reg, TCG_REG_ZERO, arg); - } else if (arg == (uint16_t)arg) { - tcg_out_opc_imm(s, OPC_ORI, reg, TCG_REG_ZERO, arg); - } else { - tcg_out_opc_imm(s, OPC_LUI, reg, TCG_REG_ZERO, arg >> 16); - if (arg & 0xffff) { - tcg_out_opc_imm(s, OPC_ORI, reg, reg, arg & 0xffff); - } - } -} - -static inline void tcg_out_bswap16(TCGContext *s, TCGReg ret, TCGReg arg) -{ - if (use_mips32r2_instructions) { - tcg_out_opc_reg(s, OPC_WSBH, ret, 0, arg); - } else { - /* ret and arg can't be register at */ - if (ret == TCG_TMP0 || arg == TCG_TMP0) { - tcg_abort(); - } - - tcg_out_opc_sa(s, OPC_SRL, TCG_TMP0, arg, 8); - tcg_out_opc_sa(s, OPC_SLL, ret, arg, 8); - tcg_out_opc_imm(s, OPC_ANDI, ret, ret, 0xff00); - tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_TMP0); - } -} - -static inline void tcg_out_bswap16s(TCGContext *s, TCGReg ret, TCGReg arg) -{ - if (use_mips32r2_instructions) { - tcg_out_opc_reg(s, OPC_WSBH, ret, 0, arg); - tcg_out_opc_reg(s, OPC_SEH, ret, 0, ret); - } else { - /* ret and arg can't be register at */ - if (ret == TCG_TMP0 || arg == TCG_TMP0) { - tcg_abort(); - } - - tcg_out_opc_sa(s, OPC_SRL, TCG_TMP0, arg, 8); - tcg_out_opc_sa(s, OPC_SLL, ret, arg, 24); - tcg_out_opc_sa(s, OPC_SRA, ret, ret, 16); - tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_TMP0); - } -} - -static inline void tcg_out_bswap32(TCGContext *s, TCGReg ret, TCGReg arg) -{ - if (use_mips32r2_instructions) { - tcg_out_opc_reg(s, OPC_WSBH, ret, 0, arg); - tcg_out_opc_sa(s, OPC_ROTR, ret, ret, 16); - } else { - /* ret and arg must be different and can't be register at */ - if (ret == arg || ret == TCG_TMP0 || arg == TCG_TMP0) { - tcg_abort(); - } - - tcg_out_opc_sa(s, OPC_SLL, ret, arg, 24); - - tcg_out_opc_sa(s, OPC_SRL, TCG_TMP0, arg, 24); - tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_TMP0); - - tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP0, arg, 0xff00); - tcg_out_opc_sa(s, OPC_SLL, TCG_TMP0, TCG_TMP0, 8); - tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_TMP0); - - tcg_out_opc_sa(s, OPC_SRL, TCG_TMP0, arg, 8); - tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP0, TCG_TMP0, 0xff00); - tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_TMP0); - } -} - -static inline void tcg_out_ext8s(TCGContext *s, TCGReg ret, TCGReg arg) -{ - if (use_mips32r2_instructions) { - tcg_out_opc_reg(s, OPC_SEB, ret, 0, arg); - } else { - tcg_out_opc_sa(s, OPC_SLL, ret, arg, 24); - tcg_out_opc_sa(s, OPC_SRA, ret, ret, 24); - } -} - -static inline void tcg_out_ext16s(TCGContext *s, TCGReg ret, TCGReg arg) -{ - if (use_mips32r2_instructions) { - tcg_out_opc_reg(s, OPC_SEH, ret, 0, arg); - } else { - tcg_out_opc_sa(s, OPC_SLL, ret, arg, 16); - tcg_out_opc_sa(s, OPC_SRA, ret, ret, 16); - } -} - -static void tcg_out_ldst(TCGContext *s, MIPSInsn opc, TCGReg data, - TCGReg addr, intptr_t ofs) -{ - int16_t lo = ofs; - if (ofs != lo) { - tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0, ofs - lo); - if (addr != TCG_REG_ZERO) { - tcg_out_opc_reg(s, OPC_ADDU, TCG_TMP0, TCG_TMP0, addr); - } - addr = TCG_TMP0; - } - tcg_out_opc_imm(s, opc, data, addr, lo); -} - -static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg arg, - TCGReg arg1, intptr_t arg2) -{ - tcg_out_ldst(s, OPC_LW, arg, arg1, arg2); -} - -static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, - TCGReg arg1, intptr_t arg2) -{ - tcg_out_ldst(s, OPC_SW, arg, arg1, arg2); -} - -static inline void tcg_out_addi(TCGContext *s, TCGReg reg, TCGArg val) -{ - if (val == (int16_t)val) { - tcg_out_opc_imm(s, OPC_ADDIU, reg, reg, val); - } else { - tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0, val); - tcg_out_opc_reg(s, OPC_ADDU, reg, reg, TCG_TMP0); - } -} - -static void tcg_out_addsub2(TCGContext *s, TCGReg rl, TCGReg rh, TCGReg al, - TCGReg ah, TCGArg bl, TCGArg bh, bool cbl, - bool cbh, bool is_sub) -{ - TCGReg th = TCG_TMP1; - - /* If we have a negative constant such that negating it would - make the high part zero, we can (usually) eliminate one insn. */ - if (cbl && cbh && bh == -1 && bl != 0) { - bl = -bl; - bh = 0; - is_sub = !is_sub; - } - - /* By operating on the high part first, we get to use the final - carry operation to move back from the temporary. */ - if (!cbh) { - tcg_out_opc_reg(s, (is_sub ? OPC_SUBU : OPC_ADDU), th, ah, bh); - } else if (bh != 0 || ah == rl) { - tcg_out_opc_imm(s, OPC_ADDIU, th, ah, (is_sub ? -bh : bh)); - } else { - th = ah; - } - - /* Note that tcg optimization should eliminate the bl == 0 case. */ - if (is_sub) { - if (cbl) { - tcg_out_opc_imm(s, OPC_SLTIU, TCG_TMP0, al, bl); - tcg_out_opc_imm(s, OPC_ADDIU, rl, al, -bl); - } else { - tcg_out_opc_reg(s, OPC_SLTU, TCG_TMP0, al, bl); - tcg_out_opc_reg(s, OPC_SUBU, rl, al, bl); - } - tcg_out_opc_reg(s, OPC_SUBU, rh, th, TCG_TMP0); - } else { - if (cbl) { - tcg_out_opc_imm(s, OPC_ADDIU, rl, al, bl); - tcg_out_opc_imm(s, OPC_SLTIU, TCG_TMP0, rl, bl); - } else if (rl == al && rl == bl) { - tcg_out_opc_sa(s, OPC_SRL, TCG_TMP0, al, 31); - tcg_out_opc_reg(s, OPC_ADDU, rl, al, bl); - } else { - tcg_out_opc_reg(s, OPC_ADDU, rl, al, bl); - tcg_out_opc_reg(s, OPC_SLTU, TCG_TMP0, rl, (rl == bl ? al : bl)); - } - tcg_out_opc_reg(s, OPC_ADDU, rh, th, TCG_TMP0); - } -} - -/* Bit 0 set if inversion required; bit 1 set if swapping required. */ -#define MIPS_CMP_INV 1 -#define MIPS_CMP_SWAP 2 - -static const uint8_t mips_cmp_map[16] = { - [TCG_COND_LT] = 0, - [TCG_COND_LTU] = 0, - [TCG_COND_GE] = MIPS_CMP_INV, - [TCG_COND_GEU] = MIPS_CMP_INV, - [TCG_COND_LE] = MIPS_CMP_INV | MIPS_CMP_SWAP, - [TCG_COND_LEU] = MIPS_CMP_INV | MIPS_CMP_SWAP, - [TCG_COND_GT] = MIPS_CMP_SWAP, - [TCG_COND_GTU] = MIPS_CMP_SWAP, -}; - -static void tcg_out_setcond(TCGContext *s, TCGCond cond, TCGReg ret, - TCGReg arg1, TCGReg arg2) -{ - MIPSInsn s_opc = OPC_SLTU; - int cmp_map; - - switch (cond) { - case TCG_COND_EQ: - if (arg2 != 0) { - tcg_out_opc_reg(s, OPC_XOR, ret, arg1, arg2); - arg1 = ret; - } - tcg_out_opc_imm(s, OPC_SLTIU, ret, arg1, 1); - break; - - case TCG_COND_NE: - if (arg2 != 0) { - tcg_out_opc_reg(s, OPC_XOR, ret, arg1, arg2); - arg1 = ret; - } - tcg_out_opc_reg(s, OPC_SLTU, ret, TCG_REG_ZERO, arg1); - break; - - case TCG_COND_LT: - case TCG_COND_GE: - case TCG_COND_LE: - case TCG_COND_GT: - s_opc = OPC_SLT; - /* FALLTHRU */ - - case TCG_COND_LTU: - case TCG_COND_GEU: - case TCG_COND_LEU: - case TCG_COND_GTU: - cmp_map = mips_cmp_map[cond]; - if (cmp_map & MIPS_CMP_SWAP) { - TCGReg t = arg1; - arg1 = arg2; - arg2 = t; - } - tcg_out_opc_reg(s, s_opc, ret, arg1, arg2); - if (cmp_map & MIPS_CMP_INV) { - tcg_out_opc_imm(s, OPC_XORI, ret, ret, 1); - } - break; - - default: - tcg_abort(); - break; - } -} - -static void tcg_out_brcond(TCGContext *s, TCGCond cond, TCGReg arg1, - TCGReg arg2, TCGLabel *l) -{ - static const MIPSInsn b_zero[16] = { - [TCG_COND_LT] = OPC_BLTZ, - [TCG_COND_GT] = OPC_BGTZ, - [TCG_COND_LE] = OPC_BLEZ, - [TCG_COND_GE] = OPC_BGEZ, - }; - - MIPSInsn s_opc = OPC_SLTU; - MIPSInsn b_opc; - int cmp_map; - - switch (cond) { - case TCG_COND_EQ: - b_opc = OPC_BEQ; - break; - case TCG_COND_NE: - b_opc = OPC_BNE; - break; - - case TCG_COND_LT: - case TCG_COND_GT: - case TCG_COND_LE: - case TCG_COND_GE: - if (arg2 == 0) { - b_opc = b_zero[cond]; - arg2 = arg1; - arg1 = 0; - break; - } - s_opc = OPC_SLT; - /* FALLTHRU */ - - case TCG_COND_LTU: - case TCG_COND_GTU: - case TCG_COND_LEU: - case TCG_COND_GEU: - cmp_map = mips_cmp_map[cond]; - if (cmp_map & MIPS_CMP_SWAP) { - TCGReg t = arg1; - arg1 = arg2; - arg2 = t; - } - tcg_out_opc_reg(s, s_opc, TCG_TMP0, arg1, arg2); - b_opc = (cmp_map & MIPS_CMP_INV ? OPC_BEQ : OPC_BNE); - arg1 = TCG_TMP0; - arg2 = TCG_REG_ZERO; - break; - - default: - tcg_abort(); - break; - } - - tcg_out_opc_br(s, b_opc, arg1, arg2); - if (l->has_value) { - reloc_pc16(s->code_ptr - 1, l->u.value_ptr); - } else { - tcg_out_reloc(s, s->code_ptr - 1, R_MIPS_PC16, l, 0); - } - tcg_out_nop(s); -} - -static TCGReg tcg_out_reduce_eq2(TCGContext *s, TCGReg tmp0, TCGReg tmp1, - TCGReg al, TCGReg ah, - TCGReg bl, TCGReg bh) -{ - /* Merge highpart comparison into AH. */ - if (bh != 0) { - if (ah != 0) { - tcg_out_opc_reg(s, OPC_XOR, tmp0, ah, bh); - ah = tmp0; - } else { - ah = bh; - } - } - /* Merge lowpart comparison into AL. */ - if (bl != 0) { - if (al != 0) { - tcg_out_opc_reg(s, OPC_XOR, tmp1, al, bl); - al = tmp1; - } else { - al = bl; - } - } - /* Merge high and low part comparisons into AL. */ - if (ah != 0) { - if (al != 0) { - tcg_out_opc_reg(s, OPC_OR, tmp0, ah, al); - al = tmp0; - } else { - al = ah; - } - } - return al; -} - -static void tcg_out_setcond2(TCGContext *s, TCGCond cond, TCGReg ret, - TCGReg al, TCGReg ah, TCGReg bl, TCGReg bh) -{ - TCGReg tmp0 = TCG_TMP0; - TCGReg tmp1 = ret; - - assert(ret != TCG_TMP0); - if (ret == ah || ret == bh) { - assert(ret != TCG_TMP1); - tmp1 = TCG_TMP1; - } - - switch (cond) { - case TCG_COND_EQ: - case TCG_COND_NE: - tmp1 = tcg_out_reduce_eq2(s, tmp0, tmp1, al, ah, bl, bh); - tcg_out_setcond(s, cond, ret, tmp1, TCG_REG_ZERO); - break; - - default: - tcg_out_setcond(s, TCG_COND_EQ, tmp0, ah, bh); - tcg_out_setcond(s, tcg_unsigned_cond(cond), tmp1, al, bl); - tcg_out_opc_reg(s, OPC_AND, tmp1, tmp1, tmp0); - tcg_out_setcond(s, tcg_high_cond(cond), tmp0, ah, bh); - tcg_out_opc_reg(s, OPC_OR, ret, tmp1, tmp0); - break; - } -} - -static void tcg_out_brcond2(TCGContext *s, TCGCond cond, TCGReg al, TCGReg ah, - TCGReg bl, TCGReg bh, TCGLabel *l) -{ - TCGCond b_cond = TCG_COND_NE; - TCGReg tmp = TCG_TMP1; - - /* With branches, we emit between 4 and 9 insns with 2 or 3 branches. - With setcond, we emit between 3 and 10 insns and only 1 branch, - which ought to get better branch prediction. */ - switch (cond) { - case TCG_COND_EQ: - case TCG_COND_NE: - b_cond = cond; - tmp = tcg_out_reduce_eq2(s, TCG_TMP0, TCG_TMP1, al, ah, bl, bh); - break; - - default: - /* Minimize code size by preferring a compare not requiring INV. */ - if (mips_cmp_map[cond] & MIPS_CMP_INV) { - cond = tcg_invert_cond(cond); - b_cond = TCG_COND_EQ; - } - tcg_out_setcond2(s, cond, tmp, al, ah, bl, bh); - break; - } - - tcg_out_brcond(s, b_cond, tmp, TCG_REG_ZERO, l); -} - -static void tcg_out_movcond(TCGContext *s, TCGCond cond, TCGReg ret, - TCGReg c1, TCGReg c2, TCGReg v1, TCGReg v2) -{ - bool eqz = false; - - /* If one of the values is zero, put it last to match SEL*Z instructions */ - if (use_mips32r6_instructions && v1 == 0) { - v1 = v2; - v2 = 0; - cond = tcg_invert_cond(cond); - } - - switch (cond) { - case TCG_COND_EQ: - eqz = true; - /* FALLTHRU */ - case TCG_COND_NE: - if (c2 != 0) { - tcg_out_opc_reg(s, OPC_XOR, TCG_TMP0, c1, c2); - c1 = TCG_TMP0; - } - break; - - default: - /* Minimize code size by preferring a compare not requiring INV. */ - if (mips_cmp_map[cond] & MIPS_CMP_INV) { - cond = tcg_invert_cond(cond); - eqz = true; - } - tcg_out_setcond(s, cond, TCG_TMP0, c1, c2); - c1 = TCG_TMP0; - break; - } - - if (use_mips32r6_instructions) { - MIPSInsn m_opc_t = eqz ? OPC_SELEQZ : OPC_SELNEZ; - MIPSInsn m_opc_f = eqz ? OPC_SELNEZ : OPC_SELEQZ; - - if (v2 != 0) { - tcg_out_opc_reg(s, m_opc_f, TCG_TMP1, v2, c1); - } - tcg_out_opc_reg(s, m_opc_t, ret, v1, c1); - if (v2 != 0) { - tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_TMP1); - } - } else { - MIPSInsn m_opc = eqz ? OPC_MOVZ : OPC_MOVN; - - tcg_out_opc_reg(s, m_opc, ret, v1, c1); - - /* This should be guaranteed via constraints */ - tcg_debug_assert(v2 == ret); - } -} - -static void tcg_out_call_int(TCGContext *s, tcg_insn_unit *arg, bool tail) -{ - /* Note that the ABI requires the called function's address to be - loaded into T9, even if a direct branch is in range. */ - tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_T9, (uintptr_t)arg); - - /* But do try a direct branch, allowing the cpu better insn prefetch. */ - if (tail) { - if (!tcg_out_opc_jmp(s, OPC_J, arg)) { - tcg_out_opc_reg(s, OPC_JR, 0, TCG_REG_T9, 0); - } - } else { - if (!tcg_out_opc_jmp(s, OPC_JAL, arg)) { - tcg_out_opc_reg(s, OPC_JALR, TCG_REG_RA, TCG_REG_T9, 0); - } - } -} - -static void tcg_out_call(TCGContext *s, tcg_insn_unit *arg) -{ - tcg_out_call_int(s, arg, false); - tcg_out_nop(s); -} - -#if defined(CONFIG_SOFTMMU) -static void * const qemu_ld_helpers[16] = { - [MO_UB] = helper_ret_ldub_mmu, - [MO_SB] = helper_ret_ldsb_mmu, - [MO_LEUW] = helper_le_lduw_mmu, - [MO_LESW] = helper_le_ldsw_mmu, - [MO_LEUL] = helper_le_ldul_mmu, - [MO_LEQ] = helper_le_ldq_mmu, - [MO_BEUW] = helper_be_lduw_mmu, - [MO_BESW] = helper_be_ldsw_mmu, - [MO_BEUL] = helper_be_ldul_mmu, - [MO_BEQ] = helper_be_ldq_mmu, -}; - -static void * const qemu_st_helpers[16] = { - [MO_UB] = helper_ret_stb_mmu, - [MO_LEUW] = helper_le_stw_mmu, - [MO_LEUL] = helper_le_stl_mmu, - [MO_LEQ] = helper_le_stq_mmu, - [MO_BEUW] = helper_be_stw_mmu, - [MO_BEUL] = helper_be_stl_mmu, - [MO_BEQ] = helper_be_stq_mmu, -}; - -/* Helper routines for marshalling helper function arguments into - * the correct registers and stack. - * I is where we want to put this argument, and is updated and returned - * for the next call. ARG is the argument itself. - * - * We provide routines for arguments which are: immediate, 32 bit - * value in register, 16 and 8 bit values in register (which must be zero - * extended before use) and 64 bit value in a lo:hi register pair. - */ - -static int tcg_out_call_iarg_reg(TCGContext *s, int i, TCGReg arg) -{ - if (i < ARRAY_SIZE(tcg_target_call_iarg_regs)) { - tcg_out_mov(s, TCG_TYPE_REG, tcg_target_call_iarg_regs[i], arg); - } else { - tcg_out_st(s, TCG_TYPE_REG, arg, TCG_REG_SP, 4 * i); - } - return i + 1; -} - -static int tcg_out_call_iarg_reg8(TCGContext *s, int i, TCGReg arg) -{ - TCGReg tmp = TCG_TMP0; - if (i < ARRAY_SIZE(tcg_target_call_iarg_regs)) { - tmp = tcg_target_call_iarg_regs[i]; - } - tcg_out_opc_imm(s, OPC_ANDI, tmp, arg, 0xff); - return tcg_out_call_iarg_reg(s, i, tmp); -} - -static int tcg_out_call_iarg_reg16(TCGContext *s, int i, TCGReg arg) -{ - TCGReg tmp = TCG_TMP0; - if (i < ARRAY_SIZE(tcg_target_call_iarg_regs)) { - tmp = tcg_target_call_iarg_regs[i]; - } - tcg_out_opc_imm(s, OPC_ANDI, tmp, arg, 0xffff); - return tcg_out_call_iarg_reg(s, i, tmp); -} - -static int tcg_out_call_iarg_imm(TCGContext *s, int i, TCGArg arg) -{ - TCGReg tmp = TCG_TMP0; - if (arg == 0) { - tmp = TCG_REG_ZERO; - } else { - if (i < ARRAY_SIZE(tcg_target_call_iarg_regs)) { - tmp = tcg_target_call_iarg_regs[i]; - } - tcg_out_movi(s, TCG_TYPE_REG, tmp, arg); - } - return tcg_out_call_iarg_reg(s, i, tmp); -} - -static int tcg_out_call_iarg_reg2(TCGContext *s, int i, TCGReg al, TCGReg ah) -{ - i = (i + 1) & ~1; - i = tcg_out_call_iarg_reg(s, i, (MIPS_BE ? ah : al)); - i = tcg_out_call_iarg_reg(s, i, (MIPS_BE ? al : ah)); - return i; -} - -/* Perform the tlb comparison operation. The complete host address is - placed in BASE. Clobbers AT, T0, A0. */ -static void tcg_out_tlb_load(TCGContext *s, TCGReg base, TCGReg addrl, - TCGReg addrh, TCGMemOpIdx oi, - tcg_insn_unit *label_ptr[2], bool is_load) -{ - TCGMemOp s_bits = get_memop(oi) & MO_SIZE; - int mem_index = get_mmuidx(oi); - int cmp_off - = (is_load - ? offsetof(CPUArchState, tlb_table[mem_index][0].addr_read) - : offsetof(CPUArchState, tlb_table[mem_index][0].addr_write)); - int add_off = offsetof(CPUArchState, tlb_table[mem_index][0].addend); - - tcg_out_opc_sa(s, OPC_SRL, TCG_REG_A0, addrl, - TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS); - tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_A0, TCG_REG_A0, - (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS); - tcg_out_opc_reg(s, OPC_ADDU, TCG_REG_A0, TCG_REG_A0, TCG_AREG0); - - /* Compensate for very large offsets. */ - if (add_off >= 0x8000) { - /* Most target env are smaller than 32k; none are larger than 64k. - Simplify the logic here merely to offset by 0x7ff0, giving us a - range just shy of 64k. Check this assumption. */ - QEMU_BUILD_BUG_ON(offsetof(CPUArchState, - tlb_table[NB_MMU_MODES - 1][1]) - > 0x7ff0 + 0x7fff); - tcg_out_opc_imm(s, OPC_ADDIU, TCG_REG_A0, TCG_REG_A0, 0x7ff0); - cmp_off -= 0x7ff0; - add_off -= 0x7ff0; - } - - /* Load the (low half) tlb comparator. */ - tcg_out_opc_imm(s, OPC_LW, TCG_TMP0, TCG_REG_A0, - cmp_off + (TARGET_LONG_BITS == 64 ? LO_OFF : 0)); - - /* Mask the page bits, keeping the alignment bits to compare against. - In between on 32-bit targets, load the tlb addend for the fast path. */ - tcg_out_movi(s, TCG_TYPE_I32, TCG_TMP1, - TARGET_PAGE_MASK | ((1 << s_bits) - 1)); - if (TARGET_LONG_BITS == 32) { - tcg_out_opc_imm(s, OPC_LW, TCG_REG_A0, TCG_REG_A0, add_off); - } - tcg_out_opc_reg(s, OPC_AND, TCG_TMP1, TCG_TMP1, addrl); - - label_ptr[0] = s->code_ptr; - tcg_out_opc_br(s, OPC_BNE, TCG_TMP1, TCG_TMP0); - - /* Load and test the high half tlb comparator. */ - if (TARGET_LONG_BITS == 64) { - /* delay slot */ - tcg_out_opc_imm(s, OPC_LW, TCG_TMP0, TCG_REG_A0, cmp_off + HI_OFF); - - /* Load the tlb addend for the fast path. We can't do it earlier with - 64-bit targets or we'll clobber a0 before reading the high half tlb - comparator. */ - tcg_out_opc_imm(s, OPC_LW, TCG_REG_A0, TCG_REG_A0, add_off); - - label_ptr[1] = s->code_ptr; - tcg_out_opc_br(s, OPC_BNE, addrh, TCG_TMP0); - } - - /* delay slot */ - tcg_out_opc_reg(s, OPC_ADDU, base, TCG_REG_A0, addrl); -} - -static void add_qemu_ldst_label(TCGContext *s, int is_ld, TCGMemOpIdx oi, - TCGReg datalo, TCGReg datahi, - TCGReg addrlo, TCGReg addrhi, - void *raddr, tcg_insn_unit *label_ptr[2]) -{ - TCGLabelQemuLdst *label = new_ldst_label(s); - - label->is_ld = is_ld; - label->oi = oi; - label->datalo_reg = datalo; - label->datahi_reg = datahi; - label->addrlo_reg = addrlo; - label->addrhi_reg = addrhi; - label->raddr = raddr; - label->label_ptr[0] = label_ptr[0]; - if (TARGET_LONG_BITS == 64) { - label->label_ptr[1] = label_ptr[1]; - } -} - -static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l) -{ - TCGMemOpIdx oi = l->oi; - TCGMemOp opc = get_memop(oi); - TCGReg v0; - int i; - - /* resolve label address */ - reloc_pc16(l->label_ptr[0], s->code_ptr); - if (TARGET_LONG_BITS == 64) { - reloc_pc16(l->label_ptr[1], s->code_ptr); - } - - i = 1; - if (TARGET_LONG_BITS == 64) { - i = tcg_out_call_iarg_reg2(s, i, l->addrlo_reg, l->addrhi_reg); - } else { - i = tcg_out_call_iarg_reg(s, i, l->addrlo_reg); - } - i = tcg_out_call_iarg_imm(s, i, oi); - i = tcg_out_call_iarg_imm(s, i, (intptr_t)l->raddr); - tcg_out_call_int(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SSIZE)], false); - /* delay slot */ - tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0); - - v0 = l->datalo_reg; - if ((opc & MO_SIZE) == MO_64) { - /* We eliminated V0 from the possible output registers, so it - cannot be clobbered here. So we must move V1 first. */ - if (MIPS_BE) { - tcg_out_mov(s, TCG_TYPE_I32, v0, TCG_REG_V1); - v0 = l->datahi_reg; - } else { - tcg_out_mov(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_V1); - } - } - - reloc_pc16(s->code_ptr, l->raddr); - tcg_out_opc_br(s, OPC_BEQ, TCG_REG_ZERO, TCG_REG_ZERO); - /* delay slot */ - tcg_out_mov(s, TCG_TYPE_REG, v0, TCG_REG_V0); -} - -static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l) -{ - TCGMemOpIdx oi = l->oi; - TCGMemOp opc = get_memop(oi); - TCGMemOp s_bits = opc & MO_SIZE; - int i; - - /* resolve label address */ - reloc_pc16(l->label_ptr[0], s->code_ptr); - if (TARGET_LONG_BITS == 64) { - reloc_pc16(l->label_ptr[1], s->code_ptr); - } - - i = 1; - if (TARGET_LONG_BITS == 64) { - i = tcg_out_call_iarg_reg2(s, i, l->addrlo_reg, l->addrhi_reg); - } else { - i = tcg_out_call_iarg_reg(s, i, l->addrlo_reg); - } - switch (s_bits) { - case MO_8: - i = tcg_out_call_iarg_reg8(s, i, l->datalo_reg); - break; - case MO_16: - i = tcg_out_call_iarg_reg16(s, i, l->datalo_reg); - break; - case MO_32: - i = tcg_out_call_iarg_reg(s, i, l->datalo_reg); - break; - case MO_64: - i = tcg_out_call_iarg_reg2(s, i, l->datalo_reg, l->datahi_reg); - break; - default: - tcg_abort(); - } - i = tcg_out_call_iarg_imm(s, i, oi); - - /* Tail call to the store helper. Thus force the return address - computation to take place in the return address register. */ - tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_RA, (intptr_t)l->raddr); - i = tcg_out_call_iarg_reg(s, i, TCG_REG_RA); - tcg_out_call_int(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)], true); - /* delay slot */ - tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0); -} -#endif - -static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi, - TCGReg base, TCGMemOp opc) -{ - switch (opc & (MO_SSIZE | MO_BSWAP)) { - case MO_UB: - tcg_out_opc_imm(s, OPC_LBU, datalo, base, 0); - break; - case MO_SB: - tcg_out_opc_imm(s, OPC_LB, datalo, base, 0); - break; - case MO_UW | MO_BSWAP: - tcg_out_opc_imm(s, OPC_LHU, TCG_TMP1, base, 0); - tcg_out_bswap16(s, datalo, TCG_TMP1); - break; - case MO_UW: - tcg_out_opc_imm(s, OPC_LHU, datalo, base, 0); - break; - case MO_SW | MO_BSWAP: - tcg_out_opc_imm(s, OPC_LHU, TCG_TMP1, base, 0); - tcg_out_bswap16s(s, datalo, TCG_TMP1); - break; - case MO_SW: - tcg_out_opc_imm(s, OPC_LH, datalo, base, 0); - break; - case MO_UL | MO_BSWAP: - tcg_out_opc_imm(s, OPC_LW, TCG_TMP1, base, 0); - tcg_out_bswap32(s, datalo, TCG_TMP1); - break; - case MO_UL: - tcg_out_opc_imm(s, OPC_LW, datalo, base, 0); - break; - case MO_Q | MO_BSWAP: - tcg_out_opc_imm(s, OPC_LW, TCG_TMP1, base, HI_OFF); - tcg_out_bswap32(s, datalo, TCG_TMP1); - tcg_out_opc_imm(s, OPC_LW, TCG_TMP1, base, LO_OFF); - tcg_out_bswap32(s, datahi, TCG_TMP1); - break; - case MO_Q: - tcg_out_opc_imm(s, OPC_LW, datalo, base, LO_OFF); - tcg_out_opc_imm(s, OPC_LW, datahi, base, HI_OFF); - break; - default: - tcg_abort(); - } -} - -static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is_64) -{ - TCGReg addr_regl, addr_regh __attribute__((unused)); - TCGReg data_regl, data_regh; - TCGMemOpIdx oi; - TCGMemOp opc; -#if defined(CONFIG_SOFTMMU) - tcg_insn_unit *label_ptr[2]; -#endif - /* Note that we've eliminated V0 from the output registers, - so we won't overwrite the base register during loading. */ - TCGReg base = TCG_REG_V0; - - data_regl = *args++; - data_regh = (is_64 ? *args++ : 0); - addr_regl = *args++; - addr_regh = (TARGET_LONG_BITS == 64 ? *args++ : 0); - oi = *args++; - opc = get_memop(oi); - -#if defined(CONFIG_SOFTMMU) - tcg_out_tlb_load(s, base, addr_regl, addr_regh, oi, label_ptr, 1); - tcg_out_qemu_ld_direct(s, data_regl, data_regh, base, opc); - add_qemu_ldst_label(s, 1, oi, data_regl, data_regh, addr_regl, addr_regh, - s->code_ptr, label_ptr); -#else - if (guest_base == 0 && data_regl != addr_regl) { - base = addr_regl; - } else if (guest_base == (int16_t)guest_base) { - tcg_out_opc_imm(s, OPC_ADDIU, base, addr_regl, guest_base); - } else { - tcg_out_movi(s, TCG_TYPE_PTR, base, guest_base); - tcg_out_opc_reg(s, OPC_ADDU, base, base, addr_regl); - } - tcg_out_qemu_ld_direct(s, data_regl, data_regh, base, opc); -#endif -} - -static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi, - TCGReg base, TCGMemOp opc) -{ - switch (opc & (MO_SIZE | MO_BSWAP)) { - case MO_8: - tcg_out_opc_imm(s, OPC_SB, datalo, base, 0); - break; - - case MO_16 | MO_BSWAP: - tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP1, datalo, 0xffff); - tcg_out_bswap16(s, TCG_TMP1, TCG_TMP1); - datalo = TCG_TMP1; - /* FALLTHRU */ - case MO_16: - tcg_out_opc_imm(s, OPC_SH, datalo, base, 0); - break; - - case MO_32 | MO_BSWAP: - tcg_out_bswap32(s, TCG_TMP1, datalo); - datalo = TCG_TMP1; - /* FALLTHRU */ - case MO_32: - tcg_out_opc_imm(s, OPC_SW, datalo, base, 0); - break; - - case MO_64 | MO_BSWAP: - tcg_out_bswap32(s, TCG_TMP1, datalo); - tcg_out_opc_imm(s, OPC_SW, TCG_TMP1, base, HI_OFF); - tcg_out_bswap32(s, TCG_TMP1, datahi); - tcg_out_opc_imm(s, OPC_SW, TCG_TMP1, base, LO_OFF); - break; - case MO_64: - tcg_out_opc_imm(s, OPC_SW, datalo, base, LO_OFF); - tcg_out_opc_imm(s, OPC_SW, datahi, base, HI_OFF); - break; - - default: - tcg_abort(); - } -} - -static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is_64) -{ - TCGReg addr_regl, addr_regh __attribute__((unused)); - TCGReg data_regl, data_regh, base; - TCGMemOpIdx oi; - TCGMemOp opc; -#if defined(CONFIG_SOFTMMU) - tcg_insn_unit *label_ptr[2]; -#endif - - data_regl = *args++; - data_regh = (is_64 ? *args++ : 0); - addr_regl = *args++; - addr_regh = (TARGET_LONG_BITS == 64 ? *args++ : 0); - oi = *args++; - opc = get_memop(oi); - -#if defined(CONFIG_SOFTMMU) - /* Note that we eliminated the helper's address argument, - so we can reuse that for the base. */ - base = (TARGET_LONG_BITS == 32 ? TCG_REG_A1 : TCG_REG_A2); - tcg_out_tlb_load(s, base, addr_regl, addr_regh, oi, label_ptr, 0); - tcg_out_qemu_st_direct(s, data_regl, data_regh, base, opc); - add_qemu_ldst_label(s, 0, oi, data_regl, data_regh, addr_regl, addr_regh, - s->code_ptr, label_ptr); -#else - if (guest_base == 0) { - base = addr_regl; - } else { - base = TCG_REG_A0; - if (guest_base == (int16_t)guest_base) { - tcg_out_opc_imm(s, OPC_ADDIU, base, addr_regl, guest_base); - } else { - tcg_out_movi(s, TCG_TYPE_PTR, base, guest_base); - tcg_out_opc_reg(s, OPC_ADDU, base, base, addr_regl); - } - } - tcg_out_qemu_st_direct(s, data_regl, data_regh, base, opc); -#endif -} - -static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, - const TCGArg *args, const int *const_args) -{ - MIPSInsn i1, i2; - TCGArg a0, a1, a2; - int c2; - - a0 = args[0]; - a1 = args[1]; - a2 = args[2]; - c2 = const_args[2]; - - switch (opc) { - case INDEX_op_exit_tb: - { - TCGReg b0 = TCG_REG_ZERO; - - if (a0 & ~0xffff) { - tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_V0, a0 & ~0xffff); - b0 = TCG_REG_V0; - } - if (!tcg_out_opc_jmp(s, OPC_J, tb_ret_addr)) { - tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0, - (uintptr_t)tb_ret_addr); - tcg_out_opc_reg(s, OPC_JR, 0, TCG_TMP0, 0); - } - tcg_out_opc_imm(s, OPC_ORI, TCG_REG_V0, b0, a0 & 0xffff); - } - break; - case INDEX_op_goto_tb: - if (s->tb_jmp_offset) { - /* direct jump method */ - s->tb_jmp_offset[a0] = tcg_current_code_size(s); - /* Avoid clobbering the address during retranslation. */ - tcg_out32(s, OPC_J | (*(uint32_t *)s->code_ptr & 0x3ffffff)); - } else { - /* indirect jump method */ - tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP0, TCG_REG_ZERO, - (uintptr_t)(s->tb_next + a0)); - tcg_out_opc_reg(s, OPC_JR, 0, TCG_TMP0, 0); - } - tcg_out_nop(s); - s->tb_next_offset[a0] = tcg_current_code_size(s); - break; - case INDEX_op_br: - tcg_out_brcond(s, TCG_COND_EQ, TCG_REG_ZERO, TCG_REG_ZERO, - arg_label(a0)); - break; - - case INDEX_op_ld8u_i32: - i1 = OPC_LBU; - goto do_ldst; - case INDEX_op_ld8s_i32: - i1 = OPC_LB; - goto do_ldst; - case INDEX_op_ld16u_i32: - i1 = OPC_LHU; - goto do_ldst; - case INDEX_op_ld16s_i32: - i1 = OPC_LH; - goto do_ldst; - case INDEX_op_ld_i32: - i1 = OPC_LW; - goto do_ldst; - case INDEX_op_st8_i32: - i1 = OPC_SB; - goto do_ldst; - case INDEX_op_st16_i32: - i1 = OPC_SH; - goto do_ldst; - case INDEX_op_st_i32: - i1 = OPC_SW; - do_ldst: - tcg_out_ldst(s, i1, a0, a1, a2); - break; - - case INDEX_op_add_i32: - i1 = OPC_ADDU, i2 = OPC_ADDIU; - goto do_binary; - case INDEX_op_or_i32: - i1 = OPC_OR, i2 = OPC_ORI; - goto do_binary; - case INDEX_op_xor_i32: - i1 = OPC_XOR, i2 = OPC_XORI; - do_binary: - if (c2) { - tcg_out_opc_imm(s, i2, a0, a1, a2); - break; - } - do_binaryv: - tcg_out_opc_reg(s, i1, a0, a1, a2); - break; - - case INDEX_op_sub_i32: - if (c2) { - tcg_out_opc_imm(s, OPC_ADDIU, a0, a1, -a2); - break; - } - i1 = OPC_SUBU; - goto do_binary; - case INDEX_op_and_i32: - if (c2 && a2 != (uint16_t)a2) { - int msb = ctz32(~a2) - 1; - assert(use_mips32r2_instructions); - assert(is_p2m1(a2)); - tcg_out_opc_bf(s, OPC_EXT, a0, a1, msb, 0); - break; - } - i1 = OPC_AND, i2 = OPC_ANDI; - goto do_binary; - case INDEX_op_nor_i32: - i1 = OPC_NOR; - goto do_binaryv; - - case INDEX_op_mul_i32: - if (use_mips32_instructions) { - tcg_out_opc_reg(s, OPC_MUL, a0, a1, a2); - break; - } - i1 = OPC_MULT, i2 = OPC_MFLO; - goto do_hilo1; - case INDEX_op_mulsh_i32: - if (use_mips32r6_instructions) { - tcg_out_opc_reg(s, OPC_MUH, a0, a1, a2); - break; - } - i1 = OPC_MULT, i2 = OPC_MFHI; - goto do_hilo1; - case INDEX_op_muluh_i32: - if (use_mips32r6_instructions) { - tcg_out_opc_reg(s, OPC_MUHU, a0, a1, a2); - break; - } - i1 = OPC_MULTU, i2 = OPC_MFHI; - goto do_hilo1; - case INDEX_op_div_i32: - if (use_mips32r6_instructions) { - tcg_out_opc_reg(s, OPC_DIV_R6, a0, a1, a2); - break; - } - i1 = OPC_DIV, i2 = OPC_MFLO; - goto do_hilo1; - case INDEX_op_divu_i32: - if (use_mips32r6_instructions) { - tcg_out_opc_reg(s, OPC_DIVU_R6, a0, a1, a2); - break; - } - i1 = OPC_DIVU, i2 = OPC_MFLO; - goto do_hilo1; - case INDEX_op_rem_i32: - if (use_mips32r6_instructions) { - tcg_out_opc_reg(s, OPC_MOD, a0, a1, a2); - break; - } - i1 = OPC_DIV, i2 = OPC_MFHI; - goto do_hilo1; - case INDEX_op_remu_i32: - if (use_mips32r6_instructions) { - tcg_out_opc_reg(s, OPC_MODU, a0, a1, a2); - break; - } - i1 = OPC_DIVU, i2 = OPC_MFHI; - do_hilo1: - tcg_out_opc_reg(s, i1, 0, a1, a2); - tcg_out_opc_reg(s, i2, a0, 0, 0); - break; - - case INDEX_op_muls2_i32: - i1 = OPC_MULT; - goto do_hilo2; - case INDEX_op_mulu2_i32: - i1 = OPC_MULTU; - do_hilo2: - tcg_out_opc_reg(s, i1, 0, a2, args[3]); - tcg_out_opc_reg(s, OPC_MFLO, a0, 0, 0); - tcg_out_opc_reg(s, OPC_MFHI, a1, 0, 0); - break; - - case INDEX_op_not_i32: - i1 = OPC_NOR; - goto do_unary; - case INDEX_op_bswap16_i32: - i1 = OPC_WSBH; - goto do_unary; - case INDEX_op_ext8s_i32: - i1 = OPC_SEB; - goto do_unary; - case INDEX_op_ext16s_i32: - i1 = OPC_SEH; - do_unary: - tcg_out_opc_reg(s, i1, a0, TCG_REG_ZERO, a1); - break; - - case INDEX_op_sar_i32: - i1 = OPC_SRAV, i2 = OPC_SRA; - goto do_shift; - case INDEX_op_shl_i32: - i1 = OPC_SLLV, i2 = OPC_SLL; - goto do_shift; - case INDEX_op_shr_i32: - i1 = OPC_SRLV, i2 = OPC_SRL; - goto do_shift; - case INDEX_op_rotr_i32: - i1 = OPC_ROTRV, i2 = OPC_ROTR; - do_shift: - if (c2) { - tcg_out_opc_sa(s, i2, a0, a1, a2); - } else { - tcg_out_opc_reg(s, i1, a0, a2, a1); - } - break; - case INDEX_op_rotl_i32: - if (c2) { - tcg_out_opc_sa(s, OPC_ROTR, a0, a1, 32 - a2); - } else { - tcg_out_opc_reg(s, OPC_SUBU, TCG_TMP0, TCG_REG_ZERO, a2); - tcg_out_opc_reg(s, OPC_ROTRV, a0, TCG_TMP0, a1); - } - break; - - case INDEX_op_bswap32_i32: - tcg_out_opc_reg(s, OPC_WSBH, a0, 0, a1); - tcg_out_opc_sa(s, OPC_ROTR, a0, a0, 16); - break; - - case INDEX_op_deposit_i32: - tcg_out_opc_bf(s, OPC_INS, a0, a2, args[3] + args[4] - 1, args[3]); - break; - - case INDEX_op_brcond_i32: - tcg_out_brcond(s, a2, a0, a1, arg_label(args[3])); - break; - case INDEX_op_brcond2_i32: - tcg_out_brcond2(s, args[4], a0, a1, a2, args[3], arg_label(args[5])); - break; - - case INDEX_op_movcond_i32: - tcg_out_movcond(s, args[5], a0, a1, a2, args[3], args[4]); - break; - - case INDEX_op_setcond_i32: - tcg_out_setcond(s, args[3], a0, a1, a2); - break; - case INDEX_op_setcond2_i32: - tcg_out_setcond2(s, args[5], a0, a1, a2, args[3], args[4]); - break; - - case INDEX_op_qemu_ld_i32: - tcg_out_qemu_ld(s, args, false); - break; - case INDEX_op_qemu_ld_i64: - tcg_out_qemu_ld(s, args, true); - break; - case INDEX_op_qemu_st_i32: - tcg_out_qemu_st(s, args, false); - break; - case INDEX_op_qemu_st_i64: - tcg_out_qemu_st(s, args, true); - break; - - case INDEX_op_add2_i32: - tcg_out_addsub2(s, a0, a1, a2, args[3], args[4], args[5], - const_args[4], const_args[5], false); - break; - case INDEX_op_sub2_i32: - tcg_out_addsub2(s, a0, a1, a2, args[3], args[4], args[5], - const_args[4], const_args[5], true); - break; - - case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */ - case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi. */ - case INDEX_op_call: /* Always emitted via tcg_out_call. */ - default: - tcg_abort(); - } -} - -static const TCGTargetOpDef mips_op_defs[] = { - { INDEX_op_exit_tb, { } }, - { INDEX_op_goto_tb, { } }, - { INDEX_op_br, { } }, - - { INDEX_op_ld8u_i32, { "r", "r" } }, - { INDEX_op_ld8s_i32, { "r", "r" } }, - { INDEX_op_ld16u_i32, { "r", "r" } }, - { INDEX_op_ld16s_i32, { "r", "r" } }, - { INDEX_op_ld_i32, { "r", "r" } }, - { INDEX_op_st8_i32, { "rZ", "r" } }, - { INDEX_op_st16_i32, { "rZ", "r" } }, - { INDEX_op_st_i32, { "rZ", "r" } }, - - { INDEX_op_add_i32, { "r", "rZ", "rJ" } }, - { INDEX_op_mul_i32, { "r", "rZ", "rZ" } }, -#if !use_mips32r6_instructions - { INDEX_op_muls2_i32, { "r", "r", "rZ", "rZ" } }, - { INDEX_op_mulu2_i32, { "r", "r", "rZ", "rZ" } }, -#endif - { INDEX_op_mulsh_i32, { "r", "rZ", "rZ" } }, - { INDEX_op_muluh_i32, { "r", "rZ", "rZ" } }, - { INDEX_op_div_i32, { "r", "rZ", "rZ" } }, - { INDEX_op_divu_i32, { "r", "rZ", "rZ" } }, - { INDEX_op_rem_i32, { "r", "rZ", "rZ" } }, - { INDEX_op_remu_i32, { "r", "rZ", "rZ" } }, - { INDEX_op_sub_i32, { "r", "rZ", "rN" } }, - - { INDEX_op_and_i32, { "r", "rZ", "rIK" } }, - { INDEX_op_nor_i32, { "r", "rZ", "rZ" } }, - { INDEX_op_not_i32, { "r", "rZ" } }, - { INDEX_op_or_i32, { "r", "rZ", "rIZ" } }, - { INDEX_op_xor_i32, { "r", "rZ", "rIZ" } }, - - { INDEX_op_shl_i32, { "r", "rZ", "ri" } }, - { INDEX_op_shr_i32, { "r", "rZ", "ri" } }, - { INDEX_op_sar_i32, { "r", "rZ", "ri" } }, - { INDEX_op_rotr_i32, { "r", "rZ", "ri" } }, - { INDEX_op_rotl_i32, { "r", "rZ", "ri" } }, - - { INDEX_op_bswap16_i32, { "r", "r" } }, - { INDEX_op_bswap32_i32, { "r", "r" } }, - - { INDEX_op_ext8s_i32, { "r", "rZ" } }, - { INDEX_op_ext16s_i32, { "r", "rZ" } }, - - { INDEX_op_deposit_i32, { "r", "0", "rZ" } }, - - { INDEX_op_brcond_i32, { "rZ", "rZ" } }, -#if use_mips32r6_instructions - { INDEX_op_movcond_i32, { "r", "rZ", "rZ", "rZ", "rZ" } }, -#else - { INDEX_op_movcond_i32, { "r", "rZ", "rZ", "rZ", "0" } }, -#endif - { INDEX_op_setcond_i32, { "r", "rZ", "rZ" } }, - { INDEX_op_setcond2_i32, { "r", "rZ", "rZ", "rZ", "rZ" } }, - - { INDEX_op_add2_i32, { "r", "r", "rZ", "rZ", "rN", "rN" } }, - { INDEX_op_sub2_i32, { "r", "r", "rZ", "rZ", "rN", "rN" } }, - { INDEX_op_brcond2_i32, { "rZ", "rZ", "rZ", "rZ" } }, - -#if TARGET_LONG_BITS == 32 - { INDEX_op_qemu_ld_i32, { "L", "lZ" } }, - { INDEX_op_qemu_st_i32, { "SZ", "SZ" } }, - { INDEX_op_qemu_ld_i64, { "L", "L", "lZ" } }, - { INDEX_op_qemu_st_i64, { "SZ", "SZ", "SZ" } }, -#else - { INDEX_op_qemu_ld_i32, { "L", "lZ", "lZ" } }, - { INDEX_op_qemu_st_i32, { "SZ", "SZ", "SZ" } }, - { INDEX_op_qemu_ld_i64, { "L", "L", "lZ", "lZ" } }, - { INDEX_op_qemu_st_i64, { "SZ", "SZ", "SZ", "SZ" } }, -#endif - { -1 }, -}; - -static int tcg_target_callee_save_regs[] = { - TCG_REG_S0, /* used for the global env (TCG_AREG0) */ - TCG_REG_S1, - TCG_REG_S2, - TCG_REG_S3, - TCG_REG_S4, - TCG_REG_S5, - TCG_REG_S6, - TCG_REG_S7, - TCG_REG_S8, - TCG_REG_RA, /* should be last for ABI compliance */ -}; - -/* The Linux kernel doesn't provide any information about the available - instruction set. Probe it using a signal handler. */ - - -#ifndef use_movnz_instructions -bool use_movnz_instructions = false; -#endif - -#ifndef use_mips32_instructions -bool use_mips32_instructions = false; -#endif - -#ifndef use_mips32r2_instructions -bool use_mips32r2_instructions = false; -#endif - -static volatile sig_atomic_t got_sigill; - -static void sigill_handler(int signo, siginfo_t *si, void *data) -{ - /* Skip the faulty instruction */ - ucontext_t *uc = (ucontext_t *)data; - uc->uc_mcontext.pc += 4; - - got_sigill = 1; -} - -static void tcg_target_detect_isa(void) -{ - struct sigaction sa_old, sa_new; - - memset(&sa_new, 0, sizeof(sa_new)); - sa_new.sa_flags = SA_SIGINFO; - sa_new.sa_sigaction = sigill_handler; - sigaction(SIGILL, &sa_new, &sa_old); - - /* Probe for movn/movz, necessary to implement movcond. */ -#ifndef use_movnz_instructions - got_sigill = 0; - asm volatile(".set push\n" - ".set mips32\n" - "movn $zero, $zero, $zero\n" - "movz $zero, $zero, $zero\n" - ".set pop\n" - : : : ); - use_movnz_instructions = !got_sigill; -#endif - - /* Probe for MIPS32 instructions. As no subsetting is allowed - by the specification, it is only necessary to probe for one - of the instructions. */ -#ifndef use_mips32_instructions - got_sigill = 0; - asm volatile(".set push\n" - ".set mips32\n" - "mul $zero, $zero\n" - ".set pop\n" - : : : ); - use_mips32_instructions = !got_sigill; -#endif - - /* Probe for MIPS32r2 instructions if MIPS32 instructions are - available. As no subsetting is allowed by the specification, - it is only necessary to probe for one of the instructions. */ -#ifndef use_mips32r2_instructions - if (use_mips32_instructions) { - got_sigill = 0; - asm volatile(".set push\n" - ".set mips32r2\n" - "seb $zero, $zero\n" - ".set pop\n" - : : : ); - use_mips32r2_instructions = !got_sigill; - } -#endif - - sigaction(SIGILL, &sa_old, NULL); -} - -/* Generate global QEMU prologue and epilogue code */ -static void tcg_target_qemu_prologue(TCGContext *s) -{ - int i, frame_size; - - /* reserve some stack space, also for TCG temps. */ - frame_size = ARRAY_SIZE(tcg_target_callee_save_regs) * 4 - + TCG_STATIC_CALL_ARGS_SIZE - + CPU_TEMP_BUF_NLONGS * sizeof(long); - frame_size = (frame_size + TCG_TARGET_STACK_ALIGN - 1) & - ~(TCG_TARGET_STACK_ALIGN - 1); - tcg_set_frame(s, TCG_REG_SP, ARRAY_SIZE(tcg_target_callee_save_regs) * 4 - + TCG_STATIC_CALL_ARGS_SIZE, - CPU_TEMP_BUF_NLONGS * sizeof(long)); - - /* TB prologue */ - tcg_out_addi(s, TCG_REG_SP, -frame_size); - for(i = 0 ; i < ARRAY_SIZE(tcg_target_callee_save_regs) ; i++) { - tcg_out_st(s, TCG_TYPE_I32, tcg_target_callee_save_regs[i], - TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE + i * 4); - } - - /* Call generated code */ - tcg_out_opc_reg(s, OPC_JR, 0, tcg_target_call_iarg_regs[1], 0); - tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]); - tb_ret_addr = s->code_ptr; - - /* TB epilogue */ - for(i = 0 ; i < ARRAY_SIZE(tcg_target_callee_save_regs) ; i++) { - tcg_out_ld(s, TCG_TYPE_I32, tcg_target_callee_save_regs[i], - TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE + i * 4); - } - - tcg_out_opc_reg(s, OPC_JR, 0, TCG_REG_RA, 0); - tcg_out_addi(s, TCG_REG_SP, frame_size); -} - -static void tcg_target_init(TCGContext *s) -{ - tcg_target_detect_isa(); - tcg_regset_set(tcg_target_available_regs[TCG_TYPE_I32], 0xffffffff); - tcg_regset_set(tcg_target_call_clobber_regs, - (1 << TCG_REG_V0) | - (1 << TCG_REG_V1) | - (1 << TCG_REG_A0) | - (1 << TCG_REG_A1) | - (1 << TCG_REG_A2) | - (1 << TCG_REG_A3) | - (1 << TCG_REG_T0) | - (1 << TCG_REG_T1) | - (1 << TCG_REG_T2) | - (1 << TCG_REG_T3) | - (1 << TCG_REG_T4) | - (1 << TCG_REG_T5) | - (1 << TCG_REG_T6) | - (1 << TCG_REG_T7) | - (1 << TCG_REG_T8) | - (1 << TCG_REG_T9)); - - tcg_regset_clear(s->reserved_regs); - tcg_regset_set_reg(s->reserved_regs, TCG_REG_ZERO); /* zero register */ - tcg_regset_set_reg(s->reserved_regs, TCG_REG_K0); /* kernel use only */ - tcg_regset_set_reg(s->reserved_regs, TCG_REG_K1); /* kernel use only */ - tcg_regset_set_reg(s->reserved_regs, TCG_TMP0); /* internal use */ - tcg_regset_set_reg(s->reserved_regs, TCG_TMP1); /* internal use */ - tcg_regset_set_reg(s->reserved_regs, TCG_REG_RA); /* return address */ - tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP); /* stack pointer */ - tcg_regset_set_reg(s->reserved_regs, TCG_REG_GP); /* global pointer */ - - tcg_add_target_add_op_defs(mips_op_defs); -} - -void tb_set_jmp_target1(uintptr_t jmp_addr, uintptr_t addr) -{ - uint32_t *ptr = (uint32_t *)jmp_addr; - *ptr = deposit32(*ptr, 0, 26, addr >> 2); - flush_icache_range(jmp_addr, jmp_addr + 4); -} diff --git a/tcg/mips/tcg-target.inc.c b/tcg/mips/tcg-target.inc.c new file mode 100644 index 0000000..2dc4998 --- /dev/null +++ b/tcg/mips/tcg-target.inc.c @@ -0,0 +1,1892 @@ +/* + * Tiny Code Generator for QEMU + * + * Copyright (c) 2008-2009 Arnaud Patard + * Copyright (c) 2009 Aurelien Jarno + * Based on i386/tcg-target.c - Copyright (c) 2008 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "qemu/osdep.h" +#include "tcg-be-ldst.h" + +#ifdef HOST_WORDS_BIGENDIAN +# define MIPS_BE 1 +#else +# define MIPS_BE 0 +#endif + +#define LO_OFF (MIPS_BE * 4) +#define HI_OFF (4 - LO_OFF) + +#ifndef NDEBUG +static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = { + "zero", + "at", + "v0", + "v1", + "a0", + "a1", + "a2", + "a3", + "t0", + "t1", + "t2", + "t3", + "t4", + "t5", + "t6", + "t7", + "s0", + "s1", + "s2", + "s3", + "s4", + "s5", + "s6", + "s7", + "t8", + "t9", + "k0", + "k1", + "gp", + "sp", + "s8", + "ra", +}; +#endif + +#define TCG_TMP0 TCG_REG_AT +#define TCG_TMP1 TCG_REG_T9 + +/* check if we really need so many registers :P */ +static const TCGReg tcg_target_reg_alloc_order[] = { + /* Call saved registers. */ + TCG_REG_S0, + TCG_REG_S1, + TCG_REG_S2, + TCG_REG_S3, + TCG_REG_S4, + TCG_REG_S5, + TCG_REG_S6, + TCG_REG_S7, + TCG_REG_S8, + + /* Call clobbered registers. */ + TCG_REG_T0, + TCG_REG_T1, + TCG_REG_T2, + TCG_REG_T3, + TCG_REG_T4, + TCG_REG_T5, + TCG_REG_T6, + TCG_REG_T7, + TCG_REG_T8, + TCG_REG_T9, + TCG_REG_V1, + TCG_REG_V0, + + /* Argument registers, opposite order of allocation. */ + TCG_REG_A3, + TCG_REG_A2, + TCG_REG_A1, + TCG_REG_A0, +}; + +static const TCGReg tcg_target_call_iarg_regs[4] = { + TCG_REG_A0, + TCG_REG_A1, + TCG_REG_A2, + TCG_REG_A3 +}; + +static const TCGReg tcg_target_call_oarg_regs[2] = { + TCG_REG_V0, + TCG_REG_V1 +}; + +static tcg_insn_unit *tb_ret_addr; + +static inline uint32_t reloc_pc16_val(tcg_insn_unit *pc, tcg_insn_unit *target) +{ + /* Let the compiler perform the right-shift as part of the arithmetic. */ + ptrdiff_t disp = target - (pc + 1); + assert(disp == (int16_t)disp); + return disp & 0xffff; +} + +static inline void reloc_pc16(tcg_insn_unit *pc, tcg_insn_unit *target) +{ + *pc = deposit32(*pc, 0, 16, reloc_pc16_val(pc, target)); +} + +static inline uint32_t reloc_26_val(tcg_insn_unit *pc, tcg_insn_unit *target) +{ + assert((((uintptr_t)pc ^ (uintptr_t)target) & 0xf0000000) == 0); + return ((uintptr_t)target >> 2) & 0x3ffffff; +} + +static inline void reloc_26(tcg_insn_unit *pc, tcg_insn_unit *target) +{ + *pc = deposit32(*pc, 0, 26, reloc_26_val(pc, target)); +} + +static void patch_reloc(tcg_insn_unit *code_ptr, int type, + intptr_t value, intptr_t addend) +{ + assert(type == R_MIPS_PC16); + assert(addend == 0); + reloc_pc16(code_ptr, (tcg_insn_unit *)value); +} + +#define TCG_CT_CONST_ZERO 0x100 +#define TCG_CT_CONST_U16 0x200 /* Unsigned 16-bit: 0 - 0xffff. */ +#define TCG_CT_CONST_S16 0x400 /* Signed 16-bit: -32768 - 32767 */ +#define TCG_CT_CONST_P2M1 0x800 /* Power of 2 minus 1. */ +#define TCG_CT_CONST_N16 0x1000 /* "Negatable" 16-bit: -32767 - 32767 */ + +static inline bool is_p2m1(tcg_target_long val) +{ + return val && ((val + 1) & val) == 0; +} + +/* parse target specific constraints */ +static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str) +{ + const char *ct_str; + + ct_str = *pct_str; + switch(ct_str[0]) { + case 'r': + ct->ct |= TCG_CT_REG; + tcg_regset_set(ct->u.regs, 0xffffffff); + break; + case 'L': /* qemu_ld output arg constraint */ + ct->ct |= TCG_CT_REG; + tcg_regset_set(ct->u.regs, 0xffffffff); + tcg_regset_reset_reg(ct->u.regs, TCG_REG_V0); + break; + case 'l': /* qemu_ld input arg constraint */ + ct->ct |= TCG_CT_REG; + tcg_regset_set(ct->u.regs, 0xffffffff); + tcg_regset_reset_reg(ct->u.regs, TCG_REG_A0); +#if defined(CONFIG_SOFTMMU) + if (TARGET_LONG_BITS == 64) { + tcg_regset_reset_reg(ct->u.regs, TCG_REG_A2); + } +#endif + break; + case 'S': /* qemu_st constraint */ + ct->ct |= TCG_CT_REG; + tcg_regset_set(ct->u.regs, 0xffffffff); + tcg_regset_reset_reg(ct->u.regs, TCG_REG_A0); +#if defined(CONFIG_SOFTMMU) + if (TARGET_LONG_BITS == 32) { + tcg_regset_reset_reg(ct->u.regs, TCG_REG_A1); + } else { + tcg_regset_reset_reg(ct->u.regs, TCG_REG_A2); + tcg_regset_reset_reg(ct->u.regs, TCG_REG_A3); + } +#endif + break; + case 'I': + ct->ct |= TCG_CT_CONST_U16; + break; + case 'J': + ct->ct |= TCG_CT_CONST_S16; + break; + case 'K': + ct->ct |= TCG_CT_CONST_P2M1; + break; + case 'N': + ct->ct |= TCG_CT_CONST_N16; + break; + case 'Z': + /* We are cheating a bit here, using the fact that the register + ZERO is also the register number 0. Hence there is no need + to check for const_args in each instruction. */ + ct->ct |= TCG_CT_CONST_ZERO; + break; + default: + return -1; + } + ct_str++; + *pct_str = ct_str; + return 0; +} + +/* test if a constant matches the constraint */ +static inline int tcg_target_const_match(tcg_target_long val, TCGType type, + const TCGArgConstraint *arg_ct) +{ + int ct; + ct = arg_ct->ct; + if (ct & TCG_CT_CONST) { + return 1; + } else if ((ct & TCG_CT_CONST_ZERO) && val == 0) { + return 1; + } else if ((ct & TCG_CT_CONST_U16) && val == (uint16_t)val) { + return 1; + } else if ((ct & TCG_CT_CONST_S16) && val == (int16_t)val) { + return 1; + } else if ((ct & TCG_CT_CONST_N16) && val >= -32767 && val <= 32767) { + return 1; + } else if ((ct & TCG_CT_CONST_P2M1) + && use_mips32r2_instructions && is_p2m1(val)) { + return 1; + } + return 0; +} + +/* instruction opcodes */ +typedef enum { + OPC_J = 0x02 << 26, + OPC_JAL = 0x03 << 26, + OPC_BEQ = 0x04 << 26, + OPC_BNE = 0x05 << 26, + OPC_BLEZ = 0x06 << 26, + OPC_BGTZ = 0x07 << 26, + OPC_ADDIU = 0x09 << 26, + OPC_SLTI = 0x0A << 26, + OPC_SLTIU = 0x0B << 26, + OPC_ANDI = 0x0C << 26, + OPC_ORI = 0x0D << 26, + OPC_XORI = 0x0E << 26, + OPC_LUI = 0x0F << 26, + OPC_LB = 0x20 << 26, + OPC_LH = 0x21 << 26, + OPC_LW = 0x23 << 26, + OPC_LBU = 0x24 << 26, + OPC_LHU = 0x25 << 26, + OPC_LWU = 0x27 << 26, + OPC_SB = 0x28 << 26, + OPC_SH = 0x29 << 26, + OPC_SW = 0x2B << 26, + + OPC_SPECIAL = 0x00 << 26, + OPC_SLL = OPC_SPECIAL | 0x00, + OPC_SRL = OPC_SPECIAL | 0x02, + OPC_ROTR = OPC_SPECIAL | (0x01 << 21) | 0x02, + OPC_SRA = OPC_SPECIAL | 0x03, + OPC_SLLV = OPC_SPECIAL | 0x04, + OPC_SRLV = OPC_SPECIAL | 0x06, + OPC_ROTRV = OPC_SPECIAL | (0x01 << 6) | 0x06, + OPC_SRAV = OPC_SPECIAL | 0x07, + OPC_JR_R5 = OPC_SPECIAL | 0x08, + OPC_JALR = OPC_SPECIAL | 0x09, + OPC_MOVZ = OPC_SPECIAL | 0x0A, + OPC_MOVN = OPC_SPECIAL | 0x0B, + OPC_MFHI = OPC_SPECIAL | 0x10, + OPC_MFLO = OPC_SPECIAL | 0x12, + OPC_MULT = OPC_SPECIAL | 0x18, + OPC_MUL_R6 = OPC_SPECIAL | (0x02 << 6) | 0x18, + OPC_MUH = OPC_SPECIAL | (0x03 << 6) | 0x18, + OPC_MULTU = OPC_SPECIAL | 0x19, + OPC_MULU = OPC_SPECIAL | (0x02 << 6) | 0x19, + OPC_MUHU = OPC_SPECIAL | (0x03 << 6) | 0x19, + OPC_DIV = OPC_SPECIAL | 0x1A, + OPC_DIV_R6 = OPC_SPECIAL | (0x02 << 6) | 0x1A, + OPC_MOD = OPC_SPECIAL | (0x03 << 6) | 0x1A, + OPC_DIVU = OPC_SPECIAL | 0x1B, + OPC_DIVU_R6 = OPC_SPECIAL | (0x02 << 6) | 0x1B, + OPC_MODU = OPC_SPECIAL | (0x03 << 6) | 0x1B, + OPC_ADDU = OPC_SPECIAL | 0x21, + OPC_SUBU = OPC_SPECIAL | 0x23, + OPC_AND = OPC_SPECIAL | 0x24, + OPC_OR = OPC_SPECIAL | 0x25, + OPC_XOR = OPC_SPECIAL | 0x26, + OPC_NOR = OPC_SPECIAL | 0x27, + OPC_SLT = OPC_SPECIAL | 0x2A, + OPC_SLTU = OPC_SPECIAL | 0x2B, + OPC_SELEQZ = OPC_SPECIAL | 0x35, + OPC_SELNEZ = OPC_SPECIAL | 0x37, + + OPC_REGIMM = 0x01 << 26, + OPC_BLTZ = OPC_REGIMM | (0x00 << 16), + OPC_BGEZ = OPC_REGIMM | (0x01 << 16), + + OPC_SPECIAL2 = 0x1c << 26, + OPC_MUL_R5 = OPC_SPECIAL2 | 0x002, + + OPC_SPECIAL3 = 0x1f << 26, + OPC_EXT = OPC_SPECIAL3 | 0x000, + OPC_INS = OPC_SPECIAL3 | 0x004, + OPC_WSBH = OPC_SPECIAL3 | 0x0a0, + OPC_SEB = OPC_SPECIAL3 | 0x420, + OPC_SEH = OPC_SPECIAL3 | 0x620, + + /* MIPS r6 doesn't have JR, JALR should be used instead */ + OPC_JR = use_mips32r6_instructions ? OPC_JALR : OPC_JR_R5, + + /* + * MIPS r6 replaces MUL with an alternative encoding which is + * backwards-compatible at the assembly level. + */ + OPC_MUL = use_mips32r6_instructions ? OPC_MUL_R6 : OPC_MUL_R5, +} MIPSInsn; + +/* + * Type reg + */ +static inline void tcg_out_opc_reg(TCGContext *s, MIPSInsn opc, + TCGReg rd, TCGReg rs, TCGReg rt) +{ + int32_t inst; + + inst = opc; + inst |= (rs & 0x1F) << 21; + inst |= (rt & 0x1F) << 16; + inst |= (rd & 0x1F) << 11; + tcg_out32(s, inst); +} + +/* + * Type immediate + */ +static inline void tcg_out_opc_imm(TCGContext *s, MIPSInsn opc, + TCGReg rt, TCGReg rs, TCGArg imm) +{ + int32_t inst; + + inst = opc; + inst |= (rs & 0x1F) << 21; + inst |= (rt & 0x1F) << 16; + inst |= (imm & 0xffff); + tcg_out32(s, inst); +} + +/* + * Type bitfield + */ +static inline void tcg_out_opc_bf(TCGContext *s, MIPSInsn opc, TCGReg rt, + TCGReg rs, int msb, int lsb) +{ + int32_t inst; + + inst = opc; + inst |= (rs & 0x1F) << 21; + inst |= (rt & 0x1F) << 16; + inst |= (msb & 0x1F) << 11; + inst |= (lsb & 0x1F) << 6; + tcg_out32(s, inst); +} + +/* + * Type branch + */ +static inline void tcg_out_opc_br(TCGContext *s, MIPSInsn opc, + TCGReg rt, TCGReg rs) +{ + /* We pay attention here to not modify the branch target by reading + the existing value and using it again. This ensure that caches and + memory are kept coherent during retranslation. */ + uint16_t offset = (uint16_t)*s->code_ptr; + + tcg_out_opc_imm(s, opc, rt, rs, offset); +} + +/* + * Type sa + */ +static inline void tcg_out_opc_sa(TCGContext *s, MIPSInsn opc, + TCGReg rd, TCGReg rt, TCGArg sa) +{ + int32_t inst; + + inst = opc; + inst |= (rt & 0x1F) << 16; + inst |= (rd & 0x1F) << 11; + inst |= (sa & 0x1F) << 6; + tcg_out32(s, inst); + +} + +/* + * Type jump. + * Returns true if the branch was in range and the insn was emitted. + */ +static bool tcg_out_opc_jmp(TCGContext *s, MIPSInsn opc, void *target) +{ + uintptr_t dest = (uintptr_t)target; + uintptr_t from = (uintptr_t)s->code_ptr + 4; + int32_t inst; + + /* The pc-region branch happens within the 256MB region of + the delay slot (thus the +4). */ + if ((from ^ dest) & -(1 << 28)) { + return false; + } + assert((dest & 3) == 0); + + inst = opc; + inst |= (dest >> 2) & 0x3ffffff; + tcg_out32(s, inst); + return true; +} + +static inline void tcg_out_nop(TCGContext *s) +{ + tcg_out32(s, 0); +} + +static inline void tcg_out_mov(TCGContext *s, TCGType type, + TCGReg ret, TCGReg arg) +{ + /* Simple reg-reg move, optimising out the 'do nothing' case */ + if (ret != arg) { + tcg_out_opc_reg(s, OPC_ADDU, ret, arg, TCG_REG_ZERO); + } +} + +static inline void tcg_out_movi(TCGContext *s, TCGType type, + TCGReg reg, tcg_target_long arg) +{ + if (arg == (int16_t)arg) { + tcg_out_opc_imm(s, OPC_ADDIU, reg, TCG_REG_ZERO, arg); + } else if (arg == (uint16_t)arg) { + tcg_out_opc_imm(s, OPC_ORI, reg, TCG_REG_ZERO, arg); + } else { + tcg_out_opc_imm(s, OPC_LUI, reg, TCG_REG_ZERO, arg >> 16); + if (arg & 0xffff) { + tcg_out_opc_imm(s, OPC_ORI, reg, reg, arg & 0xffff); + } + } +} + +static inline void tcg_out_bswap16(TCGContext *s, TCGReg ret, TCGReg arg) +{ + if (use_mips32r2_instructions) { + tcg_out_opc_reg(s, OPC_WSBH, ret, 0, arg); + } else { + /* ret and arg can't be register at */ + if (ret == TCG_TMP0 || arg == TCG_TMP0) { + tcg_abort(); + } + + tcg_out_opc_sa(s, OPC_SRL, TCG_TMP0, arg, 8); + tcg_out_opc_sa(s, OPC_SLL, ret, arg, 8); + tcg_out_opc_imm(s, OPC_ANDI, ret, ret, 0xff00); + tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_TMP0); + } +} + +static inline void tcg_out_bswap16s(TCGContext *s, TCGReg ret, TCGReg arg) +{ + if (use_mips32r2_instructions) { + tcg_out_opc_reg(s, OPC_WSBH, ret, 0, arg); + tcg_out_opc_reg(s, OPC_SEH, ret, 0, ret); + } else { + /* ret and arg can't be register at */ + if (ret == TCG_TMP0 || arg == TCG_TMP0) { + tcg_abort(); + } + + tcg_out_opc_sa(s, OPC_SRL, TCG_TMP0, arg, 8); + tcg_out_opc_sa(s, OPC_SLL, ret, arg, 24); + tcg_out_opc_sa(s, OPC_SRA, ret, ret, 16); + tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_TMP0); + } +} + +static inline void tcg_out_bswap32(TCGContext *s, TCGReg ret, TCGReg arg) +{ + if (use_mips32r2_instructions) { + tcg_out_opc_reg(s, OPC_WSBH, ret, 0, arg); + tcg_out_opc_sa(s, OPC_ROTR, ret, ret, 16); + } else { + /* ret and arg must be different and can't be register at */ + if (ret == arg || ret == TCG_TMP0 || arg == TCG_TMP0) { + tcg_abort(); + } + + tcg_out_opc_sa(s, OPC_SLL, ret, arg, 24); + + tcg_out_opc_sa(s, OPC_SRL, TCG_TMP0, arg, 24); + tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_TMP0); + + tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP0, arg, 0xff00); + tcg_out_opc_sa(s, OPC_SLL, TCG_TMP0, TCG_TMP0, 8); + tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_TMP0); + + tcg_out_opc_sa(s, OPC_SRL, TCG_TMP0, arg, 8); + tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP0, TCG_TMP0, 0xff00); + tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_TMP0); + } +} + +static inline void tcg_out_ext8s(TCGContext *s, TCGReg ret, TCGReg arg) +{ + if (use_mips32r2_instructions) { + tcg_out_opc_reg(s, OPC_SEB, ret, 0, arg); + } else { + tcg_out_opc_sa(s, OPC_SLL, ret, arg, 24); + tcg_out_opc_sa(s, OPC_SRA, ret, ret, 24); + } +} + +static inline void tcg_out_ext16s(TCGContext *s, TCGReg ret, TCGReg arg) +{ + if (use_mips32r2_instructions) { + tcg_out_opc_reg(s, OPC_SEH, ret, 0, arg); + } else { + tcg_out_opc_sa(s, OPC_SLL, ret, arg, 16); + tcg_out_opc_sa(s, OPC_SRA, ret, ret, 16); + } +} + +static void tcg_out_ldst(TCGContext *s, MIPSInsn opc, TCGReg data, + TCGReg addr, intptr_t ofs) +{ + int16_t lo = ofs; + if (ofs != lo) { + tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0, ofs - lo); + if (addr != TCG_REG_ZERO) { + tcg_out_opc_reg(s, OPC_ADDU, TCG_TMP0, TCG_TMP0, addr); + } + addr = TCG_TMP0; + } + tcg_out_opc_imm(s, opc, data, addr, lo); +} + +static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg arg, + TCGReg arg1, intptr_t arg2) +{ + tcg_out_ldst(s, OPC_LW, arg, arg1, arg2); +} + +static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, + TCGReg arg1, intptr_t arg2) +{ + tcg_out_ldst(s, OPC_SW, arg, arg1, arg2); +} + +static inline void tcg_out_addi(TCGContext *s, TCGReg reg, TCGArg val) +{ + if (val == (int16_t)val) { + tcg_out_opc_imm(s, OPC_ADDIU, reg, reg, val); + } else { + tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0, val); + tcg_out_opc_reg(s, OPC_ADDU, reg, reg, TCG_TMP0); + } +} + +static void tcg_out_addsub2(TCGContext *s, TCGReg rl, TCGReg rh, TCGReg al, + TCGReg ah, TCGArg bl, TCGArg bh, bool cbl, + bool cbh, bool is_sub) +{ + TCGReg th = TCG_TMP1; + + /* If we have a negative constant such that negating it would + make the high part zero, we can (usually) eliminate one insn. */ + if (cbl && cbh && bh == -1 && bl != 0) { + bl = -bl; + bh = 0; + is_sub = !is_sub; + } + + /* By operating on the high part first, we get to use the final + carry operation to move back from the temporary. */ + if (!cbh) { + tcg_out_opc_reg(s, (is_sub ? OPC_SUBU : OPC_ADDU), th, ah, bh); + } else if (bh != 0 || ah == rl) { + tcg_out_opc_imm(s, OPC_ADDIU, th, ah, (is_sub ? -bh : bh)); + } else { + th = ah; + } + + /* Note that tcg optimization should eliminate the bl == 0 case. */ + if (is_sub) { + if (cbl) { + tcg_out_opc_imm(s, OPC_SLTIU, TCG_TMP0, al, bl); + tcg_out_opc_imm(s, OPC_ADDIU, rl, al, -bl); + } else { + tcg_out_opc_reg(s, OPC_SLTU, TCG_TMP0, al, bl); + tcg_out_opc_reg(s, OPC_SUBU, rl, al, bl); + } + tcg_out_opc_reg(s, OPC_SUBU, rh, th, TCG_TMP0); + } else { + if (cbl) { + tcg_out_opc_imm(s, OPC_ADDIU, rl, al, bl); + tcg_out_opc_imm(s, OPC_SLTIU, TCG_TMP0, rl, bl); + } else if (rl == al && rl == bl) { + tcg_out_opc_sa(s, OPC_SRL, TCG_TMP0, al, 31); + tcg_out_opc_reg(s, OPC_ADDU, rl, al, bl); + } else { + tcg_out_opc_reg(s, OPC_ADDU, rl, al, bl); + tcg_out_opc_reg(s, OPC_SLTU, TCG_TMP0, rl, (rl == bl ? al : bl)); + } + tcg_out_opc_reg(s, OPC_ADDU, rh, th, TCG_TMP0); + } +} + +/* Bit 0 set if inversion required; bit 1 set if swapping required. */ +#define MIPS_CMP_INV 1 +#define MIPS_CMP_SWAP 2 + +static const uint8_t mips_cmp_map[16] = { + [TCG_COND_LT] = 0, + [TCG_COND_LTU] = 0, + [TCG_COND_GE] = MIPS_CMP_INV, + [TCG_COND_GEU] = MIPS_CMP_INV, + [TCG_COND_LE] = MIPS_CMP_INV | MIPS_CMP_SWAP, + [TCG_COND_LEU] = MIPS_CMP_INV | MIPS_CMP_SWAP, + [TCG_COND_GT] = MIPS_CMP_SWAP, + [TCG_COND_GTU] = MIPS_CMP_SWAP, +}; + +static void tcg_out_setcond(TCGContext *s, TCGCond cond, TCGReg ret, + TCGReg arg1, TCGReg arg2) +{ + MIPSInsn s_opc = OPC_SLTU; + int cmp_map; + + switch (cond) { + case TCG_COND_EQ: + if (arg2 != 0) { + tcg_out_opc_reg(s, OPC_XOR, ret, arg1, arg2); + arg1 = ret; + } + tcg_out_opc_imm(s, OPC_SLTIU, ret, arg1, 1); + break; + + case TCG_COND_NE: + if (arg2 != 0) { + tcg_out_opc_reg(s, OPC_XOR, ret, arg1, arg2); + arg1 = ret; + } + tcg_out_opc_reg(s, OPC_SLTU, ret, TCG_REG_ZERO, arg1); + break; + + case TCG_COND_LT: + case TCG_COND_GE: + case TCG_COND_LE: + case TCG_COND_GT: + s_opc = OPC_SLT; + /* FALLTHRU */ + + case TCG_COND_LTU: + case TCG_COND_GEU: + case TCG_COND_LEU: + case TCG_COND_GTU: + cmp_map = mips_cmp_map[cond]; + if (cmp_map & MIPS_CMP_SWAP) { + TCGReg t = arg1; + arg1 = arg2; + arg2 = t; + } + tcg_out_opc_reg(s, s_opc, ret, arg1, arg2); + if (cmp_map & MIPS_CMP_INV) { + tcg_out_opc_imm(s, OPC_XORI, ret, ret, 1); + } + break; + + default: + tcg_abort(); + break; + } +} + +static void tcg_out_brcond(TCGContext *s, TCGCond cond, TCGReg arg1, + TCGReg arg2, TCGLabel *l) +{ + static const MIPSInsn b_zero[16] = { + [TCG_COND_LT] = OPC_BLTZ, + [TCG_COND_GT] = OPC_BGTZ, + [TCG_COND_LE] = OPC_BLEZ, + [TCG_COND_GE] = OPC_BGEZ, + }; + + MIPSInsn s_opc = OPC_SLTU; + MIPSInsn b_opc; + int cmp_map; + + switch (cond) { + case TCG_COND_EQ: + b_opc = OPC_BEQ; + break; + case TCG_COND_NE: + b_opc = OPC_BNE; + break; + + case TCG_COND_LT: + case TCG_COND_GT: + case TCG_COND_LE: + case TCG_COND_GE: + if (arg2 == 0) { + b_opc = b_zero[cond]; + arg2 = arg1; + arg1 = 0; + break; + } + s_opc = OPC_SLT; + /* FALLTHRU */ + + case TCG_COND_LTU: + case TCG_COND_GTU: + case TCG_COND_LEU: + case TCG_COND_GEU: + cmp_map = mips_cmp_map[cond]; + if (cmp_map & MIPS_CMP_SWAP) { + TCGReg t = arg1; + arg1 = arg2; + arg2 = t; + } + tcg_out_opc_reg(s, s_opc, TCG_TMP0, arg1, arg2); + b_opc = (cmp_map & MIPS_CMP_INV ? OPC_BEQ : OPC_BNE); + arg1 = TCG_TMP0; + arg2 = TCG_REG_ZERO; + break; + + default: + tcg_abort(); + break; + } + + tcg_out_opc_br(s, b_opc, arg1, arg2); + if (l->has_value) { + reloc_pc16(s->code_ptr - 1, l->u.value_ptr); + } else { + tcg_out_reloc(s, s->code_ptr - 1, R_MIPS_PC16, l, 0); + } + tcg_out_nop(s); +} + +static TCGReg tcg_out_reduce_eq2(TCGContext *s, TCGReg tmp0, TCGReg tmp1, + TCGReg al, TCGReg ah, + TCGReg bl, TCGReg bh) +{ + /* Merge highpart comparison into AH. */ + if (bh != 0) { + if (ah != 0) { + tcg_out_opc_reg(s, OPC_XOR, tmp0, ah, bh); + ah = tmp0; + } else { + ah = bh; + } + } + /* Merge lowpart comparison into AL. */ + if (bl != 0) { + if (al != 0) { + tcg_out_opc_reg(s, OPC_XOR, tmp1, al, bl); + al = tmp1; + } else { + al = bl; + } + } + /* Merge high and low part comparisons into AL. */ + if (ah != 0) { + if (al != 0) { + tcg_out_opc_reg(s, OPC_OR, tmp0, ah, al); + al = tmp0; + } else { + al = ah; + } + } + return al; +} + +static void tcg_out_setcond2(TCGContext *s, TCGCond cond, TCGReg ret, + TCGReg al, TCGReg ah, TCGReg bl, TCGReg bh) +{ + TCGReg tmp0 = TCG_TMP0; + TCGReg tmp1 = ret; + + assert(ret != TCG_TMP0); + if (ret == ah || ret == bh) { + assert(ret != TCG_TMP1); + tmp1 = TCG_TMP1; + } + + switch (cond) { + case TCG_COND_EQ: + case TCG_COND_NE: + tmp1 = tcg_out_reduce_eq2(s, tmp0, tmp1, al, ah, bl, bh); + tcg_out_setcond(s, cond, ret, tmp1, TCG_REG_ZERO); + break; + + default: + tcg_out_setcond(s, TCG_COND_EQ, tmp0, ah, bh); + tcg_out_setcond(s, tcg_unsigned_cond(cond), tmp1, al, bl); + tcg_out_opc_reg(s, OPC_AND, tmp1, tmp1, tmp0); + tcg_out_setcond(s, tcg_high_cond(cond), tmp0, ah, bh); + tcg_out_opc_reg(s, OPC_OR, ret, tmp1, tmp0); + break; + } +} + +static void tcg_out_brcond2(TCGContext *s, TCGCond cond, TCGReg al, TCGReg ah, + TCGReg bl, TCGReg bh, TCGLabel *l) +{ + TCGCond b_cond = TCG_COND_NE; + TCGReg tmp = TCG_TMP1; + + /* With branches, we emit between 4 and 9 insns with 2 or 3 branches. + With setcond, we emit between 3 and 10 insns and only 1 branch, + which ought to get better branch prediction. */ + switch (cond) { + case TCG_COND_EQ: + case TCG_COND_NE: + b_cond = cond; + tmp = tcg_out_reduce_eq2(s, TCG_TMP0, TCG_TMP1, al, ah, bl, bh); + break; + + default: + /* Minimize code size by preferring a compare not requiring INV. */ + if (mips_cmp_map[cond] & MIPS_CMP_INV) { + cond = tcg_invert_cond(cond); + b_cond = TCG_COND_EQ; + } + tcg_out_setcond2(s, cond, tmp, al, ah, bl, bh); + break; + } + + tcg_out_brcond(s, b_cond, tmp, TCG_REG_ZERO, l); +} + +static void tcg_out_movcond(TCGContext *s, TCGCond cond, TCGReg ret, + TCGReg c1, TCGReg c2, TCGReg v1, TCGReg v2) +{ + bool eqz = false; + + /* If one of the values is zero, put it last to match SEL*Z instructions */ + if (use_mips32r6_instructions && v1 == 0) { + v1 = v2; + v2 = 0; + cond = tcg_invert_cond(cond); + } + + switch (cond) { + case TCG_COND_EQ: + eqz = true; + /* FALLTHRU */ + case TCG_COND_NE: + if (c2 != 0) { + tcg_out_opc_reg(s, OPC_XOR, TCG_TMP0, c1, c2); + c1 = TCG_TMP0; + } + break; + + default: + /* Minimize code size by preferring a compare not requiring INV. */ + if (mips_cmp_map[cond] & MIPS_CMP_INV) { + cond = tcg_invert_cond(cond); + eqz = true; + } + tcg_out_setcond(s, cond, TCG_TMP0, c1, c2); + c1 = TCG_TMP0; + break; + } + + if (use_mips32r6_instructions) { + MIPSInsn m_opc_t = eqz ? OPC_SELEQZ : OPC_SELNEZ; + MIPSInsn m_opc_f = eqz ? OPC_SELNEZ : OPC_SELEQZ; + + if (v2 != 0) { + tcg_out_opc_reg(s, m_opc_f, TCG_TMP1, v2, c1); + } + tcg_out_opc_reg(s, m_opc_t, ret, v1, c1); + if (v2 != 0) { + tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_TMP1); + } + } else { + MIPSInsn m_opc = eqz ? OPC_MOVZ : OPC_MOVN; + + tcg_out_opc_reg(s, m_opc, ret, v1, c1); + + /* This should be guaranteed via constraints */ + tcg_debug_assert(v2 == ret); + } +} + +static void tcg_out_call_int(TCGContext *s, tcg_insn_unit *arg, bool tail) +{ + /* Note that the ABI requires the called function's address to be + loaded into T9, even if a direct branch is in range. */ + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_T9, (uintptr_t)arg); + + /* But do try a direct branch, allowing the cpu better insn prefetch. */ + if (tail) { + if (!tcg_out_opc_jmp(s, OPC_J, arg)) { + tcg_out_opc_reg(s, OPC_JR, 0, TCG_REG_T9, 0); + } + } else { + if (!tcg_out_opc_jmp(s, OPC_JAL, arg)) { + tcg_out_opc_reg(s, OPC_JALR, TCG_REG_RA, TCG_REG_T9, 0); + } + } +} + +static void tcg_out_call(TCGContext *s, tcg_insn_unit *arg) +{ + tcg_out_call_int(s, arg, false); + tcg_out_nop(s); +} + +#if defined(CONFIG_SOFTMMU) +static void * const qemu_ld_helpers[16] = { + [MO_UB] = helper_ret_ldub_mmu, + [MO_SB] = helper_ret_ldsb_mmu, + [MO_LEUW] = helper_le_lduw_mmu, + [MO_LESW] = helper_le_ldsw_mmu, + [MO_LEUL] = helper_le_ldul_mmu, + [MO_LEQ] = helper_le_ldq_mmu, + [MO_BEUW] = helper_be_lduw_mmu, + [MO_BESW] = helper_be_ldsw_mmu, + [MO_BEUL] = helper_be_ldul_mmu, + [MO_BEQ] = helper_be_ldq_mmu, +}; + +static void * const qemu_st_helpers[16] = { + [MO_UB] = helper_ret_stb_mmu, + [MO_LEUW] = helper_le_stw_mmu, + [MO_LEUL] = helper_le_stl_mmu, + [MO_LEQ] = helper_le_stq_mmu, + [MO_BEUW] = helper_be_stw_mmu, + [MO_BEUL] = helper_be_stl_mmu, + [MO_BEQ] = helper_be_stq_mmu, +}; + +/* Helper routines for marshalling helper function arguments into + * the correct registers and stack. + * I is where we want to put this argument, and is updated and returned + * for the next call. ARG is the argument itself. + * + * We provide routines for arguments which are: immediate, 32 bit + * value in register, 16 and 8 bit values in register (which must be zero + * extended before use) and 64 bit value in a lo:hi register pair. + */ + +static int tcg_out_call_iarg_reg(TCGContext *s, int i, TCGReg arg) +{ + if (i < ARRAY_SIZE(tcg_target_call_iarg_regs)) { + tcg_out_mov(s, TCG_TYPE_REG, tcg_target_call_iarg_regs[i], arg); + } else { + tcg_out_st(s, TCG_TYPE_REG, arg, TCG_REG_SP, 4 * i); + } + return i + 1; +} + +static int tcg_out_call_iarg_reg8(TCGContext *s, int i, TCGReg arg) +{ + TCGReg tmp = TCG_TMP0; + if (i < ARRAY_SIZE(tcg_target_call_iarg_regs)) { + tmp = tcg_target_call_iarg_regs[i]; + } + tcg_out_opc_imm(s, OPC_ANDI, tmp, arg, 0xff); + return tcg_out_call_iarg_reg(s, i, tmp); +} + +static int tcg_out_call_iarg_reg16(TCGContext *s, int i, TCGReg arg) +{ + TCGReg tmp = TCG_TMP0; + if (i < ARRAY_SIZE(tcg_target_call_iarg_regs)) { + tmp = tcg_target_call_iarg_regs[i]; + } + tcg_out_opc_imm(s, OPC_ANDI, tmp, arg, 0xffff); + return tcg_out_call_iarg_reg(s, i, tmp); +} + +static int tcg_out_call_iarg_imm(TCGContext *s, int i, TCGArg arg) +{ + TCGReg tmp = TCG_TMP0; + if (arg == 0) { + tmp = TCG_REG_ZERO; + } else { + if (i < ARRAY_SIZE(tcg_target_call_iarg_regs)) { + tmp = tcg_target_call_iarg_regs[i]; + } + tcg_out_movi(s, TCG_TYPE_REG, tmp, arg); + } + return tcg_out_call_iarg_reg(s, i, tmp); +} + +static int tcg_out_call_iarg_reg2(TCGContext *s, int i, TCGReg al, TCGReg ah) +{ + i = (i + 1) & ~1; + i = tcg_out_call_iarg_reg(s, i, (MIPS_BE ? ah : al)); + i = tcg_out_call_iarg_reg(s, i, (MIPS_BE ? al : ah)); + return i; +} + +/* Perform the tlb comparison operation. The complete host address is + placed in BASE. Clobbers AT, T0, A0. */ +static void tcg_out_tlb_load(TCGContext *s, TCGReg base, TCGReg addrl, + TCGReg addrh, TCGMemOpIdx oi, + tcg_insn_unit *label_ptr[2], bool is_load) +{ + TCGMemOp s_bits = get_memop(oi) & MO_SIZE; + int mem_index = get_mmuidx(oi); + int cmp_off + = (is_load + ? offsetof(CPUArchState, tlb_table[mem_index][0].addr_read) + : offsetof(CPUArchState, tlb_table[mem_index][0].addr_write)); + int add_off = offsetof(CPUArchState, tlb_table[mem_index][0].addend); + + tcg_out_opc_sa(s, OPC_SRL, TCG_REG_A0, addrl, + TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS); + tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_A0, TCG_REG_A0, + (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS); + tcg_out_opc_reg(s, OPC_ADDU, TCG_REG_A0, TCG_REG_A0, TCG_AREG0); + + /* Compensate for very large offsets. */ + if (add_off >= 0x8000) { + /* Most target env are smaller than 32k; none are larger than 64k. + Simplify the logic here merely to offset by 0x7ff0, giving us a + range just shy of 64k. Check this assumption. */ + QEMU_BUILD_BUG_ON(offsetof(CPUArchState, + tlb_table[NB_MMU_MODES - 1][1]) + > 0x7ff0 + 0x7fff); + tcg_out_opc_imm(s, OPC_ADDIU, TCG_REG_A0, TCG_REG_A0, 0x7ff0); + cmp_off -= 0x7ff0; + add_off -= 0x7ff0; + } + + /* Load the (low half) tlb comparator. */ + tcg_out_opc_imm(s, OPC_LW, TCG_TMP0, TCG_REG_A0, + cmp_off + (TARGET_LONG_BITS == 64 ? LO_OFF : 0)); + + /* Mask the page bits, keeping the alignment bits to compare against. + In between on 32-bit targets, load the tlb addend for the fast path. */ + tcg_out_movi(s, TCG_TYPE_I32, TCG_TMP1, + TARGET_PAGE_MASK | ((1 << s_bits) - 1)); + if (TARGET_LONG_BITS == 32) { + tcg_out_opc_imm(s, OPC_LW, TCG_REG_A0, TCG_REG_A0, add_off); + } + tcg_out_opc_reg(s, OPC_AND, TCG_TMP1, TCG_TMP1, addrl); + + label_ptr[0] = s->code_ptr; + tcg_out_opc_br(s, OPC_BNE, TCG_TMP1, TCG_TMP0); + + /* Load and test the high half tlb comparator. */ + if (TARGET_LONG_BITS == 64) { + /* delay slot */ + tcg_out_opc_imm(s, OPC_LW, TCG_TMP0, TCG_REG_A0, cmp_off + HI_OFF); + + /* Load the tlb addend for the fast path. We can't do it earlier with + 64-bit targets or we'll clobber a0 before reading the high half tlb + comparator. */ + tcg_out_opc_imm(s, OPC_LW, TCG_REG_A0, TCG_REG_A0, add_off); + + label_ptr[1] = s->code_ptr; + tcg_out_opc_br(s, OPC_BNE, addrh, TCG_TMP0); + } + + /* delay slot */ + tcg_out_opc_reg(s, OPC_ADDU, base, TCG_REG_A0, addrl); +} + +static void add_qemu_ldst_label(TCGContext *s, int is_ld, TCGMemOpIdx oi, + TCGReg datalo, TCGReg datahi, + TCGReg addrlo, TCGReg addrhi, + void *raddr, tcg_insn_unit *label_ptr[2]) +{ + TCGLabelQemuLdst *label = new_ldst_label(s); + + label->is_ld = is_ld; + label->oi = oi; + label->datalo_reg = datalo; + label->datahi_reg = datahi; + label->addrlo_reg = addrlo; + label->addrhi_reg = addrhi; + label->raddr = raddr; + label->label_ptr[0] = label_ptr[0]; + if (TARGET_LONG_BITS == 64) { + label->label_ptr[1] = label_ptr[1]; + } +} + +static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l) +{ + TCGMemOpIdx oi = l->oi; + TCGMemOp opc = get_memop(oi); + TCGReg v0; + int i; + + /* resolve label address */ + reloc_pc16(l->label_ptr[0], s->code_ptr); + if (TARGET_LONG_BITS == 64) { + reloc_pc16(l->label_ptr[1], s->code_ptr); + } + + i = 1; + if (TARGET_LONG_BITS == 64) { + i = tcg_out_call_iarg_reg2(s, i, l->addrlo_reg, l->addrhi_reg); + } else { + i = tcg_out_call_iarg_reg(s, i, l->addrlo_reg); + } + i = tcg_out_call_iarg_imm(s, i, oi); + i = tcg_out_call_iarg_imm(s, i, (intptr_t)l->raddr); + tcg_out_call_int(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SSIZE)], false); + /* delay slot */ + tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0); + + v0 = l->datalo_reg; + if ((opc & MO_SIZE) == MO_64) { + /* We eliminated V0 from the possible output registers, so it + cannot be clobbered here. So we must move V1 first. */ + if (MIPS_BE) { + tcg_out_mov(s, TCG_TYPE_I32, v0, TCG_REG_V1); + v0 = l->datahi_reg; + } else { + tcg_out_mov(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_V1); + } + } + + reloc_pc16(s->code_ptr, l->raddr); + tcg_out_opc_br(s, OPC_BEQ, TCG_REG_ZERO, TCG_REG_ZERO); + /* delay slot */ + tcg_out_mov(s, TCG_TYPE_REG, v0, TCG_REG_V0); +} + +static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l) +{ + TCGMemOpIdx oi = l->oi; + TCGMemOp opc = get_memop(oi); + TCGMemOp s_bits = opc & MO_SIZE; + int i; + + /* resolve label address */ + reloc_pc16(l->label_ptr[0], s->code_ptr); + if (TARGET_LONG_BITS == 64) { + reloc_pc16(l->label_ptr[1], s->code_ptr); + } + + i = 1; + if (TARGET_LONG_BITS == 64) { + i = tcg_out_call_iarg_reg2(s, i, l->addrlo_reg, l->addrhi_reg); + } else { + i = tcg_out_call_iarg_reg(s, i, l->addrlo_reg); + } + switch (s_bits) { + case MO_8: + i = tcg_out_call_iarg_reg8(s, i, l->datalo_reg); + break; + case MO_16: + i = tcg_out_call_iarg_reg16(s, i, l->datalo_reg); + break; + case MO_32: + i = tcg_out_call_iarg_reg(s, i, l->datalo_reg); + break; + case MO_64: + i = tcg_out_call_iarg_reg2(s, i, l->datalo_reg, l->datahi_reg); + break; + default: + tcg_abort(); + } + i = tcg_out_call_iarg_imm(s, i, oi); + + /* Tail call to the store helper. Thus force the return address + computation to take place in the return address register. */ + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_RA, (intptr_t)l->raddr); + i = tcg_out_call_iarg_reg(s, i, TCG_REG_RA); + tcg_out_call_int(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)], true); + /* delay slot */ + tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0); +} +#endif + +static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi, + TCGReg base, TCGMemOp opc) +{ + switch (opc & (MO_SSIZE | MO_BSWAP)) { + case MO_UB: + tcg_out_opc_imm(s, OPC_LBU, datalo, base, 0); + break; + case MO_SB: + tcg_out_opc_imm(s, OPC_LB, datalo, base, 0); + break; + case MO_UW | MO_BSWAP: + tcg_out_opc_imm(s, OPC_LHU, TCG_TMP1, base, 0); + tcg_out_bswap16(s, datalo, TCG_TMP1); + break; + case MO_UW: + tcg_out_opc_imm(s, OPC_LHU, datalo, base, 0); + break; + case MO_SW | MO_BSWAP: + tcg_out_opc_imm(s, OPC_LHU, TCG_TMP1, base, 0); + tcg_out_bswap16s(s, datalo, TCG_TMP1); + break; + case MO_SW: + tcg_out_opc_imm(s, OPC_LH, datalo, base, 0); + break; + case MO_UL | MO_BSWAP: + tcg_out_opc_imm(s, OPC_LW, TCG_TMP1, base, 0); + tcg_out_bswap32(s, datalo, TCG_TMP1); + break; + case MO_UL: + tcg_out_opc_imm(s, OPC_LW, datalo, base, 0); + break; + case MO_Q | MO_BSWAP: + tcg_out_opc_imm(s, OPC_LW, TCG_TMP1, base, HI_OFF); + tcg_out_bswap32(s, datalo, TCG_TMP1); + tcg_out_opc_imm(s, OPC_LW, TCG_TMP1, base, LO_OFF); + tcg_out_bswap32(s, datahi, TCG_TMP1); + break; + case MO_Q: + tcg_out_opc_imm(s, OPC_LW, datalo, base, LO_OFF); + tcg_out_opc_imm(s, OPC_LW, datahi, base, HI_OFF); + break; + default: + tcg_abort(); + } +} + +static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is_64) +{ + TCGReg addr_regl, addr_regh __attribute__((unused)); + TCGReg data_regl, data_regh; + TCGMemOpIdx oi; + TCGMemOp opc; +#if defined(CONFIG_SOFTMMU) + tcg_insn_unit *label_ptr[2]; +#endif + /* Note that we've eliminated V0 from the output registers, + so we won't overwrite the base register during loading. */ + TCGReg base = TCG_REG_V0; + + data_regl = *args++; + data_regh = (is_64 ? *args++ : 0); + addr_regl = *args++; + addr_regh = (TARGET_LONG_BITS == 64 ? *args++ : 0); + oi = *args++; + opc = get_memop(oi); + +#if defined(CONFIG_SOFTMMU) + tcg_out_tlb_load(s, base, addr_regl, addr_regh, oi, label_ptr, 1); + tcg_out_qemu_ld_direct(s, data_regl, data_regh, base, opc); + add_qemu_ldst_label(s, 1, oi, data_regl, data_regh, addr_regl, addr_regh, + s->code_ptr, label_ptr); +#else + if (guest_base == 0 && data_regl != addr_regl) { + base = addr_regl; + } else if (guest_base == (int16_t)guest_base) { + tcg_out_opc_imm(s, OPC_ADDIU, base, addr_regl, guest_base); + } else { + tcg_out_movi(s, TCG_TYPE_PTR, base, guest_base); + tcg_out_opc_reg(s, OPC_ADDU, base, base, addr_regl); + } + tcg_out_qemu_ld_direct(s, data_regl, data_regh, base, opc); +#endif +} + +static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi, + TCGReg base, TCGMemOp opc) +{ + switch (opc & (MO_SIZE | MO_BSWAP)) { + case MO_8: + tcg_out_opc_imm(s, OPC_SB, datalo, base, 0); + break; + + case MO_16 | MO_BSWAP: + tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP1, datalo, 0xffff); + tcg_out_bswap16(s, TCG_TMP1, TCG_TMP1); + datalo = TCG_TMP1; + /* FALLTHRU */ + case MO_16: + tcg_out_opc_imm(s, OPC_SH, datalo, base, 0); + break; + + case MO_32 | MO_BSWAP: + tcg_out_bswap32(s, TCG_TMP1, datalo); + datalo = TCG_TMP1; + /* FALLTHRU */ + case MO_32: + tcg_out_opc_imm(s, OPC_SW, datalo, base, 0); + break; + + case MO_64 | MO_BSWAP: + tcg_out_bswap32(s, TCG_TMP1, datalo); + tcg_out_opc_imm(s, OPC_SW, TCG_TMP1, base, HI_OFF); + tcg_out_bswap32(s, TCG_TMP1, datahi); + tcg_out_opc_imm(s, OPC_SW, TCG_TMP1, base, LO_OFF); + break; + case MO_64: + tcg_out_opc_imm(s, OPC_SW, datalo, base, LO_OFF); + tcg_out_opc_imm(s, OPC_SW, datahi, base, HI_OFF); + break; + + default: + tcg_abort(); + } +} + +static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is_64) +{ + TCGReg addr_regl, addr_regh __attribute__((unused)); + TCGReg data_regl, data_regh, base; + TCGMemOpIdx oi; + TCGMemOp opc; +#if defined(CONFIG_SOFTMMU) + tcg_insn_unit *label_ptr[2]; +#endif + + data_regl = *args++; + data_regh = (is_64 ? *args++ : 0); + addr_regl = *args++; + addr_regh = (TARGET_LONG_BITS == 64 ? *args++ : 0); + oi = *args++; + opc = get_memop(oi); + +#if defined(CONFIG_SOFTMMU) + /* Note that we eliminated the helper's address argument, + so we can reuse that for the base. */ + base = (TARGET_LONG_BITS == 32 ? TCG_REG_A1 : TCG_REG_A2); + tcg_out_tlb_load(s, base, addr_regl, addr_regh, oi, label_ptr, 0); + tcg_out_qemu_st_direct(s, data_regl, data_regh, base, opc); + add_qemu_ldst_label(s, 0, oi, data_regl, data_regh, addr_regl, addr_regh, + s->code_ptr, label_ptr); +#else + if (guest_base == 0) { + base = addr_regl; + } else { + base = TCG_REG_A0; + if (guest_base == (int16_t)guest_base) { + tcg_out_opc_imm(s, OPC_ADDIU, base, addr_regl, guest_base); + } else { + tcg_out_movi(s, TCG_TYPE_PTR, base, guest_base); + tcg_out_opc_reg(s, OPC_ADDU, base, base, addr_regl); + } + } + tcg_out_qemu_st_direct(s, data_regl, data_regh, base, opc); +#endif +} + +static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, + const TCGArg *args, const int *const_args) +{ + MIPSInsn i1, i2; + TCGArg a0, a1, a2; + int c2; + + a0 = args[0]; + a1 = args[1]; + a2 = args[2]; + c2 = const_args[2]; + + switch (opc) { + case INDEX_op_exit_tb: + { + TCGReg b0 = TCG_REG_ZERO; + + if (a0 & ~0xffff) { + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_V0, a0 & ~0xffff); + b0 = TCG_REG_V0; + } + if (!tcg_out_opc_jmp(s, OPC_J, tb_ret_addr)) { + tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0, + (uintptr_t)tb_ret_addr); + tcg_out_opc_reg(s, OPC_JR, 0, TCG_TMP0, 0); + } + tcg_out_opc_imm(s, OPC_ORI, TCG_REG_V0, b0, a0 & 0xffff); + } + break; + case INDEX_op_goto_tb: + if (s->tb_jmp_offset) { + /* direct jump method */ + s->tb_jmp_offset[a0] = tcg_current_code_size(s); + /* Avoid clobbering the address during retranslation. */ + tcg_out32(s, OPC_J | (*(uint32_t *)s->code_ptr & 0x3ffffff)); + } else { + /* indirect jump method */ + tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP0, TCG_REG_ZERO, + (uintptr_t)(s->tb_next + a0)); + tcg_out_opc_reg(s, OPC_JR, 0, TCG_TMP0, 0); + } + tcg_out_nop(s); + s->tb_next_offset[a0] = tcg_current_code_size(s); + break; + case INDEX_op_br: + tcg_out_brcond(s, TCG_COND_EQ, TCG_REG_ZERO, TCG_REG_ZERO, + arg_label(a0)); + break; + + case INDEX_op_ld8u_i32: + i1 = OPC_LBU; + goto do_ldst; + case INDEX_op_ld8s_i32: + i1 = OPC_LB; + goto do_ldst; + case INDEX_op_ld16u_i32: + i1 = OPC_LHU; + goto do_ldst; + case INDEX_op_ld16s_i32: + i1 = OPC_LH; + goto do_ldst; + case INDEX_op_ld_i32: + i1 = OPC_LW; + goto do_ldst; + case INDEX_op_st8_i32: + i1 = OPC_SB; + goto do_ldst; + case INDEX_op_st16_i32: + i1 = OPC_SH; + goto do_ldst; + case INDEX_op_st_i32: + i1 = OPC_SW; + do_ldst: + tcg_out_ldst(s, i1, a0, a1, a2); + break; + + case INDEX_op_add_i32: + i1 = OPC_ADDU, i2 = OPC_ADDIU; + goto do_binary; + case INDEX_op_or_i32: + i1 = OPC_OR, i2 = OPC_ORI; + goto do_binary; + case INDEX_op_xor_i32: + i1 = OPC_XOR, i2 = OPC_XORI; + do_binary: + if (c2) { + tcg_out_opc_imm(s, i2, a0, a1, a2); + break; + } + do_binaryv: + tcg_out_opc_reg(s, i1, a0, a1, a2); + break; + + case INDEX_op_sub_i32: + if (c2) { + tcg_out_opc_imm(s, OPC_ADDIU, a0, a1, -a2); + break; + } + i1 = OPC_SUBU; + goto do_binary; + case INDEX_op_and_i32: + if (c2 && a2 != (uint16_t)a2) { + int msb = ctz32(~a2) - 1; + assert(use_mips32r2_instructions); + assert(is_p2m1(a2)); + tcg_out_opc_bf(s, OPC_EXT, a0, a1, msb, 0); + break; + } + i1 = OPC_AND, i2 = OPC_ANDI; + goto do_binary; + case INDEX_op_nor_i32: + i1 = OPC_NOR; + goto do_binaryv; + + case INDEX_op_mul_i32: + if (use_mips32_instructions) { + tcg_out_opc_reg(s, OPC_MUL, a0, a1, a2); + break; + } + i1 = OPC_MULT, i2 = OPC_MFLO; + goto do_hilo1; + case INDEX_op_mulsh_i32: + if (use_mips32r6_instructions) { + tcg_out_opc_reg(s, OPC_MUH, a0, a1, a2); + break; + } + i1 = OPC_MULT, i2 = OPC_MFHI; + goto do_hilo1; + case INDEX_op_muluh_i32: + if (use_mips32r6_instructions) { + tcg_out_opc_reg(s, OPC_MUHU, a0, a1, a2); + break; + } + i1 = OPC_MULTU, i2 = OPC_MFHI; + goto do_hilo1; + case INDEX_op_div_i32: + if (use_mips32r6_instructions) { + tcg_out_opc_reg(s, OPC_DIV_R6, a0, a1, a2); + break; + } + i1 = OPC_DIV, i2 = OPC_MFLO; + goto do_hilo1; + case INDEX_op_divu_i32: + if (use_mips32r6_instructions) { + tcg_out_opc_reg(s, OPC_DIVU_R6, a0, a1, a2); + break; + } + i1 = OPC_DIVU, i2 = OPC_MFLO; + goto do_hilo1; + case INDEX_op_rem_i32: + if (use_mips32r6_instructions) { + tcg_out_opc_reg(s, OPC_MOD, a0, a1, a2); + break; + } + i1 = OPC_DIV, i2 = OPC_MFHI; + goto do_hilo1; + case INDEX_op_remu_i32: + if (use_mips32r6_instructions) { + tcg_out_opc_reg(s, OPC_MODU, a0, a1, a2); + break; + } + i1 = OPC_DIVU, i2 = OPC_MFHI; + do_hilo1: + tcg_out_opc_reg(s, i1, 0, a1, a2); + tcg_out_opc_reg(s, i2, a0, 0, 0); + break; + + case INDEX_op_muls2_i32: + i1 = OPC_MULT; + goto do_hilo2; + case INDEX_op_mulu2_i32: + i1 = OPC_MULTU; + do_hilo2: + tcg_out_opc_reg(s, i1, 0, a2, args[3]); + tcg_out_opc_reg(s, OPC_MFLO, a0, 0, 0); + tcg_out_opc_reg(s, OPC_MFHI, a1, 0, 0); + break; + + case INDEX_op_not_i32: + i1 = OPC_NOR; + goto do_unary; + case INDEX_op_bswap16_i32: + i1 = OPC_WSBH; + goto do_unary; + case INDEX_op_ext8s_i32: + i1 = OPC_SEB; + goto do_unary; + case INDEX_op_ext16s_i32: + i1 = OPC_SEH; + do_unary: + tcg_out_opc_reg(s, i1, a0, TCG_REG_ZERO, a1); + break; + + case INDEX_op_sar_i32: + i1 = OPC_SRAV, i2 = OPC_SRA; + goto do_shift; + case INDEX_op_shl_i32: + i1 = OPC_SLLV, i2 = OPC_SLL; + goto do_shift; + case INDEX_op_shr_i32: + i1 = OPC_SRLV, i2 = OPC_SRL; + goto do_shift; + case INDEX_op_rotr_i32: + i1 = OPC_ROTRV, i2 = OPC_ROTR; + do_shift: + if (c2) { + tcg_out_opc_sa(s, i2, a0, a1, a2); + } else { + tcg_out_opc_reg(s, i1, a0, a2, a1); + } + break; + case INDEX_op_rotl_i32: + if (c2) { + tcg_out_opc_sa(s, OPC_ROTR, a0, a1, 32 - a2); + } else { + tcg_out_opc_reg(s, OPC_SUBU, TCG_TMP0, TCG_REG_ZERO, a2); + tcg_out_opc_reg(s, OPC_ROTRV, a0, TCG_TMP0, a1); + } + break; + + case INDEX_op_bswap32_i32: + tcg_out_opc_reg(s, OPC_WSBH, a0, 0, a1); + tcg_out_opc_sa(s, OPC_ROTR, a0, a0, 16); + break; + + case INDEX_op_deposit_i32: + tcg_out_opc_bf(s, OPC_INS, a0, a2, args[3] + args[4] - 1, args[3]); + break; + + case INDEX_op_brcond_i32: + tcg_out_brcond(s, a2, a0, a1, arg_label(args[3])); + break; + case INDEX_op_brcond2_i32: + tcg_out_brcond2(s, args[4], a0, a1, a2, args[3], arg_label(args[5])); + break; + + case INDEX_op_movcond_i32: + tcg_out_movcond(s, args[5], a0, a1, a2, args[3], args[4]); + break; + + case INDEX_op_setcond_i32: + tcg_out_setcond(s, args[3], a0, a1, a2); + break; + case INDEX_op_setcond2_i32: + tcg_out_setcond2(s, args[5], a0, a1, a2, args[3], args[4]); + break; + + case INDEX_op_qemu_ld_i32: + tcg_out_qemu_ld(s, args, false); + break; + case INDEX_op_qemu_ld_i64: + tcg_out_qemu_ld(s, args, true); + break; + case INDEX_op_qemu_st_i32: + tcg_out_qemu_st(s, args, false); + break; + case INDEX_op_qemu_st_i64: + tcg_out_qemu_st(s, args, true); + break; + + case INDEX_op_add2_i32: + tcg_out_addsub2(s, a0, a1, a2, args[3], args[4], args[5], + const_args[4], const_args[5], false); + break; + case INDEX_op_sub2_i32: + tcg_out_addsub2(s, a0, a1, a2, args[3], args[4], args[5], + const_args[4], const_args[5], true); + break; + + case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */ + case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi. */ + case INDEX_op_call: /* Always emitted via tcg_out_call. */ + default: + tcg_abort(); + } +} + +static const TCGTargetOpDef mips_op_defs[] = { + { INDEX_op_exit_tb, { } }, + { INDEX_op_goto_tb, { } }, + { INDEX_op_br, { } }, + + { INDEX_op_ld8u_i32, { "r", "r" } }, + { INDEX_op_ld8s_i32, { "r", "r" } }, + { INDEX_op_ld16u_i32, { "r", "r" } }, + { INDEX_op_ld16s_i32, { "r", "r" } }, + { INDEX_op_ld_i32, { "r", "r" } }, + { INDEX_op_st8_i32, { "rZ", "r" } }, + { INDEX_op_st16_i32, { "rZ", "r" } }, + { INDEX_op_st_i32, { "rZ", "r" } }, + + { INDEX_op_add_i32, { "r", "rZ", "rJ" } }, + { INDEX_op_mul_i32, { "r", "rZ", "rZ" } }, +#if !use_mips32r6_instructions + { INDEX_op_muls2_i32, { "r", "r", "rZ", "rZ" } }, + { INDEX_op_mulu2_i32, { "r", "r", "rZ", "rZ" } }, +#endif + { INDEX_op_mulsh_i32, { "r", "rZ", "rZ" } }, + { INDEX_op_muluh_i32, { "r", "rZ", "rZ" } }, + { INDEX_op_div_i32, { "r", "rZ", "rZ" } }, + { INDEX_op_divu_i32, { "r", "rZ", "rZ" } }, + { INDEX_op_rem_i32, { "r", "rZ", "rZ" } }, + { INDEX_op_remu_i32, { "r", "rZ", "rZ" } }, + { INDEX_op_sub_i32, { "r", "rZ", "rN" } }, + + { INDEX_op_and_i32, { "r", "rZ", "rIK" } }, + { INDEX_op_nor_i32, { "r", "rZ", "rZ" } }, + { INDEX_op_not_i32, { "r", "rZ" } }, + { INDEX_op_or_i32, { "r", "rZ", "rIZ" } }, + { INDEX_op_xor_i32, { "r", "rZ", "rIZ" } }, + + { INDEX_op_shl_i32, { "r", "rZ", "ri" } }, + { INDEX_op_shr_i32, { "r", "rZ", "ri" } }, + { INDEX_op_sar_i32, { "r", "rZ", "ri" } }, + { INDEX_op_rotr_i32, { "r", "rZ", "ri" } }, + { INDEX_op_rotl_i32, { "r", "rZ", "ri" } }, + + { INDEX_op_bswap16_i32, { "r", "r" } }, + { INDEX_op_bswap32_i32, { "r", "r" } }, + + { INDEX_op_ext8s_i32, { "r", "rZ" } }, + { INDEX_op_ext16s_i32, { "r", "rZ" } }, + + { INDEX_op_deposit_i32, { "r", "0", "rZ" } }, + + { INDEX_op_brcond_i32, { "rZ", "rZ" } }, +#if use_mips32r6_instructions + { INDEX_op_movcond_i32, { "r", "rZ", "rZ", "rZ", "rZ" } }, +#else + { INDEX_op_movcond_i32, { "r", "rZ", "rZ", "rZ", "0" } }, +#endif + { INDEX_op_setcond_i32, { "r", "rZ", "rZ" } }, + { INDEX_op_setcond2_i32, { "r", "rZ", "rZ", "rZ", "rZ" } }, + + { INDEX_op_add2_i32, { "r", "r", "rZ", "rZ", "rN", "rN" } }, + { INDEX_op_sub2_i32, { "r", "r", "rZ", "rZ", "rN", "rN" } }, + { INDEX_op_brcond2_i32, { "rZ", "rZ", "rZ", "rZ" } }, + +#if TARGET_LONG_BITS == 32 + { INDEX_op_qemu_ld_i32, { "L", "lZ" } }, + { INDEX_op_qemu_st_i32, { "SZ", "SZ" } }, + { INDEX_op_qemu_ld_i64, { "L", "L", "lZ" } }, + { INDEX_op_qemu_st_i64, { "SZ", "SZ", "SZ" } }, +#else + { INDEX_op_qemu_ld_i32, { "L", "lZ", "lZ" } }, + { INDEX_op_qemu_st_i32, { "SZ", "SZ", "SZ" } }, + { INDEX_op_qemu_ld_i64, { "L", "L", "lZ", "lZ" } }, + { INDEX_op_qemu_st_i64, { "SZ", "SZ", "SZ", "SZ" } }, +#endif + { -1 }, +}; + +static int tcg_target_callee_save_regs[] = { + TCG_REG_S0, /* used for the global env (TCG_AREG0) */ + TCG_REG_S1, + TCG_REG_S2, + TCG_REG_S3, + TCG_REG_S4, + TCG_REG_S5, + TCG_REG_S6, + TCG_REG_S7, + TCG_REG_S8, + TCG_REG_RA, /* should be last for ABI compliance */ +}; + +/* The Linux kernel doesn't provide any information about the available + instruction set. Probe it using a signal handler. */ + + +#ifndef use_movnz_instructions +bool use_movnz_instructions = false; +#endif + +#ifndef use_mips32_instructions +bool use_mips32_instructions = false; +#endif + +#ifndef use_mips32r2_instructions +bool use_mips32r2_instructions = false; +#endif + +static volatile sig_atomic_t got_sigill; + +static void sigill_handler(int signo, siginfo_t *si, void *data) +{ + /* Skip the faulty instruction */ + ucontext_t *uc = (ucontext_t *)data; + uc->uc_mcontext.pc += 4; + + got_sigill = 1; +} + +static void tcg_target_detect_isa(void) +{ + struct sigaction sa_old, sa_new; + + memset(&sa_new, 0, sizeof(sa_new)); + sa_new.sa_flags = SA_SIGINFO; + sa_new.sa_sigaction = sigill_handler; + sigaction(SIGILL, &sa_new, &sa_old); + + /* Probe for movn/movz, necessary to implement movcond. */ +#ifndef use_movnz_instructions + got_sigill = 0; + asm volatile(".set push\n" + ".set mips32\n" + "movn $zero, $zero, $zero\n" + "movz $zero, $zero, $zero\n" + ".set pop\n" + : : : ); + use_movnz_instructions = !got_sigill; +#endif + + /* Probe for MIPS32 instructions. As no subsetting is allowed + by the specification, it is only necessary to probe for one + of the instructions. */ +#ifndef use_mips32_instructions + got_sigill = 0; + asm volatile(".set push\n" + ".set mips32\n" + "mul $zero, $zero\n" + ".set pop\n" + : : : ); + use_mips32_instructions = !got_sigill; +#endif + + /* Probe for MIPS32r2 instructions if MIPS32 instructions are + available. As no subsetting is allowed by the specification, + it is only necessary to probe for one of the instructions. */ +#ifndef use_mips32r2_instructions + if (use_mips32_instructions) { + got_sigill = 0; + asm volatile(".set push\n" + ".set mips32r2\n" + "seb $zero, $zero\n" + ".set pop\n" + : : : ); + use_mips32r2_instructions = !got_sigill; + } +#endif + + sigaction(SIGILL, &sa_old, NULL); +} + +/* Generate global QEMU prologue and epilogue code */ +static void tcg_target_qemu_prologue(TCGContext *s) +{ + int i, frame_size; + + /* reserve some stack space, also for TCG temps. */ + frame_size = ARRAY_SIZE(tcg_target_callee_save_regs) * 4 + + TCG_STATIC_CALL_ARGS_SIZE + + CPU_TEMP_BUF_NLONGS * sizeof(long); + frame_size = (frame_size + TCG_TARGET_STACK_ALIGN - 1) & + ~(TCG_TARGET_STACK_ALIGN - 1); + tcg_set_frame(s, TCG_REG_SP, ARRAY_SIZE(tcg_target_callee_save_regs) * 4 + + TCG_STATIC_CALL_ARGS_SIZE, + CPU_TEMP_BUF_NLONGS * sizeof(long)); + + /* TB prologue */ + tcg_out_addi(s, TCG_REG_SP, -frame_size); + for(i = 0 ; i < ARRAY_SIZE(tcg_target_callee_save_regs) ; i++) { + tcg_out_st(s, TCG_TYPE_I32, tcg_target_callee_save_regs[i], + TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE + i * 4); + } + + /* Call generated code */ + tcg_out_opc_reg(s, OPC_JR, 0, tcg_target_call_iarg_regs[1], 0); + tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]); + tb_ret_addr = s->code_ptr; + + /* TB epilogue */ + for(i = 0 ; i < ARRAY_SIZE(tcg_target_callee_save_regs) ; i++) { + tcg_out_ld(s, TCG_TYPE_I32, tcg_target_callee_save_regs[i], + TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE + i * 4); + } + + tcg_out_opc_reg(s, OPC_JR, 0, TCG_REG_RA, 0); + tcg_out_addi(s, TCG_REG_SP, frame_size); +} + +static void tcg_target_init(TCGContext *s) +{ + tcg_target_detect_isa(); + tcg_regset_set(tcg_target_available_regs[TCG_TYPE_I32], 0xffffffff); + tcg_regset_set(tcg_target_call_clobber_regs, + (1 << TCG_REG_V0) | + (1 << TCG_REG_V1) | + (1 << TCG_REG_A0) | + (1 << TCG_REG_A1) | + (1 << TCG_REG_A2) | + (1 << TCG_REG_A3) | + (1 << TCG_REG_T0) | + (1 << TCG_REG_T1) | + (1 << TCG_REG_T2) | + (1 << TCG_REG_T3) | + (1 << TCG_REG_T4) | + (1 << TCG_REG_T5) | + (1 << TCG_REG_T6) | + (1 << TCG_REG_T7) | + (1 << TCG_REG_T8) | + (1 << TCG_REG_T9)); + + tcg_regset_clear(s->reserved_regs); + tcg_regset_set_reg(s->reserved_regs, TCG_REG_ZERO); /* zero register */ + tcg_regset_set_reg(s->reserved_regs, TCG_REG_K0); /* kernel use only */ + tcg_regset_set_reg(s->reserved_regs, TCG_REG_K1); /* kernel use only */ + tcg_regset_set_reg(s->reserved_regs, TCG_TMP0); /* internal use */ + tcg_regset_set_reg(s->reserved_regs, TCG_TMP1); /* internal use */ + tcg_regset_set_reg(s->reserved_regs, TCG_REG_RA); /* return address */ + tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP); /* stack pointer */ + tcg_regset_set_reg(s->reserved_regs, TCG_REG_GP); /* global pointer */ + + tcg_add_target_add_op_defs(mips_op_defs); +} + +void tb_set_jmp_target1(uintptr_t jmp_addr, uintptr_t addr) +{ + uint32_t *ptr = (uint32_t *)jmp_addr; + *ptr = deposit32(*ptr, 0, 26, addr >> 2); + flush_icache_range(jmp_addr, jmp_addr + 4); +} diff --git a/tcg/ppc/tcg-target.c b/tcg/ppc/tcg-target.c deleted file mode 100644 index c593344..0000000 --- a/tcg/ppc/tcg-target.c +++ /dev/null @@ -1,2762 +0,0 @@ -/* - * Tiny Code Generator for QEMU - * - * Copyright (c) 2008 Fabrice Bellard - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - */ - -#include "qemu/osdep.h" -#include "tcg-be-ldst.h" - -#if defined _CALL_DARWIN || defined __APPLE__ -#define TCG_TARGET_CALL_DARWIN -#endif -#ifdef _CALL_SYSV -# define TCG_TARGET_CALL_ALIGN_ARGS 1 -#endif - -/* For some memory operations, we need a scratch that isn't R0. For the AIX - calling convention, we can re-use the TOC register since we'll be reloading - it at every call. Otherwise R12 will do nicely as neither a call-saved - register nor a parameter register. */ -#ifdef _CALL_AIX -# define TCG_REG_TMP1 TCG_REG_R2 -#else -# define TCG_REG_TMP1 TCG_REG_R12 -#endif - -/* For the 64-bit target, we don't like the 5 insn sequence needed to build - full 64-bit addresses. Better to have a base register to which we can - apply a 32-bit displacement. - - There are generally three items of interest: - (1) helper functions in the main executable, - (2) TranslationBlock data structures, - (3) the return address in the epilogue. - - For user-only, we USE_STATIC_CODE_GEN_BUFFER, so the code_gen_buffer - will be inside the main executable, and thus near enough to make a - pointer to the epilogue be within 2GB of all helper functions. - - For softmmu, we'll let the kernel choose the address of code_gen_buffer, - and odds are it'll be somewhere close to the main malloc arena, and so - a pointer to the epilogue will be within 2GB of the TranslationBlocks. - - For --enable-pie, everything will be kinda near everything else, - somewhere in high memory. - - Thus we choose to keep the return address in a call-saved register. */ -#define TCG_REG_RA TCG_REG_R31 -#define USE_REG_RA (TCG_TARGET_REG_BITS == 64) - -/* Shorthand for size of a pointer. Avoid promotion to unsigned. */ -#define SZP ((int)sizeof(void *)) - -/* Shorthand for size of a register. */ -#define SZR (TCG_TARGET_REG_BITS / 8) - -#define TCG_CT_CONST_S16 0x100 -#define TCG_CT_CONST_U16 0x200 -#define TCG_CT_CONST_S32 0x400 -#define TCG_CT_CONST_U32 0x800 -#define TCG_CT_CONST_ZERO 0x1000 -#define TCG_CT_CONST_MONE 0x2000 - -static tcg_insn_unit *tb_ret_addr; - -#include "elf.h" -static bool have_isa_2_06; -#define HAVE_ISA_2_06 have_isa_2_06 -#define HAVE_ISEL have_isa_2_06 - -#ifndef CONFIG_SOFTMMU -#define TCG_GUEST_BASE_REG 30 -#endif - -#ifndef NDEBUG -static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = { - "r0", - "r1", - "r2", - "r3", - "r4", - "r5", - "r6", - "r7", - "r8", - "r9", - "r10", - "r11", - "r12", - "r13", - "r14", - "r15", - "r16", - "r17", - "r18", - "r19", - "r20", - "r21", - "r22", - "r23", - "r24", - "r25", - "r26", - "r27", - "r28", - "r29", - "r30", - "r31" -}; -#endif - -static const int tcg_target_reg_alloc_order[] = { - TCG_REG_R14, /* call saved registers */ - TCG_REG_R15, - TCG_REG_R16, - TCG_REG_R17, - TCG_REG_R18, - TCG_REG_R19, - TCG_REG_R20, - TCG_REG_R21, - TCG_REG_R22, - TCG_REG_R23, - TCG_REG_R24, - TCG_REG_R25, - TCG_REG_R26, - TCG_REG_R27, - TCG_REG_R28, - TCG_REG_R29, - TCG_REG_R30, - TCG_REG_R31, - TCG_REG_R12, /* call clobbered, non-arguments */ - TCG_REG_R11, - TCG_REG_R2, - TCG_REG_R13, - TCG_REG_R10, /* call clobbered, arguments */ - TCG_REG_R9, - TCG_REG_R8, - TCG_REG_R7, - TCG_REG_R6, - TCG_REG_R5, - TCG_REG_R4, - TCG_REG_R3, -}; - -static const int tcg_target_call_iarg_regs[] = { - TCG_REG_R3, - TCG_REG_R4, - TCG_REG_R5, - TCG_REG_R6, - TCG_REG_R7, - TCG_REG_R8, - TCG_REG_R9, - TCG_REG_R10 -}; - -static const int tcg_target_call_oarg_regs[] = { - TCG_REG_R3, - TCG_REG_R4 -}; - -static const int tcg_target_callee_save_regs[] = { -#ifdef TCG_TARGET_CALL_DARWIN - TCG_REG_R11, -#endif - TCG_REG_R14, - TCG_REG_R15, - TCG_REG_R16, - TCG_REG_R17, - TCG_REG_R18, - TCG_REG_R19, - TCG_REG_R20, - TCG_REG_R21, - TCG_REG_R22, - TCG_REG_R23, - TCG_REG_R24, - TCG_REG_R25, - TCG_REG_R26, - TCG_REG_R27, /* currently used for the global env */ - TCG_REG_R28, - TCG_REG_R29, - TCG_REG_R30, - TCG_REG_R31 -}; - -static inline bool in_range_b(tcg_target_long target) -{ - return target == sextract64(target, 0, 26); -} - -static uint32_t reloc_pc24_val(tcg_insn_unit *pc, tcg_insn_unit *target) -{ - ptrdiff_t disp = tcg_ptr_byte_diff(target, pc); - assert(in_range_b(disp)); - return disp & 0x3fffffc; -} - -static void reloc_pc24(tcg_insn_unit *pc, tcg_insn_unit *target) -{ - *pc = (*pc & ~0x3fffffc) | reloc_pc24_val(pc, target); -} - -static uint16_t reloc_pc14_val(tcg_insn_unit *pc, tcg_insn_unit *target) -{ - ptrdiff_t disp = tcg_ptr_byte_diff(target, pc); - assert(disp == (int16_t) disp); - return disp & 0xfffc; -} - -static void reloc_pc14(tcg_insn_unit *pc, tcg_insn_unit *target) -{ - *pc = (*pc & ~0xfffc) | reloc_pc14_val(pc, target); -} - -static inline void tcg_out_b_noaddr(TCGContext *s, int insn) -{ - unsigned retrans = *s->code_ptr & 0x3fffffc; - tcg_out32(s, insn | retrans); -} - -static inline void tcg_out_bc_noaddr(TCGContext *s, int insn) -{ - unsigned retrans = *s->code_ptr & 0xfffc; - tcg_out32(s, insn | retrans); -} - -static void patch_reloc(tcg_insn_unit *code_ptr, int type, - intptr_t value, intptr_t addend) -{ - tcg_insn_unit *target = (tcg_insn_unit *)value; - - assert(addend == 0); - switch (type) { - case R_PPC_REL14: - reloc_pc14(code_ptr, target); - break; - case R_PPC_REL24: - reloc_pc24(code_ptr, target); - break; - default: - tcg_abort(); - } -} - -/* parse target specific constraints */ -static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str) -{ - const char *ct_str; - - ct_str = *pct_str; - switch (ct_str[0]) { - case 'A': case 'B': case 'C': case 'D': - ct->ct |= TCG_CT_REG; - tcg_regset_set_reg(ct->u.regs, 3 + ct_str[0] - 'A'); - break; - case 'r': - ct->ct |= TCG_CT_REG; - tcg_regset_set32(ct->u.regs, 0, 0xffffffff); - break; - case 'L': /* qemu_ld constraint */ - ct->ct |= TCG_CT_REG; - tcg_regset_set32(ct->u.regs, 0, 0xffffffff); - tcg_regset_reset_reg(ct->u.regs, TCG_REG_R3); -#ifdef CONFIG_SOFTMMU - tcg_regset_reset_reg(ct->u.regs, TCG_REG_R4); - tcg_regset_reset_reg(ct->u.regs, TCG_REG_R5); -#endif - break; - case 'S': /* qemu_st constraint */ - ct->ct |= TCG_CT_REG; - tcg_regset_set32(ct->u.regs, 0, 0xffffffff); - tcg_regset_reset_reg(ct->u.regs, TCG_REG_R3); -#ifdef CONFIG_SOFTMMU - tcg_regset_reset_reg(ct->u.regs, TCG_REG_R4); - tcg_regset_reset_reg(ct->u.regs, TCG_REG_R5); - tcg_regset_reset_reg(ct->u.regs, TCG_REG_R6); -#endif - break; - case 'I': - ct->ct |= TCG_CT_CONST_S16; - break; - case 'J': - ct->ct |= TCG_CT_CONST_U16; - break; - case 'M': - ct->ct |= TCG_CT_CONST_MONE; - break; - case 'T': - ct->ct |= TCG_CT_CONST_S32; - break; - case 'U': - ct->ct |= TCG_CT_CONST_U32; - break; - case 'Z': - ct->ct |= TCG_CT_CONST_ZERO; - break; - default: - return -1; - } - ct_str++; - *pct_str = ct_str; - return 0; -} - -/* test if a constant matches the constraint */ -static int tcg_target_const_match(tcg_target_long val, TCGType type, - const TCGArgConstraint *arg_ct) -{ - int ct = arg_ct->ct; - if (ct & TCG_CT_CONST) { - return 1; - } - - /* The only 32-bit constraint we use aside from - TCG_CT_CONST is TCG_CT_CONST_S16. */ - if (type == TCG_TYPE_I32) { - val = (int32_t)val; - } - - if ((ct & TCG_CT_CONST_S16) && val == (int16_t)val) { - return 1; - } else if ((ct & TCG_CT_CONST_U16) && val == (uint16_t)val) { - return 1; - } else if ((ct & TCG_CT_CONST_S32) && val == (int32_t)val) { - return 1; - } else if ((ct & TCG_CT_CONST_U32) && val == (uint32_t)val) { - return 1; - } else if ((ct & TCG_CT_CONST_ZERO) && val == 0) { - return 1; - } else if ((ct & TCG_CT_CONST_MONE) && val == -1) { - return 1; - } - return 0; -} - -#define OPCD(opc) ((opc)<<26) -#define XO19(opc) (OPCD(19)|((opc)<<1)) -#define MD30(opc) (OPCD(30)|((opc)<<2)) -#define MDS30(opc) (OPCD(30)|((opc)<<1)) -#define XO31(opc) (OPCD(31)|((opc)<<1)) -#define XO58(opc) (OPCD(58)|(opc)) -#define XO62(opc) (OPCD(62)|(opc)) - -#define B OPCD( 18) -#define BC OPCD( 16) -#define LBZ OPCD( 34) -#define LHZ OPCD( 40) -#define LHA OPCD( 42) -#define LWZ OPCD( 32) -#define STB OPCD( 38) -#define STH OPCD( 44) -#define STW OPCD( 36) - -#define STD XO62( 0) -#define STDU XO62( 1) -#define STDX XO31(149) - -#define LD XO58( 0) -#define LDX XO31( 21) -#define LDU XO58( 1) -#define LWA XO58( 2) -#define LWAX XO31(341) - -#define ADDIC OPCD( 12) -#define ADDI OPCD( 14) -#define ADDIS OPCD( 15) -#define ORI OPCD( 24) -#define ORIS OPCD( 25) -#define XORI OPCD( 26) -#define XORIS OPCD( 27) -#define ANDI OPCD( 28) -#define ANDIS OPCD( 29) -#define MULLI OPCD( 7) -#define CMPLI OPCD( 10) -#define CMPI OPCD( 11) -#define SUBFIC OPCD( 8) - -#define LWZU OPCD( 33) -#define STWU OPCD( 37) - -#define RLWIMI OPCD( 20) -#define RLWINM OPCD( 21) -#define RLWNM OPCD( 23) - -#define RLDICL MD30( 0) -#define RLDICR MD30( 1) -#define RLDIMI MD30( 3) -#define RLDCL MDS30( 8) - -#define BCLR XO19( 16) -#define BCCTR XO19(528) -#define CRAND XO19(257) -#define CRANDC XO19(129) -#define CRNAND XO19(225) -#define CROR XO19(449) -#define CRNOR XO19( 33) - -#define EXTSB XO31(954) -#define EXTSH XO31(922) -#define EXTSW XO31(986) -#define ADD XO31(266) -#define ADDE XO31(138) -#define ADDME XO31(234) -#define ADDZE XO31(202) -#define ADDC XO31( 10) -#define AND XO31( 28) -#define SUBF XO31( 40) -#define SUBFC XO31( 8) -#define SUBFE XO31(136) -#define SUBFME XO31(232) -#define SUBFZE XO31(200) -#define OR XO31(444) -#define XOR XO31(316) -#define MULLW XO31(235) -#define MULHW XO31( 75) -#define MULHWU XO31( 11) -#define DIVW XO31(491) -#define DIVWU XO31(459) -#define CMP XO31( 0) -#define CMPL XO31( 32) -#define LHBRX XO31(790) -#define LWBRX XO31(534) -#define LDBRX XO31(532) -#define STHBRX XO31(918) -#define STWBRX XO31(662) -#define STDBRX XO31(660) -#define MFSPR XO31(339) -#define MTSPR XO31(467) -#define SRAWI XO31(824) -#define NEG XO31(104) -#define MFCR XO31( 19) -#define MFOCRF (MFCR | (1u << 20)) -#define NOR XO31(124) -#define CNTLZW XO31( 26) -#define CNTLZD XO31( 58) -#define ANDC XO31( 60) -#define ORC XO31(412) -#define EQV XO31(284) -#define NAND XO31(476) -#define ISEL XO31( 15) - -#define MULLD XO31(233) -#define MULHD XO31( 73) -#define MULHDU XO31( 9) -#define DIVD XO31(489) -#define DIVDU XO31(457) - -#define LBZX XO31( 87) -#define LHZX XO31(279) -#define LHAX XO31(343) -#define LWZX XO31( 23) -#define STBX XO31(215) -#define STHX XO31(407) -#define STWX XO31(151) - -#define SPR(a, b) ((((a)<<5)|(b))<<11) -#define LR SPR(8, 0) -#define CTR SPR(9, 0) - -#define SLW XO31( 24) -#define SRW XO31(536) -#define SRAW XO31(792) - -#define SLD XO31( 27) -#define SRD XO31(539) -#define SRAD XO31(794) -#define SRADI XO31(413<<1) - -#define TW XO31( 4) -#define TRAP (TW | TO(31)) - -#define NOP ORI /* ori 0,0,0 */ - -#define RT(r) ((r)<<21) -#define RS(r) ((r)<<21) -#define RA(r) ((r)<<16) -#define RB(r) ((r)<<11) -#define TO(t) ((t)<<21) -#define SH(s) ((s)<<11) -#define MB(b) ((b)<<6) -#define ME(e) ((e)<<1) -#define BO(o) ((o)<<21) -#define MB64(b) ((b)<<5) -#define FXM(b) (1 << (19 - (b))) - -#define LK 1 - -#define TAB(t, a, b) (RT(t) | RA(a) | RB(b)) -#define SAB(s, a, b) (RS(s) | RA(a) | RB(b)) -#define TAI(s, a, i) (RT(s) | RA(a) | ((i) & 0xffff)) -#define SAI(s, a, i) (RS(s) | RA(a) | ((i) & 0xffff)) - -#define BF(n) ((n)<<23) -#define BI(n, c) (((c)+((n)*4))<<16) -#define BT(n, c) (((c)+((n)*4))<<21) -#define BA(n, c) (((c)+((n)*4))<<16) -#define BB(n, c) (((c)+((n)*4))<<11) -#define BC_(n, c) (((c)+((n)*4))<<6) - -#define BO_COND_TRUE BO(12) -#define BO_COND_FALSE BO( 4) -#define BO_ALWAYS BO(20) - -enum { - CR_LT, - CR_GT, - CR_EQ, - CR_SO -}; - -static const uint32_t tcg_to_bc[] = { - [TCG_COND_EQ] = BC | BI(7, CR_EQ) | BO_COND_TRUE, - [TCG_COND_NE] = BC | BI(7, CR_EQ) | BO_COND_FALSE, - [TCG_COND_LT] = BC | BI(7, CR_LT) | BO_COND_TRUE, - [TCG_COND_GE] = BC | BI(7, CR_LT) | BO_COND_FALSE, - [TCG_COND_LE] = BC | BI(7, CR_GT) | BO_COND_FALSE, - [TCG_COND_GT] = BC | BI(7, CR_GT) | BO_COND_TRUE, - [TCG_COND_LTU] = BC | BI(7, CR_LT) | BO_COND_TRUE, - [TCG_COND_GEU] = BC | BI(7, CR_LT) | BO_COND_FALSE, - [TCG_COND_LEU] = BC | BI(7, CR_GT) | BO_COND_FALSE, - [TCG_COND_GTU] = BC | BI(7, CR_GT) | BO_COND_TRUE, -}; - -/* The low bit here is set if the RA and RB fields must be inverted. */ -static const uint32_t tcg_to_isel[] = { - [TCG_COND_EQ] = ISEL | BC_(7, CR_EQ), - [TCG_COND_NE] = ISEL | BC_(7, CR_EQ) | 1, - [TCG_COND_LT] = ISEL | BC_(7, CR_LT), - [TCG_COND_GE] = ISEL | BC_(7, CR_LT) | 1, - [TCG_COND_LE] = ISEL | BC_(7, CR_GT) | 1, - [TCG_COND_GT] = ISEL | BC_(7, CR_GT), - [TCG_COND_LTU] = ISEL | BC_(7, CR_LT), - [TCG_COND_GEU] = ISEL | BC_(7, CR_LT) | 1, - [TCG_COND_LEU] = ISEL | BC_(7, CR_GT) | 1, - [TCG_COND_GTU] = ISEL | BC_(7, CR_GT), -}; - -static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt, - TCGReg base, tcg_target_long offset); - -static void tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg) -{ - tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32); - if (ret != arg) { - tcg_out32(s, OR | SAB(arg, ret, arg)); - } -} - -static inline void tcg_out_rld(TCGContext *s, int op, TCGReg ra, TCGReg rs, - int sh, int mb) -{ - assert(TCG_TARGET_REG_BITS == 64); - sh = SH(sh & 0x1f) | (((sh >> 5) & 1) << 1); - mb = MB64((mb >> 5) | ((mb << 1) & 0x3f)); - tcg_out32(s, op | RA(ra) | RS(rs) | sh | mb); -} - -static inline void tcg_out_rlw(TCGContext *s, int op, TCGReg ra, TCGReg rs, - int sh, int mb, int me) -{ - tcg_out32(s, op | RA(ra) | RS(rs) | SH(sh) | MB(mb) | ME(me)); -} - -static inline void tcg_out_ext32u(TCGContext *s, TCGReg dst, TCGReg src) -{ - tcg_out_rld(s, RLDICL, dst, src, 0, 32); -} - -static inline void tcg_out_shli32(TCGContext *s, TCGReg dst, TCGReg src, int c) -{ - tcg_out_rlw(s, RLWINM, dst, src, c, 0, 31 - c); -} - -static inline void tcg_out_shli64(TCGContext *s, TCGReg dst, TCGReg src, int c) -{ - tcg_out_rld(s, RLDICR, dst, src, c, 63 - c); -} - -static inline void tcg_out_shri32(TCGContext *s, TCGReg dst, TCGReg src, int c) -{ - tcg_out_rlw(s, RLWINM, dst, src, 32 - c, c, 31); -} - -static inline void tcg_out_shri64(TCGContext *s, TCGReg dst, TCGReg src, int c) -{ - tcg_out_rld(s, RLDICL, dst, src, 64 - c, c); -} - -static void tcg_out_movi32(TCGContext *s, TCGReg ret, int32_t arg) -{ - if (arg == (int16_t) arg) { - tcg_out32(s, ADDI | TAI(ret, 0, arg)); - } else { - tcg_out32(s, ADDIS | TAI(ret, 0, arg >> 16)); - if (arg & 0xffff) { - tcg_out32(s, ORI | SAI(ret, ret, arg)); - } - } -} - -static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg ret, - tcg_target_long arg) -{ - tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32); - if (type == TCG_TYPE_I32 || arg == (int32_t)arg) { - tcg_out_movi32(s, ret, arg); - } else if (arg == (uint32_t)arg && !(arg & 0x8000)) { - tcg_out32(s, ADDI | TAI(ret, 0, arg)); - tcg_out32(s, ORIS | SAI(ret, ret, arg >> 16)); - } else { - int32_t high; - - if (USE_REG_RA) { - intptr_t diff = arg - (intptr_t)tb_ret_addr; - if (diff == (int32_t)diff) { - tcg_out_mem_long(s, ADDI, ADD, ret, TCG_REG_RA, diff); - return; - } - } - - high = arg >> 31 >> 1; - tcg_out_movi32(s, ret, high); - if (high) { - tcg_out_shli64(s, ret, ret, 32); - } - if (arg & 0xffff0000) { - tcg_out32(s, ORIS | SAI(ret, ret, arg >> 16)); - } - if (arg & 0xffff) { - tcg_out32(s, ORI | SAI(ret, ret, arg)); - } - } -} - -static bool mask_operand(uint32_t c, int *mb, int *me) -{ - uint32_t lsb, test; - - /* Accept a bit pattern like: - 0....01....1 - 1....10....0 - 0..01..10..0 - Keep track of the transitions. */ - if (c == 0 || c == -1) { - return false; - } - test = c; - lsb = test & -test; - test += lsb; - if (test & (test - 1)) { - return false; - } - - *me = clz32(lsb); - *mb = test ? clz32(test & -test) + 1 : 0; - return true; -} - -static bool mask64_operand(uint64_t c, int *mb, int *me) -{ - uint64_t lsb; - - if (c == 0) { - return false; - } - - lsb = c & -c; - /* Accept 1..10..0. */ - if (c == -lsb) { - *mb = 0; - *me = clz64(lsb); - return true; - } - /* Accept 0..01..1. */ - if (lsb == 1 && (c & (c + 1)) == 0) { - *mb = clz64(c + 1) + 1; - *me = 63; - return true; - } - return false; -} - -static void tcg_out_andi32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c) -{ - int mb, me; - - if (mask_operand(c, &mb, &me)) { - tcg_out_rlw(s, RLWINM, dst, src, 0, mb, me); - } else if ((c & 0xffff) == c) { - tcg_out32(s, ANDI | SAI(src, dst, c)); - return; - } else if ((c & 0xffff0000) == c) { - tcg_out32(s, ANDIS | SAI(src, dst, c >> 16)); - return; - } else { - tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R0, c); - tcg_out32(s, AND | SAB(src, dst, TCG_REG_R0)); - } -} - -static void tcg_out_andi64(TCGContext *s, TCGReg dst, TCGReg src, uint64_t c) -{ - int mb, me; - - assert(TCG_TARGET_REG_BITS == 64); - if (mask64_operand(c, &mb, &me)) { - if (mb == 0) { - tcg_out_rld(s, RLDICR, dst, src, 0, me); - } else { - tcg_out_rld(s, RLDICL, dst, src, 0, mb); - } - } else if ((c & 0xffff) == c) { - tcg_out32(s, ANDI | SAI(src, dst, c)); - return; - } else if ((c & 0xffff0000) == c) { - tcg_out32(s, ANDIS | SAI(src, dst, c >> 16)); - return; - } else { - tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_R0, c); - tcg_out32(s, AND | SAB(src, dst, TCG_REG_R0)); - } -} - -static void tcg_out_zori32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c, - int op_lo, int op_hi) -{ - if (c >> 16) { - tcg_out32(s, op_hi | SAI(src, dst, c >> 16)); - src = dst; - } - if (c & 0xffff) { - tcg_out32(s, op_lo | SAI(src, dst, c)); - src = dst; - } -} - -static void tcg_out_ori32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c) -{ - tcg_out_zori32(s, dst, src, c, ORI, ORIS); -} - -static void tcg_out_xori32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c) -{ - tcg_out_zori32(s, dst, src, c, XORI, XORIS); -} - -static void tcg_out_b(TCGContext *s, int mask, tcg_insn_unit *target) -{ - ptrdiff_t disp = tcg_pcrel_diff(s, target); - if (in_range_b(disp)) { - tcg_out32(s, B | (disp & 0x3fffffc) | mask); - } else { - tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R0, (uintptr_t)target); - tcg_out32(s, MTSPR | RS(TCG_REG_R0) | CTR); - tcg_out32(s, BCCTR | BO_ALWAYS | mask); - } -} - -static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt, - TCGReg base, tcg_target_long offset) -{ - tcg_target_long orig = offset, l0, l1, extra = 0, align = 0; - bool is_store = false; - TCGReg rs = TCG_REG_TMP1; - - switch (opi) { - case LD: case LWA: - align = 3; - /* FALLTHRU */ - default: - if (rt != TCG_REG_R0) { - rs = rt; - break; - } - break; - case STD: - align = 3; - /* FALLTHRU */ - case STB: case STH: case STW: - is_store = true; - break; - } - - /* For unaligned, or very large offsets, use the indexed form. */ - if (offset & align || offset != (int32_t)offset) { - if (rs == base) { - rs = TCG_REG_R0; - } - tcg_debug_assert(!is_store || rs != rt); - tcg_out_movi(s, TCG_TYPE_PTR, rs, orig); - tcg_out32(s, opx | TAB(rt, base, rs)); - return; - } - - l0 = (int16_t)offset; - offset = (offset - l0) >> 16; - l1 = (int16_t)offset; - - if (l1 < 0 && orig >= 0) { - extra = 0x4000; - l1 = (int16_t)(offset - 0x4000); - } - if (l1) { - tcg_out32(s, ADDIS | TAI(rs, base, l1)); - base = rs; - } - if (extra) { - tcg_out32(s, ADDIS | TAI(rs, base, extra)); - base = rs; - } - if (opi != ADDI || base != rt || l0 != 0) { - tcg_out32(s, opi | TAI(rt, base, l0)); - } -} - -static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, - TCGReg arg1, intptr_t arg2) -{ - int opi, opx; - - assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32); - if (type == TCG_TYPE_I32) { - opi = LWZ, opx = LWZX; - } else { - opi = LD, opx = LDX; - } - tcg_out_mem_long(s, opi, opx, ret, arg1, arg2); -} - -static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, - TCGReg arg1, intptr_t arg2) -{ - int opi, opx; - - assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32); - if (type == TCG_TYPE_I32) { - opi = STW, opx = STWX; - } else { - opi = STD, opx = STDX; - } - tcg_out_mem_long(s, opi, opx, arg, arg1, arg2); -} - -static void tcg_out_cmp(TCGContext *s, int cond, TCGArg arg1, TCGArg arg2, - int const_arg2, int cr, TCGType type) -{ - int imm; - uint32_t op; - - tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32); - - /* Simplify the comparisons below wrt CMPI. */ - if (type == TCG_TYPE_I32) { - arg2 = (int32_t)arg2; - } - - switch (cond) { - case TCG_COND_EQ: - case TCG_COND_NE: - if (const_arg2) { - if ((int16_t) arg2 == arg2) { - op = CMPI; - imm = 1; - break; - } else if ((uint16_t) arg2 == arg2) { - op = CMPLI; - imm = 1; - break; - } - } - op = CMPL; - imm = 0; - break; - - case TCG_COND_LT: - case TCG_COND_GE: - case TCG_COND_LE: - case TCG_COND_GT: - if (const_arg2) { - if ((int16_t) arg2 == arg2) { - op = CMPI; - imm = 1; - break; - } - } - op = CMP; - imm = 0; - break; - - case TCG_COND_LTU: - case TCG_COND_GEU: - case TCG_COND_LEU: - case TCG_COND_GTU: - if (const_arg2) { - if ((uint16_t) arg2 == arg2) { - op = CMPLI; - imm = 1; - break; - } - } - op = CMPL; - imm = 0; - break; - - default: - tcg_abort(); - } - op |= BF(cr) | ((type == TCG_TYPE_I64) << 21); - - if (imm) { - tcg_out32(s, op | RA(arg1) | (arg2 & 0xffff)); - } else { - if (const_arg2) { - tcg_out_movi(s, type, TCG_REG_R0, arg2); - arg2 = TCG_REG_R0; - } - tcg_out32(s, op | RA(arg1) | RB(arg2)); - } -} - -static void tcg_out_setcond_eq0(TCGContext *s, TCGType type, - TCGReg dst, TCGReg src) -{ - if (type == TCG_TYPE_I32) { - tcg_out32(s, CNTLZW | RS(src) | RA(dst)); - tcg_out_shri32(s, dst, dst, 5); - } else { - tcg_out32(s, CNTLZD | RS(src) | RA(dst)); - tcg_out_shri64(s, dst, dst, 6); - } -} - -static void tcg_out_setcond_ne0(TCGContext *s, TCGReg dst, TCGReg src) -{ - /* X != 0 implies X + -1 generates a carry. Extra addition - trickery means: R = X-1 + ~X + C = X-1 + (-X+1) + C = C. */ - if (dst != src) { - tcg_out32(s, ADDIC | TAI(dst, src, -1)); - tcg_out32(s, SUBFE | TAB(dst, dst, src)); - } else { - tcg_out32(s, ADDIC | TAI(TCG_REG_R0, src, -1)); - tcg_out32(s, SUBFE | TAB(dst, TCG_REG_R0, src)); - } -} - -static TCGReg tcg_gen_setcond_xor(TCGContext *s, TCGReg arg1, TCGArg arg2, - bool const_arg2) -{ - if (const_arg2) { - if ((uint32_t)arg2 == arg2) { - tcg_out_xori32(s, TCG_REG_R0, arg1, arg2); - } else { - tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_R0, arg2); - tcg_out32(s, XOR | SAB(arg1, TCG_REG_R0, TCG_REG_R0)); - } - } else { - tcg_out32(s, XOR | SAB(arg1, TCG_REG_R0, arg2)); - } - return TCG_REG_R0; -} - -static void tcg_out_setcond(TCGContext *s, TCGType type, TCGCond cond, - TCGArg arg0, TCGArg arg1, TCGArg arg2, - int const_arg2) -{ - int crop, sh; - - assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32); - - /* Ignore high bits of a potential constant arg2. */ - if (type == TCG_TYPE_I32) { - arg2 = (uint32_t)arg2; - } - - /* Handle common and trivial cases before handling anything else. */ - if (arg2 == 0) { - switch (cond) { - case TCG_COND_EQ: - tcg_out_setcond_eq0(s, type, arg0, arg1); - return; - case TCG_COND_NE: - if (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I32) { - tcg_out_ext32u(s, TCG_REG_R0, arg1); - arg1 = TCG_REG_R0; - } - tcg_out_setcond_ne0(s, arg0, arg1); - return; - case TCG_COND_GE: - tcg_out32(s, NOR | SAB(arg1, arg0, arg1)); - arg1 = arg0; - /* FALLTHRU */ - case TCG_COND_LT: - /* Extract the sign bit. */ - if (type == TCG_TYPE_I32) { - tcg_out_shri32(s, arg0, arg1, 31); - } else { - tcg_out_shri64(s, arg0, arg1, 63); - } - return; - default: - break; - } - } - - /* If we have ISEL, we can implement everything with 3 or 4 insns. - All other cases below are also at least 3 insns, so speed up the - code generator by not considering them and always using ISEL. */ - if (HAVE_ISEL) { - int isel, tab; - - tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 7, type); - - isel = tcg_to_isel[cond]; - - tcg_out_movi(s, type, arg0, 1); - if (isel & 1) { - /* arg0 = (bc ? 0 : 1) */ - tab = TAB(arg0, 0, arg0); - isel &= ~1; - } else { - /* arg0 = (bc ? 1 : 0) */ - tcg_out_movi(s, type, TCG_REG_R0, 0); - tab = TAB(arg0, arg0, TCG_REG_R0); - } - tcg_out32(s, isel | tab); - return; - } - - switch (cond) { - case TCG_COND_EQ: - arg1 = tcg_gen_setcond_xor(s, arg1, arg2, const_arg2); - tcg_out_setcond_eq0(s, type, arg0, arg1); - return; - - case TCG_COND_NE: - arg1 = tcg_gen_setcond_xor(s, arg1, arg2, const_arg2); - /* Discard the high bits only once, rather than both inputs. */ - if (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I32) { - tcg_out_ext32u(s, TCG_REG_R0, arg1); - arg1 = TCG_REG_R0; - } - tcg_out_setcond_ne0(s, arg0, arg1); - return; - - case TCG_COND_GT: - case TCG_COND_GTU: - sh = 30; - crop = 0; - goto crtest; - - case TCG_COND_LT: - case TCG_COND_LTU: - sh = 29; - crop = 0; - goto crtest; - - case TCG_COND_GE: - case TCG_COND_GEU: - sh = 31; - crop = CRNOR | BT(7, CR_EQ) | BA(7, CR_LT) | BB(7, CR_LT); - goto crtest; - - case TCG_COND_LE: - case TCG_COND_LEU: - sh = 31; - crop = CRNOR | BT(7, CR_EQ) | BA(7, CR_GT) | BB(7, CR_GT); - crtest: - tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 7, type); - if (crop) { - tcg_out32(s, crop); - } - tcg_out32(s, MFOCRF | RT(TCG_REG_R0) | FXM(7)); - tcg_out_rlw(s, RLWINM, arg0, TCG_REG_R0, sh, 31, 31); - break; - - default: - tcg_abort(); - } -} - -static void tcg_out_bc(TCGContext *s, int bc, TCGLabel *l) -{ - if (l->has_value) { - tcg_out32(s, bc | reloc_pc14_val(s->code_ptr, l->u.value_ptr)); - } else { - tcg_out_reloc(s, s->code_ptr, R_PPC_REL14, l, 0); - tcg_out_bc_noaddr(s, bc); - } -} - -static void tcg_out_brcond(TCGContext *s, TCGCond cond, - TCGArg arg1, TCGArg arg2, int const_arg2, - TCGLabel *l, TCGType type) -{ - tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 7, type); - tcg_out_bc(s, tcg_to_bc[cond], l); -} - -static void tcg_out_movcond(TCGContext *s, TCGType type, TCGCond cond, - TCGArg dest, TCGArg c1, TCGArg c2, TCGArg v1, - TCGArg v2, bool const_c2) -{ - /* If for some reason both inputs are zero, don't produce bad code. */ - if (v1 == 0 && v2 == 0) { - tcg_out_movi(s, type, dest, 0); - return; - } - - tcg_out_cmp(s, cond, c1, c2, const_c2, 7, type); - - if (HAVE_ISEL) { - int isel = tcg_to_isel[cond]; - - /* Swap the V operands if the operation indicates inversion. */ - if (isel & 1) { - int t = v1; - v1 = v2; - v2 = t; - isel &= ~1; - } - /* V1 == 0 is handled by isel; V2 == 0 must be handled by hand. */ - if (v2 == 0) { - tcg_out_movi(s, type, TCG_REG_R0, 0); - } - tcg_out32(s, isel | TAB(dest, v1, v2)); - } else { - if (dest == v2) { - cond = tcg_invert_cond(cond); - v2 = v1; - } else if (dest != v1) { - if (v1 == 0) { - tcg_out_movi(s, type, dest, 0); - } else { - tcg_out_mov(s, type, dest, v1); - } - } - /* Branch forward over one insn */ - tcg_out32(s, tcg_to_bc[cond] | 8); - if (v2 == 0) { - tcg_out_movi(s, type, dest, 0); - } else { - tcg_out_mov(s, type, dest, v2); - } - } -} - -static void tcg_out_cmp2(TCGContext *s, const TCGArg *args, - const int *const_args) -{ - static const struct { uint8_t bit1, bit2; } bits[] = { - [TCG_COND_LT ] = { CR_LT, CR_LT }, - [TCG_COND_LE ] = { CR_LT, CR_GT }, - [TCG_COND_GT ] = { CR_GT, CR_GT }, - [TCG_COND_GE ] = { CR_GT, CR_LT }, - [TCG_COND_LTU] = { CR_LT, CR_LT }, - [TCG_COND_LEU] = { CR_LT, CR_GT }, - [TCG_COND_GTU] = { CR_GT, CR_GT }, - [TCG_COND_GEU] = { CR_GT, CR_LT }, - }; - - TCGCond cond = args[4], cond2; - TCGArg al, ah, bl, bh; - int blconst, bhconst; - int op, bit1, bit2; - - al = args[0]; - ah = args[1]; - bl = args[2]; - bh = args[3]; - blconst = const_args[2]; - bhconst = const_args[3]; - - switch (cond) { - case TCG_COND_EQ: - op = CRAND; - goto do_equality; - case TCG_COND_NE: - op = CRNAND; - do_equality: - tcg_out_cmp(s, cond, al, bl, blconst, 6, TCG_TYPE_I32); - tcg_out_cmp(s, cond, ah, bh, bhconst, 7, TCG_TYPE_I32); - tcg_out32(s, op | BT(7, CR_EQ) | BA(6, CR_EQ) | BB(7, CR_EQ)); - break; - - case TCG_COND_LT: - case TCG_COND_LE: - case TCG_COND_GT: - case TCG_COND_GE: - case TCG_COND_LTU: - case TCG_COND_LEU: - case TCG_COND_GTU: - case TCG_COND_GEU: - bit1 = bits[cond].bit1; - bit2 = bits[cond].bit2; - op = (bit1 != bit2 ? CRANDC : CRAND); - cond2 = tcg_unsigned_cond(cond); - - tcg_out_cmp(s, cond, ah, bh, bhconst, 6, TCG_TYPE_I32); - tcg_out_cmp(s, cond2, al, bl, blconst, 7, TCG_TYPE_I32); - tcg_out32(s, op | BT(7, CR_EQ) | BA(6, CR_EQ) | BB(7, bit2)); - tcg_out32(s, CROR | BT(7, CR_EQ) | BA(6, bit1) | BB(7, CR_EQ)); - break; - - default: - tcg_abort(); - } -} - -static void tcg_out_setcond2(TCGContext *s, const TCGArg *args, - const int *const_args) -{ - tcg_out_cmp2(s, args + 1, const_args + 1); - tcg_out32(s, MFOCRF | RT(TCG_REG_R0) | FXM(7)); - tcg_out_rlw(s, RLWINM, args[0], TCG_REG_R0, 31, 31, 31); -} - -static void tcg_out_brcond2 (TCGContext *s, const TCGArg *args, - const int *const_args) -{ - tcg_out_cmp2(s, args, const_args); - tcg_out_bc(s, BC | BI(7, CR_EQ) | BO_COND_TRUE, arg_label(args[5])); -} - -void ppc_tb_set_jmp_target(uintptr_t jmp_addr, uintptr_t addr) -{ - tcg_insn_unit i1, i2; - uint64_t pair; - intptr_t diff = addr - jmp_addr; - - if (in_range_b(diff)) { - i1 = B | (diff & 0x3fffffc); - i2 = NOP; - } else if (USE_REG_RA) { - intptr_t lo, hi; - diff = addr - (uintptr_t)tb_ret_addr; - lo = (int16_t)diff; - hi = (int32_t)(diff - lo); - assert(diff == hi + lo); - i1 = ADDIS | TAI(TCG_REG_TMP1, TCG_REG_RA, hi >> 16); - i2 = ADDI | TAI(TCG_REG_TMP1, TCG_REG_TMP1, lo); - } else { - assert(TCG_TARGET_REG_BITS == 32 || addr == (int32_t)addr); - i1 = ADDIS | TAI(TCG_REG_TMP1, 0, addr >> 16); - i2 = ORI | SAI(TCG_REG_TMP1, TCG_REG_TMP1, addr); - } -#ifdef HOST_WORDS_BIGENDIAN - pair = (uint64_t)i1 << 32 | i2; -#else - pair = (uint64_t)i2 << 32 | i1; -#endif - - /* ??? __atomic_store_8, presuming there's some way to do that - for 32-bit, otherwise this is good enough for 64-bit. */ - *(uint64_t *)jmp_addr = pair; - flush_icache_range(jmp_addr, jmp_addr + 8); -} - -static void tcg_out_call(TCGContext *s, tcg_insn_unit *target) -{ -#ifdef _CALL_AIX - /* Look through the descriptor. If the branch is in range, and we - don't have to spend too much effort on building the toc. */ - void *tgt = ((void **)target)[0]; - uintptr_t toc = ((uintptr_t *)target)[1]; - intptr_t diff = tcg_pcrel_diff(s, tgt); - - if (in_range_b(diff) && toc == (uint32_t)toc) { - tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP1, toc); - tcg_out_b(s, LK, tgt); - } else { - /* Fold the low bits of the constant into the addresses below. */ - intptr_t arg = (intptr_t)target; - int ofs = (int16_t)arg; - - if (ofs + 8 < 0x8000) { - arg -= ofs; - } else { - ofs = 0; - } - tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP1, arg); - tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_REG_TMP1, ofs); - tcg_out32(s, MTSPR | RA(TCG_REG_R0) | CTR); - tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R2, TCG_REG_TMP1, ofs + SZP); - tcg_out32(s, BCCTR | BO_ALWAYS | LK); - } -#elif defined(_CALL_ELF) && _CALL_ELF == 2 - intptr_t diff; - - /* In the ELFv2 ABI, we have to set up r12 to contain the destination - address, which the callee uses to compute its TOC address. */ - /* FIXME: when the branch is in range, we could avoid r12 load if we - knew that the destination uses the same TOC, and what its local - entry point offset is. */ - tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R12, (intptr_t)target); - - diff = tcg_pcrel_diff(s, target); - if (in_range_b(diff)) { - tcg_out_b(s, LK, target); - } else { - tcg_out32(s, MTSPR | RS(TCG_REG_R12) | CTR); - tcg_out32(s, BCCTR | BO_ALWAYS | LK); - } -#else - tcg_out_b(s, LK, target); -#endif -} - -static const uint32_t qemu_ldx_opc[16] = { - [MO_UB] = LBZX, - [MO_UW] = LHZX, - [MO_UL] = LWZX, - [MO_Q] = LDX, - [MO_SW] = LHAX, - [MO_SL] = LWAX, - [MO_BSWAP | MO_UB] = LBZX, - [MO_BSWAP | MO_UW] = LHBRX, - [MO_BSWAP | MO_UL] = LWBRX, - [MO_BSWAP | MO_Q] = LDBRX, -}; - -static const uint32_t qemu_stx_opc[16] = { - [MO_UB] = STBX, - [MO_UW] = STHX, - [MO_UL] = STWX, - [MO_Q] = STDX, - [MO_BSWAP | MO_UB] = STBX, - [MO_BSWAP | MO_UW] = STHBRX, - [MO_BSWAP | MO_UL] = STWBRX, - [MO_BSWAP | MO_Q] = STDBRX, -}; - -static const uint32_t qemu_exts_opc[4] = { - EXTSB, EXTSH, EXTSW, 0 -}; - -#if defined (CONFIG_SOFTMMU) -/* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr, - * int mmu_idx, uintptr_t ra) - */ -static void * const qemu_ld_helpers[16] = { - [MO_UB] = helper_ret_ldub_mmu, - [MO_LEUW] = helper_le_lduw_mmu, - [MO_LEUL] = helper_le_ldul_mmu, - [MO_LEQ] = helper_le_ldq_mmu, - [MO_BEUW] = helper_be_lduw_mmu, - [MO_BEUL] = helper_be_ldul_mmu, - [MO_BEQ] = helper_be_ldq_mmu, -}; - -/* helper signature: helper_st_mmu(CPUState *env, target_ulong addr, - * uintxx_t val, int mmu_idx, uintptr_t ra) - */ -static void * const qemu_st_helpers[16] = { - [MO_UB] = helper_ret_stb_mmu, - [MO_LEUW] = helper_le_stw_mmu, - [MO_LEUL] = helper_le_stl_mmu, - [MO_LEQ] = helper_le_stq_mmu, - [MO_BEUW] = helper_be_stw_mmu, - [MO_BEUL] = helper_be_stl_mmu, - [MO_BEQ] = helper_be_stq_mmu, -}; - -/* Perform the TLB load and compare. Places the result of the comparison - in CR7, loads the addend of the TLB into R3, and returns the register - containing the guest address (zero-extended into R4). Clobbers R0 and R2. */ - -static TCGReg tcg_out_tlb_read(TCGContext *s, TCGMemOp opc, - TCGReg addrlo, TCGReg addrhi, - int mem_index, bool is_read) -{ - int cmp_off - = (is_read - ? offsetof(CPUArchState, tlb_table[mem_index][0].addr_read) - : offsetof(CPUArchState, tlb_table[mem_index][0].addr_write)); - int add_off = offsetof(CPUArchState, tlb_table[mem_index][0].addend); - TCGReg base = TCG_AREG0; - TCGMemOp s_bits = opc & MO_SIZE; - - /* Extract the page index, shifted into place for tlb index. */ - if (TCG_TARGET_REG_BITS == 64) { - if (TARGET_LONG_BITS == 32) { - /* Zero-extend the address into a place helpful for further use. */ - tcg_out_ext32u(s, TCG_REG_R4, addrlo); - addrlo = TCG_REG_R4; - } else { - tcg_out_rld(s, RLDICL, TCG_REG_R3, addrlo, - 64 - TARGET_PAGE_BITS, 64 - CPU_TLB_BITS); - } - } - - /* Compensate for very large offsets. */ - if (add_off >= 0x8000) { - /* Most target env are smaller than 32k; none are larger than 64k. - Simplify the logic here merely to offset by 0x7ff0, giving us a - range just shy of 64k. Check this assumption. */ - QEMU_BUILD_BUG_ON(offsetof(CPUArchState, - tlb_table[NB_MMU_MODES - 1][1]) - > 0x7ff0 + 0x7fff); - tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, base, 0x7ff0)); - base = TCG_REG_TMP1; - cmp_off -= 0x7ff0; - add_off -= 0x7ff0; - } - - /* Extraction and shifting, part 2. */ - if (TCG_TARGET_REG_BITS == 32 || TARGET_LONG_BITS == 32) { - tcg_out_rlw(s, RLWINM, TCG_REG_R3, addrlo, - 32 - (TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS), - 32 - (CPU_TLB_BITS + CPU_TLB_ENTRY_BITS), - 31 - CPU_TLB_ENTRY_BITS); - } else { - tcg_out_shli64(s, TCG_REG_R3, TCG_REG_R3, CPU_TLB_ENTRY_BITS); - } - - tcg_out32(s, ADD | TAB(TCG_REG_R3, TCG_REG_R3, base)); - - /* Load the tlb comparator. */ - if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) { - tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_R4, TCG_REG_R3, cmp_off); - tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_TMP1, TCG_REG_R3, cmp_off + 4); - } else { - tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_TMP1, TCG_REG_R3, cmp_off); - } - - /* Load the TLB addend for use on the fast path. Do this asap - to minimize any load use delay. */ - tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R3, TCG_REG_R3, add_off); - - /* Clear the non-page, non-alignment bits from the address */ - if (TCG_TARGET_REG_BITS == 32 || TARGET_LONG_BITS == 32) { - /* We don't support unaligned accesses on 32-bits, preserve - * the bottom bits and thus trigger a comparison failure on - * unaligned accesses - */ - tcg_out_rlw(s, RLWINM, TCG_REG_R0, addrlo, 0, - (32 - s_bits) & 31, 31 - TARGET_PAGE_BITS); - } else if (s_bits) { - /* > byte access, we need to handle alignment */ - if ((opc & MO_AMASK) == MO_ALIGN) { - /* Alignment required by the front-end, same as 32-bits */ - tcg_out_rld(s, RLDICL, TCG_REG_R0, addrlo, - 64 - TARGET_PAGE_BITS, TARGET_PAGE_BITS - s_bits); - tcg_out_rld(s, RLDICL, TCG_REG_R0, TCG_REG_R0, TARGET_PAGE_BITS, 0); - } else { - /* We support unaligned accesses, we need to make sure we fail - * if we cross a page boundary. The trick is to add the - * access_size-1 to the address before masking the low bits. - * That will make the address overflow to the next page if we - * cross a page boundary which will then force a mismatch of - * the TLB compare since the next page cannot possibly be in - * the same TLB index. - */ - tcg_out32(s, ADDI | TAI(TCG_REG_R0, addrlo, (1 << s_bits) - 1)); - tcg_out_rld(s, RLDICR, TCG_REG_R0, TCG_REG_R0, - 0, 63 - TARGET_PAGE_BITS); - } - } else { - /* Byte access, just chop off the bits below the page index */ - tcg_out_rld(s, RLDICR, TCG_REG_R0, addrlo, 0, 63 - TARGET_PAGE_BITS); - } - - if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) { - tcg_out_cmp(s, TCG_COND_EQ, TCG_REG_R0, TCG_REG_TMP1, - 0, 7, TCG_TYPE_I32); - tcg_out_cmp(s, TCG_COND_EQ, addrhi, TCG_REG_R4, 0, 6, TCG_TYPE_I32); - tcg_out32(s, CRAND | BT(7, CR_EQ) | BA(6, CR_EQ) | BB(7, CR_EQ)); - } else { - tcg_out_cmp(s, TCG_COND_EQ, TCG_REG_R0, TCG_REG_TMP1, - 0, 7, TCG_TYPE_TL); - } - - return addrlo; -} - -/* Record the context of a call to the out of line helper code for the slow - path for a load or store, so that we can later generate the correct - helper code. */ -static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi, - TCGReg datalo_reg, TCGReg datahi_reg, - TCGReg addrlo_reg, TCGReg addrhi_reg, - tcg_insn_unit *raddr, tcg_insn_unit *lptr) -{ - TCGLabelQemuLdst *label = new_ldst_label(s); - - label->is_ld = is_ld; - label->oi = oi; - label->datalo_reg = datalo_reg; - label->datahi_reg = datahi_reg; - label->addrlo_reg = addrlo_reg; - label->addrhi_reg = addrhi_reg; - label->raddr = raddr; - label->label_ptr[0] = lptr; -} - -static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) -{ - TCGMemOpIdx oi = lb->oi; - TCGMemOp opc = get_memop(oi); - TCGReg hi, lo, arg = TCG_REG_R3; - - reloc_pc14(lb->label_ptr[0], s->code_ptr); - - tcg_out_mov(s, TCG_TYPE_PTR, arg++, TCG_AREG0); - - lo = lb->addrlo_reg; - hi = lb->addrhi_reg; - if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) { -#ifdef TCG_TARGET_CALL_ALIGN_ARGS - arg |= 1; -#endif - tcg_out_mov(s, TCG_TYPE_I32, arg++, hi); - tcg_out_mov(s, TCG_TYPE_I32, arg++, lo); - } else { - /* If the address needed to be zero-extended, we'll have already - placed it in R4. The only remaining case is 64-bit guest. */ - tcg_out_mov(s, TCG_TYPE_TL, arg++, lo); - } - - tcg_out_movi(s, TCG_TYPE_I32, arg++, oi); - tcg_out32(s, MFSPR | RT(arg) | LR); - - tcg_out_call(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]); - - lo = lb->datalo_reg; - hi = lb->datahi_reg; - if (TCG_TARGET_REG_BITS == 32 && (opc & MO_SIZE) == MO_64) { - tcg_out_mov(s, TCG_TYPE_I32, lo, TCG_REG_R4); - tcg_out_mov(s, TCG_TYPE_I32, hi, TCG_REG_R3); - } else if (opc & MO_SIGN) { - uint32_t insn = qemu_exts_opc[opc & MO_SIZE]; - tcg_out32(s, insn | RA(lo) | RS(TCG_REG_R3)); - } else { - tcg_out_mov(s, TCG_TYPE_REG, lo, TCG_REG_R3); - } - - tcg_out_b(s, 0, lb->raddr); -} - -static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) -{ - TCGMemOpIdx oi = lb->oi; - TCGMemOp opc = get_memop(oi); - TCGMemOp s_bits = opc & MO_SIZE; - TCGReg hi, lo, arg = TCG_REG_R3; - - reloc_pc14(lb->label_ptr[0], s->code_ptr); - - tcg_out_mov(s, TCG_TYPE_PTR, arg++, TCG_AREG0); - - lo = lb->addrlo_reg; - hi = lb->addrhi_reg; - if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) { -#ifdef TCG_TARGET_CALL_ALIGN_ARGS - arg |= 1; -#endif - tcg_out_mov(s, TCG_TYPE_I32, arg++, hi); - tcg_out_mov(s, TCG_TYPE_I32, arg++, lo); - } else { - /* If the address needed to be zero-extended, we'll have already - placed it in R4. The only remaining case is 64-bit guest. */ - tcg_out_mov(s, TCG_TYPE_TL, arg++, lo); - } - - lo = lb->datalo_reg; - hi = lb->datahi_reg; - if (TCG_TARGET_REG_BITS == 32) { - switch (s_bits) { - case MO_64: -#ifdef TCG_TARGET_CALL_ALIGN_ARGS - arg |= 1; -#endif - tcg_out_mov(s, TCG_TYPE_I32, arg++, hi); - /* FALLTHRU */ - case MO_32: - tcg_out_mov(s, TCG_TYPE_I32, arg++, lo); - break; - default: - tcg_out_rlw(s, RLWINM, arg++, lo, 0, 32 - (8 << s_bits), 31); - break; - } - } else { - if (s_bits == MO_64) { - tcg_out_mov(s, TCG_TYPE_I64, arg++, lo); - } else { - tcg_out_rld(s, RLDICL, arg++, lo, 0, 64 - (8 << s_bits)); - } - } - - tcg_out_movi(s, TCG_TYPE_I32, arg++, oi); - tcg_out32(s, MFSPR | RT(arg) | LR); - - tcg_out_call(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]); - - tcg_out_b(s, 0, lb->raddr); -} -#endif /* SOFTMMU */ - -static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is_64) -{ - TCGReg datalo, datahi, addrlo, rbase; - TCGReg addrhi __attribute__((unused)); - TCGMemOpIdx oi; - TCGMemOp opc, s_bits; -#ifdef CONFIG_SOFTMMU - int mem_index; - tcg_insn_unit *label_ptr; -#endif - - datalo = *args++; - datahi = (TCG_TARGET_REG_BITS == 32 && is_64 ? *args++ : 0); - addrlo = *args++; - addrhi = (TCG_TARGET_REG_BITS < TARGET_LONG_BITS ? *args++ : 0); - oi = *args++; - opc = get_memop(oi); - s_bits = opc & MO_SIZE; - -#ifdef CONFIG_SOFTMMU - mem_index = get_mmuidx(oi); - addrlo = tcg_out_tlb_read(s, opc, addrlo, addrhi, mem_index, true); - - /* Load a pointer into the current opcode w/conditional branch-link. */ - label_ptr = s->code_ptr; - tcg_out_bc_noaddr(s, BC | BI(7, CR_EQ) | BO_COND_FALSE | LK); - - rbase = TCG_REG_R3; -#else /* !CONFIG_SOFTMMU */ - rbase = guest_base ? TCG_GUEST_BASE_REG : 0; - if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) { - tcg_out_ext32u(s, TCG_REG_TMP1, addrlo); - addrlo = TCG_REG_TMP1; - } -#endif - - if (TCG_TARGET_REG_BITS == 32 && s_bits == MO_64) { - if (opc & MO_BSWAP) { - tcg_out32(s, ADDI | TAI(TCG_REG_R0, addrlo, 4)); - tcg_out32(s, LWBRX | TAB(datalo, rbase, addrlo)); - tcg_out32(s, LWBRX | TAB(datahi, rbase, TCG_REG_R0)); - } else if (rbase != 0) { - tcg_out32(s, ADDI | TAI(TCG_REG_R0, addrlo, 4)); - tcg_out32(s, LWZX | TAB(datahi, rbase, addrlo)); - tcg_out32(s, LWZX | TAB(datalo, rbase, TCG_REG_R0)); - } else if (addrlo == datahi) { - tcg_out32(s, LWZ | TAI(datalo, addrlo, 4)); - tcg_out32(s, LWZ | TAI(datahi, addrlo, 0)); - } else { - tcg_out32(s, LWZ | TAI(datahi, addrlo, 0)); - tcg_out32(s, LWZ | TAI(datalo, addrlo, 4)); - } - } else { - uint32_t insn = qemu_ldx_opc[opc & (MO_BSWAP | MO_SSIZE)]; - if (!HAVE_ISA_2_06 && insn == LDBRX) { - tcg_out32(s, ADDI | TAI(TCG_REG_R0, addrlo, 4)); - tcg_out32(s, LWBRX | TAB(datalo, rbase, addrlo)); - tcg_out32(s, LWBRX | TAB(TCG_REG_R0, rbase, TCG_REG_R0)); - tcg_out_rld(s, RLDIMI, datalo, TCG_REG_R0, 32, 0); - } else if (insn) { - tcg_out32(s, insn | TAB(datalo, rbase, addrlo)); - } else { - insn = qemu_ldx_opc[opc & (MO_SIZE | MO_BSWAP)]; - tcg_out32(s, insn | TAB(datalo, rbase, addrlo)); - insn = qemu_exts_opc[s_bits]; - tcg_out32(s, insn | RA(datalo) | RS(datalo)); - } - } - -#ifdef CONFIG_SOFTMMU - add_qemu_ldst_label(s, true, oi, datalo, datahi, addrlo, addrhi, - s->code_ptr, label_ptr); -#endif -} - -static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is_64) -{ - TCGReg datalo, datahi, addrlo, rbase; - TCGReg addrhi __attribute__((unused)); - TCGMemOpIdx oi; - TCGMemOp opc, s_bits; -#ifdef CONFIG_SOFTMMU - int mem_index; - tcg_insn_unit *label_ptr; -#endif - - datalo = *args++; - datahi = (TCG_TARGET_REG_BITS == 32 && is_64 ? *args++ : 0); - addrlo = *args++; - addrhi = (TCG_TARGET_REG_BITS < TARGET_LONG_BITS ? *args++ : 0); - oi = *args++; - opc = get_memop(oi); - s_bits = opc & MO_SIZE; - -#ifdef CONFIG_SOFTMMU - mem_index = get_mmuidx(oi); - addrlo = tcg_out_tlb_read(s, opc, addrlo, addrhi, mem_index, false); - - /* Load a pointer into the current opcode w/conditional branch-link. */ - label_ptr = s->code_ptr; - tcg_out_bc_noaddr(s, BC | BI(7, CR_EQ) | BO_COND_FALSE | LK); - - rbase = TCG_REG_R3; -#else /* !CONFIG_SOFTMMU */ - rbase = guest_base ? TCG_GUEST_BASE_REG : 0; - if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) { - tcg_out_ext32u(s, TCG_REG_TMP1, addrlo); - addrlo = TCG_REG_TMP1; - } -#endif - - if (TCG_TARGET_REG_BITS == 32 && s_bits == MO_64) { - if (opc & MO_BSWAP) { - tcg_out32(s, ADDI | TAI(TCG_REG_R0, addrlo, 4)); - tcg_out32(s, STWBRX | SAB(datalo, rbase, addrlo)); - tcg_out32(s, STWBRX | SAB(datahi, rbase, TCG_REG_R0)); - } else if (rbase != 0) { - tcg_out32(s, ADDI | TAI(TCG_REG_R0, addrlo, 4)); - tcg_out32(s, STWX | SAB(datahi, rbase, addrlo)); - tcg_out32(s, STWX | SAB(datalo, rbase, TCG_REG_R0)); - } else { - tcg_out32(s, STW | TAI(datahi, addrlo, 0)); - tcg_out32(s, STW | TAI(datalo, addrlo, 4)); - } - } else { - uint32_t insn = qemu_stx_opc[opc & (MO_BSWAP | MO_SIZE)]; - if (!HAVE_ISA_2_06 && insn == STDBRX) { - tcg_out32(s, STWBRX | SAB(datalo, rbase, addrlo)); - tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, addrlo, 4)); - tcg_out_shri64(s, TCG_REG_R0, datalo, 32); - tcg_out32(s, STWBRX | SAB(TCG_REG_R0, rbase, TCG_REG_TMP1)); - } else { - tcg_out32(s, insn | SAB(datalo, rbase, addrlo)); - } - } - -#ifdef CONFIG_SOFTMMU - add_qemu_ldst_label(s, false, oi, datalo, datahi, addrlo, addrhi, - s->code_ptr, label_ptr); -#endif -} - -/* Parameters for function call generation, used in tcg.c. */ -#define TCG_TARGET_STACK_ALIGN 16 -#define TCG_TARGET_EXTEND_ARGS 1 - -#ifdef _CALL_AIX -# define LINK_AREA_SIZE (6 * SZR) -# define LR_OFFSET (1 * SZR) -# define TCG_TARGET_CALL_STACK_OFFSET (LINK_AREA_SIZE + 8 * SZR) -#elif defined(TCG_TARGET_CALL_DARWIN) -# define LINK_AREA_SIZE (6 * SZR) -# define LR_OFFSET (2 * SZR) -#elif TCG_TARGET_REG_BITS == 64 -# if defined(_CALL_ELF) && _CALL_ELF == 2 -# define LINK_AREA_SIZE (4 * SZR) -# define LR_OFFSET (1 * SZR) -# endif -#else /* TCG_TARGET_REG_BITS == 32 */ -# if defined(_CALL_SYSV) -# define LINK_AREA_SIZE (2 * SZR) -# define LR_OFFSET (1 * SZR) -# endif -#endif -#ifndef LR_OFFSET -# error "Unhandled abi" -#endif -#ifndef TCG_TARGET_CALL_STACK_OFFSET -# define TCG_TARGET_CALL_STACK_OFFSET LINK_AREA_SIZE -#endif - -#define CPU_TEMP_BUF_SIZE (CPU_TEMP_BUF_NLONGS * (int)sizeof(long)) -#define REG_SAVE_SIZE ((int)ARRAY_SIZE(tcg_target_callee_save_regs) * SZR) - -#define FRAME_SIZE ((TCG_TARGET_CALL_STACK_OFFSET \ - + TCG_STATIC_CALL_ARGS_SIZE \ - + CPU_TEMP_BUF_SIZE \ - + REG_SAVE_SIZE \ - + TCG_TARGET_STACK_ALIGN - 1) \ - & -TCG_TARGET_STACK_ALIGN) - -#define REG_SAVE_BOT (FRAME_SIZE - REG_SAVE_SIZE) - -static void tcg_target_qemu_prologue(TCGContext *s) -{ - int i; - -#ifdef _CALL_AIX - void **desc = (void **)s->code_ptr; - desc[0] = desc + 2; /* entry point */ - desc[1] = 0; /* environment pointer */ - s->code_ptr = (void *)(desc + 2); /* skip over descriptor */ -#endif - - tcg_set_frame(s, TCG_REG_CALL_STACK, REG_SAVE_BOT - CPU_TEMP_BUF_SIZE, - CPU_TEMP_BUF_SIZE); - - /* Prologue */ - tcg_out32(s, MFSPR | RT(TCG_REG_R0) | LR); - tcg_out32(s, (SZR == 8 ? STDU : STWU) - | SAI(TCG_REG_R1, TCG_REG_R1, -FRAME_SIZE)); - - for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i) { - tcg_out_st(s, TCG_TYPE_REG, tcg_target_callee_save_regs[i], - TCG_REG_R1, REG_SAVE_BOT + i * SZR); - } - tcg_out_st(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_REG_R1, FRAME_SIZE+LR_OFFSET); - -#ifndef CONFIG_SOFTMMU - if (guest_base) { - tcg_out_movi(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, guest_base); - tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG); - } -#endif - - tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]); - tcg_out32(s, MTSPR | RS(tcg_target_call_iarg_regs[1]) | CTR); - - if (USE_REG_RA) { -#ifdef _CALL_AIX - /* Make the caller load the value as the TOC into R2. */ - tb_ret_addr = s->code_ptr + 2; - desc[1] = tb_ret_addr; - tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_RA, TCG_REG_R2); - tcg_out32(s, BCCTR | BO_ALWAYS); -#elif defined(_CALL_ELF) && _CALL_ELF == 2 - /* Compute from the incoming R12 value. */ - tb_ret_addr = s->code_ptr + 2; - tcg_out32(s, ADDI | TAI(TCG_REG_RA, TCG_REG_R12, - tcg_ptr_byte_diff(tb_ret_addr, s->code_buf))); - tcg_out32(s, BCCTR | BO_ALWAYS); -#else - /* Reserve max 5 insns for the constant load. */ - tb_ret_addr = s->code_ptr + 6; - tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_RA, (intptr_t)tb_ret_addr); - tcg_out32(s, BCCTR | BO_ALWAYS); - while (s->code_ptr < tb_ret_addr) { - tcg_out32(s, NOP); - } -#endif - } else { - tcg_out32(s, BCCTR | BO_ALWAYS); - tb_ret_addr = s->code_ptr; - } - - /* Epilogue */ - assert(tb_ret_addr == s->code_ptr); - - tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_REG_R1, FRAME_SIZE+LR_OFFSET); - for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i) { - tcg_out_ld(s, TCG_TYPE_REG, tcg_target_callee_save_regs[i], - TCG_REG_R1, REG_SAVE_BOT + i * SZR); - } - tcg_out32(s, MTSPR | RS(TCG_REG_R0) | LR); - tcg_out32(s, ADDI | TAI(TCG_REG_R1, TCG_REG_R1, FRAME_SIZE)); - tcg_out32(s, BCLR | BO_ALWAYS); -} - -static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, - const int *const_args) -{ - TCGArg a0, a1, a2; - int c; - - switch (opc) { - case INDEX_op_exit_tb: - if (USE_REG_RA) { - ptrdiff_t disp = tcg_pcrel_diff(s, tb_ret_addr); - - /* Use a direct branch if we can, otherwise use the value in RA. - Note that the direct branch is always backward, thus we need - to account for the possibility of 5 insns from the movi. */ - if (!in_range_b(disp - 20)) { - tcg_out32(s, MTSPR | RS(TCG_REG_RA) | CTR); - tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R3, args[0]); - tcg_out32(s, BCCTR | BO_ALWAYS); - break; - } - } - tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R3, args[0]); - tcg_out_b(s, 0, tb_ret_addr); - break; - case INDEX_op_goto_tb: - tcg_debug_assert(s->tb_jmp_offset); - /* Direct jump. Ensure the next insns are 8-byte aligned. */ - if ((uintptr_t)s->code_ptr & 7) { - tcg_out32(s, NOP); - } - s->tb_jmp_offset[args[0]] = tcg_current_code_size(s); - /* To be replaced by either a branch+nop or a load into TMP1. */ - s->code_ptr += 2; - tcg_out32(s, MTSPR | RS(TCG_REG_TMP1) | CTR); - tcg_out32(s, BCCTR | BO_ALWAYS); - s->tb_next_offset[args[0]] = tcg_current_code_size(s); - break; - case INDEX_op_br: - { - TCGLabel *l = arg_label(args[0]); - - if (l->has_value) { - tcg_out_b(s, 0, l->u.value_ptr); - } else { - tcg_out_reloc(s, s->code_ptr, R_PPC_REL24, l, 0); - tcg_out_b_noaddr(s, B); - } - } - break; - case INDEX_op_ld8u_i32: - case INDEX_op_ld8u_i64: - tcg_out_mem_long(s, LBZ, LBZX, args[0], args[1], args[2]); - break; - case INDEX_op_ld8s_i32: - case INDEX_op_ld8s_i64: - tcg_out_mem_long(s, LBZ, LBZX, args[0], args[1], args[2]); - tcg_out32(s, EXTSB | RS(args[0]) | RA(args[0])); - break; - case INDEX_op_ld16u_i32: - case INDEX_op_ld16u_i64: - tcg_out_mem_long(s, LHZ, LHZX, args[0], args[1], args[2]); - break; - case INDEX_op_ld16s_i32: - case INDEX_op_ld16s_i64: - tcg_out_mem_long(s, LHA, LHAX, args[0], args[1], args[2]); - break; - case INDEX_op_ld_i32: - case INDEX_op_ld32u_i64: - tcg_out_mem_long(s, LWZ, LWZX, args[0], args[1], args[2]); - break; - case INDEX_op_ld32s_i64: - tcg_out_mem_long(s, LWA, LWAX, args[0], args[1], args[2]); - break; - case INDEX_op_ld_i64: - tcg_out_mem_long(s, LD, LDX, args[0], args[1], args[2]); - break; - case INDEX_op_st8_i32: - case INDEX_op_st8_i64: - tcg_out_mem_long(s, STB, STBX, args[0], args[1], args[2]); - break; - case INDEX_op_st16_i32: - case INDEX_op_st16_i64: - tcg_out_mem_long(s, STH, STHX, args[0], args[1], args[2]); - break; - case INDEX_op_st_i32: - case INDEX_op_st32_i64: - tcg_out_mem_long(s, STW, STWX, args[0], args[1], args[2]); - break; - case INDEX_op_st_i64: - tcg_out_mem_long(s, STD, STDX, args[0], args[1], args[2]); - break; - - case INDEX_op_add_i32: - a0 = args[0], a1 = args[1], a2 = args[2]; - if (const_args[2]) { - do_addi_32: - tcg_out_mem_long(s, ADDI, ADD, a0, a1, (int32_t)a2); - } else { - tcg_out32(s, ADD | TAB(a0, a1, a2)); - } - break; - case INDEX_op_sub_i32: - a0 = args[0], a1 = args[1], a2 = args[2]; - if (const_args[1]) { - if (const_args[2]) { - tcg_out_movi(s, TCG_TYPE_I32, a0, a1 - a2); - } else { - tcg_out32(s, SUBFIC | TAI(a0, a2, a1)); - } - } else if (const_args[2]) { - a2 = -a2; - goto do_addi_32; - } else { - tcg_out32(s, SUBF | TAB(a0, a2, a1)); - } - break; - - case INDEX_op_and_i32: - a0 = args[0], a1 = args[1], a2 = args[2]; - if (const_args[2]) { - tcg_out_andi32(s, a0, a1, a2); - } else { - tcg_out32(s, AND | SAB(a1, a0, a2)); - } - break; - case INDEX_op_and_i64: - a0 = args[0], a1 = args[1], a2 = args[2]; - if (const_args[2]) { - tcg_out_andi64(s, a0, a1, a2); - } else { - tcg_out32(s, AND | SAB(a1, a0, a2)); - } - break; - case INDEX_op_or_i64: - case INDEX_op_or_i32: - a0 = args[0], a1 = args[1], a2 = args[2]; - if (const_args[2]) { - tcg_out_ori32(s, a0, a1, a2); - } else { - tcg_out32(s, OR | SAB(a1, a0, a2)); - } - break; - case INDEX_op_xor_i64: - case INDEX_op_xor_i32: - a0 = args[0], a1 = args[1], a2 = args[2]; - if (const_args[2]) { - tcg_out_xori32(s, a0, a1, a2); - } else { - tcg_out32(s, XOR | SAB(a1, a0, a2)); - } - break; - case INDEX_op_andc_i32: - a0 = args[0], a1 = args[1], a2 = args[2]; - if (const_args[2]) { - tcg_out_andi32(s, a0, a1, ~a2); - } else { - tcg_out32(s, ANDC | SAB(a1, a0, a2)); - } - break; - case INDEX_op_andc_i64: - a0 = args[0], a1 = args[1], a2 = args[2]; - if (const_args[2]) { - tcg_out_andi64(s, a0, a1, ~a2); - } else { - tcg_out32(s, ANDC | SAB(a1, a0, a2)); - } - break; - case INDEX_op_orc_i32: - if (const_args[2]) { - tcg_out_ori32(s, args[0], args[1], ~args[2]); - break; - } - /* FALLTHRU */ - case INDEX_op_orc_i64: - tcg_out32(s, ORC | SAB(args[1], args[0], args[2])); - break; - case INDEX_op_eqv_i32: - if (const_args[2]) { - tcg_out_xori32(s, args[0], args[1], ~args[2]); - break; - } - /* FALLTHRU */ - case INDEX_op_eqv_i64: - tcg_out32(s, EQV | SAB(args[1], args[0], args[2])); - break; - case INDEX_op_nand_i32: - case INDEX_op_nand_i64: - tcg_out32(s, NAND | SAB(args[1], args[0], args[2])); - break; - case INDEX_op_nor_i32: - case INDEX_op_nor_i64: - tcg_out32(s, NOR | SAB(args[1], args[0], args[2])); - break; - - case INDEX_op_mul_i32: - a0 = args[0], a1 = args[1], a2 = args[2]; - if (const_args[2]) { - tcg_out32(s, MULLI | TAI(a0, a1, a2)); - } else { - tcg_out32(s, MULLW | TAB(a0, a1, a2)); - } - break; - - case INDEX_op_div_i32: - tcg_out32(s, DIVW | TAB(args[0], args[1], args[2])); - break; - - case INDEX_op_divu_i32: - tcg_out32(s, DIVWU | TAB(args[0], args[1], args[2])); - break; - - case INDEX_op_shl_i32: - if (const_args[2]) { - tcg_out_shli32(s, args[0], args[1], args[2]); - } else { - tcg_out32(s, SLW | SAB(args[1], args[0], args[2])); - } - break; - case INDEX_op_shr_i32: - if (const_args[2]) { - tcg_out_shri32(s, args[0], args[1], args[2]); - } else { - tcg_out32(s, SRW | SAB(args[1], args[0], args[2])); - } - break; - case INDEX_op_sar_i32: - if (const_args[2]) { - tcg_out32(s, SRAWI | RS(args[1]) | RA(args[0]) | SH(args[2])); - } else { - tcg_out32(s, SRAW | SAB(args[1], args[0], args[2])); - } - break; - case INDEX_op_rotl_i32: - if (const_args[2]) { - tcg_out_rlw(s, RLWINM, args[0], args[1], args[2], 0, 31); - } else { - tcg_out32(s, RLWNM | SAB(args[1], args[0], args[2]) - | MB(0) | ME(31)); - } - break; - case INDEX_op_rotr_i32: - if (const_args[2]) { - tcg_out_rlw(s, RLWINM, args[0], args[1], 32 - args[2], 0, 31); - } else { - tcg_out32(s, SUBFIC | TAI(TCG_REG_R0, args[2], 32)); - tcg_out32(s, RLWNM | SAB(args[1], args[0], TCG_REG_R0) - | MB(0) | ME(31)); - } - break; - - case INDEX_op_brcond_i32: - tcg_out_brcond(s, args[2], args[0], args[1], const_args[1], - arg_label(args[3]), TCG_TYPE_I32); - break; - case INDEX_op_brcond_i64: - tcg_out_brcond(s, args[2], args[0], args[1], const_args[1], - arg_label(args[3]), TCG_TYPE_I64); - break; - case INDEX_op_brcond2_i32: - tcg_out_brcond2(s, args, const_args); - break; - - case INDEX_op_neg_i32: - case INDEX_op_neg_i64: - tcg_out32(s, NEG | RT(args[0]) | RA(args[1])); - break; - - case INDEX_op_not_i32: - case INDEX_op_not_i64: - tcg_out32(s, NOR | SAB(args[1], args[0], args[1])); - break; - - case INDEX_op_add_i64: - a0 = args[0], a1 = args[1], a2 = args[2]; - if (const_args[2]) { - do_addi_64: - tcg_out_mem_long(s, ADDI, ADD, a0, a1, a2); - } else { - tcg_out32(s, ADD | TAB(a0, a1, a2)); - } - break; - case INDEX_op_sub_i64: - a0 = args[0], a1 = args[1], a2 = args[2]; - if (const_args[1]) { - if (const_args[2]) { - tcg_out_movi(s, TCG_TYPE_I64, a0, a1 - a2); - } else { - tcg_out32(s, SUBFIC | TAI(a0, a2, a1)); - } - } else if (const_args[2]) { - a2 = -a2; - goto do_addi_64; - } else { - tcg_out32(s, SUBF | TAB(a0, a2, a1)); - } - break; - - case INDEX_op_shl_i64: - if (const_args[2]) { - tcg_out_shli64(s, args[0], args[1], args[2]); - } else { - tcg_out32(s, SLD | SAB(args[1], args[0], args[2])); - } - break; - case INDEX_op_shr_i64: - if (const_args[2]) { - tcg_out_shri64(s, args[0], args[1], args[2]); - } else { - tcg_out32(s, SRD | SAB(args[1], args[0], args[2])); - } - break; - case INDEX_op_sar_i64: - if (const_args[2]) { - int sh = SH(args[2] & 0x1f) | (((args[2] >> 5) & 1) << 1); - tcg_out32(s, SRADI | RA(args[0]) | RS(args[1]) | sh); - } else { - tcg_out32(s, SRAD | SAB(args[1], args[0], args[2])); - } - break; - case INDEX_op_rotl_i64: - if (const_args[2]) { - tcg_out_rld(s, RLDICL, args[0], args[1], args[2], 0); - } else { - tcg_out32(s, RLDCL | SAB(args[1], args[0], args[2]) | MB64(0)); - } - break; - case INDEX_op_rotr_i64: - if (const_args[2]) { - tcg_out_rld(s, RLDICL, args[0], args[1], 64 - args[2], 0); - } else { - tcg_out32(s, SUBFIC | TAI(TCG_REG_R0, args[2], 64)); - tcg_out32(s, RLDCL | SAB(args[1], args[0], TCG_REG_R0) | MB64(0)); - } - break; - - case INDEX_op_mul_i64: - a0 = args[0], a1 = args[1], a2 = args[2]; - if (const_args[2]) { - tcg_out32(s, MULLI | TAI(a0, a1, a2)); - } else { - tcg_out32(s, MULLD | TAB(a0, a1, a2)); - } - break; - case INDEX_op_div_i64: - tcg_out32(s, DIVD | TAB(args[0], args[1], args[2])); - break; - case INDEX_op_divu_i64: - tcg_out32(s, DIVDU | TAB(args[0], args[1], args[2])); - break; - - case INDEX_op_qemu_ld_i32: - tcg_out_qemu_ld(s, args, false); - break; - case INDEX_op_qemu_ld_i64: - tcg_out_qemu_ld(s, args, true); - break; - case INDEX_op_qemu_st_i32: - tcg_out_qemu_st(s, args, false); - break; - case INDEX_op_qemu_st_i64: - tcg_out_qemu_st(s, args, true); - break; - - case INDEX_op_ext8s_i32: - case INDEX_op_ext8s_i64: - c = EXTSB; - goto gen_ext; - case INDEX_op_ext16s_i32: - case INDEX_op_ext16s_i64: - c = EXTSH; - goto gen_ext; - case INDEX_op_ext_i32_i64: - case INDEX_op_ext32s_i64: - c = EXTSW; - goto gen_ext; - gen_ext: - tcg_out32(s, c | RS(args[1]) | RA(args[0])); - break; - case INDEX_op_extu_i32_i64: - tcg_out_ext32u(s, args[0], args[1]); - break; - - case INDEX_op_setcond_i32: - tcg_out_setcond(s, TCG_TYPE_I32, args[3], args[0], args[1], args[2], - const_args[2]); - break; - case INDEX_op_setcond_i64: - tcg_out_setcond(s, TCG_TYPE_I64, args[3], args[0], args[1], args[2], - const_args[2]); - break; - case INDEX_op_setcond2_i32: - tcg_out_setcond2(s, args, const_args); - break; - - case INDEX_op_bswap16_i32: - case INDEX_op_bswap16_i64: - a0 = args[0], a1 = args[1]; - /* a1 = abcd */ - if (a0 != a1) { - /* a0 = (a1 r<< 24) & 0xff # 000c */ - tcg_out_rlw(s, RLWINM, a0, a1, 24, 24, 31); - /* a0 = (a0 & ~0xff00) | (a1 r<< 8) & 0xff00 # 00dc */ - tcg_out_rlw(s, RLWIMI, a0, a1, 8, 16, 23); - } else { - /* r0 = (a1 r<< 8) & 0xff00 # 00d0 */ - tcg_out_rlw(s, RLWINM, TCG_REG_R0, a1, 8, 16, 23); - /* a0 = (a1 r<< 24) & 0xff # 000c */ - tcg_out_rlw(s, RLWINM, a0, a1, 24, 24, 31); - /* a0 = a0 | r0 # 00dc */ - tcg_out32(s, OR | SAB(TCG_REG_R0, a0, a0)); - } - break; - - case INDEX_op_bswap32_i32: - case INDEX_op_bswap32_i64: - /* Stolen from gcc's builtin_bswap32 */ - a1 = args[1]; - a0 = args[0] == a1 ? TCG_REG_R0 : args[0]; - - /* a1 = args[1] # abcd */ - /* a0 = rotate_left (a1, 8) # bcda */ - tcg_out_rlw(s, RLWINM, a0, a1, 8, 0, 31); - /* a0 = (a0 & ~0xff000000) | ((a1 r<< 24) & 0xff000000) # dcda */ - tcg_out_rlw(s, RLWIMI, a0, a1, 24, 0, 7); - /* a0 = (a0 & ~0x0000ff00) | ((a1 r<< 24) & 0x0000ff00) # dcba */ - tcg_out_rlw(s, RLWIMI, a0, a1, 24, 16, 23); - - if (a0 == TCG_REG_R0) { - tcg_out_mov(s, TCG_TYPE_REG, args[0], a0); - } - break; - - case INDEX_op_bswap64_i64: - a0 = args[0], a1 = args[1], a2 = TCG_REG_R0; - if (a0 == a1) { - a0 = TCG_REG_R0; - a2 = a1; - } - - /* a1 = # abcd efgh */ - /* a0 = rl32(a1, 8) # 0000 fghe */ - tcg_out_rlw(s, RLWINM, a0, a1, 8, 0, 31); - /* a0 = dep(a0, rl32(a1, 24), 0xff000000) # 0000 hghe */ - tcg_out_rlw(s, RLWIMI, a0, a1, 24, 0, 7); - /* a0 = dep(a0, rl32(a1, 24), 0x0000ff00) # 0000 hgfe */ - tcg_out_rlw(s, RLWIMI, a0, a1, 24, 16, 23); - - /* a0 = rl64(a0, 32) # hgfe 0000 */ - /* a2 = rl64(a1, 32) # efgh abcd */ - tcg_out_rld(s, RLDICL, a0, a0, 32, 0); - tcg_out_rld(s, RLDICL, a2, a1, 32, 0); - - /* a0 = dep(a0, rl32(a2, 8), 0xffffffff) # hgfe bcda */ - tcg_out_rlw(s, RLWIMI, a0, a2, 8, 0, 31); - /* a0 = dep(a0, rl32(a2, 24), 0xff000000) # hgfe dcda */ - tcg_out_rlw(s, RLWIMI, a0, a2, 24, 0, 7); - /* a0 = dep(a0, rl32(a2, 24), 0x0000ff00) # hgfe dcba */ - tcg_out_rlw(s, RLWIMI, a0, a2, 24, 16, 23); - - if (a0 == 0) { - tcg_out_mov(s, TCG_TYPE_REG, args[0], a0); - } - break; - - case INDEX_op_deposit_i32: - if (const_args[2]) { - uint32_t mask = ((2u << (args[4] - 1)) - 1) << args[3]; - tcg_out_andi32(s, args[0], args[0], ~mask); - } else { - tcg_out_rlw(s, RLWIMI, args[0], args[2], args[3], - 32 - args[3] - args[4], 31 - args[3]); - } - break; - case INDEX_op_deposit_i64: - if (const_args[2]) { - uint64_t mask = ((2ull << (args[4] - 1)) - 1) << args[3]; - tcg_out_andi64(s, args[0], args[0], ~mask); - } else { - tcg_out_rld(s, RLDIMI, args[0], args[2], args[3], - 64 - args[3] - args[4]); - } - break; - - case INDEX_op_movcond_i32: - tcg_out_movcond(s, TCG_TYPE_I32, args[5], args[0], args[1], args[2], - args[3], args[4], const_args[2]); - break; - case INDEX_op_movcond_i64: - tcg_out_movcond(s, TCG_TYPE_I64, args[5], args[0], args[1], args[2], - args[3], args[4], const_args[2]); - break; - -#if TCG_TARGET_REG_BITS == 64 - case INDEX_op_add2_i64: -#else - case INDEX_op_add2_i32: -#endif - /* Note that the CA bit is defined based on the word size of the - environment. So in 64-bit mode it's always carry-out of bit 63. - The fallback code using deposit works just as well for 32-bit. */ - a0 = args[0], a1 = args[1]; - if (a0 == args[3] || (!const_args[5] && a0 == args[5])) { - a0 = TCG_REG_R0; - } - if (const_args[4]) { - tcg_out32(s, ADDIC | TAI(a0, args[2], args[4])); - } else { - tcg_out32(s, ADDC | TAB(a0, args[2], args[4])); - } - if (const_args[5]) { - tcg_out32(s, (args[5] ? ADDME : ADDZE) | RT(a1) | RA(args[3])); - } else { - tcg_out32(s, ADDE | TAB(a1, args[3], args[5])); - } - if (a0 != args[0]) { - tcg_out_mov(s, TCG_TYPE_REG, args[0], a0); - } - break; - -#if TCG_TARGET_REG_BITS == 64 - case INDEX_op_sub2_i64: -#else - case INDEX_op_sub2_i32: -#endif - a0 = args[0], a1 = args[1]; - if (a0 == args[5] || (!const_args[3] && a0 == args[3])) { - a0 = TCG_REG_R0; - } - if (const_args[2]) { - tcg_out32(s, SUBFIC | TAI(a0, args[4], args[2])); - } else { - tcg_out32(s, SUBFC | TAB(a0, args[4], args[2])); - } - if (const_args[3]) { - tcg_out32(s, (args[3] ? SUBFME : SUBFZE) | RT(a1) | RA(args[5])); - } else { - tcg_out32(s, SUBFE | TAB(a1, args[5], args[3])); - } - if (a0 != args[0]) { - tcg_out_mov(s, TCG_TYPE_REG, args[0], a0); - } - break; - - case INDEX_op_muluh_i32: - tcg_out32(s, MULHWU | TAB(args[0], args[1], args[2])); - break; - case INDEX_op_mulsh_i32: - tcg_out32(s, MULHW | TAB(args[0], args[1], args[2])); - break; - case INDEX_op_muluh_i64: - tcg_out32(s, MULHDU | TAB(args[0], args[1], args[2])); - break; - case INDEX_op_mulsh_i64: - tcg_out32(s, MULHD | TAB(args[0], args[1], args[2])); - break; - - case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */ - case INDEX_op_mov_i64: - case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi. */ - case INDEX_op_movi_i64: - case INDEX_op_call: /* Always emitted via tcg_out_call. */ - default: - tcg_abort(); - } -} - -static const TCGTargetOpDef ppc_op_defs[] = { - { INDEX_op_exit_tb, { } }, - { INDEX_op_goto_tb, { } }, - { INDEX_op_br, { } }, - - { INDEX_op_ld8u_i32, { "r", "r" } }, - { INDEX_op_ld8s_i32, { "r", "r" } }, - { INDEX_op_ld16u_i32, { "r", "r" } }, - { INDEX_op_ld16s_i32, { "r", "r" } }, - { INDEX_op_ld_i32, { "r", "r" } }, - - { INDEX_op_st8_i32, { "r", "r" } }, - { INDEX_op_st16_i32, { "r", "r" } }, - { INDEX_op_st_i32, { "r", "r" } }, - - { INDEX_op_add_i32, { "r", "r", "ri" } }, - { INDEX_op_mul_i32, { "r", "r", "rI" } }, - { INDEX_op_div_i32, { "r", "r", "r" } }, - { INDEX_op_divu_i32, { "r", "r", "r" } }, - { INDEX_op_sub_i32, { "r", "rI", "ri" } }, - { INDEX_op_and_i32, { "r", "r", "ri" } }, - { INDEX_op_or_i32, { "r", "r", "ri" } }, - { INDEX_op_xor_i32, { "r", "r", "ri" } }, - { INDEX_op_andc_i32, { "r", "r", "ri" } }, - { INDEX_op_orc_i32, { "r", "r", "ri" } }, - { INDEX_op_eqv_i32, { "r", "r", "ri" } }, - { INDEX_op_nand_i32, { "r", "r", "r" } }, - { INDEX_op_nor_i32, { "r", "r", "r" } }, - - { INDEX_op_shl_i32, { "r", "r", "ri" } }, - { INDEX_op_shr_i32, { "r", "r", "ri" } }, - { INDEX_op_sar_i32, { "r", "r", "ri" } }, - { INDEX_op_rotl_i32, { "r", "r", "ri" } }, - { INDEX_op_rotr_i32, { "r", "r", "ri" } }, - - { INDEX_op_neg_i32, { "r", "r" } }, - { INDEX_op_not_i32, { "r", "r" } }, - { INDEX_op_ext8s_i32, { "r", "r" } }, - { INDEX_op_ext16s_i32, { "r", "r" } }, - { INDEX_op_bswap16_i32, { "r", "r" } }, - { INDEX_op_bswap32_i32, { "r", "r" } }, - - { INDEX_op_brcond_i32, { "r", "ri" } }, - { INDEX_op_setcond_i32, { "r", "r", "ri" } }, - { INDEX_op_movcond_i32, { "r", "r", "ri", "rZ", "rZ" } }, - - { INDEX_op_deposit_i32, { "r", "0", "rZ" } }, - - { INDEX_op_muluh_i32, { "r", "r", "r" } }, - { INDEX_op_mulsh_i32, { "r", "r", "r" } }, - -#if TCG_TARGET_REG_BITS == 64 - { INDEX_op_ld8u_i64, { "r", "r" } }, - { INDEX_op_ld8s_i64, { "r", "r" } }, - { INDEX_op_ld16u_i64, { "r", "r" } }, - { INDEX_op_ld16s_i64, { "r", "r" } }, - { INDEX_op_ld32u_i64, { "r", "r" } }, - { INDEX_op_ld32s_i64, { "r", "r" } }, - { INDEX_op_ld_i64, { "r", "r" } }, - - { INDEX_op_st8_i64, { "r", "r" } }, - { INDEX_op_st16_i64, { "r", "r" } }, - { INDEX_op_st32_i64, { "r", "r" } }, - { INDEX_op_st_i64, { "r", "r" } }, - - { INDEX_op_add_i64, { "r", "r", "rT" } }, - { INDEX_op_sub_i64, { "r", "rI", "rT" } }, - { INDEX_op_and_i64, { "r", "r", "ri" } }, - { INDEX_op_or_i64, { "r", "r", "rU" } }, - { INDEX_op_xor_i64, { "r", "r", "rU" } }, - { INDEX_op_andc_i64, { "r", "r", "ri" } }, - { INDEX_op_orc_i64, { "r", "r", "r" } }, - { INDEX_op_eqv_i64, { "r", "r", "r" } }, - { INDEX_op_nand_i64, { "r", "r", "r" } }, - { INDEX_op_nor_i64, { "r", "r", "r" } }, - - { INDEX_op_shl_i64, { "r", "r", "ri" } }, - { INDEX_op_shr_i64, { "r", "r", "ri" } }, - { INDEX_op_sar_i64, { "r", "r", "ri" } }, - { INDEX_op_rotl_i64, { "r", "r", "ri" } }, - { INDEX_op_rotr_i64, { "r", "r", "ri" } }, - - { INDEX_op_mul_i64, { "r", "r", "rI" } }, - { INDEX_op_div_i64, { "r", "r", "r" } }, - { INDEX_op_divu_i64, { "r", "r", "r" } }, - - { INDEX_op_neg_i64, { "r", "r" } }, - { INDEX_op_not_i64, { "r", "r" } }, - { INDEX_op_ext8s_i64, { "r", "r" } }, - { INDEX_op_ext16s_i64, { "r", "r" } }, - { INDEX_op_ext32s_i64, { "r", "r" } }, - { INDEX_op_ext_i32_i64, { "r", "r" } }, - { INDEX_op_extu_i32_i64, { "r", "r" } }, - { INDEX_op_bswap16_i64, { "r", "r" } }, - { INDEX_op_bswap32_i64, { "r", "r" } }, - { INDEX_op_bswap64_i64, { "r", "r" } }, - - { INDEX_op_brcond_i64, { "r", "ri" } }, - { INDEX_op_setcond_i64, { "r", "r", "ri" } }, - { INDEX_op_movcond_i64, { "r", "r", "ri", "rZ", "rZ" } }, - - { INDEX_op_deposit_i64, { "r", "0", "rZ" } }, - - { INDEX_op_mulsh_i64, { "r", "r", "r" } }, - { INDEX_op_muluh_i64, { "r", "r", "r" } }, -#endif - -#if TCG_TARGET_REG_BITS == 32 - { INDEX_op_brcond2_i32, { "r", "r", "ri", "ri" } }, - { INDEX_op_setcond2_i32, { "r", "r", "r", "ri", "ri" } }, -#endif - -#if TCG_TARGET_REG_BITS == 64 - { INDEX_op_add2_i64, { "r", "r", "r", "r", "rI", "rZM" } }, - { INDEX_op_sub2_i64, { "r", "r", "rI", "rZM", "r", "r" } }, -#else - { INDEX_op_add2_i32, { "r", "r", "r", "r", "rI", "rZM" } }, - { INDEX_op_sub2_i32, { "r", "r", "rI", "rZM", "r", "r" } }, -#endif - -#if TCG_TARGET_REG_BITS == 64 - { INDEX_op_qemu_ld_i32, { "r", "L" } }, - { INDEX_op_qemu_st_i32, { "S", "S" } }, - { INDEX_op_qemu_ld_i64, { "r", "L" } }, - { INDEX_op_qemu_st_i64, { "S", "S" } }, -#elif TARGET_LONG_BITS == 32 - { INDEX_op_qemu_ld_i32, { "r", "L" } }, - { INDEX_op_qemu_st_i32, { "S", "S" } }, - { INDEX_op_qemu_ld_i64, { "L", "L", "L" } }, - { INDEX_op_qemu_st_i64, { "S", "S", "S" } }, -#else - { INDEX_op_qemu_ld_i32, { "r", "L", "L" } }, - { INDEX_op_qemu_st_i32, { "S", "S", "S" } }, - { INDEX_op_qemu_ld_i64, { "L", "L", "L", "L" } }, - { INDEX_op_qemu_st_i64, { "S", "S", "S", "S" } }, -#endif - - { -1 }, -}; - -static void tcg_target_init(TCGContext *s) -{ - unsigned long hwcap = qemu_getauxval(AT_HWCAP); - if (hwcap & PPC_FEATURE_ARCH_2_06) { - have_isa_2_06 = true; - } - - tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffffffff); - tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I64], 0, 0xffffffff); - tcg_regset_set32(tcg_target_call_clobber_regs, 0, - (1 << TCG_REG_R0) | - (1 << TCG_REG_R2) | - (1 << TCG_REG_R3) | - (1 << TCG_REG_R4) | - (1 << TCG_REG_R5) | - (1 << TCG_REG_R6) | - (1 << TCG_REG_R7) | - (1 << TCG_REG_R8) | - (1 << TCG_REG_R9) | - (1 << TCG_REG_R10) | - (1 << TCG_REG_R11) | - (1 << TCG_REG_R12)); - - tcg_regset_clear(s->reserved_regs); - tcg_regset_set_reg(s->reserved_regs, TCG_REG_R0); /* tcg temp */ - tcg_regset_set_reg(s->reserved_regs, TCG_REG_R1); /* stack pointer */ -#if defined(_CALL_SYSV) - tcg_regset_set_reg(s->reserved_regs, TCG_REG_R2); /* toc pointer */ -#endif -#if defined(_CALL_SYSV) || TCG_TARGET_REG_BITS == 64 - tcg_regset_set_reg(s->reserved_regs, TCG_REG_R13); /* thread pointer */ -#endif - tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP1); /* mem temp */ - if (USE_REG_RA) { - tcg_regset_set_reg(s->reserved_regs, TCG_REG_RA); /* return addr */ - } - - tcg_add_target_add_op_defs(ppc_op_defs); -} - -#ifdef __ELF__ -typedef struct { - DebugFrameCIE cie; - DebugFrameFDEHeader fde; - uint8_t fde_def_cfa[4]; - uint8_t fde_reg_ofs[ARRAY_SIZE(tcg_target_callee_save_regs) * 2 + 3]; -} DebugFrame; - -/* We're expecting a 2 byte uleb128 encoded value. */ -QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14)); - -#if TCG_TARGET_REG_BITS == 64 -# define ELF_HOST_MACHINE EM_PPC64 -#else -# define ELF_HOST_MACHINE EM_PPC -#endif - -static DebugFrame debug_frame = { - .cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */ - .cie.id = -1, - .cie.version = 1, - .cie.code_align = 1, - .cie.data_align = (-SZR & 0x7f), /* sleb128 -SZR */ - .cie.return_column = 65, - - /* Total FDE size does not include the "len" member. */ - .fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, fde.cie_offset), - - .fde_def_cfa = { - 12, TCG_REG_R1, /* DW_CFA_def_cfa r1, ... */ - (FRAME_SIZE & 0x7f) | 0x80, /* ... uleb128 FRAME_SIZE */ - (FRAME_SIZE >> 7) - }, - .fde_reg_ofs = { - /* DW_CFA_offset_extended_sf, lr, LR_OFFSET */ - 0x11, 65, (LR_OFFSET / -SZR) & 0x7f, - } -}; - -void tcg_register_jit(void *buf, size_t buf_size) -{ - uint8_t *p = &debug_frame.fde_reg_ofs[3]; - int i; - - for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i, p += 2) { - p[0] = 0x80 + tcg_target_callee_save_regs[i]; - p[1] = (FRAME_SIZE - (REG_SAVE_BOT + i * SZR)) / SZR; - } - - debug_frame.fde.func_start = (uintptr_t)buf; - debug_frame.fde.func_len = buf_size; - - tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame)); -} -#endif /* __ELF__ */ - -static size_t dcache_bsize = 16; -static size_t icache_bsize = 16; - -void flush_icache_range(uintptr_t start, uintptr_t stop) -{ - uintptr_t p, start1, stop1; - size_t dsize = dcache_bsize; - size_t isize = icache_bsize; - - start1 = start & ~(dsize - 1); - stop1 = (stop + dsize - 1) & ~(dsize - 1); - for (p = start1; p < stop1; p += dsize) { - asm volatile ("dcbst 0,%0" : : "r"(p) : "memory"); - } - asm volatile ("sync" : : : "memory"); - - start &= start & ~(isize - 1); - stop1 = (stop + isize - 1) & ~(isize - 1); - for (p = start1; p < stop1; p += isize) { - asm volatile ("icbi 0,%0" : : "r"(p) : "memory"); - } - asm volatile ("sync" : : : "memory"); - asm volatile ("isync" : : : "memory"); -} - -#if defined _AIX -#include - -static void __attribute__((constructor)) tcg_cache_init(void) -{ - icache_bsize = _system_configuration.icache_line; - dcache_bsize = _system_configuration.dcache_line; -} - -#elif defined __linux__ -static void __attribute__((constructor)) tcg_cache_init(void) -{ - unsigned long dsize = qemu_getauxval(AT_DCACHEBSIZE); - unsigned long isize = qemu_getauxval(AT_ICACHEBSIZE); - - if (dsize == 0 || isize == 0) { - if (dsize == 0) { - fprintf(stderr, "getauxval AT_DCACHEBSIZE failed\n"); - } - if (isize == 0) { - fprintf(stderr, "getauxval AT_ICACHEBSIZE failed\n"); - } - exit(1); - } - dcache_bsize = dsize; - icache_bsize = isize; -} - -#elif defined __APPLE__ -#include - -static void __attribute__((constructor)) tcg_cache_init(void) -{ - size_t len; - unsigned cacheline; - int name[2] = { CTL_HW, HW_CACHELINE }; - - len = sizeof(cacheline); - if (sysctl(name, 2, &cacheline, &len, NULL, 0)) { - perror("sysctl CTL_HW HW_CACHELINE failed"); - exit(1); - } - dcache_bsize = cacheline; - icache_bsize = cacheline; -} - -#elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__) -#include - -static void __attribute__((constructor)) tcg_cache_init(void) -{ - size_t len = 4; - unsigned cacheline; - - if (sysctlbyname ("machdep.cacheline_size", &cacheline, &len, NULL, 0)) { - fprintf(stderr, "sysctlbyname machdep.cacheline_size failed: %s\n", - strerror(errno)); - exit(1); - } - dcache_bsize = cacheline; - icache_bsize = cacheline; -} -#endif diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c new file mode 100644 index 0000000..c593344 --- /dev/null +++ b/tcg/ppc/tcg-target.inc.c @@ -0,0 +1,2762 @@ +/* + * Tiny Code Generator for QEMU + * + * Copyright (c) 2008 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "qemu/osdep.h" +#include "tcg-be-ldst.h" + +#if defined _CALL_DARWIN || defined __APPLE__ +#define TCG_TARGET_CALL_DARWIN +#endif +#ifdef _CALL_SYSV +# define TCG_TARGET_CALL_ALIGN_ARGS 1 +#endif + +/* For some memory operations, we need a scratch that isn't R0. For the AIX + calling convention, we can re-use the TOC register since we'll be reloading + it at every call. Otherwise R12 will do nicely as neither a call-saved + register nor a parameter register. */ +#ifdef _CALL_AIX +# define TCG_REG_TMP1 TCG_REG_R2 +#else +# define TCG_REG_TMP1 TCG_REG_R12 +#endif + +/* For the 64-bit target, we don't like the 5 insn sequence needed to build + full 64-bit addresses. Better to have a base register to which we can + apply a 32-bit displacement. + + There are generally three items of interest: + (1) helper functions in the main executable, + (2) TranslationBlock data structures, + (3) the return address in the epilogue. + + For user-only, we USE_STATIC_CODE_GEN_BUFFER, so the code_gen_buffer + will be inside the main executable, and thus near enough to make a + pointer to the epilogue be within 2GB of all helper functions. + + For softmmu, we'll let the kernel choose the address of code_gen_buffer, + and odds are it'll be somewhere close to the main malloc arena, and so + a pointer to the epilogue will be within 2GB of the TranslationBlocks. + + For --enable-pie, everything will be kinda near everything else, + somewhere in high memory. + + Thus we choose to keep the return address in a call-saved register. */ +#define TCG_REG_RA TCG_REG_R31 +#define USE_REG_RA (TCG_TARGET_REG_BITS == 64) + +/* Shorthand for size of a pointer. Avoid promotion to unsigned. */ +#define SZP ((int)sizeof(void *)) + +/* Shorthand for size of a register. */ +#define SZR (TCG_TARGET_REG_BITS / 8) + +#define TCG_CT_CONST_S16 0x100 +#define TCG_CT_CONST_U16 0x200 +#define TCG_CT_CONST_S32 0x400 +#define TCG_CT_CONST_U32 0x800 +#define TCG_CT_CONST_ZERO 0x1000 +#define TCG_CT_CONST_MONE 0x2000 + +static tcg_insn_unit *tb_ret_addr; + +#include "elf.h" +static bool have_isa_2_06; +#define HAVE_ISA_2_06 have_isa_2_06 +#define HAVE_ISEL have_isa_2_06 + +#ifndef CONFIG_SOFTMMU +#define TCG_GUEST_BASE_REG 30 +#endif + +#ifndef NDEBUG +static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = { + "r0", + "r1", + "r2", + "r3", + "r4", + "r5", + "r6", + "r7", + "r8", + "r9", + "r10", + "r11", + "r12", + "r13", + "r14", + "r15", + "r16", + "r17", + "r18", + "r19", + "r20", + "r21", + "r22", + "r23", + "r24", + "r25", + "r26", + "r27", + "r28", + "r29", + "r30", + "r31" +}; +#endif + +static const int tcg_target_reg_alloc_order[] = { + TCG_REG_R14, /* call saved registers */ + TCG_REG_R15, + TCG_REG_R16, + TCG_REG_R17, + TCG_REG_R18, + TCG_REG_R19, + TCG_REG_R20, + TCG_REG_R21, + TCG_REG_R22, + TCG_REG_R23, + TCG_REG_R24, + TCG_REG_R25, + TCG_REG_R26, + TCG_REG_R27, + TCG_REG_R28, + TCG_REG_R29, + TCG_REG_R30, + TCG_REG_R31, + TCG_REG_R12, /* call clobbered, non-arguments */ + TCG_REG_R11, + TCG_REG_R2, + TCG_REG_R13, + TCG_REG_R10, /* call clobbered, arguments */ + TCG_REG_R9, + TCG_REG_R8, + TCG_REG_R7, + TCG_REG_R6, + TCG_REG_R5, + TCG_REG_R4, + TCG_REG_R3, +}; + +static const int tcg_target_call_iarg_regs[] = { + TCG_REG_R3, + TCG_REG_R4, + TCG_REG_R5, + TCG_REG_R6, + TCG_REG_R7, + TCG_REG_R8, + TCG_REG_R9, + TCG_REG_R10 +}; + +static const int tcg_target_call_oarg_regs[] = { + TCG_REG_R3, + TCG_REG_R4 +}; + +static const int tcg_target_callee_save_regs[] = { +#ifdef TCG_TARGET_CALL_DARWIN + TCG_REG_R11, +#endif + TCG_REG_R14, + TCG_REG_R15, + TCG_REG_R16, + TCG_REG_R17, + TCG_REG_R18, + TCG_REG_R19, + TCG_REG_R20, + TCG_REG_R21, + TCG_REG_R22, + TCG_REG_R23, + TCG_REG_R24, + TCG_REG_R25, + TCG_REG_R26, + TCG_REG_R27, /* currently used for the global env */ + TCG_REG_R28, + TCG_REG_R29, + TCG_REG_R30, + TCG_REG_R31 +}; + +static inline bool in_range_b(tcg_target_long target) +{ + return target == sextract64(target, 0, 26); +} + +static uint32_t reloc_pc24_val(tcg_insn_unit *pc, tcg_insn_unit *target) +{ + ptrdiff_t disp = tcg_ptr_byte_diff(target, pc); + assert(in_range_b(disp)); + return disp & 0x3fffffc; +} + +static void reloc_pc24(tcg_insn_unit *pc, tcg_insn_unit *target) +{ + *pc = (*pc & ~0x3fffffc) | reloc_pc24_val(pc, target); +} + +static uint16_t reloc_pc14_val(tcg_insn_unit *pc, tcg_insn_unit *target) +{ + ptrdiff_t disp = tcg_ptr_byte_diff(target, pc); + assert(disp == (int16_t) disp); + return disp & 0xfffc; +} + +static void reloc_pc14(tcg_insn_unit *pc, tcg_insn_unit *target) +{ + *pc = (*pc & ~0xfffc) | reloc_pc14_val(pc, target); +} + +static inline void tcg_out_b_noaddr(TCGContext *s, int insn) +{ + unsigned retrans = *s->code_ptr & 0x3fffffc; + tcg_out32(s, insn | retrans); +} + +static inline void tcg_out_bc_noaddr(TCGContext *s, int insn) +{ + unsigned retrans = *s->code_ptr & 0xfffc; + tcg_out32(s, insn | retrans); +} + +static void patch_reloc(tcg_insn_unit *code_ptr, int type, + intptr_t value, intptr_t addend) +{ + tcg_insn_unit *target = (tcg_insn_unit *)value; + + assert(addend == 0); + switch (type) { + case R_PPC_REL14: + reloc_pc14(code_ptr, target); + break; + case R_PPC_REL24: + reloc_pc24(code_ptr, target); + break; + default: + tcg_abort(); + } +} + +/* parse target specific constraints */ +static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str) +{ + const char *ct_str; + + ct_str = *pct_str; + switch (ct_str[0]) { + case 'A': case 'B': case 'C': case 'D': + ct->ct |= TCG_CT_REG; + tcg_regset_set_reg(ct->u.regs, 3 + ct_str[0] - 'A'); + break; + case 'r': + ct->ct |= TCG_CT_REG; + tcg_regset_set32(ct->u.regs, 0, 0xffffffff); + break; + case 'L': /* qemu_ld constraint */ + ct->ct |= TCG_CT_REG; + tcg_regset_set32(ct->u.regs, 0, 0xffffffff); + tcg_regset_reset_reg(ct->u.regs, TCG_REG_R3); +#ifdef CONFIG_SOFTMMU + tcg_regset_reset_reg(ct->u.regs, TCG_REG_R4); + tcg_regset_reset_reg(ct->u.regs, TCG_REG_R5); +#endif + break; + case 'S': /* qemu_st constraint */ + ct->ct |= TCG_CT_REG; + tcg_regset_set32(ct->u.regs, 0, 0xffffffff); + tcg_regset_reset_reg(ct->u.regs, TCG_REG_R3); +#ifdef CONFIG_SOFTMMU + tcg_regset_reset_reg(ct->u.regs, TCG_REG_R4); + tcg_regset_reset_reg(ct->u.regs, TCG_REG_R5); + tcg_regset_reset_reg(ct->u.regs, TCG_REG_R6); +#endif + break; + case 'I': + ct->ct |= TCG_CT_CONST_S16; + break; + case 'J': + ct->ct |= TCG_CT_CONST_U16; + break; + case 'M': + ct->ct |= TCG_CT_CONST_MONE; + break; + case 'T': + ct->ct |= TCG_CT_CONST_S32; + break; + case 'U': + ct->ct |= TCG_CT_CONST_U32; + break; + case 'Z': + ct->ct |= TCG_CT_CONST_ZERO; + break; + default: + return -1; + } + ct_str++; + *pct_str = ct_str; + return 0; +} + +/* test if a constant matches the constraint */ +static int tcg_target_const_match(tcg_target_long val, TCGType type, + const TCGArgConstraint *arg_ct) +{ + int ct = arg_ct->ct; + if (ct & TCG_CT_CONST) { + return 1; + } + + /* The only 32-bit constraint we use aside from + TCG_CT_CONST is TCG_CT_CONST_S16. */ + if (type == TCG_TYPE_I32) { + val = (int32_t)val; + } + + if ((ct & TCG_CT_CONST_S16) && val == (int16_t)val) { + return 1; + } else if ((ct & TCG_CT_CONST_U16) && val == (uint16_t)val) { + return 1; + } else if ((ct & TCG_CT_CONST_S32) && val == (int32_t)val) { + return 1; + } else if ((ct & TCG_CT_CONST_U32) && val == (uint32_t)val) { + return 1; + } else if ((ct & TCG_CT_CONST_ZERO) && val == 0) { + return 1; + } else if ((ct & TCG_CT_CONST_MONE) && val == -1) { + return 1; + } + return 0; +} + +#define OPCD(opc) ((opc)<<26) +#define XO19(opc) (OPCD(19)|((opc)<<1)) +#define MD30(opc) (OPCD(30)|((opc)<<2)) +#define MDS30(opc) (OPCD(30)|((opc)<<1)) +#define XO31(opc) (OPCD(31)|((opc)<<1)) +#define XO58(opc) (OPCD(58)|(opc)) +#define XO62(opc) (OPCD(62)|(opc)) + +#define B OPCD( 18) +#define BC OPCD( 16) +#define LBZ OPCD( 34) +#define LHZ OPCD( 40) +#define LHA OPCD( 42) +#define LWZ OPCD( 32) +#define STB OPCD( 38) +#define STH OPCD( 44) +#define STW OPCD( 36) + +#define STD XO62( 0) +#define STDU XO62( 1) +#define STDX XO31(149) + +#define LD XO58( 0) +#define LDX XO31( 21) +#define LDU XO58( 1) +#define LWA XO58( 2) +#define LWAX XO31(341) + +#define ADDIC OPCD( 12) +#define ADDI OPCD( 14) +#define ADDIS OPCD( 15) +#define ORI OPCD( 24) +#define ORIS OPCD( 25) +#define XORI OPCD( 26) +#define XORIS OPCD( 27) +#define ANDI OPCD( 28) +#define ANDIS OPCD( 29) +#define MULLI OPCD( 7) +#define CMPLI OPCD( 10) +#define CMPI OPCD( 11) +#define SUBFIC OPCD( 8) + +#define LWZU OPCD( 33) +#define STWU OPCD( 37) + +#define RLWIMI OPCD( 20) +#define RLWINM OPCD( 21) +#define RLWNM OPCD( 23) + +#define RLDICL MD30( 0) +#define RLDICR MD30( 1) +#define RLDIMI MD30( 3) +#define RLDCL MDS30( 8) + +#define BCLR XO19( 16) +#define BCCTR XO19(528) +#define CRAND XO19(257) +#define CRANDC XO19(129) +#define CRNAND XO19(225) +#define CROR XO19(449) +#define CRNOR XO19( 33) + +#define EXTSB XO31(954) +#define EXTSH XO31(922) +#define EXTSW XO31(986) +#define ADD XO31(266) +#define ADDE XO31(138) +#define ADDME XO31(234) +#define ADDZE XO31(202) +#define ADDC XO31( 10) +#define AND XO31( 28) +#define SUBF XO31( 40) +#define SUBFC XO31( 8) +#define SUBFE XO31(136) +#define SUBFME XO31(232) +#define SUBFZE XO31(200) +#define OR XO31(444) +#define XOR XO31(316) +#define MULLW XO31(235) +#define MULHW XO31( 75) +#define MULHWU XO31( 11) +#define DIVW XO31(491) +#define DIVWU XO31(459) +#define CMP XO31( 0) +#define CMPL XO31( 32) +#define LHBRX XO31(790) +#define LWBRX XO31(534) +#define LDBRX XO31(532) +#define STHBRX XO31(918) +#define STWBRX XO31(662) +#define STDBRX XO31(660) +#define MFSPR XO31(339) +#define MTSPR XO31(467) +#define SRAWI XO31(824) +#define NEG XO31(104) +#define MFCR XO31( 19) +#define MFOCRF (MFCR | (1u << 20)) +#define NOR XO31(124) +#define CNTLZW XO31( 26) +#define CNTLZD XO31( 58) +#define ANDC XO31( 60) +#define ORC XO31(412) +#define EQV XO31(284) +#define NAND XO31(476) +#define ISEL XO31( 15) + +#define MULLD XO31(233) +#define MULHD XO31( 73) +#define MULHDU XO31( 9) +#define DIVD XO31(489) +#define DIVDU XO31(457) + +#define LBZX XO31( 87) +#define LHZX XO31(279) +#define LHAX XO31(343) +#define LWZX XO31( 23) +#define STBX XO31(215) +#define STHX XO31(407) +#define STWX XO31(151) + +#define SPR(a, b) ((((a)<<5)|(b))<<11) +#define LR SPR(8, 0) +#define CTR SPR(9, 0) + +#define SLW XO31( 24) +#define SRW XO31(536) +#define SRAW XO31(792) + +#define SLD XO31( 27) +#define SRD XO31(539) +#define SRAD XO31(794) +#define SRADI XO31(413<<1) + +#define TW XO31( 4) +#define TRAP (TW | TO(31)) + +#define NOP ORI /* ori 0,0,0 */ + +#define RT(r) ((r)<<21) +#define RS(r) ((r)<<21) +#define RA(r) ((r)<<16) +#define RB(r) ((r)<<11) +#define TO(t) ((t)<<21) +#define SH(s) ((s)<<11) +#define MB(b) ((b)<<6) +#define ME(e) ((e)<<1) +#define BO(o) ((o)<<21) +#define MB64(b) ((b)<<5) +#define FXM(b) (1 << (19 - (b))) + +#define LK 1 + +#define TAB(t, a, b) (RT(t) | RA(a) | RB(b)) +#define SAB(s, a, b) (RS(s) | RA(a) | RB(b)) +#define TAI(s, a, i) (RT(s) | RA(a) | ((i) & 0xffff)) +#define SAI(s, a, i) (RS(s) | RA(a) | ((i) & 0xffff)) + +#define BF(n) ((n)<<23) +#define BI(n, c) (((c)+((n)*4))<<16) +#define BT(n, c) (((c)+((n)*4))<<21) +#define BA(n, c) (((c)+((n)*4))<<16) +#define BB(n, c) (((c)+((n)*4))<<11) +#define BC_(n, c) (((c)+((n)*4))<<6) + +#define BO_COND_TRUE BO(12) +#define BO_COND_FALSE BO( 4) +#define BO_ALWAYS BO(20) + +enum { + CR_LT, + CR_GT, + CR_EQ, + CR_SO +}; + +static const uint32_t tcg_to_bc[] = { + [TCG_COND_EQ] = BC | BI(7, CR_EQ) | BO_COND_TRUE, + [TCG_COND_NE] = BC | BI(7, CR_EQ) | BO_COND_FALSE, + [TCG_COND_LT] = BC | BI(7, CR_LT) | BO_COND_TRUE, + [TCG_COND_GE] = BC | BI(7, CR_LT) | BO_COND_FALSE, + [TCG_COND_LE] = BC | BI(7, CR_GT) | BO_COND_FALSE, + [TCG_COND_GT] = BC | BI(7, CR_GT) | BO_COND_TRUE, + [TCG_COND_LTU] = BC | BI(7, CR_LT) | BO_COND_TRUE, + [TCG_COND_GEU] = BC | BI(7, CR_LT) | BO_COND_FALSE, + [TCG_COND_LEU] = BC | BI(7, CR_GT) | BO_COND_FALSE, + [TCG_COND_GTU] = BC | BI(7, CR_GT) | BO_COND_TRUE, +}; + +/* The low bit here is set if the RA and RB fields must be inverted. */ +static const uint32_t tcg_to_isel[] = { + [TCG_COND_EQ] = ISEL | BC_(7, CR_EQ), + [TCG_COND_NE] = ISEL | BC_(7, CR_EQ) | 1, + [TCG_COND_LT] = ISEL | BC_(7, CR_LT), + [TCG_COND_GE] = ISEL | BC_(7, CR_LT) | 1, + [TCG_COND_LE] = ISEL | BC_(7, CR_GT) | 1, + [TCG_COND_GT] = ISEL | BC_(7, CR_GT), + [TCG_COND_LTU] = ISEL | BC_(7, CR_LT), + [TCG_COND_GEU] = ISEL | BC_(7, CR_LT) | 1, + [TCG_COND_LEU] = ISEL | BC_(7, CR_GT) | 1, + [TCG_COND_GTU] = ISEL | BC_(7, CR_GT), +}; + +static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt, + TCGReg base, tcg_target_long offset); + +static void tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg) +{ + tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32); + if (ret != arg) { + tcg_out32(s, OR | SAB(arg, ret, arg)); + } +} + +static inline void tcg_out_rld(TCGContext *s, int op, TCGReg ra, TCGReg rs, + int sh, int mb) +{ + assert(TCG_TARGET_REG_BITS == 64); + sh = SH(sh & 0x1f) | (((sh >> 5) & 1) << 1); + mb = MB64((mb >> 5) | ((mb << 1) & 0x3f)); + tcg_out32(s, op | RA(ra) | RS(rs) | sh | mb); +} + +static inline void tcg_out_rlw(TCGContext *s, int op, TCGReg ra, TCGReg rs, + int sh, int mb, int me) +{ + tcg_out32(s, op | RA(ra) | RS(rs) | SH(sh) | MB(mb) | ME(me)); +} + +static inline void tcg_out_ext32u(TCGContext *s, TCGReg dst, TCGReg src) +{ + tcg_out_rld(s, RLDICL, dst, src, 0, 32); +} + +static inline void tcg_out_shli32(TCGContext *s, TCGReg dst, TCGReg src, int c) +{ + tcg_out_rlw(s, RLWINM, dst, src, c, 0, 31 - c); +} + +static inline void tcg_out_shli64(TCGContext *s, TCGReg dst, TCGReg src, int c) +{ + tcg_out_rld(s, RLDICR, dst, src, c, 63 - c); +} + +static inline void tcg_out_shri32(TCGContext *s, TCGReg dst, TCGReg src, int c) +{ + tcg_out_rlw(s, RLWINM, dst, src, 32 - c, c, 31); +} + +static inline void tcg_out_shri64(TCGContext *s, TCGReg dst, TCGReg src, int c) +{ + tcg_out_rld(s, RLDICL, dst, src, 64 - c, c); +} + +static void tcg_out_movi32(TCGContext *s, TCGReg ret, int32_t arg) +{ + if (arg == (int16_t) arg) { + tcg_out32(s, ADDI | TAI(ret, 0, arg)); + } else { + tcg_out32(s, ADDIS | TAI(ret, 0, arg >> 16)); + if (arg & 0xffff) { + tcg_out32(s, ORI | SAI(ret, ret, arg)); + } + } +} + +static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg ret, + tcg_target_long arg) +{ + tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32); + if (type == TCG_TYPE_I32 || arg == (int32_t)arg) { + tcg_out_movi32(s, ret, arg); + } else if (arg == (uint32_t)arg && !(arg & 0x8000)) { + tcg_out32(s, ADDI | TAI(ret, 0, arg)); + tcg_out32(s, ORIS | SAI(ret, ret, arg >> 16)); + } else { + int32_t high; + + if (USE_REG_RA) { + intptr_t diff = arg - (intptr_t)tb_ret_addr; + if (diff == (int32_t)diff) { + tcg_out_mem_long(s, ADDI, ADD, ret, TCG_REG_RA, diff); + return; + } + } + + high = arg >> 31 >> 1; + tcg_out_movi32(s, ret, high); + if (high) { + tcg_out_shli64(s, ret, ret, 32); + } + if (arg & 0xffff0000) { + tcg_out32(s, ORIS | SAI(ret, ret, arg >> 16)); + } + if (arg & 0xffff) { + tcg_out32(s, ORI | SAI(ret, ret, arg)); + } + } +} + +static bool mask_operand(uint32_t c, int *mb, int *me) +{ + uint32_t lsb, test; + + /* Accept a bit pattern like: + 0....01....1 + 1....10....0 + 0..01..10..0 + Keep track of the transitions. */ + if (c == 0 || c == -1) { + return false; + } + test = c; + lsb = test & -test; + test += lsb; + if (test & (test - 1)) { + return false; + } + + *me = clz32(lsb); + *mb = test ? clz32(test & -test) + 1 : 0; + return true; +} + +static bool mask64_operand(uint64_t c, int *mb, int *me) +{ + uint64_t lsb; + + if (c == 0) { + return false; + } + + lsb = c & -c; + /* Accept 1..10..0. */ + if (c == -lsb) { + *mb = 0; + *me = clz64(lsb); + return true; + } + /* Accept 0..01..1. */ + if (lsb == 1 && (c & (c + 1)) == 0) { + *mb = clz64(c + 1) + 1; + *me = 63; + return true; + } + return false; +} + +static void tcg_out_andi32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c) +{ + int mb, me; + + if (mask_operand(c, &mb, &me)) { + tcg_out_rlw(s, RLWINM, dst, src, 0, mb, me); + } else if ((c & 0xffff) == c) { + tcg_out32(s, ANDI | SAI(src, dst, c)); + return; + } else if ((c & 0xffff0000) == c) { + tcg_out32(s, ANDIS | SAI(src, dst, c >> 16)); + return; + } else { + tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R0, c); + tcg_out32(s, AND | SAB(src, dst, TCG_REG_R0)); + } +} + +static void tcg_out_andi64(TCGContext *s, TCGReg dst, TCGReg src, uint64_t c) +{ + int mb, me; + + assert(TCG_TARGET_REG_BITS == 64); + if (mask64_operand(c, &mb, &me)) { + if (mb == 0) { + tcg_out_rld(s, RLDICR, dst, src, 0, me); + } else { + tcg_out_rld(s, RLDICL, dst, src, 0, mb); + } + } else if ((c & 0xffff) == c) { + tcg_out32(s, ANDI | SAI(src, dst, c)); + return; + } else if ((c & 0xffff0000) == c) { + tcg_out32(s, ANDIS | SAI(src, dst, c >> 16)); + return; + } else { + tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_R0, c); + tcg_out32(s, AND | SAB(src, dst, TCG_REG_R0)); + } +} + +static void tcg_out_zori32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c, + int op_lo, int op_hi) +{ + if (c >> 16) { + tcg_out32(s, op_hi | SAI(src, dst, c >> 16)); + src = dst; + } + if (c & 0xffff) { + tcg_out32(s, op_lo | SAI(src, dst, c)); + src = dst; + } +} + +static void tcg_out_ori32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c) +{ + tcg_out_zori32(s, dst, src, c, ORI, ORIS); +} + +static void tcg_out_xori32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c) +{ + tcg_out_zori32(s, dst, src, c, XORI, XORIS); +} + +static void tcg_out_b(TCGContext *s, int mask, tcg_insn_unit *target) +{ + ptrdiff_t disp = tcg_pcrel_diff(s, target); + if (in_range_b(disp)) { + tcg_out32(s, B | (disp & 0x3fffffc) | mask); + } else { + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R0, (uintptr_t)target); + tcg_out32(s, MTSPR | RS(TCG_REG_R0) | CTR); + tcg_out32(s, BCCTR | BO_ALWAYS | mask); + } +} + +static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt, + TCGReg base, tcg_target_long offset) +{ + tcg_target_long orig = offset, l0, l1, extra = 0, align = 0; + bool is_store = false; + TCGReg rs = TCG_REG_TMP1; + + switch (opi) { + case LD: case LWA: + align = 3; + /* FALLTHRU */ + default: + if (rt != TCG_REG_R0) { + rs = rt; + break; + } + break; + case STD: + align = 3; + /* FALLTHRU */ + case STB: case STH: case STW: + is_store = true; + break; + } + + /* For unaligned, or very large offsets, use the indexed form. */ + if (offset & align || offset != (int32_t)offset) { + if (rs == base) { + rs = TCG_REG_R0; + } + tcg_debug_assert(!is_store || rs != rt); + tcg_out_movi(s, TCG_TYPE_PTR, rs, orig); + tcg_out32(s, opx | TAB(rt, base, rs)); + return; + } + + l0 = (int16_t)offset; + offset = (offset - l0) >> 16; + l1 = (int16_t)offset; + + if (l1 < 0 && orig >= 0) { + extra = 0x4000; + l1 = (int16_t)(offset - 0x4000); + } + if (l1) { + tcg_out32(s, ADDIS | TAI(rs, base, l1)); + base = rs; + } + if (extra) { + tcg_out32(s, ADDIS | TAI(rs, base, extra)); + base = rs; + } + if (opi != ADDI || base != rt || l0 != 0) { + tcg_out32(s, opi | TAI(rt, base, l0)); + } +} + +static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, + TCGReg arg1, intptr_t arg2) +{ + int opi, opx; + + assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32); + if (type == TCG_TYPE_I32) { + opi = LWZ, opx = LWZX; + } else { + opi = LD, opx = LDX; + } + tcg_out_mem_long(s, opi, opx, ret, arg1, arg2); +} + +static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, + TCGReg arg1, intptr_t arg2) +{ + int opi, opx; + + assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32); + if (type == TCG_TYPE_I32) { + opi = STW, opx = STWX; + } else { + opi = STD, opx = STDX; + } + tcg_out_mem_long(s, opi, opx, arg, arg1, arg2); +} + +static void tcg_out_cmp(TCGContext *s, int cond, TCGArg arg1, TCGArg arg2, + int const_arg2, int cr, TCGType type) +{ + int imm; + uint32_t op; + + tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32); + + /* Simplify the comparisons below wrt CMPI. */ + if (type == TCG_TYPE_I32) { + arg2 = (int32_t)arg2; + } + + switch (cond) { + case TCG_COND_EQ: + case TCG_COND_NE: + if (const_arg2) { + if ((int16_t) arg2 == arg2) { + op = CMPI; + imm = 1; + break; + } else if ((uint16_t) arg2 == arg2) { + op = CMPLI; + imm = 1; + break; + } + } + op = CMPL; + imm = 0; + break; + + case TCG_COND_LT: + case TCG_COND_GE: + case TCG_COND_LE: + case TCG_COND_GT: + if (const_arg2) { + if ((int16_t) arg2 == arg2) { + op = CMPI; + imm = 1; + break; + } + } + op = CMP; + imm = 0; + break; + + case TCG_COND_LTU: + case TCG_COND_GEU: + case TCG_COND_LEU: + case TCG_COND_GTU: + if (const_arg2) { + if ((uint16_t) arg2 == arg2) { + op = CMPLI; + imm = 1; + break; + } + } + op = CMPL; + imm = 0; + break; + + default: + tcg_abort(); + } + op |= BF(cr) | ((type == TCG_TYPE_I64) << 21); + + if (imm) { + tcg_out32(s, op | RA(arg1) | (arg2 & 0xffff)); + } else { + if (const_arg2) { + tcg_out_movi(s, type, TCG_REG_R0, arg2); + arg2 = TCG_REG_R0; + } + tcg_out32(s, op | RA(arg1) | RB(arg2)); + } +} + +static void tcg_out_setcond_eq0(TCGContext *s, TCGType type, + TCGReg dst, TCGReg src) +{ + if (type == TCG_TYPE_I32) { + tcg_out32(s, CNTLZW | RS(src) | RA(dst)); + tcg_out_shri32(s, dst, dst, 5); + } else { + tcg_out32(s, CNTLZD | RS(src) | RA(dst)); + tcg_out_shri64(s, dst, dst, 6); + } +} + +static void tcg_out_setcond_ne0(TCGContext *s, TCGReg dst, TCGReg src) +{ + /* X != 0 implies X + -1 generates a carry. Extra addition + trickery means: R = X-1 + ~X + C = X-1 + (-X+1) + C = C. */ + if (dst != src) { + tcg_out32(s, ADDIC | TAI(dst, src, -1)); + tcg_out32(s, SUBFE | TAB(dst, dst, src)); + } else { + tcg_out32(s, ADDIC | TAI(TCG_REG_R0, src, -1)); + tcg_out32(s, SUBFE | TAB(dst, TCG_REG_R0, src)); + } +} + +static TCGReg tcg_gen_setcond_xor(TCGContext *s, TCGReg arg1, TCGArg arg2, + bool const_arg2) +{ + if (const_arg2) { + if ((uint32_t)arg2 == arg2) { + tcg_out_xori32(s, TCG_REG_R0, arg1, arg2); + } else { + tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_R0, arg2); + tcg_out32(s, XOR | SAB(arg1, TCG_REG_R0, TCG_REG_R0)); + } + } else { + tcg_out32(s, XOR | SAB(arg1, TCG_REG_R0, arg2)); + } + return TCG_REG_R0; +} + +static void tcg_out_setcond(TCGContext *s, TCGType type, TCGCond cond, + TCGArg arg0, TCGArg arg1, TCGArg arg2, + int const_arg2) +{ + int crop, sh; + + assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32); + + /* Ignore high bits of a potential constant arg2. */ + if (type == TCG_TYPE_I32) { + arg2 = (uint32_t)arg2; + } + + /* Handle common and trivial cases before handling anything else. */ + if (arg2 == 0) { + switch (cond) { + case TCG_COND_EQ: + tcg_out_setcond_eq0(s, type, arg0, arg1); + return; + case TCG_COND_NE: + if (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I32) { + tcg_out_ext32u(s, TCG_REG_R0, arg1); + arg1 = TCG_REG_R0; + } + tcg_out_setcond_ne0(s, arg0, arg1); + return; + case TCG_COND_GE: + tcg_out32(s, NOR | SAB(arg1, arg0, arg1)); + arg1 = arg0; + /* FALLTHRU */ + case TCG_COND_LT: + /* Extract the sign bit. */ + if (type == TCG_TYPE_I32) { + tcg_out_shri32(s, arg0, arg1, 31); + } else { + tcg_out_shri64(s, arg0, arg1, 63); + } + return; + default: + break; + } + } + + /* If we have ISEL, we can implement everything with 3 or 4 insns. + All other cases below are also at least 3 insns, so speed up the + code generator by not considering them and always using ISEL. */ + if (HAVE_ISEL) { + int isel, tab; + + tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 7, type); + + isel = tcg_to_isel[cond]; + + tcg_out_movi(s, type, arg0, 1); + if (isel & 1) { + /* arg0 = (bc ? 0 : 1) */ + tab = TAB(arg0, 0, arg0); + isel &= ~1; + } else { + /* arg0 = (bc ? 1 : 0) */ + tcg_out_movi(s, type, TCG_REG_R0, 0); + tab = TAB(arg0, arg0, TCG_REG_R0); + } + tcg_out32(s, isel | tab); + return; + } + + switch (cond) { + case TCG_COND_EQ: + arg1 = tcg_gen_setcond_xor(s, arg1, arg2, const_arg2); + tcg_out_setcond_eq0(s, type, arg0, arg1); + return; + + case TCG_COND_NE: + arg1 = tcg_gen_setcond_xor(s, arg1, arg2, const_arg2); + /* Discard the high bits only once, rather than both inputs. */ + if (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I32) { + tcg_out_ext32u(s, TCG_REG_R0, arg1); + arg1 = TCG_REG_R0; + } + tcg_out_setcond_ne0(s, arg0, arg1); + return; + + case TCG_COND_GT: + case TCG_COND_GTU: + sh = 30; + crop = 0; + goto crtest; + + case TCG_COND_LT: + case TCG_COND_LTU: + sh = 29; + crop = 0; + goto crtest; + + case TCG_COND_GE: + case TCG_COND_GEU: + sh = 31; + crop = CRNOR | BT(7, CR_EQ) | BA(7, CR_LT) | BB(7, CR_LT); + goto crtest; + + case TCG_COND_LE: + case TCG_COND_LEU: + sh = 31; + crop = CRNOR | BT(7, CR_EQ) | BA(7, CR_GT) | BB(7, CR_GT); + crtest: + tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 7, type); + if (crop) { + tcg_out32(s, crop); + } + tcg_out32(s, MFOCRF | RT(TCG_REG_R0) | FXM(7)); + tcg_out_rlw(s, RLWINM, arg0, TCG_REG_R0, sh, 31, 31); + break; + + default: + tcg_abort(); + } +} + +static void tcg_out_bc(TCGContext *s, int bc, TCGLabel *l) +{ + if (l->has_value) { + tcg_out32(s, bc | reloc_pc14_val(s->code_ptr, l->u.value_ptr)); + } else { + tcg_out_reloc(s, s->code_ptr, R_PPC_REL14, l, 0); + tcg_out_bc_noaddr(s, bc); + } +} + +static void tcg_out_brcond(TCGContext *s, TCGCond cond, + TCGArg arg1, TCGArg arg2, int const_arg2, + TCGLabel *l, TCGType type) +{ + tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 7, type); + tcg_out_bc(s, tcg_to_bc[cond], l); +} + +static void tcg_out_movcond(TCGContext *s, TCGType type, TCGCond cond, + TCGArg dest, TCGArg c1, TCGArg c2, TCGArg v1, + TCGArg v2, bool const_c2) +{ + /* If for some reason both inputs are zero, don't produce bad code. */ + if (v1 == 0 && v2 == 0) { + tcg_out_movi(s, type, dest, 0); + return; + } + + tcg_out_cmp(s, cond, c1, c2, const_c2, 7, type); + + if (HAVE_ISEL) { + int isel = tcg_to_isel[cond]; + + /* Swap the V operands if the operation indicates inversion. */ + if (isel & 1) { + int t = v1; + v1 = v2; + v2 = t; + isel &= ~1; + } + /* V1 == 0 is handled by isel; V2 == 0 must be handled by hand. */ + if (v2 == 0) { + tcg_out_movi(s, type, TCG_REG_R0, 0); + } + tcg_out32(s, isel | TAB(dest, v1, v2)); + } else { + if (dest == v2) { + cond = tcg_invert_cond(cond); + v2 = v1; + } else if (dest != v1) { + if (v1 == 0) { + tcg_out_movi(s, type, dest, 0); + } else { + tcg_out_mov(s, type, dest, v1); + } + } + /* Branch forward over one insn */ + tcg_out32(s, tcg_to_bc[cond] | 8); + if (v2 == 0) { + tcg_out_movi(s, type, dest, 0); + } else { + tcg_out_mov(s, type, dest, v2); + } + } +} + +static void tcg_out_cmp2(TCGContext *s, const TCGArg *args, + const int *const_args) +{ + static const struct { uint8_t bit1, bit2; } bits[] = { + [TCG_COND_LT ] = { CR_LT, CR_LT }, + [TCG_COND_LE ] = { CR_LT, CR_GT }, + [TCG_COND_GT ] = { CR_GT, CR_GT }, + [TCG_COND_GE ] = { CR_GT, CR_LT }, + [TCG_COND_LTU] = { CR_LT, CR_LT }, + [TCG_COND_LEU] = { CR_LT, CR_GT }, + [TCG_COND_GTU] = { CR_GT, CR_GT }, + [TCG_COND_GEU] = { CR_GT, CR_LT }, + }; + + TCGCond cond = args[4], cond2; + TCGArg al, ah, bl, bh; + int blconst, bhconst; + int op, bit1, bit2; + + al = args[0]; + ah = args[1]; + bl = args[2]; + bh = args[3]; + blconst = const_args[2]; + bhconst = const_args[3]; + + switch (cond) { + case TCG_COND_EQ: + op = CRAND; + goto do_equality; + case TCG_COND_NE: + op = CRNAND; + do_equality: + tcg_out_cmp(s, cond, al, bl, blconst, 6, TCG_TYPE_I32); + tcg_out_cmp(s, cond, ah, bh, bhconst, 7, TCG_TYPE_I32); + tcg_out32(s, op | BT(7, CR_EQ) | BA(6, CR_EQ) | BB(7, CR_EQ)); + break; + + case TCG_COND_LT: + case TCG_COND_LE: + case TCG_COND_GT: + case TCG_COND_GE: + case TCG_COND_LTU: + case TCG_COND_LEU: + case TCG_COND_GTU: + case TCG_COND_GEU: + bit1 = bits[cond].bit1; + bit2 = bits[cond].bit2; + op = (bit1 != bit2 ? CRANDC : CRAND); + cond2 = tcg_unsigned_cond(cond); + + tcg_out_cmp(s, cond, ah, bh, bhconst, 6, TCG_TYPE_I32); + tcg_out_cmp(s, cond2, al, bl, blconst, 7, TCG_TYPE_I32); + tcg_out32(s, op | BT(7, CR_EQ) | BA(6, CR_EQ) | BB(7, bit2)); + tcg_out32(s, CROR | BT(7, CR_EQ) | BA(6, bit1) | BB(7, CR_EQ)); + break; + + default: + tcg_abort(); + } +} + +static void tcg_out_setcond2(TCGContext *s, const TCGArg *args, + const int *const_args) +{ + tcg_out_cmp2(s, args + 1, const_args + 1); + tcg_out32(s, MFOCRF | RT(TCG_REG_R0) | FXM(7)); + tcg_out_rlw(s, RLWINM, args[0], TCG_REG_R0, 31, 31, 31); +} + +static void tcg_out_brcond2 (TCGContext *s, const TCGArg *args, + const int *const_args) +{ + tcg_out_cmp2(s, args, const_args); + tcg_out_bc(s, BC | BI(7, CR_EQ) | BO_COND_TRUE, arg_label(args[5])); +} + +void ppc_tb_set_jmp_target(uintptr_t jmp_addr, uintptr_t addr) +{ + tcg_insn_unit i1, i2; + uint64_t pair; + intptr_t diff = addr - jmp_addr; + + if (in_range_b(diff)) { + i1 = B | (diff & 0x3fffffc); + i2 = NOP; + } else if (USE_REG_RA) { + intptr_t lo, hi; + diff = addr - (uintptr_t)tb_ret_addr; + lo = (int16_t)diff; + hi = (int32_t)(diff - lo); + assert(diff == hi + lo); + i1 = ADDIS | TAI(TCG_REG_TMP1, TCG_REG_RA, hi >> 16); + i2 = ADDI | TAI(TCG_REG_TMP1, TCG_REG_TMP1, lo); + } else { + assert(TCG_TARGET_REG_BITS == 32 || addr == (int32_t)addr); + i1 = ADDIS | TAI(TCG_REG_TMP1, 0, addr >> 16); + i2 = ORI | SAI(TCG_REG_TMP1, TCG_REG_TMP1, addr); + } +#ifdef HOST_WORDS_BIGENDIAN + pair = (uint64_t)i1 << 32 | i2; +#else + pair = (uint64_t)i2 << 32 | i1; +#endif + + /* ??? __atomic_store_8, presuming there's some way to do that + for 32-bit, otherwise this is good enough for 64-bit. */ + *(uint64_t *)jmp_addr = pair; + flush_icache_range(jmp_addr, jmp_addr + 8); +} + +static void tcg_out_call(TCGContext *s, tcg_insn_unit *target) +{ +#ifdef _CALL_AIX + /* Look through the descriptor. If the branch is in range, and we + don't have to spend too much effort on building the toc. */ + void *tgt = ((void **)target)[0]; + uintptr_t toc = ((uintptr_t *)target)[1]; + intptr_t diff = tcg_pcrel_diff(s, tgt); + + if (in_range_b(diff) && toc == (uint32_t)toc) { + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP1, toc); + tcg_out_b(s, LK, tgt); + } else { + /* Fold the low bits of the constant into the addresses below. */ + intptr_t arg = (intptr_t)target; + int ofs = (int16_t)arg; + + if (ofs + 8 < 0x8000) { + arg -= ofs; + } else { + ofs = 0; + } + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP1, arg); + tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_REG_TMP1, ofs); + tcg_out32(s, MTSPR | RA(TCG_REG_R0) | CTR); + tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R2, TCG_REG_TMP1, ofs + SZP); + tcg_out32(s, BCCTR | BO_ALWAYS | LK); + } +#elif defined(_CALL_ELF) && _CALL_ELF == 2 + intptr_t diff; + + /* In the ELFv2 ABI, we have to set up r12 to contain the destination + address, which the callee uses to compute its TOC address. */ + /* FIXME: when the branch is in range, we could avoid r12 load if we + knew that the destination uses the same TOC, and what its local + entry point offset is. */ + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R12, (intptr_t)target); + + diff = tcg_pcrel_diff(s, target); + if (in_range_b(diff)) { + tcg_out_b(s, LK, target); + } else { + tcg_out32(s, MTSPR | RS(TCG_REG_R12) | CTR); + tcg_out32(s, BCCTR | BO_ALWAYS | LK); + } +#else + tcg_out_b(s, LK, target); +#endif +} + +static const uint32_t qemu_ldx_opc[16] = { + [MO_UB] = LBZX, + [MO_UW] = LHZX, + [MO_UL] = LWZX, + [MO_Q] = LDX, + [MO_SW] = LHAX, + [MO_SL] = LWAX, + [MO_BSWAP | MO_UB] = LBZX, + [MO_BSWAP | MO_UW] = LHBRX, + [MO_BSWAP | MO_UL] = LWBRX, + [MO_BSWAP | MO_Q] = LDBRX, +}; + +static const uint32_t qemu_stx_opc[16] = { + [MO_UB] = STBX, + [MO_UW] = STHX, + [MO_UL] = STWX, + [MO_Q] = STDX, + [MO_BSWAP | MO_UB] = STBX, + [MO_BSWAP | MO_UW] = STHBRX, + [MO_BSWAP | MO_UL] = STWBRX, + [MO_BSWAP | MO_Q] = STDBRX, +}; + +static const uint32_t qemu_exts_opc[4] = { + EXTSB, EXTSH, EXTSW, 0 +}; + +#if defined (CONFIG_SOFTMMU) +/* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr, + * int mmu_idx, uintptr_t ra) + */ +static void * const qemu_ld_helpers[16] = { + [MO_UB] = helper_ret_ldub_mmu, + [MO_LEUW] = helper_le_lduw_mmu, + [MO_LEUL] = helper_le_ldul_mmu, + [MO_LEQ] = helper_le_ldq_mmu, + [MO_BEUW] = helper_be_lduw_mmu, + [MO_BEUL] = helper_be_ldul_mmu, + [MO_BEQ] = helper_be_ldq_mmu, +}; + +/* helper signature: helper_st_mmu(CPUState *env, target_ulong addr, + * uintxx_t val, int mmu_idx, uintptr_t ra) + */ +static void * const qemu_st_helpers[16] = { + [MO_UB] = helper_ret_stb_mmu, + [MO_LEUW] = helper_le_stw_mmu, + [MO_LEUL] = helper_le_stl_mmu, + [MO_LEQ] = helper_le_stq_mmu, + [MO_BEUW] = helper_be_stw_mmu, + [MO_BEUL] = helper_be_stl_mmu, + [MO_BEQ] = helper_be_stq_mmu, +}; + +/* Perform the TLB load and compare. Places the result of the comparison + in CR7, loads the addend of the TLB into R3, and returns the register + containing the guest address (zero-extended into R4). Clobbers R0 and R2. */ + +static TCGReg tcg_out_tlb_read(TCGContext *s, TCGMemOp opc, + TCGReg addrlo, TCGReg addrhi, + int mem_index, bool is_read) +{ + int cmp_off + = (is_read + ? offsetof(CPUArchState, tlb_table[mem_index][0].addr_read) + : offsetof(CPUArchState, tlb_table[mem_index][0].addr_write)); + int add_off = offsetof(CPUArchState, tlb_table[mem_index][0].addend); + TCGReg base = TCG_AREG0; + TCGMemOp s_bits = opc & MO_SIZE; + + /* Extract the page index, shifted into place for tlb index. */ + if (TCG_TARGET_REG_BITS == 64) { + if (TARGET_LONG_BITS == 32) { + /* Zero-extend the address into a place helpful for further use. */ + tcg_out_ext32u(s, TCG_REG_R4, addrlo); + addrlo = TCG_REG_R4; + } else { + tcg_out_rld(s, RLDICL, TCG_REG_R3, addrlo, + 64 - TARGET_PAGE_BITS, 64 - CPU_TLB_BITS); + } + } + + /* Compensate for very large offsets. */ + if (add_off >= 0x8000) { + /* Most target env are smaller than 32k; none are larger than 64k. + Simplify the logic here merely to offset by 0x7ff0, giving us a + range just shy of 64k. Check this assumption. */ + QEMU_BUILD_BUG_ON(offsetof(CPUArchState, + tlb_table[NB_MMU_MODES - 1][1]) + > 0x7ff0 + 0x7fff); + tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, base, 0x7ff0)); + base = TCG_REG_TMP1; + cmp_off -= 0x7ff0; + add_off -= 0x7ff0; + } + + /* Extraction and shifting, part 2. */ + if (TCG_TARGET_REG_BITS == 32 || TARGET_LONG_BITS == 32) { + tcg_out_rlw(s, RLWINM, TCG_REG_R3, addrlo, + 32 - (TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS), + 32 - (CPU_TLB_BITS + CPU_TLB_ENTRY_BITS), + 31 - CPU_TLB_ENTRY_BITS); + } else { + tcg_out_shli64(s, TCG_REG_R3, TCG_REG_R3, CPU_TLB_ENTRY_BITS); + } + + tcg_out32(s, ADD | TAB(TCG_REG_R3, TCG_REG_R3, base)); + + /* Load the tlb comparator. */ + if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) { + tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_R4, TCG_REG_R3, cmp_off); + tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_TMP1, TCG_REG_R3, cmp_off + 4); + } else { + tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_TMP1, TCG_REG_R3, cmp_off); + } + + /* Load the TLB addend for use on the fast path. Do this asap + to minimize any load use delay. */ + tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R3, TCG_REG_R3, add_off); + + /* Clear the non-page, non-alignment bits from the address */ + if (TCG_TARGET_REG_BITS == 32 || TARGET_LONG_BITS == 32) { + /* We don't support unaligned accesses on 32-bits, preserve + * the bottom bits and thus trigger a comparison failure on + * unaligned accesses + */ + tcg_out_rlw(s, RLWINM, TCG_REG_R0, addrlo, 0, + (32 - s_bits) & 31, 31 - TARGET_PAGE_BITS); + } else if (s_bits) { + /* > byte access, we need to handle alignment */ + if ((opc & MO_AMASK) == MO_ALIGN) { + /* Alignment required by the front-end, same as 32-bits */ + tcg_out_rld(s, RLDICL, TCG_REG_R0, addrlo, + 64 - TARGET_PAGE_BITS, TARGET_PAGE_BITS - s_bits); + tcg_out_rld(s, RLDICL, TCG_REG_R0, TCG_REG_R0, TARGET_PAGE_BITS, 0); + } else { + /* We support unaligned accesses, we need to make sure we fail + * if we cross a page boundary. The trick is to add the + * access_size-1 to the address before masking the low bits. + * That will make the address overflow to the next page if we + * cross a page boundary which will then force a mismatch of + * the TLB compare since the next page cannot possibly be in + * the same TLB index. + */ + tcg_out32(s, ADDI | TAI(TCG_REG_R0, addrlo, (1 << s_bits) - 1)); + tcg_out_rld(s, RLDICR, TCG_REG_R0, TCG_REG_R0, + 0, 63 - TARGET_PAGE_BITS); + } + } else { + /* Byte access, just chop off the bits below the page index */ + tcg_out_rld(s, RLDICR, TCG_REG_R0, addrlo, 0, 63 - TARGET_PAGE_BITS); + } + + if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) { + tcg_out_cmp(s, TCG_COND_EQ, TCG_REG_R0, TCG_REG_TMP1, + 0, 7, TCG_TYPE_I32); + tcg_out_cmp(s, TCG_COND_EQ, addrhi, TCG_REG_R4, 0, 6, TCG_TYPE_I32); + tcg_out32(s, CRAND | BT(7, CR_EQ) | BA(6, CR_EQ) | BB(7, CR_EQ)); + } else { + tcg_out_cmp(s, TCG_COND_EQ, TCG_REG_R0, TCG_REG_TMP1, + 0, 7, TCG_TYPE_TL); + } + + return addrlo; +} + +/* Record the context of a call to the out of line helper code for the slow + path for a load or store, so that we can later generate the correct + helper code. */ +static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi, + TCGReg datalo_reg, TCGReg datahi_reg, + TCGReg addrlo_reg, TCGReg addrhi_reg, + tcg_insn_unit *raddr, tcg_insn_unit *lptr) +{ + TCGLabelQemuLdst *label = new_ldst_label(s); + + label->is_ld = is_ld; + label->oi = oi; + label->datalo_reg = datalo_reg; + label->datahi_reg = datahi_reg; + label->addrlo_reg = addrlo_reg; + label->addrhi_reg = addrhi_reg; + label->raddr = raddr; + label->label_ptr[0] = lptr; +} + +static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) +{ + TCGMemOpIdx oi = lb->oi; + TCGMemOp opc = get_memop(oi); + TCGReg hi, lo, arg = TCG_REG_R3; + + reloc_pc14(lb->label_ptr[0], s->code_ptr); + + tcg_out_mov(s, TCG_TYPE_PTR, arg++, TCG_AREG0); + + lo = lb->addrlo_reg; + hi = lb->addrhi_reg; + if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) { +#ifdef TCG_TARGET_CALL_ALIGN_ARGS + arg |= 1; +#endif + tcg_out_mov(s, TCG_TYPE_I32, arg++, hi); + tcg_out_mov(s, TCG_TYPE_I32, arg++, lo); + } else { + /* If the address needed to be zero-extended, we'll have already + placed it in R4. The only remaining case is 64-bit guest. */ + tcg_out_mov(s, TCG_TYPE_TL, arg++, lo); + } + + tcg_out_movi(s, TCG_TYPE_I32, arg++, oi); + tcg_out32(s, MFSPR | RT(arg) | LR); + + tcg_out_call(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]); + + lo = lb->datalo_reg; + hi = lb->datahi_reg; + if (TCG_TARGET_REG_BITS == 32 && (opc & MO_SIZE) == MO_64) { + tcg_out_mov(s, TCG_TYPE_I32, lo, TCG_REG_R4); + tcg_out_mov(s, TCG_TYPE_I32, hi, TCG_REG_R3); + } else if (opc & MO_SIGN) { + uint32_t insn = qemu_exts_opc[opc & MO_SIZE]; + tcg_out32(s, insn | RA(lo) | RS(TCG_REG_R3)); + } else { + tcg_out_mov(s, TCG_TYPE_REG, lo, TCG_REG_R3); + } + + tcg_out_b(s, 0, lb->raddr); +} + +static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) +{ + TCGMemOpIdx oi = lb->oi; + TCGMemOp opc = get_memop(oi); + TCGMemOp s_bits = opc & MO_SIZE; + TCGReg hi, lo, arg = TCG_REG_R3; + + reloc_pc14(lb->label_ptr[0], s->code_ptr); + + tcg_out_mov(s, TCG_TYPE_PTR, arg++, TCG_AREG0); + + lo = lb->addrlo_reg; + hi = lb->addrhi_reg; + if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) { +#ifdef TCG_TARGET_CALL_ALIGN_ARGS + arg |= 1; +#endif + tcg_out_mov(s, TCG_TYPE_I32, arg++, hi); + tcg_out_mov(s, TCG_TYPE_I32, arg++, lo); + } else { + /* If the address needed to be zero-extended, we'll have already + placed it in R4. The only remaining case is 64-bit guest. */ + tcg_out_mov(s, TCG_TYPE_TL, arg++, lo); + } + + lo = lb->datalo_reg; + hi = lb->datahi_reg; + if (TCG_TARGET_REG_BITS == 32) { + switch (s_bits) { + case MO_64: +#ifdef TCG_TARGET_CALL_ALIGN_ARGS + arg |= 1; +#endif + tcg_out_mov(s, TCG_TYPE_I32, arg++, hi); + /* FALLTHRU */ + case MO_32: + tcg_out_mov(s, TCG_TYPE_I32, arg++, lo); + break; + default: + tcg_out_rlw(s, RLWINM, arg++, lo, 0, 32 - (8 << s_bits), 31); + break; + } + } else { + if (s_bits == MO_64) { + tcg_out_mov(s, TCG_TYPE_I64, arg++, lo); + } else { + tcg_out_rld(s, RLDICL, arg++, lo, 0, 64 - (8 << s_bits)); + } + } + + tcg_out_movi(s, TCG_TYPE_I32, arg++, oi); + tcg_out32(s, MFSPR | RT(arg) | LR); + + tcg_out_call(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]); + + tcg_out_b(s, 0, lb->raddr); +} +#endif /* SOFTMMU */ + +static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is_64) +{ + TCGReg datalo, datahi, addrlo, rbase; + TCGReg addrhi __attribute__((unused)); + TCGMemOpIdx oi; + TCGMemOp opc, s_bits; +#ifdef CONFIG_SOFTMMU + int mem_index; + tcg_insn_unit *label_ptr; +#endif + + datalo = *args++; + datahi = (TCG_TARGET_REG_BITS == 32 && is_64 ? *args++ : 0); + addrlo = *args++; + addrhi = (TCG_TARGET_REG_BITS < TARGET_LONG_BITS ? *args++ : 0); + oi = *args++; + opc = get_memop(oi); + s_bits = opc & MO_SIZE; + +#ifdef CONFIG_SOFTMMU + mem_index = get_mmuidx(oi); + addrlo = tcg_out_tlb_read(s, opc, addrlo, addrhi, mem_index, true); + + /* Load a pointer into the current opcode w/conditional branch-link. */ + label_ptr = s->code_ptr; + tcg_out_bc_noaddr(s, BC | BI(7, CR_EQ) | BO_COND_FALSE | LK); + + rbase = TCG_REG_R3; +#else /* !CONFIG_SOFTMMU */ + rbase = guest_base ? TCG_GUEST_BASE_REG : 0; + if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) { + tcg_out_ext32u(s, TCG_REG_TMP1, addrlo); + addrlo = TCG_REG_TMP1; + } +#endif + + if (TCG_TARGET_REG_BITS == 32 && s_bits == MO_64) { + if (opc & MO_BSWAP) { + tcg_out32(s, ADDI | TAI(TCG_REG_R0, addrlo, 4)); + tcg_out32(s, LWBRX | TAB(datalo, rbase, addrlo)); + tcg_out32(s, LWBRX | TAB(datahi, rbase, TCG_REG_R0)); + } else if (rbase != 0) { + tcg_out32(s, ADDI | TAI(TCG_REG_R0, addrlo, 4)); + tcg_out32(s, LWZX | TAB(datahi, rbase, addrlo)); + tcg_out32(s, LWZX | TAB(datalo, rbase, TCG_REG_R0)); + } else if (addrlo == datahi) { + tcg_out32(s, LWZ | TAI(datalo, addrlo, 4)); + tcg_out32(s, LWZ | TAI(datahi, addrlo, 0)); + } else { + tcg_out32(s, LWZ | TAI(datahi, addrlo, 0)); + tcg_out32(s, LWZ | TAI(datalo, addrlo, 4)); + } + } else { + uint32_t insn = qemu_ldx_opc[opc & (MO_BSWAP | MO_SSIZE)]; + if (!HAVE_ISA_2_06 && insn == LDBRX) { + tcg_out32(s, ADDI | TAI(TCG_REG_R0, addrlo, 4)); + tcg_out32(s, LWBRX | TAB(datalo, rbase, addrlo)); + tcg_out32(s, LWBRX | TAB(TCG_REG_R0, rbase, TCG_REG_R0)); + tcg_out_rld(s, RLDIMI, datalo, TCG_REG_R0, 32, 0); + } else if (insn) { + tcg_out32(s, insn | TAB(datalo, rbase, addrlo)); + } else { + insn = qemu_ldx_opc[opc & (MO_SIZE | MO_BSWAP)]; + tcg_out32(s, insn | TAB(datalo, rbase, addrlo)); + insn = qemu_exts_opc[s_bits]; + tcg_out32(s, insn | RA(datalo) | RS(datalo)); + } + } + +#ifdef CONFIG_SOFTMMU + add_qemu_ldst_label(s, true, oi, datalo, datahi, addrlo, addrhi, + s->code_ptr, label_ptr); +#endif +} + +static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is_64) +{ + TCGReg datalo, datahi, addrlo, rbase; + TCGReg addrhi __attribute__((unused)); + TCGMemOpIdx oi; + TCGMemOp opc, s_bits; +#ifdef CONFIG_SOFTMMU + int mem_index; + tcg_insn_unit *label_ptr; +#endif + + datalo = *args++; + datahi = (TCG_TARGET_REG_BITS == 32 && is_64 ? *args++ : 0); + addrlo = *args++; + addrhi = (TCG_TARGET_REG_BITS < TARGET_LONG_BITS ? *args++ : 0); + oi = *args++; + opc = get_memop(oi); + s_bits = opc & MO_SIZE; + +#ifdef CONFIG_SOFTMMU + mem_index = get_mmuidx(oi); + addrlo = tcg_out_tlb_read(s, opc, addrlo, addrhi, mem_index, false); + + /* Load a pointer into the current opcode w/conditional branch-link. */ + label_ptr = s->code_ptr; + tcg_out_bc_noaddr(s, BC | BI(7, CR_EQ) | BO_COND_FALSE | LK); + + rbase = TCG_REG_R3; +#else /* !CONFIG_SOFTMMU */ + rbase = guest_base ? TCG_GUEST_BASE_REG : 0; + if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) { + tcg_out_ext32u(s, TCG_REG_TMP1, addrlo); + addrlo = TCG_REG_TMP1; + } +#endif + + if (TCG_TARGET_REG_BITS == 32 && s_bits == MO_64) { + if (opc & MO_BSWAP) { + tcg_out32(s, ADDI | TAI(TCG_REG_R0, addrlo, 4)); + tcg_out32(s, STWBRX | SAB(datalo, rbase, addrlo)); + tcg_out32(s, STWBRX | SAB(datahi, rbase, TCG_REG_R0)); + } else if (rbase != 0) { + tcg_out32(s, ADDI | TAI(TCG_REG_R0, addrlo, 4)); + tcg_out32(s, STWX | SAB(datahi, rbase, addrlo)); + tcg_out32(s, STWX | SAB(datalo, rbase, TCG_REG_R0)); + } else { + tcg_out32(s, STW | TAI(datahi, addrlo, 0)); + tcg_out32(s, STW | TAI(datalo, addrlo, 4)); + } + } else { + uint32_t insn = qemu_stx_opc[opc & (MO_BSWAP | MO_SIZE)]; + if (!HAVE_ISA_2_06 && insn == STDBRX) { + tcg_out32(s, STWBRX | SAB(datalo, rbase, addrlo)); + tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, addrlo, 4)); + tcg_out_shri64(s, TCG_REG_R0, datalo, 32); + tcg_out32(s, STWBRX | SAB(TCG_REG_R0, rbase, TCG_REG_TMP1)); + } else { + tcg_out32(s, insn | SAB(datalo, rbase, addrlo)); + } + } + +#ifdef CONFIG_SOFTMMU + add_qemu_ldst_label(s, false, oi, datalo, datahi, addrlo, addrhi, + s->code_ptr, label_ptr); +#endif +} + +/* Parameters for function call generation, used in tcg.c. */ +#define TCG_TARGET_STACK_ALIGN 16 +#define TCG_TARGET_EXTEND_ARGS 1 + +#ifdef _CALL_AIX +# define LINK_AREA_SIZE (6 * SZR) +# define LR_OFFSET (1 * SZR) +# define TCG_TARGET_CALL_STACK_OFFSET (LINK_AREA_SIZE + 8 * SZR) +#elif defined(TCG_TARGET_CALL_DARWIN) +# define LINK_AREA_SIZE (6 * SZR) +# define LR_OFFSET (2 * SZR) +#elif TCG_TARGET_REG_BITS == 64 +# if defined(_CALL_ELF) && _CALL_ELF == 2 +# define LINK_AREA_SIZE (4 * SZR) +# define LR_OFFSET (1 * SZR) +# endif +#else /* TCG_TARGET_REG_BITS == 32 */ +# if defined(_CALL_SYSV) +# define LINK_AREA_SIZE (2 * SZR) +# define LR_OFFSET (1 * SZR) +# endif +#endif +#ifndef LR_OFFSET +# error "Unhandled abi" +#endif +#ifndef TCG_TARGET_CALL_STACK_OFFSET +# define TCG_TARGET_CALL_STACK_OFFSET LINK_AREA_SIZE +#endif + +#define CPU_TEMP_BUF_SIZE (CPU_TEMP_BUF_NLONGS * (int)sizeof(long)) +#define REG_SAVE_SIZE ((int)ARRAY_SIZE(tcg_target_callee_save_regs) * SZR) + +#define FRAME_SIZE ((TCG_TARGET_CALL_STACK_OFFSET \ + + TCG_STATIC_CALL_ARGS_SIZE \ + + CPU_TEMP_BUF_SIZE \ + + REG_SAVE_SIZE \ + + TCG_TARGET_STACK_ALIGN - 1) \ + & -TCG_TARGET_STACK_ALIGN) + +#define REG_SAVE_BOT (FRAME_SIZE - REG_SAVE_SIZE) + +static void tcg_target_qemu_prologue(TCGContext *s) +{ + int i; + +#ifdef _CALL_AIX + void **desc = (void **)s->code_ptr; + desc[0] = desc + 2; /* entry point */ + desc[1] = 0; /* environment pointer */ + s->code_ptr = (void *)(desc + 2); /* skip over descriptor */ +#endif + + tcg_set_frame(s, TCG_REG_CALL_STACK, REG_SAVE_BOT - CPU_TEMP_BUF_SIZE, + CPU_TEMP_BUF_SIZE); + + /* Prologue */ + tcg_out32(s, MFSPR | RT(TCG_REG_R0) | LR); + tcg_out32(s, (SZR == 8 ? STDU : STWU) + | SAI(TCG_REG_R1, TCG_REG_R1, -FRAME_SIZE)); + + for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i) { + tcg_out_st(s, TCG_TYPE_REG, tcg_target_callee_save_regs[i], + TCG_REG_R1, REG_SAVE_BOT + i * SZR); + } + tcg_out_st(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_REG_R1, FRAME_SIZE+LR_OFFSET); + +#ifndef CONFIG_SOFTMMU + if (guest_base) { + tcg_out_movi(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, guest_base); + tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG); + } +#endif + + tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]); + tcg_out32(s, MTSPR | RS(tcg_target_call_iarg_regs[1]) | CTR); + + if (USE_REG_RA) { +#ifdef _CALL_AIX + /* Make the caller load the value as the TOC into R2. */ + tb_ret_addr = s->code_ptr + 2; + desc[1] = tb_ret_addr; + tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_RA, TCG_REG_R2); + tcg_out32(s, BCCTR | BO_ALWAYS); +#elif defined(_CALL_ELF) && _CALL_ELF == 2 + /* Compute from the incoming R12 value. */ + tb_ret_addr = s->code_ptr + 2; + tcg_out32(s, ADDI | TAI(TCG_REG_RA, TCG_REG_R12, + tcg_ptr_byte_diff(tb_ret_addr, s->code_buf))); + tcg_out32(s, BCCTR | BO_ALWAYS); +#else + /* Reserve max 5 insns for the constant load. */ + tb_ret_addr = s->code_ptr + 6; + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_RA, (intptr_t)tb_ret_addr); + tcg_out32(s, BCCTR | BO_ALWAYS); + while (s->code_ptr < tb_ret_addr) { + tcg_out32(s, NOP); + } +#endif + } else { + tcg_out32(s, BCCTR | BO_ALWAYS); + tb_ret_addr = s->code_ptr; + } + + /* Epilogue */ + assert(tb_ret_addr == s->code_ptr); + + tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_REG_R1, FRAME_SIZE+LR_OFFSET); + for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i) { + tcg_out_ld(s, TCG_TYPE_REG, tcg_target_callee_save_regs[i], + TCG_REG_R1, REG_SAVE_BOT + i * SZR); + } + tcg_out32(s, MTSPR | RS(TCG_REG_R0) | LR); + tcg_out32(s, ADDI | TAI(TCG_REG_R1, TCG_REG_R1, FRAME_SIZE)); + tcg_out32(s, BCLR | BO_ALWAYS); +} + +static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, + const int *const_args) +{ + TCGArg a0, a1, a2; + int c; + + switch (opc) { + case INDEX_op_exit_tb: + if (USE_REG_RA) { + ptrdiff_t disp = tcg_pcrel_diff(s, tb_ret_addr); + + /* Use a direct branch if we can, otherwise use the value in RA. + Note that the direct branch is always backward, thus we need + to account for the possibility of 5 insns from the movi. */ + if (!in_range_b(disp - 20)) { + tcg_out32(s, MTSPR | RS(TCG_REG_RA) | CTR); + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R3, args[0]); + tcg_out32(s, BCCTR | BO_ALWAYS); + break; + } + } + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R3, args[0]); + tcg_out_b(s, 0, tb_ret_addr); + break; + case INDEX_op_goto_tb: + tcg_debug_assert(s->tb_jmp_offset); + /* Direct jump. Ensure the next insns are 8-byte aligned. */ + if ((uintptr_t)s->code_ptr & 7) { + tcg_out32(s, NOP); + } + s->tb_jmp_offset[args[0]] = tcg_current_code_size(s); + /* To be replaced by either a branch+nop or a load into TMP1. */ + s->code_ptr += 2; + tcg_out32(s, MTSPR | RS(TCG_REG_TMP1) | CTR); + tcg_out32(s, BCCTR | BO_ALWAYS); + s->tb_next_offset[args[0]] = tcg_current_code_size(s); + break; + case INDEX_op_br: + { + TCGLabel *l = arg_label(args[0]); + + if (l->has_value) { + tcg_out_b(s, 0, l->u.value_ptr); + } else { + tcg_out_reloc(s, s->code_ptr, R_PPC_REL24, l, 0); + tcg_out_b_noaddr(s, B); + } + } + break; + case INDEX_op_ld8u_i32: + case INDEX_op_ld8u_i64: + tcg_out_mem_long(s, LBZ, LBZX, args[0], args[1], args[2]); + break; + case INDEX_op_ld8s_i32: + case INDEX_op_ld8s_i64: + tcg_out_mem_long(s, LBZ, LBZX, args[0], args[1], args[2]); + tcg_out32(s, EXTSB | RS(args[0]) | RA(args[0])); + break; + case INDEX_op_ld16u_i32: + case INDEX_op_ld16u_i64: + tcg_out_mem_long(s, LHZ, LHZX, args[0], args[1], args[2]); + break; + case INDEX_op_ld16s_i32: + case INDEX_op_ld16s_i64: + tcg_out_mem_long(s, LHA, LHAX, args[0], args[1], args[2]); + break; + case INDEX_op_ld_i32: + case INDEX_op_ld32u_i64: + tcg_out_mem_long(s, LWZ, LWZX, args[0], args[1], args[2]); + break; + case INDEX_op_ld32s_i64: + tcg_out_mem_long(s, LWA, LWAX, args[0], args[1], args[2]); + break; + case INDEX_op_ld_i64: + tcg_out_mem_long(s, LD, LDX, args[0], args[1], args[2]); + break; + case INDEX_op_st8_i32: + case INDEX_op_st8_i64: + tcg_out_mem_long(s, STB, STBX, args[0], args[1], args[2]); + break; + case INDEX_op_st16_i32: + case INDEX_op_st16_i64: + tcg_out_mem_long(s, STH, STHX, args[0], args[1], args[2]); + break; + case INDEX_op_st_i32: + case INDEX_op_st32_i64: + tcg_out_mem_long(s, STW, STWX, args[0], args[1], args[2]); + break; + case INDEX_op_st_i64: + tcg_out_mem_long(s, STD, STDX, args[0], args[1], args[2]); + break; + + case INDEX_op_add_i32: + a0 = args[0], a1 = args[1], a2 = args[2]; + if (const_args[2]) { + do_addi_32: + tcg_out_mem_long(s, ADDI, ADD, a0, a1, (int32_t)a2); + } else { + tcg_out32(s, ADD | TAB(a0, a1, a2)); + } + break; + case INDEX_op_sub_i32: + a0 = args[0], a1 = args[1], a2 = args[2]; + if (const_args[1]) { + if (const_args[2]) { + tcg_out_movi(s, TCG_TYPE_I32, a0, a1 - a2); + } else { + tcg_out32(s, SUBFIC | TAI(a0, a2, a1)); + } + } else if (const_args[2]) { + a2 = -a2; + goto do_addi_32; + } else { + tcg_out32(s, SUBF | TAB(a0, a2, a1)); + } + break; + + case INDEX_op_and_i32: + a0 = args[0], a1 = args[1], a2 = args[2]; + if (const_args[2]) { + tcg_out_andi32(s, a0, a1, a2); + } else { + tcg_out32(s, AND | SAB(a1, a0, a2)); + } + break; + case INDEX_op_and_i64: + a0 = args[0], a1 = args[1], a2 = args[2]; + if (const_args[2]) { + tcg_out_andi64(s, a0, a1, a2); + } else { + tcg_out32(s, AND | SAB(a1, a0, a2)); + } + break; + case INDEX_op_or_i64: + case INDEX_op_or_i32: + a0 = args[0], a1 = args[1], a2 = args[2]; + if (const_args[2]) { + tcg_out_ori32(s, a0, a1, a2); + } else { + tcg_out32(s, OR | SAB(a1, a0, a2)); + } + break; + case INDEX_op_xor_i64: + case INDEX_op_xor_i32: + a0 = args[0], a1 = args[1], a2 = args[2]; + if (const_args[2]) { + tcg_out_xori32(s, a0, a1, a2); + } else { + tcg_out32(s, XOR | SAB(a1, a0, a2)); + } + break; + case INDEX_op_andc_i32: + a0 = args[0], a1 = args[1], a2 = args[2]; + if (const_args[2]) { + tcg_out_andi32(s, a0, a1, ~a2); + } else { + tcg_out32(s, ANDC | SAB(a1, a0, a2)); + } + break; + case INDEX_op_andc_i64: + a0 = args[0], a1 = args[1], a2 = args[2]; + if (const_args[2]) { + tcg_out_andi64(s, a0, a1, ~a2); + } else { + tcg_out32(s, ANDC | SAB(a1, a0, a2)); + } + break; + case INDEX_op_orc_i32: + if (const_args[2]) { + tcg_out_ori32(s, args[0], args[1], ~args[2]); + break; + } + /* FALLTHRU */ + case INDEX_op_orc_i64: + tcg_out32(s, ORC | SAB(args[1], args[0], args[2])); + break; + case INDEX_op_eqv_i32: + if (const_args[2]) { + tcg_out_xori32(s, args[0], args[1], ~args[2]); + break; + } + /* FALLTHRU */ + case INDEX_op_eqv_i64: + tcg_out32(s, EQV | SAB(args[1], args[0], args[2])); + break; + case INDEX_op_nand_i32: + case INDEX_op_nand_i64: + tcg_out32(s, NAND | SAB(args[1], args[0], args[2])); + break; + case INDEX_op_nor_i32: + case INDEX_op_nor_i64: + tcg_out32(s, NOR | SAB(args[1], args[0], args[2])); + break; + + case INDEX_op_mul_i32: + a0 = args[0], a1 = args[1], a2 = args[2]; + if (const_args[2]) { + tcg_out32(s, MULLI | TAI(a0, a1, a2)); + } else { + tcg_out32(s, MULLW | TAB(a0, a1, a2)); + } + break; + + case INDEX_op_div_i32: + tcg_out32(s, DIVW | TAB(args[0], args[1], args[2])); + break; + + case INDEX_op_divu_i32: + tcg_out32(s, DIVWU | TAB(args[0], args[1], args[2])); + break; + + case INDEX_op_shl_i32: + if (const_args[2]) { + tcg_out_shli32(s, args[0], args[1], args[2]); + } else { + tcg_out32(s, SLW | SAB(args[1], args[0], args[2])); + } + break; + case INDEX_op_shr_i32: + if (const_args[2]) { + tcg_out_shri32(s, args[0], args[1], args[2]); + } else { + tcg_out32(s, SRW | SAB(args[1], args[0], args[2])); + } + break; + case INDEX_op_sar_i32: + if (const_args[2]) { + tcg_out32(s, SRAWI | RS(args[1]) | RA(args[0]) | SH(args[2])); + } else { + tcg_out32(s, SRAW | SAB(args[1], args[0], args[2])); + } + break; + case INDEX_op_rotl_i32: + if (const_args[2]) { + tcg_out_rlw(s, RLWINM, args[0], args[1], args[2], 0, 31); + } else { + tcg_out32(s, RLWNM | SAB(args[1], args[0], args[2]) + | MB(0) | ME(31)); + } + break; + case INDEX_op_rotr_i32: + if (const_args[2]) { + tcg_out_rlw(s, RLWINM, args[0], args[1], 32 - args[2], 0, 31); + } else { + tcg_out32(s, SUBFIC | TAI(TCG_REG_R0, args[2], 32)); + tcg_out32(s, RLWNM | SAB(args[1], args[0], TCG_REG_R0) + | MB(0) | ME(31)); + } + break; + + case INDEX_op_brcond_i32: + tcg_out_brcond(s, args[2], args[0], args[1], const_args[1], + arg_label(args[3]), TCG_TYPE_I32); + break; + case INDEX_op_brcond_i64: + tcg_out_brcond(s, args[2], args[0], args[1], const_args[1], + arg_label(args[3]), TCG_TYPE_I64); + break; + case INDEX_op_brcond2_i32: + tcg_out_brcond2(s, args, const_args); + break; + + case INDEX_op_neg_i32: + case INDEX_op_neg_i64: + tcg_out32(s, NEG | RT(args[0]) | RA(args[1])); + break; + + case INDEX_op_not_i32: + case INDEX_op_not_i64: + tcg_out32(s, NOR | SAB(args[1], args[0], args[1])); + break; + + case INDEX_op_add_i64: + a0 = args[0], a1 = args[1], a2 = args[2]; + if (const_args[2]) { + do_addi_64: + tcg_out_mem_long(s, ADDI, ADD, a0, a1, a2); + } else { + tcg_out32(s, ADD | TAB(a0, a1, a2)); + } + break; + case INDEX_op_sub_i64: + a0 = args[0], a1 = args[1], a2 = args[2]; + if (const_args[1]) { + if (const_args[2]) { + tcg_out_movi(s, TCG_TYPE_I64, a0, a1 - a2); + } else { + tcg_out32(s, SUBFIC | TAI(a0, a2, a1)); + } + } else if (const_args[2]) { + a2 = -a2; + goto do_addi_64; + } else { + tcg_out32(s, SUBF | TAB(a0, a2, a1)); + } + break; + + case INDEX_op_shl_i64: + if (const_args[2]) { + tcg_out_shli64(s, args[0], args[1], args[2]); + } else { + tcg_out32(s, SLD | SAB(args[1], args[0], args[2])); + } + break; + case INDEX_op_shr_i64: + if (const_args[2]) { + tcg_out_shri64(s, args[0], args[1], args[2]); + } else { + tcg_out32(s, SRD | SAB(args[1], args[0], args[2])); + } + break; + case INDEX_op_sar_i64: + if (const_args[2]) { + int sh = SH(args[2] & 0x1f) | (((args[2] >> 5) & 1) << 1); + tcg_out32(s, SRADI | RA(args[0]) | RS(args[1]) | sh); + } else { + tcg_out32(s, SRAD | SAB(args[1], args[0], args[2])); + } + break; + case INDEX_op_rotl_i64: + if (const_args[2]) { + tcg_out_rld(s, RLDICL, args[0], args[1], args[2], 0); + } else { + tcg_out32(s, RLDCL | SAB(args[1], args[0], args[2]) | MB64(0)); + } + break; + case INDEX_op_rotr_i64: + if (const_args[2]) { + tcg_out_rld(s, RLDICL, args[0], args[1], 64 - args[2], 0); + } else { + tcg_out32(s, SUBFIC | TAI(TCG_REG_R0, args[2], 64)); + tcg_out32(s, RLDCL | SAB(args[1], args[0], TCG_REG_R0) | MB64(0)); + } + break; + + case INDEX_op_mul_i64: + a0 = args[0], a1 = args[1], a2 = args[2]; + if (const_args[2]) { + tcg_out32(s, MULLI | TAI(a0, a1, a2)); + } else { + tcg_out32(s, MULLD | TAB(a0, a1, a2)); + } + break; + case INDEX_op_div_i64: + tcg_out32(s, DIVD | TAB(args[0], args[1], args[2])); + break; + case INDEX_op_divu_i64: + tcg_out32(s, DIVDU | TAB(args[0], args[1], args[2])); + break; + + case INDEX_op_qemu_ld_i32: + tcg_out_qemu_ld(s, args, false); + break; + case INDEX_op_qemu_ld_i64: + tcg_out_qemu_ld(s, args, true); + break; + case INDEX_op_qemu_st_i32: + tcg_out_qemu_st(s, args, false); + break; + case INDEX_op_qemu_st_i64: + tcg_out_qemu_st(s, args, true); + break; + + case INDEX_op_ext8s_i32: + case INDEX_op_ext8s_i64: + c = EXTSB; + goto gen_ext; + case INDEX_op_ext16s_i32: + case INDEX_op_ext16s_i64: + c = EXTSH; + goto gen_ext; + case INDEX_op_ext_i32_i64: + case INDEX_op_ext32s_i64: + c = EXTSW; + goto gen_ext; + gen_ext: + tcg_out32(s, c | RS(args[1]) | RA(args[0])); + break; + case INDEX_op_extu_i32_i64: + tcg_out_ext32u(s, args[0], args[1]); + break; + + case INDEX_op_setcond_i32: + tcg_out_setcond(s, TCG_TYPE_I32, args[3], args[0], args[1], args[2], + const_args[2]); + break; + case INDEX_op_setcond_i64: + tcg_out_setcond(s, TCG_TYPE_I64, args[3], args[0], args[1], args[2], + const_args[2]); + break; + case INDEX_op_setcond2_i32: + tcg_out_setcond2(s, args, const_args); + break; + + case INDEX_op_bswap16_i32: + case INDEX_op_bswap16_i64: + a0 = args[0], a1 = args[1]; + /* a1 = abcd */ + if (a0 != a1) { + /* a0 = (a1 r<< 24) & 0xff # 000c */ + tcg_out_rlw(s, RLWINM, a0, a1, 24, 24, 31); + /* a0 = (a0 & ~0xff00) | (a1 r<< 8) & 0xff00 # 00dc */ + tcg_out_rlw(s, RLWIMI, a0, a1, 8, 16, 23); + } else { + /* r0 = (a1 r<< 8) & 0xff00 # 00d0 */ + tcg_out_rlw(s, RLWINM, TCG_REG_R0, a1, 8, 16, 23); + /* a0 = (a1 r<< 24) & 0xff # 000c */ + tcg_out_rlw(s, RLWINM, a0, a1, 24, 24, 31); + /* a0 = a0 | r0 # 00dc */ + tcg_out32(s, OR | SAB(TCG_REG_R0, a0, a0)); + } + break; + + case INDEX_op_bswap32_i32: + case INDEX_op_bswap32_i64: + /* Stolen from gcc's builtin_bswap32 */ + a1 = args[1]; + a0 = args[0] == a1 ? TCG_REG_R0 : args[0]; + + /* a1 = args[1] # abcd */ + /* a0 = rotate_left (a1, 8) # bcda */ + tcg_out_rlw(s, RLWINM, a0, a1, 8, 0, 31); + /* a0 = (a0 & ~0xff000000) | ((a1 r<< 24) & 0xff000000) # dcda */ + tcg_out_rlw(s, RLWIMI, a0, a1, 24, 0, 7); + /* a0 = (a0 & ~0x0000ff00) | ((a1 r<< 24) & 0x0000ff00) # dcba */ + tcg_out_rlw(s, RLWIMI, a0, a1, 24, 16, 23); + + if (a0 == TCG_REG_R0) { + tcg_out_mov(s, TCG_TYPE_REG, args[0], a0); + } + break; + + case INDEX_op_bswap64_i64: + a0 = args[0], a1 = args[1], a2 = TCG_REG_R0; + if (a0 == a1) { + a0 = TCG_REG_R0; + a2 = a1; + } + + /* a1 = # abcd efgh */ + /* a0 = rl32(a1, 8) # 0000 fghe */ + tcg_out_rlw(s, RLWINM, a0, a1, 8, 0, 31); + /* a0 = dep(a0, rl32(a1, 24), 0xff000000) # 0000 hghe */ + tcg_out_rlw(s, RLWIMI, a0, a1, 24, 0, 7); + /* a0 = dep(a0, rl32(a1, 24), 0x0000ff00) # 0000 hgfe */ + tcg_out_rlw(s, RLWIMI, a0, a1, 24, 16, 23); + + /* a0 = rl64(a0, 32) # hgfe 0000 */ + /* a2 = rl64(a1, 32) # efgh abcd */ + tcg_out_rld(s, RLDICL, a0, a0, 32, 0); + tcg_out_rld(s, RLDICL, a2, a1, 32, 0); + + /* a0 = dep(a0, rl32(a2, 8), 0xffffffff) # hgfe bcda */ + tcg_out_rlw(s, RLWIMI, a0, a2, 8, 0, 31); + /* a0 = dep(a0, rl32(a2, 24), 0xff000000) # hgfe dcda */ + tcg_out_rlw(s, RLWIMI, a0, a2, 24, 0, 7); + /* a0 = dep(a0, rl32(a2, 24), 0x0000ff00) # hgfe dcba */ + tcg_out_rlw(s, RLWIMI, a0, a2, 24, 16, 23); + + if (a0 == 0) { + tcg_out_mov(s, TCG_TYPE_REG, args[0], a0); + } + break; + + case INDEX_op_deposit_i32: + if (const_args[2]) { + uint32_t mask = ((2u << (args[4] - 1)) - 1) << args[3]; + tcg_out_andi32(s, args[0], args[0], ~mask); + } else { + tcg_out_rlw(s, RLWIMI, args[0], args[2], args[3], + 32 - args[3] - args[4], 31 - args[3]); + } + break; + case INDEX_op_deposit_i64: + if (const_args[2]) { + uint64_t mask = ((2ull << (args[4] - 1)) - 1) << args[3]; + tcg_out_andi64(s, args[0], args[0], ~mask); + } else { + tcg_out_rld(s, RLDIMI, args[0], args[2], args[3], + 64 - args[3] - args[4]); + } + break; + + case INDEX_op_movcond_i32: + tcg_out_movcond(s, TCG_TYPE_I32, args[5], args[0], args[1], args[2], + args[3], args[4], const_args[2]); + break; + case INDEX_op_movcond_i64: + tcg_out_movcond(s, TCG_TYPE_I64, args[5], args[0], args[1], args[2], + args[3], args[4], const_args[2]); + break; + +#if TCG_TARGET_REG_BITS == 64 + case INDEX_op_add2_i64: +#else + case INDEX_op_add2_i32: +#endif + /* Note that the CA bit is defined based on the word size of the + environment. So in 64-bit mode it's always carry-out of bit 63. + The fallback code using deposit works just as well for 32-bit. */ + a0 = args[0], a1 = args[1]; + if (a0 == args[3] || (!const_args[5] && a0 == args[5])) { + a0 = TCG_REG_R0; + } + if (const_args[4]) { + tcg_out32(s, ADDIC | TAI(a0, args[2], args[4])); + } else { + tcg_out32(s, ADDC | TAB(a0, args[2], args[4])); + } + if (const_args[5]) { + tcg_out32(s, (args[5] ? ADDME : ADDZE) | RT(a1) | RA(args[3])); + } else { + tcg_out32(s, ADDE | TAB(a1, args[3], args[5])); + } + if (a0 != args[0]) { + tcg_out_mov(s, TCG_TYPE_REG, args[0], a0); + } + break; + +#if TCG_TARGET_REG_BITS == 64 + case INDEX_op_sub2_i64: +#else + case INDEX_op_sub2_i32: +#endif + a0 = args[0], a1 = args[1]; + if (a0 == args[5] || (!const_args[3] && a0 == args[3])) { + a0 = TCG_REG_R0; + } + if (const_args[2]) { + tcg_out32(s, SUBFIC | TAI(a0, args[4], args[2])); + } else { + tcg_out32(s, SUBFC | TAB(a0, args[4], args[2])); + } + if (const_args[3]) { + tcg_out32(s, (args[3] ? SUBFME : SUBFZE) | RT(a1) | RA(args[5])); + } else { + tcg_out32(s, SUBFE | TAB(a1, args[5], args[3])); + } + if (a0 != args[0]) { + tcg_out_mov(s, TCG_TYPE_REG, args[0], a0); + } + break; + + case INDEX_op_muluh_i32: + tcg_out32(s, MULHWU | TAB(args[0], args[1], args[2])); + break; + case INDEX_op_mulsh_i32: + tcg_out32(s, MULHW | TAB(args[0], args[1], args[2])); + break; + case INDEX_op_muluh_i64: + tcg_out32(s, MULHDU | TAB(args[0], args[1], args[2])); + break; + case INDEX_op_mulsh_i64: + tcg_out32(s, MULHD | TAB(args[0], args[1], args[2])); + break; + + case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */ + case INDEX_op_mov_i64: + case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi. */ + case INDEX_op_movi_i64: + case INDEX_op_call: /* Always emitted via tcg_out_call. */ + default: + tcg_abort(); + } +} + +static const TCGTargetOpDef ppc_op_defs[] = { + { INDEX_op_exit_tb, { } }, + { INDEX_op_goto_tb, { } }, + { INDEX_op_br, { } }, + + { INDEX_op_ld8u_i32, { "r", "r" } }, + { INDEX_op_ld8s_i32, { "r", "r" } }, + { INDEX_op_ld16u_i32, { "r", "r" } }, + { INDEX_op_ld16s_i32, { "r", "r" } }, + { INDEX_op_ld_i32, { "r", "r" } }, + + { INDEX_op_st8_i32, { "r", "r" } }, + { INDEX_op_st16_i32, { "r", "r" } }, + { INDEX_op_st_i32, { "r", "r" } }, + + { INDEX_op_add_i32, { "r", "r", "ri" } }, + { INDEX_op_mul_i32, { "r", "r", "rI" } }, + { INDEX_op_div_i32, { "r", "r", "r" } }, + { INDEX_op_divu_i32, { "r", "r", "r" } }, + { INDEX_op_sub_i32, { "r", "rI", "ri" } }, + { INDEX_op_and_i32, { "r", "r", "ri" } }, + { INDEX_op_or_i32, { "r", "r", "ri" } }, + { INDEX_op_xor_i32, { "r", "r", "ri" } }, + { INDEX_op_andc_i32, { "r", "r", "ri" } }, + { INDEX_op_orc_i32, { "r", "r", "ri" } }, + { INDEX_op_eqv_i32, { "r", "r", "ri" } }, + { INDEX_op_nand_i32, { "r", "r", "r" } }, + { INDEX_op_nor_i32, { "r", "r", "r" } }, + + { INDEX_op_shl_i32, { "r", "r", "ri" } }, + { INDEX_op_shr_i32, { "r", "r", "ri" } }, + { INDEX_op_sar_i32, { "r", "r", "ri" } }, + { INDEX_op_rotl_i32, { "r", "r", "ri" } }, + { INDEX_op_rotr_i32, { "r", "r", "ri" } }, + + { INDEX_op_neg_i32, { "r", "r" } }, + { INDEX_op_not_i32, { "r", "r" } }, + { INDEX_op_ext8s_i32, { "r", "r" } }, + { INDEX_op_ext16s_i32, { "r", "r" } }, + { INDEX_op_bswap16_i32, { "r", "r" } }, + { INDEX_op_bswap32_i32, { "r", "r" } }, + + { INDEX_op_brcond_i32, { "r", "ri" } }, + { INDEX_op_setcond_i32, { "r", "r", "ri" } }, + { INDEX_op_movcond_i32, { "r", "r", "ri", "rZ", "rZ" } }, + + { INDEX_op_deposit_i32, { "r", "0", "rZ" } }, + + { INDEX_op_muluh_i32, { "r", "r", "r" } }, + { INDEX_op_mulsh_i32, { "r", "r", "r" } }, + +#if TCG_TARGET_REG_BITS == 64 + { INDEX_op_ld8u_i64, { "r", "r" } }, + { INDEX_op_ld8s_i64, { "r", "r" } }, + { INDEX_op_ld16u_i64, { "r", "r" } }, + { INDEX_op_ld16s_i64, { "r", "r" } }, + { INDEX_op_ld32u_i64, { "r", "r" } }, + { INDEX_op_ld32s_i64, { "r", "r" } }, + { INDEX_op_ld_i64, { "r", "r" } }, + + { INDEX_op_st8_i64, { "r", "r" } }, + { INDEX_op_st16_i64, { "r", "r" } }, + { INDEX_op_st32_i64, { "r", "r" } }, + { INDEX_op_st_i64, { "r", "r" } }, + + { INDEX_op_add_i64, { "r", "r", "rT" } }, + { INDEX_op_sub_i64, { "r", "rI", "rT" } }, + { INDEX_op_and_i64, { "r", "r", "ri" } }, + { INDEX_op_or_i64, { "r", "r", "rU" } }, + { INDEX_op_xor_i64, { "r", "r", "rU" } }, + { INDEX_op_andc_i64, { "r", "r", "ri" } }, + { INDEX_op_orc_i64, { "r", "r", "r" } }, + { INDEX_op_eqv_i64, { "r", "r", "r" } }, + { INDEX_op_nand_i64, { "r", "r", "r" } }, + { INDEX_op_nor_i64, { "r", "r", "r" } }, + + { INDEX_op_shl_i64, { "r", "r", "ri" } }, + { INDEX_op_shr_i64, { "r", "r", "ri" } }, + { INDEX_op_sar_i64, { "r", "r", "ri" } }, + { INDEX_op_rotl_i64, { "r", "r", "ri" } }, + { INDEX_op_rotr_i64, { "r", "r", "ri" } }, + + { INDEX_op_mul_i64, { "r", "r", "rI" } }, + { INDEX_op_div_i64, { "r", "r", "r" } }, + { INDEX_op_divu_i64, { "r", "r", "r" } }, + + { INDEX_op_neg_i64, { "r", "r" } }, + { INDEX_op_not_i64, { "r", "r" } }, + { INDEX_op_ext8s_i64, { "r", "r" } }, + { INDEX_op_ext16s_i64, { "r", "r" } }, + { INDEX_op_ext32s_i64, { "r", "r" } }, + { INDEX_op_ext_i32_i64, { "r", "r" } }, + { INDEX_op_extu_i32_i64, { "r", "r" } }, + { INDEX_op_bswap16_i64, { "r", "r" } }, + { INDEX_op_bswap32_i64, { "r", "r" } }, + { INDEX_op_bswap64_i64, { "r", "r" } }, + + { INDEX_op_brcond_i64, { "r", "ri" } }, + { INDEX_op_setcond_i64, { "r", "r", "ri" } }, + { INDEX_op_movcond_i64, { "r", "r", "ri", "rZ", "rZ" } }, + + { INDEX_op_deposit_i64, { "r", "0", "rZ" } }, + + { INDEX_op_mulsh_i64, { "r", "r", "r" } }, + { INDEX_op_muluh_i64, { "r", "r", "r" } }, +#endif + +#if TCG_TARGET_REG_BITS == 32 + { INDEX_op_brcond2_i32, { "r", "r", "ri", "ri" } }, + { INDEX_op_setcond2_i32, { "r", "r", "r", "ri", "ri" } }, +#endif + +#if TCG_TARGET_REG_BITS == 64 + { INDEX_op_add2_i64, { "r", "r", "r", "r", "rI", "rZM" } }, + { INDEX_op_sub2_i64, { "r", "r", "rI", "rZM", "r", "r" } }, +#else + { INDEX_op_add2_i32, { "r", "r", "r", "r", "rI", "rZM" } }, + { INDEX_op_sub2_i32, { "r", "r", "rI", "rZM", "r", "r" } }, +#endif + +#if TCG_TARGET_REG_BITS == 64 + { INDEX_op_qemu_ld_i32, { "r", "L" } }, + { INDEX_op_qemu_st_i32, { "S", "S" } }, + { INDEX_op_qemu_ld_i64, { "r", "L" } }, + { INDEX_op_qemu_st_i64, { "S", "S" } }, +#elif TARGET_LONG_BITS == 32 + { INDEX_op_qemu_ld_i32, { "r", "L" } }, + { INDEX_op_qemu_st_i32, { "S", "S" } }, + { INDEX_op_qemu_ld_i64, { "L", "L", "L" } }, + { INDEX_op_qemu_st_i64, { "S", "S", "S" } }, +#else + { INDEX_op_qemu_ld_i32, { "r", "L", "L" } }, + { INDEX_op_qemu_st_i32, { "S", "S", "S" } }, + { INDEX_op_qemu_ld_i64, { "L", "L", "L", "L" } }, + { INDEX_op_qemu_st_i64, { "S", "S", "S", "S" } }, +#endif + + { -1 }, +}; + +static void tcg_target_init(TCGContext *s) +{ + unsigned long hwcap = qemu_getauxval(AT_HWCAP); + if (hwcap & PPC_FEATURE_ARCH_2_06) { + have_isa_2_06 = true; + } + + tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffffffff); + tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I64], 0, 0xffffffff); + tcg_regset_set32(tcg_target_call_clobber_regs, 0, + (1 << TCG_REG_R0) | + (1 << TCG_REG_R2) | + (1 << TCG_REG_R3) | + (1 << TCG_REG_R4) | + (1 << TCG_REG_R5) | + (1 << TCG_REG_R6) | + (1 << TCG_REG_R7) | + (1 << TCG_REG_R8) | + (1 << TCG_REG_R9) | + (1 << TCG_REG_R10) | + (1 << TCG_REG_R11) | + (1 << TCG_REG_R12)); + + tcg_regset_clear(s->reserved_regs); + tcg_regset_set_reg(s->reserved_regs, TCG_REG_R0); /* tcg temp */ + tcg_regset_set_reg(s->reserved_regs, TCG_REG_R1); /* stack pointer */ +#if defined(_CALL_SYSV) + tcg_regset_set_reg(s->reserved_regs, TCG_REG_R2); /* toc pointer */ +#endif +#if defined(_CALL_SYSV) || TCG_TARGET_REG_BITS == 64 + tcg_regset_set_reg(s->reserved_regs, TCG_REG_R13); /* thread pointer */ +#endif + tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP1); /* mem temp */ + if (USE_REG_RA) { + tcg_regset_set_reg(s->reserved_regs, TCG_REG_RA); /* return addr */ + } + + tcg_add_target_add_op_defs(ppc_op_defs); +} + +#ifdef __ELF__ +typedef struct { + DebugFrameCIE cie; + DebugFrameFDEHeader fde; + uint8_t fde_def_cfa[4]; + uint8_t fde_reg_ofs[ARRAY_SIZE(tcg_target_callee_save_regs) * 2 + 3]; +} DebugFrame; + +/* We're expecting a 2 byte uleb128 encoded value. */ +QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14)); + +#if TCG_TARGET_REG_BITS == 64 +# define ELF_HOST_MACHINE EM_PPC64 +#else +# define ELF_HOST_MACHINE EM_PPC +#endif + +static DebugFrame debug_frame = { + .cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */ + .cie.id = -1, + .cie.version = 1, + .cie.code_align = 1, + .cie.data_align = (-SZR & 0x7f), /* sleb128 -SZR */ + .cie.return_column = 65, + + /* Total FDE size does not include the "len" member. */ + .fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, fde.cie_offset), + + .fde_def_cfa = { + 12, TCG_REG_R1, /* DW_CFA_def_cfa r1, ... */ + (FRAME_SIZE & 0x7f) | 0x80, /* ... uleb128 FRAME_SIZE */ + (FRAME_SIZE >> 7) + }, + .fde_reg_ofs = { + /* DW_CFA_offset_extended_sf, lr, LR_OFFSET */ + 0x11, 65, (LR_OFFSET / -SZR) & 0x7f, + } +}; + +void tcg_register_jit(void *buf, size_t buf_size) +{ + uint8_t *p = &debug_frame.fde_reg_ofs[3]; + int i; + + for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i, p += 2) { + p[0] = 0x80 + tcg_target_callee_save_regs[i]; + p[1] = (FRAME_SIZE - (REG_SAVE_BOT + i * SZR)) / SZR; + } + + debug_frame.fde.func_start = (uintptr_t)buf; + debug_frame.fde.func_len = buf_size; + + tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame)); +} +#endif /* __ELF__ */ + +static size_t dcache_bsize = 16; +static size_t icache_bsize = 16; + +void flush_icache_range(uintptr_t start, uintptr_t stop) +{ + uintptr_t p, start1, stop1; + size_t dsize = dcache_bsize; + size_t isize = icache_bsize; + + start1 = start & ~(dsize - 1); + stop1 = (stop + dsize - 1) & ~(dsize - 1); + for (p = start1; p < stop1; p += dsize) { + asm volatile ("dcbst 0,%0" : : "r"(p) : "memory"); + } + asm volatile ("sync" : : : "memory"); + + start &= start & ~(isize - 1); + stop1 = (stop + isize - 1) & ~(isize - 1); + for (p = start1; p < stop1; p += isize) { + asm volatile ("icbi 0,%0" : : "r"(p) : "memory"); + } + asm volatile ("sync" : : : "memory"); + asm volatile ("isync" : : : "memory"); +} + +#if defined _AIX +#include + +static void __attribute__((constructor)) tcg_cache_init(void) +{ + icache_bsize = _system_configuration.icache_line; + dcache_bsize = _system_configuration.dcache_line; +} + +#elif defined __linux__ +static void __attribute__((constructor)) tcg_cache_init(void) +{ + unsigned long dsize = qemu_getauxval(AT_DCACHEBSIZE); + unsigned long isize = qemu_getauxval(AT_ICACHEBSIZE); + + if (dsize == 0 || isize == 0) { + if (dsize == 0) { + fprintf(stderr, "getauxval AT_DCACHEBSIZE failed\n"); + } + if (isize == 0) { + fprintf(stderr, "getauxval AT_ICACHEBSIZE failed\n"); + } + exit(1); + } + dcache_bsize = dsize; + icache_bsize = isize; +} + +#elif defined __APPLE__ +#include + +static void __attribute__((constructor)) tcg_cache_init(void) +{ + size_t len; + unsigned cacheline; + int name[2] = { CTL_HW, HW_CACHELINE }; + + len = sizeof(cacheline); + if (sysctl(name, 2, &cacheline, &len, NULL, 0)) { + perror("sysctl CTL_HW HW_CACHELINE failed"); + exit(1); + } + dcache_bsize = cacheline; + icache_bsize = cacheline; +} + +#elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__) +#include + +static void __attribute__((constructor)) tcg_cache_init(void) +{ + size_t len = 4; + unsigned cacheline; + + if (sysctlbyname ("machdep.cacheline_size", &cacheline, &len, NULL, 0)) { + fprintf(stderr, "sysctlbyname machdep.cacheline_size failed: %s\n", + strerror(errno)); + exit(1); + } + dcache_bsize = cacheline; + icache_bsize = cacheline; +} +#endif diff --git a/tcg/s390/tcg-target.c b/tcg/s390/tcg-target.c deleted file mode 100644 index 58520fa..0000000 --- a/tcg/s390/tcg-target.c +++ /dev/null @@ -1,2410 +0,0 @@ -/* - * Tiny Code Generator for QEMU - * - * Copyright (c) 2009 Ulrich Hecht - * Copyright (c) 2009 Alexander Graf - * Copyright (c) 2010 Richard Henderson - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - */ - -#include "qemu/osdep.h" -#include "tcg-be-ldst.h" - -/* We only support generating code for 64-bit mode. */ -#if TCG_TARGET_REG_BITS != 64 -#error "unsupported code generation mode" -#endif - -#include "elf.h" - -/* ??? The translation blocks produced by TCG are generally small enough to - be entirely reachable with a 16-bit displacement. Leaving the option for - a 32-bit displacement here Just In Case. */ -#define USE_LONG_BRANCHES 0 - -#define TCG_CT_CONST_MULI 0x100 -#define TCG_CT_CONST_ORI 0x200 -#define TCG_CT_CONST_XORI 0x400 -#define TCG_CT_CONST_CMPI 0x800 -#define TCG_CT_CONST_ADLI 0x1000 - -/* Several places within the instruction set 0 means "no register" - rather than TCG_REG_R0. */ -#define TCG_REG_NONE 0 - -/* A scratch register that may be be used throughout the backend. */ -#define TCG_TMP0 TCG_REG_R14 - -#ifndef CONFIG_SOFTMMU -#define TCG_GUEST_BASE_REG TCG_REG_R13 -#endif - -/* All of the following instructions are prefixed with their instruction - format, and are defined as 8- or 16-bit quantities, even when the two - halves of the 16-bit quantity may appear 32 bits apart in the insn. - This makes it easy to copy the values from the tables in Appendix B. */ -typedef enum S390Opcode { - RIL_AFI = 0xc209, - RIL_AGFI = 0xc208, - RIL_ALFI = 0xc20b, - RIL_ALGFI = 0xc20a, - RIL_BRASL = 0xc005, - RIL_BRCL = 0xc004, - RIL_CFI = 0xc20d, - RIL_CGFI = 0xc20c, - RIL_CLFI = 0xc20f, - RIL_CLGFI = 0xc20e, - RIL_IIHF = 0xc008, - RIL_IILF = 0xc009, - RIL_LARL = 0xc000, - RIL_LGFI = 0xc001, - RIL_LGRL = 0xc408, - RIL_LLIHF = 0xc00e, - RIL_LLILF = 0xc00f, - RIL_LRL = 0xc40d, - RIL_MSFI = 0xc201, - RIL_MSGFI = 0xc200, - RIL_NIHF = 0xc00a, - RIL_NILF = 0xc00b, - RIL_OIHF = 0xc00c, - RIL_OILF = 0xc00d, - RIL_SLFI = 0xc205, - RIL_SLGFI = 0xc204, - RIL_XIHF = 0xc006, - RIL_XILF = 0xc007, - - RI_AGHI = 0xa70b, - RI_AHI = 0xa70a, - RI_BRC = 0xa704, - RI_IIHH = 0xa500, - RI_IIHL = 0xa501, - RI_IILH = 0xa502, - RI_IILL = 0xa503, - RI_LGHI = 0xa709, - RI_LLIHH = 0xa50c, - RI_LLIHL = 0xa50d, - RI_LLILH = 0xa50e, - RI_LLILL = 0xa50f, - RI_MGHI = 0xa70d, - RI_MHI = 0xa70c, - RI_NIHH = 0xa504, - RI_NIHL = 0xa505, - RI_NILH = 0xa506, - RI_NILL = 0xa507, - RI_OIHH = 0xa508, - RI_OIHL = 0xa509, - RI_OILH = 0xa50a, - RI_OILL = 0xa50b, - - RIE_CGIJ = 0xec7c, - RIE_CGRJ = 0xec64, - RIE_CIJ = 0xec7e, - RIE_CLGRJ = 0xec65, - RIE_CLIJ = 0xec7f, - RIE_CLGIJ = 0xec7d, - RIE_CLRJ = 0xec77, - RIE_CRJ = 0xec76, - RIE_RISBG = 0xec55, - - RRE_AGR = 0xb908, - RRE_ALGR = 0xb90a, - RRE_ALCR = 0xb998, - RRE_ALCGR = 0xb988, - RRE_CGR = 0xb920, - RRE_CLGR = 0xb921, - RRE_DLGR = 0xb987, - RRE_DLR = 0xb997, - RRE_DSGFR = 0xb91d, - RRE_DSGR = 0xb90d, - RRE_LGBR = 0xb906, - RRE_LCGR = 0xb903, - RRE_LGFR = 0xb914, - RRE_LGHR = 0xb907, - RRE_LGR = 0xb904, - RRE_LLGCR = 0xb984, - RRE_LLGFR = 0xb916, - RRE_LLGHR = 0xb985, - RRE_LRVR = 0xb91f, - RRE_LRVGR = 0xb90f, - RRE_LTGR = 0xb902, - RRE_MLGR = 0xb986, - RRE_MSGR = 0xb90c, - RRE_MSR = 0xb252, - RRE_NGR = 0xb980, - RRE_OGR = 0xb981, - RRE_SGR = 0xb909, - RRE_SLGR = 0xb90b, - RRE_SLBR = 0xb999, - RRE_SLBGR = 0xb989, - RRE_XGR = 0xb982, - - RRF_LOCR = 0xb9f2, - RRF_LOCGR = 0xb9e2, - - RR_AR = 0x1a, - RR_ALR = 0x1e, - RR_BASR = 0x0d, - RR_BCR = 0x07, - RR_CLR = 0x15, - RR_CR = 0x19, - RR_DR = 0x1d, - RR_LCR = 0x13, - RR_LR = 0x18, - RR_LTR = 0x12, - RR_NR = 0x14, - RR_OR = 0x16, - RR_SR = 0x1b, - RR_SLR = 0x1f, - RR_XR = 0x17, - - RSY_RLL = 0xeb1d, - RSY_RLLG = 0xeb1c, - RSY_SLLG = 0xeb0d, - RSY_SRAG = 0xeb0a, - RSY_SRLG = 0xeb0c, - - RS_SLL = 0x89, - RS_SRA = 0x8a, - RS_SRL = 0x88, - - RXY_AG = 0xe308, - RXY_AY = 0xe35a, - RXY_CG = 0xe320, - RXY_CY = 0xe359, - RXY_LAY = 0xe371, - RXY_LB = 0xe376, - RXY_LG = 0xe304, - RXY_LGB = 0xe377, - RXY_LGF = 0xe314, - RXY_LGH = 0xe315, - RXY_LHY = 0xe378, - RXY_LLGC = 0xe390, - RXY_LLGF = 0xe316, - RXY_LLGH = 0xe391, - RXY_LMG = 0xeb04, - RXY_LRV = 0xe31e, - RXY_LRVG = 0xe30f, - RXY_LRVH = 0xe31f, - RXY_LY = 0xe358, - RXY_STCY = 0xe372, - RXY_STG = 0xe324, - RXY_STHY = 0xe370, - RXY_STMG = 0xeb24, - RXY_STRV = 0xe33e, - RXY_STRVG = 0xe32f, - RXY_STRVH = 0xe33f, - RXY_STY = 0xe350, - - RX_A = 0x5a, - RX_C = 0x59, - RX_L = 0x58, - RX_LA = 0x41, - RX_LH = 0x48, - RX_ST = 0x50, - RX_STC = 0x42, - RX_STH = 0x40, -} S390Opcode; - -#ifndef NDEBUG -static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = { - "%r0", "%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7", - "%r8", "%r9", "%r10" "%r11" "%r12" "%r13" "%r14" "%r15" -}; -#endif - -/* Since R6 is a potential argument register, choose it last of the - call-saved registers. Likewise prefer the call-clobbered registers - in reverse order to maximize the chance of avoiding the arguments. */ -static const int tcg_target_reg_alloc_order[] = { - /* Call saved registers. */ - TCG_REG_R13, - TCG_REG_R12, - TCG_REG_R11, - TCG_REG_R10, - TCG_REG_R9, - TCG_REG_R8, - TCG_REG_R7, - TCG_REG_R6, - /* Call clobbered registers. */ - TCG_REG_R14, - TCG_REG_R0, - TCG_REG_R1, - /* Argument registers, in reverse order of allocation. */ - TCG_REG_R5, - TCG_REG_R4, - TCG_REG_R3, - TCG_REG_R2, -}; - -static const int tcg_target_call_iarg_regs[] = { - TCG_REG_R2, - TCG_REG_R3, - TCG_REG_R4, - TCG_REG_R5, - TCG_REG_R6, -}; - -static const int tcg_target_call_oarg_regs[] = { - TCG_REG_R2, -}; - -#define S390_CC_EQ 8 -#define S390_CC_LT 4 -#define S390_CC_GT 2 -#define S390_CC_OV 1 -#define S390_CC_NE (S390_CC_LT | S390_CC_GT) -#define S390_CC_LE (S390_CC_LT | S390_CC_EQ) -#define S390_CC_GE (S390_CC_GT | S390_CC_EQ) -#define S390_CC_NEVER 0 -#define S390_CC_ALWAYS 15 - -/* Condition codes that result from a COMPARE and COMPARE LOGICAL. */ -static const uint8_t tcg_cond_to_s390_cond[] = { - [TCG_COND_EQ] = S390_CC_EQ, - [TCG_COND_NE] = S390_CC_NE, - [TCG_COND_LT] = S390_CC_LT, - [TCG_COND_LE] = S390_CC_LE, - [TCG_COND_GT] = S390_CC_GT, - [TCG_COND_GE] = S390_CC_GE, - [TCG_COND_LTU] = S390_CC_LT, - [TCG_COND_LEU] = S390_CC_LE, - [TCG_COND_GTU] = S390_CC_GT, - [TCG_COND_GEU] = S390_CC_GE, -}; - -/* Condition codes that result from a LOAD AND TEST. Here, we have no - unsigned instruction variation, however since the test is vs zero we - can re-map the outcomes appropriately. */ -static const uint8_t tcg_cond_to_ltr_cond[] = { - [TCG_COND_EQ] = S390_CC_EQ, - [TCG_COND_NE] = S390_CC_NE, - [TCG_COND_LT] = S390_CC_LT, - [TCG_COND_LE] = S390_CC_LE, - [TCG_COND_GT] = S390_CC_GT, - [TCG_COND_GE] = S390_CC_GE, - [TCG_COND_LTU] = S390_CC_NEVER, - [TCG_COND_LEU] = S390_CC_EQ, - [TCG_COND_GTU] = S390_CC_NE, - [TCG_COND_GEU] = S390_CC_ALWAYS, -}; - -#ifdef CONFIG_SOFTMMU -static void * const qemu_ld_helpers[16] = { - [MO_UB] = helper_ret_ldub_mmu, - [MO_SB] = helper_ret_ldsb_mmu, - [MO_LEUW] = helper_le_lduw_mmu, - [MO_LESW] = helper_le_ldsw_mmu, - [MO_LEUL] = helper_le_ldul_mmu, - [MO_LESL] = helper_le_ldsl_mmu, - [MO_LEQ] = helper_le_ldq_mmu, - [MO_BEUW] = helper_be_lduw_mmu, - [MO_BESW] = helper_be_ldsw_mmu, - [MO_BEUL] = helper_be_ldul_mmu, - [MO_BESL] = helper_be_ldsl_mmu, - [MO_BEQ] = helper_be_ldq_mmu, -}; - -static void * const qemu_st_helpers[16] = { - [MO_UB] = helper_ret_stb_mmu, - [MO_LEUW] = helper_le_stw_mmu, - [MO_LEUL] = helper_le_stl_mmu, - [MO_LEQ] = helper_le_stq_mmu, - [MO_BEUW] = helper_be_stw_mmu, - [MO_BEUL] = helper_be_stl_mmu, - [MO_BEQ] = helper_be_stq_mmu, -}; -#endif - -static tcg_insn_unit *tb_ret_addr; - -/* A list of relevant facilities used by this translator. Some of these - are required for proper operation, and these are checked at startup. */ - -#define FACILITY_ZARCH_ACTIVE (1ULL << (63 - 2)) -#define FACILITY_LONG_DISP (1ULL << (63 - 18)) -#define FACILITY_EXT_IMM (1ULL << (63 - 21)) -#define FACILITY_GEN_INST_EXT (1ULL << (63 - 34)) -#define FACILITY_LOAD_ON_COND (1ULL << (63 - 45)) - -static uint64_t facilities; - -static void patch_reloc(tcg_insn_unit *code_ptr, int type, - intptr_t value, intptr_t addend) -{ - intptr_t pcrel2 = (tcg_insn_unit *)value - (code_ptr - 1); - assert(addend == -2); - - switch (type) { - case R_390_PC16DBL: - assert(pcrel2 == (int16_t)pcrel2); - tcg_patch16(code_ptr, pcrel2); - break; - case R_390_PC32DBL: - assert(pcrel2 == (int32_t)pcrel2); - tcg_patch32(code_ptr, pcrel2); - break; - default: - tcg_abort(); - break; - } -} - -/* parse target specific constraints */ -static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str) -{ - const char *ct_str = *pct_str; - - switch (ct_str[0]) { - case 'r': /* all registers */ - ct->ct |= TCG_CT_REG; - tcg_regset_set32(ct->u.regs, 0, 0xffff); - break; - case 'R': /* not R0 */ - ct->ct |= TCG_CT_REG; - tcg_regset_set32(ct->u.regs, 0, 0xffff); - tcg_regset_reset_reg(ct->u.regs, TCG_REG_R0); - break; - case 'L': /* qemu_ld/st constraint */ - ct->ct |= TCG_CT_REG; - tcg_regset_set32(ct->u.regs, 0, 0xffff); - tcg_regset_reset_reg (ct->u.regs, TCG_REG_R2); - tcg_regset_reset_reg (ct->u.regs, TCG_REG_R3); - tcg_regset_reset_reg (ct->u.regs, TCG_REG_R4); - break; - case 'a': /* force R2 for division */ - ct->ct |= TCG_CT_REG; - tcg_regset_clear(ct->u.regs); - tcg_regset_set_reg(ct->u.regs, TCG_REG_R2); - break; - case 'b': /* force R3 for division */ - ct->ct |= TCG_CT_REG; - tcg_regset_clear(ct->u.regs); - tcg_regset_set_reg(ct->u.regs, TCG_REG_R3); - break; - case 'A': - ct->ct |= TCG_CT_CONST_ADLI; - break; - case 'K': - ct->ct |= TCG_CT_CONST_MULI; - break; - case 'O': - ct->ct |= TCG_CT_CONST_ORI; - break; - case 'X': - ct->ct |= TCG_CT_CONST_XORI; - break; - case 'C': - ct->ct |= TCG_CT_CONST_CMPI; - break; - default: - return -1; - } - ct_str++; - *pct_str = ct_str; - - return 0; -} - -/* Immediates to be used with logical OR. This is an optimization only, - since a full 64-bit immediate OR can always be performed with 4 sequential - OI[LH][LH] instructions. What we're looking for is immediates that we - can load efficiently, and the immediate load plus the reg-reg OR is - smaller than the sequential OI's. */ - -static int tcg_match_ori(TCGType type, tcg_target_long val) -{ - if (facilities & FACILITY_EXT_IMM) { - if (type == TCG_TYPE_I32) { - /* All 32-bit ORs can be performed with 1 48-bit insn. */ - return 1; - } - } - - /* Look for negative values. These are best to load with LGHI. */ - if (val < 0) { - if (val == (int16_t)val) { - return 0; - } - if (facilities & FACILITY_EXT_IMM) { - if (val == (int32_t)val) { - return 0; - } - } - } - - return 1; -} - -/* Immediates to be used with logical XOR. This is almost, but not quite, - only an optimization. XOR with immediate is only supported with the - extended-immediate facility. That said, there are a few patterns for - which it is better to load the value into a register first. */ - -static int tcg_match_xori(TCGType type, tcg_target_long val) -{ - if ((facilities & FACILITY_EXT_IMM) == 0) { - return 0; - } - - if (type == TCG_TYPE_I32) { - /* All 32-bit XORs can be performed with 1 48-bit insn. */ - return 1; - } - - /* Look for negative values. These are best to load with LGHI. */ - if (val < 0 && val == (int32_t)val) { - return 0; - } - - return 1; -} - -/* Imediates to be used with comparisons. */ - -static int tcg_match_cmpi(TCGType type, tcg_target_long val) -{ - if (facilities & FACILITY_EXT_IMM) { - /* The COMPARE IMMEDIATE instruction is available. */ - if (type == TCG_TYPE_I32) { - /* We have a 32-bit immediate and can compare against anything. */ - return 1; - } else { - /* ??? We have no insight here into whether the comparison is - signed or unsigned. The COMPARE IMMEDIATE insn uses a 32-bit - signed immediate, and the COMPARE LOGICAL IMMEDIATE insn uses - a 32-bit unsigned immediate. If we were to use the (semi) - obvious "val == (int32_t)val" we would be enabling unsigned - comparisons vs very large numbers. The only solution is to - take the intersection of the ranges. */ - /* ??? Another possible solution is to simply lie and allow all - constants here and force the out-of-range values into a temp - register in tgen_cmp when we have knowledge of the actual - comparison code in use. */ - return val >= 0 && val <= 0x7fffffff; - } - } else { - /* Only the LOAD AND TEST instruction is available. */ - return val == 0; - } -} - -/* Immediates to be used with add2/sub2. */ - -static int tcg_match_add2i(TCGType type, tcg_target_long val) -{ - if (facilities & FACILITY_EXT_IMM) { - if (type == TCG_TYPE_I32) { - return 1; - } else if (val >= -0xffffffffll && val <= 0xffffffffll) { - return 1; - } - } - return 0; -} - -/* Test if a constant matches the constraint. */ -static int tcg_target_const_match(tcg_target_long val, TCGType type, - const TCGArgConstraint *arg_ct) -{ - int ct = arg_ct->ct; - - if (ct & TCG_CT_CONST) { - return 1; - } - - if (type == TCG_TYPE_I32) { - val = (int32_t)val; - } - - /* The following are mutually exclusive. */ - if (ct & TCG_CT_CONST_MULI) { - /* Immediates that may be used with multiply. If we have the - general-instruction-extensions, then we have MULTIPLY SINGLE - IMMEDIATE with a signed 32-bit, otherwise we have only - MULTIPLY HALFWORD IMMEDIATE, with a signed 16-bit. */ - if (facilities & FACILITY_GEN_INST_EXT) { - return val == (int32_t)val; - } else { - return val == (int16_t)val; - } - } else if (ct & TCG_CT_CONST_ADLI) { - return tcg_match_add2i(type, val); - } else if (ct & TCG_CT_CONST_ORI) { - return tcg_match_ori(type, val); - } else if (ct & TCG_CT_CONST_XORI) { - return tcg_match_xori(type, val); - } else if (ct & TCG_CT_CONST_CMPI) { - return tcg_match_cmpi(type, val); - } - - return 0; -} - -/* Emit instructions according to the given instruction format. */ - -static void tcg_out_insn_RR(TCGContext *s, S390Opcode op, TCGReg r1, TCGReg r2) -{ - tcg_out16(s, (op << 8) | (r1 << 4) | r2); -} - -static void tcg_out_insn_RRE(TCGContext *s, S390Opcode op, - TCGReg r1, TCGReg r2) -{ - tcg_out32(s, (op << 16) | (r1 << 4) | r2); -} - -static void tcg_out_insn_RRF(TCGContext *s, S390Opcode op, - TCGReg r1, TCGReg r2, int m3) -{ - tcg_out32(s, (op << 16) | (m3 << 12) | (r1 << 4) | r2); -} - -static void tcg_out_insn_RI(TCGContext *s, S390Opcode op, TCGReg r1, int i2) -{ - tcg_out32(s, (op << 16) | (r1 << 20) | (i2 & 0xffff)); -} - -static void tcg_out_insn_RIL(TCGContext *s, S390Opcode op, TCGReg r1, int i2) -{ - tcg_out16(s, op | (r1 << 4)); - tcg_out32(s, i2); -} - -static void tcg_out_insn_RS(TCGContext *s, S390Opcode op, TCGReg r1, - TCGReg b2, TCGReg r3, int disp) -{ - tcg_out32(s, (op << 24) | (r1 << 20) | (r3 << 16) | (b2 << 12) - | (disp & 0xfff)); -} - -static void tcg_out_insn_RSY(TCGContext *s, S390Opcode op, TCGReg r1, - TCGReg b2, TCGReg r3, int disp) -{ - tcg_out16(s, (op & 0xff00) | (r1 << 4) | r3); - tcg_out32(s, (op & 0xff) | (b2 << 28) - | ((disp & 0xfff) << 16) | ((disp & 0xff000) >> 4)); -} - -#define tcg_out_insn_RX tcg_out_insn_RS -#define tcg_out_insn_RXY tcg_out_insn_RSY - -/* Emit an opcode with "type-checking" of the format. */ -#define tcg_out_insn(S, FMT, OP, ...) \ - glue(tcg_out_insn_,FMT)(S, glue(glue(FMT,_),OP), ## __VA_ARGS__) - - -/* emit 64-bit shifts */ -static void tcg_out_sh64(TCGContext* s, S390Opcode op, TCGReg dest, - TCGReg src, TCGReg sh_reg, int sh_imm) -{ - tcg_out_insn_RSY(s, op, dest, sh_reg, src, sh_imm); -} - -/* emit 32-bit shifts */ -static void tcg_out_sh32(TCGContext* s, S390Opcode op, TCGReg dest, - TCGReg sh_reg, int sh_imm) -{ - tcg_out_insn_RS(s, op, dest, sh_reg, 0, sh_imm); -} - -static void tcg_out_mov(TCGContext *s, TCGType type, TCGReg dst, TCGReg src) -{ - if (src != dst) { - if (type == TCG_TYPE_I32) { - tcg_out_insn(s, RR, LR, dst, src); - } else { - tcg_out_insn(s, RRE, LGR, dst, src); - } - } -} - -/* load a register with an immediate value */ -static void tcg_out_movi(TCGContext *s, TCGType type, - TCGReg ret, tcg_target_long sval) -{ - static const S390Opcode lli_insns[4] = { - RI_LLILL, RI_LLILH, RI_LLIHL, RI_LLIHH - }; - - tcg_target_ulong uval = sval; - int i; - - if (type == TCG_TYPE_I32) { - uval = (uint32_t)sval; - sval = (int32_t)sval; - } - - /* Try all 32-bit insns that can load it in one go. */ - if (sval >= -0x8000 && sval < 0x8000) { - tcg_out_insn(s, RI, LGHI, ret, sval); - return; - } - - for (i = 0; i < 4; i++) { - tcg_target_long mask = 0xffffull << i*16; - if ((uval & mask) == uval) { - tcg_out_insn_RI(s, lli_insns[i], ret, uval >> i*16); - return; - } - } - - /* Try all 48-bit insns that can load it in one go. */ - if (facilities & FACILITY_EXT_IMM) { - if (sval == (int32_t)sval) { - tcg_out_insn(s, RIL, LGFI, ret, sval); - return; - } - if (uval <= 0xffffffff) { - tcg_out_insn(s, RIL, LLILF, ret, uval); - return; - } - if ((uval & 0xffffffff) == 0) { - tcg_out_insn(s, RIL, LLIHF, ret, uval >> 31 >> 1); - return; - } - } - - /* Try for PC-relative address load. */ - if ((sval & 1) == 0) { - ptrdiff_t off = tcg_pcrel_diff(s, (void *)sval) >> 1; - if (off == (int32_t)off) { - tcg_out_insn(s, RIL, LARL, ret, off); - return; - } - } - - /* If extended immediates are not present, then we may have to issue - several instructions to load the low 32 bits. */ - if (!(facilities & FACILITY_EXT_IMM)) { - /* A 32-bit unsigned value can be loaded in 2 insns. And given - that the lli_insns loop above did not succeed, we know that - both insns are required. */ - if (uval <= 0xffffffff) { - tcg_out_insn(s, RI, LLILL, ret, uval); - tcg_out_insn(s, RI, IILH, ret, uval >> 16); - return; - } - - /* If all high bits are set, the value can be loaded in 2 or 3 insns. - We first want to make sure that all the high bits get set. With - luck the low 16-bits can be considered negative to perform that for - free, otherwise we load an explicit -1. */ - if (sval >> 31 >> 1 == -1) { - if (uval & 0x8000) { - tcg_out_insn(s, RI, LGHI, ret, uval); - } else { - tcg_out_insn(s, RI, LGHI, ret, -1); - tcg_out_insn(s, RI, IILL, ret, uval); - } - tcg_out_insn(s, RI, IILH, ret, uval >> 16); - return; - } - } - - /* If we get here, both the high and low parts have non-zero bits. */ - - /* Recurse to load the lower 32-bits. */ - tcg_out_movi(s, TCG_TYPE_I64, ret, uval & 0xffffffff); - - /* Insert data into the high 32-bits. */ - uval = uval >> 31 >> 1; - if (facilities & FACILITY_EXT_IMM) { - if (uval < 0x10000) { - tcg_out_insn(s, RI, IIHL, ret, uval); - } else if ((uval & 0xffff) == 0) { - tcg_out_insn(s, RI, IIHH, ret, uval >> 16); - } else { - tcg_out_insn(s, RIL, IIHF, ret, uval); - } - } else { - if (uval & 0xffff) { - tcg_out_insn(s, RI, IIHL, ret, uval); - } - if (uval & 0xffff0000) { - tcg_out_insn(s, RI, IIHH, ret, uval >> 16); - } - } -} - - -/* Emit a load/store type instruction. Inputs are: - DATA: The register to be loaded or stored. - BASE+OFS: The effective address. - OPC_RX: If the operation has an RX format opcode (e.g. STC), otherwise 0. - OPC_RXY: The RXY format opcode for the operation (e.g. STCY). */ - -static void tcg_out_mem(TCGContext *s, S390Opcode opc_rx, S390Opcode opc_rxy, - TCGReg data, TCGReg base, TCGReg index, - tcg_target_long ofs) -{ - if (ofs < -0x80000 || ofs >= 0x80000) { - /* Combine the low 20 bits of the offset with the actual load insn; - the high 44 bits must come from an immediate load. */ - tcg_target_long low = ((ofs & 0xfffff) ^ 0x80000) - 0x80000; - tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0, ofs - low); - ofs = low; - - /* If we were already given an index register, add it in. */ - if (index != TCG_REG_NONE) { - tcg_out_insn(s, RRE, AGR, TCG_TMP0, index); - } - index = TCG_TMP0; - } - - if (opc_rx && ofs >= 0 && ofs < 0x1000) { - tcg_out_insn_RX(s, opc_rx, data, base, index, ofs); - } else { - tcg_out_insn_RXY(s, opc_rxy, data, base, index, ofs); - } -} - - -/* load data without address translation or endianness conversion */ -static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg data, - TCGReg base, intptr_t ofs) -{ - if (type == TCG_TYPE_I32) { - tcg_out_mem(s, RX_L, RXY_LY, data, base, TCG_REG_NONE, ofs); - } else { - tcg_out_mem(s, 0, RXY_LG, data, base, TCG_REG_NONE, ofs); - } -} - -static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg data, - TCGReg base, intptr_t ofs) -{ - if (type == TCG_TYPE_I32) { - tcg_out_mem(s, RX_ST, RXY_STY, data, base, TCG_REG_NONE, ofs); - } else { - tcg_out_mem(s, 0, RXY_STG, data, base, TCG_REG_NONE, ofs); - } -} - -/* load data from an absolute host address */ -static void tcg_out_ld_abs(TCGContext *s, TCGType type, TCGReg dest, void *abs) -{ - intptr_t addr = (intptr_t)abs; - - if ((facilities & FACILITY_GEN_INST_EXT) && !(addr & 1)) { - ptrdiff_t disp = tcg_pcrel_diff(s, abs) >> 1; - if (disp == (int32_t)disp) { - if (type == TCG_TYPE_I32) { - tcg_out_insn(s, RIL, LRL, dest, disp); - } else { - tcg_out_insn(s, RIL, LGRL, dest, disp); - } - return; - } - } - - tcg_out_movi(s, TCG_TYPE_PTR, dest, addr & ~0xffff); - tcg_out_ld(s, type, dest, dest, addr & 0xffff); -} - -static inline void tcg_out_risbg(TCGContext *s, TCGReg dest, TCGReg src, - int msb, int lsb, int ofs, int z) -{ - /* Format RIE-f */ - tcg_out16(s, (RIE_RISBG & 0xff00) | (dest << 4) | src); - tcg_out16(s, (msb << 8) | (z << 7) | lsb); - tcg_out16(s, (ofs << 8) | (RIE_RISBG & 0xff)); -} - -static void tgen_ext8s(TCGContext *s, TCGType type, TCGReg dest, TCGReg src) -{ - if (facilities & FACILITY_EXT_IMM) { - tcg_out_insn(s, RRE, LGBR, dest, src); - return; - } - - if (type == TCG_TYPE_I32) { - if (dest == src) { - tcg_out_sh32(s, RS_SLL, dest, TCG_REG_NONE, 24); - } else { - tcg_out_sh64(s, RSY_SLLG, dest, src, TCG_REG_NONE, 24); - } - tcg_out_sh32(s, RS_SRA, dest, TCG_REG_NONE, 24); - } else { - tcg_out_sh64(s, RSY_SLLG, dest, src, TCG_REG_NONE, 56); - tcg_out_sh64(s, RSY_SRAG, dest, dest, TCG_REG_NONE, 56); - } -} - -static void tgen_ext8u(TCGContext *s, TCGType type, TCGReg dest, TCGReg src) -{ - if (facilities & FACILITY_EXT_IMM) { - tcg_out_insn(s, RRE, LLGCR, dest, src); - return; - } - - if (dest == src) { - tcg_out_movi(s, type, TCG_TMP0, 0xff); - src = TCG_TMP0; - } else { - tcg_out_movi(s, type, dest, 0xff); - } - if (type == TCG_TYPE_I32) { - tcg_out_insn(s, RR, NR, dest, src); - } else { - tcg_out_insn(s, RRE, NGR, dest, src); - } -} - -static void tgen_ext16s(TCGContext *s, TCGType type, TCGReg dest, TCGReg src) -{ - if (facilities & FACILITY_EXT_IMM) { - tcg_out_insn(s, RRE, LGHR, dest, src); - return; - } - - if (type == TCG_TYPE_I32) { - if (dest == src) { - tcg_out_sh32(s, RS_SLL, dest, TCG_REG_NONE, 16); - } else { - tcg_out_sh64(s, RSY_SLLG, dest, src, TCG_REG_NONE, 16); - } - tcg_out_sh32(s, RS_SRA, dest, TCG_REG_NONE, 16); - } else { - tcg_out_sh64(s, RSY_SLLG, dest, src, TCG_REG_NONE, 48); - tcg_out_sh64(s, RSY_SRAG, dest, dest, TCG_REG_NONE, 48); - } -} - -static void tgen_ext16u(TCGContext *s, TCGType type, TCGReg dest, TCGReg src) -{ - if (facilities & FACILITY_EXT_IMM) { - tcg_out_insn(s, RRE, LLGHR, dest, src); - return; - } - - if (dest == src) { - tcg_out_movi(s, type, TCG_TMP0, 0xffff); - src = TCG_TMP0; - } else { - tcg_out_movi(s, type, dest, 0xffff); - } - if (type == TCG_TYPE_I32) { - tcg_out_insn(s, RR, NR, dest, src); - } else { - tcg_out_insn(s, RRE, NGR, dest, src); - } -} - -static inline void tgen_ext32s(TCGContext *s, TCGReg dest, TCGReg src) -{ - tcg_out_insn(s, RRE, LGFR, dest, src); -} - -static inline void tgen_ext32u(TCGContext *s, TCGReg dest, TCGReg src) -{ - tcg_out_insn(s, RRE, LLGFR, dest, src); -} - -/* Accept bit patterns like these: - 0....01....1 - 1....10....0 - 1..10..01..1 - 0..01..10..0 - Copied from gcc sources. */ -static inline bool risbg_mask(uint64_t c) -{ - uint64_t lsb; - /* We don't change the number of transitions by inverting, - so make sure we start with the LSB zero. */ - if (c & 1) { - c = ~c; - } - /* Reject all zeros or all ones. */ - if (c == 0) { - return false; - } - /* Find the first transition. */ - lsb = c & -c; - /* Invert to look for a second transition. */ - c = ~c; - /* Erase the first transition. */ - c &= -lsb; - /* Find the second transition, if any. */ - lsb = c & -c; - /* Match if all the bits are 1's, or if c is zero. */ - return c == -lsb; -} - -static void tgen_andi_risbg(TCGContext *s, TCGReg out, TCGReg in, uint64_t val) -{ - int msb, lsb; - if ((val & 0x8000000000000001ull) == 0x8000000000000001ull) { - /* Achieve wraparound by swapping msb and lsb. */ - msb = 64 - ctz64(~val); - lsb = clz64(~val) - 1; - } else { - msb = clz64(val); - lsb = 63 - ctz64(val); - } - tcg_out_risbg(s, out, in, msb, lsb, 0, 1); -} - -static void tgen_andi(TCGContext *s, TCGType type, TCGReg dest, uint64_t val) -{ - static const S390Opcode ni_insns[4] = { - RI_NILL, RI_NILH, RI_NIHL, RI_NIHH - }; - static const S390Opcode nif_insns[2] = { - RIL_NILF, RIL_NIHF - }; - uint64_t valid = (type == TCG_TYPE_I32 ? 0xffffffffull : -1ull); - int i; - - /* Look for the zero-extensions. */ - if ((val & valid) == 0xffffffff) { - tgen_ext32u(s, dest, dest); - return; - } - if (facilities & FACILITY_EXT_IMM) { - if ((val & valid) == 0xff) { - tgen_ext8u(s, TCG_TYPE_I64, dest, dest); - return; - } - if ((val & valid) == 0xffff) { - tgen_ext16u(s, TCG_TYPE_I64, dest, dest); - return; - } - } - - /* Try all 32-bit insns that can perform it in one go. */ - for (i = 0; i < 4; i++) { - tcg_target_ulong mask = ~(0xffffull << i*16); - if (((val | ~valid) & mask) == mask) { - tcg_out_insn_RI(s, ni_insns[i], dest, val >> i*16); - return; - } - } - - /* Try all 48-bit insns that can perform it in one go. */ - if (facilities & FACILITY_EXT_IMM) { - for (i = 0; i < 2; i++) { - tcg_target_ulong mask = ~(0xffffffffull << i*32); - if (((val | ~valid) & mask) == mask) { - tcg_out_insn_RIL(s, nif_insns[i], dest, val >> i*32); - return; - } - } - } - if ((facilities & FACILITY_GEN_INST_EXT) && risbg_mask(val)) { - tgen_andi_risbg(s, dest, dest, val); - return; - } - - /* Fall back to loading the constant. */ - tcg_out_movi(s, type, TCG_TMP0, val); - if (type == TCG_TYPE_I32) { - tcg_out_insn(s, RR, NR, dest, TCG_TMP0); - } else { - tcg_out_insn(s, RRE, NGR, dest, TCG_TMP0); - } -} - -static void tgen64_ori(TCGContext *s, TCGReg dest, tcg_target_ulong val) -{ - static const S390Opcode oi_insns[4] = { - RI_OILL, RI_OILH, RI_OIHL, RI_OIHH - }; - static const S390Opcode nif_insns[2] = { - RIL_OILF, RIL_OIHF - }; - - int i; - - /* Look for no-op. */ - if (val == 0) { - return; - } - - if (facilities & FACILITY_EXT_IMM) { - /* Try all 32-bit insns that can perform it in one go. */ - for (i = 0; i < 4; i++) { - tcg_target_ulong mask = (0xffffull << i*16); - if ((val & mask) != 0 && (val & ~mask) == 0) { - tcg_out_insn_RI(s, oi_insns[i], dest, val >> i*16); - return; - } - } - - /* Try all 48-bit insns that can perform it in one go. */ - for (i = 0; i < 2; i++) { - tcg_target_ulong mask = (0xffffffffull << i*32); - if ((val & mask) != 0 && (val & ~mask) == 0) { - tcg_out_insn_RIL(s, nif_insns[i], dest, val >> i*32); - return; - } - } - - /* Perform the OR via sequential modifications to the high and - low parts. Do this via recursion to handle 16-bit vs 32-bit - masks in each half. */ - tgen64_ori(s, dest, val & 0x00000000ffffffffull); - tgen64_ori(s, dest, val & 0xffffffff00000000ull); - } else { - /* With no extended-immediate facility, we don't need to be so - clever. Just iterate over the insns and mask in the constant. */ - for (i = 0; i < 4; i++) { - tcg_target_ulong mask = (0xffffull << i*16); - if ((val & mask) != 0) { - tcg_out_insn_RI(s, oi_insns[i], dest, val >> i*16); - } - } - } -} - -static void tgen64_xori(TCGContext *s, TCGReg dest, tcg_target_ulong val) -{ - /* Perform the xor by parts. */ - if (val & 0xffffffff) { - tcg_out_insn(s, RIL, XILF, dest, val); - } - if (val > 0xffffffff) { - tcg_out_insn(s, RIL, XIHF, dest, val >> 31 >> 1); - } -} - -static int tgen_cmp(TCGContext *s, TCGType type, TCGCond c, TCGReg r1, - TCGArg c2, int c2const) -{ - bool is_unsigned = is_unsigned_cond(c); - if (c2const) { - if (c2 == 0) { - if (type == TCG_TYPE_I32) { - tcg_out_insn(s, RR, LTR, r1, r1); - } else { - tcg_out_insn(s, RRE, LTGR, r1, r1); - } - return tcg_cond_to_ltr_cond[c]; - } else { - if (is_unsigned) { - if (type == TCG_TYPE_I32) { - tcg_out_insn(s, RIL, CLFI, r1, c2); - } else { - tcg_out_insn(s, RIL, CLGFI, r1, c2); - } - } else { - if (type == TCG_TYPE_I32) { - tcg_out_insn(s, RIL, CFI, r1, c2); - } else { - tcg_out_insn(s, RIL, CGFI, r1, c2); - } - } - } - } else { - if (is_unsigned) { - if (type == TCG_TYPE_I32) { - tcg_out_insn(s, RR, CLR, r1, c2); - } else { - tcg_out_insn(s, RRE, CLGR, r1, c2); - } - } else { - if (type == TCG_TYPE_I32) { - tcg_out_insn(s, RR, CR, r1, c2); - } else { - tcg_out_insn(s, RRE, CGR, r1, c2); - } - } - } - return tcg_cond_to_s390_cond[c]; -} - -static void tgen_setcond(TCGContext *s, TCGType type, TCGCond cond, - TCGReg dest, TCGReg c1, TCGArg c2, int c2const) -{ - int cc; - - switch (cond) { - case TCG_COND_GTU: - case TCG_COND_GT: - do_greater: - /* The result of a compare has CC=2 for GT and CC=3 unused. - ADD LOGICAL WITH CARRY considers (CC & 2) the carry bit. */ - tgen_cmp(s, type, cond, c1, c2, c2const); - tcg_out_movi(s, type, dest, 0); - tcg_out_insn(s, RRE, ALCGR, dest, dest); - return; - - case TCG_COND_GEU: - do_geu: - /* We need "real" carry semantics, so use SUBTRACT LOGICAL - instead of COMPARE LOGICAL. This needs an extra move. */ - tcg_out_mov(s, type, TCG_TMP0, c1); - if (c2const) { - tcg_out_movi(s, TCG_TYPE_I64, dest, 0); - if (type == TCG_TYPE_I32) { - tcg_out_insn(s, RIL, SLFI, TCG_TMP0, c2); - } else { - tcg_out_insn(s, RIL, SLGFI, TCG_TMP0, c2); - } - } else { - if (type == TCG_TYPE_I32) { - tcg_out_insn(s, RR, SLR, TCG_TMP0, c2); - } else { - tcg_out_insn(s, RRE, SLGR, TCG_TMP0, c2); - } - tcg_out_movi(s, TCG_TYPE_I64, dest, 0); - } - tcg_out_insn(s, RRE, ALCGR, dest, dest); - return; - - case TCG_COND_LEU: - case TCG_COND_LTU: - case TCG_COND_LT: - /* Swap operands so that we can use GEU/GTU/GT. */ - if (c2const) { - tcg_out_movi(s, type, TCG_TMP0, c2); - c2 = c1; - c2const = 0; - c1 = TCG_TMP0; - } else { - TCGReg t = c1; - c1 = c2; - c2 = t; - } - if (cond == TCG_COND_LEU) { - goto do_geu; - } - cond = tcg_swap_cond(cond); - goto do_greater; - - case TCG_COND_NE: - /* X != 0 is X > 0. */ - if (c2const && c2 == 0) { - cond = TCG_COND_GTU; - goto do_greater; - } - break; - - case TCG_COND_EQ: - /* X == 0 is X <= 0 is 0 >= X. */ - if (c2const && c2 == 0) { - tcg_out_movi(s, TCG_TYPE_I64, TCG_TMP0, 0); - c2 = c1; - c2const = 0; - c1 = TCG_TMP0; - goto do_geu; - } - break; - - default: - break; - } - - cc = tgen_cmp(s, type, cond, c1, c2, c2const); - if (facilities & FACILITY_LOAD_ON_COND) { - /* Emit: d = 0, t = 1, d = (cc ? t : d). */ - tcg_out_movi(s, TCG_TYPE_I64, dest, 0); - tcg_out_movi(s, TCG_TYPE_I64, TCG_TMP0, 1); - tcg_out_insn(s, RRF, LOCGR, dest, TCG_TMP0, cc); - } else { - /* Emit: d = 1; if (cc) goto over; d = 0; over: */ - tcg_out_movi(s, type, dest, 1); - tcg_out_insn(s, RI, BRC, cc, (4 + 4) >> 1); - tcg_out_movi(s, type, dest, 0); - } -} - -static void tgen_movcond(TCGContext *s, TCGType type, TCGCond c, TCGReg dest, - TCGReg c1, TCGArg c2, int c2const, TCGReg r3) -{ - int cc; - if (facilities & FACILITY_LOAD_ON_COND) { - cc = tgen_cmp(s, type, c, c1, c2, c2const); - tcg_out_insn(s, RRF, LOCGR, dest, r3, cc); - } else { - c = tcg_invert_cond(c); - cc = tgen_cmp(s, type, c, c1, c2, c2const); - - /* Emit: if (cc) goto over; dest = r3; over: */ - tcg_out_insn(s, RI, BRC, cc, (4 + 4) >> 1); - tcg_out_insn(s, RRE, LGR, dest, r3); - } -} - -bool tcg_target_deposit_valid(int ofs, int len) -{ - return (facilities & FACILITY_GEN_INST_EXT) != 0; -} - -static void tgen_deposit(TCGContext *s, TCGReg dest, TCGReg src, - int ofs, int len) -{ - int lsb = (63 - ofs); - int msb = lsb - (len - 1); - tcg_out_risbg(s, dest, src, msb, lsb, ofs, 0); -} - -static void tgen_gotoi(TCGContext *s, int cc, tcg_insn_unit *dest) -{ - ptrdiff_t off = dest - s->code_ptr; - if (off == (int16_t)off) { - tcg_out_insn(s, RI, BRC, cc, off); - } else if (off == (int32_t)off) { - tcg_out_insn(s, RIL, BRCL, cc, off); - } else { - tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0, (uintptr_t)dest); - tcg_out_insn(s, RR, BCR, cc, TCG_TMP0); - } -} - -static void tgen_branch(TCGContext *s, int cc, TCGLabel *l) -{ - if (l->has_value) { - tgen_gotoi(s, cc, l->u.value_ptr); - } else if (USE_LONG_BRANCHES) { - tcg_out16(s, RIL_BRCL | (cc << 4)); - tcg_out_reloc(s, s->code_ptr, R_390_PC32DBL, l, -2); - s->code_ptr += 2; - } else { - tcg_out16(s, RI_BRC | (cc << 4)); - tcg_out_reloc(s, s->code_ptr, R_390_PC16DBL, l, -2); - s->code_ptr += 1; - } -} - -static void tgen_compare_branch(TCGContext *s, S390Opcode opc, int cc, - TCGReg r1, TCGReg r2, TCGLabel *l) -{ - intptr_t off; - - if (l->has_value) { - off = l->u.value_ptr - s->code_ptr; - } else { - /* We need to keep the offset unchanged for retranslation. */ - off = s->code_ptr[1]; - tcg_out_reloc(s, s->code_ptr + 1, R_390_PC16DBL, l, -2); - } - - tcg_out16(s, (opc & 0xff00) | (r1 << 4) | r2); - tcg_out16(s, off); - tcg_out16(s, cc << 12 | (opc & 0xff)); -} - -static void tgen_compare_imm_branch(TCGContext *s, S390Opcode opc, int cc, - TCGReg r1, int i2, TCGLabel *l) -{ - tcg_target_long off; - - if (l->has_value) { - off = l->u.value_ptr - s->code_ptr; - } else { - /* We need to keep the offset unchanged for retranslation. */ - off = s->code_ptr[1]; - tcg_out_reloc(s, s->code_ptr + 1, R_390_PC16DBL, l, -2); - } - - tcg_out16(s, (opc & 0xff00) | (r1 << 4) | cc); - tcg_out16(s, off); - tcg_out16(s, (i2 << 8) | (opc & 0xff)); -} - -static void tgen_brcond(TCGContext *s, TCGType type, TCGCond c, - TCGReg r1, TCGArg c2, int c2const, TCGLabel *l) -{ - int cc; - - if (facilities & FACILITY_GEN_INST_EXT) { - bool is_unsigned = is_unsigned_cond(c); - bool in_range; - S390Opcode opc; - - cc = tcg_cond_to_s390_cond[c]; - - if (!c2const) { - opc = (type == TCG_TYPE_I32 - ? (is_unsigned ? RIE_CLRJ : RIE_CRJ) - : (is_unsigned ? RIE_CLGRJ : RIE_CGRJ)); - tgen_compare_branch(s, opc, cc, r1, c2, l); - return; - } - - /* COMPARE IMMEDIATE AND BRANCH RELATIVE has an 8-bit immediate field. - If the immediate we've been given does not fit that range, we'll - fall back to separate compare and branch instructions using the - larger comparison range afforded by COMPARE IMMEDIATE. */ - if (type == TCG_TYPE_I32) { - if (is_unsigned) { - opc = RIE_CLIJ; - in_range = (uint32_t)c2 == (uint8_t)c2; - } else { - opc = RIE_CIJ; - in_range = (int32_t)c2 == (int8_t)c2; - } - } else { - if (is_unsigned) { - opc = RIE_CLGIJ; - in_range = (uint64_t)c2 == (uint8_t)c2; - } else { - opc = RIE_CGIJ; - in_range = (int64_t)c2 == (int8_t)c2; - } - } - if (in_range) { - tgen_compare_imm_branch(s, opc, cc, r1, c2, l); - return; - } - } - - cc = tgen_cmp(s, type, c, r1, c2, c2const); - tgen_branch(s, cc, l); -} - -static void tcg_out_call(TCGContext *s, tcg_insn_unit *dest) -{ - ptrdiff_t off = dest - s->code_ptr; - if (off == (int32_t)off) { - tcg_out_insn(s, RIL, BRASL, TCG_REG_R14, off); - } else { - tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0, (uintptr_t)dest); - tcg_out_insn(s, RR, BASR, TCG_REG_R14, TCG_TMP0); - } -} - -static void tcg_out_qemu_ld_direct(TCGContext *s, TCGMemOp opc, TCGReg data, - TCGReg base, TCGReg index, int disp) -{ - switch (opc & (MO_SSIZE | MO_BSWAP)) { - case MO_UB: - tcg_out_insn(s, RXY, LLGC, data, base, index, disp); - break; - case MO_SB: - tcg_out_insn(s, RXY, LGB, data, base, index, disp); - break; - - case MO_UW | MO_BSWAP: - /* swapped unsigned halfword load with upper bits zeroed */ - tcg_out_insn(s, RXY, LRVH, data, base, index, disp); - tgen_ext16u(s, TCG_TYPE_I64, data, data); - break; - case MO_UW: - tcg_out_insn(s, RXY, LLGH, data, base, index, disp); - break; - - case MO_SW | MO_BSWAP: - /* swapped sign-extended halfword load */ - tcg_out_insn(s, RXY, LRVH, data, base, index, disp); - tgen_ext16s(s, TCG_TYPE_I64, data, data); - break; - case MO_SW: - tcg_out_insn(s, RXY, LGH, data, base, index, disp); - break; - - case MO_UL | MO_BSWAP: - /* swapped unsigned int load with upper bits zeroed */ - tcg_out_insn(s, RXY, LRV, data, base, index, disp); - tgen_ext32u(s, data, data); - break; - case MO_UL: - tcg_out_insn(s, RXY, LLGF, data, base, index, disp); - break; - - case MO_SL | MO_BSWAP: - /* swapped sign-extended int load */ - tcg_out_insn(s, RXY, LRV, data, base, index, disp); - tgen_ext32s(s, data, data); - break; - case MO_SL: - tcg_out_insn(s, RXY, LGF, data, base, index, disp); - break; - - case MO_Q | MO_BSWAP: - tcg_out_insn(s, RXY, LRVG, data, base, index, disp); - break; - case MO_Q: - tcg_out_insn(s, RXY, LG, data, base, index, disp); - break; - - default: - tcg_abort(); - } -} - -static void tcg_out_qemu_st_direct(TCGContext *s, TCGMemOp opc, TCGReg data, - TCGReg base, TCGReg index, int disp) -{ - switch (opc & (MO_SIZE | MO_BSWAP)) { - case MO_UB: - if (disp >= 0 && disp < 0x1000) { - tcg_out_insn(s, RX, STC, data, base, index, disp); - } else { - tcg_out_insn(s, RXY, STCY, data, base, index, disp); - } - break; - - case MO_UW | MO_BSWAP: - tcg_out_insn(s, RXY, STRVH, data, base, index, disp); - break; - case MO_UW: - if (disp >= 0 && disp < 0x1000) { - tcg_out_insn(s, RX, STH, data, base, index, disp); - } else { - tcg_out_insn(s, RXY, STHY, data, base, index, disp); - } - break; - - case MO_UL | MO_BSWAP: - tcg_out_insn(s, RXY, STRV, data, base, index, disp); - break; - case MO_UL: - if (disp >= 0 && disp < 0x1000) { - tcg_out_insn(s, RX, ST, data, base, index, disp); - } else { - tcg_out_insn(s, RXY, STY, data, base, index, disp); - } - break; - - case MO_Q | MO_BSWAP: - tcg_out_insn(s, RXY, STRVG, data, base, index, disp); - break; - case MO_Q: - tcg_out_insn(s, RXY, STG, data, base, index, disp); - break; - - default: - tcg_abort(); - } -} - -#if defined(CONFIG_SOFTMMU) -/* We're expecting to use a 20-bit signed offset on the tlb memory ops. - Using the offset of the second entry in the last tlb table ensures - that we can index all of the elements of the first entry. */ -QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_table[NB_MMU_MODES - 1][1]) - > 0x7ffff); - -/* Load and compare a TLB entry, leaving the flags set. Loads the TLB - addend into R2. Returns a register with the santitized guest address. */ -static TCGReg tcg_out_tlb_read(TCGContext* s, TCGReg addr_reg, TCGMemOp opc, - int mem_index, bool is_ld) -{ - int s_mask = (1 << (opc & MO_SIZE)) - 1; - int ofs, a_off; - uint64_t tlb_mask; - - /* For aligned accesses, we check the first byte and include the alignment - bits within the address. For unaligned access, we check that we don't - cross pages using the address of the last byte of the access. */ - if ((opc & MO_AMASK) == MO_ALIGN || s_mask == 0) { - a_off = 0; - tlb_mask = TARGET_PAGE_MASK | s_mask; - } else { - a_off = s_mask; - tlb_mask = TARGET_PAGE_MASK; - } - - if (facilities & FACILITY_GEN_INST_EXT) { - tcg_out_risbg(s, TCG_REG_R2, addr_reg, - 64 - CPU_TLB_BITS - CPU_TLB_ENTRY_BITS, - 63 - CPU_TLB_ENTRY_BITS, - 64 + CPU_TLB_ENTRY_BITS - TARGET_PAGE_BITS, 1); - if (a_off) { - tcg_out_insn(s, RX, LA, TCG_REG_R3, addr_reg, TCG_REG_NONE, a_off); - tgen_andi(s, TCG_TYPE_TL, TCG_REG_R3, tlb_mask); - } else { - tgen_andi_risbg(s, TCG_REG_R3, addr_reg, tlb_mask); - } - } else { - tcg_out_sh64(s, RSY_SRLG, TCG_REG_R2, addr_reg, TCG_REG_NONE, - TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS); - tcg_out_insn(s, RX, LA, TCG_REG_R3, addr_reg, TCG_REG_NONE, a_off); - tgen_andi(s, TCG_TYPE_I64, TCG_REG_R2, - (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS); - tgen_andi(s, TCG_TYPE_TL, TCG_REG_R3, tlb_mask); - } - - if (is_ld) { - ofs = offsetof(CPUArchState, tlb_table[mem_index][0].addr_read); - } else { - ofs = offsetof(CPUArchState, tlb_table[mem_index][0].addr_write); - } - if (TARGET_LONG_BITS == 32) { - tcg_out_mem(s, RX_C, RXY_CY, TCG_REG_R3, TCG_REG_R2, TCG_AREG0, ofs); - } else { - tcg_out_mem(s, 0, RXY_CG, TCG_REG_R3, TCG_REG_R2, TCG_AREG0, ofs); - } - - ofs = offsetof(CPUArchState, tlb_table[mem_index][0].addend); - tcg_out_mem(s, 0, RXY_LG, TCG_REG_R2, TCG_REG_R2, TCG_AREG0, ofs); - - if (TARGET_LONG_BITS == 32) { - tgen_ext32u(s, TCG_REG_R3, addr_reg); - return TCG_REG_R3; - } - return addr_reg; -} - -static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi, - TCGReg data, TCGReg addr, - tcg_insn_unit *raddr, tcg_insn_unit *label_ptr) -{ - TCGLabelQemuLdst *label = new_ldst_label(s); - - label->is_ld = is_ld; - label->oi = oi; - label->datalo_reg = data; - label->addrlo_reg = addr; - label->raddr = raddr; - label->label_ptr[0] = label_ptr; -} - -static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) -{ - TCGReg addr_reg = lb->addrlo_reg; - TCGReg data_reg = lb->datalo_reg; - TCGMemOpIdx oi = lb->oi; - TCGMemOp opc = get_memop(oi); - - patch_reloc(lb->label_ptr[0], R_390_PC16DBL, (intptr_t)s->code_ptr, -2); - - tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_R2, TCG_AREG0); - if (TARGET_LONG_BITS == 64) { - tcg_out_mov(s, TCG_TYPE_I64, TCG_REG_R3, addr_reg); - } - tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R4, oi); - tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R5, (uintptr_t)lb->raddr); - tcg_out_call(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SSIZE)]); - tcg_out_mov(s, TCG_TYPE_I64, data_reg, TCG_REG_R2); - - tgen_gotoi(s, S390_CC_ALWAYS, lb->raddr); -} - -static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) -{ - TCGReg addr_reg = lb->addrlo_reg; - TCGReg data_reg = lb->datalo_reg; - TCGMemOpIdx oi = lb->oi; - TCGMemOp opc = get_memop(oi); - - patch_reloc(lb->label_ptr[0], R_390_PC16DBL, (intptr_t)s->code_ptr, -2); - - tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_R2, TCG_AREG0); - if (TARGET_LONG_BITS == 64) { - tcg_out_mov(s, TCG_TYPE_I64, TCG_REG_R3, addr_reg); - } - switch (opc & MO_SIZE) { - case MO_UB: - tgen_ext8u(s, TCG_TYPE_I64, TCG_REG_R4, data_reg); - break; - case MO_UW: - tgen_ext16u(s, TCG_TYPE_I64, TCG_REG_R4, data_reg); - break; - case MO_UL: - tgen_ext32u(s, TCG_REG_R4, data_reg); - break; - case MO_Q: - tcg_out_mov(s, TCG_TYPE_I64, TCG_REG_R4, data_reg); - break; - default: - tcg_abort(); - } - tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R5, oi); - tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R6, (uintptr_t)lb->raddr); - tcg_out_call(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]); - - tgen_gotoi(s, S390_CC_ALWAYS, lb->raddr); -} -#else -static void tcg_prepare_user_ldst(TCGContext *s, TCGReg *addr_reg, - TCGReg *index_reg, tcg_target_long *disp) -{ - if (TARGET_LONG_BITS == 32) { - tgen_ext32u(s, TCG_TMP0, *addr_reg); - *addr_reg = TCG_TMP0; - } - if (guest_base < 0x80000) { - *index_reg = TCG_REG_NONE; - *disp = guest_base; - } else { - *index_reg = TCG_GUEST_BASE_REG; - *disp = 0; - } -} -#endif /* CONFIG_SOFTMMU */ - -static void tcg_out_qemu_ld(TCGContext* s, TCGReg data_reg, TCGReg addr_reg, - TCGMemOpIdx oi) -{ - TCGMemOp opc = get_memop(oi); -#ifdef CONFIG_SOFTMMU - unsigned mem_index = get_mmuidx(oi); - tcg_insn_unit *label_ptr; - TCGReg base_reg; - - base_reg = tcg_out_tlb_read(s, addr_reg, opc, mem_index, 1); - - /* We need to keep the offset unchanged for retranslation. */ - tcg_out16(s, RI_BRC | (S390_CC_NE << 4)); - label_ptr = s->code_ptr; - s->code_ptr += 1; - - tcg_out_qemu_ld_direct(s, opc, data_reg, base_reg, TCG_REG_R2, 0); - - add_qemu_ldst_label(s, 1, oi, data_reg, addr_reg, s->code_ptr, label_ptr); -#else - TCGReg index_reg; - tcg_target_long disp; - - tcg_prepare_user_ldst(s, &addr_reg, &index_reg, &disp); - tcg_out_qemu_ld_direct(s, opc, data_reg, addr_reg, index_reg, disp); -#endif -} - -static void tcg_out_qemu_st(TCGContext* s, TCGReg data_reg, TCGReg addr_reg, - TCGMemOpIdx oi) -{ - TCGMemOp opc = get_memop(oi); -#ifdef CONFIG_SOFTMMU - unsigned mem_index = get_mmuidx(oi); - tcg_insn_unit *label_ptr; - TCGReg base_reg; - - base_reg = tcg_out_tlb_read(s, addr_reg, opc, mem_index, 0); - - /* We need to keep the offset unchanged for retranslation. */ - tcg_out16(s, RI_BRC | (S390_CC_NE << 4)); - label_ptr = s->code_ptr; - s->code_ptr += 1; - - tcg_out_qemu_st_direct(s, opc, data_reg, base_reg, TCG_REG_R2, 0); - - add_qemu_ldst_label(s, 0, oi, data_reg, addr_reg, s->code_ptr, label_ptr); -#else - TCGReg index_reg; - tcg_target_long disp; - - tcg_prepare_user_ldst(s, &addr_reg, &index_reg, &disp); - tcg_out_qemu_st_direct(s, opc, data_reg, addr_reg, index_reg, disp); -#endif -} - -# define OP_32_64(x) \ - case glue(glue(INDEX_op_,x),_i32): \ - case glue(glue(INDEX_op_,x),_i64) - -static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, - const TCGArg *args, const int *const_args) -{ - S390Opcode op; - TCGArg a0, a1, a2; - - switch (opc) { - case INDEX_op_exit_tb: - /* return value */ - tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R2, args[0]); - tgen_gotoi(s, S390_CC_ALWAYS, tb_ret_addr); - break; - - case INDEX_op_goto_tb: - if (s->tb_jmp_offset) { - tcg_out16(s, RIL_BRCL | (S390_CC_ALWAYS << 4)); - s->tb_jmp_offset[args[0]] = tcg_current_code_size(s); - s->code_ptr += 2; - } else { - /* load address stored at s->tb_next + args[0] */ - tcg_out_ld_abs(s, TCG_TYPE_PTR, TCG_TMP0, s->tb_next + args[0]); - /* and go there */ - tcg_out_insn(s, RR, BCR, S390_CC_ALWAYS, TCG_TMP0); - } - s->tb_next_offset[args[0]] = tcg_current_code_size(s); - break; - - OP_32_64(ld8u): - /* ??? LLC (RXY format) is only present with the extended-immediate - facility, whereas LLGC is always present. */ - tcg_out_mem(s, 0, RXY_LLGC, args[0], args[1], TCG_REG_NONE, args[2]); - break; - - OP_32_64(ld8s): - /* ??? LB is no smaller than LGB, so no point to using it. */ - tcg_out_mem(s, 0, RXY_LGB, args[0], args[1], TCG_REG_NONE, args[2]); - break; - - OP_32_64(ld16u): - /* ??? LLH (RXY format) is only present with the extended-immediate - facility, whereas LLGH is always present. */ - tcg_out_mem(s, 0, RXY_LLGH, args[0], args[1], TCG_REG_NONE, args[2]); - break; - - case INDEX_op_ld16s_i32: - tcg_out_mem(s, RX_LH, RXY_LHY, args[0], args[1], TCG_REG_NONE, args[2]); - break; - - case INDEX_op_ld_i32: - tcg_out_ld(s, TCG_TYPE_I32, args[0], args[1], args[2]); - break; - - OP_32_64(st8): - tcg_out_mem(s, RX_STC, RXY_STCY, args[0], args[1], - TCG_REG_NONE, args[2]); - break; - - OP_32_64(st16): - tcg_out_mem(s, RX_STH, RXY_STHY, args[0], args[1], - TCG_REG_NONE, args[2]); - break; - - case INDEX_op_st_i32: - tcg_out_st(s, TCG_TYPE_I32, args[0], args[1], args[2]); - break; - - case INDEX_op_add_i32: - a0 = args[0], a1 = args[1], a2 = (int32_t)args[2]; - if (const_args[2]) { - do_addi_32: - if (a0 == a1) { - if (a2 == (int16_t)a2) { - tcg_out_insn(s, RI, AHI, a0, a2); - break; - } - if (facilities & FACILITY_EXT_IMM) { - tcg_out_insn(s, RIL, AFI, a0, a2); - break; - } - } - tcg_out_mem(s, RX_LA, RXY_LAY, a0, a1, TCG_REG_NONE, a2); - } else if (a0 == a1) { - tcg_out_insn(s, RR, AR, a0, a2); - } else { - tcg_out_insn(s, RX, LA, a0, a1, a2, 0); - } - break; - case INDEX_op_sub_i32: - a0 = args[0], a1 = args[1], a2 = (int32_t)args[2]; - if (const_args[2]) { - a2 = -a2; - goto do_addi_32; - } - tcg_out_insn(s, RR, SR, args[0], args[2]); - break; - - case INDEX_op_and_i32: - if (const_args[2]) { - tgen_andi(s, TCG_TYPE_I32, args[0], args[2]); - } else { - tcg_out_insn(s, RR, NR, args[0], args[2]); - } - break; - case INDEX_op_or_i32: - if (const_args[2]) { - tgen64_ori(s, args[0], args[2] & 0xffffffff); - } else { - tcg_out_insn(s, RR, OR, args[0], args[2]); - } - break; - case INDEX_op_xor_i32: - if (const_args[2]) { - tgen64_xori(s, args[0], args[2] & 0xffffffff); - } else { - tcg_out_insn(s, RR, XR, args[0], args[2]); - } - break; - - case INDEX_op_neg_i32: - tcg_out_insn(s, RR, LCR, args[0], args[1]); - break; - - case INDEX_op_mul_i32: - if (const_args[2]) { - if ((int32_t)args[2] == (int16_t)args[2]) { - tcg_out_insn(s, RI, MHI, args[0], args[2]); - } else { - tcg_out_insn(s, RIL, MSFI, args[0], args[2]); - } - } else { - tcg_out_insn(s, RRE, MSR, args[0], args[2]); - } - break; - - case INDEX_op_div2_i32: - tcg_out_insn(s, RR, DR, TCG_REG_R2, args[4]); - break; - case INDEX_op_divu2_i32: - tcg_out_insn(s, RRE, DLR, TCG_REG_R2, args[4]); - break; - - case INDEX_op_shl_i32: - op = RS_SLL; - do_shift32: - if (const_args[2]) { - tcg_out_sh32(s, op, args[0], TCG_REG_NONE, args[2]); - } else { - tcg_out_sh32(s, op, args[0], args[2], 0); - } - break; - case INDEX_op_shr_i32: - op = RS_SRL; - goto do_shift32; - case INDEX_op_sar_i32: - op = RS_SRA; - goto do_shift32; - - case INDEX_op_rotl_i32: - /* ??? Using tcg_out_sh64 here for the format; it is a 32-bit rol. */ - if (const_args[2]) { - tcg_out_sh64(s, RSY_RLL, args[0], args[1], TCG_REG_NONE, args[2]); - } else { - tcg_out_sh64(s, RSY_RLL, args[0], args[1], args[2], 0); - } - break; - case INDEX_op_rotr_i32: - if (const_args[2]) { - tcg_out_sh64(s, RSY_RLL, args[0], args[1], - TCG_REG_NONE, (32 - args[2]) & 31); - } else { - tcg_out_insn(s, RR, LCR, TCG_TMP0, args[2]); - tcg_out_sh64(s, RSY_RLL, args[0], args[1], TCG_TMP0, 0); - } - break; - - case INDEX_op_ext8s_i32: - tgen_ext8s(s, TCG_TYPE_I32, args[0], args[1]); - break; - case INDEX_op_ext16s_i32: - tgen_ext16s(s, TCG_TYPE_I32, args[0], args[1]); - break; - case INDEX_op_ext8u_i32: - tgen_ext8u(s, TCG_TYPE_I32, args[0], args[1]); - break; - case INDEX_op_ext16u_i32: - tgen_ext16u(s, TCG_TYPE_I32, args[0], args[1]); - break; - - OP_32_64(bswap16): - /* The TCG bswap definition requires bits 0-47 already be zero. - Thus we don't need the G-type insns to implement bswap16_i64. */ - tcg_out_insn(s, RRE, LRVR, args[0], args[1]); - tcg_out_sh32(s, RS_SRL, args[0], TCG_REG_NONE, 16); - break; - OP_32_64(bswap32): - tcg_out_insn(s, RRE, LRVR, args[0], args[1]); - break; - - case INDEX_op_add2_i32: - if (const_args[4]) { - tcg_out_insn(s, RIL, ALFI, args[0], args[4]); - } else { - tcg_out_insn(s, RR, ALR, args[0], args[4]); - } - tcg_out_insn(s, RRE, ALCR, args[1], args[5]); - break; - case INDEX_op_sub2_i32: - if (const_args[4]) { - tcg_out_insn(s, RIL, SLFI, args[0], args[4]); - } else { - tcg_out_insn(s, RR, SLR, args[0], args[4]); - } - tcg_out_insn(s, RRE, SLBR, args[1], args[5]); - break; - - case INDEX_op_br: - tgen_branch(s, S390_CC_ALWAYS, arg_label(args[0])); - break; - - case INDEX_op_brcond_i32: - tgen_brcond(s, TCG_TYPE_I32, args[2], args[0], - args[1], const_args[1], arg_label(args[3])); - break; - case INDEX_op_setcond_i32: - tgen_setcond(s, TCG_TYPE_I32, args[3], args[0], args[1], - args[2], const_args[2]); - break; - case INDEX_op_movcond_i32: - tgen_movcond(s, TCG_TYPE_I32, args[5], args[0], args[1], - args[2], const_args[2], args[3]); - break; - - case INDEX_op_qemu_ld_i32: - /* ??? Technically we can use a non-extending instruction. */ - case INDEX_op_qemu_ld_i64: - tcg_out_qemu_ld(s, args[0], args[1], args[2]); - break; - case INDEX_op_qemu_st_i32: - case INDEX_op_qemu_st_i64: - tcg_out_qemu_st(s, args[0], args[1], args[2]); - break; - - case INDEX_op_ld16s_i64: - tcg_out_mem(s, 0, RXY_LGH, args[0], args[1], TCG_REG_NONE, args[2]); - break; - case INDEX_op_ld32u_i64: - tcg_out_mem(s, 0, RXY_LLGF, args[0], args[1], TCG_REG_NONE, args[2]); - break; - case INDEX_op_ld32s_i64: - tcg_out_mem(s, 0, RXY_LGF, args[0], args[1], TCG_REG_NONE, args[2]); - break; - case INDEX_op_ld_i64: - tcg_out_ld(s, TCG_TYPE_I64, args[0], args[1], args[2]); - break; - - case INDEX_op_st32_i64: - tcg_out_st(s, TCG_TYPE_I32, args[0], args[1], args[2]); - break; - case INDEX_op_st_i64: - tcg_out_st(s, TCG_TYPE_I64, args[0], args[1], args[2]); - break; - - case INDEX_op_add_i64: - a0 = args[0], a1 = args[1], a2 = args[2]; - if (const_args[2]) { - do_addi_64: - if (a0 == a1) { - if (a2 == (int16_t)a2) { - tcg_out_insn(s, RI, AGHI, a0, a2); - break; - } - if (facilities & FACILITY_EXT_IMM) { - if (a2 == (int32_t)a2) { - tcg_out_insn(s, RIL, AGFI, a0, a2); - break; - } else if (a2 == (uint32_t)a2) { - tcg_out_insn(s, RIL, ALGFI, a0, a2); - break; - } else if (-a2 == (uint32_t)-a2) { - tcg_out_insn(s, RIL, SLGFI, a0, -a2); - break; - } - } - } - tcg_out_mem(s, RX_LA, RXY_LAY, a0, a1, TCG_REG_NONE, a2); - } else if (a0 == a1) { - tcg_out_insn(s, RRE, AGR, a0, a2); - } else { - tcg_out_insn(s, RX, LA, a0, a1, a2, 0); - } - break; - case INDEX_op_sub_i64: - a0 = args[0], a1 = args[1], a2 = args[2]; - if (const_args[2]) { - a2 = -a2; - goto do_addi_64; - } else { - tcg_out_insn(s, RRE, SGR, args[0], args[2]); - } - break; - - case INDEX_op_and_i64: - if (const_args[2]) { - tgen_andi(s, TCG_TYPE_I64, args[0], args[2]); - } else { - tcg_out_insn(s, RRE, NGR, args[0], args[2]); - } - break; - case INDEX_op_or_i64: - if (const_args[2]) { - tgen64_ori(s, args[0], args[2]); - } else { - tcg_out_insn(s, RRE, OGR, args[0], args[2]); - } - break; - case INDEX_op_xor_i64: - if (const_args[2]) { - tgen64_xori(s, args[0], args[2]); - } else { - tcg_out_insn(s, RRE, XGR, args[0], args[2]); - } - break; - - case INDEX_op_neg_i64: - tcg_out_insn(s, RRE, LCGR, args[0], args[1]); - break; - case INDEX_op_bswap64_i64: - tcg_out_insn(s, RRE, LRVGR, args[0], args[1]); - break; - - case INDEX_op_mul_i64: - if (const_args[2]) { - if (args[2] == (int16_t)args[2]) { - tcg_out_insn(s, RI, MGHI, args[0], args[2]); - } else { - tcg_out_insn(s, RIL, MSGFI, args[0], args[2]); - } - } else { - tcg_out_insn(s, RRE, MSGR, args[0], args[2]); - } - break; - - case INDEX_op_div2_i64: - /* ??? We get an unnecessary sign-extension of the dividend - into R3 with this definition, but as we do in fact always - produce both quotient and remainder using INDEX_op_div_i64 - instead requires jumping through even more hoops. */ - tcg_out_insn(s, RRE, DSGR, TCG_REG_R2, args[4]); - break; - case INDEX_op_divu2_i64: - tcg_out_insn(s, RRE, DLGR, TCG_REG_R2, args[4]); - break; - case INDEX_op_mulu2_i64: - tcg_out_insn(s, RRE, MLGR, TCG_REG_R2, args[3]); - break; - - case INDEX_op_shl_i64: - op = RSY_SLLG; - do_shift64: - if (const_args[2]) { - tcg_out_sh64(s, op, args[0], args[1], TCG_REG_NONE, args[2]); - } else { - tcg_out_sh64(s, op, args[0], args[1], args[2], 0); - } - break; - case INDEX_op_shr_i64: - op = RSY_SRLG; - goto do_shift64; - case INDEX_op_sar_i64: - op = RSY_SRAG; - goto do_shift64; - - case INDEX_op_rotl_i64: - if (const_args[2]) { - tcg_out_sh64(s, RSY_RLLG, args[0], args[1], - TCG_REG_NONE, args[2]); - } else { - tcg_out_sh64(s, RSY_RLLG, args[0], args[1], args[2], 0); - } - break; - case INDEX_op_rotr_i64: - if (const_args[2]) { - tcg_out_sh64(s, RSY_RLLG, args[0], args[1], - TCG_REG_NONE, (64 - args[2]) & 63); - } else { - /* We can use the smaller 32-bit negate because only the - low 6 bits are examined for the rotate. */ - tcg_out_insn(s, RR, LCR, TCG_TMP0, args[2]); - tcg_out_sh64(s, RSY_RLLG, args[0], args[1], TCG_TMP0, 0); - } - break; - - case INDEX_op_ext8s_i64: - tgen_ext8s(s, TCG_TYPE_I64, args[0], args[1]); - break; - case INDEX_op_ext16s_i64: - tgen_ext16s(s, TCG_TYPE_I64, args[0], args[1]); - break; - case INDEX_op_ext_i32_i64: - case INDEX_op_ext32s_i64: - tgen_ext32s(s, args[0], args[1]); - break; - case INDEX_op_ext8u_i64: - tgen_ext8u(s, TCG_TYPE_I64, args[0], args[1]); - break; - case INDEX_op_ext16u_i64: - tgen_ext16u(s, TCG_TYPE_I64, args[0], args[1]); - break; - case INDEX_op_extu_i32_i64: - case INDEX_op_ext32u_i64: - tgen_ext32u(s, args[0], args[1]); - break; - - case INDEX_op_add2_i64: - if (const_args[4]) { - if ((int64_t)args[4] >= 0) { - tcg_out_insn(s, RIL, ALGFI, args[0], args[4]); - } else { - tcg_out_insn(s, RIL, SLGFI, args[0], -args[4]); - } - } else { - tcg_out_insn(s, RRE, ALGR, args[0], args[4]); - } - tcg_out_insn(s, RRE, ALCGR, args[1], args[5]); - break; - case INDEX_op_sub2_i64: - if (const_args[4]) { - if ((int64_t)args[4] >= 0) { - tcg_out_insn(s, RIL, SLGFI, args[0], args[4]); - } else { - tcg_out_insn(s, RIL, ALGFI, args[0], -args[4]); - } - } else { - tcg_out_insn(s, RRE, SLGR, args[0], args[4]); - } - tcg_out_insn(s, RRE, SLBGR, args[1], args[5]); - break; - - case INDEX_op_brcond_i64: - tgen_brcond(s, TCG_TYPE_I64, args[2], args[0], - args[1], const_args[1], arg_label(args[3])); - break; - case INDEX_op_setcond_i64: - tgen_setcond(s, TCG_TYPE_I64, args[3], args[0], args[1], - args[2], const_args[2]); - break; - case INDEX_op_movcond_i64: - tgen_movcond(s, TCG_TYPE_I64, args[5], args[0], args[1], - args[2], const_args[2], args[3]); - break; - - OP_32_64(deposit): - tgen_deposit(s, args[0], args[2], args[3], args[4]); - break; - - case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */ - case INDEX_op_mov_i64: - case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi. */ - case INDEX_op_movi_i64: - case INDEX_op_call: /* Always emitted via tcg_out_call. */ - default: - tcg_abort(); - } -} - -static const TCGTargetOpDef s390_op_defs[] = { - { INDEX_op_exit_tb, { } }, - { INDEX_op_goto_tb, { } }, - { INDEX_op_br, { } }, - - { INDEX_op_ld8u_i32, { "r", "r" } }, - { INDEX_op_ld8s_i32, { "r", "r" } }, - { INDEX_op_ld16u_i32, { "r", "r" } }, - { INDEX_op_ld16s_i32, { "r", "r" } }, - { INDEX_op_ld_i32, { "r", "r" } }, - { INDEX_op_st8_i32, { "r", "r" } }, - { INDEX_op_st16_i32, { "r", "r" } }, - { INDEX_op_st_i32, { "r", "r" } }, - - { INDEX_op_add_i32, { "r", "r", "ri" } }, - { INDEX_op_sub_i32, { "r", "0", "ri" } }, - { INDEX_op_mul_i32, { "r", "0", "rK" } }, - - { INDEX_op_div2_i32, { "b", "a", "0", "1", "r" } }, - { INDEX_op_divu2_i32, { "b", "a", "0", "1", "r" } }, - - { INDEX_op_and_i32, { "r", "0", "ri" } }, - { INDEX_op_or_i32, { "r", "0", "rO" } }, - { INDEX_op_xor_i32, { "r", "0", "rX" } }, - - { INDEX_op_neg_i32, { "r", "r" } }, - - { INDEX_op_shl_i32, { "r", "0", "Ri" } }, - { INDEX_op_shr_i32, { "r", "0", "Ri" } }, - { INDEX_op_sar_i32, { "r", "0", "Ri" } }, - - { INDEX_op_rotl_i32, { "r", "r", "Ri" } }, - { INDEX_op_rotr_i32, { "r", "r", "Ri" } }, - - { INDEX_op_ext8s_i32, { "r", "r" } }, - { INDEX_op_ext8u_i32, { "r", "r" } }, - { INDEX_op_ext16s_i32, { "r", "r" } }, - { INDEX_op_ext16u_i32, { "r", "r" } }, - - { INDEX_op_bswap16_i32, { "r", "r" } }, - { INDEX_op_bswap32_i32, { "r", "r" } }, - - { INDEX_op_add2_i32, { "r", "r", "0", "1", "rA", "r" } }, - { INDEX_op_sub2_i32, { "r", "r", "0", "1", "rA", "r" } }, - - { INDEX_op_brcond_i32, { "r", "rC" } }, - { INDEX_op_setcond_i32, { "r", "r", "rC" } }, - { INDEX_op_movcond_i32, { "r", "r", "rC", "r", "0" } }, - { INDEX_op_deposit_i32, { "r", "0", "r" } }, - - { INDEX_op_qemu_ld_i32, { "r", "L" } }, - { INDEX_op_qemu_ld_i64, { "r", "L" } }, - { INDEX_op_qemu_st_i32, { "L", "L" } }, - { INDEX_op_qemu_st_i64, { "L", "L" } }, - - { INDEX_op_ld8u_i64, { "r", "r" } }, - { INDEX_op_ld8s_i64, { "r", "r" } }, - { INDEX_op_ld16u_i64, { "r", "r" } }, - { INDEX_op_ld16s_i64, { "r", "r" } }, - { INDEX_op_ld32u_i64, { "r", "r" } }, - { INDEX_op_ld32s_i64, { "r", "r" } }, - { INDEX_op_ld_i64, { "r", "r" } }, - - { INDEX_op_st8_i64, { "r", "r" } }, - { INDEX_op_st16_i64, { "r", "r" } }, - { INDEX_op_st32_i64, { "r", "r" } }, - { INDEX_op_st_i64, { "r", "r" } }, - - { INDEX_op_add_i64, { "r", "r", "ri" } }, - { INDEX_op_sub_i64, { "r", "0", "ri" } }, - { INDEX_op_mul_i64, { "r", "0", "rK" } }, - - { INDEX_op_div2_i64, { "b", "a", "0", "1", "r" } }, - { INDEX_op_divu2_i64, { "b", "a", "0", "1", "r" } }, - { INDEX_op_mulu2_i64, { "b", "a", "0", "r" } }, - - { INDEX_op_and_i64, { "r", "0", "ri" } }, - { INDEX_op_or_i64, { "r", "0", "rO" } }, - { INDEX_op_xor_i64, { "r", "0", "rX" } }, - - { INDEX_op_neg_i64, { "r", "r" } }, - - { INDEX_op_shl_i64, { "r", "r", "Ri" } }, - { INDEX_op_shr_i64, { "r", "r", "Ri" } }, - { INDEX_op_sar_i64, { "r", "r", "Ri" } }, - - { INDEX_op_rotl_i64, { "r", "r", "Ri" } }, - { INDEX_op_rotr_i64, { "r", "r", "Ri" } }, - - { INDEX_op_ext8s_i64, { "r", "r" } }, - { INDEX_op_ext8u_i64, { "r", "r" } }, - { INDEX_op_ext16s_i64, { "r", "r" } }, - { INDEX_op_ext16u_i64, { "r", "r" } }, - { INDEX_op_ext32s_i64, { "r", "r" } }, - { INDEX_op_ext32u_i64, { "r", "r" } }, - - { INDEX_op_ext_i32_i64, { "r", "r" } }, - { INDEX_op_extu_i32_i64, { "r", "r" } }, - - { INDEX_op_bswap16_i64, { "r", "r" } }, - { INDEX_op_bswap32_i64, { "r", "r" } }, - { INDEX_op_bswap64_i64, { "r", "r" } }, - - { INDEX_op_add2_i64, { "r", "r", "0", "1", "rA", "r" } }, - { INDEX_op_sub2_i64, { "r", "r", "0", "1", "rA", "r" } }, - - { INDEX_op_brcond_i64, { "r", "rC" } }, - { INDEX_op_setcond_i64, { "r", "r", "rC" } }, - { INDEX_op_movcond_i64, { "r", "r", "rC", "r", "0" } }, - { INDEX_op_deposit_i64, { "r", "0", "r" } }, - - { -1 }, -}; - -static void query_facilities(void) -{ - unsigned long hwcap = qemu_getauxval(AT_HWCAP); - - /* Is STORE FACILITY LIST EXTENDED available? Honestly, I believe this - is present on all 64-bit systems, but let's check for it anyway. */ - if (hwcap & HWCAP_S390_STFLE) { - register int r0 __asm__("0"); - register void *r1 __asm__("1"); - - /* stfle 0(%r1) */ - r1 = &facilities; - asm volatile(".word 0xb2b0,0x1000" - : "=r"(r0) : "0"(0), "r"(r1) : "memory", "cc"); - } -} - -static void tcg_target_init(TCGContext *s) -{ - query_facilities(); - - tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffff); - tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I64], 0, 0xffff); - - tcg_regset_clear(tcg_target_call_clobber_regs); - tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R0); - tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R1); - tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R2); - tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R3); - tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R4); - tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R5); - /* The r6 register is technically call-saved, but it's also a parameter - register, so it can get killed by setup for the qemu_st helper. */ - tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R6); - /* The return register can be considered call-clobbered. */ - tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R14); - - tcg_regset_clear(s->reserved_regs); - tcg_regset_set_reg(s->reserved_regs, TCG_TMP0); - /* XXX many insns can't be used with R0, so we better avoid it for now */ - tcg_regset_set_reg(s->reserved_regs, TCG_REG_R0); - tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK); - - tcg_add_target_add_op_defs(s390_op_defs); -} - -#define FRAME_SIZE ((int)(TCG_TARGET_CALL_STACK_OFFSET \ - + TCG_STATIC_CALL_ARGS_SIZE \ - + CPU_TEMP_BUF_NLONGS * sizeof(long))) - -static void tcg_target_qemu_prologue(TCGContext *s) -{ - /* stmg %r6,%r15,48(%r15) (save registers) */ - tcg_out_insn(s, RXY, STMG, TCG_REG_R6, TCG_REG_R15, TCG_REG_R15, 48); - - /* aghi %r15,-frame_size */ - tcg_out_insn(s, RI, AGHI, TCG_REG_R15, -FRAME_SIZE); - - tcg_set_frame(s, TCG_REG_CALL_STACK, - TCG_STATIC_CALL_ARGS_SIZE + TCG_TARGET_CALL_STACK_OFFSET, - CPU_TEMP_BUF_NLONGS * sizeof(long)); - -#ifndef CONFIG_SOFTMMU - if (guest_base >= 0x80000) { - tcg_out_movi(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, guest_base); - tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG); - } -#endif - - tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]); - /* br %r3 (go to TB) */ - tcg_out_insn(s, RR, BCR, S390_CC_ALWAYS, tcg_target_call_iarg_regs[1]); - - tb_ret_addr = s->code_ptr; - - /* lmg %r6,%r15,fs+48(%r15) (restore registers) */ - tcg_out_insn(s, RXY, LMG, TCG_REG_R6, TCG_REG_R15, TCG_REG_R15, - FRAME_SIZE + 48); - - /* br %r14 (return) */ - tcg_out_insn(s, RR, BCR, S390_CC_ALWAYS, TCG_REG_R14); -} - -typedef struct { - DebugFrameHeader h; - uint8_t fde_def_cfa[4]; - uint8_t fde_reg_ofs[18]; -} DebugFrame; - -/* We're expecting a 2 byte uleb128 encoded value. */ -QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14)); - -#define ELF_HOST_MACHINE EM_S390 - -static const DebugFrame debug_frame = { - .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */ - .h.cie.id = -1, - .h.cie.version = 1, - .h.cie.code_align = 1, - .h.cie.data_align = 8, /* sleb128 8 */ - .h.cie.return_column = TCG_REG_R14, - - /* Total FDE size does not include the "len" member. */ - .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset), - - .fde_def_cfa = { - 12, TCG_REG_CALL_STACK, /* DW_CFA_def_cfa %r15, ... */ - (FRAME_SIZE & 0x7f) | 0x80, /* ... uleb128 FRAME_SIZE */ - (FRAME_SIZE >> 7) - }, - .fde_reg_ofs = { - 0x86, 6, /* DW_CFA_offset, %r6, 48 */ - 0x87, 7, /* DW_CFA_offset, %r7, 56 */ - 0x88, 8, /* DW_CFA_offset, %r8, 64 */ - 0x89, 9, /* DW_CFA_offset, %r92, 72 */ - 0x8a, 10, /* DW_CFA_offset, %r10, 80 */ - 0x8b, 11, /* DW_CFA_offset, %r11, 88 */ - 0x8c, 12, /* DW_CFA_offset, %r12, 96 */ - 0x8d, 13, /* DW_CFA_offset, %r13, 104 */ - 0x8e, 14, /* DW_CFA_offset, %r14, 112 */ - } -}; - -void tcg_register_jit(void *buf, size_t buf_size) -{ - tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame)); -} diff --git a/tcg/s390/tcg-target.inc.c b/tcg/s390/tcg-target.inc.c new file mode 100644 index 0000000..58520fa --- /dev/null +++ b/tcg/s390/tcg-target.inc.c @@ -0,0 +1,2410 @@ +/* + * Tiny Code Generator for QEMU + * + * Copyright (c) 2009 Ulrich Hecht + * Copyright (c) 2009 Alexander Graf + * Copyright (c) 2010 Richard Henderson + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "qemu/osdep.h" +#include "tcg-be-ldst.h" + +/* We only support generating code for 64-bit mode. */ +#if TCG_TARGET_REG_BITS != 64 +#error "unsupported code generation mode" +#endif + +#include "elf.h" + +/* ??? The translation blocks produced by TCG are generally small enough to + be entirely reachable with a 16-bit displacement. Leaving the option for + a 32-bit displacement here Just In Case. */ +#define USE_LONG_BRANCHES 0 + +#define TCG_CT_CONST_MULI 0x100 +#define TCG_CT_CONST_ORI 0x200 +#define TCG_CT_CONST_XORI 0x400 +#define TCG_CT_CONST_CMPI 0x800 +#define TCG_CT_CONST_ADLI 0x1000 + +/* Several places within the instruction set 0 means "no register" + rather than TCG_REG_R0. */ +#define TCG_REG_NONE 0 + +/* A scratch register that may be be used throughout the backend. */ +#define TCG_TMP0 TCG_REG_R14 + +#ifndef CONFIG_SOFTMMU +#define TCG_GUEST_BASE_REG TCG_REG_R13 +#endif + +/* All of the following instructions are prefixed with their instruction + format, and are defined as 8- or 16-bit quantities, even when the two + halves of the 16-bit quantity may appear 32 bits apart in the insn. + This makes it easy to copy the values from the tables in Appendix B. */ +typedef enum S390Opcode { + RIL_AFI = 0xc209, + RIL_AGFI = 0xc208, + RIL_ALFI = 0xc20b, + RIL_ALGFI = 0xc20a, + RIL_BRASL = 0xc005, + RIL_BRCL = 0xc004, + RIL_CFI = 0xc20d, + RIL_CGFI = 0xc20c, + RIL_CLFI = 0xc20f, + RIL_CLGFI = 0xc20e, + RIL_IIHF = 0xc008, + RIL_IILF = 0xc009, + RIL_LARL = 0xc000, + RIL_LGFI = 0xc001, + RIL_LGRL = 0xc408, + RIL_LLIHF = 0xc00e, + RIL_LLILF = 0xc00f, + RIL_LRL = 0xc40d, + RIL_MSFI = 0xc201, + RIL_MSGFI = 0xc200, + RIL_NIHF = 0xc00a, + RIL_NILF = 0xc00b, + RIL_OIHF = 0xc00c, + RIL_OILF = 0xc00d, + RIL_SLFI = 0xc205, + RIL_SLGFI = 0xc204, + RIL_XIHF = 0xc006, + RIL_XILF = 0xc007, + + RI_AGHI = 0xa70b, + RI_AHI = 0xa70a, + RI_BRC = 0xa704, + RI_IIHH = 0xa500, + RI_IIHL = 0xa501, + RI_IILH = 0xa502, + RI_IILL = 0xa503, + RI_LGHI = 0xa709, + RI_LLIHH = 0xa50c, + RI_LLIHL = 0xa50d, + RI_LLILH = 0xa50e, + RI_LLILL = 0xa50f, + RI_MGHI = 0xa70d, + RI_MHI = 0xa70c, + RI_NIHH = 0xa504, + RI_NIHL = 0xa505, + RI_NILH = 0xa506, + RI_NILL = 0xa507, + RI_OIHH = 0xa508, + RI_OIHL = 0xa509, + RI_OILH = 0xa50a, + RI_OILL = 0xa50b, + + RIE_CGIJ = 0xec7c, + RIE_CGRJ = 0xec64, + RIE_CIJ = 0xec7e, + RIE_CLGRJ = 0xec65, + RIE_CLIJ = 0xec7f, + RIE_CLGIJ = 0xec7d, + RIE_CLRJ = 0xec77, + RIE_CRJ = 0xec76, + RIE_RISBG = 0xec55, + + RRE_AGR = 0xb908, + RRE_ALGR = 0xb90a, + RRE_ALCR = 0xb998, + RRE_ALCGR = 0xb988, + RRE_CGR = 0xb920, + RRE_CLGR = 0xb921, + RRE_DLGR = 0xb987, + RRE_DLR = 0xb997, + RRE_DSGFR = 0xb91d, + RRE_DSGR = 0xb90d, + RRE_LGBR = 0xb906, + RRE_LCGR = 0xb903, + RRE_LGFR = 0xb914, + RRE_LGHR = 0xb907, + RRE_LGR = 0xb904, + RRE_LLGCR = 0xb984, + RRE_LLGFR = 0xb916, + RRE_LLGHR = 0xb985, + RRE_LRVR = 0xb91f, + RRE_LRVGR = 0xb90f, + RRE_LTGR = 0xb902, + RRE_MLGR = 0xb986, + RRE_MSGR = 0xb90c, + RRE_MSR = 0xb252, + RRE_NGR = 0xb980, + RRE_OGR = 0xb981, + RRE_SGR = 0xb909, + RRE_SLGR = 0xb90b, + RRE_SLBR = 0xb999, + RRE_SLBGR = 0xb989, + RRE_XGR = 0xb982, + + RRF_LOCR = 0xb9f2, + RRF_LOCGR = 0xb9e2, + + RR_AR = 0x1a, + RR_ALR = 0x1e, + RR_BASR = 0x0d, + RR_BCR = 0x07, + RR_CLR = 0x15, + RR_CR = 0x19, + RR_DR = 0x1d, + RR_LCR = 0x13, + RR_LR = 0x18, + RR_LTR = 0x12, + RR_NR = 0x14, + RR_OR = 0x16, + RR_SR = 0x1b, + RR_SLR = 0x1f, + RR_XR = 0x17, + + RSY_RLL = 0xeb1d, + RSY_RLLG = 0xeb1c, + RSY_SLLG = 0xeb0d, + RSY_SRAG = 0xeb0a, + RSY_SRLG = 0xeb0c, + + RS_SLL = 0x89, + RS_SRA = 0x8a, + RS_SRL = 0x88, + + RXY_AG = 0xe308, + RXY_AY = 0xe35a, + RXY_CG = 0xe320, + RXY_CY = 0xe359, + RXY_LAY = 0xe371, + RXY_LB = 0xe376, + RXY_LG = 0xe304, + RXY_LGB = 0xe377, + RXY_LGF = 0xe314, + RXY_LGH = 0xe315, + RXY_LHY = 0xe378, + RXY_LLGC = 0xe390, + RXY_LLGF = 0xe316, + RXY_LLGH = 0xe391, + RXY_LMG = 0xeb04, + RXY_LRV = 0xe31e, + RXY_LRVG = 0xe30f, + RXY_LRVH = 0xe31f, + RXY_LY = 0xe358, + RXY_STCY = 0xe372, + RXY_STG = 0xe324, + RXY_STHY = 0xe370, + RXY_STMG = 0xeb24, + RXY_STRV = 0xe33e, + RXY_STRVG = 0xe32f, + RXY_STRVH = 0xe33f, + RXY_STY = 0xe350, + + RX_A = 0x5a, + RX_C = 0x59, + RX_L = 0x58, + RX_LA = 0x41, + RX_LH = 0x48, + RX_ST = 0x50, + RX_STC = 0x42, + RX_STH = 0x40, +} S390Opcode; + +#ifndef NDEBUG +static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = { + "%r0", "%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7", + "%r8", "%r9", "%r10" "%r11" "%r12" "%r13" "%r14" "%r15" +}; +#endif + +/* Since R6 is a potential argument register, choose it last of the + call-saved registers. Likewise prefer the call-clobbered registers + in reverse order to maximize the chance of avoiding the arguments. */ +static const int tcg_target_reg_alloc_order[] = { + /* Call saved registers. */ + TCG_REG_R13, + TCG_REG_R12, + TCG_REG_R11, + TCG_REG_R10, + TCG_REG_R9, + TCG_REG_R8, + TCG_REG_R7, + TCG_REG_R6, + /* Call clobbered registers. */ + TCG_REG_R14, + TCG_REG_R0, + TCG_REG_R1, + /* Argument registers, in reverse order of allocation. */ + TCG_REG_R5, + TCG_REG_R4, + TCG_REG_R3, + TCG_REG_R2, +}; + +static const int tcg_target_call_iarg_regs[] = { + TCG_REG_R2, + TCG_REG_R3, + TCG_REG_R4, + TCG_REG_R5, + TCG_REG_R6, +}; + +static const int tcg_target_call_oarg_regs[] = { + TCG_REG_R2, +}; + +#define S390_CC_EQ 8 +#define S390_CC_LT 4 +#define S390_CC_GT 2 +#define S390_CC_OV 1 +#define S390_CC_NE (S390_CC_LT | S390_CC_GT) +#define S390_CC_LE (S390_CC_LT | S390_CC_EQ) +#define S390_CC_GE (S390_CC_GT | S390_CC_EQ) +#define S390_CC_NEVER 0 +#define S390_CC_ALWAYS 15 + +/* Condition codes that result from a COMPARE and COMPARE LOGICAL. */ +static const uint8_t tcg_cond_to_s390_cond[] = { + [TCG_COND_EQ] = S390_CC_EQ, + [TCG_COND_NE] = S390_CC_NE, + [TCG_COND_LT] = S390_CC_LT, + [TCG_COND_LE] = S390_CC_LE, + [TCG_COND_GT] = S390_CC_GT, + [TCG_COND_GE] = S390_CC_GE, + [TCG_COND_LTU] = S390_CC_LT, + [TCG_COND_LEU] = S390_CC_LE, + [TCG_COND_GTU] = S390_CC_GT, + [TCG_COND_GEU] = S390_CC_GE, +}; + +/* Condition codes that result from a LOAD AND TEST. Here, we have no + unsigned instruction variation, however since the test is vs zero we + can re-map the outcomes appropriately. */ +static const uint8_t tcg_cond_to_ltr_cond[] = { + [TCG_COND_EQ] = S390_CC_EQ, + [TCG_COND_NE] = S390_CC_NE, + [TCG_COND_LT] = S390_CC_LT, + [TCG_COND_LE] = S390_CC_LE, + [TCG_COND_GT] = S390_CC_GT, + [TCG_COND_GE] = S390_CC_GE, + [TCG_COND_LTU] = S390_CC_NEVER, + [TCG_COND_LEU] = S390_CC_EQ, + [TCG_COND_GTU] = S390_CC_NE, + [TCG_COND_GEU] = S390_CC_ALWAYS, +}; + +#ifdef CONFIG_SOFTMMU +static void * const qemu_ld_helpers[16] = { + [MO_UB] = helper_ret_ldub_mmu, + [MO_SB] = helper_ret_ldsb_mmu, + [MO_LEUW] = helper_le_lduw_mmu, + [MO_LESW] = helper_le_ldsw_mmu, + [MO_LEUL] = helper_le_ldul_mmu, + [MO_LESL] = helper_le_ldsl_mmu, + [MO_LEQ] = helper_le_ldq_mmu, + [MO_BEUW] = helper_be_lduw_mmu, + [MO_BESW] = helper_be_ldsw_mmu, + [MO_BEUL] = helper_be_ldul_mmu, + [MO_BESL] = helper_be_ldsl_mmu, + [MO_BEQ] = helper_be_ldq_mmu, +}; + +static void * const qemu_st_helpers[16] = { + [MO_UB] = helper_ret_stb_mmu, + [MO_LEUW] = helper_le_stw_mmu, + [MO_LEUL] = helper_le_stl_mmu, + [MO_LEQ] = helper_le_stq_mmu, + [MO_BEUW] = helper_be_stw_mmu, + [MO_BEUL] = helper_be_stl_mmu, + [MO_BEQ] = helper_be_stq_mmu, +}; +#endif + +static tcg_insn_unit *tb_ret_addr; + +/* A list of relevant facilities used by this translator. Some of these + are required for proper operation, and these are checked at startup. */ + +#define FACILITY_ZARCH_ACTIVE (1ULL << (63 - 2)) +#define FACILITY_LONG_DISP (1ULL << (63 - 18)) +#define FACILITY_EXT_IMM (1ULL << (63 - 21)) +#define FACILITY_GEN_INST_EXT (1ULL << (63 - 34)) +#define FACILITY_LOAD_ON_COND (1ULL << (63 - 45)) + +static uint64_t facilities; + +static void patch_reloc(tcg_insn_unit *code_ptr, int type, + intptr_t value, intptr_t addend) +{ + intptr_t pcrel2 = (tcg_insn_unit *)value - (code_ptr - 1); + assert(addend == -2); + + switch (type) { + case R_390_PC16DBL: + assert(pcrel2 == (int16_t)pcrel2); + tcg_patch16(code_ptr, pcrel2); + break; + case R_390_PC32DBL: + assert(pcrel2 == (int32_t)pcrel2); + tcg_patch32(code_ptr, pcrel2); + break; + default: + tcg_abort(); + break; + } +} + +/* parse target specific constraints */ +static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str) +{ + const char *ct_str = *pct_str; + + switch (ct_str[0]) { + case 'r': /* all registers */ + ct->ct |= TCG_CT_REG; + tcg_regset_set32(ct->u.regs, 0, 0xffff); + break; + case 'R': /* not R0 */ + ct->ct |= TCG_CT_REG; + tcg_regset_set32(ct->u.regs, 0, 0xffff); + tcg_regset_reset_reg(ct->u.regs, TCG_REG_R0); + break; + case 'L': /* qemu_ld/st constraint */ + ct->ct |= TCG_CT_REG; + tcg_regset_set32(ct->u.regs, 0, 0xffff); + tcg_regset_reset_reg (ct->u.regs, TCG_REG_R2); + tcg_regset_reset_reg (ct->u.regs, TCG_REG_R3); + tcg_regset_reset_reg (ct->u.regs, TCG_REG_R4); + break; + case 'a': /* force R2 for division */ + ct->ct |= TCG_CT_REG; + tcg_regset_clear(ct->u.regs); + tcg_regset_set_reg(ct->u.regs, TCG_REG_R2); + break; + case 'b': /* force R3 for division */ + ct->ct |= TCG_CT_REG; + tcg_regset_clear(ct->u.regs); + tcg_regset_set_reg(ct->u.regs, TCG_REG_R3); + break; + case 'A': + ct->ct |= TCG_CT_CONST_ADLI; + break; + case 'K': + ct->ct |= TCG_CT_CONST_MULI; + break; + case 'O': + ct->ct |= TCG_CT_CONST_ORI; + break; + case 'X': + ct->ct |= TCG_CT_CONST_XORI; + break; + case 'C': + ct->ct |= TCG_CT_CONST_CMPI; + break; + default: + return -1; + } + ct_str++; + *pct_str = ct_str; + + return 0; +} + +/* Immediates to be used with logical OR. This is an optimization only, + since a full 64-bit immediate OR can always be performed with 4 sequential + OI[LH][LH] instructions. What we're looking for is immediates that we + can load efficiently, and the immediate load plus the reg-reg OR is + smaller than the sequential OI's. */ + +static int tcg_match_ori(TCGType type, tcg_target_long val) +{ + if (facilities & FACILITY_EXT_IMM) { + if (type == TCG_TYPE_I32) { + /* All 32-bit ORs can be performed with 1 48-bit insn. */ + return 1; + } + } + + /* Look for negative values. These are best to load with LGHI. */ + if (val < 0) { + if (val == (int16_t)val) { + return 0; + } + if (facilities & FACILITY_EXT_IMM) { + if (val == (int32_t)val) { + return 0; + } + } + } + + return 1; +} + +/* Immediates to be used with logical XOR. This is almost, but not quite, + only an optimization. XOR with immediate is only supported with the + extended-immediate facility. That said, there are a few patterns for + which it is better to load the value into a register first. */ + +static int tcg_match_xori(TCGType type, tcg_target_long val) +{ + if ((facilities & FACILITY_EXT_IMM) == 0) { + return 0; + } + + if (type == TCG_TYPE_I32) { + /* All 32-bit XORs can be performed with 1 48-bit insn. */ + return 1; + } + + /* Look for negative values. These are best to load with LGHI. */ + if (val < 0 && val == (int32_t)val) { + return 0; + } + + return 1; +} + +/* Imediates to be used with comparisons. */ + +static int tcg_match_cmpi(TCGType type, tcg_target_long val) +{ + if (facilities & FACILITY_EXT_IMM) { + /* The COMPARE IMMEDIATE instruction is available. */ + if (type == TCG_TYPE_I32) { + /* We have a 32-bit immediate and can compare against anything. */ + return 1; + } else { + /* ??? We have no insight here into whether the comparison is + signed or unsigned. The COMPARE IMMEDIATE insn uses a 32-bit + signed immediate, and the COMPARE LOGICAL IMMEDIATE insn uses + a 32-bit unsigned immediate. If we were to use the (semi) + obvious "val == (int32_t)val" we would be enabling unsigned + comparisons vs very large numbers. The only solution is to + take the intersection of the ranges. */ + /* ??? Another possible solution is to simply lie and allow all + constants here and force the out-of-range values into a temp + register in tgen_cmp when we have knowledge of the actual + comparison code in use. */ + return val >= 0 && val <= 0x7fffffff; + } + } else { + /* Only the LOAD AND TEST instruction is available. */ + return val == 0; + } +} + +/* Immediates to be used with add2/sub2. */ + +static int tcg_match_add2i(TCGType type, tcg_target_long val) +{ + if (facilities & FACILITY_EXT_IMM) { + if (type == TCG_TYPE_I32) { + return 1; + } else if (val >= -0xffffffffll && val <= 0xffffffffll) { + return 1; + } + } + return 0; +} + +/* Test if a constant matches the constraint. */ +static int tcg_target_const_match(tcg_target_long val, TCGType type, + const TCGArgConstraint *arg_ct) +{ + int ct = arg_ct->ct; + + if (ct & TCG_CT_CONST) { + return 1; + } + + if (type == TCG_TYPE_I32) { + val = (int32_t)val; + } + + /* The following are mutually exclusive. */ + if (ct & TCG_CT_CONST_MULI) { + /* Immediates that may be used with multiply. If we have the + general-instruction-extensions, then we have MULTIPLY SINGLE + IMMEDIATE with a signed 32-bit, otherwise we have only + MULTIPLY HALFWORD IMMEDIATE, with a signed 16-bit. */ + if (facilities & FACILITY_GEN_INST_EXT) { + return val == (int32_t)val; + } else { + return val == (int16_t)val; + } + } else if (ct & TCG_CT_CONST_ADLI) { + return tcg_match_add2i(type, val); + } else if (ct & TCG_CT_CONST_ORI) { + return tcg_match_ori(type, val); + } else if (ct & TCG_CT_CONST_XORI) { + return tcg_match_xori(type, val); + } else if (ct & TCG_CT_CONST_CMPI) { + return tcg_match_cmpi(type, val); + } + + return 0; +} + +/* Emit instructions according to the given instruction format. */ + +static void tcg_out_insn_RR(TCGContext *s, S390Opcode op, TCGReg r1, TCGReg r2) +{ + tcg_out16(s, (op << 8) | (r1 << 4) | r2); +} + +static void tcg_out_insn_RRE(TCGContext *s, S390Opcode op, + TCGReg r1, TCGReg r2) +{ + tcg_out32(s, (op << 16) | (r1 << 4) | r2); +} + +static void tcg_out_insn_RRF(TCGContext *s, S390Opcode op, + TCGReg r1, TCGReg r2, int m3) +{ + tcg_out32(s, (op << 16) | (m3 << 12) | (r1 << 4) | r2); +} + +static void tcg_out_insn_RI(TCGContext *s, S390Opcode op, TCGReg r1, int i2) +{ + tcg_out32(s, (op << 16) | (r1 << 20) | (i2 & 0xffff)); +} + +static void tcg_out_insn_RIL(TCGContext *s, S390Opcode op, TCGReg r1, int i2) +{ + tcg_out16(s, op | (r1 << 4)); + tcg_out32(s, i2); +} + +static void tcg_out_insn_RS(TCGContext *s, S390Opcode op, TCGReg r1, + TCGReg b2, TCGReg r3, int disp) +{ + tcg_out32(s, (op << 24) | (r1 << 20) | (r3 << 16) | (b2 << 12) + | (disp & 0xfff)); +} + +static void tcg_out_insn_RSY(TCGContext *s, S390Opcode op, TCGReg r1, + TCGReg b2, TCGReg r3, int disp) +{ + tcg_out16(s, (op & 0xff00) | (r1 << 4) | r3); + tcg_out32(s, (op & 0xff) | (b2 << 28) + | ((disp & 0xfff) << 16) | ((disp & 0xff000) >> 4)); +} + +#define tcg_out_insn_RX tcg_out_insn_RS +#define tcg_out_insn_RXY tcg_out_insn_RSY + +/* Emit an opcode with "type-checking" of the format. */ +#define tcg_out_insn(S, FMT, OP, ...) \ + glue(tcg_out_insn_,FMT)(S, glue(glue(FMT,_),OP), ## __VA_ARGS__) + + +/* emit 64-bit shifts */ +static void tcg_out_sh64(TCGContext* s, S390Opcode op, TCGReg dest, + TCGReg src, TCGReg sh_reg, int sh_imm) +{ + tcg_out_insn_RSY(s, op, dest, sh_reg, src, sh_imm); +} + +/* emit 32-bit shifts */ +static void tcg_out_sh32(TCGContext* s, S390Opcode op, TCGReg dest, + TCGReg sh_reg, int sh_imm) +{ + tcg_out_insn_RS(s, op, dest, sh_reg, 0, sh_imm); +} + +static void tcg_out_mov(TCGContext *s, TCGType type, TCGReg dst, TCGReg src) +{ + if (src != dst) { + if (type == TCG_TYPE_I32) { + tcg_out_insn(s, RR, LR, dst, src); + } else { + tcg_out_insn(s, RRE, LGR, dst, src); + } + } +} + +/* load a register with an immediate value */ +static void tcg_out_movi(TCGContext *s, TCGType type, + TCGReg ret, tcg_target_long sval) +{ + static const S390Opcode lli_insns[4] = { + RI_LLILL, RI_LLILH, RI_LLIHL, RI_LLIHH + }; + + tcg_target_ulong uval = sval; + int i; + + if (type == TCG_TYPE_I32) { + uval = (uint32_t)sval; + sval = (int32_t)sval; + } + + /* Try all 32-bit insns that can load it in one go. */ + if (sval >= -0x8000 && sval < 0x8000) { + tcg_out_insn(s, RI, LGHI, ret, sval); + return; + } + + for (i = 0; i < 4; i++) { + tcg_target_long mask = 0xffffull << i*16; + if ((uval & mask) == uval) { + tcg_out_insn_RI(s, lli_insns[i], ret, uval >> i*16); + return; + } + } + + /* Try all 48-bit insns that can load it in one go. */ + if (facilities & FACILITY_EXT_IMM) { + if (sval == (int32_t)sval) { + tcg_out_insn(s, RIL, LGFI, ret, sval); + return; + } + if (uval <= 0xffffffff) { + tcg_out_insn(s, RIL, LLILF, ret, uval); + return; + } + if ((uval & 0xffffffff) == 0) { + tcg_out_insn(s, RIL, LLIHF, ret, uval >> 31 >> 1); + return; + } + } + + /* Try for PC-relative address load. */ + if ((sval & 1) == 0) { + ptrdiff_t off = tcg_pcrel_diff(s, (void *)sval) >> 1; + if (off == (int32_t)off) { + tcg_out_insn(s, RIL, LARL, ret, off); + return; + } + } + + /* If extended immediates are not present, then we may have to issue + several instructions to load the low 32 bits. */ + if (!(facilities & FACILITY_EXT_IMM)) { + /* A 32-bit unsigned value can be loaded in 2 insns. And given + that the lli_insns loop above did not succeed, we know that + both insns are required. */ + if (uval <= 0xffffffff) { + tcg_out_insn(s, RI, LLILL, ret, uval); + tcg_out_insn(s, RI, IILH, ret, uval >> 16); + return; + } + + /* If all high bits are set, the value can be loaded in 2 or 3 insns. + We first want to make sure that all the high bits get set. With + luck the low 16-bits can be considered negative to perform that for + free, otherwise we load an explicit -1. */ + if (sval >> 31 >> 1 == -1) { + if (uval & 0x8000) { + tcg_out_insn(s, RI, LGHI, ret, uval); + } else { + tcg_out_insn(s, RI, LGHI, ret, -1); + tcg_out_insn(s, RI, IILL, ret, uval); + } + tcg_out_insn(s, RI, IILH, ret, uval >> 16); + return; + } + } + + /* If we get here, both the high and low parts have non-zero bits. */ + + /* Recurse to load the lower 32-bits. */ + tcg_out_movi(s, TCG_TYPE_I64, ret, uval & 0xffffffff); + + /* Insert data into the high 32-bits. */ + uval = uval >> 31 >> 1; + if (facilities & FACILITY_EXT_IMM) { + if (uval < 0x10000) { + tcg_out_insn(s, RI, IIHL, ret, uval); + } else if ((uval & 0xffff) == 0) { + tcg_out_insn(s, RI, IIHH, ret, uval >> 16); + } else { + tcg_out_insn(s, RIL, IIHF, ret, uval); + } + } else { + if (uval & 0xffff) { + tcg_out_insn(s, RI, IIHL, ret, uval); + } + if (uval & 0xffff0000) { + tcg_out_insn(s, RI, IIHH, ret, uval >> 16); + } + } +} + + +/* Emit a load/store type instruction. Inputs are: + DATA: The register to be loaded or stored. + BASE+OFS: The effective address. + OPC_RX: If the operation has an RX format opcode (e.g. STC), otherwise 0. + OPC_RXY: The RXY format opcode for the operation (e.g. STCY). */ + +static void tcg_out_mem(TCGContext *s, S390Opcode opc_rx, S390Opcode opc_rxy, + TCGReg data, TCGReg base, TCGReg index, + tcg_target_long ofs) +{ + if (ofs < -0x80000 || ofs >= 0x80000) { + /* Combine the low 20 bits of the offset with the actual load insn; + the high 44 bits must come from an immediate load. */ + tcg_target_long low = ((ofs & 0xfffff) ^ 0x80000) - 0x80000; + tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0, ofs - low); + ofs = low; + + /* If we were already given an index register, add it in. */ + if (index != TCG_REG_NONE) { + tcg_out_insn(s, RRE, AGR, TCG_TMP0, index); + } + index = TCG_TMP0; + } + + if (opc_rx && ofs >= 0 && ofs < 0x1000) { + tcg_out_insn_RX(s, opc_rx, data, base, index, ofs); + } else { + tcg_out_insn_RXY(s, opc_rxy, data, base, index, ofs); + } +} + + +/* load data without address translation or endianness conversion */ +static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg data, + TCGReg base, intptr_t ofs) +{ + if (type == TCG_TYPE_I32) { + tcg_out_mem(s, RX_L, RXY_LY, data, base, TCG_REG_NONE, ofs); + } else { + tcg_out_mem(s, 0, RXY_LG, data, base, TCG_REG_NONE, ofs); + } +} + +static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg data, + TCGReg base, intptr_t ofs) +{ + if (type == TCG_TYPE_I32) { + tcg_out_mem(s, RX_ST, RXY_STY, data, base, TCG_REG_NONE, ofs); + } else { + tcg_out_mem(s, 0, RXY_STG, data, base, TCG_REG_NONE, ofs); + } +} + +/* load data from an absolute host address */ +static void tcg_out_ld_abs(TCGContext *s, TCGType type, TCGReg dest, void *abs) +{ + intptr_t addr = (intptr_t)abs; + + if ((facilities & FACILITY_GEN_INST_EXT) && !(addr & 1)) { + ptrdiff_t disp = tcg_pcrel_diff(s, abs) >> 1; + if (disp == (int32_t)disp) { + if (type == TCG_TYPE_I32) { + tcg_out_insn(s, RIL, LRL, dest, disp); + } else { + tcg_out_insn(s, RIL, LGRL, dest, disp); + } + return; + } + } + + tcg_out_movi(s, TCG_TYPE_PTR, dest, addr & ~0xffff); + tcg_out_ld(s, type, dest, dest, addr & 0xffff); +} + +static inline void tcg_out_risbg(TCGContext *s, TCGReg dest, TCGReg src, + int msb, int lsb, int ofs, int z) +{ + /* Format RIE-f */ + tcg_out16(s, (RIE_RISBG & 0xff00) | (dest << 4) | src); + tcg_out16(s, (msb << 8) | (z << 7) | lsb); + tcg_out16(s, (ofs << 8) | (RIE_RISBG & 0xff)); +} + +static void tgen_ext8s(TCGContext *s, TCGType type, TCGReg dest, TCGReg src) +{ + if (facilities & FACILITY_EXT_IMM) { + tcg_out_insn(s, RRE, LGBR, dest, src); + return; + } + + if (type == TCG_TYPE_I32) { + if (dest == src) { + tcg_out_sh32(s, RS_SLL, dest, TCG_REG_NONE, 24); + } else { + tcg_out_sh64(s, RSY_SLLG, dest, src, TCG_REG_NONE, 24); + } + tcg_out_sh32(s, RS_SRA, dest, TCG_REG_NONE, 24); + } else { + tcg_out_sh64(s, RSY_SLLG, dest, src, TCG_REG_NONE, 56); + tcg_out_sh64(s, RSY_SRAG, dest, dest, TCG_REG_NONE, 56); + } +} + +static void tgen_ext8u(TCGContext *s, TCGType type, TCGReg dest, TCGReg src) +{ + if (facilities & FACILITY_EXT_IMM) { + tcg_out_insn(s, RRE, LLGCR, dest, src); + return; + } + + if (dest == src) { + tcg_out_movi(s, type, TCG_TMP0, 0xff); + src = TCG_TMP0; + } else { + tcg_out_movi(s, type, dest, 0xff); + } + if (type == TCG_TYPE_I32) { + tcg_out_insn(s, RR, NR, dest, src); + } else { + tcg_out_insn(s, RRE, NGR, dest, src); + } +} + +static void tgen_ext16s(TCGContext *s, TCGType type, TCGReg dest, TCGReg src) +{ + if (facilities & FACILITY_EXT_IMM) { + tcg_out_insn(s, RRE, LGHR, dest, src); + return; + } + + if (type == TCG_TYPE_I32) { + if (dest == src) { + tcg_out_sh32(s, RS_SLL, dest, TCG_REG_NONE, 16); + } else { + tcg_out_sh64(s, RSY_SLLG, dest, src, TCG_REG_NONE, 16); + } + tcg_out_sh32(s, RS_SRA, dest, TCG_REG_NONE, 16); + } else { + tcg_out_sh64(s, RSY_SLLG, dest, src, TCG_REG_NONE, 48); + tcg_out_sh64(s, RSY_SRAG, dest, dest, TCG_REG_NONE, 48); + } +} + +static void tgen_ext16u(TCGContext *s, TCGType type, TCGReg dest, TCGReg src) +{ + if (facilities & FACILITY_EXT_IMM) { + tcg_out_insn(s, RRE, LLGHR, dest, src); + return; + } + + if (dest == src) { + tcg_out_movi(s, type, TCG_TMP0, 0xffff); + src = TCG_TMP0; + } else { + tcg_out_movi(s, type, dest, 0xffff); + } + if (type == TCG_TYPE_I32) { + tcg_out_insn(s, RR, NR, dest, src); + } else { + tcg_out_insn(s, RRE, NGR, dest, src); + } +} + +static inline void tgen_ext32s(TCGContext *s, TCGReg dest, TCGReg src) +{ + tcg_out_insn(s, RRE, LGFR, dest, src); +} + +static inline void tgen_ext32u(TCGContext *s, TCGReg dest, TCGReg src) +{ + tcg_out_insn(s, RRE, LLGFR, dest, src); +} + +/* Accept bit patterns like these: + 0....01....1 + 1....10....0 + 1..10..01..1 + 0..01..10..0 + Copied from gcc sources. */ +static inline bool risbg_mask(uint64_t c) +{ + uint64_t lsb; + /* We don't change the number of transitions by inverting, + so make sure we start with the LSB zero. */ + if (c & 1) { + c = ~c; + } + /* Reject all zeros or all ones. */ + if (c == 0) { + return false; + } + /* Find the first transition. */ + lsb = c & -c; + /* Invert to look for a second transition. */ + c = ~c; + /* Erase the first transition. */ + c &= -lsb; + /* Find the second transition, if any. */ + lsb = c & -c; + /* Match if all the bits are 1's, or if c is zero. */ + return c == -lsb; +} + +static void tgen_andi_risbg(TCGContext *s, TCGReg out, TCGReg in, uint64_t val) +{ + int msb, lsb; + if ((val & 0x8000000000000001ull) == 0x8000000000000001ull) { + /* Achieve wraparound by swapping msb and lsb. */ + msb = 64 - ctz64(~val); + lsb = clz64(~val) - 1; + } else { + msb = clz64(val); + lsb = 63 - ctz64(val); + } + tcg_out_risbg(s, out, in, msb, lsb, 0, 1); +} + +static void tgen_andi(TCGContext *s, TCGType type, TCGReg dest, uint64_t val) +{ + static const S390Opcode ni_insns[4] = { + RI_NILL, RI_NILH, RI_NIHL, RI_NIHH + }; + static const S390Opcode nif_insns[2] = { + RIL_NILF, RIL_NIHF + }; + uint64_t valid = (type == TCG_TYPE_I32 ? 0xffffffffull : -1ull); + int i; + + /* Look for the zero-extensions. */ + if ((val & valid) == 0xffffffff) { + tgen_ext32u(s, dest, dest); + return; + } + if (facilities & FACILITY_EXT_IMM) { + if ((val & valid) == 0xff) { + tgen_ext8u(s, TCG_TYPE_I64, dest, dest); + return; + } + if ((val & valid) == 0xffff) { + tgen_ext16u(s, TCG_TYPE_I64, dest, dest); + return; + } + } + + /* Try all 32-bit insns that can perform it in one go. */ + for (i = 0; i < 4; i++) { + tcg_target_ulong mask = ~(0xffffull << i*16); + if (((val | ~valid) & mask) == mask) { + tcg_out_insn_RI(s, ni_insns[i], dest, val >> i*16); + return; + } + } + + /* Try all 48-bit insns that can perform it in one go. */ + if (facilities & FACILITY_EXT_IMM) { + for (i = 0; i < 2; i++) { + tcg_target_ulong mask = ~(0xffffffffull << i*32); + if (((val | ~valid) & mask) == mask) { + tcg_out_insn_RIL(s, nif_insns[i], dest, val >> i*32); + return; + } + } + } + if ((facilities & FACILITY_GEN_INST_EXT) && risbg_mask(val)) { + tgen_andi_risbg(s, dest, dest, val); + return; + } + + /* Fall back to loading the constant. */ + tcg_out_movi(s, type, TCG_TMP0, val); + if (type == TCG_TYPE_I32) { + tcg_out_insn(s, RR, NR, dest, TCG_TMP0); + } else { + tcg_out_insn(s, RRE, NGR, dest, TCG_TMP0); + } +} + +static void tgen64_ori(TCGContext *s, TCGReg dest, tcg_target_ulong val) +{ + static const S390Opcode oi_insns[4] = { + RI_OILL, RI_OILH, RI_OIHL, RI_OIHH + }; + static const S390Opcode nif_insns[2] = { + RIL_OILF, RIL_OIHF + }; + + int i; + + /* Look for no-op. */ + if (val == 0) { + return; + } + + if (facilities & FACILITY_EXT_IMM) { + /* Try all 32-bit insns that can perform it in one go. */ + for (i = 0; i < 4; i++) { + tcg_target_ulong mask = (0xffffull << i*16); + if ((val & mask) != 0 && (val & ~mask) == 0) { + tcg_out_insn_RI(s, oi_insns[i], dest, val >> i*16); + return; + } + } + + /* Try all 48-bit insns that can perform it in one go. */ + for (i = 0; i < 2; i++) { + tcg_target_ulong mask = (0xffffffffull << i*32); + if ((val & mask) != 0 && (val & ~mask) == 0) { + tcg_out_insn_RIL(s, nif_insns[i], dest, val >> i*32); + return; + } + } + + /* Perform the OR via sequential modifications to the high and + low parts. Do this via recursion to handle 16-bit vs 32-bit + masks in each half. */ + tgen64_ori(s, dest, val & 0x00000000ffffffffull); + tgen64_ori(s, dest, val & 0xffffffff00000000ull); + } else { + /* With no extended-immediate facility, we don't need to be so + clever. Just iterate over the insns and mask in the constant. */ + for (i = 0; i < 4; i++) { + tcg_target_ulong mask = (0xffffull << i*16); + if ((val & mask) != 0) { + tcg_out_insn_RI(s, oi_insns[i], dest, val >> i*16); + } + } + } +} + +static void tgen64_xori(TCGContext *s, TCGReg dest, tcg_target_ulong val) +{ + /* Perform the xor by parts. */ + if (val & 0xffffffff) { + tcg_out_insn(s, RIL, XILF, dest, val); + } + if (val > 0xffffffff) { + tcg_out_insn(s, RIL, XIHF, dest, val >> 31 >> 1); + } +} + +static int tgen_cmp(TCGContext *s, TCGType type, TCGCond c, TCGReg r1, + TCGArg c2, int c2const) +{ + bool is_unsigned = is_unsigned_cond(c); + if (c2const) { + if (c2 == 0) { + if (type == TCG_TYPE_I32) { + tcg_out_insn(s, RR, LTR, r1, r1); + } else { + tcg_out_insn(s, RRE, LTGR, r1, r1); + } + return tcg_cond_to_ltr_cond[c]; + } else { + if (is_unsigned) { + if (type == TCG_TYPE_I32) { + tcg_out_insn(s, RIL, CLFI, r1, c2); + } else { + tcg_out_insn(s, RIL, CLGFI, r1, c2); + } + } else { + if (type == TCG_TYPE_I32) { + tcg_out_insn(s, RIL, CFI, r1, c2); + } else { + tcg_out_insn(s, RIL, CGFI, r1, c2); + } + } + } + } else { + if (is_unsigned) { + if (type == TCG_TYPE_I32) { + tcg_out_insn(s, RR, CLR, r1, c2); + } else { + tcg_out_insn(s, RRE, CLGR, r1, c2); + } + } else { + if (type == TCG_TYPE_I32) { + tcg_out_insn(s, RR, CR, r1, c2); + } else { + tcg_out_insn(s, RRE, CGR, r1, c2); + } + } + } + return tcg_cond_to_s390_cond[c]; +} + +static void tgen_setcond(TCGContext *s, TCGType type, TCGCond cond, + TCGReg dest, TCGReg c1, TCGArg c2, int c2const) +{ + int cc; + + switch (cond) { + case TCG_COND_GTU: + case TCG_COND_GT: + do_greater: + /* The result of a compare has CC=2 for GT and CC=3 unused. + ADD LOGICAL WITH CARRY considers (CC & 2) the carry bit. */ + tgen_cmp(s, type, cond, c1, c2, c2const); + tcg_out_movi(s, type, dest, 0); + tcg_out_insn(s, RRE, ALCGR, dest, dest); + return; + + case TCG_COND_GEU: + do_geu: + /* We need "real" carry semantics, so use SUBTRACT LOGICAL + instead of COMPARE LOGICAL. This needs an extra move. */ + tcg_out_mov(s, type, TCG_TMP0, c1); + if (c2const) { + tcg_out_movi(s, TCG_TYPE_I64, dest, 0); + if (type == TCG_TYPE_I32) { + tcg_out_insn(s, RIL, SLFI, TCG_TMP0, c2); + } else { + tcg_out_insn(s, RIL, SLGFI, TCG_TMP0, c2); + } + } else { + if (type == TCG_TYPE_I32) { + tcg_out_insn(s, RR, SLR, TCG_TMP0, c2); + } else { + tcg_out_insn(s, RRE, SLGR, TCG_TMP0, c2); + } + tcg_out_movi(s, TCG_TYPE_I64, dest, 0); + } + tcg_out_insn(s, RRE, ALCGR, dest, dest); + return; + + case TCG_COND_LEU: + case TCG_COND_LTU: + case TCG_COND_LT: + /* Swap operands so that we can use GEU/GTU/GT. */ + if (c2const) { + tcg_out_movi(s, type, TCG_TMP0, c2); + c2 = c1; + c2const = 0; + c1 = TCG_TMP0; + } else { + TCGReg t = c1; + c1 = c2; + c2 = t; + } + if (cond == TCG_COND_LEU) { + goto do_geu; + } + cond = tcg_swap_cond(cond); + goto do_greater; + + case TCG_COND_NE: + /* X != 0 is X > 0. */ + if (c2const && c2 == 0) { + cond = TCG_COND_GTU; + goto do_greater; + } + break; + + case TCG_COND_EQ: + /* X == 0 is X <= 0 is 0 >= X. */ + if (c2const && c2 == 0) { + tcg_out_movi(s, TCG_TYPE_I64, TCG_TMP0, 0); + c2 = c1; + c2const = 0; + c1 = TCG_TMP0; + goto do_geu; + } + break; + + default: + break; + } + + cc = tgen_cmp(s, type, cond, c1, c2, c2const); + if (facilities & FACILITY_LOAD_ON_COND) { + /* Emit: d = 0, t = 1, d = (cc ? t : d). */ + tcg_out_movi(s, TCG_TYPE_I64, dest, 0); + tcg_out_movi(s, TCG_TYPE_I64, TCG_TMP0, 1); + tcg_out_insn(s, RRF, LOCGR, dest, TCG_TMP0, cc); + } else { + /* Emit: d = 1; if (cc) goto over; d = 0; over: */ + tcg_out_movi(s, type, dest, 1); + tcg_out_insn(s, RI, BRC, cc, (4 + 4) >> 1); + tcg_out_movi(s, type, dest, 0); + } +} + +static void tgen_movcond(TCGContext *s, TCGType type, TCGCond c, TCGReg dest, + TCGReg c1, TCGArg c2, int c2const, TCGReg r3) +{ + int cc; + if (facilities & FACILITY_LOAD_ON_COND) { + cc = tgen_cmp(s, type, c, c1, c2, c2const); + tcg_out_insn(s, RRF, LOCGR, dest, r3, cc); + } else { + c = tcg_invert_cond(c); + cc = tgen_cmp(s, type, c, c1, c2, c2const); + + /* Emit: if (cc) goto over; dest = r3; over: */ + tcg_out_insn(s, RI, BRC, cc, (4 + 4) >> 1); + tcg_out_insn(s, RRE, LGR, dest, r3); + } +} + +bool tcg_target_deposit_valid(int ofs, int len) +{ + return (facilities & FACILITY_GEN_INST_EXT) != 0; +} + +static void tgen_deposit(TCGContext *s, TCGReg dest, TCGReg src, + int ofs, int len) +{ + int lsb = (63 - ofs); + int msb = lsb - (len - 1); + tcg_out_risbg(s, dest, src, msb, lsb, ofs, 0); +} + +static void tgen_gotoi(TCGContext *s, int cc, tcg_insn_unit *dest) +{ + ptrdiff_t off = dest - s->code_ptr; + if (off == (int16_t)off) { + tcg_out_insn(s, RI, BRC, cc, off); + } else if (off == (int32_t)off) { + tcg_out_insn(s, RIL, BRCL, cc, off); + } else { + tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0, (uintptr_t)dest); + tcg_out_insn(s, RR, BCR, cc, TCG_TMP0); + } +} + +static void tgen_branch(TCGContext *s, int cc, TCGLabel *l) +{ + if (l->has_value) { + tgen_gotoi(s, cc, l->u.value_ptr); + } else if (USE_LONG_BRANCHES) { + tcg_out16(s, RIL_BRCL | (cc << 4)); + tcg_out_reloc(s, s->code_ptr, R_390_PC32DBL, l, -2); + s->code_ptr += 2; + } else { + tcg_out16(s, RI_BRC | (cc << 4)); + tcg_out_reloc(s, s->code_ptr, R_390_PC16DBL, l, -2); + s->code_ptr += 1; + } +} + +static void tgen_compare_branch(TCGContext *s, S390Opcode opc, int cc, + TCGReg r1, TCGReg r2, TCGLabel *l) +{ + intptr_t off; + + if (l->has_value) { + off = l->u.value_ptr - s->code_ptr; + } else { + /* We need to keep the offset unchanged for retranslation. */ + off = s->code_ptr[1]; + tcg_out_reloc(s, s->code_ptr + 1, R_390_PC16DBL, l, -2); + } + + tcg_out16(s, (opc & 0xff00) | (r1 << 4) | r2); + tcg_out16(s, off); + tcg_out16(s, cc << 12 | (opc & 0xff)); +} + +static void tgen_compare_imm_branch(TCGContext *s, S390Opcode opc, int cc, + TCGReg r1, int i2, TCGLabel *l) +{ + tcg_target_long off; + + if (l->has_value) { + off = l->u.value_ptr - s->code_ptr; + } else { + /* We need to keep the offset unchanged for retranslation. */ + off = s->code_ptr[1]; + tcg_out_reloc(s, s->code_ptr + 1, R_390_PC16DBL, l, -2); + } + + tcg_out16(s, (opc & 0xff00) | (r1 << 4) | cc); + tcg_out16(s, off); + tcg_out16(s, (i2 << 8) | (opc & 0xff)); +} + +static void tgen_brcond(TCGContext *s, TCGType type, TCGCond c, + TCGReg r1, TCGArg c2, int c2const, TCGLabel *l) +{ + int cc; + + if (facilities & FACILITY_GEN_INST_EXT) { + bool is_unsigned = is_unsigned_cond(c); + bool in_range; + S390Opcode opc; + + cc = tcg_cond_to_s390_cond[c]; + + if (!c2const) { + opc = (type == TCG_TYPE_I32 + ? (is_unsigned ? RIE_CLRJ : RIE_CRJ) + : (is_unsigned ? RIE_CLGRJ : RIE_CGRJ)); + tgen_compare_branch(s, opc, cc, r1, c2, l); + return; + } + + /* COMPARE IMMEDIATE AND BRANCH RELATIVE has an 8-bit immediate field. + If the immediate we've been given does not fit that range, we'll + fall back to separate compare and branch instructions using the + larger comparison range afforded by COMPARE IMMEDIATE. */ + if (type == TCG_TYPE_I32) { + if (is_unsigned) { + opc = RIE_CLIJ; + in_range = (uint32_t)c2 == (uint8_t)c2; + } else { + opc = RIE_CIJ; + in_range = (int32_t)c2 == (int8_t)c2; + } + } else { + if (is_unsigned) { + opc = RIE_CLGIJ; + in_range = (uint64_t)c2 == (uint8_t)c2; + } else { + opc = RIE_CGIJ; + in_range = (int64_t)c2 == (int8_t)c2; + } + } + if (in_range) { + tgen_compare_imm_branch(s, opc, cc, r1, c2, l); + return; + } + } + + cc = tgen_cmp(s, type, c, r1, c2, c2const); + tgen_branch(s, cc, l); +} + +static void tcg_out_call(TCGContext *s, tcg_insn_unit *dest) +{ + ptrdiff_t off = dest - s->code_ptr; + if (off == (int32_t)off) { + tcg_out_insn(s, RIL, BRASL, TCG_REG_R14, off); + } else { + tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0, (uintptr_t)dest); + tcg_out_insn(s, RR, BASR, TCG_REG_R14, TCG_TMP0); + } +} + +static void tcg_out_qemu_ld_direct(TCGContext *s, TCGMemOp opc, TCGReg data, + TCGReg base, TCGReg index, int disp) +{ + switch (opc & (MO_SSIZE | MO_BSWAP)) { + case MO_UB: + tcg_out_insn(s, RXY, LLGC, data, base, index, disp); + break; + case MO_SB: + tcg_out_insn(s, RXY, LGB, data, base, index, disp); + break; + + case MO_UW | MO_BSWAP: + /* swapped unsigned halfword load with upper bits zeroed */ + tcg_out_insn(s, RXY, LRVH, data, base, index, disp); + tgen_ext16u(s, TCG_TYPE_I64, data, data); + break; + case MO_UW: + tcg_out_insn(s, RXY, LLGH, data, base, index, disp); + break; + + case MO_SW | MO_BSWAP: + /* swapped sign-extended halfword load */ + tcg_out_insn(s, RXY, LRVH, data, base, index, disp); + tgen_ext16s(s, TCG_TYPE_I64, data, data); + break; + case MO_SW: + tcg_out_insn(s, RXY, LGH, data, base, index, disp); + break; + + case MO_UL | MO_BSWAP: + /* swapped unsigned int load with upper bits zeroed */ + tcg_out_insn(s, RXY, LRV, data, base, index, disp); + tgen_ext32u(s, data, data); + break; + case MO_UL: + tcg_out_insn(s, RXY, LLGF, data, base, index, disp); + break; + + case MO_SL | MO_BSWAP: + /* swapped sign-extended int load */ + tcg_out_insn(s, RXY, LRV, data, base, index, disp); + tgen_ext32s(s, data, data); + break; + case MO_SL: + tcg_out_insn(s, RXY, LGF, data, base, index, disp); + break; + + case MO_Q | MO_BSWAP: + tcg_out_insn(s, RXY, LRVG, data, base, index, disp); + break; + case MO_Q: + tcg_out_insn(s, RXY, LG, data, base, index, disp); + break; + + default: + tcg_abort(); + } +} + +static void tcg_out_qemu_st_direct(TCGContext *s, TCGMemOp opc, TCGReg data, + TCGReg base, TCGReg index, int disp) +{ + switch (opc & (MO_SIZE | MO_BSWAP)) { + case MO_UB: + if (disp >= 0 && disp < 0x1000) { + tcg_out_insn(s, RX, STC, data, base, index, disp); + } else { + tcg_out_insn(s, RXY, STCY, data, base, index, disp); + } + break; + + case MO_UW | MO_BSWAP: + tcg_out_insn(s, RXY, STRVH, data, base, index, disp); + break; + case MO_UW: + if (disp >= 0 && disp < 0x1000) { + tcg_out_insn(s, RX, STH, data, base, index, disp); + } else { + tcg_out_insn(s, RXY, STHY, data, base, index, disp); + } + break; + + case MO_UL | MO_BSWAP: + tcg_out_insn(s, RXY, STRV, data, base, index, disp); + break; + case MO_UL: + if (disp >= 0 && disp < 0x1000) { + tcg_out_insn(s, RX, ST, data, base, index, disp); + } else { + tcg_out_insn(s, RXY, STY, data, base, index, disp); + } + break; + + case MO_Q | MO_BSWAP: + tcg_out_insn(s, RXY, STRVG, data, base, index, disp); + break; + case MO_Q: + tcg_out_insn(s, RXY, STG, data, base, index, disp); + break; + + default: + tcg_abort(); + } +} + +#if defined(CONFIG_SOFTMMU) +/* We're expecting to use a 20-bit signed offset on the tlb memory ops. + Using the offset of the second entry in the last tlb table ensures + that we can index all of the elements of the first entry. */ +QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_table[NB_MMU_MODES - 1][1]) + > 0x7ffff); + +/* Load and compare a TLB entry, leaving the flags set. Loads the TLB + addend into R2. Returns a register with the santitized guest address. */ +static TCGReg tcg_out_tlb_read(TCGContext* s, TCGReg addr_reg, TCGMemOp opc, + int mem_index, bool is_ld) +{ + int s_mask = (1 << (opc & MO_SIZE)) - 1; + int ofs, a_off; + uint64_t tlb_mask; + + /* For aligned accesses, we check the first byte and include the alignment + bits within the address. For unaligned access, we check that we don't + cross pages using the address of the last byte of the access. */ + if ((opc & MO_AMASK) == MO_ALIGN || s_mask == 0) { + a_off = 0; + tlb_mask = TARGET_PAGE_MASK | s_mask; + } else { + a_off = s_mask; + tlb_mask = TARGET_PAGE_MASK; + } + + if (facilities & FACILITY_GEN_INST_EXT) { + tcg_out_risbg(s, TCG_REG_R2, addr_reg, + 64 - CPU_TLB_BITS - CPU_TLB_ENTRY_BITS, + 63 - CPU_TLB_ENTRY_BITS, + 64 + CPU_TLB_ENTRY_BITS - TARGET_PAGE_BITS, 1); + if (a_off) { + tcg_out_insn(s, RX, LA, TCG_REG_R3, addr_reg, TCG_REG_NONE, a_off); + tgen_andi(s, TCG_TYPE_TL, TCG_REG_R3, tlb_mask); + } else { + tgen_andi_risbg(s, TCG_REG_R3, addr_reg, tlb_mask); + } + } else { + tcg_out_sh64(s, RSY_SRLG, TCG_REG_R2, addr_reg, TCG_REG_NONE, + TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS); + tcg_out_insn(s, RX, LA, TCG_REG_R3, addr_reg, TCG_REG_NONE, a_off); + tgen_andi(s, TCG_TYPE_I64, TCG_REG_R2, + (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS); + tgen_andi(s, TCG_TYPE_TL, TCG_REG_R3, tlb_mask); + } + + if (is_ld) { + ofs = offsetof(CPUArchState, tlb_table[mem_index][0].addr_read); + } else { + ofs = offsetof(CPUArchState, tlb_table[mem_index][0].addr_write); + } + if (TARGET_LONG_BITS == 32) { + tcg_out_mem(s, RX_C, RXY_CY, TCG_REG_R3, TCG_REG_R2, TCG_AREG0, ofs); + } else { + tcg_out_mem(s, 0, RXY_CG, TCG_REG_R3, TCG_REG_R2, TCG_AREG0, ofs); + } + + ofs = offsetof(CPUArchState, tlb_table[mem_index][0].addend); + tcg_out_mem(s, 0, RXY_LG, TCG_REG_R2, TCG_REG_R2, TCG_AREG0, ofs); + + if (TARGET_LONG_BITS == 32) { + tgen_ext32u(s, TCG_REG_R3, addr_reg); + return TCG_REG_R3; + } + return addr_reg; +} + +static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi, + TCGReg data, TCGReg addr, + tcg_insn_unit *raddr, tcg_insn_unit *label_ptr) +{ + TCGLabelQemuLdst *label = new_ldst_label(s); + + label->is_ld = is_ld; + label->oi = oi; + label->datalo_reg = data; + label->addrlo_reg = addr; + label->raddr = raddr; + label->label_ptr[0] = label_ptr; +} + +static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) +{ + TCGReg addr_reg = lb->addrlo_reg; + TCGReg data_reg = lb->datalo_reg; + TCGMemOpIdx oi = lb->oi; + TCGMemOp opc = get_memop(oi); + + patch_reloc(lb->label_ptr[0], R_390_PC16DBL, (intptr_t)s->code_ptr, -2); + + tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_R2, TCG_AREG0); + if (TARGET_LONG_BITS == 64) { + tcg_out_mov(s, TCG_TYPE_I64, TCG_REG_R3, addr_reg); + } + tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R4, oi); + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R5, (uintptr_t)lb->raddr); + tcg_out_call(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SSIZE)]); + tcg_out_mov(s, TCG_TYPE_I64, data_reg, TCG_REG_R2); + + tgen_gotoi(s, S390_CC_ALWAYS, lb->raddr); +} + +static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) +{ + TCGReg addr_reg = lb->addrlo_reg; + TCGReg data_reg = lb->datalo_reg; + TCGMemOpIdx oi = lb->oi; + TCGMemOp opc = get_memop(oi); + + patch_reloc(lb->label_ptr[0], R_390_PC16DBL, (intptr_t)s->code_ptr, -2); + + tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_R2, TCG_AREG0); + if (TARGET_LONG_BITS == 64) { + tcg_out_mov(s, TCG_TYPE_I64, TCG_REG_R3, addr_reg); + } + switch (opc & MO_SIZE) { + case MO_UB: + tgen_ext8u(s, TCG_TYPE_I64, TCG_REG_R4, data_reg); + break; + case MO_UW: + tgen_ext16u(s, TCG_TYPE_I64, TCG_REG_R4, data_reg); + break; + case MO_UL: + tgen_ext32u(s, TCG_REG_R4, data_reg); + break; + case MO_Q: + tcg_out_mov(s, TCG_TYPE_I64, TCG_REG_R4, data_reg); + break; + default: + tcg_abort(); + } + tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R5, oi); + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R6, (uintptr_t)lb->raddr); + tcg_out_call(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]); + + tgen_gotoi(s, S390_CC_ALWAYS, lb->raddr); +} +#else +static void tcg_prepare_user_ldst(TCGContext *s, TCGReg *addr_reg, + TCGReg *index_reg, tcg_target_long *disp) +{ + if (TARGET_LONG_BITS == 32) { + tgen_ext32u(s, TCG_TMP0, *addr_reg); + *addr_reg = TCG_TMP0; + } + if (guest_base < 0x80000) { + *index_reg = TCG_REG_NONE; + *disp = guest_base; + } else { + *index_reg = TCG_GUEST_BASE_REG; + *disp = 0; + } +} +#endif /* CONFIG_SOFTMMU */ + +static void tcg_out_qemu_ld(TCGContext* s, TCGReg data_reg, TCGReg addr_reg, + TCGMemOpIdx oi) +{ + TCGMemOp opc = get_memop(oi); +#ifdef CONFIG_SOFTMMU + unsigned mem_index = get_mmuidx(oi); + tcg_insn_unit *label_ptr; + TCGReg base_reg; + + base_reg = tcg_out_tlb_read(s, addr_reg, opc, mem_index, 1); + + /* We need to keep the offset unchanged for retranslation. */ + tcg_out16(s, RI_BRC | (S390_CC_NE << 4)); + label_ptr = s->code_ptr; + s->code_ptr += 1; + + tcg_out_qemu_ld_direct(s, opc, data_reg, base_reg, TCG_REG_R2, 0); + + add_qemu_ldst_label(s, 1, oi, data_reg, addr_reg, s->code_ptr, label_ptr); +#else + TCGReg index_reg; + tcg_target_long disp; + + tcg_prepare_user_ldst(s, &addr_reg, &index_reg, &disp); + tcg_out_qemu_ld_direct(s, opc, data_reg, addr_reg, index_reg, disp); +#endif +} + +static void tcg_out_qemu_st(TCGContext* s, TCGReg data_reg, TCGReg addr_reg, + TCGMemOpIdx oi) +{ + TCGMemOp opc = get_memop(oi); +#ifdef CONFIG_SOFTMMU + unsigned mem_index = get_mmuidx(oi); + tcg_insn_unit *label_ptr; + TCGReg base_reg; + + base_reg = tcg_out_tlb_read(s, addr_reg, opc, mem_index, 0); + + /* We need to keep the offset unchanged for retranslation. */ + tcg_out16(s, RI_BRC | (S390_CC_NE << 4)); + label_ptr = s->code_ptr; + s->code_ptr += 1; + + tcg_out_qemu_st_direct(s, opc, data_reg, base_reg, TCG_REG_R2, 0); + + add_qemu_ldst_label(s, 0, oi, data_reg, addr_reg, s->code_ptr, label_ptr); +#else + TCGReg index_reg; + tcg_target_long disp; + + tcg_prepare_user_ldst(s, &addr_reg, &index_reg, &disp); + tcg_out_qemu_st_direct(s, opc, data_reg, addr_reg, index_reg, disp); +#endif +} + +# define OP_32_64(x) \ + case glue(glue(INDEX_op_,x),_i32): \ + case glue(glue(INDEX_op_,x),_i64) + +static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, + const TCGArg *args, const int *const_args) +{ + S390Opcode op; + TCGArg a0, a1, a2; + + switch (opc) { + case INDEX_op_exit_tb: + /* return value */ + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R2, args[0]); + tgen_gotoi(s, S390_CC_ALWAYS, tb_ret_addr); + break; + + case INDEX_op_goto_tb: + if (s->tb_jmp_offset) { + tcg_out16(s, RIL_BRCL | (S390_CC_ALWAYS << 4)); + s->tb_jmp_offset[args[0]] = tcg_current_code_size(s); + s->code_ptr += 2; + } else { + /* load address stored at s->tb_next + args[0] */ + tcg_out_ld_abs(s, TCG_TYPE_PTR, TCG_TMP0, s->tb_next + args[0]); + /* and go there */ + tcg_out_insn(s, RR, BCR, S390_CC_ALWAYS, TCG_TMP0); + } + s->tb_next_offset[args[0]] = tcg_current_code_size(s); + break; + + OP_32_64(ld8u): + /* ??? LLC (RXY format) is only present with the extended-immediate + facility, whereas LLGC is always present. */ + tcg_out_mem(s, 0, RXY_LLGC, args[0], args[1], TCG_REG_NONE, args[2]); + break; + + OP_32_64(ld8s): + /* ??? LB is no smaller than LGB, so no point to using it. */ + tcg_out_mem(s, 0, RXY_LGB, args[0], args[1], TCG_REG_NONE, args[2]); + break; + + OP_32_64(ld16u): + /* ??? LLH (RXY format) is only present with the extended-immediate + facility, whereas LLGH is always present. */ + tcg_out_mem(s, 0, RXY_LLGH, args[0], args[1], TCG_REG_NONE, args[2]); + break; + + case INDEX_op_ld16s_i32: + tcg_out_mem(s, RX_LH, RXY_LHY, args[0], args[1], TCG_REG_NONE, args[2]); + break; + + case INDEX_op_ld_i32: + tcg_out_ld(s, TCG_TYPE_I32, args[0], args[1], args[2]); + break; + + OP_32_64(st8): + tcg_out_mem(s, RX_STC, RXY_STCY, args[0], args[1], + TCG_REG_NONE, args[2]); + break; + + OP_32_64(st16): + tcg_out_mem(s, RX_STH, RXY_STHY, args[0], args[1], + TCG_REG_NONE, args[2]); + break; + + case INDEX_op_st_i32: + tcg_out_st(s, TCG_TYPE_I32, args[0], args[1], args[2]); + break; + + case INDEX_op_add_i32: + a0 = args[0], a1 = args[1], a2 = (int32_t)args[2]; + if (const_args[2]) { + do_addi_32: + if (a0 == a1) { + if (a2 == (int16_t)a2) { + tcg_out_insn(s, RI, AHI, a0, a2); + break; + } + if (facilities & FACILITY_EXT_IMM) { + tcg_out_insn(s, RIL, AFI, a0, a2); + break; + } + } + tcg_out_mem(s, RX_LA, RXY_LAY, a0, a1, TCG_REG_NONE, a2); + } else if (a0 == a1) { + tcg_out_insn(s, RR, AR, a0, a2); + } else { + tcg_out_insn(s, RX, LA, a0, a1, a2, 0); + } + break; + case INDEX_op_sub_i32: + a0 = args[0], a1 = args[1], a2 = (int32_t)args[2]; + if (const_args[2]) { + a2 = -a2; + goto do_addi_32; + } + tcg_out_insn(s, RR, SR, args[0], args[2]); + break; + + case INDEX_op_and_i32: + if (const_args[2]) { + tgen_andi(s, TCG_TYPE_I32, args[0], args[2]); + } else { + tcg_out_insn(s, RR, NR, args[0], args[2]); + } + break; + case INDEX_op_or_i32: + if (const_args[2]) { + tgen64_ori(s, args[0], args[2] & 0xffffffff); + } else { + tcg_out_insn(s, RR, OR, args[0], args[2]); + } + break; + case INDEX_op_xor_i32: + if (const_args[2]) { + tgen64_xori(s, args[0], args[2] & 0xffffffff); + } else { + tcg_out_insn(s, RR, XR, args[0], args[2]); + } + break; + + case INDEX_op_neg_i32: + tcg_out_insn(s, RR, LCR, args[0], args[1]); + break; + + case INDEX_op_mul_i32: + if (const_args[2]) { + if ((int32_t)args[2] == (int16_t)args[2]) { + tcg_out_insn(s, RI, MHI, args[0], args[2]); + } else { + tcg_out_insn(s, RIL, MSFI, args[0], args[2]); + } + } else { + tcg_out_insn(s, RRE, MSR, args[0], args[2]); + } + break; + + case INDEX_op_div2_i32: + tcg_out_insn(s, RR, DR, TCG_REG_R2, args[4]); + break; + case INDEX_op_divu2_i32: + tcg_out_insn(s, RRE, DLR, TCG_REG_R2, args[4]); + break; + + case INDEX_op_shl_i32: + op = RS_SLL; + do_shift32: + if (const_args[2]) { + tcg_out_sh32(s, op, args[0], TCG_REG_NONE, args[2]); + } else { + tcg_out_sh32(s, op, args[0], args[2], 0); + } + break; + case INDEX_op_shr_i32: + op = RS_SRL; + goto do_shift32; + case INDEX_op_sar_i32: + op = RS_SRA; + goto do_shift32; + + case INDEX_op_rotl_i32: + /* ??? Using tcg_out_sh64 here for the format; it is a 32-bit rol. */ + if (const_args[2]) { + tcg_out_sh64(s, RSY_RLL, args[0], args[1], TCG_REG_NONE, args[2]); + } else { + tcg_out_sh64(s, RSY_RLL, args[0], args[1], args[2], 0); + } + break; + case INDEX_op_rotr_i32: + if (const_args[2]) { + tcg_out_sh64(s, RSY_RLL, args[0], args[1], + TCG_REG_NONE, (32 - args[2]) & 31); + } else { + tcg_out_insn(s, RR, LCR, TCG_TMP0, args[2]); + tcg_out_sh64(s, RSY_RLL, args[0], args[1], TCG_TMP0, 0); + } + break; + + case INDEX_op_ext8s_i32: + tgen_ext8s(s, TCG_TYPE_I32, args[0], args[1]); + break; + case INDEX_op_ext16s_i32: + tgen_ext16s(s, TCG_TYPE_I32, args[0], args[1]); + break; + case INDEX_op_ext8u_i32: + tgen_ext8u(s, TCG_TYPE_I32, args[0], args[1]); + break; + case INDEX_op_ext16u_i32: + tgen_ext16u(s, TCG_TYPE_I32, args[0], args[1]); + break; + + OP_32_64(bswap16): + /* The TCG bswap definition requires bits 0-47 already be zero. + Thus we don't need the G-type insns to implement bswap16_i64. */ + tcg_out_insn(s, RRE, LRVR, args[0], args[1]); + tcg_out_sh32(s, RS_SRL, args[0], TCG_REG_NONE, 16); + break; + OP_32_64(bswap32): + tcg_out_insn(s, RRE, LRVR, args[0], args[1]); + break; + + case INDEX_op_add2_i32: + if (const_args[4]) { + tcg_out_insn(s, RIL, ALFI, args[0], args[4]); + } else { + tcg_out_insn(s, RR, ALR, args[0], args[4]); + } + tcg_out_insn(s, RRE, ALCR, args[1], args[5]); + break; + case INDEX_op_sub2_i32: + if (const_args[4]) { + tcg_out_insn(s, RIL, SLFI, args[0], args[4]); + } else { + tcg_out_insn(s, RR, SLR, args[0], args[4]); + } + tcg_out_insn(s, RRE, SLBR, args[1], args[5]); + break; + + case INDEX_op_br: + tgen_branch(s, S390_CC_ALWAYS, arg_label(args[0])); + break; + + case INDEX_op_brcond_i32: + tgen_brcond(s, TCG_TYPE_I32, args[2], args[0], + args[1], const_args[1], arg_label(args[3])); + break; + case INDEX_op_setcond_i32: + tgen_setcond(s, TCG_TYPE_I32, args[3], args[0], args[1], + args[2], const_args[2]); + break; + case INDEX_op_movcond_i32: + tgen_movcond(s, TCG_TYPE_I32, args[5], args[0], args[1], + args[2], const_args[2], args[3]); + break; + + case INDEX_op_qemu_ld_i32: + /* ??? Technically we can use a non-extending instruction. */ + case INDEX_op_qemu_ld_i64: + tcg_out_qemu_ld(s, args[0], args[1], args[2]); + break; + case INDEX_op_qemu_st_i32: + case INDEX_op_qemu_st_i64: + tcg_out_qemu_st(s, args[0], args[1], args[2]); + break; + + case INDEX_op_ld16s_i64: + tcg_out_mem(s, 0, RXY_LGH, args[0], args[1], TCG_REG_NONE, args[2]); + break; + case INDEX_op_ld32u_i64: + tcg_out_mem(s, 0, RXY_LLGF, args[0], args[1], TCG_REG_NONE, args[2]); + break; + case INDEX_op_ld32s_i64: + tcg_out_mem(s, 0, RXY_LGF, args[0], args[1], TCG_REG_NONE, args[2]); + break; + case INDEX_op_ld_i64: + tcg_out_ld(s, TCG_TYPE_I64, args[0], args[1], args[2]); + break; + + case INDEX_op_st32_i64: + tcg_out_st(s, TCG_TYPE_I32, args[0], args[1], args[2]); + break; + case INDEX_op_st_i64: + tcg_out_st(s, TCG_TYPE_I64, args[0], args[1], args[2]); + break; + + case INDEX_op_add_i64: + a0 = args[0], a1 = args[1], a2 = args[2]; + if (const_args[2]) { + do_addi_64: + if (a0 == a1) { + if (a2 == (int16_t)a2) { + tcg_out_insn(s, RI, AGHI, a0, a2); + break; + } + if (facilities & FACILITY_EXT_IMM) { + if (a2 == (int32_t)a2) { + tcg_out_insn(s, RIL, AGFI, a0, a2); + break; + } else if (a2 == (uint32_t)a2) { + tcg_out_insn(s, RIL, ALGFI, a0, a2); + break; + } else if (-a2 == (uint32_t)-a2) { + tcg_out_insn(s, RIL, SLGFI, a0, -a2); + break; + } + } + } + tcg_out_mem(s, RX_LA, RXY_LAY, a0, a1, TCG_REG_NONE, a2); + } else if (a0 == a1) { + tcg_out_insn(s, RRE, AGR, a0, a2); + } else { + tcg_out_insn(s, RX, LA, a0, a1, a2, 0); + } + break; + case INDEX_op_sub_i64: + a0 = args[0], a1 = args[1], a2 = args[2]; + if (const_args[2]) { + a2 = -a2; + goto do_addi_64; + } else { + tcg_out_insn(s, RRE, SGR, args[0], args[2]); + } + break; + + case INDEX_op_and_i64: + if (const_args[2]) { + tgen_andi(s, TCG_TYPE_I64, args[0], args[2]); + } else { + tcg_out_insn(s, RRE, NGR, args[0], args[2]); + } + break; + case INDEX_op_or_i64: + if (const_args[2]) { + tgen64_ori(s, args[0], args[2]); + } else { + tcg_out_insn(s, RRE, OGR, args[0], args[2]); + } + break; + case INDEX_op_xor_i64: + if (const_args[2]) { + tgen64_xori(s, args[0], args[2]); + } else { + tcg_out_insn(s, RRE, XGR, args[0], args[2]); + } + break; + + case INDEX_op_neg_i64: + tcg_out_insn(s, RRE, LCGR, args[0], args[1]); + break; + case INDEX_op_bswap64_i64: + tcg_out_insn(s, RRE, LRVGR, args[0], args[1]); + break; + + case INDEX_op_mul_i64: + if (const_args[2]) { + if (args[2] == (int16_t)args[2]) { + tcg_out_insn(s, RI, MGHI, args[0], args[2]); + } else { + tcg_out_insn(s, RIL, MSGFI, args[0], args[2]); + } + } else { + tcg_out_insn(s, RRE, MSGR, args[0], args[2]); + } + break; + + case INDEX_op_div2_i64: + /* ??? We get an unnecessary sign-extension of the dividend + into R3 with this definition, but as we do in fact always + produce both quotient and remainder using INDEX_op_div_i64 + instead requires jumping through even more hoops. */ + tcg_out_insn(s, RRE, DSGR, TCG_REG_R2, args[4]); + break; + case INDEX_op_divu2_i64: + tcg_out_insn(s, RRE, DLGR, TCG_REG_R2, args[4]); + break; + case INDEX_op_mulu2_i64: + tcg_out_insn(s, RRE, MLGR, TCG_REG_R2, args[3]); + break; + + case INDEX_op_shl_i64: + op = RSY_SLLG; + do_shift64: + if (const_args[2]) { + tcg_out_sh64(s, op, args[0], args[1], TCG_REG_NONE, args[2]); + } else { + tcg_out_sh64(s, op, args[0], args[1], args[2], 0); + } + break; + case INDEX_op_shr_i64: + op = RSY_SRLG; + goto do_shift64; + case INDEX_op_sar_i64: + op = RSY_SRAG; + goto do_shift64; + + case INDEX_op_rotl_i64: + if (const_args[2]) { + tcg_out_sh64(s, RSY_RLLG, args[0], args[1], + TCG_REG_NONE, args[2]); + } else { + tcg_out_sh64(s, RSY_RLLG, args[0], args[1], args[2], 0); + } + break; + case INDEX_op_rotr_i64: + if (const_args[2]) { + tcg_out_sh64(s, RSY_RLLG, args[0], args[1], + TCG_REG_NONE, (64 - args[2]) & 63); + } else { + /* We can use the smaller 32-bit negate because only the + low 6 bits are examined for the rotate. */ + tcg_out_insn(s, RR, LCR, TCG_TMP0, args[2]); + tcg_out_sh64(s, RSY_RLLG, args[0], args[1], TCG_TMP0, 0); + } + break; + + case INDEX_op_ext8s_i64: + tgen_ext8s(s, TCG_TYPE_I64, args[0], args[1]); + break; + case INDEX_op_ext16s_i64: + tgen_ext16s(s, TCG_TYPE_I64, args[0], args[1]); + break; + case INDEX_op_ext_i32_i64: + case INDEX_op_ext32s_i64: + tgen_ext32s(s, args[0], args[1]); + break; + case INDEX_op_ext8u_i64: + tgen_ext8u(s, TCG_TYPE_I64, args[0], args[1]); + break; + case INDEX_op_ext16u_i64: + tgen_ext16u(s, TCG_TYPE_I64, args[0], args[1]); + break; + case INDEX_op_extu_i32_i64: + case INDEX_op_ext32u_i64: + tgen_ext32u(s, args[0], args[1]); + break; + + case INDEX_op_add2_i64: + if (const_args[4]) { + if ((int64_t)args[4] >= 0) { + tcg_out_insn(s, RIL, ALGFI, args[0], args[4]); + } else { + tcg_out_insn(s, RIL, SLGFI, args[0], -args[4]); + } + } else { + tcg_out_insn(s, RRE, ALGR, args[0], args[4]); + } + tcg_out_insn(s, RRE, ALCGR, args[1], args[5]); + break; + case INDEX_op_sub2_i64: + if (const_args[4]) { + if ((int64_t)args[4] >= 0) { + tcg_out_insn(s, RIL, SLGFI, args[0], args[4]); + } else { + tcg_out_insn(s, RIL, ALGFI, args[0], -args[4]); + } + } else { + tcg_out_insn(s, RRE, SLGR, args[0], args[4]); + } + tcg_out_insn(s, RRE, SLBGR, args[1], args[5]); + break; + + case INDEX_op_brcond_i64: + tgen_brcond(s, TCG_TYPE_I64, args[2], args[0], + args[1], const_args[1], arg_label(args[3])); + break; + case INDEX_op_setcond_i64: + tgen_setcond(s, TCG_TYPE_I64, args[3], args[0], args[1], + args[2], const_args[2]); + break; + case INDEX_op_movcond_i64: + tgen_movcond(s, TCG_TYPE_I64, args[5], args[0], args[1], + args[2], const_args[2], args[3]); + break; + + OP_32_64(deposit): + tgen_deposit(s, args[0], args[2], args[3], args[4]); + break; + + case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */ + case INDEX_op_mov_i64: + case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi. */ + case INDEX_op_movi_i64: + case INDEX_op_call: /* Always emitted via tcg_out_call. */ + default: + tcg_abort(); + } +} + +static const TCGTargetOpDef s390_op_defs[] = { + { INDEX_op_exit_tb, { } }, + { INDEX_op_goto_tb, { } }, + { INDEX_op_br, { } }, + + { INDEX_op_ld8u_i32, { "r", "r" } }, + { INDEX_op_ld8s_i32, { "r", "r" } }, + { INDEX_op_ld16u_i32, { "r", "r" } }, + { INDEX_op_ld16s_i32, { "r", "r" } }, + { INDEX_op_ld_i32, { "r", "r" } }, + { INDEX_op_st8_i32, { "r", "r" } }, + { INDEX_op_st16_i32, { "r", "r" } }, + { INDEX_op_st_i32, { "r", "r" } }, + + { INDEX_op_add_i32, { "r", "r", "ri" } }, + { INDEX_op_sub_i32, { "r", "0", "ri" } }, + { INDEX_op_mul_i32, { "r", "0", "rK" } }, + + { INDEX_op_div2_i32, { "b", "a", "0", "1", "r" } }, + { INDEX_op_divu2_i32, { "b", "a", "0", "1", "r" } }, + + { INDEX_op_and_i32, { "r", "0", "ri" } }, + { INDEX_op_or_i32, { "r", "0", "rO" } }, + { INDEX_op_xor_i32, { "r", "0", "rX" } }, + + { INDEX_op_neg_i32, { "r", "r" } }, + + { INDEX_op_shl_i32, { "r", "0", "Ri" } }, + { INDEX_op_shr_i32, { "r", "0", "Ri" } }, + { INDEX_op_sar_i32, { "r", "0", "Ri" } }, + + { INDEX_op_rotl_i32, { "r", "r", "Ri" } }, + { INDEX_op_rotr_i32, { "r", "r", "Ri" } }, + + { INDEX_op_ext8s_i32, { "r", "r" } }, + { INDEX_op_ext8u_i32, { "r", "r" } }, + { INDEX_op_ext16s_i32, { "r", "r" } }, + { INDEX_op_ext16u_i32, { "r", "r" } }, + + { INDEX_op_bswap16_i32, { "r", "r" } }, + { INDEX_op_bswap32_i32, { "r", "r" } }, + + { INDEX_op_add2_i32, { "r", "r", "0", "1", "rA", "r" } }, + { INDEX_op_sub2_i32, { "r", "r", "0", "1", "rA", "r" } }, + + { INDEX_op_brcond_i32, { "r", "rC" } }, + { INDEX_op_setcond_i32, { "r", "r", "rC" } }, + { INDEX_op_movcond_i32, { "r", "r", "rC", "r", "0" } }, + { INDEX_op_deposit_i32, { "r", "0", "r" } }, + + { INDEX_op_qemu_ld_i32, { "r", "L" } }, + { INDEX_op_qemu_ld_i64, { "r", "L" } }, + { INDEX_op_qemu_st_i32, { "L", "L" } }, + { INDEX_op_qemu_st_i64, { "L", "L" } }, + + { INDEX_op_ld8u_i64, { "r", "r" } }, + { INDEX_op_ld8s_i64, { "r", "r" } }, + { INDEX_op_ld16u_i64, { "r", "r" } }, + { INDEX_op_ld16s_i64, { "r", "r" } }, + { INDEX_op_ld32u_i64, { "r", "r" } }, + { INDEX_op_ld32s_i64, { "r", "r" } }, + { INDEX_op_ld_i64, { "r", "r" } }, + + { INDEX_op_st8_i64, { "r", "r" } }, + { INDEX_op_st16_i64, { "r", "r" } }, + { INDEX_op_st32_i64, { "r", "r" } }, + { INDEX_op_st_i64, { "r", "r" } }, + + { INDEX_op_add_i64, { "r", "r", "ri" } }, + { INDEX_op_sub_i64, { "r", "0", "ri" } }, + { INDEX_op_mul_i64, { "r", "0", "rK" } }, + + { INDEX_op_div2_i64, { "b", "a", "0", "1", "r" } }, + { INDEX_op_divu2_i64, { "b", "a", "0", "1", "r" } }, + { INDEX_op_mulu2_i64, { "b", "a", "0", "r" } }, + + { INDEX_op_and_i64, { "r", "0", "ri" } }, + { INDEX_op_or_i64, { "r", "0", "rO" } }, + { INDEX_op_xor_i64, { "r", "0", "rX" } }, + + { INDEX_op_neg_i64, { "r", "r" } }, + + { INDEX_op_shl_i64, { "r", "r", "Ri" } }, + { INDEX_op_shr_i64, { "r", "r", "Ri" } }, + { INDEX_op_sar_i64, { "r", "r", "Ri" } }, + + { INDEX_op_rotl_i64, { "r", "r", "Ri" } }, + { INDEX_op_rotr_i64, { "r", "r", "Ri" } }, + + { INDEX_op_ext8s_i64, { "r", "r" } }, + { INDEX_op_ext8u_i64, { "r", "r" } }, + { INDEX_op_ext16s_i64, { "r", "r" } }, + { INDEX_op_ext16u_i64, { "r", "r" } }, + { INDEX_op_ext32s_i64, { "r", "r" } }, + { INDEX_op_ext32u_i64, { "r", "r" } }, + + { INDEX_op_ext_i32_i64, { "r", "r" } }, + { INDEX_op_extu_i32_i64, { "r", "r" } }, + + { INDEX_op_bswap16_i64, { "r", "r" } }, + { INDEX_op_bswap32_i64, { "r", "r" } }, + { INDEX_op_bswap64_i64, { "r", "r" } }, + + { INDEX_op_add2_i64, { "r", "r", "0", "1", "rA", "r" } }, + { INDEX_op_sub2_i64, { "r", "r", "0", "1", "rA", "r" } }, + + { INDEX_op_brcond_i64, { "r", "rC" } }, + { INDEX_op_setcond_i64, { "r", "r", "rC" } }, + { INDEX_op_movcond_i64, { "r", "r", "rC", "r", "0" } }, + { INDEX_op_deposit_i64, { "r", "0", "r" } }, + + { -1 }, +}; + +static void query_facilities(void) +{ + unsigned long hwcap = qemu_getauxval(AT_HWCAP); + + /* Is STORE FACILITY LIST EXTENDED available? Honestly, I believe this + is present on all 64-bit systems, but let's check for it anyway. */ + if (hwcap & HWCAP_S390_STFLE) { + register int r0 __asm__("0"); + register void *r1 __asm__("1"); + + /* stfle 0(%r1) */ + r1 = &facilities; + asm volatile(".word 0xb2b0,0x1000" + : "=r"(r0) : "0"(0), "r"(r1) : "memory", "cc"); + } +} + +static void tcg_target_init(TCGContext *s) +{ + query_facilities(); + + tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffff); + tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I64], 0, 0xffff); + + tcg_regset_clear(tcg_target_call_clobber_regs); + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R0); + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R1); + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R2); + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R3); + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R4); + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R5); + /* The r6 register is technically call-saved, but it's also a parameter + register, so it can get killed by setup for the qemu_st helper. */ + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R6); + /* The return register can be considered call-clobbered. */ + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R14); + + tcg_regset_clear(s->reserved_regs); + tcg_regset_set_reg(s->reserved_regs, TCG_TMP0); + /* XXX many insns can't be used with R0, so we better avoid it for now */ + tcg_regset_set_reg(s->reserved_regs, TCG_REG_R0); + tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK); + + tcg_add_target_add_op_defs(s390_op_defs); +} + +#define FRAME_SIZE ((int)(TCG_TARGET_CALL_STACK_OFFSET \ + + TCG_STATIC_CALL_ARGS_SIZE \ + + CPU_TEMP_BUF_NLONGS * sizeof(long))) + +static void tcg_target_qemu_prologue(TCGContext *s) +{ + /* stmg %r6,%r15,48(%r15) (save registers) */ + tcg_out_insn(s, RXY, STMG, TCG_REG_R6, TCG_REG_R15, TCG_REG_R15, 48); + + /* aghi %r15,-frame_size */ + tcg_out_insn(s, RI, AGHI, TCG_REG_R15, -FRAME_SIZE); + + tcg_set_frame(s, TCG_REG_CALL_STACK, + TCG_STATIC_CALL_ARGS_SIZE + TCG_TARGET_CALL_STACK_OFFSET, + CPU_TEMP_BUF_NLONGS * sizeof(long)); + +#ifndef CONFIG_SOFTMMU + if (guest_base >= 0x80000) { + tcg_out_movi(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, guest_base); + tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG); + } +#endif + + tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]); + /* br %r3 (go to TB) */ + tcg_out_insn(s, RR, BCR, S390_CC_ALWAYS, tcg_target_call_iarg_regs[1]); + + tb_ret_addr = s->code_ptr; + + /* lmg %r6,%r15,fs+48(%r15) (restore registers) */ + tcg_out_insn(s, RXY, LMG, TCG_REG_R6, TCG_REG_R15, TCG_REG_R15, + FRAME_SIZE + 48); + + /* br %r14 (return) */ + tcg_out_insn(s, RR, BCR, S390_CC_ALWAYS, TCG_REG_R14); +} + +typedef struct { + DebugFrameHeader h; + uint8_t fde_def_cfa[4]; + uint8_t fde_reg_ofs[18]; +} DebugFrame; + +/* We're expecting a 2 byte uleb128 encoded value. */ +QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14)); + +#define ELF_HOST_MACHINE EM_S390 + +static const DebugFrame debug_frame = { + .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */ + .h.cie.id = -1, + .h.cie.version = 1, + .h.cie.code_align = 1, + .h.cie.data_align = 8, /* sleb128 8 */ + .h.cie.return_column = TCG_REG_R14, + + /* Total FDE size does not include the "len" member. */ + .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset), + + .fde_def_cfa = { + 12, TCG_REG_CALL_STACK, /* DW_CFA_def_cfa %r15, ... */ + (FRAME_SIZE & 0x7f) | 0x80, /* ... uleb128 FRAME_SIZE */ + (FRAME_SIZE >> 7) + }, + .fde_reg_ofs = { + 0x86, 6, /* DW_CFA_offset, %r6, 48 */ + 0x87, 7, /* DW_CFA_offset, %r7, 56 */ + 0x88, 8, /* DW_CFA_offset, %r8, 64 */ + 0x89, 9, /* DW_CFA_offset, %r92, 72 */ + 0x8a, 10, /* DW_CFA_offset, %r10, 80 */ + 0x8b, 11, /* DW_CFA_offset, %r11, 88 */ + 0x8c, 12, /* DW_CFA_offset, %r12, 96 */ + 0x8d, 13, /* DW_CFA_offset, %r13, 104 */ + 0x8e, 14, /* DW_CFA_offset, %r14, 112 */ + } +}; + +void tcg_register_jit(void *buf, size_t buf_size) +{ + tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame)); +} diff --git a/tcg/sparc/tcg-target.c b/tcg/sparc/tcg-target.c deleted file mode 100644 index d3100ab..0000000 --- a/tcg/sparc/tcg-target.c +++ /dev/null @@ -1,1653 +0,0 @@ -/* - * Tiny Code Generator for QEMU - * - * Copyright (c) 2008 Fabrice Bellard - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - */ - -#include "qemu/osdep.h" -#include "tcg-be-null.h" - -#ifndef NDEBUG -static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = { - "%g0", - "%g1", - "%g2", - "%g3", - "%g4", - "%g5", - "%g6", - "%g7", - "%o0", - "%o1", - "%o2", - "%o3", - "%o4", - "%o5", - "%o6", - "%o7", - "%l0", - "%l1", - "%l2", - "%l3", - "%l4", - "%l5", - "%l6", - "%l7", - "%i0", - "%i1", - "%i2", - "%i3", - "%i4", - "%i5", - "%i6", - "%i7", -}; -#endif - -#ifdef __arch64__ -# define SPARC64 1 -#else -# define SPARC64 0 -#endif - -/* Note that sparcv8plus can only hold 64 bit quantities in %g and %o - registers. These are saved manually by the kernel in full 64-bit - slots. The %i and %l registers are saved by the register window - mechanism, which only allocates space for 32 bits. Given that this - window spill/fill can happen on any signal, we must consider the - high bits of the %i and %l registers garbage at all times. */ -#if SPARC64 -# define ALL_64 0xffffffffu -#else -# define ALL_64 0xffffu -#endif - -/* Define some temporary registers. T2 is used for constant generation. */ -#define TCG_REG_T1 TCG_REG_G1 -#define TCG_REG_T2 TCG_REG_O7 - -#ifndef CONFIG_SOFTMMU -# define TCG_GUEST_BASE_REG TCG_REG_I5 -#endif - -static const int tcg_target_reg_alloc_order[] = { - TCG_REG_L0, - TCG_REG_L1, - TCG_REG_L2, - TCG_REG_L3, - TCG_REG_L4, - TCG_REG_L5, - TCG_REG_L6, - TCG_REG_L7, - - TCG_REG_I0, - TCG_REG_I1, - TCG_REG_I2, - TCG_REG_I3, - TCG_REG_I4, - TCG_REG_I5, - - TCG_REG_G2, - TCG_REG_G3, - TCG_REG_G4, - TCG_REG_G5, - - TCG_REG_O0, - TCG_REG_O1, - TCG_REG_O2, - TCG_REG_O3, - TCG_REG_O4, - TCG_REG_O5, -}; - -static const int tcg_target_call_iarg_regs[6] = { - TCG_REG_O0, - TCG_REG_O1, - TCG_REG_O2, - TCG_REG_O3, - TCG_REG_O4, - TCG_REG_O5, -}; - -static const int tcg_target_call_oarg_regs[] = { - TCG_REG_O0, - TCG_REG_O1, - TCG_REG_O2, - TCG_REG_O3, -}; - -#define INSN_OP(x) ((x) << 30) -#define INSN_OP2(x) ((x) << 22) -#define INSN_OP3(x) ((x) << 19) -#define INSN_OPF(x) ((x) << 5) -#define INSN_RD(x) ((x) << 25) -#define INSN_RS1(x) ((x) << 14) -#define INSN_RS2(x) (x) -#define INSN_ASI(x) ((x) << 5) - -#define INSN_IMM10(x) ((1 << 13) | ((x) & 0x3ff)) -#define INSN_IMM11(x) ((1 << 13) | ((x) & 0x7ff)) -#define INSN_IMM13(x) ((1 << 13) | ((x) & 0x1fff)) -#define INSN_OFF16(x) ((((x) >> 2) & 0x3fff) | ((((x) >> 16) & 3) << 20)) -#define INSN_OFF19(x) (((x) >> 2) & 0x07ffff) -#define INSN_COND(x) ((x) << 25) - -#define COND_N 0x0 -#define COND_E 0x1 -#define COND_LE 0x2 -#define COND_L 0x3 -#define COND_LEU 0x4 -#define COND_CS 0x5 -#define COND_NEG 0x6 -#define COND_VS 0x7 -#define COND_A 0x8 -#define COND_NE 0x9 -#define COND_G 0xa -#define COND_GE 0xb -#define COND_GU 0xc -#define COND_CC 0xd -#define COND_POS 0xe -#define COND_VC 0xf -#define BA (INSN_OP(0) | INSN_COND(COND_A) | INSN_OP2(0x2)) - -#define RCOND_Z 1 -#define RCOND_LEZ 2 -#define RCOND_LZ 3 -#define RCOND_NZ 5 -#define RCOND_GZ 6 -#define RCOND_GEZ 7 - -#define MOVCC_ICC (1 << 18) -#define MOVCC_XCC (1 << 18 | 1 << 12) - -#define BPCC_ICC 0 -#define BPCC_XCC (2 << 20) -#define BPCC_PT (1 << 19) -#define BPCC_PN 0 -#define BPCC_A (1 << 29) - -#define BPR_PT BPCC_PT - -#define ARITH_ADD (INSN_OP(2) | INSN_OP3(0x00)) -#define ARITH_ADDCC (INSN_OP(2) | INSN_OP3(0x10)) -#define ARITH_AND (INSN_OP(2) | INSN_OP3(0x01)) -#define ARITH_ANDN (INSN_OP(2) | INSN_OP3(0x05)) -#define ARITH_OR (INSN_OP(2) | INSN_OP3(0x02)) -#define ARITH_ORCC (INSN_OP(2) | INSN_OP3(0x12)) -#define ARITH_ORN (INSN_OP(2) | INSN_OP3(0x06)) -#define ARITH_XOR (INSN_OP(2) | INSN_OP3(0x03)) -#define ARITH_SUB (INSN_OP(2) | INSN_OP3(0x04)) -#define ARITH_SUBCC (INSN_OP(2) | INSN_OP3(0x14)) -#define ARITH_ADDC (INSN_OP(2) | INSN_OP3(0x08)) -#define ARITH_SUBC (INSN_OP(2) | INSN_OP3(0x0c)) -#define ARITH_UMUL (INSN_OP(2) | INSN_OP3(0x0a)) -#define ARITH_SMUL (INSN_OP(2) | INSN_OP3(0x0b)) -#define ARITH_UDIV (INSN_OP(2) | INSN_OP3(0x0e)) -#define ARITH_SDIV (INSN_OP(2) | INSN_OP3(0x0f)) -#define ARITH_MULX (INSN_OP(2) | INSN_OP3(0x09)) -#define ARITH_UDIVX (INSN_OP(2) | INSN_OP3(0x0d)) -#define ARITH_SDIVX (INSN_OP(2) | INSN_OP3(0x2d)) -#define ARITH_MOVCC (INSN_OP(2) | INSN_OP3(0x2c)) -#define ARITH_MOVR (INSN_OP(2) | INSN_OP3(0x2f)) - -#define ARITH_ADDXC (INSN_OP(2) | INSN_OP3(0x36) | INSN_OPF(0x11)) -#define ARITH_UMULXHI (INSN_OP(2) | INSN_OP3(0x36) | INSN_OPF(0x16)) - -#define SHIFT_SLL (INSN_OP(2) | INSN_OP3(0x25)) -#define SHIFT_SRL (INSN_OP(2) | INSN_OP3(0x26)) -#define SHIFT_SRA (INSN_OP(2) | INSN_OP3(0x27)) - -#define SHIFT_SLLX (INSN_OP(2) | INSN_OP3(0x25) | (1 << 12)) -#define SHIFT_SRLX (INSN_OP(2) | INSN_OP3(0x26) | (1 << 12)) -#define SHIFT_SRAX (INSN_OP(2) | INSN_OP3(0x27) | (1 << 12)) - -#define RDY (INSN_OP(2) | INSN_OP3(0x28) | INSN_RS1(0)) -#define WRY (INSN_OP(2) | INSN_OP3(0x30) | INSN_RD(0)) -#define JMPL (INSN_OP(2) | INSN_OP3(0x38)) -#define RETURN (INSN_OP(2) | INSN_OP3(0x39)) -#define SAVE (INSN_OP(2) | INSN_OP3(0x3c)) -#define RESTORE (INSN_OP(2) | INSN_OP3(0x3d)) -#define SETHI (INSN_OP(0) | INSN_OP2(0x4)) -#define CALL INSN_OP(1) -#define LDUB (INSN_OP(3) | INSN_OP3(0x01)) -#define LDSB (INSN_OP(3) | INSN_OP3(0x09)) -#define LDUH (INSN_OP(3) | INSN_OP3(0x02)) -#define LDSH (INSN_OP(3) | INSN_OP3(0x0a)) -#define LDUW (INSN_OP(3) | INSN_OP3(0x00)) -#define LDSW (INSN_OP(3) | INSN_OP3(0x08)) -#define LDX (INSN_OP(3) | INSN_OP3(0x0b)) -#define STB (INSN_OP(3) | INSN_OP3(0x05)) -#define STH (INSN_OP(3) | INSN_OP3(0x06)) -#define STW (INSN_OP(3) | INSN_OP3(0x04)) -#define STX (INSN_OP(3) | INSN_OP3(0x0e)) -#define LDUBA (INSN_OP(3) | INSN_OP3(0x11)) -#define LDSBA (INSN_OP(3) | INSN_OP3(0x19)) -#define LDUHA (INSN_OP(3) | INSN_OP3(0x12)) -#define LDSHA (INSN_OP(3) | INSN_OP3(0x1a)) -#define LDUWA (INSN_OP(3) | INSN_OP3(0x10)) -#define LDSWA (INSN_OP(3) | INSN_OP3(0x18)) -#define LDXA (INSN_OP(3) | INSN_OP3(0x1b)) -#define STBA (INSN_OP(3) | INSN_OP3(0x15)) -#define STHA (INSN_OP(3) | INSN_OP3(0x16)) -#define STWA (INSN_OP(3) | INSN_OP3(0x14)) -#define STXA (INSN_OP(3) | INSN_OP3(0x1e)) - -#ifndef ASI_PRIMARY_LITTLE -#define ASI_PRIMARY_LITTLE 0x88 -#endif - -#define LDUH_LE (LDUHA | INSN_ASI(ASI_PRIMARY_LITTLE)) -#define LDSH_LE (LDSHA | INSN_ASI(ASI_PRIMARY_LITTLE)) -#define LDUW_LE (LDUWA | INSN_ASI(ASI_PRIMARY_LITTLE)) -#define LDSW_LE (LDSWA | INSN_ASI(ASI_PRIMARY_LITTLE)) -#define LDX_LE (LDXA | INSN_ASI(ASI_PRIMARY_LITTLE)) - -#define STH_LE (STHA | INSN_ASI(ASI_PRIMARY_LITTLE)) -#define STW_LE (STWA | INSN_ASI(ASI_PRIMARY_LITTLE)) -#define STX_LE (STXA | INSN_ASI(ASI_PRIMARY_LITTLE)) - -#ifndef use_vis3_instructions -bool use_vis3_instructions; -#endif - -static inline int check_fit_i64(int64_t val, unsigned int bits) -{ - return val == sextract64(val, 0, bits); -} - -static inline int check_fit_i32(int32_t val, unsigned int bits) -{ - return val == sextract32(val, 0, bits); -} - -#define check_fit_tl check_fit_i64 -#if SPARC64 -# define check_fit_ptr check_fit_i64 -#else -# define check_fit_ptr check_fit_i32 -#endif - -static void patch_reloc(tcg_insn_unit *code_ptr, int type, - intptr_t value, intptr_t addend) -{ - uint32_t insn; - - assert(addend == 0); - value = tcg_ptr_byte_diff((tcg_insn_unit *)value, code_ptr); - - switch (type) { - case R_SPARC_WDISP16: - if (!check_fit_ptr(value >> 2, 16)) { - tcg_abort(); - } - insn = *code_ptr; - insn &= ~INSN_OFF16(-1); - insn |= INSN_OFF16(value); - *code_ptr = insn; - break; - case R_SPARC_WDISP19: - if (!check_fit_ptr(value >> 2, 19)) { - tcg_abort(); - } - insn = *code_ptr; - insn &= ~INSN_OFF19(-1); - insn |= INSN_OFF19(value); - *code_ptr = insn; - break; - default: - tcg_abort(); - } -} - -/* parse target specific constraints */ -static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str) -{ - const char *ct_str; - - ct_str = *pct_str; - switch (ct_str[0]) { - case 'r': - ct->ct |= TCG_CT_REG; - tcg_regset_set32(ct->u.regs, 0, 0xffffffff); - break; - case 'R': - ct->ct |= TCG_CT_REG; - tcg_regset_set32(ct->u.regs, 0, ALL_64); - break; - case 'A': /* qemu_ld/st address constraint */ - ct->ct |= TCG_CT_REG; - tcg_regset_set32(ct->u.regs, 0, - TARGET_LONG_BITS == 64 ? ALL_64 : 0xffffffff); - reserve_helpers: - tcg_regset_reset_reg(ct->u.regs, TCG_REG_O0); - tcg_regset_reset_reg(ct->u.regs, TCG_REG_O1); - tcg_regset_reset_reg(ct->u.regs, TCG_REG_O2); - break; - case 's': /* qemu_st data 32-bit constraint */ - ct->ct |= TCG_CT_REG; - tcg_regset_set32(ct->u.regs, 0, 0xffffffff); - goto reserve_helpers; - case 'S': /* qemu_st data 64-bit constraint */ - ct->ct |= TCG_CT_REG; - tcg_regset_set32(ct->u.regs, 0, ALL_64); - goto reserve_helpers; - case 'I': - ct->ct |= TCG_CT_CONST_S11; - break; - case 'J': - ct->ct |= TCG_CT_CONST_S13; - break; - case 'Z': - ct->ct |= TCG_CT_CONST_ZERO; - break; - default: - return -1; - } - ct_str++; - *pct_str = ct_str; - return 0; -} - -/* test if a constant matches the constraint */ -static inline int tcg_target_const_match(tcg_target_long val, TCGType type, - const TCGArgConstraint *arg_ct) -{ - int ct = arg_ct->ct; - - if (ct & TCG_CT_CONST) { - return 1; - } - - if (type == TCG_TYPE_I32) { - val = (int32_t)val; - } - - if ((ct & TCG_CT_CONST_ZERO) && val == 0) { - return 1; - } else if ((ct & TCG_CT_CONST_S11) && check_fit_tl(val, 11)) { - return 1; - } else if ((ct & TCG_CT_CONST_S13) && check_fit_tl(val, 13)) { - return 1; - } else { - return 0; - } -} - -static inline void tcg_out_arith(TCGContext *s, TCGReg rd, TCGReg rs1, - TCGReg rs2, int op) -{ - tcg_out32(s, op | INSN_RD(rd) | INSN_RS1(rs1) | INSN_RS2(rs2)); -} - -static inline void tcg_out_arithi(TCGContext *s, TCGReg rd, TCGReg rs1, - int32_t offset, int op) -{ - tcg_out32(s, op | INSN_RD(rd) | INSN_RS1(rs1) | INSN_IMM13(offset)); -} - -static void tcg_out_arithc(TCGContext *s, TCGReg rd, TCGReg rs1, - int32_t val2, int val2const, int op) -{ - tcg_out32(s, op | INSN_RD(rd) | INSN_RS1(rs1) - | (val2const ? INSN_IMM13(val2) : INSN_RS2(val2))); -} - -static inline void tcg_out_mov(TCGContext *s, TCGType type, - TCGReg ret, TCGReg arg) -{ - if (ret != arg) { - tcg_out_arith(s, ret, arg, TCG_REG_G0, ARITH_OR); - } -} - -static inline void tcg_out_sethi(TCGContext *s, TCGReg ret, uint32_t arg) -{ - tcg_out32(s, SETHI | INSN_RD(ret) | ((arg & 0xfffffc00) >> 10)); -} - -static inline void tcg_out_movi_imm13(TCGContext *s, TCGReg ret, int32_t arg) -{ - tcg_out_arithi(s, ret, TCG_REG_G0, arg, ARITH_OR); -} - -static void tcg_out_movi(TCGContext *s, TCGType type, - TCGReg ret, tcg_target_long arg) -{ - tcg_target_long hi, lo = (int32_t)arg; - - /* Make sure we test 32-bit constants for imm13 properly. */ - if (type == TCG_TYPE_I32) { - arg = lo; - } - - /* A 13-bit constant sign-extended to 64-bits. */ - if (check_fit_tl(arg, 13)) { - tcg_out_movi_imm13(s, ret, arg); - return; - } - - /* A 32-bit constant, or 32-bit zero-extended to 64-bits. */ - if (type == TCG_TYPE_I32 || arg == (uint32_t)arg) { - tcg_out_sethi(s, ret, arg); - if (arg & 0x3ff) { - tcg_out_arithi(s, ret, ret, arg & 0x3ff, ARITH_OR); - } - return; - } - - /* A 32-bit constant sign-extended to 64-bits. */ - if (arg == lo) { - tcg_out_sethi(s, ret, ~arg); - tcg_out_arithi(s, ret, ret, (arg & 0x3ff) | -0x400, ARITH_XOR); - return; - } - - /* A 64-bit constant decomposed into 2 32-bit pieces. */ - if (check_fit_i32(lo, 13)) { - hi = (arg - lo) >> 32; - tcg_out_movi(s, TCG_TYPE_I32, ret, hi); - tcg_out_arithi(s, ret, ret, 32, SHIFT_SLLX); - tcg_out_arithi(s, ret, ret, lo, ARITH_ADD); - } else { - hi = arg >> 32; - tcg_out_movi(s, TCG_TYPE_I32, ret, hi); - tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_T2, lo); - tcg_out_arithi(s, ret, ret, 32, SHIFT_SLLX); - tcg_out_arith(s, ret, ret, TCG_REG_T2, ARITH_OR); - } -} - -static inline void tcg_out_ldst_rr(TCGContext *s, TCGReg data, TCGReg a1, - TCGReg a2, int op) -{ - tcg_out32(s, op | INSN_RD(data) | INSN_RS1(a1) | INSN_RS2(a2)); -} - -static void tcg_out_ldst(TCGContext *s, TCGReg ret, TCGReg addr, - intptr_t offset, int op) -{ - if (check_fit_ptr(offset, 13)) { - tcg_out32(s, op | INSN_RD(ret) | INSN_RS1(addr) | - INSN_IMM13(offset)); - } else { - tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_T1, offset); - tcg_out_ldst_rr(s, ret, addr, TCG_REG_T1, op); - } -} - -static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, - TCGReg arg1, intptr_t arg2) -{ - tcg_out_ldst(s, ret, arg1, arg2, (type == TCG_TYPE_I32 ? LDUW : LDX)); -} - -static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, - TCGReg arg1, intptr_t arg2) -{ - tcg_out_ldst(s, arg, arg1, arg2, (type == TCG_TYPE_I32 ? STW : STX)); -} - -static void tcg_out_ld_ptr(TCGContext *s, TCGReg ret, uintptr_t arg) -{ - tcg_out_movi(s, TCG_TYPE_PTR, ret, arg & ~0x3ff); - tcg_out_ld(s, TCG_TYPE_PTR, ret, ret, arg & 0x3ff); -} - -static inline void tcg_out_sety(TCGContext *s, TCGReg rs) -{ - tcg_out32(s, WRY | INSN_RS1(TCG_REG_G0) | INSN_RS2(rs)); -} - -static inline void tcg_out_rdy(TCGContext *s, TCGReg rd) -{ - tcg_out32(s, RDY | INSN_RD(rd)); -} - -static void tcg_out_div32(TCGContext *s, TCGReg rd, TCGReg rs1, - int32_t val2, int val2const, int uns) -{ - /* Load Y with the sign/zero extension of RS1 to 64-bits. */ - if (uns) { - tcg_out_sety(s, TCG_REG_G0); - } else { - tcg_out_arithi(s, TCG_REG_T1, rs1, 31, SHIFT_SRA); - tcg_out_sety(s, TCG_REG_T1); - } - - tcg_out_arithc(s, rd, rs1, val2, val2const, - uns ? ARITH_UDIV : ARITH_SDIV); -} - -static inline void tcg_out_nop(TCGContext *s) -{ - tcg_out_sethi(s, TCG_REG_G0, 0); -} - -static const uint8_t tcg_cond_to_bcond[] = { - [TCG_COND_EQ] = COND_E, - [TCG_COND_NE] = COND_NE, - [TCG_COND_LT] = COND_L, - [TCG_COND_GE] = COND_GE, - [TCG_COND_LE] = COND_LE, - [TCG_COND_GT] = COND_G, - [TCG_COND_LTU] = COND_CS, - [TCG_COND_GEU] = COND_CC, - [TCG_COND_LEU] = COND_LEU, - [TCG_COND_GTU] = COND_GU, -}; - -static const uint8_t tcg_cond_to_rcond[] = { - [TCG_COND_EQ] = RCOND_Z, - [TCG_COND_NE] = RCOND_NZ, - [TCG_COND_LT] = RCOND_LZ, - [TCG_COND_GT] = RCOND_GZ, - [TCG_COND_LE] = RCOND_LEZ, - [TCG_COND_GE] = RCOND_GEZ -}; - -static void tcg_out_bpcc0(TCGContext *s, int scond, int flags, int off19) -{ - tcg_out32(s, INSN_OP(0) | INSN_OP2(1) | INSN_COND(scond) | flags | off19); -} - -static void tcg_out_bpcc(TCGContext *s, int scond, int flags, TCGLabel *l) -{ - int off19; - - if (l->has_value) { - off19 = INSN_OFF19(tcg_pcrel_diff(s, l->u.value_ptr)); - } else { - /* Make sure to preserve destinations during retranslation. */ - off19 = *s->code_ptr & INSN_OFF19(-1); - tcg_out_reloc(s, s->code_ptr, R_SPARC_WDISP19, l, 0); - } - tcg_out_bpcc0(s, scond, flags, off19); -} - -static void tcg_out_cmp(TCGContext *s, TCGReg c1, int32_t c2, int c2const) -{ - tcg_out_arithc(s, TCG_REG_G0, c1, c2, c2const, ARITH_SUBCC); -} - -static void tcg_out_brcond_i32(TCGContext *s, TCGCond cond, TCGReg arg1, - int32_t arg2, int const_arg2, TCGLabel *l) -{ - tcg_out_cmp(s, arg1, arg2, const_arg2); - tcg_out_bpcc(s, tcg_cond_to_bcond[cond], BPCC_ICC | BPCC_PT, l); - tcg_out_nop(s); -} - -static void tcg_out_movcc(TCGContext *s, TCGCond cond, int cc, TCGReg ret, - int32_t v1, int v1const) -{ - tcg_out32(s, ARITH_MOVCC | cc | INSN_RD(ret) - | INSN_RS1(tcg_cond_to_bcond[cond]) - | (v1const ? INSN_IMM11(v1) : INSN_RS2(v1))); -} - -static void tcg_out_movcond_i32(TCGContext *s, TCGCond cond, TCGReg ret, - TCGReg c1, int32_t c2, int c2const, - int32_t v1, int v1const) -{ - tcg_out_cmp(s, c1, c2, c2const); - tcg_out_movcc(s, cond, MOVCC_ICC, ret, v1, v1const); -} - -static void tcg_out_brcond_i64(TCGContext *s, TCGCond cond, TCGReg arg1, - int32_t arg2, int const_arg2, TCGLabel *l) -{ - /* For 64-bit signed comparisons vs zero, we can avoid the compare. */ - if (arg2 == 0 && !is_unsigned_cond(cond)) { - int off16; - - if (l->has_value) { - off16 = INSN_OFF16(tcg_pcrel_diff(s, l->u.value_ptr)); - } else { - /* Make sure to preserve destinations during retranslation. */ - off16 = *s->code_ptr & INSN_OFF16(-1); - tcg_out_reloc(s, s->code_ptr, R_SPARC_WDISP16, l, 0); - } - tcg_out32(s, INSN_OP(0) | INSN_OP2(3) | BPR_PT | INSN_RS1(arg1) - | INSN_COND(tcg_cond_to_rcond[cond]) | off16); - } else { - tcg_out_cmp(s, arg1, arg2, const_arg2); - tcg_out_bpcc(s, tcg_cond_to_bcond[cond], BPCC_XCC | BPCC_PT, l); - } - tcg_out_nop(s); -} - -static void tcg_out_movr(TCGContext *s, TCGCond cond, TCGReg ret, TCGReg c1, - int32_t v1, int v1const) -{ - tcg_out32(s, ARITH_MOVR | INSN_RD(ret) | INSN_RS1(c1) - | (tcg_cond_to_rcond[cond] << 10) - | (v1const ? INSN_IMM10(v1) : INSN_RS2(v1))); -} - -static void tcg_out_movcond_i64(TCGContext *s, TCGCond cond, TCGReg ret, - TCGReg c1, int32_t c2, int c2const, - int32_t v1, int v1const) -{ - /* For 64-bit signed comparisons vs zero, we can avoid the compare. - Note that the immediate range is one bit smaller, so we must check - for that as well. */ - if (c2 == 0 && !is_unsigned_cond(cond) - && (!v1const || check_fit_i32(v1, 10))) { - tcg_out_movr(s, cond, ret, c1, v1, v1const); - } else { - tcg_out_cmp(s, c1, c2, c2const); - tcg_out_movcc(s, cond, MOVCC_XCC, ret, v1, v1const); - } -} - -static void tcg_out_setcond_i32(TCGContext *s, TCGCond cond, TCGReg ret, - TCGReg c1, int32_t c2, int c2const) -{ - /* For 32-bit comparisons, we can play games with ADDC/SUBC. */ - switch (cond) { - case TCG_COND_LTU: - case TCG_COND_GEU: - /* The result of the comparison is in the carry bit. */ - break; - - case TCG_COND_EQ: - case TCG_COND_NE: - /* For equality, we can transform to inequality vs zero. */ - if (c2 != 0) { - tcg_out_arithc(s, TCG_REG_T1, c1, c2, c2const, ARITH_XOR); - c2 = TCG_REG_T1; - } else { - c2 = c1; - } - c1 = TCG_REG_G0, c2const = 0; - cond = (cond == TCG_COND_EQ ? TCG_COND_GEU : TCG_COND_LTU); - break; - - case TCG_COND_GTU: - case TCG_COND_LEU: - /* If we don't need to load a constant into a register, we can - swap the operands on GTU/LEU. There's no benefit to loading - the constant into a temporary register. */ - if (!c2const || c2 == 0) { - TCGReg t = c1; - c1 = c2; - c2 = t; - c2const = 0; - cond = tcg_swap_cond(cond); - break; - } - /* FALLTHRU */ - - default: - tcg_out_cmp(s, c1, c2, c2const); - tcg_out_movi_imm13(s, ret, 0); - tcg_out_movcc(s, cond, MOVCC_ICC, ret, 1, 1); - return; - } - - tcg_out_cmp(s, c1, c2, c2const); - if (cond == TCG_COND_LTU) { - tcg_out_arithi(s, ret, TCG_REG_G0, 0, ARITH_ADDC); - } else { - tcg_out_arithi(s, ret, TCG_REG_G0, -1, ARITH_SUBC); - } -} - -static void tcg_out_setcond_i64(TCGContext *s, TCGCond cond, TCGReg ret, - TCGReg c1, int32_t c2, int c2const) -{ - if (use_vis3_instructions) { - switch (cond) { - case TCG_COND_NE: - if (c2 != 0) { - break; - } - c2 = c1, c2const = 0, c1 = TCG_REG_G0; - /* FALLTHRU */ - case TCG_COND_LTU: - tcg_out_cmp(s, c1, c2, c2const); - tcg_out_arith(s, ret, TCG_REG_G0, TCG_REG_G0, ARITH_ADDXC); - return; - default: - break; - } - } - - /* For 64-bit signed comparisons vs zero, we can avoid the compare - if the input does not overlap the output. */ - if (c2 == 0 && !is_unsigned_cond(cond) && c1 != ret) { - tcg_out_movi_imm13(s, ret, 0); - tcg_out_movr(s, cond, ret, c1, 1, 1); - } else { - tcg_out_cmp(s, c1, c2, c2const); - tcg_out_movi_imm13(s, ret, 0); - tcg_out_movcc(s, cond, MOVCC_XCC, ret, 1, 1); - } -} - -static void tcg_out_addsub2_i32(TCGContext *s, TCGReg rl, TCGReg rh, - TCGReg al, TCGReg ah, int32_t bl, int blconst, - int32_t bh, int bhconst, int opl, int oph) -{ - TCGReg tmp = TCG_REG_T1; - - /* Note that the low parts are fully consumed before tmp is set. */ - if (rl != ah && (bhconst || rl != bh)) { - tmp = rl; - } - - tcg_out_arithc(s, tmp, al, bl, blconst, opl); - tcg_out_arithc(s, rh, ah, bh, bhconst, oph); - tcg_out_mov(s, TCG_TYPE_I32, rl, tmp); -} - -static void tcg_out_addsub2_i64(TCGContext *s, TCGReg rl, TCGReg rh, - TCGReg al, TCGReg ah, int32_t bl, int blconst, - int32_t bh, int bhconst, bool is_sub) -{ - TCGReg tmp = TCG_REG_T1; - - /* Note that the low parts are fully consumed before tmp is set. */ - if (rl != ah && (bhconst || rl != bh)) { - tmp = rl; - } - - tcg_out_arithc(s, tmp, al, bl, blconst, is_sub ? ARITH_SUBCC : ARITH_ADDCC); - - if (use_vis3_instructions && !is_sub) { - /* Note that ADDXC doesn't accept immediates. */ - if (bhconst && bh != 0) { - tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_T2, bh); - bh = TCG_REG_T2; - } - tcg_out_arith(s, rh, ah, bh, ARITH_ADDXC); - } else if (bh == TCG_REG_G0) { - /* If we have a zero, we can perform the operation in two insns, - with the arithmetic first, and a conditional move into place. */ - if (rh == ah) { - tcg_out_arithi(s, TCG_REG_T2, ah, 1, - is_sub ? ARITH_SUB : ARITH_ADD); - tcg_out_movcc(s, TCG_COND_LTU, MOVCC_XCC, rh, TCG_REG_T2, 0); - } else { - tcg_out_arithi(s, rh, ah, 1, is_sub ? ARITH_SUB : ARITH_ADD); - tcg_out_movcc(s, TCG_COND_GEU, MOVCC_XCC, rh, ah, 0); - } - } else { - /* Otherwise adjust BH as if there is carry into T2 ... */ - if (bhconst) { - tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_T2, bh + (is_sub ? -1 : 1)); - } else { - tcg_out_arithi(s, TCG_REG_T2, bh, 1, - is_sub ? ARITH_SUB : ARITH_ADD); - } - /* ... smoosh T2 back to original BH if carry is clear ... */ - tcg_out_movcc(s, TCG_COND_GEU, MOVCC_XCC, TCG_REG_T2, bh, bhconst); - /* ... and finally perform the arithmetic with the new operand. */ - tcg_out_arith(s, rh, ah, TCG_REG_T2, is_sub ? ARITH_SUB : ARITH_ADD); - } - - tcg_out_mov(s, TCG_TYPE_I64, rl, tmp); -} - -static void tcg_out_call_nodelay(TCGContext *s, tcg_insn_unit *dest) -{ - ptrdiff_t disp = tcg_pcrel_diff(s, dest); - - if (disp == (int32_t)disp) { - tcg_out32(s, CALL | (uint32_t)disp >> 2); - } else { - uintptr_t desti = (uintptr_t)dest; - tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_T1, desti & ~0xfff); - tcg_out_arithi(s, TCG_REG_O7, TCG_REG_T1, desti & 0xfff, JMPL); - } -} - -static void tcg_out_call(TCGContext *s, tcg_insn_unit *dest) -{ - tcg_out_call_nodelay(s, dest); - tcg_out_nop(s); -} - -#ifdef CONFIG_SOFTMMU -static tcg_insn_unit *qemu_ld_trampoline[16]; -static tcg_insn_unit *qemu_st_trampoline[16]; - -static void build_trampolines(TCGContext *s) -{ - static void * const qemu_ld_helpers[16] = { - [MO_UB] = helper_ret_ldub_mmu, - [MO_SB] = helper_ret_ldsb_mmu, - [MO_LEUW] = helper_le_lduw_mmu, - [MO_LESW] = helper_le_ldsw_mmu, - [MO_LEUL] = helper_le_ldul_mmu, - [MO_LEQ] = helper_le_ldq_mmu, - [MO_BEUW] = helper_be_lduw_mmu, - [MO_BESW] = helper_be_ldsw_mmu, - [MO_BEUL] = helper_be_ldul_mmu, - [MO_BEQ] = helper_be_ldq_mmu, - }; - static void * const qemu_st_helpers[16] = { - [MO_UB] = helper_ret_stb_mmu, - [MO_LEUW] = helper_le_stw_mmu, - [MO_LEUL] = helper_le_stl_mmu, - [MO_LEQ] = helper_le_stq_mmu, - [MO_BEUW] = helper_be_stw_mmu, - [MO_BEUL] = helper_be_stl_mmu, - [MO_BEQ] = helper_be_stq_mmu, - }; - - int i; - TCGReg ra; - - for (i = 0; i < 16; ++i) { - if (qemu_ld_helpers[i] == NULL) { - continue; - } - - /* May as well align the trampoline. */ - while ((uintptr_t)s->code_ptr & 15) { - tcg_out_nop(s); - } - qemu_ld_trampoline[i] = s->code_ptr; - - if (SPARC64 || TARGET_LONG_BITS == 32) { - ra = TCG_REG_O3; - } else { - /* Install the high part of the address. */ - tcg_out_arithi(s, TCG_REG_O1, TCG_REG_O2, 32, SHIFT_SRLX); - ra = TCG_REG_O4; - } - - /* Set the retaddr operand. */ - tcg_out_mov(s, TCG_TYPE_PTR, ra, TCG_REG_O7); - /* Set the env operand. */ - tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_O0, TCG_AREG0); - /* Tail call. */ - tcg_out_call_nodelay(s, qemu_ld_helpers[i]); - tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_O7, ra); - } - - for (i = 0; i < 16; ++i) { - if (qemu_st_helpers[i] == NULL) { - continue; - } - - /* May as well align the trampoline. */ - while ((uintptr_t)s->code_ptr & 15) { - tcg_out_nop(s); - } - qemu_st_trampoline[i] = s->code_ptr; - - if (SPARC64) { - ra = TCG_REG_O4; - } else { - ra = TCG_REG_O1; - if (TARGET_LONG_BITS == 64) { - /* Install the high part of the address. */ - tcg_out_arithi(s, ra, ra + 1, 32, SHIFT_SRLX); - ra += 2; - } else { - ra += 1; - } - if ((i & MO_SIZE) == MO_64) { - /* Install the high part of the data. */ - tcg_out_arithi(s, ra, ra + 1, 32, SHIFT_SRLX); - ra += 2; - } else { - ra += 1; - } - /* Skip the oi argument. */ - ra += 1; - } - - /* Set the retaddr operand. */ - if (ra >= TCG_REG_O6) { - tcg_out_st(s, TCG_TYPE_PTR, TCG_REG_O7, TCG_REG_CALL_STACK, - TCG_TARGET_CALL_STACK_OFFSET); - ra = TCG_REG_G1; - } - tcg_out_mov(s, TCG_TYPE_PTR, ra, TCG_REG_O7); - /* Set the env operand. */ - tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_O0, TCG_AREG0); - /* Tail call. */ - tcg_out_call_nodelay(s, qemu_st_helpers[i]); - tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_O7, ra); - } -} -#endif - -/* Generate global QEMU prologue and epilogue code */ -static void tcg_target_qemu_prologue(TCGContext *s) -{ - int tmp_buf_size, frame_size; - - /* The TCG temp buffer is at the top of the frame, immediately - below the frame pointer. */ - tmp_buf_size = CPU_TEMP_BUF_NLONGS * (int)sizeof(long); - tcg_set_frame(s, TCG_REG_I6, TCG_TARGET_STACK_BIAS - tmp_buf_size, - tmp_buf_size); - - /* TCG_TARGET_CALL_STACK_OFFSET includes the stack bias, but is - otherwise the minimal frame usable by callees. */ - frame_size = TCG_TARGET_CALL_STACK_OFFSET - TCG_TARGET_STACK_BIAS; - frame_size += TCG_STATIC_CALL_ARGS_SIZE + tmp_buf_size; - frame_size += TCG_TARGET_STACK_ALIGN - 1; - frame_size &= -TCG_TARGET_STACK_ALIGN; - tcg_out32(s, SAVE | INSN_RD(TCG_REG_O6) | INSN_RS1(TCG_REG_O6) | - INSN_IMM13(-frame_size)); - -#ifndef CONFIG_SOFTMMU - if (guest_base != 0) { - tcg_out_movi(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, guest_base); - tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG); - } -#endif - - tcg_out_arithi(s, TCG_REG_G0, TCG_REG_I1, 0, JMPL); - /* delay slot */ - tcg_out_nop(s); - - /* No epilogue required. We issue ret + restore directly in the TB. */ - -#ifdef CONFIG_SOFTMMU - build_trampolines(s); -#endif -} - -#if defined(CONFIG_SOFTMMU) -/* Perform the TLB load and compare. - - Inputs: - ADDRLO and ADDRHI contain the possible two parts of the address. - - MEM_INDEX and S_BITS are the memory context and log2 size of the load. - - WHICH is the offset into the CPUTLBEntry structure of the slot to read. - This should be offsetof addr_read or addr_write. - - The result of the TLB comparison is in %[ix]cc. The sanitized address - is in the returned register, maybe %o0. The TLB addend is in %o1. */ - -static TCGReg tcg_out_tlb_load(TCGContext *s, TCGReg addr, int mem_index, - TCGMemOp s_bits, int which) -{ - const TCGReg r0 = TCG_REG_O0; - const TCGReg r1 = TCG_REG_O1; - const TCGReg r2 = TCG_REG_O2; - int tlb_ofs; - - /* Shift the page number down. */ - tcg_out_arithi(s, r1, addr, TARGET_PAGE_BITS, SHIFT_SRL); - - /* Mask out the page offset, except for the required alignment. */ - tcg_out_movi(s, TCG_TYPE_TL, TCG_REG_T1, - TARGET_PAGE_MASK | ((1 << s_bits) - 1)); - - /* Mask the tlb index. */ - tcg_out_arithi(s, r1, r1, CPU_TLB_SIZE - 1, ARITH_AND); - - /* Mask page, part 2. */ - tcg_out_arith(s, r0, addr, TCG_REG_T1, ARITH_AND); - - /* Shift the tlb index into place. */ - tcg_out_arithi(s, r1, r1, CPU_TLB_ENTRY_BITS, SHIFT_SLL); - - /* Relative to the current ENV. */ - tcg_out_arith(s, r1, TCG_AREG0, r1, ARITH_ADD); - - /* Find a base address that can load both tlb comparator and addend. */ - tlb_ofs = offsetof(CPUArchState, tlb_table[mem_index][0]); - if (!check_fit_ptr(tlb_ofs + sizeof(CPUTLBEntry), 13)) { - if (tlb_ofs & ~0x3ff) { - tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_T1, tlb_ofs & ~0x3ff); - tcg_out_arith(s, r1, r1, TCG_REG_T1, ARITH_ADD); - } - tlb_ofs &= 0x3ff; - } - - /* Load the tlb comparator and the addend. */ - tcg_out_ld(s, TCG_TYPE_TL, r2, r1, tlb_ofs + which); - tcg_out_ld(s, TCG_TYPE_PTR, r1, r1, tlb_ofs+offsetof(CPUTLBEntry, addend)); - - /* subcc arg0, arg2, %g0 */ - tcg_out_cmp(s, r0, r2, 0); - - /* If the guest address must be zero-extended, do so now. */ - if (SPARC64 && TARGET_LONG_BITS == 32) { - tcg_out_arithi(s, r0, addr, 0, SHIFT_SRL); - return r0; - } - return addr; -} -#endif /* CONFIG_SOFTMMU */ - -static const int qemu_ld_opc[16] = { - [MO_UB] = LDUB, - [MO_SB] = LDSB, - - [MO_BEUW] = LDUH, - [MO_BESW] = LDSH, - [MO_BEUL] = LDUW, - [MO_BESL] = LDSW, - [MO_BEQ] = LDX, - - [MO_LEUW] = LDUH_LE, - [MO_LESW] = LDSH_LE, - [MO_LEUL] = LDUW_LE, - [MO_LESL] = LDSW_LE, - [MO_LEQ] = LDX_LE, -}; - -static const int qemu_st_opc[16] = { - [MO_UB] = STB, - - [MO_BEUW] = STH, - [MO_BEUL] = STW, - [MO_BEQ] = STX, - - [MO_LEUW] = STH_LE, - [MO_LEUL] = STW_LE, - [MO_LEQ] = STX_LE, -}; - -static void tcg_out_qemu_ld(TCGContext *s, TCGReg data, TCGReg addr, - TCGMemOpIdx oi, bool is_64) -{ - TCGMemOp memop = get_memop(oi); -#ifdef CONFIG_SOFTMMU - unsigned memi = get_mmuidx(oi); - TCGReg addrz, param; - tcg_insn_unit *func; - tcg_insn_unit *label_ptr; - - addrz = tcg_out_tlb_load(s, addr, memi, memop & MO_SIZE, - offsetof(CPUTLBEntry, addr_read)); - - /* The fast path is exactly one insn. Thus we can perform the - entire TLB Hit in the (annulled) delay slot of the branch - over the TLB Miss case. */ - - /* beq,a,pt %[xi]cc, label0 */ - label_ptr = s->code_ptr; - tcg_out_bpcc0(s, COND_E, BPCC_A | BPCC_PT - | (TARGET_LONG_BITS == 64 ? BPCC_XCC : BPCC_ICC), 0); - /* delay slot */ - tcg_out_ldst_rr(s, data, addrz, TCG_REG_O1, - qemu_ld_opc[memop & (MO_BSWAP | MO_SSIZE)]); - - /* TLB Miss. */ - - param = TCG_REG_O1; - if (!SPARC64 && TARGET_LONG_BITS == 64) { - /* Skip the high-part; we'll perform the extract in the trampoline. */ - param++; - } - tcg_out_mov(s, TCG_TYPE_REG, param++, addr); - - /* We use the helpers to extend SB and SW data, leaving the case - of SL needing explicit extending below. */ - if ((memop & MO_SSIZE) == MO_SL) { - func = qemu_ld_trampoline[memop & (MO_BSWAP | MO_SIZE)]; - } else { - func = qemu_ld_trampoline[memop & (MO_BSWAP | MO_SSIZE)]; - } - assert(func != NULL); - tcg_out_call_nodelay(s, func); - /* delay slot */ - tcg_out_movi(s, TCG_TYPE_I32, param, oi); - - /* Recall that all of the helpers return 64-bit results. - Which complicates things for sparcv8plus. */ - if (SPARC64) { - /* We let the helper sign-extend SB and SW, but leave SL for here. */ - if (is_64 && (memop & MO_SSIZE) == MO_SL) { - tcg_out_arithi(s, data, TCG_REG_O0, 0, SHIFT_SRA); - } else { - tcg_out_mov(s, TCG_TYPE_REG, data, TCG_REG_O0); - } - } else { - if ((memop & MO_SIZE) == MO_64) { - tcg_out_arithi(s, TCG_REG_O0, TCG_REG_O0, 32, SHIFT_SLLX); - tcg_out_arithi(s, TCG_REG_O1, TCG_REG_O1, 0, SHIFT_SRL); - tcg_out_arith(s, data, TCG_REG_O0, TCG_REG_O1, ARITH_OR); - } else if (is_64) { - /* Re-extend from 32-bit rather than reassembling when we - know the high register must be an extension. */ - tcg_out_arithi(s, data, TCG_REG_O1, 0, - memop & MO_SIGN ? SHIFT_SRA : SHIFT_SRL); - } else { - tcg_out_mov(s, TCG_TYPE_I32, data, TCG_REG_O1); - } - } - - *label_ptr |= INSN_OFF19(tcg_ptr_byte_diff(s->code_ptr, label_ptr)); -#else - if (SPARC64 && TARGET_LONG_BITS == 32) { - tcg_out_arithi(s, TCG_REG_T1, addr, 0, SHIFT_SRL); - addr = TCG_REG_T1; - } - tcg_out_ldst_rr(s, data, addr, - (guest_base ? TCG_GUEST_BASE_REG : TCG_REG_G0), - qemu_ld_opc[memop & (MO_BSWAP | MO_SSIZE)]); -#endif /* CONFIG_SOFTMMU */ -} - -static void tcg_out_qemu_st(TCGContext *s, TCGReg data, TCGReg addr, - TCGMemOpIdx oi) -{ - TCGMemOp memop = get_memop(oi); -#ifdef CONFIG_SOFTMMU - unsigned memi = get_mmuidx(oi); - TCGReg addrz, param; - tcg_insn_unit *func; - tcg_insn_unit *label_ptr; - - addrz = tcg_out_tlb_load(s, addr, memi, memop & MO_SIZE, - offsetof(CPUTLBEntry, addr_write)); - - /* The fast path is exactly one insn. Thus we can perform the entire - TLB Hit in the (annulled) delay slot of the branch over TLB Miss. */ - /* beq,a,pt %[xi]cc, label0 */ - label_ptr = s->code_ptr; - tcg_out_bpcc0(s, COND_E, BPCC_A | BPCC_PT - | (TARGET_LONG_BITS == 64 ? BPCC_XCC : BPCC_ICC), 0); - /* delay slot */ - tcg_out_ldst_rr(s, data, addrz, TCG_REG_O1, - qemu_st_opc[memop & (MO_BSWAP | MO_SIZE)]); - - /* TLB Miss. */ - - param = TCG_REG_O1; - if (!SPARC64 && TARGET_LONG_BITS == 64) { - /* Skip the high-part; we'll perform the extract in the trampoline. */ - param++; - } - tcg_out_mov(s, TCG_TYPE_REG, param++, addr); - if (!SPARC64 && (memop & MO_SIZE) == MO_64) { - /* Skip the high-part; we'll perform the extract in the trampoline. */ - param++; - } - tcg_out_mov(s, TCG_TYPE_REG, param++, data); - - func = qemu_st_trampoline[memop & (MO_BSWAP | MO_SIZE)]; - assert(func != NULL); - tcg_out_call_nodelay(s, func); - /* delay slot */ - tcg_out_movi(s, TCG_TYPE_I32, param, oi); - - *label_ptr |= INSN_OFF19(tcg_ptr_byte_diff(s->code_ptr, label_ptr)); -#else - if (SPARC64 && TARGET_LONG_BITS == 32) { - tcg_out_arithi(s, TCG_REG_T1, addr, 0, SHIFT_SRL); - addr = TCG_REG_T1; - } - tcg_out_ldst_rr(s, data, addr, - (guest_base ? TCG_GUEST_BASE_REG : TCG_REG_G0), - qemu_st_opc[memop & (MO_BSWAP | MO_SIZE)]); -#endif /* CONFIG_SOFTMMU */ -} - -static void tcg_out_op(TCGContext *s, TCGOpcode opc, - const TCGArg args[TCG_MAX_OP_ARGS], - const int const_args[TCG_MAX_OP_ARGS]) -{ - TCGArg a0, a1, a2; - int c, c2; - - /* Hoist the loads of the most common arguments. */ - a0 = args[0]; - a1 = args[1]; - a2 = args[2]; - c2 = const_args[2]; - - switch (opc) { - case INDEX_op_exit_tb: - if (check_fit_ptr(a0, 13)) { - tcg_out_arithi(s, TCG_REG_G0, TCG_REG_I7, 8, RETURN); - tcg_out_movi_imm13(s, TCG_REG_O0, a0); - } else { - tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_I0, a0 & ~0x3ff); - tcg_out_arithi(s, TCG_REG_G0, TCG_REG_I7, 8, RETURN); - tcg_out_arithi(s, TCG_REG_O0, TCG_REG_O0, a0 & 0x3ff, ARITH_OR); - } - break; - case INDEX_op_goto_tb: - if (s->tb_jmp_offset) { - /* direct jump method */ - s->tb_jmp_offset[a0] = tcg_current_code_size(s); - /* Make sure to preserve links during retranslation. */ - tcg_out32(s, CALL | (*s->code_ptr & ~INSN_OP(-1))); - } else { - /* indirect jump method */ - tcg_out_ld_ptr(s, TCG_REG_T1, (uintptr_t)(s->tb_next + a0)); - tcg_out_arithi(s, TCG_REG_G0, TCG_REG_T1, 0, JMPL); - } - tcg_out_nop(s); - s->tb_next_offset[a0] = tcg_current_code_size(s); - break; - case INDEX_op_br: - tcg_out_bpcc(s, COND_A, BPCC_PT, arg_label(a0)); - tcg_out_nop(s); - break; - -#define OP_32_64(x) \ - glue(glue(case INDEX_op_, x), _i32): \ - glue(glue(case INDEX_op_, x), _i64) - - OP_32_64(ld8u): - tcg_out_ldst(s, a0, a1, a2, LDUB); - break; - OP_32_64(ld8s): - tcg_out_ldst(s, a0, a1, a2, LDSB); - break; - OP_32_64(ld16u): - tcg_out_ldst(s, a0, a1, a2, LDUH); - break; - OP_32_64(ld16s): - tcg_out_ldst(s, a0, a1, a2, LDSH); - break; - case INDEX_op_ld_i32: - case INDEX_op_ld32u_i64: - tcg_out_ldst(s, a0, a1, a2, LDUW); - break; - OP_32_64(st8): - tcg_out_ldst(s, a0, a1, a2, STB); - break; - OP_32_64(st16): - tcg_out_ldst(s, a0, a1, a2, STH); - break; - case INDEX_op_st_i32: - case INDEX_op_st32_i64: - tcg_out_ldst(s, a0, a1, a2, STW); - break; - OP_32_64(add): - c = ARITH_ADD; - goto gen_arith; - OP_32_64(sub): - c = ARITH_SUB; - goto gen_arith; - OP_32_64(and): - c = ARITH_AND; - goto gen_arith; - OP_32_64(andc): - c = ARITH_ANDN; - goto gen_arith; - OP_32_64(or): - c = ARITH_OR; - goto gen_arith; - OP_32_64(orc): - c = ARITH_ORN; - goto gen_arith; - OP_32_64(xor): - c = ARITH_XOR; - goto gen_arith; - case INDEX_op_shl_i32: - c = SHIFT_SLL; - do_shift32: - /* Limit immediate shift count lest we create an illegal insn. */ - tcg_out_arithc(s, a0, a1, a2 & 31, c2, c); - break; - case INDEX_op_shr_i32: - c = SHIFT_SRL; - goto do_shift32; - case INDEX_op_sar_i32: - c = SHIFT_SRA; - goto do_shift32; - case INDEX_op_mul_i32: - c = ARITH_UMUL; - goto gen_arith; - - OP_32_64(neg): - c = ARITH_SUB; - goto gen_arith1; - OP_32_64(not): - c = ARITH_ORN; - goto gen_arith1; - - case INDEX_op_div_i32: - tcg_out_div32(s, a0, a1, a2, c2, 0); - break; - case INDEX_op_divu_i32: - tcg_out_div32(s, a0, a1, a2, c2, 1); - break; - - case INDEX_op_brcond_i32: - tcg_out_brcond_i32(s, a2, a0, a1, const_args[1], arg_label(args[3])); - break; - case INDEX_op_setcond_i32: - tcg_out_setcond_i32(s, args[3], a0, a1, a2, c2); - break; - case INDEX_op_movcond_i32: - tcg_out_movcond_i32(s, args[5], a0, a1, a2, c2, args[3], const_args[3]); - break; - - case INDEX_op_add2_i32: - tcg_out_addsub2_i32(s, args[0], args[1], args[2], args[3], - args[4], const_args[4], args[5], const_args[5], - ARITH_ADDCC, ARITH_ADDC); - break; - case INDEX_op_sub2_i32: - tcg_out_addsub2_i32(s, args[0], args[1], args[2], args[3], - args[4], const_args[4], args[5], const_args[5], - ARITH_SUBCC, ARITH_SUBC); - break; - case INDEX_op_mulu2_i32: - c = ARITH_UMUL; - goto do_mul2; - case INDEX_op_muls2_i32: - c = ARITH_SMUL; - do_mul2: - /* The 32-bit multiply insns produce a full 64-bit result. If the - destination register can hold it, we can avoid the slower RDY. */ - tcg_out_arithc(s, a0, a2, args[3], const_args[3], c); - if (SPARC64 || a0 <= TCG_REG_O7) { - tcg_out_arithi(s, a1, a0, 32, SHIFT_SRLX); - } else { - tcg_out_rdy(s, a1); - } - break; - - case INDEX_op_qemu_ld_i32: - tcg_out_qemu_ld(s, a0, a1, a2, false); - break; - case INDEX_op_qemu_ld_i64: - tcg_out_qemu_ld(s, a0, a1, a2, true); - break; - case INDEX_op_qemu_st_i32: - case INDEX_op_qemu_st_i64: - tcg_out_qemu_st(s, a0, a1, a2); - break; - - case INDEX_op_ld32s_i64: - tcg_out_ldst(s, a0, a1, a2, LDSW); - break; - case INDEX_op_ld_i64: - tcg_out_ldst(s, a0, a1, a2, LDX); - break; - case INDEX_op_st_i64: - tcg_out_ldst(s, a0, a1, a2, STX); - break; - case INDEX_op_shl_i64: - c = SHIFT_SLLX; - do_shift64: - /* Limit immediate shift count lest we create an illegal insn. */ - tcg_out_arithc(s, a0, a1, a2 & 63, c2, c); - break; - case INDEX_op_shr_i64: - c = SHIFT_SRLX; - goto do_shift64; - case INDEX_op_sar_i64: - c = SHIFT_SRAX; - goto do_shift64; - case INDEX_op_mul_i64: - c = ARITH_MULX; - goto gen_arith; - case INDEX_op_div_i64: - c = ARITH_SDIVX; - goto gen_arith; - case INDEX_op_divu_i64: - c = ARITH_UDIVX; - goto gen_arith; - case INDEX_op_ext_i32_i64: - case INDEX_op_ext32s_i64: - tcg_out_arithi(s, a0, a1, 0, SHIFT_SRA); - break; - case INDEX_op_extu_i32_i64: - case INDEX_op_ext32u_i64: - tcg_out_arithi(s, a0, a1, 0, SHIFT_SRL); - break; - case INDEX_op_extrl_i64_i32: - tcg_out_mov(s, TCG_TYPE_I32, a0, a1); - break; - case INDEX_op_extrh_i64_i32: - tcg_out_arithi(s, a0, a1, 32, SHIFT_SRLX); - break; - - case INDEX_op_brcond_i64: - tcg_out_brcond_i64(s, a2, a0, a1, const_args[1], arg_label(args[3])); - break; - case INDEX_op_setcond_i64: - tcg_out_setcond_i64(s, args[3], a0, a1, a2, c2); - break; - case INDEX_op_movcond_i64: - tcg_out_movcond_i64(s, args[5], a0, a1, a2, c2, args[3], const_args[3]); - break; - case INDEX_op_add2_i64: - tcg_out_addsub2_i64(s, args[0], args[1], args[2], args[3], args[4], - const_args[4], args[5], const_args[5], false); - break; - case INDEX_op_sub2_i64: - tcg_out_addsub2_i64(s, args[0], args[1], args[2], args[3], args[4], - const_args[4], args[5], const_args[5], true); - break; - case INDEX_op_muluh_i64: - tcg_out_arith(s, args[0], args[1], args[2], ARITH_UMULXHI); - break; - - gen_arith: - tcg_out_arithc(s, a0, a1, a2, c2, c); - break; - - gen_arith1: - tcg_out_arithc(s, a0, TCG_REG_G0, a1, const_args[1], c); - break; - - case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */ - case INDEX_op_mov_i64: - case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi. */ - case INDEX_op_movi_i64: - case INDEX_op_call: /* Always emitted via tcg_out_call. */ - default: - tcg_abort(); - } -} - -static const TCGTargetOpDef sparc_op_defs[] = { - { INDEX_op_exit_tb, { } }, - { INDEX_op_goto_tb, { } }, - { INDEX_op_br, { } }, - - { INDEX_op_ld8u_i32, { "r", "r" } }, - { INDEX_op_ld8s_i32, { "r", "r" } }, - { INDEX_op_ld16u_i32, { "r", "r" } }, - { INDEX_op_ld16s_i32, { "r", "r" } }, - { INDEX_op_ld_i32, { "r", "r" } }, - { INDEX_op_st8_i32, { "rZ", "r" } }, - { INDEX_op_st16_i32, { "rZ", "r" } }, - { INDEX_op_st_i32, { "rZ", "r" } }, - - { INDEX_op_add_i32, { "r", "rZ", "rJ" } }, - { INDEX_op_mul_i32, { "r", "rZ", "rJ" } }, - { INDEX_op_div_i32, { "r", "rZ", "rJ" } }, - { INDEX_op_divu_i32, { "r", "rZ", "rJ" } }, - { INDEX_op_sub_i32, { "r", "rZ", "rJ" } }, - { INDEX_op_and_i32, { "r", "rZ", "rJ" } }, - { INDEX_op_andc_i32, { "r", "rZ", "rJ" } }, - { INDEX_op_or_i32, { "r", "rZ", "rJ" } }, - { INDEX_op_orc_i32, { "r", "rZ", "rJ" } }, - { INDEX_op_xor_i32, { "r", "rZ", "rJ" } }, - - { INDEX_op_shl_i32, { "r", "rZ", "rJ" } }, - { INDEX_op_shr_i32, { "r", "rZ", "rJ" } }, - { INDEX_op_sar_i32, { "r", "rZ", "rJ" } }, - - { INDEX_op_neg_i32, { "r", "rJ" } }, - { INDEX_op_not_i32, { "r", "rJ" } }, - - { INDEX_op_brcond_i32, { "rZ", "rJ" } }, - { INDEX_op_setcond_i32, { "r", "rZ", "rJ" } }, - { INDEX_op_movcond_i32, { "r", "rZ", "rJ", "rI", "0" } }, - - { INDEX_op_add2_i32, { "r", "r", "rZ", "rZ", "rJ", "rJ" } }, - { INDEX_op_sub2_i32, { "r", "r", "rZ", "rZ", "rJ", "rJ" } }, - { INDEX_op_mulu2_i32, { "r", "r", "rZ", "rJ" } }, - { INDEX_op_muls2_i32, { "r", "r", "rZ", "rJ" } }, - - { INDEX_op_ld8u_i64, { "R", "r" } }, - { INDEX_op_ld8s_i64, { "R", "r" } }, - { INDEX_op_ld16u_i64, { "R", "r" } }, - { INDEX_op_ld16s_i64, { "R", "r" } }, - { INDEX_op_ld32u_i64, { "R", "r" } }, - { INDEX_op_ld32s_i64, { "R", "r" } }, - { INDEX_op_ld_i64, { "R", "r" } }, - { INDEX_op_st8_i64, { "RZ", "r" } }, - { INDEX_op_st16_i64, { "RZ", "r" } }, - { INDEX_op_st32_i64, { "RZ", "r" } }, - { INDEX_op_st_i64, { "RZ", "r" } }, - - { INDEX_op_add_i64, { "R", "RZ", "RJ" } }, - { INDEX_op_mul_i64, { "R", "RZ", "RJ" } }, - { INDEX_op_div_i64, { "R", "RZ", "RJ" } }, - { INDEX_op_divu_i64, { "R", "RZ", "RJ" } }, - { INDEX_op_sub_i64, { "R", "RZ", "RJ" } }, - { INDEX_op_and_i64, { "R", "RZ", "RJ" } }, - { INDEX_op_andc_i64, { "R", "RZ", "RJ" } }, - { INDEX_op_or_i64, { "R", "RZ", "RJ" } }, - { INDEX_op_orc_i64, { "R", "RZ", "RJ" } }, - { INDEX_op_xor_i64, { "R", "RZ", "RJ" } }, - - { INDEX_op_shl_i64, { "R", "RZ", "RJ" } }, - { INDEX_op_shr_i64, { "R", "RZ", "RJ" } }, - { INDEX_op_sar_i64, { "R", "RZ", "RJ" } }, - - { INDEX_op_neg_i64, { "R", "RJ" } }, - { INDEX_op_not_i64, { "R", "RJ" } }, - - { INDEX_op_ext32s_i64, { "R", "R" } }, - { INDEX_op_ext32u_i64, { "R", "R" } }, - { INDEX_op_ext_i32_i64, { "R", "r" } }, - { INDEX_op_extu_i32_i64, { "R", "r" } }, - { INDEX_op_extrl_i64_i32, { "r", "R" } }, - { INDEX_op_extrh_i64_i32, { "r", "R" } }, - - { INDEX_op_brcond_i64, { "RZ", "RJ" } }, - { INDEX_op_setcond_i64, { "R", "RZ", "RJ" } }, - { INDEX_op_movcond_i64, { "R", "RZ", "RJ", "RI", "0" } }, - - { INDEX_op_add2_i64, { "R", "R", "RZ", "RZ", "RJ", "RI" } }, - { INDEX_op_sub2_i64, { "R", "R", "RZ", "RZ", "RJ", "RI" } }, - { INDEX_op_muluh_i64, { "R", "RZ", "RZ" } }, - - { INDEX_op_qemu_ld_i32, { "r", "A" } }, - { INDEX_op_qemu_ld_i64, { "R", "A" } }, - { INDEX_op_qemu_st_i32, { "sZ", "A" } }, - { INDEX_op_qemu_st_i64, { "SZ", "A" } }, - - { -1 }, -}; - -static void tcg_target_init(TCGContext *s) -{ - /* Only probe for the platform and capabilities if we havn't already - determined maximum values at compile time. */ -#ifndef use_vis3_instructions - { - unsigned long hwcap = qemu_getauxval(AT_HWCAP); - use_vis3_instructions = (hwcap & HWCAP_SPARC_VIS3) != 0; - } -#endif - - tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffffffff); - tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I64], 0, ALL_64); - - tcg_regset_set32(tcg_target_call_clobber_regs, 0, - (1 << TCG_REG_G1) | - (1 << TCG_REG_G2) | - (1 << TCG_REG_G3) | - (1 << TCG_REG_G4) | - (1 << TCG_REG_G5) | - (1 << TCG_REG_G6) | - (1 << TCG_REG_G7) | - (1 << TCG_REG_O0) | - (1 << TCG_REG_O1) | - (1 << TCG_REG_O2) | - (1 << TCG_REG_O3) | - (1 << TCG_REG_O4) | - (1 << TCG_REG_O5) | - (1 << TCG_REG_O7)); - - tcg_regset_clear(s->reserved_regs); - tcg_regset_set_reg(s->reserved_regs, TCG_REG_G0); /* zero */ - tcg_regset_set_reg(s->reserved_regs, TCG_REG_G6); /* reserved for os */ - tcg_regset_set_reg(s->reserved_regs, TCG_REG_G7); /* thread pointer */ - tcg_regset_set_reg(s->reserved_regs, TCG_REG_I6); /* frame pointer */ - tcg_regset_set_reg(s->reserved_regs, TCG_REG_I7); /* return address */ - tcg_regset_set_reg(s->reserved_regs, TCG_REG_O6); /* stack pointer */ - tcg_regset_set_reg(s->reserved_regs, TCG_REG_T1); /* for internal use */ - tcg_regset_set_reg(s->reserved_regs, TCG_REG_T2); /* for internal use */ - - tcg_add_target_add_op_defs(sparc_op_defs); -} - -#if SPARC64 -# define ELF_HOST_MACHINE EM_SPARCV9 -#else -# define ELF_HOST_MACHINE EM_SPARC32PLUS -# define ELF_HOST_FLAGS EF_SPARC_32PLUS -#endif - -typedef struct { - DebugFrameHeader h; - uint8_t fde_def_cfa[SPARC64 ? 4 : 2]; - uint8_t fde_win_save; - uint8_t fde_ret_save[3]; -} DebugFrame; - -static const DebugFrame debug_frame = { - .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */ - .h.cie.id = -1, - .h.cie.version = 1, - .h.cie.code_align = 1, - .h.cie.data_align = -sizeof(void *) & 0x7f, - .h.cie.return_column = 15, /* o7 */ - - /* Total FDE size does not include the "len" member. */ - .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset), - - .fde_def_cfa = { -#if SPARC64 - 12, 30, /* DW_CFA_def_cfa i6, 2047 */ - (2047 & 0x7f) | 0x80, (2047 >> 7) -#else - 13, 30 /* DW_CFA_def_cfa_register i6 */ -#endif - }, - .fde_win_save = 0x2d, /* DW_CFA_GNU_window_save */ - .fde_ret_save = { 9, 15, 31 }, /* DW_CFA_register o7, i7 */ -}; - -void tcg_register_jit(void *buf, size_t buf_size) -{ - tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame)); -} - -void tb_set_jmp_target1(uintptr_t jmp_addr, uintptr_t addr) -{ - uint32_t *ptr = (uint32_t *)jmp_addr; - uintptr_t disp = addr - jmp_addr; - - /* We can reach the entire address space for 32-bit. For 64-bit - the code_gen_buffer can't be larger than 2GB. */ - assert(disp == (int32_t)disp); - - *ptr = CALL | (uint32_t)disp >> 2; - flush_icache_range(jmp_addr, jmp_addr + 4); -} diff --git a/tcg/sparc/tcg-target.inc.c b/tcg/sparc/tcg-target.inc.c new file mode 100644 index 0000000..d3100ab --- /dev/null +++ b/tcg/sparc/tcg-target.inc.c @@ -0,0 +1,1653 @@ +/* + * Tiny Code Generator for QEMU + * + * Copyright (c) 2008 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "qemu/osdep.h" +#include "tcg-be-null.h" + +#ifndef NDEBUG +static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = { + "%g0", + "%g1", + "%g2", + "%g3", + "%g4", + "%g5", + "%g6", + "%g7", + "%o0", + "%o1", + "%o2", + "%o3", + "%o4", + "%o5", + "%o6", + "%o7", + "%l0", + "%l1", + "%l2", + "%l3", + "%l4", + "%l5", + "%l6", + "%l7", + "%i0", + "%i1", + "%i2", + "%i3", + "%i4", + "%i5", + "%i6", + "%i7", +}; +#endif + +#ifdef __arch64__ +# define SPARC64 1 +#else +# define SPARC64 0 +#endif + +/* Note that sparcv8plus can only hold 64 bit quantities in %g and %o + registers. These are saved manually by the kernel in full 64-bit + slots. The %i and %l registers are saved by the register window + mechanism, which only allocates space for 32 bits. Given that this + window spill/fill can happen on any signal, we must consider the + high bits of the %i and %l registers garbage at all times. */ +#if SPARC64 +# define ALL_64 0xffffffffu +#else +# define ALL_64 0xffffu +#endif + +/* Define some temporary registers. T2 is used for constant generation. */ +#define TCG_REG_T1 TCG_REG_G1 +#define TCG_REG_T2 TCG_REG_O7 + +#ifndef CONFIG_SOFTMMU +# define TCG_GUEST_BASE_REG TCG_REG_I5 +#endif + +static const int tcg_target_reg_alloc_order[] = { + TCG_REG_L0, + TCG_REG_L1, + TCG_REG_L2, + TCG_REG_L3, + TCG_REG_L4, + TCG_REG_L5, + TCG_REG_L6, + TCG_REG_L7, + + TCG_REG_I0, + TCG_REG_I1, + TCG_REG_I2, + TCG_REG_I3, + TCG_REG_I4, + TCG_REG_I5, + + TCG_REG_G2, + TCG_REG_G3, + TCG_REG_G4, + TCG_REG_G5, + + TCG_REG_O0, + TCG_REG_O1, + TCG_REG_O2, + TCG_REG_O3, + TCG_REG_O4, + TCG_REG_O5, +}; + +static const int tcg_target_call_iarg_regs[6] = { + TCG_REG_O0, + TCG_REG_O1, + TCG_REG_O2, + TCG_REG_O3, + TCG_REG_O4, + TCG_REG_O5, +}; + +static const int tcg_target_call_oarg_regs[] = { + TCG_REG_O0, + TCG_REG_O1, + TCG_REG_O2, + TCG_REG_O3, +}; + +#define INSN_OP(x) ((x) << 30) +#define INSN_OP2(x) ((x) << 22) +#define INSN_OP3(x) ((x) << 19) +#define INSN_OPF(x) ((x) << 5) +#define INSN_RD(x) ((x) << 25) +#define INSN_RS1(x) ((x) << 14) +#define INSN_RS2(x) (x) +#define INSN_ASI(x) ((x) << 5) + +#define INSN_IMM10(x) ((1 << 13) | ((x) & 0x3ff)) +#define INSN_IMM11(x) ((1 << 13) | ((x) & 0x7ff)) +#define INSN_IMM13(x) ((1 << 13) | ((x) & 0x1fff)) +#define INSN_OFF16(x) ((((x) >> 2) & 0x3fff) | ((((x) >> 16) & 3) << 20)) +#define INSN_OFF19(x) (((x) >> 2) & 0x07ffff) +#define INSN_COND(x) ((x) << 25) + +#define COND_N 0x0 +#define COND_E 0x1 +#define COND_LE 0x2 +#define COND_L 0x3 +#define COND_LEU 0x4 +#define COND_CS 0x5 +#define COND_NEG 0x6 +#define COND_VS 0x7 +#define COND_A 0x8 +#define COND_NE 0x9 +#define COND_G 0xa +#define COND_GE 0xb +#define COND_GU 0xc +#define COND_CC 0xd +#define COND_POS 0xe +#define COND_VC 0xf +#define BA (INSN_OP(0) | INSN_COND(COND_A) | INSN_OP2(0x2)) + +#define RCOND_Z 1 +#define RCOND_LEZ 2 +#define RCOND_LZ 3 +#define RCOND_NZ 5 +#define RCOND_GZ 6 +#define RCOND_GEZ 7 + +#define MOVCC_ICC (1 << 18) +#define MOVCC_XCC (1 << 18 | 1 << 12) + +#define BPCC_ICC 0 +#define BPCC_XCC (2 << 20) +#define BPCC_PT (1 << 19) +#define BPCC_PN 0 +#define BPCC_A (1 << 29) + +#define BPR_PT BPCC_PT + +#define ARITH_ADD (INSN_OP(2) | INSN_OP3(0x00)) +#define ARITH_ADDCC (INSN_OP(2) | INSN_OP3(0x10)) +#define ARITH_AND (INSN_OP(2) | INSN_OP3(0x01)) +#define ARITH_ANDN (INSN_OP(2) | INSN_OP3(0x05)) +#define ARITH_OR (INSN_OP(2) | INSN_OP3(0x02)) +#define ARITH_ORCC (INSN_OP(2) | INSN_OP3(0x12)) +#define ARITH_ORN (INSN_OP(2) | INSN_OP3(0x06)) +#define ARITH_XOR (INSN_OP(2) | INSN_OP3(0x03)) +#define ARITH_SUB (INSN_OP(2) | INSN_OP3(0x04)) +#define ARITH_SUBCC (INSN_OP(2) | INSN_OP3(0x14)) +#define ARITH_ADDC (INSN_OP(2) | INSN_OP3(0x08)) +#define ARITH_SUBC (INSN_OP(2) | INSN_OP3(0x0c)) +#define ARITH_UMUL (INSN_OP(2) | INSN_OP3(0x0a)) +#define ARITH_SMUL (INSN_OP(2) | INSN_OP3(0x0b)) +#define ARITH_UDIV (INSN_OP(2) | INSN_OP3(0x0e)) +#define ARITH_SDIV (INSN_OP(2) | INSN_OP3(0x0f)) +#define ARITH_MULX (INSN_OP(2) | INSN_OP3(0x09)) +#define ARITH_UDIVX (INSN_OP(2) | INSN_OP3(0x0d)) +#define ARITH_SDIVX (INSN_OP(2) | INSN_OP3(0x2d)) +#define ARITH_MOVCC (INSN_OP(2) | INSN_OP3(0x2c)) +#define ARITH_MOVR (INSN_OP(2) | INSN_OP3(0x2f)) + +#define ARITH_ADDXC (INSN_OP(2) | INSN_OP3(0x36) | INSN_OPF(0x11)) +#define ARITH_UMULXHI (INSN_OP(2) | INSN_OP3(0x36) | INSN_OPF(0x16)) + +#define SHIFT_SLL (INSN_OP(2) | INSN_OP3(0x25)) +#define SHIFT_SRL (INSN_OP(2) | INSN_OP3(0x26)) +#define SHIFT_SRA (INSN_OP(2) | INSN_OP3(0x27)) + +#define SHIFT_SLLX (INSN_OP(2) | INSN_OP3(0x25) | (1 << 12)) +#define SHIFT_SRLX (INSN_OP(2) | INSN_OP3(0x26) | (1 << 12)) +#define SHIFT_SRAX (INSN_OP(2) | INSN_OP3(0x27) | (1 << 12)) + +#define RDY (INSN_OP(2) | INSN_OP3(0x28) | INSN_RS1(0)) +#define WRY (INSN_OP(2) | INSN_OP3(0x30) | INSN_RD(0)) +#define JMPL (INSN_OP(2) | INSN_OP3(0x38)) +#define RETURN (INSN_OP(2) | INSN_OP3(0x39)) +#define SAVE (INSN_OP(2) | INSN_OP3(0x3c)) +#define RESTORE (INSN_OP(2) | INSN_OP3(0x3d)) +#define SETHI (INSN_OP(0) | INSN_OP2(0x4)) +#define CALL INSN_OP(1) +#define LDUB (INSN_OP(3) | INSN_OP3(0x01)) +#define LDSB (INSN_OP(3) | INSN_OP3(0x09)) +#define LDUH (INSN_OP(3) | INSN_OP3(0x02)) +#define LDSH (INSN_OP(3) | INSN_OP3(0x0a)) +#define LDUW (INSN_OP(3) | INSN_OP3(0x00)) +#define LDSW (INSN_OP(3) | INSN_OP3(0x08)) +#define LDX (INSN_OP(3) | INSN_OP3(0x0b)) +#define STB (INSN_OP(3) | INSN_OP3(0x05)) +#define STH (INSN_OP(3) | INSN_OP3(0x06)) +#define STW (INSN_OP(3) | INSN_OP3(0x04)) +#define STX (INSN_OP(3) | INSN_OP3(0x0e)) +#define LDUBA (INSN_OP(3) | INSN_OP3(0x11)) +#define LDSBA (INSN_OP(3) | INSN_OP3(0x19)) +#define LDUHA (INSN_OP(3) | INSN_OP3(0x12)) +#define LDSHA (INSN_OP(3) | INSN_OP3(0x1a)) +#define LDUWA (INSN_OP(3) | INSN_OP3(0x10)) +#define LDSWA (INSN_OP(3) | INSN_OP3(0x18)) +#define LDXA (INSN_OP(3) | INSN_OP3(0x1b)) +#define STBA (INSN_OP(3) | INSN_OP3(0x15)) +#define STHA (INSN_OP(3) | INSN_OP3(0x16)) +#define STWA (INSN_OP(3) | INSN_OP3(0x14)) +#define STXA (INSN_OP(3) | INSN_OP3(0x1e)) + +#ifndef ASI_PRIMARY_LITTLE +#define ASI_PRIMARY_LITTLE 0x88 +#endif + +#define LDUH_LE (LDUHA | INSN_ASI(ASI_PRIMARY_LITTLE)) +#define LDSH_LE (LDSHA | INSN_ASI(ASI_PRIMARY_LITTLE)) +#define LDUW_LE (LDUWA | INSN_ASI(ASI_PRIMARY_LITTLE)) +#define LDSW_LE (LDSWA | INSN_ASI(ASI_PRIMARY_LITTLE)) +#define LDX_LE (LDXA | INSN_ASI(ASI_PRIMARY_LITTLE)) + +#define STH_LE (STHA | INSN_ASI(ASI_PRIMARY_LITTLE)) +#define STW_LE (STWA | INSN_ASI(ASI_PRIMARY_LITTLE)) +#define STX_LE (STXA | INSN_ASI(ASI_PRIMARY_LITTLE)) + +#ifndef use_vis3_instructions +bool use_vis3_instructions; +#endif + +static inline int check_fit_i64(int64_t val, unsigned int bits) +{ + return val == sextract64(val, 0, bits); +} + +static inline int check_fit_i32(int32_t val, unsigned int bits) +{ + return val == sextract32(val, 0, bits); +} + +#define check_fit_tl check_fit_i64 +#if SPARC64 +# define check_fit_ptr check_fit_i64 +#else +# define check_fit_ptr check_fit_i32 +#endif + +static void patch_reloc(tcg_insn_unit *code_ptr, int type, + intptr_t value, intptr_t addend) +{ + uint32_t insn; + + assert(addend == 0); + value = tcg_ptr_byte_diff((tcg_insn_unit *)value, code_ptr); + + switch (type) { + case R_SPARC_WDISP16: + if (!check_fit_ptr(value >> 2, 16)) { + tcg_abort(); + } + insn = *code_ptr; + insn &= ~INSN_OFF16(-1); + insn |= INSN_OFF16(value); + *code_ptr = insn; + break; + case R_SPARC_WDISP19: + if (!check_fit_ptr(value >> 2, 19)) { + tcg_abort(); + } + insn = *code_ptr; + insn &= ~INSN_OFF19(-1); + insn |= INSN_OFF19(value); + *code_ptr = insn; + break; + default: + tcg_abort(); + } +} + +/* parse target specific constraints */ +static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str) +{ + const char *ct_str; + + ct_str = *pct_str; + switch (ct_str[0]) { + case 'r': + ct->ct |= TCG_CT_REG; + tcg_regset_set32(ct->u.regs, 0, 0xffffffff); + break; + case 'R': + ct->ct |= TCG_CT_REG; + tcg_regset_set32(ct->u.regs, 0, ALL_64); + break; + case 'A': /* qemu_ld/st address constraint */ + ct->ct |= TCG_CT_REG; + tcg_regset_set32(ct->u.regs, 0, + TARGET_LONG_BITS == 64 ? ALL_64 : 0xffffffff); + reserve_helpers: + tcg_regset_reset_reg(ct->u.regs, TCG_REG_O0); + tcg_regset_reset_reg(ct->u.regs, TCG_REG_O1); + tcg_regset_reset_reg(ct->u.regs, TCG_REG_O2); + break; + case 's': /* qemu_st data 32-bit constraint */ + ct->ct |= TCG_CT_REG; + tcg_regset_set32(ct->u.regs, 0, 0xffffffff); + goto reserve_helpers; + case 'S': /* qemu_st data 64-bit constraint */ + ct->ct |= TCG_CT_REG; + tcg_regset_set32(ct->u.regs, 0, ALL_64); + goto reserve_helpers; + case 'I': + ct->ct |= TCG_CT_CONST_S11; + break; + case 'J': + ct->ct |= TCG_CT_CONST_S13; + break; + case 'Z': + ct->ct |= TCG_CT_CONST_ZERO; + break; + default: + return -1; + } + ct_str++; + *pct_str = ct_str; + return 0; +} + +/* test if a constant matches the constraint */ +static inline int tcg_target_const_match(tcg_target_long val, TCGType type, + const TCGArgConstraint *arg_ct) +{ + int ct = arg_ct->ct; + + if (ct & TCG_CT_CONST) { + return 1; + } + + if (type == TCG_TYPE_I32) { + val = (int32_t)val; + } + + if ((ct & TCG_CT_CONST_ZERO) && val == 0) { + return 1; + } else if ((ct & TCG_CT_CONST_S11) && check_fit_tl(val, 11)) { + return 1; + } else if ((ct & TCG_CT_CONST_S13) && check_fit_tl(val, 13)) { + return 1; + } else { + return 0; + } +} + +static inline void tcg_out_arith(TCGContext *s, TCGReg rd, TCGReg rs1, + TCGReg rs2, int op) +{ + tcg_out32(s, op | INSN_RD(rd) | INSN_RS1(rs1) | INSN_RS2(rs2)); +} + +static inline void tcg_out_arithi(TCGContext *s, TCGReg rd, TCGReg rs1, + int32_t offset, int op) +{ + tcg_out32(s, op | INSN_RD(rd) | INSN_RS1(rs1) | INSN_IMM13(offset)); +} + +static void tcg_out_arithc(TCGContext *s, TCGReg rd, TCGReg rs1, + int32_t val2, int val2const, int op) +{ + tcg_out32(s, op | INSN_RD(rd) | INSN_RS1(rs1) + | (val2const ? INSN_IMM13(val2) : INSN_RS2(val2))); +} + +static inline void tcg_out_mov(TCGContext *s, TCGType type, + TCGReg ret, TCGReg arg) +{ + if (ret != arg) { + tcg_out_arith(s, ret, arg, TCG_REG_G0, ARITH_OR); + } +} + +static inline void tcg_out_sethi(TCGContext *s, TCGReg ret, uint32_t arg) +{ + tcg_out32(s, SETHI | INSN_RD(ret) | ((arg & 0xfffffc00) >> 10)); +} + +static inline void tcg_out_movi_imm13(TCGContext *s, TCGReg ret, int32_t arg) +{ + tcg_out_arithi(s, ret, TCG_REG_G0, arg, ARITH_OR); +} + +static void tcg_out_movi(TCGContext *s, TCGType type, + TCGReg ret, tcg_target_long arg) +{ + tcg_target_long hi, lo = (int32_t)arg; + + /* Make sure we test 32-bit constants for imm13 properly. */ + if (type == TCG_TYPE_I32) { + arg = lo; + } + + /* A 13-bit constant sign-extended to 64-bits. */ + if (check_fit_tl(arg, 13)) { + tcg_out_movi_imm13(s, ret, arg); + return; + } + + /* A 32-bit constant, or 32-bit zero-extended to 64-bits. */ + if (type == TCG_TYPE_I32 || arg == (uint32_t)arg) { + tcg_out_sethi(s, ret, arg); + if (arg & 0x3ff) { + tcg_out_arithi(s, ret, ret, arg & 0x3ff, ARITH_OR); + } + return; + } + + /* A 32-bit constant sign-extended to 64-bits. */ + if (arg == lo) { + tcg_out_sethi(s, ret, ~arg); + tcg_out_arithi(s, ret, ret, (arg & 0x3ff) | -0x400, ARITH_XOR); + return; + } + + /* A 64-bit constant decomposed into 2 32-bit pieces. */ + if (check_fit_i32(lo, 13)) { + hi = (arg - lo) >> 32; + tcg_out_movi(s, TCG_TYPE_I32, ret, hi); + tcg_out_arithi(s, ret, ret, 32, SHIFT_SLLX); + tcg_out_arithi(s, ret, ret, lo, ARITH_ADD); + } else { + hi = arg >> 32; + tcg_out_movi(s, TCG_TYPE_I32, ret, hi); + tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_T2, lo); + tcg_out_arithi(s, ret, ret, 32, SHIFT_SLLX); + tcg_out_arith(s, ret, ret, TCG_REG_T2, ARITH_OR); + } +} + +static inline void tcg_out_ldst_rr(TCGContext *s, TCGReg data, TCGReg a1, + TCGReg a2, int op) +{ + tcg_out32(s, op | INSN_RD(data) | INSN_RS1(a1) | INSN_RS2(a2)); +} + +static void tcg_out_ldst(TCGContext *s, TCGReg ret, TCGReg addr, + intptr_t offset, int op) +{ + if (check_fit_ptr(offset, 13)) { + tcg_out32(s, op | INSN_RD(ret) | INSN_RS1(addr) | + INSN_IMM13(offset)); + } else { + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_T1, offset); + tcg_out_ldst_rr(s, ret, addr, TCG_REG_T1, op); + } +} + +static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, + TCGReg arg1, intptr_t arg2) +{ + tcg_out_ldst(s, ret, arg1, arg2, (type == TCG_TYPE_I32 ? LDUW : LDX)); +} + +static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, + TCGReg arg1, intptr_t arg2) +{ + tcg_out_ldst(s, arg, arg1, arg2, (type == TCG_TYPE_I32 ? STW : STX)); +} + +static void tcg_out_ld_ptr(TCGContext *s, TCGReg ret, uintptr_t arg) +{ + tcg_out_movi(s, TCG_TYPE_PTR, ret, arg & ~0x3ff); + tcg_out_ld(s, TCG_TYPE_PTR, ret, ret, arg & 0x3ff); +} + +static inline void tcg_out_sety(TCGContext *s, TCGReg rs) +{ + tcg_out32(s, WRY | INSN_RS1(TCG_REG_G0) | INSN_RS2(rs)); +} + +static inline void tcg_out_rdy(TCGContext *s, TCGReg rd) +{ + tcg_out32(s, RDY | INSN_RD(rd)); +} + +static void tcg_out_div32(TCGContext *s, TCGReg rd, TCGReg rs1, + int32_t val2, int val2const, int uns) +{ + /* Load Y with the sign/zero extension of RS1 to 64-bits. */ + if (uns) { + tcg_out_sety(s, TCG_REG_G0); + } else { + tcg_out_arithi(s, TCG_REG_T1, rs1, 31, SHIFT_SRA); + tcg_out_sety(s, TCG_REG_T1); + } + + tcg_out_arithc(s, rd, rs1, val2, val2const, + uns ? ARITH_UDIV : ARITH_SDIV); +} + +static inline void tcg_out_nop(TCGContext *s) +{ + tcg_out_sethi(s, TCG_REG_G0, 0); +} + +static const uint8_t tcg_cond_to_bcond[] = { + [TCG_COND_EQ] = COND_E, + [TCG_COND_NE] = COND_NE, + [TCG_COND_LT] = COND_L, + [TCG_COND_GE] = COND_GE, + [TCG_COND_LE] = COND_LE, + [TCG_COND_GT] = COND_G, + [TCG_COND_LTU] = COND_CS, + [TCG_COND_GEU] = COND_CC, + [TCG_COND_LEU] = COND_LEU, + [TCG_COND_GTU] = COND_GU, +}; + +static const uint8_t tcg_cond_to_rcond[] = { + [TCG_COND_EQ] = RCOND_Z, + [TCG_COND_NE] = RCOND_NZ, + [TCG_COND_LT] = RCOND_LZ, + [TCG_COND_GT] = RCOND_GZ, + [TCG_COND_LE] = RCOND_LEZ, + [TCG_COND_GE] = RCOND_GEZ +}; + +static void tcg_out_bpcc0(TCGContext *s, int scond, int flags, int off19) +{ + tcg_out32(s, INSN_OP(0) | INSN_OP2(1) | INSN_COND(scond) | flags | off19); +} + +static void tcg_out_bpcc(TCGContext *s, int scond, int flags, TCGLabel *l) +{ + int off19; + + if (l->has_value) { + off19 = INSN_OFF19(tcg_pcrel_diff(s, l->u.value_ptr)); + } else { + /* Make sure to preserve destinations during retranslation. */ + off19 = *s->code_ptr & INSN_OFF19(-1); + tcg_out_reloc(s, s->code_ptr, R_SPARC_WDISP19, l, 0); + } + tcg_out_bpcc0(s, scond, flags, off19); +} + +static void tcg_out_cmp(TCGContext *s, TCGReg c1, int32_t c2, int c2const) +{ + tcg_out_arithc(s, TCG_REG_G0, c1, c2, c2const, ARITH_SUBCC); +} + +static void tcg_out_brcond_i32(TCGContext *s, TCGCond cond, TCGReg arg1, + int32_t arg2, int const_arg2, TCGLabel *l) +{ + tcg_out_cmp(s, arg1, arg2, const_arg2); + tcg_out_bpcc(s, tcg_cond_to_bcond[cond], BPCC_ICC | BPCC_PT, l); + tcg_out_nop(s); +} + +static void tcg_out_movcc(TCGContext *s, TCGCond cond, int cc, TCGReg ret, + int32_t v1, int v1const) +{ + tcg_out32(s, ARITH_MOVCC | cc | INSN_RD(ret) + | INSN_RS1(tcg_cond_to_bcond[cond]) + | (v1const ? INSN_IMM11(v1) : INSN_RS2(v1))); +} + +static void tcg_out_movcond_i32(TCGContext *s, TCGCond cond, TCGReg ret, + TCGReg c1, int32_t c2, int c2const, + int32_t v1, int v1const) +{ + tcg_out_cmp(s, c1, c2, c2const); + tcg_out_movcc(s, cond, MOVCC_ICC, ret, v1, v1const); +} + +static void tcg_out_brcond_i64(TCGContext *s, TCGCond cond, TCGReg arg1, + int32_t arg2, int const_arg2, TCGLabel *l) +{ + /* For 64-bit signed comparisons vs zero, we can avoid the compare. */ + if (arg2 == 0 && !is_unsigned_cond(cond)) { + int off16; + + if (l->has_value) { + off16 = INSN_OFF16(tcg_pcrel_diff(s, l->u.value_ptr)); + } else { + /* Make sure to preserve destinations during retranslation. */ + off16 = *s->code_ptr & INSN_OFF16(-1); + tcg_out_reloc(s, s->code_ptr, R_SPARC_WDISP16, l, 0); + } + tcg_out32(s, INSN_OP(0) | INSN_OP2(3) | BPR_PT | INSN_RS1(arg1) + | INSN_COND(tcg_cond_to_rcond[cond]) | off16); + } else { + tcg_out_cmp(s, arg1, arg2, const_arg2); + tcg_out_bpcc(s, tcg_cond_to_bcond[cond], BPCC_XCC | BPCC_PT, l); + } + tcg_out_nop(s); +} + +static void tcg_out_movr(TCGContext *s, TCGCond cond, TCGReg ret, TCGReg c1, + int32_t v1, int v1const) +{ + tcg_out32(s, ARITH_MOVR | INSN_RD(ret) | INSN_RS1(c1) + | (tcg_cond_to_rcond[cond] << 10) + | (v1const ? INSN_IMM10(v1) : INSN_RS2(v1))); +} + +static void tcg_out_movcond_i64(TCGContext *s, TCGCond cond, TCGReg ret, + TCGReg c1, int32_t c2, int c2const, + int32_t v1, int v1const) +{ + /* For 64-bit signed comparisons vs zero, we can avoid the compare. + Note that the immediate range is one bit smaller, so we must check + for that as well. */ + if (c2 == 0 && !is_unsigned_cond(cond) + && (!v1const || check_fit_i32(v1, 10))) { + tcg_out_movr(s, cond, ret, c1, v1, v1const); + } else { + tcg_out_cmp(s, c1, c2, c2const); + tcg_out_movcc(s, cond, MOVCC_XCC, ret, v1, v1const); + } +} + +static void tcg_out_setcond_i32(TCGContext *s, TCGCond cond, TCGReg ret, + TCGReg c1, int32_t c2, int c2const) +{ + /* For 32-bit comparisons, we can play games with ADDC/SUBC. */ + switch (cond) { + case TCG_COND_LTU: + case TCG_COND_GEU: + /* The result of the comparison is in the carry bit. */ + break; + + case TCG_COND_EQ: + case TCG_COND_NE: + /* For equality, we can transform to inequality vs zero. */ + if (c2 != 0) { + tcg_out_arithc(s, TCG_REG_T1, c1, c2, c2const, ARITH_XOR); + c2 = TCG_REG_T1; + } else { + c2 = c1; + } + c1 = TCG_REG_G0, c2const = 0; + cond = (cond == TCG_COND_EQ ? TCG_COND_GEU : TCG_COND_LTU); + break; + + case TCG_COND_GTU: + case TCG_COND_LEU: + /* If we don't need to load a constant into a register, we can + swap the operands on GTU/LEU. There's no benefit to loading + the constant into a temporary register. */ + if (!c2const || c2 == 0) { + TCGReg t = c1; + c1 = c2; + c2 = t; + c2const = 0; + cond = tcg_swap_cond(cond); + break; + } + /* FALLTHRU */ + + default: + tcg_out_cmp(s, c1, c2, c2const); + tcg_out_movi_imm13(s, ret, 0); + tcg_out_movcc(s, cond, MOVCC_ICC, ret, 1, 1); + return; + } + + tcg_out_cmp(s, c1, c2, c2const); + if (cond == TCG_COND_LTU) { + tcg_out_arithi(s, ret, TCG_REG_G0, 0, ARITH_ADDC); + } else { + tcg_out_arithi(s, ret, TCG_REG_G0, -1, ARITH_SUBC); + } +} + +static void tcg_out_setcond_i64(TCGContext *s, TCGCond cond, TCGReg ret, + TCGReg c1, int32_t c2, int c2const) +{ + if (use_vis3_instructions) { + switch (cond) { + case TCG_COND_NE: + if (c2 != 0) { + break; + } + c2 = c1, c2const = 0, c1 = TCG_REG_G0; + /* FALLTHRU */ + case TCG_COND_LTU: + tcg_out_cmp(s, c1, c2, c2const); + tcg_out_arith(s, ret, TCG_REG_G0, TCG_REG_G0, ARITH_ADDXC); + return; + default: + break; + } + } + + /* For 64-bit signed comparisons vs zero, we can avoid the compare + if the input does not overlap the output. */ + if (c2 == 0 && !is_unsigned_cond(cond) && c1 != ret) { + tcg_out_movi_imm13(s, ret, 0); + tcg_out_movr(s, cond, ret, c1, 1, 1); + } else { + tcg_out_cmp(s, c1, c2, c2const); + tcg_out_movi_imm13(s, ret, 0); + tcg_out_movcc(s, cond, MOVCC_XCC, ret, 1, 1); + } +} + +static void tcg_out_addsub2_i32(TCGContext *s, TCGReg rl, TCGReg rh, + TCGReg al, TCGReg ah, int32_t bl, int blconst, + int32_t bh, int bhconst, int opl, int oph) +{ + TCGReg tmp = TCG_REG_T1; + + /* Note that the low parts are fully consumed before tmp is set. */ + if (rl != ah && (bhconst || rl != bh)) { + tmp = rl; + } + + tcg_out_arithc(s, tmp, al, bl, blconst, opl); + tcg_out_arithc(s, rh, ah, bh, bhconst, oph); + tcg_out_mov(s, TCG_TYPE_I32, rl, tmp); +} + +static void tcg_out_addsub2_i64(TCGContext *s, TCGReg rl, TCGReg rh, + TCGReg al, TCGReg ah, int32_t bl, int blconst, + int32_t bh, int bhconst, bool is_sub) +{ + TCGReg tmp = TCG_REG_T1; + + /* Note that the low parts are fully consumed before tmp is set. */ + if (rl != ah && (bhconst || rl != bh)) { + tmp = rl; + } + + tcg_out_arithc(s, tmp, al, bl, blconst, is_sub ? ARITH_SUBCC : ARITH_ADDCC); + + if (use_vis3_instructions && !is_sub) { + /* Note that ADDXC doesn't accept immediates. */ + if (bhconst && bh != 0) { + tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_T2, bh); + bh = TCG_REG_T2; + } + tcg_out_arith(s, rh, ah, bh, ARITH_ADDXC); + } else if (bh == TCG_REG_G0) { + /* If we have a zero, we can perform the operation in two insns, + with the arithmetic first, and a conditional move into place. */ + if (rh == ah) { + tcg_out_arithi(s, TCG_REG_T2, ah, 1, + is_sub ? ARITH_SUB : ARITH_ADD); + tcg_out_movcc(s, TCG_COND_LTU, MOVCC_XCC, rh, TCG_REG_T2, 0); + } else { + tcg_out_arithi(s, rh, ah, 1, is_sub ? ARITH_SUB : ARITH_ADD); + tcg_out_movcc(s, TCG_COND_GEU, MOVCC_XCC, rh, ah, 0); + } + } else { + /* Otherwise adjust BH as if there is carry into T2 ... */ + if (bhconst) { + tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_T2, bh + (is_sub ? -1 : 1)); + } else { + tcg_out_arithi(s, TCG_REG_T2, bh, 1, + is_sub ? ARITH_SUB : ARITH_ADD); + } + /* ... smoosh T2 back to original BH if carry is clear ... */ + tcg_out_movcc(s, TCG_COND_GEU, MOVCC_XCC, TCG_REG_T2, bh, bhconst); + /* ... and finally perform the arithmetic with the new operand. */ + tcg_out_arith(s, rh, ah, TCG_REG_T2, is_sub ? ARITH_SUB : ARITH_ADD); + } + + tcg_out_mov(s, TCG_TYPE_I64, rl, tmp); +} + +static void tcg_out_call_nodelay(TCGContext *s, tcg_insn_unit *dest) +{ + ptrdiff_t disp = tcg_pcrel_diff(s, dest); + + if (disp == (int32_t)disp) { + tcg_out32(s, CALL | (uint32_t)disp >> 2); + } else { + uintptr_t desti = (uintptr_t)dest; + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_T1, desti & ~0xfff); + tcg_out_arithi(s, TCG_REG_O7, TCG_REG_T1, desti & 0xfff, JMPL); + } +} + +static void tcg_out_call(TCGContext *s, tcg_insn_unit *dest) +{ + tcg_out_call_nodelay(s, dest); + tcg_out_nop(s); +} + +#ifdef CONFIG_SOFTMMU +static tcg_insn_unit *qemu_ld_trampoline[16]; +static tcg_insn_unit *qemu_st_trampoline[16]; + +static void build_trampolines(TCGContext *s) +{ + static void * const qemu_ld_helpers[16] = { + [MO_UB] = helper_ret_ldub_mmu, + [MO_SB] = helper_ret_ldsb_mmu, + [MO_LEUW] = helper_le_lduw_mmu, + [MO_LESW] = helper_le_ldsw_mmu, + [MO_LEUL] = helper_le_ldul_mmu, + [MO_LEQ] = helper_le_ldq_mmu, + [MO_BEUW] = helper_be_lduw_mmu, + [MO_BESW] = helper_be_ldsw_mmu, + [MO_BEUL] = helper_be_ldul_mmu, + [MO_BEQ] = helper_be_ldq_mmu, + }; + static void * const qemu_st_helpers[16] = { + [MO_UB] = helper_ret_stb_mmu, + [MO_LEUW] = helper_le_stw_mmu, + [MO_LEUL] = helper_le_stl_mmu, + [MO_LEQ] = helper_le_stq_mmu, + [MO_BEUW] = helper_be_stw_mmu, + [MO_BEUL] = helper_be_stl_mmu, + [MO_BEQ] = helper_be_stq_mmu, + }; + + int i; + TCGReg ra; + + for (i = 0; i < 16; ++i) { + if (qemu_ld_helpers[i] == NULL) { + continue; + } + + /* May as well align the trampoline. */ + while ((uintptr_t)s->code_ptr & 15) { + tcg_out_nop(s); + } + qemu_ld_trampoline[i] = s->code_ptr; + + if (SPARC64 || TARGET_LONG_BITS == 32) { + ra = TCG_REG_O3; + } else { + /* Install the high part of the address. */ + tcg_out_arithi(s, TCG_REG_O1, TCG_REG_O2, 32, SHIFT_SRLX); + ra = TCG_REG_O4; + } + + /* Set the retaddr operand. */ + tcg_out_mov(s, TCG_TYPE_PTR, ra, TCG_REG_O7); + /* Set the env operand. */ + tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_O0, TCG_AREG0); + /* Tail call. */ + tcg_out_call_nodelay(s, qemu_ld_helpers[i]); + tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_O7, ra); + } + + for (i = 0; i < 16; ++i) { + if (qemu_st_helpers[i] == NULL) { + continue; + } + + /* May as well align the trampoline. */ + while ((uintptr_t)s->code_ptr & 15) { + tcg_out_nop(s); + } + qemu_st_trampoline[i] = s->code_ptr; + + if (SPARC64) { + ra = TCG_REG_O4; + } else { + ra = TCG_REG_O1; + if (TARGET_LONG_BITS == 64) { + /* Install the high part of the address. */ + tcg_out_arithi(s, ra, ra + 1, 32, SHIFT_SRLX); + ra += 2; + } else { + ra += 1; + } + if ((i & MO_SIZE) == MO_64) { + /* Install the high part of the data. */ + tcg_out_arithi(s, ra, ra + 1, 32, SHIFT_SRLX); + ra += 2; + } else { + ra += 1; + } + /* Skip the oi argument. */ + ra += 1; + } + + /* Set the retaddr operand. */ + if (ra >= TCG_REG_O6) { + tcg_out_st(s, TCG_TYPE_PTR, TCG_REG_O7, TCG_REG_CALL_STACK, + TCG_TARGET_CALL_STACK_OFFSET); + ra = TCG_REG_G1; + } + tcg_out_mov(s, TCG_TYPE_PTR, ra, TCG_REG_O7); + /* Set the env operand. */ + tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_O0, TCG_AREG0); + /* Tail call. */ + tcg_out_call_nodelay(s, qemu_st_helpers[i]); + tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_O7, ra); + } +} +#endif + +/* Generate global QEMU prologue and epilogue code */ +static void tcg_target_qemu_prologue(TCGContext *s) +{ + int tmp_buf_size, frame_size; + + /* The TCG temp buffer is at the top of the frame, immediately + below the frame pointer. */ + tmp_buf_size = CPU_TEMP_BUF_NLONGS * (int)sizeof(long); + tcg_set_frame(s, TCG_REG_I6, TCG_TARGET_STACK_BIAS - tmp_buf_size, + tmp_buf_size); + + /* TCG_TARGET_CALL_STACK_OFFSET includes the stack bias, but is + otherwise the minimal frame usable by callees. */ + frame_size = TCG_TARGET_CALL_STACK_OFFSET - TCG_TARGET_STACK_BIAS; + frame_size += TCG_STATIC_CALL_ARGS_SIZE + tmp_buf_size; + frame_size += TCG_TARGET_STACK_ALIGN - 1; + frame_size &= -TCG_TARGET_STACK_ALIGN; + tcg_out32(s, SAVE | INSN_RD(TCG_REG_O6) | INSN_RS1(TCG_REG_O6) | + INSN_IMM13(-frame_size)); + +#ifndef CONFIG_SOFTMMU + if (guest_base != 0) { + tcg_out_movi(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, guest_base); + tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG); + } +#endif + + tcg_out_arithi(s, TCG_REG_G0, TCG_REG_I1, 0, JMPL); + /* delay slot */ + tcg_out_nop(s); + + /* No epilogue required. We issue ret + restore directly in the TB. */ + +#ifdef CONFIG_SOFTMMU + build_trampolines(s); +#endif +} + +#if defined(CONFIG_SOFTMMU) +/* Perform the TLB load and compare. + + Inputs: + ADDRLO and ADDRHI contain the possible two parts of the address. + + MEM_INDEX and S_BITS are the memory context and log2 size of the load. + + WHICH is the offset into the CPUTLBEntry structure of the slot to read. + This should be offsetof addr_read or addr_write. + + The result of the TLB comparison is in %[ix]cc. The sanitized address + is in the returned register, maybe %o0. The TLB addend is in %o1. */ + +static TCGReg tcg_out_tlb_load(TCGContext *s, TCGReg addr, int mem_index, + TCGMemOp s_bits, int which) +{ + const TCGReg r0 = TCG_REG_O0; + const TCGReg r1 = TCG_REG_O1; + const TCGReg r2 = TCG_REG_O2; + int tlb_ofs; + + /* Shift the page number down. */ + tcg_out_arithi(s, r1, addr, TARGET_PAGE_BITS, SHIFT_SRL); + + /* Mask out the page offset, except for the required alignment. */ + tcg_out_movi(s, TCG_TYPE_TL, TCG_REG_T1, + TARGET_PAGE_MASK | ((1 << s_bits) - 1)); + + /* Mask the tlb index. */ + tcg_out_arithi(s, r1, r1, CPU_TLB_SIZE - 1, ARITH_AND); + + /* Mask page, part 2. */ + tcg_out_arith(s, r0, addr, TCG_REG_T1, ARITH_AND); + + /* Shift the tlb index into place. */ + tcg_out_arithi(s, r1, r1, CPU_TLB_ENTRY_BITS, SHIFT_SLL); + + /* Relative to the current ENV. */ + tcg_out_arith(s, r1, TCG_AREG0, r1, ARITH_ADD); + + /* Find a base address that can load both tlb comparator and addend. */ + tlb_ofs = offsetof(CPUArchState, tlb_table[mem_index][0]); + if (!check_fit_ptr(tlb_ofs + sizeof(CPUTLBEntry), 13)) { + if (tlb_ofs & ~0x3ff) { + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_T1, tlb_ofs & ~0x3ff); + tcg_out_arith(s, r1, r1, TCG_REG_T1, ARITH_ADD); + } + tlb_ofs &= 0x3ff; + } + + /* Load the tlb comparator and the addend. */ + tcg_out_ld(s, TCG_TYPE_TL, r2, r1, tlb_ofs + which); + tcg_out_ld(s, TCG_TYPE_PTR, r1, r1, tlb_ofs+offsetof(CPUTLBEntry, addend)); + + /* subcc arg0, arg2, %g0 */ + tcg_out_cmp(s, r0, r2, 0); + + /* If the guest address must be zero-extended, do so now. */ + if (SPARC64 && TARGET_LONG_BITS == 32) { + tcg_out_arithi(s, r0, addr, 0, SHIFT_SRL); + return r0; + } + return addr; +} +#endif /* CONFIG_SOFTMMU */ + +static const int qemu_ld_opc[16] = { + [MO_UB] = LDUB, + [MO_SB] = LDSB, + + [MO_BEUW] = LDUH, + [MO_BESW] = LDSH, + [MO_BEUL] = LDUW, + [MO_BESL] = LDSW, + [MO_BEQ] = LDX, + + [MO_LEUW] = LDUH_LE, + [MO_LESW] = LDSH_LE, + [MO_LEUL] = LDUW_LE, + [MO_LESL] = LDSW_LE, + [MO_LEQ] = LDX_LE, +}; + +static const int qemu_st_opc[16] = { + [MO_UB] = STB, + + [MO_BEUW] = STH, + [MO_BEUL] = STW, + [MO_BEQ] = STX, + + [MO_LEUW] = STH_LE, + [MO_LEUL] = STW_LE, + [MO_LEQ] = STX_LE, +}; + +static void tcg_out_qemu_ld(TCGContext *s, TCGReg data, TCGReg addr, + TCGMemOpIdx oi, bool is_64) +{ + TCGMemOp memop = get_memop(oi); +#ifdef CONFIG_SOFTMMU + unsigned memi = get_mmuidx(oi); + TCGReg addrz, param; + tcg_insn_unit *func; + tcg_insn_unit *label_ptr; + + addrz = tcg_out_tlb_load(s, addr, memi, memop & MO_SIZE, + offsetof(CPUTLBEntry, addr_read)); + + /* The fast path is exactly one insn. Thus we can perform the + entire TLB Hit in the (annulled) delay slot of the branch + over the TLB Miss case. */ + + /* beq,a,pt %[xi]cc, label0 */ + label_ptr = s->code_ptr; + tcg_out_bpcc0(s, COND_E, BPCC_A | BPCC_PT + | (TARGET_LONG_BITS == 64 ? BPCC_XCC : BPCC_ICC), 0); + /* delay slot */ + tcg_out_ldst_rr(s, data, addrz, TCG_REG_O1, + qemu_ld_opc[memop & (MO_BSWAP | MO_SSIZE)]); + + /* TLB Miss. */ + + param = TCG_REG_O1; + if (!SPARC64 && TARGET_LONG_BITS == 64) { + /* Skip the high-part; we'll perform the extract in the trampoline. */ + param++; + } + tcg_out_mov(s, TCG_TYPE_REG, param++, addr); + + /* We use the helpers to extend SB and SW data, leaving the case + of SL needing explicit extending below. */ + if ((memop & MO_SSIZE) == MO_SL) { + func = qemu_ld_trampoline[memop & (MO_BSWAP | MO_SIZE)]; + } else { + func = qemu_ld_trampoline[memop & (MO_BSWAP | MO_SSIZE)]; + } + assert(func != NULL); + tcg_out_call_nodelay(s, func); + /* delay slot */ + tcg_out_movi(s, TCG_TYPE_I32, param, oi); + + /* Recall that all of the helpers return 64-bit results. + Which complicates things for sparcv8plus. */ + if (SPARC64) { + /* We let the helper sign-extend SB and SW, but leave SL for here. */ + if (is_64 && (memop & MO_SSIZE) == MO_SL) { + tcg_out_arithi(s, data, TCG_REG_O0, 0, SHIFT_SRA); + } else { + tcg_out_mov(s, TCG_TYPE_REG, data, TCG_REG_O0); + } + } else { + if ((memop & MO_SIZE) == MO_64) { + tcg_out_arithi(s, TCG_REG_O0, TCG_REG_O0, 32, SHIFT_SLLX); + tcg_out_arithi(s, TCG_REG_O1, TCG_REG_O1, 0, SHIFT_SRL); + tcg_out_arith(s, data, TCG_REG_O0, TCG_REG_O1, ARITH_OR); + } else if (is_64) { + /* Re-extend from 32-bit rather than reassembling when we + know the high register must be an extension. */ + tcg_out_arithi(s, data, TCG_REG_O1, 0, + memop & MO_SIGN ? SHIFT_SRA : SHIFT_SRL); + } else { + tcg_out_mov(s, TCG_TYPE_I32, data, TCG_REG_O1); + } + } + + *label_ptr |= INSN_OFF19(tcg_ptr_byte_diff(s->code_ptr, label_ptr)); +#else + if (SPARC64 && TARGET_LONG_BITS == 32) { + tcg_out_arithi(s, TCG_REG_T1, addr, 0, SHIFT_SRL); + addr = TCG_REG_T1; + } + tcg_out_ldst_rr(s, data, addr, + (guest_base ? TCG_GUEST_BASE_REG : TCG_REG_G0), + qemu_ld_opc[memop & (MO_BSWAP | MO_SSIZE)]); +#endif /* CONFIG_SOFTMMU */ +} + +static void tcg_out_qemu_st(TCGContext *s, TCGReg data, TCGReg addr, + TCGMemOpIdx oi) +{ + TCGMemOp memop = get_memop(oi); +#ifdef CONFIG_SOFTMMU + unsigned memi = get_mmuidx(oi); + TCGReg addrz, param; + tcg_insn_unit *func; + tcg_insn_unit *label_ptr; + + addrz = tcg_out_tlb_load(s, addr, memi, memop & MO_SIZE, + offsetof(CPUTLBEntry, addr_write)); + + /* The fast path is exactly one insn. Thus we can perform the entire + TLB Hit in the (annulled) delay slot of the branch over TLB Miss. */ + /* beq,a,pt %[xi]cc, label0 */ + label_ptr = s->code_ptr; + tcg_out_bpcc0(s, COND_E, BPCC_A | BPCC_PT + | (TARGET_LONG_BITS == 64 ? BPCC_XCC : BPCC_ICC), 0); + /* delay slot */ + tcg_out_ldst_rr(s, data, addrz, TCG_REG_O1, + qemu_st_opc[memop & (MO_BSWAP | MO_SIZE)]); + + /* TLB Miss. */ + + param = TCG_REG_O1; + if (!SPARC64 && TARGET_LONG_BITS == 64) { + /* Skip the high-part; we'll perform the extract in the trampoline. */ + param++; + } + tcg_out_mov(s, TCG_TYPE_REG, param++, addr); + if (!SPARC64 && (memop & MO_SIZE) == MO_64) { + /* Skip the high-part; we'll perform the extract in the trampoline. */ + param++; + } + tcg_out_mov(s, TCG_TYPE_REG, param++, data); + + func = qemu_st_trampoline[memop & (MO_BSWAP | MO_SIZE)]; + assert(func != NULL); + tcg_out_call_nodelay(s, func); + /* delay slot */ + tcg_out_movi(s, TCG_TYPE_I32, param, oi); + + *label_ptr |= INSN_OFF19(tcg_ptr_byte_diff(s->code_ptr, label_ptr)); +#else + if (SPARC64 && TARGET_LONG_BITS == 32) { + tcg_out_arithi(s, TCG_REG_T1, addr, 0, SHIFT_SRL); + addr = TCG_REG_T1; + } + tcg_out_ldst_rr(s, data, addr, + (guest_base ? TCG_GUEST_BASE_REG : TCG_REG_G0), + qemu_st_opc[memop & (MO_BSWAP | MO_SIZE)]); +#endif /* CONFIG_SOFTMMU */ +} + +static void tcg_out_op(TCGContext *s, TCGOpcode opc, + const TCGArg args[TCG_MAX_OP_ARGS], + const int const_args[TCG_MAX_OP_ARGS]) +{ + TCGArg a0, a1, a2; + int c, c2; + + /* Hoist the loads of the most common arguments. */ + a0 = args[0]; + a1 = args[1]; + a2 = args[2]; + c2 = const_args[2]; + + switch (opc) { + case INDEX_op_exit_tb: + if (check_fit_ptr(a0, 13)) { + tcg_out_arithi(s, TCG_REG_G0, TCG_REG_I7, 8, RETURN); + tcg_out_movi_imm13(s, TCG_REG_O0, a0); + } else { + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_I0, a0 & ~0x3ff); + tcg_out_arithi(s, TCG_REG_G0, TCG_REG_I7, 8, RETURN); + tcg_out_arithi(s, TCG_REG_O0, TCG_REG_O0, a0 & 0x3ff, ARITH_OR); + } + break; + case INDEX_op_goto_tb: + if (s->tb_jmp_offset) { + /* direct jump method */ + s->tb_jmp_offset[a0] = tcg_current_code_size(s); + /* Make sure to preserve links during retranslation. */ + tcg_out32(s, CALL | (*s->code_ptr & ~INSN_OP(-1))); + } else { + /* indirect jump method */ + tcg_out_ld_ptr(s, TCG_REG_T1, (uintptr_t)(s->tb_next + a0)); + tcg_out_arithi(s, TCG_REG_G0, TCG_REG_T1, 0, JMPL); + } + tcg_out_nop(s); + s->tb_next_offset[a0] = tcg_current_code_size(s); + break; + case INDEX_op_br: + tcg_out_bpcc(s, COND_A, BPCC_PT, arg_label(a0)); + tcg_out_nop(s); + break; + +#define OP_32_64(x) \ + glue(glue(case INDEX_op_, x), _i32): \ + glue(glue(case INDEX_op_, x), _i64) + + OP_32_64(ld8u): + tcg_out_ldst(s, a0, a1, a2, LDUB); + break; + OP_32_64(ld8s): + tcg_out_ldst(s, a0, a1, a2, LDSB); + break; + OP_32_64(ld16u): + tcg_out_ldst(s, a0, a1, a2, LDUH); + break; + OP_32_64(ld16s): + tcg_out_ldst(s, a0, a1, a2, LDSH); + break; + case INDEX_op_ld_i32: + case INDEX_op_ld32u_i64: + tcg_out_ldst(s, a0, a1, a2, LDUW); + break; + OP_32_64(st8): + tcg_out_ldst(s, a0, a1, a2, STB); + break; + OP_32_64(st16): + tcg_out_ldst(s, a0, a1, a2, STH); + break; + case INDEX_op_st_i32: + case INDEX_op_st32_i64: + tcg_out_ldst(s, a0, a1, a2, STW); + break; + OP_32_64(add): + c = ARITH_ADD; + goto gen_arith; + OP_32_64(sub): + c = ARITH_SUB; + goto gen_arith; + OP_32_64(and): + c = ARITH_AND; + goto gen_arith; + OP_32_64(andc): + c = ARITH_ANDN; + goto gen_arith; + OP_32_64(or): + c = ARITH_OR; + goto gen_arith; + OP_32_64(orc): + c = ARITH_ORN; + goto gen_arith; + OP_32_64(xor): + c = ARITH_XOR; + goto gen_arith; + case INDEX_op_shl_i32: + c = SHIFT_SLL; + do_shift32: + /* Limit immediate shift count lest we create an illegal insn. */ + tcg_out_arithc(s, a0, a1, a2 & 31, c2, c); + break; + case INDEX_op_shr_i32: + c = SHIFT_SRL; + goto do_shift32; + case INDEX_op_sar_i32: + c = SHIFT_SRA; + goto do_shift32; + case INDEX_op_mul_i32: + c = ARITH_UMUL; + goto gen_arith; + + OP_32_64(neg): + c = ARITH_SUB; + goto gen_arith1; + OP_32_64(not): + c = ARITH_ORN; + goto gen_arith1; + + case INDEX_op_div_i32: + tcg_out_div32(s, a0, a1, a2, c2, 0); + break; + case INDEX_op_divu_i32: + tcg_out_div32(s, a0, a1, a2, c2, 1); + break; + + case INDEX_op_brcond_i32: + tcg_out_brcond_i32(s, a2, a0, a1, const_args[1], arg_label(args[3])); + break; + case INDEX_op_setcond_i32: + tcg_out_setcond_i32(s, args[3], a0, a1, a2, c2); + break; + case INDEX_op_movcond_i32: + tcg_out_movcond_i32(s, args[5], a0, a1, a2, c2, args[3], const_args[3]); + break; + + case INDEX_op_add2_i32: + tcg_out_addsub2_i32(s, args[0], args[1], args[2], args[3], + args[4], const_args[4], args[5], const_args[5], + ARITH_ADDCC, ARITH_ADDC); + break; + case INDEX_op_sub2_i32: + tcg_out_addsub2_i32(s, args[0], args[1], args[2], args[3], + args[4], const_args[4], args[5], const_args[5], + ARITH_SUBCC, ARITH_SUBC); + break; + case INDEX_op_mulu2_i32: + c = ARITH_UMUL; + goto do_mul2; + case INDEX_op_muls2_i32: + c = ARITH_SMUL; + do_mul2: + /* The 32-bit multiply insns produce a full 64-bit result. If the + destination register can hold it, we can avoid the slower RDY. */ + tcg_out_arithc(s, a0, a2, args[3], const_args[3], c); + if (SPARC64 || a0 <= TCG_REG_O7) { + tcg_out_arithi(s, a1, a0, 32, SHIFT_SRLX); + } else { + tcg_out_rdy(s, a1); + } + break; + + case INDEX_op_qemu_ld_i32: + tcg_out_qemu_ld(s, a0, a1, a2, false); + break; + case INDEX_op_qemu_ld_i64: + tcg_out_qemu_ld(s, a0, a1, a2, true); + break; + case INDEX_op_qemu_st_i32: + case INDEX_op_qemu_st_i64: + tcg_out_qemu_st(s, a0, a1, a2); + break; + + case INDEX_op_ld32s_i64: + tcg_out_ldst(s, a0, a1, a2, LDSW); + break; + case INDEX_op_ld_i64: + tcg_out_ldst(s, a0, a1, a2, LDX); + break; + case INDEX_op_st_i64: + tcg_out_ldst(s, a0, a1, a2, STX); + break; + case INDEX_op_shl_i64: + c = SHIFT_SLLX; + do_shift64: + /* Limit immediate shift count lest we create an illegal insn. */ + tcg_out_arithc(s, a0, a1, a2 & 63, c2, c); + break; + case INDEX_op_shr_i64: + c = SHIFT_SRLX; + goto do_shift64; + case INDEX_op_sar_i64: + c = SHIFT_SRAX; + goto do_shift64; + case INDEX_op_mul_i64: + c = ARITH_MULX; + goto gen_arith; + case INDEX_op_div_i64: + c = ARITH_SDIVX; + goto gen_arith; + case INDEX_op_divu_i64: + c = ARITH_UDIVX; + goto gen_arith; + case INDEX_op_ext_i32_i64: + case INDEX_op_ext32s_i64: + tcg_out_arithi(s, a0, a1, 0, SHIFT_SRA); + break; + case INDEX_op_extu_i32_i64: + case INDEX_op_ext32u_i64: + tcg_out_arithi(s, a0, a1, 0, SHIFT_SRL); + break; + case INDEX_op_extrl_i64_i32: + tcg_out_mov(s, TCG_TYPE_I32, a0, a1); + break; + case INDEX_op_extrh_i64_i32: + tcg_out_arithi(s, a0, a1, 32, SHIFT_SRLX); + break; + + case INDEX_op_brcond_i64: + tcg_out_brcond_i64(s, a2, a0, a1, const_args[1], arg_label(args[3])); + break; + case INDEX_op_setcond_i64: + tcg_out_setcond_i64(s, args[3], a0, a1, a2, c2); + break; + case INDEX_op_movcond_i64: + tcg_out_movcond_i64(s, args[5], a0, a1, a2, c2, args[3], const_args[3]); + break; + case INDEX_op_add2_i64: + tcg_out_addsub2_i64(s, args[0], args[1], args[2], args[3], args[4], + const_args[4], args[5], const_args[5], false); + break; + case INDEX_op_sub2_i64: + tcg_out_addsub2_i64(s, args[0], args[1], args[2], args[3], args[4], + const_args[4], args[5], const_args[5], true); + break; + case INDEX_op_muluh_i64: + tcg_out_arith(s, args[0], args[1], args[2], ARITH_UMULXHI); + break; + + gen_arith: + tcg_out_arithc(s, a0, a1, a2, c2, c); + break; + + gen_arith1: + tcg_out_arithc(s, a0, TCG_REG_G0, a1, const_args[1], c); + break; + + case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */ + case INDEX_op_mov_i64: + case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi. */ + case INDEX_op_movi_i64: + case INDEX_op_call: /* Always emitted via tcg_out_call. */ + default: + tcg_abort(); + } +} + +static const TCGTargetOpDef sparc_op_defs[] = { + { INDEX_op_exit_tb, { } }, + { INDEX_op_goto_tb, { } }, + { INDEX_op_br, { } }, + + { INDEX_op_ld8u_i32, { "r", "r" } }, + { INDEX_op_ld8s_i32, { "r", "r" } }, + { INDEX_op_ld16u_i32, { "r", "r" } }, + { INDEX_op_ld16s_i32, { "r", "r" } }, + { INDEX_op_ld_i32, { "r", "r" } }, + { INDEX_op_st8_i32, { "rZ", "r" } }, + { INDEX_op_st16_i32, { "rZ", "r" } }, + { INDEX_op_st_i32, { "rZ", "r" } }, + + { INDEX_op_add_i32, { "r", "rZ", "rJ" } }, + { INDEX_op_mul_i32, { "r", "rZ", "rJ" } }, + { INDEX_op_div_i32, { "r", "rZ", "rJ" } }, + { INDEX_op_divu_i32, { "r", "rZ", "rJ" } }, + { INDEX_op_sub_i32, { "r", "rZ", "rJ" } }, + { INDEX_op_and_i32, { "r", "rZ", "rJ" } }, + { INDEX_op_andc_i32, { "r", "rZ", "rJ" } }, + { INDEX_op_or_i32, { "r", "rZ", "rJ" } }, + { INDEX_op_orc_i32, { "r", "rZ", "rJ" } }, + { INDEX_op_xor_i32, { "r", "rZ", "rJ" } }, + + { INDEX_op_shl_i32, { "r", "rZ", "rJ" } }, + { INDEX_op_shr_i32, { "r", "rZ", "rJ" } }, + { INDEX_op_sar_i32, { "r", "rZ", "rJ" } }, + + { INDEX_op_neg_i32, { "r", "rJ" } }, + { INDEX_op_not_i32, { "r", "rJ" } }, + + { INDEX_op_brcond_i32, { "rZ", "rJ" } }, + { INDEX_op_setcond_i32, { "r", "rZ", "rJ" } }, + { INDEX_op_movcond_i32, { "r", "rZ", "rJ", "rI", "0" } }, + + { INDEX_op_add2_i32, { "r", "r", "rZ", "rZ", "rJ", "rJ" } }, + { INDEX_op_sub2_i32, { "r", "r", "rZ", "rZ", "rJ", "rJ" } }, + { INDEX_op_mulu2_i32, { "r", "r", "rZ", "rJ" } }, + { INDEX_op_muls2_i32, { "r", "r", "rZ", "rJ" } }, + + { INDEX_op_ld8u_i64, { "R", "r" } }, + { INDEX_op_ld8s_i64, { "R", "r" } }, + { INDEX_op_ld16u_i64, { "R", "r" } }, + { INDEX_op_ld16s_i64, { "R", "r" } }, + { INDEX_op_ld32u_i64, { "R", "r" } }, + { INDEX_op_ld32s_i64, { "R", "r" } }, + { INDEX_op_ld_i64, { "R", "r" } }, + { INDEX_op_st8_i64, { "RZ", "r" } }, + { INDEX_op_st16_i64, { "RZ", "r" } }, + { INDEX_op_st32_i64, { "RZ", "r" } }, + { INDEX_op_st_i64, { "RZ", "r" } }, + + { INDEX_op_add_i64, { "R", "RZ", "RJ" } }, + { INDEX_op_mul_i64, { "R", "RZ", "RJ" } }, + { INDEX_op_div_i64, { "R", "RZ", "RJ" } }, + { INDEX_op_divu_i64, { "R", "RZ", "RJ" } }, + { INDEX_op_sub_i64, { "R", "RZ", "RJ" } }, + { INDEX_op_and_i64, { "R", "RZ", "RJ" } }, + { INDEX_op_andc_i64, { "R", "RZ", "RJ" } }, + { INDEX_op_or_i64, { "R", "RZ", "RJ" } }, + { INDEX_op_orc_i64, { "R", "RZ", "RJ" } }, + { INDEX_op_xor_i64, { "R", "RZ", "RJ" } }, + + { INDEX_op_shl_i64, { "R", "RZ", "RJ" } }, + { INDEX_op_shr_i64, { "R", "RZ", "RJ" } }, + { INDEX_op_sar_i64, { "R", "RZ", "RJ" } }, + + { INDEX_op_neg_i64, { "R", "RJ" } }, + { INDEX_op_not_i64, { "R", "RJ" } }, + + { INDEX_op_ext32s_i64, { "R", "R" } }, + { INDEX_op_ext32u_i64, { "R", "R" } }, + { INDEX_op_ext_i32_i64, { "R", "r" } }, + { INDEX_op_extu_i32_i64, { "R", "r" } }, + { INDEX_op_extrl_i64_i32, { "r", "R" } }, + { INDEX_op_extrh_i64_i32, { "r", "R" } }, + + { INDEX_op_brcond_i64, { "RZ", "RJ" } }, + { INDEX_op_setcond_i64, { "R", "RZ", "RJ" } }, + { INDEX_op_movcond_i64, { "R", "RZ", "RJ", "RI", "0" } }, + + { INDEX_op_add2_i64, { "R", "R", "RZ", "RZ", "RJ", "RI" } }, + { INDEX_op_sub2_i64, { "R", "R", "RZ", "RZ", "RJ", "RI" } }, + { INDEX_op_muluh_i64, { "R", "RZ", "RZ" } }, + + { INDEX_op_qemu_ld_i32, { "r", "A" } }, + { INDEX_op_qemu_ld_i64, { "R", "A" } }, + { INDEX_op_qemu_st_i32, { "sZ", "A" } }, + { INDEX_op_qemu_st_i64, { "SZ", "A" } }, + + { -1 }, +}; + +static void tcg_target_init(TCGContext *s) +{ + /* Only probe for the platform and capabilities if we havn't already + determined maximum values at compile time. */ +#ifndef use_vis3_instructions + { + unsigned long hwcap = qemu_getauxval(AT_HWCAP); + use_vis3_instructions = (hwcap & HWCAP_SPARC_VIS3) != 0; + } +#endif + + tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffffffff); + tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I64], 0, ALL_64); + + tcg_regset_set32(tcg_target_call_clobber_regs, 0, + (1 << TCG_REG_G1) | + (1 << TCG_REG_G2) | + (1 << TCG_REG_G3) | + (1 << TCG_REG_G4) | + (1 << TCG_REG_G5) | + (1 << TCG_REG_G6) | + (1 << TCG_REG_G7) | + (1 << TCG_REG_O0) | + (1 << TCG_REG_O1) | + (1 << TCG_REG_O2) | + (1 << TCG_REG_O3) | + (1 << TCG_REG_O4) | + (1 << TCG_REG_O5) | + (1 << TCG_REG_O7)); + + tcg_regset_clear(s->reserved_regs); + tcg_regset_set_reg(s->reserved_regs, TCG_REG_G0); /* zero */ + tcg_regset_set_reg(s->reserved_regs, TCG_REG_G6); /* reserved for os */ + tcg_regset_set_reg(s->reserved_regs, TCG_REG_G7); /* thread pointer */ + tcg_regset_set_reg(s->reserved_regs, TCG_REG_I6); /* frame pointer */ + tcg_regset_set_reg(s->reserved_regs, TCG_REG_I7); /* return address */ + tcg_regset_set_reg(s->reserved_regs, TCG_REG_O6); /* stack pointer */ + tcg_regset_set_reg(s->reserved_regs, TCG_REG_T1); /* for internal use */ + tcg_regset_set_reg(s->reserved_regs, TCG_REG_T2); /* for internal use */ + + tcg_add_target_add_op_defs(sparc_op_defs); +} + +#if SPARC64 +# define ELF_HOST_MACHINE EM_SPARCV9 +#else +# define ELF_HOST_MACHINE EM_SPARC32PLUS +# define ELF_HOST_FLAGS EF_SPARC_32PLUS +#endif + +typedef struct { + DebugFrameHeader h; + uint8_t fde_def_cfa[SPARC64 ? 4 : 2]; + uint8_t fde_win_save; + uint8_t fde_ret_save[3]; +} DebugFrame; + +static const DebugFrame debug_frame = { + .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */ + .h.cie.id = -1, + .h.cie.version = 1, + .h.cie.code_align = 1, + .h.cie.data_align = -sizeof(void *) & 0x7f, + .h.cie.return_column = 15, /* o7 */ + + /* Total FDE size does not include the "len" member. */ + .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset), + + .fde_def_cfa = { +#if SPARC64 + 12, 30, /* DW_CFA_def_cfa i6, 2047 */ + (2047 & 0x7f) | 0x80, (2047 >> 7) +#else + 13, 30 /* DW_CFA_def_cfa_register i6 */ +#endif + }, + .fde_win_save = 0x2d, /* DW_CFA_GNU_window_save */ + .fde_ret_save = { 9, 15, 31 }, /* DW_CFA_register o7, i7 */ +}; + +void tcg_register_jit(void *buf, size_t buf_size) +{ + tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame)); +} + +void tb_set_jmp_target1(uintptr_t jmp_addr, uintptr_t addr) +{ + uint32_t *ptr = (uint32_t *)jmp_addr; + uintptr_t disp = addr - jmp_addr; + + /* We can reach the entire address space for 32-bit. For 64-bit + the code_gen_buffer can't be larger than 2GB. */ + assert(disp == (int32_t)disp); + + *ptr = CALL | (uint32_t)disp >> 2; + flush_icache_range(jmp_addr, jmp_addr + 4); +} diff --git a/tcg/tcg.c b/tcg/tcg.c index 86208a8..550671b 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -62,7 +62,8 @@ #include "elf.h" #include "exec/log.h" -/* Forward declarations for functions declared in tcg-target.c and used here. */ +/* Forward declarations for functions declared in tcg-target.inc.c and + used here. */ static void tcg_target_init(TCGContext *s); static void tcg_target_qemu_prologue(TCGContext *s); static void patch_reloc(tcg_insn_unit *code_ptr, int type, @@ -96,7 +97,7 @@ static void tcg_register_jit_int(void *buf, size_t size, size_t debug_frame_size) __attribute__((unused)); -/* Forward declarations for functions declared and used in tcg-target.c. */ +/* Forward declarations for functions declared and used in tcg-target.inc.c. */ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str); static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1, intptr_t arg2); @@ -250,7 +251,7 @@ TCGLabel *gen_new_label(void) return l; } -#include "tcg-target.c" +#include "tcg-target.inc.c" /* pool based memory allocation */ void *tcg_malloc_internal(TCGContext *s, int size) diff --git a/tcg/tcg.h b/tcg/tcg.h index d181694..c45329a 100644 --- a/tcg/tcg.h +++ b/tcg/tcg.h @@ -568,7 +568,7 @@ struct TCGContext { TBContext tb_ctx; - /* The TCGBackendData structure is private to tcg-target.c. */ + /* The TCGBackendData structure is private to tcg-target.inc.c. */ struct TCGBackendData *be; TCGTempSet free_temps[TCG_TYPE_COUNT * 2]; diff --git a/tcg/tci/README b/tcg/tci/README index dc57f07..3786b09 100644 --- a/tcg/tci/README +++ b/tcg/tci/README @@ -21,7 +21,7 @@ This is what TCI (Tiny Code Interpreter) does. 2) Implementation Like each TCG host frontend, TCI implements the code generator in -tcg-target.c, tcg-target.h. Both files are in directory tcg/tci. +tcg-target.inc.c, tcg-target.h. Both files are in directory tcg/tci. The additional file tcg/tci.c adds the interpreter. @@ -123,7 +123,7 @@ u1 = linux-user-test works would also improve speed for hosts which support byte alignment). * A better disassembler for the pseudo code would be nice (a very primitive - disassembler is included in tcg-target.c). + disassembler is included in tcg-target.inc.c). * It might be useful to have a runtime option which selects the native TCG or TCI, so QEMU would have to include two TCGs. Today, selecting TCI diff --git a/tcg/tci/tcg-target.c b/tcg/tci/tcg-target.c deleted file mode 100644 index 16ce048..0000000 --- a/tcg/tci/tcg-target.c +++ /dev/null @@ -1,880 +0,0 @@ -/* - * Tiny Code Generator for QEMU - * - * Copyright (c) 2009, 2011 Stefan Weil - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - */ - -#include "qemu/osdep.h" -#include "tcg-be-null.h" - -/* TODO list: - * - See TODO comments in code. - */ - -/* Marker for missing code. */ -#define TODO() \ - do { \ - fprintf(stderr, "TODO %s:%u: %s()\n", \ - __FILE__, __LINE__, __func__); \ - tcg_abort(); \ - } while (0) - -/* Bitfield n...m (in 32 bit value). */ -#define BITS(n, m) (((0xffffffffU << (31 - n)) >> (31 - n + m)) << m) - -/* Macros used in tcg_target_op_defs. */ -#define R "r" -#define RI "ri" -#if TCG_TARGET_REG_BITS == 32 -# define R64 "r", "r" -#else -# define R64 "r" -#endif -#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS -# define L "L", "L" -# define S "S", "S" -#else -# define L "L" -# define S "S" -#endif - -/* TODO: documentation. */ -static const TCGTargetOpDef tcg_target_op_defs[] = { - { INDEX_op_exit_tb, { NULL } }, - { INDEX_op_goto_tb, { NULL } }, - { INDEX_op_br, { NULL } }, - - { INDEX_op_ld8u_i32, { R, R } }, - { INDEX_op_ld8s_i32, { R, R } }, - { INDEX_op_ld16u_i32, { R, R } }, - { INDEX_op_ld16s_i32, { R, R } }, - { INDEX_op_ld_i32, { R, R } }, - { INDEX_op_st8_i32, { R, R } }, - { INDEX_op_st16_i32, { R, R } }, - { INDEX_op_st_i32, { R, R } }, - - { INDEX_op_add_i32, { R, RI, RI } }, - { INDEX_op_sub_i32, { R, RI, RI } }, - { INDEX_op_mul_i32, { R, RI, RI } }, -#if TCG_TARGET_HAS_div_i32 - { INDEX_op_div_i32, { R, R, R } }, - { INDEX_op_divu_i32, { R, R, R } }, - { INDEX_op_rem_i32, { R, R, R } }, - { INDEX_op_remu_i32, { R, R, R } }, -#elif TCG_TARGET_HAS_div2_i32 - { INDEX_op_div2_i32, { R, R, "0", "1", R } }, - { INDEX_op_divu2_i32, { R, R, "0", "1", R } }, -#endif - /* TODO: Does R, RI, RI result in faster code than R, R, RI? - If both operands are constants, we can optimize. */ - { INDEX_op_and_i32, { R, RI, RI } }, -#if TCG_TARGET_HAS_andc_i32 - { INDEX_op_andc_i32, { R, RI, RI } }, -#endif -#if TCG_TARGET_HAS_eqv_i32 - { INDEX_op_eqv_i32, { R, RI, RI } }, -#endif -#if TCG_TARGET_HAS_nand_i32 - { INDEX_op_nand_i32, { R, RI, RI } }, -#endif -#if TCG_TARGET_HAS_nor_i32 - { INDEX_op_nor_i32, { R, RI, RI } }, -#endif - { INDEX_op_or_i32, { R, RI, RI } }, -#if TCG_TARGET_HAS_orc_i32 - { INDEX_op_orc_i32, { R, RI, RI } }, -#endif - { INDEX_op_xor_i32, { R, RI, RI } }, - { INDEX_op_shl_i32, { R, RI, RI } }, - { INDEX_op_shr_i32, { R, RI, RI } }, - { INDEX_op_sar_i32, { R, RI, RI } }, -#if TCG_TARGET_HAS_rot_i32 - { INDEX_op_rotl_i32, { R, RI, RI } }, - { INDEX_op_rotr_i32, { R, RI, RI } }, -#endif -#if TCG_TARGET_HAS_deposit_i32 - { INDEX_op_deposit_i32, { R, "0", R } }, -#endif - - { INDEX_op_brcond_i32, { R, RI } }, - - { INDEX_op_setcond_i32, { R, R, RI } }, -#if TCG_TARGET_REG_BITS == 64 - { INDEX_op_setcond_i64, { R, R, RI } }, -#endif /* TCG_TARGET_REG_BITS == 64 */ - -#if TCG_TARGET_REG_BITS == 32 - /* TODO: Support R, R, R, R, RI, RI? Will it be faster? */ - { INDEX_op_add2_i32, { R, R, R, R, R, R } }, - { INDEX_op_sub2_i32, { R, R, R, R, R, R } }, - { INDEX_op_brcond2_i32, { R, R, RI, RI } }, - { INDEX_op_mulu2_i32, { R, R, R, R } }, - { INDEX_op_setcond2_i32, { R, R, R, RI, RI } }, -#endif - -#if TCG_TARGET_HAS_not_i32 - { INDEX_op_not_i32, { R, R } }, -#endif -#if TCG_TARGET_HAS_neg_i32 - { INDEX_op_neg_i32, { R, R } }, -#endif - -#if TCG_TARGET_REG_BITS == 64 - { INDEX_op_ld8u_i64, { R, R } }, - { INDEX_op_ld8s_i64, { R, R } }, - { INDEX_op_ld16u_i64, { R, R } }, - { INDEX_op_ld16s_i64, { R, R } }, - { INDEX_op_ld32u_i64, { R, R } }, - { INDEX_op_ld32s_i64, { R, R } }, - { INDEX_op_ld_i64, { R, R } }, - - { INDEX_op_st8_i64, { R, R } }, - { INDEX_op_st16_i64, { R, R } }, - { INDEX_op_st32_i64, { R, R } }, - { INDEX_op_st_i64, { R, R } }, - - { INDEX_op_add_i64, { R, RI, RI } }, - { INDEX_op_sub_i64, { R, RI, RI } }, - { INDEX_op_mul_i64, { R, RI, RI } }, -#if TCG_TARGET_HAS_div_i64 - { INDEX_op_div_i64, { R, R, R } }, - { INDEX_op_divu_i64, { R, R, R } }, - { INDEX_op_rem_i64, { R, R, R } }, - { INDEX_op_remu_i64, { R, R, R } }, -#elif TCG_TARGET_HAS_div2_i64 - { INDEX_op_div2_i64, { R, R, "0", "1", R } }, - { INDEX_op_divu2_i64, { R, R, "0", "1", R } }, -#endif - { INDEX_op_and_i64, { R, RI, RI } }, -#if TCG_TARGET_HAS_andc_i64 - { INDEX_op_andc_i64, { R, RI, RI } }, -#endif -#if TCG_TARGET_HAS_eqv_i64 - { INDEX_op_eqv_i64, { R, RI, RI } }, -#endif -#if TCG_TARGET_HAS_nand_i64 - { INDEX_op_nand_i64, { R, RI, RI } }, -#endif -#if TCG_TARGET_HAS_nor_i64 - { INDEX_op_nor_i64, { R, RI, RI } }, -#endif - { INDEX_op_or_i64, { R, RI, RI } }, -#if TCG_TARGET_HAS_orc_i64 - { INDEX_op_orc_i64, { R, RI, RI } }, -#endif - { INDEX_op_xor_i64, { R, RI, RI } }, - { INDEX_op_shl_i64, { R, RI, RI } }, - { INDEX_op_shr_i64, { R, RI, RI } }, - { INDEX_op_sar_i64, { R, RI, RI } }, -#if TCG_TARGET_HAS_rot_i64 - { INDEX_op_rotl_i64, { R, RI, RI } }, - { INDEX_op_rotr_i64, { R, RI, RI } }, -#endif -#if TCG_TARGET_HAS_deposit_i64 - { INDEX_op_deposit_i64, { R, "0", R } }, -#endif - { INDEX_op_brcond_i64, { R, RI } }, - -#if TCG_TARGET_HAS_ext8s_i64 - { INDEX_op_ext8s_i64, { R, R } }, -#endif -#if TCG_TARGET_HAS_ext16s_i64 - { INDEX_op_ext16s_i64, { R, R } }, -#endif -#if TCG_TARGET_HAS_ext32s_i64 - { INDEX_op_ext32s_i64, { R, R } }, -#endif -#if TCG_TARGET_HAS_ext8u_i64 - { INDEX_op_ext8u_i64, { R, R } }, -#endif -#if TCG_TARGET_HAS_ext16u_i64 - { INDEX_op_ext16u_i64, { R, R } }, -#endif -#if TCG_TARGET_HAS_ext32u_i64 - { INDEX_op_ext32u_i64, { R, R } }, -#endif - { INDEX_op_ext_i32_i64, { R, R } }, - { INDEX_op_extu_i32_i64, { R, R } }, -#if TCG_TARGET_HAS_bswap16_i64 - { INDEX_op_bswap16_i64, { R, R } }, -#endif -#if TCG_TARGET_HAS_bswap32_i64 - { INDEX_op_bswap32_i64, { R, R } }, -#endif -#if TCG_TARGET_HAS_bswap64_i64 - { INDEX_op_bswap64_i64, { R, R } }, -#endif -#if TCG_TARGET_HAS_not_i64 - { INDEX_op_not_i64, { R, R } }, -#endif -#if TCG_TARGET_HAS_neg_i64 - { INDEX_op_neg_i64, { R, R } }, -#endif -#endif /* TCG_TARGET_REG_BITS == 64 */ - - { INDEX_op_qemu_ld_i32, { R, L } }, - { INDEX_op_qemu_ld_i64, { R64, L } }, - - { INDEX_op_qemu_st_i32, { R, S } }, - { INDEX_op_qemu_st_i64, { R64, S } }, - -#if TCG_TARGET_HAS_ext8s_i32 - { INDEX_op_ext8s_i32, { R, R } }, -#endif -#if TCG_TARGET_HAS_ext16s_i32 - { INDEX_op_ext16s_i32, { R, R } }, -#endif -#if TCG_TARGET_HAS_ext8u_i32 - { INDEX_op_ext8u_i32, { R, R } }, -#endif -#if TCG_TARGET_HAS_ext16u_i32 - { INDEX_op_ext16u_i32, { R, R } }, -#endif - -#if TCG_TARGET_HAS_bswap16_i32 - { INDEX_op_bswap16_i32, { R, R } }, -#endif -#if TCG_TARGET_HAS_bswap32_i32 - { INDEX_op_bswap32_i32, { R, R } }, -#endif - - { -1 }, -}; - -static const int tcg_target_reg_alloc_order[] = { - TCG_REG_R0, - TCG_REG_R1, - TCG_REG_R2, - TCG_REG_R3, -#if 0 /* used for TCG_REG_CALL_STACK */ - TCG_REG_R4, -#endif - TCG_REG_R5, - TCG_REG_R6, - TCG_REG_R7, -#if TCG_TARGET_NB_REGS >= 16 - TCG_REG_R8, - TCG_REG_R9, - TCG_REG_R10, - TCG_REG_R11, - TCG_REG_R12, - TCG_REG_R13, - TCG_REG_R14, - TCG_REG_R15, -#endif -}; - -#if MAX_OPC_PARAM_IARGS != 5 -# error Fix needed, number of supported input arguments changed! -#endif - -static const int tcg_target_call_iarg_regs[] = { - TCG_REG_R0, - TCG_REG_R1, - TCG_REG_R2, - TCG_REG_R3, -#if 0 /* used for TCG_REG_CALL_STACK */ - TCG_REG_R4, -#endif - TCG_REG_R5, -#if TCG_TARGET_REG_BITS == 32 - /* 32 bit hosts need 2 * MAX_OPC_PARAM_IARGS registers. */ - TCG_REG_R6, - TCG_REG_R7, -#if TCG_TARGET_NB_REGS >= 16 - TCG_REG_R8, - TCG_REG_R9, - TCG_REG_R10, -#else -# error Too few input registers available -#endif -#endif -}; - -static const int tcg_target_call_oarg_regs[] = { - TCG_REG_R0, -#if TCG_TARGET_REG_BITS == 32 - TCG_REG_R1 -#endif -}; - -#ifndef NDEBUG -static const char *const tcg_target_reg_names[TCG_TARGET_NB_REGS] = { - "r00", - "r01", - "r02", - "r03", - "r04", - "r05", - "r06", - "r07", -#if TCG_TARGET_NB_REGS >= 16 - "r08", - "r09", - "r10", - "r11", - "r12", - "r13", - "r14", - "r15", -#if TCG_TARGET_NB_REGS >= 32 - "r16", - "r17", - "r18", - "r19", - "r20", - "r21", - "r22", - "r23", - "r24", - "r25", - "r26", - "r27", - "r28", - "r29", - "r30", - "r31" -#endif -#endif -}; -#endif - -static void patch_reloc(tcg_insn_unit *code_ptr, int type, - intptr_t value, intptr_t addend) -{ - /* tcg_out_reloc always uses the same type, addend. */ - assert(type == sizeof(tcg_target_long)); - assert(addend == 0); - assert(value != 0); - if (TCG_TARGET_REG_BITS == 32) { - tcg_patch32(code_ptr, value); - } else { - tcg_patch64(code_ptr, value); - } -} - -/* Parse target specific constraints. */ -static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str) -{ - const char *ct_str = *pct_str; - switch (ct_str[0]) { - case 'r': - case 'L': /* qemu_ld constraint */ - case 'S': /* qemu_st constraint */ - ct->ct |= TCG_CT_REG; - tcg_regset_set32(ct->u.regs, 0, BIT(TCG_TARGET_NB_REGS) - 1); - break; - default: - return -1; - } - ct_str++; - *pct_str = ct_str; - return 0; -} - -#if defined(CONFIG_DEBUG_TCG_INTERPRETER) -/* Show current bytecode. Used by tcg interpreter. */ -void tci_disas(uint8_t opc) -{ - const TCGOpDef *def = &tcg_op_defs[opc]; - fprintf(stderr, "TCG %s %u, %u, %u\n", - def->name, def->nb_oargs, def->nb_iargs, def->nb_cargs); -} -#endif - -/* Write value (native size). */ -static void tcg_out_i(TCGContext *s, tcg_target_ulong v) -{ - if (TCG_TARGET_REG_BITS == 32) { - tcg_out32(s, v); - } else { - tcg_out64(s, v); - } -} - -/* Write opcode. */ -static void tcg_out_op_t(TCGContext *s, TCGOpcode op) -{ - tcg_out8(s, op); - tcg_out8(s, 0); -} - -/* Write register. */ -static void tcg_out_r(TCGContext *s, TCGArg t0) -{ - assert(t0 < TCG_TARGET_NB_REGS); - tcg_out8(s, t0); -} - -/* Write register or constant (native size). */ -static void tcg_out_ri(TCGContext *s, int const_arg, TCGArg arg) -{ - if (const_arg) { - assert(const_arg == 1); - tcg_out8(s, TCG_CONST); - tcg_out_i(s, arg); - } else { - tcg_out_r(s, arg); - } -} - -/* Write register or constant (32 bit). */ -static void tcg_out_ri32(TCGContext *s, int const_arg, TCGArg arg) -{ - if (const_arg) { - assert(const_arg == 1); - tcg_out8(s, TCG_CONST); - tcg_out32(s, arg); - } else { - tcg_out_r(s, arg); - } -} - -#if TCG_TARGET_REG_BITS == 64 -/* Write register or constant (64 bit). */ -static void tcg_out_ri64(TCGContext *s, int const_arg, TCGArg arg) -{ - if (const_arg) { - assert(const_arg == 1); - tcg_out8(s, TCG_CONST); - tcg_out64(s, arg); - } else { - tcg_out_r(s, arg); - } -} -#endif - -/* Write label. */ -static void tci_out_label(TCGContext *s, TCGLabel *label) -{ - if (label->has_value) { - tcg_out_i(s, label->u.value); - assert(label->u.value); - } else { - tcg_out_reloc(s, s->code_ptr, sizeof(tcg_target_ulong), label, 0); - s->code_ptr += sizeof(tcg_target_ulong); - } -} - -static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1, - intptr_t arg2) -{ - uint8_t *old_code_ptr = s->code_ptr; - if (type == TCG_TYPE_I32) { - tcg_out_op_t(s, INDEX_op_ld_i32); - tcg_out_r(s, ret); - tcg_out_r(s, arg1); - tcg_out32(s, arg2); - } else { - assert(type == TCG_TYPE_I64); -#if TCG_TARGET_REG_BITS == 64 - tcg_out_op_t(s, INDEX_op_ld_i64); - tcg_out_r(s, ret); - tcg_out_r(s, arg1); - assert(arg2 == (int32_t)arg2); - tcg_out32(s, arg2); -#else - TODO(); -#endif - } - old_code_ptr[1] = s->code_ptr - old_code_ptr; -} - -static void tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg) -{ - uint8_t *old_code_ptr = s->code_ptr; - assert(ret != arg); -#if TCG_TARGET_REG_BITS == 32 - tcg_out_op_t(s, INDEX_op_mov_i32); -#else - tcg_out_op_t(s, INDEX_op_mov_i64); -#endif - tcg_out_r(s, ret); - tcg_out_r(s, arg); - old_code_ptr[1] = s->code_ptr - old_code_ptr; -} - -static void tcg_out_movi(TCGContext *s, TCGType type, - TCGReg t0, tcg_target_long arg) -{ - uint8_t *old_code_ptr = s->code_ptr; - uint32_t arg32 = arg; - if (type == TCG_TYPE_I32 || arg == arg32) { - tcg_out_op_t(s, INDEX_op_movi_i32); - tcg_out_r(s, t0); - tcg_out32(s, arg32); - } else { - assert(type == TCG_TYPE_I64); -#if TCG_TARGET_REG_BITS == 64 - tcg_out_op_t(s, INDEX_op_movi_i64); - tcg_out_r(s, t0); - tcg_out64(s, arg); -#else - TODO(); -#endif - } - old_code_ptr[1] = s->code_ptr - old_code_ptr; -} - -static inline void tcg_out_call(TCGContext *s, tcg_insn_unit *arg) -{ - uint8_t *old_code_ptr = s->code_ptr; - tcg_out_op_t(s, INDEX_op_call); - tcg_out_ri(s, 1, (uintptr_t)arg); - old_code_ptr[1] = s->code_ptr - old_code_ptr; -} - -static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, - const int *const_args) -{ - uint8_t *old_code_ptr = s->code_ptr; - - tcg_out_op_t(s, opc); - - switch (opc) { - case INDEX_op_exit_tb: - tcg_out64(s, args[0]); - break; - case INDEX_op_goto_tb: - if (s->tb_jmp_offset) { - /* Direct jump method. */ - assert(args[0] < ARRAY_SIZE(s->tb_jmp_offset)); - s->tb_jmp_offset[args[0]] = tcg_current_code_size(s); - tcg_out32(s, 0); - } else { - /* Indirect jump method. */ - TODO(); - } - assert(args[0] < ARRAY_SIZE(s->tb_next_offset)); - s->tb_next_offset[args[0]] = tcg_current_code_size(s); - break; - case INDEX_op_br: - tci_out_label(s, arg_label(args[0])); - break; - case INDEX_op_setcond_i32: - tcg_out_r(s, args[0]); - tcg_out_r(s, args[1]); - tcg_out_ri32(s, const_args[2], args[2]); - tcg_out8(s, args[3]); /* condition */ - break; -#if TCG_TARGET_REG_BITS == 32 - case INDEX_op_setcond2_i32: - /* setcond2_i32 cond, t0, t1_low, t1_high, t2_low, t2_high */ - tcg_out_r(s, args[0]); - tcg_out_r(s, args[1]); - tcg_out_r(s, args[2]); - tcg_out_ri32(s, const_args[3], args[3]); - tcg_out_ri32(s, const_args[4], args[4]); - tcg_out8(s, args[5]); /* condition */ - break; -#elif TCG_TARGET_REG_BITS == 64 - case INDEX_op_setcond_i64: - tcg_out_r(s, args[0]); - tcg_out_r(s, args[1]); - tcg_out_ri64(s, const_args[2], args[2]); - tcg_out8(s, args[3]); /* condition */ - break; -#endif - case INDEX_op_ld8u_i32: - case INDEX_op_ld8s_i32: - case INDEX_op_ld16u_i32: - case INDEX_op_ld16s_i32: - case INDEX_op_ld_i32: - case INDEX_op_st8_i32: - case INDEX_op_st16_i32: - case INDEX_op_st_i32: - case INDEX_op_ld8u_i64: - case INDEX_op_ld8s_i64: - case INDEX_op_ld16u_i64: - case INDEX_op_ld16s_i64: - case INDEX_op_ld32u_i64: - case INDEX_op_ld32s_i64: - case INDEX_op_ld_i64: - case INDEX_op_st8_i64: - case INDEX_op_st16_i64: - case INDEX_op_st32_i64: - case INDEX_op_st_i64: - tcg_out_r(s, args[0]); - tcg_out_r(s, args[1]); - assert(args[2] == (int32_t)args[2]); - tcg_out32(s, args[2]); - break; - case INDEX_op_add_i32: - case INDEX_op_sub_i32: - case INDEX_op_mul_i32: - case INDEX_op_and_i32: - case INDEX_op_andc_i32: /* Optional (TCG_TARGET_HAS_andc_i32). */ - case INDEX_op_eqv_i32: /* Optional (TCG_TARGET_HAS_eqv_i32). */ - case INDEX_op_nand_i32: /* Optional (TCG_TARGET_HAS_nand_i32). */ - case INDEX_op_nor_i32: /* Optional (TCG_TARGET_HAS_nor_i32). */ - case INDEX_op_or_i32: - case INDEX_op_orc_i32: /* Optional (TCG_TARGET_HAS_orc_i32). */ - case INDEX_op_xor_i32: - case INDEX_op_shl_i32: - case INDEX_op_shr_i32: - case INDEX_op_sar_i32: - case INDEX_op_rotl_i32: /* Optional (TCG_TARGET_HAS_rot_i32). */ - case INDEX_op_rotr_i32: /* Optional (TCG_TARGET_HAS_rot_i32). */ - tcg_out_r(s, args[0]); - tcg_out_ri32(s, const_args[1], args[1]); - tcg_out_ri32(s, const_args[2], args[2]); - break; - case INDEX_op_deposit_i32: /* Optional (TCG_TARGET_HAS_deposit_i32). */ - tcg_out_r(s, args[0]); - tcg_out_r(s, args[1]); - tcg_out_r(s, args[2]); - assert(args[3] <= UINT8_MAX); - tcg_out8(s, args[3]); - assert(args[4] <= UINT8_MAX); - tcg_out8(s, args[4]); - break; - -#if TCG_TARGET_REG_BITS == 64 - case INDEX_op_add_i64: - case INDEX_op_sub_i64: - case INDEX_op_mul_i64: - case INDEX_op_and_i64: - case INDEX_op_andc_i64: /* Optional (TCG_TARGET_HAS_andc_i64). */ - case INDEX_op_eqv_i64: /* Optional (TCG_TARGET_HAS_eqv_i64). */ - case INDEX_op_nand_i64: /* Optional (TCG_TARGET_HAS_nand_i64). */ - case INDEX_op_nor_i64: /* Optional (TCG_TARGET_HAS_nor_i64). */ - case INDEX_op_or_i64: - case INDEX_op_orc_i64: /* Optional (TCG_TARGET_HAS_orc_i64). */ - case INDEX_op_xor_i64: - case INDEX_op_shl_i64: - case INDEX_op_shr_i64: - case INDEX_op_sar_i64: - case INDEX_op_rotl_i64: /* Optional (TCG_TARGET_HAS_rot_i64). */ - case INDEX_op_rotr_i64: /* Optional (TCG_TARGET_HAS_rot_i64). */ - tcg_out_r(s, args[0]); - tcg_out_ri64(s, const_args[1], args[1]); - tcg_out_ri64(s, const_args[2], args[2]); - break; - case INDEX_op_deposit_i64: /* Optional (TCG_TARGET_HAS_deposit_i64). */ - tcg_out_r(s, args[0]); - tcg_out_r(s, args[1]); - tcg_out_r(s, args[2]); - assert(args[3] <= UINT8_MAX); - tcg_out8(s, args[3]); - assert(args[4] <= UINT8_MAX); - tcg_out8(s, args[4]); - break; - case INDEX_op_div_i64: /* Optional (TCG_TARGET_HAS_div_i64). */ - case INDEX_op_divu_i64: /* Optional (TCG_TARGET_HAS_div_i64). */ - case INDEX_op_rem_i64: /* Optional (TCG_TARGET_HAS_div_i64). */ - case INDEX_op_remu_i64: /* Optional (TCG_TARGET_HAS_div_i64). */ - TODO(); - break; - case INDEX_op_div2_i64: /* Optional (TCG_TARGET_HAS_div2_i64). */ - case INDEX_op_divu2_i64: /* Optional (TCG_TARGET_HAS_div2_i64). */ - TODO(); - break; - case INDEX_op_brcond_i64: - tcg_out_r(s, args[0]); - tcg_out_ri64(s, const_args[1], args[1]); - tcg_out8(s, args[2]); /* condition */ - tci_out_label(s, arg_label(args[3])); - break; - case INDEX_op_bswap16_i64: /* Optional (TCG_TARGET_HAS_bswap16_i64). */ - case INDEX_op_bswap32_i64: /* Optional (TCG_TARGET_HAS_bswap32_i64). */ - case INDEX_op_bswap64_i64: /* Optional (TCG_TARGET_HAS_bswap64_i64). */ - case INDEX_op_not_i64: /* Optional (TCG_TARGET_HAS_not_i64). */ - case INDEX_op_neg_i64: /* Optional (TCG_TARGET_HAS_neg_i64). */ - case INDEX_op_ext8s_i64: /* Optional (TCG_TARGET_HAS_ext8s_i64). */ - case INDEX_op_ext8u_i64: /* Optional (TCG_TARGET_HAS_ext8u_i64). */ - case INDEX_op_ext16s_i64: /* Optional (TCG_TARGET_HAS_ext16s_i64). */ - case INDEX_op_ext16u_i64: /* Optional (TCG_TARGET_HAS_ext16u_i64). */ - case INDEX_op_ext32s_i64: /* Optional (TCG_TARGET_HAS_ext32s_i64). */ - case INDEX_op_ext32u_i64: /* Optional (TCG_TARGET_HAS_ext32u_i64). */ - case INDEX_op_ext_i32_i64: - case INDEX_op_extu_i32_i64: -#endif /* TCG_TARGET_REG_BITS == 64 */ - case INDEX_op_neg_i32: /* Optional (TCG_TARGET_HAS_neg_i32). */ - case INDEX_op_not_i32: /* Optional (TCG_TARGET_HAS_not_i32). */ - case INDEX_op_ext8s_i32: /* Optional (TCG_TARGET_HAS_ext8s_i32). */ - case INDEX_op_ext16s_i32: /* Optional (TCG_TARGET_HAS_ext16s_i32). */ - case INDEX_op_ext8u_i32: /* Optional (TCG_TARGET_HAS_ext8u_i32). */ - case INDEX_op_ext16u_i32: /* Optional (TCG_TARGET_HAS_ext16u_i32). */ - case INDEX_op_bswap16_i32: /* Optional (TCG_TARGET_HAS_bswap16_i32). */ - case INDEX_op_bswap32_i32: /* Optional (TCG_TARGET_HAS_bswap32_i32). */ - tcg_out_r(s, args[0]); - tcg_out_r(s, args[1]); - break; - case INDEX_op_div_i32: /* Optional (TCG_TARGET_HAS_div_i32). */ - case INDEX_op_divu_i32: /* Optional (TCG_TARGET_HAS_div_i32). */ - case INDEX_op_rem_i32: /* Optional (TCG_TARGET_HAS_div_i32). */ - case INDEX_op_remu_i32: /* Optional (TCG_TARGET_HAS_div_i32). */ - tcg_out_r(s, args[0]); - tcg_out_ri32(s, const_args[1], args[1]); - tcg_out_ri32(s, const_args[2], args[2]); - break; - case INDEX_op_div2_i32: /* Optional (TCG_TARGET_HAS_div2_i32). */ - case INDEX_op_divu2_i32: /* Optional (TCG_TARGET_HAS_div2_i32). */ - TODO(); - break; -#if TCG_TARGET_REG_BITS == 32 - case INDEX_op_add2_i32: - case INDEX_op_sub2_i32: - tcg_out_r(s, args[0]); - tcg_out_r(s, args[1]); - tcg_out_r(s, args[2]); - tcg_out_r(s, args[3]); - tcg_out_r(s, args[4]); - tcg_out_r(s, args[5]); - break; - case INDEX_op_brcond2_i32: - tcg_out_r(s, args[0]); - tcg_out_r(s, args[1]); - tcg_out_ri32(s, const_args[2], args[2]); - tcg_out_ri32(s, const_args[3], args[3]); - tcg_out8(s, args[4]); /* condition */ - tci_out_label(s, arg_label(args[5])); - break; - case INDEX_op_mulu2_i32: - tcg_out_r(s, args[0]); - tcg_out_r(s, args[1]); - tcg_out_r(s, args[2]); - tcg_out_r(s, args[3]); - break; -#endif - case INDEX_op_brcond_i32: - tcg_out_r(s, args[0]); - tcg_out_ri32(s, const_args[1], args[1]); - tcg_out8(s, args[2]); /* condition */ - tci_out_label(s, arg_label(args[3])); - break; - case INDEX_op_qemu_ld_i32: - tcg_out_r(s, *args++); - tcg_out_r(s, *args++); - if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) { - tcg_out_r(s, *args++); - } - tcg_out_i(s, *args++); - break; - case INDEX_op_qemu_ld_i64: - tcg_out_r(s, *args++); - if (TCG_TARGET_REG_BITS == 32) { - tcg_out_r(s, *args++); - } - tcg_out_r(s, *args++); - if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) { - tcg_out_r(s, *args++); - } - tcg_out_i(s, *args++); - break; - case INDEX_op_qemu_st_i32: - tcg_out_r(s, *args++); - tcg_out_r(s, *args++); - if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) { - tcg_out_r(s, *args++); - } - tcg_out_i(s, *args++); - break; - case INDEX_op_qemu_st_i64: - tcg_out_r(s, *args++); - if (TCG_TARGET_REG_BITS == 32) { - tcg_out_r(s, *args++); - } - tcg_out_r(s, *args++); - if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) { - tcg_out_r(s, *args++); - } - tcg_out_i(s, *args++); - break; - case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */ - case INDEX_op_mov_i64: - case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi. */ - case INDEX_op_movi_i64: - case INDEX_op_call: /* Always emitted via tcg_out_call. */ - default: - tcg_abort(); - } - old_code_ptr[1] = s->code_ptr - old_code_ptr; -} - -static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1, - intptr_t arg2) -{ - uint8_t *old_code_ptr = s->code_ptr; - if (type == TCG_TYPE_I32) { - tcg_out_op_t(s, INDEX_op_st_i32); - tcg_out_r(s, arg); - tcg_out_r(s, arg1); - tcg_out32(s, arg2); - } else { - assert(type == TCG_TYPE_I64); -#if TCG_TARGET_REG_BITS == 64 - tcg_out_op_t(s, INDEX_op_st_i64); - tcg_out_r(s, arg); - tcg_out_r(s, arg1); - tcg_out32(s, arg2); -#else - TODO(); -#endif - } - old_code_ptr[1] = s->code_ptr - old_code_ptr; -} - -/* Test if a constant matches the constraint. */ -static int tcg_target_const_match(tcg_target_long val, TCGType type, - const TCGArgConstraint *arg_ct) -{ - /* No need to return 0 or 1, 0 or != 0 is good enough. */ - return arg_ct->ct & TCG_CT_CONST; -} - -static void tcg_target_init(TCGContext *s) -{ -#if defined(CONFIG_DEBUG_TCG_INTERPRETER) - const char *envval = getenv("DEBUG_TCG"); - if (envval) { - qemu_set_log(strtol(envval, NULL, 0)); - } -#endif - - /* The current code uses uint8_t for tcg operations. */ - assert(tcg_op_defs_max <= UINT8_MAX); - - /* Registers available for 32 bit operations. */ - tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, - BIT(TCG_TARGET_NB_REGS) - 1); - /* Registers available for 64 bit operations. */ - tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I64], 0, - BIT(TCG_TARGET_NB_REGS) - 1); - /* TODO: Which registers should be set here? */ - tcg_regset_set32(tcg_target_call_clobber_regs, 0, - BIT(TCG_TARGET_NB_REGS) - 1); - - tcg_regset_clear(s->reserved_regs); - tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK); - tcg_add_target_add_op_defs(tcg_target_op_defs); - - /* We use negative offsets from "sp" so that we can distinguish - stores that might pretend to be call arguments. */ - tcg_set_frame(s, TCG_REG_CALL_STACK, - -CPU_TEMP_BUF_NLONGS * sizeof(long), - CPU_TEMP_BUF_NLONGS * sizeof(long)); -} - -/* Generate global QEMU prologue and epilogue code. */ -static inline void tcg_target_qemu_prologue(TCGContext *s) -{ -} diff --git a/tcg/tci/tcg-target.inc.c b/tcg/tci/tcg-target.inc.c new file mode 100644 index 0000000..16ce048 --- /dev/null +++ b/tcg/tci/tcg-target.inc.c @@ -0,0 +1,880 @@ +/* + * Tiny Code Generator for QEMU + * + * Copyright (c) 2009, 2011 Stefan Weil + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "qemu/osdep.h" +#include "tcg-be-null.h" + +/* TODO list: + * - See TODO comments in code. + */ + +/* Marker for missing code. */ +#define TODO() \ + do { \ + fprintf(stderr, "TODO %s:%u: %s()\n", \ + __FILE__, __LINE__, __func__); \ + tcg_abort(); \ + } while (0) + +/* Bitfield n...m (in 32 bit value). */ +#define BITS(n, m) (((0xffffffffU << (31 - n)) >> (31 - n + m)) << m) + +/* Macros used in tcg_target_op_defs. */ +#define R "r" +#define RI "ri" +#if TCG_TARGET_REG_BITS == 32 +# define R64 "r", "r" +#else +# define R64 "r" +#endif +#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS +# define L "L", "L" +# define S "S", "S" +#else +# define L "L" +# define S "S" +#endif + +/* TODO: documentation. */ +static const TCGTargetOpDef tcg_target_op_defs[] = { + { INDEX_op_exit_tb, { NULL } }, + { INDEX_op_goto_tb, { NULL } }, + { INDEX_op_br, { NULL } }, + + { INDEX_op_ld8u_i32, { R, R } }, + { INDEX_op_ld8s_i32, { R, R } }, + { INDEX_op_ld16u_i32, { R, R } }, + { INDEX_op_ld16s_i32, { R, R } }, + { INDEX_op_ld_i32, { R, R } }, + { INDEX_op_st8_i32, { R, R } }, + { INDEX_op_st16_i32, { R, R } }, + { INDEX_op_st_i32, { R, R } }, + + { INDEX_op_add_i32, { R, RI, RI } }, + { INDEX_op_sub_i32, { R, RI, RI } }, + { INDEX_op_mul_i32, { R, RI, RI } }, +#if TCG_TARGET_HAS_div_i32 + { INDEX_op_div_i32, { R, R, R } }, + { INDEX_op_divu_i32, { R, R, R } }, + { INDEX_op_rem_i32, { R, R, R } }, + { INDEX_op_remu_i32, { R, R, R } }, +#elif TCG_TARGET_HAS_div2_i32 + { INDEX_op_div2_i32, { R, R, "0", "1", R } }, + { INDEX_op_divu2_i32, { R, R, "0", "1", R } }, +#endif + /* TODO: Does R, RI, RI result in faster code than R, R, RI? + If both operands are constants, we can optimize. */ + { INDEX_op_and_i32, { R, RI, RI } }, +#if TCG_TARGET_HAS_andc_i32 + { INDEX_op_andc_i32, { R, RI, RI } }, +#endif +#if TCG_TARGET_HAS_eqv_i32 + { INDEX_op_eqv_i32, { R, RI, RI } }, +#endif +#if TCG_TARGET_HAS_nand_i32 + { INDEX_op_nand_i32, { R, RI, RI } }, +#endif +#if TCG_TARGET_HAS_nor_i32 + { INDEX_op_nor_i32, { R, RI, RI } }, +#endif + { INDEX_op_or_i32, { R, RI, RI } }, +#if TCG_TARGET_HAS_orc_i32 + { INDEX_op_orc_i32, { R, RI, RI } }, +#endif + { INDEX_op_xor_i32, { R, RI, RI } }, + { INDEX_op_shl_i32, { R, RI, RI } }, + { INDEX_op_shr_i32, { R, RI, RI } }, + { INDEX_op_sar_i32, { R, RI, RI } }, +#if TCG_TARGET_HAS_rot_i32 + { INDEX_op_rotl_i32, { R, RI, RI } }, + { INDEX_op_rotr_i32, { R, RI, RI } }, +#endif +#if TCG_TARGET_HAS_deposit_i32 + { INDEX_op_deposit_i32, { R, "0", R } }, +#endif + + { INDEX_op_brcond_i32, { R, RI } }, + + { INDEX_op_setcond_i32, { R, R, RI } }, +#if TCG_TARGET_REG_BITS == 64 + { INDEX_op_setcond_i64, { R, R, RI } }, +#endif /* TCG_TARGET_REG_BITS == 64 */ + +#if TCG_TARGET_REG_BITS == 32 + /* TODO: Support R, R, R, R, RI, RI? Will it be faster? */ + { INDEX_op_add2_i32, { R, R, R, R, R, R } }, + { INDEX_op_sub2_i32, { R, R, R, R, R, R } }, + { INDEX_op_brcond2_i32, { R, R, RI, RI } }, + { INDEX_op_mulu2_i32, { R, R, R, R } }, + { INDEX_op_setcond2_i32, { R, R, R, RI, RI } }, +#endif + +#if TCG_TARGET_HAS_not_i32 + { INDEX_op_not_i32, { R, R } }, +#endif +#if TCG_TARGET_HAS_neg_i32 + { INDEX_op_neg_i32, { R, R } }, +#endif + +#if TCG_TARGET_REG_BITS == 64 + { INDEX_op_ld8u_i64, { R, R } }, + { INDEX_op_ld8s_i64, { R, R } }, + { INDEX_op_ld16u_i64, { R, R } }, + { INDEX_op_ld16s_i64, { R, R } }, + { INDEX_op_ld32u_i64, { R, R } }, + { INDEX_op_ld32s_i64, { R, R } }, + { INDEX_op_ld_i64, { R, R } }, + + { INDEX_op_st8_i64, { R, R } }, + { INDEX_op_st16_i64, { R, R } }, + { INDEX_op_st32_i64, { R, R } }, + { INDEX_op_st_i64, { R, R } }, + + { INDEX_op_add_i64, { R, RI, RI } }, + { INDEX_op_sub_i64, { R, RI, RI } }, + { INDEX_op_mul_i64, { R, RI, RI } }, +#if TCG_TARGET_HAS_div_i64 + { INDEX_op_div_i64, { R, R, R } }, + { INDEX_op_divu_i64, { R, R, R } }, + { INDEX_op_rem_i64, { R, R, R } }, + { INDEX_op_remu_i64, { R, R, R } }, +#elif TCG_TARGET_HAS_div2_i64 + { INDEX_op_div2_i64, { R, R, "0", "1", R } }, + { INDEX_op_divu2_i64, { R, R, "0", "1", R } }, +#endif + { INDEX_op_and_i64, { R, RI, RI } }, +#if TCG_TARGET_HAS_andc_i64 + { INDEX_op_andc_i64, { R, RI, RI } }, +#endif +#if TCG_TARGET_HAS_eqv_i64 + { INDEX_op_eqv_i64, { R, RI, RI } }, +#endif +#if TCG_TARGET_HAS_nand_i64 + { INDEX_op_nand_i64, { R, RI, RI } }, +#endif +#if TCG_TARGET_HAS_nor_i64 + { INDEX_op_nor_i64, { R, RI, RI } }, +#endif + { INDEX_op_or_i64, { R, RI, RI } }, +#if TCG_TARGET_HAS_orc_i64 + { INDEX_op_orc_i64, { R, RI, RI } }, +#endif + { INDEX_op_xor_i64, { R, RI, RI } }, + { INDEX_op_shl_i64, { R, RI, RI } }, + { INDEX_op_shr_i64, { R, RI, RI } }, + { INDEX_op_sar_i64, { R, RI, RI } }, +#if TCG_TARGET_HAS_rot_i64 + { INDEX_op_rotl_i64, { R, RI, RI } }, + { INDEX_op_rotr_i64, { R, RI, RI } }, +#endif +#if TCG_TARGET_HAS_deposit_i64 + { INDEX_op_deposit_i64, { R, "0", R } }, +#endif + { INDEX_op_brcond_i64, { R, RI } }, + +#if TCG_TARGET_HAS_ext8s_i64 + { INDEX_op_ext8s_i64, { R, R } }, +#endif +#if TCG_TARGET_HAS_ext16s_i64 + { INDEX_op_ext16s_i64, { R, R } }, +#endif +#if TCG_TARGET_HAS_ext32s_i64 + { INDEX_op_ext32s_i64, { R, R } }, +#endif +#if TCG_TARGET_HAS_ext8u_i64 + { INDEX_op_ext8u_i64, { R, R } }, +#endif +#if TCG_TARGET_HAS_ext16u_i64 + { INDEX_op_ext16u_i64, { R, R } }, +#endif +#if TCG_TARGET_HAS_ext32u_i64 + { INDEX_op_ext32u_i64, { R, R } }, +#endif + { INDEX_op_ext_i32_i64, { R, R } }, + { INDEX_op_extu_i32_i64, { R, R } }, +#if TCG_TARGET_HAS_bswap16_i64 + { INDEX_op_bswap16_i64, { R, R } }, +#endif +#if TCG_TARGET_HAS_bswap32_i64 + { INDEX_op_bswap32_i64, { R, R } }, +#endif +#if TCG_TARGET_HAS_bswap64_i64 + { INDEX_op_bswap64_i64, { R, R } }, +#endif +#if TCG_TARGET_HAS_not_i64 + { INDEX_op_not_i64, { R, R } }, +#endif +#if TCG_TARGET_HAS_neg_i64 + { INDEX_op_neg_i64, { R, R } }, +#endif +#endif /* TCG_TARGET_REG_BITS == 64 */ + + { INDEX_op_qemu_ld_i32, { R, L } }, + { INDEX_op_qemu_ld_i64, { R64, L } }, + + { INDEX_op_qemu_st_i32, { R, S } }, + { INDEX_op_qemu_st_i64, { R64, S } }, + +#if TCG_TARGET_HAS_ext8s_i32 + { INDEX_op_ext8s_i32, { R, R } }, +#endif +#if TCG_TARGET_HAS_ext16s_i32 + { INDEX_op_ext16s_i32, { R, R } }, +#endif +#if TCG_TARGET_HAS_ext8u_i32 + { INDEX_op_ext8u_i32, { R, R } }, +#endif +#if TCG_TARGET_HAS_ext16u_i32 + { INDEX_op_ext16u_i32, { R, R } }, +#endif + +#if TCG_TARGET_HAS_bswap16_i32 + { INDEX_op_bswap16_i32, { R, R } }, +#endif +#if TCG_TARGET_HAS_bswap32_i32 + { INDEX_op_bswap32_i32, { R, R } }, +#endif + + { -1 }, +}; + +static const int tcg_target_reg_alloc_order[] = { + TCG_REG_R0, + TCG_REG_R1, + TCG_REG_R2, + TCG_REG_R3, +#if 0 /* used for TCG_REG_CALL_STACK */ + TCG_REG_R4, +#endif + TCG_REG_R5, + TCG_REG_R6, + TCG_REG_R7, +#if TCG_TARGET_NB_REGS >= 16 + TCG_REG_R8, + TCG_REG_R9, + TCG_REG_R10, + TCG_REG_R11, + TCG_REG_R12, + TCG_REG_R13, + TCG_REG_R14, + TCG_REG_R15, +#endif +}; + +#if MAX_OPC_PARAM_IARGS != 5 +# error Fix needed, number of supported input arguments changed! +#endif + +static const int tcg_target_call_iarg_regs[] = { + TCG_REG_R0, + TCG_REG_R1, + TCG_REG_R2, + TCG_REG_R3, +#if 0 /* used for TCG_REG_CALL_STACK */ + TCG_REG_R4, +#endif + TCG_REG_R5, +#if TCG_TARGET_REG_BITS == 32 + /* 32 bit hosts need 2 * MAX_OPC_PARAM_IARGS registers. */ + TCG_REG_R6, + TCG_REG_R7, +#if TCG_TARGET_NB_REGS >= 16 + TCG_REG_R8, + TCG_REG_R9, + TCG_REG_R10, +#else +# error Too few input registers available +#endif +#endif +}; + +static const int tcg_target_call_oarg_regs[] = { + TCG_REG_R0, +#if TCG_TARGET_REG_BITS == 32 + TCG_REG_R1 +#endif +}; + +#ifndef NDEBUG +static const char *const tcg_target_reg_names[TCG_TARGET_NB_REGS] = { + "r00", + "r01", + "r02", + "r03", + "r04", + "r05", + "r06", + "r07", +#if TCG_TARGET_NB_REGS >= 16 + "r08", + "r09", + "r10", + "r11", + "r12", + "r13", + "r14", + "r15", +#if TCG_TARGET_NB_REGS >= 32 + "r16", + "r17", + "r18", + "r19", + "r20", + "r21", + "r22", + "r23", + "r24", + "r25", + "r26", + "r27", + "r28", + "r29", + "r30", + "r31" +#endif +#endif +}; +#endif + +static void patch_reloc(tcg_insn_unit *code_ptr, int type, + intptr_t value, intptr_t addend) +{ + /* tcg_out_reloc always uses the same type, addend. */ + assert(type == sizeof(tcg_target_long)); + assert(addend == 0); + assert(value != 0); + if (TCG_TARGET_REG_BITS == 32) { + tcg_patch32(code_ptr, value); + } else { + tcg_patch64(code_ptr, value); + } +} + +/* Parse target specific constraints. */ +static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str) +{ + const char *ct_str = *pct_str; + switch (ct_str[0]) { + case 'r': + case 'L': /* qemu_ld constraint */ + case 'S': /* qemu_st constraint */ + ct->ct |= TCG_CT_REG; + tcg_regset_set32(ct->u.regs, 0, BIT(TCG_TARGET_NB_REGS) - 1); + break; + default: + return -1; + } + ct_str++; + *pct_str = ct_str; + return 0; +} + +#if defined(CONFIG_DEBUG_TCG_INTERPRETER) +/* Show current bytecode. Used by tcg interpreter. */ +void tci_disas(uint8_t opc) +{ + const TCGOpDef *def = &tcg_op_defs[opc]; + fprintf(stderr, "TCG %s %u, %u, %u\n", + def->name, def->nb_oargs, def->nb_iargs, def->nb_cargs); +} +#endif + +/* Write value (native size). */ +static void tcg_out_i(TCGContext *s, tcg_target_ulong v) +{ + if (TCG_TARGET_REG_BITS == 32) { + tcg_out32(s, v); + } else { + tcg_out64(s, v); + } +} + +/* Write opcode. */ +static void tcg_out_op_t(TCGContext *s, TCGOpcode op) +{ + tcg_out8(s, op); + tcg_out8(s, 0); +} + +/* Write register. */ +static void tcg_out_r(TCGContext *s, TCGArg t0) +{ + assert(t0 < TCG_TARGET_NB_REGS); + tcg_out8(s, t0); +} + +/* Write register or constant (native size). */ +static void tcg_out_ri(TCGContext *s, int const_arg, TCGArg arg) +{ + if (const_arg) { + assert(const_arg == 1); + tcg_out8(s, TCG_CONST); + tcg_out_i(s, arg); + } else { + tcg_out_r(s, arg); + } +} + +/* Write register or constant (32 bit). */ +static void tcg_out_ri32(TCGContext *s, int const_arg, TCGArg arg) +{ + if (const_arg) { + assert(const_arg == 1); + tcg_out8(s, TCG_CONST); + tcg_out32(s, arg); + } else { + tcg_out_r(s, arg); + } +} + +#if TCG_TARGET_REG_BITS == 64 +/* Write register or constant (64 bit). */ +static void tcg_out_ri64(TCGContext *s, int const_arg, TCGArg arg) +{ + if (const_arg) { + assert(const_arg == 1); + tcg_out8(s, TCG_CONST); + tcg_out64(s, arg); + } else { + tcg_out_r(s, arg); + } +} +#endif + +/* Write label. */ +static void tci_out_label(TCGContext *s, TCGLabel *label) +{ + if (label->has_value) { + tcg_out_i(s, label->u.value); + assert(label->u.value); + } else { + tcg_out_reloc(s, s->code_ptr, sizeof(tcg_target_ulong), label, 0); + s->code_ptr += sizeof(tcg_target_ulong); + } +} + +static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1, + intptr_t arg2) +{ + uint8_t *old_code_ptr = s->code_ptr; + if (type == TCG_TYPE_I32) { + tcg_out_op_t(s, INDEX_op_ld_i32); + tcg_out_r(s, ret); + tcg_out_r(s, arg1); + tcg_out32(s, arg2); + } else { + assert(type == TCG_TYPE_I64); +#if TCG_TARGET_REG_BITS == 64 + tcg_out_op_t(s, INDEX_op_ld_i64); + tcg_out_r(s, ret); + tcg_out_r(s, arg1); + assert(arg2 == (int32_t)arg2); + tcg_out32(s, arg2); +#else + TODO(); +#endif + } + old_code_ptr[1] = s->code_ptr - old_code_ptr; +} + +static void tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg) +{ + uint8_t *old_code_ptr = s->code_ptr; + assert(ret != arg); +#if TCG_TARGET_REG_BITS == 32 + tcg_out_op_t(s, INDEX_op_mov_i32); +#else + tcg_out_op_t(s, INDEX_op_mov_i64); +#endif + tcg_out_r(s, ret); + tcg_out_r(s, arg); + old_code_ptr[1] = s->code_ptr - old_code_ptr; +} + +static void tcg_out_movi(TCGContext *s, TCGType type, + TCGReg t0, tcg_target_long arg) +{ + uint8_t *old_code_ptr = s->code_ptr; + uint32_t arg32 = arg; + if (type == TCG_TYPE_I32 || arg == arg32) { + tcg_out_op_t(s, INDEX_op_movi_i32); + tcg_out_r(s, t0); + tcg_out32(s, arg32); + } else { + assert(type == TCG_TYPE_I64); +#if TCG_TARGET_REG_BITS == 64 + tcg_out_op_t(s, INDEX_op_movi_i64); + tcg_out_r(s, t0); + tcg_out64(s, arg); +#else + TODO(); +#endif + } + old_code_ptr[1] = s->code_ptr - old_code_ptr; +} + +static inline void tcg_out_call(TCGContext *s, tcg_insn_unit *arg) +{ + uint8_t *old_code_ptr = s->code_ptr; + tcg_out_op_t(s, INDEX_op_call); + tcg_out_ri(s, 1, (uintptr_t)arg); + old_code_ptr[1] = s->code_ptr - old_code_ptr; +} + +static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, + const int *const_args) +{ + uint8_t *old_code_ptr = s->code_ptr; + + tcg_out_op_t(s, opc); + + switch (opc) { + case INDEX_op_exit_tb: + tcg_out64(s, args[0]); + break; + case INDEX_op_goto_tb: + if (s->tb_jmp_offset) { + /* Direct jump method. */ + assert(args[0] < ARRAY_SIZE(s->tb_jmp_offset)); + s->tb_jmp_offset[args[0]] = tcg_current_code_size(s); + tcg_out32(s, 0); + } else { + /* Indirect jump method. */ + TODO(); + } + assert(args[0] < ARRAY_SIZE(s->tb_next_offset)); + s->tb_next_offset[args[0]] = tcg_current_code_size(s); + break; + case INDEX_op_br: + tci_out_label(s, arg_label(args[0])); + break; + case INDEX_op_setcond_i32: + tcg_out_r(s, args[0]); + tcg_out_r(s, args[1]); + tcg_out_ri32(s, const_args[2], args[2]); + tcg_out8(s, args[3]); /* condition */ + break; +#if TCG_TARGET_REG_BITS == 32 + case INDEX_op_setcond2_i32: + /* setcond2_i32 cond, t0, t1_low, t1_high, t2_low, t2_high */ + tcg_out_r(s, args[0]); + tcg_out_r(s, args[1]); + tcg_out_r(s, args[2]); + tcg_out_ri32(s, const_args[3], args[3]); + tcg_out_ri32(s, const_args[4], args[4]); + tcg_out8(s, args[5]); /* condition */ + break; +#elif TCG_TARGET_REG_BITS == 64 + case INDEX_op_setcond_i64: + tcg_out_r(s, args[0]); + tcg_out_r(s, args[1]); + tcg_out_ri64(s, const_args[2], args[2]); + tcg_out8(s, args[3]); /* condition */ + break; +#endif + case INDEX_op_ld8u_i32: + case INDEX_op_ld8s_i32: + case INDEX_op_ld16u_i32: + case INDEX_op_ld16s_i32: + case INDEX_op_ld_i32: + case INDEX_op_st8_i32: + case INDEX_op_st16_i32: + case INDEX_op_st_i32: + case INDEX_op_ld8u_i64: + case INDEX_op_ld8s_i64: + case INDEX_op_ld16u_i64: + case INDEX_op_ld16s_i64: + case INDEX_op_ld32u_i64: + case INDEX_op_ld32s_i64: + case INDEX_op_ld_i64: + case INDEX_op_st8_i64: + case INDEX_op_st16_i64: + case INDEX_op_st32_i64: + case INDEX_op_st_i64: + tcg_out_r(s, args[0]); + tcg_out_r(s, args[1]); + assert(args[2] == (int32_t)args[2]); + tcg_out32(s, args[2]); + break; + case INDEX_op_add_i32: + case INDEX_op_sub_i32: + case INDEX_op_mul_i32: + case INDEX_op_and_i32: + case INDEX_op_andc_i32: /* Optional (TCG_TARGET_HAS_andc_i32). */ + case INDEX_op_eqv_i32: /* Optional (TCG_TARGET_HAS_eqv_i32). */ + case INDEX_op_nand_i32: /* Optional (TCG_TARGET_HAS_nand_i32). */ + case INDEX_op_nor_i32: /* Optional (TCG_TARGET_HAS_nor_i32). */ + case INDEX_op_or_i32: + case INDEX_op_orc_i32: /* Optional (TCG_TARGET_HAS_orc_i32). */ + case INDEX_op_xor_i32: + case INDEX_op_shl_i32: + case INDEX_op_shr_i32: + case INDEX_op_sar_i32: + case INDEX_op_rotl_i32: /* Optional (TCG_TARGET_HAS_rot_i32). */ + case INDEX_op_rotr_i32: /* Optional (TCG_TARGET_HAS_rot_i32). */ + tcg_out_r(s, args[0]); + tcg_out_ri32(s, const_args[1], args[1]); + tcg_out_ri32(s, const_args[2], args[2]); + break; + case INDEX_op_deposit_i32: /* Optional (TCG_TARGET_HAS_deposit_i32). */ + tcg_out_r(s, args[0]); + tcg_out_r(s, args[1]); + tcg_out_r(s, args[2]); + assert(args[3] <= UINT8_MAX); + tcg_out8(s, args[3]); + assert(args[4] <= UINT8_MAX); + tcg_out8(s, args[4]); + break; + +#if TCG_TARGET_REG_BITS == 64 + case INDEX_op_add_i64: + case INDEX_op_sub_i64: + case INDEX_op_mul_i64: + case INDEX_op_and_i64: + case INDEX_op_andc_i64: /* Optional (TCG_TARGET_HAS_andc_i64). */ + case INDEX_op_eqv_i64: /* Optional (TCG_TARGET_HAS_eqv_i64). */ + case INDEX_op_nand_i64: /* Optional (TCG_TARGET_HAS_nand_i64). */ + case INDEX_op_nor_i64: /* Optional (TCG_TARGET_HAS_nor_i64). */ + case INDEX_op_or_i64: + case INDEX_op_orc_i64: /* Optional (TCG_TARGET_HAS_orc_i64). */ + case INDEX_op_xor_i64: + case INDEX_op_shl_i64: + case INDEX_op_shr_i64: + case INDEX_op_sar_i64: + case INDEX_op_rotl_i64: /* Optional (TCG_TARGET_HAS_rot_i64). */ + case INDEX_op_rotr_i64: /* Optional (TCG_TARGET_HAS_rot_i64). */ + tcg_out_r(s, args[0]); + tcg_out_ri64(s, const_args[1], args[1]); + tcg_out_ri64(s, const_args[2], args[2]); + break; + case INDEX_op_deposit_i64: /* Optional (TCG_TARGET_HAS_deposit_i64). */ + tcg_out_r(s, args[0]); + tcg_out_r(s, args[1]); + tcg_out_r(s, args[2]); + assert(args[3] <= UINT8_MAX); + tcg_out8(s, args[3]); + assert(args[4] <= UINT8_MAX); + tcg_out8(s, args[4]); + break; + case INDEX_op_div_i64: /* Optional (TCG_TARGET_HAS_div_i64). */ + case INDEX_op_divu_i64: /* Optional (TCG_TARGET_HAS_div_i64). */ + case INDEX_op_rem_i64: /* Optional (TCG_TARGET_HAS_div_i64). */ + case INDEX_op_remu_i64: /* Optional (TCG_TARGET_HAS_div_i64). */ + TODO(); + break; + case INDEX_op_div2_i64: /* Optional (TCG_TARGET_HAS_div2_i64). */ + case INDEX_op_divu2_i64: /* Optional (TCG_TARGET_HAS_div2_i64). */ + TODO(); + break; + case INDEX_op_brcond_i64: + tcg_out_r(s, args[0]); + tcg_out_ri64(s, const_args[1], args[1]); + tcg_out8(s, args[2]); /* condition */ + tci_out_label(s, arg_label(args[3])); + break; + case INDEX_op_bswap16_i64: /* Optional (TCG_TARGET_HAS_bswap16_i64). */ + case INDEX_op_bswap32_i64: /* Optional (TCG_TARGET_HAS_bswap32_i64). */ + case INDEX_op_bswap64_i64: /* Optional (TCG_TARGET_HAS_bswap64_i64). */ + case INDEX_op_not_i64: /* Optional (TCG_TARGET_HAS_not_i64). */ + case INDEX_op_neg_i64: /* Optional (TCG_TARGET_HAS_neg_i64). */ + case INDEX_op_ext8s_i64: /* Optional (TCG_TARGET_HAS_ext8s_i64). */ + case INDEX_op_ext8u_i64: /* Optional (TCG_TARGET_HAS_ext8u_i64). */ + case INDEX_op_ext16s_i64: /* Optional (TCG_TARGET_HAS_ext16s_i64). */ + case INDEX_op_ext16u_i64: /* Optional (TCG_TARGET_HAS_ext16u_i64). */ + case INDEX_op_ext32s_i64: /* Optional (TCG_TARGET_HAS_ext32s_i64). */ + case INDEX_op_ext32u_i64: /* Optional (TCG_TARGET_HAS_ext32u_i64). */ + case INDEX_op_ext_i32_i64: + case INDEX_op_extu_i32_i64: +#endif /* TCG_TARGET_REG_BITS == 64 */ + case INDEX_op_neg_i32: /* Optional (TCG_TARGET_HAS_neg_i32). */ + case INDEX_op_not_i32: /* Optional (TCG_TARGET_HAS_not_i32). */ + case INDEX_op_ext8s_i32: /* Optional (TCG_TARGET_HAS_ext8s_i32). */ + case INDEX_op_ext16s_i32: /* Optional (TCG_TARGET_HAS_ext16s_i32). */ + case INDEX_op_ext8u_i32: /* Optional (TCG_TARGET_HAS_ext8u_i32). */ + case INDEX_op_ext16u_i32: /* Optional (TCG_TARGET_HAS_ext16u_i32). */ + case INDEX_op_bswap16_i32: /* Optional (TCG_TARGET_HAS_bswap16_i32). */ + case INDEX_op_bswap32_i32: /* Optional (TCG_TARGET_HAS_bswap32_i32). */ + tcg_out_r(s, args[0]); + tcg_out_r(s, args[1]); + break; + case INDEX_op_div_i32: /* Optional (TCG_TARGET_HAS_div_i32). */ + case INDEX_op_divu_i32: /* Optional (TCG_TARGET_HAS_div_i32). */ + case INDEX_op_rem_i32: /* Optional (TCG_TARGET_HAS_div_i32). */ + case INDEX_op_remu_i32: /* Optional (TCG_TARGET_HAS_div_i32). */ + tcg_out_r(s, args[0]); + tcg_out_ri32(s, const_args[1], args[1]); + tcg_out_ri32(s, const_args[2], args[2]); + break; + case INDEX_op_div2_i32: /* Optional (TCG_TARGET_HAS_div2_i32). */ + case INDEX_op_divu2_i32: /* Optional (TCG_TARGET_HAS_div2_i32). */ + TODO(); + break; +#if TCG_TARGET_REG_BITS == 32 + case INDEX_op_add2_i32: + case INDEX_op_sub2_i32: + tcg_out_r(s, args[0]); + tcg_out_r(s, args[1]); + tcg_out_r(s, args[2]); + tcg_out_r(s, args[3]); + tcg_out_r(s, args[4]); + tcg_out_r(s, args[5]); + break; + case INDEX_op_brcond2_i32: + tcg_out_r(s, args[0]); + tcg_out_r(s, args[1]); + tcg_out_ri32(s, const_args[2], args[2]); + tcg_out_ri32(s, const_args[3], args[3]); + tcg_out8(s, args[4]); /* condition */ + tci_out_label(s, arg_label(args[5])); + break; + case INDEX_op_mulu2_i32: + tcg_out_r(s, args[0]); + tcg_out_r(s, args[1]); + tcg_out_r(s, args[2]); + tcg_out_r(s, args[3]); + break; +#endif + case INDEX_op_brcond_i32: + tcg_out_r(s, args[0]); + tcg_out_ri32(s, const_args[1], args[1]); + tcg_out8(s, args[2]); /* condition */ + tci_out_label(s, arg_label(args[3])); + break; + case INDEX_op_qemu_ld_i32: + tcg_out_r(s, *args++); + tcg_out_r(s, *args++); + if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) { + tcg_out_r(s, *args++); + } + tcg_out_i(s, *args++); + break; + case INDEX_op_qemu_ld_i64: + tcg_out_r(s, *args++); + if (TCG_TARGET_REG_BITS == 32) { + tcg_out_r(s, *args++); + } + tcg_out_r(s, *args++); + if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) { + tcg_out_r(s, *args++); + } + tcg_out_i(s, *args++); + break; + case INDEX_op_qemu_st_i32: + tcg_out_r(s, *args++); + tcg_out_r(s, *args++); + if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) { + tcg_out_r(s, *args++); + } + tcg_out_i(s, *args++); + break; + case INDEX_op_qemu_st_i64: + tcg_out_r(s, *args++); + if (TCG_TARGET_REG_BITS == 32) { + tcg_out_r(s, *args++); + } + tcg_out_r(s, *args++); + if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) { + tcg_out_r(s, *args++); + } + tcg_out_i(s, *args++); + break; + case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */ + case INDEX_op_mov_i64: + case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi. */ + case INDEX_op_movi_i64: + case INDEX_op_call: /* Always emitted via tcg_out_call. */ + default: + tcg_abort(); + } + old_code_ptr[1] = s->code_ptr - old_code_ptr; +} + +static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1, + intptr_t arg2) +{ + uint8_t *old_code_ptr = s->code_ptr; + if (type == TCG_TYPE_I32) { + tcg_out_op_t(s, INDEX_op_st_i32); + tcg_out_r(s, arg); + tcg_out_r(s, arg1); + tcg_out32(s, arg2); + } else { + assert(type == TCG_TYPE_I64); +#if TCG_TARGET_REG_BITS == 64 + tcg_out_op_t(s, INDEX_op_st_i64); + tcg_out_r(s, arg); + tcg_out_r(s, arg1); + tcg_out32(s, arg2); +#else + TODO(); +#endif + } + old_code_ptr[1] = s->code_ptr - old_code_ptr; +} + +/* Test if a constant matches the constraint. */ +static int tcg_target_const_match(tcg_target_long val, TCGType type, + const TCGArgConstraint *arg_ct) +{ + /* No need to return 0 or 1, 0 or != 0 is good enough. */ + return arg_ct->ct & TCG_CT_CONST; +} + +static void tcg_target_init(TCGContext *s) +{ +#if defined(CONFIG_DEBUG_TCG_INTERPRETER) + const char *envval = getenv("DEBUG_TCG"); + if (envval) { + qemu_set_log(strtol(envval, NULL, 0)); + } +#endif + + /* The current code uses uint8_t for tcg operations. */ + assert(tcg_op_defs_max <= UINT8_MAX); + + /* Registers available for 32 bit operations. */ + tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, + BIT(TCG_TARGET_NB_REGS) - 1); + /* Registers available for 64 bit operations. */ + tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I64], 0, + BIT(TCG_TARGET_NB_REGS) - 1); + /* TODO: Which registers should be set here? */ + tcg_regset_set32(tcg_target_call_clobber_regs, 0, + BIT(TCG_TARGET_NB_REGS) - 1); + + tcg_regset_clear(s->reserved_regs); + tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK); + tcg_add_target_add_op_defs(tcg_target_op_defs); + + /* We use negative offsets from "sp" so that we can distinguish + stores that might pretend to be call arguments. */ + tcg_set_frame(s, TCG_REG_CALL_STACK, + -CPU_TEMP_BUF_NLONGS * sizeof(long), + CPU_TEMP_BUF_NLONGS * sizeof(long)); +} + +/* Generate global QEMU prologue and epilogue code. */ +static inline void tcg_target_qemu_prologue(TCGContext *s) +{ +} -- cgit v1.1