diff options
32 files changed, 1456 insertions, 1092 deletions
diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c index e67b161..ba09fd0 100644 --- a/accel/tcg/user-exec.c +++ b/accel/tcg/user-exec.c @@ -254,28 +254,35 @@ void *probe_access(CPUArchState *env, target_ulong addr, int size, #if defined(__NetBSD__) #include <ucontext.h> +#include <machine/trap.h> #define EIP_sig(context) ((context)->uc_mcontext.__gregs[_REG_EIP]) #define TRAP_sig(context) ((context)->uc_mcontext.__gregs[_REG_TRAPNO]) #define ERROR_sig(context) ((context)->uc_mcontext.__gregs[_REG_ERR]) #define MASK_sig(context) ((context)->uc_sigmask) +#define PAGE_FAULT_TRAP T_PAGEFLT #elif defined(__FreeBSD__) || defined(__DragonFly__) #include <ucontext.h> +#include <machine/trap.h> #define EIP_sig(context) (*((unsigned long *)&(context)->uc_mcontext.mc_eip)) #define TRAP_sig(context) ((context)->uc_mcontext.mc_trapno) #define ERROR_sig(context) ((context)->uc_mcontext.mc_err) #define MASK_sig(context) ((context)->uc_sigmask) +#define PAGE_FAULT_TRAP T_PAGEFLT #elif defined(__OpenBSD__) +#include <machine/trap.h> #define EIP_sig(context) ((context)->sc_eip) #define TRAP_sig(context) ((context)->sc_trapno) #define ERROR_sig(context) ((context)->sc_err) #define MASK_sig(context) ((context)->sc_mask) +#define PAGE_FAULT_TRAP T_PAGEFLT #else #define EIP_sig(context) ((context)->uc_mcontext.gregs[REG_EIP]) #define TRAP_sig(context) ((context)->uc_mcontext.gregs[REG_TRAPNO]) #define ERROR_sig(context) ((context)->uc_mcontext.gregs[REG_ERR]) #define MASK_sig(context) ((context)->uc_sigmask) +#define PAGE_FAULT_TRAP 0xe #endif int cpu_signal_handler(int host_signum, void *pinfo, @@ -301,34 +308,42 @@ int cpu_signal_handler(int host_signum, void *pinfo, pc = EIP_sig(uc); trapno = TRAP_sig(uc); return handle_cpu_signal(pc, info, - trapno == 0xe ? (ERROR_sig(uc) >> 1) & 1 : 0, + trapno == PAGE_FAULT_TRAP ? + (ERROR_sig(uc) >> 1) & 1 : 0, &MASK_sig(uc)); } #elif defined(__x86_64__) #ifdef __NetBSD__ +#include <machine/trap.h> #define PC_sig(context) _UC_MACHINE_PC(context) #define TRAP_sig(context) ((context)->uc_mcontext.__gregs[_REG_TRAPNO]) #define ERROR_sig(context) ((context)->uc_mcontext.__gregs[_REG_ERR]) #define MASK_sig(context) ((context)->uc_sigmask) +#define PAGE_FAULT_TRAP T_PAGEFLT #elif defined(__OpenBSD__) +#include <machine/trap.h> #define PC_sig(context) ((context)->sc_rip) #define TRAP_sig(context) ((context)->sc_trapno) #define ERROR_sig(context) ((context)->sc_err) #define MASK_sig(context) ((context)->sc_mask) +#define PAGE_FAULT_TRAP T_PAGEFLT #elif defined(__FreeBSD__) || defined(__DragonFly__) #include <ucontext.h> +#include <machine/trap.h> #define PC_sig(context) (*((unsigned long *)&(context)->uc_mcontext.mc_rip)) #define TRAP_sig(context) ((context)->uc_mcontext.mc_trapno) #define ERROR_sig(context) ((context)->uc_mcontext.mc_err) #define MASK_sig(context) ((context)->uc_sigmask) +#define PAGE_FAULT_TRAP T_PAGEFLT #else #define PC_sig(context) ((context)->uc_mcontext.gregs[REG_RIP]) #define TRAP_sig(context) ((context)->uc_mcontext.gregs[REG_TRAPNO]) #define ERROR_sig(context) ((context)->uc_mcontext.gregs[REG_ERR]) #define MASK_sig(context) ((context)->uc_sigmask) +#define PAGE_FAULT_TRAP 0xe #endif int cpu_signal_handler(int host_signum, void *pinfo, @@ -346,7 +361,8 @@ int cpu_signal_handler(int host_signum, void *pinfo, pc = PC_sig(uc); return handle_cpu_signal(pc, info, - TRAP_sig(uc) == 0xe ? (ERROR_sig(uc) >> 1) & 1 : 0, + TRAP_sig(uc) == PAGE_FAULT_TRAP ? + (ERROR_sig(uc) >> 1) & 1 : 0, &MASK_sig(uc)); } diff --git a/include/tcg/tcg-op-gvec.h b/include/tcg/tcg-op-gvec.h index c69a7de..da55fed 100644 --- a/include/tcg/tcg-op-gvec.h +++ b/include/tcg/tcg-op-gvec.h @@ -401,4 +401,47 @@ void tcg_gen_vec_sar16i_i64(TCGv_i64 d, TCGv_i64 a, int64_t); void tcg_gen_vec_rotl8i_i64(TCGv_i64 d, TCGv_i64 a, int64_t c); void tcg_gen_vec_rotl16i_i64(TCGv_i64 d, TCGv_i64 a, int64_t c); +/* 32-bit vector operations. */ +void tcg_gen_vec_add8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b); +void tcg_gen_vec_add16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b); + +void tcg_gen_vec_sub8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b); +void tcg_gen_vec_sub16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b); + +void tcg_gen_vec_shl8i_i32(TCGv_i32 d, TCGv_i32 a, int32_t); +void tcg_gen_vec_shl16i_i32(TCGv_i32 d, TCGv_i32 a, int32_t); +void tcg_gen_vec_shr8i_i32(TCGv_i32 d, TCGv_i32 a, int32_t); +void tcg_gen_vec_shr16i_i32(TCGv_i32 d, TCGv_i32 a, int32_t); +void tcg_gen_vec_sar8i_i32(TCGv_i32 d, TCGv_i32 a, int32_t); +void tcg_gen_vec_sar16i_i32(TCGv_i32 d, TCGv_i32 a, int32_t); + +#if TARGET_LONG_BITS == 64 +#define tcg_gen_vec_add8_tl tcg_gen_vec_add8_i64 +#define tcg_gen_vec_sub8_tl tcg_gen_vec_sub8_i64 +#define tcg_gen_vec_add16_tl tcg_gen_vec_add16_i64 +#define tcg_gen_vec_sub16_tl tcg_gen_vec_sub16_i64 +#define tcg_gen_vec_add32_tl tcg_gen_vec_add32_i64 +#define tcg_gen_vec_sub32_tl tcg_gen_vec_sub32_i64 +#define tcg_gen_vec_shl8i_tl tcg_gen_vec_shl8i_i64 +#define tcg_gen_vec_shr8i_tl tcg_gen_vec_shr8i_i64 +#define tcg_gen_vec_sar8i_tl tcg_gen_vec_sar8i_i64 +#define tcg_gen_vec_shl16i_tl tcg_gen_vec_shl16i_i64 +#define tcg_gen_vec_shr16i_tl tcg_gen_vec_shr16i_i64 +#define tcg_gen_vec_sar16i_tl tcg_gen_vec_sar16i_i64 + +#else +#define tcg_gen_vec_add8_tl tcg_gen_vec_add8_i32 +#define tcg_gen_vec_sub8_tl tcg_gen_vec_sub8_i32 +#define tcg_gen_vec_add16_tl tcg_gen_vec_add16_i32 +#define tcg_gen_vec_sub16_tl tcg_gen_vec_sub16_i32 +#define tcg_gen_vec_add32_tl tcg_gen_add_i32 +#define tcg_gen_vec_sub32_tl tcg_gen_sub_i32 +#define tcg_gen_vec_shl8i_tl tcg_gen_vec_shl8i_i32 +#define tcg_gen_vec_shr8i_tl tcg_gen_vec_shr8i_i32 +#define tcg_gen_vec_sar8i_tl tcg_gen_vec_sar8i_i32 +#define tcg_gen_vec_shl16i_tl tcg_gen_vec_shl16i_i32 +#define tcg_gen_vec_shr16i_tl tcg_gen_vec_shr16i_i32 +#define tcg_gen_vec_sar16i_tl tcg_gen_vec_sar16i_i32 +#endif + #endif diff --git a/include/tcg/tcg-op.h b/include/tcg/tcg-op.h index 1a2ae93..2a654f3 100644 --- a/include/tcg/tcg-op.h +++ b/include/tcg/tcg-op.h @@ -330,7 +330,7 @@ void tcg_gen_ext8s_i32(TCGv_i32 ret, TCGv_i32 arg); void tcg_gen_ext16s_i32(TCGv_i32 ret, TCGv_i32 arg); void tcg_gen_ext8u_i32(TCGv_i32 ret, TCGv_i32 arg); void tcg_gen_ext16u_i32(TCGv_i32 ret, TCGv_i32 arg); -void tcg_gen_bswap16_i32(TCGv_i32 ret, TCGv_i32 arg); +void tcg_gen_bswap16_i32(TCGv_i32 ret, TCGv_i32 arg, int flags); void tcg_gen_bswap32_i32(TCGv_i32 ret, TCGv_i32 arg); void tcg_gen_smin_i32(TCGv_i32, TCGv_i32 arg1, TCGv_i32 arg2); void tcg_gen_smax_i32(TCGv_i32, TCGv_i32 arg1, TCGv_i32 arg2); @@ -528,8 +528,8 @@ void tcg_gen_ext32s_i64(TCGv_i64 ret, TCGv_i64 arg); void tcg_gen_ext8u_i64(TCGv_i64 ret, TCGv_i64 arg); void tcg_gen_ext16u_i64(TCGv_i64 ret, TCGv_i64 arg); void tcg_gen_ext32u_i64(TCGv_i64 ret, TCGv_i64 arg); -void tcg_gen_bswap16_i64(TCGv_i64 ret, TCGv_i64 arg); -void tcg_gen_bswap32_i64(TCGv_i64 ret, TCGv_i64 arg); +void tcg_gen_bswap16_i64(TCGv_i64 ret, TCGv_i64 arg, int flags); +void tcg_gen_bswap32_i64(TCGv_i64 ret, TCGv_i64 arg, int flags); void tcg_gen_bswap64_i64(TCGv_i64 ret, TCGv_i64 arg); void tcg_gen_smin_i64(TCGv_i64, TCGv_i64 arg1, TCGv_i64 arg2); void tcg_gen_smax_i64(TCGv_i64, TCGv_i64 arg1, TCGv_i64 arg2); @@ -1192,7 +1192,7 @@ void tcg_gen_stl_vec(TCGv_vec r, TCGv_ptr base, TCGArg offset, TCGType t); #define tcg_gen_ext32u_tl tcg_gen_mov_i32 #define tcg_gen_ext32s_tl tcg_gen_mov_i32 #define tcg_gen_bswap16_tl tcg_gen_bswap16_i32 -#define tcg_gen_bswap32_tl tcg_gen_bswap32_i32 +#define tcg_gen_bswap32_tl(D, S, F) tcg_gen_bswap32_i32(D, S) #define tcg_gen_bswap_tl tcg_gen_bswap32_i32 #define tcg_gen_concat_tl_i64 tcg_gen_concat_i32_i64 #define tcg_gen_extr_i64_tl tcg_gen_extr_i64_i32 diff --git a/include/tcg/tcg-opc.h b/include/tcg/tcg-opc.h index 5bbec85..9939923 100644 --- a/include/tcg/tcg-opc.h +++ b/include/tcg/tcg-opc.h @@ -96,8 +96,8 @@ DEF(ext8s_i32, 1, 1, 0, IMPL(TCG_TARGET_HAS_ext8s_i32)) DEF(ext16s_i32, 1, 1, 0, IMPL(TCG_TARGET_HAS_ext16s_i32)) DEF(ext8u_i32, 1, 1, 0, IMPL(TCG_TARGET_HAS_ext8u_i32)) DEF(ext16u_i32, 1, 1, 0, IMPL(TCG_TARGET_HAS_ext16u_i32)) -DEF(bswap16_i32, 1, 1, 0, IMPL(TCG_TARGET_HAS_bswap16_i32)) -DEF(bswap32_i32, 1, 1, 0, IMPL(TCG_TARGET_HAS_bswap32_i32)) +DEF(bswap16_i32, 1, 1, 1, IMPL(TCG_TARGET_HAS_bswap16_i32)) +DEF(bswap32_i32, 1, 1, 1, IMPL(TCG_TARGET_HAS_bswap32_i32)) DEF(not_i32, 1, 1, 0, IMPL(TCG_TARGET_HAS_not_i32)) DEF(neg_i32, 1, 1, 0, IMPL(TCG_TARGET_HAS_neg_i32)) DEF(andc_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_andc_i32)) @@ -165,9 +165,9 @@ DEF(ext32s_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_ext32s_i64)) DEF(ext8u_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_ext8u_i64)) DEF(ext16u_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_ext16u_i64)) DEF(ext32u_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_ext32u_i64)) -DEF(bswap16_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_bswap16_i64)) -DEF(bswap32_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_bswap32_i64)) -DEF(bswap64_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_bswap64_i64)) +DEF(bswap16_i64, 1, 1, 1, IMPL64 | IMPL(TCG_TARGET_HAS_bswap16_i64)) +DEF(bswap32_i64, 1, 1, 1, IMPL64 | IMPL(TCG_TARGET_HAS_bswap32_i64)) +DEF(bswap64_i64, 1, 1, 1, IMPL64 | IMPL(TCG_TARGET_HAS_bswap64_i64)) DEF(not_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_not_i64)) DEF(neg_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_neg_i64)) DEF(andc_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_andc_i64)) diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h index 2dad364..8994937 100644 --- a/include/tcg/tcg.h +++ b/include/tcg/tcg.h @@ -408,6 +408,18 @@ typedef TCGv_ptr TCGv_env; /* Used to align parameters. See the comment before tcgv_i32_temp. */ #define TCG_CALL_DUMMY_ARG ((TCGArg)0) +/* + * Flags for the bswap opcodes. + * If IZ, the input is zero-extended, otherwise unknown. + * If OZ or OS, the output is zero- or sign-extended respectively, + * otherwise the high bits are undefined. + */ +enum { + TCG_BSWAP_IZ = 1, + TCG_BSWAP_OZ = 2, + TCG_BSWAP_OS = 4, +}; + typedef enum TCGTempVal { TEMP_VAL_DEAD, TEMP_VAL_REG, diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c index 7f74d0e..1a40e49 100644 --- a/target/arm/translate-a64.c +++ b/target/arm/translate-a64.c @@ -5430,22 +5430,13 @@ static void handle_rev32(DisasContext *s, unsigned int sf, unsigned int rn, unsigned int rd) { TCGv_i64 tcg_rd = cpu_reg(s, rd); + TCGv_i64 tcg_rn = cpu_reg(s, rn); if (sf) { - TCGv_i64 tcg_tmp = tcg_temp_new_i64(); - TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf); - - /* bswap32_i64 requires zero high word */ - tcg_gen_ext32u_i64(tcg_tmp, tcg_rn); - tcg_gen_bswap32_i64(tcg_rd, tcg_tmp); - tcg_gen_shri_i64(tcg_tmp, tcg_rn, 32); - tcg_gen_bswap32_i64(tcg_tmp, tcg_tmp); - tcg_gen_concat32_i64(tcg_rd, tcg_rd, tcg_tmp); - - tcg_temp_free_i64(tcg_tmp); + tcg_gen_bswap64_i64(tcg_rd, tcg_rn); + tcg_gen_rotri_i64(tcg_rd, tcg_rd, 32); } else { - tcg_gen_ext32u_i64(tcg_rd, cpu_reg(s, rn)); - tcg_gen_bswap32_i64(tcg_rd, tcg_rd); + tcg_gen_bswap32_i64(tcg_rd, tcg_rn, TCG_BSWAP_OZ); } } @@ -12453,10 +12444,10 @@ static void handle_rev(DisasContext *s, int opcode, bool u, read_vec_element(s, tcg_tmp, rn, i, grp_size); switch (grp_size) { case MO_16: - tcg_gen_bswap16_i64(tcg_tmp, tcg_tmp); + tcg_gen_bswap16_i64(tcg_tmp, tcg_tmp, TCG_BSWAP_IZ); break; case MO_32: - tcg_gen_bswap32_i64(tcg_tmp, tcg_tmp); + tcg_gen_bswap32_i64(tcg_tmp, tcg_tmp, TCG_BSWAP_IZ); break; case MO_64: tcg_gen_bswap64_i64(tcg_tmp, tcg_tmp); diff --git a/target/arm/translate.c b/target/arm/translate.c index 9e2cca7..a0c6cfa 100644 --- a/target/arm/translate.c +++ b/target/arm/translate.c @@ -354,9 +354,7 @@ void gen_rev16(TCGv_i32 dest, TCGv_i32 var) /* Byteswap low halfword and sign extend. */ static void gen_revsh(TCGv_i32 dest, TCGv_i32 var) { - tcg_gen_ext16u_i32(var, var); - tcg_gen_bswap16_i32(var, var); - tcg_gen_ext16s_i32(dest, var); + tcg_gen_bswap16_i32(var, var, TCG_BSWAP_OS); } /* Dual 16-bit add. Result placed in t0 and t1 is marked as dead. diff --git a/target/avr/translate.c b/target/avr/translate.c index 850c594..c06ce45 100644 --- a/target/avr/translate.c +++ b/target/avr/translate.c @@ -80,7 +80,7 @@ typedef struct DisasContext DisasContext; /* This is the state at translation time. */ struct DisasContext { - TranslationBlock *tb; + DisasContextBase base; CPUAVRState *env; CPUState *cs; @@ -90,8 +90,6 @@ struct DisasContext { /* Routine used to access memory */ int memidx; - int bstate; - int singlestep; /* * some AVR instructions can make the following instruction to be skipped @@ -106,7 +104,7 @@ struct DisasContext { * used in the following manner (sketch) * * TCGLabel *skip_label = NULL; - * if (ctx.skip_cond != TCG_COND_NEVER) { + * if (ctx->skip_cond != TCG_COND_NEVER) { * skip_label = gen_new_label(); * tcg_gen_brcond_tl(skip_cond, skip_var0, skip_var1, skip_label); * } @@ -116,7 +114,7 @@ struct DisasContext { * free_skip_var0 = false; * } * - * translate(&ctx); + * translate(ctx); * * if (skip_label) { * gen_set_label(skip_label); @@ -191,7 +189,7 @@ static bool avr_have_feature(DisasContext *ctx, int feature) { if (!avr_feature(ctx->env, feature)) { gen_helper_unsupported(cpu_env); - ctx->bstate = DISAS_NORETURN; + ctx->base.is_jmp = DISAS_NORETURN; return false; } return true; @@ -1011,13 +1009,13 @@ static void gen_jmp_ez(DisasContext *ctx) { tcg_gen_deposit_tl(cpu_pc, cpu_r[30], cpu_r[31], 8, 8); tcg_gen_or_tl(cpu_pc, cpu_pc, cpu_eind); - ctx->bstate = DISAS_LOOKUP; + ctx->base.is_jmp = DISAS_LOOKUP; } static void gen_jmp_z(DisasContext *ctx) { tcg_gen_deposit_tl(cpu_pc, cpu_r[30], cpu_r[31], 8, 8); - ctx->bstate = DISAS_LOOKUP; + ctx->base.is_jmp = DISAS_LOOKUP; } static void gen_push_ret(DisasContext *ctx, int ret) @@ -1083,9 +1081,9 @@ static void gen_pop_ret(DisasContext *ctx, TCGv ret) static void gen_goto_tb(DisasContext *ctx, int n, target_ulong dest) { - TranslationBlock *tb = ctx->tb; + const TranslationBlock *tb = ctx->base.tb; - if (ctx->singlestep == 0) { + if (!ctx->base.singlestep_enabled) { tcg_gen_goto_tb(n); tcg_gen_movi_i32(cpu_pc, dest); tcg_gen_exit_tb(tb, n); @@ -1094,7 +1092,7 @@ static void gen_goto_tb(DisasContext *ctx, int n, target_ulong dest) gen_helper_debug(cpu_env); tcg_gen_exit_tb(NULL, 0); } - ctx->bstate = DISAS_NORETURN; + ctx->base.is_jmp = DISAS_NORETURN; } /* @@ -1254,7 +1252,7 @@ static bool trans_RET(DisasContext *ctx, arg_RET *a) { gen_pop_ret(ctx, cpu_pc); - ctx->bstate = DISAS_LOOKUP; + ctx->base.is_jmp = DISAS_LOOKUP; return true; } @@ -1272,7 +1270,7 @@ static bool trans_RETI(DisasContext *ctx, arg_RETI *a) tcg_gen_movi_tl(cpu_If, 1); /* Need to return to main loop to re-evaluate interrupts. */ - ctx->bstate = DISAS_EXIT; + ctx->base.is_jmp = DISAS_EXIT; return true; } @@ -1484,7 +1482,7 @@ static bool trans_BRBC(DisasContext *ctx, arg_BRBC *a) gen_goto_tb(ctx, 0, ctx->npc + a->imm); gen_set_label(not_taken); - ctx->bstate = DISAS_CHAIN; + ctx->base.is_jmp = DISAS_CHAIN; return true; } @@ -1533,7 +1531,7 @@ static bool trans_BRBS(DisasContext *ctx, arg_BRBS *a) gen_goto_tb(ctx, 0, ctx->npc + a->imm); gen_set_label(not_taken); - ctx->bstate = DISAS_CHAIN; + ctx->base.is_jmp = DISAS_CHAIN; return true; } @@ -1610,7 +1608,7 @@ static TCGv gen_get_zaddr(void) */ static void gen_data_store(DisasContext *ctx, TCGv data, TCGv addr) { - if (ctx->tb->flags & TB_FLAGS_FULL_ACCESS) { + if (ctx->base.tb->flags & TB_FLAGS_FULL_ACCESS) { gen_helper_fullwr(cpu_env, data, addr); } else { tcg_gen_qemu_st8(data, addr, MMU_DATA_IDX); /* mem[addr] = data */ @@ -1619,7 +1617,7 @@ static void gen_data_store(DisasContext *ctx, TCGv data, TCGv addr) static void gen_data_load(DisasContext *ctx, TCGv data, TCGv addr) { - if (ctx->tb->flags & TB_FLAGS_FULL_ACCESS) { + if (ctx->base.tb->flags & TB_FLAGS_FULL_ACCESS) { gen_helper_fullrd(data, cpu_env, addr); } else { tcg_gen_qemu_ld8u(data, addr, MMU_DATA_IDX); /* data = mem[addr] */ @@ -2793,7 +2791,7 @@ static bool trans_BREAK(DisasContext *ctx, arg_BREAK *a) #ifdef BREAKPOINT_ON_BREAK tcg_gen_movi_tl(cpu_pc, ctx->npc - 1); gen_helper_debug(cpu_env); - ctx->bstate = DISAS_EXIT; + ctx->base.is_jmp = DISAS_EXIT; #else /* NOP */ #endif @@ -2819,7 +2817,7 @@ static bool trans_NOP(DisasContext *ctx, arg_NOP *a) static bool trans_SLEEP(DisasContext *ctx, arg_SLEEP *a) { gen_helper_sleep(cpu_env); - ctx->bstate = DISAS_NORETURN; + ctx->base.is_jmp = DISAS_NORETURN; return true; } @@ -2850,7 +2848,7 @@ static void translate(DisasContext *ctx) if (!decode_insn(ctx, opcode)) { gen_helper_unsupported(cpu_env); - ctx->bstate = DISAS_NORETURN; + ctx->base.is_jmp = DISAS_NORETURN; } } @@ -2899,112 +2897,134 @@ static bool canonicalize_skip(DisasContext *ctx) return true; } -void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns) +static void gen_breakpoint(DisasContext *ctx) { + canonicalize_skip(ctx); + tcg_gen_movi_tl(cpu_pc, ctx->npc); + gen_helper_debug(cpu_env); + ctx->base.is_jmp = DISAS_NORETURN; +} + +static void avr_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs) +{ + DisasContext *ctx = container_of(dcbase, DisasContext, base); CPUAVRState *env = cs->env_ptr; - DisasContext ctx = { - .tb = tb, - .cs = cs, - .env = env, - .memidx = 0, - .bstate = DISAS_NEXT, - .skip_cond = TCG_COND_NEVER, - .singlestep = cs->singlestep_enabled, - }; - target_ulong pc_start = tb->pc / 2; - int num_insns = 0; - - if (tb->flags & TB_FLAGS_FULL_ACCESS) { + uint32_t tb_flags = ctx->base.tb->flags; + + ctx->cs = cs; + ctx->env = env; + ctx->npc = ctx->base.pc_first / 2; + + ctx->skip_cond = TCG_COND_NEVER; + if (tb_flags & TB_FLAGS_SKIP) { + ctx->skip_cond = TCG_COND_ALWAYS; + ctx->skip_var0 = cpu_skip; + } + + if (tb_flags & TB_FLAGS_FULL_ACCESS) { /* * This flag is set by ST/LD instruction we will regenerate it ONLY * with mem/cpu memory access instead of mem access */ - max_insns = 1; - } - if (ctx.singlestep) { - max_insns = 1; + ctx->base.max_insns = 1; } +} + +static void avr_tr_tb_start(DisasContextBase *db, CPUState *cs) +{ +} - gen_tb_start(tb); +static void avr_tr_insn_start(DisasContextBase *dcbase, CPUState *cs) +{ + DisasContext *ctx = container_of(dcbase, DisasContext, base); - ctx.npc = pc_start; - if (tb->flags & TB_FLAGS_SKIP) { - ctx.skip_cond = TCG_COND_ALWAYS; - ctx.skip_var0 = cpu_skip; - } + tcg_gen_insn_start(ctx->npc); +} - do { - TCGLabel *skip_label = NULL; +static bool avr_tr_breakpoint_check(DisasContextBase *dcbase, CPUState *cs, + const CPUBreakpoint *bp) +{ + DisasContext *ctx = container_of(dcbase, DisasContext, base); - /* translate current instruction */ - tcg_gen_insn_start(ctx.npc); - num_insns++; + gen_breakpoint(ctx); + return true; +} - /* - * this is due to some strange GDB behavior - * let's assume main has address 0x100 - * b main - sets breakpoint at address 0x00000100 (code) - * b *0x100 - sets breakpoint at address 0x00800100 (data) - */ - if (unlikely(!ctx.singlestep && - (cpu_breakpoint_test(cs, OFFSET_CODE + ctx.npc * 2, BP_ANY) || - cpu_breakpoint_test(cs, OFFSET_DATA + ctx.npc * 2, BP_ANY)))) { - canonicalize_skip(&ctx); - tcg_gen_movi_tl(cpu_pc, ctx.npc); - gen_helper_debug(cpu_env); - goto done_generating; - } +static void avr_tr_translate_insn(DisasContextBase *dcbase, CPUState *cs) +{ + DisasContext *ctx = container_of(dcbase, DisasContext, base); + TCGLabel *skip_label = NULL; + + /* + * This is due to some strange GDB behavior + * Let's assume main has address 0x100: + * b main - sets breakpoint at address 0x00000100 (code) + * b *0x100 - sets breakpoint at address 0x00800100 (data) + * + * The translator driver has already taken care of the code pointer. + */ + if (!ctx->base.singlestep_enabled && + cpu_breakpoint_test(cs, OFFSET_DATA + ctx->base.pc_next, BP_ANY)) { + gen_breakpoint(ctx); + return; + } - /* Conditionally skip the next instruction, if indicated. */ - if (ctx.skip_cond != TCG_COND_NEVER) { - skip_label = gen_new_label(); - if (ctx.skip_var0 == cpu_skip) { - /* - * Copy cpu_skip so that we may zero it before the branch. - * This ensures that cpu_skip is non-zero after the label - * if and only if the skipped insn itself sets a skip. - */ - ctx.free_skip_var0 = true; - ctx.skip_var0 = tcg_temp_new(); - tcg_gen_mov_tl(ctx.skip_var0, cpu_skip); - tcg_gen_movi_tl(cpu_skip, 0); - } - if (ctx.skip_var1 == NULL) { - tcg_gen_brcondi_tl(ctx.skip_cond, ctx.skip_var0, 0, skip_label); - } else { - tcg_gen_brcond_tl(ctx.skip_cond, ctx.skip_var0, - ctx.skip_var1, skip_label); - ctx.skip_var1 = NULL; - } - if (ctx.free_skip_var0) { - tcg_temp_free(ctx.skip_var0); - ctx.free_skip_var0 = false; - } - ctx.skip_cond = TCG_COND_NEVER; - ctx.skip_var0 = NULL; + /* Conditionally skip the next instruction, if indicated. */ + if (ctx->skip_cond != TCG_COND_NEVER) { + skip_label = gen_new_label(); + if (ctx->skip_var0 == cpu_skip) { + /* + * Copy cpu_skip so that we may zero it before the branch. + * This ensures that cpu_skip is non-zero after the label + * if and only if the skipped insn itself sets a skip. + */ + ctx->free_skip_var0 = true; + ctx->skip_var0 = tcg_temp_new(); + tcg_gen_mov_tl(ctx->skip_var0, cpu_skip); + tcg_gen_movi_tl(cpu_skip, 0); } + if (ctx->skip_var1 == NULL) { + tcg_gen_brcondi_tl(ctx->skip_cond, ctx->skip_var0, 0, skip_label); + } else { + tcg_gen_brcond_tl(ctx->skip_cond, ctx->skip_var0, + ctx->skip_var1, skip_label); + ctx->skip_var1 = NULL; + } + if (ctx->free_skip_var0) { + tcg_temp_free(ctx->skip_var0); + ctx->free_skip_var0 = false; + } + ctx->skip_cond = TCG_COND_NEVER; + ctx->skip_var0 = NULL; + } - translate(&ctx); + translate(ctx); - if (skip_label) { - canonicalize_skip(&ctx); - gen_set_label(skip_label); - if (ctx.bstate == DISAS_NORETURN) { - ctx.bstate = DISAS_CHAIN; - } + ctx->base.pc_next = ctx->npc * 2; + + if (skip_label) { + canonicalize_skip(ctx); + gen_set_label(skip_label); + if (ctx->base.is_jmp == DISAS_NORETURN) { + ctx->base.is_jmp = DISAS_CHAIN; } - } while (ctx.bstate == DISAS_NEXT - && num_insns < max_insns - && (ctx.npc - pc_start) * 2 < TARGET_PAGE_SIZE - 4 - && !tcg_op_buf_full()); + } + + if (ctx->base.is_jmp == DISAS_NEXT) { + target_ulong page_first = ctx->base.pc_first & TARGET_PAGE_MASK; - if (tb->cflags & CF_LAST_IO) { - gen_io_end(); + if ((ctx->base.pc_next - page_first) >= TARGET_PAGE_SIZE - 4) { + ctx->base.is_jmp = DISAS_TOO_MANY; + } } +} - bool nonconst_skip = canonicalize_skip(&ctx); +static void avr_tr_tb_stop(DisasContextBase *dcbase, CPUState *cs) +{ + DisasContext *ctx = container_of(dcbase, DisasContext, base); + bool nonconst_skip = canonicalize_skip(ctx); - switch (ctx.bstate) { + switch (ctx->base.is_jmp) { case DISAS_NORETURN: assert(!nonconst_skip); break; @@ -3013,19 +3033,19 @@ void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns) case DISAS_CHAIN: if (!nonconst_skip) { /* Note gen_goto_tb checks singlestep. */ - gen_goto_tb(&ctx, 1, ctx.npc); + gen_goto_tb(ctx, 1, ctx->npc); break; } - tcg_gen_movi_tl(cpu_pc, ctx.npc); + tcg_gen_movi_tl(cpu_pc, ctx->npc); /* fall through */ case DISAS_LOOKUP: - if (!ctx.singlestep) { + if (!ctx->base.singlestep_enabled) { tcg_gen_lookup_and_goto_ptr(); break; } /* fall through */ case DISAS_EXIT: - if (ctx.singlestep) { + if (ctx->base.singlestep_enabled) { gen_helper_debug(cpu_env); } else { tcg_gen_exit_tb(NULL, 0); @@ -3034,24 +3054,28 @@ void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns) default: g_assert_not_reached(); } +} -done_generating: - gen_tb_end(tb, num_insns); - - tb->size = (ctx.npc - pc_start) * 2; - tb->icount = num_insns; - -#ifdef DEBUG_DISAS - if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM) - && qemu_log_in_addr_range(tb->pc)) { - FILE *fd; - fd = qemu_log_lock(); - qemu_log("IN: %s\n", lookup_symbol(tb->pc)); - log_target_disas(cs, tb->pc, tb->size); - qemu_log("\n"); - qemu_log_unlock(fd); - } -#endif +static void avr_tr_disas_log(const DisasContextBase *dcbase, CPUState *cs) +{ + qemu_log("IN: %s\n", lookup_symbol(dcbase->pc_first)); + log_target_disas(cs, dcbase->pc_first, dcbase->tb->size); +} + +static const TranslatorOps avr_tr_ops = { + .init_disas_context = avr_tr_init_disas_context, + .tb_start = avr_tr_tb_start, + .insn_start = avr_tr_insn_start, + .breakpoint_check = avr_tr_breakpoint_check, + .translate_insn = avr_tr_translate_insn, + .tb_stop = avr_tr_tb_stop, + .disas_log = avr_tr_disas_log, +}; + +void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns) +{ + DisasContext dc = { }; + translator_loop(&avr_tr_ops, &dc.base, cs, tb, max_insns); } void restore_state_to_opc(CPUAVRState *env, TranslationBlock *tb, diff --git a/target/cris/helper.h b/target/cris/helper.h index 20d21c4..3abf608 100644 --- a/target/cris/helper.h +++ b/target/cris/helper.h @@ -1,4 +1,4 @@ -DEF_HELPER_2(raise_exception, void, env, i32) +DEF_HELPER_2(raise_exception, noreturn, env, i32) DEF_HELPER_2(tlb_flush_pid, void, env, i32) DEF_HELPER_2(spc_write, void, env, i32) DEF_HELPER_1(rfe, void, env) diff --git a/target/cris/translate.c b/target/cris/translate.c index 6dd5a26..4cfe5c8 100644 --- a/target/cris/translate.c +++ b/target/cris/translate.c @@ -52,11 +52,17 @@ #define BUG() (gen_BUG(dc, __FILE__, __LINE__)) #define BUG_ON(x) ({if (x) BUG();}) -/* is_jmp field values */ -#define DISAS_JUMP DISAS_TARGET_0 /* only pc was modified dynamically */ -#define DISAS_UPDATE DISAS_TARGET_1 /* cpu state was modified dynamically */ -#define DISAS_TB_JUMP DISAS_TARGET_2 /* only pc was modified statically */ -#define DISAS_SWI DISAS_TARGET_3 +/* + * Target-specific is_jmp field values + */ +/* Only pc was modified dynamically */ +#define DISAS_JUMP DISAS_TARGET_0 +/* Cpu state was modified dynamically, including pc */ +#define DISAS_UPDATE DISAS_TARGET_1 +/* Cpu state was modified dynamically, excluding pc -- use npc */ +#define DISAS_UPDATE_NEXT DISAS_TARGET_2 +/* PC update for delayed branch, see cpustate_changed otherwise */ +#define DISAS_DBRANCH DISAS_TARGET_3 /* Used by the decoder. */ #define EXTRACT_FIELD(src, start, end) \ @@ -85,6 +91,8 @@ static TCGv env_pc; /* This is the state at translation time. */ typedef struct DisasContext { + DisasContextBase base; + CRISCPU *cpu; target_ulong pc, ppc; @@ -112,8 +120,6 @@ typedef struct DisasContext { int cc_x_uptodate; /* 1 - ccs, 2 - known | X_FLAG. 0 not up-to-date. */ int flags_uptodate; /* Whether or not $ccs is up-to-date. */ - int flagx_known; /* Whether or not flags_x has the x flag known at - translation time. */ int flags_x; int clear_x; /* Clear x after this insn? */ @@ -121,7 +127,6 @@ typedef struct DisasContext { int clear_locked_irq; /* Clear the irq lockout. */ int cpustate_changed; unsigned int tb_flags; /* tb dependent flags. */ - int is_jmp; #define JMP_NOJMP 0 #define JMP_DIRECT 1 @@ -131,9 +136,6 @@ typedef struct DisasContext { uint32_t jmp_pc; int delayed_branch; - - TranslationBlock *tb; - int singlestep_enabled; } DisasContext; static void gen_BUG(DisasContext *dc, const char *file, int line) @@ -141,14 +143,15 @@ static void gen_BUG(DisasContext *dc, const char *file, int line) cpu_abort(CPU(dc->cpu), "%s:%d pc=%x\n", file, line, dc->pc); } -static const char *regnames_v32[] = +static const char * const regnames_v32[] = { "$r0", "$r1", "$r2", "$r3", "$r4", "$r5", "$r6", "$r7", "$r8", "$r9", "$r10", "$r11", "$r12", "$r13", "$sp", "$acr", }; -static const char *pregnames_v32[] = + +static const char * const pregnames_v32[] = { "$bz", "$vr", "$pid", "$srs", "$wz", "$exs", "$eda", "$mof", @@ -157,7 +160,7 @@ static const char *pregnames_v32[] = }; /* We need this table to handle preg-moves with implicit width. */ -static int preg_sizes[] = { +static const int preg_sizes[] = { 1, /* bz. */ 1, /* vr. */ 4, /* pid. */ @@ -372,66 +375,26 @@ static inline void t_gen_add_flag(TCGv d, int flag) static inline void t_gen_addx_carry(DisasContext *dc, TCGv d) { - if (dc->flagx_known) { - if (dc->flags_x) { - TCGv c; - - c = tcg_temp_new(); - t_gen_mov_TN_preg(c, PR_CCS); - /* C flag is already at bit 0. */ - tcg_gen_andi_tl(c, c, C_FLAG); - tcg_gen_add_tl(d, d, c); - tcg_temp_free(c); - } - } else { - TCGv x, c; - - x = tcg_temp_new(); - c = tcg_temp_new(); - t_gen_mov_TN_preg(x, PR_CCS); - tcg_gen_mov_tl(c, x); + if (dc->flags_x) { + TCGv c = tcg_temp_new(); - /* Propagate carry into d if X is set. Branch free. */ + t_gen_mov_TN_preg(c, PR_CCS); + /* C flag is already at bit 0. */ tcg_gen_andi_tl(c, c, C_FLAG); - tcg_gen_andi_tl(x, x, X_FLAG); - tcg_gen_shri_tl(x, x, 4); - - tcg_gen_and_tl(x, x, c); - tcg_gen_add_tl(d, d, x); - tcg_temp_free(x); + tcg_gen_add_tl(d, d, c); tcg_temp_free(c); } } static inline void t_gen_subx_carry(DisasContext *dc, TCGv d) { - if (dc->flagx_known) { - if (dc->flags_x) { - TCGv c; - - c = tcg_temp_new(); - t_gen_mov_TN_preg(c, PR_CCS); - /* C flag is already at bit 0. */ - tcg_gen_andi_tl(c, c, C_FLAG); - tcg_gen_sub_tl(d, d, c); - tcg_temp_free(c); - } - } else { - TCGv x, c; - - x = tcg_temp_new(); - c = tcg_temp_new(); - t_gen_mov_TN_preg(x, PR_CCS); - tcg_gen_mov_tl(c, x); + if (dc->flags_x) { + TCGv c = tcg_temp_new(); - /* Propagate carry into d if X is set. Branch free. */ + t_gen_mov_TN_preg(c, PR_CCS); + /* C flag is already at bit 0. */ tcg_gen_andi_tl(c, c, C_FLAG); - tcg_gen_andi_tl(x, x, X_FLAG); - tcg_gen_shri_tl(x, x, 4); - - tcg_gen_and_tl(x, x, c); - tcg_gen_sub_tl(d, d, x); - tcg_temp_free(x); + tcg_gen_sub_tl(d, d, c); tcg_temp_free(c); } } @@ -479,9 +442,9 @@ static inline void t_gen_swapw(TCGv d, TCGv s) ((T0 >> 5) & 0x02020202) | ((T0 >> 7) & 0x01010101)); */ -static inline void t_gen_swapr(TCGv d, TCGv s) +static void t_gen_swapr(TCGv d, TCGv s) { - struct { + static const struct { int shift; /* LSL when positive, LSR when negative. */ uint32_t mask; } bitrev[] = { @@ -517,25 +480,9 @@ static inline void t_gen_swapr(TCGv d, TCGv s) tcg_temp_free(org_s); } -static void t_gen_cc_jmp(TCGv pc_true, TCGv pc_false) -{ - TCGLabel *l1 = gen_new_label(); - - /* Conditional jmp. */ - tcg_gen_mov_tl(env_pc, pc_false); - tcg_gen_brcondi_tl(TCG_COND_EQ, env_btaken, 0, l1); - tcg_gen_mov_tl(env_pc, pc_true); - gen_set_label(l1); -} - -static inline bool use_goto_tb(DisasContext *dc, target_ulong dest) +static bool use_goto_tb(DisasContext *dc, target_ulong dest) { -#ifndef CONFIG_USER_ONLY - return (dc->tb->pc & TARGET_PAGE_MASK) == (dest & TARGET_PAGE_MASK) || - (dc->ppc & TARGET_PAGE_MASK) == (dest & TARGET_PAGE_MASK); -#else - return true; -#endif + return ((dest ^ dc->base.pc_first) & TARGET_PAGE_MASK) == 0; } static void gen_goto_tb(DisasContext *dc, int n, target_ulong dest) @@ -543,20 +490,18 @@ static void gen_goto_tb(DisasContext *dc, int n, target_ulong dest) if (use_goto_tb(dc, dest)) { tcg_gen_goto_tb(n); tcg_gen_movi_tl(env_pc, dest); - tcg_gen_exit_tb(dc->tb, n); + tcg_gen_exit_tb(dc->base.tb, n); } else { tcg_gen_movi_tl(env_pc, dest); - tcg_gen_exit_tb(NULL, 0); + tcg_gen_lookup_and_goto_ptr(); } } static inline void cris_clear_x_flag(DisasContext *dc) { - if (dc->flagx_known && dc->flags_x) { + if (dc->flags_x) { dc->flags_uptodate = 0; } - - dc->flagx_known = 1; dc->flags_x = 0; } @@ -641,12 +586,10 @@ static void cris_evaluate_flags(DisasContext *dc) break; } - if (dc->flagx_known) { - if (dc->flags_x) { - tcg_gen_ori_tl(cpu_PR[PR_CCS], cpu_PR[PR_CCS], X_FLAG); - } else if (dc->cc_op == CC_OP_FLAGS) { - tcg_gen_andi_tl(cpu_PR[PR_CCS], cpu_PR[PR_CCS], ~X_FLAG); - } + if (dc->flags_x) { + tcg_gen_ori_tl(cpu_PR[PR_CCS], cpu_PR[PR_CCS], X_FLAG); + } else if (dc->cc_op == CC_OP_FLAGS) { + tcg_gen_andi_tl(cpu_PR[PR_CCS], cpu_PR[PR_CCS], ~X_FLAG); } dc->flags_uptodate = 1; } @@ -681,16 +624,11 @@ static void cris_update_cc_op(DisasContext *dc, int op, int size) static inline void cris_update_cc_x(DisasContext *dc) { /* Save the x flag state at the time of the cc snapshot. */ - if (dc->flagx_known) { - if (dc->cc_x_uptodate == (2 | dc->flags_x)) { - return; - } - tcg_gen_movi_tl(cc_x, dc->flags_x); - dc->cc_x_uptodate = 2 | dc->flags_x; - } else { - tcg_gen_andi_tl(cc_x, cpu_PR[PR_CCS], X_FLAG); - dc->cc_x_uptodate = 1; + if (dc->cc_x_uptodate == (2 | dc->flags_x)) { + return; } + tcg_gen_movi_tl(cc_x, dc->flags_x); + dc->cc_x_uptodate = 2 | dc->flags_x; } /* Update cc prior to executing ALU op. Needs source operands untouched. */ @@ -1142,7 +1080,7 @@ static void gen_store (DisasContext *dc, TCGv addr, TCGv val, /* Conditional writes. We only support the kind were X and P are known at translation time. */ - if (dc->flagx_known && dc->flags_x && (dc->tb_flags & P_FLAG)) { + if (dc->flags_x && (dc->tb_flags & P_FLAG)) { dc->postinc = 0; cris_evaluate_flags(dc); tcg_gen_ori_tl(cpu_PR[PR_CCS], cpu_PR[PR_CCS], C_FLAG); @@ -1151,7 +1089,7 @@ static void gen_store (DisasContext *dc, TCGv addr, TCGv val, tcg_gen_qemu_st_tl(val, addr, mem_index, MO_TE + ctz32(size)); - if (dc->flagx_known && dc->flags_x) { + if (dc->flags_x) { cris_evaluate_flags(dc); tcg_gen_andi_tl(cpu_PR[PR_CCS], cpu_PR[PR_CCS], ~C_FLAG); } @@ -1288,7 +1226,7 @@ static int dec_prep_alu_m(CPUCRISState *env, DisasContext *dc, #if DISAS_CRIS static const char *cc_name(int cc) { - static const char *cc_names[16] = { + static const char * const cc_names[16] = { "cc", "cs", "ne", "eq", "vc", "vs", "pl", "mi", "ls", "hi", "ge", "lt", "gt", "le", "a", "p" }; @@ -1738,8 +1676,8 @@ static int dec_addc_r(CPUCRISState *env, DisasContext *dc) LOG_DIS("addc $r%u, $r%u\n", dc->op1, dc->op2); cris_evaluate_flags(dc); + /* Set for this insn. */ - dc->flagx_known = 1; dc->flags_x = X_FLAG; cris_cc_mask(dc, CC_MASK_NZVC); @@ -2026,7 +1964,6 @@ static int dec_setclrf(CPUCRISState *env, DisasContext *dc) } if (flags & X_FLAG) { - dc->flagx_known = 1; if (set) { dc->flags_x = X_FLAG; } else { @@ -2037,14 +1974,14 @@ static int dec_setclrf(CPUCRISState *env, DisasContext *dc) /* Break the TB if any of the SPI flag changes. */ if (flags & (P_FLAG | S_FLAG)) { tcg_gen_movi_tl(env_pc, dc->pc + 2); - dc->is_jmp = DISAS_UPDATE; + dc->base.is_jmp = DISAS_UPDATE; dc->cpustate_changed = 1; } /* For the I flag, only act on posedge. */ if ((flags & I_FLAG)) { tcg_gen_movi_tl(env_pc, dc->pc + 2); - dc->is_jmp = DISAS_UPDATE; + dc->base.is_jmp = DISAS_UPDATE; dc->cpustate_changed = 1; } @@ -2490,7 +2427,6 @@ static int dec_addc_mr(CPUCRISState *env, DisasContext *dc) cris_evaluate_flags(dc); /* Set for this insn. */ - dc->flagx_known = 1; dc->flags_x = X_FLAG; cris_alu_m_alloc_temps(t); @@ -2877,6 +2813,7 @@ static int dec_rfe_etc(CPUCRISState *env, DisasContext *dc) -offsetof(CRISCPU, env) + offsetof(CPUState, halted)); tcg_gen_movi_tl(env_pc, dc->pc + 2); t_gen_raise_exception(EXCP_HLT); + dc->base.is_jmp = DISAS_NORETURN; return 2; } @@ -2886,14 +2823,16 @@ static int dec_rfe_etc(CPUCRISState *env, DisasContext *dc) LOG_DIS("rfe\n"); cris_evaluate_flags(dc); gen_helper_rfe(cpu_env); - dc->is_jmp = DISAS_UPDATE; + dc->base.is_jmp = DISAS_UPDATE; + dc->cpustate_changed = true; break; case 5: /* rfn. */ LOG_DIS("rfn\n"); cris_evaluate_flags(dc); gen_helper_rfn(cpu_env); - dc->is_jmp = DISAS_UPDATE; + dc->base.is_jmp = DISAS_UPDATE; + dc->cpustate_changed = true; break; case 6: LOG_DIS("break %d\n", dc->op1); @@ -2904,7 +2843,7 @@ static int dec_rfe_etc(CPUCRISState *env, DisasContext *dc) /* Breaks start at 16 in the exception vector. */ t_gen_movi_env_TN(trap_vector, dc->op1 + 16); t_gen_raise_exception(EXCP_BREAK); - dc->is_jmp = DISAS_UPDATE; + dc->base.is_jmp = DISAS_NORETURN; break; default: printf("op2=%x\n", dc->op2); @@ -2934,7 +2873,7 @@ static int dec_null(CPUCRISState *env, DisasContext *dc) return 2; } -static struct decoder_info { +static const struct decoder_info { struct { uint32_t bits; uint32_t mask; @@ -3122,17 +3061,12 @@ static unsigned int crisv32_decoder(CPUCRISState *env, DisasContext *dc) * */ -/* generate intermediate code for basic block 'tb'. */ -void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns) +static void cris_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs) { + DisasContext *dc = container_of(dcbase, DisasContext, base); CPUCRISState *env = cs->env_ptr; + uint32_t tb_flags = dc->base.tb->flags; uint32_t pc_start; - unsigned int insn_len; - struct DisasContext ctx; - struct DisasContext *dc = &ctx; - uint32_t page_start; - target_ulong npc; - int num_insns; if (env->pregs[PR_VR] == 32) { dc->decoder = crisv32_decoder; @@ -3142,147 +3076,139 @@ void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns) dc->clear_locked_irq = 1; } - /* Odd PC indicates that branch is rexecuting due to exception in the + /* + * Odd PC indicates that branch is rexecuting due to exception in the * delayslot, like in real hw. */ - pc_start = tb->pc & ~1; - dc->cpu = env_archcpu(env); - dc->tb = tb; + pc_start = dc->base.pc_first & ~1; + dc->base.pc_first = pc_start; + dc->base.pc_next = pc_start; - dc->is_jmp = DISAS_NEXT; + dc->cpu = env_archcpu(env); dc->ppc = pc_start; dc->pc = pc_start; - dc->singlestep_enabled = cs->singlestep_enabled; dc->flags_uptodate = 1; - dc->flagx_known = 1; - dc->flags_x = tb->flags & X_FLAG; + dc->flags_x = tb_flags & X_FLAG; dc->cc_x_uptodate = 0; dc->cc_mask = 0; dc->update_cc = 0; dc->clear_prefix = 0; + dc->cpustate_changed = 0; cris_update_cc_op(dc, CC_OP_FLAGS, 4); dc->cc_size_uptodate = -1; /* Decode TB flags. */ - dc->tb_flags = tb->flags & (S_FLAG | P_FLAG | U_FLAG \ - | X_FLAG | PFIX_FLAG); - dc->delayed_branch = !!(tb->flags & 7); + dc->tb_flags = tb_flags & (S_FLAG | P_FLAG | U_FLAG | X_FLAG | PFIX_FLAG); + dc->delayed_branch = !!(tb_flags & 7); if (dc->delayed_branch) { dc->jmp = JMP_INDIRECT; } else { dc->jmp = JMP_NOJMP; } +} - dc->cpustate_changed = 0; +static void cris_tr_tb_start(DisasContextBase *db, CPUState *cpu) +{ +} - page_start = pc_start & TARGET_PAGE_MASK; - num_insns = 0; +static void cris_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu) +{ + DisasContext *dc = container_of(dcbase, DisasContext, base); - gen_tb_start(tb); - do { - tcg_gen_insn_start(dc->delayed_branch == 1 - ? dc->ppc | 1 : dc->pc); - num_insns++; + tcg_gen_insn_start(dc->delayed_branch == 1 ? dc->ppc | 1 : dc->pc); +} - if (unlikely(cpu_breakpoint_test(cs, dc->pc, BP_ANY))) { - cris_evaluate_flags(dc); - tcg_gen_movi_tl(env_pc, dc->pc); - t_gen_raise_exception(EXCP_DEBUG); - dc->is_jmp = DISAS_UPDATE; - /* The address covered by the breakpoint must be included in - [tb->pc, tb->pc + tb->size) in order to for it to be - properly cleared -- thus we increment the PC here so that - the logic setting tb->size below does the right thing. */ - dc->pc += 2; - break; - } +static bool cris_tr_breakpoint_check(DisasContextBase *dcbase, CPUState *cpu, + const CPUBreakpoint *bp) +{ + DisasContext *dc = container_of(dcbase, DisasContext, base); - /* Pretty disas. */ - LOG_DIS("%8.8x:\t", dc->pc); + cris_evaluate_flags(dc); + tcg_gen_movi_tl(env_pc, dc->pc); + t_gen_raise_exception(EXCP_DEBUG); + dc->base.is_jmp = DISAS_NORETURN; + /* + * The address covered by the breakpoint must be included in + * [tb->pc, tb->pc + tb->size) in order to for it to be + * properly cleared -- thus we increment the PC here so that + * the logic setting tb->size below does the right thing. + */ + dc->pc += 2; + return true; +} - if (num_insns == max_insns && (tb_cflags(tb) & CF_LAST_IO)) { - gen_io_start(); - } - dc->clear_x = 1; +static void cris_tr_translate_insn(DisasContextBase *dcbase, CPUState *cs) +{ + DisasContext *dc = container_of(dcbase, DisasContext, base); + CPUCRISState *env = cs->env_ptr; + unsigned int insn_len; - insn_len = dc->decoder(env, dc); - dc->ppc = dc->pc; - dc->pc += insn_len; - if (dc->clear_x) { - cris_clear_x_flag(dc); - } + /* Pretty disas. */ + LOG_DIS("%8.8x:\t", dc->pc); - /* Check for delayed branches here. If we do it before - actually generating any host code, the simulator will just - loop doing nothing for on this program location. */ - if (dc->delayed_branch) { - dc->delayed_branch--; - if (dc->delayed_branch == 0) { - if (tb->flags & 7) { - t_gen_movi_env_TN(dslot, 0); - } - if (dc->cpustate_changed || !dc->flagx_known - || (dc->flags_x != (tb->flags & X_FLAG))) { - cris_store_direct_jmp(dc); - } + dc->clear_x = 1; - if (dc->clear_locked_irq) { - dc->clear_locked_irq = 0; - t_gen_movi_env_TN(locked_irq, 0); - } + insn_len = dc->decoder(env, dc); + dc->ppc = dc->pc; + dc->pc += insn_len; + dc->base.pc_next += insn_len; - if (dc->jmp == JMP_DIRECT_CC) { - TCGLabel *l1 = gen_new_label(); - cris_evaluate_flags(dc); - - /* Conditional jmp. */ - tcg_gen_brcondi_tl(TCG_COND_EQ, - env_btaken, 0, l1); - gen_goto_tb(dc, 1, dc->jmp_pc); - gen_set_label(l1); - gen_goto_tb(dc, 0, dc->pc); - dc->is_jmp = DISAS_TB_JUMP; - dc->jmp = JMP_NOJMP; - } else if (dc->jmp == JMP_DIRECT) { - cris_evaluate_flags(dc); - gen_goto_tb(dc, 0, dc->jmp_pc); - dc->is_jmp = DISAS_TB_JUMP; - dc->jmp = JMP_NOJMP; - } else { - TCGv c = tcg_const_tl(dc->pc); - t_gen_cc_jmp(env_btarget, c); - tcg_temp_free(c); - dc->is_jmp = DISAS_JUMP; - } - break; - } - } + if (dc->base.is_jmp == DISAS_NORETURN) { + return; + } - /* If we are rexecuting a branch due to exceptions on - delay slots don't break. */ - if (!(tb->pc & 1) && cs->singlestep_enabled) { - break; - } - } while (!dc->is_jmp && !dc->cpustate_changed - && !tcg_op_buf_full() - && !singlestep - && (dc->pc - page_start < TARGET_PAGE_SIZE) - && num_insns < max_insns); + if (dc->clear_x) { + cris_clear_x_flag(dc); + } - if (dc->clear_locked_irq) { - t_gen_movi_env_TN(locked_irq, 0); + /* + * All branches are delayed branches, handled immediately below. + * We don't expect to see odd combinations of exit conditions. + */ + assert(dc->base.is_jmp == DISAS_NEXT || dc->cpustate_changed); + + if (dc->delayed_branch && --dc->delayed_branch == 0) { + dc->base.is_jmp = DISAS_DBRANCH; + return; } - npc = dc->pc; + if (dc->base.is_jmp != DISAS_NEXT) { + return; + } /* Force an update if the per-tb cpu state has changed. */ - if (dc->is_jmp == DISAS_NEXT - && (dc->cpustate_changed || !dc->flagx_known - || (dc->flags_x != (tb->flags & X_FLAG)))) { - dc->is_jmp = DISAS_UPDATE; - tcg_gen_movi_tl(env_pc, npc); + if (dc->cpustate_changed) { + dc->base.is_jmp = DISAS_UPDATE_NEXT; + return; + } + + /* + * FIXME: Only the first insn in the TB should cross a page boundary. + * If we can detect the length of the next insn easily, we should. + * In the meantime, simply stop when we do cross. + */ + if ((dc->pc ^ dc->base.pc_first) & TARGET_PAGE_MASK) { + dc->base.is_jmp = DISAS_TOO_MANY; + } +} + +static void cris_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu) +{ + DisasContext *dc = container_of(dcbase, DisasContext, base); + DisasJumpType is_jmp = dc->base.is_jmp; + target_ulong npc = dc->pc; + + if (is_jmp == DISAS_NORETURN) { + /* If we have a broken branch+delayslot sequence, it's too late. */ + assert(dc->delayed_branch != 1); + return; + } + + if (dc->clear_locked_irq) { + t_gen_movi_env_TN(locked_irq, 0); } + /* Broken branch+delayslot sequence. */ if (dc->delayed_branch == 1) { /* Set env->dslot to the size of the branch insn. */ @@ -3292,54 +3218,123 @@ void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns) cris_evaluate_flags(dc); - if (unlikely(cs->singlestep_enabled)) { - if (dc->is_jmp == DISAS_NEXT) { - tcg_gen_movi_tl(env_pc, npc); + /* Evaluate delayed branch destination and fold to another is_jmp case. */ + if (is_jmp == DISAS_DBRANCH) { + if (dc->base.tb->flags & 7) { + t_gen_movi_env_TN(dslot, 0); } - t_gen_raise_exception(EXCP_DEBUG); - } else { - switch (dc->is_jmp) { - case DISAS_NEXT: - gen_goto_tb(dc, 1, npc); + + switch (dc->jmp) { + case JMP_DIRECT: + npc = dc->jmp_pc; + is_jmp = dc->cpustate_changed ? DISAS_UPDATE_NEXT : DISAS_TOO_MANY; + break; + + case JMP_DIRECT_CC: + /* + * Use a conditional branch if either taken or not-taken path + * can use goto_tb. If neither can, then treat it as indirect. + */ + if (likely(!dc->base.singlestep_enabled) + && likely(!dc->cpustate_changed) + && (use_goto_tb(dc, dc->jmp_pc) || use_goto_tb(dc, npc))) { + TCGLabel *not_taken = gen_new_label(); + + tcg_gen_brcondi_tl(TCG_COND_EQ, env_btaken, 0, not_taken); + gen_goto_tb(dc, 1, dc->jmp_pc); + gen_set_label(not_taken); + + /* not-taken case handled below. */ + is_jmp = DISAS_TOO_MANY; + break; + } + tcg_gen_movi_tl(env_btarget, dc->jmp_pc); + /* fall through */ + + case JMP_INDIRECT: + tcg_gen_movcond_tl(TCG_COND_NE, env_pc, + env_btaken, tcg_constant_tl(0), + env_btarget, tcg_constant_tl(npc)); + is_jmp = dc->cpustate_changed ? DISAS_UPDATE : DISAS_JUMP; + + /* + * We have now consumed btaken and btarget. Hint to the + * tcg compiler that the writeback to env may be dropped. + */ + tcg_gen_discard_tl(env_btaken); + tcg_gen_discard_tl(env_btarget); break; + default: + g_assert_not_reached(); + } + } + + if (unlikely(dc->base.singlestep_enabled)) { + switch (is_jmp) { + case DISAS_TOO_MANY: + case DISAS_UPDATE_NEXT: + tcg_gen_movi_tl(env_pc, npc); + /* fall through */ case DISAS_JUMP: case DISAS_UPDATE: - /* indicate that the hash table must be used - to find the next TB */ - tcg_gen_exit_tb(NULL, 0); - break; - case DISAS_SWI: - case DISAS_TB_JUMP: - /* nothing more to generate */ + t_gen_raise_exception(EXCP_DEBUG); + return; + default: break; } + g_assert_not_reached(); } - gen_tb_end(tb, num_insns); - tb->size = dc->pc - pc_start; - tb->icount = num_insns; + switch (is_jmp) { + case DISAS_TOO_MANY: + gen_goto_tb(dc, 0, npc); + break; + case DISAS_UPDATE_NEXT: + tcg_gen_movi_tl(env_pc, npc); + /* fall through */ + case DISAS_JUMP: + tcg_gen_lookup_and_goto_ptr(); + break; + case DISAS_UPDATE: + /* Indicate that interupts must be re-evaluated before the next TB. */ + tcg_gen_exit_tb(NULL, 0); + break; + default: + g_assert_not_reached(); + } +} -#ifdef DEBUG_DISAS -#if !DISAS_CRIS - if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM) - && qemu_log_in_addr_range(pc_start)) { - FILE *logfile = qemu_log_lock(); - qemu_log("--------------\n"); - qemu_log("IN: %s\n", lookup_symbol(pc_start)); - log_target_disas(cs, pc_start, dc->pc - pc_start); - qemu_log_unlock(logfile); +static void cris_tr_disas_log(const DisasContextBase *dcbase, CPUState *cpu) +{ + if (!DISAS_CRIS) { + qemu_log("IN: %s\n", lookup_symbol(dcbase->pc_first)); + log_target_disas(cpu, dcbase->pc_first, dcbase->tb->size); } -#endif -#endif +} + +static const TranslatorOps cris_tr_ops = { + .init_disas_context = cris_tr_init_disas_context, + .tb_start = cris_tr_tb_start, + .insn_start = cris_tr_insn_start, + .breakpoint_check = cris_tr_breakpoint_check, + .translate_insn = cris_tr_translate_insn, + .tb_stop = cris_tr_tb_stop, + .disas_log = cris_tr_disas_log, +}; + +void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns) +{ + DisasContext dc; + translator_loop(&cris_tr_ops, &dc.base, cs, tb, max_insns); } void cris_cpu_dump_state(CPUState *cs, FILE *f, int flags) { CRISCPU *cpu = CRIS_CPU(cs); CPUCRISState *env = &cpu->env; - const char **regnames; - const char **pregnames; + const char * const *regnames; + const char * const *pregnames; int i; if (!env) { diff --git a/target/cris/translate_v10.c.inc b/target/cris/translate_v10.c.inc index f7cd67b..f500e93 100644 --- a/target/cris/translate_v10.c.inc +++ b/target/cris/translate_v10.c.inc @@ -21,7 +21,7 @@ #include "qemu/osdep.h" #include "crisv10-decode.h" -static const char *regnames_v10[] = +static const char * const regnames_v10[] = { "$r0", "$r1", "$r2", "$r3", "$r4", "$r5", "$r6", "$r7", @@ -29,7 +29,7 @@ static const char *regnames_v10[] = "$r12", "$r13", "$sp", "$pc", }; -static const char *pregnames_v10[] = +static const char * const pregnames_v10[] = { "$bz", "$vr", "$p2", "$p3", "$wz", "$ccr", "$p6-prefix", "$mof", @@ -38,7 +38,7 @@ static const char *pregnames_v10[] = }; /* We need this table to handle preg-moves with implicit width. */ -static int preg_sizes_v10[] = { +static const int preg_sizes_v10[] = { 1, /* bz. */ 1, /* vr. */ 1, /* pid. */ @@ -61,6 +61,7 @@ static inline void cris_illegal_insn(DisasContext *dc) { qemu_log_mask(LOG_GUEST_ERROR, "illegal insn at pc=%x\n", dc->pc); t_gen_raise_exception(EXCP_BREAK); + dc->base.is_jmp = DISAS_NORETURN; } static void gen_store_v10_conditional(DisasContext *dc, TCGv addr, TCGv val, @@ -105,9 +106,8 @@ static void gen_store_v10(DisasContext *dc, TCGv addr, TCGv val, cris_store_direct_jmp(dc); } - /* Conditional writes. We only support the kind were X is known - at translation time. */ - if (dc->flagx_known && dc->flags_x) { + /* Conditional writes. */ + if (dc->flags_x) { gen_store_v10_conditional(dc, addr, val, size, mem_index); return; } @@ -375,7 +375,6 @@ static unsigned int dec10_setclrf(DisasContext *dc) if (flags & X_FLAG) { - dc->flagx_known = 1; if (set) dc->flags_x = X_FLAG; else @@ -1169,7 +1168,7 @@ static unsigned int dec10_ind(CPUCRISState *env, DisasContext *dc) t_gen_mov_env_TN(trap_vector, c); tcg_temp_free(c); t_gen_raise_exception(EXCP_BREAK); - dc->is_jmp = DISAS_UPDATE; + dc->base.is_jmp = DISAS_NORETURN; return insn_len; } LOG_DIS("%d: jump.%d %d r%d r%d\n", __LINE__, size, @@ -1277,7 +1276,7 @@ static unsigned int crisv10_decoder(CPUCRISState *env, DisasContext *dc) if (dc->clear_prefix && dc->tb_flags & PFIX_FLAG) { dc->tb_flags &= ~PFIX_FLAG; tcg_gen_andi_tl(cpu_PR[PR_CCS], cpu_PR[PR_CCS], ~PFIX_FLAG); - if (dc->tb_flags != dc->tb->flags) { + if (dc->tb_flags != dc->base.tb->flags) { dc->cpustate_changed = 1; } } diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c index a7f5c0c..b21873e 100644 --- a/target/i386/tcg/translate.c +++ b/target/i386/tcg/translate.c @@ -7195,17 +7195,11 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) reg = (b & 7) | REX_B(s); #ifdef TARGET_X86_64 if (dflag == MO_64) { - gen_op_mov_v_reg(s, MO_64, s->T0, reg); - tcg_gen_bswap64_i64(s->T0, s->T0); - gen_op_mov_reg_v(s, MO_64, reg, s->T0); - } else -#endif - { - gen_op_mov_v_reg(s, MO_32, s->T0, reg); - tcg_gen_ext32u_tl(s->T0, s->T0); - tcg_gen_bswap32_tl(s->T0, s->T0); - gen_op_mov_reg_v(s, MO_32, reg, s->T0); + tcg_gen_bswap64_i64(cpu_regs[reg], cpu_regs[reg]); + break; } +#endif + tcg_gen_bswap32_tl(cpu_regs[reg], cpu_regs[reg], TCG_BSWAP_OZ); break; case 0xd6: /* salc */ if (CODE64(s)) diff --git a/target/mips/tcg/mxu_translate.c b/target/mips/tcg/mxu_translate.c index 963d4ba..f52244e 100644 --- a/target/mips/tcg/mxu_translate.c +++ b/target/mips/tcg/mxu_translate.c @@ -857,12 +857,8 @@ static void gen_mxu_s32ldd_s32lddr(DisasContext *ctx) tcg_gen_ori_tl(t1, t1, 0xFFFFF000); } tcg_gen_add_tl(t1, t0, t1); - tcg_gen_qemu_ld_tl(t1, t1, ctx->mem_idx, MO_SL); + tcg_gen_qemu_ld_tl(t1, t1, ctx->mem_idx, MO_TESL ^ (sel * MO_BSWAP)); - if (sel == 1) { - /* S32LDDR */ - tcg_gen_bswap32_tl(t1, t1); - } gen_store_mxu_gpr(t1, XRa); tcg_temp_free(t0); diff --git a/target/nios2/translate.c b/target/nios2/translate.c index 399f22d..930f3d3 100644 --- a/target/nios2/translate.c +++ b/target/nios2/translate.c @@ -37,7 +37,6 @@ /* is_jmp field values */ #define DISAS_JUMP DISAS_TARGET_0 /* only pc was modified dynamically */ #define DISAS_UPDATE DISAS_TARGET_1 /* cpu state was modified dynamically */ -#define DISAS_TB_JUMP DISAS_TARGET_2 /* only pc was modified statically */ #define INSTRUCTION_FLG(func, flags) { (func), (flags) } #define INSTRUCTION(func) \ @@ -98,16 +97,14 @@ } typedef struct DisasContext { - TCGv_ptr cpu_env; - TCGv *cpu_R; + DisasContextBase base; TCGv_i32 zero; - int is_jmp; target_ulong pc; - TranslationBlock *tb; int mem_idx; - bool singlestep_enabled; } DisasContext; +static TCGv cpu_R[NUM_CORE_REGS]; + typedef struct Nios2Instruction { void (*handler)(DisasContext *dc, uint32_t code, uint32_t flags); uint32_t flags; @@ -136,7 +133,7 @@ static TCGv load_zero(DisasContext *dc) static TCGv load_gpr(DisasContext *dc, uint8_t reg) { if (likely(reg != R_ZERO)) { - return dc->cpu_R[reg]; + return cpu_R[reg]; } else { return load_zero(dc); } @@ -147,20 +144,20 @@ static void t_gen_helper_raise_exception(DisasContext *dc, { TCGv_i32 tmp = tcg_const_i32(index); - tcg_gen_movi_tl(dc->cpu_R[R_PC], dc->pc); - gen_helper_raise_exception(dc->cpu_env, tmp); + tcg_gen_movi_tl(cpu_R[R_PC], dc->pc); + gen_helper_raise_exception(cpu_env, tmp); tcg_temp_free_i32(tmp); - dc->is_jmp = DISAS_NORETURN; + dc->base.is_jmp = DISAS_NORETURN; } static bool use_goto_tb(DisasContext *dc, uint32_t dest) { - if (unlikely(dc->singlestep_enabled)) { + if (unlikely(dc->base.singlestep_enabled)) { return false; } #ifndef CONFIG_USER_ONLY - return (dc->tb->pc & TARGET_PAGE_MASK) == (dest & TARGET_PAGE_MASK); + return (dc->base.pc_first & TARGET_PAGE_MASK) == (dest & TARGET_PAGE_MASK); #else return true; #endif @@ -168,14 +165,14 @@ static bool use_goto_tb(DisasContext *dc, uint32_t dest) static void gen_goto_tb(DisasContext *dc, int n, uint32_t dest) { - TranslationBlock *tb = dc->tb; + const TranslationBlock *tb = dc->base.tb; if (use_goto_tb(dc, dest)) { tcg_gen_goto_tb(n); - tcg_gen_movi_tl(dc->cpu_R[R_PC], dest); + tcg_gen_movi_tl(cpu_R[R_PC], dest); tcg_gen_exit_tb(tb, n); } else { - tcg_gen_movi_tl(dc->cpu_R[R_PC], dest); + tcg_gen_movi_tl(cpu_R[R_PC], dest); tcg_gen_exit_tb(NULL, 0); } } @@ -187,7 +184,7 @@ static void gen_excp(DisasContext *dc, uint32_t code, uint32_t flags) static void gen_check_supervisor(DisasContext *dc) { - if (dc->tb->flags & CR_STATUS_U) { + if (dc->base.tb->flags & CR_STATUS_U) { /* CPU in user mode, privileged instruction called, stop. */ t_gen_helper_raise_exception(dc, EXCP_SUPERI); } @@ -209,12 +206,12 @@ static void jmpi(DisasContext *dc, uint32_t code, uint32_t flags) { J_TYPE(instr, code); gen_goto_tb(dc, 0, (dc->pc & 0xF0000000) | (instr.imm26 << 2)); - dc->is_jmp = DISAS_TB_JUMP; + dc->base.is_jmp = DISAS_NORETURN; } static void call(DisasContext *dc, uint32_t code, uint32_t flags) { - tcg_gen_movi_tl(dc->cpu_R[R_RA], dc->pc + 4); + tcg_gen_movi_tl(cpu_R[R_RA], dc->base.pc_next); jmpi(dc, code, flags); } @@ -236,7 +233,7 @@ static void gen_ldx(DisasContext *dc, uint32_t code, uint32_t flags) * the Nios2 CPU. */ if (likely(instr.b != R_ZERO)) { - data = dc->cpu_R[instr.b]; + data = cpu_R[instr.b]; } else { data = tcg_temp_new(); } @@ -268,8 +265,8 @@ static void br(DisasContext *dc, uint32_t code, uint32_t flags) { I_TYPE(instr, code); - gen_goto_tb(dc, 0, dc->pc + 4 + (instr.imm16.s & -4)); - dc->is_jmp = DISAS_TB_JUMP; + gen_goto_tb(dc, 0, dc->base.pc_next + (instr.imm16.s & -4)); + dc->base.is_jmp = DISAS_NORETURN; } static void gen_bxx(DisasContext *dc, uint32_t code, uint32_t flags) @@ -277,11 +274,11 @@ static void gen_bxx(DisasContext *dc, uint32_t code, uint32_t flags) I_TYPE(instr, code); TCGLabel *l1 = gen_new_label(); - tcg_gen_brcond_tl(flags, dc->cpu_R[instr.a], dc->cpu_R[instr.b], l1); - gen_goto_tb(dc, 0, dc->pc + 4); + tcg_gen_brcond_tl(flags, cpu_R[instr.a], cpu_R[instr.b], l1); + gen_goto_tb(dc, 0, dc->base.pc_next); gen_set_label(l1); - gen_goto_tb(dc, 1, dc->pc + 4 + (instr.imm16.s & -4)); - dc->is_jmp = DISAS_TB_JUMP; + gen_goto_tb(dc, 1, dc->base.pc_next + (instr.imm16.s & -4)); + dc->base.is_jmp = DISAS_NORETURN; } /* Comparison instructions */ @@ -289,8 +286,7 @@ static void gen_bxx(DisasContext *dc, uint32_t code, uint32_t flags) static void (fname)(DisasContext *dc, uint32_t code, uint32_t flags) \ { \ I_TYPE(instr, (code)); \ - tcg_gen_setcondi_tl(flags, (dc)->cpu_R[instr.b], (dc)->cpu_R[instr.a], \ - (op3)); \ + tcg_gen_setcondi_tl(flags, cpu_R[instr.b], cpu_R[instr.a], (op3)); \ } gen_i_cmpxx(gen_cmpxxsi, instr.imm16.s) @@ -304,10 +300,9 @@ static void (fname)(DisasContext *dc, uint32_t code, uint32_t flags) \ if (unlikely(instr.b == R_ZERO)) { /* Store to R_ZERO is ignored */ \ return; \ } else if (instr.a == R_ZERO) { /* MOVxI optimizations */ \ - tcg_gen_movi_tl(dc->cpu_R[instr.b], (resimm) ? (op3) : 0); \ + tcg_gen_movi_tl(cpu_R[instr.b], (resimm) ? (op3) : 0); \ } else { \ - tcg_gen_##insn##_tl((dc)->cpu_R[instr.b], (dc)->cpu_R[instr.a], \ - (op3)); \ + tcg_gen_##insn##_tl(cpu_R[instr.b], cpu_R[instr.a], (op3)); \ } \ } @@ -402,26 +397,26 @@ static const Nios2Instruction i_type_instructions[] = { */ static void eret(DisasContext *dc, uint32_t code, uint32_t flags) { - tcg_gen_mov_tl(dc->cpu_R[CR_STATUS], dc->cpu_R[CR_ESTATUS]); - tcg_gen_mov_tl(dc->cpu_R[R_PC], dc->cpu_R[R_EA]); + tcg_gen_mov_tl(cpu_R[CR_STATUS], cpu_R[CR_ESTATUS]); + tcg_gen_mov_tl(cpu_R[R_PC], cpu_R[R_EA]); - dc->is_jmp = DISAS_JUMP; + dc->base.is_jmp = DISAS_JUMP; } /* PC <- ra */ static void ret(DisasContext *dc, uint32_t code, uint32_t flags) { - tcg_gen_mov_tl(dc->cpu_R[R_PC], dc->cpu_R[R_RA]); + tcg_gen_mov_tl(cpu_R[R_PC], cpu_R[R_RA]); - dc->is_jmp = DISAS_JUMP; + dc->base.is_jmp = DISAS_JUMP; } /* PC <- ba */ static void bret(DisasContext *dc, uint32_t code, uint32_t flags) { - tcg_gen_mov_tl(dc->cpu_R[R_PC], dc->cpu_R[R_BA]); + tcg_gen_mov_tl(cpu_R[R_PC], cpu_R[R_BA]); - dc->is_jmp = DISAS_JUMP; + dc->base.is_jmp = DISAS_JUMP; } /* PC <- rA */ @@ -429,9 +424,9 @@ static void jmp(DisasContext *dc, uint32_t code, uint32_t flags) { R_TYPE(instr, code); - tcg_gen_mov_tl(dc->cpu_R[R_PC], load_gpr(dc, instr.a)); + tcg_gen_mov_tl(cpu_R[R_PC], load_gpr(dc, instr.a)); - dc->is_jmp = DISAS_JUMP; + dc->base.is_jmp = DISAS_JUMP; } /* rC <- PC + 4 */ @@ -440,7 +435,7 @@ static void nextpc(DisasContext *dc, uint32_t code, uint32_t flags) R_TYPE(instr, code); if (likely(instr.c != R_ZERO)) { - tcg_gen_movi_tl(dc->cpu_R[instr.c], dc->pc + 4); + tcg_gen_movi_tl(cpu_R[instr.c], dc->base.pc_next); } } @@ -452,10 +447,10 @@ static void callr(DisasContext *dc, uint32_t code, uint32_t flags) { R_TYPE(instr, code); - tcg_gen_mov_tl(dc->cpu_R[R_PC], load_gpr(dc, instr.a)); - tcg_gen_movi_tl(dc->cpu_R[R_RA], dc->pc + 4); + tcg_gen_mov_tl(cpu_R[R_PC], load_gpr(dc, instr.a)); + tcg_gen_movi_tl(cpu_R[R_RA], dc->base.pc_next); - dc->is_jmp = DISAS_JUMP; + dc->base.is_jmp = DISAS_JUMP; } /* rC <- ctlN */ @@ -472,10 +467,10 @@ static void rdctl(DisasContext *dc, uint32_t code, uint32_t flags) { #if !defined(CONFIG_USER_ONLY) if (likely(instr.c != R_ZERO)) { - tcg_gen_mov_tl(dc->cpu_R[instr.c], dc->cpu_R[instr.imm5 + CR_BASE]); + tcg_gen_mov_tl(cpu_R[instr.c], cpu_R[instr.imm5 + CR_BASE]); #ifdef DEBUG_MMU TCGv_i32 tmp = tcg_const_i32(instr.imm5 + CR_BASE); - gen_helper_mmu_read_debug(dc->cpu_R[instr.c], dc->cpu_env, tmp); + gen_helper_mmu_read_debug(cpu_R[instr.c], cpu_env, tmp); tcg_temp_free_i32(tmp); #endif } @@ -485,7 +480,7 @@ static void rdctl(DisasContext *dc, uint32_t code, uint32_t flags) default: if (likely(instr.c != R_ZERO)) { - tcg_gen_mov_tl(dc->cpu_R[instr.c], dc->cpu_R[instr.imm5 + CR_BASE]); + tcg_gen_mov_tl(cpu_R[instr.c], cpu_R[instr.imm5 + CR_BASE]); } break; } @@ -505,25 +500,25 @@ static void wrctl(DisasContext *dc, uint32_t code, uint32_t flags) { #if !defined(CONFIG_USER_ONLY) TCGv_i32 tmp = tcg_const_i32(instr.imm5 + CR_BASE); - gen_helper_mmu_write(dc->cpu_env, tmp, load_gpr(dc, instr.a)); + gen_helper_mmu_write(cpu_env, tmp, load_gpr(dc, instr.a)); tcg_temp_free_i32(tmp); #endif break; } default: - tcg_gen_mov_tl(dc->cpu_R[instr.imm5 + CR_BASE], load_gpr(dc, instr.a)); + tcg_gen_mov_tl(cpu_R[instr.imm5 + CR_BASE], load_gpr(dc, instr.a)); break; } /* If interrupts were enabled using WRCTL, trigger them. */ #if !defined(CONFIG_USER_ONLY) if ((instr.imm5 + CR_BASE) == CR_STATUS) { - if (tb_cflags(dc->tb) & CF_USE_ICOUNT) { + if (tb_cflags(dc->base.tb) & CF_USE_ICOUNT) { gen_io_start(); } - gen_helper_check_interrupts(dc->cpu_env); - dc->is_jmp = DISAS_UPDATE; + gen_helper_check_interrupts(cpu_env); + dc->base.is_jmp = DISAS_UPDATE; } #endif } @@ -533,8 +528,8 @@ static void gen_cmpxx(DisasContext *dc, uint32_t code, uint32_t flags) { R_TYPE(instr, code); if (likely(instr.c != R_ZERO)) { - tcg_gen_setcond_tl(flags, dc->cpu_R[instr.c], dc->cpu_R[instr.a], - dc->cpu_R[instr.b]); + tcg_gen_setcond_tl(flags, cpu_R[instr.c], cpu_R[instr.a], + cpu_R[instr.b]); } } @@ -544,8 +539,7 @@ static void (fname)(DisasContext *dc, uint32_t code, uint32_t flags) \ { \ R_TYPE(instr, (code)); \ if (likely(instr.c != R_ZERO)) { \ - tcg_gen_##insn((dc)->cpu_R[instr.c], load_gpr((dc), instr.a), \ - (op3)); \ + tcg_gen_##insn(cpu_R[instr.c], load_gpr((dc), instr.a), (op3)); \ } \ } @@ -569,8 +563,8 @@ static void (fname)(DisasContext *dc, uint32_t code, uint32_t flags) \ R_TYPE(instr, (code)); \ if (likely(instr.c != R_ZERO)) { \ TCGv t0 = tcg_temp_new(); \ - tcg_gen_##insn(t0, dc->cpu_R[instr.c], \ - load_gpr(dc, instr.a), load_gpr(dc, instr.b)); \ + tcg_gen_##insn(t0, cpu_R[instr.c], \ + load_gpr(dc, instr.a), load_gpr(dc, instr.b)); \ tcg_temp_free(t0); \ } \ } @@ -586,7 +580,7 @@ static void (fname)(DisasContext *dc, uint32_t code, uint32_t flags) \ if (likely(instr.c != R_ZERO)) { \ TCGv t0 = tcg_temp_new(); \ tcg_gen_andi_tl(t0, load_gpr((dc), instr.b), 31); \ - tcg_gen_##insn((dc)->cpu_R[instr.c], load_gpr((dc), instr.a), t0); \ + tcg_gen_##insn(cpu_R[instr.c], load_gpr((dc), instr.a), t0); \ tcg_temp_free(t0); \ } \ } @@ -620,8 +614,8 @@ static void divs(DisasContext *dc, uint32_t code, uint32_t flags) tcg_gen_or_tl(t2, t2, t3); tcg_gen_movi_tl(t3, 0); tcg_gen_movcond_tl(TCG_COND_NE, t1, t2, t3, t2, t1); - tcg_gen_div_tl(dc->cpu_R[instr.c], t0, t1); - tcg_gen_ext32s_tl(dc->cpu_R[instr.c], dc->cpu_R[instr.c]); + tcg_gen_div_tl(cpu_R[instr.c], t0, t1); + tcg_gen_ext32s_tl(cpu_R[instr.c], cpu_R[instr.c]); tcg_temp_free(t3); tcg_temp_free(t2); @@ -646,8 +640,8 @@ static void divu(DisasContext *dc, uint32_t code, uint32_t flags) tcg_gen_ext32u_tl(t0, load_gpr(dc, instr.a)); tcg_gen_ext32u_tl(t1, load_gpr(dc, instr.b)); tcg_gen_movcond_tl(TCG_COND_EQ, t1, t1, t2, t3, t1); - tcg_gen_divu_tl(dc->cpu_R[instr.c], t0, t1); - tcg_gen_ext32s_tl(dc->cpu_R[instr.c], dc->cpu_R[instr.c]); + tcg_gen_divu_tl(cpu_R[instr.c], t0, t1); + tcg_gen_ext32s_tl(cpu_R[instr.c], cpu_R[instr.c]); tcg_temp_free(t3); tcg_temp_free(t2); @@ -741,41 +735,6 @@ illegal_op: t_gen_helper_raise_exception(dc, EXCP_ILLEGAL); } -static void handle_instruction(DisasContext *dc, CPUNios2State *env) -{ - uint32_t code; - uint8_t op; - const Nios2Instruction *instr; -#if defined(CONFIG_USER_ONLY) - /* FIXME: Is this needed ? */ - if (dc->pc >= 0x1000 && dc->pc < 0x2000) { - env->regs[R_PC] = dc->pc; - t_gen_helper_raise_exception(dc, 0xaa); - return; - } -#endif - code = cpu_ldl_code(env, dc->pc); - op = get_opcode(code); - - if (unlikely(op >= ARRAY_SIZE(i_type_instructions))) { - goto illegal_op; - } - - dc->zero = NULL; - - instr = &i_type_instructions[op]; - instr->handler(dc, code, instr->flags); - - if (dc->zero) { - tcg_temp_free(dc->zero); - } - - return; - -illegal_op: - t_gen_helper_raise_exception(dc, EXCP_ILLEGAL); -} - static const char * const regnames[] = { "zero", "at", "r2", "r3", "r4", "r5", "r6", "r7", @@ -796,8 +755,6 @@ static const char * const regnames[] = { "rpc" }; -static TCGv cpu_R[NUM_CORE_REGS]; - #include "exec/gen-icount.h" static void gen_exception(DisasContext *dc, uint32_t excp) @@ -807,104 +764,135 @@ static void gen_exception(DisasContext *dc, uint32_t excp) tcg_gen_movi_tl(cpu_R[R_PC], dc->pc); gen_helper_raise_exception(cpu_env, tmp); tcg_temp_free_i32(tmp); - dc->is_jmp = DISAS_NORETURN; + dc->base.is_jmp = DISAS_NORETURN; } /* generate intermediate code for basic block 'tb'. */ -void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns) +static void nios2_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs) { + DisasContext *dc = container_of(dcbase, DisasContext, base); CPUNios2State *env = cs->env_ptr; - DisasContext dc1, *dc = &dc1; - int num_insns; - - /* Initialize DC */ - dc->cpu_env = cpu_env; - dc->cpu_R = cpu_R; - dc->is_jmp = DISAS_NEXT; - dc->pc = tb->pc; - dc->tb = tb; + int page_insns; + dc->mem_idx = cpu_mmu_index(env, false); - dc->singlestep_enabled = cs->singlestep_enabled; - - /* Set up instruction counts */ - num_insns = 0; - if (max_insns > 1) { - int page_insns = (TARGET_PAGE_SIZE - (tb->pc & ~TARGET_PAGE_MASK)) / 4; - if (max_insns > page_insns) { - max_insns = page_insns; - } + + /* Bound the number of insns to execute to those left on the page. */ + page_insns = -(dc->base.pc_first | TARGET_PAGE_MASK) / 4; + dc->base.max_insns = MIN(page_insns, dc->base.max_insns); +} + +static void nios2_tr_tb_start(DisasContextBase *db, CPUState *cs) +{ +} + +static void nios2_tr_insn_start(DisasContextBase *dcbase, CPUState *cs) +{ + tcg_gen_insn_start(dcbase->pc_next); +} + +static bool nios2_tr_breakpoint_check(DisasContextBase *dcbase, CPUState *cs, + const CPUBreakpoint *bp) +{ + DisasContext *dc = container_of(dcbase, DisasContext, base); + + gen_exception(dc, EXCP_DEBUG); + /* + * The address covered by the breakpoint must be included in + * [tb->pc, tb->pc + tb->size) in order to for it to be + * properly cleared -- thus we increment the PC here so that + * the logic setting tb->size below does the right thing. + */ + dc->base.pc_next += 4; + return true; +} + +static void nios2_tr_translate_insn(DisasContextBase *dcbase, CPUState *cs) +{ + DisasContext *dc = container_of(dcbase, DisasContext, base); + CPUNios2State *env = cs->env_ptr; + const Nios2Instruction *instr; + uint32_t code, pc; + uint8_t op; + + pc = dc->base.pc_next; + dc->pc = pc; + dc->base.pc_next = pc + 4; + + /* Decode an instruction */ + +#if defined(CONFIG_USER_ONLY) + /* FIXME: Is this needed ? */ + if (pc >= 0x1000 && pc < 0x2000) { + t_gen_helper_raise_exception(dc, 0xaa); + return; } +#endif - gen_tb_start(tb); - do { - tcg_gen_insn_start(dc->pc); - num_insns++; - - if (unlikely(cpu_breakpoint_test(cs, dc->pc, BP_ANY))) { - gen_exception(dc, EXCP_DEBUG); - /* The address covered by the breakpoint must be included in - [tb->pc, tb->pc + tb->size) in order to for it to be - properly cleared -- thus we increment the PC here so that - the logic setting tb->size below does the right thing. */ - dc->pc += 4; - break; - } + code = cpu_ldl_code(env, pc); + op = get_opcode(code); - if (num_insns == max_insns && (tb_cflags(tb) & CF_LAST_IO)) { - gen_io_start(); - } + if (unlikely(op >= ARRAY_SIZE(i_type_instructions))) { + t_gen_helper_raise_exception(dc, EXCP_ILLEGAL); + return; + } + + dc->zero = NULL; - /* Decode an instruction */ - handle_instruction(dc, env); + instr = &i_type_instructions[op]; + instr->handler(dc, code, instr->flags); - dc->pc += 4; + if (dc->zero) { + tcg_temp_free(dc->zero); + } +} - /* Translation stops when a conditional branch is encountered. - * Otherwise the subsequent code could get translated several times. - * Also stop translation when a page boundary is reached. This - * ensures prefetch aborts occur at the right place. */ - } while (!dc->is_jmp && - !tcg_op_buf_full() && - num_insns < max_insns); +static void nios2_tr_tb_stop(DisasContextBase *dcbase, CPUState *cs) +{ + DisasContext *dc = container_of(dcbase, DisasContext, base); /* Indicate where the next block should start */ - switch (dc->is_jmp) { - case DISAS_NEXT: + switch (dc->base.is_jmp) { + case DISAS_TOO_MANY: case DISAS_UPDATE: /* Save the current PC back into the CPU register */ - tcg_gen_movi_tl(cpu_R[R_PC], dc->pc); + tcg_gen_movi_tl(cpu_R[R_PC], dc->base.pc_next); tcg_gen_exit_tb(NULL, 0); break; - default: case DISAS_JUMP: /* The jump will already have updated the PC register */ tcg_gen_exit_tb(NULL, 0); break; case DISAS_NORETURN: - case DISAS_TB_JUMP: /* nothing more to generate */ break; - } - /* End off the block */ - gen_tb_end(tb, num_insns); - - /* Mark instruction starts for the final generated instruction */ - tb->size = dc->pc - tb->pc; - tb->icount = num_insns; - -#ifdef DEBUG_DISAS - if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM) - && qemu_log_in_addr_range(tb->pc)) { - FILE *logfile = qemu_log_lock(); - qemu_log("IN: %s\n", lookup_symbol(tb->pc)); - log_target_disas(cs, tb->pc, dc->pc - tb->pc); - qemu_log("\n"); - qemu_log_unlock(logfile); + default: + g_assert_not_reached(); } -#endif +} + +static void nios2_tr_disas_log(const DisasContextBase *dcbase, CPUState *cpu) +{ + qemu_log("IN: %s\n", lookup_symbol(dcbase->pc_first)); + log_target_disas(cpu, dcbase->pc_first, dcbase->tb->size); +} + +static const TranslatorOps nios2_tr_ops = { + .init_disas_context = nios2_tr_init_disas_context, + .tb_start = nios2_tr_tb_start, + .insn_start = nios2_tr_insn_start, + .breakpoint_check = nios2_tr_breakpoint_check, + .translate_insn = nios2_tr_translate_insn, + .tb_stop = nios2_tr_tb_stop, + .disas_log = nios2_tr_disas_log, +}; + +void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns) +{ + DisasContext dc; + translator_loop(&nios2_tr_ops, &dc.base, cs, tb, max_insns); } void nios2_cpu_dump_state(CPUState *cs, FILE *f, int flags) diff --git a/target/s390x/translate.c b/target/s390x/translate.c index e243624..03dab9f 100644 --- a/target/s390x/translate.c +++ b/target/s390x/translate.c @@ -3939,13 +3939,13 @@ static DisasJumpType op_rosbg(DisasContext *s, DisasOps *o) static DisasJumpType op_rev16(DisasContext *s, DisasOps *o) { - tcg_gen_bswap16_i64(o->out, o->in2); + tcg_gen_bswap16_i64(o->out, o->in2, TCG_BSWAP_IZ | TCG_BSWAP_OZ); return DISAS_NEXT; } static DisasJumpType op_rev32(DisasContext *s, DisasOps *o) { - tcg_gen_bswap32_i64(o->out, o->in2); + tcg_gen_bswap32_i64(o->out, o->in2, TCG_BSWAP_IZ | TCG_BSWAP_OZ); return DISAS_NEXT; } diff --git a/target/sh4/translate.c b/target/sh4/translate.c index 9312790..4dcfff8 100644 --- a/target/sh4/translate.c +++ b/target/sh4/translate.c @@ -676,8 +676,7 @@ static void _decode_opc(DisasContext * ctx) case 0x6008: /* swap.b Rm,Rn */ { TCGv low = tcg_temp_new(); - tcg_gen_ext16u_i32(low, REG(B7_4)); - tcg_gen_bswap16_i32(low, low); + tcg_gen_bswap16_i32(low, REG(B7_4), 0); tcg_gen_deposit_i32(REG(B11_8), REG(B7_4), low, 0, 16); tcg_temp_free(low); } @@ -295,19 +295,25 @@ ext32u_i64 t0, t1 8, 16 or 32 bit sign/zero extension (both operands must have the same type) -* bswap16_i32/i64 t0, t1 +* bswap16_i32/i64 t0, t1, flags -16 bit byte swap on a 32/64 bit value. It assumes that the two/six high order -bytes are set to zero. +16 bit byte swap on the low bits of a 32/64 bit input. +If flags & TCG_BSWAP_IZ, then t1 is known to be zero-extended from bit 15. +If flags & TCG_BSWAP_OZ, then t0 will be zero-extended from bit 15. +If flags & TCG_BSWAP_OS, then t0 will be sign-extended from bit 15. +If neither TCG_BSWAP_OZ nor TCG_BSWAP_OS are set, then the bits of +t0 above bit 15 may contain any value. -* bswap32_i32/i64 t0, t1 +* bswap32_i64 t0, t1, flags -32 bit byte swap on a 32/64 bit value. With a 64 bit value, it assumes that -the four high order bytes are set to zero. +32 bit byte swap on a 64-bit value. The flags are the same as for bswap16, +except they apply from bit 31 instead of bit 15. -* bswap64_i64 t0, t1 +* bswap32_i32 t0, t1, flags +* bswap64_i64 t0, t1, flags -64 bit byte swap +32/64 bit byte swap. The flags are ignored, but still present +for consistency with the other bswap opcodes. * discard_i32/i64 t0 diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc index 27cde314..5924977 100644 --- a/tcg/aarch64/tcg-target.c.inc +++ b/tcg/aarch64/tcg-target.c.inc @@ -475,9 +475,7 @@ typedef enum { /* Data-processing (1 source) instructions. */ I3507_CLZ = 0x5ac01000, I3507_RBIT = 0x5ac00000, - I3507_REV16 = 0x5ac00400, - I3507_REV32 = 0x5ac00800, - I3507_REV64 = 0x5ac00c00, + I3507_REV = 0x5ac00000, /* + size << 10 */ /* Data-processing (2 source) instructions. */ I3508_LSLV = 0x1ac02000, @@ -1417,19 +1415,11 @@ static void tcg_out_brcond(TCGContext *s, TCGType ext, TCGCond c, TCGArg a, } } -static inline void tcg_out_rev64(TCGContext *s, TCGReg rd, TCGReg rn) -{ - tcg_out_insn(s, 3507, REV64, TCG_TYPE_I64, rd, rn); -} - -static inline void tcg_out_rev32(TCGContext *s, TCGReg rd, TCGReg rn) -{ - tcg_out_insn(s, 3507, REV32, TCG_TYPE_I32, rd, rn); -} - -static inline void tcg_out_rev16(TCGContext *s, TCGReg rd, TCGReg rn) +static inline void tcg_out_rev(TCGContext *s, int ext, MemOp s_bits, + TCGReg rd, TCGReg rn) { - tcg_out_insn(s, 3507, REV16, TCG_TYPE_I32, rd, rn); + /* REV, REV16, REV32 */ + tcg_out_insn_3507(s, I3507_REV | (s_bits << 10), ext, rd, rn); } static inline void tcg_out_sxt(TCGContext *s, TCGType ext, MemOp s_bits, @@ -1557,28 +1547,34 @@ static void tcg_out_cltz(TCGContext *s, TCGType ext, TCGReg d, /* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr, * TCGMemOpIdx oi, uintptr_t ra) */ -static void * const qemu_ld_helpers[16] = { - [MO_UB] = helper_ret_ldub_mmu, - [MO_LEUW] = helper_le_lduw_mmu, - [MO_LEUL] = helper_le_ldul_mmu, - [MO_LEQ] = helper_le_ldq_mmu, - [MO_BEUW] = helper_be_lduw_mmu, - [MO_BEUL] = helper_be_ldul_mmu, - [MO_BEQ] = helper_be_ldq_mmu, +static void * const qemu_ld_helpers[4] = { + [MO_8] = helper_ret_ldub_mmu, +#ifdef HOST_WORDS_BIGENDIAN + [MO_16] = helper_be_lduw_mmu, + [MO_32] = helper_be_ldul_mmu, + [MO_64] = helper_be_ldq_mmu, +#else + [MO_16] = helper_le_lduw_mmu, + [MO_32] = helper_le_ldul_mmu, + [MO_64] = helper_le_ldq_mmu, +#endif }; /* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr, * uintxx_t val, TCGMemOpIdx oi, * uintptr_t ra) */ -static void * const qemu_st_helpers[16] = { - [MO_UB] = helper_ret_stb_mmu, - [MO_LEUW] = helper_le_stw_mmu, - [MO_LEUL] = helper_le_stl_mmu, - [MO_LEQ] = helper_le_stq_mmu, - [MO_BEUW] = helper_be_stw_mmu, - [MO_BEUL] = helper_be_stl_mmu, - [MO_BEQ] = helper_be_stq_mmu, +static void * const qemu_st_helpers[4] = { + [MO_8] = helper_ret_stb_mmu, +#ifdef HOST_WORDS_BIGENDIAN + [MO_16] = helper_be_stw_mmu, + [MO_32] = helper_be_stl_mmu, + [MO_64] = helper_be_stq_mmu, +#else + [MO_16] = helper_le_stw_mmu, + [MO_32] = helper_le_stl_mmu, + [MO_64] = helper_le_stq_mmu, +#endif }; static inline void tcg_out_adr(TCGContext *s, TCGReg rd, const void *target) @@ -1602,7 +1598,7 @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg); tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X2, oi); tcg_out_adr(s, TCG_REG_X3, lb->raddr); - tcg_out_call(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]); + tcg_out_call(s, qemu_ld_helpers[opc & MO_SIZE]); if (opc & MO_SIGN) { tcg_out_sxt(s, lb->type, size, lb->datalo_reg, TCG_REG_X0); } else { @@ -1628,7 +1624,7 @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) tcg_out_mov(s, size == MO_64, TCG_REG_X2, lb->datalo_reg); tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X3, oi); tcg_out_adr(s, TCG_REG_X4, lb->raddr); - tcg_out_call(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]); + tcg_out_call(s, qemu_st_helpers[opc & MO_SIZE]); tcg_out_goto(s, lb->raddr); return true; } @@ -1724,7 +1720,8 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp memop, TCGType ext, TCGReg data_r, TCGReg addr_r, TCGType otype, TCGReg off_r) { - const MemOp bswap = memop & MO_BSWAP; + /* Byte swapping is left to middle-end expansion. */ + tcg_debug_assert((memop & MO_BSWAP) == 0); switch (memop & MO_SSIZE) { case MO_UB: @@ -1736,40 +1733,19 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp memop, TCGType ext, break; case MO_UW: tcg_out_ldst_r(s, I3312_LDRH, data_r, addr_r, otype, off_r); - if (bswap) { - tcg_out_rev16(s, data_r, data_r); - } break; case MO_SW: - if (bswap) { - tcg_out_ldst_r(s, I3312_LDRH, data_r, addr_r, otype, off_r); - tcg_out_rev16(s, data_r, data_r); - tcg_out_sxt(s, ext, MO_16, data_r, data_r); - } else { - tcg_out_ldst_r(s, (ext ? I3312_LDRSHX : I3312_LDRSHW), - data_r, addr_r, otype, off_r); - } + tcg_out_ldst_r(s, (ext ? I3312_LDRSHX : I3312_LDRSHW), + data_r, addr_r, otype, off_r); break; case MO_UL: tcg_out_ldst_r(s, I3312_LDRW, data_r, addr_r, otype, off_r); - if (bswap) { - tcg_out_rev32(s, data_r, data_r); - } break; case MO_SL: - if (bswap) { - tcg_out_ldst_r(s, I3312_LDRW, data_r, addr_r, otype, off_r); - tcg_out_rev32(s, data_r, data_r); - tcg_out_sxt(s, TCG_TYPE_I64, MO_32, data_r, data_r); - } else { - tcg_out_ldst_r(s, I3312_LDRSWX, data_r, addr_r, otype, off_r); - } + tcg_out_ldst_r(s, I3312_LDRSWX, data_r, addr_r, otype, off_r); break; case MO_Q: tcg_out_ldst_r(s, I3312_LDRX, data_r, addr_r, otype, off_r); - if (bswap) { - tcg_out_rev64(s, data_r, data_r); - } break; default: tcg_abort(); @@ -1780,31 +1756,20 @@ static void tcg_out_qemu_st_direct(TCGContext *s, MemOp memop, TCGReg data_r, TCGReg addr_r, TCGType otype, TCGReg off_r) { - const MemOp bswap = memop & MO_BSWAP; + /* Byte swapping is left to middle-end expansion. */ + tcg_debug_assert((memop & MO_BSWAP) == 0); switch (memop & MO_SIZE) { case MO_8: tcg_out_ldst_r(s, I3312_STRB, data_r, addr_r, otype, off_r); break; case MO_16: - if (bswap && data_r != TCG_REG_XZR) { - tcg_out_rev16(s, TCG_REG_TMP, data_r); - data_r = TCG_REG_TMP; - } tcg_out_ldst_r(s, I3312_STRH, data_r, addr_r, otype, off_r); break; case MO_32: - if (bswap && data_r != TCG_REG_XZR) { - tcg_out_rev32(s, TCG_REG_TMP, data_r); - data_r = TCG_REG_TMP; - } tcg_out_ldst_r(s, I3312_STRW, data_r, addr_r, otype, off_r); break; case MO_64: - if (bswap && data_r != TCG_REG_XZR) { - tcg_out_rev64(s, TCG_REG_TMP, data_r); - data_r = TCG_REG_TMP; - } tcg_out_ldst_r(s, I3312_STRX, data_r, addr_r, otype, off_r); break; default: @@ -2184,15 +2149,27 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, break; case INDEX_op_bswap64_i64: - tcg_out_rev64(s, a0, a1); + tcg_out_rev(s, TCG_TYPE_I64, MO_64, a0, a1); break; case INDEX_op_bswap32_i64: + tcg_out_rev(s, TCG_TYPE_I32, MO_32, a0, a1); + if (a2 & TCG_BSWAP_OS) { + tcg_out_sxt(s, TCG_TYPE_I64, MO_32, a0, a0); + } + break; case INDEX_op_bswap32_i32: - tcg_out_rev32(s, a0, a1); + tcg_out_rev(s, TCG_TYPE_I32, MO_32, a0, a1); break; case INDEX_op_bswap16_i64: case INDEX_op_bswap16_i32: - tcg_out_rev16(s, a0, a1); + tcg_out_rev(s, TCG_TYPE_I32, MO_16, a0, a1); + if (a2 & TCG_BSWAP_OS) { + /* Output must be sign-extended. */ + tcg_out_sxt(s, ext, MO_16, a0, a0); + } else if ((a2 & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) { + /* Output must be zero-extended, but input isn't. */ + tcg_out_uxt(s, MO_16, a0, a0); + } break; case INDEX_op_ext8s_i64: diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h index ef55f7c..551baf8 100644 --- a/tcg/aarch64/tcg-target.h +++ b/tcg/aarch64/tcg-target.h @@ -148,7 +148,7 @@ typedef enum { #define TCG_TARGET_HAS_cmpsel_vec 0 #define TCG_TARGET_DEFAULT_MO (0) -#define TCG_TARGET_HAS_MEMORY_BSWAP 1 +#define TCG_TARGET_HAS_MEMORY_BSWAP 0 void tb_target_set_jmp_target(uintptr_t, uintptr_t, uintptr_t, uintptr_t); diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc index 5157143..7a761a6 100644 --- a/tcg/arm/tcg-target.c.inc +++ b/tcg/arm/tcg-target.c.inc @@ -1013,50 +1013,71 @@ static inline void tcg_out_ext16u(TCGContext *s, int cond, } } -static inline void tcg_out_bswap16s(TCGContext *s, int cond, int rd, int rn) +static void tcg_out_bswap16(TCGContext *s, int cond, int rd, int rn, int flags) { if (use_armv6_instructions) { - /* revsh */ - tcg_out32(s, 0x06ff0fb0 | (cond << 28) | (rd << 12) | rn); - } else { - tcg_out_dat_reg(s, cond, ARITH_MOV, - TCG_REG_TMP, 0, rn, SHIFT_IMM_LSL(24)); - tcg_out_dat_reg(s, cond, ARITH_MOV, - TCG_REG_TMP, 0, TCG_REG_TMP, SHIFT_IMM_ASR(16)); - tcg_out_dat_reg(s, cond, ARITH_ORR, - rd, TCG_REG_TMP, rn, SHIFT_IMM_LSR(8)); - } -} + if (flags & TCG_BSWAP_OS) { + /* revsh */ + tcg_out32(s, 0x06ff0fb0 | (cond << 28) | (rd << 12) | rn); + return; + } -static inline void tcg_out_bswap16(TCGContext *s, int cond, int rd, int rn) -{ - if (use_armv6_instructions) { /* rev16 */ tcg_out32(s, 0x06bf0fb0 | (cond << 28) | (rd << 12) | rn); - } else { - tcg_out_dat_reg(s, cond, ARITH_MOV, - TCG_REG_TMP, 0, rn, SHIFT_IMM_LSL(24)); - tcg_out_dat_reg(s, cond, ARITH_MOV, - TCG_REG_TMP, 0, TCG_REG_TMP, SHIFT_IMM_LSR(16)); - tcg_out_dat_reg(s, cond, ARITH_ORR, - rd, TCG_REG_TMP, rn, SHIFT_IMM_LSR(8)); + if ((flags & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) { + /* uxth */ + tcg_out32(s, 0x06ff0070 | (cond << 28) | (rd << 12) | rd); + } + return; } -} -/* swap the two low bytes assuming that the two high input bytes and the - two high output bit can hold any value. */ -static inline void tcg_out_bswap16st(TCGContext *s, int cond, int rd, int rn) -{ - if (use_armv6_instructions) { - /* rev16 */ - tcg_out32(s, 0x06bf0fb0 | (cond << 28) | (rd << 12) | rn); - } else { + if (flags == 0) { + /* + * For stores, no input or output extension: + * rn = xxAB + * lsr tmp, rn, #8 tmp = 0xxA + * and tmp, tmp, #0xff tmp = 000A + * orr rd, tmp, rn, lsl #8 rd = xABA + */ tcg_out_dat_reg(s, cond, ARITH_MOV, TCG_REG_TMP, 0, rn, SHIFT_IMM_LSR(8)); tcg_out_dat_imm(s, cond, ARITH_AND, TCG_REG_TMP, TCG_REG_TMP, 0xff); tcg_out_dat_reg(s, cond, ARITH_ORR, rd, TCG_REG_TMP, rn, SHIFT_IMM_LSL(8)); + return; + } + + /* + * Byte swap, leaving the result at the top of the register. + * We will then shift down, zero or sign-extending. + */ + if (flags & TCG_BSWAP_IZ) { + /* + * rn = 00AB + * ror tmp, rn, #8 tmp = B00A + * orr tmp, tmp, tmp, lsl #16 tmp = BA00 + */ + tcg_out_dat_reg(s, cond, ARITH_MOV, + TCG_REG_TMP, 0, rn, SHIFT_IMM_ROR(8)); + tcg_out_dat_reg(s, cond, ARITH_ORR, + TCG_REG_TMP, TCG_REG_TMP, TCG_REG_TMP, + SHIFT_IMM_LSL(16)); + } else { + /* + * rn = xxAB + * and tmp, rn, #0xff00 tmp = 00A0 + * lsl tmp, tmp, #8 tmp = 0A00 + * orr tmp, tmp, rn, lsl #24 tmp = BA00 + */ + tcg_out_dat_rI(s, cond, ARITH_AND, TCG_REG_TMP, rn, 0xff00, 1); + tcg_out_dat_reg(s, cond, ARITH_MOV, + TCG_REG_TMP, 0, TCG_REG_TMP, SHIFT_IMM_LSL(8)); + tcg_out_dat_reg(s, cond, ARITH_ORR, + TCG_REG_TMP, TCG_REG_TMP, rn, SHIFT_IMM_LSL(24)); } + tcg_out_dat_reg(s, cond, ARITH_MOV, rd, 0, TCG_REG_TMP, + (flags & TCG_BSWAP_OS + ? SHIFT_IMM_ASR(8) : SHIFT_IMM_LSR(8))); } static inline void tcg_out_bswap32(TCGContext *s, int cond, int rd, int rn) @@ -1372,34 +1393,38 @@ static void tcg_out_vldst(TCGContext *s, ARMInsn insn, /* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr, * int mmu_idx, uintptr_t ra) */ -static void * const qemu_ld_helpers[16] = { +static void * const qemu_ld_helpers[8] = { [MO_UB] = helper_ret_ldub_mmu, [MO_SB] = helper_ret_ldsb_mmu, - - [MO_LEUW] = helper_le_lduw_mmu, - [MO_LEUL] = helper_le_ldul_mmu, - [MO_LEQ] = helper_le_ldq_mmu, - [MO_LESW] = helper_le_ldsw_mmu, - [MO_LESL] = helper_le_ldul_mmu, - - [MO_BEUW] = helper_be_lduw_mmu, - [MO_BEUL] = helper_be_ldul_mmu, - [MO_BEQ] = helper_be_ldq_mmu, - [MO_BESW] = helper_be_ldsw_mmu, - [MO_BESL] = helper_be_ldul_mmu, +#ifdef HOST_WORDS_BIGENDIAN + [MO_UW] = helper_be_lduw_mmu, + [MO_UL] = helper_be_ldul_mmu, + [MO_Q] = helper_be_ldq_mmu, + [MO_SW] = helper_be_ldsw_mmu, + [MO_SL] = helper_be_ldul_mmu, +#else + [MO_UW] = helper_le_lduw_mmu, + [MO_UL] = helper_le_ldul_mmu, + [MO_Q] = helper_le_ldq_mmu, + [MO_SW] = helper_le_ldsw_mmu, + [MO_SL] = helper_le_ldul_mmu, +#endif }; /* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr, * uintxx_t val, int mmu_idx, uintptr_t ra) */ -static void * const qemu_st_helpers[16] = { - [MO_UB] = helper_ret_stb_mmu, - [MO_LEUW] = helper_le_stw_mmu, - [MO_LEUL] = helper_le_stl_mmu, - [MO_LEQ] = helper_le_stq_mmu, - [MO_BEUW] = helper_be_stw_mmu, - [MO_BEUL] = helper_be_stl_mmu, - [MO_BEQ] = helper_be_stq_mmu, +static void * const qemu_st_helpers[4] = { + [MO_8] = helper_ret_stb_mmu, +#ifdef HOST_WORDS_BIGENDIAN + [MO_16] = helper_be_stw_mmu, + [MO_32] = helper_be_stl_mmu, + [MO_64] = helper_be_stq_mmu, +#else + [MO_16] = helper_le_stw_mmu, + [MO_32] = helper_le_stl_mmu, + [MO_64] = helper_le_stq_mmu, +#endif }; /* Helper routines for marshalling helper function arguments into @@ -1604,9 +1629,9 @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) icache usage. For pre-armv6, use the signed helpers since we do not have a single insn sign-extend. */ if (use_armv6_instructions) { - func = qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]; + func = qemu_ld_helpers[opc & MO_SIZE]; } else { - func = qemu_ld_helpers[opc & (MO_BSWAP | MO_SSIZE)]; + func = qemu_ld_helpers[opc & MO_SSIZE]; if (opc & MO_SIGN) { opc = MO_UL; } @@ -1684,7 +1709,7 @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) argreg = tcg_out_arg_reg32(s, argreg, TCG_REG_R14); /* Tail-call to the helper, which will return to the fast path. */ - tcg_out_goto(s, COND_AL, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]); + tcg_out_goto(s, COND_AL, qemu_st_helpers[opc & MO_SIZE]); return true; } #endif /* SOFTMMU */ @@ -1693,7 +1718,8 @@ static inline void tcg_out_qemu_ld_index(TCGContext *s, MemOp opc, TCGReg datalo, TCGReg datahi, TCGReg addrlo, TCGReg addend) { - MemOp bswap = opc & MO_BSWAP; + /* Byte swapping is left to middle-end expansion. */ + tcg_debug_assert((opc & MO_BSWAP) == 0); switch (opc & MO_SSIZE) { case MO_UB: @@ -1704,49 +1730,30 @@ static inline void tcg_out_qemu_ld_index(TCGContext *s, MemOp opc, break; case MO_UW: tcg_out_ld16u_r(s, COND_AL, datalo, addrlo, addend); - if (bswap) { - tcg_out_bswap16(s, COND_AL, datalo, datalo); - } break; case MO_SW: - if (bswap) { - tcg_out_ld16u_r(s, COND_AL, datalo, addrlo, addend); - tcg_out_bswap16s(s, COND_AL, datalo, datalo); - } else { - tcg_out_ld16s_r(s, COND_AL, datalo, addrlo, addend); - } + tcg_out_ld16s_r(s, COND_AL, datalo, addrlo, addend); break; case MO_UL: - default: tcg_out_ld32_r(s, COND_AL, datalo, addrlo, addend); - if (bswap) { - tcg_out_bswap32(s, COND_AL, datalo, datalo); - } break; case MO_Q: - { - TCGReg dl = (bswap ? datahi : datalo); - TCGReg dh = (bswap ? datalo : datahi); - - /* Avoid ldrd for user-only emulation, to handle unaligned. */ - if (USING_SOFTMMU && use_armv6_instructions - && (dl & 1) == 0 && dh == dl + 1) { - tcg_out_ldrd_r(s, COND_AL, dl, addrlo, addend); - } else if (dl != addend) { - tcg_out_ld32_rwb(s, COND_AL, dl, addend, addrlo); - tcg_out_ld32_12(s, COND_AL, dh, addend, 4); - } else { - tcg_out_dat_reg(s, COND_AL, ARITH_ADD, TCG_REG_TMP, - addend, addrlo, SHIFT_IMM_LSL(0)); - tcg_out_ld32_12(s, COND_AL, dl, TCG_REG_TMP, 0); - tcg_out_ld32_12(s, COND_AL, dh, TCG_REG_TMP, 4); - } - if (bswap) { - tcg_out_bswap32(s, COND_AL, dl, dl); - tcg_out_bswap32(s, COND_AL, dh, dh); - } + /* Avoid ldrd for user-only emulation, to handle unaligned. */ + if (USING_SOFTMMU && use_armv6_instructions + && (datalo & 1) == 0 && datahi == datalo + 1) { + tcg_out_ldrd_r(s, COND_AL, datalo, addrlo, addend); + } else if (datalo != addend) { + tcg_out_ld32_rwb(s, COND_AL, datalo, addend, addrlo); + tcg_out_ld32_12(s, COND_AL, datahi, addend, 4); + } else { + tcg_out_dat_reg(s, COND_AL, ARITH_ADD, TCG_REG_TMP, + addend, addrlo, SHIFT_IMM_LSL(0)); + tcg_out_ld32_12(s, COND_AL, datalo, TCG_REG_TMP, 0); + tcg_out_ld32_12(s, COND_AL, datahi, TCG_REG_TMP, 4); } break; + default: + g_assert_not_reached(); } } @@ -1754,7 +1761,8 @@ static inline void tcg_out_qemu_ld_direct(TCGContext *s, MemOp opc, TCGReg datalo, TCGReg datahi, TCGReg addrlo) { - MemOp bswap = opc & MO_BSWAP; + /* Byte swapping is left to middle-end expansion. */ + tcg_debug_assert((opc & MO_BSWAP) == 0); switch (opc & MO_SSIZE) { case MO_UB: @@ -1765,47 +1773,28 @@ static inline void tcg_out_qemu_ld_direct(TCGContext *s, MemOp opc, break; case MO_UW: tcg_out_ld16u_8(s, COND_AL, datalo, addrlo, 0); - if (bswap) { - tcg_out_bswap16(s, COND_AL, datalo, datalo); - } break; case MO_SW: - if (bswap) { - tcg_out_ld16u_8(s, COND_AL, datalo, addrlo, 0); - tcg_out_bswap16s(s, COND_AL, datalo, datalo); - } else { - tcg_out_ld16s_8(s, COND_AL, datalo, addrlo, 0); - } + tcg_out_ld16s_8(s, COND_AL, datalo, addrlo, 0); break; case MO_UL: - default: tcg_out_ld32_12(s, COND_AL, datalo, addrlo, 0); - if (bswap) { - tcg_out_bswap32(s, COND_AL, datalo, datalo); - } break; case MO_Q: - { - TCGReg dl = (bswap ? datahi : datalo); - TCGReg dh = (bswap ? datalo : datahi); - - /* Avoid ldrd for user-only emulation, to handle unaligned. */ - if (USING_SOFTMMU && use_armv6_instructions - && (dl & 1) == 0 && dh == dl + 1) { - tcg_out_ldrd_8(s, COND_AL, dl, addrlo, 0); - } else if (dl == addrlo) { - tcg_out_ld32_12(s, COND_AL, dh, addrlo, bswap ? 0 : 4); - tcg_out_ld32_12(s, COND_AL, dl, addrlo, bswap ? 4 : 0); - } else { - tcg_out_ld32_12(s, COND_AL, dl, addrlo, bswap ? 4 : 0); - tcg_out_ld32_12(s, COND_AL, dh, addrlo, bswap ? 0 : 4); - } - if (bswap) { - tcg_out_bswap32(s, COND_AL, dl, dl); - tcg_out_bswap32(s, COND_AL, dh, dh); - } + /* Avoid ldrd for user-only emulation, to handle unaligned. */ + if (USING_SOFTMMU && use_armv6_instructions + && (datalo & 1) == 0 && datahi == datalo + 1) { + tcg_out_ldrd_8(s, COND_AL, datalo, addrlo, 0); + } else if (datalo == addrlo) { + tcg_out_ld32_12(s, COND_AL, datahi, addrlo, 4); + tcg_out_ld32_12(s, COND_AL, datalo, addrlo, 0); + } else { + tcg_out_ld32_12(s, COND_AL, datalo, addrlo, 0); + tcg_out_ld32_12(s, COND_AL, datahi, addrlo, 4); } break; + default: + g_assert_not_reached(); } } @@ -1854,44 +1843,31 @@ static inline void tcg_out_qemu_st_index(TCGContext *s, int cond, MemOp opc, TCGReg datalo, TCGReg datahi, TCGReg addrlo, TCGReg addend) { - MemOp bswap = opc & MO_BSWAP; + /* Byte swapping is left to middle-end expansion. */ + tcg_debug_assert((opc & MO_BSWAP) == 0); switch (opc & MO_SIZE) { case MO_8: tcg_out_st8_r(s, cond, datalo, addrlo, addend); break; case MO_16: - if (bswap) { - tcg_out_bswap16st(s, cond, TCG_REG_R0, datalo); - tcg_out_st16_r(s, cond, TCG_REG_R0, addrlo, addend); - } else { - tcg_out_st16_r(s, cond, datalo, addrlo, addend); - } + tcg_out_st16_r(s, cond, datalo, addrlo, addend); break; case MO_32: - default: - if (bswap) { - tcg_out_bswap32(s, cond, TCG_REG_R0, datalo); - tcg_out_st32_r(s, cond, TCG_REG_R0, addrlo, addend); - } else { - tcg_out_st32_r(s, cond, datalo, addrlo, addend); - } + tcg_out_st32_r(s, cond, datalo, addrlo, addend); break; case MO_64: /* Avoid strd for user-only emulation, to handle unaligned. */ - if (bswap) { - tcg_out_bswap32(s, cond, TCG_REG_R0, datahi); - tcg_out_st32_rwb(s, cond, TCG_REG_R0, addend, addrlo); - tcg_out_bswap32(s, cond, TCG_REG_R0, datalo); - tcg_out_st32_12(s, cond, TCG_REG_R0, addend, 4); - } else if (USING_SOFTMMU && use_armv6_instructions - && (datalo & 1) == 0 && datahi == datalo + 1) { + if (USING_SOFTMMU && use_armv6_instructions + && (datalo & 1) == 0 && datahi == datalo + 1) { tcg_out_strd_r(s, cond, datalo, addrlo, addend); } else { tcg_out_st32_rwb(s, cond, datalo, addend, addrlo); tcg_out_st32_12(s, cond, datahi, addend, 4); } break; + default: + g_assert_not_reached(); } } @@ -1899,44 +1875,31 @@ static inline void tcg_out_qemu_st_direct(TCGContext *s, MemOp opc, TCGReg datalo, TCGReg datahi, TCGReg addrlo) { - MemOp bswap = opc & MO_BSWAP; + /* Byte swapping is left to middle-end expansion. */ + tcg_debug_assert((opc & MO_BSWAP) == 0); switch (opc & MO_SIZE) { case MO_8: tcg_out_st8_12(s, COND_AL, datalo, addrlo, 0); break; case MO_16: - if (bswap) { - tcg_out_bswap16st(s, COND_AL, TCG_REG_R0, datalo); - tcg_out_st16_8(s, COND_AL, TCG_REG_R0, addrlo, 0); - } else { - tcg_out_st16_8(s, COND_AL, datalo, addrlo, 0); - } + tcg_out_st16_8(s, COND_AL, datalo, addrlo, 0); break; case MO_32: - default: - if (bswap) { - tcg_out_bswap32(s, COND_AL, TCG_REG_R0, datalo); - tcg_out_st32_12(s, COND_AL, TCG_REG_R0, addrlo, 0); - } else { - tcg_out_st32_12(s, COND_AL, datalo, addrlo, 0); - } + tcg_out_st32_12(s, COND_AL, datalo, addrlo, 0); break; case MO_64: /* Avoid strd for user-only emulation, to handle unaligned. */ - if (bswap) { - tcg_out_bswap32(s, COND_AL, TCG_REG_R0, datahi); - tcg_out_st32_12(s, COND_AL, TCG_REG_R0, addrlo, 0); - tcg_out_bswap32(s, COND_AL, TCG_REG_R0, datalo); - tcg_out_st32_12(s, COND_AL, TCG_REG_R0, addrlo, 4); - } else if (USING_SOFTMMU && use_armv6_instructions - && (datalo & 1) == 0 && datahi == datalo + 1) { + if (USING_SOFTMMU && use_armv6_instructions + && (datalo & 1) == 0 && datahi == datalo + 1) { tcg_out_strd_8(s, COND_AL, datalo, addrlo, 0); } else { tcg_out_st32_12(s, COND_AL, datalo, addrlo, 0); tcg_out_st32_12(s, COND_AL, datahi, addrlo, 4); } break; + default: + g_assert_not_reached(); } } @@ -2245,7 +2208,7 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, break; case INDEX_op_bswap16_i32: - tcg_out_bswap16(s, COND_AL, args[0], args[1]); + tcg_out_bswap16(s, COND_AL, args[0], args[1], args[2]); break; case INDEX_op_bswap32_i32: tcg_out_bswap32(s, COND_AL, args[0], args[1]); diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h index 57fd0c0..95fcef3 100644 --- a/tcg/arm/tcg-target.h +++ b/tcg/arm/tcg-target.h @@ -174,7 +174,7 @@ extern bool use_neon_instructions; #define TCG_TARGET_HAS_cmpsel_vec 0 #define TCG_TARGET_DEFAULT_MO (0) -#define TCG_TARGET_HAS_MEMORY_BSWAP 1 +#define TCG_TARGET_HAS_MEMORY_BSWAP 0 /* not defined -- call should be eliminated at compile time */ void tb_target_set_jmp_target(uintptr_t, uintptr_t, uintptr_t, uintptr_t); diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc index 3411338..98d924b 100644 --- a/tcg/i386/tcg-target.c.inc +++ b/tcg/i386/tcg-target.c.inc @@ -2421,10 +2421,28 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, break; OP_32_64(bswap16): - tcg_out_rolw_8(s, a0); + if (a2 & TCG_BSWAP_OS) { + /* Output must be sign-extended. */ + if (rexw) { + tcg_out_bswap64(s, a0); + tcg_out_shifti(s, SHIFT_SAR + rexw, a0, 48); + } else { + tcg_out_bswap32(s, a0); + tcg_out_shifti(s, SHIFT_SAR, a0, 16); + } + } else if ((a2 & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) { + /* Output must be zero-extended, but input isn't. */ + tcg_out_bswap32(s, a0); + tcg_out_shifti(s, SHIFT_SHR, a0, 16); + } else { + tcg_out_rolw_8(s, a0); + } break; OP_32_64(bswap32): tcg_out_bswap32(s, a0); + if (rexw && (a2 & TCG_BSWAP_OS)) { + tcg_out_ext32s(s, a0, a0); + } break; OP_32_64(neg): diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc index 5944448..bf0eb84 100644 --- a/tcg/mips/tcg-target.c.inc +++ b/tcg/mips/tcg-target.c.inc @@ -540,39 +540,37 @@ static void tcg_out_movi(TCGContext *s, TCGType type, } } -static inline void tcg_out_bswap16(TCGContext *s, TCGReg ret, TCGReg arg) +static void tcg_out_bswap16(TCGContext *s, TCGReg ret, TCGReg arg, int flags) { + /* ret and arg can't be register tmp0 */ + tcg_debug_assert(ret != TCG_TMP0); + tcg_debug_assert(arg != TCG_TMP0); + + /* With arg = abcd: */ if (use_mips32r2_instructions) { - tcg_out_opc_reg(s, OPC_WSBH, ret, 0, arg); - } else { - /* ret and arg can't be register at */ - if (ret == TCG_TMP0 || arg == TCG_TMP0) { - tcg_abort(); + tcg_out_opc_reg(s, OPC_WSBH, ret, 0, arg); /* badc */ + if (flags & TCG_BSWAP_OS) { + tcg_out_opc_reg(s, OPC_SEH, ret, 0, ret); /* ssdc */ + } else if ((flags & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) { + tcg_out_opc_imm(s, OPC_ANDI, ret, ret, 0xffff); /* 00dc */ } - - tcg_out_opc_sa(s, OPC_SRL, TCG_TMP0, arg, 8); - tcg_out_opc_sa(s, OPC_SLL, ret, arg, 8); - tcg_out_opc_imm(s, OPC_ANDI, ret, ret, 0xff00); - tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_TMP0); + return; } -} -static inline void tcg_out_bswap16s(TCGContext *s, TCGReg ret, TCGReg arg) -{ - if (use_mips32r2_instructions) { - tcg_out_opc_reg(s, OPC_WSBH, ret, 0, arg); - tcg_out_opc_reg(s, OPC_SEH, ret, 0, ret); + tcg_out_opc_sa(s, OPC_SRL, TCG_TMP0, arg, 8); /* 0abc */ + if (!(flags & TCG_BSWAP_IZ)) { + tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP0, TCG_TMP0, 0x00ff); /* 000c */ + } + if (flags & TCG_BSWAP_OS) { + tcg_out_opc_sa(s, OPC_SLL, ret, arg, 24); /* d000 */ + tcg_out_opc_sa(s, OPC_SRA, ret, ret, 16); /* ssd0 */ } else { - /* ret and arg can't be register at */ - if (ret == TCG_TMP0 || arg == TCG_TMP0) { - tcg_abort(); + tcg_out_opc_sa(s, OPC_SLL, ret, arg, 8); /* bcd0 */ + if (flags & TCG_BSWAP_OZ) { + tcg_out_opc_imm(s, OPC_ANDI, ret, ret, 0xff00); /* 00d0 */ } - - tcg_out_opc_sa(s, OPC_SRL, TCG_TMP0, arg, 8); - tcg_out_opc_sa(s, OPC_SLL, ret, arg, 24); - tcg_out_opc_sa(s, OPC_SRA, ret, ret, 16); - tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_TMP0); } + tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_TMP0); /* ssdc */ } static void tcg_out_bswap_subr(TCGContext *s, const tcg_insn_unit *sub) @@ -581,27 +579,20 @@ static void tcg_out_bswap_subr(TCGContext *s, const tcg_insn_unit *sub) tcg_debug_assert(ok); } -static void tcg_out_bswap32(TCGContext *s, TCGReg ret, TCGReg arg) +static void tcg_out_bswap32(TCGContext *s, TCGReg ret, TCGReg arg, int flags) { if (use_mips32r2_instructions) { tcg_out_opc_reg(s, OPC_WSBH, ret, 0, arg); tcg_out_opc_sa(s, OPC_ROTR, ret, ret, 16); + if (flags & TCG_BSWAP_OZ) { + tcg_out_opc_bf(s, OPC_DEXT, ret, ret, 31, 0); + } } else { - tcg_out_bswap_subr(s, bswap32_addr); - /* delay slot -- never omit the insn, like tcg_out_mov might. */ - tcg_out_opc_reg(s, OPC_OR, TCG_TMP0, arg, TCG_REG_ZERO); - tcg_out_mov(s, TCG_TYPE_I32, ret, TCG_TMP3); - } -} - -static void tcg_out_bswap32u(TCGContext *s, TCGReg ret, TCGReg arg) -{ - if (use_mips32r2_instructions) { - tcg_out_opc_reg(s, OPC_DSBH, ret, 0, arg); - tcg_out_opc_reg(s, OPC_DSHD, ret, 0, ret); - tcg_out_dsrl(s, ret, ret, 32); - } else { - tcg_out_bswap_subr(s, bswap32u_addr); + if (flags & TCG_BSWAP_OZ) { + tcg_out_bswap_subr(s, bswap32u_addr); + } else { + tcg_out_bswap_subr(s, bswap32_addr); + } /* delay slot -- never omit the insn, like tcg_out_mov might. */ tcg_out_opc_reg(s, OPC_OR, TCG_TMP0, arg, TCG_REG_ZERO); tcg_out_mov(s, TCG_TYPE_I32, ret, TCG_TMP3); @@ -1367,14 +1358,14 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg lo, TCGReg hi, break; case MO_UW | MO_BSWAP: tcg_out_opc_imm(s, OPC_LHU, TCG_TMP1, base, 0); - tcg_out_bswap16(s, lo, TCG_TMP1); + tcg_out_bswap16(s, lo, TCG_TMP1, TCG_BSWAP_IZ | TCG_BSWAP_OZ); break; case MO_UW: tcg_out_opc_imm(s, OPC_LHU, lo, base, 0); break; case MO_SW | MO_BSWAP: tcg_out_opc_imm(s, OPC_LHU, TCG_TMP1, base, 0); - tcg_out_bswap16s(s, lo, TCG_TMP1); + tcg_out_bswap16(s, lo, TCG_TMP1, TCG_BSWAP_IZ | TCG_BSWAP_OS); break; case MO_SW: tcg_out_opc_imm(s, OPC_LH, lo, base, 0); @@ -1383,7 +1374,7 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg lo, TCGReg hi, if (TCG_TARGET_REG_BITS == 64 && is_64) { if (use_mips32r2_instructions) { tcg_out_opc_imm(s, OPC_LWU, lo, base, 0); - tcg_out_bswap32u(s, lo, lo); + tcg_out_bswap32(s, lo, lo, TCG_BSWAP_IZ | TCG_BSWAP_OZ); } else { tcg_out_bswap_subr(s, bswap32u_addr); /* delay slot */ @@ -1396,7 +1387,7 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg lo, TCGReg hi, case MO_SL | MO_BSWAP: if (use_mips32r2_instructions) { tcg_out_opc_imm(s, OPC_LW, lo, base, 0); - tcg_out_bswap32(s, lo, lo); + tcg_out_bswap32(s, lo, lo, 0); } else { tcg_out_bswap_subr(s, bswap32_addr); /* delay slot */ @@ -1514,8 +1505,7 @@ static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg lo, TCGReg hi, break; case MO_16 | MO_BSWAP: - tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP1, lo, 0xffff); - tcg_out_bswap16(s, TCG_TMP1, TCG_TMP1); + tcg_out_bswap16(s, TCG_TMP1, lo, 0); lo = TCG_TMP1; /* FALLTHRU */ case MO_16: @@ -1523,7 +1513,7 @@ static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg lo, TCGReg hi, break; case MO_32 | MO_BSWAP: - tcg_out_bswap32(s, TCG_TMP3, lo); + tcg_out_bswap32(s, TCG_TMP3, lo, 0); lo = TCG_TMP3; /* FALLTHRU */ case MO_32: @@ -1542,9 +1532,9 @@ static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg lo, TCGReg hi, tcg_out_opc_imm(s, OPC_SW, TCG_TMP0, base, 0); tcg_out_opc_imm(s, OPC_SW, TCG_TMP1, base, 4); } else { - tcg_out_bswap32(s, TCG_TMP3, MIPS_BE ? lo : hi); + tcg_out_bswap32(s, TCG_TMP3, MIPS_BE ? lo : hi, 0); tcg_out_opc_imm(s, OPC_SW, TCG_TMP3, base, 0); - tcg_out_bswap32(s, TCG_TMP3, MIPS_BE ? hi : lo); + tcg_out_bswap32(s, TCG_TMP3, MIPS_BE ? hi : lo, 0); tcg_out_opc_imm(s, OPC_SW, TCG_TMP3, base, 4); } break; @@ -1933,10 +1923,6 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, case INDEX_op_not_i64: i1 = OPC_NOR; goto do_unary; - case INDEX_op_bswap16_i32: - case INDEX_op_bswap16_i64: - i1 = OPC_WSBH; - goto do_unary; case INDEX_op_ext8s_i32: case INDEX_op_ext8s_i64: i1 = OPC_SEB; @@ -1948,11 +1934,15 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, tcg_out_opc_reg(s, i1, a0, TCG_REG_ZERO, a1); break; + case INDEX_op_bswap16_i32: + case INDEX_op_bswap16_i64: + tcg_out_bswap16(s, a0, a1, a2); + break; case INDEX_op_bswap32_i32: - tcg_out_bswap32(s, a0, a1); + tcg_out_bswap32(s, a0, a1, 0); break; case INDEX_op_bswap32_i64: - tcg_out_bswap32u(s, a0, a1); + tcg_out_bswap32(s, a0, a1, a2); break; case INDEX_op_bswap64_i64: tcg_out_bswap64(s, a0, a1); diff --git a/tcg/optimize.c b/tcg/optimize.c index 211a420..9876ac5 100644 --- a/tcg/optimize.c +++ b/tcg/optimize.c @@ -355,10 +355,12 @@ static uint64_t do_constant_folding_2(TCGOpcode op, uint64_t x, uint64_t y) return (uint16_t)x; CASE_OP_32_64(bswap16): - return bswap16(x); + x = bswap16(x); + return y & TCG_BSWAP_OS ? (int16_t)x : x; CASE_OP_32_64(bswap32): - return bswap32(x); + x = bswap32(x); + return y & TCG_BSWAP_OS ? (int32_t)x : x; case INDEX_op_bswap64_i64: return bswap64(x); @@ -1029,6 +1031,42 @@ void tcg_optimize(TCGContext *s) } break; + CASE_OP_32_64(bswap16): + mask = arg_info(op->args[1])->mask; + if (mask <= 0xffff) { + op->args[2] |= TCG_BSWAP_IZ; + } + mask = bswap16(mask); + switch (op->args[2] & (TCG_BSWAP_OZ | TCG_BSWAP_OS)) { + case TCG_BSWAP_OZ: + break; + case TCG_BSWAP_OS: + mask = (int16_t)mask; + break; + default: /* undefined high bits */ + mask |= MAKE_64BIT_MASK(16, 48); + break; + } + break; + + case INDEX_op_bswap32_i64: + mask = arg_info(op->args[1])->mask; + if (mask <= 0xffffffffu) { + op->args[2] |= TCG_BSWAP_IZ; + } + mask = bswap32(mask); + switch (op->args[2] & (TCG_BSWAP_OZ | TCG_BSWAP_OS)) { + case TCG_BSWAP_OZ: + break; + case TCG_BSWAP_OS: + mask = (int32_t)mask; + break; + default: /* undefined high bits */ + mask |= MAKE_64BIT_MASK(32, 32); + break; + } + break; + default: break; } @@ -1135,9 +1173,6 @@ void tcg_optimize(TCGContext *s) CASE_OP_32_64(ext16s): CASE_OP_32_64(ext16u): CASE_OP_32_64(ctpop): - CASE_OP_32_64(bswap16): - CASE_OP_32_64(bswap32): - case INDEX_op_bswap64_i64: case INDEX_op_ext32s_i64: case INDEX_op_ext32u_i64: case INDEX_op_ext_i32_i64: @@ -1151,6 +1186,17 @@ void tcg_optimize(TCGContext *s) } goto do_default; + CASE_OP_32_64(bswap16): + CASE_OP_32_64(bswap32): + case INDEX_op_bswap64_i64: + if (arg_is_const(op->args[1])) { + tmp = do_constant_folding(opc, arg_info(op->args[1])->val, + op->args[2]); + tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp); + break; + } + goto do_default; + CASE_OP_32_64(add): CASE_OP_32_64(sub): CASE_OP_32_64(mul): diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc index 7957014..e0f4665 100644 --- a/tcg/ppc/tcg-target.c.inc +++ b/tcg/ppc/tcg-target.c.inc @@ -413,6 +413,10 @@ static bool tcg_target_const_match(int64_t val, TCGType type, int ct) #define SRAD XO31(794) #define SRADI XO31(413<<1) +#define BRH XO31(219) +#define BRW XO31(155) +#define BRD XO31(187) + #define TW XO31( 4) #define TRAP (TW | TO(31)) @@ -738,6 +742,26 @@ static inline void tcg_out_rlw(TCGContext *s, int op, TCGReg ra, TCGReg rs, tcg_out32(s, op | RA(ra) | RS(rs) | SH(sh) | MB(mb) | ME(me)); } +static inline void tcg_out_ext8s(TCGContext *s, TCGReg dst, TCGReg src) +{ + tcg_out32(s, EXTSB | RA(dst) | RS(src)); +} + +static inline void tcg_out_ext16s(TCGContext *s, TCGReg dst, TCGReg src) +{ + tcg_out32(s, EXTSH | RA(dst) | RS(src)); +} + +static inline void tcg_out_ext16u(TCGContext *s, TCGReg dst, TCGReg src) +{ + tcg_out32(s, ANDI | SAI(src, dst, 0xffff)); +} + +static inline void tcg_out_ext32s(TCGContext *s, TCGReg dst, TCGReg src) +{ + tcg_out32(s, EXTSW | RA(dst) | RS(src)); +} + static inline void tcg_out_ext32u(TCGContext *s, TCGReg dst, TCGReg src) { tcg_out_rld(s, RLDICL, dst, src, 0, 32); @@ -753,6 +777,12 @@ static inline void tcg_out_shli64(TCGContext *s, TCGReg dst, TCGReg src, int c) tcg_out_rld(s, RLDICR, dst, src, c, 63 - c); } +static inline void tcg_out_sari32(TCGContext *s, TCGReg dst, TCGReg src, int c) +{ + /* Limit immediate shift count lest we create an illegal insn. */ + tcg_out32(s, SRAWI | RA(dst) | RS(src) | SH(c & 31)); +} + static inline void tcg_out_shri32(TCGContext *s, TCGReg dst, TCGReg src, int c) { tcg_out_rlw(s, RLWINM, dst, src, 32 - c, c, 31); @@ -763,6 +793,116 @@ static inline void tcg_out_shri64(TCGContext *s, TCGReg dst, TCGReg src, int c) tcg_out_rld(s, RLDICL, dst, src, 64 - c, c); } +static inline void tcg_out_sari64(TCGContext *s, TCGReg dst, TCGReg src, int c) +{ + tcg_out32(s, SRADI | RA(dst) | RS(src) | SH(c & 0x1f) | ((c >> 4) & 2)); +} + +static void tcg_out_bswap16(TCGContext *s, TCGReg dst, TCGReg src, int flags) +{ + TCGReg tmp = dst == src ? TCG_REG_R0 : dst; + + if (have_isa_3_10) { + tcg_out32(s, BRH | RA(dst) | RS(src)); + if (flags & TCG_BSWAP_OS) { + tcg_out_ext16s(s, dst, dst); + } else if ((flags & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) { + tcg_out_ext16u(s, dst, dst); + } + return; + } + + /* + * In the following, + * dep(a, b, m) -> (a & ~m) | (b & m) + * + * Begin with: src = xxxxabcd + */ + /* tmp = rol32(src, 24) & 0x000000ff = 0000000c */ + tcg_out_rlw(s, RLWINM, tmp, src, 24, 24, 31); + /* tmp = dep(tmp, rol32(src, 8), 0x0000ff00) = 000000dc */ + tcg_out_rlw(s, RLWIMI, tmp, src, 8, 16, 23); + + if (flags & TCG_BSWAP_OS) { + tcg_out_ext16s(s, dst, tmp); + } else { + tcg_out_mov(s, TCG_TYPE_REG, dst, tmp); + } +} + +static void tcg_out_bswap32(TCGContext *s, TCGReg dst, TCGReg src, int flags) +{ + TCGReg tmp = dst == src ? TCG_REG_R0 : dst; + + if (have_isa_3_10) { + tcg_out32(s, BRW | RA(dst) | RS(src)); + if (flags & TCG_BSWAP_OS) { + tcg_out_ext32s(s, dst, dst); + } else if ((flags & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) { + tcg_out_ext32u(s, dst, dst); + } + return; + } + + /* + * Stolen from gcc's builtin_bswap32. + * In the following, + * dep(a, b, m) -> (a & ~m) | (b & m) + * + * Begin with: src = xxxxabcd + */ + /* tmp = rol32(src, 8) & 0xffffffff = 0000bcda */ + tcg_out_rlw(s, RLWINM, tmp, src, 8, 0, 31); + /* tmp = dep(tmp, rol32(src, 24), 0xff000000) = 0000dcda */ + tcg_out_rlw(s, RLWIMI, tmp, src, 24, 0, 7); + /* tmp = dep(tmp, rol32(src, 24), 0x0000ff00) = 0000dcba */ + tcg_out_rlw(s, RLWIMI, tmp, src, 24, 16, 23); + + if (flags & TCG_BSWAP_OS) { + tcg_out_ext32s(s, dst, tmp); + } else { + tcg_out_mov(s, TCG_TYPE_REG, dst, tmp); + } +} + +static void tcg_out_bswap64(TCGContext *s, TCGReg dst, TCGReg src) +{ + TCGReg t0 = dst == src ? TCG_REG_R0 : dst; + TCGReg t1 = dst == src ? dst : TCG_REG_R0; + + if (have_isa_3_10) { + tcg_out32(s, BRD | RA(dst) | RS(src)); + return; + } + + /* + * In the following, + * dep(a, b, m) -> (a & ~m) | (b & m) + * + * Begin with: src = abcdefgh + */ + /* t0 = rol32(src, 8) & 0xffffffff = 0000fghe */ + tcg_out_rlw(s, RLWINM, t0, src, 8, 0, 31); + /* t0 = dep(t0, rol32(src, 24), 0xff000000) = 0000hghe */ + tcg_out_rlw(s, RLWIMI, t0, src, 24, 0, 7); + /* t0 = dep(t0, rol32(src, 24), 0x0000ff00) = 0000hgfe */ + tcg_out_rlw(s, RLWIMI, t0, src, 24, 16, 23); + + /* t0 = rol64(t0, 32) = hgfe0000 */ + tcg_out_rld(s, RLDICL, t0, t0, 32, 0); + /* t1 = rol64(src, 32) = efghabcd */ + tcg_out_rld(s, RLDICL, t1, src, 32, 0); + + /* t0 = dep(t0, rol32(t1, 24), 0xffffffff) = hgfebcda */ + tcg_out_rlw(s, RLWIMI, t0, t1, 8, 0, 31); + /* t0 = dep(t0, rol32(t1, 24), 0xff000000) = hgfedcda */ + tcg_out_rlw(s, RLWIMI, t0, t1, 24, 0, 7); + /* t0 = dep(t0, rol32(t1, 24), 0x0000ff00) = hgfedcba */ + tcg_out_rlw(s, RLWIMI, t0, t1, 24, 16, 23); + + tcg_out_mov(s, TCG_TYPE_REG, dst, t0); +} + /* Emit a move into ret of arg, if it can be done in one insn. */ static bool tcg_out_movi_one(TCGContext *s, TCGReg ret, tcg_target_long arg) { @@ -2322,7 +2462,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const int const_args[TCG_MAX_OP_ARGS]) { TCGArg a0, a1, a2; - int c; switch (opc) { case INDEX_op_exit_tb: @@ -2390,7 +2529,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, case INDEX_op_ld8s_i32: case INDEX_op_ld8s_i64: tcg_out_mem_long(s, LBZ, LBZX, args[0], args[1], args[2]); - tcg_out32(s, EXTSB | RS(args[0]) | RA(args[0])); + tcg_out_ext8s(s, args[0], args[0]); break; case INDEX_op_ld16u_i32: case INDEX_op_ld16u_i64: @@ -2587,8 +2726,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, break; case INDEX_op_sar_i32: if (const_args[2]) { - /* Limit immediate shift count lest we create an illegal insn. */ - tcg_out32(s, SRAWI | RS(args[1]) | RA(args[0]) | SH(args[2] & 31)); + tcg_out_sari32(s, args[0], args[1], args[2]); } else { tcg_out32(s, SRAW | SAB(args[1], args[0], args[2])); } @@ -2676,8 +2814,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, break; case INDEX_op_sar_i64: if (const_args[2]) { - int sh = SH(args[2] & 0x1f) | (((args[2] >> 5) & 1) << 1); - tcg_out32(s, SRADI | RA(args[0]) | RS(args[1]) | sh); + tcg_out_sari64(s, args[0], args[1], args[2]); } else { tcg_out32(s, SRAD | SAB(args[1], args[0], args[2])); } @@ -2728,18 +2865,15 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, case INDEX_op_ext8s_i32: case INDEX_op_ext8s_i64: - c = EXTSB; - goto gen_ext; + tcg_out_ext8s(s, args[0], args[1]); + break; case INDEX_op_ext16s_i32: case INDEX_op_ext16s_i64: - c = EXTSH; - goto gen_ext; + tcg_out_ext16s(s, args[0], args[1]); + break; case INDEX_op_ext_i32_i64: case INDEX_op_ext32s_i64: - c = EXTSW; - goto gen_ext; - gen_ext: - tcg_out32(s, c | RS(args[1]) | RA(args[0])); + tcg_out_ext32s(s, args[0], args[1]); break; case INDEX_op_extu_i32_i64: tcg_out_ext32u(s, args[0], args[1]); @@ -2759,72 +2893,16 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, case INDEX_op_bswap16_i32: case INDEX_op_bswap16_i64: - a0 = args[0], a1 = args[1]; - /* a1 = abcd */ - if (a0 != a1) { - /* a0 = (a1 r<< 24) & 0xff # 000c */ - tcg_out_rlw(s, RLWINM, a0, a1, 24, 24, 31); - /* a0 = (a0 & ~0xff00) | (a1 r<< 8) & 0xff00 # 00dc */ - tcg_out_rlw(s, RLWIMI, a0, a1, 8, 16, 23); - } else { - /* r0 = (a1 r<< 8) & 0xff00 # 00d0 */ - tcg_out_rlw(s, RLWINM, TCG_REG_R0, a1, 8, 16, 23); - /* a0 = (a1 r<< 24) & 0xff # 000c */ - tcg_out_rlw(s, RLWINM, a0, a1, 24, 24, 31); - /* a0 = a0 | r0 # 00dc */ - tcg_out32(s, OR | SAB(TCG_REG_R0, a0, a0)); - } + tcg_out_bswap16(s, args[0], args[1], args[2]); break; - case INDEX_op_bswap32_i32: + tcg_out_bswap32(s, args[0], args[1], 0); + break; case INDEX_op_bswap32_i64: - /* Stolen from gcc's builtin_bswap32 */ - a1 = args[1]; - a0 = args[0] == a1 ? TCG_REG_R0 : args[0]; - - /* a1 = args[1] # abcd */ - /* a0 = rotate_left (a1, 8) # bcda */ - tcg_out_rlw(s, RLWINM, a0, a1, 8, 0, 31); - /* a0 = (a0 & ~0xff000000) | ((a1 r<< 24) & 0xff000000) # dcda */ - tcg_out_rlw(s, RLWIMI, a0, a1, 24, 0, 7); - /* a0 = (a0 & ~0x0000ff00) | ((a1 r<< 24) & 0x0000ff00) # dcba */ - tcg_out_rlw(s, RLWIMI, a0, a1, 24, 16, 23); - - if (a0 == TCG_REG_R0) { - tcg_out_mov(s, TCG_TYPE_REG, args[0], a0); - } + tcg_out_bswap32(s, args[0], args[1], args[2]); break; - case INDEX_op_bswap64_i64: - a0 = args[0], a1 = args[1], a2 = TCG_REG_R0; - if (a0 == a1) { - a0 = TCG_REG_R0; - a2 = a1; - } - - /* a1 = # abcd efgh */ - /* a0 = rl32(a1, 8) # 0000 fghe */ - tcg_out_rlw(s, RLWINM, a0, a1, 8, 0, 31); - /* a0 = dep(a0, rl32(a1, 24), 0xff000000) # 0000 hghe */ - tcg_out_rlw(s, RLWIMI, a0, a1, 24, 0, 7); - /* a0 = dep(a0, rl32(a1, 24), 0x0000ff00) # 0000 hgfe */ - tcg_out_rlw(s, RLWIMI, a0, a1, 24, 16, 23); - - /* a0 = rl64(a0, 32) # hgfe 0000 */ - /* a2 = rl64(a1, 32) # efgh abcd */ - tcg_out_rld(s, RLDICL, a0, a0, 32, 0); - tcg_out_rld(s, RLDICL, a2, a1, 32, 0); - - /* a0 = dep(a0, rl32(a2, 8), 0xffffffff) # hgfe bcda */ - tcg_out_rlw(s, RLWIMI, a0, a2, 8, 0, 31); - /* a0 = dep(a0, rl32(a2, 24), 0xff000000) # hgfe dcda */ - tcg_out_rlw(s, RLWIMI, a0, a2, 24, 0, 7); - /* a0 = dep(a0, rl32(a2, 24), 0x0000ff00) # hgfe dcba */ - tcg_out_rlw(s, RLWIMI, a0, a2, 24, 16, 23); - - if (a0 == 0) { - tcg_out_mov(s, TCG_TYPE_REG, args[0], a0); - } + tcg_out_bswap64(s, args[0], args[1]); break; case INDEX_op_deposit_i32: diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc index da7eeca..c16f96b 100644 --- a/tcg/riscv/tcg-target.c.inc +++ b/tcg/riscv/tcg-target.c.inc @@ -852,37 +852,43 @@ static void tcg_out_mb(TCGContext *s, TCGArg a0) /* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr, * TCGMemOpIdx oi, uintptr_t ra) */ -static void * const qemu_ld_helpers[16] = { - [MO_UB] = helper_ret_ldub_mmu, - [MO_SB] = helper_ret_ldsb_mmu, - [MO_LEUW] = helper_le_lduw_mmu, - [MO_LESW] = helper_le_ldsw_mmu, - [MO_LEUL] = helper_le_ldul_mmu, +static void * const qemu_ld_helpers[8] = { + [MO_UB] = helper_ret_ldub_mmu, + [MO_SB] = helper_ret_ldsb_mmu, +#ifdef HOST_WORDS_BIGENDIAN + [MO_UW] = helper_be_lduw_mmu, + [MO_SW] = helper_be_ldsw_mmu, + [MO_UL] = helper_be_ldul_mmu, #if TCG_TARGET_REG_BITS == 64 - [MO_LESL] = helper_le_ldsl_mmu, + [MO_SL] = helper_be_ldsl_mmu, #endif - [MO_LEQ] = helper_le_ldq_mmu, - [MO_BEUW] = helper_be_lduw_mmu, - [MO_BESW] = helper_be_ldsw_mmu, - [MO_BEUL] = helper_be_ldul_mmu, + [MO_Q] = helper_be_ldq_mmu, +#else + [MO_UW] = helper_le_lduw_mmu, + [MO_SW] = helper_le_ldsw_mmu, + [MO_UL] = helper_le_ldul_mmu, #if TCG_TARGET_REG_BITS == 64 - [MO_BESL] = helper_be_ldsl_mmu, + [MO_SL] = helper_le_ldsl_mmu, +#endif + [MO_Q] = helper_le_ldq_mmu, #endif - [MO_BEQ] = helper_be_ldq_mmu, }; /* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr, * uintxx_t val, TCGMemOpIdx oi, * uintptr_t ra) */ -static void * const qemu_st_helpers[16] = { - [MO_UB] = helper_ret_stb_mmu, - [MO_LEUW] = helper_le_stw_mmu, - [MO_LEUL] = helper_le_stl_mmu, - [MO_LEQ] = helper_le_stq_mmu, - [MO_BEUW] = helper_be_stw_mmu, - [MO_BEUL] = helper_be_stl_mmu, - [MO_BEQ] = helper_be_stq_mmu, +static void * const qemu_st_helpers[4] = { + [MO_8] = helper_ret_stb_mmu, +#ifdef HOST_WORDS_BIGENDIAN + [MO_16] = helper_be_stw_mmu, + [MO_32] = helper_be_stl_mmu, + [MO_64] = helper_be_stq_mmu, +#else + [MO_16] = helper_le_stw_mmu, + [MO_32] = helper_le_stl_mmu, + [MO_64] = helper_le_stq_mmu, +#endif }; /* We don't support oversize guests */ @@ -997,7 +1003,7 @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l) tcg_out_movi(s, TCG_TYPE_PTR, a2, oi); tcg_out_movi(s, TCG_TYPE_PTR, a3, (tcg_target_long)l->raddr); - tcg_out_call(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SSIZE)]); + tcg_out_call(s, qemu_ld_helpers[opc & MO_SSIZE]); tcg_out_mov(s, (opc & MO_SIZE) == MO_64, l->datalo_reg, a0); tcg_out_goto(s, l->raddr); @@ -1042,7 +1048,7 @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l) tcg_out_movi(s, TCG_TYPE_PTR, a3, oi); tcg_out_movi(s, TCG_TYPE_PTR, a4, (tcg_target_long)l->raddr); - tcg_out_call(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SSIZE)]); + tcg_out_call(s, qemu_st_helpers[opc & MO_SIZE]); tcg_out_goto(s, l->raddr); return true; @@ -1052,10 +1058,8 @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l) static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg lo, TCGReg hi, TCGReg base, MemOp opc, bool is_64) { - const MemOp bswap = opc & MO_BSWAP; - - /* We don't yet handle byteswapping, assert */ - g_assert(!bswap); + /* Byte swapping is left to middle-end expansion. */ + tcg_debug_assert((opc & MO_BSWAP) == 0); switch (opc & (MO_SSIZE)) { case MO_UB: @@ -1139,10 +1143,8 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is_64) static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg lo, TCGReg hi, TCGReg base, MemOp opc) { - const MemOp bswap = opc & MO_BSWAP; - - /* We don't yet handle byteswapping, assert */ - g_assert(!bswap); + /* Byte swapping is left to middle-end expansion. */ + tcg_debug_assert((opc & MO_BSWAP) == 0); switch (opc & (MO_SSIZE)) { case MO_8: diff --git a/tcg/s390/tcg-target.c.inc b/tcg/s390/tcg-target.c.inc index 5fe073f..b82cf19 100644 --- a/tcg/s390/tcg-target.c.inc +++ b/tcg/s390/tcg-target.c.inc @@ -1951,15 +1951,37 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, tgen_ext16u(s, TCG_TYPE_I32, args[0], args[1]); break; - OP_32_64(bswap16): - /* The TCG bswap definition requires bits 0-47 already be zero. - Thus we don't need the G-type insns to implement bswap16_i64. */ - tcg_out_insn(s, RRE, LRVR, args[0], args[1]); - tcg_out_sh32(s, RS_SRL, args[0], TCG_REG_NONE, 16); + case INDEX_op_bswap16_i32: + a0 = args[0], a1 = args[1], a2 = args[2]; + tcg_out_insn(s, RRE, LRVR, a0, a1); + if (a2 & TCG_BSWAP_OS) { + tcg_out_sh32(s, RS_SRA, a0, TCG_REG_NONE, 16); + } else { + tcg_out_sh32(s, RS_SRL, a0, TCG_REG_NONE, 16); + } + break; + case INDEX_op_bswap16_i64: + a0 = args[0], a1 = args[1], a2 = args[2]; + tcg_out_insn(s, RRE, LRVGR, a0, a1); + if (a2 & TCG_BSWAP_OS) { + tcg_out_sh64(s, RSY_SRAG, a0, a0, TCG_REG_NONE, 48); + } else { + tcg_out_sh64(s, RSY_SRLG, a0, a0, TCG_REG_NONE, 48); + } break; - OP_32_64(bswap32): + + case INDEX_op_bswap32_i32: tcg_out_insn(s, RRE, LRVR, args[0], args[1]); break; + case INDEX_op_bswap32_i64: + a0 = args[0], a1 = args[1], a2 = args[2]; + tcg_out_insn(s, RRE, LRVR, a0, a1); + if (a2 & TCG_BSWAP_OS) { + tgen_ext32s(s, a0, a0); + } else if ((a2 & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) { + tgen_ext32u(s, a0, a0); + } + break; case INDEX_op_add2_i32: if (const_args[4]) { diff --git a/tcg/tcg-op-gvec.c b/tcg/tcg-op-gvec.c index 515db12..ffe55e9 100644 --- a/tcg/tcg-op-gvec.c +++ b/tcg/tcg-op-gvec.c @@ -1736,12 +1736,45 @@ void tcg_gen_vec_add8_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) gen_addv_mask(d, a, b, m); } +void tcg_gen_vec_add8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) +{ + TCGv_i32 m = tcg_constant_i32((int32_t)dup_const(MO_8, 0x80)); + TCGv_i32 t1 = tcg_temp_new_i32(); + TCGv_i32 t2 = tcg_temp_new_i32(); + TCGv_i32 t3 = tcg_temp_new_i32(); + + tcg_gen_andc_i32(t1, a, m); + tcg_gen_andc_i32(t2, b, m); + tcg_gen_xor_i32(t3, a, b); + tcg_gen_add_i32(d, t1, t2); + tcg_gen_and_i32(t3, t3, m); + tcg_gen_xor_i32(d, d, t3); + + tcg_temp_free_i32(t1); + tcg_temp_free_i32(t2); + tcg_temp_free_i32(t3); +} + void tcg_gen_vec_add16_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) { TCGv_i64 m = tcg_constant_i64(dup_const(MO_16, 0x8000)); gen_addv_mask(d, a, b, m); } +void tcg_gen_vec_add16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) +{ + TCGv_i32 t1 = tcg_temp_new_i32(); + TCGv_i32 t2 = tcg_temp_new_i32(); + + tcg_gen_andi_i32(t1, a, ~0xffff); + tcg_gen_add_i32(t2, a, b); + tcg_gen_add_i32(t1, t1, b); + tcg_gen_deposit_i32(d, t1, t2, 0, 16); + + tcg_temp_free_i32(t1); + tcg_temp_free_i32(t2); +} + void tcg_gen_vec_add32_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) { TCGv_i64 t1 = tcg_temp_new_i64(); @@ -1886,12 +1919,45 @@ void tcg_gen_vec_sub8_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) gen_subv_mask(d, a, b, m); } +void tcg_gen_vec_sub8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) +{ + TCGv_i32 m = tcg_constant_i32((int32_t)dup_const(MO_8, 0x80)); + TCGv_i32 t1 = tcg_temp_new_i32(); + TCGv_i32 t2 = tcg_temp_new_i32(); + TCGv_i32 t3 = tcg_temp_new_i32(); + + tcg_gen_or_i32(t1, a, m); + tcg_gen_andc_i32(t2, b, m); + tcg_gen_eqv_i32(t3, a, b); + tcg_gen_sub_i32(d, t1, t2); + tcg_gen_and_i32(t3, t3, m); + tcg_gen_xor_i32(d, d, t3); + + tcg_temp_free_i32(t1); + tcg_temp_free_i32(t2); + tcg_temp_free_i32(t3); +} + void tcg_gen_vec_sub16_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) { TCGv_i64 m = tcg_constant_i64(dup_const(MO_16, 0x8000)); gen_subv_mask(d, a, b, m); } +void tcg_gen_vec_sub16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) +{ + TCGv_i32 t1 = tcg_temp_new_i32(); + TCGv_i32 t2 = tcg_temp_new_i32(); + + tcg_gen_andi_i32(t1, b, ~0xffff); + tcg_gen_sub_i32(t2, a, b); + tcg_gen_sub_i32(t1, a, t1); + tcg_gen_deposit_i32(d, t1, t2, 0, 16); + + tcg_temp_free_i32(t1); + tcg_temp_free_i32(t2); +} + void tcg_gen_vec_sub32_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) { TCGv_i64 t1 = tcg_temp_new_i64(); @@ -2612,6 +2678,20 @@ void tcg_gen_vec_shl16i_i64(TCGv_i64 d, TCGv_i64 a, int64_t c) tcg_gen_andi_i64(d, d, mask); } +void tcg_gen_vec_shl8i_i32(TCGv_i32 d, TCGv_i32 a, int32_t c) +{ + uint32_t mask = dup_const(MO_8, 0xff << c); + tcg_gen_shli_i32(d, a, c); + tcg_gen_andi_i32(d, d, mask); +} + +void tcg_gen_vec_shl16i_i32(TCGv_i32 d, TCGv_i32 a, int32_t c) +{ + uint32_t mask = dup_const(MO_16, 0xffff << c); + tcg_gen_shli_i32(d, a, c); + tcg_gen_andi_i32(d, d, mask); +} + void tcg_gen_gvec_shli(unsigned vece, uint32_t dofs, uint32_t aofs, int64_t shift, uint32_t oprsz, uint32_t maxsz) { @@ -2663,6 +2743,20 @@ void tcg_gen_vec_shr16i_i64(TCGv_i64 d, TCGv_i64 a, int64_t c) tcg_gen_andi_i64(d, d, mask); } +void tcg_gen_vec_shr8i_i32(TCGv_i32 d, TCGv_i32 a, int32_t c) +{ + uint32_t mask = dup_const(MO_8, 0xff >> c); + tcg_gen_shri_i32(d, a, c); + tcg_gen_andi_i32(d, d, mask); +} + +void tcg_gen_vec_shr16i_i32(TCGv_i32 d, TCGv_i32 a, int32_t c) +{ + uint32_t mask = dup_const(MO_16, 0xffff >> c); + tcg_gen_shri_i32(d, a, c); + tcg_gen_andi_i32(d, d, mask); +} + void tcg_gen_gvec_shri(unsigned vece, uint32_t dofs, uint32_t aofs, int64_t shift, uint32_t oprsz, uint32_t maxsz) { @@ -2728,6 +2822,34 @@ void tcg_gen_vec_sar16i_i64(TCGv_i64 d, TCGv_i64 a, int64_t c) tcg_temp_free_i64(s); } +void tcg_gen_vec_sar8i_i32(TCGv_i32 d, TCGv_i32 a, int32_t c) +{ + uint32_t s_mask = dup_const(MO_8, 0x80 >> c); + uint32_t c_mask = dup_const(MO_8, 0xff >> c); + TCGv_i32 s = tcg_temp_new_i32(); + + tcg_gen_shri_i32(d, a, c); + tcg_gen_andi_i32(s, d, s_mask); /* isolate (shifted) sign bit */ + tcg_gen_muli_i32(s, s, (2 << c) - 2); /* replicate isolated signs */ + tcg_gen_andi_i32(d, d, c_mask); /* clear out bits above sign */ + tcg_gen_or_i32(d, d, s); /* include sign extension */ + tcg_temp_free_i32(s); +} + +void tcg_gen_vec_sar16i_i32(TCGv_i32 d, TCGv_i32 a, int32_t c) +{ + uint32_t s_mask = dup_const(MO_16, 0x8000 >> c); + uint32_t c_mask = dup_const(MO_16, 0xffff >> c); + TCGv_i32 s = tcg_temp_new_i32(); + + tcg_gen_shri_i32(d, a, c); + tcg_gen_andi_i32(s, d, s_mask); /* isolate (shifted) sign bit */ + tcg_gen_andi_i32(d, d, c_mask); /* clear out bits above sign */ + tcg_gen_muli_i32(s, s, (2 << c) - 2); /* replicate isolated signs */ + tcg_gen_or_i32(d, d, s); /* include sign extension */ + tcg_temp_free_i32(s); +} + void tcg_gen_gvec_sari(unsigned vece, uint32_t dofs, uint32_t aofs, int64_t shift, uint32_t oprsz, uint32_t maxsz) { diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c index dcc2ed0..44d711c 100644 --- a/tcg/tcg-op.c +++ b/tcg/tcg-op.c @@ -1001,26 +1001,42 @@ void tcg_gen_ext16u_i32(TCGv_i32 ret, TCGv_i32 arg) } } -/* Note: we assume the two high bytes are set to zero */ -void tcg_gen_bswap16_i32(TCGv_i32 ret, TCGv_i32 arg) +void tcg_gen_bswap16_i32(TCGv_i32 ret, TCGv_i32 arg, int flags) { + /* Only one extension flag may be present. */ + tcg_debug_assert(!(flags & TCG_BSWAP_OS) || !(flags & TCG_BSWAP_OZ)); + if (TCG_TARGET_HAS_bswap16_i32) { - tcg_gen_op2_i32(INDEX_op_bswap16_i32, ret, arg); + tcg_gen_op3i_i32(INDEX_op_bswap16_i32, ret, arg, flags); } else { TCGv_i32 t0 = tcg_temp_new_i32(); + TCGv_i32 t1 = tcg_temp_new_i32(); - tcg_gen_ext8u_i32(t0, arg); - tcg_gen_shli_i32(t0, t0, 8); - tcg_gen_shri_i32(ret, arg, 8); - tcg_gen_or_i32(ret, ret, t0); + tcg_gen_shri_i32(t0, arg, 8); + if (!(flags & TCG_BSWAP_IZ)) { + tcg_gen_ext8u_i32(t0, t0); + } + + if (flags & TCG_BSWAP_OS) { + tcg_gen_shli_i32(t1, arg, 24); + tcg_gen_sari_i32(t1, t1, 16); + } else if (flags & TCG_BSWAP_OZ) { + tcg_gen_ext8u_i32(t1, arg); + tcg_gen_shli_i32(t1, t1, 8); + } else { + tcg_gen_shli_i32(t1, arg, 8); + } + + tcg_gen_or_i32(ret, t0, t1); tcg_temp_free_i32(t0); + tcg_temp_free_i32(t1); } } void tcg_gen_bswap32_i32(TCGv_i32 ret, TCGv_i32 arg) { if (TCG_TARGET_HAS_bswap32_i32) { - tcg_gen_op2_i32(INDEX_op_bswap32_i32, ret, arg); + tcg_gen_op3i_i32(INDEX_op_bswap32_i32, ret, arg, 0); } else { TCGv_i32 t0 = tcg_temp_new_i32(); TCGv_i32 t1 = tcg_temp_new_i32(); @@ -1654,49 +1670,79 @@ void tcg_gen_ext32u_i64(TCGv_i64 ret, TCGv_i64 arg) } } -/* Note: we assume the six high bytes are set to zero */ -void tcg_gen_bswap16_i64(TCGv_i64 ret, TCGv_i64 arg) +void tcg_gen_bswap16_i64(TCGv_i64 ret, TCGv_i64 arg, int flags) { + /* Only one extension flag may be present. */ + tcg_debug_assert(!(flags & TCG_BSWAP_OS) || !(flags & TCG_BSWAP_OZ)); + if (TCG_TARGET_REG_BITS == 32) { - tcg_gen_bswap16_i32(TCGV_LOW(ret), TCGV_LOW(arg)); - tcg_gen_movi_i32(TCGV_HIGH(ret), 0); + tcg_gen_bswap16_i32(TCGV_LOW(ret), TCGV_LOW(arg), flags); + if (flags & TCG_BSWAP_OS) { + tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31); + } else { + tcg_gen_movi_i32(TCGV_HIGH(ret), 0); + } } else if (TCG_TARGET_HAS_bswap16_i64) { - tcg_gen_op2_i64(INDEX_op_bswap16_i64, ret, arg); + tcg_gen_op3i_i64(INDEX_op_bswap16_i64, ret, arg, flags); } else { TCGv_i64 t0 = tcg_temp_new_i64(); + TCGv_i64 t1 = tcg_temp_new_i64(); + + tcg_gen_shri_i64(t0, arg, 8); + if (!(flags & TCG_BSWAP_IZ)) { + tcg_gen_ext8u_i64(t0, t0); + } - tcg_gen_ext8u_i64(t0, arg); - tcg_gen_shli_i64(t0, t0, 8); - tcg_gen_shri_i64(ret, arg, 8); - tcg_gen_or_i64(ret, ret, t0); + if (flags & TCG_BSWAP_OS) { + tcg_gen_shli_i64(t1, arg, 56); + tcg_gen_sari_i64(t1, t1, 48); + } else if (flags & TCG_BSWAP_OZ) { + tcg_gen_ext8u_i64(t1, arg); + tcg_gen_shli_i64(t1, t1, 8); + } else { + tcg_gen_shli_i64(t1, arg, 8); + } + + tcg_gen_or_i64(ret, t0, t1); tcg_temp_free_i64(t0); + tcg_temp_free_i64(t1); } } -/* Note: we assume the four high bytes are set to zero */ -void tcg_gen_bswap32_i64(TCGv_i64 ret, TCGv_i64 arg) +void tcg_gen_bswap32_i64(TCGv_i64 ret, TCGv_i64 arg, int flags) { + /* Only one extension flag may be present. */ + tcg_debug_assert(!(flags & TCG_BSWAP_OS) || !(flags & TCG_BSWAP_OZ)); + if (TCG_TARGET_REG_BITS == 32) { tcg_gen_bswap32_i32(TCGV_LOW(ret), TCGV_LOW(arg)); - tcg_gen_movi_i32(TCGV_HIGH(ret), 0); + if (flags & TCG_BSWAP_OS) { + tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31); + } else { + tcg_gen_movi_i32(TCGV_HIGH(ret), 0); + } } else if (TCG_TARGET_HAS_bswap32_i64) { - tcg_gen_op2_i64(INDEX_op_bswap32_i64, ret, arg); + tcg_gen_op3i_i64(INDEX_op_bswap32_i64, ret, arg, flags); } else { TCGv_i64 t0 = tcg_temp_new_i64(); TCGv_i64 t1 = tcg_temp_new_i64(); TCGv_i64 t2 = tcg_constant_i64(0x00ff00ff); - /* arg = ....abcd */ - tcg_gen_shri_i64(t0, arg, 8); /* t0 = .....abc */ - tcg_gen_and_i64(t1, arg, t2); /* t1 = .....b.d */ - tcg_gen_and_i64(t0, t0, t2); /* t0 = .....a.c */ - tcg_gen_shli_i64(t1, t1, 8); /* t1 = ....b.d. */ - tcg_gen_or_i64(ret, t0, t1); /* ret = ....badc */ - - tcg_gen_shli_i64(t1, ret, 48); /* t1 = dc...... */ - tcg_gen_shri_i64(t0, ret, 16); /* t0 = ......ba */ - tcg_gen_shri_i64(t1, t1, 32); /* t1 = ....dc.. */ - tcg_gen_or_i64(ret, t0, t1); /* ret = ....dcba */ + /* arg = xxxxabcd */ + tcg_gen_shri_i64(t0, arg, 8); /* t0 = .xxxxabc */ + tcg_gen_and_i64(t1, arg, t2); /* t1 = .....b.d */ + tcg_gen_and_i64(t0, t0, t2); /* t0 = .....a.c */ + tcg_gen_shli_i64(t1, t1, 8); /* t1 = ....b.d. */ + tcg_gen_or_i64(ret, t0, t1); /* ret = ....badc */ + + tcg_gen_shli_i64(t1, ret, 48); /* t1 = dc...... */ + tcg_gen_shri_i64(t0, ret, 16); /* t0 = ......ba */ + if (flags & TCG_BSWAP_OS) { + tcg_gen_sari_i64(t1, t1, 32); /* t1 = ssssdc.. */ + } else { + tcg_gen_shri_i64(t1, t1, 32); /* t1 = ....dc.. */ + } + tcg_gen_or_i64(ret, t0, t1); /* ret = ssssdcba */ tcg_temp_free_i64(t0); tcg_temp_free_i64(t1); @@ -1717,7 +1763,7 @@ void tcg_gen_bswap64_i64(TCGv_i64 ret, TCGv_i64 arg) tcg_temp_free_i32(t0); tcg_temp_free_i32(t1); } else if (TCG_TARGET_HAS_bswap64_i64) { - tcg_gen_op2_i64(INDEX_op_bswap64_i64, ret, arg); + tcg_gen_op3i_i64(INDEX_op_bswap64_i64, ret, arg, 0); } else { TCGv_i64 t0 = tcg_temp_new_i64(); TCGv_i64 t1 = tcg_temp_new_i64(); @@ -2830,7 +2876,7 @@ void tcg_gen_qemu_ld_i32(TCGv_i32 val, TCGv addr, TCGArg idx, MemOp memop) orig_memop = memop; if (!TCG_TARGET_HAS_MEMORY_BSWAP && (memop & MO_BSWAP)) { memop &= ~MO_BSWAP; - /* The bswap primitive requires zero-extended input. */ + /* The bswap primitive benefits from zero-extended input. */ if ((memop & MO_SSIZE) == MO_SW) { memop &= ~MO_SIGN; } @@ -2843,10 +2889,9 @@ void tcg_gen_qemu_ld_i32(TCGv_i32 val, TCGv addr, TCGArg idx, MemOp memop) if ((orig_memop ^ memop) & MO_BSWAP) { switch (orig_memop & MO_SIZE) { case MO_16: - tcg_gen_bswap16_i32(val, val); - if (orig_memop & MO_SIGN) { - tcg_gen_ext16s_i32(val, val); - } + tcg_gen_bswap16_i32(val, val, (orig_memop & MO_SIGN + ? TCG_BSWAP_IZ | TCG_BSWAP_OS + : TCG_BSWAP_IZ | TCG_BSWAP_OZ)); break; case MO_32: tcg_gen_bswap32_i32(val, val); @@ -2870,8 +2915,7 @@ void tcg_gen_qemu_st_i32(TCGv_i32 val, TCGv addr, TCGArg idx, MemOp memop) swap = tcg_temp_new_i32(); switch (memop & MO_SIZE) { case MO_16: - tcg_gen_ext16u_i32(swap, val); - tcg_gen_bswap16_i32(swap, swap); + tcg_gen_bswap16_i32(swap, val, 0); break; case MO_32: tcg_gen_bswap32_i32(swap, val); @@ -2919,7 +2963,7 @@ void tcg_gen_qemu_ld_i64(TCGv_i64 val, TCGv addr, TCGArg idx, MemOp memop) orig_memop = memop; if (!TCG_TARGET_HAS_MEMORY_BSWAP && (memop & MO_BSWAP)) { memop &= ~MO_BSWAP; - /* The bswap primitive requires zero-extended input. */ + /* The bswap primitive benefits from zero-extended input. */ if ((memop & MO_SIGN) && (memop & MO_SIZE) < MO_64) { memop &= ~MO_SIGN; } @@ -2930,18 +2974,15 @@ void tcg_gen_qemu_ld_i64(TCGv_i64 val, TCGv addr, TCGArg idx, MemOp memop) plugin_gen_mem_callbacks(addr, info); if ((orig_memop ^ memop) & MO_BSWAP) { + int flags = (orig_memop & MO_SIGN + ? TCG_BSWAP_IZ | TCG_BSWAP_OS + : TCG_BSWAP_IZ | TCG_BSWAP_OZ); switch (orig_memop & MO_SIZE) { case MO_16: - tcg_gen_bswap16_i64(val, val); - if (orig_memop & MO_SIGN) { - tcg_gen_ext16s_i64(val, val); - } + tcg_gen_bswap16_i64(val, val, flags); break; case MO_32: - tcg_gen_bswap32_i64(val, val); - if (orig_memop & MO_SIGN) { - tcg_gen_ext32s_i64(val, val); - } + tcg_gen_bswap32_i64(val, val, flags); break; case MO_64: tcg_gen_bswap64_i64(val, val); @@ -2971,12 +3012,10 @@ void tcg_gen_qemu_st_i64(TCGv_i64 val, TCGv addr, TCGArg idx, MemOp memop) swap = tcg_temp_new_i64(); switch (memop & MO_SIZE) { case MO_16: - tcg_gen_ext16u_i64(swap, val); - tcg_gen_bswap16_i64(swap, swap); + tcg_gen_bswap16_i64(swap, val, 0); break; case MO_32: - tcg_gen_ext32u_i64(swap, val); - tcg_gen_bswap32_i64(swap, swap); + tcg_gen_bswap32_i64(swap, val, 0); break; case MO_64: tcg_gen_bswap64_i64(swap, val); @@ -1778,6 +1778,14 @@ static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = { [MO_ALIGN_64 >> MO_ASHIFT] = "al64+", }; +static const char bswap_flag_name[][6] = { + [TCG_BSWAP_IZ] = "iz", + [TCG_BSWAP_OZ] = "oz", + [TCG_BSWAP_OS] = "os", + [TCG_BSWAP_IZ | TCG_BSWAP_OZ] = "iz,oz", + [TCG_BSWAP_IZ | TCG_BSWAP_OS] = "iz,os", +}; + static inline bool tcg_regset_single(TCGRegSet d) { return (d & (d - 1)) == 0; @@ -1921,6 +1929,26 @@ static void tcg_dump_ops(TCGContext *s, bool have_prefs) i = 1; } break; + case INDEX_op_bswap16_i32: + case INDEX_op_bswap16_i64: + case INDEX_op_bswap32_i32: + case INDEX_op_bswap32_i64: + case INDEX_op_bswap64_i64: + { + TCGArg flags = op->args[k]; + const char *name = NULL; + + if (flags < ARRAY_SIZE(bswap_flag_name)) { + name = bswap_flag_name[flags]; + } + if (name) { + col += qemu_log(",%s", name); + } else { + col += qemu_log(",$0x%" TCG_PRIlx, flags); + } + i = k = 1; + } + break; default: i = 0; break; @@ -808,7 +808,8 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, regs[r0] = (int8_t)regs[r1]; break; #endif -#if TCG_TARGET_HAS_ext16s_i32 || TCG_TARGET_HAS_ext16s_i64 +#if TCG_TARGET_HAS_ext16s_i32 || TCG_TARGET_HAS_ext16s_i64 || \ + TCG_TARGET_HAS_bswap16_i32 || TCG_TARGET_HAS_bswap16_i64 CASE_32_64(ext16s) tci_args_rr(insn, &r0, &r1); regs[r0] = (int16_t)regs[r1]; diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc index 9651e7a..0cb16aa 100644 --- a/tcg/tci/tcg-target.c.inc +++ b/tcg/tci/tcg-target.c.inc @@ -597,6 +597,8 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg args[TCG_MAX_OP_ARGS], const int const_args[TCG_MAX_OP_ARGS]) { + TCGOpcode exts; + switch (opc) { case INDEX_op_exit_tb: tcg_out_op_p(s, opc, (void *)args[0]); @@ -710,13 +712,28 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, CASE_64(ext32u) /* Optional (TCG_TARGET_HAS_ext32u_i64). */ CASE_64(ext_i32) CASE_64(extu_i32) - CASE_32_64(bswap16) /* Optional (TCG_TARGET_HAS_bswap16_*). */ - CASE_32_64(bswap32) /* Optional (TCG_TARGET_HAS_bswap32_*). */ - CASE_64(bswap64) /* Optional (TCG_TARGET_HAS_bswap64_i64). */ CASE_32_64(ctpop) /* Optional (TCG_TARGET_HAS_ctpop_*). */ + case INDEX_op_bswap32_i32: /* Optional (TCG_TARGET_HAS_bswap32_i32). */ + case INDEX_op_bswap64_i64: /* Optional (TCG_TARGET_HAS_bswap64_i64). */ tcg_out_op_rr(s, opc, args[0], args[1]); break; + case INDEX_op_bswap16_i32: /* Optional (TCG_TARGET_HAS_bswap16_i32). */ + exts = INDEX_op_ext16s_i32; + goto do_bswap; + case INDEX_op_bswap16_i64: /* Optional (TCG_TARGET_HAS_bswap16_i64). */ + exts = INDEX_op_ext16s_i64; + goto do_bswap; + case INDEX_op_bswap32_i64: /* Optional (TCG_TARGET_HAS_bswap32_i64). */ + exts = INDEX_op_ext32s_i64; + do_bswap: + /* The base tci bswaps zero-extend, and ignore high bits. */ + tcg_out_op_rr(s, opc, args[0], args[1]); + if (args[2] & TCG_BSWAP_OS) { + tcg_out_op_rr(s, exts, args[0], args[0]); + } + break; + CASE_32_64(add2) CASE_32_64(sub2) tcg_out_op_rrrrrr(s, opc, args[0], args[1], args[2], |