Diffstat (limited to 'target')
306 files changed, 31741 insertions, 15816 deletions
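Note on a recurring pattern in the hunks below: several front ends (the first target/alpha/cpu.c hunk is one example) stop reading cs->interrupt_request directly and go through a cpu_test_interrupt() accessor instead. As a minimal sketch of the idea only, assuming the accessor is a simple atomic read-and-mask of the request word (the real definition in the QEMU core CPU headers may differ in detail):

    /* Sketch only: report whether any interrupt bit in 'mask' is pending. */
    static inline bool cpu_test_interrupt(CPUState *cpu, int mask)
    {
        /* Acquire load, so the test pairs with the store done when the
         * interrupt was raised, rather than a plain racy field read. */
        return qatomic_load_acquire(&cpu->interrupt_request) & mask;
    }

Callers then write cpu_test_interrupt(cs, CPU_INTERRUPT_HARD | ...) where they previously masked cs->interrupt_request by hand, as the alpha_cpu_has_work() change below shows.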
diff --git a/target/alpha/cpu.c b/target/alpha/cpu.c index 890b84c..932cdda 100644 --- a/target/alpha/cpu.c +++ b/target/alpha/cpu.c @@ -86,10 +86,10 @@ static bool alpha_cpu_has_work(CPUState *cs) assume that if a CPU really wants to stay asleep, it will mask interrupts at the chipset level, which will prevent these bits from being set in the first place. */ - return cs->interrupt_request & (CPU_INTERRUPT_HARD - | CPU_INTERRUPT_TIMER - | CPU_INTERRUPT_SMP - | CPU_INTERRUPT_MCHK); + return cpu_test_interrupt(cs, CPU_INTERRUPT_HARD + | CPU_INTERRUPT_TIMER + | CPU_INTERRUPT_SMP + | CPU_INTERRUPT_MCHK); } #endif /* !CONFIG_USER_ONLY */ @@ -261,6 +261,7 @@ static const TCGCPUOps alpha_tcg_ops = { .record_sigbus = alpha_cpu_record_sigbus, #else .tlb_fill = alpha_cpu_tlb_fill, + .pointer_wrap = cpu_pointer_wrap_notreached, .cpu_exec_interrupt = alpha_cpu_exec_interrupt, .cpu_exec_halt = alpha_cpu_has_work, .cpu_exec_reset = cpu_reset, @@ -285,6 +286,7 @@ static void alpha_cpu_class_init(ObjectClass *oc, const void *data) cc->get_pc = alpha_cpu_get_pc; cc->gdb_read_register = alpha_cpu_gdb_read_register; cc->gdb_write_register = alpha_cpu_gdb_write_register; + cc->gdb_core_xml_file = "alpha-core.xml"; #ifndef CONFIG_USER_ONLY dc->vmsd = &vmstate_alpha_cpu; cc->sysemu_ops = &alpha_sysemu_ops; diff --git a/target/alpha/helper.h b/target/alpha/helper.h index d60f208..954a5c8 100644 --- a/target/alpha/helper.h +++ b/target/alpha/helper.h @@ -90,9 +90,9 @@ DEF_HELPER_FLAGS_2(ieee_input_s, TCG_CALL_NO_WG, void, env, i64) #if !defined (CONFIG_USER_ONLY) DEF_HELPER_FLAGS_1(tbia, TCG_CALL_NO_RWG, void, env) DEF_HELPER_FLAGS_2(tbis, TCG_CALL_NO_RWG, void, env, i64) -DEF_HELPER_FLAGS_1(tb_flush, TCG_CALL_NO_RWG, void, env) DEF_HELPER_1(halt, void, i64) +DEF_HELPER_1(whami, i64, env) DEF_HELPER_FLAGS_0(get_vmtime, TCG_CALL_NO_RWG, i64) DEF_HELPER_FLAGS_0(get_walltime, TCG_CALL_NO_RWG, i64) diff --git a/target/alpha/machine.c b/target/alpha/machine.c index 5f302b1..6828b12 100644 --- a/target/alpha/machine.c +++ b/target/alpha/machine.c @@ -25,8 +25,8 @@ static const VMStateInfo vmstate_fpcr = { }; static const VMStateField vmstate_env_fields[] = { - VMSTATE_UINTTL_ARRAY(ir, CPUAlphaState, 31), - VMSTATE_UINTTL_ARRAY(fir, CPUAlphaState, 31), + VMSTATE_UINT64_ARRAY(ir, CPUAlphaState, 31), + VMSTATE_UINT64_ARRAY(fir, CPUAlphaState, 31), /* Save the architecture value of the fpcr, not the internally expanded version. 
Since this architecture value does not exist in memory to be stored, this requires a but of hoop @@ -41,27 +41,27 @@ static const VMStateField vmstate_env_fields[] = { .flags = VMS_SINGLE, .offset = 0 }, - VMSTATE_UINTTL(pc, CPUAlphaState), - VMSTATE_UINTTL(unique, CPUAlphaState), - VMSTATE_UINTTL(lock_addr, CPUAlphaState), - VMSTATE_UINTTL(lock_value, CPUAlphaState), + VMSTATE_UINT64(pc, CPUAlphaState), + VMSTATE_UINT64(unique, CPUAlphaState), + VMSTATE_UINT64(lock_addr, CPUAlphaState), + VMSTATE_UINT64(lock_value, CPUAlphaState), VMSTATE_UINT32(flags, CPUAlphaState), VMSTATE_UINT32(pcc_ofs, CPUAlphaState), - VMSTATE_UINTTL(trap_arg0, CPUAlphaState), - VMSTATE_UINTTL(trap_arg1, CPUAlphaState), - VMSTATE_UINTTL(trap_arg2, CPUAlphaState), + VMSTATE_UINT64(trap_arg0, CPUAlphaState), + VMSTATE_UINT64(trap_arg1, CPUAlphaState), + VMSTATE_UINT64(trap_arg2, CPUAlphaState), - VMSTATE_UINTTL(exc_addr, CPUAlphaState), - VMSTATE_UINTTL(palbr, CPUAlphaState), - VMSTATE_UINTTL(ptbr, CPUAlphaState), - VMSTATE_UINTTL(vptptr, CPUAlphaState), - VMSTATE_UINTTL(sysval, CPUAlphaState), - VMSTATE_UINTTL(usp, CPUAlphaState), + VMSTATE_UINT64(exc_addr, CPUAlphaState), + VMSTATE_UINT64(palbr, CPUAlphaState), + VMSTATE_UINT64(ptbr, CPUAlphaState), + VMSTATE_UINT64(vptptr, CPUAlphaState), + VMSTATE_UINT64(sysval, CPUAlphaState), + VMSTATE_UINT64(usp, CPUAlphaState), - VMSTATE_UINTTL_ARRAY(shadow, CPUAlphaState, 8), - VMSTATE_UINTTL_ARRAY(scratch, CPUAlphaState, 24), + VMSTATE_UINT64_ARRAY(shadow, CPUAlphaState, 8), + VMSTATE_UINT64_ARRAY(scratch, CPUAlphaState, 24), VMSTATE_END_OF_LIST() }; diff --git a/target/alpha/sys_helper.c b/target/alpha/sys_helper.c index 51e3254..0e0a619 100644 --- a/target/alpha/sys_helper.c +++ b/target/alpha/sys_helper.c @@ -20,7 +20,6 @@ #include "qemu/osdep.h" #include "cpu.h" #include "exec/cputlb.h" -#include "exec/tb-flush.h" #include "exec/helper-proto.h" #include "system/runstate.h" #include "system/system.h" @@ -38,11 +37,6 @@ void helper_tbis(CPUAlphaState *env, uint64_t p) tlb_flush_page(env_cpu(env), p); } -void helper_tb_flush(CPUAlphaState *env) -{ - tb_flush(env_cpu(env)); -} - void helper_halt(uint64_t restart) { if (restart) { @@ -73,3 +67,8 @@ void helper_set_alarm(CPUAlphaState *env, uint64_t expire) timer_del(cpu->alarm_timer); } } + +uint64_t HELPER(whami)(CPUAlphaState *env) +{ + return env_cpu(env)->cpu_index; +} diff --git a/target/alpha/translate.c b/target/alpha/translate.c index cebab03..b1d8a4e 100644 --- a/target/alpha/translate.c +++ b/target/alpha/translate.c @@ -48,8 +48,6 @@ struct DisasContext { #ifdef CONFIG_USER_ONLY MemOp unalign; -#else - uint64_t palbr; #endif uint32_t tbflags; int mem_idx; @@ -438,18 +436,18 @@ static DisasJumpType gen_store_conditional(DisasContext *ctx, int ra, int rb, return DISAS_NEXT; } -static void gen_goto_tb(DisasContext *ctx, int idx, int32_t disp) +static void gen_goto_tb(DisasContext *ctx, unsigned tb_slot_idx, int32_t disp) { if (translator_use_goto_tb(&ctx->base, ctx->base.pc_next + disp)) { /* With PCREL, PC must always be up-to-date. 
*/ if (ctx->pcrel) { gen_pc_disp(ctx, cpu_pc, disp); - tcg_gen_goto_tb(idx); + tcg_gen_goto_tb(tb_slot_idx); } else { - tcg_gen_goto_tb(idx); + tcg_gen_goto_tb(tb_slot_idx); gen_pc_disp(ctx, cpu_pc, disp); } - tcg_gen_exit_tb(ctx->base.tb, idx); + tcg_gen_exit_tb(ctx->base.tb, tb_slot_idx); } else { gen_pc_disp(ctx, cpu_pc, disp); tcg_gen_lookup_and_goto_ptr(); @@ -1128,8 +1126,7 @@ static DisasJumpType gen_call_pal(DisasContext *ctx, int palcode) break; case 0x3C: /* WHAMI */ - tcg_gen_ld32s_i64(ctx->ir[IR_V0], tcg_env, - -offsetof(AlphaCPU, env) + offsetof(CPUState, cpu_index)); + gen_helper_whami(ctx->ir[IR_V0], tcg_env); break; case 0x3E: @@ -1155,7 +1152,6 @@ static DisasJumpType gen_call_pal(DisasContext *ctx, int palcode) #else { TCGv tmp = tcg_temp_new(); - uint64_t entry; gen_pc_disp(ctx, tmp, 0); if (ctx->tbflags & ENV_FLAG_PAL_MODE) { @@ -1165,12 +1161,11 @@ static DisasJumpType gen_call_pal(DisasContext *ctx, int palcode) } tcg_gen_st_i64(tmp, tcg_env, offsetof(CPUAlphaState, exc_addr)); - entry = ctx->palbr; - entry += (palcode & 0x80 - ? 0x2000 + (palcode - 0x80) * 64 - : 0x1000 + palcode * 64); - - tcg_gen_movi_i64(cpu_pc, entry); + tcg_gen_ld_i64(cpu_pc, tcg_env, offsetof(CPUAlphaState, palbr)); + tcg_gen_addi_i64(cpu_pc, cpu_pc, + palcode & 0x80 + ? 0x2000 + (palcode - 0x80) * 64 + : 0x1000 + palcode * 64); return DISAS_PC_UPDATED; } #endif @@ -1292,11 +1287,7 @@ static DisasJumpType gen_mtpr(DisasContext *ctx, TCGv vb, int regno) case 7: /* PALBR */ tcg_gen_st_i64(vb, tcg_env, offsetof(CPUAlphaState, palbr)); - /* Changing the PAL base register implies un-chaining all of the TBs - that ended with a CALL_PAL. Since the base register usually only - changes during boot, flushing everything works well. */ - gen_helper_tb_flush(tcg_env); - return DISAS_PC_STALE; + break; case 32 ... 39: /* Accessing the "non-shadow" general registers. */ @@ -2874,7 +2865,6 @@ static void alpha_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cpu) ctx->ir = cpu_std_ir; ctx->unalign = (ctx->tbflags & TB_FLAG_UNALIGN ? MO_UNALN : MO_ALIGN); #else - ctx->palbr = env->palbr; ctx->ir = (ctx->tbflags & ENV_FLAG_PAL_MODE ? cpu_pal_ir : cpu_std_ir); #endif diff --git a/target/arm/arm-powerctl.c b/target/arm/arm-powerctl.c index 20c70c7..a788376 100644 --- a/target/arm/arm-powerctl.c +++ b/target/arm/arm-powerctl.c @@ -17,24 +17,12 @@ #include "qemu/main-loop.h" #include "system/tcg.h" #include "target/arm/multiprocessing.h" - -#ifndef DEBUG_ARM_POWERCTL -#define DEBUG_ARM_POWERCTL 0 -#endif - -#define DPRINTF(fmt, args...) \ - do { \ - if (DEBUG_ARM_POWERCTL) { \ - fprintf(stderr, "[ARM]%s: " fmt , __func__, ##args); \ - } \ - } while (0) +#include "trace.h" CPUState *arm_get_cpu_by_id(uint64_t id) { CPUState *cpu; - DPRINTF("cpu %" PRId64 "\n", id); - CPU_FOREACH(cpu) { ARMCPU *armcpu = ARM_CPU(cpu); @@ -102,9 +90,9 @@ int arm_set_cpu_on(uint64_t cpuid, uint64_t entry, uint64_t context_id, assert(bql_locked()); - DPRINTF("cpu %" PRId64 " (EL %d, %s) @ 0x%" PRIx64 " with R0 = 0x%" PRIx64 - "\n", cpuid, target_el, target_aa64 ? "aarch64" : "aarch32", entry, - context_id); + trace_arm_powerctl_set_cpu_on(cpuid, target_el, + target_aa64 ? 
"aarch64" : "aarch32", + entry, context_id); /* requested EL level need to be in the 1 to 3 range */ assert((target_el > 0) && (target_el < 4)); @@ -208,6 +196,8 @@ int arm_set_cpu_on_and_reset(uint64_t cpuid) assert(bql_locked()); + trace_arm_powerctl_set_cpu_on_and_reset(cpuid); + /* Retrieve the cpu we are powering up */ target_cpu_state = arm_get_cpu_by_id(cpuid); if (!target_cpu_state) { @@ -261,7 +251,7 @@ int arm_set_cpu_off(uint64_t cpuid) assert(bql_locked()); - DPRINTF("cpu %" PRId64 "\n", cpuid); + trace_arm_powerctl_set_cpu_off(cpuid); /* change to the cpu we are powering up */ target_cpu_state = arm_get_cpu_by_id(cpuid); @@ -297,7 +287,7 @@ int arm_reset_cpu(uint64_t cpuid) assert(bql_locked()); - DPRINTF("cpu %" PRId64 "\n", cpuid); + trace_arm_powerctl_set_cpu_off(cpuid); /* change to the cpu we are resetting */ target_cpu_state = arm_get_cpu_by_id(cpuid); diff --git a/target/arm/arm-qmp-cmds.c b/target/arm/arm-qmp-cmds.c index a1a944a..d292c97 100644 --- a/target/arm/arm-qmp-cmds.c +++ b/target/arm/arm-qmp-cmds.c @@ -21,15 +21,17 @@ */ #include "qemu/osdep.h" +#include "qemu/target-info.h" #include "hw/boards.h" #include "kvm_arm.h" #include "qapi/error.h" #include "qapi/visitor.h" #include "qapi/qobject-input-visitor.h" -#include "qapi/qapi-commands-machine-target.h" -#include "qapi/qapi-commands-misc-target.h" +#include "qapi/qapi-commands-machine.h" +#include "qapi/qapi-commands-misc-arm.h" #include "qobject/qdict.h" #include "qom/qom-qobject.h" +#include "cpu.h" static GICCapability *gic_cap_new(int version) { @@ -240,7 +242,7 @@ CpuDefinitionInfoList *qmp_query_cpu_definitions(Error **errp) CpuDefinitionInfoList *cpu_list = NULL; GSList *list; - list = object_class_get_list(TYPE_ARM_CPU, false); + list = object_class_get_list(target_cpu_type(), false); g_slist_foreach(list, arm_cpu_add_definition, &cpu_list); g_slist_free(list); diff --git a/target/arm/common-semi-target.h b/target/arm/common-semi-target.c index da51f2d..2b77ce9 100644 --- a/target/arm/common-semi-target.h +++ b/target/arm/common-semi-target.c @@ -7,12 +7,12 @@ * SPDX-License-Identifier: GPL-2.0-or-later */ -#ifndef TARGET_ARM_COMMON_SEMI_TARGET_H -#define TARGET_ARM_COMMON_SEMI_TARGET_H - +#include "qemu/osdep.h" +#include "cpu.h" +#include "semihosting/common-semi.h" #include "target/arm/cpu-qom.h" -static inline target_ulong common_semi_arg(CPUState *cs, int argno) +uint64_t common_semi_arg(CPUState *cs, int argno) { ARMCPU *cpu = ARM_CPU(cs); CPUARMState *env = &cpu->env; @@ -23,7 +23,7 @@ static inline target_ulong common_semi_arg(CPUState *cs, int argno) } } -static inline void common_semi_set_ret(CPUState *cs, target_ulong ret) +void common_semi_set_ret(CPUState *cs, uint64_t ret) { ARMCPU *cpu = ARM_CPU(cs); CPUARMState *env = &cpu->env; @@ -34,27 +34,25 @@ static inline void common_semi_set_ret(CPUState *cs, target_ulong ret) } } -static inline bool common_semi_sys_exit_extended(CPUState *cs, int nr) +bool common_semi_sys_exit_is_extended(CPUState *cs) { - return nr == TARGET_SYS_EXIT_EXTENDED || is_a64(cpu_env(cs)); + return is_a64(cpu_env(cs)); } -static inline bool is_64bit_semihosting(CPUArchState *env) +bool is_64bit_semihosting(CPUArchState *env) { return is_a64(env); } -static inline target_ulong common_semi_stack_bottom(CPUState *cs) +uint64_t common_semi_stack_bottom(CPUState *cs) { ARMCPU *cpu = ARM_CPU(cs); CPUARMState *env = &cpu->env; return is_a64(env) ? 
env->xregs[31] : env->regs[13]; } -static inline bool common_semi_has_synccache(CPUArchState *env) +bool common_semi_has_synccache(CPUArchState *env) { /* Ok for A64, invalid for A32/T32 */ return is_a64(env); } - -#endif diff --git a/target/arm/cpregs-gcs.c b/target/arm/cpregs-gcs.c new file mode 100644 index 0000000..1ed52a2 --- /dev/null +++ b/target/arm/cpregs-gcs.c @@ -0,0 +1,156 @@ +/* + * QEMU ARM CP Register GCS regiters and instructions + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "qemu/osdep.h" +#include "qemu/timer.h" +#include "exec/icount.h" +#include "hw/irq.h" +#include "cpu.h" +#include "cpu-features.h" +#include "cpregs.h" +#include "internals.h" + + +static CPAccessResult access_gcs(CPUARMState *env, const ARMCPRegInfo *ri, + bool isread) +{ + if (arm_current_el(env) < 3 + && arm_feature(env, ARM_FEATURE_EL3) + && !(env->cp15.scr_el3 & SCR_GCSEN)) { + return CP_ACCESS_TRAP_EL3; + } + return CP_ACCESS_OK; +} + +static CPAccessResult access_gcs_el0(CPUARMState *env, const ARMCPRegInfo *ri, + bool isread) +{ + if (arm_current_el(env) == 0 && !(env->cp15.gcscr_el[0] & GCSCRE0_NTR)) { + return CP_ACCESS_TRAP_EL1; + } + return access_gcs(env, ri, isread); +} + +static void gcspr_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + /* + * Bits [2:0] are RES0, so we might as well clear them now, + * rather than upon each usage a-la GetCurrentGCSPointer. + */ + raw_write(env, ri, value & ~7); +} + +static CPAccessResult access_gcspushm(CPUARMState *env, const ARMCPRegInfo *ri, + bool isread) +{ + int el = arm_current_el(env); + if (!(env->cp15.gcscr_el[el] & GCSCR_PUSHMEN)) { + return CP_ACCESS_TRAP_BIT | (el ? el : 1); + } + return CP_ACCESS_OK; +} + +static CPAccessResult access_gcspushx(CPUARMState *env, const ARMCPRegInfo *ri, + bool isread) +{ + /* Trap if lock taken, and enabled. */ + if (!(env->pstate & PSTATE_EXLOCK)) { + int el = arm_current_el(env); + if (env->cp15.gcscr_el[el] & GCSCR_EXLOCKEN) { + return CP_ACCESS_EXLOCK; + } + } + return CP_ACCESS_OK; +} + +static CPAccessResult access_gcspopcx(CPUARMState *env, const ARMCPRegInfo *ri, + bool isread) +{ + /* Trap if lock not taken, and enabled. 
*/ + if (env->pstate & PSTATE_EXLOCK) { + int el = arm_current_el(env); + if (env->cp15.gcscr_el[el] & GCSCR_EXLOCKEN) { + return CP_ACCESS_EXLOCK; + } + } + return CP_ACCESS_OK; +} + +static const ARMCPRegInfo gcs_reginfo[] = { + { .name = "GCSCRE0_EL1", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 0, .crn = 2, .crm = 5, .opc2 = 2, + .access = PL1_RW, .accessfn = access_gcs, .fgt = FGT_NGCS_EL0, + .fieldoffset = offsetof(CPUARMState, cp15.gcscr_el[0]) }, + { .name = "GCSCR_EL1", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 0, .crn = 2, .crm = 5, .opc2 = 0, + .access = PL1_RW, .accessfn = access_gcs, .fgt = FGT_NGCS_EL1, + .nv2_redirect_offset = 0x8d0 | NV2_REDIR_NV1, + .vhe_redir_to_el2 = ENCODE_AA64_CP_REG(3, 4, 2, 5, 0), + .vhe_redir_to_el01 = ENCODE_AA64_CP_REG(3, 5, 2, 5, 0), + .fieldoffset = offsetof(CPUARMState, cp15.gcscr_el[1]) }, + { .name = "GCSCR_EL2", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 4, .crn = 2, .crm = 5, .opc2 = 0, + .access = PL2_RW, .accessfn = access_gcs, + .fieldoffset = offsetof(CPUARMState, cp15.gcscr_el[2]) }, + { .name = "GCSCR_EL3", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 6, .crn = 2, .crm = 5, .opc2 = 0, + .access = PL3_RW, + .fieldoffset = offsetof(CPUARMState, cp15.gcscr_el[3]) }, + + { .name = "GCSPR_EL0", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 3, .crn = 2, .crm = 5, .opc2 = 1, + .access = PL0_R | PL1_W, .accessfn = access_gcs_el0, + .fgt = FGT_NGCS_EL0, .writefn = gcspr_write, + .fieldoffset = offsetof(CPUARMState, cp15.gcspr_el[0]) }, + { .name = "GCSPR_EL1", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 0, .crn = 2, .crm = 5, .opc2 = 1, + .access = PL1_RW, .accessfn = access_gcs, + .fgt = FGT_NGCS_EL1, .writefn = gcspr_write, + .nv2_redirect_offset = 0x8c0 | NV2_REDIR_NV1, + .vhe_redir_to_el2 = ENCODE_AA64_CP_REG(3, 4, 2, 5, 1), + .vhe_redir_to_el01 = ENCODE_AA64_CP_REG(3, 5, 2, 5, 1), + .fieldoffset = offsetof(CPUARMState, cp15.gcspr_el[1]) }, + { .name = "GCSPR_EL2", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 4, .crn = 2, .crm = 5, .opc2 = 1, + .access = PL2_RW, .accessfn = access_gcs, .writefn = gcspr_write, + .fieldoffset = offsetof(CPUARMState, cp15.gcspr_el[2]) }, + { .name = "GCSPR_EL3", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 6, .crn = 2, .crm = 5, .opc2 = 1, + .access = PL3_RW, .writefn = gcspr_write, + .fieldoffset = offsetof(CPUARMState, cp15.gcspr_el[2]) }, + + { .name = "GCSPUSHM", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 7, .opc2 = 0, + .access = PL0_W, .accessfn = access_gcspushm, + .fgt = FGT_NGCSPUSHM_EL1, .type = ARM_CP_GCSPUSHM }, + { .name = "GCSPOPM", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 7, .opc2 = 1, + .access = PL0_R, .type = ARM_CP_GCSPOPM }, + { .name = "GCSSS1", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 7, .opc2 = 2, + .access = PL0_W, .type = ARM_CP_GCSSS1 }, + { .name = "GCSSS2", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 7, .opc2 = 3, + .access = PL0_R, .type = ARM_CP_GCSSS2 }, + { .name = "GCSPUSHX", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 7, .opc2 = 4, + .access = PL1_W, .accessfn = access_gcspushx, .fgt = FGT_NGCSEPP, + .type = ARM_CP_GCSPUSHX }, + { .name = "GCSPOPCX", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 7, .opc2 = 5, + .access = PL1_W, .accessfn = access_gcspopcx, .fgt = FGT_NGCSEPP, + .type = ARM_CP_GCSPOPCX }, + { .name = "GCSPOPX", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 0, .crn = 
7, .crm = 7, .opc2 = 6, + .access = PL1_W, .type = ARM_CP_GCSPOPX }, +}; + +void define_gcs_cpregs(ARMCPU *cpu) +{ + if (cpu_isar_feature(aa64_gcs, cpu)) { + define_arm_cp_regs(cpu, gcs_reginfo); + } +} diff --git a/target/arm/cpregs-pmu.c b/target/arm/cpregs-pmu.c new file mode 100644 index 0000000..31c01ed --- /dev/null +++ b/target/arm/cpregs-pmu.c @@ -0,0 +1,1346 @@ +/* + * QEMU ARM CP Register PMU insns + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "qemu/osdep.h" +#include "qemu/timer.h" +#include "exec/icount.h" +#include "hw/irq.h" +#include "cpu.h" +#include "cpu-features.h" +#include "cpregs.h" +#include "internals.h" + + +#define ARM_CPU_FREQ 1000000000 /* FIXME: 1 GHz, should be configurable */ + +/* + * Check for traps to performance monitor registers, which are controlled + * by MDCR_EL2.TPM for EL2 and MDCR_EL3.TPM for EL3. + */ +static CPAccessResult access_tpm(CPUARMState *env, const ARMCPRegInfo *ri, + bool isread) +{ + int el = arm_current_el(env); + uint64_t mdcr_el2 = arm_mdcr_el2_eff(env); + + if (el < 2 && (mdcr_el2 & MDCR_TPM)) { + return CP_ACCESS_TRAP_EL2; + } + if (el < 3 && (env->cp15.mdcr_el3 & MDCR_TPM)) { + return CP_ACCESS_TRAP_EL3; + } + return CP_ACCESS_OK; +} + +typedef struct pm_event { + uint16_t number; /* PMEVTYPER.evtCount is 16 bits wide */ + /* If the event is supported on this CPU (used to generate PMCEID[01]) */ + bool (*supported)(CPUARMState *); + /* + * Retrieve the current count of the underlying event. The programmed + * counters hold a difference from the return value from this function + */ + uint64_t (*get_count)(CPUARMState *); + /* + * Return how many nanoseconds it will take (at a minimum) for count events + * to occur. A negative value indicates the counter will never overflow, or + * that the counter has otherwise arranged for the overflow bit to be set + * and the PMU interrupt to be raised on overflow. + */ + int64_t (*ns_per_count)(uint64_t); +} pm_event; + +static bool event_always_supported(CPUARMState *env) +{ + return true; +} + +static uint64_t swinc_get_count(CPUARMState *env) +{ + /* + * SW_INCR events are written directly to the pmevcntr's by writes to + * PMSWINC, so there is no underlying count maintained by the PMU itself + */ + return 0; +} + +static int64_t swinc_ns_per(uint64_t ignored) +{ + return -1; +} + +/* + * Return the underlying cycle count for the PMU cycle counters. If we're in + * usermode, simply return 0. 
+ */ +static uint64_t cycles_get_count(CPUARMState *env) +{ +#ifndef CONFIG_USER_ONLY + return muldiv64(qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL), + ARM_CPU_FREQ, NANOSECONDS_PER_SECOND); +#else + return cpu_get_host_ticks(); +#endif +} + +#ifndef CONFIG_USER_ONLY +static int64_t cycles_ns_per(uint64_t cycles) +{ + return (ARM_CPU_FREQ / NANOSECONDS_PER_SECOND) * cycles; +} + +static bool instructions_supported(CPUARMState *env) +{ + /* Precise instruction counting */ + return icount_enabled() == ICOUNT_PRECISE; +} + +static uint64_t instructions_get_count(CPUARMState *env) +{ + assert(icount_enabled() == ICOUNT_PRECISE); + return (uint64_t)icount_get_raw(); +} + +static int64_t instructions_ns_per(uint64_t icount) +{ + assert(icount_enabled() == ICOUNT_PRECISE); + return icount_to_ns((int64_t)icount); +} +#endif + +static bool pmuv3p1_events_supported(CPUARMState *env) +{ + /* For events which are supported in any v8.1 PMU */ + return cpu_isar_feature(any_pmuv3p1, env_archcpu(env)); +} + +static bool pmuv3p4_events_supported(CPUARMState *env) +{ + /* For events which are supported in any v8.1 PMU */ + return cpu_isar_feature(any_pmuv3p4, env_archcpu(env)); +} + +static uint64_t zero_event_get_count(CPUARMState *env) +{ + /* For events which on QEMU never fire, so their count is always zero */ + return 0; +} + +static int64_t zero_event_ns_per(uint64_t cycles) +{ + /* An event which never fires can never overflow */ + return -1; +} + +static const pm_event pm_events[] = { + { .number = 0x000, /* SW_INCR */ + .supported = event_always_supported, + .get_count = swinc_get_count, + .ns_per_count = swinc_ns_per, + }, +#ifndef CONFIG_USER_ONLY + { .number = 0x008, /* INST_RETIRED, Instruction architecturally executed */ + .supported = instructions_supported, + .get_count = instructions_get_count, + .ns_per_count = instructions_ns_per, + }, + { .number = 0x011, /* CPU_CYCLES, Cycle */ + .supported = event_always_supported, + .get_count = cycles_get_count, + .ns_per_count = cycles_ns_per, + }, +#endif + { .number = 0x023, /* STALL_FRONTEND */ + .supported = pmuv3p1_events_supported, + .get_count = zero_event_get_count, + .ns_per_count = zero_event_ns_per, + }, + { .number = 0x024, /* STALL_BACKEND */ + .supported = pmuv3p1_events_supported, + .get_count = zero_event_get_count, + .ns_per_count = zero_event_ns_per, + }, + { .number = 0x03c, /* STALL */ + .supported = pmuv3p4_events_supported, + .get_count = zero_event_get_count, + .ns_per_count = zero_event_ns_per, + }, +}; + +/* + * Note: Before increasing MAX_EVENT_ID beyond 0x3f into the 0x40xx range of + * events (i.e. the statistical profiling extension), this implementation + * should first be updated to something sparse instead of the current + * supported_event_map[] array. + */ +#define MAX_EVENT_ID 0x3c +#define UNSUPPORTED_EVENT UINT16_MAX +static uint16_t supported_event_map[MAX_EVENT_ID + 1]; + +/* + * Called upon CPU initialization to initialize PMCEID[01]_EL0 and build a map + * of ARM event numbers to indices in our pm_events array. + * + * Note: Events in the 0x40XX range are not currently supported. 
+ */ +void pmu_init(ARMCPU *cpu) +{ + unsigned int i; + + /* + * Empty supported_event_map and cpu->pmceid[01] before adding supported + * events to them + */ + for (i = 0; i < ARRAY_SIZE(supported_event_map); i++) { + supported_event_map[i] = UNSUPPORTED_EVENT; + } + cpu->pmceid0 = 0; + cpu->pmceid1 = 0; + + for (i = 0; i < ARRAY_SIZE(pm_events); i++) { + const pm_event *cnt = &pm_events[i]; + assert(cnt->number <= MAX_EVENT_ID); + /* We do not currently support events in the 0x40xx range */ + assert(cnt->number <= 0x3f); + + if (cnt->supported(&cpu->env)) { + supported_event_map[cnt->number] = i; + uint64_t event_mask = 1ULL << (cnt->number & 0x1f); + if (cnt->number & 0x20) { + cpu->pmceid1 |= event_mask; + } else { + cpu->pmceid0 |= event_mask; + } + } + } +} + +/* + * Check at runtime whether a PMU event is supported for the current machine + */ +static bool event_supported(uint16_t number) +{ + if (number > MAX_EVENT_ID) { + return false; + } + return supported_event_map[number] != UNSUPPORTED_EVENT; +} + +static CPAccessResult do_pmreg_access(CPUARMState *env, bool is_pmcr) +{ + /* + * Performance monitor registers user accessibility is controlled + * by PMUSERENR. MDCR_EL2.TPM/TPMCR and MDCR_EL3.TPM allow configurable + * trapping to EL2 or EL3 for other accesses. + */ + int el = arm_current_el(env); + + if (el == 0 && !(env->cp15.c9_pmuserenr & 1)) { + return CP_ACCESS_TRAP_EL1; + } + if (el < 2) { + uint64_t mdcr_el2 = arm_mdcr_el2_eff(env); + + if (mdcr_el2 & MDCR_TPM) { + return CP_ACCESS_TRAP_EL2; + } + if (is_pmcr && (mdcr_el2 & MDCR_TPMCR)) { + return CP_ACCESS_TRAP_EL2; + } + } + if (el < 3 && (env->cp15.mdcr_el3 & MDCR_TPM)) { + return CP_ACCESS_TRAP_EL3; + } + + return CP_ACCESS_OK; +} + +static CPAccessResult pmreg_access(CPUARMState *env, const ARMCPRegInfo *ri, + bool isread) +{ + return do_pmreg_access(env, false); +} + +static CPAccessResult pmreg_access_pmcr(CPUARMState *env, + const ARMCPRegInfo *ri, + bool isread) +{ + return do_pmreg_access(env, true); +} + +static CPAccessResult pmreg_access_xevcntr(CPUARMState *env, + const ARMCPRegInfo *ri, + bool isread) +{ + /* ER: event counter read trap control */ + if (arm_feature(env, ARM_FEATURE_V8) + && arm_current_el(env) == 0 + && (env->cp15.c9_pmuserenr & (1 << 3)) != 0 + && isread) { + return CP_ACCESS_OK; + } + + return pmreg_access(env, ri, isread); +} + +static CPAccessResult pmreg_access_swinc(CPUARMState *env, + const ARMCPRegInfo *ri, + bool isread) +{ + /* SW: software increment write trap control */ + if (arm_feature(env, ARM_FEATURE_V8) + && arm_current_el(env) == 0 + && (env->cp15.c9_pmuserenr & (1 << 1)) != 0 + && !isread) { + return CP_ACCESS_OK; + } + + return pmreg_access(env, ri, isread); +} + +static CPAccessResult pmreg_access_selr(CPUARMState *env, + const ARMCPRegInfo *ri, + bool isread) +{ + /* ER: event counter read trap control */ + if (arm_feature(env, ARM_FEATURE_V8) + && arm_current_el(env) == 0 + && (env->cp15.c9_pmuserenr & (1 << 3)) != 0) { + return CP_ACCESS_OK; + } + + return pmreg_access(env, ri, isread); +} + +static CPAccessResult pmreg_access_ccntr(CPUARMState *env, + const ARMCPRegInfo *ri, + bool isread) +{ + /* CR: cycle counter read trap control */ + if (arm_feature(env, ARM_FEATURE_V8) + && arm_current_el(env) == 0 + && (env->cp15.c9_pmuserenr & (1 << 2)) != 0 + && isread) { + return CP_ACCESS_OK; + } + + return pmreg_access(env, ri, isread); +} + +/* + * Returns true if the counter (pass 31 for PMCCNTR) should count events using + * the current EL, security state, and 
register configuration. + */ +static bool pmu_counter_enabled(CPUARMState *env, uint8_t counter) +{ + uint64_t filter; + bool e, p, u, nsk, nsu, nsh, m; + bool enabled, prohibited = false, filtered; + bool secure = arm_is_secure(env); + int el = arm_current_el(env); + uint64_t mdcr_el2; + uint8_t hpmn; + + /* + * We might be called for M-profile cores where MDCR_EL2 doesn't + * exist and arm_mdcr_el2_eff() will assert, so this early-exit check + * must be before we read that value. + */ + if (!arm_feature(env, ARM_FEATURE_PMU)) { + return false; + } + + mdcr_el2 = arm_mdcr_el2_eff(env); + hpmn = mdcr_el2 & MDCR_HPMN; + + if (!arm_feature(env, ARM_FEATURE_EL2) || + (counter < hpmn || counter == 31)) { + e = env->cp15.c9_pmcr & PMCRE; + } else { + e = mdcr_el2 & MDCR_HPME; + } + enabled = e && (env->cp15.c9_pmcnten & (1 << counter)); + + /* Is event counting prohibited? */ + if (el == 2 && (counter < hpmn || counter == 31)) { + prohibited = mdcr_el2 & MDCR_HPMD; + } + if (secure) { + prohibited = prohibited || !(env->cp15.mdcr_el3 & MDCR_SPME); + } + + if (counter == 31) { + /* + * The cycle counter defaults to running. PMCR.DP says "disable + * the cycle counter when event counting is prohibited". + * Some MDCR bits disable the cycle counter specifically. + */ + prohibited = prohibited && env->cp15.c9_pmcr & PMCRDP; + if (cpu_isar_feature(any_pmuv3p5, env_archcpu(env))) { + if (secure) { + prohibited = prohibited || (env->cp15.mdcr_el3 & MDCR_SCCD); + } + if (el == 2) { + prohibited = prohibited || (mdcr_el2 & MDCR_HCCD); + } + } + } + + if (counter == 31) { + filter = env->cp15.pmccfiltr_el0; + } else { + filter = env->cp15.c14_pmevtyper[counter]; + } + + p = filter & PMXEVTYPER_P; + u = filter & PMXEVTYPER_U; + nsk = arm_feature(env, ARM_FEATURE_EL3) && (filter & PMXEVTYPER_NSK); + nsu = arm_feature(env, ARM_FEATURE_EL3) && (filter & PMXEVTYPER_NSU); + nsh = arm_feature(env, ARM_FEATURE_EL2) && (filter & PMXEVTYPER_NSH); + m = arm_el_is_aa64(env, 1) && + arm_feature(env, ARM_FEATURE_EL3) && (filter & PMXEVTYPER_M); + + if (el == 0) { + filtered = secure ? u : u != nsu; + } else if (el == 1) { + filtered = secure ? p : p != nsk; + } else if (el == 2) { + filtered = !nsh; + } else { /* EL3 */ + filtered = m != p; + } + + if (counter != 31) { + /* + * If not checking PMCCNTR, ensure the counter is setup to an event we + * support + */ + uint16_t event = filter & PMXEVTYPER_EVTCOUNT; + if (!event_supported(event)) { + return false; + } + } + + return enabled && !prohibited && !filtered; +} + +static void pmu_update_irq(CPUARMState *env) +{ + ARMCPU *cpu = env_archcpu(env); + qemu_set_irq(cpu->pmu_interrupt, (env->cp15.c9_pmcr & PMCRE) && + (env->cp15.c9_pminten & env->cp15.c9_pmovsr)); +} + +static bool pmccntr_clockdiv_enabled(CPUARMState *env) +{ + /* + * Return true if the clock divider is enabled and the cycle counter + * is supposed to tick only once every 64 clock cycles. This is + * controlled by PMCR.D, but if PMCR.LC is set to enable the long + * (64-bit) cycle counter PMCR.D has no effect. 
+ */ + return (env->cp15.c9_pmcr & (PMCRD | PMCRLC)) == PMCRD; +} + +static bool pmevcntr_is_64_bit(CPUARMState *env, int counter) +{ + /* Return true if the specified event counter is configured to be 64 bit */ + + /* This isn't intended to be used with the cycle counter */ + assert(counter < 31); + + if (!cpu_isar_feature(any_pmuv3p5, env_archcpu(env))) { + return false; + } + + if (arm_feature(env, ARM_FEATURE_EL2)) { + /* + * MDCR_EL2.HLP still applies even when EL2 is disabled in the + * current security state, so we don't use arm_mdcr_el2_eff() here. + */ + bool hlp = env->cp15.mdcr_el2 & MDCR_HLP; + int hpmn = env->cp15.mdcr_el2 & MDCR_HPMN; + + if (counter >= hpmn) { + return hlp; + } + } + return env->cp15.c9_pmcr & PMCRLP; +} + +/* + * Ensure c15_ccnt is the guest-visible count so that operations such as + * enabling/disabling the counter or filtering, modifying the count itself, + * etc. can be done logically. This is essentially a no-op if the counter is + * not enabled at the time of the call. + */ +static void pmccntr_op_start(CPUARMState *env) +{ + uint64_t cycles = cycles_get_count(env); + + if (pmu_counter_enabled(env, 31)) { + uint64_t eff_cycles = cycles; + if (pmccntr_clockdiv_enabled(env)) { + eff_cycles /= 64; + } + + uint64_t new_pmccntr = eff_cycles - env->cp15.c15_ccnt_delta; + + uint64_t overflow_mask = env->cp15.c9_pmcr & PMCRLC ? \ + 1ull << 63 : 1ull << 31; + if (env->cp15.c15_ccnt & ~new_pmccntr & overflow_mask) { + env->cp15.c9_pmovsr |= (1ULL << 31); + pmu_update_irq(env); + } + + env->cp15.c15_ccnt = new_pmccntr; + } + env->cp15.c15_ccnt_delta = cycles; +} + +/* + * If PMCCNTR is enabled, recalculate the delta between the clock and the + * guest-visible count. A call to pmccntr_op_finish should follow every call to + * pmccntr_op_start. + */ +static void pmccntr_op_finish(CPUARMState *env) +{ + if (pmu_counter_enabled(env, 31)) { +#ifndef CONFIG_USER_ONLY + /* Calculate when the counter will next overflow */ + uint64_t remaining_cycles = -env->cp15.c15_ccnt; + if (!(env->cp15.c9_pmcr & PMCRLC)) { + remaining_cycles = (uint32_t)remaining_cycles; + } + int64_t overflow_in = cycles_ns_per(remaining_cycles); + + if (overflow_in > 0) { + int64_t overflow_at; + + if (!sadd64_overflow(qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL), + overflow_in, &overflow_at)) { + ARMCPU *cpu = env_archcpu(env); + timer_mod_anticipate_ns(cpu->pmu_timer, overflow_at); + } + } +#endif + + uint64_t prev_cycles = env->cp15.c15_ccnt_delta; + if (pmccntr_clockdiv_enabled(env)) { + prev_cycles /= 64; + } + env->cp15.c15_ccnt_delta = prev_cycles - env->cp15.c15_ccnt; + } +} + +static void pmevcntr_op_start(CPUARMState *env, uint8_t counter) +{ + + uint16_t event = env->cp15.c14_pmevtyper[counter] & PMXEVTYPER_EVTCOUNT; + uint64_t count = 0; + if (event_supported(event)) { + uint16_t event_idx = supported_event_map[event]; + count = pm_events[event_idx].get_count(env); + } + + if (pmu_counter_enabled(env, counter)) { + uint64_t new_pmevcntr = count - env->cp15.c14_pmevcntr_delta[counter]; + uint64_t overflow_mask = pmevcntr_is_64_bit(env, counter) ? 
+ 1ULL << 63 : 1ULL << 31; + + if (env->cp15.c14_pmevcntr[counter] & ~new_pmevcntr & overflow_mask) { + env->cp15.c9_pmovsr |= (1 << counter); + pmu_update_irq(env); + } + env->cp15.c14_pmevcntr[counter] = new_pmevcntr; + } + env->cp15.c14_pmevcntr_delta[counter] = count; +} + +static void pmevcntr_op_finish(CPUARMState *env, uint8_t counter) +{ + if (pmu_counter_enabled(env, counter)) { +#ifndef CONFIG_USER_ONLY + uint16_t event = env->cp15.c14_pmevtyper[counter] & PMXEVTYPER_EVTCOUNT; + uint16_t event_idx = supported_event_map[event]; + uint64_t delta = -(env->cp15.c14_pmevcntr[counter] + 1); + int64_t overflow_in; + + if (!pmevcntr_is_64_bit(env, counter)) { + delta = (uint32_t)delta; + } + overflow_in = pm_events[event_idx].ns_per_count(delta); + + if (overflow_in > 0) { + int64_t overflow_at; + + if (!sadd64_overflow(qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL), + overflow_in, &overflow_at)) { + ARMCPU *cpu = env_archcpu(env); + timer_mod_anticipate_ns(cpu->pmu_timer, overflow_at); + } + } +#endif + + env->cp15.c14_pmevcntr_delta[counter] -= + env->cp15.c14_pmevcntr[counter]; + } +} + +void pmu_op_start(CPUARMState *env) +{ + unsigned int i; + pmccntr_op_start(env); + for (i = 0; i < pmu_num_counters(env); i++) { + pmevcntr_op_start(env, i); + } +} + +void pmu_op_finish(CPUARMState *env) +{ + unsigned int i; + pmccntr_op_finish(env); + for (i = 0; i < pmu_num_counters(env); i++) { + pmevcntr_op_finish(env, i); + } +} + +void pmu_pre_el_change(ARMCPU *cpu, void *ignored) +{ + pmu_op_start(&cpu->env); +} + +void pmu_post_el_change(ARMCPU *cpu, void *ignored) +{ + pmu_op_finish(&cpu->env); +} + +void arm_pmu_timer_cb(void *opaque) +{ + ARMCPU *cpu = opaque; + + /* + * Update all the counter values based on the current underlying counts, + * triggering interrupts to be raised, if necessary. pmu_op_finish() also + * has the effect of setting the cpu->pmu_timer to the next earliest time a + * counter may expire. + */ + pmu_op_start(&cpu->env); + pmu_op_finish(&cpu->env); +} + +static void pmcr_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + pmu_op_start(env); + + if (value & PMCRC) { + /* The counter has been reset */ + env->cp15.c15_ccnt = 0; + } + + if (value & PMCRP) { + unsigned int i; + for (i = 0; i < pmu_num_counters(env); i++) { + env->cp15.c14_pmevcntr[i] = 0; + } + } + + env->cp15.c9_pmcr &= ~PMCR_WRITABLE_MASK; + env->cp15.c9_pmcr |= (value & PMCR_WRITABLE_MASK); + + pmu_op_finish(env); +} + +static uint64_t pmcr_read(CPUARMState *env, const ARMCPRegInfo *ri) +{ + uint64_t pmcr = env->cp15.c9_pmcr; + + /* + * If EL2 is implemented and enabled for the current security state, reads + * of PMCR.N from EL1 or EL0 return the value of MDCR_EL2.HPMN or HDCR.HPMN. 
+ */ + if (arm_current_el(env) <= 1 && arm_is_el2_enabled(env)) { + pmcr &= ~PMCRN_MASK; + pmcr |= (env->cp15.mdcr_el2 & MDCR_HPMN) << PMCRN_SHIFT; + } + + return pmcr; +} + +static void pmswinc_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + unsigned int i; + uint64_t overflow_mask, new_pmswinc; + + for (i = 0; i < pmu_num_counters(env); i++) { + /* Increment a counter's count iff: */ + if ((value & (1 << i)) && /* counter's bit is set */ + /* counter is enabled and not filtered */ + pmu_counter_enabled(env, i) && + /* counter is SW_INCR */ + (env->cp15.c14_pmevtyper[i] & PMXEVTYPER_EVTCOUNT) == 0x0) { + pmevcntr_op_start(env, i); + + /* + * Detect if this write causes an overflow since we can't predict + * PMSWINC overflows like we can for other events + */ + new_pmswinc = env->cp15.c14_pmevcntr[i] + 1; + + overflow_mask = pmevcntr_is_64_bit(env, i) ? + 1ULL << 63 : 1ULL << 31; + + if (env->cp15.c14_pmevcntr[i] & ~new_pmswinc & overflow_mask) { + env->cp15.c9_pmovsr |= (1 << i); + pmu_update_irq(env); + } + + env->cp15.c14_pmevcntr[i] = new_pmswinc; + + pmevcntr_op_finish(env, i); + } + } +} + +static uint64_t pmccntr_read(CPUARMState *env, const ARMCPRegInfo *ri) +{ + uint64_t ret; + pmccntr_op_start(env); + ret = env->cp15.c15_ccnt; + pmccntr_op_finish(env); + return ret; +} + +static void pmselr_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + /* + * The value of PMSELR.SEL affects the behavior of PMXEVTYPER and + * PMXEVCNTR. We allow [0..31] to be written to PMSELR here; in the + * meanwhile, we check PMSELR.SEL when PMXEVTYPER and PMXEVCNTR are + * accessed. + */ + env->cp15.c9_pmselr = value & 0x1f; +} + +static void pmccntr_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + pmccntr_op_start(env); + env->cp15.c15_ccnt = value; + pmccntr_op_finish(env); +} + +static void pmccntr_write32(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + uint64_t cur_val = pmccntr_read(env, NULL); + + pmccntr_write(env, ri, deposit64(cur_val, 0, 32, value)); +} + +static void pmccfiltr_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + pmccntr_op_start(env); + env->cp15.pmccfiltr_el0 = value & PMCCFILTR_EL0; + pmccntr_op_finish(env); +} + +static void pmccfiltr_write_a32(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + pmccntr_op_start(env); + /* M is not accessible from AArch32 */ + env->cp15.pmccfiltr_el0 = (env->cp15.pmccfiltr_el0 & PMCCFILTR_M) | + (value & PMCCFILTR); + pmccntr_op_finish(env); +} + +static uint64_t pmccfiltr_read_a32(CPUARMState *env, const ARMCPRegInfo *ri) +{ + /* M is not visible in AArch32 */ + return env->cp15.pmccfiltr_el0 & PMCCFILTR; +} + +static void pmcntenset_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + pmu_op_start(env); + value &= pmu_counter_mask(env); + env->cp15.c9_pmcnten |= value; + pmu_op_finish(env); +} + +static void pmcntenclr_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + pmu_op_start(env); + value &= pmu_counter_mask(env); + env->cp15.c9_pmcnten &= ~value; + pmu_op_finish(env); +} + +static void pmovsr_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + value &= pmu_counter_mask(env); + env->cp15.c9_pmovsr &= ~value; + pmu_update_irq(env); +} + +static void pmovsset_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + value &= pmu_counter_mask(env); + env->cp15.c9_pmovsr |= value; + pmu_update_irq(env); +} + +static void pmevtyper_write(CPUARMState *env, 
const ARMCPRegInfo *ri, + uint64_t value, const uint8_t counter) +{ + if (counter == 31) { + pmccfiltr_write(env, ri, value); + } else if (counter < pmu_num_counters(env)) { + pmevcntr_op_start(env, counter); + + /* + * If this counter's event type is changing, store the current + * underlying count for the new type in c14_pmevcntr_delta[counter] so + * pmevcntr_op_finish has the correct baseline when it converts back to + * a delta. + */ + uint16_t old_event = env->cp15.c14_pmevtyper[counter] & + PMXEVTYPER_EVTCOUNT; + uint16_t new_event = value & PMXEVTYPER_EVTCOUNT; + if (old_event != new_event) { + uint64_t count = 0; + if (event_supported(new_event)) { + uint16_t event_idx = supported_event_map[new_event]; + count = pm_events[event_idx].get_count(env); + } + env->cp15.c14_pmevcntr_delta[counter] = count; + } + + env->cp15.c14_pmevtyper[counter] = value & PMXEVTYPER_MASK; + pmevcntr_op_finish(env, counter); + } + /* + * Attempts to access PMXEVTYPER are CONSTRAINED UNPREDICTABLE when + * PMSELR value is equal to or greater than the number of implemented + * counters, but not equal to 0x1f. We opt to behave as a RAZ/WI. + */ +} + +static uint64_t pmevtyper_read(CPUARMState *env, const ARMCPRegInfo *ri, + const uint8_t counter) +{ + if (counter == 31) { + return env->cp15.pmccfiltr_el0; + } else if (counter < pmu_num_counters(env)) { + return env->cp15.c14_pmevtyper[counter]; + } else { + /* + * We opt to behave as a RAZ/WI when attempts to access PMXEVTYPER + * are CONSTRAINED UNPREDICTABLE. See comments in pmevtyper_write(). + */ + return 0; + } +} + +static void pmevtyper_writefn(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + uint8_t counter = ((ri->crm & 3) << 3) | (ri->opc2 & 7); + pmevtyper_write(env, ri, value, counter); +} + +static void pmevtyper_rawwrite(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + uint8_t counter = ((ri->crm & 3) << 3) | (ri->opc2 & 7); + env->cp15.c14_pmevtyper[counter] = value; + + /* + * pmevtyper_rawwrite is called between a pair of pmu_op_start and + * pmu_op_finish calls when loading saved state for a migration. Because + * we're potentially updating the type of event here, the value written to + * c14_pmevcntr_delta by the preceding pmu_op_start call may be for a + * different counter type. Therefore, we need to set this value to the + * current count for the counter type we're writing so that pmu_op_finish + * has the correct count for its calculation. 
+ */ + uint16_t event = value & PMXEVTYPER_EVTCOUNT; + if (event_supported(event)) { + uint16_t event_idx = supported_event_map[event]; + env->cp15.c14_pmevcntr_delta[counter] = + pm_events[event_idx].get_count(env); + } +} + +static uint64_t pmevtyper_readfn(CPUARMState *env, const ARMCPRegInfo *ri) +{ + uint8_t counter = ((ri->crm & 3) << 3) | (ri->opc2 & 7); + return pmevtyper_read(env, ri, counter); +} + +static void pmxevtyper_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + pmevtyper_write(env, ri, value, env->cp15.c9_pmselr & 31); +} + +static uint64_t pmxevtyper_read(CPUARMState *env, const ARMCPRegInfo *ri) +{ + return pmevtyper_read(env, ri, env->cp15.c9_pmselr & 31); +} + +static void pmevcntr_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value, uint8_t counter) +{ + if (!cpu_isar_feature(any_pmuv3p5, env_archcpu(env))) { + /* Before FEAT_PMUv3p5, top 32 bits of event counters are RES0 */ + value &= MAKE_64BIT_MASK(0, 32); + } + if (counter < pmu_num_counters(env)) { + pmevcntr_op_start(env, counter); + env->cp15.c14_pmevcntr[counter] = value; + pmevcntr_op_finish(env, counter); + } + /* + * We opt to behave as a RAZ/WI when attempts to access PM[X]EVCNTR + * are CONSTRAINED UNPREDICTABLE. + */ +} + +static uint64_t pmevcntr_read(CPUARMState *env, const ARMCPRegInfo *ri, + uint8_t counter) +{ + if (counter < pmu_num_counters(env)) { + uint64_t ret; + pmevcntr_op_start(env, counter); + ret = env->cp15.c14_pmevcntr[counter]; + pmevcntr_op_finish(env, counter); + if (!cpu_isar_feature(any_pmuv3p5, env_archcpu(env))) { + /* Before FEAT_PMUv3p5, top 32 bits of event counters are RES0 */ + ret &= MAKE_64BIT_MASK(0, 32); + } + return ret; + } else { + /* + * We opt to behave as a RAZ/WI when attempts to access PM[X]EVCNTR + * are CONSTRAINED UNPREDICTABLE. 
+ */ + return 0; + } +} + +static void pmevcntr_writefn(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + uint8_t counter = ((ri->crm & 3) << 3) | (ri->opc2 & 7); + pmevcntr_write(env, ri, value, counter); +} + +static uint64_t pmevcntr_readfn(CPUARMState *env, const ARMCPRegInfo *ri) +{ + uint8_t counter = ((ri->crm & 3) << 3) | (ri->opc2 & 7); + return pmevcntr_read(env, ri, counter); +} + +static void pmevcntr_rawwrite(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + uint8_t counter = ((ri->crm & 3) << 3) | (ri->opc2 & 7); + assert(counter < pmu_num_counters(env)); + env->cp15.c14_pmevcntr[counter] = value; + pmevcntr_write(env, ri, value, counter); +} + +static uint64_t pmevcntr_rawread(CPUARMState *env, const ARMCPRegInfo *ri) +{ + uint8_t counter = ((ri->crm & 3) << 3) | (ri->opc2 & 7); + assert(counter < pmu_num_counters(env)); + return env->cp15.c14_pmevcntr[counter]; +} + +static void pmxevcntr_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + pmevcntr_write(env, ri, value, env->cp15.c9_pmselr & 31); +} + +static uint64_t pmxevcntr_read(CPUARMState *env, const ARMCPRegInfo *ri) +{ + return pmevcntr_read(env, ri, env->cp15.c9_pmselr & 31); +} + +static void pmuserenr_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + if (arm_feature(env, ARM_FEATURE_V8)) { + env->cp15.c9_pmuserenr = value & 0xf; + } else { + env->cp15.c9_pmuserenr = value & 1; + } +} + +static void pmintenset_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + /* We have no event counters so only the C bit can be changed */ + value &= pmu_counter_mask(env); + env->cp15.c9_pminten |= value; + pmu_update_irq(env); +} + +static void pmintenclr_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + value &= pmu_counter_mask(env); + env->cp15.c9_pminten &= ~value; + pmu_update_irq(env); +} + +static const ARMCPRegInfo v7_pm_reginfo[] = { + /* + * Performance monitors are implementation defined in v7, + * but with an ARM recommended set of registers, which we + * follow. + * + * Performance registers fall into three categories: + * (a) always UNDEF in PL0, RW in PL1 (PMINTENSET, PMINTENCLR) + * (b) RO in PL0 (ie UNDEF on write), RW in PL1 (PMUSERENR) + * (c) UNDEF in PL0 if PMUSERENR.EN==0, otherwise accessible (all others) + * For the cases controlled by PMUSERENR we must set .access to PL0_RW + * or PL0_RO as appropriate and then check PMUSERENR in the helper fn. 
+ */ + { .name = "PMCNTENSET", .cp = 15, .crn = 9, .crm = 12, .opc1 = 0, .opc2 = 1, + .access = PL0_RW, .type = ARM_CP_ALIAS | ARM_CP_IO, + .fieldoffset = offsetoflow32(CPUARMState, cp15.c9_pmcnten), + .writefn = pmcntenset_write, + .accessfn = pmreg_access, + .fgt = FGT_PMCNTEN, + .raw_writefn = raw_write }, + { .name = "PMCNTENSET_EL0", .state = ARM_CP_STATE_AA64, .type = ARM_CP_IO, + .opc0 = 3, .opc1 = 3, .crn = 9, .crm = 12, .opc2 = 1, + .access = PL0_RW, .accessfn = pmreg_access, + .fgt = FGT_PMCNTEN, + .fieldoffset = offsetof(CPUARMState, cp15.c9_pmcnten), .resetvalue = 0, + .writefn = pmcntenset_write, .raw_writefn = raw_write }, + { .name = "PMCNTENCLR", .cp = 15, .crn = 9, .crm = 12, .opc1 = 0, .opc2 = 2, + .access = PL0_RW, + .fieldoffset = offsetoflow32(CPUARMState, cp15.c9_pmcnten), + .accessfn = pmreg_access, + .fgt = FGT_PMCNTEN, + .writefn = pmcntenclr_write, .raw_writefn = raw_write, + .type = ARM_CP_ALIAS | ARM_CP_IO }, + { .name = "PMCNTENCLR_EL0", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 3, .crn = 9, .crm = 12, .opc2 = 2, + .access = PL0_RW, .accessfn = pmreg_access, + .fgt = FGT_PMCNTEN, + .type = ARM_CP_ALIAS | ARM_CP_IO, + .fieldoffset = offsetof(CPUARMState, cp15.c9_pmcnten), + .writefn = pmcntenclr_write, .raw_writefn = raw_write }, + { .name = "PMOVSR", .cp = 15, .crn = 9, .crm = 12, .opc1 = 0, .opc2 = 3, + .access = PL0_RW, .type = ARM_CP_IO, + .fieldoffset = offsetoflow32(CPUARMState, cp15.c9_pmovsr), + .accessfn = pmreg_access, + .fgt = FGT_PMOVS, + .writefn = pmovsr_write, + .raw_writefn = raw_write }, + { .name = "PMOVSCLR_EL0", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 3, .crn = 9, .crm = 12, .opc2 = 3, + .access = PL0_RW, .accessfn = pmreg_access, + .fgt = FGT_PMOVS, + .type = ARM_CP_ALIAS | ARM_CP_IO, + .fieldoffset = offsetof(CPUARMState, cp15.c9_pmovsr), + .writefn = pmovsr_write, + .raw_writefn = raw_write }, + { .name = "PMSWINC", .cp = 15, .crn = 9, .crm = 12, .opc1 = 0, .opc2 = 4, + .access = PL0_W, .accessfn = pmreg_access_swinc, + .fgt = FGT_PMSWINC_EL0, + .type = ARM_CP_NO_RAW | ARM_CP_IO, + .writefn = pmswinc_write }, + { .name = "PMSWINC_EL0", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 3, .crn = 9, .crm = 12, .opc2 = 4, + .access = PL0_W, .accessfn = pmreg_access_swinc, + .fgt = FGT_PMSWINC_EL0, + .type = ARM_CP_NO_RAW | ARM_CP_IO, + .writefn = pmswinc_write }, + { .name = "PMSELR", .cp = 15, .crn = 9, .crm = 12, .opc1 = 0, .opc2 = 5, + .access = PL0_RW, .type = ARM_CP_ALIAS, + .fgt = FGT_PMSELR_EL0, + .fieldoffset = offsetoflow32(CPUARMState, cp15.c9_pmselr), + .accessfn = pmreg_access_selr, .writefn = pmselr_write, + .raw_writefn = raw_write}, + { .name = "PMSELR_EL0", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 3, .crn = 9, .crm = 12, .opc2 = 5, + .access = PL0_RW, .accessfn = pmreg_access_selr, + .fgt = FGT_PMSELR_EL0, + .fieldoffset = offsetof(CPUARMState, cp15.c9_pmselr), + .writefn = pmselr_write, .raw_writefn = raw_write, }, + { .name = "PMCCNTR_EL0", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 3, .crn = 9, .crm = 13, .opc2 = 0, + .access = PL0_RW, .accessfn = pmreg_access_ccntr, + .fgt = FGT_PMCCNTR_EL0, + .type = ARM_CP_IO, + .fieldoffset = offsetof(CPUARMState, cp15.c15_ccnt), + .readfn = pmccntr_read, .writefn = pmccntr_write, + .raw_readfn = raw_read, .raw_writefn = raw_write, }, + { .name = "PMCCFILTR", .cp = 15, .opc1 = 0, .crn = 14, .crm = 15, .opc2 = 7, + .writefn = pmccfiltr_write_a32, .readfn = pmccfiltr_read_a32, + .access = PL0_RW, .accessfn = pmreg_access, + .fgt = FGT_PMCCFILTR_EL0, + .type = 
ARM_CP_ALIAS | ARM_CP_IO, + .resetvalue = 0, }, + { .name = "PMCCFILTR_EL0", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 3, .crn = 14, .crm = 15, .opc2 = 7, + .writefn = pmccfiltr_write, .raw_writefn = raw_write, + .access = PL0_RW, .accessfn = pmreg_access, + .fgt = FGT_PMCCFILTR_EL0, + .type = ARM_CP_IO, + .fieldoffset = offsetof(CPUARMState, cp15.pmccfiltr_el0), + .resetvalue = 0, }, + { .name = "PMXEVTYPER", .cp = 15, .crn = 9, .crm = 13, .opc1 = 0, .opc2 = 1, + .access = PL0_RW, .type = ARM_CP_NO_RAW | ARM_CP_IO, + .accessfn = pmreg_access, + .fgt = FGT_PMEVTYPERN_EL0, + .writefn = pmxevtyper_write, .readfn = pmxevtyper_read }, + { .name = "PMXEVTYPER_EL0", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 3, .crn = 9, .crm = 13, .opc2 = 1, + .access = PL0_RW, .type = ARM_CP_NO_RAW | ARM_CP_IO, + .accessfn = pmreg_access, + .fgt = FGT_PMEVTYPERN_EL0, + .writefn = pmxevtyper_write, .readfn = pmxevtyper_read }, + { .name = "PMXEVCNTR", .cp = 15, .crn = 9, .crm = 13, .opc1 = 0, .opc2 = 2, + .access = PL0_RW, .type = ARM_CP_NO_RAW | ARM_CP_IO, + .accessfn = pmreg_access_xevcntr, + .fgt = FGT_PMEVCNTRN_EL0, + .writefn = pmxevcntr_write, .readfn = pmxevcntr_read }, + { .name = "PMXEVCNTR_EL0", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 3, .crn = 9, .crm = 13, .opc2 = 2, + .access = PL0_RW, .type = ARM_CP_NO_RAW | ARM_CP_IO, + .accessfn = pmreg_access_xevcntr, + .fgt = FGT_PMEVCNTRN_EL0, + .writefn = pmxevcntr_write, .readfn = pmxevcntr_read }, + { .name = "PMUSERENR", .cp = 15, .crn = 9, .crm = 14, .opc1 = 0, .opc2 = 0, + .access = PL0_R | PL1_RW, .accessfn = access_tpm, + .fieldoffset = offsetoflow32(CPUARMState, cp15.c9_pmuserenr), + .resetvalue = 0, + .writefn = pmuserenr_write, .raw_writefn = raw_write }, + { .name = "PMUSERENR_EL0", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 3, .crn = 9, .crm = 14, .opc2 = 0, + .access = PL0_R | PL1_RW, .accessfn = access_tpm, .type = ARM_CP_ALIAS, + .fieldoffset = offsetof(CPUARMState, cp15.c9_pmuserenr), + .resetvalue = 0, + .writefn = pmuserenr_write, .raw_writefn = raw_write }, + { .name = "PMINTENSET", .cp = 15, .crn = 9, .crm = 14, .opc1 = 0, .opc2 = 1, + .access = PL1_RW, .accessfn = access_tpm, + .fgt = FGT_PMINTEN, + .type = ARM_CP_ALIAS | ARM_CP_IO, + .fieldoffset = offsetoflow32(CPUARMState, cp15.c9_pminten), + .resetvalue = 0, + .writefn = pmintenset_write, .raw_writefn = raw_write }, + { .name = "PMINTENSET_EL1", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 0, .crn = 9, .crm = 14, .opc2 = 1, + .access = PL1_RW, .accessfn = access_tpm, + .fgt = FGT_PMINTEN, + .type = ARM_CP_IO, + .fieldoffset = offsetof(CPUARMState, cp15.c9_pminten), + .writefn = pmintenset_write, .raw_writefn = raw_write, + .resetvalue = 0x0 }, + { .name = "PMINTENCLR", .cp = 15, .crn = 9, .crm = 14, .opc1 = 0, .opc2 = 2, + .access = PL1_RW, .accessfn = access_tpm, + .fgt = FGT_PMINTEN, + .type = ARM_CP_ALIAS | ARM_CP_IO, + .fieldoffset = offsetof(CPUARMState, cp15.c9_pminten), + .writefn = pmintenclr_write, .raw_writefn = raw_write }, + { .name = "PMINTENCLR_EL1", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 0, .crn = 9, .crm = 14, .opc2 = 2, + .access = PL1_RW, .accessfn = access_tpm, + .fgt = FGT_PMINTEN, + .type = ARM_CP_ALIAS | ARM_CP_IO, + .fieldoffset = offsetof(CPUARMState, cp15.c9_pminten), + .writefn = pmintenclr_write, .raw_writefn = raw_write }, +}; + +static const ARMCPRegInfo pmovsset_cp_reginfo[] = { + /* PMOVSSET is not implemented in v7 before v7ve */ + { .name = "PMOVSSET", .cp = 15, .opc1 = 0, .crn = 9, .crm = 14, .opc2 = 3, + 
.access = PL0_RW, .accessfn = pmreg_access, + .fgt = FGT_PMOVS, + .type = ARM_CP_ALIAS | ARM_CP_IO, + .fieldoffset = offsetoflow32(CPUARMState, cp15.c9_pmovsr), + .writefn = pmovsset_write, + .raw_writefn = raw_write }, + { .name = "PMOVSSET_EL0", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 3, .crn = 9, .crm = 14, .opc2 = 3, + .access = PL0_RW, .accessfn = pmreg_access, + .fgt = FGT_PMOVS, + .type = ARM_CP_ALIAS | ARM_CP_IO, + .fieldoffset = offsetof(CPUARMState, cp15.c9_pmovsr), + .writefn = pmovsset_write, + .raw_writefn = raw_write }, +}; + +void define_pm_cpregs(ARMCPU *cpu) +{ + CPUARMState *env = &cpu->env; + + if (arm_feature(env, ARM_FEATURE_V7)) { + /* + * v7 performance monitor control register: same implementor + * field as main ID register, and we implement four counters in + * addition to the cycle count register. + */ + static const ARMCPRegInfo pmcr = { + .name = "PMCR", .cp = 15, .crn = 9, .crm = 12, .opc1 = 0, .opc2 = 0, + .access = PL0_RW, + .fgt = FGT_PMCR_EL0, + .type = ARM_CP_IO | ARM_CP_ALIAS, + .fieldoffset = offsetoflow32(CPUARMState, cp15.c9_pmcr), + .accessfn = pmreg_access_pmcr, + .readfn = pmcr_read, .raw_readfn = raw_read, + .writefn = pmcr_write, .raw_writefn = raw_write, + }; + const ARMCPRegInfo pmcr64 = { + .name = "PMCR_EL0", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 3, .crn = 9, .crm = 12, .opc2 = 0, + .access = PL0_RW, .accessfn = pmreg_access_pmcr, + .fgt = FGT_PMCR_EL0, + .type = ARM_CP_IO, + .fieldoffset = offsetof(CPUARMState, cp15.c9_pmcr), + .resetvalue = cpu->isar.reset_pmcr_el0, + .readfn = pmcr_read, .raw_readfn = raw_read, + .writefn = pmcr_write, .raw_writefn = raw_write, + }; + + define_one_arm_cp_reg(cpu, &pmcr); + define_one_arm_cp_reg(cpu, &pmcr64); + define_arm_cp_regs(cpu, v7_pm_reginfo); + /* + * 32-bit AArch32 PMCCNTR. We don't expose this to GDB if the + * new-in-v8 PMUv3 64-bit AArch32 PMCCNTR register is implemented + * (as that will provide the GDB user's view of "PMCCNTR"). 
+ */ + ARMCPRegInfo pmccntr = { + .name = "PMCCNTR", + .cp = 15, .crn = 9, .crm = 13, .opc1 = 0, .opc2 = 0, + .access = PL0_RW, .accessfn = pmreg_access_ccntr, + .resetvalue = 0, .type = ARM_CP_ALIAS | ARM_CP_IO, + .fgt = FGT_PMCCNTR_EL0, + .readfn = pmccntr_read, .writefn = pmccntr_write32, + }; + if (arm_feature(env, ARM_FEATURE_V8)) { + pmccntr.type |= ARM_CP_NO_GDB; + } + define_one_arm_cp_reg(cpu, &pmccntr); + + for (unsigned i = 0, pmcrn = pmu_num_counters(env); i < pmcrn; i++) { + g_autofree char *pmevcntr_name = g_strdup_printf("PMEVCNTR%d", i); + g_autofree char *pmevcntr_el0_name = g_strdup_printf("PMEVCNTR%d_EL0", i); + g_autofree char *pmevtyper_name = g_strdup_printf("PMEVTYPER%d", i); + g_autofree char *pmevtyper_el0_name = g_strdup_printf("PMEVTYPER%d_EL0", i); + + ARMCPRegInfo pmev_regs[] = { + { .name = pmevcntr_name, .cp = 15, .crn = 14, + .crm = 8 | (3 & (i >> 3)), .opc1 = 0, .opc2 = i & 7, + .access = PL0_RW, .type = ARM_CP_IO | ARM_CP_ALIAS, + .fgt = FGT_PMEVCNTRN_EL0, + .readfn = pmevcntr_readfn, .writefn = pmevcntr_writefn, + .accessfn = pmreg_access_xevcntr }, + { .name = pmevcntr_el0_name, .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 3, .crn = 14, .crm = 8 | (3 & (i >> 3)), + .opc2 = i & 7, .access = PL0_RW, .accessfn = pmreg_access_xevcntr, + .type = ARM_CP_IO, + .fgt = FGT_PMEVCNTRN_EL0, + .readfn = pmevcntr_readfn, .writefn = pmevcntr_writefn, + .raw_readfn = pmevcntr_rawread, + .raw_writefn = pmevcntr_rawwrite }, + { .name = pmevtyper_name, .cp = 15, .crn = 14, + .crm = 12 | (3 & (i >> 3)), .opc1 = 0, .opc2 = i & 7, + .access = PL0_RW, .type = ARM_CP_IO | ARM_CP_ALIAS, + .fgt = FGT_PMEVTYPERN_EL0, + .readfn = pmevtyper_readfn, .writefn = pmevtyper_writefn, + .accessfn = pmreg_access }, + { .name = pmevtyper_el0_name, .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 3, .crn = 14, .crm = 12 | (3 & (i >> 3)), + .opc2 = i & 7, .access = PL0_RW, .accessfn = pmreg_access, + .fgt = FGT_PMEVTYPERN_EL0, + .type = ARM_CP_IO, + .readfn = pmevtyper_readfn, .writefn = pmevtyper_writefn, + .raw_writefn = pmevtyper_rawwrite }, + }; + define_arm_cp_regs(cpu, pmev_regs); + } + } + if (arm_feature(env, ARM_FEATURE_V7VE)) { + define_arm_cp_regs(cpu, pmovsset_cp_reginfo); + } + + if (arm_feature(env, ARM_FEATURE_V8)) { + const ARMCPRegInfo v8_pm_reginfo[] = { + { .name = "PMCEID0", .state = ARM_CP_STATE_AA32, + .cp = 15, .opc1 = 0, .crn = 9, .crm = 12, .opc2 = 6, + .access = PL0_R, .accessfn = pmreg_access, .type = ARM_CP_CONST, + .fgt = FGT_PMCEIDN_EL0, + .resetvalue = extract64(cpu->pmceid0, 0, 32) }, + { .name = "PMCEID0_EL0", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 3, .crn = 9, .crm = 12, .opc2 = 6, + .access = PL0_R, .accessfn = pmreg_access, .type = ARM_CP_CONST, + .fgt = FGT_PMCEIDN_EL0, + .resetvalue = cpu->pmceid0 }, + { .name = "PMCEID1", .state = ARM_CP_STATE_AA32, + .cp = 15, .opc1 = 0, .crn = 9, .crm = 12, .opc2 = 7, + .access = PL0_R, .accessfn = pmreg_access, .type = ARM_CP_CONST, + .fgt = FGT_PMCEIDN_EL0, + .resetvalue = extract64(cpu->pmceid1, 0, 32) }, + { .name = "PMCEID1_EL0", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 3, .crn = 9, .crm = 12, .opc2 = 7, + .access = PL0_R, .accessfn = pmreg_access, .type = ARM_CP_CONST, + .fgt = FGT_PMCEIDN_EL0, + .resetvalue = cpu->pmceid1 }, + /* AArch32 64-bit PMCCNTR view: added in PMUv3 with Armv8 */ + { .name = "PMCCNTR", .state = ARM_CP_STATE_AA32, + .cp = 15, .crm = 9, .opc1 = 0, + .access = PL0_RW, .accessfn = pmreg_access_ccntr, .resetvalue = 0, + .type = ARM_CP_ALIAS | ARM_CP_IO | ARM_CP_64BIT, + 
.fgt = FGT_PMCCNTR_EL0, .readfn = pmccntr_read, + .writefn = pmccntr_write, }, + }; + define_arm_cp_regs(cpu, v8_pm_reginfo); + } + + if (cpu_isar_feature(aa32_pmuv3p1, cpu)) { + ARMCPRegInfo v81_pmu_regs[] = { + { .name = "PMCEID2", .state = ARM_CP_STATE_AA32, + .cp = 15, .opc1 = 0, .crn = 9, .crm = 14, .opc2 = 4, + .access = PL0_R, .accessfn = pmreg_access, .type = ARM_CP_CONST, + .fgt = FGT_PMCEIDN_EL0, + .resetvalue = extract64(cpu->pmceid0, 32, 32) }, + { .name = "PMCEID3", .state = ARM_CP_STATE_AA32, + .cp = 15, .opc1 = 0, .crn = 9, .crm = 14, .opc2 = 5, + .access = PL0_R, .accessfn = pmreg_access, .type = ARM_CP_CONST, + .fgt = FGT_PMCEIDN_EL0, + .resetvalue = extract64(cpu->pmceid1, 32, 32) }, + }; + define_arm_cp_regs(cpu, v81_pmu_regs); + } + + if (cpu_isar_feature(any_pmuv3p4, cpu)) { + static const ARMCPRegInfo v84_pmmir = { + .name = "PMMIR_EL1", .state = ARM_CP_STATE_BOTH, + .opc0 = 3, .opc1 = 0, .crn = 9, .crm = 14, .opc2 = 6, + .access = PL1_R, .accessfn = pmreg_access, .type = ARM_CP_CONST, + .fgt = FGT_PMMIR_EL1, + .resetvalue = 0 + }; + define_one_arm_cp_reg(cpu, &v84_pmmir); + } +} diff --git a/target/arm/cpregs.h b/target/arm/cpregs.h index 2183de8..763de5e 100644 --- a/target/arm/cpregs.h +++ b/target/arm/cpregs.h @@ -22,7 +22,9 @@ #define TARGET_ARM_CPREGS_H #include "hw/registerfields.h" +#include "exec/memop.h" #include "target/arm/kvm-consts.h" +#include "cpu.h" /* * ARMCPRegInfo type field bits: @@ -45,6 +47,14 @@ enum { ARM_CP_DC_ZVA = 0x0005, ARM_CP_DC_GVA = 0x0006, ARM_CP_DC_GZVA = 0x0007, + /* Special: gcs instructions */ + ARM_CP_GCSPUSHM = 0x0008, + ARM_CP_GCSPOPM = 0x0009, + ARM_CP_GCSPUSHX = 0x000a, + ARM_CP_GCSPOPX = 0x000b, + ARM_CP_GCSPOPCX = 0x000c, + ARM_CP_GCSSS1 = 0x000d, + ARM_CP_GCSSS2 = 0x000e, /* Flag: reads produce resetvalue; writes ignored. */ ARM_CP_CONST = 1 << 4, @@ -134,6 +144,11 @@ enum { * identically to the normal one, other than FGT trapping handling.) */ ARM_CP_ADD_TLBI_NXS = 1 << 21, + /* + * Flag: even though this sysreg has opc1 == 4 or 5, it + * should not trap to EL2 when HCR_EL2.NV is set. + */ + ARM_CP_NV_NO_TRAP = 1 << 22, }; /* @@ -173,16 +188,20 @@ enum { * add a bit to distinguish between secure and non-secure cpregs in the * hashtable. */ -#define CP_REG_NS_SHIFT 29 -#define CP_REG_NS_MASK (1 << CP_REG_NS_SHIFT) +#define CP_REG_AA32_NS_SHIFT 29 +#define CP_REG_AA32_NS_MASK (1 << CP_REG_AA32_NS_SHIFT) + +/* Distinguish 32-bit and 64-bit views of AArch32 system registers. 
*/ +#define CP_REG_AA32_64BIT_SHIFT 15 +#define CP_REG_AA32_64BIT_MASK (1 << CP_REG_AA32_64BIT_SHIFT) #define ENCODE_CP_REG(cp, is64, ns, crn, crm, opc1, opc2) \ - ((ns) << CP_REG_NS_SHIFT | ((cp) << 16) | ((is64) << 15) | \ - ((crn) << 11) | ((crm) << 7) | ((opc1) << 3) | (opc2)) + (((ns) << CP_REG_AA32_NS_SHIFT) | \ + ((is64) << CP_REG_AA32_64BIT_SHIFT) | \ + ((cp) << 16) | ((crn) << 11) | ((crm) << 7) | ((opc1) << 3) | (opc2)) -#define ENCODE_AA64_CP_REG(cp, crn, crm, op0, op1, op2) \ - (CP_REG_AA64_MASK | \ - ((cp) << CP_REG_ARM_COPROC_SHIFT) | \ +#define ENCODE_AA64_CP_REG(op0, op1, crn, crm, op2) \ + (CP_REG_AA64_MASK | CP_REG_ARM64_SYSREG | \ ((op0) << CP_REG_ARM64_SYSREG_OP0_SHIFT) | \ ((op1) << CP_REG_ARM64_SYSREG_OP1_SHIFT) | \ ((crn) << CP_REG_ARM64_SYSREG_CRN_SHIFT) | \ @@ -200,14 +219,14 @@ static inline uint32_t kvm_to_cpreg_id(uint64_t kvmid) cpregid |= CP_REG_AA64_MASK; } else { if ((kvmid & CP_REG_SIZE_MASK) == CP_REG_SIZE_U64) { - cpregid |= (1 << 15); + cpregid |= CP_REG_AA32_64BIT_MASK; } /* * KVM is always non-secure so add the NS flag on AArch32 register * entries. */ - cpregid |= 1 << CP_REG_NS_SHIFT; + cpregid |= CP_REG_AA32_NS_MASK; } return cpregid; } @@ -224,8 +243,8 @@ static inline uint64_t cpreg_to_kvm_id(uint32_t cpregid) kvmid = cpregid & ~CP_REG_AA64_MASK; kvmid |= CP_REG_SIZE_U64 | CP_REG_ARM64; } else { - kvmid = cpregid & ~(1 << 15); - if (cpregid & (1 << 15)) { + kvmid = cpregid & ~CP_REG_AA32_64BIT_MASK; + if (cpregid & CP_REG_AA32_64BIT_MASK) { kvmid |= CP_REG_SIZE_U64 | CP_REG_ARM; } else { kvmid |= CP_REG_SIZE_U32 | CP_REG_ARM; @@ -345,6 +364,14 @@ typedef enum CPAccessResult { * specified target EL. */ CP_ACCESS_UNDEFINED = (2 << 2), + + /* + * Access fails with EXLOCK, a GCS exception syndrome. + * These traps are always to the current execution EL, + * which is the same as the usual target EL because + * they cannot occur from EL0. 
+ */ + CP_ACCESS_EXLOCK = (3 << 2), } CPAccessResult; /* Indexes into fgt_read[] */ @@ -407,10 +434,19 @@ FIELD(HFGRTR_EL2, ERXPFGCTL_EL1, 47, 1) FIELD(HFGRTR_EL2, ERXPFGCDN_EL1, 48, 1) FIELD(HFGRTR_EL2, ERXADDR_EL1, 49, 1) FIELD(HFGRTR_EL2, NACCDATA_EL1, 50, 1) -/* 51-53: RES0 */ +/* 51: RES0 */ +FIELD(HFGRTR_EL2, NGCS_EL0, 52, 1) +FIELD(HFGRTR_EL2, NGCS_EL1, 53, 1) FIELD(HFGRTR_EL2, NSMPRI_EL1, 54, 1) FIELD(HFGRTR_EL2, NTPIDR2_EL0, 55, 1) -/* 56-63: RES0 */ +FIELD(HFGRTR_EL2, NRCWMASK_EL1, 56, 1) +FIELD(HFGRTR_EL2, NPIRE0_EL1, 57, 1) +FIELD(HFGRTR_EL2, NPIR_EL1, 58, 1) +FIELD(HFGRTR_EL2, NPOR_EL0, 59, 1) +FIELD(HFGRTR_EL2, NPOR_EL1, 60, 1) +FIELD(HFGRTR_EL2, NS2POR_EL1, 61, 1) +FIELD(HFGRTR_EL2, NMAIR2_EL1, 62, 1) +FIELD(HFGRTR_EL2, NAMAIR2_EL1, 63, 1) /* These match HFGRTR but bits for RO registers are RES0 */ FIELD(HFGWTR_EL2, AFSR0_EL1, 0, 1) @@ -451,8 +487,18 @@ FIELD(HFGWTR_EL2, ERXPFGCTL_EL1, 47, 1) FIELD(HFGWTR_EL2, ERXPFGCDN_EL1, 48, 1) FIELD(HFGWTR_EL2, ERXADDR_EL1, 49, 1) FIELD(HFGWTR_EL2, NACCDATA_EL1, 50, 1) +FIELD(HFGWTR_EL2, NGCS_EL0, 52, 1) +FIELD(HFGWTR_EL2, NGCS_EL1, 53, 1) FIELD(HFGWTR_EL2, NSMPRI_EL1, 54, 1) FIELD(HFGWTR_EL2, NTPIDR2_EL0, 55, 1) +FIELD(HFGWTR_EL2, NRCWMASK_EL1, 56, 1) +FIELD(HFGWTR_EL2, NPIRE0_EL1, 57, 1) +FIELD(HFGWTR_EL2, NPIR_EL1, 58, 1) +FIELD(HFGWTR_EL2, NPOR_EL0, 59, 1) +FIELD(HFGWTR_EL2, NPOR_EL1, 60, 1) +FIELD(HFGWTR_EL2, NS2POR_EL1, 61, 1) +FIELD(HFGWTR_EL2, NMAIR2_EL1, 62, 1) +FIELD(HFGWTR_EL2, NAMAIR2_EL1, 63, 1) FIELD(HFGITR_EL2, ICIALLUIS, 0, 1) FIELD(HFGITR_EL2, ICIALLU, 1, 1) @@ -511,6 +557,11 @@ FIELD(HFGITR_EL2, SVC_EL1, 53, 1) FIELD(HFGITR_EL2, DCCVAC, 54, 1) FIELD(HFGITR_EL2, NBRBINJ, 55, 1) FIELD(HFGITR_EL2, NBRBIALL, 56, 1) +FIELD(HFGITR_EL2, NGCSPUSHM_EL1, 57, 1) +FIELD(HFGITR_EL2, NGCSSTR_EL1, 58, 1) +FIELD(HFGITR_EL2, NGCSEPP, 59, 1) +FIELD(HFGITR_EL2, COSPRCTX, 60, 1) +FIELD(HFGITR_EL2, ATS1E1A, 62, 1) FIELD(HDFGRTR_EL2, DBGBCRN_EL1, 0, 1) FIELD(HDFGRTR_EL2, DBGBVRN_EL1, 1, 1) @@ -749,8 +800,12 @@ typedef enum FGTBit { DO_BIT(HFGRTR, VBAR_EL1), DO_BIT(HFGRTR, ICC_IGRPENN_EL1), DO_BIT(HFGRTR, ERRIDR_EL1), + DO_REV_BIT(HFGRTR, NGCS_EL0), + DO_REV_BIT(HFGRTR, NGCS_EL1), DO_REV_BIT(HFGRTR, NSMPRI_EL1), DO_REV_BIT(HFGRTR, NTPIDR2_EL0), + DO_REV_BIT(HFGRTR, NPIRE0_EL1), + DO_REV_BIT(HFGRTR, NPIR_EL1), /* Trap bits in HDFGRTR_EL2 / HDFGWTR_EL2, starting from bit 0. */ DO_BIT(HDFGRTR, DBGBCRN_EL1), @@ -829,6 +884,9 @@ typedef enum FGTBit { DO_BIT(HFGITR, DVPRCTX), DO_BIT(HFGITR, CPPRCTX), DO_BIT(HFGITR, DCCVAC), + DO_REV_BIT(HFGITR, NGCSPUSHM_EL1), + DO_REV_BIT(HFGITR, NGCSEPP), + DO_BIT(HFGITR, ATS1E1A), } FGTBit; #undef DO_BIT @@ -840,15 +898,15 @@ typedef struct ARMCPRegInfo ARMCPRegInfo; * Access functions for coprocessor registers. These cannot fail and * may not raise exceptions. */ -typedef uint64_t CPReadFn(CPUARMState *env, const ARMCPRegInfo *opaque); -typedef void CPWriteFn(CPUARMState *env, const ARMCPRegInfo *opaque, +typedef uint64_t CPReadFn(CPUARMState *env, const ARMCPRegInfo *ri); +typedef void CPWriteFn(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value); /* Access permission check functions for coprocessor registers. 
*/ typedef CPAccessResult CPAccessFn(CPUARMState *env, - const ARMCPRegInfo *opaque, + const ARMCPRegInfo *ri, bool isread); /* Hook function for register reset */ -typedef void CPResetFn(CPUARMState *env, const ARMCPRegInfo *opaque); +typedef void CPResetFn(CPUARMState *env, const ARMCPRegInfo *ri); #define CP_ANY 0xff @@ -906,11 +964,19 @@ struct ARMCPRegInfo { uint32_t nv2_redirect_offset; /* - * The opaque pointer passed to define_arm_cp_regs_with_opaque() when - * this register was defined: can be used to hand data through to the - * register read/write functions, since they are passed the ARMCPRegInfo*. + * With VHE, with E2H, at EL2, access to this EL0/EL1 reg redirects + * to the EL2 reg with the specified key. + */ + uint32_t vhe_redir_to_el2; + + /* + * For VHE. Before registration, this field holds the key for an + * EL02/EL12 reg to be created to point back to this EL0/EL1 reg. + * After registration, this field is set only on the EL02/EL12 reg + * and points back to the EL02/EL12 reg for redirection with E2H. */ - void *opaque; + uint32_t vhe_redir_to_el01; + /* * Value of this register, if it is ARM_CP_CONST. Otherwise, if * fieldoffset is non-zero, the reset value of the register. @@ -978,52 +1044,17 @@ struct ARMCPRegInfo { * fieldoffset is 0 then no reset will be done. */ CPResetFn *resetfn; - - /* - * "Original" readfn, writefn, accessfn. - * For ARMv8.1-VHE register aliases, we overwrite the read/write - * accessor functions of various EL1/EL0 to perform the runtime - * check for which sysreg should actually be modified, and then - * forwards the operation. Before overwriting the accessors, - * the original function is copied here, so that accesses that - * really do go to the EL1/EL0 version proceed normally. - * (The corresponding EL2 register is linked via opaque.) - */ - CPReadFn *orig_readfn; - CPWriteFn *orig_writefn; - CPAccessFn *orig_accessfn; }; -/* - * Macros which are lvalues for the field in CPUARMState for the - * ARMCPRegInfo *ri. 
- */ -#define CPREG_FIELD32(env, ri) \ - (*(uint32_t *)((char *)(env) + (ri)->fieldoffset)) -#define CPREG_FIELD64(env, ri) \ - (*(uint64_t *)((char *)(env) + (ri)->fieldoffset)) - -void define_one_arm_cp_reg_with_opaque(ARMCPU *cpu, const ARMCPRegInfo *reg, - void *opaque); +void define_one_arm_cp_reg(ARMCPU *cpu, const ARMCPRegInfo *regs); +void define_arm_cp_regs_len(ARMCPU *cpu, const ARMCPRegInfo *regs, size_t len); -static inline void define_one_arm_cp_reg(ARMCPU *cpu, const ARMCPRegInfo *regs) -{ - define_one_arm_cp_reg_with_opaque(cpu, regs, NULL); -} - -void define_arm_cp_regs_with_opaque_len(ARMCPU *cpu, const ARMCPRegInfo *regs, - void *opaque, size_t len); - -#define define_arm_cp_regs_with_opaque(CPU, REGS, OPAQUE) \ - do { \ - QEMU_BUILD_BUG_ON(ARRAY_SIZE(REGS) == 0); \ - define_arm_cp_regs_with_opaque_len(CPU, REGS, OPAQUE, \ - ARRAY_SIZE(REGS)); \ +#define define_arm_cp_regs(CPU, REGS) \ + do { \ + QEMU_BUILD_BUG_ON(ARRAY_SIZE(REGS) == 0); \ + define_arm_cp_regs_len(CPU, REGS, ARRAY_SIZE(REGS)); \ } while (0) -#define define_arm_cp_regs(CPU, REGS) \ - define_arm_cp_regs_with_opaque(CPU, REGS, NULL) - const ARMCPRegInfo *get_arm_cp_reginfo(GHashTable *cpregs, uint32_t encoded_cp); /* @@ -1064,6 +1095,9 @@ void arm_cp_write_ignore(CPUARMState *env, const ARMCPRegInfo *ri, /* CPReadFn that can be used for read-as-zero behaviour */ uint64_t arm_cp_read_zero(CPUARMState *env, const ARMCPRegInfo *ri); +/* CPReadFn that just reads the value from ri->fieldoffset */ +uint64_t raw_read(CPUARMState *env, const ARMCPRegInfo *ri); + /* CPWriteFn that just writes the value to ri->fieldoffset */ void raw_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value); @@ -1071,15 +1105,16 @@ void raw_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value); * CPResetFn that does nothing, for use if no reset is required even * if fieldoffset is non zero. */ -void arm_cp_reset_ignore(CPUARMState *env, const ARMCPRegInfo *opaque); +void arm_cp_reset_ignore(CPUARMState *env, const ARMCPRegInfo *ri); /* - * Return true if this reginfo struct's field in the cpu state struct - * is 64 bits wide. + * Return MO_32 if the field in CPUARMState is uint32_t or + * MO_64 if the field in CPUARMState is uint64_t. */ -static inline bool cpreg_field_is_64bit(const ARMCPRegInfo *ri) +static inline MemOp cpreg_field_type(const ARMCPRegInfo *ri) { - return (ri->state == ARM_CP_STATE_AA64) || (ri->type & ARM_CP_64BIT); + return (ri->state == ARM_CP_STATE_AA64 || (ri->type & ARM_CP_64BIT) + ? MO_64 : MO_32); } static inline bool cp_access_ok(int current_el, @@ -1139,7 +1174,7 @@ static inline bool arm_cpreg_traps_in_nv(const ARMCPRegInfo *ri) * means that the right set of registers is exactly those where * the opc1 field is 4 or 5. (You can see this also in the assert * we do that the opc1 field and the permissions mask line up in - * define_one_arm_cp_reg_with_opaque().) + * define_one_arm_cp_reg().) * Checking the opc1 field is easier for us and avoids the problem * that we do not consistently use the right architectural names * for all sysregs, since we treat the name field as largely for debug. @@ -1148,12 +1183,17 @@ static inline bool arm_cpreg_traps_in_nv(const ARMCPRegInfo *ri) * fragile to future new sysregs, but this seems the least likely * to break. 
* - * In particular, note that the released sysreg XML defines that - * the FEAT_MEC sysregs and instructions do not follow this FEAT_NV - * trapping rule, so we will need to add an ARM_CP_* flag to indicate - * "register does not trap on NV" to handle those if/when we implement - * FEAT_MEC. + * In particular, note that the FEAT_MEC sysregs and instructions + * are exceptions to this trapping rule, so they are marked as + * ARM_CP_NV_NO_TRAP to indicate that they should not be trapped + * to EL2. (They are an exception because the FEAT_MEC sysregs UNDEF + * unless in Realm, and Realm is not expected to be virtualized.) */ + + if (ri->type & ARM_CP_NV_NO_TRAP) { + return false; + } + return ri->opc1 == 4 || ri->opc1 == 5; } diff --git a/target/arm/cpu-features.h b/target/arm/cpu-features.h index 525e4ce..37f1eca 100644 --- a/target/arm/cpu-features.h +++ b/target/arm/cpu-features.h @@ -22,6 +22,423 @@ #include "hw/registerfields.h" #include "qemu/host-utils.h" +#include "cpu.h" +#include "cpu-sysregs.h" + +/* + * System register ID fields. + */ +FIELD(CLIDR_EL1, CTYPE1, 0, 3) +FIELD(CLIDR_EL1, CTYPE2, 3, 3) +FIELD(CLIDR_EL1, CTYPE3, 6, 3) +FIELD(CLIDR_EL1, CTYPE4, 9, 3) +FIELD(CLIDR_EL1, CTYPE5, 12, 3) +FIELD(CLIDR_EL1, CTYPE6, 15, 3) +FIELD(CLIDR_EL1, CTYPE7, 18, 3) +FIELD(CLIDR_EL1, LOUIS, 21, 3) +FIELD(CLIDR_EL1, LOC, 24, 3) +FIELD(CLIDR_EL1, LOUU, 27, 3) +FIELD(CLIDR_EL1, ICB, 30, 3) + +/* When FEAT_CCIDX is implemented */ +FIELD(CCSIDR_EL1, CCIDX_LINESIZE, 0, 3) +FIELD(CCSIDR_EL1, CCIDX_ASSOCIATIVITY, 3, 21) +FIELD(CCSIDR_EL1, CCIDX_NUMSETS, 32, 24) + +/* When FEAT_CCIDX is not implemented */ +FIELD(CCSIDR_EL1, LINESIZE, 0, 3) +FIELD(CCSIDR_EL1, ASSOCIATIVITY, 3, 10) +FIELD(CCSIDR_EL1, NUMSETS, 13, 15) + +FIELD(CTR_EL0, IMINLINE, 0, 4) +FIELD(CTR_EL0, L1IP, 14, 2) +FIELD(CTR_EL0, DMINLINE, 16, 4) +FIELD(CTR_EL0, ERG, 20, 4) +FIELD(CTR_EL0, CWG, 24, 4) +FIELD(CTR_EL0, IDC, 28, 1) +FIELD(CTR_EL0, DIC, 29, 1) +FIELD(CTR_EL0, TMINLINE, 32, 6) + +FIELD(MIDR_EL1, REVISION, 0, 4) +FIELD(MIDR_EL1, PARTNUM, 4, 12) +FIELD(MIDR_EL1, ARCHITECTURE, 16, 4) +FIELD(MIDR_EL1, VARIANT, 20, 4) +FIELD(MIDR_EL1, IMPLEMENTER, 24, 8) + +FIELD(ID_ISAR0, SWAP, 0, 4) +FIELD(ID_ISAR0, BITCOUNT, 4, 4) +FIELD(ID_ISAR0, BITFIELD, 8, 4) +FIELD(ID_ISAR0, CMPBRANCH, 12, 4) +FIELD(ID_ISAR0, COPROC, 16, 4) +FIELD(ID_ISAR0, DEBUG, 20, 4) +FIELD(ID_ISAR0, DIVIDE, 24, 4) + +FIELD(ID_ISAR1, ENDIAN, 0, 4) +FIELD(ID_ISAR1, EXCEPT, 4, 4) +FIELD(ID_ISAR1, EXCEPT_AR, 8, 4) +FIELD(ID_ISAR1, EXTEND, 12, 4) +FIELD(ID_ISAR1, IFTHEN, 16, 4) +FIELD(ID_ISAR1, IMMEDIATE, 20, 4) +FIELD(ID_ISAR1, INTERWORK, 24, 4) +FIELD(ID_ISAR1, JAZELLE, 28, 4) + +FIELD(ID_ISAR2, LOADSTORE, 0, 4) +FIELD(ID_ISAR2, MEMHINT, 4, 4) +FIELD(ID_ISAR2, MULTIACCESSINT, 8, 4) +FIELD(ID_ISAR2, MULT, 12, 4) +FIELD(ID_ISAR2, MULTS, 16, 4) +FIELD(ID_ISAR2, MULTU, 20, 4) +FIELD(ID_ISAR2, PSR_AR, 24, 4) +FIELD(ID_ISAR2, REVERSAL, 28, 4) + +FIELD(ID_ISAR3, SATURATE, 0, 4) +FIELD(ID_ISAR3, SIMD, 4, 4) +FIELD(ID_ISAR3, SVC, 8, 4) +FIELD(ID_ISAR3, SYNCHPRIM, 12, 4) +FIELD(ID_ISAR3, TABBRANCH, 16, 4) +FIELD(ID_ISAR3, T32COPY, 20, 4) +FIELD(ID_ISAR3, TRUENOP, 24, 4) +FIELD(ID_ISAR3, T32EE, 28, 4) + +FIELD(ID_ISAR4, UNPRIV, 0, 4) +FIELD(ID_ISAR4, WITHSHIFTS, 4, 4) +FIELD(ID_ISAR4, WRITEBACK, 8, 4) +FIELD(ID_ISAR4, SMC, 12, 4) +FIELD(ID_ISAR4, BARRIER, 16, 4) +FIELD(ID_ISAR4, SYNCHPRIM_FRAC, 20, 4) +FIELD(ID_ISAR4, PSR_M, 24, 4) +FIELD(ID_ISAR4, SWP_FRAC, 28, 4) + +FIELD(ID_ISAR5, SEVL, 0, 4) +FIELD(ID_ISAR5, AES, 4, 4) +FIELD(ID_ISAR5, SHA1, 8, 4) +FIELD(ID_ISAR5, SHA2, 12, 
4) +FIELD(ID_ISAR5, CRC32, 16, 4) +FIELD(ID_ISAR5, RDM, 24, 4) +FIELD(ID_ISAR5, VCMA, 28, 4) + +FIELD(ID_ISAR6, JSCVT, 0, 4) +FIELD(ID_ISAR6, DP, 4, 4) +FIELD(ID_ISAR6, FHM, 8, 4) +FIELD(ID_ISAR6, SB, 12, 4) +FIELD(ID_ISAR6, SPECRES, 16, 4) +FIELD(ID_ISAR6, BF16, 20, 4) +FIELD(ID_ISAR6, I8MM, 24, 4) + +FIELD(ID_MMFR0, VMSA, 0, 4) +FIELD(ID_MMFR0, PMSA, 4, 4) +FIELD(ID_MMFR0, OUTERSHR, 8, 4) +FIELD(ID_MMFR0, SHARELVL, 12, 4) +FIELD(ID_MMFR0, TCM, 16, 4) +FIELD(ID_MMFR0, AUXREG, 20, 4) +FIELD(ID_MMFR0, FCSE, 24, 4) +FIELD(ID_MMFR0, INNERSHR, 28, 4) + +FIELD(ID_MMFR1, L1HVDVA, 0, 4) +FIELD(ID_MMFR1, L1UNIVA, 4, 4) +FIELD(ID_MMFR1, L1HVDSW, 8, 4) +FIELD(ID_MMFR1, L1UNISW, 12, 4) +FIELD(ID_MMFR1, L1HVD, 16, 4) +FIELD(ID_MMFR1, L1UNI, 20, 4) +FIELD(ID_MMFR1, L1TSTCLN, 24, 4) +FIELD(ID_MMFR1, BPRED, 28, 4) + +FIELD(ID_MMFR2, L1HVDFG, 0, 4) +FIELD(ID_MMFR2, L1HVDBG, 4, 4) +FIELD(ID_MMFR2, L1HVDRNG, 8, 4) +FIELD(ID_MMFR2, HVDTLB, 12, 4) +FIELD(ID_MMFR2, UNITLB, 16, 4) +FIELD(ID_MMFR2, MEMBARR, 20, 4) +FIELD(ID_MMFR2, WFISTALL, 24, 4) +FIELD(ID_MMFR2, HWACCFLG, 28, 4) + +FIELD(ID_MMFR3, CMAINTVA, 0, 4) +FIELD(ID_MMFR3, CMAINTSW, 4, 4) +FIELD(ID_MMFR3, BPMAINT, 8, 4) +FIELD(ID_MMFR3, MAINTBCST, 12, 4) +FIELD(ID_MMFR3, PAN, 16, 4) +FIELD(ID_MMFR3, COHWALK, 20, 4) +FIELD(ID_MMFR3, CMEMSZ, 24, 4) +FIELD(ID_MMFR3, SUPERSEC, 28, 4) + +FIELD(ID_MMFR4, SPECSEI, 0, 4) +FIELD(ID_MMFR4, AC2, 4, 4) +FIELD(ID_MMFR4, XNX, 8, 4) +FIELD(ID_MMFR4, CNP, 12, 4) +FIELD(ID_MMFR4, HPDS, 16, 4) +FIELD(ID_MMFR4, LSM, 20, 4) +FIELD(ID_MMFR4, CCIDX, 24, 4) +FIELD(ID_MMFR4, EVT, 28, 4) + +FIELD(ID_MMFR5, ETS, 0, 4) +FIELD(ID_MMFR5, NTLBPA, 4, 4) + +FIELD(ID_PFR0, STATE0, 0, 4) +FIELD(ID_PFR0, STATE1, 4, 4) +FIELD(ID_PFR0, STATE2, 8, 4) +FIELD(ID_PFR0, STATE3, 12, 4) +FIELD(ID_PFR0, CSV2, 16, 4) +FIELD(ID_PFR0, AMU, 20, 4) +FIELD(ID_PFR0, DIT, 24, 4) +FIELD(ID_PFR0, RAS, 28, 4) + +FIELD(ID_PFR1, PROGMOD, 0, 4) +FIELD(ID_PFR1, SECURITY, 4, 4) +FIELD(ID_PFR1, MPROGMOD, 8, 4) +FIELD(ID_PFR1, VIRTUALIZATION, 12, 4) +FIELD(ID_PFR1, GENTIMER, 16, 4) +FIELD(ID_PFR1, SEC_FRAC, 20, 4) +FIELD(ID_PFR1, VIRT_FRAC, 24, 4) +FIELD(ID_PFR1, GIC, 28, 4) + +FIELD(ID_PFR2, CSV3, 0, 4) +FIELD(ID_PFR2, SSBS, 4, 4) +FIELD(ID_PFR2, RAS_FRAC, 8, 4) + +FIELD(ID_AA64ISAR0, AES, 4, 4) +FIELD(ID_AA64ISAR0, SHA1, 8, 4) +FIELD(ID_AA64ISAR0, SHA2, 12, 4) +FIELD(ID_AA64ISAR0, CRC32, 16, 4) +FIELD(ID_AA64ISAR0, ATOMIC, 20, 4) +FIELD(ID_AA64ISAR0, TME, 24, 4) +FIELD(ID_AA64ISAR0, RDM, 28, 4) +FIELD(ID_AA64ISAR0, SHA3, 32, 4) +FIELD(ID_AA64ISAR0, SM3, 36, 4) +FIELD(ID_AA64ISAR0, SM4, 40, 4) +FIELD(ID_AA64ISAR0, DP, 44, 4) +FIELD(ID_AA64ISAR0, FHM, 48, 4) +FIELD(ID_AA64ISAR0, TS, 52, 4) +FIELD(ID_AA64ISAR0, TLB, 56, 4) +FIELD(ID_AA64ISAR0, RNDR, 60, 4) + +FIELD(ID_AA64ISAR1, DPB, 0, 4) +FIELD(ID_AA64ISAR1, APA, 4, 4) +FIELD(ID_AA64ISAR1, API, 8, 4) +FIELD(ID_AA64ISAR1, JSCVT, 12, 4) +FIELD(ID_AA64ISAR1, FCMA, 16, 4) +FIELD(ID_AA64ISAR1, LRCPC, 20, 4) +FIELD(ID_AA64ISAR1, GPA, 24, 4) +FIELD(ID_AA64ISAR1, GPI, 28, 4) +FIELD(ID_AA64ISAR1, FRINTTS, 32, 4) +FIELD(ID_AA64ISAR1, SB, 36, 4) +FIELD(ID_AA64ISAR1, SPECRES, 40, 4) +FIELD(ID_AA64ISAR1, BF16, 44, 4) +FIELD(ID_AA64ISAR1, DGH, 48, 4) +FIELD(ID_AA64ISAR1, I8MM, 52, 4) +FIELD(ID_AA64ISAR1, XS, 56, 4) +FIELD(ID_AA64ISAR1, LS64, 60, 4) + +FIELD(ID_AA64ISAR2, WFXT, 0, 4) +FIELD(ID_AA64ISAR2, RPRES, 4, 4) +FIELD(ID_AA64ISAR2, GPA3, 8, 4) +FIELD(ID_AA64ISAR2, APA3, 12, 4) +FIELD(ID_AA64ISAR2, MOPS, 16, 4) +FIELD(ID_AA64ISAR2, BC, 20, 4) +FIELD(ID_AA64ISAR2, PAC_FRAC, 24, 4) +FIELD(ID_AA64ISAR2, CLRBHB, 28, 4) 
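
Aside, not part of the patch itself: the FIELD() declarations above expand, via "hw/registerfields.h", into R_<REG>_<FIELD>_SHIFT/_LENGTH/_MASK constants, which the generic FIELD_EX32()/FIELD_EX64() helpers use to pull a field out of a raw register value. A minimal sketch (the function name is illustrative only), assuming a raw ID_AA64ISAR0 value is at hand:

static inline unsigned example_id_aa64isar0_aes(uint64_t raw)
{
    /*
     * FIELD(ID_AA64ISAR0, AES, 4, 4) above makes this extract bits [7:4]:
     * 0 = no AES instructions, 1 = AESE/AESD/AESMC/AESIMC, 2 = adds
     * PMULL/PMULL2 (which is why isar_feature_aa64_pmull tests for > 1).
     */
    return FIELD_EX64(raw, ID_AA64ISAR0, AES);
}
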
+FIELD(ID_AA64ISAR2, SYSREG_128, 32, 4) +FIELD(ID_AA64ISAR2, SYSINSTR_128, 36, 4) +FIELD(ID_AA64ISAR2, PRFMSLC, 40, 4) +FIELD(ID_AA64ISAR2, RPRFM, 48, 4) +FIELD(ID_AA64ISAR2, CSSC, 52, 4) +FIELD(ID_AA64ISAR2, LUT, 56, 4) +FIELD(ID_AA64ISAR2, ATS1A, 60, 4) + +FIELD(ID_AA64PFR0, EL0, 0, 4) +FIELD(ID_AA64PFR0, EL1, 4, 4) +FIELD(ID_AA64PFR0, EL2, 8, 4) +FIELD(ID_AA64PFR0, EL3, 12, 4) +FIELD(ID_AA64PFR0, FP, 16, 4) +FIELD(ID_AA64PFR0, ADVSIMD, 20, 4) +FIELD(ID_AA64PFR0, GIC, 24, 4) +FIELD(ID_AA64PFR0, RAS, 28, 4) +FIELD(ID_AA64PFR0, SVE, 32, 4) +FIELD(ID_AA64PFR0, SEL2, 36, 4) +FIELD(ID_AA64PFR0, MPAM, 40, 4) +FIELD(ID_AA64PFR0, AMU, 44, 4) +FIELD(ID_AA64PFR0, DIT, 48, 4) +FIELD(ID_AA64PFR0, RME, 52, 4) +FIELD(ID_AA64PFR0, CSV2, 56, 4) +FIELD(ID_AA64PFR0, CSV3, 60, 4) + +FIELD(ID_AA64PFR1, BT, 0, 4) +FIELD(ID_AA64PFR1, SSBS, 4, 4) +FIELD(ID_AA64PFR1, MTE, 8, 4) +FIELD(ID_AA64PFR1, RAS_FRAC, 12, 4) +FIELD(ID_AA64PFR1, MPAM_FRAC, 16, 4) +FIELD(ID_AA64PFR1, SME, 24, 4) +FIELD(ID_AA64PFR1, RNDR_TRAP, 28, 4) +FIELD(ID_AA64PFR1, CSV2_FRAC, 32, 4) +FIELD(ID_AA64PFR1, NMI, 36, 4) +FIELD(ID_AA64PFR1, MTE_FRAC, 40, 4) +FIELD(ID_AA64PFR1, GCS, 44, 4) +FIELD(ID_AA64PFR1, THE, 48, 4) +FIELD(ID_AA64PFR1, MTEX, 52, 4) +FIELD(ID_AA64PFR1, DF2, 56, 4) +FIELD(ID_AA64PFR1, PFAR, 60, 4) + +FIELD(ID_AA64PFR2, MTEPERM, 0, 4) +FIELD(ID_AA64PFR2, MTESTOREONLY, 4, 4) +FIELD(ID_AA64PFR2, MTEFAR, 8, 4) +FIELD(ID_AA64PFR2, FPMR, 32, 4) + +FIELD(ID_AA64MMFR0, PARANGE, 0, 4) +FIELD(ID_AA64MMFR0, ASIDBITS, 4, 4) +FIELD(ID_AA64MMFR0, BIGEND, 8, 4) +FIELD(ID_AA64MMFR0, SNSMEM, 12, 4) +FIELD(ID_AA64MMFR0, BIGENDEL0, 16, 4) +FIELD(ID_AA64MMFR0, TGRAN16, 20, 4) +FIELD(ID_AA64MMFR0, TGRAN64, 24, 4) +FIELD(ID_AA64MMFR0, TGRAN4, 28, 4) +FIELD(ID_AA64MMFR0, TGRAN16_2, 32, 4) +FIELD(ID_AA64MMFR0, TGRAN64_2, 36, 4) +FIELD(ID_AA64MMFR0, TGRAN4_2, 40, 4) +FIELD(ID_AA64MMFR0, EXS, 44, 4) +FIELD(ID_AA64MMFR0, FGT, 56, 4) +FIELD(ID_AA64MMFR0, ECV, 60, 4) + +FIELD(ID_AA64MMFR1, HAFDBS, 0, 4) +FIELD(ID_AA64MMFR1, VMIDBITS, 4, 4) +FIELD(ID_AA64MMFR1, VH, 8, 4) +FIELD(ID_AA64MMFR1, HPDS, 12, 4) +FIELD(ID_AA64MMFR1, LO, 16, 4) +FIELD(ID_AA64MMFR1, PAN, 20, 4) +FIELD(ID_AA64MMFR1, SPECSEI, 24, 4) +FIELD(ID_AA64MMFR1, XNX, 28, 4) +FIELD(ID_AA64MMFR1, TWED, 32, 4) +FIELD(ID_AA64MMFR1, ETS, 36, 4) +FIELD(ID_AA64MMFR1, HCX, 40, 4) +FIELD(ID_AA64MMFR1, AFP, 44, 4) +FIELD(ID_AA64MMFR1, NTLBPA, 48, 4) +FIELD(ID_AA64MMFR1, TIDCP1, 52, 4) +FIELD(ID_AA64MMFR1, CMOW, 56, 4) +FIELD(ID_AA64MMFR1, ECBHB, 60, 4) + +FIELD(ID_AA64MMFR2, CNP, 0, 4) +FIELD(ID_AA64MMFR2, UAO, 4, 4) +FIELD(ID_AA64MMFR2, LSM, 8, 4) +FIELD(ID_AA64MMFR2, IESB, 12, 4) +FIELD(ID_AA64MMFR2, VARANGE, 16, 4) +FIELD(ID_AA64MMFR2, CCIDX, 20, 4) +FIELD(ID_AA64MMFR2, NV, 24, 4) +FIELD(ID_AA64MMFR2, ST, 28, 4) +FIELD(ID_AA64MMFR2, AT, 32, 4) +FIELD(ID_AA64MMFR2, IDS, 36, 4) +FIELD(ID_AA64MMFR2, FWB, 40, 4) +FIELD(ID_AA64MMFR2, TTL, 48, 4) +FIELD(ID_AA64MMFR2, BBM, 52, 4) +FIELD(ID_AA64MMFR2, EVT, 56, 4) +FIELD(ID_AA64MMFR2, E0PD, 60, 4) + +FIELD(ID_AA64MMFR3, TCRX, 0, 4) +FIELD(ID_AA64MMFR3, SCTLRX, 4, 4) +FIELD(ID_AA64MMFR3, S1PIE, 8, 4) +FIELD(ID_AA64MMFR3, S2PIE, 12, 4) +FIELD(ID_AA64MMFR3, S1POE, 16, 4) +FIELD(ID_AA64MMFR3, S2POE, 20, 4) +FIELD(ID_AA64MMFR3, AIE, 24, 4) +FIELD(ID_AA64MMFR3, MEC, 28, 4) +FIELD(ID_AA64MMFR3, D128, 32, 4) +FIELD(ID_AA64MMFR3, D128_2, 36, 4) +FIELD(ID_AA64MMFR3, SNERR, 40, 4) +FIELD(ID_AA64MMFR3, ANERR, 44, 4) +FIELD(ID_AA64MMFR3, SDERR, 52, 4) +FIELD(ID_AA64MMFR3, ADERR, 56, 4) +FIELD(ID_AA64MMFR3, SPEC_FPACC, 60, 4) + +FIELD(ID_AA64DFR0, DEBUGVER, 0, 4) 
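
Aside, not part of the patch itself: a few of these ID fields are effectively signed. ID_AA64MMFR0.TGRAN4, declared just above, uses 0xf (-1 when sign-extended) to mean "4KB granule not implemented", which is why the feature tests later in this header compare a sign-extended value against 0 rather than testing for non-zero. A hedged sketch using the generic FIELD_SEX64() helper on a raw register value (the function name is illustrative only):

static inline bool example_has_4k_granule(uint64_t raw_id_aa64mmfr0)
{
    /*
     * Sign-extended field: 0 or greater means the 4KB granule is
     * implemented (larger values add e.g. LPA2 support), while the
     * reserved 0xf encoding sign-extends to -1 and means "absent".
     */
    return FIELD_SEX64(raw_id_aa64mmfr0, ID_AA64MMFR0, TGRAN4) >= 0;
}
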
+FIELD(ID_AA64DFR0, TRACEVER, 4, 4) +FIELD(ID_AA64DFR0, PMUVER, 8, 4) +FIELD(ID_AA64DFR0, BRPS, 12, 4) +FIELD(ID_AA64DFR0, PMSS, 16, 4) +FIELD(ID_AA64DFR0, WRPS, 20, 4) +FIELD(ID_AA64DFR0, SEBEP, 24, 4) +FIELD(ID_AA64DFR0, CTX_CMPS, 28, 4) +FIELD(ID_AA64DFR0, PMSVER, 32, 4) +FIELD(ID_AA64DFR0, DOUBLELOCK, 36, 4) +FIELD(ID_AA64DFR0, TRACEFILT, 40, 4) +FIELD(ID_AA64DFR0, TRACEBUFFER, 44, 4) +FIELD(ID_AA64DFR0, MTPMU, 48, 4) +FIELD(ID_AA64DFR0, BRBE, 52, 4) +FIELD(ID_AA64DFR0, EXTTRCBUFF, 56, 4) +FIELD(ID_AA64DFR0, HPMN0, 60, 4) + +FIELD(ID_AA64ZFR0, SVEVER, 0, 4) +FIELD(ID_AA64ZFR0, AES, 4, 4) +FIELD(ID_AA64ZFR0, BITPERM, 16, 4) +FIELD(ID_AA64ZFR0, BFLOAT16, 20, 4) +FIELD(ID_AA64ZFR0, B16B16, 24, 4) +FIELD(ID_AA64ZFR0, SHA3, 32, 4) +FIELD(ID_AA64ZFR0, SM4, 40, 4) +FIELD(ID_AA64ZFR0, I8MM, 44, 4) +FIELD(ID_AA64ZFR0, F32MM, 52, 4) +FIELD(ID_AA64ZFR0, F64MM, 56, 4) + +FIELD(ID_AA64SMFR0, F32F32, 32, 1) +FIELD(ID_AA64SMFR0, BI32I32, 33, 1) +FIELD(ID_AA64SMFR0, B16F32, 34, 1) +FIELD(ID_AA64SMFR0, F16F32, 35, 1) +FIELD(ID_AA64SMFR0, I8I32, 36, 4) +FIELD(ID_AA64SMFR0, F16F16, 42, 1) +FIELD(ID_AA64SMFR0, B16B16, 43, 1) +FIELD(ID_AA64SMFR0, I16I32, 44, 4) +FIELD(ID_AA64SMFR0, F64F64, 48, 1) +FIELD(ID_AA64SMFR0, I16I64, 52, 4) +FIELD(ID_AA64SMFR0, SMEVER, 56, 4) +FIELD(ID_AA64SMFR0, FA64, 63, 1) + +FIELD(ID_DFR0, COPDBG, 0, 4) +FIELD(ID_DFR0, COPSDBG, 4, 4) +FIELD(ID_DFR0, MMAPDBG, 8, 4) +FIELD(ID_DFR0, COPTRC, 12, 4) +FIELD(ID_DFR0, MMAPTRC, 16, 4) +FIELD(ID_DFR0, MPROFDBG, 20, 4) +FIELD(ID_DFR0, PERFMON, 24, 4) +FIELD(ID_DFR0, TRACEFILT, 28, 4) + +FIELD(ID_DFR1, MTPMU, 0, 4) +FIELD(ID_DFR1, HPMN0, 4, 4) + +FIELD(DBGDIDR, SE_IMP, 12, 1) +FIELD(DBGDIDR, NSUHD_IMP, 14, 1) +FIELD(DBGDIDR, VERSION, 16, 4) +FIELD(DBGDIDR, CTX_CMPS, 20, 4) +FIELD(DBGDIDR, BRPS, 24, 4) +FIELD(DBGDIDR, WRPS, 28, 4) + +FIELD(DBGDEVID, PCSAMPLE, 0, 4) +FIELD(DBGDEVID, WPADDRMASK, 4, 4) +FIELD(DBGDEVID, BPADDRMASK, 8, 4) +FIELD(DBGDEVID, VECTORCATCH, 12, 4) +FIELD(DBGDEVID, VIRTEXTNS, 16, 4) +FIELD(DBGDEVID, DOUBLELOCK, 20, 4) +FIELD(DBGDEVID, AUXREGS, 24, 4) +FIELD(DBGDEVID, CIDMASK, 28, 4) + +FIELD(DBGDEVID1, PCSROFFSET, 0, 4) + +FIELD(MVFR0, SIMDREG, 0, 4) +FIELD(MVFR0, FPSP, 4, 4) +FIELD(MVFR0, FPDP, 8, 4) +FIELD(MVFR0, FPTRAP, 12, 4) +FIELD(MVFR0, FPDIVIDE, 16, 4) +FIELD(MVFR0, FPSQRT, 20, 4) +FIELD(MVFR0, FPSHVEC, 24, 4) +FIELD(MVFR0, FPROUND, 28, 4) + +FIELD(MVFR1, FPFTZ, 0, 4) +FIELD(MVFR1, FPDNAN, 4, 4) +FIELD(MVFR1, SIMDLS, 8, 4) /* A-profile only */ +FIELD(MVFR1, SIMDINT, 12, 4) /* A-profile only */ +FIELD(MVFR1, SIMDSP, 16, 4) /* A-profile only */ +FIELD(MVFR1, SIMDHP, 20, 4) /* A-profile only */ +FIELD(MVFR1, MVE, 8, 4) /* M-profile only */ +FIELD(MVFR1, FP16, 20, 4) /* M-profile only */ +FIELD(MVFR1, FPHP, 24, 4) +FIELD(MVFR1, SIMDFMAC, 28, 4) + +FIELD(MVFR2, SIMDMISC, 0, 4) +FIELD(MVFR2, FPMISC, 4, 4) /* * Naming convention for isar_feature functions: @@ -44,103 +461,103 @@ */ static inline bool isar_feature_aa32_thumb_div(const ARMISARegisters *id) { - return FIELD_EX32(id->id_isar0, ID_ISAR0, DIVIDE) != 0; + return FIELD_EX32_IDREG(id, ID_ISAR0, DIVIDE) != 0; } static inline bool isar_feature_aa32_arm_div(const ARMISARegisters *id) { - return FIELD_EX32(id->id_isar0, ID_ISAR0, DIVIDE) > 1; + return FIELD_EX32_IDREG(id, ID_ISAR0, DIVIDE) > 1; } static inline bool isar_feature_aa32_lob(const ARMISARegisters *id) { /* (M-profile) low-overhead loops and branch future */ - return FIELD_EX32(id->id_isar0, ID_ISAR0, CMPBRANCH) >= 3; + return FIELD_EX32_IDREG(id, ID_ISAR0, CMPBRANCH) >= 3; } static inline bool 
isar_feature_aa32_jazelle(const ARMISARegisters *id) { - return FIELD_EX32(id->id_isar1, ID_ISAR1, JAZELLE) != 0; + return FIELD_EX32_IDREG(id, ID_ISAR1, JAZELLE) != 0; } static inline bool isar_feature_aa32_aes(const ARMISARegisters *id) { - return FIELD_EX32(id->id_isar5, ID_ISAR5, AES) != 0; + return FIELD_EX32_IDREG(id, ID_ISAR5, AES) != 0; } static inline bool isar_feature_aa32_pmull(const ARMISARegisters *id) { - return FIELD_EX32(id->id_isar5, ID_ISAR5, AES) > 1; + return FIELD_EX32_IDREG(id, ID_ISAR5, AES) > 1; } static inline bool isar_feature_aa32_sha1(const ARMISARegisters *id) { - return FIELD_EX32(id->id_isar5, ID_ISAR5, SHA1) != 0; + return FIELD_EX32_IDREG(id, ID_ISAR5, SHA1) != 0; } static inline bool isar_feature_aa32_sha2(const ARMISARegisters *id) { - return FIELD_EX32(id->id_isar5, ID_ISAR5, SHA2) != 0; + return FIELD_EX32_IDREG(id, ID_ISAR5, SHA2) != 0; } static inline bool isar_feature_aa32_crc32(const ARMISARegisters *id) { - return FIELD_EX32(id->id_isar5, ID_ISAR5, CRC32) != 0; + return FIELD_EX32_IDREG(id, ID_ISAR5, CRC32) != 0; } static inline bool isar_feature_aa32_rdm(const ARMISARegisters *id) { - return FIELD_EX32(id->id_isar5, ID_ISAR5, RDM) != 0; + return FIELD_EX32_IDREG(id, ID_ISAR5, RDM) != 0; } static inline bool isar_feature_aa32_vcma(const ARMISARegisters *id) { - return FIELD_EX32(id->id_isar5, ID_ISAR5, VCMA) != 0; + return FIELD_EX32_IDREG(id, ID_ISAR5, VCMA) != 0; } static inline bool isar_feature_aa32_jscvt(const ARMISARegisters *id) { - return FIELD_EX32(id->id_isar6, ID_ISAR6, JSCVT) != 0; + return FIELD_EX32_IDREG(id, ID_ISAR6, JSCVT) != 0; } static inline bool isar_feature_aa32_dp(const ARMISARegisters *id) { - return FIELD_EX32(id->id_isar6, ID_ISAR6, DP) != 0; + return FIELD_EX32_IDREG(id, ID_ISAR6, DP) != 0; } static inline bool isar_feature_aa32_fhm(const ARMISARegisters *id) { - return FIELD_EX32(id->id_isar6, ID_ISAR6, FHM) != 0; + return FIELD_EX32_IDREG(id, ID_ISAR6, FHM) != 0; } static inline bool isar_feature_aa32_sb(const ARMISARegisters *id) { - return FIELD_EX32(id->id_isar6, ID_ISAR6, SB) != 0; + return FIELD_EX32_IDREG(id, ID_ISAR6, SB) != 0; } static inline bool isar_feature_aa32_predinv(const ARMISARegisters *id) { - return FIELD_EX32(id->id_isar6, ID_ISAR6, SPECRES) != 0; + return FIELD_EX32_IDREG(id, ID_ISAR6, SPECRES) != 0; } static inline bool isar_feature_aa32_bf16(const ARMISARegisters *id) { - return FIELD_EX32(id->id_isar6, ID_ISAR6, BF16) != 0; + return FIELD_EX32_IDREG(id, ID_ISAR6, BF16) != 0; } static inline bool isar_feature_aa32_i8mm(const ARMISARegisters *id) { - return FIELD_EX32(id->id_isar6, ID_ISAR6, I8MM) != 0; + return FIELD_EX32_IDREG(id, ID_ISAR6, I8MM) != 0; } static inline bool isar_feature_aa32_ras(const ARMISARegisters *id) { - return FIELD_EX32(id->id_pfr0, ID_PFR0, RAS) != 0; + return FIELD_EX32_IDREG(id, ID_PFR0, RAS) != 0; } static inline bool isar_feature_aa32_mprofile(const ARMISARegisters *id) { - return FIELD_EX32(id->id_pfr1, ID_PFR1, MPROGMOD) != 0; + return FIELD_EX32_IDREG(id, ID_PFR1, MPROGMOD) != 0; } static inline bool isar_feature_aa32_m_sec_state(const ARMISARegisters *id) @@ -149,7 +566,7 @@ static inline bool isar_feature_aa32_m_sec_state(const ARMISARegisters *id) * Return true if M-profile state handling insns * (VSCCLRM, CLRM, FPCTX access insns) are implemented */ - return FIELD_EX32(id->id_pfr1, ID_PFR1, SECURITY) >= 3; + return FIELD_EX32_IDREG(id, ID_PFR1, SECURITY) >= 3; } static inline bool isar_feature_aa32_fp16_arith(const ARMISARegisters *id) @@ -282,88 +699,88 @@ 
static inline bool isar_feature_aa32_vminmaxnm(const ARMISARegisters *id) static inline bool isar_feature_aa32_pxn(const ARMISARegisters *id) { - return FIELD_EX32(id->id_mmfr0, ID_MMFR0, VMSA) >= 4; + return FIELD_EX32_IDREG(id, ID_MMFR0, VMSA) >= 4; } static inline bool isar_feature_aa32_pan(const ARMISARegisters *id) { - return FIELD_EX32(id->id_mmfr3, ID_MMFR3, PAN) != 0; + return FIELD_EX32_IDREG(id, ID_MMFR3, PAN) != 0; } static inline bool isar_feature_aa32_ats1e1(const ARMISARegisters *id) { - return FIELD_EX32(id->id_mmfr3, ID_MMFR3, PAN) >= 2; + return FIELD_EX32_IDREG(id, ID_MMFR3, PAN) >= 2; } static inline bool isar_feature_aa32_pmuv3p1(const ARMISARegisters *id) { /* 0xf means "non-standard IMPDEF PMU" */ - return FIELD_EX32(id->id_dfr0, ID_DFR0, PERFMON) >= 4 && - FIELD_EX32(id->id_dfr0, ID_DFR0, PERFMON) != 0xf; + return FIELD_EX32_IDREG(id, ID_DFR0, PERFMON) >= 4 && + FIELD_EX32_IDREG(id, ID_DFR0, PERFMON) != 0xf; } static inline bool isar_feature_aa32_pmuv3p4(const ARMISARegisters *id) { /* 0xf means "non-standard IMPDEF PMU" */ - return FIELD_EX32(id->id_dfr0, ID_DFR0, PERFMON) >= 5 && - FIELD_EX32(id->id_dfr0, ID_DFR0, PERFMON) != 0xf; + return FIELD_EX32_IDREG(id, ID_DFR0, PERFMON) >= 5 && + FIELD_EX32_IDREG(id, ID_DFR0, PERFMON) != 0xf; } static inline bool isar_feature_aa32_pmuv3p5(const ARMISARegisters *id) { /* 0xf means "non-standard IMPDEF PMU" */ - return FIELD_EX32(id->id_dfr0, ID_DFR0, PERFMON) >= 6 && - FIELD_EX32(id->id_dfr0, ID_DFR0, PERFMON) != 0xf; + return FIELD_EX32_IDREG(id, ID_DFR0, PERFMON) >= 6 && + FIELD_EX32_IDREG(id, ID_DFR0, PERFMON) != 0xf; } static inline bool isar_feature_aa32_hpd(const ARMISARegisters *id) { - return FIELD_EX32(id->id_mmfr4, ID_MMFR4, HPDS) != 0; + return FIELD_EX32_IDREG(id, ID_MMFR4, HPDS) != 0; } static inline bool isar_feature_aa32_ac2(const ARMISARegisters *id) { - return FIELD_EX32(id->id_mmfr4, ID_MMFR4, AC2) != 0; + return FIELD_EX32_IDREG(id, ID_MMFR4, AC2) != 0; } static inline bool isar_feature_aa32_ccidx(const ARMISARegisters *id) { - return FIELD_EX32(id->id_mmfr4, ID_MMFR4, CCIDX) != 0; + return FIELD_EX32_IDREG(id, ID_MMFR4, CCIDX) != 0; } static inline bool isar_feature_aa32_tts2uxn(const ARMISARegisters *id) { - return FIELD_EX32(id->id_mmfr4, ID_MMFR4, XNX) != 0; + return FIELD_EX32_IDREG(id, ID_MMFR4, XNX) != 0; } static inline bool isar_feature_aa32_half_evt(const ARMISARegisters *id) { - return FIELD_EX32(id->id_mmfr4, ID_MMFR4, EVT) >= 1; + return FIELD_EX32_IDREG(id, ID_MMFR4, EVT) >= 1; } static inline bool isar_feature_aa32_evt(const ARMISARegisters *id) { - return FIELD_EX32(id->id_mmfr4, ID_MMFR4, EVT) >= 2; + return FIELD_EX32_IDREG(id, ID_MMFR4, EVT) >= 2; } static inline bool isar_feature_aa32_dit(const ARMISARegisters *id) { - return FIELD_EX32(id->id_pfr0, ID_PFR0, DIT) != 0; + return FIELD_EX32_IDREG(id, ID_PFR0, DIT) != 0; } static inline bool isar_feature_aa32_ssbs(const ARMISARegisters *id) { - return FIELD_EX32(id->id_pfr2, ID_PFR2, SSBS) != 0; + return FIELD_EX32_IDREG(id, ID_PFR2, SSBS) != 0; } static inline bool isar_feature_aa32_debugv7p1(const ARMISARegisters *id) { - return FIELD_EX32(id->id_dfr0, ID_DFR0, COPDBG) >= 5; + return FIELD_EX32_IDREG(id, ID_DFR0, COPDBG) >= 5; } static inline bool isar_feature_aa32_debugv8p2(const ARMISARegisters *id) { - return FIELD_EX32(id->id_dfr0, ID_DFR0, COPDBG) >= 8; + return FIELD_EX32_IDREG(id, ID_DFR0, COPDBG) >= 8; } static inline bool isar_feature_aa32_doublelock(const ARMISARegisters *id) @@ -376,107 +793,112 @@ static inline bool 
isar_feature_aa32_doublelock(const ARMISARegisters *id) */ static inline bool isar_feature_aa64_aes(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, AES) != 0; + return FIELD_EX64_IDREG(id, ID_AA64ISAR0, AES) != 0; } static inline bool isar_feature_aa64_pmull(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, AES) > 1; + return FIELD_EX64_IDREG(id, ID_AA64ISAR0, AES) > 1; } static inline bool isar_feature_aa64_sha1(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, SHA1) != 0; + return FIELD_EX64_IDREG(id, ID_AA64ISAR0, SHA1) != 0; } static inline bool isar_feature_aa64_sha256(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, SHA2) != 0; + return FIELD_EX64_IDREG(id, ID_AA64ISAR0, SHA2) != 0; } static inline bool isar_feature_aa64_sha512(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, SHA2) > 1; + return FIELD_EX64_IDREG(id, ID_AA64ISAR0, SHA2) > 1; } static inline bool isar_feature_aa64_crc32(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, CRC32) != 0; + return FIELD_EX64_IDREG(id, ID_AA64ISAR0, CRC32) != 0; } -static inline bool isar_feature_aa64_atomics(const ARMISARegisters *id) +static inline bool isar_feature_aa64_lse(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, ATOMIC) != 0; + return FIELD_EX64_IDREG(id, ID_AA64ISAR0, ATOMIC) >= 2; +} + +static inline bool isar_feature_aa64_lse128(const ARMISARegisters *id) +{ + return FIELD_EX64_IDREG(id, ID_AA64ISAR0, ATOMIC) >= 3; } static inline bool isar_feature_aa64_rdm(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, RDM) != 0; + return FIELD_EX64_IDREG(id, ID_AA64ISAR0, RDM) != 0; } static inline bool isar_feature_aa64_sha3(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, SHA3) != 0; + return FIELD_EX64_IDREG(id, ID_AA64ISAR0, SHA3) != 0; } static inline bool isar_feature_aa64_sm3(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, SM3) != 0; + return FIELD_EX64_IDREG(id, ID_AA64ISAR0, SM3) != 0; } static inline bool isar_feature_aa64_sm4(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, SM4) != 0; + return FIELD_EX64_IDREG(id, ID_AA64ISAR0, SM4) != 0; } static inline bool isar_feature_aa64_dp(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, DP) != 0; + return FIELD_EX64_IDREG(id, ID_AA64ISAR0, DP) != 0; } static inline bool isar_feature_aa64_fhm(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, FHM) != 0; + return FIELD_EX64_IDREG(id, ID_AA64ISAR0, FHM) != 0; } static inline bool isar_feature_aa64_condm_4(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, TS) != 0; + return FIELD_EX64_IDREG(id, ID_AA64ISAR0, TS) != 0; } static inline bool isar_feature_aa64_condm_5(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, TS) >= 2; + return FIELD_EX64_IDREG(id, ID_AA64ISAR0, TS) >= 2; } static inline bool isar_feature_aa64_rndr(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, RNDR) != 0; + return FIELD_EX64_IDREG(id, ID_AA64ISAR0, RNDR) != 0; } static inline bool isar_feature_aa64_tlbirange(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, TLB) == 2; + return FIELD_EX64_IDREG(id, ID_AA64ISAR0, TLB) == 2; } static inline 
bool isar_feature_aa64_tlbios(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, TLB) != 0; + return FIELD_EX64_IDREG(id, ID_AA64ISAR0, TLB) != 0; } static inline bool isar_feature_aa64_jscvt(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, JSCVT) != 0; + return FIELD_EX64_IDREG(id, ID_AA64ISAR1, JSCVT) != 0; } static inline bool isar_feature_aa64_fcma(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, FCMA) != 0; + return FIELD_EX64_IDREG(id, ID_AA64ISAR1, FCMA) != 0; } static inline bool isar_feature_aa64_xs(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, XS) != 0; + return FIELD_EX64_IDREG(id, ID_AA64ISAR1, XS) != 0; } /* @@ -500,9 +922,9 @@ isar_feature_pauth_feature(const ARMISARegisters *id) * Architecturally, only one of {APA,API,APA3} may be active (non-zero) * and the other two must be zero. Thus we may avoid conditionals. */ - return (FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, APA) | - FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, API) | - FIELD_EX64(id->id_aa64isar2, ID_AA64ISAR2, APA3)); + return (FIELD_EX64_IDREG(id, ID_AA64ISAR1, APA) | + FIELD_EX64_IDREG(id, ID_AA64ISAR1, API) | + FIELD_EX64_IDREG(id, ID_AA64ISAR2, APA3)); } static inline bool isar_feature_aa64_pauth(const ARMISARegisters *id) @@ -520,7 +942,7 @@ static inline bool isar_feature_aa64_pauth_qarma5(const ARMISARegisters *id) * Return true if pauth is enabled with the architected QARMA5 algorithm. * QEMU will always enable or disable both APA and GPA. */ - return FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, APA) != 0; + return FIELD_EX64_IDREG(id, ID_AA64ISAR1, APA) != 0; } static inline bool isar_feature_aa64_pauth_qarma3(const ARMISARegisters *id) @@ -529,144 +951,164 @@ static inline bool isar_feature_aa64_pauth_qarma3(const ARMISARegisters *id) * Return true if pauth is enabled with the architected QARMA3 algorithm. * QEMU will always enable or disable both APA3 and GPA3. 
*/ - return FIELD_EX64(id->id_aa64isar2, ID_AA64ISAR2, APA3) != 0; + return FIELD_EX64_IDREG(id, ID_AA64ISAR2, APA3) != 0; } static inline bool isar_feature_aa64_sb(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, SB) != 0; + return FIELD_EX64_IDREG(id, ID_AA64ISAR1, SB) != 0; } static inline bool isar_feature_aa64_predinv(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, SPECRES) != 0; + return FIELD_EX64_IDREG(id, ID_AA64ISAR1, SPECRES) != 0; } static inline bool isar_feature_aa64_frint(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, FRINTTS) != 0; + return FIELD_EX64_IDREG(id, ID_AA64ISAR1, FRINTTS) != 0; } static inline bool isar_feature_aa64_dcpop(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, DPB) != 0; + return FIELD_EX64_IDREG(id, ID_AA64ISAR1, DPB) != 0; } static inline bool isar_feature_aa64_dcpodp(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, DPB) >= 2; + return FIELD_EX64_IDREG(id, ID_AA64ISAR1, DPB) >= 2; } static inline bool isar_feature_aa64_bf16(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, BF16) != 0; + return FIELD_EX64_IDREG(id, ID_AA64ISAR1, BF16) != 0; } static inline bool isar_feature_aa64_ebf16(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, BF16) > 1; + return FIELD_EX64_IDREG(id, ID_AA64ISAR1, BF16) > 1; } static inline bool isar_feature_aa64_rcpc_8_3(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, LRCPC) != 0; + return FIELD_EX64_IDREG(id, ID_AA64ISAR1, LRCPC) != 0; } static inline bool isar_feature_aa64_rcpc_8_4(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, LRCPC) >= 2; + return FIELD_EX64_IDREG(id, ID_AA64ISAR1, LRCPC) >= 2; } static inline bool isar_feature_aa64_i8mm(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, I8MM) != 0; + return FIELD_EX64_IDREG(id, ID_AA64ISAR1, I8MM) != 0; } static inline bool isar_feature_aa64_wfxt(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64isar2, ID_AA64ISAR2, WFXT) >= 2; + return FIELD_EX64_IDREG(id, ID_AA64ISAR2, WFXT) >= 2; } static inline bool isar_feature_aa64_hbc(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64isar2, ID_AA64ISAR2, BC) != 0; + return FIELD_EX64_IDREG(id, ID_AA64ISAR2, BC) != 0; } static inline bool isar_feature_aa64_mops(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64isar2, ID_AA64ISAR2, MOPS); + return FIELD_EX64_IDREG(id, ID_AA64ISAR2, MOPS); } static inline bool isar_feature_aa64_rpres(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64isar2, ID_AA64ISAR2, RPRES); + return FIELD_EX64_IDREG(id, ID_AA64ISAR2, RPRES); +} + +static inline bool isar_feature_aa64_cssc(const ARMISARegisters *id) +{ + return FIELD_EX64_IDREG(id, ID_AA64ISAR2, CSSC) != 0; +} + +static inline bool isar_feature_aa64_lut(const ARMISARegisters *id) +{ + return FIELD_EX64_IDREG(id, ID_AA64ISAR2, LUT); +} + +static inline bool isar_feature_aa64_ats1a(const ARMISARegisters *id) +{ + return FIELD_EX64_IDREG(id, ID_AA64ISAR2, ATS1A); } static inline bool isar_feature_aa64_fp_simd(const ARMISARegisters *id) { /* We always set the AdvSIMD and FP fields identically. 
*/ - return FIELD_EX64(id->id_aa64pfr0, ID_AA64PFR0, FP) != 0xf; + return FIELD_EX64_IDREG(id, ID_AA64PFR0, FP) != 0xf; } static inline bool isar_feature_aa64_fp16(const ARMISARegisters *id) { /* We always set the AdvSIMD and FP fields identically wrt FP16. */ - return FIELD_EX64(id->id_aa64pfr0, ID_AA64PFR0, FP) == 1; + return FIELD_EX64_IDREG(id, ID_AA64PFR0, FP) == 1; } static inline bool isar_feature_aa64_aa32(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64pfr0, ID_AA64PFR0, EL0) >= 2; + return FIELD_EX64_IDREG(id, ID_AA64PFR0, EL0) >= 2; } static inline bool isar_feature_aa64_aa32_el1(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64pfr0, ID_AA64PFR0, EL1) >= 2; + return FIELD_EX64_IDREG(id, ID_AA64PFR0, EL1) >= 2; } static inline bool isar_feature_aa64_aa32_el2(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64pfr0, ID_AA64PFR0, EL2) >= 2; + return FIELD_EX64_IDREG(id, ID_AA64PFR0, EL2) >= 2; } static inline bool isar_feature_aa64_ras(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64pfr0, ID_AA64PFR0, RAS) != 0; + return FIELD_EX64_IDREG(id, ID_AA64PFR0, RAS) != 0; } static inline bool isar_feature_aa64_doublefault(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64pfr0, ID_AA64PFR0, RAS) >= 2; + return FIELD_EX64_IDREG(id, ID_AA64PFR0, RAS) >= 2; } static inline bool isar_feature_aa64_sve(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64pfr0, ID_AA64PFR0, SVE) != 0; + return FIELD_EX64_IDREG(id, ID_AA64PFR0, SVE) != 0; } static inline bool isar_feature_aa64_sel2(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64pfr0, ID_AA64PFR0, SEL2) != 0; + return FIELD_EX64_IDREG(id, ID_AA64PFR0, SEL2) != 0; } static inline bool isar_feature_aa64_rme(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64pfr0, ID_AA64PFR0, RME) != 0; + return FIELD_EX64_IDREG(id, ID_AA64PFR0, RME) != 0; +} + +static inline bool isar_feature_aa64_rme_gpc2(const ARMISARegisters *id) +{ + return FIELD_EX64_IDREG(id, ID_AA64PFR0, RME) >= 2; } static inline bool isar_feature_aa64_dit(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64pfr0, ID_AA64PFR0, DIT) != 0; + return FIELD_EX64_IDREG(id, ID_AA64PFR0, DIT) != 0; } static inline bool isar_feature_aa64_scxtnum(const ARMISARegisters *id) { - int key = FIELD_EX64(id->id_aa64pfr0, ID_AA64PFR0, CSV2); + int key = FIELD_EX64_IDREG(id, ID_AA64PFR0, CSV2); if (key >= 2) { return true; /* FEAT_CSV2_2 */ } if (key == 1) { - key = FIELD_EX64(id->id_aa64pfr1, ID_AA64PFR1, CSV2_FRAC); + key = FIELD_EX64_IDREG(id, ID_AA64PFR1, CSV2_FRAC); return key >= 2; /* FEAT_CSV2_1p2 */ } return false; @@ -674,320 +1116,408 @@ static inline bool isar_feature_aa64_scxtnum(const ARMISARegisters *id) static inline bool isar_feature_aa64_ssbs(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64pfr1, ID_AA64PFR1, SSBS) != 0; + return FIELD_EX64_IDREG(id, ID_AA64PFR1, SSBS) != 0; } static inline bool isar_feature_aa64_bti(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64pfr1, ID_AA64PFR1, BT) != 0; + return FIELD_EX64_IDREG(id, ID_AA64PFR1, BT) != 0; } static inline bool isar_feature_aa64_mte_insn_reg(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64pfr1, ID_AA64PFR1, MTE) != 0; + return FIELD_EX64_IDREG(id, ID_AA64PFR1, MTE) != 0; } static inline bool isar_feature_aa64_mte(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64pfr1, ID_AA64PFR1, MTE) >= 2; + return FIELD_EX64_IDREG(id, ID_AA64PFR1, MTE) >= 2; } static inline bool isar_feature_aa64_mte3(const 
ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64pfr1, ID_AA64PFR1, MTE) >= 3; + return FIELD_EX64_IDREG(id, ID_AA64PFR1, MTE) >= 3; } static inline bool isar_feature_aa64_sme(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64pfr1, ID_AA64PFR1, SME) != 0; + return FIELD_EX64_IDREG(id, ID_AA64PFR1, SME) != 0; } static inline bool isar_feature_aa64_nmi(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64pfr1, ID_AA64PFR1, NMI) != 0; + return FIELD_EX64_IDREG(id, ID_AA64PFR1, NMI) != 0; +} + +static inline bool isar_feature_aa64_gcs(const ARMISARegisters *id) +{ + return FIELD_EX64_IDREG(id, ID_AA64PFR1, GCS) != 0; } static inline bool isar_feature_aa64_tgran4_lpa2(const ARMISARegisters *id) { - return FIELD_SEX64(id->id_aa64mmfr0, ID_AA64MMFR0, TGRAN4) >= 1; + return FIELD_SEX64_IDREG(id, ID_AA64MMFR0, TGRAN4) >= 1; } static inline bool isar_feature_aa64_tgran4_2_lpa2(const ARMISARegisters *id) { - unsigned t = FIELD_EX64(id->id_aa64mmfr0, ID_AA64MMFR0, TGRAN4_2); + unsigned t = FIELD_EX64_IDREG(id, ID_AA64MMFR0, TGRAN4_2); return t >= 3 || (t == 0 && isar_feature_aa64_tgran4_lpa2(id)); } static inline bool isar_feature_aa64_tgran16_lpa2(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64mmfr0, ID_AA64MMFR0, TGRAN16) >= 2; + return FIELD_EX64_IDREG(id, ID_AA64MMFR0, TGRAN16) >= 2; } static inline bool isar_feature_aa64_tgran16_2_lpa2(const ARMISARegisters *id) { - unsigned t = FIELD_EX64(id->id_aa64mmfr0, ID_AA64MMFR0, TGRAN16_2); + unsigned t = FIELD_EX64_IDREG(id, ID_AA64MMFR0, TGRAN16_2); return t >= 3 || (t == 0 && isar_feature_aa64_tgran16_lpa2(id)); } static inline bool isar_feature_aa64_tgran4(const ARMISARegisters *id) { - return FIELD_SEX64(id->id_aa64mmfr0, ID_AA64MMFR0, TGRAN4) >= 0; + return FIELD_SEX64_IDREG(id, ID_AA64MMFR0, TGRAN4) >= 0; } static inline bool isar_feature_aa64_tgran16(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64mmfr0, ID_AA64MMFR0, TGRAN16) >= 1; + return FIELD_EX64_IDREG(id, ID_AA64MMFR0, TGRAN16) >= 1; } static inline bool isar_feature_aa64_tgran64(const ARMISARegisters *id) { - return FIELD_SEX64(id->id_aa64mmfr0, ID_AA64MMFR0, TGRAN64) >= 0; + return FIELD_SEX64_IDREG(id, ID_AA64MMFR0, TGRAN64) >= 0; } static inline bool isar_feature_aa64_tgran4_2(const ARMISARegisters *id) { - unsigned t = FIELD_EX64(id->id_aa64mmfr0, ID_AA64MMFR0, TGRAN4_2); + unsigned t = FIELD_EX64_IDREG(id, ID_AA64MMFR0, TGRAN4_2); return t >= 2 || (t == 0 && isar_feature_aa64_tgran4(id)); } static inline bool isar_feature_aa64_tgran16_2(const ARMISARegisters *id) { - unsigned t = FIELD_EX64(id->id_aa64mmfr0, ID_AA64MMFR0, TGRAN16_2); + unsigned t = FIELD_EX64_IDREG(id, ID_AA64MMFR0, TGRAN16_2); return t >= 2 || (t == 0 && isar_feature_aa64_tgran16(id)); } static inline bool isar_feature_aa64_tgran64_2(const ARMISARegisters *id) { - unsigned t = FIELD_EX64(id->id_aa64mmfr0, ID_AA64MMFR0, TGRAN64_2); + unsigned t = FIELD_EX64_IDREG(id, ID_AA64MMFR0, TGRAN64_2); return t >= 2 || (t == 0 && isar_feature_aa64_tgran64(id)); } static inline bool isar_feature_aa64_fgt(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64mmfr0, ID_AA64MMFR0, FGT) != 0; + return FIELD_EX64_IDREG(id, ID_AA64MMFR0, FGT) != 0; } static inline bool isar_feature_aa64_ecv_traps(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64mmfr0, ID_AA64MMFR0, ECV) > 0; + return FIELD_EX64_IDREG(id, ID_AA64MMFR0, ECV) > 0; } static inline bool isar_feature_aa64_ecv(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64mmfr0, ID_AA64MMFR0, ECV) > 1; + return 
FIELD_EX64_IDREG(id, ID_AA64MMFR0, ECV) > 1; } static inline bool isar_feature_aa64_vh(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64mmfr1, ID_AA64MMFR1, VH) != 0; + return FIELD_EX64_IDREG(id, ID_AA64MMFR1, VH) != 0; } static inline bool isar_feature_aa64_lor(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64mmfr1, ID_AA64MMFR1, LO) != 0; + return FIELD_EX64_IDREG(id, ID_AA64MMFR1, LO) != 0; } static inline bool isar_feature_aa64_pan(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64mmfr1, ID_AA64MMFR1, PAN) != 0; + return FIELD_EX64_IDREG(id, ID_AA64MMFR1, PAN) != 0; } static inline bool isar_feature_aa64_ats1e1(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64mmfr1, ID_AA64MMFR1, PAN) >= 2; + return FIELD_EX64_IDREG(id, ID_AA64MMFR1, PAN) >= 2; } static inline bool isar_feature_aa64_pan3(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64mmfr1, ID_AA64MMFR1, PAN) >= 3; + return FIELD_EX64_IDREG(id, ID_AA64MMFR1, PAN) >= 3; } static inline bool isar_feature_aa64_hcx(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64mmfr1, ID_AA64MMFR1, HCX) != 0; + return FIELD_EX64_IDREG(id, ID_AA64MMFR1, HCX) != 0; } static inline bool isar_feature_aa64_afp(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64mmfr1, ID_AA64MMFR1, AFP) != 0; + return FIELD_EX64_IDREG(id, ID_AA64MMFR1, AFP) != 0; } static inline bool isar_feature_aa64_tidcp1(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64mmfr1, ID_AA64MMFR1, TIDCP1) != 0; + return FIELD_EX64_IDREG(id, ID_AA64MMFR1, TIDCP1) != 0; } static inline bool isar_feature_aa64_cmow(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64mmfr1, ID_AA64MMFR1, CMOW) != 0; + return FIELD_EX64_IDREG(id, ID_AA64MMFR1, CMOW) != 0; } static inline bool isar_feature_aa64_hafs(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64mmfr1, ID_AA64MMFR1, HAFDBS) != 0; + return FIELD_EX64_IDREG(id, ID_AA64MMFR1, HAFDBS) != 0; } static inline bool isar_feature_aa64_hdbs(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64mmfr1, ID_AA64MMFR1, HAFDBS) >= 2; + return FIELD_EX64_IDREG(id, ID_AA64MMFR1, HAFDBS) >= 2; } static inline bool isar_feature_aa64_tts2uxn(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64mmfr1, ID_AA64MMFR1, XNX) != 0; + return FIELD_EX64_IDREG(id, ID_AA64MMFR1, XNX) != 0; } static inline bool isar_feature_aa64_uao(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64mmfr2, ID_AA64MMFR2, UAO) != 0; + return FIELD_EX64_IDREG(id, ID_AA64MMFR2, UAO) != 0; } static inline bool isar_feature_aa64_st(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64mmfr2, ID_AA64MMFR2, ST) != 0; + return FIELD_EX64_IDREG(id, ID_AA64MMFR2, ST) != 0; } static inline bool isar_feature_aa64_lse2(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64mmfr2, ID_AA64MMFR2, AT) != 0; + return FIELD_EX64_IDREG(id, ID_AA64MMFR2, AT) != 0; } static inline bool isar_feature_aa64_fwb(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64mmfr2, ID_AA64MMFR2, FWB) != 0; + return FIELD_EX64_IDREG(id, ID_AA64MMFR2, FWB) != 0; } static inline bool isar_feature_aa64_ids(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64mmfr2, ID_AA64MMFR2, IDS) != 0; + return FIELD_EX64_IDREG(id, ID_AA64MMFR2, IDS) != 0; } static inline bool isar_feature_aa64_half_evt(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64mmfr2, ID_AA64MMFR2, EVT) >= 1; + return FIELD_EX64_IDREG(id, ID_AA64MMFR2, EVT) >= 1; } static inline bool isar_feature_aa64_evt(const 
ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64mmfr2, ID_AA64MMFR2, EVT) >= 2; + return FIELD_EX64_IDREG(id, ID_AA64MMFR2, EVT) >= 2; } static inline bool isar_feature_aa64_ccidx(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64mmfr2, ID_AA64MMFR2, CCIDX) != 0; + return FIELD_EX64_IDREG(id, ID_AA64MMFR2, CCIDX) != 0; } static inline bool isar_feature_aa64_lva(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64mmfr2, ID_AA64MMFR2, VARANGE) != 0; + return FIELD_EX64_IDREG(id, ID_AA64MMFR2, VARANGE) != 0; } static inline bool isar_feature_aa64_e0pd(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64mmfr2, ID_AA64MMFR2, E0PD) != 0; + return FIELD_EX64_IDREG(id, ID_AA64MMFR2, E0PD) != 0; } static inline bool isar_feature_aa64_nv(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64mmfr2, ID_AA64MMFR2, NV) != 0; + return FIELD_EX64_IDREG(id, ID_AA64MMFR2, NV) != 0; } static inline bool isar_feature_aa64_nv2(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64mmfr2, ID_AA64MMFR2, NV) >= 2; + return FIELD_EX64_IDREG(id, ID_AA64MMFR2, NV) >= 2; +} + +static inline bool isar_feature_aa64_tcr2(const ARMISARegisters *id) +{ + return FIELD_EX64_IDREG(id, ID_AA64MMFR3, TCRX) != 0; +} + +static inline bool isar_feature_aa64_sctlr2(const ARMISARegisters *id) +{ + return FIELD_EX64_IDREG(id, ID_AA64MMFR3, SCTLRX) != 0; +} + +static inline bool isar_feature_aa64_s1pie(const ARMISARegisters *id) +{ + return FIELD_EX64_IDREG(id, ID_AA64MMFR3, S1PIE) != 0; +} + +static inline bool isar_feature_aa64_s2pie(const ARMISARegisters *id) +{ + return FIELD_EX64_IDREG(id, ID_AA64MMFR3, S2PIE) != 0; +} + +static inline bool isar_feature_aa64_mec(const ARMISARegisters *id) +{ + return FIELD_EX64_IDREG(id, ID_AA64MMFR3, MEC) != 0; } static inline bool isar_feature_aa64_pmuv3p1(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64dfr0, ID_AA64DFR0, PMUVER) >= 4 && - FIELD_EX64(id->id_aa64dfr0, ID_AA64DFR0, PMUVER) != 0xf; + return FIELD_EX64_IDREG(id, ID_AA64DFR0, PMUVER) >= 4 && + FIELD_EX64_IDREG(id, ID_AA64DFR0, PMUVER) != 0xf; } static inline bool isar_feature_aa64_pmuv3p4(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64dfr0, ID_AA64DFR0, PMUVER) >= 5 && - FIELD_EX64(id->id_aa64dfr0, ID_AA64DFR0, PMUVER) != 0xf; + return FIELD_EX64_IDREG(id, ID_AA64DFR0, PMUVER) >= 5 && + FIELD_EX64_IDREG(id, ID_AA64DFR0, PMUVER) != 0xf; } static inline bool isar_feature_aa64_pmuv3p5(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64dfr0, ID_AA64DFR0, PMUVER) >= 6 && - FIELD_EX64(id->id_aa64dfr0, ID_AA64DFR0, PMUVER) != 0xf; + return FIELD_EX64_IDREG(id, ID_AA64DFR0, PMUVER) >= 6 && + FIELD_EX64_IDREG(id, ID_AA64DFR0, PMUVER) != 0xf; } static inline bool isar_feature_aa64_debugv8p2(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64dfr0, ID_AA64DFR0, DEBUGVER) >= 8; + return FIELD_EX64_IDREG(id, ID_AA64DFR0, DEBUGVER) >= 8; } static inline bool isar_feature_aa64_doublelock(const ARMISARegisters *id) { - return FIELD_SEX64(id->id_aa64dfr0, ID_AA64DFR0, DOUBLELOCK) >= 0; + return FIELD_SEX64_IDREG(id, ID_AA64DFR0, DOUBLELOCK) >= 0; } static inline bool isar_feature_aa64_sve2(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64zfr0, ID_AA64ZFR0, SVEVER) != 0; + return FIELD_EX64_IDREG(id, ID_AA64ZFR0, SVEVER) != 0; +} + +static inline bool isar_feature_aa64_sve2p1(const ARMISARegisters *id) +{ + return FIELD_EX64_IDREG(id, ID_AA64ZFR0, SVEVER) >=2; } static inline bool isar_feature_aa64_sve2_aes(const ARMISARegisters *id) { - return 
FIELD_EX64(id->id_aa64zfr0, ID_AA64ZFR0, AES) != 0; + return FIELD_EX64_IDREG(id, ID_AA64ZFR0, AES) != 0; } static inline bool isar_feature_aa64_sve2_pmull128(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64zfr0, ID_AA64ZFR0, AES) >= 2; + return FIELD_EX64_IDREG(id, ID_AA64ZFR0, AES) >= 2; } static inline bool isar_feature_aa64_sve2_bitperm(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64zfr0, ID_AA64ZFR0, BITPERM) != 0; + return FIELD_EX64_IDREG(id, ID_AA64ZFR0, BITPERM) != 0; } static inline bool isar_feature_aa64_sve_bf16(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64zfr0, ID_AA64ZFR0, BFLOAT16) != 0; + return FIELD_EX64_IDREG(id, ID_AA64ZFR0, BFLOAT16) != 0; } static inline bool isar_feature_aa64_sve2_sha3(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64zfr0, ID_AA64ZFR0, SHA3) != 0; + return FIELD_EX64_IDREG(id, ID_AA64ZFR0, SHA3) != 0; } static inline bool isar_feature_aa64_sve2_sm4(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64zfr0, ID_AA64ZFR0, SM4) != 0; + return FIELD_EX64_IDREG(id, ID_AA64ZFR0, SM4) != 0; } static inline bool isar_feature_aa64_sve_i8mm(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64zfr0, ID_AA64ZFR0, I8MM) != 0; + return FIELD_EX64_IDREG(id, ID_AA64ZFR0, I8MM) != 0; } static inline bool isar_feature_aa64_sve_f32mm(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64zfr0, ID_AA64ZFR0, F32MM) != 0; + return FIELD_EX64_IDREG(id, ID_AA64ZFR0, F32MM) != 0; } static inline bool isar_feature_aa64_sve_f64mm(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64zfr0, ID_AA64ZFR0, F64MM) != 0; + return FIELD_EX64_IDREG(id, ID_AA64ZFR0, F64MM) != 0; +} + +static inline bool isar_feature_aa64_sve_b16b16(const ARMISARegisters *id) +{ + return FIELD_EX64_IDREG(id, ID_AA64ZFR0, B16B16); +} + +static inline bool isar_feature_aa64_sme_b16b16(const ARMISARegisters *id) +{ + return FIELD_EX64_IDREG(id, ID_AA64SMFR0, B16B16); +} + +static inline bool isar_feature_aa64_sme_f16f16(const ARMISARegisters *id) +{ + return FIELD_EX64_IDREG(id, ID_AA64SMFR0, F16F16); } static inline bool isar_feature_aa64_sme_f64f64(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64smfr0, ID_AA64SMFR0, F64F64); + return FIELD_EX64_IDREG(id, ID_AA64SMFR0, F64F64); } static inline bool isar_feature_aa64_sme_i16i64(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64smfr0, ID_AA64SMFR0, I16I64) == 0xf; + return FIELD_EX64_IDREG(id, ID_AA64SMFR0, I16I64) == 0xf; } static inline bool isar_feature_aa64_sme_fa64(const ARMISARegisters *id) { - return FIELD_EX64(id->id_aa64smfr0, ID_AA64SMFR0, FA64); + return FIELD_EX64_IDREG(id, ID_AA64SMFR0, FA64); +} + +static inline bool isar_feature_aa64_sme2(const ARMISARegisters *id) +{ + return FIELD_EX64_IDREG(id, ID_AA64SMFR0, SMEVER) != 0; +} + +static inline bool isar_feature_aa64_sme2p1(const ARMISARegisters *id) +{ + return FIELD_EX64_IDREG(id, ID_AA64SMFR0, SMEVER) >= 2; +} + +/* + * Combinations of feature tests, for ease of use with TRANS_FEAT. 
+ */ +static inline bool isar_feature_aa64_sme_or_sve2p1(const ARMISARegisters *id) +{ + return isar_feature_aa64_sme(id) || isar_feature_aa64_sve2p1(id); +} + +static inline bool isar_feature_aa64_sme2_or_sve2p1(const ARMISARegisters *id) +{ + return isar_feature_aa64_sme2(id) || isar_feature_aa64_sve2p1(id); +} + +static inline bool isar_feature_aa64_sme2p1_or_sve2p1(const ARMISARegisters *id) +{ + return isar_feature_aa64_sme2p1(id) || isar_feature_aa64_sve2p1(id); +} + +static inline bool isar_feature_aa64_sme2_i16i64(const ARMISARegisters *id) +{ + return isar_feature_aa64_sme2(id) && isar_feature_aa64_sme_i16i64(id); +} + +static inline bool isar_feature_aa64_sme2_f64f64(const ARMISARegisters *id) +{ + return isar_feature_aa64_sme2(id) && isar_feature_aa64_sme_f64f64(id); } /* diff --git a/target/arm/cpu-irq.c b/target/arm/cpu-irq.c new file mode 100644 index 0000000..fe514cc --- /dev/null +++ b/target/arm/cpu-irq.c @@ -0,0 +1,381 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +/* + * QEMU ARM CPU - interrupt_request handling + * + * Copyright (c) 2003-2025 QEMU contributors + */ + +#include "qemu/osdep.h" +#include "cpu.h" +#include "accel/tcg/cpu-ops.h" +#include "internals.h" + +#ifdef CONFIG_TCG +static inline bool arm_excp_unmasked(CPUState *cs, unsigned int excp_idx, + unsigned int target_el, + unsigned int cur_el, bool secure, + uint64_t hcr_el2) +{ + CPUARMState *env = cpu_env(cs); + bool pstate_unmasked; + bool unmasked = false; + bool allIntMask = false; + + /* + * Don't take exceptions if they target a lower EL. + * This check should catch any exceptions that would not be taken + * but left pending. + */ + if (cur_el > target_el) { + return false; + } + + if (cpu_isar_feature(aa64_nmi, env_archcpu(env)) && + env->cp15.sctlr_el[target_el] & SCTLR_NMI && cur_el == target_el) { + allIntMask = env->pstate & PSTATE_ALLINT || + ((env->cp15.sctlr_el[target_el] & SCTLR_SPINTMASK) && + (env->pstate & PSTATE_SP)); + } + + switch (excp_idx) { + case EXCP_NMI: + pstate_unmasked = !allIntMask; + break; + + case EXCP_VINMI: + if (!(hcr_el2 & HCR_IMO) || (hcr_el2 & HCR_TGE)) { + /* VINMIs are only taken when hypervized. */ + return false; + } + return !allIntMask; + case EXCP_VFNMI: + if (!(hcr_el2 & HCR_FMO) || (hcr_el2 & HCR_TGE)) { + /* VFNMIs are only taken when hypervized. */ + return false; + } + return !allIntMask; + case EXCP_FIQ: + pstate_unmasked = (!(env->daif & PSTATE_F)) && (!allIntMask); + break; + + case EXCP_IRQ: + pstate_unmasked = (!(env->daif & PSTATE_I)) && (!allIntMask); + break; + + case EXCP_VFIQ: + if (!(hcr_el2 & HCR_FMO) || (hcr_el2 & HCR_TGE)) { + /* VFIQs are only taken when hypervized. */ + return false; + } + return !(env->daif & PSTATE_F) && (!allIntMask); + case EXCP_VIRQ: + if (!(hcr_el2 & HCR_IMO) || (hcr_el2 & HCR_TGE)) { + /* VIRQs are only taken when hypervized. */ + return false; + } + return !(env->daif & PSTATE_I) && (!allIntMask); + case EXCP_VSERR: + if (!(hcr_el2 & HCR_AMO) || (hcr_el2 & HCR_TGE)) { + /* VIRQs are only taken when hypervized. */ + return false; + } + return !(env->daif & PSTATE_A); + default: + g_assert_not_reached(); + } + + /* + * Use the target EL, current execution state and SCR/HCR settings to + * determine whether the corresponding CPSR bit is used to mask the + * interrupt. 
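A brief, illustrative note on the combined predicates just added: they exist so that a single decodetree gate can require, say, SME2 or SVE2p1. The sketch below shows the general shape of such a gate; TRANS_FEAT_SKETCH, trans_FOO and do_foo are hypothetical names, and the macro body is a simplification rather than QEMU's actual TRANS_FEAT definition.

#define TRANS_FEAT_SKETCH(NAME, FEAT, FUNC, ...)                           \
    static bool trans_##NAME(DisasContext *s, arg_##NAME *a)               \
    {                                                                       \
        /* Reject the insn unless the combined feature predicate holds. */  \
        return dc_isar_feature(FEAT, s) && FUNC(s, a, ##__VA_ARGS__);       \
    }

/* Hypothetical instruction gated on SME2 *or* SVE2p1 being implemented. */
TRANS_FEAT_SKETCH(FOO, aa64_sme2_or_sve2p1, do_foo, a)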
+ */ + if ((target_el > cur_el) && (target_el != 1)) { + /* Exceptions targeting a higher EL may not be maskable */ + if (arm_feature(env, ARM_FEATURE_AARCH64)) { + switch (target_el) { + case 2: + /* + * According to ARM DDI 0487H.a, an interrupt can be masked + * when HCR_E2H and HCR_TGE are both set regardless of the + * current Security state. Note that we need to revisit this + * part again once we need to support NMI. + */ + if ((hcr_el2 & (HCR_E2H | HCR_TGE)) != (HCR_E2H | HCR_TGE)) { + unmasked = true; + } + break; + case 3: + /* Interrupt cannot be masked when the target EL is 3 */ + unmasked = true; + break; + default: + g_assert_not_reached(); + } + } else { + /* + * The old 32-bit-only environment has a more complicated + * masking setup. HCR and SCR bits not only affect interrupt + * routing but also change the behaviour of masking. + */ + bool hcr, scr; + + switch (excp_idx) { + case EXCP_FIQ: + /* + * If FIQs are routed to EL3 or EL2 then there are cases where + * we override the CPSR.F in determining if the exception is + * masked or not. If neither of these are set then we fall back + * to the CPSR.F setting otherwise we further assess the state + * below. + */ + hcr = hcr_el2 & HCR_FMO; + scr = (env->cp15.scr_el3 & SCR_FIQ); + + /* + * When EL3 is 32-bit, the SCR.FW bit controls whether the + * CPSR.F bit masks FIQ interrupts when taken in non-secure + * state. If SCR.FW is set then FIQs can be masked by CPSR.F + * when non-secure but only when FIQs are only routed to EL3. + */ + scr = scr && !((env->cp15.scr_el3 & SCR_FW) && !hcr); + break; + case EXCP_IRQ: + /* + * When EL3 execution state is 32-bit, if HCR.IMO is set then + * we may override the CPSR.I masking when in non-secure state. + * The SCR.IRQ setting has already been taken into consideration + * when setting the target EL, so it does not have a further + * affect here. + */ + hcr = hcr_el2 & HCR_IMO; + scr = false; + break; + default: + g_assert_not_reached(); + } + + if ((scr || hcr) && !secure) { + unmasked = true; + } + } + } + + /* + * The PSTATE bits only mask the interrupt if we have not overridden the + * ability above. + */ + return unmasked || pstate_unmasked; +} + +bool arm_cpu_exec_interrupt(CPUState *cs, int interrupt_request) +{ + CPUARMState *env = cpu_env(cs); + uint32_t cur_el = arm_current_el(env); + bool secure = arm_is_secure(env); + uint64_t hcr_el2 = arm_hcr_el2_eff(env); + uint32_t target_el; + uint32_t excp_idx; + + /* The prioritization of interrupts is IMPLEMENTATION DEFINED. 
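To make the higher-EL branch above concrete, here is the same rule spelled out for the common AArch64 case, as a worked example (comment only, derived directly from the code above):

/*
 * Worked example: physical IRQ routed to EL2 while executing at EL0/EL1,
 * CPU in AArch64 state.
 *
 *   HCR_EL2.E2H = 1 and HCR_EL2.TGE = 1  ->  'unmasked' stays false, so the
 *                                            IRQ is still maskable by PSTATE.I
 *   any other E2H/TGE combination        ->  'unmasked' becomes true and
 *                                            PSTATE.I is ignored
 *
 * An interrupt whose target EL is 3 is never maskable by PSTATE in this path.
 */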
*/ + + if (cpu_isar_feature(aa64_nmi, env_archcpu(env)) && + (arm_sctlr(env, cur_el) & SCTLR_NMI)) { + if (interrupt_request & CPU_INTERRUPT_NMI) { + excp_idx = EXCP_NMI; + target_el = arm_phys_excp_target_el(cs, excp_idx, cur_el, secure); + if (arm_excp_unmasked(cs, excp_idx, target_el, + cur_el, secure, hcr_el2)) { + goto found; + } + } + if (interrupt_request & CPU_INTERRUPT_VINMI) { + excp_idx = EXCP_VINMI; + target_el = 1; + if (arm_excp_unmasked(cs, excp_idx, target_el, + cur_el, secure, hcr_el2)) { + goto found; + } + } + if (interrupt_request & CPU_INTERRUPT_VFNMI) { + excp_idx = EXCP_VFNMI; + target_el = 1; + if (arm_excp_unmasked(cs, excp_idx, target_el, + cur_el, secure, hcr_el2)) { + goto found; + } + } + } else { + /* + * NMI disabled: interrupts with superpriority are handled + * as if they didn't have it + */ + if (interrupt_request & CPU_INTERRUPT_NMI) { + interrupt_request |= CPU_INTERRUPT_HARD; + } + if (interrupt_request & CPU_INTERRUPT_VINMI) { + interrupt_request |= CPU_INTERRUPT_VIRQ; + } + if (interrupt_request & CPU_INTERRUPT_VFNMI) { + interrupt_request |= CPU_INTERRUPT_VFIQ; + } + } + + if (interrupt_request & CPU_INTERRUPT_FIQ) { + excp_idx = EXCP_FIQ; + target_el = arm_phys_excp_target_el(cs, excp_idx, cur_el, secure); + if (arm_excp_unmasked(cs, excp_idx, target_el, + cur_el, secure, hcr_el2)) { + goto found; + } + } + if (interrupt_request & CPU_INTERRUPT_HARD) { + excp_idx = EXCP_IRQ; + target_el = arm_phys_excp_target_el(cs, excp_idx, cur_el, secure); + if (arm_excp_unmasked(cs, excp_idx, target_el, + cur_el, secure, hcr_el2)) { + goto found; + } + } + if (interrupt_request & CPU_INTERRUPT_VIRQ) { + excp_idx = EXCP_VIRQ; + target_el = 1; + if (arm_excp_unmasked(cs, excp_idx, target_el, + cur_el, secure, hcr_el2)) { + goto found; + } + } + if (interrupt_request & CPU_INTERRUPT_VFIQ) { + excp_idx = EXCP_VFIQ; + target_el = 1; + if (arm_excp_unmasked(cs, excp_idx, target_el, + cur_el, secure, hcr_el2)) { + goto found; + } + } + if (interrupt_request & CPU_INTERRUPT_VSERR) { + excp_idx = EXCP_VSERR; + target_el = 1; + if (arm_excp_unmasked(cs, excp_idx, target_el, + cur_el, secure, hcr_el2)) { + /* Taking a virtual abort clears HCR_EL2.VSE */ + env->cp15.hcr_el2 &= ~HCR_VSE; + cpu_reset_interrupt(cs, CPU_INTERRUPT_VSERR); + goto found; + } + } + return false; + + found: + cs->exception_index = excp_idx; + env->exception.target_el = target_el; + cs->cc->tcg_ops->do_interrupt(cs); + return true; +} +#endif /* CONFIG_TCG */ + +void arm_cpu_update_virq(ARMCPU *cpu) +{ + /* + * Update the interrupt level for VIRQ, which is the logical OR of + * the HCR_EL2.VI bit and the input line level from the GIC. + */ + CPUARMState *env = &cpu->env; + CPUState *cs = CPU(cpu); + + bool new_state = ((arm_hcr_el2_eff(env) & HCR_VI) && + !(arm_hcrx_el2_eff(env) & HCRX_VINMI)) || + (env->irq_line_state & CPU_INTERRUPT_VIRQ); + + if (new_state != cpu_test_interrupt(cs, CPU_INTERRUPT_VIRQ)) { + if (new_state) { + cpu_interrupt(cs, CPU_INTERRUPT_VIRQ); + } else { + cpu_reset_interrupt(cs, CPU_INTERRUPT_VIRQ); + } + } +} + +void arm_cpu_update_vfiq(ARMCPU *cpu) +{ + /* + * Update the interrupt level for VFIQ, which is the logical OR of + * the HCR_EL2.VF bit and the input line level from the GIC. 
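All of the arm_cpu_update_* helpers in this new file follow one pattern: compute the desired level, compare it with the currently latched bit, and only then raise or clear the line. A condensed sketch of that pattern follows; the helper name is hypothetical, and cpu_test_interrupt() is assumed to simply report whether any of the given bits are set in the CPU's interrupt_request.

static void arm_cpu_update_virt_line_sketch(CPUState *cs, int irq_mask,
                                            bool new_state)
{
    /*
     * Avoid redundant cpu_interrupt()/cpu_reset_interrupt() calls:
     * only act when the latched level actually changes.
     */
    if (new_state != cpu_test_interrupt(cs, irq_mask)) {
        if (new_state) {
            cpu_interrupt(cs, irq_mask);
        } else {
            cpu_reset_interrupt(cs, irq_mask);
        }
    }
}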
+ */ + CPUARMState *env = &cpu->env; + CPUState *cs = CPU(cpu); + + bool new_state = ((arm_hcr_el2_eff(env) & HCR_VF) && + !(arm_hcrx_el2_eff(env) & HCRX_VFNMI)) || + (env->irq_line_state & CPU_INTERRUPT_VFIQ); + + if (new_state != cpu_test_interrupt(cs, CPU_INTERRUPT_VFIQ)) { + if (new_state) { + cpu_interrupt(cs, CPU_INTERRUPT_VFIQ); + } else { + cpu_reset_interrupt(cs, CPU_INTERRUPT_VFIQ); + } + } +} + +void arm_cpu_update_vinmi(ARMCPU *cpu) +{ + /* + * Update the interrupt level for VINMI, which is the logical OR of + * the HCRX_EL2.VINMI bit and the input line level from the GIC. + */ + CPUARMState *env = &cpu->env; + CPUState *cs = CPU(cpu); + + bool new_state = ((arm_hcr_el2_eff(env) & HCR_VI) && + (arm_hcrx_el2_eff(env) & HCRX_VINMI)) || + (env->irq_line_state & CPU_INTERRUPT_VINMI); + + if (new_state != cpu_test_interrupt(cs, CPU_INTERRUPT_VINMI)) { + if (new_state) { + cpu_interrupt(cs, CPU_INTERRUPT_VINMI); + } else { + cpu_reset_interrupt(cs, CPU_INTERRUPT_VINMI); + } + } +} + +void arm_cpu_update_vfnmi(ARMCPU *cpu) +{ + /* + * Update the interrupt level for VFNMI, which is the HCRX_EL2.VFNMI bit. + */ + CPUARMState *env = &cpu->env; + CPUState *cs = CPU(cpu); + + bool new_state = (arm_hcr_el2_eff(env) & HCR_VF) && + (arm_hcrx_el2_eff(env) & HCRX_VFNMI); + + if (new_state != cpu_test_interrupt(cs, CPU_INTERRUPT_VFNMI)) { + if (new_state) { + cpu_interrupt(cs, CPU_INTERRUPT_VFNMI); + } else { + cpu_reset_interrupt(cs, CPU_INTERRUPT_VFNMI); + } + } +} + +void arm_cpu_update_vserr(ARMCPU *cpu) +{ + /* + * Update the interrupt level for VSERR, which is the HCR_EL2.VSE bit. + */ + CPUARMState *env = &cpu->env; + CPUState *cs = CPU(cpu); + + bool new_state = env->cp15.hcr_el2 & HCR_VSE; + + if (new_state != cpu_test_interrupt(cs, CPU_INTERRUPT_VSERR)) { + if (new_state) { + cpu_interrupt(cs, CPU_INTERRUPT_VSERR); + } else { + cpu_reset_interrupt(cs, CPU_INTERRUPT_VSERR); + } + } +} + diff --git a/target/arm/cpu-sysregs.h b/target/arm/cpu-sysregs.h new file mode 100644 index 0000000..7877a3b --- /dev/null +++ b/target/arm/cpu-sysregs.h @@ -0,0 +1,42 @@ +/* + * Definitions for Arm ID system registers + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ +#ifndef ARM_CPU_SYSREGS_H +#define ARM_CPU_SYSREGS_H + +/* + * Following is similar to the coprocessor regs encodings, but with an argument + * ordering that matches the ARM ARM. We also reuse the various CP_REG_ defines + * that actually are the same as the equivalent KVM_REG_ values. 
+ */ +#define ENCODE_ID_REG(op0, op1, crn, crm, op2) \ + (((op0) << CP_REG_ARM64_SYSREG_OP0_SHIFT) | \ + ((op1) << CP_REG_ARM64_SYSREG_OP1_SHIFT) | \ + ((crn) << CP_REG_ARM64_SYSREG_CRN_SHIFT) | \ + ((crm) << CP_REG_ARM64_SYSREG_CRM_SHIFT) | \ + ((op2) << CP_REG_ARM64_SYSREG_OP2_SHIFT)) + +#define DEF(NAME, OP0, OP1, CRN, CRM, OP2) NAME##_IDX, + +typedef enum ARMIDRegisterIdx { +#include "cpu-sysregs.h.inc" + NUM_ID_IDX, +} ARMIDRegisterIdx; + +#undef DEF +#define DEF(NAME, OP0, OP1, CRN, CRM, OP2) \ + SYS_##NAME = ENCODE_ID_REG(OP0, OP1, CRN, CRM, OP2), + +typedef enum ARMSysRegs { +#include "cpu-sysregs.h.inc" +} ARMSysRegs; + +#undef DEF + +extern const uint32_t id_register_sysreg[NUM_ID_IDX]; + +int get_sysreg_idx(ARMSysRegs sysreg); + +#endif /* ARM_CPU_SYSREGS_H */ diff --git a/target/arm/cpu-sysregs.h.inc b/target/arm/cpu-sysregs.h.inc new file mode 100644 index 0000000..2bb2861 --- /dev/null +++ b/target/arm/cpu-sysregs.h.inc @@ -0,0 +1,41 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +DEF(ID_AA64PFR0_EL1, 3, 0, 0, 4, 0) +DEF(ID_AA64PFR1_EL1, 3, 0, 0, 4, 1) +DEF(ID_AA64PFR2_EL1, 3, 0, 0, 4, 2) +DEF(ID_AA64SMFR0_EL1, 3, 0, 0, 4, 5) +DEF(ID_AA64DFR0_EL1, 3, 0, 0, 5, 0) +DEF(ID_AA64DFR1_EL1, 3, 0, 0, 5, 1) +DEF(ID_AA64AFR0_EL1, 3, 0, 0, 5, 4) +DEF(ID_AA64AFR1_EL1, 3, 0, 0, 5, 5) +DEF(ID_AA64ISAR0_EL1, 3, 0, 0, 6, 0) +DEF(ID_AA64ISAR1_EL1, 3, 0, 0, 6, 1) +DEF(ID_AA64ISAR2_EL1, 3, 0, 0, 6, 2) +DEF(ID_AA64MMFR0_EL1, 3, 0, 0, 7, 0) +DEF(ID_AA64MMFR1_EL1, 3, 0, 0, 7, 1) +DEF(ID_AA64MMFR2_EL1, 3, 0, 0, 7, 2) +DEF(ID_AA64MMFR3_EL1, 3, 0, 0, 7, 3) +DEF(ID_PFR0_EL1, 3, 0, 0, 1, 0) +DEF(ID_PFR1_EL1, 3, 0, 0, 1, 1) +DEF(ID_DFR0_EL1, 3, 0, 0, 1, 2) +DEF(ID_AFR0_EL1, 3, 0, 0, 1, 3) +DEF(ID_MMFR0_EL1, 3, 0, 0, 1, 4) +DEF(ID_MMFR1_EL1, 3, 0, 0, 1, 5) +DEF(ID_MMFR2_EL1, 3, 0, 0, 1, 6) +DEF(ID_MMFR3_EL1, 3, 0, 0, 1, 7) +DEF(ID_ISAR0_EL1, 3, 0, 0, 2, 0) +DEF(ID_ISAR1_EL1, 3, 0, 0, 2, 1) +DEF(ID_ISAR2_EL1, 3, 0, 0, 2, 2) +DEF(ID_ISAR3_EL1, 3, 0, 0, 2, 3) +DEF(ID_ISAR4_EL1, 3, 0, 0, 2, 4) +DEF(ID_ISAR5_EL1, 3, 0, 0, 2, 5) +DEF(ID_MMFR4_EL1, 3, 0, 0, 2, 6) +DEF(ID_ISAR6_EL1, 3, 0, 0, 2, 7) +DEF(MVFR0_EL1, 3, 0, 0, 3, 0) +DEF(MVFR1_EL1, 3, 0, 0, 3, 1) +DEF(MVFR2_EL1, 3, 0, 0, 3, 2) +DEF(ID_PFR2_EL1, 3, 0, 0, 3, 4) +DEF(ID_DFR1_EL1, 3, 0, 0, 3, 5) +DEF(ID_MMFR5_EL1, 3, 0, 0, 3, 6) +DEF(CLIDR_EL1, 3, 1, 0, 0, 1) +DEF(ID_AA64ZFR0_EL1, 3, 0, 0, 4, 4) +DEF(CTR_EL0, 3, 3, 0, 0, 1) diff --git a/target/arm/cpu.c b/target/arm/cpu.c index ca5ed78..3b556f1 100644 --- a/target/arm/cpu.c +++ b/target/arm/cpu.c @@ -23,7 +23,6 @@ #include "qemu/timer.h" #include "qemu/log.h" #include "exec/page-vary.h" -#include "exec/tswap.h" #include "target/arm/idau.h" #include "qemu/module.h" #include "qapi/error.h" @@ -53,6 +52,8 @@ #include "target/arm/cpu-qom.h" #include "target/arm/gtimer.h" +#include "trace.h" + static void arm_cpu_set_pc(CPUState *cs, vaddr value) { ARMCPU *cpu = ARM_CPU(cs); @@ -143,11 +144,11 @@ static bool arm_cpu_has_work(CPUState *cs) ARMCPU *cpu = ARM_CPU(cs); return (cpu->power_state != PSCI_OFF) - && cs->interrupt_request & - (CPU_INTERRUPT_FIQ | CPU_INTERRUPT_HARD - | CPU_INTERRUPT_NMI | CPU_INTERRUPT_VINMI | CPU_INTERRUPT_VFNMI - | CPU_INTERRUPT_VFIQ | CPU_INTERRUPT_VIRQ | CPU_INTERRUPT_VSERR - | CPU_INTERRUPT_EXITTB); + && cpu_test_interrupt(cs, + CPU_INTERRUPT_FIQ | CPU_INTERRUPT_HARD + | CPU_INTERRUPT_NMI | CPU_INTERRUPT_VINMI | CPU_INTERRUPT_VFNMI + | CPU_INTERRUPT_VFIQ | CPU_INTERRUPT_VIRQ | CPU_INTERRUPT_VSERR + | CPU_INTERRUPT_EXITTB); } #endif /* !CONFIG_USER_ONLY */ @@ -193,14 +194,8 @@ 
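The two declarations at the end of cpu-sysregs.h above, id_register_sysreg[] and get_sysreg_idx(), pair the generated index enum with the encoded sysreg values. Their definitions live in a source file not shown in this hunk; the sketch below is only an illustration of how that pairing can be built from the same DEF() list, not the actual implementation.

/* Build the idx -> encoded-sysreg table from the same DEF() list. */
#define DEF(NAME, OP0, OP1, CRN, CRM, OP2) \
    [NAME##_IDX] = ENCODE_ID_REG(OP0, OP1, CRN, CRM, OP2),
const uint32_t id_register_sysreg[NUM_ID_IDX] = {
#include "cpu-sysregs.h.inc"
};
#undef DEF

/* Reverse lookup: encoded sysreg -> ARMIDRegisterIdx, or -1 if untracked. */
int get_sysreg_idx(ARMSysRegs sysreg)
{
    for (int i = 0; i < NUM_ID_IDX; i++) {
        if (id_register_sysreg[i] == sysreg) {
            return i;
        }
    }
    return -1;
}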
static void cp_reg_reset(gpointer key, gpointer value, gpointer opaque) * This is basically only used for fields in non-core coprocessors * (like the pxa2xx ones). */ - if (!ri->fieldoffset) { - return; - } - - if (cpreg_field_is_64bit(ri)) { - CPREG_FIELD64(&cpu->env, ri) = ri->resetvalue; - } else { - CPREG_FIELD32(&cpu->env, ri) = ri->resetvalue; + if (ri->fieldoffset) { + raw_write(&cpu->env, ri, ri->resetvalue); } } @@ -232,6 +227,8 @@ static void arm_cpu_reset_hold(Object *obj, ResetType type) ARMCPUClass *acc = ARM_CPU_GET_CLASS(obj); CPUARMState *env = &cpu->env; + trace_arm_cpu_reset(arm_cpu_mp_affinity(cpu)); + if (acc->parent_phases.hold) { acc->parent_phases.hold(obj, type); } @@ -248,10 +245,6 @@ static void arm_cpu_reset_hold(Object *obj, ResetType type) cpu->power_state = cs->start_powered_off ? PSCI_OFF : PSCI_ON; - if (arm_feature(env, ARM_FEATURE_IWMMXT)) { - env->iwmmxt.cregs[ARM_IWMMXT_wCID] = 0x69051000 | 'Q'; - } - if (arm_feature(env, ARM_FEATURE_AARCH64)) { /* 64 bit CPUs always start in 64 bit mode */ env->aarch64 = true; @@ -318,6 +311,10 @@ static void arm_cpu_reset_hold(Object *obj, ResetType type) env->cp15.mdscr_el1 |= 1 << 12; /* Enable FEAT_MOPS */ env->cp15.sctlr_el[1] |= SCTLR_MSCEN; + /* For Linux, GCSPR_EL0 is always readable. */ + if (cpu_isar_feature(aa64_gcs, cpu)) { + env->cp15.gcscr_el[0] = GCSCRE0_NTR; + } #else /* Reset into the highest available EL */ if (arm_feature(env, ARM_FEATURE_EL3)) { @@ -350,11 +347,6 @@ static void arm_cpu_reset_hold(Object *obj, ResetType type) env->uncached_cpsr = ARM_CPU_MODE_USR; /* For user mode we must enable access to coprocessors */ env->vfp.xregs[ARM_VFP_FPEXC] = 1 << 30; - if (arm_feature(env, ARM_FEATURE_IWMMXT)) { - env->cp15.c15_cpar = 3; - } else if (arm_feature(env, ARM_FEATURE_XSCALE)) { - env->cp15.c15_cpar = 1; - } #else /* @@ -554,11 +546,15 @@ static void arm_cpu_reset_hold(Object *obj, ResetType type) set_flush_inputs_to_zero(1, &env->vfp.fp_status[FPST_STD]); set_default_nan_mode(1, &env->vfp.fp_status[FPST_STD]); set_default_nan_mode(1, &env->vfp.fp_status[FPST_STD_F16]); + set_default_nan_mode(1, &env->vfp.fp_status[FPST_ZA]); + set_default_nan_mode(1, &env->vfp.fp_status[FPST_ZA_F16]); arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A32]); arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A64]); + arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_ZA]); arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_STD]); arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A32_F16]); arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A64_F16]); + arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_ZA_F16]); arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_STD_F16]); arm_set_ah_fp_behaviours(&env->vfp.fp_status[FPST_AH]); set_flush_to_zero(1, &env->vfp.fp_status[FPST_AH]); @@ -586,6 +582,8 @@ void arm_emulate_firmware_reset(CPUState *cpustate, int target_el) bool have_el3 = arm_feature(env, ARM_FEATURE_EL3); bool have_el2 = arm_feature(env, ARM_FEATURE_EL2); + trace_arm_emulate_firmware_reset(arm_cpu_mp_affinity(cpu), target_el); + /* * Check we have the EL we're aiming for. 
If that is the * highest implemented EL, then cpu_reset has already done @@ -631,6 +629,9 @@ void arm_emulate_firmware_reset(CPUState *cpustate, int target_el) env->cp15.cptr_el[3] |= R_CPTR_EL3_ESM_MASK; env->cp15.scr_el3 |= SCR_ENTP2; env->vfp.smcr_el[3] = 0xf; + if (cpu_isar_feature(aa64_sme2, cpu)) { + env->vfp.smcr_el[3] |= R_SMCR_EZT0_MASK; + } } if (cpu_isar_feature(aa64_hcx, cpu)) { env->cp15.scr_el3 |= SCR_HXEN; @@ -638,6 +639,22 @@ void arm_emulate_firmware_reset(CPUState *cpustate, int target_el) if (cpu_isar_feature(aa64_fgt, cpu)) { env->cp15.scr_el3 |= SCR_FGTEN; } + if (cpu_isar_feature(aa64_gcs, cpu)) { + env->cp15.scr_el3 |= SCR_GCSEN; + } + if (cpu_isar_feature(aa64_tcr2, cpu)) { + env->cp15.scr_el3 |= SCR_TCR2EN; + } + if (cpu_isar_feature(aa64_sctlr2, cpu)) { + env->cp15.scr_el3 |= SCR_SCTLR2EN; + } + if (cpu_isar_feature(aa64_s1pie, cpu) || + cpu_isar_feature(aa64_s2pie, cpu)) { + env->cp15.scr_el3 |= SCR_PIEN; + } + if (cpu_isar_feature(aa64_mec, cpu)) { + env->cp15.scr_el3 |= SCR_MECEN; + } } if (target_el == 2) { @@ -674,376 +691,6 @@ void arm_emulate_firmware_reset(CPUState *cpustate, int target_el) } -#if defined(CONFIG_TCG) && !defined(CONFIG_USER_ONLY) - -static inline bool arm_excp_unmasked(CPUState *cs, unsigned int excp_idx, - unsigned int target_el, - unsigned int cur_el, bool secure, - uint64_t hcr_el2) -{ - CPUARMState *env = cpu_env(cs); - bool pstate_unmasked; - bool unmasked = false; - bool allIntMask = false; - - /* - * Don't take exceptions if they target a lower EL. - * This check should catch any exceptions that would not be taken - * but left pending. - */ - if (cur_el > target_el) { - return false; - } - - if (cpu_isar_feature(aa64_nmi, env_archcpu(env)) && - env->cp15.sctlr_el[target_el] & SCTLR_NMI && cur_el == target_el) { - allIntMask = env->pstate & PSTATE_ALLINT || - ((env->cp15.sctlr_el[target_el] & SCTLR_SPINTMASK) && - (env->pstate & PSTATE_SP)); - } - - switch (excp_idx) { - case EXCP_NMI: - pstate_unmasked = !allIntMask; - break; - - case EXCP_VINMI: - if (!(hcr_el2 & HCR_IMO) || (hcr_el2 & HCR_TGE)) { - /* VINMIs are only taken when hypervized. */ - return false; - } - return !allIntMask; - case EXCP_VFNMI: - if (!(hcr_el2 & HCR_FMO) || (hcr_el2 & HCR_TGE)) { - /* VFNMIs are only taken when hypervized. */ - return false; - } - return !allIntMask; - case EXCP_FIQ: - pstate_unmasked = (!(env->daif & PSTATE_F)) && (!allIntMask); - break; - - case EXCP_IRQ: - pstate_unmasked = (!(env->daif & PSTATE_I)) && (!allIntMask); - break; - - case EXCP_VFIQ: - if (!(hcr_el2 & HCR_FMO) || (hcr_el2 & HCR_TGE)) { - /* VFIQs are only taken when hypervized. */ - return false; - } - return !(env->daif & PSTATE_F) && (!allIntMask); - case EXCP_VIRQ: - if (!(hcr_el2 & HCR_IMO) || (hcr_el2 & HCR_TGE)) { - /* VIRQs are only taken when hypervized. */ - return false; - } - return !(env->daif & PSTATE_I) && (!allIntMask); - case EXCP_VSERR: - if (!(hcr_el2 & HCR_AMO) || (hcr_el2 & HCR_TGE)) { - /* VIRQs are only taken when hypervized. */ - return false; - } - return !(env->daif & PSTATE_A); - default: - g_assert_not_reached(); - } - - /* - * Use the target EL, current execution state and SCR/HCR settings to - * determine whether the corresponding CPSR bit is used to mask the - * interrupt. 
- */ - if ((target_el > cur_el) && (target_el != 1)) { - /* Exceptions targeting a higher EL may not be maskable */ - if (arm_feature(env, ARM_FEATURE_AARCH64)) { - switch (target_el) { - case 2: - /* - * According to ARM DDI 0487H.a, an interrupt can be masked - * when HCR_E2H and HCR_TGE are both set regardless of the - * current Security state. Note that we need to revisit this - * part again once we need to support NMI. - */ - if ((hcr_el2 & (HCR_E2H | HCR_TGE)) != (HCR_E2H | HCR_TGE)) { - unmasked = true; - } - break; - case 3: - /* Interrupt cannot be masked when the target EL is 3 */ - unmasked = true; - break; - default: - g_assert_not_reached(); - } - } else { - /* - * The old 32-bit-only environment has a more complicated - * masking setup. HCR and SCR bits not only affect interrupt - * routing but also change the behaviour of masking. - */ - bool hcr, scr; - - switch (excp_idx) { - case EXCP_FIQ: - /* - * If FIQs are routed to EL3 or EL2 then there are cases where - * we override the CPSR.F in determining if the exception is - * masked or not. If neither of these are set then we fall back - * to the CPSR.F setting otherwise we further assess the state - * below. - */ - hcr = hcr_el2 & HCR_FMO; - scr = (env->cp15.scr_el3 & SCR_FIQ); - - /* - * When EL3 is 32-bit, the SCR.FW bit controls whether the - * CPSR.F bit masks FIQ interrupts when taken in non-secure - * state. If SCR.FW is set then FIQs can be masked by CPSR.F - * when non-secure but only when FIQs are only routed to EL3. - */ - scr = scr && !((env->cp15.scr_el3 & SCR_FW) && !hcr); - break; - case EXCP_IRQ: - /* - * When EL3 execution state is 32-bit, if HCR.IMO is set then - * we may override the CPSR.I masking when in non-secure state. - * The SCR.IRQ setting has already been taken into consideration - * when setting the target EL, so it does not have a further - * affect here. - */ - hcr = hcr_el2 & HCR_IMO; - scr = false; - break; - default: - g_assert_not_reached(); - } - - if ((scr || hcr) && !secure) { - unmasked = true; - } - } - } - - /* - * The PSTATE bits only mask the interrupt if we have not overridden the - * ability above. - */ - return unmasked || pstate_unmasked; -} - -static bool arm_cpu_exec_interrupt(CPUState *cs, int interrupt_request) -{ - CPUARMState *env = cpu_env(cs); - uint32_t cur_el = arm_current_el(env); - bool secure = arm_is_secure(env); - uint64_t hcr_el2 = arm_hcr_el2_eff(env); - uint32_t target_el; - uint32_t excp_idx; - - /* The prioritization of interrupts is IMPLEMENTATION DEFINED. 
*/ - - if (cpu_isar_feature(aa64_nmi, env_archcpu(env)) && - (arm_sctlr(env, cur_el) & SCTLR_NMI)) { - if (interrupt_request & CPU_INTERRUPT_NMI) { - excp_idx = EXCP_NMI; - target_el = arm_phys_excp_target_el(cs, excp_idx, cur_el, secure); - if (arm_excp_unmasked(cs, excp_idx, target_el, - cur_el, secure, hcr_el2)) { - goto found; - } - } - if (interrupt_request & CPU_INTERRUPT_VINMI) { - excp_idx = EXCP_VINMI; - target_el = 1; - if (arm_excp_unmasked(cs, excp_idx, target_el, - cur_el, secure, hcr_el2)) { - goto found; - } - } - if (interrupt_request & CPU_INTERRUPT_VFNMI) { - excp_idx = EXCP_VFNMI; - target_el = 1; - if (arm_excp_unmasked(cs, excp_idx, target_el, - cur_el, secure, hcr_el2)) { - goto found; - } - } - } else { - /* - * NMI disabled: interrupts with superpriority are handled - * as if they didn't have it - */ - if (interrupt_request & CPU_INTERRUPT_NMI) { - interrupt_request |= CPU_INTERRUPT_HARD; - } - if (interrupt_request & CPU_INTERRUPT_VINMI) { - interrupt_request |= CPU_INTERRUPT_VIRQ; - } - if (interrupt_request & CPU_INTERRUPT_VFNMI) { - interrupt_request |= CPU_INTERRUPT_VFIQ; - } - } - - if (interrupt_request & CPU_INTERRUPT_FIQ) { - excp_idx = EXCP_FIQ; - target_el = arm_phys_excp_target_el(cs, excp_idx, cur_el, secure); - if (arm_excp_unmasked(cs, excp_idx, target_el, - cur_el, secure, hcr_el2)) { - goto found; - } - } - if (interrupt_request & CPU_INTERRUPT_HARD) { - excp_idx = EXCP_IRQ; - target_el = arm_phys_excp_target_el(cs, excp_idx, cur_el, secure); - if (arm_excp_unmasked(cs, excp_idx, target_el, - cur_el, secure, hcr_el2)) { - goto found; - } - } - if (interrupt_request & CPU_INTERRUPT_VIRQ) { - excp_idx = EXCP_VIRQ; - target_el = 1; - if (arm_excp_unmasked(cs, excp_idx, target_el, - cur_el, secure, hcr_el2)) { - goto found; - } - } - if (interrupt_request & CPU_INTERRUPT_VFIQ) { - excp_idx = EXCP_VFIQ; - target_el = 1; - if (arm_excp_unmasked(cs, excp_idx, target_el, - cur_el, secure, hcr_el2)) { - goto found; - } - } - if (interrupt_request & CPU_INTERRUPT_VSERR) { - excp_idx = EXCP_VSERR; - target_el = 1; - if (arm_excp_unmasked(cs, excp_idx, target_el, - cur_el, secure, hcr_el2)) { - /* Taking a virtual abort clears HCR_EL2.VSE */ - env->cp15.hcr_el2 &= ~HCR_VSE; - cpu_reset_interrupt(cs, CPU_INTERRUPT_VSERR); - goto found; - } - } - return false; - - found: - cs->exception_index = excp_idx; - env->exception.target_el = target_el; - cs->cc->tcg_ops->do_interrupt(cs); - return true; -} - -#endif /* CONFIG_TCG && !CONFIG_USER_ONLY */ - -void arm_cpu_update_virq(ARMCPU *cpu) -{ - /* - * Update the interrupt level for VIRQ, which is the logical OR of - * the HCR_EL2.VI bit and the input line level from the GIC. - */ - CPUARMState *env = &cpu->env; - CPUState *cs = CPU(cpu); - - bool new_state = ((arm_hcr_el2_eff(env) & HCR_VI) && - !(arm_hcrx_el2_eff(env) & HCRX_VINMI)) || - (env->irq_line_state & CPU_INTERRUPT_VIRQ); - - if (new_state != ((cs->interrupt_request & CPU_INTERRUPT_VIRQ) != 0)) { - if (new_state) { - cpu_interrupt(cs, CPU_INTERRUPT_VIRQ); - } else { - cpu_reset_interrupt(cs, CPU_INTERRUPT_VIRQ); - } - } -} - -void arm_cpu_update_vfiq(ARMCPU *cpu) -{ - /* - * Update the interrupt level for VFIQ, which is the logical OR of - * the HCR_EL2.VF bit and the input line level from the GIC. 
- */ - CPUARMState *env = &cpu->env; - CPUState *cs = CPU(cpu); - - bool new_state = ((arm_hcr_el2_eff(env) & HCR_VF) && - !(arm_hcrx_el2_eff(env) & HCRX_VFNMI)) || - (env->irq_line_state & CPU_INTERRUPT_VFIQ); - - if (new_state != ((cs->interrupt_request & CPU_INTERRUPT_VFIQ) != 0)) { - if (new_state) { - cpu_interrupt(cs, CPU_INTERRUPT_VFIQ); - } else { - cpu_reset_interrupt(cs, CPU_INTERRUPT_VFIQ); - } - } -} - -void arm_cpu_update_vinmi(ARMCPU *cpu) -{ - /* - * Update the interrupt level for VINMI, which is the logical OR of - * the HCRX_EL2.VINMI bit and the input line level from the GIC. - */ - CPUARMState *env = &cpu->env; - CPUState *cs = CPU(cpu); - - bool new_state = ((arm_hcr_el2_eff(env) & HCR_VI) && - (arm_hcrx_el2_eff(env) & HCRX_VINMI)) || - (env->irq_line_state & CPU_INTERRUPT_VINMI); - - if (new_state != ((cs->interrupt_request & CPU_INTERRUPT_VINMI) != 0)) { - if (new_state) { - cpu_interrupt(cs, CPU_INTERRUPT_VINMI); - } else { - cpu_reset_interrupt(cs, CPU_INTERRUPT_VINMI); - } - } -} - -void arm_cpu_update_vfnmi(ARMCPU *cpu) -{ - /* - * Update the interrupt level for VFNMI, which is the HCRX_EL2.VFNMI bit. - */ - CPUARMState *env = &cpu->env; - CPUState *cs = CPU(cpu); - - bool new_state = (arm_hcr_el2_eff(env) & HCR_VF) && - (arm_hcrx_el2_eff(env) & HCRX_VFNMI); - - if (new_state != ((cs->interrupt_request & CPU_INTERRUPT_VFNMI) != 0)) { - if (new_state) { - cpu_interrupt(cs, CPU_INTERRUPT_VFNMI); - } else { - cpu_reset_interrupt(cs, CPU_INTERRUPT_VFNMI); - } - } -} - -void arm_cpu_update_vserr(ARMCPU *cpu) -{ - /* - * Update the interrupt level for VSERR, which is the HCR_EL2.VSE bit. - */ - CPUARMState *env = &cpu->env; - CPUState *cs = CPU(cpu); - - bool new_state = env->cp15.hcr_el2 & HCR_VSE; - - if (new_state != ((cs->interrupt_request & CPU_INTERRUPT_VSERR) != 0)) { - if (new_state) { - cpu_interrupt(cs, CPU_INTERRUPT_VSERR); - } else { - cpu_reset_interrupt(cs, CPU_INTERRUPT_VSERR); - } - } -} - #ifndef CONFIG_USER_ONLY static void arm_cpu_set_irq(void *opaque, int irq, int level) { @@ -1186,7 +833,7 @@ static void aarch64_cpu_dump_state(CPUState *cs, FILE *f, int flags) { ARMCPU *cpu = ARM_CPU(cs); CPUARMState *env = &cpu->env; - uint32_t psr = pstate_read(env); + uint64_t psr = pstate_read(env); int i, j; int el = arm_current_el(env); uint64_t hcr = arm_hcr_el2_eff(env); @@ -1208,7 +855,7 @@ static void aarch64_cpu_dump_state(CPUState *cs, FILE *f, int flags) } else { ns_status = ""; } - qemu_fprintf(f, "PSTATE=%08x %c%c%c%c %sEL%d%c", + qemu_fprintf(f, "PSTATE=%016" PRIx64 " %c%c%c%c %sEL%d%c", psr, psr & PSTATE_N ? 'N' : '-', psr & PSTATE_Z ? 'Z' : '-', @@ -1225,7 +872,7 @@ static void aarch64_cpu_dump_state(CPUState *cs, FILE *f, int flags) (FIELD_EX64(env->svcr, SVCR, SM) ? 'S' : '-')); } if (cpu_isar_feature(aa64_bti, cpu)) { - qemu_fprintf(f, " BTYPE=%d", (psr & PSTATE_BTYPE) >> 10); + qemu_fprintf(f, " BTYPE=%d", (int)(psr & PSTATE_BTYPE) >> 10); } qemu_fprintf(f, "%s%s%s", (hcr & HCR_NV) ? " NV" : "", @@ -1331,8 +978,8 @@ static void aarch64_cpu_dump_state(CPUState *cs, FILE *f, int flags) qemu_fprintf(f, "ZA[%0*d]=", svl_lg10, i); for (j = zcr_len; j >= 0; --j) { qemu_fprintf(f, "%016" PRIx64 ":%016" PRIx64 "%c", - env->zarray[i].d[2 * j + 1], - env->zarray[i].d[2 * j], + env->za_state.za[i].d[2 * j + 1], + env->za_state.za[i].d[2 * j], j ? ':' : '\n'); } } @@ -1500,6 +1147,7 @@ static void arm_cpu_initfn(Object *obj) * 0 means "unset, use the default value". That default might vary depending * on the CPU type, and is set in the realize fn. 
*/ +#ifndef CONFIG_USER_ONLY static const Property arm_cpu_gt_cntfrq_property = DEFINE_PROP_UINT64("cntfrq", ARMCPU, gt_cntfrq_hz, 0); @@ -1509,7 +1157,6 @@ static const Property arm_cpu_reset_cbar_property = static const Property arm_cpu_reset_hivecs_property = DEFINE_PROP_BOOL("reset-hivecs", ARMCPU, reset_hivecs, false); -#ifndef CONFIG_USER_ONLY static const Property arm_cpu_has_el2_property = DEFINE_PROP_BOOL("has_el2", ARMCPU, has_el2, true); @@ -1532,6 +1179,7 @@ static const Property arm_cpu_has_neon_property = static const Property arm_cpu_has_dsp_property = DEFINE_PROP_BOOL("dsp", ARMCPU, has_dsp, true); +#ifndef CONFIG_USER_ONLY static const Property arm_cpu_has_mpu_property = DEFINE_PROP_BOOL("has-mpu", ARMCPU, has_mpu, true); @@ -1544,6 +1192,7 @@ static const Property arm_cpu_pmsav7_dregion_property = DEFINE_PROP_UNSIGNED_NODEFAULT("pmsav7-dregion", ARMCPU, pmsav7_dregion, qdev_prop_uint32, uint32_t); +#endif static bool arm_get_pmu(Object *obj, Error **errp) { @@ -1713,7 +1362,7 @@ static void arm_cpu_propagate_feature_implications(ARMCPU *cpu) } } -void arm_cpu_post_init(Object *obj) +static void arm_cpu_post_init(Object *obj) { ARMCPU *cpu = ARM_CPU(obj); @@ -1731,6 +1380,7 @@ void arm_cpu_post_init(Object *obj) "Set on/off to enable/disable aarch64 " "execution state "); } +#ifndef CONFIG_USER_ONLY if (arm_feature(&cpu->env, ARM_FEATURE_CBAR) || arm_feature(&cpu->env, ARM_FEATURE_CBAR_RO)) { qdev_property_add_static(DEVICE(obj), &arm_cpu_reset_cbar_property); @@ -1746,7 +1396,6 @@ void arm_cpu_post_init(Object *obj) OBJ_PROP_FLAG_READWRITE); } -#ifndef CONFIG_USER_ONLY if (arm_feature(&cpu->env, ARM_FEATURE_EL3)) { /* Add the has_el3 state CPU property only if EL3 is allowed. This will * prevent "has_el3" from existing on CPUs which cannot support EL3. 
@@ -1818,6 +1467,7 @@ void arm_cpu_post_init(Object *obj) qdev_property_add_static(DEVICE(obj), &arm_cpu_has_dsp_property); } +#ifndef CONFIG_USER_ONLY if (arm_feature(&cpu->env, ARM_FEATURE_PMSA)) { qdev_property_add_static(DEVICE(obj), &arm_cpu_has_mpu_property); if (arm_feature(&cpu->env, ARM_FEATURE_V7)) { @@ -1854,8 +1504,6 @@ void arm_cpu_post_init(Object *obj) &cpu->psci_conduit, OBJ_PROP_FLAG_READWRITE); - qdev_property_add_static(DEVICE(obj), &arm_cpu_cfgend_property); - if (arm_feature(&cpu->env, ARM_FEATURE_GENERIC_TIMER)) { qdev_property_add_static(DEVICE(cpu), &arm_cpu_gt_cntfrq_property); } @@ -1864,7 +1512,6 @@ void arm_cpu_post_init(Object *obj) kvm_arm_add_vcpu_properties(cpu); } -#ifndef CONFIG_USER_ONLY if (arm_feature(&cpu->env, ARM_FEATURE_AARCH64) && cpu_isar_feature(aa64_mte, cpu)) { object_property_add_link(obj, "tag-memory", @@ -1882,6 +1529,7 @@ void arm_cpu_post_init(Object *obj) } } #endif + qdev_property_add_static(DEVICE(obj), &arm_cpu_cfgend_property); } static void arm_cpu_finalizefn(Object *obj) @@ -1962,6 +1610,7 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp) { CPUState *cs = CPU(dev); ARMCPU *cpu = ARM_CPU(dev); + ARMISARegisters *isar = &cpu->isar; ARMCPUClass *acc = ARM_CPU_GET_CLASS(dev); CPUARMState *env = &cpu->env; Error *local_err = NULL; @@ -2119,21 +1768,16 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp) } if (!cpu->has_vfp) { - uint64_t t; uint32_t u; - t = cpu->isar.id_aa64isar1; - t = FIELD_DP64(t, ID_AA64ISAR1, JSCVT, 0); - cpu->isar.id_aa64isar1 = t; + FIELD_DP64_IDREG(isar, ID_AA64ISAR1, JSCVT, 0); - t = cpu->isar.id_aa64pfr0; - t = FIELD_DP64(t, ID_AA64PFR0, FP, 0xf); - cpu->isar.id_aa64pfr0 = t; + FIELD_DP64_IDREG(isar, ID_AA64PFR0, FP, 0xf); - u = cpu->isar.id_isar6; + u = GET_IDREG(isar, ID_ISAR6); u = FIELD_DP32(u, ID_ISAR6, JSCVT, 0); u = FIELD_DP32(u, ID_ISAR6, BF16, 0); - cpu->isar.id_isar6 = u; + SET_IDREG(isar, ID_ISAR6, u); u = cpu->isar.mvfr0; u = FIELD_DP32(u, MVFR0, FPSP, 0); @@ -2167,7 +1811,7 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp) unset_feature(env, ARM_FEATURE_NEON); - t = cpu->isar.id_aa64isar0; + t = GET_IDREG(isar, ID_AA64ISAR0); t = FIELD_DP64(t, ID_AA64ISAR0, AES, 0); t = FIELD_DP64(t, ID_AA64ISAR0, SHA1, 0); t = FIELD_DP64(t, ID_AA64ISAR0, SHA2, 0); @@ -2175,32 +1819,30 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp) t = FIELD_DP64(t, ID_AA64ISAR0, SM3, 0); t = FIELD_DP64(t, ID_AA64ISAR0, SM4, 0); t = FIELD_DP64(t, ID_AA64ISAR0, DP, 0); - cpu->isar.id_aa64isar0 = t; + SET_IDREG(isar, ID_AA64ISAR0, t); - t = cpu->isar.id_aa64isar1; + t = GET_IDREG(isar, ID_AA64ISAR1); t = FIELD_DP64(t, ID_AA64ISAR1, FCMA, 0); t = FIELD_DP64(t, ID_AA64ISAR1, BF16, 0); t = FIELD_DP64(t, ID_AA64ISAR1, I8MM, 0); - cpu->isar.id_aa64isar1 = t; + SET_IDREG(isar, ID_AA64ISAR1, t); - t = cpu->isar.id_aa64pfr0; - t = FIELD_DP64(t, ID_AA64PFR0, ADVSIMD, 0xf); - cpu->isar.id_aa64pfr0 = t; + FIELD_DP64_IDREG(isar, ID_AA64PFR0, ADVSIMD, 0xf); - u = cpu->isar.id_isar5; + u = GET_IDREG(isar, ID_ISAR5); u = FIELD_DP32(u, ID_ISAR5, AES, 0); u = FIELD_DP32(u, ID_ISAR5, SHA1, 0); u = FIELD_DP32(u, ID_ISAR5, SHA2, 0); u = FIELD_DP32(u, ID_ISAR5, RDM, 0); u = FIELD_DP32(u, ID_ISAR5, VCMA, 0); - cpu->isar.id_isar5 = u; + SET_IDREG(isar, ID_ISAR5, u); - u = cpu->isar.id_isar6; + u = GET_IDREG(isar, ID_ISAR6); u = FIELD_DP32(u, ID_ISAR6, DP, 0); u = FIELD_DP32(u, ID_ISAR6, FHM, 0); u = FIELD_DP32(u, ID_ISAR6, BF16, 0); u = FIELD_DP32(u, ID_ISAR6, I8MM, 0); - cpu->isar.id_isar6 = u; + 
SET_IDREG(isar, ID_ISAR6, u); if (!arm_feature(env, ARM_FEATURE_M)) { u = cpu->isar.mvfr1; @@ -2217,16 +1859,11 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp) } if (!cpu->has_neon && !cpu->has_vfp) { - uint64_t t; uint32_t u; - t = cpu->isar.id_aa64isar0; - t = FIELD_DP64(t, ID_AA64ISAR0, FHM, 0); - cpu->isar.id_aa64isar0 = t; + FIELD_DP64_IDREG(isar, ID_AA64ISAR0, FHM, 0); - t = cpu->isar.id_aa64isar1; - t = FIELD_DP64(t, ID_AA64ISAR1, FRINTTS, 0); - cpu->isar.id_aa64isar1 = t; + FIELD_DP64_IDREG(isar, ID_AA64ISAR1, FRINTTS, 0); u = cpu->isar.mvfr0; u = FIELD_DP32(u, MVFR0, SIMDREG, 0); @@ -2243,30 +1880,20 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp) unset_feature(env, ARM_FEATURE_THUMB_DSP); - u = cpu->isar.id_isar1; - u = FIELD_DP32(u, ID_ISAR1, EXTEND, 1); - cpu->isar.id_isar1 = u; + FIELD_DP32_IDREG(isar, ID_ISAR1, EXTEND, 1); - u = cpu->isar.id_isar2; + u = GET_IDREG(isar, ID_ISAR2); u = FIELD_DP32(u, ID_ISAR2, MULTU, 1); u = FIELD_DP32(u, ID_ISAR2, MULTS, 1); - cpu->isar.id_isar2 = u; + SET_IDREG(isar, ID_ISAR2, u); - u = cpu->isar.id_isar3; + u = GET_IDREG(isar, ID_ISAR3); u = FIELD_DP32(u, ID_ISAR3, SIMD, 1); u = FIELD_DP32(u, ID_ISAR3, SATURATE, 0); - cpu->isar.id_isar3 = u; + SET_IDREG(isar, ID_ISAR3, u); } - /* - * We rely on no XScale CPU having VFP so we can use the same bits in the - * TB flags field for VECSTRIDE and XSCALE_CPAR. - */ - assert(arm_feature(env, ARM_FEATURE_AARCH64) || - !cpu_isar_feature(aa32_vfp_simd, cpu) || - !arm_feature(env, ARM_FEATURE_XSCALE)); - #ifndef CONFIG_USER_ONLY { int pagebits; @@ -2330,14 +1957,12 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp) * Disable the security extension feature bits in the processor * feature registers as well. */ - cpu->isar.id_pfr1 = FIELD_DP32(cpu->isar.id_pfr1, ID_PFR1, SECURITY, 0); - cpu->isar.id_dfr0 = FIELD_DP32(cpu->isar.id_dfr0, ID_DFR0, COPSDBG, 0); - cpu->isar.id_aa64pfr0 = FIELD_DP64(cpu->isar.id_aa64pfr0, - ID_AA64PFR0, EL3, 0); + FIELD_DP32_IDREG(isar, ID_PFR1, SECURITY, 0); + FIELD_DP32_IDREG(isar, ID_DFR0, COPSDBG, 0); + FIELD_DP64_IDREG(isar, ID_AA64PFR0, EL3, 0); /* Disable the realm management extension, which requires EL3. */ - cpu->isar.id_aa64pfr0 = FIELD_DP64(cpu->isar.id_aa64pfr0, - ID_AA64PFR0, RME, 0); + FIELD_DP64_IDREG(isar, ID_AA64PFR0, RME, 0); } if (!cpu->has_el2) { @@ -2360,9 +1985,8 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp) cpu); #endif } else { - cpu->isar.id_aa64dfr0 = - FIELD_DP64(cpu->isar.id_aa64dfr0, ID_AA64DFR0, PMUVER, 0); - cpu->isar.id_dfr0 = FIELD_DP32(cpu->isar.id_dfr0, ID_DFR0, PERFMON, 0); + FIELD_DP64_IDREG(isar, ID_AA64DFR0, PMUVER, 0); + FIELD_DP32_IDREG(isar, ID_DFR0, PERFMON, 0); cpu->pmceid0 = 0; cpu->pmceid1 = 0; } @@ -2372,10 +1996,8 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp) * Disable the hypervisor feature bits in the processor feature * registers if we don't have EL2. */ - cpu->isar.id_aa64pfr0 = FIELD_DP64(cpu->isar.id_aa64pfr0, - ID_AA64PFR0, EL2, 0); - cpu->isar.id_pfr1 = FIELD_DP32(cpu->isar.id_pfr1, - ID_PFR1, VIRTUALIZATION, 0); + FIELD_DP64_IDREG(isar, ID_AA64PFR0, EL2, 0); + FIELD_DP32_IDREG(isar, ID_PFR1, VIRTUALIZATION, 0); } if (cpu_isar_feature(aa64_mte, cpu)) { @@ -2394,8 +2016,7 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp) * This matches Cortex-A710 BROADCASTMTE input being LOW. 
*/ if (tcg_enabled() && cpu->tag_memory == NULL) { - cpu->isar.id_aa64pfr1 = - FIELD_DP64(cpu->isar.id_aa64pfr1, ID_AA64PFR1, MTE, 1); + FIELD_DP64_IDREG(isar, ID_AA64PFR1, MTE, 1); } /* @@ -2403,7 +2024,7 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp) * enabled on the guest (i.e mte=off), clear guest's MTE bits." */ if (kvm_enabled() && !cpu->kvm_mte) { - FIELD_DP64(cpu->isar.id_aa64pfr1, ID_AA64PFR1, MTE, 0); + FIELD_DP64_IDREG(isar, ID_AA64PFR1, MTE, 0); } #endif } @@ -2423,32 +2044,22 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp) * try to access the non-existent system registers for them. */ /* FEAT_SPE (Statistical Profiling Extension) */ - cpu->isar.id_aa64dfr0 = - FIELD_DP64(cpu->isar.id_aa64dfr0, ID_AA64DFR0, PMSVER, 0); + FIELD_DP64_IDREG(isar, ID_AA64DFR0, PMSVER, 0); /* FEAT_TRBE (Trace Buffer Extension) */ - cpu->isar.id_aa64dfr0 = - FIELD_DP64(cpu->isar.id_aa64dfr0, ID_AA64DFR0, TRACEBUFFER, 0); + FIELD_DP64_IDREG(isar, ID_AA64DFR0, TRACEBUFFER, 0); /* FEAT_TRF (Self-hosted Trace Extension) */ - cpu->isar.id_aa64dfr0 = - FIELD_DP64(cpu->isar.id_aa64dfr0, ID_AA64DFR0, TRACEFILT, 0); - cpu->isar.id_dfr0 = - FIELD_DP32(cpu->isar.id_dfr0, ID_DFR0, TRACEFILT, 0); + FIELD_DP64_IDREG(isar, ID_AA64DFR0, TRACEFILT, 0); + FIELD_DP32_IDREG(isar, ID_DFR0, TRACEFILT, 0); /* Trace Macrocell system register access */ - cpu->isar.id_aa64dfr0 = - FIELD_DP64(cpu->isar.id_aa64dfr0, ID_AA64DFR0, TRACEVER, 0); - cpu->isar.id_dfr0 = - FIELD_DP32(cpu->isar.id_dfr0, ID_DFR0, COPTRC, 0); + FIELD_DP64_IDREG(isar, ID_AA64DFR0, TRACEVER, 0); + FIELD_DP32_IDREG(isar, ID_DFR0, COPTRC, 0); /* Memory mapped trace */ - cpu->isar.id_dfr0 = - FIELD_DP32(cpu->isar.id_dfr0, ID_DFR0, MMAPTRC, 0); + FIELD_DP32_IDREG(isar, ID_DFR0, MMAPTRC, 0); /* FEAT_AMU (Activity Monitors Extension) */ - cpu->isar.id_aa64pfr0 = - FIELD_DP64(cpu->isar.id_aa64pfr0, ID_AA64PFR0, AMU, 0); - cpu->isar.id_pfr0 = - FIELD_DP32(cpu->isar.id_pfr0, ID_PFR0, AMU, 0); + FIELD_DP64_IDREG(isar, ID_AA64PFR0, AMU, 0); + FIELD_DP32_IDREG(isar, ID_PFR0, AMU, 0); /* FEAT_MPAM (Memory Partitioning and Monitoring Extension) */ - cpu->isar.id_aa64pfr0 = - FIELD_DP64(cpu->isar.id_aa64pfr0, ID_AA64PFR0, MPAM, 0); + FIELD_DP64_IDREG(isar, ID_AA64PFR0, MPAM, 0); } /* MPU can be configured out of a PMSA CPU either by setting has-mpu @@ -2639,14 +2250,10 @@ static const Property arm_cpu_properties[] = { static const gchar *arm_gdb_arch_name(CPUState *cs) { ARMCPU *cpu = ARM_CPU(cs); - CPUARMState *env = &cpu->env; if (arm_gdbstub_is_aarch64(cpu)) { return "aarch64"; } - if (arm_feature(env, ARM_FEATURE_IWMMXT)) { - return "iwmmxt"; - } return "arm"; } @@ -2672,7 +2279,7 @@ static const char *arm_gdb_get_core_xml_file(CPUState *cs) * linux syscall TIF_TAGGED_ADDR setting, not TBI in general. * * There should be a better place to put this, but we need this in - * include/exec/cpu_ldst.h, and not some place linux-user specific. + * include/accel/tcg/cpu-ldst.h, and not some place linux-user specific. * * Note that arm-*-user will never set tagged_addr_enable. */ @@ -2703,6 +2310,29 @@ static const struct SysemuCPUOps arm_sysemu_ops = { #endif #ifdef CONFIG_TCG +#ifndef CONFIG_USER_ONLY +static vaddr aprofile_pointer_wrap(CPUState *cs, int mmu_idx, + vaddr result, vaddr base) +{ + /* + * The Stage2 and Phys indexes are only used for ptw on arm32, + * and all pte's are aligned, so we never produce a wrap for these. + * Double check that we're not truncating a 40-bit physical address. 
+ */ + assert((unsigned)mmu_idx < (ARMMMUIdx_Stage2_S & ARM_MMU_IDX_COREIDX_MASK)); + + if (!is_a64(cpu_env(cs))) { + return (uint32_t)result; + } + + /* + * TODO: For FEAT_CPA2, decide how to we want to resolve + * Unpredictable_CPACHECK in AddressIncrement. + */ + return result; +} +#endif /* !CONFIG_USER_ONLY */ + static const TCGCPUOps arm_tcg_ops = { .mttcg_supported = true, /* ARM processors have a weak memory model */ @@ -2722,6 +2352,7 @@ static const TCGCPUOps arm_tcg_ops = { .untagged_addr = aarch64_untagged_addr, #else .tlb_fill_align = arm_cpu_tlb_fill_align, + .pointer_wrap = aprofile_pointer_wrap, .cpu_exec_interrupt = arm_cpu_exec_interrupt, .cpu_exec_halt = arm_cpu_exec_halt, .cpu_exec_reset = cpu_reset, diff --git a/target/arm/cpu.h b/target/arm/cpu.h index 302c24e..bf221e6 100644 --- a/target/arm/cpu.h +++ b/target/arm/cpu.h @@ -32,6 +32,8 @@ #include "qapi/qapi-types-common.h" #include "target/arm/multiprocessing.h" #include "target/arm/gtimer.h" +#include "target/arm/cpu-sysregs.h" +#include "target/arm/mmuidx.h" #define EXCP_UDEF 1 /* undefined instruction */ #define EXCP_SWI 2 /* software interrupt */ @@ -206,6 +208,8 @@ typedef struct NVICState NVICState; * when FPCR.AH == 1 (bfloat16 conversions and multiplies, * and the reciprocal and square root estimate/step insns); * for half-precision + * ZA: the "streaming sve" fp status. + * ZA_F16: likewise for half-precision. * * Half-precision operations are governed by a separate * flush-to-zero control bit in FPSCR:FZ16. We pass a separate @@ -226,6 +230,12 @@ typedef struct NVICState NVICState; * they ignore FPCR.RMode. But they don't ignore FPCR.FZ16, * which means we need an FPST_AH_F16 as well. * + * The "ZA" float_status are for Streaming SVE operations which use + * default-NaN and do not generate fp exceptions, which means that they + * do not accumulate exception bits back into FPCR. + * See e.g. FPAdd vs FPAdd_ZA pseudocode functions, and the setting + * of fpcr.DN and fpexec parameters. + * * To avoid having to transfer exception bits around, we simply * say that the FPSCR cumulative exception flags are the logical * OR of the flags in the four fp statuses. This relies on the @@ -239,10 +249,12 @@ typedef enum ARMFPStatusFlavour { FPST_A64_F16, FPST_AH, FPST_AH_F16, + FPST_ZA, + FPST_ZA_F16, FPST_STD, FPST_STD_F16, } ARMFPStatusFlavour; -#define FPST_COUNT 8 +#define FPST_COUNT 10 typedef struct CPUArchState { /* Regs for current mode. */ @@ -256,7 +268,7 @@ typedef struct CPUArchState { uint64_t xregs[32]; uint64_t pc; /* PSTATE isn't an architectural register for ARMv8. However, it is - * convenient for us to assemble the underlying state into a 32 bit format + * convenient for us to assemble the underlying state into a 64 bit format * identical to the architectural format used for the SPSR. (This is also * what the Linux kernel's 'pstate' field in signal handlers and KVM's * 'pstate' register are.) Of the PSTATE bits: @@ -268,7 +280,7 @@ typedef struct CPUArchState { * SM and ZA are kept in env->svcr * all other bits are stored in their correct places in env->pstate */ - uint32_t pstate; + uint64_t pstate; bool aarch64; /* True if CPU is in aarch64 state; inverse of PSTATE.nRW */ bool thumb; /* True if CPU is in thumb mode; cpsr[5] */ @@ -326,10 +338,10 @@ typedef struct CPUArchState { }; uint64_t sctlr_el[4]; }; + uint64_t sctlr2_el[4]; /* Extension to System control register. */ uint64_t vsctlr; /* Virtualization System control register. 
*/ uint64_t cpacr_el1; /* Architectural feature access control register */ uint64_t cptr_el[4]; /* ARMv8 feature trap registers */ - uint32_t c1_xscaleauxcr; /* XScale auxiliary control register. */ uint64_t sder; /* Secure debug enable register. */ uint32_t nsacr; /* Non-secure access control register. */ union { /* MMU translation table base 0. */ @@ -354,8 +366,12 @@ typedef struct CPUArchState { uint64_t vsttbr_el2; /* Secure Virtualization Translation Table. */ /* MMU translation table base control. */ uint64_t tcr_el[4]; + uint64_t tcr2_el[3]; uint64_t vtcr_el2; /* Virtualization Translation Control. */ uint64_t vstcr_el2; /* Secure Virtualization Translation Control. */ + uint64_t pir_el[4]; /* PIRE0_EL1, PIR_EL1, PIR_EL2, PIR_EL3 */ + uint64_t pire0_el2; + uint64_t s2pir_el2; uint32_t c2_data; /* MPU data cacheable bits. */ uint32_t c2_insn; /* MPU instruction cacheable bits. */ union { /* MMU domain access control register @@ -500,7 +516,6 @@ typedef struct CPUArchState { uint64_t cntvoff_el2; /* Counter Virtual Offset register */ uint64_t cntpoff_el2; /* Counter Physical Offset register */ ARMGenericTimer c14_timer[NUM_GTIMERS]; - uint32_t c15_cpar; /* XScale Coprocessor Access Register */ uint32_t c15_ticonfig; /* TI925T configuration byte. */ uint32_t c15_i_max; /* Maximum D-cache dirty line index. */ uint32_t c15_i_min; /* Minimum D-cache dirty line index. */ @@ -565,6 +580,18 @@ typedef struct CPUArchState { /* NV2 register */ uint64_t vncr_el2; + + uint64_t gcscr_el[4]; /* GCSCRE0_EL1, GCSCR_EL[123] */ + uint64_t gcspr_el[4]; /* GCSPR_EL[0123] */ + + /* MEC registers */ + uint64_t mecid_p0_el2; + uint64_t mecid_a0_el2; + uint64_t mecid_p1_el2; + uint64_t mecid_a1_el2; + uint64_t mecid_rl_a_el3; + uint64_t vmecid_p_el2; + uint64_t vmecid_a_el2; } cp15; struct { @@ -619,13 +646,10 @@ typedef struct CPUArchState { * entry process. */ struct { - uint32_t syndrome; /* AArch64 format syndrome register */ - uint32_t fsr; /* AArch32 format fault status register info */ + uint64_t syndrome; /* AArch64 format syndrome register */ uint64_t vaddress; /* virtual addr associated with exception, if any */ + uint32_t fsr; /* AArch32 format fault status register info */ uint32_t target_el; /* EL the exception should be targeted for */ - /* If we implement EL2 we will also need to store information - * about the intermediate physical address for stage 2 faults. - */ } exception; /* Information associated with an SError */ @@ -668,9 +692,6 @@ typedef struct CPUArchState { uint32_t xregs[16]; - /* Scratch space for aa32 neon expansion. */ - uint32_t scratch[8]; - /* There are a number of distinct float control structures. */ float_status fp_status[FPST_COUNT]; @@ -689,14 +710,6 @@ typedef struct CPUArchState { */ uint64_t exclusive_high; - /* iwMMXt coprocessor state. */ - struct { - uint64_t regs[16]; - uint64_t val; - - uint32_t cregs[16]; - } iwmmxt; - struct { ARMPACKey apia; ARMPACKey apib; @@ -707,27 +720,36 @@ typedef struct CPUArchState { uint64_t scxtnum_el[4]; - /* - * SME ZA storage -- 256 x 256 byte array, with bytes in host word order, - * as we do with vfp.zregs[]. This corresponds to the architectural ZA - * array, where ZA[N] is in the least-significant bytes of env->zarray[N]. - * When SVL is less than the architectural maximum, the accessible - * storage is restricted, such that if the SVL is X bytes the guest can - * see only the bottom X elements of zarray[], and only the least - * significant X bytes of each element of the array. 
(In other words, - * the observable part is always square.) - * - * The ZA storage can also be considered as a set of square tiles of - * elements of different sizes. The mapping from tiles to the ZA array - * is architecturally defined, such that for tiles of elements of esz - * bytes, the Nth row (or "horizontal slice") of tile T is in - * ZA[T + N * esz]. Note that this means that each tile is not contiguous - * in the ZA storage, because its rows are striped through the ZA array. - * - * Because this is so large, keep this toward the end of the reset area, - * to keep the offsets into the rest of the structure smaller. - */ - ARMVectorReg zarray[ARM_MAX_VQ * 16]; + struct { + /* SME2 ZT0 -- 512 bit array, with data ordered like ARMVectorReg. */ + uint64_t zt0[512 / 64] QEMU_ALIGNED(16); + + /* + * SME ZA storage -- 256 x 256 byte array, with bytes in host + * word order, as we do with vfp.zregs[]. This corresponds to + * the architectural ZA array, where ZA[N] is in the least + * significant bytes of env->za_state.za[N]. + * + * When SVL is less than the architectural maximum, the accessible + * storage is restricted, such that if the SVL is X bytes the guest + * can see only the bottom X elements of zarray[], and only the least + * significant X bytes of each element of the array. (In other words, + * the observable part is always square.) + * + * The ZA storage can also be considered as a set of square tiles of + * elements of different sizes. The mapping from tiles to the ZA array + * is architecturally defined, such that for tiles of elements of esz + * bytes, the Nth row (or "horizontal slice") of tile T is in + * ZA[T + N * esz]. Note that this means that each tile is not + * contiguous in the ZA storage, because its rows are striped through + * the ZA array. + * + * Because this is so large, keep this toward the end of the + * reset area, to keep the offsets into the rest of the structure + * smaller. 
+ */ + ARMVectorReg za[ARM_MAX_VQ * 16]; + } za_state; struct CPUBreakpoint *cpu_breakpoint[16]; struct CPUWatchpoint *cpu_watchpoint[16]; @@ -834,6 +856,53 @@ typedef struct { uint32_t map, init, supported; } ARMVQMap; +/* REG is ID_XXX */ +#define FIELD_DP64_IDREG(ISAR, REG, FIELD, VALUE) \ + ({ \ + ARMISARegisters *i_ = (ISAR); \ + uint64_t regval = i_->idregs[REG ## _EL1_IDX]; \ + regval = FIELD_DP64(regval, REG, FIELD, VALUE); \ + i_->idregs[REG ## _EL1_IDX] = regval; \ + }) + +#define FIELD_DP32_IDREG(ISAR, REG, FIELD, VALUE) \ + ({ \ + ARMISARegisters *i_ = (ISAR); \ + uint64_t regval = i_->idregs[REG ## _EL1_IDX]; \ + regval = FIELD_DP32(regval, REG, FIELD, VALUE); \ + i_->idregs[REG ## _EL1_IDX] = regval; \ + }) + +#define FIELD_EX64_IDREG(ISAR, REG, FIELD) \ + ({ \ + const ARMISARegisters *i_ = (ISAR); \ + FIELD_EX64(i_->idregs[REG ## _EL1_IDX], REG, FIELD); \ + }) + +#define FIELD_EX32_IDREG(ISAR, REG, FIELD) \ + ({ \ + const ARMISARegisters *i_ = (ISAR); \ + FIELD_EX32(i_->idregs[REG ## _EL1_IDX], REG, FIELD); \ + }) + +#define FIELD_SEX64_IDREG(ISAR, REG, FIELD) \ + ({ \ + const ARMISARegisters *i_ = (ISAR); \ + FIELD_SEX64(i_->idregs[REG ## _EL1_IDX], REG, FIELD); \ + }) + +#define SET_IDREG(ISAR, REG, VALUE) \ + ({ \ + ARMISARegisters *i_ = (ISAR); \ + i_->idregs[REG ## _EL1_IDX] = VALUE; \ + }) + +#define GET_IDREG(ISAR, REG) \ + ({ \ + const ARMISARegisters *i_ = (ISAR); \ + i_->idregs[REG ## _EL1_IDX]; \ + }) + /** * ARMCPU: * @env: #CPUARMState @@ -869,6 +938,7 @@ struct ArchCPU { DynamicGDBFeatureInfo dyn_sysreg_feature; DynamicGDBFeatureInfo dyn_svereg_feature; + DynamicGDBFeatureInfo dyn_smereg_feature; DynamicGDBFeatureInfo dyn_m_systemreg_feature; DynamicGDBFeatureInfo dyn_m_secextreg_feature; @@ -1002,44 +1072,14 @@ struct ArchCPU { * field by reading the value from the KVM vCPU. */ struct ARMISARegisters { - uint32_t id_isar0; - uint32_t id_isar1; - uint32_t id_isar2; - uint32_t id_isar3; - uint32_t id_isar4; - uint32_t id_isar5; - uint32_t id_isar6; - uint32_t id_mmfr0; - uint32_t id_mmfr1; - uint32_t id_mmfr2; - uint32_t id_mmfr3; - uint32_t id_mmfr4; - uint32_t id_mmfr5; - uint32_t id_pfr0; - uint32_t id_pfr1; - uint32_t id_pfr2; uint32_t mvfr0; uint32_t mvfr1; uint32_t mvfr2; - uint32_t id_dfr0; - uint32_t id_dfr1; uint32_t dbgdidr; uint32_t dbgdevid; uint32_t dbgdevid1; - uint64_t id_aa64isar0; - uint64_t id_aa64isar1; - uint64_t id_aa64isar2; - uint64_t id_aa64pfr0; - uint64_t id_aa64pfr1; - uint64_t id_aa64mmfr0; - uint64_t id_aa64mmfr1; - uint64_t id_aa64mmfr2; - uint64_t id_aa64mmfr3; - uint64_t id_aa64dfr0; - uint64_t id_aa64dfr1; - uint64_t id_aa64zfr0; - uint64_t id_aa64smfr0; uint64_t reset_pmcr_el0; + uint64_t idregs[NUM_ID_IDX]; } isar; uint64_t midr; uint32_t revidr; @@ -1048,10 +1088,6 @@ struct ArchCPU { uint32_t reset_sctlr; uint64_t pmceid0; uint64_t pmceid1; - uint32_t id_afr0; - uint64_t id_aa64afr0; - uint64_t id_aa64afr1; - uint64_t clidr; uint64_t mp_affinity; /* MP ID without feature bits */ /* The elements of this array are the CCSIDR values for each cache, * in the order L1DCache, L1ICache, L2DCache, L2ICache, etc. @@ -1102,6 +1138,7 @@ struct ArchCPU { /* Used to set the maximum vector length the cpu will support. */ uint32_t sve_max_vq; + uint32_t sme_max_vq; #ifdef CONFIG_USER_ONLY /* Used to set the default vector length at process start. 
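The row striping documented in the za_state comment above can be made concrete with a tiny helper (illustrative only, not one of QEMU's accessors): row N of tile T, for esz-byte elements, lives at za[T + N * esz].

/*
 * Example: with 32-bit elements (esz == 4), row 3 of tile ZA2 is
 * za[2 + 3 * 4], i.e. za[14].
 */
static inline ARMVectorReg *za_tile_row_sketch(CPUARMState *env,
                                               unsigned tile, unsigned row,
                                               unsigned esz)
{
    return &env->za_state.za[tile + row * esz];
}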
*/ @@ -1150,8 +1187,6 @@ void arm_gt_sel2vtimer_cb(void *opaque); unsigned int gt_cntfrq_period_ns(ARMCPU *cpu); void gt_rme_post_el_change(ARMCPU *cpu, void *opaque); -void arm_cpu_post_init(Object *obj); - #define ARM_AFF0_SHIFT 0 #define ARM_AFF0_MASK (0xFFULL << ARM_AFF0_SHIFT) #define ARM_AFF1_SHIFT 8 @@ -1391,6 +1426,19 @@ void pmu_init(ARMCPU *cpu); #define SCTLR_SPINTMASK (1ULL << 62) /* FEAT_NMI */ #define SCTLR_TIDCP (1ULL << 63) /* FEAT_TIDCP1 */ +#define SCTLR2_EMEC (1ULL << 1) /* FEAT_MEC */ +#define SCTLR2_NMEA (1ULL << 2) /* FEAT_DoubleFault2 */ +#define SCTLR2_ENADERR (1ULL << 3) /* FEAT_ADERR */ +#define SCTLR2_ENANERR (1ULL << 4) /* FEAT_ANERR */ +#define SCTLR2_EASE (1ULL << 5) /* FEAT_DoubleFault2 */ +#define SCTLR2_ENIDCP128 (1ULL << 6) /* FEAT_SYSREG128 */ +#define SCTLR2_ENPACM (1ULL << 7) /* FEAT_PAuth_LR */ +#define SCTLR2_ENPACM0 (1ULL << 8) /* FEAT_PAuth_LR */ +#define SCTLR2_CPTA (1ULL << 9) /* FEAT_CPA2 */ +#define SCTLR2_CPTA0 (1ULL << 10) /* FEAT_CPA2 */ +#define SCTLR2_CPTM (1ULL << 11) /* FEAT_CPA2 */ +#define SCTLR2_CPTM0 (1ULL << 12) /* FEAT_CAP2 */ + #define CPSR_M (0x1fU) #define CPSR_T (1U << 5) #define CPSR_F (1U << 6) @@ -1463,6 +1511,7 @@ void pmu_init(ARMCPU *cpu); #define PSTATE_C (1U << 29) #define PSTATE_Z (1U << 30) #define PSTATE_N (1U << 31) +#define PSTATE_EXLOCK (1ULL << 34) #define PSTATE_NZCV (PSTATE_N | PSTATE_Z | PSTATE_C | PSTATE_V) #define PSTATE_DAIF (PSTATE_D | PSTATE_A | PSTATE_I | PSTATE_F) #define CACHED_PSTATE_BITS (PSTATE_NZCV | PSTATE_DAIF | PSTATE_BTYPE) @@ -1481,6 +1530,7 @@ FIELD(SVCR, ZA, 1, 1) /* Fields for SMCR_ELx. */ FIELD(SMCR, LEN, 0, 4) +FIELD(SMCR, EZT0, 30, 1) FIELD(SMCR, FA64, 31, 1) /* Write a new value to v7m.exception, thus transitioning into or out @@ -1498,7 +1548,7 @@ static inline unsigned int aarch64_pstate_mode(unsigned int el, bool handler) * interprocessing, so we don't attempt to sync with the cpsr state used by * the 32 bit decoder. */ -static inline uint32_t pstate_read(CPUARMState *env) +static inline uint64_t pstate_read(CPUARMState *env) { int ZF; @@ -1508,7 +1558,7 @@ static inline uint32_t pstate_read(CPUARMState *env) | env->pstate | env->daif | (env->btype << 10); } -static inline void pstate_write(CPUARMState *env, uint32_t val) +static inline void pstate_write(CPUARMState *env, uint64_t val) { env->ZF = (~val) & PSTATE_Z; env->NF = val; @@ -1680,11 +1730,24 @@ static inline void xpsr_write(CPUARMState *env, uint32_t val, uint32_t mask) #define SCR_ENAS0 (1ULL << 36) #define SCR_ADEN (1ULL << 37) #define SCR_HXEN (1ULL << 38) +#define SCR_GCSEN (1ULL << 39) #define SCR_TRNDR (1ULL << 40) #define SCR_ENTP2 (1ULL << 41) +#define SCR_TCR2EN (1ULL << 43) +#define SCR_SCTLR2EN (1ULL << 44) +#define SCR_PIEN (1ULL << 45) #define SCR_GPF (1ULL << 48) +#define SCR_MECEN (1ULL << 49) #define SCR_NSE (1ULL << 62) +/* GCSCR_ELx fields */ +#define GCSCR_PCRSEL (1ULL << 0) +#define GCSCR_RVCHKEN (1ULL << 5) +#define GCSCR_EXLOCKEN (1ULL << 6) +#define GCSCR_PUSHMEN (1ULL << 8) +#define GCSCR_STREN (1ULL << 9) +#define GCSCRE0_NTR (1ULL << 10) + /* Return the current FPSCR value. */ uint32_t vfp_get_fpscr(CPUARMState *env); void vfp_set_fpscr(CPUARMState *env, uint32_t val); @@ -1818,16 +1881,6 @@ enum arm_cpu_mode { /* QEMU-internal value meaning "FPSCR, but we care only about NZCV" */ #define QEMU_VFP_FPSCR_NZCV 0xffff -/* iwMMXt coprocessor control registers. 
*/ -#define ARM_IWMMXT_wCID 0 -#define ARM_IWMMXT_wCon 1 -#define ARM_IWMMXT_wCSSF 2 -#define ARM_IWMMXT_wCASF 3 -#define ARM_IWMMXT_wCGR0 8 -#define ARM_IWMMXT_wCGR1 9 -#define ARM_IWMMXT_wCGR2 10 -#define ARM_IWMMXT_wCGR3 11 - /* V7M CCR bits */ FIELD(V7M_CCR, NONBASETHRDENA, 0, 1) FIELD(V7M_CCR, USERSETMPEND, 1, 1) @@ -1966,423 +2019,20 @@ FIELD(V7M_VPR, P0, 0, 16) FIELD(V7M_VPR, MASK01, 16, 4) FIELD(V7M_VPR, MASK23, 20, 4) -/* - * System register ID fields. - */ -FIELD(CLIDR_EL1, CTYPE1, 0, 3) -FIELD(CLIDR_EL1, CTYPE2, 3, 3) -FIELD(CLIDR_EL1, CTYPE3, 6, 3) -FIELD(CLIDR_EL1, CTYPE4, 9, 3) -FIELD(CLIDR_EL1, CTYPE5, 12, 3) -FIELD(CLIDR_EL1, CTYPE6, 15, 3) -FIELD(CLIDR_EL1, CTYPE7, 18, 3) -FIELD(CLIDR_EL1, LOUIS, 21, 3) -FIELD(CLIDR_EL1, LOC, 24, 3) -FIELD(CLIDR_EL1, LOUU, 27, 3) -FIELD(CLIDR_EL1, ICB, 30, 3) - -/* When FEAT_CCIDX is implemented */ -FIELD(CCSIDR_EL1, CCIDX_LINESIZE, 0, 3) -FIELD(CCSIDR_EL1, CCIDX_ASSOCIATIVITY, 3, 21) -FIELD(CCSIDR_EL1, CCIDX_NUMSETS, 32, 24) - -/* When FEAT_CCIDX is not implemented */ -FIELD(CCSIDR_EL1, LINESIZE, 0, 3) -FIELD(CCSIDR_EL1, ASSOCIATIVITY, 3, 10) -FIELD(CCSIDR_EL1, NUMSETS, 13, 15) - -FIELD(CTR_EL0, IMINLINE, 0, 4) -FIELD(CTR_EL0, L1IP, 14, 2) -FIELD(CTR_EL0, DMINLINE, 16, 4) -FIELD(CTR_EL0, ERG, 20, 4) -FIELD(CTR_EL0, CWG, 24, 4) -FIELD(CTR_EL0, IDC, 28, 1) -FIELD(CTR_EL0, DIC, 29, 1) -FIELD(CTR_EL0, TMINLINE, 32, 6) - -FIELD(MIDR_EL1, REVISION, 0, 4) -FIELD(MIDR_EL1, PARTNUM, 4, 12) -FIELD(MIDR_EL1, ARCHITECTURE, 16, 4) -FIELD(MIDR_EL1, VARIANT, 20, 4) -FIELD(MIDR_EL1, IMPLEMENTER, 24, 8) - -FIELD(ID_ISAR0, SWAP, 0, 4) -FIELD(ID_ISAR0, BITCOUNT, 4, 4) -FIELD(ID_ISAR0, BITFIELD, 8, 4) -FIELD(ID_ISAR0, CMPBRANCH, 12, 4) -FIELD(ID_ISAR0, COPROC, 16, 4) -FIELD(ID_ISAR0, DEBUG, 20, 4) -FIELD(ID_ISAR0, DIVIDE, 24, 4) - -FIELD(ID_ISAR1, ENDIAN, 0, 4) -FIELD(ID_ISAR1, EXCEPT, 4, 4) -FIELD(ID_ISAR1, EXCEPT_AR, 8, 4) -FIELD(ID_ISAR1, EXTEND, 12, 4) -FIELD(ID_ISAR1, IFTHEN, 16, 4) -FIELD(ID_ISAR1, IMMEDIATE, 20, 4) -FIELD(ID_ISAR1, INTERWORK, 24, 4) -FIELD(ID_ISAR1, JAZELLE, 28, 4) - -FIELD(ID_ISAR2, LOADSTORE, 0, 4) -FIELD(ID_ISAR2, MEMHINT, 4, 4) -FIELD(ID_ISAR2, MULTIACCESSINT, 8, 4) -FIELD(ID_ISAR2, MULT, 12, 4) -FIELD(ID_ISAR2, MULTS, 16, 4) -FIELD(ID_ISAR2, MULTU, 20, 4) -FIELD(ID_ISAR2, PSR_AR, 24, 4) -FIELD(ID_ISAR2, REVERSAL, 28, 4) - -FIELD(ID_ISAR3, SATURATE, 0, 4) -FIELD(ID_ISAR3, SIMD, 4, 4) -FIELD(ID_ISAR3, SVC, 8, 4) -FIELD(ID_ISAR3, SYNCHPRIM, 12, 4) -FIELD(ID_ISAR3, TABBRANCH, 16, 4) -FIELD(ID_ISAR3, T32COPY, 20, 4) -FIELD(ID_ISAR3, TRUENOP, 24, 4) -FIELD(ID_ISAR3, T32EE, 28, 4) - -FIELD(ID_ISAR4, UNPRIV, 0, 4) -FIELD(ID_ISAR4, WITHSHIFTS, 4, 4) -FIELD(ID_ISAR4, WRITEBACK, 8, 4) -FIELD(ID_ISAR4, SMC, 12, 4) -FIELD(ID_ISAR4, BARRIER, 16, 4) -FIELD(ID_ISAR4, SYNCHPRIM_FRAC, 20, 4) -FIELD(ID_ISAR4, PSR_M, 24, 4) -FIELD(ID_ISAR4, SWP_FRAC, 28, 4) - -FIELD(ID_ISAR5, SEVL, 0, 4) -FIELD(ID_ISAR5, AES, 4, 4) -FIELD(ID_ISAR5, SHA1, 8, 4) -FIELD(ID_ISAR5, SHA2, 12, 4) -FIELD(ID_ISAR5, CRC32, 16, 4) -FIELD(ID_ISAR5, RDM, 24, 4) -FIELD(ID_ISAR5, VCMA, 28, 4) - -FIELD(ID_ISAR6, JSCVT, 0, 4) -FIELD(ID_ISAR6, DP, 4, 4) -FIELD(ID_ISAR6, FHM, 8, 4) -FIELD(ID_ISAR6, SB, 12, 4) -FIELD(ID_ISAR6, SPECRES, 16, 4) -FIELD(ID_ISAR6, BF16, 20, 4) -FIELD(ID_ISAR6, I8MM, 24, 4) - -FIELD(ID_MMFR0, VMSA, 0, 4) -FIELD(ID_MMFR0, PMSA, 4, 4) -FIELD(ID_MMFR0, OUTERSHR, 8, 4) -FIELD(ID_MMFR0, SHARELVL, 12, 4) -FIELD(ID_MMFR0, TCM, 16, 4) -FIELD(ID_MMFR0, AUXREG, 20, 4) -FIELD(ID_MMFR0, FCSE, 24, 4) -FIELD(ID_MMFR0, INNERSHR, 28, 4) - -FIELD(ID_MMFR1, L1HVDVA, 0, 4) 
-FIELD(ID_MMFR1, L1UNIVA, 4, 4) -FIELD(ID_MMFR1, L1HVDSW, 8, 4) -FIELD(ID_MMFR1, L1UNISW, 12, 4) -FIELD(ID_MMFR1, L1HVD, 16, 4) -FIELD(ID_MMFR1, L1UNI, 20, 4) -FIELD(ID_MMFR1, L1TSTCLN, 24, 4) -FIELD(ID_MMFR1, BPRED, 28, 4) - -FIELD(ID_MMFR2, L1HVDFG, 0, 4) -FIELD(ID_MMFR2, L1HVDBG, 4, 4) -FIELD(ID_MMFR2, L1HVDRNG, 8, 4) -FIELD(ID_MMFR2, HVDTLB, 12, 4) -FIELD(ID_MMFR2, UNITLB, 16, 4) -FIELD(ID_MMFR2, MEMBARR, 20, 4) -FIELD(ID_MMFR2, WFISTALL, 24, 4) -FIELD(ID_MMFR2, HWACCFLG, 28, 4) - -FIELD(ID_MMFR3, CMAINTVA, 0, 4) -FIELD(ID_MMFR3, CMAINTSW, 4, 4) -FIELD(ID_MMFR3, BPMAINT, 8, 4) -FIELD(ID_MMFR3, MAINTBCST, 12, 4) -FIELD(ID_MMFR3, PAN, 16, 4) -FIELD(ID_MMFR3, COHWALK, 20, 4) -FIELD(ID_MMFR3, CMEMSZ, 24, 4) -FIELD(ID_MMFR3, SUPERSEC, 28, 4) - -FIELD(ID_MMFR4, SPECSEI, 0, 4) -FIELD(ID_MMFR4, AC2, 4, 4) -FIELD(ID_MMFR4, XNX, 8, 4) -FIELD(ID_MMFR4, CNP, 12, 4) -FIELD(ID_MMFR4, HPDS, 16, 4) -FIELD(ID_MMFR4, LSM, 20, 4) -FIELD(ID_MMFR4, CCIDX, 24, 4) -FIELD(ID_MMFR4, EVT, 28, 4) - -FIELD(ID_MMFR5, ETS, 0, 4) -FIELD(ID_MMFR5, NTLBPA, 4, 4) - -FIELD(ID_PFR0, STATE0, 0, 4) -FIELD(ID_PFR0, STATE1, 4, 4) -FIELD(ID_PFR0, STATE2, 8, 4) -FIELD(ID_PFR0, STATE3, 12, 4) -FIELD(ID_PFR0, CSV2, 16, 4) -FIELD(ID_PFR0, AMU, 20, 4) -FIELD(ID_PFR0, DIT, 24, 4) -FIELD(ID_PFR0, RAS, 28, 4) - -FIELD(ID_PFR1, PROGMOD, 0, 4) -FIELD(ID_PFR1, SECURITY, 4, 4) -FIELD(ID_PFR1, MPROGMOD, 8, 4) -FIELD(ID_PFR1, VIRTUALIZATION, 12, 4) -FIELD(ID_PFR1, GENTIMER, 16, 4) -FIELD(ID_PFR1, SEC_FRAC, 20, 4) -FIELD(ID_PFR1, VIRT_FRAC, 24, 4) -FIELD(ID_PFR1, GIC, 28, 4) - -FIELD(ID_PFR2, CSV3, 0, 4) -FIELD(ID_PFR2, SSBS, 4, 4) -FIELD(ID_PFR2, RAS_FRAC, 8, 4) - -FIELD(ID_AA64ISAR0, AES, 4, 4) -FIELD(ID_AA64ISAR0, SHA1, 8, 4) -FIELD(ID_AA64ISAR0, SHA2, 12, 4) -FIELD(ID_AA64ISAR0, CRC32, 16, 4) -FIELD(ID_AA64ISAR0, ATOMIC, 20, 4) -FIELD(ID_AA64ISAR0, TME, 24, 4) -FIELD(ID_AA64ISAR0, RDM, 28, 4) -FIELD(ID_AA64ISAR0, SHA3, 32, 4) -FIELD(ID_AA64ISAR0, SM3, 36, 4) -FIELD(ID_AA64ISAR0, SM4, 40, 4) -FIELD(ID_AA64ISAR0, DP, 44, 4) -FIELD(ID_AA64ISAR0, FHM, 48, 4) -FIELD(ID_AA64ISAR0, TS, 52, 4) -FIELD(ID_AA64ISAR0, TLB, 56, 4) -FIELD(ID_AA64ISAR0, RNDR, 60, 4) - -FIELD(ID_AA64ISAR1, DPB, 0, 4) -FIELD(ID_AA64ISAR1, APA, 4, 4) -FIELD(ID_AA64ISAR1, API, 8, 4) -FIELD(ID_AA64ISAR1, JSCVT, 12, 4) -FIELD(ID_AA64ISAR1, FCMA, 16, 4) -FIELD(ID_AA64ISAR1, LRCPC, 20, 4) -FIELD(ID_AA64ISAR1, GPA, 24, 4) -FIELD(ID_AA64ISAR1, GPI, 28, 4) -FIELD(ID_AA64ISAR1, FRINTTS, 32, 4) -FIELD(ID_AA64ISAR1, SB, 36, 4) -FIELD(ID_AA64ISAR1, SPECRES, 40, 4) -FIELD(ID_AA64ISAR1, BF16, 44, 4) -FIELD(ID_AA64ISAR1, DGH, 48, 4) -FIELD(ID_AA64ISAR1, I8MM, 52, 4) -FIELD(ID_AA64ISAR1, XS, 56, 4) -FIELD(ID_AA64ISAR1, LS64, 60, 4) - -FIELD(ID_AA64ISAR2, WFXT, 0, 4) -FIELD(ID_AA64ISAR2, RPRES, 4, 4) -FIELD(ID_AA64ISAR2, GPA3, 8, 4) -FIELD(ID_AA64ISAR2, APA3, 12, 4) -FIELD(ID_AA64ISAR2, MOPS, 16, 4) -FIELD(ID_AA64ISAR2, BC, 20, 4) -FIELD(ID_AA64ISAR2, PAC_FRAC, 24, 4) -FIELD(ID_AA64ISAR2, CLRBHB, 28, 4) -FIELD(ID_AA64ISAR2, SYSREG_128, 32, 4) -FIELD(ID_AA64ISAR2, SYSINSTR_128, 36, 4) -FIELD(ID_AA64ISAR2, PRFMSLC, 40, 4) -FIELD(ID_AA64ISAR2, RPRFM, 48, 4) -FIELD(ID_AA64ISAR2, CSSC, 52, 4) -FIELD(ID_AA64ISAR2, ATS1A, 60, 4) - -FIELD(ID_AA64PFR0, EL0, 0, 4) -FIELD(ID_AA64PFR0, EL1, 4, 4) -FIELD(ID_AA64PFR0, EL2, 8, 4) -FIELD(ID_AA64PFR0, EL3, 12, 4) -FIELD(ID_AA64PFR0, FP, 16, 4) -FIELD(ID_AA64PFR0, ADVSIMD, 20, 4) -FIELD(ID_AA64PFR0, GIC, 24, 4) -FIELD(ID_AA64PFR0, RAS, 28, 4) -FIELD(ID_AA64PFR0, SVE, 32, 4) -FIELD(ID_AA64PFR0, SEL2, 36, 4) -FIELD(ID_AA64PFR0, MPAM, 40, 4) 
-FIELD(ID_AA64PFR0, AMU, 44, 4) -FIELD(ID_AA64PFR0, DIT, 48, 4) -FIELD(ID_AA64PFR0, RME, 52, 4) -FIELD(ID_AA64PFR0, CSV2, 56, 4) -FIELD(ID_AA64PFR0, CSV3, 60, 4) - -FIELD(ID_AA64PFR1, BT, 0, 4) -FIELD(ID_AA64PFR1, SSBS, 4, 4) -FIELD(ID_AA64PFR1, MTE, 8, 4) -FIELD(ID_AA64PFR1, RAS_FRAC, 12, 4) -FIELD(ID_AA64PFR1, MPAM_FRAC, 16, 4) -FIELD(ID_AA64PFR1, SME, 24, 4) -FIELD(ID_AA64PFR1, RNDR_TRAP, 28, 4) -FIELD(ID_AA64PFR1, CSV2_FRAC, 32, 4) -FIELD(ID_AA64PFR1, NMI, 36, 4) -FIELD(ID_AA64PFR1, MTE_FRAC, 40, 4) -FIELD(ID_AA64PFR1, GCS, 44, 4) -FIELD(ID_AA64PFR1, THE, 48, 4) -FIELD(ID_AA64PFR1, MTEX, 52, 4) -FIELD(ID_AA64PFR1, DF2, 56, 4) -FIELD(ID_AA64PFR1, PFAR, 60, 4) - -FIELD(ID_AA64MMFR0, PARANGE, 0, 4) -FIELD(ID_AA64MMFR0, ASIDBITS, 4, 4) -FIELD(ID_AA64MMFR0, BIGEND, 8, 4) -FIELD(ID_AA64MMFR0, SNSMEM, 12, 4) -FIELD(ID_AA64MMFR0, BIGENDEL0, 16, 4) -FIELD(ID_AA64MMFR0, TGRAN16, 20, 4) -FIELD(ID_AA64MMFR0, TGRAN64, 24, 4) -FIELD(ID_AA64MMFR0, TGRAN4, 28, 4) -FIELD(ID_AA64MMFR0, TGRAN16_2, 32, 4) -FIELD(ID_AA64MMFR0, TGRAN64_2, 36, 4) -FIELD(ID_AA64MMFR0, TGRAN4_2, 40, 4) -FIELD(ID_AA64MMFR0, EXS, 44, 4) -FIELD(ID_AA64MMFR0, FGT, 56, 4) -FIELD(ID_AA64MMFR0, ECV, 60, 4) - -FIELD(ID_AA64MMFR1, HAFDBS, 0, 4) -FIELD(ID_AA64MMFR1, VMIDBITS, 4, 4) -FIELD(ID_AA64MMFR1, VH, 8, 4) -FIELD(ID_AA64MMFR1, HPDS, 12, 4) -FIELD(ID_AA64MMFR1, LO, 16, 4) -FIELD(ID_AA64MMFR1, PAN, 20, 4) -FIELD(ID_AA64MMFR1, SPECSEI, 24, 4) -FIELD(ID_AA64MMFR1, XNX, 28, 4) -FIELD(ID_AA64MMFR1, TWED, 32, 4) -FIELD(ID_AA64MMFR1, ETS, 36, 4) -FIELD(ID_AA64MMFR1, HCX, 40, 4) -FIELD(ID_AA64MMFR1, AFP, 44, 4) -FIELD(ID_AA64MMFR1, NTLBPA, 48, 4) -FIELD(ID_AA64MMFR1, TIDCP1, 52, 4) -FIELD(ID_AA64MMFR1, CMOW, 56, 4) -FIELD(ID_AA64MMFR1, ECBHB, 60, 4) - -FIELD(ID_AA64MMFR2, CNP, 0, 4) -FIELD(ID_AA64MMFR2, UAO, 4, 4) -FIELD(ID_AA64MMFR2, LSM, 8, 4) -FIELD(ID_AA64MMFR2, IESB, 12, 4) -FIELD(ID_AA64MMFR2, VARANGE, 16, 4) -FIELD(ID_AA64MMFR2, CCIDX, 20, 4) -FIELD(ID_AA64MMFR2, NV, 24, 4) -FIELD(ID_AA64MMFR2, ST, 28, 4) -FIELD(ID_AA64MMFR2, AT, 32, 4) -FIELD(ID_AA64MMFR2, IDS, 36, 4) -FIELD(ID_AA64MMFR2, FWB, 40, 4) -FIELD(ID_AA64MMFR2, TTL, 48, 4) -FIELD(ID_AA64MMFR2, BBM, 52, 4) -FIELD(ID_AA64MMFR2, EVT, 56, 4) -FIELD(ID_AA64MMFR2, E0PD, 60, 4) - -FIELD(ID_AA64MMFR3, TCRX, 0, 4) -FIELD(ID_AA64MMFR3, SCTLRX, 4, 4) -FIELD(ID_AA64MMFR3, S1PIE, 8, 4) -FIELD(ID_AA64MMFR3, S2PIE, 12, 4) -FIELD(ID_AA64MMFR3, S1POE, 16, 4) -FIELD(ID_AA64MMFR3, S2POE, 20, 4) -FIELD(ID_AA64MMFR3, AIE, 24, 4) -FIELD(ID_AA64MMFR3, MEC, 28, 4) -FIELD(ID_AA64MMFR3, D128, 32, 4) -FIELD(ID_AA64MMFR3, D128_2, 36, 4) -FIELD(ID_AA64MMFR3, SNERR, 40, 4) -FIELD(ID_AA64MMFR3, ANERR, 44, 4) -FIELD(ID_AA64MMFR3, SDERR, 52, 4) -FIELD(ID_AA64MMFR3, ADERR, 56, 4) -FIELD(ID_AA64MMFR3, SPEC_FPACC, 60, 4) - -FIELD(ID_AA64DFR0, DEBUGVER, 0, 4) -FIELD(ID_AA64DFR0, TRACEVER, 4, 4) -FIELD(ID_AA64DFR0, PMUVER, 8, 4) -FIELD(ID_AA64DFR0, BRPS, 12, 4) -FIELD(ID_AA64DFR0, PMSS, 16, 4) -FIELD(ID_AA64DFR0, WRPS, 20, 4) -FIELD(ID_AA64DFR0, SEBEP, 24, 4) -FIELD(ID_AA64DFR0, CTX_CMPS, 28, 4) -FIELD(ID_AA64DFR0, PMSVER, 32, 4) -FIELD(ID_AA64DFR0, DOUBLELOCK, 36, 4) -FIELD(ID_AA64DFR0, TRACEFILT, 40, 4) -FIELD(ID_AA64DFR0, TRACEBUFFER, 44, 4) -FIELD(ID_AA64DFR0, MTPMU, 48, 4) -FIELD(ID_AA64DFR0, BRBE, 52, 4) -FIELD(ID_AA64DFR0, EXTTRCBUFF, 56, 4) -FIELD(ID_AA64DFR0, HPMN0, 60, 4) - -FIELD(ID_AA64ZFR0, SVEVER, 0, 4) -FIELD(ID_AA64ZFR0, AES, 4, 4) -FIELD(ID_AA64ZFR0, BITPERM, 16, 4) -FIELD(ID_AA64ZFR0, BFLOAT16, 20, 4) -FIELD(ID_AA64ZFR0, B16B16, 24, 4) -FIELD(ID_AA64ZFR0, SHA3, 32, 4) -FIELD(ID_AA64ZFR0, 
SM4, 40, 4) -FIELD(ID_AA64ZFR0, I8MM, 44, 4) -FIELD(ID_AA64ZFR0, F32MM, 52, 4) -FIELD(ID_AA64ZFR0, F64MM, 56, 4) - -FIELD(ID_AA64SMFR0, F32F32, 32, 1) -FIELD(ID_AA64SMFR0, BI32I32, 33, 1) -FIELD(ID_AA64SMFR0, B16F32, 34, 1) -FIELD(ID_AA64SMFR0, F16F32, 35, 1) -FIELD(ID_AA64SMFR0, I8I32, 36, 4) -FIELD(ID_AA64SMFR0, F16F16, 42, 1) -FIELD(ID_AA64SMFR0, B16B16, 43, 1) -FIELD(ID_AA64SMFR0, I16I32, 44, 4) -FIELD(ID_AA64SMFR0, F64F64, 48, 1) -FIELD(ID_AA64SMFR0, I16I64, 52, 4) -FIELD(ID_AA64SMFR0, SMEVER, 56, 4) -FIELD(ID_AA64SMFR0, FA64, 63, 1) - -FIELD(ID_DFR0, COPDBG, 0, 4) -FIELD(ID_DFR0, COPSDBG, 4, 4) -FIELD(ID_DFR0, MMAPDBG, 8, 4) -FIELD(ID_DFR0, COPTRC, 12, 4) -FIELD(ID_DFR0, MMAPTRC, 16, 4) -FIELD(ID_DFR0, MPROFDBG, 20, 4) -FIELD(ID_DFR0, PERFMON, 24, 4) -FIELD(ID_DFR0, TRACEFILT, 28, 4) - -FIELD(ID_DFR1, MTPMU, 0, 4) -FIELD(ID_DFR1, HPMN0, 4, 4) - -FIELD(DBGDIDR, SE_IMP, 12, 1) -FIELD(DBGDIDR, NSUHD_IMP, 14, 1) -FIELD(DBGDIDR, VERSION, 16, 4) -FIELD(DBGDIDR, CTX_CMPS, 20, 4) -FIELD(DBGDIDR, BRPS, 24, 4) -FIELD(DBGDIDR, WRPS, 28, 4) - -FIELD(DBGDEVID, PCSAMPLE, 0, 4) -FIELD(DBGDEVID, WPADDRMASK, 4, 4) -FIELD(DBGDEVID, BPADDRMASK, 8, 4) -FIELD(DBGDEVID, VECTORCATCH, 12, 4) -FIELD(DBGDEVID, VIRTEXTNS, 16, 4) -FIELD(DBGDEVID, DOUBLELOCK, 20, 4) -FIELD(DBGDEVID, AUXREGS, 24, 4) -FIELD(DBGDEVID, CIDMASK, 28, 4) - -FIELD(DBGDEVID1, PCSROFFSET, 0, 4) - -FIELD(MVFR0, SIMDREG, 0, 4) -FIELD(MVFR0, FPSP, 4, 4) -FIELD(MVFR0, FPDP, 8, 4) -FIELD(MVFR0, FPTRAP, 12, 4) -FIELD(MVFR0, FPDIVIDE, 16, 4) -FIELD(MVFR0, FPSQRT, 20, 4) -FIELD(MVFR0, FPSHVEC, 24, 4) -FIELD(MVFR0, FPROUND, 28, 4) - -FIELD(MVFR1, FPFTZ, 0, 4) -FIELD(MVFR1, FPDNAN, 4, 4) -FIELD(MVFR1, SIMDLS, 8, 4) /* A-profile only */ -FIELD(MVFR1, SIMDINT, 12, 4) /* A-profile only */ -FIELD(MVFR1, SIMDSP, 16, 4) /* A-profile only */ -FIELD(MVFR1, SIMDHP, 20, 4) /* A-profile only */ -FIELD(MVFR1, MVE, 8, 4) /* M-profile only */ -FIELD(MVFR1, FP16, 20, 4) /* M-profile only */ -FIELD(MVFR1, FPHP, 24, 4) -FIELD(MVFR1, SIMDFMAC, 28, 4) - -FIELD(MVFR2, SIMDMISC, 0, 4) -FIELD(MVFR2, FPMISC, 4, 4) - FIELD(GPCCR, PPS, 0, 3) +FIELD(GPCCR, RLPAD, 5, 1) +FIELD(GPCCR, NSPAD, 6, 1) +FIELD(GPCCR, SPAD, 7, 1) FIELD(GPCCR, IRGN, 8, 2) FIELD(GPCCR, ORGN, 10, 2) FIELD(GPCCR, SH, 12, 2) FIELD(GPCCR, PGS, 14, 2) FIELD(GPCCR, GPC, 16, 1) FIELD(GPCCR, GPCP, 17, 1) +FIELD(GPCCR, TBGPCD, 18, 1) +FIELD(GPCCR, NSO, 19, 1) FIELD(GPCCR, L0GPTSZ, 20, 4) +FIELD(GPCCR, APPSAA, 24, 1) FIELD(MFAR, FPA, 12, 40) FIELD(MFAR, NSE, 62, 1) @@ -2396,8 +2046,6 @@ QEMU_BUILD_BUG_ON(ARRAY_SIZE(((ARMCPU *)0)->ccsidr) <= R_V7M_CSSELR_INDEX_MASK); */ enum arm_features { ARM_FEATURE_AUXCR, /* ARM1026 Auxiliary control register. */ - ARM_FEATURE_XSCALE, /* Intel XScale extensions. */ - ARM_FEATURE_IWMMXT, /* Intel iwMMXt extension. 
*/ ARM_FEATURE_V6, ARM_FEATURE_V6K, ARM_FEATURE_V7, @@ -2598,6 +2246,7 @@ static inline bool arm_is_el2_enabled(CPUARMState *env) */ uint64_t arm_hcr_el2_eff_secstate(CPUARMState *env, ARMSecuritySpace space); uint64_t arm_hcr_el2_eff(CPUARMState *env); +uint64_t arm_hcr_el2_nvx_eff(CPUARMState *env); uint64_t arm_hcrx_el2_eff(CPUARMState *env); /* @@ -2677,212 +2326,6 @@ bool write_cpustate_to_list(ARMCPU *cpu, bool kvm_sync); #define TYPE_ARM_HOST_CPU "host-" TYPE_ARM_CPU -/* ARM has the following "translation regimes" (as the ARM ARM calls them): - * - * If EL3 is 64-bit: - * + NonSecure EL1 & 0 stage 1 - * + NonSecure EL1 & 0 stage 2 - * + NonSecure EL2 - * + NonSecure EL2 & 0 (ARMv8.1-VHE) - * + Secure EL1 & 0 stage 1 - * + Secure EL1 & 0 stage 2 (FEAT_SEL2) - * + Secure EL2 (FEAT_SEL2) - * + Secure EL2 & 0 (FEAT_SEL2) - * + Realm EL1 & 0 stage 1 (FEAT_RME) - * + Realm EL1 & 0 stage 2 (FEAT_RME) - * + Realm EL2 (FEAT_RME) - * + EL3 - * If EL3 is 32-bit: - * + NonSecure PL1 & 0 stage 1 - * + NonSecure PL1 & 0 stage 2 - * + NonSecure PL2 - * + Secure PL1 & 0 - * (reminder: for 32 bit EL3, Secure PL1 is *EL3*, not EL1.) - * - * For QEMU, an mmu_idx is not quite the same as a translation regime because: - * 1. we need to split the "EL1 & 0" and "EL2 & 0" regimes into two mmu_idxes, - * because they may differ in access permissions even if the VA->PA map is - * the same - * 2. we want to cache in our TLB the full VA->IPA->PA lookup for a stage 1+2 - * translation, which means that we have one mmu_idx that deals with two - * concatenated translation regimes [this sort of combined s1+2 TLB is - * architecturally permitted] - * 3. we don't need to allocate an mmu_idx to translations that we won't be - * handling via the TLB. The only way to do a stage 1 translation without - * the immediate stage 2 translation is via the ATS or AT system insns, - * which can be slow-pathed and always do a page table walk. - * The only use of stage 2 translations is either as part of an s1+2 - * lookup or when loading the descriptors during a stage 1 page table walk, - * and in both those cases we don't use the TLB. - * 4. we can also safely fold together the "32 bit EL3" and "64 bit EL3" - * translation regimes, because they map reasonably well to each other - * and they can't both be active at the same time. - * 5. we want to be able to use the TLB for accesses done as part of a - * stage1 page table walk, rather than having to walk the stage2 page - * table over and over. - * 6. we need separate EL1/EL2 mmu_idx for handling the Privileged Access - * Never (PAN) bit within PSTATE. - * 7. we fold together most secure and non-secure regimes for A-profile, - * because there are no banked system registers for aarch64, so the - * process of switching between secure and non-secure is - * already heavyweight. - * 8. we cannot fold together Stage 2 Secure and Stage 2 NonSecure, - * because both are in use simultaneously for Secure EL2. - * - * This gives us the following list of cases: - * - * EL0 EL1&0 stage 1+2 (aka NS PL0 PL1&0 stage 1+2) - * EL1 EL1&0 stage 1+2 (aka NS PL1 PL1&0 stage 1+2) - * EL1 EL1&0 stage 1+2 +PAN (aka NS PL1 P1&0 stage 1+2 +PAN) - * EL0 EL2&0 - * EL2 EL2&0 - * EL2 EL2&0 +PAN - * EL2 (aka NS PL2) - * EL3 (aka AArch32 S PL1 PL1&0) - * AArch32 S PL0 PL1&0 (we call this EL30_0) - * AArch32 S PL1 PL1&0 +PAN (we call this EL30_3_PAN) - * Stage2 Secure - * Stage2 NonSecure - * plus one TLB per Physical address space: S, NS, Realm, Root - * - * for a total of 16 different mmu_idx. 
- * - * R profile CPUs have an MPU, but can use the same set of MMU indexes - * as A profile. They only need to distinguish EL0 and EL1 (and - * EL2 for cores like the Cortex-R52). - * - * M profile CPUs are rather different as they do not have a true MMU. - * They have the following different MMU indexes: - * User - * Privileged - * User, execution priority negative (ie the MPU HFNMIENA bit may apply) - * Privileged, execution priority negative (ditto) - * If the CPU supports the v8M Security Extension then there are also: - * Secure User - * Secure Privileged - * Secure User, execution priority negative - * Secure Privileged, execution priority negative - * - * The ARMMMUIdx and the mmu index value used by the core QEMU TLB code - * are not quite the same -- different CPU types (most notably M profile - * vs A/R profile) would like to use MMU indexes with different semantics, - * but since we don't ever need to use all of those in a single CPU we - * can avoid having to set NB_MMU_MODES to "total number of A profile MMU - * modes + total number of M profile MMU modes". The lower bits of - * ARMMMUIdx are the core TLB mmu index, and the higher bits are always - * the same for any particular CPU. - * Variables of type ARMMUIdx are always full values, and the core - * index values are in variables of type 'int'. - * - * Our enumeration includes at the end some entries which are not "true" - * mmu_idx values in that they don't have corresponding TLBs and are only - * valid for doing slow path page table walks. - * - * The constant names here are patterned after the general style of the names - * of the AT/ATS operations. - * The values used are carefully arranged to make mmu_idx => EL lookup easy. - * For M profile we arrange them to have a bit for priv, a bit for negpri - * and a bit for secure. - */ -#define ARM_MMU_IDX_A 0x10 /* A profile */ -#define ARM_MMU_IDX_NOTLB 0x20 /* does not have a TLB */ -#define ARM_MMU_IDX_M 0x40 /* M profile */ - -/* Meanings of the bits for M profile mmu idx values */ -#define ARM_MMU_IDX_M_PRIV 0x1 -#define ARM_MMU_IDX_M_NEGPRI 0x2 -#define ARM_MMU_IDX_M_S 0x4 /* Secure */ - -#define ARM_MMU_IDX_TYPE_MASK \ - (ARM_MMU_IDX_A | ARM_MMU_IDX_M | ARM_MMU_IDX_NOTLB) -#define ARM_MMU_IDX_COREIDX_MASK 0xf - -typedef enum ARMMMUIdx { - /* - * A-profile. - */ - ARMMMUIdx_E10_0 = 0 | ARM_MMU_IDX_A, - ARMMMUIdx_E20_0 = 1 | ARM_MMU_IDX_A, - ARMMMUIdx_E10_1 = 2 | ARM_MMU_IDX_A, - ARMMMUIdx_E20_2 = 3 | ARM_MMU_IDX_A, - ARMMMUIdx_E10_1_PAN = 4 | ARM_MMU_IDX_A, - ARMMMUIdx_E20_2_PAN = 5 | ARM_MMU_IDX_A, - ARMMMUIdx_E2 = 6 | ARM_MMU_IDX_A, - ARMMMUIdx_E3 = 7 | ARM_MMU_IDX_A, - ARMMMUIdx_E30_0 = 8 | ARM_MMU_IDX_A, - ARMMMUIdx_E30_3_PAN = 9 | ARM_MMU_IDX_A, - - /* - * Used for second stage of an S12 page table walk, or for descriptor - * loads during first stage of an S1 page table walk. Note that both - * are in use simultaneously for SecureEL2: the security state for - * the S2 ptw is selected by the NS bit from the S1 ptw. - */ - ARMMMUIdx_Stage2_S = 10 | ARM_MMU_IDX_A, - ARMMMUIdx_Stage2 = 11 | ARM_MMU_IDX_A, - - /* TLBs with 1-1 mapping to the physical address spaces. */ - ARMMMUIdx_Phys_S = 12 | ARM_MMU_IDX_A, - ARMMMUIdx_Phys_NS = 13 | ARM_MMU_IDX_A, - ARMMMUIdx_Phys_Root = 14 | ARM_MMU_IDX_A, - ARMMMUIdx_Phys_Realm = 15 | ARM_MMU_IDX_A, - - /* - * These are not allocated TLBs and are used only for AT system - * instructions or for the first stage of an S12 page table walk. 
- */ - ARMMMUIdx_Stage1_E0 = 0 | ARM_MMU_IDX_NOTLB, - ARMMMUIdx_Stage1_E1 = 1 | ARM_MMU_IDX_NOTLB, - ARMMMUIdx_Stage1_E1_PAN = 2 | ARM_MMU_IDX_NOTLB, - - /* - * M-profile. - */ - ARMMMUIdx_MUser = ARM_MMU_IDX_M, - ARMMMUIdx_MPriv = ARM_MMU_IDX_M | ARM_MMU_IDX_M_PRIV, - ARMMMUIdx_MUserNegPri = ARMMMUIdx_MUser | ARM_MMU_IDX_M_NEGPRI, - ARMMMUIdx_MPrivNegPri = ARMMMUIdx_MPriv | ARM_MMU_IDX_M_NEGPRI, - ARMMMUIdx_MSUser = ARMMMUIdx_MUser | ARM_MMU_IDX_M_S, - ARMMMUIdx_MSPriv = ARMMMUIdx_MPriv | ARM_MMU_IDX_M_S, - ARMMMUIdx_MSUserNegPri = ARMMMUIdx_MUserNegPri | ARM_MMU_IDX_M_S, - ARMMMUIdx_MSPrivNegPri = ARMMMUIdx_MPrivNegPri | ARM_MMU_IDX_M_S, -} ARMMMUIdx; - -/* - * Bit macros for the core-mmu-index values for each index, - * for use when calling tlb_flush_by_mmuidx() and friends. - */ -#define TO_CORE_BIT(NAME) \ - ARMMMUIdxBit_##NAME = 1 << (ARMMMUIdx_##NAME & ARM_MMU_IDX_COREIDX_MASK) - -typedef enum ARMMMUIdxBit { - TO_CORE_BIT(E10_0), - TO_CORE_BIT(E20_0), - TO_CORE_BIT(E10_1), - TO_CORE_BIT(E10_1_PAN), - TO_CORE_BIT(E2), - TO_CORE_BIT(E20_2), - TO_CORE_BIT(E20_2_PAN), - TO_CORE_BIT(E3), - TO_CORE_BIT(E30_0), - TO_CORE_BIT(E30_3_PAN), - TO_CORE_BIT(Stage2), - TO_CORE_BIT(Stage2_S), - - TO_CORE_BIT(MUser), - TO_CORE_BIT(MPriv), - TO_CORE_BIT(MUserNegPri), - TO_CORE_BIT(MPrivNegPri), - TO_CORE_BIT(MSUser), - TO_CORE_BIT(MSPriv), - TO_CORE_BIT(MSUserNegPri), - TO_CORE_BIT(MSPrivNegPri), -} ARMMMUIdxBit; - -#undef TO_CORE_BIT - -#define MMU_USER_IDX 0 - /* Indexes used when registering address spaces with cpu_address_space_init */ typedef enum ARMASIdx { ARMASIdx_NS = 0, @@ -2913,7 +2356,7 @@ static inline bool arm_v7m_csselr_razwi(ARMCPU *cpu) /* If all the CLIDR.Ctypem bits are 0 there are no caches, and * CSSELR is RAZ/WI. */ - return (cpu->clidr & R_V7M_CLIDR_CTYPE_ALL_MASK) != 0; + return (GET_IDREG(&cpu->isar, CLIDR) & R_V7M_CLIDR_CTYPE_ALL_MASK) != 0; } static inline bool arm_sctlr_b(CPUARMState *env) @@ -2977,13 +2420,6 @@ FIELD(TBFLAG_AM32, THUMB, 23, 1) /* Not cached. */ */ FIELD(TBFLAG_A32, VECLEN, 0, 3) /* Not cached. */ FIELD(TBFLAG_A32, VECSTRIDE, 3, 2) /* Not cached. */ -/* - * We store the bottom two bits of the CPAR as TB flags and handle - * checks on the other bits at runtime. This shares the same bits as - * VECSTRIDE, which is OK as no XScale CPU has VFP. - * Not cached, because VECLEN+VECSTRIDE are not cached. - */ -FIELD(TBFLAG_A32, XSCALE_CPAR, 5, 2) FIELD(TBFLAG_A32, VFPEN, 7, 1) /* Partially cached, minus FPEXC. */ FIELD(TBFLAG_A32, SCTLR__B, 8, 1) /* Cannot overlap with SCTLR_B */ FIELD(TBFLAG_A32, HSTR_ACTIVE, 9, 1) @@ -3045,12 +2481,15 @@ FIELD(TBFLAG_A64, ATA0, 31, 1) FIELD(TBFLAG_A64, NV, 32, 1) FIELD(TBFLAG_A64, NV1, 33, 1) FIELD(TBFLAG_A64, NV2, 34, 1) -/* Set if FEAT_NV2 RAM accesses use the EL2&0 translation regime */ -FIELD(TBFLAG_A64, NV2_MEM_E20, 35, 1) +FIELD(TBFLAG_A64, E2H, 35, 1) /* Set if FEAT_NV2 RAM accesses are big-endian */ FIELD(TBFLAG_A64, NV2_MEM_BE, 36, 1) FIELD(TBFLAG_A64, AH, 37, 1) /* FPCR.AH */ FIELD(TBFLAG_A64, NEP, 38, 1) /* FPCR.NEP */ +FIELD(TBFLAG_A64, ZT0EXC_EL, 39, 2) +FIELD(TBFLAG_A64, GCS_EN, 41, 1) +FIELD(TBFLAG_A64, GCS_RVCEN, 42, 1) +FIELD(TBFLAG_A64, GCSSTR_EL, 43, 2) /* * Helpers for using the above. Note that only the A64 accessors use @@ -3203,7 +2642,6 @@ extern const uint64_t pred_esz_masks[5]; */ #define PAGE_BTI PAGE_TARGET_1 #define PAGE_MTE PAGE_TARGET_2 -#define PAGE_TARGET_STICKY PAGE_MTE /* We associate one allocation tag per 16 bytes, the minimum. 
*/ #define LOG2_TAG_GRANULE 4 diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c index 200da1c..26cf7e6 100644 --- a/target/arm/cpu64.c +++ b/target/arm/cpu64.c @@ -36,6 +36,28 @@ #include "cpu-features.h" #include "cpregs.h" +/* convert between <register>_IDX and SYS_<register> */ +#define DEF(NAME, OP0, OP1, CRN, CRM, OP2) \ + [NAME##_IDX] = SYS_##NAME, + +const uint32_t id_register_sysreg[NUM_ID_IDX] = { +#include "cpu-sysregs.h.inc" +}; + +#undef DEF +#define DEF(NAME, OP0, OP1, CRN, CRM, OP2) \ + case SYS_##NAME: return NAME##_IDX; + +int get_sysreg_idx(ARMSysRegs sysreg) +{ + switch (sysreg) { +#include "cpu-sysregs.h.inc" + } + g_assert_not_reached(); +} + +#undef DEF + void arm_cpu_sve_finalize(ARMCPU *cpu, Error **errp) { /* @@ -114,7 +136,7 @@ void arm_cpu_sve_finalize(ARMCPU *cpu, Error **errp) * SVE is disabled and so are all vector lengths. Good. * Disable all SVE extensions as well. */ - cpu->isar.id_aa64zfr0 = 0; + SET_IDREG(&cpu->isar, ID_AA64ZFR0, 0); return; } @@ -237,6 +259,13 @@ void arm_cpu_sve_finalize(ARMCPU *cpu, Error **errp) /* From now on sve_max_vq is the actual maximum supported length. */ cpu->sve_max_vq = max_vq; cpu->sve_vq.map = vq_map; + + /* FEAT_F64MM requires the existence of a 256-bit vector size. */ + if (max_vq < 2) { + uint64_t t = GET_IDREG(&cpu->isar, ID_AA64ZFR0); + t = FIELD_DP64(t, ID_AA64ZFR0, F64MM, 0); + SET_IDREG(&cpu->isar, ID_AA64ZFR0, t); + } } /* @@ -288,16 +317,13 @@ static bool cpu_arm_get_sve(Object *obj, Error **errp) static void cpu_arm_set_sve(Object *obj, bool value, Error **errp) { ARMCPU *cpu = ARM_CPU(obj); - uint64_t t; if (value && kvm_enabled() && !kvm_arm_sve_supported()) { error_setg(errp, "'sve' feature not supported by KVM on this host"); return; } - t = cpu->isar.id_aa64pfr0; - t = FIELD_DP64(t, ID_AA64PFR0, SVE, value); - cpu->isar.id_aa64pfr0 = t; + FIELD_DP64_IDREG(&cpu->isar, ID_AA64PFR0, SVE, value); } void arm_cpu_sme_finalize(ARMCPU *cpu, Error **errp) @@ -309,7 +335,7 @@ void arm_cpu_sme_finalize(ARMCPU *cpu, Error **errp) if (vq_map == 0) { if (!cpu_isar_feature(aa64_sme, cpu)) { - cpu->isar.id_aa64smfr0 = 0; + SET_IDREG(&cpu->isar, ID_AA64SMFR0, 0); return; } @@ -337,6 +363,7 @@ void arm_cpu_sme_finalize(ARMCPU *cpu, Error **errp) } cpu->sme_vq.map = vq_map; + cpu->sme_max_vq = 32 - clz32(vq_map); } static bool cpu_arm_get_sme(Object *obj, Error **errp) @@ -348,11 +375,8 @@ static bool cpu_arm_get_sme(Object *obj, Error **errp) static void cpu_arm_set_sme(Object *obj, bool value, Error **errp) { ARMCPU *cpu = ARM_CPU(obj); - uint64_t t; - t = cpu->isar.id_aa64pfr1; - t = FIELD_DP64(t, ID_AA64PFR1, SME, value); - cpu->isar.id_aa64pfr1 = t; + FIELD_DP64_IDREG(&cpu->isar, ID_AA64PFR1, SME, value); } static bool cpu_arm_get_sme_fa64(Object *obj, Error **errp) @@ -365,11 +389,8 @@ static bool cpu_arm_get_sme_fa64(Object *obj, Error **errp) static void cpu_arm_set_sme_fa64(Object *obj, bool value, Error **errp) { ARMCPU *cpu = ARM_CPU(obj); - uint64_t t; - t = cpu->isar.id_aa64smfr0; - t = FIELD_DP64(t, ID_AA64SMFR0, FA64, value); - cpu->isar.id_aa64smfr0 = t; + FIELD_DP64_IDREG(&cpu->isar, ID_AA64SMFR0, FA64, value); } #ifdef CONFIG_USER_ONLY @@ -480,6 +501,7 @@ void aarch64_add_sme_properties(Object *obj) void arm_cpu_pauth_finalize(ARMCPU *cpu, Error **errp) { ARMPauthFeature features = cpu_isar_feature(pauth_feature, cpu); + ARMISARegisters *isar = &cpu->isar; uint64_t isar1, isar2; /* @@ -490,13 +512,13 @@ void arm_cpu_pauth_finalize(ARMCPU *cpu, Error **errp) * * Begin by disabling all fields. 
*/ - isar1 = cpu->isar.id_aa64isar1; + isar1 = GET_IDREG(isar, ID_AA64ISAR1); isar1 = FIELD_DP64(isar1, ID_AA64ISAR1, APA, 0); isar1 = FIELD_DP64(isar1, ID_AA64ISAR1, GPA, 0); isar1 = FIELD_DP64(isar1, ID_AA64ISAR1, API, 0); isar1 = FIELD_DP64(isar1, ID_AA64ISAR1, GPI, 0); - isar2 = cpu->isar.id_aa64isar2; + isar2 = GET_IDREG(isar, ID_AA64ISAR2); isar2 = FIELD_DP64(isar2, ID_AA64ISAR2, APA3, 0); isar2 = FIELD_DP64(isar2, ID_AA64ISAR2, GPA3, 0); @@ -558,8 +580,8 @@ void arm_cpu_pauth_finalize(ARMCPU *cpu, Error **errp) } } - cpu->isar.id_aa64isar1 = isar1; - cpu->isar.id_aa64isar2 = isar2; + SET_IDREG(isar, ID_AA64ISAR1, isar1); + SET_IDREG(isar, ID_AA64ISAR2, isar2); } static const Property arm_cpu_pauth_property = @@ -606,17 +628,18 @@ void arm_cpu_lpa2_finalize(ARMCPU *cpu, Error **errp) return; } - t = cpu->isar.id_aa64mmfr0; + t = GET_IDREG(&cpu->isar, ID_AA64MMFR0); t = FIELD_DP64(t, ID_AA64MMFR0, TGRAN16, 2); /* 16k pages w/ LPA2 */ t = FIELD_DP64(t, ID_AA64MMFR0, TGRAN4, 1); /* 4k pages w/ LPA2 */ t = FIELD_DP64(t, ID_AA64MMFR0, TGRAN16_2, 3); /* 16k stage2 w/ LPA2 */ t = FIELD_DP64(t, ID_AA64MMFR0, TGRAN4_2, 3); /* 4k stage2 w/ LPA2 */ - cpu->isar.id_aa64mmfr0 = t; + SET_IDREG(&cpu->isar, ID_AA64MMFR0, t); } static void aarch64_a57_initfn(Object *obj) { ARMCPU *cpu = ARM_CPU(obj); + ARMISARegisters *isar = &cpu->isar; cpu->dtb_compatible = "arm,cortex-a57"; set_feature(&cpu->env, ARM_FEATURE_V8); @@ -637,30 +660,30 @@ static void aarch64_a57_initfn(Object *obj) cpu->isar.mvfr2 = 0x00000043; cpu->ctr = 0x8444c004; cpu->reset_sctlr = 0x00c50838; - cpu->isar.id_pfr0 = 0x00000131; - cpu->isar.id_pfr1 = 0x00011011; - cpu->isar.id_dfr0 = 0x03010066; - cpu->id_afr0 = 0x00000000; - cpu->isar.id_mmfr0 = 0x10101105; - cpu->isar.id_mmfr1 = 0x40000000; - cpu->isar.id_mmfr2 = 0x01260000; - cpu->isar.id_mmfr3 = 0x02102211; - cpu->isar.id_isar0 = 0x02101110; - cpu->isar.id_isar1 = 0x13112111; - cpu->isar.id_isar2 = 0x21232042; - cpu->isar.id_isar3 = 0x01112131; - cpu->isar.id_isar4 = 0x00011142; - cpu->isar.id_isar5 = 0x00011121; - cpu->isar.id_isar6 = 0; - cpu->isar.id_aa64pfr0 = 0x00002222; - cpu->isar.id_aa64dfr0 = 0x10305106; - cpu->isar.id_aa64isar0 = 0x00011120; - cpu->isar.id_aa64mmfr0 = 0x00001124; + SET_IDREG(isar, ID_PFR0, 0x00000131); + SET_IDREG(isar, ID_PFR1, 0x00011011); + SET_IDREG(isar, ID_DFR0, 0x03010066); + SET_IDREG(isar, ID_AFR0, 0x00000000); + SET_IDREG(isar, ID_MMFR0, 0x10101105); + SET_IDREG(isar, ID_MMFR1, 0x40000000); + SET_IDREG(isar, ID_MMFR2, 0x01260000); + SET_IDREG(isar, ID_MMFR3, 0x02102211); + SET_IDREG(isar, ID_ISAR0, 0x02101110); + SET_IDREG(isar, ID_ISAR1, 0x13112111); + SET_IDREG(isar, ID_ISAR2, 0x21232042); + SET_IDREG(isar, ID_ISAR3, 0x01112131); + SET_IDREG(isar, ID_ISAR4, 0x00011142); + SET_IDREG(isar, ID_ISAR5, 0x00011121); + SET_IDREG(isar, ID_ISAR6, 0); + SET_IDREG(isar, ID_AA64PFR0, 0x00002222); + SET_IDREG(isar, ID_AA64DFR0, 0x10305106); + SET_IDREG(isar, ID_AA64ISAR0, 0x00011120); + SET_IDREG(isar, ID_AA64MMFR0, 0x00001124); cpu->isar.dbgdidr = 0x3516d000; cpu->isar.dbgdevid = 0x01110f13; cpu->isar.dbgdevid1 = 0x2; cpu->isar.reset_pmcr_el0 = 0x41013000; - cpu->clidr = 0x0a200023; + SET_IDREG(isar, CLIDR, 0x0a200023); /* 32KB L1 dcache */ cpu->ccsidr[0] = make_ccsidr(CCSIDR_FORMAT_LEGACY, 4, 64, 32 * KiB, 7); /* 48KB L1 icache */ @@ -678,6 +701,7 @@ static void aarch64_a57_initfn(Object *obj) static void aarch64_a53_initfn(Object *obj) { ARMCPU *cpu = ARM_CPU(obj); + ARMISARegisters *isar = &cpu->isar; cpu->dtb_compatible = "arm,cortex-a53"; 
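The SET_IDREG() / GET_IDREG() / FIELD_DP64_IDREG() conversions in these hunks all go through the idregs[] array added to ARMISARegisters earlier in this patch. A minimal sketch of how the accessors compose, using only macros and ID register fields that appear in the patch (the wrapper function itself is invented for illustration and is not part of the patch):

    static void sketch_enable_sve(ARMCPU *cpu)
    {
        ARMISARegisters *isar = &cpu->isar;

        /* Read the whole register out of isar->idregs[ID_AA64PFR0_EL1_IDX]... */
        uint64_t pfr0 = GET_IDREG(isar, ID_AA64PFR0);
        /* ...update a single field with the generic FIELD_DP64()... */
        pfr0 = FIELD_DP64(pfr0, ID_AA64PFR0, SVE, 1);
        /* ...and write it back. */
        SET_IDREG(isar, ID_AA64PFR0, pfr0);

        /* Equivalent one-step form, as used by cpu_arm_set_sve() above: */
        FIELD_DP64_IDREG(isar, ID_AA64PFR0, SVE, 1);
    }
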
set_feature(&cpu->env, ARM_FEATURE_V8); @@ -698,30 +722,30 @@ static void aarch64_a53_initfn(Object *obj) cpu->isar.mvfr2 = 0x00000043; cpu->ctr = 0x84448004; /* L1Ip = VIPT */ cpu->reset_sctlr = 0x00c50838; - cpu->isar.id_pfr0 = 0x00000131; - cpu->isar.id_pfr1 = 0x00011011; - cpu->isar.id_dfr0 = 0x03010066; - cpu->id_afr0 = 0x00000000; - cpu->isar.id_mmfr0 = 0x10101105; - cpu->isar.id_mmfr1 = 0x40000000; - cpu->isar.id_mmfr2 = 0x01260000; - cpu->isar.id_mmfr3 = 0x02102211; - cpu->isar.id_isar0 = 0x02101110; - cpu->isar.id_isar1 = 0x13112111; - cpu->isar.id_isar2 = 0x21232042; - cpu->isar.id_isar3 = 0x01112131; - cpu->isar.id_isar4 = 0x00011142; - cpu->isar.id_isar5 = 0x00011121; - cpu->isar.id_isar6 = 0; - cpu->isar.id_aa64pfr0 = 0x00002222; - cpu->isar.id_aa64dfr0 = 0x10305106; - cpu->isar.id_aa64isar0 = 0x00011120; - cpu->isar.id_aa64mmfr0 = 0x00001122; /* 40 bit physical addr */ + SET_IDREG(isar, ID_PFR0, 0x00000131); + SET_IDREG(isar, ID_PFR1, 0x00011011); + SET_IDREG(isar, ID_DFR0, 0x03010066); + SET_IDREG(isar, ID_AFR0, 0x00000000); + SET_IDREG(isar, ID_MMFR0, 0x10101105); + SET_IDREG(isar, ID_MMFR1, 0x40000000); + SET_IDREG(isar, ID_MMFR2, 0x01260000); + SET_IDREG(isar, ID_MMFR3, 0x02102211); + SET_IDREG(isar, ID_ISAR0, 0x02101110); + SET_IDREG(isar, ID_ISAR1, 0x13112111); + SET_IDREG(isar, ID_ISAR2, 0x21232042); + SET_IDREG(isar, ID_ISAR3, 0x01112131); + SET_IDREG(isar, ID_ISAR4, 0x00011142); + SET_IDREG(isar, ID_ISAR5, 0x00011121); + SET_IDREG(isar, ID_ISAR6, 0); + SET_IDREG(isar, ID_AA64PFR0, 0x00002222); + SET_IDREG(isar, ID_AA64DFR0, 0x10305106); + SET_IDREG(isar, ID_AA64ISAR0, 0x00011120); + SET_IDREG(isar, ID_AA64MMFR0, 0x00001122); /* 40 bit physical addr */ cpu->isar.dbgdidr = 0x3516d000; cpu->isar.dbgdevid = 0x00110f13; cpu->isar.dbgdevid1 = 0x1; cpu->isar.reset_pmcr_el0 = 0x41033000; - cpu->clidr = 0x0a200023; + SET_IDREG(isar, CLIDR, 0x0a200023); /* 32KB L1 dcache */ cpu->ccsidr[0] = make_ccsidr(CCSIDR_FORMAT_LEGACY, 4, 64, 32 * KiB, 7); /* 32KB L1 icache */ diff --git a/target/arm/debug_helper.c b/target/arm/debug_helper.c index 69fb1d0..579516e 100644 --- a/target/arm/debug_helper.c +++ b/target/arm/debug_helper.c @@ -940,6 +940,13 @@ static void dbgclaimclr_write(CPUARMState *env, const ARMCPRegInfo *ri, env->cp15.dbgclaim &= ~(value & 0xFF); } +static CPAccessResult access_bogus(CPUARMState *env, const ARMCPRegInfo *ri, + bool isread) +{ + /* Always UNDEF, as if this cpreg didn't exist */ + return CP_ACCESS_UNDEFINED; +} + static const ARMCPRegInfo debug_cp_reginfo[] = { /* * DBGDRAR, DBGDSAR: always RAZ since we don't implement memory mapped @@ -988,11 +995,42 @@ static const ARMCPRegInfo debug_cp_reginfo[] = { .opc0 = 2, .opc1 = 0, .crn = 0, .crm = 3, .opc2 = 2, .access = PL1_RW, .accessfn = access_tdcc, .type = ARM_CP_CONST, .resetvalue = 0 }, - /* DBGDTRTX_EL0/DBGDTRRX_EL0 depend on direction */ - { .name = "DBGDTR_EL0", .state = ARM_CP_STATE_BOTH, .cp = 14, + /* Architecturally DBGDTRTX is named DBGDTRRX when used for reads */ + { .name = "DBGDTRTX_EL0", .state = ARM_CP_STATE_AA64, .opc0 = 2, .opc1 = 3, .crn = 0, .crm = 5, .opc2 = 0, .access = PL0_RW, .accessfn = access_tdcc, .type = ARM_CP_CONST, .resetvalue = 0 }, + { .name = "DBGDTRTX", .state = ARM_CP_STATE_AA32, .cp = 14, + .opc1 = 0, .crn = 0, .crm = 5, .opc2 = 0, + .access = PL0_RW, .accessfn = access_tdcc, + .type = ARM_CP_CONST, .resetvalue = 0 }, + /* This is AArch64-only and is a combination of DBGDTRTX and DBGDTRRX */ + { .name = "DBGDTR_EL0", .state = ARM_CP_STATE_AA64, + .opc0 = 2, .opc1 = 3, .crn 
= 0, .crm = 4, .opc2 = 0, + .access = PL0_RW, .accessfn = access_tdcc, + .type = ARM_CP_CONST, .resetvalue = 0 }, + /* + * This is not a real AArch32 register. We used to incorrectly expose + * this due to a QEMU bug; to avoid breaking migration compatibility we + * need to continue to provide it so that we don't fail the inbound + * migration when it tells us about a sysreg that we don't have. + * We set an always-fails .accessfn, which means that the guest doesn't + * actually see this register (it will always UNDEF, identically to if + * there were no cpreg definition for it other than that we won't print + * a LOG_UNIMP message about it), and we set the ARM_CP_NO_GDB flag so the + * gdbstub won't see it either. + * (We can't just set .access = 0, because add_cpreg_to_hashtable() + * helpfully ignores cpregs which aren't accessible to the highest + * implemented EL.) + * + * TODO: implement a system for being able to describe "this register + * can be ignored if it appears in the inbound stream"; then we can + * remove this temporary hack. + */ + { .name = "BOGUS_DBGDTR_EL0", .state = ARM_CP_STATE_AA32, + .cp = 14, .opc1 = 3, .crn = 0, .crm = 5, .opc2 = 0, + .access = PL0_RW, .accessfn = access_bogus, + .type = ARM_CP_CONST | ARM_CP_NO_GDB, .resetvalue = 0 }, /* * OSECCR_EL1 provides a mechanism for an operating system * to access the contents of EDECCR. EDECCR is not implemented though, diff --git a/target/arm/el2-stubs.c b/target/arm/el2-stubs.c new file mode 100644 index 0000000..972023c --- /dev/null +++ b/target/arm/el2-stubs.c @@ -0,0 +1,37 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +/* QEMU ARM CPU - user-mode emulation stubs for EL2 interrupts + * + * These should not really be needed, but CP registers for EL2 + * are not elided by user-mode emulation and they call these + * functions. Leave them as stubs until it's cleaned up. + */ + +#include "qemu/osdep.h" +#include "cpu.h" +#include "internals.h" + +void arm_cpu_update_virq(ARMCPU *cpu) +{ + g_assert_not_reached(); +} + +void arm_cpu_update_vfiq(ARMCPU *cpu) +{ + g_assert_not_reached(); +} + +void arm_cpu_update_vinmi(ARMCPU *cpu) +{ + g_assert_not_reached(); +} + +void arm_cpu_update_vfnmi(ARMCPU *cpu) +{ + g_assert_not_reached(); +} + +void arm_cpu_update_vserr(ARMCPU *cpu) +{ + g_assert_not_reached(); +} diff --git a/target/arm/gdbstub.c b/target/arm/gdbstub.c index ce4497a..8d2229f 100644 --- a/target/arm/gdbstub.c +++ b/target/arm/gdbstub.c @@ -247,10 +247,20 @@ static int arm_gdb_get_sysreg(CPUState *cs, GByteArray *buf, int reg) key = cpu->dyn_sysreg_feature.data.cpregs.keys[reg]; ri = get_arm_cp_reginfo(cpu->cp_regs, key); if (ri) { - if (cpreg_field_is_64bit(ri)) { + switch (cpreg_field_type(ri)) { + case MO_64: + if (ri->vhe_redir_to_el2 && + (arm_hcr_el2_eff(env) & HCR_E2H) && + arm_current_el(env) == 2) { + ri = get_arm_cp_reginfo(cpu->cp_regs, ri->vhe_redir_to_el2); + } else if (ri->vhe_redir_to_el01) { + ri = get_arm_cp_reginfo(cpu->cp_regs, ri->vhe_redir_to_el01); + } return gdb_get_reg64(buf, (uint64_t)read_raw_cp_reg(env, ri)); - } else { + case MO_32: return gdb_get_reg32(buf, (uint32_t)read_raw_cp_reg(env, ri)); + default: + g_assert_not_reached(); } } return 0; @@ -527,7 +537,8 @@ void arm_cpu_register_gdb_regs_for_features(ARMCPU *cpu) * registers so we don't need to include both. 
*/ #ifdef TARGET_AARCH64 - if (isar_feature_aa64_sve(&cpu->isar)) { + if (isar_feature_aa64_sve(&cpu->isar) || + isar_feature_aa64_sme(&cpu->isar)) { GDBFeature *feature = arm_gen_dynamic_svereg_feature(cs, cs->gdb_num_regs); gdb_register_coprocessor(cs, aarch64_gdb_get_sve_reg, aarch64_gdb_set_sve_reg, feature, 0); @@ -537,6 +548,13 @@ void arm_cpu_register_gdb_regs_for_features(ARMCPU *cpu) gdb_find_static_feature("aarch64-fpu.xml"), 0); } + + if (isar_feature_aa64_sme(&cpu->isar)) { + GDBFeature *sme_feature = + arm_gen_dynamic_smereg_feature(cs, cs->gdb_num_regs); + gdb_register_coprocessor(cs, aarch64_gdb_get_sme_reg, + aarch64_gdb_set_sme_reg, sme_feature, 0); + } /* * Note that we report pauth information via the feature name * org.gnu.gdb.aarch64.pauth_v2, not org.gnu.gdb.aarch64.pauth. diff --git a/target/arm/gdbstub64.c b/target/arm/gdbstub64.c index 64ee9b3..65d6bbe 100644 --- a/target/arm/gdbstub64.c +++ b/target/arm/gdbstub64.c @@ -47,6 +47,7 @@ int aarch64_cpu_gdb_read_register(CPUState *cs, GByteArray *mem_buf, int n) case 32: return gdb_get_reg64(mem_buf, env->pc); case 33: + /* pstate is now a 64-bit value; can we simply adjust the xml? */ return gdb_get_reg32(mem_buf, pstate_read(env)); } /* Unknown register. */ @@ -75,6 +76,7 @@ int aarch64_cpu_gdb_write_register(CPUState *cs, uint8_t *mem_buf, int n) return 8; case 33: /* CPSR */ + /* pstate is now a 64-bit value; can we simply adjust the xml? */ pstate_write(env, tmp); return 4; } @@ -115,8 +117,22 @@ int aarch64_gdb_set_fpu_reg(CPUState *cs, uint8_t *buf, int reg) /* 128 bit FP register */ { uint64_t *q = aa64_vfp_qreg(env, reg); - q[0] = ldq_le_p(buf); - q[1] = ldq_le_p(buf + 8); + + /* + * On the wire these are target-endian 128 bit values. + * In the CPU state these are host-order uint64_t values + * with the least-significant one first. This means they're + * the other way around for target_big_endian() (which is + * only true for us for aarch64_be-linux-user). + */ + if (target_big_endian()) { + q[1] = ldq_p(buf); + q[0] = ldq_p(buf + 8); + } else{ + q[0] = ldq_p(buf); + q[1] = ldq_p(buf + 8); + } + return 16; } case 32: @@ -192,10 +208,17 @@ int aarch64_gdb_set_sve_reg(CPUState *cs, uint8_t *buf, int reg) case 0 ... 
31: { int vq, len = 0; - uint64_t *p = (uint64_t *) buf; for (vq = 0; vq < cpu->sve_max_vq; vq++) { - env->vfp.zregs[reg].d[vq * 2 + 1] = *p++; - env->vfp.zregs[reg].d[vq * 2] = *p++; + if (target_big_endian()) { + env->vfp.zregs[reg].d[vq * 2 + 1] = ldq_p(buf); + buf += 8; + env->vfp.zregs[reg].d[vq * 2] = ldq_p(buf); + } else{ + env->vfp.zregs[reg].d[vq * 2] = ldq_p(buf); + buf += 8; + env->vfp.zregs[reg].d[vq * 2 + 1] = ldq_p(buf); + } + buf += 8; len += 16; } return len; @@ -210,9 +233,9 @@ int aarch64_gdb_set_sve_reg(CPUState *cs, uint8_t *buf, int reg) { int preg = reg - 34; int vq, len = 0; - uint64_t *p = (uint64_t *) buf; for (vq = 0; vq < cpu->sve_max_vq; vq = vq + 4) { - env->vfp.pregs[preg].p[vq / 4] = *p++; + env->vfp.pregs[preg].p[vq / 4] = ldq_p(buf); + buf += 8; len += 8; } return len; @@ -228,6 +251,90 @@ int aarch64_gdb_set_sve_reg(CPUState *cs, uint8_t *buf, int reg) return 0; } +int aarch64_gdb_get_sme_reg(CPUState *cs, GByteArray *buf, int reg) +{ + ARMCPU *cpu = ARM_CPU(cs); + CPUARMState *env = &cpu->env; + + switch (reg) { + case 0: /* svg register */ + { + int vq = 0; + if (FIELD_EX64(env->svcr, SVCR, SM)) { + vq = sve_vqm1_for_el_sm(env, arm_current_el(env), + FIELD_EX64(env->svcr, SVCR, SM)) + 1; + } + /* svg = vector granules (2 * vector quardwords) in streaming mode */ + return gdb_get_reg64(buf, vq * 2); + } + case 1: /* svcr register */ + return gdb_get_reg64(buf, env->svcr); + case 2: /* za register */ + { + int len = 0; + int vq = cpu->sme_max_vq; + int svl = vq * 16; + for (int i = 0; i < svl; i++) { + for (int q = 0; q < vq; q++) { + len += gdb_get_reg128(buf, + env->za_state.za[i].d[q * 2 + 1], + env->za_state.za[i].d[q * 2]); + } + } + return len; + } + default: + /* gdbstub asked for something out of range */ + qemu_log_mask(LOG_UNIMP, "%s: out of range register %d", __func__, reg); + break; + } + + return 0; +} + +int aarch64_gdb_set_sme_reg(CPUState *cs, uint8_t *buf, int reg) +{ + ARMCPU *cpu = ARM_CPU(cs); + CPUARMState *env = &cpu->env; + + switch (reg) { + case 0: /* svg register */ + /* cannot set svg via gdbstub */ + return 8; + case 1: /* svcr register */ + aarch64_set_svcr(env, ldq_le_p(buf), + R_SVCR_SM_MASK | R_SVCR_ZA_MASK); + return 8; + case 2: /* za register */ + { + int len = 0; + int vq = cpu->sme_max_vq; + int svl = vq * 16; + for (int i = 0; i < svl; i++) { + for (int q = 0; q < vq; q++) { + if (target_big_endian()) { + env->za_state.za[i].d[q * 2 + 1] = ldq_p(buf); + buf += 8; + env->za_state.za[i].d[q * 2] = ldq_p(buf); + } else{ + env->za_state.za[i].d[q * 2] = ldq_p(buf); + buf += 8; + env->za_state.za[i].d[q * 2 + 1] = ldq_p(buf); + } + buf += 8; + len += 16; + } + } + return len; + } + default: + /* gdbstub asked for something out of range */ + break; + } + + return 0; +} + int aarch64_gdb_get_pauth_reg(CPUState *cs, GByteArray *buf, int reg) { ARMCPU *cpu = ARM_CPU(cs); @@ -392,6 +499,41 @@ GDBFeature *arm_gen_dynamic_svereg_feature(CPUState *cs, int base_reg) return &cpu->dyn_svereg_feature.desc; } +GDBFeature *arm_gen_dynamic_smereg_feature(CPUState *cs, int base_reg) +{ + ARMCPU *cpu = ARM_CPU(cs); + int vq = cpu->sme_max_vq; + int svl = vq * 16; + GDBFeatureBuilder builder; + int reg = 0; + + gdb_feature_builder_init(&builder, &cpu->dyn_smereg_feature.desc, + "org.gnu.gdb.aarch64.sme", "sme-registers.xml", + base_reg); + + + /* Create the sme_bv vector type. */ + gdb_feature_builder_append_tag( + &builder, "<vector id=\"sme_bv\" type=\"uint8\" count=\"%d\"/>", + svl); + + /* Create the sme_bvv vector type. 
*/ + gdb_feature_builder_append_tag( + &builder, "<vector id=\"sme_bvv\" type=\"sme_bv\" count=\"%d\"/>", + svl); + + /* Define the svg, svcr, and za registers. */ + + gdb_feature_builder_append_reg(&builder, "svg", 64, reg++, "int", NULL); + gdb_feature_builder_append_reg(&builder, "svcr", 64, reg++, "int", NULL); + gdb_feature_builder_append_reg(&builder, "za", svl * svl * 8, reg++, + "sme_bvv", NULL); + + gdb_feature_builder_end(&builder); + + return &cpu->dyn_smereg_feature.desc; +} + #ifdef CONFIG_USER_ONLY int aarch64_gdb_get_tag_ctl_reg(CPUState *cs, GByteArray *buf, int reg) { diff --git a/target/arm/helper.c b/target/arm/helper.c index 7631210..167f290 100644 --- a/target/arm/helper.c +++ b/target/arm/helper.c @@ -38,30 +38,59 @@ #define HELPER_H "tcg/helper.h" #include "exec/helper-proto.h.inc" -#define ARM_CPU_FREQ 1000000000 /* FIXME: 1 GHz, should be configurable */ - static void switch_mode(CPUARMState *env, int mode); -static uint64_t raw_read(CPUARMState *env, const ARMCPRegInfo *ri) +int compare_u64(const void *a, const void *b) +{ + if (*(uint64_t *)a > *(uint64_t *)b) { + return 1; + } + if (*(uint64_t *)a < *(uint64_t *)b) { + return -1; + } + return 0; +} + +/* + * Macros which are lvalues for the field in CPUARMState for the + * ARMCPRegInfo *ri. + */ +#define CPREG_FIELD32(env, ri) \ + (*(uint32_t *)((char *)(env) + (ri)->fieldoffset)) +#define CPREG_FIELD64(env, ri) \ + (*(uint64_t *)((char *)(env) + (ri)->fieldoffset)) + +uint64_t raw_read(CPUARMState *env, const ARMCPRegInfo *ri) { assert(ri->fieldoffset); - if (cpreg_field_is_64bit(ri)) { + switch (cpreg_field_type(ri)) { + case MO_64: return CPREG_FIELD64(env, ri); - } else { + case MO_32: return CPREG_FIELD32(env, ri); + default: + g_assert_not_reached(); } } void raw_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) { assert(ri->fieldoffset); - if (cpreg_field_is_64bit(ri)) { + switch (cpreg_field_type(ri)) { + case MO_64: CPREG_FIELD64(env, ri) = value; - } else { + break; + case MO_32: CPREG_FIELD32(env, ri) = value; + break; + default: + g_assert_not_reached(); } } +#undef CPREG_FIELD32 +#undef CPREG_FIELD64 + static void *raw_ptr(CPUARMState *env, const ARMCPRegInfo *ri) { return (char *)env + ri->fieldoffset; @@ -200,11 +229,11 @@ bool write_list_to_cpustate(ARMCPU *cpu) return ok; } -static void add_cpreg_to_list(gpointer key, gpointer opaque) +static void add_cpreg_to_list(gpointer key, gpointer value, gpointer opaque) { ARMCPU *cpu = opaque; uint32_t regidx = (uintptr_t)key; - const ARMCPRegInfo *ri = get_arm_cp_reginfo(cpu->cp_regs, regidx); + const ARMCPRegInfo *ri = value; if (!(ri->type & (ARM_CP_NO_RAW | ARM_CP_ALIAS))) { cpu->cpreg_indexes[cpu->cpreg_array_len] = cpreg_to_kvm_id(regidx); @@ -213,64 +242,52 @@ static void add_cpreg_to_list(gpointer key, gpointer opaque) } } -static void count_cpreg(gpointer key, gpointer opaque) +static void count_cpreg(gpointer key, gpointer value, gpointer opaque) { ARMCPU *cpu = opaque; - const ARMCPRegInfo *ri; - - ri = g_hash_table_lookup(cpu->cp_regs, key); + const ARMCPRegInfo *ri = value; if (!(ri->type & (ARM_CP_NO_RAW | ARM_CP_ALIAS))) { cpu->cpreg_array_len++; } } -static gint cpreg_key_compare(gconstpointer a, gconstpointer b, gpointer d) -{ - uint64_t aidx = cpreg_to_kvm_id((uintptr_t)a); - uint64_t bidx = cpreg_to_kvm_id((uintptr_t)b); - - if (aidx > bidx) { - return 1; - } - if (aidx < bidx) { - return -1; - } - return 0; -} - void init_cpreg_list(ARMCPU *cpu) { /* * Initialise the cpreg_tuples[] array based on the cp_regs hash. 
* Note that we require cpreg_tuples[] to be sorted by key ID. */ - GList *keys; int arraylen; - keys = g_hash_table_get_keys(cpu->cp_regs); - keys = g_list_sort_with_data(keys, cpreg_key_compare, NULL); - cpu->cpreg_array_len = 0; - - g_list_foreach(keys, count_cpreg, cpu); + g_hash_table_foreach(cpu->cp_regs, count_cpreg, cpu); arraylen = cpu->cpreg_array_len; - cpu->cpreg_indexes = g_new(uint64_t, arraylen); - cpu->cpreg_values = g_new(uint64_t, arraylen); - cpu->cpreg_vmstate_indexes = g_new(uint64_t, arraylen); - cpu->cpreg_vmstate_values = g_new(uint64_t, arraylen); - cpu->cpreg_vmstate_array_len = cpu->cpreg_array_len; + if (arraylen) { + cpu->cpreg_indexes = g_new(uint64_t, arraylen); + cpu->cpreg_values = g_new(uint64_t, arraylen); + cpu->cpreg_vmstate_indexes = g_new(uint64_t, arraylen); + cpu->cpreg_vmstate_values = g_new(uint64_t, arraylen); + } else { + cpu->cpreg_indexes = NULL; + cpu->cpreg_values = NULL; + cpu->cpreg_vmstate_indexes = NULL; + cpu->cpreg_vmstate_values = NULL; + } + cpu->cpreg_vmstate_array_len = arraylen; cpu->cpreg_array_len = 0; - g_list_foreach(keys, add_cpreg_to_list, cpu); + g_hash_table_foreach(cpu->cp_regs, add_cpreg_to_list, cpu); assert(cpu->cpreg_array_len == arraylen); - g_list_free(keys); + if (arraylen) { + qsort(cpu->cpreg_indexes, arraylen, sizeof(uint64_t), compare_u64); + } } -static bool arm_pan_enabled(CPUARMState *env) +bool arm_pan_enabled(CPUARMState *env) { if (is_a64(env)) { if ((arm_hcr_el2_eff(env) & (HCR_NV | HCR_NV1)) == (HCR_NV | HCR_NV1)) { @@ -319,25 +336,6 @@ static CPAccessResult access_trap_aa32s_el1(CPUARMState *env, return CP_ACCESS_UNDEFINED; } -/* - * Check for traps to performance monitor registers, which are controlled - * by MDCR_EL2.TPM for EL2 and MDCR_EL3.TPM for EL3. - */ -static CPAccessResult access_tpm(CPUARMState *env, const ARMCPRegInfo *ri, - bool isread) -{ - int el = arm_current_el(env); - uint64_t mdcr_el2 = arm_mdcr_el2_eff(env); - - if (el < 2 && (mdcr_el2 & MDCR_TPM)) { - return CP_ACCESS_TRAP_EL2; - } - if (el < 3 && (env->cp15.mdcr_el3 & MDCR_TPM)) { - return CP_ACCESS_TRAP_EL3; - } - return CP_ACCESS_OK; -} - /* Check for traps from EL1 due to HCR_EL2.TVM and HCR_EL2.TRVM. 
*/ CPAccessResult access_tvm_trvm(CPUARMState *env, const ARMCPRegInfo *ri, bool isread) @@ -422,7 +420,9 @@ int alle1_tlbmask(CPUARMState *env) */ return (ARMMMUIdxBit_E10_1 | ARMMMUIdxBit_E10_1_PAN | + ARMMMUIdxBit_E10_1_GCS | ARMMMUIdxBit_E10_0 | + ARMMMUIdxBit_E10_0_GCS | ARMMMUIdxBit_Stage2 | ARMMMUIdxBit_Stage2_S); } @@ -456,6 +456,8 @@ static const ARMCPRegInfo cp_reginfo[] = { .access = PL1_RW, .accessfn = access_tvm_trvm, .fgt = FGT_CONTEXTIDR_EL1, .nv2_redirect_offset = 0x108 | NV2_REDIR_NV1, + .vhe_redir_to_el2 = ENCODE_AA64_CP_REG(3, 4, 13, 0, 1), + .vhe_redir_to_el01 = ENCODE_AA64_CP_REG(3, 5, 13, 0, 1), .secure = ARM_CP_SECSTATE_NS, .fieldoffset = offsetof(CPUARMState, cp15.contextidr_el[1]), .resetvalue = 0, .writefn = contextidr_write, .raw_writefn = raw_write, }, @@ -673,291 +675,16 @@ static const ARMCPRegInfo v6_cp_reginfo[] = { */ { .name = "WFAR", .cp = 15, .crn = 6, .crm = 0, .opc1 = 0, .opc2 = 1, .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0, }, - { .name = "CPACR", .state = ARM_CP_STATE_BOTH, .opc0 = 3, + { .name = "CPACR_EL1", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .crn = 1, .crm = 0, .opc1 = 0, .opc2 = 2, .accessfn = cpacr_access, .fgt = FGT_CPACR_EL1, + .vhe_redir_to_el2 = ENCODE_AA64_CP_REG(3, 4, 1, 1, 2), + .vhe_redir_to_el01 = ENCODE_AA64_CP_REG(3, 5, 1, 0, 2), .nv2_redirect_offset = 0x100 | NV2_REDIR_NV1, .access = PL1_RW, .fieldoffset = offsetof(CPUARMState, cp15.cpacr_el1), .resetfn = cpacr_reset, .writefn = cpacr_write, .readfn = cpacr_read }, }; -typedef struct pm_event { - uint16_t number; /* PMEVTYPER.evtCount is 16 bits wide */ - /* If the event is supported on this CPU (used to generate PMCEID[01]) */ - bool (*supported)(CPUARMState *); - /* - * Retrieve the current count of the underlying event. The programmed - * counters hold a difference from the return value from this function - */ - uint64_t (*get_count)(CPUARMState *); - /* - * Return how many nanoseconds it will take (at a minimum) for count events - * to occur. A negative value indicates the counter will never overflow, or - * that the counter has otherwise arranged for the overflow bit to be set - * and the PMU interrupt to be raised on overflow. - */ - int64_t (*ns_per_count)(uint64_t); -} pm_event; - -static bool event_always_supported(CPUARMState *env) -{ - return true; -} - -static uint64_t swinc_get_count(CPUARMState *env) -{ - /* - * SW_INCR events are written directly to the pmevcntr's by writes to - * PMSWINC, so there is no underlying count maintained by the PMU itself - */ - return 0; -} - -static int64_t swinc_ns_per(uint64_t ignored) -{ - return -1; -} - -/* - * Return the underlying cycle count for the PMU cycle counters. If we're in - * usermode, simply return 0. 
- */ -static uint64_t cycles_get_count(CPUARMState *env) -{ -#ifndef CONFIG_USER_ONLY - return muldiv64(qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL), - ARM_CPU_FREQ, NANOSECONDS_PER_SECOND); -#else - return cpu_get_host_ticks(); -#endif -} - -#ifndef CONFIG_USER_ONLY -static int64_t cycles_ns_per(uint64_t cycles) -{ - return (ARM_CPU_FREQ / NANOSECONDS_PER_SECOND) * cycles; -} - -static bool instructions_supported(CPUARMState *env) -{ - /* Precise instruction counting */ - return icount_enabled() == ICOUNT_PRECISE; -} - -static uint64_t instructions_get_count(CPUARMState *env) -{ - assert(icount_enabled() == ICOUNT_PRECISE); - return (uint64_t)icount_get_raw(); -} - -static int64_t instructions_ns_per(uint64_t icount) -{ - assert(icount_enabled() == ICOUNT_PRECISE); - return icount_to_ns((int64_t)icount); -} -#endif - -static bool pmuv3p1_events_supported(CPUARMState *env) -{ - /* For events which are supported in any v8.1 PMU */ - return cpu_isar_feature(any_pmuv3p1, env_archcpu(env)); -} - -static bool pmuv3p4_events_supported(CPUARMState *env) -{ - /* For events which are supported in any v8.1 PMU */ - return cpu_isar_feature(any_pmuv3p4, env_archcpu(env)); -} - -static uint64_t zero_event_get_count(CPUARMState *env) -{ - /* For events which on QEMU never fire, so their count is always zero */ - return 0; -} - -static int64_t zero_event_ns_per(uint64_t cycles) -{ - /* An event which never fires can never overflow */ - return -1; -} - -static const pm_event pm_events[] = { - { .number = 0x000, /* SW_INCR */ - .supported = event_always_supported, - .get_count = swinc_get_count, - .ns_per_count = swinc_ns_per, - }, -#ifndef CONFIG_USER_ONLY - { .number = 0x008, /* INST_RETIRED, Instruction architecturally executed */ - .supported = instructions_supported, - .get_count = instructions_get_count, - .ns_per_count = instructions_ns_per, - }, - { .number = 0x011, /* CPU_CYCLES, Cycle */ - .supported = event_always_supported, - .get_count = cycles_get_count, - .ns_per_count = cycles_ns_per, - }, -#endif - { .number = 0x023, /* STALL_FRONTEND */ - .supported = pmuv3p1_events_supported, - .get_count = zero_event_get_count, - .ns_per_count = zero_event_ns_per, - }, - { .number = 0x024, /* STALL_BACKEND */ - .supported = pmuv3p1_events_supported, - .get_count = zero_event_get_count, - .ns_per_count = zero_event_ns_per, - }, - { .number = 0x03c, /* STALL */ - .supported = pmuv3p4_events_supported, - .get_count = zero_event_get_count, - .ns_per_count = zero_event_ns_per, - }, -}; - -/* - * Note: Before increasing MAX_EVENT_ID beyond 0x3f into the 0x40xx range of - * events (i.e. the statistical profiling extension), this implementation - * should first be updated to something sparse instead of the current - * supported_event_map[] array. - */ -#define MAX_EVENT_ID 0x3c -#define UNSUPPORTED_EVENT UINT16_MAX -static uint16_t supported_event_map[MAX_EVENT_ID + 1]; - -/* - * Called upon CPU initialization to initialize PMCEID[01]_EL0 and build a map - * of ARM event numbers to indices in our pm_events array. - * - * Note: Events in the 0x40XX range are not currently supported. 
- */ -void pmu_init(ARMCPU *cpu) -{ - unsigned int i; - - /* - * Empty supported_event_map and cpu->pmceid[01] before adding supported - * events to them - */ - for (i = 0; i < ARRAY_SIZE(supported_event_map); i++) { - supported_event_map[i] = UNSUPPORTED_EVENT; - } - cpu->pmceid0 = 0; - cpu->pmceid1 = 0; - - for (i = 0; i < ARRAY_SIZE(pm_events); i++) { - const pm_event *cnt = &pm_events[i]; - assert(cnt->number <= MAX_EVENT_ID); - /* We do not currently support events in the 0x40xx range */ - assert(cnt->number <= 0x3f); - - if (cnt->supported(&cpu->env)) { - supported_event_map[cnt->number] = i; - uint64_t event_mask = 1ULL << (cnt->number & 0x1f); - if (cnt->number & 0x20) { - cpu->pmceid1 |= event_mask; - } else { - cpu->pmceid0 |= event_mask; - } - } - } -} - -/* - * Check at runtime whether a PMU event is supported for the current machine - */ -static bool event_supported(uint16_t number) -{ - if (number > MAX_EVENT_ID) { - return false; - } - return supported_event_map[number] != UNSUPPORTED_EVENT; -} - -static CPAccessResult pmreg_access(CPUARMState *env, const ARMCPRegInfo *ri, - bool isread) -{ - /* - * Performance monitor registers user accessibility is controlled - * by PMUSERENR. MDCR_EL2.TPM and MDCR_EL3.TPM allow configurable - * trapping to EL2 or EL3 for other accesses. - */ - int el = arm_current_el(env); - uint64_t mdcr_el2 = arm_mdcr_el2_eff(env); - - if (el == 0 && !(env->cp15.c9_pmuserenr & 1)) { - return CP_ACCESS_TRAP_EL1; - } - if (el < 2 && (mdcr_el2 & MDCR_TPM)) { - return CP_ACCESS_TRAP_EL2; - } - if (el < 3 && (env->cp15.mdcr_el3 & MDCR_TPM)) { - return CP_ACCESS_TRAP_EL3; - } - - return CP_ACCESS_OK; -} - -static CPAccessResult pmreg_access_xevcntr(CPUARMState *env, - const ARMCPRegInfo *ri, - bool isread) -{ - /* ER: event counter read trap control */ - if (arm_feature(env, ARM_FEATURE_V8) - && arm_current_el(env) == 0 - && (env->cp15.c9_pmuserenr & (1 << 3)) != 0 - && isread) { - return CP_ACCESS_OK; - } - - return pmreg_access(env, ri, isread); -} - -static CPAccessResult pmreg_access_swinc(CPUARMState *env, - const ARMCPRegInfo *ri, - bool isread) -{ - /* SW: software increment write trap control */ - if (arm_feature(env, ARM_FEATURE_V8) - && arm_current_el(env) == 0 - && (env->cp15.c9_pmuserenr & (1 << 1)) != 0 - && !isread) { - return CP_ACCESS_OK; - } - - return pmreg_access(env, ri, isread); -} - -static CPAccessResult pmreg_access_selr(CPUARMState *env, - const ARMCPRegInfo *ri, - bool isread) -{ - /* ER: event counter read trap control */ - if (arm_feature(env, ARM_FEATURE_V8) - && arm_current_el(env) == 0 - && (env->cp15.c9_pmuserenr & (1 << 3)) != 0) { - return CP_ACCESS_OK; - } - - return pmreg_access(env, ri, isread); -} - -static CPAccessResult pmreg_access_ccntr(CPUARMState *env, - const ARMCPRegInfo *ri, - bool isread) -{ - /* CR: cycle counter read trap control */ - if (arm_feature(env, ARM_FEATURE_V8) - && arm_current_el(env) == 0 - && (env->cp15.c9_pmuserenr & (1 << 2)) != 0 - && isread) { - return CP_ACCESS_OK; - } - - return pmreg_access(env, ri, isread); -} - /* * Bits in MDCR_EL2 and MDCR_EL3 which pmu_counter_enabled() looks at. * We use these to decide whether we need to wrap a write to MDCR_EL2 @@ -967,684 +694,6 @@ static CPAccessResult pmreg_access_ccntr(CPUARMState *env, (MDCR_HPME | MDCR_HPMD | MDCR_HPMN | MDCR_HCCD | MDCR_HLP) #define MDCR_EL3_PMU_ENABLE_BITS (MDCR_SPME | MDCR_SCCD) -/* - * Returns true if the counter (pass 31 for PMCCNTR) should count events using - * the current EL, security state, and register configuration. 
- */ -static bool pmu_counter_enabled(CPUARMState *env, uint8_t counter) -{ - uint64_t filter; - bool e, p, u, nsk, nsu, nsh, m; - bool enabled, prohibited = false, filtered; - bool secure = arm_is_secure(env); - int el = arm_current_el(env); - uint64_t mdcr_el2; - uint8_t hpmn; - - /* - * We might be called for M-profile cores where MDCR_EL2 doesn't - * exist and arm_mdcr_el2_eff() will assert, so this early-exit check - * must be before we read that value. - */ - if (!arm_feature(env, ARM_FEATURE_PMU)) { - return false; - } - - mdcr_el2 = arm_mdcr_el2_eff(env); - hpmn = mdcr_el2 & MDCR_HPMN; - - if (!arm_feature(env, ARM_FEATURE_EL2) || - (counter < hpmn || counter == 31)) { - e = env->cp15.c9_pmcr & PMCRE; - } else { - e = mdcr_el2 & MDCR_HPME; - } - enabled = e && (env->cp15.c9_pmcnten & (1 << counter)); - - /* Is event counting prohibited? */ - if (el == 2 && (counter < hpmn || counter == 31)) { - prohibited = mdcr_el2 & MDCR_HPMD; - } - if (secure) { - prohibited = prohibited || !(env->cp15.mdcr_el3 & MDCR_SPME); - } - - if (counter == 31) { - /* - * The cycle counter defaults to running. PMCR.DP says "disable - * the cycle counter when event counting is prohibited". - * Some MDCR bits disable the cycle counter specifically. - */ - prohibited = prohibited && env->cp15.c9_pmcr & PMCRDP; - if (cpu_isar_feature(any_pmuv3p5, env_archcpu(env))) { - if (secure) { - prohibited = prohibited || (env->cp15.mdcr_el3 & MDCR_SCCD); - } - if (el == 2) { - prohibited = prohibited || (mdcr_el2 & MDCR_HCCD); - } - } - } - - if (counter == 31) { - filter = env->cp15.pmccfiltr_el0; - } else { - filter = env->cp15.c14_pmevtyper[counter]; - } - - p = filter & PMXEVTYPER_P; - u = filter & PMXEVTYPER_U; - nsk = arm_feature(env, ARM_FEATURE_EL3) && (filter & PMXEVTYPER_NSK); - nsu = arm_feature(env, ARM_FEATURE_EL3) && (filter & PMXEVTYPER_NSU); - nsh = arm_feature(env, ARM_FEATURE_EL2) && (filter & PMXEVTYPER_NSH); - m = arm_el_is_aa64(env, 1) && - arm_feature(env, ARM_FEATURE_EL3) && (filter & PMXEVTYPER_M); - - if (el == 0) { - filtered = secure ? u : u != nsu; - } else if (el == 1) { - filtered = secure ? p : p != nsk; - } else if (el == 2) { - filtered = !nsh; - } else { /* EL3 */ - filtered = m != p; - } - - if (counter != 31) { - /* - * If not checking PMCCNTR, ensure the counter is setup to an event we - * support - */ - uint16_t event = filter & PMXEVTYPER_EVTCOUNT; - if (!event_supported(event)) { - return false; - } - } - - return enabled && !prohibited && !filtered; -} - -static void pmu_update_irq(CPUARMState *env) -{ - ARMCPU *cpu = env_archcpu(env); - qemu_set_irq(cpu->pmu_interrupt, (env->cp15.c9_pmcr & PMCRE) && - (env->cp15.c9_pminten & env->cp15.c9_pmovsr)); -} - -static bool pmccntr_clockdiv_enabled(CPUARMState *env) -{ - /* - * Return true if the clock divider is enabled and the cycle counter - * is supposed to tick only once every 64 clock cycles. This is - * controlled by PMCR.D, but if PMCR.LC is set to enable the long - * (64-bit) cycle counter PMCR.D has no effect. 
- */ - return (env->cp15.c9_pmcr & (PMCRD | PMCRLC)) == PMCRD; -} - -static bool pmevcntr_is_64_bit(CPUARMState *env, int counter) -{ - /* Return true if the specified event counter is configured to be 64 bit */ - - /* This isn't intended to be used with the cycle counter */ - assert(counter < 31); - - if (!cpu_isar_feature(any_pmuv3p5, env_archcpu(env))) { - return false; - } - - if (arm_feature(env, ARM_FEATURE_EL2)) { - /* - * MDCR_EL2.HLP still applies even when EL2 is disabled in the - * current security state, so we don't use arm_mdcr_el2_eff() here. - */ - bool hlp = env->cp15.mdcr_el2 & MDCR_HLP; - int hpmn = env->cp15.mdcr_el2 & MDCR_HPMN; - - if (counter >= hpmn) { - return hlp; - } - } - return env->cp15.c9_pmcr & PMCRLP; -} - -/* - * Ensure c15_ccnt is the guest-visible count so that operations such as - * enabling/disabling the counter or filtering, modifying the count itself, - * etc. can be done logically. This is essentially a no-op if the counter is - * not enabled at the time of the call. - */ -static void pmccntr_op_start(CPUARMState *env) -{ - uint64_t cycles = cycles_get_count(env); - - if (pmu_counter_enabled(env, 31)) { - uint64_t eff_cycles = cycles; - if (pmccntr_clockdiv_enabled(env)) { - eff_cycles /= 64; - } - - uint64_t new_pmccntr = eff_cycles - env->cp15.c15_ccnt_delta; - - uint64_t overflow_mask = env->cp15.c9_pmcr & PMCRLC ? \ - 1ull << 63 : 1ull << 31; - if (env->cp15.c15_ccnt & ~new_pmccntr & overflow_mask) { - env->cp15.c9_pmovsr |= (1ULL << 31); - pmu_update_irq(env); - } - - env->cp15.c15_ccnt = new_pmccntr; - } - env->cp15.c15_ccnt_delta = cycles; -} - -/* - * If PMCCNTR is enabled, recalculate the delta between the clock and the - * guest-visible count. A call to pmccntr_op_finish should follow every call to - * pmccntr_op_start. - */ -static void pmccntr_op_finish(CPUARMState *env) -{ - if (pmu_counter_enabled(env, 31)) { -#ifndef CONFIG_USER_ONLY - /* Calculate when the counter will next overflow */ - uint64_t remaining_cycles = -env->cp15.c15_ccnt; - if (!(env->cp15.c9_pmcr & PMCRLC)) { - remaining_cycles = (uint32_t)remaining_cycles; - } - int64_t overflow_in = cycles_ns_per(remaining_cycles); - - if (overflow_in > 0) { - int64_t overflow_at; - - if (!sadd64_overflow(qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL), - overflow_in, &overflow_at)) { - ARMCPU *cpu = env_archcpu(env); - timer_mod_anticipate_ns(cpu->pmu_timer, overflow_at); - } - } -#endif - - uint64_t prev_cycles = env->cp15.c15_ccnt_delta; - if (pmccntr_clockdiv_enabled(env)) { - prev_cycles /= 64; - } - env->cp15.c15_ccnt_delta = prev_cycles - env->cp15.c15_ccnt; - } -} - -static void pmevcntr_op_start(CPUARMState *env, uint8_t counter) -{ - - uint16_t event = env->cp15.c14_pmevtyper[counter] & PMXEVTYPER_EVTCOUNT; - uint64_t count = 0; - if (event_supported(event)) { - uint16_t event_idx = supported_event_map[event]; - count = pm_events[event_idx].get_count(env); - } - - if (pmu_counter_enabled(env, counter)) { - uint64_t new_pmevcntr = count - env->cp15.c14_pmevcntr_delta[counter]; - uint64_t overflow_mask = pmevcntr_is_64_bit(env, counter) ? 
- 1ULL << 63 : 1ULL << 31; - - if (env->cp15.c14_pmevcntr[counter] & ~new_pmevcntr & overflow_mask) { - env->cp15.c9_pmovsr |= (1 << counter); - pmu_update_irq(env); - } - env->cp15.c14_pmevcntr[counter] = new_pmevcntr; - } - env->cp15.c14_pmevcntr_delta[counter] = count; -} - -static void pmevcntr_op_finish(CPUARMState *env, uint8_t counter) -{ - if (pmu_counter_enabled(env, counter)) { -#ifndef CONFIG_USER_ONLY - uint16_t event = env->cp15.c14_pmevtyper[counter] & PMXEVTYPER_EVTCOUNT; - uint16_t event_idx = supported_event_map[event]; - uint64_t delta = -(env->cp15.c14_pmevcntr[counter] + 1); - int64_t overflow_in; - - if (!pmevcntr_is_64_bit(env, counter)) { - delta = (uint32_t)delta; - } - overflow_in = pm_events[event_idx].ns_per_count(delta); - - if (overflow_in > 0) { - int64_t overflow_at; - - if (!sadd64_overflow(qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL), - overflow_in, &overflow_at)) { - ARMCPU *cpu = env_archcpu(env); - timer_mod_anticipate_ns(cpu->pmu_timer, overflow_at); - } - } -#endif - - env->cp15.c14_pmevcntr_delta[counter] -= - env->cp15.c14_pmevcntr[counter]; - } -} - -void pmu_op_start(CPUARMState *env) -{ - unsigned int i; - pmccntr_op_start(env); - for (i = 0; i < pmu_num_counters(env); i++) { - pmevcntr_op_start(env, i); - } -} - -void pmu_op_finish(CPUARMState *env) -{ - unsigned int i; - pmccntr_op_finish(env); - for (i = 0; i < pmu_num_counters(env); i++) { - pmevcntr_op_finish(env, i); - } -} - -void pmu_pre_el_change(ARMCPU *cpu, void *ignored) -{ - pmu_op_start(&cpu->env); -} - -void pmu_post_el_change(ARMCPU *cpu, void *ignored) -{ - pmu_op_finish(&cpu->env); -} - -void arm_pmu_timer_cb(void *opaque) -{ - ARMCPU *cpu = opaque; - - /* - * Update all the counter values based on the current underlying counts, - * triggering interrupts to be raised, if necessary. pmu_op_finish() also - * has the effect of setting the cpu->pmu_timer to the next earliest time a - * counter may expire. - */ - pmu_op_start(&cpu->env); - pmu_op_finish(&cpu->env); -} - -static void pmcr_write(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) -{ - pmu_op_start(env); - - if (value & PMCRC) { - /* The counter has been reset */ - env->cp15.c15_ccnt = 0; - } - - if (value & PMCRP) { - unsigned int i; - for (i = 0; i < pmu_num_counters(env); i++) { - env->cp15.c14_pmevcntr[i] = 0; - } - } - - env->cp15.c9_pmcr &= ~PMCR_WRITABLE_MASK; - env->cp15.c9_pmcr |= (value & PMCR_WRITABLE_MASK); - - pmu_op_finish(env); -} - -static uint64_t pmcr_read(CPUARMState *env, const ARMCPRegInfo *ri) -{ - uint64_t pmcr = env->cp15.c9_pmcr; - - /* - * If EL2 is implemented and enabled for the current security state, reads - * of PMCR.N from EL1 or EL0 return the value of MDCR_EL2.HPMN or HDCR.HPMN. 
- */ - if (arm_current_el(env) <= 1 && arm_is_el2_enabled(env)) { - pmcr &= ~PMCRN_MASK; - pmcr |= (env->cp15.mdcr_el2 & MDCR_HPMN) << PMCRN_SHIFT; - } - - return pmcr; -} - -static void pmswinc_write(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) -{ - unsigned int i; - uint64_t overflow_mask, new_pmswinc; - - for (i = 0; i < pmu_num_counters(env); i++) { - /* Increment a counter's count iff: */ - if ((value & (1 << i)) && /* counter's bit is set */ - /* counter is enabled and not filtered */ - pmu_counter_enabled(env, i) && - /* counter is SW_INCR */ - (env->cp15.c14_pmevtyper[i] & PMXEVTYPER_EVTCOUNT) == 0x0) { - pmevcntr_op_start(env, i); - - /* - * Detect if this write causes an overflow since we can't predict - * PMSWINC overflows like we can for other events - */ - new_pmswinc = env->cp15.c14_pmevcntr[i] + 1; - - overflow_mask = pmevcntr_is_64_bit(env, i) ? - 1ULL << 63 : 1ULL << 31; - - if (env->cp15.c14_pmevcntr[i] & ~new_pmswinc & overflow_mask) { - env->cp15.c9_pmovsr |= (1 << i); - pmu_update_irq(env); - } - - env->cp15.c14_pmevcntr[i] = new_pmswinc; - - pmevcntr_op_finish(env, i); - } - } -} - -static uint64_t pmccntr_read(CPUARMState *env, const ARMCPRegInfo *ri) -{ - uint64_t ret; - pmccntr_op_start(env); - ret = env->cp15.c15_ccnt; - pmccntr_op_finish(env); - return ret; -} - -static void pmselr_write(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) -{ - /* - * The value of PMSELR.SEL affects the behavior of PMXEVTYPER and - * PMXEVCNTR. We allow [0..31] to be written to PMSELR here; in the - * meanwhile, we check PMSELR.SEL when PMXEVTYPER and PMXEVCNTR are - * accessed. - */ - env->cp15.c9_pmselr = value & 0x1f; -} - -static void pmccntr_write(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) -{ - pmccntr_op_start(env); - env->cp15.c15_ccnt = value; - pmccntr_op_finish(env); -} - -static void pmccntr_write32(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) -{ - uint64_t cur_val = pmccntr_read(env, NULL); - - pmccntr_write(env, ri, deposit64(cur_val, 0, 32, value)); -} - -static void pmccfiltr_write(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) -{ - pmccntr_op_start(env); - env->cp15.pmccfiltr_el0 = value & PMCCFILTR_EL0; - pmccntr_op_finish(env); -} - -static void pmccfiltr_write_a32(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) -{ - pmccntr_op_start(env); - /* M is not accessible from AArch32 */ - env->cp15.pmccfiltr_el0 = (env->cp15.pmccfiltr_el0 & PMCCFILTR_M) | - (value & PMCCFILTR); - pmccntr_op_finish(env); -} - -static uint64_t pmccfiltr_read_a32(CPUARMState *env, const ARMCPRegInfo *ri) -{ - /* M is not visible in AArch32 */ - return env->cp15.pmccfiltr_el0 & PMCCFILTR; -} - -static void pmcntenset_write(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) -{ - pmu_op_start(env); - value &= pmu_counter_mask(env); - env->cp15.c9_pmcnten |= value; - pmu_op_finish(env); -} - -static void pmcntenclr_write(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) -{ - pmu_op_start(env); - value &= pmu_counter_mask(env); - env->cp15.c9_pmcnten &= ~value; - pmu_op_finish(env); -} - -static void pmovsr_write(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) -{ - value &= pmu_counter_mask(env); - env->cp15.c9_pmovsr &= ~value; - pmu_update_irq(env); -} - -static void pmovsset_write(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) -{ - value &= pmu_counter_mask(env); - env->cp15.c9_pmovsr |= value; - pmu_update_irq(env); -} - -static void pmevtyper_write(CPUARMState *env, 
const ARMCPRegInfo *ri, - uint64_t value, const uint8_t counter) -{ - if (counter == 31) { - pmccfiltr_write(env, ri, value); - } else if (counter < pmu_num_counters(env)) { - pmevcntr_op_start(env, counter); - - /* - * If this counter's event type is changing, store the current - * underlying count for the new type in c14_pmevcntr_delta[counter] so - * pmevcntr_op_finish has the correct baseline when it converts back to - * a delta. - */ - uint16_t old_event = env->cp15.c14_pmevtyper[counter] & - PMXEVTYPER_EVTCOUNT; - uint16_t new_event = value & PMXEVTYPER_EVTCOUNT; - if (old_event != new_event) { - uint64_t count = 0; - if (event_supported(new_event)) { - uint16_t event_idx = supported_event_map[new_event]; - count = pm_events[event_idx].get_count(env); - } - env->cp15.c14_pmevcntr_delta[counter] = count; - } - - env->cp15.c14_pmevtyper[counter] = value & PMXEVTYPER_MASK; - pmevcntr_op_finish(env, counter); - } - /* - * Attempts to access PMXEVTYPER are CONSTRAINED UNPREDICTABLE when - * PMSELR value is equal to or greater than the number of implemented - * counters, but not equal to 0x1f. We opt to behave as a RAZ/WI. - */ -} - -static uint64_t pmevtyper_read(CPUARMState *env, const ARMCPRegInfo *ri, - const uint8_t counter) -{ - if (counter == 31) { - return env->cp15.pmccfiltr_el0; - } else if (counter < pmu_num_counters(env)) { - return env->cp15.c14_pmevtyper[counter]; - } else { - /* - * We opt to behave as a RAZ/WI when attempts to access PMXEVTYPER - * are CONSTRAINED UNPREDICTABLE. See comments in pmevtyper_write(). - */ - return 0; - } -} - -static void pmevtyper_writefn(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) -{ - uint8_t counter = ((ri->crm & 3) << 3) | (ri->opc2 & 7); - pmevtyper_write(env, ri, value, counter); -} - -static void pmevtyper_rawwrite(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) -{ - uint8_t counter = ((ri->crm & 3) << 3) | (ri->opc2 & 7); - env->cp15.c14_pmevtyper[counter] = value; - - /* - * pmevtyper_rawwrite is called between a pair of pmu_op_start and - * pmu_op_finish calls when loading saved state for a migration. Because - * we're potentially updating the type of event here, the value written to - * c14_pmevcntr_delta by the preceding pmu_op_start call may be for a - * different counter type. Therefore, we need to set this value to the - * current count for the counter type we're writing so that pmu_op_finish - * has the correct count for its calculation. 
- */ - uint16_t event = value & PMXEVTYPER_EVTCOUNT; - if (event_supported(event)) { - uint16_t event_idx = supported_event_map[event]; - env->cp15.c14_pmevcntr_delta[counter] = - pm_events[event_idx].get_count(env); - } -} - -static uint64_t pmevtyper_readfn(CPUARMState *env, const ARMCPRegInfo *ri) -{ - uint8_t counter = ((ri->crm & 3) << 3) | (ri->opc2 & 7); - return pmevtyper_read(env, ri, counter); -} - -static void pmxevtyper_write(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) -{ - pmevtyper_write(env, ri, value, env->cp15.c9_pmselr & 31); -} - -static uint64_t pmxevtyper_read(CPUARMState *env, const ARMCPRegInfo *ri) -{ - return pmevtyper_read(env, ri, env->cp15.c9_pmselr & 31); -} - -static void pmevcntr_write(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value, uint8_t counter) -{ - if (!cpu_isar_feature(any_pmuv3p5, env_archcpu(env))) { - /* Before FEAT_PMUv3p5, top 32 bits of event counters are RES0 */ - value &= MAKE_64BIT_MASK(0, 32); - } - if (counter < pmu_num_counters(env)) { - pmevcntr_op_start(env, counter); - env->cp15.c14_pmevcntr[counter] = value; - pmevcntr_op_finish(env, counter); - } - /* - * We opt to behave as a RAZ/WI when attempts to access PM[X]EVCNTR - * are CONSTRAINED UNPREDICTABLE. - */ -} - -static uint64_t pmevcntr_read(CPUARMState *env, const ARMCPRegInfo *ri, - uint8_t counter) -{ - if (counter < pmu_num_counters(env)) { - uint64_t ret; - pmevcntr_op_start(env, counter); - ret = env->cp15.c14_pmevcntr[counter]; - pmevcntr_op_finish(env, counter); - if (!cpu_isar_feature(any_pmuv3p5, env_archcpu(env))) { - /* Before FEAT_PMUv3p5, top 32 bits of event counters are RES0 */ - ret &= MAKE_64BIT_MASK(0, 32); - } - return ret; - } else { - /* - * We opt to behave as a RAZ/WI when attempts to access PM[X]EVCNTR - * are CONSTRAINED UNPREDICTABLE. 
- */ - return 0; - } -} - -static void pmevcntr_writefn(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) -{ - uint8_t counter = ((ri->crm & 3) << 3) | (ri->opc2 & 7); - pmevcntr_write(env, ri, value, counter); -} - -static uint64_t pmevcntr_readfn(CPUARMState *env, const ARMCPRegInfo *ri) -{ - uint8_t counter = ((ri->crm & 3) << 3) | (ri->opc2 & 7); - return pmevcntr_read(env, ri, counter); -} - -static void pmevcntr_rawwrite(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) -{ - uint8_t counter = ((ri->crm & 3) << 3) | (ri->opc2 & 7); - assert(counter < pmu_num_counters(env)); - env->cp15.c14_pmevcntr[counter] = value; - pmevcntr_write(env, ri, value, counter); -} - -static uint64_t pmevcntr_rawread(CPUARMState *env, const ARMCPRegInfo *ri) -{ - uint8_t counter = ((ri->crm & 3) << 3) | (ri->opc2 & 7); - assert(counter < pmu_num_counters(env)); - return env->cp15.c14_pmevcntr[counter]; -} - -static void pmxevcntr_write(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) -{ - pmevcntr_write(env, ri, value, env->cp15.c9_pmselr & 31); -} - -static uint64_t pmxevcntr_read(CPUARMState *env, const ARMCPRegInfo *ri) -{ - return pmevcntr_read(env, ri, env->cp15.c9_pmselr & 31); -} - -static void pmuserenr_write(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) -{ - if (arm_feature(env, ARM_FEATURE_V8)) { - env->cp15.c9_pmuserenr = value & 0xf; - } else { - env->cp15.c9_pmuserenr = value & 1; - } -} - -static void pmintenset_write(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) -{ - /* We have no event counters so only the C bit can be changed */ - value &= pmu_counter_mask(env); - env->cp15.c9_pminten |= value; - pmu_update_irq(env); -} - -static void pmintenclr_write(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) -{ - value &= pmu_counter_mask(env); - env->cp15.c9_pminten &= ~value; - pmu_update_irq(env); -} - static void vbar_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) { @@ -1717,6 +766,22 @@ static void scr_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) if (cpu_isar_feature(aa64_ecv, cpu)) { valid_mask |= SCR_ECVEN; } + if (cpu_isar_feature(aa64_gcs, cpu)) { + valid_mask |= SCR_GCSEN; + } + if (cpu_isar_feature(aa64_tcr2, cpu)) { + valid_mask |= SCR_TCR2EN; + } + if (cpu_isar_feature(aa64_sctlr2, cpu)) { + valid_mask |= SCR_SCTLR2EN; + } + if (cpu_isar_feature(aa64_s1pie, cpu) || + cpu_isar_feature(aa64_s2pie, cpu)) { + valid_mask |= SCR_PIEN; + } + if (cpu_isar_feature(aa64_mec, cpu)) { + valid_mask |= SCR_MECEN; + } } else { valid_mask &= ~(SCR_RW | SCR_ST); if (cpu_isar_feature(aa32_ras, cpu)) { @@ -1751,12 +816,17 @@ static void scr_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) */ if (changed & (SCR_NS | SCR_NSE)) { tlb_flush_by_mmuidx(env_cpu(env), (ARMMMUIdxBit_E10_0 | + ARMMMUIdxBit_E10_0_GCS | ARMMMUIdxBit_E20_0 | + ARMMMUIdxBit_E20_0_GCS | ARMMMUIdxBit_E10_1 | - ARMMMUIdxBit_E20_2 | ARMMMUIdxBit_E10_1_PAN | + ARMMMUIdxBit_E10_1_GCS | + ARMMMUIdxBit_E20_2 | ARMMMUIdxBit_E20_2_PAN | - ARMMMUIdxBit_E2)); + ARMMMUIdxBit_E20_2_GCS | + ARMMMUIdxBit_E2 | + ARMMMUIdxBit_E2_GCS)); } } @@ -1809,40 +879,40 @@ static uint64_t isr_read(CPUARMState *env, const ARMCPRegInfo *ri) uint64_t ret = 0; if (hcr_el2 & HCR_IMO) { - if (cs->interrupt_request & CPU_INTERRUPT_VIRQ) { + if (cpu_test_interrupt(cs, CPU_INTERRUPT_VIRQ)) { ret |= CPSR_I; } - if (cs->interrupt_request & CPU_INTERRUPT_VINMI) { + if (cpu_test_interrupt(cs, CPU_INTERRUPT_VINMI)) { ret |= ISR_IS; ret |= CPSR_I; } } else { 
- if (cs->interrupt_request & CPU_INTERRUPT_HARD) { + if (cpu_test_interrupt(cs, CPU_INTERRUPT_HARD)) { ret |= CPSR_I; } - if (cs->interrupt_request & CPU_INTERRUPT_NMI) { + if (cpu_test_interrupt(cs, CPU_INTERRUPT_NMI)) { ret |= ISR_IS; ret |= CPSR_I; } } if (hcr_el2 & HCR_FMO) { - if (cs->interrupt_request & CPU_INTERRUPT_VFIQ) { + if (cpu_test_interrupt(cs, CPU_INTERRUPT_VFIQ)) { ret |= CPSR_F; } - if (cs->interrupt_request & CPU_INTERRUPT_VFNMI) { + if (cpu_test_interrupt(cs, CPU_INTERRUPT_VFNMI)) { ret |= ISR_FS; ret |= CPSR_F; } } else { - if (cs->interrupt_request & CPU_INTERRUPT_FIQ) { + if (cpu_test_interrupt(cs, CPU_INTERRUPT_FIQ)) { ret |= CPSR_F; } } if (hcr_el2 & HCR_AMO) { - if (cs->interrupt_request & CPU_INTERRUPT_VSERR) { + if (cpu_test_interrupt(cs, CPU_INTERRUPT_VSERR)) { ret |= CPSR_A; } } @@ -1874,171 +944,6 @@ static const ARMCPRegInfo v7_cp_reginfo[] = { /* the old v6 WFI, UNPREDICTABLE in v7 but we choose to NOP */ { .name = "NOP", .cp = 15, .crn = 7, .crm = 0, .opc1 = 0, .opc2 = 4, .access = PL1_W, .type = ARM_CP_NOP }, - /* - * Performance monitors are implementation defined in v7, - * but with an ARM recommended set of registers, which we - * follow. - * - * Performance registers fall into three categories: - * (a) always UNDEF in PL0, RW in PL1 (PMINTENSET, PMINTENCLR) - * (b) RO in PL0 (ie UNDEF on write), RW in PL1 (PMUSERENR) - * (c) UNDEF in PL0 if PMUSERENR.EN==0, otherwise accessible (all others) - * For the cases controlled by PMUSERENR we must set .access to PL0_RW - * or PL0_RO as appropriate and then check PMUSERENR in the helper fn. - */ - { .name = "PMCNTENSET", .cp = 15, .crn = 9, .crm = 12, .opc1 = 0, .opc2 = 1, - .access = PL0_RW, .type = ARM_CP_ALIAS | ARM_CP_IO, - .fieldoffset = offsetoflow32(CPUARMState, cp15.c9_pmcnten), - .writefn = pmcntenset_write, - .accessfn = pmreg_access, - .fgt = FGT_PMCNTEN, - .raw_writefn = raw_write }, - { .name = "PMCNTENSET_EL0", .state = ARM_CP_STATE_AA64, .type = ARM_CP_IO, - .opc0 = 3, .opc1 = 3, .crn = 9, .crm = 12, .opc2 = 1, - .access = PL0_RW, .accessfn = pmreg_access, - .fgt = FGT_PMCNTEN, - .fieldoffset = offsetof(CPUARMState, cp15.c9_pmcnten), .resetvalue = 0, - .writefn = pmcntenset_write, .raw_writefn = raw_write }, - { .name = "PMCNTENCLR", .cp = 15, .crn = 9, .crm = 12, .opc1 = 0, .opc2 = 2, - .access = PL0_RW, - .fieldoffset = offsetoflow32(CPUARMState, cp15.c9_pmcnten), - .accessfn = pmreg_access, - .fgt = FGT_PMCNTEN, - .writefn = pmcntenclr_write, - .type = ARM_CP_ALIAS | ARM_CP_IO }, - { .name = "PMCNTENCLR_EL0", .state = ARM_CP_STATE_AA64, - .opc0 = 3, .opc1 = 3, .crn = 9, .crm = 12, .opc2 = 2, - .access = PL0_RW, .accessfn = pmreg_access, - .fgt = FGT_PMCNTEN, - .type = ARM_CP_ALIAS | ARM_CP_IO, - .fieldoffset = offsetof(CPUARMState, cp15.c9_pmcnten), - .writefn = pmcntenclr_write }, - { .name = "PMOVSR", .cp = 15, .crn = 9, .crm = 12, .opc1 = 0, .opc2 = 3, - .access = PL0_RW, .type = ARM_CP_IO, - .fieldoffset = offsetoflow32(CPUARMState, cp15.c9_pmovsr), - .accessfn = pmreg_access, - .fgt = FGT_PMOVS, - .writefn = pmovsr_write, - .raw_writefn = raw_write }, - { .name = "PMOVSCLR_EL0", .state = ARM_CP_STATE_AA64, - .opc0 = 3, .opc1 = 3, .crn = 9, .crm = 12, .opc2 = 3, - .access = PL0_RW, .accessfn = pmreg_access, - .fgt = FGT_PMOVS, - .type = ARM_CP_ALIAS | ARM_CP_IO, - .fieldoffset = offsetof(CPUARMState, cp15.c9_pmovsr), - .writefn = pmovsr_write, - .raw_writefn = raw_write }, - { .name = "PMSWINC", .cp = 15, .crn = 9, .crm = 12, .opc1 = 0, .opc2 = 4, - .access = PL0_W, .accessfn = 
pmreg_access_swinc, - .fgt = FGT_PMSWINC_EL0, - .type = ARM_CP_NO_RAW | ARM_CP_IO, - .writefn = pmswinc_write }, - { .name = "PMSWINC_EL0", .state = ARM_CP_STATE_AA64, - .opc0 = 3, .opc1 = 3, .crn = 9, .crm = 12, .opc2 = 4, - .access = PL0_W, .accessfn = pmreg_access_swinc, - .fgt = FGT_PMSWINC_EL0, - .type = ARM_CP_NO_RAW | ARM_CP_IO, - .writefn = pmswinc_write }, - { .name = "PMSELR", .cp = 15, .crn = 9, .crm = 12, .opc1 = 0, .opc2 = 5, - .access = PL0_RW, .type = ARM_CP_ALIAS, - .fgt = FGT_PMSELR_EL0, - .fieldoffset = offsetoflow32(CPUARMState, cp15.c9_pmselr), - .accessfn = pmreg_access_selr, .writefn = pmselr_write, - .raw_writefn = raw_write}, - { .name = "PMSELR_EL0", .state = ARM_CP_STATE_AA64, - .opc0 = 3, .opc1 = 3, .crn = 9, .crm = 12, .opc2 = 5, - .access = PL0_RW, .accessfn = pmreg_access_selr, - .fgt = FGT_PMSELR_EL0, - .fieldoffset = offsetof(CPUARMState, cp15.c9_pmselr), - .writefn = pmselr_write, .raw_writefn = raw_write, }, - { .name = "PMCCNTR", .cp = 15, .crn = 9, .crm = 13, .opc1 = 0, .opc2 = 0, - .access = PL0_RW, .resetvalue = 0, .type = ARM_CP_ALIAS | ARM_CP_IO, - .fgt = FGT_PMCCNTR_EL0, - .readfn = pmccntr_read, .writefn = pmccntr_write32, - .accessfn = pmreg_access_ccntr }, - { .name = "PMCCNTR_EL0", .state = ARM_CP_STATE_AA64, - .opc0 = 3, .opc1 = 3, .crn = 9, .crm = 13, .opc2 = 0, - .access = PL0_RW, .accessfn = pmreg_access_ccntr, - .fgt = FGT_PMCCNTR_EL0, - .type = ARM_CP_IO, - .fieldoffset = offsetof(CPUARMState, cp15.c15_ccnt), - .readfn = pmccntr_read, .writefn = pmccntr_write, - .raw_readfn = raw_read, .raw_writefn = raw_write, }, - { .name = "PMCCFILTR", .cp = 15, .opc1 = 0, .crn = 14, .crm = 15, .opc2 = 7, - .writefn = pmccfiltr_write_a32, .readfn = pmccfiltr_read_a32, - .access = PL0_RW, .accessfn = pmreg_access, - .fgt = FGT_PMCCFILTR_EL0, - .type = ARM_CP_ALIAS | ARM_CP_IO, - .resetvalue = 0, }, - { .name = "PMCCFILTR_EL0", .state = ARM_CP_STATE_AA64, - .opc0 = 3, .opc1 = 3, .crn = 14, .crm = 15, .opc2 = 7, - .writefn = pmccfiltr_write, .raw_writefn = raw_write, - .access = PL0_RW, .accessfn = pmreg_access, - .fgt = FGT_PMCCFILTR_EL0, - .type = ARM_CP_IO, - .fieldoffset = offsetof(CPUARMState, cp15.pmccfiltr_el0), - .resetvalue = 0, }, - { .name = "PMXEVTYPER", .cp = 15, .crn = 9, .crm = 13, .opc1 = 0, .opc2 = 1, - .access = PL0_RW, .type = ARM_CP_NO_RAW | ARM_CP_IO, - .accessfn = pmreg_access, - .fgt = FGT_PMEVTYPERN_EL0, - .writefn = pmxevtyper_write, .readfn = pmxevtyper_read }, - { .name = "PMXEVTYPER_EL0", .state = ARM_CP_STATE_AA64, - .opc0 = 3, .opc1 = 3, .crn = 9, .crm = 13, .opc2 = 1, - .access = PL0_RW, .type = ARM_CP_NO_RAW | ARM_CP_IO, - .accessfn = pmreg_access, - .fgt = FGT_PMEVTYPERN_EL0, - .writefn = pmxevtyper_write, .readfn = pmxevtyper_read }, - { .name = "PMXEVCNTR", .cp = 15, .crn = 9, .crm = 13, .opc1 = 0, .opc2 = 2, - .access = PL0_RW, .type = ARM_CP_NO_RAW | ARM_CP_IO, - .accessfn = pmreg_access_xevcntr, - .fgt = FGT_PMEVCNTRN_EL0, - .writefn = pmxevcntr_write, .readfn = pmxevcntr_read }, - { .name = "PMXEVCNTR_EL0", .state = ARM_CP_STATE_AA64, - .opc0 = 3, .opc1 = 3, .crn = 9, .crm = 13, .opc2 = 2, - .access = PL0_RW, .type = ARM_CP_NO_RAW | ARM_CP_IO, - .accessfn = pmreg_access_xevcntr, - .fgt = FGT_PMEVCNTRN_EL0, - .writefn = pmxevcntr_write, .readfn = pmxevcntr_read }, - { .name = "PMUSERENR", .cp = 15, .crn = 9, .crm = 14, .opc1 = 0, .opc2 = 0, - .access = PL0_R | PL1_RW, .accessfn = access_tpm, - .fieldoffset = offsetoflow32(CPUARMState, cp15.c9_pmuserenr), - .resetvalue = 0, - .writefn = pmuserenr_write, .raw_writefn = 
raw_write }, - { .name = "PMUSERENR_EL0", .state = ARM_CP_STATE_AA64, - .opc0 = 3, .opc1 = 3, .crn = 9, .crm = 14, .opc2 = 0, - .access = PL0_R | PL1_RW, .accessfn = access_tpm, .type = ARM_CP_ALIAS, - .fieldoffset = offsetof(CPUARMState, cp15.c9_pmuserenr), - .resetvalue = 0, - .writefn = pmuserenr_write, .raw_writefn = raw_write }, - { .name = "PMINTENSET", .cp = 15, .crn = 9, .crm = 14, .opc1 = 0, .opc2 = 1, - .access = PL1_RW, .accessfn = access_tpm, - .fgt = FGT_PMINTEN, - .type = ARM_CP_ALIAS | ARM_CP_IO, - .fieldoffset = offsetoflow32(CPUARMState, cp15.c9_pminten), - .resetvalue = 0, - .writefn = pmintenset_write, .raw_writefn = raw_write }, - { .name = "PMINTENSET_EL1", .state = ARM_CP_STATE_AA64, - .opc0 = 3, .opc1 = 0, .crn = 9, .crm = 14, .opc2 = 1, - .access = PL1_RW, .accessfn = access_tpm, - .fgt = FGT_PMINTEN, - .type = ARM_CP_IO, - .fieldoffset = offsetof(CPUARMState, cp15.c9_pminten), - .writefn = pmintenset_write, .raw_writefn = raw_write, - .resetvalue = 0x0 }, - { .name = "PMINTENCLR", .cp = 15, .crn = 9, .crm = 14, .opc1 = 0, .opc2 = 2, - .access = PL1_RW, .accessfn = access_tpm, - .fgt = FGT_PMINTEN, - .type = ARM_CP_ALIAS | ARM_CP_IO | ARM_CP_NO_RAW, - .fieldoffset = offsetof(CPUARMState, cp15.c9_pminten), - .writefn = pmintenclr_write, }, - { .name = "PMINTENCLR_EL1", .state = ARM_CP_STATE_AA64, - .opc0 = 3, .opc1 = 0, .crn = 9, .crm = 14, .opc2 = 2, - .access = PL1_RW, .accessfn = access_tpm, - .fgt = FGT_PMINTEN, - .type = ARM_CP_ALIAS | ARM_CP_IO | ARM_CP_NO_RAW, - .fieldoffset = offsetof(CPUARMState, cp15.c9_pminten), - .writefn = pmintenclr_write }, { .name = "CCSIDR", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .crn = 0, .crm = 0, .opc1 = 1, .opc2 = 0, .access = PL1_R, @@ -2072,12 +977,16 @@ static const ARMCPRegInfo v7_cp_reginfo[] = { .access = PL1_RW, .accessfn = access_tvm_trvm, .fgt = FGT_AFSR0_EL1, .nv2_redirect_offset = 0x128 | NV2_REDIR_NV1, + .vhe_redir_to_el2 = ENCODE_AA64_CP_REG(3, 4, 5, 1, 0), + .vhe_redir_to_el01 = ENCODE_AA64_CP_REG(3, 5, 5, 1, 0), .type = ARM_CP_CONST, .resetvalue = 0 }, { .name = "AFSR1_EL1", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .opc1 = 0, .crn = 5, .crm = 1, .opc2 = 1, .access = PL1_RW, .accessfn = access_tvm_trvm, .fgt = FGT_AFSR1_EL1, .nv2_redirect_offset = 0x130 | NV2_REDIR_NV1, + .vhe_redir_to_el2 = ENCODE_AA64_CP_REG(3, 4, 5, 1, 1), + .vhe_redir_to_el01 = ENCODE_AA64_CP_REG(3, 5, 5, 1, 1), .type = ARM_CP_CONST, .resetvalue = 0 }, /* * MAIR can just read-as-written because we don't implement caches @@ -2088,6 +997,8 @@ static const ARMCPRegInfo v7_cp_reginfo[] = { .access = PL1_RW, .accessfn = access_tvm_trvm, .fgt = FGT_MAIR_EL1, .nv2_redirect_offset = 0x140 | NV2_REDIR_NV1, + .vhe_redir_to_el2 = ENCODE_AA64_CP_REG(3, 4, 10, 2, 0), + .vhe_redir_to_el01 = ENCODE_AA64_CP_REG(3, 5, 10, 2, 0), .fieldoffset = offsetof(CPUARMState, cp15.mair_el[1]), .resetvalue = 0 }, { .name = "MAIR_EL3", .state = ARM_CP_STATE_AA64, @@ -2121,25 +1032,6 @@ static const ARMCPRegInfo v7_cp_reginfo[] = { .type = ARM_CP_NO_RAW, .access = PL1_R, .readfn = isr_read }, }; -static const ARMCPRegInfo pmovsset_cp_reginfo[] = { - /* PMOVSSET is not implemented in v7 before v7ve */ - { .name = "PMOVSSET", .cp = 15, .opc1 = 0, .crn = 9, .crm = 14, .opc2 = 3, - .access = PL0_RW, .accessfn = pmreg_access, - .fgt = FGT_PMOVS, - .type = ARM_CP_ALIAS | ARM_CP_IO, - .fieldoffset = offsetoflow32(CPUARMState, cp15.c9_pmovsr), - .writefn = pmovsset_write, - .raw_writefn = raw_write }, - { .name = "PMOVSSET_EL0", .state = ARM_CP_STATE_AA64, - .opc0 = 3, .opc1 = 3, .crn 
= 9, .crm = 14, .opc2 = 3, - .access = PL0_RW, .accessfn = pmreg_access, - .fgt = FGT_PMOVS, - .type = ARM_CP_ALIAS | ARM_CP_IO, - .fieldoffset = offsetof(CPUARMState, cp15.c9_pmovsr), - .writefn = pmovsset_write, - .raw_writefn = raw_write }, -}; - static void teecr_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) { @@ -2216,7 +1108,7 @@ static const ARMCPRegInfo v6k_cp_reginfo[] = { .resetvalue = 0 }, }; -static void arm_gt_cntfrq_reset(CPUARMState *env, const ARMCPRegInfo *opaque) +static void arm_gt_cntfrq_reset(CPUARMState *env, const ARMCPRegInfo *ri) { ARMCPU *cpu = env_archcpu(env); @@ -3153,9 +2045,11 @@ static const ARMCPRegInfo generic_timer_cp_reginfo[] = { .resetfn = arm_gt_cntfrq_reset, }, /* overall control: mostly access permissions */ - { .name = "CNTKCTL", .state = ARM_CP_STATE_BOTH, + { .name = "CNTKCTL_EL1", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .opc1 = 0, .crn = 14, .crm = 1, .opc2 = 0, .access = PL1_RW, + .vhe_redir_to_el2 = ENCODE_AA64_CP_REG(3, 4, 14, 1, 0), + .vhe_redir_to_el01 = ENCODE_AA64_CP_REG(3, 5, 14, 1, 0), .fieldoffset = offsetof(CPUARMState, cp15.c14_cntkctl), .resetvalue = 0, }, @@ -3448,402 +2342,6 @@ static void par_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) } } -#ifndef CONFIG_USER_ONLY -/* get_phys_addr() isn't present for user-mode-only targets */ - -static CPAccessResult ats_access(CPUARMState *env, const ARMCPRegInfo *ri, - bool isread) -{ - if (ri->opc2 & 4) { - /* - * The ATS12NSO* operations must trap to EL3 or EL2 if executed in - * Secure EL1 (which can only happen if EL3 is AArch64). - * They are simply UNDEF if executed from NS EL1. - * They function normally from EL2 or EL3. - */ - if (arm_current_el(env) == 1) { - if (arm_is_secure_below_el3(env)) { - if (env->cp15.scr_el3 & SCR_EEL2) { - return CP_ACCESS_TRAP_EL2; - } - return CP_ACCESS_TRAP_EL3; - } - return CP_ACCESS_UNDEFINED; - } - } - return CP_ACCESS_OK; -} - -#ifdef CONFIG_TCG -static int par_el1_shareability(GetPhysAddrResult *res) -{ - /* - * The PAR_EL1.SH field must be 0b10 for Device or Normal-NC - * memory -- see pseudocode PAREncodeShareability(). - */ - if (((res->cacheattrs.attrs & 0xf0) == 0) || - res->cacheattrs.attrs == 0x44 || res->cacheattrs.attrs == 0x40) { - return 2; - } - return res->cacheattrs.shareability; -} - -static uint64_t do_ats_write(CPUARMState *env, uint64_t value, - MMUAccessType access_type, ARMMMUIdx mmu_idx, - ARMSecuritySpace ss) -{ - bool ret; - uint64_t par64; - bool format64 = false; - ARMMMUFaultInfo fi = {}; - GetPhysAddrResult res = {}; - - /* - * I_MXTJT: Granule protection checks are not performed on the final - * address of a successful translation. This is a translation not a - * memory reference, so "memop = none = 0". - */ - ret = get_phys_addr_with_space_nogpc(env, value, access_type, 0, - mmu_idx, ss, &res, &fi); - - /* - * ATS operations only do S1 or S1+S2 translations, so we never - * have to deal with the ARMCacheAttrs format for S2 only. - */ - assert(!res.cacheattrs.is_s2_format); - - if (ret) { - /* - * Some kinds of translation fault must cause exceptions rather - * than being reported in the PAR. - */ - int current_el = arm_current_el(env); - int target_el; - uint32_t syn, fsr, fsc; - bool take_exc = false; - - if (fi.s1ptw && current_el == 1 - && arm_mmu_idx_is_stage1_of_2(mmu_idx)) { - /* - * Synchronous stage 2 fault on an access made as part of the - * translation table walk for AT S1E0* or AT S1E1* insn - * executed from NS EL1. 
If this is a synchronous external abort - * and SCR_EL3.EA == 1, then we take a synchronous external abort - * to EL3. Otherwise the fault is taken as an exception to EL2, - * and HPFAR_EL2 holds the faulting IPA. - */ - if (fi.type == ARMFault_SyncExternalOnWalk && - (env->cp15.scr_el3 & SCR_EA)) { - target_el = 3; - } else { - env->cp15.hpfar_el2 = extract64(fi.s2addr, 12, 47) << 4; - if (arm_is_secure_below_el3(env) && fi.s1ns) { - env->cp15.hpfar_el2 |= HPFAR_NS; - } - target_el = 2; - } - take_exc = true; - } else if (fi.type == ARMFault_SyncExternalOnWalk) { - /* - * Synchronous external aborts during a translation table walk - * are taken as Data Abort exceptions. - */ - if (fi.stage2) { - if (current_el == 3) { - target_el = 3; - } else { - target_el = 2; - } - } else { - target_el = exception_target_el(env); - } - take_exc = true; - } - - if (take_exc) { - /* Construct FSR and FSC using same logic as arm_deliver_fault() */ - if (target_el == 2 || arm_el_is_aa64(env, target_el) || - arm_s1_regime_using_lpae_format(env, mmu_idx)) { - fsr = arm_fi_to_lfsc(&fi); - fsc = extract32(fsr, 0, 6); - } else { - fsr = arm_fi_to_sfsc(&fi); - fsc = 0x3f; - } - /* - * Report exception with ESR indicating a fault due to a - * translation table walk for a cache maintenance instruction. - */ - syn = syn_data_abort_no_iss(current_el == target_el, 0, - fi.ea, 1, fi.s1ptw, 1, fsc); - env->exception.vaddress = value; - env->exception.fsr = fsr; - raise_exception(env, EXCP_DATA_ABORT, syn, target_el); - } - } - - if (is_a64(env)) { - format64 = true; - } else if (arm_feature(env, ARM_FEATURE_LPAE)) { - /* - * ATS1Cxx: - * * TTBCR.EAE determines whether the result is returned using the - * 32-bit or the 64-bit PAR format - * * Instructions executed in Hyp mode always use the 64bit format - * - * ATS1S2NSOxx uses the 64bit format if any of the following is true: - * * The Non-secure TTBCR.EAE bit is set to 1 - * * The implementation includes EL2, and the value of HCR.VM is 1 - * - * (Note that HCR.DC makes HCR.VM behave as if it is 1.) - * - * ATS1Hx always uses the 64bit format. - */ - format64 = arm_s1_regime_using_lpae_format(env, mmu_idx); - - if (arm_feature(env, ARM_FEATURE_EL2)) { - if (mmu_idx == ARMMMUIdx_E10_0 || - mmu_idx == ARMMMUIdx_E10_1 || - mmu_idx == ARMMMUIdx_E10_1_PAN) { - format64 |= env->cp15.hcr_el2 & (HCR_VM | HCR_DC); - } else { - format64 |= arm_current_el(env) == 2; - } - } - } - - if (format64) { - /* Create a 64-bit PAR */ - par64 = (1 << 11); /* LPAE bit always set */ - if (!ret) { - par64 |= res.f.phys_addr & ~0xfffULL; - if (!res.f.attrs.secure) { - par64 |= (1 << 9); /* NS */ - } - par64 |= (uint64_t)res.cacheattrs.attrs << 56; /* ATTR */ - par64 |= par_el1_shareability(&res) << 7; /* SH */ - } else { - uint32_t fsr = arm_fi_to_lfsc(&fi); - - par64 |= 1; /* F */ - par64 |= (fsr & 0x3f) << 1; /* FS */ - if (fi.stage2) { - par64 |= (1 << 9); /* S */ - } - if (fi.s1ptw) { - par64 |= (1 << 8); /* PTW */ - } - } - } else { - /* - * fsr is a DFSR/IFSR value for the short descriptor - * translation table format (with WnR always clear). - * Convert it to a 32-bit PAR. 
- */ - if (!ret) { - /* We do not set any attribute bits in the PAR */ - if (res.f.lg_page_size == 24 - && arm_feature(env, ARM_FEATURE_V7)) { - par64 = (res.f.phys_addr & 0xff000000) | (1 << 1); - } else { - par64 = res.f.phys_addr & 0xfffff000; - } - if (!res.f.attrs.secure) { - par64 |= (1 << 9); /* NS */ - } - } else { - uint32_t fsr = arm_fi_to_sfsc(&fi); - - par64 = ((fsr & (1 << 10)) >> 5) | ((fsr & (1 << 12)) >> 6) | - ((fsr & 0xf) << 1) | 1; - } - } - return par64; -} -#endif /* CONFIG_TCG */ - -static void ats_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) -{ -#ifdef CONFIG_TCG - MMUAccessType access_type = ri->opc2 & 1 ? MMU_DATA_STORE : MMU_DATA_LOAD; - uint64_t par64; - ARMMMUIdx mmu_idx; - int el = arm_current_el(env); - ARMSecuritySpace ss = arm_security_space(env); - - switch (ri->opc2 & 6) { - case 0: - /* stage 1 current state PL1: ATS1CPR, ATS1CPW, ATS1CPRP, ATS1CPWP */ - switch (el) { - case 3: - if (ri->crm == 9 && arm_pan_enabled(env)) { - mmu_idx = ARMMMUIdx_E30_3_PAN; - } else { - mmu_idx = ARMMMUIdx_E3; - } - break; - case 2: - g_assert(ss != ARMSS_Secure); /* ARMv8.4-SecEL2 is 64-bit only */ - /* fall through */ - case 1: - if (ri->crm == 9 && arm_pan_enabled(env)) { - mmu_idx = ARMMMUIdx_Stage1_E1_PAN; - } else { - mmu_idx = ARMMMUIdx_Stage1_E1; - } - break; - default: - g_assert_not_reached(); - } - break; - case 2: - /* stage 1 current state PL0: ATS1CUR, ATS1CUW */ - switch (el) { - case 3: - mmu_idx = ARMMMUIdx_E30_0; - break; - case 2: - g_assert(ss != ARMSS_Secure); /* ARMv8.4-SecEL2 is 64-bit only */ - mmu_idx = ARMMMUIdx_Stage1_E0; - break; - case 1: - mmu_idx = ARMMMUIdx_Stage1_E0; - break; - default: - g_assert_not_reached(); - } - break; - case 4: - /* stage 1+2 NonSecure PL1: ATS12NSOPR, ATS12NSOPW */ - mmu_idx = ARMMMUIdx_E10_1; - ss = ARMSS_NonSecure; - break; - case 6: - /* stage 1+2 NonSecure PL0: ATS12NSOUR, ATS12NSOUW */ - mmu_idx = ARMMMUIdx_E10_0; - ss = ARMSS_NonSecure; - break; - default: - g_assert_not_reached(); - } - - par64 = do_ats_write(env, value, access_type, mmu_idx, ss); - - A32_BANKED_CURRENT_REG_SET(env, par, par64); -#else - /* Handled by hardware accelerator. */ - g_assert_not_reached(); -#endif /* CONFIG_TCG */ -} - -static void ats1h_write(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) -{ -#ifdef CONFIG_TCG - MMUAccessType access_type = ri->opc2 & 1 ? MMU_DATA_STORE : MMU_DATA_LOAD; - uint64_t par64; - - /* There is no SecureEL2 for AArch32. */ - par64 = do_ats_write(env, value, access_type, ARMMMUIdx_E2, - ARMSS_NonSecure); - - A32_BANKED_CURRENT_REG_SET(env, par, par64); -#else - /* Handled by hardware accelerator. */ - g_assert_not_reached(); -#endif /* CONFIG_TCG */ -} - -static CPAccessResult at_e012_access(CPUARMState *env, const ARMCPRegInfo *ri, - bool isread) -{ - /* - * R_NYXTL: instruction is UNDEFINED if it applies to an Exception level - * lower than EL3 and the combination SCR_EL3.{NSE,NS} is reserved. This can - * only happen when executing at EL3 because that combination also causes an - * illegal exception return. We don't need to check FEAT_RME either, because - * scr_write() ensures that the NSE bit is not set otherwise. 
- */ - if ((env->cp15.scr_el3 & (SCR_NSE | SCR_NS)) == SCR_NSE) { - return CP_ACCESS_UNDEFINED; - } - return CP_ACCESS_OK; -} - -static CPAccessResult at_s1e2_access(CPUARMState *env, const ARMCPRegInfo *ri, - bool isread) -{ - if (arm_current_el(env) == 3 && - !(env->cp15.scr_el3 & (SCR_NS | SCR_EEL2))) { - return CP_ACCESS_UNDEFINED; - } - return at_e012_access(env, ri, isread); -} - -static CPAccessResult at_s1e01_access(CPUARMState *env, const ARMCPRegInfo *ri, - bool isread) -{ - if (arm_current_el(env) == 1 && (arm_hcr_el2_eff(env) & HCR_AT)) { - return CP_ACCESS_TRAP_EL2; - } - return at_e012_access(env, ri, isread); -} - -static void ats_write64(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) -{ -#ifdef CONFIG_TCG - MMUAccessType access_type = ri->opc2 & 1 ? MMU_DATA_STORE : MMU_DATA_LOAD; - ARMMMUIdx mmu_idx; - uint64_t hcr_el2 = arm_hcr_el2_eff(env); - bool regime_e20 = (hcr_el2 & (HCR_E2H | HCR_TGE)) == (HCR_E2H | HCR_TGE); - bool for_el3 = false; - ARMSecuritySpace ss; - - switch (ri->opc2 & 6) { - case 0: - switch (ri->opc1) { - case 0: /* AT S1E1R, AT S1E1W, AT S1E1RP, AT S1E1WP */ - if (ri->crm == 9 && arm_pan_enabled(env)) { - mmu_idx = regime_e20 ? - ARMMMUIdx_E20_2_PAN : ARMMMUIdx_Stage1_E1_PAN; - } else { - mmu_idx = regime_e20 ? ARMMMUIdx_E20_2 : ARMMMUIdx_Stage1_E1; - } - break; - case 4: /* AT S1E2R, AT S1E2W */ - mmu_idx = hcr_el2 & HCR_E2H ? ARMMMUIdx_E20_2 : ARMMMUIdx_E2; - break; - case 6: /* AT S1E3R, AT S1E3W */ - mmu_idx = ARMMMUIdx_E3; - for_el3 = true; - break; - default: - g_assert_not_reached(); - } - break; - case 2: /* AT S1E0R, AT S1E0W */ - mmu_idx = regime_e20 ? ARMMMUIdx_E20_0 : ARMMMUIdx_Stage1_E0; - break; - case 4: /* AT S12E1R, AT S12E1W */ - mmu_idx = regime_e20 ? ARMMMUIdx_E20_2 : ARMMMUIdx_E10_1; - break; - case 6: /* AT S12E0R, AT S12E0W */ - mmu_idx = regime_e20 ? ARMMMUIdx_E20_0 : ARMMMUIdx_E10_0; - break; - default: - g_assert_not_reached(); - } - - ss = for_el3 ? arm_security_space(env) : arm_security_space_below_el3(env); - env->cp15.par_el[1] = do_ats_write(env, value, access_type, mmu_idx, ss); -#else - /* Handled by hardware accelerator. */ - g_assert_not_reached(); -#endif /* CONFIG_TCG */ -} -#endif - /* Return basic MPU access permission bits. */ static uint32_t simple_mpu_ap_bits(uint32_t val) { @@ -4281,7 +2779,7 @@ static void vmsa_ttbr_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) { /* If the ASID changes (with a 64-bit write), we must flush the TLB. 
*/ - if (cpreg_field_is_64bit(ri) && + if (cpreg_field_type(ri) == MO_64 && extract64(raw_read(env, ri) ^ value, 48, 16) != 0) { ARMCPU *cpu = env_archcpu(env); tlb_flush(CPU(cpu)); @@ -4302,7 +2800,9 @@ static void vmsa_tcr_ttbr_el2_write(CPUARMState *env, const ARMCPRegInfo *ri, (arm_hcr_el2_eff(env) & HCR_E2H)) { uint16_t mask = ARMMMUIdxBit_E20_2 | ARMMMUIdxBit_E20_2_PAN | - ARMMMUIdxBit_E20_0; + ARMMMUIdxBit_E20_2_GCS | + ARMMMUIdxBit_E20_0 | + ARMMMUIdxBit_E20_0_GCS; tlb_flush_by_mmuidx(env_cpu(env), mask); } raw_write(env, ri, value); @@ -4342,6 +2842,8 @@ static const ARMCPRegInfo vmsa_pmsa_cp_reginfo[] = { .access = PL1_RW, .accessfn = access_tvm_trvm, .fgt = FGT_FAR_EL1, .nv2_redirect_offset = 0x220 | NV2_REDIR_NV1, + .vhe_redir_to_el2 = ENCODE_AA64_CP_REG(3, 4, 6, 0, 0), + .vhe_redir_to_el01 = ENCODE_AA64_CP_REG(3, 5, 6, 0, 0), .fieldoffset = offsetof(CPUARMState, cp15.far_el[1]), .resetvalue = 0, }, }; @@ -4352,12 +2854,16 @@ static const ARMCPRegInfo vmsa_cp_reginfo[] = { .access = PL1_RW, .accessfn = access_tvm_trvm, .fgt = FGT_ESR_EL1, .nv2_redirect_offset = 0x138 | NV2_REDIR_NV1, + .vhe_redir_to_el2 = ENCODE_AA64_CP_REG(3, 4, 5, 2, 0), + .vhe_redir_to_el01 = ENCODE_AA64_CP_REG(3, 5, 5, 2, 0), .fieldoffset = offsetof(CPUARMState, cp15.esr_el[1]), .resetvalue = 0, }, { .name = "TTBR0_EL1", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .opc1 = 0, .crn = 2, .crm = 0, .opc2 = 0, .access = PL1_RW, .accessfn = access_tvm_trvm, .fgt = FGT_TTBR0_EL1, .nv2_redirect_offset = 0x200 | NV2_REDIR_NV1, + .vhe_redir_to_el2 = ENCODE_AA64_CP_REG(3, 4, 2, 0, 0), + .vhe_redir_to_el01 = ENCODE_AA64_CP_REG(3, 5, 2, 0, 0), .writefn = vmsa_ttbr_write, .resetvalue = 0, .raw_writefn = raw_write, .bank_fieldoffsets = { offsetof(CPUARMState, cp15.ttbr0_s), offsetof(CPUARMState, cp15.ttbr0_ns) } }, @@ -4366,6 +2872,8 @@ static const ARMCPRegInfo vmsa_cp_reginfo[] = { .access = PL1_RW, .accessfn = access_tvm_trvm, .fgt = FGT_TTBR1_EL1, .nv2_redirect_offset = 0x210 | NV2_REDIR_NV1, + .vhe_redir_to_el2 = ENCODE_AA64_CP_REG(3, 4, 2, 0, 1), + .vhe_redir_to_el01 = ENCODE_AA64_CP_REG(3, 5, 2, 0, 1), .writefn = vmsa_ttbr_write, .resetvalue = 0, .raw_writefn = raw_write, .bank_fieldoffsets = { offsetof(CPUARMState, cp15.ttbr1_s), offsetof(CPUARMState, cp15.ttbr1_ns) } }, @@ -4374,6 +2882,8 @@ static const ARMCPRegInfo vmsa_cp_reginfo[] = { .access = PL1_RW, .accessfn = access_tvm_trvm, .fgt = FGT_TCR_EL1, .nv2_redirect_offset = 0x120 | NV2_REDIR_NV1, + .vhe_redir_to_el2 = ENCODE_AA64_CP_REG(3, 4, 2, 0, 2), + .vhe_redir_to_el01 = ENCODE_AA64_CP_REG(3, 5, 2, 0, 2), .writefn = vmsa_tcr_el12_write, .raw_writefn = raw_write, .resetvalue = 0, @@ -4418,8 +2928,12 @@ static void omap_threadid_write(CPUARMState *env, const ARMCPRegInfo *ri, static void omap_wfi_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) { +#ifdef CONFIG_USER_ONLY + g_assert_not_reached(); +#else /* Wait-for-interrupt (deprecated) */ cpu_interrupt(env_cpu(env), CPU_INTERRUPT_HALT); +#endif } static void omap_cachemaint_write(CPUARMState *env, const ARMCPRegInfo *ri, @@ -4473,39 +2987,6 @@ static const ARMCPRegInfo omap_cp_reginfo[] = { .type = ARM_CP_CONST | ARM_CP_OVERRIDE, .resetvalue = 0 }, }; -static void xscale_cpar_write(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) -{ - env->cp15.c15_cpar = value & 0x3fff; -} - -static const ARMCPRegInfo xscale_cp_reginfo[] = { - { .name = "XSCALE_CPAR", - .cp = 15, .crn = 15, .crm = 1, .opc1 = 0, .opc2 = 0, .access = PL1_RW, - .fieldoffset = offsetof(CPUARMState, cp15.c15_cpar), 
.resetvalue = 0, - .writefn = xscale_cpar_write, }, - { .name = "XSCALE_AUXCR", - .cp = 15, .crn = 1, .crm = 0, .opc1 = 0, .opc2 = 1, .access = PL1_RW, - .fieldoffset = offsetof(CPUARMState, cp15.c1_xscaleauxcr), - .resetvalue = 0, }, - /* - * XScale specific cache-lockdown: since we have no cache we NOP these - * and hope the guest does not really rely on cache behaviour. - */ - { .name = "XSCALE_LOCK_ICACHE_LINE", - .cp = 15, .opc1 = 0, .crn = 9, .crm = 1, .opc2 = 0, - .access = PL1_W, .type = ARM_CP_NOP }, - { .name = "XSCALE_UNLOCK_ICACHE", - .cp = 15, .opc1 = 0, .crn = 9, .crm = 1, .opc2 = 1, - .access = PL1_W, .type = ARM_CP_NOP }, - { .name = "XSCALE_DCACHE_LOCK", - .cp = 15, .opc1 = 0, .crn = 9, .crm = 2, .opc2 = 0, - .access = PL1_RW, .type = ARM_CP_NOP }, - { .name = "XSCALE_UNLOCK_DCACHE", - .cp = 15, .opc1 = 0, .crn = 9, .crm = 2, .opc2 = 1, - .access = PL1_W, .type = ARM_CP_NOP }, -}; - static const ARMCPRegInfo dummy_c15_cp_reginfo[] = { /* * RAZ/WI the whole crn=15 space, when we don't have a more specific @@ -4608,12 +3089,14 @@ static uint64_t mpidr_read(CPUARMState *env, const ARMCPRegInfo *ri) } static const ARMCPRegInfo lpae_cp_reginfo[] = { - /* NOP AMAIR0/1 */ - { .name = "AMAIR0", .state = ARM_CP_STATE_BOTH, + /* AMAIR0 is mapped to AMAIR_EL1[31:0] */ + { .name = "AMAIR_EL1", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .crn = 10, .crm = 3, .opc1 = 0, .opc2 = 0, .access = PL1_RW, .accessfn = access_tvm_trvm, .fgt = FGT_AMAIR_EL1, .nv2_redirect_offset = 0x148 | NV2_REDIR_NV1, + .vhe_redir_to_el2 = ENCODE_AA64_CP_REG(3, 4, 10, 3, 0), + .vhe_redir_to_el01 = ENCODE_AA64_CP_REG(3, 5, 10, 3, 0), .type = ARM_CP_CONST, .resetvalue = 0 }, /* AMAIR1 is mapped to AMAIR_EL1[63:32] */ { .name = "AMAIR1", .cp = 15, .crn = 10, .crm = 3, .opc1 = 0, .opc2 = 1, @@ -4896,16 +3379,6 @@ static void sctlr_write(CPUARMState *env, const ARMCPRegInfo *ri, /* This may enable/disable the MMU, so do a TLB flush. */ tlb_flush(CPU(cpu)); - - if (tcg_enabled() && ri->type & ARM_CP_SUPPRESS_TB_END) { - /* - * Normally we would always end the TB on an SCTLR write; see the - * comment in ARMCPRegInfo sctlr initialization below for why Xscale - * is special. Setting ARM_CP_SUPPRESS_TB_END also stops the rebuild - * of hflags from the translator, so do it here. - */ - arm_rebuild_hflags(env); - } } static void mdcr_el3_write(CPUARMState *env, const ARMCPRegInfo *ri, @@ -4953,15 +3426,71 @@ static void mdcr_el2_write(CPUARMState *env, const ARMCPRegInfo *ri, } } +static CPAccessResult access_nv1_with_nvx(uint64_t hcr_nv) +{ + return hcr_nv == (HCR_NV | HCR_NV1) ? CP_ACCESS_TRAP_EL2 : CP_ACCESS_OK; +} + static CPAccessResult access_nv1(CPUARMState *env, const ARMCPRegInfo *ri, bool isread) { if (arm_current_el(env) == 1) { - uint64_t hcr_nv = arm_hcr_el2_eff(env) & (HCR_NV | HCR_NV1 | HCR_NV2); + return access_nv1_with_nvx(arm_hcr_el2_nvx_eff(env)); + } + return CP_ACCESS_OK; +} - if (hcr_nv == (HCR_NV | HCR_NV1)) { - return CP_ACCESS_TRAP_EL2; +static CPAccessResult access_nv1_or_exlock_el1(CPUARMState *env, + const ARMCPRegInfo *ri, + bool isread) +{ + if (arm_current_el(env) == 1) { + uint64_t nvx = arm_hcr_el2_nvx_eff(env); + + if (!isread && + (env->pstate & PSTATE_EXLOCK) && + (env->cp15.gcscr_el[1] & GCSCR_EXLOCKEN) && + !(nvx & HCR_NV1)) { + return CP_ACCESS_EXLOCK; } + return access_nv1_with_nvx(nvx); + } + + /* + * At EL2, since VHE redirection is done at translation time, + * el_is_in_host is always false here, so EXLOCK does not apply. 
+ */ + return CP_ACCESS_OK; +} + +static CPAccessResult access_exlock_el2(CPUARMState *env, + const ARMCPRegInfo *ri, bool isread) +{ + int el = arm_current_el(env); + + if (el == 3) { + return CP_ACCESS_OK; + } + + /* + * Access to the EL2 register from EL1 means NV is set, and + * EXLOCK has priority over an NV1 trap to EL2. + */ + if (!isread && + (env->pstate & PSTATE_EXLOCK) && + (env->cp15.gcscr_el[el] & GCSCR_EXLOCKEN)) { + return CP_ACCESS_EXLOCK; + } + return CP_ACCESS_OK; +} + +static CPAccessResult access_exlock_el3(CPUARMState *env, + const ARMCPRegInfo *ri, bool isread) +{ + if (!isread && + (env->pstate & PSTATE_EXLOCK) && + (env->cp15.gcscr_el[3] & GCSCR_EXLOCKEN)) { + return CP_ACCESS_EXLOCK; } return CP_ACCESS_OK; } @@ -5094,53 +3623,6 @@ static const ARMCPRegInfo v8_cp_reginfo[] = { .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 14, .opc2 = 2, .fgt = FGT_DCCISW, .access = PL1_W, .accessfn = access_tsw, .type = ARM_CP_NOP }, -#ifndef CONFIG_USER_ONLY - /* 64 bit address translation operations */ - { .name = "AT_S1E1R", .state = ARM_CP_STATE_AA64, - .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 8, .opc2 = 0, - .access = PL1_W, .type = ARM_CP_NO_RAW | ARM_CP_RAISES_EXC, - .fgt = FGT_ATS1E1R, - .accessfn = at_s1e01_access, .writefn = ats_write64 }, - { .name = "AT_S1E1W", .state = ARM_CP_STATE_AA64, - .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 8, .opc2 = 1, - .access = PL1_W, .type = ARM_CP_NO_RAW | ARM_CP_RAISES_EXC, - .fgt = FGT_ATS1E1W, - .accessfn = at_s1e01_access, .writefn = ats_write64 }, - { .name = "AT_S1E0R", .state = ARM_CP_STATE_AA64, - .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 8, .opc2 = 2, - .access = PL1_W, .type = ARM_CP_NO_RAW | ARM_CP_RAISES_EXC, - .fgt = FGT_ATS1E0R, - .accessfn = at_s1e01_access, .writefn = ats_write64 }, - { .name = "AT_S1E0W", .state = ARM_CP_STATE_AA64, - .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 8, .opc2 = 3, - .access = PL1_W, .type = ARM_CP_NO_RAW | ARM_CP_RAISES_EXC, - .fgt = FGT_ATS1E0W, - .accessfn = at_s1e01_access, .writefn = ats_write64 }, - { .name = "AT_S12E1R", .state = ARM_CP_STATE_AA64, - .opc0 = 1, .opc1 = 4, .crn = 7, .crm = 8, .opc2 = 4, - .access = PL2_W, .type = ARM_CP_NO_RAW | ARM_CP_RAISES_EXC, - .accessfn = at_e012_access, .writefn = ats_write64 }, - { .name = "AT_S12E1W", .state = ARM_CP_STATE_AA64, - .opc0 = 1, .opc1 = 4, .crn = 7, .crm = 8, .opc2 = 5, - .access = PL2_W, .type = ARM_CP_NO_RAW | ARM_CP_RAISES_EXC, - .accessfn = at_e012_access, .writefn = ats_write64 }, - { .name = "AT_S12E0R", .state = ARM_CP_STATE_AA64, - .opc0 = 1, .opc1 = 4, .crn = 7, .crm = 8, .opc2 = 6, - .access = PL2_W, .type = ARM_CP_NO_RAW | ARM_CP_RAISES_EXC, - .accessfn = at_e012_access, .writefn = ats_write64 }, - { .name = "AT_S12E0W", .state = ARM_CP_STATE_AA64, - .opc0 = 1, .opc1 = 4, .crn = 7, .crm = 8, .opc2 = 7, - .access = PL2_W, .type = ARM_CP_NO_RAW | ARM_CP_RAISES_EXC, - .accessfn = at_e012_access, .writefn = ats_write64 }, - /* AT S1E2* are elsewhere as they UNDEF from EL3 if EL2 is not present */ - { .name = "AT_S1E3R", .state = ARM_CP_STATE_AA64, - .opc0 = 1, .opc1 = 6, .crn = 7, .crm = 8, .opc2 = 0, - .access = PL3_W, .type = ARM_CP_NO_RAW | ARM_CP_RAISES_EXC, - .writefn = ats_write64 }, - { .name = "AT_S1E3W", .state = ARM_CP_STATE_AA64, - .opc0 = 1, .opc1 = 6, .crn = 7, .crm = 8, .opc2 = 1, - .access = PL3_W, .type = ARM_CP_NO_RAW | ARM_CP_RAISES_EXC, - .writefn = ats_write64 }, { .name = "PAR_EL1", .state = ARM_CP_STATE_AA64, .type = ARM_CP_ALIAS, .opc0 = 3, .opc1 = 0, .crn = 7, .crm = 4, .opc2 = 0, @@ -5148,7 +3630,6 @@ static const 
ARMCPRegInfo v8_cp_reginfo[] = { .fgt = FGT_PAR_EL1, .fieldoffset = offsetof(CPUARMState, cp15.par_el[1]), .writefn = par_write }, -#endif /* 32 bit cache operations */ { .name = "ICIALLUIS", .cp = 15, .opc1 = 0, .crn = 7, .crm = 1, .opc2 = 0, .type = ARM_CP_NOP, .access = PL1_W, .accessfn = access_ticab }, @@ -5185,14 +3666,18 @@ static const ARMCPRegInfo v8_cp_reginfo[] = { { .name = "ELR_EL1", .state = ARM_CP_STATE_AA64, .type = ARM_CP_ALIAS, .opc0 = 3, .opc1 = 0, .crn = 4, .crm = 0, .opc2 = 1, - .access = PL1_RW, .accessfn = access_nv1, + .access = PL1_RW, .accessfn = access_nv1_or_exlock_el1, .nv2_redirect_offset = 0x230 | NV2_REDIR_NV1, + .vhe_redir_to_el2 = ENCODE_AA64_CP_REG(3, 4, 4, 0, 1), + .vhe_redir_to_el01 = ENCODE_AA64_CP_REG(3, 5, 4, 0, 1), .fieldoffset = offsetof(CPUARMState, elr_el[1]) }, { .name = "SPSR_EL1", .state = ARM_CP_STATE_AA64, .type = ARM_CP_ALIAS, .opc0 = 3, .opc1 = 0, .crn = 4, .crm = 0, .opc2 = 0, - .access = PL1_RW, .accessfn = access_nv1, + .access = PL1_RW, .accessfn = access_nv1_or_exlock_el1, .nv2_redirect_offset = 0x160 | NV2_REDIR_NV1, + .vhe_redir_to_el2 = ENCODE_AA64_CP_REG(3, 4, 4, 0, 0), + .vhe_redir_to_el01 = ENCODE_AA64_CP_REG(3, 5, 4, 0, 0), .fieldoffset = offsetof(CPUARMState, banked_spsr[BANK_SVC]) }, /* * We rely on the access checks not allowing the guest to write to the @@ -5332,7 +3817,8 @@ static void do_hcr_write(CPUARMState *env, uint64_t value, uint64_t valid_mask) value &= valid_mask; /* RW is RAO/WI if EL1 is AArch64 only */ - if (!cpu_isar_feature(aa64_aa32_el1, cpu)) { + if (arm_feature(env, ARM_FEATURE_AARCH64) && + !cpu_isar_feature(aa64_aa32_el1, cpu)) { value |= HCR_RW; } @@ -5477,6 +3963,16 @@ uint64_t arm_hcr_el2_eff(CPUARMState *env) return arm_hcr_el2_eff_secstate(env, arm_security_space_below_el3(env)); } +uint64_t arm_hcr_el2_nvx_eff(CPUARMState *env) +{ + uint64_t hcr = arm_hcr_el2_eff(env); + + if (!(hcr & HCR_NV)) { + return 0; /* CONSTRAINED UNPREDICTABLE wrt NV1 */ + } + return hcr & (HCR_NV2 | HCR_NV1 | HCR_NV); +} + /* * Corresponds to ARM pseudocode function ELIsInHost(). */ @@ -5511,23 +4007,27 @@ static void hcrx_write(CPUARMState *env, const ARMCPRegInfo *ri, ARMCPU *cpu = env_archcpu(env); uint64_t valid_mask = 0; - /* FEAT_MOPS adds MSCEn and MCE2 */ if (cpu_isar_feature(aa64_mops, cpu)) { valid_mask |= HCRX_MSCEN | HCRX_MCE2; } - - /* FEAT_NMI adds TALLINT, VINMI and VFNMI */ if (cpu_isar_feature(aa64_nmi, cpu)) { valid_mask |= HCRX_TALLINT | HCRX_VINMI | HCRX_VFNMI; } - /* FEAT_CMOW adds CMOW */ if (cpu_isar_feature(aa64_cmow, cpu)) { valid_mask |= HCRX_CMOW; } - /* FEAT_XS adds FGTnXS, FnXS */ if (cpu_isar_feature(aa64_xs, cpu)) { valid_mask |= HCRX_FGTNXS | HCRX_FNXS; } + if (cpu_isar_feature(aa64_tcr2, cpu)) { + valid_mask |= HCRX_TCR2EN; + } + if (cpu_isar_feature(aa64_sctlr2, cpu)) { + valid_mask |= HCRX_SCTLR2EN; + } + if (cpu_isar_feature(aa64_gcs, cpu)) { + valid_mask |= HCRX_GCSEN; + } /* Clear RES0 bits. */ env->cp15.hcrx_el2 = value & valid_mask; @@ -5585,11 +4085,22 @@ uint64_t arm_hcrx_el2_eff(CPUARMState *env) * This may need to be revisited for future bits. */ if (!arm_is_el2_enabled(env)) { + ARMCPU *cpu = env_archcpu(env); uint64_t hcrx = 0; - if (cpu_isar_feature(aa64_mops, env_archcpu(env))) { - /* MSCEn behaves as 1 if EL2 is not enabled */ + + /* Bits which whose effective value is 1 if el2 not enabled. 
*/ + if (cpu_isar_feature(aa64_mops, cpu)) { hcrx |= HCRX_MSCEN; } + if (cpu_isar_feature(aa64_tcr2, cpu)) { + hcrx |= HCRX_TCR2EN; + } + if (cpu_isar_feature(aa64_sctlr2, cpu)) { + hcrx |= HCRX_SCTLR2EN; + } + if (cpu_isar_feature(aa64_gcs, cpu)) { + hcrx |= HCRX_GCSEN; + } return hcrx; } if (arm_feature(env, ARM_FEATURE_EL3) && !(env->cp15.scr_el3 & SCR_HXEN)) { @@ -5647,7 +4158,7 @@ static const ARMCPRegInfo el2_cp_reginfo[] = { { .name = "ELR_EL2", .state = ARM_CP_STATE_AA64, .type = ARM_CP_ALIAS | ARM_CP_NV2_REDIRECT, .opc0 = 3, .opc1 = 4, .crn = 4, .crm = 0, .opc2 = 1, - .access = PL2_RW, + .access = PL2_RW, .accessfn = access_exlock_el2, .fieldoffset = offsetof(CPUARMState, elr_el[2]) }, { .name = "ESR_EL2", .state = ARM_CP_STATE_BOTH, .type = ARM_CP_NV2_REDIRECT, @@ -5665,7 +4176,7 @@ static const ARMCPRegInfo el2_cp_reginfo[] = { { .name = "SPSR_EL2", .state = ARM_CP_STATE_AA64, .type = ARM_CP_ALIAS | ARM_CP_NV2_REDIRECT, .opc0 = 3, .opc1 = 4, .crn = 4, .crm = 0, .opc2 = 0, - .access = PL2_RW, + .access = PL2_RW, .accessfn = access_exlock_el2, .fieldoffset = offsetof(CPUARMState, banked_spsr[BANK_HYP]) }, { .name = "VBAR_EL2", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .opc1 = 4, .crn = 12, .crm = 0, .opc2 = 0, @@ -5751,33 +4262,6 @@ static const ARMCPRegInfo el2_cp_reginfo[] = { .access = PL2_RW, .type = ARM_CP_64BIT | ARM_CP_ALIAS, .fieldoffset = offsetof(CPUARMState, cp15.ttbr0_el[2]) }, #ifndef CONFIG_USER_ONLY - /* - * Unlike the other EL2-related AT operations, these must - * UNDEF from EL3 if EL2 is not implemented, which is why we - * define them here rather than with the rest of the AT ops. - */ - { .name = "AT_S1E2R", .state = ARM_CP_STATE_AA64, - .opc0 = 1, .opc1 = 4, .crn = 7, .crm = 8, .opc2 = 0, - .access = PL2_W, .accessfn = at_s1e2_access, - .type = ARM_CP_NO_RAW | ARM_CP_RAISES_EXC | ARM_CP_EL3_NO_EL2_UNDEF, - .writefn = ats_write64 }, - { .name = "AT_S1E2W", .state = ARM_CP_STATE_AA64, - .opc0 = 1, .opc1 = 4, .crn = 7, .crm = 8, .opc2 = 1, - .access = PL2_W, .accessfn = at_s1e2_access, - .type = ARM_CP_NO_RAW | ARM_CP_RAISES_EXC | ARM_CP_EL3_NO_EL2_UNDEF, - .writefn = ats_write64 }, - /* - * The AArch32 ATS1H* operations are CONSTRAINED UNPREDICTABLE - * if EL2 is not implemented; we choose to UNDEF. Behaviour at EL3 - * with SCR.NS == 0 outside Monitor mode is UNPREDICTABLE; we choose - * to behave as if SCR.NS was 1. 
- */ - { .name = "ATS1HR", .cp = 15, .opc1 = 4, .crn = 7, .crm = 8, .opc2 = 0, - .access = PL2_W, - .writefn = ats1h_write, .type = ARM_CP_NO_RAW | ARM_CP_RAISES_EXC }, - { .name = "ATS1HW", .cp = 15, .opc1 = 4, .crn = 7, .crm = 8, .opc2 = 1, - .access = PL2_W, - .writefn = ats1h_write, .type = ARM_CP_NO_RAW | ARM_CP_RAISES_EXC }, { .name = "CNTHCTL_EL2", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .opc1 = 4, .crn = 14, .crm = 1, .opc2 = 0, /* @@ -5974,7 +4458,7 @@ static const ARMCPRegInfo el3_cp_reginfo[] = { { .name = "ELR_EL3", .state = ARM_CP_STATE_AA64, .type = ARM_CP_ALIAS, .opc0 = 3, .opc1 = 6, .crn = 4, .crm = 0, .opc2 = 1, - .access = PL3_RW, + .access = PL3_RW, .accessfn = access_exlock_el3, .fieldoffset = offsetof(CPUARMState, elr_el[3]) }, { .name = "ESR_EL3", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 6, .crn = 5, .crm = 2, .opc2 = 0, @@ -5985,7 +4469,7 @@ static const ARMCPRegInfo el3_cp_reginfo[] = { { .name = "SPSR_EL3", .state = ARM_CP_STATE_AA64, .type = ARM_CP_ALIAS, .opc0 = 3, .opc1 = 6, .crn = 4, .crm = 0, .opc2 = 0, - .access = PL3_RW, + .access = PL3_RW, .accessfn = access_exlock_el3, .fieldoffset = offsetof(CPUARMState, banked_spsr[BANK_MON]) }, { .name = "VBAR_EL3", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 6, .crn = 12, .crm = 0, .opc2 = 0, @@ -6053,235 +4537,6 @@ static CPAccessResult access_el1nvvct(CPUARMState *env, const ARMCPRegInfo *ri, return e2h_access(env, ri, isread); } -/* Test if system register redirection is to occur in the current state. */ -static bool redirect_for_e2h(CPUARMState *env) -{ - return arm_current_el(env) == 2 && (arm_hcr_el2_eff(env) & HCR_E2H); -} - -static uint64_t el2_e2h_read(CPUARMState *env, const ARMCPRegInfo *ri) -{ - CPReadFn *readfn; - - if (redirect_for_e2h(env)) { - /* Switch to the saved EL2 version of the register. */ - ri = ri->opaque; - readfn = ri->readfn; - } else { - readfn = ri->orig_readfn; - } - if (readfn == NULL) { - readfn = raw_read; - } - return readfn(env, ri); -} - -static void el2_e2h_write(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) -{ - CPWriteFn *writefn; - - if (redirect_for_e2h(env)) { - /* Switch to the saved EL2 version of the register. */ - ri = ri->opaque; - writefn = ri->writefn; - } else { - writefn = ri->orig_writefn; - } - if (writefn == NULL) { - writefn = raw_write; - } - writefn(env, ri, value); -} - -static uint64_t el2_e2h_e12_read(CPUARMState *env, const ARMCPRegInfo *ri) -{ - /* Pass the EL1 register accessor its ri, not the EL12 alias ri */ - return ri->orig_readfn(env, ri->opaque); -} - -static void el2_e2h_e12_write(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) -{ - /* Pass the EL1 register accessor its ri, not the EL12 alias ri */ - return ri->orig_writefn(env, ri->opaque, value); -} - -static CPAccessResult el2_e2h_e12_access(CPUARMState *env, - const ARMCPRegInfo *ri, - bool isread) -{ - if (arm_current_el(env) == 1) { - /* - * This must be a FEAT_NV access (will either trap or redirect - * to memory). None of the registers with _EL12 aliases want to - * apply their trap controls for this kind of access, so don't - * call the orig_accessfn or do the "UNDEF when E2H is 0" check. 
- */ - return CP_ACCESS_OK; - } - /* FOO_EL12 aliases only exist when E2H is 1; otherwise they UNDEF */ - if (!(arm_hcr_el2_eff(env) & HCR_E2H)) { - return CP_ACCESS_UNDEFINED; - } - if (ri->orig_accessfn) { - return ri->orig_accessfn(env, ri->opaque, isread); - } - return CP_ACCESS_OK; -} - -static void define_arm_vh_e2h_redirects_aliases(ARMCPU *cpu) -{ - struct E2HAlias { - uint32_t src_key, dst_key, new_key; - const char *src_name, *dst_name, *new_name; - bool (*feature)(const ARMISARegisters *id); - }; - -#define K(op0, op1, crn, crm, op2) \ - ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP, crn, crm, op0, op1, op2) - - static const struct E2HAlias aliases[] = { - { K(3, 0, 1, 0, 0), K(3, 4, 1, 0, 0), K(3, 5, 1, 0, 0), - "SCTLR", "SCTLR_EL2", "SCTLR_EL12" }, - { K(3, 0, 1, 0, 2), K(3, 4, 1, 1, 2), K(3, 5, 1, 0, 2), - "CPACR", "CPTR_EL2", "CPACR_EL12" }, - { K(3, 0, 2, 0, 0), K(3, 4, 2, 0, 0), K(3, 5, 2, 0, 0), - "TTBR0_EL1", "TTBR0_EL2", "TTBR0_EL12" }, - { K(3, 0, 2, 0, 1), K(3, 4, 2, 0, 1), K(3, 5, 2, 0, 1), - "TTBR1_EL1", "TTBR1_EL2", "TTBR1_EL12" }, - { K(3, 0, 2, 0, 2), K(3, 4, 2, 0, 2), K(3, 5, 2, 0, 2), - "TCR_EL1", "TCR_EL2", "TCR_EL12" }, - { K(3, 0, 4, 0, 0), K(3, 4, 4, 0, 0), K(3, 5, 4, 0, 0), - "SPSR_EL1", "SPSR_EL2", "SPSR_EL12" }, - { K(3, 0, 4, 0, 1), K(3, 4, 4, 0, 1), K(3, 5, 4, 0, 1), - "ELR_EL1", "ELR_EL2", "ELR_EL12" }, - { K(3, 0, 5, 1, 0), K(3, 4, 5, 1, 0), K(3, 5, 5, 1, 0), - "AFSR0_EL1", "AFSR0_EL2", "AFSR0_EL12" }, - { K(3, 0, 5, 1, 1), K(3, 4, 5, 1, 1), K(3, 5, 5, 1, 1), - "AFSR1_EL1", "AFSR1_EL2", "AFSR1_EL12" }, - { K(3, 0, 5, 2, 0), K(3, 4, 5, 2, 0), K(3, 5, 5, 2, 0), - "ESR_EL1", "ESR_EL2", "ESR_EL12" }, - { K(3, 0, 6, 0, 0), K(3, 4, 6, 0, 0), K(3, 5, 6, 0, 0), - "FAR_EL1", "FAR_EL2", "FAR_EL12" }, - { K(3, 0, 10, 2, 0), K(3, 4, 10, 2, 0), K(3, 5, 10, 2, 0), - "MAIR_EL1", "MAIR_EL2", "MAIR_EL12" }, - { K(3, 0, 10, 3, 0), K(3, 4, 10, 3, 0), K(3, 5, 10, 3, 0), - "AMAIR0", "AMAIR_EL2", "AMAIR_EL12" }, - { K(3, 0, 12, 0, 0), K(3, 4, 12, 0, 0), K(3, 5, 12, 0, 0), - "VBAR", "VBAR_EL2", "VBAR_EL12" }, - { K(3, 0, 13, 0, 1), K(3, 4, 13, 0, 1), K(3, 5, 13, 0, 1), - "CONTEXTIDR_EL1", "CONTEXTIDR_EL2", "CONTEXTIDR_EL12" }, - { K(3, 0, 14, 1, 0), K(3, 4, 14, 1, 0), K(3, 5, 14, 1, 0), - "CNTKCTL", "CNTHCTL_EL2", "CNTKCTL_EL12" }, - - /* - * Note that redirection of ZCR is mentioned in the description - * of ZCR_EL2, and aliasing in the description of ZCR_EL1, but - * not in the summary table. 
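The alias table being removed here also shows the encoding pattern that the new .vhe_redir_to_el2/.vhe_redir_to_el01 fields rely on; the sketch below is not part of the patch, it just spells out the op1 relationship using SCTLR as the example:

/*
 * For a VHE-redirected register only op1 differs between the three
 * system register encodings:
 *
 *   SCTLR_EL1  : op0=3 op1=0 crn=1 crm=0 op2=0
 *   SCTLR_EL2  : op0=3 op1=4 crn=1 crm=0 op2=0   <- .vhe_redir_to_el2
 *   SCTLR_EL12 : op0=3 op1=5 crn=1 crm=0 op2=0   <- .vhe_redir_to_el01
 *
 * which is why ENCODE_AA64_CP_REG(3, 4, ...) and (3, 5, ...) keys appear
 * on the _EL1 reginfo definitions throughout this patch.
 */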
- */ - { K(3, 0, 1, 2, 0), K(3, 4, 1, 2, 0), K(3, 5, 1, 2, 0), - "ZCR_EL1", "ZCR_EL2", "ZCR_EL12", isar_feature_aa64_sve }, - { K(3, 0, 1, 2, 6), K(3, 4, 1, 2, 6), K(3, 5, 1, 2, 6), - "SMCR_EL1", "SMCR_EL2", "SMCR_EL12", isar_feature_aa64_sme }, - - { K(3, 0, 5, 6, 0), K(3, 4, 5, 6, 0), K(3, 5, 5, 6, 0), - "TFSR_EL1", "TFSR_EL2", "TFSR_EL12", isar_feature_aa64_mte }, - - { K(3, 0, 13, 0, 7), K(3, 4, 13, 0, 7), K(3, 5, 13, 0, 7), - "SCXTNUM_EL1", "SCXTNUM_EL2", "SCXTNUM_EL12", - isar_feature_aa64_scxtnum }, - - /* TODO: ARMv8.2-SPE -- PMSCR_EL2 */ - /* TODO: ARMv8.4-Trace -- TRFCR_EL2 */ - }; -#undef K - - size_t i; - - for (i = 0; i < ARRAY_SIZE(aliases); i++) { - const struct E2HAlias *a = &aliases[i]; - ARMCPRegInfo *src_reg, *dst_reg, *new_reg; - bool ok; - - if (a->feature && !a->feature(&cpu->isar)) { - continue; - } - - src_reg = g_hash_table_lookup(cpu->cp_regs, - (gpointer)(uintptr_t)a->src_key); - dst_reg = g_hash_table_lookup(cpu->cp_regs, - (gpointer)(uintptr_t)a->dst_key); - g_assert(src_reg != NULL); - g_assert(dst_reg != NULL); - - /* Cross-compare names to detect typos in the keys. */ - g_assert(strcmp(src_reg->name, a->src_name) == 0); - g_assert(strcmp(dst_reg->name, a->dst_name) == 0); - - /* None of the core system registers use opaque; we will. */ - g_assert(src_reg->opaque == NULL); - - /* Create alias before redirection so we dup the right data. */ - new_reg = g_memdup(src_reg, sizeof(ARMCPRegInfo)); - - new_reg->name = a->new_name; - new_reg->type |= ARM_CP_ALIAS; - /* Remove PL1/PL0 access, leaving PL2/PL3 R/W in place. */ - new_reg->access &= PL2_RW | PL3_RW; - /* The new_reg op fields are as per new_key, not the target reg */ - new_reg->crn = (a->new_key & CP_REG_ARM64_SYSREG_CRN_MASK) - >> CP_REG_ARM64_SYSREG_CRN_SHIFT; - new_reg->crm = (a->new_key & CP_REG_ARM64_SYSREG_CRM_MASK) - >> CP_REG_ARM64_SYSREG_CRM_SHIFT; - new_reg->opc0 = (a->new_key & CP_REG_ARM64_SYSREG_OP0_MASK) - >> CP_REG_ARM64_SYSREG_OP0_SHIFT; - new_reg->opc1 = (a->new_key & CP_REG_ARM64_SYSREG_OP1_MASK) - >> CP_REG_ARM64_SYSREG_OP1_SHIFT; - new_reg->opc2 = (a->new_key & CP_REG_ARM64_SYSREG_OP2_MASK) - >> CP_REG_ARM64_SYSREG_OP2_SHIFT; - new_reg->opaque = src_reg; - new_reg->orig_readfn = src_reg->readfn ?: raw_read; - new_reg->orig_writefn = src_reg->writefn ?: raw_write; - new_reg->orig_accessfn = src_reg->accessfn; - if (!new_reg->raw_readfn) { - new_reg->raw_readfn = raw_read; - } - if (!new_reg->raw_writefn) { - new_reg->raw_writefn = raw_write; - } - new_reg->readfn = el2_e2h_e12_read; - new_reg->writefn = el2_e2h_e12_write; - new_reg->accessfn = el2_e2h_e12_access; - - /* - * If the _EL1 register is redirected to memory by FEAT_NV2, - * then it shares the offset with the _EL12 register, - * and which one is redirected depends on HCR_EL2.NV1. 
- */ - if (new_reg->nv2_redirect_offset) { - assert(new_reg->nv2_redirect_offset & NV2_REDIR_NV1); - new_reg->nv2_redirect_offset &= ~NV2_REDIR_NV1; - new_reg->nv2_redirect_offset |= NV2_REDIR_NO_NV1; - } - - ok = g_hash_table_insert(cpu->cp_regs, - (gpointer)(uintptr_t)a->new_key, new_reg); - g_assert(ok); - - src_reg->opaque = dst_reg; - src_reg->orig_readfn = src_reg->readfn ?: raw_read; - src_reg->orig_writefn = src_reg->writefn ?: raw_write; - if (!src_reg->raw_readfn) { - src_reg->raw_readfn = raw_read; - } - if (!src_reg->raw_writefn) { - src_reg->raw_writefn = raw_write; - } - src_reg->readfn = el2_e2h_read; - src_reg->writefn = el2_e2h_write; - } -} #endif static CPAccessResult ctr_el0_access(CPUARMState *env, const ARMCPRegInfo *ri, @@ -6574,6 +4829,8 @@ static const ARMCPRegInfo zcr_reginfo[] = { { .name = "ZCR_EL1", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 0, .crn = 1, .crm = 2, .opc2 = 0, .nv2_redirect_offset = 0x1e0 | NV2_REDIR_NV1, + .vhe_redir_to_el2 = ENCODE_AA64_CP_REG(3, 4, 1, 2, 0), + .vhe_redir_to_el01 = ENCODE_AA64_CP_REG(3, 5, 1, 2, 0), .access = PL1_RW, .type = ARM_CP_SVE, .fieldoffset = offsetof(CPUARMState, vfp.zcr_el[1]), .writefn = zcr_write, .raw_writefn = raw_write }, @@ -6663,7 +4920,7 @@ void aarch64_set_svcr(CPUARMState *env, uint64_t new, uint64_t mask) * when disabled either. */ if (change & new & R_SVCR_ZA_MASK) { - memset(env->zarray, 0, sizeof(env->zarray)); + memset(&env->za_state, 0, sizeof(env->za_state)); } if (tcg_enabled()) { @@ -6682,10 +4939,14 @@ static void smcr_write(CPUARMState *env, const ARMCPRegInfo *ri, { int cur_el = arm_current_el(env); int old_len = sve_vqm1_for_el(env, cur_el); + uint64_t valid_mask = R_SMCR_LEN_MASK | R_SMCR_FA64_MASK; int new_len; QEMU_BUILD_BUG_ON(ARM_MAX_VQ > R_SMCR_LEN_MASK + 1); - value &= R_SMCR_LEN_MASK | R_SMCR_FA64_MASK; + if (cpu_isar_feature(aa64_sme2, env_archcpu(env))) { + valid_mask |= R_SMCR_EZT0_MASK; + } + value &= valid_mask; raw_write(env, ri, value); /* @@ -6715,6 +4976,8 @@ static const ARMCPRegInfo sme_reginfo[] = { { .name = "SMCR_EL1", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 0, .crn = 1, .crm = 2, .opc2 = 6, .nv2_redirect_offset = 0x1f0 | NV2_REDIR_NV1, + .vhe_redir_to_el2 = ENCODE_AA64_CP_REG(3, 4, 1, 2, 6), + .vhe_redir_to_el01 = ENCODE_AA64_CP_REG(3, 5, 1, 2, 6), .access = PL1_RW, .type = ARM_CP_SME, .fieldoffset = offsetof(CPUARMState, vfp.smcr_el[1]), .writefn = smcr_write, .raw_writefn = raw_write }, @@ -6761,6 +5024,11 @@ static void gpccr_write(CPUARMState *env, const ARMCPRegInfo *ri, R_GPCCR_ORGN_MASK | R_GPCCR_SH_MASK | R_GPCCR_PGS_MASK | R_GPCCR_GPC_MASK | R_GPCCR_GPCP_MASK; + if (cpu_isar_feature(aa64_rme_gpc2, env_archcpu(env))) { + rw_mask |= R_GPCCR_APPSAA_MASK | R_GPCCR_NSO_MASK | + R_GPCCR_SPAD_MASK | R_GPCCR_NSPAD_MASK | R_GPCCR_RLPAD_MASK; + } + env->cp15.gpccr_el3 = (value & rw_mask) | (env->cp15.gpccr_el3 & ~rw_mask); } @@ -6823,105 +5091,96 @@ static const ARMCPRegInfo nmi_reginfo[] = { .resetfn = arm_cp_reset_ignore }, }; -static void define_pmu_regs(ARMCPU *cpu) +static CPAccessResult mecid_access(CPUARMState *env, + const ARMCPRegInfo *ri, bool isread) { - /* - * v7 performance monitor control register: same implementor - * field as main ID register, and we implement four counters in - * addition to the cycle count register. 
- */ - unsigned int i, pmcrn = pmu_num_counters(&cpu->env); - ARMCPRegInfo pmcr = { - .name = "PMCR", .cp = 15, .crn = 9, .crm = 12, .opc1 = 0, .opc2 = 0, - .access = PL0_RW, - .fgt = FGT_PMCR_EL0, - .type = ARM_CP_IO | ARM_CP_ALIAS, - .fieldoffset = offsetoflow32(CPUARMState, cp15.c9_pmcr), - .accessfn = pmreg_access, - .readfn = pmcr_read, .raw_readfn = raw_read, - .writefn = pmcr_write, .raw_writefn = raw_write, - }; - ARMCPRegInfo pmcr64 = { - .name = "PMCR_EL0", .state = ARM_CP_STATE_AA64, - .opc0 = 3, .opc1 = 3, .crn = 9, .crm = 12, .opc2 = 0, - .access = PL0_RW, .accessfn = pmreg_access, - .fgt = FGT_PMCR_EL0, - .type = ARM_CP_IO, - .fieldoffset = offsetof(CPUARMState, cp15.c9_pmcr), - .resetvalue = cpu->isar.reset_pmcr_el0, - .readfn = pmcr_read, .raw_readfn = raw_read, - .writefn = pmcr_write, .raw_writefn = raw_write, - }; + int el = arm_current_el(env); - define_one_arm_cp_reg(cpu, &pmcr); - define_one_arm_cp_reg(cpu, &pmcr64); - for (i = 0; i < pmcrn; i++) { - char *pmevcntr_name = g_strdup_printf("PMEVCNTR%d", i); - char *pmevcntr_el0_name = g_strdup_printf("PMEVCNTR%d_EL0", i); - char *pmevtyper_name = g_strdup_printf("PMEVTYPER%d", i); - char *pmevtyper_el0_name = g_strdup_printf("PMEVTYPER%d_EL0", i); - ARMCPRegInfo pmev_regs[] = { - { .name = pmevcntr_name, .cp = 15, .crn = 14, - .crm = 8 | (3 & (i >> 3)), .opc1 = 0, .opc2 = i & 7, - .access = PL0_RW, .type = ARM_CP_IO | ARM_CP_ALIAS, - .fgt = FGT_PMEVCNTRN_EL0, - .readfn = pmevcntr_readfn, .writefn = pmevcntr_writefn, - .accessfn = pmreg_access_xevcntr }, - { .name = pmevcntr_el0_name, .state = ARM_CP_STATE_AA64, - .opc0 = 3, .opc1 = 3, .crn = 14, .crm = 8 | (3 & (i >> 3)), - .opc2 = i & 7, .access = PL0_RW, .accessfn = pmreg_access_xevcntr, - .type = ARM_CP_IO, - .fgt = FGT_PMEVCNTRN_EL0, - .readfn = pmevcntr_readfn, .writefn = pmevcntr_writefn, - .raw_readfn = pmevcntr_rawread, - .raw_writefn = pmevcntr_rawwrite }, - { .name = pmevtyper_name, .cp = 15, .crn = 14, - .crm = 12 | (3 & (i >> 3)), .opc1 = 0, .opc2 = i & 7, - .access = PL0_RW, .type = ARM_CP_IO | ARM_CP_ALIAS, - .fgt = FGT_PMEVTYPERN_EL0, - .readfn = pmevtyper_readfn, .writefn = pmevtyper_writefn, - .accessfn = pmreg_access }, - { .name = pmevtyper_el0_name, .state = ARM_CP_STATE_AA64, - .opc0 = 3, .opc1 = 3, .crn = 14, .crm = 12 | (3 & (i >> 3)), - .opc2 = i & 7, .access = PL0_RW, .accessfn = pmreg_access, - .fgt = FGT_PMEVTYPERN_EL0, - .type = ARM_CP_IO, - .readfn = pmevtyper_readfn, .writefn = pmevtyper_writefn, - .raw_writefn = pmevtyper_rawwrite }, - }; - define_arm_cp_regs(cpu, pmev_regs); - g_free(pmevcntr_name); - g_free(pmevcntr_el0_name); - g_free(pmevtyper_name); - g_free(pmevtyper_el0_name); - } - if (cpu_isar_feature(aa32_pmuv3p1, cpu)) { - ARMCPRegInfo v81_pmu_regs[] = { - { .name = "PMCEID2", .state = ARM_CP_STATE_AA32, - .cp = 15, .opc1 = 0, .crn = 9, .crm = 14, .opc2 = 4, - .access = PL0_R, .accessfn = pmreg_access, .type = ARM_CP_CONST, - .fgt = FGT_PMCEIDN_EL0, - .resetvalue = extract64(cpu->pmceid0, 32, 32) }, - { .name = "PMCEID3", .state = ARM_CP_STATE_AA32, - .cp = 15, .opc1 = 0, .crn = 9, .crm = 14, .opc2 = 5, - .access = PL0_R, .accessfn = pmreg_access, .type = ARM_CP_CONST, - .fgt = FGT_PMCEIDN_EL0, - .resetvalue = extract64(cpu->pmceid1, 32, 32) }, - }; - define_arm_cp_regs(cpu, v81_pmu_regs); - } - if (cpu_isar_feature(any_pmuv3p4, cpu)) { - static const ARMCPRegInfo v84_pmmir = { - .name = "PMMIR_EL1", .state = ARM_CP_STATE_BOTH, - .opc0 = 3, .opc1 = 0, .crn = 9, .crm = 14, .opc2 = 6, - .access = PL1_R, .accessfn = pmreg_access, 
.type = ARM_CP_CONST, - .fgt = FGT_PMMIR_EL1, - .resetvalue = 0 - }; - define_one_arm_cp_reg(cpu, &v84_pmmir); + if (el == 2) { + if (arm_security_space(env) != ARMSS_Realm) { + return CP_ACCESS_UNDEFINED; + } + + if (!(env->cp15.scr_el3 & SCR_MECEN)) { + return CP_ACCESS_TRAP_EL3; + } } + + return CP_ACCESS_OK; +} + +static void mecid_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + value = extract64(value, 0, MECID_WIDTH); + raw_write(env, ri, value); } +static CPAccessResult cipae_access(CPUARMState *env, const ARMCPRegInfo *ri, + bool isread) +{ + switch (arm_security_space(env)) { + case ARMSS_Root: /* EL3 */ + case ARMSS_Realm: /* Realm EL2 */ + return CP_ACCESS_OK; + default: + return CP_ACCESS_UNDEFINED; + } +} + +static const ARMCPRegInfo mec_reginfo[] = { + { .name = "MECIDR_EL2", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 4, .opc2 = 7, .crn = 10, .crm = 8, + .access = PL2_R, .type = ARM_CP_CONST | ARM_CP_NV_NO_TRAP, + .resetvalue = MECID_WIDTH - 1 }, + { .name = "MECID_P0_EL2", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 4, .opc2 = 0, .crn = 10, .crm = 8, + .access = PL2_RW, .type = ARM_CP_NV_NO_TRAP, + .accessfn = mecid_access, .writefn = mecid_write, + .fieldoffset = offsetof(CPUARMState, cp15.mecid_p0_el2) }, + { .name = "MECID_A0_EL2", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 4, .opc2 = 1, .crn = 10, .crm = 8, + .access = PL2_RW, .type = ARM_CP_NV_NO_TRAP, + .accessfn = mecid_access, .writefn = mecid_write, + .fieldoffset = offsetof(CPUARMState, cp15.mecid_a0_el2) }, + { .name = "MECID_P1_EL2", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 4, .opc2 = 2, .crn = 10, .crm = 8, + .access = PL2_RW, .type = ARM_CP_NV_NO_TRAP, + .accessfn = mecid_access, .writefn = mecid_write, + .fieldoffset = offsetof(CPUARMState, cp15.mecid_p1_el2) }, + { .name = "MECID_A1_EL2", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 4, .opc2 = 3, .crn = 10, .crm = 8, + .access = PL2_RW, .type = ARM_CP_NV_NO_TRAP, + .accessfn = mecid_access, .writefn = mecid_write, + .fieldoffset = offsetof(CPUARMState, cp15.mecid_a1_el2) }, + { .name = "MECID_RL_A_EL3", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 6, .opc2 = 1, .crn = 10, .crm = 10, + .access = PL3_RW, .accessfn = mecid_access, + .writefn = mecid_write, + .fieldoffset = offsetof(CPUARMState, cp15.mecid_rl_a_el3) }, + { .name = "VMECID_P_EL2", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 4, .opc2 = 0, .crn = 10, .crm = 9, + .access = PL2_RW, .type = ARM_CP_NV_NO_TRAP, + .accessfn = mecid_access, .writefn = mecid_write, + .fieldoffset = offsetof(CPUARMState, cp15.vmecid_p_el2) }, + { .name = "VMECID_A_EL2", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 4, .opc2 = 1, .crn = 10, .crm = 9, + .access = PL2_RW, .type = ARM_CP_NV_NO_TRAP, + .accessfn = mecid_access, .writefn = mecid_write, + .fieldoffset = offsetof(CPUARMState, cp15.vmecid_a_el2) }, + { .name = "DC_CIPAE", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 4, .crn = 7, .crm = 14, .opc2 = 0, + .access = PL2_W, .type = ARM_CP_NOP | ARM_CP_NV_NO_TRAP, + .accessfn = cipae_access }, +}; + +static const ARMCPRegInfo mec_mte_reginfo[] = { + { .name = "DC_CIGDPAE", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 4, .crn = 7, .crm = 14, .opc2 = 7, + .access = PL2_W, .type = ARM_CP_NOP | ARM_CP_NV_NO_TRAP, + .accessfn = cipae_access }, +}; + #ifndef CONFIG_USER_ONLY /* * We don't know until after realize whether there's a GICv3 @@ -6932,10 +5191,10 @@ static void define_pmu_regs(ARMCPU *cpu) static uint64_t id_pfr1_read(CPUARMState *env, const 
ARMCPRegInfo *ri) { ARMCPU *cpu = env_archcpu(env); - uint64_t pfr1 = cpu->isar.id_pfr1; + uint64_t pfr1 = GET_IDREG(&cpu->isar, ID_PFR1); if (env->gicv3state) { - pfr1 |= 1 << 28; + pfr1 = FIELD_DP64(pfr1, ID_PFR1, GIC, 1); } return pfr1; } @@ -6943,10 +5202,10 @@ static uint64_t id_pfr1_read(CPUARMState *env, const ARMCPRegInfo *ri) static uint64_t id_aa64pfr0_read(CPUARMState *env, const ARMCPRegInfo *ri) { ARMCPU *cpu = env_archcpu(env); - uint64_t pfr0 = cpu->isar.id_aa64pfr0; + uint64_t pfr0 = GET_IDREG(&cpu->isar, ID_AA64PFR0); if (env->gicv3state) { - pfr0 |= 1 << 24; + pfr0 = FIELD_DP64(pfr0, ID_AA64PFR0, GIC, 1); } return pfr0; } @@ -7122,7 +5381,7 @@ static const ARMCPRegInfo rndr_reginfo[] = { .access = PL0_R, .readfn = rndr_readfn }, }; -static void dccvap_writefn(CPUARMState *env, const ARMCPRegInfo *opaque, +static void dccvap_writefn(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) { #ifdef CONFIG_TCG @@ -7259,6 +5518,8 @@ static const ARMCPRegInfo mte_reginfo[] = { .opc0 = 3, .opc1 = 0, .crn = 5, .crm = 6, .opc2 = 0, .access = PL1_RW, .accessfn = access_tfsr_el1, .nv2_redirect_offset = 0x190 | NV2_REDIR_NV1, + .vhe_redir_to_el2 = ENCODE_AA64_CP_REG(3, 4, 5, 6, 0), + .vhe_redir_to_el01 = ENCODE_AA64_CP_REG(3, 5, 5, 6, 0), .fieldoffset = offsetof(CPUARMState, cp15.tfsr_el[1]) }, { .name = "TFSR_EL2", .state = ARM_CP_STATE_AA64, .type = ARM_CP_NV2_REDIRECT, @@ -7434,6 +5695,8 @@ static const ARMCPRegInfo scxtnum_reginfo[] = { .access = PL1_RW, .accessfn = access_scxtnum_el1, .fgt = FGT_SCXTNUM_EL1, .nv2_redirect_offset = 0x188 | NV2_REDIR_NV1, + .vhe_redir_to_el2 = ENCODE_AA64_CP_REG(3, 4, 13, 0, 7), + .vhe_redir_to_el01 = ENCODE_AA64_CP_REG(3, 5, 13, 0, 7), .fieldoffset = offsetof(CPUARMState, scxtnum_el[1]) }, { .name = "SCXTNUM_EL2", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 4, .crn = 13, .crm = 0, .opc2 = 7, @@ -7700,32 +5963,6 @@ static const ARMCPRegInfo vhe_reginfo[] = { #endif }; -#ifndef CONFIG_USER_ONLY -static const ARMCPRegInfo ats1e1_reginfo[] = { - { .name = "AT_S1E1RP", .state = ARM_CP_STATE_AA64, - .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 9, .opc2 = 0, - .access = PL1_W, .type = ARM_CP_NO_RAW | ARM_CP_RAISES_EXC, - .fgt = FGT_ATS1E1RP, - .accessfn = at_s1e01_access, .writefn = ats_write64 }, - { .name = "AT_S1E1WP", .state = ARM_CP_STATE_AA64, - .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 9, .opc2 = 1, - .access = PL1_W, .type = ARM_CP_NO_RAW | ARM_CP_RAISES_EXC, - .fgt = FGT_ATS1E1WP, - .accessfn = at_s1e01_access, .writefn = ats_write64 }, -}; - -static const ARMCPRegInfo ats1cp_reginfo[] = { - { .name = "ATS1CPRP", - .cp = 15, .opc1 = 0, .crn = 7, .crm = 9, .opc2 = 0, - .access = PL1_W, .type = ARM_CP_NO_RAW | ARM_CP_RAISES_EXC, - .writefn = ats_write }, - { .name = "ATS1CPWP", - .cp = 15, .opc1 = 0, .crn = 7, .crm = 9, .opc2 = 1, - .access = PL1_W, .type = ARM_CP_NO_RAW | ARM_CP_RAISES_EXC, - .writefn = ats_write }, -}; -#endif - /* * ACTLR2 and HACTLR2 map to ACTLR_EL1[63:32] and * ACTLR_EL2[63:32]. 
They exist only if the ID_MMFR4.AC2 field @@ -7746,10 +5983,218 @@ static const ARMCPRegInfo actlr2_hactlr2_reginfo[] = { .resetvalue = 0 }, }; +static CPAccessResult sctlr2_el2_access(CPUARMState *env, + const ARMCPRegInfo *ri, + bool isread) +{ + if (arm_current_el(env) < 3 + && arm_feature(env, ARM_FEATURE_EL3) + && !(env->cp15.scr_el3 & SCR_SCTLR2EN)) { + return CP_ACCESS_TRAP_EL3; + } + return CP_ACCESS_OK; +} + +static CPAccessResult sctlr2_el1_access(CPUARMState *env, + const ARMCPRegInfo *ri, + bool isread) +{ + CPAccessResult ret = access_tvm_trvm(env, ri, isread); + if (ret != CP_ACCESS_OK) { + return ret; + } + if (arm_current_el(env) < 2 && !(arm_hcrx_el2_eff(env) & HCRX_SCTLR2EN)) { + return CP_ACCESS_TRAP_EL2; + } + return sctlr2_el2_access(env, ri, isread); +} + +static void sctlr2_el1_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + uint64_t valid_mask = 0; + + value &= valid_mask; + raw_write(env, ri, value); +} + +static void sctlr2_el2_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + uint64_t valid_mask = 0; + + if (cpu_isar_feature(aa64_mec, env_archcpu(env))) { + valid_mask |= SCTLR2_EMEC; + } + value &= valid_mask; + raw_write(env, ri, value); +} + +static void sctlr2_el3_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + uint64_t valid_mask = 0; + + if (cpu_isar_feature(aa64_mec, env_archcpu(env))) { + valid_mask |= SCTLR2_EMEC; + } + value &= valid_mask; + raw_write(env, ri, value); +} + +static const ARMCPRegInfo sctlr2_reginfo[] = { + { .name = "SCTLR2_EL1", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 0, .opc2 = 3, .crn = 1, .crm = 0, + .access = PL1_RW, .accessfn = sctlr2_el1_access, + .writefn = sctlr2_el1_write, .fgt = FGT_SCTLR_EL1, + .vhe_redir_to_el2 = ENCODE_AA64_CP_REG(3, 4, 1, 0, 3), + .vhe_redir_to_el01 = ENCODE_AA64_CP_REG(3, 5, 1, 0, 3), + .nv2_redirect_offset = 0x278 | NV2_REDIR_NV1, + .fieldoffset = offsetof(CPUARMState, cp15.sctlr2_el[1]) }, + { .name = "SCTLR2_EL2", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 4, .opc2 = 3, .crn = 1, .crm = 0, + .access = PL2_RW, .accessfn = sctlr2_el2_access, + .writefn = sctlr2_el2_write, + .fieldoffset = offsetof(CPUARMState, cp15.sctlr2_el[2]) }, + { .name = "SCTLR2_EL3", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 6, .opc2 = 3, .crn = 1, .crm = 0, + .access = PL3_RW, .writefn = sctlr2_el3_write, + .fieldoffset = offsetof(CPUARMState, cp15.sctlr2_el[3]) }, +}; + +static CPAccessResult tcr2_el2_access(CPUARMState *env, const ARMCPRegInfo *ri, + bool isread) +{ + if (arm_current_el(env) < 3 + && arm_feature(env, ARM_FEATURE_EL3) + && !(env->cp15.scr_el3 & SCR_TCR2EN)) { + return CP_ACCESS_TRAP_EL3; + } + return CP_ACCESS_OK; +} + +static CPAccessResult tcr2_el1_access(CPUARMState *env, const ARMCPRegInfo *ri, + bool isread) +{ + CPAccessResult ret = access_tvm_trvm(env, ri, isread); + if (ret != CP_ACCESS_OK) { + return ret; + } + if (arm_current_el(env) < 2 && !(arm_hcrx_el2_eff(env) & HCRX_TCR2EN)) { + return CP_ACCESS_TRAP_EL2; + } + return tcr2_el2_access(env, ri, isread); +} + +static void tcr2_el1_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + ARMCPU *cpu = env_archcpu(env); + uint64_t valid_mask = 0; + + if (cpu_isar_feature(aa64_s1pie, cpu)) { + valid_mask |= TCR2_PIE; + } + value &= valid_mask; + raw_write(env, ri, value); +} + +static void tcr2_el2_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + ARMCPU *cpu = env_archcpu(env); + uint64_t valid_mask = 0; + + if 
(cpu_isar_feature(aa64_s1pie, cpu)) { + valid_mask |= TCR2_PIE; + } + if (cpu_isar_feature(aa64_mec, env_archcpu(env))) { + valid_mask |= TCR2_AMEC0 | TCR2_AMEC1; + } + value &= valid_mask; + raw_write(env, ri, value); +} + +static const ARMCPRegInfo tcr2_reginfo[] = { + { .name = "TCR2_EL1", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 0, .opc2 = 3, .crn = 2, .crm = 0, + .access = PL1_RW, .accessfn = tcr2_el1_access, + .writefn = tcr2_el1_write, .fgt = FGT_TCR_EL1, + .vhe_redir_to_el2 = ENCODE_AA64_CP_REG(3, 4, 2, 0, 3), + .vhe_redir_to_el01 = ENCODE_AA64_CP_REG(3, 5, 2, 0, 3), + .nv2_redirect_offset = 0x270 | NV2_REDIR_NV1, + .fieldoffset = offsetof(CPUARMState, cp15.tcr2_el[1]) }, + { .name = "TCR2_EL2", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 4, .opc2 = 3, .crn = 2, .crm = 0, + .access = PL2_RW, .accessfn = tcr2_el2_access, + .writefn = tcr2_el2_write, + .fieldoffset = offsetof(CPUARMState, cp15.tcr2_el[2]) }, +}; + +static CPAccessResult pien_access(CPUARMState *env, const ARMCPRegInfo *ri, + bool isread) +{ + if (arm_feature(env, ARM_FEATURE_EL3) + && !(env->cp15.scr_el3 & SCR_PIEN) + && arm_current_el(env) < 3) { + return CP_ACCESS_TRAP_EL3; + } + return CP_ACCESS_OK; +} + +static CPAccessResult pien_el1_access(CPUARMState *env, const ARMCPRegInfo *ri, + bool isread) +{ + CPAccessResult ret = access_tvm_trvm(env, ri, isread); + if (ret == CP_ACCESS_OK) { + ret = pien_access(env, ri, isread); + } + return ret; +} + +static const ARMCPRegInfo s1pie_reginfo[] = { + { .name = "PIR_EL1", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 0, .opc2 = 3, .crn = 10, .crm = 2, + .access = PL1_RW, .accessfn = pien_el1_access, + .fgt = FGT_NPIR_EL1, .nv2_redirect_offset = 0x2a0 | NV2_REDIR_NV1, + .vhe_redir_to_el2 = ENCODE_AA64_CP_REG(3, 4, 10, 2, 3), + .vhe_redir_to_el01 = ENCODE_AA64_CP_REG(3, 5, 10, 2, 3), + .fieldoffset = offsetof(CPUARMState, cp15.pir_el[1]) }, + { .name = "PIR_EL2", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 4, .opc2 = 3, .crn = 10, .crm = 2, + .access = PL2_RW, .accessfn = pien_access, + .fieldoffset = offsetof(CPUARMState, cp15.pir_el[2]) }, + { .name = "PIR_EL3", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 6, .opc2 = 3, .crn = 10, .crm = 2, + .access = PL3_RW, + .fieldoffset = offsetof(CPUARMState, cp15.pir_el[3]) }, + { .name = "PIRE0_EL1", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 0, .opc2 = 2, .crn = 10, .crm = 2, + .access = PL1_RW, .accessfn = pien_el1_access, + .fgt = FGT_NPIRE0_EL1, .nv2_redirect_offset = 0x290 | NV2_REDIR_NV1, + .vhe_redir_to_el2 = ENCODE_AA64_CP_REG(3, 4, 10, 2, 2), + .vhe_redir_to_el01 = ENCODE_AA64_CP_REG(3, 5, 10, 2, 2), + .fieldoffset = offsetof(CPUARMState, cp15.pir_el[0]) }, + { .name = "PIRE0_EL2", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 4, .opc2 = 2, .crn = 10, .crm = 2, + .access = PL2_RW, .accessfn = pien_access, + .fieldoffset = offsetof(CPUARMState, cp15.pire0_el2) }, +}; + +static const ARMCPRegInfo s2pie_reginfo[] = { + { .name = "S2PIR_EL2", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 4, .opc2 = 5, .crn = 10, .crm = 2, + .access = PL2_RW, .accessfn = pien_access, + .nv2_redirect_offset = 0x2b0, + .fieldoffset = offsetof(CPUARMState, cp15.s2pir_el2) }, +}; + void register_cp_regs_for_features(ARMCPU *cpu) { /* Register all the coprocessor registers based on feature bits */ CPUARMState *env = &cpu->env; + ARMISARegisters *isar = &cpu->isar; + if (arm_feature(env, ARM_FEATURE_M)) { /* M profile has no coprocessor registers */ return; @@ -7765,7 +6210,10 @@ void 
register_cp_regs_for_features(ARMCPU *cpu) } #ifndef CONFIG_USER_ONLY - define_tlb_insn_regs(cpu); + if (tcg_enabled()) { + define_tlb_insn_regs(cpu); + define_at_insn_regs(cpu); + } #endif if (arm_feature(env, ARM_FEATURE_V6)) { @@ -7775,7 +6223,7 @@ void register_cp_regs_for_features(ARMCPU *cpu) .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 1, .opc2 = 0, .access = PL1_R, .type = ARM_CP_CONST, .accessfn = access_aa32_tid3, - .resetvalue = cpu->isar.id_pfr0 }, + .resetvalue = GET_IDREG(isar, ID_PFR0)}, /* * ID_PFR1 is not a plain ARM_CP_CONST because we don't know * the value of the GIC field until after we define these regs. @@ -7786,7 +6234,7 @@ void register_cp_regs_for_features(ARMCPU *cpu) .accessfn = access_aa32_tid3, #ifdef CONFIG_USER_ONLY .type = ARM_CP_CONST, - .resetvalue = cpu->isar.id_pfr1, + .resetvalue = GET_IDREG(isar, ID_PFR1), #else .type = ARM_CP_NO_RAW, .accessfn = access_aa32_tid3, @@ -7798,72 +6246,72 @@ void register_cp_regs_for_features(ARMCPU *cpu) .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 1, .opc2 = 2, .access = PL1_R, .type = ARM_CP_CONST, .accessfn = access_aa32_tid3, - .resetvalue = cpu->isar.id_dfr0 }, + .resetvalue = GET_IDREG(isar, ID_DFR0)}, { .name = "ID_AFR0", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 1, .opc2 = 3, .access = PL1_R, .type = ARM_CP_CONST, .accessfn = access_aa32_tid3, - .resetvalue = cpu->id_afr0 }, + .resetvalue = GET_IDREG(isar, ID_AFR0)}, { .name = "ID_MMFR0", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 1, .opc2 = 4, .access = PL1_R, .type = ARM_CP_CONST, .accessfn = access_aa32_tid3, - .resetvalue = cpu->isar.id_mmfr0 }, + .resetvalue = GET_IDREG(isar, ID_MMFR0)}, { .name = "ID_MMFR1", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 1, .opc2 = 5, .access = PL1_R, .type = ARM_CP_CONST, .accessfn = access_aa32_tid3, - .resetvalue = cpu->isar.id_mmfr1 }, + .resetvalue = GET_IDREG(isar, ID_MMFR1)}, { .name = "ID_MMFR2", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 1, .opc2 = 6, .access = PL1_R, .type = ARM_CP_CONST, .accessfn = access_aa32_tid3, - .resetvalue = cpu->isar.id_mmfr2 }, + .resetvalue = GET_IDREG(isar, ID_MMFR2)}, { .name = "ID_MMFR3", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 1, .opc2 = 7, .access = PL1_R, .type = ARM_CP_CONST, .accessfn = access_aa32_tid3, - .resetvalue = cpu->isar.id_mmfr3 }, + .resetvalue = GET_IDREG(isar, ID_MMFR3)}, { .name = "ID_ISAR0", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 2, .opc2 = 0, .access = PL1_R, .type = ARM_CP_CONST, .accessfn = access_aa32_tid3, - .resetvalue = cpu->isar.id_isar0 }, + .resetvalue = GET_IDREG(isar, ID_ISAR0)}, { .name = "ID_ISAR1", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 2, .opc2 = 1, .access = PL1_R, .type = ARM_CP_CONST, .accessfn = access_aa32_tid3, - .resetvalue = cpu->isar.id_isar1 }, + .resetvalue = GET_IDREG(isar, ID_ISAR1)}, { .name = "ID_ISAR2", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 2, .opc2 = 2, .access = PL1_R, .type = ARM_CP_CONST, .accessfn = access_aa32_tid3, - .resetvalue = cpu->isar.id_isar2 }, + .resetvalue = GET_IDREG(isar, ID_ISAR2)}, { .name = "ID_ISAR3", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 2, .opc2 = 3, .access = PL1_R, .type = ARM_CP_CONST, .accessfn = access_aa32_tid3, - .resetvalue = cpu->isar.id_isar3 }, + .resetvalue = GET_IDREG(isar, ID_ISAR3) }, { .name = "ID_ISAR4", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 2, 
.opc2 = 4, .access = PL1_R, .type = ARM_CP_CONST, .accessfn = access_aa32_tid3, - .resetvalue = cpu->isar.id_isar4 }, + .resetvalue = GET_IDREG(isar, ID_ISAR4) }, { .name = "ID_ISAR5", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 2, .opc2 = 5, .access = PL1_R, .type = ARM_CP_CONST, .accessfn = access_aa32_tid3, - .resetvalue = cpu->isar.id_isar5 }, + .resetvalue = GET_IDREG(isar, ID_ISAR5) }, { .name = "ID_MMFR4", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 2, .opc2 = 6, .access = PL1_R, .type = ARM_CP_CONST, .accessfn = access_aa32_tid3, - .resetvalue = cpu->isar.id_mmfr4 }, + .resetvalue = GET_IDREG(isar, ID_MMFR4)}, { .name = "ID_ISAR6", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 2, .opc2 = 7, .access = PL1_R, .type = ARM_CP_CONST, .accessfn = access_aa32_tid3, - .resetvalue = cpu->isar.id_isar6 }, + .resetvalue = GET_IDREG(isar, ID_ISAR6) }, }; define_arm_cp_regs(cpu, v6_idregs); define_arm_cp_regs(cpu, v6_cp_reginfo); @@ -7873,9 +6321,6 @@ void register_cp_regs_for_features(ARMCPU *cpu) if (arm_feature(env, ARM_FEATURE_V6K)) { define_arm_cp_regs(cpu, v6k_cp_reginfo); } - if (arm_feature(env, ARM_FEATURE_V7VE)) { - define_arm_cp_regs(cpu, pmovsset_cp_reginfo); - } if (arm_feature(env, ARM_FEATURE_V7)) { ARMCPRegInfo clidr = { .name = "CLIDR", .state = ARM_CP_STATE_BOTH, @@ -7883,12 +6328,11 @@ void register_cp_regs_for_features(ARMCPU *cpu) .access = PL1_R, .type = ARM_CP_CONST, .accessfn = access_tid4, .fgt = FGT_CLIDR_EL1, - .resetvalue = cpu->clidr + .resetvalue = GET_IDREG(isar, CLIDR) }; define_one_arm_cp_reg(cpu, &clidr); define_arm_cp_regs(cpu, v7_cp_reginfo); define_debug_regs(cpu); - define_pmu_regs(cpu); } else { define_arm_cp_regs(cpu, not_v7_cp_reginfo); } @@ -7914,7 +6358,7 @@ void register_cp_regs_for_features(ARMCPU *cpu) .access = PL1_R, #ifdef CONFIG_USER_ONLY .type = ARM_CP_CONST, - .resetvalue = cpu->isar.id_aa64pfr0 + .resetvalue = GET_IDREG(isar, ID_AA64PFR0) #else .type = ARM_CP_NO_RAW, .accessfn = access_aa64_tid3, @@ -7926,12 +6370,12 @@ void register_cp_regs_for_features(ARMCPU *cpu) .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 4, .opc2 = 1, .access = PL1_R, .type = ARM_CP_CONST, .accessfn = access_aa64_tid3, - .resetvalue = cpu->isar.id_aa64pfr1}, - { .name = "ID_AA64PFR2_EL1_RESERVED", .state = ARM_CP_STATE_AA64, + .resetvalue = GET_IDREG(isar, ID_AA64PFR1)}, + { .name = "ID_AA64PFR2_EL1", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 4, .opc2 = 2, .access = PL1_R, .type = ARM_CP_CONST, .accessfn = access_aa64_tid3, - .resetvalue = 0 }, + .resetvalue = GET_IDREG(isar, ID_AA64PFR2)}, { .name = "ID_AA64PFR3_EL1_RESERVED", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 4, .opc2 = 3, .access = PL1_R, .type = ARM_CP_CONST, @@ -7941,12 +6385,12 @@ void register_cp_regs_for_features(ARMCPU *cpu) .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 4, .opc2 = 4, .access = PL1_R, .type = ARM_CP_CONST, .accessfn = access_aa64_tid3, - .resetvalue = cpu->isar.id_aa64zfr0 }, + .resetvalue = GET_IDREG(isar, ID_AA64ZFR0)}, { .name = "ID_AA64SMFR0_EL1", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 4, .opc2 = 5, .access = PL1_R, .type = ARM_CP_CONST, .accessfn = access_aa64_tid3, - .resetvalue = cpu->isar.id_aa64smfr0 }, + .resetvalue = GET_IDREG(isar, ID_AA64SMFR0)}, { .name = "ID_AA64PFR6_EL1_RESERVED", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 4, .opc2 = 6, .access = PL1_R, .type = ARM_CP_CONST, @@ -7961,12 +6405,12 @@ void 
register_cp_regs_for_features(ARMCPU *cpu) .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 5, .opc2 = 0, .access = PL1_R, .type = ARM_CP_CONST, .accessfn = access_aa64_tid3, - .resetvalue = cpu->isar.id_aa64dfr0 }, + .resetvalue = GET_IDREG(isar, ID_AA64DFR0) }, { .name = "ID_AA64DFR1_EL1", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 5, .opc2 = 1, .access = PL1_R, .type = ARM_CP_CONST, .accessfn = access_aa64_tid3, - .resetvalue = cpu->isar.id_aa64dfr1 }, + .resetvalue = GET_IDREG(isar, ID_AA64DFR1) }, { .name = "ID_AA64DFR2_EL1_RESERVED", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 5, .opc2 = 2, .access = PL1_R, .type = ARM_CP_CONST, @@ -7981,12 +6425,12 @@ void register_cp_regs_for_features(ARMCPU *cpu) .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 5, .opc2 = 4, .access = PL1_R, .type = ARM_CP_CONST, .accessfn = access_aa64_tid3, - .resetvalue = cpu->id_aa64afr0 }, + .resetvalue = GET_IDREG(isar, ID_AA64AFR0) }, { .name = "ID_AA64AFR1_EL1", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 5, .opc2 = 5, .access = PL1_R, .type = ARM_CP_CONST, .accessfn = access_aa64_tid3, - .resetvalue = cpu->id_aa64afr1 }, + .resetvalue = GET_IDREG(isar, ID_AA64AFR1) }, { .name = "ID_AA64AFR2_EL1_RESERVED", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 5, .opc2 = 6, .access = PL1_R, .type = ARM_CP_CONST, @@ -8001,17 +6445,17 @@ void register_cp_regs_for_features(ARMCPU *cpu) .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 6, .opc2 = 0, .access = PL1_R, .type = ARM_CP_CONST, .accessfn = access_aa64_tid3, - .resetvalue = cpu->isar.id_aa64isar0 }, + .resetvalue = GET_IDREG(isar, ID_AA64ISAR0)}, { .name = "ID_AA64ISAR1_EL1", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 6, .opc2 = 1, .access = PL1_R, .type = ARM_CP_CONST, .accessfn = access_aa64_tid3, - .resetvalue = cpu->isar.id_aa64isar1 }, + .resetvalue = GET_IDREG(isar, ID_AA64ISAR1)}, { .name = "ID_AA64ISAR2_EL1", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 6, .opc2 = 2, .access = PL1_R, .type = ARM_CP_CONST, .accessfn = access_aa64_tid3, - .resetvalue = cpu->isar.id_aa64isar2 }, + .resetvalue = GET_IDREG(isar, ID_AA64ISAR2)}, { .name = "ID_AA64ISAR3_EL1_RESERVED", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 6, .opc2 = 3, .access = PL1_R, .type = ARM_CP_CONST, @@ -8041,22 +6485,22 @@ void register_cp_regs_for_features(ARMCPU *cpu) .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 7, .opc2 = 0, .access = PL1_R, .type = ARM_CP_CONST, .accessfn = access_aa64_tid3, - .resetvalue = cpu->isar.id_aa64mmfr0 }, + .resetvalue = GET_IDREG(isar, ID_AA64MMFR0)}, { .name = "ID_AA64MMFR1_EL1", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 7, .opc2 = 1, .access = PL1_R, .type = ARM_CP_CONST, .accessfn = access_aa64_tid3, - .resetvalue = cpu->isar.id_aa64mmfr1 }, + .resetvalue = GET_IDREG(isar, ID_AA64MMFR1) }, { .name = "ID_AA64MMFR2_EL1", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 7, .opc2 = 2, .access = PL1_R, .type = ARM_CP_CONST, .accessfn = access_aa64_tid3, - .resetvalue = cpu->isar.id_aa64mmfr2 }, + .resetvalue = GET_IDREG(isar, ID_AA64MMFR2) }, { .name = "ID_AA64MMFR3_EL1", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 7, .opc2 = 3, .access = PL1_R, .type = ARM_CP_CONST, .accessfn = access_aa64_tid3, - .resetvalue = cpu->isar.id_aa64mmfr3 }, + .resetvalue = GET_IDREG(isar, ID_AA64MMFR3) }, { .name = "ID_AA64MMFR4_EL1_RESERVED", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 0, .crn = 
0, .crm = 7, .opc2 = 4, .access = PL1_R, .type = ARM_CP_CONST, @@ -8128,42 +6572,22 @@ void register_cp_regs_for_features(ARMCPU *cpu) .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 3, .opc2 = 4, .access = PL1_R, .type = ARM_CP_CONST, .accessfn = access_aa64_tid3, - .resetvalue = cpu->isar.id_pfr2 }, + .resetvalue = GET_IDREG(isar, ID_PFR2)}, { .name = "ID_DFR1", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 3, .opc2 = 5, .access = PL1_R, .type = ARM_CP_CONST, .accessfn = access_aa64_tid3, - .resetvalue = cpu->isar.id_dfr1 }, + .resetvalue = GET_IDREG(isar, ID_DFR1)}, { .name = "ID_MMFR5", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 3, .opc2 = 6, .access = PL1_R, .type = ARM_CP_CONST, .accessfn = access_aa64_tid3, - .resetvalue = cpu->isar.id_mmfr5 }, + .resetvalue = GET_IDREG(isar, ID_MMFR5)}, { .name = "RES_0_C0_C3_7", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 3, .opc2 = 7, .access = PL1_R, .type = ARM_CP_CONST, .accessfn = access_aa64_tid3, .resetvalue = 0 }, - { .name = "PMCEID0", .state = ARM_CP_STATE_AA32, - .cp = 15, .opc1 = 0, .crn = 9, .crm = 12, .opc2 = 6, - .access = PL0_R, .accessfn = pmreg_access, .type = ARM_CP_CONST, - .fgt = FGT_PMCEIDN_EL0, - .resetvalue = extract64(cpu->pmceid0, 0, 32) }, - { .name = "PMCEID0_EL0", .state = ARM_CP_STATE_AA64, - .opc0 = 3, .opc1 = 3, .crn = 9, .crm = 12, .opc2 = 6, - .access = PL0_R, .accessfn = pmreg_access, .type = ARM_CP_CONST, - .fgt = FGT_PMCEIDN_EL0, - .resetvalue = cpu->pmceid0 }, - { .name = "PMCEID1", .state = ARM_CP_STATE_AA32, - .cp = 15, .opc1 = 0, .crn = 9, .crm = 12, .opc2 = 7, - .access = PL0_R, .accessfn = pmreg_access, .type = ARM_CP_CONST, - .fgt = FGT_PMCEIDN_EL0, - .resetvalue = extract64(cpu->pmceid1, 0, 32) }, - { .name = "PMCEID1_EL0", .state = ARM_CP_STATE_AA64, - .opc0 = 3, .opc1 = 3, .crn = 9, .crm = 12, .opc2 = 7, - .access = PL0_R, .accessfn = pmreg_access, .type = ARM_CP_CONST, - .fgt = FGT_PMCEIDN_EL0, - .resetvalue = cpu->pmceid1 }, }; #ifdef CONFIG_USER_ONLY static const ARMCPRegUserSpaceInfo v8_user_idregs[] = { @@ -8179,6 +6603,8 @@ void register_cp_regs_for_features(ARMCPU *cpu) R_ID_AA64PFR1_SSBS_MASK | R_ID_AA64PFR1_MTE_MASK | R_ID_AA64PFR1_SME_MASK }, + { .name = "ID_AA64PFR2_EL1", + .exported_bits = 0 }, { .name = "ID_AA64PFR*_EL1_RESERVED", .is_glob = true }, { .name = "ID_AA64ZFR0_EL1", @@ -8498,12 +6924,6 @@ void register_cp_regs_for_features(ARMCPU *cpu) .bank_fieldoffsets = { offsetoflow32(CPUARMState, cp15.par_s), offsetoflow32(CPUARMState, cp15.par_ns) }, .writefn = par_write}, -#ifndef CONFIG_USER_ONLY - /* This underdecoding is safe because the reginfo is NO_RAW. 
*/ - { .name = "ATS", .cp = 15, .crn = 7, .crm = 8, .opc1 = 0, .opc2 = CP_ANY, - .access = PL1_W, .accessfn = ats_access, - .writefn = ats_write, .type = ARM_CP_NO_RAW | ARM_CP_RAISES_EXC }, -#endif }; /* @@ -8530,9 +6950,6 @@ void register_cp_regs_for_features(ARMCPU *cpu) if (arm_feature(env, ARM_FEATURE_STRONGARM)) { define_arm_cp_regs(cpu, strongarm_cp_reginfo); } - if (arm_feature(env, ARM_FEATURE_XSCALE)) { - define_arm_cp_regs(cpu, xscale_cp_reginfo); - } if (arm_feature(env, ARM_FEATURE_DUMMY_C15_REGS)) { define_arm_cp_regs(cpu, dummy_c15_cp_reginfo); } @@ -8855,12 +7272,14 @@ void register_cp_regs_for_features(ARMCPU *cpu) if (arm_feature(env, ARM_FEATURE_VBAR)) { static const ARMCPRegInfo vbar_cp_reginfo[] = { - { .name = "VBAR", .state = ARM_CP_STATE_BOTH, + { .name = "VBAR_EL1", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .crn = 12, .crm = 0, .opc1 = 0, .opc2 = 0, .access = PL1_RW, .writefn = vbar_write, .accessfn = access_nv1, .fgt = FGT_VBAR_EL1, .nv2_redirect_offset = 0x250 | NV2_REDIR_NV1, + .vhe_redir_to_el2 = ENCODE_AA64_CP_REG(3, 4, 12, 0, 0), + .vhe_redir_to_el01 = ENCODE_AA64_CP_REG(3, 5, 12, 0, 0), .bank_fieldoffsets = { offsetof(CPUARMState, cp15.vbar_s), offsetof(CPUARMState, cp15.vbar_ns) }, .resetvalue = 0 }, @@ -8871,24 +7290,18 @@ void register_cp_regs_for_features(ARMCPU *cpu) /* Generic registers whose values depend on the implementation */ { ARMCPRegInfo sctlr = { - .name = "SCTLR", .state = ARM_CP_STATE_BOTH, + .name = "SCTLR_EL1", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .opc1 = 0, .crn = 1, .crm = 0, .opc2 = 0, .access = PL1_RW, .accessfn = access_tvm_trvm, .fgt = FGT_SCTLR_EL1, + .vhe_redir_to_el2 = ENCODE_AA64_CP_REG(3, 4, 1, 0, 0), + .vhe_redir_to_el01 = ENCODE_AA64_CP_REG(3, 5, 1, 0, 0), .nv2_redirect_offset = 0x110 | NV2_REDIR_NV1, .bank_fieldoffsets = { offsetof(CPUARMState, cp15.sctlr_s), offsetof(CPUARMState, cp15.sctlr_ns) }, .writefn = sctlr_write, .resetvalue = cpu->reset_sctlr, .raw_writefn = raw_write, }; - if (arm_feature(env, ARM_FEATURE_XSCALE)) { - /* - * Normally we would always end the TB on an SCTLR write, but Linux - * arch/arm/mach-pxa/sleep.S expects two instructions following - * an MMU enable to execute from cache. Imitate this behaviour. 
- */ - sctlr.type |= ARM_CP_SUPPRESS_TB_END; - } define_one_arm_cp_reg(cpu, &sctlr); if (arm_feature(env, ARM_FEATURE_PMSA) && @@ -8909,14 +7322,6 @@ void register_cp_regs_for_features(ARMCPU *cpu) if (cpu_isar_feature(aa64_pan, cpu)) { define_one_arm_cp_reg(cpu, &pan_reginfo); } -#ifndef CONFIG_USER_ONLY - if (cpu_isar_feature(aa64_ats1e1, cpu)) { - define_arm_cp_regs(cpu, ats1e1_reginfo); - } - if (cpu_isar_feature(aa32_ats1e1, cpu)) { - define_arm_cp_regs(cpu, ats1cp_reginfo); - } -#endif if (cpu_isar_feature(aa64_uao, cpu)) { define_one_arm_cp_reg(cpu, &uao_reginfo); } @@ -9008,6 +7413,27 @@ void register_cp_regs_for_features(ARMCPU *cpu) define_arm_cp_regs(cpu, nmi_reginfo); } + if (cpu_isar_feature(aa64_sctlr2, cpu)) { + define_arm_cp_regs(cpu, sctlr2_reginfo); + } + + if (cpu_isar_feature(aa64_tcr2, cpu)) { + define_arm_cp_regs(cpu, tcr2_reginfo); + } + + if (cpu_isar_feature(aa64_s1pie, cpu)) { + define_arm_cp_regs(cpu, s1pie_reginfo); + } + if (cpu_isar_feature(aa64_s2pie, cpu)) { + define_arm_cp_regs(cpu, s2pie_reginfo); + } + if (cpu_isar_feature(aa64_mec, cpu)) { + define_arm_cp_regs(cpu, mec_reginfo); + if (cpu_isar_feature(aa64_mte, cpu)) { + define_arm_cp_regs(cpu, mec_mte_reginfo); + } + } + if (cpu_isar_feature(any_predinv, cpu)) { define_arm_cp_regs(cpu, predinv_reginfo); } @@ -9016,60 +7442,42 @@ void register_cp_regs_for_features(ARMCPU *cpu) define_arm_cp_regs(cpu, ccsidr2_reginfo); } -#ifndef CONFIG_USER_ONLY - /* - * Register redirections and aliases must be done last, - * after the registers from the other extensions have been defined. - */ - if (arm_feature(env, ARM_FEATURE_EL2) && cpu_isar_feature(aa64_vh, cpu)) { - define_arm_vh_e2h_redirects_aliases(cpu); + define_pm_cpregs(cpu); + define_gcs_cpregs(cpu); +} + +/* + * Copy a ARMCPRegInfo structure, allocating it along with the name + * and an optional suffix to the name. + */ +static ARMCPRegInfo *alloc_cpreg(const ARMCPRegInfo *in, const char *suffix) +{ + const char *name = in->name; + size_t name_len = strlen(name); + size_t suff_len = suffix ? strlen(suffix) : 0; + ARMCPRegInfo *out = g_malloc(sizeof(*in) + name_len + suff_len + 1); + char *p = (char *)(out + 1); + + *out = *in; + out->name = p; + + memcpy(p, name, name_len + 1); + if (suffix) { + memcpy(p + name_len, suffix, suff_len + 1); } -#endif + return out; } /* - * Private utility function for define_one_arm_cp_reg_with_opaque(): + * Private utility function for define_one_arm_cp_reg(): * add a single reginfo struct to the hash table. */ -static void add_cpreg_to_hashtable(ARMCPU *cpu, const ARMCPRegInfo *r, - void *opaque, CPState state, - CPSecureState secstate, - int crm, int opc1, int opc2, - const char *name) +static void add_cpreg_to_hashtable(ARMCPU *cpu, ARMCPRegInfo *r, + CPState state, CPSecureState secstate, + uint32_t key) { CPUARMState *env = &cpu->env; - uint32_t key; - ARMCPRegInfo *r2; - bool is64 = r->type & ARM_CP_64BIT; bool ns = secstate & ARM_CP_SECSTATE_NS; - int cp = r->cp; - size_t name_len; - bool make_const; - - switch (state) { - case ARM_CP_STATE_AA32: - /* We assume it is a cp15 register if the .cp field is left unset. */ - if (cp == 0 && r->state == ARM_CP_STATE_BOTH) { - cp = 15; - } - key = ENCODE_CP_REG(cp, is64, ns, r->crn, crm, opc1, opc2); - break; - case ARM_CP_STATE_AA64: - /* - * To allow abbreviation of ARMCPRegInfo definitions, we treat - * cp == 0 as equivalent to the value for "standard guest-visible - * sysreg". 
STATE_BOTH definitions are also always "standard sysreg" - * in their AArch64 view (the .cp value may be non-zero for the - * benefit of the AArch32 view). - */ - if (cp == 0 || r->state == ARM_CP_STATE_BOTH) { - cp = CP_REG_ARM64_SYSREG_CP; - } - key = ENCODE_AA64_CP_REG(cp, r->crn, crm, r->opc0, opc1, opc2); - break; - default: - g_assert_not_reached(); - } /* Overriding of an existing definition must be explicitly requested. */ if (!(r->type & ARM_CP_OVERRIDE)) { @@ -9079,84 +7487,7 @@ static void add_cpreg_to_hashtable(ARMCPU *cpu, const ARMCPRegInfo *r, } } - /* - * Eliminate registers that are not present because the EL is missing. - * Doing this here makes it easier to put all registers for a given - * feature into the same ARMCPRegInfo array and define them all at once. - */ - make_const = false; - if (arm_feature(env, ARM_FEATURE_EL3)) { - /* - * An EL2 register without EL2 but with EL3 is (usually) RES0. - * See rule RJFFP in section D1.1.3 of DDI0487H.a. - */ - int min_el = ctz32(r->access) / 2; - if (min_el == 2 && !arm_feature(env, ARM_FEATURE_EL2)) { - if (r->type & ARM_CP_EL3_NO_EL2_UNDEF) { - return; - } - make_const = !(r->type & ARM_CP_EL3_NO_EL2_KEEP); - } - } else { - CPAccessRights max_el = (arm_feature(env, ARM_FEATURE_EL2) - ? PL2_RW : PL1_RW); - if ((r->access & max_el) == 0) { - return; - } - } - - /* Combine cpreg and name into one allocation. */ - name_len = strlen(name) + 1; - r2 = g_malloc(sizeof(*r2) + name_len); - *r2 = *r; - r2->name = memcpy(r2 + 1, name, name_len); - - /* - * Update fields to match the instantiation, overwiting wildcards - * such as CP_ANY, ARM_CP_STATE_BOTH, or ARM_CP_SECSTATE_BOTH. - */ - r2->cp = cp; - r2->crm = crm; - r2->opc1 = opc1; - r2->opc2 = opc2; - r2->state = state; - r2->secure = secstate; - if (opaque) { - r2->opaque = opaque; - } - - if (make_const) { - /* This should not have been a very special register to begin. */ - int old_special = r2->type & ARM_CP_SPECIAL_MASK; - assert(old_special == 0 || old_special == ARM_CP_NOP); - /* - * Set the special function to CONST, retaining the other flags. - * This is important for e.g. ARM_CP_SVE so that we still - * take the SVE trap if CPTR_EL3.EZ == 0. - */ - r2->type = (r2->type & ~ARM_CP_SPECIAL_MASK) | ARM_CP_CONST; - /* - * Usually, these registers become RES0, but there are a few - * special cases like VPIDR_EL2 which have a constant non-zero - * value with writes ignored. - */ - if (!(r->type & ARM_CP_EL3_NO_EL2_C_NZ)) { - r2->resetvalue = 0; - } - /* - * ARM_CP_CONST has precedence, so removing the callbacks and - * offsets are not strictly necessary, but it is potentially - * less confusing to debug later. - */ - r2->readfn = NULL; - r2->writefn = NULL; - r2->raw_readfn = NULL; - r2->raw_writefn = NULL; - r2->resetfn = NULL; - r2->fieldoffset = 0; - r2->bank_fieldoffsets[0] = 0; - r2->bank_fieldoffsets[1] = 0; - } else { + { bool isbanked = r->bank_fieldoffsets[0] && r->bank_fieldoffsets[1]; if (isbanked) { @@ -9165,7 +7496,7 @@ static void add_cpreg_to_hashtable(ARMCPU *cpu, const ARMCPRegInfo *r, * Overwriting fieldoffset as the array is only used to define * banked registers but later only fieldoffset is used. 
*/ - r2->fieldoffset = r->bank_fieldoffsets[ns]; + r->fieldoffset = r->bank_fieldoffsets[ns]; } if (state == ARM_CP_STATE_AA32) { if (isbanked) { @@ -9182,54 +7513,187 @@ static void add_cpreg_to_hashtable(ARMCPU *cpu, const ARMCPRegInfo *r, */ if ((r->state == ARM_CP_STATE_BOTH && ns) || (arm_feature(env, ARM_FEATURE_V8) && !ns)) { - r2->type |= ARM_CP_ALIAS; + r->type |= ARM_CP_ALIAS; } } else if ((secstate != r->secure) && !ns) { /* * The register is not banked so we only want to allow * migration of the non-secure instance. */ - r2->type |= ARM_CP_ALIAS; - } - - if (HOST_BIG_ENDIAN && - r->state == ARM_CP_STATE_BOTH && r2->fieldoffset) { - r2->fieldoffset += sizeof(uint32_t); + r->type |= ARM_CP_ALIAS; } } } /* - * By convention, for wildcarded registers only the first - * entry is used for migration; the others are marked as - * ALIAS so we don't try to transfer the register - * multiple times. Special registers (ie NOP/WFI) are - * never migratable and not even raw-accessible. + * For 32-bit AArch32 regs shared with 64-bit AArch64 regs, + * adjust the field offset for endianness. This had to be + * delayed until banked registers were resolved. */ - if (r2->type & ARM_CP_SPECIAL_MASK) { - r2->type |= ARM_CP_NO_RAW; + if (HOST_BIG_ENDIAN && + state == ARM_CP_STATE_AA32 && + r->state == ARM_CP_STATE_BOTH && + r->fieldoffset) { + r->fieldoffset += sizeof(uint32_t); } - if (((r->crm == CP_ANY) && crm != 0) || - ((r->opc1 == CP_ANY) && opc1 != 0) || - ((r->opc2 == CP_ANY) && opc2 != 0)) { - r2->type |= ARM_CP_ALIAS | ARM_CP_NO_GDB; + + /* + * Special registers (ie NOP/WFI) are never migratable and + * are not even raw-accessible. + */ + if (r->type & ARM_CP_SPECIAL_MASK) { + r->type |= ARM_CP_NO_RAW; } /* + * Update fields to match the instantiation, overwiting wildcards + * such as ARM_CP_STATE_BOTH or ARM_CP_SECSTATE_BOTH. + */ + r->state = state; + r->secure = secstate; + + /* * Check that raw accesses are either forbidden or handled. Note that * we can't assert this earlier because the setup of fieldoffset for * banked registers has to be done first. */ - if (!(r2->type & ARM_CP_NO_RAW)) { - assert(!raw_accessors_invalid(r2)); + if (!(r->type & ARM_CP_NO_RAW)) { + assert(!raw_accessors_invalid(r)); } - g_hash_table_insert(cpu->cp_regs, (gpointer)(uintptr_t)key, r2); + g_hash_table_insert(cpu->cp_regs, (gpointer)(uintptr_t)key, r); } +static void add_cpreg_to_hashtable_aa32(ARMCPU *cpu, ARMCPRegInfo *r) +{ + /* + * Under AArch32 CP registers can be common + * (same for secure and non-secure world) or banked. 
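As a worked example (not part of the patch, and assuming the usual SECSTATE_BOTH default for banked registers), this is what the secure/non-secure duplication below produces for a banked STATE_BOTH register such as VBAR_EL1:

/*
 * VBAR_EL1 ends up as two AArch32 hash-table entries built from one
 * ARMCPRegInfo:
 *
 *   "VBAR_EL1_S"  secure key (no CP_REG_AA32_NS_MASK),
 *                 fieldoffset = offsetof(CPUARMState, cp15.vbar_s)
 *   "VBAR_EL1"    key | CP_REG_AA32_NS_MASK,
 *                 fieldoffset = offsetof(CPUARMState, cp15.vbar_ns)
 *
 * The "_S" name suffix comes from alloc_cpreg(r, "_S").
 */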
+ */ + ARMCPRegInfo *r_s; + bool is64 = r->type & ARM_CP_64BIT; + uint32_t key = ENCODE_CP_REG(r->cp, is64, 0, r->crn, + r->crm, r->opc1, r->opc2); + + assert(!(r->type & ARM_CP_ADD_TLBI_NXS)); /* aa64 only */ + r->vhe_redir_to_el2 = 0; + r->vhe_redir_to_el01 = 0; + + switch (r->secure) { + case ARM_CP_SECSTATE_NS: + key |= CP_REG_AA32_NS_MASK; + /* fall through */ + case ARM_CP_SECSTATE_S: + add_cpreg_to_hashtable(cpu, r, ARM_CP_STATE_AA32, r->secure, key); + break; + case ARM_CP_SECSTATE_BOTH: + r_s = alloc_cpreg(r, "_S"); + add_cpreg_to_hashtable(cpu, r_s, ARM_CP_STATE_AA32, + ARM_CP_SECSTATE_S, key); + + key |= CP_REG_AA32_NS_MASK; + add_cpreg_to_hashtable(cpu, r, ARM_CP_STATE_AA32, + ARM_CP_SECSTATE_NS, key); + break; + default: + g_assert_not_reached(); + } +} + +static void add_cpreg_to_hashtable_aa64(ARMCPU *cpu, ARMCPRegInfo *r) +{ + uint32_t key = ENCODE_AA64_CP_REG(r->opc0, r->opc1, + r->crn, r->crm, r->opc2); + + if ((r->type & ARM_CP_ADD_TLBI_NXS) && + cpu_isar_feature(aa64_xs, cpu)) { + /* + * This is a TLBI insn which has an NXS variant. The + * NXS variant is at the same encoding except that + * crn is +1, and has the same behaviour except for + * fine-grained trapping. Add the NXS insn here and + * then fall through to add the normal register. + * add_cpreg_to_hashtable() copies the cpreg struct + * and name that it is passed, so it's OK to use + * a local struct here. + */ + ARMCPRegInfo *nxs_ri = alloc_cpreg(r, "NXS"); + uint32_t nxs_key; + + assert(nxs_ri->crn < 0xf); + nxs_ri->crn++; + /* Also increment the CRN field inside the key value */ + nxs_key = key + (1 << CP_REG_ARM64_SYSREG_CRN_SHIFT); + if (nxs_ri->fgt) { + nxs_ri->fgt |= R_FGT_NXS_MASK; + } + + add_cpreg_to_hashtable(cpu, nxs_ri, ARM_CP_STATE_AA64, + ARM_CP_SECSTATE_NS, nxs_key); + } + + if (!r->vhe_redir_to_el01) { + assert(!r->vhe_redir_to_el2); + } else if (!arm_feature(&cpu->env, ARM_FEATURE_EL2) || + !cpu_isar_feature(aa64_vh, cpu)) { + r->vhe_redir_to_el2 = 0; + r->vhe_redir_to_el01 = 0; + } else { + /* Create the FOO_EL12 alias. */ + ARMCPRegInfo *r2 = alloc_cpreg(r, "2"); + uint32_t key2 = r->vhe_redir_to_el01; + + /* + * Clear EL1 redirection on the FOO_EL1 reg; + * Clear EL2 redirection on the FOO_EL12 reg; + * Install redirection from FOO_EL12 back to FOO_EL1. + */ + r->vhe_redir_to_el01 = 0; + r2->vhe_redir_to_el2 = 0; + r2->vhe_redir_to_el01 = key; + + r2->type |= ARM_CP_ALIAS | ARM_CP_NO_RAW; + /* Remove PL1/PL0 access, leaving PL2/PL3 R/W in place. */ + r2->access &= PL2_RW | PL3_RW; + /* The new_reg op fields are as per new_key, not the target reg */ + r2->crn = (key2 & CP_REG_ARM64_SYSREG_CRN_MASK) + >> CP_REG_ARM64_SYSREG_CRN_SHIFT; + r2->crm = (key2 & CP_REG_ARM64_SYSREG_CRM_MASK) + >> CP_REG_ARM64_SYSREG_CRM_SHIFT; + r2->opc0 = (key2 & CP_REG_ARM64_SYSREG_OP0_MASK) + >> CP_REG_ARM64_SYSREG_OP0_SHIFT; + r2->opc1 = (key2 & CP_REG_ARM64_SYSREG_OP1_MASK) + >> CP_REG_ARM64_SYSREG_OP1_SHIFT; + r2->opc2 = (key2 & CP_REG_ARM64_SYSREG_OP2_MASK) + >> CP_REG_ARM64_SYSREG_OP2_SHIFT; -void define_one_arm_cp_reg_with_opaque(ARMCPU *cpu, - const ARMCPRegInfo *r, void *opaque) + /* Non-redirected access to this register will abort. */ + r2->readfn = NULL; + r2->writefn = NULL; + r2->raw_readfn = NULL; + r2->raw_writefn = NULL; + r2->accessfn = NULL; + r2->fieldoffset = 0; + + /* + * If the _EL1 register is redirected to memory by FEAT_NV2, + * then it shares the offset with the _EL12 register, + * and which one is redirected depends on HCR_EL2.NV1. 
+ */ + if (r2->nv2_redirect_offset) { + assert(r2->nv2_redirect_offset & NV2_REDIR_NV1); + r2->nv2_redirect_offset &= ~NV2_REDIR_NV1; + r2->nv2_redirect_offset |= NV2_REDIR_NO_NV1; + } + add_cpreg_to_hashtable(cpu, r2, ARM_CP_STATE_AA64, + ARM_CP_SECSTATE_NS, key2); + } + + add_cpreg_to_hashtable(cpu, r, ARM_CP_STATE_AA64, + ARM_CP_SECSTATE_NS, key); +} + +void define_one_arm_cp_reg(ARMCPU *cpu, const ARMCPRegInfo *r) { /* * Define implementations of coprocessor registers. @@ -9255,21 +7719,27 @@ void define_one_arm_cp_reg_with_opaque(ARMCPU *cpu, * bits; the ARM_CP_64BIT* flag applies only to the AArch32 view of * the register, if any. */ - int crm, opc1, opc2; int crmmin = (r->crm == CP_ANY) ? 0 : r->crm; int crmmax = (r->crm == CP_ANY) ? 15 : r->crm; int opc1min = (r->opc1 == CP_ANY) ? 0 : r->opc1; int opc1max = (r->opc1 == CP_ANY) ? 7 : r->opc1; int opc2min = (r->opc2 == CP_ANY) ? 0 : r->opc2; int opc2max = (r->opc2 == CP_ANY) ? 7 : r->opc2; - CPState state; + int cp = r->cp; + ARMCPRegInfo r_const; + CPUARMState *env = &cpu->env; - /* 64 bit registers have only CRm and Opc1 fields */ - assert(!((r->type & ARM_CP_64BIT) && (r->opc2 || r->crn))); + /* + * AArch64 regs are all 64 bit so ARM_CP_64BIT is meaningless. + * Moreover, the encoding test just following in general prevents + * shared encoding so ARM_CP_STATE_BOTH won't work either. + */ + assert(r->state == ARM_CP_STATE_AA32 || !(r->type & ARM_CP_64BIT)); + /* AArch32 64-bit registers have only CRm and Opc1 fields. */ + assert(!(r->type & ARM_CP_64BIT) || !(r->opc2 || r->crn)); /* op0 only exists in the AArch64 encodings */ - assert((r->state != ARM_CP_STATE_AA32) || (r->opc0 == 0)); - /* AArch64 regs are all 64 bit so ARM_CP_64BIT is meaningless */ - assert((r->state != ARM_CP_STATE_AA64) || !(r->type & ARM_CP_64BIT)); + assert(r->state != ARM_CP_STATE_AA32 || r->opc0 == 0); + /* * This API is only for Arm's system coprocessors (14 and 15) or * (M-profile or v7A-and-earlier only) for implementation defined @@ -9280,21 +7750,25 @@ void define_one_arm_cp_reg_with_opaque(ARMCPU *cpu, */ switch (r->state) { case ARM_CP_STATE_BOTH: - /* 0 has a special meaning, but otherwise the same rules as AA32. */ - if (r->cp == 0) { + /* + * If the cp field is left unset, assume cp15. + * Otherwise apply the same rules as AA32. + */ + if (cp == 0) { + cp = 15; break; } /* fall through */ case ARM_CP_STATE_AA32: if (arm_feature(&cpu->env, ARM_FEATURE_V8) && !arm_feature(&cpu->env, ARM_FEATURE_M)) { - assert(r->cp >= 14 && r->cp <= 15); + assert(cp >= 14 && cp <= 15); } else { - assert(r->cp < 8 || (r->cp >= 14 && r->cp <= 15)); + assert(cp < 8 || (cp >= 14 && cp <= 15)); } break; case ARM_CP_STATE_AA64: - assert(r->cp == 0 || r->cp == CP_REG_ARM64_SYSREG_CP); + assert(cp == 0); break; default: g_assert_not_reached(); @@ -9359,75 +7833,104 @@ void define_one_arm_cp_reg_with_opaque(ARMCPU *cpu, } } - for (crm = crmmin; crm <= crmmax; crm++) { - for (opc1 = opc1min; opc1 <= opc1max; opc1++) { - for (opc2 = opc2min; opc2 <= opc2max; opc2++) { - for (state = ARM_CP_STATE_AA32; - state <= ARM_CP_STATE_AA64; state++) { - if (r->state != state && r->state != ARM_CP_STATE_BOTH) { - continue; - } - if ((r->type & ARM_CP_ADD_TLBI_NXS) && - cpu_isar_feature(aa64_xs, cpu)) { - /* - * This is a TLBI insn which has an NXS variant. The - * NXS variant is at the same encoding except that - * crn is +1, and has the same behaviour except for - * fine-grained trapping. Add the NXS insn here and - * then fall through to add the normal register. 
- * add_cpreg_to_hashtable() copies the cpreg struct - * and name that it is passed, so it's OK to use - * a local struct here. - */ - ARMCPRegInfo nxs_ri = *r; - g_autofree char *name = g_strdup_printf("%sNXS", r->name); - - assert(state == ARM_CP_STATE_AA64); - assert(nxs_ri.crn < 0xf); - nxs_ri.crn++; - if (nxs_ri.fgt) { - nxs_ri.fgt |= R_FGT_NXS_MASK; - } - add_cpreg_to_hashtable(cpu, &nxs_ri, opaque, state, - ARM_CP_SECSTATE_NS, - crm, opc1, opc2, name); - } - if (state == ARM_CP_STATE_AA32) { - /* - * Under AArch32 CP registers can be common - * (same for secure and non-secure world) or banked. - */ - char *name; - - switch (r->secure) { - case ARM_CP_SECSTATE_S: - case ARM_CP_SECSTATE_NS: - add_cpreg_to_hashtable(cpu, r, opaque, state, - r->secure, crm, opc1, opc2, - r->name); - break; - case ARM_CP_SECSTATE_BOTH: - name = g_strdup_printf("%s_S", r->name); - add_cpreg_to_hashtable(cpu, r, opaque, state, - ARM_CP_SECSTATE_S, - crm, opc1, opc2, name); - g_free(name); - add_cpreg_to_hashtable(cpu, r, opaque, state, - ARM_CP_SECSTATE_NS, - crm, opc1, opc2, r->name); - break; - default: - g_assert_not_reached(); - } - } else { - /* - * AArch64 registers get mapped to non-secure instance - * of AArch32 - */ - add_cpreg_to_hashtable(cpu, r, opaque, state, - ARM_CP_SECSTATE_NS, - crm, opc1, opc2, r->name); - } + /* + * Eliminate registers that are not present because the EL is missing. + * Doing this here makes it easier to put all registers for a given + * feature into the same ARMCPRegInfo array and define them all at once. + */ + if (arm_feature(env, ARM_FEATURE_EL3)) { + /* + * An EL2 register without EL2 but with EL3 is (usually) RES0. + * See rule RJFFP in section D1.1.3 of DDI0487H.a. + */ + int min_el = ctz32(r->access) / 2; + if (min_el == 2 && !arm_feature(env, ARM_FEATURE_EL2)) { + if (r->type & ARM_CP_EL3_NO_EL2_UNDEF) { + return; + } + if (!(r->type & ARM_CP_EL3_NO_EL2_KEEP)) { + /* This should not have been a very special register. */ + int old_special = r->type & ARM_CP_SPECIAL_MASK; + assert(old_special == 0 || old_special == ARM_CP_NOP); + + r_const = *r; + + /* + * Set the special function to CONST, retaining the other flags. + * This is important for e.g. ARM_CP_SVE so that we still + * take the SVE trap if CPTR_EL3.EZ == 0. + */ + r_const.type = (r->type & ~ARM_CP_SPECIAL_MASK) | ARM_CP_CONST; + /* + * Usually, these registers become RES0, but there are a few + * special cases like VPIDR_EL2 which have a constant non-zero + * value with writes ignored. + */ + if (!(r->type & ARM_CP_EL3_NO_EL2_C_NZ)) { + r_const.resetvalue = 0; + } + /* + * ARM_CP_CONST has precedence, so removing the callbacks and + * offsets are not strictly necessary, but it is potentially + * less confusing to debug later. + */ + r_const.readfn = NULL; + r_const.writefn = NULL; + r_const.raw_readfn = NULL; + r_const.raw_writefn = NULL; + r_const.resetfn = NULL; + r_const.fieldoffset = 0; + r_const.bank_fieldoffsets[0] = 0; + r_const.bank_fieldoffsets[1] = 0; + + r = &r_const; + } + } + } else { + CPAccessRights max_el = (arm_feature(env, ARM_FEATURE_EL2) + ? 
PL2_RW : PL1_RW); + if ((r->access & max_el) == 0) { + return; + } + } + + for (int crm = crmmin; crm <= crmmax; crm++) { + for (int opc1 = opc1min; opc1 <= opc1max; opc1++) { + for (int opc2 = opc2min; opc2 <= opc2max; opc2++) { + ARMCPRegInfo *r2 = alloc_cpreg(r, NULL); + ARMCPRegInfo *r3; + + /* + * By convention, for wildcarded registers only the first + * entry is used for migration; the others are marked as + * ALIAS so we don't try to transfer the register + * multiple times. + */ + if (crm != crmmin || opc1 != opc1min || opc2 != opc2min) { + r2->type |= ARM_CP_ALIAS | ARM_CP_NO_GDB; + } + + /* Overwrite CP_ANY with the instantiation. */ + r2->crm = crm; + r2->opc1 = opc1; + r2->opc2 = opc2; + + switch (r->state) { + case ARM_CP_STATE_AA32: + add_cpreg_to_hashtable_aa32(cpu, r2); + break; + case ARM_CP_STATE_AA64: + add_cpreg_to_hashtable_aa64(cpu, r2); + break; + case ARM_CP_STATE_BOTH: + r3 = alloc_cpreg(r2, NULL); + r2->cp = cp; + add_cpreg_to_hashtable_aa32(cpu, r2); + r3->cp = 0; + add_cpreg_to_hashtable_aa64(cpu, r3); + break; + default: + g_assert_not_reached(); } } } @@ -9435,12 +7938,10 @@ void define_one_arm_cp_reg_with_opaque(ARMCPU *cpu, } /* Define a whole list of registers */ -void define_arm_cp_regs_with_opaque_len(ARMCPU *cpu, const ARMCPRegInfo *regs, - void *opaque, size_t len) +void define_arm_cp_regs_len(ARMCPU *cpu, const ARMCPRegInfo *regs, size_t len) { - size_t i; - for (i = 0; i < len; ++i) { - define_one_arm_cp_reg_with_opaque(cpu, regs + i, opaque); + for (size_t i = 0; i < len; ++i) { + define_one_arm_cp_reg(cpu, regs + i); } } @@ -9502,7 +8003,7 @@ uint64_t arm_cp_read_zero(CPUARMState *env, const ARMCPRegInfo *ri) return 0; } -void arm_cp_reset_ignore(CPUARMState *env, const ARMCPRegInfo *opaque) +void arm_cp_reset_ignore(CPUARMState *env, const ARMCPRegInfo *ri) { /* Helper coprocessor reset function for do-nothing-on-reset registers */ } @@ -10569,7 +9070,7 @@ static int aarch64_regnum(CPUARMState *env, int aarch32_reg) } } -static uint32_t cpsr_read_for_spsr_elx(CPUARMState *env) +uint32_t cpsr_read_for_spsr_elx(CPUARMState *env) { uint32_t ret = cpsr_read(env); @@ -10584,6 +9085,24 @@ static uint32_t cpsr_read_for_spsr_elx(CPUARMState *env) return ret; } +void cpsr_write_from_spsr_elx(CPUARMState *env, uint32_t val) +{ + uint32_t mask; + + /* Save SPSR_ELx.SS into PSTATE. 
*/ + env->pstate = (env->pstate & ~PSTATE_SS) | (val & PSTATE_SS); + val &= ~PSTATE_SS; + + /* Move DIT to the correct location for CPSR */ + if (val & PSTATE_DIT) { + val &= ~PSTATE_DIT; + val |= CPSR_DIT; + } + + mask = aarch32_cpsr_valid_mask(env->features, &env_archcpu(env)->isar); + cpsr_write(env, val, mask, CPSRWriteRaw); +} + static bool syndrome_is_sync_extabt(uint32_t syndrome) { /* Return true if this syndrome value is a synchronous external abort */ @@ -10616,8 +9135,8 @@ static void arm_cpu_do_interrupt_aarch64(CPUState *cs) CPUARMState *env = &cpu->env; unsigned int new_el = env->exception.target_el; vaddr addr = env->cp15.vbar_el[new_el]; - unsigned int new_mode = aarch64_pstate_mode(new_el, true); - unsigned int old_mode; + uint64_t new_mode = aarch64_pstate_mode(new_el, true); + uint64_t old_mode; unsigned int cur_el = arm_current_el(env); int rt; @@ -10660,8 +9179,13 @@ static void arm_cpu_do_interrupt_aarch64(CPUState *cs) } else { addr += 0x600; } - } else if (pstate_read(env) & PSTATE_SP) { - addr += 0x200; + } else { + if (pstate_read(env) & PSTATE_SP) { + addr += 0x200; + } + if (is_a64(env) && (env->cp15.gcscr_el[new_el] & GCSCR_EXLOCKEN)) { + new_mode |= PSTATE_EXLOCK; + } } switch (cs->exception_index) { @@ -10765,7 +9289,7 @@ static void arm_cpu_do_interrupt_aarch64(CPUState *cs) * If NV2 is disabled, change SPSR when NV,NV1 == 1,0 (I_ZJRNN) * If NV2 is enabled, change SPSR when NV is 1 (I_DBTLM) */ - old_mode = deposit32(old_mode, 2, 2, 2); + old_mode = deposit64(old_mode, 2, 2, 2); } } } else { @@ -10778,7 +9302,7 @@ static void arm_cpu_do_interrupt_aarch64(CPUState *cs) } env->banked_spsr[aarch64_banked_spsr_index(new_el)] = old_mode; - qemu_log_mask(CPU_LOG_INT, "...with SPSR 0x%x\n", old_mode); + qemu_log_mask(CPU_LOG_INT, "...with SPSR 0x%" PRIx64 "\n", old_mode); qemu_log_mask(CPU_LOG_INT, "...with ELR 0x%" PRIx64 "\n", env->elr_el[new_el]); @@ -10832,7 +9356,8 @@ static void arm_cpu_do_interrupt_aarch64(CPUState *cs) env->pc = addr; - qemu_log_mask(CPU_LOG_INT, "...to EL%d PC 0x%" PRIx64 " PSTATE 0x%x\n", + qemu_log_mask(CPU_LOG_INT, "...to EL%d PC 0x%" PRIx64 + " PSTATE 0x%" PRIx64 "\n", new_el, env->pc, pstate_read(env)); } @@ -10888,7 +9413,7 @@ void arm_cpu_do_interrupt(CPUState *cs) new_el); if (qemu_loglevel_mask(CPU_LOG_INT) && !excp_is_internal(cs->exception_index)) { - qemu_log_mask(CPU_LOG_INT, "...with ESR 0x%x/0x%" PRIx32 "\n", + qemu_log_mask(CPU_LOG_INT, "...with ESR 0x%x/0x%" PRIx64 "\n", syn_get_ec(env->exception.syndrome), env->exception.syndrome); } @@ -10930,7 +9455,7 @@ void arm_cpu_do_interrupt(CPUState *cs) arm_call_el_change_hook(cpu); if (!kvm_enabled()) { - cs->interrupt_request |= CPU_INTERRUPT_EXITTB; + cpu_set_interrupt(cs, CPU_INTERRUPT_EXITTB); } } #endif /* !CONFIG_USER_ONLY */ @@ -11078,21 +9603,34 @@ ARMVAParameters aa64_va_parameters(CPUARMState *env, uint64_t va, bool el1_is_aa32) { uint64_t tcr = regime_tcr(env, mmu_idx); - bool epd, hpd, tsz_oob, ds, ha, hd; + bool epd, hpd, tsz_oob, ds, ha, hd, pie = false; int select, tsz, tbi, max_tsz, min_tsz, ps, sh; ARMGranuleSize gran; ARMCPU *cpu = env_archcpu(env); bool stage2 = regime_is_stage2(mmu_idx); + int r_el = regime_el(mmu_idx); if (!regime_has_2_ranges(mmu_idx)) { select = 0; tsz = extract32(tcr, 0, 6); gran = tg0_to_gran_size(extract32(tcr, 14, 2)); if (stage2) { - /* VTCR_EL2 */ - hpd = false; + /* + * Stage2 does not have hierarchical permissions. + * Thus disabling them makes things easier during ptw. 
+ */ + hpd = true; + pie = extract64(tcr, 36, 1) && cpu_isar_feature(aa64_s2pie, cpu); } else { hpd = extract32(tcr, 24, 1); + if (r_el == 3) { + pie = (extract64(tcr, 35, 1) + && cpu_isar_feature(aa64_s1pie, cpu)); + } else { + pie = ((env->cp15.tcr2_el[2] & TCR2_PIE) + && (!arm_feature(env, ARM_FEATURE_EL3) + || (env->cp15.scr_el3 & SCR_TCR2EN))); + } } epd = false; sh = extract32(tcr, 12, 2); @@ -11129,10 +9667,16 @@ ARMVAParameters aa64_va_parameters(CPUARMState *env, uint64_t va, ds = extract64(tcr, 59, 1); if (e0pd && cpu_isar_feature(aa64_e0pd, cpu) && - regime_is_user(env, mmu_idx)) { + regime_is_user(mmu_idx)) { epd = true; } + + pie = ((env->cp15.tcr2_el[r_el] & TCR2_PIE) + && (!arm_feature(env, ARM_FEATURE_EL3) + || (env->cp15.scr_el3 & SCR_TCR2EN)) + && (r_el == 2 || (arm_hcrx_el2_eff(env) & HCRX_TCR2EN))); } + hpd |= pie; gran = sanitize_gran_size(cpu, gran, stage2); @@ -11211,6 +9755,7 @@ ARMVAParameters aa64_va_parameters(CPUARMState *env, uint64_t va, .ha = ha, .hd = ha && hd, .gran = gran, + .pie = pie, }; } @@ -11325,33 +9870,6 @@ int fp_exception_el(CPUARMState *env, int cur_el) return 0; } -/* Return the exception level we're running at if this is our mmu_idx */ -int arm_mmu_idx_to_el(ARMMMUIdx mmu_idx) -{ - if (mmu_idx & ARM_MMU_IDX_M) { - return mmu_idx & ARM_MMU_IDX_M_PRIV; - } - - switch (mmu_idx) { - case ARMMMUIdx_E10_0: - case ARMMMUIdx_E20_0: - case ARMMMUIdx_E30_0: - return 0; - case ARMMMUIdx_E10_1: - case ARMMMUIdx_E10_1_PAN: - return 1; - case ARMMMUIdx_E2: - case ARMMMUIdx_E20_2: - case ARMMMUIdx_E20_2_PAN: - return 2; - case ARMMMUIdx_E3: - case ARMMMUIdx_E30_3_PAN: - return 3; - default: - g_assert_not_reached(); - } -} - #ifndef CONFIG_TCG ARMMMUIdx arm_v7m_mmu_idx_for_secstate(CPUARMState *env, bool secstate) { diff --git a/target/arm/hvf-stub.c b/target/arm/hvf-stub.c new file mode 100644 index 0000000..ff13726 --- /dev/null +++ b/target/arm/hvf-stub.c @@ -0,0 +1,20 @@ +/* + * QEMU Hypervisor.framework (HVF) stubs for ARM + * + * Copyright (c) Linaro + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "qemu/osdep.h" +#include "hvf_arm.h" + +uint32_t hvf_arm_get_default_ipa_bit_size(void) +{ + g_assert_not_reached(); +} + +uint32_t hvf_arm_get_max_ipa_bit_size(void) +{ + g_assert_not_reached(); +} diff --git a/target/arm/hvf/hvf.c b/target/arm/hvf/hvf.c index 42258cc..0658a99 100644 --- a/target/arm/hvf/hvf.c +++ b/target/arm/hvf/hvf.c @@ -19,6 +19,7 @@ #include "system/hw_accel.h" #include "hvf_arm.h" #include "cpregs.h" +#include "cpu-sysregs.h" #include <mach/mach_time.h> @@ -151,9 +152,6 @@ void hvf_arm_init_debug(void) g_array_sized_new(true, true, sizeof(HWWatchpoint), max_hw_wps); } -#define HVF_SYSREG(crn, crm, op0, op1, op2) \ - ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP, crn, crm, op0, op1, op2) - #define SYSREG_OP0_SHIFT 20 #define SYSREG_OP0_MASK 0x3 #define SYSREG_OP0(sysreg) ((sysreg >> SYSREG_OP0_SHIFT) & SYSREG_OP0_MASK) @@ -185,6 +183,7 @@ void hvf_arm_init_debug(void) #define SYSREG_OSLAR_EL1 SYSREG(2, 0, 1, 0, 4) #define SYSREG_OSLSR_EL1 SYSREG(2, 0, 1, 1, 4) #define SYSREG_OSDLR_EL1 SYSREG(2, 0, 1, 3, 4) +#define SYSREG_LORC_EL1 SYSREG(3, 0, 10, 4, 3) #define SYSREG_CNTPCT_EL0 SYSREG(3, 3, 14, 0, 1) #define SYSREG_CNTP_CTL_EL0 SYSREG(3, 3, 14, 2, 1) #define SYSREG_PMCR_EL0 SYSREG(3, 3, 9, 12, 0) @@ -395,156 +394,34 @@ static const struct hvf_reg_match hvf_fpreg_match[] = { { HV_SIMD_FP_REG_Q31, offsetof(CPUARMState, vfp.zregs[31]) }, }; -struct hvf_sreg_match { - int reg; - uint32_t key; - uint32_t cp_idx; -}; +/* + * 
QEMU uses KVM system register ids in the migration format. + * Conveniently, HVF uses the same encoding of the op* and cr* parameters + * within the low 16 bits of the ids. Thus conversion between the + * formats is trivial. + */ -static struct hvf_sreg_match hvf_sreg_match[] = { - { HV_SYS_REG_DBGBVR0_EL1, HVF_SYSREG(0, 0, 2, 0, 4) }, - { HV_SYS_REG_DBGBCR0_EL1, HVF_SYSREG(0, 0, 2, 0, 5) }, - { HV_SYS_REG_DBGWVR0_EL1, HVF_SYSREG(0, 0, 2, 0, 6) }, - { HV_SYS_REG_DBGWCR0_EL1, HVF_SYSREG(0, 0, 2, 0, 7) }, - - { HV_SYS_REG_DBGBVR1_EL1, HVF_SYSREG(0, 1, 2, 0, 4) }, - { HV_SYS_REG_DBGBCR1_EL1, HVF_SYSREG(0, 1, 2, 0, 5) }, - { HV_SYS_REG_DBGWVR1_EL1, HVF_SYSREG(0, 1, 2, 0, 6) }, - { HV_SYS_REG_DBGWCR1_EL1, HVF_SYSREG(0, 1, 2, 0, 7) }, - - { HV_SYS_REG_DBGBVR2_EL1, HVF_SYSREG(0, 2, 2, 0, 4) }, - { HV_SYS_REG_DBGBCR2_EL1, HVF_SYSREG(0, 2, 2, 0, 5) }, - { HV_SYS_REG_DBGWVR2_EL1, HVF_SYSREG(0, 2, 2, 0, 6) }, - { HV_SYS_REG_DBGWCR2_EL1, HVF_SYSREG(0, 2, 2, 0, 7) }, - - { HV_SYS_REG_DBGBVR3_EL1, HVF_SYSREG(0, 3, 2, 0, 4) }, - { HV_SYS_REG_DBGBCR3_EL1, HVF_SYSREG(0, 3, 2, 0, 5) }, - { HV_SYS_REG_DBGWVR3_EL1, HVF_SYSREG(0, 3, 2, 0, 6) }, - { HV_SYS_REG_DBGWCR3_EL1, HVF_SYSREG(0, 3, 2, 0, 7) }, - - { HV_SYS_REG_DBGBVR4_EL1, HVF_SYSREG(0, 4, 2, 0, 4) }, - { HV_SYS_REG_DBGBCR4_EL1, HVF_SYSREG(0, 4, 2, 0, 5) }, - { HV_SYS_REG_DBGWVR4_EL1, HVF_SYSREG(0, 4, 2, 0, 6) }, - { HV_SYS_REG_DBGWCR4_EL1, HVF_SYSREG(0, 4, 2, 0, 7) }, - - { HV_SYS_REG_DBGBVR5_EL1, HVF_SYSREG(0, 5, 2, 0, 4) }, - { HV_SYS_REG_DBGBCR5_EL1, HVF_SYSREG(0, 5, 2, 0, 5) }, - { HV_SYS_REG_DBGWVR5_EL1, HVF_SYSREG(0, 5, 2, 0, 6) }, - { HV_SYS_REG_DBGWCR5_EL1, HVF_SYSREG(0, 5, 2, 0, 7) }, - - { HV_SYS_REG_DBGBVR6_EL1, HVF_SYSREG(0, 6, 2, 0, 4) }, - { HV_SYS_REG_DBGBCR6_EL1, HVF_SYSREG(0, 6, 2, 0, 5) }, - { HV_SYS_REG_DBGWVR6_EL1, HVF_SYSREG(0, 6, 2, 0, 6) }, - { HV_SYS_REG_DBGWCR6_EL1, HVF_SYSREG(0, 6, 2, 0, 7) }, - - { HV_SYS_REG_DBGBVR7_EL1, HVF_SYSREG(0, 7, 2, 0, 4) }, - { HV_SYS_REG_DBGBCR7_EL1, HVF_SYSREG(0, 7, 2, 0, 5) }, - { HV_SYS_REG_DBGWVR7_EL1, HVF_SYSREG(0, 7, 2, 0, 6) }, - { HV_SYS_REG_DBGWCR7_EL1, HVF_SYSREG(0, 7, 2, 0, 7) }, - - { HV_SYS_REG_DBGBVR8_EL1, HVF_SYSREG(0, 8, 2, 0, 4) }, - { HV_SYS_REG_DBGBCR8_EL1, HVF_SYSREG(0, 8, 2, 0, 5) }, - { HV_SYS_REG_DBGWVR8_EL1, HVF_SYSREG(0, 8, 2, 0, 6) }, - { HV_SYS_REG_DBGWCR8_EL1, HVF_SYSREG(0, 8, 2, 0, 7) }, - - { HV_SYS_REG_DBGBVR9_EL1, HVF_SYSREG(0, 9, 2, 0, 4) }, - { HV_SYS_REG_DBGBCR9_EL1, HVF_SYSREG(0, 9, 2, 0, 5) }, - { HV_SYS_REG_DBGWVR9_EL1, HVF_SYSREG(0, 9, 2, 0, 6) }, - { HV_SYS_REG_DBGWCR9_EL1, HVF_SYSREG(0, 9, 2, 0, 7) }, - - { HV_SYS_REG_DBGBVR10_EL1, HVF_SYSREG(0, 10, 2, 0, 4) }, - { HV_SYS_REG_DBGBCR10_EL1, HVF_SYSREG(0, 10, 2, 0, 5) }, - { HV_SYS_REG_DBGWVR10_EL1, HVF_SYSREG(0, 10, 2, 0, 6) }, - { HV_SYS_REG_DBGWCR10_EL1, HVF_SYSREG(0, 10, 2, 0, 7) }, - - { HV_SYS_REG_DBGBVR11_EL1, HVF_SYSREG(0, 11, 2, 0, 4) }, - { HV_SYS_REG_DBGBCR11_EL1, HVF_SYSREG(0, 11, 2, 0, 5) }, - { HV_SYS_REG_DBGWVR11_EL1, HVF_SYSREG(0, 11, 2, 0, 6) }, - { HV_SYS_REG_DBGWCR11_EL1, HVF_SYSREG(0, 11, 2, 0, 7) }, - - { HV_SYS_REG_DBGBVR12_EL1, HVF_SYSREG(0, 12, 2, 0, 4) }, - { HV_SYS_REG_DBGBCR12_EL1, HVF_SYSREG(0, 12, 2, 0, 5) }, - { HV_SYS_REG_DBGWVR12_EL1, HVF_SYSREG(0, 12, 2, 0, 6) }, - { HV_SYS_REG_DBGWCR12_EL1, HVF_SYSREG(0, 12, 2, 0, 7) }, - - { HV_SYS_REG_DBGBVR13_EL1, HVF_SYSREG(0, 13, 2, 0, 4) }, - { HV_SYS_REG_DBGBCR13_EL1, HVF_SYSREG(0, 13, 2, 0, 5) }, - { HV_SYS_REG_DBGWVR13_EL1, HVF_SYSREG(0, 13, 2, 0, 6) }, - { HV_SYS_REG_DBGWCR13_EL1, HVF_SYSREG(0, 13, 2, 0, 7) }, - - { 
HV_SYS_REG_DBGBVR14_EL1, HVF_SYSREG(0, 14, 2, 0, 4) }, - { HV_SYS_REG_DBGBCR14_EL1, HVF_SYSREG(0, 14, 2, 0, 5) }, - { HV_SYS_REG_DBGWVR14_EL1, HVF_SYSREG(0, 14, 2, 0, 6) }, - { HV_SYS_REG_DBGWCR14_EL1, HVF_SYSREG(0, 14, 2, 0, 7) }, - - { HV_SYS_REG_DBGBVR15_EL1, HVF_SYSREG(0, 15, 2, 0, 4) }, - { HV_SYS_REG_DBGBCR15_EL1, HVF_SYSREG(0, 15, 2, 0, 5) }, - { HV_SYS_REG_DBGWVR15_EL1, HVF_SYSREG(0, 15, 2, 0, 6) }, - { HV_SYS_REG_DBGWCR15_EL1, HVF_SYSREG(0, 15, 2, 0, 7) }, - -#ifdef SYNC_NO_RAW_REGS - /* - * The registers below are manually synced on init because they are - * marked as NO_RAW. We still list them to make number space sync easier. - */ - { HV_SYS_REG_MDCCINT_EL1, HVF_SYSREG(0, 2, 2, 0, 0) }, - { HV_SYS_REG_MIDR_EL1, HVF_SYSREG(0, 0, 3, 0, 0) }, - { HV_SYS_REG_MPIDR_EL1, HVF_SYSREG(0, 0, 3, 0, 5) }, - { HV_SYS_REG_ID_AA64PFR0_EL1, HVF_SYSREG(0, 4, 3, 0, 0) }, -#endif - { HV_SYS_REG_ID_AA64PFR1_EL1, HVF_SYSREG(0, 4, 3, 0, 1) }, - { HV_SYS_REG_ID_AA64DFR0_EL1, HVF_SYSREG(0, 5, 3, 0, 0) }, - { HV_SYS_REG_ID_AA64DFR1_EL1, HVF_SYSREG(0, 5, 3, 0, 1) }, - { HV_SYS_REG_ID_AA64ISAR0_EL1, HVF_SYSREG(0, 6, 3, 0, 0) }, - { HV_SYS_REG_ID_AA64ISAR1_EL1, HVF_SYSREG(0, 6, 3, 0, 1) }, -#ifdef SYNC_NO_MMFR0 - /* We keep the hardware MMFR0 around. HW limits are there anyway */ - { HV_SYS_REG_ID_AA64MMFR0_EL1, HVF_SYSREG(0, 7, 3, 0, 0) }, -#endif - { HV_SYS_REG_ID_AA64MMFR1_EL1, HVF_SYSREG(0, 7, 3, 0, 1) }, - { HV_SYS_REG_ID_AA64MMFR2_EL1, HVF_SYSREG(0, 7, 3, 0, 2) }, - /* Add ID_AA64MMFR3_EL1 here when HVF supports it */ - - { HV_SYS_REG_MDSCR_EL1, HVF_SYSREG(0, 2, 2, 0, 2) }, - { HV_SYS_REG_SCTLR_EL1, HVF_SYSREG(1, 0, 3, 0, 0) }, - { HV_SYS_REG_CPACR_EL1, HVF_SYSREG(1, 0, 3, 0, 2) }, - { HV_SYS_REG_TTBR0_EL1, HVF_SYSREG(2, 0, 3, 0, 0) }, - { HV_SYS_REG_TTBR1_EL1, HVF_SYSREG(2, 0, 3, 0, 1) }, - { HV_SYS_REG_TCR_EL1, HVF_SYSREG(2, 0, 3, 0, 2) }, - - { HV_SYS_REG_APIAKEYLO_EL1, HVF_SYSREG(2, 1, 3, 0, 0) }, - { HV_SYS_REG_APIAKEYHI_EL1, HVF_SYSREG(2, 1, 3, 0, 1) }, - { HV_SYS_REG_APIBKEYLO_EL1, HVF_SYSREG(2, 1, 3, 0, 2) }, - { HV_SYS_REG_APIBKEYHI_EL1, HVF_SYSREG(2, 1, 3, 0, 3) }, - { HV_SYS_REG_APDAKEYLO_EL1, HVF_SYSREG(2, 2, 3, 0, 0) }, - { HV_SYS_REG_APDAKEYHI_EL1, HVF_SYSREG(2, 2, 3, 0, 1) }, - { HV_SYS_REG_APDBKEYLO_EL1, HVF_SYSREG(2, 2, 3, 0, 2) }, - { HV_SYS_REG_APDBKEYHI_EL1, HVF_SYSREG(2, 2, 3, 0, 3) }, - { HV_SYS_REG_APGAKEYLO_EL1, HVF_SYSREG(2, 3, 3, 0, 0) }, - { HV_SYS_REG_APGAKEYHI_EL1, HVF_SYSREG(2, 3, 3, 0, 1) }, - - { HV_SYS_REG_SPSR_EL1, HVF_SYSREG(4, 0, 3, 0, 0) }, - { HV_SYS_REG_ELR_EL1, HVF_SYSREG(4, 0, 3, 0, 1) }, - { HV_SYS_REG_SP_EL0, HVF_SYSREG(4, 1, 3, 0, 0) }, - { HV_SYS_REG_AFSR0_EL1, HVF_SYSREG(5, 1, 3, 0, 0) }, - { HV_SYS_REG_AFSR1_EL1, HVF_SYSREG(5, 1, 3, 0, 1) }, - { HV_SYS_REG_ESR_EL1, HVF_SYSREG(5, 2, 3, 0, 0) }, - { HV_SYS_REG_FAR_EL1, HVF_SYSREG(6, 0, 3, 0, 0) }, - { HV_SYS_REG_PAR_EL1, HVF_SYSREG(7, 4, 3, 0, 0) }, - { HV_SYS_REG_MAIR_EL1, HVF_SYSREG(10, 2, 3, 0, 0) }, - { HV_SYS_REG_AMAIR_EL1, HVF_SYSREG(10, 3, 3, 0, 0) }, - { HV_SYS_REG_VBAR_EL1, HVF_SYSREG(12, 0, 3, 0, 0) }, - { HV_SYS_REG_CONTEXTIDR_EL1, HVF_SYSREG(13, 0, 3, 0, 1) }, - { HV_SYS_REG_TPIDR_EL1, HVF_SYSREG(13, 0, 3, 0, 4) }, - { HV_SYS_REG_CNTKCTL_EL1, HVF_SYSREG(14, 1, 3, 0, 0) }, - { HV_SYS_REG_CSSELR_EL1, HVF_SYSREG(0, 0, 3, 2, 0) }, - { HV_SYS_REG_TPIDR_EL0, HVF_SYSREG(13, 0, 3, 3, 2) }, - { HV_SYS_REG_TPIDRRO_EL0, HVF_SYSREG(13, 0, 3, 3, 3) }, - { HV_SYS_REG_CNTV_CTL_EL0, HVF_SYSREG(14, 3, 3, 3, 1) }, - { HV_SYS_REG_CNTV_CVAL_EL0, HVF_SYSREG(14, 3, 3, 3, 2) }, - { HV_SYS_REG_SP_EL1, HVF_SYSREG(4, 
1, 3, 4, 0) }, +#define KVMID_TO_HVF(KVM) ((KVM) & 0xffff) +#define HVF_TO_KVMID(HVF) \ + (CP_REG_ARM64 | CP_REG_SIZE_U64 | CP_REG_ARM64_SYSREG | (HVF)) + +/* Verify this at compile-time. */ + +#define DEF_SYSREG(HVF_ID, ...) \ + QEMU_BUILD_BUG_ON(HVF_ID != KVMID_TO_HVF(KVMID_AA64_SYS_REG64(__VA_ARGS__))); + +#include "sysreg.c.inc" + +#undef DEF_SYSREG + +#define DEF_SYSREG(HVF_ID, op0, op1, crn, crm, op2) HVF_ID, + +static const hv_sys_reg_t hvf_sreg_list[] = { +#include "sysreg.c.inc" }; +#undef DEF_SYSREG + int hvf_get_registers(CPUState *cpu) { ARMCPU *arm_cpu = ARM_CPU(cpu); @@ -552,7 +429,7 @@ int hvf_get_registers(CPUState *cpu) hv_return_t ret; uint64_t val; hv_simd_fp_uchar16_t fpval; - int i; + int i, n; for (i = 0; i < ARRAY_SIZE(hvf_reg_match); i++) { ret = hv_vcpu_get_reg(cpu->accel->fd, hvf_reg_match[i].reg, &val); @@ -581,14 +458,13 @@ int hvf_get_registers(CPUState *cpu) assert_hvf_ok(ret); pstate_write(env, val); - for (i = 0; i < ARRAY_SIZE(hvf_sreg_match); i++) { - if (hvf_sreg_match[i].cp_idx == -1) { - continue; - } + for (i = 0, n = arm_cpu->cpreg_array_len; i < n; i++) { + uint64_t kvm_id = arm_cpu->cpreg_indexes[i]; + int hvf_id = KVMID_TO_HVF(kvm_id); if (cpu->accel->guest_debug_enabled) { /* Handle debug registers */ - switch (hvf_sreg_match[i].reg) { + switch (hvf_id) { case HV_SYS_REG_DBGBVR0_EL1: case HV_SYS_REG_DBGBCR0_EL1: case HV_SYS_REG_DBGWVR0_EL1: @@ -662,20 +538,22 @@ int hvf_get_registers(CPUState *cpu) * vCPU but simply keep the values from the previous * environment. */ - const ARMCPRegInfo *ri; - ri = get_arm_cp_reginfo(arm_cpu->cp_regs, hvf_sreg_match[i].key); + uint32_t key = kvm_to_cpreg_id(kvm_id); + const ARMCPRegInfo *ri = + get_arm_cp_reginfo(arm_cpu->cp_regs, key); + val = read_raw_cp_reg(env, ri); - arm_cpu->cpreg_values[hvf_sreg_match[i].cp_idx] = val; + arm_cpu->cpreg_values[i] = val; continue; } } } - ret = hv_vcpu_get_sys_reg(cpu->accel->fd, hvf_sreg_match[i].reg, &val); + ret = hv_vcpu_get_sys_reg(cpu->accel->fd, hvf_id, &val); assert_hvf_ok(ret); - arm_cpu->cpreg_values[hvf_sreg_match[i].cp_idx] = val; + arm_cpu->cpreg_values[i] = val; } assert(write_list_to_cpustate(arm_cpu)); @@ -691,7 +569,7 @@ int hvf_put_registers(CPUState *cpu) hv_return_t ret; uint64_t val; hv_simd_fp_uchar16_t fpval; - int i; + int i, n; for (i = 0; i < ARRAY_SIZE(hvf_reg_match); i++) { val = *(uint64_t *)((void *)env + hvf_reg_match[i].offset); @@ -718,14 +596,13 @@ int hvf_put_registers(CPUState *cpu) aarch64_save_sp(env, arm_current_el(env)); assert(write_cpustate_to_list(arm_cpu, false)); - for (i = 0; i < ARRAY_SIZE(hvf_sreg_match); i++) { - if (hvf_sreg_match[i].cp_idx == -1) { - continue; - } + for (i = 0, n = arm_cpu->cpreg_array_len; i < n; i++) { + uint64_t kvm_id = arm_cpu->cpreg_indexes[i]; + int hvf_id = KVMID_TO_HVF(kvm_id); if (cpu->accel->guest_debug_enabled) { /* Handle debug registers */ - switch (hvf_sreg_match[i].reg) { + switch (hvf_id) { case HV_SYS_REG_DBGBVR0_EL1: case HV_SYS_REG_DBGBCR0_EL1: case HV_SYS_REG_DBGWVR0_EL1: @@ -799,8 +676,8 @@ int hvf_put_registers(CPUState *cpu) } } - val = arm_cpu->cpreg_values[hvf_sreg_match[i].cp_idx]; - ret = hv_vcpu_set_sys_reg(cpu->accel->fd, hvf_sreg_match[i].reg, val); + val = arm_cpu->cpreg_values[i]; + ret = hv_vcpu_set_sys_reg(cpu->accel->fd, hvf_id, val); assert_hvf_ok(ret); } @@ -812,9 +689,9 @@ int hvf_put_registers(CPUState *cpu) static void flush_cpu_state(CPUState *cpu) { - if (cpu->accel->dirty) { + if (cpu->vcpu_dirty) { hvf_put_registers(cpu); - cpu->accel->dirty = false; + 
cpu->vcpu_dirty = false; } } @@ -845,14 +722,17 @@ static uint64_t hvf_get_reg(CPUState *cpu, int rt) return val; } -static void clamp_id_aa64mmfr0_parange_to_ipa_size(uint64_t *id_aa64mmfr0) +static void clamp_id_aa64mmfr0_parange_to_ipa_size(ARMISARegisters *isar) { uint32_t ipa_size = chosen_ipa_bit_size ? chosen_ipa_bit_size : hvf_arm_get_max_ipa_bit_size(); + uint64_t id_aa64mmfr0; /* Clamp down the PARange to the IPA size the kernel supports. */ uint8_t index = round_down_to_parange_index(ipa_size); - *id_aa64mmfr0 = (*id_aa64mmfr0 & ~R_ID_AA64MMFR0_PARANGE_MASK) | index; + id_aa64mmfr0 = GET_IDREG(isar, ID_AA64MMFR0); + id_aa64mmfr0 = (id_aa64mmfr0 & ~R_ID_AA64MMFR0_PARANGE_MASK) | index; + SET_IDREG(isar, ID_AA64MMFR0, id_aa64mmfr0); } static bool hvf_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf) @@ -862,16 +742,17 @@ static bool hvf_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf) int reg; uint64_t *val; } regs[] = { - { HV_SYS_REG_ID_AA64PFR0_EL1, &host_isar.id_aa64pfr0 }, - { HV_SYS_REG_ID_AA64PFR1_EL1, &host_isar.id_aa64pfr1 }, - { HV_SYS_REG_ID_AA64DFR0_EL1, &host_isar.id_aa64dfr0 }, - { HV_SYS_REG_ID_AA64DFR1_EL1, &host_isar.id_aa64dfr1 }, - { HV_SYS_REG_ID_AA64ISAR0_EL1, &host_isar.id_aa64isar0 }, - { HV_SYS_REG_ID_AA64ISAR1_EL1, &host_isar.id_aa64isar1 }, + { HV_SYS_REG_ID_AA64PFR0_EL1, &host_isar.idregs[ID_AA64PFR0_EL1_IDX] }, + { HV_SYS_REG_ID_AA64PFR1_EL1, &host_isar.idregs[ID_AA64PFR1_EL1_IDX] }, + /* Add ID_AA64PFR2_EL1 here when HVF supports it */ + { HV_SYS_REG_ID_AA64DFR0_EL1, &host_isar.idregs[ID_AA64DFR0_EL1_IDX] }, + { HV_SYS_REG_ID_AA64DFR1_EL1, &host_isar.idregs[ID_AA64DFR1_EL1_IDX] }, + { HV_SYS_REG_ID_AA64ISAR0_EL1, &host_isar.idregs[ID_AA64ISAR0_EL1_IDX] }, + { HV_SYS_REG_ID_AA64ISAR1_EL1, &host_isar.idregs[ID_AA64ISAR1_EL1_IDX] }, /* Add ID_AA64ISAR2_EL1 here when HVF supports it */ - { HV_SYS_REG_ID_AA64MMFR0_EL1, &host_isar.id_aa64mmfr0 }, - { HV_SYS_REG_ID_AA64MMFR1_EL1, &host_isar.id_aa64mmfr1 }, - { HV_SYS_REG_ID_AA64MMFR2_EL1, &host_isar.id_aa64mmfr2 }, + { HV_SYS_REG_ID_AA64MMFR0_EL1, &host_isar.idregs[ID_AA64MMFR0_EL1_IDX] }, + { HV_SYS_REG_ID_AA64MMFR1_EL1, &host_isar.idregs[ID_AA64MMFR1_EL1_IDX] }, + { HV_SYS_REG_ID_AA64MMFR2_EL1, &host_isar.idregs[ID_AA64MMFR2_EL1_IDX] }, /* Add ID_AA64MMFR3_EL1 here when HVF supports it */ }; hv_vcpu_t fd; @@ -879,7 +760,7 @@ static bool hvf_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf) hv_vcpu_exit_t *exit; int i; - ahcf->dtb_compatible = "arm,arm-v8"; + ahcf->dtb_compatible = "arm,armv8"; ahcf->features = (1ULL << ARM_FEATURE_V8) | (1ULL << ARM_FEATURE_NEON) | (1ULL << ARM_FEATURE_AARCH64) | @@ -898,7 +779,7 @@ static bool hvf_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf) r |= hv_vcpu_get_sys_reg(fd, HV_SYS_REG_MIDR_EL1, &ahcf->midr); r |= hv_vcpu_destroy(fd); - clamp_id_aa64mmfr0_parange_to_ipa_size(&host_isar.id_aa64mmfr0); + clamp_id_aa64mmfr0_parange_to_ipa_size(&host_isar); /* * Disable SME, which is not properly handled by QEMU hvf yet. 
@@ -910,7 +791,8 @@ static bool hvf_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf) * - fix any assumptions we made that SME implies SVE (since * on the M4 there is SME but not SVE) */ - host_isar.id_aa64pfr1 &= ~R_ID_AA64PFR1_SME_MASK; + SET_IDREG(&host_isar, ID_AA64PFR1, + GET_IDREG(&host_isar, ID_AA64PFR1) & ~R_ID_AA64PFR1_SME_MASK); ahcf->isar = host_isar; @@ -927,7 +809,7 @@ static bool hvf_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf) ahcf->reset_sctlr |= 0x00800000; /* Make sure we don't advertise AArch32 support for EL0/EL1 */ - if ((host_isar.id_aa64pfr0 & 0xff) != 0x11) { + if ((GET_IDREG(&host_isar, ID_AA64PFR0) & 0xff) != 0x11) { return false; } @@ -1006,7 +888,7 @@ int hvf_arch_init_vcpu(CPUState *cpu) { ARMCPU *arm_cpu = ARM_CPU(cpu); CPUARMState *env = &arm_cpu->env; - uint32_t sregs_match_len = ARRAY_SIZE(hvf_sreg_match); + uint32_t sregs_match_len = ARRAY_SIZE(hvf_sreg_list); uint32_t sregs_cnt = 0; uint64_t pfr; hv_return_t ret; @@ -1031,21 +913,22 @@ int hvf_arch_init_vcpu(CPUState *cpu) /* Populate cp list for all known sysregs */ for (i = 0; i < sregs_match_len; i++) { - const ARMCPRegInfo *ri; - uint32_t key = hvf_sreg_match[i].key; + hv_sys_reg_t hvf_id = hvf_sreg_list[i]; + uint64_t kvm_id = HVF_TO_KVMID(hvf_id); + uint32_t key = kvm_to_cpreg_id(kvm_id); + const ARMCPRegInfo *ri = get_arm_cp_reginfo(arm_cpu->cp_regs, key); - ri = get_arm_cp_reginfo(arm_cpu->cp_regs, key); if (ri) { assert(!(ri->type & ARM_CP_NO_RAW)); - hvf_sreg_match[i].cp_idx = sregs_cnt; - arm_cpu->cpreg_indexes[sregs_cnt++] = cpreg_to_kvm_id(key); - } else { - hvf_sreg_match[i].cp_idx = -1; + arm_cpu->cpreg_indexes[sregs_cnt++] = kvm_id; } } arm_cpu->cpreg_array_len = sregs_cnt; arm_cpu->cpreg_vmstate_array_len = sregs_cnt; + /* cpreg tuples must be in strictly ascending order */ + qsort(arm_cpu->cpreg_indexes, sregs_cnt, sizeof(uint64_t), compare_u64); + assert(write_cpustate_to_list(arm_cpu, false)); /* Set CP_NO_RAW system registers on init */ @@ -1065,12 +948,12 @@ int hvf_arch_init_vcpu(CPUState *cpu) /* We're limited to underlying hardware caps, override internal versions */ ret = hv_vcpu_get_sys_reg(cpu->accel->fd, HV_SYS_REG_ID_AA64MMFR0_EL1, - &arm_cpu->isar.id_aa64mmfr0); + &arm_cpu->isar.idregs[ID_AA64MMFR0_EL1_IDX]); assert_hvf_ok(ret); - clamp_id_aa64mmfr0_parange_to_ipa_size(&arm_cpu->isar.id_aa64mmfr0); + clamp_id_aa64mmfr0_parange_to_ipa_size(&arm_cpu->isar); ret = hv_vcpu_set_sys_reg(cpu->accel->fd, HV_SYS_REG_ID_AA64MMFR0_EL1, - arm_cpu->isar.id_aa64mmfr0); + arm_cpu->isar.idregs[ID_AA64MMFR0_EL1_IDX]); assert_hvf_ok(ret); return 0; @@ -1083,13 +966,13 @@ void hvf_kick_vcpu_thread(CPUState *cpu) } static void hvf_raise_exception(CPUState *cpu, uint32_t excp, - uint32_t syndrome) + uint32_t syndrome, int target_el) { ARMCPU *arm_cpu = ARM_CPU(cpu); CPUARMState *env = &arm_cpu->env; cpu->exception_index = excp; - env->exception.target_el = 1; + env->exception.target_el = target_el; env->exception.syndrome = syndrome; arm_cpu_do_interrupt(cpu); @@ -1242,11 +1125,10 @@ static bool is_id_sysreg(uint32_t reg) static uint32_t hvf_reg2cp_reg(uint32_t reg) { - return ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP, + return ENCODE_AA64_CP_REG((reg >> SYSREG_OP0_SHIFT) & SYSREG_OP0_MASK, + (reg >> SYSREG_OP1_SHIFT) & SYSREG_OP1_MASK, (reg >> SYSREG_CRN_SHIFT) & SYSREG_CRN_MASK, (reg >> SYSREG_CRM_SHIFT) & SYSREG_CRM_MASK, - (reg >> SYSREG_OP0_SHIFT) & SYSREG_OP0_MASK, - (reg >> SYSREG_OP1_SHIFT) & SYSREG_OP1_MASK, (reg >> SYSREG_OP2_SHIFT) & SYSREG_OP2_MASK); } @@ -1258,6 +1140,9 @@ 
static bool hvf_sysreg_read_cp(CPUState *cpu, uint32_t reg, uint64_t *val) ri = get_arm_cp_reginfo(arm_cpu->cp_regs, hvf_reg2cp_reg(reg)); if (ri) { + if (!cp_access_ok(1, ri, true)) { + return false; + } if (ri->accessfn) { if (ri->accessfn(env, ri, true) != CP_ACCESS_OK) { return false; @@ -1268,7 +1153,7 @@ static bool hvf_sysreg_read_cp(CPUState *cpu, uint32_t reg, uint64_t *val) } else if (ri->readfn) { *val = ri->readfn(env, ri); } else { - *val = CPREG_FIELD64(env, ri); + *val = raw_read(env, ri); } trace_hvf_vgic_read(ri->name, *val); return true; @@ -1353,6 +1238,7 @@ static int hvf_sysreg_read(CPUState *cpu, uint32_t reg, uint64_t *val) case SYSREG_ICC_IGRPEN0_EL1: case SYSREG_ICC_IGRPEN1_EL1: case SYSREG_ICC_PMR_EL1: + case SYSREG_ICC_RPR_EL1: case SYSREG_ICC_SGI0R_EL1: case SYSREG_ICC_SGI1R_EL1: case SYSREG_ICC_SRE_EL1: @@ -1449,7 +1335,7 @@ static int hvf_sysreg_read(CPUState *cpu, uint32_t reg, uint64_t *val) SYSREG_CRN(reg), SYSREG_CRM(reg), SYSREG_OP2(reg)); - hvf_raise_exception(cpu, EXCP_UDEF, syn_uncategorized()); + hvf_raise_exception(cpu, EXCP_UDEF, syn_uncategorized(), 1); return 1; } @@ -1538,6 +1424,9 @@ static bool hvf_sysreg_write_cp(CPUState *cpu, uint32_t reg, uint64_t val) ri = get_arm_cp_reginfo(arm_cpu->cp_regs, hvf_reg2cp_reg(reg)); if (ri) { + if (!cp_access_ok(1, ri, false)) { + return false; + } if (ri->accessfn) { if (ri->accessfn(env, ri, false) != CP_ACCESS_OK) { return false; @@ -1546,7 +1435,7 @@ static bool hvf_sysreg_write_cp(CPUState *cpu, uint32_t reg, uint64_t val) if (ri->writefn) { ri->writefn(env, ri, val); } else { - CPREG_FIELD64(env, ri) = val; + raw_write(env, ri, val); } trace_hvf_vgic_write(ri->name, val); @@ -1645,6 +1534,9 @@ static int hvf_sysreg_write(CPUState *cpu, uint32_t reg, uint64_t val) case SYSREG_OSDLR_EL1: /* Dummy register */ return 0; + case SYSREG_LORC_EL1: + /* Dummy register */ + return 0; case SYSREG_ICC_AP0R0_EL1: case SYSREG_ICC_AP0R1_EL1: case SYSREG_ICC_AP0R2_EL1: @@ -1667,6 +1559,7 @@ static int hvf_sysreg_write(CPUState *cpu, uint32_t reg, uint64_t val) case SYSREG_ICC_IGRPEN0_EL1: case SYSREG_ICC_IGRPEN1_EL1: case SYSREG_ICC_PMR_EL1: + case SYSREG_ICC_RPR_EL1: case SYSREG_ICC_SGI0R_EL1: case SYSREG_ICC_SGI1R_EL1: case SYSREG_ICC_SRE_EL1: @@ -1759,19 +1652,19 @@ static int hvf_sysreg_write(CPUState *cpu, uint32_t reg, uint64_t val) SYSREG_CRN(reg), SYSREG_CRM(reg), SYSREG_OP2(reg)); - hvf_raise_exception(cpu, EXCP_UDEF, syn_uncategorized()); + hvf_raise_exception(cpu, EXCP_UDEF, syn_uncategorized(), 1); return 1; } static int hvf_inject_interrupts(CPUState *cpu) { - if (cpu->interrupt_request & CPU_INTERRUPT_FIQ) { + if (cpu_test_interrupt(cpu, CPU_INTERRUPT_FIQ)) { trace_hvf_inject_fiq(); hv_vcpu_set_pending_interrupt(cpu->accel->fd, HV_INTERRUPT_TYPE_FIQ, true); } - if (cpu->interrupt_request & CPU_INTERRUPT_HARD) { + if (cpu_test_interrupt(cpu, CPU_INTERRUPT_HARD)) { trace_hvf_inject_irq(); hv_vcpu_set_pending_interrupt(cpu->accel->fd, HV_INTERRUPT_TYPE_IRQ, true); @@ -1823,7 +1716,7 @@ static void hvf_wfi(CPUState *cpu) uint64_t nanos; uint32_t cntfrq; - if (cpu->interrupt_request & (CPU_INTERRUPT_HARD | CPU_INTERRUPT_FIQ)) { + if (cpu_test_interrupt(cpu, CPU_INTERRUPT_HARD | CPU_INTERRUPT_FIQ)) { /* Interrupt pending, no need to wait */ return; } @@ -1910,7 +1803,17 @@ int hvf_vcpu_exec(CPUState *cpu) flush_cpu_state(cpu); bql_unlock(); - assert_hvf_ok(hv_vcpu_run(cpu->accel->fd)); + r = hv_vcpu_run(cpu->accel->fd); + bql_lock(); + switch (r) { + case HV_SUCCESS: + break; + case HV_ILLEGAL_GUEST_STATE: + 
trace_hvf_illegal_guest_state(); + /* fall through */ + default: + g_assert_not_reached(); + } /* handle VMEXIT */ uint64_t exit_reason = hvf_exit->reason; @@ -1918,7 +1821,6 @@ int hvf_vcpu_exec(CPUState *cpu) uint32_t ec = syn_get_ec(syndrome); ret = 0; - bql_lock(); switch (exit_reason) { case HV_EXIT_REASON_EXCEPTION: /* This is the main one, handle below. */ @@ -1953,7 +1855,7 @@ int hvf_vcpu_exec(CPUState *cpu) if (!hvf_find_sw_breakpoint(cpu, env->pc)) { /* Re-inject into the guest */ ret = 0; - hvf_raise_exception(cpu, EXCP_BKPT, syn_aa64_bkpt(0)); + hvf_raise_exception(cpu, EXCP_BKPT, syn_aa64_bkpt(0), 1); } break; } @@ -1991,7 +1893,7 @@ int hvf_vcpu_exec(CPUState *cpu) uint32_t cm = (syndrome >> 8) & 0x1; uint64_t val = 0; - trace_hvf_data_abort(env->pc, hvf_exit->exception.virtual_address, + trace_hvf_data_abort(hvf_exit->exception.virtual_address, hvf_exit->exception.physical_address, isv, iswrite, s1ptw, len, srt); @@ -2058,13 +1960,13 @@ int hvf_vcpu_exec(CPUState *cpu) cpu_synchronize_state(cpu); if (arm_cpu->psci_conduit == QEMU_PSCI_CONDUIT_HVC) { if (!hvf_handle_psci_call(cpu)) { - trace_hvf_unknown_hvc(env->xregs[0]); + trace_hvf_unknown_hvc(env->pc, env->xregs[0]); /* SMCCC 1.3 section 5.2 says every unknown SMCCC call returns -1 */ env->xregs[0] = -1; } } else { - trace_hvf_unknown_hvc(env->xregs[0]); - hvf_raise_exception(cpu, EXCP_UDEF, syn_uncategorized()); + trace_hvf_unknown_hvc(env->pc, env->xregs[0]); + hvf_raise_exception(cpu, EXCP_UDEF, syn_uncategorized(), 1); } break; case EC_AA64_SMC: @@ -2079,7 +1981,7 @@ int hvf_vcpu_exec(CPUState *cpu) } } else { trace_hvf_unknown_smc(env->xregs[0]); - hvf_raise_exception(cpu, EXCP_UDEF, syn_uncategorized()); + hvf_raise_exception(cpu, EXCP_UDEF, syn_uncategorized(), 1); } break; default: diff --git a/target/arm/hvf/sysreg.c.inc b/target/arm/hvf/sysreg.c.inc new file mode 100644 index 0000000..067a860 --- /dev/null +++ b/target/arm/hvf/sysreg.c.inc @@ -0,0 +1,147 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +DEF_SYSREG(HV_SYS_REG_DBGBVR0_EL1, 2, 0, 0, 0, 4) +DEF_SYSREG(HV_SYS_REG_DBGBCR0_EL1, 2, 0, 0, 0, 5) +DEF_SYSREG(HV_SYS_REG_DBGWVR0_EL1, 2, 0, 0, 0, 6) +DEF_SYSREG(HV_SYS_REG_DBGWCR0_EL1, 2, 0, 0, 0, 7) + +DEF_SYSREG(HV_SYS_REG_DBGBVR1_EL1, 2, 0, 0, 1, 4) +DEF_SYSREG(HV_SYS_REG_DBGBCR1_EL1, 2, 0, 0, 1, 5) +DEF_SYSREG(HV_SYS_REG_DBGWVR1_EL1, 2, 0, 0, 1, 6) +DEF_SYSREG(HV_SYS_REG_DBGWCR1_EL1, 2, 0, 0, 1, 7) + +DEF_SYSREG(HV_SYS_REG_DBGBVR2_EL1, 2, 0, 0, 2, 4) +DEF_SYSREG(HV_SYS_REG_DBGBCR2_EL1, 2, 0, 0, 2, 5) +DEF_SYSREG(HV_SYS_REG_DBGWVR2_EL1, 2, 0, 0, 2, 6) +DEF_SYSREG(HV_SYS_REG_DBGWCR2_EL1, 2, 0, 0, 2, 7) + +DEF_SYSREG(HV_SYS_REG_DBGBVR3_EL1, 2, 0, 0, 3, 4) +DEF_SYSREG(HV_SYS_REG_DBGBCR3_EL1, 2, 0, 0, 3, 5) +DEF_SYSREG(HV_SYS_REG_DBGWVR3_EL1, 2, 0, 0, 3, 6) +DEF_SYSREG(HV_SYS_REG_DBGWCR3_EL1, 2, 0, 0, 3, 7) + +DEF_SYSREG(HV_SYS_REG_DBGBVR4_EL1, 2, 0, 0, 4, 4) +DEF_SYSREG(HV_SYS_REG_DBGBCR4_EL1, 2, 0, 0, 4, 5) +DEF_SYSREG(HV_SYS_REG_DBGWVR4_EL1, 2, 0, 0, 4, 6) +DEF_SYSREG(HV_SYS_REG_DBGWCR4_EL1, 2, 0, 0, 4, 7) + +DEF_SYSREG(HV_SYS_REG_DBGBVR5_EL1, 2, 0, 0, 5, 4) +DEF_SYSREG(HV_SYS_REG_DBGBCR5_EL1, 2, 0, 0, 5, 5) +DEF_SYSREG(HV_SYS_REG_DBGWVR5_EL1, 2, 0, 0, 5, 6) +DEF_SYSREG(HV_SYS_REG_DBGWCR5_EL1, 2, 0, 0, 5, 7) + +DEF_SYSREG(HV_SYS_REG_DBGBVR6_EL1, 2, 0, 0, 6, 4) +DEF_SYSREG(HV_SYS_REG_DBGBCR6_EL1, 2, 0, 0, 6, 5) +DEF_SYSREG(HV_SYS_REG_DBGWVR6_EL1, 2, 0, 0, 6, 6) +DEF_SYSREG(HV_SYS_REG_DBGWCR6_EL1, 2, 0, 0, 6, 7) + +DEF_SYSREG(HV_SYS_REG_DBGBVR7_EL1, 2, 0, 0, 7, 4) +DEF_SYSREG(HV_SYS_REG_DBGBCR7_EL1, 2, 
0, 0, 7, 5) +DEF_SYSREG(HV_SYS_REG_DBGWVR7_EL1, 2, 0, 0, 7, 6) +DEF_SYSREG(HV_SYS_REG_DBGWCR7_EL1, 2, 0, 0, 7, 7) + +DEF_SYSREG(HV_SYS_REG_DBGBVR8_EL1, 2, 0, 0, 8, 4) +DEF_SYSREG(HV_SYS_REG_DBGBCR8_EL1, 2, 0, 0, 8, 5) +DEF_SYSREG(HV_SYS_REG_DBGWVR8_EL1, 2, 0, 0, 8, 6) +DEF_SYSREG(HV_SYS_REG_DBGWCR8_EL1, 2, 0, 0, 8, 7) + +DEF_SYSREG(HV_SYS_REG_DBGBVR9_EL1, 2, 0, 0, 9, 4) +DEF_SYSREG(HV_SYS_REG_DBGBCR9_EL1, 2, 0, 0, 9, 5) +DEF_SYSREG(HV_SYS_REG_DBGWVR9_EL1, 2, 0, 0, 9, 6) +DEF_SYSREG(HV_SYS_REG_DBGWCR9_EL1, 2, 0, 0, 9, 7) + +DEF_SYSREG(HV_SYS_REG_DBGBVR10_EL1, 2, 0, 0, 10, 4) +DEF_SYSREG(HV_SYS_REG_DBGBCR10_EL1, 2, 0, 0, 10, 5) +DEF_SYSREG(HV_SYS_REG_DBGWVR10_EL1, 2, 0, 0, 10, 6) +DEF_SYSREG(HV_SYS_REG_DBGWCR10_EL1, 2, 0, 0, 10, 7) + +DEF_SYSREG(HV_SYS_REG_DBGBVR11_EL1, 2, 0, 0, 11, 4) +DEF_SYSREG(HV_SYS_REG_DBGBCR11_EL1, 2, 0, 0, 11, 5) +DEF_SYSREG(HV_SYS_REG_DBGWVR11_EL1, 2, 0, 0, 11, 6) +DEF_SYSREG(HV_SYS_REG_DBGWCR11_EL1, 2, 0, 0, 11, 7) + +DEF_SYSREG(HV_SYS_REG_DBGBVR12_EL1, 2, 0, 0, 12, 4) +DEF_SYSREG(HV_SYS_REG_DBGBCR12_EL1, 2, 0, 0, 12, 5) +DEF_SYSREG(HV_SYS_REG_DBGWVR12_EL1, 2, 0, 0, 12, 6) +DEF_SYSREG(HV_SYS_REG_DBGWCR12_EL1, 2, 0, 0, 12, 7) + +DEF_SYSREG(HV_SYS_REG_DBGBVR13_EL1, 2, 0, 0, 13, 4) +DEF_SYSREG(HV_SYS_REG_DBGBCR13_EL1, 2, 0, 0, 13, 5) +DEF_SYSREG(HV_SYS_REG_DBGWVR13_EL1, 2, 0, 0, 13, 6) +DEF_SYSREG(HV_SYS_REG_DBGWCR13_EL1, 2, 0, 0, 13, 7) + +DEF_SYSREG(HV_SYS_REG_DBGBVR14_EL1, 2, 0, 0, 14, 4) +DEF_SYSREG(HV_SYS_REG_DBGBCR14_EL1, 2, 0, 0, 14, 5) +DEF_SYSREG(HV_SYS_REG_DBGWVR14_EL1, 2, 0, 0, 14, 6) +DEF_SYSREG(HV_SYS_REG_DBGWCR14_EL1, 2, 0, 0, 14, 7) + +DEF_SYSREG(HV_SYS_REG_DBGBVR15_EL1, 2, 0, 0, 15, 4) +DEF_SYSREG(HV_SYS_REG_DBGBCR15_EL1, 2, 0, 0, 15, 5) +DEF_SYSREG(HV_SYS_REG_DBGWVR15_EL1, 2, 0, 0, 15, 6) +DEF_SYSREG(HV_SYS_REG_DBGWCR15_EL1, 2, 0, 0, 15, 7) + +#ifdef SYNC_NO_RAW_REGS +/* + * The registers below are manually synced on init because they are + * marked as NO_RAW. We still list them to make number space sync easier. + */ +DEF_SYSREG(HV_SYS_REG_MDCCINT_EL1, 2, 0, 0, 2, 0) +DEF_SYSREG(HV_SYS_REG_MIDR_EL1, 3, 0, 0, 0, 0) +DEF_SYSREG(HV_SYS_REG_MPIDR_EL1, 3, 0, 0, 0, 5) +DEF_SYSREG(HV_SYS_REG_ID_AA64PFR0_EL1, 3, 0, 0, 4, 0) +#endif + +DEF_SYSREG(HV_SYS_REG_ID_AA64PFR1_EL1, 3, 0, 0, 4, 1) +/* Add ID_AA64PFR2_EL1 here when HVF supports it */ +DEF_SYSREG(HV_SYS_REG_ID_AA64DFR0_EL1, 3, 0, 0, 5, 0) +DEF_SYSREG(HV_SYS_REG_ID_AA64DFR1_EL1, 3, 0, 0, 5, 1) +DEF_SYSREG(HV_SYS_REG_ID_AA64ISAR0_EL1, 3, 0, 0, 6, 0) +DEF_SYSREG(HV_SYS_REG_ID_AA64ISAR1_EL1, 3, 0, 0, 6, 1) + +#ifdef SYNC_NO_MMFR0 +/* We keep the hardware MMFR0 around. 
HW limits are there anyway */ +DEF_SYSREG(HV_SYS_REG_ID_AA64MMFR0_EL1, 3, 0, 0, 7, 0) +#endif + +DEF_SYSREG(HV_SYS_REG_ID_AA64MMFR1_EL1, 3, 0, 0, 7, 1) +DEF_SYSREG(HV_SYS_REG_ID_AA64MMFR2_EL1, 3, 0, 0, 7, 2) +/* Add ID_AA64MMFR3_EL1 here when HVF supports it */ + +DEF_SYSREG(HV_SYS_REG_MDSCR_EL1, 2, 0, 0, 2, 2) +DEF_SYSREG(HV_SYS_REG_SCTLR_EL1, 3, 0, 1, 0, 0) +DEF_SYSREG(HV_SYS_REG_CPACR_EL1, 3, 0, 1, 0, 2) +DEF_SYSREG(HV_SYS_REG_TTBR0_EL1, 3, 0, 2, 0, 0) +DEF_SYSREG(HV_SYS_REG_TTBR1_EL1, 3, 0, 2, 0, 1) +DEF_SYSREG(HV_SYS_REG_TCR_EL1, 3, 0, 2, 0, 2) + +DEF_SYSREG(HV_SYS_REG_APIAKEYLO_EL1, 3, 0, 2, 1, 0) +DEF_SYSREG(HV_SYS_REG_APIAKEYHI_EL1, 3, 0, 2, 1, 1) +DEF_SYSREG(HV_SYS_REG_APIBKEYLO_EL1, 3, 0, 2, 1, 2) +DEF_SYSREG(HV_SYS_REG_APIBKEYHI_EL1, 3, 0, 2, 1, 3) +DEF_SYSREG(HV_SYS_REG_APDAKEYLO_EL1, 3, 0, 2, 2, 0) +DEF_SYSREG(HV_SYS_REG_APDAKEYHI_EL1, 3, 0, 2, 2, 1) +DEF_SYSREG(HV_SYS_REG_APDBKEYLO_EL1, 3, 0, 2, 2, 2) +DEF_SYSREG(HV_SYS_REG_APDBKEYHI_EL1, 3, 0, 2, 2, 3) +DEF_SYSREG(HV_SYS_REG_APGAKEYLO_EL1, 3, 0, 2, 3, 0) +DEF_SYSREG(HV_SYS_REG_APGAKEYHI_EL1, 3, 0, 2, 3, 1) + +DEF_SYSREG(HV_SYS_REG_SPSR_EL1, 3, 0, 4, 0, 0) +DEF_SYSREG(HV_SYS_REG_ELR_EL1, 3, 0, 4, 0, 1) +DEF_SYSREG(HV_SYS_REG_SP_EL0, 3, 0, 4, 1, 0) +DEF_SYSREG(HV_SYS_REG_AFSR0_EL1, 3, 0, 5, 1, 0) +DEF_SYSREG(HV_SYS_REG_AFSR1_EL1, 3, 0, 5, 1, 1) +DEF_SYSREG(HV_SYS_REG_ESR_EL1, 3, 0, 5, 2, 0) +DEF_SYSREG(HV_SYS_REG_FAR_EL1, 3, 0, 6, 0, 0) +DEF_SYSREG(HV_SYS_REG_PAR_EL1, 3, 0, 7, 4, 0) +DEF_SYSREG(HV_SYS_REG_MAIR_EL1, 3, 0, 10, 2, 0) +DEF_SYSREG(HV_SYS_REG_AMAIR_EL1, 3, 0, 10, 3, 0) +DEF_SYSREG(HV_SYS_REG_VBAR_EL1, 3, 0, 12, 0, 0) +DEF_SYSREG(HV_SYS_REG_CONTEXTIDR_EL1, 3, 0, 13, 0, 1) +DEF_SYSREG(HV_SYS_REG_TPIDR_EL1, 3, 0, 13, 0, 4) +DEF_SYSREG(HV_SYS_REG_CNTKCTL_EL1, 3, 0, 14, 1, 0) +DEF_SYSREG(HV_SYS_REG_CSSELR_EL1, 3, 2, 0, 0, 0) +DEF_SYSREG(HV_SYS_REG_TPIDR_EL0, 3, 3, 13, 0, 2) +DEF_SYSREG(HV_SYS_REG_TPIDRRO_EL0, 3, 3, 13, 0, 3) +DEF_SYSREG(HV_SYS_REG_CNTV_CTL_EL0, 3, 3, 14, 3, 1) +DEF_SYSREG(HV_SYS_REG_CNTV_CVAL_EL0, 3, 3, 14, 3, 2) +DEF_SYSREG(HV_SYS_REG_SP_EL1, 3, 4, 4, 1, 0) diff --git a/target/arm/hvf/trace-events b/target/arm/hvf/trace-events index 4fbbe4b..b29a995 100644 --- a/target/arm/hvf/trace-events +++ b/target/arm/hvf/trace-events @@ -2,12 +2,13 @@ hvf_unhandled_sysreg_read(uint64_t pc, uint32_t reg, uint32_t op0, uint32_t op1, hvf_unhandled_sysreg_write(uint64_t pc, uint32_t reg, uint32_t op0, uint32_t op1, uint32_t crn, uint32_t crm, uint32_t op2) "unhandled sysreg write at pc=0x%"PRIx64": 0x%08x (op0=%d op1=%d crn=%d crm=%d op2=%d)" hvf_inject_fiq(void) "injecting FIQ" hvf_inject_irq(void) "injecting IRQ" -hvf_data_abort(uint64_t pc, uint64_t va, uint64_t pa, bool isv, bool iswrite, bool s1ptw, uint32_t len, uint32_t srt) "data abort: [pc=0x%"PRIx64" va=0x%016"PRIx64" pa=0x%016"PRIx64" isv=%d iswrite=%d s1ptw=%d len=%d srt=%d]" +hvf_data_abort(uint64_t va, uint64_t pa, bool isv, bool iswrite, bool s1ptw, uint32_t len, uint32_t srt) "data abort: [va=0x%016"PRIx64" pa=0x%016"PRIx64" isv=%d iswrite=%d s1ptw=%d len=%d srt=%d]" hvf_sysreg_read(uint32_t reg, uint32_t op0, uint32_t op1, uint32_t crn, uint32_t crm, uint32_t op2, uint64_t val) "sysreg read 0x%08x (op0=%d op1=%d crn=%d crm=%d op2=%d) = 0x%016"PRIx64 hvf_sysreg_write(uint32_t reg, uint32_t op0, uint32_t op1, uint32_t crn, uint32_t crm, uint32_t op2, uint64_t val) "sysreg write 0x%08x (op0=%d op1=%d crn=%d crm=%d op2=%d, val=0x%016"PRIx64")" -hvf_unknown_hvc(uint64_t x0) "unknown HVC! 
0x%016"PRIx64 +hvf_unknown_hvc(uint64_t pc, uint64_t x0) "pc=0x%"PRIx64" unknown HVC! 0x%016"PRIx64 hvf_unknown_smc(uint64_t x0) "unknown SMC! 0x%016"PRIx64 hvf_exit(uint64_t syndrome, uint32_t ec, uint64_t pc) "exit: 0x%"PRIx64" [ec=0x%x pc=0x%"PRIx64"]" -hvf_psci_call(uint64_t x0, uint64_t x1, uint64_t x2, uint64_t x3, uint32_t cpuid) "PSCI Call x0=0x%016"PRIx64" x1=0x%016"PRIx64" x2=0x%016"PRIx64" x3=0x%016"PRIx64" cpu=0x%x" +hvf_psci_call(uint64_t x0, uint64_t x1, uint64_t x2, uint64_t x3, uint32_t cpuid) "PSCI Call x0=0x%016"PRIx64" x1=0x%016"PRIx64" x2=0x%016"PRIx64" x3=0x%016"PRIx64" cpuid=0x%x" hvf_vgic_write(const char *name, uint64_t val) "vgic write to %s [val=0x%016"PRIx64"]" hvf_vgic_read(const char *name, uint64_t val) "vgic read from %s [val=0x%016"PRIx64"]" +hvf_illegal_guest_state(void) "HV_ILLEGAL_GUEST_STATE" diff --git a/target/arm/hvf_arm.h b/target/arm/hvf_arm.h index 26c717b..ea82f26 100644 --- a/target/arm/hvf_arm.h +++ b/target/arm/hvf_arm.h @@ -11,7 +11,7 @@ #ifndef QEMU_HVF_ARM_H #define QEMU_HVF_ARM_H -#include "cpu.h" +#include "target/arm/cpu-qom.h" /** * hvf_arm_init_debug() - initialize guest debug capabilities @@ -22,23 +22,7 @@ void hvf_arm_init_debug(void); void hvf_arm_set_cpu_features_from_host(ARMCPU *cpu); -#ifdef CONFIG_HVF - uint32_t hvf_arm_get_default_ipa_bit_size(void); uint32_t hvf_arm_get_max_ipa_bit_size(void); -#else - -static inline uint32_t hvf_arm_get_default_ipa_bit_size(void) -{ - return 0; -} - -static inline uint32_t hvf_arm_get_max_ipa_bit_size(void) -{ - return 0; -} - -#endif - #endif diff --git a/target/arm/internals.h b/target/arm/internals.h index 3360de9..f539bbe 100644 --- a/target/arm/internals.h +++ b/target/arm/internals.h @@ -34,6 +34,7 @@ #include "system/memory.h" #include "syndrome.h" #include "cpu-features.h" +#include "mmuidx-internal.h" /* register banks for CPU modes */ #define BANK_USRSYS 0 @@ -113,11 +114,6 @@ FIELD(DBGWCR, WT, 20, 1) FIELD(DBGWCR, MASK, 24, 5) FIELD(DBGWCR, SSCE, 29, 1) -#define VTCR_NSW (1u << 29) -#define VTCR_NSA (1u << 30) -#define VSTCR_SW VTCR_NSW -#define VSTCR_SA VTCR_NSA - /* Bit definitions for CPACR (AArch32 only) */ FIELD(CPACR, CP10, 20, 2) FIELD(CPACR, CP11, 22, 2) @@ -201,6 +197,24 @@ FIELD(CPTR_EL3, TCPAC, 31, 1) #define TTBCR_SH1 (1U << 28) #define TTBCR_EAE (1U << 31) +#define TCR2_PNCH (1ULL << 0) +#define TCR2_PIE (1ULL << 1) +#define TCR2_E0POE (1ULL << 2) +#define TCR2_POE (1ULL << 3) +#define TCR2_AIE (1ULL << 4) +#define TCR2_D128 (1ULL << 5) +#define TCR2_PTTWI (1ULL << 10) +#define TCR2_HAFT (1ULL << 11) +#define TCR2_AMEC0 (1ULL << 12) +#define TCR2_AMEC1 (1ULL << 13) +#define TCR2_DISCH0 (1ULL << 14) +#define TCR2_DISCH1 (1ULL << 15) +#define TCR2_A2 (1ULL << 16) +#define TCR2_FNG0 (1ULL << 17) +#define TCR2_FNG1 (1ULL << 18) +#define TCR2_FNGNA0 (1ULL << 20) +#define TCR2_FNGNA1 (1ULL << 21) + FIELD(VTCR, T0SZ, 0, 6) FIELD(VTCR, SL0, 6, 2) FIELD(VTCR, IRGN0, 8, 2) @@ -220,6 +234,9 @@ FIELD(VTCR, NSA, 30, 1) FIELD(VTCR, DS, 32, 1) FIELD(VTCR, SL2, 33, 1) +FIELD(VSTCR, SW, 29, 1) +FIELD(VSTCR, SA, 30, 1) + #define HCRX_ENAS0 (1ULL << 0) #define HCRX_ENALS (1ULL << 1) #define HCRX_ENASR (1ULL << 2) @@ -232,6 +249,9 @@ FIELD(VTCR, SL2, 33, 1) #define HCRX_CMOW (1ULL << 9) #define HCRX_MCE2 (1ULL << 10) #define HCRX_MSCEN (1ULL << 11) +#define HCRX_TCR2EN (1ULL << 14) +#define HCRX_SCTLR2EN (1ULL << 15) +#define HCRX_GCSEN (1ULL << 22) #define HPFAR_NS (1ULL << 63) @@ -286,14 +306,14 @@ FIELD(CNTHCTL, CNTPMASK, 19, 1) * and never returns because we will longjump back up to 
the CPU main loop. */ G_NORETURN void raise_exception(CPUARMState *env, uint32_t excp, - uint32_t syndrome, uint32_t target_el); + uint64_t syndrome, uint32_t target_el); /* * Similarly, but also use unwinding to restore cpu state. */ G_NORETURN void raise_exception_ra(CPUARMState *env, uint32_t excp, - uint32_t syndrome, uint32_t target_el, - uintptr_t ra); + uint64_t syndrome, uint32_t target_el, + uintptr_t ra); /* * For AArch64, map a given EL to an index in the banked_spsr array. @@ -650,16 +670,12 @@ static inline bool arm_is_psci_call(ARMCPU *cpu, int excp_type) { return false; } -static inline void arm_handle_psci_call(ARMCPU *cpu) -{ - g_assert_not_reached(); -} #else /* Return true if the r0/x0 value indicates that this SMC/HVC is a PSCI call. */ bool arm_is_psci_call(ARMCPU *cpu, int excp_type); +#endif /* Actually handle a PSCI call */ void arm_handle_psci_call(ARMCPU *cpu); -#endif /** * arm_clear_exclusive: clear the exclusive monitor @@ -738,6 +754,7 @@ struct ARMMMUFaultInfo { bool s1ptw; bool s1ns; bool ea; + bool dirtybit; /* FEAT_S1PIE, FEAT_S2PIE */ }; /** @@ -969,8 +986,6 @@ static inline ARMMMUIdx core_to_aa64_mmu_idx(int mmu_idx) return mmu_idx | ARM_MMU_IDX_A; } -int arm_mmu_idx_to_el(ARMMMUIdx mmu_idx); - /* Return the MMU index for a v7M CPU in the specified security state */ ARMMMUIdx arm_v7m_mmu_idx_for_secstate(CPUARMState *env, bool secstate); @@ -1013,108 +1028,10 @@ static inline void arm_call_el_change_hook(ARMCPU *cpu) } } -/* - * Return true if this address translation regime has two ranges. - * Note that this will not return the correct answer for AArch32 - * Secure PL1&0 (i.e. mmu indexes E3, E30_0, E30_3_PAN), but it is - * never called from a context where EL3 can be AArch32. (The - * correct return value for ARMMMUIdx_E3 would be different for - * that case, so we can't just make the function return the - * correct value anyway; we would need an extra "bool e3_is_aarch32" - * argument which all the current callsites would pass as 'false'.) 
- */ -static inline bool regime_has_2_ranges(ARMMMUIdx mmu_idx) -{ - switch (mmu_idx) { - case ARMMMUIdx_Stage1_E0: - case ARMMMUIdx_Stage1_E1: - case ARMMMUIdx_Stage1_E1_PAN: - case ARMMMUIdx_E10_0: - case ARMMMUIdx_E10_1: - case ARMMMUIdx_E10_1_PAN: - case ARMMMUIdx_E20_0: - case ARMMMUIdx_E20_2: - case ARMMMUIdx_E20_2_PAN: - return true; - default: - return false; - } -} - -static inline bool regime_is_pan(CPUARMState *env, ARMMMUIdx mmu_idx) -{ - switch (mmu_idx) { - case ARMMMUIdx_Stage1_E1_PAN: - case ARMMMUIdx_E10_1_PAN: - case ARMMMUIdx_E20_2_PAN: - case ARMMMUIdx_E30_3_PAN: - return true; - default: - return false; - } -} - -static inline bool regime_is_stage2(ARMMMUIdx mmu_idx) -{ - return mmu_idx == ARMMMUIdx_Stage2 || mmu_idx == ARMMMUIdx_Stage2_S; -} - -/* Return the exception level which controls this address translation regime */ -static inline uint32_t regime_el(CPUARMState *env, ARMMMUIdx mmu_idx) -{ - switch (mmu_idx) { - case ARMMMUIdx_E20_0: - case ARMMMUIdx_E20_2: - case ARMMMUIdx_E20_2_PAN: - case ARMMMUIdx_Stage2: - case ARMMMUIdx_Stage2_S: - case ARMMMUIdx_E2: - return 2; - case ARMMMUIdx_E3: - case ARMMMUIdx_E30_0: - case ARMMMUIdx_E30_3_PAN: - return 3; - case ARMMMUIdx_E10_0: - case ARMMMUIdx_Stage1_E0: - case ARMMMUIdx_Stage1_E1: - case ARMMMUIdx_Stage1_E1_PAN: - case ARMMMUIdx_E10_1: - case ARMMMUIdx_E10_1_PAN: - case ARMMMUIdx_MPrivNegPri: - case ARMMMUIdx_MUserNegPri: - case ARMMMUIdx_MPriv: - case ARMMMUIdx_MUser: - case ARMMMUIdx_MSPrivNegPri: - case ARMMMUIdx_MSUserNegPri: - case ARMMMUIdx_MSPriv: - case ARMMMUIdx_MSUser: - return 1; - default: - g_assert_not_reached(); - } -} - -static inline bool regime_is_user(CPUARMState *env, ARMMMUIdx mmu_idx) -{ - switch (mmu_idx) { - case ARMMMUIdx_E10_0: - case ARMMMUIdx_E20_0: - case ARMMMUIdx_E30_0: - case ARMMMUIdx_Stage1_E0: - case ARMMMUIdx_MUser: - case ARMMMUIdx_MSUser: - case ARMMMUIdx_MUserNegPri: - case ARMMMUIdx_MSUserNegPri: - return true; - default: - return false; - } -} - /* Return the SCTLR value which controls this address translation regime */ static inline uint64_t regime_sctlr(CPUARMState *env, ARMMMUIdx mmu_idx) { - return env->cp15.sctlr_el[regime_el(env, mmu_idx)]; + return env->cp15.sctlr_el[regime_el(mmu_idx)]; } /* @@ -1146,13 +1063,13 @@ static inline uint64_t regime_tcr(CPUARMState *env, ARMMMUIdx mmu_idx) v |= env->cp15.vtcr_el2 & VTCR_SHARED_FIELD_MASK; return v; } - return env->cp15.tcr_el[regime_el(env, mmu_idx)]; + return env->cp15.tcr_el[regime_el(mmu_idx)]; } /* Return true if the translation regime is using LPAE format page tables */ static inline bool regime_using_lpae_format(CPUARMState *env, ARMMMUIdx mmu_idx) { - int el = regime_el(env, mmu_idx); + int el = regime_el(mmu_idx); if (el == 2 || arm_el_is_aa64(env, el)) { return true; } @@ -1175,7 +1092,7 @@ static inline bool regime_using_lpae_format(CPUARMState *env, ARMMMUIdx mmu_idx) static inline int arm_num_brps(ARMCPU *cpu) { if (arm_feature(&cpu->env, ARM_FEATURE_AARCH64)) { - return FIELD_EX64(cpu->isar.id_aa64dfr0, ID_AA64DFR0, BRPS) + 1; + return FIELD_EX64_IDREG(&cpu->isar, ID_AA64DFR0, BRPS) + 1; } else { return FIELD_EX32(cpu->isar.dbgdidr, DBGDIDR, BRPS) + 1; } @@ -1189,7 +1106,7 @@ static inline int arm_num_brps(ARMCPU *cpu) static inline int arm_num_wrps(ARMCPU *cpu) { if (arm_feature(&cpu->env, ARM_FEATURE_AARCH64)) { - return FIELD_EX64(cpu->isar.id_aa64dfr0, ID_AA64DFR0, WRPS) + 1; + return FIELD_EX64_IDREG(&cpu->isar, ID_AA64DFR0, WRPS) + 1; } else { return FIELD_EX32(cpu->isar.dbgdidr, DBGDIDR, WRPS) + 1; } 
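The arm_num_brps()/arm_num_wrps()/arm_num_ctx_cmps() hunks here, like the hvf.c changes earlier in this diff, replace direct reads of named ID-register fields (e.g. cpu->isar.id_aa64dfr0) with lookups into an indexed idregs[] array through FIELD_EX64_IDREG / GET_IDREG / SET_IDREG. The self-contained C sketch below shows the general shape of that accessor pattern only; the macro bodies, struct name, index names and field position used here are illustrative assumptions, not the actual QEMU definitions.

/* Minimal sketch of an indexed ID-register accessor pattern.
 * Names and layout are assumptions for illustration only. */
#include <stdint.h>
#include <stdio.h>

enum { ID_AA64DFR0_EL1_IDX, ID_AA64MMFR0_EL1_IDX, NUM_IDREGS };

typedef struct {
    uint64_t idregs[NUM_IDREGS];   /* one slot per AArch64 ID register */
} IDRegs;

/* ID_AA64DFR0_EL1.BRPS occupies bits [15:12]. */
#define ID_AA64DFR0_BRPS_SHIFT   12
#define ID_AA64DFR0_BRPS_LENGTH  4

#define GET_IDREG(isar, name)    ((isar)->idregs[name ## _EL1_IDX])
#define SET_IDREG(isar, name, v) ((isar)->idregs[name ## _EL1_IDX] = (v))
#define FIELD_EX64_IDREG(isar, reg, fld) \
    ((GET_IDREG(isar, reg) >> reg ## _ ## fld ## _SHIFT) & \
     ((1ull << reg ## _ ## fld ## _LENGTH) - 1))

int main(void)
{
    IDRegs isar = { 0 };

    /* Pretend the host reports 6 breakpoints (field value 5). */
    SET_IDREG(&isar, ID_AA64DFR0, 5ull << ID_AA64DFR0_BRPS_SHIFT);

    /* Same shape as arm_num_brps(): field value + 1. */
    printf("brps = %d\n",
           (int)FIELD_EX64_IDREG(&isar, ID_AA64DFR0, BRPS) + 1);
    return 0;
}

The benefit of the indirection, as far as this diff shows, is that accelerator code such as the hvf_arm_get_host_cpu_features() table earlier in the series can fill or clamp whole entries by index (host_isar.idregs[ID_AA64MMFR0_EL1_IDX], etc.) instead of naming each register field individually.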
@@ -1203,7 +1120,7 @@ static inline int arm_num_wrps(ARMCPU *cpu) static inline int arm_num_ctx_cmps(ARMCPU *cpu) { if (arm_feature(&cpu->env, ARM_FEATURE_AARCH64)) { - return FIELD_EX64(cpu->isar.id_aa64dfr0, ID_AA64DFR0, CTX_CMPS) + 1; + return FIELD_EX64_IDREG(&cpu->isar, ID_AA64DFR0, CTX_CMPS) + 1; } else { return FIELD_EX32(cpu->isar.dbgdidr, DBGDIDR, CTX_CMPS) + 1; } @@ -1279,6 +1196,11 @@ static inline const char *aarch32_mode_name(uint32_t psr) } /** + * arm_cpu_exec_interrupt(): Implementation of the cpu_exec_interrupt hook. + */ +bool arm_cpu_exec_interrupt(CPUState *cs, int interrupt_request); + +/** * arm_cpu_update_virq: Update CPU_INTERRUPT_VIRQ bit in cs->interrupt_request * * Update the CPU_INTERRUPT_VIRQ bit in cs->interrupt_request, following @@ -1359,25 +1281,6 @@ ARMMMUIdx stage_1_mmu_idx(ARMMMUIdx mmu_idx); ARMMMUIdx arm_stage1_mmu_idx(CPUARMState *env); #endif -/** - * arm_mmu_idx_is_stage1_of_2: - * @mmu_idx: The ARMMMUIdx to test - * - * Return true if @mmu_idx is a NOTLB mmu_idx that is the - * first stage of a two stage regime. - */ -static inline bool arm_mmu_idx_is_stage1_of_2(ARMMMUIdx mmu_idx) -{ - switch (mmu_idx) { - case ARMMMUIdx_Stage1_E0: - case ARMMMUIdx_Stage1_E1: - case ARMMMUIdx_Stage1_E1_PAN: - return true; - default: - return false; - } -} - static inline uint32_t aarch32_cpsr_valid_mask(uint64_t features, const ARMISARegisters *id) { @@ -1472,7 +1375,7 @@ static inline int arm_granule_bits(ARMGranuleSize gran) /* * Parameters of a given virtual address, as extracted from the - * translation control register (TCR) for a given regime. + * translation controls for a given regime. */ typedef struct ARMVAParameters { unsigned tsz : 8; @@ -1487,6 +1390,7 @@ typedef struct ARMVAParameters { bool ha : 1; bool hd : 1; ARMGranuleSize gran : 2; + bool pie : 1; } ARMVAParameters; /** @@ -1557,6 +1461,13 @@ typedef struct ARMCacheAttrs { typedef struct GetPhysAddrResult { CPUTLBEntryFull f; ARMCacheAttrs cacheattrs; + /* + * For ARMMMUIdx_Stage2*, the protection installed into f.prot + * is the result for AccessType_TTW, i.e. the page table walk itself. + * The protection installed into s2prot is the one to be merged + * with the stage1 protection. + */ + int s2prot; } GetPhysAddrResult; /** @@ -1588,30 +1499,27 @@ bool get_phys_addr(CPUARMState *env, vaddr address, __attribute__((nonnull)); /** - * get_phys_addr_with_space_nogpc: get the physical address for a virtual - * address + * get_phys_addr_for_at: * @env: CPUARMState * @address: virtual address to get physical address for - * @access_type: 0 for read, 1 for write, 2 for execute - * @memop: memory operation feeding this access, or 0 for none + * @prot_check: PAGE_{READ,WRITE,EXEC}, or 0 * @mmu_idx: MMU index indicating required translation regime * @space: security space for the access * @result: set on translation success. * @fi: set to fault info if the translation fails * - * Similar to get_phys_addr, but use the given security space and don't perform - * a Granule Protection Check on the resulting address. + * Similar to get_phys_addr, but for use by AccessType_AT, i.e. + * system instructions for address translation.
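Editor's note: the new s2prot field separates the stage-2 permissions seen by the guest access from the permissions used for the table walk itself; the effective result of a two-stage translation is simply the intersection of the two stages. A tiny illustration of that merge, with PAGE_* constants written out here purely for the example:

    #include <stdio.h>

    #define PAGE_READ  1
    #define PAGE_WRITE 2
    #define PAGE_EXEC  4

    /* A two-stage translation only grants the rights allowed by *both* stages. */
    static int merge_s1_s2(int s1prot, int s2prot)
    {
        return s1prot & s2prot;
    }

    int main(void)
    {
        int s1 = PAGE_READ | PAGE_WRITE | PAGE_EXEC;
        int s2 = PAGE_READ | PAGE_WRITE;            /* stage 2 forbids execute */
        printf("effective prot = %d\n", merge_s1_s2(s1, s2));   /* 3 = R+W */
        return 0;
    }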
*/ -bool get_phys_addr_with_space_nogpc(CPUARMState *env, vaddr address, - MMUAccessType access_type, MemOp memop, - ARMMMUIdx mmu_idx, ARMSecuritySpace space, - GetPhysAddrResult *result, - ARMMMUFaultInfo *fi) +bool get_phys_addr_for_at(CPUARMState *env, vaddr address, unsigned prot_check, + ARMMMUIdx mmu_idx, ARMSecuritySpace space, + GetPhysAddrResult *result, ARMMMUFaultInfo *fi) __attribute__((nonnull)); bool pmsav8_mpu_lookup(CPUARMState *env, uint32_t address, - MMUAccessType access_type, ARMMMUIdx mmu_idx, - bool is_secure, GetPhysAddrResult *result, + MMUAccessType access_type, unsigned prot_check, + ARMMMUIdx mmu_idx, bool is_secure, + GetPhysAddrResult *result, ARMMMUFaultInfo *fi, uint32_t *mregion); void arm_log_exception(CPUState *cs); @@ -1627,19 +1535,13 @@ FIELD(PREDDESC, OPRSZ, 0, 6) FIELD(PREDDESC, ESZ, 6, 2) FIELD(PREDDESC, DATA, 8, 24) -/* - * The SVE simd_data field, for memory ops, contains either - * rd (5 bits) or a shift count (2 bits). - */ -#define SVE_MTEDESC_SHIFT 5 - /* Bits within a descriptor passed to the helper_mte_check* functions. */ FIELD(MTEDESC, MIDX, 0, 4) FIELD(MTEDESC, TBI, 4, 2) FIELD(MTEDESC, TCMA, 6, 2) FIELD(MTEDESC, WRITE, 8, 1) FIELD(MTEDESC, ALIGN, 9, 3) -FIELD(MTEDESC, SIZEM1, 12, SIMD_DATA_BITS - SVE_MTEDESC_SHIFT - 12) /* size - 1 */ +FIELD(MTEDESC, SIZEM1, 12, 32 - 12) /* size - 1 */ bool mte_probe(CPUARMState *env, uint32_t desc, uint64_t ptr); uint64_t mte_check(CPUARMState *env, uint32_t desc, uint64_t ptr, uintptr_t ra); @@ -1812,8 +1714,11 @@ static inline uint64_t pmu_counter_mask(CPUARMState *env) } GDBFeature *arm_gen_dynamic_svereg_feature(CPUState *cpu, int base_reg); +GDBFeature *arm_gen_dynamic_smereg_feature(CPUState *cpu, int base_reg); int aarch64_gdb_get_sve_reg(CPUState *cs, GByteArray *buf, int reg); int aarch64_gdb_set_sve_reg(CPUState *cs, uint8_t *buf, int reg); +int aarch64_gdb_get_sme_reg(CPUState *cs, GByteArray *buf, int reg); +int aarch64_gdb_set_sme_reg(CPUState *cs, uint8_t *buf, int reg); int aarch64_gdb_get_fpu_reg(CPUState *cs, GByteArray *buf, int reg); int aarch64_gdb_set_fpu_reg(CPUState *cs, uint8_t *buf, int reg); int aarch64_gdb_get_pauth_reg(CPUState *cs, GByteArray *buf, int reg); @@ -1875,6 +1780,12 @@ void define_debug_regs(ARMCPU *cpu); /* Add the cpreg definitions for TLBI instructions */ void define_tlb_insn_regs(ARMCPU *cpu); +/* Add the cpreg definitions for AT instructions */ +void define_at_insn_regs(ARMCPU *cpu); +/* Add the cpreg definitions for PM cpregs */ +void define_pm_cpregs(ARMCPU *cpu); +/* Add the cpreg definitions for GCS cpregs */ +void define_gcs_cpregs(ARMCPU *cpu); /* Effective value of MDCR_EL2 */ static inline uint64_t arm_mdcr_el2_eff(CPUARMState *env) @@ -1985,5 +1896,14 @@ void vfp_clear_float_status_exc_flags(CPUARMState *env); * specified by mask changing to the values in val. */ void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask); +bool arm_pan_enabled(CPUARMState *env); +uint32_t cpsr_read_for_spsr_elx(CPUARMState *env); +void cpsr_write_from_spsr_elx(CPUARMState *env, uint32_t val); + +/* Compare uint64_t for qsort and bsearch. */ +int compare_u64(const void *a, const void *b); + +/* Used in FEAT_MEC to set the MECIDWidthm1 field in the MECIDR_EL2 register. 
*/ +#define MECID_WIDTH 16 #endif diff --git a/target/arm/kvm-consts.h b/target/arm/kvm-consts.h index c44d23d..54ae5da 100644 --- a/target/arm/kvm-consts.h +++ b/target/arm/kvm-consts.h @@ -160,9 +160,6 @@ MISMATCH_CHECK(QEMU_KVM_ARM_TARGET_CORTEX_A53, KVM_ARM_TARGET_CORTEX_A53); #define CP_REG_ARM64_SYSREG_OP2_MASK 0x0000000000000007 #define CP_REG_ARM64_SYSREG_OP2_SHIFT 0 -/* No kernel define but it's useful to QEMU */ -#define CP_REG_ARM64_SYSREG_CP (CP_REG_ARM64_SYSREG >> CP_REG_ARM_COPROC_SHIFT) - MISMATCH_CHECK(CP_REG_ARM64, KVM_REG_ARM64); MISMATCH_CHECK(CP_REG_ARM_COPROC_MASK, KVM_REG_ARM_COPROC_MASK); MISMATCH_CHECK(CP_REG_ARM_COPROC_SHIFT, KVM_REG_ARM_COPROC_SHIFT); @@ -180,4 +177,15 @@ MISMATCH_CHECK(CP_REG_ARM64_SYSREG_OP2_SHIFT, KVM_REG_ARM64_SYSREG_OP2_SHIFT); #undef MISMATCH_CHECK +#define KVMID_AA64_SYS_REG_(op0, op1, crn, crm, op2) \ + (CP_REG_AA64_MASK | CP_REG_ARM64_SYSREG | \ + ((op0) << CP_REG_ARM64_SYSREG_OP0_SHIFT) | \ + ((op1) << CP_REG_ARM64_SYSREG_OP1_SHIFT) | \ + ((crn) << CP_REG_ARM64_SYSREG_CRN_SHIFT) | \ + ((crm) << CP_REG_ARM64_SYSREG_CRM_SHIFT) | \ + ((op2) << CP_REG_ARM64_SYSREG_OP2_SHIFT)) + +#define KVMID_AA64_SYS_REG64(op0, op1, crn, crm, op2) \ + (KVMID_AA64_SYS_REG_(op0, op1, crn, crm, op2) | CP_REG_SIZE_U64) + #endif diff --git a/target/arm/kvm-stub.c b/target/arm/kvm-stub.c index 34e57fa..c93462c 100644 --- a/target/arm/kvm-stub.c +++ b/target/arm/kvm-stub.c @@ -47,6 +47,11 @@ bool kvm_arm_mte_supported(void) return false; } +bool kvm_arm_el2_supported(void) +{ + return false; +} + /* * These functions should never actually be called without KVM support. */ diff --git a/target/arm/kvm.c b/target/arm/kvm.c index a2791aa..0d57081 100644 --- a/target/arm/kvm.c +++ b/target/arm/kvm.c @@ -26,6 +26,7 @@ #include "system/kvm_int.h" #include "kvm_arm.h" #include "cpu.h" +#include "cpu-sysregs.h" #include "trace.h" #include "internals.h" #include "hw/pci/pci.h" @@ -218,6 +219,29 @@ static bool kvm_arm_pauth_supported(void) kvm_check_extension(kvm_state, KVM_CAP_ARM_PTRAUTH_GENERIC)); } + +static uint64_t idregs_sysreg_to_kvm_reg(ARMSysRegs sysreg) +{ + return ARM64_SYS_REG((sysreg & CP_REG_ARM64_SYSREG_OP0_MASK) >> CP_REG_ARM64_SYSREG_OP0_SHIFT, + (sysreg & CP_REG_ARM64_SYSREG_OP1_MASK) >> CP_REG_ARM64_SYSREG_OP1_SHIFT, + (sysreg & CP_REG_ARM64_SYSREG_CRN_MASK) >> CP_REG_ARM64_SYSREG_CRN_SHIFT, + (sysreg & CP_REG_ARM64_SYSREG_CRM_MASK) >> CP_REG_ARM64_SYSREG_CRM_SHIFT, + (sysreg & CP_REG_ARM64_SYSREG_OP2_MASK) >> CP_REG_ARM64_SYSREG_OP2_SHIFT); +} + +/* read a sysreg value and store it in the idregs */ +static int get_host_cpu_reg(int fd, ARMHostCPUFeatures *ahcf, + ARMIDRegisterIdx index) +{ + uint64_t *reg; + int ret; + + reg = &ahcf->isar.idregs[index]; + ret = read_sys_reg64(fd, reg, + idregs_sysreg_to_kvm_reg(id_register_sysreg[index])); + return ret; +} + static bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf) { /* Identify the feature bits corresponding to the host CPU, and @@ -227,6 +251,7 @@ static bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf) */ int fdarray[3]; bool sve_supported; + bool el2_supported; bool pmu_supported = false; uint64_t features = 0; int err; @@ -247,6 +272,14 @@ static bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf) } /* + * Ask for EL2 if supported. + */ + el2_supported = kvm_arm_el2_supported(); + if (el2_supported) { + init.features[0] |= 1 << KVM_ARM_VCPU_HAS_EL2; + } + + /* * Ask for Pointer Authentication if supported, so that we get * the unsanitized field values for AA64ISAR1_EL1. 
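Editor's note: KVMID_AA64_SYS_REG_() and idregs_sysreg_to_kvm_reg() are inverse operations, one packing an op0/op1/CRn/CRm/op2 system-register encoding into a KVM register ID and the other unpacking it. A self-contained sketch of the round trip follows; only the OP2 mask/shift constants appear in the excerpt above, so the other shift values used here are assumptions modelled on the Linux UAPI layout.

    #include <stdint.h>
    #include <stdio.h>

    /* Assumed field positions, mirroring KVM_REG_ARM64_SYSREG_*_SHIFT. */
    enum { OP0_SHIFT = 14, OP1_SHIFT = 11, CRN_SHIFT = 7, CRM_SHIFT = 3, OP2_SHIFT = 0 };

    static uint64_t pack_sysreg(unsigned op0, unsigned op1, unsigned crn,
                                unsigned crm, unsigned op2)
    {
        return ((uint64_t)op0 << OP0_SHIFT) | ((uint64_t)op1 << OP1_SHIFT) |
               ((uint64_t)crn << CRN_SHIFT) | ((uint64_t)crm << CRM_SHIFT) |
               ((uint64_t)op2 << OP2_SHIFT);
    }

    static unsigned unpack_field(uint64_t id, unsigned shift, unsigned width)
    {
        return (id >> shift) & ((1u << width) - 1);
    }

    int main(void)
    {
        /* ID_AA64PFR0_EL1 is encoded as op0=3, op1=0, CRn=0, CRm=4, op2=0. */
        uint64_t id = pack_sysreg(3, 0, 0, 4, 0);
        printf("op0=%u crm=%u\n", unpack_field(id, OP0_SHIFT, 2),
               unpack_field(id, CRM_SHIFT, 4));
        return 0;
    }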
*/ @@ -266,10 +299,10 @@ static bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf) } ahcf->target = init.target; - ahcf->dtb_compatible = "arm,arm-v8"; + ahcf->dtb_compatible = "arm,armv8"; + int fd = fdarray[2]; - err = read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64pfr0, - ARM64_SYS_REG(3, 0, 0, 4, 0)); + err = get_host_cpu_reg(fd, ahcf, ID_AA64PFR0_EL1_IDX); if (unlikely(err < 0)) { /* * Before v4.15, the kernel only exposed a limited number of system @@ -287,31 +320,21 @@ static bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf) * ??? Either of these sounds like too much effort just * to work around running a modern host kernel. */ - ahcf->isar.id_aa64pfr0 = 0x00000011; /* EL1&0, AArch64 only */ + SET_IDREG(&ahcf->isar, ID_AA64PFR0, 0x00000011); /* EL1&0, AArch64 only */ err = 0; } else { - err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64pfr1, - ARM64_SYS_REG(3, 0, 0, 4, 1)); - err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64smfr0, - ARM64_SYS_REG(3, 0, 0, 4, 5)); - err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64dfr0, - ARM64_SYS_REG(3, 0, 0, 5, 0)); - err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64dfr1, - ARM64_SYS_REG(3, 0, 0, 5, 1)); - err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64isar0, - ARM64_SYS_REG(3, 0, 0, 6, 0)); - err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64isar1, - ARM64_SYS_REG(3, 0, 0, 6, 1)); - err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64isar2, - ARM64_SYS_REG(3, 0, 0, 6, 2)); - err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64mmfr0, - ARM64_SYS_REG(3, 0, 0, 7, 0)); - err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64mmfr1, - ARM64_SYS_REG(3, 0, 0, 7, 1)); - err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64mmfr2, - ARM64_SYS_REG(3, 0, 0, 7, 2)); - err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64mmfr3, - ARM64_SYS_REG(3, 0, 0, 7, 3)); + err |= get_host_cpu_reg(fd, ahcf, ID_AA64PFR1_EL1_IDX); + err |= get_host_cpu_reg(fd, ahcf, ID_AA64PFR2_EL1_IDX); + err |= get_host_cpu_reg(fd, ahcf, ID_AA64SMFR0_EL1_IDX); + err |= get_host_cpu_reg(fd, ahcf, ID_AA64DFR0_EL1_IDX); + err |= get_host_cpu_reg(fd, ahcf, ID_AA64DFR1_EL1_IDX); + err |= get_host_cpu_reg(fd, ahcf, ID_AA64ISAR0_EL1_IDX); + err |= get_host_cpu_reg(fd, ahcf, ID_AA64ISAR1_EL1_IDX); + err |= get_host_cpu_reg(fd, ahcf, ID_AA64ISAR2_EL1_IDX); + err |= get_host_cpu_reg(fd, ahcf, ID_AA64MMFR0_EL1_IDX); + err |= get_host_cpu_reg(fd, ahcf, ID_AA64MMFR1_EL1_IDX); + err |= get_host_cpu_reg(fd, ahcf, ID_AA64MMFR2_EL1_IDX); + err |= get_host_cpu_reg(fd, ahcf, ID_AA64MMFR3_EL1_IDX); /* * Note that if AArch32 support is not present in the host, @@ -320,49 +343,31 @@ static bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf) * than skipping the reads and leaving 0, as we must avoid * considering the values in every case. 
*/ - err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_pfr0, - ARM64_SYS_REG(3, 0, 0, 1, 0)); - err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_pfr1, - ARM64_SYS_REG(3, 0, 0, 1, 1)); - err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_dfr0, - ARM64_SYS_REG(3, 0, 0, 1, 2)); - err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_mmfr0, - ARM64_SYS_REG(3, 0, 0, 1, 4)); - err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_mmfr1, - ARM64_SYS_REG(3, 0, 0, 1, 5)); - err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_mmfr2, - ARM64_SYS_REG(3, 0, 0, 1, 6)); - err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_mmfr3, - ARM64_SYS_REG(3, 0, 0, 1, 7)); - err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar0, - ARM64_SYS_REG(3, 0, 0, 2, 0)); - err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar1, - ARM64_SYS_REG(3, 0, 0, 2, 1)); - err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar2, - ARM64_SYS_REG(3, 0, 0, 2, 2)); - err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar3, - ARM64_SYS_REG(3, 0, 0, 2, 3)); - err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar4, - ARM64_SYS_REG(3, 0, 0, 2, 4)); - err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar5, - ARM64_SYS_REG(3, 0, 0, 2, 5)); - err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_mmfr4, - ARM64_SYS_REG(3, 0, 0, 2, 6)); - err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar6, - ARM64_SYS_REG(3, 0, 0, 2, 7)); - - err |= read_sys_reg32(fdarray[2], &ahcf->isar.mvfr0, + err |= get_host_cpu_reg(fd, ahcf, ID_PFR0_EL1_IDX); + err |= get_host_cpu_reg(fd, ahcf, ID_PFR1_EL1_IDX); + err |= get_host_cpu_reg(fd, ahcf, ID_DFR0_EL1_IDX); + err |= get_host_cpu_reg(fd, ahcf, ID_MMFR0_EL1_IDX); + err |= get_host_cpu_reg(fd, ahcf, ID_MMFR1_EL1_IDX); + err |= get_host_cpu_reg(fd, ahcf, ID_MMFR2_EL1_IDX); + err |= get_host_cpu_reg(fd, ahcf, ID_MMFR3_EL1_IDX); + err |= get_host_cpu_reg(fd, ahcf, ID_ISAR0_EL1_IDX); + err |= get_host_cpu_reg(fd, ahcf, ID_ISAR1_EL1_IDX); + err |= get_host_cpu_reg(fd, ahcf, ID_ISAR2_EL1_IDX); + err |= get_host_cpu_reg(fd, ahcf, ID_ISAR3_EL1_IDX); + err |= get_host_cpu_reg(fd, ahcf, ID_ISAR4_EL1_IDX); + err |= get_host_cpu_reg(fd, ahcf, ID_ISAR5_EL1_IDX); + err |= get_host_cpu_reg(fd, ahcf, ID_ISAR6_EL1_IDX); + err |= get_host_cpu_reg(fd, ahcf, ID_MMFR4_EL1_IDX); + + err |= read_sys_reg32(fd, &ahcf->isar.mvfr0, ARM64_SYS_REG(3, 0, 0, 3, 0)); - err |= read_sys_reg32(fdarray[2], &ahcf->isar.mvfr1, + err |= read_sys_reg32(fd, &ahcf->isar.mvfr1, ARM64_SYS_REG(3, 0, 0, 3, 1)); - err |= read_sys_reg32(fdarray[2], &ahcf->isar.mvfr2, + err |= read_sys_reg32(fd, &ahcf->isar.mvfr2, ARM64_SYS_REG(3, 0, 0, 3, 2)); - err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_pfr2, - ARM64_SYS_REG(3, 0, 0, 3, 4)); - err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_dfr1, - ARM64_SYS_REG(3, 0, 0, 3, 5)); - err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_mmfr5, - ARM64_SYS_REG(3, 0, 0, 3, 6)); + err |= get_host_cpu_reg(fd, ahcf, ID_PFR2_EL1_IDX); + err |= get_host_cpu_reg(fd, ahcf, ID_DFR1_EL1_IDX); + err |= get_host_cpu_reg(fd, ahcf, ID_MMFR5_EL1_IDX); /* * DBGDIDR is a bit complicated because the kernel doesn't @@ -374,14 +379,14 @@ static bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf) * arch/arm64/kvm/sys_regs.c:trap_dbgidr() does. * We only do this if the CPU supports AArch32 at EL1. 
*/ - if (FIELD_EX32(ahcf->isar.id_aa64pfr0, ID_AA64PFR0, EL1) >= 2) { - int wrps = FIELD_EX64(ahcf->isar.id_aa64dfr0, ID_AA64DFR0, WRPS); - int brps = FIELD_EX64(ahcf->isar.id_aa64dfr0, ID_AA64DFR0, BRPS); + if (FIELD_EX32_IDREG(&ahcf->isar, ID_AA64PFR0, EL1) >= 2) { + int wrps = FIELD_EX64_IDREG(&ahcf->isar, ID_AA64DFR0, WRPS); + int brps = FIELD_EX64_IDREG(&ahcf->isar, ID_AA64DFR0, BRPS); int ctx_cmps = - FIELD_EX64(ahcf->isar.id_aa64dfr0, ID_AA64DFR0, CTX_CMPS); + FIELD_EX64_IDREG(&ahcf->isar, ID_AA64DFR0, CTX_CMPS); int version = 6; /* ARMv8 debug architecture */ bool has_el3 = - !!FIELD_EX32(ahcf->isar.id_aa64pfr0, ID_AA64PFR0, EL3); + !!FIELD_EX32_IDREG(&ahcf->isar, ID_AA64PFR0, EL3); uint32_t dbgdidr = 0; dbgdidr = FIELD_DP32(dbgdidr, DBGDIDR, WRPS, wrps); @@ -396,7 +401,7 @@ static bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf) if (pmu_supported) { /* PMCR_EL0 is only accessible if the vCPU has feature PMU_V3 */ - err |= read_sys_reg64(fdarray[2], &ahcf->isar.reset_pmcr_el0, + err |= read_sys_reg64(fd, &ahcf->isar.reset_pmcr_el0, ARM64_SYS_REG(3, 3, 9, 12, 0)); } @@ -408,8 +413,7 @@ static bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf) * enabled SVE support, which resulted in an error rather than RAZ. * So only read the register if we set KVM_ARM_VCPU_SVE above. */ - err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64zfr0, - ARM64_SYS_REG(3, 0, 0, 4, 4)); + err |= get_host_cpu_reg(fd, ahcf, ID_AA64ZFR0_EL1_IDX); } } @@ -429,6 +433,10 @@ static bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf) features |= 1ULL << ARM_FEATURE_AARCH64; features |= 1ULL << ARM_FEATURE_GENERIC_TIMER; + if (el2_supported) { + features |= 1ULL << ARM_FEATURE_EL2; + } + ahcf->features = features; return true; @@ -711,17 +719,6 @@ void kvm_arm_register_device(MemoryRegion *mr, uint64_t devid, uint64_t group, memory_region_ref(kd->mr); } -static int compare_u64(const void *a, const void *b) -{ - if (*(uint64_t *)a > *(uint64_t *)b) { - return 1; - } - if (*(uint64_t *)a < *(uint64_t *)b) { - return -1; - } - return 0; -} - /* * cpreg_values are sorted in ascending order by KVM register ID * (see kvm_arm_init_cpreg_list). 
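Editor's note: compare_u64() is dropped from kvm.c here because it is now declared in internals.h (see the earlier hunk) as a generic qsort()/bsearch() comparator for the sorted cpreg list. It compares explicitly rather than returning a - b because a 64-bit difference truncated to int can lose the sign. A stand-alone illustration of the same comparator pattern:

    #include <stdint.h>
    #include <stdlib.h>
    #include <stdio.h>

    /* Correct three-way comparison for uint64_t keys; (a - b) truncated to
     * int would, e.g., report 0x100000001 and 0x1 as equal. */
    static int compare_u64(const void *a, const void *b)
    {
        uint64_t x = *(const uint64_t *)a, y = *(const uint64_t *)b;
        return (x > y) - (x < y);
    }

    int main(void)
    {
        uint64_t regs[] = { 0x6030000000100042ull, 0x2ull, 0x100000001ull };
        qsort(regs, 3, sizeof(regs[0]), compare_u64);

        uint64_t key = 0x100000001ull;
        uint64_t *hit = bsearch(&key, regs, 3, sizeof(regs[0]), compare_u64);
        printf("found: %s\n", hit ? "yes" : "no");
        return 0;
    }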
This allows us to cheaply find @@ -893,6 +890,58 @@ bool write_kvmstate_to_list(ARMCPU *cpu) return ok; } +/* pretty-print a KVM register */ +#define CP_REG_ARM64_SYSREG_OP(_reg, _op) \ + ((uint8_t)((_reg & CP_REG_ARM64_SYSREG_ ## _op ## _MASK) >> \ + CP_REG_ARM64_SYSREG_ ## _op ## _SHIFT)) + +static gchar *kvm_print_sve_register_name(uint64_t regidx) +{ + uint16_t sve_reg = regidx & 0x000000000000ffff; + + if (regidx == KVM_REG_ARM64_SVE_VLS) { + return g_strdup_printf("SVE VLS"); + } + /* zreg, preg, ffr */ + switch (sve_reg & 0xfc00) { + case 0: + return g_strdup_printf("SVE zreg n:%d slice:%d", + (sve_reg & 0x03e0) >> 5, sve_reg & 0x001f); + case 0x04: + return g_strdup_printf("SVE preg n:%d slice:%d", + (sve_reg & 0x01e0) >> 5, sve_reg & 0x001f); + case 0x06: + return g_strdup_printf("SVE ffr slice:%d", sve_reg & 0x001f); + default: + return g_strdup_printf("SVE ???"); + } +} + +static gchar *kvm_print_register_name(uint64_t regidx) +{ + switch ((regidx & KVM_REG_ARM_COPROC_MASK)) { + case KVM_REG_ARM_CORE: + return g_strdup_printf("core reg %"PRIx64, regidx); + case KVM_REG_ARM_DEMUX: + return g_strdup_printf("demuxed reg %"PRIx64, regidx); + case KVM_REG_ARM64_SYSREG: + return g_strdup_printf("op0:%d op1:%d crn:%d crm:%d op2:%d", + CP_REG_ARM64_SYSREG_OP(regidx, OP0), + CP_REG_ARM64_SYSREG_OP(regidx, OP1), + CP_REG_ARM64_SYSREG_OP(regidx, CRN), + CP_REG_ARM64_SYSREG_OP(regidx, CRM), + CP_REG_ARM64_SYSREG_OP(regidx, OP2)); + case KVM_REG_ARM_FW: + return g_strdup_printf("fw reg %d", (int)(regidx & 0xffff)); + case KVM_REG_ARM64_SVE: + return kvm_print_sve_register_name(regidx); + case KVM_REG_ARM_FW_FEAT_BMAP: + return g_strdup_printf("fw feat reg %d", (int)(regidx & 0xffff)); + default: + return g_strdup_printf("%"PRIx64, regidx); + } +} + bool write_list_to_kvmstate(ARMCPU *cpu, int level) { CPUState *cs = CPU(cpu); @@ -920,11 +969,45 @@ bool write_list_to_kvmstate(ARMCPU *cpu, int level) g_assert_not_reached(); } if (ret) { + gchar *reg_str = kvm_print_register_name(regidx); + /* We might fail for "unknown register" and also for * "you tried to set a register which is constant with * a different value from what it actually contains". 
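Editor's note: one detail worth double-checking in kvm_print_sve_register_name() above: the switch masks with 0xfc00, so the values it compares against should be the masked region selectors 0x0000 (zreg), 0x0400 (preg) and 0x0600 (ffr); the literal 0x04 and 0x06 cases can never match. A hedged sketch of the decode, assuming the usual KVM SVE register-ID layout in the low 16 bits:

    #include <stdint.h>
    #include <stdio.h>

    /* Assumed low-16-bit layout of KVM SVE register IDs: a region field in
     * bits [15:10] (masked values 0x0000 zreg, 0x0400 preg, 0x0600 ffr),
     * a register number in bits [9:5], and a slice index in bits [4:0]. */
    static void print_sve_id(uint16_t sve_reg)
    {
        switch (sve_reg & 0xfc00) {
        case 0x0000:
            printf("zreg n:%d slice:%d\n", (sve_reg >> 5) & 0x1f, sve_reg & 0x1f);
            break;
        case 0x0400:
            printf("preg n:%d slice:%d\n", (sve_reg >> 5) & 0x0f, sve_reg & 0x1f);
            break;
        case 0x0600:
            printf("ffr slice:%d\n", sve_reg & 0x1f);
            break;
        default:
            printf("???\n");
            break;
        }
    }

    int main(void)
    {
        print_sve_id(0x0425);   /* preg n:1 slice:5 under the assumed layout */
        return 0;
    }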
*/ ok = false; + switch (ret) { + case -ENOENT: + error_report("Could not set register %s: unknown to KVM", + reg_str); + break; + case -EINVAL: + if ((regidx & KVM_REG_SIZE_MASK) == KVM_REG_SIZE_U32) { + if (!kvm_get_one_reg(cs, regidx, &v32)) { + error_report("Could not set register %s to %x (is %x)", + reg_str, (uint32_t)cpu->cpreg_values[i], + v32); + } else { + error_report("Could not set register %s to %x", + reg_str, (uint32_t)cpu->cpreg_values[i]); + } + } else /* U64 */ { + uint64_t v64; + + if (!kvm_get_one_reg(cs, regidx, &v64)) { + error_report("Could not set register %s to %"PRIx64" (is %"PRIx64")", + reg_str, cpu->cpreg_values[i], v64); + } else { + error_report("Could not set register %s to %"PRIx64, + reg_str, cpu->cpreg_values[i]); + } + } + break; + default: + error_report("Could not set register %s: %s", + reg_str, strerror(-ret)); + } + g_free(reg_str); } } return ok; @@ -1769,6 +1852,11 @@ bool kvm_arm_aarch32_supported(void) return kvm_check_extension(kvm_state, KVM_CAP_ARM_EL1_32BIT); } +bool kvm_arm_el2_supported(void) +{ + return kvm_check_extension(kvm_state, KVM_CAP_ARM_EL2); +} + bool kvm_arm_sve_supported(void) { return kvm_check_extension(kvm_state, KVM_CAP_ARM_SVE); @@ -1846,6 +1934,11 @@ static int kvm_arm_sve_set_vls(ARMCPU *cpu) #define ARM_CPU_ID_MPIDR 3, 0, 0, 0, 5 +int kvm_arch_pre_create_vcpu(CPUState *cpu, Error **errp) +{ + return 0; +} + int kvm_arch_init_vcpu(CPUState *cs) { int ret; @@ -1884,6 +1977,9 @@ int kvm_arch_init_vcpu(CPUState *cs) cpu->kvm_init_features[0] |= (1 << KVM_ARM_VCPU_PTRAUTH_ADDRESS | 1 << KVM_ARM_VCPU_PTRAUTH_GENERIC); } + if (cpu->has_el2 && kvm_arm_el2_supported()) { + cpu->kvm_init_features[0] |= 1 << KVM_ARM_VCPU_HAS_EL2; + } /* Do KVM_ARM_VCPU_INIT ioctl */ ret = kvm_arm_vcpu_init(cpu); @@ -2027,7 +2123,7 @@ static int kvm_arch_put_sve(CPUState *cs) return 0; } -int kvm_arch_put_registers(CPUState *cs, int level, Error **errp) +int kvm_arch_put_registers(CPUState *cs, KvmPutState level, Error **errp) { uint64_t val; uint32_t fpr; @@ -2337,10 +2433,12 @@ void kvm_arch_on_sigbus_vcpu(CPUState *c, int code, void *addr) { ram_addr_t ram_addr; hwaddr paddr; + AcpiGhesState *ags; assert(code == BUS_MCEERR_AR || code == BUS_MCEERR_AO); - if (acpi_ghes_present() && addr) { + ags = acpi_ghes_get_state(); + if (ags && addr) { ram_addr = qemu_ram_addr_from_host(addr); if (ram_addr != RAM_ADDR_INVALID && kvm_physical_memory_addr_from_host(c->kvm_state, addr, &paddr)) { @@ -2358,7 +2456,8 @@ void kvm_arch_on_sigbus_vcpu(CPUState *c, int code, void *addr) */ if (code == BUS_MCEERR_AR) { kvm_cpu_synchronize_state(c); - if (!acpi_ghes_memory_errors(ACPI_HEST_SRC_ID_SEA, paddr)) { + if (!acpi_ghes_memory_errors(ags, ACPI_HEST_SRC_ID_SYNC, + paddr)) { kvm_inject_arm_sea(c); } else { error_report("failed to record the error"); diff --git a/target/arm/kvm_arm.h b/target/arm/kvm_arm.h index c4178d1..6a9b637 100644 --- a/target/arm/kvm_arm.h +++ b/target/arm/kvm_arm.h @@ -12,6 +12,7 @@ #define QEMU_KVM_ARM_H #include "system/kvm.h" +#include "target/arm/cpu-qom.h" #define KVM_ARM_VGIC_V2 (1 << 0) #define KVM_ARM_VGIC_V3 (1 << 1) @@ -160,6 +161,14 @@ void kvm_arm_add_vcpu_properties(ARMCPU *cpu); */ void kvm_arm_steal_time_finalize(ARMCPU *cpu, Error **errp); +/* + * These "is some KVM subfeature enabled?" functions may be called + * when KVM support is not present, including in the user-mode + * emulators. The kvm-stub.c file is only built into the system + * emulators, so for user-mode emulation we provide "always false" + * stubs here. 
+ */ +#ifndef CONFIG_USER_ONLY /** * kvm_arm_aarch32_supported: * @@ -191,6 +200,40 @@ bool kvm_arm_sve_supported(void); bool kvm_arm_mte_supported(void); /** + * kvm_arm_el2_supported: + * + * Returns true if KVM can enable EL2 and false otherwise. + */ +bool kvm_arm_el2_supported(void); +#else + +static inline bool kvm_arm_aarch32_supported(void) +{ + return false; +} + +static inline bool kvm_arm_pmu_supported(void) +{ + return false; +} + +static inline bool kvm_arm_sve_supported(void) +{ + return false; +} + +static inline bool kvm_arm_mte_supported(void) +{ + return false; +} + +static inline bool kvm_arm_el2_supported(void) +{ + return false; +} +#endif + +/** * kvm_arm_get_max_vm_ipa_size: * @ms: Machine state handle * @fixed_ipa: True when the IPA limit is fixed at 40. This is the case diff --git a/target/arm/machine.c b/target/arm/machine.c index e442d48..44a0cf8 100644 --- a/target/arm/machine.c +++ b/target/arm/machine.c @@ -221,26 +221,6 @@ static const VMStateDescription vmstate_vfp = { } }; -static bool iwmmxt_needed(void *opaque) -{ - ARMCPU *cpu = opaque; - CPUARMState *env = &cpu->env; - - return arm_feature(env, ARM_FEATURE_IWMMXT); -} - -static const VMStateDescription vmstate_iwmmxt = { - .name = "cpu/iwmmxt", - .version_id = 1, - .minimum_version_id = 1, - .needed = iwmmxt_needed, - .fields = (const VMStateField[]) { - VMSTATE_UINT64_ARRAY(env.iwmmxt.regs, ARMCPU, 16), - VMSTATE_UINT32_ARRAY(env.iwmmxt.cregs, ARMCPU, 16), - VMSTATE_END_OF_LIST() - } -}; - /* The expression ARM_MAX_VQ - 2 is 0 for pure AArch32 build, * and ARMPredicateReg is actively empty. This triggers errors * in the expansion of the VMSTATE macros. @@ -315,12 +295,31 @@ static const VMStateDescription vmstate_za = { .minimum_version_id = 1, .needed = za_needed, .fields = (const VMStateField[]) { - VMSTATE_STRUCT_ARRAY(env.zarray, ARMCPU, ARM_MAX_VQ * 16, 0, + VMSTATE_STRUCT_ARRAY(env.za_state.za, ARMCPU, ARM_MAX_VQ * 16, 0, vmstate_vreg, ARMVectorReg), VMSTATE_END_OF_LIST() } }; +static bool zt0_needed(void *opaque) +{ + ARMCPU *cpu = opaque; + + return za_needed(cpu) && cpu_isar_feature(aa64_sme2, cpu); +} + +static const VMStateDescription vmstate_zt0 = { + .name = "cpu/zt0", + .version_id = 1, + .minimum_version_id = 1, + .needed = zt0_needed, + .fields = (VMStateField[]) { + VMSTATE_UINT64_ARRAY(env.za_state.zt0, ARMCPU, + ARRAY_SIZE(((CPUARMState *)0)->za_state.zt0)), + VMSTATE_END_OF_LIST() + } +}; + static bool serror_needed(void *opaque) { ARMCPU *cpu = opaque; @@ -817,6 +816,80 @@ static const VMStateInfo vmstate_cpsr = { .put = put_cpsr, }; +static int get_pstate64(QEMUFile *f, void *opaque, size_t size, + const VMStateField *field) +{ + ARMCPU *cpu = opaque; + CPUARMState *env = &cpu->env; + uint64_t val = qemu_get_be64(f); + + env->aarch64 = ((val & PSTATE_nRW) == 0); + if (is_a64(env)) { + pstate_write(env, val); + } else { + cpsr_write_from_spsr_elx(env, val); + } + return 0; +} + +static int put_pstate64(QEMUFile *f, void *opaque, size_t size, + const VMStateField *field, JSONWriter *vmdesc) +{ + ARMCPU *cpu = opaque; + CPUARMState *env = &cpu->env; + uint64_t val; + + if (is_a64(env)) { + val = pstate_read(env); + } else { + val = cpsr_read_for_spsr_elx(env); + } + qemu_put_be64(f, val); + return 0; +} + +static bool pstate64_needed(void *opaque) +{ + ARMCPU *cpu = opaque; + CPUARMState *env = &cpu->env; + uint64_t val; + + if (arm_feature(env, ARM_FEATURE_M)) { + return false; + } + if (is_a64(env)) { + val = pstate_read(env); + } else { + val = cpsr_read_for_spsr_elx(env); + if 
(val & PSTATE_SS) { + return true; + } + } + return val > UINT32_MAX; +} + +static const VMStateDescription vmstate_pstate64 = { + .name = "cpu/pstate64", + .version_id = 1, + .minimum_version_id = 1, + .needed = pstate64_needed, + .fields = (const VMStateField[]) { + { + .name = "pstate64", + .version_id = 0, + .size = sizeof(uint64_t), + .info = &(const VMStateInfo) { + .name = "pstate64", + .get = get_pstate64, + .put = put_pstate64, + }, + .flags = VMS_SINGLE, + .offset = 0, + }, + VMSTATE_END_OF_LIST() + }, +}; + static int get_power(QEMUFile *f, void *opaque, size_t size, const VMStateField *field) { @@ -849,6 +922,23 @@ static const VMStateInfo vmstate_powered_off = { .put = put_power, }; +static bool syndrome64_needed(void *opaque) +{ + ARMCPU *cpu = opaque; + return cpu->env.exception.syndrome > UINT32_MAX; +} + +static const VMStateDescription vmstate_syndrome64 = { + .name = "cpu/syndrome64", + .version_id = 1, + .minimum_version_id = 1, + .needed = syndrome64_needed, + .fields = (const VMStateField[]) { + VMSTATE_UINT64(env.exception.syndrome, ARMCPU), + VMSTATE_END_OF_LIST() + }, +}; + static int cpu_pre_save(void *opaque) { ARMCPU *cpu = opaque; @@ -1036,6 +1126,12 @@ const VMStateDescription vmstate_arm_cpu = { VMSTATE_UINT32_ARRAY(env.regs, ARMCPU, 16), VMSTATE_UINT64_ARRAY(env.xregs, ARMCPU, 32), VMSTATE_UINT64(env.pc, ARMCPU), + /* + * If any bits are set in the upper 32 bits of cpsr/pstate, + * or if the cpu is in aa32 mode and PSTATE.SS is set, then + * the cpu/pstate64 subsection will override this with the + * full 64 bit state. + */ { .name = "cpsr", .version_id = 0, @@ -1066,7 +1162,19 @@ const VMStateDescription vmstate_arm_cpu = { VMSTATE_UINT64(env.exclusive_val, ARMCPU), VMSTATE_UINT64(env.exclusive_high, ARMCPU), VMSTATE_UNUSED(sizeof(uint64_t)), - VMSTATE_UINT32(env.exception.syndrome, ARMCPU), + /* + * If any bits are set in the upper 32 bits of syndrome, + * then the cpu/syndrome64 subsection will override this + * with the full 64 bit state. 
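Editor's note: the pstate64 and syndrome64 subsections use the same backward-compatibility trick: the legacy 32-bit field keeps being sent, and a "needed" subsection carrying the full 64-bit value is emitted only when the upper half (or, for pstate, an aa32-invisible bit such as PSTATE_SS) is actually set, letting the incoming side override the truncated value. A stand-alone sketch of that save/load logic, without any of the QEMU VMState machinery:

    #include <stdint.h>
    #include <stdbool.h>
    #include <stdio.h>

    struct Stream { uint32_t low32; bool has_full; uint64_t full; };

    /* Save: always emit the truncated legacy field; add the wide field
     * only when it carries information the legacy field cannot. */
    static void save(struct Stream *s, uint64_t value)
    {
        s->low32 = (uint32_t)value;
        s->has_full = value > UINT32_MAX;
        if (s->has_full) {
            s->full = value;
        }
    }

    /* Load: start from the legacy field, let the optional section override. */
    static uint64_t load(const struct Stream *s)
    {
        uint64_t value = s->low32;
        if (s->has_full) {
            value = s->full;
        }
        return value;
    }

    int main(void)
    {
        struct Stream s;
        save(&s, 0x123456789abcdef0ull);
        printf("restored: %llx\n", (unsigned long long)load(&s));
        return 0;
    }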
+ */ + { + .name = "env.exception.syndrome", + .version_id = 0, + .size = sizeof(uint32_t), + .info = &vmstate_info_uint32, + .flags = VMS_SINGLE, + .offset = offsetoflow32(ARMCPU, env.exception.syndrome), + }, VMSTATE_UINT32(env.exception.fsr, ARMCPU), VMSTATE_UINT64(env.exception.vaddress, ARMCPU), VMSTATE_TIMER_PTR(gt_timer[GTIMER_PHYS], ARMCPU), @@ -1083,7 +1191,6 @@ const VMStateDescription vmstate_arm_cpu = { }, .subsections = (const VMStateDescription * const []) { &vmstate_vfp, - &vmstate_iwmmxt, &vmstate_m, &vmstate_thumb2ee, /* pmsav7_rnr must come before pmsav7 so that we have the @@ -1096,9 +1203,12 @@ const VMStateDescription vmstate_arm_cpu = { &vmstate_m_security, &vmstate_sve, &vmstate_za, + &vmstate_zt0, &vmstate_serror, &vmstate_irq_line_state, &vmstate_wfxt_timer, + &vmstate_syndrome64, + &vmstate_pstate64, NULL } }; diff --git a/target/arm/meson.build b/target/arm/meson.build index b404fa5..3df7e03 100644 --- a/target/arm/meson.build +++ b/target/arm/meson.build @@ -3,11 +3,15 @@ arm_common_ss = ss.source_set() arm_ss.add(files( 'gdbstub.c', )) -arm_ss.add(zlib) arm_ss.add(when: 'TARGET_AARCH64', if_true: files( 'cpu64.c', - 'gdbstub64.c')) + 'gdbstub64.c' +)) + +arm_common_ss.add(files( + 'mmuidx.c', +)) arm_system_ss = ss.source_set() arm_common_system_ss = ss.source_set() @@ -23,19 +27,30 @@ arm_user_ss.add(when: 'TARGET_AARCH64', if_false: files( 'cpu32-stubs.c', )) arm_user_ss.add(files( + 'cpregs-gcs.c', + 'cpregs-pmu.c', 'debug_helper.c', 'helper.c', 'vfp_fpscr.c', + 'el2-stubs.c', )) +arm_user_ss.add(when: 'CONFIG_ARM_COMPATIBLE_SEMIHOSTING', + if_true: files('common-semi-target.c')) -arm_common_system_ss.add(files('cpu.c'), capstone) +arm_common_system_ss.add(files('cpu.c')) arm_common_system_ss.add(when: 'TARGET_AARCH64', if_false: files( 'cpu32-stubs.c')) arm_common_system_ss.add(when: 'CONFIG_KVM', if_false: files('kvm-stub.c')) +arm_common_system_ss.add(when: 'CONFIG_HVF', if_false: files('hvf-stub.c')) +arm_common_system_ss.add(when: 'CONFIG_ARM_COMPATIBLE_SEMIHOSTING', + if_true: files('common-semi-target.c')) arm_common_system_ss.add(files( 'arch_dump.c', 'arm-powerctl.c', 'cortex-regs.c', + 'cpregs-gcs.c', + 'cpregs-pmu.c', + 'cpu-irq.c', 'debug_helper.c', 'helper.c', 'machine.c', @@ -48,7 +63,7 @@ subdir('hvf') if 'CONFIG_TCG' in config_all_accel subdir('tcg') else - arm_ss.add(files('tcg-stubs.c')) + arm_common_system_ss.add(files('tcg-stubs.c')) endif target_arch += {'arm': arm_ss} diff --git a/target/arm/mmuidx-internal.h b/target/arm/mmuidx-internal.h new file mode 100644 index 0000000..962b053 --- /dev/null +++ b/target/arm/mmuidx-internal.h @@ -0,0 +1,113 @@ +/* + * QEMU Arm software mmu index internal definitions + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#ifndef TARGET_ARM_MMUIDX_INTERNAL_H +#define TARGET_ARM_MMUIDX_INTERNAL_H + +#include "mmuidx.h" +#include "tcg/debug-assert.h" +#include "hw/registerfields.h" + + +FIELD(MMUIDXINFO, EL, 0, 2) +FIELD(MMUIDXINFO, ELVALID, 2, 1) +FIELD(MMUIDXINFO, REL, 3, 2) +FIELD(MMUIDXINFO, RELVALID, 5, 1) +FIELD(MMUIDXINFO, 2RANGES, 6, 1) +FIELD(MMUIDXINFO, PAN, 7, 1) +FIELD(MMUIDXINFO, USER, 8, 1) +FIELD(MMUIDXINFO, STAGE1, 9, 1) +FIELD(MMUIDXINFO, STAGE2, 10, 1) +FIELD(MMUIDXINFO, GCS, 11, 1) +FIELD(MMUIDXINFO, TG, 12, 5) + +extern const uint32_t arm_mmuidx_table[ARM_MMU_IDX_M + 8]; + +#define arm_mmuidx_is_valid(x) ((unsigned)(x) < ARRAY_SIZE(arm_mmuidx_table)) + +/* Return the exception level associated with this mmu index. 
*/ +static inline int arm_mmu_idx_to_el(ARMMMUIdx idx) +{ + tcg_debug_assert(arm_mmuidx_is_valid(idx)); + tcg_debug_assert(FIELD_EX32(arm_mmuidx_table[idx], MMUIDXINFO, ELVALID)); + return FIELD_EX32(arm_mmuidx_table[idx], MMUIDXINFO, EL); +} + +/* + * Return the exception level for the address translation regime + * associated with this mmu index. + */ +static inline uint32_t regime_el(ARMMMUIdx idx) +{ + tcg_debug_assert(arm_mmuidx_is_valid(idx)); + tcg_debug_assert(FIELD_EX32(arm_mmuidx_table[idx], MMUIDXINFO, RELVALID)); + return FIELD_EX32(arm_mmuidx_table[idx], MMUIDXINFO, REL); +} + +/* + * Return true if this address translation regime has two ranges. + * Note that this will not return the correct answer for AArch32 + * Secure PL1&0 (i.e. mmu indexes E3, E30_0, E30_3_PAN), but it is + * never called from a context where EL3 can be AArch32. (The + * correct return value for ARMMMUIdx_E3 would be different for + * that case, so we can't just make the function return the + * correct value anyway; we would need an extra "bool e3_is_aarch32" + * argument which all the current callsites would pass as 'false'.) + */ +static inline bool regime_has_2_ranges(ARMMMUIdx idx) +{ + tcg_debug_assert(arm_mmuidx_is_valid(idx)); + return FIELD_EX32(arm_mmuidx_table[idx], MMUIDXINFO, 2RANGES); +} + +/* Return true if Privileged Access Never is enabled for this mmu index. */ +static inline bool regime_is_pan(ARMMMUIdx idx) +{ + tcg_debug_assert(arm_mmuidx_is_valid(idx)); + return FIELD_EX32(arm_mmuidx_table[idx], MMUIDXINFO, PAN); +} + +/* + * Return true if the exception level associated with this mmu index is 0. + * Differs from arm_mmu_idx_to_el(idx) == 0 in that this allows querying + * Stage1 and Stage2 mmu indexes. + */ +static inline bool regime_is_user(ARMMMUIdx idx) +{ + tcg_debug_assert(arm_mmuidx_is_valid(idx)); + return FIELD_EX32(arm_mmuidx_table[idx], MMUIDXINFO, USER); +} + +/* Return true if this mmu index is stage 1 of a 2-stage translation. */ +static inline bool arm_mmu_idx_is_stage1_of_2(ARMMMUIdx idx) +{ + tcg_debug_assert(arm_mmuidx_is_valid(idx)); + return FIELD_EX32(arm_mmuidx_table[idx], MMUIDXINFO, STAGE1); +} + +/* Return true if this mmu index is stage 2 of a 2-stage translation. */ +static inline bool regime_is_stage2(ARMMMUIdx idx) +{ + tcg_debug_assert(arm_mmuidx_is_valid(idx)); + return FIELD_EX32(arm_mmuidx_table[idx], MMUIDXINFO, STAGE2); +} + +/* Return true if this mmu index implies AccessType_GCS. */ +static inline bool regime_is_gcs(ARMMMUIdx idx) +{ + tcg_debug_assert(arm_mmuidx_is_valid(idx)); + return FIELD_EX32(arm_mmuidx_table[idx], MMUIDXINFO, GCS); +} + +/* Return the GCS MMUIdx for a given regime. 
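Editor's note: these mmuidx-internal.h accessors replace the per-property switch statements removed from internals.h at the top of this section: each ARMMMUIdx gets one packed descriptor word in arm_mmuidx_table[], and every query becomes a masked table load. A reduced stand-alone sketch of the encoding and lookup, with only two properties and illustrative field positions:

    #include <stdint.h>
    #include <stdbool.h>
    #include <assert.h>
    #include <stdio.h>

    enum Idx { IDX_E10_0, IDX_E10_1, IDX_E10_1_PAN, NUM_IDX };

    /* Packed per-index descriptor: bits [1:0] = EL, bit 2 = EL valid, bit 3 = PAN. */
    #define EL(x)  (((x) << 0) | (1u << 2))
    #define PAN    (1u << 3)

    static const uint32_t idx_table[NUM_IDX] = {
        [IDX_E10_0]     = EL(0),
        [IDX_E10_1]     = EL(1),
        [IDX_E10_1_PAN] = EL(1) | PAN,
    };

    static unsigned idx_to_el(enum Idx i)
    {
        assert(idx_table[i] & (1u << 2));     /* EL field must be valid */
        return idx_table[i] & 3;
    }

    static bool idx_is_pan(enum Idx i)
    {
        return idx_table[i] & PAN;
    }

    int main(void)
    {
        printf("E10_1_PAN: EL%u, pan=%d\n",
               idx_to_el(IDX_E10_1_PAN), idx_is_pan(IDX_E10_1_PAN));
        return 0;
    }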
*/ +static inline ARMMMUIdx regime_to_gcs(ARMMMUIdx idx) +{ + tcg_debug_assert(arm_mmuidx_is_valid(idx)); + uint32_t core = FIELD_EX32(arm_mmuidx_table[idx], MMUIDXINFO, TG); + tcg_debug_assert(core != 0); /* core 0 is E10_0, not a GCS index */ + return core | ARM_MMU_IDX_A; +} + +#endif /* TARGET_ARM_MMUIDX_INTERNAL_H */ diff --git a/target/arm/mmuidx.c b/target/arm/mmuidx.c new file mode 100644 index 0000000..a4663c8 --- /dev/null +++ b/target/arm/mmuidx.c @@ -0,0 +1,66 @@ +/* + * QEMU Arm software mmu index definitions + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "qemu/osdep.h" +#include "mmuidx-internal.h" + + +#define EL(X) ((X << R_MMUIDXINFO_EL_SHIFT) | R_MMUIDXINFO_ELVALID_MASK | \ + ((X == 0) << R_MMUIDXINFO_USER_SHIFT)) +#define REL(X) ((X << R_MMUIDXINFO_REL_SHIFT) | R_MMUIDXINFO_RELVALID_MASK) +#define R2 R_MMUIDXINFO_2RANGES_MASK +#define PAN R_MMUIDXINFO_PAN_MASK +#define USER R_MMUIDXINFO_USER_MASK +#define S1 R_MMUIDXINFO_STAGE1_MASK +#define S2 R_MMUIDXINFO_STAGE2_MASK +#define GCS R_MMUIDXINFO_GCS_MASK +#define TG(X) \ + ((ARMMMUIdx_##X##_GCS & ARM_MMU_IDX_COREIDX_MASK) << R_MMUIDXINFO_TG_SHIFT) + +const uint32_t arm_mmuidx_table[ARM_MMU_IDX_M + 8] = { + /* + * A-profile. + */ + [ARMMMUIdx_E10_0] = EL(0) | REL(1) | R2 | TG(E10_0), + [ARMMMUIdx_E10_0_GCS] = EL(0) | REL(1) | R2 | GCS, + [ARMMMUIdx_E10_1] = EL(1) | REL(1) | R2 | TG(E10_1), + [ARMMMUIdx_E10_1_PAN] = EL(1) | REL(1) | R2 | TG(E10_1) | PAN, + [ARMMMUIdx_E10_1_GCS] = EL(1) | REL(1) | R2 | GCS, + + [ARMMMUIdx_E20_0] = EL(0) | REL(2) | R2 | TG(E20_0), + [ARMMMUIdx_E20_0_GCS] = EL(0) | REL(2) | R2 | GCS, + [ARMMMUIdx_E20_2] = EL(2) | REL(2) | R2 | TG(E20_2), + [ARMMMUIdx_E20_2_PAN] = EL(2) | REL(2) | R2 | TG(E20_2) | PAN, + [ARMMMUIdx_E20_2_GCS] = EL(2) | REL(2) | R2 | GCS, + + [ARMMMUIdx_E2] = EL(2) | REL(2) | TG(E2), + [ARMMMUIdx_E2_GCS] = EL(2) | REL(2) | GCS, + + [ARMMMUIdx_E3] = EL(3) | REL(3) | TG(E3), + [ARMMMUIdx_E3_GCS] = EL(3) | REL(3) | GCS, + [ARMMMUIdx_E30_0] = EL(0) | REL(3), + [ARMMMUIdx_E30_3_PAN] = EL(3) | REL(3) | PAN, + + [ARMMMUIdx_Stage2_S] = REL(2) | S2, + [ARMMMUIdx_Stage2] = REL(2) | S2, + + [ARMMMUIdx_Stage1_E0] = REL(1) | R2 | S1 | USER | TG(Stage1_E0), + [ARMMMUIdx_Stage1_E0_GCS] = REL(1) | R2 | S1 | USER | GCS, + [ARMMMUIdx_Stage1_E1] = REL(1) | R2 | S1 | TG(Stage1_E1), + [ARMMMUIdx_Stage1_E1_PAN] = REL(1) | R2 | S1 | TG(Stage1_E1) | PAN, + [ARMMMUIdx_Stage1_E1_GCS] = REL(1) | R2 | S1 | GCS, + + /* + * M-profile. 
+ */ + [ARMMMUIdx_MUser] = EL(0) | REL(1), + [ARMMMUIdx_MPriv] = EL(1) | REL(1), + [ARMMMUIdx_MUserNegPri] = EL(0) | REL(1), + [ARMMMUIdx_MPrivNegPri] = EL(1) | REL(1), + [ARMMMUIdx_MSUser] = EL(0) | REL(1), + [ARMMMUIdx_MSPriv] = EL(1) | REL(1), + [ARMMMUIdx_MSUserNegPri] = EL(0) | REL(1), + [ARMMMUIdx_MSPrivNegPri] = EL(1) | REL(1), +}; diff --git a/target/arm/mmuidx.h b/target/arm/mmuidx.h new file mode 100644 index 0000000..8d8d273 --- /dev/null +++ b/target/arm/mmuidx.h @@ -0,0 +1,241 @@ +/* + * QEMU Arm software mmu index definitions + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#ifndef TARGET_ARM_MMUIDX_H +#define TARGET_ARM_MMUIDX_H + +/* + * Arm has the following "translation regimes" (as the Arm ARM calls them): + * + * If EL3 is 64-bit: + * + NonSecure EL1 & 0 stage 1 + * + NonSecure EL1 & 0 stage 2 + * + NonSecure EL2 + * + NonSecure EL2 & 0 (ARMv8.1-VHE) + * + Secure EL1 & 0 stage 1 + * + Secure EL1 & 0 stage 2 (FEAT_SEL2) + * + Secure EL2 (FEAT_SEL2) + * + Secure EL2 & 0 (FEAT_SEL2) + * + Realm EL1 & 0 stage 1 (FEAT_RME) + * + Realm EL1 & 0 stage 2 (FEAT_RME) + * + Realm EL2 (FEAT_RME) + * + EL3 + * If EL3 is 32-bit: + * + NonSecure PL1 & 0 stage 1 + * + NonSecure PL1 & 0 stage 2 + * + NonSecure PL2 + * + Secure PL1 & 0 + * (reminder: for 32 bit EL3, Secure PL1 is *EL3*, not EL1.) + * + * For QEMU, an mmu_idx is not quite the same as a translation regime because: + * 1. we need to split the "EL1 & 0" and "EL2 & 0" regimes into two mmu_idxes, + * because they may differ in access permissions even if the VA->PA map is + * the same + * 2. we want to cache in our TLB the full VA->IPA->PA lookup for a stage 1+2 + * translation, which means that we have one mmu_idx that deals with two + * concatenated translation regimes [this sort of combined s1+2 TLB is + * architecturally permitted] + * 3. we don't need to allocate an mmu_idx to translations that we won't be + * handling via the TLB. The only way to do a stage 1 translation without + * the immediate stage 2 translation is via the ATS or AT system insns, + * which can be slow-pathed and always do a page table walk. + * The only use of stage 2 translations is either as part of an s1+2 + * lookup or when loading the descriptors during a stage 1 page table walk, + * and in both those cases we don't use the TLB. + * 4. we can also safely fold together the "32 bit EL3" and "64 bit EL3" + * translation regimes, because they map reasonably well to each other + * and they can't both be active at the same time. + * 5. we want to be able to use the TLB for accesses done as part of a + * stage1 page table walk, rather than having to walk the stage2 page + * table over and over. + * 6. we need separate EL1/EL2 mmu_idx for handling the Privileged Access + * Never (PAN) bit within PSTATE. + * 7. we fold together most secure and non-secure regimes for A-profile, + * because there are no banked system registers for aarch64, so the + * process of switching between secure and non-secure is + * already heavyweight. + * 8. we cannot fold together Stage 2 Secure and Stage 2 NonSecure, + * because both are in use simultaneously for Secure EL2. + * 9. we need separate indexes for handling AccessType_GCS. 
+ * + * This gives us the following list of cases: + * + * EL0 EL1&0 stage 1+2 (aka NS PL0 PL1&0 stage 1+2) + * EL0 EL1&0 stage 1+2 +GCS + * EL1 EL1&0 stage 1+2 (aka NS PL1 PL1&0 stage 1+2) + * EL1 EL1&0 stage 1+2 +PAN (aka NS PL1 P1&0 stage 1+2 +PAN) + * EL1 EL1&0 stage 1+2 +GCS + * EL0 EL2&0 + * EL0 EL2&0 +GCS + * EL2 EL2&0 + * EL2 EL2&0 +PAN + * EL2 EL2&0 +GCS + * EL2 (aka NS PL2) + * EL2 +GCS + * EL3 (aka AArch32 S PL1 PL1&0) + * EL3 +GCS + * AArch32 S PL0 PL1&0 (we call this EL30_0) + * AArch32 S PL1 PL1&0 +PAN (we call this EL30_3_PAN) + * Stage2 Secure + * Stage2 NonSecure + * plus one TLB per Physical address space: S, NS, Realm, Root + * + * for a total of 22 different mmu_idx. + * + * R profile CPUs have an MPU, but can use the same set of MMU indexes + * as A profile. They only need to distinguish EL0 and EL1 (and + * EL2 for cores like the Cortex-R52). + * + * M profile CPUs are rather different as they do not have a true MMU. + * They have the following different MMU indexes: + * User + * Privileged + * User, execution priority negative (ie the MPU HFNMIENA bit may apply) + * Privileged, execution priority negative (ditto) + * If the CPU supports the v8M Security Extension then there are also: + * Secure User + * Secure Privileged + * Secure User, execution priority negative + * Secure Privileged, execution priority negative + * + * The ARMMMUIdx and the mmu index value used by the core QEMU TLB code + * are not quite the same -- different CPU types (most notably M profile + * vs A/R profile) would like to use MMU indexes with different semantics, + * but since we don't ever need to use all of those in a single CPU we + * can avoid having to set NB_MMU_MODES to "total number of A profile MMU + * modes + total number of M profile MMU modes". The lower bits of + * ARMMMUIdx are the core TLB mmu index, and the higher bits are always + * the same for any particular CPU. + * Variables of type ARMMUIdx are always full values, and the core + * index values are in variables of type 'int'. + * + * Our enumeration includes at the end some entries which are not "true" + * mmu_idx values in that they don't have corresponding TLBs and are only + * valid for doing slow path page table walks. + * + * The constant names here are patterned after the general style of the names + * of the AT/ATS operations. + * The values used are carefully arranged to make mmu_idx => EL lookup easy. + * For M profile we arrange them to have a bit for priv, a bit for negpri + * and a bit for secure. + */ +#define ARM_MMU_IDX_A 0x20 /* A profile */ +#define ARM_MMU_IDX_NOTLB 0x40 /* does not have a TLB */ +#define ARM_MMU_IDX_M 0x80 /* M profile */ + +/* Meanings of the bits for M profile mmu idx values */ +#define ARM_MMU_IDX_M_PRIV 0x1 +#define ARM_MMU_IDX_M_NEGPRI 0x2 +#define ARM_MMU_IDX_M_S 0x4 /* Secure */ + +#define ARM_MMU_IDX_TYPE_MASK \ + (ARM_MMU_IDX_A | ARM_MMU_IDX_M | ARM_MMU_IDX_NOTLB) +#define ARM_MMU_IDX_COREIDX_MASK 0x1f + +typedef enum ARMMMUIdx { + /* + * A-profile. 
+ */ + + ARMMMUIdx_E10_0 = 0 | ARM_MMU_IDX_A, + ARMMMUIdx_E10_0_GCS = 1 | ARM_MMU_IDX_A, + ARMMMUIdx_E10_1 = 2 | ARM_MMU_IDX_A, + ARMMMUIdx_E10_1_PAN = 3 | ARM_MMU_IDX_A, + ARMMMUIdx_E10_1_GCS = 4 | ARM_MMU_IDX_A, + + ARMMMUIdx_E20_0 = 5 | ARM_MMU_IDX_A, + ARMMMUIdx_E20_0_GCS = 6 | ARM_MMU_IDX_A, + ARMMMUIdx_E20_2 = 7 | ARM_MMU_IDX_A, + ARMMMUIdx_E20_2_PAN = 8 | ARM_MMU_IDX_A, + ARMMMUIdx_E20_2_GCS = 9 | ARM_MMU_IDX_A, + + ARMMMUIdx_E2 = 10 | ARM_MMU_IDX_A, + ARMMMUIdx_E2_GCS = 11 | ARM_MMU_IDX_A, + + ARMMMUIdx_E3 = 12 | ARM_MMU_IDX_A, + ARMMMUIdx_E3_GCS = 13 | ARM_MMU_IDX_A, + ARMMMUIdx_E30_0 = 14 | ARM_MMU_IDX_A, + ARMMMUIdx_E30_3_PAN = 15 | ARM_MMU_IDX_A, + + /* + * Used for second stage of an S12 page table walk, or for descriptor + * loads during first stage of an S1 page table walk. Note that both + * are in use simultaneously for SecureEL2: the security state for + * the S2 ptw is selected by the NS bit from the S1 ptw. + */ + ARMMMUIdx_Stage2_S = 16 | ARM_MMU_IDX_A, + ARMMMUIdx_Stage2 = 17 | ARM_MMU_IDX_A, + + /* TLBs with 1-1 mapping to the physical address spaces. */ + ARMMMUIdx_Phys_S = 18 | ARM_MMU_IDX_A, + ARMMMUIdx_Phys_NS = 19 | ARM_MMU_IDX_A, + ARMMMUIdx_Phys_Root = 20 | ARM_MMU_IDX_A, + ARMMMUIdx_Phys_Realm = 21 | ARM_MMU_IDX_A, + + /* + * These are not allocated TLBs and are used only for AT system + * instructions or for the first stage of an S12 page table walk. + */ + ARMMMUIdx_Stage1_E0 = 0 | ARM_MMU_IDX_NOTLB, + ARMMMUIdx_Stage1_E1 = 1 | ARM_MMU_IDX_NOTLB, + ARMMMUIdx_Stage1_E1_PAN = 2 | ARM_MMU_IDX_NOTLB, + ARMMMUIdx_Stage1_E0_GCS = 3 | ARM_MMU_IDX_NOTLB, + ARMMMUIdx_Stage1_E1_GCS = 4 | ARM_MMU_IDX_NOTLB, + + /* + * M-profile. + */ + ARMMMUIdx_MUser = ARM_MMU_IDX_M, + ARMMMUIdx_MPriv = ARM_MMU_IDX_M | ARM_MMU_IDX_M_PRIV, + ARMMMUIdx_MUserNegPri = ARMMMUIdx_MUser | ARM_MMU_IDX_M_NEGPRI, + ARMMMUIdx_MPrivNegPri = ARMMMUIdx_MPriv | ARM_MMU_IDX_M_NEGPRI, + ARMMMUIdx_MSUser = ARMMMUIdx_MUser | ARM_MMU_IDX_M_S, + ARMMMUIdx_MSPriv = ARMMMUIdx_MPriv | ARM_MMU_IDX_M_S, + ARMMMUIdx_MSUserNegPri = ARMMMUIdx_MUserNegPri | ARM_MMU_IDX_M_S, + ARMMMUIdx_MSPrivNegPri = ARMMMUIdx_MPrivNegPri | ARM_MMU_IDX_M_S, +} ARMMMUIdx; + +/* + * Bit macros for the core-mmu-index values for each index, + * for use when calling tlb_flush_by_mmuidx() and friends. + */ +#define TO_CORE_BIT(NAME) \ + ARMMMUIdxBit_##NAME = 1 << (ARMMMUIdx_##NAME & ARM_MMU_IDX_COREIDX_MASK) + +typedef enum ARMMMUIdxBit { + TO_CORE_BIT(E10_0), + TO_CORE_BIT(E10_0_GCS), + TO_CORE_BIT(E10_1), + TO_CORE_BIT(E10_1_PAN), + TO_CORE_BIT(E10_1_GCS), + TO_CORE_BIT(E20_0), + TO_CORE_BIT(E20_0_GCS), + TO_CORE_BIT(E20_2), + TO_CORE_BIT(E20_2_PAN), + TO_CORE_BIT(E20_2_GCS), + TO_CORE_BIT(E2), + TO_CORE_BIT(E2_GCS), + TO_CORE_BIT(E3), + TO_CORE_BIT(E3_GCS), + TO_CORE_BIT(E30_0), + TO_CORE_BIT(E30_3_PAN), + TO_CORE_BIT(Stage2), + TO_CORE_BIT(Stage2_S), + + TO_CORE_BIT(MUser), + TO_CORE_BIT(MPriv), + TO_CORE_BIT(MUserNegPri), + TO_CORE_BIT(MPrivNegPri), + TO_CORE_BIT(MSUser), + TO_CORE_BIT(MSPriv), + TO_CORE_BIT(MSUserNegPri), + TO_CORE_BIT(MSPrivNegPri), +} ARMMMUIdxBit; + +#undef TO_CORE_BIT + +#define MMU_USER_IDX 0 + +#endif /* TARGET_ARM_MMUIDX_H */ diff --git a/target/arm/ptw.c b/target/arm/ptw.c index 44170d8..d4386ed 100644 --- a/target/arm/ptw.c +++ b/target/arm/ptw.c @@ -36,8 +36,6 @@ typedef struct S1Translate { /* * in_space: the security space for this walk. This plus * the in_mmu_idx specify the architectural translation regime. 
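Editor's note: the ARMMMUIdx values above carry a 5-bit core TLB index in the low bits plus a profile flag (ARM_MMU_IDX_A and friends), and the TO_CORE_BIT() enum turns each core index into a single bit so callers can OR several indexes together for tlb_flush_by_mmuidx()-style calls. A small sketch of the core-index and bitmask conversion, using the constants and enum values quoted from the header above:

    #include <stdint.h>
    #include <stdio.h>

    #define ARM_MMU_IDX_A            0x20   /* A-profile flag bit */
    #define ARM_MMU_IDX_COREIDX_MASK 0x1f   /* low bits: core TLB index */

    enum { E10_1 = 2 | ARM_MMU_IDX_A, E10_1_PAN = 3 | ARM_MMU_IDX_A };

    static unsigned core_index(unsigned armmmuidx)
    {
        return armmmuidx & ARM_MMU_IDX_COREIDX_MASK;
    }

    static uint32_t core_bit(unsigned armmmuidx)
    {
        return 1u << core_index(armmmuidx);
    }

    int main(void)
    {
        /* A flush covering both the plain and the PAN variant of EL1&0. */
        uint32_t mask = core_bit(E10_1) | core_bit(E10_1_PAN);
        printf("core indexes %u and %u, flush mask 0x%x\n",
               core_index(E10_1), core_index(E10_1_PAN), mask);   /* 2, 3, 0xc */
        return 0;
    }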
- * If a Secure ptw is "downgraded" to NonSecure by an NSTable bit, - * this field is updated accordingly. * * Note that the security space for the in_ptw_idx may be different * from that for the in_mmu_idx. We do not need to explicitly track @@ -53,17 +51,36 @@ typedef struct S1Translate { */ ARMSecuritySpace in_space; /* + * Like in_space, except this may be "downgraded" to NonSecure + * by an NSTable bit. + */ + ARMSecuritySpace cur_space; + /* * in_debug: is this a QEMU debug access (gdbstub, etc)? Debug * accesses will not update the guest page table access flags * and will not change the state of the softmmu TLBs. */ bool in_debug; /* + * in_at: is this AccessType_AT? + * This is also set for debug, because at heart that is also + * an address translation, and simplifies a test. + */ + bool in_at; + /* * If this is stage 2 of a stage 1+2 page table walk, then this must * be true if stage 1 is an EL0 access; otherwise this is ignored. * Stage 2 is indicated by in_mmu_idx set to ARMMMUIdx_Stage2{,_S}. */ bool in_s1_is_el0; + /* + * The set of PAGE_* bits to be use in the permission check. + * This is normally directly related to the access_type, but + * may be suppressed for debug or AT insns. + */ + uint8_t in_prot_check; + /* Cached EffectiveHCR_EL2_NVx() bit */ + bool in_nv1; bool out_rw; bool out_be; ARMSecuritySpace out_space; @@ -122,7 +139,7 @@ unsigned int arm_pamax(ARMCPU *cpu) { if (arm_feature(&cpu->env, ARM_FEATURE_AARCH64)) { unsigned int parange = - FIELD_EX64(cpu->isar.id_aa64mmfr0, ID_AA64MMFR0, PARANGE); + FIELD_EX64_IDREG(&cpu->isar, ID_AA64MMFR0, PARANGE); /* * id_aa64mmfr0 is a read-only register so values outside of the @@ -152,6 +169,10 @@ ARMMMUIdx stage_1_mmu_idx(ARMMMUIdx mmu_idx) return ARMMMUIdx_Stage1_E1; case ARMMMUIdx_E10_1_PAN: return ARMMMUIdx_Stage1_E1_PAN; + case ARMMMUIdx_E10_0_GCS: + return ARMMMUIdx_Stage1_E0_GCS; + case ARMMMUIdx_E10_1_GCS: + return ARMMMUIdx_Stage1_E1_GCS; default: return mmu_idx; } @@ -193,9 +214,9 @@ static ARMMMUIdx ptw_idx_for_stage_2(CPUARMState *env, ARMMMUIdx stage2idx) return ARMMMUIdx_Phys_Realm; case ARMSS_Secure: if (stage2idx == ARMMMUIdx_Stage2_S) { - s2walk_secure = !(env->cp15.vstcr_el2 & VSTCR_SW); + s2walk_secure = !(env->cp15.vstcr_el2 & R_VSTCR_SW_MASK); } else { - s2walk_secure = !(env->cp15.vtcr_el2 & VTCR_NSW); + s2walk_secure = !(env->cp15.vtcr_el2 & R_VTCR_NSW_MASK); } return s2walk_secure ? 
ARMMMUIdx_Phys_S : ARMMMUIdx_Phys_NS; default: @@ -218,9 +239,9 @@ static uint64_t regime_ttbr(CPUARMState *env, ARMMMUIdx mmu_idx, int ttbrn) return env->cp15.vsttbr_el2; } if (ttbrn == 0) { - return env->cp15.ttbr0_el[regime_el(env, mmu_idx)]; + return env->cp15.ttbr0_el[regime_el(mmu_idx)]; } else { - return env->cp15.ttbr1_el[regime_el(env, mmu_idx)]; + return env->cp15.ttbr1_el[regime_el(mmu_idx)]; } } @@ -259,8 +280,10 @@ static bool regime_translation_disabled(CPUARMState *env, ARMMMUIdx mmu_idx, return (hcr_el2 & (HCR_DC | HCR_VM)) == 0; case ARMMMUIdx_E10_0: + case ARMMMUIdx_E10_0_GCS: case ARMMMUIdx_E10_1: case ARMMMUIdx_E10_1_PAN: + case ARMMMUIdx_E10_1_GCS: /* TGE means that EL0/1 act as if SCTLR_EL1.M is zero */ hcr_el2 = arm_hcr_el2_eff_secstate(env, space); if (hcr_el2 & HCR_TGE) { @@ -269,8 +292,10 @@ static bool regime_translation_disabled(CPUARMState *env, ARMMMUIdx mmu_idx, break; case ARMMMUIdx_Stage1_E0: + case ARMMMUIdx_Stage1_E0_GCS: case ARMMMUIdx_Stage1_E1: case ARMMMUIdx_Stage1_E1_PAN: + case ARMMMUIdx_Stage1_E1_GCS: /* HCR.DC means SCTLR_EL1.M behaves as 0 */ hcr_el2 = arm_hcr_el2_eff_secstate(env, space); if (hcr_el2 & HCR_DC) { @@ -279,10 +304,14 @@ static bool regime_translation_disabled(CPUARMState *env, ARMMMUIdx mmu_idx, break; case ARMMMUIdx_E20_0: + case ARMMMUIdx_E20_0_GCS: case ARMMMUIdx_E20_2: case ARMMMUIdx_E20_2_PAN: + case ARMMMUIdx_E20_2_GCS: case ARMMMUIdx_E2: + case ARMMMUIdx_E2_GCS: case ARMMMUIdx_E3: + case ARMMMUIdx_E3_GCS: case ARMMMUIdx_E30_0: case ARMMMUIdx_E30_3_PAN: break; @@ -303,6 +332,7 @@ static bool regime_translation_disabled(CPUARMState *env, ARMMMUIdx mmu_idx, static bool granule_protection_check(CPUARMState *env, uint64_t paddress, ARMSecuritySpace pspace, + ARMSecuritySpace ss, ARMMMUFaultInfo *fi) { MemTxAttrs attrs = { @@ -332,7 +362,7 @@ static bool granule_protection_check(CPUARMState *env, uint64_t paddress, * physical address size is invalid. */ pps = FIELD_EX64(gpccr, GPCCR, PPS); - if (pps > FIELD_EX64(cpu->isar.id_aa64mmfr0, ID_AA64MMFR0, PARANGE)) { + if (pps > FIELD_EX64_IDREG(&cpu->isar, ID_AA64MMFR0, PARANGE)) { goto fault_walk; } pps = pamax_map[pps]; @@ -371,18 +401,37 @@ static bool granule_protection_check(CPUARMState *env, uint64_t paddress, l0gptsz = 30 + FIELD_EX64(gpccr, GPCCR, L0GPTSZ); /* - * GPC Priority 2: Secure, Realm or Root address exceeds PPS. + * GPC Priority 2: Access to Secure, NonSecure or Realm is prevented + * by one of the GPCCR_EL3 address space disable bits (R_TCWMD). + * All of these bits are checked vs aa64_rme_gpc2 in gpccr_write. + */ + { + static const uint8_t disable_masks[4] = { + [ARMSS_Secure] = R_GPCCR_SPAD_MASK, + [ARMSS_NonSecure] = R_GPCCR_NSPAD_MASK, + [ARMSS_Root] = 0, + [ARMSS_Realm] = R_GPCCR_RLPAD_MASK, + }; + + if (gpccr & disable_masks[pspace]) { + goto fault_fail; + } + } + + /* + * GPC Priority 3: Secure, Realm or Root address exceeds PPS. * R_CPDSB: A NonSecure physical address input exceeding PPS * does not experience any fault. + * R_PBPSH: Other address spaces have fault suppressed by APPSAA. */ if (paddress & ~pps_mask) { - if (pspace == ARMSS_NonSecure) { + if (pspace == ARMSS_NonSecure || FIELD_EX64(gpccr, GPCCR, APPSAA)) { return true; } - goto fault_size; + goto fault_fail; } - /* GPC Priority 3: the base address of GPTBR_EL3 exceeds PPS. */ + /* GPC Priority 4: the base address of GPTBR_EL3 exceeds PPS. 
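Editor's note: the PPS handling in granule_protection_check() converts the encoded physical-address-size field into a bit count through a small map and then rejects addresses with any bit set above that range (paddress & ~pps_mask). A stand-alone sketch of that check; the PARANGE encoding-to-bits table follows the Arm ARM, but treat the exact values here as an assumption for the example:

    #include <stdint.h>
    #include <stdbool.h>
    #include <stdio.h>

    /* ID_AA64MMFR0_EL1.PARANGE encodings -> physical address bits (assumed). */
    static const uint8_t pamax_map[] = { 32, 36, 40, 42, 44, 48, 52 };

    static bool exceeds_pps(uint64_t paddress, unsigned parange_enc)
    {
        unsigned bits = pamax_map[parange_enc];
        uint64_t pps_mask = (bits >= 64) ? ~0ull : (1ull << bits) - 1;
        return (paddress & ~pps_mask) != 0;   /* any bit above the range set? */
    }

    int main(void)
    {
        printf("%d\n", exceeds_pps(1ull << 40, 2));  /* 40-bit range: 1 (faults) */
        printf("%d\n", exceeds_pps(1ull << 39, 2));  /* in range:     0          */
        return 0;
    }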
*/ tableaddr = env->cp15.gptbr_el3 << 12; if (tableaddr & ~pps_mask) { goto fault_size; @@ -463,18 +512,30 @@ static bool granule_protection_check(CPUARMState *env, uint64_t paddress, break; case 0b1111: /* all access */ return true; - case 0b1000: - case 0b1001: - case 0b1010: - case 0b1011: + case 0b1000: /* secure */ + if (!cpu_isar_feature(aa64_sel2, cpu)) { + goto fault_walk; + } + /* fall through */ + case 0b1001: /* non-secure */ + case 0b1010: /* root */ + case 0b1011: /* realm */ if (pspace == (gpi & 3)) { return true; } break; + case 0b1101: /* non-secure only */ + /* aa64_rme_gpc2 was checked in gpccr_write */ + if (FIELD_EX64(gpccr, GPCCR, NSO)) { + return (pspace == ARMSS_NonSecure && + (ss == ARMSS_NonSecure || ss == ARMSS_Root)); + } + goto fault_walk; default: goto fault_walk; /* reserved */ } + fault_fail: fi->gpcf = GPCF_Fail; goto fault_common; fault_eabt: @@ -575,12 +636,14 @@ static bool S1_ptw_translate(CPUARMState *env, S1Translate *ptw, * From gdbstub, do not use softmmu so that we don't modify the * state of the cpu at all, including softmmu tlb contents. */ - ARMSecuritySpace s2_space = S2_security_space(ptw->in_space, s2_mmu_idx); + ARMSecuritySpace s2_space + = S2_security_space(ptw->cur_space, s2_mmu_idx); S1Translate s2ptw = { .in_mmu_idx = s2_mmu_idx, .in_ptw_idx = ptw_idx_for_stage_2(env, s2_mmu_idx), .in_space = s2_space, .in_debug = true, + .in_prot_check = PAGE_READ, }; GetPhysAddrResult s2 = { }; @@ -617,7 +680,7 @@ static bool S1_ptw_translate(CPUARMState *env, S1Translate *ptw, } if (regime_is_stage2(s2_mmu_idx)) { - uint64_t hcr = arm_hcr_el2_eff_secstate(env, ptw->in_space); + uint64_t hcr = arm_hcr_el2_eff_secstate(env, ptw->cur_space); if ((hcr & HCR_PTW) && S2_attrs_are_device(hcr, pte_attrs)) { /* @@ -628,7 +691,7 @@ static bool S1_ptw_translate(CPUARMState *env, S1Translate *ptw, fi->s2addr = addr; fi->stage2 = true; fi->s1ptw = true; - fi->s1ns = fault_s1ns(ptw->in_space, s2_mmu_idx); + fi->s1ns = fault_s1ns(ptw->cur_space, s2_mmu_idx); return false; } } @@ -644,7 +707,7 @@ static bool S1_ptw_translate(CPUARMState *env, S1Translate *ptw, fi->s2addr = addr; fi->stage2 = regime_is_stage2(s2_mmu_idx); fi->s1ptw = fi->stage2; - fi->s1ns = fault_s1ns(ptw->in_space, s2_mmu_idx); + fi->s1ns = fault_s1ns(ptw->cur_space, s2_mmu_idx); return false; } @@ -831,7 +894,7 @@ static uint64_t arm_casq_ptw(CPUARMState *env, uint64_t old_val, fi->s2addr = ptw->out_virt; fi->stage2 = true; fi->s1ptw = true; - fi->s1ns = fault_s1ns(ptw->in_space, ptw->in_ptw_idx); + fi->s1ns = fault_s1ns(ptw->cur_space, ptw->in_ptw_idx); return 0; } @@ -949,7 +1012,7 @@ static int ap_to_rw_prot(CPUARMState *env, ARMMMUIdx mmu_idx, int ap, int domain_prot) { return ap_to_rw_prot_is_user(env, mmu_idx, ap, domain_prot, - regime_is_user(env, mmu_idx)); + regime_is_user(mmu_idx)); } /* @@ -975,7 +1038,7 @@ static int simple_ap_to_rw_prot_is_user(int ap, bool is_user) static int simple_ap_to_rw_prot(CPUARMState *env, ARMMMUIdx mmu_idx, int ap) { - return simple_ap_to_rw_prot_is_user(ap, regime_is_user(env, mmu_idx)); + return simple_ap_to_rw_prot_is_user(ap, regime_is_user(mmu_idx)); } static bool get_phys_addr_v5(CPUARMState *env, S1Translate *ptw, @@ -1008,7 +1071,7 @@ static bool get_phys_addr_v5(CPUARMState *env, S1Translate *ptw, } type = (desc & 3); domain = (desc >> 5) & 0x0f; - if (regime_el(env, ptw->in_mmu_idx) == 1) { + if (regime_el(ptw->in_mmu_idx) == 1) { dacr = env->cp15.dacr_ns; } else { dacr = env->cp15.dacr_s; @@ -1061,11 +1124,10 @@ static bool 
get_phys_addr_v5(CPUARMState *env, S1Translate *ptw, ap = (desc >> (4 + ((address >> 9) & 6))) & 3; result->f.lg_page_size = 12; break; - case 3: /* 1k page, or ARMv6/XScale "extended small (4k) page" */ + case 3: /* 1k page, or ARMv6 "extended small (4k) page" */ if (type == 1) { - /* ARMv6/XScale extended small page format */ - if (arm_feature(env, ARM_FEATURE_XSCALE) - || arm_feature(env, ARM_FEATURE_V6)) { + /* ARMv6 extended small page format */ + if (arm_feature(env, ARM_FEATURE_V6)) { phys_addr = (desc & 0xfffff000) | (address & 0xfff); result->f.lg_page_size = 12; } else { @@ -1089,7 +1151,7 @@ static bool get_phys_addr_v5(CPUARMState *env, S1Translate *ptw, } result->f.prot = ap_to_rw_prot(env, ptw->in_mmu_idx, ap, domain_prot); result->f.prot |= result->f.prot ? PAGE_EXEC : 0; - if (!(result->f.prot & (1 << access_type))) { + if (ptw->in_prot_check & ~result->f.prot) { /* Access permission fault. */ fi->type = ARMFault_Permission; goto do_fault; @@ -1148,7 +1210,7 @@ static bool get_phys_addr_v6(CPUARMState *env, S1Translate *ptw, /* Page or Section. */ domain = (desc >> 5) & 0x0f; } - if (regime_el(env, mmu_idx) == 1) { + if (regime_el(mmu_idx) == 1) { dacr = env->cp15.dacr_ns; } else { dacr = env->cp15.dacr_s; @@ -1212,7 +1274,7 @@ static bool get_phys_addr_v6(CPUARMState *env, S1Translate *ptw, g_assert_not_reached(); } } - out_space = ptw->in_space; + out_space = ptw->cur_space; if (ns) { /* * The NS bit will (as required by the architecture) have no effect if @@ -1242,8 +1304,8 @@ static bool get_phys_addr_v6(CPUARMState *env, S1Translate *ptw, } result->f.prot = get_S1prot(env, mmu_idx, false, user_rw, prot_rw, - xn, pxn, result->f.attrs.space, out_space); - if (!(result->f.prot & (1 << access_type))) { + xn, pxn, ptw->in_space, out_space); + if (ptw->in_prot_check & ~result->f.prot) { /* Access permission fault. 
*/ fi->type = ARMFault_Permission; goto do_fault; @@ -1266,7 +1328,7 @@ do_fault: * @xn: XN (execute-never) bits * @s1_is_el0: true if this is S2 of an S1+2 walk for EL0 */ -static int get_S2prot_noexecute(int s2ap) +static int get_S2prot(CPUARMState *env, int s2ap, int xn, bool s1_is_el0) { int prot = 0; @@ -1276,12 +1338,6 @@ static int get_S2prot_noexecute(int s2ap) if (s2ap & 2) { prot |= PAGE_WRITE; } - return prot; -} - -static int get_S2prot(CPUARMState *env, int s2ap, int xn, bool s1_is_el0) -{ - int prot = get_S2prot_noexecute(s2ap); if (cpu_isar_feature(any_tts2uxn, env_archcpu(env))) { switch (xn) { @@ -1313,6 +1369,44 @@ static int get_S2prot(CPUARMState *env, int s2ap, int xn, bool s1_is_el0) return prot; } +static int get_S2prot_indirect(CPUARMState *env, GetPhysAddrResult *result, + int pi_index, int po_index, bool s1_is_el0) +{ + /* Last index is (priv, unpriv, ttw) */ + static const uint8_t perm_table[16][3] = { + /* 0 */ { 0, 0, 0 }, /* no access */ + /* 1 */ { 0, 0, 0 }, /* reserved */ + /* 2 */ { PAGE_READ, PAGE_READ, PAGE_READ | PAGE_WRITE }, + /* 3 */ { PAGE_READ, PAGE_READ, PAGE_READ | PAGE_WRITE }, + /* 4 */ { PAGE_WRITE, PAGE_WRITE, 0 }, + /* 5 */ { 0, 0, 0 }, /* reserved */ + /* 6 */ { PAGE_READ, PAGE_READ, PAGE_READ | PAGE_WRITE }, + /* 7 */ { PAGE_READ, PAGE_READ, PAGE_READ | PAGE_WRITE }, + /* 8 */ { PAGE_READ, PAGE_READ, PAGE_READ }, + /* 9 */ { PAGE_READ, PAGE_READ | PAGE_EXEC, PAGE_READ }, + /* A */ { PAGE_READ | PAGE_EXEC, PAGE_READ, PAGE_READ }, + /* B */ { PAGE_READ | PAGE_EXEC, PAGE_READ | PAGE_EXEC, PAGE_READ }, + /* C */ { PAGE_READ | PAGE_WRITE, + PAGE_READ | PAGE_WRITE, + PAGE_READ | PAGE_WRITE }, + /* D */ { PAGE_READ | PAGE_WRITE, + PAGE_READ | PAGE_WRITE | PAGE_EXEC, + PAGE_READ | PAGE_WRITE }, + /* E */ { PAGE_READ | PAGE_WRITE | PAGE_EXEC, + PAGE_READ | PAGE_WRITE, + PAGE_READ | PAGE_WRITE }, + /* F */ { PAGE_READ | PAGE_WRITE | PAGE_EXEC, + PAGE_READ | PAGE_WRITE | PAGE_EXEC, + PAGE_READ | PAGE_WRITE }, + }; + + uint64_t pir = (env->cp15.scr_el3 & SCR_PIEN ? env->cp15.s2pir_el2 : 0); + int s2pi = extract64(pir, pi_index * 4, 4); + + result->f.prot = perm_table[s2pi][2]; + return perm_table[s2pi][s1_is_el0]; +} + /* * Translate section/page access permissions to protection flags * @env: CPUARMState @@ -1330,7 +1424,7 @@ static int get_S1prot(CPUARMState *env, ARMMMUIdx mmu_idx, bool is_aa64, ARMSecuritySpace in_pa, ARMSecuritySpace out_pa) { ARMCPU *cpu = env_archcpu(env); - bool is_user = regime_is_user(env, mmu_idx); + bool is_user = regime_is_user(mmu_idx); bool have_wxn; int wxn = 0; @@ -1347,10 +1441,10 @@ static int get_S1prot(CPUARMState *env, ARMMMUIdx mmu_idx, bool is_aa64, * We make the IMPDEF choices that SCR_EL3.SIF and Realm EL2&0 * do not affect EPAN. */ - if (user_rw && regime_is_pan(env, mmu_idx)) { + if (user_rw && regime_is_pan(mmu_idx)) { prot_rw = 0; } else if (cpu_isar_feature(aa64_pan3, cpu) && is_aa64 && - regime_is_pan(env, mmu_idx) && + regime_is_pan(mmu_idx) && (regime_sctlr(env, mmu_idx) & SCTLR_EPAN) && !xn) { prot_rw = 0; } @@ -1407,7 +1501,7 @@ static int get_S1prot(CPUARMState *env, ARMMMUIdx mmu_idx, bool is_aa64, xn = pxn || (user_rw & PAGE_WRITE); } } else if (arm_feature(env, ARM_FEATURE_V7)) { - switch (regime_el(env, mmu_idx)) { + switch (regime_el(mmu_idx)) { case 1: case 3: if (is_user) { @@ -1434,11 +1528,115 @@ static int get_S1prot(CPUARMState *env, ARMMMUIdx mmu_idx, bool is_aa64, return prot_rw | PAGE_EXEC; } +/* Extra page permission bits, during get_S1prot_indirect only. 
*/ +#define PAGE_GCS (1 << 3) +#define PAGE_WXN (1 << 4) +#define PAGE_OVERLAY (1 << 5) +QEMU_BUILD_BUG_ON(PAGE_RWX & (PAGE_GCS | PAGE_WXN | PAGE_OVERLAY)); + +static int get_S1prot_indirect(CPUARMState *env, S1Translate *ptw, + ARMMMUIdx mmu_idx, int pi_index, int po_index, + ARMSecuritySpace in_pa, ARMSecuritySpace out_pa) +{ + static const uint8_t perm_table[16] = { + /* 0 */ PAGE_OVERLAY, /* no access */ + /* 1 */ PAGE_OVERLAY | PAGE_READ, + /* 2 */ PAGE_OVERLAY | PAGE_EXEC, + /* 3 */ PAGE_OVERLAY | PAGE_READ | PAGE_EXEC, + /* 4 */ PAGE_OVERLAY, /* reserved */ + /* 5 */ PAGE_OVERLAY | PAGE_READ | PAGE_WRITE, + /* 6 */ PAGE_OVERLAY | PAGE_READ | PAGE_WRITE | PAGE_EXEC | PAGE_WXN, + /* 7 */ PAGE_OVERLAY | PAGE_READ | PAGE_WRITE | PAGE_EXEC, + /* 8 */ PAGE_READ, + /* 9 */ PAGE_READ | PAGE_GCS, + /* A */ PAGE_READ | PAGE_EXEC, + /* B */ 0, /* reserved */ + /* C */ PAGE_READ | PAGE_WRITE, + /* D */ 0, /* reserved */ + /* E */ PAGE_READ | PAGE_WRITE | PAGE_EXEC, + /* F */ 0, /* reserved */ + }; + + uint32_t el = regime_el(mmu_idx); + uint64_t pir = env->cp15.pir_el[el]; + uint64_t pire0 = 0; + int perm; + + if (el < 3) { + if (arm_feature(env, ARM_FEATURE_EL3) + && !(env->cp15.scr_el3 & SCR_PIEN)) { + pir = 0; + } else if (el == 2) { + pire0 = env->cp15.pire0_el2; + } else if (!ptw->in_nv1) { + pire0 = env->cp15.pir_el[0]; + } + } + perm = perm_table[extract64(pir, pi_index * 4, 4)]; + + if (regime_has_2_ranges(mmu_idx)) { + int p_perm = perm; + int u_perm = perm_table[extract64(pire0, pi_index * 4, 4)]; + + if ((p_perm & (PAGE_EXEC | PAGE_GCS)) && + (u_perm & (PAGE_WRITE | PAGE_GCS))) { + p_perm &= ~(PAGE_RWX | PAGE_GCS); + u_perm &= ~(PAGE_RWX | PAGE_GCS); + } + if ((u_perm & (PAGE_RWX | PAGE_GCS)) && regime_is_pan(mmu_idx)) { + p_perm &= ~(PAGE_READ | PAGE_WRITE); + } + perm = regime_is_user(mmu_idx) ? u_perm : p_perm; + } + + if (in_pa != out_pa) { + switch (in_pa) { + case ARMSS_Root: + /* + * R_ZWRVD: permission fault for insn fetched from non-Root, + * I_WWBFB: SIF has no effect in EL3. + */ + perm &= ~(PAGE_EXEC | PAGE_GCS); + break; + case ARMSS_Realm: + /* + * R_PKTDS: permission fault for insn fetched from non-Realm, + * for Realm EL2 or EL2&0. The corresponding fault for EL1&0 + * happens during any stage2 translation. + */ + if (el == 2) { + perm &= ~(PAGE_EXEC | PAGE_GCS); + } + break; + case ARMSS_Secure: + if (env->cp15.scr_el3 & SCR_SIF) { + perm &= ~(PAGE_EXEC | PAGE_GCS); + } + break; + default: + /* Input NonSecure must have output NonSecure. */ + g_assert_not_reached(); + } + } + + if (regime_is_gcs(mmu_idx)) { + /* + * Note that the one s1perms.gcs bit controls both read and write + * access via AccessType_GCS. See AArch64.S1CheckPermissions. + */ + perm = (perm & PAGE_GCS ? PAGE_READ | PAGE_WRITE : 0); + } else if (perm & PAGE_WXN) { + perm &= ~PAGE_EXEC; + } + + return perm & PAGE_RWX; +} + static ARMVAParameters aa32_va_parameters(CPUARMState *env, uint32_t va, ARMMMUIdx mmu_idx) { uint64_t tcr = regime_tcr(env, mmu_idx); - uint32_t el = regime_el(env, mmu_idx); + uint32_t el = regime_el(mmu_idx); int select, tsz; bool epd, hpd; @@ -1459,8 +1657,12 @@ static ARMVAParameters aa32_va_parameters(CPUARMState *env, uint32_t va, } tsz = sextract32(tcr, 0, 4) + 8; select = 0; - hpd = false; epd = false; + /* + * Stage2 does not have hierarchical permissions. + * Thus disabling them makes things easier during ptw. 
+ */ + hpd = true; } else if (el == 2) { /* HTCR */ tsz = extract32(tcr, 0, 3); @@ -1625,12 +1827,6 @@ static bool lpae_block_desc_valid(ARMCPU *cpu, bool ds, } } -static bool nv_nv1_enabled(CPUARMState *env, S1Translate *ptw) -{ - uint64_t hcr = arm_hcr_el2_eff_secstate(env, ptw->in_space); - return (hcr & (HCR_NV | HCR_NV1)) == (HCR_NV | HCR_NV1); -} - /** * get_phys_addr_lpae: perform one stage of page table walk, LPAE format * @@ -1665,8 +1861,8 @@ static bool get_phys_addr_lpae(CPUARMState *env, S1Translate *ptw, int32_t stride; int addrsize, inputsize, outputsize; uint64_t tcr = regime_tcr(env, mmu_idx); - int ap, xn, pxn; - uint32_t el = regime_el(env, mmu_idx); + int ap, prot; + uint32_t el = regime_el(mmu_idx); uint64_t descaddrmask; bool aarch64 = arm_el_is_aa64(env, el); uint64_t descriptor, new_descriptor; @@ -1683,6 +1879,16 @@ static bool get_phys_addr_lpae(CPUARMState *env, S1Translate *ptw, level = 0; /* + * Cache NV1 before we adjust ptw->in_space for NSTable. + * Note that this is only relevant for EL1&0, and that + * computing it would assert for ARMSS_Root. + */ + if (el == 1) { + uint64_t hcr = arm_hcr_el2_eff_secstate(env, ptw->in_space); + ptw->in_nv1 = (hcr & (HCR_NV | HCR_NV1)) == (HCR_NV | HCR_NV1); + } + + /* * If TxSZ is programmed to a value larger than the maximum, * or smaller than the effective minimum, it is IMPLEMENTATION * DEFINED whether we behave as if the field were programmed @@ -1703,7 +1909,7 @@ static bool get_phys_addr_lpae(CPUARMState *env, S1Translate *ptw, * ID_AA64MMFR0 is a read-only register so values outside of the * supported mappings can be considered an implementation error. */ - ps = FIELD_EX64(cpu->isar.id_aa64mmfr0, ID_AA64MMFR0, PARANGE); + ps = FIELD_EX64_IDREG(&cpu->isar, ID_AA64MMFR0, PARANGE); ps = MIN(ps, param.ps); assert(ps < ARRAY_SIZE(pamax_map)); outputsize = pamax_map[ps]; @@ -1845,7 +2051,7 @@ static bool get_phys_addr_lpae(CPUARMState *env, S1Translate *ptw, * NonSecure. With RME, the EL3 translation regime does not change * from Root to NonSecure. */ - if (ptw->in_space == ARMSS_Secure + if (ptw->cur_space == ARMSS_Secure && !regime_is_stage2(mmu_idx) && extract32(tableattrs, 4, 1)) { /* @@ -1855,7 +2061,7 @@ static bool get_phys_addr_lpae(CPUARMState *env, S1Translate *ptw, QEMU_BUILD_BUG_ON(ARMMMUIdx_Phys_S + 1 != ARMMMUIdx_Phys_NS); QEMU_BUILD_BUG_ON(ARMMMUIdx_Stage2_S + 1 != ARMMMUIdx_Stage2); ptw->in_ptw_idx += 1; - ptw->in_space = ARMSS_NonSecure; + ptw->cur_space = ARMSS_NonSecure; } if (!S1_ptw_translate(env, ptw, descaddr, fi)) { @@ -1922,7 +2128,12 @@ static bool get_phys_addr_lpae(CPUARMState *env, S1Translate *ptw, descaddr &= ~(hwaddr)(page_size - 1); descaddr |= (address & (page_size - 1)); - if (likely(!ptw->in_debug)) { + /* + * For AccessType_AT, DB is not updated (AArch64.SetDirtyFlag), + * and it is IMPLEMENTATION DEFINED whether AF is updated + * (AArch64.SetAccessFlag; qemu chooses to not update). + */ + if (likely(!ptw->in_at)) { /* * Access flag. * If HA is enabled, prepare to update the descriptor below. @@ -1961,21 +2172,31 @@ static bool get_phys_addr_lpae(CPUARMState *env, S1Translate *ptw, * except NSTable (which we have already handled). */ attrs = new_descriptor & (MAKE_64BIT_MASK(2, 10) | MAKE_64BIT_MASK(50, 14)); - if (!regime_is_stage2(mmu_idx)) { - if (!param.hpd) { - attrs |= extract64(tableattrs, 0, 2) << 53; /* XN, PXN */ - /* - * The sense of AP[1] vs APTable[0] is reversed, as APTable[0] == 1 - * means "force PL1 access only", which means forcing AP[1] to 0. 
- */ - attrs &= ~(extract64(tableattrs, 2, 1) << 6); /* !APT[0] => AP[1] */ - attrs |= extract32(tableattrs, 3, 1) << 7; /* APT[1] => AP[2] */ - } + if (!param.hpd) { + attrs |= extract64(tableattrs, 0, 2) << 53; /* XN, PXN */ + /* + * The sense of AP[1] vs APTable[0] is reversed, as APTable[0] == 1 + * means "force PL1 access only", which means forcing AP[1] to 0. + */ + attrs &= ~(extract64(tableattrs, 2, 1) << 6); /* !APT[0] => AP[1] */ + attrs |= extract32(tableattrs, 3, 1) << 7; /* APT[1] => AP[2] */ } ap = extract32(attrs, 6, 2); - out_space = ptw->in_space; + out_space = ptw->cur_space; if (regime_is_stage2(mmu_idx)) { + if (param.pie) { + int pi = extract64(attrs, 6, 1) + | (extract64(attrs, 51, 1) << 1) + | (extract64(attrs, 53, 2) << 2); + int po = extract64(attrs, 60, 3); + prot = get_S2prot_indirect(env, result, pi, po, ptw->in_s1_is_el0); + } else { + int xn = extract64(attrs, 53, 2); + prot = get_S2prot(env, ap, xn, ptw->in_s1_is_el0); + /* Install TTW permissions in f.prot. */ + result->f.prot = prot & (PAGE_READ | PAGE_WRITE); + } /* * R_GYNXY: For stage2 in Realm security state, bit 55 is NS. * The bit remains ignored for other security states. @@ -1984,11 +2205,9 @@ static bool get_phys_addr_lpae(CPUARMState *env, S1Translate *ptw, */ if (out_space == ARMSS_Realm && extract64(attrs, 55, 1)) { out_space = ARMSS_NonSecure; - result->f.prot = get_S2prot_noexecute(ap); - } else { - xn = extract64(attrs, 53, 2); - result->f.prot = get_S2prot(env, ap, xn, ptw->in_s1_is_el0); + prot &= ~PAGE_EXEC; } + result->s2prot = prot; result->cacheattrs.is_s2_format = true; result->cacheattrs.attrs = extract32(attrs, 2, 4); @@ -2002,7 +2221,6 @@ static bool get_phys_addr_lpae(CPUARMState *env, S1Translate *ptw, int nse, ns = extract32(attrs, 5, 1); uint8_t attrindx; uint64_t mair; - int user_rw, prot_rw; switch (out_space) { case ARMSS_Root: @@ -2051,37 +2269,57 @@ static bool get_phys_addr_lpae(CPUARMState *env, S1Translate *ptw, default: g_assert_not_reached(); } - xn = extract64(attrs, 54, 1); - pxn = extract64(attrs, 53, 1); - if (el == 1 && nv_nv1_enabled(env, ptw)) { + if (param.pie) { + int pi = extract64(attrs, 6, 1) + | (extract64(attrs, 51, 1) << 1) + | (extract64(attrs, 53, 2) << 2); + int po = extract64(attrs, 60, 3); /* - * With FEAT_NV, when HCR_EL2.{NV,NV1} == {1,1}, the block/page - * descriptor bit 54 holds PXN, 53 is RES0, and the effective value - * of UXN is 0. Similarly for bits 59 and 60 in table descriptors - * (which we have already folded into bits 53 and 54 of attrs). - * AP[1] (descriptor bit 6, our ap bit 0) is treated as 0. - * Similarly, APTable[0] from the table descriptor is treated as 0; - * we already folded this into AP[1] and squashing that to 0 does - * the right thing. + * Note that we modified ptw->in_space earlier for NSTable, but + * result->f.attrs retains a copy of the original security space. */ - pxn = xn; - xn = 0; - ap &= ~1; - } + prot = get_S1prot_indirect(env, ptw, mmu_idx, pi, po, + result->f.attrs.space, out_space); + } else if (regime_is_gcs(mmu_idx)) { + /* + * While one must use indirect permissions to successfully + * use GCS instructions, AArch64.S1DirectBasePermissions + * faithfully supplies s1perms.gcs = 0, Just In Case. 
+ */ + prot = 0; + } else { + int xn = extract64(attrs, 54, 1); + int pxn = extract64(attrs, 53, 1); + int user_rw, prot_rw; - user_rw = simple_ap_to_rw_prot_is_user(ap, true); - prot_rw = simple_ap_to_rw_prot_is_user(ap, false); - /* - * Note that we modified ptw->in_space earlier for NSTable, but - * result->f.attrs retains a copy of the original security space. - */ - result->f.prot = get_S1prot(env, mmu_idx, aarch64, user_rw, prot_rw, - xn, pxn, result->f.attrs.space, out_space); + if (el == 1 && ptw->in_nv1) { + /* + * With FEAT_NV, when HCR_EL2.{NV,NV1} == {1,1}, + * the block/page descriptor bit 54 holds PXN, + * 53 is RES0, and the effective value of UXN is 0. + * Similarly for bits 59 and 60 in table descriptors + * (which we have already folded into bits 53 and 54 of attrs). + * AP[1] (descriptor bit 6, our ap bit 0) is treated as 0. + * Similarly, APTable[0] from the table descriptor is treated + * as 0; we already folded this into AP[1] and squashing + * that to 0 does the right thing. + */ + pxn = xn; + xn = 0; + ap &= ~1; + } + + user_rw = simple_ap_to_rw_prot_is_user(ap, true); + prot_rw = simple_ap_to_rw_prot_is_user(ap, false); + prot = get_S1prot(env, mmu_idx, aarch64, user_rw, prot_rw, + xn, pxn, ptw->in_space, out_space); + } + result->f.prot = prot; /* Index into MAIR registers for cache attributes */ attrindx = extract32(attrs, 2, 3); - mair = env->cp15.mair_el[regime_el(env, mmu_idx)]; + mair = env->cp15.mair_el[regime_el(mmu_idx)]; assert(attrindx <= 7); result->cacheattrs.is_s2_format = false; result->cacheattrs.attrs = extract64(mair, attrindx * 8, 8); @@ -2123,11 +2361,27 @@ static bool get_phys_addr_lpae(CPUARMState *env, S1Translate *ptw, result->f.tlb_fill_flags = 0; } - if (!(result->f.prot & (1 << access_type))) { + if (ptw->in_prot_check & ~prot) { fi->type = ARMFault_Permission; goto do_fault; } + /* S1PIE and S2PIE both have a bit for software dirty page tracking. */ + if (access_type == MMU_DATA_STORE && param.pie) { + /* + * For S1PIE, bit 7 is nDirty and both HA and HD are checked. + * For S2PIE, bit 7 is Dirty and only HD is checked. + */ + bool bit7 = extract64(attrs, 7, 1); + if (regime_is_stage2(mmu_idx) + ? !bit7 && !param.hd + : bit7 && !(param.ha && param.hd)) { + fi->type = ARMFault_Permission; + fi->dirtybit = true; + goto do_fault; + } + } + /* If FEAT_HAFDBS has made changes, update the PTE. */ if (new_descriptor != descriptor) { new_descriptor = arm_casq_ptw(env, descriptor, new_descriptor, ptw, fi); @@ -2175,7 +2429,7 @@ static bool get_phys_addr_lpae(CPUARMState *env, S1Translate *ptw, fi->level = level; fi->stage2 = regime_is_stage2(mmu_idx); } - fi->s1ns = fault_s1ns(ptw->in_space, mmu_idx); + fi->s1ns = fault_s1ns(ptw->cur_space, mmu_idx); return true; } @@ -2190,7 +2444,7 @@ static bool get_phys_addr_pmsav5(CPUARMState *env, uint32_t mask; uint32_t base; ARMMMUIdx mmu_idx = ptw->in_mmu_idx; - bool is_user = regime_is_user(env, mmu_idx); + bool is_user = regime_is_user(mmu_idx); if (regime_translation_disabled(env, mmu_idx, ptw->in_space)) { /* MPU disabled. 
*/ @@ -2357,7 +2611,7 @@ static bool get_phys_addr_pmsav7(CPUARMState *env, ARMCPU *cpu = env_archcpu(env); int n; ARMMMUIdx mmu_idx = ptw->in_mmu_idx; - bool is_user = regime_is_user(env, mmu_idx); + bool is_user = regime_is_user(mmu_idx); bool secure = arm_space_is_secure(ptw->in_space); result->f.phys_addr = address; @@ -2537,13 +2791,13 @@ static bool get_phys_addr_pmsav7(CPUARMState *env, fi->type = ARMFault_Permission; fi->level = 1; - return !(result->f.prot & (1 << access_type)); + return (ptw->in_prot_check & ~result->f.prot) != 0; } static uint32_t *regime_rbar(CPUARMState *env, ARMMMUIdx mmu_idx, uint32_t secure) { - if (regime_el(env, mmu_idx) == 2) { + if (regime_el(mmu_idx) == 2) { return env->pmsav8.hprbar; } else { return env->pmsav8.rbar[secure]; @@ -2553,7 +2807,7 @@ static uint32_t *regime_rbar(CPUARMState *env, ARMMMUIdx mmu_idx, static uint32_t *regime_rlar(CPUARMState *env, ARMMMUIdx mmu_idx, uint32_t secure) { - if (regime_el(env, mmu_idx) == 2) { + if (regime_el(mmu_idx) == 2) { return env->pmsav8.hprlar; } else { return env->pmsav8.rlar[secure]; @@ -2561,8 +2815,9 @@ static uint32_t *regime_rlar(CPUARMState *env, ARMMMUIdx mmu_idx, } bool pmsav8_mpu_lookup(CPUARMState *env, uint32_t address, - MMUAccessType access_type, ARMMMUIdx mmu_idx, - bool secure, GetPhysAddrResult *result, + MMUAccessType access_type, unsigned prot_check, + ARMMMUIdx mmu_idx, bool secure, + GetPhysAddrResult *result, ARMMMUFaultInfo *fi, uint32_t *mregion) { /* @@ -2576,7 +2831,7 @@ bool pmsav8_mpu_lookup(CPUARMState *env, uint32_t address, * memory system to use a subpage. */ ARMCPU *cpu = env_archcpu(env); - bool is_user = regime_is_user(env, mmu_idx); + bool is_user = regime_is_user(mmu_idx); int n; int matchregion = -1; bool hit = false; @@ -2584,7 +2839,7 @@ bool pmsav8_mpu_lookup(CPUARMState *env, uint32_t address, uint32_t addr_page_limit = addr_page_base + (TARGET_PAGE_SIZE - 1); int region_counter; - if (regime_el(env, mmu_idx) == 2) { + if (regime_el(mmu_idx) == 2) { region_counter = cpu->pmsav8r_hdregion; } else { region_counter = cpu->pmsav7_dregion; @@ -2710,7 +2965,7 @@ bool pmsav8_mpu_lookup(CPUARMState *env, uint32_t address, xn = 1; } - if (regime_el(env, mmu_idx) == 2) { + if (regime_el(mmu_idx) == 2) { result->f.prot = simple_ap_to_rw_prot_is_user(ap, mmu_idx != ARMMMUIdx_E2); } else { @@ -2719,7 +2974,7 @@ bool pmsav8_mpu_lookup(CPUARMState *env, uint32_t address, if (!arm_feature(env, ARM_FEATURE_M)) { uint8_t attrindx = extract32(matched_rlar, 1, 3); - uint64_t mair = env->cp15.mair_el[regime_el(env, mmu_idx)]; + uint64_t mair = env->cp15.mair_el[regime_el(mmu_idx)]; uint8_t sh = extract32(matched_rlar, 3, 2); if (regime_sctlr(env, mmu_idx) & SCTLR_WXN && @@ -2727,7 +2982,7 @@ bool pmsav8_mpu_lookup(CPUARMState *env, uint32_t address, xn = 0x1; } - if ((regime_el(env, mmu_idx) == 1) && + if ((regime_el(mmu_idx) == 1) && regime_sctlr(env, mmu_idx) & SCTLR_UWXN && ap == 0x1) { pxn = 0x1; } @@ -2750,7 +3005,7 @@ bool pmsav8_mpu_lookup(CPUARMState *env, uint32_t address, if (arm_feature(env, ARM_FEATURE_M)) { fi->level = 1; } - return !(result->f.prot & (1 << access_type)); + return (prot_check & ~result->f.prot) != 0; } static bool v8m_is_sau_exempt(CPUARMState *env, @@ -2952,8 +3207,8 @@ static bool get_phys_addr_pmsav8(CPUARMState *env, } } - ret = pmsav8_mpu_lookup(env, address, access_type, mmu_idx, secure, - result, fi, NULL); + ret = pmsav8_mpu_lookup(env, address, access_type, ptw->in_prot_check, + mmu_idx, secure, result, fi, NULL); if (sattrs.subpage) { 
result->f.lg_page_size = 0; } @@ -3212,7 +3467,7 @@ static bool get_phys_addr_disabled(CPUARMState *env, break; default: - r_el = regime_el(env, mmu_idx); + r_el = regime_el(mmu_idx); if (arm_el_is_aa64(env, r_el)) { int pamax = arm_pamax(env_archcpu(env)); uint64_t tcr = env->cp15.tcr_el[r_el]; @@ -3320,7 +3575,7 @@ static bool get_phys_addr_twostage(CPUARMState *env, S1Translate *ptw, fi->s2addr = ipa; /* Combine the S1 and S2 perms. */ - result->f.prot &= s1_prot; + result->f.prot = s1_prot & result->s2prot; /* If S2 fails, return early. */ if (ret) { @@ -3372,9 +3627,9 @@ static bool get_phys_addr_twostage(CPUARMState *env, S1Translate *ptw, */ if (in_space == ARMSS_Secure) { result->f.attrs.secure = - !(env->cp15.vstcr_el2 & (VSTCR_SA | VSTCR_SW)) + !(env->cp15.vstcr_el2 & (R_VSTCR_SA_MASK | R_VSTCR_SW_MASK)) && (ipa_secure - || !(env->cp15.vtcr_el2 & (VTCR_NSA | VTCR_NSW))); + || !(env->cp15.vtcr_el2 & (R_VTCR_NSA_MASK | R_VTCR_NSW_MASK))); result->f.attrs.space = arm_secure_to_space(result->f.attrs.secure); } @@ -3395,6 +3650,7 @@ static bool get_phys_addr_nogpc(CPUARMState *env, S1Translate *ptw, * cannot upgrade a NonSecure translation regime's attributes * to Secure or Realm. */ + ptw->cur_space = ptw->in_space; result->f.attrs.space = ptw->in_space; result->f.attrs.secure = arm_space_is_secure(ptw->in_space); @@ -3456,7 +3712,7 @@ static bool get_phys_addr_nogpc(CPUARMState *env, S1Translate *ptw, break; } - result->f.attrs.user = regime_is_user(env, mmu_idx); + result->f.attrs.user = regime_is_user(mmu_idx); /* * Fast Context Switch Extension. This doesn't exist at all in v8. @@ -3464,7 +3720,7 @@ static bool get_phys_addr_nogpc(CPUARMState *env, S1Translate *ptw, */ if (address < 0x02000000 && mmu_idx != ARMMMUIdx_Stage2 && !arm_feature(env, ARM_FEATURE_V8)) { - if (regime_el(env, mmu_idx) == 3) { + if (regime_el(mmu_idx) == 3) { address += env->cp15.fcseidr_s; } else { address += env->cp15.fcseidr_ns; @@ -3530,25 +3786,33 @@ static bool get_phys_addr_gpc(CPUARMState *env, S1Translate *ptw, return true; } if (!granule_protection_check(env, result->f.phys_addr, - result->f.attrs.space, fi)) { + result->f.attrs.space, ptw->in_space, fi)) { fi->type = ARMFault_GPCFOnOutput; return true; } return false; } -bool get_phys_addr_with_space_nogpc(CPUARMState *env, vaddr address, - MMUAccessType access_type, MemOp memop, - ARMMMUIdx mmu_idx, ARMSecuritySpace space, - GetPhysAddrResult *result, - ARMMMUFaultInfo *fi) +bool get_phys_addr_for_at(CPUARMState *env, vaddr address, + unsigned prot_check, ARMMMUIdx mmu_idx, + ARMSecuritySpace space, GetPhysAddrResult *result, + ARMMMUFaultInfo *fi) { S1Translate ptw = { .in_mmu_idx = mmu_idx, .in_space = space, + .in_at = true, + .in_prot_check = prot_check, }; - return get_phys_addr_nogpc(env, &ptw, address, access_type, - memop, result, fi); + /* + * I_MXTJT: Granule protection checks are not performed on the final + * address of a successful translation. This is a translation not a + * memory reference, so MMU_DATA_LOAD is arbitrary (the exact protection + * check is handled or bypassed by .in_prot_check) and "memop = MO_8" + * bypasses any alignment check. 
+ */ + return get_phys_addr_nogpc(env, &ptw, address, + MMU_DATA_LOAD, MO_8, result, fi); } static ARMSecuritySpace @@ -3558,15 +3822,22 @@ arm_mmu_idx_to_security_space(CPUARMState *env, ARMMMUIdx mmu_idx) switch (mmu_idx) { case ARMMMUIdx_E10_0: + case ARMMMUIdx_E10_0_GCS: case ARMMMUIdx_E10_1: case ARMMMUIdx_E10_1_PAN: + case ARMMMUIdx_E10_1_GCS: case ARMMMUIdx_E20_0: + case ARMMMUIdx_E20_0_GCS: case ARMMMUIdx_E20_2: case ARMMMUIdx_E20_2_PAN: + case ARMMMUIdx_E20_2_GCS: case ARMMMUIdx_Stage1_E0: + case ARMMMUIdx_Stage1_E0_GCS: case ARMMMUIdx_Stage1_E1: case ARMMMUIdx_Stage1_E1_PAN: + case ARMMMUIdx_Stage1_E1_GCS: case ARMMMUIdx_E2: + case ARMMMUIdx_E2_GCS: ss = arm_security_space_below_el3(env); break; case ARMMMUIdx_Stage2: @@ -3595,6 +3866,7 @@ arm_mmu_idx_to_security_space(CPUARMState *env, ARMMMUIdx mmu_idx) ss = ARMSS_Secure; break; case ARMMMUIdx_E3: + case ARMMMUIdx_E3_GCS: case ARMMMUIdx_E30_0: case ARMMMUIdx_E30_3_PAN: if (arm_feature(env, ARM_FEATURE_AARCH64) && @@ -3624,6 +3896,7 @@ bool get_phys_addr(CPUARMState *env, vaddr address, S1Translate ptw = { .in_mmu_idx = mmu_idx, .in_space = arm_mmu_idx_to_security_space(env, mmu_idx), + .in_prot_check = 1 << access_type, }; return get_phys_addr_gpc(env, &ptw, address, access_type, @@ -3637,6 +3910,8 @@ static hwaddr arm_cpu_get_phys_page(CPUARMState *env, vaddr addr, .in_mmu_idx = mmu_idx, .in_space = arm_mmu_idx_to_security_space(env, mmu_idx), .in_debug = true, + .in_at = true, + .in_prot_check = 0, }; GetPhysAddrResult res = {}; ARMMMUFaultInfo fi = {}; diff --git a/target/arm/syndrome.h b/target/arm/syndrome.h index 3244e07..bff61f0 100644 --- a/target/arm/syndrome.h +++ b/target/arm/syndrome.h @@ -63,6 +63,7 @@ enum arm_exception_class { EC_MOP = 0x27, EC_AA32_FPTRAP = 0x28, EC_AA64_FPTRAP = 0x2c, + EC_GCS = 0x2d, EC_SERROR = 0x2f, EC_BREAKPOINT = 0x30, EC_BREAKPOINT_SAME_EL = 0x31, @@ -80,8 +81,26 @@ typedef enum { SME_ET_Streaming, SME_ET_NotStreaming, SME_ET_InactiveZA, + SME_ET_InaccessibleZT0, } SMEExceptionType; +typedef enum { + GCS_ET_DataCheck, + GCS_ET_EXLOCK, + GCS_ET_GCSSTR_GCSSTTR, +} GCSExceptionType; + +typedef enum { + GCS_IT_RET_nPauth = 0, + GCS_IT_GCSPOPM = 1, + GCS_IT_RET_PauthA = 2, + GCS_IT_RET_PauthB = 3, + GCS_IT_GCSSS1 = 4, + GCS_IT_GCSSS2 = 5, + GCS_IT_GCSPOPCX = 8, + GCS_IT_GCSPOPX = 9, +} GCSInstructionType; + #define ARM_EL_EC_LENGTH 6 #define ARM_EL_EC_SHIFT 26 #define ARM_EL_IL_SHIFT 25 @@ -350,6 +369,23 @@ static inline uint32_t syn_pcalignment(void) return (EC_PCALIGNMENT << ARM_EL_EC_SHIFT) | ARM_EL_IL; } +static inline uint32_t syn_gcs_data_check(GCSInstructionType it, int rn) +{ + return ((EC_GCS << ARM_EL_EC_SHIFT) | ARM_EL_IL | + (GCS_ET_DataCheck << 20) | (rn << 5) | it); +} + +static inline uint32_t syn_gcs_exlock(void) +{ + return (EC_GCS << ARM_EL_EC_SHIFT) | ARM_EL_IL | (GCS_ET_EXLOCK << 20); +} + +static inline uint32_t syn_gcs_gcsstr(int ra, int rn) +{ + return ((EC_GCS << ARM_EL_EC_SHIFT) | ARM_EL_IL | + (GCS_ET_GCSSTR_GCSSTTR << 20) | (ra << 10) | (rn << 5)); +} + static inline uint32_t syn_serror(uint32_t extra) { return (EC_SERROR << ARM_EL_EC_SHIFT) | ARM_EL_IL | extra; diff --git a/target/arm/tcg-stubs.c b/target/arm/tcg-stubs.c index 5e5166c..aeeede8 100644 --- a/target/arm/tcg-stubs.c +++ b/target/arm/tcg-stubs.c @@ -16,17 +16,12 @@ void write_v7m_exception(CPUARMState *env, uint32_t new_exc) g_assert_not_reached(); } -void raise_exception_ra(CPUARMState *env, uint32_t excp, uint32_t syndrome, +void raise_exception_ra(CPUARMState *env, uint32_t excp, uint64_t syndrome, 
uint32_t target_el, uintptr_t ra) { g_assert_not_reached(); } -/* TLBI insns are only used by TCG, so we don't need to do anything for KVM */ -void define_tlb_insn_regs(ARMCPU *cpu) -{ -} - /* With KVM, we never use float_status, so these can be no-ops */ void arm_set_default_fp_behaviours(float_status *s) { diff --git a/target/arm/tcg/a64.decode b/target/arm/tcg/a64.decode index 8c798cd..01b1b3e 100644 --- a/target/arm/tcg/a64.decode +++ b/target/arm/tcg/a64.decode @@ -156,6 +156,16 @@ MOVZ . 10 100101 .. ................ ..... @movw_32 MOVK . 11 100101 .. ................ ..... @movw_64 MOVK . 11 100101 .. ................ ..... @movw_32 +# Min/Max (immediate) + +@minmaxi_s sf:1 .. ........... imm:s8 rn:5 rd:5 &rri_sf +@minmaxi_u sf:1 .. ........... imm:8 rn:5 rd:5 &rri_sf + +SMAX_i . 00 1000111 0000 ........ ..... ..... @minmaxi_s +SMIN_i . 00 1000111 0010 ........ ..... ..... @minmaxi_s +UMAX_i . 00 1000111 0001 ........ ..... ..... @minmaxi_u +UMIN_i . 00 1000111 0011 ........ ..... ..... @minmaxi_u + # Bitfield &bitfield rd rn sf immr imms @@ -238,6 +248,7 @@ ERETA 1101011 0100 11111 00001 m:1 11111 11111 &reta # ERETAA, ERETAB AUTIA1716 1101 0101 0000 0011 0010 0001 100 11111 AUTIB1716 1101 0101 0000 0011 0010 0001 110 11111 ESB 1101 0101 0000 0011 0010 0010 000 11111 + GCSB 1101 0101 0000 0011 0010 0010 011 11111 PACIAZ 1101 0101 0000 0011 0010 0011 000 11111 PACIASP 1101 0101 0000 0011 0010 0011 001 11111 PACIBZ 1101 0101 0000 0011 0010 0011 010 11111 @@ -246,6 +257,7 @@ ERETA 1101011 0100 11111 00001 m:1 11111 11111 &reta # ERETAA, ERETAB AUTIASP 1101 0101 0000 0011 0010 0011 101 11111 AUTIBZ 1101 0101 0000 0011 0010 0011 110 11111 AUTIBSP 1101 0101 0000 0011 0010 0011 111 11111 + CHKFEAT 1101 0101 0000 0011 0010 0101 000 11111 ] # The canonical NOP has CRm == op2 == 0, but all of the space # that isn't specifically allocated to an instruction must NOP @@ -536,6 +548,13 @@ SWP .. 111 0 00 . . 1 ..... 1000 00 ..... ..... @atomic LDAPR sz:2 111 0 00 1 0 1 11111 1100 00 rn:5 rt:5 +# Atomic 128-bit memory operations +&atomic128 rn rt rt2 a r +@atomic128 ........ a:1 r:1 . rt2:5 ...... rn:5 rt:5 &atomic128 +LDCLRP 00011001 . . 1 ..... 000100 ..... ..... @atomic128 +LDSETP 00011001 . . 1 ..... 001100 ..... ..... @atomic128 +SWPP 00011001 . . 1 ..... 100000 ..... ..... @atomic128 + # Load/store register (pointer authentication) # LDRA immediate is 10 bits signed and scaled, but the bits aren't all contiguous @@ -553,6 +572,9 @@ LDAPR_i 10 011001 10 0 ......... 00 ..... ..... @ldapr_stlr_i sign=1 ext LDAPR_i 00 011001 11 0 ......... 00 ..... ..... @ldapr_stlr_i sign=1 ext=1 sz=0 LDAPR_i 01 011001 11 0 ......... 00 ..... ..... @ldapr_stlr_i sign=1 ext=1 sz=1 +# GCSSTR, GCSSTTR +GCSSTR 11011001 000 11111 000 unpriv:1 11 rn:5 rt:5 + # Load/store multiple structures # The 4-bit opcode in [15:12] encodes repeat count and structure elements &ldst_mult rm rn rt sz q p rpt selem @@ -698,6 +720,11 @@ GMI 1 00 11010110 ..... 000101 ..... ..... @rrr PACGA 1 00 11010110 ..... 001100 ..... ..... @rrr +SMAX . 00 11010110 ..... 011000 ..... ..... @rrr_sf +SMIN . 00 11010110 ..... 011010 ..... ..... @rrr_sf +UMAX . 00 11010110 ..... 011001 ..... ..... @rrr_sf +UMIN . 00 11010110 ..... 011011 ..... ..... @rrr_sf + # Data Processing (1-source) @rr . .......... ..... ...... rn:5 rd:5 &rr @@ -711,6 +738,10 @@ REV64 1 10 11010110 00000 000011 ..... ..... @rr CLZ . 10 11010110 00000 000100 ..... ..... @rr_sf CLS . 10 11010110 00000 000101 ..... ..... @rr_sf +CTZ . 10 11010110 00000 000110 ..... ..... 
@rr_sf +CNT . 10 11010110 00000 000111 ..... ..... @rr_sf +ABS . 10 11010110 00000 001000 ..... ..... @rr_sf + &pacaut rd rn z @pacaut . .. ........ ..... .. z:1 ... rn:5 rd:5 &pacaut diff --git a/target/arm/tcg/cpregs-at.c b/target/arm/tcg/cpregs-at.c new file mode 100644 index 0000000..0e8f229 --- /dev/null +++ b/target/arm/tcg/cpregs-at.c @@ -0,0 +1,556 @@ +/* + * System instructions for address translation + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "qemu/osdep.h" +#include "cpu.h" +#include "cpu-features.h" +#include "internals.h" +#include "cpregs.h" + + +static int par_el1_shareability(GetPhysAddrResult *res) +{ + /* + * The PAR_EL1.SH field must be 0b10 for Device or Normal-NC + * memory -- see pseudocode PAREncodeShareability(). + */ + if (((res->cacheattrs.attrs & 0xf0) == 0) || + res->cacheattrs.attrs == 0x44 || res->cacheattrs.attrs == 0x40) { + return 2; + } + return res->cacheattrs.shareability; +} + +static uint64_t do_ats_write(CPUARMState *env, uint64_t value, + unsigned prot_check, ARMMMUIdx mmu_idx, + ARMSecuritySpace ss) +{ + uint64_t par64; + bool format64 = false; + ARMMMUFaultInfo fi = {}; + GetPhysAddrResult res = {}; + bool ret = get_phys_addr_for_at(env, value, prot_check, + mmu_idx, ss, &res, &fi); + + /* + * ATS operations only do S1 or S1+S2 translations, so we never + * have to deal with the ARMCacheAttrs format for S2 only. + */ + assert(!res.cacheattrs.is_s2_format); + + if (ret) { + /* + * Some kinds of translation fault must cause exceptions rather + * than being reported in the PAR. + */ + int current_el = arm_current_el(env); + int target_el; + uint32_t syn, fsr, fsc; + bool take_exc = false; + + if (fi.s1ptw && current_el == 1 + && arm_mmu_idx_is_stage1_of_2(mmu_idx)) { + /* + * Synchronous stage 2 fault on an access made as part of the + * translation table walk for AT S1E0* or AT S1E1* insn + * executed from NS EL1. If this is a synchronous external abort + * and SCR_EL3.EA == 1, then we take a synchronous external abort + * to EL3. Otherwise the fault is taken as an exception to EL2, + * and HPFAR_EL2 holds the faulting IPA. + */ + if (fi.type == ARMFault_SyncExternalOnWalk && + (env->cp15.scr_el3 & SCR_EA)) { + target_el = 3; + } else { + env->cp15.hpfar_el2 = extract64(fi.s2addr, 12, 47) << 4; + if (arm_is_secure_below_el3(env) && fi.s1ns) { + env->cp15.hpfar_el2 |= HPFAR_NS; + } + target_el = 2; + } + take_exc = true; + } else if (fi.type == ARMFault_SyncExternalOnWalk) { + /* + * Synchronous external aborts during a translation table walk + * are taken as Data Abort exceptions. + */ + if (fi.stage2) { + if (current_el == 3) { + target_el = 3; + } else { + target_el = 2; + } + } else { + target_el = exception_target_el(env); + } + take_exc = true; + } + + if (take_exc) { + /* Construct FSR and FSC using same logic as arm_deliver_fault() */ + if (target_el == 2 || arm_el_is_aa64(env, target_el) || + arm_s1_regime_using_lpae_format(env, mmu_idx)) { + fsr = arm_fi_to_lfsc(&fi); + fsc = extract32(fsr, 0, 6); + } else { + fsr = arm_fi_to_sfsc(&fi); + fsc = 0x3f; + } + /* + * Report exception with ESR indicating a fault due to a + * translation table walk for a cache maintenance instruction. 
+ */ + syn = syn_data_abort_no_iss(current_el == target_el, 0, + fi.ea, 1, fi.s1ptw, 1, fsc); + env->exception.vaddress = value; + env->exception.fsr = fsr; + raise_exception(env, EXCP_DATA_ABORT, syn, target_el); + } + } + + if (is_a64(env)) { + format64 = true; + } else if (arm_feature(env, ARM_FEATURE_LPAE)) { + /* + * ATS1Cxx: + * * TTBCR.EAE determines whether the result is returned using the + * 32-bit or the 64-bit PAR format + * * Instructions executed in Hyp mode always use the 64bit format + * + * ATS1S2NSOxx uses the 64bit format if any of the following is true: + * * The Non-secure TTBCR.EAE bit is set to 1 + * * The implementation includes EL2, and the value of HCR.VM is 1 + * + * (Note that HCR.DC makes HCR.VM behave as if it is 1.) + * + * ATS1Hx always uses the 64bit format. + */ + format64 = arm_s1_regime_using_lpae_format(env, mmu_idx); + + if (arm_feature(env, ARM_FEATURE_EL2)) { + if (mmu_idx == ARMMMUIdx_E10_0 || + mmu_idx == ARMMMUIdx_E10_1 || + mmu_idx == ARMMMUIdx_E10_1_PAN) { + format64 |= env->cp15.hcr_el2 & (HCR_VM | HCR_DC); + } else { + format64 |= arm_current_el(env) == 2; + } + } + } + + if (format64) { + /* Create a 64-bit PAR */ + par64 = (1 << 11); /* LPAE bit always set */ + if (!ret) { + par64 |= res.f.phys_addr & ~0xfffULL; + if (!res.f.attrs.secure) { + par64 |= (1 << 9); /* NS */ + } + par64 |= (uint64_t)res.cacheattrs.attrs << 56; /* ATTR */ + par64 |= par_el1_shareability(&res) << 7; /* SH */ + } else { + uint32_t fsr = arm_fi_to_lfsc(&fi); + + par64 |= 1; /* F */ + par64 |= (fsr & 0x3f) << 1; /* FS */ + if (fi.stage2) { + par64 |= (1 << 9); /* S */ + } + if (fi.s1ptw) { + par64 |= (1 << 8); /* PTW */ + } + } + } else { + /* + * fsr is a DFSR/IFSR value for the short descriptor + * translation table format (with WnR always clear). + * Convert it to a 32-bit PAR. + */ + if (!ret) { + /* We do not set any attribute bits in the PAR */ + if (res.f.lg_page_size == 24 + && arm_feature(env, ARM_FEATURE_V7)) { + par64 = (res.f.phys_addr & 0xff000000) | (1 << 1); + } else { + par64 = res.f.phys_addr & 0xfffff000; + } + if (!res.f.attrs.secure) { + par64 |= (1 << 9); /* NS */ + } + } else { + uint32_t fsr = arm_fi_to_sfsc(&fi); + + par64 = ((fsr & (1 << 10)) >> 5) | ((fsr & (1 << 12)) >> 6) | + ((fsr & 0xf) << 1) | 1; + } + } + return par64; +} + +static void ats_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) +{ + unsigned access_perm = ri->opc2 & 1 ? 
PAGE_WRITE : PAGE_READ; + uint64_t par64; + ARMMMUIdx mmu_idx; + int el = arm_current_el(env); + ARMSecuritySpace ss = arm_security_space(env); + + switch (ri->opc2 & 6) { + case 0: + /* stage 1 current state PL1: ATS1CPR, ATS1CPW, ATS1CPRP, ATS1CPWP */ + switch (el) { + case 3: + if (ri->crm == 9 && arm_pan_enabled(env)) { + mmu_idx = ARMMMUIdx_E30_3_PAN; + } else { + mmu_idx = ARMMMUIdx_E3; + } + break; + case 2: + g_assert(ss != ARMSS_Secure); /* ARMv8.4-SecEL2 is 64-bit only */ + /* fall through */ + case 1: + if (ri->crm == 9 && arm_pan_enabled(env)) { + mmu_idx = ARMMMUIdx_Stage1_E1_PAN; + } else { + mmu_idx = ARMMMUIdx_Stage1_E1; + } + break; + default: + g_assert_not_reached(); + } + break; + case 2: + /* stage 1 current state PL0: ATS1CUR, ATS1CUW */ + switch (el) { + case 3: + mmu_idx = ARMMMUIdx_E30_0; + break; + case 2: + g_assert(ss != ARMSS_Secure); /* ARMv8.4-SecEL2 is 64-bit only */ + mmu_idx = ARMMMUIdx_Stage1_E0; + break; + case 1: + mmu_idx = ARMMMUIdx_Stage1_E0; + break; + default: + g_assert_not_reached(); + } + break; + case 4: + /* stage 1+2 NonSecure PL1: ATS12NSOPR, ATS12NSOPW */ + mmu_idx = ARMMMUIdx_E10_1; + ss = ARMSS_NonSecure; + break; + case 6: + /* stage 1+2 NonSecure PL0: ATS12NSOUR, ATS12NSOUW */ + mmu_idx = ARMMMUIdx_E10_0; + ss = ARMSS_NonSecure; + break; + default: + g_assert_not_reached(); + } + + par64 = do_ats_write(env, value, access_perm, mmu_idx, ss); + + A32_BANKED_CURRENT_REG_SET(env, par, par64); +} + +static void ats1h_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + unsigned access_perm = ri->opc2 & 1 ? PAGE_WRITE : PAGE_READ; + uint64_t par64; + + /* There is no SecureEL2 for AArch32. */ + par64 = do_ats_write(env, value, access_perm, ARMMMUIdx_E2, + ARMSS_NonSecure); + + A32_BANKED_CURRENT_REG_SET(env, par, par64); +} + +static CPAccessResult at_e012_access(CPUARMState *env, const ARMCPRegInfo *ri, + bool isread) +{ + /* + * R_NYXTL: instruction is UNDEFINED if it applies to an Exception level + * lower than EL3 and the combination SCR_EL3.{NSE,NS} is reserved. This can + * only happen when executing at EL3 because that combination also causes an + * illegal exception return. We don't need to check FEAT_RME either, because + * scr_write() ensures that the NSE bit is not set otherwise. + */ + if ((env->cp15.scr_el3 & (SCR_NSE | SCR_NS)) == SCR_NSE) { + return CP_ACCESS_UNDEFINED; + } + return CP_ACCESS_OK; +} + +static CPAccessResult at_s1e2_access(CPUARMState *env, const ARMCPRegInfo *ri, + bool isread) +{ + if (arm_current_el(env) == 3 && + !(env->cp15.scr_el3 & (SCR_NS | SCR_EEL2))) { + return CP_ACCESS_UNDEFINED; + } + return at_e012_access(env, ri, isread); +} + +static CPAccessResult at_s1e01_access(CPUARMState *env, const ARMCPRegInfo *ri, + bool isread) +{ + if (arm_current_el(env) == 1 && (arm_hcr_el2_eff(env) & HCR_AT)) { + return CP_ACCESS_TRAP_EL2; + } + return at_e012_access(env, ri, isread); +} + +static void ats_write64(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + unsigned access_perm = ri->opc2 & 1 ? PAGE_WRITE : PAGE_READ; + ARMMMUIdx mmu_idx; + uint64_t hcr_el2 = arm_hcr_el2_eff(env); + bool regime_e20 = (hcr_el2 & (HCR_E2H | HCR_TGE)) == (HCR_E2H | HCR_TGE); + bool for_el3 = false; + ARMSecuritySpace ss; + + switch (ri->opc2 & 6) { + case 0: + switch (ri->opc1) { + case 0: /* AT S1E1R, AT S1E1W, AT S1E1RP, AT S1E1WP */ + if (ri->crm == 9 && arm_pan_enabled(env)) { + mmu_idx = regime_e20 ? + ARMMMUIdx_E20_2_PAN : ARMMMUIdx_Stage1_E1_PAN; + } else { + mmu_idx = regime_e20 ? 
ARMMMUIdx_E20_2 : ARMMMUIdx_Stage1_E1; + } + break; + case 4: /* AT S1E2R, AT S1E2W */ + mmu_idx = hcr_el2 & HCR_E2H ? ARMMMUIdx_E20_2 : ARMMMUIdx_E2; + break; + case 6: /* AT S1E3R, AT S1E3W */ + mmu_idx = ARMMMUIdx_E3; + for_el3 = true; + break; + default: + g_assert_not_reached(); + } + break; + case 2: /* AT S1E0R, AT S1E0W */ + mmu_idx = regime_e20 ? ARMMMUIdx_E20_0 : ARMMMUIdx_Stage1_E0; + break; + case 4: /* AT S12E1R, AT S12E1W */ + mmu_idx = regime_e20 ? ARMMMUIdx_E20_2 : ARMMMUIdx_E10_1; + break; + case 6: /* AT S12E0R, AT S12E0W */ + mmu_idx = regime_e20 ? ARMMMUIdx_E20_0 : ARMMMUIdx_E10_0; + break; + default: + g_assert_not_reached(); + } + + ss = for_el3 ? arm_security_space(env) : arm_security_space_below_el3(env); + env->cp15.par_el[1] = do_ats_write(env, value, access_perm, mmu_idx, ss); +} + +static CPAccessResult ats_access(CPUARMState *env, const ARMCPRegInfo *ri, + bool isread) +{ + if (ri->opc2 & 4) { + /* + * The ATS12NSO* operations must trap to EL3 or EL2 if executed in + * Secure EL1 (which can only happen if EL3 is AArch64). + * They are simply UNDEF if executed from NS EL1. + * They function normally from EL2 or EL3. + */ + if (arm_current_el(env) == 1) { + if (arm_is_secure_below_el3(env)) { + if (env->cp15.scr_el3 & SCR_EEL2) { + return CP_ACCESS_TRAP_EL2; + } + return CP_ACCESS_TRAP_EL3; + } + return CP_ACCESS_UNDEFINED; + } + } + return CP_ACCESS_OK; +} + +static const ARMCPRegInfo vapa_ats_reginfo[] = { + /* This underdecoding is safe because the reginfo is NO_RAW. */ + { .name = "ATS", .cp = 15, .crn = 7, .crm = 8, .opc1 = 0, .opc2 = CP_ANY, + .access = PL1_W, .accessfn = ats_access, + .writefn = ats_write, .type = ARM_CP_NO_RAW | ARM_CP_RAISES_EXC }, +}; + +static const ARMCPRegInfo v8_ats_reginfo[] = { + /* 64 bit address translation operations */ + { .name = "AT_S1E1R", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 8, .opc2 = 0, + .access = PL1_W, .type = ARM_CP_NO_RAW | ARM_CP_RAISES_EXC, + .fgt = FGT_ATS1E1R, + .accessfn = at_s1e01_access, .writefn = ats_write64 }, + { .name = "AT_S1E1W", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 8, .opc2 = 1, + .access = PL1_W, .type = ARM_CP_NO_RAW | ARM_CP_RAISES_EXC, + .fgt = FGT_ATS1E1W, + .accessfn = at_s1e01_access, .writefn = ats_write64 }, + { .name = "AT_S1E0R", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 8, .opc2 = 2, + .access = PL1_W, .type = ARM_CP_NO_RAW | ARM_CP_RAISES_EXC, + .fgt = FGT_ATS1E0R, + .accessfn = at_s1e01_access, .writefn = ats_write64 }, + { .name = "AT_S1E0W", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 8, .opc2 = 3, + .access = PL1_W, .type = ARM_CP_NO_RAW | ARM_CP_RAISES_EXC, + .fgt = FGT_ATS1E0W, + .accessfn = at_s1e01_access, .writefn = ats_write64 }, + { .name = "AT_S12E1R", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 4, .crn = 7, .crm = 8, .opc2 = 4, + .access = PL2_W, .type = ARM_CP_NO_RAW | ARM_CP_RAISES_EXC, + .accessfn = at_e012_access, .writefn = ats_write64 }, + { .name = "AT_S12E1W", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 4, .crn = 7, .crm = 8, .opc2 = 5, + .access = PL2_W, .type = ARM_CP_NO_RAW | ARM_CP_RAISES_EXC, + .accessfn = at_e012_access, .writefn = ats_write64 }, + { .name = "AT_S12E0R", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 4, .crn = 7, .crm = 8, .opc2 = 6, + .access = PL2_W, .type = ARM_CP_NO_RAW | ARM_CP_RAISES_EXC, + .accessfn = at_e012_access, .writefn = ats_write64 }, + { .name = "AT_S12E0W", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 
= 4, .crn = 7, .crm = 8, .opc2 = 7, + .access = PL2_W, .type = ARM_CP_NO_RAW | ARM_CP_RAISES_EXC, + .accessfn = at_e012_access, .writefn = ats_write64 }, + /* AT S1E2* are elsewhere as they UNDEF from EL3 if EL2 is not present */ + { .name = "AT_S1E3R", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 6, .crn = 7, .crm = 8, .opc2 = 0, + .access = PL3_W, .type = ARM_CP_NO_RAW | ARM_CP_RAISES_EXC, + .writefn = ats_write64 }, + { .name = "AT_S1E3W", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 6, .crn = 7, .crm = 8, .opc2 = 1, + .access = PL3_W, .type = ARM_CP_NO_RAW | ARM_CP_RAISES_EXC, + .writefn = ats_write64 }, +}; + +static const ARMCPRegInfo el2_ats_reginfo[] = { + /* + * Unlike the other EL2-related AT operations, these must + * UNDEF from EL3 if EL2 is not implemented, which is why we + * define them here rather than with the rest of the AT ops. + */ + { .name = "AT_S1E2R", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 4, .crn = 7, .crm = 8, .opc2 = 0, + .access = PL2_W, .accessfn = at_s1e2_access, + .type = ARM_CP_NO_RAW | ARM_CP_RAISES_EXC | ARM_CP_EL3_NO_EL2_UNDEF, + .writefn = ats_write64 }, + { .name = "AT_S1E2W", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 4, .crn = 7, .crm = 8, .opc2 = 1, + .access = PL2_W, .accessfn = at_s1e2_access, + .type = ARM_CP_NO_RAW | ARM_CP_RAISES_EXC | ARM_CP_EL3_NO_EL2_UNDEF, + .writefn = ats_write64 }, + /* + * The AArch32 ATS1H* operations are CONSTRAINED UNPREDICTABLE + * if EL2 is not implemented; we choose to UNDEF. Behaviour at EL3 + * with SCR.NS == 0 outside Monitor mode is UNPREDICTABLE; we choose + * to behave as if SCR.NS was 1. + */ + { .name = "ATS1HR", .cp = 15, .opc1 = 4, .crn = 7, .crm = 8, .opc2 = 0, + .access = PL2_W, + .writefn = ats1h_write, .type = ARM_CP_NO_RAW | ARM_CP_RAISES_EXC }, + { .name = "ATS1HW", .cp = 15, .opc1 = 4, .crn = 7, .crm = 8, .opc2 = 1, + .access = PL2_W, + .writefn = ats1h_write, .type = ARM_CP_NO_RAW | ARM_CP_RAISES_EXC }, +}; + +static const ARMCPRegInfo ats1e1_reginfo[] = { + { .name = "AT_S1E1RP", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 9, .opc2 = 0, + .access = PL1_W, .type = ARM_CP_NO_RAW | ARM_CP_RAISES_EXC, + .fgt = FGT_ATS1E1RP, + .accessfn = at_s1e01_access, .writefn = ats_write64 }, + { .name = "AT_S1E1WP", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 9, .opc2 = 1, + .access = PL1_W, .type = ARM_CP_NO_RAW | ARM_CP_RAISES_EXC, + .fgt = FGT_ATS1E1WP, + .accessfn = at_s1e01_access, .writefn = ats_write64 }, +}; + +static const ARMCPRegInfo ats1cp_reginfo[] = { + { .name = "ATS1CPRP", + .cp = 15, .opc1 = 0, .crn = 7, .crm = 9, .opc2 = 0, + .access = PL1_W, .type = ARM_CP_NO_RAW | ARM_CP_RAISES_EXC, + .writefn = ats_write }, + { .name = "ATS1CPWP", + .cp = 15, .opc1 = 0, .crn = 7, .crm = 9, .opc2 = 1, + .access = PL1_W, .type = ARM_CP_NO_RAW | ARM_CP_RAISES_EXC, + .writefn = ats_write }, +}; + +static void ats_s1e1a(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) +{ + uint64_t hcr_el2 = arm_hcr_el2_eff(env); + bool regime_e20 = (hcr_el2 & (HCR_E2H | HCR_TGE)) == (HCR_E2H | HCR_TGE); + ARMMMUIdx mmu_idx = regime_e20 ? ARMMMUIdx_E20_2 : ARMMMUIdx_Stage1_E1; + ARMSecuritySpace ss = arm_security_space_below_el3(env); + + env->cp15.par_el[1] = do_ats_write(env, value, 0, mmu_idx, ss); +} + +static void ats_s1e2a(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) +{ + uint64_t hcr_el2 = arm_hcr_el2_eff(env); + ARMMMUIdx mmu_idx = hcr_el2 & HCR_E2H ? 
ARMMMUIdx_E20_2 : ARMMMUIdx_E2; + ARMSecuritySpace ss = arm_security_space_below_el3(env); + + env->cp15.par_el[1] = do_ats_write(env, value, 0, mmu_idx, ss); +} + +static void ats_s1e3a(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) +{ + env->cp15.par_el[1] = do_ats_write(env, value, 0, ARMMMUIdx_E3, + arm_security_space(env)); +} + +static const ARMCPRegInfo ats1a_reginfo[] = { + { .name = "AT_S1E1A", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 9, .opc2 = 2, + .access = PL1_W, .type = ARM_CP_NO_RAW | ARM_CP_RAISES_EXC, + .fgt = FGT_ATS1E1A, + .accessfn = at_s1e01_access, .writefn = ats_s1e1a }, + { .name = "AT_S1E2A", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 4, .crn = 7, .crm = 9, .opc2 = 2, + .access = PL2_W, .type = ARM_CP_NO_RAW | ARM_CP_RAISES_EXC, + .accessfn = at_s1e2_access, .writefn = ats_s1e2a }, + { .name = "AT_S1E3A", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 6, .crn = 7, .crm = 9, .opc2 = 2, + .access = PL3_W, .type = ARM_CP_NO_RAW | ARM_CP_RAISES_EXC, + .writefn = ats_s1e3a }, +}; + +void define_at_insn_regs(ARMCPU *cpu) +{ + CPUARMState *env = &cpu->env; + + if (arm_feature(env, ARM_FEATURE_VAPA)) { + define_arm_cp_regs(cpu, vapa_ats_reginfo); + } + if (arm_feature(env, ARM_FEATURE_V8)) { + define_arm_cp_regs(cpu, v8_ats_reginfo); + } + if (arm_feature(env, ARM_FEATURE_EL2) + || (arm_feature(env, ARM_FEATURE_EL3) + && arm_feature(env, ARM_FEATURE_V8))) { + define_arm_cp_regs(cpu, el2_ats_reginfo); + } + if (cpu_isar_feature(aa64_ats1e1, cpu)) { + define_arm_cp_regs(cpu, ats1e1_reginfo); + } + if (cpu_isar_feature(aa32_ats1e1, cpu)) { + define_arm_cp_regs(cpu, ats1cp_reginfo); + } + if (cpu_isar_feature(aa64_ats1a, cpu)) { + define_arm_cp_regs(cpu, ats1a_reginfo); + } +} diff --git a/target/arm/tcg/cpu-v7m.c b/target/arm/tcg/cpu-v7m.c index 95b23d9..dc249ce 100644 --- a/target/arm/tcg/cpu-v7m.c +++ b/target/arm/tcg/cpu-v7m.c @@ -45,6 +45,7 @@ static bool arm_v7m_cpu_exec_interrupt(CPUState *cs, int interrupt_request) static void cortex_m0_initfn(Object *obj) { ARMCPU *cpu = ARM_CPU(obj); + ARMISARegisters *isar = &cpu->isar; set_feature(&cpu->env, ARM_FEATURE_V6); set_feature(&cpu->env, ARM_FEATURE_M); @@ -58,51 +59,53 @@ static void cortex_m0_initfn(Object *obj) * by looking at ID register fields. We use the same values as * for the M3. 
*/ - cpu->isar.id_pfr0 = 0x00000030; - cpu->isar.id_pfr1 = 0x00000200; - cpu->isar.id_dfr0 = 0x00100000; - cpu->id_afr0 = 0x00000000; - cpu->isar.id_mmfr0 = 0x00000030; - cpu->isar.id_mmfr1 = 0x00000000; - cpu->isar.id_mmfr2 = 0x00000000; - cpu->isar.id_mmfr3 = 0x00000000; - cpu->isar.id_isar0 = 0x01141110; - cpu->isar.id_isar1 = 0x02111000; - cpu->isar.id_isar2 = 0x21112231; - cpu->isar.id_isar3 = 0x01111110; - cpu->isar.id_isar4 = 0x01310102; - cpu->isar.id_isar5 = 0x00000000; - cpu->isar.id_isar6 = 0x00000000; + SET_IDREG(isar, ID_PFR0, 0x00000030); + SET_IDREG(isar, ID_PFR1, 0x00000200); + SET_IDREG(isar, ID_DFR0, 0x00100000); + SET_IDREG(isar, ID_AFR0, 0x00000000); + SET_IDREG(isar, ID_MMFR0, 0x00000030); + SET_IDREG(isar, ID_MMFR1, 0x00000000); + SET_IDREG(isar, ID_MMFR2, 0x00000000); + SET_IDREG(isar, ID_MMFR3, 0x00000000); + SET_IDREG(isar, ID_ISAR0, 0x01141110); + SET_IDREG(isar, ID_ISAR1, 0x02111000); + SET_IDREG(isar, ID_ISAR2, 0x21112231); + SET_IDREG(isar, ID_ISAR3, 0x01111110); + SET_IDREG(isar, ID_ISAR4, 0x01310102); + SET_IDREG(isar, ID_ISAR5, 0x00000000); + SET_IDREG(isar, ID_ISAR6, 0x00000000); } static void cortex_m3_initfn(Object *obj) { ARMCPU *cpu = ARM_CPU(obj); + ARMISARegisters *isar = &cpu->isar; set_feature(&cpu->env, ARM_FEATURE_V7); set_feature(&cpu->env, ARM_FEATURE_M); set_feature(&cpu->env, ARM_FEATURE_M_MAIN); cpu->midr = 0x410fc231; cpu->pmsav7_dregion = 8; - cpu->isar.id_pfr0 = 0x00000030; - cpu->isar.id_pfr1 = 0x00000200; - cpu->isar.id_dfr0 = 0x00100000; - cpu->id_afr0 = 0x00000000; - cpu->isar.id_mmfr0 = 0x00000030; - cpu->isar.id_mmfr1 = 0x00000000; - cpu->isar.id_mmfr2 = 0x00000000; - cpu->isar.id_mmfr3 = 0x00000000; - cpu->isar.id_isar0 = 0x01141110; - cpu->isar.id_isar1 = 0x02111000; - cpu->isar.id_isar2 = 0x21112231; - cpu->isar.id_isar3 = 0x01111110; - cpu->isar.id_isar4 = 0x01310102; - cpu->isar.id_isar5 = 0x00000000; - cpu->isar.id_isar6 = 0x00000000; + SET_IDREG(isar, ID_PFR0, 0x00000030); + SET_IDREG(isar, ID_PFR1, 0x00000200); + SET_IDREG(isar, ID_DFR0, 0x00100000); + SET_IDREG(isar, ID_AFR0, 0x00000000); + SET_IDREG(isar, ID_MMFR0, 0x00000030); + SET_IDREG(isar, ID_MMFR1, 0x00000000); + SET_IDREG(isar, ID_MMFR2, 0x00000000); + SET_IDREG(isar, ID_MMFR3, 0x00000000); + SET_IDREG(isar, ID_ISAR0, 0x01141110); + SET_IDREG(isar, ID_ISAR1, 0x02111000); + SET_IDREG(isar, ID_ISAR2, 0x21112231); + SET_IDREG(isar, ID_ISAR3, 0x01111110); + SET_IDREG(isar, ID_ISAR4, 0x01310102); + SET_IDREG(isar, ID_ISAR5, 0x00000000); + SET_IDREG(isar, ID_ISAR6, 0x00000000); } static void cortex_m4_initfn(Object *obj) { ARMCPU *cpu = ARM_CPU(obj); + ARMISARegisters *isar = &cpu->isar; set_feature(&cpu->env, ARM_FEATURE_V7); set_feature(&cpu->env, ARM_FEATURE_M); @@ -113,26 +116,27 @@ static void cortex_m4_initfn(Object *obj) cpu->isar.mvfr0 = 0x10110021; cpu->isar.mvfr1 = 0x11000011; cpu->isar.mvfr2 = 0x00000000; - cpu->isar.id_pfr0 = 0x00000030; - cpu->isar.id_pfr1 = 0x00000200; - cpu->isar.id_dfr0 = 0x00100000; - cpu->id_afr0 = 0x00000000; - cpu->isar.id_mmfr0 = 0x00000030; - cpu->isar.id_mmfr1 = 0x00000000; - cpu->isar.id_mmfr2 = 0x00000000; - cpu->isar.id_mmfr3 = 0x00000000; - cpu->isar.id_isar0 = 0x01141110; - cpu->isar.id_isar1 = 0x02111000; - cpu->isar.id_isar2 = 0x21112231; - cpu->isar.id_isar3 = 0x01111110; - cpu->isar.id_isar4 = 0x01310102; - cpu->isar.id_isar5 = 0x00000000; - cpu->isar.id_isar6 = 0x00000000; + SET_IDREG(isar, ID_PFR0, 0x00000030); + SET_IDREG(isar, ID_PFR1, 0x00000200); + SET_IDREG(isar, ID_DFR0, 0x00100000); + SET_IDREG(isar, ID_AFR0, 
0x00000000); + SET_IDREG(isar, ID_MMFR0, 0x00000030); + SET_IDREG(isar, ID_MMFR1, 0x00000000); + SET_IDREG(isar, ID_MMFR2, 0x00000000); + SET_IDREG(isar, ID_MMFR3, 0x00000000); + SET_IDREG(isar, ID_ISAR0, 0x01141110); + SET_IDREG(isar, ID_ISAR1, 0x02111000); + SET_IDREG(isar, ID_ISAR2, 0x21112231); + SET_IDREG(isar, ID_ISAR3, 0x01111110); + SET_IDREG(isar, ID_ISAR4, 0x01310102); + SET_IDREG(isar, ID_ISAR5, 0x00000000); + SET_IDREG(isar, ID_ISAR6, 0x00000000); } static void cortex_m7_initfn(Object *obj) { ARMCPU *cpu = ARM_CPU(obj); + ARMISARegisters *isar = &cpu->isar; set_feature(&cpu->env, ARM_FEATURE_V7); set_feature(&cpu->env, ARM_FEATURE_M); @@ -143,26 +147,27 @@ static void cortex_m7_initfn(Object *obj) cpu->isar.mvfr0 = 0x10110221; cpu->isar.mvfr1 = 0x12000011; cpu->isar.mvfr2 = 0x00000040; - cpu->isar.id_pfr0 = 0x00000030; - cpu->isar.id_pfr1 = 0x00000200; - cpu->isar.id_dfr0 = 0x00100000; - cpu->id_afr0 = 0x00000000; - cpu->isar.id_mmfr0 = 0x00100030; - cpu->isar.id_mmfr1 = 0x00000000; - cpu->isar.id_mmfr2 = 0x01000000; - cpu->isar.id_mmfr3 = 0x00000000; - cpu->isar.id_isar0 = 0x01101110; - cpu->isar.id_isar1 = 0x02112000; - cpu->isar.id_isar2 = 0x20232231; - cpu->isar.id_isar3 = 0x01111131; - cpu->isar.id_isar4 = 0x01310132; - cpu->isar.id_isar5 = 0x00000000; - cpu->isar.id_isar6 = 0x00000000; + SET_IDREG(isar, ID_PFR0, 0x00000030); + SET_IDREG(isar, ID_PFR1, 0x00000200); + SET_IDREG(isar, ID_DFR0, 0x00100000); + SET_IDREG(isar, ID_AFR0, 0x00000000); + SET_IDREG(isar, ID_MMFR0, 0x00100030); + SET_IDREG(isar, ID_MMFR1, 0x00000000); + SET_IDREG(isar, ID_MMFR2, 0x01000000); + SET_IDREG(isar, ID_MMFR3, 0x00000000); + SET_IDREG(isar, ID_ISAR0, 0x01101110); + SET_IDREG(isar, ID_ISAR1, 0x02112000); + SET_IDREG(isar, ID_ISAR2, 0x20232231); + SET_IDREG(isar, ID_ISAR3, 0x01111131); + SET_IDREG(isar, ID_ISAR4, 0x01310132); + SET_IDREG(isar, ID_ISAR5, 0x00000000); + SET_IDREG(isar, ID_ISAR6, 0x00000000); } static void cortex_m33_initfn(Object *obj) { ARMCPU *cpu = ARM_CPU(obj); + ARMISARegisters *isar = &cpu->isar; set_feature(&cpu->env, ARM_FEATURE_V8); set_feature(&cpu->env, ARM_FEATURE_M); @@ -175,28 +180,29 @@ static void cortex_m33_initfn(Object *obj) cpu->isar.mvfr0 = 0x10110021; cpu->isar.mvfr1 = 0x11000011; cpu->isar.mvfr2 = 0x00000040; - cpu->isar.id_pfr0 = 0x00000030; - cpu->isar.id_pfr1 = 0x00000210; - cpu->isar.id_dfr0 = 0x00200000; - cpu->id_afr0 = 0x00000000; - cpu->isar.id_mmfr0 = 0x00101F40; - cpu->isar.id_mmfr1 = 0x00000000; - cpu->isar.id_mmfr2 = 0x01000000; - cpu->isar.id_mmfr3 = 0x00000000; - cpu->isar.id_isar0 = 0x01101110; - cpu->isar.id_isar1 = 0x02212000; - cpu->isar.id_isar2 = 0x20232232; - cpu->isar.id_isar3 = 0x01111131; - cpu->isar.id_isar4 = 0x01310132; - cpu->isar.id_isar5 = 0x00000000; - cpu->isar.id_isar6 = 0x00000000; - cpu->clidr = 0x00000000; + SET_IDREG(isar, ID_PFR0, 0x00000030); + SET_IDREG(isar, ID_PFR1, 0x00000210); + SET_IDREG(isar, ID_DFR0, 0x00200000); + SET_IDREG(isar, ID_AFR0, 0x00000000); + SET_IDREG(isar, ID_MMFR0, 0x00101F40); + SET_IDREG(isar, ID_MMFR1, 0x00000000); + SET_IDREG(isar, ID_MMFR2, 0x01000000); + SET_IDREG(isar, ID_MMFR3, 0x00000000); + SET_IDREG(isar, ID_ISAR0, 0x01101110); + SET_IDREG(isar, ID_ISAR1, 0x02212000); + SET_IDREG(isar, ID_ISAR2, 0x20232232); + SET_IDREG(isar, ID_ISAR3, 0x01111131); + SET_IDREG(isar, ID_ISAR4, 0x01310132); + SET_IDREG(isar, ID_ISAR5, 0x00000000); + SET_IDREG(isar, ID_ISAR6, 0x00000000); + SET_IDREG(isar, CLIDR, 0x00000000); cpu->ctr = 0x8000c000; } static void cortex_m55_initfn(Object *obj) { ARMCPU 
*cpu = ARM_CPU(obj); + ARMISARegisters *isar = &cpu->isar; set_feature(&cpu->env, ARM_FEATURE_V8); set_feature(&cpu->env, ARM_FEATURE_V8_1M); @@ -212,22 +218,22 @@ static void cortex_m55_initfn(Object *obj) cpu->isar.mvfr0 = 0x10110221; cpu->isar.mvfr1 = 0x12100211; cpu->isar.mvfr2 = 0x00000040; - cpu->isar.id_pfr0 = 0x20000030; - cpu->isar.id_pfr1 = 0x00000230; - cpu->isar.id_dfr0 = 0x10200000; - cpu->id_afr0 = 0x00000000; - cpu->isar.id_mmfr0 = 0x00111040; - cpu->isar.id_mmfr1 = 0x00000000; - cpu->isar.id_mmfr2 = 0x01000000; - cpu->isar.id_mmfr3 = 0x00000011; - cpu->isar.id_isar0 = 0x01103110; - cpu->isar.id_isar1 = 0x02212000; - cpu->isar.id_isar2 = 0x20232232; - cpu->isar.id_isar3 = 0x01111131; - cpu->isar.id_isar4 = 0x01310132; - cpu->isar.id_isar5 = 0x00000000; - cpu->isar.id_isar6 = 0x00000000; - cpu->clidr = 0x00000000; /* caches not implemented */ + SET_IDREG(isar, ID_PFR0, 0x20000030); + SET_IDREG(isar, ID_PFR1, 0x00000230); + SET_IDREG(isar, ID_DFR0, 0x10200000); + SET_IDREG(isar, ID_AFR0, 0x00000000); + SET_IDREG(isar, ID_MMFR0, 0x00111040); + SET_IDREG(isar, ID_MMFR1, 0x00000000); + SET_IDREG(isar, ID_MMFR2, 0x01000000); + SET_IDREG(isar, ID_MMFR3, 0x00000011); + SET_IDREG(isar, ID_ISAR0, 0x01103110); + SET_IDREG(isar, ID_ISAR1, 0x02212000); + SET_IDREG(isar, ID_ISAR2, 0x20232232); + SET_IDREG(isar, ID_ISAR3, 0x01111131); + SET_IDREG(isar, ID_ISAR4, 0x01310132); + SET_IDREG(isar, ID_ISAR5, 0x00000000); + SET_IDREG(isar, ID_ISAR6, 0x00000000); + SET_IDREG(isar, CLIDR, 0x00000000); /* caches not implemented */ cpu->ctr = 0x8303c003; } @@ -249,6 +255,7 @@ static const TCGCPUOps arm_v7m_tcg_ops = { .record_sigbus = arm_cpu_record_sigbus, #else .tlb_fill_align = arm_cpu_tlb_fill_align, + .pointer_wrap = cpu_pointer_wrap_uint32, .cpu_exec_interrupt = arm_v7m_cpu_exec_interrupt, .cpu_exec_halt = arm_cpu_exec_halt, .cpu_exec_reset = cpu_reset, diff --git a/target/arm/tcg/cpu32.c b/target/arm/tcg/cpu32.c index 2c45b7e..f076141 100644 --- a/target/arm/tcg/cpu32.c +++ b/target/arm/tcg/cpu32.c @@ -23,18 +23,19 @@ void aa32_max_features(ARMCPU *cpu) { uint32_t t; + ARMISARegisters *isar = &cpu->isar; /* Add additional features supported by QEMU */ - t = cpu->isar.id_isar5; + t = GET_IDREG(isar, ID_ISAR5); t = FIELD_DP32(t, ID_ISAR5, AES, 2); /* FEAT_PMULL */ t = FIELD_DP32(t, ID_ISAR5, SHA1, 1); /* FEAT_SHA1 */ t = FIELD_DP32(t, ID_ISAR5, SHA2, 1); /* FEAT_SHA256 */ t = FIELD_DP32(t, ID_ISAR5, CRC32, 1); t = FIELD_DP32(t, ID_ISAR5, RDM, 1); /* FEAT_RDM */ t = FIELD_DP32(t, ID_ISAR5, VCMA, 1); /* FEAT_FCMA */ - cpu->isar.id_isar5 = t; + SET_IDREG(isar, ID_ISAR5, t); - t = cpu->isar.id_isar6; + t = GET_IDREG(isar, ID_ISAR6); t = FIELD_DP32(t, ID_ISAR6, JSCVT, 1); /* FEAT_JSCVT */ t = FIELD_DP32(t, ID_ISAR6, DP, 1); /* Feat_DotProd */ t = FIELD_DP32(t, ID_ISAR6, FHM, 1); /* FEAT_FHM */ @@ -42,7 +43,7 @@ void aa32_max_features(ARMCPU *cpu) t = FIELD_DP32(t, ID_ISAR6, SPECRES, 1); /* FEAT_SPECRES */ t = FIELD_DP32(t, ID_ISAR6, BF16, 1); /* FEAT_AA32BF16 */ t = FIELD_DP32(t, ID_ISAR6, I8MM, 1); /* FEAT_AA32I8MM */ - cpu->isar.id_isar6 = t; + SET_IDREG(isar, ID_ISAR6, t); t = cpu->isar.mvfr1; t = FIELD_DP32(t, MVFR1, FPHP, 3); /* FEAT_FP16 */ @@ -54,38 +55,34 @@ void aa32_max_features(ARMCPU *cpu) t = FIELD_DP32(t, MVFR2, FPMISC, 4); /* FP MaxNum */ cpu->isar.mvfr2 = t; - t = cpu->isar.id_mmfr3; - t = FIELD_DP32(t, ID_MMFR3, PAN, 2); /* FEAT_PAN2 */ - cpu->isar.id_mmfr3 = t; + FIELD_DP32_IDREG(isar, ID_MMFR3, PAN, 2); /* FEAT_PAN2 */ - t = cpu->isar.id_mmfr4; + t = GET_IDREG(isar, ID_MMFR4); t = 
FIELD_DP32(t, ID_MMFR4, HPDS, 2); /* FEAT_HPDS2 */ t = FIELD_DP32(t, ID_MMFR4, AC2, 1); /* ACTLR2, HACTLR2 */ t = FIELD_DP32(t, ID_MMFR4, CNP, 1); /* FEAT_TTCNP */ t = FIELD_DP32(t, ID_MMFR4, XNX, 1); /* FEAT_XNX */ t = FIELD_DP32(t, ID_MMFR4, EVT, 2); /* FEAT_EVT */ - cpu->isar.id_mmfr4 = t; + SET_IDREG(isar, ID_MMFR4, t); - t = cpu->isar.id_mmfr5; - t = FIELD_DP32(t, ID_MMFR5, ETS, 2); /* FEAT_ETS2 */ - cpu->isar.id_mmfr5 = t; + FIELD_DP32_IDREG(isar, ID_MMFR5, ETS, 2); /* FEAT_ETS2 */ - t = cpu->isar.id_pfr0; + t = GET_IDREG(isar, ID_PFR0); t = FIELD_DP32(t, ID_PFR0, CSV2, 2); /* FEAT_CSV2 */ t = FIELD_DP32(t, ID_PFR0, DIT, 1); /* FEAT_DIT */ t = FIELD_DP32(t, ID_PFR0, RAS, 1); /* FEAT_RAS */ - cpu->isar.id_pfr0 = t; + SET_IDREG(isar, ID_PFR0, t); - t = cpu->isar.id_pfr2; + t = GET_IDREG(isar, ID_PFR2); t = FIELD_DP32(t, ID_PFR2, CSV3, 1); /* FEAT_CSV3 */ t = FIELD_DP32(t, ID_PFR2, SSBS, 1); /* FEAT_SSBS */ - cpu->isar.id_pfr2 = t; + SET_IDREG(isar, ID_PFR2, t); - t = cpu->isar.id_dfr0; + t = GET_IDREG(isar, ID_DFR0); t = FIELD_DP32(t, ID_DFR0, COPDBG, 10); /* FEAT_Debugv8p8 */ t = FIELD_DP32(t, ID_DFR0, COPSDBG, 10); /* FEAT_Debugv8p8 */ t = FIELD_DP32(t, ID_DFR0, PERFMON, 6); /* FEAT_PMUv3p5 */ - cpu->isar.id_dfr0 = t; + SET_IDREG(isar, ID_DFR0, t); /* Debug ID registers. */ @@ -115,9 +112,7 @@ void aa32_max_features(ARMCPU *cpu) t = FIELD_DP32(t, DBGDEVID1, PCSROFFSET, 2); cpu->isar.dbgdevid1 = t; - t = cpu->isar.id_dfr1; - t = FIELD_DP32(t, ID_DFR1, HPMN0, 1); /* FEAT_HPMN0 */ - cpu->isar.id_dfr1 = t; + FIELD_DP32_IDREG(isar, ID_DFR1, HPMN0, 1); /* FEAT_HPMN0 */ } /* CPU models. These are not needed for the AArch64 linux-user build. */ @@ -140,7 +135,7 @@ static void arm926_initfn(Object *obj) * ARMv5 does not have the ID_ISAR registers, but we can still * set the field to indicate Jazelle support within QEMU. */ - cpu->isar.id_isar1 = FIELD_DP32(cpu->isar.id_isar1, ID_ISAR1, JAZELLE, 1); + FIELD_DP32_IDREG(&cpu->isar, ID_ISAR1, JAZELLE, 1); /* * Similarly, we need to set MVFR0 fields to enable vfp and short vector * support even though ARMv5 doesn't have this register. @@ -182,7 +177,7 @@ static void arm1026_initfn(Object *obj) * ARMv5 does not have the ID_ISAR registers, but we can still * set the field to indicate Jazelle support within QEMU. */ - cpu->isar.id_isar1 = FIELD_DP32(cpu->isar.id_isar1, ID_ISAR1, JAZELLE, 1); + FIELD_DP32_IDREG(&cpu->isar, ID_ISAR1, JAZELLE, 1); /* * Similarly, we need to set MVFR0 fields to enable vfp and short vector * support even though ARMv5 doesn't have this register. @@ -206,6 +201,7 @@ static void arm1026_initfn(Object *obj) static void arm1136_r2_initfn(Object *obj) { ARMCPU *cpu = ARM_CPU(obj); + ARMISARegisters *isar = &cpu->isar; /* * What qemu calls "arm1136_r2" is actually the 1136 r0p2, ie an * older core than plain "arm1136". 
In particular this does not @@ -226,24 +222,25 @@ static void arm1136_r2_initfn(Object *obj) cpu->isar.mvfr1 = 0x00000000; cpu->ctr = 0x1dd20d2; cpu->reset_sctlr = 0x00050078; - cpu->isar.id_pfr0 = 0x111; - cpu->isar.id_pfr1 = 0x1; - cpu->isar.id_dfr0 = 0x2; - cpu->id_afr0 = 0x3; - cpu->isar.id_mmfr0 = 0x01130003; - cpu->isar.id_mmfr1 = 0x10030302; - cpu->isar.id_mmfr2 = 0x01222110; - cpu->isar.id_isar0 = 0x00140011; - cpu->isar.id_isar1 = 0x12002111; - cpu->isar.id_isar2 = 0x11231111; - cpu->isar.id_isar3 = 0x01102131; - cpu->isar.id_isar4 = 0x141; + SET_IDREG(isar, ID_PFR0, 0x111); + SET_IDREG(isar, ID_PFR1, 0x1); + SET_IDREG(isar, ID_DFR0, 0x2); + SET_IDREG(isar, ID_AFR0, 0x3); + SET_IDREG(isar, ID_MMFR0, 0x01130003); + SET_IDREG(isar, ID_MMFR1, 0x10030302); + SET_IDREG(isar, ID_MMFR2, 0x01222110); + SET_IDREG(isar, ID_ISAR0, 0x00140011); + SET_IDREG(isar, ID_ISAR1, 0x12002111); + SET_IDREG(isar, ID_ISAR2, 0x11231111); + SET_IDREG(isar, ID_ISAR3, 0x01102131); + SET_IDREG(isar, ID_ISAR4, 0x141); cpu->reset_auxcr = 7; } static void arm1136_initfn(Object *obj) { ARMCPU *cpu = ARM_CPU(obj); + ARMISARegisters *isar = &cpu->isar; cpu->dtb_compatible = "arm,arm1136"; set_feature(&cpu->env, ARM_FEATURE_V6K); @@ -257,24 +254,25 @@ static void arm1136_initfn(Object *obj) cpu->isar.mvfr1 = 0x00000000; cpu->ctr = 0x1dd20d2; cpu->reset_sctlr = 0x00050078; - cpu->isar.id_pfr0 = 0x111; - cpu->isar.id_pfr1 = 0x1; - cpu->isar.id_dfr0 = 0x2; - cpu->id_afr0 = 0x3; - cpu->isar.id_mmfr0 = 0x01130003; - cpu->isar.id_mmfr1 = 0x10030302; - cpu->isar.id_mmfr2 = 0x01222110; - cpu->isar.id_isar0 = 0x00140011; - cpu->isar.id_isar1 = 0x12002111; - cpu->isar.id_isar2 = 0x11231111; - cpu->isar.id_isar3 = 0x01102131; - cpu->isar.id_isar4 = 0x141; + SET_IDREG(isar, ID_PFR0, 0x111); + SET_IDREG(isar, ID_PFR1, 0x1); + SET_IDREG(isar, ID_DFR0, 0x2); + SET_IDREG(isar, ID_AFR0, 0x3); + SET_IDREG(isar, ID_MMFR0, 0x01130003); + SET_IDREG(isar, ID_MMFR1, 0x10030302); + SET_IDREG(isar, ID_MMFR2, 0x01222110); + SET_IDREG(isar, ID_ISAR0, 0x00140011); + SET_IDREG(isar, ID_ISAR1, 0x12002111); + SET_IDREG(isar, ID_ISAR2, 0x11231111); + SET_IDREG(isar, ID_ISAR3, 0x01102131); + SET_IDREG(isar, ID_ISAR4, 0x141); cpu->reset_auxcr = 7; } static void arm1176_initfn(Object *obj) { ARMCPU *cpu = ARM_CPU(obj); + ARMISARegisters *isar = &cpu->isar; cpu->dtb_compatible = "arm,arm1176"; set_feature(&cpu->env, ARM_FEATURE_V6K); @@ -289,24 +287,25 @@ static void arm1176_initfn(Object *obj) cpu->isar.mvfr1 = 0x00000000; cpu->ctr = 0x1dd20d2; cpu->reset_sctlr = 0x00050078; - cpu->isar.id_pfr0 = 0x111; - cpu->isar.id_pfr1 = 0x11; - cpu->isar.id_dfr0 = 0x33; - cpu->id_afr0 = 0; - cpu->isar.id_mmfr0 = 0x01130003; - cpu->isar.id_mmfr1 = 0x10030302; - cpu->isar.id_mmfr2 = 0x01222100; - cpu->isar.id_isar0 = 0x0140011; - cpu->isar.id_isar1 = 0x12002111; - cpu->isar.id_isar2 = 0x11231121; - cpu->isar.id_isar3 = 0x01102131; - cpu->isar.id_isar4 = 0x01141; + SET_IDREG(isar, ID_PFR0, 0x111); + SET_IDREG(isar, ID_PFR1, 0x11); + SET_IDREG(isar, ID_DFR0, 0x33); + SET_IDREG(isar, ID_AFR0, 0); + SET_IDREG(isar, ID_MMFR0, 0x01130003); + SET_IDREG(isar, ID_MMFR1, 0x10030302); + SET_IDREG(isar, ID_MMFR2, 0x01222100); + SET_IDREG(isar, ID_ISAR0, 0x0140011); + SET_IDREG(isar, ID_ISAR1, 0x12002111); + SET_IDREG(isar, ID_ISAR2, 0x11231121); + SET_IDREG(isar, ID_ISAR3, 0x01102131); + SET_IDREG(isar, ID_ISAR4, 0x01141); cpu->reset_auxcr = 7; } static void arm11mpcore_initfn(Object *obj) { ARMCPU *cpu = ARM_CPU(obj); + ARMISARegisters *isar = &cpu->isar; cpu->dtb_compatible 
= "arm,arm11mpcore"; set_feature(&cpu->env, ARM_FEATURE_V6K); @@ -318,18 +317,18 @@ static void arm11mpcore_initfn(Object *obj) cpu->isar.mvfr0 = 0x11111111; cpu->isar.mvfr1 = 0x00000000; cpu->ctr = 0x1d192992; /* 32K icache 32K dcache */ - cpu->isar.id_pfr0 = 0x111; - cpu->isar.id_pfr1 = 0x1; - cpu->isar.id_dfr0 = 0; - cpu->id_afr0 = 0x2; - cpu->isar.id_mmfr0 = 0x01100103; - cpu->isar.id_mmfr1 = 0x10020302; - cpu->isar.id_mmfr2 = 0x01222000; - cpu->isar.id_isar0 = 0x00100011; - cpu->isar.id_isar1 = 0x12002111; - cpu->isar.id_isar2 = 0x11221011; - cpu->isar.id_isar3 = 0x01102131; - cpu->isar.id_isar4 = 0x141; + SET_IDREG(isar, ID_PFR0, 0x111); + SET_IDREG(isar, ID_PFR1, 0x1); + SET_IDREG(isar, ID_DFR0, 0); + SET_IDREG(isar, ID_AFR0, 0x2); + SET_IDREG(isar, ID_MMFR0, 0x01100103); + SET_IDREG(isar, ID_MMFR1, 0x10020302); + SET_IDREG(isar, ID_MMFR2, 0x01222000); + SET_IDREG(isar, ID_ISAR0, 0x00100011); + SET_IDREG(isar, ID_ISAR1, 0x12002111); + SET_IDREG(isar, ID_ISAR2, 0x11221011); + SET_IDREG(isar, ID_ISAR3, 0x01102131); + SET_IDREG(isar, ID_ISAR4, 0x141); cpu->reset_auxcr = 1; } @@ -343,6 +342,7 @@ static const ARMCPRegInfo cortexa8_cp_reginfo[] = { static void cortex_a8_initfn(Object *obj) { ARMCPU *cpu = ARM_CPU(obj); + ARMISARegisters *isar = &cpu->isar; cpu->dtb_compatible = "arm,cortex-a8"; set_feature(&cpu->env, ARM_FEATURE_V7); @@ -357,21 +357,21 @@ static void cortex_a8_initfn(Object *obj) cpu->isar.mvfr1 = 0x00011111; cpu->ctr = 0x82048004; cpu->reset_sctlr = 0x00c50078; - cpu->isar.id_pfr0 = 0x1031; - cpu->isar.id_pfr1 = 0x11; - cpu->isar.id_dfr0 = 0x400; - cpu->id_afr0 = 0; - cpu->isar.id_mmfr0 = 0x31100003; - cpu->isar.id_mmfr1 = 0x20000000; - cpu->isar.id_mmfr2 = 0x01202000; - cpu->isar.id_mmfr3 = 0x11; - cpu->isar.id_isar0 = 0x00101111; - cpu->isar.id_isar1 = 0x12112111; - cpu->isar.id_isar2 = 0x21232031; - cpu->isar.id_isar3 = 0x11112131; - cpu->isar.id_isar4 = 0x00111142; + SET_IDREG(isar, ID_PFR0, 0x1031); + SET_IDREG(isar, ID_PFR1, 0x11); + SET_IDREG(isar, ID_DFR0, 0x400); + SET_IDREG(isar, ID_AFR0, 0); + SET_IDREG(isar, ID_MMFR0, 0x31100003); + SET_IDREG(isar, ID_MMFR1, 0x20000000); + SET_IDREG(isar, ID_MMFR2, 0x01202000); + SET_IDREG(isar, ID_MMFR3, 0x11); + SET_IDREG(isar, ID_ISAR0, 0x00101111); + SET_IDREG(isar, ID_ISAR1, 0x12112111); + SET_IDREG(isar, ID_ISAR2, 0x21232031); + SET_IDREG(isar, ID_ISAR3, 0x11112131); + SET_IDREG(isar, ID_ISAR4, 0x00111142); cpu->isar.dbgdidr = 0x15141000; - cpu->clidr = (1 << 27) | (2 << 24) | 3; + SET_IDREG(isar, CLIDR, (1 << 27) | (2 << 24) | 3); cpu->ccsidr[0] = 0xe007e01a; /* 16k L1 dcache. */ cpu->ccsidr[1] = 0x2007e01a; /* 16k L1 icache. */ cpu->ccsidr[2] = 0xf0000000; /* No L2 icache. 
*/ @@ -412,6 +412,7 @@ static const ARMCPRegInfo cortexa9_cp_reginfo[] = { static void cortex_a9_initfn(Object *obj) { ARMCPU *cpu = ARM_CPU(obj); + ARMISARegisters *isar = &cpu->isar; cpu->dtb_compatible = "arm,cortex-a9"; set_feature(&cpu->env, ARM_FEATURE_V7); @@ -432,21 +433,21 @@ static void cortex_a9_initfn(Object *obj) cpu->isar.mvfr1 = 0x01111111; cpu->ctr = 0x80038003; cpu->reset_sctlr = 0x00c50078; - cpu->isar.id_pfr0 = 0x1031; - cpu->isar.id_pfr1 = 0x11; - cpu->isar.id_dfr0 = 0x000; - cpu->id_afr0 = 0; - cpu->isar.id_mmfr0 = 0x00100103; - cpu->isar.id_mmfr1 = 0x20000000; - cpu->isar.id_mmfr2 = 0x01230000; - cpu->isar.id_mmfr3 = 0x00002111; - cpu->isar.id_isar0 = 0x00101111; - cpu->isar.id_isar1 = 0x13112111; - cpu->isar.id_isar2 = 0x21232041; - cpu->isar.id_isar3 = 0x11112131; - cpu->isar.id_isar4 = 0x00111142; + SET_IDREG(isar, ID_PFR0, 0x1031); + SET_IDREG(isar, ID_PFR1, 0x11); + SET_IDREG(isar, ID_DFR0, 0x000); + SET_IDREG(isar, ID_AFR0, 0); + SET_IDREG(isar, ID_MMFR0, 0x00100103); + SET_IDREG(isar, ID_MMFR1, 0x20000000); + SET_IDREG(isar, ID_MMFR2, 0x01230000); + SET_IDREG(isar, ID_MMFR3, 0x00002111); + SET_IDREG(isar, ID_ISAR0, 0x00101111); + SET_IDREG(isar, ID_ISAR1, 0x13112111); + SET_IDREG(isar, ID_ISAR2, 0x21232041); + SET_IDREG(isar, ID_ISAR3, 0x11112131); + SET_IDREG(isar, ID_ISAR4, 0x00111142); cpu->isar.dbgdidr = 0x35141000; - cpu->clidr = (1 << 27) | (1 << 24) | 3; + SET_IDREG(isar, CLIDR, (1 << 27) | (1 << 24) | 3); cpu->ccsidr[0] = 0xe00fe019; /* 16k L1 dcache. */ cpu->ccsidr[1] = 0x200fe019; /* 16k L1 icache. */ cpu->isar.reset_pmcr_el0 = 0x41093000; @@ -479,6 +480,7 @@ static const ARMCPRegInfo cortexa15_cp_reginfo[] = { static void cortex_a7_initfn(Object *obj) { ARMCPU *cpu = ARM_CPU(obj); + ARMISARegisters *isar = &cpu->isar; cpu->dtb_compatible = "arm,cortex-a7"; set_feature(&cpu->env, ARM_FEATURE_V7VE); @@ -497,27 +499,27 @@ static void cortex_a7_initfn(Object *obj) cpu->isar.mvfr1 = 0x11111111; cpu->ctr = 0x84448003; cpu->reset_sctlr = 0x00c50078; - cpu->isar.id_pfr0 = 0x00001131; - cpu->isar.id_pfr1 = 0x00011011; - cpu->isar.id_dfr0 = 0x02010555; - cpu->id_afr0 = 0x00000000; - cpu->isar.id_mmfr0 = 0x10101105; - cpu->isar.id_mmfr1 = 0x40000000; - cpu->isar.id_mmfr2 = 0x01240000; - cpu->isar.id_mmfr3 = 0x02102211; + SET_IDREG(isar, ID_PFR0, 0x00001131); + SET_IDREG(isar, ID_PFR1, 0x00011011); + SET_IDREG(isar, ID_DFR0, 0x02010555); + SET_IDREG(isar, ID_AFR0, 0x00000000); + SET_IDREG(isar, ID_MMFR0, 0x10101105); + SET_IDREG(isar, ID_MMFR1, 0x40000000); + SET_IDREG(isar, ID_MMFR2, 0x01240000); + SET_IDREG(isar, ID_MMFR3, 0x02102211); /* * a7_mpcore_r0p5_trm, page 4-4 gives 0x01101110; but * table 4-41 gives 0x02101110, which includes the arm div insns. 
*/ - cpu->isar.id_isar0 = 0x02101110; - cpu->isar.id_isar1 = 0x13112111; - cpu->isar.id_isar2 = 0x21232041; - cpu->isar.id_isar3 = 0x11112131; - cpu->isar.id_isar4 = 0x10011142; + SET_IDREG(isar, ID_ISAR0, 0x02101110); + SET_IDREG(isar, ID_ISAR1, 0x13112111); + SET_IDREG(isar, ID_ISAR2, 0x21232041); + SET_IDREG(isar, ID_ISAR3, 0x11112131); + SET_IDREG(isar, ID_ISAR4, 0x10011142); cpu->isar.dbgdidr = 0x3515f005; cpu->isar.dbgdevid = 0x01110f13; cpu->isar.dbgdevid1 = 0x1; - cpu->clidr = 0x0a200023; + SET_IDREG(isar, CLIDR, 0x0a200023); cpu->ccsidr[0] = 0x701fe00a; /* 32K L1 dcache */ cpu->ccsidr[1] = 0x201fe00a; /* 32K L1 icache */ cpu->ccsidr[2] = 0x711fe07a; /* 4096K L2 unified cache */ @@ -528,6 +530,7 @@ static void cortex_a7_initfn(Object *obj) static void cortex_a15_initfn(Object *obj) { ARMCPU *cpu = ARM_CPU(obj); + ARMISARegisters *isar = &cpu->isar; cpu->dtb_compatible = "arm,cortex-a15"; set_feature(&cpu->env, ARM_FEATURE_V7VE); @@ -548,23 +551,23 @@ static void cortex_a15_initfn(Object *obj) cpu->isar.mvfr1 = 0x11111111; cpu->ctr = 0x8444c004; cpu->reset_sctlr = 0x00c50078; - cpu->isar.id_pfr0 = 0x00001131; - cpu->isar.id_pfr1 = 0x00011011; - cpu->isar.id_dfr0 = 0x02010555; - cpu->id_afr0 = 0x00000000; - cpu->isar.id_mmfr0 = 0x10201105; - cpu->isar.id_mmfr1 = 0x20000000; - cpu->isar.id_mmfr2 = 0x01240000; - cpu->isar.id_mmfr3 = 0x02102211; - cpu->isar.id_isar0 = 0x02101110; - cpu->isar.id_isar1 = 0x13112111; - cpu->isar.id_isar2 = 0x21232041; - cpu->isar.id_isar3 = 0x11112131; - cpu->isar.id_isar4 = 0x10011142; + SET_IDREG(isar, ID_PFR0, 0x00001131); + SET_IDREG(isar, ID_PFR1, 0x00011011); + SET_IDREG(isar, ID_DFR0, 0x02010555); + SET_IDREG(isar, ID_AFR0, 0x00000000); + SET_IDREG(isar, ID_MMFR0, 0x10201105); + SET_IDREG(isar, ID_MMFR1, 0x20000000); + SET_IDREG(isar, ID_MMFR2, 0x01240000); + SET_IDREG(isar, ID_MMFR3, 0x02102211); + SET_IDREG(isar, ID_ISAR0, 0x02101110); + SET_IDREG(isar, ID_ISAR1, 0x13112111); + SET_IDREG(isar, ID_ISAR2, 0x21232041); + SET_IDREG(isar, ID_ISAR3, 0x11112131); + SET_IDREG(isar, ID_ISAR4, 0x10011142); cpu->isar.dbgdidr = 0x3515f021; cpu->isar.dbgdevid = 0x01110f13; cpu->isar.dbgdevid1 = 0x0; - cpu->clidr = 0x0a200023; + SET_IDREG(isar, CLIDR, 0x0a200023); cpu->ccsidr[0] = 0x701fe00a; /* 32K L1 dcache */ cpu->ccsidr[1] = 0x201fe00a; /* 32K L1 icache */ cpu->ccsidr[2] = 0x711fe07a; /* 4096K L2 unified cache */ @@ -585,27 +588,28 @@ static const ARMCPRegInfo cortexr5_cp_reginfo[] = { static void cortex_r5_initfn(Object *obj) { ARMCPU *cpu = ARM_CPU(obj); + ARMISARegisters *isar = &cpu->isar; set_feature(&cpu->env, ARM_FEATURE_V7); set_feature(&cpu->env, ARM_FEATURE_V7MP); set_feature(&cpu->env, ARM_FEATURE_PMSA); set_feature(&cpu->env, ARM_FEATURE_PMU); cpu->midr = 0x411fc153; /* r1p3 */ - cpu->isar.id_pfr0 = 0x0131; - cpu->isar.id_pfr1 = 0x001; - cpu->isar.id_dfr0 = 0x010400; - cpu->id_afr0 = 0x0; - cpu->isar.id_mmfr0 = 0x0210030; - cpu->isar.id_mmfr1 = 0x00000000; - cpu->isar.id_mmfr2 = 0x01200000; - cpu->isar.id_mmfr3 = 0x0211; - cpu->isar.id_isar0 = 0x02101111; - cpu->isar.id_isar1 = 0x13112111; - cpu->isar.id_isar2 = 0x21232141; - cpu->isar.id_isar3 = 0x01112131; - cpu->isar.id_isar4 = 0x0010142; - cpu->isar.id_isar5 = 0x0; - cpu->isar.id_isar6 = 0x0; + SET_IDREG(isar, ID_PFR0, 0x0131); + SET_IDREG(isar, ID_PFR1, 0x001); + SET_IDREG(isar, ID_DFR0, 0x010400); + SET_IDREG(isar, ID_AFR0, 0x0); + SET_IDREG(isar, ID_MMFR0, 0x0210030); + SET_IDREG(isar, ID_MMFR1, 0x00000000); + SET_IDREG(isar, ID_MMFR2, 0x01200000); + SET_IDREG(isar, ID_MMFR3, 0x0211); + 
SET_IDREG(isar, ID_ISAR0, 0x02101111); + SET_IDREG(isar, ID_ISAR1, 0x13112111); + SET_IDREG(isar, ID_ISAR2, 0x21232141); + SET_IDREG(isar, ID_ISAR3, 0x01112131); + SET_IDREG(isar, ID_ISAR4, 0x0010142); + SET_IDREG(isar, ID_ISAR5, 0x0); + SET_IDREG(isar, ID_ISAR6, 0x0); cpu->mp_is_up = true; cpu->pmsav7_dregion = 16; cpu->isar.reset_pmcr_el0 = 0x41151800; @@ -720,6 +724,7 @@ static const ARMCPRegInfo cortex_r52_cp_reginfo[] = { static void cortex_r52_initfn(Object *obj) { ARMCPU *cpu = ARM_CPU(obj); + ARMISARegisters *isar = &cpu->isar; set_feature(&cpu->env, ARM_FEATURE_V8); set_feature(&cpu->env, ARM_FEATURE_EL2); @@ -737,23 +742,23 @@ static void cortex_r52_initfn(Object *obj) cpu->isar.mvfr2 = 0x00000043; cpu->ctr = 0x8144c004; cpu->reset_sctlr = 0x30c50838; - cpu->isar.id_pfr0 = 0x00000131; - cpu->isar.id_pfr1 = 0x10111001; - cpu->isar.id_dfr0 = 0x03010006; - cpu->id_afr0 = 0x00000000; - cpu->isar.id_mmfr0 = 0x00211040; - cpu->isar.id_mmfr1 = 0x40000000; - cpu->isar.id_mmfr2 = 0x01200000; - cpu->isar.id_mmfr3 = 0xf0102211; - cpu->isar.id_mmfr4 = 0x00000010; - cpu->isar.id_isar0 = 0x02101110; - cpu->isar.id_isar1 = 0x13112111; - cpu->isar.id_isar2 = 0x21232142; - cpu->isar.id_isar3 = 0x01112131; - cpu->isar.id_isar4 = 0x00010142; - cpu->isar.id_isar5 = 0x00010001; + SET_IDREG(isar, ID_PFR0, 0x00000131); + SET_IDREG(isar, ID_PFR1, 0x10111001); + SET_IDREG(isar, ID_DFR0, 0x03010006); + SET_IDREG(isar, ID_AFR0, 0x00000000); + SET_IDREG(isar, ID_MMFR0, 0x00211040); + SET_IDREG(isar, ID_MMFR1, 0x40000000); + SET_IDREG(isar, ID_MMFR2, 0x01200000); + SET_IDREG(isar, ID_MMFR3, 0xf0102211); + SET_IDREG(isar, ID_MMFR4, 0x00000010); + SET_IDREG(isar, ID_ISAR0, 0x02101110); + SET_IDREG(isar, ID_ISAR1, 0x13112111); + SET_IDREG(isar, ID_ISAR2, 0x21232142); + SET_IDREG(isar, ID_ISAR3, 0x01112131); + SET_IDREG(isar, ID_ISAR4, 0x00010142); + SET_IDREG(isar, ID_ISAR5, 0x00010001); cpu->isar.dbgdidr = 0x77168000; - cpu->clidr = (1 << 27) | (1 << 24) | 0x3; + SET_IDREG(isar, CLIDR, (1 << 27) | (1 << 24) | 0x3); cpu->ccsidr[0] = 0x700fe01a; /* 32KB L1 dcache */ cpu->ccsidr[1] = 0x201fe00a; /* 32KB L1 icache */ @@ -802,144 +807,6 @@ static void sa1110_initfn(Object *obj) cpu->reset_sctlr = 0x00000070; } -static void pxa250_initfn(Object *obj) -{ - ARMCPU *cpu = ARM_CPU(obj); - - cpu->dtb_compatible = "marvell,xscale"; - set_feature(&cpu->env, ARM_FEATURE_V5); - set_feature(&cpu->env, ARM_FEATURE_XSCALE); - cpu->midr = 0x69052100; - cpu->ctr = 0xd172172; - cpu->reset_sctlr = 0x00000078; -} - -static void pxa255_initfn(Object *obj) -{ - ARMCPU *cpu = ARM_CPU(obj); - - cpu->dtb_compatible = "marvell,xscale"; - set_feature(&cpu->env, ARM_FEATURE_V5); - set_feature(&cpu->env, ARM_FEATURE_XSCALE); - cpu->midr = 0x69052d00; - cpu->ctr = 0xd172172; - cpu->reset_sctlr = 0x00000078; -} - -static void pxa260_initfn(Object *obj) -{ - ARMCPU *cpu = ARM_CPU(obj); - - cpu->dtb_compatible = "marvell,xscale"; - set_feature(&cpu->env, ARM_FEATURE_V5); - set_feature(&cpu->env, ARM_FEATURE_XSCALE); - cpu->midr = 0x69052903; - cpu->ctr = 0xd172172; - cpu->reset_sctlr = 0x00000078; -} - -static void pxa261_initfn(Object *obj) -{ - ARMCPU *cpu = ARM_CPU(obj); - - cpu->dtb_compatible = "marvell,xscale"; - set_feature(&cpu->env, ARM_FEATURE_V5); - set_feature(&cpu->env, ARM_FEATURE_XSCALE); - cpu->midr = 0x69052d05; - cpu->ctr = 0xd172172; - cpu->reset_sctlr = 0x00000078; -} - -static void pxa262_initfn(Object *obj) -{ - ARMCPU *cpu = ARM_CPU(obj); - - cpu->dtb_compatible = "marvell,xscale"; - set_feature(&cpu->env, ARM_FEATURE_V5); 
- set_feature(&cpu->env, ARM_FEATURE_XSCALE); - cpu->midr = 0x69052d06; - cpu->ctr = 0xd172172; - cpu->reset_sctlr = 0x00000078; -} - -static void pxa270a0_initfn(Object *obj) -{ - ARMCPU *cpu = ARM_CPU(obj); - - cpu->dtb_compatible = "marvell,xscale"; - set_feature(&cpu->env, ARM_FEATURE_V5); - set_feature(&cpu->env, ARM_FEATURE_XSCALE); - set_feature(&cpu->env, ARM_FEATURE_IWMMXT); - cpu->midr = 0x69054110; - cpu->ctr = 0xd172172; - cpu->reset_sctlr = 0x00000078; -} - -static void pxa270a1_initfn(Object *obj) -{ - ARMCPU *cpu = ARM_CPU(obj); - - cpu->dtb_compatible = "marvell,xscale"; - set_feature(&cpu->env, ARM_FEATURE_V5); - set_feature(&cpu->env, ARM_FEATURE_XSCALE); - set_feature(&cpu->env, ARM_FEATURE_IWMMXT); - cpu->midr = 0x69054111; - cpu->ctr = 0xd172172; - cpu->reset_sctlr = 0x00000078; -} - -static void pxa270b0_initfn(Object *obj) -{ - ARMCPU *cpu = ARM_CPU(obj); - - cpu->dtb_compatible = "marvell,xscale"; - set_feature(&cpu->env, ARM_FEATURE_V5); - set_feature(&cpu->env, ARM_FEATURE_XSCALE); - set_feature(&cpu->env, ARM_FEATURE_IWMMXT); - cpu->midr = 0x69054112; - cpu->ctr = 0xd172172; - cpu->reset_sctlr = 0x00000078; -} - -static void pxa270b1_initfn(Object *obj) -{ - ARMCPU *cpu = ARM_CPU(obj); - - cpu->dtb_compatible = "marvell,xscale"; - set_feature(&cpu->env, ARM_FEATURE_V5); - set_feature(&cpu->env, ARM_FEATURE_XSCALE); - set_feature(&cpu->env, ARM_FEATURE_IWMMXT); - cpu->midr = 0x69054113; - cpu->ctr = 0xd172172; - cpu->reset_sctlr = 0x00000078; -} - -static void pxa270c0_initfn(Object *obj) -{ - ARMCPU *cpu = ARM_CPU(obj); - - cpu->dtb_compatible = "marvell,xscale"; - set_feature(&cpu->env, ARM_FEATURE_V5); - set_feature(&cpu->env, ARM_FEATURE_XSCALE); - set_feature(&cpu->env, ARM_FEATURE_IWMMXT); - cpu->midr = 0x69054114; - cpu->ctr = 0xd172172; - cpu->reset_sctlr = 0x00000078; -} - -static void pxa270c5_initfn(Object *obj) -{ - ARMCPU *cpu = ARM_CPU(obj); - - cpu->dtb_compatible = "marvell,xscale"; - set_feature(&cpu->env, ARM_FEATURE_V5); - set_feature(&cpu->env, ARM_FEATURE_XSCALE); - set_feature(&cpu->env, ARM_FEATURE_IWMMXT); - cpu->midr = 0x69054117; - cpu->ctr = 0xd172172; - cpu->reset_sctlr = 0x00000078; -} - #ifndef TARGET_AARCH64 /* * -cpu max: a CPU with as many features enabled as our emulation supports. 
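
The hunks in this file, and in cpu64.c below, replace direct cpu->isar.id_* and cpu->clidr writes with the SET_IDREG()/GET_IDREG() accessors (plus FIELD_DP32_IDREG()/FIELD_DP64_IDREG() for single-field updates). The real macro definitions live in target/arm headers outside this diff; a minimal, hypothetical sketch of the pattern, assuming the ID registers are gathered into one idregs[] array indexed by per-register constants (all names below are illustrative, not QEMU's), could look like:

/* Illustrative sketch only -- not the actual QEMU definitions. */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/* Assumption: one index constant per ID register, one backing array. */
typedef enum { ID_PFR0_IDX, ID_ISAR5_IDX, CLIDR_IDX, NUM_IDREG_IDX } IdRegIdx;

typedef struct {
    uint64_t idregs[NUM_IDREG_IDX];
} ISARegsSketch;

#define SET_IDREG(isar, reg, value) ((isar)->idregs[reg ## _IDX] = (uint64_t)(value))
#define GET_IDREG(isar, reg)        ((isar)->idregs[reg ## _IDX])

int main(void)
{
    ISARegsSketch isar = { { 0 } };

    /* Same effect as the old direct write cpu->isar.id_pfr0 = 0x1031 */
    SET_IDREG(&isar, ID_PFR0, 0x1031);
    printf("ID_PFR0 = 0x%" PRIx64 "\n", GET_IDREG(&isar, ID_PFR0));
    return 0;
}

One plausible motivation for such a layout is that the ID registers can then be iterated generically (for example when exposing or migrating them) instead of being named field by field.
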
@@ -949,6 +816,7 @@ static void pxa270c5_initfn(Object *obj) static void arm_max_initfn(Object *obj) { ARMCPU *cpu = ARM_CPU(obj); + ARMISARegisters *isar = &cpu->isar; /* aarch64_a57_initfn, advertising none of the aarch64 features */ cpu->dtb_compatible = "arm,cortex-a57"; @@ -968,23 +836,23 @@ static void arm_max_initfn(Object *obj) cpu->isar.mvfr2 = 0x00000043; cpu->ctr = 0x8444c004; cpu->reset_sctlr = 0x00c50838; - cpu->isar.id_pfr0 = 0x00000131; - cpu->isar.id_pfr1 = 0x00011011; - cpu->isar.id_dfr0 = 0x03010066; - cpu->id_afr0 = 0x00000000; - cpu->isar.id_mmfr0 = 0x10101105; - cpu->isar.id_mmfr1 = 0x40000000; - cpu->isar.id_mmfr2 = 0x01260000; - cpu->isar.id_mmfr3 = 0x02102211; - cpu->isar.id_isar0 = 0x02101110; - cpu->isar.id_isar1 = 0x13112111; - cpu->isar.id_isar2 = 0x21232042; - cpu->isar.id_isar3 = 0x01112131; - cpu->isar.id_isar4 = 0x00011142; - cpu->isar.id_isar5 = 0x00011121; - cpu->isar.id_isar6 = 0; + SET_IDREG(isar, ID_PFR0, 0x00000131); + SET_IDREG(isar, ID_PFR1, 0x00011011); + SET_IDREG(isar, ID_DFR0, 0x03010066); + SET_IDREG(isar, ID_AFR0, 0x00000000); + SET_IDREG(isar, ID_MMFR0, 0x10101105); + SET_IDREG(isar, ID_MMFR1, 0x40000000); + SET_IDREG(isar, ID_MMFR2, 0x01260000); + SET_IDREG(isar, ID_MMFR3, 0x02102211); + SET_IDREG(isar, ID_ISAR0, 0x02101110); + SET_IDREG(isar, ID_ISAR1, 0x13112111); + SET_IDREG(isar, ID_ISAR2, 0x21232042); + SET_IDREG(isar, ID_ISAR3, 0x01112131); + SET_IDREG(isar, ID_ISAR4, 0x00011142); + SET_IDREG(isar, ID_ISAR5, 0x00011121); + SET_IDREG(isar, ID_ISAR6, 0); cpu->isar.reset_pmcr_el0 = 0x41013000; - cpu->clidr = 0x0a200023; + SET_IDREG(isar, CLIDR, 0x0a200023); cpu->ccsidr[0] = 0x701fe00a; /* 32KB L1 dcache */ cpu->ccsidr[1] = 0x201fe012; /* 48KB L1 icache */ cpu->ccsidr[2] = 0x70ffe07a; /* 2048KB L2 cache */ @@ -1026,31 +894,6 @@ static const ARMCPUInfo arm_tcg_cpus[] = { { .name = "ti925t", .initfn = ti925t_initfn }, { .name = "sa1100", .initfn = sa1100_initfn }, { .name = "sa1110", .initfn = sa1110_initfn }, - { .name = "pxa250", .initfn = pxa250_initfn, - .deprecation_note = "iwMMXt CPUs are no longer supported", }, - { .name = "pxa255", .initfn = pxa255_initfn, - .deprecation_note = "iwMMXt CPUs are no longer supported", }, - { .name = "pxa260", .initfn = pxa260_initfn, - .deprecation_note = "iwMMXt CPUs are no longer supported", }, - { .name = "pxa261", .initfn = pxa261_initfn, - .deprecation_note = "iwMMXt CPUs are no longer supported", }, - { .name = "pxa262", .initfn = pxa262_initfn, - .deprecation_note = "iwMMXt CPUs are no longer supported", }, - /* "pxa270" is an alias for "pxa270-a0" */ - { .name = "pxa270", .initfn = pxa270a0_initfn, - .deprecation_note = "iwMMXt CPUs are no longer supported", }, - { .name = "pxa270-a0", .initfn = pxa270a0_initfn, - .deprecation_note = "iwMMXt CPUs are no longer supported", }, - { .name = "pxa270-a1", .initfn = pxa270a1_initfn, - .deprecation_note = "iwMMXt CPUs are no longer supported", }, - { .name = "pxa270-b0", .initfn = pxa270b0_initfn, - .deprecation_note = "iwMMXt CPUs are no longer supported", }, - { .name = "pxa270-b1", .initfn = pxa270b1_initfn, - .deprecation_note = "iwMMXt CPUs are no longer supported", }, - { .name = "pxa270-c0", .initfn = pxa270c0_initfn, - .deprecation_note = "iwMMXt CPUs are no longer supported", }, - { .name = "pxa270-c5", .initfn = pxa270c5_initfn, - .deprecation_note = "iwMMXt CPUs are no longer supported", }, #ifndef TARGET_AARCH64 { .name = "max", .initfn = arm_max_initfn }, #endif diff --git a/target/arm/tcg/cpu64.c b/target/arm/tcg/cpu64.c index 
5d8ed27..1bffe66 100644 --- a/target/arm/tcg/cpu64.c +++ b/target/arm/tcg/cpu64.c @@ -32,6 +32,7 @@ static void aarch64_a35_initfn(Object *obj) { ARMCPU *cpu = ARM_CPU(obj); + ARMISARegisters *isar = &cpu->isar; cpu->dtb_compatible = "arm,cortex-a35"; set_feature(&cpu->env, ARM_FEATURE_V8); @@ -48,29 +49,29 @@ static void aarch64_a35_initfn(Object *obj) cpu->midr = 0x411fd040; cpu->revidr = 0; cpu->ctr = 0x84448004; - cpu->isar.id_pfr0 = 0x00000131; - cpu->isar.id_pfr1 = 0x00011011; - cpu->isar.id_dfr0 = 0x03010066; - cpu->id_afr0 = 0; - cpu->isar.id_mmfr0 = 0x10201105; - cpu->isar.id_mmfr1 = 0x40000000; - cpu->isar.id_mmfr2 = 0x01260000; - cpu->isar.id_mmfr3 = 0x02102211; - cpu->isar.id_isar0 = 0x02101110; - cpu->isar.id_isar1 = 0x13112111; - cpu->isar.id_isar2 = 0x21232042; - cpu->isar.id_isar3 = 0x01112131; - cpu->isar.id_isar4 = 0x00011142; - cpu->isar.id_isar5 = 0x00011121; - cpu->isar.id_aa64pfr0 = 0x00002222; - cpu->isar.id_aa64pfr1 = 0; - cpu->isar.id_aa64dfr0 = 0x10305106; - cpu->isar.id_aa64dfr1 = 0; - cpu->isar.id_aa64isar0 = 0x00011120; - cpu->isar.id_aa64isar1 = 0; - cpu->isar.id_aa64mmfr0 = 0x00101122; - cpu->isar.id_aa64mmfr1 = 0; - cpu->clidr = 0x0a200023; + SET_IDREG(isar, ID_PFR0, 0x00000131); + SET_IDREG(isar, ID_PFR1, 0x00011011); + SET_IDREG(isar, ID_DFR0, 0x03010066); + SET_IDREG(isar, ID_AFR0, 0); + SET_IDREG(isar, ID_MMFR0, 0x10201105); + SET_IDREG(isar, ID_MMFR1, 0x40000000); + SET_IDREG(isar, ID_MMFR2, 0x01260000); + SET_IDREG(isar, ID_MMFR3, 0x02102211); + SET_IDREG(isar, ID_ISAR0, 0x02101110); + SET_IDREG(isar, ID_ISAR1, 0x13112111); + SET_IDREG(isar, ID_ISAR2, 0x21232042); + SET_IDREG(isar, ID_ISAR3, 0x01112131); + SET_IDREG(isar, ID_ISAR4, 0x00011142); + SET_IDREG(isar, ID_ISAR5, 0x00011121); + SET_IDREG(isar, ID_AA64PFR0, 0x00002222); + SET_IDREG(isar, ID_AA64PFR1, 0); + SET_IDREG(isar, ID_AA64DFR0, 0x10305106); + SET_IDREG(isar, ID_AA64DFR1, 0); + SET_IDREG(isar, ID_AA64ISAR0, 0x00011120); + SET_IDREG(isar, ID_AA64ISAR1, 0); + SET_IDREG(isar, ID_AA64MMFR0, 0x00101122); + SET_IDREG(isar, ID_AA64MMFR1, 0); + SET_IDREG(isar, CLIDR, 0x0a200023); cpu->dcz_blocksize = 4; /* From B2.4 AArch64 Virtual Memory control registers */ @@ -157,11 +158,9 @@ static bool cpu_arm_get_rme(Object *obj, Error **errp) static void cpu_arm_set_rme(Object *obj, bool value, Error **errp) { ARMCPU *cpu = ARM_CPU(obj); - uint64_t t; - t = cpu->isar.id_aa64pfr0; - t = FIELD_DP64(t, ID_AA64PFR0, RME, value); - cpu->isar.id_aa64pfr0 = t; + /* Enable FEAT_RME_GPC2 */ + FIELD_DP64_IDREG(&cpu->isar, ID_AA64PFR0, RME, value ? 
2 : 0); } static void cpu_max_set_l0gptsz(Object *obj, Visitor *v, const char *name, @@ -204,6 +203,7 @@ static const Property arm_cpu_lpa2_property = static void aarch64_a55_initfn(Object *obj) { ARMCPU *cpu = ARM_CPU(obj); + ARMISARegisters *isar = &cpu->isar; cpu->dtb_compatible = "arm,cortex-a55"; set_feature(&cpu->env, ARM_FEATURE_V8); @@ -217,34 +217,34 @@ static void aarch64_a55_initfn(Object *obj) set_feature(&cpu->env, ARM_FEATURE_PMU); /* Ordered by B2.4 AArch64 registers by functional group */ - cpu->clidr = 0x82000023; + SET_IDREG(isar, CLIDR, 0x82000023); cpu->ctr = 0x84448004; /* L1Ip = VIPT */ cpu->dcz_blocksize = 4; /* 64 bytes */ - cpu->isar.id_aa64dfr0 = 0x0000000010305408ull; - cpu->isar.id_aa64isar0 = 0x0000100010211120ull; - cpu->isar.id_aa64isar1 = 0x0000000000100001ull; - cpu->isar.id_aa64mmfr0 = 0x0000000000101122ull; - cpu->isar.id_aa64mmfr1 = 0x0000000010212122ull; - cpu->isar.id_aa64mmfr2 = 0x0000000000001011ull; - cpu->isar.id_aa64pfr0 = 0x0000000010112222ull; - cpu->isar.id_aa64pfr1 = 0x0000000000000010ull; - cpu->id_afr0 = 0x00000000; - cpu->isar.id_dfr0 = 0x04010088; - cpu->isar.id_isar0 = 0x02101110; - cpu->isar.id_isar1 = 0x13112111; - cpu->isar.id_isar2 = 0x21232042; - cpu->isar.id_isar3 = 0x01112131; - cpu->isar.id_isar4 = 0x00011142; - cpu->isar.id_isar5 = 0x01011121; - cpu->isar.id_isar6 = 0x00000010; - cpu->isar.id_mmfr0 = 0x10201105; - cpu->isar.id_mmfr1 = 0x40000000; - cpu->isar.id_mmfr2 = 0x01260000; - cpu->isar.id_mmfr3 = 0x02122211; - cpu->isar.id_mmfr4 = 0x00021110; - cpu->isar.id_pfr0 = 0x10010131; - cpu->isar.id_pfr1 = 0x00011011; - cpu->isar.id_pfr2 = 0x00000011; + SET_IDREG(isar, ID_AA64DFR0, 0x0000000010305408ull); + SET_IDREG(isar, ID_AA64ISAR0, 0x0000100010211120ull); + SET_IDREG(isar, ID_AA64ISAR1, 0x0000000000100001ull); + SET_IDREG(isar, ID_AA64MMFR0, 0x0000000000101122ull); + SET_IDREG(isar, ID_AA64MMFR1, 0x0000000010212122ull); + SET_IDREG(isar, ID_AA64MMFR2, 0x0000000000001011ull); + SET_IDREG(isar, ID_AA64PFR0, 0x0000000010112222ull); + SET_IDREG(isar, ID_AA64PFR1, 0x0000000000000010ull); + SET_IDREG(isar, ID_AFR0, 0x00000000); + SET_IDREG(isar, ID_DFR0, 0x04010088); + SET_IDREG(isar, ID_ISAR0, 0x02101110); + SET_IDREG(isar, ID_ISAR1, 0x13112111); + SET_IDREG(isar, ID_ISAR2, 0x21232042); + SET_IDREG(isar, ID_ISAR3, 0x01112131); + SET_IDREG(isar, ID_ISAR4, 0x00011142); + SET_IDREG(isar, ID_ISAR5, 0x01011121); + SET_IDREG(isar, ID_ISAR6, 0x00000010); + SET_IDREG(isar, ID_MMFR0, 0x10201105); + SET_IDREG(isar, ID_MMFR1, 0x40000000); + SET_IDREG(isar, ID_MMFR2, 0x01260000); + SET_IDREG(isar, ID_MMFR3, 0x02122211); + SET_IDREG(isar, ID_MMFR4, 0x00021110); + SET_IDREG(isar, ID_PFR0, 0x10010131); + SET_IDREG(isar, ID_PFR1, 0x00011011); + SET_IDREG(isar, ID_PFR2, 0x00000011); cpu->midr = 0x412FD050; /* r2p0 */ cpu->revidr = 0; @@ -276,6 +276,7 @@ static void aarch64_a55_initfn(Object *obj) static void aarch64_a72_initfn(Object *obj) { ARMCPU *cpu = ARM_CPU(obj); + ARMISARegisters *isar = &cpu->isar; cpu->dtb_compatible = "arm,cortex-a72"; set_feature(&cpu->env, ARM_FEATURE_V8); @@ -295,29 +296,29 @@ static void aarch64_a72_initfn(Object *obj) cpu->isar.mvfr2 = 0x00000043; cpu->ctr = 0x8444c004; cpu->reset_sctlr = 0x00c50838; - cpu->isar.id_pfr0 = 0x00000131; - cpu->isar.id_pfr1 = 0x00011011; - cpu->isar.id_dfr0 = 0x03010066; - cpu->id_afr0 = 0x00000000; - cpu->isar.id_mmfr0 = 0x10201105; - cpu->isar.id_mmfr1 = 0x40000000; - cpu->isar.id_mmfr2 = 0x01260000; - cpu->isar.id_mmfr3 = 0x02102211; - cpu->isar.id_isar0 = 0x02101110; - 
cpu->isar.id_isar1 = 0x13112111; - cpu->isar.id_isar2 = 0x21232042; - cpu->isar.id_isar3 = 0x01112131; - cpu->isar.id_isar4 = 0x00011142; - cpu->isar.id_isar5 = 0x00011121; - cpu->isar.id_aa64pfr0 = 0x00002222; - cpu->isar.id_aa64dfr0 = 0x10305106; - cpu->isar.id_aa64isar0 = 0x00011120; - cpu->isar.id_aa64mmfr0 = 0x00001124; + SET_IDREG(isar, ID_PFR0, 0x00000131); + SET_IDREG(isar, ID_PFR1, 0x00011011); + SET_IDREG(isar, ID_DFR0, 0x03010066); + SET_IDREG(isar, ID_AFR0, 0x00000000); + SET_IDREG(isar, ID_MMFR0, 0x10201105); + SET_IDREG(isar, ID_MMFR1, 0x40000000); + SET_IDREG(isar, ID_MMFR2, 0x01260000); + SET_IDREG(isar, ID_MMFR3, 0x02102211); + SET_IDREG(isar, ID_ISAR0, 0x02101110); + SET_IDREG(isar, ID_ISAR1, 0x13112111); + SET_IDREG(isar, ID_ISAR2, 0x21232042); + SET_IDREG(isar, ID_ISAR3, 0x01112131); + SET_IDREG(isar, ID_ISAR4, 0x00011142); + SET_IDREG(isar, ID_ISAR5, 0x00011121); + SET_IDREG(isar, ID_AA64PFR0, 0x00002222); + SET_IDREG(isar, ID_AA64DFR0, 0x10305106); + SET_IDREG(isar, ID_AA64ISAR0, 0x00011120); + SET_IDREG(isar, ID_AA64MMFR0, 0x00001124); cpu->isar.dbgdidr = 0x3516d000; cpu->isar.dbgdevid = 0x01110f13; cpu->isar.dbgdevid1 = 0x2; cpu->isar.reset_pmcr_el0 = 0x41023000; - cpu->clidr = 0x0a200023; + SET_IDREG(isar, CLIDR, 0x0a200023); /* 32KB L1 dcache */ cpu->ccsidr[0] = make_ccsidr(CCSIDR_FORMAT_LEGACY, 4, 64, 32 * KiB, 7); /* 48KB L1 dcache */ @@ -335,6 +336,7 @@ static void aarch64_a72_initfn(Object *obj) static void aarch64_a76_initfn(Object *obj) { ARMCPU *cpu = ARM_CPU(obj); + ARMISARegisters *isar = &cpu->isar; cpu->dtb_compatible = "arm,cortex-a76"; set_feature(&cpu->env, ARM_FEATURE_V8); @@ -348,34 +350,34 @@ static void aarch64_a76_initfn(Object *obj) set_feature(&cpu->env, ARM_FEATURE_PMU); /* Ordered by B2.4 AArch64 registers by functional group */ - cpu->clidr = 0x82000023; + SET_IDREG(isar, CLIDR, 0x82000023); cpu->ctr = 0x8444C004; cpu->dcz_blocksize = 4; - cpu->isar.id_aa64dfr0 = 0x0000000010305408ull; - cpu->isar.id_aa64isar0 = 0x0000100010211120ull; - cpu->isar.id_aa64isar1 = 0x0000000000100001ull; - cpu->isar.id_aa64mmfr0 = 0x0000000000101122ull; - cpu->isar.id_aa64mmfr1 = 0x0000000010212122ull; - cpu->isar.id_aa64mmfr2 = 0x0000000000001011ull; - cpu->isar.id_aa64pfr0 = 0x1100000010111112ull; /* GIC filled in later */ - cpu->isar.id_aa64pfr1 = 0x0000000000000010ull; - cpu->id_afr0 = 0x00000000; - cpu->isar.id_dfr0 = 0x04010088; - cpu->isar.id_isar0 = 0x02101110; - cpu->isar.id_isar1 = 0x13112111; - cpu->isar.id_isar2 = 0x21232042; - cpu->isar.id_isar3 = 0x01112131; - cpu->isar.id_isar4 = 0x00010142; - cpu->isar.id_isar5 = 0x01011121; - cpu->isar.id_isar6 = 0x00000010; - cpu->isar.id_mmfr0 = 0x10201105; - cpu->isar.id_mmfr1 = 0x40000000; - cpu->isar.id_mmfr2 = 0x01260000; - cpu->isar.id_mmfr3 = 0x02122211; - cpu->isar.id_mmfr4 = 0x00021110; - cpu->isar.id_pfr0 = 0x10010131; - cpu->isar.id_pfr1 = 0x00010000; /* GIC filled in later */ - cpu->isar.id_pfr2 = 0x00000011; + SET_IDREG(isar, ID_AA64DFR0, 0x0000000010305408ull); + SET_IDREG(isar, ID_AA64ISAR0, 0x0000100010211120ull); + SET_IDREG(isar, ID_AA64ISAR1, 0x0000000000100001ull); + SET_IDREG(isar, ID_AA64MMFR0, 0x0000000000101122ull); + SET_IDREG(isar, ID_AA64MMFR1, 0x0000000010212122ull); + SET_IDREG(isar, ID_AA64MMFR2, 0x0000000000001011ull); + SET_IDREG(isar, ID_AA64PFR0, 0x1100000010111112ull); /* GIC filled in later */ + SET_IDREG(isar, ID_AA64PFR1, 0x0000000000000010ull); + SET_IDREG(isar, ID_AFR0, 0x00000000); + SET_IDREG(isar, ID_DFR0, 0x04010088); + SET_IDREG(isar, ID_ISAR0, 0x02101110); + 
SET_IDREG(isar, ID_ISAR1, 0x13112111); + SET_IDREG(isar, ID_ISAR2, 0x21232042); + SET_IDREG(isar, ID_ISAR3, 0x01112131); + SET_IDREG(isar, ID_ISAR4, 0x00010142); + SET_IDREG(isar, ID_ISAR5, 0x01011121); + SET_IDREG(isar, ID_ISAR6, 0x00000010); + SET_IDREG(isar, ID_MMFR0, 0x10201105); + SET_IDREG(isar, ID_MMFR1, 0x40000000); + SET_IDREG(isar, ID_MMFR2, 0x01260000); + SET_IDREG(isar, ID_MMFR3, 0x02122211); + SET_IDREG(isar, ID_MMFR4, 0x00021110); + SET_IDREG(isar, ID_PFR0, 0x10010131); + SET_IDREG(isar, ID_PFR1, 0x00010000); /* GIC filled in later */ + SET_IDREG(isar, ID_PFR2, 0x00000011); cpu->midr = 0x414fd0b1; /* r4p1 */ cpu->revidr = 0; @@ -405,9 +407,83 @@ static void aarch64_a76_initfn(Object *obj) cpu->isar.reset_pmcr_el0 = 0x410b3000; } +static void aarch64_a78ae_initfn(Object *obj) +{ + ARMCPU *cpu = ARM_CPU(obj); + ARMISARegisters *isar = &cpu->isar; + + cpu->dtb_compatible = "arm,cortex-a78ae"; + set_feature(&cpu->env, ARM_FEATURE_V8); + set_feature(&cpu->env, ARM_FEATURE_NEON); + set_feature(&cpu->env, ARM_FEATURE_GENERIC_TIMER); + set_feature(&cpu->env, ARM_FEATURE_AARCH64); + set_feature(&cpu->env, ARM_FEATURE_EL2); + set_feature(&cpu->env, ARM_FEATURE_EL3); + set_feature(&cpu->env, ARM_FEATURE_PMU); + + /* Ordered by 3.2.4 AArch64 registers by functional group */ + SET_IDREG(isar, CLIDR, 0x82000023); + cpu->ctr = 0x9444c004; + cpu->dcz_blocksize = 4; + SET_IDREG(isar, ID_AA64DFR0, 0x0000000110305408ull); + SET_IDREG(isar, ID_AA64ISAR0, 0x0010100010211120ull); + SET_IDREG(isar, ID_AA64ISAR1, 0x0000000001200031ull); + SET_IDREG(isar, ID_AA64MMFR0, 0x0000000000101125ull); + SET_IDREG(isar, ID_AA64MMFR1, 0x0000000010212122ull); + SET_IDREG(isar, ID_AA64MMFR2, 0x0000000100001011ull); + SET_IDREG(isar, ID_AA64PFR0, 0x1100000010111112ull); /* GIC filled in later */ + SET_IDREG(isar, ID_AA64PFR1, 0x0000000000000010ull); + SET_IDREG(isar, ID_AFR0, 0x00000000); + SET_IDREG(isar, ID_DFR0, 0x04010088); + SET_IDREG(isar, ID_ISAR0, 0x02101110); + SET_IDREG(isar, ID_ISAR1, 0x13112111); + SET_IDREG(isar, ID_ISAR2, 0x21232042); + SET_IDREG(isar, ID_ISAR3, 0x01112131); + SET_IDREG(isar, ID_ISAR4, 0x00010142); + SET_IDREG(isar, ID_ISAR5, 0x01011121); + SET_IDREG(isar, ID_ISAR6, 0x00000010); + SET_IDREG(isar, ID_MMFR0, 0x10201105); + SET_IDREG(isar, ID_MMFR1, 0x40000000); + SET_IDREG(isar, ID_MMFR2, 0x01260000); + SET_IDREG(isar, ID_MMFR3, 0x02122211); + SET_IDREG(isar, ID_MMFR4, 0x00021110); + SET_IDREG(isar, ID_PFR0, 0x10010131); + SET_IDREG(isar, ID_PFR1, 0x00010000); /* GIC filled in later */ + SET_IDREG(isar, ID_PFR2, 0x00000011); + cpu->midr = 0x410fd423; /* r0p3 */ + cpu->revidr = 0; + + /* From 3.2.33 CCSIDR_EL1 */ + /* 64KB L1 dcache */ + cpu->ccsidr[0] = make_ccsidr(CCSIDR_FORMAT_LEGACY, 4, 64, 64 * KiB, 7); + /* 64KB L1 icache */ + cpu->ccsidr[1] = make_ccsidr(CCSIDR_FORMAT_LEGACY, 4, 64, 64 * KiB, 2); + /* 512KB L2 cache */ + cpu->ccsidr[2] = make_ccsidr(CCSIDR_FORMAT_LEGACY, 8, 64, 512 * KiB, 7); + + /* From 3.2.118 SCTLR_EL3 */ + cpu->reset_sctlr = 0x30c50838; + + /* From 3.4.23 ICH_VTR_EL2 */ + cpu->gic_num_lrs = 4; + cpu->gic_vpribits = 5; + cpu->gic_vprebits = 5; + /* From 3.4.8 ICC_CTLR_EL3 */ + cpu->gic_pribits = 5; + + /* From 3.5.1 AdvSIMD AArch64 register summary */ + cpu->isar.mvfr0 = 0x10110222; + cpu->isar.mvfr1 = 0x13211111; + cpu->isar.mvfr2 = 0x00000043; + + /* From 5.5.1 AArch64 PMU register summary */ + cpu->isar.reset_pmcr_el0 = 0x41223000; +} + static void aarch64_a64fx_initfn(Object *obj) { ARMCPU *cpu = ARM_CPU(obj); + ARMISARegisters *isar = &cpu->isar; 
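
The ccsidr[] values in the cortex-a78ae and a64fx hunks here are built with make_ccsidr(CCSIDR_FORMAT_LEGACY, assoc, linesize, cachesize, flags), while the older CPUs in cpu32.c keep raw hex constants. Under the pre-ARMv8.3 ("legacy", no CCIDX) CCSIDR layout those constants decompose as flags[31:28], (NumSets-1)[27:13], (Associativity-1)[12:3] and (log2(line bytes)-4)[2:0]. The helper below is only a rough stand-in for that packing, with the argument meaning inferred from the surrounding comments rather than from the real make_ccsidr():

/* Hypothetical packing of a legacy-format CCSIDR value; sketch only. */
#include <assert.h>
#include <stdint.h>

static uint32_t legacy_ccsidr(unsigned assoc, unsigned linesize_bytes,
                              unsigned cachesize_bytes, unsigned flags)
{
    unsigned sets = cachesize_bytes / (assoc * linesize_bytes);
    unsigned lg = 0;

    while ((1u << lg) < linesize_bytes) {
        lg++;                            /* lg = log2(linesize_bytes) */
    }
    return (flags << 28) | ((sets - 1) << 13) | ((assoc - 1) << 3) | (lg - 4);
}

int main(void)
{
    /* Reproduces the Cortex-A9 value quoted earlier in this diff:
     * 0xe00fe019, "16k L1 dcache" (4-way, 32-byte lines, WT|WB|RA flags). */
    assert(legacy_ccsidr(4, 32, 16 * 1024, 0xe) == 0xe00fe019);
    return 0;
}
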
cpu->dtb_compatible = "arm,a64fx"; set_feature(&cpu->env, ARM_FEATURE_V8); @@ -422,19 +498,19 @@ static void aarch64_a64fx_initfn(Object *obj) cpu->revidr = 0x00000000; cpu->ctr = 0x86668006; cpu->reset_sctlr = 0x30000180; - cpu->isar.id_aa64pfr0 = 0x0000000101111111; /* No RAS Extensions */ - cpu->isar.id_aa64pfr1 = 0x0000000000000000; - cpu->isar.id_aa64dfr0 = 0x0000000010305408; - cpu->isar.id_aa64dfr1 = 0x0000000000000000; - cpu->id_aa64afr0 = 0x0000000000000000; - cpu->id_aa64afr1 = 0x0000000000000000; - cpu->isar.id_aa64mmfr0 = 0x0000000000001122; - cpu->isar.id_aa64mmfr1 = 0x0000000011212100; - cpu->isar.id_aa64mmfr2 = 0x0000000000001011; - cpu->isar.id_aa64isar0 = 0x0000000010211120; - cpu->isar.id_aa64isar1 = 0x0000000000010001; - cpu->isar.id_aa64zfr0 = 0x0000000000000000; - cpu->clidr = 0x0000000080000023; + SET_IDREG(isar, ID_AA64PFR0, 0x0000000101111111); /* No RAS Extensions */ + SET_IDREG(isar, ID_AA64PFR1, 0x0000000000000000); + SET_IDREG(isar, ID_AA64DFR0, 0x0000000010305408); + SET_IDREG(isar, ID_AA64DFR1, 0x0000000000000000); + SET_IDREG(isar, ID_AA64AFR0, 0x0000000000000000); + SET_IDREG(isar, ID_AA64AFR1, 0x0000000000000000); + SET_IDREG(isar, ID_AA64MMFR0, 0x0000000000001122); + SET_IDREG(isar, ID_AA64MMFR1, 0x0000000011212100); + SET_IDREG(isar, ID_AA64MMFR2, 0x0000000000001011); + SET_IDREG(isar, ID_AA64ISAR0, 0x0000000010211120); + SET_IDREG(isar, ID_AA64ISAR1, 0x0000000000010001); + SET_IDREG(isar, ID_AA64ZFR0, 0x0000000000000000); + SET_IDREG(isar, CLIDR, 0x0000000080000023); /* 64KB L1 dcache */ cpu->ccsidr[0] = make_ccsidr(CCSIDR_FORMAT_LEGACY, 4, 256, 64 * KiB, 7); /* 64KB L1 icache */ @@ -581,6 +657,7 @@ static void define_neoverse_v1_cp_reginfo(ARMCPU *cpu) static void aarch64_neoverse_n1_initfn(Object *obj) { ARMCPU *cpu = ARM_CPU(obj); + ARMISARegisters *isar = &cpu->isar; cpu->dtb_compatible = "arm,neoverse-n1"; set_feature(&cpu->env, ARM_FEATURE_V8); @@ -594,34 +671,34 @@ static void aarch64_neoverse_n1_initfn(Object *obj) set_feature(&cpu->env, ARM_FEATURE_PMU); /* Ordered by B2.4 AArch64 registers by functional group */ - cpu->clidr = 0x82000023; + SET_IDREG(isar, CLIDR, 0x82000023); cpu->ctr = 0x8444c004; cpu->dcz_blocksize = 4; - cpu->isar.id_aa64dfr0 = 0x0000000110305408ull; - cpu->isar.id_aa64isar0 = 0x0000100010211120ull; - cpu->isar.id_aa64isar1 = 0x0000000000100001ull; - cpu->isar.id_aa64mmfr0 = 0x0000000000101125ull; - cpu->isar.id_aa64mmfr1 = 0x0000000010212122ull; - cpu->isar.id_aa64mmfr2 = 0x0000000000001011ull; - cpu->isar.id_aa64pfr0 = 0x1100000010111112ull; /* GIC filled in later */ - cpu->isar.id_aa64pfr1 = 0x0000000000000020ull; - cpu->id_afr0 = 0x00000000; - cpu->isar.id_dfr0 = 0x04010088; - cpu->isar.id_isar0 = 0x02101110; - cpu->isar.id_isar1 = 0x13112111; - cpu->isar.id_isar2 = 0x21232042; - cpu->isar.id_isar3 = 0x01112131; - cpu->isar.id_isar4 = 0x00010142; - cpu->isar.id_isar5 = 0x01011121; - cpu->isar.id_isar6 = 0x00000010; - cpu->isar.id_mmfr0 = 0x10201105; - cpu->isar.id_mmfr1 = 0x40000000; - cpu->isar.id_mmfr2 = 0x01260000; - cpu->isar.id_mmfr3 = 0x02122211; - cpu->isar.id_mmfr4 = 0x00021110; - cpu->isar.id_pfr0 = 0x10010131; - cpu->isar.id_pfr1 = 0x00010000; /* GIC filled in later */ - cpu->isar.id_pfr2 = 0x00000011; + SET_IDREG(isar, ID_AA64DFR0, 0x0000000110305408ull); + SET_IDREG(isar, ID_AA64ISAR0, 0x0000100010211120ull); + SET_IDREG(isar, ID_AA64ISAR1, 0x0000000000100001ull); + SET_IDREG(isar, ID_AA64MMFR0, 0x0000000000101125ull); + SET_IDREG(isar, ID_AA64MMFR1, 0x0000000010212122ull); + SET_IDREG(isar, ID_AA64MMFR2, 
0x0000000000001011ull); + SET_IDREG(isar, ID_AA64PFR0, 0x1100000010111112ull); /* GIC filled in later */ + SET_IDREG(isar, ID_AA64PFR1, 0x0000000000000020ull); + SET_IDREG(isar, ID_AFR0, 0x00000000); + SET_IDREG(isar, ID_DFR0, 0x04010088); + SET_IDREG(isar, ID_ISAR0, 0x02101110); + SET_IDREG(isar, ID_ISAR1, 0x13112111); + SET_IDREG(isar, ID_ISAR2, 0x21232042); + SET_IDREG(isar, ID_ISAR3, 0x01112131); + SET_IDREG(isar, ID_ISAR4, 0x00010142); + SET_IDREG(isar, ID_ISAR5, 0x01011121); + SET_IDREG(isar, ID_ISAR6, 0x00000010); + SET_IDREG(isar, ID_MMFR0, 0x10201105); + SET_IDREG(isar, ID_MMFR1, 0x40000000); + SET_IDREG(isar, ID_MMFR2, 0x01260000); + SET_IDREG(isar, ID_MMFR3, 0x02122211); + SET_IDREG(isar, ID_MMFR4, 0x00021110); + SET_IDREG(isar, ID_PFR0, 0x10010131); + SET_IDREG(isar, ID_PFR1, 0x00010000); /* GIC filled in later */ + SET_IDREG(isar, ID_PFR2, 0x00000011); cpu->midr = 0x414fd0c1; /* r4p1 */ cpu->revidr = 0; @@ -656,6 +733,7 @@ static void aarch64_neoverse_n1_initfn(Object *obj) static void aarch64_neoverse_v1_initfn(Object *obj) { ARMCPU *cpu = ARM_CPU(obj); + ARMISARegisters *isar = &cpu->isar; cpu->dtb_compatible = "arm,neoverse-v1"; set_feature(&cpu->env, ARM_FEATURE_V8); @@ -669,37 +747,37 @@ static void aarch64_neoverse_v1_initfn(Object *obj) set_feature(&cpu->env, ARM_FEATURE_PMU); /* Ordered by 3.2.4 AArch64 registers by functional group */ - cpu->clidr = 0x82000023; + SET_IDREG(isar, CLIDR, 0x82000023); cpu->ctr = 0xb444c004; /* With DIC and IDC set */ cpu->dcz_blocksize = 4; - cpu->id_aa64afr0 = 0x00000000; - cpu->id_aa64afr1 = 0x00000000; - cpu->isar.id_aa64dfr0 = 0x000001f210305519ull; - cpu->isar.id_aa64dfr1 = 0x00000000; - cpu->isar.id_aa64isar0 = 0x1011111110212120ull; /* with FEAT_RNG */ - cpu->isar.id_aa64isar1 = 0x0011100001211032ull; - cpu->isar.id_aa64mmfr0 = 0x0000000000101125ull; - cpu->isar.id_aa64mmfr1 = 0x0000000010212122ull; - cpu->isar.id_aa64mmfr2 = 0x0220011102101011ull; - cpu->isar.id_aa64pfr0 = 0x1101110120111112ull; /* GIC filled in later */ - cpu->isar.id_aa64pfr1 = 0x0000000000000020ull; - cpu->id_afr0 = 0x00000000; - cpu->isar.id_dfr0 = 0x15011099; - cpu->isar.id_isar0 = 0x02101110; - cpu->isar.id_isar1 = 0x13112111; - cpu->isar.id_isar2 = 0x21232042; - cpu->isar.id_isar3 = 0x01112131; - cpu->isar.id_isar4 = 0x00010142; - cpu->isar.id_isar5 = 0x11011121; - cpu->isar.id_isar6 = 0x01100111; - cpu->isar.id_mmfr0 = 0x10201105; - cpu->isar.id_mmfr1 = 0x40000000; - cpu->isar.id_mmfr2 = 0x01260000; - cpu->isar.id_mmfr3 = 0x02122211; - cpu->isar.id_mmfr4 = 0x01021110; - cpu->isar.id_pfr0 = 0x21110131; - cpu->isar.id_pfr1 = 0x00010000; /* GIC filled in later */ - cpu->isar.id_pfr2 = 0x00000011; + SET_IDREG(isar, ID_AA64AFR0, 0x00000000); + SET_IDREG(isar, ID_AA64AFR1, 0x00000000); + SET_IDREG(isar, ID_AA64DFR0, 0x000001f210305519ull); + SET_IDREG(isar, ID_AA64DFR1, 0x00000000); + SET_IDREG(isar, ID_AA64ISAR0, 0x1011111110212120ull); /* with FEAT_RNG */ + SET_IDREG(isar, ID_AA64ISAR1, 0x0011000001211032ull); + SET_IDREG(isar, ID_AA64MMFR0, 0x0000000000101125ull); + SET_IDREG(isar, ID_AA64MMFR1, 0x0000000010212122ull); + SET_IDREG(isar, ID_AA64MMFR2, 0x0220011102101011ull); + SET_IDREG(isar, ID_AA64PFR0, 0x1101110120111112ull); /* GIC filled in later */ + SET_IDREG(isar, ID_AA64PFR1, 0x0000000000000020ull); + SET_IDREG(isar, ID_AFR0, 0x00000000); + SET_IDREG(isar, ID_DFR0, 0x15011099); + SET_IDREG(isar, ID_ISAR0, 0x02101110); + SET_IDREG(isar, ID_ISAR1, 0x13112111); + SET_IDREG(isar, ID_ISAR2, 0x21232042); + SET_IDREG(isar, ID_ISAR3, 0x01112131); + 
SET_IDREG(isar, ID_ISAR4, 0x00010142); + SET_IDREG(isar, ID_ISAR5, 0x11011121); + SET_IDREG(isar, ID_ISAR6, 0x01100111); + SET_IDREG(isar, ID_MMFR0, 0x10201105); + SET_IDREG(isar, ID_MMFR1, 0x40000000); + SET_IDREG(isar, ID_MMFR2, 0x01260000); + SET_IDREG(isar, ID_MMFR3, 0x02122211); + SET_IDREG(isar, ID_MMFR4, 0x01021110); + SET_IDREG(isar, ID_PFR0, 0x21110131); + SET_IDREG(isar, ID_PFR1, 0x00010000); /* GIC filled in later */ + SET_IDREG(isar, ID_PFR2, 0x00000011); cpu->midr = 0x411FD402; /* r1p2 */ cpu->revidr = 0; @@ -735,7 +813,7 @@ static void aarch64_neoverse_v1_initfn(Object *obj) cpu->isar.mvfr2 = 0x00000043; /* From 3.7.5 ID_AA64ZFR0_EL1 */ - cpu->isar.id_aa64zfr0 = 0x0000100000100000; + SET_IDREG(isar, ID_AA64ZFR0, 0x0000100000100000); cpu->sve_vq.supported = (1 << 0) /* 128bit */ | (1 << 1); /* 256bit */ @@ -882,6 +960,7 @@ static const ARMCPRegInfo cortex_a710_cp_reginfo[] = { static void aarch64_a710_initfn(Object *obj) { ARMCPU *cpu = ARM_CPU(obj); + ARMISARegisters *isar = &cpu->isar; cpu->dtb_compatible = "arm,cortex-a710"; set_feature(&cpu->env, ARM_FEATURE_V8); @@ -897,39 +976,39 @@ static void aarch64_a710_initfn(Object *obj) /* Ordered by Section B.4: AArch64 registers */ cpu->midr = 0x412FD471; /* r2p1 */ cpu->revidr = 0; - cpu->isar.id_pfr0 = 0x21110131; - cpu->isar.id_pfr1 = 0x00010000; /* GIC filled in later */ - cpu->isar.id_dfr0 = 0x16011099; - cpu->id_afr0 = 0; - cpu->isar.id_mmfr0 = 0x10201105; - cpu->isar.id_mmfr1 = 0x40000000; - cpu->isar.id_mmfr2 = 0x01260000; - cpu->isar.id_mmfr3 = 0x02122211; - cpu->isar.id_isar0 = 0x02101110; - cpu->isar.id_isar1 = 0x13112111; - cpu->isar.id_isar2 = 0x21232042; - cpu->isar.id_isar3 = 0x01112131; - cpu->isar.id_isar4 = 0x00010142; - cpu->isar.id_isar5 = 0x11011121; /* with Crypto */ - cpu->isar.id_mmfr4 = 0x21021110; - cpu->isar.id_isar6 = 0x01111111; + SET_IDREG(isar, ID_PFR0, 0x21110131); + SET_IDREG(isar, ID_PFR1, 0x00010000); /* GIC filled in later */ + SET_IDREG(isar, ID_DFR0, 0x16011099); + SET_IDREG(isar, ID_AFR0, 0); + SET_IDREG(isar, ID_MMFR0, 0x10201105); + SET_IDREG(isar, ID_MMFR1, 0x40000000); + SET_IDREG(isar, ID_MMFR2, 0x01260000); + SET_IDREG(isar, ID_MMFR3, 0x02122211); + SET_IDREG(isar, ID_ISAR0, 0x02101110); + SET_IDREG(isar, ID_ISAR1, 0x13112111); + SET_IDREG(isar, ID_ISAR2, 0x21232042); + SET_IDREG(isar, ID_ISAR3, 0x01112131); + SET_IDREG(isar, ID_ISAR4, 0x00010142); + SET_IDREG(isar, ID_ISAR5, 0x11011121); /* with Crypto */ + SET_IDREG(isar, ID_MMFR4, 0x21021110); + SET_IDREG(isar, ID_ISAR6, 0x01111111); cpu->isar.mvfr0 = 0x10110222; cpu->isar.mvfr1 = 0x13211111; cpu->isar.mvfr2 = 0x00000043; - cpu->isar.id_pfr2 = 0x00000011; - cpu->isar.id_aa64pfr0 = 0x1201111120111112ull; /* GIC filled in later */ - cpu->isar.id_aa64pfr1 = 0x0000000000000221ull; - cpu->isar.id_aa64zfr0 = 0x0000110100110021ull; /* with Crypto */ - cpu->isar.id_aa64dfr0 = 0x000011f010305619ull; - cpu->isar.id_aa64dfr1 = 0; - cpu->id_aa64afr0 = 0; - cpu->id_aa64afr1 = 0; - cpu->isar.id_aa64isar0 = 0x0221111110212120ull; /* with Crypto */ - cpu->isar.id_aa64isar1 = 0x0010111101211052ull; - cpu->isar.id_aa64mmfr0 = 0x0000022200101122ull; - cpu->isar.id_aa64mmfr1 = 0x0000000010212122ull; - cpu->isar.id_aa64mmfr2 = 0x1221011110101011ull; - cpu->clidr = 0x0000001482000023ull; + SET_IDREG(isar, ID_PFR2, 0x00000011); + SET_IDREG(isar, ID_AA64PFR0, 0x1201111120111112ull); /* GIC filled in later */ + SET_IDREG(isar, ID_AA64PFR1, 0x0000000000000221ull); + SET_IDREG(isar, ID_AA64ZFR0, 0x0000110100110021ull); /* with Crypto */ + SET_IDREG(isar, 
ID_AA64DFR0, 0x000011f010305619ull); + SET_IDREG(isar, ID_AA64DFR1, 0); + SET_IDREG(isar, ID_AA64AFR0, 0); + SET_IDREG(isar, ID_AA64AFR1, 0); + SET_IDREG(isar, ID_AA64ISAR0, 0x0221111110212120ull); /* with Crypto */ + SET_IDREG(isar, ID_AA64ISAR1, 0x0010111101211052ull); + SET_IDREG(isar, ID_AA64MMFR0, 0x0000022200101122ull); + SET_IDREG(isar, ID_AA64MMFR1, 0x0000000010212122ull); + SET_IDREG(isar, ID_AA64MMFR2, 0x1221011110101011ull); + SET_IDREG(isar, CLIDR, 0x0000001482000023ull); cpu->gm_blocksize = 4; cpu->ctr = 0x000000049444c004ull; cpu->dcz_blocksize = 4; @@ -983,6 +1062,7 @@ static const ARMCPRegInfo neoverse_n2_cp_reginfo[] = { static void aarch64_neoverse_n2_initfn(Object *obj) { ARMCPU *cpu = ARM_CPU(obj); + ARMISARegisters *isar = &cpu->isar; cpu->dtb_compatible = "arm,neoverse-n2"; set_feature(&cpu->env, ARM_FEATURE_V8); @@ -998,39 +1078,39 @@ static void aarch64_neoverse_n2_initfn(Object *obj) /* Ordered by Section B.5: AArch64 ID registers */ cpu->midr = 0x410FD493; /* r0p3 */ cpu->revidr = 0; - cpu->isar.id_pfr0 = 0x21110131; - cpu->isar.id_pfr1 = 0x00010000; /* GIC filled in later */ - cpu->isar.id_dfr0 = 0x16011099; - cpu->id_afr0 = 0; - cpu->isar.id_mmfr0 = 0x10201105; - cpu->isar.id_mmfr1 = 0x40000000; - cpu->isar.id_mmfr2 = 0x01260000; - cpu->isar.id_mmfr3 = 0x02122211; - cpu->isar.id_isar0 = 0x02101110; - cpu->isar.id_isar1 = 0x13112111; - cpu->isar.id_isar2 = 0x21232042; - cpu->isar.id_isar3 = 0x01112131; - cpu->isar.id_isar4 = 0x00010142; - cpu->isar.id_isar5 = 0x11011121; /* with Crypto */ - cpu->isar.id_mmfr4 = 0x01021110; - cpu->isar.id_isar6 = 0x01111111; + SET_IDREG(isar, ID_PFR0, 0x21110131); + SET_IDREG(isar, ID_PFR1, 0x00010000); /* GIC filled in later */ + SET_IDREG(isar, ID_DFR0, 0x16011099); + SET_IDREG(isar, ID_AFR0, 0); + SET_IDREG(isar, ID_MMFR0, 0x10201105); + SET_IDREG(isar, ID_MMFR1, 0x40000000); + SET_IDREG(isar, ID_MMFR2, 0x01260000); + SET_IDREG(isar, ID_MMFR3, 0x02122211); + SET_IDREG(isar, ID_ISAR0, 0x02101110); + SET_IDREG(isar, ID_ISAR1, 0x13112111); + SET_IDREG(isar, ID_ISAR2, 0x21232042); + SET_IDREG(isar, ID_ISAR3, 0x01112131); + SET_IDREG(isar, ID_ISAR4, 0x00010142); + SET_IDREG(isar, ID_ISAR5, 0x11011121); /* with Crypto */ + SET_IDREG(isar, ID_MMFR4, 0x01021110); + SET_IDREG(isar, ID_ISAR6, 0x01111111); cpu->isar.mvfr0 = 0x10110222; cpu->isar.mvfr1 = 0x13211111; cpu->isar.mvfr2 = 0x00000043; - cpu->isar.id_pfr2 = 0x00000011; - cpu->isar.id_aa64pfr0 = 0x1201111120111112ull; /* GIC filled in later */ - cpu->isar.id_aa64pfr1 = 0x0000000000000221ull; - cpu->isar.id_aa64zfr0 = 0x0000110100110021ull; /* with Crypto */ - cpu->isar.id_aa64dfr0 = 0x000011f210305619ull; - cpu->isar.id_aa64dfr1 = 0; - cpu->id_aa64afr0 = 0; - cpu->id_aa64afr1 = 0; - cpu->isar.id_aa64isar0 = 0x1221111110212120ull; /* with Crypto and FEAT_RNG */ - cpu->isar.id_aa64isar1 = 0x0011111101211052ull; - cpu->isar.id_aa64mmfr0 = 0x0000022200101125ull; - cpu->isar.id_aa64mmfr1 = 0x0000000010212122ull; - cpu->isar.id_aa64mmfr2 = 0x1221011112101011ull; - cpu->clidr = 0x0000001482000023ull; + SET_IDREG(isar, ID_PFR2, 0x00000011); + SET_IDREG(isar, ID_AA64PFR0, 0x1201111120111112ull); /* GIC filled in later */ + SET_IDREG(isar, ID_AA64PFR1, 0x0000000000000221ull); + SET_IDREG(isar, ID_AA64ZFR0, 0x0000110100110021ull); /* with Crypto */ + SET_IDREG(isar, ID_AA64DFR0, 0x000011f210305619ull); + SET_IDREG(isar, ID_AA64DFR1, 0); + SET_IDREG(isar, ID_AA64AFR0, 0); + SET_IDREG(isar, ID_AA64AFR1, 0); + SET_IDREG(isar, ID_AA64ISAR0, 0x1221111110212120ull); /* with Crypto and FEAT_RNG */ 
+ SET_IDREG(isar, ID_AA64ISAR1, 0x0011111101211052ull); + SET_IDREG(isar, ID_AA64MMFR0, 0x0000022200101125ull); + SET_IDREG(isar, ID_AA64MMFR1, 0x0000000010212122ull); + SET_IDREG(isar, ID_AA64MMFR2, 0x1221011112101011ull); + SET_IDREG(isar, CLIDR, 0x0000001482000023ull); cpu->gm_blocksize = 4; cpu->ctr = 0x00000004b444c004ull; cpu->dcz_blocksize = 4; @@ -1083,6 +1163,7 @@ static void aarch64_neoverse_n2_initfn(Object *obj) void aarch64_max_tcg_initfn(Object *obj) { ARMCPU *cpu = ARM_CPU(obj); + ARMISARegisters *isar = &cpu->isar; uint64_t t; uint32_t u; @@ -1118,10 +1199,10 @@ void aarch64_max_tcg_initfn(Object *obj) * We're going to set FEAT_S2FWB, which mandates that CLIDR_EL1.{LoUU,LoUIS} * are zero. */ - u = cpu->clidr; + u = GET_IDREG(isar, CLIDR); u = FIELD_DP32(u, CLIDR_EL1, LOUIS, 0); u = FIELD_DP32(u, CLIDR_EL1, LOUU, 0); - cpu->clidr = u; + SET_IDREG(isar, CLIDR, u); /* * Set CTR_EL0.DIC and IDC to tell the guest it doesnt' need to @@ -1133,12 +1214,12 @@ void aarch64_max_tcg_initfn(Object *obj) t = FIELD_DP64(t, CTR_EL0, DIC, 1); cpu->ctr = t; - t = cpu->isar.id_aa64isar0; + t = GET_IDREG(isar, ID_AA64ISAR0); t = FIELD_DP64(t, ID_AA64ISAR0, AES, 2); /* FEAT_PMULL */ t = FIELD_DP64(t, ID_AA64ISAR0, SHA1, 1); /* FEAT_SHA1 */ t = FIELD_DP64(t, ID_AA64ISAR0, SHA2, 2); /* FEAT_SHA512 */ t = FIELD_DP64(t, ID_AA64ISAR0, CRC32, 1); /* FEAT_CRC32 */ - t = FIELD_DP64(t, ID_AA64ISAR0, ATOMIC, 2); /* FEAT_LSE */ + t = FIELD_DP64(t, ID_AA64ISAR0, ATOMIC, 3); /* FEAT_LSE, FEAT_LSE128 */ t = FIELD_DP64(t, ID_AA64ISAR0, RDM, 1); /* FEAT_RDM */ t = FIELD_DP64(t, ID_AA64ISAR0, SHA3, 1); /* FEAT_SHA3 */ t = FIELD_DP64(t, ID_AA64ISAR0, SM3, 1); /* FEAT_SM3 */ @@ -1148,9 +1229,9 @@ void aarch64_max_tcg_initfn(Object *obj) t = FIELD_DP64(t, ID_AA64ISAR0, TS, 2); /* FEAT_FlagM2 */ t = FIELD_DP64(t, ID_AA64ISAR0, TLB, 2); /* FEAT_TLBIRANGE */ t = FIELD_DP64(t, ID_AA64ISAR0, RNDR, 1); /* FEAT_RNG */ - cpu->isar.id_aa64isar0 = t; + SET_IDREG(isar, ID_AA64ISAR0, t); - t = cpu->isar.id_aa64isar1; + t = GET_IDREG(isar, ID_AA64ISAR1); t = FIELD_DP64(t, ID_AA64ISAR1, DPB, 2); /* FEAT_DPB2 */ t = FIELD_DP64(t, ID_AA64ISAR1, APA, PauthFeat_FPACCOMBINED); t = FIELD_DP64(t, ID_AA64ISAR1, API, 1); @@ -1164,16 +1245,18 @@ void aarch64_max_tcg_initfn(Object *obj) t = FIELD_DP64(t, ID_AA64ISAR1, DGH, 1); /* FEAT_DGH */ t = FIELD_DP64(t, ID_AA64ISAR1, I8MM, 1); /* FEAT_I8MM */ t = FIELD_DP64(t, ID_AA64ISAR1, XS, 1); /* FEAT_XS */ - cpu->isar.id_aa64isar1 = t; + SET_IDREG(isar, ID_AA64ISAR1, t); - t = cpu->isar.id_aa64isar2; + t = GET_IDREG(isar, ID_AA64ISAR2); t = FIELD_DP64(t, ID_AA64ISAR2, RPRES, 1); /* FEAT_RPRES */ t = FIELD_DP64(t, ID_AA64ISAR2, MOPS, 1); /* FEAT_MOPS */ t = FIELD_DP64(t, ID_AA64ISAR2, BC, 1); /* FEAT_HBC */ t = FIELD_DP64(t, ID_AA64ISAR2, WFXT, 2); /* FEAT_WFxT */ - cpu->isar.id_aa64isar2 = t; + t = FIELD_DP64(t, ID_AA64ISAR2, CSSC, 1); /* FEAT_CSSC */ + t = FIELD_DP64(t, ID_AA64ISAR2, ATS1A, 1); /* FEAT_ATS1A */ + SET_IDREG(isar, ID_AA64ISAR2, t); - t = cpu->isar.id_aa64pfr0; + t = GET_IDREG(isar, ID_AA64PFR0); t = FIELD_DP64(t, ID_AA64PFR0, FP, 1); /* FEAT_FP16 */ t = FIELD_DP64(t, ID_AA64PFR0, ADVSIMD, 1); /* FEAT_FP16 */ t = FIELD_DP64(t, ID_AA64PFR0, RAS, 2); /* FEAT_RASv1p1 + FEAT_DoubleFault */ @@ -1182,9 +1265,9 @@ void aarch64_max_tcg_initfn(Object *obj) t = FIELD_DP64(t, ID_AA64PFR0, DIT, 1); /* FEAT_DIT */ t = FIELD_DP64(t, ID_AA64PFR0, CSV2, 3); /* FEAT_CSV2_3 */ t = FIELD_DP64(t, ID_AA64PFR0, CSV3, 1); /* FEAT_CSV3 */ - cpu->isar.id_aa64pfr0 = t; + SET_IDREG(isar, ID_AA64PFR0, t); 
- t = cpu->isar.id_aa64pfr1; + t = GET_IDREG(isar, ID_AA64PFR1); t = FIELD_DP64(t, ID_AA64PFR1, BT, 1); /* FEAT_BTI */ t = FIELD_DP64(t, ID_AA64PFR1, SSBS, 2); /* FEAT_SSBS2 */ /* @@ -1194,12 +1277,13 @@ void aarch64_max_tcg_initfn(Object *obj) */ t = FIELD_DP64(t, ID_AA64PFR1, MTE, 3); /* FEAT_MTE3 */ t = FIELD_DP64(t, ID_AA64PFR1, RAS_FRAC, 0); /* FEAT_RASv1p1 + FEAT_DoubleFault */ - t = FIELD_DP64(t, ID_AA64PFR1, SME, 1); /* FEAT_SME */ + t = FIELD_DP64(t, ID_AA64PFR1, SME, 2); /* FEAT_SME2 */ t = FIELD_DP64(t, ID_AA64PFR1, CSV2_FRAC, 0); /* FEAT_CSV2_3 */ t = FIELD_DP64(t, ID_AA64PFR1, NMI, 1); /* FEAT_NMI */ - cpu->isar.id_aa64pfr1 = t; + t = FIELD_DP64(t, ID_AA64PFR1, GCS, 1); /* FEAT_GCS */ + SET_IDREG(isar, ID_AA64PFR1, t); - t = cpu->isar.id_aa64mmfr0; + t = GET_IDREG(isar, ID_AA64MMFR0); t = FIELD_DP64(t, ID_AA64MMFR0, PARANGE, 6); /* FEAT_LPA: 52 bits */ t = FIELD_DP64(t, ID_AA64MMFR0, TGRAN16, 1); /* 16k pages supported */ t = FIELD_DP64(t, ID_AA64MMFR0, TGRAN16_2, 2); /* 16k stage2 supported */ @@ -1207,9 +1291,9 @@ void aarch64_max_tcg_initfn(Object *obj) t = FIELD_DP64(t, ID_AA64MMFR0, TGRAN4_2, 2); /* 4k stage2 supported */ t = FIELD_DP64(t, ID_AA64MMFR0, FGT, 1); /* FEAT_FGT */ t = FIELD_DP64(t, ID_AA64MMFR0, ECV, 2); /* FEAT_ECV */ - cpu->isar.id_aa64mmfr0 = t; + SET_IDREG(isar, ID_AA64MMFR0, t); - t = cpu->isar.id_aa64mmfr1; + t = GET_IDREG(isar, ID_AA64MMFR1); t = FIELD_DP64(t, ID_AA64MMFR1, HAFDBS, 2); /* FEAT_HAFDBS */ t = FIELD_DP64(t, ID_AA64MMFR1, VMIDBITS, 2); /* FEAT_VMID16 */ t = FIELD_DP64(t, ID_AA64MMFR1, VH, 1); /* FEAT_VHE */ @@ -1222,9 +1306,9 @@ void aarch64_max_tcg_initfn(Object *obj) t = FIELD_DP64(t, ID_AA64MMFR1, AFP, 1); /* FEAT_AFP */ t = FIELD_DP64(t, ID_AA64MMFR1, TIDCP1, 1); /* FEAT_TIDCP1 */ t = FIELD_DP64(t, ID_AA64MMFR1, CMOW, 1); /* FEAT_CMOW */ - cpu->isar.id_aa64mmfr1 = t; + SET_IDREG(isar, ID_AA64MMFR1, t); - t = cpu->isar.id_aa64mmfr2; + t = GET_IDREG(isar, ID_AA64MMFR2); t = FIELD_DP64(t, ID_AA64MMFR2, CNP, 1); /* FEAT_TTCNP */ t = FIELD_DP64(t, ID_AA64MMFR2, UAO, 1); /* FEAT_UAO */ t = FIELD_DP64(t, ID_AA64MMFR2, IESB, 1); /* FEAT_IESB */ @@ -1238,39 +1322,50 @@ void aarch64_max_tcg_initfn(Object *obj) t = FIELD_DP64(t, ID_AA64MMFR2, BBM, 2); /* FEAT_BBM at level 2 */ t = FIELD_DP64(t, ID_AA64MMFR2, EVT, 2); /* FEAT_EVT */ t = FIELD_DP64(t, ID_AA64MMFR2, E0PD, 1); /* FEAT_E0PD */ - cpu->isar.id_aa64mmfr2 = t; + SET_IDREG(isar, ID_AA64MMFR2, t); - t = cpu->isar.id_aa64mmfr3; + t = GET_IDREG(isar, ID_AA64MMFR3); + t = FIELD_DP64(t, ID_AA64MMFR3, TCRX, 1); /* FEAT_TCR2 */ + t = FIELD_DP64(t, ID_AA64MMFR3, SCTLRX, 1); /* FEAT_SCTLR2 */ + t = FIELD_DP64(t, ID_AA64MMFR3, MEC, 1); /* FEAT_MEC */ t = FIELD_DP64(t, ID_AA64MMFR3, SPEC_FPACC, 1); /* FEAT_FPACC_SPEC */ - cpu->isar.id_aa64mmfr3 = t; + t = FIELD_DP64(t, ID_AA64MMFR3, S1PIE, 1); /* FEAT_S1PIE */ + t = FIELD_DP64(t, ID_AA64MMFR3, S2PIE, 1); /* FEAT_S2PIE */ + SET_IDREG(isar, ID_AA64MMFR3, t); - t = cpu->isar.id_aa64zfr0; - t = FIELD_DP64(t, ID_AA64ZFR0, SVEVER, 1); + t = GET_IDREG(isar, ID_AA64ZFR0); + t = FIELD_DP64(t, ID_AA64ZFR0, SVEVER, 2); /* FEAT_SVE2p1 */ t = FIELD_DP64(t, ID_AA64ZFR0, AES, 2); /* FEAT_SVE_PMULL128 */ t = FIELD_DP64(t, ID_AA64ZFR0, BITPERM, 1); /* FEAT_SVE_BitPerm */ t = FIELD_DP64(t, ID_AA64ZFR0, BFLOAT16, 2); /* FEAT_BF16, FEAT_EBF16 */ + t = FIELD_DP64(t, ID_AA64ZFR0, B16B16, 1); /* FEAT_SVE_B16B16 */ t = FIELD_DP64(t, ID_AA64ZFR0, SHA3, 1); /* FEAT_SVE_SHA3 */ t = FIELD_DP64(t, ID_AA64ZFR0, SM4, 1); /* FEAT_SVE_SM4 */ t = FIELD_DP64(t, ID_AA64ZFR0, I8MM, 1); 
/* FEAT_I8MM */ t = FIELD_DP64(t, ID_AA64ZFR0, F32MM, 1); /* FEAT_F32MM */ t = FIELD_DP64(t, ID_AA64ZFR0, F64MM, 1); /* FEAT_F64MM */ - cpu->isar.id_aa64zfr0 = t; + SET_IDREG(isar, ID_AA64ZFR0, t); - t = cpu->isar.id_aa64dfr0; + t = GET_IDREG(isar, ID_AA64DFR0); t = FIELD_DP64(t, ID_AA64DFR0, DEBUGVER, 10); /* FEAT_Debugv8p8 */ t = FIELD_DP64(t, ID_AA64DFR0, PMUVER, 6); /* FEAT_PMUv3p5 */ t = FIELD_DP64(t, ID_AA64DFR0, HPMN0, 1); /* FEAT_HPMN0 */ - cpu->isar.id_aa64dfr0 = t; + SET_IDREG(isar, ID_AA64DFR0, t); - t = cpu->isar.id_aa64smfr0; + t = GET_IDREG(isar, ID_AA64SMFR0); t = FIELD_DP64(t, ID_AA64SMFR0, F32F32, 1); /* FEAT_SME */ + t = FIELD_DP64(t, ID_AA64SMFR0, BI32I32, 1); /* FEAT_SME2 */ t = FIELD_DP64(t, ID_AA64SMFR0, B16F32, 1); /* FEAT_SME */ t = FIELD_DP64(t, ID_AA64SMFR0, F16F32, 1); /* FEAT_SME */ t = FIELD_DP64(t, ID_AA64SMFR0, I8I32, 0xf); /* FEAT_SME */ + t = FIELD_DP64(t, ID_AA64SMFR0, F16F16, 1); /* FEAT_SME_F16F16 */ + t = FIELD_DP64(t, ID_AA64SMFR0, B16B16, 1); /* FEAT_SME_B16B16 */ + t = FIELD_DP64(t, ID_AA64SMFR0, I16I32, 5); /* FEAT_SME2 */ t = FIELD_DP64(t, ID_AA64SMFR0, F64F64, 1); /* FEAT_SME_F64F64 */ t = FIELD_DP64(t, ID_AA64SMFR0, I16I64, 0xf); /* FEAT_SME_I16I64 */ + t = FIELD_DP64(t, ID_AA64SMFR0, SMEVER, 2); /* FEAT_SME2p1 */ t = FIELD_DP64(t, ID_AA64SMFR0, FA64, 1); /* FEAT_SME_FA64 */ - cpu->isar.id_aa64smfr0 = t; + SET_IDREG(isar, ID_AA64SMFR0, t); /* Replicate the same data to the 32-bit id registers. */ aa32_max_features(cpu); @@ -1304,6 +1399,11 @@ static const ARMCPUInfo aarch64_cpus[] = { { .name = "cortex-a55", .initfn = aarch64_a55_initfn }, { .name = "cortex-a72", .initfn = aarch64_a72_initfn }, { .name = "cortex-a76", .initfn = aarch64_a76_initfn }, + /* + * The Cortex-A78AE differs slightly from the plain Cortex-A78. We don't + * currently model the latter. + */ + { .name = "cortex-a78ae", .initfn = aarch64_a78ae_initfn }, { .name = "cortex-a710", .initfn = aarch64_a710_initfn }, { .name = "a64fx", .initfn = aarch64_a64fx_initfn }, { .name = "neoverse-n1", .initfn = aarch64_neoverse_n1_initfn }, diff --git a/target/arm/tcg/gengvec64.c b/target/arm/tcg/gengvec64.c index 2617cde..2429cab 100644 --- a/target/arm/tcg/gengvec64.c +++ b/target/arm/tcg/gengvec64.c @@ -369,3 +369,14 @@ void gen_gvec_usqadd_qc(unsigned vece, uint32_t rd_ofs, tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc), rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); } + +void gen_gvec_sve2_sqdmulh(unsigned vece, uint32_t rd_ofs, + uint32_t rn_ofs, uint32_t rm_ofs, + uint32_t opr_sz, uint32_t max_sz) +{ + static gen_helper_gvec_3 * const fns[4] = { + gen_helper_sve2_sqdmulh_b, gen_helper_sve2_sqdmulh_h, + gen_helper_sve2_sqdmulh_s, gen_helper_sve2_sqdmulh_d, + }; + tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fns[vece]); +} diff --git a/target/arm/tcg/helper-a64.c b/target/arm/tcg/helper-a64.c index 4f618ae..ba1d775 100644 --- a/target/arm/tcg/helper-a64.c +++ b/target/arm/tcg/helper-a64.c @@ -402,6 +402,8 @@ AH_MINMAX_HELPER(vfp_ah_mind, float64, float64, min) AH_MINMAX_HELPER(vfp_ah_maxh, dh_ctype_f16, float16, max) AH_MINMAX_HELPER(vfp_ah_maxs, float32, float32, max) AH_MINMAX_HELPER(vfp_ah_maxd, float64, float64, max) +AH_MINMAX_HELPER(sme2_ah_fmax_b16, bfloat16, bfloat16, max) +AH_MINMAX_HELPER(sme2_ah_fmin_b16, bfloat16, bfloat16, min) /* 64-bit versions of the CRC helpers. 
Note that although the operation * (and the prototypes of crc32c() and crc32() mean that only the bottom @@ -574,6 +576,7 @@ uint32_t HELPER(advsimd_rinth)(uint32_t x, float_status *fp_status) return ret; } +#ifndef CONFIG_USER_ONLY static int el_from_spsr(uint32_t spsr) { /* Return the exception level that this SPSR is requesting a return to, @@ -612,32 +615,12 @@ static int el_from_spsr(uint32_t spsr) } } -static void cpsr_write_from_spsr_elx(CPUARMState *env, - uint32_t val) -{ - uint32_t mask; - - /* Save SPSR_ELx.SS into PSTATE. */ - env->pstate = (env->pstate & ~PSTATE_SS) | (val & PSTATE_SS); - val &= ~PSTATE_SS; - - /* Move DIT to the correct location for CPSR */ - if (val & PSTATE_DIT) { - val &= ~PSTATE_DIT; - val |= CPSR_DIT; - } - - mask = aarch32_cpsr_valid_mask(env->features, \ - &env_archcpu(env)->isar); - cpsr_write(env, val, mask, CPSRWriteRaw); -} - void HELPER(exception_return)(CPUARMState *env, uint64_t new_pc) { ARMCPU *cpu = env_archcpu(env); int cur_el = arm_current_el(env); unsigned int spsr_idx = aarch64_banked_spsr_index(cur_el); - uint32_t spsr = env->banked_spsr[spsr_idx]; + uint64_t spsr = env->banked_spsr[spsr_idx]; int new_el; bool return_to_aa64 = (spsr & PSTATE_nRW) == 0; @@ -656,15 +639,6 @@ void HELPER(exception_return)(CPUARMState *env, uint64_t new_pc) spsr &= ~PSTATE_SS; } - /* - * FEAT_RME forbids return from EL3 with an invalid security state. - * We don't need an explicit check for FEAT_RME here because we enforce - * in scr_write() that you can't set the NSE bit without it. - */ - if (cur_el == 3 && (env->cp15.scr_el3 & (SCR_NS | SCR_NSE)) == SCR_NSE) { - goto illegal_return; - } - new_el = el_from_spsr(spsr); if (new_el == -1) { goto illegal_return; @@ -676,6 +650,17 @@ void HELPER(exception_return)(CPUARMState *env, uint64_t new_pc) goto illegal_return; } + /* + * FEAT_RME forbids return from EL3 to a lower exception level + * with an invalid security state. + * We don't need an explicit check for FEAT_RME here because we enforce + * in scr_write() that you can't set the NSE bit without it. + */ + if (cur_el == 3 && new_el < 3 && + (env->cp15.scr_el3 & (SCR_NS | SCR_NSE)) == SCR_NSE) { + goto illegal_return; + } + if (new_el != 0 && arm_el_is_aa64(env, new_el) != return_to_aa64) { /* Return to an EL which is configured for a different register width */ goto illegal_return; @@ -690,6 +675,17 @@ void HELPER(exception_return)(CPUARMState *env, uint64_t new_pc) goto illegal_return; } + /* + * If GetCurrentEXLOCKEN, the exception return path must use GCSPOPCX, + * which will set PSTATE.EXLOCK. We need not explicitly check FEAT_GCS, + * because GCSCR_ELx cannot be set without it. 
+ */ + if (new_el == cur_el && + (env->cp15.gcscr_el[cur_el] & GCSCR_EXLOCKEN) && + !(env->pstate & PSTATE_EXLOCK)) { + goto illegal_return; + } + bql_lock(); arm_call_pre_el_change_hook(cpu); bql_unlock(); @@ -783,6 +779,7 @@ illegal_return: qemu_log_mask(LOG_GUEST_ERROR, "Illegal exception return at EL%d: " "resuming execution at 0x%" PRIx64 "\n", cur_el, env->pc); } +#endif /* !CONFIG_USER_ONLY */ void HELPER(dc_zva)(CPUARMState *env, uint64_t vaddr_in) { diff --git a/target/arm/tcg/helper-a64.h b/target/arm/tcg/helper-a64.h index 8502346..b6008b5 100644 --- a/target/arm/tcg/helper-a64.h +++ b/target/arm/tcg/helper-a64.h @@ -80,7 +80,6 @@ DEF_HELPER_3(vfp_ah_maxh, f16, f16, f16, fpst) DEF_HELPER_3(vfp_ah_maxs, f32, f32, f32, fpst) DEF_HELPER_3(vfp_ah_maxd, f64, f64, f64, fpst) -DEF_HELPER_2(exception_return, void, env, i64) DEF_HELPER_FLAGS_2(dc_zva, TCG_CALL_NO_WG, void, env, i64) DEF_HELPER_FLAGS_3(pacia, TCG_CALL_NO_WG, i64, env, i64, i64) @@ -145,3 +144,7 @@ DEF_HELPER_FLAGS_5(gvec_fmulx_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32 DEF_HELPER_FLAGS_5(gvec_fmulx_idx_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_5(gvec_fmulx_idx_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_5(gvec_fmulx_idx_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) + +#ifndef CONFIG_USER_ONLY +DEF_HELPER_2(exception_return, void, env, i64) +#endif diff --git a/target/arm/tcg/helper-sme.h b/target/arm/tcg/helper-sme.h index 858d691..c551797 100644 --- a/target/arm/tcg/helper-sme.h +++ b/target/arm/tcg/helper-sme.h @@ -33,101 +33,147 @@ DEF_HELPER_FLAGS_4(sme_mova_zc_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_4(sme_mova_cz_q, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_4(sme_mova_zc_q, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(sme_ld1b_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_5(sme_ld1b_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_5(sme_ld1b_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_5(sme_ld1b_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) - -DEF_HELPER_FLAGS_5(sme_ld1h_be_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_5(sme_ld1h_le_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_5(sme_ld1h_be_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_5(sme_ld1h_le_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_5(sme_ld1h_be_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_5(sme_ld1h_le_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_5(sme_ld1h_be_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_5(sme_ld1h_le_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) - -DEF_HELPER_FLAGS_5(sme_ld1s_be_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_5(sme_ld1s_le_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_5(sme_ld1s_be_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_5(sme_ld1s_le_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_5(sme_ld1s_be_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_5(sme_ld1s_le_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_5(sme_ld1s_be_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_5(sme_ld1s_le_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) - -DEF_HELPER_FLAGS_5(sme_ld1d_be_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, 
i32) -DEF_HELPER_FLAGS_5(sme_ld1d_le_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_5(sme_ld1d_be_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_5(sme_ld1d_le_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_5(sme_ld1d_be_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_5(sme_ld1d_le_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_5(sme_ld1d_be_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_5(sme_ld1d_le_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) - -DEF_HELPER_FLAGS_5(sme_ld1q_be_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_5(sme_ld1q_le_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_5(sme_ld1q_be_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_5(sme_ld1q_le_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_5(sme_ld1q_be_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_5(sme_ld1q_le_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_5(sme_ld1q_be_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_5(sme_ld1q_le_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) - -DEF_HELPER_FLAGS_5(sme_st1b_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_5(sme_st1b_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_5(sme_st1b_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_5(sme_st1b_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) - -DEF_HELPER_FLAGS_5(sme_st1h_be_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_5(sme_st1h_le_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_5(sme_st1h_be_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_5(sme_st1h_le_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_5(sme_st1h_be_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_5(sme_st1h_le_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_5(sme_st1h_be_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_5(sme_st1h_le_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) - -DEF_HELPER_FLAGS_5(sme_st1s_be_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_5(sme_st1s_le_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_5(sme_st1s_be_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_5(sme_st1s_le_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_5(sme_st1s_be_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_5(sme_st1s_le_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_5(sme_st1s_be_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_5(sme_st1s_le_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) - -DEF_HELPER_FLAGS_5(sme_st1d_be_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_5(sme_st1d_le_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_5(sme_st1d_be_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_5(sme_st1d_le_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_5(sme_st1d_be_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_5(sme_st1d_le_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_5(sme_st1d_be_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_5(sme_st1d_le_v_mte, TCG_CALL_NO_WG, void, env, 
ptr, ptr, tl, i32) - -DEF_HELPER_FLAGS_5(sme_st1q_be_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_5(sme_st1q_le_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_5(sme_st1q_be_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_5(sme_st1q_le_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_5(sme_st1q_be_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_5(sme_st1q_le_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_5(sme_st1q_be_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_5(sme_st1q_le_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_3(sme2_mova_cz_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(sme2_mova_zc_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(sme2_mova_cz_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(sme2_mova_zc_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(sme2_mova_cz_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(sme2_mova_zc_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(sme2_mova_cz_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(sme2_mova_zc_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32) + +DEF_HELPER_FLAGS_3(sme2p1_movaz_zc_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(sme2p1_movaz_zc_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(sme2p1_movaz_zc_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(sme2p1_movaz_zc_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(sme2p1_movaz_zc_q, TCG_CALL_NO_RWG, void, ptr, ptr, i32) + +DEF_HELPER_FLAGS_5(sme_ld1b_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i64) +DEF_HELPER_FLAGS_5(sme_ld1b_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i64) +DEF_HELPER_FLAGS_5(sme_ld1b_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i64) +DEF_HELPER_FLAGS_5(sme_ld1b_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i64) + +DEF_HELPER_FLAGS_5(sme_ld1h_be_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i64) +DEF_HELPER_FLAGS_5(sme_ld1h_le_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i64) +DEF_HELPER_FLAGS_5(sme_ld1h_be_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i64) +DEF_HELPER_FLAGS_5(sme_ld1h_le_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i64) +DEF_HELPER_FLAGS_5(sme_ld1h_be_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i64) +DEF_HELPER_FLAGS_5(sme_ld1h_le_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i64) +DEF_HELPER_FLAGS_5(sme_ld1h_be_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i64) +DEF_HELPER_FLAGS_5(sme_ld1h_le_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i64) + +DEF_HELPER_FLAGS_5(sme_ld1s_be_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i64) +DEF_HELPER_FLAGS_5(sme_ld1s_le_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i64) +DEF_HELPER_FLAGS_5(sme_ld1s_be_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i64) +DEF_HELPER_FLAGS_5(sme_ld1s_le_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i64) +DEF_HELPER_FLAGS_5(sme_ld1s_be_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i64) +DEF_HELPER_FLAGS_5(sme_ld1s_le_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i64) +DEF_HELPER_FLAGS_5(sme_ld1s_be_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i64) +DEF_HELPER_FLAGS_5(sme_ld1s_le_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i64) + +DEF_HELPER_FLAGS_5(sme_ld1d_be_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i64) +DEF_HELPER_FLAGS_5(sme_ld1d_le_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i64) +DEF_HELPER_FLAGS_5(sme_ld1d_be_v, TCG_CALL_NO_WG, void, env, ptr, ptr, 
tl, i64) +DEF_HELPER_FLAGS_5(sme_ld1d_le_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i64) +DEF_HELPER_FLAGS_5(sme_ld1d_be_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i64) +DEF_HELPER_FLAGS_5(sme_ld1d_le_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i64) +DEF_HELPER_FLAGS_5(sme_ld1d_be_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i64) +DEF_HELPER_FLAGS_5(sme_ld1d_le_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i64) + +DEF_HELPER_FLAGS_5(sme_ld1q_be_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i64) +DEF_HELPER_FLAGS_5(sme_ld1q_le_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i64) +DEF_HELPER_FLAGS_5(sme_ld1q_be_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i64) +DEF_HELPER_FLAGS_5(sme_ld1q_le_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i64) +DEF_HELPER_FLAGS_5(sme_ld1q_be_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i64) +DEF_HELPER_FLAGS_5(sme_ld1q_le_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i64) +DEF_HELPER_FLAGS_5(sme_ld1q_be_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i64) +DEF_HELPER_FLAGS_5(sme_ld1q_le_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i64) + +DEF_HELPER_FLAGS_5(sme_st1b_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i64) +DEF_HELPER_FLAGS_5(sme_st1b_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i64) +DEF_HELPER_FLAGS_5(sme_st1b_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i64) +DEF_HELPER_FLAGS_5(sme_st1b_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i64) + +DEF_HELPER_FLAGS_5(sme_st1h_be_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i64) +DEF_HELPER_FLAGS_5(sme_st1h_le_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i64) +DEF_HELPER_FLAGS_5(sme_st1h_be_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i64) +DEF_HELPER_FLAGS_5(sme_st1h_le_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i64) +DEF_HELPER_FLAGS_5(sme_st1h_be_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i64) +DEF_HELPER_FLAGS_5(sme_st1h_le_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i64) +DEF_HELPER_FLAGS_5(sme_st1h_be_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i64) +DEF_HELPER_FLAGS_5(sme_st1h_le_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i64) + +DEF_HELPER_FLAGS_5(sme_st1s_be_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i64) +DEF_HELPER_FLAGS_5(sme_st1s_le_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i64) +DEF_HELPER_FLAGS_5(sme_st1s_be_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i64) +DEF_HELPER_FLAGS_5(sme_st1s_le_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i64) +DEF_HELPER_FLAGS_5(sme_st1s_be_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i64) +DEF_HELPER_FLAGS_5(sme_st1s_le_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i64) +DEF_HELPER_FLAGS_5(sme_st1s_be_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i64) +DEF_HELPER_FLAGS_5(sme_st1s_le_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i64) + +DEF_HELPER_FLAGS_5(sme_st1d_be_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i64) +DEF_HELPER_FLAGS_5(sme_st1d_le_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i64) +DEF_HELPER_FLAGS_5(sme_st1d_be_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i64) +DEF_HELPER_FLAGS_5(sme_st1d_le_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i64) +DEF_HELPER_FLAGS_5(sme_st1d_be_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i64) +DEF_HELPER_FLAGS_5(sme_st1d_le_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i64) +DEF_HELPER_FLAGS_5(sme_st1d_be_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i64) +DEF_HELPER_FLAGS_5(sme_st1d_le_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i64) + +DEF_HELPER_FLAGS_5(sme_st1q_be_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i64) +DEF_HELPER_FLAGS_5(sme_st1q_le_h, TCG_CALL_NO_WG, 
void, env, ptr, ptr, tl, i64) +DEF_HELPER_FLAGS_5(sme_st1q_be_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i64) +DEF_HELPER_FLAGS_5(sme_st1q_le_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i64) +DEF_HELPER_FLAGS_5(sme_st1q_be_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i64) +DEF_HELPER_FLAGS_5(sme_st1q_le_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i64) +DEF_HELPER_FLAGS_5(sme_st1q_be_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i64) +DEF_HELPER_FLAGS_5(sme_st1q_le_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_5(sme_addha_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_5(sme_addva_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_5(sme_addha_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_5(sme_addva_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_7(sme_fmopa_h, TCG_CALL_NO_RWG, +DEF_HELPER_FLAGS_7(sme_fmopa_w_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_7(sme_fmopa_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_7(sme_fmopa_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_7(sme_fmopa_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_7(sme_bfmopa_w, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, env, i32) DEF_HELPER_FLAGS_7(sme_bfmopa, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, fpst, i32) + +DEF_HELPER_FLAGS_7(sme_fmops_w_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_7(sme_fmops_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_7(sme_fmops_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_7(sme_fmops_d, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_7(sme_bfmops_w, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_7(sme_bfmops, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, fpst, i32) + +DEF_HELPER_FLAGS_7(sme_ah_fmops_w_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_7(sme_ah_fmops_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_7(sme_ah_fmops_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_7(sme_ah_fmops_d, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_7(sme_ah_bfmops_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_7(sme_ah_bfmops, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, fpst, i32) + DEF_HELPER_FLAGS_6(sme_smopa_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_6(sme_umopa_s, TCG_CALL_NO_RWG, @@ -144,3 +190,168 @@ DEF_HELPER_FLAGS_6(sme_sumopa_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_6(sme_usmopa_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_6(sme2_bmopa_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_6(sme2_smopa2_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_6(sme2_umopa2_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_5(gvec_fmax_b16, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(gvec_fmin_b16, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(gvec_ah_fmax_b16, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(gvec_ah_fmin_b16, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, fpst, 
i32) +DEF_HELPER_FLAGS_5(gvec_fmaxnum_b16, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(gvec_fminnum_b16, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, fpst, i32) + +DEF_HELPER_FLAGS_6(sme2_fdot_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_6(sme2_fdot_idx_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_6(sme2_fvdot_idx_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, env, i32) + +DEF_HELPER_FLAGS_4(sme2_svdot_idx_4b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(sme2_uvdot_idx_4b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(sme2_suvdot_idx_4b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(sme2_usvdot_idx_4b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(sme2_svdot_idx_4h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(sme2_uvdot_idx_4h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(sme2_svdot_idx_2h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(sme2_uvdot_idx_2h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_5(sme2_smlall_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(sme2_smlall_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(sme2_smlsll_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(sme2_smlsll_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(sme2_umlall_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(sme2_umlall_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(sme2_umlsll_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(sme2_umlsll_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(sme2_usmlall_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_5(sme2_smlall_idx_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(sme2_smlall_idx_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(sme2_smlsll_idx_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(sme2_smlsll_idx_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(sme2_umlall_idx_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(sme2_umlall_idx_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(sme2_umlsll_idx_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(sme2_umlsll_idx_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(sme2_usmlall_idx_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(sme2_sumlall_idx_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(sme2_bfcvt, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_4(sme2_bfcvtn, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_4(sme2_fcvt_n, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_4(sme2_fcvtn, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_4(sme2_fcvt_w, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_4(sme2_fcvtl, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_4(sme2_scvtf, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_4(sme2_ucvtf, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) + +DEF_HELPER_FLAGS_3(sme2_sqcvt_sb, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(sme2_uqcvt_sb, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(sme2_sqcvtu_sb, 
TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(sme2_sqcvt_sh, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(sme2_uqcvt_sh, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(sme2_sqcvtu_sh, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(sme2_sqcvt_dh, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(sme2_uqcvt_dh, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(sme2_sqcvtu_dh, TCG_CALL_NO_RWG, void, ptr, ptr, i32) + +DEF_HELPER_FLAGS_3(sme2_sqcvtn_sb, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(sme2_uqcvtn_sb, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(sme2_sqcvtun_sb, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(sme2_sqcvtn_sh, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(sme2_uqcvtn_sh, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(sme2_sqcvtun_sh, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(sme2_sqcvtn_dh, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(sme2_uqcvtn_dh, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(sme2_sqcvtun_dh, TCG_CALL_NO_RWG, void, ptr, ptr, i32) + +DEF_HELPER_FLAGS_3(sme2_sunpk2_bh, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(sme2_sunpk2_hs, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(sme2_sunpk2_sd, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(sme2_sunpk4_bh, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(sme2_sunpk4_hs, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(sme2_sunpk4_sd, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(sme2_uunpk2_bh, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(sme2_uunpk2_hs, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(sme2_uunpk2_sd, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(sme2_uunpk4_bh, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(sme2_uunpk4_hs, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(sme2_uunpk4_sd, TCG_CALL_NO_RWG, void, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(sme2_zip2_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(sme2_zip2_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(sme2_zip2_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(sme2_zip2_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(sme2_zip2_q, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(sme2_uzp2_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(sme2_uzp2_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(sme2_uzp2_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(sme2_uzp2_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(sme2_uzp2_q, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_3(sme2_zip4_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(sme2_zip4_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(sme2_zip4_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(sme2_zip4_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(sme2_zip4_q, TCG_CALL_NO_RWG, void, ptr, ptr, i32) + +DEF_HELPER_FLAGS_3(sme2_uzp4_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(sme2_uzp4_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(sme2_uzp4_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(sme2_uzp4_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(sme2_uzp4_q, TCG_CALL_NO_RWG, void, ptr, ptr, i32) + +DEF_HELPER_FLAGS_3(sme2_sqrshr_sh, 
TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(sme2_uqrshr_sh, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(sme2_sqrshru_sh, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(sme2_sqrshr_sb, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(sme2_uqrshr_sb, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(sme2_sqrshru_sb, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(sme2_sqrshr_dh, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(sme2_uqrshr_dh, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(sme2_sqrshru_dh, TCG_CALL_NO_RWG, void, ptr, ptr, i32) + +DEF_HELPER_FLAGS_3(sme2_sqrshrn_sh, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(sme2_uqrshrn_sh, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(sme2_sqrshrun_sh, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(sme2_sqrshrn_sb, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(sme2_uqrshrn_sb, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(sme2_sqrshrun_sb, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(sme2_sqrshrn_dh, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(sme2_uqrshrn_dh, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(sme2_sqrshrun_dh, TCG_CALL_NO_RWG, void, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(sme2_sclamp_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(sme2_sclamp_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(sme2_sclamp_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(sme2_sclamp_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(sme2_uclamp_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(sme2_uclamp_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(sme2_uclamp_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(sme2_uclamp_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_5(sme2_fclamp_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(sme2_fclamp_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(sme2_fclamp_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(sme2_bfclamp, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) + +DEF_HELPER_FLAGS_5(sme2_sel_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32, i32) +DEF_HELPER_FLAGS_5(sme2_sel_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32, i32) +DEF_HELPER_FLAGS_5(sme2_sel_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32, i32) +DEF_HELPER_FLAGS_5(sme2_sel_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32, i32) diff --git a/target/arm/tcg/helper-sve.h b/target/arm/tcg/helper-sve.h index 0b1b588..c3541a8 100644 --- a/target/arm/tcg/helper-sve.h +++ b/target/arm/tcg/helper-sve.h @@ -676,11 +676,21 @@ DEF_HELPER_FLAGS_5(sve2_tbl_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_5(sve2_tbl_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_5(sve2_tbl_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(sve2p1_tblq_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(sve2p1_tblq_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(sve2p1_tblq_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(sve2p1_tblq_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + DEF_HELPER_FLAGS_4(sve2_tbx_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_4(sve2_tbx_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_4(sve2_tbx_s, TCG_CALL_NO_RWG, void, ptr, ptr, 
ptr, i32) DEF_HELPER_FLAGS_4(sve2_tbx_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(sve2p1_tbxq_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(sve2p1_tbxq_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(sve2p1_tbxq_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(sve2p1_tbxq_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + DEF_HELPER_FLAGS_3(sve_sunpk_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32) DEF_HELPER_FLAGS_3(sve_sunpk_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32) DEF_HELPER_FLAGS_3(sve_sunpk_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32) @@ -701,12 +711,22 @@ DEF_HELPER_FLAGS_4(sve_zip_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_4(sve_zip_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_4(sve2_zip_q, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(sve2p1_zipq_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(sve2p1_zipq_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(sve2p1_zipq_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(sve2p1_zipq_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + DEF_HELPER_FLAGS_4(sve_uzp_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_4(sve_uzp_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_4(sve_uzp_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_4(sve_uzp_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_4(sve2_uzp_q, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(sve2p1_uzpq_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(sve2p1_uzpq_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(sve2p1_uzpq_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(sve2p1_uzpq_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + DEF_HELPER_FLAGS_4(sve_trn_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_4(sve_trn_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_4(sve_trn_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) @@ -937,10 +957,17 @@ DEF_HELPER_FLAGS_4(sve_brkn, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_4(sve_brkns, TCG_CALL_NO_RWG, i32, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_3(sve_cntp, TCG_CALL_NO_RWG, i64, ptr, ptr, i32) +DEF_HELPER_FLAGS_2(sve2p1_cntp_c, TCG_CALL_NO_RWG_SE, i64, i32, i32) DEF_HELPER_FLAGS_3(sve_whilel, TCG_CALL_NO_RWG, i32, ptr, i32, i32) DEF_HELPER_FLAGS_3(sve_whileg, TCG_CALL_NO_RWG, i32, ptr, i32, i32) +DEF_HELPER_FLAGS_3(sve_while2l, TCG_CALL_NO_RWG, i32, ptr, i32, i32) +DEF_HELPER_FLAGS_3(sve_while2g, TCG_CALL_NO_RWG, i32, ptr, i32, i32) + +DEF_HELPER_FLAGS_3(sve_whilecl, TCG_CALL_NO_RWG, i32, ptr, i32, i32) +DEF_HELPER_FLAGS_3(sve_whilecg, TCG_CALL_NO_RWG, i32, ptr, i32, i32) + DEF_HELPER_FLAGS_4(sve_subri_b, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) DEF_HELPER_FLAGS_4(sve_subri_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) DEF_HELPER_FLAGS_4(sve_subri_s, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) @@ -1071,6 +1098,55 @@ DEF_HELPER_FLAGS_4(sve_ah_fminv_s, TCG_CALL_NO_RWG, DEF_HELPER_FLAGS_4(sve_ah_fminv_d, TCG_CALL_NO_RWG, i64, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(sve2p1_faddqv_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(sve2p1_faddqv_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(sve2p1_faddqv_d, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, fpst, i32) + +DEF_HELPER_FLAGS_5(sve2p1_fmaxnmqv_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, fpst, i32) 
+DEF_HELPER_FLAGS_5(sve2p1_fmaxnmqv_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(sve2p1_fmaxnmqv_d, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, fpst, i32) + +DEF_HELPER_FLAGS_5(sve2p1_fminnmqv_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(sve2p1_fminnmqv_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(sve2p1_fminnmqv_d, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, fpst, i32) + +DEF_HELPER_FLAGS_5(sve2p1_fmaxqv_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(sve2p1_fmaxqv_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(sve2p1_fmaxqv_d, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, fpst, i32) + +DEF_HELPER_FLAGS_5(sve2p1_fminqv_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(sve2p1_fminqv_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(sve2p1_fminqv_d, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, fpst, i32) + +DEF_HELPER_FLAGS_5(sve2p1_ah_fmaxqv_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(sve2p1_ah_fmaxqv_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(sve2p1_ah_fmaxqv_d, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, fpst, i32) + +DEF_HELPER_FLAGS_5(sve2p1_ah_fminqv_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(sve2p1_ah_fminqv_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(sve2p1_ah_fminqv_d, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, fpst, i32) + DEF_HELPER_FLAGS_5(sve_fadda_h, TCG_CALL_NO_RWG, i64, i64, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_5(sve_fadda_s, TCG_CALL_NO_RWG, @@ -1120,6 +1196,8 @@ DEF_HELPER_FLAGS_5(sve_fcmne0_s, TCG_CALL_NO_RWG, DEF_HELPER_FLAGS_5(sve_fcmne0_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_6(sve_fadd_b16, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_6(sve_fadd_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_6(sve_fadd_s, TCG_CALL_NO_RWG, @@ -1127,6 +1205,8 @@ DEF_HELPER_FLAGS_6(sve_fadd_s, TCG_CALL_NO_RWG, DEF_HELPER_FLAGS_6(sve_fadd_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_6(sve_fsub_b16, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_6(sve_fsub_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_6(sve_fsub_s, TCG_CALL_NO_RWG, @@ -1134,6 +1214,8 @@ DEF_HELPER_FLAGS_6(sve_fsub_s, TCG_CALL_NO_RWG, DEF_HELPER_FLAGS_6(sve_fsub_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_6(sve_fmul_b16, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_6(sve_fmul_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_6(sve_fmul_s, TCG_CALL_NO_RWG, @@ -1148,6 +1230,8 @@ DEF_HELPER_FLAGS_6(sve_fdiv_s, TCG_CALL_NO_RWG, DEF_HELPER_FLAGS_6(sve_fdiv_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_6(sve_fmin_b16, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_6(sve_fmin_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_6(sve_fmin_s, TCG_CALL_NO_RWG, @@ -1155,6 +1239,8 @@ DEF_HELPER_FLAGS_6(sve_fmin_s, TCG_CALL_NO_RWG, DEF_HELPER_FLAGS_6(sve_fmin_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_6(sve_fmax_b16, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_6(sve_fmax_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_6(sve_fmax_s, TCG_CALL_NO_RWG, 
@@ -1162,6 +1248,8 @@ DEF_HELPER_FLAGS_6(sve_fmax_s, TCG_CALL_NO_RWG, DEF_HELPER_FLAGS_6(sve_fmax_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_6(sve_ah_fmin_b16, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_6(sve_ah_fmin_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_6(sve_ah_fmin_s, TCG_CALL_NO_RWG, @@ -1169,6 +1257,8 @@ DEF_HELPER_FLAGS_6(sve_ah_fmin_s, TCG_CALL_NO_RWG, DEF_HELPER_FLAGS_6(sve_ah_fmin_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_6(sve_ah_fmax_b16, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_6(sve_ah_fmax_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_6(sve_ah_fmax_s, TCG_CALL_NO_RWG, @@ -1176,6 +1266,8 @@ DEF_HELPER_FLAGS_6(sve_ah_fmax_s, TCG_CALL_NO_RWG, DEF_HELPER_FLAGS_6(sve_ah_fmax_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_6(sve_fminnum_b16, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_6(sve_fminnum_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_6(sve_fminnum_s, TCG_CALL_NO_RWG, @@ -1183,6 +1275,8 @@ DEF_HELPER_FLAGS_6(sve_fminnum_s, TCG_CALL_NO_RWG, DEF_HELPER_FLAGS_6(sve_fminnum_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_6(sve_fmaxnum_b16, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_6(sve_fmaxnum_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_6(sve_fmaxnum_s, TCG_CALL_NO_RWG, @@ -1447,6 +1541,8 @@ DEF_HELPER_FLAGS_6(sve_fcadd_s, TCG_CALL_NO_RWG, DEF_HELPER_FLAGS_6(sve_fcadd_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_7(sve_fmla_zpzzz_b16, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_7(sve_fmla_zpzzz_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_7(sve_fmla_zpzzz_s, TCG_CALL_NO_RWG, @@ -1454,6 +1550,8 @@ DEF_HELPER_FLAGS_7(sve_fmla_zpzzz_s, TCG_CALL_NO_RWG, DEF_HELPER_FLAGS_7(sve_fmla_zpzzz_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_7(sve_fmls_zpzzz_b16, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_7(sve_fmls_zpzzz_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_7(sve_fmls_zpzzz_s, TCG_CALL_NO_RWG, @@ -1461,6 +1559,8 @@ DEF_HELPER_FLAGS_7(sve_fmls_zpzzz_s, TCG_CALL_NO_RWG, DEF_HELPER_FLAGS_7(sve_fmls_zpzzz_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_7(sve_fnmla_zpzzz_b16, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_7(sve_fnmla_zpzzz_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_7(sve_fnmla_zpzzz_s, TCG_CALL_NO_RWG, @@ -1468,6 +1568,8 @@ DEF_HELPER_FLAGS_7(sve_fnmla_zpzzz_s, TCG_CALL_NO_RWG, DEF_HELPER_FLAGS_7(sve_fnmla_zpzzz_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_7(sve_fnmls_zpzzz_b16, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_7(sve_fnmls_zpzzz_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_7(sve_fnmls_zpzzz_s, TCG_CALL_NO_RWG, @@ -1475,6 +1577,8 @@ DEF_HELPER_FLAGS_7(sve_fnmls_zpzzz_s, TCG_CALL_NO_RWG, DEF_HELPER_FLAGS_7(sve_fnmls_zpzzz_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_7(sve_ah_fmls_zpzzz_b16, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, fpst, i32) 
DEF_HELPER_FLAGS_7(sve_ah_fmls_zpzzz_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_7(sve_ah_fmls_zpzzz_s, TCG_CALL_NO_RWG, @@ -1482,6 +1586,8 @@ DEF_HELPER_FLAGS_7(sve_ah_fmls_zpzzz_s, TCG_CALL_NO_RWG, DEF_HELPER_FLAGS_7(sve_ah_fmls_zpzzz_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_7(sve_ah_fnmla_zpzzz_b16, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_7(sve_ah_fnmla_zpzzz_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_7(sve_ah_fnmla_zpzzz_s, TCG_CALL_NO_RWG, @@ -1489,6 +1595,8 @@ DEF_HELPER_FLAGS_7(sve_ah_fnmla_zpzzz_s, TCG_CALL_NO_RWG, DEF_HELPER_FLAGS_7(sve_ah_fnmla_zpzzz_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_7(sve_ah_fnmls_zpzzz_b16, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_7(sve_ah_fnmls_zpzzz_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_7(sve_ah_fnmls_zpzzz_s, TCG_CALL_NO_RWG, @@ -1547,945 +1655,1015 @@ DEF_HELPER_FLAGS_4(sve2_usubw_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_4(sve2_usubw_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_4(sve2_usubw_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(sve_ld1bb_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ld2bb_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ld3bb_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ld4bb_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) - -DEF_HELPER_FLAGS_4(sve_ld1hh_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ld2hh_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ld3hh_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ld4hh_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) - -DEF_HELPER_FLAGS_4(sve_ld1hh_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ld2hh_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ld3hh_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ld4hh_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) - -DEF_HELPER_FLAGS_4(sve_ld1ss_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ld2ss_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ld3ss_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ld4ss_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) - -DEF_HELPER_FLAGS_4(sve_ld1ss_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ld2ss_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ld3ss_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ld4ss_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) - -DEF_HELPER_FLAGS_4(sve_ld1dd_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ld2dd_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ld3dd_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ld4dd_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) - -DEF_HELPER_FLAGS_4(sve_ld1dd_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ld2dd_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ld3dd_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ld4dd_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) - -DEF_HELPER_FLAGS_4(sve_ld1bhu_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) 
-DEF_HELPER_FLAGS_4(sve_ld1bsu_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ld1bdu_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ld1bhs_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ld1bss_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ld1bds_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) - -DEF_HELPER_FLAGS_4(sve_ld1hsu_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ld1hdu_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ld1hss_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ld1hds_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) - -DEF_HELPER_FLAGS_4(sve_ld1hsu_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ld1hdu_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ld1hss_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ld1hds_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) - -DEF_HELPER_FLAGS_4(sve_ld1sdu_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ld1sds_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) - -DEF_HELPER_FLAGS_4(sve_ld1sdu_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ld1sds_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) - -DEF_HELPER_FLAGS_4(sve_ld1bb_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ld2bb_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ld3bb_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ld4bb_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) - -DEF_HELPER_FLAGS_4(sve_ld1hh_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ld2hh_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ld3hh_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ld4hh_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) - -DEF_HELPER_FLAGS_4(sve_ld1hh_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ld2hh_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ld3hh_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ld4hh_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) - -DEF_HELPER_FLAGS_4(sve_ld1ss_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ld2ss_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ld3ss_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ld4ss_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) - -DEF_HELPER_FLAGS_4(sve_ld1ss_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ld2ss_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ld3ss_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ld4ss_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) - -DEF_HELPER_FLAGS_4(sve_ld1dd_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ld2dd_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ld3dd_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ld4dd_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) - -DEF_HELPER_FLAGS_4(sve_ld1dd_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ld2dd_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ld3dd_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ld4dd_be_r_mte, 
TCG_CALL_NO_WG, void, env, ptr, tl, i32) - -DEF_HELPER_FLAGS_4(sve_ld1bhu_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ld1bsu_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ld1bdu_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ld1bhs_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ld1bss_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ld1bds_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) - -DEF_HELPER_FLAGS_4(sve_ld1hsu_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ld1hdu_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ld1hss_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ld1hds_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) - -DEF_HELPER_FLAGS_4(sve_ld1hsu_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ld1hdu_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ld1hss_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ld1hds_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) - -DEF_HELPER_FLAGS_4(sve_ld1sdu_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ld1sds_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) - -DEF_HELPER_FLAGS_4(sve_ld1sdu_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ld1sds_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) - -DEF_HELPER_FLAGS_4(sve_ldff1bb_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ldff1bhu_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ldff1bsu_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ldff1bdu_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ldff1bhs_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ldff1bss_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ldff1bds_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) - -DEF_HELPER_FLAGS_4(sve_ldff1hh_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ldff1hsu_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ldff1hdu_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ldff1hss_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ldff1hds_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) - -DEF_HELPER_FLAGS_4(sve_ldff1hh_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ldff1hsu_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ldff1hdu_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ldff1hss_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ldff1hds_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) - -DEF_HELPER_FLAGS_4(sve_ldff1ss_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ldff1sdu_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ldff1sds_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) - -DEF_HELPER_FLAGS_4(sve_ldff1ss_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ldff1sdu_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ldff1sds_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) - -DEF_HELPER_FLAGS_4(sve_ldff1dd_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ldff1dd_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) - -DEF_HELPER_FLAGS_4(sve_ldff1bb_r_mte, TCG_CALL_NO_WG, 
void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ldff1bhu_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ldff1bsu_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ldff1bdu_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ldff1bhs_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ldff1bss_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ldff1bds_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld1bb_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_ld2bb_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_ld3bb_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_ld4bb_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64) + +DEF_HELPER_FLAGS_4(sve_ld1hh_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_ld2hh_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_ld3hh_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_ld4hh_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64) + +DEF_HELPER_FLAGS_4(sve_ld1hh_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_ld2hh_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_ld3hh_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_ld4hh_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64) + +DEF_HELPER_FLAGS_4(sve_ld1ss_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_ld2ss_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_ld3ss_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_ld4ss_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64) + +DEF_HELPER_FLAGS_4(sve_ld1ss_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_ld2ss_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_ld3ss_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_ld4ss_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64) + +DEF_HELPER_FLAGS_4(sve_ld1dd_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_ld2dd_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_ld3dd_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_ld4dd_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64) + +DEF_HELPER_FLAGS_4(sve_ld1dd_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_ld2dd_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_ld3dd_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_ld4dd_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64) + +DEF_HELPER_FLAGS_4(sve_ld2qq_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_ld3qq_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_ld4qq_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64) + +DEF_HELPER_FLAGS_4(sve_ld2qq_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_ld3qq_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_ld4qq_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64) + +DEF_HELPER_FLAGS_4(sve_ld1bhu_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_ld1bsu_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_ld1bdu_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_ld1bhs_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_ld1bss_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_ld1bds_r, 
TCG_CALL_NO_WG, void, env, ptr, tl, i64) + +DEF_HELPER_FLAGS_4(sve_ld1hsu_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_ld1hdu_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_ld1hss_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_ld1hds_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64) + +DEF_HELPER_FLAGS_4(sve_ld1hsu_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_ld1hdu_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_ld1hss_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_ld1hds_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64) + +DEF_HELPER_FLAGS_4(sve_ld1sdu_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_ld1sds_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64) + +DEF_HELPER_FLAGS_4(sve_ld1squ_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_ld1dqu_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64) + +DEF_HELPER_FLAGS_4(sve_ld1sdu_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_ld1sds_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64) + +DEF_HELPER_FLAGS_4(sve_ld1squ_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_ld1dqu_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64) + +DEF_HELPER_FLAGS_4(sve_ld1bb_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_ld2bb_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_ld3bb_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_ld4bb_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64) + +DEF_HELPER_FLAGS_4(sve_ld1hh_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_ld2hh_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_ld3hh_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_ld4hh_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64) + +DEF_HELPER_FLAGS_4(sve_ld1hh_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_ld2hh_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_ld3hh_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_ld4hh_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64) + +DEF_HELPER_FLAGS_4(sve_ld1ss_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_ld2ss_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_ld3ss_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_ld4ss_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64) + +DEF_HELPER_FLAGS_4(sve_ld1ss_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_ld2ss_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_ld3ss_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_ld4ss_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64) + +DEF_HELPER_FLAGS_4(sve_ld1dd_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_ld2dd_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_ld3dd_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_ld4dd_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64) + +DEF_HELPER_FLAGS_4(sve_ld1dd_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_ld2dd_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_ld3dd_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_ld4dd_be_r_mte, TCG_CALL_NO_WG, void, 
env, ptr, tl, i64) + +DEF_HELPER_FLAGS_4(sve_ld2qq_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_ld3qq_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_ld4qq_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64) + +DEF_HELPER_FLAGS_4(sve_ld2qq_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_ld3qq_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_ld4qq_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64) + +DEF_HELPER_FLAGS_4(sve_ld1bhu_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_ld1bsu_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_ld1bdu_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_ld1bhs_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_ld1bss_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_ld1bds_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64) + +DEF_HELPER_FLAGS_4(sve_ld1hsu_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_ld1hdu_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_ld1hss_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_ld1hds_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64) + +DEF_HELPER_FLAGS_4(sve_ld1hsu_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_ld1hdu_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_ld1hss_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_ld1hds_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64) + +DEF_HELPER_FLAGS_4(sve_ld1sdu_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_ld1sds_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64) + +DEF_HELPER_FLAGS_4(sve_ld1squ_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_ld1dqu_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64) + +DEF_HELPER_FLAGS_4(sve_ld1sdu_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_ld1sds_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64) + +DEF_HELPER_FLAGS_4(sve_ld1squ_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_ld1dqu_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64) + +DEF_HELPER_FLAGS_4(sve_ldff1bb_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_ldff1bhu_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_ldff1bsu_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_ldff1bdu_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_ldff1bhs_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_ldff1bss_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_ldff1bds_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64) + +DEF_HELPER_FLAGS_4(sve_ldff1hh_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_ldff1hsu_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_ldff1hdu_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_ldff1hss_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_ldff1hds_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64) + +DEF_HELPER_FLAGS_4(sve_ldff1hh_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_ldff1hsu_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_ldff1hdu_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_ldff1hss_be_r, TCG_CALL_NO_WG, void, 
env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_ldff1hds_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64) + +DEF_HELPER_FLAGS_4(sve_ldff1ss_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_ldff1sdu_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_ldff1sds_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64) + +DEF_HELPER_FLAGS_4(sve_ldff1ss_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_ldff1sdu_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_ldff1sds_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64) + +DEF_HELPER_FLAGS_4(sve_ldff1dd_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_ldff1dd_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64) + +DEF_HELPER_FLAGS_4(sve_ldff1bb_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_ldff1bhu_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_ldff1bsu_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_ldff1bdu_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_ldff1bhs_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_ldff1bss_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_ldff1bds_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64) DEF_HELPER_FLAGS_4(sve_ldff1hh_le_r_mte, TCG_CALL_NO_WG, - void, env, ptr, tl, i32) + void, env, ptr, tl, i64) DEF_HELPER_FLAGS_4(sve_ldff1hsu_le_r_mte, TCG_CALL_NO_WG, - void, env, ptr, tl, i32) + void, env, ptr, tl, i64) DEF_HELPER_FLAGS_4(sve_ldff1hdu_le_r_mte, TCG_CALL_NO_WG, - void, env, ptr, tl, i32) + void, env, ptr, tl, i64) DEF_HELPER_FLAGS_4(sve_ldff1hss_le_r_mte, TCG_CALL_NO_WG, - void, env, ptr, tl, i32) + void, env, ptr, tl, i64) DEF_HELPER_FLAGS_4(sve_ldff1hds_le_r_mte, TCG_CALL_NO_WG, - void, env, ptr, tl, i32) + void, env, ptr, tl, i64) DEF_HELPER_FLAGS_4(sve_ldff1hh_be_r_mte, TCG_CALL_NO_WG, - void, env, ptr, tl, i32) + void, env, ptr, tl, i64) DEF_HELPER_FLAGS_4(sve_ldff1hsu_be_r_mte, TCG_CALL_NO_WG, - void, env, ptr, tl, i32) + void, env, ptr, tl, i64) DEF_HELPER_FLAGS_4(sve_ldff1hdu_be_r_mte, TCG_CALL_NO_WG, - void, env, ptr, tl, i32) + void, env, ptr, tl, i64) DEF_HELPER_FLAGS_4(sve_ldff1hss_be_r_mte, TCG_CALL_NO_WG, - void, env, ptr, tl, i32) + void, env, ptr, tl, i64) DEF_HELPER_FLAGS_4(sve_ldff1hds_be_r_mte, TCG_CALL_NO_WG, - void, env, ptr, tl, i32) + void, env, ptr, tl, i64) DEF_HELPER_FLAGS_4(sve_ldff1ss_le_r_mte, TCG_CALL_NO_WG, - void, env, ptr, tl, i32) + void, env, ptr, tl, i64) DEF_HELPER_FLAGS_4(sve_ldff1sdu_le_r_mte, TCG_CALL_NO_WG, - void, env, ptr, tl, i32) + void, env, ptr, tl, i64) DEF_HELPER_FLAGS_4(sve_ldff1sds_le_r_mte, TCG_CALL_NO_WG, - void, env, ptr, tl, i32) + void, env, ptr, tl, i64) DEF_HELPER_FLAGS_4(sve_ldff1ss_be_r_mte, TCG_CALL_NO_WG, - void, env, ptr, tl, i32) + void, env, ptr, tl, i64) DEF_HELPER_FLAGS_4(sve_ldff1sdu_be_r_mte, TCG_CALL_NO_WG, - void, env, ptr, tl, i32) + void, env, ptr, tl, i64) DEF_HELPER_FLAGS_4(sve_ldff1sds_be_r_mte, TCG_CALL_NO_WG, - void, env, ptr, tl, i32) + void, env, ptr, tl, i64) DEF_HELPER_FLAGS_4(sve_ldff1dd_le_r_mte, TCG_CALL_NO_WG, - void, env, ptr, tl, i32) + void, env, ptr, tl, i64) DEF_HELPER_FLAGS_4(sve_ldff1dd_be_r_mte, TCG_CALL_NO_WG, - void, env, ptr, tl, i32) - -DEF_HELPER_FLAGS_4(sve_ldnf1bb_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ldnf1bhu_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) -DEF_HELPER_FLAGS_4(sve_ldnf1bsu_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) 
-DEF_HELPER_FLAGS_4(sve_ldnf1bdu_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_ldnf1bhs_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_ldnf1bss_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_ldnf1bds_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-
-DEF_HELPER_FLAGS_4(sve_ldnf1hh_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_ldnf1hsu_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_ldnf1hdu_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_ldnf1hss_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_ldnf1hds_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-
-DEF_HELPER_FLAGS_4(sve_ldnf1hh_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_ldnf1hsu_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_ldnf1hdu_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_ldnf1hss_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_ldnf1hds_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-
-DEF_HELPER_FLAGS_4(sve_ldnf1ss_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_ldnf1sdu_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_ldnf1sds_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-
-DEF_HELPER_FLAGS_4(sve_ldnf1ss_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_ldnf1sdu_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_ldnf1sds_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-
-DEF_HELPER_FLAGS_4(sve_ldnf1dd_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_ldnf1dd_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-
-DEF_HELPER_FLAGS_4(sve_ldnf1bb_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_ldnf1bhu_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_ldnf1bsu_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_ldnf1bdu_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_ldnf1bhs_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_ldnf1bss_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_ldnf1bds_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+                   void, env, ptr, tl, i64)
+
+DEF_HELPER_FLAGS_4(sve_ldnf1bb_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_ldnf1bhu_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_ldnf1bsu_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_ldnf1bdu_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_ldnf1bhs_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_ldnf1bss_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_ldnf1bds_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+
+DEF_HELPER_FLAGS_4(sve_ldnf1hh_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_ldnf1hsu_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_ldnf1hdu_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_ldnf1hss_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_ldnf1hds_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+
+DEF_HELPER_FLAGS_4(sve_ldnf1hh_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_ldnf1hsu_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_ldnf1hdu_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_ldnf1hss_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_ldnf1hds_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+
+DEF_HELPER_FLAGS_4(sve_ldnf1ss_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_ldnf1sdu_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_ldnf1sds_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+
+DEF_HELPER_FLAGS_4(sve_ldnf1ss_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_ldnf1sdu_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_ldnf1sds_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+
+DEF_HELPER_FLAGS_4(sve_ldnf1dd_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_ldnf1dd_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+
+DEF_HELPER_FLAGS_4(sve_ldnf1bb_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_ldnf1bhu_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_ldnf1bsu_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_ldnf1bdu_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_ldnf1bhs_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_ldnf1bss_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_ldnf1bds_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
DEF_HELPER_FLAGS_4(sve_ldnf1hh_le_r_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, tl, i32)
+                   void, env, ptr, tl, i64)
DEF_HELPER_FLAGS_4(sve_ldnf1hsu_le_r_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, tl, i32)
+                   void, env, ptr, tl, i64)
DEF_HELPER_FLAGS_4(sve_ldnf1hdu_le_r_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, tl, i32)
+                   void, env, ptr, tl, i64)
DEF_HELPER_FLAGS_4(sve_ldnf1hss_le_r_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, tl, i32)
+                   void, env, ptr, tl, i64)
DEF_HELPER_FLAGS_4(sve_ldnf1hds_le_r_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, tl, i32)
+                   void, env, ptr, tl, i64)
DEF_HELPER_FLAGS_4(sve_ldnf1hh_be_r_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, tl, i32)
+                   void, env, ptr, tl, i64)
DEF_HELPER_FLAGS_4(sve_ldnf1hsu_be_r_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, tl, i32)
+                   void, env, ptr, tl, i64)
DEF_HELPER_FLAGS_4(sve_ldnf1hdu_be_r_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, tl, i32)
+                   void, env, ptr, tl, i64)
DEF_HELPER_FLAGS_4(sve_ldnf1hss_be_r_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, tl, i32)
+                   void, env, ptr, tl, i64)
DEF_HELPER_FLAGS_4(sve_ldnf1hds_be_r_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, tl, i32)
+                   void, env, ptr, tl, i64)
DEF_HELPER_FLAGS_4(sve_ldnf1ss_le_r_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, tl, i32)
+                   void, env, ptr, tl, i64)
DEF_HELPER_FLAGS_4(sve_ldnf1sdu_le_r_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, tl, i32)
+                   void, env, ptr, tl, i64)
DEF_HELPER_FLAGS_4(sve_ldnf1sds_le_r_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, tl, i32)
+                   void, env, ptr, tl, i64)
DEF_HELPER_FLAGS_4(sve_ldnf1ss_be_r_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, tl, i32)
+                   void, env, ptr, tl, i64)
DEF_HELPER_FLAGS_4(sve_ldnf1sdu_be_r_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, tl, i32)
+                   void, env, ptr, tl, i64)
DEF_HELPER_FLAGS_4(sve_ldnf1sds_be_r_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, tl, i32)
+                   void, env, ptr, tl, i64)
DEF_HELPER_FLAGS_4(sve_ldnf1dd_le_r_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, tl, i32)
+                   void, env, ptr, tl, i64)
DEF_HELPER_FLAGS_4(sve_ldnf1dd_be_r_mte, TCG_CALL_NO_WG,
-                   void, env, ptr, tl, i32)
-
-DEF_HELPER_FLAGS_4(sve_st1bb_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_st2bb_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_st3bb_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_st4bb_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-
-DEF_HELPER_FLAGS_4(sve_st1hh_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_st2hh_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_st3hh_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_st4hh_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-
-DEF_HELPER_FLAGS_4(sve_st1hh_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_st2hh_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_st3hh_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_st4hh_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-
-DEF_HELPER_FLAGS_4(sve_st1ss_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_st2ss_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_st3ss_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_st4ss_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-
-DEF_HELPER_FLAGS_4(sve_st1ss_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_st2ss_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_st3ss_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_st4ss_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-
-DEF_HELPER_FLAGS_4(sve_st1dd_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_st2dd_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_st3dd_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_st4dd_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-
-DEF_HELPER_FLAGS_4(sve_st1dd_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_st2dd_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_st3dd_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_st4dd_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-
-DEF_HELPER_FLAGS_4(sve_st1bh_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_st1bs_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_st1bd_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-
-DEF_HELPER_FLAGS_4(sve_st1hs_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_st1hd_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_st1hs_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_st1hd_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-
-DEF_HELPER_FLAGS_4(sve_st1sd_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_st1sd_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-
-DEF_HELPER_FLAGS_4(sve_st1bb_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_st2bb_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_st3bb_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_st4bb_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-
-DEF_HELPER_FLAGS_4(sve_st1hh_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_st2hh_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_st3hh_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_st4hh_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-
-DEF_HELPER_FLAGS_4(sve_st1hh_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_st2hh_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_st3hh_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_st4hh_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-
-DEF_HELPER_FLAGS_4(sve_st1ss_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_st2ss_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_st3ss_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_st4ss_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-
-DEF_HELPER_FLAGS_4(sve_st1ss_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_st2ss_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_st3ss_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_st4ss_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-
-DEF_HELPER_FLAGS_4(sve_st1dd_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_st2dd_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_st3dd_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_st4dd_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-
-DEF_HELPER_FLAGS_4(sve_st1dd_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_st2dd_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_st3dd_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_st4dd_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-
-DEF_HELPER_FLAGS_4(sve_st1bh_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_st1bs_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_st1bd_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-
-DEF_HELPER_FLAGS_4(sve_st1hs_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_st1hd_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_st1hs_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_st1hd_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-
-DEF_HELPER_FLAGS_4(sve_st1sd_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_st1sd_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+                   void, env, ptr, tl, i64)
+
+DEF_HELPER_FLAGS_4(sve_st1bb_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_st2bb_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_st3bb_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_st4bb_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+
+DEF_HELPER_FLAGS_4(sve_st1hh_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_st2hh_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_st3hh_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_st4hh_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+
+DEF_HELPER_FLAGS_4(sve_st1hh_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_st2hh_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_st3hh_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_st4hh_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+
+DEF_HELPER_FLAGS_4(sve_st1ss_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_st2ss_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_st3ss_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_st4ss_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+
+DEF_HELPER_FLAGS_4(sve_st1ss_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_st2ss_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64)
+DEF_HELPER_FLAGS_4(sve_st3ss_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_st4ss_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64) + +DEF_HELPER_FLAGS_4(sve_st1dd_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_st2dd_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_st3dd_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_st4dd_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64) + +DEF_HELPER_FLAGS_4(sve_st1dd_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_st2dd_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_st3dd_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_st4dd_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64) + +DEF_HELPER_FLAGS_4(sve_st2qq_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_st3qq_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_st4qq_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64) + +DEF_HELPER_FLAGS_4(sve_st2qq_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_st3qq_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_st4qq_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64) + +DEF_HELPER_FLAGS_4(sve_st1bh_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_st1bs_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_st1bd_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64) + +DEF_HELPER_FLAGS_4(sve_st1hs_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_st1hd_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_st1hs_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_st1hd_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64) + +DEF_HELPER_FLAGS_4(sve_st1sd_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_st1sd_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64) + +DEF_HELPER_FLAGS_4(sve_st1sq_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_st1sq_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_st1dq_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_st1dq_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i64) + +DEF_HELPER_FLAGS_4(sve_st1bb_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_st2bb_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_st3bb_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_st4bb_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64) + +DEF_HELPER_FLAGS_4(sve_st1hh_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_st2hh_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_st3hh_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_st4hh_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64) + +DEF_HELPER_FLAGS_4(sve_st1hh_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_st2hh_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_st3hh_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_st4hh_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64) + +DEF_HELPER_FLAGS_4(sve_st1ss_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_st2ss_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_st3ss_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_st4ss_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64) + 
+DEF_HELPER_FLAGS_4(sve_st1ss_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_st2ss_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_st3ss_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_st4ss_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64) + +DEF_HELPER_FLAGS_4(sve_st1dd_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_st2dd_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_st3dd_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_st4dd_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64) + +DEF_HELPER_FLAGS_4(sve_st1dd_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_st2dd_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_st3dd_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_st4dd_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64) + +DEF_HELPER_FLAGS_4(sve_st2qq_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_st3qq_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_st4qq_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64) + +DEF_HELPER_FLAGS_4(sve_st2qq_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_st3qq_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_st4qq_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64) + +DEF_HELPER_FLAGS_4(sve_st1bh_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_st1bs_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_st1bd_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64) + +DEF_HELPER_FLAGS_4(sve_st1hs_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_st1hd_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_st1hs_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_st1hd_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64) + +DEF_HELPER_FLAGS_4(sve_st1sd_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_st1sd_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64) + +DEF_HELPER_FLAGS_4(sve_st1sq_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_st1sq_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_st1dq_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64) +DEF_HELPER_FLAGS_4(sve_st1dq_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldbsu_zsu, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldhsu_le_zsu, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldhsu_be_zsu, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldss_le_zsu, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldss_be_zsu, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldbss_zsu, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldhss_le_zsu, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldhss_be_zsu, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) 
DEF_HELPER_FLAGS_6(sve_ldbsu_zss, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldhsu_le_zss, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldhsu_be_zss, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldss_le_zss, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldss_be_zss, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldbss_zss, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldhss_le_zss, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldhss_be_zss, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldbdu_zsu, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldhdu_le_zsu, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldhdu_be_zsu, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldsdu_le_zsu, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldsdu_be_zsu, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_lddd_le_zsu, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_lddd_be_zsu, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldbds_zsu, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldhds_le_zsu, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldhds_be_zsu, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldsds_le_zsu, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldsds_be_zsu, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldbdu_zss, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldhdu_le_zss, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldhdu_be_zss, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldsdu_le_zss, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldsdu_be_zss, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_lddd_le_zss, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_lddd_be_zss, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldbds_zss, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) 
DEF_HELPER_FLAGS_6(sve_ldhds_le_zss, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldhds_be_zss, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldsds_le_zss, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldsds_be_zss, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldbdu_zd, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldhdu_le_zd, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldhdu_be_zd, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldsdu_le_zd, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldsdu_be_zd, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_lddd_le_zd, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_lddd_be_zd, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldbds_zd, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldhds_le_zd, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldhds_be_zd, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldsds_le_zd, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldsds_be_zd, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) +DEF_HELPER_FLAGS_6(sve_ldqq_le_zd, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i64) +DEF_HELPER_FLAGS_6(sve_ldqq_be_zd, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldbsu_zsu_mte, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldhsu_le_zsu_mte, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldhsu_be_zsu_mte, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldss_le_zsu_mte, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldss_be_zsu_mte, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldbss_zsu_mte, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldhss_le_zsu_mte, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldhss_be_zsu_mte, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldbsu_zss_mte, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldhsu_le_zss_mte, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldhsu_be_zss_mte, TCG_CALL_NO_WG, - void, 
env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldss_le_zss_mte, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldss_be_zss_mte, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldbss_zss_mte, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldhss_le_zss_mte, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldhss_be_zss_mte, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldbdu_zsu_mte, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldhdu_le_zsu_mte, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldhdu_be_zsu_mte, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldsdu_le_zsu_mte, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldsdu_be_zsu_mte, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_lddd_le_zsu_mte, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_lddd_be_zsu_mte, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldbds_zsu_mte, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldhds_le_zsu_mte, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldhds_be_zsu_mte, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldsds_le_zsu_mte, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldsds_be_zsu_mte, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldbdu_zss_mte, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldhdu_le_zss_mte, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldhdu_be_zss_mte, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldsdu_le_zss_mte, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldsdu_be_zss_mte, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_lddd_le_zss_mte, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_lddd_be_zss_mte, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldbds_zss_mte, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldhds_le_zss_mte, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldhds_be_zss_mte, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, 
env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldsds_le_zss_mte, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldsds_be_zss_mte, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldbdu_zd_mte, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldhdu_le_zd_mte, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldhdu_be_zd_mte, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldsdu_le_zd_mte, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldsdu_be_zd_mte, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_lddd_le_zd_mte, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_lddd_be_zd_mte, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldbds_zd_mte, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldhds_le_zd_mte, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldhds_be_zd_mte, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldsds_le_zd_mte, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldsds_be_zd_mte, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) +DEF_HELPER_FLAGS_6(sve_ldqq_le_zd_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i64) +DEF_HELPER_FLAGS_6(sve_ldqq_be_zd_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldffbsu_zsu, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldffhsu_le_zsu, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldffhsu_be_zsu, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldffss_le_zsu, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldffss_be_zsu, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldffbss_zsu, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldffhss_le_zsu, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldffhss_be_zsu, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldffbsu_zss, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldffhsu_le_zss, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldffhsu_be_zss, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldffss_le_zss, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, 
ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldffss_be_zss, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldffbss_zss, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldffhss_le_zss, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldffhss_be_zss, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldffbdu_zsu, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldffhdu_le_zsu, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldffhdu_be_zsu, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldffsdu_le_zsu, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldffsdu_be_zsu, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldffdd_le_zsu, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldffdd_be_zsu, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldffbds_zsu, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldffhds_le_zsu, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldffhds_be_zsu, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldffsds_le_zsu, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldffsds_be_zsu, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldffbdu_zss, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldffhdu_le_zss, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldffhdu_be_zss, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldffsdu_le_zss, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldffsdu_be_zss, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldffdd_le_zss, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldffdd_be_zss, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldffbds_zss, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldffhds_le_zss, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldffhds_be_zss, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldffsds_le_zss, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldffsds_be_zss, TCG_CALL_NO_WG, - void, env, ptr, ptr, 
ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldffbdu_zd, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldffhdu_le_zd, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldffhdu_be_zd, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldffsdu_le_zd, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldffsdu_be_zd, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldffdd_le_zd, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldffdd_be_zd, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldffbds_zd, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldffhds_le_zd, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldffhds_be_zd, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldffsds_le_zd, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldffsds_be_zd, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldffbsu_zsu_mte, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldffhsu_le_zsu_mte, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldffhsu_be_zsu_mte, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldffss_le_zsu_mte, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldffss_be_zsu_mte, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldffbss_zsu_mte, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldffhss_le_zsu_mte, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldffhss_be_zsu_mte, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldffbsu_zss_mte, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldffhsu_le_zss_mte, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldffhsu_be_zss_mte, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldffss_le_zss_mte, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldffss_be_zss_mte, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldffbss_zss_mte, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldffhss_le_zss_mte, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, 
i64) DEF_HELPER_FLAGS_6(sve_ldffhss_be_zss_mte, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldffbdu_zsu_mte, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldffhdu_le_zsu_mte, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldffhdu_be_zsu_mte, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldffsdu_le_zsu_mte, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldffsdu_be_zsu_mte, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldffdd_le_zsu_mte, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldffdd_be_zsu_mte, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldffbds_zsu_mte, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldffhds_le_zsu_mte, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldffhds_be_zsu_mte, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldffsds_le_zsu_mte, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldffsds_be_zsu_mte, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldffbdu_zss_mte, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldffhdu_le_zss_mte, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldffhdu_be_zss_mte, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldffsdu_le_zss_mte, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldffsdu_be_zss_mte, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldffdd_le_zss_mte, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldffdd_be_zss_mte, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldffbds_zss_mte, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldffhds_le_zss_mte, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldffhds_be_zss_mte, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldffsds_le_zss_mte, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldffsds_be_zss_mte, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldffbdu_zd_mte, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldffhdu_le_zd_mte, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, 
ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldffhdu_be_zd_mte, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldffsdu_le_zd_mte, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldffsdu_be_zd_mte, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldffdd_le_zd_mte, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldffdd_be_zd_mte, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldffbds_zd_mte, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldffhds_le_zd_mte, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldffhds_be_zd_mte, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldffsds_le_zd_mte, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_ldffsds_be_zd_mte, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_stbs_zsu, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_sths_le_zsu, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_sths_be_zsu, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_stss_le_zsu, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_stss_be_zsu, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_stbs_zss, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_sths_le_zss, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_sths_be_zss, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_stss_le_zss, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_stss_be_zss, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_stbd_zsu, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_sthd_le_zsu, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_sthd_be_zsu, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_stsd_le_zsu, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_stsd_be_zsu, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_stdd_le_zsu, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_stdd_be_zsu, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_stbd_zss, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, 
i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_sthd_le_zss, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_sthd_be_zss, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_stsd_le_zss, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_stsd_be_zss, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_stdd_le_zss, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_stdd_be_zss, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_stbd_zd, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_sthd_le_zd, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_sthd_be_zd, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_stsd_le_zd, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_stsd_be_zd, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_stdd_le_zd, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_stdd_be_zd, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) +DEF_HELPER_FLAGS_6(sve_stqq_le_zd, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i64) +DEF_HELPER_FLAGS_6(sve_stqq_be_zd, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_stbs_zsu_mte, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_sths_le_zsu_mte, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_sths_be_zsu_mte, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_stss_le_zsu_mte, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_stss_be_zsu_mte, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_stbs_zss_mte, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_sths_le_zss_mte, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_sths_be_zss_mte, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_stss_le_zss_mte, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_stss_be_zss_mte, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_stbd_zsu_mte, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_sthd_le_zsu_mte, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_sthd_be_zsu_mte, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) 
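(For orientation only, not part of the patch: a DEF_HELPER_FLAGS_6 entry such as the new sve_stqq_le_zd above expands, under the usual helper.h type mapping, into a C prototype along the lines of the sketch below; the trailing argument is the operation descriptor that these hunks widen from i32 to i64.)

    /* Sketch assuming the standard mapping: env -> CPUARMState *, ptr -> void *,
     * tl -> target_ulong, i64 -> uint64_t.  The final uint64_t is the
     * descriptor, previously declared as uint32_t. */
    void helper_sve_stqq_le_zd(CPUARMState *env, void *vd, void *vg, void *vm,
                               target_ulong base, uint64_t desc);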
DEF_HELPER_FLAGS_6(sve_stsd_le_zsu_mte, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_stsd_be_zsu_mte, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_stdd_le_zsu_mte, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_stdd_be_zsu_mte, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_stbd_zss_mte, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_sthd_le_zss_mte, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_sthd_be_zss_mte, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_stsd_le_zss_mte, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_stsd_be_zss_mte, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_stdd_le_zss_mte, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_stdd_be_zss_mte, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_stbd_zd_mte, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_sthd_le_zd_mte, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_sthd_be_zd_mte, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_stsd_le_zd_mte, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_stsd_be_zd_mte, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_stdd_le_zd_mte, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_6(sve_stdd_be_zd_mte, TCG_CALL_NO_WG, - void, env, ptr, ptr, ptr, tl, i32) + void, env, ptr, ptr, ptr, tl, i64) +DEF_HELPER_FLAGS_6(sve_stqq_le_zd_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i64) +DEF_HELPER_FLAGS_6(sve_stqq_be_zd_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i64) DEF_HELPER_FLAGS_4(sve2_sqdmull_zzz_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) @@ -2922,3 +3100,69 @@ DEF_HELPER_FLAGS_4(sve2_sqshlu_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_4(sve2_sqshlu_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_4(sve2_sqshlu_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_4(sve2_sqshlu_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(sve2p1_addqv_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(sve2p1_addqv_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(sve2p1_addqv_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(sve2p1_addqv_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(sve2p1_smaxqv_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(sve2p1_smaxqv_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(sve2p1_smaxqv_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(sve2p1_smaxqv_d, TCG_CALL_NO_RWG, void, ptr, 
ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(sve2p1_sminqv_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(sve2p1_sminqv_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(sve2p1_sminqv_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(sve2p1_sminqv_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(sve2p1_umaxqv_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(sve2p1_umaxqv_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(sve2p1_umaxqv_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(sve2p1_umaxqv_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(sve2p1_uminqv_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(sve2p1_uminqv_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(sve2p1_uminqv_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(sve2p1_uminqv_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_3(pext, TCG_CALL_NO_RWG, void, ptr, i32, i32) + +DEF_HELPER_FLAGS_4(sve2p1_orqv_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(sve2p1_orqv_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(sve2p1_orqv_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(sve2p1_orqv_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(sve2p1_eorqv_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(sve2p1_eorqv_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(sve2p1_eorqv_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(sve2p1_eorqv_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(sve2p1_andqv_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(sve2p1_andqv_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(sve2p1_andqv_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(sve2p1_andqv_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_3(pmov_pv_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(pmov_pv_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(pmov_pv_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32) + +DEF_HELPER_FLAGS_3(pmov_vp_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(pmov_vp_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(pmov_vp_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32) + +DEF_HELPER_FLAGS_5(sve2p1_ld1bb_c, TCG_CALL_NO_WG, void, env, ptr, tl, i32, i64) +DEF_HELPER_FLAGS_5(sve2p1_ld1hh_le_c, TCG_CALL_NO_WG, void, env, ptr, tl, i32, i64) +DEF_HELPER_FLAGS_5(sve2p1_ld1hh_be_c, TCG_CALL_NO_WG, void, env, ptr, tl, i32, i64) +DEF_HELPER_FLAGS_5(sve2p1_ld1ss_le_c, TCG_CALL_NO_WG, void, env, ptr, tl, i32, i64) +DEF_HELPER_FLAGS_5(sve2p1_ld1ss_be_c, TCG_CALL_NO_WG, void, env, ptr, tl, i32, i64) +DEF_HELPER_FLAGS_5(sve2p1_ld1dd_le_c, TCG_CALL_NO_WG, void, env, ptr, tl, i32, i64) +DEF_HELPER_FLAGS_5(sve2p1_ld1dd_be_c, TCG_CALL_NO_WG, void, env, ptr, tl, i32, i64) + +DEF_HELPER_FLAGS_5(sve2p1_st1bb_c, TCG_CALL_NO_WG, void, env, ptr, tl, i32, i64) +DEF_HELPER_FLAGS_5(sve2p1_st1hh_le_c, TCG_CALL_NO_WG, void, env, ptr, tl, i32, i64) +DEF_HELPER_FLAGS_5(sve2p1_st1hh_be_c, TCG_CALL_NO_WG, void, env, ptr, tl, i32, i64) +DEF_HELPER_FLAGS_5(sve2p1_st1ss_le_c, TCG_CALL_NO_WG, void, env, ptr, tl, i32, i64) +DEF_HELPER_FLAGS_5(sve2p1_st1ss_be_c, TCG_CALL_NO_WG, void, env, ptr, tl, i32, i64) +DEF_HELPER_FLAGS_5(sve2p1_st1dd_le_c, TCG_CALL_NO_WG, void, env, ptr, tl, i32, i64) 
+DEF_HELPER_FLAGS_5(sve2p1_st1dd_be_c, TCG_CALL_NO_WG, void, env, ptr, tl, i32, i64) diff --git a/target/arm/tcg/helper.h b/target/arm/tcg/helper.h index 80db7c2..4636d1b 100644 --- a/target/arm/tcg/helper.h +++ b/target/arm/tcg/helper.h @@ -353,6 +353,14 @@ DEF_HELPER_FLAGS_4(gvec_urshl_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_4(gvec_urshl_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_4(gvec_urshl_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(sme2_srshl_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(sme2_srshl_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(sme2_srshl_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(sme2_urshl_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(sme2_urshl_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(sme2_urshl_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + DEF_HELPER_2(neon_add_u8, i32, i32, i32) DEF_HELPER_2(neon_add_u16, i32, i32, i32) DEF_HELPER_2(neon_sub_u8, i32, i32, i32) @@ -436,101 +444,6 @@ DEF_HELPER_3(neon_acgt_f32, i32, i32, i32, fpst) DEF_HELPER_3(neon_acge_f64, i64, i64, i64, fpst) DEF_HELPER_3(neon_acgt_f64, i64, i64, i64, fpst) -/* iwmmxt_helper.c */ -DEF_HELPER_2(iwmmxt_maddsq, i64, i64, i64) -DEF_HELPER_2(iwmmxt_madduq, i64, i64, i64) -DEF_HELPER_2(iwmmxt_sadb, i64, i64, i64) -DEF_HELPER_2(iwmmxt_sadw, i64, i64, i64) -DEF_HELPER_2(iwmmxt_mulslw, i64, i64, i64) -DEF_HELPER_2(iwmmxt_mulshw, i64, i64, i64) -DEF_HELPER_2(iwmmxt_mululw, i64, i64, i64) -DEF_HELPER_2(iwmmxt_muluhw, i64, i64, i64) -DEF_HELPER_2(iwmmxt_macsw, i64, i64, i64) -DEF_HELPER_2(iwmmxt_macuw, i64, i64, i64) -DEF_HELPER_1(iwmmxt_setpsr_nz, i32, i64) - -#define DEF_IWMMXT_HELPER_SIZE_ENV(name) \ -DEF_HELPER_3(iwmmxt_##name##b, i64, env, i64, i64) \ -DEF_HELPER_3(iwmmxt_##name##w, i64, env, i64, i64) \ -DEF_HELPER_3(iwmmxt_##name##l, i64, env, i64, i64) \ - -DEF_IWMMXT_HELPER_SIZE_ENV(unpackl) -DEF_IWMMXT_HELPER_SIZE_ENV(unpackh) - -DEF_HELPER_2(iwmmxt_unpacklub, i64, env, i64) -DEF_HELPER_2(iwmmxt_unpackluw, i64, env, i64) -DEF_HELPER_2(iwmmxt_unpacklul, i64, env, i64) -DEF_HELPER_2(iwmmxt_unpackhub, i64, env, i64) -DEF_HELPER_2(iwmmxt_unpackhuw, i64, env, i64) -DEF_HELPER_2(iwmmxt_unpackhul, i64, env, i64) -DEF_HELPER_2(iwmmxt_unpacklsb, i64, env, i64) -DEF_HELPER_2(iwmmxt_unpacklsw, i64, env, i64) -DEF_HELPER_2(iwmmxt_unpacklsl, i64, env, i64) -DEF_HELPER_2(iwmmxt_unpackhsb, i64, env, i64) -DEF_HELPER_2(iwmmxt_unpackhsw, i64, env, i64) -DEF_HELPER_2(iwmmxt_unpackhsl, i64, env, i64) - -DEF_IWMMXT_HELPER_SIZE_ENV(cmpeq) -DEF_IWMMXT_HELPER_SIZE_ENV(cmpgtu) -DEF_IWMMXT_HELPER_SIZE_ENV(cmpgts) - -DEF_IWMMXT_HELPER_SIZE_ENV(mins) -DEF_IWMMXT_HELPER_SIZE_ENV(minu) -DEF_IWMMXT_HELPER_SIZE_ENV(maxs) -DEF_IWMMXT_HELPER_SIZE_ENV(maxu) - -DEF_IWMMXT_HELPER_SIZE_ENV(subn) -DEF_IWMMXT_HELPER_SIZE_ENV(addn) -DEF_IWMMXT_HELPER_SIZE_ENV(subu) -DEF_IWMMXT_HELPER_SIZE_ENV(addu) -DEF_IWMMXT_HELPER_SIZE_ENV(subs) -DEF_IWMMXT_HELPER_SIZE_ENV(adds) - -DEF_HELPER_3(iwmmxt_avgb0, i64, env, i64, i64) -DEF_HELPER_3(iwmmxt_avgb1, i64, env, i64, i64) -DEF_HELPER_3(iwmmxt_avgw0, i64, env, i64, i64) -DEF_HELPER_3(iwmmxt_avgw1, i64, env, i64, i64) - -DEF_HELPER_3(iwmmxt_align, i64, i64, i64, i32) -DEF_HELPER_4(iwmmxt_insr, i64, i64, i32, i32, i32) - -DEF_HELPER_1(iwmmxt_bcstb, i64, i32) -DEF_HELPER_1(iwmmxt_bcstw, i64, i32) -DEF_HELPER_1(iwmmxt_bcstl, i64, i32) - -DEF_HELPER_1(iwmmxt_addcb, i64, i64) -DEF_HELPER_1(iwmmxt_addcw, i64, i64) 
-DEF_HELPER_1(iwmmxt_addcl, i64, i64) - -DEF_HELPER_1(iwmmxt_msbb, i32, i64) -DEF_HELPER_1(iwmmxt_msbw, i32, i64) -DEF_HELPER_1(iwmmxt_msbl, i32, i64) - -DEF_HELPER_3(iwmmxt_srlw, i64, env, i64, i32) -DEF_HELPER_3(iwmmxt_srll, i64, env, i64, i32) -DEF_HELPER_3(iwmmxt_srlq, i64, env, i64, i32) -DEF_HELPER_3(iwmmxt_sllw, i64, env, i64, i32) -DEF_HELPER_3(iwmmxt_slll, i64, env, i64, i32) -DEF_HELPER_3(iwmmxt_sllq, i64, env, i64, i32) -DEF_HELPER_3(iwmmxt_sraw, i64, env, i64, i32) -DEF_HELPER_3(iwmmxt_sral, i64, env, i64, i32) -DEF_HELPER_3(iwmmxt_sraq, i64, env, i64, i32) -DEF_HELPER_3(iwmmxt_rorw, i64, env, i64, i32) -DEF_HELPER_3(iwmmxt_rorl, i64, env, i64, i32) -DEF_HELPER_3(iwmmxt_rorq, i64, env, i64, i32) -DEF_HELPER_3(iwmmxt_shufh, i64, env, i64, i32) - -DEF_HELPER_3(iwmmxt_packuw, i64, env, i64, i64) -DEF_HELPER_3(iwmmxt_packul, i64, env, i64, i64) -DEF_HELPER_3(iwmmxt_packuq, i64, env, i64, i64) -DEF_HELPER_3(iwmmxt_packsw, i64, env, i64, i64) -DEF_HELPER_3(iwmmxt_packsl, i64, env, i64, i64) -DEF_HELPER_3(iwmmxt_packsq, i64, env, i64, i64) - -DEF_HELPER_3(iwmmxt_muladdsl, i64, i64, i32, i32) -DEF_HELPER_3(iwmmxt_muladdsw, i64, i64, i32, i32) -DEF_HELPER_3(iwmmxt_muladdswl, i64, i64, i32, i32) - DEF_HELPER_FLAGS_2(neon_unzip8, TCG_CALL_NO_RWG, void, ptr, ptr) DEF_HELPER_FLAGS_2(neon_unzip16, TCG_CALL_NO_RWG, void, ptr, ptr) DEF_HELPER_FLAGS_2(neon_qunzip8, TCG_CALL_NO_RWG, void, ptr, ptr) @@ -608,23 +521,31 @@ DEF_HELPER_FLAGS_5(sve2_sqrdmlah_d, TCG_CALL_NO_RWG, DEF_HELPER_FLAGS_5(sve2_sqrdmlsh_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_sdot_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_udot_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_sdot_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_udot_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_usdot_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_sdot_4b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_udot_4b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_sdot_4h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_udot_4h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_usdot_4b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_sdot_idx_b, TCG_CALL_NO_RWG, +DEF_HELPER_FLAGS_5(gvec_sdot_2h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_udot_2h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_5(gvec_sdot_idx_4b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_udot_idx_b, TCG_CALL_NO_RWG, +DEF_HELPER_FLAGS_5(gvec_udot_idx_4b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_sdot_idx_h, TCG_CALL_NO_RWG, +DEF_HELPER_FLAGS_5(gvec_sdot_idx_4h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_udot_idx_h, TCG_CALL_NO_RWG, +DEF_HELPER_FLAGS_5(gvec_udot_idx_4h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_sudot_idx_b, TCG_CALL_NO_RWG, +DEF_HELPER_FLAGS_5(gvec_sudot_idx_4b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(gvec_usdot_idx_b, TCG_CALL_NO_RWG, +DEF_HELPER_FLAGS_5(gvec_usdot_idx_4b, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_5(gvec_sdot_idx_2h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_udot_idx_2h, TCG_CALL_NO_RWG, void, 
ptr, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_5(gvec_fcaddh, TCG_CALL_NO_RWG, @@ -712,14 +633,19 @@ DEF_HELPER_FLAGS_4(gvec_fclt0_h, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_4(gvec_fclt0_s, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_4(gvec_fclt0_d, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(gvec_fadd_b16, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_5(gvec_fadd_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_5(gvec_fadd_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_5(gvec_fadd_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(gvec_bfadd, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(gvec_fsub_b16, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_5(gvec_fsub_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_5(gvec_fsub_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_5(gvec_fsub_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(gvec_bfsub, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(gvec_fmul_b16, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_5(gvec_fmul_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_5(gvec_fmul_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_5(gvec_fmul_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) @@ -774,23 +700,26 @@ DEF_HELPER_FLAGS_5(gvec_recps_nf_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, DEF_HELPER_FLAGS_5(gvec_rsqrts_nf_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_5(gvec_rsqrts_nf_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) -DEF_HELPER_FLAGS_5(gvec_fmla_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) -DEF_HELPER_FLAGS_5(gvec_fmla_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(gvec_fmla_nf_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(gvec_fmla_nf_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) -DEF_HELPER_FLAGS_5(gvec_fmls_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) -DEF_HELPER_FLAGS_5(gvec_fmls_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(gvec_fmls_nf_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(gvec_fmls_nf_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_5(gvec_vfma_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_5(gvec_vfma_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_5(gvec_vfma_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(gvec_bfmla, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_5(gvec_vfms_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_5(gvec_vfms_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_5(gvec_vfms_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(gvec_bfmls, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_5(gvec_ah_vfms_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_5(gvec_ah_vfms_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_5(gvec_ah_vfms_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(gvec_ah_bfmls, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_5(gvec_ftsmul_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) @@ -799,6 +728,8 @@ DEF_HELPER_FLAGS_5(gvec_ftsmul_s, TCG_CALL_NO_RWG, 
DEF_HELPER_FLAGS_5(gvec_ftsmul_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(gvec_fmul_idx_b16, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_5(gvec_fmul_idx_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_5(gvec_fmul_idx_s, TCG_CALL_NO_RWG, @@ -822,6 +753,8 @@ DEF_HELPER_FLAGS_6(gvec_fmla_idx_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_6(gvec_fmla_idx_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_6(gvec_bfmla_idx, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_6(gvec_fmls_idx_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, fpst, i32) @@ -829,6 +762,8 @@ DEF_HELPER_FLAGS_6(gvec_fmls_idx_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_6(gvec_fmls_idx_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_6(gvec_bfmls_idx, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_6(gvec_ah_fmls_idx_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, fpst, i32) @@ -836,6 +771,8 @@ DEF_HELPER_FLAGS_6(gvec_ah_fmls_idx_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_6(gvec_ah_fmls_idx_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_6(gvec_ah_bfmls_idx, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_5(gvec_uqadd_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) @@ -1081,14 +1018,24 @@ DEF_HELPER_FLAGS_6(gvec_bfdot, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, env, i32) DEF_HELPER_FLAGS_6(gvec_bfdot_idx, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_6(sme2_bfvdot_idx, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, env, i32) DEF_HELPER_FLAGS_6(gvec_bfmmla, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, env, i32) DEF_HELPER_FLAGS_6(gvec_bfmlal, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_6(gvec_bfmlsl, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_6(gvec_ah_bfmlsl, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_6(gvec_bfmlal_idx, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_6(gvec_bfmlsl_idx, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_6(gvec_ah_bfmlsl_idx, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_5(gvec_sclamp_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) @@ -1151,3 +1098,26 @@ DEF_HELPER_FLAGS_4(gvec_uminp_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_3(gvec_urecpe_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32) DEF_HELPER_FLAGS_3(gvec_ursqrte_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(sme2_luti2_1b, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_4(sme2_luti2_1h, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_4(sme2_luti2_1s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) + +DEF_HELPER_FLAGS_4(sme2_luti2_2b, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_4(sme2_luti2_2h, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_4(sme2_luti2_2s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) + +DEF_HELPER_FLAGS_4(sme2_luti2_4b, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_4(sme2_luti2_4h, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_4(sme2_luti2_4s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) + +DEF_HELPER_FLAGS_4(sme2_luti4_1b, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_4(sme2_luti4_1h, TCG_CALL_NO_RWG, void, 
ptr, ptr, env, i32) +DEF_HELPER_FLAGS_4(sme2_luti4_1s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) + +DEF_HELPER_FLAGS_4(sme2_luti4_2b, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_4(sme2_luti4_2h, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_4(sme2_luti4_2s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) + +DEF_HELPER_FLAGS_4(sme2_luti4_4h, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) +DEF_HELPER_FLAGS_4(sme2_luti4_4s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32) diff --git a/target/arm/tcg/hflags.c b/target/arm/tcg/hflags.c index 1ccec63..5c9b9be 100644 --- a/target/arm/tcg/hflags.c +++ b/target/arm/tcg/hflags.c @@ -214,6 +214,31 @@ static CPUARMTBFlags rebuild_hflags_a32(CPUARMState *env, int fp_el, return rebuild_hflags_common_32(env, fp_el, mmu_idx, flags); } +/* + * Return the exception level to which exceptions should be taken for ZT0. + * C.f. the ARM pseudocode function CheckSMEZT0Enabled, after the ZA check. + */ +static int zt0_exception_el(CPUARMState *env, int el) +{ +#ifndef CONFIG_USER_ONLY + if (el <= 1 + && !el_is_in_host(env, el) + && !FIELD_EX64(env->vfp.smcr_el[1], SMCR, EZT0)) { + return 1; + } + if (el <= 2 + && arm_is_el2_enabled(env) + && !FIELD_EX64(env->vfp.smcr_el[2], SMCR, EZT0)) { + return 2; + } + if (arm_feature(env, ARM_FEATURE_EL3) + && !FIELD_EX64(env->vfp.smcr_el[3], SMCR, EZT0)) { + return 3; + } +#endif + return 0; +} + static CPUARMTBFlags rebuild_hflags_a64(CPUARMState *env, int el, int fp_el, ARMMMUIdx mmu_idx) { @@ -233,6 +258,11 @@ static CPUARMTBFlags rebuild_hflags_a64(CPUARMState *env, int el, int fp_el, DP_TBFLAG_A64(flags, TBII, tbii); DP_TBFLAG_A64(flags, TBID, tbid); + /* E2H is used by both VHE and NV2. */ + if (hcr & HCR_E2H) { + DP_TBFLAG_A64(flags, E2H, 1); + } + if (cpu_isar_feature(aa64_sve, env_archcpu(env))) { int sve_el = sve_exception_el(env, el); @@ -269,7 +299,14 @@ static CPUARMTBFlags rebuild_hflags_a64(CPUARMState *env, int el, int fp_el, DP_TBFLAG_A64(flags, PSTATE_SM, 1); DP_TBFLAG_A64(flags, SME_TRAP_NONSTREAMING, !sme_fa64(env, el)); } - DP_TBFLAG_A64(flags, PSTATE_ZA, FIELD_EX64(env->svcr, SVCR, ZA)); + + if (FIELD_EX64(env->svcr, SVCR, ZA)) { + DP_TBFLAG_A64(flags, PSTATE_ZA, 1); + if (cpu_isar_feature(aa64_sme2, env_archcpu(env))) { + int zt0_el = zt0_exception_el(env, el); + DP_TBFLAG_A64(flags, ZT0EXC_EL, zt0_el); + } + } } sctlr = regime_sctlr(env, stage1); @@ -358,9 +395,6 @@ static CPUARMTBFlags rebuild_hflags_a64(CPUARMState *env, int el, int fp_el, } if (hcr & HCR_NV2) { DP_TBFLAG_A64(flags, NV2, 1); - if (hcr & HCR_E2H) { - DP_TBFLAG_A64(flags, NV2_MEM_E20, 1); - } if (env->cp15.sctlr_el[2] & SCTLR_EE) { DP_TBFLAG_A64(flags, NV2_MEM_BE, 1); } @@ -417,6 +451,44 @@ static CPUARMTBFlags rebuild_hflags_a64(CPUARMState *env, int el, int fp_el, DP_TBFLAG_A64(flags, TCMA, aa64_va_parameter_tcma(tcr, mmu_idx)); } + if (cpu_isar_feature(aa64_gcs, env_archcpu(env))) { + /* C.f. GCSEnabled */ + if (env->cp15.gcscr_el[el] & GCSCR_PCRSEL) { + switch (el) { + default: + if (!el_is_in_host(env, el) + && !(arm_hcrx_el2_eff(env) & HCRX_GCSEN)) { + break; + } + /* fall through */ + case 2: + if (arm_feature(env, ARM_FEATURE_EL3) + && !(env->cp15.scr_el3 & SCR_GCSEN)) { + break; + } + /* fall through */ + case 3: + DP_TBFLAG_A64(flags, GCS_EN, 1); + break; + } + } + + /* C.f. GCSReturnValueCheckEnabled */ + if (env->cp15.gcscr_el[el] & GCSCR_RVCHKEN) { + DP_TBFLAG_A64(flags, GCS_RVCEN, 1); + } + + /* C.f. 
CheckGCSSTREnabled */ + if (!(env->cp15.gcscr_el[el] & GCSCR_STREN)) { + DP_TBFLAG_A64(flags, GCSSTR_EL, el ? el : 1); + } else if (el == 1 + && EX_TBFLAG_ANY(flags, FGT_ACTIVE) + && !FIELD_EX64(env->cp15.fgt_exec[FGTREG_HFGITR], + HFGITR_EL2, NGCSSTR_EL1)) { + DP_TBFLAG_A64(flags, GCSSTR_EL, 2); + } + } + if (env->vfp.fpcr & FPCR_AH) { DP_TBFLAG_A64(flags, AH, 1); } @@ -592,16 +664,9 @@ TCGTBCPUState arm_get_tb_cpu_state(CPUState *cs) DP_TBFLAG_M32(flags, MVE_NO_PRED, 1); } } else { - /* - * Note that XSCALE_CPAR shares bits with VECSTRIDE. - * Note that VECLEN+VECSTRIDE are RES0 for M-profile. - */ - if (arm_feature(env, ARM_FEATURE_XSCALE)) { - DP_TBFLAG_A32(flags, XSCALE_CPAR, env->cp15.c15_cpar); - } else { - DP_TBFLAG_A32(flags, VECLEN, env->vfp.vec_len); - DP_TBFLAG_A32(flags, VECSTRIDE, env->vfp.vec_stride); - } + /* Note that VECLEN+VECSTRIDE are RES0 for M-profile. */ + DP_TBFLAG_A32(flags, VECLEN, env->vfp.vec_len); + DP_TBFLAG_A32(flags, VECSTRIDE, env->vfp.vec_stride); if (env->vfp.xregs[ARM_VFP_FPEXC] & (1 << 30)) { DP_TBFLAG_A32(flags, VFPEN, 1); } diff --git a/target/arm/tcg/iwmmxt_helper.c b/target/arm/tcg/iwmmxt_helper.c deleted file mode 100644 index ba054b6..0000000 --- a/target/arm/tcg/iwmmxt_helper.c +++ /dev/null @@ -1,672 +0,0 @@ -/* - * iwMMXt micro operations for XScale. - * - * Copyright (c) 2007 OpenedHand, Ltd. - * Written by Andrzej Zaborowski <andrew@openedhand.com> - * Copyright (c) 2008 CodeSourcery - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, see <http://www.gnu.org/licenses/>. - */ - -#include "qemu/osdep.h" - -#include "cpu.h" - -#define HELPER_H "tcg/helper.h" -#include "exec/helper-proto.h.inc" - -/* iwMMXt macros extracted from GNU gdb. */ - -/* Set the SIMD wCASF flags for 8, 16, 32 or 64-bit operations. */ -#define SIMD8_SET(v, n, b) ((v != 0) << ((((b) + 1) * 4) + (n))) -#define SIMD16_SET(v, n, h) ((v != 0) << ((((h) + 1) * 8) + (n))) -#define SIMD32_SET(v, n, w) ((v != 0) << ((((w) + 1) * 16) + (n))) -#define SIMD64_SET(v, n) ((v != 0) << (32 + (n))) -/* Flags to pass as "n" above. */ -#define SIMD_NBIT -1 -#define SIMD_ZBIT -2 -#define SIMD_CBIT -3 -#define SIMD_VBIT -4 -/* Various status bit macros. */ -#define NBIT8(x) ((x) & 0x80) -#define NBIT16(x) ((x) & 0x8000) -#define NBIT32(x) ((x) & 0x80000000) -#define NBIT64(x) ((x) & 0x8000000000000000ULL) -#define ZBIT8(x) (((x) & 0xff) == 0) -#define ZBIT16(x) (((x) & 0xffff) == 0) -#define ZBIT32(x) (((x) & 0xffffffff) == 0) -#define ZBIT64(x) (x == 0) -/* Sign extension macros. 
*/ -#define EXTEND8H(a) ((uint16_t) (int8_t) (a)) -#define EXTEND8(a) ((uint32_t) (int8_t) (a)) -#define EXTEND16(a) ((uint32_t) (int16_t) (a)) -#define EXTEND16S(a) ((int32_t) (int16_t) (a)) -#define EXTEND32(a) ((uint64_t) (int32_t) (a)) - -uint64_t HELPER(iwmmxt_maddsq)(uint64_t a, uint64_t b) -{ - a = (( - EXTEND16S((a >> 0) & 0xffff) * EXTEND16S((b >> 0) & 0xffff) + - EXTEND16S((a >> 16) & 0xffff) * EXTEND16S((b >> 16) & 0xffff) - ) & 0xffffffff) | ((uint64_t) ( - EXTEND16S((a >> 32) & 0xffff) * EXTEND16S((b >> 32) & 0xffff) + - EXTEND16S((a >> 48) & 0xffff) * EXTEND16S((b >> 48) & 0xffff) - ) << 32); - return a; -} - -uint64_t HELPER(iwmmxt_madduq)(uint64_t a, uint64_t b) -{ - a = (( - ((a >> 0) & 0xffff) * ((b >> 0) & 0xffff) + - ((a >> 16) & 0xffff) * ((b >> 16) & 0xffff) - ) & 0xffffffff) | (( - ((a >> 32) & 0xffff) * ((b >> 32) & 0xffff) + - ((a >> 48) & 0xffff) * ((b >> 48) & 0xffff) - ) << 32); - return a; -} - -uint64_t HELPER(iwmmxt_sadb)(uint64_t a, uint64_t b) -{ -#define abs(x) (((x) >= 0) ? x : -x) -#define SADB(SHR) abs((int) ((a >> SHR) & 0xff) - (int) ((b >> SHR) & 0xff)) - return - SADB(0) + SADB(8) + SADB(16) + SADB(24) + - SADB(32) + SADB(40) + SADB(48) + SADB(56); -#undef SADB -} - -uint64_t HELPER(iwmmxt_sadw)(uint64_t a, uint64_t b) -{ -#define SADW(SHR) \ - abs((int) ((a >> SHR) & 0xffff) - (int) ((b >> SHR) & 0xffff)) - return SADW(0) + SADW(16) + SADW(32) + SADW(48); -#undef SADW -} - -uint64_t HELPER(iwmmxt_mulslw)(uint64_t a, uint64_t b) -{ -#define MULS(SHR) ((uint64_t) ((( \ - EXTEND16S((a >> SHR) & 0xffff) * EXTEND16S((b >> SHR) & 0xffff) \ - ) >> 0) & 0xffff) << SHR) - return MULS(0) | MULS(16) | MULS(32) | MULS(48); -#undef MULS -} - -uint64_t HELPER(iwmmxt_mulshw)(uint64_t a, uint64_t b) -{ -#define MULS(SHR) ((uint64_t) ((( \ - EXTEND16S((a >> SHR) & 0xffff) * EXTEND16S((b >> SHR) & 0xffff) \ - ) >> 16) & 0xffff) << SHR) - return MULS(0) | MULS(16) | MULS(32) | MULS(48); -#undef MULS -} - -uint64_t HELPER(iwmmxt_mululw)(uint64_t a, uint64_t b) -{ -#define MULU(SHR) ((uint64_t) ((( \ - ((a >> SHR) & 0xffff) * ((b >> SHR) & 0xffff) \ - ) >> 0) & 0xffff) << SHR) - return MULU(0) | MULU(16) | MULU(32) | MULU(48); -#undef MULU -} - -uint64_t HELPER(iwmmxt_muluhw)(uint64_t a, uint64_t b) -{ -#define MULU(SHR) ((uint64_t) ((( \ - ((a >> SHR) & 0xffff) * ((b >> SHR) & 0xffff) \ - ) >> 16) & 0xffff) << SHR) - return MULU(0) | MULU(16) | MULU(32) | MULU(48); -#undef MULU -} - -uint64_t HELPER(iwmmxt_macsw)(uint64_t a, uint64_t b) -{ -#define MACS(SHR) ( \ - EXTEND16((a >> SHR) & 0xffff) * EXTEND16S((b >> SHR) & 0xffff)) - return (int64_t) (MACS(0) + MACS(16) + MACS(32) + MACS(48)); -#undef MACS -} - -uint64_t HELPER(iwmmxt_macuw)(uint64_t a, uint64_t b) -{ -#define MACU(SHR) ( \ - (uint32_t) ((a >> SHR) & 0xffff) * \ - (uint32_t) ((b >> SHR) & 0xffff)) - return MACU(0) + MACU(16) + MACU(32) + MACU(48); -#undef MACU -} - -#define NZBIT8(x, i) \ - SIMD8_SET(NBIT8((x) & 0xff), SIMD_NBIT, i) | \ - SIMD8_SET(ZBIT8((x) & 0xff), SIMD_ZBIT, i) -#define NZBIT16(x, i) \ - SIMD16_SET(NBIT16((x) & 0xffff), SIMD_NBIT, i) | \ - SIMD16_SET(ZBIT16((x) & 0xffff), SIMD_ZBIT, i) -#define NZBIT32(x, i) \ - SIMD32_SET(NBIT32((x) & 0xffffffff), SIMD_NBIT, i) | \ - SIMD32_SET(ZBIT32((x) & 0xffffffff), SIMD_ZBIT, i) -#define NZBIT64(x) \ - SIMD64_SET(NBIT64(x), SIMD_NBIT) | \ - SIMD64_SET(ZBIT64(x), SIMD_ZBIT) -#define IWMMXT_OP_UNPACK(S, SH0, SH1, SH2, SH3) \ -uint64_t HELPER(glue(iwmmxt_unpack, glue(S, b)))(CPUARMState *env, \ - uint64_t a, uint64_t b) \ -{ \ - a = \ - (((a >> SH0) 
& 0xff) << 0) | (((b >> SH0) & 0xff) << 8) | \ - (((a >> SH1) & 0xff) << 16) | (((b >> SH1) & 0xff) << 24) | \ - (((a >> SH2) & 0xff) << 32) | (((b >> SH2) & 0xff) << 40) | \ - (((a >> SH3) & 0xff) << 48) | (((b >> SH3) & 0xff) << 56); \ - env->iwmmxt.cregs[ARM_IWMMXT_wCASF] = \ - NZBIT8(a >> 0, 0) | NZBIT8(a >> 8, 1) | \ - NZBIT8(a >> 16, 2) | NZBIT8(a >> 24, 3) | \ - NZBIT8(a >> 32, 4) | NZBIT8(a >> 40, 5) | \ - NZBIT8(a >> 48, 6) | NZBIT8(a >> 56, 7); \ - return a; \ -} \ -uint64_t HELPER(glue(iwmmxt_unpack, glue(S, w)))(CPUARMState *env, \ - uint64_t a, uint64_t b) \ -{ \ - a = \ - (((a >> SH0) & 0xffff) << 0) | \ - (((b >> SH0) & 0xffff) << 16) | \ - (((a >> SH2) & 0xffff) << 32) | \ - (((b >> SH2) & 0xffff) << 48); \ - env->iwmmxt.cregs[ARM_IWMMXT_wCASF] = \ - NZBIT8(a >> 0, 0) | NZBIT8(a >> 16, 1) | \ - NZBIT8(a >> 32, 2) | NZBIT8(a >> 48, 3); \ - return a; \ -} \ -uint64_t HELPER(glue(iwmmxt_unpack, glue(S, l)))(CPUARMState *env, \ - uint64_t a, uint64_t b) \ -{ \ - a = \ - (((a >> SH0) & 0xffffffff) << 0) | \ - (((b >> SH0) & 0xffffffff) << 32); \ - env->iwmmxt.cregs[ARM_IWMMXT_wCASF] = \ - NZBIT32(a >> 0, 0) | NZBIT32(a >> 32, 1); \ - return a; \ -} \ -uint64_t HELPER(glue(iwmmxt_unpack, glue(S, ub)))(CPUARMState *env, \ - uint64_t x) \ -{ \ - x = \ - (((x >> SH0) & 0xff) << 0) | \ - (((x >> SH1) & 0xff) << 16) | \ - (((x >> SH2) & 0xff) << 32) | \ - (((x >> SH3) & 0xff) << 48); \ - env->iwmmxt.cregs[ARM_IWMMXT_wCASF] = \ - NZBIT16(x >> 0, 0) | NZBIT16(x >> 16, 1) | \ - NZBIT16(x >> 32, 2) | NZBIT16(x >> 48, 3); \ - return x; \ -} \ -uint64_t HELPER(glue(iwmmxt_unpack, glue(S, uw)))(CPUARMState *env, \ - uint64_t x) \ -{ \ - x = \ - (((x >> SH0) & 0xffff) << 0) | \ - (((x >> SH2) & 0xffff) << 32); \ - env->iwmmxt.cregs[ARM_IWMMXT_wCASF] = \ - NZBIT32(x >> 0, 0) | NZBIT32(x >> 32, 1); \ - return x; \ -} \ -uint64_t HELPER(glue(iwmmxt_unpack, glue(S, ul)))(CPUARMState *env, \ - uint64_t x) \ -{ \ - x = (((x >> SH0) & 0xffffffff) << 0); \ - env->iwmmxt.cregs[ARM_IWMMXT_wCASF] = NZBIT64(x >> 0); \ - return x; \ -} \ -uint64_t HELPER(glue(iwmmxt_unpack, glue(S, sb)))(CPUARMState *env, \ - uint64_t x) \ -{ \ - x = \ - ((uint64_t) EXTEND8H((x >> SH0) & 0xff) << 0) | \ - ((uint64_t) EXTEND8H((x >> SH1) & 0xff) << 16) | \ - ((uint64_t) EXTEND8H((x >> SH2) & 0xff) << 32) | \ - ((uint64_t) EXTEND8H((x >> SH3) & 0xff) << 48); \ - env->iwmmxt.cregs[ARM_IWMMXT_wCASF] = \ - NZBIT16(x >> 0, 0) | NZBIT16(x >> 16, 1) | \ - NZBIT16(x >> 32, 2) | NZBIT16(x >> 48, 3); \ - return x; \ -} \ -uint64_t HELPER(glue(iwmmxt_unpack, glue(S, sw)))(CPUARMState *env, \ - uint64_t x) \ -{ \ - x = \ - ((uint64_t) EXTEND16((x >> SH0) & 0xffff) << 0) | \ - ((uint64_t) EXTEND16((x >> SH2) & 0xffff) << 32); \ - env->iwmmxt.cregs[ARM_IWMMXT_wCASF] = \ - NZBIT32(x >> 0, 0) | NZBIT32(x >> 32, 1); \ - return x; \ -} \ -uint64_t HELPER(glue(iwmmxt_unpack, glue(S, sl)))(CPUARMState *env, \ - uint64_t x) \ -{ \ - x = EXTEND32((x >> SH0) & 0xffffffff); \ - env->iwmmxt.cregs[ARM_IWMMXT_wCASF] = NZBIT64(x >> 0); \ - return x; \ -} -IWMMXT_OP_UNPACK(l, 0, 8, 16, 24) -IWMMXT_OP_UNPACK(h, 32, 40, 48, 56) - -#define IWMMXT_OP_CMP(SUFF, Tb, Tw, Tl, O) \ -uint64_t HELPER(glue(iwmmxt_, glue(SUFF, b)))(CPUARMState *env, \ - uint64_t a, uint64_t b) \ -{ \ - a = \ - CMP(0, Tb, O, 0xff) | CMP(8, Tb, O, 0xff) | \ - CMP(16, Tb, O, 0xff) | CMP(24, Tb, O, 0xff) | \ - CMP(32, Tb, O, 0xff) | CMP(40, Tb, O, 0xff) | \ - CMP(48, Tb, O, 0xff) | CMP(56, Tb, O, 0xff); \ - env->iwmmxt.cregs[ARM_IWMMXT_wCASF] = \ - NZBIT8(a >> 0, 0) | NZBIT8(a >> 8, 
1) | \ - NZBIT8(a >> 16, 2) | NZBIT8(a >> 24, 3) | \ - NZBIT8(a >> 32, 4) | NZBIT8(a >> 40, 5) | \ - NZBIT8(a >> 48, 6) | NZBIT8(a >> 56, 7); \ - return a; \ -} \ -uint64_t HELPER(glue(iwmmxt_, glue(SUFF, w)))(CPUARMState *env, \ - uint64_t a, uint64_t b) \ -{ \ - a = CMP(0, Tw, O, 0xffff) | CMP(16, Tw, O, 0xffff) | \ - CMP(32, Tw, O, 0xffff) | CMP(48, Tw, O, 0xffff); \ - env->iwmmxt.cregs[ARM_IWMMXT_wCASF] = \ - NZBIT16(a >> 0, 0) | NZBIT16(a >> 16, 1) | \ - NZBIT16(a >> 32, 2) | NZBIT16(a >> 48, 3); \ - return a; \ -} \ -uint64_t HELPER(glue(iwmmxt_, glue(SUFF, l)))(CPUARMState *env, \ - uint64_t a, uint64_t b) \ -{ \ - a = CMP(0, Tl, O, 0xffffffff) | \ - CMP(32, Tl, O, 0xffffffff); \ - env->iwmmxt.cregs[ARM_IWMMXT_wCASF] = \ - NZBIT32(a >> 0, 0) | NZBIT32(a >> 32, 1); \ - return a; \ -} -#define CMP(SHR, TYPE, OPER, MASK) ((((TYPE) ((a >> SHR) & MASK) OPER \ - (TYPE) ((b >> SHR) & MASK)) ? (uint64_t) MASK : 0) << SHR) -IWMMXT_OP_CMP(cmpeq, uint8_t, uint16_t, uint32_t, ==) -IWMMXT_OP_CMP(cmpgts, int8_t, int16_t, int32_t, >) -IWMMXT_OP_CMP(cmpgtu, uint8_t, uint16_t, uint32_t, >) -#undef CMP -#define CMP(SHR, TYPE, OPER, MASK) ((((TYPE) ((a >> SHR) & MASK) OPER \ - (TYPE) ((b >> SHR) & MASK)) ? a : b) & ((uint64_t) MASK << SHR)) -IWMMXT_OP_CMP(mins, int8_t, int16_t, int32_t, <) -IWMMXT_OP_CMP(minu, uint8_t, uint16_t, uint32_t, <) -IWMMXT_OP_CMP(maxs, int8_t, int16_t, int32_t, >) -IWMMXT_OP_CMP(maxu, uint8_t, uint16_t, uint32_t, >) -#undef CMP -#define CMP(SHR, TYPE, OPER, MASK) ((uint64_t) (((TYPE) ((a >> SHR) & MASK) \ - OPER (TYPE) ((b >> SHR) & MASK)) & MASK) << SHR) -IWMMXT_OP_CMP(subn, uint8_t, uint16_t, uint32_t, -) -IWMMXT_OP_CMP(addn, uint8_t, uint16_t, uint32_t, +) -#undef CMP -/* TODO Signed- and Unsigned-Saturation */ -#define CMP(SHR, TYPE, OPER, MASK) ((uint64_t) (((TYPE) ((a >> SHR) & MASK) \ - OPER (TYPE) ((b >> SHR) & MASK)) & MASK) << SHR) -IWMMXT_OP_CMP(subu, uint8_t, uint16_t, uint32_t, -) -IWMMXT_OP_CMP(addu, uint8_t, uint16_t, uint32_t, +) -IWMMXT_OP_CMP(subs, int8_t, int16_t, int32_t, -) -IWMMXT_OP_CMP(adds, int8_t, int16_t, int32_t, +) -#undef CMP -#undef IWMMXT_OP_CMP - -#define AVGB(SHR) ((( \ - ((a >> SHR) & 0xff) + ((b >> SHR) & 0xff) + round) >> 1) << SHR) -#define IWMMXT_OP_AVGB(r) \ -uint64_t HELPER(iwmmxt_avgb##r)(CPUARMState *env, uint64_t a, uint64_t b) \ -{ \ - const int round = r; \ - a = AVGB(0) | AVGB(8) | AVGB(16) | AVGB(24) | \ - AVGB(32) | AVGB(40) | AVGB(48) | AVGB(56); \ - env->iwmmxt.cregs[ARM_IWMMXT_wCASF] = \ - SIMD8_SET(ZBIT8((a >> 0) & 0xff), SIMD_ZBIT, 0) | \ - SIMD8_SET(ZBIT8((a >> 8) & 0xff), SIMD_ZBIT, 1) | \ - SIMD8_SET(ZBIT8((a >> 16) & 0xff), SIMD_ZBIT, 2) | \ - SIMD8_SET(ZBIT8((a >> 24) & 0xff), SIMD_ZBIT, 3) | \ - SIMD8_SET(ZBIT8((a >> 32) & 0xff), SIMD_ZBIT, 4) | \ - SIMD8_SET(ZBIT8((a >> 40) & 0xff), SIMD_ZBIT, 5) | \ - SIMD8_SET(ZBIT8((a >> 48) & 0xff), SIMD_ZBIT, 6) | \ - SIMD8_SET(ZBIT8((a >> 56) & 0xff), SIMD_ZBIT, 7); \ - return a; \ -} -IWMMXT_OP_AVGB(0) -IWMMXT_OP_AVGB(1) -#undef IWMMXT_OP_AVGB -#undef AVGB - -#define AVGW(SHR) ((( \ - ((a >> SHR) & 0xffff) + ((b >> SHR) & 0xffff) + round) >> 1) << SHR) -#define IWMMXT_OP_AVGW(r) \ -uint64_t HELPER(iwmmxt_avgw##r)(CPUARMState *env, uint64_t a, uint64_t b) \ -{ \ - const int round = r; \ - a = AVGW(0) | AVGW(16) | AVGW(32) | AVGW(48); \ - env->iwmmxt.cregs[ARM_IWMMXT_wCASF] = \ - SIMD16_SET(ZBIT16((a >> 0) & 0xffff), SIMD_ZBIT, 0) | \ - SIMD16_SET(ZBIT16((a >> 16) & 0xffff), SIMD_ZBIT, 1) | \ - SIMD16_SET(ZBIT16((a >> 32) & 0xffff), SIMD_ZBIT, 2) | \ - SIMD16_SET(ZBIT16((a >> 
48) & 0xffff), SIMD_ZBIT, 3); \ - return a; \ -} -IWMMXT_OP_AVGW(0) -IWMMXT_OP_AVGW(1) -#undef IWMMXT_OP_AVGW -#undef AVGW - -uint64_t HELPER(iwmmxt_align)(uint64_t a, uint64_t b, uint32_t n) -{ - a >>= n << 3; - a |= b << (64 - (n << 3)); - return a; -} - -uint64_t HELPER(iwmmxt_insr)(uint64_t x, uint32_t a, uint32_t b, uint32_t n) -{ - x &= ~((uint64_t) b << n); - x |= (uint64_t) (a & b) << n; - return x; -} - -uint32_t HELPER(iwmmxt_setpsr_nz)(uint64_t x) -{ - return SIMD64_SET((x == 0), SIMD_ZBIT) | - SIMD64_SET((x & (1ULL << 63)), SIMD_NBIT); -} - -uint64_t HELPER(iwmmxt_bcstb)(uint32_t arg) -{ - arg &= 0xff; - return - ((uint64_t) arg << 0 ) | ((uint64_t) arg << 8 ) | - ((uint64_t) arg << 16) | ((uint64_t) arg << 24) | - ((uint64_t) arg << 32) | ((uint64_t) arg << 40) | - ((uint64_t) arg << 48) | ((uint64_t) arg << 56); -} - -uint64_t HELPER(iwmmxt_bcstw)(uint32_t arg) -{ - arg &= 0xffff; - return - ((uint64_t) arg << 0 ) | ((uint64_t) arg << 16) | - ((uint64_t) arg << 32) | ((uint64_t) arg << 48); -} - -uint64_t HELPER(iwmmxt_bcstl)(uint32_t arg) -{ - return arg | ((uint64_t) arg << 32); -} - -uint64_t HELPER(iwmmxt_addcb)(uint64_t x) -{ - return - ((x >> 0) & 0xff) + ((x >> 8) & 0xff) + - ((x >> 16) & 0xff) + ((x >> 24) & 0xff) + - ((x >> 32) & 0xff) + ((x >> 40) & 0xff) + - ((x >> 48) & 0xff) + ((x >> 56) & 0xff); -} - -uint64_t HELPER(iwmmxt_addcw)(uint64_t x) -{ - return - ((x >> 0) & 0xffff) + ((x >> 16) & 0xffff) + - ((x >> 32) & 0xffff) + ((x >> 48) & 0xffff); -} - -uint64_t HELPER(iwmmxt_addcl)(uint64_t x) -{ - return (x & 0xffffffff) + (x >> 32); -} - -uint32_t HELPER(iwmmxt_msbb)(uint64_t x) -{ - return - ((x >> 7) & 0x01) | ((x >> 14) & 0x02) | - ((x >> 21) & 0x04) | ((x >> 28) & 0x08) | - ((x >> 35) & 0x10) | ((x >> 42) & 0x20) | - ((x >> 49) & 0x40) | ((x >> 56) & 0x80); -} - -uint32_t HELPER(iwmmxt_msbw)(uint64_t x) -{ - return - ((x >> 15) & 0x01) | ((x >> 30) & 0x02) | - ((x >> 45) & 0x04) | ((x >> 52) & 0x08); -} - -uint32_t HELPER(iwmmxt_msbl)(uint64_t x) -{ - return ((x >> 31) & 0x01) | ((x >> 62) & 0x02); -} - -/* FIXME: Split wCASF setting into a separate op to avoid env use. 
*/ -uint64_t HELPER(iwmmxt_srlw)(CPUARMState *env, uint64_t x, uint32_t n) -{ - x = (((x & (0xffffll << 0)) >> n) & (0xffffll << 0)) | - (((x & (0xffffll << 16)) >> n) & (0xffffll << 16)) | - (((x & (0xffffll << 32)) >> n) & (0xffffll << 32)) | - (((x & (0xffffll << 48)) >> n) & (0xffffll << 48)); - env->iwmmxt.cregs[ARM_IWMMXT_wCASF] = - NZBIT16(x >> 0, 0) | NZBIT16(x >> 16, 1) | - NZBIT16(x >> 32, 2) | NZBIT16(x >> 48, 3); - return x; -} - -uint64_t HELPER(iwmmxt_srll)(CPUARMState *env, uint64_t x, uint32_t n) -{ - x = ((x & (0xffffffffll << 0)) >> n) | - ((x >> n) & (0xffffffffll << 32)); - env->iwmmxt.cregs[ARM_IWMMXT_wCASF] = - NZBIT32(x >> 0, 0) | NZBIT32(x >> 32, 1); - return x; -} - -uint64_t HELPER(iwmmxt_srlq)(CPUARMState *env, uint64_t x, uint32_t n) -{ - x >>= n; - env->iwmmxt.cregs[ARM_IWMMXT_wCASF] = NZBIT64(x); - return x; -} - -uint64_t HELPER(iwmmxt_sllw)(CPUARMState *env, uint64_t x, uint32_t n) -{ - x = (((x & (0xffffll << 0)) << n) & (0xffffll << 0)) | - (((x & (0xffffll << 16)) << n) & (0xffffll << 16)) | - (((x & (0xffffll << 32)) << n) & (0xffffll << 32)) | - (((x & (0xffffll << 48)) << n) & (0xffffll << 48)); - env->iwmmxt.cregs[ARM_IWMMXT_wCASF] = - NZBIT16(x >> 0, 0) | NZBIT16(x >> 16, 1) | - NZBIT16(x >> 32, 2) | NZBIT16(x >> 48, 3); - return x; -} - -uint64_t HELPER(iwmmxt_slll)(CPUARMState *env, uint64_t x, uint32_t n) -{ - x = ((x << n) & (0xffffffffll << 0)) | - ((x & (0xffffffffll << 32)) << n); - env->iwmmxt.cregs[ARM_IWMMXT_wCASF] = - NZBIT32(x >> 0, 0) | NZBIT32(x >> 32, 1); - return x; -} - -uint64_t HELPER(iwmmxt_sllq)(CPUARMState *env, uint64_t x, uint32_t n) -{ - x <<= n; - env->iwmmxt.cregs[ARM_IWMMXT_wCASF] = NZBIT64(x); - return x; -} - -uint64_t HELPER(iwmmxt_sraw)(CPUARMState *env, uint64_t x, uint32_t n) -{ - x = ((uint64_t) ((EXTEND16(x >> 0) >> n) & 0xffff) << 0) | - ((uint64_t) ((EXTEND16(x >> 16) >> n) & 0xffff) << 16) | - ((uint64_t) ((EXTEND16(x >> 32) >> n) & 0xffff) << 32) | - ((uint64_t) ((EXTEND16(x >> 48) >> n) & 0xffff) << 48); - env->iwmmxt.cregs[ARM_IWMMXT_wCASF] = - NZBIT16(x >> 0, 0) | NZBIT16(x >> 16, 1) | - NZBIT16(x >> 32, 2) | NZBIT16(x >> 48, 3); - return x; -} - -uint64_t HELPER(iwmmxt_sral)(CPUARMState *env, uint64_t x, uint32_t n) -{ - x = (((EXTEND32(x >> 0) >> n) & 0xffffffff) << 0) | - (((EXTEND32(x >> 32) >> n) & 0xffffffff) << 32); - env->iwmmxt.cregs[ARM_IWMMXT_wCASF] = - NZBIT32(x >> 0, 0) | NZBIT32(x >> 32, 1); - return x; -} - -uint64_t HELPER(iwmmxt_sraq)(CPUARMState *env, uint64_t x, uint32_t n) -{ - x = (int64_t) x >> n; - env->iwmmxt.cregs[ARM_IWMMXT_wCASF] = NZBIT64(x); - return x; -} - -uint64_t HELPER(iwmmxt_rorw)(CPUARMState *env, uint64_t x, uint32_t n) -{ - x = ((((x & (0xffffll << 0)) >> n) | - ((x & (0xffffll << 0)) << (16 - n))) & (0xffffll << 0)) | - ((((x & (0xffffll << 16)) >> n) | - ((x & (0xffffll << 16)) << (16 - n))) & (0xffffll << 16)) | - ((((x & (0xffffll << 32)) >> n) | - ((x & (0xffffll << 32)) << (16 - n))) & (0xffffll << 32)) | - ((((x & (0xffffll << 48)) >> n) | - ((x & (0xffffll << 48)) << (16 - n))) & (0xffffll << 48)); - env->iwmmxt.cregs[ARM_IWMMXT_wCASF] = - NZBIT16(x >> 0, 0) | NZBIT16(x >> 16, 1) | - NZBIT16(x >> 32, 2) | NZBIT16(x >> 48, 3); - return x; -} - -uint64_t HELPER(iwmmxt_rorl)(CPUARMState *env, uint64_t x, uint32_t n) -{ - x = ((x & (0xffffffffll << 0)) >> n) | - ((x >> n) & (0xffffffffll << 32)) | - ((x << (32 - n)) & (0xffffffffll << 0)) | - ((x & (0xffffffffll << 32)) << (32 - n)); - env->iwmmxt.cregs[ARM_IWMMXT_wCASF] = - NZBIT32(x >> 0, 0) | NZBIT32(x >> 32, 1); 
- return x; -} - -uint64_t HELPER(iwmmxt_rorq)(CPUARMState *env, uint64_t x, uint32_t n) -{ - x = ror64(x, n); - env->iwmmxt.cregs[ARM_IWMMXT_wCASF] = NZBIT64(x); - return x; -} - -uint64_t HELPER(iwmmxt_shufh)(CPUARMState *env, uint64_t x, uint32_t n) -{ - x = (((x >> ((n << 4) & 0x30)) & 0xffff) << 0) | - (((x >> ((n << 2) & 0x30)) & 0xffff) << 16) | - (((x >> ((n << 0) & 0x30)) & 0xffff) << 32) | - (((x >> ((n >> 2) & 0x30)) & 0xffff) << 48); - env->iwmmxt.cregs[ARM_IWMMXT_wCASF] = - NZBIT16(x >> 0, 0) | NZBIT16(x >> 16, 1) | - NZBIT16(x >> 32, 2) | NZBIT16(x >> 48, 3); - return x; -} - -/* TODO: Unsigned-Saturation */ -uint64_t HELPER(iwmmxt_packuw)(CPUARMState *env, uint64_t a, uint64_t b) -{ - a = (((a >> 0) & 0xff) << 0) | (((a >> 16) & 0xff) << 8) | - (((a >> 32) & 0xff) << 16) | (((a >> 48) & 0xff) << 24) | - (((b >> 0) & 0xff) << 32) | (((b >> 16) & 0xff) << 40) | - (((b >> 32) & 0xff) << 48) | (((b >> 48) & 0xff) << 56); - env->iwmmxt.cregs[ARM_IWMMXT_wCASF] = - NZBIT8(a >> 0, 0) | NZBIT8(a >> 8, 1) | - NZBIT8(a >> 16, 2) | NZBIT8(a >> 24, 3) | - NZBIT8(a >> 32, 4) | NZBIT8(a >> 40, 5) | - NZBIT8(a >> 48, 6) | NZBIT8(a >> 56, 7); - return a; -} - -uint64_t HELPER(iwmmxt_packul)(CPUARMState *env, uint64_t a, uint64_t b) -{ - a = (((a >> 0) & 0xffff) << 0) | (((a >> 32) & 0xffff) << 16) | - (((b >> 0) & 0xffff) << 32) | (((b >> 32) & 0xffff) << 48); - env->iwmmxt.cregs[ARM_IWMMXT_wCASF] = - NZBIT16(a >> 0, 0) | NZBIT16(a >> 16, 1) | - NZBIT16(a >> 32, 2) | NZBIT16(a >> 48, 3); - return a; -} - -uint64_t HELPER(iwmmxt_packuq)(CPUARMState *env, uint64_t a, uint64_t b) -{ - a = (a & 0xffffffff) | ((b & 0xffffffff) << 32); - env->iwmmxt.cregs[ARM_IWMMXT_wCASF] = - NZBIT32(a >> 0, 0) | NZBIT32(a >> 32, 1); - return a; -} - -/* TODO: Signed-Saturation */ -uint64_t HELPER(iwmmxt_packsw)(CPUARMState *env, uint64_t a, uint64_t b) -{ - a = (((a >> 0) & 0xff) << 0) | (((a >> 16) & 0xff) << 8) | - (((a >> 32) & 0xff) << 16) | (((a >> 48) & 0xff) << 24) | - (((b >> 0) & 0xff) << 32) | (((b >> 16) & 0xff) << 40) | - (((b >> 32) & 0xff) << 48) | (((b >> 48) & 0xff) << 56); - env->iwmmxt.cregs[ARM_IWMMXT_wCASF] = - NZBIT8(a >> 0, 0) | NZBIT8(a >> 8, 1) | - NZBIT8(a >> 16, 2) | NZBIT8(a >> 24, 3) | - NZBIT8(a >> 32, 4) | NZBIT8(a >> 40, 5) | - NZBIT8(a >> 48, 6) | NZBIT8(a >> 56, 7); - return a; -} - -uint64_t HELPER(iwmmxt_packsl)(CPUARMState *env, uint64_t a, uint64_t b) -{ - a = (((a >> 0) & 0xffff) << 0) | (((a >> 32) & 0xffff) << 16) | - (((b >> 0) & 0xffff) << 32) | (((b >> 32) & 0xffff) << 48); - env->iwmmxt.cregs[ARM_IWMMXT_wCASF] = - NZBIT16(a >> 0, 0) | NZBIT16(a >> 16, 1) | - NZBIT16(a >> 32, 2) | NZBIT16(a >> 48, 3); - return a; -} - -uint64_t HELPER(iwmmxt_packsq)(CPUARMState *env, uint64_t a, uint64_t b) -{ - a = (a & 0xffffffff) | ((b & 0xffffffff) << 32); - env->iwmmxt.cregs[ARM_IWMMXT_wCASF] = - NZBIT32(a >> 0, 0) | NZBIT32(a >> 32, 1); - return a; -} - -uint64_t HELPER(iwmmxt_muladdsl)(uint64_t c, uint32_t a, uint32_t b) -{ - return c + ((int32_t) EXTEND32(a) * (int32_t) EXTEND32(b)); -} - -uint64_t HELPER(iwmmxt_muladdsw)(uint64_t c, uint32_t a, uint32_t b) -{ - c += EXTEND32(EXTEND16S((a >> 0) & 0xffff) * - EXTEND16S((b >> 0) & 0xffff)); - c += EXTEND32(EXTEND16S((a >> 16) & 0xffff) * - EXTEND16S((b >> 16) & 0xffff)); - return c; -} - -uint64_t HELPER(iwmmxt_muladdswl)(uint64_t c, uint32_t a, uint32_t b) -{ - return c + (EXTEND32(EXTEND16S(a & 0xffff) * - EXTEND16S(b & 0xffff))); -} diff --git a/target/arm/tcg/m_helper.c b/target/arm/tcg/m_helper.c index 6614719..d856e3b 100644 
--- a/target/arm/tcg/m_helper.c +++ b/target/arm/tcg/m_helper.c @@ -632,8 +632,11 @@ void HELPER(v7m_blxns)(CPUARMState *env, uint32_t dest) } /* Note that these stores can throw exceptions on MPU faults */ - cpu_stl_data_ra(env, sp, nextinst, GETPC()); - cpu_stl_data_ra(env, sp + 4, saved_psr, GETPC()); + ARMMMUIdx mmu_idx = arm_mmu_idx(env); + MemOpIdx oi = make_memop_idx(MO_TEUL | MO_ALIGN, + arm_to_core_mmu_idx(mmu_idx)); + cpu_stl_mmu(env, sp, nextinst, oi, GETPC()); + cpu_stl_mmu(env, sp + 4, saved_psr, oi, GETPC()); env->regs[13] = sp; env->regs[14] = 0xfeffffff; @@ -1048,6 +1051,9 @@ void HELPER(v7m_vlstm)(CPUARMState *env, uint32_t fptr) bool s = env->v7m.fpccr[M_REG_S] & R_V7M_FPCCR_S_MASK; bool lspact = env->v7m.fpccr[s] & R_V7M_FPCCR_LSPACT_MASK; uintptr_t ra = GETPC(); + ARMMMUIdx mmu_idx = arm_mmu_idx(env); + MemOpIdx oi = make_memop_idx(MO_TEUL | MO_ALIGN, + arm_to_core_mmu_idx(mmu_idx)); assert(env->v7m.secure); @@ -1073,7 +1079,7 @@ void HELPER(v7m_vlstm)(CPUARMState *env, uint32_t fptr) * Note that we do not use v7m_stack_write() here, because the * accesses should not set the FSR bits for stacking errors if they * fail. (In pseudocode terms, they are AccType_NORMAL, not AccType_STACK - * or AccType_LAZYFP). Faults in cpu_stl_data_ra() will throw exceptions + * or AccType_LAZYFP). Faults in cpu_stl_mmu() will throw exceptions * and longjmp out. */ if (!(env->v7m.fpccr[M_REG_S] & R_V7M_FPCCR_LSPEN_MASK)) { @@ -1089,12 +1095,12 @@ void HELPER(v7m_vlstm)(CPUARMState *env, uint32_t fptr) if (i >= 16) { faddr += 8; /* skip the slot for the FPSCR */ } - cpu_stl_data_ra(env, faddr, slo, ra); - cpu_stl_data_ra(env, faddr + 4, shi, ra); + cpu_stl_mmu(env, faddr, slo, oi, ra); + cpu_stl_mmu(env, faddr + 4, shi, oi, ra); } - cpu_stl_data_ra(env, fptr + 0x40, vfp_get_fpscr(env), ra); + cpu_stl_mmu(env, fptr + 0x40, vfp_get_fpscr(env), oi, ra); if (cpu_isar_feature(aa32_mve, cpu)) { - cpu_stl_data_ra(env, fptr + 0x44, env->v7m.vpr, ra); + cpu_stl_mmu(env, fptr + 0x44, env->v7m.vpr, oi, ra); } /* @@ -1121,6 +1127,9 @@ void HELPER(v7m_vlldm)(CPUARMState *env, uint32_t fptr) { ARMCPU *cpu = env_archcpu(env); uintptr_t ra = GETPC(); + ARMMMUIdx mmu_idx = arm_mmu_idx(env); + MemOpIdx oi = make_memop_idx(MO_TEUL | MO_ALIGN, + arm_to_core_mmu_idx(mmu_idx)); /* fptr is the value of Rn, the frame pointer we load the FP regs from */ assert(env->v7m.secure); @@ -1155,16 +1164,16 @@ void HELPER(v7m_vlldm)(CPUARMState *env, uint32_t fptr) faddr += 8; /* skip the slot for the FPSCR and VPR */ } - slo = cpu_ldl_data_ra(env, faddr, ra); - shi = cpu_ldl_data_ra(env, faddr + 4, ra); + slo = cpu_ldl_mmu(env, faddr, oi, ra); + shi = cpu_ldl_mmu(env, faddr + 4, oi, ra); dn = (uint64_t) shi << 32 | slo; *aa32_vfp_dreg(env, i / 2) = dn; } - fpscr = cpu_ldl_data_ra(env, fptr + 0x40, ra); + fpscr = cpu_ldl_mmu(env, fptr + 0x40, oi, ra); vfp_set_fpscr(env, fpscr); if (cpu_isar_feature(aa32_mve, cpu)) { - env->v7m.vpr = cpu_ldl_data_ra(env, fptr + 0x44, ra); + env->v7m.vpr = cpu_ldl_mmu(env, fptr + 0x44, oi, ra); } } @@ -1937,7 +1946,7 @@ static bool do_v7m_function_return(ARMCPU *cpu) * do them as secure, so work out what MMU index that is. 
*/ mmu_idx = arm_v7m_mmu_idx_for_secstate(env, true); - oi = make_memop_idx(MO_LEUL, arm_to_core_mmu_idx(mmu_idx)); + oi = make_memop_idx(MO_LEUL | MO_ALIGN, arm_to_core_mmu_idx(mmu_idx)); newpc = cpu_ldl_mmu(env, frameptr, oi, 0); newpsr = cpu_ldl_mmu(env, frameptr + 4, oi, 0); @@ -2820,8 +2829,8 @@ uint32_t HELPER(v7m_tt)(CPUARMState *env, uint32_t addr, uint32_t op) ARMMMUFaultInfo fi = {}; /* We can ignore the return value as prot is always set */ - pmsav8_mpu_lookup(env, addr, MMU_DATA_LOAD, mmu_idx, targetsec, - &res, &fi, &mregion); + pmsav8_mpu_lookup(env, addr, MMU_DATA_LOAD, PAGE_READ, mmu_idx, + targetsec, &res, &fi, &mregion); if (mregion == -1) { mrvalid = false; mregion = 0; diff --git a/target/arm/tcg/meson.build b/target/arm/tcg/meson.build index 2d1502b..1b11565 100644 --- a/target/arm/tcg/meson.build +++ b/target/arm/tcg/meson.build @@ -56,14 +56,16 @@ arm_system_ss.add(files( arm_system_ss.add(when: 'CONFIG_ARM_V7M', if_true: files('cpu-v7m.c')) arm_user_ss.add(when: 'TARGET_AARCH64', if_false: files('cpu-v7m.c')) +arm_common_ss.add(zlib) + arm_common_ss.add(files( 'arith_helper.c', 'crypto_helper.c', )) arm_common_system_ss.add(files( + 'cpregs-at.c', 'hflags.c', - 'iwmmxt_helper.c', 'neon_helper.c', 'tlb_helper.c', 'tlb-insns.c', @@ -71,7 +73,6 @@ arm_common_system_ss.add(files( )) arm_user_ss.add(files( 'hflags.c', - 'iwmmxt_helper.c', 'neon_helper.c', 'tlb_helper.c', 'vfp_helper.c', diff --git a/target/arm/tcg/mte_helper.c b/target/arm/tcg/mte_helper.c index 0efc18a..bb48fe3 100644 --- a/target/arm/tcg/mte_helper.c +++ b/target/arm/tcg/mte_helper.c @@ -21,12 +21,13 @@ #include "qemu/log.h" #include "cpu.h" #include "internals.h" +#include "exec/target_page.h" #include "exec/page-protection.h" #ifdef CONFIG_USER_ONLY #include "user/cpu_loop.h" #include "user/page-protection.h" #else -#include "system/ram_addr.h" +#include "system/physmem.h" #endif #include "accel/tcg/cpu-ldst.h" #include "accel/tcg/probe.h" @@ -188,7 +189,7 @@ uint8_t *allocation_tag_mem_probe(CPUARMState *env, int ptr_mmu_idx, */ if (tag_access == MMU_DATA_STORE) { ram_addr_t tag_ra = memory_region_get_ram_addr(mr) + xlat; - cpu_physical_memory_set_dirty_flag(tag_ra, DIRTY_MEMORY_MIGRATION); + physical_memory_set_dirty_flag(tag_ra, DIRTY_MEMORY_MIGRATION); } return memory_region_get_ram_ptr(mr) + xlat; @@ -591,7 +592,7 @@ static void mte_async_check_fail(CPUARMState *env, uint64_t dirty_ptr, * which is rather sooner than "normal". But the alternative * is waiting until the next syscall. 
*/ - qemu_cpu_kick(env_cpu(env)); + cpu_exit(env_cpu(env)); #endif } @@ -604,7 +605,7 @@ void mte_check_fail(CPUARMState *env, uint32_t desc, int el, reg_el, tcf; uint64_t sctlr; - reg_el = regime_el(env, arm_mmu_idx); + reg_el = regime_el(arm_mmu_idx); sctlr = env->cp15.sctlr_el[reg_el]; switch (arm_mmu_idx) { diff --git a/target/arm/tcg/mve_helper.c b/target/arm/tcg/mve_helper.c index 506d1c3..63ddcf3 100644 --- a/target/arm/tcg/mve_helper.c +++ b/target/arm/tcg/mve_helper.c @@ -148,13 +148,15 @@ static void mve_advance_vpt(CPUARMState *env) } /* For loads, predicated lanes are zeroed instead of keeping their old values */ -#define DO_VLDR(OP, MSIZE, LDTYPE, ESIZE, TYPE) \ +#define DO_VLDR(OP, MFLAG, MSIZE, MTYPE, LDTYPE, ESIZE, TYPE) \ void HELPER(mve_##OP)(CPUARMState *env, void *vd, uint32_t addr) \ { \ TYPE *d = vd; \ uint16_t mask = mve_element_mask(env); \ uint16_t eci_mask = mve_eci_mask(env); \ unsigned b, e; \ + int mmu_idx = arm_to_core_mmu_idx(arm_mmu_idx(env)); \ + MemOpIdx oi = make_memop_idx(MFLAG | MO_ALIGN, mmu_idx); \ /* \ * R_SXTM allows the dest reg to become UNKNOWN for abandoned \ * beats so we don't care if we update part of the dest and \ @@ -163,46 +165,48 @@ static void mve_advance_vpt(CPUARMState *env) for (b = 0, e = 0; b < 16; b += ESIZE, e++) { \ if (eci_mask & (1 << b)) { \ d[H##ESIZE(e)] = (mask & (1 << b)) ? \ - cpu_##LDTYPE##_data_ra(env, addr, GETPC()) : 0; \ + (MTYPE)cpu_##LDTYPE##_mmu(env, addr, oi, GETPC()) : 0;\ } \ addr += MSIZE; \ } \ mve_advance_vpt(env); \ } -#define DO_VSTR(OP, MSIZE, STTYPE, ESIZE, TYPE) \ +#define DO_VSTR(OP, MFLAG, MSIZE, STTYPE, ESIZE, TYPE) \ void HELPER(mve_##OP)(CPUARMState *env, void *vd, uint32_t addr) \ { \ TYPE *d = vd; \ uint16_t mask = mve_element_mask(env); \ unsigned b, e; \ + int mmu_idx = arm_to_core_mmu_idx(arm_mmu_idx(env)); \ + MemOpIdx oi = make_memop_idx(MFLAG | MO_ALIGN, mmu_idx); \ for (b = 0, e = 0; b < 16; b += ESIZE, e++) { \ if (mask & (1 << b)) { \ - cpu_##STTYPE##_data_ra(env, addr, d[H##ESIZE(e)], GETPC()); \ + cpu_##STTYPE##_mmu(env, addr, d[H##ESIZE(e)], oi, GETPC()); \ } \ addr += MSIZE; \ } \ mve_advance_vpt(env); \ } -DO_VLDR(vldrb, 1, ldub, 1, uint8_t) -DO_VLDR(vldrh, 2, lduw, 2, uint16_t) -DO_VLDR(vldrw, 4, ldl, 4, uint32_t) +DO_VLDR(vldrb, MO_UB, 1, uint8_t, ldb, 1, uint8_t) +DO_VLDR(vldrh, MO_TEUW, 2, uint16_t, ldw, 2, uint16_t) +DO_VLDR(vldrw, MO_TEUL, 4, uint32_t, ldl, 4, uint32_t) -DO_VSTR(vstrb, 1, stb, 1, uint8_t) -DO_VSTR(vstrh, 2, stw, 2, uint16_t) -DO_VSTR(vstrw, 4, stl, 4, uint32_t) +DO_VSTR(vstrb, MO_UB, 1, stb, 1, uint8_t) +DO_VSTR(vstrh, MO_TEUW, 2, stw, 2, uint16_t) +DO_VSTR(vstrw, MO_TEUL, 4, stl, 4, uint32_t) -DO_VLDR(vldrb_sh, 1, ldsb, 2, int16_t) -DO_VLDR(vldrb_sw, 1, ldsb, 4, int32_t) -DO_VLDR(vldrb_uh, 1, ldub, 2, uint16_t) -DO_VLDR(vldrb_uw, 1, ldub, 4, uint32_t) -DO_VLDR(vldrh_sw, 2, ldsw, 4, int32_t) -DO_VLDR(vldrh_uw, 2, lduw, 4, uint32_t) +DO_VLDR(vldrb_sh, MO_SB, 1, int8_t, ldb, 2, int16_t) +DO_VLDR(vldrb_sw, MO_SB, 1, int8_t, ldb, 4, int32_t) +DO_VLDR(vldrb_uh, MO_UB, 1, uint8_t, ldb, 2, uint16_t) +DO_VLDR(vldrb_uw, MO_UB, 1, uint8_t, ldb, 4, uint32_t) +DO_VLDR(vldrh_sw, MO_TESW, 2, int16_t, ldw, 4, int32_t) +DO_VLDR(vldrh_uw, MO_TEUW, 2, uint16_t, ldw, 4, uint32_t) -DO_VSTR(vstrb_h, 1, stb, 2, int16_t) -DO_VSTR(vstrb_w, 1, stb, 4, int32_t) -DO_VSTR(vstrh_w, 2, stw, 4, int32_t) +DO_VSTR(vstrb_h, MO_UB, 1, stb, 2, int16_t) +DO_VSTR(vstrb_w, MO_UB, 1, stb, 4, int32_t) +DO_VSTR(vstrh_w, MO_TEUW, 2, stw, 4, int32_t) #undef DO_VLDR #undef DO_VSTR @@ -214,7 +218,7 @@ 
DO_VSTR(vstrh_w, 2, stw, 4, int32_t) * For loads, predicated lanes are zeroed instead of retaining * their previous values. */ -#define DO_VLDR_SG(OP, LDTYPE, ESIZE, TYPE, OFFTYPE, ADDRFN, WB) \ +#define DO_VLDR_SG(OP, MFLAG, MTYPE, LDTYPE, ESIZE, TYPE, OFFTYPE, ADDRFN, WB)\ void HELPER(mve_##OP)(CPUARMState *env, void *vd, void *vm, \ uint32_t base) \ { \ @@ -224,13 +228,15 @@ DO_VSTR(vstrh_w, 2, stw, 4, int32_t) uint16_t eci_mask = mve_eci_mask(env); \ unsigned e; \ uint32_t addr; \ + int mmu_idx = arm_to_core_mmu_idx(arm_mmu_idx(env)); \ + MemOpIdx oi = make_memop_idx(MFLAG | MO_ALIGN, mmu_idx); \ for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE, eci_mask >>= ESIZE) { \ if (!(eci_mask & 1)) { \ continue; \ } \ addr = ADDRFN(base, m[H##ESIZE(e)]); \ d[H##ESIZE(e)] = (mask & 1) ? \ - cpu_##LDTYPE##_data_ra(env, addr, GETPC()) : 0; \ + (MTYPE)cpu_##LDTYPE##_mmu(env, addr, oi, GETPC()) : 0; \ if (WB) { \ m[H##ESIZE(e)] = addr; \ } \ @@ -239,7 +245,7 @@ DO_VSTR(vstrh_w, 2, stw, 4, int32_t) } /* We know here TYPE is unsigned so always the same as the offset type */ -#define DO_VSTR_SG(OP, STTYPE, ESIZE, TYPE, ADDRFN, WB) \ +#define DO_VSTR_SG(OP, MFLAG, STTYPE, ESIZE, TYPE, ADDRFN, WB) \ void HELPER(mve_##OP)(CPUARMState *env, void *vd, void *vm, \ uint32_t base) \ { \ @@ -249,13 +255,15 @@ DO_VSTR(vstrh_w, 2, stw, 4, int32_t) uint16_t eci_mask = mve_eci_mask(env); \ unsigned e; \ uint32_t addr; \ + int mmu_idx = arm_to_core_mmu_idx(arm_mmu_idx(env)); \ + MemOpIdx oi = make_memop_idx(MFLAG | MO_ALIGN, mmu_idx); \ for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE, eci_mask >>= ESIZE) { \ if (!(eci_mask & 1)) { \ continue; \ } \ addr = ADDRFN(base, m[H##ESIZE(e)]); \ if (mask & 1) { \ - cpu_##STTYPE##_data_ra(env, addr, d[H##ESIZE(e)], GETPC()); \ + cpu_##STTYPE##_mmu(env, addr, d[H##ESIZE(e)], oi, GETPC()); \ } \ if (WB) { \ m[H##ESIZE(e)] = addr; \ @@ -282,13 +290,15 @@ DO_VSTR(vstrh_w, 2, stw, 4, int32_t) uint16_t eci_mask = mve_eci_mask(env); \ unsigned e; \ uint32_t addr; \ + int mmu_idx = arm_to_core_mmu_idx(arm_mmu_idx(env)); \ + MemOpIdx oi = make_memop_idx(MO_TEUL | MO_ALIGN, mmu_idx); \ for (e = 0; e < 16 / 4; e++, mask >>= 4, eci_mask >>= 4) { \ if (!(eci_mask & 1)) { \ continue; \ } \ addr = ADDRFN(base, m[H4(e & ~1)]); \ addr += 4 * (e & 1); \ - d[H4(e)] = (mask & 1) ? cpu_ldl_data_ra(env, addr, GETPC()) : 0; \ + d[H4(e)] = (mask & 1) ? 
cpu_ldl_mmu(env, addr, oi, GETPC()) : 0; \ if (WB && (e & 1)) { \ m[H4(e & ~1)] = addr - 4; \ } \ @@ -306,6 +316,8 @@ DO_VSTR(vstrh_w, 2, stw, 4, int32_t) uint16_t eci_mask = mve_eci_mask(env); \ unsigned e; \ uint32_t addr; \ + int mmu_idx = arm_to_core_mmu_idx(arm_mmu_idx(env)); \ + MemOpIdx oi = make_memop_idx(MO_TEUL | MO_ALIGN, mmu_idx); \ for (e = 0; e < 16 / 4; e++, mask >>= 4, eci_mask >>= 4) { \ if (!(eci_mask & 1)) { \ continue; \ @@ -313,7 +325,7 @@ DO_VSTR(vstrh_w, 2, stw, 4, int32_t) addr = ADDRFN(base, m[H4(e & ~1)]); \ addr += 4 * (e & 1); \ if (mask & 1) { \ - cpu_stl_data_ra(env, addr, d[H4(e)], GETPC()); \ + cpu_stl_mmu(env, addr, d[H4(e)], oi, GETPC()); \ } \ if (WB && (e & 1)) { \ m[H4(e & ~1)] = addr - 4; \ @@ -327,40 +339,44 @@ DO_VSTR(vstrh_w, 2, stw, 4, int32_t) #define ADDR_ADD_OSW(BASE, OFFSET) ((BASE) + ((OFFSET) << 2)) #define ADDR_ADD_OSD(BASE, OFFSET) ((BASE) + ((OFFSET) << 3)) -DO_VLDR_SG(vldrb_sg_sh, ldsb, 2, int16_t, uint16_t, ADDR_ADD, false) -DO_VLDR_SG(vldrb_sg_sw, ldsb, 4, int32_t, uint32_t, ADDR_ADD, false) -DO_VLDR_SG(vldrh_sg_sw, ldsw, 4, int32_t, uint32_t, ADDR_ADD, false) +DO_VLDR_SG(vldrb_sg_sh, MO_SB, int8_t, ldb, 2, int16_t, uint16_t, ADDR_ADD, false) +DO_VLDR_SG(vldrb_sg_sw, MO_SB, int8_t, ldb, 4, int32_t, uint32_t, ADDR_ADD, false) +DO_VLDR_SG(vldrh_sg_sw, MO_TESW, int16_t, ldw, 4, int32_t, uint32_t, ADDR_ADD, false) -DO_VLDR_SG(vldrb_sg_ub, ldub, 1, uint8_t, uint8_t, ADDR_ADD, false) -DO_VLDR_SG(vldrb_sg_uh, ldub, 2, uint16_t, uint16_t, ADDR_ADD, false) -DO_VLDR_SG(vldrb_sg_uw, ldub, 4, uint32_t, uint32_t, ADDR_ADD, false) -DO_VLDR_SG(vldrh_sg_uh, lduw, 2, uint16_t, uint16_t, ADDR_ADD, false) -DO_VLDR_SG(vldrh_sg_uw, lduw, 4, uint32_t, uint32_t, ADDR_ADD, false) -DO_VLDR_SG(vldrw_sg_uw, ldl, 4, uint32_t, uint32_t, ADDR_ADD, false) +DO_VLDR_SG(vldrb_sg_ub, MO_UB, uint8_t, ldb, 1, uint8_t, uint8_t, ADDR_ADD, false) +DO_VLDR_SG(vldrb_sg_uh, MO_UB, uint8_t, ldb, 2, uint16_t, uint16_t, ADDR_ADD, false) +DO_VLDR_SG(vldrb_sg_uw, MO_UB, uint8_t, ldb, 4, uint32_t, uint32_t, ADDR_ADD, false) +DO_VLDR_SG(vldrh_sg_uh, MO_TEUW, uint16_t, ldw, 2, uint16_t, uint16_t, ADDR_ADD, false) +DO_VLDR_SG(vldrh_sg_uw, MO_TEUW, uint16_t, ldw, 4, uint32_t, uint32_t, ADDR_ADD, false) +DO_VLDR_SG(vldrw_sg_uw, MO_TEUL, uint32_t, ldl, 4, uint32_t, uint32_t, ADDR_ADD, false) DO_VLDR64_SG(vldrd_sg_ud, ADDR_ADD, false) -DO_VLDR_SG(vldrh_sg_os_sw, ldsw, 4, int32_t, uint32_t, ADDR_ADD_OSH, false) -DO_VLDR_SG(vldrh_sg_os_uh, lduw, 2, uint16_t, uint16_t, ADDR_ADD_OSH, false) -DO_VLDR_SG(vldrh_sg_os_uw, lduw, 4, uint32_t, uint32_t, ADDR_ADD_OSH, false) -DO_VLDR_SG(vldrw_sg_os_uw, ldl, 4, uint32_t, uint32_t, ADDR_ADD_OSW, false) +DO_VLDR_SG(vldrh_sg_os_sw, MO_TESW, int16_t, ldw, 4, + int32_t, uint32_t, ADDR_ADD_OSH, false) +DO_VLDR_SG(vldrh_sg_os_uh, MO_TEUW, uint16_t, ldw, 2, + uint16_t, uint16_t, ADDR_ADD_OSH, false) +DO_VLDR_SG(vldrh_sg_os_uw, MO_TEUW, uint16_t, ldw, 4, + uint32_t, uint32_t, ADDR_ADD_OSH, false) +DO_VLDR_SG(vldrw_sg_os_uw, MO_TEUL, uint32_t, ldl, 4, + uint32_t, uint32_t, ADDR_ADD_OSW, false) DO_VLDR64_SG(vldrd_sg_os_ud, ADDR_ADD_OSD, false) -DO_VSTR_SG(vstrb_sg_ub, stb, 1, uint8_t, ADDR_ADD, false) -DO_VSTR_SG(vstrb_sg_uh, stb, 2, uint16_t, ADDR_ADD, false) -DO_VSTR_SG(vstrb_sg_uw, stb, 4, uint32_t, ADDR_ADD, false) -DO_VSTR_SG(vstrh_sg_uh, stw, 2, uint16_t, ADDR_ADD, false) -DO_VSTR_SG(vstrh_sg_uw, stw, 4, uint32_t, ADDR_ADD, false) -DO_VSTR_SG(vstrw_sg_uw, stl, 4, uint32_t, ADDR_ADD, false) +DO_VSTR_SG(vstrb_sg_ub, MO_UB, stb, 1, uint8_t, ADDR_ADD, false) 
+DO_VSTR_SG(vstrb_sg_uh, MO_UB, stb, 2, uint16_t, ADDR_ADD, false) +DO_VSTR_SG(vstrb_sg_uw, MO_UB, stb, 4, uint32_t, ADDR_ADD, false) +DO_VSTR_SG(vstrh_sg_uh, MO_TEUW, stw, 2, uint16_t, ADDR_ADD, false) +DO_VSTR_SG(vstrh_sg_uw, MO_TEUW, stw, 4, uint32_t, ADDR_ADD, false) +DO_VSTR_SG(vstrw_sg_uw, MO_TEUL, stl, 4, uint32_t, ADDR_ADD, false) DO_VSTR64_SG(vstrd_sg_ud, ADDR_ADD, false) -DO_VSTR_SG(vstrh_sg_os_uh, stw, 2, uint16_t, ADDR_ADD_OSH, false) -DO_VSTR_SG(vstrh_sg_os_uw, stw, 4, uint32_t, ADDR_ADD_OSH, false) -DO_VSTR_SG(vstrw_sg_os_uw, stl, 4, uint32_t, ADDR_ADD_OSW, false) +DO_VSTR_SG(vstrh_sg_os_uh, MO_TEUW, stw, 2, uint16_t, ADDR_ADD_OSH, false) +DO_VSTR_SG(vstrh_sg_os_uw, MO_TEUW, stw, 4, uint32_t, ADDR_ADD_OSH, false) +DO_VSTR_SG(vstrw_sg_os_uw, MO_TEUL, stl, 4, uint32_t, ADDR_ADD_OSW, false) DO_VSTR64_SG(vstrd_sg_os_ud, ADDR_ADD_OSD, false) -DO_VLDR_SG(vldrw_sg_wb_uw, ldl, 4, uint32_t, uint32_t, ADDR_ADD, true) +DO_VLDR_SG(vldrw_sg_wb_uw, MO_TEUL, uint32_t, ldl, 4, uint32_t, uint32_t, ADDR_ADD, true) DO_VLDR64_SG(vldrd_sg_wb_ud, ADDR_ADD, true) -DO_VSTR_SG(vstrw_sg_wb_uw, stl, 4, uint32_t, ADDR_ADD, true) +DO_VSTR_SG(vstrw_sg_wb_uw, MO_TEUL, stl, 4, uint32_t, ADDR_ADD, true) DO_VSTR64_SG(vstrd_sg_wb_ud, ADDR_ADD, true) /* @@ -387,13 +403,15 @@ DO_VSTR64_SG(vstrd_sg_wb_ud, ADDR_ADD, true) uint16_t mask = mve_eci_mask(env); \ static const uint8_t off[4] = { O1, O2, O3, O4 }; \ uint32_t addr, data; \ + int mmu_idx = arm_to_core_mmu_idx(arm_mmu_idx(env)); \ + MemOpIdx oi = make_memop_idx(MO_TEUL | MO_ALIGN, mmu_idx); \ for (beat = 0; beat < 4; beat++, mask >>= 4) { \ if ((mask & 1) == 0) { \ /* ECI says skip this beat */ \ continue; \ } \ addr = base + off[beat] * 4; \ - data = cpu_ldl_le_data_ra(env, addr, GETPC()); \ + data = cpu_ldl_mmu(env, addr, oi, GETPC()); \ for (e = 0; e < 4; e++, data >>= 8) { \ uint8_t *qd = (uint8_t *)aa32_vfp_qreg(env, qnidx + e); \ qd[H1(off[beat])] = data; \ @@ -411,13 +429,15 @@ DO_VSTR64_SG(vstrd_sg_wb_ud, ADDR_ADD, true) uint32_t addr, data; \ int y; /* y counts 0 2 0 2 */ \ uint16_t *qd; \ + int mmu_idx = arm_to_core_mmu_idx(arm_mmu_idx(env)); \ + MemOpIdx oi = make_memop_idx(MO_TEUL | MO_ALIGN, mmu_idx); \ for (beat = 0, y = 0; beat < 4; beat++, mask >>= 4, y ^= 2) { \ if ((mask & 1) == 0) { \ /* ECI says skip this beat */ \ continue; \ } \ addr = base + off[beat] * 8 + (beat & 1) * 4; \ - data = cpu_ldl_le_data_ra(env, addr, GETPC()); \ + data = cpu_ldl_mmu(env, addr, oi, GETPC()); \ qd = (uint16_t *)aa32_vfp_qreg(env, qnidx + y); \ qd[H2(off[beat])] = data; \ data >>= 16; \ @@ -436,13 +456,15 @@ DO_VSTR64_SG(vstrd_sg_wb_ud, ADDR_ADD, true) uint32_t addr, data; \ uint32_t *qd; \ int y; \ + int mmu_idx = arm_to_core_mmu_idx(arm_mmu_idx(env)); \ + MemOpIdx oi = make_memop_idx(MO_TEUL | MO_ALIGN, mmu_idx); \ for (beat = 0; beat < 4; beat++, mask >>= 4) { \ if ((mask & 1) == 0) { \ /* ECI says skip this beat */ \ continue; \ } \ addr = base + off[beat] * 4; \ - data = cpu_ldl_le_data_ra(env, addr, GETPC()); \ + data = cpu_ldl_mmu(env, addr, oi, GETPC()); \ y = (beat + (O1 & 2)) & 3; \ qd = (uint32_t *)aa32_vfp_qreg(env, qnidx + y); \ qd[H4(off[beat] >> 2)] = data; \ @@ -473,13 +495,15 @@ DO_VLD4W(vld43w, 6, 7, 8, 9) static const uint8_t off[4] = { O1, O2, O3, O4 }; \ uint32_t addr, data; \ uint8_t *qd; \ + int mmu_idx = arm_to_core_mmu_idx(arm_mmu_idx(env)); \ + MemOpIdx oi = make_memop_idx(MO_TEUL | MO_ALIGN, mmu_idx); \ for (beat = 0; beat < 4; beat++, mask >>= 4) { \ if ((mask & 1) == 0) { \ /* ECI says skip this beat */ \ continue; \ } \ addr = 
base + off[beat] * 2; \ - data = cpu_ldl_le_data_ra(env, addr, GETPC()); \ + data = cpu_ldl_mmu(env, addr, oi, GETPC()); \ for (e = 0; e < 4; e++, data >>= 8) { \ qd = (uint8_t *)aa32_vfp_qreg(env, qnidx + (e & 1)); \ qd[H1(off[beat] + (e >> 1))] = data; \ @@ -497,13 +521,15 @@ DO_VLD4W(vld43w, 6, 7, 8, 9) uint32_t addr, data; \ int e; \ uint16_t *qd; \ + int mmu_idx = arm_to_core_mmu_idx(arm_mmu_idx(env)); \ + MemOpIdx oi = make_memop_idx(MO_TEUL | MO_ALIGN, mmu_idx); \ for (beat = 0; beat < 4; beat++, mask >>= 4) { \ if ((mask & 1) == 0) { \ /* ECI says skip this beat */ \ continue; \ } \ addr = base + off[beat] * 4; \ - data = cpu_ldl_le_data_ra(env, addr, GETPC()); \ + data = cpu_ldl_mmu(env, addr, oi, GETPC()); \ for (e = 0; e < 2; e++, data >>= 16) { \ qd = (uint16_t *)aa32_vfp_qreg(env, qnidx + e); \ qd[H2(off[beat])] = data; \ @@ -520,13 +546,15 @@ DO_VLD4W(vld43w, 6, 7, 8, 9) static const uint8_t off[4] = { O1, O2, O3, O4 }; \ uint32_t addr, data; \ uint32_t *qd; \ + int mmu_idx = arm_to_core_mmu_idx(arm_mmu_idx(env)); \ + MemOpIdx oi = make_memop_idx(MO_TEUL | MO_ALIGN, mmu_idx); \ for (beat = 0; beat < 4; beat++, mask >>= 4) { \ if ((mask & 1) == 0) { \ /* ECI says skip this beat */ \ continue; \ } \ addr = base + off[beat]; \ - data = cpu_ldl_le_data_ra(env, addr, GETPC()); \ + data = cpu_ldl_mmu(env, addr, oi, GETPC()); \ qd = (uint32_t *)aa32_vfp_qreg(env, qnidx + (beat & 1)); \ qd[H4(off[beat] >> 3)] = data; \ } \ @@ -549,6 +577,8 @@ DO_VLD2W(vld21w, 8, 12, 16, 20) uint16_t mask = mve_eci_mask(env); \ static const uint8_t off[4] = { O1, O2, O3, O4 }; \ uint32_t addr, data; \ + int mmu_idx = arm_to_core_mmu_idx(arm_mmu_idx(env)); \ + MemOpIdx oi = make_memop_idx(MO_TEUL | MO_ALIGN, mmu_idx); \ for (beat = 0; beat < 4; beat++, mask >>= 4) { \ if ((mask & 1) == 0) { \ /* ECI says skip this beat */ \ @@ -560,7 +590,7 @@ DO_VLD2W(vld21w, 8, 12, 16, 20) uint8_t *qd = (uint8_t *)aa32_vfp_qreg(env, qnidx + e); \ data = (data << 8) | qd[H1(off[beat])]; \ } \ - cpu_stl_le_data_ra(env, addr, data, GETPC()); \ + cpu_stl_mmu(env, addr, data, oi, GETPC()); \ } \ } @@ -574,6 +604,8 @@ DO_VLD2W(vld21w, 8, 12, 16, 20) uint32_t addr, data; \ int y; /* y counts 0 2 0 2 */ \ uint16_t *qd; \ + int mmu_idx = arm_to_core_mmu_idx(arm_mmu_idx(env)); \ + MemOpIdx oi = make_memop_idx(MO_TEUL | MO_ALIGN, mmu_idx); \ for (beat = 0, y = 0; beat < 4; beat++, mask >>= 4, y ^= 2) { \ if ((mask & 1) == 0) { \ /* ECI says skip this beat */ \ @@ -584,7 +616,7 @@ DO_VLD2W(vld21w, 8, 12, 16, 20) data = qd[H2(off[beat])]; \ qd = (uint16_t *)aa32_vfp_qreg(env, qnidx + y + 1); \ data |= qd[H2(off[beat])] << 16; \ - cpu_stl_le_data_ra(env, addr, data, GETPC()); \ + cpu_stl_mmu(env, addr, data, oi, GETPC()); \ } \ } @@ -598,6 +630,8 @@ DO_VLD2W(vld21w, 8, 12, 16, 20) uint32_t addr, data; \ uint32_t *qd; \ int y; \ + int mmu_idx = arm_to_core_mmu_idx(arm_mmu_idx(env)); \ + MemOpIdx oi = make_memop_idx(MO_TEUL | MO_ALIGN, mmu_idx); \ for (beat = 0; beat < 4; beat++, mask >>= 4) { \ if ((mask & 1) == 0) { \ /* ECI says skip this beat */ \ @@ -607,7 +641,7 @@ DO_VLD2W(vld21w, 8, 12, 16, 20) y = (beat + (O1 & 2)) & 3; \ qd = (uint32_t *)aa32_vfp_qreg(env, qnidx + y); \ data = qd[H4(off[beat] >> 2)]; \ - cpu_stl_le_data_ra(env, addr, data, GETPC()); \ + cpu_stl_mmu(env, addr, data, oi, GETPC()); \ } \ } @@ -635,6 +669,8 @@ DO_VST4W(vst43w, 6, 7, 8, 9) static const uint8_t off[4] = { O1, O2, O3, O4 }; \ uint32_t addr, data; \ uint8_t *qd; \ + int mmu_idx = arm_to_core_mmu_idx(arm_mmu_idx(env)); \ + MemOpIdx oi = 
make_memop_idx(MO_TEUL | MO_ALIGN, mmu_idx); \ for (beat = 0; beat < 4; beat++, mask >>= 4) { \ if ((mask & 1) == 0) { \ /* ECI says skip this beat */ \ @@ -646,7 +682,7 @@ DO_VST4W(vst43w, 6, 7, 8, 9) qd = (uint8_t *)aa32_vfp_qreg(env, qnidx + (e & 1)); \ data = (data << 8) | qd[H1(off[beat] + (e >> 1))]; \ } \ - cpu_stl_le_data_ra(env, addr, data, GETPC()); \ + cpu_stl_mmu(env, addr, data, oi, GETPC()); \ } \ } @@ -660,6 +696,8 @@ DO_VST4W(vst43w, 6, 7, 8, 9) uint32_t addr, data; \ int e; \ uint16_t *qd; \ + int mmu_idx = arm_to_core_mmu_idx(arm_mmu_idx(env)); \ + MemOpIdx oi = make_memop_idx(MO_TEUL | MO_ALIGN, mmu_idx); \ for (beat = 0; beat < 4; beat++, mask >>= 4) { \ if ((mask & 1) == 0) { \ /* ECI says skip this beat */ \ @@ -671,7 +709,7 @@ DO_VST4W(vst43w, 6, 7, 8, 9) qd = (uint16_t *)aa32_vfp_qreg(env, qnidx + e); \ data = (data << 16) | qd[H2(off[beat])]; \ } \ - cpu_stl_le_data_ra(env, addr, data, GETPC()); \ + cpu_stl_mmu(env, addr, data, oi, GETPC()); \ } \ } @@ -684,6 +722,8 @@ DO_VST4W(vst43w, 6, 7, 8, 9) static const uint8_t off[4] = { O1, O2, O3, O4 }; \ uint32_t addr, data; \ uint32_t *qd; \ + int mmu_idx = arm_to_core_mmu_idx(arm_mmu_idx(env)); \ + MemOpIdx oi = make_memop_idx(MO_TEUL | MO_ALIGN, mmu_idx); \ for (beat = 0; beat < 4; beat++, mask >>= 4) { \ if ((mask & 1) == 0) { \ /* ECI says skip this beat */ \ @@ -692,7 +732,7 @@ DO_VST4W(vst43w, 6, 7, 8, 9) addr = base + off[beat]; \ qd = (uint32_t *)aa32_vfp_qreg(env, qnidx + (beat & 1)); \ data = qd[H4(off[beat] >> 3)]; \ - cpu_stl_le_data_ra(env, addr, data, GETPC()); \ + cpu_stl_mmu(env, addr, data, oi, GETPC()); \ } \ } @@ -2164,27 +2204,6 @@ DO_VSHLL_ALL(vshllt, true) DO_VSHRN(OP##tb, true, 1, uint8_t, 2, uint16_t, FN) \ DO_VSHRN(OP##th, true, 2, uint16_t, 4, uint32_t, FN) -static inline uint64_t do_urshr(uint64_t x, unsigned sh) -{ - if (likely(sh < 64)) { - return (x >> sh) + ((x >> (sh - 1)) & 1); - } else if (sh == 64) { - return x >> 63; - } else { - return 0; - } -} - -static inline int64_t do_srshr(int64_t x, unsigned sh) -{ - if (likely(sh < 64)) { - return (x >> sh) + ((x >> (sh - 1)) & 1); - } else { - /* Rounding the sign bit always produces 0. 
*/ - return 0; - } -} - DO_VSHRN_ALL(vshrn, DO_SHR) DO_VSHRN_ALL(vrshrn, do_urshr) diff --git a/target/arm/tcg/neon_helper.c b/target/arm/tcg/neon_helper.c index 2cc8241..8d288f3 100644 --- a/target/arm/tcg/neon_helper.c +++ b/target/arm/tcg/neon_helper.c @@ -229,15 +229,30 @@ NEON_GVEC_VOP2(gvec_srshl_h, int16_t) #undef NEON_FN #define NEON_FN(dest, src1, src2) \ + (dest = do_sqrshl_bhs(src1, src2, 16, true, NULL)) +NEON_GVEC_VOP2(sme2_srshl_h, int16_t) +#undef NEON_FN + +#define NEON_FN(dest, src1, src2) \ (dest = do_sqrshl_bhs(src1, (int8_t)src2, 32, true, NULL)) NEON_GVEC_VOP2(gvec_srshl_s, int32_t) #undef NEON_FN #define NEON_FN(dest, src1, src2) \ + (dest = do_sqrshl_bhs(src1, src2, 32, true, NULL)) +NEON_GVEC_VOP2(sme2_srshl_s, int32_t) +#undef NEON_FN + +#define NEON_FN(dest, src1, src2) \ (dest = do_sqrshl_d(src1, (int8_t)src2, true, NULL)) NEON_GVEC_VOP2(gvec_srshl_d, int64_t) #undef NEON_FN +#define NEON_FN(dest, src1, src2) \ + (dest = do_sqrshl_d(src1, src2, true, NULL)) +NEON_GVEC_VOP2(sme2_srshl_d, int64_t) +#undef NEON_FN + uint32_t HELPER(neon_rshl_s32)(uint32_t val, uint32_t shift) { return do_sqrshl_bhs(val, (int8_t)shift, 32, true, NULL); @@ -261,15 +276,30 @@ NEON_GVEC_VOP2(gvec_urshl_h, uint16_t) #undef NEON_FN #define NEON_FN(dest, src1, src2) \ + (dest = do_uqrshl_bhs(src1, (int16_t)src2, 16, true, NULL)) +NEON_GVEC_VOP2(sme2_urshl_h, uint16_t) +#undef NEON_FN + +#define NEON_FN(dest, src1, src2) \ (dest = do_uqrshl_bhs(src1, (int8_t)src2, 32, true, NULL)) NEON_GVEC_VOP2(gvec_urshl_s, int32_t) #undef NEON_FN #define NEON_FN(dest, src1, src2) \ + (dest = do_uqrshl_bhs(src1, src2, 32, true, NULL)) +NEON_GVEC_VOP2(sme2_urshl_s, int32_t) +#undef NEON_FN + +#define NEON_FN(dest, src1, src2) \ (dest = do_uqrshl_d(src1, (int8_t)src2, true, NULL)) NEON_GVEC_VOP2(gvec_urshl_d, int64_t) #undef NEON_FN +#define NEON_FN(dest, src1, src2) \ + (dest = do_uqrshl_d(src1, src2, true, NULL)) +NEON_GVEC_VOP2(sme2_urshl_d, int64_t) +#undef NEON_FN + uint32_t HELPER(neon_rshl_u32)(uint32_t val, uint32_t shift) { return do_uqrshl_bhs(val, (int8_t)shift, 32, true, NULL); diff --git a/target/arm/tcg/op_helper.c b/target/arm/tcg/op_helper.c index 575e566..4fbd219 100644 --- a/target/arm/tcg/op_helper.c +++ b/target/arm/tcg/op_helper.c @@ -46,7 +46,7 @@ int exception_target_el(CPUARMState *env) } void raise_exception(CPUARMState *env, uint32_t excp, - uint32_t syndrome, uint32_t target_el) + uint64_t syndrome, uint32_t target_el) { CPUState *cs = env_cpu(env); @@ -70,7 +70,7 @@ void raise_exception(CPUARMState *env, uint32_t excp, cpu_loop_exit(cs); } -void raise_exception_ra(CPUARMState *env, uint32_t excp, uint32_t syndrome, +void raise_exception_ra(CPUARMState *env, uint32_t excp, uint64_t syndrome, uint32_t target_el, uintptr_t ra) { CPUState *cs = env_cpu(env); @@ -768,12 +768,6 @@ const void *HELPER(access_check_cp_reg)(CPUARMState *env, uint32_t key, assert(ri != NULL); - if (arm_feature(env, ARM_FEATURE_XSCALE) && ri->cp < 14 - && extract32(env->cp15.c15_cpar, ri->cp, 1) == 0) { - res = CP_ACCESS_UNDEFINED; - goto fail; - } - if (ri->accessfn) { res = ri->accessfn(env, ri, isread); } @@ -887,6 +881,13 @@ const void *HELPER(access_check_cp_reg)(CPUARMState *env, uint32_t key, } syndrome = syn_uncategorized(); break; + case CP_ACCESS_EXLOCK: + /* + * CP_ACCESS_EXLOCK is always directed to the current EL, + * which is going to be the same as the usual target EL. 
+ */ + syndrome = syn_gcs_exlock(); + break; default: g_assert_not_reached(); } diff --git a/target/arm/tcg/sme.decode b/target/arm/tcg/sme.decode index 628804e..6bb9aa2 100644 --- a/target/arm/tcg/sme.decode +++ b/target/arm/tcg/sme.decode @@ -22,30 +22,139 @@ ### SME Misc ZERO 11000000 00 001 00000000000 imm:8 +ZERO_zt0 11000000 01 001 00000000000 00000001 ### SME Move into/from Array %mova_rs 13:2 !function=plus_12 -&mova esz rs pg zr za_imm v:bool to_vec:bool +%mova_rv 13:2 !function=plus_8 +&mova_a rv zr off +&mova_p esz rs pg zr za off v:bool +&mova_t esz rs zr za off v:bool -MOVA 11000000 esz:2 00000 0 v:1 .. pg:3 zr:5 0 za_imm:4 \ - &mova to_vec=0 rs=%mova_rs -MOVA 11000000 11 00000 1 v:1 .. pg:3 zr:5 0 za_imm:4 \ - &mova to_vec=0 rs=%mova_rs esz=4 +MOVA_tz 11000000 00 00000 0 v:1 .. pg:3 zr:5 0 off:4 \ + &mova_p rs=%mova_rs esz=0 za=0 +MOVA_tz 11000000 01 00000 0 v:1 .. pg:3 zr:5 0 za:1 off:3 \ + &mova_p rs=%mova_rs esz=1 +MOVA_tz 11000000 10 00000 0 v:1 .. pg:3 zr:5 0 za:2 off:2 \ + &mova_p rs=%mova_rs esz=2 +MOVA_tz 11000000 11 00000 0 v:1 .. pg:3 zr:5 0 za:3 off:1 \ + &mova_p rs=%mova_rs esz=3 +MOVA_tz 11000000 11 00000 1 v:1 .. pg:3 zr:5 0 za:4 \ + &mova_p rs=%mova_rs esz=4 off=0 -MOVA 11000000 esz:2 00001 0 v:1 .. pg:3 0 za_imm:4 zr:5 \ - &mova to_vec=1 rs=%mova_rs -MOVA 11000000 11 00001 1 v:1 .. pg:3 0 za_imm:4 zr:5 \ - &mova to_vec=1 rs=%mova_rs esz=4 +MOVA_zt 11000000 00 00001 0 v:1 .. pg:3 0 off:4 zr:5 \ + &mova_p rs=%mova_rs esz=0 za=0 +MOVA_zt 11000000 01 00001 0 v:1 .. pg:3 0 za:1 off:3 zr:5 \ + &mova_p rs=%mova_rs esz=1 +MOVA_zt 11000000 10 00001 0 v:1 .. pg:3 0 za:2 off:2 zr:5 \ + &mova_p rs=%mova_rs esz=2 +MOVA_zt 11000000 11 00001 0 v:1 .. pg:3 0 za:3 off:1 zr:5 \ + &mova_p rs=%mova_rs esz=3 +MOVA_zt 11000000 11 00001 1 v:1 .. pg:3 0 za:4 zr:5 \ + &mova_p rs=%mova_rs esz=4 off=0 + +MOVA_tz2 11000000 00 00010 0 v:1 .. 000 zr:4 0 00 off:3 \ + &mova_t rs=%mova_rs esz=0 za=0 +MOVA_tz2 11000000 01 00010 0 v:1 .. 000 zr:4 0 00 za:1 off:2 \ + &mova_t rs=%mova_rs esz=1 +MOVA_tz2 11000000 10 00010 0 v:1 .. 000 zr:4 0 00 za:2 off:1 \ + &mova_t rs=%mova_rs esz=2 +MOVA_tz2 11000000 11 00010 0 v:1 .. 000 zr:4 0 00 za:3 \ + &mova_t rs=%mova_rs esz=3 off=0 + +MOVA_zt2 11000000 00 00011 0 v:1 .. 000 00 off:3 zr:4 0 \ + &mova_t rs=%mova_rs esz=0 za=0 +MOVA_zt2 11000000 01 00011 0 v:1 .. 000 00 za:1 off:2 zr:4 0 \ + &mova_t rs=%mova_rs esz=1 +MOVA_zt2 11000000 10 00011 0 v:1 .. 000 00 za:2 off:1 zr:4 0 \ + &mova_t rs=%mova_rs esz=2 +MOVA_zt2 11000000 11 00011 0 v:1 .. 000 00 za:3 zr:4 0 \ + &mova_t rs=%mova_rs esz=3 off=0 + +MOVA_tz4 11000000 00 00010 0 v:1 .. 001 zr:3 00 000 off:2 \ + &mova_t rs=%mova_rs esz=0 za=0 +MOVA_tz4 11000000 01 00010 0 v:1 .. 001 zr:3 00 000 za:1 off:1 \ + &mova_t rs=%mova_rs esz=1 +MOVA_tz4 11000000 10 00010 0 v:1 .. 001 zr:3 00 000 za:2 \ + &mova_t rs=%mova_rs esz=2 off=0 +MOVA_tz4 11000000 11 00010 0 v:1 .. 001 zr:3 00 00 za:3 \ + &mova_t rs=%mova_rs esz=3 off=0 + +MOVA_zt4 11000000 00 00011 0 v:1 .. 001 000 off:2 zr:3 00 \ + &mova_t rs=%mova_rs esz=0 za=0 +MOVA_zt4 11000000 01 00011 0 v:1 .. 001 000 za:1 off:1 zr:3 00 \ + &mova_t rs=%mova_rs esz=1 +MOVA_zt4 11000000 10 00011 0 v:1 .. 001 000 za:2 zr:3 00 \ + &mova_t rs=%mova_rs esz=2 off=0 +MOVA_zt4 11000000 11 00011 0 v:1 .. 001 00 za:3 zr:3 00 \ + &mova_t rs=%mova_rs esz=3 off=0 + +MOVA_az2 11000000 00 00010 00 .. 010 zr:4 000 off:3 \ + &mova_a rv=%mova_rv +MOVA_az4 11000000 00 00010 00 .. 011 zr:3 0000 off:3 \ + &mova_a rv=%mova_rv + +MOVA_za2 11000000 00 00011 00 .. 
010 00 off:3 zr:4 0 \ + &mova_a rv=%mova_rv +MOVA_za4 11000000 00 00011 00 .. 011 00 off:3 zr:3 00 \ + &mova_a rv=%mova_rv + +### SME Move and Zero + +MOVAZ_za2 11000000 00000110 0 .. 01010 off:3 zr:4 0 \ + &mova_a rv=%mova_rv +MOVAZ_za4 11000000 00000110 0 .. 01110 off:3 zr:3 00 \ + &mova_a rv=%mova_rv + +MOVAZ_zt 11000000 00 00001 0 v:1 .. 0001 off:4 zr:5 \ + &mova_t rs=%mova_rs esz=0 za=0 +MOVAZ_zt 11000000 01 00001 0 v:1 .. 0001 za:1 off:3 zr:5 \ + &mova_t rs=%mova_rs esz=1 +MOVAZ_zt 11000000 10 00001 0 v:1 .. 0001 za:2 off:2 zr:5 \ + &mova_t rs=%mova_rs esz=2 +MOVAZ_zt 11000000 11 00001 0 v:1 .. 0001 za:3 off:1 zr:5 \ + &mova_t rs=%mova_rs esz=3 +MOVAZ_zt 11000000 11 00001 1 v:1 .. 0001 za:4 zr:5 \ + &mova_t rs=%mova_rs esz=4 off=0 + +MOVAZ_zt2 11000000 00 00011 0 v:1 .. 00010 off:3 zr:4 0 \ + &mova_t rs=%mova_rs esz=0 za=0 +MOVAZ_zt2 11000000 01 00011 0 v:1 .. 00010 za:1 off:2 zr:4 0 \ + &mova_t rs=%mova_rs esz=1 +MOVAZ_zt2 11000000 10 00011 0 v:1 .. 00010 za:2 off:1 zr:4 0 \ + &mova_t rs=%mova_rs esz=2 +MOVAZ_zt2 11000000 11 00011 0 v:1 .. 00010 za:3 zr:4 0 \ + &mova_t rs=%mova_rs esz=3 off=0 + +MOVAZ_zt4 11000000 00 00011 0 v:1 .. 001100 off:2 zr:3 00 \ + &mova_t rs=%mova_rs esz=0 za=0 +MOVAZ_zt4 11000000 01 00011 0 v:1 .. 001100 za:1 off:1 zr:3 00 \ + &mova_t rs=%mova_rs esz=1 +MOVAZ_zt4 11000000 10 00011 0 v:1 .. 001100 za:2 zr:3 00 \ + &mova_t rs=%mova_rs esz=2 off=0 +MOVAZ_zt4 11000000 11 00011 0 v:1 .. 00110 za:3 zr:3 00 \ + &mova_t rs=%mova_rs esz=3 off=0 + +### SME Move into/from ZT0 + +MOVT_rzt 1100 0000 0100 1100 0 off:3 00 11111 rt:5 +MOVT_ztr 1100 0000 0100 1110 0 off:3 00 11111 rt:5 ### SME Memory -&ldst esz rs pg rn rm za_imm v:bool st:bool +&ldst esz rs pg rn rm za off v:bool st:bool -LDST1 1110000 0 esz:2 st:1 rm:5 v:1 .. pg:3 rn:5 0 za_imm:4 \ - &ldst rs=%mova_rs -LDST1 1110000 111 st:1 rm:5 v:1 .. pg:3 rn:5 0 za_imm:4 \ - &ldst esz=4 rs=%mova_rs +LDST1 1110000 0 00 st:1 rm:5 v:1 .. pg:3 rn:5 0 off:4 \ + &ldst rs=%mova_rs esz=0 za=0 +LDST1 1110000 0 01 st:1 rm:5 v:1 .. pg:3 rn:5 0 za:1 off:3 \ + &ldst rs=%mova_rs esz=1 +LDST1 1110000 0 10 st:1 rm:5 v:1 .. pg:3 rn:5 0 za:2 off:2 \ + &ldst rs=%mova_rs esz=2 +LDST1 1110000 0 11 st:1 rm:5 v:1 .. pg:3 rn:5 0 za:3 off:1 \ + &ldst rs=%mova_rs esz=3 +LDST1 1110000 1 11 st:1 rm:5 v:1 .. pg:3 rn:5 0 za:4 \ + &ldst rs=%mova_rs esz=4 off=0 &ldstr rv rn imm @ldstr ....... ... . ...... .. ... rn:5 . imm:4 \ @@ -54,6 +163,12 @@ LDST1 1110000 111 st:1 rm:5 v:1 .. pg:3 rn:5 0 za_imm:4 \ LDR 1110000 100 0 000000 .. 000 ..... 0 .... @ldstr STR 1110000 100 1 000000 .. 000 ..... 0 .... @ldstr +&ldstzt0 rn +@ldstzt0 ....... ... . ...... .. ... rn:5 ..... &ldstzt0 + +LDR_zt0 1110000 100 0 111111 00 000 ..... 00000 @ldstzt0 +STR_zt0 1110000 100 1 111111 00 000 ..... 00000 @ldstzt0 + ### SME Add Vector to Array &adda zad zn pm pn @@ -68,14 +183,18 @@ ADDVA_d 11000000 11 01000 1 ... ... ..... 00 ... @adda_64 ### SME Outer Product &op zad zn zm pm pn sub:bool +@op_16 ........ ... zm:5 pm:3 pn:3 zn:5 sub:1 ... zad:1 &op @op_32 ........ ... zm:5 pm:3 pn:3 zn:5 sub:1 .. zad:2 &op @op_64 ........ ... zm:5 pm:3 pn:3 zn:5 sub:1 . zad:3 &op +FMOPA_h 10000001 100 ..... ... ... ..... . 100 . @op_16 FMOPA_s 10000000 100 ..... ... ... ..... . 00 .. @op_32 FMOPA_d 10000000 110 ..... ... ... ..... . 0 ... @op_64 -BFMOPA 10000001 100 ..... ... ... ..... . 00 .. @op_32 -FMOPA_h 10000001 101 ..... ... ... ..... . 00 .. @op_32 +BFMOPA 10000001 101 ..... ... ... ..... . 100 . @op_16 + +BFMOPA_w 10000001 100 ..... ... ... ..... . 00 .. 
@op_32 +FMOPA_w_h 10000001 101 ..... ... ... ..... . 00 .. @op_32 SMOPA_s 1010000 0 10 0 ..... ... ... ..... . 00 .. @op_32 SUMOPA_s 1010000 0 10 1 ..... ... ... ..... . 00 .. @op_32 @@ -86,3 +205,789 @@ SMOPA_d 1010000 0 11 0 ..... ... ... ..... . 0 ... @op_64 SUMOPA_d 1010000 0 11 1 ..... ... ... ..... . 0 ... @op_64 USMOPA_d 1010000 1 11 0 ..... ... ... ..... . 0 ... @op_64 UMOPA_d 1010000 1 11 1 ..... ... ... ..... . 0 ... @op_64 + +BMOPA 1000000 0 10 0 ..... ... ... ..... . 10 .. @op_32 +SMOPA2_s 1010000 0 10 0 ..... ... ... ..... . 10 .. @op_32 +UMOPA2_s 1010000 1 10 0 ..... ... ... ..... . 10 .. @op_32 + +### SME2 Multi-vector Multiple and Single SVE Destructive + +%zd_ax2 1:4 !function=times_2 +%zd_ax4 2:3 !function=times_4 + +&z2z_en zdn zm esz n +@z2z_2x1 ....... . esz:2 .. zm:4 ....0. ..... .... . \ + &z2z_en n=2 zdn=%zd_ax2 +@z2z_4x1 ....... . esz:2 .. zm:4 ....1. ..... ...0 . \ + &z2z_en n=4 zdn=%zd_ax4 + +SMAX_n1 1100000 1 .. 10 .... 1010.0 00000 .... 0 @z2z_2x1 +SMAX_n1 1100000 1 .. 10 .... 1010.0 00000 .... 0 @z2z_4x1 +UMAX_n1 1100000 1 .. 10 .... 1010.0 00000 .... 1 @z2z_2x1 +UMAX_n1 1100000 1 .. 10 .... 1010.0 00000 .... 1 @z2z_4x1 +SMIN_n1 1100000 1 .. 10 .... 1010.0 00001 .... 0 @z2z_2x1 +SMIN_n1 1100000 1 .. 10 .... 1010.0 00001 .... 0 @z2z_4x1 +UMIN_n1 1100000 1 .. 10 .... 1010.0 00001 .... 1 @z2z_2x1 +UMIN_n1 1100000 1 .. 10 .... 1010.0 00001 .... 1 @z2z_4x1 + +FMAX_n1 1100000 1 .. 10 .... 1010.0 01000 .... 0 @z2z_2x1 +FMAX_n1 1100000 1 .. 10 .... 1010.0 01000 .... 0 @z2z_4x1 +FMIN_n1 1100000 1 .. 10 .... 1010.0 01000 .... 1 @z2z_2x1 +FMIN_n1 1100000 1 .. 10 .... 1010.0 01000 .... 1 @z2z_4x1 +FMAXNM_n1 1100000 1 .. 10 .... 1010.0 01001 .... 0 @z2z_2x1 +FMAXNM_n1 1100000 1 .. 10 .... 1010.0 01001 .... 0 @z2z_4x1 +FMINNM_n1 1100000 1 .. 10 .... 1010.0 01001 .... 1 @z2z_2x1 +FMINNM_n1 1100000 1 .. 10 .... 1010.0 01001 .... 1 @z2z_4x1 + +SRSHL_n1 1100000 1 .. 10 .... 1010.0 10001 .... 0 @z2z_2x1 +SRSHL_n1 1100000 1 .. 10 .... 1010.0 10001 .... 0 @z2z_4x1 +URSHL_n1 1100000 1 .. 10 .... 1010.0 10001 .... 1 @z2z_2x1 +URSHL_n1 1100000 1 .. 10 .... 1010.0 10001 .... 1 @z2z_4x1 + +ADD_n1 1100000 1 .. 10 .... 1010.0 11000 .... 0 @z2z_2x1 +ADD_n1 1100000 1 .. 10 .... 1010.0 11000 .... 0 @z2z_4x1 + +SQDMULH_n1 1100000 1 .. 10 .... 1010.1 00000 .... 0 @z2z_2x1 +SQDMULH_n1 1100000 1 .. 10 .... 1010.1 00000 .... 0 @z2z_4x1 + +### SME2 Multi-vector Multiple Vectors SVE Destructive + +%zm_ax2 17:4 !function=times_2 +%zm_ax4 18:3 !function=times_4 + +@z2z_2x2 ....... . esz:2 . ....0 ....0. ..... .... . \ + &z2z_en n=2 zdn=%zd_ax2 zm=%zm_ax2 +@z2z_4x4 ....... . esz:2 . ...00 ....1. ..... ...0 . \ + &z2z_en n=4 zdn=%zd_ax4 zm=%zm_ax4 + +SMAX_nn 1100000 1 .. 1 ..... 1011.0 00000 .... 0 @z2z_2x2 +SMAX_nn 1100000 1 .. 1 ..... 1011.0 00000 .... 0 @z2z_4x4 +UMAX_nn 1100000 1 .. 1 ..... 1011.0 00000 .... 1 @z2z_2x2 +UMAX_nn 1100000 1 .. 1 ..... 1011.0 00000 .... 1 @z2z_4x4 +SMIN_nn 1100000 1 .. 1 ..... 1011.0 00001 .... 0 @z2z_2x2 +SMIN_nn 1100000 1 .. 1 ..... 1011.0 00001 .... 0 @z2z_4x4 +UMIN_nn 1100000 1 .. 1 ..... 1011.0 00001 .... 1 @z2z_2x2 +UMIN_nn 1100000 1 .. 1 ..... 1011.0 00001 .... 1 @z2z_4x4 + +FMAX_nn 1100000 1 .. 1 ..... 1011.0 01000 .... 0 @z2z_2x2 +FMAX_nn 1100000 1 .. 1 ..... 1011.0 01000 .... 0 @z2z_4x4 +FMIN_nn 1100000 1 .. 1 ..... 1011.0 01000 .... 1 @z2z_2x2 +FMIN_nn 1100000 1 .. 1 ..... 1011.0 01000 .... 1 @z2z_4x4 +FMAXNM_nn 1100000 1 .. 1 ..... 1011.0 01001 .... 0 @z2z_2x2 +FMAXNM_nn 1100000 1 .. 1 ..... 1011.0 01001 .... 0 @z2z_4x4 +FMINNM_nn 1100000 1 .. 1 ..... 
1011.0 01001 .... 1 @z2z_2x2 +FMINNM_nn 1100000 1 .. 1 ..... 1011.0 01001 .... 1 @z2z_4x4 + +SRSHL_nn 1100000 1 .. 1 ..... 1011.0 10001 .... 0 @z2z_2x2 +SRSHL_nn 1100000 1 .. 1 ..... 1011.0 10001 .... 0 @z2z_4x4 +URSHL_nn 1100000 1 .. 1 ..... 1011.0 10001 .... 1 @z2z_2x2 +URSHL_nn 1100000 1 .. 1 ..... 1011.0 10001 .... 1 @z2z_4x4 + +SQDMULH_nn 1100000 1 .. 1 ..... 1011.1 00000 .... 0 @z2z_2x2 +SQDMULH_nn 1100000 1 .. 1 ..... 1011.1 00000 .... 0 @z2z_4x4 + +### SME2 Multi-vector Multiple and Single Array Vectors + +&azz_n n off rv zn zm +@azz_nx1_o3 ........ .... zm:4 ...... zn:5 .. off:3 &azz_n rv=%mova_rv + +ADD_azz_n1_s 11000001 0010 .... 0 .. 110 ..... 10 ... @azz_nx1_o3 n=2 +ADD_azz_n1_s 11000001 0011 .... 0 .. 110 ..... 10 ... @azz_nx1_o3 n=4 +ADD_azz_n1_d 11000001 0110 .... 0 .. 110 ..... 10 ... @azz_nx1_o3 n=2 +ADD_azz_n1_d 11000001 0111 .... 0 .. 110 ..... 10 ... @azz_nx1_o3 n=4 + +SUB_azz_n1_s 11000001 0010 .... 0 .. 110 ..... 11 ... @azz_nx1_o3 n=2 +SUB_azz_n1_s 11000001 0011 .... 0 .. 110 ..... 11 ... @azz_nx1_o3 n=4 +SUB_azz_n1_d 11000001 0110 .... 0 .. 110 ..... 11 ... @azz_nx1_o3 n=2 +SUB_azz_n1_d 11000001 0111 .... 0 .. 110 ..... 11 ... @azz_nx1_o3 n=4 + +%off3_x2 0:3 !function=times_2 +%off2_x2 0:2 !function=times_2 + +@azz_nx1_o3x2 ........ ... . zm:4 . .. ... zn:5 .. ... \ + &azz_n off=%off3_x2 rv=%mova_rv +@azz_nx1_o2x2 ........ ... . zm:4 . .. ... zn:5 ... .. \ + &azz_n off=%off2_x2 rv=%mova_rv + +FMLAL_n1 11000001 001 0 .... 0 .. 011 ..... 00 ... @azz_nx1_o3x2 n=1 +FMLAL_n1 11000001 001 0 .... 0 .. 010 ..... 000 .. @azz_nx1_o2x2 n=2 +FMLAL_n1 11000001 001 1 .... 0 .. 010 ..... 000 .. @azz_nx1_o2x2 n=4 + +FMLSL_n1 11000001 001 0 .... 0 .. 011 ..... 01 ... @azz_nx1_o3x2 n=1 +FMLSL_n1 11000001 001 0 .... 0 .. 010 ..... 010 .. @azz_nx1_o2x2 n=2 +FMLSL_n1 11000001 001 1 .... 0 .. 010 ..... 010 .. @azz_nx1_o2x2 n=4 + +BFMLAL_n1 11000001 001 0 .... 0 .. 011 ..... 10 ... @azz_nx1_o3x2 n=1 +BFMLAL_n1 11000001 001 0 .... 0 .. 010 ..... 100 .. @azz_nx1_o2x2 n=2 +BFMLAL_n1 11000001 001 1 .... 0 .. 010 ..... 100 .. @azz_nx1_o2x2 n=4 + +BFMLSL_n1 11000001 001 0 .... 0 .. 011 ..... 11 ... @azz_nx1_o3x2 n=1 +BFMLSL_n1 11000001 001 0 .... 0 .. 010 ..... 110 .. @azz_nx1_o2x2 n=2 +BFMLSL_n1 11000001 001 1 .... 0 .. 010 ..... 110 .. @azz_nx1_o2x2 n=4 + +FDOT_n1 11000001 001 0 .... 0 .. 100 ..... 00 ... @azz_nx1_o3 n=2 +FDOT_n1 11000001 001 1 .... 0 .. 100 ..... 00 ... @azz_nx1_o3 n=4 + +BFDOT_n1 11000001 001 0 .... 0 .. 100 ..... 10 ... @azz_nx1_o3 n=2 +BFDOT_n1 11000001 001 1 .... 0 .. 100 ..... 10 ... @azz_nx1_o3 n=4 + +USDOT_n1 11000001 001 0 .... 0 .. 101 ..... 01 ... @azz_nx1_o3 n=2 +USDOT_n1 11000001 001 1 .... 0 .. 101 ..... 01 ... @azz_nx1_o3 n=4 + +SUDOT_n1 11000001 001 0 .... 0 .. 101 ..... 11 ... @azz_nx1_o3 n=2 +SUDOT_n1 11000001 001 1 .... 0 .. 101 ..... 11 ... @azz_nx1_o3 n=4 + +SDOT_n1_4b 11000001 001 0 .... 0 .. 101 ..... 00 ... @azz_nx1_o3 n=2 +SDOT_n1_4b 11000001 001 1 .... 0 .. 101 ..... 00 ... @azz_nx1_o3 n=4 +SDOT_n1_4h 11000001 011 0 .... 0 .. 101 ..... 00 ... @azz_nx1_o3 n=2 +SDOT_n1_4h 11000001 011 1 .... 0 .. 101 ..... 00 ... @azz_nx1_o3 n=4 +SDOT_n1_2h 11000001 011 0 .... 0 .. 101 ..... 01 ... @azz_nx1_o3 n=2 +SDOT_n1_2h 11000001 011 1 .... 0 .. 101 ..... 01 ... @azz_nx1_o3 n=4 + +UDOT_n1_4b 11000001 001 0 .... 0 .. 101 ..... 10 ... @azz_nx1_o3 n=2 +UDOT_n1_4b 11000001 001 1 .... 0 .. 101 ..... 10 ... @azz_nx1_o3 n=4 +UDOT_n1_4h 11000001 011 0 .... 0 .. 101 ..... 10 ... @azz_nx1_o3 n=2 +UDOT_n1_4h 11000001 011 1 .... 0 .. 101 ..... 10 ... 
@azz_nx1_o3 n=4 +UDOT_n1_2h 11000001 011 0 .... 0 .. 101 ..... 11 ... @azz_nx1_o3 n=2 +UDOT_n1_2h 11000001 011 1 .... 0 .. 101 ..... 11 ... @azz_nx1_o3 n=4 + +SMLAL_n1 11000001 011 0 .... 0 .. 011 ..... 00 ... @azz_nx1_o3x2 n=1 +SMLAL_n1 11000001 011 0 .... 0 .. 010 ..... 000 .. @azz_nx1_o2x2 n=2 +SMLAL_n1 11000001 011 1 .... 0 .. 010 ..... 000 .. @azz_nx1_o2x2 n=4 + +SMLSL_n1 11000001 011 0 .... 0 .. 011 ..... 01 ... @azz_nx1_o3x2 n=1 +SMLSL_n1 11000001 011 0 .... 0 .. 010 ..... 010 .. @azz_nx1_o2x2 n=2 +SMLSL_n1 11000001 011 1 .... 0 .. 010 ..... 010 .. @azz_nx1_o2x2 n=4 + +UMLAL_n1 11000001 011 0 .... 0 .. 011 ..... 10 ... @azz_nx1_o3x2 n=1 +UMLAL_n1 11000001 011 0 .... 0 .. 010 ..... 100 .. @azz_nx1_o2x2 n=2 +UMLAL_n1 11000001 011 1 .... 0 .. 010 ..... 100 .. @azz_nx1_o2x2 n=4 + +UMLSL_n1 11000001 011 0 .... 0 .. 011 ..... 11 ... @azz_nx1_o3x2 n=1 +UMLSL_n1 11000001 011 0 .... 0 .. 010 ..... 110 .. @azz_nx1_o2x2 n=2 +UMLSL_n1 11000001 011 1 .... 0 .. 010 ..... 110 .. @azz_nx1_o2x2 n=4 + +%off2_x4 0:2 !function=times_4 +%off1_x4 0:1 !function=times_4 + +@azz_nx1_o2x4 ........ ... . zm:4 . .. ... zn:5 ... .. \ + &azz_n off=%off2_x4 rv=%mova_rv +@azz_nx1_o1x4 ........ ... . zm:4 . .. ... zn:5 .... . \ + &azz_n off=%off1_x4 rv=%mova_rv + +SMLALL_n1_s 11000001 001 0 .... 0 .. 001 ..... 000 .. @azz_nx1_o2x4 n=1 +SMLALL_n1_d 11000001 011 0 .... 0 .. 001 ..... 000 .. @azz_nx1_o2x4 n=1 +SMLALL_n1_s 11000001 001 0 .... 0 .. 000 ..... 0000 . @azz_nx1_o1x4 n=2 +SMLALL_n1_d 11000001 011 0 .... 0 .. 000 ..... 0000 . @azz_nx1_o1x4 n=2 +SMLALL_n1_s 11000001 001 1 .... 0 .. 000 ..... 0000 . @azz_nx1_o1x4 n=4 +SMLALL_n1_d 11000001 011 1 .... 0 .. 000 ..... 0000 . @azz_nx1_o1x4 n=4 + +SMLSLL_n1_s 11000001 001 0 .... 0 .. 001 ..... 010 .. @azz_nx1_o2x4 n=1 +SMLSLL_n1_d 11000001 011 0 .... 0 .. 001 ..... 010 .. @azz_nx1_o2x4 n=1 +SMLSLL_n1_s 11000001 001 0 .... 0 .. 000 ..... 0100 . @azz_nx1_o1x4 n=2 +SMLSLL_n1_d 11000001 011 0 .... 0 .. 000 ..... 0100 . @azz_nx1_o1x4 n=2 +SMLSLL_n1_s 11000001 001 1 .... 0 .. 000 ..... 0100 . @azz_nx1_o1x4 n=4 +SMLSLL_n1_d 11000001 011 1 .... 0 .. 000 ..... 0100 . @azz_nx1_o1x4 n=4 + +UMLALL_n1_s 11000001 001 0 .... 0 .. 001 ..... 100 .. @azz_nx1_o2x4 n=1 +UMLALL_n1_d 11000001 011 0 .... 0 .. 001 ..... 100 .. @azz_nx1_o2x4 n=1 +UMLALL_n1_s 11000001 001 0 .... 0 .. 000 ..... 1000 . @azz_nx1_o1x4 n=2 +UMLALL_n1_d 11000001 011 0 .... 0 .. 000 ..... 1000 . @azz_nx1_o1x4 n=2 +UMLALL_n1_s 11000001 001 1 .... 0 .. 000 ..... 1000 . @azz_nx1_o1x4 n=4 +UMLALL_n1_d 11000001 011 1 .... 0 .. 000 ..... 1000 . @azz_nx1_o1x4 n=4 + +UMLSLL_n1_s 11000001 001 0 .... 0 .. 001 ..... 110 .. @azz_nx1_o2x4 n=1 +UMLSLL_n1_d 11000001 011 0 .... 0 .. 001 ..... 110 .. @azz_nx1_o2x4 n=1 +UMLSLL_n1_s 11000001 001 0 .... 0 .. 000 ..... 1100 . @azz_nx1_o1x4 n=2 +UMLSLL_n1_d 11000001 011 0 .... 0 .. 000 ..... 1100 . @azz_nx1_o1x4 n=2 +UMLSLL_n1_s 11000001 001 1 .... 0 .. 000 ..... 1100 . @azz_nx1_o1x4 n=4 +UMLSLL_n1_d 11000001 011 1 .... 0 .. 000 ..... 1100 . @azz_nx1_o1x4 n=4 + +USMLALL_n1_s 11000001 001 0 .... 0 .. 001 ..... 001 .. @azz_nx1_o2x4 n=1 +USMLALL_n1_s 11000001 001 0 .... 0 .. 000 ..... 0010 . @azz_nx1_o1x4 n=2 +USMLALL_n1_s 11000001 001 1 .... 0 .. 000 ..... 0010 . @azz_nx1_o1x4 n=4 + +SUMLALL_n1_s 11000001 001 0 .... 0 .. 000 ..... 1010 . @azz_nx1_o1x4 n=2 +SUMLALL_n1_s 11000001 001 1 .... 0 .. 000 ..... 1010 . @azz_nx1_o1x4 n=4 + +BFMLA_n1 11000001 011 0 .... 0 .. 111 ..... 00 ... @azz_nx1_o3 n=2 +FMLA_n1_h 11000001 001 0 .... 0 .. 111 ..... 00 ... 
@azz_nx1_o3 n=2 +FMLA_n1_s 11000001 001 0 .... 0 .. 110 ..... 00 ... @azz_nx1_o3 n=2 +FMLA_n1_d 11000001 011 0 .... 0 .. 110 ..... 00 ... @azz_nx1_o3 n=2 + +BFMLA_n1 11000001 011 1 .... 0 .. 111 ..... 00 ... @azz_nx1_o3 n=4 +FMLA_n1_h 11000001 001 1 .... 0 .. 111 ..... 00 ... @azz_nx1_o3 n=4 +FMLA_n1_s 11000001 001 1 .... 0 .. 110 ..... 00 ... @azz_nx1_o3 n=4 +FMLA_n1_d 11000001 011 1 .... 0 .. 110 ..... 00 ... @azz_nx1_o3 n=4 + +BFMLS_n1 11000001 011 0 .... 0 .. 111 ..... 01 ... @azz_nx1_o3 n=2 +FMLS_n1_h 11000001 001 0 .... 0 .. 111 ..... 01 ... @azz_nx1_o3 n=2 +FMLS_n1_s 11000001 001 0 .... 0 .. 110 ..... 01 ... @azz_nx1_o3 n=2 +FMLS_n1_d 11000001 011 0 .... 0 .. 110 ..... 01 ... @azz_nx1_o3 n=2 + +BFMLS_n1 11000001 011 1 .... 0 .. 111 ..... 01 ... @azz_nx1_o3 n=4 +FMLS_n1_h 11000001 001 1 .... 0 .. 111 ..... 01 ... @azz_nx1_o3 n=4 +FMLS_n1_s 11000001 001 1 .... 0 .. 110 ..... 01 ... @azz_nx1_o3 n=4 +FMLS_n1_d 11000001 011 1 .... 0 .. 110 ..... 01 ... @azz_nx1_o3 n=4 + +### SME2 Multi-vector Multiple Array Vectors + +%zn_ax2 6:4 !function=times_2 +%zn_ax4 7:3 !function=times_4 + +@azz_2x2_o3 ........ ... ..... . .. ... ..... .. off:3 \ + &azz_n n=2 rv=%mova_rv zn=%zn_ax2 zm=%zm_ax2 +@azz_4x4_o3 ........ ... ..... . .. ... ..... .. off:3 \ + &azz_n n=4 rv=%mova_rv zn=%zn_ax4 zm=%zm_ax4 + +ADD_azz_nn_s 11000001 101 ....0 0 .. 110 ....0 10 ... @azz_2x2_o3 +ADD_azz_nn_s 11000001 101 ...01 0 .. 110 ...00 10 ... @azz_4x4_o3 +ADD_azz_nn_d 11000001 111 ....0 0 .. 110 ....0 10 ... @azz_2x2_o3 +ADD_azz_nn_d 11000001 111 ...01 0 .. 110 ...00 10 ... @azz_4x4_o3 + +SUB_azz_nn_s 11000001 101 ....0 0 .. 110 ....0 11 ... @azz_2x2_o3 +SUB_azz_nn_s 11000001 101 ...01 0 .. 110 ...00 11 ... @azz_4x4_o3 +SUB_azz_nn_d 11000001 111 ....0 0 .. 110 ....0 11 ... @azz_2x2_o3 +SUB_azz_nn_d 11000001 111 ...01 0 .. 110 ...00 11 ... @azz_4x4_o3 + +@azz_2x2_o2x2 ........ ... ..... . .. ... ..... ... .. \ + &azz_n n=2 rv=%mova_rv zn=%zn_ax2 zm=%zm_ax2 off=%off2_x2 +@azz_4x4_o2x2 ........ ... ..... . .. ... ..... ... .. \ + &azz_n n=4 rv=%mova_rv zn=%zn_ax4 zm=%zm_ax4 off=%off2_x2 + +FMLAL_nn 11000001 101 ....0 0 .. 010 ....0 000 .. @azz_2x2_o2x2 +FMLAL_nn 11000001 101 ...01 0 .. 010 ...00 000 .. @azz_4x4_o2x2 + +FMLSL_nn 11000001 101 ....0 0 .. 010 ....0 010 .. @azz_2x2_o2x2 +FMLSL_nn 11000001 101 ...01 0 .. 010 ...00 010 .. @azz_4x4_o2x2 + +BFMLAL_nn 11000001 101 ....0 0 .. 010 ....0 100 .. @azz_2x2_o2x2 +BFMLAL_nn 11000001 101 ...01 0 .. 010 ...00 100 .. @azz_4x4_o2x2 + +BFMLSL_nn 11000001 101 ....0 0 .. 010 ....0 110 .. @azz_2x2_o2x2 +BFMLSL_nn 11000001 101 ...01 0 .. 010 ...00 110 .. @azz_4x4_o2x2 + +FDOT_nn 11000001 101 ....0 0 .. 100 ....0 00 ... @azz_2x2_o3 +FDOT_nn 11000001 101 ...01 0 .. 100 ...00 00 ... @azz_4x4_o3 + +BFDOT_nn 11000001 101 ....0 0 .. 100 ....0 10 ... @azz_2x2_o3 +BFDOT_nn 11000001 101 ...01 0 .. 100 ...00 10 ... @azz_4x4_o3 + +USDOT_nn 11000001 101 ....0 0 .. 101 ....0 01 ... @azz_2x2_o3 +USDOT_nn 11000001 101 ...01 0 .. 101 ...00 01 ... @azz_4x4_o3 + +SDOT_nn_4b 11000001 101 ....0 0 .. 101 ....0 00 ... @azz_2x2_o3 +SDOT_nn_4b 11000001 101 ...01 0 .. 101 ...00 00 ... @azz_4x4_o3 +SDOT_nn_4h 11000001 111 ....0 0 .. 101 ....0 00 ... @azz_2x2_o3 +SDOT_nn_4h 11000001 111 ...01 0 .. 101 ...00 00 ... @azz_4x4_o3 +SDOT_nn_2h 11000001 111 ....0 0 .. 101 ....0 01 ... @azz_2x2_o3 +SDOT_nn_2h 11000001 111 ...01 0 .. 101 ...00 01 ... @azz_4x4_o3 + +UDOT_nn_4b 11000001 101 ....0 0 .. 101 ....0 10 ... @azz_2x2_o3 +UDOT_nn_4b 11000001 101 ...01 0 .. 101 ...00 10 ... 
@azz_4x4_o3 +UDOT_nn_4h 11000001 111 ....0 0 .. 101 ....0 10 ... @azz_2x2_o3 +UDOT_nn_4h 11000001 111 ...01 0 .. 101 ...00 10 ... @azz_4x4_o3 +UDOT_nn_2h 11000001 111 ....0 0 .. 101 ....0 11 ... @azz_2x2_o3 +UDOT_nn_2h 11000001 111 ...01 0 .. 101 ...00 11 ... @azz_4x4_o3 + +SMLAL_nn 11000001 111 ....0 0 .. 010 ....0 000 .. @azz_2x2_o2x2 +SMLAL_nn 11000001 111 ...01 0 .. 010 ...00 000 .. @azz_4x4_o2x2 + +SMLSL_nn 11000001 111 ....0 0 .. 010 ....0 010 .. @azz_2x2_o2x2 +SMLSL_nn 11000001 111 ...01 0 .. 010 ...00 010 .. @azz_4x4_o2x2 + +UMLAL_nn 11000001 111 ....0 0 .. 010 ....0 100 .. @azz_2x2_o2x2 +UMLAL_nn 11000001 111 ...01 0 .. 010 ...00 100 .. @azz_4x4_o2x2 + +UMLSL_nn 11000001 111 ....0 0 .. 010 ....0 110 .. @azz_2x2_o2x2 +UMLSL_nn 11000001 111 ...01 0 .. 010 ...00 110 .. @azz_4x4_o2x2 + +@azz_2x2_o1x4 ........ ... ..... . .. ... ..... ... .. \ + &azz_n n=2 rv=%mova_rv zn=%zn_ax2 zm=%zm_ax2 off=%off1_x4 +@azz_4x4_o1x4 ........ ... ..... . .. ... ..... ... .. \ + &azz_n n=4 rv=%mova_rv zn=%zn_ax4 zm=%zm_ax4 off=%off1_x4 + +SMLALL_nn_s 11000001 101 ....0 0 .. 000 ....0 0000 . @azz_2x2_o1x4 +SMLALL_nn_d 11000001 111 ....0 0 .. 000 ....0 0000 . @azz_2x2_o1x4 +SMLALL_nn_s 11000001 101 ...01 0 .. 000 ...00 0000 . @azz_4x4_o1x4 +SMLALL_nn_d 11000001 111 ...01 0 .. 000 ...00 0000 . @azz_4x4_o1x4 + +SMLSLL_nn_s 11000001 101 ....0 0 .. 000 ....0 0100 . @azz_2x2_o1x4 +SMLSLL_nn_d 11000001 111 ....0 0 .. 000 ....0 0100 . @azz_2x2_o1x4 +SMLSLL_nn_s 11000001 101 ...01 0 .. 000 ...00 0100 . @azz_4x4_o1x4 +SMLSLL_nn_d 11000001 111 ...01 0 .. 000 ...00 0100 . @azz_4x4_o1x4 + +UMLALL_nn_s 11000001 101 ....0 0 .. 000 ....0 1000 . @azz_2x2_o1x4 +UMLALL_nn_d 11000001 111 ....0 0 .. 000 ....0 1000 . @azz_2x2_o1x4 +UMLALL_nn_s 11000001 101 ...01 0 .. 000 ...00 1000 . @azz_4x4_o1x4 +UMLALL_nn_d 11000001 111 ...01 0 .. 000 ...00 1000 . @azz_4x4_o1x4 + +UMLSLL_nn_s 11000001 101 ....0 0 .. 000 ....0 1100 . @azz_2x2_o1x4 +UMLSLL_nn_d 11000001 111 ....0 0 .. 000 ....0 1100 . @azz_2x2_o1x4 +UMLSLL_nn_s 11000001 101 ...01 0 .. 000 ...00 1100 . @azz_4x4_o1x4 +UMLSLL_nn_d 11000001 111 ...01 0 .. 000 ...00 1100 . @azz_4x4_o1x4 + +USMLALL_nn_s 11000001 101 ....0 0 .. 000 ....0 0010 . @azz_2x2_o1x4 +USMLALL_nn_s 11000001 101 ...01 0 .. 000 ...00 0010 . @azz_4x4_o1x4 + +BFMLA_nn 11000001 111 ....0 0 .. 100 ....0 01 ... @azz_2x2_o3 +FMLA_nn_h 11000001 101 ....0 0 .. 100 ....0 01 ... @azz_2x2_o3 +FMLA_nn_s 11000001 101 ....0 0 .. 110 ....0 00 ... @azz_2x2_o3 +FMLA_nn_d 11000001 111 ....0 0 .. 110 ....0 00 ... @azz_2x2_o3 + +BFMLA_nn 11000001 111 ...01 0 .. 100 ...00 01 ... @azz_4x4_o3 +FMLA_nn_h 11000001 101 ...01 0 .. 100 ...00 01 ... @azz_4x4_o3 +FMLA_nn_s 11000001 101 ...01 0 .. 110 ...00 00 ... @azz_4x4_o3 +FMLA_nn_d 11000001 111 ...01 0 .. 110 ...00 00 ... @azz_4x4_o3 + +BFMLS_nn 11000001 111 ....0 0 .. 100 ....0 11 ... @azz_2x2_o3 +FMLS_nn_h 11000001 101 ....0 0 .. 100 ....0 11 ... @azz_2x2_o3 +FMLS_nn_s 11000001 101 ....0 0 .. 110 ....0 01 ... @azz_2x2_o3 +FMLS_nn_d 11000001 111 ....0 0 .. 110 ....0 01 ... @azz_2x2_o3 + +BFMLS_nn 11000001 111 ...01 0 .. 100 ...00 11 ... @azz_4x4_o3 +FMLS_nn_h 11000001 101 ...01 0 .. 100 ...00 11 ... @azz_4x4_o3 +FMLS_nn_s 11000001 101 ...01 0 .. 110 ...00 01 ... @azz_4x4_o3 +FMLS_nn_d 11000001 111 ...01 0 .. 110 ...00 01 ... @azz_4x4_o3 + +&az_n n off rv zm +@az_2x2_o3 ........ ... ..... . .. ... ..... .. off:3 \ + &az_n n=2 rv=%mova_rv zm=%zn_ax2 +@az_4x4_o3 ........ ... ..... . .. ... ..... .. off:3 \ + &az_n n=4 rv=%mova_rv zm=%zn_ax4 + +FADD_nn_h 11000001 101 00100 0 .. 
111 ....0 00 ... @az_2x2_o3 +FADD_nn_s 11000001 101 00000 0 .. 111 ....0 00 ... @az_2x2_o3 +FADD_nn_d 11000001 111 00000 0 .. 111 ....0 00 ... @az_2x2_o3 +FADD_nn_h 11000001 101 00101 0 .. 111 ...00 00 ... @az_4x4_o3 +FADD_nn_s 11000001 101 00001 0 .. 111 ...00 00 ... @az_4x4_o3 +FADD_nn_d 11000001 111 00001 0 .. 111 ...00 00 ... @az_4x4_o3 + +FSUB_nn_h 11000001 101 00100 0 .. 111 ....0 01 ... @az_2x2_o3 +FSUB_nn_s 11000001 101 00000 0 .. 111 ....0 01 ... @az_2x2_o3 +FSUB_nn_d 11000001 111 00000 0 .. 111 ....0 01 ... @az_2x2_o3 +FSUB_nn_h 11000001 101 00101 0 .. 111 ...00 01 ... @az_4x4_o3 +FSUB_nn_s 11000001 101 00001 0 .. 111 ...00 01 ... @az_4x4_o3 +FSUB_nn_d 11000001 111 00001 0 .. 111 ...00 01 ... @az_4x4_o3 + +BFADD_nn 11000001 111 00100 0 .. 111 ....0 00 ... @az_2x2_o3 +BFADD_nn 11000001 111 00101 0 .. 111 ...00 00 ... @az_4x4_o3 +BFSUB_nn 11000001 111 00100 0 .. 111 ....0 01 ... @az_2x2_o3 +BFSUB_nn 11000001 111 00101 0 .. 111 ...00 01 ... @az_4x4_o3 + +### SME2 Multi-vector Indexed + +&azx_n n off rv zn zm idx + +%idx3_15_10 15:1 10:2 +%idx2_10_2 10:2 2:1 + +@azx_1x1_o3x2 ........ .... zm:4 . .. . .. zn:5 .. ... \ + &azx_n n=1 rv=%mova_rv off=%off3_x2 idx=%idx3_15_10 +@azx_2x1_o2x2 ........ .... zm:4 . .. . .. ..... .. ... \ + &azx_n n=2 rv=%mova_rv off=%off2_x2 zn=%zn_ax2 idx=%idx2_10_2 +@azx_4x1_o2x2 ........ .... zm:4 . .. . .. ..... .. ... \ + &azx_n n=4 rv=%mova_rv off=%off2_x2 zn=%zn_ax4 idx=%idx2_10_2 + +FMLAL_nx 11000001 1000 .... . .. 1 .. ..... 00 ... @azx_1x1_o3x2 +FMLAL_nx 11000001 1001 .... 0 .. 1 .. ....0 00 ... @azx_2x1_o2x2 +FMLAL_nx 11000001 1001 .... 1 .. 1 .. ...00 00 ... @azx_4x1_o2x2 + +FMLSL_nx 11000001 1000 .... . .. 1 .. ..... 01 ... @azx_1x1_o3x2 +FMLSL_nx 11000001 1001 .... 0 .. 1 .. ....0 01 ... @azx_2x1_o2x2 +FMLSL_nx 11000001 1001 .... 1 .. 1 .. ...00 01 ... @azx_4x1_o2x2 + +BFMLAL_nx 11000001 1000 .... . .. 1 .. ..... 10 ... @azx_1x1_o3x2 +BFMLAL_nx 11000001 1001 .... 0 .. 1 .. ....0 10 ... @azx_2x1_o2x2 +BFMLAL_nx 11000001 1001 .... 1 .. 1 .. ...00 10 ... @azx_4x1_o2x2 + +BFMLSL_nx 11000001 1000 .... . .. 1 .. ..... 11 ... @azx_1x1_o3x2 +BFMLSL_nx 11000001 1001 .... 0 .. 1 .. ....0 11 ... @azx_2x1_o2x2 +BFMLSL_nx 11000001 1001 .... 1 .. 1 .. ...00 11 ... @azx_4x1_o2x2 + +@azx_2x1_i2_o3 ........ .... zm:4 . .. . idx:2 .... ... off:3 \ + &azx_n n=2 rv=%mova_rv zn=%zn_ax2 +@azx_4x1_i2_o3 ........ .... zm:4 . .. . idx:2 .... ... off:3 \ + &azx_n n=4 rv=%mova_rv zn=%zn_ax4 +@azx_2x1_i1_o3 ........ .... zm:4 . .. .. idx:1 .... ... off:3 \ + &azx_n n=2 rv=%mova_rv zn=%zn_ax2 +@azx_4x1_i1_o3 ........ .... zm:4 . .. .. idx:1 .... ... off:3 \ + &azx_n n=4 rv=%mova_rv zn=%zn_ax4 + +FDOT_nx 11000001 0101 .... 0 .. 1 .. ....0 01 ... @azx_2x1_i2_o3 +FDOT_nx 11000001 0101 .... 1 .. 1 .. ...00 01 ... @azx_4x1_i2_o3 + +BFDOT_nx 11000001 0101 .... 0 .. 1 .. ....0 11 ... @azx_2x1_i2_o3 +BFDOT_nx 11000001 0101 .... 1 .. 1 .. ...00 11 ... @azx_4x1_i2_o3 + +FVDOT 11000001 0101 .... 0 .. 0 .. ....0 01 ... @azx_2x1_i2_o3 +BFVDOT 11000001 0101 .... 0 .. 0 .. ....0 11 ... @azx_2x1_i2_o3 + +SDOT_nx_2h 11000001 0101 .... 0 .. 1 .. ....0 00 ... @azx_2x1_i2_o3 +SDOT_nx_2h 11000001 0101 .... 1 .. 1 .. ...00 00 ... @azx_4x1_i2_o3 +SDOT_nx_4b 11000001 0101 .... 0 .. 1 .. ....1 00 ... @azx_2x1_i2_o3 +SDOT_nx_4b 11000001 0101 .... 1 .. 1 .. ...01 00 ... @azx_4x1_i2_o3 +SDOT_nx_4h 11000001 1101 .... 0 .. 00 . ....0 01 ... @azx_2x1_i1_o3 +SDOT_nx_4h 11000001 1101 .... 1 .. 00 . ...00 01 ... @azx_4x1_i1_o3 + +UDOT_nx_2h 11000001 0101 .... 0 .. 1 .. ....0 10 ... 
@azx_2x1_i2_o3 +UDOT_nx_2h 11000001 0101 .... 1 .. 1 .. ...00 10 ... @azx_4x1_i2_o3 +UDOT_nx_4b 11000001 0101 .... 0 .. 1 .. ....1 10 ... @azx_2x1_i2_o3 +UDOT_nx_4b 11000001 0101 .... 1 .. 1 .. ...01 10 ... @azx_4x1_i2_o3 +UDOT_nx_4h 11000001 1101 .... 0 .. 00 . ....0 11 ... @azx_2x1_i1_o3 +UDOT_nx_4h 11000001 1101 .... 1 .. 00 . ...00 11 ... @azx_4x1_i1_o3 + +USDOT_nx 11000001 0101 .... 0 .. 1 .. ....1 01 ... @azx_2x1_i2_o3 +USDOT_nx 11000001 0101 .... 1 .. 1 .. ...01 01 ... @azx_4x1_i2_o3 + +SUDOT_nx 11000001 0101 .... 0 .. 1 .. ....1 11 ... @azx_2x1_i2_o3 +SUDOT_nx 11000001 0101 .... 1 .. 1 .. ...01 11 ... @azx_4x1_i2_o3 + +SVDOT_nx_2h 11000001 0101 .... 0 .. 0 .. ....1 00 ... @azx_2x1_i2_o3 +SVDOT_nx_4b 11000001 0101 .... 1 .. 0 .. ...01 00 ... @azx_4x1_i2_o3 +SVDOT_nx_4h 11000001 1101 .... 1 .. 01 . ...00 01 ... @azx_4x1_i1_o3 + +UVDOT_nx_2h 11000001 0101 .... 0 .. 0 .. ....1 10 ... @azx_2x1_i2_o3 +UVDOT_nx_4b 11000001 0101 .... 1 .. 0 .. ...01 10 ... @azx_4x1_i2_o3 +UVDOT_nx_4h 11000001 1101 .... 1 .. 01 . ...00 11 ... @azx_4x1_i1_o3 + +SUVDOT_nx_4b 11000001 0101 .... 1 .. 0 .. ...01 11 ... @azx_4x1_i2_o3 +USVDOT_nx_4b 11000001 0101 .... 1 .. 0 .. ...01 01 ... @azx_4x1_i2_o3 + +SMLAL_nx 11000001 1100 .... . .. 1 .. ..... 00 ... @azx_1x1_o3x2 +SMLAL_nx 11000001 1101 .... 0 .. 1 .. ....0 00 ... @azx_2x1_o2x2 +SMLAL_nx 11000001 1101 .... 1 .. 1 .. ...00 00 ... @azx_4x1_o2x2 + +SMLSL_nx 11000001 1100 .... . .. 1 .. ..... 01 ... @azx_1x1_o3x2 +SMLSL_nx 11000001 1101 .... 0 .. 1 .. ....0 01 ... @azx_2x1_o2x2 +SMLSL_nx 11000001 1101 .... 1 .. 1 .. ...00 01 ... @azx_4x1_o2x2 + +UMLAL_nx 11000001 1100 .... . .. 1 .. ..... 10 ... @azx_1x1_o3x2 +UMLAL_nx 11000001 1101 .... 0 .. 1 .. ....0 10 ... @azx_2x1_o2x2 +UMLAL_nx 11000001 1101 .... 1 .. 1 .. ...00 10 ... @azx_4x1_o2x2 + +UMLSL_nx 11000001 1100 .... . .. 1 .. ..... 11 ... @azx_1x1_o3x2 +UMLSL_nx 11000001 1101 .... 0 .. 1 .. ....0 11 ... @azx_2x1_o2x2 +UMLSL_nx 11000001 1101 .... 1 .. 1 .. ...00 11 ... @azx_4x1_o2x2 + +%idx4_15_10 15:1 10:3 +%idx4_10_1 10:2 1:2 +%idx3_10_1 10:1 1:2 + +@azx_1x1_i4_o2 ........ .... zm:4 . .. ... zn:5 ... .. \ + &azx_n n=1 rv=%mova_rv off=%off2_x4 idx=%idx4_15_10 +@azx_1x1_i3_o2 ........ .... zm:4 . .. ... zn:5 ... .. \ + &azx_n n=1 rv=%mova_rv off=%off2_x4 idx=%idx3_15_10 +@azx_2x1_i4_o1 ........ .... zm:4 . .. ... ..... ... .. \ + &azx_n n=2 rv=%mova_rv off=%off1_x4 zn=%zn_ax2 idx=%idx4_10_1 +@azx_2x1_i3_o1 ........ .... zm:4 . .. ... ..... ... .. \ + &azx_n n=2 rv=%mova_rv off=%off1_x4 zn=%zn_ax2 idx=%idx3_10_1 +@azx_4x1_i4_o1 ........ .... zm:4 . .. ... ..... ... .. \ + &azx_n n=4 rv=%mova_rv off=%off1_x4 zn=%zn_ax4 idx=%idx4_10_1 +@azx_4x1_i3_o1 ........ .... zm:4 . .. ... ..... ... .. \ + &azx_n n=4 rv=%mova_rv off=%off1_x4 zn=%zn_ax4 idx=%idx3_10_1 + +SMLALL_nx_s 11000001 0000 .... . .. ... ..... 000 .. @azx_1x1_i4_o2 +SMLALL_nx_d 11000001 1000 .... . .. 0.. ..... 000 .. @azx_1x1_i3_o2 +SMLALL_nx_s 11000001 0001 .... 0 .. 0.. ....0 00 ... @azx_2x1_i4_o1 +SMLALL_nx_d 11000001 1001 .... 0 .. 00. ....0 00 ... @azx_2x1_i3_o1 +SMLALL_nx_s 11000001 0001 .... 1 .. 0.. ...00 00 ... @azx_4x1_i4_o1 +SMLALL_nx_d 11000001 1001 .... 1 .. 00. ...00 00 ... @azx_4x1_i3_o1 + +SMLSLL_nx_s 11000001 0000 .... . .. ... ..... 010 .. @azx_1x1_i4_o2 +SMLSLL_nx_d 11000001 1000 .... . .. 0.. ..... 010 .. @azx_1x1_i3_o2 +SMLSLL_nx_s 11000001 0001 .... 0 .. 0.. ....0 01 ... @azx_2x1_i4_o1 +SMLSLL_nx_d 11000001 1001 .... 0 .. 00. ....0 01 ... @azx_2x1_i3_o1 +SMLSLL_nx_s 11000001 0001 .... 1 .. 0.. ...00 01 ... 
@azx_4x1_i4_o1 +SMLSLL_nx_d 11000001 1001 .... 1 .. 00. ...00 01 ... @azx_4x1_i3_o1 + +UMLALL_nx_s 11000001 0000 .... . .. ... ..... 100 .. @azx_1x1_i4_o2 +UMLALL_nx_d 11000001 1000 .... . .. 0.. ..... 100 .. @azx_1x1_i3_o2 +UMLALL_nx_s 11000001 0001 .... 0 .. 0.. ....0 10 ... @azx_2x1_i4_o1 +UMLALL_nx_d 11000001 1001 .... 0 .. 00. ....0 10 ... @azx_2x1_i3_o1 +UMLALL_nx_s 11000001 0001 .... 1 .. 0.. ...00 10 ... @azx_4x1_i4_o1 +UMLALL_nx_d 11000001 1001 .... 1 .. 00. ...00 10 ... @azx_4x1_i3_o1 + +UMLSLL_nx_s 11000001 0000 .... . .. ... ..... 110 .. @azx_1x1_i4_o2 +UMLSLL_nx_d 11000001 1000 .... . .. 0.. ..... 110 .. @azx_1x1_i3_o2 +UMLSLL_nx_s 11000001 0001 .... 0 .. 0.. ....0 11 ... @azx_2x1_i4_o1 +UMLSLL_nx_d 11000001 1001 .... 0 .. 00. ....0 11 ... @azx_2x1_i3_o1 +UMLSLL_nx_s 11000001 0001 .... 1 .. 0.. ...00 11 ... @azx_4x1_i4_o1 +UMLSLL_nx_d 11000001 1001 .... 1 .. 00. ...00 11 ... @azx_4x1_i3_o1 + +USMLALL_nx_s 11000001 0000 .... . .. ... ..... 001 .. @azx_1x1_i4_o2 +USMLALL_nx_s 11000001 0001 .... 0 .. 0.. ....1 00 ... @azx_2x1_i4_o1 +USMLALL_nx_s 11000001 0001 .... 1 .. 0.. ...01 00 ... @azx_4x1_i4_o1 + +SUMLALL_nx_s 11000001 0000 .... . .. ... ..... 101 .. @azx_1x1_i4_o2 +SUMLALL_nx_s 11000001 0001 .... 0 .. 0.. ....1 10 ... @azx_2x1_i4_o1 +SUMLALL_nx_s 11000001 0001 .... 1 .. 0.. ...01 10 ... @azx_4x1_i4_o1 + +%idx3_10_3 10:2 3:1 +@azx_2x1_i3_o3 ........ .... zm:4 . .. ... ..... .. off:3 \ + &azx_n n=2 rv=%mova_rv zn=%zn_ax2 idx=%idx3_10_3 +@azx_4x1_i3_o3 ........ .... zm:4 . .. ... ..... .. off:3 \ + &azx_n n=4 rv=%mova_rv zn=%zn_ax4 idx=%idx3_10_3 + +BFMLA_nx 11000001 0001 .... 0 .. 1.. ....1 0 .... @azx_2x1_i3_o3 +FMLA_nx_h 11000001 0001 .... 0 .. 1.. ....0 0 .... @azx_2x1_i3_o3 +FMLA_nx_s 11000001 0101 .... 0 .. 0.. ....0 00 ... @azx_2x1_i2_o3 +FMLA_nx_d 11000001 1101 .... 0 .. 00. ....0 00 ... @azx_2x1_i1_o3 + +BFMLA_nx 11000001 0001 .... 1 .. 1.. ...01 0 .... @azx_4x1_i3_o3 +FMLA_nx_h 11000001 0001 .... 1 .. 1.. ...00 0 .... @azx_4x1_i3_o3 +FMLA_nx_s 11000001 0101 .... 1 .. 0.. ...00 00 ... @azx_4x1_i2_o3 +FMLA_nx_d 11000001 1101 .... 1 .. 00. ...00 00 ... @azx_4x1_i1_o3 + +BFMLS_nx 11000001 0001 .... 0 .. 1.. ....1 1 .... @azx_2x1_i3_o3 +FMLS_nx_h 11000001 0001 .... 0 .. 1.. ....0 1 .... @azx_2x1_i3_o3 +FMLS_nx_s 11000001 0101 .... 0 .. 0.. ....0 10 ... @azx_2x1_i2_o3 +FMLS_nx_d 11000001 1101 .... 0 .. 00. ....0 10 ... @azx_2x1_i1_o3 + +BFMLS_nx 11000001 0001 .... 1 .. 1.. ...01 1 .... @azx_4x1_i3_o3 +FMLS_nx_h 11000001 0001 .... 1 .. 1.. ...00 1 .... @azx_4x1_i3_o3 +FMLS_nx_s 11000001 0101 .... 1 .. 0.. ...00 10 ... @azx_4x1_i2_o3 +FMLS_nx_d 11000001 1101 .... 1 .. 00. ...00 10 ... @azx_4x1_i1_o3 + +### SME2 Add / Sub array accumulators + +ADD_aaz_s 11000001 101 000000 .. 111 ....0 10 ... @az_2x2_o3 +ADD_aaz_s 11000001 101 000010 .. 111 ...00 10 ... @az_4x4_o3 +ADD_aaz_d 11000001 111 000000 .. 111 ....0 10 ... @az_2x2_o3 +ADD_aaz_d 11000001 111 000010 .. 111 ...00 10 ... @az_4x4_o3 + +SUB_aaz_s 11000001 101 000000 .. 111 ....0 11 ... @az_2x2_o3 +SUB_aaz_s 11000001 101 000010 .. 111 ...00 11 ... @az_4x4_o3 +SUB_aaz_d 11000001 111 000000 .. 111 ....0 11 ... @az_2x2_o3 +SUB_aaz_d 11000001 111 000010 .. 111 ...00 11 ... @az_4x4_o3 + +### SME2 Multi-vector SVE Constructive Unary + +&zz_e zd zn esz +&zz_n zd zn n +@zz_1x2 ........ ... ..... ...... ..... zd:5 \ + &zz_n n=1 zn=%zn_ax2 +@zz_1x4 ........ ... ..... ...... ..... zd:5 \ + &zz_n n=1 zn=%zn_ax4 +@zz_2x1 ........ ... ..... ...... zn:5 ..... \ + &zz_n n=1 zd=%zd_ax2 +@zz_2x2 ........ ... ..... ...... .... . ..... 
\ + &zz_n n=2 zd=%zd_ax2 zn=%zn_ax2 +@zz_4x4 ........ ... ..... ...... .... . ..... \ + &zz_n n=4 zd=%zd_ax4 zn=%zn_ax4 +@zz_4x2_n1 ........ ... ..... ...... .... . ..... \ + &zz_n n=1 zd=%zd_ax4 zn=%zn_ax2 + +BFCVT 11000001 011 00000 111000 ....0 ..... @zz_1x2 +BFCVTN 11000001 011 00000 111000 ....1 ..... @zz_1x2 + +FCVT_n 11000001 001 00000 111000 ....0 ..... @zz_1x2 +FCVTN 11000001 001 00000 111000 ....1 ..... @zz_1x2 + +FCVT_w 11000001 101 00000 111000 ..... ....0 @zz_2x1 +FCVTL 11000001 101 00000 111000 ..... ....1 @zz_2x1 + +FCVTZS 11000001 001 00001 111000 ....0 ....0 @zz_2x2 +FCVTZS 11000001 001 10001 111000 ...00 ...00 @zz_4x4 +FCVTZU 11000001 001 00001 111000 ....1 ....0 @zz_2x2 +FCVTZU 11000001 001 10001 111000 ...01 ...00 @zz_4x4 + +SCVTF 11000001 001 00010 111000 ....0 ....0 @zz_2x2 +SCVTF 11000001 001 10010 111000 ...00 ...00 @zz_4x4 +UCVTF 11000001 001 00010 111000 ....1 ....0 @zz_2x2 +UCVTF 11000001 001 10010 111000 ...01 ...00 @zz_4x4 + +FRINTN 11000001 101 01000 111000 ....0 ....0 @zz_2x2 +FRINTN 11000001 101 11000 111000 ...00 ...00 @zz_4x4 +FRINTP 11000001 101 01001 111000 ....0 ....0 @zz_2x2 +FRINTP 11000001 101 11001 111000 ...00 ...00 @zz_4x4 +FRINTM 11000001 101 01010 111000 ....0 ....0 @zz_2x2 +FRINTM 11000001 101 11010 111000 ...00 ...00 @zz_4x4 +FRINTA 11000001 101 01100 111000 ....0 ....0 @zz_2x2 +FRINTA 11000001 101 11100 111000 ...00 ...00 @zz_4x4 + +SQCVT_sh 11000001 001 00011 111000 ....0 ..... @zz_1x2 +UQCVT_sh 11000001 001 00011 111000 ....1 ..... @zz_1x2 +SQCVTU_sh 11000001 011 00011 111000 ....0 ..... @zz_1x2 + +SQCVT_sb 11000001 001 10011 111000 ...00 ..... @zz_1x4 +UQCVT_sb 11000001 001 10011 111000 ...01 ..... @zz_1x4 +SQCVTU_sb 11000001 011 10011 111000 ...00 ..... @zz_1x4 + +SQCVT_dh 11000001 101 10011 111000 ...00 ..... @zz_1x4 +UQCVT_dh 11000001 101 10011 111000 ...01 ..... @zz_1x4 +SQCVTU_dh 11000001 111 10011 111000 ...00 ..... @zz_1x4 + +SQCVTN_sb 11000001 001 10011 111000 ...10 ..... @zz_1x4 +UQCVTN_sb 11000001 001 10011 111000 ...11 ..... @zz_1x4 +SQCVTUN_sb 11000001 011 10011 111000 ...10 ..... @zz_1x4 + +SQCVTN_dh 11000001 101 10011 111000 ...10 ..... @zz_1x4 +UQCVTN_dh 11000001 101 10011 111000 ...11 ..... @zz_1x4 +SQCVTUN_dh 11000001 111 10011 111000 ...10 ..... @zz_1x4 + +SUNPK_2bh 11000001 011 00101 111000 ..... ....0 @zz_2x1 +SUNPK_2hs 11000001 101 00101 111000 ..... ....0 @zz_2x1 +SUNPK_2sd 11000001 111 00101 111000 ..... ....0 @zz_2x1 + +UUNPK_2bh 11000001 011 00101 111000 ..... ....1 @zz_2x1 +UUNPK_2hs 11000001 101 00101 111000 ..... ....1 @zz_2x1 +UUNPK_2sd 11000001 111 00101 111000 ..... ....1 @zz_2x1 + +SUNPK_4bh 11000001 011 10101 111000 ....0 ...00 @zz_4x2_n1 +SUNPK_4hs 11000001 101 10101 111000 ....0 ...00 @zz_4x2_n1 +SUNPK_4sd 11000001 111 10101 111000 ....0 ...00 @zz_4x2_n1 + +UUNPK_4bh 11000001 011 10101 111000 ....0 ...01 @zz_4x2_n1 +UUNPK_4hs 11000001 101 10101 111000 ....0 ...01 @zz_4x2_n1 +UUNPK_4sd 11000001 111 10101 111000 ....0 ...01 @zz_4x2_n1 + +ZIP_4 11000001 esz:2 1 10110 111000 ...00 ... 00 \ + &zz_e zd=%zd_ax4 zn=%zn_ax4 +ZIP_4 11000001 001 10111 111000 ...00 ... 00 \ + &zz_e esz=4 zd=%zd_ax4 zn=%zn_ax4 + +UZP_4 11000001 esz:2 1 10110 111000 ...00 ... 10 \ + &zz_e zd=%zd_ax4 zn=%zn_ax4 +UZP_4 11000001 001 10111 111000 ...00 ... 10 \ + &zz_e esz=4 zd=%zd_ax4 zn=%zn_ax4 + +### SME2 Multi-vector SVE Constructive Binary + +&rshr zd zn shift + +%rshr_sh_shift 16:4 !function=rsub_16 +%rshr_sb_shift 16:5 !function=rsub_32 +%rshr_dh_shift 22:1 16:5 !function=rsub_64 + +@rshr_sh ........ .... .... ...... ..... 
zd:5 \ + &rshr zn=%zn_ax2 shift=%rshr_sh_shift +@rshr_sb ........ ... ..... ...... ..... zd:5 \ + &rshr zn=%zn_ax4 shift=%rshr_sb_shift +@rshr_dh ........ ... ..... ...... ..... zd:5 \ + &rshr zn=%zn_ax4 shift=%rshr_dh_shift + +SQRSHR_sh 11000001 1110 .... 110101 ....0 ..... @rshr_sh +UQRSHR_sh 11000001 1110 .... 110101 ....1 ..... @rshr_sh +SQRSHRU_sh 11000001 1111 .... 110101 ....0 ..... @rshr_sh + +SQRSHR_sb 11000001 011 ..... 110110 ...00 ..... @rshr_sb +SQRSHR_dh 11000001 1.1 ..... 110110 ...00 ..... @rshr_dh +UQRSHR_sb 11000001 011 ..... 110110 ...01 ..... @rshr_sb +UQRSHR_dh 11000001 1.1 ..... 110110 ...01 ..... @rshr_dh +SQRSHRU_sb 11000001 011 ..... 110110 ...10 ..... @rshr_sb +SQRSHRU_dh 11000001 1.1 ..... 110110 ...10 ..... @rshr_dh + +SQRSHRN_sh 01000101 1011 .... 001010 ....0 ..... @rshr_sh +UQRSHRN_sh 01000101 1011 .... 001110 ....0 ..... @rshr_sh +SQRSHRUN_sh 01000101 1011 .... 000010 ....0 ..... @rshr_sh + +SQRSHRN_sb 11000001 011 ..... 110111 ...00 ..... @rshr_sb +SQRSHRN_dh 11000001 1.1 ..... 110111 ...00 ..... @rshr_dh +UQRSHRN_sb 11000001 011 ..... 110111 ...01 ..... @rshr_sb +UQRSHRN_dh 11000001 1.1 ..... 110111 ...01 ..... @rshr_dh +SQRSHRUN_sb 11000001 011 ..... 110111 ...10 ..... @rshr_sb +SQRSHRUN_dh 11000001 1.1 ..... 110111 ...10 ..... @rshr_dh + +&zzz_e zd zn zm esz + +ZIP_2 11000001 esz:2 1 zm:5 110100 zn:5 .... 0 \ + &zzz_e zd=%zd_ax2 +ZIP_2 11000001 00 1 zm:5 110101 zn:5 .... 0 \ + &zzz_e zd=%zd_ax2 esz=4 + +UZP_2 11000001 esz:2 1 zm:5 110100 zn:5 .... 1 \ + &zzz_e zd=%zd_ax2 +UZP_2 11000001 00 1 zm:5 110101 zn:5 .... 1 \ + &zzz_e zd=%zd_ax2 esz=4 + +&zzz_en zd zn zm esz n + +FCLAMP 11000001 esz:2 1 zm:5 110000 zn:5 .... 0 \ + &zzz_en zd=%zd_ax2 n=2 +FCLAMP 11000001 esz:2 1 zm:5 110010 zn:5 ...0 0 \ + &zzz_en zd=%zd_ax4 n=4 + +SCLAMP 11000001 esz:2 1 zm:5 110001 zn:5 .... 0 \ + &zzz_en zd=%zd_ax2 n=2 +SCLAMP 11000001 esz:2 1 zm:5 110011 zn:5 ...0 0 \ + &zzz_en zd=%zd_ax4 n=4 + +UCLAMP 11000001 esz:2 1 zm:5 110001 zn:5 .... 1 \ + &zzz_en zd=%zd_ax2 n=2 +UCLAMP 11000001 esz:2 1 zm:5 110011 zn:5 ...0 1 \ + &zzz_en zd=%zd_ax4 n=4 + +### SME2 Multi-vector SVE Select + +%sel_pg 10:3 !function=plus_8 + +SEL 11000001 esz:2 1 ....0 100 ... ....0 ....0 \ + n=2 zd=%zd_ax2 zn=%zn_ax2 zm=%zm_ax2 pg=%sel_pg +SEL 11000001 esz:2 1 ...01 100 ... ...00 ...00 \ + n=4 zd=%zd_ax4 zn=%zn_ax4 zm=%zm_ax4 pg=%sel_pg + +### SME Multiple Zero + +&zero_za rv off ngrp nvec + +ZERO_za 11000000 000011 000 .. 0000000000 off:3 \ + &zero_za ngrp=2 nvec=1 rv=%mova_rv +ZERO_za 11000000 000011 100 .. 0000000000 off:3 \ + &zero_za ngrp=4 nvec=1 rv=%mova_rv + +ZERO_za 11000000 000011 001 .. 0000000000 ... \ + &zero_za ngrp=1 nvec=2 rv=%mova_rv off=%off3_x2 +ZERO_za 11000000 000011 010 .. 0000000000 0.. \ + &zero_za ngrp=2 nvec=2 rv=%mova_rv off=%off2_x2 +ZERO_za 11000000 000011 011 .. 0000000000 0.. \ + &zero_za ngrp=4 nvec=2 rv=%mova_rv off=%off2_x2 + +ZERO_za 11000000 000011 101 .. 0000000000 0.. \ + &zero_za ngrp=1 nvec=4 rv=%mova_rv off=%off2_x4 +ZERO_za 11000000 000011 110 .. 0000000000 00. \ + &zero_za ngrp=2 nvec=4 rv=%mova_rv off=%off1_x4 +ZERO_za 11000000 000011 111 .. 0000000000 00. \ + &zero_za ngrp=4 nvec=4 rv=%mova_rv off=%off1_x4 + +### SME Lookup Table Read + +&lut zd zn idx + +# LUTI2, consecutive +LUTI2_c_1b 1100 0000 1100 11 idx:4 00 00 zn:5 zd:5 &lut +LUTI2_c_1h 1100 0000 1100 11 idx:4 01 00 zn:5 zd:5 &lut +LUTI2_c_1s 1100 0000 1100 11 idx:4 10 00 zn:5 zd:5 &lut + +LUTI2_c_2b 1100 0000 1000 11 idx:3 1 00 00 zn:5 .... 
0 &lut zd=%zd_ax2 +LUTI2_c_2h 1100 0000 1000 11 idx:3 1 01 00 zn:5 .... 0 &lut zd=%zd_ax2 +LUTI2_c_2s 1100 0000 1000 11 idx:3 1 10 00 zn:5 .... 0 &lut zd=%zd_ax2 + +LUTI2_c_4b 1100 0000 1000 11 idx:2 10 00 00 zn:5 ... 00 &lut zd=%zd_ax4 +LUTI2_c_4h 1100 0000 1000 11 idx:2 10 01 00 zn:5 ... 00 &lut zd=%zd_ax4 +LUTI2_c_4s 1100 0000 1000 11 idx:2 10 10 00 zn:5 ... 00 &lut zd=%zd_ax4 + +# LUTI2, strided (must check zd alignment) +LUTI2_s_2b 1100 0000 1001 11 idx:3 1 00 00 zn:5 zd:5 &lut +LUTI2_s_2h 1100 0000 1001 11 idx:3 1 01 00 zn:5 zd:5 &lut + +LUTI2_s_4b 1100 0000 1001 11 idx:2 10 00 00 zn:5 zd:5 &lut +LUTI2_s_4h 1100 0000 1001 11 idx:2 10 01 00 zn:5 zd:5 &lut + +# LUTI4, consecutive +LUTI4_c_1b 1100 0000 1100 101 idx:3 00 00 zn:5 zd:5 &lut +LUTI4_c_1h 1100 0000 1100 101 idx:3 01 00 zn:5 zd:5 &lut +LUTI4_c_1s 1100 0000 1100 101 idx:3 10 00 zn:5 zd:5 &lut + +LUTI4_c_2b 1100 0000 1000 101 idx:2 1 00 00 zn:5 .... 0 &lut zd=%zd_ax2 +LUTI4_c_2h 1100 0000 1000 101 idx:2 1 01 00 zn:5 .... 0 &lut zd=%zd_ax2 +LUTI4_c_2s 1100 0000 1000 101 idx:2 1 10 00 zn:5 .... 0 &lut zd=%zd_ax2 + +LUTI4_c_4h 1100 0000 1000 101 idx:1 10 01 00 zn:5 ... 00 &lut zd=%zd_ax4 +LUTI4_c_4s 1100 0000 1000 101 idx:1 10 10 00 zn:5 ... 00 &lut zd=%zd_ax4 + +# LUTI4, strided (must check zd alignment) +LUTI4_s_2b 1100 0000 1001 101 idx:2 1 00 00 zn:5 zd:5 &lut +LUTI4_s_2h 1100 0000 1001 101 idx:2 1 01 00 zn:5 zd:5 &lut + +LUTI4_s_4h 1100 0000 1001 101 idx:1 10 01 00 zn:5 zd:5 &lut diff --git a/target/arm/tcg/sme_helper.c b/target/arm/tcg/sme_helper.c index de0c6e5..075360d 100644 --- a/target/arm/tcg/sme_helper.c +++ b/target/arm/tcg/sme_helper.c @@ -29,6 +29,13 @@ #include "vec_internal.h" #include "sve_ldst_internal.h" + +static bool vectors_overlap(ARMVectorReg *x, unsigned nx, + ARMVectorReg *y, unsigned ny) +{ + return !(x + nx <= y || y + ny <= x); +} + void helper_set_svcr(CPUARMState *env, uint32_t val, uint32_t mask) { aarch64_set_svcr(env, val, mask); @@ -39,12 +46,12 @@ void helper_sme_zero(CPUARMState *env, uint32_t imm, uint32_t svl) uint32_t i; /* - * Special case clearing the entire ZA space. + * Special case clearing the entire ZArray. * This falls into the CONSTRAINED UNPREDICTABLE zeroing of any * parts of the ZA storage outside of SVL. 
*/ if (imm == 0xff) { - memset(env->zarray, 0, sizeof(env->zarray)); + memset(env->za_state.za, 0, sizeof(env->za_state.za)); return; } @@ -54,7 +61,7 @@ void helper_sme_zero(CPUARMState *env, uint32_t imm, uint32_t svl) */ for (i = 0; i < svl; i++) { if (imm & (1 << (i % 8))) { - memset(&env->zarray[i], 0, svl); + memset(&env->za_state.za[i], 0, svl); } } } @@ -206,6 +213,110 @@ void HELPER(sme_mova_zc_q)(void *vd, void *za, void *vg, uint32_t desc) #undef DO_MOVA_Z +void HELPER(sme2_mova_zc_b)(void *vdst, void *vsrc, uint32_t desc) +{ + const uint8_t *src = vsrc; + uint8_t *dst = vdst; + size_t i, n = simd_oprsz(desc); + + for (i = 0; i < n; ++i) { + dst[i] = src[tile_vslice_index(i)]; + } +} + +void HELPER(sme2_mova_zc_h)(void *vdst, void *vsrc, uint32_t desc) +{ + const uint16_t *src = vsrc; + uint16_t *dst = vdst; + size_t i, n = simd_oprsz(desc) / 2; + + for (i = 0; i < n; ++i) { + dst[i] = src[tile_vslice_index(i)]; + } +} + +void HELPER(sme2_mova_zc_s)(void *vdst, void *vsrc, uint32_t desc) +{ + const uint32_t *src = vsrc; + uint32_t *dst = vdst; + size_t i, n = simd_oprsz(desc) / 4; + + for (i = 0; i < n; ++i) { + dst[i] = src[tile_vslice_index(i)]; + } +} + +void HELPER(sme2_mova_zc_d)(void *vdst, void *vsrc, uint32_t desc) +{ + const uint64_t *src = vsrc; + uint64_t *dst = vdst; + size_t i, n = simd_oprsz(desc) / 8; + + for (i = 0; i < n; ++i) { + dst[i] = src[tile_vslice_index(i)]; + } +} + +void HELPER(sme2p1_movaz_zc_b)(void *vdst, void *vsrc, uint32_t desc) +{ + uint8_t *src = vsrc; + uint8_t *dst = vdst; + size_t i, n = simd_oprsz(desc); + + for (i = 0; i < n; ++i) { + dst[i] = src[tile_vslice_index(i)]; + src[tile_vslice_index(i)] = 0; + } +} + +void HELPER(sme2p1_movaz_zc_h)(void *vdst, void *vsrc, uint32_t desc) +{ + uint16_t *src = vsrc; + uint16_t *dst = vdst; + size_t i, n = simd_oprsz(desc) / 2; + + for (i = 0; i < n; ++i) { + dst[i] = src[tile_vslice_index(i)]; + src[tile_vslice_index(i)] = 0; + } +} + +void HELPER(sme2p1_movaz_zc_s)(void *vdst, void *vsrc, uint32_t desc) +{ + uint32_t *src = vsrc; + uint32_t *dst = vdst; + size_t i, n = simd_oprsz(desc) / 4; + + for (i = 0; i < n; ++i) { + dst[i] = src[tile_vslice_index(i)]; + src[tile_vslice_index(i)] = 0; + } +} + +void HELPER(sme2p1_movaz_zc_d)(void *vdst, void *vsrc, uint32_t desc) +{ + uint64_t *src = vsrc; + uint64_t *dst = vdst; + size_t i, n = simd_oprsz(desc) / 8; + + for (i = 0; i < n; ++i) { + dst[i] = src[tile_vslice_index(i)]; + src[tile_vslice_index(i)] = 0; + } +} + +void HELPER(sme2p1_movaz_zc_q)(void *vdst, void *vsrc, uint32_t desc) +{ + Int128 *src = vsrc; + Int128 *dst = vdst; + size_t i, n = simd_oprsz(desc) / 16; + + for (i = 0; i < n; ++i) { + dst[i] = src[tile_vslice_index(i)]; + memset(&src[tile_vslice_index(i)], 0, 16); + } +} + /* * Clear elements in a tile slice comprising len bytes. */ @@ -314,6 +425,26 @@ static void copy_vertical_q(void *vdst, const void *vsrc, size_t len) } } +void HELPER(sme2_mova_cz_b)(void *vdst, void *vsrc, uint32_t desc) +{ + copy_vertical_b(vdst, vsrc, simd_oprsz(desc)); +} + +void HELPER(sme2_mova_cz_h)(void *vdst, void *vsrc, uint32_t desc) +{ + copy_vertical_h(vdst, vsrc, simd_oprsz(desc)); +} + +void HELPER(sme2_mova_cz_s)(void *vdst, void *vsrc, uint32_t desc) +{ + copy_vertical_s(vdst, vsrc, simd_oprsz(desc)); +} + +void HELPER(sme2_mova_cz_d)(void *vdst, void *vsrc, uint32_t desc) +{ + copy_vertical_d(vdst, vsrc, simd_oprsz(desc)); +} + /* * Host and TLB primitives for vertical tile slice addressing. 
*/ @@ -344,54 +475,22 @@ static inline void sme_##NAME##_v_tlb(CPUARMState *env, void *za, \ TLB(env, useronly_clean_ptr(addr), val, ra); \ } -/* - * The ARMVectorReg elements are stored in host-endian 64-bit units. - * For 128-bit quantities, the sequence defined by the Elem[] pseudocode - * corresponds to storing the two 64-bit pieces in little-endian order. - */ -#define DO_LDQ(HNAME, VNAME, BE, HOST, TLB) \ -static inline void HNAME##_host(void *za, intptr_t off, void *host) \ -{ \ - uint64_t val0 = HOST(host), val1 = HOST(host + 8); \ - uint64_t *ptr = za + off; \ - ptr[0] = BE ? val1 : val0, ptr[1] = BE ? val0 : val1; \ -} \ +#define DO_LDQ(HNAME, VNAME) \ static inline void VNAME##_v_host(void *za, intptr_t off, void *host) \ { \ HNAME##_host(za, tile_vslice_offset(off), host); \ } \ -static inline void HNAME##_tlb(CPUARMState *env, void *za, intptr_t off, \ - target_ulong addr, uintptr_t ra) \ -{ \ - uint64_t val0 = TLB(env, useronly_clean_ptr(addr), ra); \ - uint64_t val1 = TLB(env, useronly_clean_ptr(addr + 8), ra); \ - uint64_t *ptr = za + off; \ - ptr[0] = BE ? val1 : val0, ptr[1] = BE ? val0 : val1; \ -} \ static inline void VNAME##_v_tlb(CPUARMState *env, void *za, intptr_t off, \ target_ulong addr, uintptr_t ra) \ { \ HNAME##_tlb(env, za, tile_vslice_offset(off), addr, ra); \ } -#define DO_STQ(HNAME, VNAME, BE, HOST, TLB) \ -static inline void HNAME##_host(void *za, intptr_t off, void *host) \ -{ \ - uint64_t *ptr = za + off; \ - HOST(host, ptr[BE]); \ - HOST(host + 8, ptr[!BE]); \ -} \ +#define DO_STQ(HNAME, VNAME) \ static inline void VNAME##_v_host(void *za, intptr_t off, void *host) \ { \ HNAME##_host(za, tile_vslice_offset(off), host); \ } \ -static inline void HNAME##_tlb(CPUARMState *env, void *za, intptr_t off, \ - target_ulong addr, uintptr_t ra) \ -{ \ - uint64_t *ptr = za + off; \ - TLB(env, useronly_clean_ptr(addr), ptr[BE], ra); \ - TLB(env, useronly_clean_ptr(addr + 8), ptr[!BE], ra); \ -} \ static inline void VNAME##_v_tlb(CPUARMState *env, void *za, intptr_t off, \ target_ulong addr, uintptr_t ra) \ { \ @@ -406,8 +505,8 @@ DO_LD(ld1s_le, uint32_t, ldl_le_p, cpu_ldl_le_data_ra) DO_LD(ld1d_be, uint64_t, ldq_be_p, cpu_ldq_be_data_ra) DO_LD(ld1d_le, uint64_t, ldq_le_p, cpu_ldq_le_data_ra) -DO_LDQ(sve_ld1qq_be, sme_ld1q_be, 1, ldq_be_p, cpu_ldq_be_data_ra) -DO_LDQ(sve_ld1qq_le, sme_ld1q_le, 0, ldq_le_p, cpu_ldq_le_data_ra) +DO_LDQ(sve_ld1qq_be, sme_ld1q_be) +DO_LDQ(sve_ld1qq_le, sme_ld1q_le) DO_ST(st1b, uint8_t, stb_p, cpu_stb_data_ra) DO_ST(st1h_be, uint16_t, stw_be_p, cpu_stw_be_data_ra) @@ -417,8 +516,8 @@ DO_ST(st1s_le, uint32_t, stl_le_p, cpu_stl_le_data_ra) DO_ST(st1d_be, uint64_t, stq_be_p, cpu_stq_be_data_ra) DO_ST(st1d_le, uint64_t, stq_le_p, cpu_stq_le_data_ra) -DO_STQ(sve_st1qq_be, sme_st1q_be, 1, stq_be_p, cpu_stq_be_data_ra) -DO_STQ(sve_st1qq_le, sme_st1q_le, 0, stq_le_p, cpu_stq_le_data_ra) +DO_STQ(sve_st1qq_be, sme_st1q_be) +DO_STQ(sve_st1qq_le, sme_st1q_le) #undef DO_LD #undef DO_ST @@ -567,19 +666,16 @@ void sme_ld1(CPUARMState *env, void *za, uint64_t *vg, static inline QEMU_ALWAYS_INLINE void sme_ld1_mte(CPUARMState *env, void *za, uint64_t *vg, - target_ulong addr, uint32_t desc, uintptr_t ra, + target_ulong addr, uint64_t desc, uintptr_t ra, const int esz, bool vertical, sve_ldst1_host_fn *host_fn, sve_ldst1_tlb_fn *tlb_fn, ClearFn *clr_fn, CopyFn *cpy_fn) { - uint32_t mtedesc = desc >> (SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT); + uint32_t mtedesc = desc >> 32; int bit55 = extract64(addr, 55, 1); - /* Remove mtedesc from the normal sve descriptor. 
*/ - desc = extract32(desc, 0, SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT); - /* Perform gross MTE suppression early. */ if (!tbi_check(mtedesc, bit55) || tcma_check(mtedesc, bit55, allocation_tag_from_addr(addr))) { @@ -592,28 +688,28 @@ void sme_ld1_mte(CPUARMState *env, void *za, uint64_t *vg, #define DO_LD(L, END, ESZ) \ void HELPER(sme_ld1##L##END##_h)(CPUARMState *env, void *za, void *vg, \ - target_ulong addr, uint32_t desc) \ + target_ulong addr, uint64_t desc) \ { \ sme_ld1(env, za, vg, addr, desc, GETPC(), ESZ, 0, false, \ sve_ld1##L##L##END##_host, sve_ld1##L##L##END##_tlb, \ clear_horizontal, copy_horizontal); \ } \ void HELPER(sme_ld1##L##END##_v)(CPUARMState *env, void *za, void *vg, \ - target_ulong addr, uint32_t desc) \ + target_ulong addr, uint64_t desc) \ { \ sme_ld1(env, za, vg, addr, desc, GETPC(), ESZ, 0, true, \ sme_ld1##L##END##_v_host, sme_ld1##L##END##_v_tlb, \ clear_vertical_##L, copy_vertical_##L); \ } \ void HELPER(sme_ld1##L##END##_h_mte)(CPUARMState *env, void *za, void *vg, \ - target_ulong addr, uint32_t desc) \ + target_ulong addr, uint64_t desc) \ { \ sme_ld1_mte(env, za, vg, addr, desc, GETPC(), ESZ, false, \ sve_ld1##L##L##END##_host, sve_ld1##L##L##END##_tlb, \ clear_horizontal, copy_horizontal); \ } \ void HELPER(sme_ld1##L##END##_v_mte)(CPUARMState *env, void *za, void *vg, \ - target_ulong addr, uint32_t desc) \ + target_ulong addr, uint64_t desc) \ { \ sme_ld1_mte(env, za, vg, addr, desc, GETPC(), ESZ, true, \ sme_ld1##L##END##_v_host, sme_ld1##L##END##_v_tlb, \ @@ -755,16 +851,13 @@ void sme_st1(CPUARMState *env, void *za, uint64_t *vg, static inline QEMU_ALWAYS_INLINE void sme_st1_mte(CPUARMState *env, void *za, uint64_t *vg, target_ulong addr, - uint32_t desc, uintptr_t ra, int esz, bool vertical, + uint64_t desc, uintptr_t ra, int esz, bool vertical, sve_ldst1_host_fn *host_fn, sve_ldst1_tlb_fn *tlb_fn) { - uint32_t mtedesc = desc >> (SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT); + uint32_t mtedesc = desc >> 32; int bit55 = extract64(addr, 55, 1); - /* Remove mtedesc from the normal sve descriptor. */ - desc = extract32(desc, 0, SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT); - /* Perform gross MTE suppression early. 
*/ if (!tbi_check(mtedesc, bit55) || tcma_check(mtedesc, bit55, allocation_tag_from_addr(addr))) { @@ -777,25 +870,25 @@ void sme_st1_mte(CPUARMState *env, void *za, uint64_t *vg, target_ulong addr, #define DO_ST(L, END, ESZ) \ void HELPER(sme_st1##L##END##_h)(CPUARMState *env, void *za, void *vg, \ - target_ulong addr, uint32_t desc) \ + target_ulong addr, uint64_t desc) \ { \ sme_st1(env, za, vg, addr, desc, GETPC(), ESZ, 0, false, \ sve_st1##L##L##END##_host, sve_st1##L##L##END##_tlb); \ } \ void HELPER(sme_st1##L##END##_v)(CPUARMState *env, void *za, void *vg, \ - target_ulong addr, uint32_t desc) \ + target_ulong addr, uint64_t desc) \ { \ sme_st1(env, za, vg, addr, desc, GETPC(), ESZ, 0, true, \ sme_st1##L##END##_v_host, sme_st1##L##END##_v_tlb); \ } \ void HELPER(sme_st1##L##END##_h_mte)(CPUARMState *env, void *za, void *vg, \ - target_ulong addr, uint32_t desc) \ + target_ulong addr, uint64_t desc) \ { \ sme_st1_mte(env, za, vg, addr, desc, GETPC(), ESZ, false, \ sve_st1##L##L##END##_host, sve_st1##L##L##END##_tlb); \ } \ void HELPER(sme_st1##L##END##_v_mte)(CPUARMState *env, void *za, void *vg, \ - target_ulong addr, uint32_t desc) \ + target_ulong addr, uint64_t desc) \ { \ sme_st1_mte(env, za, vg, addr, desc, GETPC(), ESZ, true, \ sme_st1##L##END##_v_host, sme_st1##L##END##_v_tlb); \ @@ -903,28 +996,69 @@ void HELPER(sme_addva_d)(void *vzda, void *vzn, void *vpn, } } -void HELPER(sme_fmopa_s)(void *vza, void *vzn, void *vzm, void *vpn, - void *vpm, float_status *fpst_in, uint32_t desc) +static void do_fmopa_h(void *vza, void *vzn, void *vzm, uint16_t *pn, + uint16_t *pm, float_status *fpst, uint32_t desc, + uint16_t negx, int negf) { intptr_t row, col, oprsz = simd_maxsz(desc); - uint32_t neg = simd_data(desc) << 31; - uint16_t *pn = vpn, *pm = vpm; - float_status fpst; - /* - * Make a copy of float_status because this operation does not - * update the cumulative fp exception status. It also produces - * default nans. 
- */ - fpst = *fpst_in; - set_default_nan_mode(true, &fpst); + for (row = 0; row < oprsz; ) { + uint16_t pa = pn[H2(row >> 4)]; + do { + if (pa & 1) { + void *vza_row = vza + tile_vslice_offset(row); + uint16_t n = *(uint32_t *)(vzn + H1_2(row)) ^ negx; + + for (col = 0; col < oprsz; ) { + uint16_t pb = pm[H2(col >> 4)]; + do { + if (pb & 1) { + uint16_t *a = vza_row + H1_2(col); + uint16_t *m = vzm + H1_2(col); + *a = float16_muladd(n, *m, *a, negf, fpst); + } + col += 2; + pb >>= 2; + } while (col & 15); + } + } + row += 2; + pa >>= 2; + } while (row & 15); + } +} + +void HELPER(sme_fmopa_h)(void *vza, void *vzn, void *vzm, void *vpn, + void *vpm, float_status *fpst, uint32_t desc) +{ + do_fmopa_h(vza, vzn, vzm, vpn, vpm, fpst, desc, 0, 0); +} + +void HELPER(sme_fmops_h)(void *vza, void *vzn, void *vzm, void *vpn, + void *vpm, float_status *fpst, uint32_t desc) +{ + do_fmopa_h(vza, vzn, vzm, vpn, vpm, fpst, desc, 1u << 15, 0); +} + +void HELPER(sme_ah_fmops_h)(void *vza, void *vzn, void *vzm, void *vpn, + void *vpm, float_status *fpst, uint32_t desc) +{ + do_fmopa_h(vza, vzn, vzm, vpn, vpm, fpst, desc, 0, + float_muladd_negate_product); +} + +static void do_fmopa_s(void *vza, void *vzn, void *vzm, uint16_t *pn, + uint16_t *pm, float_status *fpst, uint32_t desc, + uint32_t negx, int negf) +{ + intptr_t row, col, oprsz = simd_maxsz(desc); for (row = 0; row < oprsz; ) { uint16_t pa = pn[H2(row >> 4)]; do { if (pa & 1) { void *vza_row = vza + tile_vslice_offset(row); - uint32_t n = *(uint32_t *)(vzn + H1_4(row)) ^ neg; + uint32_t n = *(uint32_t *)(vzn + H1_4(row)) ^ negx; for (col = 0; col < oprsz; ) { uint16_t pb = pm[H2(col >> 4)]; @@ -932,7 +1066,7 @@ void HELPER(sme_fmopa_s)(void *vza, void *vzn, void *vzm, void *vpn, if (pb & 1) { uint32_t *a = vza_row + H1_4(col); uint32_t *m = vzm + H1_4(col); - *a = float32_muladd(n, *m, *a, 0, &fpst); + *a = float32_muladd(n, *m, *a, negf, fpst); } col += 4; pb >>= 4; @@ -945,32 +1079,116 @@ void HELPER(sme_fmopa_s)(void *vza, void *vzn, void *vzm, void *vpn, } } -void HELPER(sme_fmopa_d)(void *vza, void *vzn, void *vzm, void *vpn, - void *vpm, float_status *fpst_in, uint32_t desc) +void HELPER(sme_fmopa_s)(void *vza, void *vzn, void *vzm, void *vpn, + void *vpm, float_status *fpst, uint32_t desc) { - intptr_t row, col, oprsz = simd_oprsz(desc) / 8; - uint64_t neg = (uint64_t)simd_data(desc) << 63; - uint64_t *za = vza, *zn = vzn, *zm = vzm; - uint8_t *pn = vpn, *pm = vpm; - float_status fpst = *fpst_in; + do_fmopa_s(vza, vzn, vzm, vpn, vpm, fpst, desc, 0, 0); +} - set_default_nan_mode(true, &fpst); +void HELPER(sme_fmops_s)(void *vza, void *vzn, void *vzm, void *vpn, + void *vpm, float_status *fpst, uint32_t desc) +{ + do_fmopa_s(vza, vzn, vzm, vpn, vpm, fpst, desc, 1u << 31, 0); +} + +void HELPER(sme_ah_fmops_s)(void *vza, void *vzn, void *vzm, void *vpn, + void *vpm, float_status *fpst, uint32_t desc) +{ + do_fmopa_s(vza, vzn, vzm, vpn, vpm, fpst, desc, 0, + float_muladd_negate_product); +} + +static void do_fmopa_d(uint64_t *za, uint64_t *zn, uint64_t *zm, uint8_t *pn, + uint8_t *pm, float_status *fpst, uint32_t desc, + uint64_t negx, int negf) +{ + intptr_t row, col, oprsz = simd_oprsz(desc) / 8; for (row = 0; row < oprsz; ++row) { if (pn[H1(row)] & 1) { uint64_t *za_row = &za[tile_vslice_index(row)]; - uint64_t n = zn[row] ^ neg; + uint64_t n = zn[row] ^ negx; for (col = 0; col < oprsz; ++col) { if (pm[H1(col)] & 1) { uint64_t *a = &za_row[col]; - *a = float64_muladd(n, zm[col], *a, 0, &fpst); + *a = float64_muladd(n, zm[col], *a, negf, fpst); 
} } } } } +void HELPER(sme_fmopa_d)(void *vza, void *vzn, void *vzm, void *vpn, + void *vpm, float_status *fpst, uint32_t desc) +{ + do_fmopa_d(vza, vzn, vzm, vpn, vpm, fpst, desc, 0, 0); +} + +void HELPER(sme_fmops_d)(void *vza, void *vzn, void *vzm, void *vpn, + void *vpm, float_status *fpst, uint32_t desc) +{ + do_fmopa_d(vza, vzn, vzm, vpn, vpm, fpst, desc, 1ull << 63, 0); +} + +void HELPER(sme_ah_fmops_d)(void *vza, void *vzn, void *vzm, void *vpn, + void *vpm, float_status *fpst, uint32_t desc) +{ + do_fmopa_d(vza, vzn, vzm, vpn, vpm, fpst, desc, 0, + float_muladd_negate_product); +} + +static void do_bfmopa(void *vza, void *vzn, void *vzm, uint16_t *pn, + uint16_t *pm, float_status *fpst, uint32_t desc, + uint16_t negx, int negf) +{ + intptr_t row, col, oprsz = simd_maxsz(desc); + + for (row = 0; row < oprsz; ) { + uint16_t pa = pn[H2(row >> 4)]; + do { + if (pa & 1) { + void *vza_row = vza + tile_vslice_offset(row); + uint16_t n = *(uint32_t *)(vzn + H1_2(row)) ^ negx; + + for (col = 0; col < oprsz; ) { + uint16_t pb = pm[H2(col >> 4)]; + do { + if (pb & 1) { + uint16_t *a = vza_row + H1_2(col); + uint16_t *m = vzm + H1_2(col); + *a = bfloat16_muladd(n, *m, *a, negf, fpst); + } + col += 2; + pb >>= 2; + } while (col & 15); + } + } + row += 2; + pa >>= 2; + } while (row & 15); + } +} + +void HELPER(sme_bfmopa)(void *vza, void *vzn, void *vzm, void *vpn, + void *vpm, float_status *fpst, uint32_t desc) +{ + do_bfmopa(vza, vzn, vzm, vpn, vpm, fpst, desc, 0, 0); +} + +void HELPER(sme_bfmops)(void *vza, void *vzn, void *vzm, void *vpn, + void *vpm, float_status *fpst, uint32_t desc) +{ + do_bfmopa(vza, vzn, vzm, vpn, vpm, fpst, desc, 1u << 15, 0); +} + +void HELPER(sme_ah_bfmops)(void *vza, void *vzn, void *vzm, void *vpn, + void *vpm, float_status *fpst, uint32_t desc) +{ + do_bfmopa(vza, vzn, vzm, vpn, vpm, fpst, desc, 0, + float_muladd_negate_product); +} + /* * Alter PAIR as needed for controlling predicates being false, * and for NEG on an enabled row element. @@ -991,6 +1209,20 @@ static inline uint32_t f16mop_adj_pair(uint32_t pair, uint32_t pg, uint32_t neg) return pair; } +static inline uint32_t f16mop_ah_neg_adj_pair(uint32_t pair, uint32_t pg) +{ + uint32_t l = pg & 1 ? float16_ah_chs(pair) : 0; + uint32_t h = pg & 4 ? float16_ah_chs(pair >> 16) : 0; + return l | (h << 16); +} + +static inline uint32_t bf16mop_ah_neg_adj_pair(uint32_t pair, uint32_t pg) +{ + uint32_t l = pg & 1 ? bfloat16_ah_chs(pair) : 0; + uint32_t h = pg & 4 ? bfloat16_ah_chs(pair >> 16) : 0; + return l | (h << 16); +} + static float32 f16_dotadd(float32 sum, uint32_t e1, uint32_t e2, float_status *s_f16, float_status *s_std, float_status *s_odd) @@ -1005,49 +1237,67 @@ static float32 f16_dotadd(float32 sum, uint32_t e1, uint32_t e2, * - we have pre-set-up copy of s_std which is set to round-to-odd, * for the multiply (see below) */ - float64 e1r = float16_to_float64(e1 & 0xffff, true, s_f16); - float64 e1c = float16_to_float64(e1 >> 16, true, s_f16); - float64 e2r = float16_to_float64(e2 & 0xffff, true, s_f16); - float64 e2c = float16_to_float64(e2 >> 16, true, s_f16); - float64 t64; + float16 h1r = e1 & 0xffff; + float16 h1c = e1 >> 16; + float16 h2r = e2 & 0xffff; + float16 h2c = e2 >> 16; float32 t32; - /* - * The ARM pseudocode function FPDot performs both multiplies - * and the add with a single rounding operation. Emulate this - * by performing the first multiply in round-to-odd, then doing - * the second multiply as fused multiply-add, and rounding to - * float32 all in one step. 
- */ - t64 = float64_mul(e1r, e2r, s_odd); - t64 = float64r32_muladd(e1c, e2c, t64, 0, s_std); + /* C.f. FPProcessNaNs4 */ + if (float16_is_any_nan(h1r) || float16_is_any_nan(h1c) || + float16_is_any_nan(h2r) || float16_is_any_nan(h2c)) { + float16 t16; + + if (float16_is_signaling_nan(h1r, s_f16)) { + t16 = h1r; + } else if (float16_is_signaling_nan(h1c, s_f16)) { + t16 = h1c; + } else if (float16_is_signaling_nan(h2r, s_f16)) { + t16 = h2r; + } else if (float16_is_signaling_nan(h2c, s_f16)) { + t16 = h2c; + } else if (float16_is_any_nan(h1r)) { + t16 = h1r; + } else if (float16_is_any_nan(h1c)) { + t16 = h1c; + } else if (float16_is_any_nan(h2r)) { + t16 = h2r; + } else { + t16 = h2c; + } + t32 = float16_to_float32(t16, true, s_f16); + } else { + float64 e1r = float16_to_float64(h1r, true, s_f16); + float64 e1c = float16_to_float64(h1c, true, s_f16); + float64 e2r = float16_to_float64(h2r, true, s_f16); + float64 e2c = float16_to_float64(h2c, true, s_f16); + float64 t64; + + /* + * The ARM pseudocode function FPDot performs both multiplies + * and the add with a single rounding operation. Emulate this + * by performing the first multiply in round-to-odd, then doing + * the second multiply as fused multiply-add, and rounding to + * float32 all in one step. + */ + t64 = float64_mul(e1r, e2r, s_odd); + t64 = float64r32_muladd(e1c, e2c, t64, 0, s_std); - /* This conversion is exact, because we've already rounded. */ - t32 = float64_to_float32(t64, s_std); + /* This conversion is exact, because we've already rounded. */ + t32 = float64_to_float32(t64, s_std); + } /* The final accumulation step is not fused. */ return float32_add(sum, t32, s_std); } -void HELPER(sme_fmopa_h)(void *vza, void *vzn, void *vzm, void *vpn, - void *vpm, CPUARMState *env, uint32_t desc) +static void do_fmopa_w_h(void *vza, void *vzn, void *vzm, uint16_t *pn, + uint16_t *pm, CPUARMState *env, uint32_t desc, + uint32_t negx, bool ah_neg) { intptr_t row, col, oprsz = simd_maxsz(desc); - uint32_t neg = simd_data(desc) * 0x80008000u; - uint16_t *pn = vpn, *pm = vpm; - float_status fpst_odd, fpst_std, fpst_f16; + float_status fpst_odd = env->vfp.fp_status[FPST_ZA]; - /* - * Make copies of the fp status fields we use, because this operation - * does not update the cumulative fp exception status. It also - * produces default NaNs. We also need a second copy of fp_status with - * round-to-odd -- see above. 
- */ - fpst_f16 = env->vfp.fp_status[FPST_A64_F16]; - fpst_std = env->vfp.fp_status[FPST_A64]; - set_default_nan_mode(true, &fpst_std); - set_default_nan_mode(true, &fpst_f16); - fpst_odd = fpst_std; set_float_rounding_mode(float_round_to_odd, &fpst_odd); for (row = 0; row < oprsz; ) { @@ -1056,7 +1306,11 @@ void HELPER(sme_fmopa_h)(void *vza, void *vzn, void *vzm, void *vpn, void *vza_row = vza + tile_vslice_offset(row); uint32_t n = *(uint32_t *)(vzn + H1_4(row)); - n = f16mop_adj_pair(n, prow, neg); + if (ah_neg) { + n = f16mop_ah_neg_adj_pair(n, prow); + } else { + n = f16mop_adj_pair(n, prow, negx); + } for (col = 0; col < oprsz; ) { uint16_t pcol = pm[H2(col >> 4)]; @@ -1067,7 +1321,9 @@ void HELPER(sme_fmopa_h)(void *vza, void *vzn, void *vzm, void *vpn, m = f16mop_adj_pair(m, pcol, 0); *a = f16_dotadd(*a, n, m, - &fpst_f16, &fpst_std, &fpst_odd); + &env->vfp.fp_status[FPST_ZA_F16], + &env->vfp.fp_status[FPST_ZA], + &fpst_odd); } col += 4; pcol >>= 4; @@ -1079,12 +1335,103 @@ void HELPER(sme_fmopa_h)(void *vza, void *vzn, void *vzm, void *vpn, } } -void HELPER(sme_bfmopa)(void *vza, void *vzn, void *vzm, - void *vpn, void *vpm, CPUARMState *env, uint32_t desc) +void HELPER(sme_fmopa_w_h)(void *vza, void *vzn, void *vzm, void *vpn, + void *vpm, CPUARMState *env, uint32_t desc) +{ + do_fmopa_w_h(vza, vzn, vzm, vpn, vpm, env, desc, 0, false); +} + +void HELPER(sme_fmops_w_h)(void *vza, void *vzn, void *vzm, void *vpn, + void *vpm, CPUARMState *env, uint32_t desc) +{ + do_fmopa_w_h(vza, vzn, vzm, vpn, vpm, env, desc, 0x80008000u, false); +} + +void HELPER(sme_ah_fmops_w_h)(void *vza, void *vzn, void *vzm, void *vpn, + void *vpm, CPUARMState *env, uint32_t desc) +{ + do_fmopa_w_h(vza, vzn, vzm, vpn, vpm, env, desc, 0, true); +} + +void HELPER(sme2_fdot_h)(void *vd, void *vn, void *vm, void *va, + CPUARMState *env, uint32_t desc) +{ + intptr_t i, oprsz = simd_maxsz(desc); + bool za = extract32(desc, SIMD_DATA_SHIFT, 1); + float_status *fpst_std = &env->vfp.fp_status[za ? FPST_ZA : FPST_A64]; + float_status *fpst_f16 = &env->vfp.fp_status[za ? FPST_ZA_F16 : FPST_A64_F16]; + float_status fpst_odd = *fpst_std; + float32 *d = vd, *a = va; + uint32_t *n = vn, *m = vm; + + set_float_rounding_mode(float_round_to_odd, &fpst_odd); + + for (i = 0; i < oprsz / sizeof(float32); ++i) { + d[H4(i)] = f16_dotadd(a[H4(i)], n[H4(i)], m[H4(i)], + fpst_f16, fpst_std, &fpst_odd); + } +} + +void HELPER(sme2_fdot_idx_h)(void *vd, void *vn, void *vm, void *va, + CPUARMState *env, uint32_t desc) +{ + intptr_t i, j, oprsz = simd_maxsz(desc); + intptr_t elements = oprsz / sizeof(float32); + intptr_t eltspersegment = MIN(4, elements); + int idx = extract32(desc, SIMD_DATA_SHIFT, 2); + bool za = extract32(desc, SIMD_DATA_SHIFT + 2, 1); + float_status *fpst_std = &env->vfp.fp_status[za ? FPST_ZA : FPST_A64]; + float_status *fpst_f16 = &env->vfp.fp_status[za ? 
FPST_ZA_F16 : FPST_A64_F16]; + float_status fpst_odd = *fpst_std; + float32 *d = vd, *a = va; + uint32_t *n = vn, *m = (uint32_t *)vm + H4(idx); + + set_float_rounding_mode(float_round_to_odd, &fpst_odd); + + for (i = 0; i < elements; i += eltspersegment) { + uint32_t mm = m[i]; + for (j = 0; j < eltspersegment; ++j) { + d[H4(i + j)] = f16_dotadd(a[H4(i + j)], n[H4(i + j)], mm, + fpst_f16, fpst_std, &fpst_odd); + } + } +} + +void HELPER(sme2_fvdot_idx_h)(void *vd, void *vn, void *vm, void *va, + CPUARMState *env, uint32_t desc) +{ + intptr_t i, j, oprsz = simd_maxsz(desc); + intptr_t elements = oprsz / sizeof(float32); + intptr_t eltspersegment = MIN(4, elements); + int idx = extract32(desc, SIMD_DATA_SHIFT, 2); + int sel = extract32(desc, SIMD_DATA_SHIFT + 2, 1); + float_status fpst_odd, *fpst_std, *fpst_f16; + float32 *d = vd, *a = va; + uint16_t *n0 = vn; + uint16_t *n1 = vn + sizeof(ARMVectorReg); + uint32_t *m = (uint32_t *)vm + H4(idx); + + fpst_std = &env->vfp.fp_status[FPST_ZA]; + fpst_f16 = &env->vfp.fp_status[FPST_ZA_F16]; + fpst_odd = *fpst_std; + set_float_rounding_mode(float_round_to_odd, &fpst_odd); + + for (i = 0; i < elements; i += eltspersegment) { + uint32_t mm = m[i]; + for (j = 0; j < eltspersegment; ++j) { + uint32_t nn = (n0[H2(2 * (i + j) + sel)]) + | (n1[H2(2 * (i + j) + sel)] << 16); + d[i + H4(j)] = f16_dotadd(a[i + H4(j)], nn, mm, + fpst_f16, fpst_std, &fpst_odd); + } + } +} + +static void do_bfmopa_w(void *vza, void *vzn, void *vzm, + uint16_t *pn, uint16_t *pm, CPUARMState *env, + uint32_t desc, uint32_t negx, bool ah_neg) { intptr_t row, col, oprsz = simd_maxsz(desc); - uint32_t neg = simd_data(desc) * 0x80008000u; - uint16_t *pn = vpn, *pm = vpm; float_status fpst, fpst_odd; if (is_ebf(env, &fpst, &fpst_odd)) { @@ -1094,7 +1441,11 @@ void HELPER(sme_bfmopa)(void *vza, void *vzn, void *vzm, void *vza_row = vza + tile_vslice_offset(row); uint32_t n = *(uint32_t *)(vzn + H1_4(row)); - n = f16mop_adj_pair(n, prow, neg); + if (ah_neg) { + n = bf16mop_ah_neg_adj_pair(n, prow); + } else { + n = f16mop_adj_pair(n, prow, negx); + } for (col = 0; col < oprsz; ) { uint16_t pcol = pm[H2(col >> 4)]; @@ -1121,7 +1472,11 @@ void HELPER(sme_bfmopa)(void *vza, void *vzn, void *vzm, void *vza_row = vza + tile_vslice_offset(row); uint32_t n = *(uint32_t *)(vzn + H1_4(row)); - n = f16mop_adj_pair(n, prow, neg); + if (ah_neg) { + n = bf16mop_ah_neg_adj_pair(n, prow); + } else { + n = f16mop_adj_pair(n, prow, negx); + } for (col = 0; col < oprsz; ) { uint16_t pcol = pm[H2(col >> 4)]; @@ -1144,6 +1499,24 @@ void HELPER(sme_bfmopa)(void *vza, void *vzn, void *vzm, } } +void HELPER(sme_bfmopa_w)(void *vza, void *vzn, void *vzm, void *vpn, + void *vpm, CPUARMState *env, uint32_t desc) +{ + do_bfmopa_w(vza, vzn, vzm, vpn, vpm, env, desc, 0, false); +} + +void HELPER(sme_bfmops_w)(void *vza, void *vzn, void *vzm, void *vpn, + void *vpm, CPUARMState *env, uint32_t desc) +{ + do_bfmopa_w(vza, vzn, vzm, vpn, vpm, env, desc, 0x80008000u, false); +} + +void HELPER(sme_ah_bfmops_w)(void *vza, void *vzn, void *vzm, void *vpn, + void *vpm, CPUARMState *env, uint32_t desc) +{ + do_bfmopa_w(vza, vzn, vzm, vpn, vpm, env, desc, 0, true); +} + typedef uint32_t IMOPFn32(uint32_t, uint32_t, uint32_t, uint8_t, bool); static inline void do_imopa_s(uint32_t *za, uint32_t *zn, uint32_t *zm, uint8_t *pn, uint8_t *pm, @@ -1188,7 +1561,7 @@ static inline void do_imopa_d(uint64_t *za, uint64_t *zn, uint64_t *zm, } } -#define DEF_IMOP_32(NAME, NTYPE, MTYPE) \ +#define DEF_IMOP_8x4_32(NAME, NTYPE, MTYPE) \ 
static uint32_t NAME(uint32_t n, uint32_t m, uint32_t a, uint8_t p, bool neg) \ { \ uint32_t sum = 0; \ @@ -1201,7 +1574,7 @@ static uint32_t NAME(uint32_t n, uint32_t m, uint32_t a, uint8_t p, bool neg) \ return neg ? a - sum : a + sum; \ } -#define DEF_IMOP_64(NAME, NTYPE, MTYPE) \ +#define DEF_IMOP_16x4_64(NAME, NTYPE, MTYPE) \ static uint64_t NAME(uint64_t n, uint64_t m, uint64_t a, uint8_t p, bool neg) \ { \ uint64_t sum = 0; \ @@ -1214,27 +1587,1070 @@ static uint64_t NAME(uint64_t n, uint64_t m, uint64_t a, uint8_t p, bool neg) \ return neg ? a - sum : a + sum; \ } -DEF_IMOP_32(smopa_s, int8_t, int8_t) -DEF_IMOP_32(umopa_s, uint8_t, uint8_t) -DEF_IMOP_32(sumopa_s, int8_t, uint8_t) -DEF_IMOP_32(usmopa_s, uint8_t, int8_t) +DEF_IMOP_8x4_32(smopa_s, int8_t, int8_t) +DEF_IMOP_8x4_32(umopa_s, uint8_t, uint8_t) +DEF_IMOP_8x4_32(sumopa_s, int8_t, uint8_t) +DEF_IMOP_8x4_32(usmopa_s, uint8_t, int8_t) -DEF_IMOP_64(smopa_d, int16_t, int16_t) -DEF_IMOP_64(umopa_d, uint16_t, uint16_t) -DEF_IMOP_64(sumopa_d, int16_t, uint16_t) -DEF_IMOP_64(usmopa_d, uint16_t, int16_t) +DEF_IMOP_16x4_64(smopa_d, int16_t, int16_t) +DEF_IMOP_16x4_64(umopa_d, uint16_t, uint16_t) +DEF_IMOP_16x4_64(sumopa_d, int16_t, uint16_t) +DEF_IMOP_16x4_64(usmopa_d, uint16_t, int16_t) -#define DEF_IMOPH(NAME, S) \ - void HELPER(sme_##NAME##_##S)(void *vza, void *vzn, void *vzm, \ +#define DEF_IMOPH(P, NAME, S) \ + void HELPER(P##_##NAME##_##S)(void *vza, void *vzn, void *vzm, \ void *vpn, void *vpm, uint32_t desc) \ { do_imopa_##S(vza, vzn, vzm, vpn, vpm, desc, NAME##_##S); } -DEF_IMOPH(smopa, s) -DEF_IMOPH(umopa, s) -DEF_IMOPH(sumopa, s) -DEF_IMOPH(usmopa, s) +DEF_IMOPH(sme, smopa, s) +DEF_IMOPH(sme, umopa, s) +DEF_IMOPH(sme, sumopa, s) +DEF_IMOPH(sme, usmopa, s) + +DEF_IMOPH(sme, smopa, d) +DEF_IMOPH(sme, umopa, d) +DEF_IMOPH(sme, sumopa, d) +DEF_IMOPH(sme, usmopa, d) + +static uint32_t bmopa_s(uint32_t n, uint32_t m, uint32_t a, uint8_t p, bool neg) +{ + uint32_t sum = ctpop32(~(n ^ m)); + if (neg) { + sum = -sum; + } + if (!(p & 1)) { + sum = 0; + } + return a + sum; +} + +DEF_IMOPH(sme2, bmopa, s) + +#define DEF_IMOP_16x2_32(NAME, NTYPE, MTYPE) \ +static uint32_t NAME(uint32_t n, uint32_t m, uint32_t a, uint8_t p, bool neg) \ +{ \ + uint32_t sum = 0; \ + /* Apply P to N as a mask, making the inactive elements 0. */ \ + n &= expand_pred_h(p); \ + sum += (NTYPE)(n >> 0) * (MTYPE)(m >> 0); \ + sum += (NTYPE)(n >> 16) * (MTYPE)(m >> 16); \ + return neg ? 
a - sum : a + sum; \ +} + +DEF_IMOP_16x2_32(smopa2_s, int16_t, int16_t) +DEF_IMOP_16x2_32(umopa2_s, uint16_t, uint16_t) + +DEF_IMOPH(sme2, smopa2, s) +DEF_IMOPH(sme2, umopa2, s) + +#define DO_VDOT_IDX(NAME, TYPED, TYPEN, TYPEM, HD, HN) \ +void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \ +{ \ + intptr_t svl = simd_oprsz(desc); \ + intptr_t elements = svl / sizeof(TYPED); \ + intptr_t eltperseg = 16 / sizeof(TYPED); \ + intptr_t nreg = sizeof(TYPED) / sizeof(TYPEN); \ + intptr_t vstride = (svl / nreg) * sizeof(ARMVectorReg); \ + intptr_t zstride = sizeof(ARMVectorReg) / sizeof(TYPEN); \ + intptr_t idx = extract32(desc, SIMD_DATA_SHIFT, 2); \ + TYPEN *n = vn; \ + TYPEM *m = vm; \ + for (intptr_t r = 0; r < nreg; r++) { \ + TYPED *d = vd + r * vstride; \ + for (intptr_t seg = 0; seg < elements; seg += eltperseg) { \ + intptr_t s = seg + idx; \ + for (intptr_t e = seg; e < seg + eltperseg; e++) { \ + TYPED sum = d[HD(e)]; \ + for (intptr_t i = 0; i < nreg; i++) { \ + TYPED nn = n[i * zstride + HN(nreg * e + r)]; \ + TYPED mm = m[HN(nreg * s + i)]; \ + sum += nn * mm; \ + } \ + d[HD(e)] = sum; \ + } \ + } \ + } \ +} + +DO_VDOT_IDX(sme2_svdot_idx_4b, int32_t, int8_t, int8_t, H4, H1) +DO_VDOT_IDX(sme2_uvdot_idx_4b, uint32_t, uint8_t, uint8_t, H4, H1) +DO_VDOT_IDX(sme2_suvdot_idx_4b, int32_t, int8_t, uint8_t, H4, H1) +DO_VDOT_IDX(sme2_usvdot_idx_4b, int32_t, uint8_t, int8_t, H4, H1) + +DO_VDOT_IDX(sme2_svdot_idx_4h, int64_t, int16_t, int16_t, H8, H2) +DO_VDOT_IDX(sme2_uvdot_idx_4h, uint64_t, uint16_t, uint16_t, H8, H2) + +DO_VDOT_IDX(sme2_svdot_idx_2h, int32_t, int16_t, int16_t, H4, H2) +DO_VDOT_IDX(sme2_uvdot_idx_2h, uint32_t, uint16_t, uint16_t, H4, H2) + +#undef DO_VDOT_IDX + +#define DO_MLALL(NAME, TYPEW, TYPEN, TYPEM, HW, HN, OP) \ +void HELPER(NAME)(void *vd, void *vn, void *vm, void *va, uint32_t desc) \ +{ \ + intptr_t elements = simd_oprsz(desc) / sizeof(TYPEW); \ + intptr_t sel = extract32(desc, SIMD_DATA_SHIFT, 2); \ + TYPEW *d = vd, *a = va; TYPEN *n = vn; TYPEM *m = vm; \ + for (intptr_t i = 0; i < elements; ++i) { \ + TYPEW nn = n[HN(i * 4 + sel)]; \ + TYPEM mm = m[HN(i * 4 + sel)]; \ + d[HW(i)] = a[HW(i)] OP (nn * mm); \ + } \ +} + +DO_MLALL(sme2_smlall_s, int32_t, int8_t, int8_t, H4, H1, +) +DO_MLALL(sme2_smlall_d, int64_t, int16_t, int16_t, H8, H2, +) +DO_MLALL(sme2_smlsll_s, int32_t, int8_t, int8_t, H4, H1, -) +DO_MLALL(sme2_smlsll_d, int64_t, int16_t, int16_t, H8, H2, -) + +DO_MLALL(sme2_umlall_s, uint32_t, uint8_t, uint8_t, H4, H1, +) +DO_MLALL(sme2_umlall_d, uint64_t, uint16_t, uint16_t, H8, H2, +) +DO_MLALL(sme2_umlsll_s, uint32_t, uint8_t, uint8_t, H4, H1, -) +DO_MLALL(sme2_umlsll_d, uint64_t, uint16_t, uint16_t, H8, H2, -) + +DO_MLALL(sme2_usmlall_s, uint32_t, uint8_t, int8_t, H4, H1, +) + +#undef DO_MLALL + +#define DO_MLALL_IDX(NAME, TYPEW, TYPEN, TYPEM, HW, HN, OP) \ +void HELPER(NAME)(void *vd, void *vn, void *vm, void *va, uint32_t desc) \ +{ \ + intptr_t elements = simd_oprsz(desc) / sizeof(TYPEW); \ + intptr_t eltspersegment = 16 / sizeof(TYPEW); \ + intptr_t sel = extract32(desc, SIMD_DATA_SHIFT, 2); \ + intptr_t idx = extract32(desc, SIMD_DATA_SHIFT + 2, 4); \ + TYPEW *d = vd, *a = va; TYPEN *n = vn; TYPEM *m = vm; \ + for (intptr_t i = 0; i < elements; i += eltspersegment) { \ + TYPEW mm = m[HN(i * 4 + idx)]; \ + for (intptr_t j = 0; j < eltspersegment; ++j) { \ + TYPEN nn = n[HN((i + j) * 4 + sel)]; \ + d[HW(i + j)] = a[HW(i + j)] OP (nn * mm); \ + } \ + } \ +} + +DO_MLALL_IDX(sme2_smlall_idx_s, int32_t, int8_t, int8_t, H4, H1, +) 
+DO_MLALL_IDX(sme2_smlall_idx_d, int64_t, int16_t, int16_t, H8, H2, +) +DO_MLALL_IDX(sme2_smlsll_idx_s, int32_t, int8_t, int8_t, H4, H1, -) +DO_MLALL_IDX(sme2_smlsll_idx_d, int64_t, int16_t, int16_t, H8, H2, -) + +DO_MLALL_IDX(sme2_umlall_idx_s, uint32_t, uint8_t, uint8_t, H4, H1, +) +DO_MLALL_IDX(sme2_umlall_idx_d, uint64_t, uint16_t, uint16_t, H8, H2, +) +DO_MLALL_IDX(sme2_umlsll_idx_s, uint32_t, uint8_t, uint8_t, H4, H1, -) +DO_MLALL_IDX(sme2_umlsll_idx_d, uint64_t, uint16_t, uint16_t, H8, H2, -) + +DO_MLALL_IDX(sme2_usmlall_idx_s, uint32_t, uint8_t, int8_t, H4, H1, +) +DO_MLALL_IDX(sme2_sumlall_idx_s, uint32_t, int8_t, uint8_t, H4, H1, +) + +#undef DO_MLALL_IDX + +/* Convert and compress */ +void HELPER(sme2_bfcvt)(void *vd, void *vs, float_status *fpst, uint32_t desc) +{ + ARMVectorReg scratch; + size_t oprsz = simd_oprsz(desc); + size_t i, n = oprsz / 4; + float32 *s0 = vs; + float32 *s1 = vs + sizeof(ARMVectorReg); + bfloat16 *d = vd; + + if (vd == s1) { + s1 = memcpy(&scratch, s1, oprsz); + } + + for (i = 0; i < n; ++i) { + d[H2(i)] = float32_to_bfloat16(s0[H4(i)], fpst); + } + for (i = 0; i < n; ++i) { + d[H2(i) + n] = float32_to_bfloat16(s1[H4(i)], fpst); + } +} -DEF_IMOPH(smopa, d) -DEF_IMOPH(umopa, d) -DEF_IMOPH(sumopa, d) -DEF_IMOPH(usmopa, d) +void HELPER(sme2_fcvt_n)(void *vd, void *vs, float_status *fpst, uint32_t desc) +{ + ARMVectorReg scratch; + size_t oprsz = simd_oprsz(desc); + size_t i, n = oprsz / 4; + float32 *s0 = vs; + float32 *s1 = vs + sizeof(ARMVectorReg); + float16 *d = vd; + + if (vd == s1) { + s1 = memcpy(&scratch, s1, oprsz); + } + + for (i = 0; i < n; ++i) { + d[H2(i)] = sve_f32_to_f16(s0[H4(i)], fpst); + } + for (i = 0; i < n; ++i) { + d[H2(i) + n] = sve_f32_to_f16(s1[H4(i)], fpst); + } +} + +#define SQCVT2(NAME, TW, TN, HW, HN, SAT) \ +void HELPER(NAME)(void *vd, void *vs, uint32_t desc) \ +{ \ + ARMVectorReg scratch; \ + size_t oprsz = simd_oprsz(desc), n = oprsz / sizeof(TW); \ + TW *s0 = vs, *s1 = vs + sizeof(ARMVectorReg); \ + TN *d = vd; \ + if (vectors_overlap(vd, 1, vs, 2)) { \ + d = (TN *)&scratch; \ + } \ + for (size_t i = 0; i < n; ++i) { \ + d[HN(i)] = SAT(s0[HW(i)]); \ + d[HN(i + n)] = SAT(s1[HW(i)]); \ + } \ + if (d != vd) { \ + memcpy(vd, d, oprsz); \ + } \ +} + +SQCVT2(sme2_sqcvt_sh, int32_t, int16_t, H4, H2, do_ssat_h) +SQCVT2(sme2_uqcvt_sh, uint32_t, uint16_t, H4, H2, do_usat_h) +SQCVT2(sme2_sqcvtu_sh, int32_t, uint16_t, H4, H2, do_usat_h) + +#undef SQCVT2 + +#define SQCVT4(NAME, TW, TN, HW, HN, SAT) \ +void HELPER(NAME)(void *vd, void *vs, uint32_t desc) \ +{ \ + ARMVectorReg scratch; \ + size_t oprsz = simd_oprsz(desc), n = oprsz / sizeof(TW); \ + TW *s0 = vs, *s1 = vs + sizeof(ARMVectorReg); \ + TW *s2 = vs + 2 * sizeof(ARMVectorReg); \ + TW *s3 = vs + 3 * sizeof(ARMVectorReg); \ + TN *d = vd; \ + if (vectors_overlap(vd, 1, vs, 4)) { \ + d = (TN *)&scratch; \ + } \ + for (size_t i = 0; i < n; ++i) { \ + d[HN(i)] = SAT(s0[HW(i)]); \ + d[HN(i + n)] = SAT(s1[HW(i)]); \ + d[HN(i + 2 * n)] = SAT(s2[HW(i)]); \ + d[HN(i + 3 * n)] = SAT(s3[HW(i)]); \ + } \ + if (d != vd) { \ + memcpy(vd, d, oprsz); \ + } \ +} + +SQCVT4(sme2_sqcvt_sb, int32_t, int8_t, H4, H2, do_ssat_b) +SQCVT4(sme2_uqcvt_sb, uint32_t, uint8_t, H4, H2, do_usat_b) +SQCVT4(sme2_sqcvtu_sb, int32_t, uint8_t, H4, H2, do_usat_b) + +SQCVT4(sme2_sqcvt_dh, int64_t, int16_t, H8, H2, do_ssat_h) +SQCVT4(sme2_uqcvt_dh, uint64_t, uint16_t, H8, H2, do_usat_h) +SQCVT4(sme2_sqcvtu_dh, int64_t, uint16_t, H8, H2, do_usat_h) + +#undef SQCVT4 + +#define SQRSHR2(NAME, TW, TN, HW, HN, RSHR, SAT) \ 
+void HELPER(NAME)(void *vd, void *vs, uint32_t desc) \ +{ \ + ARMVectorReg scratch; \ + size_t oprsz = simd_oprsz(desc), n = oprsz / sizeof(TW); \ + int shift = simd_data(desc); \ + TW *s0 = vs, *s1 = vs + sizeof(ARMVectorReg); \ + TN *d = vd; \ + if (vectors_overlap(vd, 1, vs, 2)) { \ + d = (TN *)&scratch; \ + } \ + for (size_t i = 0; i < n; ++i) { \ + d[HN(i)] = SAT(RSHR(s0[HW(i)], shift)); \ + d[HN(i + n)] = SAT(RSHR(s1[HW(i)], shift)); \ + } \ + if (d != vd) { \ + memcpy(vd, d, oprsz); \ + } \ +} + +SQRSHR2(sme2_sqrshr_sh, int32_t, int16_t, H4, H2, do_srshr, do_ssat_h) +SQRSHR2(sme2_uqrshr_sh, uint32_t, uint16_t, H4, H2, do_urshr, do_usat_h) +SQRSHR2(sme2_sqrshru_sh, int32_t, uint16_t, H4, H2, do_srshr, do_usat_h) + +#undef SQRSHR2 + +#define SQRSHR4(NAME, TW, TN, HW, HN, RSHR, SAT) \ +void HELPER(NAME)(void *vd, void *vs, uint32_t desc) \ +{ \ + ARMVectorReg scratch; \ + size_t oprsz = simd_oprsz(desc), n = oprsz / sizeof(TW); \ + int shift = simd_data(desc); \ + TW *s0 = vs, *s1 = vs + sizeof(ARMVectorReg); \ + TW *s2 = vs + 2 * sizeof(ARMVectorReg); \ + TW *s3 = vs + 3 * sizeof(ARMVectorReg); \ + TN *d = vd; \ + if (vectors_overlap(vd, 1, vs, 4)) { \ + d = (TN *)&scratch; \ + } \ + for (size_t i = 0; i < n; ++i) { \ + d[HN(i)] = SAT(RSHR(s0[HW(i)], shift)); \ + d[HN(i + n)] = SAT(RSHR(s1[HW(i)], shift)); \ + d[HN(i + 2 * n)] = SAT(RSHR(s2[HW(i)], shift)); \ + d[HN(i + 3 * n)] = SAT(RSHR(s3[HW(i)], shift)); \ + } \ + if (d != vd) { \ + memcpy(vd, d, oprsz); \ + } \ +} + +SQRSHR4(sme2_sqrshr_sb, int32_t, int8_t, H4, H2, do_srshr, do_ssat_b) +SQRSHR4(sme2_uqrshr_sb, uint32_t, uint8_t, H4, H2, do_urshr, do_usat_b) +SQRSHR4(sme2_sqrshru_sb, int32_t, uint8_t, H4, H2, do_srshr, do_usat_b) + +SQRSHR4(sme2_sqrshr_dh, int64_t, int16_t, H8, H2, do_srshr, do_ssat_h) +SQRSHR4(sme2_uqrshr_dh, uint64_t, uint16_t, H8, H2, do_urshr, do_usat_h) +SQRSHR4(sme2_sqrshru_dh, int64_t, uint16_t, H8, H2, do_srshr, do_usat_h) + +#undef SQRSHR4 + +/* Convert and interleave */ +void HELPER(sme2_bfcvtn)(void *vd, void *vs, float_status *fpst, uint32_t desc) +{ + size_t i, n = simd_oprsz(desc) / 4; + float32 *s0 = vs; + float32 *s1 = vs + sizeof(ARMVectorReg); + bfloat16 *d = vd; + + for (i = 0; i < n; ++i) { + bfloat16 d0 = float32_to_bfloat16(s0[H4(i)], fpst); + bfloat16 d1 = float32_to_bfloat16(s1[H4(i)], fpst); + d[H2(i * 2 + 0)] = d0; + d[H2(i * 2 + 1)] = d1; + } +} + +void HELPER(sme2_fcvtn)(void *vd, void *vs, float_status *fpst, uint32_t desc) +{ + size_t i, n = simd_oprsz(desc) / 4; + float32 *s0 = vs; + float32 *s1 = vs + sizeof(ARMVectorReg); + bfloat16 *d = vd; + + for (i = 0; i < n; ++i) { + bfloat16 d0 = sve_f32_to_f16(s0[H4(i)], fpst); + bfloat16 d1 = sve_f32_to_f16(s1[H4(i)], fpst); + d[H2(i * 2 + 0)] = d0; + d[H2(i * 2 + 1)] = d1; + } +} + +#define SQCVTN2(NAME, TW, TN, HW, HN, SAT) \ +void HELPER(NAME)(void *vd, void *vs, uint32_t desc) \ +{ \ + ARMVectorReg scratch; \ + size_t oprsz = simd_oprsz(desc), n = oprsz / sizeof(TW); \ + TW *s0 = vs, *s1 = vs + sizeof(ARMVectorReg); \ + TN *d = vd; \ + if (vectors_overlap(vd, 1, vs, 2)) { \ + d = (TN *)&scratch; \ + } \ + for (size_t i = 0; i < n; ++i) { \ + d[HN(2 * i + 0)] = SAT(s0[HW(i)]); \ + d[HN(2 * i + 1)] = SAT(s1[HW(i)]); \ + } \ + if (d != vd) { \ + memcpy(vd, d, oprsz); \ + } \ +} + +SQCVTN2(sme2_sqcvtn_sh, int32_t, int16_t, H4, H2, do_ssat_h) +SQCVTN2(sme2_uqcvtn_sh, uint32_t, uint16_t, H4, H2, do_usat_h) +SQCVTN2(sme2_sqcvtun_sh, int32_t, uint16_t, H4, H2, do_usat_h) + +#undef SQCVTN2 + +#define SQCVTN4(NAME, TW, TN, HW, HN, SAT) \ +void 
HELPER(NAME)(void *vd, void *vs, uint32_t desc) \ +{ \ + ARMVectorReg scratch; \ + size_t oprsz = simd_oprsz(desc), n = oprsz / sizeof(TW); \ + TW *s0 = vs, *s1 = vs + sizeof(ARMVectorReg); \ + TW *s2 = vs + 2 * sizeof(ARMVectorReg); \ + TW *s3 = vs + 3 * sizeof(ARMVectorReg); \ + TN *d = vd; \ + if (vectors_overlap(vd, 1, vs, 4)) { \ + d = (TN *)&scratch; \ + } \ + for (size_t i = 0; i < n; ++i) { \ + d[HN(4 * i + 0)] = SAT(s0[HW(i)]); \ + d[HN(4 * i + 1)] = SAT(s1[HW(i)]); \ + d[HN(4 * i + 2)] = SAT(s2[HW(i)]); \ + d[HN(4 * i + 3)] = SAT(s3[HW(i)]); \ + } \ + if (d != vd) { \ + memcpy(vd, d, oprsz); \ + } \ +} + +SQCVTN4(sme2_sqcvtn_sb, int32_t, int8_t, H4, H1, do_ssat_b) +SQCVTN4(sme2_uqcvtn_sb, uint32_t, uint8_t, H4, H1, do_usat_b) +SQCVTN4(sme2_sqcvtun_sb, int32_t, uint8_t, H4, H1, do_usat_b) + +SQCVTN4(sme2_sqcvtn_dh, int64_t, int16_t, H8, H2, do_ssat_h) +SQCVTN4(sme2_uqcvtn_dh, uint64_t, uint16_t, H8, H2, do_usat_h) +SQCVTN4(sme2_sqcvtun_dh, int64_t, uint16_t, H8, H2, do_usat_h) + +#undef SQCVTN4 + +#define SQRSHRN2(NAME, TW, TN, HW, HN, RSHR, SAT) \ +void HELPER(NAME)(void *vd, void *vs, uint32_t desc) \ +{ \ + ARMVectorReg scratch; \ + size_t oprsz = simd_oprsz(desc), n = oprsz / sizeof(TW); \ + int shift = simd_data(desc); \ + TW *s0 = vs, *s1 = vs + sizeof(ARMVectorReg); \ + TN *d = vd; \ + if (vectors_overlap(vd, 1, vs, 2)) { \ + d = (TN *)&scratch; \ + } \ + for (size_t i = 0; i < n; ++i) { \ + d[HN(2 * i + 0)] = SAT(RSHR(s0[HW(i)], shift)); \ + d[HN(2 * i + 1)] = SAT(RSHR(s1[HW(i)], shift)); \ + } \ + if (d != vd) { \ + memcpy(vd, d, oprsz); \ + } \ +} + +SQRSHRN2(sme2_sqrshrn_sh, int32_t, int16_t, H4, H2, do_srshr, do_ssat_h) +SQRSHRN2(sme2_uqrshrn_sh, uint32_t, uint16_t, H4, H2, do_urshr, do_usat_h) +SQRSHRN2(sme2_sqrshrun_sh, int32_t, uint16_t, H4, H2, do_srshr, do_usat_h) + +#undef SQRSHRN2 + +#define SQRSHRN4(NAME, TW, TN, HW, HN, RSHR, SAT) \ +void HELPER(NAME)(void *vd, void *vs, uint32_t desc) \ +{ \ + ARMVectorReg scratch; \ + size_t oprsz = simd_oprsz(desc), n = oprsz / sizeof(TW); \ + int shift = simd_data(desc); \ + TW *s0 = vs, *s1 = vs + sizeof(ARMVectorReg); \ + TW *s2 = vs + 2 * sizeof(ARMVectorReg); \ + TW *s3 = vs + 3 * sizeof(ARMVectorReg); \ + TN *d = vd; \ + if (vectors_overlap(vd, 1, vs, 4)) { \ + d = (TN *)&scratch; \ + } \ + for (size_t i = 0; i < n; ++i) { \ + d[HN(4 * i + 0)] = SAT(RSHR(s0[HW(i)], shift)); \ + d[HN(4 * i + 1)] = SAT(RSHR(s1[HW(i)], shift)); \ + d[HN(4 * i + 2)] = SAT(RSHR(s2[HW(i)], shift)); \ + d[HN(4 * i + 3)] = SAT(RSHR(s3[HW(i)], shift)); \ + } \ + if (d != vd) { \ + memcpy(vd, d, oprsz); \ + } \ +} + +SQRSHRN4(sme2_sqrshrn_sb, int32_t, int8_t, H4, H1, do_srshr, do_ssat_b) +SQRSHRN4(sme2_uqrshrn_sb, uint32_t, uint8_t, H4, H1, do_urshr, do_usat_b) +SQRSHRN4(sme2_sqrshrun_sb, int32_t, uint8_t, H4, H1, do_srshr, do_usat_b) + +SQRSHRN4(sme2_sqrshrn_dh, int64_t, int16_t, H8, H2, do_srshr, do_ssat_h) +SQRSHRN4(sme2_uqrshrn_dh, uint64_t, uint16_t, H8, H2, do_urshr, do_usat_h) +SQRSHRN4(sme2_sqrshrun_dh, int64_t, uint16_t, H8, H2, do_srshr, do_usat_h) + +#undef SQRSHRN4 + +/* Expand and convert */ +void HELPER(sme2_fcvt_w)(void *vd, void *vs, float_status *fpst, uint32_t desc) +{ + ARMVectorReg scratch; + size_t oprsz = simd_oprsz(desc); + size_t i, n = oprsz / 4; + float16 *s = vs; + float32 *d0 = vd; + float32 *d1 = vd + sizeof(ARMVectorReg); + + if (vectors_overlap(vd, 1, vs, 2)) { + s = memcpy(&scratch, s, oprsz); + } + + for (i = 0; i < n; ++i) { + d0[H4(i)] = sve_f16_to_f32(s[H2(i)], fpst); + } + for (i = 0; i < n; ++i) { + 
d1[H4(i)] = sve_f16_to_f32(s[H2(n + i)], fpst); + } +} + +#define UNPK(NAME, SREG, TW, TN, HW, HN) \ +void HELPER(NAME)(void *vd, void *vs, uint32_t desc) \ +{ \ + ARMVectorReg scratch[SREG]; \ + size_t oprsz = simd_oprsz(desc); \ + size_t n = oprsz / sizeof(TW); \ + if (vectors_overlap(vd, 2 * SREG, vs, SREG)) { \ + vs = memcpy(scratch, vs, sizeof(scratch)); \ + } \ + for (size_t r = 0; r < SREG; ++r) { \ + TN *s = vs + r * sizeof(ARMVectorReg); \ + for (size_t i = 0; i < 2; ++i) { \ + TW *d = vd + (2 * r + i) * sizeof(ARMVectorReg); \ + for (size_t e = 0; e < n; ++e) { \ + d[HW(e)] = s[HN(i * n + e)]; \ + } \ + } \ + } \ +} + +UNPK(sme2_sunpk2_bh, 1, int16_t, int8_t, H2, H1) +UNPK(sme2_sunpk2_hs, 1, int32_t, int16_t, H4, H2) +UNPK(sme2_sunpk2_sd, 1, int64_t, int32_t, H8, H4) + +UNPK(sme2_sunpk4_bh, 2, int16_t, int8_t, H2, H1) +UNPK(sme2_sunpk4_hs, 2, int32_t, int16_t, H4, H2) +UNPK(sme2_sunpk4_sd, 2, int64_t, int32_t, H8, H4) + +UNPK(sme2_uunpk2_bh, 1, uint16_t, uint8_t, H2, H1) +UNPK(sme2_uunpk2_hs, 1, uint32_t, uint16_t, H4, H2) +UNPK(sme2_uunpk2_sd, 1, uint64_t, uint32_t, H8, H4) + +UNPK(sme2_uunpk4_bh, 2, uint16_t, uint8_t, H2, H1) +UNPK(sme2_uunpk4_hs, 2, uint32_t, uint16_t, H4, H2) +UNPK(sme2_uunpk4_sd, 2, uint64_t, uint32_t, H8, H4) + +#undef UNPK + +/* Deinterleave and convert. */ +void HELPER(sme2_fcvtl)(void *vd, void *vs, float_status *fpst, uint32_t desc) +{ + size_t i, n = simd_oprsz(desc) / 4; + float16 *s = vs; + float32 *d0 = vd; + float32 *d1 = vd + sizeof(ARMVectorReg); + + for (i = 0; i < n; ++i) { + float32 v0 = sve_f16_to_f32(s[H2(i * 2 + 0)], fpst); + float32 v1 = sve_f16_to_f32(s[H2(i * 2 + 1)], fpst); + d0[H4(i)] = v0; + d1[H4(i)] = v1; + } +} + +void HELPER(sme2_scvtf)(void *vd, void *vs, float_status *fpst, uint32_t desc) +{ + size_t i, n = simd_oprsz(desc) / 4; + int32_t *d = vd; + float32 *s = vs; + + for (i = 0; i < n; ++i) { + d[i] = int32_to_float32(s[i], fpst); + } +} + +void HELPER(sme2_ucvtf)(void *vd, void *vs, float_status *fpst, uint32_t desc) +{ + size_t i, n = simd_oprsz(desc) / 4; + uint32_t *d = vd; + float32 *s = vs; + + for (i = 0; i < n; ++i) { + d[i] = uint32_to_float32(s[i], fpst); + } +} + +#define ZIP2(NAME, TYPE, H) \ +void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \ +{ \ + ARMVectorReg scratch[2]; \ + size_t oprsz = simd_oprsz(desc); \ + size_t pairs = oprsz / (sizeof(TYPE) * 2); \ + TYPE *n = vn, *m = vm; \ + if (vectors_overlap(vd, 2, vn, 1)) { \ + n = memcpy(&scratch[0], vn, oprsz); \ + } \ + if (vectors_overlap(vd, 2, vm, 1)) { \ + m = memcpy(&scratch[1], vm, oprsz); \ + } \ + for (size_t r = 0; r < 2; ++r) { \ + TYPE *d = vd + r * sizeof(ARMVectorReg); \ + size_t base = r * pairs; \ + for (size_t p = 0; p < pairs; ++p) { \ + d[H(2 * p + 0)] = n[base + H(p)]; \ + d[H(2 * p + 1)] = m[base + H(p)]; \ + } \ + } \ +} + +ZIP2(sme2_zip2_b, uint8_t, H1) +ZIP2(sme2_zip2_h, uint16_t, H2) +ZIP2(sme2_zip2_s, uint32_t, H4) +ZIP2(sme2_zip2_d, uint64_t, ) +ZIP2(sme2_zip2_q, Int128, ) + +#undef ZIP2 + +#define ZIP4(NAME, TYPE, H) \ +void HELPER(NAME)(void *vd, void *vs, uint32_t desc) \ +{ \ + ARMVectorReg scratch[4]; \ + size_t oprsz = simd_oprsz(desc); \ + size_t quads = oprsz / (sizeof(TYPE) * 4); \ + TYPE *s0, *s1, *s2, *s3; \ + if (vs == vd) { \ + vs = memcpy(scratch, vs, sizeof(scratch)); \ + } \ + s0 = vs; \ + s1 = vs + sizeof(ARMVectorReg); \ + s2 = vs + 2 * sizeof(ARMVectorReg); \ + s3 = vs + 3 * sizeof(ARMVectorReg); \ + for (size_t r = 0; r < 4; ++r) { \ + TYPE *d = vd + r * sizeof(ARMVectorReg); \ + size_t base = r * 
quads; \ + for (size_t q = 0; q < quads; ++q) { \ + d[H(4 * q + 0)] = s0[base + H(q)]; \ + d[H(4 * q + 1)] = s1[base + H(q)]; \ + d[H(4 * q + 2)] = s2[base + H(q)]; \ + d[H(4 * q + 3)] = s3[base + H(q)]; \ + } \ + } \ +} + +ZIP4(sme2_zip4_b, uint8_t, H1) +ZIP4(sme2_zip4_h, uint16_t, H2) +ZIP4(sme2_zip4_s, uint32_t, H4) +ZIP4(sme2_zip4_d, uint64_t, ) +ZIP4(sme2_zip4_q, Int128, ) + +#undef ZIP4 + +#define UZP2(NAME, TYPE, H) \ +void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \ +{ \ + ARMVectorReg scratch[2]; \ + size_t oprsz = simd_oprsz(desc); \ + size_t pairs = oprsz / (sizeof(TYPE) * 2); \ + TYPE *d0 = vd, *d1 = vd + sizeof(ARMVectorReg); \ + if (vectors_overlap(vd, 2, vn, 1)) { \ + vn = memcpy(&scratch[0], vn, oprsz); \ + } \ + if (vectors_overlap(vd, 2, vm, 1)) { \ + vm = memcpy(&scratch[1], vm, oprsz); \ + } \ + for (size_t r = 0; r < 2; ++r) { \ + TYPE *s = r ? vm : vn; \ + size_t base = r * pairs; \ + for (size_t p = 0; p < pairs; ++p) { \ + d0[base + H(p)] = s[H(2 * p + 0)]; \ + d1[base + H(p)] = s[H(2 * p + 1)]; \ + } \ + } \ +} + +UZP2(sme2_uzp2_b, uint8_t, H1) +UZP2(sme2_uzp2_h, uint16_t, H2) +UZP2(sme2_uzp2_s, uint32_t, H4) +UZP2(sme2_uzp2_d, uint64_t, ) +UZP2(sme2_uzp2_q, Int128, ) + +#undef UZP2 + +#define UZP4(NAME, TYPE, H) \ +void HELPER(NAME)(void *vd, void *vs, uint32_t desc) \ +{ \ + ARMVectorReg scratch[4]; \ + size_t oprsz = simd_oprsz(desc); \ + size_t quads = oprsz / (sizeof(TYPE) * 4); \ + TYPE *d0, *d1, *d2, *d3; \ + if (vs == vd) { \ + vs = memcpy(scratch, vs, sizeof(scratch)); \ + } \ + d0 = vd; \ + d1 = vd + sizeof(ARMVectorReg); \ + d2 = vd + 2 * sizeof(ARMVectorReg); \ + d3 = vd + 3 * sizeof(ARMVectorReg); \ + for (size_t r = 0; r < 4; ++r) { \ + TYPE *s = vs + r * sizeof(ARMVectorReg); \ + size_t base = r * quads; \ + for (size_t q = 0; q < quads; ++q) { \ + d0[base + H(q)] = s[H(4 * q + 0)]; \ + d1[base + H(q)] = s[H(4 * q + 1)]; \ + d2[base + H(q)] = s[H(4 * q + 2)]; \ + d3[base + H(q)] = s[H(4 * q + 3)]; \ + } \ + } \ +} + +UZP4(sme2_uzp4_b, uint8_t, H1) +UZP4(sme2_uzp4_h, uint16_t, H2) +UZP4(sme2_uzp4_s, uint32_t, H4) +UZP4(sme2_uzp4_d, uint64_t, ) +UZP4(sme2_uzp4_q, Int128, ) + +#undef UZP4 + +#define ICLAMP(NAME, TYPE, H) \ +void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \ +{ \ + size_t stride = sizeof(ARMVectorReg) / sizeof(TYPE); \ + size_t elements = simd_oprsz(desc) / sizeof(TYPE); \ + size_t nreg = simd_data(desc); \ + TYPE *d = vd, *n = vn, *m = vm; \ + for (size_t e = 0; e < elements; e++) { \ + TYPE nn = n[H(e)], mm = m[H(e)]; \ + for (size_t r = 0; r < nreg; r++) { \ + TYPE *dd = &d[r * stride + H(e)]; \ + *dd = MIN(MAX(*dd, nn), mm); \ + } \ + } \ +} + +ICLAMP(sme2_sclamp_b, int8_t, H1) +ICLAMP(sme2_sclamp_h, int16_t, H2) +ICLAMP(sme2_sclamp_s, int32_t, H4) +ICLAMP(sme2_sclamp_d, int64_t, H8) + +ICLAMP(sme2_uclamp_b, uint8_t, H1) +ICLAMP(sme2_uclamp_h, uint16_t, H2) +ICLAMP(sme2_uclamp_s, uint32_t, H4) +ICLAMP(sme2_uclamp_d, uint64_t, H8) + +#undef ICLAMP + +/* + * Note the argument ordering to minnum and maxnum must match + * the ARM pseudocode so that NaNs are propagated properly. 
+ */ +#define FCLAMP(NAME, TYPE, H) \ +void HELPER(NAME)(void *vd, void *vn, void *vm, \ + float_status *fpst, uint32_t desc) \ +{ \ + size_t stride = sizeof(ARMVectorReg) / sizeof(TYPE); \ + size_t elements = simd_oprsz(desc) / sizeof(TYPE); \ + size_t nreg = simd_data(desc); \ + TYPE *d = vd, *n = vn, *m = vm; \ + for (size_t e = 0; e < elements; e++) { \ + TYPE nn = n[H(e)], mm = m[H(e)]; \ + for (size_t r = 0; r < nreg; r++) { \ + TYPE *dd = &d[r * stride + H(e)]; \ + *dd = TYPE##_minnum(TYPE##_maxnum(nn, *dd, fpst), mm, fpst); \ + } \ + } \ +} + +FCLAMP(sme2_fclamp_h, float16, H2) +FCLAMP(sme2_fclamp_s, float32, H4) +FCLAMP(sme2_fclamp_d, float64, H8) +FCLAMP(sme2_bfclamp, bfloat16, H2) + +#undef FCLAMP + +void HELPER(sme2_sel_b)(void *vd, void *vn, void *vm, + uint32_t png, uint32_t desc) +{ + int vl = simd_oprsz(desc); + int nreg = simd_data(desc); + int elements = vl / sizeof(uint8_t); + DecodeCounter p = decode_counter(png, vl, MO_8); + + if (p.lg2_stride == 0) { + if (p.invert) { + for (int r = 0; r < nreg; r++) { + uint8_t *d = vd + r * sizeof(ARMVectorReg); + uint8_t *n = vn + r * sizeof(ARMVectorReg); + uint8_t *m = vm + r * sizeof(ARMVectorReg); + int split = p.count - r * elements; + + if (split <= 0) { + memcpy(d, n, vl); /* all true */ + } else if (elements <= split) { + memcpy(d, m, vl); /* all false */ + } else { + for (int e = 0; e < split; e++) { + d[H1(e)] = m[H1(e)]; + } + for (int e = split; e < elements; e++) { + d[H1(e)] = n[H1(e)]; + } + } + } + } else { + for (int r = 0; r < nreg; r++) { + uint8_t *d = vd + r * sizeof(ARMVectorReg); + uint8_t *n = vn + r * sizeof(ARMVectorReg); + uint8_t *m = vm + r * sizeof(ARMVectorReg); + int split = p.count - r * elements; + + if (split <= 0) { + memcpy(d, m, vl); /* all false */ + } else if (elements <= split) { + memcpy(d, n, vl); /* all true */ + } else { + for (int e = 0; e < split; e++) { + d[H1(e)] = n[H1(e)]; + } + for (int e = split; e < elements; e++) { + d[H1(e)] = m[H1(e)]; + } + } + } + } + } else { + int estride = 1 << p.lg2_stride; + if (p.invert) { + for (int r = 0; r < nreg; r++) { + uint8_t *d = vd + r * sizeof(ARMVectorReg); + uint8_t *n = vn + r * sizeof(ARMVectorReg); + uint8_t *m = vm + r * sizeof(ARMVectorReg); + int split = p.count - r * elements; + int e = 0; + + for (; e < MIN(split, elements); e++) { + d[H1(e)] = m[H1(e)]; + } + for (; e < elements; e += estride) { + d[H1(e)] = n[H1(e)]; + for (int i = 1; i < estride; i++) { + d[H1(e + i)] = m[H1(e + i)]; + } + } + } + } else { + for (int r = 0; r < nreg; r++) { + uint8_t *d = vd + r * sizeof(ARMVectorReg); + uint8_t *n = vn + r * sizeof(ARMVectorReg); + uint8_t *m = vm + r * sizeof(ARMVectorReg); + int split = p.count - r * elements; + int e = 0; + + for (; e < MIN(split, elements); e += estride) { + d[H1(e)] = n[H1(e)]; + for (int i = 1; i < estride; i++) { + d[H1(e + i)] = m[H1(e + i)]; + } + } + for (; e < elements; e++) { + d[H1(e)] = m[H1(e)]; + } + } + } + } +} + +void HELPER(sme2_sel_h)(void *vd, void *vn, void *vm, + uint32_t png, uint32_t desc) +{ + int vl = simd_oprsz(desc); + int nreg = simd_data(desc); + int elements = vl / sizeof(uint16_t); + DecodeCounter p = decode_counter(png, vl, MO_16); + + if (p.lg2_stride == 0) { + if (p.invert) { + for (int r = 0; r < nreg; r++) { + uint16_t *d = vd + r * sizeof(ARMVectorReg); + uint16_t *n = vn + r * sizeof(ARMVectorReg); + uint16_t *m = vm + r * sizeof(ARMVectorReg); + int split = p.count - r * elements; + + if (split <= 0) { + memcpy(d, n, vl); /* all true */ + } else if (elements <= split) 
{ + memcpy(d, m, vl); /* all false */ + } else { + for (int e = 0; e < split; e++) { + d[H2(e)] = m[H2(e)]; + } + for (int e = split; e < elements; e++) { + d[H2(e)] = n[H2(e)]; + } + } + } + } else { + for (int r = 0; r < nreg; r++) { + uint16_t *d = vd + r * sizeof(ARMVectorReg); + uint16_t *n = vn + r * sizeof(ARMVectorReg); + uint16_t *m = vm + r * sizeof(ARMVectorReg); + int split = p.count - r * elements; + + if (split <= 0) { + memcpy(d, m, vl); /* all false */ + } else if (elements <= split) { + memcpy(d, n, vl); /* all true */ + } else { + for (int e = 0; e < split; e++) { + d[H2(e)] = n[H2(e)]; + } + for (int e = split; e < elements; e++) { + d[H2(e)] = m[H2(e)]; + } + } + } + } + } else { + int estride = 1 << p.lg2_stride; + if (p.invert) { + for (int r = 0; r < nreg; r++) { + uint16_t *d = vd + r * sizeof(ARMVectorReg); + uint16_t *n = vn + r * sizeof(ARMVectorReg); + uint16_t *m = vm + r * sizeof(ARMVectorReg); + int split = p.count - r * elements; + int e = 0; + + for (; e < MIN(split, elements); e++) { + d[H2(e)] = m[H2(e)]; + } + for (; e < elements; e += estride) { + d[H2(e)] = n[H2(e)]; + for (int i = 1; i < estride; i++) { + d[H2(e + i)] = m[H2(e + i)]; + } + } + } + } else { + for (int r = 0; r < nreg; r++) { + uint16_t *d = vd + r * sizeof(ARMVectorReg); + uint16_t *n = vn + r * sizeof(ARMVectorReg); + uint16_t *m = vm + r * sizeof(ARMVectorReg); + int split = p.count - r * elements; + int e = 0; + + for (; e < MIN(split, elements); e += estride) { + d[H2(e)] = n[H2(e)]; + for (int i = 1; i < estride; i++) { + d[H2(e + i)] = m[H2(e + i)]; + } + } + for (; e < elements; e++) { + d[H2(e)] = m[H2(e)]; + } + } + } + } +} + +void HELPER(sme2_sel_s)(void *vd, void *vn, void *vm, + uint32_t png, uint32_t desc) +{ + int vl = simd_oprsz(desc); + int nreg = simd_data(desc); + int elements = vl / sizeof(uint32_t); + DecodeCounter p = decode_counter(png, vl, MO_32); + + if (p.lg2_stride == 0) { + if (p.invert) { + for (int r = 0; r < nreg; r++) { + uint32_t *d = vd + r * sizeof(ARMVectorReg); + uint32_t *n = vn + r * sizeof(ARMVectorReg); + uint32_t *m = vm + r * sizeof(ARMVectorReg); + int split = p.count - r * elements; + + if (split <= 0) { + memcpy(d, n, vl); /* all true */ + } else if (elements <= split) { + memcpy(d, m, vl); /* all false */ + } else { + for (int e = 0; e < split; e++) { + d[H4(e)] = m[H4(e)]; + } + for (int e = split; e < elements; e++) { + d[H4(e)] = n[H4(e)]; + } + } + } + } else { + for (int r = 0; r < nreg; r++) { + uint32_t *d = vd + r * sizeof(ARMVectorReg); + uint32_t *n = vn + r * sizeof(ARMVectorReg); + uint32_t *m = vm + r * sizeof(ARMVectorReg); + int split = p.count - r * elements; + + if (split <= 0) { + memcpy(d, m, vl); /* all false */ + } else if (elements <= split) { + memcpy(d, n, vl); /* all true */ + } else { + for (int e = 0; e < split; e++) { + d[H4(e)] = n[H4(e)]; + } + for (int e = split; e < elements; e++) { + d[H4(e)] = m[H4(e)]; + } + } + } + } + } else { + /* p.esz must be MO_64, so stride must be 2. 
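+ * The counter's encoded element size can only exceed MO_32 by one
+ * step, so decode_counter yields lg2_stride == 1 and the strided
+ * loops below take every other 32-bit element from N and the
+ * remainder from M.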
*/ + if (p.invert) { + for (int r = 0; r < nreg; r++) { + uint32_t *d = vd + r * sizeof(ARMVectorReg); + uint32_t *n = vn + r * sizeof(ARMVectorReg); + uint32_t *m = vm + r * sizeof(ARMVectorReg); + int split = p.count - r * elements; + int e = 0; + + for (; e < MIN(split, elements); e++) { + d[H4(e)] = m[H4(e)]; + } + for (; e < elements; e += 2) { + d[H4(e)] = n[H4(e)]; + d[H4(e + 1)] = m[H4(e + 1)]; + } + } + } else { + for (int r = 0; r < nreg; r++) { + uint32_t *d = vd + r * sizeof(ARMVectorReg); + uint32_t *n = vn + r * sizeof(ARMVectorReg); + uint32_t *m = vm + r * sizeof(ARMVectorReg); + int split = p.count - r * elements; + int e = 0; + + for (; e < MIN(split, elements); e += 2) { + d[H4(e)] = n[H4(e)]; + d[H4(e + 1)] = m[H4(e + 1)]; + } + for (; e < elements; e++) { + d[H4(e)] = m[H4(e)]; + } + } + } + } +} + +void HELPER(sme2_sel_d)(void *vd, void *vn, void *vm, + uint32_t png, uint32_t desc) +{ + int vl = simd_oprsz(desc); + int nreg = simd_data(desc); + int elements = vl / sizeof(uint64_t); + DecodeCounter p = decode_counter(png, vl, MO_64); + + if (p.invert) { + for (int r = 0; r < nreg; r++) { + uint64_t *d = vd + r * sizeof(ARMVectorReg); + uint64_t *n = vn + r * sizeof(ARMVectorReg); + uint64_t *m = vm + r * sizeof(ARMVectorReg); + int split = p.count - r * elements; + + if (split <= 0) { + memcpy(d, n, vl); /* all true */ + } else if (elements <= split) { + memcpy(d, m, vl); /* all false */ + } else { + memcpy(d, m, split * sizeof(uint64_t)); + memcpy(d + split, n + split, + (elements - split) * sizeof(uint64_t)); + } + } + } else { + for (int r = 0; r < nreg; r++) { + uint64_t *d = vd + r * sizeof(ARMVectorReg); + uint64_t *n = vn + r * sizeof(ARMVectorReg); + uint64_t *m = vm + r * sizeof(ARMVectorReg); + int split = p.count - r * elements; + + if (split <= 0) { + memcpy(d, m, vl); /* all false */ + } else if (elements <= split) { + memcpy(d, n, vl); /* all true */ + } else { + memcpy(d, n, split * sizeof(uint64_t)); + memcpy(d + split, m + split, + (elements - split) * sizeof(uint64_t)); + } + } + } +} diff --git a/target/arm/tcg/sve.decode b/target/arm/tcg/sve.decode index 04b6fcc..ab63cfa 100644 --- a/target/arm/tcg/sve.decode +++ b/target/arm/tcg/sve.decode @@ -30,6 +30,7 @@ %size_23 23:2 %dtype_23_13 23:2 13:2 %index3_22_19 22:1 19:2 +%index3_22_17 22:1 17:2 %index3_19_11 19:2 11:1 %index2_20_11 20:1 11:1 @@ -57,6 +58,11 @@ # as propagated via the MOVPRFX instruction. %reg_movprfx 0:5 +%rn_ax2 6:4 !function=times_2 + +%pnd 0:3 !function=plus_8 +%pnn 5:3 !function=plus_8 + ########################################################################### # Named attribute sets. These are used to make nice(er) names # when creating helpers common to those for the individual @@ -102,6 +108,7 @@ # Two operand @pd_pn ........ esz:2 .. .... ....... rn:4 . rd:4 &rr_esz @rd_rn ........ esz:2 ...... ...... rn:5 rd:5 &rr_esz +@rd_rnx2 ........ ... ..... ...... ..... rd:5 &rr_esz rn=%rn_ax2 # Two operand with governing predicate, flags setting @pd_pg_pn_s ........ . s:1 ...... .. pg:4 . rn:4 . rd:4 &rpr_s @@ -131,11 +138,11 @@ @rda_rn_rm ........ esz:2 . rm:5 ... ... rn:5 rd:5 \ &rrrr_esz ra=%reg_movprfx -# Four operand with unused vector element size -@rda_rn_rm_e0 ........ ... rm:5 ... ... rn:5 rd:5 \ - &rrrr_esz esz=0 ra=%reg_movprfx -@rdn_ra_rm_e0 ........ ... rm:5 ... ... ra:5 rd:5 \ - &rrrr_esz esz=0 rn=%reg_movprfx +# Four operand with explicit vector element size +@rda_rn_rm_ex ........ ... rm:5 ... ... rn:5 rd:5 \ + &rrrr_esz ra=%reg_movprfx +@rdn_ra_rm_ex ........ ... 
rm:5 ... ... ra:5 rd:5 \ + &rrrr_esz rn=%reg_movprfx # Three operand with "memory" size, aka immediate left shift @rd_rn_msz_rm ........ ... rm:5 .... imm:2 rn:5 rd:5 &rrri @@ -222,6 +229,9 @@ @rprr_load_dt ....... dtype:4 rm:5 ... pg:3 rn:5 rd:5 &rprr_load @rpri_load_dt ....... dtype:4 . imm:s4 ... pg:3 rn:5 rd:5 &rpri_load +@rprr_load ....... .... rm:5 ... pg:3 rn:5 rd:5 &rprr_load +@rpri_load ....... .... . imm:s4 ... pg:3 rn:5 rd:5 &rpri_load + @rprr_load_msz ....... .... rm:5 ... pg:3 rn:5 rd:5 \ &rprr_load dtype=%msz_dtype @rpri_load_msz ....... .... . imm:s4 ... pg:3 rn:5 rd:5 \ @@ -245,7 +255,7 @@ # Stores; user must fill in ESZ, MSZ, NREG as needed. @rprr_store ....... .. .. rm:5 ... pg:3 rn:5 rd:5 &rprr_store -@rpri_store_msz ....... msz:2 .. . imm:s4 ... pg:3 rn:5 rd:5 &rpri_store +@rpri_store ....... .. .. . imm:s4 ... pg:3 rn:5 rd:5 &rpri_store @rprr_store_esz_n0 ....... .. esz:2 rm:5 ... pg:3 rn:5 rd:5 \ &rprr_store nreg=0 @rprr_scatter_store ....... msz:2 .. rm:5 ... pg:3 rn:5 rd:5 \ @@ -320,6 +330,11 @@ ORV 00000100 .. 011 000 001 ... ..... ..... @rd_pg_rn EORV 00000100 .. 011 001 001 ... ..... ..... @rd_pg_rn ANDV 00000100 .. 011 010 001 ... ..... ..... @rd_pg_rn +# SVE2.1 bitwise logical reduction (quadwords) +ORQV 00000100 .. 011 100 001 ... ..... ..... @rd_pg_rn +EORQV 00000100 .. 011 101 001 ... ..... ..... @rd_pg_rn +ANDQV 00000100 .. 011 110 001 ... ..... ..... @rd_pg_rn + # SVE constructive prefix (predicated) MOVPRFX_z 00000100 .. 010 000 001 ... ..... ..... @rd_pg_rn MOVPRFX_m 00000100 .. 010 001 001 ... ..... ..... @rd_pg_rn @@ -335,6 +350,13 @@ UMAXV 00000100 .. 001 001 001 ... ..... ..... @rd_pg_rn SMINV 00000100 .. 001 010 001 ... ..... ..... @rd_pg_rn UMINV 00000100 .. 001 011 001 ... ..... ..... @rd_pg_rn +# SVE2.1 segment reduction +ADDQV 00000100 .. 000 101 001 ... ..... ..... @rd_pg_rn +SMAXQV 00000100 .. 001 100 001 ... ..... ..... @rd_pg_rn +SMINQV 00000100 .. 001 110 001 ... ..... ..... @rd_pg_rn +UMAXQV 00000100 .. 001 101 001 ... ..... ..... @rd_pg_rn +UMINQV 00000100 .. 001 111 001 ... ..... ..... @rd_pg_rn + ### SVE Shift by Immediate - Predicated Group # SVE bitwise shift by immediate (predicated) @@ -428,12 +450,12 @@ XAR 00000100 .. 1 ..... 001 101 rm:5 rd:5 &rrri_esz \ rn=%reg_movprfx esz=%tszimm16_esz imm=%tszimm16_shr # SVE2 bitwise ternary operations -EOR3 00000100 00 1 ..... 001 110 ..... ..... @rdn_ra_rm_e0 -BSL 00000100 00 1 ..... 001 111 ..... ..... @rdn_ra_rm_e0 -BCAX 00000100 01 1 ..... 001 110 ..... ..... @rdn_ra_rm_e0 -BSL1N 00000100 01 1 ..... 001 111 ..... ..... @rdn_ra_rm_e0 -BSL2N 00000100 10 1 ..... 001 111 ..... ..... @rdn_ra_rm_e0 -NBSL 00000100 11 1 ..... 001 111 ..... ..... @rdn_ra_rm_e0 +EOR3 00000100 00 1 ..... 001 110 ..... ..... @rdn_ra_rm_ex esz=0 +BSL 00000100 00 1 ..... 001 111 ..... ..... @rdn_ra_rm_ex esz=0 +BCAX 00000100 01 1 ..... 001 110 ..... ..... @rdn_ra_rm_ex esz=0 +BSL1N 00000100 01 1 ..... 001 111 ..... ..... @rdn_ra_rm_ex esz=0 +BSL2N 00000100 10 1 ..... 001 111 ..... ..... @rdn_ra_rm_ex esz=0 +NBSL 00000100 11 1 ..... 001 111 ..... ..... @rdn_ra_rm_ex esz=0 ### SVE Index Generation Group @@ -559,6 +581,14 @@ DUP_s 00000101 .. 1 00000 001110 ..... ..... @rd_rn DUP_x 00000101 .. 1 ..... 
001000 rn:5 rd:5 \ &rri imm=%imm7_22_16 +# SVE Permute Vector - one source quadwords +DUPQ 00000101 001 imm:4 1 001001 rn:5 rd:5 &rri_esz esz=0 +DUPQ 00000101 001 imm:3 10 001001 rn:5 rd:5 &rri_esz esz=1 +DUPQ 00000101 001 imm:2 100 001001 rn:5 rd:5 &rri_esz esz=2 +DUPQ 00000101 001 imm:1 1000 001001 rn:5 rd:5 &rri_esz esz=3 + +EXTQ 00000101 0110 imm:4 001001 rn:5 rd:5 &rri + # SVE insert SIMD&FP scalar register INSR_f 00000101 .. 1 10100 001110 ..... ..... @rdn_rm @@ -568,6 +598,22 @@ INSR_r 00000101 .. 1 00100 001110 ..... ..... @rdn_rm # SVE reverse vector elements REV_v 00000101 .. 1 11000 001110 ..... ..... @rd_rn +# SVE move predicate to/from vector + +PMOV_pv 00000101 00 101 01 0001110 rn:5 0 rd:4 \ + &rri_esz esz=0 imm=0 +PMOV_pv 00000101 00 101 1 imm:1 0001110 rn:5 0 rd:4 &rri_esz esz=1 +PMOV_pv 00000101 01 101 imm:2 0001110 rn:5 0 rd:4 &rri_esz esz=2 +PMOV_pv 00000101 1. 101 .. 0001110 rn:5 0 rd:4 \ + &rri_esz esz=3 imm=%index3_22_17 + +PMOV_vp 00000101 00 101 01 1001110 0 rn:4 rd:5 \ + &rri_esz esz=0 imm=0 +PMOV_vp 00000101 00 101 1 imm:1 1001110 0 rn:4 rd:5 &rri_esz esz=1 +PMOV_vp 00000101 01 101 imm:2 1001110 0 rn:4 rd:5 &rri_esz esz=2 +PMOV_vp 00000101 1. 101 .. 1001110 0 rn:4 rd:5 \ + &rri_esz esz=3 imm=%index3_22_17 + # SVE vector table lookup TBL 00000101 .. 1 ..... 001100 ..... ..... @rd_rn_rm @@ -614,6 +660,15 @@ UZP2_q 00000101 10 1 ..... 000 011 ..... ..... @rd_rn_rm_e0 TRN1_q 00000101 10 1 ..... 000 110 ..... ..... @rd_rn_rm_e0 TRN2_q 00000101 10 1 ..... 000 111 ..... ..... @rd_rn_rm_e0 +# SVE2.1 permute vector elements (quadwords) +ZIPQ1 01000100 .. 0 ..... 111 000 ..... ..... @rd_rn_rm +ZIPQ2 01000100 .. 0 ..... 111 001 ..... ..... @rd_rn_rm +UZPQ1 01000100 .. 0 ..... 111 010 ..... ..... @rd_rn_rm +UZPQ2 01000100 .. 0 ..... 111 011 ..... ..... @rd_rn_rm + +TBLQ 01000100 .. 0 ..... 111 110 ..... ..... @rd_rn_rm +TBXQ 00000101 .. 1 ..... 001 101 ..... ..... @rd_rn_rm + ### SVE Permute - Predicated Group # SVE compress active elements @@ -725,6 +780,7 @@ PTEST 00100101 01 010000 11 pg:4 0 rn:4 0 0000 # SVE predicate initialize PTRUE 00100101 esz:2 01100 s:1 111000 pat:5 0 rd:4 +PTRUE_cnt 00100101 esz:2 1000000111100000010 ... rd=%pnd # SVE initialize FFR SETFFR 00100101 0010 1100 1001 0000 0000 0000 @@ -765,7 +821,8 @@ BRKN 00100101 0. 01100001 .... 0 .... 0 .... @pd_pg_pn_s ### SVE Predicate Count Group # SVE predicate count -CNTP 00100101 .. 100 000 10 .... 0 .... ..... @rd_pg4_pn +CNTP 00100101 .. 100 000 10 .... 0 .... ..... @rd_pg4_pn +CNTP_c 00100101 esz:2 100 000 10 000 vl:1 1 rn:4 rd:5 # SVE inc/dec register by predicate count INCDECP_r 00100101 .. 10110 d:1 10001 00 .... ..... @incdec_pred u=1 @@ -786,11 +843,35 @@ SINCDECP_z 00100101 .. 1010 d:1 u:1 10000 00 .... ..... @incdec2_pred CTERM 00100101 1 sf:1 1 rm:5 001000 rn:5 ne:1 0000 # SVE integer compare scalar count and limit -WHILE 00100101 esz:2 1 rm:5 000 sf:1 u:1 lt:1 rn:5 eq:1 rd:4 +&while esz rd rn rm sf u eq +WHILE_lt 00100101 esz:2 1 rm:5 000 sf:1 u:1 1 rn:5 eq:1 rd:4 &while +WHILE_gt 00100101 esz:2 1 rm:5 000 sf:1 u:1 0 rn:5 eq:1 rd:4 &while # SVE2 pointer conflict compare WHILE_ptr 00100101 esz:2 1 rm:5 001 100 rn:5 rw:1 rd:4 +# SVE2.1 predicate pair +%pd_pair 1:3 !function=times_2 +@while_pair ........ esz:2 . rm:5 .... u:1 . rn:5 . ... eq:1 \ + &while rd=%pd_pair sf=1 + +WHILE_lt_pair 00100101 .. 1 ..... 0101 . 1 ..... 1 ... . @while_pair +WHILE_gt_pair 00100101 .. 1 ..... 0101 . 0 ..... 1 ... . @while_pair + +# SVE2.1 predicate as count +@while_cnt ........ esz:2 . rm:5 .... u:1 . rn:5 . 
eq:1 ... \ + &while rd=%pnd sf=1 + +WHILE_lt_cnt2 00100101 .. 1 ..... 0100 . 1 ..... 1 . ... @while_cnt +WHILE_lt_cnt4 00100101 .. 1 ..... 0110 . 1 ..... 1 . ... @while_cnt +WHILE_gt_cnt2 00100101 .. 1 ..... 0100 . 0 ..... 1 . ... @while_cnt +WHILE_gt_cnt4 00100101 .. 1 ..... 0110 . 0 ..... 1 . ... @while_cnt + +# SVE2.1 extract mask predicate from predicate-as-counter +&pext rd rn esz imm +PEXT_1 00100101 esz:2 1 00000 0111 00 imm:2 ... 1 rd:4 &pext rn=%pnn +PEXT_2 00100101 esz:2 1 00000 0111 010 imm:1 ... 1 rd:4 &pext rn=%pnn + ### SVE Integer Wide Immediate - Unpredicated Group # SVE broadcast floating-point immediate (unpredicated) @@ -851,10 +932,13 @@ CDOT_zzzz 01000100 esz:2 0 rm:5 0001 rot:2 rn:5 rd:5 ra=%reg_movprfx #### SVE Multiply - Indexed # SVE integer dot product (indexed) -SDOT_zzxw_s 01000100 10 1 ..... 000000 ..... ..... @rrxr_2 esz=2 -SDOT_zzxw_d 01000100 11 1 ..... 000000 ..... ..... @rrxr_1 esz=3 -UDOT_zzxw_s 01000100 10 1 ..... 000001 ..... ..... @rrxr_2 esz=2 -UDOT_zzxw_d 01000100 11 1 ..... 000001 ..... ..... @rrxr_1 esz=3 +SDOT_zzxw_4s 01000100 10 1 ..... 000000 ..... ..... @rrxr_2 esz=2 +SDOT_zzxw_4d 01000100 11 1 ..... 000000 ..... ..... @rrxr_1 esz=3 +UDOT_zzxw_4s 01000100 10 1 ..... 000001 ..... ..... @rrxr_2 esz=2 +UDOT_zzxw_4d 01000100 11 1 ..... 000001 ..... ..... @rrxr_1 esz=3 + +SDOT_zzxw_2s 01000100 10 0 ..... 110010 ..... ..... @rrxr_2 esz=2 +UDOT_zzxw_2s 01000100 10 0 ..... 110011 ..... ..... @rrxr_2 esz=2 # SVE2 integer multiply-add (indexed) MLA_zzxz_h 01000100 0. 1 ..... 000010 ..... ..... @rrxr_3 esz=1 @@ -873,8 +957,8 @@ SQRDMLSH_zzxz_s 01000100 10 1 ..... 000101 ..... ..... @rrxr_2 esz=2 SQRDMLSH_zzxz_d 01000100 11 1 ..... 000101 ..... ..... @rrxr_1 esz=3 # SVE mixed sign dot product (indexed) -USDOT_zzxw_s 01000100 10 1 ..... 000110 ..... ..... @rrxr_2 esz=2 -SUDOT_zzxw_s 01000100 10 1 ..... 000111 ..... ..... @rrxr_2 esz=2 +USDOT_zzxw_4s 01000100 10 1 ..... 000110 ..... ..... @rrxr_2 esz=2 +SUDOT_zzxw_4s 01000100 10 1 ..... 000111 ..... ..... @rrxr_2 esz=2 # SVE2 saturating multiply-add (indexed) SQDMLALB_zzxw_s 01000100 10 1 ..... 0010.0 ..... ..... @rrxr_3a esz=2 @@ -968,9 +1052,11 @@ FCMLA_zzxz 01100100 11 1 index:1 rm:4 0001 rot:2 rn:5 rd:5 \ ### SVE FP Multiply-Add Indexed Group # SVE floating-point multiply-add (indexed) +FMLA_zzxz 01100100 0. 1 ..... 000010 ..... ..... @rrxr_3 esz=0 FMLA_zzxz 01100100 0. 1 ..... 000000 ..... ..... @rrxr_3 esz=1 FMLA_zzxz 01100100 10 1 ..... 000000 ..... ..... @rrxr_2 esz=2 FMLA_zzxz 01100100 11 1 ..... 000000 ..... ..... @rrxr_1 esz=3 +FMLS_zzxz 01100100 0. 1 ..... 000011 ..... ..... @rrxr_3 esz=0 FMLS_zzxz 01100100 0. 1 ..... 000001 ..... ..... @rrxr_3 esz=1 FMLS_zzxz 01100100 10 1 ..... 000001 ..... ..... @rrxr_2 esz=2 FMLS_zzxz 01100100 11 1 ..... 000001 ..... ..... @rrxr_1 esz=3 @@ -978,6 +1064,7 @@ FMLS_zzxz 01100100 11 1 ..... 000001 ..... ..... @rrxr_1 esz=3 ### SVE FP Multiply Indexed Group # SVE floating-point multiply (indexed) +FMUL_zzx 01100100 0. 1 ..... 001010 ..... ..... @rrx_3 esz=0 FMUL_zzx 01100100 0. 1 ..... 001000 ..... ..... @rrx_3 esz=1 FMUL_zzx 01100100 10 1 ..... 001000 ..... ..... @rrx_2 esz=2 FMUL_zzx 01100100 11 1 ..... 001000 ..... ..... @rrx_1 esz=3 @@ -990,6 +1077,14 @@ FMINNMV 01100101 .. 000 101 001 ... ..... ..... @rd_pg_rn FMAXV 01100101 .. 000 110 001 ... ..... ..... @rd_pg_rn FMINV 01100101 .. 000 111 001 ... ..... ..... @rd_pg_rn +### SVE FP recursive reduction (quadwords) + +FADDQV 01100100 .. 010 000 101 ... ..... ..... @rd_pg_rn +FMAXNMQV 01100100 .. 010 100 101 ... 
..... ..... @rd_pg_rn +FMINNMQV 01100100 .. 010 101 101 ... ..... ..... @rd_pg_rn +FMAXQV 01100100 .. 010 110 101 ... ..... ..... @rd_pg_rn +FMINQV 01100100 .. 010 111 101 ... ..... ..... @rd_pg_rn + ## SVE Floating Point Unary Operations - Unpredicated Group FRECPE 01100101 .. 001 110 001100 ..... ..... @rd_rn @@ -1151,12 +1246,24 @@ LD1_zpiz 1000010 .. 01 ..... 1.. ... ..... ..... \ # SVE contiguous load (scalar plus scalar) LD_zprr 1010010 .... ..... 010 ... ..... ..... @rprr_load_dt nreg=0 +# LD1W (128-bit element) +LD_zprr 1010010 1000 rm:5 100 pg:3 rn:5 rd:5 \ + &rprr_load dtype=16 nreg=0 +# LD1D (128-bit element) +LD_zprr 1010010 1100 rm:5 100 pg:3 rn:5 rd:5 \ + &rprr_load dtype=17 nreg=0 # SVE contiguous first-fault load (scalar plus scalar) LDFF1_zprr 1010010 .... ..... 011 ... ..... ..... @rprr_load_dt nreg=0 # SVE contiguous load (scalar plus immediate) LD_zpri 1010010 .... 0.... 101 ... ..... ..... @rpri_load_dt nreg=0 +# LD1W (128-bit element) +LD_zpri 1010010 1000 1 imm:s4 001 pg:3 rn:5 rd:5 \ + &rpri_load dtype=16 nreg=0 +# LD1D (128-bit element) +LD_zpri 1010010 1100 1 imm:s4 001 pg:3 rn:5 rd:5 \ + &rpri_load dtype=17 nreg=0 # SVE contiguous non-fault load (scalar plus immediate) LDNF1_zpri 1010010 .... 1.... 101 ... ..... ..... @rpri_load_dt nreg=0 @@ -1166,12 +1273,26 @@ LDNF1_zpri 1010010 .... 1.... 101 ... ..... ..... @rpri_load_dt nreg=0 # SVE load multiple structures (scalar plus scalar) # LD2B, LD2H, LD2W, LD2D; etc. LD_zprr 1010010 .. nreg:2 ..... 110 ... ..... ..... @rprr_load_msz +# LD[234]Q +LD_zprr 1010010 01 01 ..... 100 ... ..... ..... \ + @rprr_load dtype=18 nreg=1 +LD_zprr 1010010 10 01 ..... 100 ... ..... ..... \ + @rprr_load dtype=18 nreg=2 +LD_zprr 1010010 11 01 ..... 100 ... ..... ..... \ + @rprr_load dtype=18 nreg=3 # SVE contiguous non-temporal load (scalar plus immediate) # LDNT1B, LDNT1H, LDNT1W, LDNT1D # SVE load multiple structures (scalar plus immediate) # LD2B, LD2H, LD2W, LD2D; etc. LD_zpri 1010010 .. nreg:2 0.... 111 ... ..... ..... @rpri_load_msz +# LD[234]Q +LD_zpri 1010010 01 001 .... 111 ... ..... ..... \ + @rpri_load dtype=18 nreg=1 +LD_zpri 1010010 10 001 .... 111 ... ..... ..... \ + @rpri_load dtype=18 nreg=2 +LD_zpri 1010010 11 001 .... 111 ... ..... ..... \ + @rpri_load dtype=18 nreg=3 # SVE load and broadcast quadword (scalar plus scalar) LD1RQ_zprr 1010010 .. 00 ..... 000 ... ..... ..... \ @@ -1222,6 +1343,10 @@ LD1_zprz 1100010 10 1. ..... 1.. ... ..... ..... \ LD1_zprz 1100010 11 1. ..... 11. ... ..... ..... \ @rprr_g_load_sc esz=3 msz=3 u=1 +# LD1Q. Note that this is subtly different from LD1_zprz because +# it is vector + scalar, not scalar + vector. +LD1Q 1100 0100 000 rm:5 101 pg:3 rn:5 rd:5 + # SVE 64-bit gather load (vector plus immediate) LD1_zpiz 1100010 .. 01 ..... 1.. ... ..... ..... \ @rpri_g_load esz=3 @@ -1245,8 +1370,20 @@ STR_zri 1110010 11 0. ..... 010 ... ..... ..... @rd_rn_i9 # SVE contiguous store (scalar plus immediate) # ST1B, ST1H, ST1W, ST1D; require msz <= esz -ST_zpri 1110010 .. esz:2 0.... 111 ... ..... ..... \ - @rpri_store_msz nreg=0 +ST_zpri 1110010 00 esz:2 0.... 111 ... ..... ..... \ + @rpri_store msz=0 nreg=0 +ST_zpri 1110010 01 esz:2 0.... 111 ... ..... ..... \ + @rpri_store msz=1 nreg=0 +ST_zpri 1110010 10 10 0.... 111 ... ..... ..... \ + @rpri_store msz=2 esz=2 nreg=0 +ST_zpri 1110010 10 11 0.... 111 ... ..... ..... \ + @rpri_store msz=2 esz=3 nreg=0 +ST_zpri 1110010 11 11 0.... 111 ... ..... ..... \ + @rpri_store msz=3 esz=3 nreg=0 +ST_zpri 1110010 10 00 0.... 111 ... ..... ..... 
\ + @rpri_store msz=2 esz=4 nreg=0 +ST_zpri 1110010 11 10 0.... 111 ... ..... ..... \ + @rpri_store msz=3 esz=4 nreg=0 # SVE contiguous store (scalar plus scalar) # ST1B, ST1H, ST1W, ST1D; require msz <= esz @@ -1255,20 +1392,40 @@ ST_zprr 1110010 00 .. ..... 010 ... ..... ..... \ @rprr_store_esz_n0 msz=0 ST_zprr 1110010 01 .. ..... 010 ... ..... ..... \ @rprr_store_esz_n0 msz=1 -ST_zprr 1110010 10 .. ..... 010 ... ..... ..... \ - @rprr_store_esz_n0 msz=2 +ST_zprr 1110010 10 10 ..... 010 ... ..... ..... \ + @rprr_store msz=2 esz=2 nreg=0 +ST_zprr 1110010 10 11 ..... 010 ... ..... ..... \ + @rprr_store msz=2 esz=3 nreg=0 ST_zprr 1110010 11 11 ..... 010 ... ..... ..... \ @rprr_store msz=3 esz=3 nreg=0 +ST_zprr 1110010 10 00 ..... 010 ... ..... ..... \ + @rprr_store msz=2 esz=4 nreg=0 +ST_zprr 1110010 11 10 ..... 010 ... ..... ..... \ + @rprr_store msz=3 esz=4 nreg=0 # SVE contiguous non-temporal store (scalar plus immediate) (nreg == 0) # SVE store multiple structures (scalar plus immediate) (nreg != 0) ST_zpri 1110010 .. nreg:2 1.... 111 ... ..... ..... \ - @rpri_store_msz esz=%size_23 + @rpri_store msz=%size_23 esz=%size_23 +# ST[234]Q +ST_zpri 11100100 01 00 .... 000 ... ..... ..... \ + @rpri_store msz=4 esz=4 nreg=1 +ST_zpri 11100100 10 00 .... 000 ... ..... ..... \ + @rpri_store msz=4 esz=4 nreg=2 +ST_zpri 11100100 11 00 .... 000 ... ..... ..... \ + @rpri_store msz=4 esz=4 nreg=3 # SVE contiguous non-temporal store (scalar plus scalar) (nreg == 0) # SVE store multiple structures (scalar plus scalar) (nreg != 0) -ST_zprr 1110010 msz:2 nreg:2 ..... 011 ... ..... ..... \ - @rprr_store esz=%size_23 +ST_zprr 1110010 .. nreg:2 ..... 011 ... ..... ..... \ + @rprr_store msz=%size_23 esz=%size_23 +# ST[234]Q +ST_zprr 11100100 01 1 ..... 000 ... ..... ..... \ + @rprr_store msz=4 esz=4 nreg=1 +ST_zprr 11100100 10 1 ..... 000 ... ..... ..... \ + @rprr_store msz=4 esz=4 nreg=2 +ST_zprr 11100100 11 1 ..... 000 ... ..... ..... \ + @rprr_store msz=4 esz=4 nreg=3 # SVE 32-bit scatter store (scalar plus 32-bit scaled offsets) # Require msz > 0 && msz <= esz. @@ -1293,6 +1450,10 @@ ST1_zprz 1110010 .. 01 ..... 101 ... ..... ..... \ ST1_zprz 1110010 .. 00 ..... 101 ... ..... ..... \ @rprr_scatter_store xs=2 esz=3 scale=0 +# ST1Q. Note that this is subtly different from ST1_zprz because +# it is vector + scalar, not scalar + vector. +ST1Q 1110 0100 001 rm:5 001 pg:3 rn:5 rd:5 + # SVE 64-bit scatter store (vector plus immediate) ST1_zpiz 1110010 .. 10 ..... 101 ... ..... ..... \ @rpri_scatter_store esz=3 @@ -1450,9 +1611,9 @@ EORTB 01000101 .. 0 ..... 10010 1 ..... ..... @rd_rn_rm ## SVE integer matrix multiply accumulate -SMMLA 01000101 00 0 ..... 10011 0 ..... ..... @rda_rn_rm_e0 -USMMLA 01000101 10 0 ..... 10011 0 ..... ..... @rda_rn_rm_e0 -UMMLA 01000101 11 0 ..... 10011 0 ..... ..... @rda_rn_rm_e0 +SMMLA 01000101 00 0 ..... 10011 0 ..... ..... @rda_rn_rm_ex esz=2 +USMMLA 01000101 10 0 ..... 10011 0 ..... ..... @rda_rn_rm_ex esz=2 +UMMLA 01000101 11 0 ..... 10011 0 ..... ..... @rda_rn_rm_ex esz=2 ## SVE2 bitwise permute @@ -1504,13 +1665,22 @@ UABA 01000101 .. 0 ..... 11111 1 ..... ..... @rd_rn_rm #### SVE2 Narrowing ## SVE2 saturating extract narrow - # Bits 23, 18-16 are zero, limited in the translator via esz < 3 & imm == 0. -SQXTNB 01000101 .. 1 ..... 010 000 ..... ..... @rd_rn_tszimm_shl + +{ + SQCVTN_sh 01000101 00 1 10001 010 000 ....0 ..... @rd_rnx2 esz=1 + SQXTNB 01000101 .. 1 ..... 010 000 ..... ..... @rd_rn_tszimm_shl +} SQXTNT 01000101 .. 1 ..... 010 001 ..... ..... 
@rd_rn_tszimm_shl -UQXTNB 01000101 .. 1 ..... 010 010 ..... ..... @rd_rn_tszimm_shl +{ + UQCVTN_sh 01000101 00 1 10001 010 010 ....0 ..... @rd_rnx2 esz=1 + UQXTNB 01000101 .. 1 ..... 010 010 ..... ..... @rd_rn_tszimm_shl +} UQXTNT 01000101 .. 1 ..... 010 011 ..... ..... @rd_rn_tszimm_shl -SQXTUNB 01000101 .. 1 ..... 010 100 ..... ..... @rd_rn_tszimm_shl +{ + SQCVTUN_sh 01000101 00 1 10001 010 100 ....0 ..... @rd_rnx2 esz=1 + SQXTUNB 01000101 .. 1 ..... 010 100 ..... ..... @rd_rn_tszimm_shl +} SQXTUNT 01000101 .. 1 ..... 010 101 ..... ..... @rd_rn_tszimm_shl ## SVE2 bitwise shift right narrow @@ -1597,14 +1767,17 @@ UMLSLT_zzzw 01000100 .. 0 ..... 010 111 ..... ..... @rda_rn_rm CMLA_zzzz 01000100 esz:2 0 rm:5 0010 rot:2 rn:5 rd:5 ra=%reg_movprfx SQRDCMLAH_zzzz 01000100 esz:2 0 rm:5 0011 rot:2 rn:5 rd:5 ra=%reg_movprfx -## SVE mixed sign dot product +## SVE dot product + +SDOT_zzzz_2s 01000100 00 0 ..... 110 010 ..... ..... @rda_rn_rm_ex esz=2 +UDOT_zzzz_2s 01000100 00 0 ..... 110 011 ..... ..... @rda_rn_rm_ex esz=2 -USDOT_zzzz 01000100 .. 0 ..... 011 110 ..... ..... @rda_rn_rm +USDOT_zzzz_4s 01000100 10 0 ..... 011 110 ..... ..... @rda_rn_rm_ex esz=2 ### SVE2 floating point matrix multiply accumulate -BFMMLA 01100100 01 1 ..... 111 001 ..... ..... @rda_rn_rm_e0 -FMMLA_s 01100100 10 1 ..... 111 001 ..... ..... @rda_rn_rm_e0 -FMMLA_d 01100100 11 1 ..... 111 001 ..... ..... @rda_rn_rm_e0 +BFMMLA 01100100 01 1 ..... 111 001 ..... ..... @rda_rn_rm_ex esz=1 +FMMLA_s 01100100 10 1 ..... 111 001 ..... ..... @rda_rn_rm_ex esz=2 +FMMLA_d 01100100 11 1 ..... 111 001 ..... ..... @rda_rn_rm_ex esz=3 ### SVE2 Memory Gather Load Group @@ -1654,26 +1827,35 @@ FCVTLT_sd 01100100 11 0010 11 101 ... ..... ..... @rd_pg_rn_e0 FLOGB 01100101 00 011 esz:2 0101 pg:3 rn:5 rd:5 &rpr_esz ### SVE2 floating-point multiply-add long (vectors) -FMLALB_zzzw 01100100 10 1 ..... 10 0 00 0 ..... ..... @rda_rn_rm_e0 -FMLALT_zzzw 01100100 10 1 ..... 10 0 00 1 ..... ..... @rda_rn_rm_e0 -FMLSLB_zzzw 01100100 10 1 ..... 10 1 00 0 ..... ..... @rda_rn_rm_e0 -FMLSLT_zzzw 01100100 10 1 ..... 10 1 00 1 ..... ..... @rda_rn_rm_e0 +FMLALB_zzzw 01100100 10 1 ..... 10 0 00 0 ..... ..... @rda_rn_rm_ex esz=2 +FMLALT_zzzw 01100100 10 1 ..... 10 0 00 1 ..... ..... @rda_rn_rm_ex esz=2 +FMLSLB_zzzw 01100100 10 1 ..... 10 1 00 0 ..... ..... @rda_rn_rm_ex esz=2 +FMLSLT_zzzw 01100100 10 1 ..... 10 1 00 1 ..... ..... @rda_rn_rm_ex esz=2 -BFMLALB_zzzw 01100100 11 1 ..... 10 0 00 0 ..... ..... @rda_rn_rm_e0 -BFMLALT_zzzw 01100100 11 1 ..... 10 0 00 1 ..... ..... @rda_rn_rm_e0 +BFMLALB_zzzw 01100100 11 1 ..... 10 0 00 0 ..... ..... @rda_rn_rm_ex esz=2 +BFMLALT_zzzw 01100100 11 1 ..... 10 0 00 1 ..... ..... @rda_rn_rm_ex esz=2 +BFMLSLB_zzzw 01100100 11 1 ..... 10 1 00 0 ..... ..... @rda_rn_rm_ex esz=2 +BFMLSLT_zzzw 01100100 11 1 ..... 10 1 00 1 ..... ..... @rda_rn_rm_ex esz=2 -### SVE2 floating-point bfloat16 dot-product -BFDOT_zzzz 01100100 01 1 ..... 10 0 00 0 ..... ..... @rda_rn_rm_e0 +### SVE2 floating-point dot-product +FDOT_zzzz 01100100 00 1 ..... 10 0 00 0 ..... ..... @rda_rn_rm_ex esz=2 +BFDOT_zzzz 01100100 01 1 ..... 10 0 00 0 ..... ..... @rda_rn_rm_ex esz=2 ### SVE2 floating-point multiply-add long (indexed) + FMLALB_zzxw 01100100 10 1 ..... 0100.0 ..... ..... @rrxr_3a esz=2 FMLALT_zzxw 01100100 10 1 ..... 0100.1 ..... ..... @rrxr_3a esz=2 FMLSLB_zzxw 01100100 10 1 ..... 0110.0 ..... ..... @rrxr_3a esz=2 FMLSLT_zzxw 01100100 10 1 ..... 0110.1 ..... ..... @rrxr_3a esz=2 + BFMLALB_zzxw 01100100 11 1 ..... 0100.0 ..... ..... 
@rrxr_3a esz=2 BFMLALT_zzxw 01100100 11 1 ..... 0100.1 ..... ..... @rrxr_3a esz=2 +BFMLSLB_zzxw 01100100 11 1 ..... 0110.0 ..... ..... @rrxr_3a esz=2 +BFMLSLT_zzxw 01100100 11 1 ..... 0110.1 ..... ..... @rrxr_3a esz=2 -### SVE2 floating-point bfloat16 dot-product (indexed) +### SVE2 floating-point dot-product (indexed) + +FDOT_zzxz 01100100 00 1 ..... 010000 ..... ..... @rrxr_2 esz=2 BFDOT_zzxz 01100100 01 1 ..... 010000 ..... ..... @rrxr_2 esz=2 ### SVE broadcast predicate element @@ -1700,3 +1882,55 @@ PSEL 00100101 .1 1 000 .. 01 .... 0 .... 0 .... \ SCLAMP 01000100 .. 0 ..... 110000 ..... ..... @rda_rn_rm UCLAMP 01000100 .. 0 ..... 110001 ..... ..... @rda_rn_rm + +FCLAMP 01100100 .. 1 ..... 001001 ..... ..... @rda_rn_rm + +### SVE2p1 multi-vec contiguous load + +&zcrr_ldst rd png rn rm esz nreg +&zcri_ldst rd png rn imm esz nreg +%png 10:3 !function=plus_8 +%zd_ax2 1:4 !function=times_2 +%zd_ax4 2:3 !function=times_4 + +LD1_zcrr 10100000000 rm:5 0 esz:2 ... rn:5 .... - \ + &zcrr_ldst %png rd=%zd_ax2 nreg=2 +LD1_zcrr 10100000000 rm:5 1 esz:2 ... rn:5 ... 0- \ + &zcrr_ldst %png rd=%zd_ax4 nreg=4 + +ST1_zcrr 10100000001 rm:5 0 esz:2 ... rn:5 .... - \ + &zcrr_ldst %png rd=%zd_ax2 nreg=2 +ST1_zcrr 10100000001 rm:5 1 esz:2 ... rn:5 ... 0- \ + &zcrr_ldst %png rd=%zd_ax4 nreg=4 + +LD1_zcri 101000000100 imm:s4 0 esz:2 ... rn:5 .... - \ + &zcri_ldst %png rd=%zd_ax2 nreg=2 +LD1_zcri 101000000100 imm:s4 1 esz:2 ... rn:5 ... 0- \ + &zcri_ldst %png rd=%zd_ax4 nreg=4 + +ST1_zcri 101000000110 imm:s4 0 esz:2 ... rn:5 .... - \ + &zcri_ldst %png rd=%zd_ax2 nreg=2 +ST1_zcri 101000000110 imm:s4 1 esz:2 ... rn:5 ... 0- \ + &zcri_ldst %png rd=%zd_ax4 nreg=4 + +# Note: N bit and 0 bit (for nreg4) still mashed in rd. +# This is handled within gen_ldst_c(). +LD1_zcrr_stride 10100001000 rm:5 0 esz:2 ... rn:5 rd:5 \ + &zcrr_ldst %png nreg=2 +LD1_zcrr_stride 10100001000 rm:5 1 esz:2 ... rn:5 rd:5 \ + &zcrr_ldst %png nreg=4 + +ST1_zcrr_stride 10100001001 rm:5 0 esz:2 ... rn:5 rd:5 \ + &zcrr_ldst %png nreg=2 +ST1_zcrr_stride 10100001001 rm:5 1 esz:2 ... rn:5 rd:5 \ + &zcrr_ldst %png nreg=4 + +LD1_zcri_stride 101000010100 imm:s4 0 esz:2 ... rn:5 rd:5 \ + &zcri_ldst %png nreg=2 +LD1_zcri_stride 101000010100 imm:s4 1 esz:2 ... rn:5 rd:5 \ + &zcri_ldst %png nreg=4 + +ST1_zcri_stride 101000010110 imm:s4 0 esz:2 ... rn:5 rd:5 \ + &zcri_ldst %png nreg=2 +ST1_zcri_stride 101000010110 imm:s4 1 esz:2 ... rn:5 rd:5 \ + &zcri_ldst %png nreg=4 diff --git a/target/arm/tcg/sve_helper.c b/target/arm/tcg/sve_helper.c index a2c363a..c442fcb 100644 --- a/target/arm/tcg/sve_helper.c +++ b/target/arm/tcg/sve_helper.c @@ -123,6 +123,11 @@ static inline uint64_t expand_pred_s(uint8_t byte) return word[byte & 0x11]; } +static inline uint64_t expand_pred_d(uint8_t byte) +{ + return -(uint64_t)(byte & 1); +} + #define LOGICAL_PPPP(NAME, FUNC) \ void HELPER(NAME)(void *vd, void *vn, void *vm, void *vg, uint32_t desc) \ { \ @@ -206,6 +211,7 @@ void HELPER(NAME)(void *vd, void *vn, void *vm, void *vg, uint32_t desc) \ #define DO_EOR(N, M) (N ^ M) #define DO_ORR(N, M) (N | M) #define DO_BIC(N, M) (N & ~M) +#define DO_ORC(N, M) (N | ~M) #define DO_ADD(N, M) (N + M) #define DO_SUB(N, M) (N - M) #define DO_MAX(N, M) ((N) >= (M) ? (N) : (M)) @@ -527,14 +533,9 @@ DO_ZPZZ(sve2_uhsub_zpzz_h, uint16_t, H1_2, DO_HSUB_BHS) DO_ZPZZ(sve2_uhsub_zpzz_s, uint32_t, H1_4, DO_HSUB_BHS) DO_ZPZZ_D(sve2_uhsub_zpzz_d, uint64_t, DO_HSUB_D) -static inline int32_t do_sat_bhs(int64_t val, int64_t min, int64_t max) -{ - return val >= max ? max : val <= min ? 
min : val; -} - -#define DO_SQADD_B(n, m) do_sat_bhs((int64_t)n + m, INT8_MIN, INT8_MAX) -#define DO_SQADD_H(n, m) do_sat_bhs((int64_t)n + m, INT16_MIN, INT16_MAX) -#define DO_SQADD_S(n, m) do_sat_bhs((int64_t)n + m, INT32_MIN, INT32_MAX) +#define DO_SQADD_B(n, m) do_ssat_b((int64_t)n + m) +#define DO_SQADD_H(n, m) do_ssat_h((int64_t)n + m) +#define DO_SQADD_S(n, m) do_ssat_s((int64_t)n + m) static inline int64_t do_sqadd_d(int64_t n, int64_t m) { @@ -551,9 +552,9 @@ DO_ZPZZ(sve2_sqadd_zpzz_h, int16_t, H1_2, DO_SQADD_H) DO_ZPZZ(sve2_sqadd_zpzz_s, int32_t, H1_4, DO_SQADD_S) DO_ZPZZ_D(sve2_sqadd_zpzz_d, int64_t, do_sqadd_d) -#define DO_UQADD_B(n, m) do_sat_bhs((int64_t)n + m, 0, UINT8_MAX) -#define DO_UQADD_H(n, m) do_sat_bhs((int64_t)n + m, 0, UINT16_MAX) -#define DO_UQADD_S(n, m) do_sat_bhs((int64_t)n + m, 0, UINT32_MAX) +#define DO_UQADD_B(n, m) do_usat_b((int64_t)n + m) +#define DO_UQADD_H(n, m) do_usat_h((int64_t)n + m) +#define DO_UQADD_S(n, m) do_usat_s((int64_t)n + m) static inline uint64_t do_uqadd_d(uint64_t n, uint64_t m) { @@ -566,9 +567,9 @@ DO_ZPZZ(sve2_uqadd_zpzz_h, uint16_t, H1_2, DO_UQADD_H) DO_ZPZZ(sve2_uqadd_zpzz_s, uint32_t, H1_4, DO_UQADD_S) DO_ZPZZ_D(sve2_uqadd_zpzz_d, uint64_t, do_uqadd_d) -#define DO_SQSUB_B(n, m) do_sat_bhs((int64_t)n - m, INT8_MIN, INT8_MAX) -#define DO_SQSUB_H(n, m) do_sat_bhs((int64_t)n - m, INT16_MIN, INT16_MAX) -#define DO_SQSUB_S(n, m) do_sat_bhs((int64_t)n - m, INT32_MIN, INT32_MAX) +#define DO_SQSUB_B(n, m) do_ssat_b((int64_t)n - m) +#define DO_SQSUB_H(n, m) do_ssat_h((int64_t)n - m) +#define DO_SQSUB_S(n, m) do_ssat_s((int64_t)n - m) static inline int64_t do_sqsub_d(int64_t n, int64_t m) { @@ -585,9 +586,9 @@ DO_ZPZZ(sve2_sqsub_zpzz_h, int16_t, H1_2, DO_SQSUB_H) DO_ZPZZ(sve2_sqsub_zpzz_s, int32_t, H1_4, DO_SQSUB_S) DO_ZPZZ_D(sve2_sqsub_zpzz_d, int64_t, do_sqsub_d) -#define DO_UQSUB_B(n, m) do_sat_bhs((int64_t)n - m, 0, UINT8_MAX) -#define DO_UQSUB_H(n, m) do_sat_bhs((int64_t)n - m, 0, UINT16_MAX) -#define DO_UQSUB_S(n, m) do_sat_bhs((int64_t)n - m, 0, UINT32_MAX) +#define DO_UQSUB_B(n, m) do_usat_b((int64_t)n - m) +#define DO_UQSUB_H(n, m) do_usat_h((int64_t)n - m) +#define DO_UQSUB_S(n, m) do_usat_s((int64_t)n - m) static inline uint64_t do_uqsub_d(uint64_t n, uint64_t m) { @@ -599,12 +600,9 @@ DO_ZPZZ(sve2_uqsub_zpzz_h, uint16_t, H1_2, DO_UQSUB_H) DO_ZPZZ(sve2_uqsub_zpzz_s, uint32_t, H1_4, DO_UQSUB_S) DO_ZPZZ_D(sve2_uqsub_zpzz_d, uint64_t, do_uqsub_d) -#define DO_SUQADD_B(n, m) \ - do_sat_bhs((int64_t)(int8_t)n + m, INT8_MIN, INT8_MAX) -#define DO_SUQADD_H(n, m) \ - do_sat_bhs((int64_t)(int16_t)n + m, INT16_MIN, INT16_MAX) -#define DO_SUQADD_S(n, m) \ - do_sat_bhs((int64_t)(int32_t)n + m, INT32_MIN, INT32_MAX) +#define DO_SUQADD_B(n, m) do_ssat_b((int64_t)(int8_t)n + m) +#define DO_SUQADD_H(n, m) do_ssat_h((int64_t)(int16_t)n + m) +#define DO_SUQADD_S(n, m) do_ssat_s((int64_t)(int32_t)n + m) static inline int64_t do_suqadd_d(int64_t n, uint64_t m) { @@ -634,12 +632,9 @@ DO_ZPZZ(sve2_suqadd_zpzz_h, uint16_t, H1_2, DO_SUQADD_H) DO_ZPZZ(sve2_suqadd_zpzz_s, uint32_t, H1_4, DO_SUQADD_S) DO_ZPZZ_D(sve2_suqadd_zpzz_d, uint64_t, do_suqadd_d) -#define DO_USQADD_B(n, m) \ - do_sat_bhs((int64_t)n + (int8_t)m, 0, UINT8_MAX) -#define DO_USQADD_H(n, m) \ - do_sat_bhs((int64_t)n + (int16_t)m, 0, UINT16_MAX) -#define DO_USQADD_S(n, m) \ - do_sat_bhs((int64_t)n + (int32_t)m, 0, UINT32_MAX) +#define DO_USQADD_B(n, m) do_usat_b((int64_t)n + (int8_t)m) +#define DO_USQADD_H(n, m) do_usat_h((int64_t)n + (int16_t)m) +#define DO_USQADD_S(n, m) 
do_usat_s((int64_t)n + (int32_t)m) static inline uint64_t do_usqadd_d(uint64_t n, int64_t m) { @@ -1226,37 +1221,29 @@ void HELPER(NAME)(void *vd, void *vn, uint32_t desc) \ } \ } -#define DO_SQXTN_H(n) do_sat_bhs(n, INT8_MIN, INT8_MAX) -#define DO_SQXTN_S(n) do_sat_bhs(n, INT16_MIN, INT16_MAX) -#define DO_SQXTN_D(n) do_sat_bhs(n, INT32_MIN, INT32_MAX) - -DO_XTNB(sve2_sqxtnb_h, int16_t, DO_SQXTN_H) -DO_XTNB(sve2_sqxtnb_s, int32_t, DO_SQXTN_S) -DO_XTNB(sve2_sqxtnb_d, int64_t, DO_SQXTN_D) - -DO_XTNT(sve2_sqxtnt_h, int16_t, int8_t, H1, DO_SQXTN_H) -DO_XTNT(sve2_sqxtnt_s, int32_t, int16_t, H1_2, DO_SQXTN_S) -DO_XTNT(sve2_sqxtnt_d, int64_t, int32_t, H1_4, DO_SQXTN_D) +DO_XTNB(sve2_sqxtnb_h, int16_t, do_ssat_b) +DO_XTNB(sve2_sqxtnb_s, int32_t, do_ssat_h) +DO_XTNB(sve2_sqxtnb_d, int64_t, do_ssat_s) -#define DO_UQXTN_H(n) do_sat_bhs(n, 0, UINT8_MAX) -#define DO_UQXTN_S(n) do_sat_bhs(n, 0, UINT16_MAX) -#define DO_UQXTN_D(n) do_sat_bhs(n, 0, UINT32_MAX) +DO_XTNT(sve2_sqxtnt_h, int16_t, int8_t, H1, do_ssat_b) +DO_XTNT(sve2_sqxtnt_s, int32_t, int16_t, H1_2, do_ssat_h) +DO_XTNT(sve2_sqxtnt_d, int64_t, int32_t, H1_4, do_ssat_s) -DO_XTNB(sve2_uqxtnb_h, uint16_t, DO_UQXTN_H) -DO_XTNB(sve2_uqxtnb_s, uint32_t, DO_UQXTN_S) -DO_XTNB(sve2_uqxtnb_d, uint64_t, DO_UQXTN_D) +DO_XTNB(sve2_uqxtnb_h, uint16_t, do_usat_b) +DO_XTNB(sve2_uqxtnb_s, uint32_t, do_usat_h) +DO_XTNB(sve2_uqxtnb_d, uint64_t, do_usat_s) -DO_XTNT(sve2_uqxtnt_h, uint16_t, uint8_t, H1, DO_UQXTN_H) -DO_XTNT(sve2_uqxtnt_s, uint32_t, uint16_t, H1_2, DO_UQXTN_S) -DO_XTNT(sve2_uqxtnt_d, uint64_t, uint32_t, H1_4, DO_UQXTN_D) +DO_XTNT(sve2_uqxtnt_h, uint16_t, uint8_t, H1, do_usat_b) +DO_XTNT(sve2_uqxtnt_s, uint32_t, uint16_t, H1_2, do_usat_h) +DO_XTNT(sve2_uqxtnt_d, uint64_t, uint32_t, H1_4, do_usat_s) -DO_XTNB(sve2_sqxtunb_h, int16_t, DO_UQXTN_H) -DO_XTNB(sve2_sqxtunb_s, int32_t, DO_UQXTN_S) -DO_XTNB(sve2_sqxtunb_d, int64_t, DO_UQXTN_D) +DO_XTNB(sve2_sqxtunb_h, int16_t, do_usat_b) +DO_XTNB(sve2_sqxtunb_s, int32_t, do_usat_h) +DO_XTNB(sve2_sqxtunb_d, int64_t, do_usat_s) -DO_XTNT(sve2_sqxtunt_h, int16_t, int8_t, H1, DO_UQXTN_H) -DO_XTNT(sve2_sqxtunt_s, int32_t, int16_t, H1_2, DO_UQXTN_S) -DO_XTNT(sve2_sqxtunt_d, int64_t, int32_t, H1_4, DO_UQXTN_D) +DO_XTNT(sve2_sqxtunt_h, int16_t, int8_t, H1, do_usat_b) +DO_XTNT(sve2_sqxtunt_s, int32_t, int16_t, H1_2, do_usat_h) +DO_XTNT(sve2_sqxtunt_d, int64_t, int32_t, H1_4, do_usat_s) #undef DO_XTNB #undef DO_XTNT @@ -1833,6 +1820,52 @@ DO_VPZ_D(sve_uminv_d, uint64_t, uint64_t, -1, DO_MIN) #undef DO_VPZ #undef DO_VPZ_D +#define DO_VPQ(NAME, TYPE, H, INIT, OP) \ +void HELPER(NAME)(void *vd, void *vn, void *vg, uint32_t desc) \ +{ \ + TYPE tmp[16 / sizeof(TYPE)] = { [0 ... 
16 / sizeof(TYPE) - 1] = INIT }; \ + TYPE *n = vn; uint16_t *g = vg; \ + uintptr_t oprsz = simd_oprsz(desc); \ + uintptr_t nseg = oprsz / 16, nsegelt = 16 / sizeof(TYPE); \ + for (uintptr_t s = 0; s < nseg; s++) { \ + uint16_t pg = g[H2(s)]; \ + for (uintptr_t e = 0; e < nsegelt; e++, pg >>= sizeof(TYPE)) { \ + if (pg & 1) { \ + tmp[e] = OP(tmp[H(e)], n[s * nsegelt + H(e)]); \ + } \ + } \ + } \ + memcpy(vd, tmp, 16); \ + clear_tail(vd, 16, simd_maxsz(desc)); \ +} + +DO_VPQ(sve2p1_addqv_b, uint8_t, H1, 0, DO_ADD) +DO_VPQ(sve2p1_addqv_h, uint16_t, H2, 0, DO_ADD) +DO_VPQ(sve2p1_addqv_s, uint32_t, H4, 0, DO_ADD) +DO_VPQ(sve2p1_addqv_d, uint64_t, H8, 0, DO_ADD) + +DO_VPQ(sve2p1_smaxqv_b, int8_t, H1, INT8_MIN, DO_MAX) +DO_VPQ(sve2p1_smaxqv_h, int16_t, H2, INT16_MIN, DO_MAX) +DO_VPQ(sve2p1_smaxqv_s, int32_t, H4, INT32_MIN, DO_MAX) +DO_VPQ(sve2p1_smaxqv_d, int64_t, H8, INT64_MIN, DO_MAX) + +DO_VPQ(sve2p1_sminqv_b, int8_t, H1, INT8_MAX, DO_MIN) +DO_VPQ(sve2p1_sminqv_h, int16_t, H2, INT16_MAX, DO_MIN) +DO_VPQ(sve2p1_sminqv_s, int32_t, H4, INT32_MAX, DO_MIN) +DO_VPQ(sve2p1_sminqv_d, int64_t, H8, INT64_MAX, DO_MIN) + +DO_VPQ(sve2p1_umaxqv_b, uint8_t, H1, 0, DO_MAX) +DO_VPQ(sve2p1_umaxqv_h, uint16_t, H2, 0, DO_MAX) +DO_VPQ(sve2p1_umaxqv_s, uint32_t, H4, 0, DO_MAX) +DO_VPQ(sve2p1_umaxqv_d, uint64_t, H8, 0, DO_MAX) + +DO_VPQ(sve2p1_uminqv_b, uint8_t, H1, -1, DO_MIN) +DO_VPQ(sve2p1_uminqv_h, uint16_t, H2, -1, DO_MIN) +DO_VPQ(sve2p1_uminqv_s, uint32_t, H4, -1, DO_MIN) +DO_VPQ(sve2p1_uminqv_d, uint64_t, H8, -1, DO_MIN) + +#undef DO_VPQ + /* Two vector operand, one scalar operand, unpredicated. */ #define DO_ZZI(NAME, TYPE, OP) \ void HELPER(NAME)(void *vd, void *vn, uint64_t s64, uint32_t desc) \ @@ -1873,10 +1906,46 @@ DO_ZZI(sve_umini_d, uint64_t, DO_MIN) #undef DO_ZZI +#define DO_LOGIC_QV(NAME, SUFF, INIT, VOP, POP) \ +void HELPER(NAME ## _ ## SUFF)(void *vd, void *vn, void *vg, uint32_t desc) \ +{ \ + unsigned seg = simd_oprsz(desc) / 16; \ + uint64_t r0 = INIT, r1 = INIT; \ + for (unsigned s = 0; s < seg; s++) { \ + uint64_t p0 = expand_pred_##SUFF(*(uint8_t *)(vg + H1(s * 2))); \ + uint64_t p1 = expand_pred_##SUFF(*(uint8_t *)(vg + H1(s * 2 + 1))); \ + uint64_t v0 = *(uint64_t *)(vn + s * 16); \ + uint64_t v1 = *(uint64_t *)(vn + s * 16 + 8); \ + v0 = POP(v0, p0), v1 = POP(v1, p1); \ + r0 = VOP(r0, v0), r1 = VOP(r1, v1); \ + } \ + *(uint64_t *)(vd + 0) = r0; \ + *(uint64_t *)(vd + 8) = r1; \ + clear_tail(vd, 16, simd_maxsz(desc)); \ +} + +DO_LOGIC_QV(sve2p1_orqv, b, 0, DO_ORR, DO_AND) +DO_LOGIC_QV(sve2p1_orqv, h, 0, DO_ORR, DO_AND) +DO_LOGIC_QV(sve2p1_orqv, s, 0, DO_ORR, DO_AND) +DO_LOGIC_QV(sve2p1_orqv, d, 0, DO_ORR, DO_AND) + +DO_LOGIC_QV(sve2p1_eorqv, b, 0, DO_EOR, DO_AND) +DO_LOGIC_QV(sve2p1_eorqv, h, 0, DO_EOR, DO_AND) +DO_LOGIC_QV(sve2p1_eorqv, s, 0, DO_EOR, DO_AND) +DO_LOGIC_QV(sve2p1_eorqv, d, 0, DO_EOR, DO_AND) + +DO_LOGIC_QV(sve2p1_andqv, b, -1, DO_AND, DO_ORC) +DO_LOGIC_QV(sve2p1_andqv, h, -1, DO_AND, DO_ORC) +DO_LOGIC_QV(sve2p1_andqv, s, -1, DO_AND, DO_ORC) +DO_LOGIC_QV(sve2p1_andqv, d, -1, DO_AND, DO_ORC) + +#undef DO_LOGIC_QV + #undef DO_AND #undef DO_ORR #undef DO_EOR #undef DO_BIC +#undef DO_ORC #undef DO_ADD #undef DO_SUB #undef DO_MAX @@ -2069,27 +2138,6 @@ void HELPER(NAME)(void *vd, void *vn, void *vg, uint32_t desc) \ when N is negative, add 2**M-1. */ #define DO_ASRD(N, M) ((N + (N < 0 ? 
((__typeof(N))1 << M) - 1 : 0)) >> M) -static inline uint64_t do_urshr(uint64_t x, unsigned sh) -{ - if (likely(sh < 64)) { - return (x >> sh) + ((x >> (sh - 1)) & 1); - } else if (sh == 64) { - return x >> 63; - } else { - return 0; - } -} - -static inline int64_t do_srshr(int64_t x, unsigned sh) -{ - if (likely(sh < 64)) { - return (x >> sh) + ((x >> (sh - 1)) & 1); - } else { - /* Rounding the sign bit always produces 0. */ - return 0; - } -} - DO_ZPZI(sve_asr_zpzi_b, int8_t, H1, DO_SHR) DO_ZPZI(sve_asr_zpzi_h, int16_t, H1_2, DO_SHR) DO_ZPZI(sve_asr_zpzi_s, int32_t, H1_4, DO_SHR) @@ -2187,10 +2235,9 @@ DO_SHRNT(sve2_rshrnt_h, uint16_t, uint8_t, H1_2, H1, do_urshr) DO_SHRNT(sve2_rshrnt_s, uint32_t, uint16_t, H1_4, H1_2, do_urshr) DO_SHRNT(sve2_rshrnt_d, uint64_t, uint32_t, H1_8, H1_4, do_urshr) -#define DO_SQSHRUN_H(x, sh) do_sat_bhs((int64_t)(x) >> sh, 0, UINT8_MAX) -#define DO_SQSHRUN_S(x, sh) do_sat_bhs((int64_t)(x) >> sh, 0, UINT16_MAX) -#define DO_SQSHRUN_D(x, sh) \ - do_sat_bhs((int64_t)(x) >> (sh < 64 ? sh : 63), 0, UINT32_MAX) +#define DO_SQSHRUN_H(x, sh) do_usat_b((int64_t)(x) >> sh) +#define DO_SQSHRUN_S(x, sh) do_usat_h((int64_t)(x) >> sh) +#define DO_SQSHRUN_D(x, sh) do_usat_s((int64_t)(x) >> (sh < 64 ? sh : 63)) DO_SHRNB(sve2_sqshrunb_h, int16_t, uint8_t, DO_SQSHRUN_H) DO_SHRNB(sve2_sqshrunb_s, int32_t, uint16_t, DO_SQSHRUN_S) @@ -2200,9 +2247,9 @@ DO_SHRNT(sve2_sqshrunt_h, int16_t, uint8_t, H1_2, H1, DO_SQSHRUN_H) DO_SHRNT(sve2_sqshrunt_s, int32_t, uint16_t, H1_4, H1_2, DO_SQSHRUN_S) DO_SHRNT(sve2_sqshrunt_d, int64_t, uint32_t, H1_8, H1_4, DO_SQSHRUN_D) -#define DO_SQRSHRUN_H(x, sh) do_sat_bhs(do_srshr(x, sh), 0, UINT8_MAX) -#define DO_SQRSHRUN_S(x, sh) do_sat_bhs(do_srshr(x, sh), 0, UINT16_MAX) -#define DO_SQRSHRUN_D(x, sh) do_sat_bhs(do_srshr(x, sh), 0, UINT32_MAX) +#define DO_SQRSHRUN_H(x, sh) do_usat_b(do_srshr(x, sh)) +#define DO_SQRSHRUN_S(x, sh) do_usat_h(do_srshr(x, sh)) +#define DO_SQRSHRUN_D(x, sh) do_usat_s(do_srshr(x, sh)) DO_SHRNB(sve2_sqrshrunb_h, int16_t, uint8_t, DO_SQRSHRUN_H) DO_SHRNB(sve2_sqrshrunb_s, int32_t, uint16_t, DO_SQRSHRUN_S) @@ -2212,9 +2259,9 @@ DO_SHRNT(sve2_sqrshrunt_h, int16_t, uint8_t, H1_2, H1, DO_SQRSHRUN_H) DO_SHRNT(sve2_sqrshrunt_s, int32_t, uint16_t, H1_4, H1_2, DO_SQRSHRUN_S) DO_SHRNT(sve2_sqrshrunt_d, int64_t, uint32_t, H1_8, H1_4, DO_SQRSHRUN_D) -#define DO_SQSHRN_H(x, sh) do_sat_bhs(x >> sh, INT8_MIN, INT8_MAX) -#define DO_SQSHRN_S(x, sh) do_sat_bhs(x >> sh, INT16_MIN, INT16_MAX) -#define DO_SQSHRN_D(x, sh) do_sat_bhs(x >> sh, INT32_MIN, INT32_MAX) +#define DO_SQSHRN_H(x, sh) do_ssat_b(x >> sh) +#define DO_SQSHRN_S(x, sh) do_ssat_h(x >> sh) +#define DO_SQSHRN_D(x, sh) do_ssat_s(x >> sh) DO_SHRNB(sve2_sqshrnb_h, int16_t, uint8_t, DO_SQSHRN_H) DO_SHRNB(sve2_sqshrnb_s, int32_t, uint16_t, DO_SQSHRN_S) @@ -2224,9 +2271,9 @@ DO_SHRNT(sve2_sqshrnt_h, int16_t, uint8_t, H1_2, H1, DO_SQSHRN_H) DO_SHRNT(sve2_sqshrnt_s, int32_t, uint16_t, H1_4, H1_2, DO_SQSHRN_S) DO_SHRNT(sve2_sqshrnt_d, int64_t, uint32_t, H1_8, H1_4, DO_SQSHRN_D) -#define DO_SQRSHRN_H(x, sh) do_sat_bhs(do_srshr(x, sh), INT8_MIN, INT8_MAX) -#define DO_SQRSHRN_S(x, sh) do_sat_bhs(do_srshr(x, sh), INT16_MIN, INT16_MAX) -#define DO_SQRSHRN_D(x, sh) do_sat_bhs(do_srshr(x, sh), INT32_MIN, INT32_MAX) +#define DO_SQRSHRN_H(x, sh) do_ssat_b(do_srshr(x, sh)) +#define DO_SQRSHRN_S(x, sh) do_ssat_h(do_srshr(x, sh)) +#define DO_SQRSHRN_D(x, sh) do_ssat_s(do_srshr(x, sh)) DO_SHRNB(sve2_sqrshrnb_h, int16_t, uint8_t, DO_SQRSHRN_H) DO_SHRNB(sve2_sqrshrnb_s, int32_t, uint16_t, DO_SQRSHRN_S) @@ 
-2988,6 +3035,56 @@ void HELPER(sve_rev_d)(void *vd, void *vn, uint32_t desc) } } +/* + * TODO: This could use half_shuffle64 and similar bit tricks to + * expand blocks of bits at once. + */ +#define DO_PMOV_PV(NAME, ESIZE) \ +void HELPER(NAME)(void *vd, void *vs, uint32_t desc) \ +{ \ + unsigned vl = simd_oprsz(desc); \ + unsigned idx = simd_data(desc); \ + unsigned elements = vl / ESIZE; \ + ARMPredicateReg *d = vd; \ + ARMVectorReg *s = vs; \ + memset(d, 0, sizeof(*d)); \ + for (unsigned e = 0; e < elements; ++e) { \ + depositn(d->p, e * ESIZE, 1, extractn(s->d, elements * idx + e, 1)); \ + } \ +} + +DO_PMOV_PV(pmov_pv_h, 2) +DO_PMOV_PV(pmov_pv_s, 4) +DO_PMOV_PV(pmov_pv_d, 8) + +#undef DO_PMOV_PV + +/* + * TODO: This could use half_unshuffle64 and similar bit tricks to + * compress blocks of bits at once. + */ +#define DO_PMOV_VP(NAME, ESIZE) \ +void HELPER(NAME)(void *vd, void *vs, uint32_t desc) \ +{ \ + unsigned vl = simd_oprsz(desc); \ + unsigned idx = simd_data(desc); \ + unsigned elements = vl / ESIZE; \ + ARMVectorReg *d = vd; \ + ARMPredicateReg *s = vs; \ + if (idx == 0) { \ + memset(d, 0, vl); \ + } \ + for (unsigned e = 0; e < elements; ++e) { \ + depositn(d->d, elements * idx + e, 1, extractn(s->p, e * ESIZE, 1)); \ + } \ +} + +DO_PMOV_VP(pmov_vp_h, 2) +DO_PMOV_VP(pmov_vp_s, 4) +DO_PMOV_VP(pmov_vp_d, 8) + +#undef DO_PMOV_VP + typedef void tb_impl_fn(void *, void *, void *, void *, uintptr_t, bool); static inline void do_tbl1(void *vd, void *vn, void *vm, uint32_t desc, @@ -3453,6 +3550,45 @@ DO_UZP(sve_uzp_s, uint32_t, H1_4) DO_UZP(sve_uzp_d, uint64_t, H1_8) DO_UZP(sve2_uzp_q, Int128, ) +typedef void perseg_zzz_fn(void *vd, void *vn, void *vm, uint32_t desc); + +static void do_perseg_zzz(void *vd, void *vn, void *vm, + uint32_t desc, perseg_zzz_fn *fn) +{ + intptr_t oprsz = simd_oprsz(desc); + + desc = simd_desc(16, 16, simd_data(desc)); + for (intptr_t i = 0; i < oprsz; i += 16) { + fn(vd + i, vn + i, vm + i, desc); + } +} + +#define DO_PERSEG_ZZZ(NAME, FUNC) \ + void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \ + { do_perseg_zzz(vd, vn, vm, desc, FUNC); } + +DO_PERSEG_ZZZ(sve2p1_uzpq_b, helper_sve_uzp_b) +DO_PERSEG_ZZZ(sve2p1_uzpq_h, helper_sve_uzp_h) +DO_PERSEG_ZZZ(sve2p1_uzpq_s, helper_sve_uzp_s) +DO_PERSEG_ZZZ(sve2p1_uzpq_d, helper_sve_uzp_d) + +DO_PERSEG_ZZZ(sve2p1_zipq_b, helper_sve_zip_b) +DO_PERSEG_ZZZ(sve2p1_zipq_h, helper_sve_zip_h) +DO_PERSEG_ZZZ(sve2p1_zipq_s, helper_sve_zip_s) +DO_PERSEG_ZZZ(sve2p1_zipq_d, helper_sve_zip_d) + +DO_PERSEG_ZZZ(sve2p1_tblq_b, helper_sve_tbl_b) +DO_PERSEG_ZZZ(sve2p1_tblq_h, helper_sve_tbl_h) +DO_PERSEG_ZZZ(sve2p1_tblq_s, helper_sve_tbl_s) +DO_PERSEG_ZZZ(sve2p1_tblq_d, helper_sve_tbl_d) + +DO_PERSEG_ZZZ(sve2p1_tbxq_b, helper_sve2_tbx_b) +DO_PERSEG_ZZZ(sve2p1_tbxq_h, helper_sve2_tbx_h) +DO_PERSEG_ZZZ(sve2p1_tbxq_s, helper_sve2_tbx_s) +DO_PERSEG_ZZZ(sve2p1_tbxq_d, helper_sve2_tbx_d) + +#undef DO_PERSEG_ZZZ + #define DO_TRN(NAME, TYPE, H) \ void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \ { \ @@ -3993,15 +4129,6 @@ static uint32_t compute_brks_m(uint64_t *d, uint64_t *n, uint64_t *g, return flags; } -static uint32_t do_zero(ARMPredicateReg *d, intptr_t oprsz) -{ - /* It is quicker to zero the whole predicate than loop on OPRSZ. - * The compiler should turn this into 4 64-bit integer stores. 
- */ - memset(d, 0, sizeof(ARMPredicateReg)); - return PREDTEST_INIT; -} - void HELPER(sve_brkpa)(void *vd, void *vn, void *vm, void *vg, uint32_t pred_desc) { @@ -4009,7 +4136,7 @@ void HELPER(sve_brkpa)(void *vd, void *vn, void *vm, void *vg, if (last_active_pred(vn, vg, oprsz)) { compute_brk_z(vd, vm, vg, oprsz, true); } else { - do_zero(vd, oprsz); + memset(vd, 0, sizeof(ARMPredicateReg)); } } @@ -4020,7 +4147,8 @@ uint32_t HELPER(sve_brkpas)(void *vd, void *vn, void *vm, void *vg, if (last_active_pred(vn, vg, oprsz)) { return compute_brks_z(vd, vm, vg, oprsz, true); } else { - return do_zero(vd, oprsz); + memset(vd, 0, sizeof(ARMPredicateReg)); + return PREDTEST_INIT; } } @@ -4031,7 +4159,7 @@ void HELPER(sve_brkpb)(void *vd, void *vn, void *vm, void *vg, if (last_active_pred(vn, vg, oprsz)) { compute_brk_z(vd, vm, vg, oprsz, false); } else { - do_zero(vd, oprsz); + memset(vd, 0, sizeof(ARMPredicateReg)); } } @@ -4042,7 +4170,8 @@ uint32_t HELPER(sve_brkpbs)(void *vd, void *vn, void *vm, void *vg, if (last_active_pred(vn, vg, oprsz)) { return compute_brks_z(vd, vm, vg, oprsz, false); } else { - return do_zero(vd, oprsz); + memset(vd, 0, sizeof(ARMPredicateReg)); + return PREDTEST_INIT; } } @@ -4098,35 +4227,30 @@ void HELPER(sve_brkn)(void *vd, void *vn, void *vg, uint32_t pred_desc) { intptr_t oprsz = FIELD_EX32(pred_desc, PREDDESC, OPRSZ); if (!last_active_pred(vn, vg, oprsz)) { - do_zero(vd, oprsz); + memset(vd, 0, sizeof(ARMPredicateReg)); } } -/* As if PredTest(Ones(PL), D, esz). */ -static uint32_t predtest_ones(ARMPredicateReg *d, intptr_t oprsz, - uint64_t esz_mask) -{ - uint32_t flags = PREDTEST_INIT; - intptr_t i; - - for (i = 0; i < oprsz / 8; i++) { - flags = iter_predtest_fwd(d->p[i], esz_mask, flags); - } - if (oprsz & 7) { - uint64_t mask = ~(-1ULL << (8 * (oprsz & 7))); - flags = iter_predtest_fwd(d->p[i], esz_mask & mask, flags); - } - return flags; -} - uint32_t HELPER(sve_brkns)(void *vd, void *vn, void *vg, uint32_t pred_desc) { intptr_t oprsz = FIELD_EX32(pred_desc, PREDDESC, OPRSZ); if (last_active_pred(vn, vg, oprsz)) { - return predtest_ones(vd, oprsz, -1); - } else { - return do_zero(vd, oprsz); + ARMPredicateReg *d = vd; + uint32_t flags = PREDTEST_INIT; + intptr_t i; + + /* As if PredTest(Ones(PL), D, MO_8). 
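+ * iter_predtest_fwd accumulates the NZCV flags over each 64-bit
+ * word of D against an all-ones governing mask; the final partial
+ * word is masked down to the predicate length.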
*/ + for (i = 0; i < oprsz / 8; i++) { + flags = iter_predtest_fwd(d->p[i], -1, flags); + } + if (oprsz & 7) { + uint64_t mask = ~(-1ULL << (8 * (oprsz & 7))); + flags = iter_predtest_fwd(d->p[i], mask, flags); + } + return flags; } + memset(vd, 0, sizeof(ARMPredicateReg)); + return PREDTEST_INIT; } uint64_t HELPER(sve_cntp)(void *vn, void *vg, uint32_t pred_desc) @@ -4143,66 +4267,200 @@ uint64_t HELPER(sve_cntp)(void *vn, void *vg, uint32_t pred_desc) return sum; } -uint32_t HELPER(sve_whilel)(void *vd, uint32_t count, uint32_t pred_desc) +uint64_t HELPER(sve2p1_cntp_c)(uint32_t png, uint32_t desc) { - intptr_t oprsz = FIELD_EX32(pred_desc, PREDDESC, OPRSZ); - intptr_t esz = FIELD_EX32(pred_desc, PREDDESC, ESZ); - uint64_t esz_mask = pred_esz_masks[esz]; - ARMPredicateReg *d = vd; - uint32_t flags; - intptr_t i; + int pl = FIELD_EX32(desc, PREDDESC, OPRSZ); + int vl = pl * 8; + unsigned v_esz = FIELD_EX32(desc, PREDDESC, ESZ); + int lg2_width = FIELD_EX32(desc, PREDDESC, DATA) + 1; + DecodeCounter p = decode_counter(png, vl, v_esz); + unsigned maxelem = (vl << lg2_width) >> v_esz; + unsigned count = p.count; + + if (p.invert) { + if (count >= maxelem) { + return 0; + } + count = maxelem - count; + } else { + count = MIN(count, maxelem); + } + return count >> p.lg2_stride; +} + +/* C.f. Arm pseudocode EncodePredCount */ +static uint64_t encode_pred_count(uint32_t elements, uint32_t count, + uint32_t esz, bool invert) +{ + uint32_t pred; - /* Begin with a zero predicate register. */ - flags = do_zero(d, oprsz); if (count == 0) { - return flags; + return 0; + } + if (invert) { + count = elements - count; + } else if (count == elements) { + count = 0; + invert = true; } - /* Set all of the requested bits. */ - for (i = 0; i < count / 64; ++i) { - d->p[i] = esz_mask; + pred = (count << 1) | 1; + pred <<= esz; + pred |= invert << 15; + + return pred; +} + +/* C.f. Arm pseudocode PredCountTest */ +static uint32_t pred_count_test(uint32_t elements, uint32_t count, bool invert) +{ + uint32_t flags; + + if (count == 0) { + flags = 1; /* !N, Z, C */ + } else if (!invert) { + flags = (1u << 31) | 2; /* N, !Z */ + flags |= count != elements; /* C */ + } else { + flags = 2; /* !Z, !C */ + flags |= (count == elements) << 31; /* N */ } - if (count & 63) { - d->p[i] = MAKE_64BIT_MASK(0, count & 63) & esz_mask; + return flags; +} + +/* D must be cleared on entry. */ +static void do_whilel(ARMPredicateReg *d, uint64_t esz_mask, + uint32_t count, uint32_t oprbits) +{ + tcg_debug_assert(count <= oprbits); + if (count) { + uint32_t i; + + /* Set all of the requested bits. 
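+ * COUNT has already been scaled to a bit count by the caller
+ * (count <<= esz); ESZ_MASK keeps only the low bit of each element
+ * so the result stays in canonical predicate form.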
*/ + for (i = 0; i < count / 64; ++i) { + d->p[i] = esz_mask; + } + if (count & 63) { + d->p[i] = MAKE_64BIT_MASK(0, count & 63) & esz_mask; + } } +} + +uint32_t HELPER(sve_whilel)(void *vd, uint32_t count, uint32_t pred_desc) +{ + uint32_t oprsz = FIELD_EX32(pred_desc, PREDDESC, OPRSZ); + uint32_t esz = FIELD_EX32(pred_desc, PREDDESC, ESZ); + uint32_t oprbits = oprsz * 8; + uint64_t esz_mask = pred_esz_masks[esz]; + ARMPredicateReg *d = vd; - return predtest_ones(d, oprsz, esz_mask); + count <<= esz; + memset(d, 0, sizeof(*d)); + do_whilel(d, esz_mask, count, oprbits); + return pred_count_test(oprbits, count, false); } -uint32_t HELPER(sve_whileg)(void *vd, uint32_t count, uint32_t pred_desc) +uint32_t HELPER(sve_while2l)(void *vd, uint32_t count, uint32_t pred_desc) { - intptr_t oprsz = FIELD_EX32(pred_desc, PREDDESC, OPRSZ); - intptr_t esz = FIELD_EX32(pred_desc, PREDDESC, ESZ); + uint32_t oprsz = FIELD_EX32(pred_desc, PREDDESC, OPRSZ); + uint32_t esz = FIELD_EX32(pred_desc, PREDDESC, ESZ); + uint32_t oprbits = oprsz * 8; uint64_t esz_mask = pred_esz_masks[esz]; ARMPredicateReg *d = vd; - intptr_t i, invcount, oprbits; - uint64_t bits; - if (count == 0) { - return do_zero(d, oprsz); + count <<= esz; + memset(d, 0, 2 * sizeof(*d)); + if (count <= oprbits) { + do_whilel(&d[0], esz_mask, count, oprbits); + } else { + do_whilel(&d[0], esz_mask, oprbits, oprbits); + do_whilel(&d[1], esz_mask, count - oprbits, oprbits); } - oprbits = oprsz * 8; + return pred_count_test(2 * oprbits, count, false); +} + +uint32_t HELPER(sve_whilecl)(void *vd, uint32_t count, uint32_t pred_desc) +{ + uint32_t pl = FIELD_EX32(pred_desc, PREDDESC, OPRSZ); + uint32_t esz = FIELD_EX32(pred_desc, PREDDESC, ESZ); + uint32_t scale = FIELD_EX32(pred_desc, PREDDESC, DATA); + uint32_t vl = pl * 8; + uint32_t elements = (vl >> esz) << scale; + ARMPredicateReg *d = vd; + + *d = (ARMPredicateReg) { + .p[0] = encode_pred_count(elements, count, esz, false) + }; + return pred_count_test(elements, count, false); +} + +/* D must be cleared on entry. 
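+ * (sve_whileg and sve_while2g memset the destination predicates
+ * first, so only the words from invcount / 64 upward are written.)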
*/ +static void do_whileg(ARMPredicateReg *d, uint64_t esz_mask, + uint32_t count, uint32_t oprbits) +{ tcg_debug_assert(count <= oprbits); + if (count) { + uint32_t i, invcount = oprbits - count; + uint64_t bits = esz_mask & MAKE_64BIT_MASK(invcount & 63, 64); - bits = esz_mask; - if (oprbits & 63) { - bits &= MAKE_64BIT_MASK(0, oprbits & 63); + for (i = invcount / 64; i < oprbits / 64; ++i) { + d->p[i] = bits; + bits = esz_mask; + } + if (oprbits & 63) { + d->p[i] = bits & MAKE_64BIT_MASK(0, oprbits & 63); + } } +} - invcount = oprbits - count; - for (i = (oprsz - 1) / 8; i > invcount / 64; --i) { - d->p[i] = bits; - bits = esz_mask; - } +uint32_t HELPER(sve_whileg)(void *vd, uint32_t count, uint32_t pred_desc) +{ + uint32_t oprsz = FIELD_EX32(pred_desc, PREDDESC, OPRSZ); + uint32_t esz = FIELD_EX32(pred_desc, PREDDESC, ESZ); + uint32_t oprbits = oprsz * 8; + uint64_t esz_mask = pred_esz_masks[esz]; + ARMPredicateReg *d = vd; - d->p[i] = bits & MAKE_64BIT_MASK(invcount & 63, 64); + count <<= esz; + memset(d, 0, sizeof(*d)); + do_whileg(d, esz_mask, count, oprbits); + return pred_count_test(oprbits, count, true); +} - while (--i >= 0) { - d->p[i] = 0; +uint32_t HELPER(sve_while2g)(void *vd, uint32_t count, uint32_t pred_desc) +{ + uint32_t oprsz = FIELD_EX32(pred_desc, PREDDESC, OPRSZ); + uint32_t esz = FIELD_EX32(pred_desc, PREDDESC, ESZ); + uint32_t oprbits = oprsz * 8; + uint64_t esz_mask = pred_esz_masks[esz]; + ARMPredicateReg *d = vd; + + count <<= esz; + memset(d, 0, 2 * sizeof(*d)); + if (count <= oprbits) { + do_whileg(&d[1], esz_mask, count, oprbits); + } else { + do_whilel(&d[1], esz_mask, oprbits, oprbits); + do_whileg(&d[0], esz_mask, count - oprbits, oprbits); } - return predtest_ones(d, oprsz, esz_mask); + return pred_count_test(2 * oprbits, count, true); +} + +uint32_t HELPER(sve_whilecg)(void *vd, uint32_t count, uint32_t pred_desc) +{ + uint32_t pl = FIELD_EX32(pred_desc, PREDDESC, OPRSZ); + uint32_t esz = FIELD_EX32(pred_desc, PREDDESC, ESZ); + uint32_t scale = FIELD_EX32(pred_desc, PREDDESC, DATA); + uint32_t vl = pl * 8; + uint32_t elements = (vl >> esz) << scale; + ARMPredicateReg *d = vd; + + *d = (ARMPredicateReg) { + .p[0] = encode_pred_count(elements, count, esz, true) + }; + return pred_count_test(elements, count, true); } /* Recursive reduction on a function; @@ -4213,66 +4471,87 @@ uint32_t HELPER(sve_whileg)(void *vd, uint32_t count, uint32_t pred_desc) * The recursion is bounded to depth 7 (128 fp16 elements), so there's * little to gain with a more complex non-recursive form. 
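 * (sizeof(ARMVectorReg) is 256 bytes, so at most 128 fp16 elements; each level
 * halves the element count, and 2^7 = 128.)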
*/ -#define DO_REDUCE(NAME, TYPE, H, FUNC, IDENT) \ -static TYPE NAME##_reduce(TYPE *data, float_status *status, uintptr_t n) \ +#define DO_REDUCE(NAME, SUF, TYPE, H, FUNC, IDENT) \ +static TYPE FUNC##_reduce(TYPE *data, float_status *status, uintptr_t n) \ { \ if (n == 1) { \ return *data; \ } else { \ uintptr_t half = n / 2; \ - TYPE lo = NAME##_reduce(data, status, half); \ - TYPE hi = NAME##_reduce(data + half, status, half); \ + TYPE lo = FUNC##_reduce(data, status, half); \ + TYPE hi = FUNC##_reduce(data + half, status, half); \ return FUNC(lo, hi, status); \ } \ } \ -uint64_t HELPER(NAME)(void *vn, void *vg, float_status *s, uint32_t desc) \ +uint64_t helper_sve_##NAME##v_##SUF(void *vn, void *vg, \ + float_status *status, uint32_t desc) \ { \ uintptr_t i, oprsz = simd_oprsz(desc), maxsz = simd_data(desc); \ TYPE data[sizeof(ARMVectorReg) / sizeof(TYPE)]; \ + TYPE ident = IDENT; \ for (i = 0; i < oprsz; ) { \ uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3)); \ do { \ TYPE nn = *(TYPE *)(vn + H(i)); \ - *(TYPE *)((void *)data + i) = (pg & 1 ? nn : IDENT); \ + *(TYPE *)((void *)data + i) = (pg & 1 ? nn : ident); \ i += sizeof(TYPE), pg >>= sizeof(TYPE); \ } while (i & 15); \ } \ for (; i < maxsz; i += sizeof(TYPE)) { \ - *(TYPE *)((void *)data + i) = IDENT; \ + *(TYPE *)((void *)data + i) = ident; \ } \ - return NAME##_reduce(data, s, maxsz / sizeof(TYPE)); \ + return FUNC##_reduce(data, status, maxsz / sizeof(TYPE)); \ +} \ +void helper_sve2p1_##NAME##qv_##SUF(void *vd, void *vn, void *vg, \ + float_status *status, uint32_t desc) \ +{ \ + unsigned oprsz = simd_oprsz(desc), segments = oprsz / 16; \ + TYPE ident = IDENT; \ + for (unsigned e = 0; e < 16; e += sizeof(TYPE)) { \ + TYPE data[ARM_MAX_VQ]; \ + for (unsigned s = 0; s < segments; s++) { \ + uint16_t pg = *(uint16_t *)(vg + H1_2(s * 2)); \ + TYPE nn = *(TYPE *)(vn + (s * 16 + H(e))); \ + data[s] = (pg >> e) & 1 ? nn : ident; \ + } \ + *(TYPE *)(vd + H(e)) = FUNC##_reduce(data, status, segments); \ + } \ + clear_tail(vd, 16, simd_maxsz(desc)); \ } -DO_REDUCE(sve_faddv_h, float16, H1_2, float16_add, float16_zero) -DO_REDUCE(sve_faddv_s, float32, H1_4, float32_add, float32_zero) -DO_REDUCE(sve_faddv_d, float64, H1_8, float64_add, float64_zero) +DO_REDUCE(fadd,h, float16, H1_2, float16_add, float16_zero) +DO_REDUCE(fadd,s, float32, H1_4, float32_add, float32_zero) +DO_REDUCE(fadd,d, float64, H1_8, float64_add, float64_zero) -/* Identity is floatN_default_nan, without the function call. */ -DO_REDUCE(sve_fminnmv_h, float16, H1_2, float16_minnum, 0x7E00) -DO_REDUCE(sve_fminnmv_s, float32, H1_4, float32_minnum, 0x7FC00000) -DO_REDUCE(sve_fminnmv_d, float64, H1_8, float64_minnum, 0x7FF8000000000000ULL) +/* + * We can't avoid the function call for the default NaN value, because + * it changes when FPCR.AH is set. 
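+ * Instead, IDENT is evaluated once per helper call ('TYPE ident = IDENT'),
+ * with the helper's float_status argument in scope.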
+ */ +DO_REDUCE(fminnm,h, float16, H1_2, float16_minnum, float16_default_nan(status)) +DO_REDUCE(fminnm,s, float32, H1_4, float32_minnum, float32_default_nan(status)) +DO_REDUCE(fminnm,d, float64, H1_8, float64_minnum, float64_default_nan(status)) -DO_REDUCE(sve_fmaxnmv_h, float16, H1_2, float16_maxnum, 0x7E00) -DO_REDUCE(sve_fmaxnmv_s, float32, H1_4, float32_maxnum, 0x7FC00000) -DO_REDUCE(sve_fmaxnmv_d, float64, H1_8, float64_maxnum, 0x7FF8000000000000ULL) +DO_REDUCE(fmaxnm,h, float16, H1_2, float16_maxnum, float16_default_nan(status)) +DO_REDUCE(fmaxnm,s, float32, H1_4, float32_maxnum, float32_default_nan(status)) +DO_REDUCE(fmaxnm,d, float64, H1_8, float64_maxnum, float64_default_nan(status)) -DO_REDUCE(sve_fminv_h, float16, H1_2, float16_min, float16_infinity) -DO_REDUCE(sve_fminv_s, float32, H1_4, float32_min, float32_infinity) -DO_REDUCE(sve_fminv_d, float64, H1_8, float64_min, float64_infinity) +DO_REDUCE(fmin,h, float16, H1_2, float16_min, float16_infinity) +DO_REDUCE(fmin,s, float32, H1_4, float32_min, float32_infinity) +DO_REDUCE(fmin,d, float64, H1_8, float64_min, float64_infinity) -DO_REDUCE(sve_fmaxv_h, float16, H1_2, float16_max, float16_chs(float16_infinity)) -DO_REDUCE(sve_fmaxv_s, float32, H1_4, float32_max, float32_chs(float32_infinity)) -DO_REDUCE(sve_fmaxv_d, float64, H1_8, float64_max, float64_chs(float64_infinity)) +DO_REDUCE(fmax,h, float16, H1_2, float16_max, float16_chs(float16_infinity)) +DO_REDUCE(fmax,s, float32, H1_4, float32_max, float32_chs(float32_infinity)) +DO_REDUCE(fmax,d, float64, H1_8, float64_max, float64_chs(float64_infinity)) -DO_REDUCE(sve_ah_fminv_h, float16, H1_2, helper_vfp_ah_minh, float16_infinity) -DO_REDUCE(sve_ah_fminv_s, float32, H1_4, helper_vfp_ah_mins, float32_infinity) -DO_REDUCE(sve_ah_fminv_d, float64, H1_8, helper_vfp_ah_mind, float64_infinity) +DO_REDUCE(ah_fmin,h, float16, H1_2, helper_vfp_ah_minh, float16_infinity) +DO_REDUCE(ah_fmin,s, float32, H1_4, helper_vfp_ah_mins, float32_infinity) +DO_REDUCE(ah_fmin,d, float64, H1_8, helper_vfp_ah_mind, float64_infinity) -DO_REDUCE(sve_ah_fmaxv_h, float16, H1_2, helper_vfp_ah_maxh, +DO_REDUCE(ah_fmax,h, float16, H1_2, helper_vfp_ah_maxh, float16_chs(float16_infinity)) -DO_REDUCE(sve_ah_fmaxv_s, float32, H1_4, helper_vfp_ah_maxs, +DO_REDUCE(ah_fmax,s, float32, H1_4, helper_vfp_ah_maxs, float32_chs(float32_infinity)) -DO_REDUCE(sve_ah_fmaxv_d, float64, H1_8, helper_vfp_ah_maxd, +DO_REDUCE(ah_fmax,d, float64, H1_8, helper_vfp_ah_maxd, float64_chs(float64_infinity)) #undef DO_REDUCE @@ -4355,14 +4634,17 @@ void HELPER(NAME)(void *vd, void *vn, void *vm, void *vg, \ } while (i != 0); \ } +DO_ZPZZ_FP(sve_fadd_b16, uint16_t, H1_2, bfloat16_add) DO_ZPZZ_FP(sve_fadd_h, uint16_t, H1_2, float16_add) DO_ZPZZ_FP(sve_fadd_s, uint32_t, H1_4, float32_add) DO_ZPZZ_FP(sve_fadd_d, uint64_t, H1_8, float64_add) +DO_ZPZZ_FP(sve_fsub_b16, uint16_t, H1_2, bfloat16_sub) DO_ZPZZ_FP(sve_fsub_h, uint16_t, H1_2, float16_sub) DO_ZPZZ_FP(sve_fsub_s, uint32_t, H1_4, float32_sub) DO_ZPZZ_FP(sve_fsub_d, uint64_t, H1_8, float64_sub) +DO_ZPZZ_FP(sve_fmul_b16, uint16_t, H1_2, bfloat16_mul) DO_ZPZZ_FP(sve_fmul_h, uint16_t, H1_2, float16_mul) DO_ZPZZ_FP(sve_fmul_s, uint32_t, H1_4, float32_mul) DO_ZPZZ_FP(sve_fmul_d, uint64_t, H1_8, float64_mul) @@ -4371,26 +4653,32 @@ DO_ZPZZ_FP(sve_fdiv_h, uint16_t, H1_2, float16_div) DO_ZPZZ_FP(sve_fdiv_s, uint32_t, H1_4, float32_div) DO_ZPZZ_FP(sve_fdiv_d, uint64_t, H1_8, float64_div) +DO_ZPZZ_FP(sve_fmin_b16, uint16_t, H1_2, bfloat16_min) DO_ZPZZ_FP(sve_fmin_h, uint16_t, H1_2, float16_min) 
DO_ZPZZ_FP(sve_fmin_s, uint32_t, H1_4, float32_min) DO_ZPZZ_FP(sve_fmin_d, uint64_t, H1_8, float64_min) +DO_ZPZZ_FP(sve_fmax_b16, uint16_t, H1_2, bfloat16_max) DO_ZPZZ_FP(sve_fmax_h, uint16_t, H1_2, float16_max) DO_ZPZZ_FP(sve_fmax_s, uint32_t, H1_4, float32_max) DO_ZPZZ_FP(sve_fmax_d, uint64_t, H1_8, float64_max) +DO_ZPZZ_FP(sve_ah_fmin_b16, uint16_t, H1_2, helper_sme2_ah_fmin_b16) DO_ZPZZ_FP(sve_ah_fmin_h, uint16_t, H1_2, helper_vfp_ah_minh) DO_ZPZZ_FP(sve_ah_fmin_s, uint32_t, H1_4, helper_vfp_ah_mins) DO_ZPZZ_FP(sve_ah_fmin_d, uint64_t, H1_8, helper_vfp_ah_mind) +DO_ZPZZ_FP(sve_ah_fmax_b16, uint16_t, H1_2, helper_sme2_ah_fmax_b16) DO_ZPZZ_FP(sve_ah_fmax_h, uint16_t, H1_2, helper_vfp_ah_maxh) DO_ZPZZ_FP(sve_ah_fmax_s, uint32_t, H1_4, helper_vfp_ah_maxs) DO_ZPZZ_FP(sve_ah_fmax_d, uint64_t, H1_8, helper_vfp_ah_maxd) +DO_ZPZZ_FP(sve_fminnum_b16, uint16_t, H1_2, bfloat16_minnum) DO_ZPZZ_FP(sve_fminnum_h, uint16_t, H1_2, float16_minnum) DO_ZPZZ_FP(sve_fminnum_s, uint32_t, H1_4, float32_minnum) DO_ZPZZ_FP(sve_fminnum_d, uint64_t, H1_8, float64_minnum) +DO_ZPZZ_FP(sve_fmaxnum_b16, uint16_t, H1_2, bfloat16_maxnum) DO_ZPZZ_FP(sve_fmaxnum_h, uint16_t, H1_2, float16_maxnum) DO_ZPZZ_FP(sve_fmaxnum_s, uint32_t, H1_4, float32_maxnum) DO_ZPZZ_FP(sve_fmaxnum_d, uint64_t, H1_8, float64_maxnum) @@ -4554,7 +4842,7 @@ void HELPER(NAME)(void *vd, void *vn, void *vg, \ * FZ16. When converting from fp16, this affects flushing input denormals; * when converting to fp16, this affects flushing output denormals. */ -static inline float32 sve_f16_to_f32(float16 f, float_status *fpst) +float32 sve_f16_to_f32(float16 f, float_status *fpst) { bool save = get_flush_inputs_to_zero(fpst); float32 ret; @@ -4576,7 +4864,7 @@ static inline float64 sve_f16_to_f64(float16 f, float_status *fpst) return ret; } -static inline float16 sve_f32_to_f16(float32 f, float_status *fpst) +float16 sve_f32_to_f16(float32 f, float_status *fpst) { bool save = get_flush_to_zero(fpst); float16 ret; @@ -4816,6 +5104,75 @@ DO_ZPZ_FP(flogb_d, float64, H1_8, do_float64_logb_as_int) #undef DO_ZPZ_FP +static void do_fmla_zpzzz_b16(void *vd, void *vn, void *vm, void *va, void *vg, + float_status *status, uint32_t desc, + uint16_t neg1, uint16_t neg3, int flags) +{ + intptr_t i = simd_oprsz(desc); + uint64_t *g = vg; + + do { + uint64_t pg = g[(i - 1) >> 6]; + do { + i -= 2; + if (likely((pg >> (i & 63)) & 1)) { + float16 e1, e2, e3, r; + + e1 = *(uint16_t *)(vn + H1_2(i)) ^ neg1; + e2 = *(uint16_t *)(vm + H1_2(i)); + e3 = *(uint16_t *)(va + H1_2(i)) ^ neg3; + r = bfloat16_muladd(e1, e2, e3, flags, status); + *(uint16_t *)(vd + H1_2(i)) = r; + } + } while (i & 63); + } while (i != 0); +} + +void HELPER(sve_fmla_zpzzz_b16)(void *vd, void *vn, void *vm, void *va, + void *vg, float_status *status, uint32_t desc) +{ + do_fmla_zpzzz_b16(vd, vn, vm, va, vg, status, desc, 0, 0, 0); +} + +void HELPER(sve_fmls_zpzzz_b16)(void *vd, void *vn, void *vm, void *va, + void *vg, float_status *status, uint32_t desc) +{ + do_fmla_zpzzz_b16(vd, vn, vm, va, vg, status, desc, 0x8000, 0, 0); +} + +void HELPER(sve_fnmla_zpzzz_b16)(void *vd, void *vn, void *vm, void *va, + void *vg, float_status *status, uint32_t desc) +{ + do_fmla_zpzzz_b16(vd, vn, vm, va, vg, status, desc, 0x8000, 0x8000, 0); +} + +void HELPER(sve_fnmls_zpzzz_b16)(void *vd, void *vn, void *vm, void *va, + void *vg, float_status *status, uint32_t desc) +{ + do_fmla_zpzzz_b16(vd, vn, vm, va, vg, status, desc, 0, 0x8000, 0); +} + +void HELPER(sve_ah_fmls_zpzzz_b16)(void *vd, void *vn, void *vm, void *va, + 
void *vg, float_status *status, uint32_t desc) +{ + do_fmla_zpzzz_b16(vd, vn, vm, va, vg, status, desc, 0, 0, + float_muladd_negate_product); +} + +void HELPER(sve_ah_fnmla_zpzzz_b16)(void *vd, void *vn, void *vm, void *va, + void *vg, float_status *status, uint32_t desc) +{ + do_fmla_zpzzz_b16(vd, vn, vm, va, vg, status, desc, 0, 0, + float_muladd_negate_product | float_muladd_negate_c); +} + +void HELPER(sve_ah_fnmls_zpzzz_b16)(void *vd, void *vn, void *vm, void *va, + void *vg, float_status *status, uint32_t desc) +{ + do_fmla_zpzzz_b16(vd, vn, vm, va, vg, status, desc, 0, 0, + float_muladd_negate_c); +} + static void do_fmla_zpzzz_h(void *vd, void *vn, void *vm, void *va, void *vg, float_status *status, uint32_t desc, uint16_t neg1, uint16_t neg3, int flags) @@ -6005,17 +6362,14 @@ void sve_ldN_r(CPUARMState *env, uint64_t *vg, const target_ulong addr, static inline QEMU_ALWAYS_INLINE void sve_ldN_r_mte(CPUARMState *env, uint64_t *vg, target_ulong addr, - uint32_t desc, const uintptr_t ra, + uint64_t desc, const uintptr_t ra, const int esz, const int msz, const int N, sve_ldst1_host_fn *host_fn, sve_ldst1_tlb_fn *tlb_fn) { - uint32_t mtedesc = desc >> (SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT); + uint32_t mtedesc = desc >> 32; int bit55 = extract64(addr, 55, 1); - /* Remove mtedesc from the normal sve descriptor. */ - desc = extract32(desc, 0, SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT); - /* Perform gross MTE suppression early. */ if (!tbi_check(mtedesc, bit55) || tcma_check(mtedesc, bit55, allocation_tag_from_addr(addr))) { @@ -6027,13 +6381,13 @@ void sve_ldN_r_mte(CPUARMState *env, uint64_t *vg, target_ulong addr, #define DO_LD1_1(NAME, ESZ) \ void HELPER(sve_##NAME##_r)(CPUARMState *env, void *vg, \ - target_ulong addr, uint32_t desc) \ + target_ulong addr, uint64_t desc) \ { \ sve_ldN_r(env, vg, addr, desc, GETPC(), ESZ, MO_8, 1, 0, \ sve_##NAME##_host, sve_##NAME##_tlb); \ } \ void HELPER(sve_##NAME##_r_mte)(CPUARMState *env, void *vg, \ - target_ulong addr, uint32_t desc) \ + target_ulong addr, uint64_t desc) \ { \ sve_ldN_r_mte(env, vg, addr, desc, GETPC(), ESZ, MO_8, 1, \ sve_##NAME##_host, sve_##NAME##_tlb); \ @@ -6041,25 +6395,25 @@ void HELPER(sve_##NAME##_r_mte)(CPUARMState *env, void *vg, \ #define DO_LD1_2(NAME, ESZ, MSZ) \ void HELPER(sve_##NAME##_le_r)(CPUARMState *env, void *vg, \ - target_ulong addr, uint32_t desc) \ + target_ulong addr, uint64_t desc) \ { \ sve_ldN_r(env, vg, addr, desc, GETPC(), ESZ, MSZ, 1, 0, \ sve_##NAME##_le_host, sve_##NAME##_le_tlb); \ } \ void HELPER(sve_##NAME##_be_r)(CPUARMState *env, void *vg, \ - target_ulong addr, uint32_t desc) \ + target_ulong addr, uint64_t desc) \ { \ sve_ldN_r(env, vg, addr, desc, GETPC(), ESZ, MSZ, 1, 0, \ sve_##NAME##_be_host, sve_##NAME##_be_tlb); \ } \ void HELPER(sve_##NAME##_le_r_mte)(CPUARMState *env, void *vg, \ - target_ulong addr, uint32_t desc) \ + target_ulong addr, uint64_t desc) \ { \ sve_ldN_r_mte(env, vg, addr, desc, GETPC(), ESZ, MSZ, 1, \ sve_##NAME##_le_host, sve_##NAME##_le_tlb); \ } \ void HELPER(sve_##NAME##_be_r_mte)(CPUARMState *env, void *vg, \ - target_ulong addr, uint32_t desc) \ + target_ulong addr, uint64_t desc) \ { \ sve_ldN_r_mte(env, vg, addr, desc, GETPC(), ESZ, MSZ, 1, \ sve_##NAME##_be_host, sve_##NAME##_be_tlb); \ @@ -6085,18 +6439,21 @@ DO_LD1_2(ld1sds, MO_64, MO_32) DO_LD1_2(ld1dd, MO_64, MO_64) +DO_LD1_2(ld1squ, MO_128, MO_32) +DO_LD1_2(ld1dqu, MO_128, MO_64) + #undef DO_LD1_1 #undef DO_LD1_2 #define DO_LDN_1(N) \ void HELPER(sve_ld##N##bb_r)(CPUARMState *env, void *vg, \ - target_ulong 
addr, uint32_t desc) \ + target_ulong addr, uint64_t desc) \ { \ sve_ldN_r(env, vg, addr, desc, GETPC(), MO_8, MO_8, N, 0, \ sve_ld1bb_host, sve_ld1bb_tlb); \ } \ void HELPER(sve_ld##N##bb_r_mte)(CPUARMState *env, void *vg, \ - target_ulong addr, uint32_t desc) \ + target_ulong addr, uint64_t desc) \ { \ sve_ldN_r_mte(env, vg, addr, desc, GETPC(), MO_8, MO_8, N, \ sve_ld1bb_host, sve_ld1bb_tlb); \ @@ -6104,25 +6461,25 @@ void HELPER(sve_ld##N##bb_r_mte)(CPUARMState *env, void *vg, \ #define DO_LDN_2(N, SUFF, ESZ) \ void HELPER(sve_ld##N##SUFF##_le_r)(CPUARMState *env, void *vg, \ - target_ulong addr, uint32_t desc) \ + target_ulong addr, uint64_t desc) \ { \ sve_ldN_r(env, vg, addr, desc, GETPC(), ESZ, ESZ, N, 0, \ sve_ld1##SUFF##_le_host, sve_ld1##SUFF##_le_tlb); \ } \ void HELPER(sve_ld##N##SUFF##_be_r)(CPUARMState *env, void *vg, \ - target_ulong addr, uint32_t desc) \ + target_ulong addr, uint64_t desc) \ { \ sve_ldN_r(env, vg, addr, desc, GETPC(), ESZ, ESZ, N, 0, \ sve_ld1##SUFF##_be_host, sve_ld1##SUFF##_be_tlb); \ } \ void HELPER(sve_ld##N##SUFF##_le_r_mte)(CPUARMState *env, void *vg, \ - target_ulong addr, uint32_t desc) \ + target_ulong addr, uint64_t desc) \ { \ sve_ldN_r_mte(env, vg, addr, desc, GETPC(), ESZ, ESZ, N, \ sve_ld1##SUFF##_le_host, sve_ld1##SUFF##_le_tlb); \ } \ void HELPER(sve_ld##N##SUFF##_be_r_mte)(CPUARMState *env, void *vg, \ - target_ulong addr, uint32_t desc) \ + target_ulong addr, uint64_t desc) \ { \ sve_ldN_r_mte(env, vg, addr, desc, GETPC(), ESZ, ESZ, N, \ sve_ld1##SUFF##_be_host, sve_ld1##SUFF##_be_tlb); \ @@ -6144,6 +6501,10 @@ DO_LDN_2(2, dd, MO_64) DO_LDN_2(3, dd, MO_64) DO_LDN_2(4, dd, MO_64) +DO_LDN_2(2, qq, MO_128) +DO_LDN_2(3, qq, MO_128) +DO_LDN_2(4, qq, MO_128) + #undef DO_LDN_1 #undef DO_LDN_2 @@ -6363,17 +6724,14 @@ void sve_ldnfff1_r(CPUARMState *env, void *vg, const target_ulong addr, static inline QEMU_ALWAYS_INLINE void sve_ldnfff1_r_mte(CPUARMState *env, void *vg, target_ulong addr, - uint32_t desc, const uintptr_t retaddr, + uint64_t desc, const uintptr_t retaddr, const int esz, const int msz, const SVEContFault fault, sve_ldst1_host_fn *host_fn, sve_ldst1_tlb_fn *tlb_fn) { - uint32_t mtedesc = desc >> (SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT); + uint32_t mtedesc = desc >> 32; int bit55 = extract64(addr, 55, 1); - /* Remove mtedesc from the normal sve descriptor. */ - desc = extract32(desc, 0, SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT); - /* Perform gross MTE suppression early. 
*/ if (!tbi_check(mtedesc, bit55) || tcma_check(mtedesc, bit55, allocation_tag_from_addr(addr))) { @@ -6386,25 +6744,25 @@ void sve_ldnfff1_r_mte(CPUARMState *env, void *vg, target_ulong addr, #define DO_LDFF1_LDNF1_1(PART, ESZ) \ void HELPER(sve_ldff1##PART##_r)(CPUARMState *env, void *vg, \ - target_ulong addr, uint32_t desc) \ + target_ulong addr, uint64_t desc) \ { \ sve_ldnfff1_r(env, vg, addr, desc, GETPC(), 0, ESZ, MO_8, FAULT_FIRST, \ sve_ld1##PART##_host, sve_ld1##PART##_tlb); \ } \ void HELPER(sve_ldnf1##PART##_r)(CPUARMState *env, void *vg, \ - target_ulong addr, uint32_t desc) \ + target_ulong addr, uint64_t desc) \ { \ sve_ldnfff1_r(env, vg, addr, desc, GETPC(), 0, ESZ, MO_8, FAULT_NO, \ sve_ld1##PART##_host, sve_ld1##PART##_tlb); \ } \ void HELPER(sve_ldff1##PART##_r_mte)(CPUARMState *env, void *vg, \ - target_ulong addr, uint32_t desc) \ + target_ulong addr, uint64_t desc) \ { \ sve_ldnfff1_r_mte(env, vg, addr, desc, GETPC(), ESZ, MO_8, FAULT_FIRST, \ sve_ld1##PART##_host, sve_ld1##PART##_tlb); \ } \ void HELPER(sve_ldnf1##PART##_r_mte)(CPUARMState *env, void *vg, \ - target_ulong addr, uint32_t desc) \ + target_ulong addr, uint64_t desc) \ { \ sve_ldnfff1_r_mte(env, vg, addr, desc, GETPC(), ESZ, MO_8, FAULT_NO, \ sve_ld1##PART##_host, sve_ld1##PART##_tlb); \ @@ -6412,49 +6770,49 @@ void HELPER(sve_ldnf1##PART##_r_mte)(CPUARMState *env, void *vg, \ #define DO_LDFF1_LDNF1_2(PART, ESZ, MSZ) \ void HELPER(sve_ldff1##PART##_le_r)(CPUARMState *env, void *vg, \ - target_ulong addr, uint32_t desc) \ + target_ulong addr, uint64_t desc) \ { \ sve_ldnfff1_r(env, vg, addr, desc, GETPC(), 0, ESZ, MSZ, FAULT_FIRST, \ sve_ld1##PART##_le_host, sve_ld1##PART##_le_tlb); \ } \ void HELPER(sve_ldnf1##PART##_le_r)(CPUARMState *env, void *vg, \ - target_ulong addr, uint32_t desc) \ + target_ulong addr, uint64_t desc) \ { \ sve_ldnfff1_r(env, vg, addr, desc, GETPC(), 0, ESZ, MSZ, FAULT_NO, \ sve_ld1##PART##_le_host, sve_ld1##PART##_le_tlb); \ } \ void HELPER(sve_ldff1##PART##_be_r)(CPUARMState *env, void *vg, \ - target_ulong addr, uint32_t desc) \ + target_ulong addr, uint64_t desc) \ { \ sve_ldnfff1_r(env, vg, addr, desc, GETPC(), 0, ESZ, MSZ, FAULT_FIRST, \ sve_ld1##PART##_be_host, sve_ld1##PART##_be_tlb); \ } \ void HELPER(sve_ldnf1##PART##_be_r)(CPUARMState *env, void *vg, \ - target_ulong addr, uint32_t desc) \ + target_ulong addr, uint64_t desc) \ { \ sve_ldnfff1_r(env, vg, addr, desc, GETPC(), 0, ESZ, MSZ, FAULT_NO, \ sve_ld1##PART##_be_host, sve_ld1##PART##_be_tlb); \ } \ void HELPER(sve_ldff1##PART##_le_r_mte)(CPUARMState *env, void *vg, \ - target_ulong addr, uint32_t desc) \ + target_ulong addr, uint64_t desc) \ { \ sve_ldnfff1_r_mte(env, vg, addr, desc, GETPC(), ESZ, MSZ, FAULT_FIRST, \ sve_ld1##PART##_le_host, sve_ld1##PART##_le_tlb); \ } \ void HELPER(sve_ldnf1##PART##_le_r_mte)(CPUARMState *env, void *vg, \ - target_ulong addr, uint32_t desc) \ + target_ulong addr, uint64_t desc) \ { \ sve_ldnfff1_r_mte(env, vg, addr, desc, GETPC(), ESZ, MSZ, FAULT_NO, \ sve_ld1##PART##_le_host, sve_ld1##PART##_le_tlb); \ } \ void HELPER(sve_ldff1##PART##_be_r_mte)(CPUARMState *env, void *vg, \ - target_ulong addr, uint32_t desc) \ + target_ulong addr, uint64_t desc) \ { \ sve_ldnfff1_r_mte(env, vg, addr, desc, GETPC(), ESZ, MSZ, FAULT_FIRST, \ sve_ld1##PART##_be_host, sve_ld1##PART##_be_tlb); \ } \ void HELPER(sve_ldnf1##PART##_be_r_mte)(CPUARMState *env, void *vg, \ - target_ulong addr, uint32_t desc) \ + target_ulong addr, uint64_t desc) \ { \ sve_ldnfff1_r_mte(env, vg, addr, desc, GETPC(), 
ESZ, MSZ, FAULT_NO, \ sve_ld1##PART##_be_host, sve_ld1##PART##_be_tlb); \ @@ -6621,17 +6979,14 @@ void sve_stN_r(CPUARMState *env, uint64_t *vg, target_ulong addr, static inline QEMU_ALWAYS_INLINE void sve_stN_r_mte(CPUARMState *env, uint64_t *vg, target_ulong addr, - uint32_t desc, const uintptr_t ra, + uint64_t desc, const uintptr_t ra, const int esz, const int msz, const int N, sve_ldst1_host_fn *host_fn, sve_ldst1_tlb_fn *tlb_fn) { - uint32_t mtedesc = desc >> (SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT); + uint32_t mtedesc = desc >> 32; int bit55 = extract64(addr, 55, 1); - /* Remove mtedesc from the normal sve descriptor. */ - desc = extract32(desc, 0, SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT); - /* Perform gross MTE suppression early. */ if (!tbi_check(mtedesc, bit55) || tcma_check(mtedesc, bit55, allocation_tag_from_addr(addr))) { @@ -6643,13 +6998,13 @@ void sve_stN_r_mte(CPUARMState *env, uint64_t *vg, target_ulong addr, #define DO_STN_1(N, NAME, ESZ) \ void HELPER(sve_st##N##NAME##_r)(CPUARMState *env, void *vg, \ - target_ulong addr, uint32_t desc) \ + target_ulong addr, uint64_t desc) \ { \ sve_stN_r(env, vg, addr, desc, GETPC(), ESZ, MO_8, N, 0, \ sve_st1##NAME##_host, sve_st1##NAME##_tlb); \ } \ void HELPER(sve_st##N##NAME##_r_mte)(CPUARMState *env, void *vg, \ - target_ulong addr, uint32_t desc) \ + target_ulong addr, uint64_t desc) \ { \ sve_stN_r_mte(env, vg, addr, desc, GETPC(), ESZ, MO_8, N, \ sve_st1##NAME##_host, sve_st1##NAME##_tlb); \ @@ -6657,25 +7012,25 @@ void HELPER(sve_st##N##NAME##_r_mte)(CPUARMState *env, void *vg, \ #define DO_STN_2(N, NAME, ESZ, MSZ) \ void HELPER(sve_st##N##NAME##_le_r)(CPUARMState *env, void *vg, \ - target_ulong addr, uint32_t desc) \ + target_ulong addr, uint64_t desc) \ { \ sve_stN_r(env, vg, addr, desc, GETPC(), ESZ, MSZ, N, 0, \ sve_st1##NAME##_le_host, sve_st1##NAME##_le_tlb); \ } \ void HELPER(sve_st##N##NAME##_be_r)(CPUARMState *env, void *vg, \ - target_ulong addr, uint32_t desc) \ + target_ulong addr, uint64_t desc) \ { \ sve_stN_r(env, vg, addr, desc, GETPC(), ESZ, MSZ, N, 0, \ sve_st1##NAME##_be_host, sve_st1##NAME##_be_tlb); \ } \ void HELPER(sve_st##N##NAME##_le_r_mte)(CPUARMState *env, void *vg, \ - target_ulong addr, uint32_t desc) \ + target_ulong addr, uint64_t desc) \ { \ sve_stN_r_mte(env, vg, addr, desc, GETPC(), ESZ, MSZ, N, \ sve_st1##NAME##_le_host, sve_st1##NAME##_le_tlb); \ } \ void HELPER(sve_st##N##NAME##_be_r_mte)(CPUARMState *env, void *vg, \ - target_ulong addr, uint32_t desc) \ + target_ulong addr, uint64_t desc) \ { \ sve_stN_r_mte(env, vg, addr, desc, GETPC(), ESZ, MSZ, N, \ sve_st1##NAME##_be_host, sve_st1##NAME##_be_tlb); \ @@ -6707,6 +7062,13 @@ DO_STN_2(2, dd, MO_64, MO_64) DO_STN_2(3, dd, MO_64, MO_64) DO_STN_2(4, dd, MO_64, MO_64) +DO_STN_2(1, sq, MO_128, MO_32) +DO_STN_2(1, dq, MO_128, MO_64) + +DO_STN_2(2, qq, MO_128, MO_128) +DO_STN_2(3, qq, MO_128, MO_128) +DO_STN_2(4, qq, MO_128, MO_128) + #undef DO_STN_1 #undef DO_STN_2 @@ -6812,14 +7174,12 @@ void sve_ld1_z(CPUARMState *env, void *vd, uint64_t *vg, void *vm, static inline QEMU_ALWAYS_INLINE void sve_ld1_z_mte(CPUARMState *env, void *vd, uint64_t *vg, void *vm, - target_ulong base, uint32_t desc, uintptr_t retaddr, + target_ulong base, uint64_t desc, uintptr_t retaddr, int esize, int msize, zreg_off_fn *off_fn, sve_ldst1_host_fn *host_fn, sve_ldst1_tlb_fn *tlb_fn) { - uint32_t mtedesc = desc >> (SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT); - /* Remove mtedesc from the normal sve descriptor. 
*/ - desc = extract32(desc, 0, SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT); + uint32_t mtedesc = desc >> 32; /* * ??? TODO: For the 32-bit offset extractions, base + ofs cannot @@ -6833,13 +7193,13 @@ void sve_ld1_z_mte(CPUARMState *env, void *vd, uint64_t *vg, void *vm, #define DO_LD1_ZPZ_S(MEM, OFS, MSZ) \ void HELPER(sve_ld##MEM##_##OFS)(CPUARMState *env, void *vd, void *vg, \ - void *vm, target_ulong base, uint32_t desc) \ + void *vm, target_ulong base, uint64_t desc) \ { \ sve_ld1_z(env, vd, vg, vm, base, desc, GETPC(), 0, 4, 1 << MSZ, \ off_##OFS##_s, sve_ld1##MEM##_host, sve_ld1##MEM##_tlb); \ } \ void HELPER(sve_ld##MEM##_##OFS##_mte)(CPUARMState *env, void *vd, void *vg, \ - void *vm, target_ulong base, uint32_t desc) \ + void *vm, target_ulong base, uint64_t desc) \ { \ sve_ld1_z_mte(env, vd, vg, vm, base, desc, GETPC(), 4, 1 << MSZ, \ off_##OFS##_s, sve_ld1##MEM##_host, sve_ld1##MEM##_tlb); \ @@ -6847,18 +7207,32 @@ void HELPER(sve_ld##MEM##_##OFS##_mte)(CPUARMState *env, void *vd, void *vg, \ #define DO_LD1_ZPZ_D(MEM, OFS, MSZ) \ void HELPER(sve_ld##MEM##_##OFS)(CPUARMState *env, void *vd, void *vg, \ - void *vm, target_ulong base, uint32_t desc) \ + void *vm, target_ulong base, uint64_t desc) \ { \ sve_ld1_z(env, vd, vg, vm, base, desc, GETPC(), 0, 8, 1 << MSZ, \ off_##OFS##_d, sve_ld1##MEM##_host, sve_ld1##MEM##_tlb); \ } \ void HELPER(sve_ld##MEM##_##OFS##_mte)(CPUARMState *env, void *vd, void *vg, \ - void *vm, target_ulong base, uint32_t desc) \ + void *vm, target_ulong base, uint64_t desc) \ { \ sve_ld1_z_mte(env, vd, vg, vm, base, desc, GETPC(), 8, 1 << MSZ, \ off_##OFS##_d, sve_ld1##MEM##_host, sve_ld1##MEM##_tlb); \ } +#define DO_LD1_ZPZ_Q(MEM, OFS, MSZ) \ +void HELPER(sve_ld##MEM##_##OFS)(CPUARMState *env, void *vd, void *vg, \ + void *vm, target_ulong base, uint64_t desc) \ +{ \ + sve_ld1_z(env, vd, vg, vm, base, desc, GETPC(), 0, 16, 1 << MSZ, \ + off_##OFS##_d, sve_ld1##MEM##_host, sve_ld1##MEM##_tlb); \ +} \ +void HELPER(sve_ld##MEM##_##OFS##_mte)(CPUARMState *env, void *vd, void *vg, \ + void *vm, target_ulong base, uint64_t desc) \ +{ \ + sve_ld1_z_mte(env, vd, vg, vm, base, desc, GETPC(), 16, 1 << MSZ, \ + off_##OFS##_d, sve_ld1##MEM##_host, sve_ld1##MEM##_tlb); \ +} + DO_LD1_ZPZ_S(bsu, zsu, MO_8) DO_LD1_ZPZ_S(bsu, zss, MO_8) DO_LD1_ZPZ_D(bdu, zsu, MO_8) @@ -6923,6 +7297,9 @@ DO_LD1_ZPZ_D(dd_be, zsu, MO_64) DO_LD1_ZPZ_D(dd_be, zss, MO_64) DO_LD1_ZPZ_D(dd_be, zd, MO_64) +DO_LD1_ZPZ_Q(qq_le, zd, MO_128) +DO_LD1_ZPZ_Q(qq_be, zd, MO_128) + #undef DO_LD1_ZPZ_S #undef DO_LD1_ZPZ_D @@ -7021,15 +7398,13 @@ void sve_ldff1_z(CPUARMState *env, void *vd, uint64_t *vg, void *vm, static inline QEMU_ALWAYS_INLINE void sve_ldff1_z_mte(CPUARMState *env, void *vd, uint64_t *vg, void *vm, - target_ulong base, uint32_t desc, uintptr_t retaddr, + target_ulong base, uint64_t desc, uintptr_t retaddr, const int esz, const int msz, zreg_off_fn *off_fn, sve_ldst1_host_fn *host_fn, sve_ldst1_tlb_fn *tlb_fn) { - uint32_t mtedesc = desc >> (SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT); - /* Remove mtedesc from the normal sve descriptor. */ - desc = extract32(desc, 0, SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT); + uint32_t mtedesc = desc >> 32; /* * ??? 
TODO: For the 32-bit offset extractions, base + ofs cannot @@ -7044,14 +7419,14 @@ void sve_ldff1_z_mte(CPUARMState *env, void *vd, uint64_t *vg, void *vm, #define DO_LDFF1_ZPZ_S(MEM, OFS, MSZ) \ void HELPER(sve_ldff##MEM##_##OFS) \ (CPUARMState *env, void *vd, void *vg, \ - void *vm, target_ulong base, uint32_t desc) \ + void *vm, target_ulong base, uint64_t desc) \ { \ sve_ldff1_z(env, vd, vg, vm, base, desc, GETPC(), 0, MO_32, MSZ, \ off_##OFS##_s, sve_ld1##MEM##_host, sve_ld1##MEM##_tlb); \ } \ void HELPER(sve_ldff##MEM##_##OFS##_mte) \ (CPUARMState *env, void *vd, void *vg, \ - void *vm, target_ulong base, uint32_t desc) \ + void *vm, target_ulong base, uint64_t desc) \ { \ sve_ldff1_z_mte(env, vd, vg, vm, base, desc, GETPC(), MO_32, MSZ, \ off_##OFS##_s, sve_ld1##MEM##_host, sve_ld1##MEM##_tlb); \ @@ -7060,14 +7435,14 @@ void HELPER(sve_ldff##MEM##_##OFS##_mte) \ #define DO_LDFF1_ZPZ_D(MEM, OFS, MSZ) \ void HELPER(sve_ldff##MEM##_##OFS) \ (CPUARMState *env, void *vd, void *vg, \ - void *vm, target_ulong base, uint32_t desc) \ + void *vm, target_ulong base, uint64_t desc) \ { \ sve_ldff1_z(env, vd, vg, vm, base, desc, GETPC(), 0, MO_64, MSZ, \ off_##OFS##_d, sve_ld1##MEM##_host, sve_ld1##MEM##_tlb); \ } \ void HELPER(sve_ldff##MEM##_##OFS##_mte) \ (CPUARMState *env, void *vd, void *vg, \ - void *vm, target_ulong base, uint32_t desc) \ + void *vm, target_ulong base, uint64_t desc) \ { \ sve_ldff1_z_mte(env, vd, vg, vm, base, desc, GETPC(), MO_64, MSZ, \ off_##OFS##_d, sve_ld1##MEM##_host, sve_ld1##MEM##_tlb); \ @@ -7226,14 +7601,12 @@ void sve_st1_z(CPUARMState *env, void *vd, uint64_t *vg, void *vm, static inline QEMU_ALWAYS_INLINE void sve_st1_z_mte(CPUARMState *env, void *vd, uint64_t *vg, void *vm, - target_ulong base, uint32_t desc, uintptr_t retaddr, + target_ulong base, uint64_t desc, uintptr_t retaddr, int esize, int msize, zreg_off_fn *off_fn, sve_ldst1_host_fn *host_fn, sve_ldst1_tlb_fn *tlb_fn) { - uint32_t mtedesc = desc >> (SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT); - /* Remove mtedesc from the normal sve descriptor. */ - desc = extract32(desc, 0, SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT); + uint32_t mtedesc = desc >> 32; /* * ??? 
TODO: For the 32-bit offset extractions, base + ofs cannot @@ -7247,13 +7620,13 @@ void sve_st1_z_mte(CPUARMState *env, void *vd, uint64_t *vg, void *vm, #define DO_ST1_ZPZ_S(MEM, OFS, MSZ) \ void HELPER(sve_st##MEM##_##OFS)(CPUARMState *env, void *vd, void *vg, \ - void *vm, target_ulong base, uint32_t desc) \ + void *vm, target_ulong base, uint64_t desc) \ { \ sve_st1_z(env, vd, vg, vm, base, desc, GETPC(), 0, 4, 1 << MSZ, \ off_##OFS##_s, sve_st1##MEM##_host, sve_st1##MEM##_tlb); \ } \ void HELPER(sve_st##MEM##_##OFS##_mte)(CPUARMState *env, void *vd, void *vg, \ - void *vm, target_ulong base, uint32_t desc) \ + void *vm, target_ulong base, uint64_t desc) \ { \ sve_st1_z_mte(env, vd, vg, vm, base, desc, GETPC(), 4, 1 << MSZ, \ off_##OFS##_s, sve_st1##MEM##_host, sve_st1##MEM##_tlb); \ @@ -7261,18 +7634,32 @@ void HELPER(sve_st##MEM##_##OFS##_mte)(CPUARMState *env, void *vd, void *vg, \ #define DO_ST1_ZPZ_D(MEM, OFS, MSZ) \ void HELPER(sve_st##MEM##_##OFS)(CPUARMState *env, void *vd, void *vg, \ - void *vm, target_ulong base, uint32_t desc) \ + void *vm, target_ulong base, uint64_t desc) \ { \ sve_st1_z(env, vd, vg, vm, base, desc, GETPC(), 0, 8, 1 << MSZ, \ off_##OFS##_d, sve_st1##MEM##_host, sve_st1##MEM##_tlb); \ } \ void HELPER(sve_st##MEM##_##OFS##_mte)(CPUARMState *env, void *vd, void *vg, \ - void *vm, target_ulong base, uint32_t desc) \ + void *vm, target_ulong base, uint64_t desc) \ { \ sve_st1_z_mte(env, vd, vg, vm, base, desc, GETPC(), 8, 1 << MSZ, \ off_##OFS##_d, sve_st1##MEM##_host, sve_st1##MEM##_tlb); \ } +#define DO_ST1_ZPZ_Q(MEM, OFS, MSZ) \ +void HELPER(sve_st##MEM##_##OFS)(CPUARMState *env, void *vd, void *vg, \ + void *vm, target_ulong base, uint64_t desc) \ +{ \ + sve_st1_z(env, vd, vg, vm, base, desc, GETPC(), 0, 16, 1 << MSZ, \ + off_##OFS##_d, sve_st1##MEM##_host, sve_st1##MEM##_tlb); \ +} \ +void HELPER(sve_st##MEM##_##OFS##_mte)(CPUARMState *env, void *vd, void *vg, \ + void *vm, target_ulong base, uint64_t desc) \ +{ \ + sve_st1_z_mte(env, vd, vg, vm, base, desc, GETPC(), 16, 1 << MSZ, \ + off_##OFS##_d, sve_st1##MEM##_host, sve_st1##MEM##_tlb); \ +} + DO_ST1_ZPZ_S(bs, zsu, MO_8) DO_ST1_ZPZ_S(hs_le, zsu, MO_16) DO_ST1_ZPZ_S(hs_be, zsu, MO_16) @@ -7309,9 +7696,507 @@ DO_ST1_ZPZ_D(sd_be, zd, MO_32) DO_ST1_ZPZ_D(dd_le, zd, MO_64) DO_ST1_ZPZ_D(dd_be, zd, MO_64) +DO_ST1_ZPZ_Q(qq_le, zd, MO_128) +DO_ST1_ZPZ_Q(qq_be, zd, MO_128) + #undef DO_ST1_ZPZ_S #undef DO_ST1_ZPZ_D +/* + * SVE2.1 consecutive register load/store + */ + +static unsigned sve2p1_cont_ldst_elements(SVEContLdSt *info, vaddr addr, + uint32_t png, intptr_t reg_max, + int N, int v_esz) +{ + const int esize = 1 << v_esz; + intptr_t reg_off_first = -1, reg_off_last = -1, reg_off_split; + DecodeCounter p = decode_counter(png, reg_max, v_esz); + unsigned b_count = p.count << v_esz; + unsigned b_stride = 1 << (v_esz + p.lg2_stride); + intptr_t page_split; + + /* Set all of the element indices to -1, and the TLB data to 0. 
*/ + memset(info, -1, offsetof(SVEContLdSt, page)); + memset(info->page, 0, sizeof(info->page)); + + if (p.invert) { + if (b_count >= reg_max * N) { + return 0; + } + reg_off_first = b_count; + reg_off_last = reg_max * N - b_stride; + } else { + if (b_count == 0) { + return 0; + } + reg_off_first = 0; + reg_off_last = MIN(b_count - esize, reg_max * N - b_stride); + } + + info->reg_off_first[0] = reg_off_first; + info->mem_off_first[0] = reg_off_first; + + page_split = -(addr | TARGET_PAGE_MASK); + if (reg_off_last + esize <= page_split || reg_off_first >= page_split) { + /* The entire operation fits within a single page. */ + info->reg_off_last[0] = reg_off_last; + return b_stride; + } + + info->page_split = page_split; + reg_off_split = ROUND_DOWN(page_split, esize); + + /* + * This is the last full element on the first page, but it is not + * necessarily active. If there is no full element, i.e. the first + * active element is the one that's split, this value remains -1. + * It is useful as iteration bounds. + */ + if (reg_off_split != 0) { + info->reg_off_last[0] = ROUND_DOWN(reg_off_split - esize, b_stride); + } + + /* Determine if an unaligned element spans the pages. */ + if (page_split & (esize - 1)) { + /* It is helpful to know if the split element is active. */ + if ((reg_off_split & (b_stride - 1)) == 0) { + info->reg_off_split = reg_off_split; + info->mem_off_split = reg_off_split; + } + reg_off_split += esize; + } + + /* + * We do want the first active element on the second page, because + * this may affect the address reported in an exception. + */ + reg_off_split = ROUND_UP(reg_off_split, b_stride); + if (reg_off_split <= reg_off_last) { + info->reg_off_first[1] = reg_off_split; + info->mem_off_first[1] = reg_off_split; + info->reg_off_last[1] = reg_off_last; + } + return b_stride; +} + +static void sve2p1_cont_ldst_watchpoints(SVEContLdSt *info, CPUARMState *env, + target_ulong addr, unsigned estride, + int esize, int wp_access, uintptr_t ra) +{ +#ifndef CONFIG_USER_ONLY + intptr_t count_off, count_last; + int flags0 = info->page[0].flags; + int flags1 = info->page[1].flags; + + if (likely(!((flags0 | flags1) & TLB_WATCHPOINT))) { + return; + } + + /* Indicate that watchpoints are handled. */ + info->page[0].flags = flags0 & ~TLB_WATCHPOINT; + info->page[1].flags = flags1 & ~TLB_WATCHPOINT; + + if (flags0 & TLB_WATCHPOINT) { + count_off = info->reg_off_first[0]; + count_last = info->reg_off_split; + if (count_last < 0) { + count_last = info->reg_off_last[0]; + } + do { + cpu_check_watchpoint(env_cpu(env), addr + count_off, + esize, info->page[0].attrs, wp_access, ra); + count_off += estride; + } while (count_off <= count_last); + } + + count_off = info->reg_off_first[1]; + if ((flags1 & TLB_WATCHPOINT) && count_off >= 0) { + count_last = info->reg_off_last[1]; + do { + cpu_check_watchpoint(env_cpu(env), addr + count_off, + esize, info->page[1].attrs, + wp_access, ra); + count_off += estride; + } while (count_off <= count_last); + } +#endif +} + +static void sve2p1_cont_ldst_mte_check(SVEContLdSt *info, CPUARMState *env, + target_ulong addr, unsigned estride, + int esize, uint32_t mtedesc, + uintptr_t ra) +{ + intptr_t count_off, count_last; + + /* + * TODO: estride is always a small power of two, <= 8. + * Manipulate the stride within the loops such that + * - first iteration hits addr + off, as required, + * - second iteration hits ALIGN_UP(addr, 16), + * - other iterations advance addr by 16. + * This will minimize the probing to once per MTE granule. 
+ */ + + /* Process the page only if MemAttr == Tagged. */ + if (info->page[0].tagged) { + count_off = info->reg_off_first[0]; + count_last = info->reg_off_split; + if (count_last < 0) { + count_last = info->reg_off_last[0]; + } + + do { + mte_check(env, mtedesc, addr + count_off, ra); + count_off += estride; + } while (count_off <= count_last); + } + + count_off = info->reg_off_first[1]; + if (count_off >= 0 && info->page[1].tagged) { + count_last = info->reg_off_last[1]; + do { + mte_check(env, mtedesc, addr + count_off, ra); + count_off += estride; + } while (count_off <= count_last); + } +} + +static inline QEMU_ALWAYS_INLINE +void sve2p1_ld1_c(CPUARMState *env, ARMVectorReg *zd, const vaddr addr, + uint32_t png, uint64_t desc64, + const uintptr_t ra, const MemOp esz, + sve_ldst1_host_fn *host_fn, + sve_ldst1_tlb_fn *tlb_fn) +{ + uint32_t mtedesc = desc64 >> 32; + uint32_t desc = desc64; + const unsigned N = (desc >> SIMD_DATA_SHIFT) & 1 ? 4 : 2; + const unsigned rstride = 1 << ((desc >> (SIMD_DATA_SHIFT + 1)) % 4); + const intptr_t reg_max = simd_oprsz(desc); + const unsigned esize = 1 << esz; + intptr_t count_off, count_last; + intptr_t reg_off, reg_last, reg_n; + SVEContLdSt info; + unsigned estride, flags; + void *host; + + estride = sve2p1_cont_ldst_elements(&info, addr, png, reg_max, N, esz); + if (estride == 0) { + /* The entire predicate was false; no load occurs. */ + for (unsigned n = 0; n < N; n++) { + memset(zd + n * rstride, 0, reg_max); + } + return; + } + + /* Probe the page(s). Exit with exception for any invalid page. */ + sve_cont_ldst_pages(&info, FAULT_ALL, env, addr, MMU_DATA_LOAD, ra); + + /* Handle watchpoints for all active elements. */ + sve2p1_cont_ldst_watchpoints(&info, env, addr, estride, + esize, BP_MEM_READ, ra); + + /* + * Handle mte checks for all active elements. + * Since TBI must be set for MTE, !mtedesc => !mte_active. + */ + if (mtedesc) { + sve2p1_cont_ldst_mte_check(&info, env, estride, addr, + esize, mtedesc, ra); + } + + flags = info.page[0].flags | info.page[1].flags; + if (unlikely(flags != 0)) { + /* + * At least one page includes MMIO. + * Any bus operation can fail with cpu_transaction_failed, + * which for ARM will raise SyncExternal. Perform the load + * into scratch memory to preserve register state until the end. + */ + ARMVectorReg scratch[4] = { }; + + count_off = info.reg_off_first[0]; + count_last = info.reg_off_last[1]; + if (count_last < 0) { + count_last = info.reg_off_split; + if (count_last < 0) { + count_last = info.reg_off_last[0]; + } + } + reg_off = count_off % reg_max; + reg_n = count_off / reg_max; + + do { + reg_last = MIN(count_last - count_off, reg_max - esize); + do { + tlb_fn(env, &scratch[reg_n], reg_off, addr + count_off, ra); + reg_off += estride; + count_off += estride; + } while (reg_off <= reg_last); + reg_off = 0; + reg_n++; + } while (count_off <= count_last); + + for (unsigned n = 0; n < N; ++n) { + memcpy(&zd[n * rstride], &scratch[n], reg_max); + } + return; + } + + /* The entire operation is in RAM, on valid pages. 
*/ + + for (unsigned n = 0; n < N; ++n) { + memset(&zd[n * rstride], 0, reg_max); + } + + count_off = info.reg_off_first[0]; + count_last = info.reg_off_last[0]; + reg_off = count_off % reg_max; + reg_n = count_off / reg_max; + host = info.page[0].host; + + set_helper_retaddr(ra); + + do { + reg_last = MIN(count_last - reg_n * reg_max, reg_max - esize); + do { + host_fn(&zd[reg_n * rstride], reg_off, host + count_off); + reg_off += estride; + count_off += estride; + } while (reg_off <= reg_last); + reg_off = 0; + reg_n++; + } while (count_off <= count_last); + + clear_helper_retaddr(); + + /* + * Use the slow path to manage the cross-page misalignment. + * But we know this is RAM and cannot trap. + */ + count_off = info.reg_off_split; + if (unlikely(count_off >= 0)) { + reg_off = count_off % reg_max; + reg_n = count_off / reg_max; + tlb_fn(env, &zd[reg_n * rstride], reg_off, addr + count_off, ra); + } + + count_off = info.reg_off_first[1]; + if (unlikely(count_off >= 0)) { + count_last = info.reg_off_last[1]; + reg_off = count_off % reg_max; + reg_n = count_off / reg_max; + host = info.page[1].host; + + set_helper_retaddr(ra); + + do { + reg_last = MIN(count_last - reg_n * reg_max, reg_max - esize); + do { + host_fn(&zd[reg_n * rstride], reg_off, host + count_off); + reg_off += estride; + count_off += estride; + } while (reg_off <= reg_last); + reg_off = 0; + reg_n++; + } while (count_off <= count_last); + + clear_helper_retaddr(); + } +} + +void HELPER(sve2p1_ld1bb_c)(CPUARMState *env, void *vd, target_ulong addr, + uint32_t png, uint64_t desc) +{ + sve2p1_ld1_c(env, vd, addr, png, desc, GETPC(), MO_8, + sve_ld1bb_host, sve_ld1bb_tlb); +} + +#define DO_LD1_2(NAME, ESZ) \ +void HELPER(sve2p1_##NAME##_le_c)(CPUARMState *env, void *vd, \ + target_ulong addr, uint32_t png, \ + uint64_t desc) \ +{ \ + sve2p1_ld1_c(env, vd, addr, png, desc, GETPC(), ESZ, \ + sve_##NAME##_le_host, sve_##NAME##_le_tlb); \ +} \ +void HELPER(sve2p1_##NAME##_be_c)(CPUARMState *env, void *vd, \ + target_ulong addr, uint32_t png, \ + uint64_t desc) \ +{ \ + sve2p1_ld1_c(env, vd, addr, png, desc, GETPC(), ESZ, \ + sve_##NAME##_be_host, sve_##NAME##_be_tlb); \ +} + +DO_LD1_2(ld1hh, MO_16) +DO_LD1_2(ld1ss, MO_32) +DO_LD1_2(ld1dd, MO_64) + +#undef DO_LD1_2 + +static inline QEMU_ALWAYS_INLINE +void sve2p1_st1_c(CPUARMState *env, ARMVectorReg *zd, const vaddr addr, + uint32_t png, uint64_t desc64, + const uintptr_t ra, const int esz, + sve_ldst1_host_fn *host_fn, + sve_ldst1_tlb_fn *tlb_fn) +{ + uint32_t mtedesc = desc64 >> 32; + uint32_t desc = desc64; + const unsigned N = (desc >> SIMD_DATA_SHIFT) & 1 ? 4 : 2; + const unsigned rstride = 1 << ((desc >> (SIMD_DATA_SHIFT + 1)) % 4); + const intptr_t reg_max = simd_oprsz(desc); + const unsigned esize = 1 << esz; + intptr_t count_off, count_last; + intptr_t reg_off, reg_last, reg_n; + SVEContLdSt info; + unsigned estride, flags; + void *host; + + estride = sve2p1_cont_ldst_elements(&info, addr, png, reg_max, N, esz); + if (estride == 0) { + /* The entire predicate was false; no store occurs. */ + return; + } + + /* Probe the page(s). Exit with exception for any invalid page. */ + sve_cont_ldst_pages(&info, FAULT_ALL, env, addr, MMU_DATA_STORE, ra); + + /* Handle watchpoints for all active elements. */ + sve2p1_cont_ldst_watchpoints(&info, env, addr, estride, + esize, BP_MEM_WRITE, ra); + + /* + * Handle mte checks for all active elements. + * Since TBI must be set for MTE, !mtedesc => !mte_active. 
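+ * Here mtedesc is the upper 32 bits of desc64; when it is zero, no MTE
+ * checking was requested and the check below is skipped entirely.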
+ */ + if (mtedesc) { + sve2p1_cont_ldst_mte_check(&info, env, estride, addr, + esize, mtedesc, ra); + } + + flags = info.page[0].flags | info.page[1].flags; + if (unlikely(flags != 0)) { + /* + * At least one page includes MMIO. + * Any bus operation can fail with cpu_transaction_failed, + * which for ARM will raise SyncExternal. Perform the load + * into scratch memory to preserve register state until the end. + */ + count_off = info.reg_off_first[0]; + count_last = info.reg_off_last[1]; + if (count_last < 0) { + count_last = info.reg_off_split; + if (count_last < 0) { + count_last = info.reg_off_last[0]; + } + } + reg_off = count_off % reg_max; + reg_n = count_off / reg_max; + + do { + reg_last = MIN(count_last - count_off, reg_max - esize); + do { + tlb_fn(env, &zd[reg_n * rstride], reg_off, addr + count_off, ra); + reg_off += estride; + count_off += estride; + } while (reg_off <= reg_last); + reg_off = 0; + reg_n++; + } while (count_off <= count_last); + return; + } + + /* The entire operation is in RAM, on valid pages. */ + + count_off = info.reg_off_first[0]; + count_last = info.reg_off_last[0]; + reg_off = count_off % reg_max; + reg_n = count_off / reg_max; + host = info.page[0].host; + + set_helper_retaddr(ra); + + do { + reg_last = MIN(count_last - reg_n * reg_max, reg_max - esize); + do { + host_fn(&zd[reg_n * rstride], reg_off, host + count_off); + reg_off += estride; + count_off += estride; + } while (reg_off <= reg_last); + reg_off = 0; + reg_n++; + } while (count_off <= count_last); + + clear_helper_retaddr(); + + /* + * Use the slow path to manage the cross-page misalignment. + * But we know this is RAM and cannot trap. + */ + count_off = info.reg_off_split; + if (unlikely(count_off >= 0)) { + reg_off = count_off % reg_max; + reg_n = count_off / reg_max; + tlb_fn(env, &zd[reg_n * rstride], reg_off, addr + count_off, ra); + } + + count_off = info.reg_off_first[1]; + if (unlikely(count_off >= 0)) { + count_last = info.reg_off_last[1]; + reg_off = count_off % reg_max; + reg_n = count_off / reg_max; + host = info.page[1].host; + + set_helper_retaddr(ra); + + do { + reg_last = MIN(count_last - reg_n * reg_max, reg_max - esize); + do { + host_fn(&zd[reg_n * rstride], reg_off, host + count_off); + reg_off += estride; + count_off += estride; + } while (reg_off <= reg_last); + reg_off = 0; + reg_n++; + } while (count_off <= count_last); + + clear_helper_retaddr(); + } +} + +void HELPER(sve2p1_st1bb_c)(CPUARMState *env, void *vd, target_ulong addr, + uint32_t png, uint64_t desc) +{ + sve2p1_st1_c(env, vd, addr, png, desc, GETPC(), MO_8, + sve_st1bb_host, sve_st1bb_tlb); +} + +#define DO_ST1_2(NAME, ESZ) \ +void HELPER(sve2p1_##NAME##_le_c)(CPUARMState *env, void *vd, \ + target_ulong addr, uint32_t png, \ + uint64_t desc) \ +{ \ + sve2p1_st1_c(env, vd, addr, png, desc, GETPC(), ESZ, \ + sve_##NAME##_le_host, sve_##NAME##_le_tlb); \ +} \ +void HELPER(sve2p1_##NAME##_be_c)(CPUARMState *env, void *vd, \ + target_ulong addr, uint32_t png, \ + uint64_t desc) \ +{ \ + sve2p1_st1_c(env, vd, addr, png, desc, GETPC(), ESZ, \ + sve_##NAME##_be_host, sve_##NAME##_be_tlb); \ +} + +DO_ST1_2(st1hh, MO_16) +DO_ST1_2(st1ss, MO_32) +DO_ST1_2(st1dd, MO_64) + +#undef DO_ST1_2 + void HELPER(sve2_eor3)(void *vd, void *vn, void *vm, void *vk, uint32_t desc) { intptr_t i, opr_sz = simd_oprsz(desc) / 8; @@ -7715,3 +8600,31 @@ DO_FCVTLT(sve2_fcvtlt_sd, uint64_t, uint32_t, H1_8, H1_4, float32_to_float64) #undef DO_FCVTLT #undef DO_FCVTNT + +void HELPER(pext)(void *vd, uint32_t png, uint32_t desc) +{ + int pl 
= FIELD_EX32(desc, PREDDESC, OPRSZ); + int vl = pl * 8; + unsigned v_esz = FIELD_EX32(desc, PREDDESC, ESZ); + int part = FIELD_EX32(desc, PREDDESC, DATA); + DecodeCounter p = decode_counter(png, vl, v_esz); + uint64_t mask = pred_esz_masks[v_esz + p.lg2_stride]; + ARMPredicateReg *d = vd; + + /* + * Convert from element count to byte count and adjust + * for the portion of the 4*VL counter to be extracted. + */ + int b_count = (p.count << v_esz) - vl * part; + + memset(d, 0, sizeof(*d)); + if (p.invert) { + if (b_count <= 0) { + do_whilel(vd, mask, vl, vl); + } else if (b_count < vl) { + do_whileg(vd, mask, vl - b_count, vl); + } + } else if (b_count > 0) { + do_whilel(vd, mask, MIN(b_count, vl), vl); + } +} diff --git a/target/arm/tcg/sve_ldst_internal.h b/target/arm/tcg/sve_ldst_internal.h index f2243da..c67cda9 100644 --- a/target/arm/tcg/sve_ldst_internal.h +++ b/target/arm/tcg/sve_ldst_internal.h @@ -116,6 +116,94 @@ DO_ST_PRIM_2(sd, H1_8, uint64_t, uint32_t, stl) DO_LD_PRIM_2(dd, H1_8, uint64_t, uint64_t, ldq) DO_ST_PRIM_2(dd, H1_8, uint64_t, uint64_t, stq) +#define DO_LD_PRIM_3(NAME, FUNC) \ + static inline void sve_##NAME##_host(void *vd, \ + intptr_t reg_off, void *host) \ + { sve_##FUNC##_host(vd, reg_off, host); \ + *(uint64_t *)(vd + reg_off + 8) = 0; } \ + static inline void sve_##NAME##_tlb(CPUARMState *env, void *vd, \ + intptr_t reg_off, target_ulong addr, uintptr_t ra) \ + { sve_##FUNC##_tlb(env, vd, reg_off, addr, ra); \ + *(uint64_t *)(vd + reg_off + 8) = 0; } + +DO_LD_PRIM_3(ld1squ_be, ld1sdu_be) +DO_LD_PRIM_3(ld1squ_le, ld1sdu_le) +DO_LD_PRIM_3(ld1dqu_be, ld1dd_be) +DO_LD_PRIM_3(ld1dqu_le, ld1dd_le) + +#define sve_st1sq_be_host sve_st1sd_be_host +#define sve_st1sq_le_host sve_st1sd_le_host +#define sve_st1sq_be_tlb sve_st1sd_be_tlb +#define sve_st1sq_le_tlb sve_st1sd_le_tlb + +#define sve_st1dq_be_host sve_st1dd_be_host +#define sve_st1dq_le_host sve_st1dd_le_host +#define sve_st1dq_be_tlb sve_st1dd_be_tlb +#define sve_st1dq_le_tlb sve_st1dd_le_tlb + +/* + * The ARMVectorReg elements are stored in host-endian 64-bit units. + * For 128-bit quantities, the sequence defined by the Elem[] pseudocode + * corresponds to storing the two 64-bit pieces in little-endian order. + */ +/* FIXME: Nothing in this file makes any effort at atomicity. 
*/ + +static inline void sve_ld1qq_be_host(void *vd, intptr_t reg_off, void *host) +{ + sve_ld1dd_be_host(vd, reg_off + 8, host); + sve_ld1dd_be_host(vd, reg_off, host + 8); +} + +static inline void sve_ld1qq_le_host(void *vd, intptr_t reg_off, void *host) +{ + sve_ld1dd_le_host(vd, reg_off, host); + sve_ld1dd_le_host(vd, reg_off + 8, host + 8); +} + +static inline void +sve_ld1qq_be_tlb(CPUARMState *env, void *vd, intptr_t reg_off, + target_ulong addr, uintptr_t ra) +{ + sve_ld1dd_be_tlb(env, vd, reg_off + 8, addr, ra); + sve_ld1dd_be_tlb(env, vd, reg_off, addr + 8, ra); +} + +static inline void +sve_ld1qq_le_tlb(CPUARMState *env, void *vd, intptr_t reg_off, + target_ulong addr, uintptr_t ra) +{ + sve_ld1dd_le_tlb(env, vd, reg_off, addr, ra); + sve_ld1dd_le_tlb(env, vd, reg_off + 8, addr + 8, ra); +} + +static inline void sve_st1qq_be_host(void *vd, intptr_t reg_off, void *host) +{ + sve_st1dd_be_host(vd, reg_off + 8, host); + sve_st1dd_be_host(vd, reg_off, host + 8); +} + +static inline void sve_st1qq_le_host(void *vd, intptr_t reg_off, void *host) +{ + sve_st1dd_le_host(vd, reg_off, host); + sve_st1dd_le_host(vd, reg_off + 8, host + 8); +} + +static inline void +sve_st1qq_be_tlb(CPUARMState *env, void *vd, intptr_t reg_off, + target_ulong addr, uintptr_t ra) +{ + sve_st1dd_be_tlb(env, vd, reg_off + 8, addr, ra); + sve_st1dd_be_tlb(env, vd, reg_off, addr + 8, ra); +} + +static inline void +sve_st1qq_le_tlb(CPUARMState *env, void *vd, intptr_t reg_off, + target_ulong addr, uintptr_t ra) +{ + sve_st1dd_le_tlb(env, vd, reg_off, addr, ra); + sve_st1dd_le_tlb(env, vd, reg_off + 8, addr + 8, ra); +} + #undef DO_LD_TLB #undef DO_ST_TLB #undef DO_LD_HOST @@ -123,6 +211,7 @@ DO_ST_PRIM_2(dd, H1_8, uint64_t, uint64_t, stq) #undef DO_ST_PRIM_1 #undef DO_LD_PRIM_2 #undef DO_ST_PRIM_2 +#undef DO_LD_PRIM_3 /* * Resolve the guest virtual address to info->host and info->flags. 
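(Illustration only, not part of the patch.) The sve_ld1qq_*/sve_st1qq_* helpers above split each 128-bit element into two 64-bit pieces: the lower 64 bits always land at reg_off and the upper 64 bits at reg_off + 8, while big-endian guest memory holds the upper half first, so the two pieces are fetched from swapped addresses. A minimal standalone sketch of that ordering follows; demo_ld1qq and ld64 are made-up names for this example only.

#include <stdint.h>

/* Read one 64-bit piece from guest memory in the given byte order. */
static uint64_t ld64(const uint8_t *p, int big_endian)
{
    uint64_t v = 0;
    for (int i = 0; i < 8; i++) {
        v |= (uint64_t)p[i] << (big_endian ? 8 * (7 - i) : 8 * i);
    }
    return v;
}

/*
 * 128-bit element load: lower half to reg_off, upper half to reg_off + 8
 * (reg_off is a multiple of 16 for quadword elements).  Big-endian memory
 * stores the upper half first, so the source addresses are swapped,
 * mirroring sve_ld1qq_be_host/sve_ld1qq_le_host above.
 */
static void demo_ld1qq(uint64_t *reg, intptr_t reg_off,
                       const uint8_t *mem, int big_endian)
{
    const uint8_t *lo_src = big_endian ? mem + 8 : mem;
    const uint8_t *hi_src = big_endian ? mem : mem + 8;

    reg[reg_off / 8] = ld64(lo_src, big_endian);
    reg[reg_off / 8 + 1] = ld64(hi_src, big_endian);
}

Splitting each access into two 64-bit pieces is also why the FIXME above applies: the two halves of a quadword element are not loaded or stored atomically.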
diff --git a/target/arm/tcg/tlb-insns.c b/target/arm/tcg/tlb-insns.c index 95c26c6..1a0a332 100644 --- a/target/arm/tcg/tlb-insns.c +++ b/target/arm/tcg/tlb-insns.c @@ -149,7 +149,8 @@ static void tlbimva_hyp_write(CPUARMState *env, const ARMCPRegInfo *ri, CPUState *cs = env_cpu(env); uint64_t pageaddr = value & ~MAKE_64BIT_MASK(0, 12); - tlb_flush_page_by_mmuidx(cs, pageaddr, ARMMMUIdxBit_E2); + tlb_flush_page_by_mmuidx(cs, pageaddr, + ARMMMUIdxBit_E2 | ARMMMUIdxBit_E2_GCS); } static void tlbimva_hyp_is_write(CPUARMState *env, const ARMCPRegInfo *ri, @@ -159,7 +160,8 @@ static void tlbimva_hyp_is_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t pageaddr = value & ~MAKE_64BIT_MASK(0, 12); tlb_flush_page_by_mmuidx_all_cpus_synced(cs, pageaddr, - ARMMMUIdxBit_E2); + ARMMMUIdxBit_E2 | + ARMMMUIdxBit_E2_GCS); } static void tlbiipas2_hyp_write(CPUARMState *env, const ARMCPRegInfo *ri, @@ -202,7 +204,7 @@ static void tlbiall_hyp_write(CPUARMState *env, const ARMCPRegInfo *ri, { CPUState *cs = env_cpu(env); - tlb_flush_by_mmuidx(cs, ARMMMUIdxBit_E2); + tlb_flush_by_mmuidx(cs, ARMMMUIdxBit_E2 | ARMMMUIdxBit_E2_GCS); } static void tlbiall_hyp_is_write(CPUARMState *env, const ARMCPRegInfo *ri, @@ -210,7 +212,8 @@ static void tlbiall_hyp_is_write(CPUARMState *env, const ARMCPRegInfo *ri, { CPUState *cs = env_cpu(env); - tlb_flush_by_mmuidx_all_cpus_synced(cs, ARMMMUIdxBit_E2); + tlb_flush_by_mmuidx_all_cpus_synced(cs, ARMMMUIdxBit_E2 | + ARMMMUIdxBit_E2_GCS); } /* @@ -228,12 +231,16 @@ static int vae1_tlbmask(CPUARMState *env) if ((hcr & (HCR_E2H | HCR_TGE)) == (HCR_E2H | HCR_TGE)) { mask = ARMMMUIdxBit_E20_2 | ARMMMUIdxBit_E20_2_PAN | - ARMMMUIdxBit_E20_0; + ARMMMUIdxBit_E20_2_GCS | + ARMMMUIdxBit_E20_0 | + ARMMMUIdxBit_E20_0_GCS; } else { /* This is AArch64 only, so we don't need to touch the EL30_x TLBs */ mask = ARMMMUIdxBit_E10_1 | ARMMMUIdxBit_E10_1_PAN | - ARMMMUIdxBit_E10_0; + ARMMMUIdxBit_E10_1_GCS | + ARMMMUIdxBit_E10_0 | + ARMMMUIdxBit_E10_0_GCS; } return mask; } @@ -246,13 +253,20 @@ static int vae2_tlbmask(CPUARMState *env) if (hcr & HCR_E2H) { mask = ARMMMUIdxBit_E20_2 | ARMMMUIdxBit_E20_2_PAN | - ARMMMUIdxBit_E20_0; + ARMMMUIdxBit_E20_2_GCS | + ARMMMUIdxBit_E20_0 | + ARMMMUIdxBit_E20_0_GCS; } else { - mask = ARMMMUIdxBit_E2; + mask = ARMMMUIdxBit_E2 | ARMMMUIdxBit_E2_GCS; } return mask; } +static int vae3_tlbmask(void) +{ + return ARMMMUIdxBit_E3 | ARMMMUIdxBit_E3_GCS; +} + /* Return 56 if TBI is enabled, 64 otherwise. 
*/ static int tlbbits_for_regime(CPUARMState *env, ARMMMUIdx mmu_idx, uint64_t addr) @@ -325,9 +339,12 @@ static void tlbi_aa64_vmalle1_write(CPUARMState *env, const ARMCPRegInfo *ri, static int e2_tlbmask(CPUARMState *env) { return (ARMMMUIdxBit_E20_0 | + ARMMMUIdxBit_E20_0_GCS | ARMMMUIdxBit_E20_2 | ARMMMUIdxBit_E20_2_PAN | - ARMMMUIdxBit_E2); + ARMMMUIdxBit_E20_2_GCS | + ARMMMUIdxBit_E2 | + ARMMMUIdxBit_E2_GCS); } static void tlbi_aa64_alle1_write(CPUARMState *env, const ARMCPRegInfo *ri, @@ -354,7 +371,7 @@ static void tlbi_aa64_alle3_write(CPUARMState *env, const ARMCPRegInfo *ri, ARMCPU *cpu = env_archcpu(env); CPUState *cs = CPU(cpu); - tlb_flush_by_mmuidx(cs, ARMMMUIdxBit_E3); + tlb_flush_by_mmuidx(cs, vae3_tlbmask()); } static void tlbi_aa64_alle1is_write(CPUARMState *env, const ARMCPRegInfo *ri, @@ -380,7 +397,7 @@ static void tlbi_aa64_alle3is_write(CPUARMState *env, const ARMCPRegInfo *ri, { CPUState *cs = env_cpu(env); - tlb_flush_by_mmuidx_all_cpus_synced(cs, ARMMMUIdxBit_E3); + tlb_flush_by_mmuidx_all_cpus_synced(cs, vae3_tlbmask()); } static void tlbi_aa64_vae2_write(CPUARMState *env, const ARMCPRegInfo *ri, @@ -411,7 +428,7 @@ static void tlbi_aa64_vae3_write(CPUARMState *env, const ARMCPRegInfo *ri, CPUState *cs = CPU(cpu); uint64_t pageaddr = sextract64(value << 12, 0, 56); - tlb_flush_page_by_mmuidx(cs, pageaddr, ARMMMUIdxBit_E3); + tlb_flush_page_by_mmuidx(cs, pageaddr, vae3_tlbmask()); } static void tlbi_aa64_vae1is_write(CPUARMState *env, const ARMCPRegInfo *ri, @@ -465,7 +482,7 @@ static void tlbi_aa64_vae3is_write(CPUARMState *env, const ARMCPRegInfo *ri, int bits = tlbbits_for_regime(env, ARMMMUIdx_E3, pageaddr); tlb_flush_page_bits_by_mmuidx_all_cpus_synced(cs, pageaddr, - ARMMMUIdxBit_E3, bits); + vae3_tlbmask(), bits); } static int ipas2e1_tlbmask(CPUARMState *env, int64_t value) @@ -963,7 +980,7 @@ static void tlbi_aa64_rvae3_write(CPUARMState *env, * flush-last-level-only. */ - do_rvae_write(env, value, ARMMMUIdxBit_E3, tlb_force_broadcast(env)); + do_rvae_write(env, value, vae3_tlbmask(), tlb_force_broadcast(env)); } static void tlbi_aa64_rvae3is_write(CPUARMState *env, @@ -977,7 +994,7 @@ static void tlbi_aa64_rvae3is_write(CPUARMState *env, * flush-last-level-only or inner/outer specific flushes. */ - do_rvae_write(env, value, ARMMMUIdxBit_E3, true); + do_rvae_write(env, value, vae3_tlbmask(), true); } static void tlbi_aa64_ripas2e1_write(CPUARMState *env, const ARMCPRegInfo *ri, diff --git a/target/arm/tcg/tlb_helper.c b/target/arm/tcg/tlb_helper.c index 23c72a9..f1983a5 100644 --- a/target/arm/tcg/tlb_helper.c +++ b/target/arm/tcg/tlb_helper.c @@ -24,13 +24,13 @@ bool arm_s1_regime_using_lpae_format(CPUARMState *env, ARMMMUIdx mmu_idx) return regime_using_lpae_format(env, mmu_idx); } -static inline uint32_t merge_syn_data_abort(uint32_t template_syn, +static inline uint64_t merge_syn_data_abort(uint32_t template_syn, ARMMMUFaultInfo *fi, unsigned int target_el, bool same_el, bool is_write, - int fsc) + int fsc, bool gcs) { - uint32_t syn; + uint64_t syn; /* * ISV is only set for stage-2 data aborts routed to EL2 and @@ -75,6 +75,11 @@ static inline uint32_t merge_syn_data_abort(uint32_t template_syn, /* Merge the runtime syndrome with the template syndrome. */ syn |= template_syn; } + + /* Form ISS2 at the top of the syndrome. 
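+     * ESR_ELx.ISS2 starts at bit 32, so DirtyBit is placed at ISS2 bit 5
+     * (syndrome bit 37) and the GCS flag at ISS2 bit 8 (syndrome bit 40).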
*/ + syn |= (uint64_t)fi->dirtybit << 37; + syn |= (uint64_t)gcs << 40; + return syn; } @@ -176,7 +181,9 @@ void arm_deliver_fault(ARMCPU *cpu, vaddr addr, int target_el = exception_target_el(env); int current_el = arm_current_el(env); bool same_el; - uint32_t syn, exc, fsr, fsc; + uint32_t exc, fsr, fsc; + uint64_t syn; + /* * We know this must be a data or insn abort, and that * env->exception.syndrome contains the template syndrome set @@ -246,9 +253,10 @@ void arm_deliver_fault(ARMCPU *cpu, vaddr addr, syn = syn_insn_abort(same_el, fi->ea, fi->s1ptw, fsc); exc = EXCP_PREFETCH_ABORT; } else { + bool gcs = regime_is_gcs(core_to_arm_mmu_idx(env, mmu_idx)); syn = merge_syn_data_abort(env->exception.syndrome, fi, target_el, same_el, access_type == MMU_DATA_STORE, - fsc); + fsc, gcs); if (access_type == MMU_DATA_STORE && arm_feature(env, ARM_FEATURE_V6)) { fsr |= (1 << 11); diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c index ac80f57..3292d7c 100644 --- a/target/arm/tcg/translate-a64.c +++ b/target/arm/tcg/translate-a64.c @@ -26,6 +26,7 @@ #include "cpregs.h" static TCGv_i64 cpu_X[32]; +static TCGv_i64 cpu_gcspr[4]; static TCGv_i64 cpu_pc; /* Load/store exclusive handling */ @@ -77,6 +78,10 @@ static int scale_by_log2_tag_granule(DisasContext *s, int x) /* initialize TCG globals. */ void a64_translate_init(void) { + static const char gcspr_names[4][12] = { + "gcspr_el0", "gcspr_el1", "gcspr_el2", "gcspr_el3" + }; + int i; cpu_pc = tcg_global_mem_new_i64(tcg_env, @@ -90,10 +95,17 @@ void a64_translate_init(void) cpu_exclusive_high = tcg_global_mem_new_i64(tcg_env, offsetof(CPUARMState, exclusive_high), "exclusive_high"); + + for (i = 0; i < 4; i++) { + cpu_gcspr[i] = + tcg_global_mem_new_i64(tcg_env, + offsetof(CPUARMState, cp15.gcspr_el[i]), + gcspr_names[i]); + } } /* - * Return the core mmu_idx to use for A64 load/store insns which + * Return the full arm mmu_idx to use for A64 load/store insns which * have a "unprivileged load/store" variant. Those insns access * EL0 if executed from an EL which has control over EL0 (usually * EL1) but behave like normal loads and stores if executed from @@ -103,7 +115,7 @@ void a64_translate_init(void) * normal encoding (in which case we will return the same * thing as get_mem_index(). */ -static int get_a64_user_mem_index(DisasContext *s, bool unpriv) +static ARMMMUIdx full_a64_user_mem_index(DisasContext *s, bool unpriv) { /* * If AccType_UNPRIV is not used, the insn uses AccType_NORMAL, @@ -130,7 +142,19 @@ static int get_a64_user_mem_index(DisasContext *s, bool unpriv) g_assert_not_reached(); } } - return arm_to_core_mmu_idx(useridx); + return useridx; +} + +/* Return the core mmu_idx per above. */ +static int core_a64_user_mem_index(DisasContext *s, bool unpriv) +{ + return arm_to_core_mmu_idx(full_a64_user_mem_index(s, unpriv)); +} + +/* For a given translation regime, return the core mmu_idx for gcs access. 
*/ +static int core_gcs_mem_index(ARMMMUIdx armidx) +{ + return arm_to_core_mmu_idx(regime_to_gcs(armidx)); } static void set_btype_raw(int val) @@ -408,6 +432,39 @@ static MemOp check_ordered_align(DisasContext *s, int rn, int imm, return finalize_memop(s, mop); } +static void gen_add_gcs_record(DisasContext *s, TCGv_i64 value) +{ + TCGv_i64 addr = tcg_temp_new_i64(); + TCGv_i64 gcspr = cpu_gcspr[s->current_el]; + int mmuidx = core_gcs_mem_index(s->mmu_idx); + MemOp mop = finalize_memop(s, MO_64 | MO_ALIGN); + + tcg_gen_addi_i64(addr, gcspr, -8); + tcg_gen_qemu_st_i64(value, clean_data_tbi(s, addr), mmuidx, mop); + tcg_gen_mov_i64(gcspr, addr); +} + +static void gen_load_check_gcs_record(DisasContext *s, TCGv_i64 target, + GCSInstructionType it, int rt) +{ + TCGv_i64 gcspr = cpu_gcspr[s->current_el]; + int mmuidx = core_gcs_mem_index(s->mmu_idx); + MemOp mop = finalize_memop(s, MO_64 | MO_ALIGN); + TCGv_i64 rec_va = tcg_temp_new_i64(); + + tcg_gen_qemu_ld_i64(rec_va, clean_data_tbi(s, gcspr), mmuidx, mop); + + if (s->gcs_rvcen) { + TCGLabel *fail_label = + delay_exception(s, EXCP_UDEF, syn_gcs_data_check(it, rt)); + + tcg_gen_brcond_i64(TCG_COND_NE, rec_va, target, fail_label); + } + + gen_a64_set_pc(s, rec_va); + tcg_gen_addi_i64(gcspr, gcspr, 8); +} + typedef struct DisasCompare64 { TCGCond cond; TCGv_i64 value; @@ -433,12 +490,6 @@ static void gen_rebuild_hflags(DisasContext *s) gen_helper_rebuild_hflags_a64(tcg_env, tcg_constant_i32(s->current_el)); } -static void gen_exception_internal(int excp) -{ - assert(excp_is_internal(excp)); - gen_helper_exception_internal(tcg_env, tcg_constant_i32(excp)); -} - static void gen_exception_internal_insn(DisasContext *s, int excp) { gen_a64_update_pc(s, 0); @@ -477,7 +528,7 @@ static inline bool use_goto_tb(DisasContext *s, uint64_t dest) return translator_use_goto_tb(&s->base, dest); } -static void gen_goto_tb(DisasContext *s, int n, int64_t diff) +static void gen_goto_tb(DisasContext *s, unsigned tb_slot_idx, int64_t diff) { if (use_goto_tb(s, s->pc_curr + diff)) { /* @@ -490,12 +541,12 @@ static void gen_goto_tb(DisasContext *s, int n, int64_t diff) */ if (tb_cflags(s->base.tb) & CF_PCREL) { gen_a64_update_pc(s, diff); - tcg_gen_goto_tb(n); + tcg_gen_goto_tb(tb_slot_idx); } else { - tcg_gen_goto_tb(n); + tcg_gen_goto_tb(tb_slot_idx); gen_a64_update_pc(s, diff); } - tcg_gen_exit_tb(s->base.tb, n); + tcg_gen_exit_tb(s->base.tb, tb_slot_idx); s->base.is_jmp = DISAS_NORETURN; } else { gen_a64_update_pc(s, diff); @@ -1387,11 +1438,8 @@ static bool fp_access_check_only(DisasContext *s) return true; } -static bool fp_access_check(DisasContext *s) +static bool nonstreaming_check(DisasContext *s) { - if (!fp_access_check_only(s)) { - return false; - } if (s->sme_trap_nonstreaming && s->is_nonstreaming) { gen_exception_insn(s, 0, EXCP_UDEF, syn_smetrap(SME_ET_Streaming, false)); @@ -1400,6 +1448,11 @@ static bool fp_access_check(DisasContext *s) return true; } +static bool fp_access_check(DisasContext *s) +{ + return fp_access_check_only(s) && nonstreaming_check(s); +} + /* * Return <0 for non-supported element sizes, with MO_16 controlled by * FEAT_FP16; return 0 for fp disabled; otherwise return >0 for success. 
@@ -1450,14 +1503,24 @@ static int fp_access_check_vector_hsd(DisasContext *s, bool is_q, MemOp esz) */ bool sve_access_check(DisasContext *s) { - if (s->pstate_sm || !dc_isar_feature(aa64_sve, s)) { + if (dc_isar_feature(aa64_sme, s)) { bool ret; - assert(dc_isar_feature(aa64_sme, s)); - ret = sme_sm_enabled_check(s); + if (s->pstate_sm) { + ret = sme_enabled_check(s); + } else if (dc_isar_feature(aa64_sve, s)) { + goto continue_sve; + } else { + ret = sme_sm_enabled_check(s); + } + if (ret) { + ret = nonstreaming_check(s); + } s->sve_access_checked = (ret ? 1 : -1); return ret; } + + continue_sve: if (s->sve_excp_el) { /* Assert that we only raise one exception per instruction. */ assert(!s->sve_access_checked); @@ -1494,7 +1557,8 @@ bool sme_enabled_check(DisasContext *s) * to be zero when fp_excp_el has priority. This is because we need * sme_excp_el by itself for cpregs access checks. */ - if (!s->fp_excp_el || s->sme_excp_el < s->fp_excp_el) { + if (s->sme_excp_el + && (!s->fp_excp_el || s->sme_excp_el <= s->fp_excp_el)) { bool ret = sme_access_check(s); s->fp_access_checked = (ret ? 1 : -1); return ret; @@ -1635,7 +1699,14 @@ static bool trans_B(DisasContext *s, arg_i *a) static bool trans_BL(DisasContext *s, arg_i *a) { - gen_pc_plus_diff(s, cpu_reg(s, 30), curr_insn_len(s)); + TCGv_i64 link = tcg_temp_new_i64(); + + gen_pc_plus_diff(s, link, 4); + if (s->gcs_en) { + gen_add_gcs_record(s, link); + } + tcg_gen_mov_i64(cpu_reg(s, 30), link); + reset_btype(s); gen_goto_tb(s, 0, a->imm); return true; @@ -1732,15 +1803,15 @@ static bool trans_BR(DisasContext *s, arg_r *a) static bool trans_BLR(DisasContext *s, arg_r *a) { - TCGv_i64 dst = cpu_reg(s, a->rn); - TCGv_i64 lr = cpu_reg(s, 30); - if (dst == lr) { - TCGv_i64 tmp = tcg_temp_new_i64(); - tcg_gen_mov_i64(tmp, dst); - dst = tmp; + TCGv_i64 link = tcg_temp_new_i64(); + + gen_pc_plus_diff(s, link, 4); + if (s->gcs_en) { + gen_add_gcs_record(s, link); } - gen_pc_plus_diff(s, lr, curr_insn_len(s)); - gen_a64_set_pc(s, dst); + gen_a64_set_pc(s, cpu_reg(s, a->rn)); + tcg_gen_mov_i64(cpu_reg(s, 30), link); + set_btype_for_blr(s); s->base.is_jmp = DISAS_JUMP; return true; @@ -1748,7 +1819,13 @@ static bool trans_BLR(DisasContext *s, arg_r *a) static bool trans_RET(DisasContext *s, arg_r *a) { - gen_a64_set_pc(s, cpu_reg(s, a->rn)); + TCGv_i64 target = cpu_reg(s, a->rn); + + if (s->gcs_en) { + gen_load_check_gcs_record(s, target, GCS_IT_RET_nPauth, a->rn); + } else { + gen_a64_set_pc(s, target); + } s->base.is_jmp = DISAS_JUMP; return true; } @@ -1792,21 +1869,21 @@ static bool trans_BRAZ(DisasContext *s, arg_braz *a) static bool trans_BLRAZ(DisasContext *s, arg_braz *a) { - TCGv_i64 dst, lr; + TCGv_i64 dst, link; if (!dc_isar_feature(aa64_pauth, s)) { return false; } - dst = auth_branch_target(s, cpu_reg(s, a->rn), tcg_constant_i64(0), !a->m); - lr = cpu_reg(s, 30); - if (dst == lr) { - TCGv_i64 tmp = tcg_temp_new_i64(); - tcg_gen_mov_i64(tmp, dst); - dst = tmp; + + link = tcg_temp_new_i64(); + gen_pc_plus_diff(s, link, 4); + if (s->gcs_en) { + gen_add_gcs_record(s, link); } - gen_pc_plus_diff(s, lr, curr_insn_len(s)); gen_a64_set_pc(s, dst); + tcg_gen_mov_i64(cpu_reg(s, 30), link); + set_btype_for_blr(s); s->base.is_jmp = DISAS_JUMP; return true; @@ -1816,8 +1893,17 @@ static bool trans_RETA(DisasContext *s, arg_reta *a) { TCGv_i64 dst; + if (!dc_isar_feature(aa64_pauth, s)) { + return false; + } + dst = auth_branch_target(s, cpu_reg(s, 30), cpu_X[31], !a->m); - gen_a64_set_pc(s, dst); + if (s->gcs_en) { + GCSInstructionType it = a->m ? 
GCS_IT_RET_PauthB : GCS_IT_RET_PauthA; + gen_load_check_gcs_record(s, dst, it, 30); + } else { + gen_a64_set_pc(s, dst); + } s->base.is_jmp = DISAS_JUMP; return true; } @@ -1838,20 +1924,21 @@ static bool trans_BRA(DisasContext *s, arg_bra *a) static bool trans_BLRA(DisasContext *s, arg_bra *a) { - TCGv_i64 dst, lr; + TCGv_i64 dst, link; if (!dc_isar_feature(aa64_pauth, s)) { return false; } dst = auth_branch_target(s, cpu_reg(s, a->rn), cpu_reg_sp(s, a->rm), !a->m); - lr = cpu_reg(s, 30); - if (dst == lr) { - TCGv_i64 tmp = tcg_temp_new_i64(); - tcg_gen_mov_i64(tmp, dst); - dst = tmp; + + link = tcg_temp_new_i64(); + gen_pc_plus_diff(s, link, 4); + if (s->gcs_en) { + gen_add_gcs_record(s, link); } - gen_pc_plus_diff(s, lr, curr_insn_len(s)); gen_a64_set_pc(s, dst); + tcg_gen_mov_i64(cpu_reg(s, 30), link); + set_btype_for_blr(s); s->base.is_jmp = DISAS_JUMP; return true; @@ -1859,6 +1946,9 @@ static bool trans_BLRA(DisasContext *s, arg_bra *a) static bool trans_ERET(DisasContext *s, arg_ERET *a) { +#ifdef CONFIG_USER_ONLY + return false; +#else TCGv_i64 dst; if (s->current_el == 0) { @@ -1878,10 +1968,14 @@ static bool trans_ERET(DisasContext *s, arg_ERET *a) /* Must exit loop to check un-masked IRQs */ s->base.is_jmp = DISAS_EXIT; return true; +#endif } static bool trans_ERETA(DisasContext *s, arg_reta *a) { +#ifdef CONFIG_USER_ONLY + return false; +#else TCGv_i64 dst; if (!dc_isar_feature(aa64_pauth, s)) { @@ -1907,6 +2001,7 @@ static bool trans_ERETA(DisasContext *s, arg_reta *a) /* Must exit loop to check un-masked IRQs */ s->base.is_jmp = DISAS_EXIT; return true; +#endif } static bool trans_NOP(DisasContext *s, arg_NOP *a) @@ -2049,6 +2144,14 @@ static bool trans_ESB(DisasContext *s, arg_ESB *a) return true; } +static bool trans_GCSB(DisasContext *s, arg_GCSB *a) +{ + if (dc_isar_feature(aa64_gcs, s)) { + tcg_gen_mb(TCG_BAR_SC | TCG_MO_ALL); + } + return true; +} + static bool trans_PACIAZ(DisasContext *s, arg_PACIAZ *a) { if (s->pauth_active) { @@ -2113,6 +2216,20 @@ static bool trans_AUTIBSP(DisasContext *s, arg_AUTIBSP *a) return true; } +static bool trans_CHKFEAT(DisasContext *s, arg_CHKFEAT *a) +{ + uint64_t feat_en = 0; + + if (s->gcs_en) { + feat_en |= 1 << 0; + } + if (feat_en) { + TCGv_i64 x16 = cpu_reg(s, 16); + tcg_gen_andi_i64(x16, x16, ~feat_en); + } + return true; +} + static bool trans_CLREX(DisasContext *s, arg_CLREX *a) { tcg_gen_movi_i64(cpu_exclusive_addr, -1); @@ -2444,6 +2561,195 @@ static void gen_sysreg_undef(DisasContext *s, bool isread, gen_exception_insn(s, 0, EXCP_UDEF, syndrome); } +static void gen_gcspopm(DisasContext *s, int rt) +{ + TCGv_i64 gcspr = cpu_gcspr[s->current_el]; + int mmuidx = core_gcs_mem_index(s->mmu_idx); + MemOp mop = finalize_memop(s, MO_64 | MO_ALIGN); + TCGv_i64 value = tcg_temp_new_i64(); + TCGLabel *fail_label = + delay_exception(s, EXCP_UDEF, syn_gcs_data_check(GCS_IT_GCSPOPM, rt)); + + /* The value at top-of-stack must have low 2 bits clear. */ + tcg_gen_qemu_ld_i64(value, clean_data_tbi(s, gcspr), mmuidx, mop); + tcg_gen_brcondi_i64(TCG_COND_TSTNE, value, 3, fail_label); + + /* Complete the pop and return the value. 
*/ + tcg_gen_addi_i64(gcspr, gcspr, 8); + tcg_gen_mov_i64(cpu_reg(s, rt), value); +} + +static void gen_gcspushx(DisasContext *s) +{ + TCGv_i64 gcspr = cpu_gcspr[s->current_el]; + int spsr_idx = aarch64_banked_spsr_index(s->current_el); + int spsr_off = offsetof(CPUARMState, banked_spsr[spsr_idx]); + int elr_off = offsetof(CPUARMState, elr_el[s->current_el]); + int mmuidx = core_gcs_mem_index(s->mmu_idx); + MemOp mop = finalize_memop(s, MO_64 | MO_ALIGN); + TCGv_i64 addr = tcg_temp_new_i64(); + TCGv_i64 tmp = tcg_temp_new_i64(); + + tcg_gen_addi_i64(addr, gcspr, -8); + tcg_gen_qemu_st_i64(cpu_reg(s, 30), addr, mmuidx, mop); + + tcg_gen_ld_i64(tmp, tcg_env, spsr_off); + tcg_gen_addi_i64(addr, addr, -8); + tcg_gen_qemu_st_i64(tmp, addr, mmuidx, mop); + + tcg_gen_ld_i64(tmp, tcg_env, elr_off); + tcg_gen_addi_i64(addr, addr, -8); + tcg_gen_qemu_st_i64(tmp, addr, mmuidx, mop); + + tcg_gen_addi_i64(addr, addr, -8); + tcg_gen_qemu_st_i64(tcg_constant_i64(0b1001), addr, mmuidx, mop); + + tcg_gen_mov_i64(gcspr, addr); + clear_pstate_bits(PSTATE_EXLOCK); +} + +static void gen_gcspopcx(DisasContext *s) +{ + TCGv_i64 gcspr = cpu_gcspr[s->current_el]; + int spsr_idx = aarch64_banked_spsr_index(s->current_el); + int spsr_off = offsetof(CPUARMState, banked_spsr[spsr_idx]); + int elr_off = offsetof(CPUARMState, elr_el[s->current_el]); + int gcscr_off = offsetof(CPUARMState, cp15.gcscr_el[s->current_el]); + int pstate_off = offsetof(CPUARMState, pstate); + int mmuidx = core_gcs_mem_index(s->mmu_idx); + MemOp mop = finalize_memop(s, MO_64 | MO_ALIGN); + TCGv_i64 addr = tcg_temp_new_i64(); + TCGv_i64 tmp1 = tcg_temp_new_i64(); + TCGv_i64 tmp2 = tcg_temp_new_i64(); + TCGLabel *fail_label = + delay_exception(s, EXCP_UDEF, syn_gcs_data_check(GCS_IT_GCSPOPCX, 31)); + + /* The value at top-of-stack must be an exception token. */ + tcg_gen_qemu_ld_i64(tmp1, gcspr, mmuidx, mop); + tcg_gen_brcondi_i64(TCG_COND_NE, tmp1, 0b1001, fail_label); + + /* Validate in turn, ELR ... */ + tcg_gen_addi_i64(addr, gcspr, 8); + tcg_gen_qemu_ld_i64(tmp1, addr, mmuidx, mop); + tcg_gen_ld_i64(tmp2, tcg_env, elr_off); + tcg_gen_brcond_i64(TCG_COND_NE, tmp1, tmp2, fail_label); + + /* ... SPSR ... */ + tcg_gen_addi_i64(addr, addr, 8); + tcg_gen_qemu_ld_i64(tmp1, addr, mmuidx, mop); + tcg_gen_ld_i64(tmp2, tcg_env, spsr_off); + tcg_gen_brcond_i64(TCG_COND_NE, tmp1, tmp2, fail_label); + + /* ... and LR. */ + tcg_gen_addi_i64(addr, addr, 8); + tcg_gen_qemu_ld_i64(tmp1, addr, mmuidx, mop); + tcg_gen_brcond_i64(TCG_COND_NE, tmp1, cpu_reg(s, 30), fail_label); + + /* Writeback stack pointer after pop. */ + tcg_gen_addi_i64(gcspr, addr, 8); + + /* PSTATE.EXLOCK = GetCurrentEXLOCKEN(). */ + tcg_gen_ld_i64(tmp1, tcg_env, gcscr_off); + tcg_gen_ld_i64(tmp2, tcg_env, pstate_off); + tcg_gen_shri_i64(tmp1, tmp1, ctz64(GCSCR_EXLOCKEN)); + tcg_gen_deposit_i64(tmp2, tmp2, tmp1, ctz64(PSTATE_EXLOCK), 1); + tcg_gen_st_i64(tmp2, tcg_env, pstate_off); +} + +static void gen_gcspopx(DisasContext *s) +{ + TCGv_i64 gcspr = cpu_gcspr[s->current_el]; + int mmuidx = core_gcs_mem_index(s->mmu_idx); + MemOp mop = finalize_memop(s, MO_64 | MO_ALIGN); + TCGv_i64 addr = tcg_temp_new_i64(); + TCGv_i64 tmp = tcg_temp_new_i64(); + TCGLabel *fail_label = + delay_exception(s, EXCP_UDEF, syn_gcs_data_check(GCS_IT_GCSPOPX, 31)); + + /* The value at top-of-stack must be an exception token. 
*/ + tcg_gen_qemu_ld_i64(tmp, gcspr, mmuidx, mop); + tcg_gen_brcondi_i64(TCG_COND_NE, tmp, 0b1001, fail_label); + + /* + * The other three values in the exception return record + * are ignored, but are loaded anyway to raise faults. + */ + tcg_gen_addi_i64(addr, gcspr, 8); + tcg_gen_qemu_ld_i64(tmp, addr, mmuidx, mop); + tcg_gen_addi_i64(addr, addr, 8); + tcg_gen_qemu_ld_i64(tmp, addr, mmuidx, mop); + tcg_gen_addi_i64(addr, addr, 8); + tcg_gen_qemu_ld_i64(tmp, addr, mmuidx, mop); + tcg_gen_addi_i64(gcspr, addr, 8); +} + +static void gen_gcsss1(DisasContext *s, int rt) +{ + TCGv_i64 gcspr = cpu_gcspr[s->current_el]; + int mmuidx = core_gcs_mem_index(s->mmu_idx); + MemOp mop = finalize_memop(s, MO_64 | MO_ALIGN); + TCGv_i64 inptr = cpu_reg(s, rt); + TCGv_i64 cmp = tcg_temp_new_i64(); + TCGv_i64 new = tcg_temp_new_i64(); + TCGv_i64 old = tcg_temp_new_i64(); + TCGLabel *fail_label = + delay_exception(s, EXCP_UDEF, syn_gcs_data_check(GCS_IT_GCSSS1, rt)); + + /* Compute the valid cap entry that the new stack must have. */ + tcg_gen_deposit_i64(cmp, inptr, tcg_constant_i64(1), 0, 12); + /* Compute the in-progress cap entry for the old stack. */ + tcg_gen_deposit_i64(new, gcspr, tcg_constant_i64(5), 0, 3); + + /* Swap the valid cap with the in-progress cap. */ + tcg_gen_atomic_cmpxchg_i64(old, inptr, cmp, new, mmuidx, mop); + tcg_gen_brcond_i64(TCG_COND_NE, old, cmp, fail_label); + + /* The new stack had a valid cap: change gcspr. */ + tcg_gen_andi_i64(gcspr, inptr, ~7); +} + +static void gen_gcsss2(DisasContext *s, int rt) +{ + TCGv_i64 gcspr = cpu_gcspr[s->current_el]; + int mmuidx = core_gcs_mem_index(s->mmu_idx); + MemOp mop = finalize_memop(s, MO_64 | MO_ALIGN); + TCGv_i64 outptr = tcg_temp_new_i64(); + TCGv_i64 tmp = tcg_temp_new_i64(); + TCGLabel *fail_label = + delay_exception(s, EXCP_UDEF, syn_gcs_data_check(GCS_IT_GCSSS2, rt)); + + /* Validate that the new stack has an in-progress cap. */ + tcg_gen_qemu_ld_i64(outptr, gcspr, mmuidx, mop); + tcg_gen_andi_i64(tmp, outptr, 7); + tcg_gen_brcondi_i64(TCG_COND_NE, tmp, 5, fail_label); + + /* Push a valid cap to the old stack. */ + tcg_gen_andi_i64(outptr, outptr, ~7); + tcg_gen_addi_i64(outptr, outptr, -8); + tcg_gen_deposit_i64(tmp, outptr, tcg_constant_i64(1), 0, 12); + tcg_gen_qemu_st_i64(tmp, outptr, mmuidx, mop); + tcg_gen_mb(TCG_BAR_SC | TCG_MO_ALL); + + /* Pop the in-progress cap from the new stack. */ + tcg_gen_addi_i64(gcspr, gcspr, 8); + + /* Return a pointer to the old stack cap. */ + tcg_gen_mov_i64(cpu_reg(s, rt), outptr); +} + +/* + * Look up @key, returning the cpreg, which must exist. + * Additionally, the new cpreg must also be accessible. 
+ */ +static const ARMCPRegInfo * +redirect_cpreg(DisasContext *s, uint32_t key, bool isread) +{ + const ARMCPRegInfo *ri = get_arm_cp_reginfo(s->cp_regs, key); + assert(ri); + assert(cp_access_ok(s->current_el, ri, isread)); + return ri; +} + /* MRS - move from system register * MSR (register) - move to system register * SYS @@ -2455,8 +2761,7 @@ static void handle_sys(DisasContext *s, bool isread, unsigned int op0, unsigned int op1, unsigned int op2, unsigned int crn, unsigned int crm, unsigned int rt) { - uint32_t key = ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP, - crn, crm, op0, op1, op2); + uint32_t key = ENCODE_AA64_CP_REG(op0, op1, crn, crm, op2); const ARMCPRegInfo *ri = get_arm_cp_reginfo(s->cp_regs, key); bool need_exit_tb = false; bool nv_trap_to_el2 = false; @@ -2550,6 +2855,27 @@ static void handle_sys(DisasContext *s, bool isread, } } + if (ri->vhe_redir_to_el2 && s->current_el == 2 && s->e2h) { + /* + * This is one of the FOO_EL1 registers which redirect to FOO_EL2 + * from EL2 when HCR_EL2.E2H is set. + */ + key = ri->vhe_redir_to_el2; + ri = redirect_cpreg(s, key, isread); + } else if (ri->vhe_redir_to_el01 && s->current_el >= 2) { + /* + * This is one of the FOO_EL12 or FOO_EL02 registers. + * With !E2H, they all UNDEF. + * With E2H, from EL2 or EL3, they redirect to FOO_EL1/FOO_EL0. + */ + if (!s->e2h) { + gen_sysreg_undef(s, isread, op0, op1, op2, crn, crm, rt); + return; + } + key = ri->vhe_redir_to_el01; + ri = redirect_cpreg(s, key, isread); + } + if (ri->accessfn || (ri->fgt && s->fgt_active)) { /* Emit code to perform further access permissions checks at * runtime; this may result in an exception. @@ -2592,11 +2918,8 @@ static void handle_sys(DisasContext *s, bool isread, * We don't use the EL1 register's access function, and * fine-grained-traps on EL1 also do not apply here. */ - key = ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP, - crn, crm, op0, 0, op2); - ri = get_arm_cp_reginfo(s->cp_regs, key); - assert(ri); - assert(cp_access_ok(s->current_el, ri, isread)); + key = ENCODE_AA64_CP_REG(op0, 0, crn, crm, op2); + ri = redirect_cpreg(s, key, isread); /* * We might not have done an update_pc earlier, so check we don't * need it. We could support this in future if necessary. @@ -2720,6 +3043,51 @@ static void handle_sys(DisasContext *s, bool isread, } } return; + case ARM_CP_GCSPUSHM: + if (s->gcs_en) { + gen_add_gcs_record(s, cpu_reg(s, rt)); + } + return; + case ARM_CP_GCSPOPM: + /* Note that X[rt] is unchanged if !GCSEnabled. */ + if (s->gcs_en) { + gen_gcspopm(s, rt); + } + return; + case ARM_CP_GCSPUSHX: + /* Choose the CONSTRAINED UNPREDICTABLE for UNDEF. */ + if (rt != 31) { + unallocated_encoding(s); + } else if (s->gcs_en) { + gen_gcspushx(s); + } + return; + case ARM_CP_GCSPOPCX: + /* Choose the CONSTRAINED UNPREDICTABLE for UNDEF. */ + if (rt != 31) { + unallocated_encoding(s); + } else if (s->gcs_en) { + gen_gcspopcx(s); + } + return; + case ARM_CP_GCSPOPX: + /* Choose the CONSTRAINED UNPREDICTABLE for UNDEF. 
*/ + if (rt != 31) { + unallocated_encoding(s); + } else if (s->gcs_en) { + gen_gcspopx(s); + } + return; + case ARM_CP_GCSSS1: + if (s->gcs_en) { + gen_gcsss1(s, rt); + } + return; + case ARM_CP_GCSSS2: + if (s->gcs_en) { + gen_gcsss2(s, rt); + } + return; default: g_assert_not_reached(); } @@ -3226,7 +3594,7 @@ static bool trans_LDXP(DisasContext *s, arg_stxr *a) static bool trans_CASP(DisasContext *s, arg_CASP *a) { - if (!dc_isar_feature(aa64_atomics, s)) { + if (!dc_isar_feature(aa64_lse, s)) { return false; } if (((a->rt | a->rs) & 1) != 0) { @@ -3239,7 +3607,7 @@ static bool trans_CASP(DisasContext *s, arg_CASP *a) static bool trans_CAS(DisasContext *s, arg_CAS *a) { - if (!dc_isar_feature(aa64_atomics, s)) { + if (!dc_isar_feature(aa64_lse, s)) { return false; } gen_compare_and_swap(s, a->rs, a->rt, a->rn, a->sz); @@ -3514,7 +3882,7 @@ static void op_addr_ldst_imm_pre(DisasContext *s, arg_ldst_imm *a, if (!a->p) { tcg_gen_addi_i64(*dirty_addr, *dirty_addr, offset); } - memidx = get_a64_user_mem_index(s, a->unpriv); + memidx = core_a64_user_mem_index(s, a->unpriv); *clean_addr = gen_mte_check1_mmuidx(s, *dirty_addr, is_store, a->w || a->rn != 31, mop, a->unpriv, memidx); @@ -3535,7 +3903,7 @@ static bool trans_STR_i(DisasContext *s, arg_ldst_imm *a) { bool iss_sf, iss_valid = !a->w; TCGv_i64 clean_addr, dirty_addr, tcg_rt; - int memidx = get_a64_user_mem_index(s, a->unpriv); + int memidx = core_a64_user_mem_index(s, a->unpriv); MemOp mop = finalize_memop(s, a->sz + a->sign * MO_SIGN); op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, true, mop); @@ -3553,7 +3921,7 @@ static bool trans_LDR_i(DisasContext *s, arg_ldst_imm *a) { bool iss_sf, iss_valid = !a->w; TCGv_i64 clean_addr, dirty_addr, tcg_rt; - int memidx = get_a64_user_mem_index(s, a->unpriv); + int memidx = core_a64_user_mem_index(s, a->unpriv); MemOp mop = finalize_memop(s, a->sz + a->sign * MO_SIGN); op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, false, mop); @@ -3732,15 +4100,64 @@ static bool do_atomic_ld(DisasContext *s, arg_atomic *a, AtomicThreeOpFn *fn, return true; } -TRANS_FEAT(LDADD, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_add_i64, 0, false) -TRANS_FEAT(LDCLR, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_and_i64, 0, true) -TRANS_FEAT(LDEOR, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_xor_i64, 0, false) -TRANS_FEAT(LDSET, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_or_i64, 0, false) -TRANS_FEAT(LDSMAX, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_smax_i64, MO_SIGN, false) -TRANS_FEAT(LDSMIN, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_smin_i64, MO_SIGN, false) -TRANS_FEAT(LDUMAX, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_umax_i64, 0, false) -TRANS_FEAT(LDUMIN, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_umin_i64, 0, false) -TRANS_FEAT(SWP, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_xchg_i64, 0, false) +TRANS_FEAT(LDADD, aa64_lse, do_atomic_ld, a, tcg_gen_atomic_fetch_add_i64, 0, false) +TRANS_FEAT(LDCLR, aa64_lse, do_atomic_ld, a, tcg_gen_atomic_fetch_and_i64, 0, true) +TRANS_FEAT(LDEOR, aa64_lse, do_atomic_ld, a, tcg_gen_atomic_fetch_xor_i64, 0, false) +TRANS_FEAT(LDSET, aa64_lse, do_atomic_ld, a, tcg_gen_atomic_fetch_or_i64, 0, false) +TRANS_FEAT(LDSMAX, aa64_lse, do_atomic_ld, a, tcg_gen_atomic_fetch_smax_i64, MO_SIGN, false) +TRANS_FEAT(LDSMIN, aa64_lse, do_atomic_ld, a, tcg_gen_atomic_fetch_smin_i64, MO_SIGN, false) +TRANS_FEAT(LDUMAX, aa64_lse, do_atomic_ld, a, tcg_gen_atomic_fetch_umax_i64, 0, false) 
+TRANS_FEAT(LDUMIN, aa64_lse, do_atomic_ld, a, tcg_gen_atomic_fetch_umin_i64, 0, false) +TRANS_FEAT(SWP, aa64_lse, do_atomic_ld, a, tcg_gen_atomic_xchg_i64, 0, false) + +typedef void Atomic128ThreeOpFn(TCGv_i128, TCGv_i64, TCGv_i128, TCGArg, MemOp); + +static bool do_atomic128_ld(DisasContext *s, arg_atomic128 *a, + Atomic128ThreeOpFn *fn, bool invert) +{ + MemOp mop; + int rlo, rhi; + TCGv_i64 clean_addr, tlo, thi; + TCGv_i128 t16; + + if (a->rt == 31 || a->rt2 == 31 || a->rt == a->rt2) { + return false; + } + if (a->rn == 31) { + gen_check_sp_alignment(s); + } + mop = check_atomic_align(s, a->rn, MO_128); + clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn), false, + a->rn != 31, mop); + + rlo = (s->be_data == MO_LE ? a->rt : a->rt2); + rhi = (s->be_data == MO_LE ? a->rt2 : a->rt); + + tlo = read_cpu_reg(s, rlo, true); + thi = read_cpu_reg(s, rhi, true); + if (invert) { + tcg_gen_not_i64(tlo, tlo); + tcg_gen_not_i64(thi, thi); + } + /* + * The tcg atomic primitives are all full barriers. Therefore we + * can ignore the Acquire and Release bits of this instruction. + */ + t16 = tcg_temp_new_i128(); + tcg_gen_concat_i64_i128(t16, tlo, thi); + + fn(t16, clean_addr, t16, get_mem_index(s), mop); + + tcg_gen_extr_i128_i64(cpu_reg(s, rlo), cpu_reg(s, rhi), t16); + return true; +} + +TRANS_FEAT(LDCLRP, aa64_lse128, do_atomic128_ld, + a, tcg_gen_atomic_fetch_and_i128, true) +TRANS_FEAT(LDSETP, aa64_lse128, do_atomic128_ld, + a, tcg_gen_atomic_fetch_or_i128, false) +TRANS_FEAT(SWPP, aa64_lse128, do_atomic128_ld, + a, tcg_gen_atomic_xchg_i128, false) static bool trans_LDAPR(DisasContext *s, arg_LDAPR *a) { @@ -3748,7 +4165,7 @@ static bool trans_LDAPR(DisasContext *s, arg_LDAPR *a) TCGv_i64 clean_addr; MemOp mop; - if (!dc_isar_feature(aa64_atomics, s) || + if (!dc_isar_feature(aa64_lse, s) || !dc_isar_feature(aa64_rcpc_8_3, s)) { return false; } @@ -3871,6 +4288,42 @@ static bool trans_STLR_i(DisasContext *s, arg_ldapr_stlr_i *a) return true; } +static bool trans_GCSSTR(DisasContext *s, arg_GCSSTR *a) +{ + ARMMMUIdx armidx; + + if (!dc_isar_feature(aa64_gcs, s)) { + return false; + } + + /* + * The pseudocode for GCSSTTR is + * + * effective_el = AArch64.IsUnprivAccessPriv() ? PSTATE.EL : EL0; + * if (effective_el == PSTATE.EL) CheckGCSSTREnabled(); + * + * We have cached the result of IsUnprivAccessPriv in DisasContext, + * but since we need the result of full_a64_user_mem_index anyway, + * use the mmu_idx test as a proxy for the effective_el test. 
+ */ + armidx = full_a64_user_mem_index(s, a->unpriv); + if (armidx == s->mmu_idx && s->gcsstr_el != 0) { + gen_exception_insn_el(s, 0, EXCP_UDEF, + syn_gcs_gcsstr(a->rn, a->rt), + s->gcsstr_el); + return true; + } + + if (a->rn == 31) { + gen_check_sp_alignment(s); + } + tcg_gen_qemu_st_i64(cpu_reg(s, a->rt), + clean_data_tbi(s, cpu_reg_sp(s, a->rn)), + core_gcs_mem_index(armidx), + finalize_memop(s, MO_64 | MO_ALIGN)); + return true; +} + static bool trans_LD_mult(DisasContext *s, arg_ldst_mult *a) { TCGv_i64 clean_addr, tcg_rn, tcg_ebytes; @@ -4402,7 +4855,7 @@ static bool do_SET(DisasContext *s, arg_set *a, bool is_epilogue, return false; } - memidx = get_a64_user_mem_index(s, a->unpriv); + memidx = core_a64_user_mem_index(s, a->unpriv); /* * We pass option_a == true, matching our implementation; @@ -4456,8 +4909,8 @@ static bool do_CPY(DisasContext *s, arg_cpy *a, bool is_epilogue, CpyFn fn) return false; } - rmemidx = get_a64_user_mem_index(s, runpriv); - wmemidx = get_a64_user_mem_index(s, wunpriv); + rmemidx = core_a64_user_mem_index(s, runpriv); + wmemidx = core_a64_user_mem_index(s, wunpriv); /* * We pass option_a == true, matching our implementation; @@ -4542,6 +4995,50 @@ TRANS(ADDS_i, gen_rri, a, 0, 1, a->sf ? gen_add64_CC : gen_add32_CC) TRANS(SUBS_i, gen_rri, a, 0, 1, a->sf ? gen_sub64_CC : gen_sub32_CC) /* + * Min/Max (immediate) + */ + +static void gen_wrap3_i32(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, NeonGenTwoOpFn fn) +{ + TCGv_i32 t1 = tcg_temp_new_i32(); + TCGv_i32 t2 = tcg_temp_new_i32(); + + tcg_gen_extrl_i64_i32(t1, n); + tcg_gen_extrl_i64_i32(t2, m); + fn(t1, t1, t2); + tcg_gen_extu_i32_i64(d, t1); +} + +static void gen_smax32_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) +{ + gen_wrap3_i32(d, n, m, tcg_gen_smax_i32); +} + +static void gen_smin32_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) +{ + gen_wrap3_i32(d, n, m, tcg_gen_smin_i32); +} + +static void gen_umax32_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) +{ + gen_wrap3_i32(d, n, m, tcg_gen_umax_i32); +} + +static void gen_umin32_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) +{ + gen_wrap3_i32(d, n, m, tcg_gen_umin_i32); +} + +TRANS_FEAT(SMAX_i, aa64_cssc, gen_rri, a, 0, 0, + a->sf ? tcg_gen_smax_i64 : gen_smax32_i64) +TRANS_FEAT(SMIN_i, aa64_cssc, gen_rri, a, 0, 0, + a->sf ? tcg_gen_smin_i64 : gen_smin32_i64) +TRANS_FEAT(UMAX_i, aa64_cssc, gen_rri, a, 0, 0, + a->sf ? tcg_gen_umax_i64 : gen_umax32_i64) +TRANS_FEAT(UMIN_i, aa64_cssc, gen_rri, a, 0, 0, + a->sf ? 
tcg_gen_umin_i64 : gen_umin32_i64) + +/* * Add/subtract (immediate, with tags) */ @@ -6103,9 +6600,9 @@ static bool do_dot_vector_env(DisasContext *s, arg_qrrr_e *a, return true; } -TRANS_FEAT(SDOT_v, aa64_dp, do_dot_vector, a, gen_helper_gvec_sdot_b) -TRANS_FEAT(UDOT_v, aa64_dp, do_dot_vector, a, gen_helper_gvec_udot_b) -TRANS_FEAT(USDOT_v, aa64_i8mm, do_dot_vector, a, gen_helper_gvec_usdot_b) +TRANS_FEAT(SDOT_v, aa64_dp, do_dot_vector, a, gen_helper_gvec_sdot_4b) +TRANS_FEAT(UDOT_v, aa64_dp, do_dot_vector, a, gen_helper_gvec_udot_4b) +TRANS_FEAT(USDOT_v, aa64_i8mm, do_dot_vector, a, gen_helper_gvec_usdot_4b) TRANS_FEAT(BFDOT_v, aa64_bf16, do_dot_vector_env, a, gen_helper_gvec_bfdot) TRANS_FEAT(BFMMLA, aa64_bf16, do_dot_vector_env, a, gen_helper_gvec_bfmmla) TRANS_FEAT(SMMLA, aa64_i8mm, do_dot_vector, a, gen_helper_gvec_smmla_b) @@ -6865,12 +7362,12 @@ static bool do_dot_vector_idx_env(DisasContext *s, arg_qrrx_e *a, return true; } -TRANS_FEAT(SDOT_vi, aa64_dp, do_dot_vector_idx, a, gen_helper_gvec_sdot_idx_b) -TRANS_FEAT(UDOT_vi, aa64_dp, do_dot_vector_idx, a, gen_helper_gvec_udot_idx_b) +TRANS_FEAT(SDOT_vi, aa64_dp, do_dot_vector_idx, a, gen_helper_gvec_sdot_idx_4b) +TRANS_FEAT(UDOT_vi, aa64_dp, do_dot_vector_idx, a, gen_helper_gvec_udot_idx_4b) TRANS_FEAT(SUDOT_vi, aa64_i8mm, do_dot_vector_idx, a, - gen_helper_gvec_sudot_idx_b) + gen_helper_gvec_sudot_idx_4b) TRANS_FEAT(USDOT_vi, aa64_i8mm, do_dot_vector_idx, a, - gen_helper_gvec_usdot_idx_b) + gen_helper_gvec_usdot_idx_4b) TRANS_FEAT(BFDOT_vi, aa64_bf16, do_dot_vector_idx_env, a, gen_helper_gvec_bfdot_idx) @@ -8146,6 +8643,28 @@ static bool trans_PACGA(DisasContext *s, arg_rrr *a) return false; } +static bool gen_rrr(DisasContext *s, arg_rrr_sf *a, ArithTwoOp fn) +{ + TCGv_i64 tcg_rm = cpu_reg(s, a->rm); + TCGv_i64 tcg_rn = cpu_reg(s, a->rn); + TCGv_i64 tcg_rd = cpu_reg(s, a->rd); + + fn(tcg_rd, tcg_rn, tcg_rm); + if (!a->sf) { + tcg_gen_ext32u_i64(tcg_rd, tcg_rd); + } + return true; +} + +TRANS_FEAT(SMAX, aa64_cssc, gen_rrr, a, + a->sf ? tcg_gen_smax_i64 : gen_smax32_i64) +TRANS_FEAT(SMIN, aa64_cssc, gen_rrr, a, + a->sf ? tcg_gen_smin_i64 : gen_smin32_i64) +TRANS_FEAT(UMAX, aa64_cssc, gen_rrr, a, + a->sf ? tcg_gen_umax_i64 : gen_umax32_i64) +TRANS_FEAT(UMIN, aa64_cssc, gen_rrr, a, + a->sf ? tcg_gen_umin_i64 : gen_umin32_i64) + typedef void ArithOneOp(TCGv_i64, TCGv_i64); static bool gen_rr(DisasContext *s, int rd, int rn, ArithOneOp fn) @@ -8154,13 +8673,22 @@ static bool gen_rr(DisasContext *s, int rd, int rn, ArithOneOp fn) return true; } -static void gen_rbit32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn) +/* + * Perform 32-bit operation fn on the low half of n; + * the high half of the output is zeroed. + */ +static void gen_wrap2_i32(TCGv_i64 d, TCGv_i64 n, NeonGenOneOpFn fn) { - TCGv_i32 t32 = tcg_temp_new_i32(); + TCGv_i32 t = tcg_temp_new_i32(); - tcg_gen_extrl_i64_i32(t32, tcg_rn); - gen_helper_rbit(t32, t32); - tcg_gen_extu_i32_i64(tcg_rd, t32); + tcg_gen_extrl_i64_i32(t, n); + fn(t, t); + tcg_gen_extu_i32_i64(d, t); +} + +static void gen_rbit32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn) +{ + gen_wrap2_i32(tcg_rd, tcg_rn, gen_helper_rbit); } static void gen_rev16_xx(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn, TCGv_i64 mask) @@ -8216,15 +8744,42 @@ static void gen_clz64(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn) static void gen_cls32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn) { + gen_wrap2_i32(tcg_rd, tcg_rn, tcg_gen_clrsb_i32); +} + +TRANS(CLZ, gen_rr, a->rd, a->rn, a->sf ? gen_clz64 : gen_clz32) +TRANS(CLS, gen_rr, a->rd, a->rn, a->sf ? 
tcg_gen_clrsb_i64 : gen_cls32) + +static void gen_ctz32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn) +{ TCGv_i32 t32 = tcg_temp_new_i32(); tcg_gen_extrl_i64_i32(t32, tcg_rn); - tcg_gen_clrsb_i32(t32, t32); + tcg_gen_ctzi_i32(t32, t32, 32); tcg_gen_extu_i32_i64(tcg_rd, t32); } -TRANS(CLZ, gen_rr, a->rd, a->rn, a->sf ? gen_clz64 : gen_clz32) -TRANS(CLS, gen_rr, a->rd, a->rn, a->sf ? tcg_gen_clrsb_i64 : gen_cls32) +static void gen_ctz64(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn) +{ + tcg_gen_ctzi_i64(tcg_rd, tcg_rn, 64); +} + +static void gen_cnt32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn) +{ + gen_wrap2_i32(tcg_rd, tcg_rn, tcg_gen_ctpop_i32); +} + +static void gen_abs32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn) +{ + gen_wrap2_i32(tcg_rd, tcg_rn, tcg_gen_abs_i32); +} + +TRANS_FEAT(CTZ, aa64_cssc, gen_rr, a->rd, a->rn, + a->sf ? gen_ctz64 : gen_ctz32) +TRANS_FEAT(CNT, aa64_cssc, gen_rr, a->rd, a->rn, + a->sf ? tcg_gen_ctpop_i64 : gen_cnt32) +TRANS_FEAT(ABS, aa64_cssc, gen_rr, a->rd, a->rn, + a->sf ? tcg_gen_abs_i64 : gen_abs32) static bool gen_pacaut(DisasContext *s, arg_pacaut *a, NeonGenTwo64OpEnvFn fn) { @@ -10128,8 +10683,10 @@ static void aarch64_tr_init_disas_context(DisasContextBase *dcbase, dc->trap_eret = EX_TBFLAG_A64(tb_flags, TRAP_ERET); dc->sve_excp_el = EX_TBFLAG_A64(tb_flags, SVEEXC_EL); dc->sme_excp_el = EX_TBFLAG_A64(tb_flags, SMEEXC_EL); + dc->zt0_excp_el = EX_TBFLAG_A64(tb_flags, ZT0EXC_EL); dc->vl = (EX_TBFLAG_A64(tb_flags, VL) + 1) * 16; dc->svl = (EX_TBFLAG_A64(tb_flags, SVL) + 1) * 16; + dc->max_svl = arm_cpu->sme_max_vq * 16; dc->pauth_active = EX_TBFLAG_A64(tb_flags, PAUTH_ACTIVE); dc->bt = EX_TBFLAG_A64(tb_flags, BT); dc->btype = EX_TBFLAG_A64(tb_flags, BTYPE); @@ -10142,13 +10699,17 @@ static void aarch64_tr_init_disas_context(DisasContextBase *dcbase, dc->pstate_za = EX_TBFLAG_A64(tb_flags, PSTATE_ZA); dc->sme_trap_nonstreaming = EX_TBFLAG_A64(tb_flags, SME_TRAP_NONSTREAMING); dc->naa = EX_TBFLAG_A64(tb_flags, NAA); + dc->e2h = EX_TBFLAG_A64(tb_flags, E2H); dc->nv = EX_TBFLAG_A64(tb_flags, NV); dc->nv1 = EX_TBFLAG_A64(tb_flags, NV1); dc->nv2 = EX_TBFLAG_A64(tb_flags, NV2); - dc->nv2_mem_e20 = EX_TBFLAG_A64(tb_flags, NV2_MEM_E20); + dc->nv2_mem_e20 = dc->nv2 && dc->e2h; dc->nv2_mem_be = EX_TBFLAG_A64(tb_flags, NV2_MEM_BE); dc->fpcr_ah = EX_TBFLAG_A64(tb_flags, AH); dc->fpcr_nep = EX_TBFLAG_A64(tb_flags, NEP); + dc->gcs_en = EX_TBFLAG_A64(tb_flags, GCS_EN); + dc->gcs_rvcen = EX_TBFLAG_A64(tb_flags, GCS_RVCEN); + dc->gcsstr_el = EX_TBFLAG_A64(tb_flags, GCSSTR_EL); dc->vec_len = 0; dc->vec_stride = 0; dc->cp_regs = arm_cpu->cp_regs; @@ -10375,6 +10936,8 @@ static void aarch64_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu) break; } } + + emit_delayed_exceptions(dc); } const TranslatorOps aarch64_translator_ops = { diff --git a/target/arm/tcg/translate-a64.h b/target/arm/tcg/translate-a64.h index b2420f5..9c45f89 100644 --- a/target/arm/tcg/translate-a64.h +++ b/target/arm/tcg/translate-a64.h @@ -28,7 +28,7 @@ bool logic_imm_decode_wmask(uint64_t *result, unsigned int immn, bool sve_access_check(DisasContext *s); bool sme_enabled_check(DisasContext *s); bool sme_enabled_check_with_svcr(DisasContext *s, unsigned); -uint32_t make_svemte_desc(DisasContext *s, unsigned vsz, uint32_t nregs, +uint64_t make_svemte_desc(DisasContext *s, unsigned vsz, uint32_t nregs, uint32_t msz, bool is_write, uint32_t data); /* This function corresponds to CheckStreamingSVEEnabled. 
*/ @@ -225,7 +225,13 @@ void gen_gvec_usqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz); -void gen_sve_ldr(DisasContext *s, TCGv_ptr, int vofs, int len, int rn, int imm); -void gen_sve_str(DisasContext *s, TCGv_ptr, int vofs, int len, int rn, int imm); +void gen_gvec_sve2_sqdmulh(unsigned vece, uint32_t rd_ofs, + uint32_t rn_ofs, uint32_t rm_ofs, + uint32_t opr_sz, uint32_t max_sz); + +void gen_sve_ldr(DisasContext *s, TCGv_ptr, int vofs, + int len, int rn, int imm, MemOp align); +void gen_sve_str(DisasContext *s, TCGv_ptr, int vofs, + int len, int rn, int imm, MemOp align); #endif /* TARGET_ARM_TRANSLATE_A64_H */ diff --git a/target/arm/tcg/translate-neon.c b/target/arm/tcg/translate-neon.c index c4fecb8..844d2e2 100644 --- a/target/arm/tcg/translate-neon.c +++ b/target/arm/tcg/translate-neon.c @@ -271,7 +271,7 @@ static bool trans_VSDOT(DisasContext *s, arg_VSDOT *a) return false; } return do_neon_ddda(s, a->q * 7, a->vd, a->vn, a->vm, 0, - gen_helper_gvec_sdot_b); + gen_helper_gvec_sdot_4b); } static bool trans_VUDOT(DisasContext *s, arg_VUDOT *a) @@ -280,7 +280,7 @@ static bool trans_VUDOT(DisasContext *s, arg_VUDOT *a) return false; } return do_neon_ddda(s, a->q * 7, a->vd, a->vn, a->vm, 0, - gen_helper_gvec_udot_b); + gen_helper_gvec_udot_4b); } static bool trans_VUSDOT(DisasContext *s, arg_VUSDOT *a) @@ -289,7 +289,7 @@ static bool trans_VUSDOT(DisasContext *s, arg_VUSDOT *a) return false; } return do_neon_ddda(s, a->q * 7, a->vd, a->vn, a->vm, 0, - gen_helper_gvec_usdot_b); + gen_helper_gvec_usdot_4b); } static bool trans_VDOT_b16(DisasContext *s, arg_VDOT_b16 *a) @@ -356,7 +356,7 @@ static bool trans_VSDOT_scalar(DisasContext *s, arg_VSDOT_scalar *a) return false; } return do_neon_ddda(s, a->q * 6, a->vd, a->vn, a->vm, a->index, - gen_helper_gvec_sdot_idx_b); + gen_helper_gvec_sdot_idx_4b); } static bool trans_VUDOT_scalar(DisasContext *s, arg_VUDOT_scalar *a) @@ -365,7 +365,7 @@ static bool trans_VUDOT_scalar(DisasContext *s, arg_VUDOT_scalar *a) return false; } return do_neon_ddda(s, a->q * 6, a->vd, a->vn, a->vm, a->index, - gen_helper_gvec_udot_idx_b); + gen_helper_gvec_udot_idx_4b); } static bool trans_VUSDOT_scalar(DisasContext *s, arg_VUSDOT_scalar *a) @@ -374,7 +374,7 @@ static bool trans_VUSDOT_scalar(DisasContext *s, arg_VUSDOT_scalar *a) return false; } return do_neon_ddda(s, a->q * 6, a->vd, a->vn, a->vm, a->index, - gen_helper_gvec_usdot_idx_b); + gen_helper_gvec_usdot_idx_4b); } static bool trans_VSUDOT_scalar(DisasContext *s, arg_VSUDOT_scalar *a) @@ -383,7 +383,7 @@ static bool trans_VSUDOT_scalar(DisasContext *s, arg_VSUDOT_scalar *a) return false; } return do_neon_ddda(s, a->q * 6, a->vd, a->vn, a->vm, a->index, - gen_helper_gvec_sudot_idx_b); + gen_helper_gvec_sudot_idx_4b); } static bool trans_VDOT_b16_scal(DisasContext *s, arg_VDOT_b16_scal *a) @@ -1010,8 +1010,8 @@ DO_3S_FP_GVEC(VACGE, gen_helper_gvec_facge_s, gen_helper_gvec_facge_h) DO_3S_FP_GVEC(VACGT, gen_helper_gvec_facgt_s, gen_helper_gvec_facgt_h) DO_3S_FP_GVEC(VMAX, gen_helper_gvec_fmax_s, gen_helper_gvec_fmax_h) DO_3S_FP_GVEC(VMIN, gen_helper_gvec_fmin_s, gen_helper_gvec_fmin_h) -DO_3S_FP_GVEC(VMLA, gen_helper_gvec_fmla_s, gen_helper_gvec_fmla_h) -DO_3S_FP_GVEC(VMLS, gen_helper_gvec_fmls_s, gen_helper_gvec_fmls_h) +DO_3S_FP_GVEC(VMLA, gen_helper_gvec_fmla_nf_s, gen_helper_gvec_fmla_nf_h) +DO_3S_FP_GVEC(VMLS, gen_helper_gvec_fmls_nf_s, gen_helper_gvec_fmls_nf_h) DO_3S_FP_GVEC(VFMA, gen_helper_gvec_vfma_s, gen_helper_gvec_vfma_h) 
DO_3S_FP_GVEC(VFMS, gen_helper_gvec_vfms_s, gen_helper_gvec_vfms_h) DO_3S_FP_GVEC(VRECPS, gen_helper_gvec_recps_nf_s, gen_helper_gvec_recps_nf_h) diff --git a/target/arm/tcg/translate-sme.c b/target/arm/tcg/translate-sme.c index fcbb350..091c56d 100644 --- a/target/arm/tcg/translate-sme.c +++ b/target/arm/tcg/translate-sme.c @@ -27,16 +27,25 @@ #include "decode-sme.c.inc" +static bool sme2_zt0_enabled_check(DisasContext *s) +{ + if (!sme_za_enabled_check(s)) { + return false; + } + if (s->zt0_excp_el) { + gen_exception_insn_el(s, 0, EXCP_UDEF, + syn_smetrap(SME_ET_InaccessibleZT0, false), + s->zt0_excp_el); + return false; + } + return true; +} -/* - * Resolve tile.size[index] to a host pointer, where tile and index - * are always decoded together, dependent on the element size. - */ +/* Resolve tile.size[rs+imm] to a host pointer. */ static TCGv_ptr get_tile_rowcol(DisasContext *s, int esz, int rs, - int tile_index, bool vertical) + int tile, int imm, int div_len, + int vec_mod, bool vertical) { - int tile = tile_index >> (4 - esz); - int index = esz == MO_128 ? 0 : extract32(tile_index, 0, 4 - esz); int pos, len, offset; TCGv_i32 tmp; TCGv_ptr addr; @@ -44,10 +53,23 @@ static TCGv_ptr get_tile_rowcol(DisasContext *s, int esz, int rs, /* Compute the final index, which is Rs+imm. */ tmp = tcg_temp_new_i32(); tcg_gen_trunc_tl_i32(tmp, cpu_reg(s, rs)); - tcg_gen_addi_i32(tmp, tmp, index); + /* + * Round the vector index down to a multiple of vec_mod if necessary. + * We do this before adding the offset, to handle cases like + * MOVA (tile to vector, 2 registers) where we want to call this + * several times in a loop with an increasing offset. We rely on + * the instruction encodings always forcing the initial offset in + * [rs + offset] to be a multiple of vec_mod. The pseudocode usually + * does the round-down after adding the offset rather than before, + * but MOVA is an exception. + */ + if (vec_mod > 1) { + tcg_gen_andc_i32(tmp, tmp, tcg_constant_i32(vec_mod - 1)); + } + tcg_gen_addi_i32(tmp, tmp, imm); /* Prepare a power-of-two modulo via extraction of @len bits. */ - len = ctz32(streaming_vec_reg_size(s)) - esz; + len = ctz32(streaming_vec_reg_size(s) / div_len) - esz; if (!len) { /* @@ -92,7 +114,7 @@ static TCGv_ptr get_tile_rowcol(DisasContext *s, int esz, int rs, offset = tile * sizeof(ARMVectorReg); /* Include the byte offset of zarray to make this relative to env. */ - offset += offsetof(CPUARMState, zarray); + offset += offsetof(CPUARMState, za_state.za); tcg_gen_addi_i32(tmp, tmp, offset); /* Add the byte offset to env to produce the final pointer. */ @@ -103,6 +125,14 @@ static TCGv_ptr get_tile_rowcol(DisasContext *s, int esz, int rs, return addr; } +/* Resolve ZArray[rs+imm] to a host pointer. */ +static TCGv_ptr get_zarray(DisasContext *s, int rs, int imm, + int div_len, int vec_mod) +{ + /* ZA[n] equates to ZA0H.B[n]. */ + return get_tile_rowcol(s, MO_8, rs, 0, imm, div_len, vec_mod, false); +} + /* * Resolve tile.size[0] to a host pointer. * Used by e.g. outer product insns where we require the entire tile. 
@@ -112,7 +142,7 @@ static TCGv_ptr get_tile(DisasContext *s, int esz, int tile) TCGv_ptr addr = tcg_temp_new_ptr(); int offset; - offset = tile * sizeof(ARMVectorReg) + offsetof(CPUARMState, zarray); + offset = tile * sizeof(ARMVectorReg) + offsetof(CPUARMState, za_state.za); tcg_gen_addi_ptr(addr, tcg_env, offset); return addr; @@ -130,7 +160,40 @@ static bool trans_ZERO(DisasContext *s, arg_ZERO *a) return true; } -static bool trans_MOVA(DisasContext *s, arg_MOVA *a) +static bool trans_ZERO_zt0(DisasContext *s, arg_ZERO_zt0 *a) +{ + if (!dc_isar_feature(aa64_sme2, s)) { + return false; + } + if (sme_enabled_check(s) && sme2_zt0_enabled_check(s)) { + tcg_gen_gvec_dup_imm(MO_64, offsetof(CPUARMState, za_state.zt0), + sizeof_field(CPUARMState, za_state.zt0), + sizeof_field(CPUARMState, za_state.zt0), 0); + } + return true; +} + +static bool trans_ZERO_za(DisasContext *s, arg_ZERO_za *a) +{ + if (!dc_isar_feature(aa64_sme2p1, s)) { + return false; + } + if (sme_smza_enabled_check(s)) { + int svl = streaming_vec_reg_size(s); + int vstride = svl / a->ngrp; + TCGv_ptr t_za = get_zarray(s, a->rv, a->off, a->ngrp, a->nvec); + + for (int r = 0; r < a->ngrp; ++r) { + for (int i = 0; i < a->nvec; ++i) { + int o_za = (r * vstride + i) * sizeof(ARMVectorReg); + tcg_gen_gvec_dup_imm_var(MO_64, t_za, o_za, svl, svl, 0); + } + } + } + return true; +} + +static bool do_mova_tile(DisasContext *s, arg_mova_p *a, bool to_vec) { static gen_helper_gvec_4 * const h_fns[5] = { gen_helper_sve_sel_zpzz_b, gen_helper_sve_sel_zpzz_h, @@ -152,14 +215,11 @@ static bool trans_MOVA(DisasContext *s, arg_MOVA *a) TCGv_i32 t_desc; int svl; - if (!dc_isar_feature(aa64_sme, s)) { - return false; - } if (!sme_smza_enabled_check(s)) { return true; } - t_za = get_tile_rowcol(s, a->esz, a->rs, a->za_imm, a->v); + t_za = get_tile_rowcol(s, a->esz, a->rs, a->za, a->off, 1, 0, a->v); t_zr = vec_full_reg_ptr(s, a->zr); t_pg = pred_full_reg_ptr(s, a->pg); @@ -168,14 +228,14 @@ static bool trans_MOVA(DisasContext *s, arg_MOVA *a) if (a->v) { /* Vertical slice -- use sme mova helpers. */ - if (a->to_vec) { + if (to_vec) { zc_fns[a->esz](t_zr, t_za, t_pg, t_desc); } else { cz_fns[a->esz](t_za, t_zr, t_pg, t_desc); } } else { /* Horizontal slice -- reuse sve sel helpers. */ - if (a->to_vec) { + if (to_vec) { h_fns[a->esz](t_zr, t_za, t_zr, t_pg, t_desc); } else { h_fns[a->esz](t_za, t_zr, t_za, t_pg, t_desc); @@ -184,9 +244,150 @@ static bool trans_MOVA(DisasContext *s, arg_MOVA *a) return true; } +TRANS_FEAT(MOVA_tz, aa64_sme, do_mova_tile, a, false) +TRANS_FEAT(MOVA_zt, aa64_sme, do_mova_tile, a, true) + +static bool do_mova_tile_n(DisasContext *s, arg_mova_t *a, int n, + bool to_vec, bool zero) +{ + static gen_helper_gvec_2 * const cz_fns[] = { + gen_helper_sme2_mova_cz_b, gen_helper_sme2_mova_cz_h, + gen_helper_sme2_mova_cz_s, gen_helper_sme2_mova_cz_d, + }; + static gen_helper_gvec_2 * const zc_fns[] = { + gen_helper_sme2_mova_zc_b, gen_helper_sme2_mova_zc_h, + gen_helper_sme2_mova_zc_s, gen_helper_sme2_mova_zc_d, + }; + static gen_helper_gvec_2 * const zc_z_fns[] = { + gen_helper_sme2p1_movaz_zc_b, gen_helper_sme2p1_movaz_zc_h, + gen_helper_sme2p1_movaz_zc_s, gen_helper_sme2p1_movaz_zc_d, + gen_helper_sme2p1_movaz_zc_q, + }; + TCGv_ptr t_za; + int svl, bytes_per_op = n << a->esz; + + /* + * The MaxImplementedSVL check happens in the decode pseudocode, + * before the SM+ZA enabled check in the operation pseudocode. + * This will (currently) only fail for NREG=4, ESZ=MO_64. 
+ */ + if (s->max_svl < bytes_per_op) { + unallocated_encoding(s); + return true; + } + + assert(a->esz <= MO_64 + zero); + + if (!sme_smza_enabled_check(s)) { + return true; + } + + svl = streaming_vec_reg_size(s); + + /* + * The CurrentVL check happens in the operation pseudocode, + * after the SM+ZA enabled check. + */ + if (svl < bytes_per_op) { + unallocated_encoding(s); + return true; + } + + if (a->v) { + TCGv_i32 t_desc = tcg_constant_i32(simd_desc(svl, svl, 0)); + + for (int i = 0; i < n; ++i) { + TCGv_ptr t_zr = vec_full_reg_ptr(s, a->zr * n + i); + t_za = get_tile_rowcol(s, a->esz, a->rs, a->za, + a->off * n + i, 1, n, a->v); + if (zero) { + zc_z_fns[a->esz](t_zr, t_za, t_desc); + } else if (to_vec) { + zc_fns[a->esz](t_zr, t_za, t_desc); + } else { + cz_fns[a->esz](t_za, t_zr, t_desc); + } + } + } else { + for (int i = 0; i < n; ++i) { + int o_zr = vec_full_reg_offset(s, a->zr * n + i); + t_za = get_tile_rowcol(s, a->esz, a->rs, a->za, + a->off * n + i, 1, n, a->v); + if (to_vec) { + tcg_gen_gvec_mov_var(MO_8, tcg_env, o_zr, t_za, 0, svl, svl); + if (zero) { + tcg_gen_gvec_dup_imm_var(MO_8, t_za, 0, svl, svl, 0); + } + } else { + tcg_gen_gvec_mov_var(MO_8, t_za, 0, tcg_env, o_zr, svl, svl); + } + } + } + return true; +} + +TRANS_FEAT(MOVA_tz2, aa64_sme2, do_mova_tile_n, a, 2, false, false) +TRANS_FEAT(MOVA_tz4, aa64_sme2, do_mova_tile_n, a, 4, false, false) +TRANS_FEAT(MOVA_zt2, aa64_sme2, do_mova_tile_n, a, 2, true, false) +TRANS_FEAT(MOVA_zt4, aa64_sme2, do_mova_tile_n, a, 4, true, false) + +TRANS_FEAT(MOVAZ_zt, aa64_sme2p1, do_mova_tile_n, a, 1, true, true) +TRANS_FEAT(MOVAZ_zt2, aa64_sme2p1, do_mova_tile_n, a, 2, true, true) +TRANS_FEAT(MOVAZ_zt4, aa64_sme2p1, do_mova_tile_n, a, 4, true, true) + +static bool do_mova_array_n(DisasContext *s, arg_mova_a *a, int n, + bool to_vec, bool zero) +{ + TCGv_ptr t_za; + int svl; + + if (!sme_smza_enabled_check(s)) { + return true; + } + + svl = streaming_vec_reg_size(s); + t_za = get_zarray(s, a->rv, a->off, n, 0); + + for (int i = 0; i < n; ++i) { + int o_za = (svl / n * sizeof(ARMVectorReg)) * i; + int o_zr = vec_full_reg_offset(s, a->zr * n + i); + + if (to_vec) { + tcg_gen_gvec_mov_var(MO_8, tcg_env, o_zr, t_za, o_za, svl, svl); + if (zero) { + tcg_gen_gvec_dup_imm_var(MO_8, t_za, o_za, svl, svl, 0); + } + } else { + tcg_gen_gvec_mov_var(MO_8, t_za, o_za, tcg_env, o_zr, svl, svl); + } + } + return true; +} + +TRANS_FEAT(MOVA_az2, aa64_sme2, do_mova_array_n, a, 2, false, false) +TRANS_FEAT(MOVA_az4, aa64_sme2, do_mova_array_n, a, 4, false, false) +TRANS_FEAT(MOVA_za2, aa64_sme2, do_mova_array_n, a, 2, true, false) +TRANS_FEAT(MOVA_za4, aa64_sme2, do_mova_array_n, a, 4, true, false) + +TRANS_FEAT(MOVAZ_za2, aa64_sme2p1, do_mova_array_n, a, 2, true, true) +TRANS_FEAT(MOVAZ_za4, aa64_sme2p1, do_mova_array_n, a, 4, true, true) + +static bool do_movt(DisasContext *s, arg_MOVT_rzt *a, + void (*func)(TCGv_i64, TCGv_ptr, tcg_target_long)) +{ + if (sme2_zt0_enabled_check(s)) { + func(cpu_reg(s, a->rt), tcg_env, + offsetof(CPUARMState, za_state.zt0) + a->off * 8); + } + return true; +} + +TRANS_FEAT(MOVT_rzt, aa64_sme2, do_movt, a, tcg_gen_ld_i64) +TRANS_FEAT(MOVT_ztr, aa64_sme2, do_movt, a, tcg_gen_st_i64) + static bool trans_LDST1(DisasContext *s, arg_LDST1 *a) { - typedef void GenLdSt1(TCGv_env, TCGv_ptr, TCGv_ptr, TCGv, TCGv_i32); + typedef void GenLdSt1(TCGv_env, TCGv_ptr, TCGv_ptr, TCGv, TCGv_i64); /* * Indexed by [esz][be][v][mte][st], which is (except for load/store) @@ -214,7 +415,7 @@ static bool trans_LDST1(DisasContext *s, 
arg_LDST1 *a) TCGv_ptr t_za, t_pg; TCGv_i64 addr; - uint32_t desc; + uint64_t desc; bool be = s->be_data == MO_BE; bool mte = s->mte_active[0]; @@ -225,7 +426,7 @@ static bool trans_LDST1(DisasContext *s, arg_LDST1 *a) return true; } - t_za = get_tile_rowcol(s, a->esz, a->rs, a->za_imm, a->v); + t_za = get_tile_rowcol(s, a->esz, a->rs, a->za, a->off, 1, 0, a->v); t_pg = pred_full_reg_ptr(s, a->pg); addr = tcg_temp_new_i64(); @@ -239,32 +440,41 @@ static bool trans_LDST1(DisasContext *s, arg_LDST1 *a) desc = make_svemte_desc(s, streaming_vec_reg_size(s), 1, a->esz, a->st, 0); fns[a->esz][be][a->v][mte][a->st](tcg_env, t_za, t_pg, addr, - tcg_constant_i32(desc)); + tcg_constant_i64(desc)); return true; } -typedef void GenLdStR(DisasContext *, TCGv_ptr, int, int, int, int); +typedef void GenLdStR(DisasContext *, TCGv_ptr, int, int, int, int, MemOp); static bool do_ldst_r(DisasContext *s, arg_ldstr *a, GenLdStR *fn) { - int svl = streaming_vec_reg_size(s); - int imm = a->imm; - TCGv_ptr base; + if (sme_za_enabled_check(s)) { + int svl = streaming_vec_reg_size(s); + int imm = a->imm; + TCGv_ptr base = get_zarray(s, a->rv, imm, 1, 0); - if (!sme_za_enabled_check(s)) { - return true; + fn(s, base, 0, svl, a->rn, imm * svl, + s->align_mem ? MO_ALIGN_16 : MO_UNALN); } - - /* ZA[n] equates to ZA0H.B[n]. */ - base = get_tile_rowcol(s, MO_8, a->rv, imm, false); - - fn(s, base, 0, svl, a->rn, imm * svl); return true; } TRANS_FEAT(LDR, aa64_sme, do_ldst_r, a, gen_sve_ldr) TRANS_FEAT(STR, aa64_sme, do_ldst_r, a, gen_sve_str) +static bool do_ldst_zt0(DisasContext *s, arg_ldstzt0 *a, GenLdStR *fn) +{ + if (sme2_zt0_enabled_check(s)) { + fn(s, tcg_env, offsetof(CPUARMState, za_state.zt0), + sizeof_field(CPUARMState, za_state.zt0), a->rn, 0, + s->align_mem ? MO_ALIGN_16 : MO_UNALN); + } + return true; +} + +TRANS_FEAT(LDR_zt0, aa64_sme2, do_ldst_zt0, a, gen_sve_ldr) +TRANS_FEAT(STR_zt0, aa64_sme2, do_ldst_zt0, a, gen_sve_str) + static bool do_adda(DisasContext *s, arg_adda *a, MemOp esz, gen_helper_gvec_4 *fn) { @@ -316,7 +526,7 @@ static bool do_outprod_fpst(DisasContext *s, arg_op *a, MemOp esz, gen_helper_gvec_5_ptr *fn) { int svl = streaming_vec_reg_size(s); - uint32_t desc = simd_desc(svl, svl, a->sub); + uint32_t desc = simd_desc(svl, svl, 0); TCGv_ptr za, zn, zm, pn, pm, fpst; if (!sme_smza_enabled_check(s)) { @@ -338,7 +548,7 @@ static bool do_outprod_env(DisasContext *s, arg_op *a, MemOp esz, gen_helper_gvec_5_ptr *fn) { int svl = streaming_vec_reg_size(s); - uint32_t desc = simd_desc(svl, svl, a->sub); + uint32_t desc = simd_desc(svl, svl, 0); TCGv_ptr za, zn, zm, pn, pm; if (!sme_smza_enabled_check(s)) { @@ -355,14 +565,32 @@ static bool do_outprod_env(DisasContext *s, arg_op *a, MemOp esz, return true; } -TRANS_FEAT(FMOPA_h, aa64_sme, do_outprod_env, a, - MO_32, gen_helper_sme_fmopa_h) -TRANS_FEAT(FMOPA_s, aa64_sme, do_outprod_fpst, a, - MO_32, FPST_A64, gen_helper_sme_fmopa_s) -TRANS_FEAT(FMOPA_d, aa64_sme_f64f64, do_outprod_fpst, a, - MO_64, FPST_A64, gen_helper_sme_fmopa_d) +TRANS_FEAT(FMOPA_w_h, aa64_sme, do_outprod_env, a, MO_32, + !a->sub ? gen_helper_sme_fmopa_w_h + : !s->fpcr_ah ? gen_helper_sme_fmops_w_h + : gen_helper_sme_ah_fmops_w_h) +TRANS_FEAT(FMOPA_h, aa64_sme_f16f16, do_outprod_fpst, a, MO_16, FPST_ZA_F16, + !a->sub ? gen_helper_sme_fmopa_h + : !s->fpcr_ah ? gen_helper_sme_fmops_h + : gen_helper_sme_ah_fmops_h) +TRANS_FEAT(FMOPA_s, aa64_sme, do_outprod_fpst, a, MO_32, FPST_ZA, + !a->sub ? gen_helper_sme_fmopa_s + : !s->fpcr_ah ? 
gen_helper_sme_fmops_s + : gen_helper_sme_ah_fmops_s) +TRANS_FEAT(FMOPA_d, aa64_sme_f64f64, do_outprod_fpst, a, MO_64, FPST_ZA, + !a->sub ? gen_helper_sme_fmopa_d + : !s->fpcr_ah ? gen_helper_sme_fmops_d + : gen_helper_sme_ah_fmops_d) + +TRANS_FEAT(BFMOPA, aa64_sme_b16b16, do_outprod_fpst, a, MO_16, FPST_ZA, + !a->sub ? gen_helper_sme_bfmopa + : !s->fpcr_ah ? gen_helper_sme_bfmops + : gen_helper_sme_ah_bfmops) -TRANS_FEAT(BFMOPA, aa64_sme, do_outprod_env, a, MO_32, gen_helper_sme_bfmopa) +TRANS_FEAT(BFMOPA_w, aa64_sme, do_outprod_env, a, MO_32, + !a->sub ? gen_helper_sme_bfmopa_w + : !s->fpcr_ah ? gen_helper_sme_bfmops_w + : gen_helper_sme_ah_bfmops_w) TRANS_FEAT(SMOPA_s, aa64_sme, do_outprod, a, MO_32, gen_helper_sme_smopa_s) TRANS_FEAT(UMOPA_s, aa64_sme, do_outprod, a, MO_32, gen_helper_sme_umopa_s) @@ -373,3 +601,1173 @@ TRANS_FEAT(SMOPA_d, aa64_sme_i16i64, do_outprod, a, MO_64, gen_helper_sme_smopa_ TRANS_FEAT(UMOPA_d, aa64_sme_i16i64, do_outprod, a, MO_64, gen_helper_sme_umopa_d) TRANS_FEAT(SUMOPA_d, aa64_sme_i16i64, do_outprod, a, MO_64, gen_helper_sme_sumopa_d) TRANS_FEAT(USMOPA_d, aa64_sme_i16i64, do_outprod, a, MO_64, gen_helper_sme_usmopa_d) + +TRANS_FEAT(BMOPA, aa64_sme2, do_outprod, a, MO_32, gen_helper_sme2_bmopa_s) +TRANS_FEAT(SMOPA2_s, aa64_sme2, do_outprod, a, MO_32, gen_helper_sme2_smopa2_s) +TRANS_FEAT(UMOPA2_s, aa64_sme2, do_outprod, a, MO_32, gen_helper_sme2_umopa2_s) + +static bool do_z2z_n1(DisasContext *s, arg_z2z_en *a, GVecGen3Fn *fn) +{ + int esz, dn, vsz, mofs, n; + bool overlap = false; + + if (!sme_sm_enabled_check(s)) { + return true; + } + + esz = a->esz; + n = a->n; + dn = a->zdn; + mofs = vec_full_reg_offset(s, a->zm); + vsz = streaming_vec_reg_size(s); + + for (int i = 0; i < n; i++) { + int dofs = vec_full_reg_offset(s, dn + i); + if (dofs == mofs) { + overlap = true; + } else { + fn(esz, dofs, dofs, mofs, vsz, vsz); + } + } + if (overlap) { + fn(esz, mofs, mofs, mofs, vsz, vsz); + } + return true; +} + +static void gen_sme2_srshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) +{ + static gen_helper_gvec_3 * const fns[] = { + gen_helper_gvec_srshl_b, gen_helper_sme2_srshl_h, + gen_helper_sme2_srshl_s, gen_helper_sme2_srshl_d, + }; + tcg_debug_assert(vece <= MO_64); + tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fns[vece]); +} + +static void gen_sme2_urshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) +{ + static gen_helper_gvec_3 * const fns[] = { + gen_helper_gvec_urshl_b, gen_helper_sme2_urshl_h, + gen_helper_sme2_urshl_s, gen_helper_sme2_urshl_d, + }; + tcg_debug_assert(vece <= MO_64); + tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fns[vece]); +} + +TRANS_FEAT(ADD_n1, aa64_sme2, do_z2z_n1, a, tcg_gen_gvec_add) +TRANS_FEAT(SMAX_n1, aa64_sme2, do_z2z_n1, a, tcg_gen_gvec_smax) +TRANS_FEAT(SMIN_n1, aa64_sme2, do_z2z_n1, a, tcg_gen_gvec_smin) +TRANS_FEAT(UMAX_n1, aa64_sme2, do_z2z_n1, a, tcg_gen_gvec_umax) +TRANS_FEAT(UMIN_n1, aa64_sme2, do_z2z_n1, a, tcg_gen_gvec_umin) +TRANS_FEAT(SRSHL_n1, aa64_sme2, do_z2z_n1, a, gen_sme2_srshl) +TRANS_FEAT(URSHL_n1, aa64_sme2, do_z2z_n1, a, gen_sme2_urshl) +TRANS_FEAT(SQDMULH_n1, aa64_sme2, do_z2z_n1, a, gen_gvec_sve2_sqdmulh) + +static bool do_z2z_nn(DisasContext *s, arg_z2z_en *a, GVecGen3Fn *fn) +{ + int esz, dn, dm, vsz, n; + + if (!sme_sm_enabled_check(s)) { + return true; + } + + esz = a->esz; + n = a->n; + dn = a->zdn; + dm = a->zm; + vsz = streaming_vec_reg_size(s); + + 
for (int i = 0; i < n; i++) { + int dofs = vec_full_reg_offset(s, dn + i); + int mofs = vec_full_reg_offset(s, dm + i); + + fn(esz, dofs, dofs, mofs, vsz, vsz); + } + return true; +} + +TRANS_FEAT(SMAX_nn, aa64_sme2, do_z2z_nn, a, tcg_gen_gvec_smax) +TRANS_FEAT(SMIN_nn, aa64_sme2, do_z2z_nn, a, tcg_gen_gvec_smin) +TRANS_FEAT(UMAX_nn, aa64_sme2, do_z2z_nn, a, tcg_gen_gvec_umax) +TRANS_FEAT(UMIN_nn, aa64_sme2, do_z2z_nn, a, tcg_gen_gvec_umin) +TRANS_FEAT(SRSHL_nn, aa64_sme2, do_z2z_nn, a, gen_sme2_srshl) +TRANS_FEAT(URSHL_nn, aa64_sme2, do_z2z_nn, a, gen_sme2_urshl) +TRANS_FEAT(SQDMULH_nn, aa64_sme2, do_z2z_nn, a, gen_gvec_sve2_sqdmulh) + +static bool do_z2z_n1_fpst(DisasContext *s, arg_z2z_en *a, + gen_helper_gvec_3_ptr * const fns[4]) +{ + int esz = a->esz, n, dn, vsz, mofs; + bool overlap = false; + gen_helper_gvec_3_ptr *fn; + TCGv_ptr fpst; + + /* These insns use MO_8 to encode BFloat16. */ + if (esz == MO_8 && !dc_isar_feature(aa64_sme_b16b16, s)) { + return false; + } + if (!sme_sm_enabled_check(s)) { + return true; + } + + fpst = fpstatus_ptr(esz == MO_16 ? FPST_A64_F16 : FPST_A64); + fn = fns[esz]; + n = a->n; + dn = a->zdn; + mofs = vec_full_reg_offset(s, a->zm); + vsz = streaming_vec_reg_size(s); + + for (int i = 0; i < n; i++) { + int dofs = vec_full_reg_offset(s, dn + i); + if (dofs == mofs) { + overlap = true; + } else { + tcg_gen_gvec_3_ptr(dofs, dofs, mofs, fpst, vsz, vsz, 0, fn); + } + } + if (overlap) { + tcg_gen_gvec_3_ptr(mofs, mofs, mofs, fpst, vsz, vsz, 0, fn); + } + return true; +} + +static bool do_z2z_nn_fpst(DisasContext *s, arg_z2z_en *a, + gen_helper_gvec_3_ptr * const fns[4]) +{ + int esz = a->esz, n, dn, dm, vsz; + gen_helper_gvec_3_ptr *fn; + TCGv_ptr fpst; + + if (esz == MO_8 && !dc_isar_feature(aa64_sme_b16b16, s)) { + return false; + } + if (!sme_sm_enabled_check(s)) { + return true; + } + + fpst = fpstatus_ptr(esz == MO_16 ? 
FPST_A64_F16 : FPST_A64); + fn = fns[esz]; + n = a->n; + dn = a->zdn; + dm = a->zm; + vsz = streaming_vec_reg_size(s); + + for (int i = 0; i < n; i++) { + int dofs = vec_full_reg_offset(s, dn + i); + int mofs = vec_full_reg_offset(s, dm + i); + + tcg_gen_gvec_3_ptr(dofs, dofs, mofs, fpst, vsz, vsz, 0, fn); + } + return true; +} + +static gen_helper_gvec_3_ptr * const f_vector_fmax[2][4] = { + { gen_helper_gvec_fmax_b16, + gen_helper_gvec_fmax_h, + gen_helper_gvec_fmax_s, + gen_helper_gvec_fmax_d }, + { gen_helper_gvec_ah_fmax_b16, + gen_helper_gvec_ah_fmax_h, + gen_helper_gvec_ah_fmax_s, + gen_helper_gvec_ah_fmax_d }, +}; +TRANS_FEAT(FMAX_n1, aa64_sme2, do_z2z_n1_fpst, a, f_vector_fmax[s->fpcr_ah]) +TRANS_FEAT(FMAX_nn, aa64_sme2, do_z2z_nn_fpst, a, f_vector_fmax[s->fpcr_ah]) + +static gen_helper_gvec_3_ptr * const f_vector_fmin[2][4] = { + { gen_helper_gvec_fmin_b16, + gen_helper_gvec_fmin_h, + gen_helper_gvec_fmin_s, + gen_helper_gvec_fmin_d }, + { gen_helper_gvec_ah_fmin_b16, + gen_helper_gvec_ah_fmin_h, + gen_helper_gvec_ah_fmin_s, + gen_helper_gvec_ah_fmin_d }, +}; +TRANS_FEAT(FMIN_n1, aa64_sme2, do_z2z_n1_fpst, a, f_vector_fmin[s->fpcr_ah]) +TRANS_FEAT(FMIN_nn, aa64_sme2, do_z2z_nn_fpst, a, f_vector_fmin[s->fpcr_ah]) + +static gen_helper_gvec_3_ptr * const f_vector_fmaxnm[4] = { + gen_helper_gvec_fmaxnum_b16, + gen_helper_gvec_fmaxnum_h, + gen_helper_gvec_fmaxnum_s, + gen_helper_gvec_fmaxnum_d, +}; +TRANS_FEAT(FMAXNM_n1, aa64_sme2, do_z2z_n1_fpst, a, f_vector_fmaxnm) +TRANS_FEAT(FMAXNM_nn, aa64_sme2, do_z2z_nn_fpst, a, f_vector_fmaxnm) + +static gen_helper_gvec_3_ptr * const f_vector_fminnm[4] = { + gen_helper_gvec_fminnum_b16, + gen_helper_gvec_fminnum_h, + gen_helper_gvec_fminnum_s, + gen_helper_gvec_fminnum_d, +}; +TRANS_FEAT(FMINNM_n1, aa64_sme2, do_z2z_n1_fpst, a, f_vector_fminnm) +TRANS_FEAT(FMINNM_nn, aa64_sme2, do_z2z_nn_fpst, a, f_vector_fminnm) + +/* Add/Sub vector Z[m] to each Z[n*N] with result in ZA[d*N]. */ +static bool do_azz_n1(DisasContext *s, arg_azz_n *a, int esz, + GVecGen3FnVar *fn) +{ + TCGv_ptr t_za; + int svl, n, o_zm; + + if (!sme_smza_enabled_check(s)) { + return true; + } + + n = a->n; + t_za = get_zarray(s, a->rv, a->off, n, 0); + o_zm = vec_full_reg_offset(s, a->zm); + svl = streaming_vec_reg_size(s); + + for (int i = 0; i < n; ++i) { + int o_za = (svl / n * sizeof(ARMVectorReg)) * i; + int o_zn = vec_full_reg_offset(s, (a->zn + i) % 32); + + fn(esz, t_za, o_za, tcg_env, o_zn, tcg_env, o_zm, svl, svl); + } + return true; +} + +TRANS_FEAT(ADD_azz_n1_s, aa64_sme2, do_azz_n1, a, MO_32, tcg_gen_gvec_add_var) +TRANS_FEAT(SUB_azz_n1_s, aa64_sme2, do_azz_n1, a, MO_32, tcg_gen_gvec_sub_var) +TRANS_FEAT(ADD_azz_n1_d, aa64_sme2_i16i64, do_azz_n1, a, MO_64, tcg_gen_gvec_add_var) +TRANS_FEAT(SUB_azz_n1_d, aa64_sme2_i16i64, do_azz_n1, a, MO_64, tcg_gen_gvec_sub_var) + +/* Add/Sub each vector Z[m*N] to each Z[n*N] with result in ZA[d*N]. 
*/ +static bool do_azz_nn(DisasContext *s, arg_azz_n *a, int esz, + GVecGen3FnVar *fn) +{ + TCGv_ptr t_za; + int svl, n; + + if (!sme_smza_enabled_check(s)) { + return true; + } + + n = a->n; + t_za = get_zarray(s, a->rv, a->off, n, 1); + svl = streaming_vec_reg_size(s); + + for (int i = 0; i < n; ++i) { + int o_za = (svl / n * sizeof(ARMVectorReg)) * i; + int o_zn = vec_full_reg_offset(s, a->zn + i); + int o_zm = vec_full_reg_offset(s, a->zm + i); + + fn(esz, t_za, o_za, tcg_env, o_zn, tcg_env, o_zm, svl, svl); + } + return true; +} + +TRANS_FEAT(ADD_azz_nn_s, aa64_sme2, do_azz_nn, a, MO_32, tcg_gen_gvec_add_var) +TRANS_FEAT(SUB_azz_nn_s, aa64_sme2, do_azz_nn, a, MO_32, tcg_gen_gvec_sub_var) +TRANS_FEAT(ADD_azz_nn_d, aa64_sme2_i16i64, do_azz_nn, a, MO_64, tcg_gen_gvec_add_var) +TRANS_FEAT(SUB_azz_nn_d, aa64_sme2_i16i64, do_azz_nn, a, MO_64, tcg_gen_gvec_sub_var) + +/* Add/Sub each ZA[d*N] += Z[m*N] */ +static bool do_aaz(DisasContext *s, arg_az_n *a, int esz, GVecGen3FnVar *fn) +{ + TCGv_ptr t_za; + int svl, n; + + if (!sme_smza_enabled_check(s)) { + return true; + } + + n = a->n; + t_za = get_zarray(s, a->rv, a->off, n, 0); + svl = streaming_vec_reg_size(s); + + for (int i = 0; i < n; ++i) { + int o_za = (svl / n * sizeof(ARMVectorReg)) * i; + int o_zm = vec_full_reg_offset(s, a->zm + i); + + fn(esz, t_za, o_za, t_za, o_za, tcg_env, o_zm, svl, svl); + } + return true; +} + +TRANS_FEAT(ADD_aaz_s, aa64_sme2, do_aaz, a, MO_32, tcg_gen_gvec_add_var) +TRANS_FEAT(SUB_aaz_s, aa64_sme2, do_aaz, a, MO_32, tcg_gen_gvec_sub_var) +TRANS_FEAT(ADD_aaz_d, aa64_sme2_i16i64, do_aaz, a, MO_64, tcg_gen_gvec_add_var) +TRANS_FEAT(SUB_aaz_d, aa64_sme2_i16i64, do_aaz, a, MO_64, tcg_gen_gvec_sub_var) + +/* + * Expand array multi-vector single (n1), array multi-vector (nn), + * and array multi-vector indexed (nx), for floating-point accumulate. + * multi: true for nn, false for n1. + * fpst: >= 0 to set ptr argument for FPST_*, < 0 for ENV. + * data: stuff for simd_data, including any index. + */ +#define FPST_ENV -1 + +static bool do_azz_fp(DisasContext *s, int nreg, int nsel, + int rv, int off, int zn, int zm, + int data, int shsel, bool multi, int fpst, + gen_helper_gvec_3_ptr *fn) +{ + if (sme_smza_enabled_check(s)) { + int svl = streaming_vec_reg_size(s); + int vstride = svl / nreg; + TCGv_ptr t_za = get_zarray(s, rv, off, nreg, nsel); + TCGv_ptr t, ptr; + + if (fpst >= 0) { + ptr = fpstatus_ptr(fpst); + } else { + ptr = tcg_env; + } + t = tcg_temp_new_ptr(); + + for (int r = 0; r < nreg; ++r) { + TCGv_ptr t_zn = vec_full_reg_ptr(s, zn); + TCGv_ptr t_zm = vec_full_reg_ptr(s, zm); + + for (int i = 0; i < nsel; ++i) { + int o_za = (r * vstride + i) * sizeof(ARMVectorReg); + int desc = simd_desc(svl, svl, data | (i << shsel)); + + tcg_gen_addi_ptr(t, t_za, o_za); + fn(t, t_zn, t_zm, ptr, tcg_constant_i32(desc)); + } + + /* + * For multiple-and-single vectors, Zn may wrap. + * For multiple vectors, both Zn and Zm are aligned. 
+ */ + zn = (zn + 1) % 32; + zm += multi; + } + } + return true; +} + +static bool do_azz_acc_fp(DisasContext *s, int nreg, int nsel, + int rv, int off, int zn, int zm, + int data, int shsel, bool multi, int fpst, + gen_helper_gvec_4_ptr *fn) +{ + if (sme_smza_enabled_check(s)) { + int svl = streaming_vec_reg_size(s); + int vstride = svl / nreg; + TCGv_ptr t_za = get_zarray(s, rv, off, nreg, nsel); + TCGv_ptr t, ptr; + + if (fpst >= 0) { + ptr = fpstatus_ptr(fpst); + } else { + ptr = tcg_env; + } + t = tcg_temp_new_ptr(); + + for (int r = 0; r < nreg; ++r) { + TCGv_ptr t_zn = vec_full_reg_ptr(s, zn); + TCGv_ptr t_zm = vec_full_reg_ptr(s, zm); + + for (int i = 0; i < nsel; ++i) { + int o_za = (r * vstride + i) * sizeof(ARMVectorReg); + int desc = simd_desc(svl, svl, data | (i << shsel)); + + tcg_gen_addi_ptr(t, t_za, o_za); + fn(t, t_zn, t_zm, t, ptr, tcg_constant_i32(desc)); + } + + /* + * For multiple-and-single vectors, Zn may wrap. + * For multiple vectors, both Zn and Zm are aligned. + */ + zn = (zn + 1) % 32; + zm += multi; + } + } + return true; +} + +static bool do_fmlal(DisasContext *s, arg_azz_n *a, bool sub, bool multi) +{ + return do_azz_acc_fp(s, a->n, 2, a->rv, a->off, a->zn, a->zm, + (1 << 2) | sub, 1, + multi, FPST_ENV, gen_helper_sve2_fmlal_zzzw_s); +} + +TRANS_FEAT(FMLAL_n1, aa64_sme2, do_fmlal, a, false, false) +TRANS_FEAT(FMLSL_n1, aa64_sme2, do_fmlal, a, true, false) +TRANS_FEAT(FMLAL_nn, aa64_sme2, do_fmlal, a, false, true) +TRANS_FEAT(FMLSL_nn, aa64_sme2, do_fmlal, a, true, true) + +static bool do_fmlal_nx(DisasContext *s, arg_azx_n *a, bool sub) +{ + return do_azz_acc_fp(s, a->n, 2, a->rv, a->off, a->zn, a->zm, + (a->idx << 3) | (1 << 2) | sub, 1, + false, FPST_ENV, gen_helper_sve2_fmlal_zzxw_s); +} + +TRANS_FEAT(FMLAL_nx, aa64_sme2, do_fmlal_nx, a, false) +TRANS_FEAT(FMLSL_nx, aa64_sme2, do_fmlal_nx, a, true) + +static bool do_bfmlal(DisasContext *s, arg_azz_n *a, bool sub, bool multi) +{ + return do_azz_acc_fp(s, a->n, 2, a->rv, a->off, a->zn, a->zm, + 0, 0, multi, FPST_ZA, + (!sub ? gen_helper_gvec_bfmlal + : s->fpcr_ah ? gen_helper_gvec_ah_bfmlsl + : gen_helper_gvec_bfmlsl)); +} + +TRANS_FEAT(BFMLAL_n1, aa64_sme2, do_bfmlal, a, false, false) +TRANS_FEAT(BFMLSL_n1, aa64_sme2, do_bfmlal, a, true, false) +TRANS_FEAT(BFMLAL_nn, aa64_sme2, do_bfmlal, a, false, true) +TRANS_FEAT(BFMLSL_nn, aa64_sme2, do_bfmlal, a, true, true) + +static bool do_bfmlal_nx(DisasContext *s, arg_azx_n *a, bool sub) +{ + return do_azz_acc_fp(s, a->n, 2, a->rv, a->off, a->zn, a->zm, + a->idx << 1, 0, false, FPST_ZA, + !sub ? gen_helper_gvec_bfmlal_idx + : s->fpcr_ah ? 
gen_helper_gvec_ah_bfmlsl_idx + : gen_helper_gvec_bfmlsl_idx); +} + +TRANS_FEAT(BFMLAL_nx, aa64_sme2, do_bfmlal_nx, a, false) +TRANS_FEAT(BFMLSL_nx, aa64_sme2, do_bfmlal_nx, a, true) + +static bool do_fdot(DisasContext *s, arg_azz_n *a, bool multi) +{ + return do_azz_acc_fp(s, a->n, 1, a->rv, a->off, a->zn, a->zm, 1, 0, + multi, FPST_ENV, gen_helper_sme2_fdot_h); +} + +TRANS_FEAT(FDOT_n1, aa64_sme2, do_fdot, a, false) +TRANS_FEAT(FDOT_nn, aa64_sme2, do_fdot, a, true) + +static bool do_fdot_nx(DisasContext *s, arg_azx_n *a) +{ + return do_azz_acc_fp(s, a->n, 1, a->rv, a->off, a->zn, a->zm, + a->idx | (1 << 2), 0, false, FPST_ENV, + gen_helper_sme2_fdot_idx_h); +} + +TRANS_FEAT(FDOT_nx, aa64_sme2, do_fdot_nx, a) + +static bool do_bfdot(DisasContext *s, arg_azz_n *a, bool multi) +{ + return do_azz_acc_fp(s, a->n, 1, a->rv, a->off, a->zn, a->zm, 0, 0, + multi, FPST_ENV, gen_helper_gvec_bfdot); +} + +TRANS_FEAT(BFDOT_n1, aa64_sme2, do_bfdot, a, false) +TRANS_FEAT(BFDOT_nn, aa64_sme2, do_bfdot, a, true) + +static bool do_bfdot_nx(DisasContext *s, arg_azx_n *a) +{ + return do_azz_acc_fp(s, a->n, 1, a->rv, a->off, a->zn, a->zm, a->idx, 0, + false, FPST_ENV, gen_helper_gvec_bfdot_idx); +} + +TRANS_FEAT(BFDOT_nx, aa64_sme2, do_bfdot_nx, a) + +static bool do_vdot(DisasContext *s, arg_azx_n *a, gen_helper_gvec_4_ptr *fn) +{ + if (sme_smza_enabled_check(s)) { + int svl = streaming_vec_reg_size(s); + int vstride = svl / 2; + TCGv_ptr t_za = get_zarray(s, a->rv, a->off, 2, 1); + TCGv_ptr t_zn = vec_full_reg_ptr(s, a->zn); + TCGv_ptr t_zm = vec_full_reg_ptr(s, a->zm); + TCGv_ptr t = tcg_temp_new_ptr(); + + for (int i = 0; i < 2; ++i) { + int o_za = i * vstride * sizeof(ARMVectorReg); + int desc = simd_desc(svl, svl, a->idx | (i << 2)); + + tcg_gen_addi_ptr(t, t_za, o_za); + fn(t, t_zn, t_zm, t, tcg_env, tcg_constant_i32(desc)); + } + } + return true; +} + +TRANS_FEAT(FVDOT, aa64_sme, do_vdot, a, gen_helper_sme2_fvdot_idx_h) +TRANS_FEAT(BFVDOT, aa64_sme, do_vdot, a, gen_helper_sme2_bfvdot_idx) + +static bool do_fmla(DisasContext *s, arg_azz_n *a, bool multi, + ARMFPStatusFlavour fpst, gen_helper_gvec_3_ptr *fn) +{ + return do_azz_fp(s, a->n, 1, a->rv, a->off, a->zn, a->zm, + 0, 0, multi, fpst, fn); +} + +TRANS_FEAT(FMLA_n1_h, aa64_sme_f16f16, do_fmla, a, false, FPST_ZA_F16, + gen_helper_gvec_vfma_h) +TRANS_FEAT(FMLS_n1_h, aa64_sme_f16f16, do_fmla, a, false, FPST_ZA_F16, + s->fpcr_ah ? gen_helper_gvec_ah_vfms_h : gen_helper_gvec_vfms_h) +TRANS_FEAT(FMLA_nn_h, aa64_sme_f16f16, do_fmla, a, true, FPST_ZA_F16, + gen_helper_gvec_vfma_h) +TRANS_FEAT(FMLS_nn_h, aa64_sme_f16f16, do_fmla, a, true, FPST_ZA_F16, + s->fpcr_ah ? gen_helper_gvec_ah_vfms_h : gen_helper_gvec_vfms_h) + +TRANS_FEAT(FMLA_n1_s, aa64_sme2, do_fmla, a, false, FPST_ZA, + gen_helper_gvec_vfma_s) +TRANS_FEAT(FMLS_n1_s, aa64_sme2, do_fmla, a, false, FPST_ZA, + s->fpcr_ah ? gen_helper_gvec_ah_vfms_s : gen_helper_gvec_vfms_s) +TRANS_FEAT(FMLA_nn_s, aa64_sme2, do_fmla, a, true, FPST_ZA, + gen_helper_gvec_vfma_s) +TRANS_FEAT(FMLS_nn_s, aa64_sme2, do_fmla, a, true, FPST_ZA, + s->fpcr_ah ? gen_helper_gvec_ah_vfms_s : gen_helper_gvec_vfms_s) + +TRANS_FEAT(FMLA_n1_d, aa64_sme2_f64f64, do_fmla, a, false, FPST_ZA, + gen_helper_gvec_vfma_d) +TRANS_FEAT(FMLS_n1_d, aa64_sme2_f64f64, do_fmla, a, false, FPST_ZA, + s->fpcr_ah ? gen_helper_gvec_ah_vfms_d : gen_helper_gvec_vfms_d) +TRANS_FEAT(FMLA_nn_d, aa64_sme2_f64f64, do_fmla, a, true, FPST_ZA, + gen_helper_gvec_vfma_d) +TRANS_FEAT(FMLS_nn_d, aa64_sme2_f64f64, do_fmla, a, true, FPST_ZA, + s->fpcr_ah ? 
gen_helper_gvec_ah_vfms_d : gen_helper_gvec_vfms_d) + +TRANS_FEAT(BFMLA_n1, aa64_sme_b16b16, do_fmla, a, false, FPST_ZA, + gen_helper_gvec_bfmla) +TRANS_FEAT(BFMLS_n1, aa64_sme_b16b16, do_fmla, a, false, FPST_ZA, + s->fpcr_ah ? gen_helper_gvec_ah_bfmls : gen_helper_gvec_bfmls) +TRANS_FEAT(BFMLA_nn, aa64_sme_b16b16, do_fmla, a, true, FPST_ZA, + gen_helper_gvec_bfmla) +TRANS_FEAT(BFMLS_nn, aa64_sme_b16b16, do_fmla, a, true, FPST_ZA, + s->fpcr_ah ? gen_helper_gvec_ah_bfmls : gen_helper_gvec_bfmls) + +static bool do_fmla_nx(DisasContext *s, arg_azx_n *a, + ARMFPStatusFlavour fpst, gen_helper_gvec_4_ptr *fn) +{ + return do_azz_acc_fp(s, a->n, 1, a->rv, a->off, a->zn, a->zm, + a->idx, 0, false, fpst, fn); +} + +TRANS_FEAT(FMLA_nx_h, aa64_sme_f16f16, do_fmla_nx, a, FPST_ZA_F16, + gen_helper_gvec_fmla_idx_h) +TRANS_FEAT(FMLS_nx_h, aa64_sme_f16f16, do_fmla_nx, a, FPST_ZA_F16, + s->fpcr_ah ? gen_helper_gvec_ah_fmls_idx_h : gen_helper_gvec_fmls_idx_h) +TRANS_FEAT(FMLA_nx_s, aa64_sme2, do_fmla_nx, a, FPST_ZA, + gen_helper_gvec_fmla_idx_s) +TRANS_FEAT(FMLS_nx_s, aa64_sme2, do_fmla_nx, a, FPST_ZA, + s->fpcr_ah ? gen_helper_gvec_ah_fmls_idx_s : gen_helper_gvec_fmls_idx_s) +TRANS_FEAT(FMLA_nx_d, aa64_sme2_f64f64, do_fmla_nx, a, FPST_ZA, + gen_helper_gvec_fmla_idx_d) +TRANS_FEAT(FMLS_nx_d, aa64_sme2_f64f64, do_fmla_nx, a, FPST_ZA, + s->fpcr_ah ? gen_helper_gvec_ah_fmls_idx_d : gen_helper_gvec_fmls_idx_d) + +TRANS_FEAT(BFMLA_nx, aa64_sme_b16b16, do_fmla_nx, a, FPST_ZA, + gen_helper_gvec_bfmla_idx) +TRANS_FEAT(BFMLS_nx, aa64_sme_b16b16, do_fmla_nx, a, FPST_ZA, + s->fpcr_ah ? gen_helper_gvec_ah_bfmls_idx : gen_helper_gvec_bfmls_idx) + +static bool do_faddsub(DisasContext *s, arg_az_n *a, ARMFPStatusFlavour fpst, + gen_helper_gvec_3_ptr *fn) +{ + if (sme_smza_enabled_check(s)) { + int svl = streaming_vec_reg_size(s); + int n = a->n; + int zm = a->zm; + int vstride = svl / n; + TCGv_ptr t_za = get_zarray(s, a->rv, a->off, n, 0); + TCGv_ptr ptr = fpstatus_ptr(fpst); + TCGv_ptr t = tcg_temp_new_ptr(); + + for (int r = 0; r < n; ++r) { + TCGv_ptr t_zm = vec_full_reg_ptr(s, zm + r); + int o_za = r * vstride * sizeof(ARMVectorReg); + int desc = simd_desc(svl, svl, 0); + + tcg_gen_addi_ptr(t, t_za, o_za); + fn(t, t, t_zm, ptr, tcg_constant_i32(desc)); + } + } + return true; +} + +TRANS_FEAT(FADD_nn_h, aa64_sme_f16f16, do_faddsub, a, + FPST_ZA_F16, gen_helper_gvec_fadd_h) +TRANS_FEAT(FSUB_nn_h, aa64_sme_f16f16, do_faddsub, a, + FPST_ZA_F16, gen_helper_gvec_fsub_h) + +TRANS_FEAT(FADD_nn_s, aa64_sme2, do_faddsub, a, + FPST_ZA, gen_helper_gvec_fadd_s) +TRANS_FEAT(FSUB_nn_s, aa64_sme2, do_faddsub, a, + FPST_ZA, gen_helper_gvec_fsub_s) + +TRANS_FEAT(FADD_nn_d, aa64_sme2_f64f64, do_faddsub, a, + FPST_ZA, gen_helper_gvec_fadd_d) +TRANS_FEAT(FSUB_nn_d, aa64_sme2_f64f64, do_faddsub, a, + FPST_ZA, gen_helper_gvec_fsub_d) + +TRANS_FEAT(BFADD_nn, aa64_sme_b16b16, do_faddsub, a, + FPST_ZA, gen_helper_gvec_bfadd) +TRANS_FEAT(BFSUB_nn, aa64_sme_b16b16, do_faddsub, a, + FPST_ZA, gen_helper_gvec_bfsub) + +/* + * Expand array multi-vector single (n1), array multi-vector (nn), + * and array multi-vector indexed (nx), for integer accumulate. + * multi: true for nn, false for n1. + * data: stuff for simd_data, including any index. 
+ */ +static bool do_azz_acc(DisasContext *s, int nreg, int nsel, + int rv, int off, int zn, int zm, + int data, int shsel, bool multi, + gen_helper_gvec_4 *fn) +{ + if (sme_smza_enabled_check(s)) { + int svl = streaming_vec_reg_size(s); + int vstride = svl / nreg; + TCGv_ptr t_za = get_zarray(s, rv, off, nreg, nsel); + TCGv_ptr t = tcg_temp_new_ptr(); + + for (int r = 0; r < nreg; ++r) { + TCGv_ptr t_zn = vec_full_reg_ptr(s, zn); + TCGv_ptr t_zm = vec_full_reg_ptr(s, zm); + + for (int i = 0; i < nsel; ++i) { + int o_za = (r * vstride + i) * sizeof(ARMVectorReg); + int desc = simd_desc(svl, svl, data | (i << shsel)); + + tcg_gen_addi_ptr(t, t_za, o_za); + fn(t, t_zn, t_zm, t, tcg_constant_i32(desc)); + } + + /* + * For multiple-and-single vectors, Zn may wrap. + * For multiple vectors, both Zn and Zm are aligned. + */ + zn = (zn + 1) % 32; + zm += multi; + } + } + return true; +} + +static bool do_dot(DisasContext *s, arg_azz_n *a, bool multi, + gen_helper_gvec_4 *fn) +{ + return do_azz_acc(s, a->n, 1, a->rv, a->off, a->zn, a->zm, + 0, 0, multi, fn); +} + +static void gen_helper_gvec_sudot_4b(TCGv_ptr d, TCGv_ptr n, TCGv_ptr m, + TCGv_ptr a, TCGv_i32 desc) +{ + gen_helper_gvec_usdot_4b(d, m, n, a, desc); +} + +TRANS_FEAT(USDOT_n1, aa64_sme2, do_dot, a, false, gen_helper_gvec_usdot_4b) +TRANS_FEAT(SUDOT_n1, aa64_sme2, do_dot, a, false, gen_helper_gvec_sudot_4b) +TRANS_FEAT(SDOT_n1_2h, aa64_sme2, do_dot, a, false, gen_helper_gvec_sdot_2h) +TRANS_FEAT(UDOT_n1_2h, aa64_sme2, do_dot, a, false, gen_helper_gvec_udot_2h) +TRANS_FEAT(SDOT_n1_4b, aa64_sme2, do_dot, a, false, gen_helper_gvec_sdot_4b) +TRANS_FEAT(UDOT_n1_4b, aa64_sme2, do_dot, a, false, gen_helper_gvec_udot_4b) +TRANS_FEAT(SDOT_n1_4h, aa64_sme2_i16i64, do_dot, a, false, gen_helper_gvec_sdot_4h) +TRANS_FEAT(UDOT_n1_4h, aa64_sme2_i16i64, do_dot, a, false, gen_helper_gvec_udot_4h) + +TRANS_FEAT(USDOT_nn, aa64_sme2, do_dot, a, true, gen_helper_gvec_usdot_4b) +TRANS_FEAT(SDOT_nn_2h, aa64_sme2, do_dot, a, true, gen_helper_gvec_sdot_2h) +TRANS_FEAT(UDOT_nn_2h, aa64_sme2, do_dot, a, true, gen_helper_gvec_udot_2h) +TRANS_FEAT(SDOT_nn_4b, aa64_sme2, do_dot, a, true, gen_helper_gvec_sdot_4b) +TRANS_FEAT(UDOT_nn_4b, aa64_sme2, do_dot, a, true, gen_helper_gvec_udot_4b) +TRANS_FEAT(SDOT_nn_4h, aa64_sme2_i16i64, do_dot, a, true, gen_helper_gvec_sdot_4h) +TRANS_FEAT(UDOT_nn_4h, aa64_sme2_i16i64, do_dot, a, true, gen_helper_gvec_udot_4h) + +static bool do_dot_nx(DisasContext *s, arg_azx_n *a, gen_helper_gvec_4 *fn) +{ + return do_azz_acc(s, a->n, 1, a->rv, a->off, a->zn, a->zm, + a->idx, 0, false, fn); +} + +TRANS_FEAT(USDOT_nx, aa64_sme2, do_dot_nx, a, gen_helper_gvec_usdot_idx_4b) +TRANS_FEAT(SUDOT_nx, aa64_sme2, do_dot_nx, a, gen_helper_gvec_sudot_idx_4b) +TRANS_FEAT(SDOT_nx_2h, aa64_sme2, do_dot_nx, a, gen_helper_gvec_sdot_idx_2h) +TRANS_FEAT(UDOT_nx_2h, aa64_sme2, do_dot_nx, a, gen_helper_gvec_udot_idx_2h) +TRANS_FEAT(SDOT_nx_4b, aa64_sme2, do_dot_nx, a, gen_helper_gvec_sdot_idx_4b) +TRANS_FEAT(UDOT_nx_4b, aa64_sme2, do_dot_nx, a, gen_helper_gvec_udot_idx_4b) +TRANS_FEAT(SDOT_nx_4h, aa64_sme2_i16i64, do_dot_nx, a, gen_helper_gvec_sdot_idx_4h) +TRANS_FEAT(UDOT_nx_4h, aa64_sme2_i16i64, do_dot_nx, a, gen_helper_gvec_udot_idx_4h) + +static bool do_vdot_nx(DisasContext *s, arg_azx_n *a, gen_helper_gvec_3 *fn) +{ + if (sme_smza_enabled_check(s)) { + int svl = streaming_vec_reg_size(s); + fn(get_zarray(s, a->rv, a->off, a->n, 0), + vec_full_reg_ptr(s, a->zn), + vec_full_reg_ptr(s, a->zm), + tcg_constant_i32(simd_desc(svl, svl, a->idx))); + } + return 
true; +} + +TRANS_FEAT(SVDOT_nx_2h, aa64_sme2, do_vdot_nx, a, gen_helper_sme2_svdot_idx_2h) +TRANS_FEAT(SVDOT_nx_4b, aa64_sme2, do_vdot_nx, a, gen_helper_sme2_svdot_idx_4b) +TRANS_FEAT(SVDOT_nx_4h, aa64_sme2, do_vdot_nx, a, gen_helper_sme2_svdot_idx_4h) + +TRANS_FEAT(UVDOT_nx_2h, aa64_sme2, do_vdot_nx, a, gen_helper_sme2_uvdot_idx_2h) +TRANS_FEAT(UVDOT_nx_4b, aa64_sme2, do_vdot_nx, a, gen_helper_sme2_uvdot_idx_4b) +TRANS_FEAT(UVDOT_nx_4h, aa64_sme2, do_vdot_nx, a, gen_helper_sme2_uvdot_idx_4h) + +TRANS_FEAT(SUVDOT_nx_4b, aa64_sme2, do_vdot_nx, a, gen_helper_sme2_suvdot_idx_4b) +TRANS_FEAT(USVDOT_nx_4b, aa64_sme2, do_vdot_nx, a, gen_helper_sme2_usvdot_idx_4b) + +static bool do_smlal(DisasContext *s, arg_azz_n *a, bool multi, + gen_helper_gvec_4 *fn) +{ + return do_azz_acc(s, a->n, 2, a->rv, a->off, a->zn, a->zm, + 0, 0, multi, fn); +} + +TRANS_FEAT(SMLAL_n1, aa64_sme2, do_smlal, a, false, gen_helper_sve2_smlal_zzzw_s) +TRANS_FEAT(SMLSL_n1, aa64_sme2, do_smlal, a, false, gen_helper_sve2_smlsl_zzzw_s) +TRANS_FEAT(UMLAL_n1, aa64_sme2, do_smlal, a, false, gen_helper_sve2_umlal_zzzw_s) +TRANS_FEAT(UMLSL_n1, aa64_sme2, do_smlal, a, false, gen_helper_sve2_umlsl_zzzw_s) + +TRANS_FEAT(SMLAL_nn, aa64_sme2, do_smlal, a, true, gen_helper_sve2_smlal_zzzw_s) +TRANS_FEAT(SMLSL_nn, aa64_sme2, do_smlal, a, true, gen_helper_sve2_smlsl_zzzw_s) +TRANS_FEAT(UMLAL_nn, aa64_sme2, do_smlal, a, true, gen_helper_sve2_umlal_zzzw_s) +TRANS_FEAT(UMLSL_nn, aa64_sme2, do_smlal, a, true, gen_helper_sve2_umlsl_zzzw_s) + +static bool do_smlal_nx(DisasContext *s, arg_azx_n *a, + gen_helper_gvec_4 *fn) +{ + return do_azz_acc(s, a->n, 2, a->rv, a->off, a->zn, a->zm, + a->idx << 1, 0, false, fn); +} + +TRANS_FEAT(SMLAL_nx, aa64_sme2, do_smlal_nx, a, gen_helper_sve2_smlal_idx_s) +TRANS_FEAT(SMLSL_nx, aa64_sme2, do_smlal_nx, a, gen_helper_sve2_smlsl_idx_s) +TRANS_FEAT(UMLAL_nx, aa64_sme2, do_smlal_nx, a, gen_helper_sve2_umlal_idx_s) +TRANS_FEAT(UMLSL_nx, aa64_sme2, do_smlal_nx, a, gen_helper_sve2_umlsl_idx_s) + +static bool do_smlall(DisasContext *s, arg_azz_n *a, bool multi, + gen_helper_gvec_4 *fn) +{ + return do_azz_acc(s, a->n, 4, a->rv, a->off, a->zn, a->zm, + 0, 0, multi, fn); +} + +static void gen_helper_sme2_sumlall_s(TCGv_ptr d, TCGv_ptr n, TCGv_ptr m, + TCGv_ptr a, TCGv_i32 desc) +{ + gen_helper_sme2_usmlall_s(d, m, n, a, desc); +} + +TRANS_FEAT(SMLALL_n1_s, aa64_sme2, do_smlall, a, false, gen_helper_sme2_smlall_s) +TRANS_FEAT(SMLSLL_n1_s, aa64_sme2, do_smlall, a, false, gen_helper_sme2_smlsll_s) +TRANS_FEAT(UMLALL_n1_s, aa64_sme2, do_smlall, a, false, gen_helper_sme2_umlall_s) +TRANS_FEAT(UMLSLL_n1_s, aa64_sme2, do_smlall, a, false, gen_helper_sme2_umlsll_s) +TRANS_FEAT(USMLALL_n1_s, aa64_sme2, do_smlall, a, false, gen_helper_sme2_usmlall_s) +TRANS_FEAT(SUMLALL_n1_s, aa64_sme2, do_smlall, a, false, gen_helper_sme2_sumlall_s) + +TRANS_FEAT(SMLALL_n1_d, aa64_sme2_i16i64, do_smlall, a, false, gen_helper_sme2_smlall_d) +TRANS_FEAT(SMLSLL_n1_d, aa64_sme2_i16i64, do_smlall, a, false, gen_helper_sme2_smlsll_d) +TRANS_FEAT(UMLALL_n1_d, aa64_sme2_i16i64, do_smlall, a, false, gen_helper_sme2_umlall_d) +TRANS_FEAT(UMLSLL_n1_d, aa64_sme2_i16i64, do_smlall, a, false, gen_helper_sme2_umlsll_d) + +TRANS_FEAT(SMLALL_nn_s, aa64_sme2, do_smlall, a, true, gen_helper_sme2_smlall_s) +TRANS_FEAT(SMLSLL_nn_s, aa64_sme2, do_smlall, a, true, gen_helper_sme2_smlsll_s) +TRANS_FEAT(UMLALL_nn_s, aa64_sme2, do_smlall, a, true, gen_helper_sme2_umlall_s) +TRANS_FEAT(UMLSLL_nn_s, aa64_sme2, do_smlall, a, true, gen_helper_sme2_umlsll_s) 
+TRANS_FEAT(USMLALL_nn_s, aa64_sme2, do_smlall, a, true, gen_helper_sme2_usmlall_s) + +TRANS_FEAT(SMLALL_nn_d, aa64_sme2_i16i64, do_smlall, a, true, gen_helper_sme2_smlall_d) +TRANS_FEAT(SMLSLL_nn_d, aa64_sme2_i16i64, do_smlall, a, true, gen_helper_sme2_smlsll_d) +TRANS_FEAT(UMLALL_nn_d, aa64_sme2_i16i64, do_smlall, a, true, gen_helper_sme2_umlall_d) +TRANS_FEAT(UMLSLL_nn_d, aa64_sme2_i16i64, do_smlall, a, true, gen_helper_sme2_umlsll_d) + +static bool do_smlall_nx(DisasContext *s, arg_azx_n *a, + gen_helper_gvec_4 *fn) +{ + return do_azz_acc(s, a->n, 4, a->rv, a->off, a->zn, a->zm, + a->idx << 2, 0, false, fn); +} + +TRANS_FEAT(SMLALL_nx_s, aa64_sme2, do_smlall_nx, a, gen_helper_sme2_smlall_idx_s) +TRANS_FEAT(SMLSLL_nx_s, aa64_sme2, do_smlall_nx, a, gen_helper_sme2_smlsll_idx_s) +TRANS_FEAT(UMLALL_nx_s, aa64_sme2, do_smlall_nx, a, gen_helper_sme2_umlall_idx_s) +TRANS_FEAT(UMLSLL_nx_s, aa64_sme2, do_smlall_nx, a, gen_helper_sme2_umlsll_idx_s) +TRANS_FEAT(USMLALL_nx_s, aa64_sme2, do_smlall_nx, a, gen_helper_sme2_usmlall_idx_s) +TRANS_FEAT(SUMLALL_nx_s, aa64_sme2, do_smlall_nx, a, gen_helper_sme2_sumlall_idx_s) + +TRANS_FEAT(SMLALL_nx_d, aa64_sme2_i16i64, do_smlall_nx, a, gen_helper_sme2_smlall_idx_d) +TRANS_FEAT(SMLSLL_nx_d, aa64_sme2_i16i64, do_smlall_nx, a, gen_helper_sme2_smlsll_idx_d) +TRANS_FEAT(UMLALL_nx_d, aa64_sme2_i16i64, do_smlall_nx, a, gen_helper_sme2_umlall_idx_d) +TRANS_FEAT(UMLSLL_nx_d, aa64_sme2_i16i64, do_smlall_nx, a, gen_helper_sme2_umlsll_idx_d) + +static bool do_zz_fpst(DisasContext *s, arg_zz_n *a, int data, + ARMFPStatusFlavour type, gen_helper_gvec_2_ptr *fn) +{ + if (sme_sm_enabled_check(s)) { + int svl = streaming_vec_reg_size(s); + TCGv_ptr fpst = fpstatus_ptr(type); + + for (int i = 0, n = a->n; i < n; ++i) { + tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, a->zd + i), + vec_full_reg_offset(s, a->zn + i), + fpst, svl, svl, data, fn); + } + } + return true; +} + +TRANS_FEAT(BFCVT, aa64_sme2, do_zz_fpst, a, 0, + FPST_A64, gen_helper_sme2_bfcvt) +TRANS_FEAT(BFCVTN, aa64_sme2, do_zz_fpst, a, 0, + FPST_A64, gen_helper_sme2_bfcvtn) +TRANS_FEAT(FCVT_n, aa64_sme2, do_zz_fpst, a, 0, + FPST_A64, gen_helper_sme2_fcvt_n) +TRANS_FEAT(FCVTN, aa64_sme2, do_zz_fpst, a, 0, + FPST_A64, gen_helper_sme2_fcvtn) + +TRANS_FEAT(FCVT_w, aa64_sme_f16f16, do_zz_fpst, a, 0, + FPST_A64_F16, gen_helper_sme2_fcvt_w) +TRANS_FEAT(FCVTL, aa64_sme_f16f16, do_zz_fpst, a, 0, + FPST_A64_F16, gen_helper_sme2_fcvtl) + +TRANS_FEAT(FCVTZS, aa64_sme2, do_zz_fpst, a, 0, + FPST_A64, gen_helper_gvec_vcvt_rz_fs) +TRANS_FEAT(FCVTZU, aa64_sme2, do_zz_fpst, a, 0, + FPST_A64, gen_helper_gvec_vcvt_rz_fu) + +TRANS_FEAT(SCVTF, aa64_sme2, do_zz_fpst, a, 0, + FPST_A64, gen_helper_sme2_scvtf) +TRANS_FEAT(UCVTF, aa64_sme2, do_zz_fpst, a, 0, + FPST_A64, gen_helper_sme2_ucvtf) + +TRANS_FEAT(FRINTN, aa64_sme2, do_zz_fpst, a, float_round_nearest_even, + FPST_A64, gen_helper_gvec_vrint_rm_s) +TRANS_FEAT(FRINTP, aa64_sme2, do_zz_fpst, a, float_round_up, + FPST_A64, gen_helper_gvec_vrint_rm_s) +TRANS_FEAT(FRINTM, aa64_sme2, do_zz_fpst, a, float_round_down, + FPST_A64, gen_helper_gvec_vrint_rm_s) +TRANS_FEAT(FRINTA, aa64_sme2, do_zz_fpst, a, float_round_ties_away, + FPST_A64, gen_helper_gvec_vrint_rm_s) + +static bool do_zz(DisasContext *s, arg_zz_n *a, int data, + gen_helper_gvec_2 *fn) +{ + if (sme_sm_enabled_check(s)) { + int svl = streaming_vec_reg_size(s); + + for (int i = 0, n = a->n; i < n; ++i) { + tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->zd + i), + vec_full_reg_offset(s, a->zn + i), + svl, svl, data, fn); + } + } + 
return true; +} + +TRANS_FEAT(SQCVT_sh, aa64_sme2, do_zz, a, 0, gen_helper_sme2_sqcvt_sh) +TRANS_FEAT(UQCVT_sh, aa64_sme2, do_zz, a, 0, gen_helper_sme2_uqcvt_sh) +TRANS_FEAT(SQCVTU_sh, aa64_sme2, do_zz, a, 0, gen_helper_sme2_sqcvtu_sh) + +TRANS_FEAT(SQCVT_sb, aa64_sme2, do_zz, a, 0, gen_helper_sme2_sqcvt_sb) +TRANS_FEAT(UQCVT_sb, aa64_sme2, do_zz, a, 0, gen_helper_sme2_uqcvt_sb) +TRANS_FEAT(SQCVTU_sb, aa64_sme2, do_zz, a, 0, gen_helper_sme2_sqcvtu_sb) + +TRANS_FEAT(SQCVT_dh, aa64_sme2, do_zz, a, 0, gen_helper_sme2_sqcvt_dh) +TRANS_FEAT(UQCVT_dh, aa64_sme2, do_zz, a, 0, gen_helper_sme2_uqcvt_dh) +TRANS_FEAT(SQCVTU_dh, aa64_sme2, do_zz, a, 0, gen_helper_sme2_sqcvtu_dh) + +TRANS_FEAT(SQCVTN_sb, aa64_sme2, do_zz, a, 0, gen_helper_sme2_sqcvtn_sb) +TRANS_FEAT(UQCVTN_sb, aa64_sme2, do_zz, a, 0, gen_helper_sme2_uqcvtn_sb) +TRANS_FEAT(SQCVTUN_sb, aa64_sme2, do_zz, a, 0, gen_helper_sme2_sqcvtun_sb) + +TRANS_FEAT(SQCVTN_dh, aa64_sme2, do_zz, a, 0, gen_helper_sme2_sqcvtn_dh) +TRANS_FEAT(UQCVTN_dh, aa64_sme2, do_zz, a, 0, gen_helper_sme2_uqcvtn_dh) +TRANS_FEAT(SQCVTUN_dh, aa64_sme2, do_zz, a, 0, gen_helper_sme2_sqcvtun_dh) + +TRANS_FEAT(SUNPK_2bh, aa64_sme2, do_zz, a, 0, gen_helper_sme2_sunpk2_bh) +TRANS_FEAT(SUNPK_2hs, aa64_sme2, do_zz, a, 0, gen_helper_sme2_sunpk2_hs) +TRANS_FEAT(SUNPK_2sd, aa64_sme2, do_zz, a, 0, gen_helper_sme2_sunpk2_sd) + +TRANS_FEAT(SUNPK_4bh, aa64_sme2, do_zz, a, 0, gen_helper_sme2_sunpk4_bh) +TRANS_FEAT(SUNPK_4hs, aa64_sme2, do_zz, a, 0, gen_helper_sme2_sunpk4_hs) +TRANS_FEAT(SUNPK_4sd, aa64_sme2, do_zz, a, 0, gen_helper_sme2_sunpk4_sd) + +TRANS_FEAT(UUNPK_2bh, aa64_sme2, do_zz, a, 0, gen_helper_sme2_uunpk2_bh) +TRANS_FEAT(UUNPK_2hs, aa64_sme2, do_zz, a, 0, gen_helper_sme2_uunpk2_hs) +TRANS_FEAT(UUNPK_2sd, aa64_sme2, do_zz, a, 0, gen_helper_sme2_uunpk2_sd) + +TRANS_FEAT(UUNPK_4bh, aa64_sme2, do_zz, a, 0, gen_helper_sme2_uunpk4_bh) +TRANS_FEAT(UUNPK_4hs, aa64_sme2, do_zz, a, 0, gen_helper_sme2_uunpk4_hs) +TRANS_FEAT(UUNPK_4sd, aa64_sme2, do_zz, a, 0, gen_helper_sme2_uunpk4_sd) + +static bool do_zipuzp_4(DisasContext *s, arg_zz_e *a, + gen_helper_gvec_2 * const fn[5]) +{ + int bytes_per_op = 4 << a->esz; + + /* Both MO_64 and MO_128 can fail the size test. 
*/ + if (s->max_svl < bytes_per_op) { + unallocated_encoding(s); + } else if (sme_sm_enabled_check(s)) { + int svl = streaming_vec_reg_size(s); + if (svl < bytes_per_op) { + unallocated_encoding(s); + } else { + tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->zd), + vec_full_reg_offset(s, a->zn), + svl, svl, 0, fn[a->esz]); + } + } + return true; +} + +static gen_helper_gvec_2 * const zip4_fns[] = { + gen_helper_sme2_zip4_b, + gen_helper_sme2_zip4_h, + gen_helper_sme2_zip4_s, + gen_helper_sme2_zip4_d, + gen_helper_sme2_zip4_q, +}; +TRANS_FEAT(ZIP_4, aa64_sme2, do_zipuzp_4, a, zip4_fns) + +static gen_helper_gvec_2 * const uzp4_fns[] = { + gen_helper_sme2_uzp4_b, + gen_helper_sme2_uzp4_h, + gen_helper_sme2_uzp4_s, + gen_helper_sme2_uzp4_d, + gen_helper_sme2_uzp4_q, +}; +TRANS_FEAT(UZP_4, aa64_sme2, do_zipuzp_4, a, uzp4_fns) + +static bool do_zz_rshr(DisasContext *s, arg_rshr *a, gen_helper_gvec_2 *fn) +{ + if (sve_access_check(s)) { + int vl = vec_full_reg_size(s); + tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->zd), + vec_full_reg_offset(s, a->zn), + vl, vl, a->shift, fn); + } + return true; +} + +TRANS_FEAT(SQRSHR_sh, aa64_sme2, do_zz_rshr, a, gen_helper_sme2_sqrshr_sh) +TRANS_FEAT(UQRSHR_sh, aa64_sme2, do_zz_rshr, a, gen_helper_sme2_uqrshr_sh) +TRANS_FEAT(SQRSHRU_sh, aa64_sme2, do_zz_rshr, a, gen_helper_sme2_sqrshru_sh) + +TRANS_FEAT(SQRSHR_sb, aa64_sme2, do_zz_rshr, a, gen_helper_sme2_sqrshr_sb) +TRANS_FEAT(SQRSHR_dh, aa64_sme2, do_zz_rshr, a, gen_helper_sme2_sqrshr_dh) +TRANS_FEAT(UQRSHR_sb, aa64_sme2, do_zz_rshr, a, gen_helper_sme2_uqrshr_sb) +TRANS_FEAT(UQRSHR_dh, aa64_sme2, do_zz_rshr, a, gen_helper_sme2_uqrshr_dh) +TRANS_FEAT(SQRSHRU_sb, aa64_sme2, do_zz_rshr, a, gen_helper_sme2_sqrshru_sb) +TRANS_FEAT(SQRSHRU_dh, aa64_sme2, do_zz_rshr, a, gen_helper_sme2_sqrshru_dh) + +TRANS_FEAT(SQRSHRN_sh, aa64_sme2_or_sve2p1, do_zz_rshr, a, gen_helper_sme2_sqrshrn_sh) +TRANS_FEAT(UQRSHRN_sh, aa64_sme2_or_sve2p1, do_zz_rshr, a, gen_helper_sme2_uqrshrn_sh) +TRANS_FEAT(SQRSHRUN_sh, aa64_sme2_or_sve2p1, do_zz_rshr, a, gen_helper_sme2_sqrshrun_sh) + +TRANS_FEAT(SQRSHRN_sb, aa64_sme2, do_zz_rshr, a, gen_helper_sme2_sqrshrn_sb) +TRANS_FEAT(SQRSHRN_dh, aa64_sme2, do_zz_rshr, a, gen_helper_sme2_sqrshrn_dh) +TRANS_FEAT(UQRSHRN_sb, aa64_sme2, do_zz_rshr, a, gen_helper_sme2_uqrshrn_sb) +TRANS_FEAT(UQRSHRN_dh, aa64_sme2, do_zz_rshr, a, gen_helper_sme2_uqrshrn_dh) +TRANS_FEAT(SQRSHRUN_sb, aa64_sme2, do_zz_rshr, a, gen_helper_sme2_sqrshrun_sb) +TRANS_FEAT(SQRSHRUN_dh, aa64_sme2, do_zz_rshr, a, gen_helper_sme2_sqrshrun_dh) + +static bool do_zipuzp_2(DisasContext *s, arg_zzz_e *a, + gen_helper_gvec_3 * const fn[5]) +{ + int bytes_per_op = 2 << a->esz; + + /* MO_128 can fail the size test. 
*/ + if (s->max_svl < bytes_per_op) { + unallocated_encoding(s); + } else if (sme_sm_enabled_check(s)) { + int svl = streaming_vec_reg_size(s); + if (svl < bytes_per_op) { + unallocated_encoding(s); + } else { + tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->zd), + vec_full_reg_offset(s, a->zn), + vec_full_reg_offset(s, a->zm), + svl, svl, 0, fn[a->esz]); + } + } + return true; +} + +static gen_helper_gvec_3 * const zip2_fns[] = { + gen_helper_sme2_zip2_b, + gen_helper_sme2_zip2_h, + gen_helper_sme2_zip2_s, + gen_helper_sme2_zip2_d, + gen_helper_sme2_zip2_q, +}; +TRANS_FEAT(ZIP_2, aa64_sme2, do_zipuzp_2, a, zip2_fns) + +static gen_helper_gvec_3 * const uzp2_fns[] = { + gen_helper_sme2_uzp2_b, + gen_helper_sme2_uzp2_h, + gen_helper_sme2_uzp2_s, + gen_helper_sme2_uzp2_d, + gen_helper_sme2_uzp2_q, +}; +TRANS_FEAT(UZP_2, aa64_sme2, do_zipuzp_2, a, uzp2_fns) + +static bool trans_FCLAMP(DisasContext *s, arg_zzz_en *a) +{ + static gen_helper_gvec_3_ptr * const fn[] = { + gen_helper_sme2_bfclamp, + gen_helper_sme2_fclamp_h, + gen_helper_sme2_fclamp_s, + gen_helper_sme2_fclamp_d, + }; + TCGv_ptr fpst; + int vl; + + if (!dc_isar_feature(aa64_sme2, s)) { + return false; + } + /* This insn uses MO_8 to encode BFloat16. */ + if (a->esz == MO_8 && !dc_isar_feature(aa64_sme_b16b16, s)) { + return false; + } + if (!sme_sm_enabled_check(s)) { + return true; + } + + fpst = fpstatus_ptr(a->esz == MO_16 ? FPST_A64_F16 : FPST_A64); + vl = vec_full_reg_size(s); + + tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->zd), + vec_full_reg_offset(s, a->zn), + vec_full_reg_offset(s, a->zm), + fpst, vl, vl, a->n, fn[a->esz]); + return true; +} + +static bool do_clamp(DisasContext *s, arg_zzz_en *a, + gen_helper_gvec_3 * const fn[4]) +{ + int vl; + + if (!dc_isar_feature(aa64_sme2, s)) { + return false; + } + if (!sme_sm_enabled_check(s)) { + return true; + } + + /* + * Clamp is just a min+max, easily supported by most host + * vector operations -- we already have such an expansion in + * translate-sve.c for a single output. + * TODO: Add support in gvec for multiple simultaneous output, + * and/or copy to temporary upon overlap. 
+ */ + vl = vec_full_reg_size(s); + tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->zd), + vec_full_reg_offset(s, a->zn), + vec_full_reg_offset(s, a->zm), + vl, vl, a->n, fn[a->esz]); + return true; +} + +static gen_helper_gvec_3 * const sclamp_fns[] = { + gen_helper_sme2_sclamp_b, + gen_helper_sme2_sclamp_h, + gen_helper_sme2_sclamp_s, + gen_helper_sme2_sclamp_d, +}; +TRANS(SCLAMP, do_clamp, a, sclamp_fns) + +static gen_helper_gvec_3 * const uclamp_fns[] = { + gen_helper_sme2_uclamp_b, + gen_helper_sme2_uclamp_h, + gen_helper_sme2_uclamp_s, + gen_helper_sme2_uclamp_d, +}; +TRANS(UCLAMP, do_clamp, a, uclamp_fns) + +static bool trans_SEL(DisasContext *s, arg_SEL *a) +{ + typedef void sme_sel_fn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32, TCGv_i32); + static sme_sel_fn * const fns[4] = { + gen_helper_sme2_sel_b, gen_helper_sme2_sel_h, + gen_helper_sme2_sel_s, gen_helper_sme2_sel_d + }; + + if (!dc_isar_feature(aa64_sme2, s)) { + return false; + } + if (sme_sm_enabled_check(s)) { + int svl = streaming_vec_reg_size(s); + uint32_t desc = simd_desc(svl, svl, a->n); + TCGv_ptr t_d = tcg_temp_new_ptr(); + TCGv_ptr t_n = tcg_temp_new_ptr(); + TCGv_ptr t_m = tcg_temp_new_ptr(); + TCGv_i32 png = tcg_temp_new_i32(); + + tcg_gen_addi_ptr(t_d, tcg_env, vec_full_reg_offset(s, a->zd)); + tcg_gen_addi_ptr(t_n, tcg_env, vec_full_reg_offset(s, a->zn)); + tcg_gen_addi_ptr(t_m, tcg_env, vec_full_reg_offset(s, a->zm)); + + tcg_gen_ld16u_i32(png, tcg_env, pred_full_reg_offset(s, a->pg) + ^ (HOST_BIG_ENDIAN ? 6 : 0)); + + fns[a->esz](t_d, t_n, t_m, png, tcg_constant_i32(desc)); + } + return true; +} + +static bool do_lut(DisasContext *s, arg_lut *a, + gen_helper_gvec_2_ptr *fn, bool strided) +{ + if (sme_sm_enabled_check(s) && sme2_zt0_enabled_check(s)) { + int svl = streaming_vec_reg_size(s); + tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, a->zd), + vec_full_reg_offset(s, a->zn), + tcg_env, svl, svl, strided | (a->idx << 1), fn); + } + return true; +} + +TRANS_FEAT(LUTI2_c_1b, aa64_sme2, do_lut, a, gen_helper_sme2_luti2_1b, false) +TRANS_FEAT(LUTI2_c_1h, aa64_sme2, do_lut, a, gen_helper_sme2_luti2_1h, false) +TRANS_FEAT(LUTI2_c_1s, aa64_sme2, do_lut, a, gen_helper_sme2_luti2_1s, false) + +TRANS_FEAT(LUTI2_c_2b, aa64_sme2, do_lut, a, gen_helper_sme2_luti2_2b, false) +TRANS_FEAT(LUTI2_c_2h, aa64_sme2, do_lut, a, gen_helper_sme2_luti2_2h, false) +TRANS_FEAT(LUTI2_c_2s, aa64_sme2, do_lut, a, gen_helper_sme2_luti2_2s, false) + +TRANS_FEAT(LUTI2_c_4b, aa64_sme2, do_lut, a, gen_helper_sme2_luti2_4b, false) +TRANS_FEAT(LUTI2_c_4h, aa64_sme2, do_lut, a, gen_helper_sme2_luti2_4h, false) +TRANS_FEAT(LUTI2_c_4s, aa64_sme2, do_lut, a, gen_helper_sme2_luti2_4s, false) + +TRANS_FEAT(LUTI4_c_1b, aa64_sme2, do_lut, a, gen_helper_sme2_luti4_1b, false) +TRANS_FEAT(LUTI4_c_1h, aa64_sme2, do_lut, a, gen_helper_sme2_luti4_1h, false) +TRANS_FEAT(LUTI4_c_1s, aa64_sme2, do_lut, a, gen_helper_sme2_luti4_1s, false) + +TRANS_FEAT(LUTI4_c_2b, aa64_sme2, do_lut, a, gen_helper_sme2_luti4_2b, false) +TRANS_FEAT(LUTI4_c_2h, aa64_sme2, do_lut, a, gen_helper_sme2_luti4_2h, false) +TRANS_FEAT(LUTI4_c_2s, aa64_sme2, do_lut, a, gen_helper_sme2_luti4_2s, false) + +TRANS_FEAT(LUTI4_c_4h, aa64_sme2, do_lut, a, gen_helper_sme2_luti4_4h, false) +TRANS_FEAT(LUTI4_c_4s, aa64_sme2, do_lut, a, gen_helper_sme2_luti4_4s, false) + +static bool do_lut_s4(DisasContext *s, arg_lut *a, gen_helper_gvec_2_ptr *fn) +{ + return !(a->zd & 0b01100) && do_lut(s, a, fn, true); +} + +static bool do_lut_s8(DisasContext *s, arg_lut *a, gen_helper_gvec_2_ptr *fn) +{ + return !(a->zd & 
0b01000) && do_lut(s, a, fn, true); +} + +TRANS_FEAT(LUTI2_s_2b, aa64_sme2p1, do_lut_s8, a, gen_helper_sme2_luti2_2b) +TRANS_FEAT(LUTI2_s_2h, aa64_sme2p1, do_lut_s8, a, gen_helper_sme2_luti2_2h) + +TRANS_FEAT(LUTI2_s_4b, aa64_sme2p1, do_lut_s4, a, gen_helper_sme2_luti2_4b) +TRANS_FEAT(LUTI2_s_4h, aa64_sme2p1, do_lut_s4, a, gen_helper_sme2_luti2_4h) + +TRANS_FEAT(LUTI4_s_2b, aa64_sme2p1, do_lut_s8, a, gen_helper_sme2_luti4_2b) +TRANS_FEAT(LUTI4_s_2h, aa64_sme2p1, do_lut_s8, a, gen_helper_sme2_luti4_2h) + +TRANS_FEAT(LUTI4_s_4h, aa64_sme2p1, do_lut_s4, a, gen_helper_sme2_luti4_4h) diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c index f3cf028..07b827f 100644 --- a/target/arm/tcg/translate-sve.c +++ b/target/arm/tcg/translate-sve.c @@ -31,9 +31,9 @@ typedef void gen_helper_gvec_flags_3(TCGv_i32, TCGv_ptr, TCGv_ptr, typedef void gen_helper_gvec_flags_4(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32); -typedef void gen_helper_gvec_mem(TCGv_env, TCGv_ptr, TCGv_i64, TCGv_i32); +typedef void gen_helper_gvec_mem(TCGv_env, TCGv_ptr, TCGv_i64, TCGv_i64); typedef void gen_helper_gvec_mem_scatter(TCGv_env, TCGv_ptr, TCGv_ptr, - TCGv_ptr, TCGv_i64, TCGv_i32); + TCGv_ptr, TCGv_i64, TCGv_i64); /* * Helpers for extracting complex instruction fields. @@ -89,7 +89,7 @@ static inline int expand_imm_sh8u(DisasContext *s, int x) */ static inline int msz_dtype(DisasContext *s, int msz) { - static const uint8_t dtype[4] = { 0, 5, 10, 15 }; + static const uint8_t dtype[5] = { 0, 5, 10, 15, 18 }; return dtype[msz]; } @@ -190,6 +190,10 @@ static bool gen_gvec_fpst_zzz(DisasContext *s, gen_helper_gvec_3_ptr *fn, static bool gen_gvec_fpst_arg_zzz(DisasContext *s, gen_helper_gvec_3_ptr *fn, arg_rrr_esz *a, int data) { + /* These insns use MO_8 to encode BFloat16 */ + if (a->esz == MO_8 && !dc_isar_feature(aa64_sve_b16b16, s)) { + return false; + } return gen_gvec_fpst_zzz(s, fn, a->rd, a->rn, a->rm, data, a->esz == MO_16 ? FPST_A64_F16 : FPST_A64); } @@ -403,6 +407,10 @@ static bool gen_gvec_fpst_zzzp(DisasContext *s, gen_helper_gvec_4_ptr *fn, static bool gen_gvec_fpst_arg_zpzz(DisasContext *s, gen_helper_gvec_4_ptr *fn, arg_rprr_esz *a) { + /* These insns use MO_8 to encode BFloat16. */ + if (a->esz == MO_8 && !dc_isar_feature(aa64_sve_b16b16, s)) { + return false; + } return gen_gvec_fpst_zzzp(s, fn, a->rd, a->rn, a->rm, a->pg, 0, a->esz == MO_16 ? FPST_A64_F16 : FPST_A64); } @@ -778,6 +786,9 @@ DO_ZPZ(NOT_zpz, aa64_sve, sve_not_zpz) DO_ZPZ(ABS, aa64_sve, sve_abs) DO_ZPZ(NEG, aa64_sve, sve_neg) DO_ZPZ(RBIT, aa64_sve, sve_rbit) +DO_ZPZ(ORQV, aa64_sme2p1_or_sve2p1, sve2p1_orqv) +DO_ZPZ(EORQV, aa64_sme2p1_or_sve2p1, sve2p1_eorqv) +DO_ZPZ(ANDQV, aa64_sme2p1_or_sve2p1, sve2p1_andqv) static gen_helper_gvec_3 * const fabs_fns[4] = { NULL, gen_helper_sve_fabs_h, @@ -828,6 +839,41 @@ TRANS_FEAT(SXTW, aa64_sve, gen_gvec_ool_arg_zpz, TRANS_FEAT(UXTW, aa64_sve, gen_gvec_ool_arg_zpz, a->esz == 3 ? 
gen_helper_sve_uxtw_d : NULL, a, 0) +static gen_helper_gvec_3 * const addqv_fns[4] = { + gen_helper_sve2p1_addqv_b, gen_helper_sve2p1_addqv_h, + gen_helper_sve2p1_addqv_s, gen_helper_sve2p1_addqv_d, +}; +TRANS_FEAT(ADDQV, aa64_sme2p1_or_sve2p1, + gen_gvec_ool_arg_zpz, addqv_fns[a->esz], a, 0) + +static gen_helper_gvec_3 * const smaxqv_fns[4] = { + gen_helper_sve2p1_smaxqv_b, gen_helper_sve2p1_smaxqv_h, + gen_helper_sve2p1_smaxqv_s, gen_helper_sve2p1_smaxqv_d, +}; +TRANS_FEAT(SMAXQV, aa64_sme2p1_or_sve2p1, + gen_gvec_ool_arg_zpz, smaxqv_fns[a->esz], a, 0) + +static gen_helper_gvec_3 * const sminqv_fns[4] = { + gen_helper_sve2p1_sminqv_b, gen_helper_sve2p1_sminqv_h, + gen_helper_sve2p1_sminqv_s, gen_helper_sve2p1_sminqv_d, +}; +TRANS_FEAT(SMINQV, aa64_sme2p1_or_sve2p1, + gen_gvec_ool_arg_zpz, sminqv_fns[a->esz], a, 0) + +static gen_helper_gvec_3 * const umaxqv_fns[4] = { + gen_helper_sve2p1_umaxqv_b, gen_helper_sve2p1_umaxqv_h, + gen_helper_sve2p1_umaxqv_s, gen_helper_sve2p1_umaxqv_d, +}; +TRANS_FEAT(UMAXQV, aa64_sme2p1_or_sve2p1, + gen_gvec_ool_arg_zpz, umaxqv_fns[a->esz], a, 0) + +static gen_helper_gvec_3 * const uminqv_fns[4] = { + gen_helper_sve2p1_uminqv_b, gen_helper_sve2p1_uminqv_h, + gen_helper_sve2p1_uminqv_s, gen_helper_sve2p1_uminqv_d, +}; +TRANS_FEAT(UMINQV, aa64_sme2p1_or_sve2p1, + gen_gvec_ool_arg_zpz, uminqv_fns[a->esz], a, 0) + /* *** SVE Integer Reduction Group */ @@ -1679,6 +1725,22 @@ static bool do_predset(DisasContext *s, int esz, int rd, int pat, bool setflag) TRANS_FEAT(PTRUE, aa64_sve, do_predset, a->esz, a->rd, a->pat, a->s) +static bool trans_PTRUE_cnt(DisasContext *s, arg_PTRUE_cnt *a) +{ + if (!dc_isar_feature(aa64_sme2_or_sve2p1, s)) { + return false; + } + if (sve_access_check(s)) { + /* Canonical TRUE is 0 count, invert bit, plus element size. */ + int val = (1 << 15) | (1 << a->esz); + + /* Write val to the first uint64_t; clear all of the rest. */ + tcg_gen_gvec_dup_imm(MO_64, pred_full_reg_offset(s, a->rd), + 8, size_for_gvec(pred_full_reg_size(s)), val); + } + return true; +} + /* Note pat == 31 is #all, to set all elements. */ TRANS_FEAT_NONSTREAMING(SETFFR, aa64_sve, do_predset, 0, FFR_PRED_NUM, 31, false) @@ -2148,6 +2210,55 @@ static bool do_EXT(DisasContext *s, int rd, int rn, int rm, int imm) TRANS_FEAT(EXT, aa64_sve, do_EXT, a->rd, a->rn, a->rm, a->imm) TRANS_FEAT(EXT_sve2, aa64_sve2, do_EXT, a->rd, a->rn, (a->rn + 1) % 32, a->imm) +static bool trans_EXTQ(DisasContext *s, arg_EXTQ *a) +{ + unsigned vl, dofs, sofs0, sofs1, sofs2, imm; + + if (!dc_isar_feature(aa64_sme2p1_or_sve2p1, s)) { + return false; + } + if (!sve_access_check(s)) { + return true; + } + + imm = a->imm; + if (imm == 0) { + /* So far we never optimize Zdn with MOVPRFX, so zd = zn is a nop. 
*/ + return true; + } + + vl = vec_full_reg_size(s); + dofs = vec_full_reg_offset(s, a->rd); + sofs2 = vec_full_reg_offset(s, a->rn); + + if (imm & 8) { + sofs0 = dofs + 8; + sofs1 = sofs2; + sofs2 += 8; + } else { + sofs0 = dofs; + sofs1 = dofs + 8; + } + imm = (imm & 7) << 3; + + for (unsigned i = 0; i < vl; i += 16) { + TCGv_i64 s0 = tcg_temp_new_i64(); + TCGv_i64 s1 = tcg_temp_new_i64(); + TCGv_i64 s2 = tcg_temp_new_i64(); + + tcg_gen_ld_i64(s0, tcg_env, sofs0 + i); + tcg_gen_ld_i64(s1, tcg_env, sofs1 + i); + tcg_gen_ld_i64(s2, tcg_env, sofs2 + i); + + tcg_gen_extract2_i64(s0, s0, s1, imm); + tcg_gen_extract2_i64(s1, s1, s2, imm); + + tcg_gen_st_i64(s0, tcg_env, dofs + i); + tcg_gen_st_i64(s1, tcg_env, dofs + i + 8); + } + return true; +} + /* *** SVE Permute - Unpredicated Group */ @@ -2195,6 +2306,27 @@ static bool trans_DUP_x(DisasContext *s, arg_DUP_x *a) return true; } +static bool trans_DUPQ(DisasContext *s, arg_DUPQ *a) +{ + unsigned vl, dofs, nofs; + + if (!dc_isar_feature(aa64_sme2p1_or_sve2p1, s)) { + return false; + } + if (!sve_access_check(s)) { + return true; + } + + vl = vec_full_reg_size(s); + dofs = vec_full_reg_offset(s, a->rd); + nofs = vec_reg_offset(s, a->rn, a->imm, a->esz); + + for (unsigned i = 0; i < vl; i += 16) { + tcg_gen_gvec_dup_mem(a->esz, dofs + i, nofs + i, 16, 16); + } + return true; +} + static void do_insr_i64(DisasContext *s, arg_rrr_esz *a, TCGv_i64 val) { typedef void gen_insr(TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32); @@ -2256,12 +2388,124 @@ static gen_helper_gvec_4 * const sve2_tbl_fns[4] = { TRANS_FEAT(TBL_sve2, aa64_sve2, gen_gvec_ool_zzzz, sve2_tbl_fns[a->esz], a->rd, a->rn, (a->rn + 1) % 32, a->rm, 0) +static gen_helper_gvec_3 * const tblq_fns[4] = { + gen_helper_sve2p1_tblq_b, gen_helper_sve2p1_tblq_h, + gen_helper_sve2p1_tblq_s, gen_helper_sve2p1_tblq_d +}; +TRANS_FEAT(TBLQ, aa64_sme2p1_or_sve2p1, gen_gvec_ool_arg_zzz, + tblq_fns[a->esz], a, 0) + static gen_helper_gvec_3 * const tbx_fns[4] = { gen_helper_sve2_tbx_b, gen_helper_sve2_tbx_h, gen_helper_sve2_tbx_s, gen_helper_sve2_tbx_d }; TRANS_FEAT(TBX, aa64_sve2, gen_gvec_ool_arg_zzz, tbx_fns[a->esz], a, 0) +static gen_helper_gvec_3 * const tbxq_fns[4] = { + gen_helper_sve2p1_tbxq_b, gen_helper_sve2p1_tbxq_h, + gen_helper_sve2p1_tbxq_s, gen_helper_sve2p1_tbxq_d +}; +TRANS_FEAT(TBXQ, aa64_sme2p1_or_sve2p1, gen_gvec_ool_arg_zzz, + tbxq_fns[a->esz], a, 0) + +static bool trans_PMOV_pv(DisasContext *s, arg_PMOV_pv *a) +{ + static gen_helper_gvec_2 * const fns[4] = { + NULL, gen_helper_pmov_pv_h, + gen_helper_pmov_pv_s, gen_helper_pmov_pv_d + }; + unsigned vl, pl, vofs, pofs; + TCGv_i64 tmp; + + if (!dc_isar_feature(aa64_sme2p1_or_sve2p1, s)) { + return false; + } + if (!sve_access_check(s)) { + return true; + } + + vl = vec_full_reg_size(s); + if (a->esz != MO_8) { + tcg_gen_gvec_2_ool(pred_full_reg_offset(s, a->rd), + vec_full_reg_offset(s, a->rn), + vl, vl, a->imm, fns[a->esz]); + return true; + } + + /* + * Copy the low PL bytes from vector Zn, zero-extending to a + * multiple of 8 bytes, so that Pd is properly cleared. + */ + + pl = vl / 8; + pofs = pred_full_reg_offset(s, a->rd); + vofs = vec_full_reg_offset(s, a->rn); + + QEMU_BUILD_BUG_ON(sizeof(ARMPredicateReg) != 32); + for (unsigned i = 32; i >= 8; i >>= 1) { + if (pl & i) { + tcg_gen_gvec_mov(MO_64, pofs, vofs, i, i); + pofs += i; + vofs += i; + } + } + switch (pl & 7) { + case 0: + return true; + case 2: + tmp = tcg_temp_new_i64(); + tcg_gen_ld16u_i64(tmp, tcg_env, vofs + (HOST_BIG_ENDIAN ? 
6 : 0)); + break; + case 4: + tmp = tcg_temp_new_i64(); + tcg_gen_ld32u_i64(tmp, tcg_env, vofs + (HOST_BIG_ENDIAN ? 4 : 0)); + break; + case 6: + tmp = tcg_temp_new_i64(); + tcg_gen_ld_i64(tmp, tcg_env, vofs); + tcg_gen_extract_i64(tmp, tmp, 0, 48); + break; + default: + g_assert_not_reached(); + } + tcg_gen_st_i64(tmp, tcg_env, pofs); + return true; +} + +static bool trans_PMOV_vp(DisasContext *s, arg_PMOV_pv *a) +{ + static gen_helper_gvec_2 * const fns[4] = { + NULL, gen_helper_pmov_vp_h, + gen_helper_pmov_vp_s, gen_helper_pmov_vp_d + }; + unsigned vl; + + if (!dc_isar_feature(aa64_sme2p1_or_sve2p1, s)) { + return false; + } + if (!sve_access_check(s)) { + return true; + } + + vl = vec_full_reg_size(s); + + if (a->esz == MO_8) { + /* + * The low PL bytes are copied from Pn to Zd unchanged. + * We know that the unused portion of Pn is zero, and + * that imm == 0, so the balance of Zd must be zeroed. + */ + tcg_gen_gvec_mov(MO_64, vec_full_reg_offset(s, a->rd), + pred_full_reg_offset(s, a->rn), + size_for_gvec(vl / 8), vl); + } else { + tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd), + pred_full_reg_offset(s, a->rn), + vl, vl, a->imm, fns[a->esz]); + } + return true; +} + static bool trans_UNPK(DisasContext *s, arg_UNPK *a) { static gen_helper_gvec_2 * const fns[4][2] = { @@ -2352,6 +2596,23 @@ TRANS_FEAT(PUNPKHI, aa64_sve, do_perm_pred2, a, 1, gen_helper_sve_punpk_p) *** SVE Permute - Interleaving Group */ +static bool do_interleave_q(DisasContext *s, gen_helper_gvec_3 *fn, + arg_rrr_esz *a, int data) +{ + if (sve_access_check(s)) { + unsigned vsz = vec_full_reg_size(s); + if (vsz < 32) { + unallocated_encoding(s); + } else { + tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd), + vec_full_reg_offset(s, a->rn), + vec_full_reg_offset(s, a->rm), + vsz, vsz, data, fn); + } + } + return true; +} + static gen_helper_gvec_3 * const zip_fns[4] = { gen_helper_sve_zip_b, gen_helper_sve_zip_h, gen_helper_sve_zip_s, gen_helper_sve_zip_d, @@ -2361,26 +2622,43 @@ TRANS_FEAT(ZIP1_z, aa64_sve, gen_gvec_ool_arg_zzz, TRANS_FEAT(ZIP2_z, aa64_sve, gen_gvec_ool_arg_zzz, zip_fns[a->esz], a, vec_full_reg_size(s) / 2) -TRANS_FEAT(ZIP1_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz, - gen_helper_sve2_zip_q, a, 0) -TRANS_FEAT(ZIP2_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz, - gen_helper_sve2_zip_q, a, - QEMU_ALIGN_DOWN(vec_full_reg_size(s), 32) / 2) +TRANS_FEAT_NONSTREAMING(ZIP1_q, aa64_sve_f64mm, do_interleave_q, + gen_helper_sve2_zip_q, a, 0) +TRANS_FEAT_NONSTREAMING(ZIP2_q, aa64_sve_f64mm, do_interleave_q, + gen_helper_sve2_zip_q, a, + QEMU_ALIGN_DOWN(vec_full_reg_size(s), 32) / 2) + +static gen_helper_gvec_3 * const zipq_fns[4] = { + gen_helper_sve2p1_zipq_b, gen_helper_sve2p1_zipq_h, + gen_helper_sve2p1_zipq_s, gen_helper_sve2p1_zipq_d, +}; +TRANS_FEAT(ZIPQ1, aa64_sme2p1_or_sve2p1, gen_gvec_ool_arg_zzz, + zipq_fns[a->esz], a, 0) +TRANS_FEAT(ZIPQ2, aa64_sme2p1_or_sve2p1, gen_gvec_ool_arg_zzz, + zipq_fns[a->esz], a, 16 / 2) static gen_helper_gvec_3 * const uzp_fns[4] = { gen_helper_sve_uzp_b, gen_helper_sve_uzp_h, gen_helper_sve_uzp_s, gen_helper_sve_uzp_d, }; - TRANS_FEAT(UZP1_z, aa64_sve, gen_gvec_ool_arg_zzz, uzp_fns[a->esz], a, 0) TRANS_FEAT(UZP2_z, aa64_sve, gen_gvec_ool_arg_zzz, uzp_fns[a->esz], a, 1 << a->esz) -TRANS_FEAT(UZP1_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz, - gen_helper_sve2_uzp_q, a, 0) -TRANS_FEAT(UZP2_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz, - gen_helper_sve2_uzp_q, a, 16) +TRANS_FEAT_NONSTREAMING(UZP1_q, aa64_sve_f64mm, do_interleave_q, + gen_helper_sve2_uzp_q, a, 0) 
+TRANS_FEAT_NONSTREAMING(UZP2_q, aa64_sve_f64mm, do_interleave_q, + gen_helper_sve2_uzp_q, a, 16) + +static gen_helper_gvec_3 * const uzpq_fns[4] = { + gen_helper_sve2p1_uzpq_b, gen_helper_sve2p1_uzpq_h, + gen_helper_sve2p1_uzpq_s, gen_helper_sve2p1_uzpq_d, +}; +TRANS_FEAT(UZPQ1, aa64_sme2p1_or_sve2p1, gen_gvec_ool_arg_zzz, + uzpq_fns[a->esz], a, 0) +TRANS_FEAT(UZPQ2, aa64_sme2p1_or_sve2p1, gen_gvec_ool_arg_zzz, + uzpq_fns[a->esz], a, 1 << a->esz) static gen_helper_gvec_3 * const trn_fns[4] = { gen_helper_sve_trn_b, gen_helper_sve_trn_h, @@ -2392,10 +2670,10 @@ TRANS_FEAT(TRN1_z, aa64_sve, gen_gvec_ool_arg_zzz, TRANS_FEAT(TRN2_z, aa64_sve, gen_gvec_ool_arg_zzz, trn_fns[a->esz], a, 1 << a->esz) -TRANS_FEAT(TRN1_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz, - gen_helper_sve2_trn_q, a, 0) -TRANS_FEAT(TRN2_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz, - gen_helper_sve2_trn_q, a, 16) +TRANS_FEAT_NONSTREAMING(TRN1_q, aa64_sve_f64mm, do_interleave_q, + gen_helper_sve2_trn_q, a, 0) +TRANS_FEAT_NONSTREAMING(TRN2_q, aa64_sve_f64mm, do_interleave_q, + gen_helper_sve2_trn_q, a, 16) /* *** SVE Permute Vector - Predicated Group @@ -2981,6 +3259,36 @@ static bool trans_CNTP(DisasContext *s, arg_CNTP *a) return true; } +static bool trans_CNTP_c(DisasContext *s, arg_CNTP_c *a) +{ + TCGv_i32 t_png; + uint32_t desc = 0; + + if (dc_isar_feature(aa64_sve2p1, s)) { + if (!sve_access_check(s)) { + return true; + } + } else if (dc_isar_feature(aa64_sme2, s)) { + if (!sme_sm_enabled_check(s)) { + return true; + } + } else { + return false; + } + + t_png = tcg_temp_new_i32(); + tcg_gen_ld16u_i32(t_png, tcg_env, + pred_full_reg_offset(s, a->rn) ^ + (HOST_BIG_ENDIAN ? 6 : 0)); + + desc = FIELD_DP32(desc, PREDDESC, OPRSZ, pred_full_reg_size(s)); + desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz); + desc = FIELD_DP32(desc, PREDDESC, DATA, a->vl); + + gen_helper_sve2p1_cntp_c(cpu_reg(s, a->rd), t_png, tcg_constant_i32(desc)); + return true; +} + static bool trans_INCDECP_r(DisasContext *s, arg_incdec_pred *a) { if (!dc_isar_feature(aa64_sve, s)) { @@ -3091,7 +3399,9 @@ static bool trans_CTERM(DisasContext *s, arg_CTERM *a) return true; } -static bool trans_WHILE(DisasContext *s, arg_WHILE *a) +typedef void gen_while_fn(TCGv_i32, TCGv_ptr, TCGv_i32, TCGv_i32); +static bool do_WHILE(DisasContext *s, arg_while *a, + bool lt, int scale, int data, gen_while_fn *fn) { TCGv_i64 op0, op1, t0, t1, tmax; TCGv_i32 t2; @@ -3101,14 +3411,8 @@ static bool trans_WHILE(DisasContext *s, arg_WHILE *a) TCGCond cond; uint64_t maxval; /* Note that GE/HS has a->eq == 0 and GT/HI has a->eq == 1. */ - bool eq = a->eq == a->lt; + bool eq = a->eq == lt; - /* The greater-than conditions are all SVE2. */ - if (a->lt - ? !dc_isar_feature(aa64_sve, s) - : !dc_isar_feature(aa64_sve2, s)) { - return false; - } if (!sve_access_check(s)) { return true; } @@ -3132,7 +3436,7 @@ static bool trans_WHILE(DisasContext *s, arg_WHILE *a) t0 = tcg_temp_new_i64(); t1 = tcg_temp_new_i64(); - if (a->lt) { + if (lt) { tcg_gen_sub_i64(t0, op1, op0); if (a->u) { maxval = a->sf ? UINT64_MAX : UINT32_MAX; @@ -3152,7 +3456,7 @@ static bool trans_WHILE(DisasContext *s, arg_WHILE *a) } } - tmax = tcg_constant_i64(vsz >> a->esz); + tmax = tcg_constant_i64((vsz << scale) >> a->esz); if (eq) { /* Equality means one more iteration. */ tcg_gen_addi_i64(t0, t0, 1); @@ -3181,24 +3485,38 @@ static bool trans_WHILE(DisasContext *s, arg_WHILE *a) t2 = tcg_temp_new_i32(); tcg_gen_extrl_i64_i32(t2, t0); - /* Scale elements to bits. 
*/ - tcg_gen_shli_i32(t2, t2, a->esz); - desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz / 8); desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz); + desc = FIELD_DP32(desc, PREDDESC, DATA, data); ptr = tcg_temp_new_ptr(); tcg_gen_addi_ptr(ptr, tcg_env, pred_full_reg_offset(s, a->rd)); - if (a->lt) { - gen_helper_sve_whilel(t2, ptr, t2, tcg_constant_i32(desc)); - } else { - gen_helper_sve_whileg(t2, ptr, t2, tcg_constant_i32(desc)); - } + fn(t2, ptr, t2, tcg_constant_i32(desc)); + do_pred_flags(t2); return true; } +TRANS_FEAT(WHILE_lt, aa64_sve, do_WHILE, + a, true, 0, 0, gen_helper_sve_whilel) +TRANS_FEAT(WHILE_gt, aa64_sve2, do_WHILE, + a, false, 0, 0, gen_helper_sve_whileg) + +TRANS_FEAT(WHILE_lt_pair, aa64_sme2_or_sve2p1, do_WHILE, + a, true, 1, 0, gen_helper_sve_while2l) +TRANS_FEAT(WHILE_gt_pair, aa64_sme2_or_sve2p1, do_WHILE, + a, false, 1, 0, gen_helper_sve_while2g) + +TRANS_FEAT(WHILE_lt_cnt2, aa64_sme2_or_sve2p1, do_WHILE, + a, true, 1, 1, gen_helper_sve_whilecl) +TRANS_FEAT(WHILE_lt_cnt4, aa64_sme2_or_sve2p1, do_WHILE, + a, true, 2, 2, gen_helper_sve_whilecl) +TRANS_FEAT(WHILE_gt_cnt2, aa64_sme2_or_sve2p1, do_WHILE, + a, false, 1, 1, gen_helper_sve_whilecg) +TRANS_FEAT(WHILE_gt_cnt4, aa64_sme2_or_sve2p1, do_WHILE, + a, false, 2, 2, gen_helper_sve_whilecg) + static bool trans_WHILE_ptr(DisasContext *s, arg_WHILE_ptr *a) { TCGv_i64 op0, op1, diff, t1, tmax; @@ -3217,7 +3535,7 @@ static bool trans_WHILE_ptr(DisasContext *s, arg_WHILE_ptr *a) op0 = read_cpu_reg(s, a->rn, 1); op1 = read_cpu_reg(s, a->rm, 1); - tmax = tcg_constant_i64(vsz); + tmax = tcg_constant_i64(vsz >> a->esz); diff = tcg_temp_new_i64(); if (a->rw) { @@ -3227,15 +3545,15 @@ static bool trans_WHILE_ptr(DisasContext *s, arg_WHILE_ptr *a) tcg_gen_sub_i64(diff, op0, op1); tcg_gen_sub_i64(t1, op1, op0); tcg_gen_movcond_i64(TCG_COND_GEU, diff, op0, op1, diff, t1); - /* Round down to a multiple of ESIZE. */ - tcg_gen_andi_i64(diff, diff, -1 << a->esz); + /* Divide, rounding down, by ESIZE. */ + tcg_gen_shri_i64(diff, diff, a->esz); /* If op1 == op0, diff == 0, and the condition is always true. */ tcg_gen_movcond_i64(TCG_COND_EQ, diff, op0, op1, tmax, diff); } else { /* WHILEWR */ tcg_gen_sub_i64(diff, op1, op0); - /* Round down to a multiple of ESIZE. */ - tcg_gen_andi_i64(diff, diff, -1 << a->esz); + /* Divide, rounding down, by ESIZE. */ + tcg_gen_shri_i64(diff, diff, a->esz); /* If op0 >= op1, diff <= 0, the condition is always true. */ tcg_gen_movcond_i64(TCG_COND_GEU, diff, op0, op1, tmax, diff); } @@ -3258,6 +3576,42 @@ static bool trans_WHILE_ptr(DisasContext *s, arg_WHILE_ptr *a) return true; } +static bool do_pext(DisasContext *s, arg_pext *a, int n) +{ + TCGv_i32 t_png; + TCGv_ptr t_pd; + int pl; + + if (!sve_access_check(s)) { + return true; + } + + t_png = tcg_temp_new_i32(); + tcg_gen_ld16u_i32(t_png, tcg_env, + pred_full_reg_offset(s, a->rn) ^ + (HOST_BIG_ENDIAN ? 
6 : 0)); + + t_pd = tcg_temp_new_ptr(); + pl = pred_full_reg_size(s); + + for (int i = 0; i < n; ++i) { + int rd = (a->rd + i) % 16; + int part = a->imm * n + i; + unsigned desc = 0; + + desc = FIELD_DP32(desc, PREDDESC, OPRSZ, pl); + desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz); + desc = FIELD_DP32(desc, PREDDESC, DATA, part); + + tcg_gen_addi_ptr(t_pd, tcg_env, pred_full_reg_offset(s, rd)); + gen_helper_pext(t_pd, t_png, tcg_constant_i32(desc)); + } + return true; +} + +TRANS_FEAT(PEXT_1, aa64_sme2_or_sve2p1, do_pext, a, 1) +TRANS_FEAT(PEXT_2, aa64_sme2_or_sve2p1, do_pext, a, 2) + /* *** SVE Integer Wide Immediate - Unpredicated Group */ @@ -3385,8 +3739,8 @@ DO_ZZI(UMIN, umin) #undef DO_ZZI static gen_helper_gvec_4 * const dot_fns[2][2] = { - { gen_helper_gvec_sdot_b, gen_helper_gvec_sdot_h }, - { gen_helper_gvec_udot_b, gen_helper_gvec_udot_h } + { gen_helper_gvec_sdot_4b, gen_helper_gvec_sdot_4h }, + { gen_helper_gvec_udot_4b, gen_helper_gvec_udot_4h } }; TRANS_FEAT(DOT_zzzz, aa64_sve, gen_gvec_ool_zzzz, dot_fns[a->u][a->sz], a->rd, a->rn, a->rm, a->ra, 0) @@ -3395,19 +3749,24 @@ TRANS_FEAT(DOT_zzzz, aa64_sve, gen_gvec_ool_zzzz, * SVE Multiply - Indexed */ -TRANS_FEAT(SDOT_zzxw_s, aa64_sve, gen_gvec_ool_arg_zzxz, - gen_helper_gvec_sdot_idx_b, a) -TRANS_FEAT(SDOT_zzxw_d, aa64_sve, gen_gvec_ool_arg_zzxz, - gen_helper_gvec_sdot_idx_h, a) -TRANS_FEAT(UDOT_zzxw_s, aa64_sve, gen_gvec_ool_arg_zzxz, - gen_helper_gvec_udot_idx_b, a) -TRANS_FEAT(UDOT_zzxw_d, aa64_sve, gen_gvec_ool_arg_zzxz, - gen_helper_gvec_udot_idx_h, a) - -TRANS_FEAT(SUDOT_zzxw_s, aa64_sve_i8mm, gen_gvec_ool_arg_zzxz, - gen_helper_gvec_sudot_idx_b, a) -TRANS_FEAT(USDOT_zzxw_s, aa64_sve_i8mm, gen_gvec_ool_arg_zzxz, - gen_helper_gvec_usdot_idx_b, a) +TRANS_FEAT(SDOT_zzxw_4s, aa64_sve, gen_gvec_ool_arg_zzxz, + gen_helper_gvec_sdot_idx_4b, a) +TRANS_FEAT(SDOT_zzxw_4d, aa64_sve, gen_gvec_ool_arg_zzxz, + gen_helper_gvec_sdot_idx_4h, a) +TRANS_FEAT(UDOT_zzxw_4s, aa64_sve, gen_gvec_ool_arg_zzxz, + gen_helper_gvec_udot_idx_4b, a) +TRANS_FEAT(UDOT_zzxw_4d, aa64_sve, gen_gvec_ool_arg_zzxz, + gen_helper_gvec_udot_idx_4h, a) + +TRANS_FEAT(SUDOT_zzxw_4s, aa64_sve_i8mm, gen_gvec_ool_arg_zzxz, + gen_helper_gvec_sudot_idx_4b, a) +TRANS_FEAT(USDOT_zzxw_4s, aa64_sve_i8mm, gen_gvec_ool_arg_zzxz, + gen_helper_gvec_usdot_idx_4b, a) + +TRANS_FEAT(SDOT_zzxw_2s, aa64_sme2_or_sve2p1, gen_gvec_ool_arg_zzxz, + gen_helper_gvec_sdot_idx_2h, a) +TRANS_FEAT(UDOT_zzxw_2s, aa64_sme2_or_sve2p1, gen_gvec_ool_arg_zzxz, + gen_helper_gvec_udot_idx_2h, a) #define DO_SVE2_RRX(NAME, FUNC) \ TRANS_FEAT(NAME, aa64_sve, gen_gvec_ool_zzz, FUNC, \ @@ -3524,31 +3883,38 @@ DO_SVE2_RRXR_ROT(CDOT_zzxw_d, gen_helper_sve2_cdot_idx_d) *** SVE Floating Point Multiply-Add Indexed Group */ +static bool do_fmla_zzxz(DisasContext *s, arg_rrxr_esz *a, + gen_helper_gvec_4_ptr *fn) +{ + /* These insns use MO_8 to encode BFloat16 */ + if (a->esz == MO_8 && !dc_isar_feature(aa64_sve_b16b16, s)) { + return false; + } + return gen_gvec_fpst_zzzz(s, fn, a->rd, a->rn, a->rm, a->ra, a->index, + a->esz == MO_16 ? FPST_A64_F16 : FPST_A64); +} + static gen_helper_gvec_4_ptr * const fmla_idx_fns[4] = { - NULL, gen_helper_gvec_fmla_idx_h, + gen_helper_gvec_bfmla_idx, gen_helper_gvec_fmla_idx_h, gen_helper_gvec_fmla_idx_s, gen_helper_gvec_fmla_idx_d }; -TRANS_FEAT(FMLA_zzxz, aa64_sve, gen_gvec_fpst_zzzz, - fmla_idx_fns[a->esz], a->rd, a->rn, a->rm, a->ra, a->index, - a->esz == MO_16 ? 
FPST_A64_F16 : FPST_A64) +TRANS_FEAT(FMLA_zzxz, aa64_sve, do_fmla_zzxz, a, fmla_idx_fns[a->esz]) static gen_helper_gvec_4_ptr * const fmls_idx_fns[4][2] = { - { NULL, NULL }, + { gen_helper_gvec_bfmls_idx, gen_helper_gvec_ah_bfmls_idx }, { gen_helper_gvec_fmls_idx_h, gen_helper_gvec_ah_fmls_idx_h }, { gen_helper_gvec_fmls_idx_s, gen_helper_gvec_ah_fmls_idx_s }, { gen_helper_gvec_fmls_idx_d, gen_helper_gvec_ah_fmls_idx_d }, }; -TRANS_FEAT(FMLS_zzxz, aa64_sve, gen_gvec_fpst_zzzz, - fmls_idx_fns[a->esz][s->fpcr_ah], - a->rd, a->rn, a->rm, a->ra, a->index, - a->esz == MO_16 ? FPST_A64_F16 : FPST_A64) +TRANS_FEAT(FMLS_zzxz, aa64_sve, do_fmla_zzxz, a, + fmls_idx_fns[a->esz][s->fpcr_ah]) /* *** SVE Floating Point Multiply Indexed Group */ static gen_helper_gvec_3_ptr * const fmul_idx_fns[4] = { - NULL, gen_helper_gvec_fmul_idx_h, + gen_helper_gvec_fmul_idx_b16, gen_helper_gvec_fmul_idx_h, gen_helper_gvec_fmul_idx_s, gen_helper_gvec_fmul_idx_d, }; TRANS_FEAT(FMUL_zzx, aa64_sve, gen_gvec_fpst_zzz, @@ -3621,6 +3987,54 @@ DO_VPZ_AH(FMAXV, fmaxv) #undef DO_VPZ +static gen_helper_gvec_3_ptr * const faddqv_fns[4] = { + NULL, gen_helper_sve2p1_faddqv_h, + gen_helper_sve2p1_faddqv_s, gen_helper_sve2p1_faddqv_d, +}; +TRANS_FEAT(FADDQV, aa64_sme2p1_or_sve2p1, gen_gvec_fpst_arg_zpz, + faddqv_fns[a->esz], a, 0, + a->esz == MO_16 ? FPST_A64_F16 : FPST_A64) + +static gen_helper_gvec_3_ptr * const fmaxnmqv_fns[4] = { + NULL, gen_helper_sve2p1_fmaxnmqv_h, + gen_helper_sve2p1_fmaxnmqv_s, gen_helper_sve2p1_fmaxnmqv_d, +}; +TRANS_FEAT(FMAXNMQV, aa64_sme2p1_or_sve2p1, gen_gvec_fpst_arg_zpz, + fmaxnmqv_fns[a->esz], a, 0, + a->esz == MO_16 ? FPST_A64_F16 : FPST_A64) + +static gen_helper_gvec_3_ptr * const fminnmqv_fns[4] = { + NULL, gen_helper_sve2p1_fminnmqv_h, + gen_helper_sve2p1_fminnmqv_s, gen_helper_sve2p1_fminnmqv_d, +}; +TRANS_FEAT(FMINNMQV, aa64_sme2p1_or_sve2p1, gen_gvec_fpst_arg_zpz, + fminnmqv_fns[a->esz], a, 0, + a->esz == MO_16 ? FPST_A64_F16 : FPST_A64) + +static gen_helper_gvec_3_ptr * const fmaxqv_fns[4] = { + NULL, gen_helper_sve2p1_fmaxqv_h, + gen_helper_sve2p1_fmaxqv_s, gen_helper_sve2p1_fmaxqv_d, +}; +static gen_helper_gvec_3_ptr * const fmaxqv_ah_fns[4] = { + NULL, gen_helper_sve2p1_ah_fmaxqv_h, + gen_helper_sve2p1_ah_fmaxqv_s, gen_helper_sve2p1_ah_fmaxqv_d, +}; +TRANS_FEAT(FMAXQV, aa64_sme2p1_or_sve2p1, gen_gvec_fpst_arg_zpz, + (s->fpcr_ah ? fmaxqv_ah_fns : fmaxqv_fns)[a->esz], a, 0, + a->esz == MO_16 ? FPST_A64_F16 : FPST_A64) + +static gen_helper_gvec_3_ptr * const fminqv_fns[4] = { + NULL, gen_helper_sve2p1_fminqv_h, + gen_helper_sve2p1_fminqv_s, gen_helper_sve2p1_fminqv_d, +}; +static gen_helper_gvec_3_ptr * const fminqv_ah_fns[4] = { + NULL, gen_helper_sve2p1_ah_fminqv_h, + gen_helper_sve2p1_ah_fminqv_s, gen_helper_sve2p1_ah_fminqv_d, +}; +TRANS_FEAT(FMINQV, aa64_sme2p1_or_sve2p1, gen_gvec_fpst_arg_zpz, + (s->fpcr_ah ? fminqv_ah_fns : fminqv_fns)[a->esz], a, 0, + a->esz == MO_16 ? FPST_A64_F16 : FPST_A64) + /* *** SVE Floating Point Unary Operations - Unpredicated Group */ @@ -3747,7 +4161,7 @@ static bool trans_FADDA(DisasContext *s, arg_rprr_esz *a) #define DO_FP3(NAME, name) \ static gen_helper_gvec_3_ptr * const name##_fns[4] = { \ - NULL, gen_helper_gvec_##name##_h, \ + gen_helper_gvec_##name##_b16, gen_helper_gvec_##name##_h, \ gen_helper_gvec_##name##_s, gen_helper_gvec_##name##_d \ }; \ TRANS_FEAT(NAME, aa64_sve, gen_gvec_fpst_arg_zzz, name##_fns[a->esz], a, 0) @@ -3803,13 +4217,34 @@ TRANS_FEAT_NONSTREAMING(FTSMUL, aa64_sve, gen_gvec_fpst_arg_zzz, s->fpcr_ah ? 
name##_ah_zpzz_fns[a->esz] : \ name##_zpzz_fns[a->esz], a) -DO_ZPZZ_FP(FADD_zpzz, aa64_sve, sve_fadd) -DO_ZPZZ_FP(FSUB_zpzz, aa64_sve, sve_fsub) -DO_ZPZZ_FP(FMUL_zpzz, aa64_sve, sve_fmul) -DO_ZPZZ_AH_FP(FMIN_zpzz, aa64_sve, sve_fmin, sve_ah_fmin) -DO_ZPZZ_AH_FP(FMAX_zpzz, aa64_sve, sve_fmax, sve_ah_fmax) -DO_ZPZZ_FP(FMINNM_zpzz, aa64_sve, sve_fminnum) -DO_ZPZZ_FP(FMAXNM_zpzz, aa64_sve, sve_fmaxnum) +/* Similar, but for insns where sz == 0 encodes bfloat16 */ +#define DO_ZPZZ_FP_B16(NAME, FEAT, name) \ + static gen_helper_gvec_4_ptr * const name##_zpzz_fns[4] = { \ + gen_helper_##name##_b16, gen_helper_##name##_h, \ + gen_helper_##name##_s, gen_helper_##name##_d \ + }; \ + TRANS_FEAT(NAME, FEAT, gen_gvec_fpst_arg_zpzz, name##_zpzz_fns[a->esz], a) + +#define DO_ZPZZ_AH_FP_B16(NAME, FEAT, name, ah_name) \ + static gen_helper_gvec_4_ptr * const name##_zpzz_fns[4] = { \ + gen_helper_##name##_b16, gen_helper_##name##_h, \ + gen_helper_##name##_s, gen_helper_##name##_d \ + }; \ + static gen_helper_gvec_4_ptr * const name##_ah_zpzz_fns[4] = { \ + gen_helper_##ah_name##_b16, gen_helper_##ah_name##_h, \ + gen_helper_##ah_name##_s, gen_helper_##ah_name##_d \ + }; \ + TRANS_FEAT(NAME, FEAT, gen_gvec_fpst_arg_zpzz, \ + s->fpcr_ah ? name##_ah_zpzz_fns[a->esz] : \ + name##_zpzz_fns[a->esz], a) + +DO_ZPZZ_FP_B16(FADD_zpzz, aa64_sve, sve_fadd) +DO_ZPZZ_FP_B16(FSUB_zpzz, aa64_sve, sve_fsub) +DO_ZPZZ_FP_B16(FMUL_zpzz, aa64_sve, sve_fmul) +DO_ZPZZ_AH_FP_B16(FMIN_zpzz, aa64_sve, sve_fmin, sve_ah_fmin) +DO_ZPZZ_AH_FP_B16(FMAX_zpzz, aa64_sve, sve_fmax, sve_ah_fmax) +DO_ZPZZ_FP_B16(FMINNM_zpzz, aa64_sve, sve_fminnum) +DO_ZPZZ_FP_B16(FMAXNM_zpzz, aa64_sve, sve_fmaxnum) DO_ZPZZ_AH_FP(FABD, aa64_sve, sve_fabd, sve_ah_fabd) DO_ZPZZ_FP(FSCALE, aa64_sve, sve_fscalbn) DO_ZPZZ_FP(FDIV, aa64_sve, sve_fdiv) @@ -3940,19 +4375,28 @@ TRANS_FEAT(FCADD, aa64_sve, gen_gvec_fpst_zzzp, fcadd_fns[a->esz], a->rd, a->rn, a->rm, a->pg, a->rot | (s->fpcr_ah << 1), a->esz == MO_16 ? FPST_A64_F16 : FPST_A64) +static bool do_fmla_zpzzz(DisasContext *s, arg_rprrr_esz *a, + gen_helper_gvec_5_ptr *fn) +{ + /* These insns use MO_8 to encode BFloat16 */ + if (a->esz == MO_8 && !dc_isar_feature(aa64_sve_b16b16, s)) { + return false; + } + return gen_gvec_fpst_zzzzp(s, fn, a->rd, a->rn, a->rm, a->ra, a->pg, 0, + a->esz == MO_16 ? FPST_A64_F16 : FPST_A64); +} + #define DO_FMLA(NAME, name, ah_name) \ static gen_helper_gvec_5_ptr * const name##_fns[4] = { \ - NULL, gen_helper_sve_##name##_h, \ + gen_helper_sve_##name##_b16, gen_helper_sve_##name##_h, \ gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \ }; \ static gen_helper_gvec_5_ptr * const name##_ah_fns[4] = { \ - NULL, gen_helper_sve_##ah_name##_h, \ + gen_helper_sve_##ah_name##_b16, gen_helper_sve_##ah_name##_h, \ gen_helper_sve_##ah_name##_s, gen_helper_sve_##ah_name##_d \ }; \ - TRANS_FEAT(NAME, aa64_sve, gen_gvec_fpst_zzzzp, \ - s->fpcr_ah ? name##_ah_fns[a->esz] : name##_fns[a->esz], \ - a->rd, a->rn, a->rm, a->ra, a->pg, 0, \ - a->esz == MO_16 ? FPST_A64_F16 : FPST_A64) + TRANS_FEAT(NAME, aa64_sve, do_fmla_zpzzz, a, \ + s->fpcr_ah ? 
name##_ah_fns[a->esz] : name##_fns[a->esz]) /* We don't need an ah_fmla_zpzzz because fmla doesn't negate anything */ DO_FMLA(FMLA_zpzzz, fmla_zpzzz, fmla_zpzzz) @@ -4143,7 +4587,7 @@ TRANS_FEAT(UCVTF_dd, aa64_sve, gen_gvec_fpst_arg_zpz, */ void gen_sve_ldr(DisasContext *s, TCGv_ptr base, int vofs, - int len, int rn, int imm) + int len, int rn, int imm, MemOp align) { int len_align = QEMU_ALIGN_DOWN(len, 16); int len_remain = len % 16; @@ -4172,12 +4616,15 @@ void gen_sve_ldr(DisasContext *s, TCGv_ptr base, int vofs, for (i = 0; i < len_align; i += 16) { tcg_gen_qemu_ld_i128(t16, clean_addr, midx, - MO_LE | MO_128 | MO_ATOM_NONE); + MO_LE | MO_128 | MO_ATOM_NONE | align); tcg_gen_extr_i128_i64(t0, t1, t16); tcg_gen_st_i64(t0, base, vofs + i); tcg_gen_st_i64(t1, base, vofs + i + 8); tcg_gen_addi_i64(clean_addr, clean_addr, 16); } + if (len_align) { + align = MO_UNALN; + } } else { TCGLabel *loop = gen_new_label(); TCGv_ptr tp, i = tcg_temp_new_ptr(); @@ -4187,7 +4634,7 @@ void gen_sve_ldr(DisasContext *s, TCGv_ptr base, int vofs, t16 = tcg_temp_new_i128(); tcg_gen_qemu_ld_i128(t16, clean_addr, midx, - MO_LE | MO_128 | MO_ATOM_NONE); + MO_LE | MO_128 | MO_ATOM_NONE | align); tcg_gen_addi_i64(clean_addr, clean_addr, 16); tp = tcg_temp_new_ptr(); @@ -4202,6 +4649,7 @@ void gen_sve_ldr(DisasContext *s, TCGv_ptr base, int vofs, tcg_gen_st_i64(t1, tp, vofs + 8); tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop); + align = MO_UNALN; } /* @@ -4210,7 +4658,9 @@ void gen_sve_ldr(DisasContext *s, TCGv_ptr base, int vofs, */ if (len_remain >= 8) { t0 = tcg_temp_new_i64(); - tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEUQ | MO_ATOM_NONE); + tcg_gen_qemu_ld_i64(t0, clean_addr, midx, + MO_LEUQ | MO_ATOM_NONE | align); + align = MO_UNALN; tcg_gen_st_i64(t0, base, vofs + len_align); len_remain -= 8; len_align += 8; @@ -4225,12 +4675,14 @@ void gen_sve_ldr(DisasContext *s, TCGv_ptr base, int vofs, case 4: case 8: tcg_gen_qemu_ld_i64(t0, clean_addr, midx, - MO_LE | ctz32(len_remain) | MO_ATOM_NONE); + MO_LE | ctz32(len_remain) + | MO_ATOM_NONE | align); break; case 6: t1 = tcg_temp_new_i64(); - tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEUL | MO_ATOM_NONE); + tcg_gen_qemu_ld_i64(t0, clean_addr, midx, + MO_LEUL | MO_ATOM_NONE | align); tcg_gen_addi_i64(clean_addr, clean_addr, 4); tcg_gen_qemu_ld_i64(t1, clean_addr, midx, MO_LEUW | MO_ATOM_NONE); tcg_gen_deposit_i64(t0, t0, t1, 32, 32); @@ -4245,7 +4697,7 @@ void gen_sve_ldr(DisasContext *s, TCGv_ptr base, int vofs, /* Similarly for stores. */ void gen_sve_str(DisasContext *s, TCGv_ptr base, int vofs, - int len, int rn, int imm) + int len, int rn, int imm, MemOp align) { int len_align = QEMU_ALIGN_DOWN(len, 16); int len_remain = len % 16; @@ -4277,9 +4729,12 @@ void gen_sve_str(DisasContext *s, TCGv_ptr base, int vofs, tcg_gen_ld_i64(t1, base, vofs + i + 8); tcg_gen_concat_i64_i128(t16, t0, t1); tcg_gen_qemu_st_i128(t16, clean_addr, midx, - MO_LE | MO_128 | MO_ATOM_NONE); + MO_LE | MO_128 | MO_ATOM_NONE | align); tcg_gen_addi_i64(clean_addr, clean_addr, 16); } + if (len_align) { + align = MO_UNALN; + } } else { TCGLabel *loop = gen_new_label(); TCGv_ptr tp, i = tcg_temp_new_ptr(); @@ -4303,13 +4758,16 @@ void gen_sve_str(DisasContext *s, TCGv_ptr base, int vofs, tcg_gen_addi_i64(clean_addr, clean_addr, 16); tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop); + align = MO_UNALN; } /* Predicate register stores can be any multiple of 2. 
*/ if (len_remain >= 8) { t0 = tcg_temp_new_i64(); tcg_gen_ld_i64(t0, base, vofs + len_align); - tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUQ | MO_ATOM_NONE); + tcg_gen_qemu_st_i64(t0, clean_addr, midx, + MO_LEUQ | MO_ATOM_NONE | align); + align = MO_UNALN; len_remain -= 8; len_align += 8; if (len_remain) { @@ -4325,11 +4783,13 @@ void gen_sve_str(DisasContext *s, TCGv_ptr base, int vofs, case 4: case 8: tcg_gen_qemu_st_i64(t0, clean_addr, midx, - MO_LE | ctz32(len_remain) | MO_ATOM_NONE); + MO_LE | ctz32(len_remain) + | MO_ATOM_NONE | align); break; case 6: - tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUL | MO_ATOM_NONE); + tcg_gen_qemu_st_i64(t0, clean_addr, midx, + MO_LEUL | MO_ATOM_NONE | align); tcg_gen_addi_i64(clean_addr, clean_addr, 4); tcg_gen_shri_i64(t0, t0, 32); tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUW | MO_ATOM_NONE); @@ -4349,7 +4809,8 @@ static bool trans_LDR_zri(DisasContext *s, arg_rri *a) if (sve_access_check(s)) { int size = vec_full_reg_size(s); int off = vec_full_reg_offset(s, a->rd); - gen_sve_ldr(s, tcg_env, off, size, a->rn, a->imm * size); + gen_sve_ldr(s, tcg_env, off, size, a->rn, a->imm * size, + s->align_mem ? MO_ALIGN_16 : MO_UNALN); } return true; } @@ -4362,7 +4823,8 @@ static bool trans_LDR_pri(DisasContext *s, arg_rri *a) if (sve_access_check(s)) { int size = pred_full_reg_size(s); int off = pred_full_reg_offset(s, a->rd); - gen_sve_ldr(s, tcg_env, off, size, a->rn, a->imm * size); + gen_sve_ldr(s, tcg_env, off, size, a->rn, a->imm * size, + s->align_mem ? MO_ALIGN_2 : MO_UNALN); } return true; } @@ -4375,7 +4837,8 @@ static bool trans_STR_zri(DisasContext *s, arg_rri *a) if (sve_access_check(s)) { int size = vec_full_reg_size(s); int off = vec_full_reg_offset(s, a->rd); - gen_sve_str(s, tcg_env, off, size, a->rn, a->imm * size); + gen_sve_str(s, tcg_env, off, size, a->rn, a->imm * size, + s->align_mem ? MO_ALIGN_16 : MO_UNALN); } return true; } @@ -4388,7 +4851,8 @@ static bool trans_STR_pri(DisasContext *s, arg_rri *a) if (sve_access_check(s)) { int size = pred_full_reg_size(s); int off = pred_full_reg_offset(s, a->rd); - gen_sve_str(s, tcg_env, off, size, a->rn, a->imm * size); + gen_sve_str(s, tcg_env, off, size, a->rn, a->imm * size, + s->align_mem ? MO_ALIGN_2 : MO_UNALN); } return true; } @@ -4398,34 +4862,37 @@ static bool trans_STR_pri(DisasContext *s, arg_rri *a) */ /* The memory mode of the dtype. */ -static const MemOp dtype_mop[16] = { +static const MemOp dtype_mop[19] = { MO_UB, MO_UB, MO_UB, MO_UB, MO_SL, MO_UW, MO_UW, MO_UW, MO_SW, MO_SW, MO_UL, MO_UL, - MO_SB, MO_SB, MO_SB, MO_UQ + MO_SB, MO_SB, MO_SB, MO_UQ, + /* Artificial values used by decode */ + MO_UL, MO_UQ, MO_128, }; #define dtype_msz(x) (dtype_mop[x] & MO_SIZE) /* The vector element size of dtype. */ -static const uint8_t dtype_esz[16] = { +static const uint8_t dtype_esz[19] = { 0, 1, 2, 3, 3, 1, 2, 3, 3, 2, 2, 3, - 3, 2, 1, 3 + 3, 2, 1, 3, + /* Artificial values used by decode */ + 4, 4, 4, }; -uint32_t make_svemte_desc(DisasContext *s, unsigned vsz, uint32_t nregs, +uint64_t make_svemte_desc(DisasContext *s, unsigned vsz, uint32_t nregs, uint32_t msz, bool is_write, uint32_t data) { uint32_t sizem1; - uint32_t desc = 0; + uint64_t desc = 0; /* Assert all of the data fits, with or without MTE enabled. 
*/ assert(nregs >= 1 && nregs <= 4); sizem1 = (nregs << msz) - 1; assert(sizem1 <= R_MTEDESC_SIZEM1_MASK >> R_MTEDESC_SIZEM1_SHIFT); - assert(data < 1u << SVE_MTEDESC_SHIFT); if (s->mte_active[0]) { desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s)); @@ -4433,9 +4900,9 @@ uint32_t make_svemte_desc(DisasContext *s, unsigned vsz, uint32_t nregs, desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma); desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write); desc = FIELD_DP32(desc, MTEDESC, SIZEM1, sizem1); - desc <<= SVE_MTEDESC_SHIFT; + desc <<= 32; } - return simd_desc(vsz, vsz, desc | data); + return simd_desc(vsz, vsz, data) | desc; } static void do_mem_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr, @@ -4443,7 +4910,7 @@ static void do_mem_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr, gen_helper_gvec_mem *fn) { TCGv_ptr t_pg; - uint32_t desc; + uint64_t desc; if (!s->mte_active[0]) { addr = clean_data_tbi(s, addr); @@ -4459,11 +4926,11 @@ static void do_mem_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr, t_pg = tcg_temp_new_ptr(); tcg_gen_addi_ptr(t_pg, tcg_env, pred_full_reg_offset(s, pg)); - fn(tcg_env, t_pg, addr, tcg_constant_i32(desc)); + fn(tcg_env, t_pg, addr, tcg_constant_i64(desc)); } /* Indexed by [mte][be][dtype][nreg] */ -static gen_helper_gvec_mem * const ldr_fns[2][2][16][4] = { +static gen_helper_gvec_mem * const ldr_fns[2][2][19][4] = { { /* mte inactive, little-endian */ { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r, gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r }, @@ -4487,7 +4954,13 @@ static gen_helper_gvec_mem * const ldr_fns[2][2][16][4] = { { gen_helper_sve_ld1bss_r, NULL, NULL, NULL }, { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL }, { gen_helper_sve_ld1dd_le_r, gen_helper_sve_ld2dd_le_r, - gen_helper_sve_ld3dd_le_r, gen_helper_sve_ld4dd_le_r } }, + gen_helper_sve_ld3dd_le_r, gen_helper_sve_ld4dd_le_r }, + + { gen_helper_sve_ld1squ_le_r, NULL, NULL, NULL }, + { gen_helper_sve_ld1dqu_le_r, NULL, NULL, NULL }, + { NULL, gen_helper_sve_ld2qq_le_r, + gen_helper_sve_ld3qq_le_r, gen_helper_sve_ld4qq_le_r }, + }, /* mte inactive, big-endian */ { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r, @@ -4512,7 +4985,14 @@ static gen_helper_gvec_mem * const ldr_fns[2][2][16][4] = { { gen_helper_sve_ld1bss_r, NULL, NULL, NULL }, { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL }, { gen_helper_sve_ld1dd_be_r, gen_helper_sve_ld2dd_be_r, - gen_helper_sve_ld3dd_be_r, gen_helper_sve_ld4dd_be_r } } }, + gen_helper_sve_ld3dd_be_r, gen_helper_sve_ld4dd_be_r }, + + { gen_helper_sve_ld1squ_be_r, NULL, NULL, NULL }, + { gen_helper_sve_ld1dqu_be_r, NULL, NULL, NULL }, + { NULL, gen_helper_sve_ld2qq_be_r, + gen_helper_sve_ld3qq_be_r, gen_helper_sve_ld4qq_be_r }, + }, + }, { /* mte active, little-endian */ { { gen_helper_sve_ld1bb_r_mte, @@ -4545,7 +5025,15 @@ static gen_helper_gvec_mem * const ldr_fns[2][2][16][4] = { { gen_helper_sve_ld1dd_le_r_mte, gen_helper_sve_ld2dd_le_r_mte, gen_helper_sve_ld3dd_le_r_mte, - gen_helper_sve_ld4dd_le_r_mte } }, + gen_helper_sve_ld4dd_le_r_mte }, + + { gen_helper_sve_ld1squ_le_r_mte, NULL, NULL, NULL }, + { gen_helper_sve_ld1dqu_le_r_mte, NULL, NULL, NULL }, + { NULL, + gen_helper_sve_ld2qq_le_r_mte, + gen_helper_sve_ld3qq_le_r_mte, + gen_helper_sve_ld4qq_le_r_mte }, + }, /* mte active, big-endian */ { { gen_helper_sve_ld1bb_r_mte, @@ -4578,7 +5066,16 @@ static gen_helper_gvec_mem * const ldr_fns[2][2][16][4] = { { gen_helper_sve_ld1dd_be_r_mte, gen_helper_sve_ld2dd_be_r_mte, gen_helper_sve_ld3dd_be_r_mte, - gen_helper_sve_ld4dd_be_r_mte } } }, + 
gen_helper_sve_ld4dd_be_r_mte }, + + { gen_helper_sve_ld1squ_be_r_mte, NULL, NULL, NULL }, + { gen_helper_sve_ld1dqu_be_r_mte, NULL, NULL, NULL }, + { NULL, + gen_helper_sve_ld2qq_be_r_mte, + gen_helper_sve_ld3qq_be_r_mte, + gen_helper_sve_ld4qq_be_r_mte }, + }, + }, }; static void do_ld_zpa(DisasContext *s, int zt, int pg, @@ -4597,9 +5094,32 @@ static void do_ld_zpa(DisasContext *s, int zt, int pg, static bool trans_LD_zprr(DisasContext *s, arg_rprr_load *a) { - if (a->rm == 31 || !dc_isar_feature(aa64_sve, s)) { + if (a->rm == 31) { return false; } + + /* dtypes 16-18 are artificial, representing 128-bit element */ + switch (a->dtype) { + case 0 ... 15: + if (!dc_isar_feature(aa64_sve, s)) { + return false; + } + break; + case 16: case 17: + if (!dc_isar_feature(aa64_sve2p1, s)) { + return false; + } + s->is_nonstreaming = true; + break; + case 18: + if (!dc_isar_feature(aa64_sme2p1_or_sve2p1, s)) { + return false; + } + break; + default: + g_assert_not_reached(); + } + if (sve_access_check(s)) { TCGv_i64 addr = tcg_temp_new_i64(); tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype)); @@ -4611,9 +5131,28 @@ static bool trans_LD_zprr(DisasContext *s, arg_rprr_load *a) static bool trans_LD_zpri(DisasContext *s, arg_rpri_load *a) { - if (!dc_isar_feature(aa64_sve, s)) { - return false; + /* dtypes 16-18 are artificial, representing 128-bit element */ + switch (a->dtype) { + case 0 ... 15: + if (!dc_isar_feature(aa64_sve, s)) { + return false; + } + break; + case 16: case 17: + if (!dc_isar_feature(aa64_sve2p1, s)) { + return false; + } + s->is_nonstreaming = true; + break; + case 18: + if (!dc_isar_feature(aa64_sme2p1_or_sve2p1, s)) { + return false; + } + break; + default: + g_assert_not_reached(); } + if (sve_access_check(s)) { int vsz = vec_full_reg_size(s); int elements = vsz >> dtype_esz[a->dtype]; @@ -4839,7 +5378,7 @@ static void do_ldrq(DisasContext *s, int zt, int pg, TCGv_i64 addr, int dtype) unsigned vsz = vec_full_reg_size(s); TCGv_ptr t_pg; int poff; - uint32_t desc; + uint64_t desc; /* Load the first quadword using the normal predicated load helpers. */ if (!s->mte_active[0]) { @@ -4870,7 +5409,7 @@ static void do_ldrq(DisasContext *s, int zt, int pg, TCGv_i64 addr, int dtype) gen_helper_gvec_mem *fn = ldr_fns[s->mte_active[0]][s->be_data == MO_BE][dtype][0]; desc = make_svemte_desc(s, 16, 1, dtype_msz(dtype), false, zt); - fn(tcg_env, t_pg, addr, tcg_constant_i32(desc)); + fn(tcg_env, t_pg, addr, tcg_constant_i64(desc)); /* Replicate that first quadword. */ if (vsz > 16) { @@ -4913,7 +5452,7 @@ static void do_ldro(DisasContext *s, int zt, int pg, TCGv_i64 addr, int dtype) unsigned vsz_r32; TCGv_ptr t_pg; int poff, doff; - uint32_t desc; + uint64_t desc; if (vsz < 32) { /* @@ -4954,7 +5493,7 @@ static void do_ldro(DisasContext *s, int zt, int pg, TCGv_i64 addr, int dtype) gen_helper_gvec_mem *fn = ldr_fns[s->mte_active[0]][s->be_data == MO_BE][dtype][0]; desc = make_svemte_desc(s, 32, 1, dtype_msz(dtype), false, zt); - fn(tcg_env, t_pg, addr, tcg_constant_i32(desc)); + fn(tcg_env, t_pg, addr, tcg_constant_i64(desc)); /* * Replicate that first octaword. 
@@ -5060,7 +5599,7 @@ static bool trans_LD1R_zpri(DisasContext *s, arg_rpri_load *a) static void do_st_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr, int msz, int esz, int nreg) { - static gen_helper_gvec_mem * const fn_single[2][2][4][4] = { + static gen_helper_gvec_mem * const fn_single[2][2][4][5] = { { { { gen_helper_sve_st1bb_r, gen_helper_sve_st1bh_r, gen_helper_sve_st1bs_r, @@ -5071,9 +5610,11 @@ static void do_st_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr, gen_helper_sve_st1hd_le_r }, { NULL, NULL, gen_helper_sve_st1ss_le_r, - gen_helper_sve_st1sd_le_r }, + gen_helper_sve_st1sd_le_r, + gen_helper_sve_st1sq_le_r, }, { NULL, NULL, NULL, - gen_helper_sve_st1dd_le_r } }, + gen_helper_sve_st1dd_le_r, + gen_helper_sve_st1dq_le_r, } }, { { gen_helper_sve_st1bb_r, gen_helper_sve_st1bh_r, gen_helper_sve_st1bs_r, @@ -5084,9 +5625,11 @@ static void do_st_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr, gen_helper_sve_st1hd_be_r }, { NULL, NULL, gen_helper_sve_st1ss_be_r, - gen_helper_sve_st1sd_be_r }, + gen_helper_sve_st1sd_be_r, + gen_helper_sve_st1sq_be_r }, { NULL, NULL, NULL, - gen_helper_sve_st1dd_be_r } } }, + gen_helper_sve_st1dd_be_r, + gen_helper_sve_st1dq_be_r } } }, { { { gen_helper_sve_st1bb_r_mte, gen_helper_sve_st1bh_r_mte, @@ -5098,9 +5641,11 @@ static void do_st_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr, gen_helper_sve_st1hd_le_r_mte }, { NULL, NULL, gen_helper_sve_st1ss_le_r_mte, - gen_helper_sve_st1sd_le_r_mte }, + gen_helper_sve_st1sd_le_r_mte, + gen_helper_sve_st1sq_le_r_mte }, { NULL, NULL, NULL, - gen_helper_sve_st1dd_le_r_mte } }, + gen_helper_sve_st1dd_le_r_mte, + gen_helper_sve_st1dq_le_r_mte } }, { { gen_helper_sve_st1bb_r_mte, gen_helper_sve_st1bh_r_mte, gen_helper_sve_st1bs_r_mte, @@ -5111,59 +5656,73 @@ static void do_st_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr, gen_helper_sve_st1hd_be_r_mte }, { NULL, NULL, gen_helper_sve_st1ss_be_r_mte, - gen_helper_sve_st1sd_be_r_mte }, + gen_helper_sve_st1sd_be_r_mte, + gen_helper_sve_st1sq_be_r_mte }, { NULL, NULL, NULL, - gen_helper_sve_st1dd_be_r_mte } } }, + gen_helper_sve_st1dd_be_r_mte, + gen_helper_sve_st1dq_be_r_mte } } }, }; - static gen_helper_gvec_mem * const fn_multiple[2][2][3][4] = { + static gen_helper_gvec_mem * const fn_multiple[2][2][3][5] = { { { { gen_helper_sve_st2bb_r, gen_helper_sve_st2hh_le_r, gen_helper_sve_st2ss_le_r, - gen_helper_sve_st2dd_le_r }, + gen_helper_sve_st2dd_le_r, + gen_helper_sve_st2qq_le_r }, { gen_helper_sve_st3bb_r, gen_helper_sve_st3hh_le_r, gen_helper_sve_st3ss_le_r, - gen_helper_sve_st3dd_le_r }, + gen_helper_sve_st3dd_le_r, + gen_helper_sve_st3qq_le_r }, { gen_helper_sve_st4bb_r, gen_helper_sve_st4hh_le_r, gen_helper_sve_st4ss_le_r, - gen_helper_sve_st4dd_le_r } }, + gen_helper_sve_st4dd_le_r, + gen_helper_sve_st4qq_le_r } }, { { gen_helper_sve_st2bb_r, gen_helper_sve_st2hh_be_r, gen_helper_sve_st2ss_be_r, - gen_helper_sve_st2dd_be_r }, + gen_helper_sve_st2dd_be_r, + gen_helper_sve_st2qq_be_r }, { gen_helper_sve_st3bb_r, gen_helper_sve_st3hh_be_r, gen_helper_sve_st3ss_be_r, - gen_helper_sve_st3dd_be_r }, + gen_helper_sve_st3dd_be_r, + gen_helper_sve_st3qq_be_r }, { gen_helper_sve_st4bb_r, gen_helper_sve_st4hh_be_r, gen_helper_sve_st4ss_be_r, - gen_helper_sve_st4dd_be_r } } }, + gen_helper_sve_st4dd_be_r, + gen_helper_sve_st4qq_be_r } } }, { { { gen_helper_sve_st2bb_r_mte, gen_helper_sve_st2hh_le_r_mte, gen_helper_sve_st2ss_le_r_mte, - gen_helper_sve_st2dd_le_r_mte }, + gen_helper_sve_st2dd_le_r_mte, + gen_helper_sve_st2qq_le_r_mte }, { 
gen_helper_sve_st3bb_r_mte, gen_helper_sve_st3hh_le_r_mte, gen_helper_sve_st3ss_le_r_mte, - gen_helper_sve_st3dd_le_r_mte }, + gen_helper_sve_st3dd_le_r_mte, + gen_helper_sve_st3qq_le_r_mte }, { gen_helper_sve_st4bb_r_mte, gen_helper_sve_st4hh_le_r_mte, gen_helper_sve_st4ss_le_r_mte, - gen_helper_sve_st4dd_le_r_mte } }, + gen_helper_sve_st4dd_le_r_mte, + gen_helper_sve_st4qq_le_r_mte } }, { { gen_helper_sve_st2bb_r_mte, gen_helper_sve_st2hh_be_r_mte, gen_helper_sve_st2ss_be_r_mte, - gen_helper_sve_st2dd_be_r_mte }, + gen_helper_sve_st2dd_be_r_mte, + gen_helper_sve_st2qq_be_r_mte }, { gen_helper_sve_st3bb_r_mte, gen_helper_sve_st3hh_be_r_mte, gen_helper_sve_st3ss_be_r_mte, - gen_helper_sve_st3dd_be_r_mte }, + gen_helper_sve_st3dd_be_r_mte, + gen_helper_sve_st3qq_be_r_mte }, { gen_helper_sve_st4bb_r_mte, gen_helper_sve_st4hh_be_r_mte, gen_helper_sve_st4ss_be_r_mte, - gen_helper_sve_st4dd_be_r_mte } } }, + gen_helper_sve_st4dd_be_r_mte, + gen_helper_sve_st4qq_be_r_mte } } }, }; gen_helper_gvec_mem *fn; int be = s->be_data == MO_BE; @@ -5182,12 +5741,32 @@ static void do_st_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr, static bool trans_ST_zprr(DisasContext *s, arg_rprr_store *a) { - if (!dc_isar_feature(aa64_sve, s)) { - return false; - } if (a->rm == 31 || a->msz > a->esz) { return false; } + switch (a->esz) { + case MO_8 ... MO_64: + if (!dc_isar_feature(aa64_sve, s)) { + return false; + } + break; + case MO_128: + if (a->nreg == 0) { + assert(a->msz < a->esz); + if (!dc_isar_feature(aa64_sve2p1, s)) { + return false; + } + s->is_nonstreaming = true; + } else { + if (!dc_isar_feature(aa64_sme2p1_or_sve2p1, s)) { + return false; + } + } + break; + default: + g_assert_not_reached(); + } + if (sve_access_check(s)) { TCGv_i64 addr = tcg_temp_new_i64(); tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), a->msz); @@ -5199,12 +5778,32 @@ static bool trans_ST_zprr(DisasContext *s, arg_rprr_store *a) static bool trans_ST_zpri(DisasContext *s, arg_rpri_store *a) { - if (!dc_isar_feature(aa64_sve, s)) { - return false; - } if (a->msz > a->esz) { return false; } + switch (a->esz) { + case MO_8 ... MO_64: + if (!dc_isar_feature(aa64_sve, s)) { + return false; + } + break; + case MO_128: + if (a->nreg == 0) { + assert(a->msz < a->esz); + if (!dc_isar_feature(aa64_sve2p1, s)) { + return false; + } + s->is_nonstreaming = true; + } else { + if (!dc_isar_feature(aa64_sme2p1_or_sve2p1, s)) { + return false; + } + } + break; + default: + g_assert_not_reached(); + } + if (sve_access_check(s)) { int vsz = vec_full_reg_size(s); int elements = vsz >> a->esz; @@ -5228,14 +5827,14 @@ static void do_mem_zpz(DisasContext *s, int zt, int pg, int zm, TCGv_ptr t_zm = tcg_temp_new_ptr(); TCGv_ptr t_pg = tcg_temp_new_ptr(); TCGv_ptr t_zt = tcg_temp_new_ptr(); - uint32_t desc; + uint64_t desc; tcg_gen_addi_ptr(t_pg, tcg_env, pred_full_reg_offset(s, pg)); tcg_gen_addi_ptr(t_zm, tcg_env, vec_full_reg_offset(s, zm)); tcg_gen_addi_ptr(t_zt, tcg_env, vec_full_reg_offset(s, zt)); desc = make_svemte_desc(s, vec_full_reg_size(s), 1, msz, is_write, scale); - fn(tcg_env, t_zt, t_pg, t_zm, scalar, tcg_constant_i32(desc)); + fn(tcg_env, t_zt, t_pg, t_zm, scalar, tcg_constant_i64(desc)); } /* Indexed by [mte][be][ff][xs][u][msz]. 
*/ @@ -5566,6 +6165,14 @@ gather_load_fn64[2][2][2][3][2][4] = { gen_helper_sve_ldffdd_be_zd_mte, } } } } }, }; +static gen_helper_gvec_mem_scatter * const +gather_load_fn128[2][2] = { + { gen_helper_sve_ldqq_le_zd, + gen_helper_sve_ldqq_be_zd }, + { gen_helper_sve_ldqq_le_zd_mte, + gen_helper_sve_ldqq_be_zd_mte } +}; + static bool trans_LD1_zprz(DisasContext *s, arg_LD1_zprz *a) { gen_helper_gvec_mem_scatter *fn = NULL; @@ -5587,6 +6194,8 @@ static bool trans_LD1_zprz(DisasContext *s, arg_LD1_zprz *a) case MO_64: fn = gather_load_fn64[mte][be][a->ff][a->xs][a->u][a->msz]; break; + default: + g_assert_not_reached(); } assert(fn != NULL); @@ -5595,6 +6204,32 @@ static bool trans_LD1_zprz(DisasContext *s, arg_LD1_zprz *a) return true; } +static bool trans_LD1Q(DisasContext *s, arg_LD1Q *a) +{ + gen_helper_gvec_mem_scatter *fn = NULL; + bool be = s->be_data == MO_BE; + bool mte = s->mte_active[0]; + + if (!dc_isar_feature(aa64_sve2p1, s)) { + return false; + } + s->is_nonstreaming = true; + if (!sve_access_check(s)) { + return true; + } + + fn = gather_load_fn128[mte][be]; + assert(fn != NULL); + + /* + * Unlike LD1_zprz, a->rm is the scalar register and it can be XZR, not XSP. + * a->rn is the vector register. + */ + do_mem_zpz(s, a->rd, a->pg, a->rn, 0, + cpu_reg(s, a->rm), MO_128, false, fn); + return true; +} + static bool trans_LD1_zpiz(DisasContext *s, arg_LD1_zpiz *a) { gen_helper_gvec_mem_scatter *fn = NULL; @@ -5754,6 +6389,14 @@ static gen_helper_gvec_mem_scatter * const scatter_store_fn64[2][2][3][4] = { gen_helper_sve_stdd_be_zd_mte, } } }, }; +static gen_helper_gvec_mem_scatter * const +scatter_store_fn128[2][2] = { + { gen_helper_sve_stqq_le_zd, + gen_helper_sve_stqq_be_zd }, + { gen_helper_sve_stqq_le_zd_mte, + gen_helper_sve_stqq_be_zd_mte } +}; + static bool trans_ST1_zprz(DisasContext *s, arg_ST1_zprz *a) { gen_helper_gvec_mem_scatter *fn; @@ -5785,6 +6428,29 @@ static bool trans_ST1_zprz(DisasContext *s, arg_ST1_zprz *a) return true; } +static bool trans_ST1Q(DisasContext *s, arg_ST1Q *a) +{ + gen_helper_gvec_mem_scatter *fn; + bool be = s->be_data == MO_BE; + bool mte = s->mte_active[0]; + + if (!dc_isar_feature(aa64_sve2p1, s)) { + return false; + } + s->is_nonstreaming = true; + if (!sve_access_check(s)) { + return true; + } + fn = scatter_store_fn128[mte][be]; + /* + * Unlike ST1_zprz, a->rm is the scalar register, and it + * can be XZR, not XSP. a->rn is the vector register. 
+ */ + do_mem_zpz(s, a->rd, a->pg, a->rn, 0, + cpu_reg(s, a->rm), MO_128, true, fn); + return true; +} + static bool trans_ST1_zpiz(DisasContext *s, arg_ST1_zpiz *a) { gen_helper_gvec_mem_scatter *fn = NULL; @@ -5911,6 +6577,7 @@ TRANS_FEAT(MOVPRFX_z, aa64_sve, do_movz_zpz, a->rd, a->rn, a->pg, a->esz, false) */ TRANS_FEAT(MUL_zzz, aa64_sve2, gen_gvec_fn_arg_zzz, tcg_gen_gvec_mul, a) +TRANS_FEAT(SQDMULH_zzz, aa64_sve2, gen_gvec_fn_arg_zzz, gen_gvec_sve2_sqdmulh, a) static gen_helper_gvec_3 * const smulh_zzz_fns[4] = { gen_helper_gvec_smulh_b, gen_helper_gvec_smulh_h, @@ -5929,13 +6596,6 @@ TRANS_FEAT(UMULH_zzz, aa64_sve2, gen_gvec_ool_arg_zzz, TRANS_FEAT(PMUL_zzz, aa64_sve2, gen_gvec_ool_arg_zzz, gen_helper_gvec_pmul_b, a, 0) -static gen_helper_gvec_3 * const sqdmulh_zzz_fns[4] = { - gen_helper_sve2_sqdmulh_b, gen_helper_sve2_sqdmulh_h, - gen_helper_sve2_sqdmulh_s, gen_helper_sve2_sqdmulh_d, -}; -TRANS_FEAT(SQDMULH_zzz, aa64_sve2, gen_gvec_ool_arg_zzz, - sqdmulh_zzz_fns[a->esz], a, 0) - static gen_helper_gvec_3 * const sqrdmulh_zzz_fns[4] = { gen_helper_sve2_sqrdmulh_b, gen_helper_sve2_sqrdmulh_h, gen_helper_sve2_sqrdmulh_s, gen_helper_sve2_sqrdmulh_d, @@ -7008,17 +7668,26 @@ DO_ZPZZ_FP(FMINNMP, aa64_sve2, sve2_fminnmp_zpzz) DO_ZPZZ_FP(FMAXP, aa64_sve2, sve2_fmaxp_zpzz) DO_ZPZZ_FP(FMINP, aa64_sve2, sve2_fminp_zpzz) +static bool do_fmmla(DisasContext *s, arg_rrrr_esz *a, + gen_helper_gvec_4_ptr *fn) +{ + if (sve_access_check(s)) { + if (vec_full_reg_size(s) < 4 * memop_size(a->esz)) { + unallocated_encoding(s); + } else { + gen_gvec_fpst_zzzz(s, fn, a->rd, a->rn, a->rm, a->ra, 0, FPST_A64); + } + } + return true; +} + +TRANS_FEAT_NONSTREAMING(FMMLA_s, aa64_sve_f32mm, do_fmmla, a, gen_helper_fmmla_s) +TRANS_FEAT_NONSTREAMING(FMMLA_d, aa64_sve_f64mm, do_fmmla, a, gen_helper_fmmla_d) + /* * SVE Integer Multiply-Add (unpredicated) */ -TRANS_FEAT_NONSTREAMING(FMMLA_s, aa64_sve_f32mm, gen_gvec_fpst_zzzz, - gen_helper_fmmla_s, a->rd, a->rn, a->rm, a->ra, - 0, FPST_A64) -TRANS_FEAT_NONSTREAMING(FMMLA_d, aa64_sve_f64mm, gen_gvec_fpst_zzzz, - gen_helper_fmmla_d, a->rd, a->rn, a->rm, a->ra, - 0, FPST_A64) - static gen_helper_gvec_4 * const sqdmlal_zzzw_fns[] = { NULL, gen_helper_sve2_sqdmlal_zzzw_h, gen_helper_sve2_sqdmlal_zzzw_s, gen_helper_sve2_sqdmlal_zzzw_d, @@ -7111,8 +7780,13 @@ static gen_helper_gvec_4 * const sqrdcmlah_fns[] = { TRANS_FEAT(SQRDCMLAH_zzzz, aa64_sve2, gen_gvec_ool_zzzz, sqrdcmlah_fns[a->esz], a->rd, a->rn, a->rm, a->ra, a->rot) -TRANS_FEAT(USDOT_zzzz, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz, - a->esz == 2 ? 
gen_helper_gvec_usdot_b : NULL, a, 0) +TRANS_FEAT(USDOT_zzzz_4s, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz, + gen_helper_gvec_usdot_4b, a, 0) + +TRANS_FEAT(SDOT_zzzz_2s, aa64_sme2_or_sve2p1, gen_gvec_ool_arg_zzzz, + gen_helper_gvec_sdot_2h, a, 0) +TRANS_FEAT(UDOT_zzzz_2s, aa64_sme2_or_sve2p1, gen_gvec_ool_arg_zzzz, + gen_helper_gvec_udot_2h, a, 0) TRANS_FEAT_NONSTREAMING(AESMC, aa64_sve2_aes, gen_gvec_ool_zz, gen_helper_crypto_aesmc, a->rd, a->rd, 0) @@ -7174,7 +7848,7 @@ static bool do_FMLAL_zzxw(DisasContext *s, arg_rrxr_esz *a, bool sub, bool sel) { return gen_gvec_ptr_zzzz(s, gen_helper_sve2_fmlal_zzxw_s, a->rd, a->rn, a->rm, a->ra, - (a->index << 2) | (sel << 1) | sub, tcg_env); + (a->index << 3) | (sel << 1) | sub, tcg_env); } TRANS_FEAT(FMLALB_zzxw, aa64_sve2, do_FMLAL_zzxw, a, false, false) @@ -7189,6 +7863,11 @@ TRANS_FEAT_NONSTREAMING(USMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz, TRANS_FEAT_NONSTREAMING(UMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz, gen_helper_gvec_ummla_b, a, 0) +TRANS_FEAT(FDOT_zzzz, aa64_sme2_or_sve2p1, gen_gvec_env_arg_zzzz, + gen_helper_sme2_fdot_h, a, 0) +TRANS_FEAT(FDOT_zzxz, aa64_sme2_or_sve2p1, gen_gvec_env_arg_zzxz, + gen_helper_sme2_fdot_idx_h, a) + TRANS_FEAT(BFDOT_zzzz, aa64_sve_bf16, gen_gvec_env_arg_zzzz, gen_helper_gvec_bfdot, a, 0) TRANS_FEAT(BFDOT_zzxz, aa64_sve_bf16, gen_gvec_env_arg_zzxz, @@ -7218,6 +7897,36 @@ static bool do_BFMLAL_zzxw(DisasContext *s, arg_rrxr_esz *a, bool sel) TRANS_FEAT(BFMLALB_zzxw, aa64_sve_bf16, do_BFMLAL_zzxw, a, false) TRANS_FEAT(BFMLALT_zzxw, aa64_sve_bf16, do_BFMLAL_zzxw, a, true) +static bool do_BFMLSL_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sel) +{ + if (s->fpcr_ah) { + return gen_gvec_fpst_zzzz(s, gen_helper_gvec_ah_bfmlsl, + a->rd, a->rn, a->rm, a->ra, sel, FPST_AH); + } else { + return gen_gvec_fpst_zzzz(s, gen_helper_gvec_bfmlsl, + a->rd, a->rn, a->rm, a->ra, sel, FPST_A64); + } +} + +TRANS_FEAT(BFMLSLB_zzzw, aa64_sme2_or_sve2p1, do_BFMLSL_zzzw, a, false) +TRANS_FEAT(BFMLSLT_zzzw, aa64_sme2_or_sve2p1, do_BFMLSL_zzzw, a, true) + +static bool do_BFMLSL_zzxw(DisasContext *s, arg_rrxr_esz *a, bool sel) +{ + if (s->fpcr_ah) { + return gen_gvec_fpst_zzzz(s, gen_helper_gvec_ah_bfmlsl_idx, + a->rd, a->rn, a->rm, a->ra, + (a->index << 1) | sel, FPST_AH); + } else { + return gen_gvec_fpst_zzzz(s, gen_helper_gvec_bfmlsl_idx, + a->rd, a->rn, a->rm, a->ra, + (a->index << 1) | sel, FPST_A64); + } +} + +TRANS_FEAT(BFMLSLB_zzxw, aa64_sme2_or_sve2p1, do_BFMLSL_zzxw, a, false) +TRANS_FEAT(BFMLSLT_zzxw, aa64_sme2_or_sve2p1, do_BFMLSL_zzxw, a, true) + static bool trans_PSEL(DisasContext *s, arg_psel *a) { int vl = vec_full_reg_size(s); @@ -7226,7 +7935,7 @@ static bool trans_PSEL(DisasContext *s, arg_psel *a) TCGv_i64 tmp, didx, dbit; TCGv_ptr ptr; - if (!dc_isar_feature(aa64_sme, s)) { + if (!dc_isar_feature(aa64_sme_or_sve2p1, s)) { return false; } if (!sve_access_check(s)) { @@ -7265,6 +7974,7 @@ static bool trans_PSEL(DisasContext *s, arg_psel *a) tcg_gen_neg_i64(tmp, tmp); /* Apply to either copy the source, or write zeros. 
*/ + pl = size_for_gvec(pl); tcg_gen_gvec_ands(MO_64, pred_full_reg_offset(s, a->pd), pred_full_reg_offset(s, a->pn), tmp, pl, pl); return true; @@ -7319,7 +8029,7 @@ static void gen_sclamp(unsigned vece, uint32_t d, uint32_t n, uint32_t m, tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &ops[vece]); } -TRANS_FEAT(SCLAMP, aa64_sme, gen_gvec_fn_arg_zzzz, gen_sclamp, a) +TRANS_FEAT(SCLAMP, aa64_sme_or_sve2p1, gen_gvec_fn_arg_zzzz, gen_sclamp, a) static void gen_uclamp_i32(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_i32 a) { @@ -7370,4 +8080,137 @@ static void gen_uclamp(unsigned vece, uint32_t d, uint32_t n, uint32_t m, tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &ops[vece]); } -TRANS_FEAT(UCLAMP, aa64_sme, gen_gvec_fn_arg_zzzz, gen_uclamp, a) +TRANS_FEAT(UCLAMP, aa64_sme_or_sve2p1, gen_gvec_fn_arg_zzzz, gen_uclamp, a) + +static bool trans_FCLAMP(DisasContext *s, arg_FCLAMP *a) +{ + static gen_helper_gvec_3_ptr * const fn[] = { + gen_helper_sme2_bfclamp, + gen_helper_sme2_fclamp_h, + gen_helper_sme2_fclamp_s, + gen_helper_sme2_fclamp_d, + }; + + /* This insn uses MO_8 to encode BFloat16. */ + if (a->esz == MO_8 + ? !dc_isar_feature(aa64_sve_b16b16, s) + : !dc_isar_feature(aa64_sme2_or_sve2p1, s)) { + return false; + } + + /* So far we never optimize rda with MOVPRFX */ + assert(a->rd == a->ra); + return gen_gvec_fpst_zzz(s, fn[a->esz], a->rd, a->rn, a->rm, 1, + a->esz == MO_16 ? FPST_A64_F16 : FPST_A64); +} + +TRANS_FEAT(SQCVTN_sh, aa64_sme2_or_sve2p1, gen_gvec_ool_zz, + gen_helper_sme2_sqcvtn_sh, a->rd, a->rn, 0) +TRANS_FEAT(UQCVTN_sh, aa64_sme2_or_sve2p1, gen_gvec_ool_zz, + gen_helper_sme2_uqcvtn_sh, a->rd, a->rn, 0) +TRANS_FEAT(SQCVTUN_sh, aa64_sme2_or_sve2p1, gen_gvec_ool_zz, + gen_helper_sme2_sqcvtun_sh, a->rd, a->rn, 0) + +static bool gen_ldst_c(DisasContext *s, TCGv_i64 addr, int zd, int png, + MemOp esz, bool is_write, int n, bool strided) +{ + typedef void ldst_c_fn(TCGv_env, TCGv_ptr, TCGv_i64, + TCGv_i32, TCGv_i64); + static ldst_c_fn * const f_ldst[2][2][4] = { + { { gen_helper_sve2p1_ld1bb_c, + gen_helper_sve2p1_ld1hh_le_c, + gen_helper_sve2p1_ld1ss_le_c, + gen_helper_sve2p1_ld1dd_le_c, }, + { gen_helper_sve2p1_ld1bb_c, + gen_helper_sve2p1_ld1hh_be_c, + gen_helper_sve2p1_ld1ss_be_c, + gen_helper_sve2p1_ld1dd_be_c, } }, + + { { gen_helper_sve2p1_st1bb_c, + gen_helper_sve2p1_st1hh_le_c, + gen_helper_sve2p1_st1ss_le_c, + gen_helper_sve2p1_st1dd_le_c, }, + { gen_helper_sve2p1_st1bb_c, + gen_helper_sve2p1_st1hh_be_c, + gen_helper_sve2p1_st1ss_be_c, + gen_helper_sve2p1_st1dd_be_c, } } + }; + + TCGv_i32 t_png; + TCGv_i64 t_desc; + TCGv_ptr t_zd; + uint64_t desc, lg2_rstride = 0; + bool be = s->be_data == MO_BE; + + assert(n == 2 || n == 4); + if (strided) { + lg2_rstride = 3; + if (n == 4) { + /* Validate ZD alignment. */ + if (zd & 4) { + return false; + } + lg2_rstride = 2; + } + /* Ignore non-temporal bit */ + zd &= ~8; + } + + if (strided || !dc_isar_feature(aa64_sve2p1, s) + ? !sme_sm_enabled_check(s) + : !sve_access_check(s)) { + return true; + } + + if (!s->mte_active[0]) { + addr = clean_data_tbi(s, addr); + } + + desc = n == 2 ? 0 : 1; + desc = desc | (lg2_rstride << 1); + desc = make_svemte_desc(s, vec_full_reg_size(s), 1, esz, is_write, desc); + t_desc = tcg_constant_i64(desc); + + t_png = tcg_temp_new_i32(); + tcg_gen_ld16u_i32(t_png, tcg_env, + pred_full_reg_offset(s, png) ^ + (HOST_BIG_ENDIAN ? 
6 : 0)); + + t_zd = tcg_temp_new_ptr(); + tcg_gen_addi_ptr(t_zd, tcg_env, vec_full_reg_offset(s, zd)); + + f_ldst[is_write][be][esz](tcg_env, t_zd, addr, t_png, t_desc); + return true; +} + +static bool gen_ldst_zcrr_c(DisasContext *s, arg_zcrr_ldst *a, + bool is_write, bool strided) +{ + TCGv_i64 addr = tcg_temp_new_i64(); + + tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), a->esz); + tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn)); + return gen_ldst_c(s, addr, a->rd, a->png, a->esz, is_write, + a->nreg, strided); +} + +static bool gen_ldst_zcri_c(DisasContext *s, arg_zcri_ldst *a, + bool is_write, bool strided) +{ + TCGv_i64 addr = tcg_temp_new_i64(); + + tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), + a->imm * a->nreg * vec_full_reg_size(s)); + return gen_ldst_c(s, addr, a->rd, a->png, a->esz, is_write, + a->nreg, strided); +} + +TRANS_FEAT(LD1_zcrr, aa64_sme2_or_sve2p1, gen_ldst_zcrr_c, a, false, false) +TRANS_FEAT(LD1_zcri, aa64_sme2_or_sve2p1, gen_ldst_zcri_c, a, false, false) +TRANS_FEAT(ST1_zcrr, aa64_sme2_or_sve2p1, gen_ldst_zcrr_c, a, true, false) +TRANS_FEAT(ST1_zcri, aa64_sme2_or_sve2p1, gen_ldst_zcri_c, a, true, false) + +TRANS_FEAT(LD1_zcrr_stride, aa64_sme2, gen_ldst_zcrr_c, a, false, true) +TRANS_FEAT(LD1_zcri_stride, aa64_sme2, gen_ldst_zcri_c, a, false, true) +TRANS_FEAT(ST1_zcrr_stride, aa64_sme2, gen_ldst_zcrr_c, a, true, true) +TRANS_FEAT(ST1_zcri_stride, aa64_sme2, gen_ldst_zcri_c, a, true, true) diff --git a/target/arm/tcg/translate.c b/target/arm/tcg/translate.c index 9962f43..5f64fed 100644 --- a/target/arm/tcg/translate.c +++ b/target/arm/tcg/translate.c @@ -44,8 +44,6 @@ #define ENABLE_ARCH_7 arm_dc_feature(s, ARM_FEATURE_V7) #define ENABLE_ARCH_8 arm_dc_feature(s, ARM_FEATURE_V8) -/* These are TCG temporaries used only by the legacy iwMMXt decoder */ -static TCGv_i64 cpu_V0, cpu_V1, cpu_M0; /* These are TCG globals which alias CPUARMState fields */ static TCGv_i32 cpu_R[16]; TCGv_i32 cpu_CF, cpu_NF, cpu_VF, cpu_ZF; @@ -372,7 +370,7 @@ static void gen_rebuild_hflags(DisasContext *s, bool new_el) } } -static void gen_exception_internal(int excp) +void gen_exception_internal(int excp) { assert(excp_is_internal(excp)); gen_helper_exception_internal(tcg_env, tcg_constant_i32(excp)); @@ -1090,6 +1088,57 @@ void gen_exception_insn(DisasContext *s, target_long pc_diff, s->base.is_jmp = DISAS_NORETURN; } +TCGLabel *delay_exception_el(DisasContext *s, int excp, + uint32_t syn, uint32_t target_el) +{ + /* Use tcg_malloc for automatic release on longjmp out of translation. */ + DisasDelayException *e = tcg_malloc(sizeof(DisasDelayException)); + + memset(e, 0, sizeof(*e)); + + /* Save enough of the current state to satisfy gen_exception_insn. */ + e->pc_curr = s->pc_curr; + e->pc_save = s->pc_save; + if (!s->aarch64) { + e->condexec_cond = s->condexec_cond; + e->condexec_mask = s->condexec_mask; + } + + e->excp = excp; + e->syn = syn; + e->target_el = target_el; + + e->next = s->delay_excp_list; + s->delay_excp_list = e; + + e->lab = gen_new_label(); + return e->lab; +} + +TCGLabel *delay_exception(DisasContext *s, int excp, uint32_t syn) +{ + return delay_exception_el(s, excp, syn, 0); +} + +void emit_delayed_exceptions(DisasContext *s) +{ + for (DisasDelayException *e = s->delay_excp_list; e ; e = e->next) { + gen_set_label(e->lab); + + /* Restore the insn state to satisfy gen_exception_insn. 
*/ + s->pc_curr = e->pc_curr; + s->pc_save = e->pc_save; + s->condexec_cond = e->condexec_cond; + s->condexec_mask = e->condexec_mask; + + if (e->target_el) { + gen_exception_insn_el(s, 0, e->excp, e->syn, e->target_el); + } else { + gen_exception_insn(s, 0, e->excp, e->syn); + } + } +} + static void gen_exception_bkpt_insn(DisasContext *s, uint32_t syn) { gen_set_condexec(s); @@ -1252,1263 +1301,6 @@ void write_neon_element64(TCGv_i64 src, int reg, int ele, MemOp memop) } } -#define ARM_CP_RW_BIT (1 << 20) - -static inline void iwmmxt_load_reg(TCGv_i64 var, int reg) -{ - tcg_gen_ld_i64(var, tcg_env, offsetof(CPUARMState, iwmmxt.regs[reg])); -} - -static inline void iwmmxt_store_reg(TCGv_i64 var, int reg) -{ - tcg_gen_st_i64(var, tcg_env, offsetof(CPUARMState, iwmmxt.regs[reg])); -} - -static inline TCGv_i32 iwmmxt_load_creg(int reg) -{ - TCGv_i32 var = tcg_temp_new_i32(); - tcg_gen_ld_i32(var, tcg_env, offsetof(CPUARMState, iwmmxt.cregs[reg])); - return var; -} - -static inline void iwmmxt_store_creg(int reg, TCGv_i32 var) -{ - tcg_gen_st_i32(var, tcg_env, offsetof(CPUARMState, iwmmxt.cregs[reg])); -} - -static inline void gen_op_iwmmxt_movq_wRn_M0(int rn) -{ - iwmmxt_store_reg(cpu_M0, rn); -} - -static inline void gen_op_iwmmxt_movq_M0_wRn(int rn) -{ - iwmmxt_load_reg(cpu_M0, rn); -} - -static inline void gen_op_iwmmxt_orq_M0_wRn(int rn) -{ - iwmmxt_load_reg(cpu_V1, rn); - tcg_gen_or_i64(cpu_M0, cpu_M0, cpu_V1); -} - -static inline void gen_op_iwmmxt_andq_M0_wRn(int rn) -{ - iwmmxt_load_reg(cpu_V1, rn); - tcg_gen_and_i64(cpu_M0, cpu_M0, cpu_V1); -} - -static inline void gen_op_iwmmxt_xorq_M0_wRn(int rn) -{ - iwmmxt_load_reg(cpu_V1, rn); - tcg_gen_xor_i64(cpu_M0, cpu_M0, cpu_V1); -} - -#define IWMMXT_OP(name) \ -static inline void gen_op_iwmmxt_##name##_M0_wRn(int rn) \ -{ \ - iwmmxt_load_reg(cpu_V1, rn); \ - gen_helper_iwmmxt_##name(cpu_M0, cpu_M0, cpu_V1); \ -} - -#define IWMMXT_OP_ENV(name) \ -static inline void gen_op_iwmmxt_##name##_M0_wRn(int rn) \ -{ \ - iwmmxt_load_reg(cpu_V1, rn); \ - gen_helper_iwmmxt_##name(cpu_M0, tcg_env, cpu_M0, cpu_V1); \ -} - -#define IWMMXT_OP_ENV_SIZE(name) \ -IWMMXT_OP_ENV(name##b) \ -IWMMXT_OP_ENV(name##w) \ -IWMMXT_OP_ENV(name##l) - -#define IWMMXT_OP_ENV1(name) \ -static inline void gen_op_iwmmxt_##name##_M0(void) \ -{ \ - gen_helper_iwmmxt_##name(cpu_M0, tcg_env, cpu_M0); \ -} - -IWMMXT_OP(maddsq) -IWMMXT_OP(madduq) -IWMMXT_OP(sadb) -IWMMXT_OP(sadw) -IWMMXT_OP(mulslw) -IWMMXT_OP(mulshw) -IWMMXT_OP(mululw) -IWMMXT_OP(muluhw) -IWMMXT_OP(macsw) -IWMMXT_OP(macuw) - -IWMMXT_OP_ENV_SIZE(unpackl) -IWMMXT_OP_ENV_SIZE(unpackh) - -IWMMXT_OP_ENV1(unpacklub) -IWMMXT_OP_ENV1(unpackluw) -IWMMXT_OP_ENV1(unpacklul) -IWMMXT_OP_ENV1(unpackhub) -IWMMXT_OP_ENV1(unpackhuw) -IWMMXT_OP_ENV1(unpackhul) -IWMMXT_OP_ENV1(unpacklsb) -IWMMXT_OP_ENV1(unpacklsw) -IWMMXT_OP_ENV1(unpacklsl) -IWMMXT_OP_ENV1(unpackhsb) -IWMMXT_OP_ENV1(unpackhsw) -IWMMXT_OP_ENV1(unpackhsl) - -IWMMXT_OP_ENV_SIZE(cmpeq) -IWMMXT_OP_ENV_SIZE(cmpgtu) -IWMMXT_OP_ENV_SIZE(cmpgts) - -IWMMXT_OP_ENV_SIZE(mins) -IWMMXT_OP_ENV_SIZE(minu) -IWMMXT_OP_ENV_SIZE(maxs) -IWMMXT_OP_ENV_SIZE(maxu) - -IWMMXT_OP_ENV_SIZE(subn) -IWMMXT_OP_ENV_SIZE(addn) -IWMMXT_OP_ENV_SIZE(subu) -IWMMXT_OP_ENV_SIZE(addu) -IWMMXT_OP_ENV_SIZE(subs) -IWMMXT_OP_ENV_SIZE(adds) - -IWMMXT_OP_ENV(avgb0) -IWMMXT_OP_ENV(avgb1) -IWMMXT_OP_ENV(avgw0) -IWMMXT_OP_ENV(avgw1) - -IWMMXT_OP_ENV(packuw) -IWMMXT_OP_ENV(packul) -IWMMXT_OP_ENV(packuq) -IWMMXT_OP_ENV(packsw) -IWMMXT_OP_ENV(packsl) -IWMMXT_OP_ENV(packsq) - -static void gen_op_iwmmxt_set_mup(void) 
-{ - TCGv_i32 tmp; - tmp = load_cpu_field(iwmmxt.cregs[ARM_IWMMXT_wCon]); - tcg_gen_ori_i32(tmp, tmp, 2); - store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCon]); -} - -static void gen_op_iwmmxt_set_cup(void) -{ - TCGv_i32 tmp; - tmp = load_cpu_field(iwmmxt.cregs[ARM_IWMMXT_wCon]); - tcg_gen_ori_i32(tmp, tmp, 1); - store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCon]); -} - -static void gen_op_iwmmxt_setpsr_nz(void) -{ - TCGv_i32 tmp = tcg_temp_new_i32(); - gen_helper_iwmmxt_setpsr_nz(tmp, cpu_M0); - store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCASF]); -} - -static inline void gen_op_iwmmxt_addl_M0_wRn(int rn) -{ - iwmmxt_load_reg(cpu_V1, rn); - tcg_gen_ext32u_i64(cpu_V1, cpu_V1); - tcg_gen_add_i64(cpu_M0, cpu_M0, cpu_V1); -} - -static inline int gen_iwmmxt_address(DisasContext *s, uint32_t insn, - TCGv_i32 dest) -{ - int rd; - uint32_t offset; - TCGv_i32 tmp; - - rd = (insn >> 16) & 0xf; - tmp = load_reg(s, rd); - - offset = (insn & 0xff) << ((insn >> 7) & 2); - if (insn & (1 << 24)) { - /* Pre indexed */ - if (insn & (1 << 23)) - tcg_gen_addi_i32(tmp, tmp, offset); - else - tcg_gen_addi_i32(tmp, tmp, -offset); - tcg_gen_mov_i32(dest, tmp); - if (insn & (1 << 21)) { - store_reg(s, rd, tmp); - } - } else if (insn & (1 << 21)) { - /* Post indexed */ - tcg_gen_mov_i32(dest, tmp); - if (insn & (1 << 23)) - tcg_gen_addi_i32(tmp, tmp, offset); - else - tcg_gen_addi_i32(tmp, tmp, -offset); - store_reg(s, rd, tmp); - } else if (!(insn & (1 << 23))) - return 1; - return 0; -} - -static inline int gen_iwmmxt_shift(uint32_t insn, uint32_t mask, TCGv_i32 dest) -{ - int rd = (insn >> 0) & 0xf; - TCGv_i32 tmp; - - if (insn & (1 << 8)) { - if (rd < ARM_IWMMXT_wCGR0 || rd > ARM_IWMMXT_wCGR3) { - return 1; - } else { - tmp = iwmmxt_load_creg(rd); - } - } else { - tmp = tcg_temp_new_i32(); - iwmmxt_load_reg(cpu_V0, rd); - tcg_gen_extrl_i64_i32(tmp, cpu_V0); - } - tcg_gen_andi_i32(tmp, tmp, mask); - tcg_gen_mov_i32(dest, tmp); - return 0; -} - -/* Disassemble an iwMMXt instruction. Returns nonzero if an error occurred - (ie. an undefined instruction). 
*/ -static int disas_iwmmxt_insn(DisasContext *s, uint32_t insn) -{ - int rd, wrd; - int rdhi, rdlo, rd0, rd1, i; - TCGv_i32 addr; - TCGv_i32 tmp, tmp2, tmp3; - - if ((insn & 0x0e000e00) == 0x0c000000) { - if ((insn & 0x0fe00ff0) == 0x0c400000) { - wrd = insn & 0xf; - rdlo = (insn >> 12) & 0xf; - rdhi = (insn >> 16) & 0xf; - if (insn & ARM_CP_RW_BIT) { /* TMRRC */ - iwmmxt_load_reg(cpu_V0, wrd); - tcg_gen_extrl_i64_i32(cpu_R[rdlo], cpu_V0); - tcg_gen_extrh_i64_i32(cpu_R[rdhi], cpu_V0); - } else { /* TMCRR */ - tcg_gen_concat_i32_i64(cpu_V0, cpu_R[rdlo], cpu_R[rdhi]); - iwmmxt_store_reg(cpu_V0, wrd); - gen_op_iwmmxt_set_mup(); - } - return 0; - } - - wrd = (insn >> 12) & 0xf; - addr = tcg_temp_new_i32(); - if (gen_iwmmxt_address(s, insn, addr)) { - return 1; - } - if (insn & ARM_CP_RW_BIT) { - if ((insn >> 28) == 0xf) { /* WLDRW wCx */ - tmp = tcg_temp_new_i32(); - gen_aa32_ld32u(s, tmp, addr, get_mem_index(s)); - iwmmxt_store_creg(wrd, tmp); - } else { - i = 1; - if (insn & (1 << 8)) { - if (insn & (1 << 22)) { /* WLDRD */ - gen_aa32_ld64(s, cpu_M0, addr, get_mem_index(s)); - i = 0; - } else { /* WLDRW wRd */ - tmp = tcg_temp_new_i32(); - gen_aa32_ld32u(s, tmp, addr, get_mem_index(s)); - } - } else { - tmp = tcg_temp_new_i32(); - if (insn & (1 << 22)) { /* WLDRH */ - gen_aa32_ld16u(s, tmp, addr, get_mem_index(s)); - } else { /* WLDRB */ - gen_aa32_ld8u(s, tmp, addr, get_mem_index(s)); - } - } - if (i) { - tcg_gen_extu_i32_i64(cpu_M0, tmp); - } - gen_op_iwmmxt_movq_wRn_M0(wrd); - } - } else { - if ((insn >> 28) == 0xf) { /* WSTRW wCx */ - tmp = iwmmxt_load_creg(wrd); - gen_aa32_st32(s, tmp, addr, get_mem_index(s)); - } else { - gen_op_iwmmxt_movq_M0_wRn(wrd); - tmp = tcg_temp_new_i32(); - if (insn & (1 << 8)) { - if (insn & (1 << 22)) { /* WSTRD */ - gen_aa32_st64(s, cpu_M0, addr, get_mem_index(s)); - } else { /* WSTRW wRd */ - tcg_gen_extrl_i64_i32(tmp, cpu_M0); - gen_aa32_st32(s, tmp, addr, get_mem_index(s)); - } - } else { - if (insn & (1 << 22)) { /* WSTRH */ - tcg_gen_extrl_i64_i32(tmp, cpu_M0); - gen_aa32_st16(s, tmp, addr, get_mem_index(s)); - } else { /* WSTRB */ - tcg_gen_extrl_i64_i32(tmp, cpu_M0); - gen_aa32_st8(s, tmp, addr, get_mem_index(s)); - } - } - } - } - return 0; - } - - if ((insn & 0x0f000000) != 0x0e000000) - return 1; - - switch (((insn >> 12) & 0xf00) | ((insn >> 4) & 0xff)) { - case 0x000: /* WOR */ - wrd = (insn >> 12) & 0xf; - rd0 = (insn >> 0) & 0xf; - rd1 = (insn >> 16) & 0xf; - gen_op_iwmmxt_movq_M0_wRn(rd0); - gen_op_iwmmxt_orq_M0_wRn(rd1); - gen_op_iwmmxt_setpsr_nz(); - gen_op_iwmmxt_movq_wRn_M0(wrd); - gen_op_iwmmxt_set_mup(); - gen_op_iwmmxt_set_cup(); - break; - case 0x011: /* TMCR */ - if (insn & 0xf) - return 1; - rd = (insn >> 12) & 0xf; - wrd = (insn >> 16) & 0xf; - switch (wrd) { - case ARM_IWMMXT_wCID: - case ARM_IWMMXT_wCASF: - break; - case ARM_IWMMXT_wCon: - gen_op_iwmmxt_set_cup(); - /* Fall through. 
*/ - case ARM_IWMMXT_wCSSF: - tmp = iwmmxt_load_creg(wrd); - tmp2 = load_reg(s, rd); - tcg_gen_andc_i32(tmp, tmp, tmp2); - iwmmxt_store_creg(wrd, tmp); - break; - case ARM_IWMMXT_wCGR0: - case ARM_IWMMXT_wCGR1: - case ARM_IWMMXT_wCGR2: - case ARM_IWMMXT_wCGR3: - gen_op_iwmmxt_set_cup(); - tmp = load_reg(s, rd); - iwmmxt_store_creg(wrd, tmp); - break; - default: - return 1; - } - break; - case 0x100: /* WXOR */ - wrd = (insn >> 12) & 0xf; - rd0 = (insn >> 0) & 0xf; - rd1 = (insn >> 16) & 0xf; - gen_op_iwmmxt_movq_M0_wRn(rd0); - gen_op_iwmmxt_xorq_M0_wRn(rd1); - gen_op_iwmmxt_setpsr_nz(); - gen_op_iwmmxt_movq_wRn_M0(wrd); - gen_op_iwmmxt_set_mup(); - gen_op_iwmmxt_set_cup(); - break; - case 0x111: /* TMRC */ - if (insn & 0xf) - return 1; - rd = (insn >> 12) & 0xf; - wrd = (insn >> 16) & 0xf; - tmp = iwmmxt_load_creg(wrd); - store_reg(s, rd, tmp); - break; - case 0x300: /* WANDN */ - wrd = (insn >> 12) & 0xf; - rd0 = (insn >> 0) & 0xf; - rd1 = (insn >> 16) & 0xf; - gen_op_iwmmxt_movq_M0_wRn(rd0); - tcg_gen_neg_i64(cpu_M0, cpu_M0); - gen_op_iwmmxt_andq_M0_wRn(rd1); - gen_op_iwmmxt_setpsr_nz(); - gen_op_iwmmxt_movq_wRn_M0(wrd); - gen_op_iwmmxt_set_mup(); - gen_op_iwmmxt_set_cup(); - break; - case 0x200: /* WAND */ - wrd = (insn >> 12) & 0xf; - rd0 = (insn >> 0) & 0xf; - rd1 = (insn >> 16) & 0xf; - gen_op_iwmmxt_movq_M0_wRn(rd0); - gen_op_iwmmxt_andq_M0_wRn(rd1); - gen_op_iwmmxt_setpsr_nz(); - gen_op_iwmmxt_movq_wRn_M0(wrd); - gen_op_iwmmxt_set_mup(); - gen_op_iwmmxt_set_cup(); - break; - case 0x810: case 0xa10: /* WMADD */ - wrd = (insn >> 12) & 0xf; - rd0 = (insn >> 0) & 0xf; - rd1 = (insn >> 16) & 0xf; - gen_op_iwmmxt_movq_M0_wRn(rd0); - if (insn & (1 << 21)) - gen_op_iwmmxt_maddsq_M0_wRn(rd1); - else - gen_op_iwmmxt_madduq_M0_wRn(rd1); - gen_op_iwmmxt_movq_wRn_M0(wrd); - gen_op_iwmmxt_set_mup(); - break; - case 0x10e: case 0x50e: case 0x90e: case 0xd0e: /* WUNPCKIL */ - wrd = (insn >> 12) & 0xf; - rd0 = (insn >> 16) & 0xf; - rd1 = (insn >> 0) & 0xf; - gen_op_iwmmxt_movq_M0_wRn(rd0); - switch ((insn >> 22) & 3) { - case 0: - gen_op_iwmmxt_unpacklb_M0_wRn(rd1); - break; - case 1: - gen_op_iwmmxt_unpacklw_M0_wRn(rd1); - break; - case 2: - gen_op_iwmmxt_unpackll_M0_wRn(rd1); - break; - case 3: - return 1; - } - gen_op_iwmmxt_movq_wRn_M0(wrd); - gen_op_iwmmxt_set_mup(); - gen_op_iwmmxt_set_cup(); - break; - case 0x10c: case 0x50c: case 0x90c: case 0xd0c: /* WUNPCKIH */ - wrd = (insn >> 12) & 0xf; - rd0 = (insn >> 16) & 0xf; - rd1 = (insn >> 0) & 0xf; - gen_op_iwmmxt_movq_M0_wRn(rd0); - switch ((insn >> 22) & 3) { - case 0: - gen_op_iwmmxt_unpackhb_M0_wRn(rd1); - break; - case 1: - gen_op_iwmmxt_unpackhw_M0_wRn(rd1); - break; - case 2: - gen_op_iwmmxt_unpackhl_M0_wRn(rd1); - break; - case 3: - return 1; - } - gen_op_iwmmxt_movq_wRn_M0(wrd); - gen_op_iwmmxt_set_mup(); - gen_op_iwmmxt_set_cup(); - break; - case 0x012: case 0x112: case 0x412: case 0x512: /* WSAD */ - wrd = (insn >> 12) & 0xf; - rd0 = (insn >> 16) & 0xf; - rd1 = (insn >> 0) & 0xf; - gen_op_iwmmxt_movq_M0_wRn(rd0); - if (insn & (1 << 22)) - gen_op_iwmmxt_sadw_M0_wRn(rd1); - else - gen_op_iwmmxt_sadb_M0_wRn(rd1); - if (!(insn & (1 << 20))) - gen_op_iwmmxt_addl_M0_wRn(wrd); - gen_op_iwmmxt_movq_wRn_M0(wrd); - gen_op_iwmmxt_set_mup(); - break; - case 0x010: case 0x110: case 0x210: case 0x310: /* WMUL */ - wrd = (insn >> 12) & 0xf; - rd0 = (insn >> 16) & 0xf; - rd1 = (insn >> 0) & 0xf; - gen_op_iwmmxt_movq_M0_wRn(rd0); - if (insn & (1 << 21)) { - if (insn & (1 << 20)) - gen_op_iwmmxt_mulshw_M0_wRn(rd1); - else - 
gen_op_iwmmxt_mulslw_M0_wRn(rd1); - } else { - if (insn & (1 << 20)) - gen_op_iwmmxt_muluhw_M0_wRn(rd1); - else - gen_op_iwmmxt_mululw_M0_wRn(rd1); - } - gen_op_iwmmxt_movq_wRn_M0(wrd); - gen_op_iwmmxt_set_mup(); - break; - case 0x410: case 0x510: case 0x610: case 0x710: /* WMAC */ - wrd = (insn >> 12) & 0xf; - rd0 = (insn >> 16) & 0xf; - rd1 = (insn >> 0) & 0xf; - gen_op_iwmmxt_movq_M0_wRn(rd0); - if (insn & (1 << 21)) - gen_op_iwmmxt_macsw_M0_wRn(rd1); - else - gen_op_iwmmxt_macuw_M0_wRn(rd1); - if (!(insn & (1 << 20))) { - iwmmxt_load_reg(cpu_V1, wrd); - tcg_gen_add_i64(cpu_M0, cpu_M0, cpu_V1); - } - gen_op_iwmmxt_movq_wRn_M0(wrd); - gen_op_iwmmxt_set_mup(); - break; - case 0x006: case 0x406: case 0x806: case 0xc06: /* WCMPEQ */ - wrd = (insn >> 12) & 0xf; - rd0 = (insn >> 16) & 0xf; - rd1 = (insn >> 0) & 0xf; - gen_op_iwmmxt_movq_M0_wRn(rd0); - switch ((insn >> 22) & 3) { - case 0: - gen_op_iwmmxt_cmpeqb_M0_wRn(rd1); - break; - case 1: - gen_op_iwmmxt_cmpeqw_M0_wRn(rd1); - break; - case 2: - gen_op_iwmmxt_cmpeql_M0_wRn(rd1); - break; - case 3: - return 1; - } - gen_op_iwmmxt_movq_wRn_M0(wrd); - gen_op_iwmmxt_set_mup(); - gen_op_iwmmxt_set_cup(); - break; - case 0x800: case 0x900: case 0xc00: case 0xd00: /* WAVG2 */ - wrd = (insn >> 12) & 0xf; - rd0 = (insn >> 16) & 0xf; - rd1 = (insn >> 0) & 0xf; - gen_op_iwmmxt_movq_M0_wRn(rd0); - if (insn & (1 << 22)) { - if (insn & (1 << 20)) - gen_op_iwmmxt_avgw1_M0_wRn(rd1); - else - gen_op_iwmmxt_avgw0_M0_wRn(rd1); - } else { - if (insn & (1 << 20)) - gen_op_iwmmxt_avgb1_M0_wRn(rd1); - else - gen_op_iwmmxt_avgb0_M0_wRn(rd1); - } - gen_op_iwmmxt_movq_wRn_M0(wrd); - gen_op_iwmmxt_set_mup(); - gen_op_iwmmxt_set_cup(); - break; - case 0x802: case 0x902: case 0xa02: case 0xb02: /* WALIGNR */ - wrd = (insn >> 12) & 0xf; - rd0 = (insn >> 16) & 0xf; - rd1 = (insn >> 0) & 0xf; - gen_op_iwmmxt_movq_M0_wRn(rd0); - tmp = iwmmxt_load_creg(ARM_IWMMXT_wCGR0 + ((insn >> 20) & 3)); - tcg_gen_andi_i32(tmp, tmp, 7); - iwmmxt_load_reg(cpu_V1, rd1); - gen_helper_iwmmxt_align(cpu_M0, cpu_M0, cpu_V1, tmp); - gen_op_iwmmxt_movq_wRn_M0(wrd); - gen_op_iwmmxt_set_mup(); - break; - case 0x601: case 0x605: case 0x609: case 0x60d: /* TINSR */ - if (((insn >> 6) & 3) == 3) - return 1; - rd = (insn >> 12) & 0xf; - wrd = (insn >> 16) & 0xf; - tmp = load_reg(s, rd); - gen_op_iwmmxt_movq_M0_wRn(wrd); - switch ((insn >> 6) & 3) { - case 0: - tmp2 = tcg_constant_i32(0xff); - tmp3 = tcg_constant_i32((insn & 7) << 3); - break; - case 1: - tmp2 = tcg_constant_i32(0xffff); - tmp3 = tcg_constant_i32((insn & 3) << 4); - break; - case 2: - tmp2 = tcg_constant_i32(0xffffffff); - tmp3 = tcg_constant_i32((insn & 1) << 5); - break; - default: - g_assert_not_reached(); - } - gen_helper_iwmmxt_insr(cpu_M0, cpu_M0, tmp, tmp2, tmp3); - gen_op_iwmmxt_movq_wRn_M0(wrd); - gen_op_iwmmxt_set_mup(); - break; - case 0x107: case 0x507: case 0x907: case 0xd07: /* TEXTRM */ - rd = (insn >> 12) & 0xf; - wrd = (insn >> 16) & 0xf; - if (rd == 15 || ((insn >> 22) & 3) == 3) - return 1; - gen_op_iwmmxt_movq_M0_wRn(wrd); - tmp = tcg_temp_new_i32(); - switch ((insn >> 22) & 3) { - case 0: - tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 7) << 3); - tcg_gen_extrl_i64_i32(tmp, cpu_M0); - if (insn & 8) { - tcg_gen_ext8s_i32(tmp, tmp); - } else { - tcg_gen_andi_i32(tmp, tmp, 0xff); - } - break; - case 1: - tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 3) << 4); - tcg_gen_extrl_i64_i32(tmp, cpu_M0); - if (insn & 8) { - tcg_gen_ext16s_i32(tmp, tmp); - } else { - tcg_gen_andi_i32(tmp, tmp, 0xffff); - } - break; - case 2: - 
tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 1) << 5); - tcg_gen_extrl_i64_i32(tmp, cpu_M0); - break; - } - store_reg(s, rd, tmp); - break; - case 0x117: case 0x517: case 0x917: case 0xd17: /* TEXTRC */ - if ((insn & 0x000ff008) != 0x0003f000 || ((insn >> 22) & 3) == 3) - return 1; - tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF); - switch ((insn >> 22) & 3) { - case 0: - tcg_gen_shri_i32(tmp, tmp, ((insn & 7) << 2) + 0); - break; - case 1: - tcg_gen_shri_i32(tmp, tmp, ((insn & 3) << 3) + 4); - break; - case 2: - tcg_gen_shri_i32(tmp, tmp, ((insn & 1) << 4) + 12); - break; - } - tcg_gen_shli_i32(tmp, tmp, 28); - gen_set_nzcv(tmp); - break; - case 0x401: case 0x405: case 0x409: case 0x40d: /* TBCST */ - if (((insn >> 6) & 3) == 3) - return 1; - rd = (insn >> 12) & 0xf; - wrd = (insn >> 16) & 0xf; - tmp = load_reg(s, rd); - switch ((insn >> 6) & 3) { - case 0: - gen_helper_iwmmxt_bcstb(cpu_M0, tmp); - break; - case 1: - gen_helper_iwmmxt_bcstw(cpu_M0, tmp); - break; - case 2: - gen_helper_iwmmxt_bcstl(cpu_M0, tmp); - break; - } - gen_op_iwmmxt_movq_wRn_M0(wrd); - gen_op_iwmmxt_set_mup(); - break; - case 0x113: case 0x513: case 0x913: case 0xd13: /* TANDC */ - if ((insn & 0x000ff00f) != 0x0003f000 || ((insn >> 22) & 3) == 3) - return 1; - tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF); - tmp2 = tcg_temp_new_i32(); - tcg_gen_mov_i32(tmp2, tmp); - switch ((insn >> 22) & 3) { - case 0: - for (i = 0; i < 7; i ++) { - tcg_gen_shli_i32(tmp2, tmp2, 4); - tcg_gen_and_i32(tmp, tmp, tmp2); - } - break; - case 1: - for (i = 0; i < 3; i ++) { - tcg_gen_shli_i32(tmp2, tmp2, 8); - tcg_gen_and_i32(tmp, tmp, tmp2); - } - break; - case 2: - tcg_gen_shli_i32(tmp2, tmp2, 16); - tcg_gen_and_i32(tmp, tmp, tmp2); - break; - } - gen_set_nzcv(tmp); - break; - case 0x01c: case 0x41c: case 0x81c: case 0xc1c: /* WACC */ - wrd = (insn >> 12) & 0xf; - rd0 = (insn >> 16) & 0xf; - gen_op_iwmmxt_movq_M0_wRn(rd0); - switch ((insn >> 22) & 3) { - case 0: - gen_helper_iwmmxt_addcb(cpu_M0, cpu_M0); - break; - case 1: - gen_helper_iwmmxt_addcw(cpu_M0, cpu_M0); - break; - case 2: - gen_helper_iwmmxt_addcl(cpu_M0, cpu_M0); - break; - case 3: - return 1; - } - gen_op_iwmmxt_movq_wRn_M0(wrd); - gen_op_iwmmxt_set_mup(); - break; - case 0x115: case 0x515: case 0x915: case 0xd15: /* TORC */ - if ((insn & 0x000ff00f) != 0x0003f000 || ((insn >> 22) & 3) == 3) - return 1; - tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF); - tmp2 = tcg_temp_new_i32(); - tcg_gen_mov_i32(tmp2, tmp); - switch ((insn >> 22) & 3) { - case 0: - for (i = 0; i < 7; i ++) { - tcg_gen_shli_i32(tmp2, tmp2, 4); - tcg_gen_or_i32(tmp, tmp, tmp2); - } - break; - case 1: - for (i = 0; i < 3; i ++) { - tcg_gen_shli_i32(tmp2, tmp2, 8); - tcg_gen_or_i32(tmp, tmp, tmp2); - } - break; - case 2: - tcg_gen_shli_i32(tmp2, tmp2, 16); - tcg_gen_or_i32(tmp, tmp, tmp2); - break; - } - gen_set_nzcv(tmp); - break; - case 0x103: case 0x503: case 0x903: case 0xd03: /* TMOVMSK */ - rd = (insn >> 12) & 0xf; - rd0 = (insn >> 16) & 0xf; - if ((insn & 0xf) != 0 || ((insn >> 22) & 3) == 3) - return 1; - gen_op_iwmmxt_movq_M0_wRn(rd0); - tmp = tcg_temp_new_i32(); - switch ((insn >> 22) & 3) { - case 0: - gen_helper_iwmmxt_msbb(tmp, cpu_M0); - break; - case 1: - gen_helper_iwmmxt_msbw(tmp, cpu_M0); - break; - case 2: - gen_helper_iwmmxt_msbl(tmp, cpu_M0); - break; - } - store_reg(s, rd, tmp); - break; - case 0x106: case 0x306: case 0x506: case 0x706: /* WCMPGT */ - case 0x906: case 0xb06: case 0xd06: case 0xf06: - wrd = (insn >> 12) & 0xf; - rd0 = (insn >> 16) & 0xf; - rd1 = (insn >> 0) & 0xf; - 
gen_op_iwmmxt_movq_M0_wRn(rd0); - switch ((insn >> 22) & 3) { - case 0: - if (insn & (1 << 21)) - gen_op_iwmmxt_cmpgtsb_M0_wRn(rd1); - else - gen_op_iwmmxt_cmpgtub_M0_wRn(rd1); - break; - case 1: - if (insn & (1 << 21)) - gen_op_iwmmxt_cmpgtsw_M0_wRn(rd1); - else - gen_op_iwmmxt_cmpgtuw_M0_wRn(rd1); - break; - case 2: - if (insn & (1 << 21)) - gen_op_iwmmxt_cmpgtsl_M0_wRn(rd1); - else - gen_op_iwmmxt_cmpgtul_M0_wRn(rd1); - break; - case 3: - return 1; - } - gen_op_iwmmxt_movq_wRn_M0(wrd); - gen_op_iwmmxt_set_mup(); - gen_op_iwmmxt_set_cup(); - break; - case 0x00e: case 0x20e: case 0x40e: case 0x60e: /* WUNPCKEL */ - case 0x80e: case 0xa0e: case 0xc0e: case 0xe0e: - wrd = (insn >> 12) & 0xf; - rd0 = (insn >> 16) & 0xf; - gen_op_iwmmxt_movq_M0_wRn(rd0); - switch ((insn >> 22) & 3) { - case 0: - if (insn & (1 << 21)) - gen_op_iwmmxt_unpacklsb_M0(); - else - gen_op_iwmmxt_unpacklub_M0(); - break; - case 1: - if (insn & (1 << 21)) - gen_op_iwmmxt_unpacklsw_M0(); - else - gen_op_iwmmxt_unpackluw_M0(); - break; - case 2: - if (insn & (1 << 21)) - gen_op_iwmmxt_unpacklsl_M0(); - else - gen_op_iwmmxt_unpacklul_M0(); - break; - case 3: - return 1; - } - gen_op_iwmmxt_movq_wRn_M0(wrd); - gen_op_iwmmxt_set_mup(); - gen_op_iwmmxt_set_cup(); - break; - case 0x00c: case 0x20c: case 0x40c: case 0x60c: /* WUNPCKEH */ - case 0x80c: case 0xa0c: case 0xc0c: case 0xe0c: - wrd = (insn >> 12) & 0xf; - rd0 = (insn >> 16) & 0xf; - gen_op_iwmmxt_movq_M0_wRn(rd0); - switch ((insn >> 22) & 3) { - case 0: - if (insn & (1 << 21)) - gen_op_iwmmxt_unpackhsb_M0(); - else - gen_op_iwmmxt_unpackhub_M0(); - break; - case 1: - if (insn & (1 << 21)) - gen_op_iwmmxt_unpackhsw_M0(); - else - gen_op_iwmmxt_unpackhuw_M0(); - break; - case 2: - if (insn & (1 << 21)) - gen_op_iwmmxt_unpackhsl_M0(); - else - gen_op_iwmmxt_unpackhul_M0(); - break; - case 3: - return 1; - } - gen_op_iwmmxt_movq_wRn_M0(wrd); - gen_op_iwmmxt_set_mup(); - gen_op_iwmmxt_set_cup(); - break; - case 0x204: case 0x604: case 0xa04: case 0xe04: /* WSRL */ - case 0x214: case 0x614: case 0xa14: case 0xe14: - if (((insn >> 22) & 3) == 0) - return 1; - wrd = (insn >> 12) & 0xf; - rd0 = (insn >> 16) & 0xf; - gen_op_iwmmxt_movq_M0_wRn(rd0); - tmp = tcg_temp_new_i32(); - if (gen_iwmmxt_shift(insn, 0xff, tmp)) { - return 1; - } - switch ((insn >> 22) & 3) { - case 1: - gen_helper_iwmmxt_srlw(cpu_M0, tcg_env, cpu_M0, tmp); - break; - case 2: - gen_helper_iwmmxt_srll(cpu_M0, tcg_env, cpu_M0, tmp); - break; - case 3: - gen_helper_iwmmxt_srlq(cpu_M0, tcg_env, cpu_M0, tmp); - break; - } - gen_op_iwmmxt_movq_wRn_M0(wrd); - gen_op_iwmmxt_set_mup(); - gen_op_iwmmxt_set_cup(); - break; - case 0x004: case 0x404: case 0x804: case 0xc04: /* WSRA */ - case 0x014: case 0x414: case 0x814: case 0xc14: - if (((insn >> 22) & 3) == 0) - return 1; - wrd = (insn >> 12) & 0xf; - rd0 = (insn >> 16) & 0xf; - gen_op_iwmmxt_movq_M0_wRn(rd0); - tmp = tcg_temp_new_i32(); - if (gen_iwmmxt_shift(insn, 0xff, tmp)) { - return 1; - } - switch ((insn >> 22) & 3) { - case 1: - gen_helper_iwmmxt_sraw(cpu_M0, tcg_env, cpu_M0, tmp); - break; - case 2: - gen_helper_iwmmxt_sral(cpu_M0, tcg_env, cpu_M0, tmp); - break; - case 3: - gen_helper_iwmmxt_sraq(cpu_M0, tcg_env, cpu_M0, tmp); - break; - } - gen_op_iwmmxt_movq_wRn_M0(wrd); - gen_op_iwmmxt_set_mup(); - gen_op_iwmmxt_set_cup(); - break; - case 0x104: case 0x504: case 0x904: case 0xd04: /* WSLL */ - case 0x114: case 0x514: case 0x914: case 0xd14: - if (((insn >> 22) & 3) == 0) - return 1; - wrd = (insn >> 12) & 0xf; - rd0 = (insn >> 16) & 0xf; - 
gen_op_iwmmxt_movq_M0_wRn(rd0); - tmp = tcg_temp_new_i32(); - if (gen_iwmmxt_shift(insn, 0xff, tmp)) { - return 1; - } - switch ((insn >> 22) & 3) { - case 1: - gen_helper_iwmmxt_sllw(cpu_M0, tcg_env, cpu_M0, tmp); - break; - case 2: - gen_helper_iwmmxt_slll(cpu_M0, tcg_env, cpu_M0, tmp); - break; - case 3: - gen_helper_iwmmxt_sllq(cpu_M0, tcg_env, cpu_M0, tmp); - break; - } - gen_op_iwmmxt_movq_wRn_M0(wrd); - gen_op_iwmmxt_set_mup(); - gen_op_iwmmxt_set_cup(); - break; - case 0x304: case 0x704: case 0xb04: case 0xf04: /* WROR */ - case 0x314: case 0x714: case 0xb14: case 0xf14: - if (((insn >> 22) & 3) == 0) - return 1; - wrd = (insn >> 12) & 0xf; - rd0 = (insn >> 16) & 0xf; - gen_op_iwmmxt_movq_M0_wRn(rd0); - tmp = tcg_temp_new_i32(); - switch ((insn >> 22) & 3) { - case 1: - if (gen_iwmmxt_shift(insn, 0xf, tmp)) { - return 1; - } - gen_helper_iwmmxt_rorw(cpu_M0, tcg_env, cpu_M0, tmp); - break; - case 2: - if (gen_iwmmxt_shift(insn, 0x1f, tmp)) { - return 1; - } - gen_helper_iwmmxt_rorl(cpu_M0, tcg_env, cpu_M0, tmp); - break; - case 3: - if (gen_iwmmxt_shift(insn, 0x3f, tmp)) { - return 1; - } - gen_helper_iwmmxt_rorq(cpu_M0, tcg_env, cpu_M0, tmp); - break; - } - gen_op_iwmmxt_movq_wRn_M0(wrd); - gen_op_iwmmxt_set_mup(); - gen_op_iwmmxt_set_cup(); - break; - case 0x116: case 0x316: case 0x516: case 0x716: /* WMIN */ - case 0x916: case 0xb16: case 0xd16: case 0xf16: - wrd = (insn >> 12) & 0xf; - rd0 = (insn >> 16) & 0xf; - rd1 = (insn >> 0) & 0xf; - gen_op_iwmmxt_movq_M0_wRn(rd0); - switch ((insn >> 22) & 3) { - case 0: - if (insn & (1 << 21)) - gen_op_iwmmxt_minsb_M0_wRn(rd1); - else - gen_op_iwmmxt_minub_M0_wRn(rd1); - break; - case 1: - if (insn & (1 << 21)) - gen_op_iwmmxt_minsw_M0_wRn(rd1); - else - gen_op_iwmmxt_minuw_M0_wRn(rd1); - break; - case 2: - if (insn & (1 << 21)) - gen_op_iwmmxt_minsl_M0_wRn(rd1); - else - gen_op_iwmmxt_minul_M0_wRn(rd1); - break; - case 3: - return 1; - } - gen_op_iwmmxt_movq_wRn_M0(wrd); - gen_op_iwmmxt_set_mup(); - break; - case 0x016: case 0x216: case 0x416: case 0x616: /* WMAX */ - case 0x816: case 0xa16: case 0xc16: case 0xe16: - wrd = (insn >> 12) & 0xf; - rd0 = (insn >> 16) & 0xf; - rd1 = (insn >> 0) & 0xf; - gen_op_iwmmxt_movq_M0_wRn(rd0); - switch ((insn >> 22) & 3) { - case 0: - if (insn & (1 << 21)) - gen_op_iwmmxt_maxsb_M0_wRn(rd1); - else - gen_op_iwmmxt_maxub_M0_wRn(rd1); - break; - case 1: - if (insn & (1 << 21)) - gen_op_iwmmxt_maxsw_M0_wRn(rd1); - else - gen_op_iwmmxt_maxuw_M0_wRn(rd1); - break; - case 2: - if (insn & (1 << 21)) - gen_op_iwmmxt_maxsl_M0_wRn(rd1); - else - gen_op_iwmmxt_maxul_M0_wRn(rd1); - break; - case 3: - return 1; - } - gen_op_iwmmxt_movq_wRn_M0(wrd); - gen_op_iwmmxt_set_mup(); - break; - case 0x002: case 0x102: case 0x202: case 0x302: /* WALIGNI */ - case 0x402: case 0x502: case 0x602: case 0x702: - wrd = (insn >> 12) & 0xf; - rd0 = (insn >> 16) & 0xf; - rd1 = (insn >> 0) & 0xf; - gen_op_iwmmxt_movq_M0_wRn(rd0); - iwmmxt_load_reg(cpu_V1, rd1); - gen_helper_iwmmxt_align(cpu_M0, cpu_M0, cpu_V1, - tcg_constant_i32((insn >> 20) & 3)); - gen_op_iwmmxt_movq_wRn_M0(wrd); - gen_op_iwmmxt_set_mup(); - break; - case 0x01a: case 0x11a: case 0x21a: case 0x31a: /* WSUB */ - case 0x41a: case 0x51a: case 0x61a: case 0x71a: - case 0x81a: case 0x91a: case 0xa1a: case 0xb1a: - case 0xc1a: case 0xd1a: case 0xe1a: case 0xf1a: - wrd = (insn >> 12) & 0xf; - rd0 = (insn >> 16) & 0xf; - rd1 = (insn >> 0) & 0xf; - gen_op_iwmmxt_movq_M0_wRn(rd0); - switch ((insn >> 20) & 0xf) { - case 0x0: - gen_op_iwmmxt_subnb_M0_wRn(rd1); - break; - case 
0x1: - gen_op_iwmmxt_subub_M0_wRn(rd1); - break; - case 0x3: - gen_op_iwmmxt_subsb_M0_wRn(rd1); - break; - case 0x4: - gen_op_iwmmxt_subnw_M0_wRn(rd1); - break; - case 0x5: - gen_op_iwmmxt_subuw_M0_wRn(rd1); - break; - case 0x7: - gen_op_iwmmxt_subsw_M0_wRn(rd1); - break; - case 0x8: - gen_op_iwmmxt_subnl_M0_wRn(rd1); - break; - case 0x9: - gen_op_iwmmxt_subul_M0_wRn(rd1); - break; - case 0xb: - gen_op_iwmmxt_subsl_M0_wRn(rd1); - break; - default: - return 1; - } - gen_op_iwmmxt_movq_wRn_M0(wrd); - gen_op_iwmmxt_set_mup(); - gen_op_iwmmxt_set_cup(); - break; - case 0x01e: case 0x11e: case 0x21e: case 0x31e: /* WSHUFH */ - case 0x41e: case 0x51e: case 0x61e: case 0x71e: - case 0x81e: case 0x91e: case 0xa1e: case 0xb1e: - case 0xc1e: case 0xd1e: case 0xe1e: case 0xf1e: - wrd = (insn >> 12) & 0xf; - rd0 = (insn >> 16) & 0xf; - gen_op_iwmmxt_movq_M0_wRn(rd0); - tmp = tcg_constant_i32(((insn >> 16) & 0xf0) | (insn & 0x0f)); - gen_helper_iwmmxt_shufh(cpu_M0, tcg_env, cpu_M0, tmp); - gen_op_iwmmxt_movq_wRn_M0(wrd); - gen_op_iwmmxt_set_mup(); - gen_op_iwmmxt_set_cup(); - break; - case 0x018: case 0x118: case 0x218: case 0x318: /* WADD */ - case 0x418: case 0x518: case 0x618: case 0x718: - case 0x818: case 0x918: case 0xa18: case 0xb18: - case 0xc18: case 0xd18: case 0xe18: case 0xf18: - wrd = (insn >> 12) & 0xf; - rd0 = (insn >> 16) & 0xf; - rd1 = (insn >> 0) & 0xf; - gen_op_iwmmxt_movq_M0_wRn(rd0); - switch ((insn >> 20) & 0xf) { - case 0x0: - gen_op_iwmmxt_addnb_M0_wRn(rd1); - break; - case 0x1: - gen_op_iwmmxt_addub_M0_wRn(rd1); - break; - case 0x3: - gen_op_iwmmxt_addsb_M0_wRn(rd1); - break; - case 0x4: - gen_op_iwmmxt_addnw_M0_wRn(rd1); - break; - case 0x5: - gen_op_iwmmxt_adduw_M0_wRn(rd1); - break; - case 0x7: - gen_op_iwmmxt_addsw_M0_wRn(rd1); - break; - case 0x8: - gen_op_iwmmxt_addnl_M0_wRn(rd1); - break; - case 0x9: - gen_op_iwmmxt_addul_M0_wRn(rd1); - break; - case 0xb: - gen_op_iwmmxt_addsl_M0_wRn(rd1); - break; - default: - return 1; - } - gen_op_iwmmxt_movq_wRn_M0(wrd); - gen_op_iwmmxt_set_mup(); - gen_op_iwmmxt_set_cup(); - break; - case 0x008: case 0x108: case 0x208: case 0x308: /* WPACK */ - case 0x408: case 0x508: case 0x608: case 0x708: - case 0x808: case 0x908: case 0xa08: case 0xb08: - case 0xc08: case 0xd08: case 0xe08: case 0xf08: - if (!(insn & (1 << 20)) || ((insn >> 22) & 3) == 0) - return 1; - wrd = (insn >> 12) & 0xf; - rd0 = (insn >> 16) & 0xf; - rd1 = (insn >> 0) & 0xf; - gen_op_iwmmxt_movq_M0_wRn(rd0); - switch ((insn >> 22) & 3) { - case 1: - if (insn & (1 << 21)) - gen_op_iwmmxt_packsw_M0_wRn(rd1); - else - gen_op_iwmmxt_packuw_M0_wRn(rd1); - break; - case 2: - if (insn & (1 << 21)) - gen_op_iwmmxt_packsl_M0_wRn(rd1); - else - gen_op_iwmmxt_packul_M0_wRn(rd1); - break; - case 3: - if (insn & (1 << 21)) - gen_op_iwmmxt_packsq_M0_wRn(rd1); - else - gen_op_iwmmxt_packuq_M0_wRn(rd1); - break; - } - gen_op_iwmmxt_movq_wRn_M0(wrd); - gen_op_iwmmxt_set_mup(); - gen_op_iwmmxt_set_cup(); - break; - case 0x201: case 0x203: case 0x205: case 0x207: - case 0x209: case 0x20b: case 0x20d: case 0x20f: - case 0x211: case 0x213: case 0x215: case 0x217: - case 0x219: case 0x21b: case 0x21d: case 0x21f: - wrd = (insn >> 5) & 0xf; - rd0 = (insn >> 12) & 0xf; - rd1 = (insn >> 0) & 0xf; - if (rd0 == 0xf || rd1 == 0xf) - return 1; - gen_op_iwmmxt_movq_M0_wRn(wrd); - tmp = load_reg(s, rd0); - tmp2 = load_reg(s, rd1); - switch ((insn >> 16) & 0xf) { - case 0x0: /* TMIA */ - gen_helper_iwmmxt_muladdsl(cpu_M0, cpu_M0, tmp, tmp2); - break; - case 0x8: /* TMIAPH */ - 
gen_helper_iwmmxt_muladdsw(cpu_M0, cpu_M0, tmp, tmp2); - break; - case 0xc: case 0xd: case 0xe: case 0xf: /* TMIAxy */ - if (insn & (1 << 16)) - tcg_gen_shri_i32(tmp, tmp, 16); - if (insn & (1 << 17)) - tcg_gen_shri_i32(tmp2, tmp2, 16); - gen_helper_iwmmxt_muladdswl(cpu_M0, cpu_M0, tmp, tmp2); - break; - default: - return 1; - } - gen_op_iwmmxt_movq_wRn_M0(wrd); - gen_op_iwmmxt_set_mup(); - break; - default: - return 1; - } - - return 0; -} - -/* Disassemble an XScale DSP instruction. Returns nonzero if an error occurred - (ie. an undefined instruction). */ -static int disas_dsp_insn(DisasContext *s, uint32_t insn) -{ - int acc, rd0, rd1, rdhi, rdlo; - TCGv_i32 tmp, tmp2; - - if ((insn & 0x0ff00f10) == 0x0e200010) { - /* Multiply with Internal Accumulate Format */ - rd0 = (insn >> 12) & 0xf; - rd1 = insn & 0xf; - acc = (insn >> 5) & 7; - - if (acc != 0) - return 1; - - tmp = load_reg(s, rd0); - tmp2 = load_reg(s, rd1); - switch ((insn >> 16) & 0xf) { - case 0x0: /* MIA */ - gen_helper_iwmmxt_muladdsl(cpu_M0, cpu_M0, tmp, tmp2); - break; - case 0x8: /* MIAPH */ - gen_helper_iwmmxt_muladdsw(cpu_M0, cpu_M0, tmp, tmp2); - break; - case 0xc: /* MIABB */ - case 0xd: /* MIABT */ - case 0xe: /* MIATB */ - case 0xf: /* MIATT */ - if (insn & (1 << 16)) - tcg_gen_shri_i32(tmp, tmp, 16); - if (insn & (1 << 17)) - tcg_gen_shri_i32(tmp2, tmp2, 16); - gen_helper_iwmmxt_muladdswl(cpu_M0, cpu_M0, tmp, tmp2); - break; - default: - return 1; - } - - gen_op_iwmmxt_movq_wRn_M0(acc); - return 0; - } - - if ((insn & 0x0fe00ff8) == 0x0c400000) { - /* Internal Accumulator Access Format */ - rdhi = (insn >> 16) & 0xf; - rdlo = (insn >> 12) & 0xf; - acc = insn & 7; - - if (acc != 0) - return 1; - - if (insn & ARM_CP_RW_BIT) { /* MRA */ - iwmmxt_load_reg(cpu_V0, acc); - tcg_gen_extrl_i64_i32(cpu_R[rdlo], cpu_V0); - tcg_gen_extrh_i64_i32(cpu_R[rdhi], cpu_V0); - tcg_gen_andi_i32(cpu_R[rdhi], cpu_R[rdhi], (1 << (40 - 32)) - 1); - } else { /* MAR */ - tcg_gen_concat_i32_i64(cpu_V0, cpu_R[rdlo], cpu_R[rdhi]); - iwmmxt_store_reg(cpu_V0, acc); - } - return 0; - } - - return 1; -} - static void gen_goto_ptr(void) { tcg_gen_lookup_and_goto_ptr(); @@ -2518,7 +1310,7 @@ static void gen_goto_ptr(void) * cpu_loop_exec. Any live exit_requests will be processed as we * enter the next TB. 
*/ -static void gen_goto_tb(DisasContext *s, int n, target_long diff) +static void gen_goto_tb(DisasContext *s, unsigned tb_slot_idx, target_long diff) { if (translator_use_goto_tb(&s->base, s->pc_curr + diff)) { /* @@ -2531,12 +1323,12 @@ static void gen_goto_tb(DisasContext *s, int n, target_long diff) */ if (tb_cflags(s->base.tb) & CF_PCREL) { gen_update_pc(s, diff); - tcg_gen_goto_tb(n); + tcg_gen_goto_tb(tb_slot_idx); } else { - tcg_gen_goto_tb(n); + tcg_gen_goto_tb(tb_slot_idx); gen_update_pc(s, diff); } - tcg_gen_exit_tb(s->base.tb, n); + tcg_gen_exit_tb(s->base.tb, tb_slot_idx); } else { gen_update_pc(s, diff); gen_goto_ptr(); @@ -2982,21 +1774,11 @@ static void do_coproc_insn(DisasContext *s, int cpnum, int is64, if (maskbit != 4 && maskbit != 14) { /* T4 and T14 are RES0 so never cause traps */ - TCGv_i32 t; - DisasLabel over = gen_disas_label(s); - - t = load_cpu_offset(offsetoflow32(CPUARMState, cp15.hstr_el2)); - tcg_gen_andi_i32(t, t, 1u << maskbit); - tcg_gen_brcondi_i32(TCG_COND_EQ, t, 0, over.label); + TCGLabel *fail = delay_exception_el(s, EXCP_UDEF, syndrome, 2); + TCGv_i32 t = + load_cpu_offset(offsetoflow32(CPUARMState, cp15.hstr_el2)); - gen_exception_insn_el(s, 0, EXCP_UDEF, syndrome, 2); - /* - * gen_exception_insn() will set is_jmp to DISAS_NORETURN, - * but since we're conditionally branching over it, we want - * to assume continue-to-next-instruction. - */ - s->base.is_jmp = DISAS_NEXT; - set_disas_label(s, over); + tcg_gen_brcondi_i32(TCG_COND_TSTNE, t, 1u << maskbit, fail); } } @@ -3048,13 +1830,10 @@ static void do_coproc_insn(DisasContext *s, int cpnum, int is64, } if ((s->hstr_active && s->current_el == 0) || ri->accessfn || - (ri->fgt && s->fgt_active) || - (arm_dc_feature(s, ARM_FEATURE_XSCALE) && cpnum < 14)) { + (ri->fgt && s->fgt_active)) { /* * Emit code to perform further access permissions checks at * runtime; this may result in an exception. - * Note that on XScale all cp0..c13 registers do an access check - * call in order to handle c15_cpar. */ gen_set_condexec(s); gen_update_pc(s, 0); @@ -3192,24 +1971,6 @@ static void do_coproc_insn(DisasContext *s, int cpnum, int is64, } } -/* Decode XScale DSP or iWMMXt insn (in the copro space, cp=0 or 1) */ -static void disas_xscale_insn(DisasContext *s, uint32_t insn) -{ - int cpnum = (insn >> 8) & 0xf; - - if (extract32(s->c15_cpar, cpnum, 1) == 0) { - unallocated_encoding(s); - } else if (arm_dc_feature(s, ARM_FEATURE_IWMMXT)) { - if (disas_iwmmxt_insn(s, insn)) { - unallocated_encoding(s); - } - } else if (arm_dc_feature(s, ARM_FEATURE_XSCALE)) { - if (disas_dsp_insn(s, insn)) { - unallocated_encoding(s); - } - } -} - /* Store a 64-bit value to a register pair. Clobbers val. */ static void gen_storeq_reg(DisasContext *s, int rlow, int rhigh, TCGv_i64 val) { @@ -3569,14 +2330,7 @@ static bool valid_cp(DisasContext *s, int cp) * only cp14 and cp15 are valid, and other values aren't considered * to be in the coprocessor-instruction space at all. v8M still * permits coprocessors 0..7. - * For XScale, we must not decode the XScale cp0, cp1 space as - * a standard coprocessor insn, because we want to fall through to - * the legacy disas_xscale_insn() decoder after decodetree is done. 
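For context, a hedged sketch of the delayed-exception pattern this hunk switches to. The helper names come from the translate.h additions later in this patch; their bodies are not shown here, so treat this as an illustration of intended use at a call site like the HSTR_EL2 trap check above, not as the helper implementation:

    /* Request an out-of-line exception stub and branch to it directly.
     * TCG_COND_TSTNE takes the branch when (reg & mask) != 0, so the old
     * andi + "branch-if-zero over an inline exception" sequence collapses
     * into one conditional branch and no is_jmp fixup is needed. */
    TCGLabel *fail = delay_exception_el(s, EXCP_UDEF, syndrome, 2);
    TCGv_i32 t = load_cpu_offset(offsetoflow32(CPUARMState, cp15.hstr_el2));
    tcg_gen_brcondi_i32(TCG_COND_TSTNE, t, 1u << maskbit, fail);
    /* ...translation continues on the fast path; at the end of the TB,
     * arm_tr_tb_stop() calls emit_delayed_exceptions(dc), which emits the
     * label and the recorded exception call for every such stub. */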
*/ - if (arm_dc_feature(s, ARM_FEATURE_XSCALE) && (cp == 0 || cp == 1)) { - return false; - } - if (arm_dc_feature(s, ARM_FEATURE_V8) && !arm_dc_feature(s, ARM_FEATURE_M)) { return cp >= 14; @@ -6844,11 +5598,10 @@ static bool trans_LE(DisasContext *s, arg_LE *a) if (!a->tp && dc_isar_feature(aa32_mve, s) && fpu_active) { /* Need to do a runtime check for LTPSIZE != 4 */ - DisasLabel skipexc = gen_disas_label(s); + TCGLabel *fail = delay_exception(s, EXCP_INVSTATE, syn_uncategorized()); + tmp = load_cpu_field(v7m.ltpsize); - tcg_gen_brcondi_i32(TCG_COND_EQ, tmp, 4, skipexc.label); - gen_exception_insn(s, 0, EXCP_INVSTATE, syn_uncategorized()); - set_disas_label(s, skipexc); + tcg_gen_brcondi_i32(TCG_COND_NE, tmp, 4, fail); } if (a->f) { @@ -7343,18 +6096,6 @@ static void disas_arm_insn(DisasContext *s, unsigned int insn) disas_neon_shared(s, insn)) { return; } - /* fall back to legacy decoder */ - - if ((insn & 0x0e000f00) == 0x0c000100) { - if (arm_dc_feature(s, ARM_FEATURE_IWMMXT)) { - /* iWMMXt register transfer. */ - if (extract32(s->c15_cpar, 1, 1)) { - if (!disas_iwmmxt_insn(s, insn)) { - return; - } - } - } - } goto illegal_op; } if (cond != 0xe) { @@ -7368,16 +6109,7 @@ static void disas_arm_insn(DisasContext *s, unsigned int insn) disas_vfp(s, insn)) { return; } - /* fall back to legacy decoder */ - /* TODO: convert xscale/iwmmxt decoder to decodetree ?? */ - if (arm_dc_feature(s, ARM_FEATURE_XSCALE)) { - if (((insn & 0x0c000e00) == 0x0c000000) - && ((insn & 0x03000000) != 0x03000000)) { - /* Coprocessor insn, coprocessor 0 or 1 */ - disas_xscale_insn(s, insn); - return; - } - } + /* We didn't match anything in the decoder: UNDEF */ illegal_op: unallocated_encoding(s); @@ -7606,12 +6338,8 @@ static void arm_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs) dc->hstr_active = EX_TBFLAG_A32(tb_flags, HSTR_ACTIVE); dc->ns = EX_TBFLAG_A32(tb_flags, NS); dc->vfp_enabled = EX_TBFLAG_A32(tb_flags, VFPEN); - if (arm_feature(env, ARM_FEATURE_XSCALE)) { - dc->c15_cpar = EX_TBFLAG_A32(tb_flags, XSCALE_CPAR); - } else { - dc->vec_len = EX_TBFLAG_A32(tb_flags, VECLEN); - dc->vec_stride = EX_TBFLAG_A32(tb_flags, VECSTRIDE); - } + dc->vec_len = EX_TBFLAG_A32(tb_flags, VECLEN); + dc->vec_stride = EX_TBFLAG_A32(tb_flags, VECSTRIDE); dc->sme_trap_nonstreaming = EX_TBFLAG_A32(tb_flags, SME_TRAP_NONSTREAMING); } @@ -7651,10 +6379,6 @@ static void arm_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs) int bound = -(dc->base.pc_first | TARGET_PAGE_MASK) / 4; dc->base.max_insns = MIN(dc->base.max_insns, bound); } - - cpu_V0 = tcg_temp_new_i64(); - cpu_V1 = tcg_temp_new_i64(); - cpu_M0 = tcg_temp_new_i64(); } static void arm_tr_tb_start(DisasContextBase *dcbase, CPUState *cpu) @@ -8107,6 +6831,8 @@ static void arm_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu) gen_goto_tb(dc, 1, curr_insn_len(dc)); } } + + emit_delayed_exceptions(dc); } static const TranslatorOps arm_translator_ops = { diff --git a/target/arm/tcg/translate.h b/target/arm/tcg/translate.h index 1bfdb0f..9a85ea7 100644 --- a/target/arm/tcg/translate.h +++ b/target/arm/tcg/translate.h @@ -21,9 +21,25 @@ typedef struct DisasLabel { target_ulong pc_save; } DisasLabel; +/* + * Emit an exception call out of line. 
+ */ +typedef struct DisasDelayException { + struct DisasDelayException *next; + TCGLabel *lab; + target_long pc_curr; + target_long pc_save; + int condexec_mask; + int condexec_cond; + uint32_t excp; + uint32_t syn; + uint32_t target_el; +} DisasDelayException; + typedef struct DisasContext { DisasContextBase base; const ARMISARegisters *isar; + DisasDelayException *delay_excp_list; /* The address of the current instruction being translated. */ target_ulong pc_curr; @@ -70,8 +86,10 @@ typedef struct DisasContext { int fp_excp_el; /* FP exception EL or 0 if enabled */ int sve_excp_el; /* SVE exception EL or 0 if enabled */ int sme_excp_el; /* SME exception EL or 0 if enabled */ + int zt0_excp_el; /* ZT0 exception EL or 0 if enabled */ int vl; /* current vector length in bytes */ int svl; /* current streaming vector length in bytes */ + int max_svl; /* maximum implemented streaming vector length */ bool vfp_enabled; /* FP enabled via FPSCR.EN */ int vec_len; int vec_stride; @@ -148,6 +166,8 @@ typedef struct DisasContext { bool trap_eret; /* True if FEAT_LSE2 SCTLR_ELx.nAA is set */ bool naa; + /* True if HCR_EL2.E2H is set */ + bool e2h; /* True if FEAT_NV HCR_EL2.NV is enabled */ bool nv; /* True if NV enabled and HCR_EL2.NV1 is set */ @@ -162,6 +182,12 @@ typedef struct DisasContext { bool fpcr_ah; /* True if FPCR.NEP is 1 (FEAT_AFP scalar upper-element result handling) */ bool fpcr_nep; + /* True if GCSEnabled. */ + bool gcs_en; + /* True if GCSReturnValueCheckEnabled. */ + bool gcs_rvcen; + /* GCSSTR exception EL or 0 if enabled */ + uint8_t gcsstr_el; /* * >= 0, a copy of PSTATE.BTYPE, which will be 0 without v8.5-BTI. * < 0, set by the current instruction. @@ -173,8 +199,6 @@ typedef struct DisasContext { uint8_t gm_blocksize; /* True if the current insn_start has been updated. */ bool insn_start_updated; - /* Bottom two bits of XScale c15_cpar coprocessor access control reg */ - int c15_cpar; /* Offset from VNCR_EL2 when FEAT_NV2 redirects this reg to memory */ uint32_t nv2_redirect_offset; } DisasContext; @@ -208,6 +232,11 @@ static inline int plus_2(DisasContext *s, int x) return x + 2; } +static inline int plus_8(DisasContext *s, int x) +{ + return x + 8; +} + static inline int plus_12(DisasContext *s, int x) { return x + 12; @@ -347,10 +376,15 @@ void arm_jump_cc(DisasCompare *cmp, TCGLabel *label); void arm_gen_test_cc(int cc, TCGLabel *label); MemOp pow2_align(unsigned i); void unallocated_encoding(DisasContext *s); +void gen_exception_internal(int excp); void gen_exception_insn_el(DisasContext *s, target_long pc_diff, int excp, uint32_t syn, uint32_t target_el); void gen_exception_insn(DisasContext *s, target_long pc_diff, int excp, uint32_t syn); +TCGLabel *delay_exception_el(DisasContext *s, int excp, + uint32_t syn, uint32_t target_el); +TCGLabel *delay_exception(DisasContext *s, int excp, uint32_t syn); +void emit_delayed_exceptions(DisasContext *s); /* Return state of Alternate Half-precision flag, caller frees result */ static inline TCGv_i32 get_ahp_flag(void) @@ -364,27 +398,27 @@ static inline TCGv_i32 get_ahp_flag(void) } /* Set bits within PSTATE. 
*/ -static inline void set_pstate_bits(uint32_t bits) +static inline void set_pstate_bits(uint64_t bits) { - TCGv_i32 p = tcg_temp_new_i32(); + TCGv_i64 p = tcg_temp_new_i64(); tcg_debug_assert(!(bits & CACHED_PSTATE_BITS)); - tcg_gen_ld_i32(p, tcg_env, offsetof(CPUARMState, pstate)); - tcg_gen_ori_i32(p, p, bits); - tcg_gen_st_i32(p, tcg_env, offsetof(CPUARMState, pstate)); + tcg_gen_ld_i64(p, tcg_env, offsetof(CPUARMState, pstate)); + tcg_gen_ori_i64(p, p, bits); + tcg_gen_st_i64(p, tcg_env, offsetof(CPUARMState, pstate)); } /* Clear bits within PSTATE. */ -static inline void clear_pstate_bits(uint32_t bits) +static inline void clear_pstate_bits(uint64_t bits) { - TCGv_i32 p = tcg_temp_new_i32(); + TCGv_i64 p = tcg_temp_new_i64(); tcg_debug_assert(!(bits & CACHED_PSTATE_BITS)); - tcg_gen_ld_i32(p, tcg_env, offsetof(CPUARMState, pstate)); - tcg_gen_andi_i32(p, p, ~bits); - tcg_gen_st_i32(p, tcg_env, offsetof(CPUARMState, pstate)); + tcg_gen_ld_i64(p, tcg_env, offsetof(CPUARMState, pstate)); + tcg_gen_andi_i64(p, p, ~bits); + tcg_gen_st_i64(p, tcg_env, offsetof(CPUARMState, pstate)); } /* If the singlestep state is Active-not-pending, advance to Active-pending. */ @@ -635,6 +669,8 @@ typedef void GVecGen3Fn(unsigned, uint32_t, uint32_t, uint32_t, uint32_t, uint32_t); typedef void GVecGen4Fn(unsigned, uint32_t, uint32_t, uint32_t, uint32_t, uint32_t, uint32_t); +typedef void GVecGen3FnVar(unsigned, TCGv_ptr, uint32_t, TCGv_ptr, uint32_t, + TCGv_ptr, uint32_t, uint32_t, uint32_t); /* Function prototype for gen_ functions for calling Neon helpers */ typedef void NeonGenOneOpFn(TCGv_i32, TCGv_i32); diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c index 986eaf8..33a136b 100644 --- a/target/arm/tcg/vec_helper.c +++ b/target/arm/tcg/vec_helper.c @@ -825,11 +825,11 @@ void HELPER(NAME)(void *vd, void *vn, void *vm, void *va, uint32_t desc) \ clear_tail(d, opr_sz, simd_maxsz(desc)); \ } -DO_DOT(gvec_sdot_b, int32_t, int8_t, int8_t) -DO_DOT(gvec_udot_b, uint32_t, uint8_t, uint8_t) -DO_DOT(gvec_usdot_b, uint32_t, uint8_t, int8_t) -DO_DOT(gvec_sdot_h, int64_t, int16_t, int16_t) -DO_DOT(gvec_udot_h, uint64_t, uint16_t, uint16_t) +DO_DOT(gvec_sdot_4b, int32_t, int8_t, int8_t) +DO_DOT(gvec_udot_4b, uint32_t, uint8_t, uint8_t) +DO_DOT(gvec_usdot_4b, uint32_t, uint8_t, int8_t) +DO_DOT(gvec_sdot_4h, int64_t, int16_t, int16_t) +DO_DOT(gvec_udot_4h, uint64_t, uint16_t, uint16_t) #define DO_DOT_IDX(NAME, TYPED, TYPEN, TYPEM, HD) \ void HELPER(NAME)(void *vd, void *vn, void *vm, void *va, uint32_t desc) \ @@ -865,12 +865,63 @@ void HELPER(NAME)(void *vd, void *vn, void *vm, void *va, uint32_t desc) \ clear_tail(d, opr_sz, simd_maxsz(desc)); \ } -DO_DOT_IDX(gvec_sdot_idx_b, int32_t, int8_t, int8_t, H4) -DO_DOT_IDX(gvec_udot_idx_b, uint32_t, uint8_t, uint8_t, H4) -DO_DOT_IDX(gvec_sudot_idx_b, int32_t, int8_t, uint8_t, H4) -DO_DOT_IDX(gvec_usdot_idx_b, int32_t, uint8_t, int8_t, H4) -DO_DOT_IDX(gvec_sdot_idx_h, int64_t, int16_t, int16_t, H8) -DO_DOT_IDX(gvec_udot_idx_h, uint64_t, uint16_t, uint16_t, H8) +DO_DOT_IDX(gvec_sdot_idx_4b, int32_t, int8_t, int8_t, H4) +DO_DOT_IDX(gvec_udot_idx_4b, uint32_t, uint8_t, uint8_t, H4) +DO_DOT_IDX(gvec_sudot_idx_4b, int32_t, int8_t, uint8_t, H4) +DO_DOT_IDX(gvec_usdot_idx_4b, int32_t, uint8_t, int8_t, H4) +DO_DOT_IDX(gvec_sdot_idx_4h, int64_t, int16_t, int16_t, H8) +DO_DOT_IDX(gvec_udot_idx_4h, uint64_t, uint16_t, uint16_t, H8) + +#undef DO_DOT +#undef DO_DOT_IDX + +/* Similar for 2-way dot product */ +#define DO_DOT(NAME, TYPED, TYPEN, TYPEM) \ +void 
HELPER(NAME)(void *vd, void *vn, void *vm, void *va, uint32_t desc) \ +{ \ + intptr_t i, opr_sz = simd_oprsz(desc); \ + TYPED *d = vd, *a = va; \ + TYPEN *n = vn; \ + TYPEM *m = vm; \ + for (i = 0; i < opr_sz / sizeof(TYPED); ++i) { \ + d[i] = (a[i] + \ + (TYPED)n[i * 2 + 0] * m[i * 2 + 0] + \ + (TYPED)n[i * 2 + 1] * m[i * 2 + 1]); \ + } \ + clear_tail(d, opr_sz, simd_maxsz(desc)); \ +} + +#define DO_DOT_IDX(NAME, TYPED, TYPEN, TYPEM, HD) \ +void HELPER(NAME)(void *vd, void *vn, void *vm, void *va, uint32_t desc) \ +{ \ + intptr_t i = 0, opr_sz = simd_oprsz(desc); \ + intptr_t opr_sz_n = opr_sz / sizeof(TYPED); \ + intptr_t segend = MIN(16 / sizeof(TYPED), opr_sz_n); \ + intptr_t index = simd_data(desc); \ + TYPED *d = vd, *a = va; \ + TYPEN *n = vn; \ + TYPEM *m_indexed = (TYPEM *)vm + HD(index) * 2; \ + do { \ + TYPED m0 = m_indexed[i * 2 + 0]; \ + TYPED m1 = m_indexed[i * 2 + 1]; \ + do { \ + d[i] = (a[i] + \ + n[i * 2 + 0] * m0 + \ + n[i * 2 + 1] * m1); \ + } while (++i < segend); \ + segend = i + (16 / sizeof(TYPED)); \ + } while (i < opr_sz_n); \ + clear_tail(d, opr_sz, simd_maxsz(desc)); \ +} + +DO_DOT(gvec_sdot_2h, int32_t, int16_t, int16_t) +DO_DOT(gvec_udot_2h, uint32_t, uint16_t, uint16_t) + +DO_DOT_IDX(gvec_sdot_idx_2h, int32_t, int16_t, int16_t, H4) +DO_DOT_IDX(gvec_udot_idx_2h, uint32_t, uint16_t, uint16_t, H4) + +#undef DO_DOT +#undef DO_DOT_IDX void HELPER(gvec_fcaddh)(void *vd, void *vn, void *vm, float_status *fpst, uint32_t desc) @@ -1416,14 +1467,19 @@ void HELPER(NAME)(void *vd, void *vn, void *vm, \ clear_tail(d, oprsz, simd_maxsz(desc)); \ } +DO_3OP(gvec_fadd_b16, bfloat16_add, float16) DO_3OP(gvec_fadd_h, float16_add, float16) DO_3OP(gvec_fadd_s, float32_add, float32) DO_3OP(gvec_fadd_d, float64_add, float64) +DO_3OP(gvec_bfadd, bfloat16_add, bfloat16) +DO_3OP(gvec_fsub_b16, bfloat16_sub, float16) DO_3OP(gvec_fsub_h, float16_sub, float16) DO_3OP(gvec_fsub_s, float32_sub, float32) DO_3OP(gvec_fsub_d, float64_sub, float64) +DO_3OP(gvec_bfsub, bfloat16_sub, bfloat16) +DO_3OP(gvec_fmul_b16, bfloat16_mul, float16) DO_3OP(gvec_fmul_h, float16_mul, float16) DO_3OP(gvec_fmul_s, float32_mul, float32) DO_3OP(gvec_fmul_d, float64_mul, float64) @@ -1515,6 +1571,13 @@ DO_3OP(gvec_ah_fmin_h, helper_vfp_ah_minh, float16) DO_3OP(gvec_ah_fmin_s, helper_vfp_ah_mins, float32) DO_3OP(gvec_ah_fmin_d, helper_vfp_ah_mind, float64) +DO_3OP(gvec_fmax_b16, bfloat16_max, bfloat16) +DO_3OP(gvec_fmin_b16, bfloat16_min, bfloat16) +DO_3OP(gvec_fmaxnum_b16, bfloat16_maxnum, bfloat16) +DO_3OP(gvec_fminnum_b16, bfloat16_minnum, bfloat16) +DO_3OP(gvec_ah_fmax_b16, helper_sme2_ah_fmax_b16, bfloat16) +DO_3OP(gvec_ah_fmin_b16, helper_sme2_ah_fmin_b16, bfloat16) + #endif #undef DO_3OP @@ -1550,6 +1613,12 @@ static float16 float16_muladd_f(float16 dest, float16 op1, float16 op2, return float16_muladd(op1, op2, dest, 0, stat); } +static bfloat16 bfloat16_muladd_f(bfloat16 dest, bfloat16 op1, bfloat16 op2, + float_status *stat) +{ + return bfloat16_muladd(op1, op2, dest, 0, stat); +} + static float32 float32_muladd_f(float32 dest, float32 op1, float32 op2, float_status *stat) { @@ -1568,6 +1637,12 @@ static float16 float16_mulsub_f(float16 dest, float16 op1, float16 op2, return float16_muladd(float16_chs(op1), op2, dest, 0, stat); } +static bfloat16 bfloat16_mulsub_f(bfloat16 dest, bfloat16 op1, bfloat16 op2, + float_status *stat) +{ + return bfloat16_muladd(bfloat16_chs(op1), op2, dest, 0, stat); +} + static float32 float32_mulsub_f(float32 dest, float32 op1, float32 op2, float_status *stat) { @@ -1586,6 
+1661,12 @@ static float16 float16_ah_mulsub_f(float16 dest, float16 op1, float16 op2, return float16_muladd(op1, op2, dest, float_muladd_negate_product, stat); } +static bfloat16 bfloat16_ah_mulsub_f(bfloat16 dest, bfloat16 op1, bfloat16 op2, + float_status *stat) +{ + return bfloat16_muladd(op1, op2, dest, float_muladd_negate_product, stat); +} + static float32 float32_ah_mulsub_f(float32 dest, float32 op1, float32 op2, float_status *stat) { @@ -1610,23 +1691,28 @@ void HELPER(NAME)(void *vd, void *vn, void *vm, \ clear_tail(d, oprsz, simd_maxsz(desc)); \ } -DO_MULADD(gvec_fmla_h, float16_muladd_nf, float16) -DO_MULADD(gvec_fmla_s, float32_muladd_nf, float32) +DO_MULADD(gvec_fmla_nf_h, float16_muladd_nf, float16) +DO_MULADD(gvec_fmla_nf_s, float32_muladd_nf, float32) -DO_MULADD(gvec_fmls_h, float16_mulsub_nf, float16) -DO_MULADD(gvec_fmls_s, float32_mulsub_nf, float32) +DO_MULADD(gvec_fmls_nf_h, float16_mulsub_nf, float16) +DO_MULADD(gvec_fmls_nf_s, float32_mulsub_nf, float32) DO_MULADD(gvec_vfma_h, float16_muladd_f, float16) DO_MULADD(gvec_vfma_s, float32_muladd_f, float32) DO_MULADD(gvec_vfma_d, float64_muladd_f, float64) +DO_MULADD(gvec_bfmla, bfloat16_muladd_f, bfloat16) DO_MULADD(gvec_vfms_h, float16_mulsub_f, float16) DO_MULADD(gvec_vfms_s, float32_mulsub_f, float32) DO_MULADD(gvec_vfms_d, float64_mulsub_f, float64) +DO_MULADD(gvec_bfmls, bfloat16_mulsub_f, bfloat16) DO_MULADD(gvec_ah_vfms_h, float16_ah_mulsub_f, float16) DO_MULADD(gvec_ah_vfms_s, float32_ah_mulsub_f, float32) DO_MULADD(gvec_ah_vfms_d, float64_ah_mulsub_f, float64) +DO_MULADD(gvec_ah_bfmls, bfloat16_ah_mulsub_f, bfloat16) + +#undef DO_MULADD /* For the indexed ops, SVE applies the index per 128-bit vector segment. * For AdvSIMD, there is of course only one such vector segment. @@ -1699,6 +1785,7 @@ void HELPER(NAME)(void *vd, void *vn, void *vm, \ #define nop(N, M, S) (M) +DO_FMUL_IDX(gvec_fmul_idx_b16, nop, bfloat16_mul, float16, H2) DO_FMUL_IDX(gvec_fmul_idx_h, nop, float16_mul, float16, H2) DO_FMUL_IDX(gvec_fmul_idx_s, nop, float32_mul, float32, H4) DO_FMUL_IDX(gvec_fmul_idx_d, nop, float64_mul, float64, H8) @@ -1745,14 +1832,17 @@ void HELPER(NAME)(void *vd, void *vn, void *vm, void *va, \ DO_FMLA_IDX(gvec_fmla_idx_h, float16, H2, 0, 0) DO_FMLA_IDX(gvec_fmla_idx_s, float32, H4, 0, 0) DO_FMLA_IDX(gvec_fmla_idx_d, float64, H8, 0, 0) +DO_FMLA_IDX(gvec_bfmla_idx, bfloat16, H2, 0, 0) DO_FMLA_IDX(gvec_fmls_idx_h, float16, H2, INT16_MIN, 0) DO_FMLA_IDX(gvec_fmls_idx_s, float32, H4, INT32_MIN, 0) DO_FMLA_IDX(gvec_fmls_idx_d, float64, H8, INT64_MIN, 0) +DO_FMLA_IDX(gvec_bfmls_idx, bfloat16, H2, INT16_MIN, 0) DO_FMLA_IDX(gvec_ah_fmls_idx_h, float16, H2, 0, float_muladd_negate_product) DO_FMLA_IDX(gvec_ah_fmls_idx_s, float32, H4, 0, float_muladd_negate_product) DO_FMLA_IDX(gvec_ah_fmls_idx_d, float64, H8, 0, float_muladd_negate_product) +DO_FMLA_IDX(gvec_ah_bfmls_idx, bfloat16, H2, 0, float_muladd_negate_product) #undef DO_FMLA_IDX @@ -2184,7 +2274,8 @@ void HELPER(sve2_fmlal_zzzw_s)(void *vd, void *vn, void *vm, void *va, intptr_t i, oprsz = simd_oprsz(desc); bool is_s = extract32(desc, SIMD_DATA_SHIFT, 1); intptr_t sel = extract32(desc, SIMD_DATA_SHIFT + 1, 1) * sizeof(float16); - float_status *status = &env->vfp.fp_status[FPST_A64]; + bool za = extract32(desc, SIMD_DATA_SHIFT + 2, 1); + float_status *status = &env->vfp.fp_status[za ? 
FPST_ZA : FPST_A64]; bool fz16 = env->vfp.fpcr & FPCR_FZ16; int negx = 0, negf = 0; @@ -2267,8 +2358,9 @@ void HELPER(sve2_fmlal_zzxw_s)(void *vd, void *vn, void *vm, void *va, intptr_t i, j, oprsz = simd_oprsz(desc); bool is_s = extract32(desc, SIMD_DATA_SHIFT, 1); intptr_t sel = extract32(desc, SIMD_DATA_SHIFT + 1, 1) * sizeof(float16); - intptr_t idx = extract32(desc, SIMD_DATA_SHIFT + 2, 3) * sizeof(float16); - float_status *status = &env->vfp.fp_status[FPST_A64]; + bool za = extract32(desc, SIMD_DATA_SHIFT + 2, 1); + intptr_t idx = extract32(desc, SIMD_DATA_SHIFT + 3, 3) * sizeof(float16); + float_status *status = &env->vfp.fp_status[za ? FPST_ZA : FPST_A64]; bool fz16 = env->vfp.fpcr & FPCR_FZ16; int negx = 0, negf = 0; @@ -2989,31 +3081,62 @@ float32 bfdotadd(float32 sum, uint32_t e1, uint32_t e2, float_status *fpst) float32 bfdotadd_ebf(float32 sum, uint32_t e1, uint32_t e2, float_status *fpst, float_status *fpst_odd) { - /* - * Compare f16_dotadd() in sme_helper.c, but here we have - * bfloat16 inputs. In particular that means that we do not - * want the FPCR.FZ16 flush semantics, so we use the normal - * float_status for the input handling here. - */ - float64 e1r = float32_to_float64(e1 << 16, fpst); - float64 e1c = float32_to_float64(e1 & 0xffff0000u, fpst); - float64 e2r = float32_to_float64(e2 << 16, fpst); - float64 e2c = float32_to_float64(e2 & 0xffff0000u, fpst); - float64 t64; + float32 s1r = e1 << 16; + float32 s1c = e1 & 0xffff0000u; + float32 s2r = e2 << 16; + float32 s2c = e2 & 0xffff0000u; float32 t32; - /* - * The ARM pseudocode function FPDot performs both multiplies - * and the add with a single rounding operation. Emulate this - * by performing the first multiply in round-to-odd, then doing - * the second multiply as fused multiply-add, and rounding to - * float32 all in one step. - */ - t64 = float64_mul(e1r, e2r, fpst_odd); - t64 = float64r32_muladd(e1c, e2c, t64, 0, fpst); + /* C.f. FPProcessNaNs4 */ + if (float32_is_any_nan(s1r) || float32_is_any_nan(s1c) || + float32_is_any_nan(s2r) || float32_is_any_nan(s2c)) { + if (float32_is_signaling_nan(s1r, fpst)) { + t32 = s1r; + } else if (float32_is_signaling_nan(s1c, fpst)) { + t32 = s1c; + } else if (float32_is_signaling_nan(s2r, fpst)) { + t32 = s2r; + } else if (float32_is_signaling_nan(s2c, fpst)) { + t32 = s2c; + } else if (float32_is_any_nan(s1r)) { + t32 = s1r; + } else if (float32_is_any_nan(s1c)) { + t32 = s1c; + } else if (float32_is_any_nan(s2r)) { + t32 = s2r; + } else { + t32 = s2c; + } + /* + * FPConvertNaN(FPProcessNaN(t32)) will be done as part + * of the final addition below. + */ + } else { + /* + * Compare f16_dotadd() in sme_helper.c, but here we have + * bfloat16 inputs. In particular that means that we do not + * want the FPCR.FZ16 flush semantics, so we use the normal + * float_status for the input handling here. + */ + float64 e1r = float32_to_float64(s1r, fpst); + float64 e1c = float32_to_float64(s1c, fpst); + float64 e2r = float32_to_float64(s2r, fpst); + float64 e2c = float32_to_float64(s2c, fpst); + float64 t64; + + /* + * The ARM pseudocode function FPDot performs both multiplies + * and the add with a single rounding operation. Emulate this + * by performing the first multiply in round-to-odd, then doing + * the second multiply as fused multiply-add, and rounding to + * float32 all in one step. + */ + t64 = float64_mul(e1r, e2r, fpst_odd); + t64 = float64r32_muladd(e1c, e2c, t64, 0, fpst); - /* This conversion is exact, because we've already rounded. 
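As an aside, a minimal standalone sketch of the FPDot single-rounding trick used in this hunk, written with the same softfloat calls that appear above. The function name fpdot2_sketch is hypothetical, and fpst_odd is assumed to be a copy of fpst with round-to-odd rounding, as elsewhere in this file:

    /* Two widened products summed with one effective rounding step: the
     * first product is rounded to odd so that the final float64r32 fused
     * multiply-add cannot be double-rounded when narrowed to float32. */
    static float32 fpdot2_sketch(float32 e1r, float32 e1c,
                                 float32 e2r, float32 e2c,
                                 float_status *fpst, float_status *fpst_odd)
    {
        float64 t = float64_mul(float32_to_float64(e1r, fpst),
                                float32_to_float64(e2r, fpst), fpst_odd);
        t = float64r32_muladd(float32_to_float64(e1c, fpst),
                              float32_to_float64(e2c, fpst), t, 0, fpst);
        /* Exact: the value was already rounded to float32 precision. */
        return float64_to_float32(t, fpst);
    }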
*/ - t32 = float64_to_float32(t64, fpst); + /* This conversion is exact, because we've already rounded. */ + t32 = float64_to_float32(t64, fpst); + } /* The final accumulation step is not fused. */ return float32_add(sum, t32, fpst); @@ -3070,6 +3193,45 @@ void HELPER(gvec_bfdot_idx)(void *vd, void *vn, void *vm, clear_tail(d, opr_sz, simd_maxsz(desc)); } +void HELPER(sme2_bfvdot_idx)(void *vd, void *vn, void *vm, + void *va, CPUARMState *env, uint32_t desc) +{ + intptr_t i, j, opr_sz = simd_oprsz(desc); + intptr_t idx = extract32(desc, SIMD_DATA_SHIFT, 2); + intptr_t sel = extract32(desc, SIMD_DATA_SHIFT + 2, 1); + intptr_t elements = opr_sz / 4; + intptr_t eltspersegment = MIN(16 / 4, elements); + float32 *d = vd, *a = va; + uint16_t *n0 = vn; + uint16_t *n1 = vn + sizeof(ARMVectorReg); + uint32_t *m = vm; + float_status fpst, fpst_odd; + + if (is_ebf(env, &fpst, &fpst_odd)) { + for (i = 0; i < elements; i += eltspersegment) { + uint32_t m_idx = m[i + H4(idx)]; + + for (j = 0; j < eltspersegment; j++) { + uint32_t nn = (n0[H2(2 * (i + j) + sel)]) + | (n1[H2(2 * (i + j) + sel)] << 16); + d[i + H4(j)] = bfdotadd_ebf(a[i + H4(j)], nn, m_idx, + &fpst, &fpst_odd); + } + } + } else { + for (i = 0; i < elements; i += eltspersegment) { + uint32_t m_idx = m[i + H4(idx)]; + + for (j = 0; j < eltspersegment; j++) { + uint32_t nn = (n0[H2(2 * (i + j) + sel)]) + | (n1[H2(2 * (i + j) + sel)] << 16); + d[i + H4(j)] = bfdotadd(a[i + H4(j)], nn, m_idx, &fpst); + } + } + } + clear_tail(d, opr_sz, simd_maxsz(desc)); +} + void HELPER(gvec_bfmmla)(void *vd, void *vn, void *vm, void *va, CPUARMState *env, uint32_t desc) { @@ -3146,44 +3308,76 @@ void HELPER(gvec_bfmmla)(void *vd, void *vn, void *vm, void *va, clear_tail(d, opr_sz, simd_maxsz(desc)); } -void HELPER(gvec_bfmlal)(void *vd, void *vn, void *vm, void *va, - float_status *stat, uint32_t desc) +static void do_bfmlal(float32 *d, bfloat16 *n, bfloat16 *m, float32 *a, + float_status *stat, uint32_t desc, int negx, int negf) { intptr_t i, opr_sz = simd_oprsz(desc); - intptr_t sel = simd_data(desc); - float32 *d = vd, *a = va; - bfloat16 *n = vn, *m = vm; + intptr_t sel = extract32(desc, SIMD_DATA_SHIFT, 1); for (i = 0; i < opr_sz / 4; ++i) { - float32 nn = n[H2(i * 2 + sel)] << 16; + float32 nn = (negx ^ n[H2(i * 2 + sel)]) << 16; float32 mm = m[H2(i * 2 + sel)] << 16; - d[H4(i)] = float32_muladd(nn, mm, a[H4(i)], 0, stat); + d[H4(i)] = float32_muladd(nn, mm, a[H4(i)], negf, stat); } clear_tail(d, opr_sz, simd_maxsz(desc)); } -void HELPER(gvec_bfmlal_idx)(void *vd, void *vn, void *vm, - void *va, float_status *stat, uint32_t desc) +void HELPER(gvec_bfmlal)(void *vd, void *vn, void *vm, void *va, + float_status *stat, uint32_t desc) +{ + do_bfmlal(vd, vn, vm, va, stat, desc, 0, 0); +} + +void HELPER(gvec_bfmlsl)(void *vd, void *vn, void *vm, void *va, + float_status *stat, uint32_t desc) +{ + do_bfmlal(vd, vn, vm, va, stat, desc, 0x8000, 0); +} + +void HELPER(gvec_ah_bfmlsl)(void *vd, void *vn, void *vm, void *va, + float_status *stat, uint32_t desc) +{ + do_bfmlal(vd, vn, vm, va, stat, desc, 0, float_muladd_negate_product); +} + +static void do_bfmlal_idx(float32 *d, bfloat16 *n, bfloat16 *m, float32 *a, + float_status *stat, uint32_t desc, int negx, int negf) { intptr_t i, j, opr_sz = simd_oprsz(desc); intptr_t sel = extract32(desc, SIMD_DATA_SHIFT, 1); intptr_t index = extract32(desc, SIMD_DATA_SHIFT + 1, 3); intptr_t elements = opr_sz / 4; intptr_t eltspersegment = MIN(16 / 4, elements); - float32 *d = vd, *a = va; - bfloat16 *n = vn, *m = vm; for (i 
= 0; i < elements; i += eltspersegment) { float32 m_idx = m[H2(2 * i + index)] << 16; for (j = i; j < i + eltspersegment; j++) { - float32 n_j = n[H2(2 * j + sel)] << 16; - d[H4(j)] = float32_muladd(n_j, m_idx, a[H4(j)], 0, stat); + float32 n_j = (negx ^ n[H2(2 * j + sel)]) << 16; + d[H4(j)] = float32_muladd(n_j, m_idx, a[H4(j)], negf, stat); } } clear_tail(d, opr_sz, simd_maxsz(desc)); } +void HELPER(gvec_bfmlal_idx)(void *vd, void *vn, void *vm, void *va, + float_status *stat, uint32_t desc) +{ + do_bfmlal_idx(vd, vn, vm, va, stat, desc, 0, 0); +} + +void HELPER(gvec_bfmlsl_idx)(void *vd, void *vn, void *vm, void *va, + float_status *stat, uint32_t desc) +{ + do_bfmlal_idx(vd, vn, vm, va, stat, desc, 0x8000, 0); +} + +void HELPER(gvec_ah_bfmlsl_idx)(void *vd, void *vn, void *vm, void *va, + float_status *stat, uint32_t desc) +{ + do_bfmlal_idx(vd, vn, vm, va, stat, desc, 0, float_muladd_negate_product); +} + #define DO_CLAMP(NAME, TYPE) \ void HELPER(NAME)(void *d, void *n, void *m, void *a, uint32_t desc) \ { \ @@ -3253,3 +3447,90 @@ void HELPER(gvec_ursqrte_s)(void *vd, void *vn, uint32_t desc) } clear_tail(d, opr_sz, simd_maxsz(desc)); } + +static inline void do_lut_b(void *zd, uint64_t *indexes, uint64_t *table, + unsigned elements, unsigned segbase, + unsigned dstride, unsigned isize, + unsigned tsize, unsigned nreg) +{ + for (unsigned r = 0; r < nreg; ++r) { + uint8_t *dst = zd + dstride * r; + unsigned base = segbase + r * elements; + + for (unsigned e = 0; e < elements; ++e) { + unsigned index = extractn(indexes, (base + e) * isize, isize); + dst[H1(e)] = extractn(table, index * tsize, 8); + } + } +} + +static inline void do_lut_h(void *zd, uint64_t *indexes, uint64_t *table, + unsigned elements, unsigned segbase, + unsigned dstride, unsigned isize, + unsigned tsize, unsigned nreg) +{ + for (unsigned r = 0; r < nreg; ++r) { + uint16_t *dst = zd + dstride * r; + unsigned base = segbase + r * elements; + + for (unsigned e = 0; e < elements; ++e) { + unsigned index = extractn(indexes, (base + e) * isize, isize); + dst[H2(e)] = extractn(table, index * tsize, 16); + } + } +} + +static inline void do_lut_s(void *zd, uint64_t *indexes, uint32_t *table, + unsigned elements, unsigned segbase, + unsigned dstride, unsigned isize, + unsigned tsize, unsigned nreg) +{ + for (unsigned r = 0; r < nreg; ++r) { + uint32_t *dst = zd + dstride * r; + unsigned base = segbase + r * elements; + + for (unsigned e = 0; e < elements; ++e) { + unsigned index = extractn(indexes, (base + e) * isize, isize); + dst[H4(e)] = table[H4(index)]; + } + } +} + +#define DO_SME2_LUT(ISIZE, NREG, SUFF, ESIZE) \ +void helper_sme2_luti##ISIZE##_##NREG##SUFF \ + (void *zd, void *zn, CPUARMState *env, uint32_t desc) \ +{ \ + unsigned vl = simd_oprsz(desc); \ + unsigned strided = extract32(desc, SIMD_DATA_SHIFT, 1); \ + unsigned idx = extract32(desc, SIMD_DATA_SHIFT + 1, 4); \ + unsigned elements = vl / ESIZE; \ + unsigned dstride = (!strided ? 1 : NREG == 4 ? 
4 : 8); \ + unsigned segments = (ESIZE * 8) / (ISIZE * NREG); \ + unsigned segment = idx & (segments - 1); \ + ARMVectorReg indexes; \ + memcpy(&indexes, zn, vl); \ + do_lut_##SUFF(zd, indexes.d, (void *)env->za_state.zt0, elements, \ + segment * NREG * elements, \ + dstride * sizeof(ARMVectorReg), ISIZE, 32, NREG); \ +} + +DO_SME2_LUT(2,1,b, 1) +DO_SME2_LUT(2,1,h, 2) +DO_SME2_LUT(2,1,s, 4) +DO_SME2_LUT(2,2,b, 1) +DO_SME2_LUT(2,2,h, 2) +DO_SME2_LUT(2,2,s, 4) +DO_SME2_LUT(2,4,b, 1) +DO_SME2_LUT(2,4,h, 2) +DO_SME2_LUT(2,4,s, 4) + +DO_SME2_LUT(4,1,b, 1) +DO_SME2_LUT(4,1,h, 2) +DO_SME2_LUT(4,1,s, 4) +DO_SME2_LUT(4,2,b, 1) +DO_SME2_LUT(4,2,h, 2) +DO_SME2_LUT(4,2,s, 4) +DO_SME2_LUT(4,4,h, 2) +DO_SME2_LUT(4,4,s, 4) + +#undef DO_SME2_LUT diff --git a/target/arm/tcg/vec_internal.h b/target/arm/tcg/vec_internal.h index c02f9c3..cf41b03 100644 --- a/target/arm/tcg/vec_internal.h +++ b/target/arm/tcg/vec_internal.h @@ -223,6 +223,34 @@ int16_t do_sqrdmlah_h(int16_t, int16_t, int16_t, bool, bool, uint32_t *); int32_t do_sqrdmlah_s(int32_t, int32_t, int32_t, bool, bool, uint32_t *); int64_t do_sqrdmlah_d(int64_t, int64_t, int64_t, bool, bool); +#define do_ssat_b(val) MIN(MAX(val, INT8_MIN), INT8_MAX) +#define do_ssat_h(val) MIN(MAX(val, INT16_MIN), INT16_MAX) +#define do_ssat_s(val) MIN(MAX(val, INT32_MIN), INT32_MAX) +#define do_usat_b(val) MIN(MAX(val, 0), UINT8_MAX) +#define do_usat_h(val) MIN(MAX(val, 0), UINT16_MAX) +#define do_usat_s(val) MIN(MAX(val, 0), UINT32_MAX) + +static inline uint64_t do_urshr(uint64_t x, unsigned sh) +{ + if (likely(sh < 64)) { + return (x >> sh) + ((x >> (sh - 1)) & 1); + } else if (sh == 64) { + return x >> 63; + } else { + return 0; + } +} + +static inline int64_t do_srshr(int64_t x, unsigned sh) +{ + if (likely(sh < 64)) { + return (x >> sh) + ((x >> (sh - 1)) & 1); + } else { + /* Rounding the sign bit always produces 0. */ + return 0; + } +} + /** * bfdotadd: * @sum: addend @@ -272,6 +300,11 @@ bool is_ebf(CPUARMState *env, float_status *statusp, float_status *oddstatusp); /* * Negate as for FPCR.AH=1 -- do not negate NaNs. */ +static inline float16 bfloat16_ah_chs(float16 a) +{ + return bfloat16_is_any_nan(a) ? a : bfloat16_chs(a); +} + static inline float16 float16_ah_chs(float16 a) { return float16_is_any_nan(a) ? a : float16_chs(a); @@ -302,4 +335,119 @@ static inline float64 float64_maybe_ah_chs(float64 a, bool fpcr_ah) return fpcr_ah && float64_is_any_nan(a) ? a : float64_chs(a); } +/* Not actually called directly as a helper, but uses similar machinery. */ +bfloat16 helper_sme2_ah_fmax_b16(bfloat16 a, bfloat16 b, float_status *fpst); +bfloat16 helper_sme2_ah_fmin_b16(bfloat16 a, bfloat16 b, float_status *fpst); + +float32 sve_f16_to_f32(float16 f, float_status *fpst); +float16 sve_f32_to_f16(float32 f, float_status *fpst); + +/* + * Decode helper functions for predicate as counter. + */ + +typedef struct { + unsigned count; + unsigned lg2_stride; + bool invert; +} DecodeCounter; + +static inline DecodeCounter +decode_counter(unsigned png, unsigned vl, unsigned v_esz) +{ + DecodeCounter ret = { }; + + /* C.f. Arm pseudocode CounterToPredicate. 
*/ + if (likely(png & 0xf)) { + unsigned p_esz = ctz32(png); + + /* + * maxbit = log2(pl(bits) * 4) + * = log2(vl(bytes) * 4) + * = log2(vl) + 2 + * maxbit_mask = ones<maxbit:0> + * = (1 << (maxbit + 1)) - 1 + * = (1 << (log2(vl) + 2 + 1)) - 1 + * = (1 << (log2(vl) + 3)) - 1 + * = (pow2ceil(vl) << 3) - 1 + */ + ret.count = png & (((unsigned)pow2ceil(vl) << 3) - 1); + ret.count >>= p_esz + 1; + + ret.invert = (png >> 15) & 1; + + /* + * The Arm pseudocode for CounterToPredicate expands the count to + * a set of bits, and then the operation proceeds as for the original + * interpretation of predicates as a set of bits. + * + * We can avoid the expansion by adjusting the count and supplying + * an element stride. + */ + if (unlikely(p_esz != v_esz)) { + if (p_esz < v_esz) { + /* + * For predicate esz < vector esz, the expanded predicate + * will have more bits set than will be consumed. + * Adjust the count down, rounding up. + * Consider p_esz = MO_8, v_esz = MO_64, count 14: + * The expanded predicate would be + * 0011 1111 1111 1111 + * The significant bits are + * ...1 ...1 ...1 ...1 + */ + unsigned shift = v_esz - p_esz; + unsigned trunc = ret.count >> shift; + ret.count = trunc + (ret.count != (trunc << shift)); + } else { + /* + * For predicate esz > vector esz, the expanded predicate + * will have bits set only at power-of-two multiples of + * the vector esz. Bits at other multiples will all be + * false. Adjust the count up, and supply the caller + * with a stride of elements to skip. + */ + unsigned shift = p_esz - v_esz; + ret.count <<= shift; + ret.lg2_stride = shift; + } + } + } + return ret; +} + +/* Extract @len bits from an array of uint64_t at offset @pos bits. */ +static inline uint64_t extractn(uint64_t *p, unsigned pos, unsigned len) +{ + uint64_t x; + + p += pos / 64; + pos = pos % 64; + + x = p[0]; + if (pos + len > 64) { + x = (x >> pos) | (p[1] << (-pos & 63)); + pos = 0; + } + return extract64(x, pos, len); +} + +/* Deposit @len bits into an array of uint64_t at offset @pos bits. */ +static inline void depositn(uint64_t *p, unsigned pos, + unsigned len, uint64_t val) +{ + p += pos / 64; + pos = pos % 64; + + if (pos + len <= 64) { + p[0] = deposit64(p[0], pos, len, val); + } else { + unsigned len0 = 64 - pos; + unsigned len1 = len - len0; + + p[0] = deposit64(p[0], pos, len0, val); + p[1] = deposit64(p[1], 0, len1, val >> len0); + } +} + #endif /* TARGET_ARM_VEC_INTERNAL_H */ diff --git a/target/arm/tcg/vfp_helper.c b/target/arm/tcg/vfp_helper.c index b1324c5..e156e37 100644 --- a/target/arm/tcg/vfp_helper.c +++ b/target/arm/tcg/vfp_helper.c @@ -123,7 +123,7 @@ uint32_t vfp_get_fpsr_from_host(CPUARMState *env) a64_flags |= (get_float_exception_flags(&env->vfp.fp_status[FPST_A64_F16]) & ~(float_flag_input_denormal_flushed | float_flag_input_denormal_used)); /* - * We do not merge in flags from FPST_AH or FPST_AH_F16, because + * We do not merge in flags from FPST_{AH,ZA} or FPST_{AH,ZA}_F16, because * they are used for insns that must not set the cumulative exception bits. 
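A quick illustration of how the extractn()/depositn() helpers added to vec_internal.h above handle a field that straddles a 64-bit word boundary; the values are hypothetical and not taken from the patch:

    /* Deposit an 8-bit field at bit offset 60: the low 4 bits land in
     * words[0] and the high 4 bits in words[1]; extractn() reassembles
     * the field from both words. */
    uint64_t words[2] = { 0, 0 };
    depositn(words, 60, 8, 0xab);                  /* words[0] = 0xb000000000000000, words[1] = 0xa */
    assert(extractn(words, 60, 8) == 0xab);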
*/ @@ -196,6 +196,8 @@ void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask) set_float_rounding_mode(i, &env->vfp.fp_status[FPST_A64]); set_float_rounding_mode(i, &env->vfp.fp_status[FPST_A32_F16]); set_float_rounding_mode(i, &env->vfp.fp_status[FPST_A64_F16]); + set_float_rounding_mode(i, &env->vfp.fp_status[FPST_ZA]); + set_float_rounding_mode(i, &env->vfp.fp_status[FPST_ZA_F16]); } if (changed & FPCR_FZ16) { bool ftz_enabled = val & FPCR_FZ16; @@ -203,15 +205,18 @@ void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask) set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A64_F16]); set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_STD_F16]); set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_AH_F16]); + set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_ZA_F16]); set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A32_F16]); set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A64_F16]); set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_STD_F16]); set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_AH_F16]); + set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_ZA_F16]); } if (changed & FPCR_FZ) { bool ftz_enabled = val & FPCR_FZ; set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A32]); set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A64]); + set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_ZA]); /* FIZ is A64 only so FZ always makes A32 code flush inputs to zero */ set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A32]); } @@ -223,6 +228,7 @@ void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask) bool fitz_enabled = (val & FPCR_FIZ) || (val & (FPCR_FZ | FPCR_AH)) == FPCR_FZ; set_flush_inputs_to_zero(fitz_enabled, &env->vfp.fp_status[FPST_A64]); + set_flush_inputs_to_zero(fitz_enabled, &env->vfp.fp_status[FPST_ZA]); } if (changed & FPCR_DN) { bool dnan_enabled = val & FPCR_DN; @@ -240,9 +246,13 @@ void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask) /* Change behaviours for A64 FP operations */ arm_set_ah_fp_behaviours(&env->vfp.fp_status[FPST_A64]); arm_set_ah_fp_behaviours(&env->vfp.fp_status[FPST_A64_F16]); + arm_set_ah_fp_behaviours(&env->vfp.fp_status[FPST_ZA]); + arm_set_ah_fp_behaviours(&env->vfp.fp_status[FPST_ZA_F16]); } else { arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A64]); arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A64_F16]); + arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_ZA]); + arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_ZA_F16]); } } /* diff --git a/target/arm/trace-events b/target/arm/trace-events index 4438dce..72a2c7d 100644 --- a/target/arm/trace-events +++ b/target/arm/trace-events @@ -13,3 +13,13 @@ arm_gt_update_irq(int timer, int irqstate) "gt_update_irq: timer %d irqstate %d" # kvm.c kvm_arm_fixup_msi_route(uint64_t iova, uint64_t gpa) "MSI iova = 0x%"PRIx64" is translated into 0x%"PRIx64 + +# cpu.c +arm_cpu_reset(uint64_t mp_aff) "cpu %" PRIu64 +arm_emulate_firmware_reset(uint64_t mp_aff, unsigned target_el) "cpu %" PRIu64 " @EL%u" + +# arm-powerctl.c +arm_powerctl_set_cpu_on(uint64_t mp_aff, unsigned target_el, const char *mode, uint64_t entry, uint64_t context_id) "cpu %" PRIu64 " (EL %u, %s) @ 0x%" PRIx64 " with R0 = 0x%" PRIx64 +arm_powerctl_set_cpu_on_and_reset(uint64_t mp_aff) "cpu %" PRIu64 +arm_powerctl_set_cpu_off(uint64_t mp_aff) "cpu %" PRIu64 +arm_powerctl_reset_cpu(uint64_t mp_aff) "cpu %" PRIu64 diff --git 
a/target/avr/cpu.c b/target/avr/cpu.c index 2502415..a6df71d 100644 --- a/target/avr/cpu.c +++ b/target/avr/cpu.c @@ -45,7 +45,7 @@ static vaddr avr_cpu_get_pc(CPUState *cs) static bool avr_cpu_has_work(CPUState *cs) { - return (cs->interrupt_request & (CPU_INTERRUPT_HARD | CPU_INTERRUPT_RESET)) + return cpu_test_interrupt(cs, CPU_INTERRUPT_HARD | CPU_INTERRUPT_RESET) && cpu_interrupts_enabled(cpu_env(cs)); } @@ -250,6 +250,12 @@ static const TCGCPUOps avr_tcg_ops = { .cpu_exec_reset = cpu_reset, .tlb_fill = avr_cpu_tlb_fill, .do_interrupt = avr_cpu_do_interrupt, + /* + * TODO: code and data wrapping are different, but for the most part + * AVR only references bytes or aligned code fetches. But we use + * non-aligned MO_16 accesses for stack push/pop. + */ + .pointer_wrap = cpu_pointer_wrap_uint32, }; static void avr_cpu_class_init(ObjectClass *oc, const void *data) diff --git a/target/avr/helper.c b/target/avr/helper.c index b9cd6d5..4b29ab3 100644 --- a/target/avr/helper.c +++ b/target/avr/helper.c @@ -47,7 +47,7 @@ bool avr_cpu_exec_interrupt(CPUState *cs, int interrupt_request) cs->exception_index = EXCP_RESET; avr_cpu_do_interrupt(cs); - cs->interrupt_request &= ~CPU_INTERRUPT_RESET; + cpu_reset_interrupt(cs, CPU_INTERRUPT_RESET); return true; } } @@ -59,7 +59,7 @@ bool avr_cpu_exec_interrupt(CPUState *cs, int interrupt_request) env->intsrc &= env->intsrc - 1; /* clear the interrupt */ if (!env->intsrc) { - cs->interrupt_request &= ~CPU_INTERRUPT_HARD; + cpu_reset_interrupt(cs, CPU_INTERRUPT_HARD); } return true; } diff --git a/target/avr/translate.c b/target/avr/translate.c index 804b0b2..ef6f655 100644 --- a/target/avr/translate.c +++ b/target/avr/translate.c @@ -981,14 +981,15 @@ static void gen_pop_ret(DisasContext *ctx, TCGv ret) } } -static void gen_goto_tb(DisasContext *ctx, int n, target_ulong dest) +static void gen_goto_tb(DisasContext *ctx, unsigned tb_slot_idx, + target_ulong dest) { const TranslationBlock *tb = ctx->base.tb; if (translator_use_goto_tb(&ctx->base, dest)) { - tcg_gen_goto_tb(n); + tcg_gen_goto_tb(tb_slot_idx); tcg_gen_movi_i32(cpu_pc, dest); - tcg_gen_exit_tb(tb, n); + tcg_gen_exit_tb(tb, tb_slot_idx); } else { tcg_gen_movi_i32(cpu_pc, dest); tcg_gen_lookup_and_goto_ptr(); diff --git a/target/hexagon/translate.c b/target/hexagon/translate.c index 02fd40c..50766ea 100644 --- a/target/hexagon/translate.c +++ b/target/hexagon/translate.c @@ -133,15 +133,15 @@ static bool use_goto_tb(DisasContext *ctx, target_ulong dest) return translator_use_goto_tb(&ctx->base, dest); } -static void gen_goto_tb(DisasContext *ctx, int idx, target_ulong dest, bool - move_to_pc) +static void gen_goto_tb(DisasContext *ctx, unsigned tb_slot_idx, + target_ulong dest, bool move_to_pc) { if (use_goto_tb(ctx, dest)) { - tcg_gen_goto_tb(idx); + tcg_gen_goto_tb(tb_slot_idx); if (move_to_pc) { tcg_gen_movi_tl(hex_gpr[HEX_REG_PC], dest); } - tcg_gen_exit_tb(ctx->base.tb, idx); + tcg_gen_exit_tb(ctx->base.tb, tb_slot_idx); } else { if (move_to_pc) { tcg_gen_movi_tl(hex_gpr[HEX_REG_PC], dest); diff --git a/target/hppa/cpu.c b/target/hppa/cpu.c index 6465181..0ca79ee 100644 --- a/target/hppa/cpu.c +++ b/target/hppa/cpu.c @@ -135,7 +135,7 @@ static void hppa_restore_state_to_opc(CPUState *cs, #ifndef CONFIG_USER_ONLY static bool hppa_cpu_has_work(CPUState *cs) { - return cs->interrupt_request & (CPU_INTERRUPT_HARD | CPU_INTERRUPT_NMI); + return cpu_test_interrupt(cs, CPU_INTERRUPT_HARD | CPU_INTERRUPT_NMI); } #endif /* !CONFIG_USER_ONLY */ @@ -269,6 +269,7 @@ static const TCGCPUOps hppa_tcg_ops 
= { #ifndef CONFIG_USER_ONLY .tlb_fill_align = hppa_cpu_tlb_fill_align, + .pointer_wrap = cpu_pointer_wrap_notreached, .cpu_exec_interrupt = hppa_cpu_exec_interrupt, .cpu_exec_halt = hppa_cpu_has_work, .cpu_exec_reset = cpu_reset, diff --git a/target/hppa/cpu.h b/target/hppa/cpu.h index 11d59d1..c652ef9 100644 --- a/target/hppa/cpu.h +++ b/target/hppa/cpu.h @@ -29,21 +29,21 @@ #include "qemu/interval-tree.h" #include "hw/registerfields.h" -#define MMU_ABS_W_IDX 6 -#define MMU_ABS_IDX 7 -#define MMU_KERNEL_IDX 8 -#define MMU_KERNEL_P_IDX 9 -#define MMU_PL1_IDX 10 -#define MMU_PL1_P_IDX 11 -#define MMU_PL2_IDX 12 -#define MMU_PL2_P_IDX 13 -#define MMU_USER_IDX 14 -#define MMU_USER_P_IDX 15 - -#define MMU_IDX_MMU_DISABLED(MIDX) ((MIDX) < MMU_KERNEL_IDX) -#define MMU_IDX_TO_PRIV(MIDX) (((MIDX) - MMU_KERNEL_IDX) / 2) -#define MMU_IDX_TO_P(MIDX) (((MIDX) - MMU_KERNEL_IDX) & 1) -#define PRIV_P_TO_MMU_IDX(PRIV, P) ((PRIV) * 2 + !!(P) + MMU_KERNEL_IDX) +#define MMU_KERNEL_IDX 0 +#define MMU_KERNEL_P_IDX 1 +#define MMU_PL1_IDX 2 +#define MMU_PL1_P_IDX 3 +#define MMU_PL2_IDX 4 +#define MMU_PL2_P_IDX 5 +#define MMU_USER_IDX 6 +#define MMU_USER_P_IDX 7 +#define MMU_ABS_IDX 8 +#define MMU_ABS_W_IDX 9 + +#define MMU_IDX_MMU_DISABLED(MIDX) ((MIDX) >= MMU_ABS_IDX) +#define MMU_IDX_TO_PRIV(MIDX) ((MIDX) / 2) +#define MMU_IDX_TO_P(MIDX) ((MIDX) & 1) +#define PRIV_P_TO_MMU_IDX(PRIV, P) ((PRIV) * 2 + !!(P)) #define PRIV_KERNEL 0 #define PRIV_USER 3 @@ -187,7 +187,7 @@ typedef struct HPPATLBEntry { struct HPPATLBEntry *unused_next; }; - target_ulong pa; + hwaddr pa; unsigned entry_valid : 1; @@ -320,8 +320,8 @@ void hppa_translate_code(CPUState *cs, TranslationBlock *tb, #define CPU_RESOLVING_TYPE TYPE_HPPA_CPU -static inline target_ulong hppa_form_gva_mask(uint64_t gva_offset_mask, - uint64_t spc, target_ulong off) +static inline vaddr hppa_form_gva_mask(uint64_t gva_offset_mask, + uint64_t spc, target_ulong off) { #ifdef CONFIG_USER_ONLY return off & gva_offset_mask; @@ -330,8 +330,8 @@ static inline target_ulong hppa_form_gva_mask(uint64_t gva_offset_mask, #endif } -static inline target_ulong hppa_form_gva(CPUHPPAState *env, uint64_t spc, - target_ulong off) +static inline vaddr hppa_form_gva(CPUHPPAState *env, uint64_t spc, + target_ulong off) { return hppa_form_gva_mask(env->gva_offset_mask, spc, off); } diff --git a/target/hppa/fpu_helper.c b/target/hppa/fpu_helper.c index ddd0a34..4535320 100644 --- a/target/hppa/fpu_helper.c +++ b/target/hppa/fpu_helper.c @@ -94,7 +94,8 @@ static void update_fr0_op(CPUHPPAState *env, uintptr_t ra) { uint32_t soft_exp = get_float_exception_flags(&env->fp_status); uint32_t hard_exp = 0; - uint32_t shadow = env->fr0_shadow; + uint32_t shadow = env->fr0_shadow & 0x3ffffff; + uint32_t fr1 = 0; if (likely(soft_exp == 0)) { env->fr[0] = (uint64_t)shadow << 32; @@ -107,9 +108,22 @@ static void update_fr0_op(CPUHPPAState *env, uintptr_t ra) hard_exp |= CONVERT_BIT(soft_exp, float_flag_overflow, R_FPSR_ENA_O_MASK); hard_exp |= CONVERT_BIT(soft_exp, float_flag_divbyzero, R_FPSR_ENA_Z_MASK); hard_exp |= CONVERT_BIT(soft_exp, float_flag_invalid, R_FPSR_ENA_V_MASK); - shadow |= hard_exp << (R_FPSR_FLAGS_SHIFT - R_FPSR_ENABLES_SHIFT); + if (hard_exp & shadow) { + shadow = FIELD_DP32(shadow, FPSR, T, 1); + /* fill exception register #1, which is lower 32-bits of fr[0] */ +#if !defined(CONFIG_USER_ONLY) + if (hard_exp & (R_FPSR_ENA_O_MASK | R_FPSR_ENA_U_MASK)) { + /* over- and underflow both set overflow flag only */ + fr1 = FIELD_DP32(fr1, FPSR, C, 1); + fr1 = FIELD_DP32(fr1, FPSR, 
FLG_O, 1); + } else +#endif + { + fr1 |= hard_exp << (R_FPSR_FLAGS_SHIFT - R_FPSR_ENABLES_SHIFT); + } + } env->fr0_shadow = shadow; - env->fr[0] = (uint64_t)shadow << 32; + env->fr[0] = (uint64_t)shadow << 32 | fr1; if (hard_exp & shadow) { hppa_dynamic_excp(env, EXCP_ASSIST, ra); diff --git a/target/hppa/helper.c b/target/hppa/helper.c index d7f8495..edcd2bf 100644 --- a/target/hppa/helper.c +++ b/target/hppa/helper.c @@ -148,8 +148,8 @@ void hppa_cpu_dump_state(CPUState *cs, FILE *f, int flags) m = UINT32_MAX; } - qemu_fprintf(f, "IA_F %08" PRIx64 ":%0*" PRIx64 " (" TARGET_FMT_lx ")\n" - "IA_B %08" PRIx64 ":%0*" PRIx64 " (" TARGET_FMT_lx ")\n", + qemu_fprintf(f, "IA_F %08" PRIx64 ":%0*" PRIx64 " (0x%" VADDR_PRIx ")\n" + "IA_B %08" PRIx64 ":%0*" PRIx64 " (0x%" VADDR_PRIx ")\n", env->iasq_f >> 32, w, m & env->iaoq_f, hppa_form_gva_mask(env->gva_offset_mask, env->iasq_f, env->iaoq_f), diff --git a/target/hppa/insns.decode b/target/hppa/insns.decode index 4eaac75..13c6a55 100644 --- a/target/hppa/insns.decode +++ b/target/hppa/insns.decode @@ -365,10 +365,10 @@ fstd 011100 ..... ..... .. ............1. @ldstim11 &mpyadd rm1 rm2 ta ra tm @mpyadd ...... rm1:5 rm2:5 ta:5 ra:5 . tm:5 &mpyadd -fmpyadd_f 000110 ..... ..... ..... ..... 0 ..... @mpyadd -fmpyadd_d 000110 ..... ..... ..... ..... 1 ..... @mpyadd -fmpysub_f 100110 ..... ..... ..... ..... 0 ..... @mpyadd -fmpysub_d 100110 ..... ..... ..... ..... 1 ..... @mpyadd +fmpyadd_f 000110 ..... ..... ..... ..... 1 ..... @mpyadd +fmpyadd_d 000110 ..... ..... ..... ..... 0 ..... @mpyadd +fmpysub_f 100110 ..... ..... ..... ..... 1 ..... @mpyadd +fmpysub_d 100110 ..... ..... ..... ..... 0 ..... @mpyadd #### # Conditional Branches diff --git a/target/hppa/int_helper.c b/target/hppa/int_helper.c index 7d48643..191ae19 100644 --- a/target/hppa/int_helper.c +++ b/target/hppa/int_helper.c @@ -177,6 +177,10 @@ void hppa_cpu_do_interrupt(CPUState *cs) } } env->cr[CR_IIR] = ldl_phys(cs->as, paddr); + if (i == EXCP_ASSIST) { + /* stuff insn code into bits of FP exception register #1 */ + env->fr[0] |= (env->cr[CR_IIR] & 0x03ffffff); + } } break; diff --git a/target/hppa/mem_helper.c b/target/hppa/mem_helper.c index 9bdd0a6..cce82e6 100644 --- a/target/hppa/mem_helper.c +++ b/target/hppa/mem_helper.c @@ -803,7 +803,7 @@ void HELPER(diag_btlb)(CPUHPPAState *env) uint64_t HELPER(b_gate_priv)(CPUHPPAState *env, uint64_t iaoq_f) { - uint64_t gva = hppa_form_gva(env, env->iasq_f, iaoq_f); + vaddr gva = hppa_form_gva(env, env->iasq_f, iaoq_f); HPPATLBEntry *ent = hppa_find_tlb(env, gva); if (ent == NULL) { diff --git a/target/hppa/trace-events b/target/hppa/trace-events index a10ba73..01761a4 100644 --- a/target/hppa/trace-events +++ b/target/hppa/trace-events @@ -1,13 +1,13 @@ # See docs/devel/tracing.rst for syntax documentation. 
# mem_helper.c -disable hppa_tlb_flush_ent(void *env, void *ent, uint64_t va_b, uint64_t va_e, uint64_t pa) "env=%p ent=%p va_b=0x%lx va_e=0x%lx pa=0x%lx" -disable hppa_tlb_find_entry(void *env, void *ent, int valid, uint64_t va_b, uint64_t va_e, uint64_t pa) "env=%p ent=%p valid=%d va_b=0x%lx va_e=0x%lx pa=0x%lx" +disable hppa_tlb_flush_ent(void *env, void *ent, uint64_t va_b, uint64_t va_e, uint64_t pa) "env=%p ent=%p va_b=0x%lx va_e=0x%lx pa=0x%" PRIx64 +disable hppa_tlb_find_entry(void *env, void *ent, int valid, uint64_t va_b, uint64_t va_e, uint64_t pa) "env=%p ent=%p valid=%d va_b=0x%lx va_e=0x%lx pa=0x%" PRIx64 disable hppa_tlb_find_entry_not_found(void *env, uint64_t addr) "env=%p addr=%08lx" disable hppa_tlb_get_physical_address(void *env, int ret, int prot, uint64_t addr, uint64_t phys) "env=%p ret=%d prot=%d addr=0x%lx phys=0x%lx" disable hppa_tlb_fill_excp(void *env, uint64_t addr, int size, int type, int mmu_idx) "env=%p addr=0x%lx size=%d type=%d mmu_idx=%d" disable hppa_tlb_fill_success(void *env, uint64_t addr, uint64_t phys, int size, int type, int mmu_idx) "env=%p addr=0x%lx phys=0x%lx size=%d type=%d mmu_idx=%d" -disable hppa_tlb_itlba(void *env, void *ent, uint64_t va_b, uint64_t va_e, uint64_t pa) "env=%p ent=%p va_b=0x%lx va_e=0x%lx pa=0x%lx" +disable hppa_tlb_itlba(void *env, void *ent, uint64_t va_b, uint64_t va_e, uint64_t pa) "env=%p ent=%p va_b=0x%lx va_e=0x%lx pa=0x%" PRIx64 disable hppa_tlb_itlbp(void *env, void *ent, int access_id, int u, int pl2, int pl1, int type, int b, int d, int t) "env=%p ent=%p access_id=%x u=%d pl2=%d pl1=%d type=%d b=%d d=%d t=%d" disable hppa_tlb_ptlb(void *env) "env=%p" disable hppa_tlb_ptlb_local(void *env) "env=%p" diff --git a/target/hppa/translate.c b/target/hppa/translate.c index 7a81cfc..853cba2 100644 --- a/target/hppa/translate.c +++ b/target/hppa/translate.c @@ -104,6 +104,12 @@ typedef struct DisasContext { #define MMU_DISABLED(C) MMU_IDX_MMU_DISABLED((C)->mmu_idx) #endif +static inline MemOp mo_endian(DisasContext *ctx) +{ + /* The PSW_E bit sets the (little) endianness, but we don't implement it. */ + return MO_BE; +} + /* Note that ssm/rsm instructions number PSW_W and PSW_E differently. */ static int expand_sm_imm(DisasContext *ctx, int val) { @@ -1599,6 +1605,7 @@ static void do_load_32(DisasContext *ctx, TCGv_i32 dest, unsigned rb, /* Caller uses nullify_over/nullify_end. */ assert(ctx->null_cond.c == TCG_COND_NEVER); + mop |= mo_endian(ctx); form_gva(ctx, &addr, &ofs, rb, rx, scale, disp, sp, modify, MMU_DISABLED(ctx)); tcg_gen_qemu_ld_i32(dest, addr, ctx->mmu_idx, mop | UNALIGN(ctx)); @@ -1617,6 +1624,7 @@ static void do_load_64(DisasContext *ctx, TCGv_i64 dest, unsigned rb, /* Caller uses nullify_over/nullify_end. */ assert(ctx->null_cond.c == TCG_COND_NEVER); + mop |= mo_endian(ctx); form_gva(ctx, &addr, &ofs, rb, rx, scale, disp, sp, modify, MMU_DISABLED(ctx)); tcg_gen_qemu_ld_i64(dest, addr, ctx->mmu_idx, mop | UNALIGN(ctx)); @@ -1635,6 +1643,7 @@ static void do_store_32(DisasContext *ctx, TCGv_i32 src, unsigned rb, /* Caller uses nullify_over/nullify_end. */ assert(ctx->null_cond.c == TCG_COND_NEVER); + mop |= mo_endian(ctx); form_gva(ctx, &addr, &ofs, rb, rx, scale, disp, sp, modify, MMU_DISABLED(ctx)); tcg_gen_qemu_st_i32(src, addr, ctx->mmu_idx, mop | UNALIGN(ctx)); @@ -1653,6 +1662,7 @@ static void do_store_64(DisasContext *ctx, TCGv_i64 src, unsigned rb, /* Caller uses nullify_over/nullify_end. 
*/ assert(ctx->null_cond.c == TCG_COND_NEVER); + mop |= mo_endian(ctx); form_gva(ctx, &addr, &ofs, rb, rx, scale, disp, sp, modify, MMU_DISABLED(ctx)); tcg_gen_qemu_st_i64(src, addr, ctx->mmu_idx, mop | UNALIGN(ctx)); @@ -1691,7 +1701,7 @@ static bool do_floadw(DisasContext *ctx, unsigned rt, unsigned rb, nullify_over(ctx); tmp = tcg_temp_new_i32(); - do_load_32(ctx, tmp, rb, rx, scale, disp, sp, modify, MO_TEUL); + do_load_32(ctx, tmp, rb, rx, scale, disp, sp, modify, MO_UL); save_frw_i32(rt, tmp); if (rt == 0) { @@ -1716,7 +1726,7 @@ static bool do_floadd(DisasContext *ctx, unsigned rt, unsigned rb, nullify_over(ctx); tmp = tcg_temp_new_i64(); - do_load_64(ctx, tmp, rb, rx, scale, disp, sp, modify, MO_TEUQ); + do_load_64(ctx, tmp, rb, rx, scale, disp, sp, modify, MO_UQ); save_frd(rt, tmp); if (rt == 0) { @@ -1750,7 +1760,7 @@ static bool do_fstorew(DisasContext *ctx, unsigned rt, unsigned rb, nullify_over(ctx); tmp = load_frw_i32(rt); - do_store_32(ctx, tmp, rb, rx, scale, disp, sp, modify, MO_TEUL); + do_store_32(ctx, tmp, rb, rx, scale, disp, sp, modify, MO_UL); return nullify_end(ctx); } @@ -1770,7 +1780,7 @@ static bool do_fstored(DisasContext *ctx, unsigned rt, unsigned rb, nullify_over(ctx); tmp = load_frd(rt); - do_store_64(ctx, tmp, rb, rx, scale, disp, sp, modify, MO_TEUQ); + do_store_64(ctx, tmp, rb, rx, scale, disp, sp, modify, MO_UQ); return nullify_end(ctx); } @@ -3302,7 +3312,7 @@ static bool trans_ld(DisasContext *ctx, arg_ldst *a) return gen_illegal(ctx); } return do_load(ctx, a->t, a->b, a->x, a->scale ? a->size : 0, - a->disp, a->sp, a->m, a->size | MO_TE); + a->disp, a->sp, a->m, a->size); } static bool trans_st(DisasContext *ctx, arg_ldst *a) @@ -3311,12 +3321,12 @@ static bool trans_st(DisasContext *ctx, arg_ldst *a) if (!ctx->is_pa20 && a->size > MO_32) { return gen_illegal(ctx); } - return do_store(ctx, a->t, a->b, a->disp, a->sp, a->m, a->size | MO_TE); + return do_store(ctx, a->t, a->b, a->disp, a->sp, a->m, a->size); } static bool trans_ldc(DisasContext *ctx, arg_ldst *a) { - MemOp mop = MO_TE | MO_ALIGN | a->size; + MemOp mop = mo_endian(ctx) | MO_ALIGN | a->size; TCGv_i64 dest, ofs; TCGv_i64 addr; diff --git a/target/i386/arch_memory_mapping.c b/target/i386/arch_memory_mapping.c index a2398c2..560f468 100644 --- a/target/i386/arch_memory_mapping.c +++ b/target/i386/arch_memory_mapping.c @@ -35,7 +35,7 @@ static void walk_pte(MemoryMappingList *list, AddressSpace *as, } start_paddr = (pte & ~0xfff) & ~(0x1ULL << 63); - if (cpu_physical_memory_is_io(start_paddr)) { + if (address_space_is_io(as, start_paddr)) { /* I/O region */ continue; } @@ -65,7 +65,7 @@ static void walk_pte2(MemoryMappingList *list, AddressSpace *as, } start_paddr = pte & ~0xfff; - if (cpu_physical_memory_is_io(start_paddr)) { + if (address_space_is_io(as, start_paddr)) { /* I/O region */ continue; } @@ -100,7 +100,7 @@ static void walk_pde(MemoryMappingList *list, AddressSpace *as, if (pde & PG_PSE_MASK) { /* 2 MB page */ start_paddr = (pde & ~0x1fffff) & ~(0x1ULL << 63); - if (cpu_physical_memory_is_io(start_paddr)) { + if (address_space_is_io(as, start_paddr)) { /* I/O region */ continue; } @@ -142,7 +142,7 @@ static void walk_pde2(MemoryMappingList *list, AddressSpace *as, */ high_paddr = ((hwaddr)(pde & 0x1fe000) << 19); start_paddr = (pde & ~0x3fffff) | high_paddr; - if (cpu_physical_memory_is_io(start_paddr)) { + if (address_space_is_io(as, start_paddr)) { /* I/O region */ continue; } @@ -203,7 +203,7 @@ static void walk_pdpe(MemoryMappingList *list, AddressSpace *as, if (pdpe & 
PG_PSE_MASK) { /* 1 GB page */ start_paddr = (pdpe & ~0x3fffffff) & ~(0x1ULL << 63); - if (cpu_physical_memory_is_io(start_paddr)) { + if (address_space_is_io(as, start_paddr)) { /* I/O region */ continue; } diff --git a/target/i386/confidential-guest.h b/target/i386/confidential-guest.h index 164be76..48b88db 100644 --- a/target/i386/confidential-guest.h +++ b/target/i386/confidential-guest.h @@ -39,8 +39,10 @@ struct X86ConfidentialGuestClass { /* <public> */ int (*kvm_type)(X86ConfidentialGuest *cg); - uint32_t (*mask_cpuid_features)(X86ConfidentialGuest *cg, uint32_t feature, uint32_t index, - int reg, uint32_t value); + void (*cpu_instance_init)(X86ConfidentialGuest *cg, CPUState *cpu); + uint32_t (*adjust_cpuid_features)(X86ConfidentialGuest *cg, uint32_t feature, + uint32_t index, int reg, uint32_t value); + int (*check_features)(X86ConfidentialGuest *cg, CPUState *cs); }; /** @@ -59,25 +61,47 @@ static inline int x86_confidential_guest_kvm_type(X86ConfidentialGuest *cg) } } +static inline void x86_confidential_guest_cpu_instance_init(X86ConfidentialGuest *cg, + CPUState *cpu) +{ + X86ConfidentialGuestClass *klass = X86_CONFIDENTIAL_GUEST_GET_CLASS(cg); + + if (klass->cpu_instance_init) { + klass->cpu_instance_init(cg, cpu); + } +} + /** - * x86_confidential_guest_mask_cpuid_features: + * x86_confidential_guest_adjust_cpuid_features: * - * Removes unsupported features from a confidential guest's CPUID values, returns - * the value with the bits removed. The bits removed should be those that KVM - * provides independent of host-supported CPUID features, but are not supported by - * the confidential computing firmware. + * Adjust the supported features from a confidential guest's CPUID values, + * returns the adjusted value. There are bits being removed that are not + * supported by the confidential computing firmware or bits being added that + * are forcibly exposed to guest by the confidential computing firmware. 
*/ -static inline int x86_confidential_guest_mask_cpuid_features(X86ConfidentialGuest *cg, +static inline int x86_confidential_guest_adjust_cpuid_features(X86ConfidentialGuest *cg, uint32_t feature, uint32_t index, int reg, uint32_t value) { X86ConfidentialGuestClass *klass = X86_CONFIDENTIAL_GUEST_GET_CLASS(cg); - if (klass->mask_cpuid_features) { - return klass->mask_cpuid_features(cg, feature, index, reg, value); + if (klass->adjust_cpuid_features) { + return klass->adjust_cpuid_features(cg, feature, index, reg, value); } else { return value; } } +static inline int x86_confidential_guest_check_features(X86ConfidentialGuest *cg, + CPUState *cs) +{ + X86ConfidentialGuestClass *klass = X86_CONFIDENTIAL_GUEST_GET_CLASS(cg); + + if (klass->check_features) { + return klass->check_features(cg, cs); + } + + return 0; +} + #endif diff --git a/target/i386/cpu-system.c b/target/i386/cpu-system.c index 55f192e..b1494aa 100644 --- a/target/i386/cpu-system.c +++ b/target/i386/cpu-system.c @@ -24,7 +24,7 @@ #include "qobject/qdict.h" #include "qapi/qobject-input-visitor.h" #include "qom/qom-qobject.h" -#include "qapi/qapi-commands-machine-target.h" +#include "qapi/qapi-commands-machine.h" #include "cpu-internal.h" diff --git a/target/i386/cpu.c b/target/i386/cpu.c index ec908d7..455caff 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -28,6 +28,7 @@ #include "system/hvf.h" #include "hvf/hvf-i386.h" #include "kvm/kvm_i386.h" +#include "kvm/tdx.h" #include "sev.h" #include "qapi/error.h" #include "qemu/error-report.h" @@ -37,12 +38,14 @@ #include "hw/i386/topology.h" #include "exec/watchpoint.h" #ifndef CONFIG_USER_ONLY +#include "confidential-guest.h" #include "system/reset.h" -#include "qapi/qapi-commands-machine-target.h" +#include "qapi/qapi-commands-machine.h" #include "system/address-spaces.h" #include "hw/boards.h" #include "hw/i386/sgx-epc.h" #endif +#include "system/qtest.h" #include "tcg/tcg-cpu.h" #include "disas/capstone.h" @@ -65,6 +68,7 @@ struct CPUID2CacheDescriptorInfo { /* * Known CPUID 2 cache descriptors. + * TLB, prefetch and sectored cache related descriptors are not included. * From Intel SDM Volume 2A, CPUID instruction */ struct CPUID2CacheDescriptorInfo cpuid2_cache_descriptors[] = { @@ -86,18 +90,29 @@ struct CPUID2CacheDescriptorInfo cpuid2_cache_descriptors[] = { .associativity = 2, .line_size = 64, }, [0x21] = { .level = 2, .type = UNIFIED_CACHE, .size = 256 * KiB, .associativity = 8, .line_size = 64, }, - /* lines per sector is not supported cpuid2_cache_descriptor(), - * so descriptors 0x22, 0x23 are not included - */ + /* + * lines per sector is not supported cpuid2_cache_descriptor(), + * so descriptors 0x22, 0x23 are not included + */ [0x24] = { .level = 2, .type = UNIFIED_CACHE, .size = 1 * MiB, .associativity = 16, .line_size = 64, }, - /* lines per sector is not supported cpuid2_cache_descriptor(), - * so descriptors 0x25, 0x20 are not included - */ + /* + * lines per sector is not supported cpuid2_cache_descriptor(), + * so descriptors 0x25, 0x29 are not included + */ [0x2C] = { .level = 1, .type = DATA_CACHE, .size = 32 * KiB, .associativity = 8, .line_size = 64, }, [0x30] = { .level = 1, .type = INSTRUCTION_CACHE, .size = 32 * KiB, .associativity = 8, .line_size = 64, }, + /* + * Newer Intel CPUs (having the cores without L3, e.g., Intel MTL, ARL) + * use CPUID 0x4 leaf to describe cache topology, by encoding CPUID 0x2 + * leaf with 0xFF. For older CPUs (without 0x4 leaf), it's also valid + * to just ignore L3's code if there's no L3. 
+ * + * This already covers all the cases in QEMU, so code 0x40 is not + * included. + */ [0x41] = { .level = 2, .type = UNIFIED_CACHE, .size = 128 * KiB, .associativity = 4, .line_size = 32, }, [0x42] = { .level = 2, .type = UNIFIED_CACHE, .size = 256 * KiB, @@ -114,7 +129,18 @@ struct CPUID2CacheDescriptorInfo cpuid2_cache_descriptors[] = { .associativity = 8, .line_size = 64, }, [0x48] = { .level = 2, .type = UNIFIED_CACHE, .size = 3 * MiB, .associativity = 12, .line_size = 64, }, - /* Descriptor 0x49 depends on CPU family/model, so it is not included */ + /* + * Descriptor 0x49 has 2 cases: + * - 2nd-level cache: 4 MByte, 16-way set associative, 64 byte line size. + * - 3rd-level cache: 4MB, 16-way set associative, 64-byte line size + * (Intel Xeon processor MP, Family 0FH, Model 06H). + * + * When it represents L3, then it depends on CPU family/model. Fortunately, + * the legacy cache/CPU models don't have such special L3. So, just add it + * to represent the general L2 case. + */ + [0x49] = { .level = 2, .type = UNIFIED_CACHE, .size = 4 * MiB, + .associativity = 16, .line_size = 64, }, [0x4A] = { .level = 3, .type = UNIFIED_CACHE, .size = 6 * MiB, .associativity = 12, .line_size = 64, }, [0x4B] = { .level = 3, .type = UNIFIED_CACHE, .size = 8 * MiB, @@ -135,9 +161,10 @@ struct CPUID2CacheDescriptorInfo cpuid2_cache_descriptors[] = { .associativity = 4, .line_size = 64, }, [0x78] = { .level = 2, .type = UNIFIED_CACHE, .size = 1 * MiB, .associativity = 4, .line_size = 64, }, - /* lines per sector is not supported cpuid2_cache_descriptor(), - * so descriptors 0x79, 0x7A, 0x7B, 0x7C are not included. - */ + /* + * lines per sector is not supported cpuid2_cache_descriptor(), + * so descriptors 0x79, 0x7A, 0x7B, 0x7C are not included. + */ [0x7D] = { .level = 2, .type = UNIFIED_CACHE, .size = 2 * MiB, .associativity = 8, .line_size = 64, }, [0x7F] = { .level = 2, .type = UNIFIED_CACHE, .size = 512 * KiB, @@ -198,7 +225,7 @@ struct CPUID2CacheDescriptorInfo cpuid2_cache_descriptors[] = { * Return a CPUID 2 cache descriptor for a given cache. * If no known descriptor is found, return CACHE_DESCRIPTOR_UNAVAILABLE */ -static uint8_t cpuid2_cache_descriptor(CPUCacheInfo *cache) +static uint8_t cpuid2_cache_descriptor(CPUCacheInfo *cache, bool *unmacthed) { int i; @@ -215,9 +242,46 @@ static uint8_t cpuid2_cache_descriptor(CPUCacheInfo *cache) } } + *unmacthed |= true; return CACHE_DESCRIPTOR_UNAVAILABLE; } +static const CPUCaches legacy_intel_cpuid2_cache_info; + +/* Encode cache info for CPUID[2] */ +static void encode_cache_cpuid2(X86CPU *cpu, + const CPUCaches *caches, + uint32_t *eax, uint32_t *ebx, + uint32_t *ecx, uint32_t *edx) +{ + CPUX86State *env = &cpu->env; + int l1d, l1i, l2, l3; + bool unmatched = false; + + *eax = 1; /* Number of CPUID[EAX=2] calls required */ + *ebx = *ecx = *edx = 0; + + l1d = cpuid2_cache_descriptor(caches->l1d_cache, &unmatched); + l1i = cpuid2_cache_descriptor(caches->l1i_cache, &unmatched); + l2 = cpuid2_cache_descriptor(caches->l2_cache, &unmatched); + l3 = cpuid2_cache_descriptor(caches->l3_cache, &unmatched); + + if (!cpu->consistent_cache || + (env->cpuid_min_level < 0x4 && !unmatched)) { + /* + * Though SDM defines code 0x40 for cases with no L2 or L3. It's + * also valid to just ignore l3's code if there's no l2. 
+ */ + if (cpu->enable_l3_cache) { + *ecx = l3; + } + *edx = (l1d << 16) | (l1i << 8) | l2; + } else { + *ecx = 0; + *edx = CACHE_DESCRIPTOR_UNAVAILABLE; + } +} + /* CPUID Leaf 4 constants: */ /* EAX: */ @@ -285,11 +349,17 @@ static void encode_cache_cpuid4(CPUCacheInfo *cache, assert(cache->size == cache->line_size * cache->associativity * cache->partitions * cache->sets); + /* + * The following fields have bit-width limitations, so consider the + * maximum values to avoid overflow: + * Bits 25-14: maximum 4095. + * Bits 31-26: maximum 63. + */ *eax = CACHE_TYPE(cache->type) | CACHE_LEVEL(cache->level) | (cache->self_init ? CACHE_SELF_INIT_LEVEL : 0) | - (max_core_ids_in_package(topo_info) << 26) | - (max_thread_ids_for_cache(topo_info, cache->share_level) << 14); + (MIN(max_core_ids_in_package(topo_info), 63) << 26) | + (MIN(max_thread_ids_for_cache(topo_info, cache->share_level), 4095) << 14); assert(cache->line_size > 0); assert(cache->partitions > 0); @@ -429,7 +499,6 @@ static void encode_topo_cpuid1f(CPUX86State *env, uint32_t count, static uint32_t encode_cache_cpuid80000005(CPUCacheInfo *cache) { assert(cache->size % 1024 == 0); - assert(cache->lines_per_tag > 0); assert(cache->associativity > 0); assert(cache->line_size > 0); return ((cache->size / 1024) << 24) | (cache->associativity << 16) | @@ -438,8 +507,8 @@ static uint32_t encode_cache_cpuid80000005(CPUCacheInfo *cache) #define ASSOC_FULL 0xFF -/* AMD associativity encoding used on CPUID Leaf 0x80000006: */ -#define AMD_ENC_ASSOC(a) (a <= 1 ? a : \ +/* x86 associativity encoding used on CPUID Leaf 0x80000006: */ +#define X86_ENC_ASSOC(a) (a <= 1 ? a : \ a == 2 ? 0x2 : \ a == 4 ? 0x4 : \ a == 8 ? 0x6 : \ @@ -462,19 +531,18 @@ static void encode_cache_cpuid80000006(CPUCacheInfo *l2, { assert(l2->size % 1024 == 0); assert(l2->associativity > 0); - assert(l2->lines_per_tag > 0); assert(l2->line_size > 0); *ecx = ((l2->size / 1024) << 16) | - (AMD_ENC_ASSOC(l2->associativity) << 12) | + (X86_ENC_ASSOC(l2->associativity) << 12) | (l2->lines_per_tag << 8) | (l2->line_size); + /* For Intel, EDX is reserved. */ if (l3) { assert(l3->size % (512 * 1024) == 0); assert(l3->associativity > 0); - assert(l3->lines_per_tag > 0); assert(l3->line_size > 0); *edx = ((l3->size / (512 * 1024)) << 18) | - (AMD_ENC_ASSOC(l3->associativity) << 12) | + (X86_ENC_ASSOC(l3->associativity) << 12) | (l3->lines_per_tag << 8) | (l3->line_size); } else { *edx = 0; @@ -492,7 +560,8 @@ static void encode_cache_cpuid8000001d(CPUCacheInfo *cache, *eax = CACHE_TYPE(cache->type) | CACHE_LEVEL(cache->level) | (cache->self_init ? CACHE_SELF_INIT_LEVEL : 0); - *eax |= max_thread_ids_for_cache(topo_info, cache->share_level) << 14; + /* Bits 25:14 - NumSharingCache: maximum 4095. */ + *eax |= MIN(max_thread_ids_for_cache(topo_info, cache->share_level), 4095) << 14; assert(cache->line_size > 0); assert(cache->partitions > 0); @@ -572,117 +641,172 @@ static void encode_topo_cpuid8000001e(X86CPU *cpu, X86CPUTopoInfo *topo_info, * These are legacy cache values. 
If there is a need to change any * of these values please use builtin_x86_defs */ - -/* L1 data cache: */ -static CPUCacheInfo legacy_l1d_cache = { - .type = DATA_CACHE, - .level = 1, - .size = 32 * KiB, - .self_init = 1, - .line_size = 64, - .associativity = 8, - .sets = 64, - .partitions = 1, - .no_invd_sharing = true, - .share_level = CPU_TOPOLOGY_LEVEL_CORE, -}; - -/*FIXME: CPUID leaf 0x80000005 is inconsistent with leaves 2 & 4 */ -static CPUCacheInfo legacy_l1d_cache_amd = { - .type = DATA_CACHE, - .level = 1, - .size = 64 * KiB, - .self_init = 1, - .line_size = 64, - .associativity = 2, - .sets = 512, - .partitions = 1, - .lines_per_tag = 1, - .no_invd_sharing = true, - .share_level = CPU_TOPOLOGY_LEVEL_CORE, -}; - -/* L1 instruction cache: */ -static CPUCacheInfo legacy_l1i_cache = { - .type = INSTRUCTION_CACHE, - .level = 1, - .size = 32 * KiB, - .self_init = 1, - .line_size = 64, - .associativity = 8, - .sets = 64, - .partitions = 1, - .no_invd_sharing = true, - .share_level = CPU_TOPOLOGY_LEVEL_CORE, -}; - -/*FIXME: CPUID leaf 0x80000005 is inconsistent with leaves 2 & 4 */ -static CPUCacheInfo legacy_l1i_cache_amd = { - .type = INSTRUCTION_CACHE, - .level = 1, - .size = 64 * KiB, - .self_init = 1, - .line_size = 64, - .associativity = 2, - .sets = 512, - .partitions = 1, - .lines_per_tag = 1, - .no_invd_sharing = true, - .share_level = CPU_TOPOLOGY_LEVEL_CORE, -}; - -/* Level 2 unified cache: */ -static CPUCacheInfo legacy_l2_cache = { - .type = UNIFIED_CACHE, - .level = 2, - .size = 4 * MiB, - .self_init = 1, - .line_size = 64, - .associativity = 16, - .sets = 4096, - .partitions = 1, - .no_invd_sharing = true, - .share_level = CPU_TOPOLOGY_LEVEL_CORE, -}; - -/*FIXME: CPUID leaf 2 descriptor is inconsistent with CPUID leaf 4 */ -static CPUCacheInfo legacy_l2_cache_cpuid2 = { - .type = UNIFIED_CACHE, - .level = 2, - .size = 2 * MiB, - .line_size = 64, - .associativity = 8, - .share_level = CPU_TOPOLOGY_LEVEL_INVALID, +static const CPUCaches legacy_amd_cache_info = { + .l1d_cache = &(CPUCacheInfo) { + .type = DATA_CACHE, + .level = 1, + .size = 64 * KiB, + .self_init = 1, + .line_size = 64, + .associativity = 2, + .sets = 512, + .partitions = 1, + .lines_per_tag = 1, + .no_invd_sharing = true, + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l1i_cache = &(CPUCacheInfo) { + .type = INSTRUCTION_CACHE, + .level = 1, + .size = 64 * KiB, + .self_init = 1, + .line_size = 64, + .associativity = 2, + .sets = 512, + .partitions = 1, + .lines_per_tag = 1, + .no_invd_sharing = true, + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l2_cache = &(CPUCacheInfo) { + .type = UNIFIED_CACHE, + .level = 2, + .size = 512 * KiB, + .line_size = 64, + .lines_per_tag = 1, + .associativity = 16, + .sets = 512, + .partitions = 1, + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l3_cache = &(CPUCacheInfo) { + .type = UNIFIED_CACHE, + .level = 3, + .size = 16 * MiB, + .line_size = 64, + .associativity = 16, + .sets = 16384, + .partitions = 1, + .lines_per_tag = 1, + .self_init = true, + .inclusive = true, + .complex_indexing = true, + .share_level = CPU_TOPOLOGY_LEVEL_DIE, + }, }; - -/*FIXME: CPUID leaf 0x80000006 is inconsistent with leaves 2 & 4 */ -static CPUCacheInfo legacy_l2_cache_amd = { - .type = UNIFIED_CACHE, - .level = 2, - .size = 512 * KiB, - .line_size = 64, - .lines_per_tag = 1, - .associativity = 16, - .sets = 512, - .partitions = 1, - .share_level = CPU_TOPOLOGY_LEVEL_CORE, +/* + * Only used for the CPU models with CPUID level < 4. 
+ * These CPUs (CPUID level < 4) only use CPUID leaf 2 to present + * cache information. + * + * Note: This cache model is just a default one, and is not + * guaranteed to match real hardwares. + */ +static const CPUCaches legacy_intel_cpuid2_cache_info = { + .l1d_cache = &(CPUCacheInfo) { + .type = DATA_CACHE, + .level = 1, + .size = 32 * KiB, + .self_init = 1, + .line_size = 64, + .associativity = 8, + .sets = 64, + .partitions = 1, + .no_invd_sharing = true, + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l1i_cache = &(CPUCacheInfo) { + .type = INSTRUCTION_CACHE, + .level = 1, + .size = 32 * KiB, + .self_init = 1, + .line_size = 64, + .associativity = 8, + .sets = 64, + .partitions = 1, + .no_invd_sharing = true, + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l2_cache = &(CPUCacheInfo) { + .type = UNIFIED_CACHE, + .level = 2, + .size = 2 * MiB, + .self_init = 1, + .line_size = 64, + .associativity = 8, + .sets = 4096, + .partitions = 1, + .no_invd_sharing = true, + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l3_cache = &(CPUCacheInfo) { + .type = UNIFIED_CACHE, + .level = 3, + .size = 16 * MiB, + .line_size = 64, + .associativity = 16, + .sets = 16384, + .partitions = 1, + .lines_per_tag = 1, + .self_init = true, + .inclusive = true, + .complex_indexing = true, + .share_level = CPU_TOPOLOGY_LEVEL_DIE, + }, }; -/* Level 3 unified cache: */ -static CPUCacheInfo legacy_l3_cache = { - .type = UNIFIED_CACHE, - .level = 3, - .size = 16 * MiB, - .line_size = 64, - .associativity = 16, - .sets = 16384, - .partitions = 1, - .lines_per_tag = 1, - .self_init = true, - .inclusive = true, - .complex_indexing = true, - .share_level = CPU_TOPOLOGY_LEVEL_DIE, +static const CPUCaches legacy_intel_cache_info = { + .l1d_cache = &(CPUCacheInfo) { + .type = DATA_CACHE, + .level = 1, + .size = 32 * KiB, + .self_init = 1, + .line_size = 64, + .associativity = 8, + .sets = 64, + .partitions = 1, + .no_invd_sharing = true, + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l1i_cache = &(CPUCacheInfo) { + .type = INSTRUCTION_CACHE, + .level = 1, + .size = 32 * KiB, + .self_init = 1, + .line_size = 64, + .associativity = 8, + .sets = 64, + .partitions = 1, + .no_invd_sharing = true, + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l2_cache = &(CPUCacheInfo) { + .type = UNIFIED_CACHE, + .level = 2, + .size = 4 * MiB, + .self_init = 1, + .line_size = 64, + .associativity = 16, + .sets = 4096, + .partitions = 1, + .no_invd_sharing = true, + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l3_cache = &(CPUCacheInfo) { + .type = UNIFIED_CACHE, + .level = 3, + .size = 16 * MiB, + .line_size = 64, + .associativity = 16, + .sets = 16384, + .partitions = 1, + .lines_per_tag = 1, + .self_init = true, + .inclusive = true, + .complex_indexing = true, + .share_level = CPU_TOPOLOGY_LEVEL_DIE, + }, }; /* TLB definitions: */ @@ -776,11 +900,12 @@ void x86_cpu_vendor_words2str(char *dst, uint32_t vendor1, CPUID_PAE | CPUID_MCE | CPUID_CX8 | CPUID_APIC | CPUID_SEP | \ CPUID_MTRR | CPUID_PGE | CPUID_MCA | CPUID_CMOV | CPUID_PAT | \ CPUID_PSE36 | CPUID_CLFLUSH | CPUID_ACPI | CPUID_MMX | \ - CPUID_FXSR | CPUID_SSE | CPUID_SSE2 | CPUID_SS | CPUID_DE) + CPUID_FXSR | CPUID_SSE | CPUID_SSE2 | CPUID_SS | CPUID_DE | \ + CPUID_HT) /* partly implemented: CPUID_MTRR, CPUID_MCA, CPUID_CLFLUSH (needed for Win64) */ /* missing: - CPUID_VME, CPUID_DTS, CPUID_SS, CPUID_HT, CPUID_TM, CPUID_PBE */ + CPUID_VME, CPUID_DTS, CPUID_SS, CPUID_TM, CPUID_PBE */ /* * Kernel-only features that can be shown to usermode programs even if @@ -848,7 +973,8 
@@ void x86_cpu_vendor_words2str(char *dst, uint32_t vendor1, #define TCG_EXT3_FEATURES (CPUID_EXT3_LAHF_LM | CPUID_EXT3_SVM | \ CPUID_EXT3_CR8LEG | CPUID_EXT3_ABM | CPUID_EXT3_SSE4A | \ - CPUID_EXT3_3DNOWPREFETCH | CPUID_EXT3_KERNEL_FEATURES) + CPUID_EXT3_3DNOWPREFETCH | CPUID_EXT3_KERNEL_FEATURES | \ + CPUID_EXT3_CMP_LEG) #define TCG_EXT4_FEATURES 0 @@ -897,6 +1023,7 @@ void x86_cpu_vendor_words2str(char *dst, uint32_t vendor1, #define TCG_7_1_EAX_FEATURES (CPUID_7_1_EAX_FZRM | CPUID_7_1_EAX_FSRS | \ CPUID_7_1_EAX_FSRC | CPUID_7_1_EAX_CMPCCXADD) +#define TCG_7_1_ECX_FEATURES 0 #define TCG_7_1_EDX_FEATURES 0 #define TCG_7_2_EDX_FEATURES 0 #define TCG_APM_FEATURES 0 @@ -1147,6 +1274,25 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = { }, .tcg_features = TCG_7_1_EAX_FEATURES, }, + [FEAT_7_1_ECX] = { + .type = CPUID_FEATURE_WORD, + .feat_names = { + NULL, NULL, NULL, NULL, + NULL, "msr-imm", NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + }, + .cpuid = { + .eax = 7, + .needs_ecx = true, .ecx = 1, + .reg = R_ECX, + }, + .tcg_features = TCG_7_1_ECX_FEATURES, + }, [FEAT_7_1_EDX] = { .type = CPUID_FEATURE_WORD, .feat_names = { @@ -1250,12 +1396,12 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = { [FEAT_8000_0021_EAX] = { .type = CPUID_FEATURE_WORD, .feat_names = { - "no-nested-data-bp", NULL, "lfence-always-serializing", NULL, - NULL, NULL, "null-sel-clr-base", NULL, + "no-nested-data-bp", "fs-gs-base-ns", "lfence-always-serializing", NULL, + NULL, "verw-clear", "null-sel-clr-base", NULL, "auto-ibrs", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, + "prefetchi", NULL, NULL, NULL, "eraps", NULL, NULL, "sbpb", "ibpb-brtype", "srso-no", "srso-user-kernel-no", NULL, }, @@ -1269,6 +1415,22 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = { .tcg_features = 0, .unmigratable_flags = 0, }, + [FEAT_8000_0021_ECX] = { + .type = CPUID_FEATURE_WORD, + .feat_names = { + NULL, "tsa-sq-no", "tsa-l1-no", NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + }, + .cpuid = { .eax = 0x80000021, .reg = R_ECX, }, + .tcg_features = 0, + .unmigratable_flags = 0, + }, [FEAT_8000_0022_EAX] = { .type = CPUID_FEATURE_WORD, .feat_names = { @@ -1675,14 +1837,21 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = { }, }; -typedef struct FeatureMask { - FeatureWord index; - uint64_t mask; -} FeatureMask; +bool is_feature_word_cpuid(uint32_t feature, uint32_t index, int reg) +{ + FeatureWordInfo *wi; + FeatureWord w; -typedef struct FeatureDep { - FeatureMask from, to; -} FeatureDep; + for (w = 0; w < FEATURE_WORDS; w++) { + wi = &feature_word_info[w]; + if (wi->type == CPUID_FEATURE_WORD && wi->cpuid.eax == feature && + (!wi->cpuid.needs_ecx || wi->cpuid.ecx == index) && + wi->cpuid.reg == reg) { + return true; + } + } + return false; +} static FeatureDep feature_dependencies[] = { { @@ -1794,10 +1963,6 @@ static FeatureDep feature_dependencies[] = { .to = { FEAT_7_1_EAX, CPUID_7_1_EAX_FRED }, }, { - .from = { FEAT_7_1_EAX, CPUID_7_1_EAX_WRMSRNS }, - .to = { FEAT_7_1_EAX, CPUID_7_1_EAX_FRED }, - }, - { .from = { FEAT_7_0_EBX, CPUID_7_0_EBX_SGX }, .to = { FEAT_7_0_ECX, CPUID_7_0_ECX_SGX_LC }, }, @@ -1852,9 +2017,6 @@ static const X86RegisterInfo32 x86_reg_info_32[CPU_NB_REGS32] = { }; #undef REGISTER -/* CPUID 
feature bits available in XSS */ -#define CPUID_XSTATE_XSS_MASK (XSTATE_ARCH_LBR_MASK) - ExtSaveArea x86_ext_save_areas[XSAVE_STATE_AREA_COUNT] = { [XSTATE_FP_BIT] = { /* x87 FP state component is always enabled if XSAVE is supported */ @@ -1920,7 +2082,7 @@ uint32_t xsave_area_size(uint64_t mask, bool compacted) static inline bool accel_uses_host_cpuid(void) { - return kvm_enabled() || hvf_enabled(); + return !tcg_enabled() && !qtest_enabled(); } static inline uint64_t x86_cpu_xsave_xcr0_components(X86CPU *cpu) @@ -2204,6 +2366,60 @@ static CPUCaches epyc_v4_cache_info = { }, }; +static CPUCaches epyc_v5_cache_info = { + .l1d_cache = &(CPUCacheInfo) { + .type = DATA_CACHE, + .level = 1, + .size = 32 * KiB, + .line_size = 64, + .associativity = 8, + .partitions = 1, + .sets = 64, + .lines_per_tag = 1, + .self_init = true, + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l1i_cache = &(CPUCacheInfo) { + .type = INSTRUCTION_CACHE, + .level = 1, + .size = 64 * KiB, + .line_size = 64, + .associativity = 4, + .partitions = 1, + .sets = 256, + .lines_per_tag = 1, + .self_init = true, + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l2_cache = &(CPUCacheInfo) { + .type = UNIFIED_CACHE, + .level = 2, + .size = 512 * KiB, + .line_size = 64, + .associativity = 8, + .partitions = 1, + .sets = 1024, + .lines_per_tag = 1, + .self_init = true, + .inclusive = true, + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l3_cache = &(CPUCacheInfo) { + .type = UNIFIED_CACHE, + .level = 3, + .size = 8 * MiB, + .line_size = 64, + .associativity = 16, + .partitions = 1, + .sets = 8192, + .lines_per_tag = 1, + .self_init = true, + .no_invd_sharing = true, + .complex_indexing = false, + .share_level = CPU_TOPOLOGY_LEVEL_DIE, + }, +}; + static const CPUCaches epyc_rome_cache_info = { .l1d_cache = &(CPUCacheInfo) { .type = DATA_CACHE, @@ -2312,6 +2528,60 @@ static const CPUCaches epyc_rome_v3_cache_info = { }, }; +static const CPUCaches epyc_rome_v5_cache_info = { + .l1d_cache = &(CPUCacheInfo) { + .type = DATA_CACHE, + .level = 1, + .size = 32 * KiB, + .line_size = 64, + .associativity = 8, + .partitions = 1, + .sets = 64, + .lines_per_tag = 1, + .self_init = true, + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l1i_cache = &(CPUCacheInfo) { + .type = INSTRUCTION_CACHE, + .level = 1, + .size = 32 * KiB, + .line_size = 64, + .associativity = 8, + .partitions = 1, + .sets = 64, + .lines_per_tag = 1, + .self_init = true, + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l2_cache = &(CPUCacheInfo) { + .type = UNIFIED_CACHE, + .level = 2, + .size = 512 * KiB, + .line_size = 64, + .associativity = 8, + .partitions = 1, + .sets = 1024, + .lines_per_tag = 1, + .self_init = true, + .inclusive = true, + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l3_cache = &(CPUCacheInfo) { + .type = UNIFIED_CACHE, + .level = 3, + .size = 16 * MiB, + .line_size = 64, + .associativity = 16, + .partitions = 1, + .sets = 16384, + .lines_per_tag = 1, + .self_init = true, + .no_invd_sharing = true, + .complex_indexing = false, + .share_level = CPU_TOPOLOGY_LEVEL_DIE, + }, +}; + static const CPUCaches epyc_milan_cache_info = { .l1d_cache = &(CPUCacheInfo) { .type = DATA_CACHE, @@ -2420,6 +2690,60 @@ static const CPUCaches epyc_milan_v2_cache_info = { }, }; +static const CPUCaches epyc_milan_v3_cache_info = { + .l1d_cache = &(CPUCacheInfo) { + .type = DATA_CACHE, + .level = 1, + .size = 32 * KiB, + .line_size = 64, + .associativity = 8, + .partitions = 1, + .sets = 64, + .lines_per_tag = 1, + .self_init = true, + .share_level = 
CPU_TOPOLOGY_LEVEL_CORE, + }, + .l1i_cache = &(CPUCacheInfo) { + .type = INSTRUCTION_CACHE, + .level = 1, + .size = 32 * KiB, + .line_size = 64, + .associativity = 8, + .partitions = 1, + .sets = 64, + .lines_per_tag = 1, + .self_init = true, + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l2_cache = &(CPUCacheInfo) { + .type = UNIFIED_CACHE, + .level = 2, + .size = 512 * KiB, + .line_size = 64, + .associativity = 8, + .partitions = 1, + .sets = 1024, + .lines_per_tag = 1, + .self_init = true, + .inclusive = true, + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l3_cache = &(CPUCacheInfo) { + .type = UNIFIED_CACHE, + .level = 3, + .size = 32 * MiB, + .line_size = 64, + .associativity = 16, + .partitions = 1, + .sets = 32768, + .lines_per_tag = 1, + .self_init = true, + .no_invd_sharing = true, + .complex_indexing = false, + .share_level = CPU_TOPOLOGY_LEVEL_DIE, + }, +}; + static const CPUCaches epyc_genoa_cache_info = { .l1d_cache = &(CPUCacheInfo) { .type = DATA_CACHE, @@ -2474,6 +2798,486 @@ static const CPUCaches epyc_genoa_cache_info = { }, }; +static const CPUCaches epyc_genoa_v2_cache_info = { + .l1d_cache = &(CPUCacheInfo) { + .type = DATA_CACHE, + .level = 1, + .size = 32 * KiB, + .line_size = 64, + .associativity = 8, + .partitions = 1, + .sets = 64, + .lines_per_tag = 1, + .self_init = true, + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l1i_cache = &(CPUCacheInfo) { + .type = INSTRUCTION_CACHE, + .level = 1, + .size = 32 * KiB, + .line_size = 64, + .associativity = 8, + .partitions = 1, + .sets = 64, + .lines_per_tag = 1, + .self_init = true, + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l2_cache = &(CPUCacheInfo) { + .type = UNIFIED_CACHE, + .level = 2, + .size = 1 * MiB, + .line_size = 64, + .associativity = 8, + .partitions = 1, + .sets = 2048, + .lines_per_tag = 1, + .self_init = true, + .inclusive = true, + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l3_cache = &(CPUCacheInfo) { + .type = UNIFIED_CACHE, + .level = 3, + .size = 32 * MiB, + .line_size = 64, + .associativity = 16, + .partitions = 1, + .sets = 32768, + .lines_per_tag = 1, + .self_init = true, + .no_invd_sharing = true, + .complex_indexing = false, + .share_level = CPU_TOPOLOGY_LEVEL_DIE, + }, +}; + +static const CPUCaches epyc_turin_cache_info = { + .l1d_cache = &(CPUCacheInfo) { + .type = DATA_CACHE, + .level = 1, + .size = 48 * KiB, + .line_size = 64, + .associativity = 12, + .partitions = 1, + .sets = 64, + .lines_per_tag = 1, + .self_init = true, + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l1i_cache = &(CPUCacheInfo) { + .type = INSTRUCTION_CACHE, + .level = 1, + .size = 32 * KiB, + .line_size = 64, + .associativity = 8, + .partitions = 1, + .sets = 64, + .lines_per_tag = 1, + .self_init = true, + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l2_cache = &(CPUCacheInfo) { + .type = UNIFIED_CACHE, + .level = 2, + .size = 1 * MiB, + .line_size = 64, + .associativity = 16, + .partitions = 1, + .sets = 1024, + .lines_per_tag = 1, + .self_init = true, + .inclusive = true, + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l3_cache = &(CPUCacheInfo) { + .type = UNIFIED_CACHE, + .level = 3, + .size = 32 * MiB, + .line_size = 64, + .associativity = 16, + .partitions = 1, + .sets = 32768, + .lines_per_tag = 1, + .self_init = true, + .no_invd_sharing = true, + .complex_indexing = false, + .share_level = CPU_TOPOLOGY_LEVEL_DIE, + } +}; + +static const CPUCaches xeon_spr_cache_info = { + .l1d_cache = &(CPUCacheInfo) { + /* CPUID 0x4.0x0.EAX */ + .type = DATA_CACHE, + .level = 1, + .self_init = 
true, + + /* CPUID 0x4.0x0.EBX */ + .line_size = 64, + .partitions = 1, + .associativity = 12, + + /* CPUID 0x4.0x0.ECX */ + .sets = 64, + + /* CPUID 0x4.0x0.EDX */ + .no_invd_sharing = false, + .inclusive = false, + .complex_indexing = false, + + .size = 48 * KiB, + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l1i_cache = &(CPUCacheInfo) { + /* CPUID 0x4.0x1.EAX */ + .type = INSTRUCTION_CACHE, + .level = 1, + .self_init = true, + + /* CPUID 0x4.0x1.EBX */ + .line_size = 64, + .partitions = 1, + .associativity = 8, + + /* CPUID 0x4.0x1.ECX */ + .sets = 64, + + /* CPUID 0x4.0x1.EDX */ + .no_invd_sharing = false, + .inclusive = false, + .complex_indexing = false, + + .size = 32 * KiB, + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l2_cache = &(CPUCacheInfo) { + /* CPUID 0x4.0x2.EAX */ + .type = UNIFIED_CACHE, + .level = 2, + .self_init = true, + + /* CPUID 0x4.0x2.EBX */ + .line_size = 64, + .partitions = 1, + .associativity = 16, + + /* CPUID 0x4.0x2.ECX */ + .sets = 2048, + + /* CPUID 0x4.0x2.EDX */ + .no_invd_sharing = false, + .inclusive = false, + .complex_indexing = false, + + .size = 2 * MiB, + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l3_cache = &(CPUCacheInfo) { + /* CPUID 0x4.0x3.EAX */ + .type = UNIFIED_CACHE, + .level = 3, + .self_init = true, + + /* CPUID 0x4.0x3.EBX */ + .line_size = 64, + .partitions = 1, + .associativity = 15, + + /* CPUID 0x4.0x3.ECX */ + .sets = 65536, + + /* CPUID 0x4.0x3.EDX */ + .no_invd_sharing = false, + .inclusive = false, + .complex_indexing = true, + + .size = 60 * MiB, + .share_level = CPU_TOPOLOGY_LEVEL_SOCKET, + }, +}; + +static const CPUCaches xeon_gnr_cache_info = { + .l1d_cache = &(CPUCacheInfo) { + /* CPUID 0x4.0x0.EAX */ + .type = DATA_CACHE, + .level = 1, + .self_init = true, + + /* CPUID 0x4.0x0.EBX */ + .line_size = 64, + .partitions = 1, + .associativity = 12, + + /* CPUID 0x4.0x0.ECX */ + .sets = 64, + + /* CPUID 0x4.0x0.EDX */ + .no_invd_sharing = false, + .inclusive = false, + .complex_indexing = false, + + .size = 48 * KiB, + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l1i_cache = &(CPUCacheInfo) { + /* CPUID 0x4.0x1.EAX */ + .type = INSTRUCTION_CACHE, + .level = 1, + .self_init = true, + + /* CPUID 0x4.0x1.EBX */ + .line_size = 64, + .partitions = 1, + .associativity = 16, + + /* CPUID 0x4.0x1.ECX */ + .sets = 64, + + /* CPUID 0x4.0x1.EDX */ + .no_invd_sharing = false, + .inclusive = false, + .complex_indexing = false, + + .size = 64 * KiB, + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l2_cache = &(CPUCacheInfo) { + /* CPUID 0x4.0x2.EAX */ + .type = UNIFIED_CACHE, + .level = 2, + .self_init = true, + + /* CPUID 0x4.0x2.EBX */ + .line_size = 64, + .partitions = 1, + .associativity = 16, + + /* CPUID 0x4.0x2.ECX */ + .sets = 2048, + + /* CPUID 0x4.0x2.EDX */ + .no_invd_sharing = false, + .inclusive = false, + .complex_indexing = false, + + .size = 2 * MiB, + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l3_cache = &(CPUCacheInfo) { + /* CPUID 0x4.0x3.EAX */ + .type = UNIFIED_CACHE, + .level = 3, + .self_init = true, + + /* CPUID 0x4.0x3.EBX */ + .line_size = 64, + .partitions = 1, + .associativity = 16, + + /* CPUID 0x4.0x3.ECX */ + .sets = 294912, + + /* CPUID 0x4.0x3.EDX */ + .no_invd_sharing = false, + .inclusive = false, + .complex_indexing = true, + + .size = 288 * MiB, + .share_level = CPU_TOPOLOGY_LEVEL_SOCKET, + }, +}; + +static const CPUCaches xeon_srf_cache_info = { + .l1d_cache = &(CPUCacheInfo) { + /* CPUID 0x4.0x0.EAX */ + .type = DATA_CACHE, + .level = 1, + .self_init = true, + + /* CPUID 
0x4.0x0.EBX */ + .line_size = 64, + .partitions = 1, + .associativity = 8, + + /* CPUID 0x4.0x0.ECX */ + .sets = 64, + + /* CPUID 0x4.0x0.EDX */ + .no_invd_sharing = false, + .inclusive = false, + .complex_indexing = false, + + .size = 32 * KiB, + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l1i_cache = &(CPUCacheInfo) { + /* CPUID 0x4.0x1.EAX */ + .type = INSTRUCTION_CACHE, + .level = 1, + .self_init = true, + + /* CPUID 0x4.0x1.EBX */ + .line_size = 64, + .partitions = 1, + .associativity = 8, + + /* CPUID 0x4.0x1.ECX */ + .sets = 128, + + /* CPUID 0x4.0x1.EDX */ + .no_invd_sharing = false, + .inclusive = false, + .complex_indexing = false, + + .size = 64 * KiB, + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l2_cache = &(CPUCacheInfo) { + /* CPUID 0x4.0x2.EAX */ + .type = UNIFIED_CACHE, + .level = 2, + .self_init = true, + + /* CPUID 0x4.0x2.EBX */ + .line_size = 64, + .partitions = 1, + .associativity = 16, + + /* CPUID 0x4.0x2.ECX */ + .sets = 4096, + + /* CPUID 0x4.0x2.EDX */ + .no_invd_sharing = false, + .inclusive = false, + .complex_indexing = false, + + .size = 4 * MiB, + .share_level = CPU_TOPOLOGY_LEVEL_MODULE, + }, + .l3_cache = &(CPUCacheInfo) { + /* CPUID 0x4.0x3.EAX */ + .type = UNIFIED_CACHE, + .level = 3, + .self_init = true, + + /* CPUID 0x4.0x3.EBX */ + .line_size = 64, + .partitions = 1, + .associativity = 12, + + /* CPUID 0x4.0x3.ECX */ + .sets = 147456, + + /* CPUID 0x4.0x3.EDX */ + .no_invd_sharing = false, + .inclusive = false, + .complex_indexing = true, + + .size = 108 * MiB, + .share_level = CPU_TOPOLOGY_LEVEL_SOCKET, + }, +}; + +static const CPUCaches yongfeng_cache_info = { + .l1d_cache = &(CPUCacheInfo) { + /* CPUID 0x4.0x0.EAX */ + .type = DATA_CACHE, + .level = 1, + .self_init = true, + + /* CPUID 0x4.0x0.EBX */ + .line_size = 64, + .partitions = 1, + .associativity = 8, + + /* CPUID 0x4.0x0.ECX */ + .sets = 64, + + /* CPUID 0x4.0x0.EDX */ + .no_invd_sharing = false, + .inclusive = false, + .complex_indexing = false, + + /* CPUID 0x80000005.ECX */ + .lines_per_tag = 1, + .size = 32 * KiB, + + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l1i_cache = &(CPUCacheInfo) { + /* CPUID 0x4.0x1.EAX */ + .type = INSTRUCTION_CACHE, + .level = 1, + .self_init = true, + + /* CPUID 0x4.0x1.EBX */ + .line_size = 64, + .partitions = 1, + .associativity = 16, + + /* CPUID 0x4.0x1.ECX */ + .sets = 64, + + /* CPUID 0x4.0x1.EDX */ + .no_invd_sharing = false, + .inclusive = false, + .complex_indexing = false, + + /* CPUID 0x80000005.EDX */ + .lines_per_tag = 1, + .size = 64 * KiB, + + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l2_cache = &(CPUCacheInfo) { + /* CPUID 0x4.0x2.EAX */ + .type = UNIFIED_CACHE, + .level = 2, + .self_init = true, + + /* CPUID 0x4.0x2.EBX */ + .line_size = 64, + .partitions = 1, + .associativity = 8, + + /* CPUID 0x4.0x2.ECX */ + .sets = 512, + + /* CPUID 0x4.0x2.EDX */ + .no_invd_sharing = false, + .inclusive = true, + .complex_indexing = false, + + /* CPUID 0x80000006.ECX */ + .size = 256 * KiB, + + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l3_cache = &(CPUCacheInfo) { + /* CPUID 0x4.0x3.EAX */ + .type = UNIFIED_CACHE, + .level = 3, + .self_init = true, + + /* CPUID 0x4.0x3.EBX */ + .line_size = 64, + .partitions = 1, + .associativity = 16, + + /* CPUID 0x4.0x3.ECX */ + .sets = 8192, + + /* CPUID 0x4.0x3.EDX */ + .no_invd_sharing = true, + .inclusive = true, + .complex_indexing = false, + + .size = 8 * MiB, + .share_level = CPU_TOPOLOGY_LEVEL_DIE, + }, +}; + /* The following VMX features are not supported by KVM and are left out in 
the * CPU definitions: * @@ -2726,6 +3530,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { I486_FEATURES, .xlevel = 0, .model_id = "", + .cache_info = &legacy_intel_cpuid2_cache_info, }, { .name = "pentium", @@ -2738,6 +3543,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { PENTIUM_FEATURES, .xlevel = 0, .model_id = "", + .cache_info = &legacy_intel_cpuid2_cache_info, }, { .name = "pentium2", @@ -2750,6 +3556,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { PENTIUM2_FEATURES, .xlevel = 0, .model_id = "", + .cache_info = &legacy_intel_cpuid2_cache_info, }, { .name = "pentium3", @@ -2762,6 +3569,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { PENTIUM3_FEATURES, .xlevel = 0, .model_id = "", + .cache_info = &legacy_intel_cpuid2_cache_info, }, { .name = "athlon", @@ -4294,6 +5102,15 @@ static const X86CPUDefinition builtin_x86_defs[] = { { /* end of list */ } } }, + { + .version = 4, + .note = "with spr-sp cache model and 0x1f leaf", + .cache_info = &xeon_spr_cache_info, + .props = (PropValue[]) { + { "x-force-cpuid-0x1f", "on" }, + { /* end of list */ }, + } + }, { /* end of list */ } } }, @@ -4447,6 +5264,15 @@ static const X86CPUDefinition builtin_x86_defs[] = { { /* end of list */ } } }, + { + .version = 3, + .note = "with gnr-sp cache model and 0x1f leaf", + .cache_info = &xeon_gnr_cache_info, + .props = (PropValue[]) { + { "x-force-cpuid-0x1f", "on" }, + { /* end of list */ }, + } + }, { /* end of list */ }, }, }, @@ -4592,6 +5418,15 @@ static const X86CPUDefinition builtin_x86_defs[] = { { /* end of list */ } } }, + { + .version = 3, + .note = "with srf-sp cache model and 0x1f leaf", + .cache_info = &xeon_srf_cache_info, + .props = (PropValue[]) { + { "x-force-cpuid-0x1f", "on" }, + { /* end of list */ }, + } + }, { /* end of list */ }, }, }, @@ -5231,6 +6066,25 @@ static const X86CPUDefinition builtin_x86_defs[] = { }, .cache_info = &epyc_v4_cache_info }, + { + .version = 5, + .props = (PropValue[]) { + { "overflow-recov", "on" }, + { "succor", "on" }, + { "lbrv", "on" }, + { "tsc-scale", "on" }, + { "vmcb-clean", "on" }, + { "flushbyasid", "on" }, + { "pause-filter", "on" }, + { "pfthreshold", "on" }, + { "v-vmsave-vmload", "on" }, + { "vgif", "on" }, + { "model-id", + "AMD EPYC-v5 Processor" }, + { /* end of list */ } + }, + .cache_info = &epyc_v5_cache_info + }, { /* end of list */ } } }, @@ -5369,6 +6223,25 @@ static const X86CPUDefinition builtin_x86_defs[] = { { /* end of list */ } }, }, + { + .version = 5, + .props = (PropValue[]) { + { "overflow-recov", "on" }, + { "succor", "on" }, + { "lbrv", "on" }, + { "tsc-scale", "on" }, + { "vmcb-clean", "on" }, + { "flushbyasid", "on" }, + { "pause-filter", "on" }, + { "pfthreshold", "on" }, + { "v-vmsave-vmload", "on" }, + { "vgif", "on" }, + { "model-id", + "AMD EPYC-Rome-v5 Processor" }, + { /* end of list */ } + }, + .cache_info = &epyc_rome_v5_cache_info + }, { /* end of list */ } } }, @@ -5444,6 +6317,25 @@ static const X86CPUDefinition builtin_x86_defs[] = { }, .cache_info = &epyc_milan_v2_cache_info }, + { + .version = 3, + .props = (PropValue[]) { + { "overflow-recov", "on" }, + { "succor", "on" }, + { "lbrv", "on" }, + { "tsc-scale", "on" }, + { "vmcb-clean", "on" }, + { "flushbyasid", "on" }, + { "pause-filter", "on" }, + { "pfthreshold", "on" }, + { "v-vmsave-vmload", "on" }, + { "vgif", "on" }, + { "model-id", + "AMD EPYC-Milan-v3 Processor" }, + { /* end of list */ } + }, + .cache_info = &epyc_milan_v3_cache_info + }, { /* end of list */ } } }, @@ -5518,6 +6410,31 @@ static const 
X86CPUDefinition builtin_x86_defs[] = { .xlevel = 0x80000022, .model_id = "AMD EPYC-Genoa Processor", .cache_info = &epyc_genoa_cache_info, + .versions = (X86CPUVersionDefinition[]) { + { .version = 1 }, + { + .version = 2, + .props = (PropValue[]) { + { "overflow-recov", "on" }, + { "succor", "on" }, + { "lbrv", "on" }, + { "tsc-scale", "on" }, + { "vmcb-clean", "on" }, + { "flushbyasid", "on" }, + { "pause-filter", "on" }, + { "pfthreshold", "on" }, + { "v-vmsave-vmload", "on" }, + { "vgif", "on" }, + { "fs-gs-base-ns", "on" }, + { "perfmon-v2", "on" }, + { "model-id", + "AMD EPYC-Genoa-v2 Processor" }, + { /* end of list */ } + }, + .cache_info = &epyc_genoa_v2_cache_info + }, + { /* end of list */ } + } }, { .name = "YongFeng", @@ -5652,9 +6569,101 @@ static const X86CPUDefinition builtin_x86_defs[] = { { /* end of list */ } } }, + { + .version = 3, + .note = "with the cache model and 0x1f leaf", + .cache_info = &yongfeng_cache_info, + .props = (PropValue[]) { + { "x-force-cpuid-0x1f", "on" }, + { /* end of list */ }, + } + }, { /* end of list */ } } }, + { + .name = "EPYC-Turin", + .level = 0xd, + .vendor = CPUID_VENDOR_AMD, + .family = 26, + .model = 0, + .stepping = 0, + .features[FEAT_1_ECX] = + CPUID_EXT_RDRAND | CPUID_EXT_F16C | CPUID_EXT_AVX | + CPUID_EXT_XSAVE | CPUID_EXT_AES | CPUID_EXT_POPCNT | + CPUID_EXT_MOVBE | CPUID_EXT_SSE42 | CPUID_EXT_SSE41 | + CPUID_EXT_PCID | CPUID_EXT_CX16 | CPUID_EXT_FMA | + CPUID_EXT_SSSE3 | CPUID_EXT_MONITOR | CPUID_EXT_PCLMULQDQ | + CPUID_EXT_SSE3, + .features[FEAT_1_EDX] = + CPUID_SSE2 | CPUID_SSE | CPUID_FXSR | CPUID_MMX | CPUID_CLFLUSH | + CPUID_PSE36 | CPUID_PAT | CPUID_CMOV | CPUID_MCA | CPUID_PGE | + CPUID_MTRR | CPUID_SEP | CPUID_APIC | CPUID_CX8 | CPUID_MCE | + CPUID_PAE | CPUID_MSR | CPUID_TSC | CPUID_PSE | CPUID_DE | + CPUID_VME | CPUID_FP87, + .features[FEAT_6_EAX] = + CPUID_6_EAX_ARAT, + .features[FEAT_7_0_EBX] = + CPUID_7_0_EBX_FSGSBASE | CPUID_7_0_EBX_BMI1 | CPUID_7_0_EBX_AVX2 | + CPUID_7_0_EBX_SMEP | CPUID_7_0_EBX_BMI2 | CPUID_7_0_EBX_ERMS | + CPUID_7_0_EBX_INVPCID | CPUID_7_0_EBX_AVX512F | + CPUID_7_0_EBX_AVX512DQ | CPUID_7_0_EBX_RDSEED | CPUID_7_0_EBX_ADX | + CPUID_7_0_EBX_SMAP | CPUID_7_0_EBX_AVX512IFMA | + CPUID_7_0_EBX_CLFLUSHOPT | CPUID_7_0_EBX_CLWB | + CPUID_7_0_EBX_AVX512CD | CPUID_7_0_EBX_SHA_NI | + CPUID_7_0_EBX_AVX512BW | CPUID_7_0_EBX_AVX512VL, + .features[FEAT_7_0_ECX] = + CPUID_7_0_ECX_AVX512_VBMI | CPUID_7_0_ECX_UMIP | CPUID_7_0_ECX_PKU | + CPUID_7_0_ECX_AVX512_VBMI2 | CPUID_7_0_ECX_GFNI | + CPUID_7_0_ECX_VAES | CPUID_7_0_ECX_VPCLMULQDQ | + CPUID_7_0_ECX_AVX512VNNI | CPUID_7_0_ECX_AVX512BITALG | + CPUID_7_0_ECX_AVX512_VPOPCNTDQ | CPUID_7_0_ECX_LA57 | + CPUID_7_0_ECX_RDPID | CPUID_7_0_ECX_MOVDIRI | + CPUID_7_0_ECX_MOVDIR64B, + .features[FEAT_7_0_EDX] = + CPUID_7_0_EDX_FSRM | CPUID_7_0_EDX_AVX512_VP2INTERSECT, + .features[FEAT_7_1_EAX] = + CPUID_7_1_EAX_AVX_VNNI | CPUID_7_1_EAX_AVX512_BF16, + .features[FEAT_8000_0001_ECX] = + CPUID_EXT3_OSVW | CPUID_EXT3_3DNOWPREFETCH | + CPUID_EXT3_MISALIGNSSE | CPUID_EXT3_SSE4A | CPUID_EXT3_ABM | + CPUID_EXT3_CR8LEG | CPUID_EXT3_SVM | CPUID_EXT3_LAHF_LM | + CPUID_EXT3_TOPOEXT | CPUID_EXT3_PERFCORE, + .features[FEAT_8000_0001_EDX] = + CPUID_EXT2_LM | CPUID_EXT2_RDTSCP | CPUID_EXT2_PDPE1GB | + CPUID_EXT2_FFXSR | CPUID_EXT2_MMXEXT | CPUID_EXT2_NX | + CPUID_EXT2_SYSCALL, + .features[FEAT_8000_0007_EBX] = + CPUID_8000_0007_EBX_OVERFLOW_RECOV | CPUID_8000_0007_EBX_SUCCOR, + .features[FEAT_8000_0008_EBX] = + CPUID_8000_0008_EBX_CLZERO | CPUID_8000_0008_EBX_XSAVEERPTR | + 
CPUID_8000_0008_EBX_WBNOINVD | CPUID_8000_0008_EBX_IBPB | + CPUID_8000_0008_EBX_IBRS | CPUID_8000_0008_EBX_STIBP | + CPUID_8000_0008_EBX_STIBP_ALWAYS_ON | + CPUID_8000_0008_EBX_AMD_SSBD | CPUID_8000_0008_EBX_AMD_PSFD, + .features[FEAT_8000_0021_EAX] = + CPUID_8000_0021_EAX_NO_NESTED_DATA_BP | + CPUID_8000_0021_EAX_FS_GS_BASE_NS | + CPUID_8000_0021_EAX_LFENCE_ALWAYS_SERIALIZING | + CPUID_8000_0021_EAX_NULL_SEL_CLR_BASE | + CPUID_8000_0021_EAX_AUTO_IBRS | CPUID_8000_0021_EAX_PREFETCHI | + CPUID_8000_0021_EAX_SBPB | CPUID_8000_0021_EAX_IBPB_BRTYPE | + CPUID_8000_0021_EAX_SRSO_USER_KERNEL_NO, + .features[FEAT_8000_0022_EAX] = + CPUID_8000_0022_EAX_PERFMON_V2, + .features[FEAT_XSAVE] = + CPUID_XSAVE_XSAVEOPT | CPUID_XSAVE_XSAVEC | + CPUID_XSAVE_XGETBV1 | CPUID_XSAVE_XSAVES, + .features[FEAT_SVM] = + CPUID_SVM_NPT | CPUID_SVM_LBRV | CPUID_SVM_NRIPSAVE | + CPUID_SVM_TSCSCALE | CPUID_SVM_VMCBCLEAN | CPUID_SVM_FLUSHASID | + CPUID_SVM_PAUSEFILTER | CPUID_SVM_PFTHRESHOLD | + CPUID_SVM_V_VMSAVE_VMLOAD | CPUID_SVM_VGIF | + CPUID_SVM_VNMI | CPUID_SVM_SVME_ADDR_CHK, + .xlevel = 0x80000022, + .model_id = "AMD EPYC-Turin Processor", + .cache_info = &epyc_turin_cache_info, + }, }; /* @@ -5729,6 +6738,7 @@ static void max_x86_cpu_class_init(ObjectClass *oc, const void *data) xcc->ordering = 9; + xcc->max_features = true; xcc->model_description = "Enables all features supported by the accelerator in the current host"; @@ -5739,22 +6749,21 @@ static void max_x86_cpu_class_init(ObjectClass *oc, const void *data) static void max_x86_cpu_initfn(Object *obj) { X86CPU *cpu = X86_CPU(obj); - - /* We can't fill the features array here because we don't know yet if - * "migratable" is true or false. - */ - cpu->max_features = true; - object_property_set_bool(OBJECT(cpu), "pmu", true, &error_abort); + CPUX86State *env = &cpu->env; /* - * these defaults are used for TCG and all other accelerators - * besides KVM and HVF, which overwrite these values + * these defaults are used for TCG, other accelerators have overwritten + * these values */ - object_property_set_str(OBJECT(cpu), "vendor", CPUID_VENDOR_AMD, - &error_abort); - object_property_set_str(OBJECT(cpu), "model-id", - "QEMU TCG CPU version " QEMU_HW_VERSION, - &error_abort); + if (!env->cpuid_vendor1) { + object_property_set_str(OBJECT(cpu), "vendor", CPUID_VENDOR_AMD, + &error_abort); + } + if (!env->cpuid_model[0]) { + object_property_set_str(OBJECT(cpu), "model-id", + "QEMU TCG CPU version " QEMU_HW_VERSION, + &error_abort); + } } static const TypeInfo max_x86_cpu_type_info = { @@ -5764,7 +6773,7 @@ static const TypeInfo max_x86_cpu_type_info = { .class_init = max_x86_cpu_class_init, }; -static char *feature_word_description(FeatureWordInfo *f, uint32_t bit) +static char *feature_word_description(FeatureWordInfo *f) { assert(f->type == CPUID_FEATURE_WORD || f->type == MSR_FEATURE_WORD); @@ -5773,11 +6782,15 @@ static char *feature_word_description(FeatureWordInfo *f, uint32_t bit) { const char *reg = get_register_name_32(f->cpuid.reg); assert(reg); - return g_strdup_printf("CPUID.%02XH:%s", - f->cpuid.eax, reg); + if (!f->cpuid.needs_ecx) { + return g_strdup_printf("CPUID[eax=%02Xh].%s", f->cpuid.eax, reg); + } else { + return g_strdup_printf("CPUID[eax=%02Xh,ecx=%02Xh].%s", + f->cpuid.eax, f->cpuid.ecx, reg); + } } case MSR_FEATURE_WORD: - return g_strdup_printf("MSR(%02XH)", + return g_strdup_printf("MSR(%02Xh)", f->msr.index); } @@ -5797,12 +6810,13 @@ static bool x86_cpu_have_filtered_features(X86CPU *cpu) return false; } -static void 
mark_unavailable_features(X86CPU *cpu, FeatureWord w, uint64_t mask, - const char *verbose_prefix) +void mark_unavailable_features(X86CPU *cpu, FeatureWord w, uint64_t mask, + const char *verbose_prefix) { CPUX86State *env = &cpu->env; FeatureWordInfo *f = &feature_word_info[w]; int i; + g_autofree char *feat_word_str = feature_word_description(f); if (!cpu->force_features) { env->features[w] &= ~mask; @@ -5815,7 +6829,35 @@ static void mark_unavailable_features(X86CPU *cpu, FeatureWord w, uint64_t mask, for (i = 0; i < 64; ++i) { if ((1ULL << i) & mask) { - g_autofree char *feat_word_str = feature_word_description(f, i); + warn_report("%s: %s%s%s [bit %d]", + verbose_prefix, + feat_word_str, + f->feat_names[i] ? "." : "", + f->feat_names[i] ? f->feat_names[i] : "", i); + } + } +} + +void mark_forced_on_features(X86CPU *cpu, FeatureWord w, uint64_t mask, + const char *verbose_prefix) +{ + CPUX86State *env = &cpu->env; + FeatureWordInfo *f = &feature_word_info[w]; + int i; + + if (!cpu->force_features) { + env->features[w] |= mask; + } + + cpu->forced_on_features[w] |= mask; + + if (!verbose_prefix) { + return; + } + + for (i = 0; i < 64; ++i) { + if ((1ULL << i) & mask) { + g_autofree char *feat_word_str = feature_word_description(f); warn_report("%s: %s%s%s [bit %d]", verbose_prefix, feat_word_str, @@ -5833,10 +6875,7 @@ static void x86_cpuid_version_get_family(Object *obj, Visitor *v, CPUX86State *env = &cpu->env; uint64_t value; - value = (env->cpuid_version >> 8) & 0xf; - if (value == 0xf) { - value += (env->cpuid_version >> 20) & 0xff; - } + value = x86_cpu_family(env->cpuid_version); visit_type_uint64(v, name, &value, errp); } @@ -5874,8 +6913,7 @@ static void x86_cpuid_version_get_model(Object *obj, Visitor *v, CPUX86State *env = &cpu->env; uint64_t value; - value = (env->cpuid_version >> 4) & 0xf; - value |= ((env->cpuid_version >> 16) & 0xf) << 4; + value = x86_cpu_model(env->cpuid_version); visit_type_uint64(v, name, &value, errp); } @@ -5909,7 +6947,7 @@ static void x86_cpuid_version_get_stepping(Object *obj, Visitor *v, CPUX86State *env = &cpu->env; uint64_t value; - value = env->cpuid_version & 0xf; + value = x86_cpu_stepping(env->cpuid_version); visit_type_uint64(v, name, &value, errp); } @@ -5977,11 +7015,11 @@ static char *x86_cpuid_get_model_id(Object *obj, Error **errp) char *value; int i; - value = g_malloc(48 + 1); - for (i = 0; i < 48; i++) { + value = g_malloc(CPUID_MODEL_ID_SZ + 1); + for (i = 0; i < CPUID_MODEL_ID_SZ; i++) { value[i] = env->cpuid_model[i >> 2] >> (8 * (i & 3)); } - value[48] = '\0'; + value[CPUID_MODEL_ID_SZ] = '\0'; return value; } @@ -5996,7 +7034,7 @@ static void x86_cpuid_set_model_id(Object *obj, const char *model_id, model_id = ""; } len = strlen(model_id); - memset(env->cpuid_model, 0, 48); + memset(env->cpuid_model, 0, CPUID_MODEL_ID_SZ); for (i = 0; i < 48; i++) { if (i >= len) { c = '\0'; @@ -6517,6 +7555,20 @@ uint64_t x86_cpu_get_supported_feature_word(X86CPU *cpu, FeatureWord w) #endif break; + case FEAT_7_0_EDX: + /* + * Windows does not like ARCH_CAPABILITIES on AMD machines at all. + * Do not show the fake ARCH_CAPABILITIES MSR that KVM sets up, + * except if needed for migration. + * + * When arch_cap_always_on is removed, this tweak can move to + * kvm_arch_get_supported_cpuid. 
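A side note on the model-id plumbing earlier in this file (a standalone sketch, not part of the patch): the 48-byte model-id string that x86_cpuid_set_model_id() stores is packed four ASCII bytes per 32-bit cpuid_model[] word, little-endian, and leaves 0x80000002..0x80000004 later hand those words back four at a time. The model string below is only an example value.

    #include <stdio.h>
    #include <stdint.h>
    #include <string.h>

    #define CPUID_MODEL_ID_SZ 48

    int main(void)
    {
        const char *model_id = "AMD EPYC-Turin Processor";   /* example string */
        uint32_t cpuid_model[CPUID_MODEL_ID_SZ / 4] = { 0 };
        size_t len = strlen(model_id);

        for (size_t i = 0; i < CPUID_MODEL_ID_SZ; i++) {
            uint8_t c = i < len ? (uint8_t)model_id[i] : '\0';
            /* same indexing as the setter: word i >> 2, byte i & 3 */
            cpuid_model[i >> 2] |= (uint32_t)c << (8 * (i & 3));
        }

        /* Leaf 0x80000002 returns words 0..3 in EAX..EDX, so EAX carries
         * the first four characters "AMD " packed little-endian. */
        printf("0x80000002.EAX = 0x%08x\n", (unsigned)cpuid_model[0]); /* 0x20444d41 */
        return 0;
    }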
+ */ + if (cpu && IS_AMD_CPU(&cpu->env) && !cpu->arch_cap_always_on) { + unavail = CPUID_7_0_EDX_ARCH_CAPABILITIES; + } + break; + default: break; } @@ -6856,14 +7908,39 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, } *edx = env->features[FEAT_1_EDX]; if (threads_per_pkg > 1) { - *ebx |= threads_per_pkg << 16; + uint32_t num; + + /* + * For CPUID.01H.EBX[Bits 23-16], AMD requires logical processor + * count, but Intel needs maximum number of addressable IDs for + * logical processors per package. + */ + if ((IS_INTEL_CPU(env) || IS_ZHAOXIN_CPU(env))) { + num = 1 << apicid_pkg_offset(topo_info); + } else { + num = threads_per_pkg; + } + + /* Fixup overflow: max value for bits 23-16 is 255. */ + *ebx |= MIN(num, 255) << 16; } - if (!cpu->enable_pmu) { - *ecx &= ~CPUID_EXT_PDCM; + if (cpu->pdcm_on_even_without_pmu) { + if (!cpu->enable_pmu) { + *ecx &= ~CPUID_EXT_PDCM; + } } break; - case 2: - /* cache info: needed for Pentium Pro compatibility */ + case 2: { /* cache info: needed for Pentium Pro compatibility */ + const CPUCaches *caches; + + if (env->enable_legacy_cpuid2_cache) { + caches = &legacy_intel_cpuid2_cache_info; + } else if (env->enable_legacy_vendor_cache) { + caches = &legacy_intel_cache_info; + } else { + caches = &env->cache_info; + } + if (cpu->cache_info_passthrough) { x86_cpu_get_cache_cpuid(index, 0, eax, ebx, ecx, edx); break; @@ -6871,18 +7948,18 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, *eax = *ebx = *ecx = *edx = 0; break; } - *eax = 1; /* Number of CPUID[EAX=2] calls required */ - *ebx = 0; - if (!cpu->enable_l3_cache) { - *ecx = 0; + encode_cache_cpuid2(cpu, caches, eax, ebx, ecx, edx); + break; + } + case 4: { + const CPUCaches *caches; + + if (env->enable_legacy_vendor_cache) { + caches = &legacy_intel_cache_info; } else { - *ecx = cpuid2_cache_descriptor(env->cache_info_cpuid2.l3_cache); + caches = &env->cache_info; } - *edx = (cpuid2_cache_descriptor(env->cache_info_cpuid2.l1d_cache) << 16) | - (cpuid2_cache_descriptor(env->cache_info_cpuid2.l1i_cache) << 8) | - (cpuid2_cache_descriptor(env->cache_info_cpuid2.l2_cache)); - break; - case 4: + /* cache info: needed for Core compatibility */ if (cpu->cache_info_passthrough) { x86_cpu_get_cache_cpuid(index, count, eax, ebx, ecx, edx); @@ -6894,13 +7971,13 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, int host_vcpus_per_cache = 1 + ((*eax & 0x3FFC000) >> 14); *eax &= ~0xFC000000; - *eax |= max_core_ids_in_package(topo_info) << 26; + *eax |= MIN(max_core_ids_in_package(topo_info), 63) << 26; if (host_vcpus_per_cache > threads_per_pkg) { *eax &= ~0x3FFC000; /* Share the cache at package level. 
*/ - *eax |= max_thread_ids_for_cache(topo_info, - CPU_TOPOLOGY_LEVEL_SOCKET) << 14; + *eax |= MIN(max_thread_ids_for_cache(topo_info, + CPU_TOPOLOGY_LEVEL_SOCKET), 4095) << 14; } } } else if (cpu->vendor_cpuid_only && IS_AMD_CPU(env)) { @@ -6910,30 +7987,26 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, switch (count) { case 0: /* L1 dcache info */ - encode_cache_cpuid4(env->cache_info_cpuid4.l1d_cache, - topo_info, + encode_cache_cpuid4(caches->l1d_cache, topo_info, eax, ebx, ecx, edx); if (!cpu->l1_cache_per_core) { *eax &= ~MAKE_64BIT_MASK(14, 12); } break; case 1: /* L1 icache info */ - encode_cache_cpuid4(env->cache_info_cpuid4.l1i_cache, - topo_info, + encode_cache_cpuid4(caches->l1i_cache, topo_info, eax, ebx, ecx, edx); if (!cpu->l1_cache_per_core) { *eax &= ~MAKE_64BIT_MASK(14, 12); } break; case 2: /* L2 cache info */ - encode_cache_cpuid4(env->cache_info_cpuid4.l2_cache, - topo_info, + encode_cache_cpuid4(caches->l2_cache, topo_info, eax, ebx, ecx, edx); break; case 3: /* L3 cache info */ if (cpu->enable_l3_cache) { - encode_cache_cpuid4(env->cache_info_cpuid4.l3_cache, - topo_info, + encode_cache_cpuid4(caches->l3_cache, topo_info, eax, ebx, ecx, edx); break; } @@ -6944,6 +8017,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, } } break; + } case 5: /* MONITOR/MWAIT Leaf */ *eax = cpu->mwait.eax; /* Smallest monitor-line size in bytes */ @@ -6971,9 +8045,9 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, *edx = env->features[FEAT_7_0_EDX]; /* Feature flags */ } else if (count == 1) { *eax = env->features[FEAT_7_1_EAX]; + *ecx = env->features[FEAT_7_1_ECX]; *edx = env->features[FEAT_7_1_EDX]; *ebx = 0; - *ecx = 0; } else if (count == 2) { *edx = env->features[FEAT_7_2_EDX]; *eax = 0; @@ -7034,21 +8108,6 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, assert(!(*eax & ~0x1f)); *ebx &= 0xffff; /* The count doesn't need to be reliable. 
*/ break; - case 0x1C: - if (cpu->enable_pmu && (env->features[FEAT_7_0_EDX] & CPUID_7_0_EDX_ARCH_LBR)) { - x86_cpu_get_supported_cpuid(0x1C, 0, eax, ebx, ecx, edx); - *edx = 0; - } - break; - case 0x1F: - /* V2 Extended Topology Enumeration Leaf */ - if (!x86_has_extended_topo(env->avail_cpu_topo)) { - *eax = *ebx = *ecx = *edx = 0; - break; - } - - encode_topo_cpuid1f(env, count, topo_info, eax, ebx, ecx, edx); - break; case 0xD: { /* Processor Extended State */ *eax = 0; @@ -7189,6 +8248,12 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, } break; } + case 0x1C: + if (cpu->enable_pmu && (env->features[FEAT_7_0_EDX] & CPUID_7_0_EDX_ARCH_LBR)) { + x86_cpu_get_supported_cpuid(0x1C, 0, eax, ebx, ecx, edx); + *edx = 0; + } + break; case 0x1D: { /* AMX TILE, for now hardcoded for Sapphire Rapids*/ *eax = 0; @@ -7226,6 +8291,15 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, } break; } + case 0x1F: + /* V2 Extended Topology Enumeration Leaf */ + if (!x86_has_cpuid_0x1f(cpu)) { + *eax = *ebx = *ecx = *edx = 0; + break; + } + + encode_topo_cpuid1f(env, count, topo_info, eax, ebx, ecx, edx); + break; case 0x24: { *eax = 0; *ebx = 0; @@ -7262,9 +8336,15 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, break; case 0x80000000: *eax = env->cpuid_xlevel; - *ebx = env->cpuid_vendor1; - *edx = env->cpuid_vendor2; - *ecx = env->cpuid_vendor3; + + if (cpu->vendor_cpuid_only_v2 && + (IS_INTEL_CPU(env) || IS_ZHAOXIN_CPU(env))) { + *ebx = *ecx = *edx = 0; + } else { + *ebx = env->cpuid_vendor1; + *edx = env->cpuid_vendor2; + *ecx = env->cpuid_vendor3; + } break; case 0x80000001: *eax = env->cpuid_version; @@ -7272,7 +8352,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, *ecx = env->features[FEAT_8000_0001_ECX]; *edx = env->features[FEAT_8000_0001_EDX]; - if (tcg_enabled() && env->cpuid_vendor1 == CPUID_VENDOR_INTEL_1 && + if (tcg_enabled() && IS_INTEL_CPU(env) && !(env->hflags & HF_LMA_MASK)) { *edx &= ~CPUID_EXT2_SYSCALL; } @@ -7285,41 +8365,78 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, *ecx = env->cpuid_model[(index - 0x80000002) * 4 + 2]; *edx = env->cpuid_model[(index - 0x80000002) * 4 + 3]; break; - case 0x80000005: - /* cache info (L1 cache) */ + case 0x80000005: { + /* cache info (L1 cache/TLB Associativity Field) */ + const CPUCaches *caches; + + if (env->enable_legacy_vendor_cache) { + caches = &legacy_amd_cache_info; + } else { + caches = &env->cache_info; + } + if (cpu->cache_info_passthrough) { x86_cpu_get_cache_cpuid(index, 0, eax, ebx, ecx, edx); break; } + + if (cpu->vendor_cpuid_only_v2 && IS_INTEL_CPU(env)) { + *eax = *ebx = *ecx = *edx = 0; + break; + } + *eax = (L1_DTLB_2M_ASSOC << 24) | (L1_DTLB_2M_ENTRIES << 16) | (L1_ITLB_2M_ASSOC << 8) | (L1_ITLB_2M_ENTRIES); *ebx = (L1_DTLB_4K_ASSOC << 24) | (L1_DTLB_4K_ENTRIES << 16) | (L1_ITLB_4K_ASSOC << 8) | (L1_ITLB_4K_ENTRIES); - *ecx = encode_cache_cpuid80000005(env->cache_info_amd.l1d_cache); - *edx = encode_cache_cpuid80000005(env->cache_info_amd.l1i_cache); + *ecx = encode_cache_cpuid80000005(caches->l1d_cache); + *edx = encode_cache_cpuid80000005(caches->l1i_cache); break; - case 0x80000006: - /* cache info (L2 cache) */ + } + case 0x80000006: { /* cache info (L2 cache/TLB/L3 cache) */ + const CPUCaches *caches; + + if (env->enable_legacy_vendor_cache) { + caches = &legacy_amd_cache_info; + } else { + caches = &env->cache_info; + } + if (cpu->cache_info_passthrough) { x86_cpu_get_cache_cpuid(index, 0, eax, ebx, ecx, 
edx); break; } - *eax = (AMD_ENC_ASSOC(L2_DTLB_2M_ASSOC) << 28) | + + if (cpu->vendor_cpuid_only_v2 && + (IS_INTEL_CPU(env) || IS_ZHAOXIN_CPU(env))) { + *eax = *ebx = 0; + encode_cache_cpuid80000006(caches->l2_cache, + NULL, ecx, edx); + break; + } + + *eax = (X86_ENC_ASSOC(L2_DTLB_2M_ASSOC) << 28) | (L2_DTLB_2M_ENTRIES << 16) | - (AMD_ENC_ASSOC(L2_ITLB_2M_ASSOC) << 12) | + (X86_ENC_ASSOC(L2_ITLB_2M_ASSOC) << 12) | (L2_ITLB_2M_ENTRIES); - *ebx = (AMD_ENC_ASSOC(L2_DTLB_4K_ASSOC) << 28) | + *ebx = (X86_ENC_ASSOC(L2_DTLB_4K_ASSOC) << 28) | (L2_DTLB_4K_ENTRIES << 16) | - (AMD_ENC_ASSOC(L2_ITLB_4K_ASSOC) << 12) | + (X86_ENC_ASSOC(L2_ITLB_4K_ASSOC) << 12) | (L2_ITLB_4K_ENTRIES); - encode_cache_cpuid80000006(env->cache_info_amd.l2_cache, + + encode_cache_cpuid80000006(caches->l2_cache, cpu->enable_l3_cache ? - env->cache_info_amd.l3_cache : NULL, + caches->l3_cache : NULL, ecx, edx); break; + } case 0x80000007: *eax = 0; - *ebx = env->features[FEAT_8000_0007_EBX]; + if (cpu->vendor_cpuid_only_v2 && IS_INTEL_CPU(env)) { + *ebx = 0; + } else { + *ebx = env->features[FEAT_8000_0007_EBX]; + } *ecx = 0; *edx = env->features[FEAT_8000_0007_EDX]; break; @@ -7332,6 +8449,17 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, *eax |= (cpu->guest_phys_bits << 16); } *ebx = env->features[FEAT_8000_0008_EBX]; + + /* + * Don't emulate Bits [7:0] & Bits [15:12] for Intel/Zhaoxin, since + * they're using 0x1f leaf. + */ + if (cpu->vendor_cpuid_only_v2 && + (IS_INTEL_CPU(env) || IS_ZHAOXIN_CPU(env))) { + *ecx = *edx = 0; + break; + } + if (threads_per_pkg > 1) { /* * Bits 15:12 is "The number of bits in the initial @@ -7367,19 +8495,19 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, } switch (count) { case 0: /* L1 dcache info */ - encode_cache_cpuid8000001d(env->cache_info_amd.l1d_cache, + encode_cache_cpuid8000001d(env->cache_info.l1d_cache, topo_info, eax, ebx, ecx, edx); break; case 1: /* L1 icache info */ - encode_cache_cpuid8000001d(env->cache_info_amd.l1i_cache, + encode_cache_cpuid8000001d(env->cache_info.l1i_cache, topo_info, eax, ebx, ecx, edx); break; case 2: /* L2 cache info */ - encode_cache_cpuid8000001d(env->cache_info_amd.l2_cache, + encode_cache_cpuid8000001d(env->cache_info.l2_cache, topo_info, eax, ebx, ecx, edx); break; case 3: /* L3 cache info */ - encode_cache_cpuid8000001d(env->cache_info_amd.l3_cache, + encode_cache_cpuid8000001d(env->cache_info.l3_cache, topo_info, eax, ebx, ecx, edx); break; default: /* end of info */ @@ -7400,6 +8528,22 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, *edx = 0; } break; + case 0x8000001F: + *eax = *ebx = *ecx = *edx = 0; + if (sev_enabled()) { + *eax = 0x2; + *eax |= sev_es_enabled() ? 0x8 : 0; + *eax |= sev_snp_enabled() ? 0x10 : 0; + *ebx = sev_get_cbit_position() & 0x3f; /* EBX[5:0] */ + *ebx |= (sev_get_reduced_phys_bits() & 0x3f) << 6; /* EBX[11:6] */ + } + break; + case 0x80000021: + *eax = *ebx = *ecx = *edx = 0; + *eax = env->features[FEAT_8000_0021_EAX]; + *ebx = env->features[FEAT_8000_0021_EBX]; + *ecx = env->features[FEAT_8000_0021_ECX]; + break; case 0x80000022: *eax = *ebx = *ecx = *edx = 0; /* AMD Extended Performance Monitoring and Debug */ @@ -7432,21 +8576,6 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, *ecx = 0; *edx = 0; break; - case 0x8000001F: - *eax = *ebx = *ecx = *edx = 0; - if (sev_enabled()) { - *eax = 0x2; - *eax |= sev_es_enabled() ? 0x8 : 0; - *eax |= sev_snp_enabled() ? 
0x10 : 0; - *ebx = sev_get_cbit_position() & 0x3f; /* EBX[5:0] */ - *ebx |= (sev_get_reduced_phys_bits() & 0x3f) << 6; /* EBX[11:6] */ - } - break; - case 0x80000021: - *eax = *ebx = *ecx = *edx = 0; - *eax = env->features[FEAT_8000_0021_EAX]; - *ebx = env->features[FEAT_8000_0021_EBX]; - break; default: /* reserved values: zero */ *eax = 0; @@ -7520,7 +8649,11 @@ static void x86_cpu_reset_hold(Object *obj, ResetType type) env->idt.limit = 0xffff; env->gdt.limit = 0xffff; +#if defined(CONFIG_USER_ONLY) + env->ldt.limit = 0; +#else env->ldt.limit = 0xffff; +#endif env->ldt.flags = DESC_P_MASK | (2 << DESC_TYPE_SHIFT); env->tr.limit = 0xffff; env->tr.flags = DESC_P_MASK | (11 << DESC_TYPE_SHIFT); @@ -7666,7 +8799,7 @@ static void mce_init(X86CPU *cpu) CPUX86State *cenv = &cpu->env; unsigned int bank; - if (((cenv->cpuid_version >> 8) & 0xf) >= 6 + if (x86_cpu_family(cenv->cpuid_version) >= 6 && (cenv->features[FEAT_1_EDX] & (CPUID_MCE | CPUID_MCA)) == (CPUID_MCE | CPUID_MCA)) { cenv->mcg_cap = MCE_CAP_DEF | MCE_BANKS_DEF | @@ -7794,6 +8927,7 @@ static void x86_cpu_enable_xsave_components(X86CPU *cpu) */ void x86_cpu_expand_features(X86CPU *cpu, Error **errp) { + X86CPUClass *xcc = X86_CPU_GET_CLASS(cpu); CPUX86State *env = &cpu->env; FeatureWord w; int i; @@ -7813,12 +8947,12 @@ void x86_cpu_expand_features(X86CPU *cpu, Error **errp) } } - /*TODO: Now cpu->max_features doesn't overwrite features + /* TODO: Now xcc->max_features doesn't overwrite features * set using QOM properties, and we can convert * plus_features & minus_features to global properties * inside x86_cpu_parse_featurestr() too. */ - if (cpu->max_features) { + if (xcc->max_features) { for (w = 0; w < FEATURE_WORDS; w++) { /* Override only features that weren't set explicitly * by the user. @@ -7850,6 +8984,13 @@ void x86_cpu_expand_features(X86CPU *cpu, Error **errp) } } + if (!cpu->pdcm_on_even_without_pmu) { + /* PDCM is fixed1 bit for TDX */ + if (!cpu->enable_pmu && !is_tdx_vm()) { + env->features[FEAT_1_ECX] &= ~CPUID_EXT_PDCM; + } + } + for (i = 0; i < ARRAY_SIZE(feature_dependencies); i++) { FeatureDep *d = &feature_dependencies[i]; if (!(env->features[d->from.index] & d->from.mask)) { @@ -7878,6 +9019,7 @@ void x86_cpu_expand_features(X86CPU *cpu, Error **errp) x86_cpu_adjust_feat_level(cpu, FEAT_6_EAX); x86_cpu_adjust_feat_level(cpu, FEAT_7_0_ECX); x86_cpu_adjust_feat_level(cpu, FEAT_7_1_EAX); + x86_cpu_adjust_feat_level(cpu, FEAT_7_1_ECX); x86_cpu_adjust_feat_level(cpu, FEAT_7_1_EDX); x86_cpu_adjust_feat_level(cpu, FEAT_7_2_EDX); x86_cpu_adjust_feat_level(cpu, FEAT_8000_0001_EDX); @@ -7906,7 +9048,7 @@ void x86_cpu_expand_features(X86CPU *cpu, Error **errp) * cpu->vendor_cpuid_only has been unset for compatibility with older * machine types. 
*/ - if (x86_has_extended_topo(env->avail_cpu_topo) && + if (x86_has_cpuid_0x1f(cpu) && (IS_INTEL_CPU(env) || !cpu->vendor_cpuid_only)) { x86_cpu_adjust_level(cpu, &env->cpuid_min_level, 0x1F); } @@ -8078,46 +9220,34 @@ static bool x86_cpu_update_smp_cache_topo(MachineState *ms, X86CPU *cpu, level = machine_get_cache_topo_level(ms, CACHE_LEVEL_AND_TYPE_L1D); if (level != CPU_TOPOLOGY_LEVEL_DEFAULT) { - env->cache_info_cpuid4.l1d_cache->share_level = level; - env->cache_info_amd.l1d_cache->share_level = level; + env->cache_info.l1d_cache->share_level = level; } else { machine_set_cache_topo_level(ms, CACHE_LEVEL_AND_TYPE_L1D, - env->cache_info_cpuid4.l1d_cache->share_level); - machine_set_cache_topo_level(ms, CACHE_LEVEL_AND_TYPE_L1D, - env->cache_info_amd.l1d_cache->share_level); + env->cache_info.l1d_cache->share_level); } level = machine_get_cache_topo_level(ms, CACHE_LEVEL_AND_TYPE_L1I); if (level != CPU_TOPOLOGY_LEVEL_DEFAULT) { - env->cache_info_cpuid4.l1i_cache->share_level = level; - env->cache_info_amd.l1i_cache->share_level = level; + env->cache_info.l1i_cache->share_level = level; } else { machine_set_cache_topo_level(ms, CACHE_LEVEL_AND_TYPE_L1I, - env->cache_info_cpuid4.l1i_cache->share_level); - machine_set_cache_topo_level(ms, CACHE_LEVEL_AND_TYPE_L1I, - env->cache_info_amd.l1i_cache->share_level); + env->cache_info.l1i_cache->share_level); } level = machine_get_cache_topo_level(ms, CACHE_LEVEL_AND_TYPE_L2); if (level != CPU_TOPOLOGY_LEVEL_DEFAULT) { - env->cache_info_cpuid4.l2_cache->share_level = level; - env->cache_info_amd.l2_cache->share_level = level; + env->cache_info.l2_cache->share_level = level; } else { machine_set_cache_topo_level(ms, CACHE_LEVEL_AND_TYPE_L2, - env->cache_info_cpuid4.l2_cache->share_level); - machine_set_cache_topo_level(ms, CACHE_LEVEL_AND_TYPE_L2, - env->cache_info_amd.l2_cache->share_level); + env->cache_info.l2_cache->share_level); } level = machine_get_cache_topo_level(ms, CACHE_LEVEL_AND_TYPE_L3); if (level != CPU_TOPOLOGY_LEVEL_DEFAULT) { - env->cache_info_cpuid4.l3_cache->share_level = level; - env->cache_info_amd.l3_cache->share_level = level; + env->cache_info.l3_cache->share_level = level; } else { machine_set_cache_topo_level(ms, CACHE_LEVEL_AND_TYPE_L3, - env->cache_info_cpuid4.l3_cache->share_level); - machine_set_cache_topo_level(ms, CACHE_LEVEL_AND_TYPE_L3, - env->cache_info_amd.l3_cache->share_level); + env->cache_info.l3_cache->share_level); } if (!machine_check_smp_cache(ms, errp)) { @@ -8141,6 +9271,16 @@ static void x86_cpu_realizefn(DeviceState *dev, Error **errp) tcg_cflags_set(cs, CF_PCREL); #endif + /* + * x-vendor-cpuid-only and v2 should be initernal only. But + * QEMU doesn't support "internal" property. 
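In practical terms (an illustrative invocation, not taken from the patch): both properties default to on, so the check enforced just below in x86_cpu_realizefn() only fires when a user switches the old knob off by itself, for example

    qemu-system-x86_64 -cpu Skylake-Client,x-vendor-cpuid-only=off ...

which would now also need x-vendor-cpuid-only-v2=off, otherwise realize fails with the error below.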
+ */ + if (!cpu->vendor_cpuid_only && cpu->vendor_cpuid_only_v2) { + error_setg(errp, "x-vendor-cpuid-only-v2 property " + "depends on x-vendor-cpuid-only"); + return; + } + if (cpu->apic_id == UNASSIGNED_APIC_ID) { error_setg(errp, "apic-id property was not initialized properly"); return; @@ -8344,24 +9484,22 @@ static void x86_cpu_realizefn(DeviceState *dev, Error **errp) "CPU model '%s' doesn't support legacy-cache=off", name); return; } - env->cache_info_cpuid2 = env->cache_info_cpuid4 = env->cache_info_amd = - *cache_info; + env->cache_info = *cache_info; } else { /* Build legacy cache information */ - env->cache_info_cpuid2.l1d_cache = &legacy_l1d_cache; - env->cache_info_cpuid2.l1i_cache = &legacy_l1i_cache; - env->cache_info_cpuid2.l2_cache = &legacy_l2_cache_cpuid2; - env->cache_info_cpuid2.l3_cache = &legacy_l3_cache; + if (!cpu->consistent_cache) { + env->enable_legacy_cpuid2_cache = true; + } - env->cache_info_cpuid4.l1d_cache = &legacy_l1d_cache; - env->cache_info_cpuid4.l1i_cache = &legacy_l1i_cache; - env->cache_info_cpuid4.l2_cache = &legacy_l2_cache; - env->cache_info_cpuid4.l3_cache = &legacy_l3_cache; + if (!cpu->vendor_cpuid_only_v2) { + env->enable_legacy_vendor_cache = true; + } - env->cache_info_amd.l1d_cache = &legacy_l1d_cache_amd; - env->cache_info_amd.l1i_cache = &legacy_l1i_cache_amd; - env->cache_info_amd.l2_cache = &legacy_l2_cache_amd; - env->cache_info_amd.l3_cache = &legacy_l3_cache; + if (IS_AMD_CPU(env)) { + env->cache_info = legacy_amd_cache_info; + } else { + env->cache_info = legacy_intel_cache_info; + } } #ifndef CONFIG_USER_ONLY @@ -8520,6 +9658,16 @@ static void x86_cpu_register_feature_bit_props(X86CPUClass *xcc, static void x86_cpu_post_initfn(Object *obj) { +#ifndef CONFIG_USER_ONLY + if (current_machine && current_machine->cgs) { + x86_confidential_guest_cpu_instance_init( + X86_CONFIDENTIAL_GUEST(current_machine->cgs), (CPU(obj))); + } +#endif +} + +static void x86_cpu_init_xsave(void) +{ static bool first = true; uint64_t supported_xcr0; int i; @@ -8539,8 +9687,6 @@ static void x86_cpu_post_initfn(Object *obj) } } } - - accel_cpu_instance_init(CPU(obj)); } static void x86_cpu_init_default_topo(X86CPU *cpu) @@ -8609,6 +9755,13 @@ static void x86_cpu_initfn(Object *obj) if (xcc->model) { x86_cpu_load_model(cpu, xcc->model); } + + /* + * accel's cpu_instance_init may have the xsave check, + * so x86_ext_save_areas[] must be initialized before this. + */ + x86_cpu_init_xsave(); + accel_cpu_instance_init(CPU(obj)); } static int64_t x86_cpu_get_arch_id(CPUState *cs) @@ -8855,6 +10008,7 @@ static const Property x86_cpu_properties[] = { DEFINE_PROP_STRING("hv-vendor-id", X86CPU, hyperv_vendor), DEFINE_PROP_BOOL("cpuid-0xb", X86CPU, enable_cpuid_0xb, true), DEFINE_PROP_BOOL("x-vendor-cpuid-only", X86CPU, vendor_cpuid_only, true), + DEFINE_PROP_BOOL("x-vendor-cpuid-only-v2", X86CPU, vendor_cpuid_only_v2, true), DEFINE_PROP_BOOL("x-amd-topoext-features-only", X86CPU, amd_topoext_features_only, true), DEFINE_PROP_BOOL("lmce", X86CPU, enable_lmce, false), DEFINE_PROP_BOOL("l3-cache", X86CPU, enable_l3_cache, true), @@ -8869,6 +10023,7 @@ static const Property x86_cpu_properties[] = { * own cache information (see x86_cpu_load_def()). 
*/ DEFINE_PROP_BOOL("legacy-cache", X86CPU, legacy_cache, true), + DEFINE_PROP_BOOL("x-consistent-cache", X86CPU, consistent_cache, true), DEFINE_PROP_BOOL("legacy-multi-node", X86CPU, legacy_multi_node, false), DEFINE_PROP_BOOL("xen-vapic", X86CPU, xen_vapic, false), @@ -8890,6 +10045,12 @@ static const Property x86_cpu_properties[] = { DEFINE_PROP_BOOL("x-intel-pt-auto-level", X86CPU, intel_pt_auto_level, true), DEFINE_PROP_BOOL("x-l1-cache-per-thread", X86CPU, l1_cache_per_core, true), + DEFINE_PROP_BOOL("x-force-cpuid-0x1f", X86CPU, force_cpuid_0x1f, false), + + DEFINE_PROP_BOOL("x-arch-cap-always-on", X86CPU, + arch_cap_always_on, false), + DEFINE_PROP_BOOL("x-pdcm-on-even-without-pmu", X86CPU, + pdcm_on_even_without_pmu, false), }; #ifndef CONFIG_USER_ONLY diff --git a/target/i386/cpu.h b/target/i386/cpu.h index c51e0a4..ce94886 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -435,9 +435,11 @@ typedef enum X86Seg { #define MSR_SMI_COUNT 0x34 #define MSR_CORE_THREAD_COUNT 0x35 #define MSR_MTRRcap 0xfe +#define MSR_MTRR_MEM_TYPE_WB 0x06 #define MSR_MTRRcap_VCNT 8 #define MSR_MTRRcap_FIXRANGE_SUPPORT (1 << 8) #define MSR_MTRRcap_WC_SUPPORTED (1 << 10) +#define MSR_MTRR_ENABLE (1 << 11) #define MSR_IA32_SYSENTER_CS 0x174 #define MSR_IA32_SYSENTER_ESP 0x175 @@ -584,6 +586,7 @@ typedef enum X86Seg { #define XSTATE_OPMASK_BIT 5 #define XSTATE_ZMM_Hi256_BIT 6 #define XSTATE_Hi16_ZMM_BIT 7 +#define XSTATE_PT_BIT 8 #define XSTATE_PKRU_BIT 9 #define XSTATE_ARCH_LBR_BIT 15 #define XSTATE_XTILE_CFG_BIT 17 @@ -597,6 +600,7 @@ typedef enum X86Seg { #define XSTATE_OPMASK_MASK (1ULL << XSTATE_OPMASK_BIT) #define XSTATE_ZMM_Hi256_MASK (1ULL << XSTATE_ZMM_Hi256_BIT) #define XSTATE_Hi16_ZMM_MASK (1ULL << XSTATE_Hi16_ZMM_BIT) +#define XSTATE_PT_MASK (1ULL << XSTATE_PT_BIT) #define XSTATE_PKRU_MASK (1ULL << XSTATE_PKRU_BIT) #define XSTATE_ARCH_LBR_MASK (1ULL << XSTATE_ARCH_LBR_BIT) #define XSTATE_XTILE_CFG_MASK (1ULL << XSTATE_XTILE_CFG_BIT) @@ -619,6 +623,11 @@ typedef enum X86Seg { XSTATE_Hi16_ZMM_MASK | XSTATE_PKRU_MASK | \ XSTATE_XTILE_CFG_MASK | XSTATE_XTILE_DATA_MASK) +/* CPUID feature bits available in XSS */ +#define CPUID_XSTATE_XSS_MASK (XSTATE_ARCH_LBR_MASK) + +#define CPUID_XSTATE_MASK (CPUID_XSTATE_XCR0_MASK | CPUID_XSTATE_XSS_MASK) + /* CPUID feature words */ typedef enum FeatureWord { FEAT_1_EDX, /* CPUID[1].EDX */ @@ -634,6 +643,7 @@ typedef enum FeatureWord { FEAT_8000_0008_EBX, /* CPUID[8000_0008].EBX */ FEAT_8000_0021_EAX, /* CPUID[8000_0021].EAX */ FEAT_8000_0021_EBX, /* CPUID[8000_0021].EBX */ + FEAT_8000_0021_ECX, /* CPUID[8000_0021].ECX */ FEAT_8000_0022_EAX, /* CPUID[8000_0022].EAX */ FEAT_C000_0001_EDX, /* CPUID[C000_0001].EDX */ FEAT_KVM, /* CPUID[4000_0001].EAX (KVM_CPUID_FEATURES) */ @@ -661,12 +671,22 @@ typedef enum FeatureWord { FEAT_SGX_12_1_EAX, /* CPUID[EAX=0x12,ECX=1].EAX (SGX ATTRIBUTES[31:0]) */ FEAT_XSAVE_XSS_LO, /* CPUID[EAX=0xd,ECX=1].ECX */ FEAT_XSAVE_XSS_HI, /* CPUID[EAX=0xd,ECX=1].EDX */ + FEAT_7_1_ECX, /* CPUID[EAX=7,ECX=1].ECX */ FEAT_7_1_EDX, /* CPUID[EAX=7,ECX=1].EDX */ FEAT_7_2_EDX, /* CPUID[EAX=7,ECX=2].EDX */ FEAT_24_0_EBX, /* CPUID[EAX=0x24,ECX=0].EBX */ FEATURE_WORDS, } FeatureWord; +typedef struct FeatureMask { + FeatureWord index; + uint64_t mask; +} FeatureMask; + +typedef struct FeatureDep { + FeatureMask from, to; +} FeatureDep; + typedef uint64_t FeatureWordArray[FEATURE_WORDS]; uint64_t x86_cpu_get_supported_feature_word(X86CPU *cpu, FeatureWord w); @@ -899,6 +919,8 @@ uint64_t x86_cpu_get_supported_feature_word(X86CPU *cpu, FeatureWord 
w); #define CPUID_7_0_ECX_LA57 (1U << 16) /* Read Processor ID */ #define CPUID_7_0_ECX_RDPID (1U << 22) +/* KeyLocker */ +#define CPUID_7_0_ECX_KeyLocker (1U << 23) /* Bus Lock Debug Exception */ #define CPUID_7_0_ECX_BUS_LOCK_DETECT (1U << 24) /* Cache Line Demote Instruction */ @@ -920,6 +942,8 @@ uint64_t x86_cpu_get_supported_feature_word(X86CPU *cpu, FeatureWord w); #define CPUID_7_0_EDX_FSRM (1U << 4) /* AVX512 Vector Pair Intersection to a Pair of Mask Registers */ #define CPUID_7_0_EDX_AVX512_VP2INTERSECT (1U << 8) + /* "md_clear" VERW clears CPU buffers */ +#define CPUID_7_0_EDX_MD_CLEAR (1U << 10) /* SERIALIZE instruction */ #define CPUID_7_0_EDX_SERIALIZE (1U << 14) /* TSX Suspend Load Address Tracking instruction */ @@ -957,6 +981,8 @@ uint64_t x86_cpu_get_supported_feature_word(X86CPU *cpu, FeatureWord w); #define CPUID_7_1_EAX_AVX_VNNI (1U << 4) /* AVX512 BFloat16 Instruction */ #define CPUID_7_1_EAX_AVX512_BF16 (1U << 5) +/* Linear address space separation */ +#define CPUID_7_1_EAX_LASS (1U << 6) /* CMPCCXADD Instructions */ #define CPUID_7_1_EAX_CMPCCXADD (1U << 7) /* Fast Zero REP MOVS */ @@ -978,6 +1004,9 @@ uint64_t x86_cpu_get_supported_feature_word(X86CPU *cpu, FeatureWord w); /* Linear Address Masking */ #define CPUID_7_1_EAX_LAM (1U << 26) +/* The immediate form of MSR access instructions */ +#define CPUID_7_1_ECX_MSR_IMM (1U << 5) + /* Support for VPDPB[SU,UU,SS]D[,S] */ #define CPUID_7_1_EDX_AVX_VNNI_INT8 (1U << 4) /* AVX NE CONVERT Instructions */ @@ -1001,6 +1030,7 @@ uint64_t x86_cpu_get_supported_feature_word(X86CPU *cpu, FeatureWord w); #define CPUID_7_2_EDX_DDPD_U (1U << 3) /* Indicate bit 10 of the IA32_SPEC_CTRL MSR is supported */ #define CPUID_7_2_EDX_BHI_CTRL (1U << 4) + /* Do not exhibit MXCSR Configuration Dependent Timing (MCDT) behavior */ #define CPUID_7_2_EDX_MCDT_NO (1U << 5) @@ -1070,12 +1100,18 @@ uint64_t x86_cpu_get_supported_feature_word(X86CPU *cpu, FeatureWord w); /* Processor ignores nested data breakpoints */ #define CPUID_8000_0021_EAX_NO_NESTED_DATA_BP (1U << 0) +/* WRMSR to FS_BASE, GS_BASE, or KERNEL_GS_BASE is non-serializing */ +#define CPUID_8000_0021_EAX_FS_GS_BASE_NS (1U << 1) /* LFENCE is always serializing */ #define CPUID_8000_0021_EAX_LFENCE_ALWAYS_SERIALIZING (1U << 2) +/* Memory form of VERW mitigates TSA */ +#define CPUID_8000_0021_EAX_VERW_CLEAR (1U << 5) /* Null Selector Clears Base */ #define CPUID_8000_0021_EAX_NULL_SEL_CLR_BASE (1U << 6) /* Automatic IBRS */ #define CPUID_8000_0021_EAX_AUTO_IBRS (1U << 8) +/* Indicates support for IC prefetch */ +#define CPUID_8000_0021_EAX_PREFETCHI (1U << 20) /* Enhanced Return Address Predictor Scurity */ #define CPUID_8000_0021_EAX_ERAPS (1U << 24) /* Selective Branch Predictor Barrier */ @@ -1093,6 +1129,11 @@ uint64_t x86_cpu_get_supported_feature_word(X86CPU *cpu, FeatureWord w); */ #define CPUID_8000_0021_EBX_RAPSIZE (8U << 16) +/* CPU is not vulnerable TSA SA-SQ attack */ +#define CPUID_8000_0021_ECX_TSA_SQ_NO (1U << 1) +/* CPU is not vulnerable TSA SA-L1 attack */ +#define CPUID_8000_0021_ECX_TSA_L1_NO (1U << 2) + /* Performance Monitoring Version 2 */ #define CPUID_8000_0022_EAX_PERFMON_V2 (1U << 0) @@ -1100,6 +1141,7 @@ uint64_t x86_cpu_get_supported_feature_word(X86CPU *cpu, FeatureWord w); #define CPUID_XSAVE_XSAVEC (1U << 1) #define CPUID_XSAVE_XGETBV1 (1U << 2) #define CPUID_XSAVE_XSAVES (1U << 3) +#define CPUID_XSAVE_XFD (1U << 4) #define CPUID_6_EAX_ARAT (1U << 2) @@ -1127,7 +1169,8 @@ uint64_t x86_cpu_get_supported_feature_word(X86CPU *cpu, FeatureWord w); /* PMM 
enabled */ #define CPUID_C000_0001_EDX_PMM_EN (1U << 13) -#define CPUID_VENDOR_SZ 12 +#define CPUID_VENDOR_SZ 12 +#define CPUID_MODEL_ID_SZ 48 #define CPUID_VENDOR_INTEL_1 0x756e6547 /* "Genu" */ #define CPUID_VENDOR_INTEL_2 0x49656e69 /* "ineI" */ @@ -1741,12 +1784,6 @@ typedef enum TPRAccess { /* Cache information data structures: */ -enum CacheType { - DATA_CACHE, - INSTRUCTION_CACHE, - UNIFIED_CACHE -}; - typedef struct CPUCacheInfo { enum CacheType type; uint8_t level; @@ -2046,11 +2083,14 @@ typedef struct CPUArchState { /* Features that were explicitly enabled/disabled */ FeatureWordArray user_features; uint32_t cpuid_model[12]; - /* Cache information for CPUID. When legacy-cache=on, the cache data + /* + * Cache information for CPUID. When legacy-cache=on, the cache data * on each CPUID leaf will be different, because we keep compatibility * with old QEMU versions. */ - CPUCaches cache_info_cpuid2, cache_info_cpuid4, cache_info_amd; + CPUCaches cache_info; + bool enable_legacy_cpuid2_cache; + bool enable_legacy_vendor_cache; /* MTRRs */ uint64_t mtrr_fixed[11]; @@ -2096,7 +2136,7 @@ typedef struct CPUArchState { QEMUTimer *xen_periodic_timer; QemuMutex xen_timers_lock; #endif -#if defined(CONFIG_HVF) +#if defined(CONFIG_HVF) || defined(CONFIG_MSHV) void *emu_mmio_buf; #endif @@ -2170,7 +2210,6 @@ struct ArchCPU { bool expose_tcg; bool migratable; bool migrate_smi_count; - bool max_features; /* Enable all supported features automatically */ uint32_t apic_id; /* Enables publishing of TSC increment and Local APIC bus frequencies to @@ -2192,6 +2231,9 @@ struct ArchCPU { /* Features that were filtered out because of missing host capabilities */ FeatureWordArray filtered_features; + /* Features that are forced enabled by underlying hypervisor, e.g., TDX */ + FeatureWordArray forced_on_features; + /* Enable PMU CPUID bits. This can't be enabled by default yet because * it doesn't have ABI stability guarantees, as it passes all PMU CPUID * bits returned by GET_SUPPORTED_CPUID (that depend on host CPU and kernel @@ -2230,6 +2272,13 @@ struct ArchCPU { */ bool legacy_cache; + /* + * Compatibility bits for old machine types. + * If true, use the same cache model in CPUID leaf 0x2 + * and 0x4. + */ + bool consistent_cache; + /* Compatibility bits for old machine types. * If true decode the CPUID Function 0x8000001E_ECX to support multiple * nodes per processor @@ -2239,12 +2288,24 @@ struct ArchCPU { /* Compatibility bits for old machine types: */ bool enable_cpuid_0xb; + /* Force to enable cpuid 0x1f */ + bool force_cpuid_0x1f; + /* Enable auto level-increase for all CPUID leaves */ bool full_cpuid_auto_level; - /* Only advertise CPUID leaves defined by the vendor */ + /* + * Compatibility bits for old machine types (PC machine v6.0 and older). + * Only advertise CPUID leaves defined by the vendor. + */ bool vendor_cpuid_only; + /* + * Compatibility bits for old machine types (PC machine v10.0 and older). + * Only advertise CPUID leaves defined by the vendor. + */ + bool vendor_cpuid_only_v2; + /* Only advertise TOPOEXT features that AMD defines */ bool amd_topoext_features_only; @@ -2263,6 +2324,18 @@ struct ArchCPU { /* Forcefully disable KVM PV features not exposed in guest CPUIDs */ bool kvm_pv_enforce_cpuid; + /* + * Expose arch-capabilities unconditionally even on AMD models, for backwards + * compatibility with QEMU <10.1. + */ + bool arch_cap_always_on; + + /* + * Backwards compatibility with QEMU <10.1. 
The PDCM feature is now disabled when + * PMU is not available, but prior to 10.1 it was enabled even if PMU is off. + */ + bool pdcm_on_even_without_pmu; + /* Number of physical address bits supported */ uint32_t phys_bits; @@ -2317,6 +2390,7 @@ struct X86CPUClass { */ const X86CPUModel *model; + bool max_features; /* Enable all supported features automatically */ bool host_cpuid_required; int ordering; bool migration_safe; @@ -2385,7 +2459,14 @@ static inline void cpu_x86_load_seg_cache(CPUX86State *env, SegmentCache *sc; unsigned int new_hflags; - sc = &env->segs[seg_reg]; + if (seg_reg == R_LDTR) { + sc = &env->ldt; + } else if (seg_reg == R_TR) { + sc = &env->tr; + } else { + sc = &env->segs[seg_reg]; + } + sc->selector = selector; sc->base = base; sc->limit = limit; @@ -2499,11 +2580,27 @@ void cpu_set_apic_feature(CPUX86State *env); void host_cpuid(uint32_t function, uint32_t count, uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx); bool cpu_has_x2apic_feature(CPUX86State *env); +bool is_feature_word_cpuid(uint32_t feature, uint32_t index, int reg); +void mark_unavailable_features(X86CPU *cpu, FeatureWord w, uint64_t mask, + const char *verbose_prefix); +void mark_forced_on_features(X86CPU *cpu, FeatureWord w, uint64_t mask, + const char *verbose_prefix); + +static inline bool x86_has_cpuid_0x1f(X86CPU *cpu) +{ + return cpu->force_cpuid_0x1f || + x86_has_extended_topo(cpu->env.avail_cpu_topo); +} /* helper.c */ void x86_cpu_set_a20(X86CPU *cpu, int a20_state); void cpu_sync_avx_hflag(CPUX86State *env); +typedef enum X86ASIdx { + X86ASIdx_MEM = 0, + X86ASIdx_SMM = 1, +} X86ASIdx; + #ifndef CONFIG_USER_ONLY static inline int x86_asidx_from_attrs(CPUState *cs, MemTxAttrs attrs) { @@ -2627,6 +2724,36 @@ static inline int32_t x86_get_a20_mask(CPUX86State *env) } } +static inline uint32_t x86_cpu_family(uint32_t eax) +{ + uint32_t family = (eax >> 8) & 0xf; + + if (family == 0xf) { + family += (eax >> 20) & 0xff; + } + + return family; +} + +static inline uint32_t x86_cpu_model(uint32_t eax) +{ + uint32_t family, model; + + family = x86_cpu_family(eax); + model = (eax >> 4) & 0xf; + + if (family >= 0x6) { + model += ((eax >> 16) & 0xf) << 4; + } + + return model; +} + +static inline uint32_t x86_cpu_stepping(uint32_t eax) +{ + return eax & 0xf; +} + static inline bool cpu_has_vmx(CPUX86State *env) { return env->features[FEAT_1_ECX] & CPUID_EXT_VMX; diff --git a/target/i386/emulate/meson.build b/target/i386/emulate/meson.build index 4edd4f4..b6dafb6 100644 --- a/target/i386/emulate/meson.build +++ b/target/i386/emulate/meson.build @@ -1,5 +1,8 @@ -i386_system_ss.add(when: [hvf, 'CONFIG_HVF'], if_true: files( +emulator_files = files( 'x86_decode.c', 'x86_emu.c', 'x86_flags.c', -)) +) + +i386_system_ss.add(when: [hvf, 'CONFIG_HVF'], if_true: emulator_files) +i386_system_ss.add(when: 'CONFIG_MSHV', if_true: emulator_files) diff --git a/target/i386/emulate/x86_decode.c b/target/i386/emulate/x86_decode.c index 2eca398..97bd6f1 100644 --- a/target/i386/emulate/x86_decode.c +++ b/target/i386/emulate/x86_decode.c @@ -71,10 +71,16 @@ static inline uint64_t decode_bytes(CPUX86State *env, struct x86_decode *decode, VM_PANIC_EX("%s invalid size %d\n", __func__, size); break; } - target_ulong va = linear_rip(env_cpu(env), env->eip) + decode->len; - emul_ops->read_mem(env_cpu(env), &val, va, size); + + /* copy the bytes from the instruction stream, if available */ + if (decode->stream && decode->len + size <= decode->stream->len) { + memcpy(&val, decode->stream->bytes + decode->len, size); + } 
else { + target_ulong va = linear_rip(env_cpu(env), env->eip) + decode->len; + emul_ops->fetch_instruction(env_cpu(env), &val, va, size); + } decode->len += size; - + return val; } @@ -2076,9 +2082,10 @@ static void decode_opcodes(CPUX86State *env, struct x86_decode *decode) } } -uint32_t decode_instruction(CPUX86State *env, struct x86_decode *decode) +static uint32_t decode_opcode(CPUX86State *env, struct x86_decode *decode) { memset(decode, 0, sizeof(*decode)); + decode_prefix(env, decode); set_addressing_size(env, decode); set_operand_size(env, decode); @@ -2088,6 +2095,18 @@ uint32_t decode_instruction(CPUX86State *env, struct x86_decode *decode) return decode->len; } +uint32_t decode_instruction(CPUX86State *env, struct x86_decode *decode) +{ + return decode_opcode(env, decode); +} + +uint32_t decode_instruction_stream(CPUX86State *env, struct x86_decode *decode, + struct x86_insn_stream *stream) +{ + decode->stream = stream; + return decode_opcode(env, decode); +} + void init_decoder(void) { int i; diff --git a/target/i386/emulate/x86_decode.h b/target/i386/emulate/x86_decode.h index 927645a..1cadf36 100644 --- a/target/i386/emulate/x86_decode.h +++ b/target/i386/emulate/x86_decode.h @@ -272,6 +272,11 @@ typedef struct x86_decode_op { }; } x86_decode_op; +typedef struct x86_insn_stream { + const uint8_t *bytes; + size_t len; +} x86_insn_stream; + typedef struct x86_decode { int len; uint8_t opcode[4]; @@ -298,11 +303,15 @@ typedef struct x86_decode { struct x86_modrm modrm; struct x86_decode_op op[4]; bool is_fpu; + + x86_insn_stream *stream; } x86_decode; uint64_t sign(uint64_t val, int size); uint32_t decode_instruction(CPUX86State *env, struct x86_decode *decode); +uint32_t decode_instruction_stream(CPUX86State *env, struct x86_decode *decode, + struct x86_insn_stream *stream); void *get_reg_ref(CPUX86State *env, int reg, int rex_present, int is_extended, int size); diff --git a/target/i386/emulate/x86_emu.c b/target/i386/emulate/x86_emu.c index 4890e0a..4409f7b 100644 --- a/target/i386/emulate/x86_emu.c +++ b/target/i386/emulate/x86_emu.c @@ -31,8 +31,8 @@ // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public -// License along with this library; if not, write to the Free Software -// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA B 02110-1301 USA +// License along with this library; if not, see +// <https://www.gnu.org/licenses/>. ///////////////////////////////////////////////////////////////////////// #include "qemu/osdep.h" @@ -1246,7 +1246,8 @@ static void init_cmd_handler(void) bool exec_instruction(CPUX86State *env, struct x86_decode *ins) { if (!_cmd_handler[ins->cmd].handler) { - printf("Unimplemented handler (" TARGET_FMT_lx ") for %d (%x %x) \n", env->eip, + printf("Unimplemented handler (" TARGET_FMT_lx ") for %d (%x %x)\n", + env->eip, ins->cmd, ins->opcode[0], ins->opcode_len > 1 ? 
ins->opcode[1] : 0); env->eip += ins->len; diff --git a/target/i386/emulate/x86_emu.h b/target/i386/emulate/x86_emu.h index a1a9612..05686b1 100644 --- a/target/i386/emulate/x86_emu.h +++ b/target/i386/emulate/x86_emu.h @@ -24,6 +24,8 @@ #include "cpu.h" struct x86_emul_ops { + void (*fetch_instruction)(CPUState *cpu, void *data, target_ulong addr, + int bytes); void (*read_mem)(CPUState *cpu, void *data, target_ulong addr, int bytes); void (*write_mem)(CPUState *cpu, void *data, target_ulong addr, int bytes); void (*read_segment_descriptor)(CPUState *cpu, struct x86_segment_descriptor *desc, diff --git a/target/i386/emulate/x86_flags.c b/target/i386/emulate/x86_flags.c index 47bc197..6592193 100644 --- a/target/i386/emulate/x86_flags.c +++ b/target/i386/emulate/x86_flags.c @@ -14,8 +14,8 @@ // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public -// License along with this library; if not, write to the Free Software -// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA B 02110-1301 USA +// License along with this library; if not, see +// <https://www.gnu.org/licenses/>. ///////////////////////////////////////////////////////////////////////// /* * flags functions @@ -255,19 +255,19 @@ void lflags_to_rflags(CPUX86State *env) void rflags_to_lflags(CPUX86State *env) { - target_ulong cf_xor_of; + target_ulong cf_af, cf_xor_of; + /* Leave the low byte zero so that parity is always even... */ + env->cc_dst = !(env->eflags & CC_Z) << 8; + + /* ... and therefore cc_src always uses opposite polarity. */ env->cc_src = CC_P; env->cc_src ^= env->eflags & (CC_S | CC_P); /* rotate right by one to move CF and AF into the carry-out positions */ - env->cc_src |= ( - (env->eflags >> 1) | - (env->eflags << (TARGET_LONG_BITS - 1))) & (CC_C | CC_A); + cf_af = env->eflags & (CC_C | CC_A); + env->cc_src |= ((cf_af >> 1) | (cf_af << (TARGET_LONG_BITS - 1))); - cf_xor_of = (env->eflags & (CC_C | CC_O)) + (CC_O - CC_C); + cf_xor_of = ((env->eflags & (CC_C | CC_O)) + (CC_O - CC_C)) & CC_O; env->cc_src |= -cf_xor_of & LF_MASK_PO; - - /* Leave the low byte zero so that parity is not affected. */ - env->cc_dst = !(env->eflags & CC_Z) << 8; } diff --git a/target/i386/emulate/x86_flags.h b/target/i386/emulate/x86_flags.h index 28b008e..a395c83 100644 --- a/target/i386/emulate/x86_flags.h +++ b/target/i386/emulate/x86_flags.h @@ -14,8 +14,8 @@ // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public -// License along with this library; if not, write to the Free Software -// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA B 02110-1301 USA +// License along with this library; if not, see +// <https://www.gnu.org/licenses/>. 
///////////////////////////////////////////////////////////////////////// /* * x86 eflags functions diff --git a/target/i386/helper.c b/target/i386/helper.c index e0aaed3..72b2e19 100644 --- a/target/i386/helper.c +++ b/target/i386/helper.c @@ -110,6 +110,7 @@ int cpu_x86_support_mca_broadcast(CPUX86State *env) /* x86 mmu */ /* XXX: add PGE support */ +#ifndef CONFIG_USER_ONLY void x86_cpu_set_a20(X86CPU *cpu, int a20_state) { CPUX86State *env = &cpu->env; @@ -129,6 +130,7 @@ void x86_cpu_set_a20(X86CPU *cpu, int a20_state) env->a20_mask = ~(1 << 20) | (a20_state << 20); } } +#endif void cpu_x86_update_cr0(CPUX86State *env, uint32_t new_cr0) { @@ -619,6 +621,10 @@ void do_cpu_init(X86CPU *cpu) void do_cpu_sipi(X86CPU *cpu) { + CPUX86State *env = &cpu->env; + if (env->hflags & HF_SMM_MASK) { + return; + } apic_sipi(cpu->apic_state); } diff --git a/target/i386/host-cpu.c b/target/i386/host-cpu.c index a2d3830..d5e2bb5 100644 --- a/target/i386/host-cpu.c +++ b/target/i386/host-cpu.c @@ -15,7 +15,7 @@ #include "system/system.h" /* Note: Only safe for use on x86(-64) hosts */ -static uint32_t host_cpu_phys_bits(void) +uint32_t host_cpu_phys_bits(void) { uint32_t eax; uint32_t host_phys_bits; @@ -80,7 +80,6 @@ bool host_cpu_realizefn(CPUState *cs, Error **errp) return true; } -#define CPUID_MODEL_ID_SZ 48 /** * cpu_x86_fill_model_id: * Get CPUID model ID string from host CPU. @@ -118,13 +117,13 @@ void host_cpu_vendor_fms(char *vendor, int *family, int *model, int *stepping) host_cpuid(0x1, 0, &eax, &ebx, &ecx, &edx); if (family) { - *family = ((eax >> 8) & 0x0F) + ((eax >> 20) & 0xFF); + *family = x86_cpu_family(eax); } if (model) { - *model = ((eax >> 4) & 0x0F) | ((eax & 0xF0000) >> 12); + *model = x86_cpu_model(eax); } if (stepping) { - *stepping = eax & 0x0F; + *stepping = x86_cpu_stepping(eax); } } @@ -132,27 +131,27 @@ void host_cpu_instance_init(X86CPU *cpu) { X86CPUClass *xcc = X86_CPU_GET_CLASS(cpu); - if (xcc->model) { - char vendor[CPUID_VENDOR_SZ + 1]; - - host_cpu_vendor_fms(vendor, NULL, NULL, NULL); - object_property_set_str(OBJECT(cpu), "vendor", vendor, &error_abort); - } -} - -void host_cpu_max_instance_init(X86CPU *cpu) -{ char vendor[CPUID_VENDOR_SZ + 1] = { 0 }; char model_id[CPUID_MODEL_ID_SZ + 1] = { 0 }; int family, model, stepping; - /* Use max host physical address bits if -cpu max option is applied */ - object_property_set_bool(OBJECT(cpu), "host-phys-bits", true, &error_abort); - + /* + * setting vendor applies to both max/host and builtin_x86_defs CPU. + * FIXME: this probably should warn or should be skipped if vendors do + * not match, because family numbers are incompatible between Intel and AMD. 
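Several hunks in this diff replace open-coded family/model extraction with the new x86_cpu_family()/x86_cpu_model() helpers from cpu.h; unlike the old host_cpu_vendor_fms() code above, which added the extended family unconditionally, the helper only does so when the base family field is 0xf. A standalone sketch of that decoding (not part of the patch), using the EPYC-Turin definition added earlier in this diff (family 26, model 0) as the worked value:

    #include <stdio.h>
    #include <stdint.h>

    /* Same decoding as the x86_cpu_family()/x86_cpu_model() helpers in cpu.h. */
    static uint32_t fms_family(uint32_t eax)
    {
        uint32_t family = (eax >> 8) & 0xf;
        if (family == 0xf) {
            family += (eax >> 20) & 0xff;       /* extended family is additive */
        }
        return family;
    }

    static uint32_t fms_model(uint32_t eax)
    {
        uint32_t model = (eax >> 4) & 0xf;
        if (fms_family(eax) >= 6) {
            model += ((eax >> 16) & 0xf) << 4;  /* extended model is the high nibble */
        }
        return model;
    }

    int main(void)
    {
        /* EPYC-Turin: family 26, model 0, stepping 0.  Family 26 (0x1a) is
         * encoded as base family 0xf plus extended family 0xb, so leaf 1
         * EAX reads 0x00b00f00. */
        uint32_t eax = 0x00b00f00;
        printf("family %u model %u stepping %u\n",
               (unsigned)fms_family(eax), (unsigned)fms_model(eax),
               (unsigned)(eax & 0xf));          /* prints: family 26 model 0 stepping 0 */
        return 0;
    }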
+ */ host_cpu_vendor_fms(vendor, &family, &model, &stepping); + object_property_set_str(OBJECT(cpu), "vendor", vendor, &error_abort); + + if (!xcc->max_features) { + return; + } + host_cpu_fill_model_id(model_id); - object_property_set_str(OBJECT(cpu), "vendor", vendor, &error_abort); + /* Use max host physical address bits if -cpu max option is applied */ + object_property_set_bool(OBJECT(cpu), "host-phys-bits", true, &error_abort); + object_property_set_int(OBJECT(cpu), "family", family, &error_abort); object_property_set_int(OBJECT(cpu), "model", model, &error_abort); object_property_set_int(OBJECT(cpu), "stepping", stepping, @@ -161,6 +160,15 @@ void host_cpu_max_instance_init(X86CPU *cpu) &error_abort); } +bool is_host_cpu_intel(void) +{ + char vendor[CPUID_VENDOR_SZ + 1]; + + host_cpu_vendor_fms(vendor, NULL, NULL, NULL); + + return g_str_equal(vendor, CPUID_VENDOR_INTEL); +} + static void host_cpu_class_init(ObjectClass *oc, const void *data) { X86CPUClass *xcc = X86_CPU_CLASS(oc); diff --git a/target/i386/host-cpu.h b/target/i386/host-cpu.h index 6a9bc91..ee65324 100644 --- a/target/i386/host-cpu.h +++ b/target/i386/host-cpu.h @@ -10,10 +10,11 @@ #ifndef HOST_CPU_H #define HOST_CPU_H +uint32_t host_cpu_phys_bits(void); void host_cpu_instance_init(X86CPU *cpu); -void host_cpu_max_instance_init(X86CPU *cpu); bool host_cpu_realizefn(CPUState *cs, Error **errp); void host_cpu_vendor_fms(char *vendor, int *family, int *model, int *stepping); +bool is_host_cpu_intel(void); #endif /* HOST_CPU_H */ diff --git a/target/i386/hvf/hvf-cpu.c b/target/i386/hvf/hvf-cpu.c index dfdda70..94ee096 100644 --- a/target/i386/hvf/hvf-cpu.c +++ b/target/i386/hvf/hvf-cpu.c @@ -21,8 +21,6 @@ static void hvf_cpu_max_instance_init(X86CPU *cpu) { CPUX86State *env = &cpu->env; - host_cpu_max_instance_init(cpu); - env->cpuid_min_level = hvf_get_supported_cpuid(0x0, 0, R_EAX); env->cpuid_min_xlevel = @@ -61,13 +59,14 @@ static void hvf_cpu_xsave_init(void) static void hvf_cpu_instance_init(CPUState *cs) { X86CPU *cpu = X86_CPU(cs); + X86CPUClass *xcc = X86_CPU_GET_CLASS(cpu); host_cpu_instance_init(cpu); /* Special cases not set in the X86CPUDefinition structs: */ /* TODO: in-kernel irqchip for hvf */ - if (cpu->max_features) { + if (xcc->max_features) { hvf_cpu_max_instance_init(cpu); } diff --git a/target/i386/hvf/hvf.c b/target/i386/hvf/hvf.c index 99e37a3..8445cad 100644 --- a/target/i386/hvf/hvf.c +++ b/target/i386/hvf/hvf.c @@ -733,9 +733,9 @@ int hvf_vcpu_exec(CPUState *cpu) } do { - if (cpu->accel->dirty) { + if (cpu->vcpu_dirty) { hvf_put_registers(cpu); - cpu->accel->dirty = false; + cpu->vcpu_dirty = false; } if (hvf_inject_interrupts(cpu)) { @@ -773,9 +773,9 @@ int hvf_vcpu_exec(CPUState *cpu) switch (exit_reason) { case EXIT_REASON_HLT: { macvm_set_rip(cpu, rip + ins_len); - if (!((cpu->interrupt_request & CPU_INTERRUPT_HARD) && + if (!(cpu_test_interrupt(cpu, CPU_INTERRUPT_HARD) && (env->eflags & IF_MASK)) - && !(cpu->interrupt_request & CPU_INTERRUPT_NMI) && + && !cpu_test_interrupt(cpu, CPU_INTERRUPT_NMI) && !(idtvec_info & VMCS_IDT_VEC_VALID)) { cpu->halted = 1; ret = EXCP_HLT; diff --git a/target/i386/hvf/x86_cpuid.c b/target/i386/hvf/x86_cpuid.c index fa131b1..0798a0c 100644 --- a/target/i386/hvf/x86_cpuid.c +++ b/target/i386/hvf/x86_cpuid.c @@ -73,7 +73,7 @@ uint32_t hvf_get_supported_cpuid(uint32_t func, uint32_t idx, CPUID_MSR | CPUID_PAE | CPUID_MCE | CPUID_CX8 | CPUID_APIC | CPUID_SEP | CPUID_MTRR | CPUID_PGE | CPUID_MCA | CPUID_CMOV | CPUID_PAT | CPUID_PSE36 | CPUID_CLFLUSH | CPUID_MMX | 
- CPUID_FXSR | CPUID_SSE | CPUID_SSE2 | CPUID_SS; + CPUID_FXSR | CPUID_SSE | CPUID_SSE2 | CPUID_SS | CPUID_HT; ecx &= CPUID_EXT_SSE3 | CPUID_EXT_PCLMULQDQ | CPUID_EXT_SSSE3 | CPUID_EXT_FMA | CPUID_EXT_CX16 | CPUID_EXT_PCID | CPUID_EXT_SSE41 | CPUID_EXT_SSE42 | CPUID_EXT_MOVBE | diff --git a/target/i386/hvf/x86hvf.c b/target/i386/hvf/x86hvf.c index 2057314..a502437 100644 --- a/target/i386/hvf/x86hvf.c +++ b/target/i386/hvf/x86hvf.c @@ -395,9 +395,9 @@ bool hvf_inject_interrupts(CPUState *cs) }; } - if (cs->interrupt_request & CPU_INTERRUPT_NMI) { + if (cpu_test_interrupt(cs, CPU_INTERRUPT_NMI)) { if (!(env->hflags2 & HF2_NMI_MASK) && !(info & VMCS_INTR_VALID)) { - cs->interrupt_request &= ~CPU_INTERRUPT_NMI; + cpu_reset_interrupt(cs, CPU_INTERRUPT_NMI); info = VMCS_INTR_VALID | VMCS_INTR_T_NMI | EXCP02_NMI; wvmcs(cs->accel->fd, VMCS_ENTRY_INTR_INFO, info); } else { @@ -406,20 +406,19 @@ bool hvf_inject_interrupts(CPUState *cs) } if (!(env->hflags & HF_INHIBIT_IRQ_MASK) && - (cs->interrupt_request & CPU_INTERRUPT_HARD) && + cpu_test_interrupt(cs, CPU_INTERRUPT_HARD) && (env->eflags & IF_MASK) && !(info & VMCS_INTR_VALID)) { int line = cpu_get_pic_interrupt(env); - cs->interrupt_request &= ~CPU_INTERRUPT_HARD; + cpu_reset_interrupt(cs, CPU_INTERRUPT_HARD); if (line >= 0) { wvmcs(cs->accel->fd, VMCS_ENTRY_INTR_INFO, line | VMCS_INTR_VALID | VMCS_INTR_T_HWINTR); } } - if (cs->interrupt_request & CPU_INTERRUPT_HARD) { + if (cpu_test_interrupt(cs, CPU_INTERRUPT_HARD)) { vmx_set_int_window_exiting(cs); } - return (cs->interrupt_request - & (CPU_INTERRUPT_INIT | CPU_INTERRUPT_TPR)); + return cpu_test_interrupt(cs, CPU_INTERRUPT_INIT | CPU_INTERRUPT_TPR); } int hvf_process_events(CPUState *cs) @@ -427,31 +426,31 @@ int hvf_process_events(CPUState *cs) X86CPU *cpu = X86_CPU(cs); CPUX86State *env = &cpu->env; - if (!cs->accel->dirty) { + if (!cs->vcpu_dirty) { /* light weight sync for CPU_INTERRUPT_HARD and IF_MASK */ env->eflags = rreg(cs->accel->fd, HV_X86_RFLAGS); } - if (cs->interrupt_request & CPU_INTERRUPT_INIT) { + if (cpu_test_interrupt(cs, CPU_INTERRUPT_INIT)) { cpu_synchronize_state(cs); do_cpu_init(cpu); } - if (cs->interrupt_request & CPU_INTERRUPT_POLL) { - cs->interrupt_request &= ~CPU_INTERRUPT_POLL; + if (cpu_test_interrupt(cs, CPU_INTERRUPT_POLL)) { + cpu_reset_interrupt(cs, CPU_INTERRUPT_POLL); apic_poll_irq(cpu->apic_state); } - if (((cs->interrupt_request & CPU_INTERRUPT_HARD) && + if ((cpu_test_interrupt(cs, CPU_INTERRUPT_HARD) && (env->eflags & IF_MASK)) || - (cs->interrupt_request & CPU_INTERRUPT_NMI)) { + cpu_test_interrupt(cs, CPU_INTERRUPT_NMI)) { cs->halted = 0; } - if (cs->interrupt_request & CPU_INTERRUPT_SIPI) { + if (cpu_test_interrupt(cs, CPU_INTERRUPT_SIPI)) { cpu_synchronize_state(cs); do_cpu_sipi(cpu); } - if (cs->interrupt_request & CPU_INTERRUPT_TPR) { - cs->interrupt_request &= ~CPU_INTERRUPT_TPR; + if (cpu_test_interrupt(cs, CPU_INTERRUPT_TPR)) { + cpu_reset_interrupt(cs, CPU_INTERRUPT_TPR); cpu_synchronize_state(cs); apic_handle_tpr_access_report(cpu->apic_state, env->eip, env->tpr_access_type); diff --git a/target/i386/kvm/hyperv.c b/target/i386/kvm/hyperv.c index 9865120..f7a81bd 100644 --- a/target/i386/kvm/hyperv.c +++ b/target/i386/kvm/hyperv.c @@ -81,7 +81,6 @@ int kvm_hv_handle_exit(X86CPU *cpu, struct kvm_hyperv_exit *exit) * necessary because memory hierarchy is being changed */ async_safe_run_on_cpu(CPU(cpu), async_synic_update, RUN_ON_CPU_NULL); - cpu_exit(CPU(cpu)); return EXCP_INTERRUPT; case KVM_EXIT_HYPERV_HCALL: { diff --git 
a/target/i386/kvm/kvm-cpu.c b/target/i386/kvm/kvm-cpu.c index 16bde4d..9c25b55 100644 --- a/target/i386/kvm/kvm-cpu.c +++ b/target/i386/kvm/kvm-cpu.c @@ -13,6 +13,7 @@ #include "qapi/error.h" #include "system/system.h" #include "hw/boards.h" +#include "hw/i386/x86.h" #include "kvm_i386.h" #include "accel/accel-cpu-target.h" @@ -41,6 +42,7 @@ static void kvm_set_guest_phys_bits(CPUState *cs) static bool kvm_cpu_realizefn(CPUState *cs, Error **errp) { X86CPU *cpu = X86_CPU(cs); + X86CPUClass *xcc = X86_CPU_GET_CLASS(cpu); CPUX86State *env = &cpu->env; bool ret; @@ -63,7 +65,7 @@ static bool kvm_cpu_realizefn(CPUState *cs, Error **errp) * check/update ucode_rev, phys_bits, guest_phys_bits, mwait * cpu_common_realizefn() (via xcc->parent_realize) */ - if (cpu->max_features) { + if (xcc->max_features) { if (enable_cpu_pm) { if (kvm_has_waitpkg()) { env->features[FEAT_7_0_ECX] |= CPUID_7_0_ECX_WAITPKG; @@ -72,7 +74,7 @@ static bool kvm_cpu_realizefn(CPUState *cs, Error **errp) if (env->features[FEAT_1_ECX] & CPUID_EXT_MONITOR) { host_cpuid(5, 0, &cpu->mwait.eax, &cpu->mwait.ebx, &cpu->mwait.ecx, &cpu->mwait.edx); - } + } } if (cpu->ucode_rev == 0) { cpu->ucode_rev = @@ -90,6 +92,15 @@ static bool kvm_cpu_realizefn(CPUState *cs, Error **errp) kvm_set_guest_phys_bits(cs); } + /* + * When SMM is enabled, there is 2 address spaces. Otherwise only 1. + * + * Only initialize address space 0 here, the second one for SMM is + * initialized at register_smram_listener() after machine init done. + */ + cs->num_ases = x86_machine_is_smm_enabled(X86_MACHINE(current_machine)) ? 2 : 1; + cpu_address_space_init(cs, X86ASIdx_MEM, "cpu-memory", cs->memory); + return true; } @@ -108,7 +119,7 @@ static void kvm_cpu_max_instance_init(X86CPU *cpu) CPUX86State *env = &cpu->env; KVMState *s = kvm_state; - host_cpu_max_instance_init(cpu); + object_property_set_bool(OBJECT(cpu), "pmu", true, &error_abort); if (lmce_supported()) { object_property_set_bool(OBJECT(cpu), "lmce", true, &error_abort); @@ -216,7 +227,7 @@ static void kvm_cpu_instance_init(CPUState *cs) x86_cpu_apply_props(cpu, kvm_default_props); } - if (cpu->max_features) { + if (xcc->max_features) { kvm_cpu_max_instance_init(cpu); } diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c index c9a3c02..309f043 100644 --- a/target/i386/kvm/kvm.c +++ b/target/i386/kvm/kvm.c @@ -38,6 +38,7 @@ #include "kvm_i386.h" #include "../confidential-guest.h" #include "sev.h" +#include "tdx.h" #include "xen-emu.h" #include "hyperv.h" #include "hyperv-proto.h" @@ -192,6 +193,7 @@ static const char *vm_type_name[] = { [KVM_X86_SEV_VM] = "SEV", [KVM_X86_SEV_ES_VM] = "SEV-ES", [KVM_X86_SNP_VM] = "SEV-SNP", + [KVM_X86_TDX_VM] = "TDX", }; bool kvm_is_vm_type_supported(int type) @@ -326,7 +328,7 @@ void kvm_synchronize_all_tsc(void) { CPUState *cpu; - if (kvm_enabled()) { + if (kvm_enabled() && !is_tdx_vm()) { CPU_FOREACH(cpu) { run_on_cpu(cpu, do_kvm_synchronize_tsc, RUN_ON_CPU_NULL); } @@ -392,7 +394,7 @@ static bool host_tsx_broken(void) /* Returns the value for a specific register on the cpuid entry */ -static uint32_t cpuid_entry_get_reg(struct kvm_cpuid_entry2 *entry, int reg) +uint32_t cpuid_entry_get_reg(struct kvm_cpuid_entry2 *entry, int reg) { uint32_t ret = 0; switch (reg) { @@ -414,9 +416,9 @@ static uint32_t cpuid_entry_get_reg(struct kvm_cpuid_entry2 *entry, int reg) /* Find matching entry for function/index on kvm_cpuid2 struct */ -static struct kvm_cpuid_entry2 *cpuid_find_entry(struct kvm_cpuid2 *cpuid, - uint32_t function, - uint32_t index) +struct 
kvm_cpuid_entry2 *cpuid_find_entry(struct kvm_cpuid2 *cpuid, + uint32_t function, + uint32_t index) { int i; for (i = 0; i < cpuid->nent; ++i) { @@ -572,7 +574,7 @@ uint32_t kvm_arch_get_supported_cpuid(KVMState *s, uint32_t function, } if (current_machine->cgs) { - ret = x86_confidential_guest_mask_cpuid_features( + ret = x86_confidential_guest_adjust_cpuid_features( X86_CONFIDENTIAL_GUEST(current_machine->cgs), function, index, reg, ret); } @@ -651,6 +653,23 @@ uint64_t kvm_arch_get_supported_msr_feature(KVMState *s, uint32_t index) must_be_one = (uint32_t)value; can_be_one = (uint32_t)(value >> 32); return can_be_one & ~must_be_one; + case MSR_IA32_ARCH_CAPABILITIES: + /* + * Special handling for fb-clear bit in ARCH_CAPABILITIES MSR. + * KVM will only report the bit if it is enabled in the host, + * but, for live migration capability purposes, we want to + * expose the bit to the guest even if it is disabled in the + * host, as long as the host itself is not vulnerable to + * the issue that the fb-clear bit is meant to mitigate. + */ + if ((value & MSR_ARCH_CAP_MDS_NO) && + (value & MSR_ARCH_CAP_TAA_NO) && + (value & MSR_ARCH_CAP_SBDR_SSDP_NO) && + (value & MSR_ARCH_CAP_FBSDP_NO) && + (value & MSR_ARCH_CAP_PSDP_NO)) { + value |= MSR_ARCH_CAP_FB_CLEAR; + } + return value; default: return value; @@ -868,6 +887,15 @@ static int kvm_arch_set_tsc_khz(CPUState *cs) int r, cur_freq; bool set_ioctl = false; + /* + * TSC of TD vcpu is immutable, it cannot be set/changed via vcpu scope + * VM_SET_TSC_KHZ, but only be initialized via VM scope VM_SET_TSC_KHZ + * before ioctl KVM_TDX_INIT_VM in tdx_pre_create_vcpu() + */ + if (is_tdx_vm()) { + return 0; + } + if (!env->tsc_khz) { return 0; } @@ -1779,8 +1807,6 @@ static int hyperv_init_vcpu(X86CPU *cpu) static Error *invtsc_mig_blocker; -#define KVM_MAX_CPUID_ENTRIES 100 - static void kvm_init_xsave(CPUX86State *env) { if (has_xsave2) { @@ -1823,9 +1849,8 @@ static void kvm_init_nested_state(CPUX86State *env) } } -static uint32_t kvm_x86_build_cpuid(CPUX86State *env, - struct kvm_cpuid_entry2 *entries, - uint32_t cpuid_i) +uint32_t kvm_x86_build_cpuid(CPUX86State *env, struct kvm_cpuid_entry2 *entries, + uint32_t cpuid_i) { uint32_t limit, i, j; uint32_t unused; @@ -1864,7 +1889,7 @@ static uint32_t kvm_x86_build_cpuid(CPUX86State *env, break; } case 0x1f: - if (!x86_has_extended_topo(env->avail_cpu_topo)) { + if (!x86_has_cpuid_0x1f(env_archcpu(env))) { cpuid_i--; break; } @@ -2052,6 +2077,15 @@ full: abort(); } +int kvm_arch_pre_create_vcpu(CPUState *cpu, Error **errp) +{ + if (is_tdx_vm()) { + return tdx_pre_create_vcpu(cpu, errp); + } + + return 0; +} + int kvm_arch_init_vcpu(CPUState *cs) { struct { @@ -2076,6 +2110,14 @@ int kvm_arch_init_vcpu(CPUState *cs) int r; Error *local_err = NULL; + if (current_machine->cgs) { + r = x86_confidential_guest_check_features( + X86_CONFIDENTIAL_GUEST(current_machine->cgs), cs); + if (r < 0) { + return r; + } + } + memset(&cpuid_data, 0, sizeof(cpuid_data)); cpuid_i = 0; @@ -2234,7 +2276,7 @@ int kvm_arch_init_vcpu(CPUState *cs) cpuid_i = kvm_x86_build_cpuid(env, cpuid_data.entries, cpuid_i); cpuid_data.cpuid.nent = cpuid_i; - if (((env->cpuid_version >> 8)&0xF) >= 6 + if (x86_cpu_family(env->cpuid_version) >= 6 && (env->features[FEAT_1_EDX] & (CPUID_MCE | CPUID_MCA)) == (CPUID_MCE | CPUID_MCA)) { uint64_t mcg_cap, unsupported_caps; @@ -2675,6 +2717,7 @@ static MemoryRegion smram_as_mem; static void register_smram_listener(Notifier *n, void *unused) { + CPUState *cpu; MemoryRegion *smram = (MemoryRegion *) 
object_resolve_path("/machine/smram", NULL); @@ -2698,7 +2741,11 @@ static void register_smram_listener(Notifier *n, void *unused) address_space_init(&smram_address_space, &smram_as_root, "KVM-SMRAM"); kvm_memory_listener_register(kvm_state, &smram_listener, - &smram_address_space, 1, "kvm-smram"); + &smram_address_space, X86ASIdx_SMM, "kvm-smram"); + + CPU_FOREACH(cpu) { + cpu_address_space_init(cpu, X86ASIdx_SMM, "cpu-smm", &smram_as_root); + } } static void *kvm_msr_energy_thread(void *data) @@ -3206,16 +3253,7 @@ int kvm_arch_init(MachineState *ms, KVMState *s) Error *local_err = NULL; /* - * Initialize SEV context, if required - * - * If no memory encryption is requested (ms->cgs == NULL) this is - * a no-op. - * - * It's also a no-op if a non-SEV confidential guest support - * mechanism is selected. SEV is the only mechanism available to - * select on x86 at present, so this doesn't arise, but if new - * mechanisms are supported in future (e.g. TDX), they'll need - * their own initialization either here or elsewhere. + * Initialize confidential guest (SEV/TDX) context, if required */ if (ms->cgs) { ret = confidential_guest_kvm_init(ms->cgs, &local_err); @@ -3290,8 +3328,7 @@ int kvm_arch_init(MachineState *ms, KVMState *s) return ret; } - if (kvm_check_extension(s, KVM_CAP_X86_SMM) && - object_dynamic_cast(OBJECT(ms), TYPE_X86_MACHINE) && + if (object_dynamic_cast(OBJECT(ms), TYPE_X86_MACHINE) && x86_machine_is_smm_enabled(X86_MACHINE(ms))) { smram_machine_done.notify = register_smram_listener; qemu_add_machine_init_done_notifier(&smram_machine_done); @@ -3856,36 +3893,38 @@ static void kvm_init_msrs(X86CPU *cpu) CPUX86State *env = &cpu->env; kvm_msr_buf_reset(cpu); - if (has_msr_arch_capabs) { - kvm_msr_entry_add(cpu, MSR_IA32_ARCH_CAPABILITIES, - env->features[FEAT_ARCH_CAPABILITIES]); - } - if (has_msr_core_capabs) { - kvm_msr_entry_add(cpu, MSR_IA32_CORE_CAPABILITY, - env->features[FEAT_CORE_CAPABILITY]); - } + if (!is_tdx_vm()) { + if (has_msr_arch_capabs) { + kvm_msr_entry_add(cpu, MSR_IA32_ARCH_CAPABILITIES, + env->features[FEAT_ARCH_CAPABILITIES]); + } + + if (has_msr_core_capabs) { + kvm_msr_entry_add(cpu, MSR_IA32_CORE_CAPABILITY, + env->features[FEAT_CORE_CAPABILITY]); + } + + if (has_msr_perf_capabs && cpu->enable_pmu) { + kvm_msr_entry_add_perf(cpu, env->features); + } - if (has_msr_perf_capabs && cpu->enable_pmu) { - kvm_msr_entry_add_perf(cpu, env->features); + /* + * Older kernels do not include VMX MSRs in KVM_GET_MSR_INDEX_LIST, but + * all kernels with MSR features should have them. + */ + if (kvm_feature_msrs && cpu_has_vmx(env)) { + kvm_msr_entry_add_vmx(cpu, env->features); + } } if (has_msr_ucode_rev) { kvm_msr_entry_add(cpu, MSR_IA32_UCODE_REV, cpu->ucode_rev); } - - /* - * Older kernels do not include VMX MSRs in KVM_GET_MSR_INDEX_LIST, but - * all kernels with MSR features should have them. 
- */ - if (kvm_feature_msrs && cpu_has_vmx(env)) { - kvm_msr_entry_add_vmx(cpu, env->features); - } - assert(kvm_buf_set_msrs(cpu) == 0); } -static int kvm_put_msrs(X86CPU *cpu, int level) +static int kvm_put_msrs(X86CPU *cpu, KvmPutState level) { CPUX86State *env = &cpu->env; int i; @@ -5005,7 +5044,7 @@ static int kvm_get_apic(X86CPU *cpu) return 0; } -static int kvm_put_vcpu_events(X86CPU *cpu, int level) +static int kvm_put_vcpu_events(X86CPU *cpu, KvmPutState level) { CPUState *cs = CPU(cpu); CPUX86State *env = &cpu->env; @@ -5044,7 +5083,7 @@ static int kvm_put_vcpu_events(X86CPU *cpu, int level) */ events.smi.pending = cs->interrupt_request & CPU_INTERRUPT_SMI; events.smi.latched_init = cs->interrupt_request & CPU_INTERRUPT_INIT; - cs->interrupt_request &= ~(CPU_INTERRUPT_INIT | CPU_INTERRUPT_SMI); + cpu_reset_interrupt(cs, CPU_INTERRUPT_INIT | CPU_INTERRUPT_SMI); } else { /* Keep these in cs->interrupt_request. */ events.smi.pending = 0; @@ -5248,7 +5287,7 @@ static int kvm_get_nested_state(X86CPU *cpu) return ret; } -int kvm_arch_put_registers(CPUState *cpu, int level, Error **errp) +int kvm_arch_put_registers(CPUState *cpu, KvmPutState level, Error **errp) { X86CPU *x86_cpu = X86_CPU(cpu); int ret; @@ -5431,10 +5470,10 @@ void kvm_arch_pre_run(CPUState *cpu, struct kvm_run *run) int ret; /* Inject NMI */ - if (cpu->interrupt_request & (CPU_INTERRUPT_NMI | CPU_INTERRUPT_SMI)) { - if (cpu->interrupt_request & CPU_INTERRUPT_NMI) { + if (cpu_test_interrupt(cpu, CPU_INTERRUPT_NMI | CPU_INTERRUPT_SMI)) { + if (cpu_test_interrupt(cpu, CPU_INTERRUPT_NMI)) { bql_lock(); - cpu->interrupt_request &= ~CPU_INTERRUPT_NMI; + cpu_reset_interrupt(cpu, CPU_INTERRUPT_NMI); bql_unlock(); DPRINTF("injected NMI\n"); ret = kvm_vcpu_ioctl(cpu, KVM_NMI); @@ -5443,9 +5482,9 @@ void kvm_arch_pre_run(CPUState *cpu, struct kvm_run *run) strerror(-ret)); } } - if (cpu->interrupt_request & CPU_INTERRUPT_SMI) { + if (cpu_test_interrupt(cpu, CPU_INTERRUPT_SMI)) { bql_lock(); - cpu->interrupt_request &= ~CPU_INTERRUPT_SMI; + cpu_reset_interrupt(cpu, CPU_INTERRUPT_SMI); bql_unlock(); DPRINTF("injected SMI\n"); ret = kvm_vcpu_ioctl(cpu, KVM_SMI); @@ -5456,32 +5495,31 @@ void kvm_arch_pre_run(CPUState *cpu, struct kvm_run *run) } } - if (!kvm_pic_in_kernel()) { - bql_lock(); - } /* Force the VCPU out of its inner loop to process any INIT requests * or (for userspace APIC, but it is cheap to combine the checks here) * pending TPR access reports. 
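     * (Descriptive note, inferred from this hunk: exit_request is now stored
     * with qatomic_set() rather than a plain assignment, since the BQL is no
     * longer taken unconditionally around this block.)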
*/ - if (cpu->interrupt_request & (CPU_INTERRUPT_INIT | CPU_INTERRUPT_TPR)) { - if ((cpu->interrupt_request & CPU_INTERRUPT_INIT) && + if (cpu_test_interrupt(cpu, CPU_INTERRUPT_INIT | CPU_INTERRUPT_TPR)) { + if (cpu_test_interrupt(cpu, CPU_INTERRUPT_INIT) && !(env->hflags & HF_SMM_MASK)) { - cpu->exit_request = 1; + qatomic_set(&cpu->exit_request, true); } - if (cpu->interrupt_request & CPU_INTERRUPT_TPR) { - cpu->exit_request = 1; + if (cpu_test_interrupt(cpu, CPU_INTERRUPT_TPR)) { + qatomic_set(&cpu->exit_request, true); } } if (!kvm_pic_in_kernel()) { /* Try to inject an interrupt if the guest can accept it */ if (run->ready_for_interrupt_injection && - (cpu->interrupt_request & CPU_INTERRUPT_HARD) && + cpu_test_interrupt(cpu, CPU_INTERRUPT_HARD) && (env->eflags & IF_MASK)) { int irq; - cpu->interrupt_request &= ~CPU_INTERRUPT_HARD; + bql_lock(); + + cpu_reset_interrupt(cpu, CPU_INTERRUPT_HARD); irq = cpu_get_pic_interrupt(env); if (irq >= 0) { struct kvm_interrupt intr; @@ -5495,13 +5533,14 @@ void kvm_arch_pre_run(CPUState *cpu, struct kvm_run *run) strerror(-ret)); } } + bql_unlock(); } /* If we have an interrupt but the guest is not ready to receive an * interrupt, request an interrupt window exit. This will * cause a return to userspace as soon as the guest is ready to * receive interrupts. */ - if ((cpu->interrupt_request & CPU_INTERRUPT_HARD)) { + if (cpu_test_interrupt(cpu, CPU_INTERRUPT_HARD)) { run->request_interrupt_window = 1; } else { run->request_interrupt_window = 0; @@ -5509,8 +5548,6 @@ void kvm_arch_pre_run(CPUState *cpu, struct kvm_run *run) DPRINTF("setting tpr\n"); run->cr8 = cpu_get_apic_tpr(x86_cpu->apic_state); - - bql_unlock(); } } @@ -5573,18 +5610,18 @@ int kvm_arch_process_async_events(CPUState *cs) X86CPU *cpu = X86_CPU(cs); CPUX86State *env = &cpu->env; - if (cs->interrupt_request & CPU_INTERRUPT_MCE) { + if (cpu_test_interrupt(cs, CPU_INTERRUPT_MCE)) { /* We must not raise CPU_INTERRUPT_MCE if it's not supported. 
*/ assert(env->mcg_cap); - cs->interrupt_request &= ~CPU_INTERRUPT_MCE; + cpu_reset_interrupt(cs, CPU_INTERRUPT_MCE); kvm_cpu_synchronize_state(cs); if (env->exception_nr == EXCP08_DBLE) { /* this means triple fault */ qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); - cs->exit_request = 1; + qatomic_set(&cs->exit_request, true); return 0; } kvm_queue_exception(env, EXCP12_MCHK, 0, 0); @@ -5596,7 +5633,7 @@ int kvm_arch_process_async_events(CPUState *cs) } } - if ((cs->interrupt_request & CPU_INTERRUPT_INIT) && + if (cpu_test_interrupt(cs, CPU_INTERRUPT_INIT) && !(env->hflags & HF_SMM_MASK)) { kvm_cpu_synchronize_state(cs); do_cpu_init(cpu); @@ -5606,21 +5643,21 @@ int kvm_arch_process_async_events(CPUState *cs) return 0; } - if (cs->interrupt_request & CPU_INTERRUPT_POLL) { - cs->interrupt_request &= ~CPU_INTERRUPT_POLL; + if (cpu_test_interrupt(cs, CPU_INTERRUPT_POLL)) { + cpu_reset_interrupt(cs, CPU_INTERRUPT_POLL); apic_poll_irq(cpu->apic_state); } - if (((cs->interrupt_request & CPU_INTERRUPT_HARD) && + if ((cpu_test_interrupt(cs, CPU_INTERRUPT_HARD) && (env->eflags & IF_MASK)) || - (cs->interrupt_request & CPU_INTERRUPT_NMI)) { + cpu_test_interrupt(cs, CPU_INTERRUPT_NMI)) { cs->halted = 0; } - if (cs->interrupt_request & CPU_INTERRUPT_SIPI) { + if (cpu_test_interrupt(cs, CPU_INTERRUPT_SIPI)) { kvm_cpu_synchronize_state(cs); do_cpu_sipi(cpu); } - if (cs->interrupt_request & CPU_INTERRUPT_TPR) { - cs->interrupt_request &= ~CPU_INTERRUPT_TPR; + if (cpu_test_interrupt(cs, CPU_INTERRUPT_TPR)) { + cpu_reset_interrupt(cs, CPU_INTERRUPT_TPR); kvm_cpu_synchronize_state(cs); apic_handle_tpr_access_report(cpu->apic_state, env->eip, env->tpr_access_type); @@ -5634,9 +5671,9 @@ static int kvm_handle_halt(X86CPU *cpu) CPUState *cs = CPU(cpu); CPUX86State *env = &cpu->env; - if (!((cs->interrupt_request & CPU_INTERRUPT_HARD) && + if (!(cpu_test_interrupt(cs, CPU_INTERRUPT_HARD) && (env->eflags & IF_MASK)) && - !(cs->interrupt_request & CPU_INTERRUPT_NMI)) { + !cpu_test_interrupt(cs, CPU_INTERRUPT_NMI)) { cs->halted = 1; return EXCP_HLT; } @@ -6000,9 +6037,11 @@ static bool host_supports_vmx(void) * because private/shared page tracking is already provided through other * means, these 2 use-cases should be treated as being mutually-exclusive. */ -static int kvm_handle_hc_map_gpa_range(struct kvm_run *run) +static int kvm_handle_hc_map_gpa_range(X86CPU *cpu, struct kvm_run *run) { + struct kvm_pre_fault_memory mem; uint64_t gpa, size, attributes; + int ret; if (!machine_require_guest_memfd(current_machine)) return -EINVAL; @@ -6013,13 +6052,32 @@ static int kvm_handle_hc_map_gpa_range(struct kvm_run *run) trace_kvm_hc_map_gpa_range(gpa, size, attributes, run->hypercall.flags); - return kvm_convert_memory(gpa, size, attributes & KVM_MAP_GPA_RANGE_ENCRYPTED); + ret = kvm_convert_memory(gpa, size, attributes & KVM_MAP_GPA_RANGE_ENCRYPTED); + if (ret || !kvm_pre_fault_memory_supported) { + return ret; + } + + /* + * Opportunistically pre-fault memory in. Failures are ignored so that any + * errors in faulting in the memory will get captured in KVM page fault + * path when the guest first accesses the page. 
+ */ + memset(&mem, 0, sizeof(mem)); + mem.gpa = gpa; + mem.size = size; + while (mem.size) { + if (kvm_vcpu_ioctl(CPU(cpu), KVM_PRE_FAULT_MEMORY, &mem)) { + break; + } + } + + return 0; } -static int kvm_handle_hypercall(struct kvm_run *run) +static int kvm_handle_hypercall(X86CPU *cpu, struct kvm_run *run) { if (run->hypercall.nr == KVM_HC_MAP_GPA_RANGE) - return kvm_handle_hc_map_gpa_range(run); + return kvm_handle_hc_map_gpa_range(cpu, run); return -EINVAL; } @@ -6119,7 +6177,35 @@ int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run) break; #endif case KVM_EXIT_HYPERCALL: - ret = kvm_handle_hypercall(run); + ret = kvm_handle_hypercall(cpu, run); + break; + case KVM_EXIT_SYSTEM_EVENT: + switch (run->system_event.type) { + case KVM_SYSTEM_EVENT_TDX_FATAL: + ret = tdx_handle_report_fatal_error(cpu, run); + break; + default: + ret = -1; + break; + } + break; + case KVM_EXIT_TDX: + /* + * run->tdx is already set up for the case where userspace + * does not handle the TDVMCALL. + */ + switch (run->tdx.nr) { + case TDVMCALL_GET_QUOTE: + tdx_handle_get_quote(cpu, run); + break; + case TDVMCALL_GET_TD_VM_CALL_INFO: + tdx_handle_get_tdvmcall_info(cpu, run); + break; + case TDVMCALL_SETUP_EVENT_NOTIFY_INTERRUPT: + tdx_handle_setup_event_notify_interrupt(cpu, run); + break; + } + ret = 0; break; default: fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason); diff --git a/target/i386/kvm/kvm_i386.h b/target/i386/kvm/kvm_i386.h index 88565e8..5f83e88 100644 --- a/target/i386/kvm/kvm_i386.h +++ b/target/i386/kvm/kvm_i386.h @@ -13,6 +13,8 @@ #include "system/kvm.h" +#define KVM_MAX_CPUID_ENTRIES 100 + /* always false if !CONFIG_KVM */ #define kvm_pit_in_kernel() \ (kvm_irqchip_in_kernel() && !kvm_irqchip_is_split()) @@ -42,6 +44,13 @@ void kvm_request_xsave_components(X86CPU *cpu, uint64_t mask); #ifdef CONFIG_KVM +#include <linux/kvm.h> + +typedef struct KvmCpuidInfo { + struct kvm_cpuid2 cpuid; + struct kvm_cpuid_entry2 entries[KVM_MAX_CPUID_ENTRIES]; +} KvmCpuidInfo; + bool kvm_is_vm_type_supported(int type); bool kvm_has_adjust_clock_stable(void); bool kvm_has_exception_payload(void); @@ -57,6 +66,12 @@ uint64_t kvm_swizzle_msi_ext_dest_id(uint64_t address); void kvm_update_msi_routes_all(void *private, bool global, uint32_t index, uint32_t mask); +struct kvm_cpuid_entry2 *cpuid_find_entry(struct kvm_cpuid2 *cpuid, + uint32_t function, + uint32_t index); +uint32_t cpuid_entry_get_reg(struct kvm_cpuid_entry2 *entry, int reg); +uint32_t kvm_x86_build_cpuid(CPUX86State *env, struct kvm_cpuid_entry2 *entries, + uint32_t cpuid_i); #endif /* CONFIG_KVM */ void kvm_pc_setup_irq_routing(bool pci_enabled); diff --git a/target/i386/kvm/meson.build b/target/i386/kvm/meson.build index 3996caf..2675bf8 100644 --- a/target/i386/kvm/meson.build +++ b/target/i386/kvm/meson.build @@ -8,6 +8,8 @@ i386_kvm_ss.add(files( i386_kvm_ss.add(when: 'CONFIG_XEN_EMU', if_true: files('xen-emu.c')) +i386_kvm_ss.add(when: 'CONFIG_TDX', if_true: files('tdx.c', 'tdx-quote-generator.c'), if_false: files('tdx-stub.c')) + i386_system_ss.add(when: 'CONFIG_HYPERV', if_true: files('hyperv.c'), if_false: files('hyperv-stub.c')) i386_system_ss.add_all(when: 'CONFIG_KVM', if_true: i386_kvm_ss) diff --git a/target/i386/kvm/tdx-quote-generator.c b/target/i386/kvm/tdx-quote-generator.c new file mode 100644 index 0000000..dee8334 --- /dev/null +++ b/target/i386/kvm/tdx-quote-generator.c @@ -0,0 +1,302 @@ +/* + * QEMU TDX Quote Generation Support + * + * Copyright (c) 2025 Intel Corporation + * + * Author: + * Xiaoyao Li 
<xiaoyao.li@intel.com> + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "qemu/osdep.h" +#include "qemu/error-report.h" +#include "qapi/error.h" +#include "qapi/qapi-visit-sockets.h" + +#include "tdx-quote-generator.h" + +#define QGS_MSG_LIB_MAJOR_VER 1 +#define QGS_MSG_LIB_MINOR_VER 1 + +typedef enum _qgs_msg_type_t { + GET_QUOTE_REQ = 0, + GET_QUOTE_RESP = 1, + GET_COLLATERAL_REQ = 2, + GET_COLLATERAL_RESP = 3, + GET_PLATFORM_INFO_REQ = 4, + GET_PLATFORM_INFO_RESP = 5, + QGS_MSG_TYPE_MAX +} qgs_msg_type_t; + +typedef struct _qgs_msg_header_t { + uint16_t major_version; + uint16_t minor_version; + uint32_t type; + uint32_t size; // size of the whole message, include this header, in byte + uint32_t error_code; // used in response only +} qgs_msg_header_t; + +typedef struct _qgs_msg_get_quote_req_t { + qgs_msg_header_t header; // header.type = GET_QUOTE_REQ + uint32_t report_size; // cannot be 0 + uint32_t id_list_size; // length of id_list, in byte, can be 0 +} qgs_msg_get_quote_req_t; + +typedef struct _qgs_msg_get_quote_resp_s { + qgs_msg_header_t header; // header.type = GET_QUOTE_RESP + uint32_t selected_id_size; // can be 0 in case only one id is sent in request + uint32_t quote_size; // length of quote_data, in byte + uint8_t id_quote[]; // selected id followed by quote +} qgs_msg_get_quote_resp_t; + +#define HEADER_SIZE 4 + +static uint32_t decode_header(const char *buf, size_t len) { + if (len < HEADER_SIZE) { + return 0; + } + uint32_t msg_size = 0; + for (uint32_t i = 0; i < HEADER_SIZE; ++i) { + msg_size = msg_size * 256 + (buf[i] & 0xFF); + } + return msg_size; +} + +static void encode_header(char *buf, size_t len, uint32_t size) { + assert(len >= HEADER_SIZE); + buf[0] = ((size >> 24) & 0xFF); + buf[1] = ((size >> 16) & 0xFF); + buf[2] = ((size >> 8) & 0xFF); + buf[3] = (size & 0xFF); +} + +static void tdx_generate_quote_cleanup(TdxGenerateQuoteTask *task) +{ + timer_del(&task->timer); + + if (task->watch) { + g_source_remove(task->watch); + } + qio_channel_close(QIO_CHANNEL(task->sioc), NULL); + object_unref(OBJECT(task->sioc)); + + task->completion(task); +} + +static gboolean tdx_get_quote_read(QIOChannel *ioc, GIOCondition condition, + gpointer opaque) +{ + TdxGenerateQuoteTask *task = opaque; + Error *err = NULL; + int ret; + + ret = qio_channel_read(ioc, task->receive_buf + task->receive_buf_received, + task->payload_len - task->receive_buf_received, &err); + if (ret < 0) { + if (ret == QIO_CHANNEL_ERR_BLOCK) { + return G_SOURCE_CONTINUE; + } else { + error_report_err(err); + task->status_code = TDX_VP_GET_QUOTE_ERROR; + goto end; + } + } + + if (ret == 0) { + error_report("End of file before reply received"); + task->status_code = TDX_VP_GET_QUOTE_ERROR; + goto end; + } + + task->receive_buf_received += ret; + if (task->receive_buf_received >= HEADER_SIZE) { + uint32_t len = decode_header(task->receive_buf, + task->receive_buf_received); + if (len == 0 || + len > (task->payload_len - HEADER_SIZE)) { + error_report("Message len %u must be non-zero & less than %zu", + len, (task->payload_len - HEADER_SIZE)); + task->status_code = TDX_VP_GET_QUOTE_ERROR; + goto end; + } + + /* Now we know the size, shrink to fit */ + task->payload_len = HEADER_SIZE + len; + task->receive_buf = g_renew(char, + task->receive_buf, + task->payload_len); + } + + if (task->receive_buf_received >= (sizeof(qgs_msg_header_t) + HEADER_SIZE)) { + qgs_msg_header_t *hdr = (qgs_msg_header_t *)(task->receive_buf + HEADER_SIZE); + if (hdr->major_version != QGS_MSG_LIB_MAJOR_VER || + 
hdr->minor_version != QGS_MSG_LIB_MINOR_VER) { + error_report("Invalid QGS message header version %d.%d", + hdr->major_version, + hdr->minor_version); + task->status_code = TDX_VP_GET_QUOTE_ERROR; + goto end; + } + if (hdr->type != GET_QUOTE_RESP) { + error_report("Invalid QGS message type %d", + hdr->type); + task->status_code = TDX_VP_GET_QUOTE_ERROR; + goto end; + } + if (hdr->size > (task->payload_len - HEADER_SIZE)) { + error_report("QGS message size %d exceeds payload capacity %zu", + hdr->size, task->payload_len); + task->status_code = TDX_VP_GET_QUOTE_ERROR; + goto end; + } + if (hdr->error_code != 0) { + error_report("QGS message error code %d", + hdr->error_code); + task->status_code = TDX_VP_GET_QUOTE_ERROR; + goto end; + } + } + if (task->receive_buf_received >= (sizeof(qgs_msg_get_quote_resp_t) + HEADER_SIZE)) { + qgs_msg_get_quote_resp_t *msg = (qgs_msg_get_quote_resp_t *)(task->receive_buf + HEADER_SIZE); + if (msg->selected_id_size != 0) { + error_report("QGS message selected ID was %d not 0", + msg->selected_id_size); + task->status_code = TDX_VP_GET_QUOTE_ERROR; + goto end; + } + + if ((task->payload_len - HEADER_SIZE - sizeof(qgs_msg_get_quote_resp_t)) != + msg->quote_size) { + error_report("QGS quote size %d should be %zu", + msg->quote_size, + (task->payload_len - sizeof(qgs_msg_get_quote_resp_t))); + task->status_code = TDX_VP_GET_QUOTE_ERROR; + goto end; + } + } + + if (task->receive_buf_received == task->payload_len) { + size_t strip = HEADER_SIZE + sizeof(qgs_msg_get_quote_resp_t); + memmove(task->receive_buf, + task->receive_buf + strip, + task->receive_buf_received - strip); + task->receive_buf_received -= strip; + task->status_code = TDX_VP_GET_QUOTE_SUCCESS; + goto end; + } + + return G_SOURCE_CONTINUE; + +end: + tdx_generate_quote_cleanup(task); + return G_SOURCE_REMOVE; +} + +static gboolean tdx_send_report(QIOChannel *ioc, GIOCondition condition, + gpointer opaque) +{ + TdxGenerateQuoteTask *task = opaque; + Error *err = NULL; + int ret; + + ret = qio_channel_write(ioc, task->send_data + task->send_data_sent, + task->send_data_size - task->send_data_sent, &err); + if (ret < 0) { + if (ret == QIO_CHANNEL_ERR_BLOCK) { + ret = 0; + } else { + error_report_err(err); + task->status_code = TDX_VP_GET_QUOTE_ERROR; + tdx_generate_quote_cleanup(task); + goto end; + } + } + task->send_data_sent += ret; + + if (task->send_data_sent == task->send_data_size) { + task->watch = qio_channel_add_watch(QIO_CHANNEL(task->sioc), G_IO_IN, + tdx_get_quote_read, task, NULL); + goto end; + } + + return G_SOURCE_CONTINUE; + +end: + return G_SOURCE_REMOVE; +} + +static void tdx_quote_generator_connected(QIOTask *qio_task, gpointer opaque) +{ + TdxGenerateQuoteTask *task = opaque; + Error *err = NULL; + int ret; + + ret = qio_task_propagate_error(qio_task, &err); + if (ret) { + error_report_err(err); + task->status_code = TDX_VP_GET_QUOTE_QGS_UNAVAILABLE; + tdx_generate_quote_cleanup(task); + return; + } + + task->watch = qio_channel_add_watch(QIO_CHANNEL(task->sioc), G_IO_OUT, + tdx_send_report, task, NULL); +} + +#define TRANSACTION_TIMEOUT 30000 + +static void getquote_expired(void *opaque) +{ + TdxGenerateQuoteTask *task = opaque; + + task->status_code = TDX_VP_GET_QUOTE_ERROR; + tdx_generate_quote_cleanup(task); +} + +static void setup_get_quote_timer(TdxGenerateQuoteTask *task) +{ + int64_t time; + + timer_init_ms(&task->timer, QEMU_CLOCK_VIRTUAL, getquote_expired, task); + time = qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL); + timer_mod(&task->timer, time + TRANSACTION_TIMEOUT); +} + 
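/*
 * Illustrative sketch, not part of the patch: the request frame that
 * tdx_generate_quote() below assembles for the Quote Generation Service.
 * Using the definitions from this file, the wire format is a 4-byte
 * big-endian length prefix, then qgs_msg_get_quote_req_t, then the raw
 * TDREPORT bytes. qgs_frame_get_quote_req() is a hypothetical helper for
 * illustration only; it does not exist in QEMU.
 */
static size_t qgs_frame_get_quote_req(char *out, size_t out_len,
                                      const void *report, uint32_t report_size)
{
    qgs_msg_get_quote_req_t msg = {
        .header = {
            .major_version = QGS_MSG_LIB_MAJOR_VER,
            .minor_version = QGS_MSG_LIB_MINOR_VER,
            .type = GET_QUOTE_REQ,
            /* size covers the QGS message only, not the 4-byte prefix */
            .size = sizeof(msg) + report_size,
            .error_code = 0,
        },
        .report_size = report_size,
        .id_list_size = 0,
    };
    size_t total = HEADER_SIZE + sizeof(msg) + report_size;

    assert(out_len >= total);
    encode_header(out, HEADER_SIZE, sizeof(msg) + report_size);
    memcpy(out + HEADER_SIZE, &msg, sizeof(msg));
    memcpy(out + HEADER_SIZE + sizeof(msg), report, report_size);
    return total;
}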
+void tdx_generate_quote(TdxGenerateQuoteTask *task, + SocketAddress *qg_sock_addr) +{ + QIOChannelSocket *sioc; + qgs_msg_get_quote_req_t msg; + + /* Prepare a QGS message prelude */ + msg.header.major_version = QGS_MSG_LIB_MAJOR_VER; + msg.header.minor_version = QGS_MSG_LIB_MINOR_VER; + msg.header.type = GET_QUOTE_REQ; + msg.header.size = sizeof(msg) + task->send_data_size; + msg.header.error_code = 0; + msg.report_size = task->send_data_size; + msg.id_list_size = 0; + + /* Make room to add the QGS message prelude */ + task->send_data = g_renew(char, + task->send_data, + task->send_data_size + sizeof(msg) + HEADER_SIZE); + memmove(task->send_data + sizeof(msg) + HEADER_SIZE, + task->send_data, + task->send_data_size); + memcpy(task->send_data + HEADER_SIZE, + &msg, + sizeof(msg)); + encode_header(task->send_data, HEADER_SIZE, task->send_data_size + sizeof(msg)); + task->send_data_size += sizeof(msg) + HEADER_SIZE; + + sioc = qio_channel_socket_new(); + task->sioc = sioc; + + setup_get_quote_timer(task); + + qio_channel_socket_connect_async(sioc, qg_sock_addr, + tdx_quote_generator_connected, task, + NULL, NULL); +} diff --git a/target/i386/kvm/tdx-quote-generator.h b/target/i386/kvm/tdx-quote-generator.h new file mode 100644 index 0000000..3bd9b8e --- /dev/null +++ b/target/i386/kvm/tdx-quote-generator.h @@ -0,0 +1,82 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#ifndef QEMU_I386_TDX_QUOTE_GENERATOR_H +#define QEMU_I386_TDX_QUOTE_GENERATOR_H + +#include "qom/object_interfaces.h" +#include "io/channel-socket.h" +#include "exec/hwaddr.h" + +#define TDX_GET_QUOTE_STRUCTURE_VERSION 1ULL + +#define TDX_VP_GET_QUOTE_SUCCESS 0ULL +#define TDX_VP_GET_QUOTE_IN_FLIGHT (-1ULL) +#define TDX_VP_GET_QUOTE_ERROR 0x8000000000000000ULL +#define TDX_VP_GET_QUOTE_QGS_UNAVAILABLE 0x8000000000000001ULL + +/* Limit to avoid resource starvation. */ +#define TDX_GET_QUOTE_MAX_BUF_LEN (128 * 1024) +#define TDX_MAX_GET_QUOTE_REQUEST 16 + +#define TDX_GET_QUOTE_HDR_SIZE 24 + +/* Format of pages shared with guest. */ +struct tdx_get_quote_header { + /* Format version: must be 1 in little endian. */ + uint64_t structure_version; + + /* + * GetQuote status code in little endian: + * Guest must set error_code to 0 to avoid information leak. + * Qemu sets this before interrupting guest. + */ + uint64_t error_code; + + /* + * in-message size in little endian: The message will follow this header. + * The in-message will be send to QGS. + */ + uint32_t in_len; + + /* + * out-message size in little endian: + * On request, out_len must be zero to avoid information leak. + * On return, message size from QGS. Qemu overwrites this field. + * The message will follows this header. The in-message is overwritten. + */ + uint32_t out_len; + + /* + * Message buffer follows. + * Guest sets message that will be send to QGS. If out_len > in_len, guest + * should zero remaining buffer to avoid information leak. + * Qemu overwrites this buffer with a message returned from QGS. 
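+ * (The fixed part of this header -- structure_version, error_code, in_len,
+ * out_len -- is 24 bytes, i.e. TDX_GET_QUOTE_HDR_SIZE.)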
+ */ +}; + +typedef struct TdxGenerateQuoteTask { + hwaddr buf_gpa; + hwaddr payload_gpa; + uint64_t payload_len; + + char *send_data; + uint64_t send_data_size; + uint64_t send_data_sent; + + char *receive_buf; + uint64_t receive_buf_received; + + uint64_t status_code; + struct tdx_get_quote_header hdr; + + QIOChannelSocket *sioc; + guint watch; + QEMUTimer timer; + + void (*completion)(struct TdxGenerateQuoteTask *task); + void *opaque; +} TdxGenerateQuoteTask; + +void tdx_generate_quote(TdxGenerateQuoteTask *task, SocketAddress *qg_sock_addr); + +#endif /* QEMU_I386_TDX_QUOTE_GENERATOR_H */ diff --git a/target/i386/kvm/tdx-stub.c b/target/i386/kvm/tdx-stub.c new file mode 100644 index 0000000..1f0e108 --- /dev/null +++ b/target/i386/kvm/tdx-stub.c @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#include "qemu/osdep.h" + +#include "tdx.h" + +int tdx_pre_create_vcpu(CPUState *cpu, Error **errp) +{ + return -EINVAL; +} + +int tdx_parse_tdvf(void *flash_ptr, int size) +{ + return -EINVAL; +} + +int tdx_handle_report_fatal_error(X86CPU *cpu, struct kvm_run *run) +{ + return -EINVAL; +} + +void tdx_handle_get_quote(X86CPU *cpu, struct kvm_run *run) +{ +} + +void tdx_handle_get_tdvmcall_info(X86CPU *cpu, struct kvm_run *run) +{ +} + +void tdx_handle_setup_event_notify_interrupt(X86CPU *cpu, struct kvm_run *run) +{ +} diff --git a/target/i386/kvm/tdx.c b/target/i386/kvm/tdx.c new file mode 100644 index 0000000..dbf0fa2 --- /dev/null +++ b/target/i386/kvm/tdx.c @@ -0,0 +1,1548 @@ +/* + * QEMU TDX support + * + * Copyright (c) 2025 Intel Corporation + * + * Author: + * Xiaoyao Li <xiaoyao.li@intel.com> + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "qemu/osdep.h" +#include "qemu/error-report.h" +#include "qemu/base64.h" +#include "qemu/mmap-alloc.h" +#include "qapi/error.h" +#include "qapi/qapi-visit-sockets.h" +#include "qom/object_interfaces.h" +#include "crypto/hash.h" +#include "system/kvm_int.h" +#include "system/runstate.h" +#include "system/system.h" +#include "system/ramblock.h" +#include "system/address-spaces.h" + +#include <linux/kvm_para.h> + +#include "cpu.h" +#include "cpu-internal.h" +#include "host-cpu.h" +#include "hw/i386/apic_internal.h" +#include "hw/i386/apic-msidef.h" +#include "hw/i386/e820_memory_layout.h" +#include "hw/i386/tdvf.h" +#include "hw/i386/x86.h" +#include "hw/i386/tdvf-hob.h" +#include "hw/pci/msi.h" +#include "kvm_i386.h" +#include "tdx.h" +#include "tdx-quote-generator.h" + +#include "standard-headers/asm-x86/kvm_para.h" + +#define TDX_MIN_TSC_FREQUENCY_KHZ (100 * 1000) +#define TDX_MAX_TSC_FREQUENCY_KHZ (10 * 1000 * 1000) + +#define TDX_TD_ATTRIBUTES_DEBUG BIT_ULL(0) +#define TDX_TD_ATTRIBUTES_SEPT_VE_DISABLE BIT_ULL(28) +#define TDX_TD_ATTRIBUTES_PKS BIT_ULL(30) +#define TDX_TD_ATTRIBUTES_PERFMON BIT_ULL(63) + +#define TDX_SUPPORTED_TD_ATTRS (TDX_TD_ATTRIBUTES_SEPT_VE_DISABLE |\ + TDX_TD_ATTRIBUTES_PKS | \ + TDX_TD_ATTRIBUTES_PERFMON) + +#define TDX_SUPPORTED_KVM_FEATURES ((1U << KVM_FEATURE_NOP_IO_DELAY) | \ + (1U << KVM_FEATURE_PV_UNHALT) | \ + (1U << KVM_FEATURE_PV_TLB_FLUSH) | \ + (1U << KVM_FEATURE_PV_SEND_IPI) | \ + (1U << KVM_FEATURE_POLL_CONTROL) | \ + (1U << KVM_FEATURE_PV_SCHED_YIELD) | \ + (1U << KVM_FEATURE_MSI_EXT_DEST_ID)) + +static TdxGuest *tdx_guest; + +static struct kvm_tdx_capabilities *tdx_caps; +static struct kvm_cpuid2 *tdx_supported_cpuid; + +/* Valid after kvm_arch_init()->confidential_guest_kvm_init()->tdx_kvm_init() */ +bool is_tdx_vm(void) +{ + return !!tdx_guest; +} + +enum 
tdx_ioctl_level { + TDX_VM_IOCTL, + TDX_VCPU_IOCTL, +}; + +static int tdx_ioctl_internal(enum tdx_ioctl_level level, void *state, + int cmd_id, __u32 flags, void *data, + Error **errp) +{ + struct kvm_tdx_cmd tdx_cmd = {}; + int r; + + const char *tdx_ioctl_name[] = { + [KVM_TDX_CAPABILITIES] = "KVM_TDX_CAPABILITIES", + [KVM_TDX_INIT_VM] = "KVM_TDX_INIT_VM", + [KVM_TDX_INIT_VCPU] = "KVM_TDX_INIT_VCPU", + [KVM_TDX_INIT_MEM_REGION] = "KVM_TDX_INIT_MEM_REGION", + [KVM_TDX_FINALIZE_VM] = "KVM_TDX_FINALIZE_VM", + [KVM_TDX_GET_CPUID] = "KVM_TDX_GET_CPUID", + }; + + tdx_cmd.id = cmd_id; + tdx_cmd.flags = flags; + tdx_cmd.data = (__u64)(unsigned long)data; + + switch (level) { + case TDX_VM_IOCTL: + r = kvm_vm_ioctl(kvm_state, KVM_MEMORY_ENCRYPT_OP, &tdx_cmd); + break; + case TDX_VCPU_IOCTL: + r = kvm_vcpu_ioctl(state, KVM_MEMORY_ENCRYPT_OP, &tdx_cmd); + break; + default: + error_setg(errp, "Invalid tdx_ioctl_level %d", level); + return -EINVAL; + } + + if (r < 0) { + error_setg_errno(errp, -r, "TDX ioctl %s failed, hw_errors: 0x%llx", + tdx_ioctl_name[cmd_id], tdx_cmd.hw_error); + } + return r; +} + +static inline int tdx_vm_ioctl(int cmd_id, __u32 flags, void *data, + Error **errp) +{ + return tdx_ioctl_internal(TDX_VM_IOCTL, NULL, cmd_id, flags, data, errp); +} + +static inline int tdx_vcpu_ioctl(CPUState *cpu, int cmd_id, __u32 flags, + void *data, Error **errp) +{ + return tdx_ioctl_internal(TDX_VCPU_IOCTL, cpu, cmd_id, flags, data, errp); +} + +static int get_tdx_capabilities(Error **errp) +{ + struct kvm_tdx_capabilities *caps; + /* 1st generation of TDX reports 6 cpuid configs */ + int nr_cpuid_configs = 6; + size_t size; + int r; + + do { + Error *local_err = NULL; + size = sizeof(struct kvm_tdx_capabilities) + + nr_cpuid_configs * sizeof(struct kvm_cpuid_entry2); + caps = g_malloc0(size); + caps->cpuid.nent = nr_cpuid_configs; + + r = tdx_vm_ioctl(KVM_TDX_CAPABILITIES, 0, caps, &local_err); + if (r == -E2BIG) { + g_free(caps); + nr_cpuid_configs *= 2; + if (nr_cpuid_configs > KVM_MAX_CPUID_ENTRIES) { + error_report("KVM TDX seems broken that number of CPUID entries" + " in kvm_tdx_capabilities exceeds limit: %d", + KVM_MAX_CPUID_ENTRIES); + error_propagate(errp, local_err); + return r; + } + error_free(local_err); + } else if (r < 0) { + g_free(caps); + error_propagate(errp, local_err); + return r; + } + } while (r == -E2BIG); + + tdx_caps = caps; + + return 0; +} + +void tdx_set_tdvf_region(MemoryRegion *tdvf_mr) +{ + assert(!tdx_guest->tdvf_mr); + tdx_guest->tdvf_mr = tdvf_mr; +} + +static TdxFirmwareEntry *tdx_get_hob_entry(TdxGuest *tdx) +{ + TdxFirmwareEntry *entry; + + for_each_tdx_fw_entry(&tdx->tdvf, entry) { + if (entry->type == TDVF_SECTION_TYPE_TD_HOB) { + return entry; + } + } + error_report("TDVF metadata doesn't specify TD_HOB location."); + exit(1); +} + +static void tdx_add_ram_entry(uint64_t address, uint64_t length, + enum TdxRamType type) +{ + uint32_t nr_entries = tdx_guest->nr_ram_entries; + tdx_guest->ram_entries = g_renew(TdxRamEntry, tdx_guest->ram_entries, + nr_entries + 1); + + tdx_guest->ram_entries[nr_entries].address = address; + tdx_guest->ram_entries[nr_entries].length = length; + tdx_guest->ram_entries[nr_entries].type = type; + tdx_guest->nr_ram_entries++; +} + +static int tdx_accept_ram_range(uint64_t address, uint64_t length) +{ + uint64_t head_start, tail_start, head_length, tail_length; + uint64_t tmp_address, tmp_length; + TdxRamEntry *e; + int i = 0; + + do { + if (i == tdx_guest->nr_ram_entries) { + return -1; + } + + e = 
&tdx_guest->ram_entries[i++]; + } while (address + length <= e->address || address >= e->address + e->length); + + /* + * The to-be-accepted ram range must be fully contained by one + * RAM entry. + */ + if (e->address > address || + e->address + e->length < address + length) { + return -1; + } + + if (e->type == TDX_RAM_ADDED) { + return 0; + } + + tmp_address = e->address; + tmp_length = e->length; + + e->address = address; + e->length = length; + e->type = TDX_RAM_ADDED; + + head_length = address - tmp_address; + if (head_length > 0) { + head_start = tmp_address; + tdx_add_ram_entry(head_start, head_length, TDX_RAM_UNACCEPTED); + } + + tail_start = address + length; + if (tail_start < tmp_address + tmp_length) { + tail_length = tmp_address + tmp_length - tail_start; + tdx_add_ram_entry(tail_start, tail_length, TDX_RAM_UNACCEPTED); + } + + return 0; +} + +static int tdx_ram_entry_compare(const void *lhs_, const void* rhs_) +{ + const TdxRamEntry *lhs = lhs_; + const TdxRamEntry *rhs = rhs_; + + if (lhs->address == rhs->address) { + return 0; + } + if (le64_to_cpu(lhs->address) > le64_to_cpu(rhs->address)) { + return 1; + } + return -1; +} + +static void tdx_init_ram_entries(void) +{ + unsigned i, j, nr_e820_entries; + + nr_e820_entries = e820_get_table(NULL); + tdx_guest->ram_entries = g_new(TdxRamEntry, nr_e820_entries); + + for (i = 0, j = 0; i < nr_e820_entries; i++) { + uint64_t addr, len; + + if (e820_get_entry(i, E820_RAM, &addr, &len)) { + tdx_guest->ram_entries[j].address = addr; + tdx_guest->ram_entries[j].length = len; + tdx_guest->ram_entries[j].type = TDX_RAM_UNACCEPTED; + j++; + } + } + tdx_guest->nr_ram_entries = j; +} + +static void tdx_post_init_vcpus(void) +{ + TdxFirmwareEntry *hob; + CPUState *cpu; + + hob = tdx_get_hob_entry(tdx_guest); + CPU_FOREACH(cpu) { + tdx_vcpu_ioctl(cpu, KVM_TDX_INIT_VCPU, 0, (void *)(uintptr_t)hob->address, + &error_fatal); + } +} + +static void tdx_finalize_vm(Notifier *notifier, void *unused) +{ + TdxFirmware *tdvf = &tdx_guest->tdvf; + TdxFirmwareEntry *entry; + RAMBlock *ram_block; + Error *local_err = NULL; + int r; + + tdx_init_ram_entries(); + + for_each_tdx_fw_entry(tdvf, entry) { + switch (entry->type) { + case TDVF_SECTION_TYPE_BFV: + case TDVF_SECTION_TYPE_CFV: + entry->mem_ptr = tdvf->mem_ptr + entry->data_offset; + break; + case TDVF_SECTION_TYPE_TD_HOB: + case TDVF_SECTION_TYPE_TEMP_MEM: + entry->mem_ptr = qemu_ram_mmap(-1, entry->size, + qemu_real_host_page_size(), 0, 0); + if (entry->mem_ptr == MAP_FAILED) { + error_report("Failed to mmap memory for TDVF section %d", + entry->type); + exit(1); + } + if (tdx_accept_ram_range(entry->address, entry->size)) { + error_report("Failed to accept memory for TDVF section %d", + entry->type); + qemu_ram_munmap(-1, entry->mem_ptr, entry->size); + exit(1); + } + break; + default: + error_report("Unsupported TDVF section %d", entry->type); + exit(1); + } + } + + qsort(tdx_guest->ram_entries, tdx_guest->nr_ram_entries, + sizeof(TdxRamEntry), &tdx_ram_entry_compare); + + tdvf_hob_create(tdx_guest, tdx_get_hob_entry(tdx_guest)); + + tdx_post_init_vcpus(); + + for_each_tdx_fw_entry(tdvf, entry) { + struct kvm_tdx_init_mem_region region; + uint32_t flags; + + region = (struct kvm_tdx_init_mem_region) { + .source_addr = (uintptr_t)entry->mem_ptr, + .gpa = entry->address, + .nr_pages = entry->size >> 12, + }; + + flags = entry->attributes & TDVF_SECTION_ATTRIBUTES_MR_EXTEND ? 
+ KVM_TDX_MEASURE_MEMORY_REGION : 0; + + do { + error_free(local_err); + local_err = NULL; + r = tdx_vcpu_ioctl(first_cpu, KVM_TDX_INIT_MEM_REGION, flags, + ®ion, &local_err); + } while (r == -EAGAIN || r == -EINTR); + if (r < 0) { + error_report_err(local_err); + exit(1); + } + + if (entry->type == TDVF_SECTION_TYPE_TD_HOB || + entry->type == TDVF_SECTION_TYPE_TEMP_MEM) { + qemu_ram_munmap(-1, entry->mem_ptr, entry->size); + entry->mem_ptr = NULL; + } + } + + /* + * TDVF image has been copied into private region above via + * KVM_MEMORY_MAPPING. It becomes useless. + */ + ram_block = tdx_guest->tdvf_mr->ram_block; + ram_block_discard_range(ram_block, 0, ram_block->max_length); + + tdx_vm_ioctl(KVM_TDX_FINALIZE_VM, 0, NULL, &error_fatal); + CONFIDENTIAL_GUEST_SUPPORT(tdx_guest)->ready = true; +} + +static Notifier tdx_machine_done_notify = { + .notify = tdx_finalize_vm, +}; + +/* + * Some CPUID bits change from fixed1 to configurable bits when TDX module + * supports TDX_FEATURES0.VE_REDUCTION. e.g., MCA/MCE/MTRR/CORE_CAPABILITY. + * + * To make QEMU work with all the versions of TDX module, keep the fixed1 bits + * here if they are ever fixed1 bits in any of the version though not fixed1 in + * the latest version. Otherwise, with the older version of TDX module, QEMU may + * treat the fixed1 bit as unsupported. + * + * For newer TDX module, it does no harm to keep them in tdx_fixed1_bits even + * though they changed to configurable bits. Because tdx_fixed1_bits is used to + * setup the supported bits. + */ +KvmCpuidInfo tdx_fixed1_bits = { + .cpuid.nent = 8, + .entries[0] = { + .function = 0x1, + .index = 0, + .ecx = CPUID_EXT_SSE3 | CPUID_EXT_PCLMULQDQ | CPUID_EXT_DTES64 | + CPUID_EXT_DSCPL | CPUID_EXT_SSSE3 | CPUID_EXT_CX16 | + CPUID_EXT_PDCM | CPUID_EXT_PCID | CPUID_EXT_SSE41 | + CPUID_EXT_SSE42 | CPUID_EXT_X2APIC | CPUID_EXT_MOVBE | + CPUID_EXT_POPCNT | CPUID_EXT_AES | CPUID_EXT_XSAVE | + CPUID_EXT_RDRAND | CPUID_EXT_HYPERVISOR, + .edx = CPUID_FP87 | CPUID_VME | CPUID_DE | CPUID_PSE | CPUID_TSC | + CPUID_MSR | CPUID_PAE | CPUID_MCE | CPUID_CX8 | CPUID_APIC | + CPUID_SEP | CPUID_MTRR | CPUID_PGE | CPUID_MCA | CPUID_CMOV | + CPUID_PAT | CPUID_CLFLUSH | CPUID_DTS | CPUID_MMX | CPUID_FXSR | + CPUID_SSE | CPUID_SSE2, + }, + .entries[1] = { + .function = 0x6, + .index = 0, + .eax = CPUID_6_EAX_ARAT, + }, + .entries[2] = { + .function = 0x7, + .index = 0, + .flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX, + .ebx = CPUID_7_0_EBX_FSGSBASE | CPUID_7_0_EBX_FDP_EXCPTN_ONLY | + CPUID_7_0_EBX_SMEP | CPUID_7_0_EBX_INVPCID | + CPUID_7_0_EBX_ZERO_FCS_FDS | CPUID_7_0_EBX_RDSEED | + CPUID_7_0_EBX_SMAP | CPUID_7_0_EBX_CLFLUSHOPT | + CPUID_7_0_EBX_CLWB | CPUID_7_0_EBX_SHA_NI, + .ecx = CPUID_7_0_ECX_BUS_LOCK_DETECT | CPUID_7_0_ECX_MOVDIRI | + CPUID_7_0_ECX_MOVDIR64B, + .edx = CPUID_7_0_EDX_MD_CLEAR | CPUID_7_0_EDX_SPEC_CTRL | + CPUID_7_0_EDX_STIBP | CPUID_7_0_EDX_FLUSH_L1D | + CPUID_7_0_EDX_ARCH_CAPABILITIES | CPUID_7_0_EDX_CORE_CAPABILITY | + CPUID_7_0_EDX_SPEC_CTRL_SSBD, + }, + .entries[3] = { + .function = 0x7, + .index = 2, + .flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX, + .edx = CPUID_7_2_EDX_PSFD | CPUID_7_2_EDX_IPRED_CTRL | + CPUID_7_2_EDX_RRSBA_CTRL | CPUID_7_2_EDX_BHI_CTRL, + }, + .entries[4] = { + .function = 0xD, + .index = 0, + .flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX, + .eax = XSTATE_FP_MASK | XSTATE_SSE_MASK, + }, + .entries[5] = { + .function = 0xD, + .index = 1, + .flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX, + .eax = CPUID_XSAVE_XSAVEOPT | CPUID_XSAVE_XSAVEC| + CPUID_XSAVE_XGETBV1 | CPUID_XSAVE_XSAVES, + 
}, + .entries[6] = { + .function = 0x80000001, + .index = 0, + .ecx = CPUID_EXT3_LAHF_LM | CPUID_EXT3_ABM | CPUID_EXT3_3DNOWPREFETCH, + /* + * Strictly speaking, SYSCALL is not fixed1 bit since it depends on + * the CPU to be in 64-bit mode. But here fixed1 is used to serve the + * purpose of supported bits for TDX. In this sense, SYACALL is always + * supported. + */ + .edx = CPUID_EXT2_SYSCALL | CPUID_EXT2_NX | CPUID_EXT2_PDPE1GB | + CPUID_EXT2_RDTSCP | CPUID_EXT2_LM, + }, + .entries[7] = { + .function = 0x80000007, + .index = 0, + .edx = CPUID_APM_INVTSC, + }, +}; + +typedef struct TdxAttrsMap { + uint32_t attr_index; + uint32_t cpuid_leaf; + uint32_t cpuid_subleaf; + int cpuid_reg; + uint32_t feat_mask; +} TdxAttrsMap; + +static TdxAttrsMap tdx_attrs_maps[] = { + {.attr_index = 27, + .cpuid_leaf = 7, + .cpuid_subleaf = 1, + .cpuid_reg = R_EAX, + .feat_mask = CPUID_7_1_EAX_LASS,}, + + {.attr_index = 30, + .cpuid_leaf = 7, + .cpuid_subleaf = 0, + .cpuid_reg = R_ECX, + .feat_mask = CPUID_7_0_ECX_PKS,}, + + {.attr_index = 31, + .cpuid_leaf = 7, + .cpuid_subleaf = 0, + .cpuid_reg = R_ECX, + .feat_mask = CPUID_7_0_ECX_KeyLocker,}, +}; + +typedef struct TdxXFAMDep { + int xfam_bit; + FeatureMask feat_mask; +} TdxXFAMDep; + +/* + * Note, only the CPUID bits whose virtualization type are "XFAM & Native" are + * defiend here. + * + * For those whose virtualization type are "XFAM & Configured & Native", they + * are reported as configurable bits. And they are not supported if not in the + * configureable bits list from KVM even if the corresponding XFAM bit is + * supported. + */ +TdxXFAMDep tdx_xfam_deps[] = { + { XSTATE_YMM_BIT, { FEAT_1_ECX, CPUID_EXT_FMA }}, + { XSTATE_YMM_BIT, { FEAT_7_0_EBX, CPUID_7_0_EBX_AVX2 }}, + { XSTATE_OPMASK_BIT, { FEAT_7_0_ECX, CPUID_7_0_ECX_AVX512_VBMI}}, + { XSTATE_OPMASK_BIT, { FEAT_7_0_EDX, CPUID_7_0_EDX_AVX512_FP16}}, + { XSTATE_PT_BIT, { FEAT_7_0_EBX, CPUID_7_0_EBX_INTEL_PT}}, + { XSTATE_PKRU_BIT, { FEAT_7_0_ECX, CPUID_7_0_ECX_PKU}}, + { XSTATE_XTILE_CFG_BIT, { FEAT_7_0_EDX, CPUID_7_0_EDX_AMX_BF16 }}, + { XSTATE_XTILE_CFG_BIT, { FEAT_7_0_EDX, CPUID_7_0_EDX_AMX_TILE }}, + { XSTATE_XTILE_CFG_BIT, { FEAT_7_0_EDX, CPUID_7_0_EDX_AMX_INT8 }}, +}; + +static struct kvm_cpuid_entry2 *find_in_supported_entry(uint32_t function, + uint32_t index) +{ + struct kvm_cpuid_entry2 *e; + + e = cpuid_find_entry(tdx_supported_cpuid, function, index); + if (!e) { + if (tdx_supported_cpuid->nent >= KVM_MAX_CPUID_ENTRIES) { + error_report("tdx_supported_cpuid requries more space than %d entries", + KVM_MAX_CPUID_ENTRIES); + exit(1); + } + e = &tdx_supported_cpuid->entries[tdx_supported_cpuid->nent++]; + e->function = function; + e->index = index; + } + + return e; +} + +static void tdx_add_supported_cpuid_by_fixed1_bits(void) +{ + struct kvm_cpuid_entry2 *e, *e1; + int i; + + for (i = 0; i < tdx_fixed1_bits.cpuid.nent; i++) { + e = &tdx_fixed1_bits.entries[i]; + + e1 = find_in_supported_entry(e->function, e->index); + e1->eax |= e->eax; + e1->ebx |= e->ebx; + e1->ecx |= e->ecx; + e1->edx |= e->edx; + } +} + +static void tdx_add_supported_cpuid_by_attrs(void) +{ + struct kvm_cpuid_entry2 *e; + TdxAttrsMap *map; + int i; + + for (i = 0; i < ARRAY_SIZE(tdx_attrs_maps); i++) { + map = &tdx_attrs_maps[i]; + if (!((1ULL << map->attr_index) & tdx_caps->supported_attrs)) { + continue; + } + + e = find_in_supported_entry(map->cpuid_leaf, map->cpuid_subleaf); + + switch(map->cpuid_reg) { + case R_EAX: + e->eax |= map->feat_mask; + break; + case R_EBX: + e->ebx |= map->feat_mask; + break; + 
case R_ECX: + e->ecx |= map->feat_mask; + break; + case R_EDX: + e->edx |= map->feat_mask; + break; + } + } +} + +static void tdx_add_supported_cpuid_by_xfam(void) +{ + struct kvm_cpuid_entry2 *e; + int i; + + const TdxXFAMDep *xfam_dep; + const FeatureWordInfo *f; + for (i = 0; i < ARRAY_SIZE(tdx_xfam_deps); i++) { + xfam_dep = &tdx_xfam_deps[i]; + if (!((1ULL << xfam_dep->xfam_bit) & tdx_caps->supported_xfam)) { + continue; + } + + f = &feature_word_info[xfam_dep->feat_mask.index]; + if (f->type != CPUID_FEATURE_WORD) { + continue; + } + + e = find_in_supported_entry(f->cpuid.eax, f->cpuid.ecx); + switch(f->cpuid.reg) { + case R_EAX: + e->eax |= xfam_dep->feat_mask.mask; + break; + case R_EBX: + e->ebx |= xfam_dep->feat_mask.mask; + break; + case R_ECX: + e->ecx |= xfam_dep->feat_mask.mask; + break; + case R_EDX: + e->edx |= xfam_dep->feat_mask.mask; + break; + } + } + + e = find_in_supported_entry(0xd, 0); + e->eax |= (tdx_caps->supported_xfam & CPUID_XSTATE_XCR0_MASK); + e->edx |= (tdx_caps->supported_xfam & CPUID_XSTATE_XCR0_MASK) >> 32; + + e = find_in_supported_entry(0xd, 1); + /* + * Mark XFD always support for TDX, it will be cleared finally in + * tdx_adjust_cpuid_features() if XFD is unavailable on the hardware + * because in this case the original data has it as 0. + */ + e->eax |= CPUID_XSAVE_XFD; + e->ecx |= (tdx_caps->supported_xfam & CPUID_XSTATE_XSS_MASK); + e->edx |= (tdx_caps->supported_xfam & CPUID_XSTATE_XSS_MASK) >> 32; +} + +static void tdx_add_supported_kvm_features(void) +{ + struct kvm_cpuid_entry2 *e; + + e = find_in_supported_entry(0x40000001, 0); + e->eax = TDX_SUPPORTED_KVM_FEATURES; +} + +static void tdx_setup_supported_cpuid(void) +{ + if (tdx_supported_cpuid) { + return; + } + + tdx_supported_cpuid = g_malloc0(sizeof(*tdx_supported_cpuid) + + KVM_MAX_CPUID_ENTRIES * sizeof(struct kvm_cpuid_entry2)); + + memcpy(tdx_supported_cpuid->entries, tdx_caps->cpuid.entries, + tdx_caps->cpuid.nent * sizeof(struct kvm_cpuid_entry2)); + tdx_supported_cpuid->nent = tdx_caps->cpuid.nent; + + tdx_add_supported_cpuid_by_fixed1_bits(); + tdx_add_supported_cpuid_by_attrs(); + tdx_add_supported_cpuid_by_xfam(); + + tdx_add_supported_kvm_features(); +} + +static int tdx_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) +{ + MachineState *ms = MACHINE(qdev_get_machine()); + X86MachineState *x86ms = X86_MACHINE(ms); + TdxGuest *tdx = TDX_GUEST(cgs); + int r = 0; + + kvm_mark_guest_state_protected(); + + if (x86ms->smm == ON_OFF_AUTO_AUTO) { + x86ms->smm = ON_OFF_AUTO_OFF; + } else if (x86ms->smm == ON_OFF_AUTO_ON) { + error_setg(errp, "TDX VM doesn't support SMM"); + return -EINVAL; + } + + if (x86ms->pic == ON_OFF_AUTO_AUTO) { + x86ms->pic = ON_OFF_AUTO_OFF; + } else if (x86ms->pic == ON_OFF_AUTO_ON) { + error_setg(errp, "TDX VM doesn't support PIC"); + return -EINVAL; + } + + if (kvm_state->kernel_irqchip_split == ON_OFF_AUTO_AUTO) { + kvm_state->kernel_irqchip_split = ON_OFF_AUTO_ON; + } else if (kvm_state->kernel_irqchip_split != ON_OFF_AUTO_ON) { + error_setg(errp, "TDX VM requires kernel_irqchip to be split"); + return -EINVAL; + } + + if (!tdx_caps) { + r = get_tdx_capabilities(errp); + if (r) { + return r; + } + } + + tdx_setup_supported_cpuid(); + + /* TDX relies on KVM_HC_MAP_GPA_RANGE to handle TDG.VP.VMCALL<MapGPA> */ + if (!kvm_enable_hypercall(BIT_ULL(KVM_HC_MAP_GPA_RANGE))) { + return -EOPNOTSUPP; + } + + /* + * Set kvm_readonly_mem_allowed to false, because TDX only supports readonly + * memory for shared memory but not for private memory. 
Besides, whether a + * memslot is private or shared is not determined by QEMU. + * + * Thus, just mark readonly memory not supported for simplicity. + */ + kvm_readonly_mem_allowed = false; + + qemu_add_machine_init_done_notifier(&tdx_machine_done_notify); + + tdx_guest = tdx; + return 0; +} + +static int tdx_kvm_type(X86ConfidentialGuest *cg) +{ + /* Do the object check */ + TDX_GUEST(cg); + + return KVM_X86_TDX_VM; +} + +static void tdx_cpu_instance_init(X86ConfidentialGuest *cg, CPUState *cpu) +{ + X86CPUClass *xcc = X86_CPU_GET_CLASS(cpu); + X86CPU *x86cpu = X86_CPU(cpu); + + if (xcc->model) { + error_report("Named cpu model is not supported for TDX yet!"); + exit(1); + } + + object_property_set_bool(OBJECT(cpu), "pmu", false, &error_abort); + + /* invtsc is fixed1 for TD guest */ + object_property_set_bool(OBJECT(cpu), "invtsc", true, &error_abort); + + x86cpu->force_cpuid_0x1f = true; +} + +static uint32_t tdx_adjust_cpuid_features(X86ConfidentialGuest *cg, + uint32_t feature, uint32_t index, + int reg, uint32_t value) +{ + struct kvm_cpuid_entry2 *e; + + e = cpuid_find_entry(&tdx_fixed1_bits.cpuid, feature, index); + if (e) { + value |= cpuid_entry_get_reg(e, reg); + } + + if (is_feature_word_cpuid(feature, index, reg)) { + e = cpuid_find_entry(tdx_supported_cpuid, feature, index); + if (e) { + value &= cpuid_entry_get_reg(e, reg); + } + } + + return value; +} + +static struct kvm_cpuid2 *tdx_fetch_cpuid(CPUState *cpu, int *ret) +{ + struct kvm_cpuid2 *fetch_cpuid; + int size = KVM_MAX_CPUID_ENTRIES; + Error *local_err = NULL; + int r; + + do { + error_free(local_err); + local_err = NULL; + + fetch_cpuid = g_malloc0(sizeof(*fetch_cpuid) + + sizeof(struct kvm_cpuid_entry2) * size); + fetch_cpuid->nent = size; + r = tdx_vcpu_ioctl(cpu, KVM_TDX_GET_CPUID, 0, fetch_cpuid, &local_err); + if (r == -E2BIG) { + g_free(fetch_cpuid); + size = fetch_cpuid->nent; + } + } while (r == -E2BIG); + + if (r < 0) { + error_report_err(local_err); + *ret = r; + return NULL; + } + + return fetch_cpuid; +} + +static int tdx_check_features(X86ConfidentialGuest *cg, CPUState *cs) +{ + uint64_t actual, requested, unavailable, forced_on; + g_autofree struct kvm_cpuid2 *fetch_cpuid; + const char *forced_on_prefix = NULL; + const char *unav_prefix = NULL; + struct kvm_cpuid_entry2 *entry; + X86CPU *cpu = X86_CPU(cs); + CPUX86State *env = &cpu->env; + FeatureWordInfo *wi; + FeatureWord w; + bool mismatch = false; + int r; + + fetch_cpuid = tdx_fetch_cpuid(cs, &r); + if (!fetch_cpuid) { + return r; + } + + if (cpu->check_cpuid || cpu->enforce_cpuid) { + unav_prefix = "TDX doesn't support requested feature"; + forced_on_prefix = "TDX forcibly sets the feature"; + } + + for (w = 0; w < FEATURE_WORDS; w++) { + wi = &feature_word_info[w]; + actual = 0; + + switch (wi->type) { + case CPUID_FEATURE_WORD: + entry = cpuid_find_entry(fetch_cpuid, wi->cpuid.eax, wi->cpuid.ecx); + if (!entry) { + /* + * If KVM doesn't report it means it's totally configurable + * by QEMU + */ + continue; + } + + actual = cpuid_entry_get_reg(entry, wi->cpuid.reg); + break; + case MSR_FEATURE_WORD: + /* + * TODO: + * validate MSR features when KVM has interface report them. + */ + continue; + } + + /* Fixup for special cases */ + switch (w) { + case FEAT_8000_0001_EDX: + /* + * Intel enumerates SYSCALL bit as 1 only when processor in 64-bit + * mode and before vcpu running it's not in 64-bit mode. 
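+ * Folding SYSCALL into 'actual' below keeps it from being reported as a
+ * feature TDX refused merely because CPUID was sampled before the vCPU
+ * ever ran in 64-bit mode.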
+ */ + actual |= CPUID_EXT2_SYSCALL; + break; + default: + break; + } + + requested = env->features[w]; + unavailable = requested & ~actual; + mark_unavailable_features(cpu, w, unavailable, unav_prefix); + if (unavailable) { + mismatch = true; + } + + forced_on = actual & ~requested; + mark_forced_on_features(cpu, w, forced_on, forced_on_prefix); + if (forced_on) { + mismatch = true; + } + } + + if (cpu->enforce_cpuid && mismatch) { + return -EINVAL; + } + + if (cpu->phys_bits != host_cpu_phys_bits()) { + error_report("TDX requires guest CPU physical bits (%u) " + "to match host CPU physical bits (%u)", + cpu->phys_bits, host_cpu_phys_bits()); + return -EINVAL; + } + + return 0; +} + +static int tdx_validate_attributes(TdxGuest *tdx, Error **errp) +{ + if ((tdx->attributes & ~tdx_caps->supported_attrs)) { + error_setg(errp, "Invalid attributes 0x%"PRIx64" for TDX VM " + "(KVM supported: 0x%"PRIx64")", tdx->attributes, + (uint64_t)tdx_caps->supported_attrs); + return -1; + } + + if (tdx->attributes & ~TDX_SUPPORTED_TD_ATTRS) { + error_setg(errp, "Some QEMU unsupported TD attribute bits being " + "requested: 0x%"PRIx64" (QEMU supported: 0x%"PRIx64")", + tdx->attributes, (uint64_t)TDX_SUPPORTED_TD_ATTRS); + return -1; + } + + return 0; +} + +static int setup_td_guest_attributes(X86CPU *x86cpu, Error **errp) +{ + CPUX86State *env = &x86cpu->env; + + tdx_guest->attributes |= (env->features[FEAT_7_0_ECX] & CPUID_7_0_ECX_PKS) ? + TDX_TD_ATTRIBUTES_PKS : 0; + tdx_guest->attributes |= x86cpu->enable_pmu ? TDX_TD_ATTRIBUTES_PERFMON : 0; + + return tdx_validate_attributes(tdx_guest, errp); +} + +static int setup_td_xfam(X86CPU *x86cpu, Error **errp) +{ + CPUX86State *env = &x86cpu->env; + uint64_t xfam; + + xfam = env->features[FEAT_XSAVE_XCR0_LO] | + env->features[FEAT_XSAVE_XCR0_HI] | + env->features[FEAT_XSAVE_XSS_LO] | + env->features[FEAT_XSAVE_XSS_HI]; + + if (xfam & ~tdx_caps->supported_xfam) { + error_setg(errp, "Invalid XFAM 0x%"PRIx64" for TDX VM (supported: 0x%"PRIx64"))", + xfam, (uint64_t)tdx_caps->supported_xfam); + return -1; + } + + tdx_guest->xfam = xfam; + return 0; +} + +static void tdx_filter_cpuid(struct kvm_cpuid2 *cpuids) +{ + int i, dest_cnt = 0; + struct kvm_cpuid_entry2 *src, *dest, *conf; + + for (i = 0; i < cpuids->nent; i++) { + src = cpuids->entries + i; + conf = cpuid_find_entry(&tdx_caps->cpuid, src->function, src->index); + if (!conf) { + continue; + } + dest = cpuids->entries + dest_cnt; + + dest->function = src->function; + dest->index = src->index; + dest->flags = src->flags; + dest->eax = src->eax & conf->eax; + dest->ebx = src->ebx & conf->ebx; + dest->ecx = src->ecx & conf->ecx; + dest->edx = src->edx & conf->edx; + + dest_cnt++; + } + cpuids->nent = dest_cnt++; +} + +int tdx_pre_create_vcpu(CPUState *cpu, Error **errp) +{ + X86CPU *x86cpu = X86_CPU(cpu); + CPUX86State *env = &x86cpu->env; + g_autofree struct kvm_tdx_init_vm *init_vm = NULL; + Error *local_err = NULL; + size_t data_len; + int retry = 10000; + int r = 0; + + QEMU_LOCK_GUARD(&tdx_guest->lock); + if (tdx_guest->initialized) { + return r; + } + + init_vm = g_malloc0(sizeof(struct kvm_tdx_init_vm) + + sizeof(struct kvm_cpuid_entry2) * KVM_MAX_CPUID_ENTRIES); + + if (!kvm_check_extension(kvm_state, KVM_CAP_X86_APIC_BUS_CYCLES_NS)) { + error_setg(errp, "KVM doesn't support KVM_CAP_X86_APIC_BUS_CYCLES_NS"); + return -EOPNOTSUPP; + } + + r = kvm_vm_enable_cap(kvm_state, KVM_CAP_X86_APIC_BUS_CYCLES_NS, + 0, TDX_APIC_BUS_CYCLES_NS); + if (r < 0) { + error_setg_errno(errp, -r, + "Unable to set core crystal 
clock frequency to 25MHz"); + return r; + } + + if (env->tsc_khz && (env->tsc_khz < TDX_MIN_TSC_FREQUENCY_KHZ || + env->tsc_khz > TDX_MAX_TSC_FREQUENCY_KHZ)) { + error_setg(errp, "Invalid TSC %"PRId64" KHz, must specify cpu_frequency " + "between [%d, %d] kHz", env->tsc_khz, + TDX_MIN_TSC_FREQUENCY_KHZ, TDX_MAX_TSC_FREQUENCY_KHZ); + return -EINVAL; + } + + if (env->tsc_khz % (25 * 1000)) { + error_setg(errp, "Invalid TSC %"PRId64" KHz, it must be multiple of 25MHz", + env->tsc_khz); + return -EINVAL; + } + + /* it's safe even env->tsc_khz is 0. KVM uses host's tsc_khz in this case */ + r = kvm_vm_ioctl(kvm_state, KVM_SET_TSC_KHZ, env->tsc_khz); + if (r < 0) { + error_setg_errno(errp, -r, "Unable to set TSC frequency to %"PRId64" kHz", + env->tsc_khz); + return r; + } + + if (tdx_guest->mrconfigid) { + g_autofree uint8_t *data = qbase64_decode(tdx_guest->mrconfigid, + strlen(tdx_guest->mrconfigid), &data_len, errp); + if (!data) { + return -1; + } + if (data_len != QCRYPTO_HASH_DIGEST_LEN_SHA384) { + error_setg(errp, "TDX 'mrconfigid' sha384 digest was %ld bytes, " + "expected %d bytes", data_len, + QCRYPTO_HASH_DIGEST_LEN_SHA384); + return -1; + } + memcpy(init_vm->mrconfigid, data, data_len); + } + + if (tdx_guest->mrowner) { + g_autofree uint8_t *data = qbase64_decode(tdx_guest->mrowner, + strlen(tdx_guest->mrowner), &data_len, errp); + if (!data) { + return -1; + } + if (data_len != QCRYPTO_HASH_DIGEST_LEN_SHA384) { + error_setg(errp, "TDX 'mrowner' sha384 digest was %ld bytes, " + "expected %d bytes", data_len, + QCRYPTO_HASH_DIGEST_LEN_SHA384); + return -1; + } + memcpy(init_vm->mrowner, data, data_len); + } + + if (tdx_guest->mrownerconfig) { + g_autofree uint8_t *data = qbase64_decode(tdx_guest->mrownerconfig, + strlen(tdx_guest->mrownerconfig), &data_len, errp); + if (!data) { + return -1; + } + if (data_len != QCRYPTO_HASH_DIGEST_LEN_SHA384) { + error_setg(errp, "TDX 'mrownerconfig' sha384 digest was %ld bytes, " + "expected %d bytes", data_len, + QCRYPTO_HASH_DIGEST_LEN_SHA384); + return -1; + } + memcpy(init_vm->mrownerconfig, data, data_len); + } + + r = setup_td_guest_attributes(x86cpu, errp); + if (r) { + return r; + } + + r = setup_td_xfam(x86cpu, errp); + if (r) { + return r; + } + + init_vm->cpuid.nent = kvm_x86_build_cpuid(env, init_vm->cpuid.entries, 0); + tdx_filter_cpuid(&init_vm->cpuid); + + init_vm->attributes = tdx_guest->attributes; + init_vm->xfam = tdx_guest->xfam; + + /* + * KVM_TDX_INIT_VM gets -EAGAIN when KVM side SEAMCALL(TDH_MNG_CREATE) + * gets TDX_RND_NO_ENTROPY due to Random number generation (e.g., RDRAND or + * RDSEED) is busy. + * + * Retry for the case. 
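An illustrative sketch of the TSC frequency constraints enforced just above: a non-zero value must fall inside the TDX range and be a multiple of 25 MHz, while zero means "inherit the host frequency". The bounds below are assumptions standing in for TDX_MIN_TSC_FREQUENCY_KHZ / TDX_MAX_TSC_FREQUENCY_KHZ.

#include <stdbool.h>
#include <stdint.h>

static bool tsc_khz_valid_sketch(int64_t tsc_khz)
{
    const int64_t min_khz = 100 * 1000;        /* assumed lower bound, in kHz */
    const int64_t max_khz = 10 * 1000 * 1000;  /* assumed upper bound, in kHz */

    if (tsc_khz == 0) {
        return true;                           /* KVM falls back to the host tsc_khz */
    }
    return tsc_khz >= min_khz && tsc_khz <= max_khz &&
           (tsc_khz % (25 * 1000)) == 0;       /* must be a 25 MHz multiple */
}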
+ */ + do { + error_free(local_err); + local_err = NULL; + r = tdx_vm_ioctl(KVM_TDX_INIT_VM, 0, init_vm, &local_err); + } while (r == -EAGAIN && --retry); + + if (r < 0) { + if (!retry) { + error_append_hint(&local_err, "Hardware RNG (Random Number " + "Generator) is busy occupied by someone (via RDRAND/RDSEED) " + "maliciously, which leads to KVM_TDX_INIT_VM keeping failure " + "due to lack of entropy.\n"); + } + error_propagate(errp, local_err); + return r; + } + + tdx_guest->initialized = true; + + return 0; +} + +int tdx_parse_tdvf(void *flash_ptr, int size) +{ + return tdvf_parse_metadata(&tdx_guest->tdvf, flash_ptr, size); +} + +static void tdx_inject_interrupt(TdxGuest *tdx) +{ + int ret; + uint32_t apicid, vector; + + qemu_mutex_lock(&tdx->lock); + vector = tdx->event_notify_vector; + apicid = tdx->event_notify_apicid; + qemu_mutex_unlock(&tdx->lock); + if (vector < 32 || vector > 255) { + return; + } + + MSIMessage msg = { + .address = ((apicid & 0xff) << MSI_ADDR_DEST_ID_SHIFT) | + (((uint64_t)apicid & 0xffffff00) << 32), + .data = vector | (APIC_DM_FIXED << MSI_DATA_DELIVERY_MODE_SHIFT), + }; + + ret = kvm_irqchip_send_msi(kvm_state, msg); + if (ret < 0) { + /* In this case, no better way to tell it to guest. Log it. */ + error_report("TDX: injection interrupt %d failed, interrupt lost (%s).", + vector, strerror(-ret)); + } +} + +static void tdx_get_quote_completion(TdxGenerateQuoteTask *task) +{ + TdxGuest *tdx = task->opaque; + int ret; + + /* Maintain the number of in-flight requests. */ + qemu_mutex_lock(&tdx->lock); + tdx->num--; + qemu_mutex_unlock(&tdx->lock); + + if (task->status_code == TDX_VP_GET_QUOTE_SUCCESS) { + ret = address_space_write(&address_space_memory, task->payload_gpa, + MEMTXATTRS_UNSPECIFIED, task->receive_buf, + task->receive_buf_received); + if (ret != MEMTX_OK) { + error_report("TDX: get-quote: failed to write quote data."); + } else { + task->hdr.out_len = cpu_to_le64(task->receive_buf_received); + } + } + task->hdr.error_code = cpu_to_le64(task->status_code); + + /* Publish the response contents before marking this request completed. */ + smp_wmb(); + ret = address_space_write(&address_space_memory, task->buf_gpa, + MEMTXATTRS_UNSPECIFIED, &task->hdr, + TDX_GET_QUOTE_HDR_SIZE); + if (ret != MEMTX_OK) { + error_report("TDX: get-quote: failed to update GetQuote header."); + } + + tdx_inject_interrupt(tdx); + + g_free(task->send_data); + g_free(task->receive_buf); + g_free(task); + object_unref(tdx); +} + +void tdx_handle_get_quote(X86CPU *cpu, struct kvm_run *run) +{ + TdxGenerateQuoteTask *task; + struct tdx_get_quote_header hdr; + hwaddr buf_gpa = run->tdx.get_quote.gpa; + uint64_t buf_len = run->tdx.get_quote.size; + + QEMU_BUILD_BUG_ON(sizeof(struct tdx_get_quote_header) != TDX_GET_QUOTE_HDR_SIZE); + + run->tdx.get_quote.ret = TDG_VP_VMCALL_INVALID_OPERAND; + + if (buf_len == 0) { + return; + } + + if (!QEMU_IS_ALIGNED(buf_gpa, 4096) || !QEMU_IS_ALIGNED(buf_len, 4096)) { + run->tdx.get_quote.ret = TDG_VP_VMCALL_ALIGN_ERROR; + return; + } + + if (address_space_read(&address_space_memory, buf_gpa, MEMTXATTRS_UNSPECIFIED, + &hdr, TDX_GET_QUOTE_HDR_SIZE) != MEMTX_OK) { + error_report("TDX: get-quote: failed to read GetQuote header."); + return; + } + + if (le64_to_cpu(hdr.structure_version) != TDX_GET_QUOTE_STRUCTURE_VERSION) { + return; + } + + /* Only safe-guard check to avoid too large buffer size. 
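A standalone sketch (not part of the patch) of the MSI message that tdx_inject_interrupt() builds above: the low 8 bits of the APIC ID go into the address dest-id field, the remaining bits into the upper half of the address, and the vector plus fixed delivery mode form the data word. The shift values follow the usual x86 MSI layout and are hard-coded here only for illustration.

#include <stdint.h>

struct msi_msg_sketch {
    uint64_t address;
    uint32_t data;
};

static struct msi_msg_sketch make_notify_msi_sketch(uint32_t apicid, uint32_t vector)
{
    struct msi_msg_sketch msg;

    msg.address = ((uint64_t)(apicid & 0xff) << 12) |       /* dest-id field       */
                  (((uint64_t)apicid & 0xffffff00) << 32);  /* extended dest bits  */
    msg.data = vector | (0u << 8);                          /* fixed delivery mode */
    return msg;
}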
*/ + if (buf_len > TDX_GET_QUOTE_MAX_BUF_LEN || + le32_to_cpu(hdr.in_len) > buf_len - TDX_GET_QUOTE_HDR_SIZE) { + return; + } + + if (!tdx_guest->qg_sock_addr) { + hdr.error_code = cpu_to_le64(TDX_VP_GET_QUOTE_QGS_UNAVAILABLE); + if (address_space_write(&address_space_memory, buf_gpa, + MEMTXATTRS_UNSPECIFIED, + &hdr, TDX_GET_QUOTE_HDR_SIZE) != MEMTX_OK) { + error_report("TDX: failed to update GetQuote header."); + return; + } + run->tdx.get_quote.ret = TDG_VP_VMCALL_SUCCESS; + return; + } + + qemu_mutex_lock(&tdx_guest->lock); + if (tdx_guest->num >= TDX_MAX_GET_QUOTE_REQUEST) { + qemu_mutex_unlock(&tdx_guest->lock); + run->tdx.get_quote.ret = TDG_VP_VMCALL_RETRY; + return; + } + tdx_guest->num++; + qemu_mutex_unlock(&tdx_guest->lock); + + task = g_new(TdxGenerateQuoteTask, 1); + task->buf_gpa = buf_gpa; + task->payload_gpa = buf_gpa + TDX_GET_QUOTE_HDR_SIZE; + task->payload_len = buf_len - TDX_GET_QUOTE_HDR_SIZE; + task->hdr = hdr; + task->completion = tdx_get_quote_completion; + + task->send_data_size = le32_to_cpu(hdr.in_len); + task->send_data = g_malloc(task->send_data_size); + task->send_data_sent = 0; + + if (address_space_read(&address_space_memory, task->payload_gpa, + MEMTXATTRS_UNSPECIFIED, task->send_data, + task->send_data_size) != MEMTX_OK) { + goto out_free; + } + + /* Mark the buffer in-flight. */ + hdr.error_code = cpu_to_le64(TDX_VP_GET_QUOTE_IN_FLIGHT); + if (address_space_write(&address_space_memory, buf_gpa, + MEMTXATTRS_UNSPECIFIED, + &hdr, TDX_GET_QUOTE_HDR_SIZE) != MEMTX_OK) { + goto out_free; + } + + task->receive_buf = g_malloc0(task->payload_len); + task->receive_buf_received = 0; + task->opaque = tdx_guest; + + object_ref(tdx_guest); + tdx_generate_quote(task, tdx_guest->qg_sock_addr); + run->tdx.get_quote.ret = TDG_VP_VMCALL_SUCCESS; + return; + +out_free: + g_free(task->send_data); + g_free(task); +} + +#define SUPPORTED_TDVMCALLINFO_1_R11 (TDG_VP_VMCALL_SUBFUNC_SET_EVENT_NOTIFY_INTERRUPT) +#define SUPPORTED_TDVMCALLINFO_1_R12 (0) + +void tdx_handle_get_tdvmcall_info(X86CPU *cpu, struct kvm_run *run) +{ + if (run->tdx.get_tdvmcall_info.leaf != 1) { + return; + } + + run->tdx.get_tdvmcall_info.r11 = (tdx_caps->user_tdvmcallinfo_1_r11 & + SUPPORTED_TDVMCALLINFO_1_R11) | + tdx_caps->kernel_tdvmcallinfo_1_r11; + run->tdx.get_tdvmcall_info.r12 = (tdx_caps->user_tdvmcallinfo_1_r12 & + SUPPORTED_TDVMCALLINFO_1_R12) | + tdx_caps->kernel_tdvmcallinfo_1_r12; + run->tdx.get_tdvmcall_info.r13 = 0; + run->tdx.get_tdvmcall_info.r14 = 0; + + run->tdx.get_tdvmcall_info.ret = TDG_VP_VMCALL_SUCCESS; +} + +void tdx_handle_setup_event_notify_interrupt(X86CPU *cpu, struct kvm_run *run) +{ + uint64_t vector = run->tdx.setup_event_notify.vector; + + if (vector >= 32 && vector < 256) { + qemu_mutex_lock(&tdx_guest->lock); + tdx_guest->event_notify_vector = vector; + tdx_guest->event_notify_apicid = cpu->apic_id; + qemu_mutex_unlock(&tdx_guest->lock); + run->tdx.setup_event_notify.ret = TDG_VP_VMCALL_SUCCESS; + } else { + run->tdx.setup_event_notify.ret = TDG_VP_VMCALL_INVALID_OPERAND; + } +} + +static void tdx_panicked_on_fatal_error(X86CPU *cpu, uint64_t error_code, + char *message, bool has_gpa, + uint64_t gpa) +{ + GuestPanicInformation *panic_info; + + panic_info = g_new0(GuestPanicInformation, 1); + panic_info->type = GUEST_PANIC_INFORMATION_TYPE_TDX; + panic_info->u.tdx.error_code = (uint32_t) error_code; + panic_info->u.tdx.message = message; + panic_info->u.tdx.gpa = gpa; + panic_info->u.tdx.has_gpa = has_gpa; + + qemu_system_guest_panicked(panic_info); +} + +/* + * Only 8 
registers can contain valid ASCII byte stream to form the fatal + * message, and their sequence is: R14, R15, RBX, RDI, RSI, R8, R9, RDX + */ +#define TDX_FATAL_MESSAGE_MAX 64 + +#define TDX_REPORT_FATAL_ERROR_GPA_VALID BIT_ULL(63) + +int tdx_handle_report_fatal_error(X86CPU *cpu, struct kvm_run *run) +{ + uint64_t error_code = run->system_event.data[R_R12]; + uint64_t reg_mask = run->system_event.data[R_ECX]; + char *message = NULL; + uint64_t *tmp; + uint64_t gpa = -1ull; + bool has_gpa = false; + + if (error_code & 0xffff) { + error_report("TDX: REPORT_FATAL_ERROR: invalid error code: 0x%"PRIx64, + error_code); + return -1; + } + + if (reg_mask) { + message = g_malloc0(TDX_FATAL_MESSAGE_MAX + 1); + tmp = (uint64_t *)message; + +#define COPY_REG(REG) \ + do { \ + if (reg_mask & BIT_ULL(REG)) { \ + *(tmp++) = run->system_event.data[REG]; \ + } \ + } while (0) + + COPY_REG(R_R14); + COPY_REG(R_R15); + COPY_REG(R_EBX); + COPY_REG(R_EDI); + COPY_REG(R_ESI); + COPY_REG(R_R8); + COPY_REG(R_R9); + COPY_REG(R_EDX); + *((char *)tmp) = '\0'; + } +#undef COPY_REG + + if (error_code & TDX_REPORT_FATAL_ERROR_GPA_VALID) { + gpa = run->system_event.data[R_R13]; + has_gpa = true; + } + + tdx_panicked_on_fatal_error(cpu, error_code, message, has_gpa, gpa); + + return -1; +} + +static bool tdx_guest_get_sept_ve_disable(Object *obj, Error **errp) +{ + TdxGuest *tdx = TDX_GUEST(obj); + + return !!(tdx->attributes & TDX_TD_ATTRIBUTES_SEPT_VE_DISABLE); +} + +static void tdx_guest_set_sept_ve_disable(Object *obj, bool value, Error **errp) +{ + TdxGuest *tdx = TDX_GUEST(obj); + + if (value) { + tdx->attributes |= TDX_TD_ATTRIBUTES_SEPT_VE_DISABLE; + } else { + tdx->attributes &= ~TDX_TD_ATTRIBUTES_SEPT_VE_DISABLE; + } +} + +static char *tdx_guest_get_mrconfigid(Object *obj, Error **errp) +{ + TdxGuest *tdx = TDX_GUEST(obj); + + return g_strdup(tdx->mrconfigid); +} + +static void tdx_guest_set_mrconfigid(Object *obj, const char *value, Error **errp) +{ + TdxGuest *tdx = TDX_GUEST(obj); + + g_free(tdx->mrconfigid); + tdx->mrconfigid = g_strdup(value); +} + +static char *tdx_guest_get_mrowner(Object *obj, Error **errp) +{ + TdxGuest *tdx = TDX_GUEST(obj); + + return g_strdup(tdx->mrowner); +} + +static void tdx_guest_set_mrowner(Object *obj, const char *value, Error **errp) +{ + TdxGuest *tdx = TDX_GUEST(obj); + + g_free(tdx->mrowner); + tdx->mrowner = g_strdup(value); +} + +static char *tdx_guest_get_mrownerconfig(Object *obj, Error **errp) +{ + TdxGuest *tdx = TDX_GUEST(obj); + + return g_strdup(tdx->mrownerconfig); +} + +static void tdx_guest_set_mrownerconfig(Object *obj, const char *value, Error **errp) +{ + TdxGuest *tdx = TDX_GUEST(obj); + + g_free(tdx->mrownerconfig); + tdx->mrownerconfig = g_strdup(value); +} + +static void tdx_guest_get_qgs(Object *obj, Visitor *v, + const char *name, void *opaque, + Error **errp) +{ + TdxGuest *tdx = TDX_GUEST(obj); + + if (!tdx->qg_sock_addr) { + error_setg(errp, "quote-generation-socket is not set"); + return; + } + visit_type_SocketAddress(v, name, &tdx->qg_sock_addr, errp); +} + +static void tdx_guest_set_qgs(Object *obj, Visitor *v, + const char *name, void *opaque, + Error **errp) +{ + TdxGuest *tdx = TDX_GUEST(obj); + SocketAddress *sock = NULL; + + if (!visit_type_SocketAddress(v, name, &sock, errp)) { + return; + } + + if (tdx->qg_sock_addr) { + qapi_free_SocketAddress(tdx->qg_sock_addr); + } + + tdx->qg_sock_addr = sock; +} + +/* tdx guest */ +OBJECT_DEFINE_TYPE_WITH_INTERFACES(TdxGuest, + tdx_guest, + TDX_GUEST, + X86_CONFIDENTIAL_GUEST, + { 
TYPE_USER_CREATABLE }, + { NULL }) + +static void tdx_guest_init(Object *obj) +{ + ConfidentialGuestSupport *cgs = CONFIDENTIAL_GUEST_SUPPORT(obj); + TdxGuest *tdx = TDX_GUEST(obj); + + qemu_mutex_init(&tdx->lock); + + cgs->require_guest_memfd = true; + tdx->attributes = TDX_TD_ATTRIBUTES_SEPT_VE_DISABLE; + + object_property_add_uint64_ptr(obj, "attributes", &tdx->attributes, + OBJ_PROP_FLAG_READWRITE); + object_property_add_bool(obj, "sept-ve-disable", + tdx_guest_get_sept_ve_disable, + tdx_guest_set_sept_ve_disable); + object_property_add_str(obj, "mrconfigid", + tdx_guest_get_mrconfigid, + tdx_guest_set_mrconfigid); + object_property_add_str(obj, "mrowner", + tdx_guest_get_mrowner, tdx_guest_set_mrowner); + object_property_add_str(obj, "mrownerconfig", + tdx_guest_get_mrownerconfig, + tdx_guest_set_mrownerconfig); + + object_property_add(obj, "quote-generation-socket", "SocketAddress", + tdx_guest_get_qgs, + tdx_guest_set_qgs, + NULL, NULL); + + tdx->event_notify_vector = -1; + tdx->event_notify_apicid = -1; +} + +static void tdx_guest_finalize(Object *obj) +{ +} + +static void tdx_guest_class_init(ObjectClass *oc, const void *data) +{ + ConfidentialGuestSupportClass *klass = CONFIDENTIAL_GUEST_SUPPORT_CLASS(oc); + X86ConfidentialGuestClass *x86_klass = X86_CONFIDENTIAL_GUEST_CLASS(oc); + + klass->kvm_init = tdx_kvm_init; + x86_klass->kvm_type = tdx_kvm_type; + x86_klass->cpu_instance_init = tdx_cpu_instance_init; + x86_klass->adjust_cpuid_features = tdx_adjust_cpuid_features; + x86_klass->check_features = tdx_check_features; +} diff --git a/target/i386/kvm/tdx.h b/target/i386/kvm/tdx.h new file mode 100644 index 0000000..1c38faf --- /dev/null +++ b/target/i386/kvm/tdx.h @@ -0,0 +1,89 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#ifndef QEMU_I386_TDX_H +#define QEMU_I386_TDX_H + +#ifndef CONFIG_USER_ONLY +#include CONFIG_DEVICES /* CONFIG_TDX */ +#endif + +#include "confidential-guest.h" +#include "cpu.h" +#include "hw/i386/tdvf.h" + +#include "tdx-quote-generator.h" + +#define TYPE_TDX_GUEST "tdx-guest" +#define TDX_GUEST(obj) OBJECT_CHECK(TdxGuest, (obj), TYPE_TDX_GUEST) + +typedef struct TdxGuestClass { + X86ConfidentialGuestClass parent_class; +} TdxGuestClass; + +/* TDX requires bus frequency 25MHz */ +#define TDX_APIC_BUS_CYCLES_NS 40 + +#define TDVMCALL_GET_TD_VM_CALL_INFO 0x10000 +#define TDVMCALL_GET_QUOTE 0x10002 +#define TDVMCALL_SETUP_EVENT_NOTIFY_INTERRUPT 0x10004 + +#define TDG_VP_VMCALL_SUCCESS 0x0000000000000000ULL +#define TDG_VP_VMCALL_RETRY 0x0000000000000001ULL +#define TDG_VP_VMCALL_INVALID_OPERAND 0x8000000000000000ULL +#define TDG_VP_VMCALL_GPA_INUSE 0x8000000000000001ULL +#define TDG_VP_VMCALL_ALIGN_ERROR 0x8000000000000002ULL + +#define TDG_VP_VMCALL_SUBFUNC_SET_EVENT_NOTIFY_INTERRUPT BIT_ULL(1) + +enum TdxRamType { + TDX_RAM_UNACCEPTED, + TDX_RAM_ADDED, +}; + +typedef struct TdxRamEntry { + uint64_t address; + uint64_t length; + enum TdxRamType type; +} TdxRamEntry; + +typedef struct TdxGuest { + X86ConfidentialGuest parent_obj; + + QemuMutex lock; + + bool initialized; + uint64_t attributes; /* TD attributes */ + uint64_t xfam; + char *mrconfigid; /* base64 encoded sha384 digest */ + char *mrowner; /* base64 encoded sha384 digest */ + char *mrownerconfig; /* base64 encoded sha384 digest */ + + MemoryRegion *tdvf_mr; + TdxFirmware tdvf; + + uint32_t nr_ram_entries; + TdxRamEntry *ram_entries; + + /* GetQuote */ + SocketAddress *qg_sock_addr; + int num; + + uint32_t event_notify_vector; + uint32_t event_notify_apicid; +} TdxGuest; + +#ifdef CONFIG_TDX 
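A tiny arithmetic check (editor's sketch) of the relation between the comment and the constant defined above: a 25 MHz APIC bus clock corresponds to a 40 ns cycle, which is where the value of TDX_APIC_BUS_CYCLES_NS comes from.

#include <assert.h>

int main(void)
{
    const unsigned long freq_hz = 25ul * 1000ul * 1000ul;              /* 25 MHz */
    const unsigned long period_ns = 1000ul * 1000ul * 1000ul / freq_hz;

    assert(period_ns == 40);   /* matches TDX_APIC_BUS_CYCLES_NS */
    return 0;
}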
+bool is_tdx_vm(void); +#else +#define is_tdx_vm() 0 +#endif /* CONFIG_TDX */ + +int tdx_pre_create_vcpu(CPUState *cpu, Error **errp); +void tdx_set_tdvf_region(MemoryRegion *tdvf_mr); +int tdx_parse_tdvf(void *flash_ptr, int size); +int tdx_handle_report_fatal_error(X86CPU *cpu, struct kvm_run *run); +void tdx_handle_get_quote(X86CPU *cpu, struct kvm_run *run); +void tdx_handle_get_tdvmcall_info(X86CPU *cpu, struct kvm_run *run); +void tdx_handle_setup_event_notify_interrupt(X86CPU *cpu, struct kvm_run *run); + +#endif /* QEMU_I386_TDX_H */ diff --git a/target/i386/kvm/vmsr_energy.c b/target/i386/kvm/vmsr_energy.c index d6aad52..890322a 100644 --- a/target/i386/kvm/vmsr_energy.c +++ b/target/i386/kvm/vmsr_energy.c @@ -27,15 +27,6 @@ char *vmsr_compute_default_paths(void) return g_build_filename(state, "run", "qemu-vmsr-helper.sock", NULL); } -bool is_host_cpu_intel(void) -{ - char vendor[CPUID_VENDOR_SZ + 1]; - - host_cpu_vendor_fms(vendor, NULL, NULL, NULL); - - return g_str_equal(vendor, CPUID_VENDOR_INTEL); -} - int is_rapl_enabled(void) { const char *path = "/sys/class/powercap/intel-rapl/enabled"; @@ -66,13 +57,9 @@ QIOChannelSocket *vmsr_open_socket(const char *path) }; QIOChannelSocket *sioc = qio_channel_socket_new(); - Error *local_err = NULL; qio_channel_set_name(QIO_CHANNEL(sioc), "vmsr-helper"); - qio_channel_socket_connect_sync(sioc, - &saddr, - &local_err); - if (local_err) { + if (qio_channel_socket_connect_sync(sioc, &saddr, NULL) < 0) { /* Close socket. */ qio_channel_close(QIO_CHANNEL(sioc), NULL); object_unref(OBJECT(sioc)); diff --git a/target/i386/kvm/vmsr_energy.h b/target/i386/kvm/vmsr_energy.h index 16cc1f4..151bcbd 100644 --- a/target/i386/kvm/vmsr_energy.h +++ b/target/i386/kvm/vmsr_energy.h @@ -94,6 +94,5 @@ double vmsr_get_ratio(uint64_t e_delta, unsigned long long delta_ticks, unsigned int maxticks); void vmsr_init_topo_info(X86CPUTopoInfo *topo_info, const MachineState *ms); -bool is_host_cpu_intel(void); int is_rapl_enabled(void); #endif /* VMSR_ENERGY_H */ diff --git a/target/i386/kvm/xen-emu.c b/target/i386/kvm/xen-emu.c index 284c5ef..52de019 100644 --- a/target/i386/kvm/xen-emu.c +++ b/target/i386/kvm/xen-emu.c @@ -21,6 +21,7 @@ #include "system/address-spaces.h" #include "xen-emu.h" #include "trace.h" +#include "system/memory.h" #include "system/runstate.h" #include "hw/pci/msi.h" @@ -75,6 +76,7 @@ static bool kvm_gva_to_gpa(CPUState *cs, uint64_t gva, uint64_t *gpa, static int kvm_gva_rw(CPUState *cs, uint64_t gva, void *_buf, size_t sz, bool is_write) { + AddressSpace *as = cpu_addressspace(cs, MEMTXATTRS_UNSPECIFIED); uint8_t *buf = (uint8_t *)_buf; uint64_t gpa; size_t len; @@ -87,7 +89,7 @@ static int kvm_gva_rw(CPUState *cs, uint64_t gva, void *_buf, size_t sz, len = sz; } - cpu_physical_memory_rw(gpa, buf, len, is_write); + address_space_rw(as, gpa, MEMTXATTRS_UNSPECIFIED, buf, len, is_write); buf += len; sz -= len; diff --git a/target/i386/machine.c b/target/i386/machine.c index 6cb561c..45b7cea 100644 --- a/target/i386/machine.c +++ b/target/i386/machine.c @@ -462,6 +462,24 @@ static const VMStateDescription vmstate_exception_info = { } }; +static bool cpu_errcode_needed(void *opaque) +{ + X86CPU *cpu = opaque; + + return cpu->env.has_error_code != 0; +} + +static const VMStateDescription vmstate_error_code = { + .name = "cpu/error_code", + .version_id = 1, + .minimum_version_id = 1, + .needed = cpu_errcode_needed, + .fields = (const VMStateField[]) { + VMSTATE_INT32(env.error_code, X86CPU), + VMSTATE_END_OF_LIST() + } +}; + /* Poll control MSR 
enabled by default */ static bool poll_control_msr_needed(void *opaque) { @@ -1060,9 +1078,8 @@ static bool tsc_khz_needed(void *opaque) { X86CPU *cpu = opaque; CPUX86State *env = &cpu->env; - MachineClass *mc = MACHINE_GET_CLASS(qdev_get_machine()); - X86MachineClass *x86mc = X86_MACHINE_CLASS(mc); - return env->tsc_khz && x86mc->save_tsc_khz; + + return env->tsc_khz; } static const VMStateDescription vmstate_tsc_khz = { @@ -1747,6 +1764,7 @@ const VMStateDescription vmstate_x86_cpu = { }, .subsections = (const VMStateDescription * const []) { &vmstate_exception_info, + &vmstate_error_code, &vmstate_async_pf_msr, &vmstate_async_pf_int_msr, &vmstate_pv_eoi_msr, diff --git a/target/i386/meson.build b/target/i386/meson.build index c1aacea..89ba491 100644 --- a/target/i386/meson.build +++ b/target/i386/meson.build @@ -11,6 +11,9 @@ i386_ss.add(when: 'CONFIG_SEV', if_true: files('host-cpu.c', 'confidential-guest # x86 cpu type i386_ss.add(when: 'CONFIG_KVM', if_true: files('host-cpu.c')) i386_ss.add(when: 'CONFIG_HVF', if_true: files('host-cpu.c')) +i386_ss.add(when: 'CONFIG_WHPX', if_true: files('host-cpu.c')) +i386_ss.add(when: 'CONFIG_NVMM', if_true: files('host-cpu.c')) +i386_ss.add(when: 'CONFIG_MSHV', if_true: files('host-cpu.c')) i386_system_ss = ss.source_set() i386_system_ss.add(files( @@ -32,6 +35,7 @@ subdir('nvmm') subdir('hvf') subdir('tcg') subdir('emulate') +subdir('mshv') target_arch += {'i386': i386_ss} target_system_arch += {'i386': i386_system_ss} diff --git a/target/i386/monitor.c b/target/i386/monitor.c index 3ea92b0..d2bb873 100644 --- a/target/i386/monitor.c +++ b/target/i386/monitor.c @@ -29,8 +29,8 @@ #include "monitor/hmp.h" #include "qobject/qdict.h" #include "qapi/error.h" -#include "qapi/qapi-commands-misc-target.h" #include "qapi/qapi-commands-misc.h" +#include "system/memory.h" /* Perform linear address sign extension */ static hwaddr addr_canonical(CPUArchState *env, hwaddr addr) @@ -69,23 +69,23 @@ static void print_pte(Monitor *mon, CPUArchState *env, hwaddr addr, pte & PG_RW_MASK ? 
'W' : '-'); } -static void tlb_info_32(Monitor *mon, CPUArchState *env) +static void tlb_info_32(Monitor *mon, CPUArchState *env, AddressSpace *as) { + const MemTxAttrs attrs = MEMTXATTRS_UNSPECIFIED; unsigned int l1, l2; uint32_t pgd, pde, pte; pgd = env->cr[3] & ~0xfff; for(l1 = 0; l1 < 1024; l1++) { - cpu_physical_memory_read(pgd + l1 * 4, &pde, 4); - pde = le32_to_cpu(pde); + pde = address_space_ldl_le(as, pgd + l1 * 4, attrs, NULL); if (pde & PG_PRESENT_MASK) { if ((pde & PG_PSE_MASK) && (env->cr[4] & CR4_PSE_MASK)) { /* 4M pages */ print_pte(mon, env, (l1 << 22), pde, ~((1 << 21) - 1)); } else { for(l2 = 0; l2 < 1024; l2++) { - cpu_physical_memory_read((pde & ~0xfff) + l2 * 4, &pte, 4); - pte = le32_to_cpu(pte); + pte = address_space_ldl_le(as, (pde & ~0xfff) + l2 * 4, + attrs, NULL); if (pte & PG_PRESENT_MASK) { print_pte(mon, env, (l1 << 22) + (l2 << 12), pte & ~PG_PSE_MASK, @@ -97,21 +97,20 @@ static void tlb_info_32(Monitor *mon, CPUArchState *env) } } -static void tlb_info_pae32(Monitor *mon, CPUArchState *env) +static void tlb_info_pae32(Monitor *mon, CPUArchState *env, AddressSpace *as) { + const MemTxAttrs attrs = MEMTXATTRS_UNSPECIFIED; unsigned int l1, l2, l3; uint64_t pdpe, pde, pte; uint64_t pdp_addr, pd_addr, pt_addr; pdp_addr = env->cr[3] & ~0x1f; for (l1 = 0; l1 < 4; l1++) { - cpu_physical_memory_read(pdp_addr + l1 * 8, &pdpe, 8); - pdpe = le64_to_cpu(pdpe); + pdpe = address_space_ldq_le(as, pdp_addr + l1 * 8, attrs, NULL); if (pdpe & PG_PRESENT_MASK) { pd_addr = pdpe & 0x3fffffffff000ULL; for (l2 = 0; l2 < 512; l2++) { - cpu_physical_memory_read(pd_addr + l2 * 8, &pde, 8); - pde = le64_to_cpu(pde); + pde = address_space_ldq_le(as, pd_addr + l2 * 8, attrs, NULL); if (pde & PG_PRESENT_MASK) { if (pde & PG_PSE_MASK) { /* 2M pages with PAE, CR4.PSE is ignored */ @@ -120,8 +119,8 @@ static void tlb_info_pae32(Monitor *mon, CPUArchState *env) } else { pt_addr = pde & 0x3fffffffff000ULL; for (l3 = 0; l3 < 512; l3++) { - cpu_physical_memory_read(pt_addr + l3 * 8, &pte, 8); - pte = le64_to_cpu(pte); + pte = address_space_ldq_le(as, pt_addr + l3 * 8, + attrs, NULL); if (pte & PG_PRESENT_MASK) { print_pte(mon, env, (l1 << 30) + (l2 << 21) + (l3 << 12), @@ -137,24 +136,23 @@ static void tlb_info_pae32(Monitor *mon, CPUArchState *env) } #ifdef TARGET_X86_64 -static void tlb_info_la48(Monitor *mon, CPUArchState *env, +static void tlb_info_la48(Monitor *mon, CPUArchState *env, AddressSpace *as, uint64_t l0, uint64_t pml4_addr) { + const MemTxAttrs attrs = MEMTXATTRS_UNSPECIFIED; uint64_t l1, l2, l3, l4; uint64_t pml4e, pdpe, pde, pte; uint64_t pdp_addr, pd_addr, pt_addr; for (l1 = 0; l1 < 512; l1++) { - cpu_physical_memory_read(pml4_addr + l1 * 8, &pml4e, 8); - pml4e = le64_to_cpu(pml4e); + pml4e = address_space_ldq_le(as, pml4_addr + l1 * 8, attrs, NULL); if (!(pml4e & PG_PRESENT_MASK)) { continue; } pdp_addr = pml4e & 0x3fffffffff000ULL; for (l2 = 0; l2 < 512; l2++) { - cpu_physical_memory_read(pdp_addr + l2 * 8, &pdpe, 8); - pdpe = le64_to_cpu(pdpe); + pdpe = address_space_ldq_le(as, pdp_addr + l2 * 8, attrs, NULL); if (!(pdpe & PG_PRESENT_MASK)) { continue; } @@ -168,8 +166,7 @@ static void tlb_info_la48(Monitor *mon, CPUArchState *env, pd_addr = pdpe & 0x3fffffffff000ULL; for (l3 = 0; l3 < 512; l3++) { - cpu_physical_memory_read(pd_addr + l3 * 8, &pde, 8); - pde = le64_to_cpu(pde); + pde = address_space_ldq_le(as, pd_addr + l3 * 8, attrs, NULL); if (!(pde & PG_PRESENT_MASK)) { continue; } @@ -183,10 +180,8 @@ static void tlb_info_la48(Monitor *mon, CPUArchState *env, pt_addr = 
pde & 0x3fffffffff000ULL; for (l4 = 0; l4 < 512; l4++) { - cpu_physical_memory_read(pt_addr - + l4 * 8, - &pte, 8); - pte = le64_to_cpu(pte); + pte = address_space_ldq_le(as, pt_addr + l4 * 8, + attrs, NULL); if (pte & PG_PRESENT_MASK) { print_pte(mon, env, (l0 << 48) + (l1 << 39) + (l2 << 30) + (l3 << 21) + (l4 << 12), @@ -198,18 +193,18 @@ static void tlb_info_la48(Monitor *mon, CPUArchState *env, } } -static void tlb_info_la57(Monitor *mon, CPUArchState *env) +static void tlb_info_la57(Monitor *mon, CPUArchState *env, AddressSpace *as) { + const MemTxAttrs attrs = MEMTXATTRS_UNSPECIFIED; uint64_t l0; uint64_t pml5e; uint64_t pml5_addr; pml5_addr = env->cr[3] & 0x3fffffffff000ULL; for (l0 = 0; l0 < 512; l0++) { - cpu_physical_memory_read(pml5_addr + l0 * 8, &pml5e, 8); - pml5e = le64_to_cpu(pml5e); + pml5e = address_space_ldq_le(as, pml5_addr + l0 * 8, attrs, NULL); if (pml5e & PG_PRESENT_MASK) { - tlb_info_la48(mon, env, l0, pml5e & 0x3fffffffff000ULL); + tlb_info_la48(mon, env, as, l0, pml5e & 0x3fffffffff000ULL); } } } @@ -218,6 +213,7 @@ static void tlb_info_la57(Monitor *mon, CPUArchState *env) void hmp_info_tlb(Monitor *mon, const QDict *qdict) { CPUArchState *env; + AddressSpace *as; env = mon_get_cpu_env(mon); if (!env) { @@ -229,21 +225,22 @@ void hmp_info_tlb(Monitor *mon, const QDict *qdict) monitor_printf(mon, "PG disabled\n"); return; } + as = cpu_get_address_space(env_cpu(env), X86ASIdx_MEM); if (env->cr[4] & CR4_PAE_MASK) { #ifdef TARGET_X86_64 if (env->hflags & HF_LMA_MASK) { if (env->cr[4] & CR4_LA57_MASK) { - tlb_info_la57(mon, env); + tlb_info_la57(mon, env, as); } else { - tlb_info_la48(mon, env, 0, env->cr[3] & 0x3fffffffff000ULL); + tlb_info_la48(mon, env, as, 0, env->cr[3] & 0x3fffffffff000ULL); } } else #endif { - tlb_info_pae32(mon, env); + tlb_info_pae32(mon, env, as); } } else { - tlb_info_32(mon, env); + tlb_info_32(mon, env, as); } } @@ -272,8 +269,9 @@ static void mem_print(Monitor *mon, CPUArchState *env, } } -static void mem_info_32(Monitor *mon, CPUArchState *env) +static void mem_info_32(Monitor *mon, CPUArchState *env, AddressSpace *as) { + const MemTxAttrs attrs = MEMTXATTRS_UNSPECIFIED; unsigned int l1, l2; int prot, last_prot; uint32_t pgd, pde, pte; @@ -283,8 +281,7 @@ static void mem_info_32(Monitor *mon, CPUArchState *env) last_prot = 0; start = -1; for(l1 = 0; l1 < 1024; l1++) { - cpu_physical_memory_read(pgd + l1 * 4, &pde, 4); - pde = le32_to_cpu(pde); + pde = address_space_ldl_le(as, pgd + l1 * 4, attrs, NULL); end = l1 << 22; if (pde & PG_PRESENT_MASK) { if ((pde & PG_PSE_MASK) && (env->cr[4] & CR4_PSE_MASK)) { @@ -292,8 +289,8 @@ static void mem_info_32(Monitor *mon, CPUArchState *env) mem_print(mon, env, &start, &last_prot, end, prot); } else { for(l2 = 0; l2 < 1024; l2++) { - cpu_physical_memory_read((pde & ~0xfff) + l2 * 4, &pte, 4); - pte = le32_to_cpu(pte); + pte = address_space_ldl_le(as, (pde & ~0xfff) + l2 * 4, + attrs, NULL); end = (l1 << 22) + (l2 << 12); if (pte & PG_PRESENT_MASK) { prot = pte & pde & @@ -313,8 +310,9 @@ static void mem_info_32(Monitor *mon, CPUArchState *env) mem_print(mon, env, &start, &last_prot, (hwaddr)1 << 32, 0); } -static void mem_info_pae32(Monitor *mon, CPUArchState *env) +static void mem_info_pae32(Monitor *mon, CPUArchState *env, AddressSpace *as) { + const MemTxAttrs attrs = MEMTXATTRS_UNSPECIFIED; unsigned int l1, l2, l3; int prot, last_prot; uint64_t pdpe, pde, pte; @@ -325,14 +323,12 @@ static void mem_info_pae32(Monitor *mon, CPUArchState *env) last_prot = 0; start = -1; for (l1 = 0; l1 < 4; l1++) 
{ - cpu_physical_memory_read(pdp_addr + l1 * 8, &pdpe, 8); - pdpe = le64_to_cpu(pdpe); + pdpe = address_space_ldq_le(as, pdp_addr + l1 * 8, attrs, NULL); end = l1 << 30; if (pdpe & PG_PRESENT_MASK) { pd_addr = pdpe & 0x3fffffffff000ULL; for (l2 = 0; l2 < 512; l2++) { - cpu_physical_memory_read(pd_addr + l2 * 8, &pde, 8); - pde = le64_to_cpu(pde); + pde = address_space_ldq_le(as, pd_addr + l2 * 8, attrs, NULL); end = (l1 << 30) + (l2 << 21); if (pde & PG_PRESENT_MASK) { if (pde & PG_PSE_MASK) { @@ -342,8 +338,8 @@ static void mem_info_pae32(Monitor *mon, CPUArchState *env) } else { pt_addr = pde & 0x3fffffffff000ULL; for (l3 = 0; l3 < 512; l3++) { - cpu_physical_memory_read(pt_addr + l3 * 8, &pte, 8); - pte = le64_to_cpu(pte); + pte = address_space_ldq_le(as, pt_addr + l3 * 8, + attrs, NULL); end = (l1 << 30) + (l2 << 21) + (l3 << 12); if (pte & PG_PRESENT_MASK) { prot = pte & pde & (PG_USER_MASK | PG_RW_MASK | @@ -370,8 +366,9 @@ static void mem_info_pae32(Monitor *mon, CPUArchState *env) #ifdef TARGET_X86_64 -static void mem_info_la48(Monitor *mon, CPUArchState *env) +static void mem_info_la48(Monitor *mon, CPUArchState *env, AddressSpace *as) { + const MemTxAttrs attrs = MEMTXATTRS_UNSPECIFIED; int prot, last_prot; uint64_t l1, l2, l3, l4; uint64_t pml4e, pdpe, pde, pte; @@ -381,14 +378,12 @@ static void mem_info_la48(Monitor *mon, CPUArchState *env) last_prot = 0; start = -1; for (l1 = 0; l1 < 512; l1++) { - cpu_physical_memory_read(pml4_addr + l1 * 8, &pml4e, 8); - pml4e = le64_to_cpu(pml4e); + pml4e = address_space_ldq_le(as, pml4_addr + l1 * 8, attrs, NULL); end = l1 << 39; if (pml4e & PG_PRESENT_MASK) { pdp_addr = pml4e & 0x3fffffffff000ULL; for (l2 = 0; l2 < 512; l2++) { - cpu_physical_memory_read(pdp_addr + l2 * 8, &pdpe, 8); - pdpe = le64_to_cpu(pdpe); + pdpe = address_space_ldq_le(as, pdp_addr + l2 * 8, attrs, NULL); end = (l1 << 39) + (l2 << 30); if (pdpe & PG_PRESENT_MASK) { if (pdpe & PG_PSE_MASK) { @@ -399,8 +394,8 @@ static void mem_info_la48(Monitor *mon, CPUArchState *env) } else { pd_addr = pdpe & 0x3fffffffff000ULL; for (l3 = 0; l3 < 512; l3++) { - cpu_physical_memory_read(pd_addr + l3 * 8, &pde, 8); - pde = le64_to_cpu(pde); + pde = address_space_ldq_le(as, pd_addr + l3 * 8, + attrs, NULL); end = (l1 << 39) + (l2 << 30) + (l3 << 21); if (pde & PG_PRESENT_MASK) { if (pde & PG_PSE_MASK) { @@ -412,10 +407,10 @@ static void mem_info_la48(Monitor *mon, CPUArchState *env) } else { pt_addr = pde & 0x3fffffffff000ULL; for (l4 = 0; l4 < 512; l4++) { - cpu_physical_memory_read(pt_addr - + l4 * 8, - &pte, 8); - pte = le64_to_cpu(pte); + pte = address_space_ldq_le(as, + pt_addr + + l4 * 8, + attrs, NULL); end = (l1 << 39) + (l2 << 30) + (l3 << 21) + (l4 << 12); if (pte & PG_PRESENT_MASK) { @@ -450,8 +445,9 @@ static void mem_info_la48(Monitor *mon, CPUArchState *env) mem_print(mon, env, &start, &last_prot, (hwaddr)1 << 48, 0); } -static void mem_info_la57(Monitor *mon, CPUArchState *env) +static void mem_info_la57(Monitor *mon, CPUArchState *env, AddressSpace *as) { + const MemTxAttrs attrs = MEMTXATTRS_UNSPECIFIED; int prot, last_prot; uint64_t l0, l1, l2, l3, l4; uint64_t pml5e, pml4e, pdpe, pde, pte; @@ -461,8 +457,7 @@ static void mem_info_la57(Monitor *mon, CPUArchState *env) last_prot = 0; start = -1; for (l0 = 0; l0 < 512; l0++) { - cpu_physical_memory_read(pml5_addr + l0 * 8, &pml5e, 8); - pml5e = le64_to_cpu(pml5e); + pml5e = address_space_ldq_le(as, pml5_addr + l0 * 8, attrs, NULL); end = l0 << 48; if (!(pml5e & PG_PRESENT_MASK)) { prot = 0; @@ -472,8 +467,7 @@ static 
void mem_info_la57(Monitor *mon, CPUArchState *env) pml4_addr = pml5e & 0x3fffffffff000ULL; for (l1 = 0; l1 < 512; l1++) { - cpu_physical_memory_read(pml4_addr + l1 * 8, &pml4e, 8); - pml4e = le64_to_cpu(pml4e); + pml4e = address_space_ldq_le(as, pml4_addr + l1 * 8, attrs, NULL); end = (l0 << 48) + (l1 << 39); if (!(pml4e & PG_PRESENT_MASK)) { prot = 0; @@ -483,8 +477,7 @@ static void mem_info_la57(Monitor *mon, CPUArchState *env) pdp_addr = pml4e & 0x3fffffffff000ULL; for (l2 = 0; l2 < 512; l2++) { - cpu_physical_memory_read(pdp_addr + l2 * 8, &pdpe, 8); - pdpe = le64_to_cpu(pdpe); + pdpe = address_space_ldq_le(as, pdp_addr + l2 * 8, attrs, NULL); end = (l0 << 48) + (l1 << 39) + (l2 << 30); if (pdpe & PG_PRESENT_MASK) { prot = 0; @@ -502,8 +495,8 @@ static void mem_info_la57(Monitor *mon, CPUArchState *env) pd_addr = pdpe & 0x3fffffffff000ULL; for (l3 = 0; l3 < 512; l3++) { - cpu_physical_memory_read(pd_addr + l3 * 8, &pde, 8); - pde = le64_to_cpu(pde); + pde = address_space_ldq_le(as, pd_addr + l3 * 8, + attrs, NULL); end = (l0 << 48) + (l1 << 39) + (l2 << 30) + (l3 << 21); if (pde & PG_PRESENT_MASK) { prot = 0; @@ -521,8 +514,8 @@ static void mem_info_la57(Monitor *mon, CPUArchState *env) pt_addr = pde & 0x3fffffffff000ULL; for (l4 = 0; l4 < 512; l4++) { - cpu_physical_memory_read(pt_addr + l4 * 8, &pte, 8); - pte = le64_to_cpu(pte); + pte = address_space_ldq_le(as, pt_addr + l4 * 8, + attrs, NULL); end = (l0 << 48) + (l1 << 39) + (l2 << 30) + (l3 << 21) + (l4 << 12); if (pte & PG_PRESENT_MASK) { @@ -546,6 +539,7 @@ static void mem_info_la57(Monitor *mon, CPUArchState *env) void hmp_info_mem(Monitor *mon, const QDict *qdict) { CPUArchState *env; + AddressSpace *as; env = mon_get_cpu_env(mon); if (!env) { @@ -557,21 +551,22 @@ void hmp_info_mem(Monitor *mon, const QDict *qdict) monitor_printf(mon, "PG disabled\n"); return; } + as = cpu_get_address_space(env_cpu(env), X86ASIdx_MEM); if (env->cr[4] & CR4_PAE_MASK) { #ifdef TARGET_X86_64 if (env->hflags & HF_LMA_MASK) { if (env->cr[4] & CR4_LA57_MASK) { - mem_info_la57(mon, env); + mem_info_la57(mon, env, as); } else { - mem_info_la48(mon, env); + mem_info_la48(mon, env, as); } } else #endif { - mem_info_pae32(mon, env); + mem_info_pae32(mon, env, as); } } else { - mem_info_32(mon, env); + mem_info_32(mon, env, as); } } diff --git a/target/i386/mshv/meson.build b/target/i386/mshv/meson.build new file mode 100644 index 0000000..647e5da --- /dev/null +++ b/target/i386/mshv/meson.build @@ -0,0 +1,8 @@ +i386_mshv_ss = ss.source_set() + +i386_mshv_ss.add(files( + 'mshv-cpu.c', + 'x86.c', +)) + +i386_system_ss.add_all(when: 'CONFIG_MSHV', if_true: i386_mshv_ss) diff --git a/target/i386/mshv/mshv-cpu.c b/target/i386/mshv/mshv-cpu.c new file mode 100644 index 0000000..1f7b9cb --- /dev/null +++ b/target/i386/mshv/mshv-cpu.c @@ -0,0 +1,1763 @@ +/* + * QEMU MSHV support + * + * Copyright Microsoft, Corp. 
2025 + * + * Authors: Ziqiao Zhou <ziqiaozhou@microsoft.com> + * Magnus Kulke <magnuskulke@microsoft.com> + * Jinank Jain <jinankjain@microsoft.com> + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "qemu/osdep.h" +#include "qemu/error-report.h" +#include "qemu/memalign.h" +#include "qemu/typedefs.h" + +#include "system/mshv.h" +#include "system/mshv_int.h" +#include "system/address-spaces.h" +#include "linux/mshv.h" +#include "hw/hyperv/hvgdk.h" +#include "hw/hyperv/hvgdk_mini.h" +#include "hw/hyperv/hvhdk_mini.h" +#include "hw/i386/apic_internal.h" + +#include "cpu.h" +#include "emulate/x86_decode.h" +#include "emulate/x86_emu.h" +#include "emulate/x86_flags.h" + +#include "trace-accel_mshv.h" +#include "trace.h" + +#include <sys/ioctl.h> + +#define MAX_REGISTER_COUNT (MAX_CONST(ARRAY_SIZE(STANDARD_REGISTER_NAMES), \ + MAX_CONST(ARRAY_SIZE(SPECIAL_REGISTER_NAMES), \ + ARRAY_SIZE(FPU_REGISTER_NAMES)))) + +static enum hv_register_name STANDARD_REGISTER_NAMES[18] = { + HV_X64_REGISTER_RAX, + HV_X64_REGISTER_RBX, + HV_X64_REGISTER_RCX, + HV_X64_REGISTER_RDX, + HV_X64_REGISTER_RSI, + HV_X64_REGISTER_RDI, + HV_X64_REGISTER_RSP, + HV_X64_REGISTER_RBP, + HV_X64_REGISTER_R8, + HV_X64_REGISTER_R9, + HV_X64_REGISTER_R10, + HV_X64_REGISTER_R11, + HV_X64_REGISTER_R12, + HV_X64_REGISTER_R13, + HV_X64_REGISTER_R14, + HV_X64_REGISTER_R15, + HV_X64_REGISTER_RIP, + HV_X64_REGISTER_RFLAGS, +}; + +static enum hv_register_name SPECIAL_REGISTER_NAMES[17] = { + HV_X64_REGISTER_CS, + HV_X64_REGISTER_DS, + HV_X64_REGISTER_ES, + HV_X64_REGISTER_FS, + HV_X64_REGISTER_GS, + HV_X64_REGISTER_SS, + HV_X64_REGISTER_TR, + HV_X64_REGISTER_LDTR, + HV_X64_REGISTER_GDTR, + HV_X64_REGISTER_IDTR, + HV_X64_REGISTER_CR0, + HV_X64_REGISTER_CR2, + HV_X64_REGISTER_CR3, + HV_X64_REGISTER_CR4, + HV_X64_REGISTER_CR8, + HV_X64_REGISTER_EFER, + HV_X64_REGISTER_APIC_BASE, +}; + +static enum hv_register_name FPU_REGISTER_NAMES[26] = { + HV_X64_REGISTER_XMM0, + HV_X64_REGISTER_XMM1, + HV_X64_REGISTER_XMM2, + HV_X64_REGISTER_XMM3, + HV_X64_REGISTER_XMM4, + HV_X64_REGISTER_XMM5, + HV_X64_REGISTER_XMM6, + HV_X64_REGISTER_XMM7, + HV_X64_REGISTER_XMM8, + HV_X64_REGISTER_XMM9, + HV_X64_REGISTER_XMM10, + HV_X64_REGISTER_XMM11, + HV_X64_REGISTER_XMM12, + HV_X64_REGISTER_XMM13, + HV_X64_REGISTER_XMM14, + HV_X64_REGISTER_XMM15, + HV_X64_REGISTER_FP_MMX0, + HV_X64_REGISTER_FP_MMX1, + HV_X64_REGISTER_FP_MMX2, + HV_X64_REGISTER_FP_MMX3, + HV_X64_REGISTER_FP_MMX4, + HV_X64_REGISTER_FP_MMX5, + HV_X64_REGISTER_FP_MMX6, + HV_X64_REGISTER_FP_MMX7, + HV_X64_REGISTER_FP_CONTROL_STATUS, + HV_X64_REGISTER_XMM_CONTROL_STATUS, +}; + +static int translate_gva(const CPUState *cpu, uint64_t gva, uint64_t *gpa, + uint64_t flags) +{ + int ret; + int cpu_fd = mshv_vcpufd(cpu); + int vp_index = cpu->cpu_index; + + hv_input_translate_virtual_address in = { 0 }; + hv_output_translate_virtual_address out = { 0 }; + struct mshv_root_hvcall args = {0}; + uint64_t gva_page = gva >> HV_HYP_PAGE_SHIFT; + + in.vp_index = vp_index; + in.control_flags = flags; + in.gva_page = gva_page; + + /* create the hvcall envelope */ + args.code = HVCALL_TRANSLATE_VIRTUAL_ADDRESS; + args.in_sz = sizeof(in); + args.in_ptr = (uint64_t) ∈ + args.out_sz = sizeof(out); + args.out_ptr = (uint64_t) &out; + + /* perform the call */ + ret = mshv_hvcall(cpu_fd, &args); + if (ret < 0) { + error_report("Failed to invoke gva->gpa translation"); + return -errno; + } + + if (out.translation_result.result_code != HV_TRANSLATE_GVA_SUCCESS) { + error_report("Failed to translate gva (" 
TARGET_FMT_lx ") to gpa", gva); + return -1; + } + + *gpa = ((out.gpa_page << HV_HYP_PAGE_SHIFT) + | (gva & ~(uint64_t)HV_HYP_PAGE_MASK)); + + return 0; +} + +int mshv_set_generic_regs(const CPUState *cpu, const hv_register_assoc *assocs, + size_t n_regs) +{ + int cpu_fd = mshv_vcpufd(cpu); + int vp_index = cpu->cpu_index; + size_t in_sz, assocs_sz; + hv_input_set_vp_registers *in = cpu->accel->hvcall_args.input_page; + struct mshv_root_hvcall args = {0}; + int ret; + + /* find out the size of the struct w/ a flexible array at the tail */ + assocs_sz = n_regs * sizeof(hv_register_assoc); + in_sz = sizeof(hv_input_set_vp_registers) + assocs_sz; + + /* fill the input struct */ + memset(in, 0, sizeof(hv_input_set_vp_registers)); + in->vp_index = vp_index; + memcpy(in->elements, assocs, assocs_sz); + + /* create the hvcall envelope */ + args.code = HVCALL_SET_VP_REGISTERS; + args.in_sz = in_sz; + args.in_ptr = (uint64_t) in; + args.reps = (uint16_t) n_regs; + + /* perform the call */ + ret = mshv_hvcall(cpu_fd, &args); + if (ret < 0) { + error_report("Failed to set registers"); + return -1; + } + + /* assert we set all registers */ + if (args.reps != n_regs) { + error_report("Failed to set registers: expected %zu elements" + ", got %u", n_regs, args.reps); + return -1; + } + + return 0; +} + +static int get_generic_regs(CPUState *cpu, hv_register_assoc *assocs, + size_t n_regs) +{ + int cpu_fd = mshv_vcpufd(cpu); + int vp_index = cpu->cpu_index; + hv_input_get_vp_registers *in = cpu->accel->hvcall_args.input_page; + hv_register_value *values = cpu->accel->hvcall_args.output_page; + size_t in_sz, names_sz, values_sz; + int i, ret; + struct mshv_root_hvcall args = {0}; + + /* find out the size of the struct w/ a flexible array at the tail */ + names_sz = n_regs * sizeof(hv_register_name); + in_sz = sizeof(hv_input_get_vp_registers) + names_sz; + + /* fill the input struct */ + memset(in, 0, sizeof(hv_input_get_vp_registers)); + in->vp_index = vp_index; + for (i = 0; i < n_regs; i++) { + in->names[i] = assocs[i].name; + } + + /* determine size of value output buffer */ + values_sz = n_regs * sizeof(union hv_register_value); + + /* create the hvcall envelope */ + args.code = HVCALL_GET_VP_REGISTERS; + args.in_sz = in_sz; + args.in_ptr = (uint64_t) in; + args.out_sz = values_sz; + args.out_ptr = (uint64_t) values; + args.reps = (uint16_t) n_regs; + + /* perform the call */ + ret = mshv_hvcall(cpu_fd, &args); + if (ret < 0) { + error_report("Failed to retrieve registers"); + return -1; + } + + /* assert we got all registers */ + if (args.reps != n_regs) { + error_report("Failed to retrieve registers: expected %zu elements" + ", got %u", n_regs, args.reps); + return -1; + } + + /* copy values into assoc */ + for (i = 0; i < n_regs; i++) { + assocs[i].value = values[i]; + } + + return 0; +} + +static int set_standard_regs(const CPUState *cpu) +{ + X86CPU *x86cpu = X86_CPU(cpu); + CPUX86State *env = &x86cpu->env; + hv_register_assoc assocs[ARRAY_SIZE(STANDARD_REGISTER_NAMES)]; + int ret; + size_t n_regs = ARRAY_SIZE(STANDARD_REGISTER_NAMES); + + /* set names */ + for (size_t i = 0; i < ARRAY_SIZE(STANDARD_REGISTER_NAMES); i++) { + assocs[i].name = STANDARD_REGISTER_NAMES[i]; + } + assocs[0].value.reg64 = env->regs[R_EAX]; + assocs[1].value.reg64 = env->regs[R_EBX]; + assocs[2].value.reg64 = env->regs[R_ECX]; + assocs[3].value.reg64 = env->regs[R_EDX]; + assocs[4].value.reg64 = env->regs[R_ESI]; + assocs[5].value.reg64 = env->regs[R_EDI]; + assocs[6].value.reg64 = env->regs[R_ESP]; + 
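An editor's note in code form (hypothetical values, not part of the patch): assoc slot i in set_standard_regs() above must line up with STANDARD_REGISTER_NAMES[i], so the env->regs indices are written out in exactly that order rather than in QEMU's R_EAX..R_EDI enum order. A lookup table makes the correspondence explicit; the numeric R_* values shown are illustrative only.

/* slot -> QEMU register index, in STANDARD_REGISTER_NAMES order */
static const int env_reg_for_slot_sketch[16] = {
    0, /* RAX -> R_EAX */   3, /* RBX -> R_EBX */
    1, /* RCX -> R_ECX */   2, /* RDX -> R_EDX */
    6, /* RSI -> R_ESI */   7, /* RDI -> R_EDI */
    4, /* RSP -> R_ESP */   5, /* RBP -> R_EBP */
    8, 9, 10, 11, 12, 13, 14, 15, /* R8..R15 */
};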
assocs[7].value.reg64 = env->regs[R_EBP]; + assocs[8].value.reg64 = env->regs[R_R8]; + assocs[9].value.reg64 = env->regs[R_R9]; + assocs[10].value.reg64 = env->regs[R_R10]; + assocs[11].value.reg64 = env->regs[R_R11]; + assocs[12].value.reg64 = env->regs[R_R12]; + assocs[13].value.reg64 = env->regs[R_R13]; + assocs[14].value.reg64 = env->regs[R_R14]; + assocs[15].value.reg64 = env->regs[R_R15]; + assocs[16].value.reg64 = env->eip; + lflags_to_rflags(env); + assocs[17].value.reg64 = env->eflags; + + ret = mshv_set_generic_regs(cpu, assocs, n_regs); + if (ret < 0) { + error_report("failed to set standard registers"); + return -errno; + } + return 0; +} + +int mshv_store_regs(CPUState *cpu) +{ + int ret; + + ret = set_standard_regs(cpu); + if (ret < 0) { + error_report("Failed to store standard registers"); + return -1; + } + + return 0; +} + +static void populate_standard_regs(const hv_register_assoc *assocs, + CPUX86State *env) +{ + env->regs[R_EAX] = assocs[0].value.reg64; + env->regs[R_EBX] = assocs[1].value.reg64; + env->regs[R_ECX] = assocs[2].value.reg64; + env->regs[R_EDX] = assocs[3].value.reg64; + env->regs[R_ESI] = assocs[4].value.reg64; + env->regs[R_EDI] = assocs[5].value.reg64; + env->regs[R_ESP] = assocs[6].value.reg64; + env->regs[R_EBP] = assocs[7].value.reg64; + env->regs[R_R8] = assocs[8].value.reg64; + env->regs[R_R9] = assocs[9].value.reg64; + env->regs[R_R10] = assocs[10].value.reg64; + env->regs[R_R11] = assocs[11].value.reg64; + env->regs[R_R12] = assocs[12].value.reg64; + env->regs[R_R13] = assocs[13].value.reg64; + env->regs[R_R14] = assocs[14].value.reg64; + env->regs[R_R15] = assocs[15].value.reg64; + + env->eip = assocs[16].value.reg64; + env->eflags = assocs[17].value.reg64; + rflags_to_lflags(env); +} + +int mshv_get_standard_regs(CPUState *cpu) +{ + struct hv_register_assoc assocs[ARRAY_SIZE(STANDARD_REGISTER_NAMES)]; + int ret; + X86CPU *x86cpu = X86_CPU(cpu); + CPUX86State *env = &x86cpu->env; + size_t n_regs = ARRAY_SIZE(STANDARD_REGISTER_NAMES); + + for (size_t i = 0; i < n_regs; i++) { + assocs[i].name = STANDARD_REGISTER_NAMES[i]; + } + ret = get_generic_regs(cpu, assocs, n_regs); + if (ret < 0) { + error_report("failed to get standard registers"); + return -1; + } + + populate_standard_regs(assocs, env); + return 0; +} + +static inline void populate_segment_reg(const hv_x64_segment_register *hv_seg, + SegmentCache *seg) +{ + memset(seg, 0, sizeof(SegmentCache)); + + seg->base = hv_seg->base; + seg->limit = hv_seg->limit; + seg->selector = hv_seg->selector; + + seg->flags = (hv_seg->segment_type << DESC_TYPE_SHIFT) + | (hv_seg->present * DESC_P_MASK) + | (hv_seg->descriptor_privilege_level << DESC_DPL_SHIFT) + | (hv_seg->_default << DESC_B_SHIFT) + | (hv_seg->non_system_segment * DESC_S_MASK) + | (hv_seg->_long << DESC_L_SHIFT) + | (hv_seg->granularity * DESC_G_MASK) + | (hv_seg->available * DESC_AVL_MASK); + +} + +static inline void populate_table_reg(const hv_x64_table_register *hv_seg, + SegmentCache *tbl) +{ + memset(tbl, 0, sizeof(SegmentCache)); + + tbl->base = hv_seg->base; + tbl->limit = hv_seg->limit; +} + +static void populate_special_regs(const hv_register_assoc *assocs, + X86CPU *x86cpu) +{ + CPUX86State *env = &x86cpu->env; + + populate_segment_reg(&assocs[0].value.segment, &env->segs[R_CS]); + populate_segment_reg(&assocs[1].value.segment, &env->segs[R_DS]); + populate_segment_reg(&assocs[2].value.segment, &env->segs[R_ES]); + populate_segment_reg(&assocs[3].value.segment, &env->segs[R_FS]); + populate_segment_reg(&assocs[4].value.segment, 
&env->segs[R_GS]); + populate_segment_reg(&assocs[5].value.segment, &env->segs[R_SS]); + + populate_segment_reg(&assocs[6].value.segment, &env->tr); + populate_segment_reg(&assocs[7].value.segment, &env->ldt); + + populate_table_reg(&assocs[8].value.table, &env->gdt); + populate_table_reg(&assocs[9].value.table, &env->idt); + + env->cr[0] = assocs[10].value.reg64; + env->cr[2] = assocs[11].value.reg64; + env->cr[3] = assocs[12].value.reg64; + env->cr[4] = assocs[13].value.reg64; + + cpu_set_apic_tpr(x86cpu->apic_state, assocs[14].value.reg64); + env->efer = assocs[15].value.reg64; + cpu_set_apic_base(x86cpu->apic_state, assocs[16].value.reg64); +} + + +int mshv_get_special_regs(CPUState *cpu) +{ + struct hv_register_assoc assocs[ARRAY_SIZE(SPECIAL_REGISTER_NAMES)]; + int ret; + X86CPU *x86cpu = X86_CPU(cpu); + size_t n_regs = ARRAY_SIZE(SPECIAL_REGISTER_NAMES); + + for (size_t i = 0; i < n_regs; i++) { + assocs[i].name = SPECIAL_REGISTER_NAMES[i]; + } + ret = get_generic_regs(cpu, assocs, n_regs); + if (ret < 0) { + error_report("failed to get special registers"); + return -errno; + } + + populate_special_regs(assocs, x86cpu); + return 0; +} + +int mshv_load_regs(CPUState *cpu) +{ + int ret; + + ret = mshv_get_standard_regs(cpu); + if (ret < 0) { + error_report("Failed to load standard registers"); + return -1; + } + + ret = mshv_get_special_regs(cpu); + if (ret < 0) { + error_report("Failed to load special registers"); + return -1; + } + + return 0; +} + +static void add_cpuid_entry(GList *cpuid_entries, + uint32_t function, uint32_t index, + uint32_t eax, uint32_t ebx, + uint32_t ecx, uint32_t edx) +{ + struct hv_cpuid_entry *entry; + + entry = g_malloc0(sizeof(struct hv_cpuid_entry)); + entry->function = function; + entry->index = index; + entry->eax = eax; + entry->ebx = ebx; + entry->ecx = ecx; + entry->edx = edx; + + cpuid_entries = g_list_append(cpuid_entries, entry); +} + +static void collect_cpuid_entries(const CPUState *cpu, GList *cpuid_entries) +{ + X86CPU *x86_cpu = X86_CPU(cpu); + CPUX86State *env = &x86_cpu->env; + uint32_t eax, ebx, ecx, edx; + uint32_t leaf, subleaf; + size_t max_leaf = 0x1F; + size_t max_subleaf = 0x20; + + uint32_t leaves_with_subleaves[] = {0x4, 0x7, 0xD, 0xF, 0x10}; + int n_subleaf_leaves = ARRAY_SIZE(leaves_with_subleaves); + + /* Regular leaves without subleaves */ + for (leaf = 0; leaf <= max_leaf; leaf++) { + bool has_subleaves = false; + for (int i = 0; i < n_subleaf_leaves; i++) { + if (leaf == leaves_with_subleaves[i]) { + has_subleaves = true; + break; + } + } + + if (!has_subleaves) { + cpu_x86_cpuid(env, leaf, 0, &eax, &ebx, &ecx, &edx); + if (eax == 0 && ebx == 0 && ecx == 0 && edx == 0) { + /* all zeroes indicates no more leaves */ + continue; + } + + add_cpuid_entry(cpuid_entries, leaf, 0, eax, ebx, ecx, edx); + continue; + } + + subleaf = 0; + while (subleaf < max_subleaf) { + cpu_x86_cpuid(env, leaf, subleaf, &eax, &ebx, &ecx, &edx); + + if (eax == 0 && ebx == 0 && ecx == 0 && edx == 0) { + /* all zeroes indicates no more leaves */ + break; + } + add_cpuid_entry(cpuid_entries, leaf, 0, eax, ebx, ecx, edx); + subleaf++; + } + } +} + +static int register_intercept_result_cpuid_entry(const CPUState *cpu, + uint8_t subleaf_specific, + uint8_t always_override, + struct hv_cpuid_entry *entry) +{ + int ret; + int vp_index = cpu->cpu_index; + int cpu_fd = mshv_vcpufd(cpu); + + struct hv_register_x64_cpuid_result_parameters cpuid_params = { + .input.eax = entry->function, + .input.ecx = entry->index, + .input.subleaf_specific = subleaf_specific, 
+ .input.always_override = always_override, + .input.padding = 0, + /* + * With regard to masks - these are to specify bits to be overwritten + * The current CpuidEntry structure wouldn't allow to carry the masks + * in addition to the actual register values. For this reason, the + * masks are set to the exact values of the corresponding register bits + * to be registered for an overwrite. To view resulting values the + * hypervisor would return, HvCallGetVpCpuidValues hypercall can be + * used. + */ + .result.eax = entry->eax, + .result.eax_mask = entry->eax, + .result.ebx = entry->ebx, + .result.ebx_mask = entry->ebx, + .result.ecx = entry->ecx, + .result.ecx_mask = entry->ecx, + .result.edx = entry->edx, + .result.edx_mask = entry->edx, + }; + union hv_register_intercept_result_parameters parameters = { + .cpuid = cpuid_params, + }; + + hv_input_register_intercept_result in = {0}; + in.vp_index = vp_index; + in.intercept_type = HV_INTERCEPT_TYPE_X64_CPUID; + in.parameters = parameters; + + struct mshv_root_hvcall args = {0}; + args.code = HVCALL_REGISTER_INTERCEPT_RESULT; + args.in_sz = sizeof(in); + args.in_ptr = (uint64_t)∈ + + ret = mshv_hvcall(cpu_fd, &args); + if (ret < 0) { + error_report("failed to register intercept result for cpuid"); + return -1; + } + + return 0; +} + +static int register_intercept_result_cpuid(const CPUState *cpu, + struct hv_cpuid *cpuid) +{ + int ret = 0, entry_ret; + struct hv_cpuid_entry *entry; + uint8_t subleaf_specific, always_override; + + for (size_t i = 0; i < cpuid->nent; i++) { + entry = &cpuid->entries[i]; + + /* set defaults */ + subleaf_specific = 0; + always_override = 1; + + /* Intel */ + /* 0xb - Extended Topology Enumeration Leaf */ + /* 0x1f - V2 Extended Topology Enumeration Leaf */ + /* AMD */ + /* 0x8000_001e - Processor Topology Information */ + /* 0x8000_0026 - Extended CPU Topology */ + if (entry->function == 0xb + || entry->function == 0x1f + || entry->function == 0x8000001e + || entry->function == 0x80000026) { + subleaf_specific = 1; + always_override = 1; + } else if (entry->function == 0x00000001 + || entry->function == 0x80000000 + || entry->function == 0x80000001 + || entry->function == 0x80000008) { + subleaf_specific = 0; + always_override = 1; + } + + entry_ret = register_intercept_result_cpuid_entry(cpu, subleaf_specific, + always_override, + entry); + if ((entry_ret < 0) && (ret == 0)) { + ret = entry_ret; + } + } + + return ret; +} + +static int set_cpuid2(const CPUState *cpu) +{ + int ret; + size_t n_entries, cpuid_size; + struct hv_cpuid *cpuid; + struct hv_cpuid_entry *entry; + GList *entries = NULL; + + collect_cpuid_entries(cpu, entries); + n_entries = g_list_length(entries); + + cpuid_size = sizeof(struct hv_cpuid) + + n_entries * sizeof(struct hv_cpuid_entry); + + cpuid = g_malloc0(cpuid_size); + cpuid->nent = n_entries; + cpuid->padding = 0; + + for (size_t i = 0; i < n_entries; i++) { + entry = g_list_nth_data(entries, i); + cpuid->entries[i] = *entry; + g_free(entry); + } + g_list_free(entries); + + ret = register_intercept_result_cpuid(cpu, cpuid); + g_free(cpuid); + if (ret < 0) { + return ret; + } + + return 0; +} + +static inline void populate_hv_segment_reg(SegmentCache *seg, + hv_x64_segment_register *hv_reg) +{ + uint32_t flags = seg->flags; + + hv_reg->base = seg->base; + hv_reg->limit = seg->limit; + hv_reg->selector = seg->selector; + hv_reg->segment_type = (flags >> DESC_TYPE_SHIFT) & 0xF; + hv_reg->non_system_segment = (flags & DESC_S_MASK) != 0; + hv_reg->descriptor_privilege_level = (flags >> 
DESC_DPL_SHIFT) & 0x3; + hv_reg->present = (flags & DESC_P_MASK) != 0; + hv_reg->reserved = 0; + hv_reg->available = (flags & DESC_AVL_MASK) != 0; + hv_reg->_long = (flags >> DESC_L_SHIFT) & 0x1; + hv_reg->_default = (flags >> DESC_B_SHIFT) & 0x1; + hv_reg->granularity = (flags & DESC_G_MASK) != 0; +} + +static inline void populate_hv_table_reg(const struct SegmentCache *seg, + hv_x64_table_register *hv_reg) +{ + memset(hv_reg, 0, sizeof(*hv_reg)); + + hv_reg->base = seg->base; + hv_reg->limit = seg->limit; +} + +static int set_special_regs(const CPUState *cpu) +{ + X86CPU *x86cpu = X86_CPU(cpu); + CPUX86State *env = &x86cpu->env; + struct hv_register_assoc assocs[ARRAY_SIZE(SPECIAL_REGISTER_NAMES)]; + size_t n_regs = ARRAY_SIZE(SPECIAL_REGISTER_NAMES); + int ret; + + /* set names */ + for (size_t i = 0; i < n_regs; i++) { + assocs[i].name = SPECIAL_REGISTER_NAMES[i]; + } + populate_hv_segment_reg(&env->segs[R_CS], &assocs[0].value.segment); + populate_hv_segment_reg(&env->segs[R_DS], &assocs[1].value.segment); + populate_hv_segment_reg(&env->segs[R_ES], &assocs[2].value.segment); + populate_hv_segment_reg(&env->segs[R_FS], &assocs[3].value.segment); + populate_hv_segment_reg(&env->segs[R_GS], &assocs[4].value.segment); + populate_hv_segment_reg(&env->segs[R_SS], &assocs[5].value.segment); + populate_hv_segment_reg(&env->tr, &assocs[6].value.segment); + populate_hv_segment_reg(&env->ldt, &assocs[7].value.segment); + + populate_hv_table_reg(&env->gdt, &assocs[8].value.table); + populate_hv_table_reg(&env->idt, &assocs[9].value.table); + + assocs[10].value.reg64 = env->cr[0]; + assocs[11].value.reg64 = env->cr[2]; + assocs[12].value.reg64 = env->cr[3]; + assocs[13].value.reg64 = env->cr[4]; + assocs[14].value.reg64 = cpu_get_apic_tpr(x86cpu->apic_state); + assocs[15].value.reg64 = env->efer; + assocs[16].value.reg64 = cpu_get_apic_base(x86cpu->apic_state); + + ret = mshv_set_generic_regs(cpu, assocs, n_regs); + if (ret < 0) { + error_report("failed to set special registers"); + return -1; + } + + return 0; +} + +static int set_fpu(const CPUState *cpu, const struct MshvFPU *regs) +{ + struct hv_register_assoc assocs[ARRAY_SIZE(FPU_REGISTER_NAMES)]; + union hv_register_value *value; + size_t fp_i; + union hv_x64_fp_control_status_register *ctrl_status; + union hv_x64_xmm_control_status_register *xmm_ctrl_status; + int ret; + size_t n_regs = ARRAY_SIZE(FPU_REGISTER_NAMES); + + /* first 16 registers are xmm0-xmm15 */ + for (size_t i = 0; i < 16; i++) { + assocs[i].name = FPU_REGISTER_NAMES[i]; + value = &assocs[i].value; + memcpy(&value->reg128, &regs->xmm[i], 16); + } + + /* next 8 registers are fp_mmx0-fp_mmx7 */ + for (size_t i = 16; i < 24; i++) { + assocs[i].name = FPU_REGISTER_NAMES[i]; + fp_i = (i - 16); + value = &assocs[i].value; + memcpy(&value->reg128, &regs->fpr[fp_i], 16); + } + + /* last two registers are fp_control_status and xmm_control_status */ + assocs[24].name = FPU_REGISTER_NAMES[24]; + value = &assocs[24].value; + ctrl_status = &value->fp_control_status; + ctrl_status->fp_control = regs->fcw; + ctrl_status->fp_status = regs->fsw; + ctrl_status->fp_tag = regs->ftwx; + ctrl_status->reserved = 0; + ctrl_status->last_fp_op = regs->last_opcode; + ctrl_status->last_fp_rip = regs->last_ip; + + assocs[25].name = FPU_REGISTER_NAMES[25]; + value = &assocs[25].value; + xmm_ctrl_status = &value->xmm_control_status; + xmm_ctrl_status->xmm_status_control = regs->mxcsr; + xmm_ctrl_status->xmm_status_control_mask = 0; + xmm_ctrl_status->last_fp_rdp = regs->last_dp; + + ret =
mshv_set_generic_regs(cpu, assocs, n_regs); + if (ret < 0) { + error_report("failed to set fpu registers"); + return -1; + } + + return 0; +} + +static int set_xc_reg(const CPUState *cpu, uint64_t xcr0) +{ + int ret; + struct hv_register_assoc assoc = { + .name = HV_X64_REGISTER_XFEM, + .value.reg64 = xcr0, + }; + + ret = mshv_set_generic_regs(cpu, &assoc, 1); + if (ret < 0) { + error_report("failed to set xcr0"); + return -errno; + } + return 0; +} + +static int set_cpu_state(const CPUState *cpu, const MshvFPU *fpu_regs, + uint64_t xcr0) +{ + int ret; + + ret = set_standard_regs(cpu); + if (ret < 0) { + return ret; + } + ret = set_special_regs(cpu); + if (ret < 0) { + return ret; + } + ret = set_fpu(cpu, fpu_regs); + if (ret < 0) { + return ret; + } + ret = set_xc_reg(cpu, xcr0); + if (ret < 0) { + return ret; + } + return 0; +} + +static int get_vp_state(int cpu_fd, struct mshv_get_set_vp_state *state) +{ + int ret; + + ret = ioctl(cpu_fd, MSHV_GET_VP_STATE, state); + if (ret < 0) { + error_report("failed to get partition state: %s", strerror(errno)); + return -1; + } + + return 0; +} + +static int get_lapic(int cpu_fd, + struct hv_local_interrupt_controller_state *state) +{ + int ret; + size_t size = 4096; + /* buffer aligned to 4k, as *state requires that */ + void *buffer = qemu_memalign(size, size); + struct mshv_get_set_vp_state mshv_state = { 0 }; + + mshv_state.buf_ptr = (uint64_t) buffer; + mshv_state.buf_sz = size; + mshv_state.type = MSHV_VP_STATE_LAPIC; + + ret = get_vp_state(cpu_fd, &mshv_state); + if (ret == 0) { + memcpy(state, buffer, sizeof(*state)); + } + qemu_vfree(buffer); + if (ret < 0) { + error_report("failed to get lapic"); + return -1; + } + + return 0; +} + +static uint32_t set_apic_delivery_mode(uint32_t reg, uint32_t mode) +{ + return ((reg) & ~0x700) | ((mode) << 8); +} + +static int set_vp_state(int cpu_fd, const struct mshv_get_set_vp_state *state) +{ + int ret; + + ret = ioctl(cpu_fd, MSHV_SET_VP_STATE, state); + if (ret < 0) { + error_report("failed to set partition state: %s", strerror(errno)); + return -1; + } + + return 0; +} + +static int set_lapic(int cpu_fd, + const struct hv_local_interrupt_controller_state *state) +{ + int ret; + size_t size = 4096; + /* buffer aligned to 4k, as *state requires that */ + void *buffer = qemu_memalign(size, size); + struct mshv_get_set_vp_state mshv_state = { 0 }; + + if (!state) { + error_report("lapic state is NULL"); + return -1; + } + memcpy(buffer, state, sizeof(*state)); + + mshv_state.buf_ptr = (uint64_t) buffer; + mshv_state.buf_sz = size; + mshv_state.type = MSHV_VP_STATE_LAPIC; + + ret = set_vp_state(cpu_fd, &mshv_state); + qemu_vfree(buffer); + if (ret < 0) { + error_report("failed to set lapic: %s", strerror(errno)); + return -1; + } + + return 0; +} + +static int set_lint(int cpu_fd) +{ + int ret; + uint32_t *lvt_lint0, *lvt_lint1; + + struct hv_local_interrupt_controller_state lapic_state = { 0 }; + ret = get_lapic(cpu_fd, &lapic_state); + if (ret < 0) { + return ret; + } + + lvt_lint0 = &lapic_state.apic_lvt_lint0; + *lvt_lint0 = set_apic_delivery_mode(*lvt_lint0, APIC_DM_EXTINT); + + lvt_lint1 = &lapic_state.apic_lvt_lint1; + *lvt_lint1 = set_apic_delivery_mode(*lvt_lint1, APIC_DM_NMI); + + /* TODO: should we skip setting lapic if the values are the same? 
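+ * (get_lapic() above already fetched the current values, so comparing them with the updated state before calling set_lapic() would be enough to detect that case.)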
*/ + + return set_lapic(cpu_fd, &lapic_state); +} + +static int setup_msrs(const CPUState *cpu) +{ + int ret; + uint64_t default_type = MSR_MTRR_ENABLE | MSR_MTRR_MEM_TYPE_WB; + + /* boot msr entries */ + MshvMsrEntry msrs[9] = { + { .index = IA32_MSR_SYSENTER_CS, .data = 0x0, }, + { .index = IA32_MSR_SYSENTER_ESP, .data = 0x0, }, + { .index = IA32_MSR_SYSENTER_EIP, .data = 0x0, }, + { .index = IA32_MSR_STAR, .data = 0x0, }, + { .index = IA32_MSR_CSTAR, .data = 0x0, }, + { .index = IA32_MSR_LSTAR, .data = 0x0, }, + { .index = IA32_MSR_KERNEL_GS_BASE, .data = 0x0, }, + { .index = IA32_MSR_SFMASK, .data = 0x0, }, + { .index = IA32_MSR_MTRR_DEF_TYPE, .data = default_type, }, + }; + + ret = mshv_configure_msr(cpu, msrs, 9); + if (ret < 0) { + error_report("failed to setup msrs"); + return -1; + } + + return 0; +} + +/* + * TODO: populate topology info: + * + * X86CPU *x86cpu = X86_CPU(cpu); + * CPUX86State *env = &x86cpu->env; + * X86CPUTopoInfo *topo_info = &env->topo_info; + */ +int mshv_configure_vcpu(const CPUState *cpu, const struct MshvFPU *fpu, + uint64_t xcr0) +{ + int ret; + int cpu_fd = mshv_vcpufd(cpu); + + ret = set_cpuid2(cpu); + if (ret < 0) { + error_report("failed to set cpuid"); + return -1; + } + + ret = setup_msrs(cpu); + if (ret < 0) { + error_report("failed to setup msrs"); + return -1; + } + + ret = set_cpu_state(cpu, fpu, xcr0); + if (ret < 0) { + error_report("failed to set cpu state"); + return -1; + } + + ret = set_lint(cpu_fd); + if (ret < 0) { + error_report("failed to set lpic int"); + return -1; + } + + return 0; +} + +static int put_regs(const CPUState *cpu) +{ + X86CPU *x86cpu = X86_CPU(cpu); + CPUX86State *env = &x86cpu->env; + MshvFPU fpu = {0}; + int ret; + + memset(&fpu, 0, sizeof(fpu)); + + ret = mshv_configure_vcpu(cpu, &fpu, env->xcr0); + if (ret < 0) { + error_report("failed to configure vcpu"); + return ret; + } + + return 0; +} + +struct MsrPair { + uint32_t index; + uint64_t value; +}; + +static int put_msrs(const CPUState *cpu) +{ + int ret = 0; + X86CPU *x86cpu = X86_CPU(cpu); + CPUX86State *env = &x86cpu->env; + MshvMsrEntries *msrs = g_malloc0(sizeof(MshvMsrEntries)); + + struct MsrPair pairs[] = { + { MSR_IA32_SYSENTER_CS, env->sysenter_cs }, + { MSR_IA32_SYSENTER_ESP, env->sysenter_esp }, + { MSR_IA32_SYSENTER_EIP, env->sysenter_eip }, + { MSR_EFER, env->efer }, + { MSR_PAT, env->pat }, + { MSR_STAR, env->star }, + { MSR_CSTAR, env->cstar }, + { MSR_LSTAR, env->lstar }, + { MSR_KERNELGSBASE, env->kernelgsbase }, + { MSR_FMASK, env->fmask }, + { MSR_MTRRdefType, env->mtrr_deftype }, + { MSR_VM_HSAVE_PA, env->vm_hsave }, + { MSR_SMI_COUNT, env->msr_smi_count }, + { MSR_IA32_PKRS, env->pkrs }, + { MSR_IA32_BNDCFGS, env->msr_bndcfgs }, + { MSR_IA32_XSS, env->xss }, + { MSR_IA32_UMWAIT_CONTROL, env->umwait }, + { MSR_IA32_TSX_CTRL, env->tsx_ctrl }, + { MSR_AMD64_TSC_RATIO, env->amd_tsc_scale_msr }, + { MSR_TSC_AUX, env->tsc_aux }, + { MSR_TSC_ADJUST, env->tsc_adjust }, + { MSR_IA32_SMBASE, env->smbase }, + { MSR_IA32_SPEC_CTRL, env->spec_ctrl }, + { MSR_VIRT_SSBD, env->virt_ssbd }, + }; + + if (ARRAY_SIZE(pairs) > MSHV_MSR_ENTRIES_COUNT) { + error_report("MSR entries exceed maximum size"); + g_free(msrs); + return -1; + } + + for (size_t i = 0; i < ARRAY_SIZE(pairs); i++) { + MshvMsrEntry *entry = &msrs->entries[i]; + entry->index = pairs[i].index; + entry->reserved = 0; + entry->data = pairs[i].value; + msrs->nmsrs++; + } + + ret = mshv_configure_msr(cpu, &msrs->entries[0], msrs->nmsrs); + g_free(msrs); + return ret; +} + + +int 
mshv_arch_put_registers(const CPUState *cpu) +{ + int ret; + + ret = put_regs(cpu); + if (ret < 0) { + error_report("Failed to put registers"); + return -1; + } + + ret = put_msrs(cpu); + if (ret < 0) { + error_report("Failed to put msrs"); + return -1; + } + + return 0; +} + +void mshv_arch_amend_proc_features( + union hv_partition_synthetic_processor_features *features) +{ + features->access_guest_idle_reg = 1; +} + +static int set_memory_info(const struct hyperv_message *msg, + struct hv_x64_memory_intercept_message *info) +{ + if (msg->header.message_type != HVMSG_GPA_INTERCEPT + && msg->header.message_type != HVMSG_UNMAPPED_GPA + && msg->header.message_type != HVMSG_UNACCEPTED_GPA) { + error_report("invalid message type"); + return -1; + } + memcpy(info, msg->payload, sizeof(*info)); + + return 0; +} + +static int emulate_instruction(CPUState *cpu, + const uint8_t *insn_bytes, size_t insn_len, + uint64_t gva, uint64_t gpa) +{ + X86CPU *x86_cpu = X86_CPU(cpu); + CPUX86State *env = &x86_cpu->env; + struct x86_decode decode = { 0 }; + int ret; + x86_insn_stream stream = { .bytes = insn_bytes, .len = insn_len }; + + ret = mshv_load_regs(cpu); + if (ret < 0) { + error_report("failed to load registers"); + return -1; + } + + decode_instruction_stream(env, &decode, &stream); + exec_instruction(env, &decode); + + ret = mshv_store_regs(cpu); + if (ret < 0) { + error_report("failed to store registers"); + return -1; + } + + return 0; +} + +static int handle_mmio(CPUState *cpu, const struct hyperv_message *msg, + MshvVmExit *exit_reason) +{ + struct hv_x64_memory_intercept_message info = { 0 }; + size_t insn_len; + uint8_t access_type; + uint8_t *instruction_bytes; + int ret; + + ret = set_memory_info(msg, &info); + if (ret < 0) { + error_report("failed to convert message to memory info"); + return -1; + } + insn_len = info.instruction_byte_count; + access_type = info.header.intercept_access_type; + + if (access_type == HV_X64_INTERCEPT_ACCESS_TYPE_EXECUTE) { + error_report("invalid intercept access type: execute"); + return -1; + } + + if (insn_len > 16) { + error_report("invalid mmio instruction length: %zu", insn_len); + return -1; + } + + trace_mshv_handle_mmio(info.guest_virtual_address, + info.guest_physical_address, + info.instruction_byte_count, access_type); + + instruction_bytes = info.instruction_bytes; + + ret = emulate_instruction(cpu, instruction_bytes, insn_len, + info.guest_virtual_address, + info.guest_physical_address); + if (ret < 0) { + error_report("failed to emulate mmio"); + return -1; + } + + *exit_reason = MshvVmExitIgnore; + + return 0; +} + +static int handle_unmapped_mem(int vm_fd, CPUState *cpu, + const struct hyperv_message *msg, + MshvVmExit *exit_reason) +{ + struct hv_x64_memory_intercept_message info = { 0 }; + uint64_t gpa; + int ret; + enum MshvRemapResult remap_result; + + ret = set_memory_info(msg, &info); + if (ret < 0) { + error_report("failed to convert message to memory info"); + return -1; + } + + gpa = info.guest_physical_address; + + /* attempt to remap the region, in case of overlapping userspace mappings */ + remap_result = mshv_remap_overlap_region(vm_fd, gpa); + *exit_reason = MshvVmExitIgnore; + + switch (remap_result) { + case MshvRemapNoMapping: + /* if we didn't find a mapping, it is probably mmio */ + return handle_mmio(cpu, msg, exit_reason); + case MshvRemapOk: + break; + case MshvRemapNoOverlap: + /* This should not happen, but we are forgiving it */ + warn_report("found no overlap for unmapped region"); + *exit_reason = MshvVmExitSpecial; 
+ break; + } + + return 0; +} + +static int set_ioport_info(const struct hyperv_message *msg, + hv_x64_io_port_intercept_message *info) +{ + if (msg->header.message_type != HVMSG_X64_IO_PORT_INTERCEPT) { + error_report("Invalid message type"); + return -1; + } + memcpy(info, msg->payload, sizeof(*info)); + + return 0; +} + +static int set_x64_registers(const CPUState *cpu, const uint32_t *names, + const uint64_t *values) +{ + + hv_register_assoc assocs[2]; + int ret; + + for (size_t i = 0; i < ARRAY_SIZE(assocs); i++) { + assocs[i].name = names[i]; + assocs[i].value.reg64 = values[i]; + } + + ret = mshv_set_generic_regs(cpu, assocs, ARRAY_SIZE(assocs)); + if (ret < 0) { + error_report("failed to set x64 registers"); + return -1; + } + + return 0; +} + +static inline MemTxAttrs get_mem_attrs(bool is_secure_mode) +{ + MemTxAttrs memattr = {0}; + memattr.secure = is_secure_mode; + return memattr; +} + +static void pio_read(uint64_t port, uint8_t *data, uintptr_t size, + bool is_secure_mode) +{ + int ret = 0; + MemTxAttrs memattr = get_mem_attrs(is_secure_mode); + ret = address_space_rw(&address_space_io, port, memattr, (void *)data, size, + false); + if (ret != MEMTX_OK) { + error_report("Failed to read from port %lx: %d", port, ret); + abort(); + } +} + +static int pio_write(uint64_t port, const uint8_t *data, uintptr_t size, + bool is_secure_mode) +{ + int ret = 0; + MemTxAttrs memattr = get_mem_attrs(is_secure_mode); + ret = address_space_rw(&address_space_io, port, memattr, (void *)data, size, + true); + return ret; +} + +static int handle_pio_non_str(const CPUState *cpu, + hv_x64_io_port_intercept_message *info) +{ + size_t len = info->access_info.access_size; + uint8_t access_type = info->header.intercept_access_type; + int ret; + uint32_t val, eax; + const uint32_t eax_mask = 0xffffffffu >> (32 - len * 8); + size_t insn_len; + uint64_t rip, rax; + uint32_t reg_names[2]; + uint64_t reg_values[2]; + uint16_t port = info->port_number; + + if (access_type == HV_X64_INTERCEPT_ACCESS_TYPE_WRITE) { + union { + uint32_t u32; + uint8_t bytes[4]; + } conv; + + /* convert the first 4 bytes of rax to bytes */ + conv.u32 = (uint32_t)info->rax; + /* secure mode is set to false */ + ret = pio_write(port, conv.bytes, len, false); + if (ret < 0) { + error_report("Failed to write to io port"); + return -1; + } + } else { + uint8_t data[4] = { 0 }; + /* secure mode is set to false */ + pio_read(info->port_number, data, len, false); + + /* Preserve high bits in EAX, but clear out high bits in RAX */ + val = *(uint32_t *)data; + eax = (((uint32_t)info->rax) & ~eax_mask) | (val & eax_mask); + info->rax = (uint64_t)eax; + } + + insn_len = info->header.instruction_length; + + /* Advance RIP and update RAX */ + rip = info->header.rip + insn_len; + rax = info->rax; + + reg_names[0] = HV_X64_REGISTER_RIP; + reg_values[0] = rip; + reg_names[1] = HV_X64_REGISTER_RAX; + reg_values[1] = rax; + + ret = set_x64_registers(cpu, reg_names, reg_values); + if (ret < 0) { + error_report("Failed to set x64 registers"); + return -1; + } + + cpu->accel->dirty = false; + + return 0; +} + +static int fetch_guest_state(CPUState *cpu) +{ + int ret; + + ret = mshv_get_standard_regs(cpu); + if (ret < 0) { + error_report("Failed to get standard registers"); + return -1; + } + + ret = mshv_get_special_regs(cpu); + if (ret < 0) { + error_report("Failed to get special registers"); + return -1; + } + + return 0; +} + +static int read_memory(const CPUState *cpu, uint64_t initial_gva, + uint64_t initial_gpa, uint64_t gva, uint8_t *data, + 
size_t len) +{ + int ret; + uint64_t gpa, flags; + + if (gva == initial_gva) { + gpa = initial_gpa; + } else { + flags = HV_TRANSLATE_GVA_VALIDATE_READ; + ret = translate_gva(cpu, gva, &gpa, flags); + if (ret < 0) { + return -1; + } + + ret = mshv_guest_mem_read(gpa, data, len, false, false); + if (ret < 0) { + error_report("failed to read guest mem"); + return -1; + } + } + + return 0; +} + +static int write_memory(const CPUState *cpu, uint64_t initial_gva, + uint64_t initial_gpa, uint64_t gva, const uint8_t *data, + size_t len) +{ + int ret; + uint64_t gpa, flags; + + if (gva == initial_gva) { + gpa = initial_gpa; + } else { + flags = HV_TRANSLATE_GVA_VALIDATE_WRITE; + ret = translate_gva(cpu, gva, &gpa, flags); + if (ret < 0) { + error_report("failed to translate gva to gpa"); + return -1; + } + } + ret = mshv_guest_mem_write(gpa, data, len, false); + if (ret != MEMTX_OK) { + error_report("failed to write to mmio"); + return -1; + } + + return 0; +} + +static int handle_pio_str_write(CPUState *cpu, + hv_x64_io_port_intercept_message *info, + size_t repeat, uint16_t port, + bool direction_flag) +{ + int ret; + uint64_t src; + uint8_t data[4] = { 0 }; + size_t len = info->access_info.access_size; + + src = linear_addr(cpu, info->rsi, R_DS); + + for (size_t i = 0; i < repeat; i++) { + ret = read_memory(cpu, 0, 0, src, data, len); + if (ret < 0) { + error_report("Failed to read memory"); + return -1; + } + ret = pio_write(port, data, len, false); + if (ret < 0) { + error_report("Failed to write to io port"); + return -1; + } + src += direction_flag ? -len : len; + info->rsi += direction_flag ? -len : len; + } + + return 0; +} + +static int handle_pio_str_read(CPUState *cpu, + hv_x64_io_port_intercept_message *info, + size_t repeat, uint16_t port, + bool direction_flag) +{ + int ret; + uint64_t dst; + size_t len = info->access_info.access_size; + uint8_t data[4] = { 0 }; + + dst = linear_addr(cpu, info->rdi, R_ES); + + for (size_t i = 0; i < repeat; i++) { + pio_read(port, data, len, false); + + ret = write_memory(cpu, 0, 0, dst, data, len); + if (ret < 0) { + error_report("Failed to write memory"); + return -1; + } + dst += direction_flag ? -len : len; + info->rdi += direction_flag ? -len : len; + } + + return 0; +} + +static int handle_pio_str(CPUState *cpu, hv_x64_io_port_intercept_message *info) +{ + uint8_t access_type = info->header.intercept_access_type; + uint16_t port = info->port_number; + bool repop = info->access_info.rep_prefix == 1; + size_t repeat = repop ? 
info->rcx : 1; + size_t insn_len = info->header.instruction_length; + bool direction_flag; + uint32_t reg_names[3]; + uint64_t reg_values[3]; + int ret; + X86CPU *x86_cpu = X86_CPU(cpu); + CPUX86State *env = &x86_cpu->env; + + ret = fetch_guest_state(cpu); + if (ret < 0) { + error_report("Failed to fetch guest state"); + return -1; + } + + direction_flag = (env->eflags & DESC_E_MASK) != 0; + + if (access_type == HV_X64_INTERCEPT_ACCESS_TYPE_WRITE) { + ret = handle_pio_str_write(cpu, info, repeat, port, direction_flag); + if (ret < 0) { + error_report("Failed to handle pio str write"); + return -1; + } + reg_names[0] = HV_X64_REGISTER_RSI; + reg_values[0] = info->rsi; + } else { + ret = handle_pio_str_read(cpu, info, repeat, port, direction_flag); + reg_names[0] = HV_X64_REGISTER_RDI; + reg_values[0] = info->rdi; + } + + reg_names[1] = HV_X64_REGISTER_RIP; + reg_values[1] = info->header.rip + insn_len; + reg_names[2] = HV_X64_REGISTER_RAX; + reg_values[2] = info->rax; + + ret = set_x64_registers(cpu, reg_names, reg_values); + if (ret < 0) { + error_report("Failed to set x64 registers"); + return -1; + } + + cpu->accel->dirty = false; + + return 0; +} + +static int handle_pio(CPUState *cpu, const struct hyperv_message *msg) +{ + struct hv_x64_io_port_intercept_message info = { 0 }; + int ret; + + ret = set_ioport_info(msg, &info); + if (ret < 0) { + error_report("Failed to convert message to ioport info"); + return -1; + } + + if (info.access_info.string_op) { + return handle_pio_str(cpu, &info); + } + + return handle_pio_non_str(cpu, &info); +} + +int mshv_run_vcpu(int vm_fd, CPUState *cpu, hv_message *msg, MshvVmExit *exit) +{ + int ret; + enum MshvVmExit exit_reason; + int cpu_fd = mshv_vcpufd(cpu); + + ret = ioctl(cpu_fd, MSHV_RUN_VP, msg); + if (ret < 0) { + return MshvVmExitShutdown; + } + + switch (msg->header.message_type) { + case HVMSG_UNRECOVERABLE_EXCEPTION: + return MshvVmExitShutdown; + case HVMSG_UNMAPPED_GPA: + ret = handle_unmapped_mem(vm_fd, cpu, msg, &exit_reason); + if (ret < 0) { + error_report("failed to handle unmapped memory"); + return -1; + } + return exit_reason; + case HVMSG_GPA_INTERCEPT: + ret = handle_mmio(cpu, msg, &exit_reason); + if (ret < 0) { + error_report("failed to handle mmio"); + return -1; + } + return exit_reason; + case HVMSG_X64_IO_PORT_INTERCEPT: + ret = handle_pio(cpu, msg); + if (ret < 0) { + return MshvVmExitSpecial; + } + return MshvVmExitIgnore; + default: + break; + } + + *exit = MshvVmExitIgnore; + return 0; +} + +void mshv_remove_vcpu(int vm_fd, int cpu_fd) +{ + close(cpu_fd); +} + + +int mshv_create_vcpu(int vm_fd, uint8_t vp_index, int *cpu_fd) +{ + int ret; + struct mshv_create_vp vp_arg = { + .vp_index = vp_index, + }; + ret = ioctl(vm_fd, MSHV_CREATE_VP, &vp_arg); + if (ret < 0) { + error_report("failed to create mshv vcpu: %s", strerror(errno)); + return -1; + } + + *cpu_fd = ret; + + return 0; +} + +static int guest_mem_read_with_gva(const CPUState *cpu, uint64_t gva, + uint8_t *data, uintptr_t size, + bool fetch_instruction) +{ + int ret; + uint64_t gpa, flags; + + flags = HV_TRANSLATE_GVA_VALIDATE_READ; + ret = translate_gva(cpu, gva, &gpa, flags); + if (ret < 0) { + error_report("failed to translate gva to gpa"); + return -1; + } + + ret = mshv_guest_mem_read(gpa, data, size, false, fetch_instruction); + if (ret < 0) { + error_report("failed to read from guest memory"); + return -1; + } + + return 0; +} + +static int guest_mem_write_with_gva(const CPUState *cpu, uint64_t gva, + const uint8_t *data, uintptr_t size) +{ + int ret; 
+ uint64_t gpa, flags; + + flags = HV_TRANSLATE_GVA_VALIDATE_WRITE; + ret = translate_gva(cpu, gva, &gpa, flags); + if (ret < 0) { + error_report("failed to translate gva to gpa"); + return -1; + } + ret = mshv_guest_mem_write(gpa, data, size, false); + if (ret < 0) { + error_report("failed to write to guest memory"); + return -1; + } + return 0; +} + +static void write_mem(CPUState *cpu, void *data, target_ulong addr, int bytes) +{ + if (guest_mem_write_with_gva(cpu, addr, data, bytes) < 0) { + error_report("failed to write memory"); + abort(); + } +} + +static void fetch_instruction(CPUState *cpu, void *data, + target_ulong addr, int bytes) +{ + if (guest_mem_read_with_gva(cpu, addr, data, bytes, true) < 0) { + error_report("failed to fetch instruction"); + abort(); + } +} + +static void read_mem(CPUState *cpu, void *data, target_ulong addr, int bytes) +{ + if (guest_mem_read_with_gva(cpu, addr, data, bytes, false) < 0) { + error_report("failed to read memory"); + abort(); + } +} + +static void read_segment_descriptor(CPUState *cpu, + struct x86_segment_descriptor *desc, + enum X86Seg seg_idx) +{ + bool ret; + X86CPU *x86_cpu = X86_CPU(cpu); + CPUX86State *env = &x86_cpu->env; + SegmentCache *seg = &env->segs[seg_idx]; + x86_segment_selector sel = { .sel = seg->selector & 0xFFFF }; + + ret = x86_read_segment_descriptor(cpu, desc, sel); + if (ret == false) { + error_report("failed to read segment descriptor"); + abort(); + } +} + +static const struct x86_emul_ops mshv_x86_emul_ops = { + .fetch_instruction = fetch_instruction, + .read_mem = read_mem, + .write_mem = write_mem, + .read_segment_descriptor = read_segment_descriptor, +}; + +void mshv_init_mmio_emu(void) +{ + init_decoder(); + init_emu(&mshv_x86_emul_ops); +} + +void mshv_arch_init_vcpu(CPUState *cpu) +{ + X86CPU *x86_cpu = X86_CPU(cpu); + CPUX86State *env = &x86_cpu->env; + AccelCPUState *state = cpu->accel; + size_t page = HV_HYP_PAGE_SIZE; + void *mem = qemu_memalign(page, 2 * page); + + /* sanity check, to make sure we don't overflow the page */ + QEMU_BUILD_BUG_ON((MAX_REGISTER_COUNT + * sizeof(hv_register_assoc) + + sizeof(hv_input_get_vp_registers) + > HV_HYP_PAGE_SIZE)); + + state->hvcall_args.base = mem; + state->hvcall_args.input_page = mem; + state->hvcall_args.output_page = (uint8_t *)mem + page; + + env->emu_mmio_buf = g_new(char, 4096); +} + +void mshv_arch_destroy_vcpu(CPUState *cpu) +{ + X86CPU *x86_cpu = X86_CPU(cpu); + CPUX86State *env = &x86_cpu->env; + AccelCPUState *state = cpu->accel; + + g_free(state->hvcall_args.base); + state->hvcall_args = (MshvHvCallArgs){0}; + g_clear_pointer(&env->emu_mmio_buf, g_free); +} + +/* + * Default Microsoft Hypervisor behavior for unimplemented MSR is to send a + * fault to the guest if it tries to access it. It is possible to override + * this behavior with a more suitable option i.e., ignore writes from the guest + * and return zero in attempt to read unimplemented. 
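+ * Here that override is applied partition-wide through the HVCALL_SET_PARTITION_PROPERTY hypercall below, so a guest probing an optional MSR sees reads as zero and writes as no-ops instead of taking a fault.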
*/ +static int set_unimplemented_msr_action(int vm_fd) +{ + struct hv_input_set_partition_property in = {0}; + struct mshv_root_hvcall args = {0}; + + in.property_code = HV_PARTITION_PROPERTY_UNIMPLEMENTED_MSR_ACTION; + in.property_value = HV_UNIMPLEMENTED_MSR_ACTION_IGNORE_WRITE_READ_ZERO; + + args.code = HVCALL_SET_PARTITION_PROPERTY; + args.in_sz = sizeof(in); + args.in_ptr = (uint64_t)&in; + + trace_mshv_hvcall_args("unimplemented_msr_action", args.code, args.in_sz); + + int ret = mshv_hvcall(vm_fd, &args); + if (ret < 0) { + error_report("Failed to set unimplemented MSR action"); + return -1; + } + return 0; +} + +int mshv_arch_post_init_vm(int vm_fd) +{ + int ret; + + ret = set_unimplemented_msr_action(vm_fd); + if (ret < 0) { + error_report("Failed to set unimplemented MSR action"); + } + + return ret; +} diff --git a/target/i386/mshv/x86.c b/target/i386/mshv/x86.c new file mode 100644 index 0000000..d574b3b --- /dev/null +++ b/target/i386/mshv/x86.c @@ -0,0 +1,297 @@ +/* + * QEMU MSHV support + * + * Copyright Microsoft, Corp. 2025 + * + * Authors: Magnus Kulke <magnuskulke@microsoft.com> + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "qemu/osdep.h" + +#include "cpu.h" + #include "emulate/x86_decode.h" +#include "emulate/x86_emu.h" +#include "qemu/typedefs.h" +#include "qemu/error-report.h" +#include "system/mshv.h" + +/* RW or Exec segment */ +static const uint8_t RWRX_SEGMENT_TYPE = 0x2; +static const uint8_t CODE_SEGMENT_TYPE = 0x8; +static const uint8_t EXPAND_DOWN_SEGMENT_TYPE = 0x4; + +typedef enum CpuMode { + REAL_MODE, + PROTECTED_MODE, + LONG_MODE, +} CpuMode; + +static CpuMode cpu_mode(CPUState *cpu) +{ + enum CpuMode m = REAL_MODE; + + if (x86_is_protected(cpu)) { + m = PROTECTED_MODE; + + if (x86_is_long_mode(cpu)) { + m = LONG_MODE; + } + } + + return m; +} + +static bool segment_type_ro(const SegmentCache *seg) +{ + uint32_t type_ = (seg->flags >> DESC_TYPE_SHIFT) & 15; + return (type_ & (~RWRX_SEGMENT_TYPE)) == 0; +} + +static bool segment_type_code(const SegmentCache *seg) +{ + uint32_t type_ = (seg->flags >> DESC_TYPE_SHIFT) & 15; + return (type_ & CODE_SEGMENT_TYPE) != 0; +} + +static bool segment_expands_down(const SegmentCache *seg) +{ + uint32_t type_ = (seg->flags >> DESC_TYPE_SHIFT) & 15; + + if (segment_type_code(seg)) { + return false; + } + + return (type_ & EXPAND_DOWN_SEGMENT_TYPE) != 0; +} + +static uint32_t segment_limit(const SegmentCache *seg) +{ + uint32_t limit = seg->limit; + uint32_t granularity = (seg->flags & DESC_G_MASK) != 0; + + if (granularity != 0) { + limit = (limit << 12) | 0xFFF; + } + + return limit; +} + +static uint8_t segment_db(const SegmentCache *seg) +{ + return (seg->flags >> DESC_B_SHIFT) & 1; +} + +static uint32_t segment_max_limit(const SegmentCache *seg) +{ + if (segment_db(seg) != 0) { + return 0xFFFFFFFF; + } + return 0xFFFF; +} + +static int linearize(CPUState *cpu, + target_ulong logical_addr, target_ulong *linear_addr, + X86Seg seg_idx) +{ + enum CpuMode mode; + X86CPU *x86_cpu = X86_CPU(cpu); + CPUX86State *env = &x86_cpu->env; + SegmentCache *seg = &env->segs[seg_idx]; + target_ulong base = seg->base; + target_ulong logical_addr_32b; + uint32_t limit; + /* TODO: the emulator will not pass us "write" indicator yet */ + bool write = false; + + mode = cpu_mode(cpu); + + switch (mode) { + case LONG_MODE: + if (__builtin_add_overflow(logical_addr, base, linear_addr)) { + error_report("Address overflow"); + return -1; + } + break; + case PROTECTED_MODE: + case REAL_MODE: + if (segment_type_ro(seg) &&
write) { + error_report("Cannot write to read-only segment"); + return -1; + } + + logical_addr_32b = logical_addr & 0xFFFFFFFF; + limit = segment_limit(seg); + + if (segment_expands_down(seg)) { + if (logical_addr_32b >= limit) { + error_report("Address exceeds limit (expands down)"); + return -1; + } + + limit = segment_max_limit(seg); + } + + if (logical_addr_32b > limit) { + error_report("Address exceeds limit %u", limit); + return -1; + } + *linear_addr = logical_addr_32b + base; + break; + default: + error_report("Unknown cpu mode: %d", mode); + return -1; + } + + return 0; +} + +bool x86_read_segment_descriptor(CPUState *cpu, + struct x86_segment_descriptor *desc, + x86_segment_selector sel) +{ + target_ulong base; + uint32_t limit; + X86CPU *x86_cpu = X86_CPU(cpu); + CPUX86State *env = &x86_cpu->env; + target_ulong gva; + + memset(desc, 0, sizeof(*desc)); + + /* valid gdt descriptors start from index 1 */ + if (!sel.index && GDT_SEL == sel.ti) { + return false; + } + + if (GDT_SEL == sel.ti) { + base = env->gdt.base; + limit = env->gdt.limit; + } else { + base = env->ldt.base; + limit = env->ldt.limit; + } + + if (sel.index * 8 >= limit) { + return false; + } + + gva = base + sel.index * 8; + emul_ops->read_mem(cpu, desc, gva, sizeof(*desc)); + + return true; +} + +bool x86_read_call_gate(CPUState *cpu, struct x86_call_gate *idt_desc, + int gate) +{ + target_ulong base; + uint32_t limit; + X86CPU *x86_cpu = X86_CPU(cpu); + CPUX86State *env = &x86_cpu->env; + target_ulong gva; + + base = env->idt.base; + limit = env->idt.limit; + + memset(idt_desc, 0, sizeof(*idt_desc)); + if (gate * 8 >= limit) { + perror("call gate exceeds idt limit"); + return false; + } + + gva = base + gate * 8; + emul_ops->read_mem(cpu, idt_desc, gva, sizeof(*idt_desc)); + + return true; +} + +bool x86_is_protected(CPUState *cpu) +{ + X86CPU *x86_cpu = X86_CPU(cpu); + CPUX86State *env = &x86_cpu->env; + uint64_t cr0 = env->cr[0]; + + return cr0 & CR0_PE_MASK; +} + +bool x86_is_real(CPUState *cpu) +{ + return !x86_is_protected(cpu); +} + +bool x86_is_v8086(CPUState *cpu) +{ + X86CPU *x86_cpu = X86_CPU(cpu); + CPUX86State *env = &x86_cpu->env; + return x86_is_protected(cpu) && (env->eflags & VM_MASK); +} + +bool x86_is_long_mode(CPUState *cpu) +{ + X86CPU *x86_cpu = X86_CPU(cpu); + CPUX86State *env = &x86_cpu->env; + uint64_t efer = env->efer; + uint64_t lme_lma = (MSR_EFER_LME | MSR_EFER_LMA); + + return ((efer & lme_lma) == lme_lma); +} + +bool x86_is_long64_mode(CPUState *cpu) +{ + error_report("unimplemented: is_long64_mode()"); + abort(); +} + +bool x86_is_paging_mode(CPUState *cpu) +{ + X86CPU *x86_cpu = X86_CPU(cpu); + CPUX86State *env = &x86_cpu->env; + uint64_t cr0 = env->cr[0]; + + return cr0 & CR0_PG_MASK; +} + +bool x86_is_pae_enabled(CPUState *cpu) +{ + X86CPU *x86_cpu = X86_CPU(cpu); + CPUX86State *env = &x86_cpu->env; + uint64_t cr4 = env->cr[4]; + + return cr4 & CR4_PAE_MASK; +} + +target_ulong linear_addr(CPUState *cpu, target_ulong addr, X86Seg seg) +{ + int ret; + target_ulong linear_addr; + + ret = linearize(cpu, addr, &linear_addr, seg); + if (ret < 0) { + error_report("failed to linearize address"); + abort(); + } + + return linear_addr; +} + +target_ulong linear_addr_size(CPUState *cpu, target_ulong addr, int size, + X86Seg seg) +{ + switch (size) { + case 2: + addr = (uint16_t)addr; + break; + case 4: + addr = (uint32_t)addr; + break; + default: + break; + } + return linear_addr(cpu, addr, seg); +} + +target_ulong linear_rip(CPUState *cpu, target_ulong rip) +{ + return linear_addr(cpu, 
rip, R_CS); +} diff --git a/target/i386/nvmm/nvmm-accel-ops.c b/target/i386/nvmm/nvmm-accel-ops.c index 2144307..dd5d542 100644 --- a/target/i386/nvmm/nvmm-accel-ops.c +++ b/target/i386/nvmm/nvmm-accel-ops.c @@ -10,7 +10,7 @@ #include "qemu/osdep.h" #include "system/kvm_int.h" #include "qemu/main-loop.h" -#include "system/accel-ops.h" +#include "accel/accel-cpu-ops.h" #include "system/cpus.h" #include "qemu/guest-random.h" @@ -42,16 +42,14 @@ static void *qemu_nvmm_cpu_thread_fn(void *arg) qemu_guest_random_seed_thread_part2(cpu->random_seed); do { + qemu_process_cpu_events(cpu); + if (cpu_can_run(cpu)) { r = nvmm_vcpu_exec(cpu); if (r == EXCP_DEBUG) { cpu_handle_guest_debug(cpu); } } - while (cpu_thread_is_idle(cpu)) { - qemu_cond_wait_bql(cpu->halt_cond); - } - qemu_wait_io_event_common(cpu); } while (!cpu->unplug || cpu_can_run(cpu)); nvmm_destroy_vcpu(cpu); @@ -77,7 +75,7 @@ static void nvmm_start_vcpu_thread(CPUState *cpu) */ static void nvmm_kick_vcpu_thread(CPUState *cpu) { - cpu->exit_request = 1; + qatomic_set(&cpu->exit_request, true); cpus_kick_thread(cpu); } @@ -87,6 +85,7 @@ static void nvmm_accel_ops_class_init(ObjectClass *oc, const void *data) ops->create_vcpu_thread = nvmm_start_vcpu_thread; ops->kick_vcpu_thread = nvmm_kick_vcpu_thread; + ops->handle_interrupt = generic_handle_interrupt; ops->synchronize_post_reset = nvmm_cpu_synchronize_post_reset; ops->synchronize_post_init = nvmm_cpu_synchronize_post_init; diff --git a/target/i386/nvmm/nvmm-all.c b/target/i386/nvmm/nvmm-all.c index f1c6120..2e442ba 100644 --- a/target/i386/nvmm/nvmm-all.c +++ b/target/i386/nvmm/nvmm-all.c @@ -12,13 +12,17 @@ #include "system/address-spaces.h" #include "system/ioport.h" #include "qemu/accel.h" +#include "accel/accel-ops.h" #include "system/nvmm.h" #include "system/cpus.h" +#include "system/memory.h" #include "system/runstate.h" #include "qemu/main-loop.h" #include "qemu/error-report.h" #include "qapi/error.h" #include "qemu/queue.h" +#include "accel/accel-cpu-target.h" +#include "host-cpu.h" #include "migration/blocker.h" #include "strings.h" @@ -30,7 +34,6 @@ struct AccelCPUState { struct nvmm_vcpu vcpu; uint8_t tpr; bool stop; - bool dirty; /* Window-exiting for INTs/NMIs. */ bool int_window_exit; @@ -47,7 +50,7 @@ struct qemu_machine { /* -------------------------------------------------------------------------- */ -static bool nvmm_allowed; +bool nvmm_allowed; static struct qemu_machine qemu_mach; static struct nvmm_machine * @@ -411,22 +414,22 @@ nvmm_vcpu_pre_run(CPUState *cpu) * Force the VCPU out of its inner loop to process any INIT requests * or commit pending TPR access. 
*/ - if (cpu->interrupt_request & (CPU_INTERRUPT_INIT | CPU_INTERRUPT_TPR)) { - cpu->exit_request = 1; + if (cpu_test_interrupt(cpu, CPU_INTERRUPT_INIT | CPU_INTERRUPT_TPR)) { + qatomic_set(&cpu->exit_request, true); } - if (!has_event && (cpu->interrupt_request & CPU_INTERRUPT_NMI)) { + if (!has_event && cpu_test_interrupt(cpu, CPU_INTERRUPT_NMI)) { if (nvmm_can_take_nmi(cpu)) { - cpu->interrupt_request &= ~CPU_INTERRUPT_NMI; + cpu_reset_interrupt(cpu, CPU_INTERRUPT_NMI); event->type = NVMM_VCPU_EVENT_INTR; event->vector = 2; has_event = true; } } - if (!has_event && (cpu->interrupt_request & CPU_INTERRUPT_HARD)) { + if (!has_event && cpu_test_interrupt(cpu, CPU_INTERRUPT_HARD)) { if (nvmm_can_take_int(cpu)) { - cpu->interrupt_request &= ~CPU_INTERRUPT_HARD; + cpu_reset_interrupt(cpu, CPU_INTERRUPT_HARD); event->type = NVMM_VCPU_EVENT_INTR; event->vector = cpu_get_pic_interrupt(env); has_event = true; @@ -434,8 +437,8 @@ nvmm_vcpu_pre_run(CPUState *cpu) } /* Don't want SMIs. */ - if (cpu->interrupt_request & CPU_INTERRUPT_SMI) { - cpu->interrupt_request &= ~CPU_INTERRUPT_SMI; + if (cpu_test_interrupt(cpu, CPU_INTERRUPT_SMI)) { + cpu_reset_interrupt(cpu, CPU_INTERRUPT_SMI); } if (sync_tpr) { @@ -508,16 +511,18 @@ nvmm_io_callback(struct nvmm_io *io) } /* Needed, otherwise infinite loop. */ - current_cpu->accel->dirty = false; + current_cpu->vcpu_dirty = false; } static void nvmm_mem_callback(struct nvmm_mem *mem) { - cpu_physical_memory_rw(mem->gpa, mem->data, mem->size, mem->write); + /* TODO: Get CPUState via mem->vcpu? */ + address_space_rw(&address_space_memory, mem->gpa, MEMTXATTRS_UNSPECIFIED, + mem->data, mem->size, mem->write); /* Needed, otherwise infinite loop. */ - current_cpu->accel->dirty = false; + current_cpu->vcpu_dirty = false; } static struct nvmm_assist_callbacks nvmm_callbacks = { @@ -649,9 +654,9 @@ nvmm_handle_halted(struct nvmm_machine *mach, CPUState *cpu, bql_lock(); - if (!((cpu->interrupt_request & CPU_INTERRUPT_HARD) && + if (!(cpu_test_interrupt(cpu, CPU_INTERRUPT_HARD) && (cpu_env(cpu)->eflags & IF_MASK)) && - !(cpu->interrupt_request & CPU_INTERRUPT_NMI)) { + !cpu_test_interrupt(cpu, CPU_INTERRUPT_NMI)) { cpu->exception_index = EXCP_HLT; cpu->halted = true; ret = 1; @@ -689,26 +694,26 @@ nvmm_vcpu_loop(CPUState *cpu) * Some asynchronous events must be handled outside of the inner * VCPU loop. They are handled here. 
*/ - if (cpu->interrupt_request & CPU_INTERRUPT_INIT) { + if (cpu_test_interrupt(cpu, CPU_INTERRUPT_INIT)) { nvmm_cpu_synchronize_state(cpu); do_cpu_init(x86_cpu); /* set int/nmi windows back to the reset state */ } - if (cpu->interrupt_request & CPU_INTERRUPT_POLL) { - cpu->interrupt_request &= ~CPU_INTERRUPT_POLL; + if (cpu_test_interrupt(cpu, CPU_INTERRUPT_POLL)) { + cpu_reset_interrupt(cpu, CPU_INTERRUPT_POLL); apic_poll_irq(x86_cpu->apic_state); } - if (((cpu->interrupt_request & CPU_INTERRUPT_HARD) && + if ((cpu_test_interrupt(cpu, CPU_INTERRUPT_HARD) && (env->eflags & IF_MASK)) || - (cpu->interrupt_request & CPU_INTERRUPT_NMI)) { + cpu_test_interrupt(cpu, CPU_INTERRUPT_NMI)) { cpu->halted = false; } - if (cpu->interrupt_request & CPU_INTERRUPT_SIPI) { + if (cpu_test_interrupt(cpu, CPU_INTERRUPT_SIPI)) { nvmm_cpu_synchronize_state(cpu); do_cpu_sipi(x86_cpu); } - if (cpu->interrupt_request & CPU_INTERRUPT_TPR) { - cpu->interrupt_request &= ~CPU_INTERRUPT_TPR; + if (cpu_test_interrupt(cpu, CPU_INTERRUPT_TPR)) { + cpu_reset_interrupt(cpu, CPU_INTERRUPT_TPR); nvmm_cpu_synchronize_state(cpu); apic_handle_tpr_access_report(x86_cpu->apic_state, env->eip, env->tpr_access_type); @@ -727,9 +732,9 @@ nvmm_vcpu_loop(CPUState *cpu) * Inner VCPU loop. */ do { - if (cpu->accel->dirty) { + if (cpu->vcpu_dirty) { nvmm_set_registers(cpu); - cpu->accel->dirty = false; + cpu->vcpu_dirty = false; } if (qcpu->stop) { @@ -741,7 +746,8 @@ nvmm_vcpu_loop(CPUState *cpu) nvmm_vcpu_pre_run(cpu); - if (qatomic_read(&cpu->exit_request)) { + /* Corresponding store-release is in cpu_exit. */ + if (qatomic_load_acquire(&cpu->exit_request)) { #if NVMM_USER_VERSION >= 2 nvmm_vcpu_stop(vcpu); #else @@ -749,8 +755,6 @@ nvmm_vcpu_loop(CPUState *cpu) #endif } - /* Read exit_request before the kernel reads the immediate exit flag */ - smp_rmb(); ret = nvmm_vcpu_run(mach, vcpu); if (ret == -1) { error_report("NVMM: Failed to exec a virtual processor," @@ -816,8 +820,6 @@ nvmm_vcpu_loop(CPUState *cpu) cpu_exec_end(cpu); bql_lock(); - qatomic_set(&cpu->exit_request, false); - return ret < 0; } @@ -827,32 +829,32 @@ static void do_nvmm_cpu_synchronize_state(CPUState *cpu, run_on_cpu_data arg) { nvmm_get_registers(cpu); - cpu->accel->dirty = true; + cpu->vcpu_dirty = true; } static void do_nvmm_cpu_synchronize_post_reset(CPUState *cpu, run_on_cpu_data arg) { nvmm_set_registers(cpu); - cpu->accel->dirty = false; + cpu->vcpu_dirty = false; } static void do_nvmm_cpu_synchronize_post_init(CPUState *cpu, run_on_cpu_data arg) { nvmm_set_registers(cpu); - cpu->accel->dirty = false; + cpu->vcpu_dirty = false; } static void do_nvmm_cpu_synchronize_pre_loadvm(CPUState *cpu, run_on_cpu_data arg) { - cpu->accel->dirty = true; + cpu->vcpu_dirty = true; } void nvmm_cpu_synchronize_state(CPUState *cpu) { - if (!cpu->accel->dirty) { + if (!cpu->vcpu_dirty) { run_on_cpu(cpu, do_nvmm_cpu_synchronize_state, RUN_ON_CPU_NULL); } } @@ -982,7 +984,7 @@ nvmm_init_vcpu(CPUState *cpu) } } - qcpu->dirty = true; + qcpu->vcpu_dirty = true; cpu->accel = qcpu; return 0; @@ -1153,7 +1155,7 @@ static struct RAMBlockNotifier nvmm_ram_notifier = { /* -------------------------------------------------------------------------- */ static int -nvmm_accel_init(MachineState *ms) +nvmm_accel_init(AccelState *as, MachineState *ms) { int ret, err; @@ -1193,12 +1195,6 @@ nvmm_accel_init(MachineState *ms) return 0; } -int -nvmm_enabled(void) -{ - return nvmm_allowed; -} - static void nvmm_accel_class_init(ObjectClass *oc, const void *data) { @@ -1214,10 +1210,33 @@ static 
const TypeInfo nvmm_accel_type = { .class_init = nvmm_accel_class_init, }; +static void nvmm_cpu_instance_init(CPUState *cs) +{ + X86CPU *cpu = X86_CPU(cs); + + host_cpu_instance_init(cpu); +} + +static void nvmm_cpu_accel_class_init(ObjectClass *oc, const void *data) +{ + AccelCPUClass *acc = ACCEL_CPU_CLASS(oc); + + acc->cpu_instance_init = nvmm_cpu_instance_init; +} + +static const TypeInfo nvmm_cpu_accel_type = { + .name = ACCEL_CPU_NAME("nvmm"), + + .parent = TYPE_ACCEL_CPU, + .class_init = nvmm_cpu_accel_class_init, + .abstract = true, +}; + static void nvmm_type_init(void) { type_register_static(&nvmm_accel_type); + type_register_static(&nvmm_cpu_accel_type); } type_init(nvmm_type_init); diff --git a/target/i386/ops_sse.h b/target/i386/ops_sse.h index f0aa189..a2e4d48 100644 --- a/target/i386/ops_sse.h +++ b/target/i386/ops_sse.h @@ -842,7 +842,7 @@ int64_t helper_cvttsd2sq(CPUX86State *env, ZMMReg *s) void glue(helper_rsqrtps, SUFFIX)(CPUX86State *env, ZMMReg *d, ZMMReg *s) { - uint8_t old_flags = get_float_exception_flags(&env->sse_status); + int old_flags = get_float_exception_flags(&env->sse_status); int i; for (i = 0; i < 2 << SHIFT; i++) { d->ZMM_S(i) = float32_div(float32_one, @@ -855,7 +855,7 @@ void glue(helper_rsqrtps, SUFFIX)(CPUX86State *env, ZMMReg *d, ZMMReg *s) #if SHIFT == 1 void helper_rsqrtss(CPUX86State *env, ZMMReg *d, ZMMReg *v, ZMMReg *s) { - uint8_t old_flags = get_float_exception_flags(&env->sse_status); + int old_flags = get_float_exception_flags(&env->sse_status); int i; d->ZMM_S(0) = float32_div(float32_one, float32_sqrt(s->ZMM_S(0), &env->sse_status), @@ -869,7 +869,7 @@ void helper_rsqrtss(CPUX86State *env, ZMMReg *d, ZMMReg *v, ZMMReg *s) void glue(helper_rcpps, SUFFIX)(CPUX86State *env, ZMMReg *d, ZMMReg *s) { - uint8_t old_flags = get_float_exception_flags(&env->sse_status); + int old_flags = get_float_exception_flags(&env->sse_status); int i; for (i = 0; i < 2 << SHIFT; i++) { d->ZMM_S(i) = float32_div(float32_one, s->ZMM_S(i), &env->sse_status); @@ -880,7 +880,7 @@ void glue(helper_rcpps, SUFFIX)(CPUX86State *env, ZMMReg *d, ZMMReg *s) #if SHIFT == 1 void helper_rcpss(CPUX86State *env, ZMMReg *d, ZMMReg *v, ZMMReg *s) { - uint8_t old_flags = get_float_exception_flags(&env->sse_status); + int old_flags = get_float_exception_flags(&env->sse_status); int i; d->ZMM_S(0) = float32_div(float32_one, s->ZMM_S(0), &env->sse_status); for (i = 1; i < 2 << SHIFT; i++) { @@ -1714,7 +1714,7 @@ void glue(helper_phminposuw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) void glue(helper_roundps, SUFFIX)(CPUX86State *env, Reg *d, Reg *s, uint32_t mode) { - uint8_t old_flags = get_float_exception_flags(&env->sse_status); + int old_flags = get_float_exception_flags(&env->sse_status); signed char prev_rounding_mode; int i; @@ -1738,7 +1738,7 @@ void glue(helper_roundps, SUFFIX)(CPUX86State *env, Reg *d, Reg *s, void glue(helper_roundpd, SUFFIX)(CPUX86State *env, Reg *d, Reg *s, uint32_t mode) { - uint8_t old_flags = get_float_exception_flags(&env->sse_status); + int old_flags = get_float_exception_flags(&env->sse_status); signed char prev_rounding_mode; int i; @@ -1763,7 +1763,7 @@ void glue(helper_roundpd, SUFFIX)(CPUX86State *env, Reg *d, Reg *s, void glue(helper_roundss, SUFFIX)(CPUX86State *env, Reg *d, Reg *v, Reg *s, uint32_t mode) { - uint8_t old_flags = get_float_exception_flags(&env->sse_status); + int old_flags = get_float_exception_flags(&env->sse_status); signed char prev_rounding_mode; int i; @@ -1788,7 +1788,7 @@ void glue(helper_roundss, SUFFIX)(CPUX86State 
*env, Reg *d, Reg *v, Reg *s, void glue(helper_roundsd, SUFFIX)(CPUX86State *env, Reg *d, Reg *v, Reg *s, uint32_t mode) { - uint8_t old_flags = get_float_exception_flags(&env->sse_status); + int old_flags = get_float_exception_flags(&env->sse_status); signed char prev_rounding_mode; int i; diff --git a/target/i386/sev-system-stub.c b/target/i386/sev-system-stub.c index d5bf886..7c5c02a 100644 --- a/target/i386/sev-system-stub.c +++ b/target/i386/sev-system-stub.c @@ -14,34 +14,9 @@ #include "qemu/osdep.h" #include "monitor/monitor.h" #include "monitor/hmp-target.h" -#include "qapi/qapi-commands-misc-target.h" #include "qapi/error.h" #include "sev.h" -SevInfo *qmp_query_sev(Error **errp) -{ - error_setg(errp, "SEV is not available in this QEMU"); - return NULL; -} - -SevLaunchMeasureInfo *qmp_query_sev_launch_measure(Error **errp) -{ - error_setg(errp, "SEV is not available in this QEMU"); - return NULL; -} - -SevCapability *qmp_query_sev_capabilities(Error **errp) -{ - error_setg(errp, "SEV is not available in this QEMU"); - return NULL; -} - -void qmp_sev_inject_launch_secret(const char *packet_header, const char *secret, - bool has_gpa, uint64_t gpa, Error **errp) -{ - error_setg(errp, "SEV is not available in this QEMU"); -} - int sev_encrypt_flash(hwaddr gpa, uint8_t *ptr, uint64_t len, Error **errp) { g_assert_not_reached(); @@ -56,13 +31,6 @@ int sev_es_save_reset_vector(void *flash_ptr, uint64_t flash_size) g_assert_not_reached(); } -SevAttestationReport *qmp_query_sev_attestation_report(const char *mnonce, - Error **errp) -{ - error_setg(errp, "SEV is not available in this QEMU"); - return NULL; -} - void hmp_info_sev(Monitor *mon, const QDict *qdict) { monitor_printf(mon, "SEV is not available in this QEMU\n"); diff --git a/target/i386/sev.c b/target/i386/sev.c index 7ee700d..1057b8a 100644 --- a/target/i386/sev.c +++ b/target/i386/sev.c @@ -37,11 +37,13 @@ #include "qom/object.h" #include "monitor/monitor.h" #include "monitor/hmp-target.h" -#include "qapi/qapi-commands-misc-target.h" +#include "qapi/qapi-commands-misc-i386.h" #include "confidential-guest.h" #include "hw/i386/pc.h" #include "system/address-spaces.h" +#include "hw/i386/e820_memory_layout.h" #include "qemu/queue.h" +#include "qemu/cutils.h" OBJECT_DECLARE_TYPE(SevCommonState, SevCommonStateClass, SEV_COMMON) OBJECT_DECLARE_TYPE(SevGuestState, SevCommonStateClass, SEV_GUEST) @@ -50,6 +52,15 @@ OBJECT_DECLARE_TYPE(SevSnpGuestState, SevCommonStateClass, SEV_SNP_GUEST) /* hard code sha256 digest size */ #define HASH_SIZE 32 +/* Hard coded GPA that KVM uses for the VMSA */ +#define KVM_VMSA_GPA 0xFFFFFFFFF000 + +/* Convert between SEV-ES VMSA and SegmentCache flags/attributes */ +#define FLAGS_VMSA_TO_SEGCACHE(flags) \ + ((((flags) & 0xff00) << 12) | (((flags) & 0xff) << 8)) +#define FLAGS_SEGCACHE_TO_VMSA(flags) \ + ((((flags) & 0xff00) >> 8) | (((flags) & 0xf00000) >> 12)) + typedef struct QEMU_PACKED SevHashTableEntry { QemuUUID guid; uint16_t len; @@ -89,6 +100,14 @@ typedef struct QEMU_PACKED SevHashTableDescriptor { uint32_t size; } SevHashTableDescriptor; +typedef struct SevLaunchVmsa { + QTAILQ_ENTRY(SevLaunchVmsa) next; + + uint16_t cpu_index; + uint64_t gpa; + struct sev_es_save_area vmsa; +} SevLaunchVmsa; + struct SevCommonState { X86ConfidentialGuest parent_obj; @@ -99,6 +118,8 @@ struct SevCommonState { uint32_t cbitpos; uint32_t reduced_phys_bits; bool kernel_hashes; + uint64_t sev_features; + uint64_t supported_sev_features; /* runtime state */ uint8_t api_major; @@ -107,9 +128,7 @@ struct SevCommonState 
{ int sev_fd; SevState state; - uint32_t reset_cs; - uint32_t reset_ip; - bool reset_data_valid; + QTAILQ_HEAD(, SevLaunchVmsa) launch_vmsa; }; struct SevCommonStateClass { @@ -122,7 +141,8 @@ struct SevCommonStateClass { Error **errp); int (*launch_start)(SevCommonState *sev_common); void (*launch_finish)(SevCommonState *sev_common); - int (*launch_update_data)(SevCommonState *sev_common, hwaddr gpa, uint8_t *ptr, size_t len); + int (*launch_update_data)(SevCommonState *sev_common, hwaddr gpa, + uint8_t *ptr, size_t len, Error **errp); int (*kvm_init)(ConfidentialGuestSupport *cgs, Error **errp); }; @@ -212,14 +232,6 @@ static const char *const sev_fw_errlist[] = { #define SEV_FW_MAX_ERROR ARRAY_SIZE(sev_fw_errlist) -/* <linux/kvm.h> doesn't expose this, so re-use the max from kvm.c */ -#define KVM_MAX_CPUID_ENTRIES 100 - -typedef struct KvmCpuidInfo { - struct kvm_cpuid2 cpuid; - struct kvm_cpuid_entry2 entries[KVM_MAX_CPUID_ENTRIES]; -} KvmCpuidInfo; - #define SNP_CPUID_FUNCTION_MAXCOUNT 64 #define SNP_CPUID_FUNCTION_UNKNOWN 0xFFFFFFFF @@ -371,6 +383,288 @@ static struct RAMBlockNotifier sev_ram_notifier = { .ram_block_removed = sev_ram_block_removed, }; +static void sev_apply_cpu_context(CPUState *cpu) +{ + SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); + X86CPU *x86; + CPUX86State *env; + struct SevLaunchVmsa *launch_vmsa; + + /* See if an initial VMSA has been provided for this CPU */ + QTAILQ_FOREACH(launch_vmsa, &sev_common->launch_vmsa, next) + { + if (cpu->cpu_index == launch_vmsa->cpu_index) { + x86 = X86_CPU(cpu); + env = &x86->env; + + /* + * Ideally we would provide the VMSA directly to kvm which would + * ensure that the resulting initial VMSA measurement which is + * calculated during KVM_SEV_LAUNCH_UPDATE_VMSA is calculated from + * exactly what we provide here. Currently this is not possible so + * we need to copy the parts of the VMSA structure that we currently + * support into the CPU state. 
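+ * VMSA fields that have no equivalent in the QEMU CPU state are instead rejected by check_vmsa_supported(), since silently dropping them would change the launch measurement.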
+ */ + cpu_load_efer(env, launch_vmsa->vmsa.efer); + cpu_x86_update_cr4(env, launch_vmsa->vmsa.cr4); + cpu_x86_update_cr0(env, launch_vmsa->vmsa.cr0); + cpu_x86_update_cr3(env, launch_vmsa->vmsa.cr3); + env->xcr0 = launch_vmsa->vmsa.xcr0; + env->pat = launch_vmsa->vmsa.g_pat; + + cpu_x86_load_seg_cache( + env, R_CS, launch_vmsa->vmsa.cs.selector, + launch_vmsa->vmsa.cs.base, launch_vmsa->vmsa.cs.limit, + FLAGS_VMSA_TO_SEGCACHE(launch_vmsa->vmsa.cs.attrib)); + cpu_x86_load_seg_cache( + env, R_DS, launch_vmsa->vmsa.ds.selector, + launch_vmsa->vmsa.ds.base, launch_vmsa->vmsa.ds.limit, + FLAGS_VMSA_TO_SEGCACHE(launch_vmsa->vmsa.ds.attrib)); + cpu_x86_load_seg_cache( + env, R_ES, launch_vmsa->vmsa.es.selector, + launch_vmsa->vmsa.es.base, launch_vmsa->vmsa.es.limit, + FLAGS_VMSA_TO_SEGCACHE(launch_vmsa->vmsa.es.attrib)); + cpu_x86_load_seg_cache( + env, R_FS, launch_vmsa->vmsa.fs.selector, + launch_vmsa->vmsa.fs.base, launch_vmsa->vmsa.fs.limit, + FLAGS_VMSA_TO_SEGCACHE(launch_vmsa->vmsa.fs.attrib)); + cpu_x86_load_seg_cache( + env, R_GS, launch_vmsa->vmsa.gs.selector, + launch_vmsa->vmsa.gs.base, launch_vmsa->vmsa.gs.limit, + FLAGS_VMSA_TO_SEGCACHE(launch_vmsa->vmsa.gs.attrib)); + cpu_x86_load_seg_cache( + env, R_SS, launch_vmsa->vmsa.ss.selector, + launch_vmsa->vmsa.ss.base, launch_vmsa->vmsa.ss.limit, + FLAGS_VMSA_TO_SEGCACHE(launch_vmsa->vmsa.ss.attrib)); + + env->gdt.base = launch_vmsa->vmsa.gdtr.base; + env->gdt.limit = launch_vmsa->vmsa.gdtr.limit; + env->gdt.flags = + FLAGS_VMSA_TO_SEGCACHE(launch_vmsa->vmsa.gdtr.attrib); + env->idt.base = launch_vmsa->vmsa.idtr.base; + env->idt.limit = launch_vmsa->vmsa.idtr.limit; + env->idt.flags = + FLAGS_VMSA_TO_SEGCACHE(launch_vmsa->vmsa.idtr.attrib); + + cpu_x86_load_seg_cache( + env, R_LDTR, launch_vmsa->vmsa.ldtr.selector, + launch_vmsa->vmsa.ldtr.base, launch_vmsa->vmsa.ldtr.limit, + FLAGS_VMSA_TO_SEGCACHE(launch_vmsa->vmsa.ldtr.attrib)); + cpu_x86_load_seg_cache( + env, R_TR, launch_vmsa->vmsa.tr.selector, + launch_vmsa->vmsa.ldtr.base, launch_vmsa->vmsa.tr.limit, + FLAGS_VMSA_TO_SEGCACHE(launch_vmsa->vmsa.tr.attrib)); + + env->dr[6] = launch_vmsa->vmsa.dr6; + env->dr[7] = launch_vmsa->vmsa.dr7; + + env->regs[R_EAX] = launch_vmsa->vmsa.rax; + env->regs[R_ECX] = launch_vmsa->vmsa.rcx; + env->regs[R_EDX] = launch_vmsa->vmsa.rdx; + env->regs[R_EBX] = launch_vmsa->vmsa.rbx; + env->regs[R_ESP] = launch_vmsa->vmsa.rsp; + env->regs[R_EBP] = launch_vmsa->vmsa.rbp; + env->regs[R_ESI] = launch_vmsa->vmsa.rsi; + env->regs[R_EDI] = launch_vmsa->vmsa.rdi; +#ifdef TARGET_X86_64 + env->regs[R_R8] = launch_vmsa->vmsa.r8; + env->regs[R_R9] = launch_vmsa->vmsa.r9; + env->regs[R_R10] = launch_vmsa->vmsa.r10; + env->regs[R_R11] = launch_vmsa->vmsa.r11; + env->regs[R_R12] = launch_vmsa->vmsa.r12; + env->regs[R_R13] = launch_vmsa->vmsa.r13; + env->regs[R_R14] = launch_vmsa->vmsa.r14; + env->regs[R_R15] = launch_vmsa->vmsa.r15; +#endif + env->eip = launch_vmsa->vmsa.rip; + env->eflags = launch_vmsa->vmsa.rflags; + + cpu_set_fpuc(env, launch_vmsa->vmsa.x87_fcw); + env->mxcsr = launch_vmsa->vmsa.mxcsr; + + break; + } + } +} + +static int check_sev_features(SevCommonState *sev_common, uint64_t sev_features, + Error **errp) +{ + /* + * Ensure SEV_FEATURES is configured for correct SEV hardware and that + * the requested features are supported. If SEV-SNP is enabled then + * that feature must be enabled, otherwise it must be cleared. 
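+ * For example, a VMSA supplied for a plain SEV-ES guest must not set SVM_SEV_FEAT_SNP_ACTIVE, an SEV-SNP guest must set it, and any other feature bit is only accepted if it also appears in supported_sev_features.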
+ */ + if (sev_snp_enabled() && !(sev_features & SVM_SEV_FEAT_SNP_ACTIVE)) { + error_setg( + errp, + "%s: SEV_SNP is enabled but is not enabled in VMSA sev_features", + __func__); + return -1; + } else if (!sev_snp_enabled() && + (sev_features & SVM_SEV_FEAT_SNP_ACTIVE)) { + error_setg( + errp, + "%s: SEV_SNP is not enabled but is enabled in VMSA sev_features", + __func__); + return -1; + } + if (sev_features & ~sev_common->supported_sev_features) { + error_setg(errp, + "%s: VMSA contains unsupported sev_features: %lX, " + "supported features: %lX", + __func__, sev_features, sev_common->supported_sev_features); + return -1; + } + return 0; +} + +static int check_vmsa_supported(SevCommonState *sev_common, hwaddr gpa, + const struct sev_es_save_area *vmsa, + Error **errp) +{ + struct sev_es_save_area vmsa_check; + + /* + * KVM always populates the VMSA at a fixed GPA which cannot be modified + * from userspace. Specifying a different GPA will not prevent the guest + * from starting but will cause the launch measurement to be different + * from expected. Therefore check that the provided GPA matches the KVM + * hardcoded value. + */ + if (gpa != KVM_VMSA_GPA) { + error_setg(errp, + "%s: The VMSA GPA must be %lX but is specified as %lX", + __func__, KVM_VMSA_GPA, gpa); + return -1; + } + + /* + * Clear all supported fields so we can then check the entire structure + * is zero. + */ + memcpy(&vmsa_check, vmsa, sizeof(struct sev_es_save_area)); + memset(&vmsa_check.es, 0, sizeof(vmsa_check.es)); + memset(&vmsa_check.cs, 0, sizeof(vmsa_check.cs)); + memset(&vmsa_check.ss, 0, sizeof(vmsa_check.ss)); + memset(&vmsa_check.ds, 0, sizeof(vmsa_check.ds)); + memset(&vmsa_check.fs, 0, sizeof(vmsa_check.fs)); + memset(&vmsa_check.gs, 0, sizeof(vmsa_check.gs)); + memset(&vmsa_check.gdtr, 0, sizeof(vmsa_check.gdtr)); + memset(&vmsa_check.idtr, 0, sizeof(vmsa_check.idtr)); + memset(&vmsa_check.ldtr, 0, sizeof(vmsa_check.ldtr)); + memset(&vmsa_check.tr, 0, sizeof(vmsa_check.tr)); + vmsa_check.efer = 0; + vmsa_check.cr0 = 0; + vmsa_check.cr3 = 0; + vmsa_check.cr4 = 0; + vmsa_check.xcr0 = 0; + vmsa_check.dr6 = 0; + vmsa_check.dr7 = 0; + vmsa_check.rax = 0; + vmsa_check.rcx = 0; + vmsa_check.rdx = 0; + vmsa_check.rbx = 0; + vmsa_check.rsp = 0; + vmsa_check.rbp = 0; + vmsa_check.rsi = 0; + vmsa_check.rdi = 0; + vmsa_check.r8 = 0; + vmsa_check.r9 = 0; + vmsa_check.r10 = 0; + vmsa_check.r11 = 0; + vmsa_check.r12 = 0; + vmsa_check.r13 = 0; + vmsa_check.r14 = 0; + vmsa_check.r15 = 0; + vmsa_check.rip = 0; + vmsa_check.rflags = 0; + + vmsa_check.g_pat = 0; + vmsa_check.xcr0 = 0; + + vmsa_check.x87_fcw = 0; + vmsa_check.mxcsr = 0; + + if (check_sev_features(sev_common, vmsa_check.sev_features, errp) < 0) { + return -1; + } + vmsa_check.sev_features = 0; + + if (!buffer_is_zero(&vmsa_check, sizeof(vmsa_check))) { + error_setg(errp, + "%s: The VMSA contains fields that are not " + "synchronized with KVM. Continuing would result in " + "either unpredictable guest behavior, or a " + "mismatched launch measurement.", + __func__); + return -1; + } + return 0; +} + +static int sev_set_cpu_context(uint16_t cpu_index, const void *ctx, + uint32_t ctx_len, hwaddr gpa, Error **errp) +{ + SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); + SevLaunchVmsa *launch_vmsa; + CPUState *cpu; + bool exists = false; + + /* + * Setting the CPU context is only supported for SEV-ES and SEV-SNP. The + * context buffer will contain a sev_es_save_area from the Linux kernel + * which is defined by "Table B-4. 
VMSA Layout, State Save Area for SEV-ES" + * in the AMD64 APM, Volume 2. + */ + + if (!sev_es_enabled()) { + error_setg(errp, "SEV: unable to set CPU context: Not supported"); + return -1; + } + + if (ctx_len < sizeof(struct sev_es_save_area)) { + error_setg(errp, "SEV: unable to set CPU context: " + "Invalid context provided"); + return -1; + } + + cpu = qemu_get_cpu(cpu_index); + if (!cpu) { + error_setg(errp, "SEV: unable to set CPU context for out of bounds " + "CPU index %d", cpu_index); + return -1; + } + + /* + * If the context of this VP has already been set then replace it with the + * new context. + */ + QTAILQ_FOREACH(launch_vmsa, &sev_common->launch_vmsa, next) + { + if (cpu_index == launch_vmsa->cpu_index) { + launch_vmsa->gpa = gpa; + memcpy(&launch_vmsa->vmsa, ctx, sizeof(launch_vmsa->vmsa)); + exists = true; + break; + } + } + + if (!exists) { + /* New VP context */ + launch_vmsa = g_new0(SevLaunchVmsa, 1); + memcpy(&launch_vmsa->vmsa, ctx, sizeof(launch_vmsa->vmsa)); + launch_vmsa->cpu_index = cpu_index; + launch_vmsa->gpa = gpa; + QTAILQ_INSERT_TAIL(&sev_common->launch_vmsa, launch_vmsa, next); + } + + /* Synchronise the VMSA with the current CPU state */ + sev_apply_cpu_context(cpu); + + return 0; +} + bool sev_enabled(void) { @@ -947,7 +1241,7 @@ out: } static uint32_t -sev_snp_mask_cpuid_features(X86ConfidentialGuest *cg, uint32_t feature, uint32_t index, +sev_snp_adjust_cpuid_features(X86ConfidentialGuest *cg, uint32_t feature, uint32_t index, int reg, uint32_t value) { switch (feature) { @@ -978,9 +1272,8 @@ sev_snp_mask_cpuid_features(X86ConfidentialGuest *cg, uint32_t feature, uint32_t return value; } -static int -sev_launch_update_data(SevCommonState *sev_common, hwaddr gpa, - uint8_t *addr, size_t len) +static int sev_launch_update_data(SevCommonState *sev_common, hwaddr gpa, + uint8_t *addr, size_t len, Error **errp) { int ret, fw_error; struct kvm_sev_launch_update_data update; @@ -995,8 +1288,8 @@ sev_launch_update_data(SevCommonState *sev_common, hwaddr gpa, ret = sev_ioctl(sev_common->sev_fd, KVM_SEV_LAUNCH_UPDATE_DATA, &update, &fw_error); if (ret) { - error_report("%s: LAUNCH_UPDATE ret=%d fw_error=%d '%s'", - __func__, ret, fw_error, fw_error_to_str(fw_error)); + error_setg(errp, "%s: LAUNCH_UPDATE ret=%d fw_error=%d '%s'", __func__, + ret, fw_error, fw_error_to_str(fw_error)); } return ret; @@ -1006,6 +1299,16 @@ static int sev_launch_update_vmsa(SevGuestState *sev_guest) { int ret, fw_error; + CPUState *cpu; + + /* + * The initial CPU state is measured as part of KVM_SEV_LAUNCH_UPDATE_VMSA. + * Synchronise the CPU state to any provided launch VMSA structures. 
+ */ + CPU_FOREACH(cpu) { + sev_apply_cpu_context(cpu); + } + ret = sev_ioctl(SEV_COMMON(sev_guest)->sev_fd, KVM_SEV_LAUNCH_UPDATE_VMSA, NULL, &fw_error); @@ -1124,8 +1427,8 @@ sev_launch_finish(SevCommonState *sev_common) migrate_add_blocker(&sev_mig_blocker, &error_fatal); } -static int -snp_launch_update_data(uint64_t gpa, void *hva, size_t len, int type) +static int snp_launch_update_data(uint64_t gpa, void *hva, size_t len, + int type, Error **errp) { SevLaunchUpdateData *data; @@ -1140,23 +1443,21 @@ snp_launch_update_data(uint64_t gpa, void *hva, size_t len, int type) return 0; } -static int -sev_snp_launch_update_data(SevCommonState *sev_common, hwaddr gpa, - uint8_t *ptr, size_t len) +static int sev_snp_launch_update_data(SevCommonState *sev_common, hwaddr gpa, + uint8_t *ptr, size_t len, Error **errp) { - int ret = snp_launch_update_data(gpa, ptr, len, - KVM_SEV_SNP_PAGE_TYPE_NORMAL); - return ret; + return snp_launch_update_data(gpa, ptr, len, + KVM_SEV_SNP_PAGE_TYPE_NORMAL, errp); } static int sev_snp_cpuid_info_fill(SnpCpuidInfo *snp_cpuid_info, - const KvmCpuidInfo *kvm_cpuid_info) + const KvmCpuidInfo *kvm_cpuid_info, Error **errp) { size_t i; if (kvm_cpuid_info->cpuid.nent > SNP_CPUID_FUNCTION_MAXCOUNT) { - error_report("SEV-SNP: CPUID entry count (%d) exceeds max (%d)", + error_setg(errp, "SEV-SNP: CPUID entry count (%d) exceeds max (%d)", kvm_cpuid_info->cpuid.nent, SNP_CPUID_FUNCTION_MAXCOUNT); return -1; } @@ -1198,8 +1499,8 @@ sev_snp_cpuid_info_fill(SnpCpuidInfo *snp_cpuid_info, return 0; } -static int -snp_launch_update_cpuid(uint32_t cpuid_addr, void *hva, size_t cpuid_len) +static int snp_launch_update_cpuid(uint32_t cpuid_addr, void *hva, + size_t cpuid_len, Error **errp) { KvmCpuidInfo kvm_cpuid_info = {0}; SnpCpuidInfo snp_cpuid_info; @@ -1216,26 +1517,25 @@ snp_launch_update_cpuid(uint32_t cpuid_addr, void *hva, size_t cpuid_len) } while (ret == -E2BIG); if (ret) { - error_report("SEV-SNP: unable to query CPUID values for CPU: '%s'", - strerror(-ret)); - return 1; + error_setg(errp, "SEV-SNP: unable to query CPUID values for CPU: '%s'", + strerror(-ret)); + return -1; } - ret = sev_snp_cpuid_info_fill(&snp_cpuid_info, &kvm_cpuid_info); - if (ret) { - error_report("SEV-SNP: failed to generate CPUID table information"); - return 1; + ret = sev_snp_cpuid_info_fill(&snp_cpuid_info, &kvm_cpuid_info, errp); + if (ret < 0) { + return -1; } memcpy(hva, &snp_cpuid_info, sizeof(snp_cpuid_info)); return snp_launch_update_data(cpuid_addr, hva, cpuid_len, - KVM_SEV_SNP_PAGE_TYPE_CPUID); + KVM_SEV_SNP_PAGE_TYPE_CPUID, errp); } -static int -snp_launch_update_kernel_hashes(SevSnpGuestState *sev_snp, uint32_t addr, - void *hva, uint32_t len) +static int snp_launch_update_kernel_hashes(SevSnpGuestState *sev_snp, + uint32_t addr, void *hva, + uint32_t len, Error **errp) { int type = KVM_SEV_SNP_PAGE_TYPE_ZERO; if (sev_snp->parent_obj.kernel_hashes) { @@ -1247,7 +1547,7 @@ snp_launch_update_kernel_hashes(SevSnpGuestState *sev_snp, uint32_t addr, sizeof(*sev_snp->kernel_hashes_data)); type = KVM_SEV_SNP_PAGE_TYPE_NORMAL; } - return snp_launch_update_data(addr, hva, len, type); + return snp_launch_update_data(addr, hva, len, type, errp); } static int @@ -1285,12 +1585,14 @@ snp_populate_metadata_pages(SevSnpGuestState *sev_snp, } if (type == KVM_SEV_SNP_PAGE_TYPE_CPUID) { - ret = snp_launch_update_cpuid(desc->base, hva, desc->len); + ret = snp_launch_update_cpuid(desc->base, hva, desc->len, + &error_fatal); } else if (desc->type == SEV_DESC_TYPE_SNP_KERNEL_HASHES) { ret = 
snp_launch_update_kernel_hashes(sev_snp, desc->base, hva, - desc->len); + desc->len, &error_fatal); } else { - ret = snp_launch_update_data(desc->base, hva, desc->len, type); + ret = snp_launch_update_data(desc->base, hva, desc->len, type, + &error_fatal); } if (ret) { @@ -1312,18 +1614,26 @@ sev_snp_launch_finish(SevCommonState *sev_common) struct kvm_sev_snp_launch_finish *finish = &sev_snp->kvm_finish_conf; /* - * To boot the SNP guest, the hypervisor is required to populate the CPUID - * and Secrets page before finalizing the launch flow. The location of - * the secrets and CPUID page is available through the OVMF metadata GUID. + * Populate all the metadata pages if not using an IGVM file. In the case + * where an IGVM file is provided it will be used to configure the metadata + * pages directly. */ - metadata = pc_system_get_ovmf_sev_metadata_ptr(); - if (metadata == NULL) { - error_report("%s: Failed to locate SEV metadata header", __func__); - exit(1); - } + if (!X86_MACHINE(qdev_get_machine())->igvm) { + /* + * To boot the SNP guest, the hypervisor is required to populate the + * CPUID and Secrets page before finalizing the launch flow. The + * location of the secrets and CPUID page is available through the + * OVMF metadata GUID. + */ + metadata = pc_system_get_ovmf_sev_metadata_ptr(); + if (metadata == NULL) { + error_report("%s: Failed to locate SEV metadata header", __func__); + exit(1); + } - /* Populate all the metadata pages */ - snp_populate_metadata_pages(sev_snp, metadata); + /* Populate all the metadata pages */ + snp_populate_metadata_pages(sev_snp, metadata); + } QTAILQ_FOREACH(data, &launch_update, next) { ret = sev_snp_launch_update(sev_snp, data); @@ -1433,6 +1743,39 @@ static int sev_snp_kvm_type(X86ConfidentialGuest *cg) return KVM_X86_SNP_VM; } +static int sev_init_supported_features(ConfidentialGuestSupport *cgs, + SevCommonState *sev_common, Error **errp) +{ + X86ConfidentialGuestClass *x86_klass = + X86_CONFIDENTIAL_GUEST_GET_CLASS(cgs); + /* + * Older kernels do not support query or setting of sev_features. In this + * case the set of supported features must be zero to match the settings + * in the kernel. 
+ */ + if (x86_klass->kvm_type(X86_CONFIDENTIAL_GUEST(sev_common)) == + KVM_X86_DEFAULT_VM) { + sev_common->supported_sev_features = 0; + return 0; + } + + /* Query KVM for the supported set of sev_features */ + struct kvm_device_attr attr = { + .group = KVM_X86_GRP_SEV, + .attr = KVM_X86_SEV_VMSA_FEATURES, + .addr = (unsigned long)&sev_common->supported_sev_features, + }; + if (kvm_ioctl(kvm_state, KVM_GET_DEVICE_ATTR, &attr) < 0) { + error_setg(errp, "%s: failed to query supported sev_features", + __func__); + return -1; + } + if (sev_snp_enabled()) { + sev_common->supported_sev_features |= SVM_SEV_FEAT_SNP_ACTIVE; + } + return 0; +} + static int sev_common_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) { char *devname; @@ -1513,6 +1856,10 @@ static int sev_common_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) } } + if (sev_init_supported_features(cgs, sev_common, errp) < 0) { + return -1; + } + trace_kvm_sev_init(); switch (x86_klass->kvm_type(X86_CONFIDENTIAL_GUEST(sev_common))) { case KVM_X86_DEFAULT_VM: @@ -1524,6 +1871,40 @@ static int sev_common_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) case KVM_X86_SEV_ES_VM: case KVM_X86_SNP_VM: { struct kvm_sev_init args = { 0 }; + MachineState *machine = MACHINE(qdev_get_machine()); + X86MachineState *x86machine = X86_MACHINE(qdev_get_machine()); + + /* + * If configuration is provided via an IGVM file then the IGVM file + * might contain configuration of the initial vcpu context. For SEV + * the vcpu context includes the sev_features which should be applied + * to the vcpu. + * + * KVM does not synchronize sev_features from CPU state. Instead it + * requires sev_features to be provided as part of this initialization + * call which is subsequently automatically applied to the VMSA of + * each vcpu. + * + * The IGVM file is normally processed after initialization. Therefore + * we need to pre-process it here to extract sev_features in order to + * provide it to KVM_SEV_INIT2. Each cgs_* function that is called by + * the IGVM processor detects this pre-process by observing the state + * as SEV_STATE_UNINIT. + */ + if (x86machine->igvm) { + if (IGVM_CFG_GET_CLASS(x86machine->igvm) + ->process(x86machine->igvm, machine->cgs, true, errp) == + -1) { + return -1; + } + /* + * KVM maintains a bitmask of allowed sev_features. This does not + * include SVM_SEV_FEAT_SNP_ACTIVE which is set accordingly by KVM + * itself. Therefore we need to clear this flag. 
+ */ + args.vmsa_features = sev_common->sev_features & + ~SVM_SEV_FEAT_SNP_ACTIVE; + } ret = sev_ioctl(sev_common->sev_fd, KVM_SEV_INIT2, &args, &fw_error); break; @@ -1623,9 +2004,8 @@ sev_encrypt_flash(hwaddr gpa, uint8_t *ptr, uint64_t len, Error **errp) if (sev_check_state(sev_common, SEV_STATE_LAUNCH_UPDATE)) { int ret; - ret = klass->launch_update_data(sev_common, gpa, ptr, len); + ret = klass->launch_update_data(sev_common, gpa, ptr, len, errp); if (ret < 0) { - error_setg(errp, "SEV: Failed to encrypt pflash rom"); return ret; } } @@ -1790,40 +2170,109 @@ sev_es_find_reset_vector(void *flash_ptr, uint64_t flash_size, return sev_es_parse_reset_block(info, addr); } -void sev_es_set_reset_vector(CPUState *cpu) + +static void seg_to_vmsa(const SegmentCache *cpu_seg, struct vmcb_seg *vmsa_seg) { - X86CPU *x86; - CPUX86State *env; - ConfidentialGuestSupport *cgs = MACHINE(qdev_get_machine())->cgs; - SevCommonState *sev_common = SEV_COMMON( - object_dynamic_cast(OBJECT(cgs), TYPE_SEV_COMMON)); + vmsa_seg->selector = cpu_seg->selector; + vmsa_seg->base = cpu_seg->base; + vmsa_seg->limit = cpu_seg->limit; + vmsa_seg->attrib = FLAGS_SEGCACHE_TO_VMSA(cpu_seg->flags); +} - /* Only update if we have valid reset information */ - if (!sev_common || !sev_common->reset_data_valid) { - return; - } +static void initialize_vmsa(const CPUState *cpu, struct sev_es_save_area *vmsa) +{ + const X86CPU *x86 = X86_CPU(cpu); + const CPUX86State *env = &x86->env; - /* Do not update the BSP reset state */ - if (cpu->cpu_index == 0) { - return; + /* + * Initialize the SEV-ES save area from the current state of + * the CPU. The entire state does not need to be copied, only the state + * that is copied back to the CPUState in sev_apply_cpu_context. + */ + memset(vmsa, 0, sizeof(struct sev_es_save_area)); + vmsa->efer = env->efer; + vmsa->cr0 = env->cr[0]; + vmsa->cr3 = env->cr[3]; + vmsa->cr4 = env->cr[4]; + vmsa->xcr0 = env->xcr0; + vmsa->g_pat = env->pat; + + seg_to_vmsa(&env->segs[R_CS], &vmsa->cs); + seg_to_vmsa(&env->segs[R_DS], &vmsa->ds); + seg_to_vmsa(&env->segs[R_ES], &vmsa->es); + seg_to_vmsa(&env->segs[R_FS], &vmsa->fs); + seg_to_vmsa(&env->segs[R_GS], &vmsa->gs); + seg_to_vmsa(&env->segs[R_SS], &vmsa->ss); + + seg_to_vmsa(&env->gdt, &vmsa->gdtr); + seg_to_vmsa(&env->idt, &vmsa->idtr); + seg_to_vmsa(&env->ldt, &vmsa->ldtr); + seg_to_vmsa(&env->tr, &vmsa->tr); + + vmsa->dr6 = env->dr[6]; + vmsa->dr7 = env->dr[7]; + + vmsa->rax = env->regs[R_EAX]; + vmsa->rcx = env->regs[R_ECX]; + vmsa->rdx = env->regs[R_EDX]; + vmsa->rbx = env->regs[R_EBX]; + vmsa->rsp = env->regs[R_ESP]; + vmsa->rbp = env->regs[R_EBP]; + vmsa->rsi = env->regs[R_ESI]; + vmsa->rdi = env->regs[R_EDI]; + +#ifdef TARGET_X86_64 + vmsa->r8 = env->regs[R_R8]; + vmsa->r9 = env->regs[R_R9]; + vmsa->r10 = env->regs[R_R10]; + vmsa->r11 = env->regs[R_R11]; + vmsa->r12 = env->regs[R_R12]; + vmsa->r13 = env->regs[R_R13]; + vmsa->r14 = env->regs[R_R14]; + vmsa->r15 = env->regs[R_R15]; +#endif + + vmsa->rip = env->eip; + vmsa->rflags = env->eflags; +} + +static void sev_es_set_ap_context(uint32_t reset_addr) +{ + CPUState *cpu; + struct sev_es_save_area vmsa; + SegmentCache cs; + + cs.selector = 0xf000; + cs.base = reset_addr & 0xffff0000; + cs.limit = 0xffff; + cs.flags = DESC_P_MASK | DESC_S_MASK | DESC_CS_MASK | DESC_R_MASK | + DESC_A_MASK; + + CPU_FOREACH(cpu) { + if (cpu->cpu_index == 0) { + /* Do not update the BSP reset state */ + continue; + } + initialize_vmsa(cpu, &vmsa); + seg_to_vmsa(&cs, &vmsa.cs); + vmsa.rip = reset_addr & 0x0000ffff; + 
sev_set_cpu_context(cpu->cpu_index, &vmsa, + sizeof(struct sev_es_save_area), + 0, &error_fatal); } +} - x86 = X86_CPU(cpu); - env = &x86->env; - - cpu_x86_load_seg_cache(env, R_CS, 0xf000, sev_common->reset_cs, 0xffff, - DESC_P_MASK | DESC_S_MASK | DESC_CS_MASK | - DESC_R_MASK | DESC_A_MASK); - - env->eip = sev_common->reset_ip; +void sev_es_set_reset_vector(CPUState *cpu) +{ + if (sev_enabled()) { + sev_apply_cpu_context(cpu); + } } int sev_es_save_reset_vector(void *flash_ptr, uint64_t flash_size) { - CPUState *cpu; uint32_t addr; int ret; - SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); if (!sev_es_enabled()) { return 0; @@ -1836,14 +2285,12 @@ int sev_es_save_reset_vector(void *flash_ptr, uint64_t flash_size) return ret; } + /* + * The reset vector is saved into a CPU context for each AP but not for + * the BSP. This is applied during guest startup or when the CPU is reset. + */ if (addr) { - sev_common->reset_cs = addr & 0xffff0000; - sev_common->reset_ip = addr & 0x0000ffff; - sev_common->reset_data_valid = true; - - CPU_FOREACH(cpu) { - sev_es_set_reset_vector(cpu); - } + sev_es_set_ap_context(addr); } return 0; @@ -2045,6 +2492,237 @@ static void sev_common_set_kernel_hashes(Object *obj, bool value, Error **errp) SEV_COMMON(obj)->kernel_hashes = value; } +static bool cgs_check_support(ConfidentialGuestPlatformType platform, + uint16_t platform_version, uint8_t highest_vtl, + uint64_t shared_gpa_boundary) +{ + return (((platform == CGS_PLATFORM_SEV_SNP) && sev_snp_enabled()) || + ((platform == CGS_PLATFORM_SEV_ES) && sev_es_enabled()) || + ((platform == CGS_PLATFORM_SEV) && sev_enabled())); +} + +static int cgs_set_guest_state(hwaddr gpa, uint8_t *ptr, uint64_t len, + ConfidentialGuestPageType memory_type, + uint16_t cpu_index, Error **errp) +{ + SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); + SevCommonStateClass *klass = SEV_COMMON_GET_CLASS(sev_common); + + if (sev_common->state == SEV_STATE_UNINIT) { + /* Pre-processing of IGVM file called from sev_common_kvm_init() */ + if ((cpu_index == 0) && (memory_type == CGS_PAGE_TYPE_VMSA)) { + const struct sev_es_save_area *sa = + (const struct sev_es_save_area *)ptr; + if (len < sizeof(*sa)) { + error_setg(errp, "%s: invalid VMSA length encountered", + __func__); + return -1; + } + if (check_sev_features(sev_common, sa->sev_features, errp) < 0) { + return -1; + } + sev_common->sev_features = sa->sev_features; + } + return 0; + } + + if (!sev_enabled()) { + error_setg(errp, "%s: attempt to configure guest memory, but SEV " + "is not enabled", __func__); + return -1; + } + + switch (memory_type) { + case CGS_PAGE_TYPE_NORMAL: + case CGS_PAGE_TYPE_ZERO: + return klass->launch_update_data(sev_common, gpa, ptr, len, errp); + + case CGS_PAGE_TYPE_VMSA: + if (!sev_es_enabled()) { + error_setg(errp, + "%s: attempt to configure initial VMSA, but SEV-ES " + "is not supported", + __func__); + return -1; + } + if (check_vmsa_supported(sev_common, gpa, + (const struct sev_es_save_area *)ptr, + errp) < 0) { + return -1; + } + return sev_set_cpu_context(cpu_index, ptr, len, gpa, errp); + + case CGS_PAGE_TYPE_UNMEASURED: + if (sev_snp_enabled()) { + return snp_launch_update_data( + gpa, ptr, len, KVM_SEV_SNP_PAGE_TYPE_UNMEASURED, errp); + } + /* No action required if not SEV-SNP */ + return 0; + + case CGS_PAGE_TYPE_SECRETS: + if (!sev_snp_enabled()) { + error_setg(errp, + "%s: attempt to configure secrets page, but SEV-SNP " + "is not supported", + __func__); + return -1; + } + return 
snp_launch_update_data(gpa, ptr, len, + KVM_SEV_SNP_PAGE_TYPE_SECRETS, errp); + + case CGS_PAGE_TYPE_REQUIRED_MEMORY: + if (kvm_convert_memory(gpa, len, true) < 0) { + error_setg( + errp, + "%s: failed to configure required memory. gpa: %lX, type: %d", + __func__, gpa, memory_type); + return -1; + } + return 0; + + case CGS_PAGE_TYPE_CPUID: + if (!sev_snp_enabled()) { + error_setg(errp, + "%s: attempt to configure CPUID page, but SEV-SNP " + "is not supported", + __func__); + return -1; + } + return snp_launch_update_cpuid(gpa, ptr, len, errp); + } + error_setg(errp, "%s: failed to update guest. gpa: %lX, type: %d", __func__, + gpa, memory_type); + return -1; +} + +static int cgs_get_mem_map_entry(int index, + ConfidentialGuestMemoryMapEntry *entry, + Error **errp) +{ + struct e820_entry *table; + int num_entries; + + SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); + if (sev_common->state == SEV_STATE_UNINIT) { + /* Pre-processing of IGVM file called from sev_common_kvm_init() */ + return 1; + } + + num_entries = e820_get_table(&table); + if ((index < 0) || (index >= num_entries)) { + return 1; + } + entry->gpa = table[index].address; + entry->size = table[index].length; + switch (table[index].type) { + case E820_RAM: + entry->type = CGS_MEM_RAM; + break; + case E820_RESERVED: + entry->type = CGS_MEM_RESERVED; + break; + case E820_ACPI: + entry->type = CGS_MEM_ACPI; + break; + case E820_NVS: + entry->type = CGS_MEM_NVS; + break; + case E820_UNUSABLE: + entry->type = CGS_MEM_UNUSABLE; + break; + } + return 0; +} + +static int cgs_set_guest_policy(ConfidentialGuestPolicyType policy_type, + uint64_t policy, void *policy_data1, + uint32_t policy_data1_size, void *policy_data2, + uint32_t policy_data2_size, Error **errp) +{ + SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); + if (sev_common->state == SEV_STATE_UNINIT) { + /* Pre-processing of IGVM file called from sev_common_kvm_init() */ + return 0; + } + + if (policy_type != GUEST_POLICY_SEV) { + error_setg(errp, "%s: Invalid guest policy type provided for SEV: %d", + __func__, policy_type); + return -1; + } + /* + * SEV-SNP handles policy differently. The policy flags are defined in + * kvm_start_conf.policy and an ID block and ID auth can be provided. + */ + if (sev_snp_enabled()) { + SevSnpGuestState *sev_snp_guest = + SEV_SNP_GUEST(MACHINE(qdev_get_machine())->cgs); + struct kvm_sev_snp_launch_finish *finish = + &sev_snp_guest->kvm_finish_conf; + + /* + * The policy consists of flags in 'policy' and optionally an ID block + * and ID auth in policy_data1 and policy_data2 respectively. The ID + * block and auth are optional so clear any previous ID block and auth + * and set them if provided, but always set the policy flags. 
+ */ + g_free(sev_snp_guest->id_block); + g_free((guchar *)finish->id_block_uaddr); + g_free(sev_snp_guest->id_auth); + g_free((guchar *)finish->id_auth_uaddr); + sev_snp_guest->id_block = NULL; + finish->id_block_uaddr = 0; + sev_snp_guest->id_auth = NULL; + finish->id_auth_uaddr = 0; + + if (policy_data1_size > 0) { + struct sev_snp_id_authentication *id_auth = + (struct sev_snp_id_authentication *)policy_data2; + + if (policy_data1_size != KVM_SEV_SNP_ID_BLOCK_SIZE) { + error_setg(errp, "%s: Invalid SEV-SNP ID block: incorrect size", + __func__); + return -1; + } + if (policy_data2_size != KVM_SEV_SNP_ID_AUTH_SIZE) { + error_setg(errp, + "%s: Invalid SEV-SNP ID auth block: incorrect size", + __func__); + return -1; + } + assert(policy_data1 != NULL); + assert(policy_data2 != NULL); + + finish->id_block_uaddr = + (__u64)g_memdup2(policy_data1, KVM_SEV_SNP_ID_BLOCK_SIZE); + finish->id_auth_uaddr = + (__u64)g_memdup2(policy_data2, KVM_SEV_SNP_ID_AUTH_SIZE); + + /* + * Check if an author key has been provided and use that to flag + * whether the author key is enabled. The first of the author key + * must be non-zero to indicate the key type, which will currently + * always be 2. + */ + sev_snp_guest->kvm_finish_conf.auth_key_en = + id_auth->author_key[0] ? 1 : 0; + finish->id_block_en = 1; + } + sev_snp_guest->kvm_start_conf.policy = policy; + } else { + SevGuestState *sev_guest = SEV_GUEST(MACHINE(qdev_get_machine())->cgs); + /* Only the policy flags are supported for SEV and SEV-ES */ + if ((policy_data1_size > 0) || (policy_data2_size > 0) || !sev_guest) { + error_setg(errp, "%s: An ID block/ID auth block has been provided " + "but SEV-SNP is not enabled", __func__); + return -1; + } + sev_guest->policy = policy; + } + return 0; +} + static void sev_common_class_init(ObjectClass *oc, const void *data) { @@ -2068,6 +2746,8 @@ static void sev_common_instance_init(Object *obj) { SevCommonState *sev_common = SEV_COMMON(obj); + ConfidentialGuestSupportClass *cgs = + CONFIDENTIAL_GUEST_SUPPORT_GET_CLASS(obj); sev_common->kvm_type = -1; @@ -2078,6 +2758,12 @@ sev_common_instance_init(Object *obj) object_property_add_uint32_ptr(obj, "reduced-phys-bits", &sev_common->reduced_phys_bits, OBJ_PROP_FLAG_READWRITE); + cgs->check_support = cgs_check_support; + cgs->set_guest_state = cgs_set_guest_state; + cgs->get_mem_map_entry = cgs_get_mem_map_entry; + cgs->set_guest_policy = cgs_set_guest_policy; + + QTAILQ_INIT(&sev_common->launch_vmsa); } /* sev guest info common to sev/sev-es/sev-snp */ @@ -2405,7 +3091,7 @@ sev_snp_guest_class_init(ObjectClass *oc, const void *data) klass->launch_finish = sev_snp_launch_finish; klass->launch_update_data = sev_snp_launch_update_data; klass->kvm_init = sev_snp_kvm_init; - x86_klass->mask_cpuid_features = sev_snp_mask_cpuid_features; + x86_klass->adjust_cpuid_features = sev_snp_adjust_cpuid_features; x86_klass->kvm_type = sev_snp_kvm_type; object_class_property_add(oc, "policy", "uint64", diff --git a/target/i386/sev.h b/target/i386/sev.h index 373669e..9db1a80 100644 --- a/target/i386/sev.h +++ b/target/i386/sev.h @@ -44,6 +44,8 @@ bool sev_snp_enabled(void); #define SEV_SNP_POLICY_SMT 0x10000 #define SEV_SNP_POLICY_DBG 0x80000 +#define SVM_SEV_FEAT_SNP_ACTIVE 1 + typedef struct SevKernelLoaderContext { char *setup_data; size_t setup_size; @@ -55,6 +57,128 @@ typedef struct SevKernelLoaderContext { size_t cmdline_size; } SevKernelLoaderContext; +/* Save area definition for SEV-ES and SEV-SNP guests */ +struct QEMU_PACKED sev_es_save_area { + struct vmcb_seg es; + 
struct vmcb_seg cs; + struct vmcb_seg ss; + struct vmcb_seg ds; + struct vmcb_seg fs; + struct vmcb_seg gs; + struct vmcb_seg gdtr; + struct vmcb_seg ldtr; + struct vmcb_seg idtr; + struct vmcb_seg tr; + uint64_t vmpl0_ssp; + uint64_t vmpl1_ssp; + uint64_t vmpl2_ssp; + uint64_t vmpl3_ssp; + uint64_t u_cet; + uint8_t reserved_0xc8[2]; + uint8_t vmpl; + uint8_t cpl; + uint8_t reserved_0xcc[4]; + uint64_t efer; + uint8_t reserved_0xd8[104]; + uint64_t xss; + uint64_t cr4; + uint64_t cr3; + uint64_t cr0; + uint64_t dr7; + uint64_t dr6; + uint64_t rflags; + uint64_t rip; + uint64_t dr0; + uint64_t dr1; + uint64_t dr2; + uint64_t dr3; + uint64_t dr0_addr_mask; + uint64_t dr1_addr_mask; + uint64_t dr2_addr_mask; + uint64_t dr3_addr_mask; + uint8_t reserved_0x1c0[24]; + uint64_t rsp; + uint64_t s_cet; + uint64_t ssp; + uint64_t isst_addr; + uint64_t rax; + uint64_t star; + uint64_t lstar; + uint64_t cstar; + uint64_t sfmask; + uint64_t kernel_gs_base; + uint64_t sysenter_cs; + uint64_t sysenter_esp; + uint64_t sysenter_eip; + uint64_t cr2; + uint8_t reserved_0x248[32]; + uint64_t g_pat; + uint64_t dbgctl; + uint64_t br_from; + uint64_t br_to; + uint64_t last_excp_from; + uint64_t last_excp_to; + uint8_t reserved_0x298[80]; + uint32_t pkru; + uint32_t tsc_aux; + uint8_t reserved_0x2f0[24]; + uint64_t rcx; + uint64_t rdx; + uint64_t rbx; + uint64_t reserved_0x320; /* rsp already available at 0x01d8 */ + uint64_t rbp; + uint64_t rsi; + uint64_t rdi; + uint64_t r8; + uint64_t r9; + uint64_t r10; + uint64_t r11; + uint64_t r12; + uint64_t r13; + uint64_t r14; + uint64_t r15; + uint8_t reserved_0x380[16]; + uint64_t guest_exit_info_1; + uint64_t guest_exit_info_2; + uint64_t guest_exit_int_info; + uint64_t guest_nrip; + uint64_t sev_features; + uint64_t vintr_ctrl; + uint64_t guest_exit_code; + uint64_t virtual_tom; + uint64_t tlb_id; + uint64_t pcpu_id; + uint64_t event_inj; + uint64_t xcr0; + uint8_t reserved_0x3f0[16]; + + /* Floating point area */ + uint64_t x87_dp; + uint32_t mxcsr; + uint16_t x87_ftw; + uint16_t x87_fsw; + uint16_t x87_fcw; + uint16_t x87_fop; + uint16_t x87_ds; + uint16_t x87_cs; + uint64_t x87_rip; + uint8_t fpreg_x87[80]; + uint8_t fpreg_xmm[256]; + uint8_t fpreg_ymm[256]; +}; + +struct QEMU_PACKED sev_snp_id_authentication { + uint32_t id_key_alg; + uint32_t auth_key_algo; + uint8_t reserved[56]; + uint8_t id_block_sig[512]; + uint8_t id_key[1028]; + uint8_t reserved2[60]; + uint8_t id_key_sig[512]; + uint8_t author_key[1028]; + uint8_t reserved3[892]; +}; + bool sev_add_kernel_loader_hashes(SevKernelLoaderContext *ctx, Error **errp); int sev_encrypt_flash(hwaddr gpa, uint8_t *ptr, uint64_t len, Error **errp); diff --git a/target/i386/tcg/decode-new.c.inc b/target/i386/tcg/decode-new.c.inc index 55216e0..a50f57d 100644 --- a/target/i386/tcg/decode-new.c.inc +++ b/target/i386/tcg/decode-new.c.inc @@ -878,10 +878,10 @@ static const X86OpEntry opcodes_0F3A[256] = { [0x0e] = X86_OP_ENTRY4(VPBLENDW, V,x, H,x, W,x, vex4 cpuid(SSE41) avx2_256 p_66), [0x0f] = X86_OP_ENTRY4(PALIGNR, V,x, H,x, W,x, vex4 cpuid(SSSE3) mmx avx2_256 p_00_66), - [0x18] = X86_OP_ENTRY4(VINSERTx128, V,qq, H,qq, W,qq, vex6 chk(W0) cpuid(AVX) p_66), + [0x18] = X86_OP_ENTRY4(VINSERTx128, V,qq, H,qq, W,dq, vex6 chk(W0) cpuid(AVX) p_66), [0x19] = X86_OP_ENTRY3(VEXTRACTx128, W,dq, V,qq, I,b, vex6 chk(W0) cpuid(AVX) p_66), - [0x38] = X86_OP_ENTRY4(VINSERTx128, V,qq, H,qq, W,qq, vex6 chk(W0) cpuid(AVX2) p_66), + [0x38] = X86_OP_ENTRY4(VINSERTx128, V,qq, H,qq, W,dq, vex6 chk(W0) cpuid(AVX2) p_66), [0x39] = 
X86_OP_ENTRY3(VEXTRACTx128, W,dq, V,qq, I,b, vex6 chk(W0) cpuid(AVX2) p_66), /* Listed incorrectly as type 4 */ @@ -1541,7 +1541,7 @@ static void decode_group4_5(DisasContext *s, CPUX86State *env, X86OpEntry *entry [0x0b] = X86_OP_ENTRYr(CALLF_m, M,p), [0x0c] = X86_OP_ENTRYr(JMP_m, E,f64, zextT0), [0x0d] = X86_OP_ENTRYr(JMPF_m, M,p), - [0x0e] = X86_OP_ENTRYr(PUSH, E,f64), + [0x0e] = X86_OP_ENTRYr(PUSH, E,d64), }; int w = (*b & 1); @@ -2722,14 +2722,14 @@ static void disas_insn(DisasContext *s, CPUState *cpu) if (decode.e.check & X86_CHECK_i64) { goto illegal_op; } - if ((decode.e.check & X86_CHECK_i64_amd) && env->cpuid_vendor1 != CPUID_VENDOR_INTEL_1) { + if ((decode.e.check & X86_CHECK_i64_amd) && !IS_INTEL_CPU(env)) { goto illegal_op; } } else { if (decode.e.check & X86_CHECK_o64) { goto illegal_op; } - if ((decode.e.check & X86_CHECK_o64_intel) && env->cpuid_vendor1 == CPUID_VENDOR_INTEL_1) { + if ((decode.e.check & X86_CHECK_o64_intel) && IS_INTEL_CPU(env)) { goto illegal_op; } } diff --git a/target/i386/tcg/fpu_helper.c b/target/i386/tcg/fpu_helper.c index 1cbadb1..b3b2382 100644 --- a/target/i386/tcg/fpu_helper.c +++ b/target/i386/tcg/fpu_helper.c @@ -189,25 +189,25 @@ void cpu_init_fp_statuses(CPUX86State *env) set_float_default_nan_pattern(0b11000000, &env->mmx_status); set_float_default_nan_pattern(0b11000000, &env->sse_status); /* - * TODO: x86 does flush-to-zero detection after rounding (the SDM + * x86 does flush-to-zero detection after rounding (the SDM * section 10.2.3.3 on the FTZ bit of MXCSR says that we flush * when we detect underflow, which x86 does after rounding). */ - set_float_ftz_detection(float_ftz_before_rounding, &env->fp_status); - set_float_ftz_detection(float_ftz_before_rounding, &env->mmx_status); - set_float_ftz_detection(float_ftz_before_rounding, &env->sse_status); + set_float_ftz_detection(float_ftz_after_rounding, &env->fp_status); + set_float_ftz_detection(float_ftz_after_rounding, &env->mmx_status); + set_float_ftz_detection(float_ftz_after_rounding, &env->sse_status); } -static inline uint8_t save_exception_flags(CPUX86State *env) +static inline int save_exception_flags(CPUX86State *env) { - uint8_t old_flags = get_float_exception_flags(&env->fp_status); + int old_flags = get_float_exception_flags(&env->fp_status); set_float_exception_flags(0, &env->fp_status); return old_flags; } -static void merge_exception_flags(CPUX86State *env, uint8_t old_flags) +static void merge_exception_flags(CPUX86State *env, int old_flags) { - uint8_t new_flags = get_float_exception_flags(&env->fp_status); + int new_flags = get_float_exception_flags(&env->fp_status); float_raise(old_flags, &env->fp_status); fpu_set_exception(env, ((new_flags & float_flag_invalid ? FPUS_IE : 0) | @@ -215,12 +215,12 @@ static void merge_exception_flags(CPUX86State *env, uint8_t old_flags) (new_flags & float_flag_overflow ? FPUS_OE : 0) | (new_flags & float_flag_underflow ? FPUS_UE : 0) | (new_flags & float_flag_inexact ? FPUS_PE : 0) | - (new_flags & float_flag_input_denormal_flushed ? FPUS_DE : 0))); + (new_flags & float_flag_input_denormal_used ? 
FPUS_DE : 0))); } static inline floatx80 helper_fdiv(CPUX86State *env, floatx80 a, floatx80 b) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); floatx80 ret = floatx80_div(a, b, &env->fp_status); merge_exception_flags(env, old_flags); return ret; @@ -240,7 +240,7 @@ static void fpu_raise_exception(CPUX86State *env, uintptr_t retaddr) void helper_flds_FT0(CPUX86State *env, uint32_t val) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); union { float32 f; uint32_t i; @@ -253,7 +253,7 @@ void helper_flds_FT0(CPUX86State *env, uint32_t val) void helper_fldl_FT0(CPUX86State *env, uint64_t val) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); union { float64 f; uint64_t i; @@ -271,7 +271,7 @@ void helper_fildl_FT0(CPUX86State *env, int32_t val) void helper_flds_ST0(CPUX86State *env, uint32_t val) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); int new_fpstt; union { float32 f; @@ -288,7 +288,7 @@ void helper_flds_ST0(CPUX86State *env, uint32_t val) void helper_fldl_ST0(CPUX86State *env, uint64_t val) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); int new_fpstt; union { float64 f; @@ -338,7 +338,7 @@ void helper_fildll_ST0(CPUX86State *env, int64_t val) uint32_t helper_fsts_ST0(CPUX86State *env) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); union { float32 f; uint32_t i; @@ -351,7 +351,7 @@ uint32_t helper_fsts_ST0(CPUX86State *env) uint64_t helper_fstl_ST0(CPUX86State *env) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); union { float64 f; uint64_t i; @@ -364,7 +364,7 @@ uint64_t helper_fstl_ST0(CPUX86State *env) int32_t helper_fist_ST0(CPUX86State *env) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); int32_t val; val = floatx80_to_int32(ST0, &env->fp_status); @@ -378,7 +378,7 @@ int32_t helper_fist_ST0(CPUX86State *env) int32_t helper_fistl_ST0(CPUX86State *env) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); int32_t val; val = floatx80_to_int32(ST0, &env->fp_status); @@ -391,7 +391,7 @@ int32_t helper_fistl_ST0(CPUX86State *env) int64_t helper_fistll_ST0(CPUX86State *env) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); int64_t val; val = floatx80_to_int64(ST0, &env->fp_status); @@ -404,7 +404,7 @@ int64_t helper_fistll_ST0(CPUX86State *env) int32_t helper_fistt_ST0(CPUX86State *env) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); int32_t val; val = floatx80_to_int32_round_to_zero(ST0, &env->fp_status); @@ -418,7 +418,7 @@ int32_t helper_fistt_ST0(CPUX86State *env) int32_t helper_fisttl_ST0(CPUX86State *env) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); int32_t val; val = floatx80_to_int32_round_to_zero(ST0, &env->fp_status); @@ -431,7 +431,7 @@ int32_t helper_fisttl_ST0(CPUX86State *env) int64_t helper_fisttll_ST0(CPUX86State *env) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); int64_t val; val = floatx80_to_int64_round_to_zero(ST0, &env->fp_status); @@ -527,7 +527,7 @@ static const int fcom_ccval[4] = {0x0100, 0x4000, 0x0000, 0x4500}; void helper_fcom_ST0_FT0(CPUX86State *env) { 
- uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); FloatRelation ret; ret = floatx80_compare(ST0, FT0, &env->fp_status); @@ -537,7 +537,7 @@ void helper_fcom_ST0_FT0(CPUX86State *env) void helper_fucom_ST0_FT0(CPUX86State *env) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); FloatRelation ret; ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status); @@ -549,7 +549,7 @@ static const int fcomi_ccval[4] = {CC_C, CC_Z, 0, CC_Z | CC_P | CC_C}; void helper_fcomi_ST0_FT0(CPUX86State *env) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); int eflags; FloatRelation ret; @@ -562,7 +562,7 @@ void helper_fcomi_ST0_FT0(CPUX86State *env) void helper_fucomi_ST0_FT0(CPUX86State *env) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); int eflags; FloatRelation ret; @@ -575,28 +575,28 @@ void helper_fucomi_ST0_FT0(CPUX86State *env) void helper_fadd_ST0_FT0(CPUX86State *env) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); ST0 = floatx80_add(ST0, FT0, &env->fp_status); merge_exception_flags(env, old_flags); } void helper_fmul_ST0_FT0(CPUX86State *env) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); ST0 = floatx80_mul(ST0, FT0, &env->fp_status); merge_exception_flags(env, old_flags); } void helper_fsub_ST0_FT0(CPUX86State *env) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); ST0 = floatx80_sub(ST0, FT0, &env->fp_status); merge_exception_flags(env, old_flags); } void helper_fsubr_ST0_FT0(CPUX86State *env) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); ST0 = floatx80_sub(FT0, ST0, &env->fp_status); merge_exception_flags(env, old_flags); } @@ -615,28 +615,28 @@ void helper_fdivr_ST0_FT0(CPUX86State *env) void helper_fadd_STN_ST0(CPUX86State *env, int st_index) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); ST(st_index) = floatx80_add(ST(st_index), ST0, &env->fp_status); merge_exception_flags(env, old_flags); } void helper_fmul_STN_ST0(CPUX86State *env, int st_index) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); ST(st_index) = floatx80_mul(ST(st_index), ST0, &env->fp_status); merge_exception_flags(env, old_flags); } void helper_fsub_STN_ST0(CPUX86State *env, int st_index) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); ST(st_index) = floatx80_sub(ST(st_index), ST0, &env->fp_status); merge_exception_flags(env, old_flags); } void helper_fsubr_STN_ST0(CPUX86State *env, int st_index) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); ST(st_index) = floatx80_sub(ST0, ST(st_index), &env->fp_status); merge_exception_flags(env, old_flags); } @@ -861,7 +861,7 @@ void helper_fbld_ST0(CPUX86State *env, target_ulong ptr) void helper_fbst_ST0(CPUX86State *env, target_ulong ptr) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); int v; target_ulong mem_ref, mem_end; int64_t val; @@ -1136,7 +1136,7 @@ static const struct f2xm1_data f2xm1_table[65] = { void helper_f2xm1(CPUX86State *env) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); uint64_t sig = extractFloatx80Frac(ST0); int32_t 
exp = extractFloatx80Exp(ST0); bool sign = extractFloatx80Sign(ST0); @@ -1369,7 +1369,7 @@ static const struct fpatan_data fpatan_table[9] = { void helper_fpatan(CPUX86State *env) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); uint64_t arg0_sig = extractFloatx80Frac(ST0); int32_t arg0_exp = extractFloatx80Exp(ST0); bool arg0_sign = extractFloatx80Sign(ST0); @@ -1808,7 +1808,7 @@ void helper_fpatan(CPUX86State *env) void helper_fxtract(CPUX86State *env) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); CPU_LDoubleU temp; temp.d = ST0; @@ -1857,7 +1857,7 @@ void helper_fxtract(CPUX86State *env) static void helper_fprem_common(CPUX86State *env, bool mod) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); uint64_t quotient; CPU_LDoubleU temp0, temp1; int exp0, exp1, expdiff; @@ -2053,7 +2053,7 @@ static void helper_fyl2x_common(CPUX86State *env, floatx80 arg, int32_t *exp, void helper_fyl2xp1(CPUX86State *env) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); uint64_t arg0_sig = extractFloatx80Frac(ST0); int32_t arg0_exp = extractFloatx80Exp(ST0); bool arg0_sign = extractFloatx80Sign(ST0); @@ -2151,7 +2151,7 @@ void helper_fyl2xp1(CPUX86State *env) void helper_fyl2x(CPUX86State *env) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); uint64_t arg0_sig = extractFloatx80Frac(ST0); int32_t arg0_exp = extractFloatx80Exp(ST0); bool arg0_sign = extractFloatx80Sign(ST0); @@ -2298,7 +2298,7 @@ void helper_fyl2x(CPUX86State *env) void helper_fsqrt(CPUX86State *env) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); if (floatx80_is_neg(ST0)) { env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */ env->fpus |= 0x400; @@ -2324,14 +2324,14 @@ void helper_fsincos(CPUX86State *env) void helper_frndint(CPUX86State *env) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); ST0 = floatx80_round_to_int(ST0, &env->fp_status); merge_exception_flags(env, old_flags); } void helper_fscale(CPUX86State *env) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); if (floatx80_invalid_encoding(ST1, &env->fp_status) || floatx80_invalid_encoding(ST0, &env->fp_status)) { float_raise(float_flag_invalid, &env->fp_status); @@ -2369,7 +2369,7 @@ void helper_fscale(CPUX86State *env) } else { int n; FloatX80RoundPrec save = env->fp_status.floatx80_rounding_precision; - uint8_t save_flags = get_float_exception_flags(&env->fp_status); + int save_flags = get_float_exception_flags(&env->fp_status); set_float_exception_flags(0, &env->fp_status); n = floatx80_to_int32_round_to_zero(ST1, &env->fp_status); set_float_exception_flags(save_flags, &env->fp_status); @@ -3254,6 +3254,7 @@ void update_mxcsr_status(CPUX86State *env) /* Set exception flags. */ set_float_exception_flags((mxcsr & FPUS_IE ? float_flag_invalid : 0) | + (mxcsr & FPUS_DE ? float_flag_input_denormal_used : 0) | (mxcsr & FPUS_ZE ? float_flag_divbyzero : 0) | (mxcsr & FPUS_OE ? float_flag_overflow : 0) | (mxcsr & FPUS_UE ? 
float_flag_underflow : 0) | @@ -3269,15 +3270,9 @@ void update_mxcsr_status(CPUX86State *env) void update_mxcsr_from_sse_status(CPUX86State *env) { - uint8_t flags = get_float_exception_flags(&env->sse_status); - /* - * The MXCSR denormal flag has opposite semantics to - * float_flag_input_denormal_flushed (the softfloat code sets that flag - * only when flushing input denormals to zero, but SSE sets it - * only when not flushing them to zero), so is not converted - * here. - */ + int flags = get_float_exception_flags(&env->sse_status); env->mxcsr |= ((flags & float_flag_invalid ? FPUS_IE : 0) | + (flags & float_flag_input_denormal_used ? FPUS_DE : 0) | (flags & float_flag_divbyzero ? FPUS_ZE : 0) | (flags & float_flag_overflow ? FPUS_OE : 0) | (flags & float_flag_underflow ? FPUS_UE : 0) | diff --git a/target/i386/tcg/helper-tcg.h b/target/i386/tcg/helper-tcg.h index 6b3f198..be011b0 100644 --- a/target/i386/tcg/helper-tcg.h +++ b/target/i386/tcg/helper-tcg.h @@ -97,7 +97,7 @@ static inline unsigned int compute_pf(uint8_t x) /* misc_helper.c */ void cpu_load_eflags(CPUX86State *env, int eflags, int update_mask); -/* sysemu/svm_helper.c */ +/* system/svm_helper.c */ #ifndef CONFIG_USER_ONLY G_NORETURN void cpu_vmexit(CPUX86State *nenv, uint32_t exit_code, uint64_t exit_info_1, uintptr_t retaddr); @@ -115,7 +115,7 @@ int exception_has_error_code(int intno); /* smm_helper.c */ void do_smm_enter(X86CPU *cpu); -/* sysemu/bpt_helper.c */ +/* system/bpt_helper.c */ bool check_hw_breakpoints(CPUX86State *env, bool force_dr6_update); /* diff --git a/target/i386/tcg/seg_helper.c b/target/i386/tcg/seg_helper.c index 071f3fb..f49fe85 100644 --- a/target/i386/tcg/seg_helper.c +++ b/target/i386/tcg/seg_helper.c @@ -456,7 +456,7 @@ static void switch_tss_ra(CPUX86State *env, int tss_selector, new_segs[i] = access_ldw(&new, tss_base + (0x48 + i * 4)); } new_ldt = access_ldw(&new, tss_base + 0x60); - new_trap = access_ldl(&new, tss_base + 0x64); + new_trap = access_ldw(&new, tss_base + 0x64) & 1; } else { /* 16 bit */ new_cr3 = 0; diff --git a/target/i386/tcg/system/excp_helper.c b/target/i386/tcg/system/excp_helper.c index c162621..f622b5d 100644 --- a/target/i386/tcg/system/excp_helper.c +++ b/target/i386/tcg/system/excp_helper.c @@ -25,7 +25,6 @@ #include "exec/page-protection.h" #include "exec/target_page.h" #include "exec/tlb-flags.h" -#include "exec/tswap.h" #include "tcg/helper-tcg.h" typedef struct TranslateParams { @@ -593,7 +592,8 @@ static bool get_physical_address(CPUX86State *env, vaddr addr, if (sext != 0 && sext != -1) { *err = (TranslateFault){ .exception_index = EXCP0D_GPF, - .cr2 = addr, + /* non-canonical #GP doesn't change CR2 */ + .cr2 = env->cr[2], }; return false; } diff --git a/target/i386/tcg/system/seg_helper.c b/target/i386/tcg/system/seg_helper.c index d4ea890..8c7856b 100644 --- a/target/i386/tcg/system/seg_helper.c +++ b/target/i386/tcg/system/seg_helper.c @@ -133,7 +133,7 @@ bool x86_cpu_exec_halt(CPUState *cpu) X86CPU *x86_cpu = X86_CPU(cpu); CPUX86State *env = &x86_cpu->env; - if (cpu->interrupt_request & CPU_INTERRUPT_POLL) { + if (cpu_test_interrupt(cpu, CPU_INTERRUPT_POLL)) { bql_lock(); apic_poll_irq(x86_cpu->apic_state); cpu_reset_interrupt(cpu, CPU_INTERRUPT_POLL); @@ -178,31 +178,31 @@ bool x86_cpu_exec_interrupt(CPUState *cs, int interrupt_request) */ switch (interrupt_request) { case CPU_INTERRUPT_POLL: - cs->interrupt_request &= ~CPU_INTERRUPT_POLL; + cpu_reset_interrupt(cs, CPU_INTERRUPT_POLL); apic_poll_irq(cpu->apic_state); break; case CPU_INTERRUPT_SIPI: + 
cpu_reset_interrupt(cs, CPU_INTERRUPT_SIPI); do_cpu_sipi(cpu); break; case CPU_INTERRUPT_SMI: cpu_svm_check_intercept_param(env, SVM_EXIT_SMI, 0, 0); - cs->interrupt_request &= ~CPU_INTERRUPT_SMI; + cpu_reset_interrupt(cs, CPU_INTERRUPT_SMI); do_smm_enter(cpu); break; case CPU_INTERRUPT_NMI: cpu_svm_check_intercept_param(env, SVM_EXIT_NMI, 0, 0); - cs->interrupt_request &= ~CPU_INTERRUPT_NMI; + cpu_reset_interrupt(cs, CPU_INTERRUPT_NMI); env->hflags2 |= HF2_NMI_MASK; do_interrupt_x86_hardirq(env, EXCP02_NMI, 1); break; case CPU_INTERRUPT_MCE: - cs->interrupt_request &= ~CPU_INTERRUPT_MCE; + cpu_reset_interrupt(cs, CPU_INTERRUPT_MCE); do_interrupt_x86_hardirq(env, EXCP12_MCHK, 0); break; case CPU_INTERRUPT_HARD: cpu_svm_check_intercept_param(env, SVM_EXIT_INTR, 0, 0); - cs->interrupt_request &= ~(CPU_INTERRUPT_HARD | - CPU_INTERRUPT_VIRQ); + cpu_reset_interrupt(cs, CPU_INTERRUPT_HARD | CPU_INTERRUPT_VIRQ); intno = cpu_get_pic_interrupt(env); qemu_log_mask(CPU_LOG_INT, "Servicing hardware INT=0x%02x\n", intno); @@ -215,7 +215,7 @@ bool x86_cpu_exec_interrupt(CPUState *cs, int interrupt_request) qemu_log_mask(CPU_LOG_INT, "Servicing virtual hardware INT=0x%02x\n", intno); do_interrupt_x86_hardirq(env, intno, 1); - cs->interrupt_request &= ~CPU_INTERRUPT_VIRQ; + cpu_reset_interrupt(cs, CPU_INTERRUPT_VIRQ); env->int_ctl &= ~V_IRQ_MASK; break; } diff --git a/target/i386/tcg/system/smm_helper.c b/target/i386/tcg/system/smm_helper.c index 251eb78..fb028a8 100644 --- a/target/i386/tcg/system/smm_helper.c +++ b/target/i386/tcg/system/smm_helper.c @@ -168,7 +168,7 @@ void do_smm_enter(X86CPU *cpu) env->cr[0] & ~(CR0_PE_MASK | CR0_EM_MASK | CR0_TS_MASK | CR0_PG_MASK)); cpu_x86_update_cr4(env, 0); - env->dr[7] = 0x00000400; + helper_set_dr(env, 7, 0x00000400); cpu_x86_load_seg_cache(env, R_CS, (env->smbase >> 4) & 0xffff, env->smbase, 0xffffffff, @@ -233,8 +233,8 @@ void helper_rsm(CPUX86State *env) env->eip = x86_ldq_phys(cs, sm_state + 0x7f78); cpu_load_eflags(env, x86_ldl_phys(cs, sm_state + 0x7f70), ~(CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C | DF_MASK)); - env->dr[6] = x86_ldl_phys(cs, sm_state + 0x7f68); - env->dr[7] = x86_ldl_phys(cs, sm_state + 0x7f60); + helper_set_dr(env, 6, x86_ldl_phys(cs, sm_state + 0x7f68)); + helper_set_dr(env, 7, x86_ldl_phys(cs, sm_state + 0x7f60)); cpu_x86_update_cr4(env, x86_ldl_phys(cs, sm_state + 0x7f48)); cpu_x86_update_cr3(env, x86_ldq_phys(cs, sm_state + 0x7f50)); @@ -268,8 +268,8 @@ void helper_rsm(CPUX86State *env) env->regs[R_EDX] = x86_ldl_phys(cs, sm_state + 0x7fd8); env->regs[R_ECX] = x86_ldl_phys(cs, sm_state + 0x7fd4); env->regs[R_EAX] = x86_ldl_phys(cs, sm_state + 0x7fd0); - env->dr[6] = x86_ldl_phys(cs, sm_state + 0x7fcc); - env->dr[7] = x86_ldl_phys(cs, sm_state + 0x7fc8); + helper_set_dr(env, 6, x86_ldl_phys(cs, sm_state + 0x7fcc)); + helper_set_dr(env, 7, x86_ldl_phys(cs, sm_state + 0x7fc8)); env->tr.selector = x86_ldl_phys(cs, sm_state + 0x7fc4) & 0xffff; env->tr.base = x86_ldl_phys(cs, sm_state + 0x7f64); diff --git a/target/i386/tcg/system/svm_helper.c b/target/i386/tcg/system/svm_helper.c index b27049b..505788b 100644 --- a/target/i386/tcg/system/svm_helper.c +++ b/target/i386/tcg/system/svm_helper.c @@ -49,7 +49,7 @@ static void svm_save_seg(CPUX86State *env, int mmu_idx, hwaddr addr, static inline void svm_canonicalization(CPUX86State *env, target_ulong *seg_base) { uint16_t shift_amt = 64 - cpu_x86_virtual_addr_width(env); - *seg_base = ((((long) *seg_base) << shift_amt) >> shift_amt); + *seg_base = (((int64_t) *seg_base) << shift_amt) >> 
shift_amt; } static void svm_load_seg(CPUX86State *env, int mmu_idx, hwaddr addr, @@ -403,7 +403,7 @@ void helper_vmrun(CPUX86State *env, int aflag, int next_eip_addend) env->hflags2 |= HF2_GIF_MASK; if (ctl_has_irq(env)) { - cs->interrupt_request |= CPU_INTERRUPT_VIRQ; + cpu_set_interrupt(cs, CPU_INTERRUPT_VIRQ); } if (virtual_gif_set(env)) { @@ -824,7 +824,7 @@ void do_vmexit(CPUX86State *env) env->intercept_exceptions = 0; /* Clears the V_IRQ and V_INTR_MASKING bits inside the processor. */ - cs->interrupt_request &= ~CPU_INTERRUPT_VIRQ; + cpu_reset_interrupt(cs, CPU_INTERRUPT_VIRQ); env->int_ctl = 0; /* Clears the TSC_OFFSET inside the processor. */ diff --git a/target/i386/tcg/system/tcg-cpu.c b/target/i386/tcg/system/tcg-cpu.c index 0538a4f..7255862 100644 --- a/target/i386/tcg/system/tcg-cpu.c +++ b/target/i386/tcg/system/tcg-cpu.c @@ -74,8 +74,8 @@ bool tcg_cpu_realizefn(CPUState *cs, Error **errp) memory_region_set_enabled(cpu->cpu_as_mem, true); cs->num_ases = 2; - cpu_address_space_init(cs, 0, "cpu-memory", cs->memory); - cpu_address_space_init(cs, 1, "cpu-smm", cpu->cpu_as_root); + cpu_address_space_init(cs, X86ASIdx_MEM, "cpu-memory", cs->memory); + cpu_address_space_init(cs, X86ASIdx_SMM, "cpu-smm", cpu->cpu_as_root); /* ... SMRAM with higher priority, linked from /machine/smram. */ cpu->machine_done.notify = tcg_cpu_machine_done; diff --git a/target/i386/tcg/tcg-cpu.c b/target/i386/tcg/tcg-cpu.c index 179dfdf..6f5dc06 100644 --- a/target/i386/tcg/tcg-cpu.c +++ b/target/i386/tcg/tcg-cpu.c @@ -149,6 +149,12 @@ static void x86_cpu_exec_reset(CPUState *cs) do_cpu_init(env_archcpu(env)); cs->exception_index = EXCP_HALTED; } + +static vaddr x86_pointer_wrap(CPUState *cs, int mmu_idx, + vaddr result, vaddr base) +{ + return cpu_env(cs)->hflags & HF_CS64_MASK ? result : (uint32_t)result; +} #endif const TCGCPUOps x86_tcg_ops = { @@ -172,6 +178,7 @@ const TCGCPUOps x86_tcg_ops = { .record_sigbus = x86_cpu_record_sigbus, #else .tlb_fill = x86_cpu_tlb_fill, + .pointer_wrap = x86_pointer_wrap, .do_interrupt = x86_cpu_do_interrupt, .cpu_exec_halt = x86_cpu_exec_halt, .cpu_exec_interrupt = x86_cpu_exec_interrupt, diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c index 0fcddc2..0cb87d0 100644 --- a/target/i386/tcg/translate.c +++ b/target/i386/tcg/translate.c @@ -2033,8 +2033,11 @@ static void gen_movl_seg(DisasContext *s, X86Seg seg_reg, TCGv src, bool inhibit tcg_gen_trunc_tl_i32(sel, src); gen_helper_load_seg(tcg_env, tcg_constant_i32(seg_reg), sel); - /* For move to DS/ES/SS, the addseg or ss32 flags may change. */ - if (CODE32(s) && seg_reg < R_FS) { + /* + * For moves to SS, the SS32 flag may change. For CODE32 only, changes + * to SS, DS and ES may change the ADDSEG flags. 
+ */ + if (seg_reg == R_SS || (CODE32(s) && seg_reg < R_FS)) { s->base.is_jmp = DISAS_EOB_NEXT; } } else { diff --git a/target/i386/whpx/whpx-accel-ops.c b/target/i386/whpx/whpx-accel-ops.c index b8bebe4..f758861 100644 --- a/target/i386/whpx/whpx-accel-ops.c +++ b/target/i386/whpx/whpx-accel-ops.c @@ -11,7 +11,7 @@ #include "qemu/osdep.h" #include "system/kvm_int.h" #include "qemu/main-loop.h" -#include "system/accel-ops.h" +#include "accel/accel-cpu-ops.h" #include "system/cpus.h" #include "qemu/guest-random.h" @@ -42,16 +42,14 @@ static void *whpx_cpu_thread_fn(void *arg) qemu_guest_random_seed_thread_part2(cpu->random_seed); do { + qemu_process_cpu_events(cpu); + if (cpu_can_run(cpu)) { r = whpx_vcpu_exec(cpu); if (r == EXCP_DEBUG) { cpu_handle_guest_debug(cpu); } } - while (cpu_thread_is_idle(cpu)) { - qemu_cond_wait_bql(cpu->halt_cond); - } - qemu_wait_io_event_common(cpu); } while (!cpu->unplug || cpu_can_run(cpu)); whpx_destroy_vcpu(cpu); @@ -90,12 +88,12 @@ static void whpx_accel_ops_class_init(ObjectClass *oc, const void *data) ops->create_vcpu_thread = whpx_start_vcpu_thread; ops->kick_vcpu_thread = whpx_kick_vcpu_thread; ops->cpu_thread_is_idle = whpx_vcpu_thread_is_idle; + ops->handle_interrupt = generic_handle_interrupt; ops->synchronize_post_reset = whpx_cpu_synchronize_post_reset; ops->synchronize_post_init = whpx_cpu_synchronize_post_init; ops->synchronize_state = whpx_cpu_synchronize_state; ops->synchronize_pre_loadvm = whpx_cpu_synchronize_pre_loadvm; - ops->synchronize_pre_resume = whpx_cpu_synchronize_pre_resume; } static const TypeInfo whpx_accel_ops_type = { diff --git a/target/i386/whpx/whpx-accel-ops.h b/target/i386/whpx/whpx-accel-ops.h index e6cf155..54cfc25 100644 --- a/target/i386/whpx/whpx-accel-ops.h +++ b/target/i386/whpx/whpx-accel-ops.h @@ -21,7 +21,6 @@ void whpx_cpu_synchronize_state(CPUState *cpu); void whpx_cpu_synchronize_post_reset(CPUState *cpu); void whpx_cpu_synchronize_post_init(CPUState *cpu); void whpx_cpu_synchronize_pre_loadvm(CPUState *cpu); -void whpx_cpu_synchronize_pre_resume(bool step_pending); /* state subset only touched by the VCPU itself during runtime */ #define WHPX_SET_RUNTIME_STATE 1 diff --git a/target/i386/whpx/whpx-all.c b/target/i386/whpx/whpx-all.c index cf6d3e4..2567618 100644 --- a/target/i386/whpx/whpx-all.c +++ b/target/i386/whpx/whpx-all.c @@ -14,6 +14,7 @@ #include "system/ioport.h" #include "gdbstub/helpers.h" #include "qemu/accel.h" +#include "accel/accel-ops.h" #include "system/whpx.h" #include "system/cpus.h" #include "system/runstate.h" @@ -26,6 +27,8 @@ #include "qapi/qapi-types-common.h" #include "qapi/qapi-visit-common.h" #include "migration/blocker.h" +#include "host-cpu.h" +#include "accel/accel-cpu-target.h" #include <winerror.h> #include "whpx-internal.h" @@ -237,13 +240,12 @@ struct AccelCPUState { uint64_t tpr; uint64_t apic_base; bool interruption_pending; - bool dirty; /* Must be the last field as it may have a tail */ WHV_RUN_VP_EXIT_CONTEXT exit_ctx; }; -static bool whpx_allowed; +bool whpx_allowed; static bool whp_dispatch_initialized; static HMODULE hWinHvPlatform, hWinHvEmulation; static uint32_t max_vcpu_index; @@ -786,8 +788,11 @@ static HRESULT CALLBACK whpx_emu_mmio_callback( void *ctx, WHV_EMULATOR_MEMORY_ACCESS_INFO *ma) { - cpu_physical_memory_rw(ma->GpaAddress, ma->Data, ma->AccessSize, - ma->Direction); + CPUState *cs = (CPUState *)ctx; + AddressSpace *as = cpu_addressspace(cs, MEMTXATTRS_UNSPECIFIED); + + address_space_rw(as, ma->GpaAddress, MEMTXATTRS_UNSPECIFIED, + ma->Data, 
ma->AccessSize, ma->Direction); return S_OK; } @@ -836,7 +841,7 @@ static HRESULT CALLBACK whpx_emu_setreg_callback( * The emulator just successfully wrote the register state. We clear the * dirty state so we avoid the double write on resume of the VP. */ - cpu->accel->dirty = false; + cpu->vcpu_dirty = false; return hr; } @@ -1391,7 +1396,7 @@ static int whpx_last_vcpu_stopping(CPUState *cpu) /* Returns the address of the next instruction that is about to be executed. */ static vaddr whpx_vcpu_get_pc(CPUState *cpu, bool exit_context_valid) { - if (cpu->accel->dirty) { + if (cpu->vcpu_dirty) { /* The CPU registers have been modified by other parts of QEMU. */ return cpu_env(cpu)->eip; } else if (exit_context_valid) { @@ -1434,9 +1439,9 @@ static int whpx_handle_halt(CPUState *cpu) int ret = 0; bql_lock(); - if (!((cpu->interrupt_request & CPU_INTERRUPT_HARD) && + if (!(cpu_test_interrupt(cpu, CPU_INTERRUPT_HARD) && (cpu_env(cpu)->eflags & IF_MASK)) && - !(cpu->interrupt_request & CPU_INTERRUPT_NMI)) { + !cpu_test_interrupt(cpu, CPU_INTERRUPT_NMI)) { cpu->exception_index = EXCP_HLT; cpu->halted = true; ret = 1; @@ -1467,16 +1472,16 @@ static void whpx_vcpu_pre_run(CPUState *cpu) /* Inject NMI */ if (!vcpu->interruption_pending && - cpu->interrupt_request & (CPU_INTERRUPT_NMI | CPU_INTERRUPT_SMI)) { - if (cpu->interrupt_request & CPU_INTERRUPT_NMI) { - cpu->interrupt_request &= ~CPU_INTERRUPT_NMI; + cpu_test_interrupt(cpu, CPU_INTERRUPT_NMI | CPU_INTERRUPT_SMI)) { + if (cpu_test_interrupt(cpu, CPU_INTERRUPT_NMI)) { + cpu_reset_interrupt(cpu, CPU_INTERRUPT_NMI); vcpu->interruptable = false; new_int.InterruptionType = WHvX64PendingNmi; new_int.InterruptionPending = 1; new_int.InterruptionVector = 2; } - if (cpu->interrupt_request & CPU_INTERRUPT_SMI) { - cpu->interrupt_request &= ~CPU_INTERRUPT_SMI; + if (cpu_test_interrupt(cpu, CPU_INTERRUPT_SMI)) { + cpu_reset_interrupt(cpu, CPU_INTERRUPT_SMI); } } @@ -1484,13 +1489,13 @@ static void whpx_vcpu_pre_run(CPUState *cpu) * Force the VCPU out of its inner loop to process any INIT requests or * commit pending TPR access. 
*/ - if (cpu->interrupt_request & (CPU_INTERRUPT_INIT | CPU_INTERRUPT_TPR)) { - if ((cpu->interrupt_request & CPU_INTERRUPT_INIT) && + if (cpu_test_interrupt(cpu, CPU_INTERRUPT_INIT | CPU_INTERRUPT_TPR)) { + if (cpu_test_interrupt(cpu, CPU_INTERRUPT_INIT) && !(env->hflags & HF_SMM_MASK)) { - cpu->exit_request = 1; + qatomic_set(&cpu->exit_request, true); } - if (cpu->interrupt_request & CPU_INTERRUPT_TPR) { - cpu->exit_request = 1; + if (cpu_test_interrupt(cpu, CPU_INTERRUPT_TPR)) { + qatomic_set(&cpu->exit_request, true); } } @@ -1499,8 +1504,8 @@ static void whpx_vcpu_pre_run(CPUState *cpu) if (!vcpu->interruption_pending && vcpu->interruptable && (env->eflags & IF_MASK)) { assert(!new_int.InterruptionPending); - if (cpu->interrupt_request & CPU_INTERRUPT_HARD) { - cpu->interrupt_request &= ~CPU_INTERRUPT_HARD; + if (cpu_test_interrupt(cpu, CPU_INTERRUPT_HARD)) { + cpu_reset_interrupt(cpu, CPU_INTERRUPT_HARD); irq = cpu_get_pic_interrupt(env); if (irq >= 0) { new_int.InterruptionType = WHvX64PendingInterrupt; @@ -1517,8 +1522,8 @@ static void whpx_vcpu_pre_run(CPUState *cpu) reg_count += 1; } } else if (vcpu->ready_for_pic_interrupt && - (cpu->interrupt_request & CPU_INTERRUPT_HARD)) { - cpu->interrupt_request &= ~CPU_INTERRUPT_HARD; + cpu_test_interrupt(cpu, CPU_INTERRUPT_HARD)) { + cpu_reset_interrupt(cpu, CPU_INTERRUPT_HARD); irq = cpu_get_pic_interrupt(env); if (irq >= 0) { reg_names[reg_count] = WHvRegisterPendingEvent; @@ -1537,14 +1542,14 @@ static void whpx_vcpu_pre_run(CPUState *cpu) if (tpr != vcpu->tpr) { vcpu->tpr = tpr; reg_values[reg_count].Reg64 = tpr; - cpu->exit_request = 1; + qatomic_set(&cpu->exit_request, true); reg_names[reg_count] = WHvX64RegisterCr8; reg_count += 1; } /* Update the state of the interrupt delivery notification */ if (!vcpu->window_registered && - cpu->interrupt_request & CPU_INTERRUPT_HARD) { + cpu_test_interrupt(cpu, CPU_INTERRUPT_HARD)) { reg_values[reg_count].DeliverabilityNotifications = (WHV_X64_DELIVERABILITY_NOTIFICATIONS_REGISTER) { .InterruptNotification = 1 @@ -1597,31 +1602,31 @@ static void whpx_vcpu_process_async_events(CPUState *cpu) CPUX86State *env = &x86_cpu->env; AccelCPUState *vcpu = cpu->accel; - if ((cpu->interrupt_request & CPU_INTERRUPT_INIT) && + if (cpu_test_interrupt(cpu, CPU_INTERRUPT_INIT) && !(env->hflags & HF_SMM_MASK)) { whpx_cpu_synchronize_state(cpu); do_cpu_init(x86_cpu); vcpu->interruptable = true; } - if (cpu->interrupt_request & CPU_INTERRUPT_POLL) { - cpu->interrupt_request &= ~CPU_INTERRUPT_POLL; + if (cpu_test_interrupt(cpu, CPU_INTERRUPT_POLL)) { + cpu_reset_interrupt(cpu, CPU_INTERRUPT_POLL); apic_poll_irq(x86_cpu->apic_state); } - if (((cpu->interrupt_request & CPU_INTERRUPT_HARD) && + if ((cpu_test_interrupt(cpu, CPU_INTERRUPT_HARD) && (env->eflags & IF_MASK)) || - (cpu->interrupt_request & CPU_INTERRUPT_NMI)) { + cpu_test_interrupt(cpu, CPU_INTERRUPT_NMI)) { cpu->halted = false; } - if (cpu->interrupt_request & CPU_INTERRUPT_SIPI) { + if (cpu_test_interrupt(cpu, CPU_INTERRUPT_SIPI)) { whpx_cpu_synchronize_state(cpu); do_cpu_sipi(x86_cpu); } - if (cpu->interrupt_request & CPU_INTERRUPT_TPR) { - cpu->interrupt_request &= ~CPU_INTERRUPT_TPR; + if (cpu_test_interrupt(cpu, CPU_INTERRUPT_TPR)) { + cpu_reset_interrupt(cpu, CPU_INTERRUPT_TPR); whpx_cpu_synchronize_state(cpu); apic_handle_tpr_access_report(x86_cpu->apic_state, env->eip, env->tpr_access_type); @@ -1704,15 +1709,16 @@ static int whpx_vcpu_run(CPUState *cpu) } do { - if (cpu->accel->dirty) { + if (cpu->vcpu_dirty) { whpx_set_registers(cpu, 
WHPX_SET_RUNTIME_STATE); - cpu->accel->dirty = false; + cpu->vcpu_dirty = false; } if (exclusive_step_mode == WHPX_STEP_NONE) { whpx_vcpu_pre_run(cpu); - if (qatomic_read(&cpu->exit_request)) { + /* Corresponding store-release is in cpu_exit. */ + if (qatomic_load_acquire(&cpu->exit_request)) { whpx_vcpu_kick(cpu); } } @@ -2047,16 +2053,14 @@ static int whpx_vcpu_run(CPUState *cpu) whpx_last_vcpu_stopping(cpu); } - qatomic_set(&cpu->exit_request, false); - return ret < 0; } static void do_whpx_cpu_synchronize_state(CPUState *cpu, run_on_cpu_data arg) { - if (!cpu->accel->dirty) { + if (!cpu->vcpu_dirty) { whpx_get_registers(cpu); - cpu->accel->dirty = true; + cpu->vcpu_dirty = true; } } @@ -2064,20 +2068,20 @@ static void do_whpx_cpu_synchronize_post_reset(CPUState *cpu, run_on_cpu_data arg) { whpx_set_registers(cpu, WHPX_SET_RESET_STATE); - cpu->accel->dirty = false; + cpu->vcpu_dirty = false; } static void do_whpx_cpu_synchronize_post_init(CPUState *cpu, run_on_cpu_data arg) { whpx_set_registers(cpu, WHPX_SET_FULL_STATE); - cpu->accel->dirty = false; + cpu->vcpu_dirty = false; } static void do_whpx_cpu_synchronize_pre_loadvm(CPUState *cpu, run_on_cpu_data arg) { - cpu->accel->dirty = true; + cpu->vcpu_dirty = true; } /* @@ -2086,7 +2090,7 @@ static void do_whpx_cpu_synchronize_pre_loadvm(CPUState *cpu, void whpx_cpu_synchronize_state(CPUState *cpu) { - if (!cpu->accel->dirty) { + if (!cpu->vcpu_dirty) { run_on_cpu(cpu, do_whpx_cpu_synchronize_state, RUN_ON_CPU_NULL); } } @@ -2106,7 +2110,7 @@ void whpx_cpu_synchronize_pre_loadvm(CPUState *cpu) run_on_cpu(cpu, do_whpx_cpu_synchronize_pre_loadvm, RUN_ON_CPU_NULL); } -void whpx_cpu_synchronize_pre_resume(bool step_pending) +static void whpx_pre_resume_vm(AccelState *as, bool step_pending) { whpx_global.step_pending = step_pending; } @@ -2226,7 +2230,7 @@ int whpx_init_vcpu(CPUState *cpu) } vcpu->interruptable = true; - vcpu->dirty = true; + cpu->vcpu_dirty = true; cpu->accel = vcpu; max_vcpu_index = max(max_vcpu_index, cpu->cpu_index); qemu_add_vm_change_state_handler(whpx_cpu_update_state, env); @@ -2501,11 +2505,33 @@ static void whpx_set_kernel_irqchip(Object *obj, Visitor *v, } } +static void whpx_cpu_instance_init(CPUState *cs) +{ + X86CPU *cpu = X86_CPU(cs); + + host_cpu_instance_init(cpu); +} + +static void whpx_cpu_accel_class_init(ObjectClass *oc, const void *data) +{ + AccelCPUClass *acc = ACCEL_CPU_CLASS(oc); + + acc->cpu_instance_init = whpx_cpu_instance_init; +} + +static const TypeInfo whpx_cpu_accel_type = { + .name = ACCEL_CPU_NAME("whpx"), + + .parent = TYPE_ACCEL_CPU, + .class_init = whpx_cpu_accel_class_init, + .abstract = true, +}; + /* * Partition support */ -static int whpx_accel_init(MachineState *ms) +static int whpx_accel_init(AccelState *as, MachineState *ms) { struct whpx_state *whpx; int ret; @@ -2689,11 +2715,6 @@ error: return ret; } -int whpx_enabled(void) -{ - return whpx_allowed; -} - bool whpx_apic_in_platform(void) { return whpx_global.apic_in_platform; } @@ -2703,6 +2724,7 @@ static void whpx_accel_class_init(ObjectClass *oc, const void *data) AccelClass *ac = ACCEL_CLASS(oc); ac->name = "WHPX"; ac->init_machine = whpx_accel_init; + ac->pre_resume_vm = whpx_pre_resume_vm; ac->allowed = &whpx_allowed; object_class_property_add(oc, "kernel-irqchip", "on|off|split", @@ -2731,6 +2753,7 @@ static const TypeInfo whpx_accel_type = { static void whpx_type_init(void) { type_register_static(&whpx_accel_type); + type_register_static(&whpx_cpu_accel_type); } bool init_whp_dispatch(void) diff --git 
a/target/i386/xsave_helper.c b/target/i386/xsave_helper.c index 24ab7be..996e9f3 100644 --- a/target/i386/xsave_helper.c +++ b/target/i386/xsave_helper.c @@ -5,7 +5,6 @@ #include "qemu/osdep.h" #include "cpu.h" -#include "exec/tswap.h" void x86_cpu_xsave_all_areas(X86CPU *cpu, void *buf, uint32_t buflen) { diff --git a/target/loongarch/README b/target/loongarch/README index 0b9dc0d..1ffd342 100644 --- a/target/loongarch/README +++ b/target/loongarch/README @@ -11,7 +11,7 @@ - System emulation - You can reference docs/system/loongarch/loongson3.rst to get the information about system emulation of LoongArch. + You can reference docs/system/loongarch/virt.rst to get the information about system emulation of LoongArch. - Linux-user emulation diff --git a/target/loongarch/cpu-csr.h b/target/loongarch/cpu-csr.h index 0834e91..9097fdd 100644 --- a/target/loongarch/cpu-csr.h +++ b/target/loongarch/cpu-csr.h @@ -34,11 +34,13 @@ FIELD(CSR_MISC, ALCL, 12, 4) FIELD(CSR_MISC, DWPL, 16, 3) #define LOONGARCH_CSR_ECFG 0x4 /* Exception config */ -FIELD(CSR_ECFG, LIE, 0, 13) +FIELD(CSR_ECFG, LIE, 0, 15) /* bit 15 is msg interrupt enabled */ +FIELD(CSR_ECFG, MSGINT, 14, 1) FIELD(CSR_ECFG, VS, 16, 3) #define LOONGARCH_CSR_ESTAT 0x5 /* Exception status */ -FIELD(CSR_ESTAT, IS, 0, 13) +FIELD(CSR_ESTAT, IS, 0, 15) /* bit 15 is msg interrupt enabled */ +FIELD(CSR_ESTAT, MSGINT, 14, 1) FIELD(CSR_ESTAT, ECODE, 16, 6) FIELD(CSR_ESTAT, ESUBCODE, 22, 9) @@ -106,6 +108,7 @@ FIELD(CSR_PWCH, DIR4_WIDTH, 18, 6) #define LOONGARCH_CSR_STLBPS 0x1e /* Stlb page size */ FIELD(CSR_STLBPS, PS, 0, 5) +FIELD(CSR_STLBPS, RESERVE, 5, 27) #define LOONGARCH_CSR_RVACFG 0x1f /* Reduced virtual address config */ FIELD(CSR_RVACFG, RBITS, 0, 4) @@ -186,6 +189,9 @@ FIELD(CSR_MERRCTL, ISMERR, 0, 1) #define LOONGARCH_CSR_CTAG 0x98 /* TagLo + TagHi */ +#define LOONGARCH_CSR_MSGIS(N) (0xa0 + N) +#define LOONGARCH_CSR_MSGIR 0xa4 + /* Direct map windows CSRs*/ #define LOONGARCH_CSR_DMW(N) (0x180 + N) FIELD(CSR_DMW, PLV0, 0, 1) diff --git a/target/loongarch/cpu-mmu.h b/target/loongarch/cpu-mmu.h new file mode 100644 index 0000000..dbc69c7 --- /dev/null +++ b/target/loongarch/cpu-mmu.h @@ -0,0 +1,40 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * LoongArch CPU parameters for QEMU. 
+ * + * Copyright (c) 2025 Loongson Technology Corporation Limited + */ + +#ifndef LOONGARCH_CPU_MMU_H +#define LOONGARCH_CPU_MMU_H + +typedef enum TLBRet { + TLBRET_MATCH, + TLBRET_BADADDR, + TLBRET_NOMATCH, + TLBRET_INVALID, + TLBRET_DIRTY, + TLBRET_RI, + TLBRET_XI, + TLBRET_PE, +} TLBRet; + +typedef struct MMUContext { + vaddr addr; + uint64_t pte; + hwaddr physical; + int ps; /* page size shift */ + int prot; +} MMUContext; + +bool check_ps(CPULoongArchState *ent, uint8_t ps); +TLBRet loongarch_check_pte(CPULoongArchState *env, MMUContext *context, + MMUAccessType access_type, int mmu_idx); +TLBRet get_physical_address(CPULoongArchState *env, MMUContext *context, + MMUAccessType access_type, int mmu_idx, + int is_debug); +void get_dir_base_width(CPULoongArchState *env, uint64_t *dir_base, + uint64_t *dir_width, unsigned int level); +hwaddr loongarch_cpu_get_phys_page_debug(CPUState *cpu, vaddr addr); + +#endif /* LOONGARCH_CPU_MMU_H */ diff --git a/target/loongarch/cpu.c b/target/loongarch/cpu.c index f7535d1..86490e0 100644 --- a/target/loongarch/cpu.c +++ b/target/loongarch/cpu.c @@ -17,6 +17,7 @@ #include "hw/qdev-properties.h" #include "exec/translation-block.h" #include "cpu.h" +#include "cpu-mmu.h" #include "internals.h" #include "fpu/softfloat-helpers.h" #include "csr.h" @@ -27,11 +28,6 @@ #ifdef CONFIG_KVM #include <linux/kvm.h> #endif -#ifdef CONFIG_TCG -#include "accel/tcg/cpu-ldst.h" -#include "accel/tcg/cpu-ops.h" -#include "tcg/tcg.h" -#endif #include "tcg/tcg_loongarch.h" const char * const regnames[32] = { @@ -48,62 +44,6 @@ const char * const fregnames[32] = { "f24", "f25", "f26", "f27", "f28", "f29", "f30", "f31", }; -struct TypeExcp { - int32_t exccode; - const char * const name; -}; - -static const struct TypeExcp excp_names[] = { - {EXCCODE_INT, "Interrupt"}, - {EXCCODE_PIL, "Page invalid exception for load"}, - {EXCCODE_PIS, "Page invalid exception for store"}, - {EXCCODE_PIF, "Page invalid exception for fetch"}, - {EXCCODE_PME, "Page modified exception"}, - {EXCCODE_PNR, "Page Not Readable exception"}, - {EXCCODE_PNX, "Page Not Executable exception"}, - {EXCCODE_PPI, "Page Privilege error"}, - {EXCCODE_ADEF, "Address error for instruction fetch"}, - {EXCCODE_ADEM, "Address error for Memory access"}, - {EXCCODE_SYS, "Syscall"}, - {EXCCODE_BRK, "Break"}, - {EXCCODE_INE, "Instruction Non-Existent"}, - {EXCCODE_IPE, "Instruction privilege error"}, - {EXCCODE_FPD, "Floating Point Disabled"}, - {EXCCODE_FPE, "Floating Point Exception"}, - {EXCCODE_DBP, "Debug breakpoint"}, - {EXCCODE_BCE, "Bound Check Exception"}, - {EXCCODE_SXD, "128 bit vector instructions Disable exception"}, - {EXCCODE_ASXD, "256 bit vector instructions Disable exception"}, - {EXCP_HLT, "EXCP_HLT"}, -}; - -const char *loongarch_exception_name(int32_t exception) -{ - int i; - - for (i = 0; i < ARRAY_SIZE(excp_names); i++) { - if (excp_names[i].exccode == exception) { - return excp_names[i].name; - } - } - return "Unknown"; -} - -void G_NORETURN do_raise_exception(CPULoongArchState *env, - uint32_t exception, - uintptr_t pc) -{ - CPUState *cs = env_cpu(env); - - qemu_log_mask(CPU_LOG_INT, "%s: exception: %d (%s)\n", - __func__, - exception, - loongarch_exception_name(exception)); - cs->exception_index = exception; - - cpu_loop_exit_restore(cs, pc); -} - static void loongarch_cpu_set_pc(CPUState *cs, vaddr value) { set_pc(cpu_env(cs), value); @@ -139,18 +79,8 @@ void loongarch_cpu_set_irq(void *opaque, int irq, int level) } } -static inline bool cpu_loongarch_hw_interrupts_enabled(CPULoongArchState 
*env) -{ - bool ret = 0; - - ret = (FIELD_EX64(env->CSR_CRMD, CSR_CRMD, IE) && - !(FIELD_EX64(env->CSR_DBG, CSR_DBG, DST))); - - return ret; -} - /* Check if there is pending and not masked out interrupt */ -static inline bool cpu_loongarch_hw_interrupts_pending(CPULoongArchState *env) +bool cpu_loongarch_hw_interrupts_pending(CPULoongArchState *env) { uint32_t pending; uint32_t status; @@ -162,258 +92,156 @@ static inline bool cpu_loongarch_hw_interrupts_pending(CPULoongArchState *env) } #endif -#ifdef CONFIG_TCG #ifndef CONFIG_USER_ONLY -static void loongarch_cpu_do_interrupt(CPUState *cs) +bool loongarch_cpu_has_work(CPUState *cs) { - CPULoongArchState *env = cpu_env(cs); - bool update_badinstr = 1; - int cause = -1; - bool tlbfill = FIELD_EX64(env->CSR_TLBRERA, CSR_TLBRERA, ISTLBR); - uint32_t vec_size = FIELD_EX64(env->CSR_ECFG, CSR_ECFG, VS); - - if (cs->exception_index != EXCCODE_INT) { - qemu_log_mask(CPU_LOG_INT, - "%s enter: pc " TARGET_FMT_lx " ERA " TARGET_FMT_lx - " TLBRERA " TARGET_FMT_lx " exception: %d (%s)\n", - __func__, env->pc, env->CSR_ERA, env->CSR_TLBRERA, - cs->exception_index, - loongarch_exception_name(cs->exception_index)); - } - - switch (cs->exception_index) { - case EXCCODE_DBP: - env->CSR_DBG = FIELD_DP64(env->CSR_DBG, CSR_DBG, DCL, 1); - env->CSR_DBG = FIELD_DP64(env->CSR_DBG, CSR_DBG, ECODE, 0xC); - goto set_DERA; - set_DERA: - env->CSR_DERA = env->pc; - env->CSR_DBG = FIELD_DP64(env->CSR_DBG, CSR_DBG, DST, 1); - set_pc(env, env->CSR_EENTRY + 0x480); - break; - case EXCCODE_INT: - if (FIELD_EX64(env->CSR_DBG, CSR_DBG, DST)) { - env->CSR_DBG = FIELD_DP64(env->CSR_DBG, CSR_DBG, DEI, 1); - goto set_DERA; - } - QEMU_FALLTHROUGH; - case EXCCODE_PIF: - case EXCCODE_ADEF: - cause = cs->exception_index; - update_badinstr = 0; - break; - case EXCCODE_SYS: - case EXCCODE_BRK: - case EXCCODE_INE: - case EXCCODE_IPE: - case EXCCODE_FPD: - case EXCCODE_FPE: - case EXCCODE_SXD: - case EXCCODE_ASXD: - env->CSR_BADV = env->pc; - QEMU_FALLTHROUGH; - case EXCCODE_BCE: - case EXCCODE_ADEM: - case EXCCODE_PIL: - case EXCCODE_PIS: - case EXCCODE_PME: - case EXCCODE_PNR: - case EXCCODE_PNX: - case EXCCODE_PPI: - cause = cs->exception_index; - break; - default: - qemu_log("Error: exception(%d) has not been supported\n", - cs->exception_index); - abort(); - } - - if (update_badinstr) { - env->CSR_BADI = cpu_ldl_code(env, env->pc); - } + bool has_work = false; - /* Save PLV and IE */ - if (tlbfill) { - env->CSR_TLBRPRMD = FIELD_DP64(env->CSR_TLBRPRMD, CSR_TLBRPRMD, PPLV, - FIELD_EX64(env->CSR_CRMD, - CSR_CRMD, PLV)); - env->CSR_TLBRPRMD = FIELD_DP64(env->CSR_TLBRPRMD, CSR_TLBRPRMD, PIE, - FIELD_EX64(env->CSR_CRMD, CSR_CRMD, IE)); - /* set the DA mode */ - env->CSR_CRMD = FIELD_DP64(env->CSR_CRMD, CSR_CRMD, DA, 1); - env->CSR_CRMD = FIELD_DP64(env->CSR_CRMD, CSR_CRMD, PG, 0); - env->CSR_TLBRERA = FIELD_DP64(env->CSR_TLBRERA, CSR_TLBRERA, - PC, (env->pc >> 2)); - } else { - env->CSR_ESTAT = FIELD_DP64(env->CSR_ESTAT, CSR_ESTAT, ECODE, - EXCODE_MCODE(cause)); - env->CSR_ESTAT = FIELD_DP64(env->CSR_ESTAT, CSR_ESTAT, ESUBCODE, - EXCODE_SUBCODE(cause)); - env->CSR_PRMD = FIELD_DP64(env->CSR_PRMD, CSR_PRMD, PPLV, - FIELD_EX64(env->CSR_CRMD, CSR_CRMD, PLV)); - env->CSR_PRMD = FIELD_DP64(env->CSR_PRMD, CSR_PRMD, PIE, - FIELD_EX64(env->CSR_CRMD, CSR_CRMD, IE)); - env->CSR_ERA = env->pc; + if (cpu_test_interrupt(cs, CPU_INTERRUPT_HARD) && + cpu_loongarch_hw_interrupts_pending(cpu_env(cs))) { + has_work = true; } - env->CSR_CRMD = FIELD_DP64(env->CSR_CRMD, CSR_CRMD, PLV, 0); - env->CSR_CRMD = 
FIELD_DP64(env->CSR_CRMD, CSR_CRMD, IE, 0); + return has_work; +} +#endif /* !CONFIG_USER_ONLY */ - if (vec_size) { - vec_size = (1 << vec_size) * 4; - } +static void loongarch_la464_init_csr(Object *obj) +{ +#ifndef CONFIG_USER_ONLY + static bool initialized; + LoongArchCPU *cpu = LOONGARCH_CPU(obj); + CPULoongArchState *env = &cpu->env; + int i, num; - if (cs->exception_index == EXCCODE_INT) { - /* Interrupt */ - uint32_t vector = 0; - uint32_t pending = FIELD_EX64(env->CSR_ESTAT, CSR_ESTAT, IS); - pending &= FIELD_EX64(env->CSR_ECFG, CSR_ECFG, LIE); - - /* Find the highest-priority interrupt. */ - vector = 31 - clz32(pending); - set_pc(env, env->CSR_EENTRY + \ - (EXCCODE_EXTERNAL_INT + vector) * vec_size); - qemu_log_mask(CPU_LOG_INT, - "%s: PC " TARGET_FMT_lx " ERA " TARGET_FMT_lx - " cause %d\n" " A " TARGET_FMT_lx " D " - TARGET_FMT_lx " vector = %d ExC " TARGET_FMT_lx "ExS" - TARGET_FMT_lx "\n", - __func__, env->pc, env->CSR_ERA, - cause, env->CSR_BADV, env->CSR_DERA, vector, - env->CSR_ECFG, env->CSR_ESTAT); - } else { - if (tlbfill) { - set_pc(env, env->CSR_TLBRENTRY); - } else { - set_pc(env, env->CSR_EENTRY + EXCODE_MCODE(cause) * vec_size); + if (!initialized) { + initialized = true; + num = FIELD_EX64(env->CSR_PRCFG1, CSR_PRCFG1, SAVE_NUM); + for (i = num; i < 16; i++) { + set_csr_flag(LOONGARCH_CSR_SAVE(i), CSRFL_UNUSED); } - qemu_log_mask(CPU_LOG_INT, - "%s: PC " TARGET_FMT_lx " ERA " TARGET_FMT_lx - " cause %d%s\n, ESTAT " TARGET_FMT_lx - " EXCFG " TARGET_FMT_lx " BADVA " TARGET_FMT_lx - "BADI " TARGET_FMT_lx " SYS_NUM " TARGET_FMT_lu - " cpu %d asid " TARGET_FMT_lx "\n", __func__, env->pc, - tlbfill ? env->CSR_TLBRERA : env->CSR_ERA, - cause, tlbfill ? "(refill)" : "", env->CSR_ESTAT, - env->CSR_ECFG, - tlbfill ? env->CSR_TLBRBADV : env->CSR_BADV, - env->CSR_BADI, env->gpr[11], cs->cpu_index, - env->CSR_ASID); + set_csr_flag(LOONGARCH_CSR_IMPCTL1, CSRFL_UNUSED); + set_csr_flag(LOONGARCH_CSR_IMPCTL2, CSRFL_UNUSED); + set_csr_flag(LOONGARCH_CSR_MERRCTL, CSRFL_UNUSED); + set_csr_flag(LOONGARCH_CSR_MERRINFO1, CSRFL_UNUSED); + set_csr_flag(LOONGARCH_CSR_MERRINFO2, CSRFL_UNUSED); + set_csr_flag(LOONGARCH_CSR_MERRENTRY, CSRFL_UNUSED); + set_csr_flag(LOONGARCH_CSR_MERRERA, CSRFL_UNUSED); + set_csr_flag(LOONGARCH_CSR_MERRSAVE, CSRFL_UNUSED); + set_csr_flag(LOONGARCH_CSR_CTAG, CSRFL_UNUSED); } - cs->exception_index = -1; +#endif } -static void loongarch_cpu_do_transaction_failed(CPUState *cs, hwaddr physaddr, - vaddr addr, unsigned size, - MMUAccessType access_type, - int mmu_idx, MemTxAttrs attrs, - MemTxResult response, - uintptr_t retaddr) +static bool loongarch_get_lsx(Object *obj, Error **errp) { - CPULoongArchState *env = cpu_env(cs); - - if (access_type == MMU_INST_FETCH) { - do_raise_exception(env, EXCCODE_ADEF, retaddr); - } else { - do_raise_exception(env, EXCCODE_ADEM, retaddr); - } + return LOONGARCH_CPU(obj)->lsx != ON_OFF_AUTO_OFF; } -static bool loongarch_cpu_exec_interrupt(CPUState *cs, int interrupt_request) +static void loongarch_set_lsx(Object *obj, bool value, Error **errp) { - if (interrupt_request & CPU_INTERRUPT_HARD) { - CPULoongArchState *env = cpu_env(cs); - - if (cpu_loongarch_hw_interrupts_enabled(env) && - cpu_loongarch_hw_interrupts_pending(env)) { - /* Raise it */ - cs->exception_index = EXCCODE_INT; - loongarch_cpu_do_interrupt(cs); - return true; + LoongArchCPU *cpu = LOONGARCH_CPU(obj); + uint32_t val; + + cpu->lsx = value ? 
ON_OFF_AUTO_ON : ON_OFF_AUTO_OFF; + if (cpu->lsx == ON_OFF_AUTO_OFF) { + cpu->lasx = ON_OFF_AUTO_OFF; + if (cpu->lasx == ON_OFF_AUTO_ON) { + error_setg(errp, "Failed to disable LSX since LASX is enabled"); + return; } } - return false; -} -#endif -static TCGTBCPUState loongarch_get_tb_cpu_state(CPUState *cs) -{ - CPULoongArchState *env = cpu_env(cs); - uint32_t flags; + if (kvm_enabled()) { + /* kvm feature detection in function kvm_arch_init_vcpu */ + return; + } - flags = env->CSR_CRMD & (R_CSR_CRMD_PLV_MASK | R_CSR_CRMD_PG_MASK); - flags |= FIELD_EX64(env->CSR_EUEN, CSR_EUEN, FPE) * HW_FLAGS_EUEN_FPE; - flags |= FIELD_EX64(env->CSR_EUEN, CSR_EUEN, SXE) * HW_FLAGS_EUEN_SXE; - flags |= FIELD_EX64(env->CSR_EUEN, CSR_EUEN, ASXE) * HW_FLAGS_EUEN_ASXE; - flags |= is_va32(env) * HW_FLAGS_VA32; + /* LSX feature detection in TCG mode */ + val = cpu->env.cpucfg[2]; + if (cpu->lsx == ON_OFF_AUTO_ON) { + if (FIELD_EX32(val, CPUCFG2, LSX) == 0) { + error_setg(errp, "Failed to enable LSX in TCG mode"); + return; + } + } else { + cpu->env.cpucfg[2] = FIELD_DP32(val, CPUCFG2, LASX, 0); + val = cpu->env.cpucfg[2]; + } - return (TCGTBCPUState){ .pc = env->pc, .flags = flags }; + cpu->env.cpucfg[2] = FIELD_DP32(val, CPUCFG2, LSX, value); } -static void loongarch_cpu_synchronize_from_tb(CPUState *cs, - const TranslationBlock *tb) +static bool loongarch_get_lasx(Object *obj, Error **errp) { - tcg_debug_assert(!tcg_cflags_has(cs, CF_PCREL)); - set_pc(cpu_env(cs), tb->pc); + return LOONGARCH_CPU(obj)->lasx != ON_OFF_AUTO_OFF; } -static void loongarch_restore_state_to_opc(CPUState *cs, - const TranslationBlock *tb, - const uint64_t *data) +static void loongarch_set_lasx(Object *obj, bool value, Error **errp) { - set_pc(cpu_env(cs), data[0]); -} -#endif /* CONFIG_TCG */ + LoongArchCPU *cpu = LOONGARCH_CPU(obj); + uint32_t val; -#ifndef CONFIG_USER_ONLY -static bool loongarch_cpu_has_work(CPUState *cs) -{ - bool has_work = false; + cpu->lasx = value ? ON_OFF_AUTO_ON : ON_OFF_AUTO_OFF; + if ((cpu->lsx == ON_OFF_AUTO_OFF) && (cpu->lasx == ON_OFF_AUTO_ON)) { + error_setg(errp, "Failed to enable LASX since lSX is disabled"); + return; + } - if ((cs->interrupt_request & CPU_INTERRUPT_HARD) && - cpu_loongarch_hw_interrupts_pending(cpu_env(cs))) { - has_work = true; + if (kvm_enabled()) { + /* kvm feature detection in function kvm_arch_init_vcpu */ + return; } - return has_work; + /* LASX feature detection in TCG mode */ + val = cpu->env.cpucfg[2]; + if (cpu->lasx == ON_OFF_AUTO_ON) { + if (FIELD_EX32(val, CPUCFG2, LASX) == 0) { + error_setg(errp, "Failed to enable LASX in TCG mode"); + return; + } + } + + cpu->env.cpucfg[2] = FIELD_DP32(val, CPUCFG2, LASX, value); } -#endif /* !CONFIG_USER_ONLY */ -static int loongarch_cpu_mmu_index(CPUState *cs, bool ifetch) +static bool loongarch_get_msgint(Object *obj, Error **errp) { - CPULoongArchState *env = cpu_env(cs); + return LOONGARCH_CPU(obj)->msgint != ON_OFF_AUTO_OFF; +} + +static void loongarch_set_msgint(Object *obj, bool value, Error **errp) +{ + LoongArchCPU *cpu = LOONGARCH_CPU(obj); + + cpu->msgint = value ? 
ON_OFF_AUTO_ON : ON_OFF_AUTO_OFF; - if (FIELD_EX64(env->CSR_CRMD, CSR_CRMD, PG)) { - return FIELD_EX64(env->CSR_CRMD, CSR_CRMD, PLV); + if (kvm_enabled()) { + /* kvm feature detection in function kvm_arch_init_vcpu */ + return; } - return MMU_DA_IDX; + + cpu->env.cpucfg[1] = FIELD_DP32(cpu->env.cpucfg[1], CPUCFG1, MSG_INT, value); } -static void loongarch_la464_init_csr(Object *obj) +static void loongarch_cpu_post_init(Object *obj) { -#ifndef CONFIG_USER_ONLY - static bool initialized; LoongArchCPU *cpu = LOONGARCH_CPU(obj); - CPULoongArchState *env = &cpu->env; - int i, num; - if (!initialized) { - initialized = true; - num = FIELD_EX64(env->CSR_PRCFG1, CSR_PRCFG1, SAVE_NUM); - for (i = num; i < 16; i++) { - set_csr_flag(LOONGARCH_CSR_SAVE(i), CSRFL_UNUSED); - } - set_csr_flag(LOONGARCH_CSR_IMPCTL1, CSRFL_UNUSED); - set_csr_flag(LOONGARCH_CSR_IMPCTL2, CSRFL_UNUSED); - set_csr_flag(LOONGARCH_CSR_MERRCTL, CSRFL_UNUSED); - set_csr_flag(LOONGARCH_CSR_MERRINFO1, CSRFL_UNUSED); - set_csr_flag(LOONGARCH_CSR_MERRINFO2, CSRFL_UNUSED); - set_csr_flag(LOONGARCH_CSR_MERRENTRY, CSRFL_UNUSED); - set_csr_flag(LOONGARCH_CSR_MERRERA, CSRFL_UNUSED); - set_csr_flag(LOONGARCH_CSR_MERRSAVE, CSRFL_UNUSED); - set_csr_flag(LOONGARCH_CSR_CTAG, CSRFL_UNUSED); + cpu->lbt = ON_OFF_AUTO_OFF; + cpu->pmu = ON_OFF_AUTO_OFF; + cpu->lsx = ON_OFF_AUTO_AUTO; + cpu->lasx = ON_OFF_AUTO_AUTO; + object_property_add_bool(obj, "lsx", loongarch_get_lsx, + loongarch_set_lsx); + object_property_add_bool(obj, "lasx", loongarch_get_lasx, + loongarch_set_lasx); + object_property_add_bool(obj, "msgint", loongarch_get_msgint, + loongarch_set_msgint); + /* lbt is enabled only in kvm mode, not supported in tcg mode */ + if (kvm_enabled()) { + kvm_loongarch_cpu_post_init(cpu); } -#endif } static void loongarch_la464_initfn(Object *obj) @@ -517,6 +345,7 @@ static void loongarch_la464_initfn(Object *obj) env->CSR_PRCFG3 = FIELD_DP64(env->CSR_PRCFG3, CSR_PRCFG3, STLB_WAYS, 7); env->CSR_PRCFG3 = FIELD_DP64(env->CSR_PRCFG3, CSR_PRCFG3, STLB_SETS, 8); + cpu->msgint = ON_OFF_AUTO_OFF; loongarch_la464_init_csr(obj); loongarch_cpu_post_init(obj); } @@ -547,12 +376,19 @@ static void loongarch_la132_initfn(Object *obj) data = FIELD_DP32(data, CPUCFG1, HP, 1); data = FIELD_DP32(data, CPUCFG1, CRC, 1); env->cpucfg[1] = data; + cpu->msgint = ON_OFF_AUTO_OFF; } static void loongarch_max_initfn(Object *obj) { + LoongArchCPU *cpu = LOONGARCH_CPU(obj); /* '-cpu max' for TCG: we use cpu la464. */ loongarch_la464_initfn(obj); + + if (tcg_enabled()) { + cpu->env.cpucfg[1] = FIELD_DP32(cpu->env.cpucfg[1], CPUCFG1, MSG_INT, 1); + cpu->msgint = ON_OFF_AUTO_AUTO; + } } static void loongarch_cpu_reset_hold(Object *obj, ResetType type) @@ -677,96 +513,6 @@ static void loongarch_cpu_unrealizefn(DeviceState *dev) lacc->parent_unrealize(dev); } -static bool loongarch_get_lsx(Object *obj, Error **errp) -{ - return LOONGARCH_CPU(obj)->lsx != ON_OFF_AUTO_OFF; -} - -static void loongarch_set_lsx(Object *obj, bool value, Error **errp) -{ - LoongArchCPU *cpu = LOONGARCH_CPU(obj); - uint32_t val; - - cpu->lsx = value ? 
ON_OFF_AUTO_ON : ON_OFF_AUTO_OFF; - if (cpu->lsx == ON_OFF_AUTO_OFF) { - cpu->lasx = ON_OFF_AUTO_OFF; - if (cpu->lasx == ON_OFF_AUTO_ON) { - error_setg(errp, "Failed to disable LSX since LASX is enabled"); - return; - } - } - - if (kvm_enabled()) { - /* kvm feature detection in function kvm_arch_init_vcpu */ - return; - } - - /* LSX feature detection in TCG mode */ - val = cpu->env.cpucfg[2]; - if (cpu->lsx == ON_OFF_AUTO_ON) { - if (FIELD_EX32(val, CPUCFG2, LSX) == 0) { - error_setg(errp, "Failed to enable LSX in TCG mode"); - return; - } - } else { - cpu->env.cpucfg[2] = FIELD_DP32(val, CPUCFG2, LASX, 0); - val = cpu->env.cpucfg[2]; - } - - cpu->env.cpucfg[2] = FIELD_DP32(val, CPUCFG2, LSX, value); -} - -static bool loongarch_get_lasx(Object *obj, Error **errp) -{ - return LOONGARCH_CPU(obj)->lasx != ON_OFF_AUTO_OFF; -} - -static void loongarch_set_lasx(Object *obj, bool value, Error **errp) -{ - LoongArchCPU *cpu = LOONGARCH_CPU(obj); - uint32_t val; - - cpu->lasx = value ? ON_OFF_AUTO_ON : ON_OFF_AUTO_OFF; - if ((cpu->lsx == ON_OFF_AUTO_OFF) && (cpu->lasx == ON_OFF_AUTO_ON)) { - error_setg(errp, "Failed to enable LASX since lSX is disabled"); - return; - } - - if (kvm_enabled()) { - /* kvm feature detection in function kvm_arch_init_vcpu */ - return; - } - - /* LASX feature detection in TCG mode */ - val = cpu->env.cpucfg[2]; - if (cpu->lasx == ON_OFF_AUTO_ON) { - if (FIELD_EX32(val, CPUCFG2, LASX) == 0) { - error_setg(errp, "Failed to enable LASX in TCG mode"); - return; - } - } - - cpu->env.cpucfg[2] = FIELD_DP32(val, CPUCFG2, LASX, value); -} - -void loongarch_cpu_post_init(Object *obj) -{ - LoongArchCPU *cpu = LOONGARCH_CPU(obj); - - cpu->lbt = ON_OFF_AUTO_OFF; - cpu->pmu = ON_OFF_AUTO_OFF; - cpu->lsx = ON_OFF_AUTO_AUTO; - cpu->lasx = ON_OFF_AUTO_AUTO; - object_property_add_bool(obj, "lsx", loongarch_get_lsx, - loongarch_set_lsx); - object_property_add_bool(obj, "lasx", loongarch_get_lasx, - loongarch_set_lasx); - /* lbt is enabled only in kvm mode, not supported in tcg mode */ - if (kvm_enabled()) { - kvm_loongarch_cpu_post_init(cpu); - } -} - static void loongarch_cpu_init(Object *obj) { #ifndef CONFIG_USER_ONLY @@ -875,29 +621,6 @@ static void loongarch_cpu_dump_state(CPUState *cs, FILE *f, int flags) } } -#ifdef CONFIG_TCG -static const TCGCPUOps loongarch_tcg_ops = { - .guest_default_memory_order = 0, - .mttcg_supported = true, - - .initialize = loongarch_translate_init, - .translate_code = loongarch_translate_code, - .get_tb_cpu_state = loongarch_get_tb_cpu_state, - .synchronize_from_tb = loongarch_cpu_synchronize_from_tb, - .restore_state_to_opc = loongarch_restore_state_to_opc, - .mmu_index = loongarch_cpu_mmu_index, - -#ifndef CONFIG_USER_ONLY - .tlb_fill = loongarch_cpu_tlb_fill, - .cpu_exec_interrupt = loongarch_cpu_exec_interrupt, - .cpu_exec_halt = loongarch_cpu_has_work, - .cpu_exec_reset = cpu_reset, - .do_interrupt = loongarch_cpu_do_interrupt, - .do_transaction_failed = loongarch_cpu_do_transaction_failed, -#endif -}; -#endif /* CONFIG_TCG */ - #ifndef CONFIG_USER_ONLY #include "hw/core/sysemu-cpu-ops.h" diff --git a/target/loongarch/cpu.h b/target/loongarch/cpu.h index 262bf87..b8e3b46 100644 --- a/target/loongarch/cpu.h +++ b/target/loongarch/cpu.h @@ -21,27 +21,6 @@ #include "cpu-csr.h" #include "cpu-qom.h" -#define IOCSRF_TEMP 0 -#define IOCSRF_NODECNT 1 -#define IOCSRF_MSI 2 -#define IOCSRF_EXTIOI 3 -#define IOCSRF_CSRIPI 4 -#define IOCSRF_FREQCSR 5 -#define IOCSRF_FREQSCALE 6 -#define IOCSRF_DVFSV1 7 -#define IOCSRF_GMOD 9 -#define IOCSRF_VM 11 - -#define 
VERSION_REG 0x0 -#define FEATURE_REG 0x8 -#define VENDOR_REG 0x10 -#define CPUNAME_REG 0x20 -#define MISC_FUNC_REG 0x420 -#define IOCSRM_EXTIOI_EN 48 -#define IOCSRM_EXTIOI_INT_ENCODE 49 - -#define IOCSR_MEM_SIZE 0x428 - #define FCSR0_M1 0x1f /* FCSR1 mask, Enables */ #define FCSR0_M2 0x1f1f0000 /* FCSR2 mask, Cause and Flags */ #define FCSR0_M3 0x300 /* FCSR3 mask, Round Mode */ @@ -238,9 +217,10 @@ FIELD(CSR_CRMD, WE, 9, 1) extern const char * const regnames[32]; extern const char * const fregnames[32]; -#define N_IRQS 13 +#define N_IRQS 15 #define IRQ_TIMER 11 #define IRQ_IPI 12 +#define INT_DMSI 14 #define LOONGARCH_STLB 2048 /* 2048 STLB */ #define LOONGARCH_MTLB 64 /* 64 MTLB */ @@ -254,6 +234,13 @@ FIELD(TLB_MISC, ASID, 1, 10) FIELD(TLB_MISC, VPPN, 13, 35) FIELD(TLB_MISC, PS, 48, 6) +/*Msg interrupt registers */ +#define N_MSGIS 4 +FIELD(CSR_MSGIS, IS, 0, 63) +FIELD(CSR_MSGIR, INTNUM, 0, 8) +FIELD(CSR_MSGIR, ACTIVE, 31, 1) +FIELD(CSR_MSGIE, PT, 0, 8) + #define LSX_LEN (128) #define LASX_LEN (256) @@ -371,6 +358,10 @@ typedef struct CPUArchState { uint64_t CSR_DBG; uint64_t CSR_DERA; uint64_t CSR_DSAVE; + /* Msg interrupt registers */ + uint64_t CSR_MSGIS[N_MSGIS]; + uint64_t CSR_MSGIR; + uint64_t CSR_MSGIE; struct { uint64_t guest_addr; } stealtime; @@ -387,11 +378,7 @@ typedef struct CPUArchState { #endif AddressSpace *address_space_iocsr; - bool load_elf; - uint64_t elf_address; uint32_t mp_state; - - struct loongarch_boot_info *boot_info; #endif } CPULoongArchState; @@ -417,6 +404,7 @@ struct ArchCPU { OnOffAuto pmu; OnOffAuto lsx; OnOffAuto lasx; + OnOffAuto msgint; OnOffAuto kvm_pv_ipi; OnOffAuto kvm_steal_time; int32_t socket_id; /* socket-id of this CPU */ @@ -494,14 +482,4 @@ static inline void set_pc(CPULoongArchState *env, uint64_t value) #define CPU_RESOLVING_TYPE TYPE_LOONGARCH_CPU -void loongarch_cpu_post_init(Object *obj); - -#ifdef CONFIG_KVM -void kvm_loongarch_cpu_post_init(LoongArchCPU *cpu); -#else -static inline void kvm_loongarch_cpu_post_init(LoongArchCPU *cpu) -{ -} -#endif - #endif /* LOONGARCH_CPU_H */ diff --git a/target/loongarch/cpu_helper.c b/target/loongarch/cpu_helper.c index e172b11..867e7c8 100644 --- a/target/loongarch/cpu_helper.c +++ b/target/loongarch/cpu_helper.c @@ -13,10 +13,11 @@ #include "exec/target_page.h" #include "internals.h" #include "cpu-csr.h" +#include "cpu-mmu.h" #include "tcg/tcg_loongarch.h" void get_dir_base_width(CPULoongArchState *env, uint64_t *dir_base, - uint64_t *dir_width, target_ulong level) + uint64_t *dir_width, unsigned int level) { switch (level) { case 1: @@ -43,15 +44,79 @@ void get_dir_base_width(CPULoongArchState *env, uint64_t *dir_base, } } -static int loongarch_page_table_walker(CPULoongArchState *env, hwaddr *physical, - int *prot, target_ulong address) +TLBRet loongarch_check_pte(CPULoongArchState *env, MMUContext *context, + MMUAccessType access_type, int mmu_idx) +{ + uint64_t plv = mmu_idx; + uint64_t tlb_entry, tlb_ppn; + uint8_t tlb_ps, tlb_v, tlb_d, tlb_plv, tlb_nx, tlb_nr, tlb_rplv; + + tlb_entry = context->pte; + tlb_ps = context->ps; + tlb_v = FIELD_EX64(tlb_entry, TLBENTRY, V); + tlb_d = FIELD_EX64(tlb_entry, TLBENTRY, D); + tlb_plv = FIELD_EX64(tlb_entry, TLBENTRY, PLV); + if (is_la64(env)) { + tlb_ppn = FIELD_EX64(tlb_entry, TLBENTRY_64, PPN); + tlb_nx = FIELD_EX64(tlb_entry, TLBENTRY_64, NX); + tlb_nr = FIELD_EX64(tlb_entry, TLBENTRY_64, NR); + tlb_rplv = FIELD_EX64(tlb_entry, TLBENTRY_64, RPLV); + } else { + tlb_ppn = FIELD_EX64(tlb_entry, TLBENTRY_32, PPN); + tlb_nx = 0; + tlb_nr = 0; + 
tlb_rplv = 0; + } + + /* Remove sw bit between bit12 -- bit PS*/ + tlb_ppn = tlb_ppn & ~(((0x1UL << (tlb_ps - 12)) - 1)); + + /* Check access rights */ + if (!tlb_v) { + return TLBRET_INVALID; + } + + if (access_type == MMU_INST_FETCH && tlb_nx) { + return TLBRET_XI; + } + + if (access_type == MMU_DATA_LOAD && tlb_nr) { + return TLBRET_RI; + } + + if (((tlb_rplv == 0) && (plv > tlb_plv)) || + ((tlb_rplv == 1) && (plv != tlb_plv))) { + return TLBRET_PE; + } + + if ((access_type == MMU_DATA_STORE) && !tlb_d) { + return TLBRET_DIRTY; + } + + context->physical = (tlb_ppn << R_TLBENTRY_64_PPN_SHIFT) | + (context->addr & MAKE_64BIT_MASK(0, tlb_ps)); + context->prot = PAGE_READ; + if (tlb_d) { + context->prot |= PAGE_WRITE; + } + if (!tlb_nx) { + context->prot |= PAGE_EXEC; + } + return TLBRET_MATCH; +} + +static TLBRet loongarch_page_table_walker(CPULoongArchState *env, + MMUContext *context, + int access_type, int mmu_idx) { CPUState *cs = env_cpu(env); target_ulong index, phys; uint64_t dir_base, dir_width; uint64_t base; int level; + vaddr address; + address = context->addr; if ((address >> 63) & 0x1) { base = env->CSR_PGDH; } else { @@ -93,41 +158,20 @@ static int loongarch_page_table_walker(CPULoongArchState *env, hwaddr *physical, base = ldq_phys(cs->as, phys); } - /* TODO: check plv and other bits? */ - - /* base is pte, in normal pte format */ - if (!FIELD_EX64(base, TLBENTRY, V)) { - return TLBRET_NOMATCH; - } - - if (!FIELD_EX64(base, TLBENTRY, D)) { - *prot = PAGE_READ; - } else { - *prot = PAGE_READ | PAGE_WRITE; - } - - /* get TARGET_PAGE_SIZE aligned physical address */ - base += (address & TARGET_PHYS_MASK) & ((1 << dir_base) - 1); - /* mask RPLV, NX, NR bits */ - base = FIELD_DP64(base, TLBENTRY_64, RPLV, 0); - base = FIELD_DP64(base, TLBENTRY_64, NX, 0); - base = FIELD_DP64(base, TLBENTRY_64, NR, 0); - /* mask other attribute bits */ - *physical = base & TARGET_PAGE_MASK; - - return 0; + context->ps = dir_base; + context->pte = base; + return loongarch_check_pte(env, context, access_type, mmu_idx); } -static int loongarch_map_address(CPULoongArchState *env, hwaddr *physical, - int *prot, target_ulong address, - MMUAccessType access_type, int mmu_idx, - int is_debug) +static TLBRet loongarch_map_address(CPULoongArchState *env, + MMUContext *context, + MMUAccessType access_type, int mmu_idx, + int is_debug) { - int ret; + TLBRet ret; if (tcg_enabled()) { - ret = loongarch_get_addr_from_tlb(env, physical, prot, address, - access_type, mmu_idx); + ret = loongarch_get_addr_from_tlb(env, context, access_type, mmu_idx); if (ret != TLBRET_NOMATCH) { return ret; } @@ -139,14 +183,13 @@ static int loongarch_map_address(CPULoongArchState *env, hwaddr *physical, * legal mapping, even if the mapping is not yet in TLB. return 0 if * there is a valid map, else none zero. 
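With the MMUContext structure from cpu-mmu.h above, the walker and its callers exchange a single context instead of separate physical/prot out-parameters, and every path returns a TLBRet code. A minimal caller, modelled on the loongarch_cpu_get_phys_page_debug() conversion later in this hunk (purely illustrative; the name example_debug_translate is not in the patch):

#include "qemu/osdep.h"
#include "cpu.h"
#include "cpu-mmu.h"

static hwaddr example_debug_translate(CPUState *cs, vaddr va)
{
    CPULoongArchState *env = cpu_env(cs);
    MMUContext context;

    context.addr = va;
    if (get_physical_address(env, &context, MMU_DATA_LOAD,
                             cpu_mmu_index(cs, false), 1) != TLBRET_MATCH) {
        return -1;                       /* no valid mapping */
    }
    /* On TLBRET_MATCH the walker has filled in physical and prot. */
    return context.physical;
}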
*/ - return loongarch_page_table_walker(env, physical, prot, address); + return loongarch_page_table_walker(env, context, access_type, mmu_idx); } return TLBRET_NOMATCH; } -static hwaddr dmw_va2pa(CPULoongArchState *env, target_ulong va, - target_ulong dmw) +static hwaddr dmw_va2pa(CPULoongArchState *env, vaddr va, target_ulong dmw) { if (is_la64(env)) { return va & TARGET_VIRT_MASK; @@ -157,9 +200,9 @@ static hwaddr dmw_va2pa(CPULoongArchState *env, target_ulong va, } } -int get_physical_address(CPULoongArchState *env, hwaddr *physical, - int *prot, target_ulong address, - MMUAccessType access_type, int mmu_idx, int is_debug) +TLBRet get_physical_address(CPULoongArchState *env, MMUContext *context, + MMUAccessType access_type, int mmu_idx, + int is_debug) { int user_mode = mmu_idx == MMU_USER_IDX; int kernel_mode = mmu_idx == MMU_KERNEL_IDX; @@ -167,11 +210,13 @@ int get_physical_address(CPULoongArchState *env, hwaddr *physical, int64_t addr_high; uint8_t da = FIELD_EX64(env->CSR_CRMD, CSR_CRMD, DA); uint8_t pg = FIELD_EX64(env->CSR_CRMD, CSR_CRMD, PG); + vaddr address; /* Check PG and DA */ + address = context->addr; if (da & !pg) { - *physical = address & TARGET_PHYS_MASK; - *prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC; + context->physical = address & TARGET_PHYS_MASK; + context->prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC; return TLBRET_MATCH; } @@ -189,32 +234,31 @@ int get_physical_address(CPULoongArchState *env, hwaddr *physical, base_c = FIELD_EX64(env->CSR_DMW[i], CSR_DMW_32, VSEG); } if ((plv & env->CSR_DMW[i]) && (base_c == base_v)) { - *physical = dmw_va2pa(env, address, env->CSR_DMW[i]); - *prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC; + context->physical = dmw_va2pa(env, address, env->CSR_DMW[i]); + context->prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC; return TLBRET_MATCH; } } /* Check valid extension */ - addr_high = sextract64(address, TARGET_VIRT_ADDR_SPACE_BITS, 16); - if (!(addr_high == 0 || addr_high == -1)) { + addr_high = (int64_t)address >> (TARGET_VIRT_ADDR_SPACE_BITS - 1); + if (!(addr_high == 0 || addr_high == -1ULL)) { return TLBRET_BADADDR; } /* Mapped address */ - return loongarch_map_address(env, physical, prot, address, - access_type, mmu_idx, is_debug); + return loongarch_map_address(env, context, access_type, mmu_idx, is_debug); } hwaddr loongarch_cpu_get_phys_page_debug(CPUState *cs, vaddr addr) { CPULoongArchState *env = cpu_env(cs); - hwaddr phys_addr; - int prot; + MMUContext context; - if (get_physical_address(env, &phys_addr, &prot, addr, MMU_DATA_LOAD, - cpu_mmu_index(cs, false), 1) != 0) { + context.addr = addr; + if (get_physical_address(env, &context, MMU_DATA_LOAD, + cpu_mmu_index(cs, false), 1) != TLBRET_MATCH) { return -1; } - return phys_addr; + return context.physical; } diff --git a/target/loongarch/csr.c b/target/loongarch/csr.c index 7ea0a30..f973780 100644 --- a/target/loongarch/csr.c +++ b/target/loongarch/csr.c @@ -97,6 +97,11 @@ static CSRInfo csr_info[] = { CSR_OFF(DBG), CSR_OFF(DERA), CSR_OFF(DSAVE), + CSR_OFF_ARRAY(MSGIS, 0), + CSR_OFF_ARRAY(MSGIS, 1), + CSR_OFF_ARRAY(MSGIS, 2), + CSR_OFF_ARRAY(MSGIS, 3), + CSR_OFF(MSGIR), }; CSRInfo *get_csr(unsigned int csr_num) diff --git a/target/loongarch/gdbstub.c b/target/loongarch/gdbstub.c index 471eda2..23a5eec 100644 --- a/target/loongarch/gdbstub.c +++ b/target/loongarch/gdbstub.c @@ -62,7 +62,7 @@ int loongarch_cpu_gdb_read_register(CPUState *cs, GByteArray *mem_buf, int n) int loongarch_cpu_gdb_write_register(CPUState *cs, uint8_t *mem_buf, int n) { CPULoongArchState *env = cpu_env(cs); - 
target_ulong tmp; + uint64_t tmp; int length = 0; if (n < 0 || n > 34) { diff --git a/target/loongarch/internals.h b/target/loongarch/internals.h index a7384b0..8793bd9 100644 --- a/target/loongarch/internals.h +++ b/target/loongarch/internals.h @@ -24,27 +24,12 @@ void G_NORETURN do_raise_exception(CPULoongArchState *env, uint32_t exception, uintptr_t pc); -const char *loongarch_exception_name(int32_t exception); - #ifdef CONFIG_TCG int ieee_ex_to_loongarch(int xcpt); void restore_fp_status(CPULoongArchState *env); #endif #ifndef CONFIG_USER_ONLY -enum { - TLBRET_MATCH = 0, - TLBRET_BADADDR = 1, - TLBRET_NOMATCH = 2, - TLBRET_INVALID = 3, - TLBRET_DIRTY = 4, - TLBRET_RI = 5, - TLBRET_XI = 6, - TLBRET_PE = 7, -}; - -bool check_ps(CPULoongArchState *ent, uint8_t ps); - extern const VMStateDescription vmstate_loongarch_cpu; void loongarch_cpu_set_irq(void *opaque, int irq, int level); @@ -54,13 +39,8 @@ uint64_t cpu_loongarch_get_constant_timer_counter(LoongArchCPU *cpu); uint64_t cpu_loongarch_get_constant_timer_ticks(LoongArchCPU *cpu); void cpu_loongarch_store_constant_timer_config(LoongArchCPU *cpu, uint64_t value); -int get_physical_address(CPULoongArchState *env, hwaddr *physical, - int *prot, target_ulong address, - MMUAccessType access_type, int mmu_idx, int is_debug); -void get_dir_base_width(CPULoongArchState *env, uint64_t *dir_base, - uint64_t *dir_width, target_ulong level); -hwaddr loongarch_cpu_get_phys_page_debug(CPUState *cpu, vaddr addr); - +bool loongarch_cpu_has_work(CPUState *cs); +bool cpu_loongarch_hw_interrupts_pending(CPULoongArchState *env); #endif /* !CONFIG_USER_ONLY */ uint64_t read_fcc(CPULoongArchState *env); diff --git a/target/loongarch/kvm/kvm.c b/target/loongarch/kvm/kvm.c index 1bda570..4e4f4e7 100644 --- a/target/loongarch/kvm/kvm.c +++ b/target/loongarch/kvm/kvm.c @@ -325,7 +325,7 @@ static int kvm_loongarch_get_csr(CPUState *cs) return ret; } -static int kvm_loongarch_put_csr(CPUState *cs, int level) +static int kvm_loongarch_put_csr(CPUState *cs, KvmPutState level) { int ret = 0; CPULoongArchState *env = cpu_env(cs); @@ -763,7 +763,7 @@ int kvm_arch_get_registers(CPUState *cs, Error **errp) return ret; } -int kvm_arch_put_registers(CPUState *cs, int level, Error **errp) +int kvm_arch_put_registers(CPUState *cs, KvmPutState level, Error **errp) { int ret; static int once; @@ -1071,7 +1071,11 @@ static int kvm_cpu_check_pv_features(CPUState *cs, Error **errp) env->pv_features |= BIT(KVM_FEATURE_VIRT_EXTIOI); } } + return 0; +} +int kvm_arch_pre_create_vcpu(CPUState *cpu, Error **errp) +{ return 0; } @@ -1236,6 +1240,22 @@ void kvm_arch_init_irq_routing(KVMState *s) { } +void kvm_loongarch_init_irq_routing(void) +{ + int i; + + kvm_async_interrupts_allowed = true; + kvm_msi_via_irqfd_allowed = kvm_irqfds_enabled(); + if (kvm_has_gsi_routing()) { + for (i = 0; i < KVM_IRQCHIP_NUM_PINS; ++i) { + kvm_irqchip_add_irq_route(kvm_state, i, 0, i); + } + + kvm_gsi_routing_allowed = true; + kvm_irqchip_commit_routes(kvm_state); + } +} + int kvm_arch_get_default_type(MachineState *ms) { return 0; @@ -1249,7 +1269,12 @@ int kvm_arch_init(MachineState *ms, KVMState *s) int kvm_arch_irqchip_create(KVMState *s) { - return 0; + if (kvm_kernel_irqchip_split()) { + error_report("kernel_irqchip=split is not supported on LoongArch"); + exit(1); + } + + return kvm_check_extension(s, KVM_CAP_DEVICE_CTRL); } void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run) diff --git a/target/loongarch/kvm/kvm_loongarch.h b/target/loongarch/kvm/kvm_loongarch.h index 1051a34..5147567 
100644 --- a/target/loongarch/kvm/kvm_loongarch.h +++ b/target/loongarch/kvm/kvm_loongarch.h @@ -5,11 +5,11 @@ * Copyright (c) 2023 Loongson Technology Corporation Limited */ -#include "cpu.h" - #ifndef QEMU_KVM_LOONGARCH_H #define QEMU_KVM_LOONGARCH_H +void kvm_loongarch_cpu_post_init(LoongArchCPU *cpu); +void kvm_loongarch_init_irq_routing(void); int kvm_loongarch_set_interrupt(LoongArchCPU *cpu, int irq, int level); void kvm_arch_reset_vcpu(CPUState *cs); diff --git a/target/loongarch/loongarch-qmp-cmds.c b/target/loongarch/loongarch-qmp-cmds.c index 6f732d8..1d8cd32 100644 --- a/target/loongarch/loongarch-qmp-cmds.c +++ b/target/loongarch/loongarch-qmp-cmds.c @@ -7,8 +7,9 @@ */ #include "qemu/osdep.h" +#include "qemu/target-info.h" #include "qapi/error.h" -#include "qapi/qapi-commands-machine-target.h" +#include "qapi/qapi-commands-machine.h" #include "cpu.h" #include "qobject/qdict.h" #include "qapi/qobject-input-visitor.h" @@ -32,7 +33,7 @@ CpuDefinitionInfoList *qmp_query_cpu_definitions(Error **errp) CpuDefinitionInfoList *cpu_list = NULL; GSList *list; - list = object_class_get_list(TYPE_LOONGARCH_CPU, false); + list = object_class_get_list(target_cpu_type(), false); g_slist_foreach(list, loongarch_cpu_add_definition, &cpu_list); g_slist_free(list); diff --git a/target/loongarch/machine.c b/target/loongarch/machine.c index 4e70f5c..0366a50 100644 --- a/target/loongarch/machine.c +++ b/target/loongarch/machine.c @@ -45,6 +45,26 @@ static const VMStateDescription vmstate_fpu = { }, }; +static bool msgint_needed(void *opaque) +{ + LoongArchCPU *cpu = opaque; + + return FIELD_EX64(cpu->env.cpucfg[1], CPUCFG1, MSG_INT); +} + +static const VMStateDescription vmstate_msgint = { + .name = "cpu/msgint", + .version_id = 1, + .minimum_version_id = 1, + .needed = msgint_needed, + .fields = (const VMStateField[]) { + VMSTATE_UINT64_ARRAY(env.CSR_MSGIS, LoongArchCPU, N_MSGIS), + VMSTATE_UINT64(env.CSR_MSGIR, LoongArchCPU), + VMSTATE_UINT64(env.CSR_MSGIE, LoongArchCPU), + VMSTATE_END_OF_LIST() + }, +}; + static const VMStateDescription vmstate_lsxh_reg = { .name = "lsxh_reg", .version_id = 1, @@ -168,11 +188,11 @@ static const VMStateDescription vmstate_tlb = { /* LoongArch CPU state */ const VMStateDescription vmstate_loongarch_cpu = { .name = "cpu", - .version_id = 3, - .minimum_version_id = 3, + .version_id = 4, + .minimum_version_id = 4, .fields = (const VMStateField[]) { - VMSTATE_UINTTL_ARRAY(env.gpr, LoongArchCPU, 32), - VMSTATE_UINTTL(env.pc, LoongArchCPU), + VMSTATE_UINT64_ARRAY(env.gpr, LoongArchCPU, 32), + VMSTATE_UINT64(env.pc, LoongArchCPU), /* Remaining CSRs */ VMSTATE_UINT64(env.CSR_CRMD, LoongArchCPU), @@ -245,6 +265,7 @@ const VMStateDescription vmstate_loongarch_cpu = { &vmstate_tlb, #endif &vmstate_lbt, + &vmstate_msgint, NULL } }; diff --git a/target/loongarch/tcg/csr_helper.c b/target/loongarch/tcg/csr_helper.c index 2942d7f..5ebe15f 100644 --- a/target/loongarch/tcg/csr_helper.c +++ b/target/loongarch/tcg/csr_helper.c @@ -16,6 +16,7 @@ #include "accel/tcg/cpu-ldst.h" #include "hw/irq.h" #include "cpu-csr.h" +#include "cpu-mmu.h" target_ulong helper_csrwr_stlbps(CPULoongArchState *env, target_ulong val) { @@ -25,11 +26,16 @@ target_ulong helper_csrwr_stlbps(CPULoongArchState *env, target_ulong val) * The real hardware only supports the min tlb_ps is 12 * tlb_ps=0 may cause undefined-behavior. 
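In the csr_helper.c hunk that continues below, the STLBPS write helper now validates the PS field of the incoming value with check_ps() and clears the reserved bits before committing the CSR. A worked sketch of that masking, assuming the CSR_STLBPS layout added to cpu-csr.h earlier in this series (PS in bits [4:0], RESERVE in bits [31:5]):

#include "qemu/osdep.h"
#include "hw/registerfields.h"

FIELD(CSR_STLBPS, PS, 0, 5)         /* as declared in cpu-csr.h */
FIELD(CSR_STLBPS, RESERVE, 5, 27)

static uint64_t example_stlbps_commit(uint64_t guest_val)
{
    /* e.g. guest_val = 0xabcd000e: PS = 0xe, reserved bits set by guest. */
    uint64_t val = FIELD_DP64(guest_val, CSR_STLBPS, RESERVE, 0);

    /* Only the PS field survives: val == 0xe here. */
    return val;
}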
*/ - uint8_t tlb_ps = FIELD_EX64(env->CSR_STLBPS, CSR_STLBPS, PS); + uint8_t tlb_ps = FIELD_EX64(val, CSR_STLBPS, PS); if (!check_ps(env, tlb_ps)) { qemu_log_mask(LOG_GUEST_ERROR, "Attempted set ps %d\n", tlb_ps); + } else { + /* Only update PS field, reserved bit keeps zero */ + val = FIELD_DP64(val, CSR_STLBPS, RESERVE, 0); + env->CSR_STLBPS = val; } + return old_v; } @@ -68,6 +74,27 @@ target_ulong helper_csrrd_tval(CPULoongArchState *env) return cpu_loongarch_get_constant_timer_ticks(cpu); } +target_ulong helper_csrrd_msgir(CPULoongArchState *env) +{ + int irq, new; + + irq = find_first_bit((unsigned long *)env->CSR_MSGIS, 256); + if (irq < 256) { + clear_bit(irq, (unsigned long *)env->CSR_MSGIS); + new = find_first_bit((unsigned long *)env->CSR_MSGIS, 256); + if (new < 256) { + return irq; + } + + env->CSR_ESTAT = FIELD_DP64(env->CSR_ESTAT, CSR_ESTAT, MSGINT, 0); + } else { + /* bit 31 set 1 for no invalid irq */ + irq = BIT(31); + } + + return irq; +} + target_ulong helper_csrwr_estat(CPULoongArchState *env, target_ulong val) { int64_t old_v = env->CSR_ESTAT; @@ -131,8 +158,8 @@ target_ulong helper_csrwr_pwcl(CPULoongArchState *env, target_ulong val) } if (!check_ps(env, ptbase)) { qemu_log_mask(LOG_GUEST_ERROR, - "Attrmpted set ptbase 2^%d\n", ptbase); + "Attempted set ptbase 2^%d\n", ptbase); } - env->CSR_PWCL =val; + env->CSR_PWCL = val; return old_v; } diff --git a/target/loongarch/tcg/helper.h b/target/loongarch/tcg/helper.h index 1d5cb01..7e508c5 100644 --- a/target/loongarch/tcg/helper.h +++ b/target/loongarch/tcg/helper.h @@ -100,6 +100,7 @@ DEF_HELPER_1(rdtime_d, i64, env) DEF_HELPER_1(csrrd_pgd, i64, env) DEF_HELPER_1(csrrd_cpuid, i64, env) DEF_HELPER_1(csrrd_tval, i64, env) +DEF_HELPER_1(csrrd_msgir, i64, env) DEF_HELPER_2(csrwr_stlbps, i64, env, tl) DEF_HELPER_2(csrwr_estat, i64, env, tl) DEF_HELPER_2(csrwr_asid, i64, env, tl) @@ -128,7 +129,7 @@ DEF_HELPER_2(invtlb_all_asid, void, env, tl) DEF_HELPER_3(invtlb_page_asid, void, env, tl, tl) DEF_HELPER_3(invtlb_page_asid_or_g, void, env, tl, tl) -DEF_HELPER_4(lddir, tl, env, tl, tl, i32) +DEF_HELPER_4(lddir, tl, env, tl, i32, i32) DEF_HELPER_4(ldpte, void, env, tl, tl, i32) DEF_HELPER_1(ertn, void, env) DEF_HELPER_1(idle, void, env) diff --git a/target/loongarch/tcg/insn_trans/trans_atomic.c.inc b/target/loongarch/tcg/insn_trans/trans_atomic.c.inc index 3d70d75..77eeedb 100644 --- a/target/loongarch/tcg/insn_trans/trans_atomic.c.inc +++ b/target/loongarch/tcg/insn_trans/trans_atomic.c.inc @@ -74,38 +74,38 @@ TRANS(sc_w, ALL, gen_sc, MO_TESL) TRANS(ll_d, 64, gen_ll, MO_TEUQ) TRANS(sc_d, 64, gen_sc, MO_TEUQ) TRANS(amswap_w, LAM, gen_am, tcg_gen_atomic_xchg_tl, MO_TESL) -TRANS(amswap_d, LAM, gen_am, tcg_gen_atomic_xchg_tl, MO_TEUQ) +TRANS64(amswap_d, LAM, gen_am, tcg_gen_atomic_xchg_tl, MO_TEUQ) TRANS(amadd_w, LAM, gen_am, tcg_gen_atomic_fetch_add_tl, MO_TESL) -TRANS(amadd_d, LAM, gen_am, tcg_gen_atomic_fetch_add_tl, MO_TEUQ) +TRANS64(amadd_d, LAM, gen_am, tcg_gen_atomic_fetch_add_tl, MO_TEUQ) TRANS(amand_w, LAM, gen_am, tcg_gen_atomic_fetch_and_tl, MO_TESL) -TRANS(amand_d, LAM, gen_am, tcg_gen_atomic_fetch_and_tl, MO_TEUQ) +TRANS64(amand_d, LAM, gen_am, tcg_gen_atomic_fetch_and_tl, MO_TEUQ) TRANS(amor_w, LAM, gen_am, tcg_gen_atomic_fetch_or_tl, MO_TESL) -TRANS(amor_d, LAM, gen_am, tcg_gen_atomic_fetch_or_tl, MO_TEUQ) +TRANS64(amor_d, LAM, gen_am, tcg_gen_atomic_fetch_or_tl, MO_TEUQ) TRANS(amxor_w, LAM, gen_am, tcg_gen_atomic_fetch_xor_tl, MO_TESL) -TRANS(amxor_d, LAM, gen_am, tcg_gen_atomic_fetch_xor_tl, MO_TEUQ) 
+TRANS64(amxor_d, LAM, gen_am, tcg_gen_atomic_fetch_xor_tl, MO_TEUQ) TRANS(ammax_w, LAM, gen_am, tcg_gen_atomic_fetch_smax_tl, MO_TESL) -TRANS(ammax_d, LAM, gen_am, tcg_gen_atomic_fetch_smax_tl, MO_TEUQ) +TRANS64(ammax_d, LAM, gen_am, tcg_gen_atomic_fetch_smax_tl, MO_TEUQ) TRANS(ammin_w, LAM, gen_am, tcg_gen_atomic_fetch_smin_tl, MO_TESL) -TRANS(ammin_d, LAM, gen_am, tcg_gen_atomic_fetch_smin_tl, MO_TEUQ) +TRANS64(ammin_d, LAM, gen_am, tcg_gen_atomic_fetch_smin_tl, MO_TEUQ) TRANS(ammax_wu, LAM, gen_am, tcg_gen_atomic_fetch_umax_tl, MO_TESL) -TRANS(ammax_du, LAM, gen_am, tcg_gen_atomic_fetch_umax_tl, MO_TEUQ) +TRANS64(ammax_du, LAM, gen_am, tcg_gen_atomic_fetch_umax_tl, MO_TEUQ) TRANS(ammin_wu, LAM, gen_am, tcg_gen_atomic_fetch_umin_tl, MO_TESL) -TRANS(ammin_du, LAM, gen_am, tcg_gen_atomic_fetch_umin_tl, MO_TEUQ) +TRANS64(ammin_du, LAM, gen_am, tcg_gen_atomic_fetch_umin_tl, MO_TEUQ) TRANS(amswap_db_w, LAM, gen_am, tcg_gen_atomic_xchg_tl, MO_TESL) -TRANS(amswap_db_d, LAM, gen_am, tcg_gen_atomic_xchg_tl, MO_TEUQ) +TRANS64(amswap_db_d, LAM, gen_am, tcg_gen_atomic_xchg_tl, MO_TEUQ) TRANS(amadd_db_w, LAM, gen_am, tcg_gen_atomic_fetch_add_tl, MO_TESL) -TRANS(amadd_db_d, LAM, gen_am, tcg_gen_atomic_fetch_add_tl, MO_TEUQ) +TRANS64(amadd_db_d, LAM, gen_am, tcg_gen_atomic_fetch_add_tl, MO_TEUQ) TRANS(amand_db_w, LAM, gen_am, tcg_gen_atomic_fetch_and_tl, MO_TESL) -TRANS(amand_db_d, LAM, gen_am, tcg_gen_atomic_fetch_and_tl, MO_TEUQ) +TRANS64(amand_db_d, LAM, gen_am, tcg_gen_atomic_fetch_and_tl, MO_TEUQ) TRANS(amor_db_w, LAM, gen_am, tcg_gen_atomic_fetch_or_tl, MO_TESL) -TRANS(amor_db_d, LAM, gen_am, tcg_gen_atomic_fetch_or_tl, MO_TEUQ) +TRANS64(amor_db_d, LAM, gen_am, tcg_gen_atomic_fetch_or_tl, MO_TEUQ) TRANS(amxor_db_w, LAM, gen_am, tcg_gen_atomic_fetch_xor_tl, MO_TESL) -TRANS(amxor_db_d, LAM, gen_am, tcg_gen_atomic_fetch_xor_tl, MO_TEUQ) +TRANS64(amxor_db_d, LAM, gen_am, tcg_gen_atomic_fetch_xor_tl, MO_TEUQ) TRANS(ammax_db_w, LAM, gen_am, tcg_gen_atomic_fetch_smax_tl, MO_TESL) -TRANS(ammax_db_d, LAM, gen_am, tcg_gen_atomic_fetch_smax_tl, MO_TEUQ) +TRANS64(ammax_db_d, LAM, gen_am, tcg_gen_atomic_fetch_smax_tl, MO_TEUQ) TRANS(ammin_db_w, LAM, gen_am, tcg_gen_atomic_fetch_smin_tl, MO_TESL) -TRANS(ammin_db_d, LAM, gen_am, tcg_gen_atomic_fetch_smin_tl, MO_TEUQ) +TRANS64(ammin_db_d, LAM, gen_am, tcg_gen_atomic_fetch_smin_tl, MO_TEUQ) TRANS(ammax_db_wu, LAM, gen_am, tcg_gen_atomic_fetch_umax_tl, MO_TESL) -TRANS(ammax_db_du, LAM, gen_am, tcg_gen_atomic_fetch_umax_tl, MO_TEUQ) +TRANS64(ammax_db_du, LAM, gen_am, tcg_gen_atomic_fetch_umax_tl, MO_TEUQ) TRANS(ammin_db_wu, LAM, gen_am, tcg_gen_atomic_fetch_umin_tl, MO_TESL) -TRANS(ammin_db_du, LAM, gen_am, tcg_gen_atomic_fetch_umin_tl, MO_TEUQ) +TRANS64(ammin_db_du, LAM, gen_am, tcg_gen_atomic_fetch_umin_tl, MO_TEUQ) diff --git a/target/loongarch/tcg/insn_trans/trans_extra.c.inc b/target/loongarch/tcg/insn_trans/trans_extra.c.inc index eda3d6e..298a80c 100644 --- a/target/loongarch/tcg/insn_trans/trans_extra.c.inc +++ b/target/loongarch/tcg/insn_trans/trans_extra.c.inc @@ -69,6 +69,10 @@ static bool trans_rdtimeh_w(DisasContext *ctx, arg_rdtimeh_w *a) static bool trans_rdtime_d(DisasContext *ctx, arg_rdtime_d *a) { + if (!avail_64(ctx)) { + return false; + } + return gen_rdtime(ctx, a, 0, 0); } @@ -100,8 +104,8 @@ static bool gen_crc(DisasContext *ctx, arg_rrr *a, TRANS(crc_w_b_w, CRC, gen_crc, gen_helper_crc32, tcg_constant_tl(1)) TRANS(crc_w_h_w, CRC, gen_crc, gen_helper_crc32, tcg_constant_tl(2)) TRANS(crc_w_w_w, CRC, gen_crc, gen_helper_crc32, 
tcg_constant_tl(4)) -TRANS(crc_w_d_w, CRC, gen_crc, gen_helper_crc32, tcg_constant_tl(8)) +TRANS64(crc_w_d_w, CRC, gen_crc, gen_helper_crc32, tcg_constant_tl(8)) TRANS(crcc_w_b_w, CRC, gen_crc, gen_helper_crc32c, tcg_constant_tl(1)) TRANS(crcc_w_h_w, CRC, gen_crc, gen_helper_crc32c, tcg_constant_tl(2)) TRANS(crcc_w_w_w, CRC, gen_crc, gen_helper_crc32c, tcg_constant_tl(4)) -TRANS(crcc_w_d_w, CRC, gen_crc, gen_helper_crc32c, tcg_constant_tl(8)) +TRANS64(crcc_w_d_w, CRC, gen_crc, gen_helper_crc32c, tcg_constant_tl(8)) diff --git a/target/loongarch/tcg/insn_trans/trans_farith.c.inc b/target/loongarch/tcg/insn_trans/trans_farith.c.inc index f4a0dea..ff6cf34 100644 --- a/target/loongarch/tcg/insn_trans/trans_farith.c.inc +++ b/target/loongarch/tcg/insn_trans/trans_farith.c.inc @@ -183,16 +183,16 @@ TRANS(fmaxa_s, FP_SP, gen_fff, gen_helper_fmaxa_s) TRANS(fmaxa_d, FP_DP, gen_fff, gen_helper_fmaxa_d) TRANS(fmina_s, FP_SP, gen_fff, gen_helper_fmina_s) TRANS(fmina_d, FP_DP, gen_fff, gen_helper_fmina_d) -TRANS(fscaleb_s, FP_SP, gen_fff, gen_helper_fscaleb_s) -TRANS(fscaleb_d, FP_DP, gen_fff, gen_helper_fscaleb_d) +TRANS64(fscaleb_s, FP_SP, gen_fff, gen_helper_fscaleb_s) +TRANS64(fscaleb_d, FP_DP, gen_fff, gen_helper_fscaleb_d) TRANS(fsqrt_s, FP_SP, gen_ff, gen_helper_fsqrt_s) TRANS(fsqrt_d, FP_DP, gen_ff, gen_helper_fsqrt_d) TRANS(frecip_s, FP_SP, gen_ff, gen_helper_frecip_s) TRANS(frecip_d, FP_DP, gen_ff, gen_helper_frecip_d) TRANS(frsqrt_s, FP_SP, gen_ff, gen_helper_frsqrt_s) TRANS(frsqrt_d, FP_DP, gen_ff, gen_helper_frsqrt_d) -TRANS(flogb_s, FP_SP, gen_ff, gen_helper_flogb_s) -TRANS(flogb_d, FP_DP, gen_ff, gen_helper_flogb_d) +TRANS64(flogb_s, FP_SP, gen_ff, gen_helper_flogb_s) +TRANS64(flogb_d, FP_DP, gen_ff, gen_helper_flogb_d) TRANS(fclass_s, FP_SP, gen_ff, gen_helper_fclass_s) TRANS(fclass_d, FP_DP, gen_ff, gen_helper_fclass_d) TRANS(fmadd_s, FP_SP, gen_muladd, gen_helper_fmuladd_s, 0) diff --git a/target/loongarch/tcg/insn_trans/trans_fcmp.c.inc b/target/loongarch/tcg/insn_trans/trans_fcmp.c.inc index 3babf69..6a2c030 100644 --- a/target/loongarch/tcg/insn_trans/trans_fcmp.c.inc +++ b/target/loongarch/tcg/insn_trans/trans_fcmp.c.inc @@ -4,10 +4,15 @@ */ /* bit0(signaling/quiet) bit1(lt) bit2(eq) bit3(un) bit4(neq) */ -static uint32_t get_fcmp_flags(int cond) +static uint32_t get_fcmp_flags(DisasContext *ctx, int cond) { uint32_t flags = 0; + /*check cond , cond =[0-8,10,12] */ + if ((cond > 8) &&(cond != 10) && (cond != 12)) { + return -1; + } + if (cond & 0x1) { flags |= FCMP_LT; } @@ -26,9 +31,14 @@ static uint32_t get_fcmp_flags(int cond) static bool trans_fcmp_cond_s(DisasContext *ctx, arg_fcmp_cond_s *a) { TCGv var, src1, src2; - uint32_t flags; + uint32_t flags = get_fcmp_flags(ctx, a->fcond >>1); void (*fn)(TCGv, TCGv_env, TCGv, TCGv, TCGv_i32); + if (flags == -1) { + generate_exception(ctx, EXCCODE_INE); + return true; + } + if (!avail_FP_SP(ctx)) { return false; } @@ -39,8 +49,6 @@ static bool trans_fcmp_cond_s(DisasContext *ctx, arg_fcmp_cond_s *a) src1 = get_fpr(ctx, a->fj); src2 = get_fpr(ctx, a->fk); fn = (a->fcond & 1 ? 
gen_helper_fcmp_s_s : gen_helper_fcmp_c_s); - flags = get_fcmp_flags(a->fcond >> 1); - fn(var, tcg_env, src1, src2, tcg_constant_i32(flags)); tcg_gen_st8_tl(var, tcg_env, offsetof(CPULoongArchState, cf[a->cd])); @@ -50,9 +58,14 @@ static bool trans_fcmp_cond_s(DisasContext *ctx, arg_fcmp_cond_s *a) static bool trans_fcmp_cond_d(DisasContext *ctx, arg_fcmp_cond_d *a) { TCGv var, src1, src2; - uint32_t flags; + uint32_t flags = get_fcmp_flags(ctx, a->fcond >> 1); void (*fn)(TCGv, TCGv_env, TCGv, TCGv, TCGv_i32); + if (flags == -1) { + generate_exception(ctx, EXCCODE_INE); + return true; + } + if (!avail_FP_DP(ctx)) { return false; } @@ -63,8 +76,6 @@ static bool trans_fcmp_cond_d(DisasContext *ctx, arg_fcmp_cond_d *a) src1 = get_fpr(ctx, a->fj); src2 = get_fpr(ctx, a->fk); fn = (a->fcond & 1 ? gen_helper_fcmp_s_d : gen_helper_fcmp_c_d); - flags = get_fcmp_flags(a->fcond >> 1); - fn(var, tcg_env, src1, src2, tcg_constant_i32(flags)); tcg_gen_st8_tl(var, tcg_env, offsetof(CPULoongArchState, cf[a->cd])); diff --git a/target/loongarch/tcg/insn_trans/trans_fcnv.c.inc b/target/loongarch/tcg/insn_trans/trans_fcnv.c.inc index 833c059..ca1d76a 100644 --- a/target/loongarch/tcg/insn_trans/trans_fcnv.c.inc +++ b/target/loongarch/tcg/insn_trans/trans_fcnv.c.inc @@ -29,5 +29,5 @@ TRANS(ffint_s_w, FP_SP, gen_ff, gen_helper_ffint_s_w) TRANS(ffint_s_l, FP_SP, gen_ff, gen_helper_ffint_s_l) TRANS(ffint_d_w, FP_DP, gen_ff, gen_helper_ffint_d_w) TRANS(ffint_d_l, FP_DP, gen_ff, gen_helper_ffint_d_l) -TRANS(frint_s, FP_SP, gen_ff, gen_helper_frint_s) -TRANS(frint_d, FP_DP, gen_ff, gen_helper_frint_d) +TRANS64(frint_s, FP_SP, gen_ff, gen_helper_frint_s) +TRANS64(frint_d, FP_DP, gen_ff, gen_helper_frint_d) diff --git a/target/loongarch/tcg/insn_trans/trans_fmemory.c.inc b/target/loongarch/tcg/insn_trans/trans_fmemory.c.inc index 13452bc..79da471 100644 --- a/target/loongarch/tcg/insn_trans/trans_fmemory.c.inc +++ b/target/loongarch/tcg/insn_trans/trans_fmemory.c.inc @@ -148,11 +148,11 @@ TRANS(fldx_s, FP_SP, gen_floadx, MO_TEUL) TRANS(fldx_d, FP_DP, gen_floadx, MO_TEUQ) TRANS(fstx_s, FP_SP, gen_fstorex, MO_TEUL) TRANS(fstx_d, FP_DP, gen_fstorex, MO_TEUQ) -TRANS(fldgt_s, FP_SP, gen_fload_gt, MO_TEUL) -TRANS(fldgt_d, FP_DP, gen_fload_gt, MO_TEUQ) -TRANS(fldle_s, FP_SP, gen_fload_le, MO_TEUL) -TRANS(fldle_d, FP_DP, gen_fload_le, MO_TEUQ) -TRANS(fstgt_s, FP_SP, gen_fstore_gt, MO_TEUL) -TRANS(fstgt_d, FP_DP, gen_fstore_gt, MO_TEUQ) -TRANS(fstle_s, FP_SP, gen_fstore_le, MO_TEUL) -TRANS(fstle_d, FP_DP, gen_fstore_le, MO_TEUQ) +TRANS64(fldgt_s, FP_SP, gen_fload_gt, MO_TEUL) +TRANS64(fldgt_d, FP_DP, gen_fload_gt, MO_TEUQ) +TRANS64(fldle_s, FP_SP, gen_fload_le, MO_TEUL) +TRANS64(fldle_d, FP_DP, gen_fload_le, MO_TEUQ) +TRANS64(fstgt_s, FP_SP, gen_fstore_gt, MO_TEUL) +TRANS64(fstgt_d, FP_DP, gen_fstore_gt, MO_TEUQ) +TRANS64(fstle_s, FP_SP, gen_fstore_le, MO_TEUL) +TRANS64(fstle_d, FP_DP, gen_fstore_le, MO_TEUQ) diff --git a/target/loongarch/tcg/insn_trans/trans_privileged.c.inc b/target/loongarch/tcg/insn_trans/trans_privileged.c.inc index ecbfe23..64e53a4 100644 --- a/target/loongarch/tcg/insn_trans/trans_privileged.c.inc +++ b/target/loongarch/tcg/insn_trans/trans_privileged.c.inc @@ -83,6 +83,7 @@ void loongarch_csr_translate_init(void) SET_CSR_FUNC(TCFG, NULL, gen_helper_csrwr_tcfg); SET_CSR_FUNC(TVAL, gen_helper_csrrd_tval, NULL); SET_CSR_FUNC(TICLR, NULL, gen_helper_csrwr_ticlr); + SET_CSR_FUNC(MSGIR, gen_helper_csrrd_msgir, NULL); } #undef SET_CSR_FUNC @@ -233,11 +234,11 @@ static bool gen_iocsrwr(DisasContext *ctx, 
arg_rr *a, TRANS(iocsrrd_b, IOCSR, gen_iocsrrd, gen_helper_iocsrrd_b) TRANS(iocsrrd_h, IOCSR, gen_iocsrrd, gen_helper_iocsrrd_h) TRANS(iocsrrd_w, IOCSR, gen_iocsrrd, gen_helper_iocsrrd_w) -TRANS(iocsrrd_d, IOCSR, gen_iocsrrd, gen_helper_iocsrrd_d) +TRANS64(iocsrrd_d, IOCSR, gen_iocsrrd, gen_helper_iocsrrd_d) TRANS(iocsrwr_b, IOCSR, gen_iocsrwr, gen_helper_iocsrwr_b) TRANS(iocsrwr_h, IOCSR, gen_iocsrwr, gen_helper_iocsrwr_h) TRANS(iocsrwr_w, IOCSR, gen_iocsrwr, gen_helper_iocsrwr_w) -TRANS(iocsrwr_d, IOCSR, gen_iocsrwr, gen_helper_iocsrwr_d) +TRANS64(iocsrwr_d, IOCSR, gen_iocsrwr, gen_helper_iocsrwr_d) static void check_mmu_idx(DisasContext *ctx) { @@ -379,7 +380,7 @@ static bool trans_lddir(DisasContext *ctx, arg_lddir *a) if (check_plv(ctx)) { return false; } - gen_helper_lddir(dest, tcg_env, src, tcg_constant_tl(a->imm), mem_idx); + gen_helper_lddir(dest, tcg_env, src, tcg_constant_i32(a->imm), mem_idx); return true; } diff --git a/target/loongarch/tcg/insn_trans/trans_shift.c.inc b/target/loongarch/tcg/insn_trans/trans_shift.c.inc index 3773077..136c4c8 100644 --- a/target/loongarch/tcg/insn_trans/trans_shift.c.inc +++ b/target/loongarch/tcg/insn_trans/trans_shift.c.inc @@ -78,7 +78,7 @@ TRANS(sra_w, ALL, gen_rrr, EXT_SIGN, EXT_NONE, EXT_SIGN, gen_sra_w) TRANS(sll_d, 64, gen_rrr, EXT_NONE, EXT_NONE, EXT_NONE, gen_sll_d) TRANS(srl_d, 64, gen_rrr, EXT_NONE, EXT_NONE, EXT_NONE, gen_srl_d) TRANS(sra_d, 64, gen_rrr, EXT_NONE, EXT_NONE, EXT_NONE, gen_sra_d) -TRANS(rotr_w, 64, gen_rrr, EXT_ZERO, EXT_NONE, EXT_SIGN, gen_rotr_w) +TRANS(rotr_w, ALL, gen_rrr, EXT_ZERO, EXT_NONE, EXT_SIGN, gen_rotr_w) TRANS(rotr_d, 64, gen_rrr, EXT_NONE, EXT_NONE, EXT_NONE, gen_rotr_d) TRANS(slli_w, ALL, gen_rri_c, EXT_NONE, EXT_SIGN, tcg_gen_shli_tl) TRANS(slli_d, 64, gen_rri_c, EXT_NONE, EXT_NONE, tcg_gen_shli_tl) @@ -86,5 +86,5 @@ TRANS(srli_w, ALL, gen_rri_c, EXT_ZERO, EXT_SIGN, tcg_gen_shri_tl) TRANS(srli_d, 64, gen_rri_c, EXT_NONE, EXT_NONE, tcg_gen_shri_tl) TRANS(srai_w, ALL, gen_rri_c, EXT_NONE, EXT_NONE, gen_sari_w) TRANS(srai_d, 64, gen_rri_c, EXT_NONE, EXT_NONE, tcg_gen_sari_tl) -TRANS(rotri_w, 64, gen_rri_v, EXT_NONE, EXT_NONE, gen_rotr_w) +TRANS(rotri_w, ALL, gen_rri_v, EXT_NONE, EXT_NONE, gen_rotr_w) TRANS(rotri_d, 64, gen_rri_c, EXT_NONE, EXT_NONE, tcg_gen_rotri_tl) diff --git a/target/loongarch/tcg/insn_trans/trans_vec.c.inc b/target/loongarch/tcg/insn_trans/trans_vec.c.inc index dff9277..38bccf2 100644 --- a/target/loongarch/tcg/insn_trans/trans_vec.c.inc +++ b/target/loongarch/tcg/insn_trans/trans_vec.c.inc @@ -3465,7 +3465,7 @@ TRANS(xvmsknz_b, LASX, gen_xx, gen_helper_vmsknz_b) static uint64_t vldi_get_value(DisasContext *ctx, uint32_t imm) { int mode; - uint64_t data, t; + uint64_t data = 0, t; /* * imm bit [11:8] is mode, mode value is 0-12. 
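The trans_vec.c.inc hunk that follows moves the vldi mode check out of vldi_get_value() and into the translator proper: bit 12 of the immediate selects that path and bits [11:8] carry the mode, of which only values 0-12 are defined. A small sketch of the decode being assumed (example_vldi_imm_ok is illustrative only):

#include "qemu/osdep.h"
#include "qemu/bitops.h"

static bool example_vldi_imm_ok(uint32_t imm)
{
    bool sel = extract32(imm, 12, 1);          /* vldi_get_value() path */
    uint32_t mode = extract32(imm, 8, 4);      /* mode field, 0-12 valid */

    /* Out-of-range modes are only reachable when sel is set; the new
     * check_valid_vldi_mode() lets gen_vldi() raise EXCCODE_INE there
     * instead of hitting g_assert_not_reached(). */
    return !sel || mode <= 12;
}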
@@ -3570,22 +3570,31 @@ static uint64_t vldi_get_value(DisasContext *ctx, uint32_t imm) } break; default: - generate_exception(ctx, EXCCODE_INE); g_assert_not_reached(); } return data; } +static bool check_valid_vldi_mode(arg_vldi *a) +{ + return extract32(a->imm, 8, 4) <= 12; +} + static bool gen_vldi(DisasContext *ctx, arg_vldi *a, uint32_t oprsz) { int sel, vece; uint64_t value; - if (!check_vec(ctx, oprsz)) { + sel = (a->imm >> 12) & 0x1; + + if (sel && !check_valid_vldi_mode(a)) { + generate_exception(ctx, EXCCODE_INE); return true; } - sel = (a->imm >> 12) & 0x1; + if (!check_vec(ctx, oprsz)) { + return true; + } if (sel) { value = vldi_get_value(ctx, a->imm); @@ -4655,19 +4664,23 @@ TRANS(xvslti_du, LASX, do_xcmpi, MO_64, TCG_COND_LTU) static bool do_vfcmp_cond_s(DisasContext *ctx, arg_vvv_fcond *a, uint32_t sz) { - uint32_t flags; + uint32_t flags = get_fcmp_flags(ctx, a->fcond >> 1); void (*fn)(TCGv_env, TCGv_i32, TCGv_i32, TCGv_i32, TCGv_i32, TCGv_i32); TCGv_i32 vd = tcg_constant_i32(a->vd); TCGv_i32 vj = tcg_constant_i32(a->vj); TCGv_i32 vk = tcg_constant_i32(a->vk); TCGv_i32 oprsz = tcg_constant_i32(sz); + if (flags == -1) { + generate_exception(ctx, EXCCODE_INE); + return true; + } + if (!check_vec(ctx, sz)) { return true; } fn = (a->fcond & 1 ? gen_helper_vfcmp_s_s : gen_helper_vfcmp_c_s); - flags = get_fcmp_flags(a->fcond >> 1); fn(tcg_env, oprsz, vd, vj, vk, tcg_constant_i32(flags)); return true; @@ -4675,19 +4688,23 @@ static bool do_vfcmp_cond_s(DisasContext *ctx, arg_vvv_fcond *a, uint32_t sz) static bool do_vfcmp_cond_d(DisasContext *ctx, arg_vvv_fcond *a, uint32_t sz) { - uint32_t flags; + uint32_t flags = get_fcmp_flags(ctx, a->fcond >> 1); void (*fn)(TCGv_env, TCGv_i32, TCGv_i32, TCGv_i32, TCGv_i32, TCGv_i32); TCGv_i32 vd = tcg_constant_i32(a->vd); TCGv_i32 vj = tcg_constant_i32(a->vj); TCGv_i32 vk = tcg_constant_i32(a->vk); TCGv_i32 oprsz = tcg_constant_i32(sz); + if (flags == -1) { + generate_exception(ctx, EXCCODE_INE); + return true; + } + if (!check_vec(ctx, sz)) { return true; } fn = (a->fcond & 1 ? gen_helper_vfcmp_s_d : gen_helper_vfcmp_c_d); - flags = get_fcmp_flags(a->fcond >> 1); fn(tcg_env, oprsz, vd, vj, vk, tcg_constant_i32(flags)); return true; diff --git a/target/loongarch/tcg/meson.build b/target/loongarch/tcg/meson.build index bdf34f9..b7adfe4 100644 --- a/target/loongarch/tcg/meson.build +++ b/target/loongarch/tcg/meson.build @@ -7,6 +7,7 @@ loongarch_ss.add([zlib, gen]) loongarch_ss.add(files( 'fpu_helper.c', 'op_helper.c', + 'tcg_cpu.c', 'translate.c', 'vec_helper.c', )) diff --git a/target/loongarch/tcg/tcg_cpu.c b/target/loongarch/tcg/tcg_cpu.c new file mode 100644 index 0000000..82b54e6 --- /dev/null +++ b/target/loongarch/tcg/tcg_cpu.c @@ -0,0 +1,322 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * LoongArch CPU parameters for QEMU. 
+ * + * Copyright (c) 2025 Loongson Technology Corporation Limited + */ +#include "qemu/osdep.h" +#include "qemu/accel.h" +#include "qemu/error-report.h" +#include "qemu/log.h" +#include "accel/accel-cpu-target.h" +#include "accel/tcg/cpu-ldst.h" +#include "accel/tcg/cpu-ops.h" +#include "exec/translation-block.h" +#include "exec/target_page.h" +#include "tcg_loongarch.h" +#include "internals.h" + +struct TypeExcp { + int32_t exccode; + const char * const name; +}; + +static const struct TypeExcp excp_names[] = { + {EXCCODE_INT, "Interrupt"}, + {EXCCODE_PIL, "Page invalid exception for load"}, + {EXCCODE_PIS, "Page invalid exception for store"}, + {EXCCODE_PIF, "Page invalid exception for fetch"}, + {EXCCODE_PME, "Page modified exception"}, + {EXCCODE_PNR, "Page Not Readable exception"}, + {EXCCODE_PNX, "Page Not Executable exception"}, + {EXCCODE_PPI, "Page Privilege error"}, + {EXCCODE_ADEF, "Address error for instruction fetch"}, + {EXCCODE_ADEM, "Address error for Memory access"}, + {EXCCODE_SYS, "Syscall"}, + {EXCCODE_BRK, "Break"}, + {EXCCODE_INE, "Instruction Non-Existent"}, + {EXCCODE_IPE, "Instruction privilege error"}, + {EXCCODE_FPD, "Floating Point Disabled"}, + {EXCCODE_FPE, "Floating Point Exception"}, + {EXCCODE_DBP, "Debug breakpoint"}, + {EXCCODE_BCE, "Bound Check Exception"}, + {EXCCODE_SXD, "128 bit vector instructions Disable exception"}, + {EXCCODE_ASXD, "256 bit vector instructions Disable exception"}, + {EXCP_HLT, "EXCP_HLT"}, +}; + +static const char *loongarch_exception_name(int32_t exception) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(excp_names); i++) { + if (excp_names[i].exccode == exception) { + return excp_names[i].name; + } + } + return "Unknown"; +} + +void G_NORETURN do_raise_exception(CPULoongArchState *env, + uint32_t exception, + uintptr_t pc) +{ + CPUState *cs = env_cpu(env); + + qemu_log_mask(CPU_LOG_INT, "%s: exception: %d (%s)\n", + __func__, + exception, + loongarch_exception_name(exception)); + cs->exception_index = exception; + + cpu_loop_exit_restore(cs, pc); +} + +#ifndef CONFIG_USER_ONLY +static void loongarch_cpu_do_interrupt(CPUState *cs) +{ + CPULoongArchState *env = cpu_env(cs); + bool update_badinstr = 1; + int cause = -1; + bool tlbfill = FIELD_EX64(env->CSR_TLBRERA, CSR_TLBRERA, ISTLBR); + uint32_t vec_size = FIELD_EX64(env->CSR_ECFG, CSR_ECFG, VS); + + if (cs->exception_index != EXCCODE_INT) { + qemu_log_mask(CPU_LOG_INT, + "%s enter: pc " TARGET_FMT_lx " ERA " TARGET_FMT_lx + " TLBRERA " TARGET_FMT_lx " exception: %d (%s)\n", + __func__, env->pc, env->CSR_ERA, env->CSR_TLBRERA, + cs->exception_index, + loongarch_exception_name(cs->exception_index)); + } + + switch (cs->exception_index) { + case EXCCODE_DBP: + env->CSR_DBG = FIELD_DP64(env->CSR_DBG, CSR_DBG, DCL, 1); + env->CSR_DBG = FIELD_DP64(env->CSR_DBG, CSR_DBG, ECODE, 0xC); + goto set_DERA; + set_DERA: + env->CSR_DERA = env->pc; + env->CSR_DBG = FIELD_DP64(env->CSR_DBG, CSR_DBG, DST, 1); + set_pc(env, env->CSR_EENTRY + 0x480); + break; + case EXCCODE_INT: + if (FIELD_EX64(env->CSR_DBG, CSR_DBG, DST)) { + env->CSR_DBG = FIELD_DP64(env->CSR_DBG, CSR_DBG, DEI, 1); + goto set_DERA; + } + QEMU_FALLTHROUGH; + case EXCCODE_PIF: + case EXCCODE_ADEF: + cause = cs->exception_index; + update_badinstr = 0; + break; + case EXCCODE_SYS: + case EXCCODE_BRK: + case EXCCODE_INE: + case EXCCODE_IPE: + case EXCCODE_FPD: + case EXCCODE_FPE: + case EXCCODE_SXD: + case EXCCODE_ASXD: + env->CSR_BADV = env->pc; + QEMU_FALLTHROUGH; + case EXCCODE_BCE: + case EXCCODE_ADEM: + case EXCCODE_PIL: + case 
EXCCODE_PIS: + case EXCCODE_PME: + case EXCCODE_PNR: + case EXCCODE_PNX: + case EXCCODE_PPI: + cause = cs->exception_index; + break; + default: + qemu_log("Error: exception(%d) has not been supported\n", + cs->exception_index); + abort(); + } + + if (update_badinstr) { + env->CSR_BADI = cpu_ldl_code(env, env->pc); + } + + /* Save PLV and IE */ + if (tlbfill) { + env->CSR_TLBRPRMD = FIELD_DP64(env->CSR_TLBRPRMD, CSR_TLBRPRMD, PPLV, + FIELD_EX64(env->CSR_CRMD, + CSR_CRMD, PLV)); + env->CSR_TLBRPRMD = FIELD_DP64(env->CSR_TLBRPRMD, CSR_TLBRPRMD, PIE, + FIELD_EX64(env->CSR_CRMD, CSR_CRMD, IE)); + /* set the DA mode */ + env->CSR_CRMD = FIELD_DP64(env->CSR_CRMD, CSR_CRMD, DA, 1); + env->CSR_CRMD = FIELD_DP64(env->CSR_CRMD, CSR_CRMD, PG, 0); + env->CSR_TLBRERA = FIELD_DP64(env->CSR_TLBRERA, CSR_TLBRERA, + PC, (env->pc >> 2)); + } else { + env->CSR_ESTAT = FIELD_DP64(env->CSR_ESTAT, CSR_ESTAT, ECODE, + EXCODE_MCODE(cause)); + env->CSR_ESTAT = FIELD_DP64(env->CSR_ESTAT, CSR_ESTAT, ESUBCODE, + EXCODE_SUBCODE(cause)); + env->CSR_PRMD = FIELD_DP64(env->CSR_PRMD, CSR_PRMD, PPLV, + FIELD_EX64(env->CSR_CRMD, CSR_CRMD, PLV)); + env->CSR_PRMD = FIELD_DP64(env->CSR_PRMD, CSR_PRMD, PIE, + FIELD_EX64(env->CSR_CRMD, CSR_CRMD, IE)); + env->CSR_ERA = env->pc; + } + + env->CSR_CRMD = FIELD_DP64(env->CSR_CRMD, CSR_CRMD, PLV, 0); + env->CSR_CRMD = FIELD_DP64(env->CSR_CRMD, CSR_CRMD, IE, 0); + + if (vec_size) { + vec_size = (1 << vec_size) * 4; + } + + if (cs->exception_index == EXCCODE_INT) { + /* Interrupt */ + uint32_t vector = 0; + uint32_t pending = FIELD_EX64(env->CSR_ESTAT, CSR_ESTAT, IS); + pending &= FIELD_EX64(env->CSR_ECFG, CSR_ECFG, LIE); + + /* Find the highest-priority interrupt. */ + vector = 31 - clz32(pending); + set_pc(env, env->CSR_EENTRY + \ + (EXCCODE_EXTERNAL_INT + vector) * vec_size); + qemu_log_mask(CPU_LOG_INT, + "%s: PC " TARGET_FMT_lx " ERA " TARGET_FMT_lx + " cause %d\n" " A " TARGET_FMT_lx " D " + TARGET_FMT_lx " vector = %d ExC " TARGET_FMT_lx "ExS" + TARGET_FMT_lx "\n", + __func__, env->pc, env->CSR_ERA, + cause, env->CSR_BADV, env->CSR_DERA, vector, + env->CSR_ECFG, env->CSR_ESTAT); + } else { + if (tlbfill) { + set_pc(env, env->CSR_TLBRENTRY); + } else { + set_pc(env, env->CSR_EENTRY + EXCODE_MCODE(cause) * vec_size); + } + qemu_log_mask(CPU_LOG_INT, + "%s: PC " TARGET_FMT_lx " ERA " TARGET_FMT_lx + " cause %d%s\n, ESTAT " TARGET_FMT_lx + " EXCFG " TARGET_FMT_lx " BADVA " TARGET_FMT_lx + "BADI " TARGET_FMT_lx " SYS_NUM " TARGET_FMT_lu + " cpu %d asid " TARGET_FMT_lx "\n", __func__, env->pc, + tlbfill ? env->CSR_TLBRERA : env->CSR_ERA, + cause, tlbfill ? "(refill)" : "", env->CSR_ESTAT, + env->CSR_ECFG, + tlbfill ? 
env->CSR_TLBRBADV : env->CSR_BADV, + env->CSR_BADI, env->gpr[11], cs->cpu_index, + env->CSR_ASID); + } + cs->exception_index = -1; +} + +static void loongarch_cpu_do_transaction_failed(CPUState *cs, hwaddr physaddr, + vaddr addr, unsigned size, + MMUAccessType access_type, + int mmu_idx, MemTxAttrs attrs, + MemTxResult response, + uintptr_t retaddr) +{ + CPULoongArchState *env = cpu_env(cs); + + if (access_type == MMU_INST_FETCH) { + do_raise_exception(env, EXCCODE_ADEF, retaddr); + } else { + do_raise_exception(env, EXCCODE_ADEM, retaddr); + } +} + +static inline bool cpu_loongarch_hw_interrupts_enabled(CPULoongArchState *env) +{ + bool ret = 0; + + ret = (FIELD_EX64(env->CSR_CRMD, CSR_CRMD, IE) && + !(FIELD_EX64(env->CSR_DBG, CSR_DBG, DST))); + + return ret; +} + +static bool loongarch_cpu_exec_interrupt(CPUState *cs, int interrupt_request) +{ + if (interrupt_request & CPU_INTERRUPT_HARD) { + CPULoongArchState *env = cpu_env(cs); + + if (cpu_loongarch_hw_interrupts_enabled(env) && + cpu_loongarch_hw_interrupts_pending(env)) { + /* Raise it */ + cs->exception_index = EXCCODE_INT; + loongarch_cpu_do_interrupt(cs); + return true; + } + } + return false; +} + +static vaddr loongarch_pointer_wrap(CPUState *cs, int mmu_idx, + vaddr result, vaddr base) +{ + return is_va32(cpu_env(cs)) ? (uint32_t)result : result; +} +#endif + +static TCGTBCPUState loongarch_get_tb_cpu_state(CPUState *cs) +{ + CPULoongArchState *env = cpu_env(cs); + uint32_t flags; + + flags = env->CSR_CRMD & (R_CSR_CRMD_PLV_MASK | R_CSR_CRMD_PG_MASK); + flags |= FIELD_EX64(env->CSR_EUEN, CSR_EUEN, FPE) * HW_FLAGS_EUEN_FPE; + flags |= FIELD_EX64(env->CSR_EUEN, CSR_EUEN, SXE) * HW_FLAGS_EUEN_SXE; + flags |= FIELD_EX64(env->CSR_EUEN, CSR_EUEN, ASXE) * HW_FLAGS_EUEN_ASXE; + flags |= is_va32(env) * HW_FLAGS_VA32; + + return (TCGTBCPUState){ .pc = env->pc, .flags = flags }; +} + +static void loongarch_cpu_synchronize_from_tb(CPUState *cs, + const TranslationBlock *tb) +{ + tcg_debug_assert(!tcg_cflags_has(cs, CF_PCREL)); + set_pc(cpu_env(cs), tb->pc); +} + +static void loongarch_restore_state_to_opc(CPUState *cs, + const TranslationBlock *tb, + const uint64_t *data) +{ + set_pc(cpu_env(cs), data[0]); +} + +static int loongarch_cpu_mmu_index(CPUState *cs, bool ifetch) +{ + CPULoongArchState *env = cpu_env(cs); + + if (FIELD_EX64(env->CSR_CRMD, CSR_CRMD, PG)) { + return FIELD_EX64(env->CSR_CRMD, CSR_CRMD, PLV); + } + return MMU_DA_IDX; +} + +const TCGCPUOps loongarch_tcg_ops = { + .guest_default_memory_order = 0, + .mttcg_supported = true, + + .initialize = loongarch_translate_init, + .translate_code = loongarch_translate_code, + .get_tb_cpu_state = loongarch_get_tb_cpu_state, + .synchronize_from_tb = loongarch_cpu_synchronize_from_tb, + .restore_state_to_opc = loongarch_restore_state_to_opc, + .mmu_index = loongarch_cpu_mmu_index, + +#ifndef CONFIG_USER_ONLY + .tlb_fill = loongarch_cpu_tlb_fill, + .pointer_wrap = loongarch_pointer_wrap, + .cpu_exec_interrupt = loongarch_cpu_exec_interrupt, + .cpu_exec_halt = loongarch_cpu_has_work, + .cpu_exec_reset = cpu_reset, + .do_interrupt = loongarch_cpu_do_interrupt, + .do_transaction_failed = loongarch_cpu_do_transaction_failed, +#endif +}; diff --git a/target/loongarch/tcg/tcg_loongarch.h b/target/loongarch/tcg/tcg_loongarch.h index fd4e116..7fb627f 100644 --- a/target/loongarch/tcg/tcg_loongarch.h +++ b/target/loongarch/tcg/tcg_loongarch.h @@ -7,15 +7,17 @@ #ifndef TARGET_LOONGARCH_TCG_LOONGARCH_H #define TARGET_LOONGARCH_TCG_LOONGARCH_H #include "cpu.h" +#include "cpu-mmu.h" +extern const 
TCGCPUOps loongarch_tcg_ops; void loongarch_csr_translate_init(void); bool loongarch_cpu_tlb_fill(CPUState *cs, vaddr address, int size, MMUAccessType access_type, int mmu_idx, bool probe, uintptr_t retaddr); -int loongarch_get_addr_from_tlb(CPULoongArchState *env, hwaddr *physical, - int *prot, target_ulong address, - MMUAccessType access_type, int mmu_idx); +TLBRet loongarch_get_addr_from_tlb(CPULoongArchState *env, + MMUContext *context, + MMUAccessType access_type, int mmu_idx); #endif /* TARGET_LOONGARCH_TCG_LOONGARCH_H */ diff --git a/target/loongarch/tcg/tlb_helper.c b/target/loongarch/tcg/tlb_helper.c index dc48b0f..e119f78 100644 --- a/target/loongarch/tcg/tlb_helper.c +++ b/target/loongarch/tcg/tlb_helper.c @@ -10,6 +10,7 @@ #include "qemu/guest-random.h" #include "cpu.h" +#include "cpu-mmu.h" #include "internals.h" #include "exec/helper-proto.h" #include "exec/cputlb.h" @@ -20,6 +21,18 @@ #include "cpu-csr.h" #include "tcg/tcg_loongarch.h" +typedef bool (*tlb_match)(bool global, int asid, int tlb_asid); + +static bool tlb_match_any(bool global, int asid, int tlb_asid) +{ + return global || tlb_asid == asid; +} + +static bool tlb_match_asid(bool global, int asid, int tlb_asid) +{ + return !global && tlb_asid == asid; +} + bool check_ps(CPULoongArchState *env, uint8_t tlb_ps) { if (tlb_ps >= 64) { @@ -28,8 +41,8 @@ bool check_ps(CPULoongArchState *env, uint8_t tlb_ps) return BIT_ULL(tlb_ps) & (env->CSR_PRCFG2); } -static void raise_mmu_exception(CPULoongArchState *env, target_ulong address, - MMUAccessType access_type, int tlb_error) +static void raise_mmu_exception(CPULoongArchState *env, vaddr address, + MMUAccessType access_type, TLBRet tlb_error) { CPUState *cs = env_cpu(env); @@ -100,34 +113,25 @@ static void invalidate_tlb_entry(CPULoongArchState *env, int index) target_ulong addr, mask, pagesize; uint8_t tlb_ps; LoongArchTLB *tlb = &env->tlb[index]; - - int mmu_idx = cpu_mmu_index(env_cpu(env), false); + int idxmap = BIT(MMU_KERNEL_IDX) | BIT(MMU_USER_IDX); uint8_t tlb_v0 = FIELD_EX64(tlb->tlb_entry0, TLBENTRY, V); uint8_t tlb_v1 = FIELD_EX64(tlb->tlb_entry1, TLBENTRY, V); uint64_t tlb_vppn = FIELD_EX64(tlb->tlb_misc, TLB_MISC, VPPN); - uint8_t tlb_e = FIELD_EX64(tlb->tlb_misc, TLB_MISC, E); - if (!tlb_e) { - return; - } - if (index >= LOONGARCH_STLB) { - tlb_ps = FIELD_EX64(tlb->tlb_misc, TLB_MISC, PS); - } else { - tlb_ps = FIELD_EX64(env->CSR_STLBPS, CSR_STLBPS, PS); - } + tlb_ps = FIELD_EX64(tlb->tlb_misc, TLB_MISC, PS); pagesize = MAKE_64BIT_MASK(tlb_ps, 1); mask = MAKE_64BIT_MASK(0, tlb_ps + 1); + addr = (tlb_vppn << R_TLB_MISC_VPPN_SHIFT) & ~mask; + addr = sextract64(addr, 0, TARGET_VIRT_ADDR_SPACE_BITS); if (tlb_v0) { - addr = (tlb_vppn << R_TLB_MISC_VPPN_SHIFT) & ~mask; /* even */ tlb_flush_range_by_mmuidx(env_cpu(env), addr, pagesize, - mmu_idx, TARGET_LONG_BITS); + idxmap, TARGET_LONG_BITS); } if (tlb_v1) { - addr = (tlb_vppn << R_TLB_MISC_VPPN_SHIFT) & pagesize; /* odd */ - tlb_flush_range_by_mmuidx(env_cpu(env), addr, pagesize, - mmu_idx, TARGET_LONG_BITS); + tlb_flush_range_by_mmuidx(env_cpu(env), addr + pagesize, pagesize, + idxmap, TARGET_LONG_BITS); } } @@ -135,20 +139,27 @@ static void invalidate_tlb(CPULoongArchState *env, int index) { LoongArchTLB *tlb; uint16_t csr_asid, tlb_asid, tlb_g; + uint8_t tlb_e; csr_asid = FIELD_EX64(env->CSR_ASID, CSR_ASID, ASID); tlb = &env->tlb[index]; + tlb_e = FIELD_EX64(tlb->tlb_misc, TLB_MISC, E); + if (!tlb_e) { + return; + } + + tlb->tlb_misc = FIELD_DP64(tlb->tlb_misc, TLB_MISC, E, 0); tlb_asid = 
FIELD_EX64(tlb->tlb_misc, TLB_MISC, ASID); tlb_g = FIELD_EX64(tlb->tlb_entry0, TLBENTRY, G); + /* QEMU TLB is flushed when asid is changed */ if (tlb_g == 0 && tlb_asid != csr_asid) { return; } invalidate_tlb_entry(env, index); } -static void fill_tlb_entry(CPULoongArchState *env, int index) +static void fill_tlb_entry(CPULoongArchState *env, LoongArchTLB *tlb) { - LoongArchTLB *tlb = &env->tlb[index]; uint64_t lo0, lo1, csr_vppn; uint16_t csr_asid; uint8_t csr_ps; @@ -173,17 +184,8 @@ static void fill_tlb_entry(CPULoongArchState *env, int index) lo1 = env->CSR_TLBELO1; } - /*check csr_ps */ - if (!check_ps(env, csr_ps)) { - qemu_log_mask(LOG_GUEST_ERROR, "csr_ps %d is illegal\n", csr_ps); - return; - } - - /* Only MTLB has the ps fields */ - if (index >= LOONGARCH_STLB) { - tlb->tlb_misc = FIELD_DP64(tlb->tlb_misc, TLB_MISC, PS, csr_ps); - } - + /* Store page size in field PS */ + tlb->tlb_misc = FIELD_DP64(tlb->tlb_misc, TLB_MISC, PS, csr_ps); tlb->tlb_misc = FIELD_DP64(tlb->tlb_misc, TLB_MISC, VPPN, csr_vppn); tlb->tlb_misc = FIELD_DP64(tlb->tlb_misc, TLB_MISC, E, 1); csr_asid = FIELD_EX64(env->CSR_ASID, CSR_ASID, ASID); @@ -209,17 +211,18 @@ static uint32_t get_random_tlb(uint32_t low, uint32_t high) * field in tlb entry contains bit[47:13], so need adjust. * virt_vpn = vaddr[47:13] */ -static bool loongarch_tlb_search(CPULoongArchState *env, target_ulong vaddr, - int *index) +static LoongArchTLB *loongarch_tlb_search_cb(CPULoongArchState *env, + vaddr vaddr, int csr_asid, + tlb_match func) { LoongArchTLB *tlb; - uint16_t csr_asid, tlb_asid, stlb_idx; - uint8_t tlb_e, tlb_ps, tlb_g, stlb_ps; + uint16_t tlb_asid, stlb_idx; + uint8_t tlb_e, tlb_ps, stlb_ps; + bool tlb_g; int i, compare_shift; uint64_t vpn, tlb_vppn; - csr_asid = FIELD_EX64(env->CSR_ASID, CSR_ASID, ASID); - stlb_ps = FIELD_EX64(env->CSR_STLBPS, CSR_STLBPS, PS); + stlb_ps = FIELD_EX64(env->CSR_STLBPS, CSR_STLBPS, PS); vpn = (vaddr & TARGET_VIRT_MASK) >> (stlb_ps + 1); stlb_idx = vpn & 0xff; /* VA[25:15] <==> TLBIDX.index for 16KiB Page */ compare_shift = stlb_ps + 1 - R_TLB_MISC_VPPN_SHIFT; @@ -231,12 +234,11 @@ static bool loongarch_tlb_search(CPULoongArchState *env, target_ulong vaddr, if (tlb_e) { tlb_vppn = FIELD_EX64(tlb->tlb_misc, TLB_MISC, VPPN); tlb_asid = FIELD_EX64(tlb->tlb_misc, TLB_MISC, ASID); - tlb_g = FIELD_EX64(tlb->tlb_entry0, TLBENTRY, G); + tlb_g = !!FIELD_EX64(tlb->tlb_entry0, TLBENTRY, G); - if ((tlb_g == 1 || tlb_asid == csr_asid) && + if (func(tlb_g, csr_asid, tlb_asid) && (vpn == (tlb_vppn >> compare_shift))) { - *index = i * 256 + stlb_idx; - return true; + return tlb; } } } @@ -252,13 +254,30 @@ static bool loongarch_tlb_search(CPULoongArchState *env, target_ulong vaddr, tlb_g = FIELD_EX64(tlb->tlb_entry0, TLBENTRY, G); compare_shift = tlb_ps + 1 - R_TLB_MISC_VPPN_SHIFT; vpn = (vaddr & TARGET_VIRT_MASK) >> (tlb_ps + 1); - if ((tlb_g == 1 || tlb_asid == csr_asid) && + if (func(tlb_g, csr_asid, tlb_asid) && (vpn == (tlb_vppn >> compare_shift))) { - *index = i; - return true; + return tlb; } } } + return NULL; +} + +static bool loongarch_tlb_search(CPULoongArchState *env, vaddr vaddr, + int *index) +{ + int csr_asid; + tlb_match func; + LoongArchTLB *tlb; + + func = tlb_match_any; + csr_asid = FIELD_EX64(env->CSR_ASID, CSR_ASID, ASID); + tlb = loongarch_tlb_search_cb(env, vaddr, csr_asid, func); + if (tlb) { + *index = tlb - env->tlb; + return true; + } + return false; } @@ -289,12 +308,7 @@ void helper_tlbrd(CPULoongArchState *env) index = FIELD_EX64(env->CSR_TLBIDX, CSR_TLBIDX, INDEX); tlb = 
&env->tlb[index]; - - if (index >= LOONGARCH_STLB) { - tlb_ps = FIELD_EX64(tlb->tlb_misc, TLB_MISC, PS); - } else { - tlb_ps = FIELD_EX64(env->CSR_STLBPS, CSR_STLBPS, PS); - } + tlb_ps = FIELD_EX64(tlb->tlb_misc, TLB_MISC, PS); tlb_e = FIELD_EX64(tlb->tlb_misc, TLB_MISC, E); if (!tlb_e) { @@ -320,61 +334,109 @@ void helper_tlbrd(CPULoongArchState *env) void helper_tlbwr(CPULoongArchState *env) { int index = FIELD_EX64(env->CSR_TLBIDX, CSR_TLBIDX, INDEX); + LoongArchTLB *old, new = {}; + bool skip_inv = false; + uint8_t tlb_v0, tlb_v1; - invalidate_tlb(env, index); - + old = env->tlb + index; if (FIELD_EX64(env->CSR_TLBIDX, CSR_TLBIDX, NE)) { - env->tlb[index].tlb_misc = FIELD_DP64(env->tlb[index].tlb_misc, - TLB_MISC, E, 0); + invalidate_tlb(env, index); return; } - fill_tlb_entry(env, index); + fill_tlb_entry(env, &new); + /* Check whether ASID/VPPN is the same */ + if (old->tlb_misc == new.tlb_misc) { + /* Check whether both even/odd pages is the same or invalid */ + tlb_v0 = FIELD_EX64(old->tlb_entry0, TLBENTRY, V); + tlb_v1 = FIELD_EX64(old->tlb_entry1, TLBENTRY, V); + if ((!tlb_v0 || new.tlb_entry0 == old->tlb_entry0) && + (!tlb_v1 || new.tlb_entry1 == old->tlb_entry1)) { + skip_inv = true; + } + } + + /* flush tlb before updating the entry */ + if (!skip_inv) { + invalidate_tlb(env, index); + } + + *old = new; } void helper_tlbfill(CPULoongArchState *env) { uint64_t address, entryhi; - int index, set, stlb_idx; + int index, set, i, stlb_idx; uint16_t pagesize, stlb_ps; + uint16_t asid, tlb_asid; + LoongArchTLB *tlb; + uint8_t tlb_e, tlb_g; if (FIELD_EX64(env->CSR_TLBRERA, CSR_TLBRERA, ISTLBR)) { entryhi = env->CSR_TLBREHI; + /* Validity of pagesize is checked in helper_ldpte() */ pagesize = FIELD_EX64(env->CSR_TLBREHI, CSR_TLBREHI, PS); } else { entryhi = env->CSR_TLBEHI; + /* Validity of pagesize is checked in helper_tlbrd() */ pagesize = FIELD_EX64(env->CSR_TLBIDX, CSR_TLBIDX, PS); } - if (!check_ps(env, pagesize)) { - qemu_log_mask(LOG_GUEST_ERROR, "pagesize %d is illegal\n", pagesize); - return; - } - + /* Validity of stlb_ps is checked in helper_csrwr_stlbps() */ stlb_ps = FIELD_EX64(env->CSR_STLBPS, CSR_STLBPS, PS); - if (!check_ps(env, stlb_ps)) { - qemu_log_mask(LOG_GUEST_ERROR, "stlb_ps %d is illegal\n", stlb_ps); - return; - } - + asid = FIELD_EX64(env->CSR_ASID, CSR_ASID, ASID); if (pagesize == stlb_ps) { /* Only write into STLB bits [47:13] */ address = entryhi & ~MAKE_64BIT_MASK(0, R_CSR_TLBEHI_64_VPPN_SHIFT); - - /* Choose one set ramdomly */ - set = get_random_tlb(0, 7); - - /* Index in one set */ + set = -1; stlb_idx = (address >> (stlb_ps + 1)) & 0xff; /* [0,255] */ + for (i = 0; i < 8; ++i) { + tlb = &env->tlb[i * 256 + stlb_idx]; + tlb_e = FIELD_EX64(tlb->tlb_misc, TLB_MISC, E); + if (!tlb_e) { + set = i; + break; + } + tlb_asid = FIELD_EX64(tlb->tlb_misc, TLB_MISC, ASID); + tlb_g = FIELD_EX64(tlb->tlb_entry0, TLBENTRY, G); + if (tlb_g == 0 && asid != tlb_asid) { + set = i; + } + } + + /* Choose one set randomly */ + if (set < 0) { + set = get_random_tlb(0, 7); + } index = set * 256 + stlb_idx; } else { /* Only write into MTLB */ - index = get_random_tlb(LOONGARCH_STLB, LOONGARCH_TLB_MAX - 1); + index = -1; + for (i = LOONGARCH_STLB; i < LOONGARCH_TLB_MAX; i++) { + tlb = &env->tlb[i]; + tlb_e = FIELD_EX64(tlb->tlb_misc, TLB_MISC, E); + + if (!tlb_e) { + index = i; + break; + } + + tlb_asid = FIELD_EX64(tlb->tlb_misc, TLB_MISC, ASID); + tlb_g = FIELD_EX64(tlb->tlb_entry0, TLBENTRY, G); + if (tlb_g == 0 && asid != tlb_asid) { + index = i; + } + } + + if (index < 0) { 
+ index = get_random_tlb(LOONGARCH_STLB, LOONGARCH_TLB_MAX - 1); + } } invalidate_tlb(env, index); - fill_tlb_entry(env, index); + fill_tlb_entry(env, env->tlb + index); } void helper_tlbclr(CPULoongArchState *env) @@ -476,67 +538,29 @@ void helper_invtlb_all_asid(CPULoongArchState *env, target_ulong info) void helper_invtlb_page_asid(CPULoongArchState *env, target_ulong info, target_ulong addr) { - uint16_t asid = info & 0x3ff; - - for (int i = 0; i < LOONGARCH_TLB_MAX; i++) { - LoongArchTLB *tlb = &env->tlb[i]; - uint8_t tlb_g = FIELD_EX64(tlb->tlb_entry0, TLBENTRY, G); - uint16_t tlb_asid = FIELD_EX64(tlb->tlb_misc, TLB_MISC, ASID); - uint64_t vpn, tlb_vppn; - uint8_t tlb_ps, compare_shift; - uint8_t tlb_e = FIELD_EX64(tlb->tlb_misc, TLB_MISC, E); - - if (!tlb_e) { - continue; - } - if (i >= LOONGARCH_STLB) { - tlb_ps = FIELD_EX64(tlb->tlb_misc, TLB_MISC, PS); - } else { - tlb_ps = FIELD_EX64(env->CSR_STLBPS, CSR_STLBPS, PS); - } - tlb_vppn = FIELD_EX64(tlb->tlb_misc, TLB_MISC, VPPN); - vpn = (addr & TARGET_VIRT_MASK) >> (tlb_ps + 1); - compare_shift = tlb_ps + 1 - R_TLB_MISC_VPPN_SHIFT; + int asid = info & 0x3ff; + LoongArchTLB *tlb; + tlb_match func; - if (!tlb_g && (tlb_asid == asid) && - (vpn == (tlb_vppn >> compare_shift))) { - tlb->tlb_misc = FIELD_DP64(tlb->tlb_misc, TLB_MISC, E, 0); - } + func = tlb_match_asid; + tlb = loongarch_tlb_search_cb(env, addr, asid, func); + if (tlb) { + invalidate_tlb(env, tlb - env->tlb); } - tlb_flush(env_cpu(env)); } void helper_invtlb_page_asid_or_g(CPULoongArchState *env, target_ulong info, target_ulong addr) { - uint16_t asid = info & 0x3ff; - - for (int i = 0; i < LOONGARCH_TLB_MAX; i++) { - LoongArchTLB *tlb = &env->tlb[i]; - uint8_t tlb_g = FIELD_EX64(tlb->tlb_entry0, TLBENTRY, G); - uint16_t tlb_asid = FIELD_EX64(tlb->tlb_misc, TLB_MISC, ASID); - uint64_t vpn, tlb_vppn; - uint8_t tlb_ps, compare_shift; - uint8_t tlb_e = FIELD_EX64(tlb->tlb_misc, TLB_MISC, E); - - if (!tlb_e) { - continue; - } - if (i >= LOONGARCH_STLB) { - tlb_ps = FIELD_EX64(tlb->tlb_misc, TLB_MISC, PS); - } else { - tlb_ps = FIELD_EX64(env->CSR_STLBPS, CSR_STLBPS, PS); - } - tlb_vppn = FIELD_EX64(tlb->tlb_misc, TLB_MISC, VPPN); - vpn = (addr & TARGET_VIRT_MASK) >> (tlb_ps + 1); - compare_shift = tlb_ps + 1 - R_TLB_MISC_VPPN_SHIFT; + int asid = info & 0x3ff; + LoongArchTLB *tlb; + tlb_match func; - if ((tlb_g || (tlb_asid == asid)) && - (vpn == (tlb_vppn >> compare_shift))) { - tlb->tlb_misc = FIELD_DP64(tlb->tlb_misc, TLB_MISC, E, 0); - } + func = tlb_match_any; + tlb = loongarch_tlb_search_cb(env, addr, asid, func); + if (tlb) { + invalidate_tlb(env, tlb - env->tlb); } - tlb_flush(env_cpu(env)); } bool loongarch_cpu_tlb_fill(CPUState *cs, vaddr address, int size, @@ -546,13 +570,15 @@ bool loongarch_cpu_tlb_fill(CPUState *cs, vaddr address, int size, CPULoongArchState *env = cpu_env(cs); hwaddr physical; int prot; - int ret; + MMUContext context; + TLBRet ret; /* Data access */ - ret = get_physical_address(env, &physical, &prot, address, - access_type, mmu_idx, 0); - + context.addr = address; + ret = get_physical_address(env, &context, access_type, mmu_idx, 0); if (ret == TLBRET_MATCH) { + physical = context.physical; + prot = context.prot; tlb_set_page(cs, address & TARGET_PAGE_MASK, physical & TARGET_PAGE_MASK, prot, mmu_idx, TARGET_PAGE_SIZE); @@ -573,7 +599,7 @@ bool loongarch_cpu_tlb_fill(CPUState *cs, vaddr address, int size, } target_ulong helper_lddir(CPULoongArchState *env, target_ulong base, - target_ulong level, uint32_t mem_idx) + uint32_t level, uint32_t 
mem_idx) { CPUState *cs = env_cpu(env); target_ulong badvaddr, index, phys; @@ -581,7 +607,7 @@ target_ulong helper_lddir(CPULoongArchState *env, target_ulong base, if (unlikely((level == 0) || (level > 4))) { qemu_log_mask(LOG_GUEST_ERROR, - "Attepted LDDIR with level %"PRId64"\n", level); + "Attepted LDDIR with level %u\n", level); return base; } @@ -611,10 +637,11 @@ void helper_ldpte(CPULoongArchState *env, target_ulong base, target_ulong odd, uint32_t mem_idx) { CPUState *cs = env_cpu(env); - target_ulong phys, tmp0, ptindex, ptoffset0, ptoffset1, ps, badv; + target_ulong phys, tmp0, ptindex, ptoffset0, ptoffset1, badv; uint64_t ptbase = FIELD_EX64(env->CSR_PWCL, CSR_PWCL, PTBASE); uint64_t ptwidth = FIELD_EX64(env->CSR_PWCL, CSR_PWCL, PTWIDTH); uint64_t dir_base, dir_width; + uint8_t ps; /* * The parameter "base" has only two types, @@ -651,6 +678,11 @@ void helper_ldpte(CPULoongArchState *env, target_ulong base, target_ulong odd, if (odd) { tmp0 += MAKE_64BIT_MASK(ps, 1); } + + if (!check_ps(env, ps)) { + qemu_log_mask(LOG_GUEST_ERROR, "Illegal huge pagesize %d\n", ps); + return; + } } else { badv = env->CSR_TLBRBADV; @@ -671,85 +703,31 @@ void helper_ldpte(CPULoongArchState *env, target_ulong base, target_ulong odd, env->CSR_TLBREHI = FIELD_DP64(env->CSR_TLBREHI, CSR_TLBREHI, PS, ps); } -static int loongarch_map_tlb_entry(CPULoongArchState *env, hwaddr *physical, - int *prot, target_ulong address, - int access_type, int index, int mmu_idx) +static TLBRet loongarch_map_tlb_entry(CPULoongArchState *env, + MMUContext *context, + MMUAccessType access_type, int index, + int mmu_idx) { LoongArchTLB *tlb = &env->tlb[index]; - uint64_t plv = mmu_idx; - uint64_t tlb_entry, tlb_ppn; - uint8_t tlb_ps, n, tlb_v, tlb_d, tlb_plv, tlb_nx, tlb_nr, tlb_rplv; + uint8_t tlb_ps, n; - if (index >= LOONGARCH_STLB) { - tlb_ps = FIELD_EX64(tlb->tlb_misc, TLB_MISC, PS); - } else { - tlb_ps = FIELD_EX64(env->CSR_STLBPS, CSR_STLBPS, PS); - } - n = (address >> tlb_ps) & 0x1;/* Odd or even */ - - tlb_entry = n ? tlb->tlb_entry1 : tlb->tlb_entry0; - tlb_v = FIELD_EX64(tlb_entry, TLBENTRY, V); - tlb_d = FIELD_EX64(tlb_entry, TLBENTRY, D); - tlb_plv = FIELD_EX64(tlb_entry, TLBENTRY, PLV); - if (is_la64(env)) { - tlb_ppn = FIELD_EX64(tlb_entry, TLBENTRY_64, PPN); - tlb_nx = FIELD_EX64(tlb_entry, TLBENTRY_64, NX); - tlb_nr = FIELD_EX64(tlb_entry, TLBENTRY_64, NR); - tlb_rplv = FIELD_EX64(tlb_entry, TLBENTRY_64, RPLV); - } else { - tlb_ppn = FIELD_EX64(tlb_entry, TLBENTRY_32, PPN); - tlb_nx = 0; - tlb_nr = 0; - tlb_rplv = 0; - } - - /* Remove sw bit between bit12 -- bit PS*/ - tlb_ppn = tlb_ppn & ~(((0x1UL << (tlb_ps - 12)) - 1)); - - /* Check access rights */ - if (!tlb_v) { - return TLBRET_INVALID; - } - - if (access_type == MMU_INST_FETCH && tlb_nx) { - return TLBRET_XI; - } - - if (access_type == MMU_DATA_LOAD && tlb_nr) { - return TLBRET_RI; - } - - if (((tlb_rplv == 0) && (plv > tlb_plv)) || - ((tlb_rplv == 1) && (plv != tlb_plv))) { - return TLBRET_PE; - } - - if ((access_type == MMU_DATA_STORE) && !tlb_d) { - return TLBRET_DIRTY; - } - - *physical = (tlb_ppn << R_TLBENTRY_64_PPN_SHIFT) | - (address & MAKE_64BIT_MASK(0, tlb_ps)); - *prot = PAGE_READ; - if (tlb_d) { - *prot |= PAGE_WRITE; - } - if (!tlb_nx) { - *prot |= PAGE_EXEC; - } - return TLBRET_MATCH; + tlb_ps = FIELD_EX64(tlb->tlb_misc, TLB_MISC, PS); + n = (context->addr >> tlb_ps) & 0x1;/* Odd or even */ + context->pte = n ? 
tlb->tlb_entry1 : tlb->tlb_entry0; + context->ps = tlb_ps; + return loongarch_check_pte(env, context, access_type, mmu_idx); } -int loongarch_get_addr_from_tlb(CPULoongArchState *env, hwaddr *physical, - int *prot, target_ulong address, - MMUAccessType access_type, int mmu_idx) +TLBRet loongarch_get_addr_from_tlb(CPULoongArchState *env, + MMUContext *context, + MMUAccessType access_type, int mmu_idx) { int index, match; - match = loongarch_tlb_search(env, address, &index); + match = loongarch_tlb_search(env, context->addr, &index); if (match) { - return loongarch_map_tlb_entry(env, physical, prot, - address, access_type, index, mmu_idx); + return loongarch_map_tlb_entry(env, context, access_type, index, + mmu_idx); } return TLBRET_NOMATCH; diff --git a/target/loongarch/tcg/translate.c b/target/loongarch/tcg/translate.c index 53a0b4c..055f6fb 100644 --- a/target/loongarch/tcg/translate.c +++ b/target/loongarch/tcg/translate.c @@ -99,16 +99,16 @@ void generate_exception(DisasContext *ctx, int excp) ctx->base.is_jmp = DISAS_NORETURN; } -static inline void gen_goto_tb(DisasContext *ctx, int n, target_ulong dest) +static void gen_goto_tb(DisasContext *ctx, unsigned tb_slot_idx, vaddr dest) { if (ctx->va32) { dest = (uint32_t) dest; } if (translator_use_goto_tb(&ctx->base, dest)) { - tcg_gen_goto_tb(n); + tcg_gen_goto_tb(tb_slot_idx); tcg_gen_movi_tl(cpu_pc, dest); - tcg_gen_exit_tb(ctx->base.tb, n); + tcg_gen_exit_tb(ctx->base.tb, tb_slot_idx); } else { tcg_gen_movi_tl(cpu_pc, dest); tcg_gen_lookup_and_goto_ptr(); diff --git a/target/loongarch/translate.h b/target/loongarch/translate.h index 018dc5e..bbe015b 100644 --- a/target/loongarch/translate.h +++ b/target/loongarch/translate.h @@ -14,6 +14,10 @@ static bool trans_##NAME(DisasContext *ctx, arg_##NAME * a) \ { return avail_##AVAIL(ctx) && FUNC(ctx, a, __VA_ARGS__); } +#define TRANS64(NAME, AVAIL, FUNC, ...) 
\ + static bool trans_##NAME(DisasContext *ctx, arg_##NAME * a) \ + { return avail_64(ctx) && avail_##AVAIL(ctx) && FUNC(ctx, a, __VA_ARGS__); } + #define avail_ALL(C) true #define avail_64(C) (FIELD_EX32((C)->cpucfg1, CPUCFG1, ARCH) == \ CPUCFG1_ARCH_LA64) diff --git a/target/m68k/cpu.c b/target/m68k/cpu.c index c5196a6..f1b6731 100644 --- a/target/m68k/cpu.c +++ b/target/m68k/cpu.c @@ -74,7 +74,7 @@ static void m68k_restore_state_to_opc(CPUState *cs, #ifndef CONFIG_USER_ONLY static bool m68k_cpu_has_work(CPUState *cs) { - return cs->interrupt_request & CPU_INTERRUPT_HARD; + return cpu_test_interrupt(cs, CPU_INTERRUPT_HARD); } #endif /* !CONFIG_USER_ONLY */ @@ -619,6 +619,7 @@ static const TCGCPUOps m68k_tcg_ops = { #ifndef CONFIG_USER_ONLY .tlb_fill = m68k_cpu_tlb_fill, + .pointer_wrap = cpu_pointer_wrap_uint32, .cpu_exec_interrupt = m68k_cpu_exec_interrupt, .cpu_exec_halt = m68k_cpu_has_work, .cpu_exec_reset = cpu_reset, diff --git a/target/m68k/translate.c b/target/m68k/translate.c index 97afceb..eb1ba15 100644 --- a/target/m68k/translate.c +++ b/target/m68k/translate.c @@ -44,9 +44,6 @@ #undef DEFO32 #undef DEFO64 -static TCGv_i32 cpu_halted; -static TCGv_i32 cpu_exception_index; - static char cpu_reg_names[2 * 8 * 3 + 5 * 4]; static TCGv cpu_dregs[8]; static TCGv cpu_aregs[8]; @@ -78,14 +75,6 @@ void m68k_tcg_init(void) #undef DEFO32 #undef DEFO64 - cpu_halted = tcg_global_mem_new_i32(tcg_env, - -offsetof(M68kCPU, env) + - offsetof(CPUState, halted), "HALTED"); - cpu_exception_index = tcg_global_mem_new_i32(tcg_env, - -offsetof(M68kCPU, env) + - offsetof(CPUState, exception_index), - "EXCEPTION"); - p = cpu_reg_names; for (i = 0; i < 8; i++) { sprintf(p, "D%d", i); @@ -4512,7 +4501,8 @@ DISAS_INSN(halt) gen_exception(s, s->pc, EXCP_SEMIHOSTING); return; } - tcg_gen_movi_i32(cpu_halted, 1); + tcg_gen_st_i32(tcg_constant_i32(1), tcg_env, + offsetof(CPUState, halted) - offsetof(M68kCPU, env)); gen_exception(s, s->pc, EXCP_HLT); } @@ -4528,7 +4518,8 @@ DISAS_INSN(stop) ext = read_im16(env, s); gen_set_sr_im(s, ext, 0); - tcg_gen_movi_i32(cpu_halted, 1); + tcg_gen_st_i32(tcg_constant_i32(1), tcg_env, + offsetof(CPUState, halted) - offsetof(M68kCPU, env)); gen_exception(s, s->pc, EXCP_HLT); } diff --git a/target/microblaze/cpu.c b/target/microblaze/cpu.c index 615a959..22231f0 100644 --- a/target/microblaze/cpu.c +++ b/target/microblaze/cpu.c @@ -129,7 +129,7 @@ static void mb_restore_state_to_opc(CPUState *cs, #ifndef CONFIG_USER_ONLY static bool mb_cpu_has_work(CPUState *cs) { - return cs->interrupt_request & (CPU_INTERRUPT_HARD | CPU_INTERRUPT_NMI); + return cpu_test_interrupt(cs, CPU_INTERRUPT_HARD | CPU_INTERRUPT_NMI); } #endif /* !CONFIG_USER_ONLY */ @@ -447,6 +447,7 @@ static const TCGCPUOps mb_tcg_ops = { #ifndef CONFIG_USER_ONLY .tlb_fill = mb_cpu_tlb_fill, + .pointer_wrap = cpu_pointer_wrap_uint32, .cpu_exec_interrupt = mb_cpu_exec_interrupt, .cpu_exec_halt = mb_cpu_has_work, .cpu_exec_reset = cpu_reset, diff --git a/target/microblaze/cpu.h b/target/microblaze/cpu.h index 6ad8643..14b1078 100644 --- a/target/microblaze/cpu.h +++ b/target/microblaze/cpu.h @@ -248,7 +248,7 @@ struct CPUArchState { uint32_t pc; uint32_t msr; /* All bits of MSR except MSR[C] and MSR[CC] */ uint32_t msr_c; /* MSR[C], in low bit; other bits must be 0 */ - target_ulong ear; + uint64_t ear; uint32_t esr; uint32_t fsr; uint32_t btr; @@ -259,7 +259,7 @@ struct CPUArchState { /* lwx/swx reserved address */ #define RES_ADDR_NONE 0xffffffff /* Use 0xffffffff to indicate no reservation */ - target_ulong 
res_addr; + uint32_t res_addr; uint32_t res_val; /* Internal flags. */ diff --git a/target/microblaze/helper.c b/target/microblaze/helper.c index 9203192..cf577a7 100644 --- a/target/microblaze/helper.c +++ b/target/microblaze/helper.c @@ -26,8 +26,51 @@ #include "exec/target_page.h" #include "qemu/host-utils.h" #include "exec/log.h" +#include "exec/helper-proto.h" + + +G_NORETURN +static void mb_unaligned_access_internal(CPUState *cs, uint64_t addr, + uintptr_t retaddr) +{ + CPUMBState *env = cpu_env(cs); + uint32_t esr, iflags; + + /* Recover the pc and iflags from the corresponding insn_start. */ + cpu_restore_state(cs, retaddr); + iflags = env->iflags; + + qemu_log_mask(CPU_LOG_INT, + "Unaligned access addr=0x%" PRIx64 " pc=%x iflags=%x\n", + addr, env->pc, iflags); + + esr = ESR_EC_UNALIGNED_DATA; + if (likely(iflags & ESR_ESS_FLAG)) { + esr |= iflags & ESR_ESS_MASK; + } else { + qemu_log_mask(LOG_UNIMP, "Unaligned access without ESR_ESS_FLAG\n"); + } + + env->ear = addr; + env->esr = esr; + cs->exception_index = EXCP_HW_EXCP; + cpu_loop_exit(cs); +} + +void mb_cpu_do_unaligned_access(CPUState *cs, vaddr addr, + MMUAccessType access_type, + int mmu_idx, uintptr_t retaddr) +{ + mb_unaligned_access_internal(cs, addr, retaddr); +} #ifndef CONFIG_USER_ONLY + +void HELPER(unaligned_access)(CPUMBState *env, uint64_t addr) +{ + mb_unaligned_access_internal(env_cpu(env), addr, GETPC()); +} + static bool mb_cpu_access_is_secure(MicroBlazeCPU *cpu, MMUAccessType access_type) { @@ -231,7 +274,8 @@ hwaddr mb_cpu_get_phys_page_attrs_debug(CPUState *cs, vaddr addr, MemTxAttrs *attrs) { MicroBlazeCPU *cpu = MICROBLAZE_CPU(cs); - target_ulong vaddr, paddr = 0; + vaddr vaddr; + hwaddr paddr = 0; MicroBlazeMMULookup lu; int mmu_idx = cpu_mmu_index(cs, false); unsigned int hit; @@ -269,31 +313,3 @@ bool mb_cpu_exec_interrupt(CPUState *cs, int interrupt_request) } #endif /* !CONFIG_USER_ONLY */ - -void mb_cpu_do_unaligned_access(CPUState *cs, vaddr addr, - MMUAccessType access_type, - int mmu_idx, uintptr_t retaddr) -{ - MicroBlazeCPU *cpu = MICROBLAZE_CPU(cs); - uint32_t esr, iflags; - - /* Recover the pc and iflags from the corresponding insn_start. 
*/ - cpu_restore_state(cs, retaddr); - iflags = cpu->env.iflags; - - qemu_log_mask(CPU_LOG_INT, - "Unaligned access addr=" TARGET_FMT_lx " pc=%x iflags=%x\n", - (target_ulong)addr, cpu->env.pc, iflags); - - esr = ESR_EC_UNALIGNED_DATA; - if (likely(iflags & ESR_ESS_FLAG)) { - esr |= iflags & ESR_ESS_MASK; - } else { - qemu_log_mask(LOG_UNIMP, "Unaligned access without ESR_ESS_FLAG\n"); - } - - cpu->env.ear = addr; - cpu->env.esr = esr; - cs->exception_index = EXCP_HW_EXCP; - cpu_loop_exit(cs); -} diff --git a/target/microblaze/helper.h b/target/microblaze/helper.h index f740835..01eba59 100644 --- a/target/microblaze/helper.h +++ b/target/microblaze/helper.h @@ -20,12 +20,22 @@ DEF_HELPER_FLAGS_3(fcmp_ne, TCG_CALL_NO_WG, i32, env, i32, i32) DEF_HELPER_FLAGS_3(fcmp_ge, TCG_CALL_NO_WG, i32, env, i32, i32) DEF_HELPER_FLAGS_2(pcmpbf, TCG_CALL_NO_RWG_SE, i32, i32, i32) -#if !defined(CONFIG_USER_ONLY) +DEF_HELPER_FLAGS_2(stackprot, TCG_CALL_NO_WG, void, env, i32) +DEF_HELPER_FLAGS_2(get, TCG_CALL_NO_RWG, i32, i32, i32) +DEF_HELPER_FLAGS_3(put, TCG_CALL_NO_RWG, void, i32, i32, i32) + +#ifndef CONFIG_USER_ONLY DEF_HELPER_FLAGS_3(mmu_read, TCG_CALL_NO_RWG, i32, env, i32, i32) DEF_HELPER_FLAGS_4(mmu_write, TCG_CALL_NO_RWG, void, env, i32, i32, i32) +DEF_HELPER_FLAGS_2(unaligned_access, TCG_CALL_NO_WG, noreturn, env, i64) +DEF_HELPER_FLAGS_2(lbuea, TCG_CALL_NO_WG, i32, env, i64) +DEF_HELPER_FLAGS_2(lhuea_be, TCG_CALL_NO_WG, i32, env, i64) +DEF_HELPER_FLAGS_2(lhuea_le, TCG_CALL_NO_WG, i32, env, i64) +DEF_HELPER_FLAGS_2(lwea_be, TCG_CALL_NO_WG, i32, env, i64) +DEF_HELPER_FLAGS_2(lwea_le, TCG_CALL_NO_WG, i32, env, i64) +DEF_HELPER_FLAGS_3(sbea, TCG_CALL_NO_WG, void, env, i32, i64) +DEF_HELPER_FLAGS_3(shea_be, TCG_CALL_NO_WG, void, env, i32, i64) +DEF_HELPER_FLAGS_3(shea_le, TCG_CALL_NO_WG, void, env, i32, i64) +DEF_HELPER_FLAGS_3(swea_be, TCG_CALL_NO_WG, void, env, i32, i64) +DEF_HELPER_FLAGS_3(swea_le, TCG_CALL_NO_WG, void, env, i32, i64) #endif - -DEF_HELPER_FLAGS_2(stackprot, TCG_CALL_NO_WG, void, env, tl) - -DEF_HELPER_FLAGS_2(get, TCG_CALL_NO_RWG, i32, i32, i32) -DEF_HELPER_FLAGS_3(put, TCG_CALL_NO_RWG, void, i32, i32, i32) diff --git a/target/microblaze/machine.c b/target/microblaze/machine.c index a4cf38d..48efa54 100644 --- a/target/microblaze/machine.c +++ b/target/microblaze/machine.c @@ -78,7 +78,7 @@ static const VMStateField vmstate_env_fields[] = { VMSTATE_UINT32(iflags, CPUMBState), VMSTATE_UINT32(res_val, CPUMBState), - VMSTATE_UINTTL(res_addr, CPUMBState), + VMSTATE_UINT32(res_addr, CPUMBState), VMSTATE_STRUCT(mmu, CPUMBState, 0, vmstate_mmu, MicroBlazeMMU), @@ -87,8 +87,8 @@ static const VMStateField vmstate_env_fields[] = { static const VMStateDescription vmstate_env = { .name = "env", - .version_id = 0, - .minimum_version_id = 0, + .version_id = 1, + .minimum_version_id = 1, .fields = vmstate_env_fields, }; diff --git a/target/microblaze/mmu.c b/target/microblaze/mmu.c index 95a12e1..db24cb3 100644 --- a/target/microblaze/mmu.c +++ b/target/microblaze/mmu.c @@ -78,7 +78,7 @@ static void mmu_change_pid(CPUMBState *env, unsigned int newpid) /* rw - 0 = read, 1 = write, 2 = fetch. 
*/ unsigned int mmu_translate(MicroBlazeCPU *cpu, MicroBlazeMMULookup *lu, - target_ulong vaddr, MMUAccessType rw, int mmu_idx) + vaddr vaddr, MMUAccessType rw, int mmu_idx) { MicroBlazeMMU *mmu = &cpu->env.mmu; unsigned int i, hit = 0; @@ -172,7 +172,8 @@ unsigned int mmu_translate(MicroBlazeCPU *cpu, MicroBlazeMMULookup *lu, } done: qemu_log_mask(CPU_LOG_MMU, - "MMU vaddr=%" PRIx64 " rw=%d tlb_wr=%d tlb_ex=%d hit=%d\n", + "MMU vaddr=0x%" VADDR_PRIx + " rw=%d tlb_wr=%d tlb_ex=%d hit=%d\n", vaddr, rw, tlb_wr, tlb_ex, hit); return hit; } diff --git a/target/microblaze/mmu.h b/target/microblaze/mmu.h index 1068bd2..2aca39c 100644 --- a/target/microblaze/mmu.h +++ b/target/microblaze/mmu.h @@ -86,7 +86,7 @@ typedef struct { } MicroBlazeMMULookup; unsigned int mmu_translate(MicroBlazeCPU *cpu, MicroBlazeMMULookup *lu, - target_ulong vaddr, MMUAccessType rw, int mmu_idx); + vaddr vaddr, MMUAccessType rw, int mmu_idx); uint32_t mmu_read(CPUMBState *env, bool ea, uint32_t rn); void mmu_write(CPUMBState *env, bool ea, uint32_t rn, uint32_t v); void mmu_init(MicroBlazeMMU *mmu); diff --git a/target/microblaze/op_helper.c b/target/microblaze/op_helper.c index 9e838df..df93c42 100644 --- a/target/microblaze/op_helper.c +++ b/target/microblaze/op_helper.c @@ -365,13 +365,13 @@ uint32_t helper_pcmpbf(uint32_t a, uint32_t b) return 0; } -void helper_stackprot(CPUMBState *env, target_ulong addr) +void helper_stackprot(CPUMBState *env, uint32_t addr) { if (addr < env->slr || addr > env->shr) { CPUState *cs = env_cpu(env); qemu_log_mask(CPU_LOG_INT, "Stack protector violation at " - TARGET_FMT_lx " %x %x\n", + "0x%x 0x%x 0x%x\n", addr, env->slr, env->shr); env->ear = addr; @@ -382,6 +382,8 @@ void helper_stackprot(CPUMBState *env, target_ulong addr) } #if !defined(CONFIG_USER_ONLY) +#include "system/memory.h" + /* Writes/reads to the MMU's special regs end up here. */ uint32_t helper_mmu_read(CPUMBState *env, uint32_t ext, uint32_t rn) { @@ -393,38 +395,90 @@ void helper_mmu_write(CPUMBState *env, uint32_t ext, uint32_t rn, uint32_t v) mmu_write(env, ext, rn, v); } +static void mb_transaction_failed_internal(CPUState *cs, hwaddr physaddr, + uint64_t addr, unsigned size, + MMUAccessType access_type, + uintptr_t retaddr) +{ + CPUMBState *env = cpu_env(cs); + MicroBlazeCPU *cpu = env_archcpu(env); + const char *access_name = "INVALID"; + bool take = env->msr & MSR_EE; + uint32_t esr = ESR_EC_DATA_BUS; + + switch (access_type) { + case MMU_INST_FETCH: + access_name = "INST_FETCH"; + esr = ESR_EC_INSN_BUS; + take &= cpu->cfg.iopb_bus_exception; + break; + case MMU_DATA_LOAD: + access_name = "DATA_LOAD"; + take &= cpu->cfg.dopb_bus_exception; + break; + case MMU_DATA_STORE: + access_name = "DATA_STORE"; + take &= cpu->cfg.dopb_bus_exception; + break; + } + + qemu_log_mask(CPU_LOG_INT, "Transaction failed: addr 0x%" PRIx64 + "physaddr 0x" HWADDR_FMT_plx " size %d access-type %s (%s)\n", + addr, physaddr, size, access_name, + take ? 
"TAKEN" : "DROPPED"); + + if (take) { + env->esr = esr; + env->ear = addr; + cs->exception_index = EXCP_HW_EXCP; + cpu_loop_exit_restore(cs, retaddr); + } +} + void mb_cpu_transaction_failed(CPUState *cs, hwaddr physaddr, vaddr addr, unsigned size, MMUAccessType access_type, int mmu_idx, MemTxAttrs attrs, MemTxResult response, uintptr_t retaddr) { - MicroBlazeCPU *cpu = MICROBLAZE_CPU(cs); - CPUMBState *env = &cpu->env; + mb_transaction_failed_internal(cs, physaddr, addr, size, + access_type, retaddr); +} - qemu_log_mask(CPU_LOG_INT, "Transaction failed: vaddr 0x%" VADDR_PRIx - " physaddr 0x" HWADDR_FMT_plx " size %d access type %s\n", - addr, physaddr, size, - access_type == MMU_INST_FETCH ? "INST_FETCH" : - (access_type == MMU_DATA_LOAD ? "DATA_LOAD" : "DATA_STORE")); +#define LD_EA(NAME, TYPE, FUNC) \ +uint32_t HELPER(NAME)(CPUMBState *env, uint64_t ea) \ +{ \ + CPUState *cs = env_cpu(env); \ + MemTxResult txres; \ + TYPE ret = FUNC(cs->as, ea, MEMTXATTRS_UNSPECIFIED, &txres); \ + if (unlikely(txres != MEMTX_OK)) { \ + mb_transaction_failed_internal(cs, ea, ea, sizeof(TYPE), \ + MMU_DATA_LOAD, GETPC()); \ + } \ + return ret; \ +} - if (!(env->msr & MSR_EE)) { - return; - } +LD_EA(lbuea, uint8_t, address_space_ldub) +LD_EA(lhuea_be, uint16_t, address_space_lduw_be) +LD_EA(lhuea_le, uint16_t, address_space_lduw_le) +LD_EA(lwea_be, uint32_t, address_space_ldl_be) +LD_EA(lwea_le, uint32_t, address_space_ldl_le) + +#define ST_EA(NAME, TYPE, FUNC) \ +void HELPER(NAME)(CPUMBState *env, uint32_t data, uint64_t ea) \ +{ \ + CPUState *cs = env_cpu(env); \ + MemTxResult txres; \ + FUNC(cs->as, ea, data, MEMTXATTRS_UNSPECIFIED, &txres); \ + if (unlikely(txres != MEMTX_OK)) { \ + mb_transaction_failed_internal(cs, ea, ea, sizeof(TYPE), \ + MMU_DATA_STORE, GETPC()); \ + } \ +} - if (access_type == MMU_INST_FETCH) { - if (!cpu->cfg.iopb_bus_exception) { - return; - } - env->esr = ESR_EC_INSN_BUS; - } else { - if (!cpu->cfg.dopb_bus_exception) { - return; - } - env->esr = ESR_EC_DATA_BUS; - } +ST_EA(sbea, uint8_t, address_space_stb) +ST_EA(shea_be, uint16_t, address_space_stw_be) +ST_EA(shea_le, uint16_t, address_space_stw_le) +ST_EA(swea_be, uint32_t, address_space_stl_be) +ST_EA(swea_le, uint32_t, address_space_stl_le) - env->ear = addr; - cs->exception_index = EXCP_HW_EXCP; - cpu_loop_exit_restore(cs, retaddr); -} #endif diff --git a/target/microblaze/translate.c b/target/microblaze/translate.c index 671b1ae..6442a25 100644 --- a/target/microblaze/translate.c +++ b/target/microblaze/translate.c @@ -55,7 +55,7 @@ static TCGv_i32 cpu_imm; static TCGv_i32 cpu_bvalue; static TCGv_i32 cpu_btarget; static TCGv_i32 cpu_iflags; -static TCGv cpu_res_addr; +static TCGv_i32 cpu_res_addr; static TCGv_i32 cpu_res_val; /* This is the state at translation time. */ @@ -63,9 +63,6 @@ typedef struct DisasContext { DisasContextBase base; const MicroBlazeCPUConfig *cfg; - TCGv_i32 r0; - bool r0_set; - /* Decoder. 
*/ uint32_t ext_imm; unsigned int tb_flags; @@ -119,12 +116,12 @@ static void gen_raise_hw_excp(DisasContext *dc, uint32_t esr_ec) gen_raise_exception_sync(dc, EXCP_HW_EXCP); } -static void gen_goto_tb(DisasContext *dc, int n, target_ulong dest) +static void gen_goto_tb(DisasContext *dc, unsigned tb_slot_idx, vaddr dest) { if (translator_use_goto_tb(&dc->base, dest)) { - tcg_gen_goto_tb(n); + tcg_gen_goto_tb(tb_slot_idx); tcg_gen_movi_i32(cpu_pc, dest); - tcg_gen_exit_tb(dc->base.tb, n); + tcg_gen_exit_tb(dc->base.tb, tb_slot_idx); } else { tcg_gen_movi_i32(cpu_pc, dest); tcg_gen_lookup_and_goto_ptr(); @@ -179,14 +176,7 @@ static TCGv_i32 reg_for_read(DisasContext *dc, int reg) if (likely(reg != 0)) { return cpu_R[reg]; } - if (!dc->r0_set) { - if (dc->r0 == NULL) { - dc->r0 = tcg_temp_new_i32(); - } - tcg_gen_movi_i32(dc->r0, 0); - dc->r0_set = true; - } - return dc->r0; + return tcg_constant_i32(0); } static TCGv_i32 reg_for_write(DisasContext *dc, int reg) @@ -194,10 +184,7 @@ static TCGv_i32 reg_for_write(DisasContext *dc, int reg) if (likely(reg != 0)) { return cpu_R[reg]; } - if (dc->r0 == NULL) { - dc->r0 = tcg_temp_new_i32(); - } - return dc->r0; + return tcg_temp_new_i32(); } static bool do_typea(DisasContext *dc, arg_typea *arg, bool side_effects, @@ -617,21 +604,20 @@ static bool trans_wdic(DisasContext *dc, arg_wdic *a) DO_TYPEA(xor, false, tcg_gen_xor_i32) DO_TYPEBI(xori, false, tcg_gen_xori_i32) -static TCGv compute_ldst_addr_typea(DisasContext *dc, int ra, int rb) +static TCGv_i32 compute_ldst_addr_typea(DisasContext *dc, int ra, int rb) { - TCGv ret = tcg_temp_new(); + TCGv_i32 ret; /* If any of the regs is r0, set t to the value of the other reg. */ if (ra && rb) { - TCGv_i32 tmp = tcg_temp_new_i32(); - tcg_gen_add_i32(tmp, cpu_R[ra], cpu_R[rb]); - tcg_gen_extu_i32_tl(ret, tmp); + ret = tcg_temp_new_i32(); + tcg_gen_add_i32(ret, cpu_R[ra], cpu_R[rb]); } else if (ra) { - tcg_gen_extu_i32_tl(ret, cpu_R[ra]); + ret = cpu_R[ra]; } else if (rb) { - tcg_gen_extu_i32_tl(ret, cpu_R[rb]); + ret = cpu_R[rb]; } else { - tcg_gen_movi_tl(ret, 0); + ret = tcg_constant_i32(0); } if ((ra == 1 || rb == 1) && dc->cfg->stackprot) { @@ -640,17 +626,18 @@ static TCGv compute_ldst_addr_typea(DisasContext *dc, int ra, int rb) return ret; } -static TCGv compute_ldst_addr_typeb(DisasContext *dc, int ra, int imm) +static TCGv_i32 compute_ldst_addr_typeb(DisasContext *dc, int ra, int imm) { - TCGv ret = tcg_temp_new(); + TCGv_i32 ret; /* If any of the regs is r0, set t to the value of the other reg. 
*/ - if (ra) { - TCGv_i32 tmp = tcg_temp_new_i32(); - tcg_gen_addi_i32(tmp, cpu_R[ra], imm); - tcg_gen_extu_i32_tl(ret, tmp); + if (ra && imm) { + ret = tcg_temp_new_i32(); + tcg_gen_addi_i32(ret, cpu_R[ra], imm); + } else if (ra) { + ret = cpu_R[ra]; } else { - tcg_gen_movi_tl(ret, (uint32_t)imm); + ret = tcg_constant_i32(imm); } if (ra == 1 && dc->cfg->stackprot) { @@ -660,23 +647,23 @@ static TCGv compute_ldst_addr_typeb(DisasContext *dc, int ra, int imm) } #ifndef CONFIG_USER_ONLY -static TCGv compute_ldst_addr_ea(DisasContext *dc, int ra, int rb) +static TCGv_i64 compute_ldst_addr_ea(DisasContext *dc, int ra, int rb) { int addr_size = dc->cfg->addr_size; - TCGv ret = tcg_temp_new(); + TCGv_i64 ret = tcg_temp_new_i64(); if (addr_size == 32 || ra == 0) { if (rb) { - tcg_gen_extu_i32_tl(ret, cpu_R[rb]); + tcg_gen_extu_i32_i64(ret, cpu_R[rb]); } else { - tcg_gen_movi_tl(ret, 0); + return tcg_constant_i64(0); } } else { if (rb) { tcg_gen_concat_i32_i64(ret, cpu_R[rb], cpu_R[ra]); } else { - tcg_gen_extu_i32_tl(ret, cpu_R[ra]); - tcg_gen_shli_tl(ret, ret, 32); + tcg_gen_extu_i32_i64(ret, cpu_R[ra]); + tcg_gen_shli_i64(ret, ret, 32); } if (addr_size < 64) { /* Mask off out of range bits. */ @@ -700,6 +687,20 @@ static void record_unaligned_ess(DisasContext *dc, int rd, tcg_set_insn_start_param(dc->base.insn_start, 1, iflags); } + +static void gen_alignment_check_ea(DisasContext *dc, TCGv_i64 ea, int rb, + int rd, MemOp size, bool store) +{ + if (rb && (dc->tb_flags & MSR_EE) && dc->cfg->unaligned_exceptions) { + TCGLabel *over = gen_new_label(); + + record_unaligned_ess(dc, rd, size, store); + + tcg_gen_brcondi_i64(TCG_COND_TSTEQ, ea, (1 << size) - 1, over); + gen_helper_unaligned_access(tcg_env, ea); + gen_set_label(over); + } +} #endif static inline MemOp mo_endian(DisasContext *dc) @@ -707,7 +708,7 @@ static inline MemOp mo_endian(DisasContext *dc) return dc->cfg->endi ? 
MO_LE : MO_BE; } -static bool do_load(DisasContext *dc, int rd, TCGv addr, MemOp mop, +static bool do_load(DisasContext *dc, int rd, TCGv_i32 addr, MemOp mop, int mem_index, bool rev) { MemOp size = mop & MO_SIZE; @@ -725,7 +726,7 @@ static bool do_load(DisasContext *dc, int rd, TCGv addr, MemOp mop, mop ^= MO_BSWAP; } if (size < MO_32) { - tcg_gen_xori_tl(addr, addr, 3 - size); + tcg_gen_xori_i32(addr, addr, 3 - size); } } @@ -749,13 +750,13 @@ static bool do_load(DisasContext *dc, int rd, TCGv addr, MemOp mop, static bool trans_lbu(DisasContext *dc, arg_typea *arg) { - TCGv addr = compute_ldst_addr_typea(dc, arg->ra, arg->rb); + TCGv_i32 addr = compute_ldst_addr_typea(dc, arg->ra, arg->rb); return do_load(dc, arg->rd, addr, MO_UB, dc->mem_index, false); } static bool trans_lbur(DisasContext *dc, arg_typea *arg) { - TCGv addr = compute_ldst_addr_typea(dc, arg->ra, arg->rb); + TCGv_i32 addr = compute_ldst_addr_typea(dc, arg->ra, arg->rb); return do_load(dc, arg->rd, addr, MO_UB, dc->mem_index, true); } @@ -765,28 +766,29 @@ static bool trans_lbuea(DisasContext *dc, arg_typea *arg) return true; } #ifdef CONFIG_USER_ONLY - return true; + g_assert_not_reached(); #else - TCGv addr = compute_ldst_addr_ea(dc, arg->ra, arg->rb); - return do_load(dc, arg->rd, addr, MO_UB, MMU_NOMMU_IDX, false); + TCGv_i64 addr = compute_ldst_addr_ea(dc, arg->ra, arg->rb); + gen_helper_lbuea(reg_for_write(dc, arg->rd), tcg_env, addr); + return true; #endif } static bool trans_lbui(DisasContext *dc, arg_typeb *arg) { - TCGv addr = compute_ldst_addr_typeb(dc, arg->ra, arg->imm); + TCGv_i32 addr = compute_ldst_addr_typeb(dc, arg->ra, arg->imm); return do_load(dc, arg->rd, addr, MO_UB, dc->mem_index, false); } static bool trans_lhu(DisasContext *dc, arg_typea *arg) { - TCGv addr = compute_ldst_addr_typea(dc, arg->ra, arg->rb); + TCGv_i32 addr = compute_ldst_addr_typea(dc, arg->ra, arg->rb); return do_load(dc, arg->rd, addr, MO_UW, dc->mem_index, false); } static bool trans_lhur(DisasContext *dc, arg_typea *arg) { - TCGv addr = compute_ldst_addr_typea(dc, arg->ra, arg->rb); + TCGv_i32 addr = compute_ldst_addr_typea(dc, arg->ra, arg->rb); return do_load(dc, arg->rd, addr, MO_UW, dc->mem_index, true); } @@ -796,28 +798,31 @@ static bool trans_lhuea(DisasContext *dc, arg_typea *arg) return true; } #ifdef CONFIG_USER_ONLY - return true; + g_assert_not_reached(); #else - TCGv addr = compute_ldst_addr_ea(dc, arg->ra, arg->rb); - return do_load(dc, arg->rd, addr, MO_UW, MMU_NOMMU_IDX, false); + TCGv_i64 addr = compute_ldst_addr_ea(dc, arg->ra, arg->rb); + gen_alignment_check_ea(dc, addr, arg->rb, arg->rd, MO_16, false); + (mo_endian(dc) == MO_BE ? 
gen_helper_lhuea_be : gen_helper_lhuea_le) + (reg_for_write(dc, arg->rd), tcg_env, addr); + return true; #endif } static bool trans_lhui(DisasContext *dc, arg_typeb *arg) { - TCGv addr = compute_ldst_addr_typeb(dc, arg->ra, arg->imm); + TCGv_i32 addr = compute_ldst_addr_typeb(dc, arg->ra, arg->imm); return do_load(dc, arg->rd, addr, MO_UW, dc->mem_index, false); } static bool trans_lw(DisasContext *dc, arg_typea *arg) { - TCGv addr = compute_ldst_addr_typea(dc, arg->ra, arg->rb); + TCGv_i32 addr = compute_ldst_addr_typea(dc, arg->ra, arg->rb); return do_load(dc, arg->rd, addr, MO_UL, dc->mem_index, false); } static bool trans_lwr(DisasContext *dc, arg_typea *arg) { - TCGv addr = compute_ldst_addr_typea(dc, arg->ra, arg->rb); + TCGv_i32 addr = compute_ldst_addr_typea(dc, arg->ra, arg->rb); return do_load(dc, arg->rd, addr, MO_UL, dc->mem_index, true); } @@ -827,29 +832,32 @@ static bool trans_lwea(DisasContext *dc, arg_typea *arg) return true; } #ifdef CONFIG_USER_ONLY - return true; + g_assert_not_reached(); #else - TCGv addr = compute_ldst_addr_ea(dc, arg->ra, arg->rb); - return do_load(dc, arg->rd, addr, MO_UL, MMU_NOMMU_IDX, false); + TCGv_i64 addr = compute_ldst_addr_ea(dc, arg->ra, arg->rb); + gen_alignment_check_ea(dc, addr, arg->rb, arg->rd, MO_32, false); + (mo_endian(dc) == MO_BE ? gen_helper_lwea_be : gen_helper_lwea_le) + (reg_for_write(dc, arg->rd), tcg_env, addr); + return true; #endif } static bool trans_lwi(DisasContext *dc, arg_typeb *arg) { - TCGv addr = compute_ldst_addr_typeb(dc, arg->ra, arg->imm); + TCGv_i32 addr = compute_ldst_addr_typeb(dc, arg->ra, arg->imm); return do_load(dc, arg->rd, addr, MO_UL, dc->mem_index, false); } static bool trans_lwx(DisasContext *dc, arg_typea *arg) { - TCGv addr = compute_ldst_addr_typea(dc, arg->ra, arg->rb); + TCGv_i32 addr = compute_ldst_addr_typea(dc, arg->ra, arg->rb); /* lwx does not throw unaligned access errors, so force alignment */ - tcg_gen_andi_tl(addr, addr, ~3); + tcg_gen_andi_i32(addr, addr, ~3); tcg_gen_qemu_ld_i32(cpu_res_val, addr, dc->mem_index, mo_endian(dc) | MO_UL); - tcg_gen_mov_tl(cpu_res_addr, addr); + tcg_gen_mov_i32(cpu_res_addr, addr); if (arg->rd) { tcg_gen_mov_i32(cpu_R[arg->rd], cpu_res_val); @@ -860,7 +868,7 @@ static bool trans_lwx(DisasContext *dc, arg_typea *arg) return true; } -static bool do_store(DisasContext *dc, int rd, TCGv addr, MemOp mop, +static bool do_store(DisasContext *dc, int rd, TCGv_i32 addr, MemOp mop, int mem_index, bool rev) { MemOp size = mop & MO_SIZE; @@ -878,7 +886,7 @@ static bool do_store(DisasContext *dc, int rd, TCGv addr, MemOp mop, mop ^= MO_BSWAP; } if (size < MO_32) { - tcg_gen_xori_tl(addr, addr, 3 - size); + tcg_gen_xori_i32(addr, addr, 3 - size); } } @@ -902,13 +910,13 @@ static bool do_store(DisasContext *dc, int rd, TCGv addr, MemOp mop, static bool trans_sb(DisasContext *dc, arg_typea *arg) { - TCGv addr = compute_ldst_addr_typea(dc, arg->ra, arg->rb); + TCGv_i32 addr = compute_ldst_addr_typea(dc, arg->ra, arg->rb); return do_store(dc, arg->rd, addr, MO_UB, dc->mem_index, false); } static bool trans_sbr(DisasContext *dc, arg_typea *arg) { - TCGv addr = compute_ldst_addr_typea(dc, arg->ra, arg->rb); + TCGv_i32 addr = compute_ldst_addr_typea(dc, arg->ra, arg->rb); return do_store(dc, arg->rd, addr, MO_UB, dc->mem_index, true); } @@ -918,28 +926,29 @@ static bool trans_sbea(DisasContext *dc, arg_typea *arg) return true; } #ifdef CONFIG_USER_ONLY - return true; + g_assert_not_reached(); #else - TCGv addr = compute_ldst_addr_ea(dc, arg->ra, arg->rb); - return 
do_store(dc, arg->rd, addr, MO_UB, MMU_NOMMU_IDX, false); + TCGv_i64 addr = compute_ldst_addr_ea(dc, arg->ra, arg->rb); + gen_helper_sbea(tcg_env, reg_for_read(dc, arg->rd), addr); + return true; #endif } static bool trans_sbi(DisasContext *dc, arg_typeb *arg) { - TCGv addr = compute_ldst_addr_typeb(dc, arg->ra, arg->imm); + TCGv_i32 addr = compute_ldst_addr_typeb(dc, arg->ra, arg->imm); return do_store(dc, arg->rd, addr, MO_UB, dc->mem_index, false); } static bool trans_sh(DisasContext *dc, arg_typea *arg) { - TCGv addr = compute_ldst_addr_typea(dc, arg->ra, arg->rb); + TCGv_i32 addr = compute_ldst_addr_typea(dc, arg->ra, arg->rb); return do_store(dc, arg->rd, addr, MO_UW, dc->mem_index, false); } static bool trans_shr(DisasContext *dc, arg_typea *arg) { - TCGv addr = compute_ldst_addr_typea(dc, arg->ra, arg->rb); + TCGv_i32 addr = compute_ldst_addr_typea(dc, arg->ra, arg->rb); return do_store(dc, arg->rd, addr, MO_UW, dc->mem_index, true); } @@ -949,28 +958,31 @@ static bool trans_shea(DisasContext *dc, arg_typea *arg) return true; } #ifdef CONFIG_USER_ONLY - return true; + g_assert_not_reached(); #else - TCGv addr = compute_ldst_addr_ea(dc, arg->ra, arg->rb); - return do_store(dc, arg->rd, addr, MO_UW, MMU_NOMMU_IDX, false); + TCGv_i64 addr = compute_ldst_addr_ea(dc, arg->ra, arg->rb); + gen_alignment_check_ea(dc, addr, arg->rb, arg->rd, MO_16, true); + (mo_endian(dc) == MO_BE ? gen_helper_shea_be : gen_helper_shea_le) + (tcg_env, reg_for_read(dc, arg->rd), addr); + return true; #endif } static bool trans_shi(DisasContext *dc, arg_typeb *arg) { - TCGv addr = compute_ldst_addr_typeb(dc, arg->ra, arg->imm); + TCGv_i32 addr = compute_ldst_addr_typeb(dc, arg->ra, arg->imm); return do_store(dc, arg->rd, addr, MO_UW, dc->mem_index, false); } static bool trans_sw(DisasContext *dc, arg_typea *arg) { - TCGv addr = compute_ldst_addr_typea(dc, arg->ra, arg->rb); + TCGv_i32 addr = compute_ldst_addr_typea(dc, arg->ra, arg->rb); return do_store(dc, arg->rd, addr, MO_UL, dc->mem_index, false); } static bool trans_swr(DisasContext *dc, arg_typea *arg) { - TCGv addr = compute_ldst_addr_typea(dc, arg->ra, arg->rb); + TCGv_i32 addr = compute_ldst_addr_typea(dc, arg->ra, arg->rb); return do_store(dc, arg->rd, addr, MO_UL, dc->mem_index, true); } @@ -980,28 +992,31 @@ static bool trans_swea(DisasContext *dc, arg_typea *arg) return true; } #ifdef CONFIG_USER_ONLY - return true; + g_assert_not_reached(); #else - TCGv addr = compute_ldst_addr_ea(dc, arg->ra, arg->rb); - return do_store(dc, arg->rd, addr, MO_UL, MMU_NOMMU_IDX, false); + TCGv_i64 addr = compute_ldst_addr_ea(dc, arg->ra, arg->rb); + gen_alignment_check_ea(dc, addr, arg->rb, arg->rd, MO_32, true); + (mo_endian(dc) == MO_BE ? gen_helper_swea_be : gen_helper_swea_le) + (tcg_env, reg_for_read(dc, arg->rd), addr); + return true; #endif } static bool trans_swi(DisasContext *dc, arg_typeb *arg) { - TCGv addr = compute_ldst_addr_typeb(dc, arg->ra, arg->imm); + TCGv_i32 addr = compute_ldst_addr_typeb(dc, arg->ra, arg->imm); return do_store(dc, arg->rd, addr, MO_UL, dc->mem_index, false); } static bool trans_swx(DisasContext *dc, arg_typea *arg) { - TCGv addr = compute_ldst_addr_typea(dc, arg->ra, arg->rb); + TCGv_i32 addr = compute_ldst_addr_typea(dc, arg->ra, arg->rb); TCGLabel *swx_done = gen_new_label(); TCGLabel *swx_fail = gen_new_label(); TCGv_i32 tval; /* swx does not throw unaligned access errors, so force alignment */ - tcg_gen_andi_tl(addr, addr, ~3); + tcg_gen_andi_i32(addr, addr, ~3); /* * Compare the address vs the one we used during lwx. 
@@ -1009,7 +1024,7 @@ static bool trans_swx(DisasContext *dc, arg_typea *arg) * branch, but we know we can use the equal version in the global. * In either case, addr is no longer needed. */ - tcg_gen_brcond_tl(TCG_COND_NE, cpu_res_addr, addr, swx_fail); + tcg_gen_brcond_i32(TCG_COND_NE, cpu_res_addr, addr, swx_fail); /* * Compare the value loaded during lwx with current contents of @@ -1037,7 +1052,7 @@ static bool trans_swx(DisasContext *dc, arg_typea *arg) * Prevent the saved address from working again without another ldx. * Akin to the pseudocode setting reservation = 0. */ - tcg_gen_movi_tl(cpu_res_addr, -1); + tcg_gen_movi_i32(cpu_res_addr, RES_ADDR_NONE); return true; } @@ -1158,7 +1173,7 @@ static bool trans_brk(DisasContext *dc, arg_typea_br *arg) tcg_gen_movi_i32(cpu_R[arg->rd], dc->base.pc_next); } tcg_gen_ori_i32(cpu_msr, cpu_msr, MSR_BIP); - tcg_gen_movi_tl(cpu_res_addr, -1); + tcg_gen_movi_i32(cpu_res_addr, RES_ADDR_NONE); dc->base.is_jmp = DISAS_EXIT; return true; @@ -1179,7 +1194,7 @@ static bool trans_brki(DisasContext *dc, arg_typeb_br *arg) if (arg->rd) { tcg_gen_movi_i32(cpu_R[arg->rd], dc->base.pc_next); } - tcg_gen_movi_tl(cpu_res_addr, -1); + tcg_gen_movi_i32(cpu_res_addr, RES_ADDR_NONE); #ifdef CONFIG_USER_ONLY switch (imm) { @@ -1607,8 +1622,6 @@ static void mb_tr_init_disas_context(DisasContextBase *dcb, CPUState *cs) dc->cfg = &cpu->cfg; dc->tb_flags = dc->base.tb->flags; dc->ext_imm = dc->base.tb->cs_base; - dc->r0 = NULL; - dc->r0_set = false; dc->mem_index = cpu_mmu_index(cs, false); dc->jmp_cond = dc->tb_flags & D_FLAG ? TCG_COND_ALWAYS : TCG_COND_NEVER; dc->jmp_dest = -1; @@ -1647,11 +1660,6 @@ static void mb_tr_translate_insn(DisasContextBase *dcb, CPUState *cs) trap_illegal(dc, true); } - if (dc->r0) { - dc->r0 = NULL; - dc->r0_set = false; - } - /* Discard the imm global when its contents cannot be used. 
*/ if ((dc->tb_flags & ~dc->tb_flags_to_set) & IMM_FLAG) { tcg_gen_discard_i32(cpu_imm); @@ -1829,7 +1837,7 @@ void mb_cpu_dump_state(CPUState *cs, FILE *f, int flags) } qemu_fprintf(f, "\nesr=0x%04x fsr=0x%02x btr=0x%08x edr=0x%x\n" - "ear=0x" TARGET_FMT_lx " slr=0x%x shr=0x%x\n", + "ear=0x%" PRIx64 " slr=0x%x shr=0x%x\n", env->esr, env->fsr, env->btr, env->edr, env->ear, env->slr, env->shr); @@ -1877,6 +1885,7 @@ void mb_tcg_init(void) tcg_global_mem_new_i32(tcg_env, i32s[i].ofs, i32s[i].name); } - cpu_res_addr = - tcg_global_mem_new(tcg_env, offsetof(CPUMBState, res_addr), "res_addr"); + cpu_res_addr = tcg_global_mem_new_i32(tcg_env, + offsetof(CPUMBState, res_addr), + "res_addr"); } diff --git a/target/mips/cpu-defs.c.inc b/target/mips/cpu-defs.c.inc index 922fc39..d93b9d3 100644 --- a/target/mips/cpu-defs.c.inc +++ b/target/mips/cpu-defs.c.inc @@ -756,8 +756,9 @@ const mips_def_t mips_defs[] = (1 << CP0C3_RXI) | (1 << CP0C3_LPA) | (1 << CP0C3_VInt), .CP0_Config4 = MIPS_CONFIG4 | (1U << CP0C4_M) | (3 << CP0C4_IE) | (1 << CP0C4_AE) | (0xfc << CP0C4_KScrExist), - .CP0_Config5 = MIPS_CONFIG5 | (1 << CP0C5_XNP) | (1 << CP0C5_VP) | - (1 << CP0C5_LLB) | (1 << CP0C5_MRP) | (3 << CP0C5_GI), + .CP0_Config5 = MIPS_CONFIG5 | (1 << CP0C5_CRCP) | (1 << CP0C5_XNP) | + (1 << CP0C5_VP) | (1 << CP0C5_LLB) | (1 << CP0C5_MRP) | + (3 << CP0C5_GI), .CP0_Config5_rw_bitmask = (1 << CP0C5_MSAEn) | (1 << CP0C5_SBRI) | (1 << CP0C5_FRE) | (1 << CP0C5_UFE), .CP0_LLAddr_rw_bitmask = 0, @@ -796,8 +797,9 @@ const mips_def_t mips_defs[] = (1 << CP0C3_RXI) | (1 << CP0C3_LPA) | (1 << CP0C3_VInt), .CP0_Config4 = MIPS_CONFIG4 | (1U << CP0C4_M) | (3 << CP0C4_IE) | (1 << CP0C4_AE) | (0xfc << CP0C4_KScrExist), - .CP0_Config5 = MIPS_CONFIG5 | (1 << CP0C5_XNP) | (1 << CP0C5_VP) | - (1 << CP0C5_LLB) | (1 << CP0C5_MRP) | (3 << CP0C5_GI), + .CP0_Config5 = MIPS_CONFIG5 | (1 << CP0C5_CRCP) | (1 << CP0C5_XNP) | + (1 << CP0C5_VP) | (1 << CP0C5_LLB) | (1 << CP0C5_MRP) | + (3 << CP0C5_GI), .CP0_Config5_rw_bitmask = (1 << CP0C5_MSAEn) | (1 << CP0C5_SBRI) | (1 << CP0C5_FRE) | (1 << CP0C5_UFE), .CP0_LLAddr_rw_bitmask = 0, diff --git a/target/mips/cpu.c b/target/mips/cpu.c index 4cbfb94..5989c3b 100644 --- a/target/mips/cpu.c +++ b/target/mips/cpu.c @@ -145,7 +145,7 @@ static bool mips_cpu_has_work(CPUState *cs) * check for interrupts that can be taken. For pre-release 6 CPUs, * check for CP0 Config7 'Wait IE ignore' bit. */ - if ((cs->interrupt_request & CPU_INTERRUPT_HARD) && + if (cpu_test_interrupt(cs, CPU_INTERRUPT_HARD) && cpu_mips_hw_interrupts_pending(env)) { if (cpu_mips_hw_interrupts_enabled(env) || (env->CP0_Config7 & (1 << CP0C7_WII)) || @@ -160,7 +160,7 @@ static bool mips_cpu_has_work(CPUState *cs) * The QEMU model will issue an _WAKE request whenever the CPUs * should be woken up. */ - if (cs->interrupt_request & CPU_INTERRUPT_WAKE) { + if (cpu_test_interrupt(cs, CPU_INTERRUPT_WAKE)) { has_work = true; } @@ -170,7 +170,7 @@ static bool mips_cpu_has_work(CPUState *cs) } /* MIPS Release 6 has the ability to halt the CPU. */ if (env->CP0_Config5 & (1 << CP0C5_VP)) { - if (cs->interrupt_request & CPU_INTERRUPT_WAKE) { + if (cpu_test_interrupt(cs, CPU_INTERRUPT_WAKE)) { has_work = true; } if (!mips_vp_active(env)) { @@ -560,6 +560,14 @@ static TCGTBCPUState mips_get_tb_cpu_state(CPUState *cs) }; } +#ifndef CONFIG_USER_ONLY +static vaddr mips_pointer_wrap(CPUState *cs, int mmu_idx, + vaddr result, vaddr base) +{ + return cpu_env(cs)->hflags & MIPS_HFLAG_AWRAP ? 
(int32_t)result : result; +} +#endif + static const TCGCPUOps mips_tcg_ops = { .mttcg_supported = TARGET_LONG_BITS == 32, .guest_default_memory_order = 0, @@ -573,6 +581,7 @@ static const TCGCPUOps mips_tcg_ops = { #if !defined(CONFIG_USER_ONLY) .tlb_fill = mips_cpu_tlb_fill, + .pointer_wrap = mips_pointer_wrap, .cpu_exec_interrupt = mips_cpu_exec_interrupt, .cpu_exec_halt = mips_cpu_has_work, .cpu_exec_reset = cpu_reset, diff --git a/target/mips/helper.h b/target/mips/helper.h index 7e40041..b6cd53c 100644 --- a/target/mips/helper.h +++ b/target/mips/helper.h @@ -21,6 +21,8 @@ DEF_HELPER_FLAGS_1(bitswap, TCG_CALL_NO_RWG_SE, tl, tl) DEF_HELPER_FLAGS_1(dbitswap, TCG_CALL_NO_RWG_SE, tl, tl) #endif +DEF_HELPER_3(crc32, tl, tl, tl, i32) +DEF_HELPER_3(crc32c, tl, tl, tl, i32) DEF_HELPER_FLAGS_4(rotx, TCG_CALL_NO_RWG_SE, tl, tl, i32, i32, i32) /* microMIPS functions */ diff --git a/target/mips/kvm.c b/target/mips/kvm.c index d67b7c1..912cd5d 100644 --- a/target/mips/kvm.c +++ b/target/mips/kvm.c @@ -61,6 +61,11 @@ int kvm_arch_irqchip_create(KVMState *s) return 0; } +int kvm_arch_pre_create_vcpu(CPUState *cpu, Error **errp) +{ + return 0; +} + int kvm_arch_init_vcpu(CPUState *cs) { CPUMIPSState *env = cpu_env(cs); @@ -139,7 +144,7 @@ void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run) bql_lock(); - if ((cs->interrupt_request & CPU_INTERRUPT_HARD) && + if (cpu_test_interrupt(cs, CPU_INTERRUPT_HARD) && cpu_mips_io_interrupts_pending(cpu)) { intr.cpu = -1; intr.irq = 2; @@ -585,7 +590,7 @@ static void kvm_mips_update_state(void *opaque, bool running, RunState state) } } -static int kvm_mips_put_fpu_registers(CPUState *cs, int level) +static int kvm_mips_put_fpu_registers(CPUState *cs, KvmPutState level) { CPUMIPSState *env = cpu_env(cs); int err, ret = 0; @@ -744,7 +749,7 @@ static int kvm_mips_get_fpu_registers(CPUState *cs) } -static int kvm_mips_put_cp0_registers(CPUState *cs, int level) +static int kvm_mips_put_cp0_registers(CPUState *cs, KvmPutState level) { CPUMIPSState *env = cpu_env(cs); int err, ret = 0; @@ -1172,7 +1177,7 @@ static int kvm_mips_get_cp0_registers(CPUState *cs) return ret; } -int kvm_arch_put_registers(CPUState *cs, int level, Error **errp) +int kvm_arch_put_registers(CPUState *cs, KvmPutState level, Error **errp) { CPUMIPSState *env = cpu_env(cs); struct kvm_regs regs; diff --git a/target/mips/meson.build b/target/mips/meson.build index 247979a..abf0ce3 100644 --- a/target/mips/meson.build +++ b/target/mips/meson.build @@ -7,6 +7,7 @@ mips_ss.add(files( 'gdbstub.c', 'msa.c', )) +mips_ss.add(zlib) if have_system subdir('system') diff --git a/target/mips/system/mips-qmp-cmds.c b/target/mips/system/mips-qmp-cmds.c index 7340ac7..b6a2874 100644 --- a/target/mips/system/mips-qmp-cmds.c +++ b/target/mips/system/mips-qmp-cmds.c @@ -7,9 +7,20 @@ */ #include "qemu/osdep.h" -#include "qapi/qapi-commands-machine-target.h" +#include "qemu/target-info.h" +#include "qapi/error.h" +#include "qapi/qapi-commands-machine.h" #include "cpu.h" +CpuModelExpansionInfo * +qmp_query_cpu_model_expansion(CpuModelExpansionType type, + CpuModelInfo *model, + Error **errp) +{ + error_setg(errp, "CPU model expansion is not supported on this target"); + return NULL; +} + static void mips_cpu_add_definition(gpointer data, gpointer user_data) { ObjectClass *oc = data; @@ -30,7 +41,7 @@ CpuDefinitionInfoList *qmp_query_cpu_definitions(Error **errp) CpuDefinitionInfoList *cpu_list = NULL; GSList *list; - list = object_class_get_list(TYPE_MIPS_CPU, false); + list = 
object_class_get_list(target_cpu_type(), false); g_slist_foreach(list, mips_cpu_add_definition, &cpu_list); g_slist_free(list); diff --git a/target/mips/tcg/micromips_translate.c.inc b/target/mips/tcg/micromips_translate.c.inc index c479bec..8fda7c8 100644 --- a/target/mips/tcg/micromips_translate.c.inc +++ b/target/mips/tcg/micromips_translate.c.inc @@ -1795,7 +1795,7 @@ static void decode_micromips32_opc(CPUMIPSState *env, DisasContext *ctx) return; case LSA: check_insn(ctx, ISA_MIPS_R6); - gen_lsa(ctx, rd, rt, rs, extract32(ctx->opcode, 9, 2)); + gen_lsa(ctx, rd, rt, rs, extract32(ctx->opcode, 9, 2) + 1); break; case ALIGN: check_insn(ctx, ISA_MIPS_R6); diff --git a/target/mips/tcg/msa_translate.c b/target/mips/tcg/msa_translate.c index 75cf80a..82b1499 100644 --- a/target/mips/tcg/msa_translate.c +++ b/target/mips/tcg/msa_translate.c @@ -780,7 +780,7 @@ TRANS_DF_iv(ST, trans_msa_ldst, gen_helper_msa_st); static bool trans_LSA(DisasContext *ctx, arg_r *a) { - return gen_lsa(ctx, a->rd, a->rt, a->rs, a->sa); + return gen_lsa(ctx, a->rd, a->rt, a->rs, a->sa + 1); } static bool trans_DLSA(DisasContext *ctx, arg_r *a) @@ -788,5 +788,5 @@ static bool trans_DLSA(DisasContext *ctx, arg_r *a) if (TARGET_LONG_BITS != 64) { return false; } - return gen_dlsa(ctx, a->rd, a->rt, a->rs, a->sa); + return gen_dlsa(ctx, a->rd, a->rt, a->rs, a->sa + 1); } diff --git a/target/mips/tcg/nanomips_translate.c.inc b/target/mips/tcg/nanomips_translate.c.inc index 1e27414..9d4e0be 100644 --- a/target/mips/tcg/nanomips_translate.c.inc +++ b/target/mips/tcg/nanomips_translate.c.inc @@ -3626,12 +3626,7 @@ static int decode_nanomips_32_48_opc(CPUMIPSState *env, DisasContext *ctx) gen_p_lsx(ctx, rd, rs, rt); break; case NM_LSA: - /* - * In nanoMIPS, the shift field directly encodes the shift - * amount, meaning that the supported shift values are in - * the range 0 to 3 (instead of 1 to 4 in MIPSR6). - */ - gen_lsa(ctx, rd, rt, rs, extract32(ctx->opcode, 9, 2) - 1); + gen_lsa(ctx, rd, rt, rs, extract32(ctx->opcode, 9, 2)); break; case NM_EXTW: gen_ext(ctx, 32, rd, rs, rt, extract32(ctx->opcode, 6, 5)); diff --git a/target/mips/tcg/octeon.decode b/target/mips/tcg/octeon.decode index 0c787cb..102a058 100644 --- a/target/mips/tcg/octeon.decode +++ b/target/mips/tcg/octeon.decode @@ -1,6 +1,7 @@ # Octeon Architecture Module instruction set # # Copyright (C) 2022 Pavel Dovgalyuk +# Copyright (C) 2024 Philippe Mathieu-Daudé # # SPDX-License-Identifier: LGPL-2.1-or-later # @@ -39,3 +40,10 @@ CINS 011100 ..... ..... ..... ..... 11001 . @bitfield POP 011100 rs:5 00000 rd:5 00000 10110 dw:1 SEQNE 011100 rs:5 rt:5 rd:5 00000 10101 ne:1 SEQNEI 011100 rs:5 rt:5 imm:s10 10111 ne:1 + +&lx base index rd +@lx ...... base:5 index:5 rd:5 ...... ..... &lx +LWX 011111 ..... ..... ..... 00000 001010 @lx +LHX 011111 ..... ..... ..... 00100 001010 @lx +LBUX 011111 ..... ..... ..... 00110 001010 @lx +LDX 011111 ..... ..... ..... 
01000 001010 @lx diff --git a/target/mips/tcg/octeon_translate.c b/target/mips/tcg/octeon_translate.c index d9eb437..b2eca29 100644 --- a/target/mips/tcg/octeon_translate.c +++ b/target/mips/tcg/octeon_translate.c @@ -174,3 +174,15 @@ static bool trans_SEQNEI(DisasContext *ctx, arg_SEQNEI *a) } return true; } + +static bool trans_lx(DisasContext *ctx, arg_lx *a, MemOp mop) +{ + gen_lx(ctx, a->rd, a->base, a->index, mop); + + return true; +} + +TRANS(LBUX, trans_lx, MO_UB); +TRANS(LHX, trans_lx, MO_SW); +TRANS(LWX, trans_lx, MO_SL); +TRANS(LDX, trans_lx, MO_UQ); diff --git a/target/mips/tcg/op_helper.c b/target/mips/tcg/op_helper.c index b906d10..4502ae2 100644 --- a/target/mips/tcg/op_helper.c +++ b/target/mips/tcg/op_helper.c @@ -24,6 +24,8 @@ #include "exec/helper-proto.h" #include "exec/memop.h" #include "fpu_helper.h" +#include "qemu/crc32c.h" +#include <zlib.h> static inline target_ulong bitswap(target_ulong v) { @@ -142,6 +144,30 @@ target_ulong helper_rotx(target_ulong rs, uint32_t shift, uint32_t shiftx, return (int64_t)(int32_t)(uint32_t)tmp5; } +/* these crc32 functions are based on target/loongarch/tcg/op_helper.c */ +target_ulong helper_crc32(target_ulong val, target_ulong m, uint32_t sz) +{ + uint8_t buf[8]; + target_ulong mask = ((sz * 8) == 64) ? + (target_ulong) -1ULL : + ((1ULL << (sz * 8)) - 1); + + m &= mask; + stq_le_p(buf, m); + return (int32_t) (crc32(val ^ 0xffffffff, buf, sz) ^ 0xffffffff); +} + +target_ulong helper_crc32c(target_ulong val, target_ulong m, uint32_t sz) +{ + uint8_t buf[8]; + target_ulong mask = ((sz * 8) == 64) ? + (target_ulong) -1ULL : + ((1ULL << (sz * 8)) - 1); + m &= mask; + stq_le_p(buf, m); + return (int32_t) (crc32c(val, buf, sz) ^ 0xffffffff); +} + void helper_fork(target_ulong arg1, target_ulong arg2) { /* diff --git a/target/mips/tcg/rel6.decode b/target/mips/tcg/rel6.decode index d6989cf..7fbcb10 100644 --- a/target/mips/tcg/rel6.decode +++ b/target/mips/tcg/rel6.decode @@ -16,11 +16,16 @@ &r rs rt rd sa +&special3_crc rs rt c sz + @lsa ...... rs:5 rt:5 rd:5 ... sa:2 ...... &r +@crc32 ...... rs:5 rt:5 ..... c:3 sz:2 ...... &special3_crc LSA 000000 ..... ..... ..... 000 .. 000101 @lsa DLSA 000000 ..... ..... ..... 000 .. 010101 @lsa +CRC32 011111 ..... ..... 00000 ... .. 
001111 @crc32 + REMOVED 010011 ----- ----- ----- ----- ------ # COP1X (COP3) REMOVED 011100 ----- ----- ----- ----- ------ # SPECIAL2 diff --git a/target/mips/tcg/rel6_translate.c b/target/mips/tcg/rel6_translate.c index 59f237b..4c05662 100644 --- a/target/mips/tcg/rel6_translate.c +++ b/target/mips/tcg/rel6_translate.c @@ -23,7 +23,7 @@ bool trans_REMOVED(DisasContext *ctx, arg_REMOVED *a) static bool trans_LSA(DisasContext *ctx, arg_r *a) { - return gen_lsa(ctx, a->rd, a->rt, a->rs, a->sa); + return gen_lsa(ctx, a->rd, a->rt, a->rs, a->sa + 1); } static bool trans_DLSA(DisasContext *ctx, arg_r *a) @@ -31,5 +31,17 @@ static bool trans_DLSA(DisasContext *ctx, arg_r *a) if (TARGET_LONG_BITS != 64) { return false; } - return gen_dlsa(ctx, a->rd, a->rt, a->rs, a->sa); + return gen_dlsa(ctx, a->rd, a->rt, a->rs, a->sa + 1); +} + +static bool trans_CRC32(DisasContext *ctx, arg_special3_crc *a) +{ + if (unlikely(!ctx->crcp) + || unlikely((a->sz == 3) && (!(ctx->hflags & MIPS_HFLAG_64))) + || unlikely((a->c >= 2))) { + gen_reserved_instruction(ctx); + return true; + } + gen_crc32(ctx, a->rt, a->rs, a->rt, a->sz, a->c); + return true; } diff --git a/target/mips/tcg/system/cp0_helper.c b/target/mips/tcg/system/cp0_helper.c index 101b1e6..b69e70d 100644 --- a/target/mips/tcg/system/cp0_helper.c +++ b/target/mips/tcg/system/cp0_helper.c @@ -1562,12 +1562,14 @@ target_ulong helper_dvpe(CPUMIPSState *env) CPUState *other_cs = first_cpu; target_ulong prev = env->mvp->CP0_MVPControl; - CPU_FOREACH(other_cs) { - MIPSCPU *other_cpu = MIPS_CPU(other_cs); - /* Turn off all VPEs except the one executing the dvpe. */ - if (&other_cpu->env != env) { - other_cpu->env.mvp->CP0_MVPControl &= ~(1 << CP0MVPCo_EVP); - mips_vpe_sleep(other_cpu); + if (env->CP0_VPEConf0 & (1 << CP0VPEC0_MVP)) { + CPU_FOREACH(other_cs) { + MIPSCPU *other_cpu = MIPS_CPU(other_cs); + /* Turn off all VPEs except the one executing the dvpe. */ + if (&other_cpu->env != env) { + other_cpu->env.mvp->CP0_MVPControl &= ~(1 << CP0MVPCo_EVP); + mips_vpe_sleep(other_cpu); + } } } return prev; @@ -1578,15 +1580,17 @@ target_ulong helper_evpe(CPUMIPSState *env) CPUState *other_cs = first_cpu; target_ulong prev = env->mvp->CP0_MVPControl; - CPU_FOREACH(other_cs) { - MIPSCPU *other_cpu = MIPS_CPU(other_cs); + if (env->CP0_VPEConf0 & (1 << CP0VPEC0_MVP)) { + CPU_FOREACH(other_cs) { + MIPSCPU *other_cpu = MIPS_CPU(other_cs); - if (&other_cpu->env != env - /* If the VPE is WFI, don't disturb its sleep. */ - && !mips_vpe_is_wfi(other_cpu)) { - /* Enable the VPE. */ - other_cpu->env.mvp->CP0_MVPControl |= (1 << CP0MVPCo_EVP); - mips_vpe_wake(other_cpu); /* And wake it up. */ + if (&other_cpu->env != env + /* If the VPE is WFI, don't disturb its sleep. */ + && !mips_vpe_is_wfi(other_cpu)) { + /* Enable the VPE. */ + other_cpu->env.mvp->CP0_MVPControl |= (1 << CP0MVPCo_EVP); + mips_vpe_wake(other_cpu); /* And wake it up. 
*/ + } } } return prev; diff --git a/target/mips/tcg/system/tlb_helper.c b/target/mips/tcg/system/tlb_helper.c index eccaf36..1e89015 100644 --- a/target/mips/tcg/system/tlb_helper.c +++ b/target/mips/tcg/system/tlb_helper.c @@ -652,7 +652,7 @@ static int walk_directory(CPUMIPSState *env, uint64_t *vaddr, return 0; } - if ((entry & (1 << psn)) && hugepg) { + if (extract64(entry, psn, 1) && hugepg) { *huge_page = true; *hgpg_directory_hit = true; entry = get_tlb_entry_layout(env, entry, leaf_mop, pf_ptew); diff --git a/target/mips/tcg/translate.c b/target/mips/tcg/translate.c index 8658315..54849e9 100644 --- a/target/mips/tcg/translate.c +++ b/target/mips/tcg/translate.c @@ -1957,6 +1957,17 @@ void gen_base_offset_addr(DisasContext *ctx, TCGv addr, int base, int offset) } } +void gen_base_index_addr(DisasContext *ctx, TCGv addr, int base, int index) +{ + if (base == 0) { + gen_load_gpr(addr, index); + } else if (index == 0) { + gen_load_gpr(addr, base); + } else { + gen_op_addr_add(ctx, addr, cpu_gpr[base], cpu_gpr[index]); + } +} + static target_ulong pc_relative_pc(DisasContext *ctx) { target_ulong pc = ctx->base.pc_next; @@ -2025,6 +2036,15 @@ static void gen_lxr(DisasContext *ctx, TCGv reg, TCGv addr, tcg_gen_or_tl(reg, t0, t1); } +void gen_lx(DisasContext *ctx, int rd, int base, int index, MemOp mop) +{ + TCGv t0 = tcg_temp_new(); + + gen_base_index_addr(ctx, t0, base, index); + tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx, mo_endian(ctx) | mop); + gen_store_gpr(t0, rd); +} + /* Load */ static void gen_ld(DisasContext *ctx, uint32_t opc, int rt, int base, int offset) @@ -4342,12 +4362,13 @@ static void gen_trap(DisasContext *ctx, uint32_t opc, } } -static void gen_goto_tb(DisasContext *ctx, int n, target_ulong dest) +static void gen_goto_tb(DisasContext *ctx, unsigned tb_slot_idx, + target_ulong dest) { if (translator_use_goto_tb(&ctx->base, dest)) { - tcg_gen_goto_tb(n); + tcg_gen_goto_tb(tb_slot_idx); gen_save_pc(dest); - tcg_gen_exit_tb(ctx->base.tb, n); + tcg_gen_exit_tb(ctx->base.tb, tb_slot_idx); } else { gen_save_pc(dest); tcg_gen_lookup_and_goto_ptr(); @@ -10546,13 +10567,7 @@ static void gen_flt3_ldst(DisasContext *ctx, uint32_t opc, { TCGv t0 = tcg_temp_new(); - if (base == 0) { - gen_load_gpr(t0, index); - } else if (index == 0) { - gen_load_gpr(t0, base); - } else { - gen_op_addr_add(ctx, t0, cpu_gpr[base], cpu_gpr[index]); - } + gen_base_index_addr(ctx, t0, base, index); /* * Don't do NOP if destination is zero: we must perform the actual * memory access. @@ -11323,47 +11338,6 @@ enum { /* MIPSDSP functions. 
*/ -/* Indexed load is not for DSP only */ -static void gen_mips_lx(DisasContext *ctx, uint32_t opc, - int rd, int base, int offset) -{ - TCGv t0; - - if (!(ctx->insn_flags & INSN_OCTEON)) { - check_dsp(ctx); - } - t0 = tcg_temp_new(); - - if (base == 0) { - gen_load_gpr(t0, offset); - } else if (offset == 0) { - gen_load_gpr(t0, base); - } else { - gen_op_addr_add(ctx, t0, cpu_gpr[base], cpu_gpr[offset]); - } - - switch (opc) { - case OPC_LBUX: - tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx, MO_UB); - gen_store_gpr(t0, rd); - break; - case OPC_LHX: - tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx, mo_endian(ctx) | MO_SW); - gen_store_gpr(t0, rd); - break; - case OPC_LWX: - tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx, mo_endian(ctx) | MO_SL); - gen_store_gpr(t0, rd); - break; -#if defined(TARGET_MIPS64) - case OPC_LDX: - tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx, mo_endian(ctx) | MO_UQ); - gen_store_gpr(t0, rd); - break; -#endif - } -} - static void gen_mipsdsp_arith(DisasContext *ctx, uint32_t op1, uint32_t op2, int ret, int v1, int v2) { @@ -13449,6 +13423,29 @@ static void decode_opc_special2_legacy(CPUMIPSState *env, DisasContext *ctx) } } +void gen_crc32(DisasContext *ctx, int rd, int rs, int rt, int sz, + int crc32c) +{ + TCGv t0; + TCGv t1; + TCGv_i32 tsz = tcg_constant_i32(1 << sz); + if (rd == 0) { + /* Treat as NOP. */ + return; + } + t0 = tcg_temp_new(); + t1 = tcg_temp_new(); + + gen_load_gpr(t0, rt); + gen_load_gpr(t1, rs); + + if (crc32c) { + gen_helper_crc32c(cpu_gpr[rd], t0, t1, tsz); + } else { + gen_helper_crc32(cpu_gpr[rd], t0, t1, tsz); + } +} + static void decode_opc_special3_r6(CPUMIPSState *env, DisasContext *ctx) { int rs, rt, rd, sa; @@ -13611,15 +13608,22 @@ static void decode_opc_special3_legacy(CPUMIPSState *env, DisasContext *ctx) } break; case OPC_LX_DSP: + check_dsp(ctx); op2 = MASK_LX(ctx->opcode); switch (op2) { #if defined(TARGET_MIPS64) case OPC_LDX: + gen_lx(ctx, rd, rs, rt, MO_UQ); + break; #endif case OPC_LBUX: + gen_lx(ctx, rd, rs, rt, MO_UB); + break; case OPC_LHX: + gen_lx(ctx, rd, rs, rt, MO_SW); + break; case OPC_LWX: - gen_mips_lx(ctx, op2, rd, rs, rt); + gen_lx(ctx, rd, rs, rt, MO_SL); break; default: /* Invalid */ MIPS_INVAL("MASK LX"); @@ -15095,6 +15099,7 @@ static void mips_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs) ctx->abs2008 = (env->active_fpu.fcr31 >> FCR31_ABS2008) & 1; ctx->mi = (env->CP0_Config5 >> CP0C5_MI) & 1; ctx->gi = (env->CP0_Config5 >> CP0C5_GI) & 3; + ctx->crcp = (env->CP0_Config5 >> CP0C5_CRCP) & 1; restore_cpu_state(env, ctx); #ifdef CONFIG_USER_ONLY ctx->mem_idx = MIPS_HFLAG_UM; diff --git a/target/mips/tcg/translate.h b/target/mips/tcg/translate.h index 1bf153d..89dde1e 100644 --- a/target/mips/tcg/translate.h +++ b/target/mips/tcg/translate.h @@ -51,6 +51,7 @@ typedef struct DisasContext { bool abs2008; bool mi; int gi; + bool crcp; } DisasContext; #define DISAS_STOP DISAS_TARGET_0 @@ -153,6 +154,7 @@ void check_cp1_registers(DisasContext *ctx, int regs); void check_cop1x(DisasContext *ctx); void gen_base_offset_addr(DisasContext *ctx, TCGv addr, int base, int offset); +void gen_base_index_addr(DisasContext *ctx, TCGv addr, int base, int index); void gen_move_low32(TCGv ret, TCGv_i64 arg); void gen_move_high32(TCGv ret, TCGv_i64 arg); void gen_load_gpr(TCGv t, int reg); @@ -167,6 +169,7 @@ void gen_store_fpr32(DisasContext *ctx, TCGv_i32 t, int reg); void gen_store_fpr64(DisasContext *ctx, TCGv_i64 t, int reg); int get_fp_bit(int cc); +void gen_lx(DisasContext *ctx, int rd, int base, int index, MemOp mop); void 
gen_ldxs(DisasContext *ctx, int base, int index, int rd); void gen_align(DisasContext *ctx, int wordsz, int rd, int rs, int rt, int bp); void gen_addiupc(DisasContext *ctx, int rx, int imm, @@ -181,6 +184,7 @@ bool gen_lsa(DisasContext *ctx, int rd, int rt, int rs, int sa); bool gen_dlsa(DisasContext *ctx, int rd, int rt, int rs, int sa); void gen_rdhwr(DisasContext *ctx, int rt, int rd, int sel); +void gen_crc32(DisasContext *ctx, int rd, int rs, int rt, int sz, int crc32c); extern TCGv cpu_gpr[32], cpu_PC; #if defined(TARGET_MIPS64) diff --git a/target/mips/tcg/translate_addr_const.c b/target/mips/tcg/translate_addr_const.c index 6f4b39f..1d140e9 100644 --- a/target/mips/tcg/translate_addr_const.c +++ b/target/mips/tcg/translate_addr_const.c @@ -26,7 +26,7 @@ bool gen_lsa(DisasContext *ctx, int rd, int rt, int rs, int sa) t1 = tcg_temp_new(); gen_load_gpr(t0, rs); gen_load_gpr(t1, rt); - tcg_gen_shli_tl(t0, t0, sa + 1); + tcg_gen_shli_tl(t0, t0, sa); tcg_gen_add_tl(cpu_gpr[rd], t0, t1); tcg_gen_ext32s_tl(cpu_gpr[rd], cpu_gpr[rd]); return true; @@ -47,7 +47,7 @@ bool gen_dlsa(DisasContext *ctx, int rd, int rt, int rs, int sa) t1 = tcg_temp_new(); gen_load_gpr(t0, rs); gen_load_gpr(t1, rt); - tcg_gen_shli_tl(t0, t0, sa + 1); + tcg_gen_shli_tl(t0, t0, sa); tcg_gen_add_tl(cpu_gpr[rd], t0, t1); return true; } diff --git a/target/openrisc/cpu.c b/target/openrisc/cpu.c index 054ad33..9bbfe22 100644 --- a/target/openrisc/cpu.c +++ b/target/openrisc/cpu.c @@ -78,8 +78,7 @@ static void openrisc_restore_state_to_opc(CPUState *cs, #ifndef CONFIG_USER_ONLY static bool openrisc_cpu_has_work(CPUState *cs) { - return cs->interrupt_request & (CPU_INTERRUPT_HARD | - CPU_INTERRUPT_TIMER); + return cpu_test_interrupt(cs, CPU_INTERRUPT_HARD | CPU_INTERRUPT_TIMER); } #endif /* !CONFIG_USER_ONLY */ @@ -265,6 +264,7 @@ static const TCGCPUOps openrisc_tcg_ops = { #ifndef CONFIG_USER_ONLY .tlb_fill = openrisc_cpu_tlb_fill, + .pointer_wrap = cpu_pointer_wrap_uint32, .cpu_exec_interrupt = openrisc_cpu_exec_interrupt, .cpu_exec_halt = openrisc_cpu_has_work, .cpu_exec_reset = cpu_reset, diff --git a/target/openrisc/cpu.h b/target/openrisc/cpu.h index f4bcf00..c8e2827 100644 --- a/target/openrisc/cpu.h +++ b/target/openrisc/cpu.h @@ -220,33 +220,24 @@ typedef struct OpenRISCTLBEntry { typedef struct CPUOpenRISCTLBContext { OpenRISCTLBEntry itlb[TLB_SIZE]; OpenRISCTLBEntry dtlb[TLB_SIZE]; - - int (*cpu_openrisc_map_address_code)(OpenRISCCPU *cpu, - hwaddr *physical, - int *prot, - target_ulong address, int rw); - int (*cpu_openrisc_map_address_data)(OpenRISCCPU *cpu, - hwaddr *physical, - int *prot, - target_ulong address, int rw); } CPUOpenRISCTLBContext; #endif typedef struct CPUArchState { - target_ulong shadow_gpr[16][32]; /* Shadow registers */ + uint32_t shadow_gpr[16][32]; /* Shadow registers */ - target_ulong pc; /* Program counter */ - target_ulong ppc; /* Prev PC */ - target_ulong jmp_pc; /* Jump PC */ + uint32_t pc; /* Program counter */ + uint32_t ppc; /* Prev PC */ + uint32_t jmp_pc; /* Jump PC */ uint64_t mac; /* Multiply registers MACHI:MACLO */ - target_ulong epcr; /* Exception PC register */ - target_ulong eear; /* Exception EA register */ + uint32_t epcr; /* Exception PC register */ + uint32_t eear; /* Exception EA register */ - target_ulong sr_f; /* the SR_F bit, values 0, 1. */ - target_ulong sr_cy; /* the SR_CY bit, values 0, 1. */ - target_long sr_ov; /* the SR_OV bit (in the sign bit only) */ + uint32_t sr_f; /* the SR_F bit, values 0, 1. */ + uint32_t sr_cy; /* the SR_CY bit, values 0, 1. 
*/ + int32_t sr_ov; /* the SR_OV bit (in the sign bit only) */ uint32_t sr; /* Supervisor register, without SR_{F,CY,OV} */ uint32_t esr; /* Exception supervisor register */ uint32_t evbar; /* Exception vector base address register */ @@ -254,8 +245,8 @@ typedef struct CPUArchState { uint32_t fpcsr; /* Float register */ float_status fp_status; - target_ulong lock_addr; - target_ulong lock_value; + uint32_t lock_addr; + uint32_t lock_value; uint32_t dflag; /* In delay slot (boolean) */ diff --git a/target/openrisc/fpu_helper.c b/target/openrisc/fpu_helper.c index dba9972..39b6195 100644 --- a/target/openrisc/fpu_helper.c +++ b/target/openrisc/fpu_helper.c @@ -146,10 +146,10 @@ uint32_t helper_float_madd_s(CPUOpenRISCState *env, uint32_t a, #define FLOAT_CMP(name, impl) \ -target_ulong helper_float_ ## name ## _d(CPUOpenRISCState *env, \ +uint32_t helper_float_ ## name ## _d(CPUOpenRISCState *env, \ uint64_t fdt0, uint64_t fdt1) \ { return float64_ ## impl(fdt0, fdt1, &env->fp_status); } \ -target_ulong helper_float_ ## name ## _s(CPUOpenRISCState *env, \ +uint32_t helper_float_ ## name ## _s(CPUOpenRISCState *env, \ uint32_t fdt0, uint32_t fdt1) \ { return float32_ ## impl(fdt0, fdt1, &env->fp_status); } @@ -160,13 +160,13 @@ FLOAT_CMP(un, unordered_quiet) #undef FLOAT_CMP #define FLOAT_UCMP(name, expr) \ -target_ulong helper_float_ ## name ## _d(CPUOpenRISCState *env, \ +uint32_t helper_float_ ## name ## _d(CPUOpenRISCState *env, \ uint64_t fdt0, uint64_t fdt1) \ { \ FloatRelation r = float64_compare_quiet(fdt0, fdt1, &env->fp_status); \ return expr; \ } \ -target_ulong helper_float_ ## name ## _s(CPUOpenRISCState *env, \ +uint32_t helper_float_ ## name ## _s(CPUOpenRISCState *env, \ uint32_t fdt0, uint32_t fdt1) \ { \ FloatRelation r = float32_compare_quiet(fdt0, fdt1, &env->fp_status); \ diff --git a/target/openrisc/helper.h b/target/openrisc/helper.h index d847814..e0a8d40 100644 --- a/target/openrisc/helper.h +++ b/target/openrisc/helper.h @@ -47,8 +47,8 @@ FOP_CALC(rem) #undef FOP_CALC #define FOP_CMP(op) \ -DEF_HELPER_FLAGS_3(float_ ## op ## _s, TCG_CALL_NO_RWG, tl, env, i32, i32) \ -DEF_HELPER_FLAGS_3(float_ ## op ## _d, TCG_CALL_NO_RWG, tl, env, i64, i64) +DEF_HELPER_FLAGS_3(float_ ## op ## _s, TCG_CALL_NO_RWG, i32, env, i32, i32) \ +DEF_HELPER_FLAGS_3(float_ ## op ## _d, TCG_CALL_NO_RWG, i32, env, i64, i64) FOP_CMP(eq) FOP_CMP(lt) FOP_CMP(le) @@ -62,5 +62,5 @@ FOP_CMP(ult) DEF_HELPER_FLAGS_1(rfe, 0, void, env) /* sys */ -DEF_HELPER_FLAGS_3(mtspr, 0, void, env, tl, tl) -DEF_HELPER_FLAGS_3(mfspr, TCG_CALL_NO_WG, tl, env, tl, tl) +DEF_HELPER_FLAGS_3(mtspr, 0, void, env, i32, i32) +DEF_HELPER_FLAGS_3(mfspr, TCG_CALL_NO_WG, i32, env, i32, i32) diff --git a/target/openrisc/machine.c b/target/openrisc/machine.c index 081c706..f285367 100644 --- a/target/openrisc/machine.c +++ b/target/openrisc/machine.c @@ -26,8 +26,8 @@ static const VMStateDescription vmstate_tlb_entry = { .version_id = 1, .minimum_version_id = 1, .fields = (const VMStateField[]) { - VMSTATE_UINTTL(mr, OpenRISCTLBEntry), - VMSTATE_UINTTL(tr, OpenRISCTLBEntry), + VMSTATE_UINT32(mr, OpenRISCTLBEntry), + VMSTATE_UINT32(tr, OpenRISCTLBEntry), VMSTATE_END_OF_LIST() } }; @@ -72,14 +72,14 @@ static const VMStateDescription vmstate_env = { .version_id = 6, .minimum_version_id = 6, .fields = (const VMStateField[]) { - VMSTATE_UINTTL_2DARRAY(shadow_gpr, CPUOpenRISCState, 16, 32), - VMSTATE_UINTTL(pc, CPUOpenRISCState), - VMSTATE_UINTTL(ppc, CPUOpenRISCState), - VMSTATE_UINTTL(jmp_pc, CPUOpenRISCState), - VMSTATE_UINTTL(lock_addr, 
CPUOpenRISCState), - VMSTATE_UINTTL(lock_value, CPUOpenRISCState), - VMSTATE_UINTTL(epcr, CPUOpenRISCState), - VMSTATE_UINTTL(eear, CPUOpenRISCState), + VMSTATE_UINT32_2DARRAY(shadow_gpr, CPUOpenRISCState, 16, 32), + VMSTATE_UINT32(pc, CPUOpenRISCState), + VMSTATE_UINT32(ppc, CPUOpenRISCState), + VMSTATE_UINT32(jmp_pc, CPUOpenRISCState), + VMSTATE_UINT32(lock_addr, CPUOpenRISCState), + VMSTATE_UINT32(lock_value, CPUOpenRISCState), + VMSTATE_UINT32(epcr, CPUOpenRISCState), + VMSTATE_UINT32(eear, CPUOpenRISCState), /* Save the architecture value of the SR, not the internally expanded version. Since this architecture value does not diff --git a/target/openrisc/mmu.c b/target/openrisc/mmu.c index acea50c..ffb732e 100644 --- a/target/openrisc/mmu.c +++ b/target/openrisc/mmu.c @@ -28,15 +28,14 @@ #include "qemu/host-utils.h" #include "hw/loader.h" -static inline void get_phys_nommu(hwaddr *phys_addr, int *prot, - target_ulong address) +static void get_phys_nommu(hwaddr *phys_addr, int *prot, vaddr address) { *phys_addr = address; *prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC; } static int get_phys_mmu(OpenRISCCPU *cpu, hwaddr *phys_addr, int *prot, - target_ulong addr, int need, bool super) + vaddr addr, int need, bool super) { int idx = (addr >> TARGET_PAGE_BITS) & TLB_MASK; uint32_t imr = cpu->env.tlb.itlb[idx].mr; @@ -95,7 +94,7 @@ static int get_phys_mmu(OpenRISCCPU *cpu, hwaddr *phys_addr, int *prot, } } -static void raise_mmu_exception(OpenRISCCPU *cpu, target_ulong address, +static void raise_mmu_exception(OpenRISCCPU *cpu, vaddr address, int exception) { CPUState *cs = CPU(cpu); diff --git a/target/openrisc/sys_helper.c b/target/openrisc/sys_helper.c index d96b41a..7ad908b 100644 --- a/target/openrisc/sys_helper.c +++ b/target/openrisc/sys_helper.c @@ -40,12 +40,12 @@ static inline bool is_user(CPUOpenRISCState *env) #endif } -void HELPER(mtspr)(CPUOpenRISCState *env, target_ulong spr, target_ulong rb) +void HELPER(mtspr)(CPUOpenRISCState *env, uint32_t spr, uint32_t rb) { OpenRISCCPU *cpu = env_archcpu(env); #ifndef CONFIG_USER_ONLY CPUState *cs = env_cpu(env); - target_ulong mr; + uint32_t mr; int idx; #endif @@ -196,7 +196,7 @@ void HELPER(mtspr)(CPUOpenRISCState *env, target_ulong spr, target_ulong rb) env->ttmr = (rb & ~TTMR_IP) | ip; } else { /* Clear IP bit. */ env->ttmr = rb & ~TTMR_IP; - cs->interrupt_request &= ~CPU_INTERRUPT_TIMER; + cpu_reset_interrupt(cs, CPU_INTERRUPT_TIMER); } cpu_openrisc_timer_update(cpu); bql_unlock(); @@ -213,8 +213,7 @@ void HELPER(mtspr)(CPUOpenRISCState *env, target_ulong spr, target_ulong rb) #endif } -target_ulong HELPER(mfspr)(CPUOpenRISCState *env, target_ulong rd, - target_ulong spr) +uint32_t HELPER(mfspr)(CPUOpenRISCState *env, uint32_t rd, uint32_t spr) { OpenRISCCPU *cpu = env_archcpu(env); #ifndef CONFIG_USER_ONLY diff --git a/target/openrisc/translate.c b/target/openrisc/translate.c index 5ab3bc7..6fa4d6c 100644 --- a/target/openrisc/translate.c +++ b/target/openrisc/translate.c @@ -51,14 +51,20 @@ typedef struct DisasContext { uint32_t avr; /* If not -1, jmp_pc contains this value and so is a direct jump. */ - target_ulong jmp_pc_imm; + vaddr jmp_pc_imm; /* The temporary corresponding to register 0 for this compilation. */ - TCGv R0; + TCGv_i32 R0; /* The constant zero. */ - TCGv zero; + TCGv_i32 zero; } DisasContext; +static inline MemOp mo_endian(DisasContext *dc) +{ + /* The SR_LEE bit sets the (little) endianness, but we don't implement it. 
*/ + return MO_BE; +} + static inline bool is_user(DisasContext *dc) { #ifdef CONFIG_USER_ONLY @@ -71,16 +77,16 @@ static inline bool is_user(DisasContext *dc) /* Include the auto-generated decoder. */ #include "decode-insns.c.inc" -static TCGv cpu_sr; -static TCGv cpu_regs[32]; -static TCGv cpu_pc; -static TCGv jmp_pc; /* l.jr/l.jalr temp pc */ -static TCGv cpu_ppc; -static TCGv cpu_sr_f; /* bf/bnf, F flag taken */ -static TCGv cpu_sr_cy; /* carry (unsigned overflow) */ -static TCGv cpu_sr_ov; /* signed overflow */ -static TCGv cpu_lock_addr; -static TCGv cpu_lock_value; +static TCGv_i32 cpu_sr; +static TCGv_i32 cpu_regs[32]; +static TCGv_i32 cpu_pc; +static TCGv_i32 jmp_pc; /* l.jr/l.jalr temp pc */ +static TCGv_i32 cpu_ppc; +static TCGv_i32 cpu_sr_f; /* bf/bnf, F flag taken */ +static TCGv_i32 cpu_sr_cy; /* carry (unsigned overflow) */ +static TCGv_i32 cpu_sr_ov; /* signed overflow */ +static TCGv_i32 cpu_lock_addr; +static TCGv_i32 cpu_lock_value; static TCGv_i32 fpcsr; static TCGv_i64 cpu_mac; /* MACHI:MACLO */ static TCGv_i32 cpu_dflag; @@ -95,27 +101,27 @@ void openrisc_translate_init(void) }; int i; - cpu_sr = tcg_global_mem_new(tcg_env, + cpu_sr = tcg_global_mem_new_i32(tcg_env, offsetof(CPUOpenRISCState, sr), "sr"); cpu_dflag = tcg_global_mem_new_i32(tcg_env, offsetof(CPUOpenRISCState, dflag), "dflag"); - cpu_pc = tcg_global_mem_new(tcg_env, + cpu_pc = tcg_global_mem_new_i32(tcg_env, offsetof(CPUOpenRISCState, pc), "pc"); - cpu_ppc = tcg_global_mem_new(tcg_env, + cpu_ppc = tcg_global_mem_new_i32(tcg_env, offsetof(CPUOpenRISCState, ppc), "ppc"); - jmp_pc = tcg_global_mem_new(tcg_env, + jmp_pc = tcg_global_mem_new_i32(tcg_env, offsetof(CPUOpenRISCState, jmp_pc), "jmp_pc"); - cpu_sr_f = tcg_global_mem_new(tcg_env, + cpu_sr_f = tcg_global_mem_new_i32(tcg_env, offsetof(CPUOpenRISCState, sr_f), "sr_f"); - cpu_sr_cy = tcg_global_mem_new(tcg_env, + cpu_sr_cy = tcg_global_mem_new_i32(tcg_env, offsetof(CPUOpenRISCState, sr_cy), "sr_cy"); - cpu_sr_ov = tcg_global_mem_new(tcg_env, + cpu_sr_ov = tcg_global_mem_new_i32(tcg_env, offsetof(CPUOpenRISCState, sr_ov), "sr_ov"); - cpu_lock_addr = tcg_global_mem_new(tcg_env, + cpu_lock_addr = tcg_global_mem_new_i32(tcg_env, offsetof(CPUOpenRISCState, lock_addr), "lock_addr"); - cpu_lock_value = tcg_global_mem_new(tcg_env, + cpu_lock_value = tcg_global_mem_new_i32(tcg_env, offsetof(CPUOpenRISCState, lock_value), "lock_value"); fpcsr = tcg_global_mem_new_i32(tcg_env, @@ -125,7 +131,7 @@ void openrisc_translate_init(void) offsetof(CPUOpenRISCState, mac), "mac"); for (i = 0; i < 32; i++) { - cpu_regs[i] = tcg_global_mem_new(tcg_env, + cpu_regs[i] = tcg_global_mem_new_i32(tcg_env, offsetof(CPUOpenRISCState, shadow_gpr[0][i]), regnames[i]); @@ -139,7 +145,7 @@ static void gen_exception(DisasContext *dc, unsigned int excp) static void gen_illegal_exception(DisasContext *dc) { - tcg_gen_movi_tl(cpu_pc, dc->base.pc_next); + tcg_gen_movi_i32(cpu_pc, dc->base.pc_next); gen_exception(dc, EXCP_ILLEGAL); dc->base.is_jmp = DISAS_NORETURN; } @@ -159,7 +165,7 @@ static bool check_of64a32s(DisasContext *dc) return dc->cpucfgr & CPUCFGR_OF64A32S; } -static TCGv cpu_R(DisasContext *dc, int reg) +static TCGv_i32 cpu_R(DisasContext *dc, int reg) { if (reg == 0) { return dc->R0; @@ -200,147 +206,133 @@ static void gen_ove_cyov(DisasContext *dc) } } -static void gen_add(DisasContext *dc, TCGv dest, TCGv srca, TCGv srcb) +static void gen_add(DisasContext *dc, TCGv_i32 dest, + TCGv_i32 srca, TCGv_i32 srcb) { - TCGv t0 = tcg_temp_new(); - TCGv res = tcg_temp_new(); + TCGv_i32 
t0 = tcg_temp_new_i32(); + TCGv_i32 res = tcg_temp_new_i32(); - tcg_gen_add2_tl(res, cpu_sr_cy, srca, dc->zero, srcb, dc->zero); - tcg_gen_xor_tl(cpu_sr_ov, srca, srcb); - tcg_gen_xor_tl(t0, res, srcb); - tcg_gen_andc_tl(cpu_sr_ov, t0, cpu_sr_ov); + tcg_gen_add2_i32(res, cpu_sr_cy, srca, dc->zero, srcb, dc->zero); + tcg_gen_xor_i32(cpu_sr_ov, srca, srcb); + tcg_gen_xor_i32(t0, res, srcb); + tcg_gen_andc_i32(cpu_sr_ov, t0, cpu_sr_ov); - tcg_gen_mov_tl(dest, res); + tcg_gen_mov_i32(dest, res); gen_ove_cyov(dc); } -static void gen_addc(DisasContext *dc, TCGv dest, TCGv srca, TCGv srcb) +static void gen_addc(DisasContext *dc, TCGv_i32 dest, + TCGv_i32 srca, TCGv_i32 srcb) { - TCGv t0 = tcg_temp_new(); - TCGv res = tcg_temp_new(); + TCGv_i32 t0 = tcg_temp_new_i32(); + TCGv_i32 res = tcg_temp_new_i32(); - tcg_gen_addcio_tl(res, cpu_sr_cy, srca, srcb, cpu_sr_cy); - tcg_gen_xor_tl(cpu_sr_ov, srca, srcb); - tcg_gen_xor_tl(t0, res, srcb); - tcg_gen_andc_tl(cpu_sr_ov, t0, cpu_sr_ov); + tcg_gen_addcio_i32(res, cpu_sr_cy, srca, srcb, cpu_sr_cy); + tcg_gen_xor_i32(cpu_sr_ov, srca, srcb); + tcg_gen_xor_i32(t0, res, srcb); + tcg_gen_andc_i32(cpu_sr_ov, t0, cpu_sr_ov); - tcg_gen_mov_tl(dest, res); + tcg_gen_mov_i32(dest, res); gen_ove_cyov(dc); } -static void gen_sub(DisasContext *dc, TCGv dest, TCGv srca, TCGv srcb) +static void gen_sub(DisasContext *dc, TCGv_i32 dest, + TCGv_i32 srca, TCGv_i32 srcb) { - TCGv res = tcg_temp_new(); + TCGv_i32 res = tcg_temp_new_i32(); - tcg_gen_sub_tl(res, srca, srcb); - tcg_gen_xor_tl(cpu_sr_cy, srca, srcb); - tcg_gen_xor_tl(cpu_sr_ov, res, srcb); - tcg_gen_and_tl(cpu_sr_ov, cpu_sr_ov, cpu_sr_cy); - tcg_gen_setcond_tl(TCG_COND_LTU, cpu_sr_cy, srca, srcb); + tcg_gen_sub_i32(res, srca, srcb); + tcg_gen_xor_i32(cpu_sr_cy, srca, srcb); + tcg_gen_xor_i32(cpu_sr_ov, res, srcb); + tcg_gen_and_i32(cpu_sr_ov, cpu_sr_ov, cpu_sr_cy); + tcg_gen_setcond_i32(TCG_COND_LTU, cpu_sr_cy, srca, srcb); - tcg_gen_mov_tl(dest, res); + tcg_gen_mov_i32(dest, res); gen_ove_cyov(dc); } -static void gen_mul(DisasContext *dc, TCGv dest, TCGv srca, TCGv srcb) +static void gen_mul(DisasContext *dc, TCGv_i32 dest, + TCGv_i32 srca, TCGv_i32 srcb) { - TCGv t0 = tcg_temp_new(); + TCGv_i32 t0 = tcg_temp_new_i32(); - tcg_gen_muls2_tl(dest, cpu_sr_ov, srca, srcb); - tcg_gen_sari_tl(t0, dest, TARGET_LONG_BITS - 1); - tcg_gen_negsetcond_tl(TCG_COND_NE, cpu_sr_ov, cpu_sr_ov, t0); + tcg_gen_muls2_i32(dest, cpu_sr_ov, srca, srcb); + tcg_gen_sari_i32(t0, dest, TARGET_LONG_BITS - 1); + tcg_gen_negsetcond_i32(TCG_COND_NE, cpu_sr_ov, cpu_sr_ov, t0); gen_ove_ov(dc); } -static void gen_mulu(DisasContext *dc, TCGv dest, TCGv srca, TCGv srcb) +static void gen_mulu(DisasContext *dc, TCGv_i32 dest, + TCGv_i32 srca, TCGv_i32 srcb) { - tcg_gen_muls2_tl(dest, cpu_sr_cy, srca, srcb); - tcg_gen_setcondi_tl(TCG_COND_NE, cpu_sr_cy, cpu_sr_cy, 0); + tcg_gen_muls2_i32(dest, cpu_sr_cy, srca, srcb); + tcg_gen_setcondi_i32(TCG_COND_NE, cpu_sr_cy, cpu_sr_cy, 0); gen_ove_cy(dc); } -static void gen_div(DisasContext *dc, TCGv dest, TCGv srca, TCGv srcb) +static void gen_div(DisasContext *dc, TCGv_i32 dest, + TCGv_i32 srca, TCGv_i32 srcb) { - TCGv t0 = tcg_temp_new(); + TCGv_i32 t0 = tcg_temp_new_i32(); - tcg_gen_setcondi_tl(TCG_COND_EQ, cpu_sr_ov, srcb, 0); + tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_sr_ov, srcb, 0); /* The result of divide-by-zero is undefined. Suppress the host-side exception by dividing by 1. 
*/ - tcg_gen_or_tl(t0, srcb, cpu_sr_ov); - tcg_gen_div_tl(dest, srca, t0); + tcg_gen_or_i32(t0, srcb, cpu_sr_ov); + tcg_gen_div_i32(dest, srca, t0); - tcg_gen_neg_tl(cpu_sr_ov, cpu_sr_ov); + tcg_gen_neg_i32(cpu_sr_ov, cpu_sr_ov); gen_ove_ov(dc); } -static void gen_divu(DisasContext *dc, TCGv dest, TCGv srca, TCGv srcb) +static void gen_divu(DisasContext *dc, TCGv_i32 dest, + TCGv_i32 srca, TCGv_i32 srcb) { - TCGv t0 = tcg_temp_new(); + TCGv_i32 t0 = tcg_temp_new_i32(); - tcg_gen_setcondi_tl(TCG_COND_EQ, cpu_sr_cy, srcb, 0); + tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_sr_cy, srcb, 0); /* The result of divide-by-zero is undefined. Suppress the host-side exception by dividing by 1. */ - tcg_gen_or_tl(t0, srcb, cpu_sr_cy); - tcg_gen_divu_tl(dest, srca, t0); + tcg_gen_or_i32(t0, srcb, cpu_sr_cy); + tcg_gen_divu_i32(dest, srca, t0); gen_ove_cy(dc); } -static void gen_muld(DisasContext *dc, TCGv srca, TCGv srcb) +static void gen_muld(DisasContext *dc, TCGv_i32 srca, TCGv_i32 srcb) { TCGv_i64 t1 = tcg_temp_new_i64(); TCGv_i64 t2 = tcg_temp_new_i64(); - tcg_gen_ext_tl_i64(t1, srca); - tcg_gen_ext_tl_i64(t2, srcb); - if (TARGET_LONG_BITS == 32) { - tcg_gen_mul_i64(cpu_mac, t1, t2); - tcg_gen_movi_tl(cpu_sr_ov, 0); - } else { - TCGv_i64 high = tcg_temp_new_i64(); - - tcg_gen_muls2_i64(cpu_mac, high, t1, t2); - tcg_gen_sari_i64(t1, cpu_mac, 63); - tcg_gen_negsetcond_i64(TCG_COND_NE, t1, t1, high); - tcg_gen_trunc_i64_tl(cpu_sr_ov, t1); - - gen_ove_ov(dc); - } + tcg_gen_ext_i32_i64(t1, srca); + tcg_gen_ext_i32_i64(t2, srcb); + tcg_gen_mul_i64(cpu_mac, t1, t2); + tcg_gen_movi_i32(cpu_sr_ov, 0); } -static void gen_muldu(DisasContext *dc, TCGv srca, TCGv srcb) +static void gen_muldu(DisasContext *dc, TCGv_i32 srca, TCGv_i32 srcb) { TCGv_i64 t1 = tcg_temp_new_i64(); TCGv_i64 t2 = tcg_temp_new_i64(); - tcg_gen_extu_tl_i64(t1, srca); - tcg_gen_extu_tl_i64(t2, srcb); - if (TARGET_LONG_BITS == 32) { - tcg_gen_mul_i64(cpu_mac, t1, t2); - tcg_gen_movi_tl(cpu_sr_cy, 0); - } else { - TCGv_i64 high = tcg_temp_new_i64(); - - tcg_gen_mulu2_i64(cpu_mac, high, t1, t2); - tcg_gen_setcondi_i64(TCG_COND_NE, high, high, 0); - tcg_gen_trunc_i64_tl(cpu_sr_cy, high); - - gen_ove_cy(dc); - } + tcg_gen_extu_i32_i64(t1, srca); + tcg_gen_extu_i32_i64(t2, srcb); + tcg_gen_mul_i64(cpu_mac, t1, t2); + tcg_gen_movi_i32(cpu_sr_cy, 0); } -static void gen_mac(DisasContext *dc, TCGv srca, TCGv srcb) +static void gen_mac(DisasContext *dc, TCGv_i32 srca, TCGv_i32 srcb) { TCGv_i64 t1 = tcg_temp_new_i64(); TCGv_i64 t2 = tcg_temp_new_i64(); - tcg_gen_ext_tl_i64(t1, srca); - tcg_gen_ext_tl_i64(t2, srcb); + tcg_gen_ext_i32_i64(t1, srca); + tcg_gen_ext_i32_i64(t2, srcb); tcg_gen_mul_i64(t1, t1, t2); /* Note that overflow is only computed during addition stage. */ @@ -349,39 +341,35 @@ static void gen_mac(DisasContext *dc, TCGv srca, TCGv srcb) tcg_gen_xor_i64(t1, t1, cpu_mac); tcg_gen_andc_i64(t1, t1, t2); -#if TARGET_LONG_BITS == 32 tcg_gen_extrh_i64_i32(cpu_sr_ov, t1); -#else - tcg_gen_mov_i64(cpu_sr_ov, t1); -#endif gen_ove_ov(dc); } -static void gen_macu(DisasContext *dc, TCGv srca, TCGv srcb) +static void gen_macu(DisasContext *dc, TCGv_i32 srca, TCGv_i32 srcb) { TCGv_i64 t1 = tcg_temp_new_i64(); TCGv_i64 t2 = tcg_temp_new_i64(); - tcg_gen_extu_tl_i64(t1, srca); - tcg_gen_extu_tl_i64(t2, srcb); + tcg_gen_extu_i32_i64(t1, srca); + tcg_gen_extu_i32_i64(t2, srcb); tcg_gen_mul_i64(t1, t1, t2); /* Note that overflow is only computed during addition stage. 
*/ tcg_gen_add_i64(cpu_mac, cpu_mac, t1); tcg_gen_setcond_i64(TCG_COND_LTU, t1, cpu_mac, t1); - tcg_gen_trunc_i64_tl(cpu_sr_cy, t1); + tcg_gen_extrl_i64_i32(cpu_sr_cy, t1); gen_ove_cy(dc); } -static void gen_msb(DisasContext *dc, TCGv srca, TCGv srcb) +static void gen_msb(DisasContext *dc, TCGv_i32 srca, TCGv_i32 srcb) { TCGv_i64 t1 = tcg_temp_new_i64(); TCGv_i64 t2 = tcg_temp_new_i64(); - tcg_gen_ext_tl_i64(t1, srca); - tcg_gen_ext_tl_i64(t2, srcb); + tcg_gen_ext_i32_i64(t1, srca); + tcg_gen_ext_i32_i64(t2, srcb); tcg_gen_mul_i64(t1, t1, t2); /* Note that overflow is only computed during subtraction stage. */ @@ -399,19 +387,19 @@ static void gen_msb(DisasContext *dc, TCGv srca, TCGv srcb) gen_ove_ov(dc); } -static void gen_msbu(DisasContext *dc, TCGv srca, TCGv srcb) +static void gen_msbu(DisasContext *dc, TCGv_i32 srca, TCGv_i32 srcb) { TCGv_i64 t1 = tcg_temp_new_i64(); TCGv_i64 t2 = tcg_temp_new_i64(); - tcg_gen_extu_tl_i64(t1, srca); - tcg_gen_extu_tl_i64(t2, srcb); + tcg_gen_extu_i32_i64(t1, srca); + tcg_gen_extu_i32_i64(t2, srcb); tcg_gen_mul_i64(t1, t1, t2); /* Note that overflow is only computed during subtraction stage. */ tcg_gen_setcond_i64(TCG_COND_LTU, t2, cpu_mac, t1); tcg_gen_sub_i64(cpu_mac, cpu_mac, t1); - tcg_gen_trunc_i64_tl(cpu_sr_cy, t2); + tcg_gen_extrl_i64_i32(cpu_sr_cy, t2); gen_ove_cy(dc); } @@ -440,84 +428,84 @@ static bool trans_l_sub(DisasContext *dc, arg_dab *a) static bool trans_l_and(DisasContext *dc, arg_dab *a) { check_r0_write(dc, a->d); - tcg_gen_and_tl(cpu_R(dc, a->d), cpu_R(dc, a->a), cpu_R(dc, a->b)); + tcg_gen_and_i32(cpu_R(dc, a->d), cpu_R(dc, a->a), cpu_R(dc, a->b)); return true; } static bool trans_l_or(DisasContext *dc, arg_dab *a) { check_r0_write(dc, a->d); - tcg_gen_or_tl(cpu_R(dc, a->d), cpu_R(dc, a->a), cpu_R(dc, a->b)); + tcg_gen_or_i32(cpu_R(dc, a->d), cpu_R(dc, a->a), cpu_R(dc, a->b)); return true; } static bool trans_l_xor(DisasContext *dc, arg_dab *a) { check_r0_write(dc, a->d); - tcg_gen_xor_tl(cpu_R(dc, a->d), cpu_R(dc, a->a), cpu_R(dc, a->b)); + tcg_gen_xor_i32(cpu_R(dc, a->d), cpu_R(dc, a->a), cpu_R(dc, a->b)); return true; } static bool trans_l_sll(DisasContext *dc, arg_dab *a) { check_r0_write(dc, a->d); - tcg_gen_shl_tl(cpu_R(dc, a->d), cpu_R(dc, a->a), cpu_R(dc, a->b)); + tcg_gen_shl_i32(cpu_R(dc, a->d), cpu_R(dc, a->a), cpu_R(dc, a->b)); return true; } static bool trans_l_srl(DisasContext *dc, arg_dab *a) { check_r0_write(dc, a->d); - tcg_gen_shr_tl(cpu_R(dc, a->d), cpu_R(dc, a->a), cpu_R(dc, a->b)); + tcg_gen_shr_i32(cpu_R(dc, a->d), cpu_R(dc, a->a), cpu_R(dc, a->b)); return true; } static bool trans_l_sra(DisasContext *dc, arg_dab *a) { check_r0_write(dc, a->d); - tcg_gen_sar_tl(cpu_R(dc, a->d), cpu_R(dc, a->a), cpu_R(dc, a->b)); + tcg_gen_sar_i32(cpu_R(dc, a->d), cpu_R(dc, a->a), cpu_R(dc, a->b)); return true; } static bool trans_l_ror(DisasContext *dc, arg_dab *a) { check_r0_write(dc, a->d); - tcg_gen_rotr_tl(cpu_R(dc, a->d), cpu_R(dc, a->a), cpu_R(dc, a->b)); + tcg_gen_rotr_i32(cpu_R(dc, a->d), cpu_R(dc, a->a), cpu_R(dc, a->b)); return true; } static bool trans_l_exths(DisasContext *dc, arg_da *a) { check_r0_write(dc, a->d); - tcg_gen_ext16s_tl(cpu_R(dc, a->d), cpu_R(dc, a->a)); + tcg_gen_ext16s_i32(cpu_R(dc, a->d), cpu_R(dc, a->a)); return true; } static bool trans_l_extbs(DisasContext *dc, arg_da *a) { check_r0_write(dc, a->d); - tcg_gen_ext8s_tl(cpu_R(dc, a->d), cpu_R(dc, a->a)); + tcg_gen_ext8s_i32(cpu_R(dc, a->d), cpu_R(dc, a->a)); return true; } static bool trans_l_exthz(DisasContext *dc, arg_da *a) { 
check_r0_write(dc, a->d); - tcg_gen_ext16u_tl(cpu_R(dc, a->d), cpu_R(dc, a->a)); + tcg_gen_ext16u_i32(cpu_R(dc, a->d), cpu_R(dc, a->a)); return true; } static bool trans_l_extbz(DisasContext *dc, arg_da *a) { check_r0_write(dc, a->d); - tcg_gen_ext8u_tl(cpu_R(dc, a->d), cpu_R(dc, a->a)); + tcg_gen_ext8u_i32(cpu_R(dc, a->d), cpu_R(dc, a->a)); return true; } static bool trans_l_cmov(DisasContext *dc, arg_dab *a) { check_r0_write(dc, a->d); - tcg_gen_movcond_tl(TCG_COND_NE, cpu_R(dc, a->d), cpu_sr_f, dc->zero, + tcg_gen_movcond_i32(TCG_COND_NE, cpu_R(dc, a->d), cpu_sr_f, dc->zero, cpu_R(dc, a->a), cpu_R(dc, a->b)); return true; } @@ -525,16 +513,16 @@ static bool trans_l_cmov(DisasContext *dc, arg_dab *a) static bool trans_l_ff1(DisasContext *dc, arg_da *a) { check_r0_write(dc, a->d); - tcg_gen_ctzi_tl(cpu_R(dc, a->d), cpu_R(dc, a->a), -1); - tcg_gen_addi_tl(cpu_R(dc, a->d), cpu_R(dc, a->d), 1); + tcg_gen_ctzi_i32(cpu_R(dc, a->d), cpu_R(dc, a->a), -1); + tcg_gen_addi_i32(cpu_R(dc, a->d), cpu_R(dc, a->d), 1); return true; } static bool trans_l_fl1(DisasContext *dc, arg_da *a) { check_r0_write(dc, a->d); - tcg_gen_clzi_tl(cpu_R(dc, a->d), cpu_R(dc, a->a), TARGET_LONG_BITS); - tcg_gen_subfi_tl(cpu_R(dc, a->d), TARGET_LONG_BITS, cpu_R(dc, a->d)); + tcg_gen_clzi_i32(cpu_R(dc, a->d), cpu_R(dc, a->a), TARGET_LONG_BITS); + tcg_gen_subfi_i32(cpu_R(dc, a->d), TARGET_LONG_BITS, cpu_R(dc, a->d)); return true; } @@ -580,9 +568,9 @@ static bool trans_l_muldu(DisasContext *dc, arg_ab *a) static bool trans_l_j(DisasContext *dc, arg_l_j *a) { - target_ulong tmp_pc = dc->base.pc_next + a->n * 4; + vaddr tmp_pc = dc->base.pc_next + a->n * 4; - tcg_gen_movi_tl(jmp_pc, tmp_pc); + tcg_gen_movi_i32(jmp_pc, tmp_pc); dc->jmp_pc_imm = tmp_pc; dc->delayed_branch = 2; return true; @@ -590,13 +578,13 @@ static bool trans_l_j(DisasContext *dc, arg_l_j *a) static bool trans_l_jal(DisasContext *dc, arg_l_jal *a) { - target_ulong tmp_pc = dc->base.pc_next + a->n * 4; - target_ulong ret_pc = dc->base.pc_next + 8; + vaddr tmp_pc = dc->base.pc_next + a->n * 4; + vaddr ret_pc = dc->base.pc_next + 8; - tcg_gen_movi_tl(cpu_regs[9], ret_pc); + tcg_gen_movi_i32(cpu_regs[9], ret_pc); /* Optimize jal being used to load the PC for PIC. 
*/ if (tmp_pc != ret_pc) { - tcg_gen_movi_tl(jmp_pc, tmp_pc); + tcg_gen_movi_i32(jmp_pc, tmp_pc); dc->jmp_pc_imm = tmp_pc; dc->delayed_branch = 2; } @@ -605,11 +593,11 @@ static bool trans_l_jal(DisasContext *dc, arg_l_jal *a) static void do_bf(DisasContext *dc, arg_l_bf *a, TCGCond cond) { - target_ulong tmp_pc = dc->base.pc_next + a->n * 4; - TCGv t_next = tcg_constant_tl(dc->base.pc_next + 8); - TCGv t_true = tcg_constant_tl(tmp_pc); + vaddr tmp_pc = dc->base.pc_next + a->n * 4; + TCGv_i32 t_next = tcg_constant_i32(dc->base.pc_next + 8); + TCGv_i32 t_true = tcg_constant_i32(tmp_pc); - tcg_gen_movcond_tl(cond, jmp_pc, cpu_sr_f, dc->zero, t_true, t_next); + tcg_gen_movcond_i32(cond, jmp_pc, cpu_sr_f, dc->zero, t_true, t_next); dc->delayed_branch = 2; } @@ -627,51 +615,54 @@ static bool trans_l_bnf(DisasContext *dc, arg_l_bf *a) static bool trans_l_jr(DisasContext *dc, arg_l_jr *a) { - tcg_gen_mov_tl(jmp_pc, cpu_R(dc, a->b)); + tcg_gen_mov_i32(jmp_pc, cpu_R(dc, a->b)); dc->delayed_branch = 2; return true; } static bool trans_l_jalr(DisasContext *dc, arg_l_jalr *a) { - tcg_gen_mov_tl(jmp_pc, cpu_R(dc, a->b)); - tcg_gen_movi_tl(cpu_regs[9], dc->base.pc_next + 8); + tcg_gen_mov_i32(jmp_pc, cpu_R(dc, a->b)); + tcg_gen_movi_i32(cpu_regs[9], dc->base.pc_next + 8); dc->delayed_branch = 2; return true; } static bool trans_l_lwa(DisasContext *dc, arg_load *a) { - TCGv ea; + TCGv_i32 ea; check_r0_write(dc, a->d); - ea = tcg_temp_new(); - tcg_gen_addi_tl(ea, cpu_R(dc, a->a), a->i); - tcg_gen_qemu_ld_tl(cpu_R(dc, a->d), ea, dc->mem_idx, MO_TEUL); - tcg_gen_mov_tl(cpu_lock_addr, ea); - tcg_gen_mov_tl(cpu_lock_value, cpu_R(dc, a->d)); + ea = tcg_temp_new_i32(); + tcg_gen_addi_i32(ea, cpu_R(dc, a->a), a->i); + tcg_gen_qemu_ld_i32(cpu_R(dc, a->d), ea, dc->mem_idx, + mo_endian(dc) | MO_UL); + tcg_gen_mov_i32(cpu_lock_addr, ea); + tcg_gen_mov_i32(cpu_lock_value, cpu_R(dc, a->d)); return true; } static void do_load(DisasContext *dc, arg_load *a, MemOp mop) { - TCGv ea; + TCGv_i32 ea; + + mop |= mo_endian(dc); check_r0_write(dc, a->d); - ea = tcg_temp_new(); - tcg_gen_addi_tl(ea, cpu_R(dc, a->a), a->i); - tcg_gen_qemu_ld_tl(cpu_R(dc, a->d), ea, dc->mem_idx, mop); + ea = tcg_temp_new_i32(); + tcg_gen_addi_i32(ea, cpu_R(dc, a->a), a->i); + tcg_gen_qemu_ld_i32(cpu_R(dc, a->d), ea, dc->mem_idx, mop); } static bool trans_l_lwz(DisasContext *dc, arg_load *a) { - do_load(dc, a, MO_TEUL); + do_load(dc, a, MO_UL); return true; } static bool trans_l_lws(DisasContext *dc, arg_load *a) { - do_load(dc, a, MO_TESL); + do_load(dc, a, MO_SL); return true; } @@ -689,53 +680,57 @@ static bool trans_l_lbs(DisasContext *dc, arg_load *a) static bool trans_l_lhz(DisasContext *dc, arg_load *a) { - do_load(dc, a, MO_TEUW); + do_load(dc, a, MO_UW); return true; } static bool trans_l_lhs(DisasContext *dc, arg_load *a) { - do_load(dc, a, MO_TESW); + do_load(dc, a, MO_SW); return true; } static bool trans_l_swa(DisasContext *dc, arg_store *a) { - TCGv ea, val; + TCGv_i32 ea, val; TCGLabel *lab_fail, *lab_done; - ea = tcg_temp_new(); - tcg_gen_addi_tl(ea, cpu_R(dc, a->a), a->i); + ea = tcg_temp_new_i32(); + tcg_gen_addi_i32(ea, cpu_R(dc, a->a), a->i); lab_fail = gen_new_label(); lab_done = gen_new_label(); - tcg_gen_brcond_tl(TCG_COND_NE, ea, cpu_lock_addr, lab_fail); + tcg_gen_brcond_i32(TCG_COND_NE, ea, cpu_lock_addr, lab_fail); - val = tcg_temp_new(); - tcg_gen_atomic_cmpxchg_tl(val, cpu_lock_addr, cpu_lock_value, - cpu_R(dc, a->b), dc->mem_idx, MO_TEUL); - tcg_gen_setcond_tl(TCG_COND_EQ, cpu_sr_f, val, cpu_lock_value); + val = 
tcg_temp_new_i32(); + tcg_gen_atomic_cmpxchg_i32(val, cpu_lock_addr, cpu_lock_value, + cpu_R(dc, a->b), dc->mem_idx, + mo_endian(dc) | MO_UL); + tcg_gen_setcond_i32(TCG_COND_EQ, cpu_sr_f, val, cpu_lock_value); tcg_gen_br(lab_done); gen_set_label(lab_fail); - tcg_gen_movi_tl(cpu_sr_f, 0); + tcg_gen_movi_i32(cpu_sr_f, 0); gen_set_label(lab_done); - tcg_gen_movi_tl(cpu_lock_addr, -1); + tcg_gen_movi_i32(cpu_lock_addr, -1); return true; } static void do_store(DisasContext *dc, arg_store *a, MemOp mop) { - TCGv t0 = tcg_temp_new(); - tcg_gen_addi_tl(t0, cpu_R(dc, a->a), a->i); - tcg_gen_qemu_st_tl(cpu_R(dc, a->b), t0, dc->mem_idx, mop); + TCGv_i32 t0 = tcg_temp_new_i32(); + + mop |= mo_endian(dc); + + tcg_gen_addi_i32(t0, cpu_R(dc, a->a), a->i); + tcg_gen_qemu_st_i32(cpu_R(dc, a->b), t0, dc->mem_idx, mop); } static bool trans_l_sw(DisasContext *dc, arg_store *a) { - do_store(dc, a, MO_TEUL); + do_store(dc, a, MO_UL); return true; } @@ -747,7 +742,7 @@ static bool trans_l_sb(DisasContext *dc, arg_store *a) static bool trans_l_sh(DisasContext *dc, arg_store *a) { - do_store(dc, a, MO_TEUW); + do_store(dc, a, MO_UW); return true; } @@ -772,75 +767,75 @@ static bool trans_l_adrp(DisasContext *dc, arg_l_adrp *a) static bool trans_l_addi(DisasContext *dc, arg_rri *a) { check_r0_write(dc, a->d); - gen_add(dc, cpu_R(dc, a->d), cpu_R(dc, a->a), tcg_constant_tl(a->i)); + gen_add(dc, cpu_R(dc, a->d), cpu_R(dc, a->a), tcg_constant_i32(a->i)); return true; } static bool trans_l_addic(DisasContext *dc, arg_rri *a) { check_r0_write(dc, a->d); - gen_addc(dc, cpu_R(dc, a->d), cpu_R(dc, a->a), tcg_constant_tl(a->i)); + gen_addc(dc, cpu_R(dc, a->d), cpu_R(dc, a->a), tcg_constant_i32(a->i)); return true; } static bool trans_l_muli(DisasContext *dc, arg_rri *a) { check_r0_write(dc, a->d); - gen_mul(dc, cpu_R(dc, a->d), cpu_R(dc, a->a), tcg_constant_tl(a->i)); + gen_mul(dc, cpu_R(dc, a->d), cpu_R(dc, a->a), tcg_constant_i32(a->i)); return true; } static bool trans_l_maci(DisasContext *dc, arg_l_maci *a) { - gen_mac(dc, cpu_R(dc, a->a), tcg_constant_tl(a->i)); + gen_mac(dc, cpu_R(dc, a->a), tcg_constant_i32(a->i)); return true; } static bool trans_l_andi(DisasContext *dc, arg_rrk *a) { check_r0_write(dc, a->d); - tcg_gen_andi_tl(cpu_R(dc, a->d), cpu_R(dc, a->a), a->k); + tcg_gen_andi_i32(cpu_R(dc, a->d), cpu_R(dc, a->a), a->k); return true; } static bool trans_l_ori(DisasContext *dc, arg_rrk *a) { check_r0_write(dc, a->d); - tcg_gen_ori_tl(cpu_R(dc, a->d), cpu_R(dc, a->a), a->k); + tcg_gen_ori_i32(cpu_R(dc, a->d), cpu_R(dc, a->a), a->k); return true; } static bool trans_l_xori(DisasContext *dc, arg_rri *a) { check_r0_write(dc, a->d); - tcg_gen_xori_tl(cpu_R(dc, a->d), cpu_R(dc, a->a), a->i); + tcg_gen_xori_i32(cpu_R(dc, a->d), cpu_R(dc, a->a), a->i); return true; } static bool trans_l_mfspr(DisasContext *dc, arg_l_mfspr *a) { - TCGv spr = tcg_temp_new(); + TCGv_i32 spr = tcg_temp_new_i32(); check_r0_write(dc, a->d); if (translator_io_start(&dc->base)) { if (dc->delayed_branch) { - tcg_gen_mov_tl(cpu_pc, jmp_pc); - tcg_gen_discard_tl(jmp_pc); + tcg_gen_mov_i32(cpu_pc, jmp_pc); + tcg_gen_discard_i32(jmp_pc); } else { - tcg_gen_movi_tl(cpu_pc, dc->base.pc_next + 4); + tcg_gen_movi_i32(cpu_pc, dc->base.pc_next + 4); } dc->base.is_jmp = DISAS_EXIT; } - tcg_gen_ori_tl(spr, cpu_R(dc, a->a), a->k); + tcg_gen_ori_i32(spr, cpu_R(dc, a->a), a->k); gen_helper_mfspr(cpu_R(dc, a->d), tcg_env, cpu_R(dc, a->d), spr); return true; } static bool trans_l_mtspr(DisasContext *dc, arg_l_mtspr *a) { - TCGv spr = tcg_temp_new(); + TCGv_i32 
spr = tcg_temp_new_i32(); translator_io_start(&dc->base); @@ -851,14 +846,14 @@ static bool trans_l_mtspr(DisasContext *dc, arg_l_mtspr *a) * of the cpu state first, allowing it to be overwritten. */ if (dc->delayed_branch) { - tcg_gen_mov_tl(cpu_pc, jmp_pc); - tcg_gen_discard_tl(jmp_pc); + tcg_gen_mov_i32(cpu_pc, jmp_pc); + tcg_gen_discard_i32(jmp_pc); } else { - tcg_gen_movi_tl(cpu_pc, dc->base.pc_next + 4); + tcg_gen_movi_i32(cpu_pc, dc->base.pc_next + 4); } dc->base.is_jmp = DISAS_EXIT; - tcg_gen_ori_tl(spr, cpu_R(dc, a->a), a->k); + tcg_gen_ori_i32(spr, cpu_R(dc, a->a), a->k); gen_helper_mtspr(tcg_env, spr, cpu_R(dc, a->b)); return true; } @@ -890,7 +885,7 @@ static bool trans_l_msbu(DisasContext *dc, arg_ab *a) static bool trans_l_slli(DisasContext *dc, arg_dal *a) { check_r0_write(dc, a->d); - tcg_gen_shli_tl(cpu_R(dc, a->d), cpu_R(dc, a->a), + tcg_gen_shli_i32(cpu_R(dc, a->d), cpu_R(dc, a->a), a->l & (TARGET_LONG_BITS - 1)); return true; } @@ -898,7 +893,7 @@ static bool trans_l_slli(DisasContext *dc, arg_dal *a) static bool trans_l_srli(DisasContext *dc, arg_dal *a) { check_r0_write(dc, a->d); - tcg_gen_shri_tl(cpu_R(dc, a->d), cpu_R(dc, a->a), + tcg_gen_shri_i32(cpu_R(dc, a->d), cpu_R(dc, a->a), a->l & (TARGET_LONG_BITS - 1)); return true; } @@ -906,7 +901,7 @@ static bool trans_l_srli(DisasContext *dc, arg_dal *a) static bool trans_l_srai(DisasContext *dc, arg_dal *a) { check_r0_write(dc, a->d); - tcg_gen_sari_tl(cpu_R(dc, a->d), cpu_R(dc, a->a), + tcg_gen_sari_i32(cpu_R(dc, a->d), cpu_R(dc, a->a), a->l & (TARGET_LONG_BITS - 1)); return true; } @@ -914,7 +909,7 @@ static bool trans_l_srai(DisasContext *dc, arg_dal *a) static bool trans_l_rori(DisasContext *dc, arg_dal *a) { check_r0_write(dc, a->d); - tcg_gen_rotri_tl(cpu_R(dc, a->d), cpu_R(dc, a->a), + tcg_gen_rotri_i32(cpu_R(dc, a->d), cpu_R(dc, a->a), a->l & (TARGET_LONG_BITS - 1)); return true; } @@ -922,151 +917,151 @@ static bool trans_l_rori(DisasContext *dc, arg_dal *a) static bool trans_l_movhi(DisasContext *dc, arg_l_movhi *a) { check_r0_write(dc, a->d); - tcg_gen_movi_tl(cpu_R(dc, a->d), a->k << 16); + tcg_gen_movi_i32(cpu_R(dc, a->d), a->k << 16); return true; } static bool trans_l_macrc(DisasContext *dc, arg_l_macrc *a) { check_r0_write(dc, a->d); - tcg_gen_trunc_i64_tl(cpu_R(dc, a->d), cpu_mac); + tcg_gen_extrl_i64_i32(cpu_R(dc, a->d), cpu_mac); tcg_gen_movi_i64(cpu_mac, 0); return true; } static bool trans_l_sfeq(DisasContext *dc, arg_ab *a) { - tcg_gen_setcond_tl(TCG_COND_EQ, cpu_sr_f, + tcg_gen_setcond_i32(TCG_COND_EQ, cpu_sr_f, cpu_R(dc, a->a), cpu_R(dc, a->b)); return true; } static bool trans_l_sfne(DisasContext *dc, arg_ab *a) { - tcg_gen_setcond_tl(TCG_COND_NE, cpu_sr_f, + tcg_gen_setcond_i32(TCG_COND_NE, cpu_sr_f, cpu_R(dc, a->a), cpu_R(dc, a->b)); return true; } static bool trans_l_sfgtu(DisasContext *dc, arg_ab *a) { - tcg_gen_setcond_tl(TCG_COND_GTU, cpu_sr_f, + tcg_gen_setcond_i32(TCG_COND_GTU, cpu_sr_f, cpu_R(dc, a->a), cpu_R(dc, a->b)); return true; } static bool trans_l_sfgeu(DisasContext *dc, arg_ab *a) { - tcg_gen_setcond_tl(TCG_COND_GEU, cpu_sr_f, + tcg_gen_setcond_i32(TCG_COND_GEU, cpu_sr_f, cpu_R(dc, a->a), cpu_R(dc, a->b)); return true; } static bool trans_l_sfltu(DisasContext *dc, arg_ab *a) { - tcg_gen_setcond_tl(TCG_COND_LTU, cpu_sr_f, + tcg_gen_setcond_i32(TCG_COND_LTU, cpu_sr_f, cpu_R(dc, a->a), cpu_R(dc, a->b)); return true; } static bool trans_l_sfleu(DisasContext *dc, arg_ab *a) { - tcg_gen_setcond_tl(TCG_COND_LEU, cpu_sr_f, + tcg_gen_setcond_i32(TCG_COND_LEU, cpu_sr_f, cpu_R(dc, 
a->a), cpu_R(dc, a->b)); return true; } static bool trans_l_sfgts(DisasContext *dc, arg_ab *a) { - tcg_gen_setcond_tl(TCG_COND_GT, cpu_sr_f, + tcg_gen_setcond_i32(TCG_COND_GT, cpu_sr_f, cpu_R(dc, a->a), cpu_R(dc, a->b)); return true; } static bool trans_l_sfges(DisasContext *dc, arg_ab *a) { - tcg_gen_setcond_tl(TCG_COND_GE, cpu_sr_f, + tcg_gen_setcond_i32(TCG_COND_GE, cpu_sr_f, cpu_R(dc, a->a), cpu_R(dc, a->b)); return true; } static bool trans_l_sflts(DisasContext *dc, arg_ab *a) { - tcg_gen_setcond_tl(TCG_COND_LT, cpu_sr_f, + tcg_gen_setcond_i32(TCG_COND_LT, cpu_sr_f, cpu_R(dc, a->a), cpu_R(dc, a->b)); return true; } static bool trans_l_sfles(DisasContext *dc, arg_ab *a) { - tcg_gen_setcond_tl(TCG_COND_LE, + tcg_gen_setcond_i32(TCG_COND_LE, cpu_sr_f, cpu_R(dc, a->a), cpu_R(dc, a->b)); return true; } static bool trans_l_sfeqi(DisasContext *dc, arg_ai *a) { - tcg_gen_setcondi_tl(TCG_COND_EQ, cpu_sr_f, cpu_R(dc, a->a), a->i); + tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_sr_f, cpu_R(dc, a->a), a->i); return true; } static bool trans_l_sfnei(DisasContext *dc, arg_ai *a) { - tcg_gen_setcondi_tl(TCG_COND_NE, cpu_sr_f, cpu_R(dc, a->a), a->i); + tcg_gen_setcondi_i32(TCG_COND_NE, cpu_sr_f, cpu_R(dc, a->a), a->i); return true; } static bool trans_l_sfgtui(DisasContext *dc, arg_ai *a) { - tcg_gen_setcondi_tl(TCG_COND_GTU, cpu_sr_f, cpu_R(dc, a->a), a->i); + tcg_gen_setcondi_i32(TCG_COND_GTU, cpu_sr_f, cpu_R(dc, a->a), a->i); return true; } static bool trans_l_sfgeui(DisasContext *dc, arg_ai *a) { - tcg_gen_setcondi_tl(TCG_COND_GEU, cpu_sr_f, cpu_R(dc, a->a), a->i); + tcg_gen_setcondi_i32(TCG_COND_GEU, cpu_sr_f, cpu_R(dc, a->a), a->i); return true; } static bool trans_l_sfltui(DisasContext *dc, arg_ai *a) { - tcg_gen_setcondi_tl(TCG_COND_LTU, cpu_sr_f, cpu_R(dc, a->a), a->i); + tcg_gen_setcondi_i32(TCG_COND_LTU, cpu_sr_f, cpu_R(dc, a->a), a->i); return true; } static bool trans_l_sfleui(DisasContext *dc, arg_ai *a) { - tcg_gen_setcondi_tl(TCG_COND_LEU, cpu_sr_f, cpu_R(dc, a->a), a->i); + tcg_gen_setcondi_i32(TCG_COND_LEU, cpu_sr_f, cpu_R(dc, a->a), a->i); return true; } static bool trans_l_sfgtsi(DisasContext *dc, arg_ai *a) { - tcg_gen_setcondi_tl(TCG_COND_GT, cpu_sr_f, cpu_R(dc, a->a), a->i); + tcg_gen_setcondi_i32(TCG_COND_GT, cpu_sr_f, cpu_R(dc, a->a), a->i); return true; } static bool trans_l_sfgesi(DisasContext *dc, arg_ai *a) { - tcg_gen_setcondi_tl(TCG_COND_GE, cpu_sr_f, cpu_R(dc, a->a), a->i); + tcg_gen_setcondi_i32(TCG_COND_GE, cpu_sr_f, cpu_R(dc, a->a), a->i); return true; } static bool trans_l_sfltsi(DisasContext *dc, arg_ai *a) { - tcg_gen_setcondi_tl(TCG_COND_LT, cpu_sr_f, cpu_R(dc, a->a), a->i); + tcg_gen_setcondi_i32(TCG_COND_LT, cpu_sr_f, cpu_R(dc, a->a), a->i); return true; } static bool trans_l_sflesi(DisasContext *dc, arg_ai *a) { - tcg_gen_setcondi_tl(TCG_COND_LE, cpu_sr_f, cpu_R(dc, a->a), a->i); + tcg_gen_setcondi_i32(TCG_COND_LE, cpu_sr_f, cpu_R(dc, a->a), a->i); return true; } static bool trans_l_sys(DisasContext *dc, arg_l_sys *a) { - tcg_gen_movi_tl(cpu_pc, dc->base.pc_next); + tcg_gen_movi_i32(cpu_pc, dc->base.pc_next); gen_exception(dc, EXCP_SYSCALL); dc->base.is_jmp = DISAS_NORETURN; return true; @@ -1074,7 +1069,7 @@ static bool trans_l_sys(DisasContext *dc, arg_l_sys *a) static bool trans_l_trap(DisasContext *dc, arg_l_trap *a) { - tcg_gen_movi_tl(cpu_pc, dc->base.pc_next); + tcg_gen_movi_i32(cpu_pc, dc->base.pc_next); gen_exception(dc, EXCP_TRAP); dc->base.is_jmp = DISAS_NORETURN; return true; @@ -1108,7 +1103,7 @@ static bool trans_l_rfe(DisasContext *dc, arg_l_rfe *a) } 
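For context on the branch handling touched above: the l.j/l.jal/l.bf/l.jr translators and the tb_stop logic further down all follow the same OpenRISC delay-slot pattern — the branch writes its target into jmp_pc (or remembers it in jmp_pc_imm when the target is a compile-time constant) and arms a two-instruction countdown in delayed_branch; the countdown reaches zero right after the delay-slot instruction, at which point the translation block ends and the pending target is committed to cpu_pc. The following is a minimal standalone sketch of that bookkeeping, not part of this patch and not QEMU's actual API; the struct and function names are invented and only mirror the DisasContext fields used above.

    /* Standalone illustration of OpenRISC-style delay-slot bookkeeping.
     * "delay_state" stands in for the translator's DisasContext; the real
     * code ends the translation block instead of returning a flag. */
    #include <stdint.h>
    #include <stdbool.h>

    struct delay_state {
        uint32_t jmp_pc;        /* pending branch target */
        int delayed_branch;     /* 2 = branch insn seen, 1 = in delay slot, 0 = idle */
    };

    /* Call once per translated instruction, after emitting its code. */
    static bool delay_slot_tick(struct delay_state *ds, uint32_t *next_pc)
    {
        if (ds->delayed_branch && --ds->delayed_branch == 0) {
            *next_pc = ds->jmp_pc;  /* commit the branch after the delay slot */
            return true;            /* caller should end the block here */
        }
        return false;               /* keep translating sequentially */
    }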
static bool do_fp2(DisasContext *dc, arg_da *a, - void (*fn)(TCGv, TCGv_env, TCGv)) + void (*fn)(TCGv_i32, TCGv_env, TCGv_i32)) { if (!check_of32s(dc)) { return false; @@ -1120,7 +1115,7 @@ static bool do_fp2(DisasContext *dc, arg_da *a, } static bool do_fp3(DisasContext *dc, arg_dab *a, - void (*fn)(TCGv, TCGv_env, TCGv, TCGv)) + void (*fn)(TCGv_i32, TCGv_env, TCGv_i32, TCGv_i32)) { if (!check_of32s(dc)) { return false; @@ -1132,7 +1127,7 @@ static bool do_fp3(DisasContext *dc, arg_dab *a, } static bool do_fpcmp(DisasContext *dc, arg_ab *a, - void (*fn)(TCGv, TCGv_env, TCGv, TCGv), + void (*fn)(TCGv_i32, TCGv_env, TCGv_i32, TCGv_i32), bool inv, bool swap) { if (!check_of32s(dc)) { @@ -1144,7 +1139,7 @@ static bool do_fpcmp(DisasContext *dc, arg_ab *a, fn(cpu_sr_f, tcg_env, cpu_R(dc, a->a), cpu_R(dc, a->b)); } if (inv) { - tcg_gen_xori_tl(cpu_sr_f, cpu_sr_f, 1); + tcg_gen_xori_i32(cpu_sr_f, cpu_sr_f, 1); } gen_helper_update_fpcsr(tcg_env); return true; @@ -1337,7 +1332,7 @@ static bool do_dp2(DisasContext *dc, arg_da_pair *a, } static bool do_dpcmp(DisasContext *dc, arg_ab_pair *a, - void (*fn)(TCGv, TCGv_env, TCGv_i64, TCGv_i64), + void (*fn)(TCGv_i32, TCGv_env, TCGv_i64, TCGv_i64), bool inv, bool swap) { TCGv_i64 t0, t1; @@ -1359,7 +1354,7 @@ static bool do_dpcmp(DisasContext *dc, arg_ab_pair *a, } if (inv) { - tcg_gen_xori_tl(cpu_sr_f, cpu_sr_f, 1); + tcg_gen_xori_i32(cpu_sr_f, cpu_sr_f, 1); } gen_helper_update_fpcsr(tcg_env); return true; @@ -1544,7 +1539,7 @@ static void openrisc_tr_tb_start(DisasContextBase *db, CPUState *cs) /* Allow the TCG optimizer to see that R0 == 0, when it's true, which is the common case. */ - dc->zero = tcg_constant_tl(0); + dc->zero = tcg_constant_i32(0); if (dc->tb_flags & TB_FLAGS_R0_0) { dc->R0 = dc->zero; } else { @@ -1586,7 +1581,7 @@ static void openrisc_tr_translate_insn(DisasContextBase *dcbase, CPUState *cs) static void openrisc_tr_tb_stop(DisasContextBase *dcbase, CPUState *cs) { DisasContext *dc = container_of(dcbase, DisasContext, base); - target_ulong jmp_dest; + vaddr jmp_dest; /* If we have already exited the TB, nothing following has effect. */ if (dc->base.is_jmp == DISAS_NORETURN) { @@ -1600,32 +1595,32 @@ static void openrisc_tr_tb_stop(DisasContextBase *dcbase, CPUState *cs) /* For DISAS_TOO_MANY, jump to the next insn. */ jmp_dest = dc->base.pc_next; - tcg_gen_movi_tl(cpu_ppc, jmp_dest - 4); + tcg_gen_movi_i32(cpu_ppc, jmp_dest - 4); switch (dc->base.is_jmp) { case DISAS_JUMP: jmp_dest = dc->jmp_pc_imm; if (jmp_dest == -1) { /* The jump destination is indirect/computed; use jmp_pc. */ - tcg_gen_mov_tl(cpu_pc, jmp_pc); - tcg_gen_discard_tl(jmp_pc); + tcg_gen_mov_i32(cpu_pc, jmp_pc); + tcg_gen_discard_i32(jmp_pc); tcg_gen_lookup_and_goto_ptr(); break; } /* The jump destination is direct; use jmp_pc_imm. However, we will have stored into jmp_pc as well; we know now that it wasn't needed. 
*/ - tcg_gen_discard_tl(jmp_pc); + tcg_gen_discard_i32(jmp_pc); /* fallthru */ case DISAS_TOO_MANY: if (translator_use_goto_tb(&dc->base, jmp_dest)) { tcg_gen_goto_tb(0); - tcg_gen_movi_tl(cpu_pc, jmp_dest); + tcg_gen_movi_i32(cpu_pc, jmp_dest); tcg_gen_exit_tb(dc->base.tb, 0); break; } - tcg_gen_movi_tl(cpu_pc, jmp_dest); + tcg_gen_movi_i32(cpu_pc, jmp_dest); tcg_gen_lookup_and_goto_ptr(); break; diff --git a/target/ppc/cpu-models.c b/target/ppc/cpu-models.c index ea86ea2..89ae763 100644 --- a/target/ppc/cpu-models.c +++ b/target/ppc/cpu-models.c @@ -32,17 +32,20 @@ /* PowerPC CPU definitions */ #define POWERPC_DEF_PREFIX(pvr, svr, type) \ glue(glue(glue(glue(pvr, _), svr), _), type) -#define POWERPC_DEF_SVR(_name, _desc, _pvr, _svr, _type) \ +#define POWERPC_DEF_SVR_DEPR(_name, _desc, _pvr, _svr, _type, _deprecation_note) \ static void \ glue(POWERPC_DEF_PREFIX(_pvr, _svr, _type), _cpu_class_init) \ (ObjectClass *oc, const void *data) \ { \ DeviceClass *dc = DEVICE_CLASS(oc); \ + CPUClass *cc = CPU_CLASS(oc); \ PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc); \ \ pcc->pvr = _pvr; \ pcc->svr = _svr; \ dc->desc = _desc; \ + \ + cc->deprecation_note = _deprecation_note; \ } \ \ static const TypeInfo \ @@ -63,6 +66,13 @@ type_init( \ glue(POWERPC_DEF_PREFIX(_pvr, _svr, _type), _cpu_register_types)) +#define POWERPC_DEF_SVR(_name, _desc, _pvr, _svr, _type) \ + POWERPC_DEF_SVR_DEPR(_name, _desc, _pvr, _svr, _type, NULL) + +#define POWERPC_DEPRECATED_CPU(_name, _pvr, _type, _desc, _deprecation_note)\ + POWERPC_DEF_SVR_DEPR(_name, _desc, _pvr, POWERPC_SVR_NONE, _type, \ + _deprecation_note) + #define POWERPC_DEF(_name, _pvr, _type, _desc) \ POWERPC_DEF_SVR(_name, _desc, _pvr, POWERPC_SVR_NONE, _type) @@ -116,6 +126,13 @@ NULL) POWERPC_DEF("x2vp20", CPU_POWERPC_X2VP20, 405, NULL) + /* PPE42 Embedded Controllers */ + POWERPC_DEF("PPE42", CPU_POWERPC_PPE42, ppe42, + "Generic PPE 42") + POWERPC_DEF("PPE42X", CPU_POWERPC_PPE42X, ppe42x, + "Generic PPE 42X") + POWERPC_DEF("PPE42XM", CPU_POWERPC_PPE42XM, ppe42xm, + "Generic PPE 42XM") /* PowerPC 440 family */ #if defined(TODO_USER_ONLY) POWERPC_DEF("440", CPU_POWERPC_440, 440GP, @@ -722,12 +739,12 @@ "POWER7 v2.3") POWERPC_DEF("power7p_v2.1", CPU_POWERPC_POWER7P_v21, POWER7, "POWER7+ v2.1") - POWERPC_DEF("power8e_v2.1", CPU_POWERPC_POWER8E_v21, POWER8, - "POWER8E v2.1") + POWERPC_DEPRECATED_CPU("power8e_v2.1", CPU_POWERPC_POWER8E_v21, POWER8, + "POWER8E v2.1", "CPU is unmaintained.") POWERPC_DEF("power8_v2.0", CPU_POWERPC_POWER8_v20, POWER8, "POWER8 v2.0") - POWERPC_DEF("power8nvl_v1.0", CPU_POWERPC_POWER8NVL_v10, POWER8, - "POWER8NVL v1.0") + POWERPC_DEPRECATED_CPU("power8nvl_v1.0", CPU_POWERPC_POWER8NVL_v10, POWER8, + "POWER8NVL v1.0", "CPU is unmaintained.") POWERPC_DEF("power9_v2.0", CPU_POWERPC_POWER9_DD20, POWER9, "POWER9 v2.0") POWERPC_DEF("power9_v2.2", CPU_POWERPC_POWER9_DD22, POWER9, diff --git a/target/ppc/cpu-models.h b/target/ppc/cpu-models.h index 72ad31b..c6cd27f 100644 --- a/target/ppc/cpu-models.h +++ b/target/ppc/cpu-models.h @@ -69,6 +69,10 @@ enum { /* Xilinx cores */ CPU_POWERPC_X2VP4 = 0x20010820, CPU_POWERPC_X2VP20 = 0x20010860, + /* IBM PPE42 Family */ + CPU_POWERPC_PPE42 = 0x42000000, + CPU_POWERPC_PPE42X = 0x42100000, + CPU_POWERPC_PPE42XM = 0x42200000, /* PowerPC 440 family */ /* Generic PowerPC 440 */ #define CPU_POWERPC_440 CPU_POWERPC_440GXf diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h index 6b90543..787020f 100644 --- a/target/ppc/cpu.h +++ b/target/ppc/cpu.h @@ -220,6 +220,8 @@ typedef enum powerpc_excp_t { 
POWERPC_EXCP_POWER10, /* POWER11 exception model */ POWERPC_EXCP_POWER11, + /* PPE42 exception model */ + POWERPC_EXCP_PPE42, } powerpc_excp_t; /*****************************************************************************/ @@ -282,6 +284,8 @@ typedef enum powerpc_input_t { PPC_FLAGS_INPUT_POWER9, /* Freescale RCPU bus */ PPC_FLAGS_INPUT_RCPU, + /* PPE42 bus */ + PPC_FLAGS_INPUT_PPE42, } powerpc_input_t; #define PPC_INPUT(env) ((env)->bus_model) @@ -433,39 +437,64 @@ typedef enum { #define MSR_TM PPC_BIT_NR(31) /* Transactional Memory Available (Book3s) */ #define MSR_CM PPC_BIT_NR(32) /* Computation mode for BookE hflags */ #define MSR_ICM PPC_BIT_NR(33) /* Interrupt computation mode for BookE */ +#define MSR_SEM0 PPC_BIT_NR(33) /* SIB Error Mask Bit 0 (PPE42) */ +#define MSR_SEM1 PPC_BIT_NR(34) /* SIB Error Mask Bit 1 (PPE42) */ +#define MSR_SEM2 PPC_BIT_NR(35) /* SIB Error Mask Bit 2 (PPE42) */ #define MSR_GS PPC_BIT_NR(35) /* guest state for BookE */ +#define MSR_SEM3 PPC_BIT_NR(36) /* SIB Error Mask Bit 3 (PPE42) */ +#define MSR_SEM4 PPC_BIT_NR(37) /* SIB Error Mask Bit 4 (PPE42) */ #define MSR_UCLE PPC_BIT_NR(37) /* User-mode cache lock enable for BookE */ #define MSR_VR PPC_BIT_NR(38) /* altivec available x hflags */ #define MSR_SPE PPC_BIT_NR(38) /* SPE enable for BookE x hflags */ +#define MSR_SEM5 PPC_BIT_NR(38) /* SIB Error Mask Bit 5 (PPE42) */ +#define MSR_SEM6 PPC_BIT_NR(39) /* SIB Error Mask Bit 6 (PPE42) */ #define MSR_VSX PPC_BIT_NR(40) /* Vector Scalar Extension (>= 2.06)x hflags */ +#define MSR_IS0 PPC_BIT_NR(40) /* Instance Specific Bit 0 (PPE42) */ #define MSR_S PPC_BIT_NR(41) /* Secure state */ +#define MSR_SIBRC0 PPC_BIT_NR(41) /* Last SIB return code Bit 0 (PPE42) */ +#define MSR_SIBRC1 PPC_BIT_NR(42) /* Last SIB return code Bit 1 (PPE42) */ +#define MSR_SIBRC2 PPC_BIT_NR(43) /* Last SIB return code Bit 2 (PPE42) */ +#define MSR_LP PPC_BIT_NR(44) /* Low Priority (PPE42) */ #define MSR_KEY PPC_BIT_NR(44) /* key bit on 603e */ #define MSR_POW PPC_BIT_NR(45) /* Power management */ #define MSR_WE PPC_BIT_NR(45) /* Wait State Enable on 405 */ +#define MSR_IS1 PPC_BIT_NR(46) /* Instance Specific Bit 1 (PPE42) */ #define MSR_TGPR PPC_BIT_NR(46) /* TGPR usage on 602/603 x */ #define MSR_CE PPC_BIT_NR(46) /* Critical int. enable on embedded PPC x */ #define MSR_ILE PPC_BIT_NR(47) /* Interrupt little-endian mode */ +#define MSR_UIE PPC_BIT_NR(47) /* Unmaskable Interrupt Enable (PPE42) */ #define MSR_EE PPC_BIT_NR(48) /* External interrupt enable */ #define MSR_PR PPC_BIT_NR(49) /* Problem state hflags */ #define MSR_FP PPC_BIT_NR(50) /* Floating point available hflags */ #define MSR_ME PPC_BIT_NR(51) /* Machine check interrupt enable */ #define MSR_FE0 PPC_BIT_NR(52) /* Floating point exception mode 0 */ +#define MSR_IS2 PPC_BIT_NR(52) /* Instance Specific Bit 2 (PPE42) */ +#define MSR_IS3 PPC_BIT_NR(53) /* Instance Specific Bit 3 (PPE42) */ #define MSR_SE PPC_BIT_NR(53) /* Single-step trace enable x hflags */ #define MSR_DWE PPC_BIT_NR(53) /* Debug wait enable on 405 x */ #define MSR_UBLE PPC_BIT_NR(53) /* User BTB lock enable on e500 x */ #define MSR_BE PPC_BIT_NR(54) /* Branch trace enable x hflags */ #define MSR_DE PPC_BIT_NR(54) /* Debug int. 
enable on embedded PPC x */ #define MSR_FE1 PPC_BIT_NR(55) /* Floating point exception mode 1 */ +#define MSR_IPE PPC_BIT_NR(55) /* Imprecise Mode Enable (PPE42) */ #define MSR_AL PPC_BIT_NR(56) /* AL bit on POWER */ +#define MSR_SIBRCA0 PPC_BIT_NR(56) /* SIB Return Code Accumulator 0 (PPE42) */ +#define MSR_SIBRCA1 PPC_BIT_NR(57) /* SIB Return Code Accumulator 1 (PPE42) */ #define MSR_EP PPC_BIT_NR(57) /* Exception prefix on 601 */ #define MSR_IR PPC_BIT_NR(58) /* Instruction relocate */ #define MSR_IS PPC_BIT_NR(58) /* Instruction address space (BookE) */ +#define MSR_SIBRCA2 PPC_BIT_NR(58) /* SIB Return Code Accumulator 2 (PPE42) */ +#define MSR_SIBRCA3 PPC_BIT_NR(59) /* SIB Return Code Accumulator 3 (PPE42) */ #define MSR_DR PPC_BIT_NR(59) /* Data relocate */ #define MSR_DS PPC_BIT_NR(59) /* Data address space (BookE) */ #define MSR_PE PPC_BIT_NR(60) /* Protection enable on 403 */ +#define MSR_SIBRCA4 PPC_BIT_NR(60) /* SIB Return Code Accumulator 4 (PPE42) */ +#define MSR_SIBRCA5 PPC_BIT_NR(61) /* SIB Return Code Accumulator 5 (PPE42) */ #define MSR_PX PPC_BIT_NR(61) /* Protection exclusive on 403 x */ #define MSR_PMM PPC_BIT_NR(61) /* Performance monitor mark on POWER x */ #define MSR_RI PPC_BIT_NR(62) /* Recoverable interrupt 1 */ +#define MSR_SIBRCA6 PPC_BIT_NR(62) /* SIB Return Code Accumulator 6 (PPE42) */ +#define MSR_SIBRCA7 PPC_BIT_NR(63) /* SIB Return Code Accumulator 7 (PPE42) */ #define MSR_LE PPC_BIT_NR(63) /* Little-endian mode 1 hflags */ FIELD(MSR, SF, MSR_SF, 1) @@ -517,6 +546,9 @@ FIELD(MSR, PX, MSR_PX, 1) FIELD(MSR, PMM, MSR_PMM, 1) FIELD(MSR, RI, MSR_RI, 1) FIELD(MSR, LE, MSR_LE, 1) +FIELD(MSR, SEM, MSR_SEM6, 7) +FIELD(MSR, SIBRC, MSR_SIBRC2, 3) +FIELD(MSR, SIBRCA, MSR_SIBRCA7, 8) /* * FE0 and FE1 bits are not side-by-side @@ -730,6 +762,31 @@ FIELD(MSR, LE, MSR_LE, 1) #define ESR_VLEMI PPC_BIT(58) /* VLE operation */ #define ESR_MIF PPC_BIT(62) /* Misaligned instruction (VLE) */ +/* PPE42 Interrupt Status Register bits */ +#define PPE42_ISR_SRSMS0 PPC_BIT_NR(48) /* Sys Reset State Machine State 0 */ +#define PPE42_ISR_SRSMS1 PPC_BIT_NR(49) /* Sys Reset State Machine State 1 */ +#define PPE42_ISR_SRSMS2 PPC_BIT_NR(50) /* Sys Reset State Machine State 2 */ +#define PPE42_ISR_SRSMS3 PPC_BIT_NR(51) /* Sys Reset State Machine State 3 */ +#define PPE42_ISR_EP PPC_BIT_NR(53) /* MSR[EE] Maskable Event Pending */ +#define PPE42_ISR_PTR PPC_BIT_NR(56) /* Program Interrupt from trap */ +#define PPE42_ISR_ST PPC_BIT_NR(57) /* Data Interrupt caused by store */ +#define PPE42_ISR_MFE PPC_BIT_NR(60) /* Multiple Fault Error */ +#define PPE42_ISR_MCS0 PPC_BIT_NR(61) /* Machine Check Status bit0 */ +#define PPE42_ISR_MCS1 PPC_BIT_NR(62) /* Machine Check Status bit1 */ +#define PPE42_ISR_MCS2 PPC_BIT_NR(63) /* Machine Check Status bit2 */ +FIELD(PPE42_ISR, SRSMS, PPE42_ISR_SRSMS3, 4) +FIELD(PPE42_ISR, MCS, PPE42_ISR_MCS2, 3) + +/* PPE42 Machine Check Status field values */ +#define PPE42_ISR_MCS_INSTRUCTION 0 +#define PPE42_ISR_MCS_DATA_LOAD 1 +#define PPE42_ISR_MCS_DATA_PRECISE_STORE 2 +#define PPE42_ISR_MCS_DATA_IMPRECISE_STORE 3 +#define PPE42_ISR_MCS_PROGRAM 4 +#define PPE42_ISR_MCS_ISI 5 +#define PPE42_ISR_MCS_ALIGNMENT 6 +#define PPE42_ISR_MCS_DSI 7 + /* Transaction EXception And Summary Register bits */ #define TEXASR_FAILURE_PERSISTENT (63 - 7) #define TEXASR_DISALLOWED (63 - 8) @@ -785,6 +842,8 @@ enum { POWERPC_FLAG_SMT_1LPAR = 0x00800000, /* Has BHRB */ POWERPC_FLAG_BHRB = 0x01000000, + /* Use PPE42-specific behavior */ + POWERPC_FLAG_PPE42 = 0x02000000, }; /* @@ -1522,6 
+1581,10 @@ struct PowerPCCPUClass { void (*init_proc)(CPUPPCState *env); int (*check_pow)(CPUPPCState *env); int (*check_attn)(CPUPPCState *env); + + /* Handlers to be set by the machine initialising the chips */ + uint64_t (*load_sprd)(CPUPPCState *env); + void (*store_sprd)(CPUPPCState *env, uint64_t val); }; static inline bool ppc_cpu_core_single_threaded(CPUState *cs) @@ -1750,9 +1813,12 @@ void ppc_compat_add_property(Object *obj, const char *name, #define SPR_BOOKE_CSRR0 (0x03A) #define SPR_BOOKE_CSRR1 (0x03B) #define SPR_BOOKE_DEAR (0x03D) +#define SPR_PPE42_EDR (0x03D) #define SPR_IAMR (0x03D) #define SPR_BOOKE_ESR (0x03E) +#define SPR_PPE42_ISR (0x03E) #define SPR_BOOKE_IVPR (0x03F) +#define SPR_PPE42_IVPR (0x03F) #define SPR_MPC_EIE (0x050) #define SPR_MPC_EID (0x051) #define SPR_MPC_NRI (0x052) @@ -1818,6 +1884,7 @@ void ppc_compat_add_property(Object *obj, const char *name, #define SPR_TBU40 (0x11E) #define SPR_SVR (0x11E) #define SPR_BOOKE_PIR (0x11E) +#define SPR_PPE42_PIR (0x11E) #define SPR_PVR (0x11F) #define SPR_HSPRG0 (0x130) #define SPR_BOOKE_DBSR (0x130) @@ -1827,6 +1894,7 @@ void ppc_compat_add_property(Object *obj, const char *name, #define SPR_BOOKE_EPCR (0x133) #define SPR_SPURR (0x134) #define SPR_BOOKE_DBCR0 (0x134) +#define SPR_PPE42_DBCR (0x134) #define SPR_IBCR (0x135) #define SPR_PURR (0x135) #define SPR_BOOKE_DBCR1 (0x135) @@ -1844,6 +1912,7 @@ void ppc_compat_add_property(Object *obj, const char *name, #define SPR_HSRR1 (0x13B) #define SPR_BOOKE_IAC4 (0x13B) #define SPR_BOOKE_DAC1 (0x13C) +#define SPR_PPE42_DACR (0x13C) #define SPR_MMCRH (0x13C) #define SPR_DABR2 (0x13D) #define SPR_BOOKE_DAC2 (0x13D) @@ -1853,12 +1922,14 @@ void ppc_compat_add_property(Object *obj, const char *name, #define SPR_BOOKE_DVC2 (0x13F) #define SPR_LPIDR (0x13F) #define SPR_BOOKE_TSR (0x150) +#define SPR_PPE42_TSR (0x150) #define SPR_HMER (0x150) #define SPR_HMEER (0x151) #define SPR_PCR (0x152) #define SPR_HEIR (0x153) #define SPR_BOOKE_LPIDR (0x152) #define SPR_BOOKE_TCR (0x154) +#define SPR_PPE42_TCR (0x154) #define SPR_BOOKE_TLB0PS (0x158) #define SPR_BOOKE_TLB1PS (0x159) #define SPR_BOOKE_TLB2PS (0x15A) @@ -2528,6 +2599,12 @@ enum { PPC2_MEM_LWSYNC = 0x0000000000200000ULL, /* ISA 2.06 BCD assist instructions */ PPC2_BCDA_ISA206 = 0x0000000000400000ULL, + /* PPE42 instructions */ + PPC2_PPE42 = 0x0000000000800000ULL, + /* PPE42X instructions */ + PPC2_PPE42X = 0x0000000001000000ULL, + /* PPE42XM instructions */ + PPC2_PPE42XM = 0x0000000002000000ULL, #define PPC_TCG_INSNS2 (PPC2_BOOKE206 | PPC2_VSX | PPC2_PRCNTL | PPC2_DBRX | \ PPC2_ISA205 | PPC2_VSX207 | PPC2_PERM_ISA206 | \ @@ -2537,7 +2614,8 @@ enum { PPC2_ALTIVEC_207 | PPC2_ISA207S | PPC2_DFP | \ PPC2_FP_CVT_S64 | PPC2_TM | PPC2_PM_ISA206 | \ PPC2_ISA300 | PPC2_ISA310 | PPC2_MEM_LWSYNC | \ - PPC2_BCDA_ISA206) + PPC2_BCDA_ISA206 | PPC2_PPE42 | PPC2_PPE42X | \ + PPC2_PPE42XM) }; /*****************************************************************************/ diff --git a/target/ppc/cpu_init.c b/target/ppc/cpu_init.c index 9642812..3aa3aef 100644 --- a/target/ppc/cpu_init.c +++ b/target/ppc/cpu_init.c @@ -1653,6 +1653,47 @@ static void register_8xx_sprs(CPUPPCState *env) * ... and more (thermal management, performance counters, ...) 
*/ +static void register_ppe42_sprs(CPUPPCState *env) +{ + spr_register(env, SPR_PPE42_EDR, "EDR", + SPR_NOACCESS, SPR_NOACCESS, + &spr_read_generic, &spr_write_generic, + 0x00000000); + spr_register(env, SPR_PPE42_ISR, "ISR", + SPR_NOACCESS, SPR_NOACCESS, + &spr_read_generic, &spr_write_generic, + 0x00000000); + spr_register(env, SPR_PPE42_IVPR, "IVPR", + SPR_NOACCESS, SPR_NOACCESS, + &spr_read_generic, SPR_NOACCESS, + 0xfff80000); + spr_register(env, SPR_PPE42_PIR, "PIR", + SPR_NOACCESS, SPR_NOACCESS, + &spr_read_generic, &spr_write_pir, + 0x00000000); + spr_register(env, SPR_PPE42_DBCR, "DBCR", + SPR_NOACCESS, SPR_NOACCESS, + &spr_read_generic, &spr_write_40x_dbcr0, + 0x00000000); + spr_register(env, SPR_PPE42_DACR, "DACR", + SPR_NOACCESS, SPR_NOACCESS, + &spr_read_generic, &spr_write_generic, + 0x00000000); + /* Timer */ + spr_register(env, SPR_DECR, "DECR", + SPR_NOACCESS, SPR_NOACCESS, + &spr_read_decr, &spr_write_decr, + 0x00000000); + spr_register(env, SPR_PPE42_TSR, "TSR", + SPR_NOACCESS, SPR_NOACCESS, + &spr_read_generic, &spr_write_booke_tsr, + 0x00000000); + spr_register(env, SPR_BOOKE_TCR, "TCR", + SPR_NOACCESS, SPR_NOACCESS, + &spr_read_generic, &spr_write_booke_tcr, + 0x00000000); +} + /*****************************************************************************/ /* Exception vectors models */ static void init_excp_4xx(CPUPPCState *env) @@ -1679,6 +1720,30 @@ static void init_excp_4xx(CPUPPCState *env) #endif } +static void init_excp_ppe42(CPUPPCState *env) +{ +#if !defined(CONFIG_USER_ONLY) + /* Machine Check vector changed after version 0 */ + if (((env->spr[SPR_PVR] & 0xf00000ul) >> 20) == 0) { + env->excp_vectors[POWERPC_EXCP_MCHECK] = 0x00000000; + } else { + env->excp_vectors[POWERPC_EXCP_MCHECK] = 0x00000020; + } + env->excp_vectors[POWERPC_EXCP_RESET] = 0x00000040; + env->excp_vectors[POWERPC_EXCP_DSI] = 0x00000060; + env->excp_vectors[POWERPC_EXCP_ISI] = 0x00000080; + env->excp_vectors[POWERPC_EXCP_EXTERNAL] = 0x000000A0; + env->excp_vectors[POWERPC_EXCP_ALIGN] = 0x000000C0; + env->excp_vectors[POWERPC_EXCP_PROGRAM] = 0x000000E0; + env->excp_vectors[POWERPC_EXCP_DECR] = 0x00000100; + env->excp_vectors[POWERPC_EXCP_FIT] = 0x00000120; + env->excp_vectors[POWERPC_EXCP_WDT] = 0x00000140; + env->ivpr_mask = 0xFFFFFE00UL; + /* Hardware reset vector */ + env->hreset_vector = 0x00000040UL; +#endif +} + static void init_excp_MPC5xx(CPUPPCState *env) { #if !defined(CONFIG_USER_ONLY) @@ -2200,6 +2265,80 @@ POWERPC_FAMILY(405)(ObjectClass *oc, const void *data) POWERPC_FLAG_DE | POWERPC_FLAG_BUS_CLK; } +static void init_proc_ppe42(CPUPPCState *env) +{ + register_ppe42_sprs(env); + + init_excp_ppe42(env); + env->dcache_line_size = 32; + env->icache_line_size = 32; + /* Allocate hardware IRQ controller */ + ppc40x_irq_init(env_archcpu(env)); + + SET_FIT_PERIOD(8, 12, 16, 20); + SET_WDT_PERIOD(16, 20, 24, 28); +} + +static void ppe42_class_common_init(PowerPCCPUClass *pcc) +{ + pcc->init_proc = init_proc_ppe42; + pcc->check_pow = check_pow_nocheck; + pcc->check_attn = check_attn_none; + pcc->insns_flags = PPC_INSNS_BASE | + PPC_WRTEE | + PPC_CACHE | + PPC_CACHE_DCBZ | + PPC_MEM_SYNC; + pcc->msr_mask = R_MSR_SEM_MASK | + (1ull << MSR_IS0) | + R_MSR_SIBRC_MASK | + (1ull << MSR_LP) | + (1ull << MSR_WE) | + (1ull << MSR_IS1) | + (1ull << MSR_UIE) | + (1ull << MSR_EE) | + (1ull << MSR_ME) | + (1ull << MSR_IS2) | + (1ull << MSR_IS3) | + (1ull << MSR_IPE) | + R_MSR_SIBRCA_MASK; + pcc->mmu_model = POWERPC_MMU_REAL; + pcc->excp_model = POWERPC_EXCP_PPE42; + pcc->bus_model = 
PPC_FLAGS_INPUT_PPE42; + pcc->bfd_mach = bfd_mach_ppc_403; + pcc->flags = POWERPC_FLAG_PPE42 | POWERPC_FLAG_BUS_CLK; +} + +POWERPC_FAMILY(ppe42)(ObjectClass *oc, const void *data) +{ + DeviceClass *dc = DEVICE_CLASS(oc); + PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc); + + dc->desc = "PPE 42"; + pcc->insns_flags2 = PPC2_PPE42; + ppe42_class_common_init(pcc); +} + +POWERPC_FAMILY(ppe42x)(ObjectClass *oc, const void *data) +{ + DeviceClass *dc = DEVICE_CLASS(oc); + PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc); + + dc->desc = "PPE 42X"; + pcc->insns_flags2 = PPC2_PPE42 | PPC2_PPE42X; + ppe42_class_common_init(pcc); +} + +POWERPC_FAMILY(ppe42xm)(ObjectClass *oc, const void *data) +{ + DeviceClass *dc = DEVICE_CLASS(oc); + PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc); + + dc->desc = "PPE 42XM"; + pcc->insns_flags2 = PPC2_PPE42 | PPC2_PPE42X | PPC2_PPE42XM; + ppe42_class_common_init(pcc); +} + static void init_proc_440EP(CPUPPCState *env) { register_BookE_sprs(env, 0x000000000000FFFFULL); @@ -6802,53 +6941,64 @@ static void init_ppc_proc(PowerPCCPU *cpu) /* MSR bits & flags consistency checks */ if (env->msr_mask & (1 << 25)) { - switch (env->flags & (POWERPC_FLAG_SPE | POWERPC_FLAG_VRE)) { + switch (env->flags & (POWERPC_FLAG_SPE | POWERPC_FLAG_VRE | + POWERPC_FLAG_PPE42)) { case POWERPC_FLAG_SPE: case POWERPC_FLAG_VRE: + case POWERPC_FLAG_PPE42: break; default: fprintf(stderr, "PowerPC MSR definition inconsistency\n" - "Should define POWERPC_FLAG_SPE or POWERPC_FLAG_VRE\n"); + "Should define POWERPC_FLAG_SPE or POWERPC_FLAG_VRE\n" + "or POWERPC_FLAG_PPE42\n"); exit(1); } - } else if (env->flags & (POWERPC_FLAG_SPE | POWERPC_FLAG_VRE)) { + } else if (env->flags & (POWERPC_FLAG_SPE | POWERPC_FLAG_VRE | + POWERPC_FLAG_PPE42)) { fprintf(stderr, "PowerPC MSR definition inconsistency\n" - "Should not define POWERPC_FLAG_SPE nor POWERPC_FLAG_VRE\n"); + "Should not define POWERPC_FLAG_SPE nor POWERPC_FLAG_VRE\n" + "nor POWERPC_FLAG_PPE42\n"); exit(1); } if (env->msr_mask & (1 << 17)) { - switch (env->flags & (POWERPC_FLAG_TGPR | POWERPC_FLAG_CE)) { + switch (env->flags & (POWERPC_FLAG_TGPR | POWERPC_FLAG_CE | + POWERPC_FLAG_PPE42)) { case POWERPC_FLAG_TGPR: case POWERPC_FLAG_CE: + case POWERPC_FLAG_PPE42: break; default: fprintf(stderr, "PowerPC MSR definition inconsistency\n" - "Should define POWERPC_FLAG_TGPR or POWERPC_FLAG_CE\n"); + "Should define POWERPC_FLAG_TGPR or POWERPC_FLAG_CE\n" + "or POWERPC_FLAG_PPE42\n"); exit(1); } - } else if (env->flags & (POWERPC_FLAG_TGPR | POWERPC_FLAG_CE)) { + } else if (env->flags & (POWERPC_FLAG_TGPR | POWERPC_FLAG_CE | + POWERPC_FLAG_PPE42)) { fprintf(stderr, "PowerPC MSR definition inconsistency\n" - "Should not define POWERPC_FLAG_TGPR nor POWERPC_FLAG_CE\n"); + "Should not define POWERPC_FLAG_TGPR nor POWERPC_FLAG_CE\n" + "nor POWERPC_FLAG_PPE42\n"); exit(1); } if (env->msr_mask & (1 << 10)) { switch (env->flags & (POWERPC_FLAG_SE | POWERPC_FLAG_DWE | - POWERPC_FLAG_UBLE)) { + POWERPC_FLAG_UBLE | POWERPC_FLAG_PPE42)) { case POWERPC_FLAG_SE: case POWERPC_FLAG_DWE: case POWERPC_FLAG_UBLE: + case POWERPC_FLAG_PPE42: break; default: fprintf(stderr, "PowerPC MSR definition inconsistency\n" "Should define POWERPC_FLAG_SE or POWERPC_FLAG_DWE or " - "POWERPC_FLAG_UBLE\n"); + "POWERPC_FLAG_UBLE or POWERPC_FLAG_PPE42\n"); exit(1); } } else if (env->flags & (POWERPC_FLAG_SE | POWERPC_FLAG_DWE | - POWERPC_FLAG_UBLE)) { + POWERPC_FLAG_UBLE | POWERPC_FLAG_PPE42)) { fprintf(stderr, "PowerPC MSR definition inconsistency\n" "Should not define POWERPC_FLAG_SE nor 
POWERPC_FLAG_DWE nor " - "POWERPC_FLAG_UBLE\n"); + "POWERPC_FLAG_UBLE nor POWERPC_FLAG_PPE42\n"); exit(1); } if (env->msr_mask & (1 << 9)) { @@ -6867,18 +7017,23 @@ static void init_ppc_proc(PowerPCCPU *cpu) exit(1); } if (env->msr_mask & (1 << 2)) { - switch (env->flags & (POWERPC_FLAG_PX | POWERPC_FLAG_PMM)) { + switch (env->flags & (POWERPC_FLAG_PX | POWERPC_FLAG_PMM | + POWERPC_FLAG_PPE42)) { case POWERPC_FLAG_PX: case POWERPC_FLAG_PMM: + case POWERPC_FLAG_PPE42: break; default: fprintf(stderr, "PowerPC MSR definition inconsistency\n" - "Should define POWERPC_FLAG_PX or POWERPC_FLAG_PMM\n"); + "Should define POWERPC_FLAG_PX or POWERPC_FLAG_PMM\n" + "or POWERPC_FLAG_PPE42\n"); exit(1); } - } else if (env->flags & (POWERPC_FLAG_PX | POWERPC_FLAG_PMM)) { + } else if (env->flags & (POWERPC_FLAG_PX | POWERPC_FLAG_PMM | + POWERPC_FLAG_PPE42)) { fprintf(stderr, "PowerPC MSR definition inconsistency\n" - "Should not define POWERPC_FLAG_PX nor POWERPC_FLAG_PMM\n"); + "Should not define POWERPC_FLAG_PX nor POWERPC_FLAG_PMM\n" + "nor POWERPC_FLAG_PPE42\n"); exit(1); } if ((env->flags & POWERPC_FLAG_BUS_CLK) == 0) { @@ -7143,6 +7298,7 @@ static void ppc_cpu_list_entry(gpointer data, gpointer user_data) { ObjectClass *oc = data; PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc); + CPUClass *cc = CPU_CLASS(oc); DeviceClass *family = DEVICE_CLASS(ppc_cpu_get_family_class(pcc)); const char *typename = object_class_get_name(oc); char *name; @@ -7153,7 +7309,11 @@ static void ppc_cpu_list_entry(gpointer data, gpointer user_data) } name = cpu_model_from_type(typename); - qemu_printf(" %-16s PVR %08x\n", name, pcc->pvr); + if (cc->deprecation_note) { + qemu_printf(" %-16s PVR %08x (deprecated)\n", name, pcc->pvr); + } else { + qemu_printf(" %-16s PVR %08x\n", name, pcc->pvr); + } for (i = 0; ppc_cpu_aliases[i].alias != NULL; i++) { PowerPCCPUAlias *alias = &ppc_cpu_aliases[i]; ObjectClass *alias_oc = ppc_cpu_class_by_name(alias->model); @@ -7225,7 +7385,7 @@ static int ppc_cpu_mmu_index(CPUState *cs, bool ifetch) #ifndef CONFIG_USER_ONLY static bool ppc_cpu_has_work(CPUState *cs) { - return cs->interrupt_request & CPU_INTERRUPT_HARD; + return cpu_test_interrupt(cs, CPU_INTERRUPT_HARD); } #endif /* !CONFIG_USER_ONLY */ @@ -7243,39 +7403,40 @@ static void ppc_cpu_reset_hold(Object *obj, ResetType type) } msr = (target_ulong)0; - msr |= (target_ulong)MSR_HVB; - msr |= (target_ulong)1 << MSR_EP; + if (!(env->flags & POWERPC_FLAG_PPE42)) { + msr |= (target_ulong)MSR_HVB; + msr |= (target_ulong)1 << MSR_EP; #if defined(DO_SINGLE_STEP) && 0 - /* Single step trace mode */ - msr |= (target_ulong)1 << MSR_SE; - msr |= (target_ulong)1 << MSR_BE; + /* Single step trace mode */ + msr |= (target_ulong)1 << MSR_SE; + msr |= (target_ulong)1 << MSR_BE; #endif #if defined(CONFIG_USER_ONLY) - msr |= (target_ulong)1 << MSR_FP; /* Allow floating point usage */ - msr |= (target_ulong)1 << MSR_FE0; /* Allow floating point exceptions */ - msr |= (target_ulong)1 << MSR_FE1; - msr |= (target_ulong)1 << MSR_VR; /* Allow altivec usage */ - msr |= (target_ulong)1 << MSR_VSX; /* Allow VSX usage */ - msr |= (target_ulong)1 << MSR_SPE; /* Allow SPE usage */ - msr |= (target_ulong)1 << MSR_PR; + msr |= (target_ulong)1 << MSR_FP; /* Allow floating point usage */ + msr |= (target_ulong)1 << MSR_FE0; /* Allow floating point exceptions */ + msr |= (target_ulong)1 << MSR_FE1; + msr |= (target_ulong)1 << MSR_VR; /* Allow altivec usage */ + msr |= (target_ulong)1 << MSR_VSX; /* Allow VSX usage */ + msr |= (target_ulong)1 << MSR_SPE; /* Allow SPE 
usage */ + msr |= (target_ulong)1 << MSR_PR; #if defined(TARGET_PPC64) - msr |= (target_ulong)1 << MSR_TM; /* Transactional memory */ + msr |= (target_ulong)1 << MSR_TM; /* Transactional memory */ #endif #if !TARGET_BIG_ENDIAN - msr |= (target_ulong)1 << MSR_LE; /* Little-endian user mode */ - if (!((env->msr_mask >> MSR_LE) & 1)) { - fprintf(stderr, "Selected CPU does not support little-endian.\n"); - exit(1); - } + msr |= (target_ulong)1 << MSR_LE; /* Little-endian user mode */ + if (!((env->msr_mask >> MSR_LE) & 1)) { + fprintf(stderr, "Selected CPU does not support little-endian.\n"); + exit(1); + } #endif #endif #if defined(TARGET_PPC64) - if (mmu_is_64bit(env->mmu_model)) { - msr |= (1ULL << MSR_SF); - } + if (mmu_is_64bit(env->mmu_model)) { + msr |= (1ULL << MSR_SF); + } #endif - + } hreg_store_msr(env, msr, 1); #if !defined(CONFIG_USER_ONLY) @@ -7386,6 +7547,12 @@ static void ppc_cpu_exec_exit(CPUState *cs) cpu->vhyp_class->cpu_exec_exit(cpu->vhyp, cpu); } } + +static vaddr ppc_pointer_wrap(CPUState *cs, int mmu_idx, + vaddr result, vaddr base) +{ + return (cpu_env(cs)->hflags >> HFLAGS_64) & 1 ? result : (uint32_t)result; +} #endif /* CONFIG_TCG */ #endif /* !CONFIG_USER_ONLY */ @@ -7490,6 +7657,7 @@ static const TCGCPUOps ppc_tcg_ops = { .record_sigsegv = ppc_cpu_record_sigsegv, #else .tlb_fill = ppc_cpu_tlb_fill, + .pointer_wrap = ppc_pointer_wrap, .cpu_exec_interrupt = ppc_cpu_exec_interrupt, .cpu_exec_halt = ppc_cpu_has_work, .cpu_exec_reset = cpu_reset, @@ -7718,6 +7886,18 @@ void ppc_cpu_dump_state(CPUState *cs, FILE *f, int flags) * they can be read with "p $ivor0", "p $ivor1", etc. */ break; + case POWERPC_EXCP_PPE42: + qemu_fprintf(f, "SRR0 " TARGET_FMT_lx " SRR1 " TARGET_FMT_lx "\n", + env->spr[SPR_SRR0], env->spr[SPR_SRR1]); + + qemu_fprintf(f, " TCR " TARGET_FMT_lx " TSR " TARGET_FMT_lx + " ISR " TARGET_FMT_lx " EDR " TARGET_FMT_lx "\n", + env->spr[SPR_PPE42_TCR], env->spr[SPR_PPE42_TSR], + env->spr[SPR_PPE42_ISR], env->spr[SPR_PPE42_EDR]); + + qemu_fprintf(f, " PIR " TARGET_FMT_lx " IVPR " TARGET_FMT_lx "\n", + env->spr[SPR_PPE42_PIR], env->spr[SPR_PPE42_IVPR]); + break; case POWERPC_EXCP_40x: qemu_fprintf(f, " TCR " TARGET_FMT_lx " TSR " TARGET_FMT_lx " ESR " TARGET_FMT_lx " DEAR " TARGET_FMT_lx "\n", diff --git a/target/ppc/excp_helper.c b/target/ppc/excp_helper.c index 1efdc40..d8bca19 100644 --- a/target/ppc/excp_helper.c +++ b/target/ppc/excp_helper.c @@ -949,6 +949,125 @@ static void powerpc_excp_74xx(PowerPCCPU *cpu, int excp) powerpc_set_excp_state(cpu, vector, new_msr); } +static void powerpc_excp_ppe42(PowerPCCPU *cpu, int excp) +{ + CPUPPCState *env = &cpu->env; + target_ulong msr, new_msr, vector; + target_ulong mcs = PPE42_ISR_MCS_INSTRUCTION; + bool promote_unmaskable; + + msr = env->msr; + + /* + * New interrupt handler msr preserves SIBRC and ME unless explicitly + * overridden by the exception. All other MSR bits are zeroed out. + */ + new_msr = env->msr & (((target_ulong)1 << MSR_ME) | R_MSR_SIBRC_MASK); + + /* HV emu assistance interrupt only exists on server arch 2.05 or later */ + if (excp == POWERPC_EXCP_HV_EMU) { + excp = POWERPC_EXCP_PROGRAM; + } + + /* + * Unmaskable interrupts (Program, ISI, Alignment and DSI) are promoted to + * machine check if MSR_UIE is 0. 
+ */ + promote_unmaskable = !(msr & ((target_ulong)1 << MSR_UIE)); + + + switch (excp) { + case POWERPC_EXCP_MCHECK: /* Machine check exception */ + break; + case POWERPC_EXCP_DSI: /* Data storage exception */ + trace_ppc_excp_dsi(env->spr[SPR_PPE42_ISR], env->spr[SPR_PPE42_EDR]); + if (promote_unmaskable) { + excp = POWERPC_EXCP_MCHECK; + mcs = PPE42_ISR_MCS_DSI; + } + break; + case POWERPC_EXCP_ISI: /* Instruction storage exception */ + trace_ppc_excp_isi(msr, env->nip); + if (promote_unmaskable) { + excp = POWERPC_EXCP_MCHECK; + mcs = PPE42_ISR_MCS_ISI; + } + break; + case POWERPC_EXCP_EXTERNAL: /* External input */ + break; + case POWERPC_EXCP_ALIGN: /* Alignment exception */ + if (promote_unmaskable) { + excp = POWERPC_EXCP_MCHECK; + mcs = PPE42_ISR_MCS_ALIGNMENT; + } + break; + case POWERPC_EXCP_PROGRAM: /* Program exception */ + if (promote_unmaskable) { + excp = POWERPC_EXCP_MCHECK; + mcs = PPE42_ISR_MCS_PROGRAM; + } + switch (env->error_code & ~0xF) { + case POWERPC_EXCP_INVAL: + trace_ppc_excp_inval(env->nip); + env->spr[SPR_PPE42_ISR] &= ~((target_ulong)1 << PPE42_ISR_PTR); + break; + case POWERPC_EXCP_TRAP: + env->spr[SPR_PPE42_ISR] |= ((target_ulong)1 << PPE42_ISR_PTR); + break; + default: + /* Should never occur */ + cpu_abort(env_cpu(env), "Invalid program exception %d. Aborting\n", + env->error_code); + break; + } +#ifdef CONFIG_TCG + env->spr[SPR_PPE42_EDR] = ppc_ldl_code(env, env->nip); +#endif + break; + case POWERPC_EXCP_DECR: /* Decrementer exception */ + break; + case POWERPC_EXCP_FIT: /* Fixed-interval timer interrupt */ + trace_ppc_excp_print("FIT"); + break; + case POWERPC_EXCP_WDT: /* Watchdog timer interrupt */ + trace_ppc_excp_print("WDT"); + break; + case POWERPC_EXCP_RESET: /* System reset exception */ + /* reset exceptions don't have ME set */ + new_msr &= ~((target_ulong)1 << MSR_ME); + break; + default: + cpu_abort(env_cpu(env), "Invalid PPE42 exception %d. 
Aborting\n", + excp); + break; + } + + env->spr[SPR_SRR0] = env->nip; + env->spr[SPR_SRR1] = msr; + + vector = env->excp_vectors[excp]; + if (vector == (target_ulong)-1ULL) { + cpu_abort(env_cpu(env), + "Raised an exception without defined vector %d\n", excp); + } + vector |= env->spr[SPR_PPE42_IVPR]; + + if (excp == POWERPC_EXCP_MCHECK) { + /* Also set the Machine Check Status (MCS) */ + env->spr[SPR_PPE42_ISR] &= ~R_PPE42_ISR_MCS_MASK; + env->spr[SPR_PPE42_ISR] |= (mcs & R_PPE42_ISR_MCS_MASK); + env->spr[SPR_PPE42_ISR] &= ~((target_ulong)1 << PPE42_ISR_MFE); + + /* Machine checks halt execution if MSR_ME is 0 */ + powerpc_mcheck_checkstop(env); + + /* machine check exceptions don't have ME set */ + new_msr &= ~((target_ulong)1 << MSR_ME); + } + + powerpc_set_excp_state(cpu, vector, new_msr); +} + static void powerpc_excp_booke(PowerPCCPU *cpu, int excp) { CPUPPCState *env = &cpu->env; @@ -1589,6 +1708,9 @@ void powerpc_excp(PowerPCCPU *cpu, int excp) case POWERPC_EXCP_POWER11: powerpc_excp_books(cpu, excp); break; + case POWERPC_EXCP_PPE42: + powerpc_excp_ppe42(cpu, excp); + break; default: g_assert_not_reached(); } @@ -1945,6 +2067,43 @@ static int p9_next_unmasked_interrupt(CPUPPCState *env, } #endif /* TARGET_PPC64 */ +static int ppe42_next_unmasked_interrupt(CPUPPCState *env) +{ + bool async_deliver; + + /* External reset */ + if (env->pending_interrupts & PPC_INTERRUPT_RESET) { + return PPC_INTERRUPT_RESET; + } + /* Machine check exception */ + if (env->pending_interrupts & PPC_INTERRUPT_MCK) { + return PPC_INTERRUPT_MCK; + } + + async_deliver = FIELD_EX64(env->msr, MSR, EE); + + if (async_deliver != 0) { + /* Watchdog timer */ + if (env->pending_interrupts & PPC_INTERRUPT_WDT) { + return PPC_INTERRUPT_WDT; + } + /* External Interrupt */ + if (env->pending_interrupts & PPC_INTERRUPT_EXT) { + return PPC_INTERRUPT_EXT; + } + /* Fixed interval timer */ + if (env->pending_interrupts & PPC_INTERRUPT_FIT) { + return PPC_INTERRUPT_FIT; + } + /* Decrementer exception */ + if (env->pending_interrupts & PPC_INTERRUPT_DECR) { + return PPC_INTERRUPT_DECR; + } + } + + return 0; +} + static int ppc_next_unmasked_interrupt(CPUPPCState *env) { uint32_t pending_interrupts = env->pending_interrupts; @@ -1970,6 +2129,10 @@ static int ppc_next_unmasked_interrupt(CPUPPCState *env) } #endif + if (env->excp_model == POWERPC_EXCP_PPE42) { + return ppe42_next_unmasked_interrupt(env); + } + /* External reset */ if (pending_interrupts & PPC_INTERRUPT_RESET) { return PPC_INTERRUPT_RESET; diff --git a/target/ppc/fpu_helper.c b/target/ppc/fpu_helper.c index 07b782f..850aca6 100644 --- a/target/ppc/fpu_helper.c +++ b/target/ppc/fpu_helper.c @@ -562,14 +562,14 @@ uint64_t helper_##op(CPUPPCState *env, float64 arg) \ return ret; \ } -FPU_FCTI(fctiw, int32, 0x80000000U) -FPU_FCTI(fctiwz, int32_round_to_zero, 0x80000000U) -FPU_FCTI(fctiwu, uint32, 0x00000000U) -FPU_FCTI(fctiwuz, uint32_round_to_zero, 0x00000000U) -FPU_FCTI(fctid, int64, 0x8000000000000000ULL) -FPU_FCTI(fctidz, int64_round_to_zero, 0x8000000000000000ULL) -FPU_FCTI(fctidu, uint64, 0x0000000000000000ULL) -FPU_FCTI(fctiduz, uint64_round_to_zero, 0x0000000000000000ULL) +FPU_FCTI(FCTIW, int32, 0x80000000U) +FPU_FCTI(FCTIWZ, int32_round_to_zero, 0x80000000U) +FPU_FCTI(FCTIWU, uint32, 0x00000000U) +FPU_FCTI(FCTIWUZ, uint32_round_to_zero, 0x00000000U) +FPU_FCTI(FCTID, int64, 0x8000000000000000ULL) +FPU_FCTI(FCTIDZ, int64_round_to_zero, 0x8000000000000000ULL) +FPU_FCTI(FCTIDU, uint64, 0x0000000000000000ULL) +FPU_FCTI(FCTIDUZ, uint64_round_to_zero, 
0x0000000000000000ULL) #define FPU_FCFI(op, cvtr, is_single) \ uint64_t helper_##op(CPUPPCState *env, uint64_t arg) \ @@ -586,10 +586,10 @@ uint64_t helper_##op(CPUPPCState *env, uint64_t arg) \ return farg.ll; \ } -FPU_FCFI(fcfid, int64_to_float64, 0) -FPU_FCFI(fcfids, int64_to_float32, 1) -FPU_FCFI(fcfidu, uint64_to_float64, 0) -FPU_FCFI(fcfidus, uint64_to_float32, 1) +FPU_FCFI(FCFID, int64_to_float64, 0) +FPU_FCFI(FCFIDS, int64_to_float32, 1) +FPU_FCFI(FCFIDU, uint64_to_float64, 0) +FPU_FCFI(FCFIDUS, uint64_to_float32, 1) static uint64_t do_fri(CPUPPCState *env, uint64_t arg, FloatRoundMode rounding_mode) @@ -613,22 +613,22 @@ static uint64_t do_fri(CPUPPCState *env, uint64_t arg, return arg; } -uint64_t helper_frin(CPUPPCState *env, uint64_t arg) +uint64_t helper_FRIN(CPUPPCState *env, uint64_t arg) { return do_fri(env, arg, float_round_ties_away); } -uint64_t helper_friz(CPUPPCState *env, uint64_t arg) +uint64_t helper_FRIZ(CPUPPCState *env, uint64_t arg) { return do_fri(env, arg, float_round_to_zero); } -uint64_t helper_frip(CPUPPCState *env, uint64_t arg) +uint64_t helper_FRIP(CPUPPCState *env, uint64_t arg) { return do_fri(env, arg, float_round_up); } -uint64_t helper_frim(CPUPPCState *env, uint64_t arg) +uint64_t helper_FRIM(CPUPPCState *env, uint64_t arg) { return do_fri(env, arg, float_round_down); } @@ -697,7 +697,7 @@ static uint64_t do_frsp(CPUPPCState *env, uint64_t arg, uintptr_t retaddr) return helper_todouble(f32); } -uint64_t helper_frsp(CPUPPCState *env, uint64_t arg) +uint64_t helper_FRSP(CPUPPCState *env, uint64_t arg) { return do_frsp(env, arg, GETPC()); } @@ -871,7 +871,7 @@ uint32_t helper_FTSQRT(uint64_t frb) return 0x8 | (fg_flag ? 4 : 0) | (fe_flag ? 2 : 0); } -void helper_fcmpu(CPUPPCState *env, uint64_t arg1, uint64_t arg2, +void helper_FCMPU(CPUPPCState *env, uint64_t arg1, uint64_t arg2, uint32_t crfD) { CPU_DoubleU farg1, farg2; @@ -902,7 +902,7 @@ void helper_fcmpu(CPUPPCState *env, uint64_t arg1, uint64_t arg2, } } -void helper_fcmpo(CPUPPCState *env, uint64_t arg1, uint64_t arg2, +void helper_FCMPO(CPUPPCState *env, uint64_t arg1, uint64_t arg2, uint32_t crfD) { CPU_DoubleU farg1, farg2; diff --git a/target/ppc/helper.h b/target/ppc/helper.h index ca414f2..e99c8c8 100644 --- a/target/ppc/helper.h +++ b/target/ppc/helper.h @@ -94,26 +94,26 @@ DEF_HELPER_2(fpscr_setbit, void, env, i32) DEF_HELPER_FLAGS_1(todouble, TCG_CALL_NO_RWG_SE, i64, i32) DEF_HELPER_FLAGS_1(tosingle, TCG_CALL_NO_RWG_SE, i32, i64) -DEF_HELPER_4(fcmpo, void, env, i64, i64, i32) -DEF_HELPER_4(fcmpu, void, env, i64, i64, i32) +DEF_HELPER_4(FCMPO, void, env, i64, i64, i32) +DEF_HELPER_4(FCMPU, void, env, i64, i64, i32) -DEF_HELPER_2(fctiw, i64, env, i64) -DEF_HELPER_2(fctiwu, i64, env, i64) -DEF_HELPER_2(fctiwz, i64, env, i64) -DEF_HELPER_2(fctiwuz, i64, env, i64) -DEF_HELPER_2(fcfid, i64, env, i64) -DEF_HELPER_2(fcfidu, i64, env, i64) -DEF_HELPER_2(fcfids, i64, env, i64) -DEF_HELPER_2(fcfidus, i64, env, i64) -DEF_HELPER_2(fctid, i64, env, i64) -DEF_HELPER_2(fctidu, i64, env, i64) -DEF_HELPER_2(fctidz, i64, env, i64) -DEF_HELPER_2(fctiduz, i64, env, i64) -DEF_HELPER_2(frsp, i64, env, i64) -DEF_HELPER_2(frin, i64, env, i64) -DEF_HELPER_2(friz, i64, env, i64) -DEF_HELPER_2(frip, i64, env, i64) -DEF_HELPER_2(frim, i64, env, i64) +DEF_HELPER_2(FCTIW, i64, env, i64) +DEF_HELPER_2(FCTIWU, i64, env, i64) +DEF_HELPER_2(FCTIWZ, i64, env, i64) +DEF_HELPER_2(FCTIWUZ, i64, env, i64) +DEF_HELPER_2(FCFID, i64, env, i64) +DEF_HELPER_2(FCFIDU, i64, env, i64) +DEF_HELPER_2(FCFIDS, i64, env, i64) 
+DEF_HELPER_2(FCFIDUS, i64, env, i64) +DEF_HELPER_2(FCTID, i64, env, i64) +DEF_HELPER_2(FCTIDU, i64, env, i64) +DEF_HELPER_2(FCTIDZ, i64, env, i64) +DEF_HELPER_2(FCTIDUZ, i64, env, i64) +DEF_HELPER_2(FRSP, i64, env, i64) +DEF_HELPER_2(FRIN, i64, env, i64) +DEF_HELPER_2(FRIZ, i64, env, i64) +DEF_HELPER_2(FRIP, i64, env, i64) +DEF_HELPER_2(FRIM, i64, env, i64) DEF_HELPER_3(FADD, f64, env, f64, f64) DEF_HELPER_3(FADDS, f64, env, f64, f64) diff --git a/target/ppc/helper_regs.c b/target/ppc/helper_regs.c index 7e57268..a07e6a7 100644 --- a/target/ppc/helper_regs.c +++ b/target/ppc/helper_regs.c @@ -186,6 +186,10 @@ static uint32_t hreg_compute_hflags_value(CPUPPCState *env) if (env->spr[SPR_LPCR] & LPCR_HR) { hflags |= 1 << HFLAGS_HR; } + if (unlikely(ppc_flags & POWERPC_FLAG_PPE42)) { + /* PPE42 has a single address space and no problem state */ + msr = 0; + } #ifndef CONFIG_USER_ONLY if (!env->has_hv_mode || (msr & (1ull << MSR_HV))) { @@ -274,6 +278,7 @@ TCGTBCPUState ppc_get_tb_cpu_state(CPUState *cs) return (TCGTBCPUState){ .pc = env->nip, .flags = hflags_current }; } +#ifndef CONFIG_USER_ONLY void cpu_interrupt_exittb(CPUState *cs) { /* @@ -285,6 +290,7 @@ void cpu_interrupt_exittb(CPUState *cs) cpu_interrupt(cs, CPU_INTERRUPT_EXITTB); } } +#endif int hreg_store_msr(CPUPPCState *env, target_ulong value, int alter_hv) { @@ -306,9 +312,6 @@ int hreg_store_msr(CPUPPCState *env, target_ulong value, int alter_hv) value &= ~(1 << MSR_ME); value |= env->msr & (1 << MSR_ME); } - if ((value ^ env->msr) & (R_MSR_IR_MASK | R_MSR_DR_MASK)) { - cpu_interrupt_exittb(cs); - } if ((env->mmu_model == POWERPC_MMU_BOOKE || env->mmu_model == POWERPC_MMU_BOOKE206) && ((value ^ env->msr) & R_MSR_GS_MASK)) { @@ -319,8 +322,14 @@ int hreg_store_msr(CPUPPCState *env, target_ulong value, int alter_hv) /* Swap temporary saved registers with GPRs */ hreg_swap_gpr_tgpr(env); } - if (unlikely((value ^ env->msr) & R_MSR_EP_MASK)) { - env->excp_prefix = FIELD_EX64(value, MSR, EP) * 0xFFF00000; + /* PPE42 uses IR, DR and EP MSR bits for other purposes */ + if (likely(!(env->flags & POWERPC_FLAG_PPE42))) { + if ((value ^ env->msr) & (R_MSR_IR_MASK | R_MSR_DR_MASK)) { + cpu_interrupt_exittb(cs); + } + if (unlikely((value ^ env->msr) & R_MSR_EP_MASK)) { + env->excp_prefix = FIELD_EX64(value, MSR, EP) * 0xFFF00000; + } } /* * If PR=1 then EE, IR and DR must be 1 @@ -462,6 +471,23 @@ void register_generic_sprs(PowerPCCPU *cpu) SPR_NOACCESS, SPR_NOACCESS, &spr_read_generic, &spr_write_generic, 0x00000000); + + spr_register(env, SPR_PVR, "PVR", + /* Linux permits userspace to read PVR */ +#if defined(CONFIG_LINUX_USER) + &spr_read_generic, +#else + SPR_NOACCESS, +#endif + SPR_NOACCESS, + &spr_read_generic, SPR_NOACCESS, + pcc->pvr); + + /* PPE42 doesn't support SPRG1-3, SVR or TB regs */ + if (env->insns_flags2 & PPC2_PPE42) { + return; + } + spr_register(env, SPR_SPRG1, "SPRG1", SPR_NOACCESS, SPR_NOACCESS, &spr_read_generic, &spr_write_generic, @@ -475,17 +501,6 @@ void register_generic_sprs(PowerPCCPU *cpu) &spr_read_generic, &spr_write_generic, 0x00000000); - spr_register(env, SPR_PVR, "PVR", - /* Linux permits userspace to read PVR */ -#if defined(CONFIG_LINUX_USER) - &spr_read_generic, -#else - SPR_NOACCESS, -#endif - SPR_NOACCESS, - &spr_read_generic, SPR_NOACCESS, - pcc->pvr); - /* Register SVR if it's defined to anything else than POWERPC_SVR_NONE */ if (pcc->svr != POWERPC_SVR_NONE) { if (pcc->svr & POWERPC_SVR_E500) { diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode index e53fd28..0e9c68f 100644 --- 
a/target/ppc/insn32.decode +++ b/target/ppc/insn32.decode @@ -58,6 +58,10 @@ %ds_rtp 22:4 !function=times_2 @DS_rtp ...... ....0 ra:5 .............. .. &D rt=%ds_rtp si=%ds_si +%dd_si 3:s13 +&DD rt ra si:int64_t +@DD ...... rt:5 ra:5 ............. . .. &DD si=%dd_si + &DX_b vrt b %dx_b 6:10 16:5 0:1 @DX_b ...... vrt:5 ..... .......... ..... . &DX_b b=%dx_b @@ -66,6 +70,11 @@ %dx_d 6:s10 16:5 0:1 @DX ...... rt:5 ..... .......... ..... . &DX d=%dx_d +%md_sh 1:1 11:5 +%md_mb 5:1 6:5 +&MD rs ra sh mb rc +@MD ...... rs:5 ra:5 ..... ...... ... . rc:1 &MD sh=%md_sh mb=%md_mb + &VA vrt vra vrb rc @VA ...... vrt:5 vra:5 vrb:5 rc:5 ...... &VA @@ -322,6 +331,13 @@ LDUX 011111 ..... ..... ..... 0000110101 - @X LQ 111000 ..... ..... ............ ---- @DQ_rtp +LVD 000101 ..... ..... ................ @D +LVDU 001001 ..... ..... ................ @D +LVDX 011111 ..... ..... ..... 0000010001 - @X +LSKU 111010 ..... ..... ............. 0 11 @DD +LCXU 111010 ..... ..... ............. 1 11 @DD + + ### Fixed-Point Store Instructions STB 100110 ..... ..... ................ @D @@ -346,6 +362,11 @@ STDUX 011111 ..... ..... ..... 0010110101 - @X STQ 111110 ..... ..... ..............10 @DS_rtp +STVDU 010110 ..... ..... ................ @D +STVDX 011111 ..... ..... ..... 0010010001 - @X +STSKU 111110 ..... ..... ............. 0 11 @DD +STCXU 111110 ..... ..... ............. 1 11 @DD + ### Fixed-Point Compare Instructions CMP 011111 ... - . ..... ..... 0000000000 - @X_bfl @@ -461,8 +482,14 @@ PRTYD 011111 ..... ..... ----- 0010111010 - @X_sa BPERMD 011111 ..... ..... ..... 0011111100 - @X CFUGED 011111 ..... ..... ..... 0011011100 - @X -CNTLZDM 011111 ..... ..... ..... 0000111011 - @X -CNTTZDM 011111 ..... ..... ..... 1000111011 - @X +{ + SLVD 011111 ..... ..... ..... 0000111011 . @X_rc + CNTLZDM 011111 ..... ..... ..... 0000111011 - @X +} +{ + SRVD 011111 ..... ..... ..... 1000111011 . @X_rc + CNTTZDM 011111 ..... ..... ..... 1000111011 - @X +} PDEPD 011111 ..... ..... ..... 0010011100 - @X PEXTD 011111 ..... ..... ..... 0010111100 - @X @@ -503,6 +530,17 @@ STFDU 110111 ..... ...... ............... @D STFDX 011111 ..... ...... .... 1011010111 - @X STFDUX 011111 ..... ...... .... 1011110111 - @X +### Floating-Point Move Instructions + +FMR 111111 ..... ----- ..... 0001001000 . @X_tb_rc +FNEG 111111 ..... ----- ..... 0000101000 . @X_tb_rc +FABS 111111 ..... ----- ..... 0100001000 . @X_tb_rc +FNABS 111111 ..... ----- ..... 0010001000 . @X_tb_rc + +FCPSGN 111111 ..... ..... ..... 0000001000 . @X_rc +FMRGEW 111111 ..... ..... ..... 1111000110 - @X +FMRGOW 111111 ..... ..... ..... 1101000110 - @X + ### Floating-Point Arithmetic Instructions FADD 111111 ..... ..... ..... ----- 10101 . @A_tab @@ -541,6 +579,35 @@ FNMADDS 111011 ..... ..... ..... ..... 11111 . @A FNMSUB 111111 ..... ..... ..... ..... 11110 . @A FNMSUBS 111011 ..... ..... ..... ..... 11110 . @A +### Floating-Point Rounding and Conversion Instructions + +FRSP 111111 ..... ----- ..... 0000001100 . @X_tb_rc + +FRIN 111111 ..... ----- ..... 0110001000 . @X_tb_rc +FRIZ 111111 ..... ----- ..... 0110101000 . @X_tb_rc +FRIP 111111 ..... ----- ..... 0111001000 . @X_tb_rc +FRIM 111111 ..... ----- ..... 0111101000 . @X_tb_rc + +FCTIW 111111 ..... ----- ..... 0000001110 . @X_tb_rc +FCTIWU 111111 ..... ----- ..... 0010001110 . @X_tb_rc +FCTIWZ 111111 ..... ----- ..... 0000001111 . @X_tb_rc +FCTIWUZ 111111 ..... ----- ..... 0010001111 . @X_tb_rc + +FCTID 111111 ..... ----- ..... 1100101110 . @X_tb_rc +FCTIDU 111111 ..... ----- ..... 1110101110 . 
@X_tb_rc +FCTIDZ 111111 ..... ----- ..... 1100101111 . @X_tb_rc +FCTIDUZ 111111 ..... ----- ..... 1110101111 . @X_tb_rc + +FCFID 111111 ..... ----- ..... 1101001110 . @X_tb_rc +FCFIDS 111011 ..... ----- ..... 1101001110 . @X_tb_rc +FCFIDU 111111 ..... ----- ..... 1111001110 . @X_tb_rc +FCFIDUS 111011 ..... ----- ..... 1111001110 . @X_tb_rc + +### Floating-Point Compare Instructions + +FCMPU 111111 ... -- ..... ..... 0000000000 - @X_bf +FCMPO 111111 ... -- ..... ..... 0000100000 - @X_bf + ### Floating-Point Select Instruction FSEL 111111 ..... ..... ..... ..... 10111 . @A @@ -981,8 +1048,16 @@ LXSSP 111001 ..... ..... .............. 11 @DS STXSSP 111101 ..... ..... .............. 11 @DS LXV 111101 ..... ..... ............ . 001 @DQ_TSX STXV 111101 ..... ..... ............ . 101 @DQ_TSX -LXVP 000110 ..... ..... ............ 0000 @DQ_TSXP -STXVP 000110 ..... ..... ............ 0001 @DQ_TSXP + +# STVD PPE instruction overlaps with the LXVP and STXVP instructions +{ + STVD 000110 ..... ..... ................ @D + [ + LXVP 000110 ..... ..... ............ 0000 @DQ_TSXP + STXVP 000110 ..... ..... ............ 0001 @DQ_TSXP + ] +} + LXVX 011111 ..... ..... ..... 0100 - 01100 . @X_TSX STXVX 011111 ..... ..... ..... 0110001100 . @X_TSX LXVPX 011111 ..... ..... ..... 0101001101 - @X_TSXP @@ -1300,3 +1375,26 @@ CLRBHRB 011111 ----- ----- ----- 0110101110 - ## Misc POWER instructions ATTN 000000 00000 00000 00000 0100000000 0 + +# Fused compare-branch instructions for PPE only +%fcb_bdx 1:s10 !function=times_4 +&FCB px:bool ra rb:uint64_t bdx lk:bool +@FCB ...... .. px:1 .. ra:5 rb:5 .......... lk:1 &FCB bdx=%fcb_bdx +&FCB_bix px:bool bix ra rb:uint64_t bdx lk:bool +@FCB_bix ...... .. px:1 bix:2 ra:5 rb:5 .......... lk:1 &FCB_bix bdx=%fcb_bdx + +CMPWBC 000001 00 . .. ..... ..... .......... . @FCB_bix +CMPLWBC 000001 01 . .. ..... ..... .......... . @FCB_bix +CMPWIBC 000001 10 . .. ..... ..... .......... . @FCB_bix +BNBWI 000001 11 . 00 ..... ..... .......... . @FCB +BNBW 000001 11 . 01 ..... ..... .......... . @FCB +CLRBWIBC 000001 11 . 10 ..... ..... .......... . @FCB +CLRBWBC 000001 11 . 11 ..... ..... .......... . @FCB + +# Data Cache Block Query for PPE only +DCBQ 011111 ..... ..... ..... 0110010110 - @X + +# Rotate Doubleword Instructions for PPE only +RLDICL 011110 ..... ..... ..... ...... 000 . . @MD +RLDICR 011110 ..... ..... ..... ...... 001 . . @MD +RLDIMI 011110 ..... ..... ..... ...... 011 . . 
@MD diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c index 8a957c3..cd60893 100644 --- a/target/ppc/kvm.c +++ b/target/ppc/kvm.c @@ -479,6 +479,11 @@ static void kvmppc_hw_debug_points_init(CPUPPCState *cenv) } } +int kvm_arch_pre_create_vcpu(CPUState *cpu, Error **errp) +{ + return 0; +} + int kvm_arch_init_vcpu(CPUState *cs) { PowerPCCPU *cpu = POWERPC_CPU(cs); @@ -902,7 +907,7 @@ int kvmppc_put_books_sregs(PowerPCCPU *cpu) return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_SREGS, &sregs); } -int kvm_arch_put_registers(CPUState *cs, int level, Error **errp) +int kvm_arch_put_registers(CPUState *cs, KvmPutState level, Error **errp) { PowerPCCPU *cpu = POWERPC_CPU(cs); CPUPPCState *env = &cpu->env; @@ -1349,7 +1354,7 @@ static int kvmppc_handle_halt(PowerPCCPU *cpu) CPUState *cs = CPU(cpu); CPUPPCState *env = &cpu->env; - if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && + if (!cpu_test_interrupt(cs, CPU_INTERRUPT_HARD) && FIELD_EX64(env->msr, MSR, EE)) { cs->halted = 1; cs->exception_index = EXCP_HLT; @@ -2755,11 +2760,11 @@ int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns) int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index, uint16_t n_valid, uint16_t n_invalid, Error **errp) { - struct kvm_get_htab_header *buf; - size_t chunksize = sizeof(*buf) + n_valid * HASH_PTE_SIZE_64; + size_t chunksize = sizeof(struct kvm_get_htab_header) + + n_valid * HASH_PTE_SIZE_64; + g_autofree struct kvm_get_htab_header *buf = g_malloc(chunksize); ssize_t rc; - buf = alloca(chunksize); buf->index = index; buf->n_valid = n_valid; buf->n_invalid = n_invalid; diff --git a/target/ppc/misc_helper.c b/target/ppc/misc_helper.c index e7d9462..0e625cb 100644 --- a/target/ppc/misc_helper.c +++ b/target/ppc/misc_helper.c @@ -328,69 +328,22 @@ target_ulong helper_load_sprd(CPUPPCState *env) * accessed by powernv machines. */ PowerPCCPU *cpu = env_archcpu(env); - PnvCore *pc = pnv_cpu_state(cpu)->pnv_core; - target_ulong sprc = env->spr[SPR_POWER_SPRC]; + PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu); - if (pc->big_core) { - pc = pnv_chip_find_core(pc->chip, CPU_CORE(pc)->core_id & ~0x1); + if (pcc->load_sprd) { + return pcc->load_sprd(env); } - switch (sprc & 0x3e0) { - case 0: /* SCRATCH0-3 */ - case 1: /* SCRATCH4-7 */ - return pc->scratch[(sprc >> 3) & 0x7]; - - case 0x1e0: /* core thread state */ - if (env->excp_model == POWERPC_EXCP_POWER9) { - /* - * Only implement for POWER9 because skiboot uses it to check - * big-core mode. Other bits are unimplemented so we would - * prefer to get unimplemented message on POWER10 if it were - * used anywhere. - */ - if (pc->big_core) { - return PPC_BIT(63); - } else { - return 0; - } - } - /* fallthru */ - - default: - qemu_log_mask(LOG_UNIMP, "mfSPRD: Unimplemented SPRC:0x" - TARGET_FMT_lx"\n", sprc); - break; - } return 0; } void helper_store_sprd(CPUPPCState *env, target_ulong val) { - target_ulong sprc = env->spr[SPR_POWER_SPRC]; PowerPCCPU *cpu = env_archcpu(env); - PnvCore *pc = pnv_cpu_state(cpu)->pnv_core; - int nr; - - if (pc->big_core) { - pc = pnv_chip_find_core(pc->chip, CPU_CORE(pc)->core_id & ~0x1); - } + PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu); - switch (sprc & 0x3e0) { - case 0: /* SCRATCH0-3 */ - case 1: /* SCRATCH4-7 */ - /* - * Log stores to SCRATCH, because some firmware uses these for - * debugging and logging, but they would normally be read by the BMC, - * which is not implemented in QEMU yet. This gives a way to get at the - * information. Could also dump these upon checkstop. 
- */ - nr = (sprc >> 3) & 0x7; - pc->scratch[nr] = val; - break; - default: - qemu_log_mask(LOG_UNIMP, "mtSPRD: Unimplemented SPRC:0x" - TARGET_FMT_lx"\n", sprc); - break; + if (pcc->store_sprd) { + return pcc->store_sprd(env, val); } } diff --git a/target/ppc/mmu-hash64.h b/target/ppc/mmu-hash64.h index b8fb12a..ae8d4b3 100644 --- a/target/ppc/mmu-hash64.h +++ b/target/ppc/mmu-hash64.h @@ -1,8 +1,6 @@ #ifndef MMU_HASH64_H #define MMU_HASH64_H -#include "exec/tswap.h" - #ifndef CONFIG_USER_ONLY #ifdef TARGET_PPC64 diff --git a/target/ppc/ppc-qmp-cmds.c b/target/ppc/ppc-qmp-cmds.c index a25d86a..7022564 100644 --- a/target/ppc/ppc-qmp-cmds.c +++ b/target/ppc/ppc-qmp-cmds.c @@ -28,7 +28,8 @@ #include "qemu/ctype.h" #include "monitor/hmp-target.h" #include "monitor/hmp.h" -#include "qapi/qapi-commands-machine-target.h" +#include "qapi/error.h" +#include "qapi/qapi-commands-machine.h" #include "cpu-models.h" #include "cpu-qom.h" @@ -175,6 +176,15 @@ int target_get_monitor_def(CPUState *cs, const char *name, uint64_t *pval) return -EINVAL; } +CpuModelExpansionInfo * +qmp_query_cpu_model_expansion(CpuModelExpansionType type, + CpuModelInfo *model, + Error **errp) +{ + error_setg(errp, "CPU model expansion is not supported on this target"); + return NULL; +} + static void ppc_cpu_defs_entry(gpointer data, gpointer user_data) { ObjectClass *oc = data; diff --git a/target/ppc/tcg-excp_helper.c b/target/ppc/tcg-excp_helper.c index f835be5..edecfb8 100644 --- a/target/ppc/tcg-excp_helper.c +++ b/target/ppc/tcg-excp_helper.c @@ -229,6 +229,18 @@ void ppc_cpu_do_unaligned_access(CPUState *cs, vaddr vaddr, case POWERPC_MMU_BOOKE206: env->spr[SPR_BOOKE_DEAR] = vaddr; break; + case POWERPC_MMU_REAL: + if (env->flags & POWERPC_FLAG_PPE42) { + env->spr[SPR_PPE42_EDR] = vaddr; + if (access_type == MMU_DATA_STORE) { + env->spr[SPR_PPE42_ISR] |= PPE42_ISR_ST; + } else { + env->spr[SPR_PPE42_ISR] &= ~PPE42_ISR_ST; + } + } else { + env->spr[SPR_DAR] = vaddr; + } + break; default: env->spr[SPR_DAR] = vaddr; break; diff --git a/target/ppc/translate.c b/target/ppc/translate.c index 27f90c3..17e6d07 100644 --- a/target/ppc/translate.c +++ b/target/ppc/translate.c @@ -209,6 +209,11 @@ struct DisasContext { #define DISAS_CHAIN DISAS_TARGET_2 /* lookup next tb, pc updated */ #define DISAS_CHAIN_UPDATE DISAS_TARGET_3 /* lookup next tb, pc stale */ +static inline bool is_ppe(const DisasContext *ctx) +{ + return !!(ctx->flags & POWERPC_FLAG_PPE42); +} + /* Return true iff byteswap is needed in a scalar memop */ static inline bool need_byteswap(const DisasContext *ctx) { @@ -556,11 +561,8 @@ void spr_access_nop(DisasContext *ctx, int sprn, int gprn) #endif -/* SPR common to all PowerPC */ -/* XER */ -void spr_read_xer(DisasContext *ctx, int gprn, int sprn) +static void gen_get_xer(DisasContext *ctx, TCGv dst) { - TCGv dst = cpu_gpr[gprn]; TCGv t0 = tcg_temp_new(); TCGv t1 = tcg_temp_new(); TCGv t2 = tcg_temp_new(); @@ -579,9 +581,16 @@ void spr_read_xer(DisasContext *ctx, int gprn, int sprn) } } -void spr_write_xer(DisasContext *ctx, int sprn, int gprn) +/* SPR common to all PowerPC */ +/* XER */ +void spr_read_xer(DisasContext *ctx, int gprn, int sprn) +{ + TCGv dst = cpu_gpr[gprn]; + gen_get_xer(ctx, dst); +} + +static void gen_set_xer(DisasContext *ctx, TCGv src) { - TCGv src = cpu_gpr[gprn]; /* Write all flags, while reading back check for isa300 */ tcg_gen_andi_tl(cpu_xer, src, ~((1u << XER_SO) | @@ -594,6 +603,12 @@ void spr_write_xer(DisasContext *ctx, int sprn, int gprn) tcg_gen_extract_tl(cpu_ca, src, XER_CA, 1); 
} +void spr_write_xer(DisasContext *ctx, int sprn, int gprn) +{ + TCGv src = cpu_gpr[gprn]; + gen_set_xer(ctx, src); +} + /* LR */ void spr_read_lr(DisasContext *ctx, int gprn, int sprn) { @@ -3653,16 +3668,17 @@ static void gen_lookup_and_goto_ptr(DisasContext *ctx) } /*** Branch ***/ -static void gen_goto_tb(DisasContext *ctx, int n, target_ulong dest) +static void gen_goto_tb(DisasContext *ctx, unsigned tb_slot_idx, + target_ulong dest) { if (NARROW_MODE(ctx)) { dest = (uint32_t) dest; } if (use_goto_tb(ctx, dest)) { pmu_count_insns(ctx); - tcg_gen_goto_tb(n); + tcg_gen_goto_tb(tb_slot_idx); tcg_gen_movi_tl(cpu_nip, dest & ~3); - tcg_gen_exit_tb(ctx->base.tb, n); + tcg_gen_exit_tb(ctx->base.tb, tb_slot_idx); } else { tcg_gen_movi_tl(cpu_nip, dest & ~3); gen_lookup_and_goto_ptr(ctx); @@ -4264,8 +4280,10 @@ static void gen_mtmsr(DisasContext *ctx) /* L=1 form only updates EE and RI */ mask &= (1ULL << MSR_RI) | (1ULL << MSR_EE); } else { - /* mtmsr does not alter S, ME, or LE */ - mask &= ~((1ULL << MSR_LE) | (1ULL << MSR_ME) | (1ULL << MSR_S)); + if (likely(!(ctx->insns_flags2 & PPC2_PPE42))) { + /* mtmsr does not alter S, ME, or LE */ + mask &= ~((1ULL << MSR_LE) | (1ULL << MSR_ME) | (1ULL << MSR_S)); + } /* * XXX: we need to update nip before the store if we enter @@ -5753,6 +5771,8 @@ static bool resolve_PLS_D(DisasContext *ctx, arg_D *d, arg_PLS_D *a) #include "translate/bhrb-impl.c.inc" +#include "translate/ppe-impl.c.inc" + /* Handles lfdp */ static void gen_dform39(DisasContext *ctx) { diff --git a/target/ppc/translate/fp-impl.c.inc b/target/ppc/translate/fp-impl.c.inc index a66b833..464fb1d 100644 --- a/target/ppc/translate/fp-impl.c.inc +++ b/target/ppc/translate/fp-impl.c.inc @@ -98,28 +98,26 @@ static bool do_helper_ac(DisasContext *ctx, arg_A_tac *a, return true; } -#define GEN_FLOAT_B(name, op2, op3, set_fprf, type) \ -static void gen_f##name(DisasContext *ctx) \ -{ \ - TCGv_i64 t0; \ - TCGv_i64 t1; \ - if (unlikely(!ctx->fpu_enabled)) { \ - gen_exception(ctx, POWERPC_EXCP_FPU); \ - return; \ - } \ - t0 = tcg_temp_new_i64(); \ - t1 = tcg_temp_new_i64(); \ - gen_reset_fpstatus(); \ - get_fpr(t0, rB(ctx->opcode)); \ - gen_helper_f##name(t1, tcg_env, t0); \ - set_fpr(rD(ctx->opcode), t1); \ - if (set_fprf) { \ - gen_helper_compute_fprf_float64(tcg_env, t1); \ - } \ - gen_helper_float_check_status(tcg_env); \ - if (unlikely(Rc(ctx->opcode) != 0)) { \ - gen_set_cr1_from_fpscr(ctx); \ - } \ +static bool do_round_convert(DisasContext *ctx, arg_X_tb_rc *a, + void (*helper)(TCGv_i64, TCGv_env, TCGv_i64), + bool set_fprf) +{ + TCGv_i64 t0, t1; + REQUIRE_FPU(ctx); + t0 = tcg_temp_new_i64(); + t1 = tcg_temp_new_i64(); + gen_reset_fpstatus(); + get_fpr(t0, a->rb); + helper(t1, tcg_env, t0); + set_fpr(a->rt, t1); + if (set_fprf) { + gen_helper_compute_fprf_float64(tcg_env, t1); + } + gen_helper_float_check_status(tcg_env); + if (unlikely(a->rc)) { + gen_set_cr1_from_fpscr(ctx); + } + return true; } static bool do_helper_bs(DisasContext *ctx, arg_A_tb *a, @@ -213,41 +211,26 @@ TRANS(FSQRT, do_helper_fsqrt, gen_helper_FSQRT); TRANS(FSQRTS, do_helper_fsqrt, gen_helper_FSQRTS); /*** Floating-Point round & convert ***/ -/* fctiw */ -GEN_FLOAT_B(ctiw, 0x0E, 0x00, 0, PPC_FLOAT); -/* fctiwu */ -GEN_FLOAT_B(ctiwu, 0x0E, 0x04, 0, PPC2_FP_CVT_ISA206); -/* fctiwz */ -GEN_FLOAT_B(ctiwz, 0x0F, 0x00, 0, PPC_FLOAT); -/* fctiwuz */ -GEN_FLOAT_B(ctiwuz, 0x0F, 0x04, 0, PPC2_FP_CVT_ISA206); -/* frsp */ -GEN_FLOAT_B(rsp, 0x0C, 0x00, 1, PPC_FLOAT); -/* fcfid */ -GEN_FLOAT_B(cfid, 0x0E, 0x1A, 1, 
PPC2_FP_CVT_S64); -/* fcfids */ -GEN_FLOAT_B(cfids, 0x0E, 0x1A, 0, PPC2_FP_CVT_ISA206); -/* fcfidu */ -GEN_FLOAT_B(cfidu, 0x0E, 0x1E, 0, PPC2_FP_CVT_ISA206); -/* fcfidus */ -GEN_FLOAT_B(cfidus, 0x0E, 0x1E, 0, PPC2_FP_CVT_ISA206); -/* fctid */ -GEN_FLOAT_B(ctid, 0x0E, 0x19, 0, PPC2_FP_CVT_S64); -/* fctidu */ -GEN_FLOAT_B(ctidu, 0x0E, 0x1D, 0, PPC2_FP_CVT_ISA206); -/* fctidz */ -GEN_FLOAT_B(ctidz, 0x0F, 0x19, 0, PPC2_FP_CVT_S64); -/* fctidu */ -GEN_FLOAT_B(ctiduz, 0x0F, 0x1D, 0, PPC2_FP_CVT_ISA206); - -/* frin */ -GEN_FLOAT_B(rin, 0x08, 0x0C, 1, PPC_FLOAT_EXT); -/* friz */ -GEN_FLOAT_B(riz, 0x08, 0x0D, 1, PPC_FLOAT_EXT); -/* frip */ -GEN_FLOAT_B(rip, 0x08, 0x0E, 1, PPC_FLOAT_EXT); -/* frim */ -GEN_FLOAT_B(rim, 0x08, 0x0F, 1, PPC_FLOAT_EXT); +TRANS_FLAGS(FLOAT, FRSP, do_round_convert, gen_helper_FRSP, true); +TRANS_FLAGS(FLOAT_EXT, FRIN, do_round_convert, gen_helper_FRIN, true); +TRANS_FLAGS(FLOAT_EXT, FRIZ, do_round_convert, gen_helper_FRIZ, true); +TRANS_FLAGS(FLOAT_EXT, FRIP, do_round_convert, gen_helper_FRIP, true); +TRANS_FLAGS(FLOAT_EXT, FRIM, do_round_convert, gen_helper_FRIM, true); + +TRANS_FLAGS(FLOAT, FCTIW, do_round_convert, gen_helper_FCTIW, false); +TRANS_FLAGS2(FP_CVT_ISA206, FCTIWU, do_round_convert, gen_helper_FCTIWU, false); +TRANS_FLAGS(FLOAT, FCTIWZ, do_round_convert, gen_helper_FCTIWZ, false); +TRANS_FLAGS2(FP_CVT_ISA206, FCTIWUZ, do_round_convert, gen_helper_FCTIWUZ, false); + +TRANS_FLAGS2(FP_CVT_S64, FCTID, do_round_convert, gen_helper_FCTID, false); +TRANS_FLAGS2(FP_CVT_ISA206, FCTIDU, do_round_convert, gen_helper_FCTIDU, false); +TRANS_FLAGS2(FP_CVT_S64, FCTIDZ, do_round_convert, gen_helper_FCTIDZ, false); +TRANS_FLAGS2(FP_CVT_ISA206, FCTIDUZ, do_round_convert, gen_helper_FCTIDUZ, false); + +TRANS_FLAGS2(FP_CVT_S64, FCFID, do_round_convert, gen_helper_FCFID, true); +TRANS_FLAGS2(FP_CVT_ISA206, FCFIDS, do_round_convert, gen_helper_FCFIDS, false); +TRANS_FLAGS2(FP_CVT_ISA206, FCFIDU, do_round_convert, gen_helper_FCFIDU, false); +TRANS_FLAGS2(FP_CVT_ISA206, FCFIDUS, do_round_convert, gen_helper_FCFIDUS, false); static bool trans_FTDIV(DisasContext *ctx, arg_X_bf *a) { @@ -274,183 +257,117 @@ static bool trans_FTSQRT(DisasContext *ctx, arg_X_bf_b *a) } /*** Floating-Point compare ***/ - -/* fcmpo */ -static void gen_fcmpo(DisasContext *ctx) +static bool do_helper_cmp(DisasContext *ctx, arg_X_bf *a, + void (*helper)(TCGv_env, TCGv_i64, TCGv_i64, + TCGv_i32)) { TCGv_i32 crf; - TCGv_i64 t0; - TCGv_i64 t1; - if (unlikely(!ctx->fpu_enabled)) { - gen_exception(ctx, POWERPC_EXCP_FPU); - return; - } + TCGv_i64 t0, t1; + REQUIRE_INSNS_FLAGS(ctx, FLOAT); + REQUIRE_FPU(ctx); t0 = tcg_temp_new_i64(); t1 = tcg_temp_new_i64(); gen_reset_fpstatus(); - crf = tcg_constant_i32(crfD(ctx->opcode)); - get_fpr(t0, rA(ctx->opcode)); - get_fpr(t1, rB(ctx->opcode)); - gen_helper_fcmpo(tcg_env, t0, t1, crf); + crf = tcg_constant_i32(a->bf); + get_fpr(t0, a->ra); + get_fpr(t1, a->rb); + helper(tcg_env, t0, t1, crf); gen_helper_float_check_status(tcg_env); + return true; } -/* fcmpu */ -static void gen_fcmpu(DisasContext *ctx) -{ - TCGv_i32 crf; - TCGv_i64 t0; - TCGv_i64 t1; - if (unlikely(!ctx->fpu_enabled)) { - gen_exception(ctx, POWERPC_EXCP_FPU); - return; - } - t0 = tcg_temp_new_i64(); - t1 = tcg_temp_new_i64(); - gen_reset_fpstatus(); - crf = tcg_constant_i32(crfD(ctx->opcode)); - get_fpr(t0, rA(ctx->opcode)); - get_fpr(t1, rB(ctx->opcode)); - gen_helper_fcmpu(tcg_env, t0, t1, crf); - gen_helper_float_check_status(tcg_env); -} +TRANS(FCMPU, do_helper_cmp, gen_helper_FCMPU); +TRANS(FCMPO, 
do_helper_cmp, gen_helper_FCMPO); /*** Floating-point move ***/ -/* fabs */ -/* XXX: beware that fabs never checks for NaNs nor update FPSCR */ -static void gen_fabs(DisasContext *ctx) -{ - TCGv_i64 t0; - TCGv_i64 t1; - if (unlikely(!ctx->fpu_enabled)) { - gen_exception(ctx, POWERPC_EXCP_FPU); - return; - } - t0 = tcg_temp_new_i64(); - t1 = tcg_temp_new_i64(); - get_fpr(t0, rB(ctx->opcode)); - tcg_gen_andi_i64(t1, t0, ~(1ULL << 63)); - set_fpr(rD(ctx->opcode), t1); - if (unlikely(Rc(ctx->opcode))) { - gen_set_cr1_from_fpscr(ctx); - } -} /* fmr - fmr. */ /* XXX: beware that fmr never checks for NaNs nor update FPSCR */ -static void gen_fmr(DisasContext *ctx) +static bool trans_FMR(DisasContext *ctx, arg_FMR *a) { TCGv_i64 t0; - if (unlikely(!ctx->fpu_enabled)) { - gen_exception(ctx, POWERPC_EXCP_FPU); - return; - } + REQUIRE_INSNS_FLAGS(ctx, FLOAT); + REQUIRE_FPU(ctx); t0 = tcg_temp_new_i64(); - get_fpr(t0, rB(ctx->opcode)); - set_fpr(rD(ctx->opcode), t0); - if (unlikely(Rc(ctx->opcode))) { + get_fpr(t0, a->rb); + set_fpr(a->rt, t0); + if (unlikely(a->rc)) { gen_set_cr1_from_fpscr(ctx); } + return true; } -/* fnabs */ -/* XXX: beware that fnabs never checks for NaNs nor update FPSCR */ -static void gen_fnabs(DisasContext *ctx) +/* XXX: beware that f{neg, abs, nabs} never checks for NaNs nor update FPSCR */ +static bool do_move_b(DisasContext *ctx, arg_X_tb_rc *a, int64_t val, + void (*tcg_op)(TCGv_i64, TCGv_i64, int64_t)) { - TCGv_i64 t0; - TCGv_i64 t1; - if (unlikely(!ctx->fpu_enabled)) { - gen_exception(ctx, POWERPC_EXCP_FPU); - return; - } + TCGv_i64 t0, t1; + REQUIRE_INSNS_FLAGS(ctx, FLOAT); + REQUIRE_FPU(ctx); t0 = tcg_temp_new_i64(); t1 = tcg_temp_new_i64(); - get_fpr(t0, rB(ctx->opcode)); - tcg_gen_ori_i64(t1, t0, 1ULL << 63); - set_fpr(rD(ctx->opcode), t1); - if (unlikely(Rc(ctx->opcode))) { + get_fpr(t0, a->rb); + tcg_op(t1, t0, val); + set_fpr(a->rt, t1); + if (unlikely(a->rc)) { gen_set_cr1_from_fpscr(ctx); } + return true; } -/* fneg */ -/* XXX: beware that fneg never checks for NaNs nor update FPSCR */ -static void gen_fneg(DisasContext *ctx) -{ - TCGv_i64 t0; - TCGv_i64 t1; - if (unlikely(!ctx->fpu_enabled)) { - gen_exception(ctx, POWERPC_EXCP_FPU); - return; - } - t0 = tcg_temp_new_i64(); - t1 = tcg_temp_new_i64(); - get_fpr(t0, rB(ctx->opcode)); - tcg_gen_xori_i64(t1, t0, 1ULL << 63); - set_fpr(rD(ctx->opcode), t1); - if (unlikely(Rc(ctx->opcode))) { - gen_set_cr1_from_fpscr(ctx); - } -} +TRANS(FNEG, do_move_b, 1ULL << 63, tcg_gen_xori_i64); +TRANS(FABS, do_move_b, ~(1ULL << 63), tcg_gen_andi_i64); +TRANS(FNABS, do_move_b, 1ULL << 63, tcg_gen_ori_i64); /* fcpsgn: PowerPC 2.05 specification */ /* XXX: beware that fcpsgn never checks for NaNs nor update FPSCR */ -static void gen_fcpsgn(DisasContext *ctx) +static bool trans_FCPSGN(DisasContext *ctx, arg_FCPSGN *a) { - TCGv_i64 t0; - TCGv_i64 t1; - TCGv_i64 t2; - if (unlikely(!ctx->fpu_enabled)) { - gen_exception(ctx, POWERPC_EXCP_FPU); - return; - } + TCGv_i64 t0, t1, t2; + REQUIRE_INSNS_FLAGS2(ctx, ISA205); + REQUIRE_FPU(ctx); t0 = tcg_temp_new_i64(); t1 = tcg_temp_new_i64(); t2 = tcg_temp_new_i64(); - get_fpr(t0, rA(ctx->opcode)); - get_fpr(t1, rB(ctx->opcode)); + get_fpr(t0, a->ra); + get_fpr(t1, a->rb); tcg_gen_deposit_i64(t2, t0, t1, 0, 63); - set_fpr(rD(ctx->opcode), t2); - if (unlikely(Rc(ctx->opcode))) { + set_fpr(a->rt, t2); + if (unlikely(a->rc)) { gen_set_cr1_from_fpscr(ctx); } + return true; } -static void gen_fmrgew(DisasContext *ctx) +static bool trans_FMRGEW(DisasContext *ctx, arg_FMRGEW *a) { - TCGv_i64 b0; - 
TCGv_i64 t0; - TCGv_i64 t1; - if (unlikely(!ctx->fpu_enabled)) { - gen_exception(ctx, POWERPC_EXCP_FPU); - return; - } - b0 = tcg_temp_new_i64(); + TCGv_i64 t0, t1, t2; + REQUIRE_INSNS_FLAGS2(ctx, VSX207); + REQUIRE_FPU(ctx); t0 = tcg_temp_new_i64(); t1 = tcg_temp_new_i64(); - get_fpr(t0, rB(ctx->opcode)); - tcg_gen_shri_i64(b0, t0, 32); - get_fpr(t0, rA(ctx->opcode)); - tcg_gen_deposit_i64(t1, t0, b0, 0, 32); - set_fpr(rD(ctx->opcode), t1); + t2 = tcg_temp_new_i64(); + get_fpr(t1, a->rb); + tcg_gen_shri_i64(t0, t1, 32); + get_fpr(t1, a->ra); + tcg_gen_deposit_i64(t2, t1, t0, 0, 32); + set_fpr(a->rt, t2); + return true; } -static void gen_fmrgow(DisasContext *ctx) +static bool trans_FMRGOW(DisasContext *ctx, arg_FMRGOW *a) { - TCGv_i64 t0; - TCGv_i64 t1; - TCGv_i64 t2; - if (unlikely(!ctx->fpu_enabled)) { - gen_exception(ctx, POWERPC_EXCP_FPU); - return; - } + TCGv_i64 t0, t1, t2; + REQUIRE_INSNS_FLAGS2(ctx, VSX207); + REQUIRE_FPU(ctx); t0 = tcg_temp_new_i64(); t1 = tcg_temp_new_i64(); t2 = tcg_temp_new_i64(); - get_fpr(t0, rB(ctx->opcode)); - get_fpr(t1, rA(ctx->opcode)); + get_fpr(t0, a->rb); + get_fpr(t1, a->ra); tcg_gen_deposit_i64(t2, t0, t1, 32, 32); - set_fpr(rD(ctx->opcode), t2); + set_fpr(a->rt, t2); + return true; } /*** Floating-Point status & ctrl register ***/ @@ -479,7 +396,7 @@ static void gen_mcrfs(DisasContext *ctx) tcg_gen_extu_tl_i64(tnew_fpscr, cpu_fpscr); /* Only the exception bits (including FX) should be cleared if read */ tcg_gen_andi_i64(tnew_fpscr, tnew_fpscr, - ~((0xF << shift) & FP_EX_CLEAR_BITS)); + ~(MAKE_64BIT_MASK(shift, 4) & FP_EX_CLEAR_BITS)); /* FEX and VX need to be updated, so don't set fpscr directly */ tmask = tcg_constant_i32(1 << nibble); gen_helper_store_fpscr(tcg_env, tnew_fpscr, tmask); @@ -1051,8 +968,6 @@ TRANS(STFDX, do_lsfp_X, false, true, false) TRANS(STFDUX, do_lsfp_X, true, true, false) TRANS(PSTFD, do_lsfp_PLS_D, false, true, false) -#undef GEN_FLOAT_B - #undef GEN_LDF #undef GEN_LDUF #undef GEN_LDUXF diff --git a/target/ppc/translate/fp-ops.c.inc b/target/ppc/translate/fp-ops.c.inc index cef4b5d..9bc9c3a 100644 --- a/target/ppc/translate/fp-ops.c.inc +++ b/target/ppc/translate/fp-ops.c.inc @@ -1,24 +1,3 @@ -#define GEN_FLOAT_B(name, op2, op3, set_fprf, type) \ -GEN_HANDLER(f##name, 0x3F, op2, op3, 0x001F0000, type) - -GEN_FLOAT_B(ctiw, 0x0E, 0x00, 0, PPC_FLOAT), -GEN_HANDLER_E(fctiwu, 0x3F, 0x0E, 0x04, 0, PPC_NONE, PPC2_FP_CVT_ISA206), -GEN_FLOAT_B(ctiwz, 0x0F, 0x00, 0, PPC_FLOAT), -GEN_HANDLER_E(fctiwuz, 0x3F, 0x0F, 0x04, 0, PPC_NONE, PPC2_FP_CVT_ISA206), -GEN_FLOAT_B(rsp, 0x0C, 0x00, 1, PPC_FLOAT), -GEN_HANDLER_E(fcfid, 0x3F, 0x0E, 0x1A, 0x001F0000, PPC_NONE, PPC2_FP_CVT_S64), -GEN_HANDLER_E(fcfids, 0x3B, 0x0E, 0x1A, 0, PPC_NONE, PPC2_FP_CVT_ISA206), -GEN_HANDLER_E(fcfidu, 0x3F, 0x0E, 0x1E, 0, PPC_NONE, PPC2_FP_CVT_ISA206), -GEN_HANDLER_E(fcfidus, 0x3B, 0x0E, 0x1E, 0, PPC_NONE, PPC2_FP_CVT_ISA206), -GEN_HANDLER_E(fctid, 0x3F, 0x0E, 0x19, 0x001F0000, PPC_NONE, PPC2_FP_CVT_S64), -GEN_HANDLER_E(fctidu, 0x3F, 0x0E, 0x1D, 0, PPC_NONE, PPC2_FP_CVT_ISA206), -GEN_HANDLER_E(fctidz, 0x3F, 0x0F, 0x19, 0x001F0000, PPC_NONE, PPC2_FP_CVT_S64), -GEN_HANDLER_E(fctiduz, 0x3F, 0x0F, 0x1D, 0, PPC_NONE, PPC2_FP_CVT_ISA206), -GEN_FLOAT_B(rin, 0x08, 0x0C, 1, PPC_FLOAT_EXT), -GEN_FLOAT_B(riz, 0x08, 0x0D, 1, PPC_FLOAT_EXT), -GEN_FLOAT_B(rip, 0x08, 0x0E, 1, PPC_FLOAT_EXT), -GEN_FLOAT_B(rim, 0x08, 0x0F, 1, PPC_FLOAT_EXT), - GEN_HANDLER_E(lfdepx, 0x1F, 0x1F, 0x12, 0x00000001, PPC_NONE, PPC2_BOOKE206), GEN_HANDLER_E(lfiwax, 0x1f, 0x17, 0x1a, 0x00000001, PPC_NONE, 
PPC2_ISA205), GEN_HANDLER_E(lfiwzx, 0x1f, 0x17, 0x1b, 0x1, PPC_NONE, PPC2_FP_CVT_ISA206), @@ -31,15 +10,6 @@ GEN_STXF(stfiw, st32fiw, 0x17, 0x1E, PPC_FLOAT_STFIWX) GEN_HANDLER_E(stfdepx, 0x1F, 0x1F, 0x16, 0x00000001, PPC_NONE, PPC2_BOOKE206), GEN_HANDLER_E(stfdpx, 0x1F, 0x17, 0x1C, 0x00200001, PPC_NONE, PPC2_ISA205), -GEN_HANDLER(fcmpo, 0x3F, 0x00, 0x01, 0x00600001, PPC_FLOAT), -GEN_HANDLER(fcmpu, 0x3F, 0x00, 0x00, 0x00600001, PPC_FLOAT), -GEN_HANDLER(fabs, 0x3F, 0x08, 0x08, 0x001F0000, PPC_FLOAT), -GEN_HANDLER(fmr, 0x3F, 0x08, 0x02, 0x001F0000, PPC_FLOAT), -GEN_HANDLER(fnabs, 0x3F, 0x08, 0x04, 0x001F0000, PPC_FLOAT), -GEN_HANDLER(fneg, 0x3F, 0x08, 0x01, 0x001F0000, PPC_FLOAT), -GEN_HANDLER_E(fcpsgn, 0x3F, 0x08, 0x00, 0x00000000, PPC_NONE, PPC2_ISA205), -GEN_HANDLER_E(fmrgew, 0x3F, 0x06, 0x1E, 0x00000001, PPC_NONE, PPC2_VSX207), -GEN_HANDLER_E(fmrgow, 0x3F, 0x06, 0x1A, 0x00000001, PPC_NONE, PPC2_VSX207), GEN_HANDLER(mcrfs, 0x3F, 0x00, 0x02, 0x0063F801, PPC_FLOAT), GEN_HANDLER(mtfsb0, 0x3F, 0x06, 0x02, 0x001FF800, PPC_FLOAT), GEN_HANDLER(mtfsb1, 0x3F, 0x06, 0x01, 0x001FF800, PPC_FLOAT), diff --git a/target/ppc/translate/ppe-impl.c.inc b/target/ppc/translate/ppe-impl.c.inc new file mode 100644 index 0000000..0a05903 --- /dev/null +++ b/target/ppc/translate/ppe-impl.c.inc @@ -0,0 +1,609 @@ +/* + * IBM PPE Instructions + * + * Copyright (c) 2025, IBM Corporation. + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + + +static bool vdr_is_valid(uint32_t vdr) +{ + const uint32_t valid_bitmap = 0xf00003ff; + return !!((1ul << (vdr & 0x1f)) & valid_bitmap); +} + +static bool ppe_gpr_is_valid(uint32_t reg) +{ + const uint32_t valid_bitmap = 0xf00027ff; + return !!((1ul << (reg & 0x1f)) & valid_bitmap); +} + +#define CHECK_VDR(CTX, VDR) \ + do { \ + if (unlikely(!vdr_is_valid(VDR))) { \ + gen_invalid(CTX); \ + return true; \ + } \ + } while (0) + +#define CHECK_PPE_GPR(CTX, REG) \ + do { \ + if (unlikely(!ppe_gpr_is_valid(REG))) { \ + gen_invalid(CTX); \ + return true; \ + } \ + } while (0) + +#define VDR_PAIR_REG(VDR) (((VDR) + 1) & 0x1f) + +#define CHECK_PPE_LEVEL(CTX, LVL) \ + do { \ + if (unlikely(!((CTX)->insns_flags2 & (LVL)))) { \ + gen_invalid(CTX); \ + return true; \ + } \ + } while (0) + +static bool trans_LCXU(DisasContext *ctx, arg_LCXU *a) +{ + int i; + TCGv base, EA; + TCGv lo, hi; + TCGv_i64 t8; + const uint8_t vd_list[] = {9, 7, 5, 3, 0}; + + if (unlikely(!is_ppe(ctx))) { + return false; + } + CHECK_PPE_LEVEL(ctx, PPC2_PPE42X); + CHECK_PPE_GPR(ctx, a->rt); + + if (unlikely((a->rt != a->ra) || (a->ra == 0) || (a->si < 0xB))) { + gen_invalid(ctx); + return true; + } + + EA = tcg_temp_new(); + base = tcg_temp_new(); + + tcg_gen_addi_tl(base, cpu_gpr[a->ra], a->si * 8); + gen_store_spr(SPR_PPE42_EDR, base); + + t8 = tcg_temp_new_i64(); + + tcg_gen_addi_tl(EA, base, -8); + tcg_gen_qemu_ld_i64(t8, EA, ctx->mem_idx, DEF_MEMOP(MO_64) | MO_ALIGN); + tcg_gen_extr_i64_tl(cpu_gpr[31], cpu_gpr[30], t8); + + tcg_gen_addi_tl(EA, EA, -8); + tcg_gen_qemu_ld_i64(t8, EA, ctx->mem_idx, DEF_MEMOP(MO_64) | MO_ALIGN); + tcg_gen_extr_i64_tl(cpu_gpr[29], cpu_gpr[28], t8); + + lo = tcg_temp_new(); + hi = tcg_temp_new(); + + tcg_gen_addi_tl(EA, EA, -8); + tcg_gen_qemu_ld_i64(t8, EA, ctx->mem_idx, DEF_MEMOP(MO_64) | MO_ALIGN); + tcg_gen_extr_i64_tl(lo, hi, t8); + gen_store_spr(SPR_SRR0, hi); + gen_store_spr(SPR_SRR1, lo); + + tcg_gen_addi_tl(EA, EA, -8); + tcg_gen_qemu_ld_i64(t8, EA, ctx->mem_idx, DEF_MEMOP(MO_64) | MO_ALIGN); + tcg_gen_extr_i64_tl(lo, hi, t8); + gen_set_xer(ctx, hi); + 
tcg_gen_mov_tl(cpu_ctr, lo); + + for (i = 0; i < sizeof(vd_list); i++) { + int vd = vd_list[i]; + tcg_gen_addi_tl(EA, EA, -8); + tcg_gen_qemu_ld_i64(t8, EA, ctx->mem_idx, DEF_MEMOP(MO_64) | MO_ALIGN); + tcg_gen_extr_i64_tl(cpu_gpr[VDR_PAIR_REG(vd)], cpu_gpr[vd], t8); + } + + tcg_gen_addi_tl(EA, EA, -8); + tcg_gen_qemu_ld_i64(t8, EA, ctx->mem_idx, DEF_MEMOP(MO_64) | MO_ALIGN); + tcg_gen_extr_i64_tl(lo, hi, t8); + tcg_gen_shri_tl(hi, hi, 28); + tcg_gen_trunc_tl_i32(cpu_crf[0], hi); + gen_store_spr(SPR_SPRG0, lo); + + tcg_gen_addi_tl(EA, base, 4); + tcg_gen_qemu_ld_tl(cpu_lr, EA, ctx->mem_idx, DEF_MEMOP(MO_32) | MO_ALIGN); + tcg_gen_mov_tl(cpu_gpr[a->ra], base); + return true; +} + +static bool trans_LSKU(DisasContext *ctx, arg_LSKU *a) +{ + int64_t n; + TCGv base, EA; + TCGv lo, hi; + TCGv_i64 t8; + + if (unlikely(!is_ppe(ctx))) { + return false; + } + + CHECK_PPE_LEVEL(ctx, PPC2_PPE42X); + CHECK_PPE_GPR(ctx, a->rt); + + if (unlikely((a->rt != a->ra) || (a->ra == 0) || + (a->si & PPC_BIT(0)) || (a->si == 0))) { + gen_invalid(ctx); + return true; + } + + EA = tcg_temp_new(); + base = tcg_temp_new(); + gen_addr_register(ctx, base); + + + tcg_gen_addi_tl(base, base, a->si * 8); + gen_store_spr(SPR_PPE42_EDR, base); + + n = a->si - 1; + t8 = tcg_temp_new_i64(); + if (n > 0) { + tcg_gen_addi_tl(EA, base, -8); + tcg_gen_qemu_ld_i64(t8, EA, ctx->mem_idx, DEF_MEMOP(MO_64) | MO_ALIGN); + hi = cpu_gpr[30]; + lo = cpu_gpr[31]; + tcg_gen_extr_i64_tl(lo, hi, t8); + } + if (n > 1) { + tcg_gen_addi_tl(EA, base, -16); + tcg_gen_qemu_ld_i64(t8, EA, ctx->mem_idx, DEF_MEMOP(MO_64) | MO_ALIGN); + hi = cpu_gpr[28]; + lo = cpu_gpr[29]; + tcg_gen_extr_i64_tl(lo, hi, t8); + } + tcg_gen_addi_tl(EA, base, 4); + tcg_gen_qemu_ld_tl(cpu_lr, EA, ctx->mem_idx, DEF_MEMOP(MO_32) | MO_ALIGN); + tcg_gen_mov_tl(cpu_gpr[a->ra], base); + return true; +} + +static bool trans_STCXU(DisasContext *ctx, arg_STCXU *a) +{ + TCGv EA; + TCGv lo, hi; + TCGv_i64 t8; + int i; + const uint8_t vd_list[] = {9, 7, 5, 3, 0}; + + if (unlikely(!is_ppe(ctx))) { + return false; + } + + CHECK_PPE_LEVEL(ctx, PPC2_PPE42X); + CHECK_PPE_GPR(ctx, a->rt); + + if (unlikely((a->rt != a->ra) || (a->ra == 0) || !(a->si & PPC_BIT(0)))) { + gen_invalid(ctx); + return true; + } + + EA = tcg_temp_new(); + tcg_gen_addi_tl(EA, cpu_gpr[a->ra], 4); + tcg_gen_qemu_st_tl(cpu_lr, EA, ctx->mem_idx, DEF_MEMOP(MO_32) | MO_ALIGN); + + gen_store_spr(SPR_PPE42_EDR, cpu_gpr[a->ra]); + + t8 = tcg_temp_new_i64(); + + tcg_gen_concat_tl_i64(t8, cpu_gpr[31], cpu_gpr[30]); + tcg_gen_addi_tl(EA, cpu_gpr[a->ra], -8); + tcg_gen_qemu_st_i64(t8, EA, ctx->mem_idx, DEF_MEMOP(MO_64) | MO_ALIGN); + + tcg_gen_concat_tl_i64(t8, cpu_gpr[29], cpu_gpr[28]); + tcg_gen_addi_tl(EA, EA, -8); + tcg_gen_qemu_st_i64(t8, EA, ctx->mem_idx, DEF_MEMOP(MO_64) | MO_ALIGN); + + lo = tcg_temp_new(); + hi = tcg_temp_new(); + + gen_load_spr(hi, SPR_SRR0); + gen_load_spr(lo, SPR_SRR1); + tcg_gen_concat_tl_i64(t8, lo, hi); + tcg_gen_addi_tl(EA, EA, -8); + tcg_gen_qemu_st_i64(t8, EA, ctx->mem_idx, DEF_MEMOP(MO_64) | MO_ALIGN); + + gen_get_xer(ctx, hi); + tcg_gen_mov_tl(lo, cpu_ctr); + tcg_gen_concat_tl_i64(t8, lo, hi); + tcg_gen_addi_tl(EA, EA, -8); + tcg_gen_qemu_st_i64(t8, EA, ctx->mem_idx, DEF_MEMOP(MO_64) | MO_ALIGN); + + for (i = 0; i < sizeof(vd_list); i++) { + int vd = vd_list[i]; + tcg_gen_concat_tl_i64(t8, cpu_gpr[VDR_PAIR_REG(vd)], cpu_gpr[vd]); + tcg_gen_addi_tl(EA, EA, -8); + tcg_gen_qemu_st_i64(t8, EA, ctx->mem_idx, DEF_MEMOP(MO_64) | MO_ALIGN); + } + + gen_load_spr(lo, SPR_SPRG0); + 
tcg_gen_extu_i32_tl(hi, cpu_crf[0]); + tcg_gen_shli_tl(hi, hi, 28); + tcg_gen_concat_tl_i64(t8, lo, hi); + tcg_gen_addi_tl(EA, EA, -8); + tcg_gen_qemu_st_i64(t8, EA, ctx->mem_idx, DEF_MEMOP(MO_64) | MO_ALIGN); + + tcg_gen_addi_tl(EA, cpu_gpr[a->ra], a->si * 8); + tcg_gen_qemu_st_tl(cpu_gpr[a->rt], EA, ctx->mem_idx, DEF_MEMOP(MO_32) | + MO_ALIGN); + tcg_gen_mov_tl(cpu_gpr[a->ra], EA); + return true; +} + +static bool trans_STSKU(DisasContext *ctx, arg_STSKU *a) +{ + int64_t n; + TCGv base, EA; + TCGv lo, hi; + TCGv_i64 t8; + + if (unlikely(!is_ppe(ctx))) { + return false; + } + + CHECK_PPE_LEVEL(ctx, PPC2_PPE42X); + CHECK_PPE_GPR(ctx, a->rt); + + if (unlikely((a->rt != a->ra) || (a->ra == 0) || !(a->si & PPC_BIT(0)))) { + gen_invalid(ctx); + return true; + } + + EA = tcg_temp_new(); + base = tcg_temp_new(); + gen_addr_register(ctx, base); + tcg_gen_addi_tl(EA, base, 4); + tcg_gen_qemu_st_tl(cpu_lr, EA, ctx->mem_idx, DEF_MEMOP(MO_32) | MO_ALIGN); + + gen_store_spr(SPR_PPE42_EDR, base); + + n = ~(a->si); + + t8 = tcg_temp_new_i64(); + if (n > 0) { + hi = cpu_gpr[30]; + lo = cpu_gpr[31]; + tcg_gen_concat_tl_i64(t8, lo, hi); + tcg_gen_addi_tl(EA, base, -8); + tcg_gen_qemu_st_i64(t8, EA, ctx->mem_idx, DEF_MEMOP(MO_64) | MO_ALIGN); + } + if (n > 1) { + hi = cpu_gpr[28]; + lo = cpu_gpr[29]; + tcg_gen_concat_tl_i64(t8, lo, hi); + tcg_gen_addi_tl(EA, base, -16); + tcg_gen_qemu_st_i64(t8, EA, ctx->mem_idx, DEF_MEMOP(MO_64) | MO_ALIGN); + } + + tcg_gen_addi_tl(EA, base, a->si * 8); + tcg_gen_qemu_st_tl(cpu_gpr[a->rt], EA, ctx->mem_idx, DEF_MEMOP(MO_32) | + MO_ALIGN); + tcg_gen_mov_tl(cpu_gpr[a->ra], EA); + return true; +} + +static bool do_ppe_ldst(DisasContext *ctx, int rt, int ra, TCGv disp, + bool update, bool store) +{ + TCGv ea; + int rt_lo; + TCGv_i64 t8; + + CHECK_VDR(ctx, rt); + CHECK_PPE_GPR(ctx, ra); + rt_lo = VDR_PAIR_REG(rt); + if (update && (ra == 0 || (!store && ((ra == rt) || (ra == rt_lo))))) { + gen_invalid(ctx); + return true; + } + gen_set_access_type(ctx, ACCESS_INT); + + ea = do_ea_calc(ctx, ra, disp); + t8 = tcg_temp_new_i64(); + if (store) { + tcg_gen_concat_tl_i64(t8, cpu_gpr[rt_lo], cpu_gpr[rt]); + tcg_gen_qemu_st_i64(t8, ea, ctx->mem_idx, DEF_MEMOP(MO_64)); + } else { + tcg_gen_qemu_ld_i64(t8, ea, ctx->mem_idx, DEF_MEMOP(MO_64)); + tcg_gen_extr_i64_tl(cpu_gpr[rt_lo], cpu_gpr[rt], t8); + } + if (update) { + tcg_gen_mov_tl(cpu_gpr[ra], ea); + } + return true; +} + +static bool do_ppe_ldst_D(DisasContext *ctx, arg_D *a, bool update, bool store) +{ + if (unlikely(!is_ppe(ctx))) { + return false; + } + return do_ppe_ldst(ctx, a->rt, a->ra, tcg_constant_tl(a->si), update, + store); +} + +static bool do_ppe_ldst_X(DisasContext *ctx, arg_X *a, bool store) +{ + if (unlikely(!is_ppe(ctx))) { + return false; + } + CHECK_PPE_GPR(ctx, a->rb); + return do_ppe_ldst(ctx, a->rt, a->ra, cpu_gpr[a->rb], false, store); +} + +TRANS(LVD, do_ppe_ldst_D, false, false) +TRANS(LVDU, do_ppe_ldst_D, true, false) +TRANS(STVD, do_ppe_ldst_D, false, true) +TRANS(STVDU, do_ppe_ldst_D, true, true) +TRANS(LVDX, do_ppe_ldst_X, false) +TRANS(STVDX, do_ppe_ldst_X, true) + + +static bool do_fcb(DisasContext *ctx, TCGv ra_val, TCGv rb_val, int bix, + int32_t bdx, bool s, bool px, bool lk) +{ + TCGCond cond; + uint32_t mask; + TCGLabel *no_branch; + target_ulong dest; + + /* Update CR0 */ + gen_op_cmp32(ra_val, rb_val, s, 0); + + if (lk) { + gen_setlr(ctx, ctx->base.pc_next); + } + + + mask = PPC_BIT32(28 + bix); + cond = (px) ? 
TCG_COND_TSTEQ : TCG_COND_TSTNE; + no_branch = gen_new_label(); + dest = ctx->cia + bdx; + + /* Do the branch if CR0[bix] == PX */ + tcg_gen_brcondi_i32(cond, cpu_crf[0], mask, no_branch); + gen_goto_tb(ctx, 0, dest); + gen_set_label(no_branch); + gen_goto_tb(ctx, 1, ctx->base.pc_next); + ctx->base.is_jmp = DISAS_NORETURN; + return true; +} + +static bool do_cmp_branch(DisasContext *ctx, arg_FCB_bix *a, bool s, + bool rb_is_gpr) +{ + TCGv old_ra; + TCGv rb_val; + + if (unlikely(!is_ppe(ctx))) { + return false; + } + CHECK_PPE_GPR(ctx, a->ra); + if (rb_is_gpr) { + CHECK_PPE_GPR(ctx, a->rb); + rb_val = cpu_gpr[a->rb]; + } else { + rb_val = tcg_constant_tl(a->rb); + } + if (a->bix == 3) { + old_ra = tcg_temp_new(); + tcg_gen_mov_tl(old_ra, cpu_gpr[a->ra]); + tcg_gen_sub_tl(cpu_gpr[a->ra], cpu_gpr[a->ra], rb_val); + return do_fcb(ctx, old_ra, rb_val, 2, + a->bdx, s, a->px, a->lk); + } else { + return do_fcb(ctx, cpu_gpr[a->ra], rb_val, a->bix, + a->bdx, s, a->px, a->lk); + } +} + +TRANS(CMPWBC, do_cmp_branch, true, true) +TRANS(CMPLWBC, do_cmp_branch, false, true) +TRANS(CMPWIBC, do_cmp_branch, true, false) + +static bool do_mask_branch(DisasContext *ctx, arg_FCB * a, bool invert, + bool update, bool rb_is_gpr) +{ + TCGv r; + TCGv mask, shift; + + if (unlikely(!is_ppe(ctx))) { + return false; + } + CHECK_PPE_GPR(ctx, a->ra); + if (rb_is_gpr) { + CHECK_PPE_GPR(ctx, a->rb); + mask = tcg_temp_new(); + shift = tcg_temp_new(); + tcg_gen_andi_tl(shift, cpu_gpr[a->rb], 0x1f); + tcg_gen_shr_tl(mask, tcg_constant_tl(0x80000000), shift); + } else { + mask = tcg_constant_tl(PPC_BIT32(a->rb)); + } + if (invert) { + tcg_gen_not_tl(mask, mask); + } + + /* apply mask to ra */ + r = tcg_temp_new(); + tcg_gen_and_tl(r, cpu_gpr[a->ra], mask); + if (update) { + tcg_gen_mov_tl(cpu_gpr[a->ra], r); + } + return do_fcb(ctx, r, tcg_constant_tl(0), 2, + a->bdx, false, a->px, a->lk); +} + +TRANS(BNBWI, do_mask_branch, false, false, false) +TRANS(BNBW, do_mask_branch, false, false, true) +TRANS(CLRBWIBC, do_mask_branch, true, true, false) +TRANS(CLRBWBC, do_mask_branch, true, true, true) + +static void gen_set_Rc0_i64(DisasContext *ctx, TCGv_i64 reg) +{ + TCGv_i64 t0 = tcg_temp_new_i64(); + TCGv_i64 t1 = tcg_temp_new_i64(); + TCGv_i32 t = tcg_temp_new_i32(); + + tcg_gen_movi_i64(t0, CRF_EQ); + tcg_gen_movi_i64(t1, CRF_LT); + tcg_gen_movcond_i64(TCG_COND_LT, t0, reg, tcg_constant_i64(0), t1, t0); + tcg_gen_movi_i64(t1, CRF_GT); + tcg_gen_movcond_i64(TCG_COND_GT, t0, reg, tcg_constant_i64(0), t1, t0); + tcg_gen_extrl_i64_i32(t, t0); + tcg_gen_trunc_tl_i32(cpu_crf[0], cpu_so); + tcg_gen_or_i32(cpu_crf[0], cpu_crf[0], t); +} + +static bool do_shift64(DisasContext *ctx, arg_X_rc *a, bool left) +{ + int rt_lo, ra_lo; + TCGv_i64 t0, t8; + + if (unlikely(!is_ppe(ctx))) { + return false; + } + CHECK_PPE_LEVEL(ctx, PPC2_PPE42X); + CHECK_VDR(ctx, a->rt); + CHECK_VDR(ctx, a->ra); + CHECK_PPE_GPR(ctx, a->rb); + rt_lo = VDR_PAIR_REG(a->rt); + ra_lo = VDR_PAIR_REG(a->ra); + t8 = tcg_temp_new_i64(); + + /* AND rt with a mask that is 0 when rb >= 0x40 */ + t0 = tcg_temp_new_i64(); + tcg_gen_extu_tl_i64(t0, cpu_gpr[a->rb]); + tcg_gen_shli_i64(t0, t0, 0x39); + tcg_gen_sari_i64(t0, t0, 0x3f); + + /* form 64bit value from two 32bit regs */ + tcg_gen_concat_tl_i64(t8, cpu_gpr[rt_lo], cpu_gpr[a->rt]); + + /* apply mask */ + tcg_gen_andc_i64(t8, t8, t0); + + /* do the shift */ + tcg_gen_extu_tl_i64(t0, cpu_gpr[a->rb]); + tcg_gen_andi_i64(t0, t0, 0x3f); + if (left) { + tcg_gen_shl_i64(t8, t8, t0); + } else { + tcg_gen_shr_i64(t8, t8, t0); + } + 
+ /* split the 64bit word back into two 32bit regs */ + tcg_gen_extr_i64_tl(cpu_gpr[ra_lo], cpu_gpr[a->ra], t8); + + /* update CR0 if requested */ + if (unlikely(a->rc != 0)) { + gen_set_Rc0_i64(ctx, t8); + } + return true; +} + +TRANS(SRVD, do_shift64, false) +TRANS(SLVD, do_shift64, true) + +static bool trans_DCBQ(DisasContext *ctx, arg_DCBQ * a) +{ + if (unlikely(!is_ppe(ctx))) { + return false; + } + + CHECK_PPE_GPR(ctx, a->rt); + CHECK_PPE_GPR(ctx, a->ra); + CHECK_PPE_GPR(ctx, a->rb); + + /* No cache exists, so just set RT to 0 */ + tcg_gen_movi_tl(cpu_gpr[a->rt], 0); + return true; +} + +static bool trans_RLDIMI(DisasContext *ctx, arg_RLDIMI *a) +{ + TCGv_i64 t_rs, t_ra; + int ra_lo, rs_lo; + uint32_t sh = a->sh; + uint32_t mb = a->mb; + uint32_t me = 63 - sh; + + if (unlikely(!is_ppe(ctx))) { + return false; + } + CHECK_PPE_LEVEL(ctx, PPC2_PPE42X); + CHECK_VDR(ctx, a->rs); + CHECK_VDR(ctx, a->ra); + + rs_lo = VDR_PAIR_REG(a->rs); + ra_lo = VDR_PAIR_REG(a->ra); + + t_rs = tcg_temp_new_i64(); + t_ra = tcg_temp_new_i64(); + + tcg_gen_concat_tl_i64(t_rs, cpu_gpr[rs_lo], cpu_gpr[a->rs]); + tcg_gen_concat_tl_i64(t_ra, cpu_gpr[ra_lo], cpu_gpr[a->ra]); + + if (mb <= me) { + tcg_gen_deposit_i64(t_ra, t_ra, t_rs, sh, me - mb + 1); + } else { + uint64_t mask = mask_u64(mb, me); + TCGv_i64 t1 = tcg_temp_new_i64(); + + tcg_gen_rotli_i64(t1, t_rs, sh); + tcg_gen_andi_i64(t1, t1, mask); + tcg_gen_andi_i64(t_ra, t_ra, ~mask); + tcg_gen_or_i64(t_ra, t_ra, t1); + } + + tcg_gen_extr_i64_tl(cpu_gpr[ra_lo], cpu_gpr[a->ra], t_ra); + + if (unlikely(a->rc != 0)) { + gen_set_Rc0_i64(ctx, t_ra); + } + return true; +} + + +static bool gen_rldinm_i64(DisasContext *ctx, arg_MD *a, int mb, int me, int sh) +{ + int len = me - mb + 1; + int rsh = (64 - sh) & 63; + int ra_lo, rs_lo; + TCGv_i64 t8; + + if (unlikely(!is_ppe(ctx))) { + return false; + } + CHECK_PPE_LEVEL(ctx, PPC2_PPE42X); + CHECK_VDR(ctx, a->rs); + CHECK_VDR(ctx, a->ra); + + rs_lo = VDR_PAIR_REG(a->rs); + ra_lo = VDR_PAIR_REG(a->ra); + t8 = tcg_temp_new_i64(); + tcg_gen_concat_tl_i64(t8, cpu_gpr[rs_lo], cpu_gpr[a->rs]); + if (sh != 0 && len > 0 && me == (63 - sh)) { + tcg_gen_deposit_z_i64(t8, t8, sh, len); + } else if (me == 63 && rsh + len <= 64) { + tcg_gen_extract_i64(t8, t8, rsh, len); + } else { + tcg_gen_rotli_i64(t8, t8, sh); + tcg_gen_andi_i64(t8, t8, mask_u64(mb, me)); + } + tcg_gen_extr_i64_tl(cpu_gpr[ra_lo], cpu_gpr[a->ra], t8); + if (unlikely(a->rc != 0)) { + gen_set_Rc0_i64(ctx, t8); + } + return true; +} + +TRANS(RLDICL, gen_rldinm_i64, a->mb, 63, a->sh) +TRANS(RLDICR, gen_rldinm_i64, 0, a->mb, a->sh) diff --git a/target/riscv/common-semi-target.h b/target/riscv/common-semi-target.c index 7c8a59e..aeaeb88 100644 --- a/target/riscv/common-semi-target.h +++ b/target/riscv/common-semi-target.c @@ -8,43 +8,42 @@ * SPDX-License-Identifier: GPL-2.0-or-later */ -#ifndef TARGET_RISCV_COMMON_SEMI_TARGET_H -#define TARGET_RISCV_COMMON_SEMI_TARGET_H +#include "qemu/osdep.h" +#include "cpu.h" +#include "semihosting/common-semi.h" -static inline target_ulong common_semi_arg(CPUState *cs, int argno) +uint64_t common_semi_arg(CPUState *cs, int argno) { RISCVCPU *cpu = RISCV_CPU(cs); CPURISCVState *env = &cpu->env; return env->gpr[xA0 + argno]; } -static inline void common_semi_set_ret(CPUState *cs, target_ulong ret) +void common_semi_set_ret(CPUState *cs, uint64_t ret) { RISCVCPU *cpu = RISCV_CPU(cs); CPURISCVState *env = &cpu->env; env->gpr[xA0] = ret; } -static inline bool common_semi_sys_exit_extended(CPUState *cs, int nr) +bool 
is_64bit_semihosting(CPUArchState *env) { - return (nr == TARGET_SYS_EXIT_EXTENDED || sizeof(target_ulong) == 8); + return riscv_cpu_mxl(env) != MXL_RV32; } -static inline bool is_64bit_semihosting(CPUArchState *env) +bool common_semi_sys_exit_is_extended(CPUState *cs) { - return riscv_cpu_mxl(env) != MXL_RV32; + return is_64bit_semihosting(cpu_env(cs)); } -static inline target_ulong common_semi_stack_bottom(CPUState *cs) +uint64_t common_semi_stack_bottom(CPUState *cs) { RISCVCPU *cpu = RISCV_CPU(cs); CPURISCVState *env = &cpu->env; return env->gpr[xSP]; } -static inline bool common_semi_has_synccache(CPUArchState *env) +bool common_semi_has_synccache(CPUArchState *env) { return true; } - -#endif diff --git a/target/riscv/cpu-qom.h b/target/riscv/cpu-qom.h index 4cfdb74..75f4e43 100644 --- a/target/riscv/cpu-qom.h +++ b/target/riscv/cpu-qom.h @@ -44,15 +44,18 @@ #define TYPE_RISCV_CPU_RVA23S64 RISCV_CPU_TYPE_NAME("rva23s64") #define TYPE_RISCV_CPU_IBEX RISCV_CPU_TYPE_NAME("lowrisc-ibex") #define TYPE_RISCV_CPU_SHAKTI_C RISCV_CPU_TYPE_NAME("shakti-c") +#define TYPE_RISCV_CPU_SIFIVE_E RISCV_CPU_TYPE_NAME("sifive-e") #define TYPE_RISCV_CPU_SIFIVE_E31 RISCV_CPU_TYPE_NAME("sifive-e31") #define TYPE_RISCV_CPU_SIFIVE_E34 RISCV_CPU_TYPE_NAME("sifive-e34") #define TYPE_RISCV_CPU_SIFIVE_E51 RISCV_CPU_TYPE_NAME("sifive-e51") +#define TYPE_RISCV_CPU_SIFIVE_U RISCV_CPU_TYPE_NAME("sifive-u") #define TYPE_RISCV_CPU_SIFIVE_U34 RISCV_CPU_TYPE_NAME("sifive-u34") #define TYPE_RISCV_CPU_SIFIVE_U54 RISCV_CPU_TYPE_NAME("sifive-u54") #define TYPE_RISCV_CPU_THEAD_C906 RISCV_CPU_TYPE_NAME("thead-c906") #define TYPE_RISCV_CPU_VEYRON_V1 RISCV_CPU_TYPE_NAME("veyron-v1") #define TYPE_RISCV_CPU_TT_ASCALON RISCV_CPU_TYPE_NAME("tt-ascalon") #define TYPE_RISCV_CPU_XIANGSHAN_NANHU RISCV_CPU_TYPE_NAME("xiangshan-nanhu") +#define TYPE_RISCV_CPU_XIANGSHAN_KMH RISCV_CPU_TYPE_NAME("xiangshan-kunminghu") #define TYPE_RISCV_CPU_HOST RISCV_CPU_TYPE_NAME("host") OBJECT_DECLARE_CPU_TYPE(RISCVCPU, RISCVCPUClass, RISCV_CPU) diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c index d92874b..a877018 100644 --- a/target/riscv/cpu.c +++ b/target/riscv/cpu.c @@ -73,6 +73,13 @@ bool riscv_cpu_option_set(const char *optname) return g_hash_table_contains(general_user_opts, optname); } +static void riscv_cpu_cfg_merge(RISCVCPUConfig *dest, const RISCVCPUConfig *src) +{ +#define BOOL_FIELD(x) dest->x |= src->x; +#define TYPED_FIELD(type, x, default_) if (src->x != default_) dest->x = src->x; +#include "cpu_cfg_fields.h.inc" +} + #define ISA_EXT_DATA_ENTRY(_name, _min_ver, _prop) \ {#_name, _min_ver, CPU_CFG_OFFSET(_prop)} @@ -120,8 +127,8 @@ const RISCVIsaExtData isa_edata_arr[] = { ISA_EXT_DATA_ENTRY(zaamo, PRIV_VERSION_1_12_0, ext_zaamo), ISA_EXT_DATA_ENTRY(zabha, PRIV_VERSION_1_13_0, ext_zabha), ISA_EXT_DATA_ENTRY(zacas, PRIV_VERSION_1_12_0, ext_zacas), - ISA_EXT_DATA_ENTRY(zama16b, PRIV_VERSION_1_13_0, ext_zama16b), ISA_EXT_DATA_ENTRY(zalrsc, PRIV_VERSION_1_12_0, ext_zalrsc), + ISA_EXT_DATA_ENTRY(zama16b, PRIV_VERSION_1_13_0, ext_zama16b), ISA_EXT_DATA_ENTRY(zawrs, PRIV_VERSION_1_12_0, ext_zawrs), ISA_EXT_DATA_ENTRY(zfa, PRIV_VERSION_1_12_0, ext_zfa), ISA_EXT_DATA_ENTRY(zfbfmin, PRIV_VERSION_1_12_0, ext_zfbfmin), @@ -182,6 +189,7 @@ const RISCVIsaExtData isa_edata_arr[] = { ISA_EXT_DATA_ENTRY(zvkt, PRIV_VERSION_1_12_0, ext_zvkt), ISA_EXT_DATA_ENTRY(zhinx, PRIV_VERSION_1_12_0, ext_zhinx), ISA_EXT_DATA_ENTRY(zhinxmin, PRIV_VERSION_1_12_0, ext_zhinxmin), + ISA_EXT_DATA_ENTRY(sdtrig, PRIV_VERSION_1_12_0, debug), 
ISA_EXT_DATA_ENTRY(shcounterenw, PRIV_VERSION_1_12_0, has_priv_1_12), ISA_EXT_DATA_ENTRY(sha, PRIV_VERSION_1_12_0, ext_sha), ISA_EXT_DATA_ENTRY(shgatpa, PRIV_VERSION_1_12_0, has_priv_1_12), @@ -209,6 +217,7 @@ const RISCVIsaExtData isa_edata_arr[] = { ISA_EXT_DATA_ENTRY(ssnpm, PRIV_VERSION_1_13_0, ext_ssnpm), ISA_EXT_DATA_ENTRY(sspm, PRIV_VERSION_1_13_0, ext_sspm), ISA_EXT_DATA_ENTRY(ssstateen, PRIV_VERSION_1_12_0, ext_ssstateen), + ISA_EXT_DATA_ENTRY(ssstrict, PRIV_VERSION_1_12_0, has_priv_1_12), ISA_EXT_DATA_ENTRY(sstc, PRIV_VERSION_1_12_0, ext_sstc), ISA_EXT_DATA_ENTRY(sstvala, PRIV_VERSION_1_12_0, has_priv_1_12), ISA_EXT_DATA_ENTRY(sstvecd, PRIV_VERSION_1_12_0, has_priv_1_12), @@ -221,6 +230,7 @@ const RISCVIsaExtData isa_edata_arr[] = { ISA_EXT_DATA_ENTRY(svinval, PRIV_VERSION_1_12_0, ext_svinval), ISA_EXT_DATA_ENTRY(svnapot, PRIV_VERSION_1_12_0, ext_svnapot), ISA_EXT_DATA_ENTRY(svpbmt, PRIV_VERSION_1_12_0, ext_svpbmt), + ISA_EXT_DATA_ENTRY(svrsw60t59b, PRIV_VERSION_1_13_0, ext_svrsw60t59b), ISA_EXT_DATA_ENTRY(svukte, PRIV_VERSION_1_13_0, ext_svukte), ISA_EXT_DATA_ENTRY(svvptc, PRIV_VERSION_1_13_0, ext_svvptc), ISA_EXT_DATA_ENTRY(xtheadba, PRIV_VERSION_1_11_0, ext_xtheadba), @@ -356,7 +366,7 @@ void riscv_cpu_set_misa_ext(CPURISCVState *env, uint32_t ext) int riscv_cpu_max_xlen(RISCVCPUClass *mcc) { - return 16 << mcc->misa_mxl_max; + return 16 << mcc->def->misa_mxl_max; } #ifndef CONFIG_USER_ONLY @@ -389,7 +399,7 @@ static uint8_t satp_mode_from_str(const char *satp_mode_str) g_assert_not_reached(); } -uint8_t satp_mode_max_from_map(uint32_t map) +static uint8_t satp_mode_max_from_map(uint32_t map) { /* * 'map = 0' will make us return (31 - 32), which C will @@ -433,17 +443,23 @@ const char *satp_mode_str(uint8_t satp_mode, bool is_32_bit) g_assert_not_reached(); } -static void set_satp_mode_max_supported(RISCVCPU *cpu, - uint8_t satp_mode) +static bool get_satp_mode_supported(RISCVCPU *cpu, uint16_t *supported) { - bool rv32 = riscv_cpu_mxl(&cpu->env) == MXL_RV32; + bool rv32 = riscv_cpu_is_32bit(cpu); const bool *valid_vm = rv32 ? valid_vm_1_10_32 : valid_vm_1_10_64; + int satp_mode = cpu->cfg.max_satp_mode; + + if (satp_mode == -1) { + return false; + } + *supported = 0; for (int i = 0; i <= satp_mode; ++i) { if (valid_vm[i]) { - cpu->cfg.satp_mode.supported |= (1 << i); + *supported |= (1 << i); } } + return true; } /* Set the satp mode to the max supported */ @@ -452,380 +468,26 @@ static void set_satp_mode_default_map(RISCVCPU *cpu) /* * Bare CPUs do not default to the max available. * Users must set a valid satp_mode in the command - * line. + * line. Otherwise, leave the existing max_satp_mode + * in place. */ if (object_dynamic_cast(OBJECT(cpu), TYPE_RISCV_BARE_CPU) != NULL) { warn_report("No satp mode set. Defaulting to 'bare'"); - cpu->cfg.satp_mode.map = (1 << VM_1_10_MBARE); - return; + cpu->cfg.max_satp_mode = VM_1_10_MBARE; } - - cpu->cfg.satp_mode.map = cpu->cfg.satp_mode.supported; -} -#endif - -static void riscv_max_cpu_init(Object *obj) -{ - RISCVCPU *cpu = RISCV_CPU(obj); - CPURISCVState *env = &cpu->env; - - cpu->cfg.mmu = true; - cpu->cfg.pmp = true; - - env->priv_ver = PRIV_VERSION_LATEST; -#ifndef CONFIG_USER_ONLY - set_satp_mode_max_supported(RISCV_CPU(obj), - riscv_cpu_mxl(&RISCV_CPU(obj)->env) == MXL_RV32 ? 
- VM_1_10_SV32 : VM_1_10_SV57); -#endif -} - -#if defined(TARGET_RISCV64) -static void rv64_base_cpu_init(Object *obj) -{ - RISCVCPU *cpu = RISCV_CPU(obj); - CPURISCVState *env = &cpu->env; - - cpu->cfg.mmu = true; - cpu->cfg.pmp = true; - - /* Set latest version of privileged specification */ - env->priv_ver = PRIV_VERSION_LATEST; -#ifndef CONFIG_USER_ONLY - set_satp_mode_max_supported(RISCV_CPU(obj), VM_1_10_SV57); -#endif -} - -static void rv64_sifive_u_cpu_init(Object *obj) -{ - RISCVCPU *cpu = RISCV_CPU(obj); - CPURISCVState *env = &cpu->env; - riscv_cpu_set_misa_ext(env, RVI | RVM | RVA | RVF | RVD | RVC | RVS | RVU); - env->priv_ver = PRIV_VERSION_1_10_0; -#ifndef CONFIG_USER_ONLY - set_satp_mode_max_supported(RISCV_CPU(obj), VM_1_10_SV39); -#endif - - /* inherited from parent obj via riscv_cpu_init() */ - cpu->cfg.ext_zifencei = true; - cpu->cfg.ext_zicsr = true; - cpu->cfg.mmu = true; - cpu->cfg.pmp = true; -} - -static void rv64_sifive_e_cpu_init(Object *obj) -{ - CPURISCVState *env = &RISCV_CPU(obj)->env; - RISCVCPU *cpu = RISCV_CPU(obj); - - riscv_cpu_set_misa_ext(env, RVI | RVM | RVA | RVC | RVU); - env->priv_ver = PRIV_VERSION_1_10_0; -#ifndef CONFIG_USER_ONLY - set_satp_mode_max_supported(cpu, VM_1_10_MBARE); -#endif - - /* inherited from parent obj via riscv_cpu_init() */ - cpu->cfg.ext_zifencei = true; - cpu->cfg.ext_zicsr = true; - cpu->cfg.pmp = true; -} - -static void rv64_thead_c906_cpu_init(Object *obj) -{ - CPURISCVState *env = &RISCV_CPU(obj)->env; - RISCVCPU *cpu = RISCV_CPU(obj); - - riscv_cpu_set_misa_ext(env, RVG | RVC | RVS | RVU); - env->priv_ver = PRIV_VERSION_1_11_0; - - cpu->cfg.ext_zfa = true; - cpu->cfg.ext_zfh = true; - cpu->cfg.mmu = true; - cpu->cfg.ext_xtheadba = true; - cpu->cfg.ext_xtheadbb = true; - cpu->cfg.ext_xtheadbs = true; - cpu->cfg.ext_xtheadcmo = true; - cpu->cfg.ext_xtheadcondmov = true; - cpu->cfg.ext_xtheadfmemidx = true; - cpu->cfg.ext_xtheadmac = true; - cpu->cfg.ext_xtheadmemidx = true; - cpu->cfg.ext_xtheadmempair = true; - cpu->cfg.ext_xtheadsync = true; - - cpu->cfg.mvendorid = THEAD_VENDOR_ID; -#ifndef CONFIG_USER_ONLY - set_satp_mode_max_supported(cpu, VM_1_10_SV39); - th_register_custom_csrs(cpu); -#endif - - /* inherited from parent obj via riscv_cpu_init() */ - cpu->cfg.pmp = true; -} - -static void rv64_veyron_v1_cpu_init(Object *obj) -{ - CPURISCVState *env = &RISCV_CPU(obj)->env; - RISCVCPU *cpu = RISCV_CPU(obj); - - riscv_cpu_set_misa_ext(env, RVG | RVC | RVS | RVU | RVH); - env->priv_ver = PRIV_VERSION_1_12_0; - - /* Enable ISA extensions */ - cpu->cfg.mmu = true; - cpu->cfg.ext_zifencei = true; - cpu->cfg.ext_zicsr = true; - cpu->cfg.pmp = true; - cpu->cfg.ext_zicbom = true; - cpu->cfg.cbom_blocksize = 64; - cpu->cfg.cboz_blocksize = 64; - cpu->cfg.ext_zicboz = true; - cpu->cfg.ext_smaia = true; - cpu->cfg.ext_ssaia = true; - cpu->cfg.ext_sscofpmf = true; - cpu->cfg.ext_sstc = true; - cpu->cfg.ext_svinval = true; - cpu->cfg.ext_svnapot = true; - cpu->cfg.ext_svpbmt = true; - cpu->cfg.ext_smstateen = true; - cpu->cfg.ext_zba = true; - cpu->cfg.ext_zbb = true; - cpu->cfg.ext_zbc = true; - cpu->cfg.ext_zbs = true; - cpu->cfg.ext_XVentanaCondOps = true; - - cpu->cfg.mvendorid = VEYRON_V1_MVENDORID; - cpu->cfg.marchid = VEYRON_V1_MARCHID; - cpu->cfg.mimpid = VEYRON_V1_MIMPID; - -#ifndef CONFIG_USER_ONLY - set_satp_mode_max_supported(cpu, VM_1_10_SV48); -#endif -} - -/* Tenstorrent Ascalon */ -static void rv64_tt_ascalon_cpu_init(Object *obj) -{ - CPURISCVState *env = &RISCV_CPU(obj)->env; - RISCVCPU *cpu = RISCV_CPU(obj); - 
- riscv_cpu_set_misa_ext(env, RVG | RVC | RVS | RVU | RVH | RVV); - env->priv_ver = PRIV_VERSION_1_13_0; - - /* Enable ISA extensions */ - cpu->cfg.mmu = true; - cpu->cfg.vlenb = 256 >> 3; - cpu->cfg.elen = 64; - cpu->env.vext_ver = VEXT_VERSION_1_00_0; - cpu->cfg.rvv_ma_all_1s = true; - cpu->cfg.rvv_ta_all_1s = true; - cpu->cfg.misa_w = true; - cpu->cfg.pmp = true; - cpu->cfg.cbom_blocksize = 64; - cpu->cfg.cbop_blocksize = 64; - cpu->cfg.cboz_blocksize = 64; - cpu->cfg.ext_zic64b = true; - cpu->cfg.ext_zicbom = true; - cpu->cfg.ext_zicbop = true; - cpu->cfg.ext_zicboz = true; - cpu->cfg.ext_zicntr = true; - cpu->cfg.ext_zicond = true; - cpu->cfg.ext_zicsr = true; - cpu->cfg.ext_zifencei = true; - cpu->cfg.ext_zihintntl = true; - cpu->cfg.ext_zihintpause = true; - cpu->cfg.ext_zihpm = true; - cpu->cfg.ext_zimop = true; - cpu->cfg.ext_zawrs = true; - cpu->cfg.ext_zfa = true; - cpu->cfg.ext_zfbfmin = true; - cpu->cfg.ext_zfh = true; - cpu->cfg.ext_zfhmin = true; - cpu->cfg.ext_zcb = true; - cpu->cfg.ext_zcmop = true; - cpu->cfg.ext_zba = true; - cpu->cfg.ext_zbb = true; - cpu->cfg.ext_zbs = true; - cpu->cfg.ext_zkt = true; - cpu->cfg.ext_zvbb = true; - cpu->cfg.ext_zvbc = true; - cpu->cfg.ext_zvfbfmin = true; - cpu->cfg.ext_zvfbfwma = true; - cpu->cfg.ext_zvfh = true; - cpu->cfg.ext_zvfhmin = true; - cpu->cfg.ext_zvkng = true; - cpu->cfg.ext_smaia = true; - cpu->cfg.ext_smstateen = true; - cpu->cfg.ext_ssaia = true; - cpu->cfg.ext_sscofpmf = true; - cpu->cfg.ext_sstc = true; - cpu->cfg.ext_svade = true; - cpu->cfg.ext_svinval = true; - cpu->cfg.ext_svnapot = true; - cpu->cfg.ext_svpbmt = true; - -#ifndef CONFIG_USER_ONLY - set_satp_mode_max_supported(cpu, VM_1_10_SV57); -#endif -} - -static void rv64_xiangshan_nanhu_cpu_init(Object *obj) -{ - CPURISCVState *env = &RISCV_CPU(obj)->env; - RISCVCPU *cpu = RISCV_CPU(obj); - - riscv_cpu_set_misa_ext(env, RVG | RVC | RVB | RVS | RVU); - env->priv_ver = PRIV_VERSION_1_12_0; - - /* Enable ISA extensions */ - cpu->cfg.ext_zbc = true; - cpu->cfg.ext_zbkb = true; - cpu->cfg.ext_zbkc = true; - cpu->cfg.ext_zbkx = true; - cpu->cfg.ext_zknd = true; - cpu->cfg.ext_zkne = true; - cpu->cfg.ext_zknh = true; - cpu->cfg.ext_zksed = true; - cpu->cfg.ext_zksh = true; - cpu->cfg.ext_svinval = true; - - cpu->cfg.mmu = true; - cpu->cfg.pmp = true; - -#ifndef CONFIG_USER_ONLY - set_satp_mode_max_supported(cpu, VM_1_10_SV39); -#endif -} - -#if defined(CONFIG_TCG) && !defined(CONFIG_USER_ONLY) -static void rv128_base_cpu_init(Object *obj) -{ - RISCVCPU *cpu = RISCV_CPU(obj); - CPURISCVState *env = &cpu->env; - - cpu->cfg.mmu = true; - cpu->cfg.pmp = true; - - /* Set latest version of privileged specification */ - env->priv_ver = PRIV_VERSION_LATEST; - set_satp_mode_max_supported(RISCV_CPU(obj), VM_1_10_SV57); -} -#endif /* CONFIG_TCG && !CONFIG_USER_ONLY */ - -static void rv64i_bare_cpu_init(Object *obj) -{ - CPURISCVState *env = &RISCV_CPU(obj)->env; - riscv_cpu_set_misa_ext(env, RVI); -} - -static void rv64e_bare_cpu_init(Object *obj) -{ - CPURISCVState *env = &RISCV_CPU(obj)->env; - riscv_cpu_set_misa_ext(env, RVE); -} - -#endif /* !TARGET_RISCV64 */ - -#if defined(TARGET_RISCV32) || \ - (defined(TARGET_RISCV64) && !defined(CONFIG_USER_ONLY)) - -static void rv32_base_cpu_init(Object *obj) -{ - RISCVCPU *cpu = RISCV_CPU(obj); - CPURISCVState *env = &cpu->env; - - cpu->cfg.mmu = true; - cpu->cfg.pmp = true; - - /* Set latest version of privileged specification */ - env->priv_ver = PRIV_VERSION_LATEST; -#ifndef CONFIG_USER_ONLY - 
set_satp_mode_max_supported(RISCV_CPU(obj), VM_1_10_SV32); -#endif } - -static void rv32_sifive_u_cpu_init(Object *obj) -{ - RISCVCPU *cpu = RISCV_CPU(obj); - CPURISCVState *env = &cpu->env; - riscv_cpu_set_misa_ext(env, RVI | RVM | RVA | RVF | RVD | RVC | RVS | RVU); - env->priv_ver = PRIV_VERSION_1_10_0; -#ifndef CONFIG_USER_ONLY - set_satp_mode_max_supported(RISCV_CPU(obj), VM_1_10_SV32); -#endif - - /* inherited from parent obj via riscv_cpu_init() */ - cpu->cfg.ext_zifencei = true; - cpu->cfg.ext_zicsr = true; - cpu->cfg.mmu = true; - cpu->cfg.pmp = true; -} - -static void rv32_sifive_e_cpu_init(Object *obj) -{ - CPURISCVState *env = &RISCV_CPU(obj)->env; - RISCVCPU *cpu = RISCV_CPU(obj); - - riscv_cpu_set_misa_ext(env, RVI | RVM | RVA | RVC | RVU); - env->priv_ver = PRIV_VERSION_1_10_0; -#ifndef CONFIG_USER_ONLY - set_satp_mode_max_supported(cpu, VM_1_10_MBARE); #endif - /* inherited from parent obj via riscv_cpu_init() */ - cpu->cfg.ext_zifencei = true; - cpu->cfg.ext_zicsr = true; - cpu->cfg.pmp = true; -} - -static void rv32_ibex_cpu_init(Object *obj) -{ - CPURISCVState *env = &RISCV_CPU(obj)->env; - RISCVCPU *cpu = RISCV_CPU(obj); - - riscv_cpu_set_misa_ext(env, RVI | RVM | RVC | RVU); - env->priv_ver = PRIV_VERSION_1_12_0; #ifndef CONFIG_USER_ONLY - set_satp_mode_max_supported(cpu, VM_1_10_MBARE); -#endif - /* inherited from parent obj via riscv_cpu_init() */ - cpu->cfg.ext_zifencei = true; - cpu->cfg.ext_zicsr = true; - cpu->cfg.pmp = true; - cpu->cfg.ext_smepmp = true; - - cpu->cfg.ext_zba = true; - cpu->cfg.ext_zbb = true; - cpu->cfg.ext_zbc = true; - cpu->cfg.ext_zbs = true; -} - -static void rv32_imafcu_nommu_cpu_init(Object *obj) +static void riscv_register_custom_csrs(RISCVCPU *cpu, const RISCVCSR *csr_list) { - CPURISCVState *env = &RISCV_CPU(obj)->env; - RISCVCPU *cpu = RISCV_CPU(obj); - - riscv_cpu_set_misa_ext(env, RVI | RVM | RVA | RVF | RVC | RVU); - env->priv_ver = PRIV_VERSION_1_10_0; -#ifndef CONFIG_USER_ONLY - set_satp_mode_max_supported(cpu, VM_1_10_MBARE); -#endif - - /* inherited from parent obj via riscv_cpu_init() */ - cpu->cfg.ext_zifencei = true; - cpu->cfg.ext_zicsr = true; - cpu->cfg.pmp = true; -} - -static void rv32i_bare_cpu_init(Object *obj) -{ - CPURISCVState *env = &RISCV_CPU(obj)->env; - riscv_cpu_set_misa_ext(env, RVI); -} - -static void rv32e_bare_cpu_init(Object *obj) -{ - CPURISCVState *env = &RISCV_CPU(obj)->env; - riscv_cpu_set_misa_ext(env, RVE); + for (size_t i = 0; csr_list[i].csr_ops.name; i++) { + int csrno = csr_list[i].csrno; + const riscv_csr_operations *csr_ops = &csr_list[i].csr_ops; + if (!csr_list[i].insertion_test || csr_list[i].insertion_test(cpu)) { + riscv_set_csr_ops(csrno, csr_ops); + } + } } #endif @@ -942,7 +604,7 @@ static void riscv_cpu_dump_state(CPUState *cs, FILE *f, int flags) } } } - if (riscv_has_ext(env, RVV) && (flags & CPU_DUMP_VPU)) { + if (riscv_cpu_cfg(env)->ext_zve32x && (flags & CPU_DUMP_VPU)) { static const int dump_rvv_csrs[] = { CSR_VSTART, CSR_VXSAT, @@ -1033,7 +695,7 @@ static void riscv_cpu_reset_hold(Object *obj, ResetType type) mcc->parent_phases.hold(obj, type); } #ifndef CONFIG_USER_ONLY - env->misa_mxl = mcc->misa_mxl_max; + env->misa_mxl = mcc->def->misa_mxl_max; env->priv = PRV_M; env->mstatus &= ~(MSTATUS_MIE | MSTATUS_MPRV); if (env->misa_mxl > MXL_RV32) { @@ -1170,18 +832,16 @@ static void riscv_cpu_disas_set_info(CPUState *s, disassemble_info *info) static void riscv_cpu_satp_mode_finalize(RISCVCPU *cpu, Error **errp) { bool rv32 = riscv_cpu_is_32bit(cpu); - uint8_t satp_mode_map_max, 
satp_mode_supported_max; + uint16_t supported; + uint8_t satp_mode_map_max; - /* The CPU wants the OS to decide which satp mode to use */ - if (cpu->cfg.satp_mode.supported == 0) { + if (!get_satp_mode_supported(cpu, &supported)) { + /* The CPU wants the hypervisor to decide which satp mode to allow */ return; } - satp_mode_supported_max = - satp_mode_max_from_map(cpu->cfg.satp_mode.supported); - - if (cpu->cfg.satp_mode.map == 0) { - if (cpu->cfg.satp_mode.init == 0) { + if (cpu->satp_modes.map == 0) { + if (cpu->satp_modes.init == 0) { /* If unset by the user, we fallback to the default satp mode. */ set_satp_mode_default_map(cpu); } else { @@ -1191,27 +851,27 @@ static void riscv_cpu_satp_mode_finalize(RISCVCPU *cpu, Error **errp) * valid_vm_1_10_32/64. */ for (int i = 1; i < 16; ++i) { - if ((cpu->cfg.satp_mode.init & (1 << i)) && - (cpu->cfg.satp_mode.supported & (1 << i))) { + if ((cpu->satp_modes.init & (1 << i)) && + supported & (1 << i)) { for (int j = i - 1; j >= 0; --j) { - if (cpu->cfg.satp_mode.supported & (1 << j)) { - cpu->cfg.satp_mode.map |= (1 << j); - break; + if (supported & (1 << j)) { + cpu->cfg.max_satp_mode = j; + return; } } - break; } } } + return; } - satp_mode_map_max = satp_mode_max_from_map(cpu->cfg.satp_mode.map); + satp_mode_map_max = satp_mode_max_from_map(cpu->satp_modes.map); /* Make sure the user asked for a supported configuration (HW and qemu) */ - if (satp_mode_map_max > satp_mode_supported_max) { + if (satp_mode_map_max > cpu->cfg.max_satp_mode) { error_setg(errp, "satp_mode %s is higher than hw max capability %s", satp_mode_str(satp_mode_map_max, rv32), - satp_mode_str(satp_mode_supported_max, rv32)); + satp_mode_str(cpu->cfg.max_satp_mode, rv32)); return; } @@ -1221,9 +881,9 @@ static void riscv_cpu_satp_mode_finalize(RISCVCPU *cpu, Error **errp) */ if (!rv32) { for (int i = satp_mode_map_max - 1; i >= 0; --i) { - if (!(cpu->cfg.satp_mode.map & (1 << i)) && - (cpu->cfg.satp_mode.init & (1 << i)) && - (cpu->cfg.satp_mode.supported & (1 << i))) { + if (!(cpu->satp_modes.map & (1 << i)) && + (cpu->satp_modes.init & (1 << i)) && + (supported & (1 << i))) { error_setg(errp, "cannot disable %s satp mode if %s " "is enabled", satp_mode_str(i, false), satp_mode_str(satp_mode_map_max, false)); @@ -1232,12 +892,7 @@ static void riscv_cpu_satp_mode_finalize(RISCVCPU *cpu, Error **errp) } } - /* Finally expand the map so that all valid modes are set */ - for (int i = satp_mode_map_max - 1; i >= 0; --i) { - if (cpu->cfg.satp_mode.supported & (1 << i)) { - cpu->cfg.satp_mode.map |= (1 << i); - } - } + cpu->cfg.max_satp_mode = satp_mode_map_max; } #endif @@ -1315,11 +970,11 @@ bool riscv_cpu_accelerator_compatible(RISCVCPU *cpu) static void cpu_riscv_get_satp(Object *obj, Visitor *v, const char *name, void *opaque, Error **errp) { - RISCVSATPMap *satp_map = opaque; + RISCVSATPModes *satp_modes = opaque; uint8_t satp = satp_mode_from_str(name); bool value; - value = satp_map->map & (1 << satp); + value = satp_modes->map & (1 << satp); visit_type_bool(v, name, &value, errp); } @@ -1327,7 +982,7 @@ static void cpu_riscv_get_satp(Object *obj, Visitor *v, const char *name, static void cpu_riscv_set_satp(Object *obj, Visitor *v, const char *name, void *opaque, Error **errp) { - RISCVSATPMap *satp_map = opaque; + RISCVSATPModes *satp_modes = opaque; uint8_t satp = satp_mode_from_str(name); bool value; @@ -1335,8 +990,8 @@ static void cpu_riscv_set_satp(Object *obj, Visitor *v, const char *name, return; } - satp_map->map = deposit32(satp_map->map, satp, 1, value); - 
satp_map->init |= 1 << satp; + satp_modes->map = deposit32(satp_modes->map, satp, 1, value); + satp_modes->init |= 1 << satp; } void riscv_add_satp_mode_properties(Object *obj) @@ -1345,16 +1000,16 @@ void riscv_add_satp_mode_properties(Object *obj) if (cpu->env.misa_mxl == MXL_RV32) { object_property_add(obj, "sv32", "bool", cpu_riscv_get_satp, - cpu_riscv_set_satp, NULL, &cpu->cfg.satp_mode); + cpu_riscv_set_satp, NULL, &cpu->satp_modes); } else { object_property_add(obj, "sv39", "bool", cpu_riscv_get_satp, - cpu_riscv_set_satp, NULL, &cpu->cfg.satp_mode); + cpu_riscv_set_satp, NULL, &cpu->satp_modes); object_property_add(obj, "sv48", "bool", cpu_riscv_get_satp, - cpu_riscv_set_satp, NULL, &cpu->cfg.satp_mode); + cpu_riscv_set_satp, NULL, &cpu->satp_modes); object_property_add(obj, "sv57", "bool", cpu_riscv_get_satp, - cpu_riscv_set_satp, NULL, &cpu->cfg.satp_mode); + cpu_riscv_set_satp, NULL, &cpu->satp_modes); object_property_add(obj, "sv64", "bool", cpu_riscv_get_satp, - cpu_riscv_set_satp, NULL, &cpu->cfg.satp_mode); + cpu_riscv_set_satp, NULL, &cpu->satp_modes); } } @@ -1431,18 +1086,13 @@ static bool riscv_cpu_is_dynamic(Object *cpu_obj) return object_dynamic_cast(cpu_obj, TYPE_RISCV_DYNAMIC_CPU) != NULL; } -static void riscv_cpu_post_init(Object *obj) -{ - accel_cpu_instance_init(CPU(obj)); -} - static void riscv_cpu_init(Object *obj) { RISCVCPUClass *mcc = RISCV_CPU_GET_CLASS(obj); RISCVCPU *cpu = RISCV_CPU(obj); CPURISCVState *env = &cpu->env; - env->misa_mxl = mcc->misa_mxl_max; + env->misa_mxl = mcc->def->misa_mxl_max; #ifndef CONFIG_USER_ONLY qdev_init_gpio_in(DEVICE(obj), riscv_cpu_set_irq, @@ -1460,8 +1110,8 @@ static void riscv_cpu_init(Object *obj) * for all CPUs. Each accelerator will decide what to do when * users disable them. */ - RISCV_CPU(obj)->cfg.ext_zicntr = true; - RISCV_CPU(obj)->cfg.ext_zihpm = true; + RISCV_CPU(obj)->cfg.ext_zicntr = !mcc->def->bare; + RISCV_CPU(obj)->cfg.ext_zihpm = !mcc->def->bare; /* Default values for non-bool cpu properties */ cpu->cfg.pmu_mask = MAKE_64BIT_MASK(3, 16); @@ -1470,35 +1120,30 @@ static void riscv_cpu_init(Object *obj) cpu->cfg.cbom_blocksize = 64; cpu->cfg.cbop_blocksize = 64; cpu->cfg.cboz_blocksize = 64; + cpu->cfg.pmp_regions = 16; cpu->env.vext_ver = VEXT_VERSION_1_00_0; -} - -static void riscv_bare_cpu_init(Object *obj) -{ - RISCVCPU *cpu = RISCV_CPU(obj); + cpu->cfg.max_satp_mode = -1; - /* - * Bare CPUs do not inherit the timer and performance - * counters from the parent class (see riscv_cpu_init() - * for info on why the parent enables them). - * - * Users have to explicitly enable these counters for - * bare CPUs. - */ - cpu->cfg.ext_zicntr = false; - cpu->cfg.ext_zihpm = false; + if (mcc->def->profile) { + mcc->def->profile->enabled = true; + } - /* Set to QEMU's first supported priv version */ - cpu->env.priv_ver = PRIV_VERSION_1_10_0; + env->misa_ext_mask = env->misa_ext = mcc->def->misa_ext; + riscv_cpu_cfg_merge(&cpu->cfg, &mcc->def->cfg); - /* - * Support all available satp_mode settings. The default - * value will be set to MBARE if the user doesn't set - * satp_mode manually (see set_satp_mode_default()). 
- */ + if (mcc->def->priv_spec != RISCV_PROFILE_ATTR_UNUSED) { + cpu->env.priv_ver = mcc->def->priv_spec; + } + if (mcc->def->vext_spec != RISCV_PROFILE_ATTR_UNUSED) { + cpu->env.vext_ver = mcc->def->vext_spec; + } #ifndef CONFIG_USER_ONLY - set_satp_mode_max_supported(cpu, VM_1_10_SV64); + if (mcc->def->custom_csrs) { + riscv_register_custom_csrs(cpu, mcc->def->custom_csrs); + } #endif + + accel_cpu_instance_init(CPU(obj)); } typedef struct misa_ext_info { @@ -1533,7 +1178,7 @@ static void riscv_cpu_validate_misa_mxl(RISCVCPUClass *mcc) CPUClass *cc = CPU_CLASS(mcc); /* Validate that MISA_MXL is set properly. */ - switch (mcc->misa_mxl_max) { + switch (mcc->def->misa_mxl_max) { #ifdef TARGET_RISCV64 case MXL_RV64: case MXL_RV128: @@ -1641,6 +1286,7 @@ const RISCVCPUMultiExtConfig riscv_cpu_extensions[] = { MULTI_EXT_CFG_BOOL("svinval", ext_svinval, false), MULTI_EXT_CFG_BOOL("svnapot", ext_svnapot, false), MULTI_EXT_CFG_BOOL("svpbmt", ext_svpbmt, false), + MULTI_EXT_CFG_BOOL("svrsw60t59b", ext_svrsw60t59b, false), MULTI_EXT_CFG_BOOL("svvptc", ext_svvptc, true), MULTI_EXT_CFG_BOOL("zicntr", ext_zicntr, true), @@ -1734,31 +1380,24 @@ const RISCVCPUMultiExtConfig riscv_cpu_experimental_exts[] = { * 'Named features' is the name we give to extensions that we * don't want to expose to users. They are either immutable * (always enabled/disable) or they'll vary depending on - * the resulting CPU state. They have riscv,isa strings - * and priv_ver like regular extensions. + * the resulting CPU state. + * + * Some of them are always enabled depending on priv version + * of the CPU and are declared directly in isa_edata_arr[]. + * The ones listed here have special checks during finalize() + * time and require their own flags like regular extensions. + * See riscv_cpu_update_named_features() for more info. */ const RISCVCPUMultiExtConfig riscv_cpu_named_features[] = { MULTI_EXT_CFG_BOOL("zic64b", ext_zic64b, true), MULTI_EXT_CFG_BOOL("ssstateen", ext_ssstateen, true), MULTI_EXT_CFG_BOOL("sha", ext_sha, true), - MULTI_EXT_CFG_BOOL("ziccrse", ext_ziccrse, true), - - { }, -}; -/* Deprecated entries marked for future removal */ -const RISCVCPUMultiExtConfig riscv_cpu_deprecated_exts[] = { - MULTI_EXT_CFG_BOOL("Zifencei", ext_zifencei, true), - MULTI_EXT_CFG_BOOL("Zicsr", ext_zicsr, true), - MULTI_EXT_CFG_BOOL("Zihintntl", ext_zihintntl, true), - MULTI_EXT_CFG_BOOL("Zihintpause", ext_zihintpause, true), - MULTI_EXT_CFG_BOOL("Zawrs", ext_zawrs, true), - MULTI_EXT_CFG_BOOL("Zfa", ext_zfa, true), - MULTI_EXT_CFG_BOOL("Zfh", ext_zfh, false), - MULTI_EXT_CFG_BOOL("Zfhmin", ext_zfhmin, false), - MULTI_EXT_CFG_BOOL("Zve32f", ext_zve32f, false), - MULTI_EXT_CFG_BOOL("Zve64f", ext_zve64f, false), - MULTI_EXT_CFG_BOOL("Zve64d", ext_zve64d, false), + /* + * 'ziccrse' has its own flag because the KVM driver + * wants to enable/disable it on its own accord. 
+ */ + MULTI_EXT_CFG_BOOL("ziccrse", ext_ziccrse, true), { }, }; @@ -1927,6 +1566,46 @@ static const PropertyInfo prop_pmp = { .set = prop_pmp_set, }; +static void prop_num_pmp_regions_set(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + RISCVCPU *cpu = RISCV_CPU(obj); + uint8_t value; + + visit_type_uint8(v, name, &value, errp); + + if (cpu->cfg.pmp_regions != value && riscv_cpu_is_vendor(obj)) { + cpu_set_prop_err(cpu, name, errp); + return; + } + + if (cpu->env.priv_ver < PRIV_VERSION_1_12_0 && value > OLD_MAX_RISCV_PMPS) { + error_setg(errp, "Number of PMP regions exceeds maximum available"); + return; + } else if (value > MAX_RISCV_PMPS) { + error_setg(errp, "Number of PMP regions exceeds maximum available"); + return; + } + + cpu_option_add_user_setting(name, value); + cpu->cfg.pmp_regions = value; +} + +static void prop_num_pmp_regions_get(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + uint8_t value = RISCV_CPU(obj)->cfg.pmp_regions; + + visit_type_uint8(v, name, &value, errp); +} + +static const PropertyInfo prop_num_pmp_regions = { + .type = "uint8", + .description = "num-pmp-regions", + .get = prop_num_pmp_regions_get, + .set = prop_num_pmp_regions_set, +}; + static int priv_spec_from_str(const char *priv_spec_str) { int priv_version = -1; @@ -2926,6 +2605,7 @@ static const Property riscv_cpu_properties[] = { {.name = "mmu", .info = &prop_mmu}, {.name = "pmp", .info = &prop_pmp}, + {.name = "num-pmp-regions", .info = &prop_num_pmp_regions}, {.name = "priv_spec", .info = &prop_priv_spec}, {.name = "vext_spec", .info = &prop_vext_spec}, @@ -2954,6 +2634,7 @@ static const Property riscv_cpu_properties[] = { DEFINE_PROP_BOOL("rvv_ta_all_1s", RISCVCPU, cfg.rvv_ta_all_1s, false), DEFINE_PROP_BOOL("rvv_ma_all_1s", RISCVCPU, cfg.rvv_ma_all_1s, false), DEFINE_PROP_BOOL("rvv_vl_half_avl", RISCVCPU, cfg.rvv_vl_half_avl, false), + DEFINE_PROP_BOOL("rvv_vsetvl_x0_vill", RISCVCPU, cfg.rvv_vsetvl_x0_vill, false), /* * write_misa() is marked as experimental for now so mark @@ -2962,36 +2643,6 @@ static const Property riscv_cpu_properties[] = { DEFINE_PROP_BOOL("x-misa-w", RISCVCPU, cfg.misa_w, false), }; -#if defined(TARGET_RISCV64) -static void rva22u64_profile_cpu_init(Object *obj) -{ - rv64i_bare_cpu_init(obj); - - RVA22U64.enabled = true; -} - -static void rva22s64_profile_cpu_init(Object *obj) -{ - rv64i_bare_cpu_init(obj); - - RVA22S64.enabled = true; -} - -static void rva23u64_profile_cpu_init(Object *obj) -{ - rv64i_bare_cpu_init(obj); - - RVA23U64.enabled = true; -} - -static void rva23s64_profile_cpu_init(Object *obj) -{ - rv64i_bare_cpu_init(obj); - - RVA23S64.enabled = true; -} -#endif - static const gchar *riscv_gdb_arch_name(CPUState *cs) { RISCVCPU *cpu = RISCV_CPU(cs); @@ -3060,12 +2711,87 @@ static void riscv_cpu_common_class_init(ObjectClass *c, const void *data) device_class_set_props(dc, riscv_cpu_properties); } -static void riscv_cpu_class_init(ObjectClass *c, const void *data) +static bool profile_extends(RISCVCPUProfile *trial, RISCVCPUProfile *parent) +{ + RISCVCPUProfile *curr; + if (!parent) { + return true; + } + + curr = trial; + while (curr) { + if (curr == parent) { + return true; + } + curr = curr->u_parent; + } + + curr = trial; + while (curr) { + if (curr == parent) { + return true; + } + curr = curr->s_parent; + } + + return false; +} + +static void riscv_cpu_class_base_init(ObjectClass *c, const void *data) { RISCVCPUClass *mcc = RISCV_CPU_CLASS(c); + RISCVCPUClass *pcc = 
RISCV_CPU_CLASS(object_class_get_parent(c)); + + if (pcc->def) { + mcc->def = g_memdup2(pcc->def, sizeof(*pcc->def)); + } else { + mcc->def = g_new0(RISCVCPUDef, 1); + } + + if (data) { + const RISCVCPUDef *def = data; + mcc->def->bare |= def->bare; + if (def->profile) { + assert(profile_extends(def->profile, mcc->def->profile)); + assert(mcc->def->bare); + mcc->def->profile = def->profile; + } + if (def->misa_mxl_max) { + assert(def->misa_mxl_max <= MXL_RV128); + mcc->def->misa_mxl_max = def->misa_mxl_max; + +#ifndef CONFIG_USER_ONLY + /* + * Hack to simplify CPU class hierarchies that include both 32- and + * 64-bit models: reduce SV39/48/57/64 to SV32 for 32-bit models. + */ + if (mcc->def->misa_mxl_max == MXL_RV32 && + !valid_vm_1_10_32[mcc->def->cfg.max_satp_mode]) { + mcc->def->cfg.max_satp_mode = VM_1_10_SV32; + } +#endif + } + if (def->priv_spec != RISCV_PROFILE_ATTR_UNUSED) { + assert(def->priv_spec <= PRIV_VERSION_LATEST); + mcc->def->priv_spec = def->priv_spec; + } + if (def->vext_spec != RISCV_PROFILE_ATTR_UNUSED) { + assert(def->vext_spec != 0); + mcc->def->vext_spec = def->vext_spec; + } + mcc->def->misa_ext |= def->misa_ext; + + riscv_cpu_cfg_merge(&mcc->def->cfg, &def->cfg); - mcc->misa_mxl_max = (RISCVMXL)GPOINTER_TO_UINT(data); - riscv_cpu_validate_misa_mxl(mcc); + if (def->custom_csrs) { + assert(!mcc->def->custom_csrs); + mcc->def->custom_csrs = def->custom_csrs; + } + } + + if (!object_class_is_abstract(c)) { + riscv_cpu_validate_misa_mxl(mcc); + } } static void riscv_isa_string_ext(RISCVCPU *cpu, char **isa_str, @@ -3160,41 +2886,34 @@ void riscv_isa_write_fdt(RISCVCPU *cpu, void *fdt, char *nodename) } #endif -#define DEFINE_DYNAMIC_CPU(type_name, misa_mxl_max, initfn) \ +#define DEFINE_ABSTRACT_RISCV_CPU(type_name, parent_type_name, ...) \ { \ .name = (type_name), \ - .parent = TYPE_RISCV_DYNAMIC_CPU, \ - .instance_init = (initfn), \ - .class_init = riscv_cpu_class_init, \ - .class_data = GUINT_TO_POINTER(misa_mxl_max) \ + .parent = (parent_type_name), \ + .abstract = true, \ + .class_data = &(const RISCVCPUDef) { \ + .priv_spec = RISCV_PROFILE_ATTR_UNUSED, \ + .vext_spec = RISCV_PROFILE_ATTR_UNUSED, \ + .cfg.max_satp_mode = -1, \ + __VA_ARGS__ \ + }, \ } -#define DEFINE_VENDOR_CPU(type_name, misa_mxl_max, initfn) \ +#define DEFINE_RISCV_CPU(type_name, parent_type_name, ...) 
\ { \ .name = (type_name), \ - .parent = TYPE_RISCV_VENDOR_CPU, \ - .instance_init = (initfn), \ - .class_init = riscv_cpu_class_init, \ - .class_data = GUINT_TO_POINTER(misa_mxl_max) \ + .parent = (parent_type_name), \ + .class_data = &(const RISCVCPUDef) { \ + .priv_spec = RISCV_PROFILE_ATTR_UNUSED, \ + .vext_spec = RISCV_PROFILE_ATTR_UNUSED, \ + .cfg.max_satp_mode = -1, \ + __VA_ARGS__ \ + }, \ } -#define DEFINE_BARE_CPU(type_name, misa_mxl_max, initfn) \ - { \ - .name = (type_name), \ - .parent = TYPE_RISCV_BARE_CPU, \ - .instance_init = (initfn), \ - .class_init = riscv_cpu_class_init, \ - .class_data = GUINT_TO_POINTER(misa_mxl_max) \ - } - -#define DEFINE_PROFILE_CPU(type_name, misa_mxl_max, initfn) \ - { \ - .name = (type_name), \ - .parent = TYPE_RISCV_BARE_CPU, \ - .instance_init = (initfn), \ - .class_init = riscv_cpu_class_init, \ - .class_data = GUINT_TO_POINTER(misa_mxl_max) \ - } +#define DEFINE_PROFILE_CPU(type_name, parent_type_name, profile_) \ + DEFINE_RISCV_CPU(type_name, parent_type_name, \ + .profile = &(profile_)) static const TypeInfo riscv_cpu_type_infos[] = { { @@ -3203,67 +2922,370 @@ static const TypeInfo riscv_cpu_type_infos[] = { .instance_size = sizeof(RISCVCPU), .instance_align = __alignof(RISCVCPU), .instance_init = riscv_cpu_init, - .instance_post_init = riscv_cpu_post_init, .abstract = true, .class_size = sizeof(RISCVCPUClass), .class_init = riscv_cpu_common_class_init, + .class_base_init = riscv_cpu_class_base_init, }, - { - .name = TYPE_RISCV_DYNAMIC_CPU, - .parent = TYPE_RISCV_CPU, - .abstract = true, - }, - { - .name = TYPE_RISCV_VENDOR_CPU, - .parent = TYPE_RISCV_CPU, - .abstract = true, - }, - { - .name = TYPE_RISCV_BARE_CPU, - .parent = TYPE_RISCV_CPU, - .instance_init = riscv_bare_cpu_init, - .abstract = true, - }, + + DEFINE_ABSTRACT_RISCV_CPU(TYPE_RISCV_DYNAMIC_CPU, TYPE_RISCV_CPU, + .cfg.mmu = true, + .cfg.pmp = true, + .priv_spec = PRIV_VERSION_LATEST, + ), + + DEFINE_ABSTRACT_RISCV_CPU(TYPE_RISCV_VENDOR_CPU, TYPE_RISCV_CPU), + DEFINE_ABSTRACT_RISCV_CPU(TYPE_RISCV_BARE_CPU, TYPE_RISCV_CPU, + /* + * Bare CPUs do not inherit the timer and performance + * counters from the parent class (see riscv_cpu_init() + * for info on why the parent enables them). + * + * Users have to explicitly enable these counters for + * bare CPUs. + */ + .bare = true, + + /* Set to QEMU's first supported priv version */ + .priv_spec = PRIV_VERSION_1_10_0, + + /* + * Support all available satp_mode settings. By default + * only MBARE will be available if the user doesn't enable + * a mode manually (see riscv_cpu_satp_mode_finalize()). 
+ */ +#ifdef TARGET_RISCV32 + .cfg.max_satp_mode = VM_1_10_SV32, +#else + .cfg.max_satp_mode = VM_1_10_SV57, +#endif + ), + + DEFINE_RISCV_CPU(TYPE_RISCV_CPU_MAX, TYPE_RISCV_DYNAMIC_CPU, #if defined(TARGET_RISCV32) - DEFINE_DYNAMIC_CPU(TYPE_RISCV_CPU_MAX, MXL_RV32, riscv_max_cpu_init), + .misa_mxl_max = MXL_RV32, + .cfg.max_satp_mode = VM_1_10_SV32, #elif defined(TARGET_RISCV64) - DEFINE_DYNAMIC_CPU(TYPE_RISCV_CPU_MAX, MXL_RV64, riscv_max_cpu_init), + .misa_mxl_max = MXL_RV64, + .cfg.max_satp_mode = VM_1_10_SV57, #endif + ), + + DEFINE_ABSTRACT_RISCV_CPU(TYPE_RISCV_CPU_SIFIVE_E, TYPE_RISCV_VENDOR_CPU, + .misa_ext = RVI | RVM | RVA | RVC | RVU, + .priv_spec = PRIV_VERSION_1_10_0, + .cfg.max_satp_mode = VM_1_10_MBARE, + .cfg.ext_zifencei = true, + .cfg.ext_zicsr = true, + .cfg.pmp = true, + .cfg.pmp_regions = 8 + ), + + DEFINE_ABSTRACT_RISCV_CPU(TYPE_RISCV_CPU_SIFIVE_U, TYPE_RISCV_VENDOR_CPU, + .misa_ext = RVI | RVM | RVA | RVF | RVD | RVC | RVS | RVU, + .priv_spec = PRIV_VERSION_1_10_0, + + .cfg.max_satp_mode = VM_1_10_SV39, + .cfg.ext_zifencei = true, + .cfg.ext_zicsr = true, + .cfg.mmu = true, + .cfg.pmp = true, + .cfg.pmp_regions = 8 + ), #if defined(TARGET_RISCV32) || \ (defined(TARGET_RISCV64) && !defined(CONFIG_USER_ONLY)) - DEFINE_DYNAMIC_CPU(TYPE_RISCV_CPU_BASE32, MXL_RV32, rv32_base_cpu_init), - DEFINE_VENDOR_CPU(TYPE_RISCV_CPU_IBEX, MXL_RV32, rv32_ibex_cpu_init), - DEFINE_VENDOR_CPU(TYPE_RISCV_CPU_SIFIVE_E31, MXL_RV32, rv32_sifive_e_cpu_init), - DEFINE_VENDOR_CPU(TYPE_RISCV_CPU_SIFIVE_E34, MXL_RV32, rv32_imafcu_nommu_cpu_init), - DEFINE_VENDOR_CPU(TYPE_RISCV_CPU_SIFIVE_U34, MXL_RV32, rv32_sifive_u_cpu_init), - DEFINE_BARE_CPU(TYPE_RISCV_CPU_RV32I, MXL_RV32, rv32i_bare_cpu_init), - DEFINE_BARE_CPU(TYPE_RISCV_CPU_RV32E, MXL_RV32, rv32e_bare_cpu_init), + DEFINE_RISCV_CPU(TYPE_RISCV_CPU_BASE32, TYPE_RISCV_DYNAMIC_CPU, + .cfg.max_satp_mode = VM_1_10_SV32, + .misa_mxl_max = MXL_RV32, + ), + + DEFINE_RISCV_CPU(TYPE_RISCV_CPU_IBEX, TYPE_RISCV_VENDOR_CPU, + .misa_mxl_max = MXL_RV32, + .misa_ext = RVI | RVM | RVC | RVU, + .priv_spec = PRIV_VERSION_1_12_0, + .cfg.max_satp_mode = VM_1_10_MBARE, + .cfg.ext_zifencei = true, + .cfg.ext_zicsr = true, + .cfg.pmp = true, + .cfg.ext_smepmp = true, + + .cfg.ext_zba = true, + .cfg.ext_zbb = true, + .cfg.ext_zbc = true, + .cfg.ext_zbs = true + ), + + DEFINE_RISCV_CPU(TYPE_RISCV_CPU_SIFIVE_E31, TYPE_RISCV_CPU_SIFIVE_E, + .misa_mxl_max = MXL_RV32 + ), + DEFINE_RISCV_CPU(TYPE_RISCV_CPU_SIFIVE_E34, TYPE_RISCV_CPU_SIFIVE_E, + .misa_mxl_max = MXL_RV32, + .misa_ext = RVF, /* IMAFCU */ + ), + + DEFINE_RISCV_CPU(TYPE_RISCV_CPU_SIFIVE_U34, TYPE_RISCV_CPU_SIFIVE_U, + .misa_mxl_max = MXL_RV32, + ), + + DEFINE_RISCV_CPU(TYPE_RISCV_CPU_RV32I, TYPE_RISCV_BARE_CPU, + .misa_mxl_max = MXL_RV32, + .misa_ext = RVI + ), + DEFINE_RISCV_CPU(TYPE_RISCV_CPU_RV32E, TYPE_RISCV_BARE_CPU, + .misa_mxl_max = MXL_RV32, + .misa_ext = RVE + ), #endif #if (defined(TARGET_RISCV64) && !defined(CONFIG_USER_ONLY)) - DEFINE_DYNAMIC_CPU(TYPE_RISCV_CPU_MAX32, MXL_RV32, riscv_max_cpu_init), + DEFINE_RISCV_CPU(TYPE_RISCV_CPU_MAX32, TYPE_RISCV_DYNAMIC_CPU, + .cfg.max_satp_mode = VM_1_10_SV32, + .misa_mxl_max = MXL_RV32, + ), #endif #if defined(TARGET_RISCV64) - DEFINE_DYNAMIC_CPU(TYPE_RISCV_CPU_BASE64, MXL_RV64, rv64_base_cpu_init), - DEFINE_VENDOR_CPU(TYPE_RISCV_CPU_SIFIVE_E51, MXL_RV64, rv64_sifive_e_cpu_init), - DEFINE_VENDOR_CPU(TYPE_RISCV_CPU_SIFIVE_U54, MXL_RV64, rv64_sifive_u_cpu_init), - DEFINE_VENDOR_CPU(TYPE_RISCV_CPU_SHAKTI_C, MXL_RV64, rv64_sifive_u_cpu_init), - 
DEFINE_VENDOR_CPU(TYPE_RISCV_CPU_THEAD_C906, MXL_RV64, rv64_thead_c906_cpu_init), - DEFINE_VENDOR_CPU(TYPE_RISCV_CPU_TT_ASCALON, MXL_RV64, rv64_tt_ascalon_cpu_init), - DEFINE_VENDOR_CPU(TYPE_RISCV_CPU_VEYRON_V1, MXL_RV64, rv64_veyron_v1_cpu_init), - DEFINE_VENDOR_CPU(TYPE_RISCV_CPU_XIANGSHAN_NANHU, - MXL_RV64, rv64_xiangshan_nanhu_cpu_init), + DEFINE_RISCV_CPU(TYPE_RISCV_CPU_BASE64, TYPE_RISCV_DYNAMIC_CPU, + .cfg.max_satp_mode = VM_1_10_SV57, + .misa_mxl_max = MXL_RV64, + ), + + DEFINE_RISCV_CPU(TYPE_RISCV_CPU_SIFIVE_E51, TYPE_RISCV_CPU_SIFIVE_E, + .misa_mxl_max = MXL_RV64 + ), + + DEFINE_RISCV_CPU(TYPE_RISCV_CPU_SIFIVE_U54, TYPE_RISCV_CPU_SIFIVE_U, + .misa_mxl_max = MXL_RV64, + ), + + DEFINE_RISCV_CPU(TYPE_RISCV_CPU_SHAKTI_C, TYPE_RISCV_CPU_SIFIVE_U, + .misa_mxl_max = MXL_RV64, + ), + + DEFINE_RISCV_CPU(TYPE_RISCV_CPU_THEAD_C906, TYPE_RISCV_VENDOR_CPU, + .misa_mxl_max = MXL_RV64, + .misa_ext = RVG | RVC | RVS | RVU, + .priv_spec = PRIV_VERSION_1_11_0, + + .cfg.ext_zfa = true, + .cfg.ext_zfh = true, + .cfg.mmu = true, + .cfg.ext_xtheadba = true, + .cfg.ext_xtheadbb = true, + .cfg.ext_xtheadbs = true, + .cfg.ext_xtheadcmo = true, + .cfg.ext_xtheadcondmov = true, + .cfg.ext_xtheadfmemidx = true, + .cfg.ext_xtheadmac = true, + .cfg.ext_xtheadmemidx = true, + .cfg.ext_xtheadmempair = true, + .cfg.ext_xtheadsync = true, + .cfg.pmp = true, + + .cfg.mvendorid = THEAD_VENDOR_ID, + + .cfg.max_satp_mode = VM_1_10_SV39, +#ifndef CONFIG_USER_ONLY + .custom_csrs = th_csr_list, +#endif + ), + + DEFINE_RISCV_CPU(TYPE_RISCV_CPU_TT_ASCALON, TYPE_RISCV_VENDOR_CPU, + .misa_mxl_max = MXL_RV64, + .misa_ext = RVG | RVC | RVS | RVU | RVH | RVV, + .priv_spec = PRIV_VERSION_1_13_0, + .vext_spec = VEXT_VERSION_1_00_0, + + /* ISA extensions */ + .cfg.mmu = true, + .cfg.vlenb = 256 >> 3, + .cfg.elen = 64, + .cfg.rvv_ma_all_1s = true, + .cfg.rvv_ta_all_1s = true, + .cfg.misa_w = true, + .cfg.pmp = true, + .cfg.cbom_blocksize = 64, + .cfg.cbop_blocksize = 64, + .cfg.cboz_blocksize = 64, + .cfg.ext_zic64b = true, + .cfg.ext_zicbom = true, + .cfg.ext_zicbop = true, + .cfg.ext_zicboz = true, + .cfg.ext_zicntr = true, + .cfg.ext_zicond = true, + .cfg.ext_zicsr = true, + .cfg.ext_zifencei = true, + .cfg.ext_zihintntl = true, + .cfg.ext_zihintpause = true, + .cfg.ext_zihpm = true, + .cfg.ext_zimop = true, + .cfg.ext_zawrs = true, + .cfg.ext_zfa = true, + .cfg.ext_zfbfmin = true, + .cfg.ext_zfh = true, + .cfg.ext_zfhmin = true, + .cfg.ext_zcb = true, + .cfg.ext_zcmop = true, + .cfg.ext_zba = true, + .cfg.ext_zbb = true, + .cfg.ext_zbs = true, + .cfg.ext_zkt = true, + .cfg.ext_zvbb = true, + .cfg.ext_zvbc = true, + .cfg.ext_zvfbfmin = true, + .cfg.ext_zvfbfwma = true, + .cfg.ext_zvfh = true, + .cfg.ext_zvfhmin = true, + .cfg.ext_zvkng = true, + .cfg.ext_smaia = true, + .cfg.ext_smstateen = true, + .cfg.ext_ssaia = true, + .cfg.ext_sscofpmf = true, + .cfg.ext_sstc = true, + .cfg.ext_svade = true, + .cfg.ext_svinval = true, + .cfg.ext_svnapot = true, + .cfg.ext_svpbmt = true, + + .cfg.max_satp_mode = VM_1_10_SV57, + ), + + DEFINE_RISCV_CPU(TYPE_RISCV_CPU_VEYRON_V1, TYPE_RISCV_VENDOR_CPU, + .misa_mxl_max = MXL_RV64, + .misa_ext = RVG | RVC | RVS | RVU | RVH, + .priv_spec = PRIV_VERSION_1_12_0, + + /* ISA extensions */ + .cfg.mmu = true, + .cfg.ext_zifencei = true, + .cfg.ext_zicsr = true, + .cfg.pmp = true, + .cfg.ext_zicbom = true, + .cfg.cbom_blocksize = 64, + .cfg.cboz_blocksize = 64, + .cfg.ext_zicboz = true, + .cfg.ext_smaia = true, + .cfg.ext_ssaia = true, + .cfg.ext_sscofpmf = true, + .cfg.ext_sstc = true, + 
.cfg.ext_svinval = true, + .cfg.ext_svnapot = true, + .cfg.ext_svpbmt = true, + .cfg.ext_smstateen = true, + .cfg.ext_zba = true, + .cfg.ext_zbb = true, + .cfg.ext_zbc = true, + .cfg.ext_zbs = true, + .cfg.ext_XVentanaCondOps = true, + + .cfg.mvendorid = VEYRON_V1_MVENDORID, + .cfg.marchid = VEYRON_V1_MARCHID, + .cfg.mimpid = VEYRON_V1_MIMPID, + + .cfg.max_satp_mode = VM_1_10_SV48, + ), + + DEFINE_RISCV_CPU(TYPE_RISCV_CPU_XIANGSHAN_NANHU, TYPE_RISCV_VENDOR_CPU, + .misa_mxl_max = MXL_RV64, + .misa_ext = RVG | RVC | RVB | RVS | RVU, + .priv_spec = PRIV_VERSION_1_12_0, + + /* ISA extensions */ + .cfg.ext_zbc = true, + .cfg.ext_zbkb = true, + .cfg.ext_zbkc = true, + .cfg.ext_zbkx = true, + .cfg.ext_zknd = true, + .cfg.ext_zkne = true, + .cfg.ext_zknh = true, + .cfg.ext_zksed = true, + .cfg.ext_zksh = true, + .cfg.ext_svinval = true, + + .cfg.mmu = true, + .cfg.pmp = true, + + .cfg.max_satp_mode = VM_1_10_SV39, + ), + + DEFINE_RISCV_CPU(TYPE_RISCV_CPU_XIANGSHAN_KMH, TYPE_RISCV_VENDOR_CPU, + .misa_mxl_max = MXL_RV64, + .misa_ext = RVG | RVC | RVB | RVS | RVU | RVH | RVV, + .priv_spec = PRIV_VERSION_1_13_0, + /* + * The RISC-V Instruction Set Manual: Volume I + * Unprivileged Architecture + */ + .cfg.ext_zicntr = true, + .cfg.ext_zihpm = true, + .cfg.ext_zihintntl = true, + .cfg.ext_zihintpause = true, + .cfg.ext_zimop = true, + .cfg.ext_zcmop = true, + .cfg.ext_zicond = true, + .cfg.ext_zawrs = true, + .cfg.ext_zacas = true, + .cfg.ext_zfh = true, + .cfg.ext_zfa = true, + .cfg.ext_zcb = true, + .cfg.ext_zbc = true, + .cfg.ext_zvfh = true, + .cfg.ext_zkn = true, + .cfg.ext_zks = true, + .cfg.ext_zkt = true, + .cfg.ext_zvbb = true, + .cfg.ext_zvkt = true, + /* + * The RISC-V Instruction Set Manual: Volume II + * Privileged Architecture + */ + .cfg.ext_smstateen = true, + .cfg.ext_smcsrind = true, + .cfg.ext_sscsrind = true, + .cfg.ext_svnapot = true, + .cfg.ext_svpbmt = true, + .cfg.ext_svinval = true, + .cfg.ext_sstc = true, + .cfg.ext_sscofpmf = true, + .cfg.ext_ssdbltrp = true, + .cfg.ext_ssnpm = true, + .cfg.ext_smnpm = true, + .cfg.ext_smmpm = true, + .cfg.ext_sspm = true, + .cfg.ext_supm = true, + /* The RISC-V Advanced Interrupt Architecture */ + .cfg.ext_smaia = true, + .cfg.ext_ssaia = true, + /* RVA23 Profiles */ + .cfg.ext_zicbom = true, + .cfg.ext_zicbop = true, + .cfg.ext_zicboz = true, + .cfg.ext_svade = true, + .cfg.mmu = true, + .cfg.pmp = true, + .cfg.max_satp_mode = VM_1_10_SV48, + ), + #if defined(CONFIG_TCG) && !defined(CONFIG_USER_ONLY) - DEFINE_DYNAMIC_CPU(TYPE_RISCV_CPU_BASE128, MXL_RV128, rv128_base_cpu_init), -#endif /* CONFIG_TCG && !CONFIG_USER_ONLY */ - DEFINE_BARE_CPU(TYPE_RISCV_CPU_RV64I, MXL_RV64, rv64i_bare_cpu_init), - DEFINE_BARE_CPU(TYPE_RISCV_CPU_RV64E, MXL_RV64, rv64e_bare_cpu_init), - DEFINE_PROFILE_CPU(TYPE_RISCV_CPU_RVA22U64, MXL_RV64, rva22u64_profile_cpu_init), - DEFINE_PROFILE_CPU(TYPE_RISCV_CPU_RVA22S64, MXL_RV64, rva22s64_profile_cpu_init), - DEFINE_PROFILE_CPU(TYPE_RISCV_CPU_RVA23U64, MXL_RV64, rva23u64_profile_cpu_init), - DEFINE_PROFILE_CPU(TYPE_RISCV_CPU_RVA23S64, MXL_RV64, rva23s64_profile_cpu_init), + DEFINE_RISCV_CPU(TYPE_RISCV_CPU_BASE128, TYPE_RISCV_DYNAMIC_CPU, + .cfg.max_satp_mode = VM_1_10_SV57, + .misa_mxl_max = MXL_RV128, + ), +#endif /* CONFIG_TCG */ + DEFINE_RISCV_CPU(TYPE_RISCV_CPU_RV64I, TYPE_RISCV_BARE_CPU, + .misa_mxl_max = MXL_RV64, + .misa_ext = RVI + ), + DEFINE_RISCV_CPU(TYPE_RISCV_CPU_RV64E, TYPE_RISCV_BARE_CPU, + .misa_mxl_max = MXL_RV64, + .misa_ext = RVE + ), + + DEFINE_PROFILE_CPU(TYPE_RISCV_CPU_RVA22U64, 
TYPE_RISCV_CPU_RV64I, RVA22U64), + DEFINE_PROFILE_CPU(TYPE_RISCV_CPU_RVA22S64, TYPE_RISCV_CPU_RV64I, RVA22S64), + DEFINE_PROFILE_CPU(TYPE_RISCV_CPU_RVA23U64, TYPE_RISCV_CPU_RV64I, RVA23U64), + DEFINE_PROFILE_CPU(TYPE_RISCV_CPU_RVA23S64, TYPE_RISCV_CPU_RV64I, RVA23S64), #endif /* TARGET_RISCV64 */ }; diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h index b56d3af..4c13012 100644 --- a/target/riscv/cpu.h +++ b/target/riscv/cpu.h @@ -50,7 +50,7 @@ typedef struct CPUArchState CPURISCVState; */ #define RISCV_UW2_ALWAYS_STORE_AMO 1 -#define RV(x) ((target_ulong)1 << (x - 'A')) +#define RV(x) BIT(x - 'A') /* * Update misa_bits[], misa_ext_info_arr[] and misa_ext_cfgs[] @@ -82,7 +82,22 @@ typedef struct riscv_cpu_profile { struct riscv_cpu_profile *s_parent; const char *name; uint32_t misa_ext; + /* + * The profile is enabled/disabled via command line or + * via cpu_init(). Enabling a profile will add all its + * mandatory extensions in the CPU during init(). + */ bool enabled; + /* + * The profile is present in the CPU, i.e. the current set of + * CPU extensions complies with it. A profile can be enabled + * and not present (e.g. the user disabled a mandatory extension) + * and the other way around (e.g. all mandatory extensions are + * present in a non-profile CPU). + * + * QMP uses this flag. + */ + bool present; bool user_set; int priv_spec; int satp_mode; @@ -159,7 +174,8 @@ extern RISCVCPUImpliedExtsRule *riscv_multi_ext_implied_rules[]; #define MMU_USER_IDX 3 -#define MAX_RISCV_PMPS (16) +#define MAX_RISCV_PMPS (64) +#define OLD_MAX_RISCV_PMPS (16) #if !defined(CONFIG_USER_ONLY) #include "pmp.h" @@ -500,6 +516,19 @@ struct CPUArchState { }; /* + * map is a 16-bit bitmap: the most significant set bit in map is the maximum + * satp mode that is supported. It may be chosen by the user and must respect + * what qemu implements (valid_1_10_32/64) and what the hw is capable of + * (supported bitmap below). + * + * init is a 16-bit bitmap used to make sure the user selected a correct + * configuration as per the specification. + */ +typedef struct { + uint16_t map, init; +} RISCVSATPModes; + +/* * RISCVCPU: * @env: #CPURISCVState * @@ -515,6 +544,7 @@ struct ArchCPU { /* Configuration Settings */ RISCVCPUConfig cfg; + RISCVSATPModes satp_modes; QEMUTimer *pmu_timer; /* A bitmask of Available programmable counters */ @@ -524,6 +554,19 @@ struct ArchCPU { const GPtrArray *decoders; }; +typedef struct RISCVCSR RISCVCSR; + +typedef struct RISCVCPUDef { + RISCVMXL misa_mxl_max; /* max mxl for this cpu */ + RISCVCPUProfile *profile; + uint32_t misa_ext; + int priv_spec; + int32_t vext_spec; + RISCVCPUConfig cfg; + bool bare; + const RISCVCSR *custom_csrs; +} RISCVCPUDef; + /** * RISCVCPUClass: * @parent_realize: The parent class' realize handler. 
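
[Editorial aside -- not part of the patch.] The hunks above replace the per-model instance_init functions with declarative RISCVCPUDef structures passed as class_data and folded together in riscv_cpu_class_base_init(): each class starts from a copy of its parent's def, then ORs in misa bits, merges the cfg, and lets scalar fields such as misa_mxl_max or priv_spec override the inherited values. The standalone sketch below only illustrates that inheritance idea; cpu_def_t, def_derive() and its fields are invented for the example and are not QEMU APIs.

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    /* Toy stand-in for RISCVCPUDef: a few illustrative fields only. */
    typedef struct {
        int      misa_mxl_max;   /* 0 means "inherit from the parent"  */
        uint32_t misa_ext;       /* OR-ed into the parent's value      */
        bool     mmu;
        bool     pmp;
        int8_t   max_satp_mode;  /* -1 means "inherit from the parent" */
    } cpu_def_t;

    /*
     * Rough analogue of riscv_cpu_class_base_init(): copy the parent's
     * def (or start empty for the root class) and apply the child's
     * overrides on top.
     */
    static cpu_def_t *def_derive(const cpu_def_t *parent, const cpu_def_t *child)
    {
        cpu_def_t *def = malloc(sizeof(*def));

        if (parent) {
            *def = *parent;
        } else {
            memset(def, 0, sizeof(*def));
            def->max_satp_mode = -1;
        }
        if (child) {
            if (child->misa_mxl_max) {
                def->misa_mxl_max = child->misa_mxl_max;
            }
            def->misa_ext |= child->misa_ext;   /* misa bits accumulate  */
            def->mmu |= child->mmu;             /* booleans only turn on */
            def->pmp |= child->pmp;
            if (child->max_satp_mode != -1) {   /* scalars override      */
                def->max_satp_mode = child->max_satp_mode;
            }
        }
        return def;
    }

    int main(void)
    {
        /* abstract "vendor" parent: PMP on, everything else left alone */
        cpu_def_t vendor_delta = { .pmp = true, .max_satp_mode = -1 };
        /* leaf model: only states its own deltas */
        cpu_def_t leaf_delta = {
            .misa_mxl_max = 64, .misa_ext = 1u << 2, .mmu = true,
            .max_satp_mode = 9,
        };

        cpu_def_t *root = def_derive(NULL, NULL);
        cpu_def_t *vendor = def_derive(root, &vendor_delta);
        cpu_def_t *leaf = def_derive(vendor, &leaf_delta);

        printf("leaf: mxl=%d ext=%#x mmu=%d pmp=%d satp=%d\n",
               leaf->misa_mxl_max, (unsigned)leaf->misa_ext,
               leaf->mmu, leaf->pmp, leaf->max_satp_mode);

        free(leaf);
        free(vendor);
        free(root);
        return 0;
    }

The practical effect, visible in the DEFINE_RISCV_CPU entries further down, is that leaf models such as TYPE_RISCV_CPU_SIFIVE_E31 only need to state their deltas (there, just misa_mxl_max) and inherit everything else from their abstract parent type.
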
@@ -536,10 +579,10 @@ struct RISCVCPUClass { DeviceRealize parent_realize; ResettablePhases parent_phases; - RISCVMXL misa_mxl_max; /* max mxl for this cpu */ + RISCVCPUDef *def; }; -static inline int riscv_has_ext(CPURISCVState *env, target_ulong ext) +static inline int riscv_has_ext(CPURISCVState *env, uint32_t ext) { return (env->misa_ext & ext) != 0; } @@ -549,6 +592,7 @@ static inline int riscv_has_ext(CPURISCVState *env, target_ulong ext) extern const char * const riscv_int_regnames[]; extern const char * const riscv_int_regnamesh[]; extern const char * const riscv_fpr_regnames[]; +extern const char * const riscv_rvv_regnames[]; const char *riscv_cpu_get_trap_name(target_ulong cause, bool async); int riscv_cpu_write_elf64_note(WriteCoreDumpFunction f, CPUState *cs, @@ -830,7 +874,7 @@ static inline void riscv_csr_write(CPURISCVState *env, int csrno, static inline target_ulong riscv_csr_read(CPURISCVState *env, int csrno) { target_ulong val = 0; - riscv_csrrw(env, csrno, &val, 0, 0, 0); + riscv_csrr(env, csrno, &val); return val; } @@ -869,6 +913,12 @@ typedef struct { uint32_t min_priv_ver; } riscv_csr_operations; +struct RISCVCSR { + int csrno; + bool (*insertion_test)(RISCVCPU *cpu); + riscv_csr_operations csr_ops; +}; + /* CSR function table constants */ enum { CSR_TABLE_SIZE = 0x1000 @@ -903,7 +953,6 @@ extern const RISCVCPUMultiExtConfig riscv_cpu_extensions[]; extern const RISCVCPUMultiExtConfig riscv_cpu_vendor_exts[]; extern const RISCVCPUMultiExtConfig riscv_cpu_experimental_exts[]; extern const RISCVCPUMultiExtConfig riscv_cpu_named_features[]; -extern const RISCVCPUMultiExtConfig riscv_cpu_deprecated_exts[]; typedef struct isa_ext_data { const char *name; @@ -923,18 +972,17 @@ extern riscv_csr_operations csr_ops[CSR_TABLE_SIZE]; extern const bool valid_vm_1_10_32[], valid_vm_1_10_64[]; void riscv_get_csr_ops(int csrno, riscv_csr_operations *ops); -void riscv_set_csr_ops(int csrno, riscv_csr_operations *ops); +void riscv_set_csr_ops(int csrno, const riscv_csr_operations *ops); void riscv_cpu_register_gdb_regs_for_features(CPUState *cs); target_ulong riscv_new_csr_seed(target_ulong new_value, target_ulong write_mask); -uint8_t satp_mode_max_from_map(uint32_t map); const char *satp_mode_str(uint8_t satp_mode, bool is_32_bit); -/* Implemented in th_csr.c */ -void th_register_custom_csrs(RISCVCPU *cpu); +/* In th_csr.c */ +extern const RISCVCSR th_csr_list[]; const char *priv_spec_to_str(int priv_version); #endif /* RISCV_CPU_H */ diff --git a/target/riscv/cpu_bits.h b/target/riscv/cpu_bits.h index a30317c..b62dd82 100644 --- a/target/riscv/cpu_bits.h +++ b/target/riscv/cpu_bits.h @@ -372,6 +372,18 @@ #define CSR_PMPCFG1 0x3a1 #define CSR_PMPCFG2 0x3a2 #define CSR_PMPCFG3 0x3a3 +#define CSR_PMPCFG4 0x3a4 +#define CSR_PMPCFG5 0x3a5 +#define CSR_PMPCFG6 0x3a6 +#define CSR_PMPCFG7 0x3a7 +#define CSR_PMPCFG8 0x3a8 +#define CSR_PMPCFG9 0x3a9 +#define CSR_PMPCFG10 0x3aa +#define CSR_PMPCFG11 0x3ab +#define CSR_PMPCFG12 0x3ac +#define CSR_PMPCFG13 0x3ad +#define CSR_PMPCFG14 0x3ae +#define CSR_PMPCFG15 0x3af #define CSR_PMPADDR0 0x3b0 #define CSR_PMPADDR1 0x3b1 #define CSR_PMPADDR2 0x3b2 @@ -388,6 +400,54 @@ #define CSR_PMPADDR13 0x3bd #define CSR_PMPADDR14 0x3be #define CSR_PMPADDR15 0x3bf +#define CSR_PMPADDR16 0x3c0 +#define CSR_PMPADDR17 0x3c1 +#define CSR_PMPADDR18 0x3c2 +#define CSR_PMPADDR19 0x3c3 +#define CSR_PMPADDR20 0x3c4 +#define CSR_PMPADDR21 0x3c5 +#define CSR_PMPADDR22 0x3c6 +#define CSR_PMPADDR23 0x3c7 +#define CSR_PMPADDR24 0x3c8 +#define CSR_PMPADDR25 0x3c9 +#define 
CSR_PMPADDR26 0x3ca +#define CSR_PMPADDR27 0x3cb +#define CSR_PMPADDR28 0x3cc +#define CSR_PMPADDR29 0x3cd +#define CSR_PMPADDR30 0x3ce +#define CSR_PMPADDR31 0x3cf +#define CSR_PMPADDR32 0x3d0 +#define CSR_PMPADDR33 0x3d1 +#define CSR_PMPADDR34 0x3d2 +#define CSR_PMPADDR35 0x3d3 +#define CSR_PMPADDR36 0x3d4 +#define CSR_PMPADDR37 0x3d5 +#define CSR_PMPADDR38 0x3d6 +#define CSR_PMPADDR39 0x3d7 +#define CSR_PMPADDR40 0x3d8 +#define CSR_PMPADDR41 0x3d9 +#define CSR_PMPADDR42 0x3da +#define CSR_PMPADDR43 0x3db +#define CSR_PMPADDR44 0x3dc +#define CSR_PMPADDR45 0x3dd +#define CSR_PMPADDR46 0x3de +#define CSR_PMPADDR47 0x3df +#define CSR_PMPADDR48 0x3e0 +#define CSR_PMPADDR49 0x3e1 +#define CSR_PMPADDR50 0x3e2 +#define CSR_PMPADDR51 0x3e3 +#define CSR_PMPADDR52 0x3e4 +#define CSR_PMPADDR53 0x3e5 +#define CSR_PMPADDR54 0x3e6 +#define CSR_PMPADDR55 0x3e7 +#define CSR_PMPADDR56 0x3e8 +#define CSR_PMPADDR57 0x3e9 +#define CSR_PMPADDR58 0x3ea +#define CSR_PMPADDR59 0x3eb +#define CSR_PMPADDR60 0x3ec +#define CSR_PMPADDR61 0x3ed +#define CSR_PMPADDR62 0x3ee +#define CSR_PMPADDR63 0x3ef /* RNMI */ #define CSR_MNSCRATCH 0x740 @@ -675,7 +735,8 @@ typedef enum { #define PTE_SOFT 0x300 /* Reserved for Software */ #define PTE_PBMT 0x6000000000000000ULL /* Page-based memory types */ #define PTE_N 0x8000000000000000ULL /* NAPOT translation */ -#define PTE_RESERVED 0x1FC0000000000000ULL /* Reserved bits */ +#define PTE_RESERVED(svrsw60t59b) \ + (svrsw60t59b ? 0x07C0000000000000ULL : 0x1FC0000000000000ULL) /* Reserved bits */ #define PTE_ATTR (PTE_N | PTE_PBMT) /* All attributes bits */ /* Page table PPN shift amount */ diff --git a/target/riscv/cpu_cfg.h b/target/riscv/cpu_cfg.h index cfe371b..aa28dc8 100644 --- a/target/riscv/cpu_cfg.h +++ b/target/riscv/cpu_cfg.h @@ -21,182 +21,10 @@ #ifndef RISCV_CPU_CFG_H #define RISCV_CPU_CFG_H -/* - * map is a 16-bit bitmap: the most significant set bit in map is the maximum - * satp mode that is supported. It may be chosen by the user and must respect - * what qemu implements (valid_1_10_32/64) and what the hw is capable of - * (supported bitmap below). - * - * init is a 16-bit bitmap used to make sure the user selected a correct - * configuration as per the specification. - * - * supported is a 16-bit bitmap used to reflect the hw capabilities. 
- */ -typedef struct { - uint16_t map, init, supported; -} RISCVSATPMap; - struct RISCVCPUConfig { - bool ext_zba; - bool ext_zbb; - bool ext_zbc; - bool ext_zbkb; - bool ext_zbkc; - bool ext_zbkx; - bool ext_zbs; - bool ext_zca; - bool ext_zcb; - bool ext_zcd; - bool ext_zce; - bool ext_zcf; - bool ext_zcmp; - bool ext_zcmt; - bool ext_zk; - bool ext_zkn; - bool ext_zknd; - bool ext_zkne; - bool ext_zknh; - bool ext_zkr; - bool ext_zks; - bool ext_zksed; - bool ext_zksh; - bool ext_zkt; - bool ext_zifencei; - bool ext_zicntr; - bool ext_zicsr; - bool ext_zicbom; - bool ext_zicbop; - bool ext_zicboz; - bool ext_zicfilp; - bool ext_zicfiss; - bool ext_zicond; - bool ext_zihintntl; - bool ext_zihintpause; - bool ext_zihpm; - bool ext_zimop; - bool ext_zcmop; - bool ext_ztso; - bool ext_smstateen; - bool ext_sstc; - bool ext_smcdeleg; - bool ext_ssccfg; - bool ext_smcntrpmf; - bool ext_smcsrind; - bool ext_sscsrind; - bool ext_ssdbltrp; - bool ext_smdbltrp; - bool ext_svadu; - bool ext_svinval; - bool ext_svnapot; - bool ext_svpbmt; - bool ext_svvptc; - bool ext_svukte; - bool ext_zdinx; - bool ext_zaamo; - bool ext_zacas; - bool ext_zama16b; - bool ext_zabha; - bool ext_zalrsc; - bool ext_zawrs; - bool ext_zfa; - bool ext_zfbfmin; - bool ext_zfh; - bool ext_zfhmin; - bool ext_zfinx; - bool ext_zhinx; - bool ext_zhinxmin; - bool ext_zve32f; - bool ext_zve32x; - bool ext_zve64f; - bool ext_zve64d; - bool ext_zve64x; - bool ext_zvbb; - bool ext_zvbc; - bool ext_zvkb; - bool ext_zvkg; - bool ext_zvkned; - bool ext_zvknha; - bool ext_zvknhb; - bool ext_zvksed; - bool ext_zvksh; - bool ext_zvkt; - bool ext_zvkn; - bool ext_zvknc; - bool ext_zvkng; - bool ext_zvks; - bool ext_zvksc; - bool ext_zvksg; - bool ext_zmmul; - bool ext_zvfbfmin; - bool ext_zvfbfwma; - bool ext_zvfh; - bool ext_zvfhmin; - bool ext_smaia; - bool ext_ssaia; - bool ext_smctr; - bool ext_ssctr; - bool ext_sscofpmf; - bool ext_smepmp; - bool ext_smrnmi; - bool ext_ssnpm; - bool ext_smnpm; - bool ext_smmpm; - bool ext_sspm; - bool ext_supm; - bool rvv_ta_all_1s; - bool rvv_ma_all_1s; - bool rvv_vl_half_avl; - - uint32_t mvendorid; - uint64_t marchid; - uint64_t mimpid; - - /* Named features */ - bool ext_svade; - bool ext_zic64b; - bool ext_ssstateen; - bool ext_sha; - - /* - * Always 'true' booleans for named features - * TCG always implement/can't be user disabled, - * based on spec version. 
- */ - bool has_priv_1_13; - bool has_priv_1_12; - bool has_priv_1_11; - - /* Always enabled for TCG if has_priv_1_11 */ - bool ext_ziccrse; - - /* Vendor-specific custom extensions */ - bool ext_xtheadba; - bool ext_xtheadbb; - bool ext_xtheadbs; - bool ext_xtheadcmo; - bool ext_xtheadcondmov; - bool ext_xtheadfmemidx; - bool ext_xtheadfmv; - bool ext_xtheadmac; - bool ext_xtheadmemidx; - bool ext_xtheadmempair; - bool ext_xtheadsync; - bool ext_XVentanaCondOps; - - uint32_t pmu_mask; - uint16_t vlenb; - uint16_t elen; - uint16_t cbom_blocksize; - uint16_t cbop_blocksize; - uint16_t cboz_blocksize; - bool mmu; - bool pmp; - bool debug; - bool misa_w; - - bool short_isa_string; - - RISCVSATPMap satp_mode; +#define BOOL_FIELD(x) bool x; +#define TYPED_FIELD(type, x, default) type x; +#include "cpu_cfg_fields.h.inc" }; typedef struct RISCVCPUConfig RISCVCPUConfig; diff --git a/target/riscv/cpu_cfg_fields.h.inc b/target/riscv/cpu_cfg_fields.h.inc new file mode 100644 index 0000000..e2d116f --- /dev/null +++ b/target/riscv/cpu_cfg_fields.h.inc @@ -0,0 +1,173 @@ +/* + * Required definitions before including this file: + * + * #define BOOL_FIELD(x) + * #define TYPED_FIELD(type, x, default) + */ + +BOOL_FIELD(ext_zba) +BOOL_FIELD(ext_zbb) +BOOL_FIELD(ext_zbc) +BOOL_FIELD(ext_zbkb) +BOOL_FIELD(ext_zbkc) +BOOL_FIELD(ext_zbkx) +BOOL_FIELD(ext_zbs) +BOOL_FIELD(ext_zca) +BOOL_FIELD(ext_zcb) +BOOL_FIELD(ext_zcd) +BOOL_FIELD(ext_zce) +BOOL_FIELD(ext_zcf) +BOOL_FIELD(ext_zcmp) +BOOL_FIELD(ext_zcmt) +BOOL_FIELD(ext_zk) +BOOL_FIELD(ext_zkn) +BOOL_FIELD(ext_zknd) +BOOL_FIELD(ext_zkne) +BOOL_FIELD(ext_zknh) +BOOL_FIELD(ext_zkr) +BOOL_FIELD(ext_zks) +BOOL_FIELD(ext_zksed) +BOOL_FIELD(ext_zksh) +BOOL_FIELD(ext_zkt) +BOOL_FIELD(ext_zifencei) +BOOL_FIELD(ext_zicntr) +BOOL_FIELD(ext_zicsr) +BOOL_FIELD(ext_zicbom) +BOOL_FIELD(ext_zicbop) +BOOL_FIELD(ext_zicboz) +BOOL_FIELD(ext_zicfilp) +BOOL_FIELD(ext_zicfiss) +BOOL_FIELD(ext_zicond) +BOOL_FIELD(ext_zihintntl) +BOOL_FIELD(ext_zihintpause) +BOOL_FIELD(ext_zihpm) +BOOL_FIELD(ext_zimop) +BOOL_FIELD(ext_zcmop) +BOOL_FIELD(ext_ztso) +BOOL_FIELD(ext_smstateen) +BOOL_FIELD(ext_sstc) +BOOL_FIELD(ext_smcdeleg) +BOOL_FIELD(ext_ssccfg) +BOOL_FIELD(ext_smcntrpmf) +BOOL_FIELD(ext_smcsrind) +BOOL_FIELD(ext_sscsrind) +BOOL_FIELD(ext_ssdbltrp) +BOOL_FIELD(ext_smdbltrp) +BOOL_FIELD(ext_svadu) +BOOL_FIELD(ext_svinval) +BOOL_FIELD(ext_svnapot) +BOOL_FIELD(ext_svpbmt) +BOOL_FIELD(ext_svrsw60t59b) +BOOL_FIELD(ext_svvptc) +BOOL_FIELD(ext_svukte) +BOOL_FIELD(ext_zdinx) +BOOL_FIELD(ext_zaamo) +BOOL_FIELD(ext_zacas) +BOOL_FIELD(ext_zama16b) +BOOL_FIELD(ext_zabha) +BOOL_FIELD(ext_zalrsc) +BOOL_FIELD(ext_zawrs) +BOOL_FIELD(ext_zfa) +BOOL_FIELD(ext_zfbfmin) +BOOL_FIELD(ext_zfh) +BOOL_FIELD(ext_zfhmin) +BOOL_FIELD(ext_zfinx) +BOOL_FIELD(ext_zhinx) +BOOL_FIELD(ext_zhinxmin) +BOOL_FIELD(ext_zve32f) +BOOL_FIELD(ext_zve32x) +BOOL_FIELD(ext_zve64f) +BOOL_FIELD(ext_zve64d) +BOOL_FIELD(ext_zve64x) +BOOL_FIELD(ext_zvbb) +BOOL_FIELD(ext_zvbc) +BOOL_FIELD(ext_zvkb) +BOOL_FIELD(ext_zvkg) +BOOL_FIELD(ext_zvkned) +BOOL_FIELD(ext_zvknha) +BOOL_FIELD(ext_zvknhb) +BOOL_FIELD(ext_zvksed) +BOOL_FIELD(ext_zvksh) +BOOL_FIELD(ext_zvkt) +BOOL_FIELD(ext_zvkn) +BOOL_FIELD(ext_zvknc) +BOOL_FIELD(ext_zvkng) +BOOL_FIELD(ext_zvks) +BOOL_FIELD(ext_zvksc) +BOOL_FIELD(ext_zvksg) +BOOL_FIELD(ext_zmmul) +BOOL_FIELD(ext_zvfbfmin) +BOOL_FIELD(ext_zvfbfwma) +BOOL_FIELD(ext_zvfh) +BOOL_FIELD(ext_zvfhmin) +BOOL_FIELD(ext_smaia) +BOOL_FIELD(ext_ssaia) +BOOL_FIELD(ext_smctr) +BOOL_FIELD(ext_ssctr) +BOOL_FIELD(ext_sscofpmf) 
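
[Editorial aside -- not part of the patch; the cpu_cfg_fields.h.inc listing resumes right after it.] Turning the RISCVCPUConfig body into a BOOL_FIELD/TYPED_FIELD include means the same field list can be expanded more than once with different macro definitions, so per-field helpers such as the riscv_cpu_cfg_merge() referenced by the new RISCVCPUDef code can presumably be generated rather than maintained by hand. The sketch below shows the pattern in a single self-contained file with made-up fields; the merge rule is only a plausible guess, since riscv_cpu_cfg_merge()'s body is not part of this excerpt.

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    /*
     * Stand-in for cpu_cfg_fields.h.inc: one central list of fields.  In
     * the real tree the list lives in its own .h.inc file and is pulled
     * in with #include after defining BOOL_FIELD/TYPED_FIELD; a list
     * macro keeps this sketch in one file.
     */
    #define CFG_FIELDS                            \
        BOOL_FIELD(ext_foo)                       \
        BOOL_FIELD(ext_bar)                       \
        TYPED_FIELD(uint16_t, blocksize, 64)      \
        TYPED_FIELD(int8_t,   max_mode, -1)

    /* Expansion 1: declare the struct, as the new cpu_cfg.h does. */
    typedef struct {
    #define BOOL_FIELD(x)               bool x;
    #define TYPED_FIELD(type, x, dflt)  type x;
        CFG_FIELDS
    #undef BOOL_FIELD
    #undef TYPED_FIELD
    } cfg_t;

    /*
     * Expansion 2: a field-wise merge generated from the same list --
     * OR the booleans, let a non-default typed value override.  Only a
     * plausible sketch of what a riscv_cpu_cfg_merge()-style helper
     * could do.
     */
    static void cfg_merge(cfg_t *dst, const cfg_t *src)
    {
    #define BOOL_FIELD(x)               dst->x |= src->x;
    #define TYPED_FIELD(type, x, dflt)  if (src->x != (dflt)) { dst->x = src->x; }
        CFG_FIELDS
    #undef BOOL_FIELD
    #undef TYPED_FIELD
    }

    int main(void)
    {
        cfg_t parent = { .ext_foo = true, .blocksize = 64, .max_mode = -1 };
        cfg_t child  = { .ext_bar = true, .blocksize = 64, .max_mode = 8 };

        cfg_merge(&parent, &child);
        printf("foo=%d bar=%d blocksize=%d max_mode=%d\n",
               parent.ext_foo, parent.ext_bar, parent.blocksize,
               parent.max_mode);
        return 0;
    }
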
+BOOL_FIELD(ext_smepmp) +BOOL_FIELD(ext_smrnmi) +BOOL_FIELD(ext_ssnpm) +BOOL_FIELD(ext_smnpm) +BOOL_FIELD(ext_smmpm) +BOOL_FIELD(ext_sspm) +BOOL_FIELD(ext_supm) +BOOL_FIELD(rvv_ta_all_1s) +BOOL_FIELD(rvv_ma_all_1s) +BOOL_FIELD(rvv_vl_half_avl) +BOOL_FIELD(rvv_vsetvl_x0_vill) +/* Named features */ +BOOL_FIELD(ext_svade) +BOOL_FIELD(ext_zic64b) +BOOL_FIELD(ext_ssstateen) +BOOL_FIELD(ext_sha) + +/* + * Always 'true' booleans for named features + * TCG always implement/can't be user disabled, + * based on spec version. + */ +BOOL_FIELD(has_priv_1_13) +BOOL_FIELD(has_priv_1_12) +BOOL_FIELD(has_priv_1_11) + +/* Always enabled for TCG if has_priv_1_11 */ +BOOL_FIELD(ext_ziccrse) + +/* Vendor-specific custom extensions */ +BOOL_FIELD(ext_xtheadba) +BOOL_FIELD(ext_xtheadbb) +BOOL_FIELD(ext_xtheadbs) +BOOL_FIELD(ext_xtheadcmo) +BOOL_FIELD(ext_xtheadcondmov) +BOOL_FIELD(ext_xtheadfmemidx) +BOOL_FIELD(ext_xtheadfmv) +BOOL_FIELD(ext_xtheadmac) +BOOL_FIELD(ext_xtheadmemidx) +BOOL_FIELD(ext_xtheadmempair) +BOOL_FIELD(ext_xtheadsync) +BOOL_FIELD(ext_XVentanaCondOps) + +BOOL_FIELD(mmu) +BOOL_FIELD(pmp) +BOOL_FIELD(debug) +BOOL_FIELD(misa_w) + +BOOL_FIELD(short_isa_string) + +TYPED_FIELD(uint32_t, mvendorid, 0) +TYPED_FIELD(uint64_t, marchid, 0) +TYPED_FIELD(uint64_t, mimpid, 0) + +TYPED_FIELD(uint32_t, pmu_mask, 0) +TYPED_FIELD(uint16_t, vlenb, 0) +TYPED_FIELD(uint16_t, elen, 0) +TYPED_FIELD(uint16_t, cbom_blocksize, 0) +TYPED_FIELD(uint16_t, cbop_blocksize, 0) +TYPED_FIELD(uint16_t, cboz_blocksize, 0) +TYPED_FIELD(uint8_t, pmp_regions, 0) + +TYPED_FIELD(int8_t, max_satp_mode, -1) + +#undef BOOL_FIELD +#undef TYPED_FIELD diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c index 2ed69d7..3479a62 100644 --- a/target/riscv/cpu_helper.c +++ b/target/riscv/cpu_helper.c @@ -1309,6 +1309,7 @@ static int get_physical_address(CPURISCVState *env, hwaddr *physical, bool svade = riscv_cpu_cfg(env)->ext_svade; bool svadu = riscv_cpu_cfg(env)->ext_svadu; bool adue = svadu ? 
env->menvcfg & MENVCFG_ADUE : !svade; + bool svrsw60t59b = riscv_cpu_cfg(env)->ext_svrsw60t59b; if (first_stage && two_stage && env->virt_enabled) { pbmte = pbmte && (env->henvcfg & HENVCFG_PBMTE); @@ -1376,7 +1377,7 @@ static int get_physical_address(CPURISCVState *env, hwaddr *physical, if (riscv_cpu_sxl(env) == MXL_RV32) { ppn = pte >> PTE_PPN_SHIFT; } else { - if (pte & PTE_RESERVED) { + if (pte & PTE_RESERVED(svrsw60t59b)) { qemu_log_mask(LOG_GUEST_ERROR, "%s: reserved bits set in PTE: " "addr: 0x%" HWADDR_PRIx " pte: 0x" TARGET_FMT_lx "\n", __func__, pte_addr, pte); diff --git a/target/riscv/csr.c b/target/riscv/csr.c index 288edee..5c91658 100644 --- a/target/riscv/csr.c +++ b/target/riscv/csr.c @@ -25,7 +25,6 @@ #include "pmu.h" #include "time_helper.h" #include "exec/cputlb.h" -#include "exec/tb-flush.h" #include "exec/icount.h" #include "accel/tcg/getpc.h" #include "qemu/guest-random.h" @@ -40,7 +39,7 @@ void riscv_get_csr_ops(int csrno, riscv_csr_operations *ops) *ops = csr_ops[csrno & (CSR_TABLE_SIZE - 1)]; } -void riscv_set_csr_ops(int csrno, riscv_csr_operations *ops) +void riscv_set_csr_ops(int csrno, const riscv_csr_operations *ops) { csr_ops[csrno & (CSR_TABLE_SIZE - 1)] = *ops; } @@ -204,6 +203,8 @@ static RISCVException cfi_ss(CPURISCVState *env, int csrno) #if !defined(CONFIG_USER_ONLY) if (env->debugger) { return RISCV_EXCP_NONE; + } else if (env->virt_enabled) { + return RISCV_EXCP_VIRT_INSTRUCTION_FAULT; } #endif return RISCV_EXCP_ILLEGAL_INST; @@ -374,8 +375,11 @@ static RISCVException aia_smode(CPURISCVState *env, int csrno) static RISCVException aia_smode32(CPURISCVState *env, int csrno) { int ret; + int csr_priv = get_field(csrno, 0x300); - if (!riscv_cpu_cfg(env)->ext_ssaia) { + if (csr_priv == PRV_M && !riscv_cpu_cfg(env)->ext_smaia) { + return RISCV_EXCP_ILLEGAL_INST; + } else if (!riscv_cpu_cfg(env)->ext_ssaia) { return RISCV_EXCP_ILLEGAL_INST; } @@ -738,7 +742,10 @@ static RISCVException dbltrp_hmode(CPURISCVState *env, int csrno) static RISCVException pmp(CPURISCVState *env, int csrno) { if (riscv_cpu_cfg(env)->pmp) { - if (csrno <= CSR_PMPCFG3) { + int max_pmpcfg = (env->priv_ver >= PRIV_VERSION_1_12_0) ? ++ CSR_PMPCFG15 : CSR_PMPCFG3; + + if (csrno <= max_pmpcfg) { uint32_t reg_index = csrno - CSR_PMPCFG0; /* TODO: RV128 restriction check */ @@ -1912,8 +1919,13 @@ static RISCVException read_mstatus(CPURISCVState *env, int csrno, static bool validate_vm(CPURISCVState *env, target_ulong vm) { - uint64_t mode_supported = riscv_cpu_cfg(env)->satp_mode.map; - return get_field(mode_supported, (1 << vm)); + bool rv32 = riscv_cpu_mxl(env) == MXL_RV32; + RISCVCPU *cpu = env_archcpu(env); + int satp_mode_supported_max = cpu->cfg.max_satp_mode; + const bool *valid_vm = rv32 ? 
valid_vm_1_10_32 : valid_vm_1_10_64; + + assert(satp_mode_supported_max >= 0); + return vm <= satp_mode_supported_max && valid_vm[vm]; } static target_ulong legalize_xatp(CPURISCVState *env, target_ulong old_xatp, @@ -1993,7 +2005,8 @@ static RISCVException write_mstatus(CPURISCVState *env, int csrno, if (riscv_has_ext(env, RVF)) { mask |= MSTATUS_FS; } - if (riscv_has_ext(env, RVV)) { + + if (riscv_cpu_cfg(env)->ext_zve32x) { mask |= MSTATUS_VS; } @@ -2162,8 +2175,6 @@ static RISCVException write_misa(CPURISCVState *env, int csrno, env->mstatus &= ~MSTATUS_FS; } - /* flush translation cache */ - tb_flush(env_cpu(env)); env->xl = riscv_cpu_mxl(env); return RISCV_EXCP_NONE; } @@ -3121,14 +3132,14 @@ static RISCVException write_mscratch(CPURISCVState *env, int csrno, static RISCVException read_mepc(CPURISCVState *env, int csrno, target_ulong *val) { - *val = env->mepc; + *val = env->mepc & get_xepc_mask(env); return RISCV_EXCP_NONE; } static RISCVException write_mepc(CPURISCVState *env, int csrno, target_ulong val, uintptr_t ra) { - env->mepc = val; + env->mepc = val & get_xepc_mask(env); return RISCV_EXCP_NONE; } @@ -3176,6 +3187,7 @@ static RISCVException write_menvcfg(CPURISCVState *env, int csrno, const RISCVCPUConfig *cfg = riscv_cpu_cfg(env); uint64_t mask = MENVCFG_FIOM | MENVCFG_CBIE | MENVCFG_CBCFE | MENVCFG_CBZE | MENVCFG_CDE; + bool stce_changed = false; if (riscv_cpu_mxl(env) == MXL_RV64) { mask |= (cfg->ext_svpbmt ? MENVCFG_PBMTE : 0) | @@ -3201,8 +3213,18 @@ static RISCVException write_menvcfg(CPURISCVState *env, int csrno, if ((val & MENVCFG_DTE) == 0) { env->mstatus &= ~MSTATUS_SDT; } + + if (cfg->ext_sstc && + ((env->menvcfg & MENVCFG_STCE) != (val & MENVCFG_STCE))) { + stce_changed = true; + } } env->menvcfg = (env->menvcfg & ~mask) | (val & mask); + + if (stce_changed) { + riscv_timer_stce_changed(env, true, !!(val & MENVCFG_STCE)); + } + return write_henvcfg(env, CSR_HENVCFG, env->henvcfg, ra); } @@ -3225,12 +3247,23 @@ static RISCVException write_menvcfgh(CPURISCVState *env, int csrno, (cfg->ext_smcdeleg ? MENVCFG_CDE : 0) | (cfg->ext_ssdbltrp ? 
MENVCFG_DTE : 0); uint64_t valh = (uint64_t)val << 32; + bool stce_changed = false; + + if (cfg->ext_sstc && + ((env->menvcfg & MENVCFG_STCE) != (valh & MENVCFG_STCE))) { + stce_changed = true; + } if ((valh & MENVCFG_DTE) == 0) { env->mstatus &= ~MSTATUS_SDT; } env->menvcfg = (env->menvcfg & ~mask) | (valh & mask); + + if (stce_changed) { + riscv_timer_stce_changed(env, true, !!(valh & MENVCFG_STCE)); + } + return write_henvcfgh(env, CSR_HENVCFGH, env->henvcfg >> 32, ra); } @@ -3308,8 +3341,10 @@ static RISCVException read_henvcfg(CPURISCVState *env, int csrno, static RISCVException write_henvcfg(CPURISCVState *env, int csrno, target_ulong val, uintptr_t ra) { + const RISCVCPUConfig *cfg = riscv_cpu_cfg(env); uint64_t mask = HENVCFG_FIOM | HENVCFG_CBIE | HENVCFG_CBCFE | HENVCFG_CBZE; RISCVException ret; + bool stce_changed = false; ret = smstateen_acc_ok(env, 0, SMSTATEEN0_HSENVCFG); if (ret != RISCV_EXCP_NONE) { @@ -3335,6 +3370,11 @@ static RISCVException write_henvcfg(CPURISCVState *env, int csrno, get_field(val, HENVCFG_PMM) != PMM_FIELD_RESERVED) { mask |= HENVCFG_PMM; } + + if (cfg->ext_sstc && + ((env->henvcfg & HENVCFG_STCE) != (val & HENVCFG_STCE))) { + stce_changed = true; + } } env->henvcfg = val & mask; @@ -3342,6 +3382,10 @@ static RISCVException write_henvcfg(CPURISCVState *env, int csrno, env->vsstatus &= ~MSTATUS_SDT; } + if (stce_changed) { + riscv_timer_stce_changed(env, false, !!(val & HENVCFG_STCE)); + } + return RISCV_EXCP_NONE; } @@ -3363,19 +3407,32 @@ static RISCVException read_henvcfgh(CPURISCVState *env, int csrno, static RISCVException write_henvcfgh(CPURISCVState *env, int csrno, target_ulong val, uintptr_t ra) { + const RISCVCPUConfig *cfg = riscv_cpu_cfg(env); uint64_t mask = env->menvcfg & (HENVCFG_PBMTE | HENVCFG_STCE | HENVCFG_ADUE | HENVCFG_DTE); uint64_t valh = (uint64_t)val << 32; RISCVException ret; + bool stce_changed = false; ret = smstateen_acc_ok(env, 0, SMSTATEEN0_HSENVCFG); if (ret != RISCV_EXCP_NONE) { return ret; } + + if (cfg->ext_sstc && + ((env->henvcfg & HENVCFG_STCE) != (valh & HENVCFG_STCE))) { + stce_changed = true; + } + env->henvcfg = (env->henvcfg & 0xFFFFFFFF) | (valh & mask); if ((env->henvcfg & HENVCFG_DTE) == 0) { env->vsstatus &= ~MSTATUS_SDT; } + + if (stce_changed) { + riscv_timer_stce_changed(env, false, !!(val & HENVCFG_STCE)); + } + return RISCV_EXCP_NONE; } @@ -3646,7 +3703,14 @@ static RISCVException rmw_mip64(CPURISCVState *env, int csrno, if (riscv_cpu_cfg(env)->ext_sstc && (env->priv == PRV_M) && get_field(env->menvcfg, MENVCFG_STCE)) { /* sstc extension forbids STIP & VSTIP to be writeable in mip */ - mask = mask & ~(MIP_STIP | MIP_VSTIP); + + /* STIP is not writable when menvcfg.STCE is enabled. */ + mask = mask & ~MIP_STIP; + + /* VSTIP is not writable when both [mh]envcfg.STCE are enabled. 
*/ + if (get_field(env->henvcfg, HENVCFG_STCE)) { + mask = mask & ~MIP_VSTIP; + } } if (mask) { @@ -4108,14 +4172,14 @@ static RISCVException write_sscratch(CPURISCVState *env, int csrno, static RISCVException read_sepc(CPURISCVState *env, int csrno, target_ulong *val) { - *val = env->sepc; + *val = env->sepc & get_xepc_mask(env); return RISCV_EXCP_NONE; } static RISCVException write_sepc(CPURISCVState *env, int csrno, target_ulong val, uintptr_t ra) { - env->sepc = val; + env->sepc = val & get_xepc_mask(env); return RISCV_EXCP_NONE; } @@ -5516,7 +5580,7 @@ static inline RISCVException riscv_csrrw_check(CPURISCVState *env, csr_priv = get_field(csrno, 0x300); if (!env->debugger && (effective_priv < csr_priv)) { - if (csr_priv == (PRV_S + 1) && env->virt_enabled) { + if (csr_priv <= (PRV_S + 1) && env->virt_enabled) { return RISCV_EXCP_VIRT_INSTRUCTION_FAULT; } return RISCV_EXCP_ILLEGAL_INST; @@ -5801,8 +5865,8 @@ riscv_csr_operations csr_ops[CSR_TABLE_SIZE] = { NULL, read_mstatus_i128 }, [CSR_MISA] = { "misa", any, read_misa, write_misa, NULL, read_misa_i128 }, - [CSR_MIDELEG] = { "mideleg", any, NULL, NULL, rmw_mideleg }, - [CSR_MEDELEG] = { "medeleg", any, read_medeleg, write_medeleg }, + [CSR_MIDELEG] = { "mideleg", smode, NULL, NULL, rmw_mideleg }, + [CSR_MEDELEG] = { "medeleg", smode, read_medeleg, write_medeleg }, [CSR_MIE] = { "mie", any, NULL, NULL, rmw_mie }, [CSR_MTVEC] = { "mtvec", any, read_mtvec, write_mtvec }, [CSR_MCOUNTEREN] = { "mcounteren", umode, read_mcounteren, @@ -5810,7 +5874,7 @@ riscv_csr_operations csr_ops[CSR_TABLE_SIZE] = { [CSR_MSTATUSH] = { "mstatush", any32, read_mstatush, write_mstatush }, - [CSR_MEDELEGH] = { "medelegh", any32, read_zero, write_ignore, + [CSR_MEDELEGH] = { "medelegh", smode32, read_zero, write_ignore, .min_priv_ver = PRIV_VERSION_1_13_0 }, [CSR_HEDELEGH] = { "hedelegh", hmode32, read_hedelegh, write_hedelegh, .min_priv_ver = PRIV_VERSION_1_13_0 }, @@ -5850,7 +5914,7 @@ riscv_csr_operations csr_ops[CSR_TABLE_SIZE] = { [CSR_MVIP] = { "mvip", aia_any, NULL, NULL, rmw_mvip }, /* Machine-Level High-Half CSRs (AIA) */ - [CSR_MIDELEGH] = { "midelegh", aia_any32, NULL, NULL, rmw_midelegh }, + [CSR_MIDELEGH] = { "midelegh", aia_smode32, NULL, NULL, rmw_midelegh }, [CSR_MIEH] = { "mieh", aia_any32, NULL, NULL, rmw_mieh }, [CSR_MVIENH] = { "mvienh", aia_any32, NULL, NULL, rmw_mvienh }, [CSR_MVIPH] = { "mviph", aia_any32, NULL, NULL, rmw_mviph }, @@ -6106,6 +6170,30 @@ riscv_csr_operations csr_ops[CSR_TABLE_SIZE] = { [CSR_PMPCFG1] = { "pmpcfg1", pmp, read_pmpcfg, write_pmpcfg }, [CSR_PMPCFG2] = { "pmpcfg2", pmp, read_pmpcfg, write_pmpcfg }, [CSR_PMPCFG3] = { "pmpcfg3", pmp, read_pmpcfg, write_pmpcfg }, + [CSR_PMPCFG4] = { "pmpcfg4", pmp, read_pmpcfg, write_pmpcfg, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPCFG5] = { "pmpcfg5", pmp, read_pmpcfg, write_pmpcfg, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPCFG6] = { "pmpcfg6", pmp, read_pmpcfg, write_pmpcfg, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPCFG7] = { "pmpcfg7", pmp, read_pmpcfg, write_pmpcfg, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPCFG8] = { "pmpcfg8", pmp, read_pmpcfg, write_pmpcfg, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPCFG9] = { "pmpcfg9", pmp, read_pmpcfg, write_pmpcfg, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPCFG10] = { "pmpcfg10", pmp, read_pmpcfg, write_pmpcfg, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPCFG11] = { "pmpcfg11", pmp, read_pmpcfg, write_pmpcfg, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPCFG12] = { 
"pmpcfg12", pmp, read_pmpcfg, write_pmpcfg, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPCFG13] = { "pmpcfg13", pmp, read_pmpcfg, write_pmpcfg, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPCFG14] = { "pmpcfg14", pmp, read_pmpcfg, write_pmpcfg, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPCFG15] = { "pmpcfg15", pmp, read_pmpcfg, write_pmpcfg, + .min_priv_ver = PRIV_VERSION_1_12_0 }, [CSR_PMPADDR0] = { "pmpaddr0", pmp, read_pmpaddr, write_pmpaddr }, [CSR_PMPADDR1] = { "pmpaddr1", pmp, read_pmpaddr, write_pmpaddr }, [CSR_PMPADDR2] = { "pmpaddr2", pmp, read_pmpaddr, write_pmpaddr }, @@ -6120,8 +6208,104 @@ riscv_csr_operations csr_ops[CSR_TABLE_SIZE] = { [CSR_PMPADDR11] = { "pmpaddr11", pmp, read_pmpaddr, write_pmpaddr }, [CSR_PMPADDR12] = { "pmpaddr12", pmp, read_pmpaddr, write_pmpaddr }, [CSR_PMPADDR13] = { "pmpaddr13", pmp, read_pmpaddr, write_pmpaddr }, - [CSR_PMPADDR14] = { "pmpaddr14", pmp, read_pmpaddr, write_pmpaddr }, - [CSR_PMPADDR15] = { "pmpaddr15", pmp, read_pmpaddr, write_pmpaddr }, + [CSR_PMPADDR14] = { "pmpaddr14", pmp, read_pmpaddr, write_pmpaddr }, + [CSR_PMPADDR15] = { "pmpaddr15", pmp, read_pmpaddr, write_pmpaddr }, + [CSR_PMPADDR16] = { "pmpaddr16", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR17] = { "pmpaddr17", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR18] = { "pmpaddr18", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR19] = { "pmpaddr19", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR20] = { "pmpaddr20", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR21] = { "pmpaddr21", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR22] = { "pmpaddr22", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR23] = { "pmpaddr23", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR24] = { "pmpaddr24", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR25] = { "pmpaddr25", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR26] = { "pmpaddr26", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR27] = { "pmpaddr27", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR28] = { "pmpaddr28", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR29] = { "pmpaddr29", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR30] = { "pmpaddr30", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR31] = { "pmpaddr31", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR32] = { "pmpaddr32", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR33] = { "pmpaddr33", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR34] = { "pmpaddr34", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR35] = { "pmpaddr35", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR36] = { "pmpaddr36", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR37] = { "pmpaddr37", pmp, read_pmpaddr, write_pmpaddr, + 
.min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR38] = { "pmpaddr38", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR39] = { "pmpaddr39", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR40] = { "pmpaddr40", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR41] = { "pmpaddr41", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR42] = { "pmpaddr42", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR43] = { "pmpaddr43", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR44] = { "pmpaddr44", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR45] = { "pmpaddr45", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR46] = { "pmpaddr46", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR47] = { "pmpaddr47", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR48] = { "pmpaddr48", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR49] = { "pmpaddr49", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR50] = { "pmpaddr50", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR51] = { "pmpaddr51", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR52] = { "pmpaddr52", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR53] = { "pmpaddr53", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR54] = { "pmpaddr54", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR55] = { "pmpaddr55", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR56] = { "pmpaddr56", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR57] = { "pmpaddr57", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR58] = { "pmpaddr58", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR59] = { "pmpaddr59", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR60] = { "pmpaddr60", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR61] = { "pmpaddr61", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR62] = { "pmpaddr62", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR63] = { "pmpaddr63", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, /* Debug CSRs */ [CSR_TSELECT] = { "tselect", debug, read_tselect, write_tselect }, diff --git a/target/riscv/fpu_helper.c b/target/riscv/fpu_helper.c index 706bdfa..af40561 100644 --- a/target/riscv/fpu_helper.c +++ b/target/riscv/fpu_helper.c @@ -755,6 +755,6 @@ uint64_t helper_fcvt_bf16_s(CPURISCVState *env, uint64_t rs1) uint64_t helper_fcvt_s_bf16(CPURISCVState *env, uint64_t rs1) { - float16 frs1 = check_nanbox_h(env, rs1); + float16 frs1 = check_nanbox_bf16(env, rs1); return nanbox_s(env, bfloat16_to_float32(frs1, &env->fp_status)); } diff --git a/target/riscv/gdbstub.c b/target/riscv/gdbstub.c index 18e88f4..1934f91 100644 --- 
a/target/riscv/gdbstub.c +++ b/target/riscv/gdbstub.c @@ -62,7 +62,7 @@ int riscv_cpu_gdb_read_register(CPUState *cs, GByteArray *mem_buf, int n) return 0; } - switch (mcc->misa_mxl_max) { + switch (mcc->def->misa_mxl_max) { case MXL_RV32: return gdb_get_reg32(mem_buf, tmp); case MXL_RV64: @@ -82,7 +82,7 @@ int riscv_cpu_gdb_write_register(CPUState *cs, uint8_t *mem_buf, int n) int length = 0; target_ulong tmp; - switch (mcc->misa_mxl_max) { + switch (mcc->def->misa_mxl_max) { case MXL_RV32: tmp = (int32_t)ldl_p(mem_buf); length = 4; @@ -359,7 +359,7 @@ void riscv_cpu_register_gdb_regs_for_features(CPUState *cs) ricsv_gen_dynamic_vector_feature(cs, cs->gdb_num_regs), 0); } - switch (mcc->misa_mxl_max) { + switch (mcc->def->misa_mxl_max) { case MXL_RV32: gdb_register_coprocessor(cs, riscv_gdb_get_virtual, riscv_gdb_set_virtual, diff --git a/target/riscv/helper.h b/target/riscv/helper.h index 85d73e4..b785456 100644 --- a/target/riscv/helper.h +++ b/target/riscv/helper.h @@ -159,7 +159,7 @@ DEF_HELPER_FLAGS_3(hyp_hsv_d, TCG_CALL_NO_WG, void, env, tl, tl) #endif /* Vector functions */ -DEF_HELPER_3(vsetvl, tl, env, tl, tl) +DEF_HELPER_4(vsetvl, tl, env, tl, tl, tl) DEF_HELPER_5(vle8_v, void, ptr, ptr, tl, env, i32) DEF_HELPER_5(vle16_v, void, ptr, ptr, tl, env, i32) DEF_HELPER_5(vle32_v, void, ptr, ptr, tl, env, i32) @@ -1101,14 +1101,14 @@ DEF_HELPER_6(vslidedown_vx_b, void, ptr, ptr, tl, ptr, env, i32) DEF_HELPER_6(vslidedown_vx_h, void, ptr, ptr, tl, ptr, env, i32) DEF_HELPER_6(vslidedown_vx_w, void, ptr, ptr, tl, ptr, env, i32) DEF_HELPER_6(vslidedown_vx_d, void, ptr, ptr, tl, ptr, env, i32) -DEF_HELPER_6(vslide1up_vx_b, void, ptr, ptr, tl, ptr, env, i32) -DEF_HELPER_6(vslide1up_vx_h, void, ptr, ptr, tl, ptr, env, i32) -DEF_HELPER_6(vslide1up_vx_w, void, ptr, ptr, tl, ptr, env, i32) -DEF_HELPER_6(vslide1up_vx_d, void, ptr, ptr, tl, ptr, env, i32) -DEF_HELPER_6(vslide1down_vx_b, void, ptr, ptr, tl, ptr, env, i32) -DEF_HELPER_6(vslide1down_vx_h, void, ptr, ptr, tl, ptr, env, i32) -DEF_HELPER_6(vslide1down_vx_w, void, ptr, ptr, tl, ptr, env, i32) -DEF_HELPER_6(vslide1down_vx_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vslide1up_vx_b, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vslide1up_vx_h, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vslide1up_vx_w, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vslide1up_vx_d, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vslide1down_vx_b, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vslide1down_vx_h, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vslide1down_vx_w, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vslide1down_vx_d, void, ptr, ptr, i64, ptr, env, i32) DEF_HELPER_6(vfslide1up_vf_h, void, ptr, ptr, i64, ptr, env, i32) DEF_HELPER_6(vfslide1up_vf_w, void, ptr, ptr, i64, ptr, env, i32) @@ -1284,3 +1284,8 @@ DEF_HELPER_4(vgmul_vv, void, ptr, ptr, env, i32) DEF_HELPER_5(vsm4k_vi, void, ptr, ptr, i32, env, i32) DEF_HELPER_4(vsm4r_vv, void, ptr, ptr, env, i32) DEF_HELPER_4(vsm4r_vs, void, ptr, ptr, env, i32) + +/* CFI (zicfiss) helpers */ +#ifndef CONFIG_USER_ONLY +DEF_HELPER_1(ssamoswap_disabled, void, env) +#endif diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_trans/trans_rvv.c.inc index 2b6077a..2a48717 100644 --- a/target/riscv/insn_trans/trans_rvv.c.inc +++ b/target/riscv/insn_trans/trans_rvv.c.inc @@ -202,7 +202,7 @@ static bool do_vsetvl(DisasContext *s, int rd, int rs1, TCGv s2) s1 = get_gpr(s, rs1, EXT_ZERO); } - gen_helper_vsetvl(dst, tcg_env, s1, s2); + gen_helper_vsetvl(dst, 
tcg_env, s1, s2, tcg_constant_tl((int) (rd == 0 && rs1 == 0))); gen_set_gpr(s, rd, dst); finalize_rvv_inst(s); @@ -222,7 +222,7 @@ static bool do_vsetivli(DisasContext *s, int rd, TCGv s1, TCGv s2) dst = dest_gpr(s, rd); - gen_helper_vsetvl(dst, tcg_env, s1, s2); + gen_helper_vsetvl(dst, tcg_env, s1, s2, tcg_constant_tl(0)); gen_set_gpr(s, rd, dst); finalize_rvv_inst(s); gen_update_pc(s, s->cur_insn_len); @@ -864,286 +864,32 @@ GEN_VEXT_TRANS(vlm_v, MO_8, vlm_v, ld_us_mask_op, ld_us_mask_check) GEN_VEXT_TRANS(vsm_v, MO_8, vsm_v, st_us_mask_op, st_us_mask_check) /* - * MAXSZ returns the maximum vector size can be operated in bytes, - * which is used in GVEC IR when vl_eq_vlmax flag is set to true - * to accelerate vector operation. + *** stride load and store */ -static inline uint32_t MAXSZ(DisasContext *s) -{ - int max_sz = s->cfg_ptr->vlenb << 3; - return max_sz >> (3 - s->lmul); -} - -static inline uint32_t get_log2(uint32_t a) -{ - uint32_t i = 0; - for (; a > 0;) { - a >>= 1; - i++; - } - return i; -} - -typedef void gen_tl_ldst(TCGv, TCGv_ptr, tcg_target_long); - -/* - * Simulate the strided load/store main loop: - * - * for (i = env->vstart; i < env->vl; env->vstart = ++i) { - * k = 0; - * while (k < nf) { - * if (!vm && !vext_elem_mask(v0, i)) { - * vext_set_elems_1s(vd, vma, (i + k * max_elems) * esz, - * (i + k * max_elems + 1) * esz); - * k++; - * continue; - * } - * target_ulong addr = base + stride * i + (k << log2_esz); - * ldst(env, adjust_addr(env, addr), i + k * max_elems, vd, ra); - * k++; - * } - * } - */ -static void gen_ldst_stride_main_loop(DisasContext *s, TCGv dest, uint32_t rs1, - uint32_t rs2, uint32_t vm, uint32_t nf, - gen_tl_ldst *ld_fn, gen_tl_ldst *st_fn, - bool is_load) -{ - TCGv addr = tcg_temp_new(); - TCGv base = get_gpr(s, rs1, EXT_NONE); - TCGv stride = get_gpr(s, rs2, EXT_NONE); - - TCGv i = tcg_temp_new(); - TCGv i_esz = tcg_temp_new(); - TCGv k = tcg_temp_new(); - TCGv k_esz = tcg_temp_new(); - TCGv k_max = tcg_temp_new(); - TCGv mask = tcg_temp_new(); - TCGv mask_offs = tcg_temp_new(); - TCGv mask_offs_64 = tcg_temp_new(); - TCGv mask_elem = tcg_temp_new(); - TCGv mask_offs_rem = tcg_temp_new(); - TCGv vreg = tcg_temp_new(); - TCGv dest_offs = tcg_temp_new(); - TCGv stride_offs = tcg_temp_new(); - - uint32_t max_elems = MAXSZ(s) >> s->sew; - - TCGLabel *start = gen_new_label(); - TCGLabel *end = gen_new_label(); - TCGLabel *start_k = gen_new_label(); - TCGLabel *inc_k = gen_new_label(); - TCGLabel *end_k = gen_new_label(); - - MemOp atomicity = MO_ATOM_NONE; - if (s->sew == 0) { - atomicity = MO_ATOM_NONE; - } else { - atomicity = MO_ATOM_IFALIGN_PAIR; - } - - mark_vs_dirty(s); - - tcg_gen_addi_tl(mask, (TCGv)tcg_env, vreg_ofs(s, 0)); - - /* Start of outer loop. */ - tcg_gen_mov_tl(i, cpu_vstart); - gen_set_label(start); - tcg_gen_brcond_tl(TCG_COND_GE, i, cpu_vl, end); - tcg_gen_shli_tl(i_esz, i, s->sew); - /* Start of inner loop. */ - tcg_gen_movi_tl(k, 0); - gen_set_label(start_k); - tcg_gen_brcond_tl(TCG_COND_GE, k, tcg_constant_tl(nf), end_k); - /* - * If we are in mask agnostic regime and the operation is not unmasked we - * set the inactive elements to 1. - */ - if (!vm && s->vma) { - TCGLabel *active_element = gen_new_label(); - /* (i + k * max_elems) * esz */ - tcg_gen_shli_tl(mask_offs, k, get_log2(max_elems << s->sew)); - tcg_gen_add_tl(mask_offs, mask_offs, i_esz); - - /* - * Check whether the i bit of the mask is 0 or 1. 
- * - * static inline int vext_elem_mask(void *v0, int index) - * { - * int idx = index / 64; - * int pos = index % 64; - * return (((uint64_t *)v0)[idx] >> pos) & 1; - * } - */ - tcg_gen_shri_tl(mask_offs_64, mask_offs, 3); - tcg_gen_add_tl(mask_offs_64, mask_offs_64, mask); - tcg_gen_ld_i64((TCGv_i64)mask_elem, (TCGv_ptr)mask_offs_64, 0); - tcg_gen_rem_tl(mask_offs_rem, mask_offs, tcg_constant_tl(8)); - tcg_gen_shr_tl(mask_elem, mask_elem, mask_offs_rem); - tcg_gen_andi_tl(mask_elem, mask_elem, 1); - tcg_gen_brcond_tl(TCG_COND_NE, mask_elem, tcg_constant_tl(0), - active_element); - /* - * Set masked-off elements in the destination vector register to 1s. - * Store instructions simply skip this bit as memory ops access memory - * only for active elements. - */ - if (is_load) { - tcg_gen_shli_tl(mask_offs, mask_offs, s->sew); - tcg_gen_add_tl(mask_offs, mask_offs, dest); - st_fn(tcg_constant_tl(-1), (TCGv_ptr)mask_offs, 0); - } - tcg_gen_br(inc_k); - gen_set_label(active_element); - } - /* - * The element is active, calculate the address with stride: - * target_ulong addr = base + stride * i + (k << log2_esz); - */ - tcg_gen_mul_tl(stride_offs, stride, i); - tcg_gen_shli_tl(k_esz, k, s->sew); - tcg_gen_add_tl(stride_offs, stride_offs, k_esz); - tcg_gen_add_tl(addr, base, stride_offs); - /* Calculate the offset in the dst/src vector register. */ - tcg_gen_shli_tl(k_max, k, get_log2(max_elems)); - tcg_gen_add_tl(dest_offs, i, k_max); - tcg_gen_shli_tl(dest_offs, dest_offs, s->sew); - tcg_gen_add_tl(dest_offs, dest_offs, dest); - if (is_load) { - tcg_gen_qemu_ld_tl(vreg, addr, s->mem_idx, MO_LE | s->sew | atomicity); - st_fn((TCGv)vreg, (TCGv_ptr)dest_offs, 0); - } else { - ld_fn((TCGv)vreg, (TCGv_ptr)dest_offs, 0); - tcg_gen_qemu_st_tl(vreg, addr, s->mem_idx, MO_LE | s->sew | atomicity); - } - /* - * We don't execute the load/store above if the element was inactive. - * We jump instead directly to incrementing k and continuing the loop. - */ - if (!vm && s->vma) { - gen_set_label(inc_k); - } - tcg_gen_addi_tl(k, k, 1); - tcg_gen_br(start_k); - /* End of the inner loop. */ - gen_set_label(end_k); - - tcg_gen_addi_tl(i, i, 1); - tcg_gen_mov_tl(cpu_vstart, i); - tcg_gen_br(start); - - /* End of the outer loop. */ - gen_set_label(end); - - return; -} - - -/* - * Set the tail bytes of the strided loads/stores to 1: - * - * for (k = 0; k < nf; ++k) { - * cnt = (k * max_elems + vl) * esz; - * tot = (k * max_elems + max_elems) * esz; - * for (i = cnt; i < tot; i += esz) { - * store_1s(-1, vd[vl+i]); - * } - * } - */ -static void gen_ldst_stride_tail_loop(DisasContext *s, TCGv dest, uint32_t nf, - gen_tl_ldst *st_fn) -{ - TCGv i = tcg_temp_new(); - TCGv k = tcg_temp_new(); - TCGv tail_cnt = tcg_temp_new(); - TCGv tail_tot = tcg_temp_new(); - TCGv tail_addr = tcg_temp_new(); - - TCGLabel *start = gen_new_label(); - TCGLabel *end = gen_new_label(); - TCGLabel *start_i = gen_new_label(); - TCGLabel *end_i = gen_new_label(); - - uint32_t max_elems_b = MAXSZ(s); - uint32_t esz = 1 << s->sew; - - /* Start of the outer loop. */ - tcg_gen_movi_tl(k, 0); - tcg_gen_shli_tl(tail_cnt, cpu_vl, s->sew); - tcg_gen_movi_tl(tail_tot, max_elems_b); - tcg_gen_add_tl(tail_addr, dest, tail_cnt); - gen_set_label(start); - tcg_gen_brcond_tl(TCG_COND_GE, k, tcg_constant_tl(nf), end); - /* Start of the inner loop. 
*/ - tcg_gen_mov_tl(i, tail_cnt); - gen_set_label(start_i); - tcg_gen_brcond_tl(TCG_COND_GE, i, tail_tot, end_i); - /* store_1s(-1, vd[vl+i]); */ - st_fn(tcg_constant_tl(-1), (TCGv_ptr)tail_addr, 0); - tcg_gen_addi_tl(tail_addr, tail_addr, esz); - tcg_gen_addi_tl(i, i, esz); - tcg_gen_br(start_i); - /* End of the inner loop. */ - gen_set_label(end_i); - /* Update the counts */ - tcg_gen_addi_tl(tail_cnt, tail_cnt, max_elems_b); - tcg_gen_addi_tl(tail_tot, tail_cnt, max_elems_b); - tcg_gen_addi_tl(k, k, 1); - tcg_gen_br(start); - /* End of the outer loop. */ - gen_set_label(end); - - return; -} +typedef void gen_helper_ldst_stride(TCGv_ptr, TCGv_ptr, TCGv, + TCGv, TCGv_env, TCGv_i32); static bool ldst_stride_trans(uint32_t vd, uint32_t rs1, uint32_t rs2, - uint32_t data, DisasContext *s, bool is_load) + uint32_t data, gen_helper_ldst_stride *fn, + DisasContext *s) { - if (!s->vstart_eq_zero) { - return false; - } - - TCGv dest = tcg_temp_new(); - - uint32_t nf = FIELD_EX32(data, VDATA, NF); - uint32_t vm = FIELD_EX32(data, VDATA, VM); - - /* Destination register and mask register */ - tcg_gen_addi_tl(dest, (TCGv)tcg_env, vreg_ofs(s, vd)); - - /* - * Select the appropriate load/tore to retrieve data from the vector - * register given a specific sew. - */ - static gen_tl_ldst * const ld_fns[4] = { - tcg_gen_ld8u_tl, tcg_gen_ld16u_tl, - tcg_gen_ld32u_tl, tcg_gen_ld_tl - }; - - static gen_tl_ldst * const st_fns[4] = { - tcg_gen_st8_tl, tcg_gen_st16_tl, - tcg_gen_st32_tl, tcg_gen_st_tl - }; + TCGv_ptr dest, mask; + TCGv base, stride; + TCGv_i32 desc; - gen_tl_ldst *ld_fn = ld_fns[s->sew]; - gen_tl_ldst *st_fn = st_fns[s->sew]; + dest = tcg_temp_new_ptr(); + mask = tcg_temp_new_ptr(); + base = get_gpr(s, rs1, EXT_NONE); + stride = get_gpr(s, rs2, EXT_NONE); + desc = tcg_constant_i32(simd_desc(s->cfg_ptr->vlenb, + s->cfg_ptr->vlenb, data)); - if (ld_fn == NULL || st_fn == NULL) { - return false; - } + tcg_gen_addi_ptr(dest, tcg_env, vreg_ofs(s, vd)); + tcg_gen_addi_ptr(mask, tcg_env, vreg_ofs(s, 0)); mark_vs_dirty(s); - gen_ldst_stride_main_loop(s, dest, rs1, rs2, vm, nf, ld_fn, st_fn, is_load); - - tcg_gen_movi_tl(cpu_vstart, 0); - - /* - * Set the tail bytes to 1 if tail agnostic: - */ - if (s->vta != 0 && is_load) { - gen_ldst_stride_tail_loop(s, dest, nf, st_fn); - } + fn(dest, mask, base, stride, tcg_env, desc); finalize_rvv_inst(s); return true; @@ -1152,6 +898,16 @@ static bool ldst_stride_trans(uint32_t vd, uint32_t rs1, uint32_t rs2, static bool ld_stride_op(DisasContext *s, arg_rnfvm *a, uint8_t eew) { uint32_t data = 0; + gen_helper_ldst_stride *fn; + static gen_helper_ldst_stride * const fns[4] = { + gen_helper_vlse8_v, gen_helper_vlse16_v, + gen_helper_vlse32_v, gen_helper_vlse64_v + }; + + fn = fns[eew]; + if (fn == NULL) { + return false; + } uint8_t emul = vext_get_emul(s, eew); data = FIELD_DP32(data, VDATA, VM, a->vm); @@ -1159,7 +915,7 @@ static bool ld_stride_op(DisasContext *s, arg_rnfvm *a, uint8_t eew) data = FIELD_DP32(data, VDATA, NF, a->nf); data = FIELD_DP32(data, VDATA, VTA, s->vta); data = FIELD_DP32(data, VDATA, VMA, s->vma); - return ldst_stride_trans(a->rd, a->rs1, a->rs2, data, s, true); + return ldst_stride_trans(a->rd, a->rs1, a->rs2, data, fn, s); } static bool ld_stride_check(DisasContext *s, arg_rnfvm* a, uint8_t eew) @@ -1177,13 +933,23 @@ GEN_VEXT_TRANS(vlse64_v, MO_64, rnfvm, ld_stride_op, ld_stride_check) static bool st_stride_op(DisasContext *s, arg_rnfvm *a, uint8_t eew) { uint32_t data = 0; + gen_helper_ldst_stride *fn; + static gen_helper_ldst_stride * 
const fns[4] = { + /* masked stride store */ + gen_helper_vsse8_v, gen_helper_vsse16_v, + gen_helper_vsse32_v, gen_helper_vsse64_v + }; uint8_t emul = vext_get_emul(s, eew); data = FIELD_DP32(data, VDATA, VM, a->vm); data = FIELD_DP32(data, VDATA, LMUL, emul); data = FIELD_DP32(data, VDATA, NF, a->nf); + fn = fns[eew]; + if (fn == NULL) { + return false; + } - return ldst_stride_trans(a->rd, a->rs1, a->rs2, data, s, false); + return ldst_stride_trans(a->rd, a->rs1, a->rs2, data, fn, s); } static bool st_stride_check(DisasContext *s, arg_rnfvm* a, uint8_t eew) @@ -1361,6 +1127,12 @@ static bool ldff_trans(uint32_t vd, uint32_t rs1, uint32_t data, fn(dest, mask, base, tcg_env, desc); finalize_rvv_inst(s); + + /* vector unit-stride fault-only-first load may modify vl CSR */ + gen_update_pc(s, s->cur_insn_len); + lookup_and_goto_ptr(s); + s->base.is_jmp = DISAS_NORETURN; + return true; } @@ -1528,6 +1300,17 @@ GEN_LDST_WHOLE_TRANS(vs8r_v, int8_t, 8, false) *** Vector Integer Arithmetic Instructions */ +/* + * MAXSZ returns the maximum vector size can be operated in bytes, + * which is used in GVEC IR when vl_eq_vlmax flag is set to true + * to accelerate vector operation. + */ +static inline uint32_t MAXSZ(DisasContext *s) +{ + int max_sz = s->cfg_ptr->vlenb * 8; + return max_sz >> (3 - s->lmul); +} + static bool opivv_check(DisasContext *s, arg_rmrr *a) { return require_rvv(s) && @@ -3568,19 +3351,19 @@ static void load_element(TCGv_i64 dest, TCGv_ptr base, /* offset of the idx element with base register r */ static uint32_t endian_ofs(DisasContext *s, int r, int idx) { -#if HOST_BIG_ENDIAN - return vreg_ofs(s, r) + ((idx ^ (7 >> s->sew)) << s->sew); -#else - return vreg_ofs(s, r) + (idx << s->sew); -#endif + if (HOST_BIG_ENDIAN) { + return vreg_ofs(s, r) + ((idx ^ (7 >> s->sew)) << s->sew); + } else { + return vreg_ofs(s, r) + (idx << s->sew); + } } /* adjust the index according to the endian */ static void endian_adjust(TCGv_i32 ofs, int sew) { -#if HOST_BIG_ENDIAN - tcg_gen_xori_i32(ofs, ofs, 7 >> sew); -#endif + if (HOST_BIG_ENDIAN) { + tcg_gen_xori_i32(ofs, ofs, 7 >> sew); + } } /* Load idx >= VLMAX ? 
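/*
 * [Illustrative sketch, not part of the patch.]  The re-added MAXSZ() above
 * computes the number of bytes covered by a whole register group, i.e. VLENB
 * scaled by the (possibly fractional) LMUL.  A standalone recomputation with
 * plain integers, where vlenb and the signed lmul field (-3..3) are assumed
 * inputs standing in for DisasContext state:
 */
#include <assert.h>
#include <stdint.h>

static uint32_t group_bytes(uint32_t vlenb, int lmul)
{
    /* (VLEN in bits) >> (3 - lmul): divide by 8 to get bytes, scale by LMUL */
    return (vlenb << 3) >> (3 - lmul);
}

int main(void)
{
    assert(group_bytes(16, 0) == 16);   /* VLEN=128, LMUL=1   -> 16 bytes  */
    assert(group_bytes(16, 3) == 128);  /* VLEN=128, LMUL=8   -> 128 bytes */
    assert(group_bytes(16, -1) == 8);   /* VLEN=128, LMUL=1/2 -> 8 bytes   */
    return 0;
}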
0 : vreg[idx] */ @@ -3778,7 +3561,6 @@ static bool slideup_check(DisasContext *s, arg_rmrr *a) } GEN_OPIVX_TRANS(vslideup_vx, slideup_check) -GEN_OPIVX_TRANS(vslide1up_vx, slideup_check) GEN_OPIVI_TRANS(vslideup_vi, IMM_ZX, vslideup_vx, slideup_check) static bool slidedown_check(DisasContext *s, arg_rmrr *a) @@ -3789,9 +3571,56 @@ static bool slidedown_check(DisasContext *s, arg_rmrr *a) } GEN_OPIVX_TRANS(vslidedown_vx, slidedown_check) -GEN_OPIVX_TRANS(vslide1down_vx, slidedown_check) GEN_OPIVI_TRANS(vslidedown_vi, IMM_ZX, vslidedown_vx, slidedown_check) +typedef void gen_helper_vslide1_vx(TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_ptr, + TCGv_env, TCGv_i32); + +#define GEN_OPIVX_VSLIDE1_TRANS(NAME, CHECK) \ +static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ +{ \ + if (CHECK(s, a)) { \ + static gen_helper_vslide1_vx * const fns[4] = { \ + gen_helper_##NAME##_b, gen_helper_##NAME##_h, \ + gen_helper_##NAME##_w, gen_helper_##NAME##_d, \ + }; \ + \ + TCGv_ptr dest, src2, mask; \ + TCGv_i64 src1; \ + TCGv_i32 desc; \ + uint32_t data = 0; \ + \ + dest = tcg_temp_new_ptr(); \ + mask = tcg_temp_new_ptr(); \ + src2 = tcg_temp_new_ptr(); \ + src1 = tcg_temp_new_i64(); \ + \ + data = FIELD_DP32(data, VDATA, VM, a->vm); \ + data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ + data = FIELD_DP32(data, VDATA, VTA, s->vta); \ + data = FIELD_DP32(data, VDATA, VTA_ALL_1S, s->cfg_vta_all_1s); \ + data = FIELD_DP32(data, VDATA, VMA, s->vma); \ + desc = tcg_constant_i32(simd_desc(s->cfg_ptr->vlenb, \ + s->cfg_ptr->vlenb, data)); \ + \ + tcg_gen_addi_ptr(dest, tcg_env, vreg_ofs(s, a->rd)); \ + tcg_gen_addi_ptr(src2, tcg_env, vreg_ofs(s, a->rs2)); \ + tcg_gen_addi_ptr(mask, tcg_env, vreg_ofs(s, 0)); \ + tcg_gen_ext_tl_i64(src1, get_gpr(s, a->rs1, EXT_SIGN)); \ + \ + fns[s->sew](dest, mask, src1, src2, tcg_env, desc); \ + \ + tcg_gen_movi_tl(cpu_vstart, 0); \ + finalize_rvv_inst(s); \ + \ + return true; \ + } \ + return false; \ +} + +GEN_OPIVX_VSLIDE1_TRANS(vslide1up_vx, slideup_check) +GEN_OPIVX_VSLIDE1_TRANS(vslide1down_vx, slidedown_check) + /* Vector Floating-Point Slide Instructions */ static bool fslideup_check(DisasContext *s, arg_rmrr *a) { diff --git a/target/riscv/insn_trans/trans_rvzce.c.inc b/target/riscv/insn_trans/trans_rvzce.c.inc index c77c2b9..dd15af0 100644 --- a/target/riscv/insn_trans/trans_rvzce.c.inc +++ b/target/riscv/insn_trans/trans_rvzce.c.inc @@ -88,13 +88,13 @@ static bool trans_c_lbu(DisasContext *ctx, arg_c_lbu *a) static bool trans_c_lhu(DisasContext *ctx, arg_c_lhu *a) { REQUIRE_ZCB(ctx); - return gen_load(ctx, a, MO_UW); + return gen_load(ctx, a, MO_TEUW); } static bool trans_c_lh(DisasContext *ctx, arg_c_lh *a) { REQUIRE_ZCB(ctx); - return gen_load(ctx, a, MO_SW); + return gen_load(ctx, a, MO_TESW); } static bool trans_c_sb(DisasContext *ctx, arg_c_sb *a) @@ -106,7 +106,7 @@ static bool trans_c_sb(DisasContext *ctx, arg_c_sb *a) static bool trans_c_sh(DisasContext *ctx, arg_c_sh *a) { REQUIRE_ZCB(ctx); - return gen_store(ctx, a, MO_UW); + return gen_store(ctx, a, MO_TEUW); } #define X_S0 8 diff --git a/target/riscv/insn_trans/trans_rvzicfiss.c.inc b/target/riscv/insn_trans/trans_rvzicfiss.c.inc index b0096ad..f4a1c12 100644 --- a/target/riscv/insn_trans/trans_rvzicfiss.c.inc +++ b/target/riscv/insn_trans/trans_rvzicfiss.c.inc @@ -40,6 +40,7 @@ static bool trans_sspopchk(DisasContext *ctx, arg_sspopchk *a) tcg_gen_brcond_tl(TCG_COND_EQ, data, rs1, skip); tcg_gen_st_tl(tcg_constant_tl(RISCV_EXCP_SW_CHECK_BCFI_TVAL), tcg_env, offsetof(CPURISCVState, sw_check_code)); + 
gen_update_pc(ctx, 0); gen_helper_raise_exception(tcg_env, tcg_constant_i32(RISCV_EXCP_SW_CHECK)); gen_set_label(skip); @@ -90,7 +91,11 @@ static bool trans_ssamoswap_w(DisasContext *ctx, arg_amoswap_w *a) } if (!ctx->bcfi_enabled) { +#ifndef CONFIG_USER_ONLY + gen_helper_ssamoswap_disabled(tcg_env); +#else return false; +#endif } TCGv dest = dest_gpr(ctx, a->rd); @@ -115,7 +120,11 @@ static bool trans_ssamoswap_d(DisasContext *ctx, arg_amoswap_w *a) } if (!ctx->bcfi_enabled) { +#ifndef CONFIG_USER_ONLY + gen_helper_ssamoswap_disabled(tcg_env); +#else return false; +#endif } TCGv dest = dest_gpr(ctx, a->rd); diff --git a/target/riscv/internals.h b/target/riscv/internals.h index 4570bd5..172296f 100644 --- a/target/riscv/internals.h +++ b/target/riscv/internals.h @@ -142,6 +142,33 @@ static inline float16 check_nanbox_h(CPURISCVState *env, uint64_t f) } } +static inline float16 check_nanbox_bf16(CPURISCVState *env, uint64_t f) +{ + /* Disable nanbox check when enable zfinx */ + if (env_archcpu(env)->cfg.ext_zfinx) { + return (uint16_t)f; + } + + uint64_t mask = MAKE_64BIT_MASK(16, 48); + + if (likely((f & mask) == mask)) { + return (uint16_t)f; + } else { + return 0x7FC0u; /* default qnan */ + } +} + +static inline target_ulong get_xepc_mask(CPURISCVState *env) +{ + /* When IALIGN=32, both low bits must be zero. + * When IALIGN=16 (has C extension), only bit 0 must be zero. */ + if (riscv_has_ext(env, RVC)) { + return ~(target_ulong)1; + } else { + return ~(target_ulong)3; + } +} + #ifndef CONFIG_USER_ONLY /* Our implementation of SysemuCPUOps::has_work */ bool riscv_cpu_has_work(CPUState *cs); diff --git a/target/riscv/kvm/kvm-cpu.c b/target/riscv/kvm/kvm-cpu.c index 82f9728..0dd0d59 100644 --- a/target/riscv/kvm/kvm-cpu.c +++ b/target/riscv/kvm/kvm-cpu.c @@ -36,6 +36,7 @@ #include "hw/pci/pci.h" #include "exec/memattrs.h" #include "system/address-spaces.h" +#include "system/memory.h" #include "hw/boards.h" #include "hw/irq.h" #include "hw/intc/riscv_imsic.h" @@ -999,6 +1000,19 @@ static void kvm_riscv_destroy_scratch_vcpu(KVMScratchCPU *scratch) close(scratch->kvmfd); } +static void kvm_riscv_init_max_satp_mode(RISCVCPU *cpu, KVMScratchCPU *kvmcpu) +{ + struct kvm_one_reg reg; + int ret; + + reg.id = RISCV_CONFIG_REG(satp_mode); + reg.addr = (uint64_t)&cpu->cfg.max_satp_mode; + ret = ioctl(kvmcpu->cpufd, KVM_GET_ONE_REG, ®); + if (ret != 0) { + error_report("Unable to retrieve satp mode from host, error %d", ret); + } +} + static void kvm_riscv_init_machine_ids(RISCVCPU *cpu, KVMScratchCPU *kvmcpu) { struct kvm_one_reg reg; @@ -1302,6 +1316,7 @@ static void riscv_init_kvm_registers(Object *cpu_obj) kvm_riscv_init_machine_ids(cpu, &kvmcpu); kvm_riscv_init_misa_ext_mask(cpu, &kvmcpu); kvm_riscv_init_cfg(cpu, &kvmcpu); + kvm_riscv_init_max_satp_mode(cpu, &kvmcpu); kvm_riscv_destroy_scratch_vcpu(&kvmcpu); } @@ -1355,7 +1370,7 @@ int kvm_riscv_sync_mpstate_to_kvm(RISCVCPU *cpu, int state) return 0; } -int kvm_arch_put_registers(CPUState *cs, int level, Error **errp) +int kvm_arch_put_registers(CPUState *cs, KvmPutState level, Error **errp) { int ret = 0; @@ -1472,6 +1487,11 @@ static int kvm_vcpu_enable_sbi_dbcn(RISCVCPU *cpu, CPUState *cs) return kvm_set_one_reg(cs, kvm_sbi_dbcn.kvm_reg_id, ®); } +int kvm_arch_pre_create_vcpu(CPUState *cpu, Error **errp) +{ + return 0; +} + int kvm_arch_init_vcpu(CPUState *cs) { int ret = 0; @@ -1545,6 +1565,7 @@ bool kvm_arch_stop_on_emulation_error(CPUState *cs) static void kvm_riscv_handle_sbi_dbcn(CPUState *cs, struct kvm_run *run) { + const MemTxAttrs 
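/*
 * [Illustrative sketch, not part of the patch.]  get_xepc_mask() above encodes
 * the IALIGN rule that the patch now applies when reading, writing and
 * returning through sepc/mepc: with the C extension (IALIGN=16) only bit 0 of
 * an xepc value is forced to zero, without it (IALIGN=32) bits 1:0 are.  A
 * standalone sketch, with has_rvc as an assumed stand-in for
 * riscv_has_ext(env, RVC):
 */
#include <stdbool.h>
#include <stdint.h>

static uint64_t apply_xepc_mask(uint64_t epc, bool has_rvc)
{
    return epc & (has_rvc ? ~(uint64_t)1 : ~(uint64_t)3);
}

/* e.g. apply_xepc_mask(0x80000003, true)  == 0x80000002,
 *      apply_xepc_mask(0x80000003, false) == 0x80000000 */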
attrs = MEMTXATTRS_UNSPECIFIED; g_autofree uint8_t *buf = NULL; RISCVCPU *cpu = RISCV_CPU(cs); target_ulong num_bytes; @@ -1569,7 +1590,7 @@ static void kvm_riscv_handle_sbi_dbcn(CPUState *cs, struct kvm_run *run) * Handle the case where a 32 bit CPU is running in a * 64 bit addressing env. */ - if (riscv_cpu_mxl(&cpu->env) == MXL_RV32) { + if (riscv_cpu_is_32bit(cpu)) { addr |= (uint64_t)run->riscv_sbi.args[2] << 32; } @@ -1583,9 +1604,9 @@ static void kvm_riscv_handle_sbi_dbcn(CPUState *cs, struct kvm_run *run) exit(1); } - cpu_physical_memory_write(addr, buf, ret); + address_space_write(cs->as, addr, attrs, buf, ret); } else { - cpu_physical_memory_read(addr, buf, num_bytes); + address_space_read(cs->as, addr, attrs, buf, num_bytes); ret = qemu_chr_fe_write_all(serial_hd(0)->be, buf, num_bytes); if (ret < 0) { @@ -1600,7 +1621,7 @@ static void kvm_riscv_handle_sbi_dbcn(CPUState *cs, struct kvm_run *run) break; case SBI_EXT_DBCN_CONSOLE_WRITE_BYTE: ch = run->riscv_sbi.args[0]; - ret = qemu_chr_fe_write(serial_hd(0)->be, &ch, sizeof(ch)); + ret = qemu_chr_fe_write_all(serial_hd(0)->be, &ch, sizeof(ch)); if (ret < 0) { error_report("SBI_EXT_DBCN_CONSOLE_WRITE_BYTE: error when " @@ -1980,7 +2001,7 @@ static bool kvm_cpu_realize(CPUState *cs, Error **errp) } } - return true; + return true; } void riscv_kvm_cpu_finalize_features(RISCVCPU *cpu, Error **errp) @@ -2086,22 +2107,25 @@ static void kvm_cpu_accel_register_types(void) } type_init(kvm_cpu_accel_register_types); -static void riscv_host_cpu_class_init(ObjectClass *c, const void *data) -{ - RISCVCPUClass *mcc = RISCV_CPU_CLASS(c); - -#if defined(TARGET_RISCV32) - mcc->misa_mxl_max = MXL_RV32; -#elif defined(TARGET_RISCV64) - mcc->misa_mxl_max = MXL_RV64; -#endif -} - static const TypeInfo riscv_kvm_cpu_type_infos[] = { { .name = TYPE_RISCV_CPU_HOST, .parent = TYPE_RISCV_CPU, - .class_init = riscv_host_cpu_class_init, +#if defined(TARGET_RISCV32) + .class_data = &(const RISCVCPUDef) { + .misa_mxl_max = MXL_RV32, + .priv_spec = RISCV_PROFILE_ATTR_UNUSED, + .vext_spec = RISCV_PROFILE_ATTR_UNUSED, + .cfg.max_satp_mode = -1, + }, +#elif defined(TARGET_RISCV64) + .class_data = &(const RISCVCPUDef) { + .misa_mxl_max = MXL_RV64, + .priv_spec = RISCV_PROFILE_ATTR_UNUSED, + .vext_spec = RISCV_PROFILE_ATTR_UNUSED, + .cfg.max_satp_mode = -1, + }, +#endif } }; diff --git a/target/riscv/machine.c b/target/riscv/machine.c index a1f70cc..18d790a 100644 --- a/target/riscv/machine.c +++ b/target/riscv/machine.c @@ -36,8 +36,9 @@ static int pmp_post_load(void *opaque, int version_id) RISCVCPU *cpu = opaque; CPURISCVState *env = &cpu->env; int i; + uint8_t pmp_regions = riscv_cpu_cfg(env)->pmp_regions; - for (i = 0; i < MAX_RISCV_PMPS; i++) { + for (i = 0; i < pmp_regions; i++) { pmp_update_rule_addr(env, i); } pmp_update_rule_nums(env); @@ -130,7 +131,8 @@ static bool vector_needed(void *opaque) RISCVCPU *cpu = opaque; CPURISCVState *env = &cpu->env; - return riscv_has_ext(env, RVV); + return kvm_enabled() ? 
riscv_has_ext(env, RVV) : + riscv_cpu_cfg(env)->ext_zve32x; } static const VMStateDescription vmstate_vector = { @@ -170,7 +172,7 @@ static bool rv128_needed(void *opaque) { RISCVCPUClass *mcc = RISCV_CPU_GET_CLASS(opaque); - return mcc->misa_mxl_max == MXL_RV128; + return mcc->def->misa_mxl_max == MXL_RV128; } static const VMStateDescription vmstate_rv128 = { @@ -399,6 +401,30 @@ static const VMStateDescription vmstate_ssp = { } }; +static bool sstc_timer_needed(void *opaque) +{ + RISCVCPU *cpu = opaque; + CPURISCVState *env = &cpu->env; + + if (!cpu->cfg.ext_sstc) { + return false; + } + + return env->stimer != NULL || env->vstimer != NULL; +} + +static const VMStateDescription vmstate_sstc = { + .name = "cpu/timer", + .version_id = 1, + .minimum_version_id = 1, + .needed = sstc_timer_needed, + .fields = (const VMStateField[]) { + VMSTATE_TIMER_PTR(env.stimer, RISCVCPU), + VMSTATE_TIMER_PTR(env.vstimer, RISCVCPU), + VMSTATE_END_OF_LIST() + } +}; + const VMStateDescription vmstate_riscv_cpu = { .name = "cpu", .version_id = 10, @@ -475,6 +501,7 @@ const VMStateDescription vmstate_riscv_cpu = { &vmstate_elp, &vmstate_ssp, &vmstate_ctr, + &vmstate_sstc, NULL } }; diff --git a/target/riscv/meson.build b/target/riscv/meson.build index a4bd61e..fdefe88 100644 --- a/target/riscv/meson.build +++ b/target/riscv/meson.build @@ -8,6 +8,10 @@ gen = [ riscv_ss = ss.source_set() riscv_ss.add(gen) + +riscv_ss.add(when: 'CONFIG_ARM_COMPATIBLE_SEMIHOSTING', + if_true: files('common-semi-target.c')) + riscv_ss.add(files( 'cpu.c', 'cpu_helper.c', diff --git a/target/riscv/monitor.c b/target/riscv/monitor.c index 100005e..8a77476 100644 --- a/target/riscv/monitor.c +++ b/target/riscv/monitor.c @@ -23,6 +23,7 @@ #include "cpu_bits.h" #include "monitor/monitor.h" #include "monitor/hmp-target.h" +#include "system/memory.h" #ifdef TARGET_RISCV64 #define PTE_HEADER_FIELDS "vaddr paddr "\ @@ -77,11 +78,13 @@ static void print_pte(Monitor *mon, int va_bits, target_ulong vaddr, attr & PTE_D ? 
'd' : '-'); } -static void walk_pte(Monitor *mon, hwaddr base, target_ulong start, +static void walk_pte(Monitor *mon, AddressSpace *as, + hwaddr base, target_ulong start, int level, int ptidxbits, int ptesize, int va_bits, target_ulong *vbase, hwaddr *pbase, hwaddr *last_paddr, target_ulong *last_size, int *last_attr) { + const MemTxAttrs attrs = MEMTXATTRS_UNSPECIFIED; hwaddr pte_addr; hwaddr paddr; target_ulong last_start = -1; @@ -100,7 +103,7 @@ static void walk_pte(Monitor *mon, hwaddr base, target_ulong start, for (idx = 0; idx < (1UL << ptidxbits); idx++) { pte_addr = base + idx * ptesize; - cpu_physical_memory_read(pte_addr, &pte, ptesize); + address_space_read(as, pte_addr, attrs, &pte, ptesize); paddr = (hwaddr)(pte >> PTE_PPN_SHIFT) << PGSHIFT; attr = pte & 0xff; @@ -132,7 +135,7 @@ static void walk_pte(Monitor *mon, hwaddr base, target_ulong start, *last_size = pgsize; } else { /* pointer to the next level of the page table */ - walk_pte(mon, paddr, start, level - 1, ptidxbits, ptesize, + walk_pte(mon, as, paddr, start, level - 1, ptidxbits, ptesize, va_bits, vbase, pbase, last_paddr, last_size, last_attr); } @@ -145,6 +148,7 @@ static void walk_pte(Monitor *mon, hwaddr base, target_ulong start, static void mem_info_svxx(Monitor *mon, CPUArchState *env) { + AddressSpace *as = env_cpu(env)->as; int levels, ptidxbits, ptesize, vm, va_bits; hwaddr base; target_ulong vbase; @@ -199,7 +203,7 @@ static void mem_info_svxx(Monitor *mon, CPUArchState *env) last_attr = 0; /* walk page tables, starting from address 0 */ - walk_pte(mon, base, 0, levels - 1, ptidxbits, ptesize, va_bits, + walk_pte(mon, as, base, 0, levels - 1, ptidxbits, ptesize, va_bits, &vbase, &pbase, &last_paddr, &last_size, &last_attr); /* don't forget the last one */ diff --git a/target/riscv/op_helper.c b/target/riscv/op_helper.c index 557807b..8382aa9 100644 --- a/target/riscv/op_helper.c +++ b/target/riscv/op_helper.c @@ -280,7 +280,7 @@ target_ulong helper_sret(CPURISCVState *env) riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); } - target_ulong retpc = env->sepc; + target_ulong retpc = env->sepc & get_xepc_mask(env); if (!riscv_cpu_allow_16bit_insn(&env_archcpu(env)->cfg, env->priv_ver, env->misa_ext) && (retpc & 0x3)) { @@ -355,21 +355,22 @@ target_ulong helper_sret(CPURISCVState *env) } static void check_ret_from_m_mode(CPURISCVState *env, target_ulong retpc, - target_ulong prev_priv) + target_ulong prev_priv, + uintptr_t ra) { if (!(env->priv >= PRV_M)) { - riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, ra); } if (!riscv_cpu_allow_16bit_insn(&env_archcpu(env)->cfg, env->priv_ver, env->misa_ext) && (retpc & 0x3)) { - riscv_raise_exception(env, RISCV_EXCP_INST_ADDR_MIS, GETPC()); + riscv_raise_exception(env, RISCV_EXCP_INST_ADDR_MIS, ra); } if (riscv_cpu_cfg(env)->pmp && !pmp_get_num_rules(env) && (prev_priv != PRV_M)) { - riscv_raise_exception(env, RISCV_EXCP_INST_ACCESS_FAULT, GETPC()); + riscv_raise_exception(env, RISCV_EXCP_INST_ACCESS_FAULT, ra); } } static target_ulong ssdbltrp_mxret(CPURISCVState *env, target_ulong mstatus, @@ -391,11 +392,12 @@ static target_ulong ssdbltrp_mxret(CPURISCVState *env, target_ulong mstatus, target_ulong helper_mret(CPURISCVState *env) { - target_ulong retpc = env->mepc; + target_ulong retpc = env->mepc & get_xepc_mask(env); uint64_t mstatus = env->mstatus; target_ulong prev_priv = get_field(mstatus, MSTATUS_MPP); + uintptr_t ra = GETPC(); - check_ret_from_m_mode(env, retpc, prev_priv); + 
check_ret_from_m_mode(env, retpc, prev_priv, ra); target_ulong prev_virt = get_field(env->mstatus, MSTATUS_MPV) && (prev_priv != PRV_M); @@ -443,8 +445,9 @@ target_ulong helper_mnret(CPURISCVState *env) target_ulong retpc = env->mnepc; target_ulong prev_priv = get_field(env->mnstatus, MNSTATUS_MNPP); target_ulong prev_virt; + uintptr_t ra = GETPC(); - check_ret_from_m_mode(env, retpc, prev_priv); + check_ret_from_m_mode(env, retpc, prev_priv, ra); prev_virt = get_field(env->mnstatus, MNSTATUS_MNPV) && (prev_priv != PRV_M); @@ -714,4 +717,53 @@ target_ulong helper_hyp_hlvx_wu(CPURISCVState *env, target_ulong addr) return cpu_ldl_code_mmu(env, addr, oi, ra); } +void helper_ssamoswap_disabled(CPURISCVState *env) +{ + int exception = RISCV_EXCP_ILLEGAL_INST; + + /* + * Here we follow the RISC-V CFI spec [1] to implement the exception type + * of ssamoswap* instruction. + * + * [1] RISC-V CFI spec v1.0, ch2.7 Atomic Swap from a Shadow Stack Location + * + * Note: We have already checked some conditions in trans_* functions: + * 1. The effective priv mode is not M-mode. + * 2. The xSSE specific to the effective priv mode is disabled. + */ + if (!get_field(env->menvcfg, MENVCFG_SSE)) { + /* + * Disabled M-mode SSE always triggers an illegal instruction when + * the current priv mode is not M-mode. + */ + exception = RISCV_EXCP_ILLEGAL_INST; + goto done; + } + + if (!riscv_has_ext(env, RVS)) { + /* S-mode is not implemented */ + exception = RISCV_EXCP_ILLEGAL_INST; + goto done; + } else if (env->virt_enabled) { + /* + * VU/VS-mode with disabled xSSE will trigger the virtual instruction + * exception. + */ + exception = RISCV_EXCP_VIRT_INSTRUCTION_FAULT; + goto done; + } else { + /* + * U-mode with disabled S-mode SSE will trigger the illegal instruction + * exception. + * + * Note: S-mode is already handled in the disabled M-mode SSE case. 
+ */ + exception = RISCV_EXCP_ILLEGAL_INST; + goto done; + } + +done: + riscv_raise_exception(env, exception, GETPC()); +} + #endif /* !CONFIG_USER_ONLY */ diff --git a/target/riscv/pmp.c b/target/riscv/pmp.c index 5af295e..72f1372 100644 --- a/target/riscv/pmp.c +++ b/target/riscv/pmp.c @@ -122,7 +122,9 @@ uint32_t pmp_get_num_rules(CPURISCVState *env) */ static inline uint8_t pmp_read_cfg(CPURISCVState *env, uint32_t pmp_index) { - if (pmp_index < MAX_RISCV_PMPS) { + uint8_t pmp_regions = riscv_cpu_cfg(env)->pmp_regions; + + if (pmp_index < pmp_regions) { return env->pmp_state.pmp[pmp_index].cfg_reg; } @@ -136,7 +138,9 @@ static inline uint8_t pmp_read_cfg(CPURISCVState *env, uint32_t pmp_index) */ static bool pmp_write_cfg(CPURISCVState *env, uint32_t pmp_index, uint8_t val) { - if (pmp_index < MAX_RISCV_PMPS) { + uint8_t pmp_regions = riscv_cpu_cfg(env)->pmp_regions; + + if (pmp_index < pmp_regions) { if (env->pmp_state.pmp[pmp_index].cfg_reg == val) { /* no change */ return false; @@ -207,11 +211,12 @@ void pmp_update_rule_addr(CPURISCVState *env, uint32_t pmp_index) break; case PMP_AMATCH_TOR: - sa = prev_addr << 2; /* shift up from [xx:0] to [xx+2:2] */ - ea = (this_addr << 2) - 1u; - if (sa > ea) { + if (prev_addr >= this_addr) { sa = ea = 0u; + break; } + sa = prev_addr << 2; /* shift up from [xx:0] to [xx+2:2] */ + ea = (this_addr << 2) - 1u; break; case PMP_AMATCH_NA4: @@ -236,9 +241,10 @@ void pmp_update_rule_addr(CPURISCVState *env, uint32_t pmp_index) void pmp_update_rule_nums(CPURISCVState *env) { int i; + uint8_t pmp_regions = riscv_cpu_cfg(env)->pmp_regions; env->pmp_state.num_rules = 0; - for (i = 0; i < MAX_RISCV_PMPS; i++) { + for (i = 0; i < pmp_regions; i++) { const uint8_t a_field = pmp_get_a_field(env->pmp_state.pmp[i].cfg_reg); if (PMP_AMATCH_OFF != a_field) { @@ -332,6 +338,7 @@ bool pmp_hart_has_privs(CPURISCVState *env, hwaddr addr, int pmp_size = 0; hwaddr s = 0; hwaddr e = 0; + uint8_t pmp_regions = riscv_cpu_cfg(env)->pmp_regions; /* Short cut if no rules */ if (0 == pmp_get_num_rules(env)) { @@ -356,7 +363,7 @@ bool pmp_hart_has_privs(CPURISCVState *env, hwaddr addr, * 1.10 draft priv spec states there is an implicit order * from low to high */ - for (i = 0; i < MAX_RISCV_PMPS; i++) { + for (i = 0; i < pmp_regions; i++) { s = pmp_is_in_range(env, i, addr); e = pmp_is_in_range(env, i, addr + pmp_size - 1); @@ -527,8 +534,9 @@ void pmpaddr_csr_write(CPURISCVState *env, uint32_t addr_index, { trace_pmpaddr_csr_write(env->mhartid, addr_index, val); bool is_next_cfg_tor = false; + uint8_t pmp_regions = riscv_cpu_cfg(env)->pmp_regions; - if (addr_index < MAX_RISCV_PMPS) { + if (addr_index < pmp_regions) { if (env->pmp_state.pmp[addr_index].addr_reg == val) { /* no change */ return; @@ -538,7 +546,7 @@ void pmpaddr_csr_write(CPURISCVState *env, uint32_t addr_index, * In TOR mode, need to check the lock bit of the next pmp * (if there is a next). 
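/*
 * [Illustrative sketch, not part of the patch.]  The reordered PMP_AMATCH_TOR
 * case above compares the raw pmpaddr values before shifting them into byte
 * addresses, so a non-increasing pair yields an empty rule even when
 * (this_addr << 2) would have wrapped.  Standalone sketch with assumed 64-bit
 * address arithmetic:
 */
#include <stdbool.h>
#include <stdint.h>

static bool tor_range(uint64_t prev_addr, uint64_t this_addr,
                      uint64_t *sa, uint64_t *ea)
{
    if (prev_addr >= this_addr) {
        *sa = *ea = 0;                /* rule matches nothing */
        return false;
    }
    *sa = prev_addr << 2;             /* pmpaddr holds address bits [..:2] */
    *ea = (this_addr << 2) - 1;
    return true;
}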
*/ - if (addr_index + 1 < MAX_RISCV_PMPS) { + if (addr_index + 1 < pmp_regions) { uint8_t pmp_cfg = env->pmp_state.pmp[addr_index + 1].cfg_reg; is_next_cfg_tor = PMP_AMATCH_TOR == pmp_get_a_field(pmp_cfg); @@ -573,8 +581,9 @@ void pmpaddr_csr_write(CPURISCVState *env, uint32_t addr_index, target_ulong pmpaddr_csr_read(CPURISCVState *env, uint32_t addr_index) { target_ulong val = 0; + uint8_t pmp_regions = riscv_cpu_cfg(env)->pmp_regions; - if (addr_index < MAX_RISCV_PMPS) { + if (addr_index < pmp_regions) { val = env->pmp_state.pmp[addr_index].addr_reg; trace_pmpaddr_csr_read(env->mhartid, addr_index, val); } else { @@ -592,6 +601,7 @@ void mseccfg_csr_write(CPURISCVState *env, target_ulong val) { int i; uint64_t mask = MSECCFG_MMWP | MSECCFG_MML; + uint8_t pmp_regions = riscv_cpu_cfg(env)->pmp_regions; /* Update PMM field only if the value is valid according to Zjpm v1.0 */ if (riscv_cpu_cfg(env)->ext_smmpm && riscv_cpu_mxl(env) == MXL_RV64 && @@ -603,7 +613,7 @@ void mseccfg_csr_write(CPURISCVState *env, target_ulong val) /* RLB cannot be enabled if it's already 0 and if any regions are locked */ if (!MSECCFG_RLB_ISSET(env)) { - for (i = 0; i < MAX_RISCV_PMPS; i++) { + for (i = 0; i < pmp_regions; i++) { if (pmp_is_locked(env, i)) { val &= ~MSECCFG_RLB; break; @@ -659,6 +669,7 @@ target_ulong pmp_get_tlb_size(CPURISCVState *env, hwaddr addr) hwaddr tlb_sa = addr & ~(TARGET_PAGE_SIZE - 1); hwaddr tlb_ea = tlb_sa + TARGET_PAGE_SIZE - 1; int i; + uint8_t pmp_regions = riscv_cpu_cfg(env)->pmp_regions; /* * If PMP is not supported or there are no PMP rules, the TLB page will not @@ -669,7 +680,7 @@ target_ulong pmp_get_tlb_size(CPURISCVState *env, hwaddr addr) return TARGET_PAGE_SIZE; } - for (i = 0; i < MAX_RISCV_PMPS; i++) { + for (i = 0; i < pmp_regions; i++) { if (pmp_get_a_field(env->pmp_state.pmp[i].cfg_reg) == PMP_AMATCH_OFF) { continue; } diff --git a/target/riscv/riscv-qmp-cmds.c b/target/riscv/riscv-qmp-cmds.c index d0a3243..c499f9b 100644 --- a/target/riscv/riscv-qmp-cmds.c +++ b/target/riscv/riscv-qmp-cmds.c @@ -25,12 +25,16 @@ #include "qemu/osdep.h" #include "qapi/error.h" -#include "qapi/qapi-commands-machine-target.h" +#include "qapi/qapi-commands-machine.h" #include "qobject/qbool.h" #include "qobject/qdict.h" #include "qapi/qobject-input-visitor.h" #include "qapi/visitor.h" #include "qom/qom-qobject.h" +#include "qemu/ctype.h" +#include "qemu/qemu-print.h" +#include "monitor/hmp.h" +#include "monitor/hmp-target.h" #include "system/kvm.h" #include "system/tcg.h" #include "cpu-qom.h" @@ -121,7 +125,7 @@ static void riscv_obj_add_profiles_qdict(Object *obj, QDict *qdict_out) for (int i = 0; riscv_profiles[i] != NULL; i++) { profile = riscv_profiles[i]; - value = QOBJECT(qbool_from_bool(profile->enabled)); + value = QOBJECT(qbool_from_bool(profile->present)); qdict_put_obj(qdict_out, profile->name, value); } @@ -240,3 +244,147 @@ CpuModelExpansionInfo *qmp_query_cpu_model_expansion(CpuModelExpansionType type, return expansion_info; } + +/* + * We have way too many potential CSRs and regs being added + * regularly to register them in a static array. + * + * Declare an empty array instead, making get_monitor_def() use + * the target_get_monitor_def() API directly. 
+ */ +const MonitorDef monitor_defs[] = { { } }; +const MonitorDef *target_monitor_defs(void) +{ + return monitor_defs; +} + +static bool reg_is_ulong_integer(CPURISCVState *env, const char *name, + target_ulong *val, bool is_gprh) +{ + const char * const *reg_names; + target_ulong *vals; + + if (is_gprh) { + reg_names = riscv_int_regnamesh; + vals = env->gprh; + } else { + reg_names = riscv_int_regnames; + vals = env->gpr; + } + + for (int i = 0; i < 32; i++) { + g_autofree char *reg_name = g_strdup(reg_names[i]); + char *reg1 = strtok(reg_name, "/"); + char *reg2 = strtok(NULL, "/"); + + if (strcasecmp(reg1, name) == 0 || + (reg2 && strcasecmp(reg2, name) == 0)) { + *val = vals[i]; + return true; + } + } + + return false; +} + +static bool reg_is_u64_fpu(CPURISCVState *env, const char *name, uint64_t *val) +{ + if (qemu_tolower(name[0]) != 'f') { + return false; + } + + for (int i = 0; i < 32; i++) { + g_autofree char *reg_name = g_strdup(riscv_fpr_regnames[i]); + char *reg1 = strtok(reg_name, "/"); + char *reg2 = strtok(NULL, "/"); + + if (strcasecmp(reg1, name) == 0 || + (reg2 && strcasecmp(reg2, name) == 0)) { + *val = env->fpr[i]; + return true; + } + } + + return false; +} + +static bool reg_is_vreg(const char *name) +{ + if (qemu_tolower(name[0]) != 'v' || strlen(name) > 3) { + return false; + } + + for (int i = 0; i < 32; i++) { + if (strcasecmp(name, riscv_rvv_regnames[i]) == 0) { + return true; + } + } + + return false; +} + +int target_get_monitor_def(CPUState *cs, const char *name, uint64_t *pval) +{ + CPURISCVState *env = &RISCV_CPU(cs)->env; + target_ulong val = 0; + uint64_t val64 = 0; + int i; + + if (reg_is_ulong_integer(env, name, &val, false) || + reg_is_ulong_integer(env, name, &val, true)) { + *pval = val; + return 0; + } + + if (reg_is_u64_fpu(env, name, &val64)) { + *pval = val64; + return 0; + } + + if (reg_is_vreg(name)) { + if (!riscv_cpu_cfg(env)->ext_zve32x) { + return -EINVAL; + } + + qemu_printf("Unable to print the value of vector " + "vreg '%s' from this API\n", name); + + /* + * We're returning 0 because returning -EINVAL triggers + * an 'unknown register' message in exp_unary() later, + * which feels ankward after our own error message. + */ + *pval = 0; + return 0; + } + + for (i = 0; i < ARRAY_SIZE(csr_ops); i++) { + RISCVException res; + int csrno = i; + + /* + * Early skip when possible since we're going + * through a lot of NULL entries. + */ + if (csr_ops[csrno].predicate == NULL) { + continue; + } + + if (strcasecmp(csr_ops[csrno].name, name) != 0) { + continue; + } + + res = riscv_csrrw_debug(env, csrno, &val, 0, 0); + + /* + * Rely on the smode, hmode, etc, predicates within csr.c + * to do the filtering of the registers that are present. + */ + if (res == RISCV_EXCP_NONE) { + *pval = val; + return 0; + } + } + + return -EINVAL; +} diff --git a/target/riscv/tcg/tcg-cpu.c b/target/riscv/tcg/tcg-cpu.c index 55e0097..1150bd1 100644 --- a/target/riscv/tcg/tcg-cpu.c +++ b/target/riscv/tcg/tcg-cpu.c @@ -191,7 +191,8 @@ static TCGTBCPUState riscv_get_tb_cpu_state(CPUState *cs) return (TCGTBCPUState){ .pc = env->xl == MXL_RV32 ? 
env->pc & UINT32_MAX : env->pc, - .flags = flags + .flags = flags, + .cs_base = env->misa_ext, }; } @@ -237,6 +238,31 @@ static void riscv_restore_state_to_opc(CPUState *cs, env->excp_uw2 = data[2]; } +#ifndef CONFIG_USER_ONLY +static vaddr riscv_pointer_wrap(CPUState *cs, int mmu_idx, + vaddr result, vaddr base) +{ + CPURISCVState *env = cpu_env(cs); + uint32_t pm_len; + bool pm_signext; + + if (cpu_address_xl(env) == MXL_RV32) { + return (uint32_t)result; + } + + pm_len = riscv_pm_get_pmlen(riscv_pm_get_pmm(env)); + if (pm_len == 0) { + return result; + } + + pm_signext = riscv_cpu_virt_mem_enabled(env); + if (pm_signext) { + return sextract64(result, 0, 64 - pm_len); + } + return extract64(result, 0, 64 - pm_len); +} +#endif + const TCGCPUOps riscv_tcg_ops = { .mttcg_supported = true, .guest_default_memory_order = 0, @@ -250,6 +276,7 @@ const TCGCPUOps riscv_tcg_ops = { #ifndef CONFIG_USER_ONLY .tlb_fill = riscv_cpu_tlb_fill, + .pointer_wrap = riscv_pointer_wrap, .cpu_exec_interrupt = riscv_cpu_exec_interrupt, .cpu_exec_halt = riscv_cpu_has_work, .cpu_exec_reset = cpu_reset, @@ -390,12 +417,21 @@ static void riscv_cpu_validate_misa_priv(CPURISCVState *env, Error **errp) static void riscv_cpu_validate_v(CPURISCVState *env, RISCVCPUConfig *cfg, Error **errp) { + uint32_t min_vlen; uint32_t vlen = cfg->vlenb << 3; - if (vlen > RV_VLEN_MAX || vlen < 128) { + if (riscv_has_ext(env, RVV)) { + min_vlen = 128; + } else if (cfg->ext_zve64x) { + min_vlen = 64; + } else if (cfg->ext_zve32x) { + min_vlen = 32; + } + + if (vlen > RV_VLEN_MAX || vlen < min_vlen) { error_setg(errp, "Vector extension implementation only supports VLEN " - "in the range [128, %d]", RV_VLEN_MAX); + "in the range [%d, %d]", min_vlen, RV_VLEN_MAX); return; } @@ -405,6 +441,12 @@ static void riscv_cpu_validate_v(CPURISCVState *env, RISCVCPUConfig *cfg, "in the range [8, 64]"); return; } + + if (vlen < cfg->elen) { + error_setg(errp, "Vector extension implementation requires VLEN " + "to be greater than or equal to ELEN"); + return; + } } static void riscv_cpu_disable_priv_spec_isa_exts(RISCVCPU *cpu) @@ -425,6 +467,15 @@ static void riscv_cpu_disable_priv_spec_isa_exts(RISCVCPU *cpu) continue; } + /* + * cpu.debug = true is marked as 'sdtrig', priv spec 1.12. + * Skip this warning since existing CPUs with older priv + * spec and debug = true will be impacted. 
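/*
 * [Illustrative sketch, not part of the patch.]  The new riscv_pointer_wrap()
 * hook above truncates to 32 bits on RV32 and otherwise strips the top PMLEN
 * bits defined by pointer masking, sign-extending when virtual-memory
 * addressing is in effect and zero-extending otherwise.  A standalone sketch
 * of the 64-bit step, with pm_len and sign_extend as assumed plain inputs:
 */
#include <stdbool.h>
#include <stdint.h>

static uint64_t wrap_pointer(uint64_t addr, unsigned pm_len, bool sign_extend)
{
    if (pm_len == 0) {
        return addr;                          /* pointer masking disabled */
    }
    if (sign_extend) {
        /* keep the low 64 - pm_len bits, replicate their top bit upward */
        return (uint64_t)((int64_t)(addr << pm_len) >> pm_len);
    }
    /* keep the low 64 - pm_len bits, clear the rest */
    return addr & (~(uint64_t)0 >> pm_len);
}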
+ */ + if (!strcmp(edata->name, "sdtrig")) { + continue; + } + isa_ext_update_enabled(cpu, edata->ext_enable_offset, false); /* @@ -625,7 +676,7 @@ void riscv_cpu_validate_set_extensions(RISCVCPU *cpu, Error **errp) return; } - if (riscv_has_ext(env, RVV)) { + if (cpu->cfg.ext_zve32x) { riscv_cpu_validate_v(env, &cpu->cfg, &local_err); if (local_err != NULL) { error_propagate(errp, local_err); @@ -691,7 +742,7 @@ void riscv_cpu_validate_set_extensions(RISCVCPU *cpu, Error **errp) return; } - if (mcc->misa_mxl_max != MXL_RV32 && cpu->cfg.ext_zcf) { + if (mcc->def->misa_mxl_max != MXL_RV32 && cpu->cfg.ext_zcf) { error_setg(errp, "Zcf extension is only relevant to RV32"); return; } @@ -788,7 +839,7 @@ void riscv_cpu_validate_set_extensions(RISCVCPU *cpu, Error **errp) return; } - if (mcc->misa_mxl_max == MXL_RV32 && cpu->cfg.ext_svukte) { + if (mcc->def->misa_mxl_max == MXL_RV32 && cpu->cfg.ext_svukte) { error_setg(errp, "svukte is not supported for RV32"); return; } @@ -804,6 +855,12 @@ void riscv_cpu_validate_set_extensions(RISCVCPU *cpu, Error **errp) cpu->cfg.ext_ssctr = false; } + if (cpu->cfg.ext_svrsw60t59b && + (!cpu->cfg.mmu || mcc->def->misa_mxl_max == MXL_RV32)) { + error_setg(errp, "svrsw60t59b is not supported on RV32 and MMU-less platforms"); + return; + } + /* * Disable isa extensions based on priv spec after we * validated and set everything we need. @@ -816,8 +873,9 @@ static bool riscv_cpu_validate_profile_satp(RISCVCPU *cpu, RISCVCPUProfile *profile, bool send_warn) { - int satp_max = satp_mode_max_from_map(cpu->cfg.satp_mode.supported); + int satp_max = cpu->cfg.max_satp_mode; + assert(satp_max >= 0); if (profile->satp_mode > satp_max) { if (send_warn) { bool is_32bit = riscv_cpu_is_32bit(cpu); @@ -840,16 +898,11 @@ static void riscv_cpu_check_parent_profile(RISCVCPU *cpu, RISCVCPUProfile *profile, RISCVCPUProfile *parent) { - const char *parent_name; - bool parent_enabled; - - if (!profile->enabled || !parent) { + if (!profile->present || !parent) { return; } - parent_name = parent->name; - parent_enabled = object_property_get_bool(OBJECT(cpu), parent_name, NULL); - profile->enabled = parent_enabled; + profile->present = parent->present; } static void riscv_cpu_validate_profile(RISCVCPU *cpu, @@ -910,7 +963,7 @@ static void riscv_cpu_validate_profile(RISCVCPU *cpu, } } - profile->enabled = profile_impl; + profile->present = profile_impl; riscv_cpu_check_parent_profile(cpu, profile, profile->u_parent); riscv_cpu_check_parent_profile(cpu, profile, profile->s_parent); @@ -1025,7 +1078,7 @@ static void cpu_enable_zc_implied_rules(RISCVCPU *cpu) cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zcmp), true); cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zcmt), true); - if (riscv_has_ext(env, RVF) && mcc->misa_mxl_max == MXL_RV32) { + if (riscv_has_ext(env, RVF) && mcc->def->misa_mxl_max == MXL_RV32) { cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zcf), true); } } @@ -1034,7 +1087,7 @@ static void cpu_enable_zc_implied_rules(RISCVCPU *cpu) if (riscv_has_ext(env, RVC) && env->priv_ver >= PRIV_VERSION_1_12_0) { cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zca), true); - if (riscv_has_ext(env, RVF) && mcc->misa_mxl_max == MXL_RV32) { + if (riscv_has_ext(env, RVF) && mcc->def->misa_mxl_max == MXL_RV32) { cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zcf), true); } @@ -1139,6 +1192,70 @@ static bool riscv_cpu_is_generic(Object *cpu_obj) return object_dynamic_cast(cpu_obj, TYPE_RISCV_DYNAMIC_CPU) != NULL; } +static void riscv_cpu_set_profile(RISCVCPU *cpu, + RISCVCPUProfile 
*profile, + bool enabled) +{ + int i, ext_offset; + + if (profile->u_parent != NULL) { + riscv_cpu_set_profile(cpu, profile->u_parent, enabled); + } + + if (profile->s_parent != NULL) { + riscv_cpu_set_profile(cpu, profile->s_parent, enabled); + } + + profile->enabled = enabled; + + if (profile->enabled) { + cpu->env.priv_ver = profile->priv_spec; + +#ifndef CONFIG_USER_ONLY + if (profile->satp_mode != RISCV_PROFILE_ATTR_UNUSED) { + object_property_set_bool(OBJECT(cpu), "mmu", true, NULL); + const char *satp_prop = satp_mode_str(profile->satp_mode, + riscv_cpu_is_32bit(cpu)); + object_property_set_bool(OBJECT(cpu), satp_prop, true, NULL); + } +#endif + } + + for (i = 0; misa_bits[i] != 0; i++) { + uint32_t bit = misa_bits[i]; + + if (!(profile->misa_ext & bit)) { + continue; + } + + if (bit == RVI && !profile->enabled) { + /* + * Disabling profiles will not disable the base + * ISA RV64I. + */ + continue; + } + + cpu_misa_ext_add_user_opt(bit, profile->enabled); + riscv_cpu_write_misa_bit(cpu, bit, profile->enabled); + } + + for (i = 0; profile->ext_offsets[i] != RISCV_PROFILE_EXT_LIST_END; i++) { + ext_offset = profile->ext_offsets[i]; + + if (profile->enabled) { + if (cpu_cfg_offset_is_named_feat(ext_offset)) { + riscv_cpu_enable_named_feat(cpu, ext_offset); + } + + cpu_bump_multi_ext_priv_ver(&cpu->env, ext_offset); + } + + cpu_cfg_ext_add_user_opt(ext_offset, profile->enabled); + isa_ext_update_enabled(cpu, ext_offset, profile->enabled); + } +} + /* * We'll get here via the following path: * @@ -1160,7 +1277,7 @@ static bool riscv_tcg_cpu_realize(CPUState *cs, Error **errp) #ifndef CONFIG_USER_ONLY RISCVCPUClass *mcc = RISCV_CPU_GET_CLASS(cpu); - if (mcc->misa_mxl_max >= MXL_RV128 && qemu_tcg_mttcg_enabled()) { + if (mcc->def->misa_mxl_max >= MXL_RV128 && qemu_tcg_mttcg_enabled()) { /* Missing 128-bit aligned atomics */ error_setg(errp, "128-bit RISC-V currently does not work with Multi " @@ -1305,7 +1422,6 @@ static void cpu_set_profile(Object *obj, Visitor *v, const char *name, RISCVCPUProfile *profile = opaque; RISCVCPU *cpu = RISCV_CPU(obj); bool value; - int i, ext_offset; if (riscv_cpu_is_vendor(obj)) { error_setg(errp, "Profile %s is not available for vendor CPUs", @@ -1324,64 +1440,8 @@ static void cpu_set_profile(Object *obj, Visitor *v, const char *name, } profile->user_set = true; - profile->enabled = value; - if (profile->u_parent != NULL) { - object_property_set_bool(obj, profile->u_parent->name, - profile->enabled, NULL); - } - - if (profile->s_parent != NULL) { - object_property_set_bool(obj, profile->s_parent->name, - profile->enabled, NULL); - } - - if (profile->enabled) { - cpu->env.priv_ver = profile->priv_spec; - } - -#ifndef CONFIG_USER_ONLY - if (profile->satp_mode != RISCV_PROFILE_ATTR_UNUSED) { - object_property_set_bool(obj, "mmu", true, NULL); - const char *satp_prop = satp_mode_str(profile->satp_mode, - riscv_cpu_is_32bit(cpu)); - object_property_set_bool(obj, satp_prop, profile->enabled, NULL); - } -#endif - - for (i = 0; misa_bits[i] != 0; i++) { - uint32_t bit = misa_bits[i]; - - if (!(profile->misa_ext & bit)) { - continue; - } - - if (bit == RVI && !profile->enabled) { - /* - * Disabling profiles will not disable the base - * ISA RV64I. 
- */ - continue; - } - - cpu_misa_ext_add_user_opt(bit, profile->enabled); - riscv_cpu_write_misa_bit(cpu, bit, profile->enabled); - } - - for (i = 0; profile->ext_offsets[i] != RISCV_PROFILE_EXT_LIST_END; i++) { - ext_offset = profile->ext_offsets[i]; - - if (profile->enabled) { - if (cpu_cfg_offset_is_named_feat(ext_offset)) { - riscv_cpu_enable_named_feat(cpu, ext_offset); - } - - cpu_bump_multi_ext_priv_ver(&cpu->env, ext_offset); - } - - cpu_cfg_ext_add_user_opt(ext_offset, profile->enabled); - isa_ext_update_enabled(cpu, ext_offset, profile->enabled); - } + riscv_cpu_set_profile(cpu, profile, value); } static void cpu_get_profile(Object *obj, Visitor *v, const char *name, @@ -1396,7 +1456,7 @@ static void cpu_get_profile(Object *obj, Visitor *v, const char *name, static void riscv_cpu_add_profiles(Object *cpu_obj) { for (int i = 0; riscv_profiles[i] != NULL; i++) { - const RISCVCPUProfile *profile = riscv_profiles[i]; + RISCVCPUProfile *profile = riscv_profiles[i]; object_property_add(cpu_obj, profile->name, "bool", cpu_get_profile, cpu_set_profile, @@ -1408,30 +1468,11 @@ static void riscv_cpu_add_profiles(Object *cpu_obj) * case. */ if (profile->enabled) { - object_property_set_bool(cpu_obj, profile->name, true, NULL); + riscv_cpu_set_profile(RISCV_CPU(cpu_obj), profile, true); } } } -static bool cpu_ext_is_deprecated(const char *ext_name) -{ - return isupper(ext_name[0]); -} - -/* - * String will be allocated in the heap. Caller is responsible - * for freeing it. - */ -static char *cpu_ext_to_lower(const char *ext_name) -{ - char *ret = g_malloc0(strlen(ext_name) + 1); - - strcpy(ret, ext_name); - ret[0] = tolower(ret[0]); - - return ret; -} - static void cpu_set_multi_ext_cfg(Object *obj, Visitor *v, const char *name, void *opaque, Error **errp) { @@ -1444,13 +1485,6 @@ static void cpu_set_multi_ext_cfg(Object *obj, Visitor *v, const char *name, return; } - if (cpu_ext_is_deprecated(multi_ext_cfg->name)) { - g_autofree char *lower = cpu_ext_to_lower(multi_ext_cfg->name); - - warn_report("CPU property '%s' is deprecated. 
Please use '%s' instead", - multi_ext_cfg->name, lower); - } - cpu_cfg_ext_add_user_opt(multi_ext_cfg->offset, value); prev_val = isa_ext_is_enabled(cpu, multi_ext_cfg->offset); @@ -1486,14 +1520,13 @@ static void cpu_add_multi_ext_prop(Object *cpu_obj, const RISCVCPUMultiExtConfig *multi_cfg) { bool generic_cpu = riscv_cpu_is_generic(cpu_obj); - bool deprecated_ext = cpu_ext_is_deprecated(multi_cfg->name); object_property_add(cpu_obj, multi_cfg->name, "bool", cpu_get_multi_ext_cfg, cpu_set_multi_ext_cfg, NULL, (void *)multi_cfg); - if (!generic_cpu || deprecated_ext) { + if (!generic_cpu) { return; } @@ -1536,8 +1569,6 @@ static void riscv_cpu_add_user_properties(Object *obj) riscv_cpu_add_multiext_prop_array(obj, riscv_cpu_vendor_exts); riscv_cpu_add_multiext_prop_array(obj, riscv_cpu_experimental_exts); - riscv_cpu_add_multiext_prop_array(obj, riscv_cpu_deprecated_exts); - riscv_cpu_add_profiles(obj); } @@ -1579,6 +1610,8 @@ static void riscv_init_max_cpu_extensions(Object *obj) if (env->misa_mxl != MXL_RV32) { isa_ext_update_enabled(cpu, CPU_CFG_OFFSET(ext_zcf), false); + } else { + isa_ext_update_enabled(cpu, CPU_CFG_OFFSET(ext_svrsw60t59b), false); } /* diff --git a/target/riscv/th_csr.c b/target/riscv/th_csr.c index 6c970d4..49eb7bb 100644 --- a/target/riscv/th_csr.c +++ b/target/riscv/th_csr.c @@ -27,12 +27,6 @@ #define TH_SXSTATUS_MAEE BIT(21) #define TH_SXSTATUS_THEADISAEE BIT(22) -typedef struct { - int csrno; - int (*insertion_test)(RISCVCPU *cpu); - riscv_csr_operations csr_ops; -} riscv_csr; - static RISCVException smode(CPURISCVState *env, int csrno) { if (riscv_has_ext(env, RVS)) { @@ -42,13 +36,9 @@ static RISCVException smode(CPURISCVState *env, int csrno) return RISCV_EXCP_ILLEGAL_INST; } -static int test_thead_mvendorid(RISCVCPU *cpu) +static bool test_thead_mvendorid(RISCVCPU *cpu) { - if (cpu->cfg.mvendorid != THEAD_VENDOR_ID) { - return -1; - } - - return 0; + return cpu->cfg.mvendorid == THEAD_VENDOR_ID; } static RISCVException read_th_sxstatus(CPURISCVState *env, int csrno, @@ -59,21 +49,11 @@ static RISCVException read_th_sxstatus(CPURISCVState *env, int csrno, return RISCV_EXCP_NONE; } -static riscv_csr th_csr_list[] = { +const RISCVCSR th_csr_list[] = { { .csrno = CSR_TH_SXSTATUS, .insertion_test = test_thead_mvendorid, .csr_ops = { "th.sxstatus", smode, read_th_sxstatus } - } + }, + { } }; - -void th_register_custom_csrs(RISCVCPU *cpu) -{ - for (size_t i = 0; i < ARRAY_SIZE(th_csr_list); i++) { - int csrno = th_csr_list[i].csrno; - riscv_csr_operations *csr_ops = &th_csr_list[i].csr_ops; - if (!th_csr_list[i].insertion_test(cpu)) { - riscv_set_csr_ops(csrno, csr_ops); - } - } -} diff --git a/target/riscv/time_helper.c b/target/riscv/time_helper.c index bc0d9a0..400e917 100644 --- a/target/riscv/time_helper.c +++ b/target/riscv/time_helper.c @@ -46,8 +46,23 @@ void riscv_timer_write_timecmp(CPURISCVState *env, QEMUTimer *timer, { uint64_t diff, ns_diff, next; RISCVAclintMTimerState *mtimer = env->rdtime_fn_arg; - uint32_t timebase_freq = mtimer->timebase_freq; - uint64_t rtc_r = env->rdtime_fn(env->rdtime_fn_arg) + delta; + uint32_t timebase_freq; + uint64_t rtc_r; + + if (!riscv_cpu_cfg(env)->ext_sstc || !env->rdtime_fn || + !env->rdtime_fn_arg || !get_field(env->menvcfg, MENVCFG_STCE)) { + /* S/VS Timer IRQ depends on sstc extension, rdtime_fn(), and STCE. */ + return; + } + + if (timer_irq == MIP_VSTIP && + (!riscv_has_ext(env, RVH) || !get_field(env->henvcfg, HENVCFG_STCE))) { + /* VS Timer IRQ also depends on RVH and henvcfg.STCE. 
*/ + return; + } + + timebase_freq = mtimer->timebase_freq; + rtc_r = env->rdtime_fn(env->rdtime_fn_arg) + delta; if (timecmp <= rtc_r) { /* @@ -125,6 +140,52 @@ void riscv_timer_write_timecmp(CPURISCVState *env, QEMUTimer *timer, timer_mod(timer, next); } +/* + * When disabling xenvcfg.STCE, the S/VS Timer may be disabled at the same time. + * It is safe to call this function regardless of whether the timer has been + * deleted or not. timer_del() will do nothing if the timer has already + * been deleted. + */ +static void riscv_timer_disable_timecmp(CPURISCVState *env, QEMUTimer *timer, + uint32_t timer_irq) +{ + /* Disable S-mode Timer IRQ and HW-based STIP */ + if ((timer_irq == MIP_STIP) && !get_field(env->menvcfg, MENVCFG_STCE)) { + riscv_cpu_update_mip(env, timer_irq, BOOL_TO_MASK(0)); + timer_del(timer); + return; + } + + /* Disable VS-mode Timer IRQ and HW-based VSTIP */ + if ((timer_irq == MIP_VSTIP) && + (!get_field(env->menvcfg, MENVCFG_STCE) || + !get_field(env->henvcfg, HENVCFG_STCE))) { + env->vstime_irq = 0; + riscv_cpu_update_mip(env, 0, BOOL_TO_MASK(0)); + timer_del(timer); + return; + } +} + +/* Enable or disable S/VS-mode Timer when xenvcfg.STCE is changed */ +void riscv_timer_stce_changed(CPURISCVState *env, bool is_m_mode, bool enable) +{ + if (enable) { + riscv_timer_write_timecmp(env, env->vstimer, env->vstimecmp, + env->htimedelta, MIP_VSTIP); + } else { + riscv_timer_disable_timecmp(env, env->vstimer, MIP_VSTIP); + } + + if (is_m_mode) { + if (enable) { + riscv_timer_write_timecmp(env, env->stimer, env->stimecmp, 0, MIP_STIP); + } else { + riscv_timer_disable_timecmp(env, env->stimer, MIP_STIP); + } + } +} + void riscv_timer_init(RISCVCPU *cpu) { CPURISCVState *env; diff --git a/target/riscv/time_helper.h b/target/riscv/time_helper.h index cacd79b..af1f634 100644 --- a/target/riscv/time_helper.h +++ b/target/riscv/time_helper.h @@ -25,6 +25,7 @@ void riscv_timer_write_timecmp(CPURISCVState *env, QEMUTimer *timer, uint64_t timecmp, uint64_t delta, uint32_t timer_irq); +void riscv_timer_stce_changed(CPURISCVState *env, bool is_m_mode, bool enable); void riscv_timer_init(RISCVCPU *cpu); #endif diff --git a/target/riscv/translate.c b/target/riscv/translate.c index 0d4f7d6..9a53aec 100644 --- a/target/riscv/translate.c +++ b/target/riscv/translate.c @@ -24,6 +24,7 @@ #include "exec/helper-gen.h" #include "exec/target_page.h" #include "exec/translator.h" +#include "accel/tcg/cpu-ldst.h" #include "exec/translation-block.h" #include "exec/log.h" #include "semihosting/semihost.h" @@ -285,7 +286,8 @@ static void exit_tb(DisasContext *ctx) tcg_gen_exit_tb(NULL, 0); } -static void gen_goto_tb(DisasContext *ctx, int n, target_long diff) +static void gen_goto_tb(DisasContext *ctx, unsigned tb_slot_idx, + target_long diff) { target_ulong dest = ctx->base.pc_next + diff; @@ -304,12 +306,12 @@ static void gen_goto_tb(DisasContext *ctx, int n, target_long diff) */ if (tb_cflags(ctx->base.tb) & CF_PCREL) { gen_update_pc(ctx, diff); - tcg_gen_goto_tb(n); + tcg_gen_goto_tb(tb_slot_idx); } else { - tcg_gen_goto_tb(n); + tcg_gen_goto_tb(tb_slot_idx); gen_update_pc(ctx, diff); } - tcg_gen_exit_tb(ctx->base.tb, n); + tcg_gen_exit_tb(ctx->base.tb, tb_slot_idx); } else { gen_update_pc(ctx, diff); lookup_and_goto_ptr(ctx); @@ -1166,7 +1168,7 @@ static uint32_t opcode_at(DisasContextBase *dcbase, target_ulong pc) CPUState *cpu = ctx->cs; CPURISCVState *env = cpu_env(cpu); - return translator_ldl(env, &ctx->base, pc); + return cpu_ldl_code(env, pc); } #define SS_MMU_INDEX(ctx) (ctx->mem_idx | 
MMU_IDX_SS_WRITE) @@ -1217,13 +1219,35 @@ const RISCVDecoder decoder_table[] = { const size_t decoder_table_size = ARRAY_SIZE(decoder_table); -static void decode_opc(CPURISCVState *env, DisasContext *ctx, uint16_t opcode) +static void decode_opc(CPURISCVState *env, DisasContext *ctx) { + uint32_t opcode; + bool pc_is_4byte_align = ((ctx->base.pc_next % 4) == 0); + ctx->virt_inst_excp = false; - ctx->cur_insn_len = insn_len(opcode); + if (pc_is_4byte_align) { + /* + * Load 4 bytes at once to make instruction fetch atomically. + * + * Note: When pc is 4-byte aligned, 4-byte instruction wouldn't be + * across pages. We could preload 4 bytes instruction no matter + * real one is 2 or 4 bytes. Instruction preload wouldn't trigger + * additional page fault. + */ + opcode = translator_ldl(env, &ctx->base, ctx->base.pc_next); + } else { + /* + * For unaligned pc, instruction preload may trigger additional + * page fault so we only load 2 bytes here. + */ + opcode = (uint32_t) translator_lduw(env, &ctx->base, ctx->base.pc_next); + } + ctx->ol = ctx->xl; + + ctx->cur_insn_len = insn_len((uint16_t)opcode); /* Check for compressed insn */ if (ctx->cur_insn_len == 2) { - ctx->opcode = opcode; + ctx->opcode = (uint16_t)opcode; /* * The Zca extension is added as way to refer to instructions in the C * extension that do not include the floating-point loads and stores @@ -1233,15 +1257,17 @@ static void decode_opc(CPURISCVState *env, DisasContext *ctx, uint16_t opcode) return; } } else { - uint32_t opcode32 = opcode; - opcode32 = deposit32(opcode32, 16, 16, - translator_lduw(env, &ctx->base, - ctx->base.pc_next + 2)); - ctx->opcode = opcode32; + if (!pc_is_4byte_align) { + /* Load last 2 bytes of instruction here */ + opcode = deposit32(opcode, 16, 16, + translator_lduw(env, &ctx->base, + ctx->base.pc_next + 2)); + } + ctx->opcode = opcode; for (guint i = 0; i < ctx->decoders->len; ++i) { riscv_cpu_decode_fn func = g_ptr_array_index(ctx->decoders, i); - if (func(ctx, opcode32)) { + if (func(ctx, opcode)) { return; } } @@ -1276,7 +1302,7 @@ static void riscv_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs) ctx->cfg_vta_all_1s = cpu->cfg.rvv_ta_all_1s; ctx->vstart_eq_zero = FIELD_EX32(tb_flags, TB_FLAGS, VSTART_EQ_ZERO); ctx->vl_eq_vlmax = FIELD_EX32(tb_flags, TB_FLAGS, VL_EQ_VLMAX); - ctx->misa_mxl_max = mcc->misa_mxl_max; + ctx->misa_mxl_max = mcc->def->misa_mxl_max; ctx->xl = FIELD_EX32(tb_flags, TB_FLAGS, XL); ctx->address_xl = FIELD_EX32(tb_flags, TB_FLAGS, AXL); ctx->cs = cs; @@ -1319,10 +1345,8 @@ static void riscv_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu) { DisasContext *ctx = container_of(dcbase, DisasContext, base); CPURISCVState *env = cpu_env(cpu); - uint16_t opcode16 = translator_lduw(env, &ctx->base, ctx->base.pc_next); - ctx->ol = ctx->xl; - decode_opc(env, ctx, opcode16); + decode_opc(env, ctx); ctx->base.pc_next += ctx->cur_insn_len; /* diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c index 5dc1c10..2de3358 100644 --- a/target/riscv/vector_helper.c +++ b/target/riscv/vector_helper.c @@ -27,7 +27,6 @@ #include "exec/helper-proto.h" #include "exec/tlb-flags.h" #include "exec/target_page.h" -#include "exec/tswap.h" #include "fpu/softfloat.h" #include "tcg/tcg-gvec-desc.h" #include "internals.h" @@ -35,7 +34,7 @@ #include <math.h> target_ulong HELPER(vsetvl)(CPURISCVState *env, target_ulong s1, - target_ulong s2) + target_ulong s2, target_ulong x0) { int vlmax, vl; RISCVCPU *cpu = env_archcpu(env); @@ -83,6 +82,16 @@ target_ulong 
HELPER(vsetvl)(CPURISCVState *env, target_ulong s1, } else { vl = vlmax; } + + if (cpu->cfg.rvv_vsetvl_x0_vill && x0 && (env->vl != vl)) { + /* only set vill bit. */ + env->vill = 1; + env->vtype = 0; + env->vl = 0; + env->vstart = 0; + return 0; + } + env->vl = vl; env->vtype = s2; env->vstart = 0; @@ -226,26 +235,26 @@ vext_continuous_ldst_host(CPURISCVState *env, vext_ldst_elem_fn_host *ldst_host, void *vd, uint32_t evl, uint32_t reg_start, void *host, uint32_t esz, bool is_load) { -#if HOST_BIG_ENDIAN - for (; reg_start < evl; reg_start++, host += esz) { - ldst_host(vd, reg_start, host); - } -#else - if (esz == 1) { - uint32_t byte_offset = reg_start * esz; - uint32_t size = (evl - reg_start) * esz; - - if (is_load) { - memcpy(vd + byte_offset, host, size); - } else { - memcpy(host, vd + byte_offset, size); - } - } else { + if (HOST_BIG_ENDIAN) { for (; reg_start < evl; reg_start++, host += esz) { ldst_host(vd, reg_start, host); } + } else { + if (esz == 1) { + uint32_t byte_offset = reg_start * esz; + uint32_t size = (evl - reg_start) * esz; + + if (is_load) { + memcpy(vd + byte_offset, host, size); + } else { + memcpy(host, vd + byte_offset, size); + } + } else { + for (; reg_start < evl; reg_start++, host += esz) { + ldst_host(vd, reg_start, host); + } + } } -#endif } static void vext_set_tail_elems_1s(target_ulong vl, void *vd, @@ -5189,11 +5198,11 @@ GEN_VEXT_VSLIE1UP(16, H2) GEN_VEXT_VSLIE1UP(32, H4) GEN_VEXT_VSLIE1UP(64, H8) -#define GEN_VEXT_VSLIDE1UP_VX(NAME, BITWIDTH) \ -void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ - CPURISCVState *env, uint32_t desc) \ -{ \ - vslide1up_##BITWIDTH(vd, v0, s1, vs2, env, desc); \ +#define GEN_VEXT_VSLIDE1UP_VX(NAME, BITWIDTH) \ +void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ + CPURISCVState *env, uint32_t desc) \ +{ \ + vslide1up_##BITWIDTH(vd, v0, s1, vs2, env, desc); \ } /* vslide1up.vx vd, vs2, rs1, vm # vd[0]=x[rs1], vd[i+1] = vs2[i] */ @@ -5240,11 +5249,11 @@ GEN_VEXT_VSLIDE1DOWN(16, H2) GEN_VEXT_VSLIDE1DOWN(32, H4) GEN_VEXT_VSLIDE1DOWN(64, H8) -#define GEN_VEXT_VSLIDE1DOWN_VX(NAME, BITWIDTH) \ -void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ - CPURISCVState *env, uint32_t desc) \ -{ \ - vslide1down_##BITWIDTH(vd, v0, s1, vs2, env, desc); \ +#define GEN_VEXT_VSLIDE1DOWN_VX(NAME, BITWIDTH) \ +void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ + CPURISCVState *env, uint32_t desc) \ +{ \ + vslide1down_##BITWIDTH(vd, v0, s1, vs2, env, desc); \ } /* vslide1down.vx vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=x[rs1] */ diff --git a/target/rx/cpu.c b/target/rx/cpu.c index 36eba75..da02ae7 100644 --- a/target/rx/cpu.c +++ b/target/rx/cpu.c @@ -75,8 +75,7 @@ static void rx_restore_state_to_opc(CPUState *cs, static bool rx_cpu_has_work(CPUState *cs) { - return cs->interrupt_request & - (CPU_INTERRUPT_HARD | CPU_INTERRUPT_FIR); + return cpu_test_interrupt(cs, CPU_INTERRUPT_HARD | CPU_INTERRUPT_FIR); } static int rx_cpu_mmu_index(CPUState *cs, bool ifunc) @@ -225,6 +224,7 @@ static const TCGCPUOps rx_tcg_ops = { .restore_state_to_opc = rx_restore_state_to_opc, .mmu_index = rx_cpu_mmu_index, .tlb_fill = rx_cpu_tlb_fill, + .pointer_wrap = cpu_pointer_wrap_uint32, .cpu_exec_interrupt = rx_cpu_exec_interrupt, .cpu_exec_halt = rx_cpu_has_work, diff --git a/target/rx/helper.c b/target/rx/helper.c index 0640ab3..41c9606 100644 --- a/target/rx/helper.c +++ b/target/rx/helper.c @@ -44,7 +44,7 @@ void rx_cpu_unpack_psw(CPURXState *env, uint32_t psw, int rte) void 
rx_cpu_do_interrupt(CPUState *cs) { CPURXState *env = cpu_env(cs); - int do_irq = cs->interrupt_request & INT_FLAGS; + int do_irq = cpu_test_interrupt(cs, INT_FLAGS); uint32_t save_psw; env->in_sleep = 0; @@ -63,7 +63,7 @@ void rx_cpu_do_interrupt(CPUState *cs) env->bpsw = save_psw; env->pc = env->fintv; env->psw_ipl = 15; - cs->interrupt_request &= ~CPU_INTERRUPT_FIR; + cpu_reset_interrupt(cs, CPU_INTERRUPT_FIR); qemu_set_irq(env->ack, env->ack_irq); qemu_log_mask(CPU_LOG_INT, "fast interrupt raised\n"); } else if (do_irq & CPU_INTERRUPT_HARD) { @@ -73,7 +73,7 @@ void rx_cpu_do_interrupt(CPUState *cs) cpu_stl_data(env, env->isp, env->pc); env->pc = cpu_ldl_data(env, env->intb + env->ack_irq * 4); env->psw_ipl = env->ack_ipl; - cs->interrupt_request &= ~CPU_INTERRUPT_HARD; + cpu_reset_interrupt(cs, CPU_INTERRUPT_HARD); qemu_set_irq(env->ack, env->ack_irq); qemu_log_mask(CPU_LOG_INT, "interrupt 0x%02x raised\n", env->ack_irq); diff --git a/target/rx/translate.c b/target/rx/translate.c index 19a9584..ef865f1 100644 --- a/target/rx/translate.c +++ b/target/rx/translate.c @@ -40,8 +40,8 @@ typedef struct DisasContext { } DisasContext; typedef struct DisasCompare { - TCGv value; - TCGv temp; + TCGv_i32 value; + TCGv_i32 temp; TCGCond cond; } DisasCompare; @@ -63,15 +63,20 @@ const char *rx_crname(uint8_t cr) #define DISAS_EXIT DISAS_TARGET_2 /* global register indexes */ -static TCGv cpu_regs[16]; -static TCGv cpu_psw_o, cpu_psw_s, cpu_psw_z, cpu_psw_c; -static TCGv cpu_psw_i, cpu_psw_pm, cpu_psw_u, cpu_psw_ipl; -static TCGv cpu_usp, cpu_fpsw, cpu_bpsw, cpu_bpc, cpu_isp; -static TCGv cpu_fintv, cpu_intb, cpu_pc; +static TCGv_i32 cpu_regs[16]; +static TCGv_i32 cpu_psw_o, cpu_psw_s, cpu_psw_z, cpu_psw_c; +static TCGv_i32 cpu_psw_i, cpu_psw_pm, cpu_psw_u, cpu_psw_ipl; +static TCGv_i32 cpu_usp, cpu_fpsw, cpu_bpsw, cpu_bpc, cpu_isp; +static TCGv_i32 cpu_fintv, cpu_intb, cpu_pc; static TCGv_i64 cpu_acc; #define cpu_sp cpu_regs[0] +static inline MemOp mo_endian(DisasContext *dc) +{ + return MO_LE; +} + /* decoder helper */ static uint32_t decode_load_bytes(DisasContext *ctx, uint32_t insn, int i, int n) @@ -85,7 +90,7 @@ static uint32_t decode_load_bytes(DisasContext *ctx, uint32_t insn, static uint32_t li(DisasContext *ctx, int sz) { - target_ulong addr; + vaddr addr; uint32_t tmp; CPURXState *env = ctx->env; addr = ctx->base.pc_next; @@ -147,12 +152,12 @@ void rx_cpu_dump_state(CPUState *cs, FILE *f, int flags) } } -static void gen_goto_tb(DisasContext *dc, int n, target_ulong dest) +static void gen_goto_tb(DisasContext *dc, unsigned tb_slot_idx, vaddr dest) { if (translator_use_goto_tb(&dc->base, dest)) { - tcg_gen_goto_tb(n); + tcg_gen_goto_tb(tb_slot_idx); tcg_gen_movi_i32(cpu_pc, dest); - tcg_gen_exit_tb(dc->base.tb, n); + tcg_gen_exit_tb(dc->base.tb, tb_slot_idx); } else { tcg_gen_movi_i32(cpu_pc, dest); tcg_gen_lookup_and_goto_ptr(); @@ -161,34 +166,34 @@ static void gen_goto_tb(DisasContext *dc, int n, target_ulong dest) } /* generic load wrapper */ -static inline void rx_gen_ld(unsigned int size, TCGv reg, TCGv mem) +static void rx_gen_ld(DisasContext *ctx, MemOp size, TCGv_i32 reg, TCGv_i32 mem) { - tcg_gen_qemu_ld_i32(reg, mem, 0, size | MO_SIGN | MO_TE); + tcg_gen_qemu_ld_i32(reg, mem, 0, size | MO_SIGN | mo_endian(ctx)); } /* unsigned load wrapper */ -static inline void rx_gen_ldu(unsigned int size, TCGv reg, TCGv mem) +static void rx_gen_ldu(DisasContext *ctx, MemOp size, TCGv_i32 reg, TCGv_i32 mem) { - tcg_gen_qemu_ld_i32(reg, mem, 0, size | MO_TE); + tcg_gen_qemu_ld_i32(reg, mem, 0, 
size | mo_endian(ctx)); } /* generic store wrapper */ -static inline void rx_gen_st(unsigned int size, TCGv reg, TCGv mem) +static void rx_gen_st(DisasContext *ctx, MemOp size, TCGv_i32 reg, TCGv_i32 mem) { - tcg_gen_qemu_st_i32(reg, mem, 0, size | MO_TE); + tcg_gen_qemu_st_i32(reg, mem, 0, size | mo_endian(ctx)); } /* [ri, rb] */ -static inline void rx_gen_regindex(DisasContext *ctx, TCGv mem, - int size, int ri, int rb) +static void rx_gen_regindex(DisasContext *ctx, TCGv_i32 mem, + int size, int ri, int rb) { tcg_gen_shli_i32(mem, cpu_regs[ri], size); tcg_gen_add_i32(mem, mem, cpu_regs[rb]); } /* dsp[reg] */ -static inline TCGv rx_index_addr(DisasContext *ctx, TCGv mem, - int ld, int size, int reg) +static TCGv_i32 rx_index_addr(DisasContext *ctx, TCGv_i32 mem, + int ld, int size, int reg) { uint32_t dsp; @@ -218,15 +223,15 @@ static inline MemOp mi_to_mop(unsigned mi) } /* load source operand */ -static inline TCGv rx_load_source(DisasContext *ctx, TCGv mem, - int ld, int mi, int rs) +static TCGv_i32 rx_load_source(DisasContext *ctx, TCGv_i32 mem, + int ld, int mi, int rs) { - TCGv addr; + TCGv_i32 addr; MemOp mop; if (ld < 3) { mop = mi_to_mop(mi); addr = rx_index_addr(ctx, mem, ld, mop & MO_SIZE, rs); - tcg_gen_qemu_ld_i32(mem, addr, 0, mop | MO_TE); + tcg_gen_qemu_ld_i32(mem, addr, 0, mop | mo_endian(ctx)); return mem; } else { return cpu_regs[rs]; @@ -315,7 +320,7 @@ static void psw_cond(DisasCompare *dc, uint32_t cond) } } -static void move_from_cr(DisasContext *ctx, TCGv ret, int cr, uint32_t pc) +static void move_from_cr(DisasContext *ctx, TCGv_i32 ret, int cr, uint32_t pc) { switch (cr) { case 0: /* PSW */ @@ -361,7 +366,7 @@ static void move_from_cr(DisasContext *ctx, TCGv ret, int cr, uint32_t pc) } } -static void move_to_cr(DisasContext *ctx, TCGv val, int cr) +static void move_to_cr(DisasContext *ctx, TCGv_i32 val, int cr) { if (cr >= 8 && !is_privileged(ctx, 0)) { /* Some control registers can only be written in privileged mode. 
*/ @@ -414,35 +419,35 @@ static void move_to_cr(DisasContext *ctx, TCGv val, int cr) } } -static void push(TCGv val) +static void push(DisasContext *ctx, TCGv_i32 val) { tcg_gen_subi_i32(cpu_sp, cpu_sp, 4); - rx_gen_st(MO_32, val, cpu_sp); + rx_gen_st(ctx, MO_32, val, cpu_sp); } -static void pop(TCGv ret) +static void pop(DisasContext *ctx, TCGv_i32 ret) { - rx_gen_ld(MO_32, ret, cpu_sp); + rx_gen_ld(ctx, MO_32, ret, cpu_sp); tcg_gen_addi_i32(cpu_sp, cpu_sp, 4); } /* mov.<bwl> rs,dsp5[rd] */ static bool trans_MOV_rm(DisasContext *ctx, arg_MOV_rm *a) { - TCGv mem; - mem = tcg_temp_new(); + TCGv_i32 mem; + mem = tcg_temp_new_i32(); tcg_gen_addi_i32(mem, cpu_regs[a->rd], a->dsp << a->sz); - rx_gen_st(a->sz, cpu_regs[a->rs], mem); + rx_gen_st(ctx, a->sz, cpu_regs[a->rs], mem); return true; } /* mov.<bwl> dsp5[rs],rd */ static bool trans_MOV_mr(DisasContext *ctx, arg_MOV_mr *a) { - TCGv mem; - mem = tcg_temp_new(); + TCGv_i32 mem; + mem = tcg_temp_new_i32(); tcg_gen_addi_i32(mem, cpu_regs[a->rs], a->dsp << a->sz); - rx_gen_ld(a->sz, cpu_regs[a->rd], mem); + rx_gen_ld(ctx, a->sz, cpu_regs[a->rd], mem); return true; } @@ -459,31 +464,31 @@ static bool trans_MOV_ir(DisasContext *ctx, arg_MOV_ir *a) /* mov.<bwl> #imm, dsp[rd] */ static bool trans_MOV_im(DisasContext *ctx, arg_MOV_im *a) { - TCGv imm, mem; + TCGv_i32 imm, mem; imm = tcg_constant_i32(a->imm); - mem = tcg_temp_new(); + mem = tcg_temp_new_i32(); tcg_gen_addi_i32(mem, cpu_regs[a->rd], a->dsp << a->sz); - rx_gen_st(a->sz, imm, mem); + rx_gen_st(ctx, a->sz, imm, mem); return true; } /* mov.<bwl> [ri,rb],rd */ static bool trans_MOV_ar(DisasContext *ctx, arg_MOV_ar *a) { - TCGv mem; - mem = tcg_temp_new(); + TCGv_i32 mem; + mem = tcg_temp_new_i32(); rx_gen_regindex(ctx, mem, a->sz, a->ri, a->rb); - rx_gen_ld(a->sz, cpu_regs[a->rd], mem); + rx_gen_ld(ctx, a->sz, cpu_regs[a->rd], mem); return true; } /* mov.<bwl> rd,[ri,rb] */ static bool trans_MOV_ra(DisasContext *ctx, arg_MOV_ra *a) { - TCGv mem; - mem = tcg_temp_new(); + TCGv_i32 mem; + mem = tcg_temp_new_i32(); rx_gen_regindex(ctx, mem, a->sz, a->ri, a->rb); - rx_gen_st(a->sz, cpu_regs[a->rs], mem); + rx_gen_st(ctx, a->sz, cpu_regs[a->rs], mem); return true; } @@ -493,7 +498,7 @@ static bool trans_MOV_ra(DisasContext *ctx, arg_MOV_ra *a) /* mov.<bwl> rs,rd */ static bool trans_MOV_mm(DisasContext *ctx, arg_MOV_mm *a) { - TCGv tmp, mem, addr; + TCGv_i32 tmp, mem, addr; if (a->lds == 3 && a->ldd == 3) { /* mov.<bwl> rs,rd */ @@ -501,22 +506,22 @@ static bool trans_MOV_mm(DisasContext *ctx, arg_MOV_mm *a) return true; } - mem = tcg_temp_new(); + mem = tcg_temp_new_i32(); if (a->lds == 3) { /* mov.<bwl> rs,dsp[rd] */ addr = rx_index_addr(ctx, mem, a->ldd, a->sz, a->rs); - rx_gen_st(a->sz, cpu_regs[a->rd], addr); + rx_gen_st(ctx, a->sz, cpu_regs[a->rd], addr); } else if (a->ldd == 3) { /* mov.<bwl> dsp[rs],rd */ addr = rx_index_addr(ctx, mem, a->lds, a->sz, a->rs); - rx_gen_ld(a->sz, cpu_regs[a->rd], addr); + rx_gen_ld(ctx, a->sz, cpu_regs[a->rd], addr); } else { /* mov.<bwl> dsp[rs],dsp[rd] */ - tmp = tcg_temp_new(); + tmp = tcg_temp_new_i32(); addr = rx_index_addr(ctx, mem, a->lds, a->sz, a->rs); - rx_gen_ld(a->sz, tmp, addr); + rx_gen_ld(ctx, a->sz, tmp, addr); addr = rx_index_addr(ctx, mem, a->ldd, a->sz, a->rd); - rx_gen_st(a->sz, tmp, addr); + rx_gen_st(ctx, a->sz, tmp, addr); } return true; } @@ -525,13 +530,13 @@ static bool trans_MOV_mm(DisasContext *ctx, arg_MOV_mm *a) /* mov.<bwl> rs,[-rd] */ static bool trans_MOV_rp(DisasContext *ctx, arg_MOV_rp *a) { - TCGv val; - val = 
tcg_temp_new(); + TCGv_i32 val; + val = tcg_temp_new_i32(); tcg_gen_mov_i32(val, cpu_regs[a->rs]); if (a->ad == 1) { tcg_gen_subi_i32(cpu_regs[a->rd], cpu_regs[a->rd], 1 << a->sz); } - rx_gen_st(a->sz, val, cpu_regs[a->rd]); + rx_gen_st(ctx, a->sz, val, cpu_regs[a->rd]); if (a->ad == 0) { tcg_gen_addi_i32(cpu_regs[a->rd], cpu_regs[a->rd], 1 << a->sz); } @@ -542,12 +547,12 @@ static bool trans_MOV_rp(DisasContext *ctx, arg_MOV_rp *a) /* mov.<bwl> [-rd],rs */ static bool trans_MOV_pr(DisasContext *ctx, arg_MOV_pr *a) { - TCGv val; - val = tcg_temp_new(); + TCGv_i32 val; + val = tcg_temp_new_i32(); if (a->ad == 1) { tcg_gen_subi_i32(cpu_regs[a->rd], cpu_regs[a->rd], 1 << a->sz); } - rx_gen_ld(a->sz, val, cpu_regs[a->rd]); + rx_gen_ld(ctx, a->sz, val, cpu_regs[a->rd]); if (a->ad == 0) { tcg_gen_addi_i32(cpu_regs[a->rd], cpu_regs[a->rd], 1 << a->sz); } @@ -559,10 +564,10 @@ static bool trans_MOV_pr(DisasContext *ctx, arg_MOV_pr *a) /* movu.<bw> dsp[rs],rd */ static bool trans_MOVU_mr(DisasContext *ctx, arg_MOVU_mr *a) { - TCGv mem; - mem = tcg_temp_new(); + TCGv_i32 mem; + mem = tcg_temp_new_i32(); tcg_gen_addi_i32(mem, cpu_regs[a->rs], a->dsp << a->sz); - rx_gen_ldu(a->sz, cpu_regs[a->rd], mem); + rx_gen_ldu(ctx, a->sz, cpu_regs[a->rd], mem); return true; } @@ -576,10 +581,10 @@ static bool trans_MOVU_rr(DisasContext *ctx, arg_MOVU_rr *a) /* movu.<bw> [ri,rb],rd */ static bool trans_MOVU_ar(DisasContext *ctx, arg_MOVU_ar *a) { - TCGv mem; - mem = tcg_temp_new(); + TCGv_i32 mem; + mem = tcg_temp_new_i32(); rx_gen_regindex(ctx, mem, a->sz, a->ri, a->rb); - rx_gen_ldu(a->sz, cpu_regs[a->rd], mem); + rx_gen_ldu(ctx, a->sz, cpu_regs[a->rd], mem); return true; } @@ -587,12 +592,12 @@ static bool trans_MOVU_ar(DisasContext *ctx, arg_MOVU_ar *a) /* mov.<bw> [-rd],rs */ static bool trans_MOVU_pr(DisasContext *ctx, arg_MOVU_pr *a) { - TCGv val; - val = tcg_temp_new(); + TCGv_i32 val; + val = tcg_temp_new_i32(); if (a->ad == 1) { tcg_gen_subi_i32(cpu_regs[a->rd], cpu_regs[a->rd], 1 << a->sz); } - rx_gen_ldu(a->sz, val, cpu_regs[a->rd]); + rx_gen_ldu(ctx, a->sz, val, cpu_regs[a->rd]); if (a->ad == 0) { tcg_gen_addi_i32(cpu_regs[a->rd], cpu_regs[a->rd], 1 << a->sz); } @@ -617,9 +622,9 @@ static bool trans_POP(DisasContext *ctx, arg_POP *a) /* popc cr */ static bool trans_POPC(DisasContext *ctx, arg_POPC *a) { - TCGv val; - val = tcg_temp_new(); - pop(val); + TCGv_i32 val; + val = tcg_temp_new_i32(); + pop(ctx, val); move_to_cr(ctx, val, a->cr); return true; } @@ -634,7 +639,7 @@ static bool trans_POPM(DisasContext *ctx, arg_POPM *a) } r = a->rd; while (r <= a->rd2 && r < 16) { - pop(cpu_regs[r++]); + pop(ctx, cpu_regs[r++]); } return true; } @@ -643,34 +648,34 @@ static bool trans_POPM(DisasContext *ctx, arg_POPM *a) /* push.<bwl> rs */ static bool trans_PUSH_r(DisasContext *ctx, arg_PUSH_r *a) { - TCGv val; - val = tcg_temp_new(); + TCGv_i32 val; + val = tcg_temp_new_i32(); tcg_gen_mov_i32(val, cpu_regs[a->rs]); tcg_gen_subi_i32(cpu_sp, cpu_sp, 4); - rx_gen_st(a->sz, val, cpu_sp); + rx_gen_st(ctx, a->sz, val, cpu_sp); return true; } /* push.<bwl> dsp[rs] */ static bool trans_PUSH_m(DisasContext *ctx, arg_PUSH_m *a) { - TCGv mem, val, addr; - mem = tcg_temp_new(); - val = tcg_temp_new(); + TCGv_i32 mem, val, addr; + mem = tcg_temp_new_i32(); + val = tcg_temp_new_i32(); addr = rx_index_addr(ctx, mem, a->ld, a->sz, a->rs); - rx_gen_ld(a->sz, val, addr); + rx_gen_ld(ctx, a->sz, val, addr); tcg_gen_subi_i32(cpu_sp, cpu_sp, 4); - rx_gen_st(a->sz, val, cpu_sp); + rx_gen_st(ctx, a->sz, val, cpu_sp); return true; 
} /* pushc rx */ static bool trans_PUSHC(DisasContext *ctx, arg_PUSHC *a) { - TCGv val; - val = tcg_temp_new(); + TCGv_i32 val; + val = tcg_temp_new_i32(); move_from_cr(ctx, val, a->cr, ctx->pc); - push(val); + push(ctx, val); return true; } @@ -685,7 +690,7 @@ static bool trans_PUSHM(DisasContext *ctx, arg_PUSHM *a) } r = a->rs2; while (r >= a->rs && r >= 0) { - push(cpu_regs[r--]); + push(ctx, cpu_regs[r--]); } return true; } @@ -693,8 +698,8 @@ static bool trans_PUSHM(DisasContext *ctx, arg_PUSHM *a) /* xchg rs,rd */ static bool trans_XCHG_rr(DisasContext *ctx, arg_XCHG_rr *a) { - TCGv tmp; - tmp = tcg_temp_new(); + TCGv_i32 tmp; + tmp = tcg_temp_new_i32(); tcg_gen_mov_i32(tmp, cpu_regs[a->rs]); tcg_gen_mov_i32(cpu_regs[a->rs], cpu_regs[a->rd]); tcg_gen_mov_i32(cpu_regs[a->rd], tmp); @@ -704,8 +709,8 @@ static bool trans_XCHG_rr(DisasContext *ctx, arg_XCHG_rr *a) /* xchg dsp[rs].<mi>,rd */ static bool trans_XCHG_mr(DisasContext *ctx, arg_XCHG_mr *a) { - TCGv mem, addr; - mem = tcg_temp_new(); + TCGv_i32 mem, addr; + mem = tcg_temp_new_i32(); switch (a->mi) { case 0: /* dsp[rs].b */ case 1: /* dsp[rs].w */ @@ -724,10 +729,10 @@ static bool trans_XCHG_mr(DisasContext *ctx, arg_XCHG_mr *a) return true; } -static inline void stcond(TCGCond cond, int rd, int imm) +static void stcond(TCGCond cond, int rd, int imm) { - TCGv z; - TCGv _imm; + TCGv_i32 z; + TCGv_i32 _imm; z = tcg_constant_i32(0); _imm = tcg_constant_i32(imm); tcg_gen_movcond_i32(cond, cpu_regs[rd], cpu_psw_z, z, @@ -753,15 +758,15 @@ static bool trans_STNZ(DisasContext *ctx, arg_STNZ *a) static bool trans_SCCnd(DisasContext *ctx, arg_SCCnd *a) { DisasCompare dc; - TCGv val, mem, addr; - dc.temp = tcg_temp_new(); + TCGv_i32 val, mem, addr; + dc.temp = tcg_temp_new_i32(); psw_cond(&dc, a->cd); if (a->ld < 3) { - val = tcg_temp_new(); - mem = tcg_temp_new(); + val = tcg_temp_new_i32(); + mem = tcg_temp_new_i32(); tcg_gen_setcondi_i32(dc.cond, val, dc.value, 0); addr = rx_index_addr(ctx, mem, a->sz, a->ld, a->rd); - rx_gen_st(a->sz, val, addr); + rx_gen_st(ctx, a->sz, val, addr); } else { tcg_gen_setcondi_i32(dc.cond, cpu_regs[a->rd], dc.value, 0); } @@ -772,7 +777,7 @@ static bool trans_SCCnd(DisasContext *ctx, arg_SCCnd *a) static bool trans_RTSD_i(DisasContext *ctx, arg_RTSD_i *a) { tcg_gen_addi_i32(cpu_sp, cpu_sp, a->imm << 2); - pop(cpu_pc); + pop(ctx, cpu_pc); ctx->base.is_jmp = DISAS_JUMP; return true; } @@ -792,42 +797,42 @@ static bool trans_RTSD_irr(DisasContext *ctx, arg_RTSD_irr *a) tcg_gen_addi_i32(cpu_sp, cpu_sp, adj << 2); dst = a->rd; while (dst <= a->rd2 && dst < 16) { - pop(cpu_regs[dst++]); + pop(ctx, cpu_regs[dst++]); } - pop(cpu_pc); + pop(ctx, cpu_pc); ctx->base.is_jmp = DISAS_JUMP; return true; } -typedef void (*op2fn)(TCGv ret, TCGv arg1); -typedef void (*op3fn)(TCGv ret, TCGv arg1, TCGv arg2); +typedef void (*op2fn)(TCGv_i32 ret, TCGv_i32 arg1); +typedef void (*op3fn)(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2); -static inline void rx_gen_op_rr(op2fn opr, int dst, int src) +static void rx_gen_op_rr(op2fn opr, int dst, int src) { opr(cpu_regs[dst], cpu_regs[src]); } -static inline void rx_gen_op_rrr(op3fn opr, int dst, int src, int src2) +static void rx_gen_op_rrr(op3fn opr, int dst, int src, int src2) { opr(cpu_regs[dst], cpu_regs[src], cpu_regs[src2]); } -static inline void rx_gen_op_irr(op3fn opr, int dst, int src, uint32_t src2) +static void rx_gen_op_irr(op3fn opr, int dst, int src, uint32_t src2) { - TCGv imm = tcg_constant_i32(src2); + TCGv_i32 imm = tcg_constant_i32(src2); opr(cpu_regs[dst], 
cpu_regs[src], imm); } -static inline void rx_gen_op_mr(op3fn opr, DisasContext *ctx, - int dst, int src, int ld, int mi) +static void rx_gen_op_mr(op3fn opr, DisasContext *ctx, + int dst, int src, int ld, int mi) { - TCGv val, mem; - mem = tcg_temp_new(); + TCGv_i32 val, mem; + mem = tcg_temp_new_i32(); val = rx_load_source(ctx, mem, ld, mi, src); opr(cpu_regs[dst], cpu_regs[dst], val); } -static void rx_and(TCGv ret, TCGv arg1, TCGv arg2) +static void rx_and(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) { tcg_gen_and_i32(cpu_psw_s, arg1, arg2); tcg_gen_mov_i32(cpu_psw_z, cpu_psw_s); @@ -857,7 +862,7 @@ static bool trans_AND_rrr(DisasContext *ctx, arg_AND_rrr *a) return true; } -static void rx_or(TCGv ret, TCGv arg1, TCGv arg2) +static void rx_or(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) { tcg_gen_or_i32(cpu_psw_s, arg1, arg2); tcg_gen_mov_i32(cpu_psw_z, cpu_psw_s); @@ -887,7 +892,7 @@ static bool trans_OR_rrr(DisasContext *ctx, arg_OR_rrr *a) return true; } -static void rx_xor(TCGv ret, TCGv arg1, TCGv arg2) +static void rx_xor(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) { tcg_gen_xor_i32(cpu_psw_s, arg1, arg2); tcg_gen_mov_i32(cpu_psw_z, cpu_psw_s); @@ -909,7 +914,7 @@ static bool trans_XOR_mr(DisasContext *ctx, arg_XOR_mr *a) return true; } -static void rx_tst(TCGv ret, TCGv arg1, TCGv arg2) +static void rx_tst(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) { tcg_gen_and_i32(cpu_psw_s, arg1, arg2); tcg_gen_mov_i32(cpu_psw_z, cpu_psw_s); @@ -930,7 +935,7 @@ static bool trans_TST_mr(DisasContext *ctx, arg_TST_mr *a) return true; } -static void rx_not(TCGv ret, TCGv arg1) +static void rx_not(TCGv_i32 ret, TCGv_i32 arg1) { tcg_gen_not_i32(ret, arg1); tcg_gen_mov_i32(cpu_psw_z, ret); @@ -945,7 +950,7 @@ static bool trans_NOT_rr(DisasContext *ctx, arg_NOT_rr *a) return true; } -static void rx_neg(TCGv ret, TCGv arg1) +static void rx_neg(TCGv_i32 ret, TCGv_i32 arg1) { tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_psw_o, arg1, 0x80000000); tcg_gen_neg_i32(ret, arg1); @@ -964,9 +969,9 @@ static bool trans_NEG_rr(DisasContext *ctx, arg_NEG_rr *a) } /* ret = arg1 + arg2 + psw_c */ -static void rx_adc(TCGv ret, TCGv arg1, TCGv arg2) +static void rx_adc(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) { - TCGv z = tcg_constant_i32(0); + TCGv_i32 z = tcg_constant_i32(0); tcg_gen_add2_i32(cpu_psw_s, cpu_psw_c, arg1, z, cpu_psw_c, z); tcg_gen_add2_i32(cpu_psw_s, cpu_psw_c, cpu_psw_s, cpu_psw_c, arg2, z); tcg_gen_xor_i32(cpu_psw_o, cpu_psw_s, arg1); @@ -1002,9 +1007,9 @@ static bool trans_ADC_mr(DisasContext *ctx, arg_ADC_mr *a) } /* ret = arg1 + arg2 */ -static void rx_add(TCGv ret, TCGv arg1, TCGv arg2) +static void rx_add(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) { - TCGv z = tcg_constant_i32(0); + TCGv_i32 z = tcg_constant_i32(0); tcg_gen_add2_i32(cpu_psw_s, cpu_psw_c, arg1, z, arg2, z); tcg_gen_xor_i32(cpu_psw_o, cpu_psw_s, arg1); tcg_gen_xor_i32(cpu_psw_z, arg1, arg2); @@ -1037,7 +1042,7 @@ static bool trans_ADD_rrr(DisasContext *ctx, arg_ADD_rrr *a) } /* ret = arg1 - arg2 */ -static void rx_sub(TCGv ret, TCGv arg1, TCGv arg2) +static void rx_sub(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) { tcg_gen_sub_i32(cpu_psw_s, arg1, arg2); tcg_gen_setcond_i32(TCG_COND_GEU, cpu_psw_c, arg1, arg2); @@ -1051,17 +1056,17 @@ static void rx_sub(TCGv ret, TCGv arg1, TCGv arg2) } } -static void rx_cmp(TCGv dummy, TCGv arg1, TCGv arg2) +static void rx_cmp(TCGv_i32 dummy, TCGv_i32 arg1, TCGv_i32 arg2) { rx_sub(NULL, arg1, arg2); } /* ret = arg1 - arg2 - !psw_c */ /* -> ret = arg1 + ~arg2 + psw_c */ -static void rx_sbb(TCGv ret, 
TCGv arg1, TCGv arg2) +static void rx_sbb(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) { - TCGv temp; - temp = tcg_temp_new(); + TCGv_i32 temp; + temp = tcg_temp_new_i32(); tcg_gen_not_i32(temp, arg2); rx_adc(ret, arg1, temp); } @@ -1187,7 +1192,7 @@ static bool trans_MUL_rrr(DisasContext *ctx, arg_MUL_rrr *a) /* emul #imm, rd */ static bool trans_EMUL_ir(DisasContext *ctx, arg_EMUL_ir *a) { - TCGv imm = tcg_constant_i32(a->imm); + TCGv_i32 imm = tcg_constant_i32(a->imm); if (a->rd > 14) { qemu_log_mask(LOG_GUEST_ERROR, "rd too large %d", a->rd); } @@ -1200,11 +1205,11 @@ static bool trans_EMUL_ir(DisasContext *ctx, arg_EMUL_ir *a) /* emul dsp[rs], rd */ static bool trans_EMUL_mr(DisasContext *ctx, arg_EMUL_mr *a) { - TCGv val, mem; + TCGv_i32 val, mem; if (a->rd > 14) { qemu_log_mask(LOG_GUEST_ERROR, "rd too large %d", a->rd); } - mem = tcg_temp_new(); + mem = tcg_temp_new_i32(); val = rx_load_source(ctx, mem, a->ld, a->mi, a->rs); tcg_gen_muls2_i32(cpu_regs[a->rd], cpu_regs[(a->rd + 1) & 15], cpu_regs[a->rd], val); @@ -1214,7 +1219,7 @@ static bool trans_EMUL_mr(DisasContext *ctx, arg_EMUL_mr *a) /* emulu #imm, rd */ static bool trans_EMULU_ir(DisasContext *ctx, arg_EMULU_ir *a) { - TCGv imm = tcg_constant_i32(a->imm); + TCGv_i32 imm = tcg_constant_i32(a->imm); if (a->rd > 14) { qemu_log_mask(LOG_GUEST_ERROR, "rd too large %d", a->rd); } @@ -1227,23 +1232,23 @@ static bool trans_EMULU_ir(DisasContext *ctx, arg_EMULU_ir *a) /* emulu dsp[rs], rd */ static bool trans_EMULU_mr(DisasContext *ctx, arg_EMULU_mr *a) { - TCGv val, mem; + TCGv_i32 val, mem; if (a->rd > 14) { qemu_log_mask(LOG_GUEST_ERROR, "rd too large %d", a->rd); } - mem = tcg_temp_new(); + mem = tcg_temp_new_i32(); val = rx_load_source(ctx, mem, a->ld, a->mi, a->rs); tcg_gen_mulu2_i32(cpu_regs[a->rd], cpu_regs[(a->rd + 1) & 15], cpu_regs[a->rd], val); return true; } -static void rx_div(TCGv ret, TCGv arg1, TCGv arg2) +static void rx_div(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) { gen_helper_div(ret, tcg_env, arg1, arg2); } -static void rx_divu(TCGv ret, TCGv arg1, TCGv arg2) +static void rx_divu(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) { gen_helper_divu(ret, tcg_env, arg1, arg2); } @@ -1283,8 +1288,8 @@ static bool trans_DIVU_mr(DisasContext *ctx, arg_DIVU_mr *a) /* shll #imm:5, rs2, rd */ static bool trans_SHLL_irr(DisasContext *ctx, arg_SHLL_irr *a) { - TCGv tmp; - tmp = tcg_temp_new(); + TCGv_i32 tmp; + tmp = tcg_temp_new_i32(); if (a->imm) { tcg_gen_sari_i32(cpu_psw_c, cpu_regs[a->rs2], 32 - a->imm); tcg_gen_shli_i32(cpu_regs[a->rd], cpu_regs[a->rs2], a->imm); @@ -1306,14 +1311,14 @@ static bool trans_SHLL_irr(DisasContext *ctx, arg_SHLL_irr *a) static bool trans_SHLL_rr(DisasContext *ctx, arg_SHLL_rr *a) { TCGLabel *noshift, *done; - TCGv count, tmp; + TCGv_i32 count, tmp; noshift = gen_new_label(); done = gen_new_label(); /* if (cpu_regs[a->rs]) { */ tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_regs[a->rs], 0, noshift); - count = tcg_temp_new(); - tmp = tcg_temp_new(); + count = tcg_temp_new_i32(); + tmp = tcg_temp_new_i32(); tcg_gen_andi_i32(tmp, cpu_regs[a->rs], 31); tcg_gen_sub_i32(count, tcg_constant_i32(32), tmp); tcg_gen_sar_i32(cpu_psw_c, cpu_regs[a->rd], count); @@ -1334,10 +1339,10 @@ static bool trans_SHLL_rr(DisasContext *ctx, arg_SHLL_rr *a) return true; } -static inline void shiftr_imm(uint32_t rd, uint32_t rs, uint32_t imm, - unsigned int alith) +static void shiftr_imm(uint32_t rd, uint32_t rs, uint32_t imm, + unsigned int alith) { - static void (* const gen_sXri[])(TCGv ret, TCGv arg1, int arg2) = { + static 
void (* const gen_sXri[])(TCGv_i32 ret, TCGv_i32 arg1, int arg2) = { tcg_gen_shri_i32, tcg_gen_sari_i32, }; tcg_debug_assert(alith < 2); @@ -1354,20 +1359,21 @@ static inline void shiftr_imm(uint32_t rd, uint32_t rs, uint32_t imm, tcg_gen_mov_i32(cpu_psw_s, cpu_regs[rd]); } -static inline void shiftr_reg(uint32_t rd, uint32_t rs, unsigned int alith) +static void shiftr_reg(uint32_t rd, uint32_t rs, unsigned int alith) { TCGLabel *noshift, *done; - TCGv count; - static void (* const gen_sXri[])(TCGv ret, TCGv arg1, int arg2) = { + TCGv_i32 count; + static void (* const gen_sXri[])(TCGv_i32 ret, TCGv_i32 arg1, int arg2) = { tcg_gen_shri_i32, tcg_gen_sari_i32, }; - static void (* const gen_sXr[])(TCGv ret, TCGv arg1, TCGv arg2) = { + static void (* const gen_sXr[])(TCGv_i32 ret, + TCGv_i32 arg1, TCGv_i32 arg2) = { tcg_gen_shr_i32, tcg_gen_sar_i32, }; tcg_debug_assert(alith < 2); noshift = gen_new_label(); done = gen_new_label(); - count = tcg_temp_new(); + count = tcg_temp_new_i32(); /* if (cpu_regs[rs]) { */ tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_regs[rs], 0, noshift); tcg_gen_andi_i32(count, cpu_regs[rs], 31); @@ -1419,8 +1425,8 @@ static bool trans_SHLR_rr(DisasContext *ctx, arg_SHLR_rr *a) /* rolc rd */ static bool trans_ROLC(DisasContext *ctx, arg_ROLC *a) { - TCGv tmp; - tmp = tcg_temp_new(); + TCGv_i32 tmp; + tmp = tcg_temp_new_i32(); tcg_gen_shri_i32(tmp, cpu_regs[a->rd], 31); tcg_gen_shli_i32(cpu_regs[a->rd], cpu_regs[a->rd], 1); tcg_gen_or_i32(cpu_regs[a->rd], cpu_regs[a->rd], cpu_psw_c); @@ -1433,8 +1439,8 @@ static bool trans_ROLC(DisasContext *ctx, arg_ROLC *a) /* rorc rd */ static bool trans_RORC(DisasContext *ctx, arg_RORC *a) { - TCGv tmp; - tmp = tcg_temp_new(); + TCGv_i32 tmp; + tmp = tcg_temp_new_i32(); tcg_gen_andi_i32(tmp, cpu_regs[a->rd], 0x00000001); tcg_gen_shri_i32(cpu_regs[a->rd], cpu_regs[a->rd], 1); tcg_gen_shli_i32(cpu_psw_c, cpu_psw_c, 31); @@ -1447,7 +1453,7 @@ static bool trans_RORC(DisasContext *ctx, arg_RORC *a) enum {ROTR = 0, ROTL = 1}; enum {ROT_IMM = 0, ROT_REG = 1}; -static inline void rx_rot(int ir, int dir, int rd, int src) +static void rx_rot(int ir, int dir, int rd, int src) { switch (dir) { case ROTL: @@ -1509,8 +1515,8 @@ static bool trans_REVL(DisasContext *ctx, arg_REVL *a) /* revw rs, rd */ static bool trans_REVW(DisasContext *ctx, arg_REVW *a) { - TCGv tmp; - tmp = tcg_temp_new(); + TCGv_i32 tmp; + tmp = tcg_temp_new_i32(); tcg_gen_andi_i32(tmp, cpu_regs[a->rs], 0x00ff00ff); tcg_gen_shli_i32(tmp, tmp, 8); tcg_gen_shri_i32(cpu_regs[a->rd], cpu_regs[a->rs], 8); @@ -1527,7 +1533,7 @@ static void rx_bcnd_main(DisasContext *ctx, int cd, int dst) switch (cd) { case 0 ... 
13: - dc.temp = tcg_temp_new(); + dc.temp = tcg_temp_new_i32(); psw_cond(&dc, cd); t = gen_new_label(); done = gen_new_label(); @@ -1582,10 +1588,10 @@ static bool trans_BRA_l(DisasContext *ctx, arg_BRA_l *a) return true; } -static inline void rx_save_pc(DisasContext *ctx) +static void rx_save_pc(DisasContext *ctx) { - TCGv pc = tcg_constant_i32(ctx->base.pc_next); - push(pc); + TCGv_i32 pc = tcg_constant_i32(ctx->base.pc_next); + push(ctx, pc); } /* jmp rs */ @@ -1626,7 +1632,7 @@ static bool trans_BSR_l(DisasContext *ctx, arg_BSR_l *a) /* rts */ static bool trans_RTS(DisasContext *ctx, arg_RTS *a) { - pop(cpu_pc); + pop(ctx, cpu_pc); ctx->base.is_jmp = DISAS_JUMP; return true; } @@ -1667,7 +1673,7 @@ static bool trans_SMOVB(DisasContext *ctx, arg_SMOVB *a) #define STRING(op) \ do { \ - TCGv size = tcg_constant_i32(a->sz); \ + TCGv_i32 size = tcg_constant_i32(a->sz); \ gen_helper_##op(tcg_env, size); \ } while (0) @@ -1798,7 +1804,7 @@ static bool trans_MVTACLO(DisasContext *ctx, arg_MVTACLO *a) /* racw #imm */ static bool trans_RACW(DisasContext *ctx, arg_RACW *a) { - TCGv imm = tcg_constant_i32(a->imm + 1); + TCGv_i32 imm = tcg_constant_i32(a->imm + 1); gen_helper_racw(tcg_env, imm); return true; } @@ -1806,8 +1812,8 @@ static bool trans_RACW(DisasContext *ctx, arg_RACW *a) /* sat rd */ static bool trans_SAT(DisasContext *ctx, arg_SAT *a) { - TCGv tmp, z; - tmp = tcg_temp_new(); + TCGv_i32 tmp, z; + tmp = tcg_temp_new_i32(); z = tcg_constant_i32(0); /* S == 1 -> 0xffffffff / S == 0 -> 0x00000000 */ tcg_gen_sari_i32(tmp, cpu_psw_s, 31); @@ -1830,7 +1836,7 @@ static bool trans_SATR(DisasContext *ctx, arg_SATR *a) static bool cat3(trans_, name, _ir)(DisasContext *ctx, \ cat3(arg_, name, _ir) * a) \ { \ - TCGv imm = tcg_constant_i32(li(ctx, 0)); \ + TCGv_i32 imm = tcg_constant_i32(li(ctx, 0)); \ gen_helper_##op(cpu_regs[a->rd], tcg_env, \ cpu_regs[a->rd], imm); \ return true; \ @@ -1838,8 +1844,8 @@ static bool trans_SATR(DisasContext *ctx, arg_SATR *a) static bool cat3(trans_, name, _mr)(DisasContext *ctx, \ cat3(arg_, name, _mr) * a) \ { \ - TCGv val, mem; \ - mem = tcg_temp_new(); \ + TCGv_i32 val, mem; \ + mem = tcg_temp_new_i32(); \ val = rx_load_source(ctx, mem, a->ld, MO_32, a->rs); \ gen_helper_##op(cpu_regs[a->rd], tcg_env, \ cpu_regs[a->rd], val); \ @@ -1849,8 +1855,8 @@ static bool trans_SATR(DisasContext *ctx, arg_SATR *a) #define FCONVOP(name, op) \ static bool trans_##name(DisasContext *ctx, arg_##name * a) \ { \ - TCGv val, mem; \ - mem = tcg_temp_new(); \ + TCGv_i32 val, mem; \ + mem = tcg_temp_new_i32(); \ val = rx_load_source(ctx, mem, a->ld, MO_32, a->rs); \ gen_helper_##op(cpu_regs[a->rd], tcg_env, val); \ return true; \ @@ -1864,7 +1870,7 @@ FOP(FDIV, fdiv) /* fcmp #imm, rd */ static bool trans_FCMP_ir(DisasContext *ctx, arg_FCMP_ir * a) { - TCGv imm = tcg_constant_i32(li(ctx, 0)); + TCGv_i32 imm = tcg_constant_i32(li(ctx, 0)); gen_helper_fcmp(tcg_env, cpu_regs[a->rd], imm); return true; } @@ -1873,8 +1879,8 @@ static bool trans_FCMP_ir(DisasContext *ctx, arg_FCMP_ir * a) /* fcmp rs, rd */ static bool trans_FCMP_mr(DisasContext *ctx, arg_FCMP_mr *a) { - TCGv val, mem; - mem = tcg_temp_new(); + TCGv_i32 val, mem; + mem = tcg_temp_new_i32(); val = rx_load_source(ctx, mem, a->ld, MO_32, a->rs); gen_helper_fcmp(tcg_env, cpu_regs[a->rd], val); return true; @@ -1887,70 +1893,70 @@ FCONVOP(ROUND, round) /* itof dsp[rs], rd */ static bool trans_ITOF(DisasContext *ctx, arg_ITOF * a) { - TCGv val, mem; - mem = tcg_temp_new(); + TCGv_i32 val, mem; + mem = tcg_temp_new_i32(); val = 
rx_load_source(ctx, mem, a->ld, a->mi, a->rs); gen_helper_itof(cpu_regs[a->rd], tcg_env, val); return true; } -static void rx_bsetm(TCGv mem, TCGv mask) +static void rx_bsetm(DisasContext *ctx, TCGv_i32 mem, TCGv_i32 mask) { - TCGv val; - val = tcg_temp_new(); - rx_gen_ld(MO_8, val, mem); + TCGv_i32 val; + val = tcg_temp_new_i32(); + rx_gen_ld(ctx, MO_8, val, mem); tcg_gen_or_i32(val, val, mask); - rx_gen_st(MO_8, val, mem); + rx_gen_st(ctx, MO_8, val, mem); } -static void rx_bclrm(TCGv mem, TCGv mask) +static void rx_bclrm(DisasContext *ctx, TCGv_i32 mem, TCGv_i32 mask) { - TCGv val; - val = tcg_temp_new(); - rx_gen_ld(MO_8, val, mem); + TCGv_i32 val; + val = tcg_temp_new_i32(); + rx_gen_ld(ctx, MO_8, val, mem); tcg_gen_andc_i32(val, val, mask); - rx_gen_st(MO_8, val, mem); + rx_gen_st(ctx, MO_8, val, mem); } -static void rx_btstm(TCGv mem, TCGv mask) +static void rx_btstm(DisasContext *ctx, TCGv_i32 mem, TCGv_i32 mask) { - TCGv val; - val = tcg_temp_new(); - rx_gen_ld(MO_8, val, mem); + TCGv_i32 val; + val = tcg_temp_new_i32(); + rx_gen_ld(ctx, MO_8, val, mem); tcg_gen_and_i32(val, val, mask); tcg_gen_setcondi_i32(TCG_COND_NE, cpu_psw_c, val, 0); tcg_gen_mov_i32(cpu_psw_z, cpu_psw_c); } -static void rx_bnotm(TCGv mem, TCGv mask) +static void rx_bnotm(DisasContext *ctx, TCGv_i32 mem, TCGv_i32 mask) { - TCGv val; - val = tcg_temp_new(); - rx_gen_ld(MO_8, val, mem); + TCGv_i32 val; + val = tcg_temp_new_i32(); + rx_gen_ld(ctx, MO_8, val, mem); tcg_gen_xor_i32(val, val, mask); - rx_gen_st(MO_8, val, mem); + rx_gen_st(ctx, MO_8, val, mem); } -static void rx_bsetr(TCGv reg, TCGv mask) +static void rx_bsetr(DisasContext *ctx, TCGv_i32 reg, TCGv_i32 mask) { tcg_gen_or_i32(reg, reg, mask); } -static void rx_bclrr(TCGv reg, TCGv mask) +static void rx_bclrr(DisasContext *ctx, TCGv_i32 reg, TCGv_i32 mask) { tcg_gen_andc_i32(reg, reg, mask); } -static inline void rx_btstr(TCGv reg, TCGv mask) +static void rx_btstr(DisasContext *ctx, TCGv_i32 reg, TCGv_i32 mask) { - TCGv t0; - t0 = tcg_temp_new(); + TCGv_i32 t0; + t0 = tcg_temp_new_i32(); tcg_gen_and_i32(t0, reg, mask); tcg_gen_setcondi_i32(TCG_COND_NE, cpu_psw_c, t0, 0); tcg_gen_mov_i32(cpu_psw_z, cpu_psw_c); } -static inline void rx_bnotr(TCGv reg, TCGv mask) +static void rx_bnotr(DisasContext *ctx, TCGv_i32 reg, TCGv_i32 mask) { tcg_gen_xor_i32(reg, reg, mask); } @@ -1959,43 +1965,43 @@ static inline void rx_bnotr(TCGv reg, TCGv mask) static bool cat3(trans_, name, _im)(DisasContext *ctx, \ cat3(arg_, name, _im) * a) \ { \ - TCGv mask, mem, addr; \ - mem = tcg_temp_new(); \ + TCGv_i32 mask, mem, addr; \ + mem = tcg_temp_new_i32(); \ mask = tcg_constant_i32(1 << a->imm); \ addr = rx_index_addr(ctx, mem, a->ld, MO_8, a->rs); \ - cat3(rx_, op, m)(addr, mask); \ + cat3(rx_, op, m)(ctx, addr, mask); \ return true; \ } \ static bool cat3(trans_, name, _ir)(DisasContext *ctx, \ cat3(arg_, name, _ir) * a) \ { \ - TCGv mask; \ + TCGv_i32 mask; \ mask = tcg_constant_i32(1 << a->imm); \ - cat3(rx_, op, r)(cpu_regs[a->rd], mask); \ + cat3(rx_, op, r)(ctx, cpu_regs[a->rd], mask); \ return true; \ } \ static bool cat3(trans_, name, _rr)(DisasContext *ctx, \ cat3(arg_, name, _rr) * a) \ { \ - TCGv mask, b; \ - mask = tcg_temp_new(); \ - b = tcg_temp_new(); \ + TCGv_i32 mask, b; \ + mask = tcg_temp_new_i32(); \ + b = tcg_temp_new_i32(); \ tcg_gen_andi_i32(b, cpu_regs[a->rs], 31); \ tcg_gen_shl_i32(mask, tcg_constant_i32(1), b); \ - cat3(rx_, op, r)(cpu_regs[a->rd], mask); \ + cat3(rx_, op, r)(ctx, cpu_regs[a->rd], mask); \ return true; \ } \ static bool 
cat3(trans_, name, _rm)(DisasContext *ctx, \ cat3(arg_, name, _rm) * a) \ { \ - TCGv mask, mem, addr, b; \ - mask = tcg_temp_new(); \ - b = tcg_temp_new(); \ + TCGv_i32 mask, mem, addr, b; \ + mask = tcg_temp_new_i32(); \ + b = tcg_temp_new_i32(); \ tcg_gen_andi_i32(b, cpu_regs[a->rd], 7); \ tcg_gen_shl_i32(mask, tcg_constant_i32(1), b); \ - mem = tcg_temp_new(); \ + mem = tcg_temp_new_i32(); \ addr = rx_index_addr(ctx, mem, a->ld, MO_8, a->rs); \ - cat3(rx_, op, m)(addr, mask); \ + cat3(rx_, op, m)(ctx, addr, mask); \ return true; \ } @@ -2004,12 +2010,12 @@ BITOP(BCLR, bclr) BITOP(BTST, btst) BITOP(BNOT, bnot) -static inline void bmcnd_op(TCGv val, TCGCond cond, int pos) +static void bmcnd_op(TCGv_i32 val, TCGCond cond, int pos) { - TCGv bit; + TCGv_i32 bit; DisasCompare dc; - dc.temp = tcg_temp_new(); - bit = tcg_temp_new(); + dc.temp = tcg_temp_new_i32(); + bit = tcg_temp_new_i32(); psw_cond(&dc, cond); tcg_gen_andi_i32(val, val, ~(1 << pos)); tcg_gen_setcondi_i32(dc.cond, bit, dc.value, 0); @@ -2019,13 +2025,13 @@ static inline void bmcnd_op(TCGv val, TCGCond cond, int pos) /* bmcnd #imm, dsp[rd] */ static bool trans_BMCnd_im(DisasContext *ctx, arg_BMCnd_im *a) { - TCGv val, mem, addr; - val = tcg_temp_new(); - mem = tcg_temp_new(); + TCGv_i32 val, mem, addr; + val = tcg_temp_new_i32(); + mem = tcg_temp_new_i32(); addr = rx_index_addr(ctx, mem, a->ld, MO_8, a->rd); - rx_gen_ld(MO_8, val, addr); + rx_gen_ld(ctx, MO_8, val, addr); bmcnd_op(val, a->cd, a->imm); - rx_gen_st(MO_8, val, addr); + rx_gen_st(ctx, MO_8, val, addr); return true; } @@ -2045,7 +2051,7 @@ enum { PSW_U = 9, }; -static inline void clrsetpsw(DisasContext *ctx, int cb, int val) +static void clrsetpsw(DisasContext *ctx, int cb, int val) { if (cb < 8) { switch (cb) { @@ -2113,7 +2119,7 @@ static bool trans_MVTIPL(DisasContext *ctx, arg_MVTIPL *a) /* mvtc #imm, rd */ static bool trans_MVTC_i(DisasContext *ctx, arg_MVTC_i *a) { - TCGv imm; + TCGv_i32 imm; imm = tcg_constant_i32(a->imm); move_to_cr(ctx, imm, a->cr); @@ -2137,9 +2143,9 @@ static bool trans_MVFC(DisasContext *ctx, arg_MVFC *a) /* rtfi */ static bool trans_RTFI(DisasContext *ctx, arg_RTFI *a) { - TCGv psw; + TCGv_i32 psw; if (is_privileged(ctx, 1)) { - psw = tcg_temp_new(); + psw = tcg_temp_new_i32(); tcg_gen_mov_i32(cpu_pc, cpu_bpc); tcg_gen_mov_i32(psw, cpu_bpsw); gen_helper_set_psw_rte(tcg_env, psw); @@ -2151,11 +2157,11 @@ static bool trans_RTFI(DisasContext *ctx, arg_RTFI *a) /* rte */ static bool trans_RTE(DisasContext *ctx, arg_RTE *a) { - TCGv psw; + TCGv_i32 psw; if (is_privileged(ctx, 1)) { - psw = tcg_temp_new(); - pop(cpu_pc); - pop(psw); + psw = tcg_temp_new_i32(); + pop(ctx, cpu_pc); + pop(ctx, psw); gen_helper_set_psw_rte(tcg_env, psw); ctx->base.is_jmp = DISAS_EXIT; } @@ -2174,7 +2180,7 @@ static bool trans_BRK(DisasContext *ctx, arg_BRK *a) /* int #imm */ static bool trans_INT(DisasContext *ctx, arg_INT *a) { - TCGv vec; + TCGv_i32 vec; tcg_debug_assert(a->imm < 0x100); vec = tcg_constant_i32(a->imm); diff --git a/target/s390x/cpu-system.c b/target/s390x/cpu-system.c index 9b380e3..f3a9ffb 100644 --- a/target/s390x/cpu-system.c +++ b/target/s390x/cpu-system.c @@ -49,7 +49,7 @@ bool s390_cpu_has_work(CPUState *cs) return false; } - if (!(cs->interrupt_request & CPU_INTERRUPT_HARD)) { + if (!cpu_test_interrupt(cs, CPU_INTERRUPT_HARD)) { return false; } @@ -196,7 +196,7 @@ static bool disabled_wait(CPUState *cpu) (PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK)); } -static unsigned s390_count_running_cpus(void) +unsigned 
s390_count_running_cpus(void) { CPUState *cpu; int nr_running = 0; @@ -214,7 +214,7 @@ static unsigned s390_count_running_cpus(void) return nr_running; } -unsigned int s390_cpu_halt(S390CPU *cpu) +void s390_cpu_halt(S390CPU *cpu) { CPUState *cs = CPU(cpu); trace_cpu_halt(cs->cpu_index); @@ -223,8 +223,6 @@ unsigned int s390_cpu_halt(S390CPU *cpu) cs->halted = 1; cs->exception_index = EXCP_HLT; } - - return s390_count_running_cpus(); } void s390_cpu_unhalt(S390CPU *cpu) diff --git a/target/s390x/cpu.c b/target/s390x/cpu.c index 9c1158e..f05ce31 100644 --- a/target/s390x/cpu.c +++ b/target/s390x/cpu.c @@ -347,6 +347,14 @@ static TCGTBCPUState s390x_get_tb_cpu_state(CPUState *cs) }; } +#ifndef CONFIG_USER_ONLY +static vaddr s390_pointer_wrap(CPUState *cs, int mmu_idx, + vaddr result, vaddr base) +{ + return wrap_address(cpu_env(cs), result); +} +#endif + static const TCGCPUOps s390_tcg_ops = { .mttcg_supported = true, .precise_smc = true, @@ -367,6 +375,7 @@ static const TCGCPUOps s390_tcg_ops = { .record_sigbus = s390_cpu_record_sigbus, #else .tlb_fill = s390_cpu_tlb_fill, + .pointer_wrap = s390_pointer_wrap, .cpu_exec_interrupt = s390_cpu_exec_interrupt, .cpu_exec_halt = s390_cpu_has_work, .cpu_exec_reset = cpu_reset, diff --git a/target/s390x/cpu_features_def.h.inc b/target/s390x/cpu_features_def.h.inc index e23e603..c017bff 100644 --- a/target/s390x/cpu_features_def.h.inc +++ b/target/s390x/cpu_features_def.h.inc @@ -186,7 +186,7 @@ DEF_FEAT(PLO_CSO, "plo-cso", PLO, 25, "PLO Compare and swap (256 bit in paramete DEF_FEAT(PLO_DCSO, "plo-dcso", PLO, 26, "PLO Double compare and swap (256 bit in parameter list)") DEF_FEAT(PLO_CSSTO, "plo-cssto", PLO, 27, "PLO Compare and swap and store (256 bit in parameter list)") DEF_FEAT(PLO_CSDSTO, "plo-csdsto", PLO, 28, "PLO Compare and swap and double store (256 bit in parameter list)") -DEF_FEAT(PLO_CSTSTO, "plo-cststo", PLO, 29, "PLO Compare and swap and trible store (256 bit in parameter list)") +DEF_FEAT(PLO_CSTSTO, "plo-cststo", PLO, 29, "PLO Compare and swap and triple store (256 bit in parameter list)") DEF_FEAT(PLO_TCS, "plo-tcs", PLO, 30, "Triple compare and swap (32 bit in parameter list)") DEF_FEAT(PLO_TCSG, "plo-tcsg", PLO, 31, "Triple compare and swap (64 bit in parameter list)") DEF_FEAT(PLO_TCSX, "plo-tcsx", PLO, 32, "Triple compare and swap (128 bit in parameter list)") diff --git a/target/s390x/cpu_models_system.c b/target/s390x/cpu_models_system.c index 4351182..5b84604 100644 --- a/target/s390x/cpu_models_system.c +++ b/target/s390x/cpu_models_system.c @@ -19,7 +19,7 @@ #include "qapi/visitor.h" #include "qapi/qobject-input-visitor.h" #include "qobject/qdict.h" -#include "qapi/qapi-commands-machine-target.h" +#include "qapi/qapi-commands-machine.h" static void list_add_feat(const char *name, void *opaque); @@ -252,6 +252,9 @@ CpuModelExpansionInfo *qmp_query_cpu_model_expansion(CpuModelExpansionType type, s390_feat_bitmap_to_ascii(deprecated_feats, &expansion_info->deprecated_props, list_add_feat); + + expansion_info->has_deprecated_props = !!expansion_info->deprecated_props; + return expansion_info; } diff --git a/target/s390x/gen-features.c b/target/s390x/gen-features.c index a814ece..8218e64 100644 --- a/target/s390x/gen-features.c +++ b/target/s390x/gen-features.c @@ -884,9 +884,6 @@ static uint16_t qemu_MIN[] = { */ S390_FEAT_FLOATING_POINT_EXT, S390_FEAT_ZPCI, -}; - -static uint16_t qemu_V4_1[] = { S390_FEAT_STFLE_53, S390_FEAT_VECTOR, }; @@ -1049,7 +1046,6 @@ static FeatGroupDefSpec FeatGroupDef[] = { 
*******************************/ static FeatGroupDefSpec QemuFeatDef[] = { QEMU_FEAT_INITIALIZER(MIN), - QEMU_FEAT_INITIALIZER(V4_1), QEMU_FEAT_INITIALIZER(V6_0), QEMU_FEAT_INITIALIZER(V6_2), QEMU_FEAT_INITIALIZER(V7_0), diff --git a/target/s390x/helper.c b/target/s390x/helper.c index 3c57c32..184428c 100644 --- a/target/s390x/helper.c +++ b/target/s390x/helper.c @@ -24,8 +24,8 @@ #include "gdbstub/helpers.h" #include "qemu/timer.h" #include "hw/s390x/ioinst.h" -#include "target/s390x/kvm/pv.h" #include "system/hw_accel.h" +#include "system/memory.h" #include "system/runstate.h" #include "exec/target_page.h" #include "exec/watchpoint.h" @@ -91,7 +91,9 @@ void s390_handle_wait(S390CPU *cpu) { CPUState *cs = CPU(cpu); - if (s390_cpu_halt(cpu) == 0) { + s390_cpu_halt(cpu); + + if (s390_count_running_cpus() == 0) { if (is_special_wait_psw(cpu->env.psw.addr)) { qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN); } else { @@ -105,19 +107,23 @@ LowCore *cpu_map_lowcore(CPUS390XState *env) { LowCore *lowcore; hwaddr len = sizeof(LowCore); + CPUState *cs = env_cpu(env); + const MemTxAttrs attrs = MEMTXATTRS_UNSPECIFIED; - lowcore = cpu_physical_memory_map(env->psa, &len, true); + lowcore = address_space_map(cs->as, env->psa, &len, true, attrs); if (len < sizeof(LowCore)) { - cpu_abort(env_cpu(env), "Could not map lowcore\n"); + cpu_abort(cs, "Could not map lowcore\n"); } return lowcore; } -void cpu_unmap_lowcore(LowCore *lowcore) +void cpu_unmap_lowcore(CPUS390XState *env, LowCore *lowcore) { - cpu_physical_memory_unmap(lowcore, sizeof(LowCore), 1, sizeof(LowCore)); + AddressSpace *as = env_cpu(env)->as; + + address_space_unmap(as, lowcore, sizeof(LowCore), true, sizeof(LowCore)); } void do_restart_interrupt(CPUS390XState *env) @@ -132,7 +138,7 @@ void do_restart_interrupt(CPUS390XState *env) mask = be64_to_cpu(lowcore->restart_new_psw.mask); addr = be64_to_cpu(lowcore->restart_new_psw.addr); - cpu_unmap_lowcore(lowcore); + cpu_unmap_lowcore(env, lowcore); env->pending_int &= ~INTERRUPT_RESTART; s390_cpu_set_psw(env, mask, addr); @@ -175,109 +181,3 @@ void s390_cpu_recompute_watchpoints(CPUState *cs) wp_flags, NULL); } } - -typedef struct SigpSaveArea { - uint64_t fprs[16]; /* 0x0000 */ - uint64_t grs[16]; /* 0x0080 */ - PSW psw; /* 0x0100 */ - uint8_t pad_0x0110[0x0118 - 0x0110]; /* 0x0110 */ - uint32_t prefix; /* 0x0118 */ - uint32_t fpc; /* 0x011c */ - uint8_t pad_0x0120[0x0124 - 0x0120]; /* 0x0120 */ - uint32_t todpr; /* 0x0124 */ - uint64_t cputm; /* 0x0128 */ - uint64_t ckc; /* 0x0130 */ - uint8_t pad_0x0138[0x0140 - 0x0138]; /* 0x0138 */ - uint32_t ars[16]; /* 0x0140 */ - uint64_t crs[16]; /* 0x0384 */ -} SigpSaveArea; -QEMU_BUILD_BUG_ON(sizeof(SigpSaveArea) != 512); - -int s390_store_status(S390CPU *cpu, hwaddr addr, bool store_arch) -{ - static const uint8_t ar_id = 1; - SigpSaveArea *sa; - hwaddr len = sizeof(*sa); - int i; - - /* For PVMs storing will occur when this cpu enters SIE again */ - if (s390_is_pv()) { - return 0; - } - - sa = cpu_physical_memory_map(addr, &len, true); - if (!sa) { - return -EFAULT; - } - if (len != sizeof(*sa)) { - cpu_physical_memory_unmap(sa, len, 1, 0); - return -EFAULT; - } - - if (store_arch) { - cpu_physical_memory_write(offsetof(LowCore, ar_access_id), &ar_id, 1); - } - for (i = 0; i < 16; ++i) { - sa->fprs[i] = cpu_to_be64(*get_freg(&cpu->env, i)); - } - for (i = 0; i < 16; ++i) { - sa->grs[i] = cpu_to_be64(cpu->env.regs[i]); - } - sa->psw.addr = cpu_to_be64(cpu->env.psw.addr); - sa->psw.mask = cpu_to_be64(s390_cpu_get_psw_mask(&cpu->env)); - 
sa->prefix = cpu_to_be32(cpu->env.psa); - sa->fpc = cpu_to_be32(cpu->env.fpc); - sa->todpr = cpu_to_be32(cpu->env.todpr); - sa->cputm = cpu_to_be64(cpu->env.cputm); - sa->ckc = cpu_to_be64(cpu->env.ckc >> 8); - for (i = 0; i < 16; ++i) { - sa->ars[i] = cpu_to_be32(cpu->env.aregs[i]); - } - for (i = 0; i < 16; ++i) { - sa->crs[i] = cpu_to_be64(cpu->env.cregs[i]); - } - - cpu_physical_memory_unmap(sa, len, 1, len); - - return 0; -} - -typedef struct SigpAdtlSaveArea { - uint64_t vregs[32][2]; /* 0x0000 */ - uint8_t pad_0x0200[0x0400 - 0x0200]; /* 0x0200 */ - uint64_t gscb[4]; /* 0x0400 */ - uint8_t pad_0x0420[0x1000 - 0x0420]; /* 0x0420 */ -} SigpAdtlSaveArea; -QEMU_BUILD_BUG_ON(sizeof(SigpAdtlSaveArea) != 4096); - -#define ADTL_GS_MIN_SIZE 2048 /* minimal size of adtl save area for GS */ -int s390_store_adtl_status(S390CPU *cpu, hwaddr addr, hwaddr len) -{ - SigpAdtlSaveArea *sa; - hwaddr save = len; - int i; - - sa = cpu_physical_memory_map(addr, &save, true); - if (!sa) { - return -EFAULT; - } - if (save != len) { - cpu_physical_memory_unmap(sa, len, 1, 0); - return -EFAULT; - } - - if (s390_has_feat(S390_FEAT_VECTOR)) { - for (i = 0; i < 32; i++) { - sa->vregs[i][0] = cpu_to_be64(cpu->env.vregs[i][0]); - sa->vregs[i][1] = cpu_to_be64(cpu->env.vregs[i][1]); - } - } - if (s390_has_feat(S390_FEAT_GUARDED_STORAGE) && len >= ADTL_GS_MIN_SIZE) { - for (i = 0; i < 4; i++) { - sa->gscb[i] = cpu_to_be64(cpu->env.gscb[i]); - } - } - - cpu_physical_memory_unmap(sa, len, 1, len); - return 0; -} diff --git a/target/s390x/ioinst.c b/target/s390x/ioinst.c index fe62ba5..2320dd4 100644 --- a/target/s390x/ioinst.c +++ b/target/s390x/ioinst.c @@ -18,6 +18,7 @@ #include "trace.h" #include "hw/s390x/s390-pci-bus.h" #include "target/s390x/kvm/pv.h" +#include "hw/s390x/ap-bridge.h" /* All I/O instructions but chsc use the s format */ static uint64_t get_address_from_regs(CPUS390XState *env, uint32_t ipb, @@ -574,13 +575,19 @@ out: static int chsc_sei_nt0_get_event(void *res) { - /* no events yet */ + if (s390_has_feat(S390_FEAT_AP)) { + return ap_chsc_sei_nt0_get_event(res); + } + return 1; } static int chsc_sei_nt0_have_event(void) { - /* no events yet */ + if (s390_has_feat(S390_FEAT_AP)) { + return ap_chsc_sei_nt0_have_event(); + } + return 0; } diff --git a/target/s390x/kvm/kvm.c b/target/s390x/kvm/kvm.c index 6cd2ebc..916dac1 100644 --- a/target/s390x/kvm/kvm.c +++ b/target/s390x/kvm/kvm.c @@ -398,6 +398,11 @@ unsigned long kvm_arch_vcpu_id(CPUState *cpu) return cpu->cpu_index; } +int kvm_arch_pre_create_vcpu(CPUState *cpu, Error **errp) +{ + return 0; +} + int kvm_arch_init_vcpu(CPUState *cs) { unsigned int max_cpus = MACHINE(qdev_get_machine())->smp.max_cpus; @@ -463,7 +468,7 @@ static int can_sync_regs(CPUState *cs, int regs) #define KVM_SYNC_REQUIRED_REGS (KVM_SYNC_GPRS | KVM_SYNC_ACRS | \ KVM_SYNC_CRS | KVM_SYNC_PREFIX) -int kvm_arch_put_registers(CPUState *cs, int level, Error **errp) +int kvm_arch_put_registers(CPUState *cs, KvmPutState level, Error **errp) { CPUS390XState *env = cpu_env(cs); struct kvm_fpu fpu = {}; @@ -884,7 +889,7 @@ int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp) return 0; } -static struct kvm_hw_breakpoint *find_hw_breakpoint(target_ulong addr, +static struct kvm_hw_breakpoint *find_hw_breakpoint(vaddr addr, int len, int type) { int n; @@ -899,7 +904,7 @@ static struct kvm_hw_breakpoint *find_hw_breakpoint(target_ulong addr, return NULL; } -static int insert_hw_breakpoint(target_ulong addr, int len, int type) +static int 
insert_hw_breakpoint(vaddr addr, int len, int type) { int size; diff --git a/target/s390x/mmu_helper.c b/target/s390x/mmu_helper.c index 00946e9..3b1e75f 100644 --- a/target/s390x/mmu_helper.c +++ b/target/s390x/mmu_helper.c @@ -23,6 +23,7 @@ #include "kvm/kvm_s390x.h" #include "system/kvm.h" #include "system/tcg.h" +#include "system/memory.h" #include "exec/page-protection.h" #include "exec/target_page.h" #include "hw/hw.h" @@ -522,6 +523,7 @@ int s390_cpu_pv_mem_rw(S390CPU *cpu, unsigned int offset, void *hostbuf, int s390_cpu_virt_mem_rw(S390CPU *cpu, vaddr laddr, uint8_t ar, void *hostbuf, int len, bool is_write) { + const MemTxAttrs attrs = MEMTXATTRS_UNSPECIFIED; int currlen, nr_pages, i; target_ulong *pages; uint64_t tec; @@ -539,19 +541,28 @@ int s390_cpu_virt_mem_rw(S390CPU *cpu, vaddr laddr, uint8_t ar, void *hostbuf, pages = g_malloc(nr_pages * sizeof(*pages)); ret = translate_pages(cpu, laddr, nr_pages, pages, is_write, &tec); - if (ret) { - trigger_access_exception(&cpu->env, ret, tec); - } else if (hostbuf != NULL) { + if (ret == 0 && hostbuf != NULL) { + AddressSpace *as = CPU(cpu)->as; + /* Copy data by stepping through the area page by page */ for (i = 0; i < nr_pages; i++) { + MemTxResult res; + currlen = MIN(len, TARGET_PAGE_SIZE - (laddr % TARGET_PAGE_SIZE)); - cpu_physical_memory_rw(pages[i] | (laddr & ~TARGET_PAGE_MASK), - hostbuf, currlen, is_write); + res = address_space_rw(as, pages[i] | (laddr & ~TARGET_PAGE_MASK), + attrs, hostbuf, currlen, is_write); + if (res != MEMTX_OK) { + ret = PGM_ADDRESSING; + break; + } laddr += currlen; hostbuf += currlen; len -= currlen; } } + if (ret) { + trigger_access_exception(&cpu->env, ret, tec); + } g_free(pages); return ret; diff --git a/target/s390x/s390x-internal.h b/target/s390x/s390x-internal.h index a4ba622..9691366 100644 --- a/target/s390x/s390x-internal.h +++ b/target/s390x/s390x-internal.h @@ -238,7 +238,8 @@ uint32_t calc_cc(CPUS390XState *env, uint32_t cc_op, uint64_t src, uint64_t dst, /* cpu.c */ #ifndef CONFIG_USER_ONLY -unsigned int s390_cpu_halt(S390CPU *cpu); +unsigned int s390_count_running_cpus(void); +void s390_cpu_halt(S390CPU *cpu); void s390_cpu_unhalt(S390CPU *cpu); void s390_cpu_system_init(Object *obj); bool s390_cpu_system_realize(DeviceState *dev, Error **errp); @@ -246,16 +247,6 @@ void s390_cpu_finalize(Object *obj); void s390_cpu_system_class_init(CPUClass *cc); void s390_cpu_machine_reset_cb(void *opaque); bool s390_cpu_has_work(CPUState *cs); - -#else -static inline unsigned int s390_cpu_halt(S390CPU *cpu) -{ - return 0; -} - -static inline void s390_cpu_unhalt(S390CPU *cpu) -{ -} #endif /* CONFIG_USER_ONLY */ @@ -332,11 +323,8 @@ void s390x_cpu_timer(void *opaque); void s390_handle_wait(S390CPU *cpu); hwaddr s390_cpu_get_phys_page_debug(CPUState *cpu, vaddr addr); hwaddr s390_cpu_get_phys_addr_debug(CPUState *cpu, vaddr addr); -#define S390_STORE_STATUS_DEF_ADDR offsetof(LowCore, floating_pt_save_area) -int s390_store_status(S390CPU *cpu, hwaddr addr, bool store_arch); -int s390_store_adtl_status(S390CPU *cpu, hwaddr addr, hwaddr len); LowCore *cpu_map_lowcore(CPUS390XState *env); -void cpu_unmap_lowcore(LowCore *lowcore); +void cpu_unmap_lowcore(CPUS390XState *env, LowCore *lowcore); #endif /* CONFIG_USER_ONLY */ diff --git a/target/s390x/sigp.c b/target/s390x/sigp.c index 5e95c497..f5d7bc0 100644 --- a/target/s390x/sigp.c +++ b/target/s390x/sigp.c @@ -13,12 +13,14 @@ #include "s390x-internal.h" #include "hw/boards.h" #include "system/hw_accel.h" +#include "system/memory.h" #include 
"system/runstate.h" #include "system/address-spaces.h" #include "exec/cputlb.h" #include "system/tcg.h" #include "trace.h" #include "qapi/qapi-types-machine.h" +#include "target/s390x/kvm/pv.h" QemuMutex qemu_sigp_mutex; @@ -126,6 +128,78 @@ static void sigp_stop(CPUState *cs, run_on_cpu_data arg) si->cc = SIGP_CC_ORDER_CODE_ACCEPTED; } +typedef struct SigpSaveArea { + uint64_t fprs[16]; /* 0x0000 */ + uint64_t grs[16]; /* 0x0080 */ + PSW psw; /* 0x0100 */ + uint8_t pad_0x0110[0x0118 - 0x0110]; /* 0x0110 */ + uint32_t prefix; /* 0x0118 */ + uint32_t fpc; /* 0x011c */ + uint8_t pad_0x0120[0x0124 - 0x0120]; /* 0x0120 */ + uint32_t todpr; /* 0x0124 */ + uint64_t cputm; /* 0x0128 */ + uint64_t ckc; /* 0x0130 */ + uint8_t pad_0x0138[0x0140 - 0x0138]; /* 0x0138 */ + uint32_t ars[16]; /* 0x0140 */ + uint64_t crs[16]; /* 0x0384 */ +} SigpSaveArea; +QEMU_BUILD_BUG_ON(sizeof(SigpSaveArea) != 512); + +#define S390_STORE_STATUS_DEF_ADDR offsetof(LowCore, floating_pt_save_area) +static int s390_store_status(S390CPU *cpu, hwaddr addr, bool store_arch) +{ + const MemTxAttrs attrs = MEMTXATTRS_UNSPECIFIED; + AddressSpace *as = CPU(cpu)->as; + SigpSaveArea *sa; + hwaddr len = sizeof(*sa); + int i; + + /* For PVMs storing will occur when this cpu enters SIE again */ + if (s390_is_pv()) { + return 0; + } + + sa = address_space_map(as, addr, &len, true, attrs); + if (!sa) { + return -EFAULT; + } + if (len != sizeof(*sa)) { + address_space_unmap(as, sa, len, true, 0); + return -EFAULT; + } + + if (store_arch) { + static const uint8_t ar_id = 1; + + address_space_stb(as, offsetof(LowCore, ar_access_id), + ar_id, attrs, NULL); + + } + for (i = 0; i < 16; ++i) { + sa->fprs[i] = cpu_to_be64(*get_freg(&cpu->env, i)); + } + for (i = 0; i < 16; ++i) { + sa->grs[i] = cpu_to_be64(cpu->env.regs[i]); + } + sa->psw.addr = cpu_to_be64(cpu->env.psw.addr); + sa->psw.mask = cpu_to_be64(s390_cpu_get_psw_mask(&cpu->env)); + sa->prefix = cpu_to_be32(cpu->env.psa); + sa->fpc = cpu_to_be32(cpu->env.fpc); + sa->todpr = cpu_to_be32(cpu->env.todpr); + sa->cputm = cpu_to_be64(cpu->env.cputm); + sa->ckc = cpu_to_be64(cpu->env.ckc >> 8); + for (i = 0; i < 16; ++i) { + sa->ars[i] = cpu_to_be32(cpu->env.aregs[i]); + } + for (i = 0; i < 16; ++i) { + sa->crs[i] = cpu_to_be64(cpu->env.cregs[i]); + } + + address_space_unmap(as, sa, len, true, len); + + return 0; +} + static void sigp_stop_and_store_status(CPUState *cs, run_on_cpu_data arg) { S390CPU *cpu = S390_CPU(cs); @@ -172,6 +246,49 @@ static void sigp_store_status_at_address(CPUState *cs, run_on_cpu_data arg) si->cc = SIGP_CC_ORDER_CODE_ACCEPTED; } +typedef struct SigpAdtlSaveArea { + uint64_t vregs[32][2]; /* 0x0000 */ + uint8_t pad_0x0200[0x0400 - 0x0200]; /* 0x0200 */ + uint64_t gscb[4]; /* 0x0400 */ + uint8_t pad_0x0420[0x1000 - 0x0420]; /* 0x0420 */ +} SigpAdtlSaveArea; +QEMU_BUILD_BUG_ON(sizeof(SigpAdtlSaveArea) != 4096); + +#define ADTL_GS_MIN_SIZE 2048 /* minimal size of adtl save area for GS */ +static int s390_store_adtl_status(S390CPU *cpu, hwaddr addr, hwaddr len) +{ + const MemTxAttrs attrs = MEMTXATTRS_UNSPECIFIED; + AddressSpace *as = CPU(cpu)->as; + SigpAdtlSaveArea *sa; + hwaddr save = len; + int i; + + sa = address_space_map(as, addr, &save, true, attrs); + if (!sa) { + return -EFAULT; + } + if (save != len) { + address_space_unmap(as, sa, len, true, 0); + return -EFAULT; + } + + if (s390_has_feat(S390_FEAT_VECTOR)) { + for (i = 0; i < 32; i++) { + sa->vregs[i][0] = cpu_to_be64(cpu->env.vregs[i][0]); + sa->vregs[i][1] = cpu_to_be64(cpu->env.vregs[i][1]); + } + } + if 
(s390_has_feat(S390_FEAT_GUARDED_STORAGE) && len >= ADTL_GS_MIN_SIZE) { + for (i = 0; i < 4; i++) { + sa->gscb[i] = cpu_to_be64(cpu->env.gscb[i]); + } + } + + address_space_unmap(as, sa, len, true, len); + + return 0; +} + #define ADTL_SAVE_LC_MASK 0xfUL static void sigp_store_adtl_status(CPUState *cs, run_on_cpu_data arg) { diff --git a/target/s390x/tcg/excp_helper.c b/target/s390x/tcg/excp_helper.c index e4c75d0..0ae4e26 100644 --- a/target/s390x/tcg/excp_helper.c +++ b/target/s390x/tcg/excp_helper.c @@ -30,6 +30,7 @@ #ifndef CONFIG_USER_ONLY #include "qemu/timer.h" #include "system/address-spaces.h" +#include "system/memory.h" #include "hw/s390x/ioinst.h" #include "hw/s390x/s390_flic.h" #include "hw/boards.h" @@ -284,7 +285,7 @@ static void do_program_interrupt(CPUS390XState *env) addr = be64_to_cpu(lowcore->program_new_psw.addr); lowcore->per_breaking_event_addr = cpu_to_be64(env->gbea); - cpu_unmap_lowcore(lowcore); + cpu_unmap_lowcore(env, lowcore); s390_cpu_set_psw(env, mask, addr); } @@ -303,7 +304,7 @@ static void do_svc_interrupt(CPUS390XState *env) mask = be64_to_cpu(lowcore->svc_new_psw.mask); addr = be64_to_cpu(lowcore->svc_new_psw.addr); - cpu_unmap_lowcore(lowcore); + cpu_unmap_lowcore(env, lowcore); s390_cpu_set_psw(env, mask, addr); @@ -377,7 +378,7 @@ static void do_ext_interrupt(CPUS390XState *env) lowcore->external_old_psw.mask = cpu_to_be64(s390_cpu_get_psw_mask(env)); lowcore->external_old_psw.addr = cpu_to_be64(env->psw.addr); - cpu_unmap_lowcore(lowcore); + cpu_unmap_lowcore(env, lowcore); s390_cpu_set_psw(env, mask, addr); } @@ -404,7 +405,7 @@ static void do_io_interrupt(CPUS390XState *env) mask = be64_to_cpu(lowcore->io_new_psw.mask); addr = be64_to_cpu(lowcore->io_new_psw.addr); - cpu_unmap_lowcore(lowcore); + cpu_unmap_lowcore(env, lowcore); g_free(io); s390_cpu_set_psw(env, mask, addr); @@ -418,16 +419,18 @@ QEMU_BUILD_BUG_ON(sizeof(MchkExtSaveArea) != 1024); static int mchk_store_vregs(CPUS390XState *env, uint64_t mcesao) { + const MemTxAttrs attrs = MEMTXATTRS_UNSPECIFIED; + AddressSpace *as = env_cpu(env)->as; hwaddr len = sizeof(MchkExtSaveArea); MchkExtSaveArea *sa; int i; - sa = cpu_physical_memory_map(mcesao, &len, true); + sa = address_space_map(as, mcesao, &len, true, attrs); if (!sa) { return -EFAULT; } if (len != sizeof(MchkExtSaveArea)) { - cpu_physical_memory_unmap(sa, len, 1, 0); + address_space_unmap(as, sa, len, true, 0); return -EFAULT; } @@ -436,7 +439,7 @@ static int mchk_store_vregs(CPUS390XState *env, uint64_t mcesao) sa->vregs[i][1] = cpu_to_be64(env->vregs[i][1]); } - cpu_physical_memory_unmap(sa, len, 1, len); + address_space_unmap(as, sa, len, true, len); return 0; } @@ -488,7 +491,7 @@ static void do_mchk_interrupt(CPUS390XState *env) mask = be64_to_cpu(lowcore->mcck_new_psw.mask); addr = be64_to_cpu(lowcore->mcck_new_psw.addr); - cpu_unmap_lowcore(lowcore); + cpu_unmap_lowcore(env, lowcore); s390_cpu_set_psw(env, mask, addr); } @@ -559,7 +562,7 @@ try_deliver: /* we might still have pending interrupts, but not deliverable */ if (!env->pending_int && !qemu_s390_flic_has_any(flic)) { - cs->interrupt_request &= ~CPU_INTERRUPT_HARD; + cpu_reset_interrupt(cs, CPU_INTERRUPT_HARD); } /* WAIT PSW during interrupt injection or STOP interrupt */ diff --git a/target/s390x/tcg/mem_helper.c b/target/s390x/tcg/mem_helper.c index a03609a..f1acb16 100644 --- a/target/s390x/tcg/mem_helper.c +++ b/target/s390x/tcg/mem_helper.c @@ -126,8 +126,8 @@ static inline void cpu_stsize_data_ra(CPUS390XState *env, uint64_t addr, /* An access covers at most 4096 
bytes and therefore at most two pages. */ typedef struct S390Access { - target_ulong vaddr1; - target_ulong vaddr2; + vaddr vaddr1; + vaddr vaddr2; void *haddr1; void *haddr2; uint16_t size1; @@ -148,7 +148,7 @@ typedef struct S390Access { * For !CONFIG_USER_ONLY, the TEC is stored stored to env->tlb_fill_tec. * For CONFIG_USER_ONLY, the faulting address is stored to env->__excp_addr. */ -static inline int s390_probe_access(CPUArchState *env, target_ulong addr, +static inline int s390_probe_access(CPUArchState *env, vaddr addr, int size, MMUAccessType access_type, int mmu_idx, bool nonfault, void **phost, uintptr_t ra) @@ -258,7 +258,7 @@ static void access_memset(CPUS390XState *env, S390Access *desta, static uint8_t access_get_byte(CPUS390XState *env, S390Access *access, int offset, uintptr_t ra) { - target_ulong vaddr = access->vaddr1; + vaddr vaddr = access->vaddr1; void *haddr = access->haddr1; if (unlikely(offset >= access->size1)) { @@ -278,7 +278,7 @@ static uint8_t access_get_byte(CPUS390XState *env, S390Access *access, static void access_set_byte(CPUS390XState *env, S390Access *access, int offset, uint8_t byte, uintptr_t ra) { - target_ulong vaddr = access->vaddr1; + vaddr vaddr = access->vaddr1; void *haddr = access->haddr1; if (unlikely(offset >= access->size1)) { diff --git a/target/s390x/tcg/misc_helper.c b/target/s390x/tcg/misc_helper.c index f7101be..6d9d601 100644 --- a/target/s390x/tcg/misc_helper.c +++ b/target/s390x/tcg/misc_helper.c @@ -570,7 +570,7 @@ uint32_t HELPER(tpi)(CPUS390XState *env, uint64_t addr) lowcore->subchannel_nr = cpu_to_be16(io->nr); lowcore->io_int_parm = cpu_to_be32(io->parm); lowcore->io_int_word = cpu_to_be32(io->word); - cpu_unmap_lowcore(lowcore); + cpu_unmap_lowcore(env, lowcore); } g_free(io); @@ -700,7 +700,7 @@ void HELPER(stfl)(CPUS390XState *env) lowcore = cpu_map_lowcore(env); prepare_stfl(); memcpy(&lowcore->stfl_fac_list, stfl_bytes, sizeof(lowcore->stfl_fac_list)); - cpu_unmap_lowcore(lowcore); + cpu_unmap_lowcore(env, lowcore); } #endif diff --git a/target/s390x/tcg/translate.c b/target/s390x/tcg/translate.c index c7e8574..ec9e5a0 100644 --- a/target/s390x/tcg/translate.c +++ b/target/s390x/tcg/translate.c @@ -258,9 +258,9 @@ static inline int vec_reg_offset(uint8_t reg, uint8_t enr, MemOp es) * 16 byte operations to handle it in a special way. 
*/ g_assert(es <= MO_64); -#if !HOST_BIG_ENDIAN - offs ^= (8 - bytes); -#endif + if (!HOST_BIG_ENDIAN) { + offs ^= (8 - bytes); + } return offs + vec_full_reg_offset(reg); } diff --git a/target/s390x/tcg/translate_vx.c.inc b/target/s390x/tcg/translate_vx.c.inc index e073e5a..f3b4b48 100644 --- a/target/s390x/tcg/translate_vx.c.inc +++ b/target/s390x/tcg/translate_vx.c.inc @@ -175,9 +175,9 @@ static void get_vec_element_ptr_i64(TCGv_ptr ptr, uint8_t reg, TCGv_i64 enr, /* convert it to an element offset relative to tcg_env (vec_reg_offset() */ tcg_gen_shli_i64(tmp, tmp, es); -#if !HOST_BIG_ENDIAN - tcg_gen_xori_i64(tmp, tmp, 8 - NUM_VEC_ELEMENT_BYTES(es)); -#endif + if (!HOST_BIG_ENDIAN) { + tcg_gen_xori_i64(tmp, tmp, 8 - NUM_VEC_ELEMENT_BYTES(es)); + } tcg_gen_addi_i64(tmp, tmp, vec_full_reg_offset(reg)); /* generate the final ptr by adding tcg_env */ diff --git a/target/sh4/cpu.c b/target/sh4/cpu.c index b35f18e..21ccb86 100644 --- a/target/sh4/cpu.c +++ b/target/sh4/cpu.c @@ -108,7 +108,7 @@ static bool superh_io_recompile_replay_branch(CPUState *cs, static bool superh_cpu_has_work(CPUState *cs) { - return cs->interrupt_request & CPU_INTERRUPT_HARD; + return cpu_test_interrupt(cs, CPU_INTERRUPT_HARD); } #endif /* !CONFIG_USER_ONLY */ @@ -296,6 +296,7 @@ static const TCGCPUOps superh_tcg_ops = { #ifndef CONFIG_USER_ONLY .tlb_fill = superh_cpu_tlb_fill, + .pointer_wrap = cpu_pointer_wrap_notreached, .cpu_exec_interrupt = superh_cpu_exec_interrupt, .cpu_exec_halt = superh_cpu_has_work, .cpu_exec_reset = cpu_reset, diff --git a/target/sh4/cpu.h b/target/sh4/cpu.h index c41ab70..b075901 100644 --- a/target/sh4/cpu.h +++ b/target/sh4/cpu.h @@ -277,7 +277,7 @@ void cpu_sh4_write_mmaped_utlb_data(CPUSH4State *s, hwaddr addr, uint32_t mem_value); #endif -int cpu_sh4_is_cached(CPUSH4State * env, target_ulong addr); +int cpu_sh4_is_cached(CPUSH4State *env, uint32_t addr); void cpu_load_tlb(CPUSH4State * env); @@ -365,14 +365,14 @@ static inline int cpu_ptel_pr (uint32_t ptel) #define PTEA_TC (1 << 3) #define cpu_ptea_tc(ptea) (((ptea) & PTEA_TC) >> 3) -static inline target_ulong cpu_read_sr(CPUSH4State *env) +static inline uint32_t cpu_read_sr(CPUSH4State *env) { return env->sr | (env->sr_m << SR_M) | (env->sr_q << SR_Q) | (env->sr_t << SR_T); } -static inline void cpu_write_sr(CPUSH4State *env, target_ulong sr) +static inline void cpu_write_sr(CPUSH4State *env, uint32_t sr) { env->sr_m = (sr >> SR_M) & 1; env->sr_q = (sr >> SR_Q) & 1; diff --git a/target/sh4/helper.c b/target/sh4/helper.c index fb7642b..3b18a32 100644 --- a/target/sh4/helper.c +++ b/target/sh4/helper.c @@ -47,7 +47,7 @@ #if defined(CONFIG_USER_ONLY) -int cpu_sh4_is_cached(CPUSH4State *env, target_ulong addr) +int cpu_sh4_is_cached(CPUSH4State *env, uint32_t addr) { /* For user mode, only U0 area is cacheable. 
*/ return !(addr & 0x80000000); @@ -58,7 +58,7 @@ int cpu_sh4_is_cached(CPUSH4State *env, target_ulong addr) void superh_cpu_do_interrupt(CPUState *cs) { CPUSH4State *env = cpu_env(cs); - int do_irq = cs->interrupt_request & CPU_INTERRUPT_HARD; + int do_irq = cpu_test_interrupt(cs, CPU_INTERRUPT_HARD); int do_exp, irq_vector = cs->exception_index; /* prioritize exceptions over interrupts */ @@ -231,11 +231,11 @@ static int itlb_replacement(CPUSH4State * env) /* Find the corresponding entry in the right TLB Return entry, MMU_DTLB_MISS or MMU_DTLB_MULTIPLE */ -static int find_tlb_entry(CPUSH4State * env, target_ulong address, +static int find_tlb_entry(CPUSH4State *env, vaddr address, tlb_t * entries, uint8_t nbtlb, int use_asid) { int match = MMU_DTLB_MISS; - uint32_t start, end; + vaddr start, end; uint8_t asid; int i; @@ -291,7 +291,7 @@ static int copy_utlb_entry_itlb(CPUSH4State *env, int utlb) /* Find itlb entry Return entry, MMU_ITLB_MISS, MMU_ITLB_MULTIPLE or MMU_DTLB_MULTIPLE */ -static int find_itlb_entry(CPUSH4State * env, target_ulong address, +static int find_itlb_entry(CPUSH4State *env, vaddr address, int use_asid) { int e; @@ -309,7 +309,7 @@ static int find_itlb_entry(CPUSH4State * env, target_ulong address, /* Find utlb entry Return entry, MMU_DTLB_MISS, MMU_DTLB_MULTIPLE */ -static int find_utlb_entry(CPUSH4State * env, target_ulong address, int use_asid) +static int find_utlb_entry(CPUSH4State *env, vaddr address, int use_asid) { /* per utlb access */ increment_urc(env); @@ -325,8 +325,8 @@ static int find_utlb_entry(CPUSH4State * env, target_ulong address, int use_asid MMU_ITLB_MULTIPLE, MMU_ITLB_VIOLATION, MMU_IADDR_ERROR, MMU_DADDR_ERROR_READ, MMU_DADDR_ERROR_WRITE. */ -static int get_mmu_address(CPUSH4State * env, target_ulong * physical, - int *prot, target_ulong address, +static int get_mmu_address(CPUSH4State *env, hwaddr *physical, + int *prot, vaddr address, MMUAccessType access_type) { int use_asid, n; @@ -392,8 +392,8 @@ static int get_mmu_address(CPUSH4State * env, target_ulong * physical, return n; } -static int get_physical_address(CPUSH4State * env, target_ulong * physical, - int *prot, target_ulong address, +static int get_physical_address(CPUSH4State *env, hwaddr* physical, + int *prot, vaddr address, MMUAccessType access_type) { /* P1, P2 and P4 areas do not use translation */ @@ -433,7 +433,7 @@ static int get_physical_address(CPUSH4State * env, target_ulong * physical, hwaddr superh_cpu_get_phys_page_debug(CPUState *cs, vaddr addr) { - target_ulong physical; + hwaddr physical; int prot; if (get_physical_address(cpu_env(cs), &physical, &prot, addr, MMU_DATA_LOAD) @@ -452,7 +452,7 @@ void cpu_load_tlb(CPUSH4State * env) if (entry->v) { /* Overwriting valid entry in utlb. */ - target_ulong address = entry->vpn << 10; + vaddr address = entry->vpn << 10; tlb_flush_page(cs, address); } @@ -528,7 +528,7 @@ void cpu_sh4_write_mmaped_itlb_addr(CPUSH4State *s, hwaddr addr, tlb_t * entry = &s->itlb[index]; if (entry->v) { /* Overwriting valid entry in itlb. */ - target_ulong address = entry->vpn << 10; + vaddr address = entry->vpn << 10; tlb_flush_page(env_cpu(s), address); } entry->asid = asid; @@ -570,7 +570,7 @@ void cpu_sh4_write_mmaped_itlb_data(CPUSH4State *s, hwaddr addr, /* ITLB Data Array 1 */ if (entry->v) { /* Overwriting valid entry in utlb. 
*/ - target_ulong address = entry->vpn << 10; + vaddr address = entry->vpn << 10; tlb_flush_page(env_cpu(s), address); } entry->ppn = (mem_value & 0x1ffffc00) >> 10; @@ -665,7 +665,7 @@ void cpu_sh4_write_mmaped_utlb_addr(CPUSH4State *s, hwaddr addr, CPUState *cs = env_cpu(s); /* Overwriting valid entry in utlb. */ - target_ulong address = entry->vpn << 10; + vaddr address = entry->vpn << 10; tlb_flush_page(cs, address); } entry->asid = asid; @@ -716,7 +716,7 @@ void cpu_sh4_write_mmaped_utlb_data(CPUSH4State *s, hwaddr addr, /* UTLB Data Array 1 */ if (entry->v) { /* Overwriting valid entry in utlb. */ - target_ulong address = entry->vpn << 10; + vaddr address = entry->vpn << 10; tlb_flush_page(env_cpu(s), address); } entry->ppn = (mem_value & 0x1ffffc00) >> 10; @@ -735,7 +735,7 @@ void cpu_sh4_write_mmaped_utlb_data(CPUSH4State *s, hwaddr addr, } } -int cpu_sh4_is_cached(CPUSH4State * env, target_ulong addr) +int cpu_sh4_is_cached(CPUSH4State *env, uint32_t addr) { int n; int use_asid = !(env->mmucr & MMUCR_SV) || !(env->sr & (1u << SR_MD)); @@ -800,7 +800,7 @@ bool superh_cpu_tlb_fill(CPUState *cs, vaddr address, int size, CPUSH4State *env = cpu_env(cs); int ret; - target_ulong physical; + hwaddr physical; int prot; ret = get_physical_address(env, &physical, &prot, address, access_type); diff --git a/target/sh4/translate.c b/target/sh4/translate.c index bf8828f..b3ae0a3 100644 --- a/target/sh4/translate.c +++ b/target/sh4/translate.c @@ -54,7 +54,7 @@ typedef struct DisasContext { #define UNALIGN(C) (ctx->tbflags & TB_FLAG_UNALIGN ? MO_UNALN : MO_ALIGN) #else #define IS_USER(ctx) (!(ctx->tbflags & (1u << SR_MD))) -#define UNALIGN(C) 0 +#define UNALIGN(C) MO_ALIGN #endif /* Target-specific values for ctx->base.is_jmp. */ @@ -223,7 +223,7 @@ static inline bool use_exit_tb(DisasContext *ctx) return (ctx->tbflags & TB_FLAG_GUSA_EXCLUSIVE) != 0; } -static bool use_goto_tb(DisasContext *ctx, target_ulong dest) +static bool use_goto_tb(DisasContext *ctx, vaddr dest) { if (use_exit_tb(ctx)) { return false; @@ -231,12 +231,12 @@ static bool use_goto_tb(DisasContext *ctx, target_ulong dest) return translator_use_goto_tb(&ctx->base, dest); } -static void gen_goto_tb(DisasContext *ctx, int n, target_ulong dest) +static void gen_goto_tb(DisasContext *ctx, unsigned tb_slot_idx, vaddr dest) { if (use_goto_tb(ctx, dest)) { - tcg_gen_goto_tb(n); + tcg_gen_goto_tb(tb_slot_idx); tcg_gen_movi_i32(cpu_pc, dest); - tcg_gen_exit_tb(ctx->base.tb, n); + tcg_gen_exit_tb(ctx->base.tb, tb_slot_idx); } else { tcg_gen_movi_i32(cpu_pc, dest); if (use_exit_tb(ctx)) { @@ -267,7 +267,7 @@ static void gen_jump(DisasContext * ctx) } /* Immediate conditional jump (bt or bf) */ -static void gen_conditional_jump(DisasContext *ctx, target_ulong dest, +static void gen_conditional_jump(DisasContext *ctx, vaddr dest, bool jump_if_true) { TCGLabel *l1 = gen_new_label(); diff --git a/target/sparc/cpu.c b/target/sparc/cpu.c index 2a3e408..c9773f1 100644 --- a/target/sparc/cpu.c +++ b/target/sparc/cpu.c @@ -783,7 +783,7 @@ static void sparc_restore_state_to_opc(CPUState *cs, #ifndef CONFIG_USER_ONLY static bool sparc_cpu_has_work(CPUState *cs) { - return (cs->interrupt_request & CPU_INTERRUPT_HARD) && + return cpu_test_interrupt(cs, CPU_INTERRUPT_HARD) && cpu_interrupts_enabled(cpu_env(cs)); } #endif /* !CONFIG_USER_ONLY */ @@ -1002,6 +1002,18 @@ static const struct SysemuCPUOps sparc_sysemu_ops = { #ifdef CONFIG_TCG #include "accel/tcg/cpu-ops.h" +#ifndef CONFIG_USER_ONLY +static vaddr sparc_pointer_wrap(CPUState *cs, int mmu_idx, 
+ vaddr result, vaddr base) +{ +#ifdef TARGET_SPARC64 + return cpu_env(cs)->pstate & PS_AM ? (uint32_t)result : result; +#else + return (uint32_t)result; +#endif +} +#endif + static const TCGCPUOps sparc_tcg_ops = { /* * From Oracle SPARC Architecture 2015: @@ -1036,6 +1048,7 @@ static const TCGCPUOps sparc_tcg_ops = { #ifndef CONFIG_USER_ONLY .tlb_fill = sparc_cpu_tlb_fill, + .pointer_wrap = sparc_pointer_wrap, .cpu_exec_interrupt = sparc_cpu_exec_interrupt, .cpu_exec_halt = sparc_cpu_has_work, .cpu_exec_reset = cpu_reset, @@ -1077,6 +1090,7 @@ static void sparc_cpu_class_init(ObjectClass *oc, const void *data) cc->disas_set_info = cpu_sparc_disas_set_info; #if defined(TARGET_SPARC64) && !defined(TARGET_ABI32) + cc->gdb_core_xml_file = "sparc64-core.xml"; cc->gdb_num_core_regs = 86; #else cc->gdb_num_core_regs = 72; diff --git a/target/sparc/cpu.h b/target/sparc/cpu.h index 31cb3d9..7169a50 100644 --- a/target/sparc/cpu.h +++ b/target/sparc/cpu.h @@ -3,7 +3,6 @@ #include "qemu/bswap.h" #include "cpu-qom.h" -#include "exec/cpu-common.h" #include "exec/cpu-defs.h" #include "exec/cpu-interrupt.h" #include "qemu/cpu-float.h" diff --git a/target/sparc/fop_helper.c b/target/sparc/fop_helper.c index a493341..29fd166 100644 --- a/target/sparc/fop_helper.c +++ b/target/sparc/fop_helper.c @@ -445,7 +445,6 @@ static uint32_t finish_fcmp(CPUSPARCState *env, FloatRelation r, uintptr_t ra) case float_relation_greater: return 2; case float_relation_unordered: - env->fsr |= FSR_NVA; return 3; } g_assert_not_reached(); diff --git a/target/sparc/helper.c b/target/sparc/helper.c index 9163b9d..c5d88de 100644 --- a/target/sparc/helper.c +++ b/target/sparc/helper.c @@ -21,6 +21,7 @@ #include "cpu.h" #include "qemu/timer.h" #include "qemu/host-utils.h" +#include "exec/cpu-common.h" #include "exec/helper-proto.h" void cpu_raise_exception_ra(CPUSPARCState *env, int tt, uintptr_t ra) diff --git a/target/sparc/insns.decode b/target/sparc/insns.decode index 9e39d23..242ec42 100644 --- a/target/sparc/insns.decode +++ b/target/sparc/insns.decode @@ -88,9 +88,10 @@ CALL 01 i:s30 { [ - STBAR 10 00000 101000 01111 0 0000000000000 + STBAR_v9 10 00000 101000 01111 0 0000000000000 MEMBAR 10 00000 101000 01111 1 000000 cmask:3 mmask:4 + RDY_v9 10 rd:5 101000 00000 0 0000000000000 RDCCR 10 rd:5 101000 00010 0 0000000000000 RDASI 10 rd:5 101000 00011 0 0000000000000 RDTICK 10 rd:5 101000 00100 0 0000000000000 @@ -107,8 +108,26 @@ CALL 01 i:s30 RDSTICK_CMPR 10 rd:5 101000 11001 0 0000000000000 RDSTRAND_STATUS 10 rd:5 101000 11010 0 0000000000000 ] - # Before v8, all rs1 accepted; otherwise rs1==0. - RDY 10 rd:5 101000 rs1:5 0 0000000000000 + + # The v8 manual, section B.30 STBAR instruction, says + # bits [12:0] are ignored, but bit 13 must be 0. + # However, section B.28 Read State Register Instruction has a + # comment that RDASR with rs1 = 15, rd = 0 is STBAR. Here, + # bit 13 is also ignored and rd != 0 is merely reserved. + # + # Solaris 8 executes v9 MEMBAR instruction 0x8143e008 during boot. + # This confirms that bit 13 is ignored, as 0x8143c000 is STBAR. + STBAR_v8 10 ----- 101000 01111 - ------------- + + # For v7, bits [18:0] are ignored. + # For v8, bits [18:14], aka rs1, are repurposed and rs1 = 0 is RDY, + # and other values are RDASR. However, the v8 manual explicitly + # says that rs1 in 1..14 yield undefined results and do not cause + # an illegal instruction trap, and rs1 in 16..31 are available for + # implementation specific usage. 
+ # Implement not causing an illegal instruction trap for v8 by + # continuing to interpret unused values per v7, i.e. as RDY. + RDY_v7 10 rd:5 101000 ----- - ------------- } { @@ -139,14 +158,16 @@ CALL 01 i:s30 } { - RDPSR 10 rd:5 101001 00000 0 0000000000000 - RDHPR_hpstate 10 rd:5 101001 00000 0 0000000000000 + [ + RDHPR_hpstate 10 rd:5 101001 00000 0 0000000000000 + RDHPR_htstate 10 rd:5 101001 00001 0 0000000000000 + RDHPR_hintp 10 rd:5 101001 00011 0 0000000000000 + RDHPR_htba 10 rd:5 101001 00101 0 0000000000000 + RDHPR_hver 10 rd:5 101001 00110 0 0000000000000 + RDHPR_hstick_cmpr 10 rd:5 101001 11111 0 0000000000000 + ] + RDPSR 10 rd:5 101001 ----- - ------------- } -RDHPR_htstate 10 rd:5 101001 00001 0 0000000000000 -RDHPR_hintp 10 rd:5 101001 00011 0 0000000000000 -RDHPR_htba 10 rd:5 101001 00101 0 0000000000000 -RDHPR_hver 10 rd:5 101001 00110 0 0000000000000 -RDHPR_hstick_cmpr 10 rd:5 101001 11111 0 0000000000000 { WRPSR 10 00000 110001 ..... . ............. @n_r_ri @@ -159,26 +180,28 @@ RESTORED 10 00001 110001 00000 0 0000000000000 # UA2005 INVALW { - RDWIM 10 rd:5 101010 00000 0 0000000000000 - RDPR_tpc 10 rd:5 101010 00000 0 0000000000000 + [ + RDPR_tpc 10 rd:5 101010 00000 0 0000000000000 + RDPR_tnpc 10 rd:5 101010 00001 0 0000000000000 + RDPR_tstate 10 rd:5 101010 00010 0 0000000000000 + RDPR_tt 10 rd:5 101010 00011 0 0000000000000 + RDPR_tick 10 rd:5 101010 00100 0 0000000000000 + RDPR_tba 10 rd:5 101010 00101 0 0000000000000 + RDPR_pstate 10 rd:5 101010 00110 0 0000000000000 + RDPR_tl 10 rd:5 101010 00111 0 0000000000000 + RDPR_pil 10 rd:5 101010 01000 0 0000000000000 + RDPR_cwp 10 rd:5 101010 01001 0 0000000000000 + RDPR_cansave 10 rd:5 101010 01010 0 0000000000000 + RDPR_canrestore 10 rd:5 101010 01011 0 0000000000000 + RDPR_cleanwin 10 rd:5 101010 01100 0 0000000000000 + RDPR_otherwin 10 rd:5 101010 01101 0 0000000000000 + RDPR_wstate 10 rd:5 101010 01110 0 0000000000000 + RDPR_gl 10 rd:5 101010 10000 0 0000000000000 + RDPR_strand_status 10 rd:5 101010 11010 0 0000000000000 + RDPR_ver 10 rd:5 101010 11111 0 0000000000000 + ] + RDWIM 10 rd:5 101010 ----- - ------------- } -RDPR_tnpc 10 rd:5 101010 00001 0 0000000000000 -RDPR_tstate 10 rd:5 101010 00010 0 0000000000000 -RDPR_tt 10 rd:5 101010 00011 0 0000000000000 -RDPR_tick 10 rd:5 101010 00100 0 0000000000000 -RDPR_tba 10 rd:5 101010 00101 0 0000000000000 -RDPR_pstate 10 rd:5 101010 00110 0 0000000000000 -RDPR_tl 10 rd:5 101010 00111 0 0000000000000 -RDPR_pil 10 rd:5 101010 01000 0 0000000000000 -RDPR_cwp 10 rd:5 101010 01001 0 0000000000000 -RDPR_cansave 10 rd:5 101010 01010 0 0000000000000 -RDPR_canrestore 10 rd:5 101010 01011 0 0000000000000 -RDPR_cleanwin 10 rd:5 101010 01100 0 0000000000000 -RDPR_otherwin 10 rd:5 101010 01101 0 0000000000000 -RDPR_wstate 10 rd:5 101010 01110 0 0000000000000 -RDPR_gl 10 rd:5 101010 10000 0 0000000000000 -RDPR_strand_status 10 rd:5 101010 11010 0 0000000000000 -RDPR_ver 10 rd:5 101010 11111 0 0000000000000 { WRWIM 10 00000 110010 ..... . ............. @n_r_ri @@ -203,7 +226,7 @@ WRPR_strand_status 10 11010 110010 ..... . ............. 
@n_r_ri { FLUSHW 10 00000 101011 00000 0 0000000000000 - RDTBR 10 rd:5 101011 00000 0 0000000000000 + RDTBR 10 rd:5 101011 ----- - ------------- } { diff --git a/target/sparc/int32_helper.c b/target/sparc/int32_helper.c index 39db4ff..fdcaa0a 100644 --- a/target/sparc/int32_helper.c +++ b/target/sparc/int32_helper.c @@ -65,6 +65,7 @@ static const char *excp_name_str(int32_t exception_index) return excp_names[exception_index]; } +#if !defined(CONFIG_USER_ONLY) void cpu_check_irqs(CPUSPARCState *env) { CPUState *cs; @@ -96,6 +97,7 @@ void cpu_check_irqs(CPUSPARCState *env) cpu_reset_interrupt(cs, CPU_INTERRUPT_HARD); } } +#endif void sparc_cpu_do_interrupt(CPUState *cs) { diff --git a/target/sparc/int64_helper.c b/target/sparc/int64_helper.c index bd14c7a..96ef81c 100644 --- a/target/sparc/int64_helper.c +++ b/target/sparc/int64_helper.c @@ -20,6 +20,7 @@ #include "qemu/osdep.h" #include "qemu/main-loop.h" #include "cpu.h" +#include "exec/cpu-common.h" #include "exec/helper-proto.h" #include "exec/log.h" #include "trace.h" @@ -62,6 +63,7 @@ static const char * const excp_names[0x80] = { }; #endif +#if !defined(CONFIG_USER_ONLY) void cpu_check_irqs(CPUSPARCState *env) { CPUState *cs; @@ -89,7 +91,7 @@ void cpu_check_irqs(CPUSPARCState *env) * the next bit is (2 << psrpil). */ if (pil < (2 << env->psrpil)) { - if (cs->interrupt_request & CPU_INTERRUPT_HARD) { + if (cpu_test_interrupt(cs, CPU_INTERRUPT_HARD)) { trace_sparc64_cpu_check_irqs_reset_irq(env->interrupt_index); env->interrupt_index = 0; cpu_reset_interrupt(cs, CPU_INTERRUPT_HARD); @@ -120,13 +122,14 @@ void cpu_check_irqs(CPUSPARCState *env) break; } } - } else if (cs->interrupt_request & CPU_INTERRUPT_HARD) { + } else if (cpu_test_interrupt(cs, CPU_INTERRUPT_HARD)) { trace_sparc64_cpu_check_irqs_disabled(pil, env->pil_in, env->softint, env->interrupt_index); env->interrupt_index = 0; cpu_reset_interrupt(cs, CPU_INTERRUPT_HARD); } } +#endif void sparc_cpu_do_interrupt(CPUState *cs) { diff --git a/target/sparc/translate.c b/target/sparc/translate.c index b922e53..d6b599b 100644 --- a/target/sparc/translate.c +++ b/target/sparc/translate.c @@ -363,15 +363,15 @@ static bool use_goto_tb(DisasContext *s, target_ulong pc, target_ulong npc) translator_use_goto_tb(&s->base, npc); } -static void gen_goto_tb(DisasContext *s, int tb_num, +static void gen_goto_tb(DisasContext *s, unsigned tb_slot_idx, target_ulong pc, target_ulong npc) { if (use_goto_tb(s, pc, npc)) { /* jump to same page: we can use a direct jump */ - tcg_gen_goto_tb(tb_num); + tcg_gen_goto_tb(tb_slot_idx); tcg_gen_movi_tl(cpu_pc, pc); tcg_gen_movi_tl(cpu_npc, npc); - tcg_gen_exit_tb(s->base.tb, tb_num); + tcg_gen_exit_tb(s->base.tb, tb_slot_idx); } else { /* jump to another page: we can use an indirect jump */ tcg_gen_movi_tl(cpu_pc, pc); @@ -2487,7 +2487,7 @@ static int extract_qfpreg(DisasContext *dc, int x) #define TRANS(NAME, AVAIL, FUNC, ...) \ static bool trans_##NAME(DisasContext *dc, arg_##NAME *a) \ - { return avail_##AVAIL(dc) && FUNC(dc, __VA_ARGS__); } + { return avail_##AVAIL(dc) && FUNC(dc, ## __VA_ARGS__); } #define avail_ALL(C) true #ifdef TARGET_SPARC64 @@ -2526,6 +2526,32 @@ static int extract_qfpreg(DisasContext *dc, int x) # define avail_VIS4(C) false #endif +/* + * We decoded bit 13 as imm, and bits [12:0] as rs2_or_imm. + * For v9, if !imm, then the unused bits [12:5] must be zero. + * For v7 and v8, the unused bits are ignored; clear them here. 
+ */ +static bool check_rs2(DisasContext *dc, int *rs2) +{ + if (unlikely(*rs2 & ~0x1f)) { + if (avail_64(dc)) { + return false; + } + *rs2 &= 0x1f; + } + return true; +} + +static bool check_r_r_ri(DisasContext *dc, arg_r_r_ri *a) +{ + return a->imm || check_rs2(dc, &a->rs2_or_imm); +} + +static bool check_r_r_ri_cc(DisasContext *dc, arg_r_r_ri_cc *a) +{ + return a->imm || check_rs2(dc, &a->rs2_or_imm); +} + /* Default case for non jump instructions. */ static bool advance_pc(DisasContext *dc) { @@ -2823,12 +2849,15 @@ static bool trans_Tcc_i_v9(DisasContext *dc, arg_Tcc_i_v9 *a) return do_tcc(dc, a->cond, a->cc, a->rs1, true, a->i); } -static bool trans_STBAR(DisasContext *dc, arg_STBAR *a) +static bool do_stbar(DisasContext *dc) { tcg_gen_mb(TCG_MO_ST_ST | TCG_BAR_SC); return advance_pc(dc); } +TRANS(STBAR_v8, 32, do_stbar) +TRANS(STBAR_v9, 64, do_stbar) + static bool trans_MEMBAR(DisasContext *dc, arg_MEMBAR *a) { if (avail_32(dc)) { @@ -2860,18 +2889,8 @@ static TCGv do_rdy(DisasContext *dc, TCGv dst) return cpu_y; } -static bool trans_RDY(DisasContext *dc, arg_RDY *a) -{ - /* - * TODO: Need a feature bit for sparcv8. In the meantime, treat all - * 32-bit cpus like sparcv7, which ignores the rs1 field. - * This matches after all other ASR, so Leon3 Asr17 is handled first. - */ - if (avail_64(dc) && a->rs1 != 0) { - return false; - } - return do_rd_special(dc, true, a->rd, do_rdy); -} +TRANS(RDY_v7, 32, do_rd_special, true, a->rd, do_rdy) +TRANS(RDY_v9, 64, do_rd_special, true, a->rd, do_rdy) static TCGv do_rd_leon3_config(DisasContext *dc, TCGv dst) { @@ -3256,8 +3275,7 @@ static bool do_wr_special(DisasContext *dc, arg_r_r_ri *a, bool priv, { TCGv src; - /* For simplicity, we under-decoded the rs2 form. */ - if (!a->imm && (a->rs2_or_imm & ~0x1f)) { + if (!check_r_r_ri(dc, a)) { return false; } if (!priv) { @@ -3700,8 +3718,7 @@ static bool do_arith_int(DisasContext *dc, arg_r_r_ri_cc *a, { TCGv dst, src1; - /* For simplicity, we under-decoded the rs2 form. */ - if (!a->imm && a->rs2_or_imm & ~0x1f) { + if (!check_r_r_ri_cc(dc, a)) { return false; } @@ -3785,11 +3802,11 @@ static bool trans_OR(DisasContext *dc, arg_r_r_ri_cc *a) { /* OR with %g0 is the canonical alias for MOV. */ if (!a->cc && a->rs1 == 0) { + if (!check_r_r_ri_cc(dc, a)) { + return false; + } if (a->imm || a->rs2_or_imm == 0) { gen_store_gpr(dc, a->rd, tcg_constant_tl(a->rs2_or_imm)); - } else if (a->rs2_or_imm & ~0x1f) { - /* For simplicity, we under-decoded the rs2 form. */ - return false; } else { gen_store_gpr(dc, a->rd, cpu_regs[a->rs2_or_imm]); } @@ -3806,8 +3823,7 @@ static bool trans_UDIV(DisasContext *dc, arg_r_r_ri *a) if (!avail_DIV(dc)) { return false; } - /* For simplicity, we under-decoded the rs2 form. */ - if (!a->imm && a->rs2_or_imm & ~0x1f) { + if (!check_r_r_ri(dc, a)) { return false; } @@ -3858,8 +3874,7 @@ static bool trans_UDIVX(DisasContext *dc, arg_r_r_ri *a) if (!avail_64(dc)) { return false; } - /* For simplicity, we under-decoded the rs2 form. */ - if (!a->imm && a->rs2_or_imm & ~0x1f) { + if (!check_r_r_ri(dc, a)) { return false; } @@ -3896,8 +3911,7 @@ static bool trans_SDIVX(DisasContext *dc, arg_r_r_ri *a) if (!avail_64(dc)) { return false; } - /* For simplicity, we under-decoded the rs2 form. */ - if (!a->imm && a->rs2_or_imm & ~0x1f) { + if (!check_r_r_ri(dc, a)) { return false; } @@ -4193,8 +4207,7 @@ TRANS(SRA_i, ALL, do_shift_i, a, false, false) static TCGv gen_rs2_or_imm(DisasContext *dc, bool imm, int rs2_or_imm) { - /* For simplicity, we under-decoded the rs2 form. 
*/ - if (!imm && rs2_or_imm & ~0x1f) { + if (!imm && !check_rs2(dc, &rs2_or_imm)) { return NULL; } if (imm || rs2_or_imm == 0) { @@ -4257,8 +4270,7 @@ static bool do_add_special(DisasContext *dc, arg_r_r_ri *a, { TCGv src1, sum; - /* For simplicity, we under-decoded the rs2 form. */ - if (!a->imm && a->rs2_or_imm & ~0x1f) { + if (!check_r_r_ri(dc, a)) { return false; } @@ -4376,8 +4388,7 @@ static TCGv gen_ldst_addr(DisasContext *dc, int rs1, bool imm, int rs2_or_imm) { TCGv addr, tmp = NULL; - /* For simplicity, we under-decoded the rs2 form. */ - if (!imm && rs2_or_imm & ~0x1f) { + if (!imm && !check_rs2(dc, &rs2_or_imm)) { return NULL; } diff --git a/target/tricore/cpu.c b/target/tricore/cpu.c index e56f90f..04319e1 100644 --- a/target/tricore/cpu.c +++ b/target/tricore/cpu.c @@ -37,7 +37,7 @@ static const gchar *tricore_gdb_arch_name(CPUState *cs) static void tricore_cpu_set_pc(CPUState *cs, vaddr value) { - cpu_env(cs)->PC = value & ~(target_ulong)1; + cpu_env(cs)->PC = value & ~1; } static vaddr tricore_cpu_get_pc(CPUState *cs) @@ -190,6 +190,7 @@ static const TCGCPUOps tricore_tcg_ops = { .restore_state_to_opc = tricore_restore_state_to_opc, .mmu_index = tricore_cpu_mmu_index, .tlb_fill = tricore_cpu_tlb_fill, + .pointer_wrap = cpu_pointer_wrap_uint32, .cpu_exec_interrupt = tricore_cpu_exec_interrupt, .cpu_exec_halt = tricore_cpu_has_work, .cpu_exec_reset = cpu_reset, diff --git a/target/tricore/helper.c b/target/tricore/helper.c index e4c53d4..7574111 100644 --- a/target/tricore/helper.c +++ b/target/tricore/helper.c @@ -35,7 +35,7 @@ enum { }; static int get_physical_address(CPUTriCoreState *env, hwaddr *physical, - int *prot, target_ulong address, + int *prot, vaddr address, MMUAccessType access_type, int mmu_idx) { int ret = TLBRET_MATCH; @@ -61,7 +61,7 @@ hwaddr tricore_cpu_get_phys_page_debug(CPUState *cs, vaddr addr) } /* TODO: Add exception support */ -static void raise_mmu_exception(CPUTriCoreState *env, target_ulong address, +static void raise_mmu_exception(CPUTriCoreState *env, vaddr address, int rw, int tlb_error) { } diff --git a/target/tricore/op_helper.c b/target/tricore/op_helper.c index 9910c13..610f148 100644 --- a/target/tricore/op_helper.c +++ b/target/tricore/op_helper.c @@ -149,15 +149,15 @@ static uint32_t ssov32(CPUTriCoreState *env, int64_t arg) if (arg > max_pos) { env->PSW_USB_V = (1 << 31); env->PSW_USB_SV = (1 << 31); - ret = (target_ulong)max_pos; + ret = (uint32_t)max_pos; } else { if (arg < max_neg) { env->PSW_USB_V = (1 << 31); env->PSW_USB_SV = (1 << 31); - ret = (target_ulong)max_neg; + ret = (uint32_t)max_neg; } else { env->PSW_USB_V = 0; - ret = (target_ulong)arg; + ret = (uint32_t)arg; } } env->PSW_USB_AV = arg ^ arg * 2u; @@ -172,10 +172,10 @@ static uint32_t suov32_pos(CPUTriCoreState *env, uint64_t arg) if (arg > max_pos) { env->PSW_USB_V = (1 << 31); env->PSW_USB_SV = (1 << 31); - ret = (target_ulong)max_pos; + ret = (uint32_t)max_pos; } else { env->PSW_USB_V = 0; - ret = (target_ulong)arg; + ret = (uint32_t)arg; } env->PSW_USB_AV = arg ^ arg * 2u; env->PSW_USB_SAV |= env->PSW_USB_AV; @@ -192,7 +192,7 @@ static uint32_t suov32_neg(CPUTriCoreState *env, int64_t arg) ret = 0; } else { env->PSW_USB_V = 0; - ret = (target_ulong)arg; + ret = (uint32_t)arg; } env->PSW_USB_AV = arg ^ arg * 2u; env->PSW_USB_SAV |= env->PSW_USB_AV; @@ -260,8 +260,7 @@ static uint32_t suov16(CPUTriCoreState *env, int32_t hw0, int32_t hw1) return (hw0 & 0xffff) | (hw1 << 16); } -target_ulong helper_add_ssov(CPUTriCoreState *env, target_ulong r1, - target_ulong r2) 
+uint32_t helper_add_ssov(CPUTriCoreState *env, uint32_t r1, uint32_t r2) { int64_t t1 = sextract64(r1, 0, 32); int64_t t2 = sextract64(r2, 0, 32); @@ -294,8 +293,7 @@ uint64_t helper_add64_ssov(CPUTriCoreState *env, uint64_t r1, uint64_t r2) return result; } -target_ulong helper_add_h_ssov(CPUTriCoreState *env, target_ulong r1, - target_ulong r2) +uint32_t helper_add_h_ssov(CPUTriCoreState *env, uint32_t r1, uint32_t r2) { int32_t ret_hw0, ret_hw1; @@ -397,8 +395,7 @@ uint32_t helper_addsur_h_ssov(CPUTriCoreState *env, uint64_t r1, uint32_t r2_l, } -target_ulong helper_add_suov(CPUTriCoreState *env, target_ulong r1, - target_ulong r2) +uint32_t helper_add_suov(CPUTriCoreState *env, uint32_t r1, uint32_t r2) { int64_t t1 = extract64(r1, 0, 32); int64_t t2 = extract64(r2, 0, 32); @@ -406,8 +403,7 @@ target_ulong helper_add_suov(CPUTriCoreState *env, target_ulong r1, return suov32_pos(env, result); } -target_ulong helper_add_h_suov(CPUTriCoreState *env, target_ulong r1, - target_ulong r2) +uint32_t helper_add_h_suov(CPUTriCoreState *env, uint32_t r1, uint32_t r2) { int32_t ret_hw0, ret_hw1; @@ -416,8 +412,7 @@ target_ulong helper_add_h_suov(CPUTriCoreState *env, target_ulong r1, return suov16(env, ret_hw0, ret_hw1); } -target_ulong helper_sub_ssov(CPUTriCoreState *env, target_ulong r1, - target_ulong r2) +uint32_t helper_sub_ssov(CPUTriCoreState *env, uint32_t r1, uint32_t r2) { int64_t t1 = sextract64(r1, 0, 32); int64_t t2 = sextract64(r2, 0, 32); @@ -450,8 +445,7 @@ uint64_t helper_sub64_ssov(CPUTriCoreState *env, uint64_t r1, uint64_t r2) return result; } -target_ulong helper_sub_h_ssov(CPUTriCoreState *env, target_ulong r1, - target_ulong r2) +uint32_t helper_sub_h_ssov(CPUTriCoreState *env, uint32_t r1, uint32_t r2) { int32_t ret_hw0, ret_hw1; @@ -552,8 +546,7 @@ uint32_t helper_subadr_h_ssov(CPUTriCoreState *env, uint64_t r1, uint32_t r2_l, return (result1 & 0xffff0000ULL) | ((result0 >> 16) & 0xffffULL); } -target_ulong helper_sub_suov(CPUTriCoreState *env, target_ulong r1, - target_ulong r2) +uint32_t helper_sub_suov(CPUTriCoreState *env, uint32_t r1, uint32_t r2) { int64_t t1 = extract64(r1, 0, 32); int64_t t2 = extract64(r2, 0, 32); @@ -561,8 +554,7 @@ target_ulong helper_sub_suov(CPUTriCoreState *env, target_ulong r1, return suov32_neg(env, result); } -target_ulong helper_sub_h_suov(CPUTriCoreState *env, target_ulong r1, - target_ulong r2) +uint32_t helper_sub_h_suov(CPUTriCoreState *env, uint32_t r1, uint32_t r2) { int32_t ret_hw0, ret_hw1; @@ -571,8 +563,7 @@ target_ulong helper_sub_h_suov(CPUTriCoreState *env, target_ulong r1, return suov16(env, ret_hw0, ret_hw1); } -target_ulong helper_mul_ssov(CPUTriCoreState *env, target_ulong r1, - target_ulong r2) +uint32_t helper_mul_ssov(CPUTriCoreState *env, uint32_t r1, uint32_t r2) { int64_t t1 = sextract64(r1, 0, 32); int64_t t2 = sextract64(r2, 0, 32); @@ -580,8 +571,7 @@ target_ulong helper_mul_ssov(CPUTriCoreState *env, target_ulong r1, return ssov32(env, result); } -target_ulong helper_mul_suov(CPUTriCoreState *env, target_ulong r1, - target_ulong r2) +uint32_t helper_mul_suov(CPUTriCoreState *env, uint32_t r1, uint32_t r2) { int64_t t1 = extract64(r1, 0, 32); int64_t t2 = extract64(r2, 0, 32); @@ -590,8 +580,7 @@ target_ulong helper_mul_suov(CPUTriCoreState *env, target_ulong r1, return suov32_pos(env, result); } -target_ulong helper_sha_ssov(CPUTriCoreState *env, target_ulong r1, - target_ulong r2) +uint32_t helper_sha_ssov(CPUTriCoreState *env, uint32_t r1, uint32_t r2) { int64_t t1 = sextract64(r1, 0, 32); int32_t t2 = 
sextract64(r2, 0, 6); @@ -606,14 +595,14 @@ target_ulong helper_sha_ssov(CPUTriCoreState *env, target_ulong r1, return ssov32(env, result); } -uint32_t helper_abs_ssov(CPUTriCoreState *env, target_ulong r1) +uint32_t helper_abs_ssov(CPUTriCoreState *env, uint32_t r1) { - target_ulong result; + uint32_t result; result = ((int32_t)r1 >= 0) ? r1 : (0 - r1); return ssov32(env, result); } -uint32_t helper_abs_h_ssov(CPUTriCoreState *env, target_ulong r1) +uint32_t helper_abs_h_ssov(CPUTriCoreState *env, uint32_t r1) { int32_t ret_h0, ret_h1; @@ -626,8 +615,7 @@ uint32_t helper_abs_h_ssov(CPUTriCoreState *env, target_ulong r1) return ssov16(env, ret_h0, ret_h1); } -target_ulong helper_absdif_ssov(CPUTriCoreState *env, target_ulong r1, - target_ulong r2) +uint32_t helper_absdif_ssov(CPUTriCoreState *env, uint32_t r1, uint32_t r2) { int64_t t1 = sextract64(r1, 0, 32); int64_t t2 = sextract64(r2, 0, 32); @@ -641,8 +629,7 @@ target_ulong helper_absdif_ssov(CPUTriCoreState *env, target_ulong r1, return ssov32(env, result); } -uint32_t helper_absdif_h_ssov(CPUTriCoreState *env, target_ulong r1, - target_ulong r2) +uint32_t helper_absdif_h_ssov(CPUTriCoreState *env, uint32_t r1, uint32_t r2) { int32_t t1, t2; int32_t ret_h0, ret_h1; @@ -666,8 +653,8 @@ uint32_t helper_absdif_h_ssov(CPUTriCoreState *env, target_ulong r1, return ssov16(env, ret_h0, ret_h1); } -target_ulong helper_madd32_ssov(CPUTriCoreState *env, target_ulong r1, - target_ulong r2, target_ulong r3) +uint32_t helper_madd32_ssov(CPUTriCoreState *env, uint32_t r1, + uint32_t r2, uint32_t r3) { int64_t t1 = sextract64(r1, 0, 32); int64_t t2 = sextract64(r2, 0, 32); @@ -678,8 +665,8 @@ target_ulong helper_madd32_ssov(CPUTriCoreState *env, target_ulong r1, return ssov32(env, result); } -target_ulong helper_madd32_suov(CPUTriCoreState *env, target_ulong r1, - target_ulong r2, target_ulong r3) +uint32_t helper_madd32_suov(CPUTriCoreState *env, uint32_t r1, + uint32_t r2, uint32_t r3) { uint64_t t1 = extract64(r1, 0, 32); uint64_t t2 = extract64(r2, 0, 32); @@ -690,8 +677,8 @@ target_ulong helper_madd32_suov(CPUTriCoreState *env, target_ulong r1, return suov32_pos(env, result); } -uint64_t helper_madd64_ssov(CPUTriCoreState *env, target_ulong r1, - uint64_t r2, target_ulong r3) +uint64_t helper_madd64_ssov(CPUTriCoreState *env, uint32_t r1, + uint64_t r2, uint32_t r3) { uint64_t ret, ovf; int64_t t1 = sextract64(r1, 0, 32); @@ -848,8 +835,8 @@ uint32_t helper_maddr_q_ssov(CPUTriCoreState *env, uint32_t r1, uint32_t r2, return ret & 0xffff0000ll; } -uint64_t helper_madd64_suov(CPUTriCoreState *env, target_ulong r1, - uint64_t r2, target_ulong r3) +uint64_t helper_madd64_suov(CPUTriCoreState *env, uint32_t r1, + uint64_t r2, uint32_t r3) { uint64_t ret, mul; uint64_t t1 = extract64(r1, 0, 32); @@ -873,8 +860,8 @@ uint64_t helper_madd64_suov(CPUTriCoreState *env, target_ulong r1, return ret; } -target_ulong helper_msub32_ssov(CPUTriCoreState *env, target_ulong r1, - target_ulong r2, target_ulong r3) +uint32_t helper_msub32_ssov(CPUTriCoreState *env, uint32_t r1, + uint32_t r2, uint32_t r3) { int64_t t1 = sextract64(r1, 0, 32); int64_t t2 = sextract64(r2, 0, 32); @@ -885,8 +872,8 @@ target_ulong helper_msub32_ssov(CPUTriCoreState *env, target_ulong r1, return ssov32(env, result); } -target_ulong helper_msub32_suov(CPUTriCoreState *env, target_ulong r1, - target_ulong r2, target_ulong r3) +uint32_t helper_msub32_suov(CPUTriCoreState *env, uint32_t r1, + uint32_t r2, uint32_t r3) { uint64_t t1 = extract64(r1, 0, 32); uint64_t t2 = extract64(r2, 0, 32); 
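The TriCore saturation helpers in these hunks all share one shape: widen the 32-bit operands to 64 bits (sextract64/extract64), compute in the wider type, then clamp back to 32 bits while recording overflow in the PSW. A minimal stand-alone sketch of that widen-then-clamp step, with hypothetical names and without QEMU's PSW_USB_* flag bookkeeping:

#include <stdint.h>

/* Illustrative only: clamp a 64-bit intermediate to the int32_t range,
 * mirroring the shape of ssov32() as used by helper_add_ssov() et al. */
static uint32_t saturate_s32(int64_t wide, int *overflow)
{
    if (wide > INT32_MAX) {
        *overflow = 1;
        return (uint32_t)INT32_MAX;
    }
    if (wide < INT32_MIN) {
        *overflow = 1;
        return (uint32_t)INT32_MIN;
    }
    *overflow = 0;
    return (uint32_t)wide;
}

/* A saturating 32-bit add: sign-extend, add in 64 bits, clamp. */
static uint32_t add_ssov_sketch(uint32_t r1, uint32_t r2, int *overflow)
{
    int64_t t1 = (int32_t)r1;   /* like sextract64(r1, 0, 32) */
    int64_t t2 = (int32_t)r2;
    return saturate_s32(t1 + t2, overflow);
}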
@@ -912,8 +899,8 @@ target_ulong helper_msub32_suov(CPUTriCoreState *env, target_ulong r1, return result; } -uint64_t helper_msub64_ssov(CPUTriCoreState *env, target_ulong r1, - uint64_t r2, target_ulong r3) +uint64_t helper_msub64_ssov(CPUTriCoreState *env, uint32_t r1, + uint64_t r2, uint32_t r3) { uint64_t ret, ovf; int64_t t1 = sextract64(r1, 0, 32); @@ -944,8 +931,8 @@ uint64_t helper_msub64_ssov(CPUTriCoreState *env, target_ulong r1, return ret; } -uint64_t helper_msub64_suov(CPUTriCoreState *env, target_ulong r1, - uint64_t r2, target_ulong r3) +uint64_t helper_msub64_suov(CPUTriCoreState *env, uint32_t r1, + uint64_t r2, uint32_t r3) { uint64_t ret, mul; uint64_t t1 = extract64(r1, 0, 32); @@ -1097,7 +1084,7 @@ uint32_t helper_msubr_q_ssov(CPUTriCoreState *env, uint32_t r1, uint32_t r2, return ret & 0xffff0000ll; } -uint32_t helper_abs_b(CPUTriCoreState *env, target_ulong arg) +uint32_t helper_abs_b(CPUTriCoreState *env, uint32_t arg) { int32_t b, i; int32_t ovf = 0; @@ -1120,7 +1107,7 @@ uint32_t helper_abs_b(CPUTriCoreState *env, target_ulong arg) return ret; } -uint32_t helper_abs_h(CPUTriCoreState *env, target_ulong arg) +uint32_t helper_abs_h(CPUTriCoreState *env, uint32_t arg) { int32_t h, i; int32_t ovf = 0; @@ -1143,7 +1130,7 @@ uint32_t helper_abs_h(CPUTriCoreState *env, target_ulong arg) return ret; } -uint32_t helper_absdif_b(CPUTriCoreState *env, target_ulong r1, target_ulong r2) +uint32_t helper_absdif_b(CPUTriCoreState *env, uint32_t r1, uint32_t r2) { int32_t b, i; int32_t extr_r2; @@ -1167,7 +1154,7 @@ uint32_t helper_absdif_b(CPUTriCoreState *env, target_ulong r1, target_ulong r2) return ret; } -uint32_t helper_absdif_h(CPUTriCoreState *env, target_ulong r1, target_ulong r2) +uint32_t helper_absdif_h(CPUTriCoreState *env, uint32_t r1, uint32_t r2) { int32_t h, i; int32_t extr_r2; @@ -1296,7 +1283,7 @@ uint32_t helper_maddr_q(CPUTriCoreState *env, uint32_t r1, uint32_t r2, return ret & 0xffff0000ll; } -uint32_t helper_add_b(CPUTriCoreState *env, target_ulong r1, target_ulong r2) +uint32_t helper_add_b(CPUTriCoreState *env, uint32_t r1, uint32_t r2) { int32_t b, i; int32_t extr_r1, extr_r2; @@ -1322,7 +1309,7 @@ uint32_t helper_add_b(CPUTriCoreState *env, target_ulong r1, target_ulong r2) return ret; } -uint32_t helper_add_h(CPUTriCoreState *env, target_ulong r1, target_ulong r2) +uint32_t helper_add_h(CPUTriCoreState *env, uint32_t r1, uint32_t r2) { int32_t h, i; int32_t extr_r1, extr_r2; @@ -1451,7 +1438,7 @@ uint32_t helper_msubr_q(CPUTriCoreState *env, uint32_t r1, uint32_t r2, return ret & 0xffff0000ll; } -uint32_t helper_sub_b(CPUTriCoreState *env, target_ulong r1, target_ulong r2) +uint32_t helper_sub_b(CPUTriCoreState *env, uint32_t r1, uint32_t r2) { int32_t b, i; int32_t extr_r1, extr_r2; @@ -1477,7 +1464,7 @@ uint32_t helper_sub_b(CPUTriCoreState *env, target_ulong r1, target_ulong r2) return ret; } -uint32_t helper_sub_h(CPUTriCoreState *env, target_ulong r1, target_ulong r2) +uint32_t helper_sub_h(CPUTriCoreState *env, uint32_t r1, uint32_t r2) { int32_t h, i; int32_t extr_r1, extr_r2; @@ -1502,7 +1489,7 @@ uint32_t helper_sub_h(CPUTriCoreState *env, target_ulong r1, target_ulong r2) return ret; } -uint32_t helper_eq_b(target_ulong r1, target_ulong r2) +uint32_t helper_eq_b(uint32_t r1, uint32_t r2) { uint32_t ret, msk; int32_t i; @@ -1519,7 +1506,7 @@ uint32_t helper_eq_b(target_ulong r1, target_ulong r2) return ret; } -uint32_t helper_eq_h(target_ulong r1, target_ulong r2) +uint32_t helper_eq_h(uint32_t r1, uint32_t r2) { int32_t ret = 0; @@ -1534,7 
+1521,7 @@ uint32_t helper_eq_h(target_ulong r1, target_ulong r2) return ret; } -uint32_t helper_eqany_b(target_ulong r1, target_ulong r2) +uint32_t helper_eqany_b(uint32_t r1, uint32_t r2) { int32_t i; uint32_t ret = 0; @@ -1546,7 +1533,7 @@ uint32_t helper_eqany_b(target_ulong r1, target_ulong r2) return ret; } -uint32_t helper_eqany_h(target_ulong r1, target_ulong r2) +uint32_t helper_eqany_h(uint32_t r1, uint32_t r2) { uint32_t ret; @@ -1556,7 +1543,7 @@ uint32_t helper_eqany_h(target_ulong r1, target_ulong r2) return ret; } -uint32_t helper_lt_b(target_ulong r1, target_ulong r2) +uint32_t helper_lt_b(uint32_t r1, uint32_t r2) { int32_t i; uint32_t ret = 0; @@ -1570,7 +1557,7 @@ uint32_t helper_lt_b(target_ulong r1, target_ulong r2) return ret; } -uint32_t helper_lt_bu(target_ulong r1, target_ulong r2) +uint32_t helper_lt_bu(uint32_t r1, uint32_t r2) { int32_t i; uint32_t ret = 0; @@ -1584,7 +1571,7 @@ uint32_t helper_lt_bu(target_ulong r1, target_ulong r2) return ret; } -uint32_t helper_lt_h(target_ulong r1, target_ulong r2) +uint32_t helper_lt_h(uint32_t r1, uint32_t r2) { uint32_t ret = 0; @@ -1599,7 +1586,7 @@ uint32_t helper_lt_h(target_ulong r1, target_ulong r2) return ret; } -uint32_t helper_lt_hu(target_ulong r1, target_ulong r2) +uint32_t helper_lt_hu(uint32_t r1, uint32_t r2) { uint32_t ret = 0; @@ -1615,7 +1602,7 @@ uint32_t helper_lt_hu(target_ulong r1, target_ulong r2) } #define EXTREMA_H_B(name, op) \ -uint32_t helper_##name ##_b(target_ulong r1, target_ulong r2) \ +uint32_t helper_##name ##_b(uint32_t r1, uint32_t r2) \ { \ int32_t i, extr_r1, extr_r2; \ uint32_t ret = 0; \ @@ -1629,7 +1616,7 @@ uint32_t helper_##name ##_b(target_ulong r1, target_ulong r2) \ return ret; \ } \ \ -uint32_t helper_##name ##_bu(target_ulong r1, target_ulong r2)\ +uint32_t helper_##name ##_bu(uint32_t r1, uint32_t r2) \ { \ int32_t i; \ uint32_t extr_r1, extr_r2; \ @@ -1644,7 +1631,7 @@ uint32_t helper_##name ##_bu(target_ulong r1, target_ulong r2)\ return ret; \ } \ \ -uint32_t helper_##name ##_h(target_ulong r1, target_ulong r2) \ +uint32_t helper_##name ##_h(uint32_t r1, uint32_t r2) \ { \ int32_t extr_r1, extr_r2; \ uint32_t ret = 0; \ @@ -1662,7 +1649,7 @@ uint32_t helper_##name ##_h(target_ulong r1, target_ulong r2) \ return ret; \ } \ \ -uint32_t helper_##name ##_hu(target_ulong r1, target_ulong r2)\ +uint32_t helper_##name ##_hu(uint32_t r1, uint32_t r2) \ { \ uint32_t extr_r1, extr_r2; \ uint32_t ret = 0; \ @@ -1729,7 +1716,7 @@ EXTREMA_H_B(min, <) #undef EXTREMA_H_B -uint32_t helper_clo_h(target_ulong r1) +uint32_t helper_clo_h(uint32_t r1) { uint32_t ret_hw0 = extract32(r1, 0, 16); uint32_t ret_hw1 = extract32(r1, 16, 16); @@ -1747,7 +1734,7 @@ uint32_t helper_clo_h(target_ulong r1) return ret_hw0 | (ret_hw1 << 16); } -uint32_t helper_clz_h(target_ulong r1) +uint32_t helper_clz_h(uint32_t r1) { uint32_t ret_hw0 = extract32(r1, 0, 16); uint32_t ret_hw1 = extract32(r1, 16, 16); @@ -1765,7 +1752,7 @@ uint32_t helper_clz_h(target_ulong r1) return ret_hw0 | (ret_hw1 << 16); } -uint32_t helper_cls_h(target_ulong r1) +uint32_t helper_cls_h(uint32_t r1) { uint32_t ret_hw0 = extract32(r1, 0, 16); uint32_t ret_hw1 = extract32(r1, 16, 16); @@ -1783,7 +1770,7 @@ uint32_t helper_cls_h(target_ulong r1) return ret_hw0 | (ret_hw1 << 16); } -uint32_t helper_sh(target_ulong r1, target_ulong r2) +uint32_t helper_sh(uint32_t r1, uint32_t r2) { int32_t shift_count = sextract32(r2, 0, 6); @@ -1796,7 +1783,7 @@ uint32_t helper_sh(target_ulong r1, target_ulong r2) } } -uint32_t helper_sh_h(target_ulong 
r1, target_ulong r2) +uint32_t helper_sh_h(uint32_t r1, uint32_t r2) { int32_t ret_hw0, ret_hw1; int32_t shift_count; @@ -1816,7 +1803,7 @@ uint32_t helper_sh_h(target_ulong r1, target_ulong r2) } } -uint32_t helper_sha(CPUTriCoreState *env, target_ulong r1, target_ulong r2) +uint32_t helper_sha(CPUTriCoreState *env, uint32_t r1, uint32_t r2) { int32_t shift_count; int64_t result, t1; @@ -1854,7 +1841,7 @@ uint32_t helper_sha(CPUTriCoreState *env, target_ulong r1, target_ulong r2) return ret; } -uint32_t helper_sha_h(target_ulong r1, target_ulong r2) +uint32_t helper_sha_h(uint32_t r1, uint32_t r2) { int32_t shift_count; int32_t ret_hw0, ret_hw1; @@ -1874,7 +1861,7 @@ uint32_t helper_sha_h(target_ulong r1, target_ulong r2) } } -uint32_t helper_bmerge(target_ulong r1, target_ulong r2) +uint32_t helper_bmerge(uint32_t r1, uint32_t r2) { uint32_t i, ret; @@ -1905,7 +1892,7 @@ uint64_t helper_bsplit(uint32_t r1) return ret; } -uint32_t helper_parity(target_ulong r1) +uint32_t helper_parity(uint32_t r1) { uint32_t ret; uint32_t nOnes, i; @@ -1942,7 +1929,7 @@ uint32_t helper_parity(target_ulong r1) } uint32_t helper_pack(uint32_t carry, uint32_t r1_low, uint32_t r1_high, - target_ulong r2) + uint32_t r2) { uint32_t ret; int32_t fp_exp, fp_frac, temp_exp, fp_exp_frac; @@ -1983,7 +1970,7 @@ uint32_t helper_pack(uint32_t carry, uint32_t r1_low, uint32_t r1_high, return ret; } -uint64_t helper_unpack(target_ulong arg1) +uint64_t helper_unpack(uint32_t arg1) { int32_t fp_exp = extract32(arg1, 23, 8); int32_t fp_frac = extract32(arg1, 0, 23); @@ -2408,7 +2395,7 @@ uint32_t helper_shuffle(uint32_t arg0, uint32_t arg1) /* context save area (CSA) related helpers */ -static int cdc_increment(target_ulong *psw) +static int cdc_increment(uint32_t *psw) { if ((*psw & MASK_PSW_CDC) == 0x7f) { return 0; @@ -2426,7 +2413,7 @@ static int cdc_increment(target_ulong *psw) return 0; } -static int cdc_decrement(target_ulong *psw) +static int cdc_decrement(uint32_t *psw) { if ((*psw & MASK_PSW_CDC) == 0x7f) { return 0; @@ -2442,7 +2429,7 @@ static int cdc_decrement(target_ulong *psw) return 0; } -static bool cdc_zero(target_ulong *psw) +static bool cdc_zero(uint32_t *psw) { int cdc = *psw & MASK_PSW_CDC; /* Returns TRUE if PSW.CDC.COUNT == 0 or if PSW.CDC == @@ -2457,7 +2444,7 @@ static bool cdc_zero(target_ulong *psw) return count == 0; } -static void save_context_upper(CPUTriCoreState *env, target_ulong ea) +static void save_context_upper(CPUTriCoreState *env, uint32_t ea) { cpu_stl_data(env, ea, env->PCXI); cpu_stl_data(env, ea+4, psw_read(env)); @@ -2477,7 +2464,7 @@ static void save_context_upper(CPUTriCoreState *env, target_ulong ea) cpu_stl_data(env, ea+60, env->gpr_d[15]); } -static void save_context_lower(CPUTriCoreState *env, target_ulong ea) +static void save_context_lower(CPUTriCoreState *env, uint32_t ea) { cpu_stl_data(env, ea, env->PCXI); cpu_stl_data(env, ea+4, env->gpr_a[11]); @@ -2497,8 +2484,8 @@ static void save_context_lower(CPUTriCoreState *env, target_ulong ea) cpu_stl_data(env, ea+60, env->gpr_d[7]); } -static void restore_context_upper(CPUTriCoreState *env, target_ulong ea, - target_ulong *new_PCXI, target_ulong *new_PSW) +static void restore_context_upper(CPUTriCoreState *env, uint32_t ea, + uint32_t *new_PCXI, uint32_t *new_PSW) { *new_PCXI = cpu_ldl_data(env, ea); *new_PSW = cpu_ldl_data(env, ea+4); @@ -2518,8 +2505,8 @@ static void restore_context_upper(CPUTriCoreState *env, target_ulong ea, env->gpr_d[15] = cpu_ldl_data(env, ea+60); } -static void restore_context_lower(CPUTriCoreState 
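The SH/SH.H/SHA helpers changed above all decode a 6-bit signed shift count from r2. A small sketch of that convention, assuming the usual TriCore behaviour of a logical right shift for negative counts; the function name and the -32 handling are illustrative, since the diff does not show the full helper bodies:

#include <stdint.h>
#include <stdio.h>

/* Illustrative model of a shift with a 6-bit signed shift count:
 * a non-negative count shifts left, a negative count shifts right
 * logically by the absolute value. */
static uint32_t sh_model(uint32_t r1, uint32_t r2)
{
    int32_t shift_count = r2 & 0x3f;        /* 6-bit field */
    if (shift_count >= 32) {
        shift_count -= 64;                  /* sign-extend: range is -32..31 */
    }

    if (shift_count == -32) {
        return 0;                           /* everything shifted out */
    } else if (shift_count < 0) {
        return r1 >> -shift_count;          /* logical right shift */
    }
    return r1 << shift_count;
}

int main(void)
{
    printf("%08x\n", sh_model(0x80000001, 4));            /* 00000010 */
    printf("%08x\n", sh_model(0x80000001, (uint32_t)-4)); /* 08000000 */
    return 0;
}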
*env, target_ulong ea, - target_ulong *ra, target_ulong *pcxi) +static void restore_context_lower(CPUTriCoreState *env, uint32_t ea, + uint32_t *ra, uint32_t *pcxi) { *pcxi = cpu_ldl_data(env, ea); *ra = cpu_ldl_data(env, ea+4); @@ -2541,10 +2528,10 @@ static void restore_context_lower(CPUTriCoreState *env, target_ulong ea, void helper_call(CPUTriCoreState *env, uint32_t next_pc) { - target_ulong tmp_FCX; - target_ulong ea; - target_ulong new_FCX; - target_ulong psw; + uint32_t tmp_FCX; + uint32_t ea; + uint32_t new_FCX; + uint32_t psw; psw = psw_read(env); /* if (FCX == 0) trap(FCU); */ @@ -2604,9 +2591,9 @@ void helper_call(CPUTriCoreState *env, uint32_t next_pc) void helper_ret(CPUTriCoreState *env) { - target_ulong ea; - target_ulong new_PCXI; - target_ulong new_PSW, psw; + uint32_t ea; + uint32_t new_PCXI; + uint32_t new_PSW, psw; psw = psw_read(env); /* if (PSW.CDE) then if (cdc_decrement()) then trap(CDU);*/ @@ -2657,9 +2644,9 @@ void helper_ret(CPUTriCoreState *env) void helper_bisr(CPUTriCoreState *env, uint32_t const9) { - target_ulong tmp_FCX; - target_ulong ea; - target_ulong new_FCX; + uint32_t tmp_FCX; + uint32_t ea; + uint32_t new_FCX; if (env->FCX == 0) { /* FCU trap */ @@ -2701,9 +2688,9 @@ void helper_bisr(CPUTriCoreState *env, uint32_t const9) void helper_rfe(CPUTriCoreState *env) { - target_ulong ea; - target_ulong new_PCXI; - target_ulong new_PSW; + uint32_t ea; + uint32_t new_PCXI; + uint32_t new_PSW; /* if (PCXI[19: 0] == 0) then trap(CSU); */ if ((env->PCXI & 0xfffff) == 0) { /* raise csu trap */ @@ -2762,35 +2749,35 @@ void helper_rfm(CPUTriCoreState *env) } } -void helper_ldlcx(CPUTriCoreState *env, target_ulong ea) +void helper_ldlcx(CPUTriCoreState *env, uint32_t ea) { uint32_t dummy; /* insn doesn't load PCXI and RA */ restore_context_lower(env, ea, &dummy, &dummy); } -void helper_lducx(CPUTriCoreState *env, target_ulong ea) +void helper_lducx(CPUTriCoreState *env, uint32_t ea) { uint32_t dummy; /* insn doesn't load PCXI and PSW */ restore_context_upper(env, ea, &dummy, &dummy); } -void helper_stlcx(CPUTriCoreState *env, target_ulong ea) +void helper_stlcx(CPUTriCoreState *env, uint32_t ea) { save_context_lower(env, ea); } -void helper_stucx(CPUTriCoreState *env, target_ulong ea) +void helper_stucx(CPUTriCoreState *env, uint32_t ea) { save_context_upper(env, ea); } void helper_svlcx(CPUTriCoreState *env) { - target_ulong tmp_FCX; - target_ulong ea; - target_ulong new_FCX; + uint32_t tmp_FCX; + uint32_t ea; + uint32_t new_FCX; if (env->FCX == 0) { /* FCU trap */ @@ -2831,9 +2818,9 @@ void helper_svlcx(CPUTriCoreState *env) void helper_svucx(CPUTriCoreState *env) { - target_ulong tmp_FCX; - target_ulong ea; - target_ulong new_FCX; + uint32_t tmp_FCX; + uint32_t ea; + uint32_t new_FCX; if (env->FCX == 0) { /* FCU trap */ @@ -2874,8 +2861,8 @@ void helper_svucx(CPUTriCoreState *env) void helper_rslcx(CPUTriCoreState *env) { - target_ulong ea; - target_ulong new_PCXI; + uint32_t ea; + uint32_t new_PCXI; /* if (PCXI[19: 0] == 0) then trap(CSU); */ if ((env->PCXI & 0xfffff) == 0) { /* CSU trap */ diff --git a/target/tricore/translate.c b/target/tricore/translate.c index 3d0e7a1..fbe05a9 100644 --- a/target/tricore/translate.c +++ b/target/tricore/translate.c @@ -44,19 +44,19 @@ /* * TCG registers */ -static TCGv cpu_PC; -static TCGv cpu_PCXI; -static TCGv cpu_PSW; -static TCGv cpu_ICR; +static TCGv_i32 cpu_PC; +static TCGv_i32 cpu_PCXI; +static TCGv_i32 cpu_PSW; +static TCGv_i32 cpu_ICR; /* GPR registers */ -static TCGv cpu_gpr_a[16]; -static TCGv cpu_gpr_d[16]; 
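helper_call/helper_ret and the *lcx/*ucx helpers above all turn a context pointer (FCX or PCXI) into the effective address of a 64-byte context save area. A sketch of that address computation, based on my reading of the TriCore architecture manual rather than on lines visible in this diff; the mask names are illustrative, not QEMU's:

#include <stdint.h>
#include <stdio.h>

/* Illustrative model of the CSA address computation: segment in bits [19:16]
 * of the context pointer becomes the top address nibble, the 16-bit offset is
 * scaled by the 64-byte CSA size. */
#define CTX_SEG_MASK 0x000f0000u
#define CTX_OFF_MASK 0x0000ffffu

static uint32_t csa_to_address(uint32_t ctx)
{
    return ((ctx & CTX_SEG_MASK) << 12) + ((ctx & CTX_OFF_MASK) << 6);
}

int main(void)
{
    /* segment 0xd, offset 0x0004 -> 0xd0000100 */
    printf("%08x\n", csa_to_address(0x000d0004));
    return 0;
}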
+static TCGv_i32 cpu_gpr_a[16]; +static TCGv_i32 cpu_gpr_d[16]; /* PSW Flag cache */ -static TCGv cpu_PSW_C; -static TCGv cpu_PSW_V; -static TCGv cpu_PSW_SV; -static TCGv cpu_PSW_AV; -static TCGv cpu_PSW_SAV; +static TCGv_i32 cpu_PSW_C; +static TCGv_i32 cpu_PSW_V; +static TCGv_i32 cpu_PSW_SV; +static TCGv_i32 cpu_PSW_AV; +static TCGv_i32 cpu_PSW_SAV; static const char *regnames_a[] = { "a0" , "a1" , "a2" , "a3" , "a4" , "a5" , @@ -72,7 +72,8 @@ static const char *regnames_d[] = { typedef struct DisasContext { DisasContextBase base; - target_ulong pc_succ_insn; + + vaddr pc_succ_insn; uint32_t opcode; /* Routine used to access memory */ int mem_idx; @@ -135,46 +136,46 @@ void tricore_cpu_dump_state(CPUState *cs, FILE *f, int flags) } while (0) #define GEN_HELPER_LL(name, ret, arg0, arg1, n) do { \ - TCGv arg00 = tcg_temp_new(); \ - TCGv arg01 = tcg_temp_new(); \ - TCGv arg11 = tcg_temp_new(); \ - tcg_gen_sari_tl(arg00, arg0, 16); \ - tcg_gen_ext16s_tl(arg01, arg0); \ - tcg_gen_ext16s_tl(arg11, arg1); \ + TCGv_i32 arg00 = tcg_temp_new_i32(); \ + TCGv_i32 arg01 = tcg_temp_new_i32(); \ + TCGv_i32 arg11 = tcg_temp_new_i32(); \ + tcg_gen_sari_i32(arg00, arg0, 16); \ + tcg_gen_ext16s_i32(arg01, arg0); \ + tcg_gen_ext16s_i32(arg11, arg1); \ gen_helper_##name(ret, arg00, arg01, arg11, arg11, n); \ } while (0) #define GEN_HELPER_LU(name, ret, arg0, arg1, n) do { \ - TCGv arg00 = tcg_temp_new(); \ - TCGv arg01 = tcg_temp_new(); \ - TCGv arg10 = tcg_temp_new(); \ - TCGv arg11 = tcg_temp_new(); \ - tcg_gen_sari_tl(arg00, arg0, 16); \ - tcg_gen_ext16s_tl(arg01, arg0); \ - tcg_gen_sari_tl(arg11, arg1, 16); \ - tcg_gen_ext16s_tl(arg10, arg1); \ + TCGv_i32 arg00 = tcg_temp_new_i32(); \ + TCGv_i32 arg01 = tcg_temp_new_i32(); \ + TCGv_i32 arg10 = tcg_temp_new_i32(); \ + TCGv_i32 arg11 = tcg_temp_new_i32(); \ + tcg_gen_sari_i32(arg00, arg0, 16); \ + tcg_gen_ext16s_i32(arg01, arg0); \ + tcg_gen_sari_i32(arg11, arg1, 16); \ + tcg_gen_ext16s_i32(arg10, arg1); \ gen_helper_##name(ret, arg00, arg01, arg10, arg11, n); \ } while (0) #define GEN_HELPER_UL(name, ret, arg0, arg1, n) do { \ - TCGv arg00 = tcg_temp_new(); \ - TCGv arg01 = tcg_temp_new(); \ - TCGv arg10 = tcg_temp_new(); \ - TCGv arg11 = tcg_temp_new(); \ - tcg_gen_sari_tl(arg00, arg0, 16); \ - tcg_gen_ext16s_tl(arg01, arg0); \ - tcg_gen_sari_tl(arg10, arg1, 16); \ - tcg_gen_ext16s_tl(arg11, arg1); \ + TCGv_i32 arg00 = tcg_temp_new_i32(); \ + TCGv_i32 arg01 = tcg_temp_new_i32(); \ + TCGv_i32 arg10 = tcg_temp_new_i32(); \ + TCGv_i32 arg11 = tcg_temp_new_i32(); \ + tcg_gen_sari_i32(arg00, arg0, 16); \ + tcg_gen_ext16s_i32(arg01, arg0); \ + tcg_gen_sari_i32(arg10, arg1, 16); \ + tcg_gen_ext16s_i32(arg11, arg1); \ gen_helper_##name(ret, arg00, arg01, arg10, arg11, n); \ } while (0) #define GEN_HELPER_UU(name, ret, arg0, arg1, n) do { \ - TCGv arg00 = tcg_temp_new(); \ - TCGv arg01 = tcg_temp_new(); \ - TCGv arg11 = tcg_temp_new(); \ - tcg_gen_sari_tl(arg01, arg0, 16); \ - tcg_gen_ext16s_tl(arg00, arg0); \ - tcg_gen_sari_tl(arg11, arg1, 16); \ + TCGv_i32 arg00 = tcg_temp_new_i32(); \ + TCGv_i32 arg01 = tcg_temp_new_i32(); \ + TCGv_i32 arg11 = tcg_temp_new_i32(); \ + tcg_gen_sari_i32(arg01, arg0, 16); \ + tcg_gen_ext16s_i32(arg00, arg0); \ + tcg_gen_sari_i32(arg11, arg1, 16); \ gen_helper_##name(ret, arg00, arg01, arg11, arg11, n); \ } while (0) @@ -200,7 +201,7 @@ void tricore_cpu_dump_state(CPUState *cs, FILE *f, int flags) /* For two 32-bit registers used a 64-bit register, the first registernumber needs to be even. Otherwise we trap. 
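The GEN_HELPER_LL/LU/UL/UU macros below feed packed-multiply helpers with the two signed 16-bit halves of a register, the high half via an arithmetic shift (sari) and the low half via sign extension (ext16s). A stand-alone sketch of that split, with illustrative names:

#include <stdint.h>
#include <stdio.h>

/* Illustrative model of splitting a 32-bit register into signed halves. */
static void split_halves(uint32_t reg, int32_t *hi, int32_t *lo)
{
    *hi = (int16_t)(reg >> 16);        /* like tcg_gen_sari_i32(.., 16) */
    *lo = (int16_t)(reg & 0xffff);     /* like tcg_gen_ext16s_i32 */
}

int main(void)
{
    int32_t hi, lo;
    split_halves(0x8000ffff, &hi, &lo);
    printf("hi=%d lo=%d\n", hi, lo);   /* hi=-32768 lo=-1 */
    return 0;
}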
*/ -static inline void generate_trap(DisasContext *ctx, int class, int tin); +static void generate_trap(DisasContext *ctx, int class, int tin); #define CHECK_REG_PAIR(reg) do { \ if (reg & 0x1) { \ generate_trap(ctx, TRAPC_INSN_ERR, TIN2_OPD); \ @@ -209,23 +210,24 @@ static inline void generate_trap(DisasContext *ctx, int class, int tin); /* Functions for load/save to/from memory */ -static inline void gen_offset_ld(DisasContext *ctx, TCGv r1, TCGv r2, - int16_t con, MemOp mop) +static void gen_offset_ld(DisasContext *ctx, TCGv_i32 r1, TCGv_i32 r2, + int16_t con, MemOp mop) { - TCGv temp = tcg_temp_new(); - tcg_gen_addi_tl(temp, r2, con); - tcg_gen_qemu_ld_tl(r1, temp, ctx->mem_idx, mop); + TCGv_i32 temp = tcg_temp_new_i32(); + tcg_gen_addi_i32(temp, r2, con); + tcg_gen_qemu_ld_i32(r1, temp, ctx->mem_idx, mop); } -static inline void gen_offset_st(DisasContext *ctx, TCGv r1, TCGv r2, - int16_t con, MemOp mop) +static void gen_offset_st(DisasContext *ctx, TCGv_i32 r1, TCGv_i32 r2, + int16_t con, MemOp mop) { - TCGv temp = tcg_temp_new(); - tcg_gen_addi_tl(temp, r2, con); - tcg_gen_qemu_st_tl(r1, temp, ctx->mem_idx, mop); + TCGv_i32 temp = tcg_temp_new_i32(); + tcg_gen_addi_i32(temp, r2, con); + tcg_gen_qemu_st_i32(r1, temp, ctx->mem_idx, mop); } -static void gen_st_2regs_64(TCGv rh, TCGv rl, TCGv address, DisasContext *ctx) +static void gen_st_2regs_64(DisasContext *ctx, TCGv_i32 rh, TCGv_i32 rl, + TCGv_i32 address) { TCGv_i64 temp = tcg_temp_new_i64(); @@ -233,15 +235,17 @@ static void gen_st_2regs_64(TCGv rh, TCGv rl, TCGv address, DisasContext *ctx) tcg_gen_qemu_st_i64(temp, address, ctx->mem_idx, MO_LEUQ); } -static void gen_offset_st_2regs(TCGv rh, TCGv rl, TCGv base, int16_t con, - DisasContext *ctx) +static void gen_offset_st_2regs(DisasContext *ctx, + TCGv_i32 rh, TCGv_i32 rl, + TCGv_i32 base, int16_t con) { - TCGv temp = tcg_temp_new(); - tcg_gen_addi_tl(temp, base, con); - gen_st_2regs_64(rh, rl, temp, ctx); + TCGv_i32 temp = tcg_temp_new_i32(); + tcg_gen_addi_i32(temp, base, con); + gen_st_2regs_64(ctx, rh, rl, temp); } -static void gen_ld_2regs_64(TCGv rh, TCGv rl, TCGv address, DisasContext *ctx) +static void gen_ld_2regs_64(DisasContext *ctx, TCGv_i32 rh, TCGv_i32 rl, + TCGv_i32 address) { TCGv_i64 temp = tcg_temp_new_i64(); @@ -250,87 +254,88 @@ static void gen_ld_2regs_64(TCGv rh, TCGv rl, TCGv address, DisasContext *ctx) tcg_gen_extr_i64_i32(rl, rh, temp); } -static void gen_offset_ld_2regs(TCGv rh, TCGv rl, TCGv base, int16_t con, - DisasContext *ctx) +static void gen_offset_ld_2regs(DisasContext *ctx, + TCGv_i32 rh, TCGv_i32 rl, + TCGv_i32 base, int16_t con) { - TCGv temp = tcg_temp_new(); - tcg_gen_addi_tl(temp, base, con); - gen_ld_2regs_64(rh, rl, temp, ctx); + TCGv_i32 temp = tcg_temp_new_i32(); + tcg_gen_addi_i32(temp, base, con); + gen_ld_2regs_64(ctx, rh, rl, temp); } -static void gen_st_preincr(DisasContext *ctx, TCGv r1, TCGv r2, int16_t off, - MemOp mop) +static void gen_st_preincr(DisasContext *ctx, TCGv_i32 r1, TCGv_i32 r2, + int16_t off, MemOp mop) { - TCGv temp = tcg_temp_new(); - tcg_gen_addi_tl(temp, r2, off); - tcg_gen_qemu_st_tl(r1, temp, ctx->mem_idx, mop); - tcg_gen_mov_tl(r2, temp); + TCGv_i32 temp = tcg_temp_new_i32(); + tcg_gen_addi_i32(temp, r2, off); + tcg_gen_qemu_st_i32(r1, temp, ctx->mem_idx, mop); + tcg_gen_mov_i32(r2, temp); } -static void gen_ld_preincr(DisasContext *ctx, TCGv r1, TCGv r2, int16_t off, - MemOp mop) +static void gen_ld_preincr(DisasContext *ctx, TCGv_i32 r1, TCGv_i32 r2, + int16_t off, MemOp mop) { - TCGv temp = 
tcg_temp_new(); - tcg_gen_addi_tl(temp, r2, off); - tcg_gen_qemu_ld_tl(r1, temp, ctx->mem_idx, mop); - tcg_gen_mov_tl(r2, temp); + TCGv_i32 temp = tcg_temp_new_i32(); + tcg_gen_addi_i32(temp, r2, off); + tcg_gen_qemu_ld_i32(r1, temp, ctx->mem_idx, mop); + tcg_gen_mov_i32(r2, temp); } /* M(EA, word) = (M(EA, word) & ~E[a][63:32]) | (E[a][31:0] & E[a][63:32]); */ -static void gen_ldmst(DisasContext *ctx, int ereg, TCGv ea) +static void gen_ldmst(DisasContext *ctx, int ereg, TCGv_i32 ea) { - TCGv temp = tcg_temp_new(); - TCGv temp2 = tcg_temp_new(); + TCGv_i32 temp = tcg_temp_new_i32(); + TCGv_i32 temp2 = tcg_temp_new_i32(); CHECK_REG_PAIR(ereg); /* temp = (M(EA, word) */ - tcg_gen_qemu_ld_tl(temp, ea, ctx->mem_idx, MO_LEUL); + tcg_gen_qemu_ld_i32(temp, ea, ctx->mem_idx, MO_LEUL); /* temp = temp & ~E[a][63:32]) */ - tcg_gen_andc_tl(temp, temp, cpu_gpr_d[ereg+1]); + tcg_gen_andc_i32(temp, temp, cpu_gpr_d[ereg + 1]); /* temp2 = (E[a][31:0] & E[a][63:32]); */ - tcg_gen_and_tl(temp2, cpu_gpr_d[ereg], cpu_gpr_d[ereg+1]); + tcg_gen_and_i32(temp2, cpu_gpr_d[ereg], cpu_gpr_d[ereg + 1]); /* temp = temp | temp2; */ - tcg_gen_or_tl(temp, temp, temp2); + tcg_gen_or_i32(temp, temp, temp2); /* M(EA, word) = temp; */ - tcg_gen_qemu_st_tl(temp, ea, ctx->mem_idx, MO_LEUL); + tcg_gen_qemu_st_i32(temp, ea, ctx->mem_idx, MO_LEUL); } /* tmp = M(EA, word); M(EA, word) = D[a]; D[a] = tmp[31:0];*/ -static void gen_swap(DisasContext *ctx, int reg, TCGv ea) +static void gen_swap(DisasContext *ctx, int reg, TCGv_i32 ea) { - TCGv temp = tcg_temp_new(); + TCGv_i32 temp = tcg_temp_new_i32(); - tcg_gen_qemu_ld_tl(temp, ea, ctx->mem_idx, MO_LEUL); - tcg_gen_qemu_st_tl(cpu_gpr_d[reg], ea, ctx->mem_idx, MO_LEUL); - tcg_gen_mov_tl(cpu_gpr_d[reg], temp); + tcg_gen_qemu_ld_i32(temp, ea, ctx->mem_idx, MO_LEUL); + tcg_gen_qemu_st_i32(cpu_gpr_d[reg], ea, ctx->mem_idx, MO_LEUL); + tcg_gen_mov_i32(cpu_gpr_d[reg], temp); } -static void gen_cmpswap(DisasContext *ctx, int reg, TCGv ea) +static void gen_cmpswap(DisasContext *ctx, int reg, TCGv_i32 ea) { - TCGv temp = tcg_temp_new(); - TCGv temp2 = tcg_temp_new(); + TCGv_i32 temp = tcg_temp_new_i32(); + TCGv_i32 temp2 = tcg_temp_new_i32(); CHECK_REG_PAIR(reg); - tcg_gen_qemu_ld_tl(temp, ea, ctx->mem_idx, MO_LEUL); - tcg_gen_movcond_tl(TCG_COND_EQ, temp2, cpu_gpr_d[reg+1], temp, + tcg_gen_qemu_ld_i32(temp, ea, ctx->mem_idx, MO_LEUL); + tcg_gen_movcond_i32(TCG_COND_EQ, temp2, cpu_gpr_d[reg + 1], temp, cpu_gpr_d[reg], temp); - tcg_gen_qemu_st_tl(temp2, ea, ctx->mem_idx, MO_LEUL); - tcg_gen_mov_tl(cpu_gpr_d[reg], temp); + tcg_gen_qemu_st_i32(temp2, ea, ctx->mem_idx, MO_LEUL); + tcg_gen_mov_i32(cpu_gpr_d[reg], temp); } -static void gen_swapmsk(DisasContext *ctx, int reg, TCGv ea) +static void gen_swapmsk(DisasContext *ctx, int reg, TCGv_i32 ea) { - TCGv temp = tcg_temp_new(); - TCGv temp2 = tcg_temp_new(); - TCGv temp3 = tcg_temp_new(); + TCGv_i32 temp = tcg_temp_new_i32(); + TCGv_i32 temp2 = tcg_temp_new_i32(); + TCGv_i32 temp3 = tcg_temp_new_i32(); CHECK_REG_PAIR(reg); - tcg_gen_qemu_ld_tl(temp, ea, ctx->mem_idx, MO_LEUL); - tcg_gen_and_tl(temp2, cpu_gpr_d[reg], cpu_gpr_d[reg+1]); - tcg_gen_andc_tl(temp3, temp, cpu_gpr_d[reg+1]); - tcg_gen_or_tl(temp2, temp2, temp3); - tcg_gen_qemu_st_tl(temp2, ea, ctx->mem_idx, MO_LEUL); - tcg_gen_mov_tl(cpu_gpr_d[reg], temp); + tcg_gen_qemu_ld_i32(temp, ea, ctx->mem_idx, MO_LEUL); + tcg_gen_and_i32(temp2, cpu_gpr_d[reg], cpu_gpr_d[reg + 1]); + tcg_gen_andc_i32(temp3, temp, cpu_gpr_d[reg + 1]); + tcg_gen_or_i32(temp2, temp2, temp3); + 
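The LDMST comment above ("M(EA, word) = (M(EA, word) & ~E[a][63:32]) | (E[a][31:0] & E[a][63:32])") describes a store-under-mask: the high word of the register pair selects which bits of the memory word are replaced by the low word. A minimal sketch restating that formula, with illustrative names:

#include <stdint.h>
#include <stdio.h>

/* Illustrative model of the LDMST load-modify-store: 'mask' plays the role of
 * E[a][63:32], 'new_bits' the role of E[a][31:0]. */
static uint32_t ldmst_model(uint32_t mem_word, uint32_t new_bits, uint32_t mask)
{
    return (mem_word & ~mask) | (new_bits & mask);
}

int main(void)
{
    /* replace only the low byte of the memory word */
    printf("%08x\n", ldmst_model(0xaabbccdd, 0x00000011, 0x000000ff)); /* aabbcc11 */
    return 0;
}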
tcg_gen_qemu_st_i32(temp2, ea, ctx->mem_idx, MO_LEUL); + tcg_gen_mov_i32(cpu_gpr_d[reg], temp); } /* We generate loads and store to core special function register (csfr) through @@ -340,12 +345,12 @@ static void gen_swapmsk(DisasContext *ctx, int reg, TCGv ea) #define R(ADDRESS, REG, FEATURE) \ case ADDRESS: \ if (has_feature(ctx, FEATURE)) { \ - tcg_gen_ld_tl(ret, tcg_env, offsetof(CPUTriCoreState, REG)); \ + tcg_gen_ld_i32(ret, tcg_env, offsetof(CPUTriCoreState, REG)); \ } \ break; #define A(ADDRESS, REG, FEATURE) R(ADDRESS, REG, FEATURE) #define E(ADDRESS, REG, FEATURE) R(ADDRESS, REG, FEATURE) -static inline void gen_mfcr(DisasContext *ctx, TCGv ret, int32_t offset) +static void gen_mfcr(DisasContext *ctx, TCGv_i32 ret, int32_t offset) { /* since we're caching PSW make this a special case */ if (offset == 0xfe04) { @@ -365,7 +370,7 @@ static inline void gen_mfcr(DisasContext *ctx, TCGv ret, int32_t offset) #define A(ADDRESS, REG, FEATURE) R(ADDRESS, REG, FEATURE) \ case ADDRESS: \ if (has_feature(ctx, FEATURE)) { \ - tcg_gen_st_tl(r1, tcg_env, offsetof(CPUTriCoreState, REG)); \ + tcg_gen_st_i32(r1, tcg_env, offsetof(CPUTriCoreState, REG)); \ } \ break; /* Endinit protected registers @@ -373,8 +378,7 @@ static inline void gen_mfcr(DisasContext *ctx, TCGv ret, int32_t offset) watchdog device, we handle endinit protected registers like all-access registers for now. */ #define E(ADDRESS, REG, FEATURE) A(ADDRESS, REG, FEATURE) -static inline void gen_mtcr(DisasContext *ctx, TCGv r1, - int32_t offset) +static void gen_mtcr(DisasContext *ctx, TCGv_i32 r1, int32_t offset) { if (ctx->priv == TRICORE_PRIV_SM) { /* since we're caching PSW make this a special case */ @@ -393,31 +397,30 @@ static inline void gen_mtcr(DisasContext *ctx, TCGv r1, /* Functions for arithmetic instructions */ -static inline void gen_add_d(TCGv ret, TCGv r1, TCGv r2) +static void gen_add_d(TCGv_i32 ret, TCGv_i32 r1, TCGv_i32 r2) { - TCGv t0 = tcg_temp_new_i32(); - TCGv result = tcg_temp_new_i32(); + TCGv_i32 t0 = tcg_temp_new_i32(); + TCGv_i32 result = tcg_temp_new_i32(); /* Addition and set V/SV bits */ - tcg_gen_add_tl(result, r1, r2); + tcg_gen_add_i32(result, r1, r2); /* calc V bit */ - tcg_gen_xor_tl(cpu_PSW_V, result, r1); - tcg_gen_xor_tl(t0, r1, r2); - tcg_gen_andc_tl(cpu_PSW_V, cpu_PSW_V, t0); + tcg_gen_xor_i32(cpu_PSW_V, result, r1); + tcg_gen_xor_i32(t0, r1, r2); + tcg_gen_andc_i32(cpu_PSW_V, cpu_PSW_V, t0); /* Calc SV bit */ - tcg_gen_or_tl(cpu_PSW_SV, cpu_PSW_SV, cpu_PSW_V); + tcg_gen_or_i32(cpu_PSW_SV, cpu_PSW_SV, cpu_PSW_V); /* Calc AV/SAV bits */ - tcg_gen_add_tl(cpu_PSW_AV, result, result); - tcg_gen_xor_tl(cpu_PSW_AV, result, cpu_PSW_AV); + tcg_gen_add_i32(cpu_PSW_AV, result, result); + tcg_gen_xor_i32(cpu_PSW_AV, result, cpu_PSW_AV); /* calc SAV */ - tcg_gen_or_tl(cpu_PSW_SAV, cpu_PSW_SAV, cpu_PSW_AV); + tcg_gen_or_i32(cpu_PSW_SAV, cpu_PSW_SAV, cpu_PSW_AV); /* write back result */ - tcg_gen_mov_tl(ret, result); + tcg_gen_mov_i32(ret, result); } -static inline void -gen_add64_d(TCGv_i64 ret, TCGv_i64 r1, TCGv_i64 r2) +static void gen_add64_d(TCGv_i64 ret, TCGv_i64 r1, TCGv_i64 r2) { - TCGv temp = tcg_temp_new(); + TCGv_i32 temp = tcg_temp_new_i32(); TCGv_i64 t0 = tcg_temp_new_i64(); TCGv_i64 t1 = tcg_temp_new_i64(); TCGv_i64 result = tcg_temp_new_i64(); @@ -429,65 +432,66 @@ gen_add64_d(TCGv_i64 ret, TCGv_i64 r1, TCGv_i64 r2) tcg_gen_andc_i64(t1, t1, t0); tcg_gen_extrh_i64_i32(cpu_PSW_V, t1); /* calc SV bit */ - tcg_gen_or_tl(cpu_PSW_SV, cpu_PSW_SV, cpu_PSW_V); + tcg_gen_or_i32(cpu_PSW_SV, cpu_PSW_SV, 
cpu_PSW_V); /* calc AV/SAV bits */ tcg_gen_extrh_i64_i32(temp, result); - tcg_gen_add_tl(cpu_PSW_AV, temp, temp); - tcg_gen_xor_tl(cpu_PSW_AV, temp, cpu_PSW_AV); + tcg_gen_add_i32(cpu_PSW_AV, temp, temp); + tcg_gen_xor_i32(cpu_PSW_AV, temp, cpu_PSW_AV); /* calc SAV */ - tcg_gen_or_tl(cpu_PSW_SAV, cpu_PSW_SAV, cpu_PSW_AV); + tcg_gen_or_i32(cpu_PSW_SAV, cpu_PSW_SAV, cpu_PSW_AV); /* write back result */ tcg_gen_mov_i64(ret, result); } -static inline void -gen_addsub64_h(TCGv ret_low, TCGv ret_high, TCGv r1_low, TCGv r1_high, TCGv r2, - TCGv r3, void(*op1)(TCGv, TCGv, TCGv), - void(*op2)(TCGv, TCGv, TCGv)) +static void gen_addsub64_h(TCGv_i32 ret_low, TCGv_i32 ret_high, + TCGv_i32 r1_low, TCGv_i32 r1_high, + TCGv_i32 r2, TCGv_i32 r3, + void(*op1)(TCGv_i32, TCGv_i32, TCGv_i32), + void(*op2)(TCGv_i32, TCGv_i32, TCGv_i32)) { - TCGv temp = tcg_temp_new(); - TCGv temp2 = tcg_temp_new(); - TCGv temp3 = tcg_temp_new(); - TCGv temp4 = tcg_temp_new(); + TCGv_i32 temp = tcg_temp_new_i32(); + TCGv_i32 temp2 = tcg_temp_new_i32(); + TCGv_i32 temp3 = tcg_temp_new_i32(); + TCGv_i32 temp4 = tcg_temp_new_i32(); (*op1)(temp, r1_low, r2); /* calc V0 bit */ - tcg_gen_xor_tl(temp2, temp, r1_low); - tcg_gen_xor_tl(temp3, r1_low, r2); + tcg_gen_xor_i32(temp2, temp, r1_low); + tcg_gen_xor_i32(temp3, r1_low, r2); if (op1 == tcg_gen_add_tl) { - tcg_gen_andc_tl(temp2, temp2, temp3); + tcg_gen_andc_i32(temp2, temp2, temp3); } else { - tcg_gen_and_tl(temp2, temp2, temp3); + tcg_gen_and_i32(temp2, temp2, temp3); } (*op2)(temp3, r1_high, r3); /* calc V1 bit */ - tcg_gen_xor_tl(cpu_PSW_V, temp3, r1_high); - tcg_gen_xor_tl(temp4, r1_high, r3); + tcg_gen_xor_i32(cpu_PSW_V, temp3, r1_high); + tcg_gen_xor_i32(temp4, r1_high, r3); if (op2 == tcg_gen_add_tl) { - tcg_gen_andc_tl(cpu_PSW_V, cpu_PSW_V, temp4); + tcg_gen_andc_i32(cpu_PSW_V, cpu_PSW_V, temp4); } else { - tcg_gen_and_tl(cpu_PSW_V, cpu_PSW_V, temp4); + tcg_gen_and_i32(cpu_PSW_V, cpu_PSW_V, temp4); } /* combine V0/V1 bits */ - tcg_gen_or_tl(cpu_PSW_V, cpu_PSW_V, temp2); + tcg_gen_or_i32(cpu_PSW_V, cpu_PSW_V, temp2); /* calc sv bit */ - tcg_gen_or_tl(cpu_PSW_SV, cpu_PSW_SV, cpu_PSW_V); + tcg_gen_or_i32(cpu_PSW_SV, cpu_PSW_SV, cpu_PSW_V); /* write result */ - tcg_gen_mov_tl(ret_low, temp); - tcg_gen_mov_tl(ret_high, temp3); + tcg_gen_mov_i32(ret_low, temp); + tcg_gen_mov_i32(ret_high, temp3); /* calc AV bit */ - tcg_gen_add_tl(temp, ret_low, ret_low); - tcg_gen_xor_tl(temp, temp, ret_low); - tcg_gen_add_tl(cpu_PSW_AV, ret_high, ret_high); - tcg_gen_xor_tl(cpu_PSW_AV, cpu_PSW_AV, ret_high); - tcg_gen_or_tl(cpu_PSW_AV, cpu_PSW_AV, temp); + tcg_gen_add_i32(temp, ret_low, ret_low); + tcg_gen_xor_i32(temp, temp, ret_low); + tcg_gen_add_i32(cpu_PSW_AV, ret_high, ret_high); + tcg_gen_xor_i32(cpu_PSW_AV, cpu_PSW_AV, ret_high); + tcg_gen_or_i32(cpu_PSW_AV, cpu_PSW_AV, temp); /* calc SAV bit */ - tcg_gen_or_tl(cpu_PSW_SAV, cpu_PSW_SAV, cpu_PSW_AV); + tcg_gen_or_i32(cpu_PSW_SAV, cpu_PSW_SAV, cpu_PSW_AV); } /* ret = r2 + (r1 * r3); */ -static inline void gen_madd32_d(TCGv ret, TCGv r1, TCGv r2, TCGv r3) +static void gen_madd32_d(TCGv_i32 ret, TCGv_i32 r1, TCGv_i32 r2, TCGv_i32 r3) { TCGv_i64 t1 = tcg_temp_new_i64(); TCGv_i64 t2 = tcg_temp_new_i64(); @@ -508,53 +512,51 @@ static inline void gen_madd32_d(TCGv ret, TCGv r1, TCGv r2, TCGv r3) tcg_gen_setcondi_i64(TCG_COND_LT, t2, t1, -0x80000000LL); tcg_gen_or_i64(t2, t2, t3); tcg_gen_extrl_i64_i32(cpu_PSW_V, t2); - tcg_gen_shli_tl(cpu_PSW_V, cpu_PSW_V, 31); + tcg_gen_shli_i32(cpu_PSW_V, cpu_PSW_V, 31); /* Calc SV bit */ - 
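gen_add_d and its relatives keep the PSW.V and PSW.AV flags in the top bit of the cached flag registers, using the xor/andc trick for signed overflow and the add/xor trick for "advanced overflow". A stand-alone sketch of those two computations on plain integers; the function name is illustrative:

#include <stdint.h>
#include <stdio.h>

/* Illustrative model of the PSW flag computation for a 32-bit add:
 * V  (bit 31): operands agree in sign but the result's sign differs;
 * AV (bit 31): result[31] ^ result[30], via the same (res+res)^res trick. */
static void add_flags_model(uint32_t r1, uint32_t r2,
                            uint32_t *result, uint32_t *v, uint32_t *av)
{
    uint32_t res = r1 + r2;

    *v  = (res ^ r1) & ~(r1 ^ r2) & 0x80000000u;
    *av = ((res + res) ^ res) & 0x80000000u;
    *result = res;
}

int main(void)
{
    uint32_t res, v, av;
    add_flags_model(0x7fffffff, 1, &res, &v, &av);
    printf("res=%08x V=%u AV=%u\n", res, v >> 31, av >> 31); /* res=80000000 V=1 AV=1 */
    return 0;
}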
tcg_gen_or_tl(cpu_PSW_SV, cpu_PSW_SV, cpu_PSW_V); + tcg_gen_or_i32(cpu_PSW_SV, cpu_PSW_SV, cpu_PSW_V); /* Calc AV/SAV bits */ - tcg_gen_add_tl(cpu_PSW_AV, ret, ret); - tcg_gen_xor_tl(cpu_PSW_AV, ret, cpu_PSW_AV); + tcg_gen_add_i32(cpu_PSW_AV, ret, ret); + tcg_gen_xor_i32(cpu_PSW_AV, ret, cpu_PSW_AV); /* calc SAV */ - tcg_gen_or_tl(cpu_PSW_SAV, cpu_PSW_SAV, cpu_PSW_AV); + tcg_gen_or_i32(cpu_PSW_SAV, cpu_PSW_SAV, cpu_PSW_AV); } -static inline void gen_maddi32_d(TCGv ret, TCGv r1, TCGv r2, int32_t con) +static void gen_maddi32_d(TCGv_i32 ret, TCGv_i32 r1, TCGv_i32 r2, int32_t con) { - TCGv temp = tcg_constant_i32(con); + TCGv_i32 temp = tcg_constant_i32(con); gen_madd32_d(ret, r1, r2, temp); } -static inline void -gen_madd64_d(TCGv ret_low, TCGv ret_high, TCGv r1, TCGv r2_low, TCGv r2_high, - TCGv r3) +static void gen_madd64_d(TCGv_i32 ret_low, TCGv_i32 ret_high, TCGv_i32 r1, + TCGv_i32 r2_low, TCGv_i32 r2_high, TCGv_i32 r3) { - TCGv t1 = tcg_temp_new(); - TCGv t2 = tcg_temp_new(); - TCGv t3 = tcg_temp_new(); - TCGv t4 = tcg_temp_new(); + TCGv_i32 t1 = tcg_temp_new_i32(); + TCGv_i32 t2 = tcg_temp_new_i32(); + TCGv_i32 t3 = tcg_temp_new_i32(); + TCGv_i32 t4 = tcg_temp_new_i32(); - tcg_gen_muls2_tl(t1, t2, r1, r3); + tcg_gen_muls2_i32(t1, t2, r1, r3); /* only the add can overflow */ - tcg_gen_add2_tl(t3, t4, r2_low, r2_high, t1, t2); + tcg_gen_add2_i32(t3, t4, r2_low, r2_high, t1, t2); /* calc V bit */ - tcg_gen_xor_tl(cpu_PSW_V, t4, r2_high); - tcg_gen_xor_tl(t1, r2_high, t2); - tcg_gen_andc_tl(cpu_PSW_V, cpu_PSW_V, t1); + tcg_gen_xor_i32(cpu_PSW_V, t4, r2_high); + tcg_gen_xor_i32(t1, r2_high, t2); + tcg_gen_andc_i32(cpu_PSW_V, cpu_PSW_V, t1); /* Calc SV bit */ - tcg_gen_or_tl(cpu_PSW_SV, cpu_PSW_SV, cpu_PSW_V); + tcg_gen_or_i32(cpu_PSW_SV, cpu_PSW_SV, cpu_PSW_V); /* Calc AV/SAV bits */ - tcg_gen_add_tl(cpu_PSW_AV, t4, t4); - tcg_gen_xor_tl(cpu_PSW_AV, t4, cpu_PSW_AV); + tcg_gen_add_i32(cpu_PSW_AV, t4, t4); + tcg_gen_xor_i32(cpu_PSW_AV, t4, cpu_PSW_AV); /* calc SAV */ - tcg_gen_or_tl(cpu_PSW_SAV, cpu_PSW_SAV, cpu_PSW_AV); + tcg_gen_or_i32(cpu_PSW_SAV, cpu_PSW_SAV, cpu_PSW_AV); /* write back the result */ - tcg_gen_mov_tl(ret_low, t3); - tcg_gen_mov_tl(ret_high, t4); + tcg_gen_mov_i32(ret_low, t3); + tcg_gen_mov_i32(ret_high, t4); } -static inline void -gen_maddu64_d(TCGv ret_low, TCGv ret_high, TCGv r1, TCGv r2_low, TCGv r2_high, - TCGv r3) +static void gen_maddu64_d(TCGv_i32 ret_low, TCGv_i32 ret_high, TCGv_i32 r1, + TCGv_i32 r2_low, TCGv_i32 r2_high, TCGv_i32 r3) { TCGv_i64 t1 = tcg_temp_new_i64(); TCGv_i64 t2 = tcg_temp_new_i64(); @@ -572,39 +574,38 @@ gen_maddu64_d(TCGv ret_low, TCGv ret_high, TCGv r1, TCGv r2_low, TCGv r2_high, calc V bit */ tcg_gen_setcond_i64(TCG_COND_LTU, t2, t2, t1); tcg_gen_extrl_i64_i32(cpu_PSW_V, t2); - tcg_gen_shli_tl(cpu_PSW_V, cpu_PSW_V, 31); + tcg_gen_shli_i32(cpu_PSW_V, cpu_PSW_V, 31); /* Calc SV bit */ - tcg_gen_or_tl(cpu_PSW_SV, cpu_PSW_SV, cpu_PSW_V); + tcg_gen_or_i32(cpu_PSW_SV, cpu_PSW_SV, cpu_PSW_V); /* Calc AV/SAV bits */ - tcg_gen_add_tl(cpu_PSW_AV, ret_high, ret_high); - tcg_gen_xor_tl(cpu_PSW_AV, ret_high, cpu_PSW_AV); + tcg_gen_add_i32(cpu_PSW_AV, ret_high, ret_high); + tcg_gen_xor_i32(cpu_PSW_AV, ret_high, cpu_PSW_AV); /* calc SAV */ - tcg_gen_or_tl(cpu_PSW_SAV, cpu_PSW_SAV, cpu_PSW_AV); + tcg_gen_or_i32(cpu_PSW_SAV, cpu_PSW_SAV, cpu_PSW_AV); } -static inline void -gen_maddi64_d(TCGv ret_low, TCGv ret_high, TCGv r1, TCGv r2_low, TCGv r2_high, - int32_t con) +static void gen_maddi64_d(TCGv_i32 ret_low, TCGv_i32 ret_high, TCGv_i32 r1, + TCGv_i32 
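The "only the add can overflow" comment in the 64-bit multiply-accumulate helpers holds because a 32x32->64 product always fits; for the unsigned variant the V bit is then simply "the 64-bit sum wrapped". A small sketch of that check, with illustrative names:

#include <stdint.h>
#include <stdio.h>

/* Illustrative model of overflow detection for an unsigned 64-bit MADD:
 * overflow iff the unsigned sum is smaller than the original accumulator. */
static int maddu64_overflows(uint64_t acc, uint32_t r1, uint32_t r3)
{
    uint64_t prod = (uint64_t)r1 * r3;   /* always representable in 64 bits */
    uint64_t sum = acc + prod;

    return sum < acc;                    /* wrapped -> V bit set */
}

int main(void)
{
    printf("%d\n", maddu64_overflows(0xffffffffffffffffull, 1, 1)); /* 1 */
    printf("%d\n", maddu64_overflows(1, 0xffffffffu, 0xffffffffu)); /* 0 */
    return 0;
}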
r2_low, TCGv_i32 r2_high, int32_t con) { - TCGv temp = tcg_constant_i32(con); + TCGv_i32 temp = tcg_constant_i32(con); gen_madd64_d(ret_low, ret_high, r1, r2_low, r2_high, temp); } -static inline void -gen_maddui64_d(TCGv ret_low, TCGv ret_high, TCGv r1, TCGv r2_low, TCGv r2_high, - int32_t con) +static void gen_maddui64_d(TCGv_i32 ret_low, TCGv_i32 ret_high, TCGv_i32 r1, + TCGv_i32 r2_low, TCGv_i32 r2_high, int32_t con) { - TCGv temp = tcg_constant_i32(con); + TCGv_i32 temp = tcg_constant_i32(con); gen_maddu64_d(ret_low, ret_high, r1, r2_low, r2_high, temp); } -static inline void -gen_madd_h(TCGv ret_low, TCGv ret_high, TCGv r1_low, TCGv r1_high, TCGv r2, - TCGv r3, uint32_t n, uint32_t mode) +static void gen_madd_h(TCGv_i32 ret_low, TCGv_i32 ret_high, + TCGv_i32 r1_low, TCGv_i32 r1_high, + TCGv_i32 r2, TCGv_i32 r3, + uint32_t n, uint32_t mode) { - TCGv t_n = tcg_constant_i32(n); - TCGv temp = tcg_temp_new(); - TCGv temp2 = tcg_temp_new(); + TCGv_i32 t_n = tcg_constant_i32(n); + TCGv_i32 temp = tcg_temp_new_i32(); + TCGv_i32 temp2 = tcg_temp_new_i32(); TCGv_i64 temp64 = tcg_temp_new_i64(); switch (mode) { case MODE_LL: @@ -625,13 +626,14 @@ gen_madd_h(TCGv ret_low, TCGv ret_high, TCGv r1_low, TCGv r1_high, TCGv r2, tcg_gen_add_tl, tcg_gen_add_tl); } -static inline void -gen_maddsu_h(TCGv ret_low, TCGv ret_high, TCGv r1_low, TCGv r1_high, TCGv r2, - TCGv r3, uint32_t n, uint32_t mode) +static void gen_maddsu_h(TCGv_i32 ret_low, TCGv_i32 ret_high, + TCGv_i32 r1_low, TCGv_i32 r1_high, + TCGv_i32 r2, TCGv_i32 r3, + uint32_t n, uint32_t mode) { - TCGv t_n = tcg_constant_i32(n); - TCGv temp = tcg_temp_new(); - TCGv temp2 = tcg_temp_new(); + TCGv_i32 t_n = tcg_constant_i32(n); + TCGv_i32 temp = tcg_temp_new_i32(); + TCGv_i32 temp2 = tcg_temp_new_i32(); TCGv_i64 temp64 = tcg_temp_new_i64(); switch (mode) { case MODE_LL: @@ -652,11 +654,12 @@ gen_maddsu_h(TCGv ret_low, TCGv ret_high, TCGv r1_low, TCGv r1_high, TCGv r2, tcg_gen_sub_tl, tcg_gen_add_tl); } -static inline void -gen_maddsum_h(TCGv ret_low, TCGv ret_high, TCGv r1_low, TCGv r1_high, TCGv r2, - TCGv r3, uint32_t n, uint32_t mode) +static void gen_maddsum_h(TCGv_i32 ret_low, TCGv_i32 ret_high, + TCGv_i32 r1_low, TCGv_i32 r1_high, + TCGv_i32 r2, TCGv_i32 r3, + uint32_t n, uint32_t mode) { - TCGv t_n = tcg_constant_i32(n); + TCGv_i32 t_n = tcg_constant_i32(n); TCGv_i64 temp64 = tcg_temp_new_i64(); TCGv_i64 temp64_2 = tcg_temp_new_i64(); TCGv_i64 temp64_3 = tcg_temp_new_i64(); @@ -685,16 +688,17 @@ gen_maddsum_h(TCGv ret_low, TCGv ret_high, TCGv r1_low, TCGv r1_high, TCGv r2, tcg_gen_extr_i64_i32(ret_low, ret_high, temp64_2); } -static inline void gen_adds(TCGv ret, TCGv r1, TCGv r2); +static void gen_adds(TCGv_i32 ret, TCGv_i32 r1, TCGv_i32 r2); -static inline void -gen_madds_h(TCGv ret_low, TCGv ret_high, TCGv r1_low, TCGv r1_high, TCGv r2, - TCGv r3, uint32_t n, uint32_t mode) +static void gen_madds_h(TCGv_i32 ret_low, TCGv_i32 ret_high, + TCGv_i32 r1_low, TCGv_i32 r1_high, + TCGv_i32 r2, TCGv_i32 r3, + uint32_t n, uint32_t mode) { - TCGv t_n = tcg_constant_i32(n); - TCGv temp = tcg_temp_new(); - TCGv temp2 = tcg_temp_new(); - TCGv temp3 = tcg_temp_new(); + TCGv_i32 t_n = tcg_constant_i32(n); + TCGv_i32 temp = tcg_temp_new_i32(); + TCGv_i32 temp2 = tcg_temp_new_i32(); + TCGv_i32 temp3 = tcg_temp_new_i32(); TCGv_i64 temp64 = tcg_temp_new_i64(); switch (mode) { @@ -713,25 +717,26 @@ gen_madds_h(TCGv ret_low, TCGv ret_high, TCGv r1_low, TCGv r1_high, TCGv r2, } tcg_gen_extr_i64_i32(temp, temp2, temp64); gen_adds(ret_low, r1_low, temp); - 
tcg_gen_mov_tl(temp, cpu_PSW_V); - tcg_gen_mov_tl(temp3, cpu_PSW_AV); + tcg_gen_mov_i32(temp, cpu_PSW_V); + tcg_gen_mov_i32(temp3, cpu_PSW_AV); gen_adds(ret_high, r1_high, temp2); /* combine v bits */ - tcg_gen_or_tl(cpu_PSW_V, cpu_PSW_V, temp); + tcg_gen_or_i32(cpu_PSW_V, cpu_PSW_V, temp); /* combine av bits */ - tcg_gen_or_tl(cpu_PSW_AV, cpu_PSW_AV, temp3); + tcg_gen_or_i32(cpu_PSW_AV, cpu_PSW_AV, temp3); } -static inline void gen_subs(TCGv ret, TCGv r1, TCGv r2); +static void gen_subs(TCGv_i32 ret, TCGv_i32 r1, TCGv_i32 r2); -static inline void -gen_maddsus_h(TCGv ret_low, TCGv ret_high, TCGv r1_low, TCGv r1_high, TCGv r2, - TCGv r3, uint32_t n, uint32_t mode) +static void gen_maddsus_h(TCGv_i32 ret_low, TCGv_i32 ret_high, + TCGv_i32 r1_low, TCGv_i32 r1_high, + TCGv_i32 r2, TCGv_i32 r3, + uint32_t n, uint32_t mode) { - TCGv t_n = tcg_constant_i32(n); - TCGv temp = tcg_temp_new(); - TCGv temp2 = tcg_temp_new(); - TCGv temp3 = tcg_temp_new(); + TCGv_i32 t_n = tcg_constant_i32(n); + TCGv_i32 temp = tcg_temp_new_i32(); + TCGv_i32 temp2 = tcg_temp_new_i32(); + TCGv_i32 temp3 = tcg_temp_new_i32(); TCGv_i64 temp64 = tcg_temp_new_i64(); switch (mode) { @@ -750,20 +755,21 @@ gen_maddsus_h(TCGv ret_low, TCGv ret_high, TCGv r1_low, TCGv r1_high, TCGv r2, } tcg_gen_extr_i64_i32(temp, temp2, temp64); gen_subs(ret_low, r1_low, temp); - tcg_gen_mov_tl(temp, cpu_PSW_V); - tcg_gen_mov_tl(temp3, cpu_PSW_AV); + tcg_gen_mov_i32(temp, cpu_PSW_V); + tcg_gen_mov_i32(temp3, cpu_PSW_AV); gen_adds(ret_high, r1_high, temp2); /* combine v bits */ - tcg_gen_or_tl(cpu_PSW_V, cpu_PSW_V, temp); + tcg_gen_or_i32(cpu_PSW_V, cpu_PSW_V, temp); /* combine av bits */ - tcg_gen_or_tl(cpu_PSW_AV, cpu_PSW_AV, temp3); + tcg_gen_or_i32(cpu_PSW_AV, cpu_PSW_AV, temp3); } -static inline void -gen_maddsums_h(TCGv ret_low, TCGv ret_high, TCGv r1_low, TCGv r1_high, TCGv r2, - TCGv r3, uint32_t n, uint32_t mode) +static void gen_maddsums_h(TCGv_i32 ret_low, TCGv_i32 ret_high, + TCGv_i32 r1_low, TCGv_i32 r1_high, + TCGv_i32 r2, TCGv_i32 r3, + uint32_t n, uint32_t mode) { - TCGv t_n = tcg_constant_i32(n); + TCGv_i32 t_n = tcg_constant_i32(n); TCGv_i64 temp64 = tcg_temp_new_i64(); TCGv_i64 temp64_2 = tcg_temp_new_i64(); @@ -792,11 +798,12 @@ gen_maddsums_h(TCGv ret_low, TCGv ret_high, TCGv r1_low, TCGv r1_high, TCGv r2, } -static inline void -gen_maddm_h(TCGv ret_low, TCGv ret_high, TCGv r1_low, TCGv r1_high, TCGv r2, - TCGv r3, uint32_t n, uint32_t mode) +static void gen_maddm_h(TCGv_i32 ret_low, TCGv_i32 ret_high, + TCGv_i32 r1_low, TCGv_i32 r1_high, + TCGv_i32 r2, TCGv_i32 r3, + uint32_t n, uint32_t mode) { - TCGv t_n = tcg_constant_i32(n); + TCGv_i32 t_n = tcg_constant_i32(n); TCGv_i64 temp64 = tcg_temp_new_i64(); TCGv_i64 temp64_2 = tcg_temp_new_i64(); TCGv_i64 temp64_3 = tcg_temp_new_i64(); @@ -820,11 +827,12 @@ gen_maddm_h(TCGv ret_low, TCGv ret_high, TCGv r1_low, TCGv r1_high, TCGv r2, tcg_gen_extr_i64_i32(ret_low, ret_high, temp64_3); } -static inline void -gen_maddms_h(TCGv ret_low, TCGv ret_high, TCGv r1_low, TCGv r1_high, TCGv r2, - TCGv r3, uint32_t n, uint32_t mode) +static void gen_maddms_h(TCGv_i32 ret_low, TCGv_i32 ret_high, + TCGv_i32 r1_low, TCGv_i32 r1_high, + TCGv_i32 r2, TCGv_i32 r3, + uint32_t n, uint32_t mode) { - TCGv t_n = tcg_constant_i32(n); + TCGv_i32 t_n = tcg_constant_i32(n); TCGv_i64 temp64 = tcg_temp_new_i64(); TCGv_i64 temp64_2 = tcg_temp_new_i64(); switch (mode) { @@ -846,11 +854,11 @@ gen_maddms_h(TCGv ret_low, TCGv ret_high, TCGv r1_low, TCGv r1_high, TCGv r2, tcg_gen_extr_i64_i32(ret_low, ret_high, 
temp64); } -static inline void -gen_maddr64_h(TCGv ret, TCGv r1_low, TCGv r1_high, TCGv r2, TCGv r3, uint32_t n, - uint32_t mode) +static void gen_maddr64_h(TCGv_i32 ret, TCGv_i32 r1_low, TCGv_i32 r1_high, + TCGv_i32 r2, TCGv_i32 r3, + uint32_t n, uint32_t mode) { - TCGv t_n = tcg_constant_i32(n); + TCGv_i32 t_n = tcg_constant_i32(n); TCGv_i64 temp64 = tcg_temp_new_i64(); switch (mode) { case MODE_LL: @@ -869,23 +877,25 @@ gen_maddr64_h(TCGv ret, TCGv r1_low, TCGv r1_high, TCGv r2, TCGv r3, uint32_t n, gen_helper_addr_h(ret, tcg_env, temp64, r1_low, r1_high); } -static inline void -gen_maddr32_h(TCGv ret, TCGv r1, TCGv r2, TCGv r3, uint32_t n, uint32_t mode) +static void gen_maddr32_h(TCGv_i32 ret, + TCGv_i32 r1, TCGv_i32 r2, TCGv_i32 r3, + uint32_t n, uint32_t mode) { - TCGv temp = tcg_temp_new(); - TCGv temp2 = tcg_temp_new(); + TCGv_i32 temp = tcg_temp_new_i32(); + TCGv_i32 temp2 = tcg_temp_new_i32(); - tcg_gen_andi_tl(temp2, r1, 0xffff0000); - tcg_gen_shli_tl(temp, r1, 16); + tcg_gen_andi_i32(temp2, r1, 0xffff0000); + tcg_gen_shli_i32(temp, r1, 16); gen_maddr64_h(ret, temp, temp2, r2, r3, n, mode); } -static inline void -gen_maddsur32_h(TCGv ret, TCGv r1, TCGv r2, TCGv r3, uint32_t n, uint32_t mode) +static void gen_maddsur32_h(TCGv_i32 ret, + TCGv_i32 r1, TCGv_i32 r2, TCGv_i32 r3, + uint32_t n, uint32_t mode) { - TCGv t_n = tcg_constant_i32(n); - TCGv temp = tcg_temp_new(); - TCGv temp2 = tcg_temp_new(); + TCGv_i32 t_n = tcg_constant_i32(n); + TCGv_i32 temp = tcg_temp_new_i32(); + TCGv_i32 temp2 = tcg_temp_new_i32(); TCGv_i64 temp64 = tcg_temp_new_i64(); switch (mode) { case MODE_LL: @@ -901,17 +911,17 @@ gen_maddsur32_h(TCGv ret, TCGv r1, TCGv r2, TCGv r3, uint32_t n, uint32_t mode) GEN_HELPER_UU(mul_h, temp64, r2, r3, t_n); break; } - tcg_gen_andi_tl(temp2, r1, 0xffff0000); - tcg_gen_shli_tl(temp, r1, 16); + tcg_gen_andi_i32(temp2, r1, 0xffff0000); + tcg_gen_shli_i32(temp, r1, 16); gen_helper_addsur_h(ret, tcg_env, temp64, temp, temp2); } -static inline void -gen_maddr64s_h(TCGv ret, TCGv r1_low, TCGv r1_high, TCGv r2, TCGv r3, - uint32_t n, uint32_t mode) +static void gen_maddr64s_h(TCGv_i32 ret, TCGv_i32 r1_low, TCGv_i32 r1_high, + TCGv_i32 r2, TCGv_i32 r3, + uint32_t n, uint32_t mode) { - TCGv t_n = tcg_constant_i32(n); + TCGv_i32 t_n = tcg_constant_i32(n); TCGv_i64 temp64 = tcg_temp_new_i64(); switch (mode) { case MODE_LL: @@ -930,23 +940,25 @@ gen_maddr64s_h(TCGv ret, TCGv r1_low, TCGv r1_high, TCGv r2, TCGv r3, gen_helper_addr_h_ssov(ret, tcg_env, temp64, r1_low, r1_high); } -static inline void -gen_maddr32s_h(TCGv ret, TCGv r1, TCGv r2, TCGv r3, uint32_t n, uint32_t mode) +static void gen_maddr32s_h(TCGv_i32 ret, + TCGv_i32 r1, TCGv_i32 r2, TCGv_i32 r3, + uint32_t n, uint32_t mode) { - TCGv temp = tcg_temp_new(); - TCGv temp2 = tcg_temp_new(); + TCGv_i32 temp = tcg_temp_new_i32(); + TCGv_i32 temp2 = tcg_temp_new_i32(); - tcg_gen_andi_tl(temp2, r1, 0xffff0000); - tcg_gen_shli_tl(temp, r1, 16); + tcg_gen_andi_i32(temp2, r1, 0xffff0000); + tcg_gen_shli_i32(temp, r1, 16); gen_maddr64s_h(ret, temp, temp2, r2, r3, n, mode); } -static inline void -gen_maddsur32s_h(TCGv ret, TCGv r1, TCGv r2, TCGv r3, uint32_t n, uint32_t mode) +static void gen_maddsur32s_h(TCGv_i32 ret, + TCGv_i32 r1, TCGv_i32 r2, TCGv_i32 r3, + uint32_t n, uint32_t mode) { - TCGv t_n = tcg_constant_i32(n); - TCGv temp = tcg_temp_new(); - TCGv temp2 = tcg_temp_new(); + TCGv_i32 t_n = tcg_constant_i32(n); + TCGv_i32 temp = tcg_temp_new_i32(); + TCGv_i32 temp2 = tcg_temp_new_i32(); TCGv_i64 temp64 = 
tcg_temp_new_i64(); switch (mode) { case MODE_LL: @@ -962,32 +974,32 @@ gen_maddsur32s_h(TCGv ret, TCGv r1, TCGv r2, TCGv r3, uint32_t n, uint32_t mode) GEN_HELPER_UU(mul_h, temp64, r2, r3, t_n); break; } - tcg_gen_andi_tl(temp2, r1, 0xffff0000); - tcg_gen_shli_tl(temp, r1, 16); + tcg_gen_andi_i32(temp2, r1, 0xffff0000); + tcg_gen_shli_i32(temp, r1, 16); gen_helper_addsur_h_ssov(ret, tcg_env, temp64, temp, temp2); } -static inline void -gen_maddr_q(TCGv ret, TCGv r1, TCGv r2, TCGv r3, uint32_t n) +static void gen_maddr_q(TCGv_i32 ret, TCGv_i32 r1, TCGv_i32 r2, TCGv_i32 r3, + uint32_t n) { - TCGv t_n = tcg_constant_i32(n); + TCGv_i32 t_n = tcg_constant_i32(n); gen_helper_maddr_q(ret, tcg_env, r1, r2, r3, t_n); } -static inline void -gen_maddrs_q(TCGv ret, TCGv r1, TCGv r2, TCGv r3, uint32_t n) +static void gen_maddrs_q(TCGv_i32 ret, TCGv_i32 r1, TCGv_i32 r2, TCGv_i32 r3, + uint32_t n) { - TCGv t_n = tcg_constant_i32(n); + TCGv_i32 t_n = tcg_constant_i32(n); gen_helper_maddr_q_ssov(ret, tcg_env, r1, r2, r3, t_n); } -static inline void -gen_madd32_q(TCGv ret, TCGv arg1, TCGv arg2, TCGv arg3, uint32_t n, - uint32_t up_shift) +static void gen_madd32_q(TCGv_i32 ret, + TCGv_i32 arg1, TCGv_i32 arg2, TCGv_i32 arg3, + uint32_t n, uint32_t up_shift) { - TCGv temp = tcg_temp_new(); - TCGv temp2 = tcg_temp_new(); - TCGv temp3 = tcg_temp_new(); + TCGv_i32 temp = tcg_temp_new_i32(); + TCGv_i32 temp2 = tcg_temp_new_i32(); + TCGv_i32 temp3 = tcg_temp_new_i32(); TCGv_i64 t1 = tcg_temp_new_i64(); TCGv_i64 t2 = tcg_temp_new_i64(); TCGv_i64 t3 = tcg_temp_new_i64(); @@ -1008,81 +1020,83 @@ gen_madd32_q(TCGv ret, TCGv arg1, TCGv arg2, TCGv arg3, uint32_t n, tcg_gen_setcondi_i64(TCG_COND_LT, t2, t3, -0x80000000LL); tcg_gen_or_i64(t1, t1, t2); tcg_gen_extrl_i64_i32(cpu_PSW_V, t1); - tcg_gen_shli_tl(cpu_PSW_V, cpu_PSW_V, 31); + tcg_gen_shli_i32(cpu_PSW_V, cpu_PSW_V, 31); /* We produce an overflow on the host if the mul before was (0x80000000 * 0x80000000) << 1). If this is the case, we negate the ovf. 
*/ if (n == 1) { - tcg_gen_setcondi_tl(TCG_COND_EQ, temp, arg2, 0x80000000); - tcg_gen_setcond_tl(TCG_COND_EQ, temp2, arg2, arg3); - tcg_gen_and_tl(temp, temp, temp2); - tcg_gen_shli_tl(temp, temp, 31); + tcg_gen_setcondi_i32(TCG_COND_EQ, temp, arg2, 0x80000000); + tcg_gen_setcond_i32(TCG_COND_EQ, temp2, arg2, arg3); + tcg_gen_and_i32(temp, temp, temp2); + tcg_gen_shli_i32(temp, temp, 31); /* negate v bit, if special condition */ - tcg_gen_xor_tl(cpu_PSW_V, cpu_PSW_V, temp); + tcg_gen_xor_i32(cpu_PSW_V, cpu_PSW_V, temp); } /* Calc SV bit */ - tcg_gen_or_tl(cpu_PSW_SV, cpu_PSW_SV, cpu_PSW_V); + tcg_gen_or_i32(cpu_PSW_SV, cpu_PSW_SV, cpu_PSW_V); /* Calc AV/SAV bits */ - tcg_gen_add_tl(cpu_PSW_AV, temp3, temp3); - tcg_gen_xor_tl(cpu_PSW_AV, temp3, cpu_PSW_AV); + tcg_gen_add_i32(cpu_PSW_AV, temp3, temp3); + tcg_gen_xor_i32(cpu_PSW_AV, temp3, cpu_PSW_AV); /* calc SAV */ - tcg_gen_or_tl(cpu_PSW_SAV, cpu_PSW_SAV, cpu_PSW_AV); + tcg_gen_or_i32(cpu_PSW_SAV, cpu_PSW_SAV, cpu_PSW_AV); /* write back result */ - tcg_gen_mov_tl(ret, temp3); + tcg_gen_mov_i32(ret, temp3); } -static inline void -gen_m16add32_q(TCGv ret, TCGv arg1, TCGv arg2, TCGv arg3, uint32_t n) +static void gen_m16add32_q(TCGv_i32 ret, + TCGv_i32 arg1, TCGv_i32 arg2, TCGv_i32 arg3, + uint32_t n) { - TCGv temp = tcg_temp_new(); - TCGv temp2 = tcg_temp_new(); + TCGv_i32 temp = tcg_temp_new_i32(); + TCGv_i32 temp2 = tcg_temp_new_i32(); if (n == 0) { - tcg_gen_mul_tl(temp, arg2, arg3); + tcg_gen_mul_i32(temp, arg2, arg3); } else { /* n is expected to be 1 */ - tcg_gen_mul_tl(temp, arg2, arg3); - tcg_gen_shli_tl(temp, temp, 1); + tcg_gen_mul_i32(temp, arg2, arg3); + tcg_gen_shli_i32(temp, temp, 1); /* catch special case r1 = r2 = 0x8000 */ - tcg_gen_setcondi_tl(TCG_COND_EQ, temp2, temp, 0x80000000); - tcg_gen_sub_tl(temp, temp, temp2); + tcg_gen_setcondi_i32(TCG_COND_EQ, temp2, temp, 0x80000000); + tcg_gen_sub_i32(temp, temp, temp2); } gen_add_d(ret, arg1, temp); } -static inline void -gen_m16adds32_q(TCGv ret, TCGv arg1, TCGv arg2, TCGv arg3, uint32_t n) +static void gen_m16adds32_q(TCGv_i32 ret, + TCGv_i32 arg1, TCGv_i32 arg2, TCGv_i32 arg3, + uint32_t n) { - TCGv temp = tcg_temp_new(); - TCGv temp2 = tcg_temp_new(); + TCGv_i32 temp = tcg_temp_new_i32(); + TCGv_i32 temp2 = tcg_temp_new_i32(); if (n == 0) { - tcg_gen_mul_tl(temp, arg2, arg3); + tcg_gen_mul_i32(temp, arg2, arg3); } else { /* n is expected to be 1 */ - tcg_gen_mul_tl(temp, arg2, arg3); - tcg_gen_shli_tl(temp, temp, 1); + tcg_gen_mul_i32(temp, arg2, arg3); + tcg_gen_shli_i32(temp, temp, 1); /* catch special case r1 = r2 = 0x8000 */ - tcg_gen_setcondi_tl(TCG_COND_EQ, temp2, temp, 0x80000000); - tcg_gen_sub_tl(temp, temp, temp2); + tcg_gen_setcondi_i32(TCG_COND_EQ, temp2, temp, 0x80000000); + tcg_gen_sub_i32(temp, temp, temp2); } gen_adds(ret, arg1, temp); } -static inline void -gen_m16add64_q(TCGv rl, TCGv rh, TCGv arg1_low, TCGv arg1_high, TCGv arg2, - TCGv arg3, uint32_t n) +static void gen_m16add64_q(TCGv_i32 rl, TCGv_i32 rh, + TCGv_i32 arg1_low, TCGv_i32 arg1_high, + TCGv_i32 arg2, TCGv_i32 arg3, uint32_t n) { - TCGv temp = tcg_temp_new(); - TCGv temp2 = tcg_temp_new(); + TCGv_i32 temp = tcg_temp_new_i32(); + TCGv_i32 temp2 = tcg_temp_new_i32(); TCGv_i64 t1 = tcg_temp_new_i64(); TCGv_i64 t2 = tcg_temp_new_i64(); TCGv_i64 t3 = tcg_temp_new_i64(); if (n == 0) { - tcg_gen_mul_tl(temp, arg2, arg3); + tcg_gen_mul_i32(temp, arg2, arg3); } else { /* n is expected to be 1 */ - tcg_gen_mul_tl(temp, arg2, arg3); - tcg_gen_shli_tl(temp, temp, 1); + tcg_gen_mul_i32(temp, arg2, arg3); 
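The "catch special case r1 = r2 = 0x8000" fixup in the m16add helpers handles the one Q15 product that does not fit after the n=1 doubling: (-1.0) * (-1.0). A stand-alone sketch of that fractional multiply and clamp, with illustrative names (the exact saturation value mirrors the temp -= 1 in the TCG code):

#include <stdint.h>
#include <stdio.h>

/* Illustrative model of the n=1 fractional (Q15 x Q15 -> Q31) multiply:
 * the product is doubled, and the single non-representable case
 * 0x8000 * 0x8000 is clamped to the largest positive Q31 value. */
static uint32_t q15_mul_q31(int16_t a, int16_t b)
{
    int64_t prod = (int64_t)a * b;         /* at most 2^30 in magnitude */
    uint32_t res = (uint32_t)(prod << 1);  /* scale to Q31 */

    if (res == 0x80000000u) {              /* only hit for a == b == -32768 */
        res -= 1;                          /* 0x7fffffff, close to +1.0 in Q31 */
    }
    return res;
}

int main(void)
{
    printf("%08x\n", q15_mul_q31(-32768, -32768)); /* 7fffffff */
    printf("%08x\n", q15_mul_q31(0x4000, 0x4000)); /* 20000000: 0.5*0.5 = 0.25 */
    return 0;
}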
+ tcg_gen_shli_i32(temp, temp, 1); /* catch special case r1 = r2 = 0x8000 */ - tcg_gen_setcondi_tl(TCG_COND_EQ, temp2, temp, 0x80000000); - tcg_gen_sub_tl(temp, temp, temp2); + tcg_gen_setcondi_i32(TCG_COND_EQ, temp2, temp, 0x80000000); + tcg_gen_sub_i32(temp, temp, temp2); } tcg_gen_ext_i32_i64(t2, temp); tcg_gen_shli_i64(t2, t2, 16); @@ -1092,23 +1106,23 @@ gen_m16add64_q(TCGv rl, TCGv rh, TCGv arg1_low, TCGv arg1_high, TCGv arg2, tcg_gen_extr_i64_i32(rl, rh, t3); } -static inline void -gen_m16adds64_q(TCGv rl, TCGv rh, TCGv arg1_low, TCGv arg1_high, TCGv arg2, - TCGv arg3, uint32_t n) +static void gen_m16adds64_q(TCGv_i32 rl, TCGv_i32 rh, + TCGv_i32 arg1_low, TCGv_i32 arg1_high, + TCGv_i32 arg2, TCGv_i32 arg3, uint32_t n) { - TCGv temp = tcg_temp_new(); - TCGv temp2 = tcg_temp_new(); + TCGv_i32 temp = tcg_temp_new_i32(); + TCGv_i32 temp2 = tcg_temp_new_i32(); TCGv_i64 t1 = tcg_temp_new_i64(); TCGv_i64 t2 = tcg_temp_new_i64(); if (n == 0) { - tcg_gen_mul_tl(temp, arg2, arg3); + tcg_gen_mul_i32(temp, arg2, arg3); } else { /* n is expected to be 1 */ - tcg_gen_mul_tl(temp, arg2, arg3); - tcg_gen_shli_tl(temp, temp, 1); + tcg_gen_mul_i32(temp, arg2, arg3); + tcg_gen_shli_i32(temp, temp, 1); /* catch special case r1 = r2 = 0x8000 */ - tcg_gen_setcondi_tl(TCG_COND_EQ, temp2, temp, 0x80000000); - tcg_gen_sub_tl(temp, temp, temp2); + tcg_gen_setcondi_i32(TCG_COND_EQ, temp2, temp, 0x80000000); + tcg_gen_sub_i32(temp, temp, temp2); } tcg_gen_ext_i32_i64(t2, temp); tcg_gen_shli_i64(t2, t2, 16); @@ -1118,15 +1132,15 @@ gen_m16adds64_q(TCGv rl, TCGv rh, TCGv arg1_low, TCGv arg1_high, TCGv arg2, tcg_gen_extr_i64_i32(rl, rh, t1); } -static inline void -gen_madd64_q(TCGv rl, TCGv rh, TCGv arg1_low, TCGv arg1_high, TCGv arg2, - TCGv arg3, uint32_t n) +static void gen_madd64_q(TCGv_i32 rl, TCGv_i32 rh, + TCGv_i32 arg1_low, TCGv_i32 arg1_high, + TCGv_i32 arg2, TCGv_i32 arg3, uint32_t n) { TCGv_i64 t1 = tcg_temp_new_i64(); TCGv_i64 t2 = tcg_temp_new_i64(); TCGv_i64 t3 = tcg_temp_new_i64(); TCGv_i64 t4 = tcg_temp_new_i64(); - TCGv temp, temp2; + TCGv_i32 temp, temp2; tcg_gen_concat_i32_i64(t1, arg1_low, arg1_high); tcg_gen_ext_i32_i64(t2, arg2); @@ -1146,29 +1160,29 @@ gen_madd64_q(TCGv rl, TCGv rh, TCGv arg1_low, TCGv arg1_high, TCGv arg2, (0x80000000 * 0x80000000) << 1). If this is the case, we negate the ovf. 
*/ if (n == 1) { - temp = tcg_temp_new(); - temp2 = tcg_temp_new(); - tcg_gen_setcondi_tl(TCG_COND_EQ, temp, arg2, 0x80000000); - tcg_gen_setcond_tl(TCG_COND_EQ, temp2, arg2, arg3); - tcg_gen_and_tl(temp, temp, temp2); - tcg_gen_shli_tl(temp, temp, 31); + temp = tcg_temp_new_i32(); + temp2 = tcg_temp_new_i32(); + tcg_gen_setcondi_i32(TCG_COND_EQ, temp, arg2, 0x80000000); + tcg_gen_setcond_i32(TCG_COND_EQ, temp2, arg2, arg3); + tcg_gen_and_i32(temp, temp, temp2); + tcg_gen_shli_i32(temp, temp, 31); /* negate v bit, if special condition */ - tcg_gen_xor_tl(cpu_PSW_V, cpu_PSW_V, temp); + tcg_gen_xor_i32(cpu_PSW_V, cpu_PSW_V, temp); } /* write back result */ tcg_gen_extr_i64_i32(rl, rh, t4); /* Calc SV bit */ - tcg_gen_or_tl(cpu_PSW_SV, cpu_PSW_SV, cpu_PSW_V); + tcg_gen_or_i32(cpu_PSW_SV, cpu_PSW_SV, cpu_PSW_V); /* Calc AV/SAV bits */ - tcg_gen_add_tl(cpu_PSW_AV, rh, rh); - tcg_gen_xor_tl(cpu_PSW_AV, rh, cpu_PSW_AV); + tcg_gen_add_i32(cpu_PSW_AV, rh, rh); + tcg_gen_xor_i32(cpu_PSW_AV, rh, cpu_PSW_AV); /* calc SAV */ - tcg_gen_or_tl(cpu_PSW_SAV, cpu_PSW_SAV, cpu_PSW_AV); + tcg_gen_or_i32(cpu_PSW_SAV, cpu_PSW_SAV, cpu_PSW_AV); } -static inline void -gen_madds32_q(TCGv ret, TCGv arg1, TCGv arg2, TCGv arg3, uint32_t n, - uint32_t up_shift) +static void gen_madds32_q(TCGv_i32 ret, + TCGv_i32 arg1, TCGv_i32 arg2, TCGv_i32 arg3, + uint32_t n, uint32_t up_shift) { TCGv_i64 t1 = tcg_temp_new_i64(); TCGv_i64 t2 = tcg_temp_new_i64(); @@ -1184,12 +1198,12 @@ gen_madds32_q(TCGv ret, TCGv arg1, TCGv arg2, TCGv arg3, uint32_t n, gen_helper_madd32_q_add_ssov(ret, tcg_env, t1, t2); } -static inline void -gen_madds64_q(TCGv rl, TCGv rh, TCGv arg1_low, TCGv arg1_high, TCGv arg2, - TCGv arg3, uint32_t n) +static void gen_madds64_q(TCGv_i32 rl, TCGv_i32 rh, + TCGv_i32 arg1_low, TCGv_i32 arg1_high, + TCGv_i32 arg2, TCGv_i32 arg3, uint32_t n) { TCGv_i64 r1 = tcg_temp_new_i64(); - TCGv t_n = tcg_constant_i32(n); + TCGv_i32 t_n = tcg_constant_i32(n); tcg_gen_concat_i32_i64(r1, arg1_low, arg1_high); gen_helper_madd64_q_ssov(r1, tcg_env, r1, arg2, arg3, t_n); @@ -1197,7 +1211,7 @@ gen_madds64_q(TCGv rl, TCGv rh, TCGv arg1_low, TCGv arg1_high, TCGv arg2, } /* ret = r2 - (r1 * r3); */ -static inline void gen_msub32_d(TCGv ret, TCGv r1, TCGv r2, TCGv r3) +static void gen_msub32_d(TCGv_i32 ret, TCGv_i32 r1, TCGv_i32 r2, TCGv_i32 r3) { TCGv_i64 t1 = tcg_temp_new_i64(); TCGv_i64 t2 = tcg_temp_new_i64(); @@ -1218,62 +1232,61 @@ static inline void gen_msub32_d(TCGv ret, TCGv r1, TCGv r2, TCGv r3) tcg_gen_setcondi_i64(TCG_COND_LT, t2, t1, -0x80000000LL); tcg_gen_or_i64(t2, t2, t3); tcg_gen_extrl_i64_i32(cpu_PSW_V, t2); - tcg_gen_shli_tl(cpu_PSW_V, cpu_PSW_V, 31); + tcg_gen_shli_i32(cpu_PSW_V, cpu_PSW_V, 31); /* Calc SV bit */ - tcg_gen_or_tl(cpu_PSW_SV, cpu_PSW_SV, cpu_PSW_V); + tcg_gen_or_i32(cpu_PSW_SV, cpu_PSW_SV, cpu_PSW_V); /* Calc AV/SAV bits */ - tcg_gen_add_tl(cpu_PSW_AV, ret, ret); - tcg_gen_xor_tl(cpu_PSW_AV, ret, cpu_PSW_AV); + tcg_gen_add_i32(cpu_PSW_AV, ret, ret); + tcg_gen_xor_i32(cpu_PSW_AV, ret, cpu_PSW_AV); /* calc SAV */ - tcg_gen_or_tl(cpu_PSW_SAV, cpu_PSW_SAV, cpu_PSW_AV); + tcg_gen_or_i32(cpu_PSW_SAV, cpu_PSW_SAV, cpu_PSW_AV); } -static inline void gen_msubi32_d(TCGv ret, TCGv r1, TCGv r2, int32_t con) +static void gen_msubi32_d(TCGv_i32 ret, TCGv_i32 r1, TCGv_i32 r2, int32_t con) { - TCGv temp = tcg_constant_i32(con); + TCGv_i32 temp = tcg_constant_i32(con); gen_msub32_d(ret, r1, r2, temp); } -static inline void -gen_msub64_d(TCGv ret_low, TCGv ret_high, TCGv r1, TCGv r2_low, TCGv r2_high, - TCGv r3) 
+static void gen_msub64_d(TCGv_i32 ret_low, TCGv_i32 ret_high, + TCGv_i32 r1, TCGv_i32 r2_low, TCGv_i32 r2_high, + TCGv_i32 r3) { - TCGv t1 = tcg_temp_new(); - TCGv t2 = tcg_temp_new(); - TCGv t3 = tcg_temp_new(); - TCGv t4 = tcg_temp_new(); + TCGv_i32 t1 = tcg_temp_new_i32(); + TCGv_i32 t2 = tcg_temp_new_i32(); + TCGv_i32 t3 = tcg_temp_new_i32(); + TCGv_i32 t4 = tcg_temp_new_i32(); - tcg_gen_muls2_tl(t1, t2, r1, r3); + tcg_gen_muls2_i32(t1, t2, r1, r3); /* only the sub can overflow */ - tcg_gen_sub2_tl(t3, t4, r2_low, r2_high, t1, t2); + tcg_gen_sub2_i32(t3, t4, r2_low, r2_high, t1, t2); /* calc V bit */ - tcg_gen_xor_tl(cpu_PSW_V, t4, r2_high); - tcg_gen_xor_tl(t1, r2_high, t2); - tcg_gen_and_tl(cpu_PSW_V, cpu_PSW_V, t1); + tcg_gen_xor_i32(cpu_PSW_V, t4, r2_high); + tcg_gen_xor_i32(t1, r2_high, t2); + tcg_gen_and_i32(cpu_PSW_V, cpu_PSW_V, t1); /* Calc SV bit */ - tcg_gen_or_tl(cpu_PSW_SV, cpu_PSW_SV, cpu_PSW_V); + tcg_gen_or_i32(cpu_PSW_SV, cpu_PSW_SV, cpu_PSW_V); /* Calc AV/SAV bits */ - tcg_gen_add_tl(cpu_PSW_AV, t4, t4); - tcg_gen_xor_tl(cpu_PSW_AV, t4, cpu_PSW_AV); + tcg_gen_add_i32(cpu_PSW_AV, t4, t4); + tcg_gen_xor_i32(cpu_PSW_AV, t4, cpu_PSW_AV); /* calc SAV */ - tcg_gen_or_tl(cpu_PSW_SAV, cpu_PSW_SAV, cpu_PSW_AV); + tcg_gen_or_i32(cpu_PSW_SAV, cpu_PSW_SAV, cpu_PSW_AV); /* write back the result */ - tcg_gen_mov_tl(ret_low, t3); - tcg_gen_mov_tl(ret_high, t4); + tcg_gen_mov_i32(ret_low, t3); + tcg_gen_mov_i32(ret_high, t4); } -static inline void -gen_msubi64_d(TCGv ret_low, TCGv ret_high, TCGv r1, TCGv r2_low, TCGv r2_high, - int32_t con) +static void gen_msubi64_d(TCGv_i32 ret_low, TCGv_i32 ret_high, + TCGv_i32 r1, TCGv_i32 r2_low, TCGv_i32 r2_high, + int32_t con) { - TCGv temp = tcg_constant_i32(con); + TCGv_i32 temp = tcg_constant_i32(con); gen_msub64_d(ret_low, ret_high, r1, r2_low, r2_high, temp); } -static inline void -gen_msubu64_d(TCGv ret_low, TCGv ret_high, TCGv r1, TCGv r2_low, TCGv r2_high, - TCGv r3) +static void gen_msubu64_d(TCGv_i32 ret_low, TCGv_i32 ret_high, TCGv_i32 r1, + TCGv_i32 r2_low, TCGv_i32 r2_high, TCGv_i32 r3) { TCGv_i64 t1 = tcg_temp_new_i64(); TCGv_i64 t2 = tcg_temp_new_i64(); @@ -1289,153 +1302,152 @@ gen_msubu64_d(TCGv ret_low, TCGv ret_high, TCGv r1, TCGv r2_low, TCGv r2_high, /* calc V bit, only the sub can overflow, if t1 > t2 */ tcg_gen_setcond_i64(TCG_COND_GTU, t1, t1, t2); tcg_gen_extrl_i64_i32(cpu_PSW_V, t1); - tcg_gen_shli_tl(cpu_PSW_V, cpu_PSW_V, 31); + tcg_gen_shli_i32(cpu_PSW_V, cpu_PSW_V, 31); /* Calc SV bit */ - tcg_gen_or_tl(cpu_PSW_SV, cpu_PSW_SV, cpu_PSW_V); + tcg_gen_or_i32(cpu_PSW_SV, cpu_PSW_SV, cpu_PSW_V); /* Calc AV/SAV bits */ - tcg_gen_add_tl(cpu_PSW_AV, ret_high, ret_high); - tcg_gen_xor_tl(cpu_PSW_AV, ret_high, cpu_PSW_AV); + tcg_gen_add_i32(cpu_PSW_AV, ret_high, ret_high); + tcg_gen_xor_i32(cpu_PSW_AV, ret_high, cpu_PSW_AV); /* calc SAV */ - tcg_gen_or_tl(cpu_PSW_SAV, cpu_PSW_SAV, cpu_PSW_AV); + tcg_gen_or_i32(cpu_PSW_SAV, cpu_PSW_SAV, cpu_PSW_AV); } -static inline void -gen_msubui64_d(TCGv ret_low, TCGv ret_high, TCGv r1, TCGv r2_low, TCGv r2_high, - int32_t con) +static void gen_msubui64_d(TCGv_i32 ret_low, TCGv_i32 ret_high, + TCGv_i32 r1, TCGv_i32 r2_low, TCGv_i32 r2_high, + int32_t con) { - TCGv temp = tcg_constant_i32(con); + TCGv_i32 temp = tcg_constant_i32(con); gen_msubu64_d(ret_low, ret_high, r1, r2_low, r2_high, temp); } -static inline void gen_addi_d(TCGv ret, TCGv r1, target_ulong r2) +static void gen_addi_d(TCGv_i32 ret, TCGv_i32 r1, int32_t r2) { - TCGv temp = tcg_constant_i32(r2); + TCGv_i32 temp = 
tcg_constant_i32(r2); gen_add_d(ret, r1, temp); } /* calculate the carry bit too */ -static inline void gen_add_CC(TCGv ret, TCGv r1, TCGv r2) +static void gen_add_CC(TCGv_i32 ret, TCGv_i32 r1, TCGv_i32 r2) { - TCGv t0 = tcg_temp_new_i32(); - TCGv result = tcg_temp_new_i32(); + TCGv_i32 t0 = tcg_temp_new_i32(); + TCGv_i32 result = tcg_temp_new_i32(); - tcg_gen_movi_tl(t0, 0); + tcg_gen_movi_i32(t0, 0); /* Addition and set C/V/SV bits */ tcg_gen_add2_i32(result, cpu_PSW_C, r1, t0, r2, t0); /* calc V bit */ - tcg_gen_xor_tl(cpu_PSW_V, result, r1); - tcg_gen_xor_tl(t0, r1, r2); - tcg_gen_andc_tl(cpu_PSW_V, cpu_PSW_V, t0); + tcg_gen_xor_i32(cpu_PSW_V, result, r1); + tcg_gen_xor_i32(t0, r1, r2); + tcg_gen_andc_i32(cpu_PSW_V, cpu_PSW_V, t0); /* Calc SV bit */ - tcg_gen_or_tl(cpu_PSW_SV, cpu_PSW_SV, cpu_PSW_V); + tcg_gen_or_i32(cpu_PSW_SV, cpu_PSW_SV, cpu_PSW_V); /* Calc AV/SAV bits */ - tcg_gen_add_tl(cpu_PSW_AV, result, result); - tcg_gen_xor_tl(cpu_PSW_AV, result, cpu_PSW_AV); + tcg_gen_add_i32(cpu_PSW_AV, result, result); + tcg_gen_xor_i32(cpu_PSW_AV, result, cpu_PSW_AV); /* calc SAV */ - tcg_gen_or_tl(cpu_PSW_SAV, cpu_PSW_SAV, cpu_PSW_AV); + tcg_gen_or_i32(cpu_PSW_SAV, cpu_PSW_SAV, cpu_PSW_AV); /* write back result */ - tcg_gen_mov_tl(ret, result); + tcg_gen_mov_i32(ret, result); } -static inline void gen_addi_CC(TCGv ret, TCGv r1, int32_t con) +static void gen_addi_CC(TCGv_i32 ret, TCGv_i32 r1, int32_t con) { - TCGv temp = tcg_constant_i32(con); + TCGv_i32 temp = tcg_constant_i32(con); gen_add_CC(ret, r1, temp); } -static inline void gen_addc_CC(TCGv ret, TCGv r1, TCGv r2) +static void gen_addc_CC(TCGv_i32 ret, TCGv_i32 r1, TCGv_i32 r2) { - TCGv t0 = tcg_temp_new_i32(); - TCGv result = tcg_temp_new_i32(); + TCGv_i32 t0 = tcg_temp_new_i32(); + TCGv_i32 result = tcg_temp_new_i32(); /* Addition, carry and set C/V/SV bits */ tcg_gen_addcio_i32(result, cpu_PSW_C, r1, r2, cpu_PSW_C); /* calc V bit */ - tcg_gen_xor_tl(cpu_PSW_V, result, r1); - tcg_gen_xor_tl(t0, r1, r2); - tcg_gen_andc_tl(cpu_PSW_V, cpu_PSW_V, t0); + tcg_gen_xor_i32(cpu_PSW_V, result, r1); + tcg_gen_xor_i32(t0, r1, r2); + tcg_gen_andc_i32(cpu_PSW_V, cpu_PSW_V, t0); /* Calc SV bit */ - tcg_gen_or_tl(cpu_PSW_SV, cpu_PSW_SV, cpu_PSW_V); + tcg_gen_or_i32(cpu_PSW_SV, cpu_PSW_SV, cpu_PSW_V); /* Calc AV/SAV bits */ - tcg_gen_add_tl(cpu_PSW_AV, result, result); - tcg_gen_xor_tl(cpu_PSW_AV, result, cpu_PSW_AV); + tcg_gen_add_i32(cpu_PSW_AV, result, result); + tcg_gen_xor_i32(cpu_PSW_AV, result, cpu_PSW_AV); /* calc SAV */ - tcg_gen_or_tl(cpu_PSW_SAV, cpu_PSW_SAV, cpu_PSW_AV); + tcg_gen_or_i32(cpu_PSW_SAV, cpu_PSW_SAV, cpu_PSW_AV); /* write back result */ - tcg_gen_mov_tl(ret, result); + tcg_gen_mov_i32(ret, result); } -static inline void gen_addci_CC(TCGv ret, TCGv r1, int32_t con) +static void gen_addci_CC(TCGv_i32 ret, TCGv_i32 r1, int32_t con) { - TCGv temp = tcg_constant_i32(con); + TCGv_i32 temp = tcg_constant_i32(con); gen_addc_CC(ret, r1, temp); } -static inline void gen_cond_add(TCGCond cond, TCGv r1, TCGv r2, TCGv r3, - TCGv r4) +static void gen_cond_add(TCGCond cond, + TCGv_i32 r1, TCGv_i32 r2, TCGv_i32 r3, TCGv_i32 r4) { - TCGv temp = tcg_temp_new(); - TCGv temp2 = tcg_temp_new(); - TCGv result = tcg_temp_new(); - TCGv mask = tcg_temp_new(); - TCGv t0 = tcg_constant_i32(0); + TCGv_i32 temp = tcg_temp_new_i32(); + TCGv_i32 temp2 = tcg_temp_new_i32(); + TCGv_i32 result = tcg_temp_new_i32(); + TCGv_i32 mask = tcg_temp_new_i32(); + TCGv_i32 t0 = tcg_constant_i32(0); /* create mask for sticky bits */ - tcg_gen_setcond_tl(cond, mask, 
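gen_add_CC computes the carry into cpu_PSW_C by doing the addition as a two-word add2 with zero high words, so the carry-out lands in the high result. The equivalent on plain integers, as a short sketch with illustrative names:

#include <stdint.h>
#include <stdio.h>

/* Illustrative model of the C (carry) flag for a 32-bit add: the carry is
 * the high word of the widened sum, exactly what add2 with zero high words
 * produces. */
static void add_cc_model(uint32_t r1, uint32_t r2, uint32_t *result, uint32_t *carry)
{
    uint64_t wide = (uint64_t)r1 + r2;

    *result = (uint32_t)wide;
    *carry  = (uint32_t)(wide >> 32);   /* 0 or 1 */
}

int main(void)
{
    uint32_t res, c;
    add_cc_model(0xffffffffu, 1, &res, &c);
    printf("res=%08x C=%u\n", res, c);  /* res=00000000 C=1 */
    return 0;
}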
r4, t0); - tcg_gen_shli_tl(mask, mask, 31); + tcg_gen_setcond_i32(cond, mask, r4, t0); + tcg_gen_shli_i32(mask, mask, 31); - tcg_gen_add_tl(result, r1, r2); + tcg_gen_add_i32(result, r1, r2); /* Calc PSW_V */ - tcg_gen_xor_tl(temp, result, r1); - tcg_gen_xor_tl(temp2, r1, r2); - tcg_gen_andc_tl(temp, temp, temp2); - tcg_gen_movcond_tl(cond, cpu_PSW_V, r4, t0, temp, cpu_PSW_V); + tcg_gen_xor_i32(temp, result, r1); + tcg_gen_xor_i32(temp2, r1, r2); + tcg_gen_andc_i32(temp, temp, temp2); + tcg_gen_movcond_i32(cond, cpu_PSW_V, r4, t0, temp, cpu_PSW_V); /* Set PSW_SV */ - tcg_gen_and_tl(temp, temp, mask); - tcg_gen_or_tl(cpu_PSW_SV, temp, cpu_PSW_SV); + tcg_gen_and_i32(temp, temp, mask); + tcg_gen_or_i32(cpu_PSW_SV, temp, cpu_PSW_SV); /* calc AV bit */ - tcg_gen_add_tl(temp, result, result); - tcg_gen_xor_tl(temp, temp, result); - tcg_gen_movcond_tl(cond, cpu_PSW_AV, r4, t0, temp, cpu_PSW_AV); + tcg_gen_add_i32(temp, result, result); + tcg_gen_xor_i32(temp, temp, result); + tcg_gen_movcond_i32(cond, cpu_PSW_AV, r4, t0, temp, cpu_PSW_AV); /* calc SAV bit */ - tcg_gen_and_tl(temp, temp, mask); - tcg_gen_or_tl(cpu_PSW_SAV, temp, cpu_PSW_SAV); + tcg_gen_and_i32(temp, temp, mask); + tcg_gen_or_i32(cpu_PSW_SAV, temp, cpu_PSW_SAV); /* write back result */ - tcg_gen_movcond_tl(cond, r3, r4, t0, result, r1); + tcg_gen_movcond_i32(cond, r3, r4, t0, result, r1); } -static inline void gen_condi_add(TCGCond cond, TCGv r1, int32_t r2, - TCGv r3, TCGv r4) +static void gen_condi_add(TCGCond cond, + TCGv_i32 r1, int32_t r2, TCGv_i32 r3, TCGv_i32 r4) { - TCGv temp = tcg_constant_i32(r2); + TCGv_i32 temp = tcg_constant_i32(r2); gen_cond_add(cond, r1, temp, r3, r4); } -static inline void gen_sub_d(TCGv ret, TCGv r1, TCGv r2) +static void gen_sub_d(TCGv_i32 ret, TCGv_i32 r1, TCGv_i32 r2) { - TCGv temp = tcg_temp_new_i32(); - TCGv result = tcg_temp_new_i32(); + TCGv_i32 temp = tcg_temp_new_i32(); + TCGv_i32 result = tcg_temp_new_i32(); - tcg_gen_sub_tl(result, r1, r2); + tcg_gen_sub_i32(result, r1, r2); /* calc V bit */ - tcg_gen_xor_tl(cpu_PSW_V, result, r1); - tcg_gen_xor_tl(temp, r1, r2); - tcg_gen_and_tl(cpu_PSW_V, cpu_PSW_V, temp); + tcg_gen_xor_i32(cpu_PSW_V, result, r1); + tcg_gen_xor_i32(temp, r1, r2); + tcg_gen_and_i32(cpu_PSW_V, cpu_PSW_V, temp); /* calc SV bit */ - tcg_gen_or_tl(cpu_PSW_SV, cpu_PSW_SV, cpu_PSW_V); + tcg_gen_or_i32(cpu_PSW_SV, cpu_PSW_SV, cpu_PSW_V); /* Calc AV bit */ - tcg_gen_add_tl(cpu_PSW_AV, result, result); - tcg_gen_xor_tl(cpu_PSW_AV, result, cpu_PSW_AV); + tcg_gen_add_i32(cpu_PSW_AV, result, result); + tcg_gen_xor_i32(cpu_PSW_AV, result, cpu_PSW_AV); /* calc SAV bit */ - tcg_gen_or_tl(cpu_PSW_SAV, cpu_PSW_SAV, cpu_PSW_AV); + tcg_gen_or_i32(cpu_PSW_SAV, cpu_PSW_SAV, cpu_PSW_AV); /* write back result */ - tcg_gen_mov_tl(ret, result); + tcg_gen_mov_i32(ret, result); } -static inline void -gen_sub64_d(TCGv_i64 ret, TCGv_i64 r1, TCGv_i64 r2) +static void gen_sub64_d(TCGv_i64 ret, TCGv_i64 r1, TCGv_i64 r2) { - TCGv temp = tcg_temp_new(); + TCGv_i32 temp = tcg_temp_new_i32(); TCGv_i64 t0 = tcg_temp_new_i64(); TCGv_i64 t1 = tcg_temp_new_i64(); TCGv_i64 result = tcg_temp_new_i64(); @@ -1447,87 +1459,88 @@ gen_sub64_d(TCGv_i64 ret, TCGv_i64 r1, TCGv_i64 r2) tcg_gen_and_i64(t1, t1, t0); tcg_gen_extrh_i64_i32(cpu_PSW_V, t1); /* calc SV bit */ - tcg_gen_or_tl(cpu_PSW_SV, cpu_PSW_SV, cpu_PSW_V); + tcg_gen_or_i32(cpu_PSW_SV, cpu_PSW_SV, cpu_PSW_V); /* calc AV/SAV bits */ tcg_gen_extrh_i64_i32(temp, result); - tcg_gen_add_tl(cpu_PSW_AV, temp, temp); - tcg_gen_xor_tl(cpu_PSW_AV, temp, cpu_PSW_AV); 
+ tcg_gen_add_i32(cpu_PSW_AV, temp, temp); + tcg_gen_xor_i32(cpu_PSW_AV, temp, cpu_PSW_AV); /* calc SAV */ - tcg_gen_or_tl(cpu_PSW_SAV, cpu_PSW_SAV, cpu_PSW_AV); + tcg_gen_or_i32(cpu_PSW_SAV, cpu_PSW_SAV, cpu_PSW_AV); /* write back result */ tcg_gen_mov_i64(ret, result); } -static inline void gen_sub_CC(TCGv ret, TCGv r1, TCGv r2) +static void gen_sub_CC(TCGv_i32 ret, TCGv_i32 r1, TCGv_i32 r2) { - TCGv result = tcg_temp_new(); - TCGv temp = tcg_temp_new(); + TCGv_i32 result = tcg_temp_new_i32(); + TCGv_i32 temp = tcg_temp_new_i32(); - tcg_gen_sub_tl(result, r1, r2); + tcg_gen_sub_i32(result, r1, r2); /* calc C bit */ - tcg_gen_setcond_tl(TCG_COND_GEU, cpu_PSW_C, r1, r2); + tcg_gen_setcond_i32(TCG_COND_GEU, cpu_PSW_C, r1, r2); /* calc V bit */ - tcg_gen_xor_tl(cpu_PSW_V, result, r1); - tcg_gen_xor_tl(temp, r1, r2); - tcg_gen_and_tl(cpu_PSW_V, cpu_PSW_V, temp); + tcg_gen_xor_i32(cpu_PSW_V, result, r1); + tcg_gen_xor_i32(temp, r1, r2); + tcg_gen_and_i32(cpu_PSW_V, cpu_PSW_V, temp); /* calc SV bit */ - tcg_gen_or_tl(cpu_PSW_SV, cpu_PSW_SV, cpu_PSW_V); + tcg_gen_or_i32(cpu_PSW_SV, cpu_PSW_SV, cpu_PSW_V); /* Calc AV bit */ - tcg_gen_add_tl(cpu_PSW_AV, result, result); - tcg_gen_xor_tl(cpu_PSW_AV, result, cpu_PSW_AV); + tcg_gen_add_i32(cpu_PSW_AV, result, result); + tcg_gen_xor_i32(cpu_PSW_AV, result, cpu_PSW_AV); /* calc SAV bit */ - tcg_gen_or_tl(cpu_PSW_SAV, cpu_PSW_SAV, cpu_PSW_AV); + tcg_gen_or_i32(cpu_PSW_SAV, cpu_PSW_SAV, cpu_PSW_AV); /* write back result */ - tcg_gen_mov_tl(ret, result); + tcg_gen_mov_i32(ret, result); } -static inline void gen_subc_CC(TCGv ret, TCGv r1, TCGv r2) +static void gen_subc_CC(TCGv_i32 ret, TCGv_i32 r1, TCGv_i32 r2) { - TCGv temp = tcg_temp_new(); - tcg_gen_not_tl(temp, r2); + TCGv_i32 temp = tcg_temp_new_i32(); + tcg_gen_not_i32(temp, r2); gen_addc_CC(ret, r1, temp); } -static inline void gen_cond_sub(TCGCond cond, TCGv r1, TCGv r2, TCGv r3, - TCGv r4) +static void gen_cond_sub(TCGCond cond, + TCGv_i32 r1, TCGv_i32 r2, TCGv_i32 r3, TCGv_i32 r4) { - TCGv temp = tcg_temp_new(); - TCGv temp2 = tcg_temp_new(); - TCGv result = tcg_temp_new(); - TCGv mask = tcg_temp_new(); - TCGv t0 = tcg_constant_i32(0); + TCGv_i32 temp = tcg_temp_new_i32(); + TCGv_i32 temp2 = tcg_temp_new_i32(); + TCGv_i32 result = tcg_temp_new_i32(); + TCGv_i32 mask = tcg_temp_new_i32(); + TCGv_i32 t0 = tcg_constant_i32(0); /* create mask for sticky bits */ - tcg_gen_setcond_tl(cond, mask, r4, t0); - tcg_gen_shli_tl(mask, mask, 31); + tcg_gen_setcond_i32(cond, mask, r4, t0); + tcg_gen_shli_i32(mask, mask, 31); - tcg_gen_sub_tl(result, r1, r2); + tcg_gen_sub_i32(result, r1, r2); /* Calc PSW_V */ - tcg_gen_xor_tl(temp, result, r1); - tcg_gen_xor_tl(temp2, r1, r2); - tcg_gen_and_tl(temp, temp, temp2); - tcg_gen_movcond_tl(cond, cpu_PSW_V, r4, t0, temp, cpu_PSW_V); + tcg_gen_xor_i32(temp, result, r1); + tcg_gen_xor_i32(temp2, r1, r2); + tcg_gen_and_i32(temp, temp, temp2); + tcg_gen_movcond_i32(cond, cpu_PSW_V, r4, t0, temp, cpu_PSW_V); /* Set PSW_SV */ - tcg_gen_and_tl(temp, temp, mask); - tcg_gen_or_tl(cpu_PSW_SV, temp, cpu_PSW_SV); + tcg_gen_and_i32(temp, temp, mask); + tcg_gen_or_i32(cpu_PSW_SV, temp, cpu_PSW_SV); /* calc AV bit */ - tcg_gen_add_tl(temp, result, result); - tcg_gen_xor_tl(temp, temp, result); - tcg_gen_movcond_tl(cond, cpu_PSW_AV, r4, t0, temp, cpu_PSW_AV); + tcg_gen_add_i32(temp, result, result); + tcg_gen_xor_i32(temp, temp, result); + tcg_gen_movcond_i32(cond, cpu_PSW_AV, r4, t0, temp, cpu_PSW_AV); /* calc SAV bit */ - tcg_gen_and_tl(temp, temp, mask); - 
tcg_gen_or_tl(cpu_PSW_SAV, temp, cpu_PSW_SAV); + tcg_gen_and_i32(temp, temp, mask); + tcg_gen_or_i32(cpu_PSW_SAV, temp, cpu_PSW_SAV); /* write back result */ - tcg_gen_movcond_tl(cond, r3, r4, t0, result, r1); + tcg_gen_movcond_i32(cond, r3, r4, t0, result, r1); } -static inline void -gen_msub_h(TCGv ret_low, TCGv ret_high, TCGv r1_low, TCGv r1_high, TCGv r2, - TCGv r3, uint32_t n, uint32_t mode) +static void gen_msub_h(TCGv_i32 ret_low, TCGv_i32 ret_high, + TCGv_i32 r1_low, TCGv_i32 r1_high, + TCGv_i32 r2, TCGv_i32 r3, + uint32_t n, uint32_t mode) { - TCGv t_n = tcg_constant_i32(n); - TCGv temp = tcg_temp_new(); - TCGv temp2 = tcg_temp_new(); + TCGv_i32 t_n = tcg_constant_i32(n); + TCGv_i32 temp = tcg_temp_new_i32(); + TCGv_i32 temp2 = tcg_temp_new_i32(); TCGv_i64 temp64 = tcg_temp_new_i64(); switch (mode) { case MODE_LL: @@ -1548,14 +1561,15 @@ gen_msub_h(TCGv ret_low, TCGv ret_high, TCGv r1_low, TCGv r1_high, TCGv r2, tcg_gen_sub_tl, tcg_gen_sub_tl); } -static inline void -gen_msubs_h(TCGv ret_low, TCGv ret_high, TCGv r1_low, TCGv r1_high, TCGv r2, - TCGv r3, uint32_t n, uint32_t mode) +static void gen_msubs_h(TCGv_i32 ret_low, TCGv_i32 ret_high, + TCGv_i32 r1_low, TCGv_i32 r1_high, + TCGv_i32 r2, TCGv_i32 r3, + uint32_t n, uint32_t mode) { - TCGv t_n = tcg_constant_i32(n); - TCGv temp = tcg_temp_new(); - TCGv temp2 = tcg_temp_new(); - TCGv temp3 = tcg_temp_new(); + TCGv_i32 t_n = tcg_constant_i32(n); + TCGv_i32 temp = tcg_temp_new_i32(); + TCGv_i32 temp2 = tcg_temp_new_i32(); + TCGv_i32 temp3 = tcg_temp_new_i32(); TCGv_i64 temp64 = tcg_temp_new_i64(); switch (mode) { @@ -1574,20 +1588,21 @@ gen_msubs_h(TCGv ret_low, TCGv ret_high, TCGv r1_low, TCGv r1_high, TCGv r2, } tcg_gen_extr_i64_i32(temp, temp2, temp64); gen_subs(ret_low, r1_low, temp); - tcg_gen_mov_tl(temp, cpu_PSW_V); - tcg_gen_mov_tl(temp3, cpu_PSW_AV); + tcg_gen_mov_i32(temp, cpu_PSW_V); + tcg_gen_mov_i32(temp3, cpu_PSW_AV); gen_subs(ret_high, r1_high, temp2); /* combine v bits */ - tcg_gen_or_tl(cpu_PSW_V, cpu_PSW_V, temp); + tcg_gen_or_i32(cpu_PSW_V, cpu_PSW_V, temp); /* combine av bits */ - tcg_gen_or_tl(cpu_PSW_AV, cpu_PSW_AV, temp3); + tcg_gen_or_i32(cpu_PSW_AV, cpu_PSW_AV, temp3); } -static inline void -gen_msubm_h(TCGv ret_low, TCGv ret_high, TCGv r1_low, TCGv r1_high, TCGv r2, - TCGv r3, uint32_t n, uint32_t mode) +static void gen_msubm_h(TCGv_i32 ret_low, TCGv_i32 ret_high, + TCGv_i32 r1_low, TCGv_i32 r1_high, + TCGv_i32 r2, TCGv_i32 r3, + uint32_t n, uint32_t mode) { - TCGv t_n = tcg_constant_i32(n); + TCGv_i32 t_n = tcg_constant_i32(n); TCGv_i64 temp64 = tcg_temp_new_i64(); TCGv_i64 temp64_2 = tcg_temp_new_i64(); TCGv_i64 temp64_3 = tcg_temp_new_i64(); @@ -1611,11 +1626,12 @@ gen_msubm_h(TCGv ret_low, TCGv ret_high, TCGv r1_low, TCGv r1_high, TCGv r2, tcg_gen_extr_i64_i32(ret_low, ret_high, temp64_3); } -static inline void -gen_msubms_h(TCGv ret_low, TCGv ret_high, TCGv r1_low, TCGv r1_high, TCGv r2, - TCGv r3, uint32_t n, uint32_t mode) +static void gen_msubms_h(TCGv_i32 ret_low, TCGv_i32 ret_high, + TCGv_i32 r1_low, TCGv_i32 r1_high, + TCGv_i32 r2, TCGv_i32 r3, + uint32_t n, uint32_t mode) { - TCGv t_n = tcg_constant_i32(n); + TCGv_i32 t_n = tcg_constant_i32(n); TCGv_i64 temp64 = tcg_temp_new_i64(); TCGv_i64 temp64_2 = tcg_temp_new_i64(); switch (mode) { @@ -1637,11 +1653,12 @@ gen_msubms_h(TCGv ret_low, TCGv ret_high, TCGv r1_low, TCGv r1_high, TCGv r2, tcg_gen_extr_i64_i32(ret_low, ret_high, temp64); } -static inline void -gen_msubr64_h(TCGv ret, TCGv r1_low, TCGv r1_high, TCGv r2, TCGv r3, uint32_t n, - 
uint32_t mode) +static void gen_msubr64_h(TCGv_i32 ret, + TCGv_i32 r1_low, TCGv_i32 r1_high, + TCGv_i32 r2, TCGv_i32 r3, + uint32_t n, uint32_t mode) { - TCGv t_n = tcg_constant_i32(n); + TCGv_i32 t_n = tcg_constant_i32(n); TCGv_i64 temp64 = tcg_temp_new_i64(); switch (mode) { case MODE_LL: @@ -1660,22 +1677,24 @@ gen_msubr64_h(TCGv ret, TCGv r1_low, TCGv r1_high, TCGv r2, TCGv r3, uint32_t n, gen_helper_subr_h(ret, tcg_env, temp64, r1_low, r1_high); } -static inline void -gen_msubr32_h(TCGv ret, TCGv r1, TCGv r2, TCGv r3, uint32_t n, uint32_t mode) +static void gen_msubr32_h(TCGv_i32 ret, + TCGv_i32 r1, TCGv_i32 r2, TCGv_i32 r3, + uint32_t n, uint32_t mode) { - TCGv temp = tcg_temp_new(); - TCGv temp2 = tcg_temp_new(); + TCGv_i32 temp = tcg_temp_new_i32(); + TCGv_i32 temp2 = tcg_temp_new_i32(); - tcg_gen_andi_tl(temp2, r1, 0xffff0000); - tcg_gen_shli_tl(temp, r1, 16); + tcg_gen_andi_i32(temp2, r1, 0xffff0000); + tcg_gen_shli_i32(temp, r1, 16); gen_msubr64_h(ret, temp, temp2, r2, r3, n, mode); } -static inline void -gen_msubr64s_h(TCGv ret, TCGv r1_low, TCGv r1_high, TCGv r2, TCGv r3, - uint32_t n, uint32_t mode) +static void gen_msubr64s_h(TCGv_i32 ret, + TCGv_i32 r1_low, TCGv_i32 r1_high, + TCGv_i32 r2, TCGv_i32 r3, + uint32_t n, uint32_t mode) { - TCGv t_n = tcg_constant_i32(n); + TCGv_i32 t_n = tcg_constant_i32(n); TCGv_i64 temp64 = tcg_temp_new_i64(); switch (mode) { case MODE_LL: @@ -1694,36 +1713,37 @@ gen_msubr64s_h(TCGv ret, TCGv r1_low, TCGv r1_high, TCGv r2, TCGv r3, gen_helper_subr_h_ssov(ret, tcg_env, temp64, r1_low, r1_high); } -static inline void -gen_msubr32s_h(TCGv ret, TCGv r1, TCGv r2, TCGv r3, uint32_t n, uint32_t mode) +static void gen_msubr32s_h(TCGv_i32 ret, + TCGv_i32 r1, TCGv_i32 r2, TCGv_i32 r3, + uint32_t n, uint32_t mode) { - TCGv temp = tcg_temp_new(); - TCGv temp2 = tcg_temp_new(); + TCGv_i32 temp = tcg_temp_new_i32(); + TCGv_i32 temp2 = tcg_temp_new_i32(); - tcg_gen_andi_tl(temp2, r1, 0xffff0000); - tcg_gen_shli_tl(temp, r1, 16); + tcg_gen_andi_i32(temp2, r1, 0xffff0000); + tcg_gen_shli_i32(temp, r1, 16); gen_msubr64s_h(ret, temp, temp2, r2, r3, n, mode); } -static inline void -gen_msubr_q(TCGv ret, TCGv r1, TCGv r2, TCGv r3, uint32_t n) +static void gen_msubr_q(TCGv_i32 ret, + TCGv_i32 r1, TCGv_i32 r2, TCGv_i32 r3, uint32_t n) { - TCGv temp = tcg_constant_i32(n); + TCGv_i32 temp = tcg_constant_i32(n); gen_helper_msubr_q(ret, tcg_env, r1, r2, r3, temp); } -static inline void -gen_msubrs_q(TCGv ret, TCGv r1, TCGv r2, TCGv r3, uint32_t n) +static void gen_msubrs_q(TCGv_i32 ret, + TCGv_i32 r1, TCGv_i32 r2, TCGv_i32 r3, uint32_t n) { - TCGv temp = tcg_constant_i32(n); + TCGv_i32 temp = tcg_constant_i32(n); gen_helper_msubr_q_ssov(ret, tcg_env, r1, r2, r3, temp); } -static inline void -gen_msub32_q(TCGv ret, TCGv arg1, TCGv arg2, TCGv arg3, uint32_t n, - uint32_t up_shift) +static void gen_msub32_q(TCGv_i32 ret, + TCGv_i32 arg1, TCGv_i32 arg2, TCGv_i32 arg3, + uint32_t n, uint32_t up_shift) { - TCGv temp3 = tcg_temp_new(); + TCGv_i32 temp3 = tcg_temp_new_i32(); TCGv_i64 t1 = tcg_temp_new_i64(); TCGv_i64 t2 = tcg_temp_new_i64(); TCGv_i64 t3 = tcg_temp_new_i64(); @@ -1748,70 +1768,72 @@ gen_msub32_q(TCGv ret, TCGv arg1, TCGv arg2, TCGv arg3, uint32_t n, tcg_gen_setcondi_i64(TCG_COND_LT, t2, t3, -0x80000000LL); tcg_gen_or_i64(t1, t1, t2); tcg_gen_extrl_i64_i32(cpu_PSW_V, t1); - tcg_gen_shli_tl(cpu_PSW_V, cpu_PSW_V, 31); + tcg_gen_shli_i32(cpu_PSW_V, cpu_PSW_V, 31); /* Calc SV bit */ - tcg_gen_or_tl(cpu_PSW_SV, cpu_PSW_SV, cpu_PSW_V); + tcg_gen_or_i32(cpu_PSW_SV, 
cpu_PSW_SV, cpu_PSW_V); /* Calc AV/SAV bits */ - tcg_gen_add_tl(cpu_PSW_AV, temp3, temp3); - tcg_gen_xor_tl(cpu_PSW_AV, temp3, cpu_PSW_AV); + tcg_gen_add_i32(cpu_PSW_AV, temp3, temp3); + tcg_gen_xor_i32(cpu_PSW_AV, temp3, cpu_PSW_AV); /* calc SAV */ - tcg_gen_or_tl(cpu_PSW_SAV, cpu_PSW_SAV, cpu_PSW_AV); + tcg_gen_or_i32(cpu_PSW_SAV, cpu_PSW_SAV, cpu_PSW_AV); /* write back result */ - tcg_gen_mov_tl(ret, temp3); + tcg_gen_mov_i32(ret, temp3); } -static inline void -gen_m16sub32_q(TCGv ret, TCGv arg1, TCGv arg2, TCGv arg3, uint32_t n) +static void gen_m16sub32_q(TCGv_i32 ret, + TCGv_i32 arg1, TCGv_i32 arg2, TCGv_i32 arg3, + uint32_t n) { - TCGv temp = tcg_temp_new(); - TCGv temp2 = tcg_temp_new(); + TCGv_i32 temp = tcg_temp_new_i32(); + TCGv_i32 temp2 = tcg_temp_new_i32(); if (n == 0) { - tcg_gen_mul_tl(temp, arg2, arg3); + tcg_gen_mul_i32(temp, arg2, arg3); } else { /* n is expected to be 1 */ - tcg_gen_mul_tl(temp, arg2, arg3); - tcg_gen_shli_tl(temp, temp, 1); + tcg_gen_mul_i32(temp, arg2, arg3); + tcg_gen_shli_i32(temp, temp, 1); /* catch special case r1 = r2 = 0x8000 */ - tcg_gen_setcondi_tl(TCG_COND_EQ, temp2, temp, 0x80000000); - tcg_gen_sub_tl(temp, temp, temp2); + tcg_gen_setcondi_i32(TCG_COND_EQ, temp2, temp, 0x80000000); + tcg_gen_sub_i32(temp, temp, temp2); } gen_sub_d(ret, arg1, temp); } -static inline void -gen_m16subs32_q(TCGv ret, TCGv arg1, TCGv arg2, TCGv arg3, uint32_t n) +static void gen_m16subs32_q(TCGv_i32 ret, + TCGv_i32 arg1, TCGv_i32 arg2, TCGv_i32 arg3, + uint32_t n) { - TCGv temp = tcg_temp_new(); - TCGv temp2 = tcg_temp_new(); + TCGv_i32 temp = tcg_temp_new_i32(); + TCGv_i32 temp2 = tcg_temp_new_i32(); if (n == 0) { - tcg_gen_mul_tl(temp, arg2, arg3); + tcg_gen_mul_i32(temp, arg2, arg3); } else { /* n is expected to be 1 */ - tcg_gen_mul_tl(temp, arg2, arg3); - tcg_gen_shli_tl(temp, temp, 1); + tcg_gen_mul_i32(temp, arg2, arg3); + tcg_gen_shli_i32(temp, temp, 1); /* catch special case r1 = r2 = 0x8000 */ - tcg_gen_setcondi_tl(TCG_COND_EQ, temp2, temp, 0x80000000); - tcg_gen_sub_tl(temp, temp, temp2); + tcg_gen_setcondi_i32(TCG_COND_EQ, temp2, temp, 0x80000000); + tcg_gen_sub_i32(temp, temp, temp2); } gen_subs(ret, arg1, temp); } -static inline void -gen_m16sub64_q(TCGv rl, TCGv rh, TCGv arg1_low, TCGv arg1_high, TCGv arg2, - TCGv arg3, uint32_t n) +static void gen_m16sub64_q(TCGv_i32 rl, TCGv_i32 rh, + TCGv_i32 arg1_low, TCGv_i32 arg1_high, + TCGv_i32 arg2, TCGv_i32 arg3, uint32_t n) { - TCGv temp = tcg_temp_new(); - TCGv temp2 = tcg_temp_new(); + TCGv_i32 temp = tcg_temp_new_i32(); + TCGv_i32 temp2 = tcg_temp_new_i32(); TCGv_i64 t1 = tcg_temp_new_i64(); TCGv_i64 t2 = tcg_temp_new_i64(); TCGv_i64 t3 = tcg_temp_new_i64(); if (n == 0) { - tcg_gen_mul_tl(temp, arg2, arg3); + tcg_gen_mul_i32(temp, arg2, arg3); } else { /* n is expected to be 1 */ - tcg_gen_mul_tl(temp, arg2, arg3); - tcg_gen_shli_tl(temp, temp, 1); + tcg_gen_mul_i32(temp, arg2, arg3); + tcg_gen_shli_i32(temp, temp, 1); /* catch special case r1 = r2 = 0x8000 */ - tcg_gen_setcondi_tl(TCG_COND_EQ, temp2, temp, 0x80000000); - tcg_gen_sub_tl(temp, temp, temp2); + tcg_gen_setcondi_i32(TCG_COND_EQ, temp2, temp, 0x80000000); + tcg_gen_sub_i32(temp, temp, temp2); } tcg_gen_ext_i32_i64(t2, temp); tcg_gen_shli_i64(t2, t2, 16); @@ -1821,23 +1843,23 @@ gen_m16sub64_q(TCGv rl, TCGv rh, TCGv arg1_low, TCGv arg1_high, TCGv arg2, tcg_gen_extr_i64_i32(rl, rh, t3); } -static inline void -gen_m16subs64_q(TCGv rl, TCGv rh, TCGv arg1_low, TCGv arg1_high, TCGv arg2, - TCGv arg3, uint32_t n) +static void 
gen_m16subs64_q(TCGv_i32 rl, TCGv_i32 rh, + TCGv_i32 arg1_low, TCGv_i32 arg1_high, + TCGv_i32 arg2, TCGv_i32 arg3, uint32_t n) { - TCGv temp = tcg_temp_new(); - TCGv temp2 = tcg_temp_new(); + TCGv_i32 temp = tcg_temp_new_i32(); + TCGv_i32 temp2 = tcg_temp_new_i32(); TCGv_i64 t1 = tcg_temp_new_i64(); TCGv_i64 t2 = tcg_temp_new_i64(); if (n == 0) { - tcg_gen_mul_tl(temp, arg2, arg3); + tcg_gen_mul_i32(temp, arg2, arg3); } else { /* n is expected to be 1 */ - tcg_gen_mul_tl(temp, arg2, arg3); - tcg_gen_shli_tl(temp, temp, 1); + tcg_gen_mul_i32(temp, arg2, arg3); + tcg_gen_shli_i32(temp, temp, 1); /* catch special case r1 = r2 = 0x8000 */ - tcg_gen_setcondi_tl(TCG_COND_EQ, temp2, temp, 0x80000000); - tcg_gen_sub_tl(temp, temp, temp2); + tcg_gen_setcondi_i32(TCG_COND_EQ, temp2, temp, 0x80000000); + tcg_gen_sub_i32(temp, temp, temp2); } tcg_gen_ext_i32_i64(t2, temp); tcg_gen_shli_i64(t2, t2, 16); @@ -1847,15 +1869,15 @@ gen_m16subs64_q(TCGv rl, TCGv rh, TCGv arg1_low, TCGv arg1_high, TCGv arg2, tcg_gen_extr_i64_i32(rl, rh, t1); } -static inline void -gen_msub64_q(TCGv rl, TCGv rh, TCGv arg1_low, TCGv arg1_high, TCGv arg2, - TCGv arg3, uint32_t n) +static void gen_msub64_q(TCGv_i32 rl, TCGv_i32 rh, + TCGv_i32 arg1_low, TCGv_i32 arg1_high, + TCGv_i32 arg2, TCGv_i32 arg3, uint32_t n) { TCGv_i64 t1 = tcg_temp_new_i64(); TCGv_i64 t2 = tcg_temp_new_i64(); TCGv_i64 t3 = tcg_temp_new_i64(); TCGv_i64 t4 = tcg_temp_new_i64(); - TCGv temp, temp2; + TCGv_i32 temp, temp2; tcg_gen_concat_i32_i64(t1, arg1_low, arg1_high); tcg_gen_ext_i32_i64(t2, arg2); @@ -1875,29 +1897,29 @@ gen_msub64_q(TCGv rl, TCGv rh, TCGv arg1_low, TCGv arg1_high, TCGv arg2, (0x80000000 * 0x80000000) << 1). If this is the case, we negate the ovf. */ if (n == 1) { - temp = tcg_temp_new(); - temp2 = tcg_temp_new(); - tcg_gen_setcondi_tl(TCG_COND_EQ, temp, arg2, 0x80000000); - tcg_gen_setcond_tl(TCG_COND_EQ, temp2, arg2, arg3); - tcg_gen_and_tl(temp, temp, temp2); - tcg_gen_shli_tl(temp, temp, 31); + temp = tcg_temp_new_i32(); + temp2 = tcg_temp_new_i32(); + tcg_gen_setcondi_i32(TCG_COND_EQ, temp, arg2, 0x80000000); + tcg_gen_setcond_i32(TCG_COND_EQ, temp2, arg2, arg3); + tcg_gen_and_i32(temp, temp, temp2); + tcg_gen_shli_i32(temp, temp, 31); /* negate v bit, if special condition */ - tcg_gen_xor_tl(cpu_PSW_V, cpu_PSW_V, temp); + tcg_gen_xor_i32(cpu_PSW_V, cpu_PSW_V, temp); } /* write back result */ tcg_gen_extr_i64_i32(rl, rh, t4); /* Calc SV bit */ - tcg_gen_or_tl(cpu_PSW_SV, cpu_PSW_SV, cpu_PSW_V); + tcg_gen_or_i32(cpu_PSW_SV, cpu_PSW_SV, cpu_PSW_V); /* Calc AV/SAV bits */ - tcg_gen_add_tl(cpu_PSW_AV, rh, rh); - tcg_gen_xor_tl(cpu_PSW_AV, rh, cpu_PSW_AV); + tcg_gen_add_i32(cpu_PSW_AV, rh, rh); + tcg_gen_xor_i32(cpu_PSW_AV, rh, cpu_PSW_AV); /* calc SAV */ - tcg_gen_or_tl(cpu_PSW_SAV, cpu_PSW_SAV, cpu_PSW_AV); + tcg_gen_or_i32(cpu_PSW_SAV, cpu_PSW_SAV, cpu_PSW_AV); } -static inline void -gen_msubs32_q(TCGv ret, TCGv arg1, TCGv arg2, TCGv arg3, uint32_t n, - uint32_t up_shift) +static void gen_msubs32_q(TCGv_i32 ret, + TCGv_i32 arg1, TCGv_i32 arg2, TCGv_i32 arg3, + uint32_t n, uint32_t up_shift) { TCGv_i64 t1 = tcg_temp_new_i64(); TCGv_i64 t2 = tcg_temp_new_i64(); @@ -1918,25 +1940,26 @@ gen_msubs32_q(TCGv ret, TCGv arg1, TCGv arg2, TCGv arg3, uint32_t n, gen_helper_msub32_q_sub_ssov(ret, tcg_env, t1, t3); } -static inline void -gen_msubs64_q(TCGv rl, TCGv rh, TCGv arg1_low, TCGv arg1_high, TCGv arg2, - TCGv arg3, uint32_t n) +static void gen_msubs64_q(TCGv_i32 rl, TCGv_i32 rh, + TCGv_i32 arg1_low, TCGv_i32 arg1_high, + TCGv_i32 arg2, 
TCGv_i32 arg3, uint32_t n) { TCGv_i64 r1 = tcg_temp_new_i64(); - TCGv t_n = tcg_constant_i32(n); + TCGv_i32 t_n = tcg_constant_i32(n); tcg_gen_concat_i32_i64(r1, arg1_low, arg1_high); gen_helper_msub64_q_ssov(r1, tcg_env, r1, arg2, arg3, t_n); tcg_gen_extr_i64_i32(rl, rh, r1); } -static inline void -gen_msubad_h(TCGv ret_low, TCGv ret_high, TCGv r1_low, TCGv r1_high, TCGv r2, - TCGv r3, uint32_t n, uint32_t mode) +static void gen_msubad_h(TCGv_i32 ret_low, TCGv_i32 ret_high, + TCGv_i32 r1_low, TCGv_i32 r1_high, + TCGv_i32 r2, TCGv_i32 r3, + uint32_t n, uint32_t mode) { - TCGv t_n = tcg_constant_i32(n); - TCGv temp = tcg_temp_new(); - TCGv temp2 = tcg_temp_new(); + TCGv_i32 t_n = tcg_constant_i32(n); + TCGv_i32 temp = tcg_temp_new_i32(); + TCGv_i32 temp2 = tcg_temp_new_i32(); TCGv_i64 temp64 = tcg_temp_new_i64(); switch (mode) { case MODE_LL: @@ -1957,11 +1980,12 @@ gen_msubad_h(TCGv ret_low, TCGv ret_high, TCGv r1_low, TCGv r1_high, TCGv r2, tcg_gen_add_tl, tcg_gen_sub_tl); } -static inline void -gen_msubadm_h(TCGv ret_low, TCGv ret_high, TCGv r1_low, TCGv r1_high, TCGv r2, - TCGv r3, uint32_t n, uint32_t mode) +static void gen_msubadm_h(TCGv_i32 ret_low, TCGv_i32 ret_high, + TCGv_i32 r1_low, TCGv_i32 r1_high, + TCGv_i32 r2, TCGv_i32 r3, + uint32_t n, uint32_t mode) { - TCGv t_n = tcg_constant_i32(n); + TCGv_i32 t_n = tcg_constant_i32(n); TCGv_i64 temp64 = tcg_temp_new_i64(); TCGv_i64 temp64_2 = tcg_temp_new_i64(); TCGv_i64 temp64_3 = tcg_temp_new_i64(); @@ -1990,12 +2014,13 @@ gen_msubadm_h(TCGv ret_low, TCGv ret_high, TCGv r1_low, TCGv r1_high, TCGv r2, tcg_gen_extr_i64_i32(ret_low, ret_high, temp64_2); } -static inline void -gen_msubadr32_h(TCGv ret, TCGv r1, TCGv r2, TCGv r3, uint32_t n, uint32_t mode) +static void gen_msubadr32_h(TCGv_i32 ret, + TCGv_i32 r1, TCGv_i32 r2, TCGv_i32 r3, + uint32_t n, uint32_t mode) { - TCGv t_n = tcg_constant_i32(n); - TCGv temp = tcg_temp_new(); - TCGv temp2 = tcg_temp_new(); + TCGv_i32 t_n = tcg_constant_i32(n); + TCGv_i32 temp = tcg_temp_new_i32(); + TCGv_i32 temp2 = tcg_temp_new_i32(); TCGv_i64 temp64 = tcg_temp_new_i64(); switch (mode) { case MODE_LL: @@ -2011,19 +2036,20 @@ gen_msubadr32_h(TCGv ret, TCGv r1, TCGv r2, TCGv r3, uint32_t n, uint32_t mode) GEN_HELPER_UU(mul_h, temp64, r2, r3, t_n); break; } - tcg_gen_andi_tl(temp2, r1, 0xffff0000); - tcg_gen_shli_tl(temp, r1, 16); + tcg_gen_andi_i32(temp2, r1, 0xffff0000); + tcg_gen_shli_i32(temp, r1, 16); gen_helper_subadr_h(ret, tcg_env, temp64, temp, temp2); } -static inline void -gen_msubads_h(TCGv ret_low, TCGv ret_high, TCGv r1_low, TCGv r1_high, TCGv r2, - TCGv r3, uint32_t n, uint32_t mode) +static void gen_msubads_h(TCGv_i32 ret_low, TCGv_i32 ret_high, + TCGv_i32 r1_low, TCGv_i32 r1_high, + TCGv_i32 r2, TCGv_i32 r3, + uint32_t n, uint32_t mode) { - TCGv t_n = tcg_constant_i32(n); - TCGv temp = tcg_temp_new(); - TCGv temp2 = tcg_temp_new(); - TCGv temp3 = tcg_temp_new(); + TCGv_i32 t_n = tcg_constant_i32(n); + TCGv_i32 temp = tcg_temp_new_i32(); + TCGv_i32 temp2 = tcg_temp_new_i32(); + TCGv_i32 temp3 = tcg_temp_new_i32(); TCGv_i64 temp64 = tcg_temp_new_i64(); switch (mode) { @@ -2042,20 +2068,21 @@ gen_msubads_h(TCGv ret_low, TCGv ret_high, TCGv r1_low, TCGv r1_high, TCGv r2, } tcg_gen_extr_i64_i32(temp, temp2, temp64); gen_adds(ret_low, r1_low, temp); - tcg_gen_mov_tl(temp, cpu_PSW_V); - tcg_gen_mov_tl(temp3, cpu_PSW_AV); + tcg_gen_mov_i32(temp, cpu_PSW_V); + tcg_gen_mov_i32(temp3, cpu_PSW_AV); gen_subs(ret_high, r1_high, temp2); /* combine v bits */ - tcg_gen_or_tl(cpu_PSW_V, cpu_PSW_V, 
temp); + tcg_gen_or_i32(cpu_PSW_V, cpu_PSW_V, temp); /* combine av bits */ - tcg_gen_or_tl(cpu_PSW_AV, cpu_PSW_AV, temp3); + tcg_gen_or_i32(cpu_PSW_AV, cpu_PSW_AV, temp3); } -static inline void -gen_msubadms_h(TCGv ret_low, TCGv ret_high, TCGv r1_low, TCGv r1_high, TCGv r2, - TCGv r3, uint32_t n, uint32_t mode) +static void gen_msubadms_h(TCGv_i32 ret_low, TCGv_i32 ret_high, + TCGv_i32 r1_low, TCGv_i32 r1_high, + TCGv_i32 r2, TCGv_i32 r3, + uint32_t n, uint32_t mode) { - TCGv t_n = tcg_constant_i32(n); + TCGv_i32 t_n = tcg_constant_i32(n); TCGv_i64 temp64 = tcg_temp_new_i64(); TCGv_i64 temp64_2 = tcg_temp_new_i64(); @@ -2083,12 +2110,13 @@ gen_msubadms_h(TCGv ret_low, TCGv ret_high, TCGv r1_low, TCGv r1_high, TCGv r2, tcg_gen_extr_i64_i32(ret_low, ret_high, temp64); } -static inline void -gen_msubadr32s_h(TCGv ret, TCGv r1, TCGv r2, TCGv r3, uint32_t n, uint32_t mode) +static void gen_msubadr32s_h(TCGv_i32 ret, + TCGv_i32 r1, TCGv_i32 r2, TCGv_i32 r3, + uint32_t n, uint32_t mode) { - TCGv t_n = tcg_constant_i32(n); - TCGv temp = tcg_temp_new(); - TCGv temp2 = tcg_temp_new(); + TCGv_i32 t_n = tcg_constant_i32(n); + TCGv_i32 temp = tcg_temp_new_i32(); + TCGv_i32 temp2 = tcg_temp_new_i32(); TCGv_i64 temp64 = tcg_temp_new_i64(); switch (mode) { case MODE_LL: @@ -2104,166 +2132,168 @@ gen_msubadr32s_h(TCGv ret, TCGv r1, TCGv r2, TCGv r3, uint32_t n, uint32_t mode) GEN_HELPER_UU(mul_h, temp64, r2, r3, t_n); break; } - tcg_gen_andi_tl(temp2, r1, 0xffff0000); - tcg_gen_shli_tl(temp, r1, 16); + tcg_gen_andi_i32(temp2, r1, 0xffff0000); + tcg_gen_shli_i32(temp, r1, 16); gen_helper_subadr_h_ssov(ret, tcg_env, temp64, temp, temp2); } -static inline void gen_abs(TCGv ret, TCGv r1) +static void gen_abs(TCGv_i32 ret, TCGv_i32 r1) { - tcg_gen_abs_tl(ret, r1); + tcg_gen_abs_i32(ret, r1); /* overflow can only happen, if r1 = 0x80000000 */ - tcg_gen_setcondi_tl(TCG_COND_EQ, cpu_PSW_V, r1, 0x80000000); - tcg_gen_shli_tl(cpu_PSW_V, cpu_PSW_V, 31); + tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_PSW_V, r1, 0x80000000); + tcg_gen_shli_i32(cpu_PSW_V, cpu_PSW_V, 31); /* calc SV bit */ - tcg_gen_or_tl(cpu_PSW_SV, cpu_PSW_SV, cpu_PSW_V); + tcg_gen_or_i32(cpu_PSW_SV, cpu_PSW_SV, cpu_PSW_V); /* Calc AV bit */ - tcg_gen_add_tl(cpu_PSW_AV, ret, ret); - tcg_gen_xor_tl(cpu_PSW_AV, ret, cpu_PSW_AV); + tcg_gen_add_i32(cpu_PSW_AV, ret, ret); + tcg_gen_xor_i32(cpu_PSW_AV, ret, cpu_PSW_AV); /* calc SAV bit */ - tcg_gen_or_tl(cpu_PSW_SAV, cpu_PSW_SAV, cpu_PSW_AV); + tcg_gen_or_i32(cpu_PSW_SAV, cpu_PSW_SAV, cpu_PSW_AV); } -static inline void gen_absdif(TCGv ret, TCGv r1, TCGv r2) +static void gen_absdif(TCGv_i32 ret, TCGv_i32 r1, TCGv_i32 r2) { - TCGv temp = tcg_temp_new_i32(); - TCGv result = tcg_temp_new_i32(); + TCGv_i32 temp = tcg_temp_new_i32(); + TCGv_i32 result = tcg_temp_new_i32(); - tcg_gen_sub_tl(result, r1, r2); - tcg_gen_sub_tl(temp, r2, r1); - tcg_gen_movcond_tl(TCG_COND_GT, result, r1, r2, result, temp); + tcg_gen_sub_i32(result, r1, r2); + tcg_gen_sub_i32(temp, r2, r1); + tcg_gen_movcond_i32(TCG_COND_GT, result, r1, r2, result, temp); /* calc V bit */ - tcg_gen_xor_tl(cpu_PSW_V, result, r1); - tcg_gen_xor_tl(temp, result, r2); - tcg_gen_movcond_tl(TCG_COND_GT, cpu_PSW_V, r1, r2, cpu_PSW_V, temp); - tcg_gen_xor_tl(temp, r1, r2); - tcg_gen_and_tl(cpu_PSW_V, cpu_PSW_V, temp); + tcg_gen_xor_i32(cpu_PSW_V, result, r1); + tcg_gen_xor_i32(temp, result, r2); + tcg_gen_movcond_i32(TCG_COND_GT, cpu_PSW_V, r1, r2, cpu_PSW_V, temp); + tcg_gen_xor_i32(temp, r1, r2); + tcg_gen_and_i32(cpu_PSW_V, cpu_PSW_V, temp); /* calc SV bit */ - 
tcg_gen_or_tl(cpu_PSW_SV, cpu_PSW_SV, cpu_PSW_V); + tcg_gen_or_i32(cpu_PSW_SV, cpu_PSW_SV, cpu_PSW_V); /* Calc AV bit */ - tcg_gen_add_tl(cpu_PSW_AV, result, result); - tcg_gen_xor_tl(cpu_PSW_AV, result, cpu_PSW_AV); + tcg_gen_add_i32(cpu_PSW_AV, result, result); + tcg_gen_xor_i32(cpu_PSW_AV, result, cpu_PSW_AV); /* calc SAV bit */ - tcg_gen_or_tl(cpu_PSW_SAV, cpu_PSW_SAV, cpu_PSW_AV); + tcg_gen_or_i32(cpu_PSW_SAV, cpu_PSW_SAV, cpu_PSW_AV); /* write back result */ - tcg_gen_mov_tl(ret, result); + tcg_gen_mov_i32(ret, result); } -static inline void gen_absdifi(TCGv ret, TCGv r1, int32_t con) +static void gen_absdifi(TCGv_i32 ret, TCGv_i32 r1, int32_t con) { - TCGv temp = tcg_constant_i32(con); + TCGv_i32 temp = tcg_constant_i32(con); gen_absdif(ret, r1, temp); } -static inline void gen_absdifsi(TCGv ret, TCGv r1, int32_t con) +static void gen_absdifsi(TCGv_i32 ret, TCGv_i32 r1, int32_t con) { - TCGv temp = tcg_constant_i32(con); + TCGv_i32 temp = tcg_constant_i32(con); gen_helper_absdif_ssov(ret, tcg_env, r1, temp); } -static inline void gen_mul_i32s(TCGv ret, TCGv r1, TCGv r2) +static void gen_mul_i32s(TCGv_i32 ret, TCGv_i32 r1, TCGv_i32 r2) { - TCGv high = tcg_temp_new(); - TCGv low = tcg_temp_new(); + TCGv_i32 high = tcg_temp_new_i32(); + TCGv_i32 low = tcg_temp_new_i32(); - tcg_gen_muls2_tl(low, high, r1, r2); - tcg_gen_mov_tl(ret, low); + tcg_gen_muls2_i32(low, high, r1, r2); + tcg_gen_mov_i32(ret, low); /* calc V bit */ - tcg_gen_sari_tl(low, low, 31); - tcg_gen_setcond_tl(TCG_COND_NE, cpu_PSW_V, high, low); - tcg_gen_shli_tl(cpu_PSW_V, cpu_PSW_V, 31); + tcg_gen_sari_i32(low, low, 31); + tcg_gen_setcond_i32(TCG_COND_NE, cpu_PSW_V, high, low); + tcg_gen_shli_i32(cpu_PSW_V, cpu_PSW_V, 31); /* calc SV bit */ - tcg_gen_or_tl(cpu_PSW_SV, cpu_PSW_SV, cpu_PSW_V); + tcg_gen_or_i32(cpu_PSW_SV, cpu_PSW_SV, cpu_PSW_V); /* Calc AV bit */ - tcg_gen_add_tl(cpu_PSW_AV, ret, ret); - tcg_gen_xor_tl(cpu_PSW_AV, ret, cpu_PSW_AV); + tcg_gen_add_i32(cpu_PSW_AV, ret, ret); + tcg_gen_xor_i32(cpu_PSW_AV, ret, cpu_PSW_AV); /* calc SAV bit */ - tcg_gen_or_tl(cpu_PSW_SAV, cpu_PSW_SAV, cpu_PSW_AV); + tcg_gen_or_i32(cpu_PSW_SAV, cpu_PSW_SAV, cpu_PSW_AV); } -static inline void gen_muli_i32s(TCGv ret, TCGv r1, int32_t con) +static void gen_muli_i32s(TCGv_i32 ret, TCGv_i32 r1, int32_t con) { - TCGv temp = tcg_constant_i32(con); + TCGv_i32 temp = tcg_constant_i32(con); gen_mul_i32s(ret, r1, temp); } -static inline void gen_mul_i64s(TCGv ret_low, TCGv ret_high, TCGv r1, TCGv r2) +static void gen_mul_i64s(TCGv_i32 ret_low, TCGv_i32 ret_high, + TCGv_i32 r1, TCGv_i32 r2) { - tcg_gen_muls2_tl(ret_low, ret_high, r1, r2); + tcg_gen_muls2_i32(ret_low, ret_high, r1, r2); /* clear V bit */ - tcg_gen_movi_tl(cpu_PSW_V, 0); + tcg_gen_movi_i32(cpu_PSW_V, 0); /* calc SV bit */ - tcg_gen_or_tl(cpu_PSW_SV, cpu_PSW_SV, cpu_PSW_V); + tcg_gen_or_i32(cpu_PSW_SV, cpu_PSW_SV, cpu_PSW_V); /* Calc AV bit */ - tcg_gen_add_tl(cpu_PSW_AV, ret_high, ret_high); - tcg_gen_xor_tl(cpu_PSW_AV, ret_high, cpu_PSW_AV); + tcg_gen_add_i32(cpu_PSW_AV, ret_high, ret_high); + tcg_gen_xor_i32(cpu_PSW_AV, ret_high, cpu_PSW_AV); /* calc SAV bit */ - tcg_gen_or_tl(cpu_PSW_SAV, cpu_PSW_SAV, cpu_PSW_AV); + tcg_gen_or_i32(cpu_PSW_SAV, cpu_PSW_SAV, cpu_PSW_AV); } -static inline void gen_muli_i64s(TCGv ret_low, TCGv ret_high, TCGv r1, - int32_t con) +static void gen_muli_i64s(TCGv_i32 ret_low, TCGv_i32 ret_high, + TCGv_i32 r1, int32_t con) { - TCGv temp = tcg_constant_i32(con); + TCGv_i32 temp = tcg_constant_i32(con); gen_mul_i64s(ret_low, ret_high, r1, temp); } 
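/*
 * Illustrative sketch (not part of the patch): the PSW bookkeeping that the
 * surrounding gen_mul_i64s()/gen_mul_i64u() translators emit, written out as
 * plain host C so the flag semantics are easier to follow. The semantics are
 * read off the generated ops above; struct psw_bits and mul_i64s_flags() are
 * hypothetical names for this sketch only -- the real translator updates the
 * global TCG temporaries cpu_PSW_V/SV/AV/SAV instead of a struct.
 */
#include <stdint.h>

struct psw_bits {
    uint32_t v;   /* overflow, architecturally significant in bit 31 only */
    uint32_t sv;  /* sticky overflow */
    uint32_t av;  /* advanced overflow, bit 31 only */
    uint32_t sav; /* sticky advanced overflow */
};

static int64_t mul_i64s_flags(int32_t r1, int32_t r2, struct psw_bits *psw)
{
    int64_t result = (int64_t)r1 * (int64_t)r2;
    uint32_t hi = (uint64_t)result >> 32;

    psw->v = 0;                /* a 32x32->64 multiply cannot overflow */
    psw->sv |= psw->v;         /* so the sticky overflow bit is unchanged */
    psw->av = (hi + hi) ^ hi;  /* bit 31 becomes result[63] ^ result[62] */
    psw->sav |= psw->av;       /* accumulate into the sticky AV bit */
    return result;
}

/*
 * Only bit 31 of v/av carries meaning, which is why the 32-bit helpers such
 * as gen_mul_i32s() shift their setcond result left by 31 before OR-ing it
 * into the sticky bits.
 */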
-static inline void gen_mul_i64u(TCGv ret_low, TCGv ret_high, TCGv r1, TCGv r2) +static void gen_mul_i64u(TCGv_i32 ret_low, TCGv_i32 ret_high, + TCGv_i32 r1, TCGv_i32 r2) { - tcg_gen_mulu2_tl(ret_low, ret_high, r1, r2); + tcg_gen_mulu2_i32(ret_low, ret_high, r1, r2); /* clear V bit */ - tcg_gen_movi_tl(cpu_PSW_V, 0); + tcg_gen_movi_i32(cpu_PSW_V, 0); /* calc SV bit */ - tcg_gen_or_tl(cpu_PSW_SV, cpu_PSW_SV, cpu_PSW_V); + tcg_gen_or_i32(cpu_PSW_SV, cpu_PSW_SV, cpu_PSW_V); /* Calc AV bit */ - tcg_gen_add_tl(cpu_PSW_AV, ret_high, ret_high); - tcg_gen_xor_tl(cpu_PSW_AV, ret_high, cpu_PSW_AV); + tcg_gen_add_i32(cpu_PSW_AV, ret_high, ret_high); + tcg_gen_xor_i32(cpu_PSW_AV, ret_high, cpu_PSW_AV); /* calc SAV bit */ - tcg_gen_or_tl(cpu_PSW_SAV, cpu_PSW_SAV, cpu_PSW_AV); + tcg_gen_or_i32(cpu_PSW_SAV, cpu_PSW_SAV, cpu_PSW_AV); } -static inline void gen_muli_i64u(TCGv ret_low, TCGv ret_high, TCGv r1, - int32_t con) +static void gen_muli_i64u(TCGv_i32 ret_low, TCGv_i32 ret_high, + TCGv_i32 r1, int32_t con) { - TCGv temp = tcg_constant_i32(con); + TCGv_i32 temp = tcg_constant_i32(con); gen_mul_i64u(ret_low, ret_high, r1, temp); } -static inline void gen_mulsi_i32(TCGv ret, TCGv r1, int32_t con) +static void gen_mulsi_i32(TCGv_i32 ret, TCGv_i32 r1, int32_t con) { - TCGv temp = tcg_constant_i32(con); + TCGv_i32 temp = tcg_constant_i32(con); gen_helper_mul_ssov(ret, tcg_env, r1, temp); } -static inline void gen_mulsui_i32(TCGv ret, TCGv r1, int32_t con) +static void gen_mulsui_i32(TCGv_i32 ret, TCGv_i32 r1, int32_t con) { - TCGv temp = tcg_constant_i32(con); + TCGv_i32 temp = tcg_constant_i32(con); gen_helper_mul_suov(ret, tcg_env, r1, temp); } /* gen_maddsi_32(cpu_gpr_d[r4], cpu_gpr_d[r1], cpu_gpr_d[r3], const9); */ -static inline void gen_maddsi_32(TCGv ret, TCGv r1, TCGv r2, int32_t con) +static void gen_maddsi_32(TCGv_i32 ret, TCGv_i32 r1, TCGv_i32 r2, int32_t con) { - TCGv temp = tcg_constant_i32(con); + TCGv_i32 temp = tcg_constant_i32(con); gen_helper_madd32_ssov(ret, tcg_env, r1, r2, temp); } -static inline void gen_maddsui_32(TCGv ret, TCGv r1, TCGv r2, int32_t con) +static void gen_maddsui_32(TCGv_i32 ret, TCGv_i32 r1, TCGv_i32 r2, int32_t con) { - TCGv temp = tcg_constant_i32(con); + TCGv_i32 temp = tcg_constant_i32(con); gen_helper_madd32_suov(ret, tcg_env, r1, r2, temp); } -static void -gen_mul_q(TCGv rl, TCGv rh, TCGv arg1, TCGv arg2, uint32_t n, uint32_t up_shift) +static void gen_mul_q(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 arg1, TCGv_i32 arg2, + uint32_t n, uint32_t up_shift) { TCGv_i64 temp_64 = tcg_temp_new_i64(); TCGv_i64 temp2_64 = tcg_temp_new_i64(); if (n == 0) { if (up_shift == 32) { - tcg_gen_muls2_tl(rh, rl, arg1, arg2); + tcg_gen_muls2_i32(rh, rl, arg1, arg2); } else if (up_shift == 16) { tcg_gen_ext_i32_i64(temp_64, arg1); tcg_gen_ext_i32_i64(temp2_64, arg2); @@ -2272,10 +2302,10 @@ gen_mul_q(TCGv rl, TCGv rh, TCGv arg1, TCGv arg2, uint32_t n, uint32_t up_shift) tcg_gen_shri_i64(temp_64, temp_64, up_shift); tcg_gen_extr_i64_i32(rl, rh, temp_64); } else { - tcg_gen_muls2_tl(rl, rh, arg1, arg2); + tcg_gen_muls2_i32(rl, rh, arg1, arg2); } /* reset v bit */ - tcg_gen_movi_tl(cpu_PSW_V, 0); + tcg_gen_movi_i32(cpu_PSW_V, 0); } else { /* n is expected to be 1 */ tcg_gen_ext_i32_i64(temp_64, arg1); tcg_gen_ext_i32_i64(temp2_64, arg2); @@ -2290,79 +2320,78 @@ gen_mul_q(TCGv rl, TCGv rh, TCGv arg1, TCGv arg2, uint32_t n, uint32_t up_shift) tcg_gen_extr_i64_i32(rl, rh, temp_64); /* overflow only occurs if r1 = r2 = 0x8000 */ if (up_shift == 0) {/* result is 64 bit */ - 
tcg_gen_setcondi_tl(TCG_COND_EQ, cpu_PSW_V, rh, + tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_PSW_V, rh, 0x80000000); } else { /* result is 32 bit */ - tcg_gen_setcondi_tl(TCG_COND_EQ, cpu_PSW_V, rl, + tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_PSW_V, rl, 0x80000000); } - tcg_gen_shli_tl(cpu_PSW_V, cpu_PSW_V, 31); + tcg_gen_shli_i32(cpu_PSW_V, cpu_PSW_V, 31); /* calc sv overflow bit */ - tcg_gen_or_tl(cpu_PSW_SV, cpu_PSW_SV, cpu_PSW_V); + tcg_gen_or_i32(cpu_PSW_SV, cpu_PSW_SV, cpu_PSW_V); } /* calc av overflow bit */ if (up_shift == 0) { - tcg_gen_add_tl(cpu_PSW_AV, rh, rh); - tcg_gen_xor_tl(cpu_PSW_AV, rh, cpu_PSW_AV); + tcg_gen_add_i32(cpu_PSW_AV, rh, rh); + tcg_gen_xor_i32(cpu_PSW_AV, rh, cpu_PSW_AV); } else { - tcg_gen_add_tl(cpu_PSW_AV, rl, rl); - tcg_gen_xor_tl(cpu_PSW_AV, rl, cpu_PSW_AV); + tcg_gen_add_i32(cpu_PSW_AV, rl, rl); + tcg_gen_xor_i32(cpu_PSW_AV, rl, cpu_PSW_AV); } /* calc sav overflow bit */ - tcg_gen_or_tl(cpu_PSW_SAV, cpu_PSW_SAV, cpu_PSW_AV); + tcg_gen_or_i32(cpu_PSW_SAV, cpu_PSW_SAV, cpu_PSW_AV); } -static void -gen_mul_q_16(TCGv ret, TCGv arg1, TCGv arg2, uint32_t n) +static void gen_mul_q_16(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2, uint32_t n) { - TCGv temp = tcg_temp_new(); + TCGv_i32 temp = tcg_temp_new_i32(); if (n == 0) { - tcg_gen_mul_tl(ret, arg1, arg2); + tcg_gen_mul_i32(ret, arg1, arg2); } else { /* n is expected to be 1 */ - tcg_gen_mul_tl(ret, arg1, arg2); - tcg_gen_shli_tl(ret, ret, 1); + tcg_gen_mul_i32(ret, arg1, arg2); + tcg_gen_shli_i32(ret, ret, 1); /* catch special case r1 = r2 = 0x8000 */ - tcg_gen_setcondi_tl(TCG_COND_EQ, temp, ret, 0x80000000); - tcg_gen_sub_tl(ret, ret, temp); + tcg_gen_setcondi_i32(TCG_COND_EQ, temp, ret, 0x80000000); + tcg_gen_sub_i32(ret, ret, temp); } /* reset v bit */ - tcg_gen_movi_tl(cpu_PSW_V, 0); + tcg_gen_movi_i32(cpu_PSW_V, 0); /* calc av overflow bit */ - tcg_gen_add_tl(cpu_PSW_AV, ret, ret); - tcg_gen_xor_tl(cpu_PSW_AV, ret, cpu_PSW_AV); + tcg_gen_add_i32(cpu_PSW_AV, ret, ret); + tcg_gen_xor_i32(cpu_PSW_AV, ret, cpu_PSW_AV); /* calc sav overflow bit */ - tcg_gen_or_tl(cpu_PSW_SAV, cpu_PSW_SAV, cpu_PSW_AV); + tcg_gen_or_i32(cpu_PSW_SAV, cpu_PSW_SAV, cpu_PSW_AV); } -static void gen_mulr_q(TCGv ret, TCGv arg1, TCGv arg2, uint32_t n) +static void gen_mulr_q(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2, uint32_t n) { - TCGv temp = tcg_temp_new(); + TCGv_i32 temp = tcg_temp_new_i32(); if (n == 0) { - tcg_gen_mul_tl(ret, arg1, arg2); - tcg_gen_addi_tl(ret, ret, 0x8000); + tcg_gen_mul_i32(ret, arg1, arg2); + tcg_gen_addi_i32(ret, ret, 0x8000); } else { - tcg_gen_mul_tl(ret, arg1, arg2); - tcg_gen_shli_tl(ret, ret, 1); - tcg_gen_addi_tl(ret, ret, 0x8000); + tcg_gen_mul_i32(ret, arg1, arg2); + tcg_gen_shli_i32(ret, ret, 1); + tcg_gen_addi_i32(ret, ret, 0x8000); /* catch special case r1 = r2 = 0x8000 */ - tcg_gen_setcondi_tl(TCG_COND_EQ, temp, ret, 0x80008000); - tcg_gen_muli_tl(temp, temp, 0x8001); - tcg_gen_sub_tl(ret, ret, temp); + tcg_gen_setcondi_i32(TCG_COND_EQ, temp, ret, 0x80008000); + tcg_gen_muli_i32(temp, temp, 0x8001); + tcg_gen_sub_i32(ret, ret, temp); } /* reset v bit */ - tcg_gen_movi_tl(cpu_PSW_V, 0); + tcg_gen_movi_i32(cpu_PSW_V, 0); /* calc av overflow bit */ - tcg_gen_add_tl(cpu_PSW_AV, ret, ret); - tcg_gen_xor_tl(cpu_PSW_AV, ret, cpu_PSW_AV); + tcg_gen_add_i32(cpu_PSW_AV, ret, ret); + tcg_gen_xor_i32(cpu_PSW_AV, ret, cpu_PSW_AV); /* calc sav overflow bit */ - tcg_gen_or_tl(cpu_PSW_SAV, cpu_PSW_SAV, cpu_PSW_AV); + tcg_gen_or_i32(cpu_PSW_SAV, cpu_PSW_SAV, cpu_PSW_AV); /* cut halfword off */ - tcg_gen_andi_tl(ret, ret, 
0xffff0000); + tcg_gen_andi_i32(ret, ret, 0xffff0000); } -static inline void -gen_madds_64(TCGv ret_low, TCGv ret_high, TCGv r1, TCGv r2_low, TCGv r2_high, - TCGv r3) +static void gen_madds_64(TCGv_i32 ret_low, TCGv_i32 ret_high, + TCGv_i32 r1, TCGv_i32 r2_low, TCGv_i32 r2_high, + TCGv_i32 r3) { TCGv_i64 temp64 = tcg_temp_new_i64(); tcg_gen_concat_i32_i64(temp64, r2_low, r2_high); @@ -2370,17 +2399,17 @@ gen_madds_64(TCGv ret_low, TCGv ret_high, TCGv r1, TCGv r2_low, TCGv r2_high, tcg_gen_extr_i64_i32(ret_low, ret_high, temp64); } -static inline void -gen_maddsi_64(TCGv ret_low, TCGv ret_high, TCGv r1, TCGv r2_low, TCGv r2_high, - int32_t con) +static void gen_maddsi_64(TCGv_i32 ret_low, TCGv_i32 ret_high, + TCGv_i32 r1, TCGv_i32 r2_low, TCGv_i32 r2_high, + int32_t con) { - TCGv temp = tcg_constant_i32(con); + TCGv_i32 temp = tcg_constant_i32(con); gen_madds_64(ret_low, ret_high, r1, r2_low, r2_high, temp); } -static inline void -gen_maddsu_64(TCGv ret_low, TCGv ret_high, TCGv r1, TCGv r2_low, TCGv r2_high, - TCGv r3) +static void gen_maddsu_64(TCGv_i32 ret_low, TCGv_i32 ret_high, + TCGv_i32 r1, TCGv_i32 r2_low, TCGv_i32 r2_high, + TCGv_i32 r3) { TCGv_i64 temp64 = tcg_temp_new_i64(); tcg_gen_concat_i32_i64(temp64, r2_low, r2_high); @@ -2388,29 +2417,29 @@ gen_maddsu_64(TCGv ret_low, TCGv ret_high, TCGv r1, TCGv r2_low, TCGv r2_high, tcg_gen_extr_i64_i32(ret_low, ret_high, temp64); } -static inline void -gen_maddsui_64(TCGv ret_low, TCGv ret_high, TCGv r1, TCGv r2_low, TCGv r2_high, - int32_t con) +static void gen_maddsui_64(TCGv_i32 ret_low, TCGv_i32 ret_high, + TCGv_i32 r1, TCGv_i32 r2_low, + TCGv_i32 r2_high, int32_t con) { - TCGv temp = tcg_constant_i32(con); + TCGv_i32 temp = tcg_constant_i32(con); gen_maddsu_64(ret_low, ret_high, r1, r2_low, r2_high, temp); } -static inline void gen_msubsi_32(TCGv ret, TCGv r1, TCGv r2, int32_t con) +static void gen_msubsi_32(TCGv_i32 ret, TCGv_i32 r1, TCGv_i32 r2, int32_t con) { - TCGv temp = tcg_constant_i32(con); + TCGv_i32 temp = tcg_constant_i32(con); gen_helper_msub32_ssov(ret, tcg_env, r1, r2, temp); } -static inline void gen_msubsui_32(TCGv ret, TCGv r1, TCGv r2, int32_t con) +static void gen_msubsui_32(TCGv_i32 ret, TCGv_i32 r1, TCGv_i32 r2, int32_t con) { - TCGv temp = tcg_constant_i32(con); + TCGv_i32 temp = tcg_constant_i32(con); gen_helper_msub32_suov(ret, tcg_env, r1, r2, temp); } -static inline void -gen_msubs_64(TCGv ret_low, TCGv ret_high, TCGv r1, TCGv r2_low, TCGv r2_high, - TCGv r3) +static void gen_msubs_64(TCGv_i32 ret_low, TCGv_i32 ret_high, + TCGv_i32 r1, TCGv_i32 r2_low, TCGv_i32 r2_high, + TCGv_i32 r3) { TCGv_i64 temp64 = tcg_temp_new_i64(); tcg_gen_concat_i32_i64(temp64, r2_low, r2_high); @@ -2418,17 +2447,17 @@ gen_msubs_64(TCGv ret_low, TCGv ret_high, TCGv r1, TCGv r2_low, TCGv r2_high, tcg_gen_extr_i64_i32(ret_low, ret_high, temp64); } -static inline void -gen_msubsi_64(TCGv ret_low, TCGv ret_high, TCGv r1, TCGv r2_low, TCGv r2_high, - int32_t con) +static void gen_msubsi_64(TCGv_i32 ret_low, TCGv_i32 ret_high, + TCGv_i32 r1, TCGv_i32 r2_low, TCGv_i32 r2_high, + int32_t con) { - TCGv temp = tcg_constant_i32(con); + TCGv_i32 temp = tcg_constant_i32(con); gen_msubs_64(ret_low, ret_high, r1, r2_low, r2_high, temp); } -static inline void -gen_msubsu_64(TCGv ret_low, TCGv ret_high, TCGv r1, TCGv r2_low, TCGv r2_high, - TCGv r3) +static void gen_msubsu_64(TCGv_i32 ret_low, TCGv_i32 ret_high, + TCGv_i32 r1, TCGv_i32 r2_low, TCGv_i32 r2_high, + TCGv_i32 r3) { TCGv_i64 temp64 = tcg_temp_new_i64(); tcg_gen_concat_i32_i64(temp64, 
r2_low, r2_high); @@ -2436,310 +2465,312 @@ gen_msubsu_64(TCGv ret_low, TCGv ret_high, TCGv r1, TCGv r2_low, TCGv r2_high, tcg_gen_extr_i64_i32(ret_low, ret_high, temp64); } -static inline void -gen_msubsui_64(TCGv ret_low, TCGv ret_high, TCGv r1, TCGv r2_low, TCGv r2_high, - int32_t con) +static void gen_msubsui_64(TCGv_i32 ret_low, TCGv_i32 ret_high, + TCGv_i32 r1, TCGv_i32 r2_low, TCGv_i32 r2_high, + int32_t con) { - TCGv temp = tcg_constant_i32(con); + TCGv_i32 temp = tcg_constant_i32(con); gen_msubsu_64(ret_low, ret_high, r1, r2_low, r2_high, temp); } -static void gen_saturate(TCGv ret, TCGv arg, int32_t up, int32_t low) +static void gen_saturate(TCGv_i32 ret, TCGv_i32 arg, int32_t up, int32_t low) { - tcg_gen_smax_tl(ret, arg, tcg_constant_i32(low)); - tcg_gen_smin_tl(ret, ret, tcg_constant_i32(up)); + tcg_gen_smax_i32(ret, arg, tcg_constant_i32(low)); + tcg_gen_smin_i32(ret, ret, tcg_constant_i32(up)); } -static void gen_saturate_u(TCGv ret, TCGv arg, int32_t up) +static void gen_saturate_u(TCGv_i32 ret, TCGv_i32 arg, int32_t up) { - tcg_gen_umin_tl(ret, arg, tcg_constant_i32(up)); + tcg_gen_umin_i32(ret, arg, tcg_constant_i32(up)); } -static void gen_shi(TCGv ret, TCGv r1, int32_t shift_count) +static void gen_shi(TCGv_i32 ret, TCGv_i32 r1, int32_t shift_count) { if (shift_count == -32) { - tcg_gen_movi_tl(ret, 0); + tcg_gen_movi_i32(ret, 0); } else if (shift_count >= 0) { - tcg_gen_shli_tl(ret, r1, shift_count); + tcg_gen_shli_i32(ret, r1, shift_count); } else { - tcg_gen_shri_tl(ret, r1, -shift_count); + tcg_gen_shri_i32(ret, r1, -shift_count); } } -static void gen_sh_hi(TCGv ret, TCGv r1, int32_t shiftcount) +static void gen_sh_hi(TCGv_i32 ret, TCGv_i32 r1, int32_t shiftcount) { - TCGv temp_low, temp_high; + TCGv_i32 temp_low, temp_high; if (shiftcount == -16) { - tcg_gen_movi_tl(ret, 0); + tcg_gen_movi_i32(ret, 0); } else { - temp_high = tcg_temp_new(); - temp_low = tcg_temp_new(); + temp_high = tcg_temp_new_i32(); + temp_low = tcg_temp_new_i32(); - tcg_gen_andi_tl(temp_low, r1, 0xffff); - tcg_gen_andi_tl(temp_high, r1, 0xffff0000); + tcg_gen_andi_i32(temp_low, r1, 0xffff); + tcg_gen_andi_i32(temp_high, r1, 0xffff0000); gen_shi(temp_low, temp_low, shiftcount); gen_shi(ret, temp_high, shiftcount); - tcg_gen_deposit_tl(ret, ret, temp_low, 0, 16); + tcg_gen_deposit_i32(ret, ret, temp_low, 0, 16); } } -static void gen_shaci(TCGv ret, TCGv r1, int32_t shift_count) +static void gen_shaci(TCGv_i32 ret, TCGv_i32 r1, int32_t shift_count) { uint32_t msk, msk_start; - TCGv temp = tcg_temp_new(); - TCGv temp2 = tcg_temp_new(); + TCGv_i32 temp = tcg_temp_new_i32(); + TCGv_i32 temp2 = tcg_temp_new_i32(); if (shift_count == 0) { /* Clear PSW.C and PSW.V */ - tcg_gen_movi_tl(cpu_PSW_C, 0); - tcg_gen_mov_tl(cpu_PSW_V, cpu_PSW_C); - tcg_gen_mov_tl(ret, r1); + tcg_gen_movi_i32(cpu_PSW_C, 0); + tcg_gen_mov_i32(cpu_PSW_V, cpu_PSW_C); + tcg_gen_mov_i32(ret, r1); } else if (shift_count == -32) { /* set PSW.C */ - tcg_gen_mov_tl(cpu_PSW_C, r1); + tcg_gen_mov_i32(cpu_PSW_C, r1); /* fill ret completely with sign bit */ - tcg_gen_sari_tl(ret, r1, 31); + tcg_gen_sari_i32(ret, r1, 31); /* clear PSW.V */ - tcg_gen_movi_tl(cpu_PSW_V, 0); + tcg_gen_movi_i32(cpu_PSW_V, 0); } else if (shift_count > 0) { - TCGv t_max = tcg_constant_i32(0x7FFFFFFF >> shift_count); - TCGv t_min = tcg_constant_i32(((int32_t) -0x80000000) >> shift_count); + TCGv_i32 t_max = tcg_constant_i32(0x7FFFFFFF >> shift_count); + TCGv_i32 t_min = tcg_constant_i32(((int32_t) -0x80000000) >> shift_count); /* calc carry */ msk_start = 32 - 
shift_count; msk = ((1 << shift_count) - 1) << msk_start; - tcg_gen_andi_tl(cpu_PSW_C, r1, msk); + tcg_gen_andi_i32(cpu_PSW_C, r1, msk); /* calc v/sv bits */ - tcg_gen_setcond_tl(TCG_COND_GT, temp, r1, t_max); - tcg_gen_setcond_tl(TCG_COND_LT, temp2, r1, t_min); - tcg_gen_or_tl(cpu_PSW_V, temp, temp2); - tcg_gen_shli_tl(cpu_PSW_V, cpu_PSW_V, 31); + tcg_gen_setcond_i32(TCG_COND_GT, temp, r1, t_max); + tcg_gen_setcond_i32(TCG_COND_LT, temp2, r1, t_min); + tcg_gen_or_i32(cpu_PSW_V, temp, temp2); + tcg_gen_shli_i32(cpu_PSW_V, cpu_PSW_V, 31); /* calc sv */ - tcg_gen_or_tl(cpu_PSW_SV, cpu_PSW_V, cpu_PSW_SV); + tcg_gen_or_i32(cpu_PSW_SV, cpu_PSW_V, cpu_PSW_SV); /* do shift */ - tcg_gen_shli_tl(ret, r1, shift_count); + tcg_gen_shli_i32(ret, r1, shift_count); } else { /* clear PSW.V */ - tcg_gen_movi_tl(cpu_PSW_V, 0); + tcg_gen_movi_i32(cpu_PSW_V, 0); /* calc carry */ msk = (1 << -shift_count) - 1; - tcg_gen_andi_tl(cpu_PSW_C, r1, msk); + tcg_gen_andi_i32(cpu_PSW_C, r1, msk); /* do shift */ - tcg_gen_sari_tl(ret, r1, -shift_count); + tcg_gen_sari_i32(ret, r1, -shift_count); } /* calc av overflow bit */ - tcg_gen_add_tl(cpu_PSW_AV, ret, ret); - tcg_gen_xor_tl(cpu_PSW_AV, ret, cpu_PSW_AV); + tcg_gen_add_i32(cpu_PSW_AV, ret, ret); + tcg_gen_xor_i32(cpu_PSW_AV, ret, cpu_PSW_AV); /* calc sav overflow bit */ - tcg_gen_or_tl(cpu_PSW_SAV, cpu_PSW_SAV, cpu_PSW_AV); + tcg_gen_or_i32(cpu_PSW_SAV, cpu_PSW_SAV, cpu_PSW_AV); } -static void gen_shas(TCGv ret, TCGv r1, TCGv r2) +static void gen_shas(TCGv_i32 ret, TCGv_i32 r1, TCGv_i32 r2) { gen_helper_sha_ssov(ret, tcg_env, r1, r2); } -static void gen_shasi(TCGv ret, TCGv r1, int32_t con) +static void gen_shasi(TCGv_i32 ret, TCGv_i32 r1, int32_t con) { - TCGv temp = tcg_constant_i32(con); + TCGv_i32 temp = tcg_constant_i32(con); gen_shas(ret, r1, temp); } -static void gen_sha_hi(TCGv ret, TCGv r1, int32_t shift_count) +static void gen_sha_hi(TCGv_i32 ret, TCGv_i32 r1, int32_t shift_count) { - TCGv low, high; + TCGv_i32 low, high; if (shift_count == 0) { - tcg_gen_mov_tl(ret, r1); + tcg_gen_mov_i32(ret, r1); } else if (shift_count > 0) { - low = tcg_temp_new(); - high = tcg_temp_new(); + low = tcg_temp_new_i32(); + high = tcg_temp_new_i32(); - tcg_gen_andi_tl(high, r1, 0xffff0000); - tcg_gen_shli_tl(low, r1, shift_count); - tcg_gen_shli_tl(ret, high, shift_count); - tcg_gen_deposit_tl(ret, ret, low, 0, 16); + tcg_gen_andi_i32(high, r1, 0xffff0000); + tcg_gen_shli_i32(low, r1, shift_count); + tcg_gen_shli_i32(ret, high, shift_count); + tcg_gen_deposit_i32(ret, ret, low, 0, 16); } else { - low = tcg_temp_new(); - high = tcg_temp_new(); + low = tcg_temp_new_i32(); + high = tcg_temp_new_i32(); - tcg_gen_ext16s_tl(low, r1); - tcg_gen_sari_tl(low, low, -shift_count); - tcg_gen_sari_tl(ret, r1, -shift_count); - tcg_gen_deposit_tl(ret, ret, low, 0, 16); + tcg_gen_ext16s_i32(low, r1); + tcg_gen_sari_i32(low, low, -shift_count); + tcg_gen_sari_i32(ret, r1, -shift_count); + tcg_gen_deposit_i32(ret, ret, low, 0, 16); } } /* ret = {ret[30:0], (r1 cond r2)}; */ -static void gen_sh_cond(int cond, TCGv ret, TCGv r1, TCGv r2) +static void gen_sh_cond(int cond, TCGv_i32 ret, TCGv_i32 r1, TCGv_i32 r2) { - TCGv temp = tcg_temp_new(); - TCGv temp2 = tcg_temp_new(); + TCGv_i32 temp = tcg_temp_new_i32(); + TCGv_i32 temp2 = tcg_temp_new_i32(); - tcg_gen_shli_tl(temp, ret, 1); - tcg_gen_setcond_tl(cond, temp2, r1, r2); - tcg_gen_or_tl(ret, temp, temp2); + tcg_gen_shli_i32(temp, ret, 1); + tcg_gen_setcond_i32(cond, temp2, r1, r2); + tcg_gen_or_i32(ret, temp, temp2); } -static void 
gen_sh_condi(int cond, TCGv ret, TCGv r1, int32_t con) +static void gen_sh_condi(int cond, TCGv_i32 ret, TCGv_i32 r1, int32_t con) { - TCGv temp = tcg_constant_i32(con); + TCGv_i32 temp = tcg_constant_i32(con); gen_sh_cond(cond, ret, r1, temp); } -static inline void gen_adds(TCGv ret, TCGv r1, TCGv r2) +static void gen_adds(TCGv_i32 ret, TCGv_i32 r1, TCGv_i32 r2) { gen_helper_add_ssov(ret, tcg_env, r1, r2); } -static inline void gen_addsi(TCGv ret, TCGv r1, int32_t con) +static void gen_addsi(TCGv_i32 ret, TCGv_i32 r1, int32_t con) { - TCGv temp = tcg_constant_i32(con); + TCGv_i32 temp = tcg_constant_i32(con); gen_helper_add_ssov(ret, tcg_env, r1, temp); } -static inline void gen_addsui(TCGv ret, TCGv r1, int32_t con) +static void gen_addsui(TCGv_i32 ret, TCGv_i32 r1, int32_t con) { - TCGv temp = tcg_constant_i32(con); + TCGv_i32 temp = tcg_constant_i32(con); gen_helper_add_suov(ret, tcg_env, r1, temp); } -static inline void gen_subs(TCGv ret, TCGv r1, TCGv r2) +static void gen_subs(TCGv_i32 ret, TCGv_i32 r1, TCGv_i32 r2) { gen_helper_sub_ssov(ret, tcg_env, r1, r2); } -static inline void gen_subsu(TCGv ret, TCGv r1, TCGv r2) +static void gen_subsu(TCGv_i32 ret, TCGv_i32 r1, TCGv_i32 r2) { gen_helper_sub_suov(ret, tcg_env, r1, r2); } -static inline void gen_bit_2op(TCGv ret, TCGv r1, TCGv r2, - int pos1, int pos2, - void(*op1)(TCGv, TCGv, TCGv), - void(*op2)(TCGv, TCGv, TCGv)) +static void gen_bit_2op(TCGv_i32 ret, TCGv_i32 r1, TCGv_i32 r2, + int pos1, int pos2, + void(*op1)(TCGv_i32, TCGv_i32, TCGv_i32), + void(*op2)(TCGv_i32, TCGv_i32, TCGv_i32)) { - TCGv temp1, temp2; + TCGv_i32 temp1, temp2; - temp1 = tcg_temp_new(); - temp2 = tcg_temp_new(); + temp1 = tcg_temp_new_i32(); + temp2 = tcg_temp_new_i32(); - tcg_gen_shri_tl(temp2, r2, pos2); - tcg_gen_shri_tl(temp1, r1, pos1); + tcg_gen_shri_i32(temp2, r2, pos2); + tcg_gen_shri_i32(temp1, r1, pos1); (*op1)(temp1, temp1, temp2); (*op2)(temp1 , ret, temp1); - tcg_gen_deposit_tl(ret, ret, temp1, 0, 1); + tcg_gen_deposit_i32(ret, ret, temp1, 0, 1); } /* ret = r1[pos1] op1 r2[pos2]; */ -static inline void gen_bit_1op(TCGv ret, TCGv r1, TCGv r2, - int pos1, int pos2, - void(*op1)(TCGv, TCGv, TCGv)) +static void gen_bit_1op(TCGv_i32 ret, TCGv_i32 r1, TCGv_i32 r2, + int pos1, int pos2, + void(*op1)(TCGv_i32, TCGv_i32, TCGv_i32)) { - TCGv temp1, temp2; + TCGv_i32 temp1, temp2; - temp1 = tcg_temp_new(); - temp2 = tcg_temp_new(); + temp1 = tcg_temp_new_i32(); + temp2 = tcg_temp_new_i32(); - tcg_gen_shri_tl(temp2, r2, pos2); - tcg_gen_shri_tl(temp1, r1, pos1); + tcg_gen_shri_i32(temp2, r2, pos2); + tcg_gen_shri_i32(temp1, r1, pos1); (*op1)(ret, temp1, temp2); - tcg_gen_andi_tl(ret, ret, 0x1); + tcg_gen_andi_i32(ret, ret, 0x1); } -static inline void gen_accumulating_cond(int cond, TCGv ret, TCGv r1, TCGv r2, - void(*op)(TCGv, TCGv, TCGv)) +static void gen_accumulating_cond(int cond, TCGv_i32 ret, + TCGv_i32 r1, TCGv_i32 r2, + void(*op)(TCGv_i32, TCGv_i32, TCGv_i32)) { - TCGv temp = tcg_temp_new(); - TCGv temp2 = tcg_temp_new(); + TCGv_i32 temp = tcg_temp_new_i32(); + TCGv_i32 temp2 = tcg_temp_new_i32(); /* temp = (arg1 cond arg2 )*/ - tcg_gen_setcond_tl(cond, temp, r1, r2); + tcg_gen_setcond_i32(cond, temp, r1, r2); /* temp2 = ret[0]*/ - tcg_gen_andi_tl(temp2, ret, 0x1); + tcg_gen_andi_i32(temp2, ret, 0x1); /* temp = temp insn temp2 */ (*op)(temp, temp, temp2); /* ret = {ret[31:1], temp} */ - tcg_gen_deposit_tl(ret, ret, temp, 0, 1); + tcg_gen_deposit_i32(ret, ret, temp, 0, 1); } -static inline void -gen_accumulating_condi(int cond, TCGv ret, TCGv r1, 
int32_t con, - void(*op)(TCGv, TCGv, TCGv)) +static void gen_accumulating_condi(int cond, TCGv_i32 ret, TCGv_i32 r1, + int32_t con, + void(*op)(TCGv_i32, TCGv_i32, TCGv_i32)) { - TCGv temp = tcg_constant_i32(con); + TCGv_i32 temp = tcg_constant_i32(con); gen_accumulating_cond(cond, ret, r1, temp, op); } -static inline void gen_eqany_bi(TCGv ret, TCGv r1, int32_t con) +static void gen_eqany_bi(TCGv_i32 ret, TCGv_i32 r1, int32_t con) { - TCGv b0 = tcg_temp_new(); - TCGv b1 = tcg_temp_new(); - TCGv b2 = tcg_temp_new(); - TCGv b3 = tcg_temp_new(); + TCGv_i32 b0 = tcg_temp_new_i32(); + TCGv_i32 b1 = tcg_temp_new_i32(); + TCGv_i32 b2 = tcg_temp_new_i32(); + TCGv_i32 b3 = tcg_temp_new_i32(); /* byte 0 */ - tcg_gen_andi_tl(b0, r1, 0xff); - tcg_gen_setcondi_tl(TCG_COND_EQ, b0, b0, con & 0xff); + tcg_gen_andi_i32(b0, r1, 0xff); + tcg_gen_setcondi_i32(TCG_COND_EQ, b0, b0, con & 0xff); /* byte 1 */ - tcg_gen_andi_tl(b1, r1, 0xff00); - tcg_gen_setcondi_tl(TCG_COND_EQ, b1, b1, con & 0xff00); + tcg_gen_andi_i32(b1, r1, 0xff00); + tcg_gen_setcondi_i32(TCG_COND_EQ, b1, b1, con & 0xff00); /* byte 2 */ - tcg_gen_andi_tl(b2, r1, 0xff0000); - tcg_gen_setcondi_tl(TCG_COND_EQ, b2, b2, con & 0xff0000); + tcg_gen_andi_i32(b2, r1, 0xff0000); + tcg_gen_setcondi_i32(TCG_COND_EQ, b2, b2, con & 0xff0000); /* byte 3 */ - tcg_gen_andi_tl(b3, r1, 0xff000000); - tcg_gen_setcondi_tl(TCG_COND_EQ, b3, b3, con & 0xff000000); + tcg_gen_andi_i32(b3, r1, 0xff000000); + tcg_gen_setcondi_i32(TCG_COND_EQ, b3, b3, con & 0xff000000); /* combine them */ - tcg_gen_or_tl(ret, b0, b1); - tcg_gen_or_tl(ret, ret, b2); - tcg_gen_or_tl(ret, ret, b3); + tcg_gen_or_i32(ret, b0, b1); + tcg_gen_or_i32(ret, ret, b2); + tcg_gen_or_i32(ret, ret, b3); } -static inline void gen_eqany_hi(TCGv ret, TCGv r1, int32_t con) +static void gen_eqany_hi(TCGv_i32 ret, TCGv_i32 r1, int32_t con) { - TCGv h0 = tcg_temp_new(); - TCGv h1 = tcg_temp_new(); + TCGv_i32 h0 = tcg_temp_new_i32(); + TCGv_i32 h1 = tcg_temp_new_i32(); /* halfword 0 */ - tcg_gen_andi_tl(h0, r1, 0xffff); - tcg_gen_setcondi_tl(TCG_COND_EQ, h0, h0, con & 0xffff); + tcg_gen_andi_i32(h0, r1, 0xffff); + tcg_gen_setcondi_i32(TCG_COND_EQ, h0, h0, con & 0xffff); /* halfword 1 */ - tcg_gen_andi_tl(h1, r1, 0xffff0000); - tcg_gen_setcondi_tl(TCG_COND_EQ, h1, h1, con & 0xffff0000); + tcg_gen_andi_i32(h1, r1, 0xffff0000); + tcg_gen_setcondi_i32(TCG_COND_EQ, h1, h1, con & 0xffff0000); /* combine them */ - tcg_gen_or_tl(ret, h0, h1); + tcg_gen_or_i32(ret, h0, h1); } /* mask = ((1 << width) -1) << pos; ret = (r1 & ~mask) | (r2 << pos) & mask); */ -static inline void gen_insert(TCGv ret, TCGv r1, TCGv r2, TCGv width, TCGv pos) +static void gen_insert(TCGv_i32 ret, + TCGv_i32 r1, TCGv_i32 r2, TCGv_i32 width, TCGv_i32 pos) { - TCGv mask = tcg_temp_new(); - TCGv temp = tcg_temp_new(); - TCGv temp2 = tcg_temp_new(); + TCGv_i32 mask = tcg_temp_new_i32(); + TCGv_i32 temp = tcg_temp_new_i32(); + TCGv_i32 temp2 = tcg_temp_new_i32(); - tcg_gen_shl_tl(mask, tcg_constant_tl(1), width); - tcg_gen_subi_tl(mask, mask, 1); - tcg_gen_shl_tl(mask, mask, pos); + tcg_gen_shl_i32(mask, tcg_constant_i32(1), width); + tcg_gen_subi_i32(mask, mask, 1); + tcg_gen_shl_i32(mask, mask, pos); - tcg_gen_shl_tl(temp, r2, pos); - tcg_gen_and_tl(temp, temp, mask); - tcg_gen_andc_tl(temp2, r1, mask); - tcg_gen_or_tl(ret, temp, temp2); + tcg_gen_shl_i32(temp, r2, pos); + tcg_gen_and_i32(temp, temp, mask); + tcg_gen_andc_i32(temp2, r1, mask); + tcg_gen_or_i32(ret, temp, temp2); } -static inline void gen_bsplit(TCGv rl, TCGv rh, TCGv r1) +static 
void gen_bsplit(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 r1) { TCGv_i64 temp = tcg_temp_new_i64(); @@ -2747,7 +2778,7 @@ static inline void gen_bsplit(TCGv rl, TCGv rh, TCGv r1) tcg_gen_extr_i64_i32(rl, rh, temp); } -static inline void gen_unpack(TCGv rl, TCGv rh, TCGv r1) +static void gen_unpack(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 r1) { TCGv_i64 temp = tcg_temp_new_i64(); @@ -2755,8 +2786,9 @@ static inline void gen_unpack(TCGv rl, TCGv rh, TCGv r1) tcg_gen_extr_i64_i32(rl, rh, temp); } -static inline void -gen_dvinit_b(DisasContext *ctx, TCGv rl, TCGv rh, TCGv r1, TCGv r2) +static void gen_dvinit_b(DisasContext *ctx, + TCGv_i32 rl, TCGv_i32 rh, + TCGv_i32 r1, TCGv_i32 r2) { TCGv_i64 ret = tcg_temp_new_i64(); @@ -2768,8 +2800,9 @@ gen_dvinit_b(DisasContext *ctx, TCGv rl, TCGv rh, TCGv r1, TCGv r2) tcg_gen_extr_i64_i32(rl, rh, ret); } -static inline void -gen_dvinit_h(DisasContext *ctx, TCGv rl, TCGv rh, TCGv r1, TCGv r2) +static void gen_dvinit_h(DisasContext *ctx, + TCGv_i32 rl, TCGv_i32 rh, + TCGv_i32 r1, TCGv_i32 r2) { TCGv_i64 ret = tcg_temp_new_i64(); @@ -2781,47 +2814,47 @@ gen_dvinit_h(DisasContext *ctx, TCGv rl, TCGv rh, TCGv r1, TCGv r2) tcg_gen_extr_i64_i32(rl, rh, ret); } -static void gen_calc_usb_mul_h(TCGv arg_low, TCGv arg_high) +static void gen_calc_usb_mul_h(TCGv_i32 arg_low, TCGv_i32 arg_high) { - TCGv temp = tcg_temp_new(); + TCGv_i32 temp = tcg_temp_new_i32(); /* calc AV bit */ - tcg_gen_add_tl(temp, arg_low, arg_low); - tcg_gen_xor_tl(temp, temp, arg_low); - tcg_gen_add_tl(cpu_PSW_AV, arg_high, arg_high); - tcg_gen_xor_tl(cpu_PSW_AV, cpu_PSW_AV, arg_high); - tcg_gen_or_tl(cpu_PSW_AV, cpu_PSW_AV, temp); + tcg_gen_add_i32(temp, arg_low, arg_low); + tcg_gen_xor_i32(temp, temp, arg_low); + tcg_gen_add_i32(cpu_PSW_AV, arg_high, arg_high); + tcg_gen_xor_i32(cpu_PSW_AV, cpu_PSW_AV, arg_high); + tcg_gen_or_i32(cpu_PSW_AV, cpu_PSW_AV, temp); /* calc SAV bit */ - tcg_gen_or_tl(cpu_PSW_SAV, cpu_PSW_SAV, cpu_PSW_AV); - tcg_gen_movi_tl(cpu_PSW_V, 0); + tcg_gen_or_i32(cpu_PSW_SAV, cpu_PSW_SAV, cpu_PSW_AV); + tcg_gen_movi_i32(cpu_PSW_V, 0); } -static void gen_calc_usb_mulr_h(TCGv arg) +static void gen_calc_usb_mulr_h(TCGv_i32 arg) { - TCGv temp = tcg_temp_new(); + TCGv_i32 temp = tcg_temp_new_i32(); /* calc AV bit */ - tcg_gen_add_tl(temp, arg, arg); - tcg_gen_xor_tl(temp, temp, arg); - tcg_gen_shli_tl(cpu_PSW_AV, temp, 16); - tcg_gen_or_tl(cpu_PSW_AV, cpu_PSW_AV, temp); + tcg_gen_add_i32(temp, arg, arg); + tcg_gen_xor_i32(temp, temp, arg); + tcg_gen_shli_i32(cpu_PSW_AV, temp, 16); + tcg_gen_or_i32(cpu_PSW_AV, cpu_PSW_AV, temp); /* calc SAV bit */ - tcg_gen_or_tl(cpu_PSW_SAV, cpu_PSW_SAV, cpu_PSW_AV); + tcg_gen_or_i32(cpu_PSW_SAV, cpu_PSW_SAV, cpu_PSW_AV); /* clear V bit */ - tcg_gen_movi_tl(cpu_PSW_V, 0); + tcg_gen_movi_i32(cpu_PSW_V, 0); } /* helpers for generating program flow micro-ops */ -static inline void gen_save_pc(target_ulong pc) +static void gen_save_pc(vaddr pc) { - tcg_gen_movi_tl(cpu_PC, pc); + tcg_gen_movi_i32(cpu_PC, pc); } -static void gen_goto_tb(DisasContext *ctx, int n, target_ulong dest) +static void gen_goto_tb(DisasContext *ctx, unsigned tb_slot_index, vaddr dest) { if (translator_use_goto_tb(&ctx->base, dest)) { - tcg_gen_goto_tb(n); + tcg_gen_goto_tb(tb_slot_index); gen_save_pc(dest); - tcg_gen_exit_tb(ctx->base.tb, n); + tcg_gen_exit_tb(ctx->base.tb, tb_slot_index); } else { gen_save_pc(dest); tcg_gen_lookup_and_goto_ptr(); @@ -2839,11 +2872,11 @@ static void generate_trap(DisasContext *ctx, int class, int tin) ctx->base.is_jmp = DISAS_NORETURN; } -static inline 
void gen_branch_cond(DisasContext *ctx, TCGCond cond, TCGv r1, - TCGv r2, int16_t address) +static void gen_branch_cond(DisasContext *ctx, TCGCond cond, TCGv_i32 r1, + TCGv_i32 r2, int16_t address) { TCGLabel *jumpLabel = gen_new_label(); - tcg_gen_brcond_tl(cond, r1, r2, jumpLabel); + tcg_gen_brcond_i32(cond, r1, r2, jumpLabel); gen_goto_tb(ctx, 1, ctx->pc_succ_insn); @@ -2851,10 +2884,10 @@ static inline void gen_branch_cond(DisasContext *ctx, TCGCond cond, TCGv r1, gen_goto_tb(ctx, 0, ctx->base.pc_next + address * 2); } -static inline void gen_branch_condi(DisasContext *ctx, TCGCond cond, TCGv r1, - int r2, int16_t address) +static void gen_branch_condi(DisasContext *ctx, TCGCond cond, TCGv_i32 r1, + int r2, int16_t address) { - TCGv temp = tcg_constant_i32(r2); + TCGv_i32 temp = tcg_constant_i32(r2); gen_branch_cond(ctx, cond, r1, temp, address); } @@ -2862,8 +2895,8 @@ static void gen_loop(DisasContext *ctx, int r1, int32_t offset) { TCGLabel *l1 = gen_new_label(); - tcg_gen_subi_tl(cpu_gpr_a[r1], cpu_gpr_a[r1], 1); - tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_gpr_a[r1], -1, l1); + tcg_gen_subi_i32(cpu_gpr_a[r1], cpu_gpr_a[r1], 1); + tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_gpr_a[r1], -1, l1); gen_goto_tb(ctx, 1, ctx->base.pc_next + offset); gen_set_label(l1); gen_goto_tb(ctx, 0, ctx->pc_succ_insn); @@ -2871,29 +2904,29 @@ static void gen_loop(DisasContext *ctx, int r1, int32_t offset) static void gen_fcall_save_ctx(DisasContext *ctx) { - TCGv temp = tcg_temp_new(); + TCGv_i32 temp = tcg_temp_new_i32(); - tcg_gen_addi_tl(temp, cpu_gpr_a[10], -4); - tcg_gen_qemu_st_tl(cpu_gpr_a[11], temp, ctx->mem_idx, MO_LESL); - tcg_gen_movi_tl(cpu_gpr_a[11], ctx->pc_succ_insn); - tcg_gen_mov_tl(cpu_gpr_a[10], temp); + tcg_gen_addi_i32(temp, cpu_gpr_a[10], -4); + tcg_gen_qemu_st_i32(cpu_gpr_a[11], temp, ctx->mem_idx, MO_LESL); + tcg_gen_movi_i32(cpu_gpr_a[11], ctx->pc_succ_insn); + tcg_gen_mov_i32(cpu_gpr_a[10], temp); } static void gen_fret(DisasContext *ctx) { - TCGv temp = tcg_temp_new(); + TCGv_i32 temp = tcg_temp_new_i32(); - tcg_gen_andi_tl(temp, cpu_gpr_a[11], ~0x1); - tcg_gen_qemu_ld_tl(cpu_gpr_a[11], cpu_gpr_a[10], ctx->mem_idx, MO_LESL); - tcg_gen_addi_tl(cpu_gpr_a[10], cpu_gpr_a[10], 4); - tcg_gen_mov_tl(cpu_PC, temp); + tcg_gen_andi_i32(temp, cpu_gpr_a[11], ~0x1); + tcg_gen_qemu_ld_i32(cpu_gpr_a[11], cpu_gpr_a[10], ctx->mem_idx, MO_LESL); + tcg_gen_addi_i32(cpu_gpr_a[10], cpu_gpr_a[10], 4); + tcg_gen_mov_i32(cpu_PC, temp); ctx->base.is_jmp = DISAS_EXIT; } static void gen_compute_branch(DisasContext *ctx, uint32_t opc, int r1, int r2 , int32_t constant , int32_t offset) { - TCGv temp, temp2; + TCGv_i32 temp, temp2; int n; switch (opc) { @@ -2930,13 +2963,13 @@ static void gen_compute_branch(DisasContext *ctx, uint32_t opc, int r1, break; /* SBRN-format jumps */ case OPC1_16_SBRN_JZ_T: - temp = tcg_temp_new(); - tcg_gen_andi_tl(temp, cpu_gpr_d[15], 0x1u << constant); + temp = tcg_temp_new_i32(); + tcg_gen_andi_i32(temp, cpu_gpr_d[15], 0x1u << constant); gen_branch_condi(ctx, TCG_COND_EQ, temp, 0, offset); break; case OPC1_16_SBRN_JNZ_T: - temp = tcg_temp_new(); - tcg_gen_andi_tl(temp, cpu_gpr_d[15], 0x1u << constant); + temp = tcg_temp_new_i32(); + tcg_gen_andi_i32(temp, cpu_gpr_d[15], 0x1u << constant); gen_branch_condi(ctx, TCG_COND_NE, temp, 0, offset); break; /* SBR-format jumps */ @@ -2985,7 +3018,7 @@ static void gen_compute_branch(DisasContext *ctx, uint32_t opc, int r1, break; /* SR-format jumps */ case OPC1_16_SR_JI: - tcg_gen_andi_tl(cpu_PC, cpu_gpr_a[r1], 0xfffffffe); + 
tcg_gen_andi_i32(cpu_PC, cpu_gpr_a[r1], 0xfffffffe); ctx->base.is_jmp = DISAS_EXIT; break; case OPC2_32_SYS_RET: @@ -3007,13 +3040,13 @@ static void gen_compute_branch(DisasContext *ctx, uint32_t opc, int r1, gen_goto_tb(ctx, 0, EA_B_ABSOLUT(offset)); break; case OPC1_32_B_JLA: - tcg_gen_movi_tl(cpu_gpr_a[11], ctx->pc_succ_insn); + tcg_gen_movi_i32(cpu_gpr_a[11], ctx->pc_succ_insn); /* fall through */ case OPC1_32_B_JA: gen_goto_tb(ctx, 0, EA_B_ABSOLUT(offset)); break; case OPC1_32_B_JL: - tcg_gen_movi_tl(cpu_gpr_a[11], ctx->pc_succ_insn); + tcg_gen_movi_i32(cpu_gpr_a[11], ctx->pc_succ_insn); gen_goto_tb(ctx, 0, ctx->base.pc_next + offset * 2); break; /* BOL format */ @@ -3043,16 +3076,16 @@ static void gen_compute_branch(DisasContext *ctx, uint32_t opc, int r1, } break; case OPCM_32_BRC_JNE: - temp = tcg_temp_new(); + temp = tcg_temp_new_i32(); if (MASK_OP_BRC_OP2(ctx->opcode) == OPC2_32_BRC_JNED) { - tcg_gen_mov_tl(temp, cpu_gpr_d[r1]); + tcg_gen_mov_i32(temp, cpu_gpr_d[r1]); /* subi is unconditional */ - tcg_gen_subi_tl(cpu_gpr_d[r1], cpu_gpr_d[r1], 1); + tcg_gen_subi_i32(cpu_gpr_d[r1], cpu_gpr_d[r1], 1); gen_branch_condi(ctx, TCG_COND_NE, temp, constant, offset); } else { - tcg_gen_mov_tl(temp, cpu_gpr_d[r1]); + tcg_gen_mov_i32(temp, cpu_gpr_d[r1]); /* addi is unconditional */ - tcg_gen_addi_tl(cpu_gpr_d[r1], cpu_gpr_d[r1], 1); + tcg_gen_addi_i32(cpu_gpr_d[r1], cpu_gpr_d[r1], 1); gen_branch_condi(ctx, TCG_COND_NE, temp, constant, offset); } break; @@ -3060,8 +3093,8 @@ static void gen_compute_branch(DisasContext *ctx, uint32_t opc, int r1, case OPCM_32_BRN_JTT: n = MASK_OP_BRN_N(ctx->opcode); - temp = tcg_temp_new(); - tcg_gen_andi_tl(temp, cpu_gpr_d[r1], (1 << n)); + temp = tcg_temp_new_i32(); + tcg_gen_andi_i32(temp, cpu_gpr_d[r1], (1 << n)); if (MASK_OP_BRN_OP2(ctx->opcode) == OPC2_32_BRN_JNZ_T) { gen_branch_condi(ctx, TCG_COND_NE, temp, 0, offset); @@ -3115,21 +3148,21 @@ static void gen_compute_branch(DisasContext *ctx, uint32_t opc, int r1, } break; case OPCM_32_BRR_JNE: - temp = tcg_temp_new(); - temp2 = tcg_temp_new(); + temp = tcg_temp_new_i32(); + temp2 = tcg_temp_new_i32(); if (MASK_OP_BRC_OP2(ctx->opcode) == OPC2_32_BRR_JNED) { - tcg_gen_mov_tl(temp, cpu_gpr_d[r1]); + tcg_gen_mov_i32(temp, cpu_gpr_d[r1]); /* also save r2, in case of r1 == r2, so r2 is not decremented */ - tcg_gen_mov_tl(temp2, cpu_gpr_d[r2]); + tcg_gen_mov_i32(temp2, cpu_gpr_d[r2]); /* subi is unconditional */ - tcg_gen_subi_tl(cpu_gpr_d[r1], cpu_gpr_d[r1], 1); + tcg_gen_subi_i32(cpu_gpr_d[r1], cpu_gpr_d[r1], 1); gen_branch_cond(ctx, TCG_COND_NE, temp, temp2, offset); } else { - tcg_gen_mov_tl(temp, cpu_gpr_d[r1]); + tcg_gen_mov_i32(temp, cpu_gpr_d[r1]); /* also save r2, in case of r1 == r2, so r2 is not decremented */ - tcg_gen_mov_tl(temp2, cpu_gpr_d[r2]); + tcg_gen_mov_i32(temp2, cpu_gpr_d[r2]); /* addi is unconditional */ - tcg_gen_addi_tl(cpu_gpr_d[r1], cpu_gpr_d[r1], 1); + tcg_gen_addi_i32(cpu_gpr_d[r1], cpu_gpr_d[r1], 1); gen_branch_cond(ctx, TCG_COND_NE, temp, temp2, offset); } break; @@ -3154,7 +3187,7 @@ static void decode_src_opc(DisasContext *ctx, int op1) { int r1; int32_t const4; - TCGv temp, temp2; + TCGv_i32 temp, temp2; r1 = MASK_OP_SRC_S1D(ctx->opcode); const4 = MASK_OP_SRC_CONST4_SEXT(ctx->opcode); @@ -3170,7 +3203,7 @@ static void decode_src_opc(DisasContext *ctx, int op1) gen_addi_d(cpu_gpr_d[15], cpu_gpr_d[r1], const4); break; case OPC1_16_SRC_ADD_A: - tcg_gen_addi_tl(cpu_gpr_a[r1], cpu_gpr_a[r1], const4); + tcg_gen_addi_i32(cpu_gpr_a[r1], cpu_gpr_a[r1], const4); break; case 
OPC1_16_SRC_CADD: gen_condi_add(TCG_COND_NE, cpu_gpr_d[r1], const4, cpu_gpr_d[r1], @@ -3181,37 +3214,37 @@ static void decode_src_opc(DisasContext *ctx, int op1) cpu_gpr_d[15]); break; case OPC1_16_SRC_CMOV: - temp = tcg_constant_tl(0); - temp2 = tcg_constant_tl(const4); - tcg_gen_movcond_tl(TCG_COND_NE, cpu_gpr_d[r1], cpu_gpr_d[15], temp, + temp = tcg_constant_i32(0); + temp2 = tcg_constant_i32(const4); + tcg_gen_movcond_i32(TCG_COND_NE, cpu_gpr_d[r1], cpu_gpr_d[15], temp, temp2, cpu_gpr_d[r1]); break; case OPC1_16_SRC_CMOVN: - temp = tcg_constant_tl(0); - temp2 = tcg_constant_tl(const4); - tcg_gen_movcond_tl(TCG_COND_EQ, cpu_gpr_d[r1], cpu_gpr_d[15], temp, + temp = tcg_constant_i32(0); + temp2 = tcg_constant_i32(const4); + tcg_gen_movcond_i32(TCG_COND_EQ, cpu_gpr_d[r1], cpu_gpr_d[15], temp, temp2, cpu_gpr_d[r1]); break; case OPC1_16_SRC_EQ: - tcg_gen_setcondi_tl(TCG_COND_EQ, cpu_gpr_d[15], cpu_gpr_d[r1], + tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_gpr_d[15], cpu_gpr_d[r1], const4); break; case OPC1_16_SRC_LT: - tcg_gen_setcondi_tl(TCG_COND_LT, cpu_gpr_d[15], cpu_gpr_d[r1], + tcg_gen_setcondi_i32(TCG_COND_LT, cpu_gpr_d[15], cpu_gpr_d[r1], const4); break; case OPC1_16_SRC_MOV: - tcg_gen_movi_tl(cpu_gpr_d[r1], const4); + tcg_gen_movi_i32(cpu_gpr_d[r1], const4); break; case OPC1_16_SRC_MOV_A: const4 = MASK_OP_SRC_CONST4(ctx->opcode); - tcg_gen_movi_tl(cpu_gpr_a[r1], const4); + tcg_gen_movi_i32(cpu_gpr_a[r1], const4); break; case OPC1_16_SRC_MOV_E: if (has_feature(ctx, TRICORE_FEATURE_16)) { CHECK_REG_PAIR(r1); - tcg_gen_movi_tl(cpu_gpr_d[r1], const4); - tcg_gen_sari_tl(cpu_gpr_d[r1+1], cpu_gpr_d[r1], 31); + tcg_gen_movi_i32(cpu_gpr_d[r1], const4); + tcg_gen_sari_i32(cpu_gpr_d[r1 + 1], cpu_gpr_d[r1], 31); } else { generate_trap(ctx, TRAPC_INSN_ERR, TIN2_IOPC); } @@ -3230,7 +3263,7 @@ static void decode_src_opc(DisasContext *ctx, int op1) static void decode_srr_opc(DisasContext *ctx, int op1) { int r1, r2; - TCGv temp; + TCGv_i32 temp; r1 = MASK_OP_SRR_S1D(ctx->opcode); r2 = MASK_OP_SRR_S2(ctx->opcode); @@ -3246,49 +3279,49 @@ static void decode_srr_opc(DisasContext *ctx, int op1) gen_add_d(cpu_gpr_d[15], cpu_gpr_d[r1], cpu_gpr_d[r2]); break; case OPC1_16_SRR_ADD_A: - tcg_gen_add_tl(cpu_gpr_a[r1], cpu_gpr_a[r1], cpu_gpr_a[r2]); + tcg_gen_add_i32(cpu_gpr_a[r1], cpu_gpr_a[r1], cpu_gpr_a[r2]); break; case OPC1_16_SRR_ADDS: gen_adds(cpu_gpr_d[r1], cpu_gpr_d[r1], cpu_gpr_d[r2]); break; case OPC1_16_SRR_AND: - tcg_gen_and_tl(cpu_gpr_d[r1], cpu_gpr_d[r1], cpu_gpr_d[r2]); + tcg_gen_and_i32(cpu_gpr_d[r1], cpu_gpr_d[r1], cpu_gpr_d[r2]); break; case OPC1_16_SRR_CMOV: - temp = tcg_constant_tl(0); - tcg_gen_movcond_tl(TCG_COND_NE, cpu_gpr_d[r1], cpu_gpr_d[15], temp, + temp = tcg_constant_i32(0); + tcg_gen_movcond_i32(TCG_COND_NE, cpu_gpr_d[r1], cpu_gpr_d[15], temp, cpu_gpr_d[r2], cpu_gpr_d[r1]); break; case OPC1_16_SRR_CMOVN: - temp = tcg_constant_tl(0); - tcg_gen_movcond_tl(TCG_COND_EQ, cpu_gpr_d[r1], cpu_gpr_d[15], temp, + temp = tcg_constant_i32(0); + tcg_gen_movcond_i32(TCG_COND_EQ, cpu_gpr_d[r1], cpu_gpr_d[15], temp, cpu_gpr_d[r2], cpu_gpr_d[r1]); break; case OPC1_16_SRR_EQ: - tcg_gen_setcond_tl(TCG_COND_EQ, cpu_gpr_d[15], cpu_gpr_d[r1], + tcg_gen_setcond_i32(TCG_COND_EQ, cpu_gpr_d[15], cpu_gpr_d[r1], cpu_gpr_d[r2]); break; case OPC1_16_SRR_LT: - tcg_gen_setcond_tl(TCG_COND_LT, cpu_gpr_d[15], cpu_gpr_d[r1], + tcg_gen_setcond_i32(TCG_COND_LT, cpu_gpr_d[15], cpu_gpr_d[r1], cpu_gpr_d[r2]); break; case OPC1_16_SRR_MOV: - tcg_gen_mov_tl(cpu_gpr_d[r1], cpu_gpr_d[r2]); + tcg_gen_mov_i32(cpu_gpr_d[r1], 
cpu_gpr_d[r2]); break; case OPC1_16_SRR_MOV_A: - tcg_gen_mov_tl(cpu_gpr_a[r1], cpu_gpr_d[r2]); + tcg_gen_mov_i32(cpu_gpr_a[r1], cpu_gpr_d[r2]); break; case OPC1_16_SRR_MOV_AA: - tcg_gen_mov_tl(cpu_gpr_a[r1], cpu_gpr_a[r2]); + tcg_gen_mov_i32(cpu_gpr_a[r1], cpu_gpr_a[r2]); break; case OPC1_16_SRR_MOV_D: - tcg_gen_mov_tl(cpu_gpr_d[r1], cpu_gpr_a[r2]); + tcg_gen_mov_i32(cpu_gpr_d[r1], cpu_gpr_a[r2]); break; case OPC1_16_SRR_MUL: gen_mul_i32s(cpu_gpr_d[r1], cpu_gpr_d[r1], cpu_gpr_d[r2]); break; case OPC1_16_SRR_OR: - tcg_gen_or_tl(cpu_gpr_d[r1], cpu_gpr_d[r1], cpu_gpr_d[r2]); + tcg_gen_or_i32(cpu_gpr_d[r1], cpu_gpr_d[r1], cpu_gpr_d[r2]); break; case OPC1_16_SRR_SUB: gen_sub_d(cpu_gpr_d[r1], cpu_gpr_d[r1], cpu_gpr_d[r2]); @@ -3303,7 +3336,7 @@ static void decode_srr_opc(DisasContext *ctx, int op1) gen_subs(cpu_gpr_d[r1], cpu_gpr_d[r1], cpu_gpr_d[r2]); break; case OPC1_16_SRR_XOR: - tcg_gen_xor_tl(cpu_gpr_d[r1], cpu_gpr_d[r1], cpu_gpr_d[r2]); + tcg_gen_xor_i32(cpu_gpr_d[r1], cpu_gpr_d[r1], cpu_gpr_d[r2]); break; default: generate_trap(ctx, TRAPC_INSN_ERR, TIN2_IOPC); @@ -3319,32 +3352,32 @@ static void decode_ssr_opc(DisasContext *ctx, int op1) switch (op1) { case OPC1_16_SSR_ST_A: - tcg_gen_qemu_st_tl(cpu_gpr_a[r1], cpu_gpr_a[r2], ctx->mem_idx, MO_LEUL); + tcg_gen_qemu_st_i32(cpu_gpr_a[r1], cpu_gpr_a[r2], ctx->mem_idx, MO_LEUL); break; case OPC1_16_SSR_ST_A_POSTINC: - tcg_gen_qemu_st_tl(cpu_gpr_a[r1], cpu_gpr_a[r2], ctx->mem_idx, MO_LEUL); - tcg_gen_addi_tl(cpu_gpr_a[r2], cpu_gpr_a[r2], 4); + tcg_gen_qemu_st_i32(cpu_gpr_a[r1], cpu_gpr_a[r2], ctx->mem_idx, MO_LEUL); + tcg_gen_addi_i32(cpu_gpr_a[r2], cpu_gpr_a[r2], 4); break; case OPC1_16_SSR_ST_B: - tcg_gen_qemu_st_tl(cpu_gpr_d[r1], cpu_gpr_a[r2], ctx->mem_idx, MO_UB); + tcg_gen_qemu_st_i32(cpu_gpr_d[r1], cpu_gpr_a[r2], ctx->mem_idx, MO_UB); break; case OPC1_16_SSR_ST_B_POSTINC: - tcg_gen_qemu_st_tl(cpu_gpr_d[r1], cpu_gpr_a[r2], ctx->mem_idx, MO_UB); - tcg_gen_addi_tl(cpu_gpr_a[r2], cpu_gpr_a[r2], 1); + tcg_gen_qemu_st_i32(cpu_gpr_d[r1], cpu_gpr_a[r2], ctx->mem_idx, MO_UB); + tcg_gen_addi_i32(cpu_gpr_a[r2], cpu_gpr_a[r2], 1); break; case OPC1_16_SSR_ST_H: - tcg_gen_qemu_st_tl(cpu_gpr_d[r1], cpu_gpr_a[r2], ctx->mem_idx, MO_LEUW); + tcg_gen_qemu_st_i32(cpu_gpr_d[r1], cpu_gpr_a[r2], ctx->mem_idx, MO_LEUW); break; case OPC1_16_SSR_ST_H_POSTINC: - tcg_gen_qemu_st_tl(cpu_gpr_d[r1], cpu_gpr_a[r2], ctx->mem_idx, MO_LEUW); - tcg_gen_addi_tl(cpu_gpr_a[r2], cpu_gpr_a[r2], 2); + tcg_gen_qemu_st_i32(cpu_gpr_d[r1], cpu_gpr_a[r2], ctx->mem_idx, MO_LEUW); + tcg_gen_addi_i32(cpu_gpr_a[r2], cpu_gpr_a[r2], 2); break; case OPC1_16_SSR_ST_W: - tcg_gen_qemu_st_tl(cpu_gpr_d[r1], cpu_gpr_a[r2], ctx->mem_idx, MO_LEUL); + tcg_gen_qemu_st_i32(cpu_gpr_d[r1], cpu_gpr_a[r2], ctx->mem_idx, MO_LEUL); break; case OPC1_16_SSR_ST_W_POSTINC: - tcg_gen_qemu_st_tl(cpu_gpr_d[r1], cpu_gpr_a[r2], ctx->mem_idx, MO_LEUL); - tcg_gen_addi_tl(cpu_gpr_a[r2], cpu_gpr_a[r2], 4); + tcg_gen_qemu_st_i32(cpu_gpr_d[r1], cpu_gpr_a[r2], ctx->mem_idx, MO_LEUL); + tcg_gen_addi_i32(cpu_gpr_a[r2], cpu_gpr_a[r2], 4); break; default: generate_trap(ctx, TRAPC_INSN_ERR, TIN2_IOPC); @@ -3359,7 +3392,7 @@ static void decode_sc_opc(DisasContext *ctx, int op1) switch (op1) { case OPC1_16_SC_AND: - tcg_gen_andi_tl(cpu_gpr_d[15], cpu_gpr_d[15], const16); + tcg_gen_andi_i32(cpu_gpr_d[15], cpu_gpr_d[15], const16); break; case OPC1_16_SC_BISR: if (ctx->priv == TRICORE_PRIV_SM) { @@ -3375,10 +3408,10 @@ static void decode_sc_opc(DisasContext *ctx, int op1) gen_offset_ld(ctx, cpu_gpr_d[15], cpu_gpr_a[10], const16 * 4, 
MO_LESL); break; case OPC1_16_SC_MOV: - tcg_gen_movi_tl(cpu_gpr_d[15], const16); + tcg_gen_movi_i32(cpu_gpr_d[15], const16); break; case OPC1_16_SC_OR: - tcg_gen_ori_tl(cpu_gpr_d[15], cpu_gpr_d[15], const16); + tcg_gen_ori_i32(cpu_gpr_d[15], cpu_gpr_d[15], const16); break; case OPC1_16_SC_ST_A: gen_offset_st(ctx, cpu_gpr_a[15], cpu_gpr_a[10], const16 * 4, MO_LESL); @@ -3387,7 +3420,7 @@ static void decode_sc_opc(DisasContext *ctx, int op1) gen_offset_st(ctx, cpu_gpr_d[15], cpu_gpr_a[10], const16 * 4, MO_LESL); break; case OPC1_16_SC_SUB_A: - tcg_gen_subi_tl(cpu_gpr_a[10], cpu_gpr_a[10], const16); + tcg_gen_subi_i32(cpu_gpr_a[10], cpu_gpr_a[10], const16); break; default: generate_trap(ctx, TRAPC_INSN_ERR, TIN2_IOPC); @@ -3404,32 +3437,32 @@ static void decode_slr_opc(DisasContext *ctx, int op1) switch (op1) { /* SLR-format */ case OPC1_16_SLR_LD_A: - tcg_gen_qemu_ld_tl(cpu_gpr_a[r1], cpu_gpr_a[r2], ctx->mem_idx, MO_LESL); + tcg_gen_qemu_ld_i32(cpu_gpr_a[r1], cpu_gpr_a[r2], ctx->mem_idx, MO_LESL); break; case OPC1_16_SLR_LD_A_POSTINC: - tcg_gen_qemu_ld_tl(cpu_gpr_a[r1], cpu_gpr_a[r2], ctx->mem_idx, MO_LESL); - tcg_gen_addi_tl(cpu_gpr_a[r2], cpu_gpr_a[r2], 4); + tcg_gen_qemu_ld_i32(cpu_gpr_a[r1], cpu_gpr_a[r2], ctx->mem_idx, MO_LESL); + tcg_gen_addi_i32(cpu_gpr_a[r2], cpu_gpr_a[r2], 4); break; case OPC1_16_SLR_LD_BU: - tcg_gen_qemu_ld_tl(cpu_gpr_d[r1], cpu_gpr_a[r2], ctx->mem_idx, MO_UB); + tcg_gen_qemu_ld_i32(cpu_gpr_d[r1], cpu_gpr_a[r2], ctx->mem_idx, MO_UB); break; case OPC1_16_SLR_LD_BU_POSTINC: - tcg_gen_qemu_ld_tl(cpu_gpr_d[r1], cpu_gpr_a[r2], ctx->mem_idx, MO_UB); - tcg_gen_addi_tl(cpu_gpr_a[r2], cpu_gpr_a[r2], 1); + tcg_gen_qemu_ld_i32(cpu_gpr_d[r1], cpu_gpr_a[r2], ctx->mem_idx, MO_UB); + tcg_gen_addi_i32(cpu_gpr_a[r2], cpu_gpr_a[r2], 1); break; case OPC1_16_SLR_LD_H: - tcg_gen_qemu_ld_tl(cpu_gpr_d[r1], cpu_gpr_a[r2], ctx->mem_idx, MO_LESW); + tcg_gen_qemu_ld_i32(cpu_gpr_d[r1], cpu_gpr_a[r2], ctx->mem_idx, MO_LESW); break; case OPC1_16_SLR_LD_H_POSTINC: - tcg_gen_qemu_ld_tl(cpu_gpr_d[r1], cpu_gpr_a[r2], ctx->mem_idx, MO_LESW); - tcg_gen_addi_tl(cpu_gpr_a[r2], cpu_gpr_a[r2], 2); + tcg_gen_qemu_ld_i32(cpu_gpr_d[r1], cpu_gpr_a[r2], ctx->mem_idx, MO_LESW); + tcg_gen_addi_i32(cpu_gpr_a[r2], cpu_gpr_a[r2], 2); break; case OPC1_16_SLR_LD_W: - tcg_gen_qemu_ld_tl(cpu_gpr_d[r1], cpu_gpr_a[r2], ctx->mem_idx, MO_LESL); + tcg_gen_qemu_ld_i32(cpu_gpr_d[r1], cpu_gpr_a[r2], ctx->mem_idx, MO_LESL); break; case OPC1_16_SLR_LD_W_POSTINC: - tcg_gen_qemu_ld_tl(cpu_gpr_d[r1], cpu_gpr_a[r2], ctx->mem_idx, MO_LESL); - tcg_gen_addi_tl(cpu_gpr_a[r2], cpu_gpr_a[r2], 4); + tcg_gen_qemu_ld_i32(cpu_gpr_d[r1], cpu_gpr_a[r2], ctx->mem_idx, MO_LESL); + tcg_gen_addi_i32(cpu_gpr_a[r2], cpu_gpr_a[r2], 4); break; default: generate_trap(ctx, TRAPC_INSN_ERR, TIN2_IOPC); @@ -3512,17 +3545,18 @@ static void decode_sr_accu(DisasContext *ctx) switch (op2) { case OPC2_16_SR_RSUB: /* calc V bit -- overflow only if r1 = -0x80000000 */ - tcg_gen_setcondi_tl(TCG_COND_EQ, cpu_PSW_V, cpu_gpr_d[r1], -0x80000000); - tcg_gen_shli_tl(cpu_PSW_V, cpu_PSW_V, 31); + tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_PSW_V, + cpu_gpr_d[r1], -0x80000000); + tcg_gen_shli_i32(cpu_PSW_V, cpu_PSW_V, 31); /* calc SV bit */ - tcg_gen_or_tl(cpu_PSW_SV, cpu_PSW_SV, cpu_PSW_V); + tcg_gen_or_i32(cpu_PSW_SV, cpu_PSW_SV, cpu_PSW_V); /* sub */ - tcg_gen_neg_tl(cpu_gpr_d[r1], cpu_gpr_d[r1]); + tcg_gen_neg_i32(cpu_gpr_d[r1], cpu_gpr_d[r1]); /* calc av */ - tcg_gen_add_tl(cpu_PSW_AV, cpu_gpr_d[r1], cpu_gpr_d[r1]); - tcg_gen_xor_tl(cpu_PSW_AV, cpu_gpr_d[r1], cpu_PSW_AV); + 
tcg_gen_add_i32(cpu_PSW_AV, cpu_gpr_d[r1], cpu_gpr_d[r1]); + tcg_gen_xor_i32(cpu_PSW_AV, cpu_gpr_d[r1], cpu_PSW_AV); /* calc sav */ - tcg_gen_or_tl(cpu_PSW_SAV, cpu_PSW_SAV, cpu_PSW_AV); + tcg_gen_or_i32(cpu_PSW_SAV, cpu_PSW_SAV, cpu_PSW_AV); break; case OPC2_16_SR_SAT_B: gen_saturate(cpu_gpr_d[r1], cpu_gpr_d[r1], 0x7f, -0x80); @@ -3547,7 +3581,7 @@ static void decode_16Bit_opc(DisasContext *ctx) int r1, r2; int32_t const16; int32_t address; - TCGv temp; + TCGv_i32 temp; op1 = MASK_OP_MAJOR(ctx->opcode); @@ -3614,9 +3648,9 @@ static void decode_16Bit_opc(DisasContext *ctx) r2 = MASK_OP_SRRS_S2(ctx->opcode); r1 = MASK_OP_SRRS_S1D(ctx->opcode); const16 = MASK_OP_SRRS_N(ctx->opcode); - temp = tcg_temp_new(); - tcg_gen_shli_tl(temp, cpu_gpr_d[15], const16); - tcg_gen_add_tl(cpu_gpr_a[r1], cpu_gpr_a[r2], temp); + temp = tcg_temp_new_i32(); + tcg_gen_shli_i32(temp, cpu_gpr_d[15], const16); + tcg_gen_add_i32(cpu_gpr_a[r1], cpu_gpr_a[r2], temp); break; /* SLRO-format */ case OPC1_16_SLRO_LD_A: @@ -3765,7 +3799,7 @@ static void decode_16Bit_opc(DisasContext *ctx) break; case OPC1_16_SR_NOT: r1 = MASK_OP_SR_S1D(ctx->opcode); - tcg_gen_not_tl(cpu_gpr_d[r1], cpu_gpr_d[r1]); + tcg_gen_not_i32(cpu_gpr_d[r1], cpu_gpr_d[r1]); break; default: generate_trap(ctx, TRAPC_INSN_ERR, TIN2_IOPC); @@ -3782,7 +3816,7 @@ static void decode_abs_ldw(DisasContext *ctx) int32_t op2; int32_t r1; uint32_t address; - TCGv temp; + TCGv_i32 temp; r1 = MASK_OP_ABS_S1D(ctx->opcode); address = MASK_OP_ABS_OFF18(ctx->opcode); @@ -3792,18 +3826,18 @@ static void decode_abs_ldw(DisasContext *ctx) switch (op2) { case OPC2_32_ABS_LD_A: - tcg_gen_qemu_ld_tl(cpu_gpr_a[r1], temp, ctx->mem_idx, MO_LESL); + tcg_gen_qemu_ld_i32(cpu_gpr_a[r1], temp, ctx->mem_idx, MO_LESL); break; case OPC2_32_ABS_LD_D: CHECK_REG_PAIR(r1); - gen_ld_2regs_64(cpu_gpr_d[r1+1], cpu_gpr_d[r1], temp, ctx); + gen_ld_2regs_64(ctx, cpu_gpr_d[r1 + 1], cpu_gpr_d[r1], temp); break; case OPC2_32_ABS_LD_DA: CHECK_REG_PAIR(r1); - gen_ld_2regs_64(cpu_gpr_a[r1+1], cpu_gpr_a[r1], temp, ctx); + gen_ld_2regs_64(ctx, cpu_gpr_a[r1 + 1], cpu_gpr_a[r1], temp); break; case OPC2_32_ABS_LD_W: - tcg_gen_qemu_ld_tl(cpu_gpr_d[r1], temp, ctx->mem_idx, MO_LESL); + tcg_gen_qemu_ld_i32(cpu_gpr_d[r1], temp, ctx->mem_idx, MO_LESL); break; default: generate_trap(ctx, TRAPC_INSN_ERR, TIN2_IOPC); @@ -3815,7 +3849,7 @@ static void decode_abs_ldb(DisasContext *ctx) int32_t op2; int32_t r1; uint32_t address; - TCGv temp; + TCGv_i32 temp; r1 = MASK_OP_ABS_S1D(ctx->opcode); address = MASK_OP_ABS_OFF18(ctx->opcode); @@ -3825,16 +3859,16 @@ static void decode_abs_ldb(DisasContext *ctx) switch (op2) { case OPC2_32_ABS_LD_B: - tcg_gen_qemu_ld_tl(cpu_gpr_d[r1], temp, ctx->mem_idx, MO_SB); + tcg_gen_qemu_ld_i32(cpu_gpr_d[r1], temp, ctx->mem_idx, MO_SB); break; case OPC2_32_ABS_LD_BU: - tcg_gen_qemu_ld_tl(cpu_gpr_d[r1], temp, ctx->mem_idx, MO_UB); + tcg_gen_qemu_ld_i32(cpu_gpr_d[r1], temp, ctx->mem_idx, MO_UB); break; case OPC2_32_ABS_LD_H: - tcg_gen_qemu_ld_tl(cpu_gpr_d[r1], temp, ctx->mem_idx, MO_LESW); + tcg_gen_qemu_ld_i32(cpu_gpr_d[r1], temp, ctx->mem_idx, MO_LESW); break; case OPC2_32_ABS_LD_HU: - tcg_gen_qemu_ld_tl(cpu_gpr_d[r1], temp, ctx->mem_idx, MO_LEUW); + tcg_gen_qemu_ld_i32(cpu_gpr_d[r1], temp, ctx->mem_idx, MO_LEUW); break; default: generate_trap(ctx, TRAPC_INSN_ERR, TIN2_IOPC); @@ -3846,7 +3880,7 @@ static void decode_abs_ldst_swap(DisasContext *ctx) int32_t op2; int32_t r1; uint32_t address; - TCGv temp; + TCGv_i32 temp; r1 = MASK_OP_ABS_S1D(ctx->opcode); address = 
MASK_OP_ABS_OFF18(ctx->opcode); @@ -3897,7 +3931,7 @@ static void decode_abs_store(DisasContext *ctx) int32_t op2; int32_t r1; uint32_t address; - TCGv temp; + TCGv_i32 temp; r1 = MASK_OP_ABS_S1D(ctx->opcode); address = MASK_OP_ABS_OFF18(ctx->opcode); @@ -3907,18 +3941,18 @@ static void decode_abs_store(DisasContext *ctx) switch (op2) { case OPC2_32_ABS_ST_A: - tcg_gen_qemu_st_tl(cpu_gpr_a[r1], temp, ctx->mem_idx, MO_LESL); + tcg_gen_qemu_st_i32(cpu_gpr_a[r1], temp, ctx->mem_idx, MO_LESL); break; case OPC2_32_ABS_ST_D: CHECK_REG_PAIR(r1); - gen_st_2regs_64(cpu_gpr_d[r1+1], cpu_gpr_d[r1], temp, ctx); + gen_st_2regs_64(ctx, cpu_gpr_d[r1 + 1], cpu_gpr_d[r1], temp); break; case OPC2_32_ABS_ST_DA: CHECK_REG_PAIR(r1); - gen_st_2regs_64(cpu_gpr_a[r1+1], cpu_gpr_a[r1], temp, ctx); + gen_st_2regs_64(ctx, cpu_gpr_a[r1 + 1], cpu_gpr_a[r1], temp); break; case OPC2_32_ABS_ST_W: - tcg_gen_qemu_st_tl(cpu_gpr_d[r1], temp, ctx->mem_idx, MO_LESL); + tcg_gen_qemu_st_i32(cpu_gpr_d[r1], temp, ctx->mem_idx, MO_LESL); break; default: generate_trap(ctx, TRAPC_INSN_ERR, TIN2_IOPC); @@ -3930,7 +3964,7 @@ static void decode_abs_storeb_h(DisasContext *ctx) int32_t op2; int32_t r1; uint32_t address; - TCGv temp; + TCGv_i32 temp; r1 = MASK_OP_ABS_S1D(ctx->opcode); address = MASK_OP_ABS_OFF18(ctx->opcode); @@ -3940,10 +3974,10 @@ static void decode_abs_storeb_h(DisasContext *ctx) switch (op2) { case OPC2_32_ABS_ST_B: - tcg_gen_qemu_st_tl(cpu_gpr_d[r1], temp, ctx->mem_idx, MO_UB); + tcg_gen_qemu_st_i32(cpu_gpr_d[r1], temp, ctx->mem_idx, MO_UB); break; case OPC2_32_ABS_ST_H: - tcg_gen_qemu_st_tl(cpu_gpr_d[r1], temp, ctx->mem_idx, MO_LEUW); + tcg_gen_qemu_st_i32(cpu_gpr_d[r1], temp, ctx->mem_idx, MO_LEUW); break; default: generate_trap(ctx, TRAPC_INSN_ERR, TIN2_IOPC); @@ -4032,7 +4066,7 @@ static void decode_bit_insert(DisasContext *ctx) uint32_t op2; int r1, r2, r3; int pos1, pos2; - TCGv temp; + TCGv_i32 temp; op2 = MASK_OP_BIT_OP2(ctx->opcode); r1 = MASK_OP_BIT_S1(ctx->opcode); r2 = MASK_OP_BIT_S2(ctx->opcode); @@ -4040,13 +4074,13 @@ static void decode_bit_insert(DisasContext *ctx) pos1 = MASK_OP_BIT_POS1(ctx->opcode); pos2 = MASK_OP_BIT_POS2(ctx->opcode); - temp = tcg_temp_new(); + temp = tcg_temp_new_i32(); - tcg_gen_shri_tl(temp, cpu_gpr_d[r2], pos2); + tcg_gen_shri_i32(temp, cpu_gpr_d[r2], pos2); if (op2 == OPC2_32_BIT_INSN_T) { - tcg_gen_not_tl(temp, temp); + tcg_gen_not_i32(temp, temp); } - tcg_gen_deposit_tl(cpu_gpr_d[r3], cpu_gpr_d[r1], temp, pos1, 1); + tcg_gen_deposit_i32(cpu_gpr_d[r3], cpu_gpr_d[r1], temp, pos1, 1); } static void decode_bit_logical_t2(DisasContext *ctx) @@ -4131,7 +4165,7 @@ static void decode_bit_sh_logic1(DisasContext *ctx) uint32_t op2; int r1, r2, r3; int pos1, pos2; - TCGv temp; + TCGv_i32 temp; op2 = MASK_OP_BIT_OP2(ctx->opcode); r1 = MASK_OP_BIT_S1(ctx->opcode); @@ -4140,7 +4174,7 @@ static void decode_bit_sh_logic1(DisasContext *ctx) pos1 = MASK_OP_BIT_POS1(ctx->opcode); pos2 = MASK_OP_BIT_POS2(ctx->opcode); - temp = tcg_temp_new(); + temp = tcg_temp_new_i32(); switch (op2) { case OPC2_32_BIT_SH_AND_T: @@ -4162,8 +4196,8 @@ static void decode_bit_sh_logic1(DisasContext *ctx) default: generate_trap(ctx, TRAPC_INSN_ERR, TIN2_IOPC); } - tcg_gen_shli_tl(cpu_gpr_d[r3], cpu_gpr_d[r3], 1); - tcg_gen_add_tl(cpu_gpr_d[r3], cpu_gpr_d[r3], temp); + tcg_gen_shli_i32(cpu_gpr_d[r3], cpu_gpr_d[r3], 1); + tcg_gen_add_i32(cpu_gpr_d[r3], cpu_gpr_d[r3], temp); } static void decode_bit_sh_logic2(DisasContext *ctx) @@ -4171,7 +4205,7 @@ static void decode_bit_sh_logic2(DisasContext *ctx) uint32_t op2; 
int r1, r2, r3; int pos1, pos2; - TCGv temp; + TCGv_i32 temp; op2 = MASK_OP_BIT_OP2(ctx->opcode); r1 = MASK_OP_BIT_S1(ctx->opcode); @@ -4180,7 +4214,7 @@ static void decode_bit_sh_logic2(DisasContext *ctx) pos1 = MASK_OP_BIT_POS1(ctx->opcode); pos2 = MASK_OP_BIT_POS2(ctx->opcode); - temp = tcg_temp_new(); + temp = tcg_temp_new_i32(); switch (op2) { case OPC2_32_BIT_SH_NAND_T: @@ -4202,8 +4236,8 @@ static void decode_bit_sh_logic2(DisasContext *ctx) default: generate_trap(ctx, TRAPC_INSN_ERR, TIN2_IOPC); } - tcg_gen_shli_tl(cpu_gpr_d[r3], cpu_gpr_d[r3], 1); - tcg_gen_add_tl(cpu_gpr_d[r3], cpu_gpr_d[r3], temp); + tcg_gen_shli_i32(cpu_gpr_d[r3], cpu_gpr_d[r3], 1); + tcg_gen_add_i32(cpu_gpr_d[r3], cpu_gpr_d[r3], temp); } /* BO-format */ @@ -4214,7 +4248,7 @@ static void decode_bo_addrmode_post_pre_base(DisasContext *ctx) uint32_t op2; uint32_t off10; int32_t r1, r2; - TCGv temp; + TCGv_i32 temp; r1 = MASK_OP_BO_S1D(ctx->opcode); r2 = MASK_OP_BO_S2(ctx->opcode); @@ -4232,14 +4266,14 @@ static void decode_bo_addrmode_post_pre_base(DisasContext *ctx) case OPC2_32_BO_CACHEA_I_POSTINC: /* instruction to access the cache, but we still need to handle the addressing mode */ - tcg_gen_addi_tl(cpu_gpr_a[r2], cpu_gpr_a[r2], off10); + tcg_gen_addi_i32(cpu_gpr_a[r2], cpu_gpr_a[r2], off10); break; case OPC2_32_BO_CACHEA_WI_PREINC: case OPC2_32_BO_CACHEA_W_PREINC: case OPC2_32_BO_CACHEA_I_PREINC: /* instruction to access the cache, but we still need to handle the addressing mode */ - tcg_gen_addi_tl(cpu_gpr_a[r2], cpu_gpr_a[r2], off10); + tcg_gen_addi_i32(cpu_gpr_a[r2], cpu_gpr_a[r2], off10); break; case OPC2_32_BO_CACHEI_WI_SHORTOFF: case OPC2_32_BO_CACHEI_W_SHORTOFF: @@ -4250,7 +4284,7 @@ static void decode_bo_addrmode_post_pre_base(DisasContext *ctx) case OPC2_32_BO_CACHEI_W_POSTINC: case OPC2_32_BO_CACHEI_WI_POSTINC: if (has_feature(ctx, TRICORE_FEATURE_131)) { - tcg_gen_addi_tl(cpu_gpr_a[r2], cpu_gpr_a[r2], off10); + tcg_gen_addi_i32(cpu_gpr_a[r2], cpu_gpr_a[r2], off10); } else { generate_trap(ctx, TRAPC_INSN_ERR, TIN2_IOPC); } @@ -4258,7 +4292,7 @@ static void decode_bo_addrmode_post_pre_base(DisasContext *ctx) case OPC2_32_BO_CACHEI_W_PREINC: case OPC2_32_BO_CACHEI_WI_PREINC: if (has_feature(ctx, TRICORE_FEATURE_131)) { - tcg_gen_addi_tl(cpu_gpr_a[r2], cpu_gpr_a[r2], off10); + tcg_gen_addi_i32(cpu_gpr_a[r2], cpu_gpr_a[r2], off10); } else { generate_trap(ctx, TRAPC_INSN_ERR, TIN2_IOPC); } @@ -4267,9 +4301,9 @@ static void decode_bo_addrmode_post_pre_base(DisasContext *ctx) gen_offset_st(ctx, cpu_gpr_a[r1], cpu_gpr_a[r2], off10, MO_LESL); break; case OPC2_32_BO_ST_A_POSTINC: - tcg_gen_qemu_st_tl(cpu_gpr_a[r1], cpu_gpr_a[r2], ctx->mem_idx, + tcg_gen_qemu_st_i32(cpu_gpr_a[r1], cpu_gpr_a[r2], ctx->mem_idx, MO_LESL); - tcg_gen_addi_tl(cpu_gpr_a[r2], cpu_gpr_a[r2], off10); + tcg_gen_addi_i32(cpu_gpr_a[r2], cpu_gpr_a[r2], off10); break; case OPC2_32_BO_ST_A_PREINC: gen_st_preincr(ctx, cpu_gpr_a[r1], cpu_gpr_a[r2], off10, MO_LESL); @@ -4278,82 +4312,84 @@ static void decode_bo_addrmode_post_pre_base(DisasContext *ctx) gen_offset_st(ctx, cpu_gpr_d[r1], cpu_gpr_a[r2], off10, MO_UB); break; case OPC2_32_BO_ST_B_POSTINC: - tcg_gen_qemu_st_tl(cpu_gpr_d[r1], cpu_gpr_a[r2], ctx->mem_idx, + tcg_gen_qemu_st_i32(cpu_gpr_d[r1], cpu_gpr_a[r2], ctx->mem_idx, MO_UB); - tcg_gen_addi_tl(cpu_gpr_a[r2], cpu_gpr_a[r2], off10); + tcg_gen_addi_i32(cpu_gpr_a[r2], cpu_gpr_a[r2], off10); break; case OPC2_32_BO_ST_B_PREINC: gen_st_preincr(ctx, cpu_gpr_d[r1], cpu_gpr_a[r2], off10, MO_UB); break; case OPC2_32_BO_ST_D_SHORTOFF: 
CHECK_REG_PAIR(r1); - gen_offset_st_2regs(cpu_gpr_d[r1+1], cpu_gpr_d[r1], cpu_gpr_a[r2], - off10, ctx); + gen_offset_st_2regs(ctx, + cpu_gpr_d[r1 + 1], cpu_gpr_d[r1], cpu_gpr_a[r2], + off10); break; case OPC2_32_BO_ST_D_POSTINC: CHECK_REG_PAIR(r1); - gen_st_2regs_64(cpu_gpr_d[r1+1], cpu_gpr_d[r1], cpu_gpr_a[r2], ctx); - tcg_gen_addi_tl(cpu_gpr_a[r2], cpu_gpr_a[r2], off10); + gen_st_2regs_64(ctx, cpu_gpr_d[r1 + 1], cpu_gpr_d[r1], cpu_gpr_a[r2]); + tcg_gen_addi_i32(cpu_gpr_a[r2], cpu_gpr_a[r2], off10); break; case OPC2_32_BO_ST_D_PREINC: CHECK_REG_PAIR(r1); - temp = tcg_temp_new(); - tcg_gen_addi_tl(temp, cpu_gpr_a[r2], off10); - gen_st_2regs_64(cpu_gpr_d[r1+1], cpu_gpr_d[r1], temp, ctx); - tcg_gen_mov_tl(cpu_gpr_a[r2], temp); + temp = tcg_temp_new_i32(); + tcg_gen_addi_i32(temp, cpu_gpr_a[r2], off10); + gen_st_2regs_64(ctx, cpu_gpr_d[r1 + 1], cpu_gpr_d[r1], temp); + tcg_gen_mov_i32(cpu_gpr_a[r2], temp); break; case OPC2_32_BO_ST_DA_SHORTOFF: CHECK_REG_PAIR(r1); - gen_offset_st_2regs(cpu_gpr_a[r1+1], cpu_gpr_a[r1], cpu_gpr_a[r2], - off10, ctx); + gen_offset_st_2regs(ctx, + cpu_gpr_a[r1 + 1], cpu_gpr_a[r1], cpu_gpr_a[r2], + off10); break; case OPC2_32_BO_ST_DA_POSTINC: CHECK_REG_PAIR(r1); - gen_st_2regs_64(cpu_gpr_a[r1+1], cpu_gpr_a[r1], cpu_gpr_a[r2], ctx); - tcg_gen_addi_tl(cpu_gpr_a[r2], cpu_gpr_a[r2], off10); + gen_st_2regs_64(ctx, cpu_gpr_a[r1 + 1], cpu_gpr_a[r1], cpu_gpr_a[r2]); + tcg_gen_addi_i32(cpu_gpr_a[r2], cpu_gpr_a[r2], off10); break; case OPC2_32_BO_ST_DA_PREINC: CHECK_REG_PAIR(r1); - temp = tcg_temp_new(); - tcg_gen_addi_tl(temp, cpu_gpr_a[r2], off10); - gen_st_2regs_64(cpu_gpr_a[r1+1], cpu_gpr_a[r1], temp, ctx); - tcg_gen_mov_tl(cpu_gpr_a[r2], temp); + temp = tcg_temp_new_i32(); + tcg_gen_addi_i32(temp, cpu_gpr_a[r2], off10); + gen_st_2regs_64(ctx, cpu_gpr_a[r1 + 1], cpu_gpr_a[r1], temp); + tcg_gen_mov_i32(cpu_gpr_a[r2], temp); break; case OPC2_32_BO_ST_H_SHORTOFF: gen_offset_st(ctx, cpu_gpr_d[r1], cpu_gpr_a[r2], off10, MO_LEUW); break; case OPC2_32_BO_ST_H_POSTINC: - tcg_gen_qemu_st_tl(cpu_gpr_d[r1], cpu_gpr_a[r2], ctx->mem_idx, + tcg_gen_qemu_st_i32(cpu_gpr_d[r1], cpu_gpr_a[r2], ctx->mem_idx, MO_LEUW); - tcg_gen_addi_tl(cpu_gpr_a[r2], cpu_gpr_a[r2], off10); + tcg_gen_addi_i32(cpu_gpr_a[r2], cpu_gpr_a[r2], off10); break; case OPC2_32_BO_ST_H_PREINC: gen_st_preincr(ctx, cpu_gpr_d[r1], cpu_gpr_a[r2], off10, MO_LEUW); break; case OPC2_32_BO_ST_Q_SHORTOFF: - temp = tcg_temp_new(); - tcg_gen_shri_tl(temp, cpu_gpr_d[r1], 16); + temp = tcg_temp_new_i32(); + tcg_gen_shri_i32(temp, cpu_gpr_d[r1], 16); gen_offset_st(ctx, temp, cpu_gpr_a[r2], off10, MO_LEUW); break; case OPC2_32_BO_ST_Q_POSTINC: - temp = tcg_temp_new(); - tcg_gen_shri_tl(temp, cpu_gpr_d[r1], 16); - tcg_gen_qemu_st_tl(temp, cpu_gpr_a[r2], ctx->mem_idx, + temp = tcg_temp_new_i32(); + tcg_gen_shri_i32(temp, cpu_gpr_d[r1], 16); + tcg_gen_qemu_st_i32(temp, cpu_gpr_a[r2], ctx->mem_idx, MO_LEUW); - tcg_gen_addi_tl(cpu_gpr_a[r2], cpu_gpr_a[r2], off10); + tcg_gen_addi_i32(cpu_gpr_a[r2], cpu_gpr_a[r2], off10); break; case OPC2_32_BO_ST_Q_PREINC: - temp = tcg_temp_new(); - tcg_gen_shri_tl(temp, cpu_gpr_d[r1], 16); + temp = tcg_temp_new_i32(); + tcg_gen_shri_i32(temp, cpu_gpr_d[r1], 16); gen_st_preincr(ctx, temp, cpu_gpr_a[r2], off10, MO_LEUW); break; case OPC2_32_BO_ST_W_SHORTOFF: gen_offset_st(ctx, cpu_gpr_d[r1], cpu_gpr_a[r2], off10, MO_LEUL); break; case OPC2_32_BO_ST_W_POSTINC: - tcg_gen_qemu_st_tl(cpu_gpr_d[r1], cpu_gpr_a[r2], ctx->mem_idx, + tcg_gen_qemu_st_i32(cpu_gpr_d[r1], cpu_gpr_a[r2], ctx->mem_idx, MO_LEUL); - 
tcg_gen_addi_tl(cpu_gpr_a[r2], cpu_gpr_a[r2], off10); + tcg_gen_addi_i32(cpu_gpr_a[r2], cpu_gpr_a[r2], off10); break; case OPC2_32_BO_ST_W_PREINC: gen_st_preincr(ctx, cpu_gpr_d[r1], cpu_gpr_a[r2], off10, MO_LEUL); @@ -4368,102 +4404,102 @@ static void decode_bo_addrmode_bitreverse_circular(DisasContext *ctx) uint32_t op2; uint32_t off10; int32_t r1, r2; - TCGv temp, temp2, t_off10; + TCGv_i32 temp, temp2, t_off10; r1 = MASK_OP_BO_S1D(ctx->opcode); r2 = MASK_OP_BO_S2(ctx->opcode); off10 = MASK_OP_BO_OFF10_SEXT(ctx->opcode); op2 = MASK_OP_BO_OP2(ctx->opcode); - temp = tcg_temp_new(); - temp2 = tcg_temp_new(); + temp = tcg_temp_new_i32(); + temp2 = tcg_temp_new_i32(); t_off10 = tcg_constant_i32(off10); CHECK_REG_PAIR(r2); - tcg_gen_ext16u_tl(temp, cpu_gpr_a[r2+1]); - tcg_gen_add_tl(temp2, cpu_gpr_a[r2], temp); + tcg_gen_ext16u_i32(temp, cpu_gpr_a[r2 + 1]); + tcg_gen_add_i32(temp2, cpu_gpr_a[r2], temp); switch (op2) { case OPC2_32_BO_CACHEA_WI_BR: case OPC2_32_BO_CACHEA_W_BR: case OPC2_32_BO_CACHEA_I_BR: - gen_helper_br_update(cpu_gpr_a[r2+1], cpu_gpr_a[r2+1]); + gen_helper_br_update(cpu_gpr_a[r2 + 1], cpu_gpr_a[r2 + 1]); break; case OPC2_32_BO_CACHEA_WI_CIRC: case OPC2_32_BO_CACHEA_W_CIRC: case OPC2_32_BO_CACHEA_I_CIRC: - gen_helper_circ_update(cpu_gpr_a[r2+1], cpu_gpr_a[r2+1], t_off10); + gen_helper_circ_update(cpu_gpr_a[r2 + 1], cpu_gpr_a[r2 + 1], t_off10); break; case OPC2_32_BO_ST_A_BR: - tcg_gen_qemu_st_tl(cpu_gpr_a[r1], temp2, ctx->mem_idx, MO_LEUL); - gen_helper_br_update(cpu_gpr_a[r2+1], cpu_gpr_a[r2+1]); + tcg_gen_qemu_st_i32(cpu_gpr_a[r1], temp2, ctx->mem_idx, MO_LEUL); + gen_helper_br_update(cpu_gpr_a[r2 + 1], cpu_gpr_a[r2 + 1]); break; case OPC2_32_BO_ST_A_CIRC: - tcg_gen_qemu_st_tl(cpu_gpr_a[r1], temp2, ctx->mem_idx, MO_LEUL); - gen_helper_circ_update(cpu_gpr_a[r2+1], cpu_gpr_a[r2+1], t_off10); + tcg_gen_qemu_st_i32(cpu_gpr_a[r1], temp2, ctx->mem_idx, MO_LEUL); + gen_helper_circ_update(cpu_gpr_a[r2 + 1], cpu_gpr_a[r2 + 1], t_off10); break; case OPC2_32_BO_ST_B_BR: - tcg_gen_qemu_st_tl(cpu_gpr_d[r1], temp2, ctx->mem_idx, MO_UB); - gen_helper_br_update(cpu_gpr_a[r2+1], cpu_gpr_a[r2+1]); + tcg_gen_qemu_st_i32(cpu_gpr_d[r1], temp2, ctx->mem_idx, MO_UB); + gen_helper_br_update(cpu_gpr_a[r2 + 1], cpu_gpr_a[r2 + 1]); break; case OPC2_32_BO_ST_B_CIRC: - tcg_gen_qemu_st_tl(cpu_gpr_d[r1], temp2, ctx->mem_idx, MO_UB); - gen_helper_circ_update(cpu_gpr_a[r2+1], cpu_gpr_a[r2+1], t_off10); + tcg_gen_qemu_st_i32(cpu_gpr_d[r1], temp2, ctx->mem_idx, MO_UB); + gen_helper_circ_update(cpu_gpr_a[r2 + 1], cpu_gpr_a[r2 + 1], t_off10); break; case OPC2_32_BO_ST_D_BR: CHECK_REG_PAIR(r1); - gen_st_2regs_64(cpu_gpr_d[r1+1], cpu_gpr_d[r1], temp2, ctx); - gen_helper_br_update(cpu_gpr_a[r2+1], cpu_gpr_a[r2+1]); + gen_st_2regs_64(ctx, cpu_gpr_d[r1 + 1], cpu_gpr_d[r1], temp2); + gen_helper_br_update(cpu_gpr_a[r2 + 1], cpu_gpr_a[r2 + 1]); break; case OPC2_32_BO_ST_D_CIRC: CHECK_REG_PAIR(r1); - tcg_gen_qemu_st_tl(cpu_gpr_d[r1], temp2, ctx->mem_idx, MO_LEUL); - tcg_gen_shri_tl(temp2, cpu_gpr_a[r2+1], 16); - tcg_gen_addi_tl(temp, temp, 4); - tcg_gen_rem_tl(temp, temp, temp2); - tcg_gen_add_tl(temp2, cpu_gpr_a[r2], temp); - tcg_gen_qemu_st_tl(cpu_gpr_d[r1+1], temp2, ctx->mem_idx, MO_LEUL); - gen_helper_circ_update(cpu_gpr_a[r2+1], cpu_gpr_a[r2+1], t_off10); + tcg_gen_qemu_st_i32(cpu_gpr_d[r1], temp2, ctx->mem_idx, MO_LEUL); + tcg_gen_shri_i32(temp2, cpu_gpr_a[r2 + 1], 16); + tcg_gen_addi_i32(temp, temp, 4); + tcg_gen_rem_i32(temp, temp, temp2); + tcg_gen_add_i32(temp2, cpu_gpr_a[r2], temp); + 
tcg_gen_qemu_st_i32(cpu_gpr_d[r1 + 1], temp2, ctx->mem_idx, MO_LEUL); + gen_helper_circ_update(cpu_gpr_a[r2 + 1], cpu_gpr_a[r2 + 1], t_off10); break; case OPC2_32_BO_ST_DA_BR: CHECK_REG_PAIR(r1); - gen_st_2regs_64(cpu_gpr_a[r1+1], cpu_gpr_a[r1], temp2, ctx); - gen_helper_br_update(cpu_gpr_a[r2+1], cpu_gpr_a[r2+1]); + gen_st_2regs_64(ctx, cpu_gpr_a[r1 + 1], cpu_gpr_a[r1], temp2); + gen_helper_br_update(cpu_gpr_a[r2 + 1], cpu_gpr_a[r2 + 1]); break; case OPC2_32_BO_ST_DA_CIRC: CHECK_REG_PAIR(r1); - tcg_gen_qemu_st_tl(cpu_gpr_a[r1], temp2, ctx->mem_idx, MO_LEUL); - tcg_gen_shri_tl(temp2, cpu_gpr_a[r2+1], 16); - tcg_gen_addi_tl(temp, temp, 4); - tcg_gen_rem_tl(temp, temp, temp2); - tcg_gen_add_tl(temp2, cpu_gpr_a[r2], temp); - tcg_gen_qemu_st_tl(cpu_gpr_a[r1+1], temp2, ctx->mem_idx, MO_LEUL); - gen_helper_circ_update(cpu_gpr_a[r2+1], cpu_gpr_a[r2+1], t_off10); + tcg_gen_qemu_st_i32(cpu_gpr_a[r1], temp2, ctx->mem_idx, MO_LEUL); + tcg_gen_shri_i32(temp2, cpu_gpr_a[r2 + 1], 16); + tcg_gen_addi_i32(temp, temp, 4); + tcg_gen_rem_i32(temp, temp, temp2); + tcg_gen_add_i32(temp2, cpu_gpr_a[r2], temp); + tcg_gen_qemu_st_i32(cpu_gpr_a[r1 + 1], temp2, ctx->mem_idx, MO_LEUL); + gen_helper_circ_update(cpu_gpr_a[r2 + 1], cpu_gpr_a[r2 + 1], t_off10); break; case OPC2_32_BO_ST_H_BR: - tcg_gen_qemu_st_tl(cpu_gpr_d[r1], temp2, ctx->mem_idx, MO_LEUW); - gen_helper_br_update(cpu_gpr_a[r2+1], cpu_gpr_a[r2+1]); + tcg_gen_qemu_st_i32(cpu_gpr_d[r1], temp2, ctx->mem_idx, MO_LEUW); + gen_helper_br_update(cpu_gpr_a[r2 + 1], cpu_gpr_a[r2 + 1]); break; case OPC2_32_BO_ST_H_CIRC: - tcg_gen_qemu_st_tl(cpu_gpr_d[r1], temp2, ctx->mem_idx, MO_LEUW); - gen_helper_circ_update(cpu_gpr_a[r2+1], cpu_gpr_a[r2+1], t_off10); + tcg_gen_qemu_st_i32(cpu_gpr_d[r1], temp2, ctx->mem_idx, MO_LEUW); + gen_helper_circ_update(cpu_gpr_a[r2 + 1], cpu_gpr_a[r2 + 1], t_off10); break; case OPC2_32_BO_ST_Q_BR: - tcg_gen_shri_tl(temp, cpu_gpr_d[r1], 16); - tcg_gen_qemu_st_tl(temp, temp2, ctx->mem_idx, MO_LEUW); - gen_helper_br_update(cpu_gpr_a[r2+1], cpu_gpr_a[r2+1]); + tcg_gen_shri_i32(temp, cpu_gpr_d[r1], 16); + tcg_gen_qemu_st_i32(temp, temp2, ctx->mem_idx, MO_LEUW); + gen_helper_br_update(cpu_gpr_a[r2 + 1], cpu_gpr_a[r2 + 1]); break; case OPC2_32_BO_ST_Q_CIRC: - tcg_gen_shri_tl(temp, cpu_gpr_d[r1], 16); - tcg_gen_qemu_st_tl(temp, temp2, ctx->mem_idx, MO_LEUW); - gen_helper_circ_update(cpu_gpr_a[r2+1], cpu_gpr_a[r2+1], t_off10); + tcg_gen_shri_i32(temp, cpu_gpr_d[r1], 16); + tcg_gen_qemu_st_i32(temp, temp2, ctx->mem_idx, MO_LEUW); + gen_helper_circ_update(cpu_gpr_a[r2 + 1], cpu_gpr_a[r2 + 1], t_off10); break; case OPC2_32_BO_ST_W_BR: - tcg_gen_qemu_st_tl(cpu_gpr_d[r1], temp2, ctx->mem_idx, MO_LEUL); - gen_helper_br_update(cpu_gpr_a[r2+1], cpu_gpr_a[r2+1]); + tcg_gen_qemu_st_i32(cpu_gpr_d[r1], temp2, ctx->mem_idx, MO_LEUL); + gen_helper_br_update(cpu_gpr_a[r2 + 1], cpu_gpr_a[r2 + 1]); break; case OPC2_32_BO_ST_W_CIRC: - tcg_gen_qemu_st_tl(cpu_gpr_d[r1], temp2, ctx->mem_idx, MO_LEUL); - gen_helper_circ_update(cpu_gpr_a[r2+1], cpu_gpr_a[r2+1], t_off10); + tcg_gen_qemu_st_i32(cpu_gpr_d[r1], temp2, ctx->mem_idx, MO_LEUL); + gen_helper_circ_update(cpu_gpr_a[r2 + 1], cpu_gpr_a[r2 + 1], t_off10); break; default: generate_trap(ctx, TRAPC_INSN_ERR, TIN2_IOPC); @@ -4475,7 +4511,7 @@ static void decode_bo_addrmode_ld_post_pre_base(DisasContext *ctx) uint32_t op2; uint32_t off10; int32_t r1, r2; - TCGv temp; + TCGv_i32 temp; r1 = MASK_OP_BO_S1D(ctx->opcode); r2 = MASK_OP_BO_S2(ctx->opcode); @@ -4487,9 +4523,9 @@ static void 
decode_bo_addrmode_ld_post_pre_base(DisasContext *ctx) gen_offset_ld(ctx, cpu_gpr_a[r1], cpu_gpr_a[r2], off10, MO_LEUL); break; case OPC2_32_BO_LD_A_POSTINC: - tcg_gen_qemu_ld_tl(cpu_gpr_a[r1], cpu_gpr_a[r2], ctx->mem_idx, + tcg_gen_qemu_ld_i32(cpu_gpr_a[r1], cpu_gpr_a[r2], ctx->mem_idx, MO_LEUL); - tcg_gen_addi_tl(cpu_gpr_a[r2], cpu_gpr_a[r2], off10); + tcg_gen_addi_i32(cpu_gpr_a[r2], cpu_gpr_a[r2], off10); break; case OPC2_32_BO_LD_A_PREINC: gen_ld_preincr(ctx, cpu_gpr_a[r1], cpu_gpr_a[r2], off10, MO_LEUL); @@ -4498,9 +4534,9 @@ static void decode_bo_addrmode_ld_post_pre_base(DisasContext *ctx) gen_offset_ld(ctx, cpu_gpr_d[r1], cpu_gpr_a[r2], off10, MO_SB); break; case OPC2_32_BO_LD_B_POSTINC: - tcg_gen_qemu_ld_tl(cpu_gpr_d[r1], cpu_gpr_a[r2], ctx->mem_idx, + tcg_gen_qemu_ld_i32(cpu_gpr_d[r1], cpu_gpr_a[r2], ctx->mem_idx, MO_SB); - tcg_gen_addi_tl(cpu_gpr_a[r2], cpu_gpr_a[r2], off10); + tcg_gen_addi_i32(cpu_gpr_a[r2], cpu_gpr_a[r2], off10); break; case OPC2_32_BO_LD_B_PREINC: gen_ld_preincr(ctx, cpu_gpr_d[r1], cpu_gpr_a[r2], off10, MO_SB); @@ -4509,54 +4545,56 @@ static void decode_bo_addrmode_ld_post_pre_base(DisasContext *ctx) gen_offset_ld(ctx, cpu_gpr_d[r1], cpu_gpr_a[r2], off10, MO_UB); break; case OPC2_32_BO_LD_BU_POSTINC: - tcg_gen_qemu_ld_tl(cpu_gpr_d[r1], cpu_gpr_a[r2], ctx->mem_idx, + tcg_gen_qemu_ld_i32(cpu_gpr_d[r1], cpu_gpr_a[r2], ctx->mem_idx, MO_UB); - tcg_gen_addi_tl(cpu_gpr_a[r2], cpu_gpr_a[r2], off10); + tcg_gen_addi_i32(cpu_gpr_a[r2], cpu_gpr_a[r2], off10); break; case OPC2_32_BO_LD_BU_PREINC: gen_ld_preincr(ctx, cpu_gpr_d[r1], cpu_gpr_a[r2], off10, MO_UB); break; case OPC2_32_BO_LD_D_SHORTOFF: CHECK_REG_PAIR(r1); - gen_offset_ld_2regs(cpu_gpr_d[r1+1], cpu_gpr_d[r1], cpu_gpr_a[r2], - off10, ctx); + gen_offset_ld_2regs(ctx, + cpu_gpr_d[r1 + 1], cpu_gpr_d[r1], cpu_gpr_a[r2], + off10); break; case OPC2_32_BO_LD_D_POSTINC: CHECK_REG_PAIR(r1); - gen_ld_2regs_64(cpu_gpr_d[r1+1], cpu_gpr_d[r1], cpu_gpr_a[r2], ctx); - tcg_gen_addi_tl(cpu_gpr_a[r2], cpu_gpr_a[r2], off10); + gen_ld_2regs_64(ctx, cpu_gpr_d[r1 + 1], cpu_gpr_d[r1], cpu_gpr_a[r2]); + tcg_gen_addi_i32(cpu_gpr_a[r2], cpu_gpr_a[r2], off10); break; case OPC2_32_BO_LD_D_PREINC: CHECK_REG_PAIR(r1); - temp = tcg_temp_new(); - tcg_gen_addi_tl(temp, cpu_gpr_a[r2], off10); - gen_ld_2regs_64(cpu_gpr_d[r1+1], cpu_gpr_d[r1], temp, ctx); - tcg_gen_mov_tl(cpu_gpr_a[r2], temp); + temp = tcg_temp_new_i32(); + tcg_gen_addi_i32(temp, cpu_gpr_a[r2], off10); + gen_ld_2regs_64(ctx, cpu_gpr_d[r1 + 1], cpu_gpr_d[r1], temp); + tcg_gen_mov_i32(cpu_gpr_a[r2], temp); break; case OPC2_32_BO_LD_DA_SHORTOFF: CHECK_REG_PAIR(r1); - gen_offset_ld_2regs(cpu_gpr_a[r1+1], cpu_gpr_a[r1], cpu_gpr_a[r2], - off10, ctx); + gen_offset_ld_2regs(ctx, + cpu_gpr_a[r1 + 1], cpu_gpr_a[r1], cpu_gpr_a[r2], + off10); break; case OPC2_32_BO_LD_DA_POSTINC: CHECK_REG_PAIR(r1); - gen_ld_2regs_64(cpu_gpr_a[r1+1], cpu_gpr_a[r1], cpu_gpr_a[r2], ctx); - tcg_gen_addi_tl(cpu_gpr_a[r2], cpu_gpr_a[r2], off10); + gen_ld_2regs_64(ctx, cpu_gpr_a[r1 + 1], cpu_gpr_a[r1], cpu_gpr_a[r2]); + tcg_gen_addi_i32(cpu_gpr_a[r2], cpu_gpr_a[r2], off10); break; case OPC2_32_BO_LD_DA_PREINC: CHECK_REG_PAIR(r1); - temp = tcg_temp_new(); - tcg_gen_addi_tl(temp, cpu_gpr_a[r2], off10); - gen_ld_2regs_64(cpu_gpr_a[r1+1], cpu_gpr_a[r1], temp, ctx); - tcg_gen_mov_tl(cpu_gpr_a[r2], temp); + temp = tcg_temp_new_i32(); + tcg_gen_addi_i32(temp, cpu_gpr_a[r2], off10); + gen_ld_2regs_64(ctx, cpu_gpr_a[r1 + 1], cpu_gpr_a[r1], temp); + tcg_gen_mov_i32(cpu_gpr_a[r2], temp); break; case OPC2_32_BO_LD_H_SHORTOFF: 
gen_offset_ld(ctx, cpu_gpr_d[r1], cpu_gpr_a[r2], off10, MO_LESW); break; case OPC2_32_BO_LD_H_POSTINC: - tcg_gen_qemu_ld_tl(cpu_gpr_d[r1], cpu_gpr_a[r2], ctx->mem_idx, + tcg_gen_qemu_ld_i32(cpu_gpr_d[r1], cpu_gpr_a[r2], ctx->mem_idx, MO_LESW); - tcg_gen_addi_tl(cpu_gpr_a[r2], cpu_gpr_a[r2], off10); + tcg_gen_addi_i32(cpu_gpr_a[r2], cpu_gpr_a[r2], off10); break; case OPC2_32_BO_LD_H_PREINC: gen_ld_preincr(ctx, cpu_gpr_d[r1], cpu_gpr_a[r2], off10, MO_LESW); @@ -4565,34 +4603,34 @@ static void decode_bo_addrmode_ld_post_pre_base(DisasContext *ctx) gen_offset_ld(ctx, cpu_gpr_d[r1], cpu_gpr_a[r2], off10, MO_LEUW); break; case OPC2_32_BO_LD_HU_POSTINC: - tcg_gen_qemu_ld_tl(cpu_gpr_d[r1], cpu_gpr_a[r2], ctx->mem_idx, + tcg_gen_qemu_ld_i32(cpu_gpr_d[r1], cpu_gpr_a[r2], ctx->mem_idx, MO_LEUW); - tcg_gen_addi_tl(cpu_gpr_a[r2], cpu_gpr_a[r2], off10); + tcg_gen_addi_i32(cpu_gpr_a[r2], cpu_gpr_a[r2], off10); break; case OPC2_32_BO_LD_HU_PREINC: gen_ld_preincr(ctx, cpu_gpr_d[r1], cpu_gpr_a[r2], off10, MO_LEUW); break; case OPC2_32_BO_LD_Q_SHORTOFF: gen_offset_ld(ctx, cpu_gpr_d[r1], cpu_gpr_a[r2], off10, MO_LEUW); - tcg_gen_shli_tl(cpu_gpr_d[r1], cpu_gpr_d[r1], 16); + tcg_gen_shli_i32(cpu_gpr_d[r1], cpu_gpr_d[r1], 16); break; case OPC2_32_BO_LD_Q_POSTINC: - tcg_gen_qemu_ld_tl(cpu_gpr_d[r1], cpu_gpr_a[r2], ctx->mem_idx, + tcg_gen_qemu_ld_i32(cpu_gpr_d[r1], cpu_gpr_a[r2], ctx->mem_idx, MO_LEUW); - tcg_gen_shli_tl(cpu_gpr_d[r1], cpu_gpr_d[r1], 16); - tcg_gen_addi_tl(cpu_gpr_a[r2], cpu_gpr_a[r2], off10); + tcg_gen_shli_i32(cpu_gpr_d[r1], cpu_gpr_d[r1], 16); + tcg_gen_addi_i32(cpu_gpr_a[r2], cpu_gpr_a[r2], off10); break; case OPC2_32_BO_LD_Q_PREINC: gen_ld_preincr(ctx, cpu_gpr_d[r1], cpu_gpr_a[r2], off10, MO_LEUW); - tcg_gen_shli_tl(cpu_gpr_d[r1], cpu_gpr_d[r1], 16); + tcg_gen_shli_i32(cpu_gpr_d[r1], cpu_gpr_d[r1], 16); break; case OPC2_32_BO_LD_W_SHORTOFF: gen_offset_ld(ctx, cpu_gpr_d[r1], cpu_gpr_a[r2], off10, MO_LEUL); break; case OPC2_32_BO_LD_W_POSTINC: - tcg_gen_qemu_ld_tl(cpu_gpr_d[r1], cpu_gpr_a[r2], ctx->mem_idx, + tcg_gen_qemu_ld_i32(cpu_gpr_d[r1], cpu_gpr_a[r2], ctx->mem_idx, MO_LEUL); - tcg_gen_addi_tl(cpu_gpr_a[r2], cpu_gpr_a[r2], off10); + tcg_gen_addi_i32(cpu_gpr_a[r2], cpu_gpr_a[r2], off10); break; case OPC2_32_BO_LD_W_PREINC: gen_ld_preincr(ctx, cpu_gpr_d[r1], cpu_gpr_a[r2], off10, MO_LEUL); @@ -4607,109 +4645,109 @@ static void decode_bo_addrmode_ld_bitreverse_circular(DisasContext *ctx) uint32_t op2; uint32_t off10; int r1, r2; - TCGv temp, temp2, t_off10; + TCGv_i32 temp, temp2, t_off10; r1 = MASK_OP_BO_S1D(ctx->opcode); r2 = MASK_OP_BO_S2(ctx->opcode); off10 = MASK_OP_BO_OFF10_SEXT(ctx->opcode); op2 = MASK_OP_BO_OP2(ctx->opcode); - temp = tcg_temp_new(); - temp2 = tcg_temp_new(); + temp = tcg_temp_new_i32(); + temp2 = tcg_temp_new_i32(); t_off10 = tcg_constant_i32(off10); CHECK_REG_PAIR(r2); - tcg_gen_ext16u_tl(temp, cpu_gpr_a[r2+1]); - tcg_gen_add_tl(temp2, cpu_gpr_a[r2], temp); + tcg_gen_ext16u_i32(temp, cpu_gpr_a[r2 + 1]); + tcg_gen_add_i32(temp2, cpu_gpr_a[r2], temp); switch (op2) { case OPC2_32_BO_LD_A_BR: - tcg_gen_qemu_ld_tl(cpu_gpr_a[r1], temp2, ctx->mem_idx, MO_LEUL); - gen_helper_br_update(cpu_gpr_a[r2+1], cpu_gpr_a[r2+1]); + tcg_gen_qemu_ld_i32(cpu_gpr_a[r1], temp2, ctx->mem_idx, MO_LEUL); + gen_helper_br_update(cpu_gpr_a[r2 + 1], cpu_gpr_a[r2 + 1]); break; case OPC2_32_BO_LD_A_CIRC: - tcg_gen_qemu_ld_tl(cpu_gpr_a[r1], temp2, ctx->mem_idx, MO_LEUL); - gen_helper_circ_update(cpu_gpr_a[r2+1], cpu_gpr_a[r2+1], t_off10); + tcg_gen_qemu_ld_i32(cpu_gpr_a[r1], temp2, ctx->mem_idx, 
MO_LEUL); + gen_helper_circ_update(cpu_gpr_a[r2 + 1], cpu_gpr_a[r2 + 1], t_off10); break; case OPC2_32_BO_LD_B_BR: - tcg_gen_qemu_ld_tl(cpu_gpr_d[r1], temp2, ctx->mem_idx, MO_SB); - gen_helper_br_update(cpu_gpr_a[r2+1], cpu_gpr_a[r2+1]); + tcg_gen_qemu_ld_i32(cpu_gpr_d[r1], temp2, ctx->mem_idx, MO_SB); + gen_helper_br_update(cpu_gpr_a[r2 + 1], cpu_gpr_a[r2 + 1]); break; case OPC2_32_BO_LD_B_CIRC: - tcg_gen_qemu_ld_tl(cpu_gpr_d[r1], temp2, ctx->mem_idx, MO_SB); - gen_helper_circ_update(cpu_gpr_a[r2+1], cpu_gpr_a[r2+1], t_off10); + tcg_gen_qemu_ld_i32(cpu_gpr_d[r1], temp2, ctx->mem_idx, MO_SB); + gen_helper_circ_update(cpu_gpr_a[r2 + 1], cpu_gpr_a[r2 + 1], t_off10); break; case OPC2_32_BO_LD_BU_BR: - tcg_gen_qemu_ld_tl(cpu_gpr_d[r1], temp2, ctx->mem_idx, MO_UB); - gen_helper_br_update(cpu_gpr_a[r2+1], cpu_gpr_a[r2+1]); + tcg_gen_qemu_ld_i32(cpu_gpr_d[r1], temp2, ctx->mem_idx, MO_UB); + gen_helper_br_update(cpu_gpr_a[r2 + 1], cpu_gpr_a[r2 + 1]); break; case OPC2_32_BO_LD_BU_CIRC: - tcg_gen_qemu_ld_tl(cpu_gpr_d[r1], temp2, ctx->mem_idx, MO_UB); - gen_helper_circ_update(cpu_gpr_a[r2+1], cpu_gpr_a[r2+1], t_off10); + tcg_gen_qemu_ld_i32(cpu_gpr_d[r1], temp2, ctx->mem_idx, MO_UB); + gen_helper_circ_update(cpu_gpr_a[r2 + 1], cpu_gpr_a[r2 + 1], t_off10); break; case OPC2_32_BO_LD_D_BR: CHECK_REG_PAIR(r1); - gen_ld_2regs_64(cpu_gpr_d[r1+1], cpu_gpr_d[r1], temp2, ctx); - gen_helper_br_update(cpu_gpr_a[r2+1], cpu_gpr_a[r2+1]); + gen_ld_2regs_64(ctx, cpu_gpr_d[r1 + 1], cpu_gpr_d[r1], temp2); + gen_helper_br_update(cpu_gpr_a[r2 + 1], cpu_gpr_a[r2 + 1]); break; case OPC2_32_BO_LD_D_CIRC: CHECK_REG_PAIR(r1); - tcg_gen_qemu_ld_tl(cpu_gpr_d[r1], temp2, ctx->mem_idx, MO_LEUL); - tcg_gen_shri_tl(temp2, cpu_gpr_a[r2+1], 16); - tcg_gen_addi_tl(temp, temp, 4); - tcg_gen_rem_tl(temp, temp, temp2); - tcg_gen_add_tl(temp2, cpu_gpr_a[r2], temp); - tcg_gen_qemu_ld_tl(cpu_gpr_d[r1+1], temp2, ctx->mem_idx, MO_LEUL); - gen_helper_circ_update(cpu_gpr_a[r2+1], cpu_gpr_a[r2+1], t_off10); + tcg_gen_qemu_ld_i32(cpu_gpr_d[r1], temp2, ctx->mem_idx, MO_LEUL); + tcg_gen_shri_i32(temp2, cpu_gpr_a[r2 + 1], 16); + tcg_gen_addi_i32(temp, temp, 4); + tcg_gen_rem_i32(temp, temp, temp2); + tcg_gen_add_i32(temp2, cpu_gpr_a[r2], temp); + tcg_gen_qemu_ld_i32(cpu_gpr_d[r1 + 1], temp2, ctx->mem_idx, MO_LEUL); + gen_helper_circ_update(cpu_gpr_a[r2 + 1], cpu_gpr_a[r2 + 1], t_off10); break; case OPC2_32_BO_LD_DA_BR: CHECK_REG_PAIR(r1); - gen_ld_2regs_64(cpu_gpr_a[r1+1], cpu_gpr_a[r1], temp2, ctx); - gen_helper_br_update(cpu_gpr_a[r2+1], cpu_gpr_a[r2+1]); + gen_ld_2regs_64(ctx, cpu_gpr_a[r1 + 1], cpu_gpr_a[r1], temp2); + gen_helper_br_update(cpu_gpr_a[r2 + 1], cpu_gpr_a[r2 + 1]); break; case OPC2_32_BO_LD_DA_CIRC: CHECK_REG_PAIR(r1); - tcg_gen_qemu_ld_tl(cpu_gpr_a[r1], temp2, ctx->mem_idx, MO_LEUL); - tcg_gen_shri_tl(temp2, cpu_gpr_a[r2+1], 16); - tcg_gen_addi_tl(temp, temp, 4); - tcg_gen_rem_tl(temp, temp, temp2); - tcg_gen_add_tl(temp2, cpu_gpr_a[r2], temp); - tcg_gen_qemu_ld_tl(cpu_gpr_a[r1+1], temp2, ctx->mem_idx, MO_LEUL); - gen_helper_circ_update(cpu_gpr_a[r2+1], cpu_gpr_a[r2+1], t_off10); + tcg_gen_qemu_ld_i32(cpu_gpr_a[r1], temp2, ctx->mem_idx, MO_LEUL); + tcg_gen_shri_i32(temp2, cpu_gpr_a[r2 + 1], 16); + tcg_gen_addi_i32(temp, temp, 4); + tcg_gen_rem_i32(temp, temp, temp2); + tcg_gen_add_i32(temp2, cpu_gpr_a[r2], temp); + tcg_gen_qemu_ld_i32(cpu_gpr_a[r1 + 1], temp2, ctx->mem_idx, MO_LEUL); + gen_helper_circ_update(cpu_gpr_a[r2 + 1], cpu_gpr_a[r2 + 1], t_off10); break; case OPC2_32_BO_LD_H_BR: - tcg_gen_qemu_ld_tl(cpu_gpr_d[r1], temp2, 
ctx->mem_idx, MO_LESW); - gen_helper_br_update(cpu_gpr_a[r2+1], cpu_gpr_a[r2+1]); + tcg_gen_qemu_ld_i32(cpu_gpr_d[r1], temp2, ctx->mem_idx, MO_LESW); + gen_helper_br_update(cpu_gpr_a[r2 + 1], cpu_gpr_a[r2 + 1]); break; case OPC2_32_BO_LD_H_CIRC: - tcg_gen_qemu_ld_tl(cpu_gpr_d[r1], temp2, ctx->mem_idx, MO_LESW); - gen_helper_circ_update(cpu_gpr_a[r2+1], cpu_gpr_a[r2+1], t_off10); + tcg_gen_qemu_ld_i32(cpu_gpr_d[r1], temp2, ctx->mem_idx, MO_LESW); + gen_helper_circ_update(cpu_gpr_a[r2 + 1], cpu_gpr_a[r2 + 1], t_off10); break; case OPC2_32_BO_LD_HU_BR: - tcg_gen_qemu_ld_tl(cpu_gpr_d[r1], temp2, ctx->mem_idx, MO_LEUW); - gen_helper_br_update(cpu_gpr_a[r2+1], cpu_gpr_a[r2+1]); + tcg_gen_qemu_ld_i32(cpu_gpr_d[r1], temp2, ctx->mem_idx, MO_LEUW); + gen_helper_br_update(cpu_gpr_a[r2 + 1], cpu_gpr_a[r2 + 1]); break; case OPC2_32_BO_LD_HU_CIRC: - tcg_gen_qemu_ld_tl(cpu_gpr_d[r1], temp2, ctx->mem_idx, MO_LEUW); - gen_helper_circ_update(cpu_gpr_a[r2+1], cpu_gpr_a[r2+1], t_off10); + tcg_gen_qemu_ld_i32(cpu_gpr_d[r1], temp2, ctx->mem_idx, MO_LEUW); + gen_helper_circ_update(cpu_gpr_a[r2 + 1], cpu_gpr_a[r2 + 1], t_off10); break; case OPC2_32_BO_LD_Q_BR: - tcg_gen_qemu_ld_tl(cpu_gpr_d[r1], temp2, ctx->mem_idx, MO_LEUW); - tcg_gen_shli_tl(cpu_gpr_d[r1], cpu_gpr_d[r1], 16); - gen_helper_br_update(cpu_gpr_a[r2+1], cpu_gpr_a[r2+1]); + tcg_gen_qemu_ld_i32(cpu_gpr_d[r1], temp2, ctx->mem_idx, MO_LEUW); + tcg_gen_shli_i32(cpu_gpr_d[r1], cpu_gpr_d[r1], 16); + gen_helper_br_update(cpu_gpr_a[r2 + 1], cpu_gpr_a[r2 + 1]); break; case OPC2_32_BO_LD_Q_CIRC: - tcg_gen_qemu_ld_tl(cpu_gpr_d[r1], temp2, ctx->mem_idx, MO_LEUW); - tcg_gen_shli_tl(cpu_gpr_d[r1], cpu_gpr_d[r1], 16); - gen_helper_circ_update(cpu_gpr_a[r2+1], cpu_gpr_a[r2+1], t_off10); + tcg_gen_qemu_ld_i32(cpu_gpr_d[r1], temp2, ctx->mem_idx, MO_LEUW); + tcg_gen_shli_i32(cpu_gpr_d[r1], cpu_gpr_d[r1], 16); + gen_helper_circ_update(cpu_gpr_a[r2 + 1], cpu_gpr_a[r2 + 1], t_off10); break; case OPC2_32_BO_LD_W_BR: - tcg_gen_qemu_ld_tl(cpu_gpr_d[r1], temp2, ctx->mem_idx, MO_LEUL); - gen_helper_br_update(cpu_gpr_a[r2+1], cpu_gpr_a[r2+1]); + tcg_gen_qemu_ld_i32(cpu_gpr_d[r1], temp2, ctx->mem_idx, MO_LEUL); + gen_helper_br_update(cpu_gpr_a[r2 + 1], cpu_gpr_a[r2 + 1]); break; case OPC2_32_BO_LD_W_CIRC: - tcg_gen_qemu_ld_tl(cpu_gpr_d[r1], temp2, ctx->mem_idx, MO_LEUL); - gen_helper_circ_update(cpu_gpr_a[r2+1], cpu_gpr_a[r2+1], t_off10); + tcg_gen_qemu_ld_i32(cpu_gpr_d[r1], temp2, ctx->mem_idx, MO_LEUL); + gen_helper_circ_update(cpu_gpr_a[r2 + 1], cpu_gpr_a[r2 + 1], t_off10); break; default: generate_trap(ctx, TRAPC_INSN_ERR, TIN2_IOPC); @@ -4722,7 +4760,7 @@ static void decode_bo_addrmode_stctx_post_pre_base(DisasContext *ctx) uint32_t off10; int r1, r2; - TCGv temp; + TCGv_i32 temp; r1 = MASK_OP_BO_S1D(ctx->opcode); r2 = MASK_OP_BO_S2(ctx->opcode); @@ -4730,74 +4768,74 @@ static void decode_bo_addrmode_stctx_post_pre_base(DisasContext *ctx) op2 = MASK_OP_BO_OP2(ctx->opcode); - temp = tcg_temp_new(); + temp = tcg_temp_new_i32(); switch (op2) { case OPC2_32_BO_LDLCX_SHORTOFF: - tcg_gen_addi_tl(temp, cpu_gpr_a[r2], off10); + tcg_gen_addi_i32(temp, cpu_gpr_a[r2], off10); gen_helper_ldlcx(tcg_env, temp); break; case OPC2_32_BO_LDMST_SHORTOFF: - tcg_gen_addi_tl(temp, cpu_gpr_a[r2], off10); + tcg_gen_addi_i32(temp, cpu_gpr_a[r2], off10); gen_ldmst(ctx, r1, temp); break; case OPC2_32_BO_LDMST_POSTINC: gen_ldmst(ctx, r1, cpu_gpr_a[r2]); - tcg_gen_addi_tl(cpu_gpr_a[r2], cpu_gpr_a[r2], off10); + tcg_gen_addi_i32(cpu_gpr_a[r2], cpu_gpr_a[r2], off10); break; case OPC2_32_BO_LDMST_PREINC: 
- tcg_gen_addi_tl(cpu_gpr_a[r2], cpu_gpr_a[r2], off10); + tcg_gen_addi_i32(cpu_gpr_a[r2], cpu_gpr_a[r2], off10); gen_ldmst(ctx, r1, cpu_gpr_a[r2]); break; case OPC2_32_BO_LDUCX_SHORTOFF: - tcg_gen_addi_tl(temp, cpu_gpr_a[r2], off10); + tcg_gen_addi_i32(temp, cpu_gpr_a[r2], off10); gen_helper_lducx(tcg_env, temp); break; case OPC2_32_BO_LEA_SHORTOFF: - tcg_gen_addi_tl(cpu_gpr_a[r1], cpu_gpr_a[r2], off10); + tcg_gen_addi_i32(cpu_gpr_a[r1], cpu_gpr_a[r2], off10); break; case OPC2_32_BO_STLCX_SHORTOFF: - tcg_gen_addi_tl(temp, cpu_gpr_a[r2], off10); + tcg_gen_addi_i32(temp, cpu_gpr_a[r2], off10); gen_helper_stlcx(tcg_env, temp); break; case OPC2_32_BO_STUCX_SHORTOFF: - tcg_gen_addi_tl(temp, cpu_gpr_a[r2], off10); + tcg_gen_addi_i32(temp, cpu_gpr_a[r2], off10); gen_helper_stucx(tcg_env, temp); break; case OPC2_32_BO_SWAP_W_SHORTOFF: - tcg_gen_addi_tl(temp, cpu_gpr_a[r2], off10); + tcg_gen_addi_i32(temp, cpu_gpr_a[r2], off10); gen_swap(ctx, r1, temp); break; case OPC2_32_BO_SWAP_W_POSTINC: gen_swap(ctx, r1, cpu_gpr_a[r2]); - tcg_gen_addi_tl(cpu_gpr_a[r2], cpu_gpr_a[r2], off10); + tcg_gen_addi_i32(cpu_gpr_a[r2], cpu_gpr_a[r2], off10); break; case OPC2_32_BO_SWAP_W_PREINC: - tcg_gen_addi_tl(cpu_gpr_a[r2], cpu_gpr_a[r2], off10); + tcg_gen_addi_i32(cpu_gpr_a[r2], cpu_gpr_a[r2], off10); gen_swap(ctx, r1, cpu_gpr_a[r2]); break; case OPC2_32_BO_CMPSWAP_W_SHORTOFF: - tcg_gen_addi_tl(temp, cpu_gpr_a[r2], off10); + tcg_gen_addi_i32(temp, cpu_gpr_a[r2], off10); gen_cmpswap(ctx, r1, temp); break; case OPC2_32_BO_CMPSWAP_W_POSTINC: gen_cmpswap(ctx, r1, cpu_gpr_a[r2]); - tcg_gen_addi_tl(cpu_gpr_a[r2], cpu_gpr_a[r2], off10); + tcg_gen_addi_i32(cpu_gpr_a[r2], cpu_gpr_a[r2], off10); break; case OPC2_32_BO_CMPSWAP_W_PREINC: - tcg_gen_addi_tl(cpu_gpr_a[r2], cpu_gpr_a[r2], off10); + tcg_gen_addi_i32(cpu_gpr_a[r2], cpu_gpr_a[r2], off10); gen_cmpswap(ctx, r1, cpu_gpr_a[r2]); break; case OPC2_32_BO_SWAPMSK_W_SHORTOFF: - tcg_gen_addi_tl(temp, cpu_gpr_a[r2], off10); + tcg_gen_addi_i32(temp, cpu_gpr_a[r2], off10); gen_swapmsk(ctx, r1, temp); break; case OPC2_32_BO_SWAPMSK_W_POSTINC: gen_swapmsk(ctx, r1, cpu_gpr_a[r2]); - tcg_gen_addi_tl(cpu_gpr_a[r2], cpu_gpr_a[r2], off10); + tcg_gen_addi_i32(cpu_gpr_a[r2], cpu_gpr_a[r2], off10); break; case OPC2_32_BO_SWAPMSK_W_PREINC: - tcg_gen_addi_tl(cpu_gpr_a[r2], cpu_gpr_a[r2], off10); + tcg_gen_addi_i32(cpu_gpr_a[r2], cpu_gpr_a[r2], off10); gen_swapmsk(ctx, r1, cpu_gpr_a[r2]); break; default: @@ -4810,52 +4848,52 @@ static void decode_bo_addrmode_ldmst_bitreverse_circular(DisasContext *ctx) uint32_t op2; uint32_t off10; int r1, r2; - TCGv temp, temp2, t_off10; + TCGv_i32 temp, temp2, t_off10; r1 = MASK_OP_BO_S1D(ctx->opcode); r2 = MASK_OP_BO_S2(ctx->opcode); off10 = MASK_OP_BO_OFF10_SEXT(ctx->opcode); op2 = MASK_OP_BO_OP2(ctx->opcode); - temp = tcg_temp_new(); - temp2 = tcg_temp_new(); + temp = tcg_temp_new_i32(); + temp2 = tcg_temp_new_i32(); t_off10 = tcg_constant_i32(off10); CHECK_REG_PAIR(r2); - tcg_gen_ext16u_tl(temp, cpu_gpr_a[r2+1]); - tcg_gen_add_tl(temp2, cpu_gpr_a[r2], temp); + tcg_gen_ext16u_i32(temp, cpu_gpr_a[r2 + 1]); + tcg_gen_add_i32(temp2, cpu_gpr_a[r2], temp); switch (op2) { case OPC2_32_BO_LDMST_BR: gen_ldmst(ctx, r1, temp2); - gen_helper_br_update(cpu_gpr_a[r2+1], cpu_gpr_a[r2+1]); + gen_helper_br_update(cpu_gpr_a[r2 + 1], cpu_gpr_a[r2 + 1]); break; case OPC2_32_BO_LDMST_CIRC: gen_ldmst(ctx, r1, temp2); - gen_helper_circ_update(cpu_gpr_a[r2+1], cpu_gpr_a[r2+1], t_off10); + gen_helper_circ_update(cpu_gpr_a[r2 + 1], cpu_gpr_a[r2 + 1], t_off10); break; case 
OPC2_32_BO_SWAP_W_BR: gen_swap(ctx, r1, temp2); - gen_helper_br_update(cpu_gpr_a[r2+1], cpu_gpr_a[r2+1]); + gen_helper_br_update(cpu_gpr_a[r2 + 1], cpu_gpr_a[r2 + 1]); break; case OPC2_32_BO_SWAP_W_CIRC: gen_swap(ctx, r1, temp2); - gen_helper_circ_update(cpu_gpr_a[r2+1], cpu_gpr_a[r2+1], t_off10); + gen_helper_circ_update(cpu_gpr_a[r2 + 1], cpu_gpr_a[r2 + 1], t_off10); break; case OPC2_32_BO_CMPSWAP_W_BR: gen_cmpswap(ctx, r1, temp2); - gen_helper_br_update(cpu_gpr_a[r2+1], cpu_gpr_a[r2+1]); + gen_helper_br_update(cpu_gpr_a[r2 + 1], cpu_gpr_a[r2 + 1]); break; case OPC2_32_BO_CMPSWAP_W_CIRC: gen_cmpswap(ctx, r1, temp2); - gen_helper_circ_update(cpu_gpr_a[r2+1], cpu_gpr_a[r2+1], t_off10); + gen_helper_circ_update(cpu_gpr_a[r2 + 1], cpu_gpr_a[r2 + 1], t_off10); break; case OPC2_32_BO_SWAPMSK_W_BR: gen_swapmsk(ctx, r1, temp2); - gen_helper_br_update(cpu_gpr_a[r2+1], cpu_gpr_a[r2+1]); + gen_helper_br_update(cpu_gpr_a[r2 + 1], cpu_gpr_a[r2 + 1]); break; case OPC2_32_BO_SWAPMSK_W_CIRC: gen_swapmsk(ctx, r1, temp2); - gen_helper_circ_update(cpu_gpr_a[r2+1], cpu_gpr_a[r2+1], t_off10); + gen_helper_circ_update(cpu_gpr_a[r2 + 1], cpu_gpr_a[r2 + 1], t_off10); break; default: generate_trap(ctx, TRAPC_INSN_ERR, TIN2_IOPC); @@ -4866,7 +4904,7 @@ static void decode_bol_opc(DisasContext *ctx, int32_t op1) { int r1, r2; int32_t address; - TCGv temp; + TCGv_i32 temp; r1 = MASK_OP_BOL_S1D(ctx->opcode); r2 = MASK_OP_BOL_S2(ctx->opcode); @@ -4874,17 +4912,17 @@ static void decode_bol_opc(DisasContext *ctx, int32_t op1) switch (op1) { case OPC1_32_BOL_LD_A_LONGOFF: - temp = tcg_temp_new(); - tcg_gen_addi_tl(temp, cpu_gpr_a[r2], address); - tcg_gen_qemu_ld_tl(cpu_gpr_a[r1], temp, ctx->mem_idx, MO_LEUL); + temp = tcg_temp_new_i32(); + tcg_gen_addi_i32(temp, cpu_gpr_a[r2], address); + tcg_gen_qemu_ld_i32(cpu_gpr_a[r1], temp, ctx->mem_idx, MO_LEUL); break; case OPC1_32_BOL_LD_W_LONGOFF: - temp = tcg_temp_new(); - tcg_gen_addi_tl(temp, cpu_gpr_a[r2], address); - tcg_gen_qemu_ld_tl(cpu_gpr_d[r1], temp, ctx->mem_idx, MO_LEUL); + temp = tcg_temp_new_i32(); + tcg_gen_addi_i32(temp, cpu_gpr_a[r2], address); + tcg_gen_qemu_ld_i32(cpu_gpr_d[r1], temp, ctx->mem_idx, MO_LEUL); break; case OPC1_32_BOL_LEA_LONGOFF: - tcg_gen_addi_tl(cpu_gpr_a[r1], cpu_gpr_a[r2], address); + tcg_gen_addi_i32(cpu_gpr_a[r1], cpu_gpr_a[r2], address); break; case OPC1_32_BOL_ST_A_LONGOFF: if (has_feature(ctx, TRICORE_FEATURE_16)) { @@ -4949,7 +4987,7 @@ static void decode_rc_logical_shift(DisasContext *ctx) uint32_t op2; int r1, r2; int32_t const9; - TCGv temp; + TCGv_i32 temp; r2 = MASK_OP_RC_D(ctx->opcode); r1 = MASK_OP_RC_S1(ctx->opcode); @@ -4958,26 +4996,26 @@ static void decode_rc_logical_shift(DisasContext *ctx) switch (op2) { case OPC2_32_RC_AND: - tcg_gen_andi_tl(cpu_gpr_d[r2], cpu_gpr_d[r1], const9); + tcg_gen_andi_i32(cpu_gpr_d[r2], cpu_gpr_d[r1], const9); break; case OPC2_32_RC_ANDN: - tcg_gen_andi_tl(cpu_gpr_d[r2], cpu_gpr_d[r1], ~const9); + tcg_gen_andi_i32(cpu_gpr_d[r2], cpu_gpr_d[r1], ~const9); break; case OPC2_32_RC_NAND: - temp = tcg_temp_new(); - tcg_gen_movi_tl(temp, const9); - tcg_gen_nand_tl(cpu_gpr_d[r2], cpu_gpr_d[r1], temp); + temp = tcg_temp_new_i32(); + tcg_gen_movi_i32(temp, const9); + tcg_gen_nand_i32(cpu_gpr_d[r2], cpu_gpr_d[r1], temp); break; case OPC2_32_RC_NOR: - temp = tcg_temp_new(); - tcg_gen_movi_tl(temp, const9); - tcg_gen_nor_tl(cpu_gpr_d[r2], cpu_gpr_d[r1], temp); + temp = tcg_temp_new_i32(); + tcg_gen_movi_i32(temp, const9); + tcg_gen_nor_i32(cpu_gpr_d[r2], cpu_gpr_d[r1], temp); break; case OPC2_32_RC_OR: - 
tcg_gen_ori_tl(cpu_gpr_d[r2], cpu_gpr_d[r1], const9); + tcg_gen_ori_i32(cpu_gpr_d[r2], cpu_gpr_d[r1], const9); break; case OPC2_32_RC_ORN: - tcg_gen_ori_tl(cpu_gpr_d[r2], cpu_gpr_d[r1], ~const9); + tcg_gen_ori_i32(cpu_gpr_d[r2], cpu_gpr_d[r1], ~const9); break; case OPC2_32_RC_SH: const9 = sextract32(const9, 0, 6); @@ -4999,11 +5037,11 @@ static void decode_rc_logical_shift(DisasContext *ctx) gen_shasi(cpu_gpr_d[r2], cpu_gpr_d[r1], const9); break; case OPC2_32_RC_XNOR: - tcg_gen_xori_tl(cpu_gpr_d[r2], cpu_gpr_d[r1], const9); - tcg_gen_not_tl(cpu_gpr_d[r2], cpu_gpr_d[r2]); + tcg_gen_xori_i32(cpu_gpr_d[r2], cpu_gpr_d[r1], const9); + tcg_gen_not_i32(cpu_gpr_d[r2], cpu_gpr_d[r2]); break; case OPC2_32_RC_XOR: - tcg_gen_xori_tl(cpu_gpr_d[r2], cpu_gpr_d[r1], const9); + tcg_gen_xori_i32(cpu_gpr_d[r2], cpu_gpr_d[r1], const9); break; case OPC2_32_RC_SHUFFLE: if (has_feature(ctx, TRICORE_FEATURE_162)) { @@ -5024,7 +5062,7 @@ static void decode_rc_accumulator(DisasContext *ctx) int r1, r2; int16_t const9; - TCGv temp; + TCGv_i32 temp; r2 = MASK_OP_RC_D(ctx->opcode); r1 = MASK_OP_RC_S1(ctx->opcode); @@ -5032,7 +5070,7 @@ static void decode_rc_accumulator(DisasContext *ctx) op2 = MASK_OP_RC_OP2(ctx->opcode); - temp = tcg_temp_new(); + temp = tcg_temp_new_i32(); switch (op2) { case OPC2_32_RC_ABSDIF: @@ -5083,7 +5121,7 @@ static void decode_rc_accumulator(DisasContext *ctx) const9, &tcg_gen_and_tl); break; case OPC2_32_RC_EQ: - tcg_gen_setcondi_tl(TCG_COND_EQ, cpu_gpr_d[r2], cpu_gpr_d[r1], const9); + tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_gpr_d[r2], cpu_gpr_d[r1], const9); break; case OPC2_32_RC_EQANY_B: gen_eqany_bi(cpu_gpr_d[r2], cpu_gpr_d[r1], const9); @@ -5092,41 +5130,41 @@ static void decode_rc_accumulator(DisasContext *ctx) gen_eqany_hi(cpu_gpr_d[r2], cpu_gpr_d[r1], const9); break; case OPC2_32_RC_GE: - tcg_gen_setcondi_tl(TCG_COND_GE, cpu_gpr_d[r2], cpu_gpr_d[r1], const9); + tcg_gen_setcondi_i32(TCG_COND_GE, cpu_gpr_d[r2], cpu_gpr_d[r1], const9); break; case OPC2_32_RC_GE_U: const9 = MASK_OP_RC_CONST9(ctx->opcode); - tcg_gen_setcondi_tl(TCG_COND_GEU, cpu_gpr_d[r2], cpu_gpr_d[r1], const9); + tcg_gen_setcondi_i32(TCG_COND_GEU, cpu_gpr_d[r2], cpu_gpr_d[r1], const9); break; case OPC2_32_RC_LT: - tcg_gen_setcondi_tl(TCG_COND_LT, cpu_gpr_d[r2], cpu_gpr_d[r1], const9); + tcg_gen_setcondi_i32(TCG_COND_LT, cpu_gpr_d[r2], cpu_gpr_d[r1], const9); break; case OPC2_32_RC_LT_U: const9 = MASK_OP_RC_CONST9(ctx->opcode); - tcg_gen_setcondi_tl(TCG_COND_LTU, cpu_gpr_d[r2], cpu_gpr_d[r1], const9); + tcg_gen_setcondi_i32(TCG_COND_LTU, cpu_gpr_d[r2], cpu_gpr_d[r1], const9); break; case OPC2_32_RC_MAX: - tcg_gen_movi_tl(temp, const9); - tcg_gen_movcond_tl(TCG_COND_GT, cpu_gpr_d[r2], cpu_gpr_d[r1], temp, + tcg_gen_movi_i32(temp, const9); + tcg_gen_movcond_i32(TCG_COND_GT, cpu_gpr_d[r2], cpu_gpr_d[r1], temp, cpu_gpr_d[r1], temp); break; case OPC2_32_RC_MAX_U: - tcg_gen_movi_tl(temp, MASK_OP_RC_CONST9(ctx->opcode)); - tcg_gen_movcond_tl(TCG_COND_GTU, cpu_gpr_d[r2], cpu_gpr_d[r1], temp, + tcg_gen_movi_i32(temp, MASK_OP_RC_CONST9(ctx->opcode)); + tcg_gen_movcond_i32(TCG_COND_GTU, cpu_gpr_d[r2], cpu_gpr_d[r1], temp, cpu_gpr_d[r1], temp); break; case OPC2_32_RC_MIN: - tcg_gen_movi_tl(temp, const9); - tcg_gen_movcond_tl(TCG_COND_LT, cpu_gpr_d[r2], cpu_gpr_d[r1], temp, + tcg_gen_movi_i32(temp, const9); + tcg_gen_movcond_i32(TCG_COND_LT, cpu_gpr_d[r2], cpu_gpr_d[r1], temp, cpu_gpr_d[r1], temp); break; case OPC2_32_RC_MIN_U: - tcg_gen_movi_tl(temp, MASK_OP_RC_CONST9(ctx->opcode)); - tcg_gen_movcond_tl(TCG_COND_LTU, cpu_gpr_d[r2], 
cpu_gpr_d[r1], temp, + tcg_gen_movi_i32(temp, MASK_OP_RC_CONST9(ctx->opcode)); + tcg_gen_movcond_i32(TCG_COND_LTU, cpu_gpr_d[r2], cpu_gpr_d[r1], temp, cpu_gpr_d[r1], temp); break; case OPC2_32_RC_NE: - tcg_gen_setcondi_tl(TCG_COND_NE, cpu_gpr_d[r2], cpu_gpr_d[r1], const9); + tcg_gen_setcondi_i32(TCG_COND_NE, cpu_gpr_d[r2], cpu_gpr_d[r1], const9); break; case OPC2_32_RC_OR_EQ: gen_accumulating_condi(TCG_COND_EQ, cpu_gpr_d[r2], cpu_gpr_d[r1], @@ -5155,15 +5193,15 @@ static void decode_rc_accumulator(DisasContext *ctx) const9, &tcg_gen_or_tl); break; case OPC2_32_RC_RSUB: - tcg_gen_movi_tl(temp, const9); + tcg_gen_movi_i32(temp, const9); gen_sub_d(cpu_gpr_d[r2], temp, cpu_gpr_d[r1]); break; case OPC2_32_RC_RSUBS: - tcg_gen_movi_tl(temp, const9); + tcg_gen_movi_i32(temp, const9); gen_subs(cpu_gpr_d[r2], temp, cpu_gpr_d[r1]); break; case OPC2_32_RC_RSUBS_U: - tcg_gen_movi_tl(temp, const9); + tcg_gen_movi_i32(temp, const9); gen_subsu(cpu_gpr_d[r2], temp, cpu_gpr_d[r1]); break; case OPC2_32_RC_SH_EQ: @@ -5259,7 +5297,7 @@ static void decode_rc_mul(DisasContext *ctx) break; case OPC2_32_RC_MUL_64: CHECK_REG_PAIR(r2); - gen_muli_i64s(cpu_gpr_d[r2], cpu_gpr_d[r2+1], cpu_gpr_d[r1], const9); + gen_muli_i64s(cpu_gpr_d[r2], cpu_gpr_d[r2 + 1], cpu_gpr_d[r1], const9); break; case OPC2_32_RC_MULS_32: gen_mulsi_i32(cpu_gpr_d[r2], cpu_gpr_d[r1], const9); @@ -5267,7 +5305,7 @@ static void decode_rc_mul(DisasContext *ctx) case OPC2_32_RC_MUL_U_64: const9 = MASK_OP_RC_CONST9(ctx->opcode); CHECK_REG_PAIR(r2); - gen_muli_i64u(cpu_gpr_d[r2], cpu_gpr_d[r2+1], cpu_gpr_d[r1], const9); + gen_muli_i64u(cpu_gpr_d[r2], cpu_gpr_d[r2 + 1], cpu_gpr_d[r1], const9); break; case OPC2_32_RC_MULS_U_32: const9 = MASK_OP_RC_CONST9(ctx->opcode); @@ -5285,7 +5323,7 @@ static void decode_rcpw_insert(DisasContext *ctx) int r1, r2; int32_t pos, width, const4; - TCGv temp; + TCGv_i32 temp; op2 = MASK_OP_RCPW_OP2(ctx->opcode); r1 = MASK_OP_RCPW_S1(ctx->opcode); @@ -5299,18 +5337,18 @@ static void decode_rcpw_insert(DisasContext *ctx) CHECK_REG_PAIR(r2); /* if pos + width > 32 undefined result */ if (pos + width <= 32) { - tcg_gen_movi_tl(cpu_gpr_d[r2+1], ((1u << width) - 1) << pos); - tcg_gen_movi_tl(cpu_gpr_d[r2], (const4 << pos)); + tcg_gen_movi_i32(cpu_gpr_d[r2 + 1], ((1u << width) - 1) << pos); + tcg_gen_movi_i32(cpu_gpr_d[r2], (const4 << pos)); } break; case OPC2_32_RCPW_INSERT: - /* tcg_gen_deposit_tl() does not handle the case of width = 0 */ + /* tcg_gen_deposit_i32() does not handle the case of width = 0 */ if (width == 0) { - tcg_gen_mov_tl(cpu_gpr_d[r2], cpu_gpr_d[r1]); + tcg_gen_mov_i32(cpu_gpr_d[r2], cpu_gpr_d[r1]); /* if pos + width > 32 undefined result */ } else if (pos + width <= 32) { temp = tcg_constant_i32(const4); - tcg_gen_deposit_tl(cpu_gpr_d[r2], cpu_gpr_d[r1], temp, pos, width); + tcg_gen_deposit_i32(cpu_gpr_d[r2], cpu_gpr_d[r1], temp, pos, width); } break; default: @@ -5326,7 +5364,7 @@ static void decode_rcrw_insert(DisasContext *ctx) int r1, r3, r4; int32_t width, const4; - TCGv temp, temp2, temp3; + TCGv_i32 temp, temp2, temp3; op2 = MASK_OP_RCRW_OP2(ctx->opcode); r1 = MASK_OP_RCRW_S1(ctx->opcode); @@ -5335,24 +5373,24 @@ static void decode_rcrw_insert(DisasContext *ctx) width = MASK_OP_RCRW_WIDTH(ctx->opcode); const4 = MASK_OP_RCRW_CONST4(ctx->opcode); - temp = tcg_temp_new(); - temp2 = tcg_temp_new(); + temp = tcg_temp_new_i32(); + temp2 = tcg_temp_new_i32(); switch (op2) { case OPC2_32_RCRW_IMASK: CHECK_REG_PAIR(r4); - tcg_gen_andi_tl(temp, cpu_gpr_d[r3], 0x1f); - tcg_gen_movi_tl(temp2, (1 << width) - 1); 
- tcg_gen_shl_tl(cpu_gpr_d[r4 + 1], temp2, temp); - tcg_gen_movi_tl(temp2, const4); - tcg_gen_shl_tl(cpu_gpr_d[r4], temp2, temp); + tcg_gen_andi_i32(temp, cpu_gpr_d[r3], 0x1f); + tcg_gen_movi_i32(temp2, (1 << width) - 1); + tcg_gen_shl_i32(cpu_gpr_d[r4 + 1], temp2, temp); + tcg_gen_movi_i32(temp2, const4); + tcg_gen_shl_i32(cpu_gpr_d[r4], temp2, temp); break; case OPC2_32_RCRW_INSERT: - temp3 = tcg_temp_new(); + temp3 = tcg_temp_new_i32(); - tcg_gen_movi_tl(temp, width); - tcg_gen_movi_tl(temp2, const4); - tcg_gen_andi_tl(temp3, cpu_gpr_d[r3], 0x1f); + tcg_gen_movi_i32(temp, width); + tcg_gen_movi_i32(temp2, const4); + tcg_gen_andi_i32(temp3, cpu_gpr_d[r3], 0x1f); gen_insert(cpu_gpr_d[r4], cpu_gpr_d[r1], temp2, temp, temp3); break; default: @@ -5368,7 +5406,7 @@ static void decode_rcr_cond_select(DisasContext *ctx) int r1, r3, r4; int32_t const9; - TCGv temp, temp2; + TCGv_i32 temp, temp2; op2 = MASK_OP_RCR_OP2(ctx->opcode); r1 = MASK_OP_RCR_S1(ctx->opcode); @@ -5388,13 +5426,13 @@ static void decode_rcr_cond_select(DisasContext *ctx) case OPC2_32_RCR_SEL: temp = tcg_constant_i32(0); temp2 = tcg_constant_i32(const9); - tcg_gen_movcond_tl(TCG_COND_NE, cpu_gpr_d[r4], cpu_gpr_d[r3], temp, + tcg_gen_movcond_i32(TCG_COND_NE, cpu_gpr_d[r4], cpu_gpr_d[r3], temp, cpu_gpr_d[r1], temp2); break; case OPC2_32_RCR_SELN: temp = tcg_constant_i32(0); temp2 = tcg_constant_i32(const9); - tcg_gen_movcond_tl(TCG_COND_EQ, cpu_gpr_d[r4], cpu_gpr_d[r3], temp, + tcg_gen_movcond_i32(TCG_COND_EQ, cpu_gpr_d[r4], cpu_gpr_d[r3], temp, cpu_gpr_d[r1], temp2); break; default: @@ -5422,8 +5460,8 @@ static void decode_rcr_madd(DisasContext *ctx) case OPC2_32_RCR_MADD_64: CHECK_REG_PAIR(r4); CHECK_REG_PAIR(r3); - gen_maddi64_d(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r1], - cpu_gpr_d[r3], cpu_gpr_d[r3+1], const9); + gen_maddi64_d(cpu_gpr_d[r4], cpu_gpr_d[r4 + 1], cpu_gpr_d[r1], + cpu_gpr_d[r3], cpu_gpr_d[r3 + 1], const9); break; case OPC2_32_RCR_MADDS_32: gen_maddsi_32(cpu_gpr_d[r4], cpu_gpr_d[r1], cpu_gpr_d[r3], const9); @@ -5431,15 +5469,15 @@ static void decode_rcr_madd(DisasContext *ctx) case OPC2_32_RCR_MADDS_64: CHECK_REG_PAIR(r4); CHECK_REG_PAIR(r3); - gen_maddsi_64(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r1], - cpu_gpr_d[r3], cpu_gpr_d[r3+1], const9); + gen_maddsi_64(cpu_gpr_d[r4], cpu_gpr_d[r4 + 1], cpu_gpr_d[r1], + cpu_gpr_d[r3], cpu_gpr_d[r3 + 1], const9); break; case OPC2_32_RCR_MADD_U_64: CHECK_REG_PAIR(r4); CHECK_REG_PAIR(r3); const9 = MASK_OP_RCR_CONST9(ctx->opcode); - gen_maddui64_d(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r1], - cpu_gpr_d[r3], cpu_gpr_d[r3+1], const9); + gen_maddui64_d(cpu_gpr_d[r4], cpu_gpr_d[r4 + 1], cpu_gpr_d[r1], + cpu_gpr_d[r3], cpu_gpr_d[r3 + 1], const9); break; case OPC2_32_RCR_MADDS_U_32: const9 = MASK_OP_RCR_CONST9(ctx->opcode); @@ -5449,8 +5487,8 @@ static void decode_rcr_madd(DisasContext *ctx) CHECK_REG_PAIR(r4); CHECK_REG_PAIR(r3); const9 = MASK_OP_RCR_CONST9(ctx->opcode); - gen_maddsui_64(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r1], - cpu_gpr_d[r3], cpu_gpr_d[r3+1], const9); + gen_maddsui_64(cpu_gpr_d[r4], cpu_gpr_d[r4 + 1], cpu_gpr_d[r1], + cpu_gpr_d[r3], cpu_gpr_d[r3 + 1], const9); break; default: generate_trap(ctx, TRAPC_INSN_ERR, TIN2_IOPC); @@ -5477,8 +5515,8 @@ static void decode_rcr_msub(DisasContext *ctx) case OPC2_32_RCR_MSUB_64: CHECK_REG_PAIR(r4); CHECK_REG_PAIR(r3); - gen_msubi64_d(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r1], - cpu_gpr_d[r3], cpu_gpr_d[r3+1], const9); + gen_msubi64_d(cpu_gpr_d[r4], cpu_gpr_d[r4 + 1], cpu_gpr_d[r1], + cpu_gpr_d[r3], 
cpu_gpr_d[r3 + 1], const9); break; case OPC2_32_RCR_MSUBS_32: gen_msubsi_32(cpu_gpr_d[r4], cpu_gpr_d[r1], cpu_gpr_d[r3], const9); @@ -5486,15 +5524,15 @@ static void decode_rcr_msub(DisasContext *ctx) case OPC2_32_RCR_MSUBS_64: CHECK_REG_PAIR(r4); CHECK_REG_PAIR(r3); - gen_msubsi_64(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r1], - cpu_gpr_d[r3], cpu_gpr_d[r3+1], const9); + gen_msubsi_64(cpu_gpr_d[r4], cpu_gpr_d[r4 + 1], cpu_gpr_d[r1], + cpu_gpr_d[r3], cpu_gpr_d[r3 + 1], const9); break; case OPC2_32_RCR_MSUB_U_64: CHECK_REG_PAIR(r4); CHECK_REG_PAIR(r3); const9 = MASK_OP_RCR_CONST9(ctx->opcode); - gen_msubui64_d(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r1], - cpu_gpr_d[r3], cpu_gpr_d[r3+1], const9); + gen_msubui64_d(cpu_gpr_d[r4], cpu_gpr_d[r4 + 1], cpu_gpr_d[r1], + cpu_gpr_d[r3], cpu_gpr_d[r3 + 1], const9); break; case OPC2_32_RCR_MSUBS_U_32: const9 = MASK_OP_RCR_CONST9(ctx->opcode); @@ -5504,8 +5542,8 @@ static void decode_rcr_msub(DisasContext *ctx) CHECK_REG_PAIR(r4); CHECK_REG_PAIR(r3); const9 = MASK_OP_RCR_CONST9(ctx->opcode); - gen_msubsui_64(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r1], - cpu_gpr_d[r3], cpu_gpr_d[r3+1], const9); + gen_msubsui_64(cpu_gpr_d[r4], cpu_gpr_d[r4 + 1], cpu_gpr_d[r1], + cpu_gpr_d[r3], cpu_gpr_d[r3 + 1], const9); break; default: generate_trap(ctx, TRAPC_INSN_ERR, TIN2_IOPC); @@ -5532,33 +5570,33 @@ static void decode_rlc_opc(DisasContext *ctx, gen_addi_d(cpu_gpr_d[r2], cpu_gpr_d[r1], const16 << 16); break; case OPC1_32_RLC_ADDIH_A: - tcg_gen_addi_tl(cpu_gpr_a[r2], cpu_gpr_a[r1], const16 << 16); + tcg_gen_addi_i32(cpu_gpr_a[r2], cpu_gpr_a[r1], const16 << 16); break; case OPC1_32_RLC_MFCR: const16 = MASK_OP_RLC_CONST16(ctx->opcode); gen_mfcr(ctx, cpu_gpr_d[r2], const16); break; case OPC1_32_RLC_MOV: - tcg_gen_movi_tl(cpu_gpr_d[r2], const16); + tcg_gen_movi_i32(cpu_gpr_d[r2], const16); break; case OPC1_32_RLC_MOV_64: if (has_feature(ctx, TRICORE_FEATURE_16)) { CHECK_REG_PAIR(r2); - tcg_gen_movi_tl(cpu_gpr_d[r2], const16); - tcg_gen_movi_tl(cpu_gpr_d[r2+1], const16 >> 15); + tcg_gen_movi_i32(cpu_gpr_d[r2], const16); + tcg_gen_movi_i32(cpu_gpr_d[r2 + 1], const16 >> 15); } else { generate_trap(ctx, TRAPC_INSN_ERR, TIN2_IOPC); } break; case OPC1_32_RLC_MOV_U: const16 = MASK_OP_RLC_CONST16(ctx->opcode); - tcg_gen_movi_tl(cpu_gpr_d[r2], const16); + tcg_gen_movi_i32(cpu_gpr_d[r2], const16); break; case OPC1_32_RLC_MOV_H: - tcg_gen_movi_tl(cpu_gpr_d[r2], const16 << 16); + tcg_gen_movi_i32(cpu_gpr_d[r2], const16 << 16); break; case OPC1_32_RLC_MOVH_A: - tcg_gen_movi_tl(cpu_gpr_a[r2], const16 << 16); + tcg_gen_movi_i32(cpu_gpr_a[r2], const16 << 16); break; case OPC1_32_RLC_MTCR: const16 = MASK_OP_RLC_CONST16(ctx->opcode); @@ -5575,7 +5613,7 @@ static void decode_rr_accumulator(DisasContext *ctx) uint32_t op2; int r3, r2, r1; - TCGv temp; + TCGv_i32 temp; r3 = MASK_OP_RR_D(ctx->opcode); r2 = MASK_OP_RR_S2(ctx->opcode); @@ -5672,7 +5710,7 @@ static void decode_rr_accumulator(DisasContext *ctx) cpu_gpr_d[r2], &tcg_gen_and_tl); break; case OPC2_32_RR_EQ: - tcg_gen_setcond_tl(TCG_COND_EQ, cpu_gpr_d[r3], cpu_gpr_d[r1], + tcg_gen_setcond_i32(TCG_COND_EQ, cpu_gpr_d[r3], cpu_gpr_d[r1], cpu_gpr_d[r2]); break; case OPC2_32_RR_EQ_B: @@ -5682,7 +5720,7 @@ static void decode_rr_accumulator(DisasContext *ctx) gen_helper_eq_h(cpu_gpr_d[r3], cpu_gpr_d[r1], cpu_gpr_d[r2]); break; case OPC2_32_RR_EQ_W: - tcg_gen_negsetcond_tl(TCG_COND_EQ, cpu_gpr_d[r3], + tcg_gen_negsetcond_i32(TCG_COND_EQ, cpu_gpr_d[r3], cpu_gpr_d[r1], cpu_gpr_d[r2]); break; case OPC2_32_RR_EQANY_B: @@ -5692,19 +5730,19 
@@ static void decode_rr_accumulator(DisasContext *ctx) gen_helper_eqany_h(cpu_gpr_d[r3], cpu_gpr_d[r1], cpu_gpr_d[r2]); break; case OPC2_32_RR_GE: - tcg_gen_setcond_tl(TCG_COND_GE, cpu_gpr_d[r3], cpu_gpr_d[r1], + tcg_gen_setcond_i32(TCG_COND_GE, cpu_gpr_d[r3], cpu_gpr_d[r1], cpu_gpr_d[r2]); break; case OPC2_32_RR_GE_U: - tcg_gen_setcond_tl(TCG_COND_GEU, cpu_gpr_d[r3], cpu_gpr_d[r1], + tcg_gen_setcond_i32(TCG_COND_GEU, cpu_gpr_d[r3], cpu_gpr_d[r1], cpu_gpr_d[r2]); break; case OPC2_32_RR_LT: - tcg_gen_setcond_tl(TCG_COND_LT, cpu_gpr_d[r3], cpu_gpr_d[r1], + tcg_gen_setcond_i32(TCG_COND_LT, cpu_gpr_d[r3], cpu_gpr_d[r1], cpu_gpr_d[r2]); break; case OPC2_32_RR_LT_U: - tcg_gen_setcond_tl(TCG_COND_LTU, cpu_gpr_d[r3], cpu_gpr_d[r1], + tcg_gen_setcond_i32(TCG_COND_LTU, cpu_gpr_d[r3], cpu_gpr_d[r1], cpu_gpr_d[r2]); break; case OPC2_32_RR_LT_B: @@ -5720,19 +5758,19 @@ static void decode_rr_accumulator(DisasContext *ctx) gen_helper_lt_hu(cpu_gpr_d[r3], cpu_gpr_d[r1], cpu_gpr_d[r2]); break; case OPC2_32_RR_LT_W: - tcg_gen_negsetcond_tl(TCG_COND_LT, cpu_gpr_d[r3], + tcg_gen_negsetcond_i32(TCG_COND_LT, cpu_gpr_d[r3], cpu_gpr_d[r1], cpu_gpr_d[r2]); break; case OPC2_32_RR_LT_WU: - tcg_gen_negsetcond_tl(TCG_COND_LTU, cpu_gpr_d[r3], + tcg_gen_negsetcond_i32(TCG_COND_LTU, cpu_gpr_d[r3], cpu_gpr_d[r1], cpu_gpr_d[r2]); break; case OPC2_32_RR_MAX: - tcg_gen_movcond_tl(TCG_COND_GT, cpu_gpr_d[r3], cpu_gpr_d[r1], + tcg_gen_movcond_i32(TCG_COND_GT, cpu_gpr_d[r3], cpu_gpr_d[r1], cpu_gpr_d[r2], cpu_gpr_d[r1], cpu_gpr_d[r2]); break; case OPC2_32_RR_MAX_U: - tcg_gen_movcond_tl(TCG_COND_GTU, cpu_gpr_d[r3], cpu_gpr_d[r1], + tcg_gen_movcond_i32(TCG_COND_GTU, cpu_gpr_d[r3], cpu_gpr_d[r1], cpu_gpr_d[r2], cpu_gpr_d[r1], cpu_gpr_d[r2]); break; case OPC2_32_RR_MAX_B: @@ -5748,11 +5786,11 @@ static void decode_rr_accumulator(DisasContext *ctx) gen_helper_max_hu(cpu_gpr_d[r3], cpu_gpr_d[r1], cpu_gpr_d[r2]); break; case OPC2_32_RR_MIN: - tcg_gen_movcond_tl(TCG_COND_LT, cpu_gpr_d[r3], cpu_gpr_d[r1], + tcg_gen_movcond_i32(TCG_COND_LT, cpu_gpr_d[r3], cpu_gpr_d[r1], cpu_gpr_d[r2], cpu_gpr_d[r1], cpu_gpr_d[r2]); break; case OPC2_32_RR_MIN_U: - tcg_gen_movcond_tl(TCG_COND_LTU, cpu_gpr_d[r3], cpu_gpr_d[r1], + tcg_gen_movcond_i32(TCG_COND_LTU, cpu_gpr_d[r3], cpu_gpr_d[r1], cpu_gpr_d[r2], cpu_gpr_d[r1], cpu_gpr_d[r2]); break; case OPC2_32_RR_MIN_B: @@ -5768,16 +5806,16 @@ static void decode_rr_accumulator(DisasContext *ctx) gen_helper_min_hu(cpu_gpr_d[r3], cpu_gpr_d[r1], cpu_gpr_d[r2]); break; case OPC2_32_RR_MOV: - tcg_gen_mov_tl(cpu_gpr_d[r3], cpu_gpr_d[r2]); + tcg_gen_mov_i32(cpu_gpr_d[r3], cpu_gpr_d[r2]); break; case OPC2_32_RR_MOV_64: if (has_feature(ctx, TRICORE_FEATURE_16)) { - temp = tcg_temp_new(); + temp = tcg_temp_new_i32(); CHECK_REG_PAIR(r3); - tcg_gen_mov_tl(temp, cpu_gpr_d[r1]); - tcg_gen_mov_tl(cpu_gpr_d[r3], cpu_gpr_d[r2]); - tcg_gen_mov_tl(cpu_gpr_d[r3 + 1], temp); + tcg_gen_mov_i32(temp, cpu_gpr_d[r1]); + tcg_gen_mov_i32(cpu_gpr_d[r3], cpu_gpr_d[r2]); + tcg_gen_mov_i32(cpu_gpr_d[r3 + 1], temp); } else { generate_trap(ctx, TRAPC_INSN_ERR, TIN2_IOPC); } @@ -5785,14 +5823,14 @@ static void decode_rr_accumulator(DisasContext *ctx) case OPC2_32_RR_MOVS_64: if (has_feature(ctx, TRICORE_FEATURE_16)) { CHECK_REG_PAIR(r3); - tcg_gen_mov_tl(cpu_gpr_d[r3], cpu_gpr_d[r2]); - tcg_gen_sari_tl(cpu_gpr_d[r3 + 1], cpu_gpr_d[r2], 31); + tcg_gen_mov_i32(cpu_gpr_d[r3], cpu_gpr_d[r2]); + tcg_gen_sari_i32(cpu_gpr_d[r3 + 1], cpu_gpr_d[r2], 31); } else { generate_trap(ctx, TRAPC_INSN_ERR, TIN2_IOPC); } break; case OPC2_32_RR_NE: - 
tcg_gen_setcond_tl(TCG_COND_NE, cpu_gpr_d[r3], cpu_gpr_d[r1], + tcg_gen_setcond_i32(TCG_COND_NE, cpu_gpr_d[r3], cpu_gpr_d[r1], cpu_gpr_d[r2]); break; case OPC2_32_RR_OR_EQ: @@ -5925,41 +5963,41 @@ static void decode_rr_logical_shift(DisasContext *ctx) switch (op2) { case OPC2_32_RR_AND: - tcg_gen_and_tl(cpu_gpr_d[r3], cpu_gpr_d[r1], cpu_gpr_d[r2]); + tcg_gen_and_i32(cpu_gpr_d[r3], cpu_gpr_d[r1], cpu_gpr_d[r2]); break; case OPC2_32_RR_ANDN: - tcg_gen_andc_tl(cpu_gpr_d[r3], cpu_gpr_d[r1], cpu_gpr_d[r2]); + tcg_gen_andc_i32(cpu_gpr_d[r3], cpu_gpr_d[r1], cpu_gpr_d[r2]); break; case OPC2_32_RR_CLO: - tcg_gen_not_tl(cpu_gpr_d[r3], cpu_gpr_d[r1]); - tcg_gen_clzi_tl(cpu_gpr_d[r3], cpu_gpr_d[r3], TARGET_LONG_BITS); + tcg_gen_not_i32(cpu_gpr_d[r3], cpu_gpr_d[r1]); + tcg_gen_clzi_i32(cpu_gpr_d[r3], cpu_gpr_d[r3], TARGET_LONG_BITS); break; case OPC2_32_RR_CLO_H: gen_helper_clo_h(cpu_gpr_d[r3], cpu_gpr_d[r1]); break; case OPC2_32_RR_CLS: - tcg_gen_clrsb_tl(cpu_gpr_d[r3], cpu_gpr_d[r1]); + tcg_gen_clrsb_i32(cpu_gpr_d[r3], cpu_gpr_d[r1]); break; case OPC2_32_RR_CLS_H: gen_helper_cls_h(cpu_gpr_d[r3], cpu_gpr_d[r1]); break; case OPC2_32_RR_CLZ: - tcg_gen_clzi_tl(cpu_gpr_d[r3], cpu_gpr_d[r1], TARGET_LONG_BITS); + tcg_gen_clzi_i32(cpu_gpr_d[r3], cpu_gpr_d[r1], TARGET_LONG_BITS); break; case OPC2_32_RR_CLZ_H: gen_helper_clz_h(cpu_gpr_d[r3], cpu_gpr_d[r1]); break; case OPC2_32_RR_NAND: - tcg_gen_nand_tl(cpu_gpr_d[r3], cpu_gpr_d[r1], cpu_gpr_d[r2]); + tcg_gen_nand_i32(cpu_gpr_d[r3], cpu_gpr_d[r1], cpu_gpr_d[r2]); break; case OPC2_32_RR_NOR: - tcg_gen_nor_tl(cpu_gpr_d[r3], cpu_gpr_d[r1], cpu_gpr_d[r2]); + tcg_gen_nor_i32(cpu_gpr_d[r3], cpu_gpr_d[r1], cpu_gpr_d[r2]); break; case OPC2_32_RR_OR: - tcg_gen_or_tl(cpu_gpr_d[r3], cpu_gpr_d[r1], cpu_gpr_d[r2]); + tcg_gen_or_i32(cpu_gpr_d[r3], cpu_gpr_d[r1], cpu_gpr_d[r2]); break; case OPC2_32_RR_ORN: - tcg_gen_orc_tl(cpu_gpr_d[r3], cpu_gpr_d[r1], cpu_gpr_d[r2]); + tcg_gen_orc_i32(cpu_gpr_d[r3], cpu_gpr_d[r1], cpu_gpr_d[r2]); break; case OPC2_32_RR_SH: gen_helper_sh(cpu_gpr_d[r3], cpu_gpr_d[r1], cpu_gpr_d[r2]); @@ -5977,10 +6015,10 @@ static void decode_rr_logical_shift(DisasContext *ctx) gen_shas(cpu_gpr_d[r3], cpu_gpr_d[r1], cpu_gpr_d[r2]); break; case OPC2_32_RR_XNOR: - tcg_gen_eqv_tl(cpu_gpr_d[r3], cpu_gpr_d[r1], cpu_gpr_d[r2]); + tcg_gen_eqv_i32(cpu_gpr_d[r3], cpu_gpr_d[r1], cpu_gpr_d[r2]); break; case OPC2_32_RR_XOR: - tcg_gen_xor_tl(cpu_gpr_d[r3], cpu_gpr_d[r1], cpu_gpr_d[r2]); + tcg_gen_xor_i32(cpu_gpr_d[r3], cpu_gpr_d[r1], cpu_gpr_d[r2]); break; default: generate_trap(ctx, TRAPC_INSN_ERR, TIN2_IOPC); @@ -5991,7 +6029,7 @@ static void decode_rr_address(DisasContext *ctx) { uint32_t op2, n; int r1, r2, r3; - TCGv temp; + TCGv_i32 temp; op2 = MASK_OP_RR_OP2(ctx->opcode); r3 = MASK_OP_RR_D(ctx->opcode); @@ -6001,52 +6039,52 @@ static void decode_rr_address(DisasContext *ctx) switch (op2) { case OPC2_32_RR_ADD_A: - tcg_gen_add_tl(cpu_gpr_a[r3], cpu_gpr_a[r1], cpu_gpr_a[r2]); + tcg_gen_add_i32(cpu_gpr_a[r3], cpu_gpr_a[r1], cpu_gpr_a[r2]); break; case OPC2_32_RR_ADDSC_A: - temp = tcg_temp_new(); - tcg_gen_shli_tl(temp, cpu_gpr_d[r1], n); - tcg_gen_add_tl(cpu_gpr_a[r3], cpu_gpr_a[r2], temp); + temp = tcg_temp_new_i32(); + tcg_gen_shli_i32(temp, cpu_gpr_d[r1], n); + tcg_gen_add_i32(cpu_gpr_a[r3], cpu_gpr_a[r2], temp); break; case OPC2_32_RR_ADDSC_AT: - temp = tcg_temp_new(); - tcg_gen_sari_tl(temp, cpu_gpr_d[r1], 3); - tcg_gen_add_tl(temp, cpu_gpr_a[r2], temp); - tcg_gen_andi_tl(cpu_gpr_a[r3], temp, 0xFFFFFFFC); + temp = tcg_temp_new_i32(); + tcg_gen_sari_i32(temp, 
cpu_gpr_d[r1], 3); + tcg_gen_add_i32(temp, cpu_gpr_a[r2], temp); + tcg_gen_andi_i32(cpu_gpr_a[r3], temp, 0xFFFFFFFC); break; case OPC2_32_RR_EQ_A: - tcg_gen_setcond_tl(TCG_COND_EQ, cpu_gpr_d[r3], cpu_gpr_a[r1], + tcg_gen_setcond_i32(TCG_COND_EQ, cpu_gpr_d[r3], cpu_gpr_a[r1], cpu_gpr_a[r2]); break; case OPC2_32_RR_EQZ: - tcg_gen_setcondi_tl(TCG_COND_EQ, cpu_gpr_d[r3], cpu_gpr_a[r1], 0); + tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_gpr_d[r3], cpu_gpr_a[r1], 0); break; case OPC2_32_RR_GE_A: - tcg_gen_setcond_tl(TCG_COND_GEU, cpu_gpr_d[r3], cpu_gpr_a[r1], + tcg_gen_setcond_i32(TCG_COND_GEU, cpu_gpr_d[r3], cpu_gpr_a[r1], cpu_gpr_a[r2]); break; case OPC2_32_RR_LT_A: - tcg_gen_setcond_tl(TCG_COND_LTU, cpu_gpr_d[r3], cpu_gpr_a[r1], + tcg_gen_setcond_i32(TCG_COND_LTU, cpu_gpr_d[r3], cpu_gpr_a[r1], cpu_gpr_a[r2]); break; case OPC2_32_RR_MOV_A: - tcg_gen_mov_tl(cpu_gpr_a[r3], cpu_gpr_d[r2]); + tcg_gen_mov_i32(cpu_gpr_a[r3], cpu_gpr_d[r2]); break; case OPC2_32_RR_MOV_AA: - tcg_gen_mov_tl(cpu_gpr_a[r3], cpu_gpr_a[r2]); + tcg_gen_mov_i32(cpu_gpr_a[r3], cpu_gpr_a[r2]); break; case OPC2_32_RR_MOV_D: - tcg_gen_mov_tl(cpu_gpr_d[r3], cpu_gpr_a[r2]); + tcg_gen_mov_i32(cpu_gpr_d[r3], cpu_gpr_a[r2]); break; case OPC2_32_RR_NE_A: - tcg_gen_setcond_tl(TCG_COND_NE, cpu_gpr_d[r3], cpu_gpr_a[r1], + tcg_gen_setcond_i32(TCG_COND_NE, cpu_gpr_d[r3], cpu_gpr_a[r1], cpu_gpr_a[r2]); break; case OPC2_32_RR_NEZ_A: - tcg_gen_setcondi_tl(TCG_COND_NE, cpu_gpr_d[r3], cpu_gpr_a[r1], 0); + tcg_gen_setcondi_i32(TCG_COND_NE, cpu_gpr_d[r3], cpu_gpr_a[r1], 0); break; case OPC2_32_RR_SUB_A: - tcg_gen_sub_tl(cpu_gpr_a[r3], cpu_gpr_a[r1], cpu_gpr_a[r2]); + tcg_gen_sub_i32(cpu_gpr_a[r3], cpu_gpr_a[r1], cpu_gpr_a[r2]); break; default: generate_trap(ctx, TRAPC_INSN_ERR, TIN2_IOPC); @@ -6063,19 +6101,19 @@ static void decode_rr_idirect(DisasContext *ctx) switch (op2) { case OPC2_32_RR_JI: - tcg_gen_andi_tl(cpu_PC, cpu_gpr_a[r1], ~0x1); + tcg_gen_andi_i32(cpu_PC, cpu_gpr_a[r1], ~0x1); break; case OPC2_32_RR_JLI: - tcg_gen_andi_tl(cpu_PC, cpu_gpr_a[r1], ~0x1); - tcg_gen_movi_tl(cpu_gpr_a[11], ctx->pc_succ_insn); + tcg_gen_andi_i32(cpu_PC, cpu_gpr_a[r1], ~0x1); + tcg_gen_movi_i32(cpu_gpr_a[11], ctx->pc_succ_insn); break; case OPC2_32_RR_CALLI: gen_helper_1arg(call, ctx->pc_succ_insn); - tcg_gen_andi_tl(cpu_PC, cpu_gpr_a[r1], ~0x1); + tcg_gen_andi_i32(cpu_PC, cpu_gpr_a[r1], ~0x1); break; case OPC2_32_RR_FCALLI: gen_fcall_save_ctx(ctx); - tcg_gen_andi_tl(cpu_PC, cpu_gpr_a[r1], ~0x1); + tcg_gen_andi_i32(cpu_PC, cpu_gpr_a[r1], ~0x1); break; default: generate_trap(ctx, TRAPC_INSN_ERR, TIN2_IOPC); @@ -6089,7 +6127,7 @@ static void decode_rr_divide(DisasContext *ctx) uint32_t op2; int r1, r2, r3; - TCGv temp, temp2, temp3; + TCGv_i32 temp, temp2, temp3; op2 = MASK_OP_RR_OP2(ctx->opcode); r3 = MASK_OP_RR_D(ctx->opcode); @@ -6102,107 +6140,107 @@ static void decode_rr_divide(DisasContext *ctx) break; case OPC2_32_RR_BSPLIT: CHECK_REG_PAIR(r3); - gen_bsplit(cpu_gpr_d[r3], cpu_gpr_d[r3+1], cpu_gpr_d[r1]); + gen_bsplit(cpu_gpr_d[r3], cpu_gpr_d[r3 + 1], cpu_gpr_d[r1]); break; case OPC2_32_RR_DVINIT_B: CHECK_REG_PAIR(r3); - gen_dvinit_b(ctx, cpu_gpr_d[r3], cpu_gpr_d[r3+1], cpu_gpr_d[r1], + gen_dvinit_b(ctx, cpu_gpr_d[r3], cpu_gpr_d[r3 + 1], cpu_gpr_d[r1], cpu_gpr_d[r2]); break; case OPC2_32_RR_DVINIT_BU: - temp = tcg_temp_new(); - temp2 = tcg_temp_new(); - temp3 = tcg_temp_new(); + temp = tcg_temp_new_i32(); + temp2 = tcg_temp_new_i32(); + temp3 = tcg_temp_new_i32(); CHECK_REG_PAIR(r3); - tcg_gen_shri_tl(temp3, cpu_gpr_d[r1], 8); + tcg_gen_shri_i32(temp3, 
cpu_gpr_d[r1], 8); /* reset av */ - tcg_gen_movi_tl(cpu_PSW_AV, 0); + tcg_gen_movi_i32(cpu_PSW_AV, 0); if (!has_feature(ctx, TRICORE_FEATURE_131)) { - /* overflow = (abs(D[r3+1]) >= abs(D[r2])) */ - tcg_gen_abs_tl(temp, temp3); - tcg_gen_abs_tl(temp2, cpu_gpr_d[r2]); - tcg_gen_setcond_tl(TCG_COND_GE, cpu_PSW_V, temp, temp2); + /* overflow = (abs(D[r3 + 1]) >= abs(D[r2])) */ + tcg_gen_abs_i32(temp, temp3); + tcg_gen_abs_i32(temp2, cpu_gpr_d[r2]); + tcg_gen_setcond_i32(TCG_COND_GE, cpu_PSW_V, temp, temp2); } else { /* overflow = (D[b] == 0) */ - tcg_gen_setcondi_tl(TCG_COND_EQ, cpu_PSW_V, cpu_gpr_d[r2], 0); + tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_PSW_V, cpu_gpr_d[r2], 0); } - tcg_gen_shli_tl(cpu_PSW_V, cpu_PSW_V, 31); + tcg_gen_shli_i32(cpu_PSW_V, cpu_PSW_V, 31); /* sv */ - tcg_gen_or_tl(cpu_PSW_SV, cpu_PSW_SV, cpu_PSW_V); + tcg_gen_or_i32(cpu_PSW_SV, cpu_PSW_SV, cpu_PSW_V); /* write result */ - tcg_gen_shli_tl(cpu_gpr_d[r3], cpu_gpr_d[r1], 24); - tcg_gen_mov_tl(cpu_gpr_d[r3+1], temp3); + tcg_gen_shli_i32(cpu_gpr_d[r3], cpu_gpr_d[r1], 24); + tcg_gen_mov_i32(cpu_gpr_d[r3 + 1], temp3); break; case OPC2_32_RR_DVINIT_H: CHECK_REG_PAIR(r3); - gen_dvinit_h(ctx, cpu_gpr_d[r3], cpu_gpr_d[r3+1], cpu_gpr_d[r1], + gen_dvinit_h(ctx, cpu_gpr_d[r3], cpu_gpr_d[r3 + 1], cpu_gpr_d[r1], cpu_gpr_d[r2]); break; case OPC2_32_RR_DVINIT_HU: - temp = tcg_temp_new(); - temp2 = tcg_temp_new(); - temp3 = tcg_temp_new(); + temp = tcg_temp_new_i32(); + temp2 = tcg_temp_new_i32(); + temp3 = tcg_temp_new_i32(); CHECK_REG_PAIR(r3); - tcg_gen_shri_tl(temp3, cpu_gpr_d[r1], 16); + tcg_gen_shri_i32(temp3, cpu_gpr_d[r1], 16); /* reset av */ - tcg_gen_movi_tl(cpu_PSW_AV, 0); + tcg_gen_movi_i32(cpu_PSW_AV, 0); if (!has_feature(ctx, TRICORE_FEATURE_131)) { - /* overflow = (abs(D[r3+1]) >= abs(D[r2])) */ - tcg_gen_abs_tl(temp, temp3); - tcg_gen_abs_tl(temp2, cpu_gpr_d[r2]); - tcg_gen_setcond_tl(TCG_COND_GE, cpu_PSW_V, temp, temp2); + /* overflow = (abs(D[r3 + 1]) >= abs(D[r2])) */ + tcg_gen_abs_i32(temp, temp3); + tcg_gen_abs_i32(temp2, cpu_gpr_d[r2]); + tcg_gen_setcond_i32(TCG_COND_GE, cpu_PSW_V, temp, temp2); } else { /* overflow = (D[b] == 0) */ - tcg_gen_setcondi_tl(TCG_COND_EQ, cpu_PSW_V, cpu_gpr_d[r2], 0); + tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_PSW_V, cpu_gpr_d[r2], 0); } - tcg_gen_shli_tl(cpu_PSW_V, cpu_PSW_V, 31); + tcg_gen_shli_i32(cpu_PSW_V, cpu_PSW_V, 31); /* sv */ - tcg_gen_or_tl(cpu_PSW_SV, cpu_PSW_SV, cpu_PSW_V); + tcg_gen_or_i32(cpu_PSW_SV, cpu_PSW_SV, cpu_PSW_V); /* write result */ - tcg_gen_shli_tl(cpu_gpr_d[r3], cpu_gpr_d[r1], 16); - tcg_gen_mov_tl(cpu_gpr_d[r3+1], temp3); + tcg_gen_shli_i32(cpu_gpr_d[r3], cpu_gpr_d[r1], 16); + tcg_gen_mov_i32(cpu_gpr_d[r3 + 1], temp3); break; case OPC2_32_RR_DVINIT: - temp = tcg_temp_new(); - temp2 = tcg_temp_new(); + temp = tcg_temp_new_i32(); + temp2 = tcg_temp_new_i32(); CHECK_REG_PAIR(r3); /* overflow = ((D[b] == 0) || ((D[b] == 0xFFFFFFFF) && (D[a] == 0x80000000))) */ - tcg_gen_setcondi_tl(TCG_COND_EQ, temp, cpu_gpr_d[r2], 0xffffffff); - tcg_gen_setcondi_tl(TCG_COND_EQ, temp2, cpu_gpr_d[r1], 0x80000000); - tcg_gen_and_tl(temp, temp, temp2); - tcg_gen_setcondi_tl(TCG_COND_EQ, temp2, cpu_gpr_d[r2], 0); - tcg_gen_or_tl(cpu_PSW_V, temp, temp2); - tcg_gen_shli_tl(cpu_PSW_V, cpu_PSW_V, 31); + tcg_gen_setcondi_i32(TCG_COND_EQ, temp, cpu_gpr_d[r2], 0xffffffff); + tcg_gen_setcondi_i32(TCG_COND_EQ, temp2, cpu_gpr_d[r1], 0x80000000); + tcg_gen_and_i32(temp, temp, temp2); + tcg_gen_setcondi_i32(TCG_COND_EQ, temp2, cpu_gpr_d[r2], 0); + tcg_gen_or_i32(cpu_PSW_V, temp, temp2); + 
tcg_gen_shli_i32(cpu_PSW_V, cpu_PSW_V, 31); /* sv */ - tcg_gen_or_tl(cpu_PSW_SV, cpu_PSW_SV, cpu_PSW_V); + tcg_gen_or_i32(cpu_PSW_SV, cpu_PSW_SV, cpu_PSW_V); /* reset av */ - tcg_gen_movi_tl(cpu_PSW_AV, 0); + tcg_gen_movi_i32(cpu_PSW_AV, 0); /* write result */ - tcg_gen_mov_tl(cpu_gpr_d[r3], cpu_gpr_d[r1]); + tcg_gen_mov_i32(cpu_gpr_d[r3], cpu_gpr_d[r1]); /* sign extend to high reg */ - tcg_gen_sari_tl(cpu_gpr_d[r3+1], cpu_gpr_d[r1], 31); + tcg_gen_sari_i32(cpu_gpr_d[r3 + 1], cpu_gpr_d[r1], 31); break; case OPC2_32_RR_DVINIT_U: CHECK_REG_PAIR(r3); /* overflow = (D[b] == 0) */ - tcg_gen_setcondi_tl(TCG_COND_EQ, cpu_PSW_V, cpu_gpr_d[r2], 0); - tcg_gen_shli_tl(cpu_PSW_V, cpu_PSW_V, 31); + tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_PSW_V, cpu_gpr_d[r2], 0); + tcg_gen_shli_i32(cpu_PSW_V, cpu_PSW_V, 31); /* sv */ - tcg_gen_or_tl(cpu_PSW_SV, cpu_PSW_SV, cpu_PSW_V); + tcg_gen_or_i32(cpu_PSW_SV, cpu_PSW_SV, cpu_PSW_V); /* reset av */ - tcg_gen_movi_tl(cpu_PSW_AV, 0); + tcg_gen_movi_i32(cpu_PSW_AV, 0); /* write result */ - tcg_gen_mov_tl(cpu_gpr_d[r3], cpu_gpr_d[r1]); + tcg_gen_mov_i32(cpu_gpr_d[r3], cpu_gpr_d[r1]); /* zero extend to high reg*/ - tcg_gen_movi_tl(cpu_gpr_d[r3+1], 0); + tcg_gen_movi_i32(cpu_gpr_d[r3 + 1], 0); break; case OPC2_32_RR_PARITY: gen_helper_parity(cpu_gpr_d[r3], cpu_gpr_d[r1]); break; case OPC2_32_RR_UNPACK: CHECK_REG_PAIR(r3); - gen_unpack(cpu_gpr_d[r3], cpu_gpr_d[r3+1], cpu_gpr_d[r1]); + gen_unpack(cpu_gpr_d[r3], cpu_gpr_d[r3 + 1], cpu_gpr_d[r1]); break; case OPC2_32_RR_CRC32_B: if (has_feature(ctx, TRICORE_FEATURE_162)) { @@ -6228,7 +6266,7 @@ static void decode_rr_divide(DisasContext *ctx) case OPC2_32_RR_POPCNT_W: if (has_feature(ctx, TRICORE_FEATURE_162)) { - tcg_gen_ctpop_tl(cpu_gpr_d[r3], cpu_gpr_d[r1]); + tcg_gen_ctpop_i32(cpu_gpr_d[r3], cpu_gpr_d[r1]); } else { generate_trap(ctx, TRAPC_INSN_ERR, TIN2_IOPC); } @@ -6236,7 +6274,7 @@ static void decode_rr_divide(DisasContext *ctx) case OPC2_32_RR_DIV: if (has_feature(ctx, TRICORE_FEATURE_16)) { CHECK_REG_PAIR(r3); - GEN_HELPER_RR(divide, cpu_gpr_d[r3], cpu_gpr_d[r3+1], cpu_gpr_d[r1], + GEN_HELPER_RR(divide, cpu_gpr_d[r3], cpu_gpr_d[r3 + 1], cpu_gpr_d[r1], cpu_gpr_d[r2]); } else { generate_trap(ctx, TRAPC_INSN_ERR, TIN2_IOPC); @@ -6245,7 +6283,7 @@ static void decode_rr_divide(DisasContext *ctx) case OPC2_32_RR_DIV_U: if (has_feature(ctx, TRICORE_FEATURE_16)) { CHECK_REG_PAIR(r3); - GEN_HELPER_RR(divide_u, cpu_gpr_d[r3], cpu_gpr_d[r3+1], + GEN_HELPER_RR(divide_u, cpu_gpr_d[r3], cpu_gpr_d[r3 + 1], cpu_gpr_d[r1], cpu_gpr_d[r2]); } else { generate_trap(ctx, TRAPC_INSN_ERR, TIN2_IOPC); @@ -6313,7 +6351,7 @@ static void decode_rr1_mul(DisasContext *ctx) uint32_t op2; int r1, r2, r3; - TCGv n; + TCGv_i32 n; TCGv_i64 temp64; r1 = MASK_OP_RR1_S1(ctx->opcode); @@ -6327,69 +6365,69 @@ static void decode_rr1_mul(DisasContext *ctx) temp64 = tcg_temp_new_i64(); CHECK_REG_PAIR(r3); GEN_HELPER_LL(mul_h, temp64, cpu_gpr_d[r1], cpu_gpr_d[r2], n); - tcg_gen_extr_i64_i32(cpu_gpr_d[r3], cpu_gpr_d[r3+1], temp64); - gen_calc_usb_mul_h(cpu_gpr_d[r3], cpu_gpr_d[r3+1]); + tcg_gen_extr_i64_i32(cpu_gpr_d[r3], cpu_gpr_d[r3 + 1], temp64); + gen_calc_usb_mul_h(cpu_gpr_d[r3], cpu_gpr_d[r3 + 1]); break; case OPC2_32_RR1_MUL_H_32_LU: temp64 = tcg_temp_new_i64(); CHECK_REG_PAIR(r3); GEN_HELPER_LU(mul_h, temp64, cpu_gpr_d[r1], cpu_gpr_d[r2], n); - tcg_gen_extr_i64_i32(cpu_gpr_d[r3], cpu_gpr_d[r3+1], temp64); - gen_calc_usb_mul_h(cpu_gpr_d[r3], cpu_gpr_d[r3+1]); + tcg_gen_extr_i64_i32(cpu_gpr_d[r3], cpu_gpr_d[r3 + 1], temp64); + 
gen_calc_usb_mul_h(cpu_gpr_d[r3], cpu_gpr_d[r3 + 1]); break; case OPC2_32_RR1_MUL_H_32_UL: temp64 = tcg_temp_new_i64(); CHECK_REG_PAIR(r3); GEN_HELPER_UL(mul_h, temp64, cpu_gpr_d[r1], cpu_gpr_d[r2], n); - tcg_gen_extr_i64_i32(cpu_gpr_d[r3], cpu_gpr_d[r3+1], temp64); - gen_calc_usb_mul_h(cpu_gpr_d[r3], cpu_gpr_d[r3+1]); + tcg_gen_extr_i64_i32(cpu_gpr_d[r3], cpu_gpr_d[r3 + 1], temp64); + gen_calc_usb_mul_h(cpu_gpr_d[r3], cpu_gpr_d[r3 + 1]); break; case OPC2_32_RR1_MUL_H_32_UU: temp64 = tcg_temp_new_i64(); CHECK_REG_PAIR(r3); GEN_HELPER_UU(mul_h, temp64, cpu_gpr_d[r1], cpu_gpr_d[r2], n); - tcg_gen_extr_i64_i32(cpu_gpr_d[r3], cpu_gpr_d[r3+1], temp64); - gen_calc_usb_mul_h(cpu_gpr_d[r3], cpu_gpr_d[r3+1]); + tcg_gen_extr_i64_i32(cpu_gpr_d[r3], cpu_gpr_d[r3 + 1], temp64); + gen_calc_usb_mul_h(cpu_gpr_d[r3], cpu_gpr_d[r3 + 1]); break; case OPC2_32_RR1_MULM_H_64_LL: temp64 = tcg_temp_new_i64(); CHECK_REG_PAIR(r3); GEN_HELPER_LL(mulm_h, temp64, cpu_gpr_d[r1], cpu_gpr_d[r2], n); - tcg_gen_extr_i64_i32(cpu_gpr_d[r3], cpu_gpr_d[r3+1], temp64); + tcg_gen_extr_i64_i32(cpu_gpr_d[r3], cpu_gpr_d[r3 + 1], temp64); /* reset V bit */ - tcg_gen_movi_tl(cpu_PSW_V, 0); + tcg_gen_movi_i32(cpu_PSW_V, 0); /* reset AV bit */ - tcg_gen_mov_tl(cpu_PSW_AV, cpu_PSW_V); + tcg_gen_mov_i32(cpu_PSW_AV, cpu_PSW_V); break; case OPC2_32_RR1_MULM_H_64_LU: temp64 = tcg_temp_new_i64(); CHECK_REG_PAIR(r3); GEN_HELPER_LU(mulm_h, temp64, cpu_gpr_d[r1], cpu_gpr_d[r2], n); - tcg_gen_extr_i64_i32(cpu_gpr_d[r3], cpu_gpr_d[r3+1], temp64); + tcg_gen_extr_i64_i32(cpu_gpr_d[r3], cpu_gpr_d[r3 + 1], temp64); /* reset V bit */ - tcg_gen_movi_tl(cpu_PSW_V, 0); + tcg_gen_movi_i32(cpu_PSW_V, 0); /* reset AV bit */ - tcg_gen_mov_tl(cpu_PSW_AV, cpu_PSW_V); + tcg_gen_mov_i32(cpu_PSW_AV, cpu_PSW_V); break; case OPC2_32_RR1_MULM_H_64_UL: temp64 = tcg_temp_new_i64(); CHECK_REG_PAIR(r3); GEN_HELPER_UL(mulm_h, temp64, cpu_gpr_d[r1], cpu_gpr_d[r2], n); - tcg_gen_extr_i64_i32(cpu_gpr_d[r3], cpu_gpr_d[r3+1], temp64); + tcg_gen_extr_i64_i32(cpu_gpr_d[r3], cpu_gpr_d[r3 + 1], temp64); /* reset V bit */ - tcg_gen_movi_tl(cpu_PSW_V, 0); + tcg_gen_movi_i32(cpu_PSW_V, 0); /* reset AV bit */ - tcg_gen_mov_tl(cpu_PSW_AV, cpu_PSW_V); + tcg_gen_mov_i32(cpu_PSW_AV, cpu_PSW_V); break; case OPC2_32_RR1_MULM_H_64_UU: temp64 = tcg_temp_new_i64(); CHECK_REG_PAIR(r3); GEN_HELPER_UU(mulm_h, temp64, cpu_gpr_d[r1], cpu_gpr_d[r2], n); - tcg_gen_extr_i64_i32(cpu_gpr_d[r3], cpu_gpr_d[r3+1], temp64); + tcg_gen_extr_i64_i32(cpu_gpr_d[r3], cpu_gpr_d[r3 + 1], temp64); /* reset V bit */ - tcg_gen_movi_tl(cpu_PSW_V, 0); + tcg_gen_movi_i32(cpu_PSW_V, 0); /* reset AV bit */ - tcg_gen_mov_tl(cpu_PSW_AV, cpu_PSW_V); + tcg_gen_mov_i32(cpu_PSW_AV, cpu_PSW_V); break; case OPC2_32_RR1_MULR_H_16_LL: GEN_HELPER_LL(mulr_h, cpu_gpr_d[r3], cpu_gpr_d[r1], cpu_gpr_d[r2], n); @@ -6418,7 +6456,7 @@ static void decode_rr1_mulq(DisasContext *ctx) int r1, r2, r3; uint32_t n; - TCGv temp, temp2; + TCGv_i32 temp, temp2; r1 = MASK_OP_RR1_S1(ctx->opcode); r2 = MASK_OP_RR1_S2(ctx->opcode); @@ -6426,8 +6464,8 @@ static void decode_rr1_mulq(DisasContext *ctx) n = MASK_OP_RR1_N(ctx->opcode); op2 = MASK_OP_RR1_OP2(ctx->opcode); - temp = tcg_temp_new(); - temp2 = tcg_temp_new(); + temp = tcg_temp_new_i32(); + temp2 = tcg_temp_new_i32(); switch (op2) { case OPC2_32_RR1_MUL_Q_32: @@ -6435,45 +6473,45 @@ static void decode_rr1_mulq(DisasContext *ctx) break; case OPC2_32_RR1_MUL_Q_64: CHECK_REG_PAIR(r3); - gen_mul_q(cpu_gpr_d[r3], cpu_gpr_d[r3+1], cpu_gpr_d[r1], cpu_gpr_d[r2], + gen_mul_q(cpu_gpr_d[r3], cpu_gpr_d[r3 + 
1], cpu_gpr_d[r1], cpu_gpr_d[r2], n, 0); break; case OPC2_32_RR1_MUL_Q_32_L: - tcg_gen_ext16s_tl(temp, cpu_gpr_d[r2]); + tcg_gen_ext16s_i32(temp, cpu_gpr_d[r2]); gen_mul_q(cpu_gpr_d[r3], temp, cpu_gpr_d[r1], temp, n, 16); break; case OPC2_32_RR1_MUL_Q_64_L: CHECK_REG_PAIR(r3); - tcg_gen_ext16s_tl(temp, cpu_gpr_d[r2]); - gen_mul_q(cpu_gpr_d[r3], cpu_gpr_d[r3+1], cpu_gpr_d[r1], temp, n, 0); + tcg_gen_ext16s_i32(temp, cpu_gpr_d[r2]); + gen_mul_q(cpu_gpr_d[r3], cpu_gpr_d[r3 + 1], cpu_gpr_d[r1], temp, n, 0); break; case OPC2_32_RR1_MUL_Q_32_U: - tcg_gen_sari_tl(temp, cpu_gpr_d[r2], 16); + tcg_gen_sari_i32(temp, cpu_gpr_d[r2], 16); gen_mul_q(cpu_gpr_d[r3], temp, cpu_gpr_d[r1], temp, n, 16); break; case OPC2_32_RR1_MUL_Q_64_U: CHECK_REG_PAIR(r3); - tcg_gen_sari_tl(temp, cpu_gpr_d[r2], 16); - gen_mul_q(cpu_gpr_d[r3], cpu_gpr_d[r3+1], cpu_gpr_d[r1], temp, n, 0); + tcg_gen_sari_i32(temp, cpu_gpr_d[r2], 16); + gen_mul_q(cpu_gpr_d[r3], cpu_gpr_d[r3 + 1], cpu_gpr_d[r1], temp, n, 0); break; case OPC2_32_RR1_MUL_Q_32_LL: - tcg_gen_ext16s_tl(temp, cpu_gpr_d[r1]); - tcg_gen_ext16s_tl(temp2, cpu_gpr_d[r2]); + tcg_gen_ext16s_i32(temp, cpu_gpr_d[r1]); + tcg_gen_ext16s_i32(temp2, cpu_gpr_d[r2]); gen_mul_q_16(cpu_gpr_d[r3], temp, temp2, n); break; case OPC2_32_RR1_MUL_Q_32_UU: - tcg_gen_sari_tl(temp, cpu_gpr_d[r1], 16); - tcg_gen_sari_tl(temp2, cpu_gpr_d[r2], 16); + tcg_gen_sari_i32(temp, cpu_gpr_d[r1], 16); + tcg_gen_sari_i32(temp2, cpu_gpr_d[r2], 16); gen_mul_q_16(cpu_gpr_d[r3], temp, temp2, n); break; case OPC2_32_RR1_MULR_Q_32_L: - tcg_gen_ext16s_tl(temp, cpu_gpr_d[r1]); - tcg_gen_ext16s_tl(temp2, cpu_gpr_d[r2]); + tcg_gen_ext16s_i32(temp, cpu_gpr_d[r1]); + tcg_gen_ext16s_i32(temp2, cpu_gpr_d[r2]); gen_mulr_q(cpu_gpr_d[r3], temp, temp2, n); break; case OPC2_32_RR1_MULR_Q_32_U: - tcg_gen_sari_tl(temp, cpu_gpr_d[r1], 16); - tcg_gen_sari_tl(temp2, cpu_gpr_d[r2], 16); + tcg_gen_sari_i32(temp, cpu_gpr_d[r1], 16); + tcg_gen_sari_i32(temp2, cpu_gpr_d[r2], 16); gen_mulr_q(cpu_gpr_d[r3], temp, temp2, n); break; default: @@ -6497,7 +6535,7 @@ static void decode_rr2_mul(DisasContext *ctx) break; case OPC2_32_RR2_MUL_64: CHECK_REG_PAIR(r3); - gen_mul_i64s(cpu_gpr_d[r3], cpu_gpr_d[r3+1], cpu_gpr_d[r1], + gen_mul_i64s(cpu_gpr_d[r3], cpu_gpr_d[r3 + 1], cpu_gpr_d[r1], cpu_gpr_d[r2]); break; case OPC2_32_RR2_MULS_32: @@ -6506,7 +6544,7 @@ static void decode_rr2_mul(DisasContext *ctx) break; case OPC2_32_RR2_MUL_U_64: CHECK_REG_PAIR(r3); - gen_mul_i64u(cpu_gpr_d[r3], cpu_gpr_d[r3+1], cpu_gpr_d[r1], + gen_mul_i64u(cpu_gpr_d[r3], cpu_gpr_d[r3 + 1], cpu_gpr_d[r1], cpu_gpr_d[r2]); break; case OPC2_32_RR2_MULS_U_32: @@ -6524,7 +6562,7 @@ static void decode_rrpw_extract_insert(DisasContext *ctx) uint32_t op2; int r1, r2, r3; int32_t pos, width; - TCGv temp; + TCGv_i32 temp; op2 = MASK_OP_RRPW_OP2(ctx->opcode); r1 = MASK_OP_RRPW_S1(ctx->opcode); @@ -6536,35 +6574,35 @@ static void decode_rrpw_extract_insert(DisasContext *ctx) switch (op2) { case OPC2_32_RRPW_EXTR: if (width == 0) { - tcg_gen_movi_tl(cpu_gpr_d[r3], 0); + tcg_gen_movi_i32(cpu_gpr_d[r3], 0); } else if (pos + width <= 32) { - tcg_gen_sextract_tl(cpu_gpr_d[r3], cpu_gpr_d[r1], pos, width); + tcg_gen_sextract_i32(cpu_gpr_d[r3], cpu_gpr_d[r1], pos, width); } break; case OPC2_32_RRPW_EXTR_U: if (width == 0) { - tcg_gen_movi_tl(cpu_gpr_d[r3], 0); + tcg_gen_movi_i32(cpu_gpr_d[r3], 0); } else { - tcg_gen_extract_tl(cpu_gpr_d[r3], cpu_gpr_d[r1], pos, width); + tcg_gen_extract_i32(cpu_gpr_d[r3], cpu_gpr_d[r1], pos, width); } break; case OPC2_32_RRPW_IMASK: CHECK_REG_PAIR(r3); if 
(pos + width <= 32) { - temp = tcg_temp_new(); - tcg_gen_movi_tl(temp, ((1u << width) - 1) << pos); - tcg_gen_shli_tl(cpu_gpr_d[r3], cpu_gpr_d[r2], pos); - tcg_gen_mov_tl(cpu_gpr_d[r3 + 1], temp); + temp = tcg_temp_new_i32(); + tcg_gen_movi_i32(temp, ((1u << width) - 1) << pos); + tcg_gen_shli_i32(cpu_gpr_d[r3], cpu_gpr_d[r2], pos); + tcg_gen_mov_i32(cpu_gpr_d[r3 + 1], temp); } break; case OPC2_32_RRPW_INSERT: - /* tcg_gen_deposit_tl() does not handle the case of width = 0 */ + /* tcg_gen_deposit_i32() does not handle the case of width = 0 */ if (width == 0) { - tcg_gen_mov_tl(cpu_gpr_d[r3], cpu_gpr_d[r1]); + tcg_gen_mov_i32(cpu_gpr_d[r3], cpu_gpr_d[r1]); } else if (pos + width <= 32) { - tcg_gen_deposit_tl(cpu_gpr_d[r3], cpu_gpr_d[r1], cpu_gpr_d[r2], + tcg_gen_deposit_i32(cpu_gpr_d[r3], cpu_gpr_d[r1], cpu_gpr_d[r2], pos, width); } break; @@ -6578,7 +6616,7 @@ static void decode_rrr_cond_select(DisasContext *ctx) { uint32_t op2; int r1, r2, r3, r4; - TCGv temp; + TCGv_i32 temp; op2 = MASK_OP_RRR_OP2(ctx->opcode); r1 = MASK_OP_RRR_S1(ctx->opcode); @@ -6605,12 +6643,12 @@ static void decode_rrr_cond_select(DisasContext *ctx) break; case OPC2_32_RRR_SEL: temp = tcg_constant_i32(0); - tcg_gen_movcond_tl(TCG_COND_NE, cpu_gpr_d[r4], cpu_gpr_d[r3], temp, + tcg_gen_movcond_i32(TCG_COND_NE, cpu_gpr_d[r4], cpu_gpr_d[r3], temp, cpu_gpr_d[r1], cpu_gpr_d[r2]); break; case OPC2_32_RRR_SELN: temp = tcg_constant_i32(0); - tcg_gen_movcond_tl(TCG_COND_EQ, cpu_gpr_d[r4], cpu_gpr_d[r3], temp, + tcg_gen_movcond_i32(TCG_COND_EQ, cpu_gpr_d[r4], cpu_gpr_d[r3], temp, cpu_gpr_d[r1], cpu_gpr_d[r2]); break; default: @@ -6634,49 +6672,49 @@ static void decode_rrr_divide(DisasContext *ctx) case OPC2_32_RRR_DVADJ: CHECK_REG_PAIR(r3); CHECK_REG_PAIR(r4); - GEN_HELPER_RRR(dvadj, cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3], - cpu_gpr_d[r3+1], cpu_gpr_d[r2]); + GEN_HELPER_RRR(dvadj, cpu_gpr_d[r4], cpu_gpr_d[r4 + 1], cpu_gpr_d[r3], + cpu_gpr_d[r3 + 1], cpu_gpr_d[r2]); break; case OPC2_32_RRR_DVSTEP: CHECK_REG_PAIR(r3); CHECK_REG_PAIR(r4); - GEN_HELPER_RRR(dvstep, cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3], - cpu_gpr_d[r3+1], cpu_gpr_d[r2]); + GEN_HELPER_RRR(dvstep, cpu_gpr_d[r4], cpu_gpr_d[r4 + 1], cpu_gpr_d[r3], + cpu_gpr_d[r3 + 1], cpu_gpr_d[r2]); break; case OPC2_32_RRR_DVSTEP_U: CHECK_REG_PAIR(r3); CHECK_REG_PAIR(r4); - GEN_HELPER_RRR(dvstep_u, cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3], - cpu_gpr_d[r3+1], cpu_gpr_d[r2]); + GEN_HELPER_RRR(dvstep_u, cpu_gpr_d[r4], cpu_gpr_d[r4 + 1], cpu_gpr_d[r3], + cpu_gpr_d[r3 + 1], cpu_gpr_d[r2]); break; case OPC2_32_RRR_IXMAX: CHECK_REG_PAIR(r3); CHECK_REG_PAIR(r4); - GEN_HELPER_RRR(ixmax, cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3], - cpu_gpr_d[r3+1], cpu_gpr_d[r2]); + GEN_HELPER_RRR(ixmax, cpu_gpr_d[r4], cpu_gpr_d[r4 + 1], cpu_gpr_d[r3], + cpu_gpr_d[r3 + 1], cpu_gpr_d[r2]); break; case OPC2_32_RRR_IXMAX_U: CHECK_REG_PAIR(r3); CHECK_REG_PAIR(r4); - GEN_HELPER_RRR(ixmax_u, cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3], - cpu_gpr_d[r3+1], cpu_gpr_d[r2]); + GEN_HELPER_RRR(ixmax_u, cpu_gpr_d[r4], cpu_gpr_d[r4 + 1], cpu_gpr_d[r3], + cpu_gpr_d[r3 + 1], cpu_gpr_d[r2]); break; case OPC2_32_RRR_IXMIN: CHECK_REG_PAIR(r3); CHECK_REG_PAIR(r4); - GEN_HELPER_RRR(ixmin, cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3], - cpu_gpr_d[r3+1], cpu_gpr_d[r2]); + GEN_HELPER_RRR(ixmin, cpu_gpr_d[r4], cpu_gpr_d[r4 + 1], cpu_gpr_d[r3], + cpu_gpr_d[r3 + 1], cpu_gpr_d[r2]); break; case OPC2_32_RRR_IXMIN_U: CHECK_REG_PAIR(r3); CHECK_REG_PAIR(r4); - GEN_HELPER_RRR(ixmin_u, cpu_gpr_d[r4], cpu_gpr_d[r4+1], 
cpu_gpr_d[r3], - cpu_gpr_d[r3+1], cpu_gpr_d[r2]); + GEN_HELPER_RRR(ixmin_u, cpu_gpr_d[r4], cpu_gpr_d[r4 + 1], cpu_gpr_d[r3], + cpu_gpr_d[r3 + 1], cpu_gpr_d[r2]); break; case OPC2_32_RRR_PACK: CHECK_REG_PAIR(r3); gen_helper_pack(cpu_gpr_d[r4], cpu_PSW_C, cpu_gpr_d[r3], - cpu_gpr_d[r3+1], cpu_gpr_d[r1]); + cpu_gpr_d[r3 + 1], cpu_gpr_d[r1]); break; case OPC2_32_RRR_CRCN: if (has_feature(ctx, TRICORE_FEATURE_162)) { @@ -6724,8 +6762,8 @@ static void decode_rrr2_madd(DisasContext *ctx) case OPC2_32_RRR2_MADD_64: CHECK_REG_PAIR(r4); CHECK_REG_PAIR(r3); - gen_madd64_d(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r1], - cpu_gpr_d[r3], cpu_gpr_d[r3+1], cpu_gpr_d[r2]); + gen_madd64_d(cpu_gpr_d[r4], cpu_gpr_d[r4 + 1], cpu_gpr_d[r1], + cpu_gpr_d[r3], cpu_gpr_d[r3 + 1], cpu_gpr_d[r2]); break; case OPC2_32_RRR2_MADDS_32: gen_helper_madd32_ssov(cpu_gpr_d[r4], tcg_env, cpu_gpr_d[r1], @@ -6734,14 +6772,14 @@ static void decode_rrr2_madd(DisasContext *ctx) case OPC2_32_RRR2_MADDS_64: CHECK_REG_PAIR(r4); CHECK_REG_PAIR(r3); - gen_madds_64(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r1], - cpu_gpr_d[r3], cpu_gpr_d[r3+1], cpu_gpr_d[r2]); + gen_madds_64(cpu_gpr_d[r4], cpu_gpr_d[r4 + 1], cpu_gpr_d[r1], + cpu_gpr_d[r3], cpu_gpr_d[r3 + 1], cpu_gpr_d[r2]); break; case OPC2_32_RRR2_MADD_U_64: CHECK_REG_PAIR(r4); CHECK_REG_PAIR(r3); - gen_maddu64_d(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r1], - cpu_gpr_d[r3], cpu_gpr_d[r3+1], cpu_gpr_d[r2]); + gen_maddu64_d(cpu_gpr_d[r4], cpu_gpr_d[r4 + 1], cpu_gpr_d[r1], + cpu_gpr_d[r3], cpu_gpr_d[r3 + 1], cpu_gpr_d[r2]); break; case OPC2_32_RRR2_MADDS_U_32: gen_helper_madd32_suov(cpu_gpr_d[r4], tcg_env, cpu_gpr_d[r1], @@ -6750,8 +6788,8 @@ static void decode_rrr2_madd(DisasContext *ctx) case OPC2_32_RRR2_MADDS_U_64: CHECK_REG_PAIR(r4); CHECK_REG_PAIR(r3); - gen_maddsu_64(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r1], - cpu_gpr_d[r3], cpu_gpr_d[r3+1], cpu_gpr_d[r2]); + gen_maddsu_64(cpu_gpr_d[r4], cpu_gpr_d[r4 + 1], cpu_gpr_d[r1], + cpu_gpr_d[r3], cpu_gpr_d[r3 + 1], cpu_gpr_d[r2]); break; default: generate_trap(ctx, TRAPC_INSN_ERR, TIN2_IOPC); @@ -6777,8 +6815,8 @@ static void decode_rrr2_msub(DisasContext *ctx) case OPC2_32_RRR2_MSUB_64: CHECK_REG_PAIR(r4); CHECK_REG_PAIR(r3); - gen_msub64_d(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r1], - cpu_gpr_d[r3], cpu_gpr_d[r3+1], cpu_gpr_d[r2]); + gen_msub64_d(cpu_gpr_d[r4], cpu_gpr_d[r4 + 1], cpu_gpr_d[r1], + cpu_gpr_d[r3], cpu_gpr_d[r3 + 1], cpu_gpr_d[r2]); break; case OPC2_32_RRR2_MSUBS_32: gen_helper_msub32_ssov(cpu_gpr_d[r4], tcg_env, cpu_gpr_d[r1], @@ -6787,14 +6825,14 @@ static void decode_rrr2_msub(DisasContext *ctx) case OPC2_32_RRR2_MSUBS_64: CHECK_REG_PAIR(r4); CHECK_REG_PAIR(r3); - gen_msubs_64(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r1], - cpu_gpr_d[r3], cpu_gpr_d[r3+1], cpu_gpr_d[r2]); + gen_msubs_64(cpu_gpr_d[r4], cpu_gpr_d[r4 + 1], cpu_gpr_d[r1], + cpu_gpr_d[r3], cpu_gpr_d[r3 + 1], cpu_gpr_d[r2]); break; case OPC2_32_RRR2_MSUB_U_64: CHECK_REG_PAIR(r4); CHECK_REG_PAIR(r3); - gen_msubu64_d(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r1], - cpu_gpr_d[r3], cpu_gpr_d[r3+1], cpu_gpr_d[r2]); + gen_msubu64_d(cpu_gpr_d[r4], cpu_gpr_d[r4 + 1], cpu_gpr_d[r1], + cpu_gpr_d[r3], cpu_gpr_d[r3 + 1], cpu_gpr_d[r2]); break; case OPC2_32_RRR2_MSUBS_U_32: gen_helper_msub32_suov(cpu_gpr_d[r4], tcg_env, cpu_gpr_d[r1], @@ -6803,8 +6841,8 @@ static void decode_rrr2_msub(DisasContext *ctx) case OPC2_32_RRR2_MSUBS_U_64: CHECK_REG_PAIR(r4); CHECK_REG_PAIR(r3); - gen_msubsu_64(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r1], - cpu_gpr_d[r3], 
cpu_gpr_d[r3+1], cpu_gpr_d[r2]); + gen_msubsu_64(cpu_gpr_d[r4], cpu_gpr_d[r4 + 1], cpu_gpr_d[r1], + cpu_gpr_d[r3], cpu_gpr_d[r3 + 1], cpu_gpr_d[r2]); break; default: generate_trap(ctx, TRAPC_INSN_ERR, TIN2_IOPC); @@ -6828,98 +6866,98 @@ static void decode_rrr1_madd(DisasContext *ctx) case OPC2_32_RRR1_MADD_H_LL: CHECK_REG_PAIR(r4); CHECK_REG_PAIR(r3); - gen_madd_h(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3], - cpu_gpr_d[r3+1], cpu_gpr_d[r1], cpu_gpr_d[r2], n, MODE_LL); + gen_madd_h(cpu_gpr_d[r4], cpu_gpr_d[r4 + 1], cpu_gpr_d[r3], + cpu_gpr_d[r3 + 1], cpu_gpr_d[r1], cpu_gpr_d[r2], n, MODE_LL); break; case OPC2_32_RRR1_MADD_H_LU: CHECK_REG_PAIR(r4); CHECK_REG_PAIR(r3); - gen_madd_h(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3], - cpu_gpr_d[r3+1], cpu_gpr_d[r1], cpu_gpr_d[r2], n, MODE_LU); + gen_madd_h(cpu_gpr_d[r4], cpu_gpr_d[r4 + 1], cpu_gpr_d[r3], + cpu_gpr_d[r3 + 1], cpu_gpr_d[r1], cpu_gpr_d[r2], n, MODE_LU); break; case OPC2_32_RRR1_MADD_H_UL: CHECK_REG_PAIR(r4); CHECK_REG_PAIR(r3); - gen_madd_h(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3], - cpu_gpr_d[r3+1], cpu_gpr_d[r1], cpu_gpr_d[r2], n, MODE_UL); + gen_madd_h(cpu_gpr_d[r4], cpu_gpr_d[r4 + 1], cpu_gpr_d[r3], + cpu_gpr_d[r3 + 1], cpu_gpr_d[r1], cpu_gpr_d[r2], n, MODE_UL); break; case OPC2_32_RRR1_MADD_H_UU: CHECK_REG_PAIR(r4); CHECK_REG_PAIR(r3); - gen_madd_h(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3], - cpu_gpr_d[r3+1], cpu_gpr_d[r1], cpu_gpr_d[r2], n, MODE_UU); + gen_madd_h(cpu_gpr_d[r4], cpu_gpr_d[r4 + 1], cpu_gpr_d[r3], + cpu_gpr_d[r3 + 1], cpu_gpr_d[r1], cpu_gpr_d[r2], n, MODE_UU); break; case OPC2_32_RRR1_MADDS_H_LL: CHECK_REG_PAIR(r4); CHECK_REG_PAIR(r3); - gen_madds_h(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3], - cpu_gpr_d[r3+1], cpu_gpr_d[r1], cpu_gpr_d[r2], n, MODE_LL); + gen_madds_h(cpu_gpr_d[r4], cpu_gpr_d[r4 + 1], cpu_gpr_d[r3], + cpu_gpr_d[r3 + 1], cpu_gpr_d[r1], cpu_gpr_d[r2], n, MODE_LL); break; case OPC2_32_RRR1_MADDS_H_LU: CHECK_REG_PAIR(r4); CHECK_REG_PAIR(r3); - gen_madds_h(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3], - cpu_gpr_d[r3+1], cpu_gpr_d[r1], cpu_gpr_d[r2], n, MODE_LU); + gen_madds_h(cpu_gpr_d[r4], cpu_gpr_d[r4 + 1], cpu_gpr_d[r3], + cpu_gpr_d[r3 + 1], cpu_gpr_d[r1], cpu_gpr_d[r2], n, MODE_LU); break; case OPC2_32_RRR1_MADDS_H_UL: CHECK_REG_PAIR(r4); CHECK_REG_PAIR(r3); - gen_madds_h(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3], - cpu_gpr_d[r3+1], cpu_gpr_d[r1], cpu_gpr_d[r2], n, MODE_UL); + gen_madds_h(cpu_gpr_d[r4], cpu_gpr_d[r4 + 1], cpu_gpr_d[r3], + cpu_gpr_d[r3 + 1], cpu_gpr_d[r1], cpu_gpr_d[r2], n, MODE_UL); break; case OPC2_32_RRR1_MADDS_H_UU: CHECK_REG_PAIR(r4); CHECK_REG_PAIR(r3); - gen_madds_h(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3], - cpu_gpr_d[r3+1], cpu_gpr_d[r1], cpu_gpr_d[r2], n, MODE_UU); + gen_madds_h(cpu_gpr_d[r4], cpu_gpr_d[r4 + 1], cpu_gpr_d[r3], + cpu_gpr_d[r3 + 1], cpu_gpr_d[r1], cpu_gpr_d[r2], n, MODE_UU); break; case OPC2_32_RRR1_MADDM_H_LL: CHECK_REG_PAIR(r4); CHECK_REG_PAIR(r3); - gen_maddm_h(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3], - cpu_gpr_d[r3+1], cpu_gpr_d[r1], cpu_gpr_d[r2], n, MODE_LL); + gen_maddm_h(cpu_gpr_d[r4], cpu_gpr_d[r4 + 1], cpu_gpr_d[r3], + cpu_gpr_d[r3 + 1], cpu_gpr_d[r1], cpu_gpr_d[r2], n, MODE_LL); break; case OPC2_32_RRR1_MADDM_H_LU: CHECK_REG_PAIR(r4); CHECK_REG_PAIR(r3); - gen_maddm_h(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3], - cpu_gpr_d[r3+1], cpu_gpr_d[r1], cpu_gpr_d[r2], n, MODE_LU); + gen_maddm_h(cpu_gpr_d[r4], cpu_gpr_d[r4 + 1], cpu_gpr_d[r3], + cpu_gpr_d[r3 + 1], cpu_gpr_d[r1], cpu_gpr_d[r2], n, MODE_LU); break; case 
OPC2_32_RRR1_MADDM_H_UL: CHECK_REG_PAIR(r4); CHECK_REG_PAIR(r3); - gen_maddm_h(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3], - cpu_gpr_d[r3+1], cpu_gpr_d[r1], cpu_gpr_d[r2], n, MODE_UL); + gen_maddm_h(cpu_gpr_d[r4], cpu_gpr_d[r4 + 1], cpu_gpr_d[r3], + cpu_gpr_d[r3 + 1], cpu_gpr_d[r1], cpu_gpr_d[r2], n, MODE_UL); break; case OPC2_32_RRR1_MADDM_H_UU: CHECK_REG_PAIR(r4); CHECK_REG_PAIR(r3); - gen_maddm_h(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3], - cpu_gpr_d[r3+1], cpu_gpr_d[r1], cpu_gpr_d[r2], n, MODE_UU); + gen_maddm_h(cpu_gpr_d[r4], cpu_gpr_d[r4 + 1], cpu_gpr_d[r3], + cpu_gpr_d[r3 + 1], cpu_gpr_d[r1], cpu_gpr_d[r2], n, MODE_UU); break; case OPC2_32_RRR1_MADDMS_H_LL: CHECK_REG_PAIR(r4); CHECK_REG_PAIR(r3); - gen_maddms_h(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3], - cpu_gpr_d[r3+1], cpu_gpr_d[r1], cpu_gpr_d[r2], n, MODE_LL); + gen_maddms_h(cpu_gpr_d[r4], cpu_gpr_d[r4 + 1], cpu_gpr_d[r3], + cpu_gpr_d[r3 + 1], cpu_gpr_d[r1], cpu_gpr_d[r2], n, MODE_LL); break; case OPC2_32_RRR1_MADDMS_H_LU: CHECK_REG_PAIR(r4); CHECK_REG_PAIR(r3); - gen_maddms_h(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3], - cpu_gpr_d[r3+1], cpu_gpr_d[r1], cpu_gpr_d[r2], n, MODE_LU); + gen_maddms_h(cpu_gpr_d[r4], cpu_gpr_d[r4 + 1], cpu_gpr_d[r3], + cpu_gpr_d[r3 + 1], cpu_gpr_d[r1], cpu_gpr_d[r2], n, MODE_LU); break; case OPC2_32_RRR1_MADDMS_H_UL: CHECK_REG_PAIR(r4); CHECK_REG_PAIR(r3); - gen_maddms_h(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3], - cpu_gpr_d[r3+1], cpu_gpr_d[r1], cpu_gpr_d[r2], n, MODE_UL); + gen_maddms_h(cpu_gpr_d[r4], cpu_gpr_d[r4 + 1], cpu_gpr_d[r3], + cpu_gpr_d[r3 + 1], cpu_gpr_d[r1], cpu_gpr_d[r2], n, MODE_UL); break; case OPC2_32_RRR1_MADDMS_H_UU: CHECK_REG_PAIR(r4); CHECK_REG_PAIR(r3); - gen_maddms_h(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3], - cpu_gpr_d[r3+1], cpu_gpr_d[r1], cpu_gpr_d[r2], n, MODE_UU); + gen_maddms_h(cpu_gpr_d[r4], cpu_gpr_d[r4 + 1], cpu_gpr_d[r3], + cpu_gpr_d[r3 + 1], cpu_gpr_d[r1], cpu_gpr_d[r2], n, MODE_UU); break; case OPC2_32_RRR1_MADDR_H_LL: gen_maddr32_h(cpu_gpr_d[r4], cpu_gpr_d[r3], cpu_gpr_d[r1], @@ -6962,7 +7000,7 @@ static void decode_rrr1_maddq_h(DisasContext *ctx) { uint32_t op2; uint32_t r1, r2, r3, r4, n; - TCGv temp, temp2; + TCGv_i32 temp, temp2; op2 = MASK_OP_RRR1_OP2(ctx->opcode); r1 = MASK_OP_RRR1_S1(ctx->opcode); @@ -6971,8 +7009,8 @@ static void decode_rrr1_maddq_h(DisasContext *ctx) r4 = MASK_OP_RRR1_D(ctx->opcode); n = MASK_OP_RRR1_N(ctx->opcode); - temp = tcg_temp_new(); - temp2 = tcg_temp_new(); + temp = tcg_temp_new_i32(); + temp2 = tcg_temp_new_i32(); switch (op2) { case OPC2_32_RRR1_MADD_Q_32: @@ -6982,61 +7020,61 @@ static void decode_rrr1_maddq_h(DisasContext *ctx) case OPC2_32_RRR1_MADD_Q_64: CHECK_REG_PAIR(r4); CHECK_REG_PAIR(r3); - gen_madd64_q(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3], - cpu_gpr_d[r3+1], cpu_gpr_d[r1], cpu_gpr_d[r2], + gen_madd64_q(cpu_gpr_d[r4], cpu_gpr_d[r4 + 1], cpu_gpr_d[r3], + cpu_gpr_d[r3 + 1], cpu_gpr_d[r1], cpu_gpr_d[r2], n); break; case OPC2_32_RRR1_MADD_Q_32_L: - tcg_gen_ext16s_tl(temp, cpu_gpr_d[r2]); + tcg_gen_ext16s_i32(temp, cpu_gpr_d[r2]); gen_madd32_q(cpu_gpr_d[r4], cpu_gpr_d[r3], cpu_gpr_d[r1], temp, n, 16); break; case OPC2_32_RRR1_MADD_Q_64_L: CHECK_REG_PAIR(r4); CHECK_REG_PAIR(r3); - tcg_gen_ext16s_tl(temp, cpu_gpr_d[r2]); - gen_madd64_q(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3], - cpu_gpr_d[r3+1], cpu_gpr_d[r1], temp, + tcg_gen_ext16s_i32(temp, cpu_gpr_d[r2]); + gen_madd64_q(cpu_gpr_d[r4], cpu_gpr_d[r4 + 1], cpu_gpr_d[r3], + cpu_gpr_d[r3 + 1], cpu_gpr_d[r1], temp, n); break; case 
OPC2_32_RRR1_MADD_Q_32_U: - tcg_gen_sari_tl(temp, cpu_gpr_d[r2], 16); + tcg_gen_sari_i32(temp, cpu_gpr_d[r2], 16); gen_madd32_q(cpu_gpr_d[r4], cpu_gpr_d[r3], cpu_gpr_d[r1], temp, n, 16); break; case OPC2_32_RRR1_MADD_Q_64_U: CHECK_REG_PAIR(r4); CHECK_REG_PAIR(r3); - tcg_gen_sari_tl(temp, cpu_gpr_d[r2], 16); - gen_madd64_q(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3], - cpu_gpr_d[r3+1], cpu_gpr_d[r1], temp, + tcg_gen_sari_i32(temp, cpu_gpr_d[r2], 16); + gen_madd64_q(cpu_gpr_d[r4], cpu_gpr_d[r4 + 1], cpu_gpr_d[r3], + cpu_gpr_d[r3 + 1], cpu_gpr_d[r1], temp, n); break; case OPC2_32_RRR1_MADD_Q_32_LL: - tcg_gen_ext16s_tl(temp, cpu_gpr_d[r1]); - tcg_gen_ext16s_tl(temp2, cpu_gpr_d[r2]); + tcg_gen_ext16s_i32(temp, cpu_gpr_d[r1]); + tcg_gen_ext16s_i32(temp2, cpu_gpr_d[r2]); gen_m16add32_q(cpu_gpr_d[r4], cpu_gpr_d[r3], temp, temp2, n); break; case OPC2_32_RRR1_MADD_Q_64_LL: CHECK_REG_PAIR(r4); CHECK_REG_PAIR(r3); - tcg_gen_ext16s_tl(temp, cpu_gpr_d[r1]); - tcg_gen_ext16s_tl(temp2, cpu_gpr_d[r2]); - gen_m16add64_q(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3], - cpu_gpr_d[r3+1], temp, temp2, n); + tcg_gen_ext16s_i32(temp, cpu_gpr_d[r1]); + tcg_gen_ext16s_i32(temp2, cpu_gpr_d[r2]); + gen_m16add64_q(cpu_gpr_d[r4], cpu_gpr_d[r4 + 1], cpu_gpr_d[r3], + cpu_gpr_d[r3 + 1], temp, temp2, n); break; case OPC2_32_RRR1_MADD_Q_32_UU: - tcg_gen_sari_tl(temp, cpu_gpr_d[r1], 16); - tcg_gen_sari_tl(temp2, cpu_gpr_d[r2], 16); + tcg_gen_sari_i32(temp, cpu_gpr_d[r1], 16); + tcg_gen_sari_i32(temp2, cpu_gpr_d[r2], 16); gen_m16add32_q(cpu_gpr_d[r4], cpu_gpr_d[r3], temp, temp2, n); break; case OPC2_32_RRR1_MADD_Q_64_UU: CHECK_REG_PAIR(r4); CHECK_REG_PAIR(r3); - tcg_gen_sari_tl(temp, cpu_gpr_d[r1], 16); - tcg_gen_sari_tl(temp2, cpu_gpr_d[r2], 16); - gen_m16add64_q(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3], - cpu_gpr_d[r3+1], temp, temp2, n); + tcg_gen_sari_i32(temp, cpu_gpr_d[r1], 16); + tcg_gen_sari_i32(temp2, cpu_gpr_d[r2], 16); + gen_m16add64_q(cpu_gpr_d[r4], cpu_gpr_d[r4 + 1], cpu_gpr_d[r3], + cpu_gpr_d[r3 + 1], temp, temp2, n); break; case OPC2_32_RRR1_MADDS_Q_32: gen_madds32_q(cpu_gpr_d[r4], cpu_gpr_d[r3], cpu_gpr_d[r1], @@ -7045,90 +7083,90 @@ static void decode_rrr1_maddq_h(DisasContext *ctx) case OPC2_32_RRR1_MADDS_Q_64: CHECK_REG_PAIR(r4); CHECK_REG_PAIR(r3); - gen_madds64_q(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3], - cpu_gpr_d[r3+1], cpu_gpr_d[r1], cpu_gpr_d[r2], + gen_madds64_q(cpu_gpr_d[r4], cpu_gpr_d[r4 + 1], cpu_gpr_d[r3], + cpu_gpr_d[r3 + 1], cpu_gpr_d[r1], cpu_gpr_d[r2], n); break; case OPC2_32_RRR1_MADDS_Q_32_L: - tcg_gen_ext16s_tl(temp, cpu_gpr_d[r2]); + tcg_gen_ext16s_i32(temp, cpu_gpr_d[r2]); gen_madds32_q(cpu_gpr_d[r4], cpu_gpr_d[r3], cpu_gpr_d[r1], temp, n, 16); break; case OPC2_32_RRR1_MADDS_Q_64_L: CHECK_REG_PAIR(r4); CHECK_REG_PAIR(r3); - tcg_gen_ext16s_tl(temp, cpu_gpr_d[r2]); - gen_madds64_q(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3], - cpu_gpr_d[r3+1], cpu_gpr_d[r1], temp, + tcg_gen_ext16s_i32(temp, cpu_gpr_d[r2]); + gen_madds64_q(cpu_gpr_d[r4], cpu_gpr_d[r4 + 1], cpu_gpr_d[r3], + cpu_gpr_d[r3 + 1], cpu_gpr_d[r1], temp, n); break; case OPC2_32_RRR1_MADDS_Q_32_U: - tcg_gen_sari_tl(temp, cpu_gpr_d[r2], 16); + tcg_gen_sari_i32(temp, cpu_gpr_d[r2], 16); gen_madds32_q(cpu_gpr_d[r4], cpu_gpr_d[r3], cpu_gpr_d[r1], temp, n, 16); break; case OPC2_32_RRR1_MADDS_Q_64_U: CHECK_REG_PAIR(r4); CHECK_REG_PAIR(r3); - tcg_gen_sari_tl(temp, cpu_gpr_d[r2], 16); - gen_madds64_q(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3], - cpu_gpr_d[r3+1], cpu_gpr_d[r1], temp, + tcg_gen_sari_i32(temp, cpu_gpr_d[r2], 
16); + gen_madds64_q(cpu_gpr_d[r4], cpu_gpr_d[r4 + 1], cpu_gpr_d[r3], + cpu_gpr_d[r3 + 1], cpu_gpr_d[r1], temp, n); break; case OPC2_32_RRR1_MADDS_Q_32_LL: - tcg_gen_ext16s_tl(temp, cpu_gpr_d[r1]); - tcg_gen_ext16s_tl(temp2, cpu_gpr_d[r2]); + tcg_gen_ext16s_i32(temp, cpu_gpr_d[r1]); + tcg_gen_ext16s_i32(temp2, cpu_gpr_d[r2]); gen_m16adds32_q(cpu_gpr_d[r4], cpu_gpr_d[r3], temp, temp2, n); break; case OPC2_32_RRR1_MADDS_Q_64_LL: CHECK_REG_PAIR(r4); CHECK_REG_PAIR(r3); - tcg_gen_ext16s_tl(temp, cpu_gpr_d[r1]); - tcg_gen_ext16s_tl(temp2, cpu_gpr_d[r2]); - gen_m16adds64_q(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3], - cpu_gpr_d[r3+1], temp, temp2, n); + tcg_gen_ext16s_i32(temp, cpu_gpr_d[r1]); + tcg_gen_ext16s_i32(temp2, cpu_gpr_d[r2]); + gen_m16adds64_q(cpu_gpr_d[r4], cpu_gpr_d[r4 + 1], cpu_gpr_d[r3], + cpu_gpr_d[r3 + 1], temp, temp2, n); break; case OPC2_32_RRR1_MADDS_Q_32_UU: - tcg_gen_sari_tl(temp, cpu_gpr_d[r1], 16); - tcg_gen_sari_tl(temp2, cpu_gpr_d[r2], 16); + tcg_gen_sari_i32(temp, cpu_gpr_d[r1], 16); + tcg_gen_sari_i32(temp2, cpu_gpr_d[r2], 16); gen_m16adds32_q(cpu_gpr_d[r4], cpu_gpr_d[r3], temp, temp2, n); break; case OPC2_32_RRR1_MADDS_Q_64_UU: CHECK_REG_PAIR(r4); CHECK_REG_PAIR(r3); - tcg_gen_sari_tl(temp, cpu_gpr_d[r1], 16); - tcg_gen_sari_tl(temp2, cpu_gpr_d[r2], 16); - gen_m16adds64_q(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3], - cpu_gpr_d[r3+1], temp, temp2, n); + tcg_gen_sari_i32(temp, cpu_gpr_d[r1], 16); + tcg_gen_sari_i32(temp2, cpu_gpr_d[r2], 16); + gen_m16adds64_q(cpu_gpr_d[r4], cpu_gpr_d[r4 + 1], cpu_gpr_d[r3], + cpu_gpr_d[r3 + 1], temp, temp2, n); break; case OPC2_32_RRR1_MADDR_H_64_UL: CHECK_REG_PAIR(r3); - gen_maddr64_h(cpu_gpr_d[r4], cpu_gpr_d[r3], cpu_gpr_d[r3+1], + gen_maddr64_h(cpu_gpr_d[r4], cpu_gpr_d[r3], cpu_gpr_d[r3 + 1], cpu_gpr_d[r1], cpu_gpr_d[r2], n, 2); break; case OPC2_32_RRR1_MADDRS_H_64_UL: CHECK_REG_PAIR(r3); - gen_maddr64s_h(cpu_gpr_d[r4], cpu_gpr_d[r3], cpu_gpr_d[r3+1], + gen_maddr64s_h(cpu_gpr_d[r4], cpu_gpr_d[r3], cpu_gpr_d[r3 + 1], cpu_gpr_d[r1], cpu_gpr_d[r2], n, 2); break; case OPC2_32_RRR1_MADDR_Q_32_LL: - tcg_gen_ext16s_tl(temp, cpu_gpr_d[r1]); - tcg_gen_ext16s_tl(temp2, cpu_gpr_d[r2]); + tcg_gen_ext16s_i32(temp, cpu_gpr_d[r1]); + tcg_gen_ext16s_i32(temp2, cpu_gpr_d[r2]); gen_maddr_q(cpu_gpr_d[r4], cpu_gpr_d[r3], temp, temp2, n); break; case OPC2_32_RRR1_MADDR_Q_32_UU: - tcg_gen_sari_tl(temp, cpu_gpr_d[r1], 16); - tcg_gen_sari_tl(temp2, cpu_gpr_d[r2], 16); + tcg_gen_sari_i32(temp, cpu_gpr_d[r1], 16); + tcg_gen_sari_i32(temp2, cpu_gpr_d[r2], 16); gen_maddr_q(cpu_gpr_d[r4], cpu_gpr_d[r3], temp, temp2, n); break; case OPC2_32_RRR1_MADDRS_Q_32_LL: - tcg_gen_ext16s_tl(temp, cpu_gpr_d[r1]); - tcg_gen_ext16s_tl(temp2, cpu_gpr_d[r2]); + tcg_gen_ext16s_i32(temp, cpu_gpr_d[r1]); + tcg_gen_ext16s_i32(temp2, cpu_gpr_d[r2]); gen_maddrs_q(cpu_gpr_d[r4], cpu_gpr_d[r3], temp, temp2, n); break; case OPC2_32_RRR1_MADDRS_Q_32_UU: - tcg_gen_sari_tl(temp, cpu_gpr_d[r1], 16); - tcg_gen_sari_tl(temp2, cpu_gpr_d[r2], 16); + tcg_gen_sari_i32(temp, cpu_gpr_d[r1], 16); + tcg_gen_sari_i32(temp2, cpu_gpr_d[r2], 16); gen_maddrs_q(cpu_gpr_d[r4], cpu_gpr_d[r3], temp, temp2, n); break; default: @@ -7152,109 +7190,109 @@ static void decode_rrr1_maddsu_h(DisasContext *ctx) case OPC2_32_RRR1_MADDSU_H_32_LL: CHECK_REG_PAIR(r4); CHECK_REG_PAIR(r3); - gen_maddsu_h(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3], - cpu_gpr_d[r3+1], cpu_gpr_d[r1], cpu_gpr_d[r2], n, MODE_LL); + gen_maddsu_h(cpu_gpr_d[r4], cpu_gpr_d[r4 + 1], cpu_gpr_d[r3], + cpu_gpr_d[r3 + 1], cpu_gpr_d[r1], 
cpu_gpr_d[r2], n, MODE_LL); break; case OPC2_32_RRR1_MADDSU_H_32_LU: CHECK_REG_PAIR(r4); CHECK_REG_PAIR(r3); - gen_maddsu_h(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3], - cpu_gpr_d[r3+1], cpu_gpr_d[r1], cpu_gpr_d[r2], n, MODE_LU); + gen_maddsu_h(cpu_gpr_d[r4], cpu_gpr_d[r4 + 1], cpu_gpr_d[r3], + cpu_gpr_d[r3 + 1], cpu_gpr_d[r1], cpu_gpr_d[r2], n, MODE_LU); break; case OPC2_32_RRR1_MADDSU_H_32_UL: CHECK_REG_PAIR(r4); CHECK_REG_PAIR(r3); - gen_maddsu_h(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3], - cpu_gpr_d[r3+1], cpu_gpr_d[r1], cpu_gpr_d[r2], n, MODE_UL); + gen_maddsu_h(cpu_gpr_d[r4], cpu_gpr_d[r4 + 1], cpu_gpr_d[r3], + cpu_gpr_d[r3 + 1], cpu_gpr_d[r1], cpu_gpr_d[r2], n, MODE_UL); break; case OPC2_32_RRR1_MADDSU_H_32_UU: CHECK_REG_PAIR(r4); CHECK_REG_PAIR(r3); - gen_maddsu_h(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3], - cpu_gpr_d[r3+1], cpu_gpr_d[r1], cpu_gpr_d[r2], n, MODE_UU); + gen_maddsu_h(cpu_gpr_d[r4], cpu_gpr_d[r4 + 1], cpu_gpr_d[r3], + cpu_gpr_d[r3 + 1], cpu_gpr_d[r1], cpu_gpr_d[r2], n, MODE_UU); break; case OPC2_32_RRR1_MADDSUS_H_32_LL: CHECK_REG_PAIR(r4); CHECK_REG_PAIR(r3); - gen_maddsus_h(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3], - cpu_gpr_d[r3+1], cpu_gpr_d[r1], cpu_gpr_d[r2], + gen_maddsus_h(cpu_gpr_d[r4], cpu_gpr_d[r4 + 1], cpu_gpr_d[r3], + cpu_gpr_d[r3 + 1], cpu_gpr_d[r1], cpu_gpr_d[r2], n, MODE_LL); break; case OPC2_32_RRR1_MADDSUS_H_32_LU: CHECK_REG_PAIR(r4); CHECK_REG_PAIR(r3); - gen_maddsus_h(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3], - cpu_gpr_d[r3+1], cpu_gpr_d[r1], cpu_gpr_d[r2], + gen_maddsus_h(cpu_gpr_d[r4], cpu_gpr_d[r4 + 1], cpu_gpr_d[r3], + cpu_gpr_d[r3 + 1], cpu_gpr_d[r1], cpu_gpr_d[r2], n, MODE_LU); break; case OPC2_32_RRR1_MADDSUS_H_32_UL: CHECK_REG_PAIR(r4); CHECK_REG_PAIR(r3); - gen_maddsus_h(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3], - cpu_gpr_d[r3+1], cpu_gpr_d[r1], cpu_gpr_d[r2], + gen_maddsus_h(cpu_gpr_d[r4], cpu_gpr_d[r4 + 1], cpu_gpr_d[r3], + cpu_gpr_d[r3 + 1], cpu_gpr_d[r1], cpu_gpr_d[r2], n, MODE_UL); break; case OPC2_32_RRR1_MADDSUS_H_32_UU: CHECK_REG_PAIR(r4); CHECK_REG_PAIR(r3); - gen_maddsus_h(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3], - cpu_gpr_d[r3+1], cpu_gpr_d[r1], cpu_gpr_d[r2], + gen_maddsus_h(cpu_gpr_d[r4], cpu_gpr_d[r4 + 1], cpu_gpr_d[r3], + cpu_gpr_d[r3 + 1], cpu_gpr_d[r1], cpu_gpr_d[r2], n, MODE_UU); break; case OPC2_32_RRR1_MADDSUM_H_64_LL: CHECK_REG_PAIR(r4); CHECK_REG_PAIR(r3); - gen_maddsum_h(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3], - cpu_gpr_d[r3+1], cpu_gpr_d[r1], cpu_gpr_d[r2], + gen_maddsum_h(cpu_gpr_d[r4], cpu_gpr_d[r4 + 1], cpu_gpr_d[r3], + cpu_gpr_d[r3 + 1], cpu_gpr_d[r1], cpu_gpr_d[r2], n, MODE_LL); break; case OPC2_32_RRR1_MADDSUM_H_64_LU: CHECK_REG_PAIR(r4); CHECK_REG_PAIR(r3); - gen_maddsum_h(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3], - cpu_gpr_d[r3+1], cpu_gpr_d[r1], cpu_gpr_d[r2], + gen_maddsum_h(cpu_gpr_d[r4], cpu_gpr_d[r4 + 1], cpu_gpr_d[r3], + cpu_gpr_d[r3 + 1], cpu_gpr_d[r1], cpu_gpr_d[r2], n, MODE_LU); break; case OPC2_32_RRR1_MADDSUM_H_64_UL: CHECK_REG_PAIR(r4); CHECK_REG_PAIR(r3); - gen_maddsum_h(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3], - cpu_gpr_d[r3+1], cpu_gpr_d[r1], cpu_gpr_d[r2], + gen_maddsum_h(cpu_gpr_d[r4], cpu_gpr_d[r4 + 1], cpu_gpr_d[r3], + cpu_gpr_d[r3 + 1], cpu_gpr_d[r1], cpu_gpr_d[r2], n, MODE_UL); break; case OPC2_32_RRR1_MADDSUM_H_64_UU: CHECK_REG_PAIR(r4); CHECK_REG_PAIR(r3); - gen_maddsum_h(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3], - cpu_gpr_d[r3+1], cpu_gpr_d[r1], cpu_gpr_d[r2], + gen_maddsum_h(cpu_gpr_d[r4], cpu_gpr_d[r4 + 1], cpu_gpr_d[r3], + 
cpu_gpr_d[r3 + 1], cpu_gpr_d[r1], cpu_gpr_d[r2], n, MODE_UU); break; case OPC2_32_RRR1_MADDSUMS_H_64_LL: CHECK_REG_PAIR(r4); CHECK_REG_PAIR(r3); - gen_maddsums_h(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3], - cpu_gpr_d[r3+1], cpu_gpr_d[r1], cpu_gpr_d[r2], + gen_maddsums_h(cpu_gpr_d[r4], cpu_gpr_d[r4 + 1], cpu_gpr_d[r3], + cpu_gpr_d[r3 + 1], cpu_gpr_d[r1], cpu_gpr_d[r2], n, MODE_LL); break; case OPC2_32_RRR1_MADDSUMS_H_64_LU: CHECK_REG_PAIR(r4); CHECK_REG_PAIR(r3); - gen_maddsums_h(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3], - cpu_gpr_d[r3+1], cpu_gpr_d[r1], cpu_gpr_d[r2], + gen_maddsums_h(cpu_gpr_d[r4], cpu_gpr_d[r4 + 1], cpu_gpr_d[r3], + cpu_gpr_d[r3 + 1], cpu_gpr_d[r1], cpu_gpr_d[r2], n, MODE_LU); break; case OPC2_32_RRR1_MADDSUMS_H_64_UL: CHECK_REG_PAIR(r4); CHECK_REG_PAIR(r3); - gen_maddsums_h(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3], - cpu_gpr_d[r3+1], cpu_gpr_d[r1], cpu_gpr_d[r2], + gen_maddsums_h(cpu_gpr_d[r4], cpu_gpr_d[r4 + 1], cpu_gpr_d[r3], + cpu_gpr_d[r3 + 1], cpu_gpr_d[r1], cpu_gpr_d[r2], n, MODE_UL); break; case OPC2_32_RRR1_MADDSUMS_H_64_UU: CHECK_REG_PAIR(r4); CHECK_REG_PAIR(r3); - gen_maddsums_h(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3], - cpu_gpr_d[r3+1], cpu_gpr_d[r1], cpu_gpr_d[r2], + gen_maddsums_h(cpu_gpr_d[r4], cpu_gpr_d[r4 + 1], cpu_gpr_d[r3], + cpu_gpr_d[r3 + 1], cpu_gpr_d[r1], cpu_gpr_d[r2], n, MODE_UU); break; case OPC2_32_RRR1_MADDSUR_H_16_LL: @@ -7310,98 +7348,98 @@ static void decode_rrr1_msub(DisasContext *ctx) case OPC2_32_RRR1_MSUB_H_LL: CHECK_REG_PAIR(r4); CHECK_REG_PAIR(r3); - gen_msub_h(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3], - cpu_gpr_d[r3+1], cpu_gpr_d[r1], cpu_gpr_d[r2], n, MODE_LL); + gen_msub_h(cpu_gpr_d[r4], cpu_gpr_d[r4 + 1], cpu_gpr_d[r3], + cpu_gpr_d[r3 + 1], cpu_gpr_d[r1], cpu_gpr_d[r2], n, MODE_LL); break; case OPC2_32_RRR1_MSUB_H_LU: CHECK_REG_PAIR(r4); CHECK_REG_PAIR(r3); - gen_msub_h(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3], - cpu_gpr_d[r3+1], cpu_gpr_d[r1], cpu_gpr_d[r2], n, MODE_LU); + gen_msub_h(cpu_gpr_d[r4], cpu_gpr_d[r4 + 1], cpu_gpr_d[r3], + cpu_gpr_d[r3 + 1], cpu_gpr_d[r1], cpu_gpr_d[r2], n, MODE_LU); break; case OPC2_32_RRR1_MSUB_H_UL: CHECK_REG_PAIR(r4); CHECK_REG_PAIR(r3); - gen_msub_h(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3], - cpu_gpr_d[r3+1], cpu_gpr_d[r1], cpu_gpr_d[r2], n, MODE_UL); + gen_msub_h(cpu_gpr_d[r4], cpu_gpr_d[r4 + 1], cpu_gpr_d[r3], + cpu_gpr_d[r3 + 1], cpu_gpr_d[r1], cpu_gpr_d[r2], n, MODE_UL); break; case OPC2_32_RRR1_MSUB_H_UU: CHECK_REG_PAIR(r4); CHECK_REG_PAIR(r3); - gen_msub_h(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3], - cpu_gpr_d[r3+1], cpu_gpr_d[r1], cpu_gpr_d[r2], n, MODE_UU); + gen_msub_h(cpu_gpr_d[r4], cpu_gpr_d[r4 + 1], cpu_gpr_d[r3], + cpu_gpr_d[r3 + 1], cpu_gpr_d[r1], cpu_gpr_d[r2], n, MODE_UU); break; case OPC2_32_RRR1_MSUBS_H_LL: CHECK_REG_PAIR(r4); CHECK_REG_PAIR(r3); - gen_msubs_h(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3], - cpu_gpr_d[r3+1], cpu_gpr_d[r1], cpu_gpr_d[r2], n, MODE_LL); + gen_msubs_h(cpu_gpr_d[r4], cpu_gpr_d[r4 + 1], cpu_gpr_d[r3], + cpu_gpr_d[r3 + 1], cpu_gpr_d[r1], cpu_gpr_d[r2], n, MODE_LL); break; case OPC2_32_RRR1_MSUBS_H_LU: CHECK_REG_PAIR(r4); CHECK_REG_PAIR(r3); - gen_msubs_h(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3], - cpu_gpr_d[r3+1], cpu_gpr_d[r1], cpu_gpr_d[r2], n, MODE_LU); + gen_msubs_h(cpu_gpr_d[r4], cpu_gpr_d[r4 + 1], cpu_gpr_d[r3], + cpu_gpr_d[r3 + 1], cpu_gpr_d[r1], cpu_gpr_d[r2], n, MODE_LU); break; case OPC2_32_RRR1_MSUBS_H_UL: CHECK_REG_PAIR(r4); CHECK_REG_PAIR(r3); - gen_msubs_h(cpu_gpr_d[r4], cpu_gpr_d[r4+1], 
cpu_gpr_d[r3], - cpu_gpr_d[r3+1], cpu_gpr_d[r1], cpu_gpr_d[r2], n, MODE_UL); + gen_msubs_h(cpu_gpr_d[r4], cpu_gpr_d[r4 + 1], cpu_gpr_d[r3], + cpu_gpr_d[r3 + 1], cpu_gpr_d[r1], cpu_gpr_d[r2], n, MODE_UL); break; case OPC2_32_RRR1_MSUBS_H_UU: CHECK_REG_PAIR(r4); CHECK_REG_PAIR(r3); - gen_msubs_h(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3], - cpu_gpr_d[r3+1], cpu_gpr_d[r1], cpu_gpr_d[r2], n, MODE_UU); + gen_msubs_h(cpu_gpr_d[r4], cpu_gpr_d[r4 + 1], cpu_gpr_d[r3], + cpu_gpr_d[r3 + 1], cpu_gpr_d[r1], cpu_gpr_d[r2], n, MODE_UU); break; case OPC2_32_RRR1_MSUBM_H_LL: CHECK_REG_PAIR(r4); CHECK_REG_PAIR(r3); - gen_msubm_h(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3], - cpu_gpr_d[r3+1], cpu_gpr_d[r1], cpu_gpr_d[r2], n, MODE_LL); + gen_msubm_h(cpu_gpr_d[r4], cpu_gpr_d[r4 + 1], cpu_gpr_d[r3], + cpu_gpr_d[r3 + 1], cpu_gpr_d[r1], cpu_gpr_d[r2], n, MODE_LL); break; case OPC2_32_RRR1_MSUBM_H_LU: CHECK_REG_PAIR(r4); CHECK_REG_PAIR(r3); - gen_msubm_h(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3], - cpu_gpr_d[r3+1], cpu_gpr_d[r1], cpu_gpr_d[r2], n, MODE_LU); + gen_msubm_h(cpu_gpr_d[r4], cpu_gpr_d[r4 + 1], cpu_gpr_d[r3], + cpu_gpr_d[r3 + 1], cpu_gpr_d[r1], cpu_gpr_d[r2], n, MODE_LU); break; case OPC2_32_RRR1_MSUBM_H_UL: CHECK_REG_PAIR(r4); CHECK_REG_PAIR(r3); - gen_msubm_h(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3], - cpu_gpr_d[r3+1], cpu_gpr_d[r1], cpu_gpr_d[r2], n, MODE_UL); + gen_msubm_h(cpu_gpr_d[r4], cpu_gpr_d[r4 + 1], cpu_gpr_d[r3], + cpu_gpr_d[r3 + 1], cpu_gpr_d[r1], cpu_gpr_d[r2], n, MODE_UL); break; case OPC2_32_RRR1_MSUBM_H_UU: CHECK_REG_PAIR(r4); CHECK_REG_PAIR(r3); - gen_msubm_h(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3], - cpu_gpr_d[r3+1], cpu_gpr_d[r1], cpu_gpr_d[r2], n, MODE_UU); + gen_msubm_h(cpu_gpr_d[r4], cpu_gpr_d[r4 + 1], cpu_gpr_d[r3], + cpu_gpr_d[r3 + 1], cpu_gpr_d[r1], cpu_gpr_d[r2], n, MODE_UU); break; case OPC2_32_RRR1_MSUBMS_H_LL: CHECK_REG_PAIR(r4); CHECK_REG_PAIR(r3); - gen_msubms_h(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3], - cpu_gpr_d[r3+1], cpu_gpr_d[r1], cpu_gpr_d[r2], n, MODE_LL); + gen_msubms_h(cpu_gpr_d[r4], cpu_gpr_d[r4 + 1], cpu_gpr_d[r3], + cpu_gpr_d[r3 + 1], cpu_gpr_d[r1], cpu_gpr_d[r2], n, MODE_LL); break; case OPC2_32_RRR1_MSUBMS_H_LU: CHECK_REG_PAIR(r4); CHECK_REG_PAIR(r3); - gen_msubms_h(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3], - cpu_gpr_d[r3+1], cpu_gpr_d[r1], cpu_gpr_d[r2], n, MODE_LU); + gen_msubms_h(cpu_gpr_d[r4], cpu_gpr_d[r4 + 1], cpu_gpr_d[r3], + cpu_gpr_d[r3 + 1], cpu_gpr_d[r1], cpu_gpr_d[r2], n, MODE_LU); break; case OPC2_32_RRR1_MSUBMS_H_UL: CHECK_REG_PAIR(r4); CHECK_REG_PAIR(r3); - gen_msubms_h(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3], - cpu_gpr_d[r3+1], cpu_gpr_d[r1], cpu_gpr_d[r2], n, MODE_UL); + gen_msubms_h(cpu_gpr_d[r4], cpu_gpr_d[r4 + 1], cpu_gpr_d[r3], + cpu_gpr_d[r3 + 1], cpu_gpr_d[r1], cpu_gpr_d[r2], n, MODE_UL); break; case OPC2_32_RRR1_MSUBMS_H_UU: CHECK_REG_PAIR(r4); CHECK_REG_PAIR(r3); - gen_msubms_h(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3], - cpu_gpr_d[r3+1], cpu_gpr_d[r1], cpu_gpr_d[r2], n, MODE_UU); + gen_msubms_h(cpu_gpr_d[r4], cpu_gpr_d[r4 + 1], cpu_gpr_d[r3], + cpu_gpr_d[r3 + 1], cpu_gpr_d[r1], cpu_gpr_d[r2], n, MODE_UU); break; case OPC2_32_RRR1_MSUBR_H_LL: gen_msubr32_h(cpu_gpr_d[r4], cpu_gpr_d[r3], cpu_gpr_d[r1], @@ -7444,7 +7482,7 @@ static void decode_rrr1_msubq_h(DisasContext *ctx) { uint32_t op2; uint32_t r1, r2, r3, r4, n; - TCGv temp, temp2; + TCGv_i32 temp, temp2; op2 = MASK_OP_RRR1_OP2(ctx->opcode); r1 = MASK_OP_RRR1_S1(ctx->opcode); @@ -7453,8 +7491,8 @@ static void decode_rrr1_msubq_h(DisasContext 
*ctx) r4 = MASK_OP_RRR1_D(ctx->opcode); n = MASK_OP_RRR1_N(ctx->opcode); - temp = tcg_temp_new(); - temp2 = tcg_temp_new(); + temp = tcg_temp_new_i32(); + temp2 = tcg_temp_new_i32(); switch (op2) { case OPC2_32_RRR1_MSUB_Q_32: @@ -7464,61 +7502,61 @@ static void decode_rrr1_msubq_h(DisasContext *ctx) case OPC2_32_RRR1_MSUB_Q_64: CHECK_REG_PAIR(r4); CHECK_REG_PAIR(r3); - gen_msub64_q(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3], - cpu_gpr_d[r3+1], cpu_gpr_d[r1], cpu_gpr_d[r2], + gen_msub64_q(cpu_gpr_d[r4], cpu_gpr_d[r4 + 1], cpu_gpr_d[r3], + cpu_gpr_d[r3 + 1], cpu_gpr_d[r1], cpu_gpr_d[r2], n); break; case OPC2_32_RRR1_MSUB_Q_32_L: - tcg_gen_ext16s_tl(temp, cpu_gpr_d[r2]); + tcg_gen_ext16s_i32(temp, cpu_gpr_d[r2]); gen_msub32_q(cpu_gpr_d[r4], cpu_gpr_d[r3], cpu_gpr_d[r1], temp, n, 16); break; case OPC2_32_RRR1_MSUB_Q_64_L: CHECK_REG_PAIR(r4); CHECK_REG_PAIR(r3); - tcg_gen_ext16s_tl(temp, cpu_gpr_d[r2]); - gen_msub64_q(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3], - cpu_gpr_d[r3+1], cpu_gpr_d[r1], temp, + tcg_gen_ext16s_i32(temp, cpu_gpr_d[r2]); + gen_msub64_q(cpu_gpr_d[r4], cpu_gpr_d[r4 + 1], cpu_gpr_d[r3], + cpu_gpr_d[r3 + 1], cpu_gpr_d[r1], temp, n); break; case OPC2_32_RRR1_MSUB_Q_32_U: - tcg_gen_sari_tl(temp, cpu_gpr_d[r2], 16); + tcg_gen_sari_i32(temp, cpu_gpr_d[r2], 16); gen_msub32_q(cpu_gpr_d[r4], cpu_gpr_d[r3], cpu_gpr_d[r1], temp, n, 16); break; case OPC2_32_RRR1_MSUB_Q_64_U: CHECK_REG_PAIR(r4); CHECK_REG_PAIR(r3); - tcg_gen_sari_tl(temp, cpu_gpr_d[r2], 16); - gen_msub64_q(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3], - cpu_gpr_d[r3+1], cpu_gpr_d[r1], temp, + tcg_gen_sari_i32(temp, cpu_gpr_d[r2], 16); + gen_msub64_q(cpu_gpr_d[r4], cpu_gpr_d[r4 + 1], cpu_gpr_d[r3], + cpu_gpr_d[r3 + 1], cpu_gpr_d[r1], temp, n); break; case OPC2_32_RRR1_MSUB_Q_32_LL: - tcg_gen_ext16s_tl(temp, cpu_gpr_d[r1]); - tcg_gen_ext16s_tl(temp2, cpu_gpr_d[r2]); + tcg_gen_ext16s_i32(temp, cpu_gpr_d[r1]); + tcg_gen_ext16s_i32(temp2, cpu_gpr_d[r2]); gen_m16sub32_q(cpu_gpr_d[r4], cpu_gpr_d[r3], temp, temp2, n); break; case OPC2_32_RRR1_MSUB_Q_64_LL: CHECK_REG_PAIR(r4); CHECK_REG_PAIR(r3); - tcg_gen_ext16s_tl(temp, cpu_gpr_d[r1]); - tcg_gen_ext16s_tl(temp2, cpu_gpr_d[r2]); - gen_m16sub64_q(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3], - cpu_gpr_d[r3+1], temp, temp2, n); + tcg_gen_ext16s_i32(temp, cpu_gpr_d[r1]); + tcg_gen_ext16s_i32(temp2, cpu_gpr_d[r2]); + gen_m16sub64_q(cpu_gpr_d[r4], cpu_gpr_d[r4 + 1], cpu_gpr_d[r3], + cpu_gpr_d[r3 + 1], temp, temp2, n); break; case OPC2_32_RRR1_MSUB_Q_32_UU: - tcg_gen_sari_tl(temp, cpu_gpr_d[r1], 16); - tcg_gen_sari_tl(temp2, cpu_gpr_d[r2], 16); + tcg_gen_sari_i32(temp, cpu_gpr_d[r1], 16); + tcg_gen_sari_i32(temp2, cpu_gpr_d[r2], 16); gen_m16sub32_q(cpu_gpr_d[r4], cpu_gpr_d[r3], temp, temp2, n); break; case OPC2_32_RRR1_MSUB_Q_64_UU: CHECK_REG_PAIR(r4); CHECK_REG_PAIR(r3); - tcg_gen_sari_tl(temp, cpu_gpr_d[r1], 16); - tcg_gen_sari_tl(temp2, cpu_gpr_d[r2], 16); - gen_m16sub64_q(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3], - cpu_gpr_d[r3+1], temp, temp2, n); + tcg_gen_sari_i32(temp, cpu_gpr_d[r1], 16); + tcg_gen_sari_i32(temp2, cpu_gpr_d[r2], 16); + gen_m16sub64_q(cpu_gpr_d[r4], cpu_gpr_d[r4 + 1], cpu_gpr_d[r3], + cpu_gpr_d[r3 + 1], temp, temp2, n); break; case OPC2_32_RRR1_MSUBS_Q_32: gen_msubs32_q(cpu_gpr_d[r4], cpu_gpr_d[r3], cpu_gpr_d[r1], @@ -7527,90 +7565,90 @@ static void decode_rrr1_msubq_h(DisasContext *ctx) case OPC2_32_RRR1_MSUBS_Q_64: CHECK_REG_PAIR(r4); CHECK_REG_PAIR(r3); - gen_msubs64_q(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3], - cpu_gpr_d[r3+1], 
cpu_gpr_d[r1], cpu_gpr_d[r2], + gen_msubs64_q(cpu_gpr_d[r4], cpu_gpr_d[r4 + 1], cpu_gpr_d[r3], + cpu_gpr_d[r3 + 1], cpu_gpr_d[r1], cpu_gpr_d[r2], n); break; case OPC2_32_RRR1_MSUBS_Q_32_L: - tcg_gen_ext16s_tl(temp, cpu_gpr_d[r2]); + tcg_gen_ext16s_i32(temp, cpu_gpr_d[r2]); gen_msubs32_q(cpu_gpr_d[r4], cpu_gpr_d[r3], cpu_gpr_d[r1], temp, n, 16); break; case OPC2_32_RRR1_MSUBS_Q_64_L: CHECK_REG_PAIR(r4); CHECK_REG_PAIR(r3); - tcg_gen_ext16s_tl(temp, cpu_gpr_d[r2]); - gen_msubs64_q(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3], - cpu_gpr_d[r3+1], cpu_gpr_d[r1], temp, + tcg_gen_ext16s_i32(temp, cpu_gpr_d[r2]); + gen_msubs64_q(cpu_gpr_d[r4], cpu_gpr_d[r4 + 1], cpu_gpr_d[r3], + cpu_gpr_d[r3 + 1], cpu_gpr_d[r1], temp, n); break; case OPC2_32_RRR1_MSUBS_Q_32_U: - tcg_gen_sari_tl(temp, cpu_gpr_d[r2], 16); + tcg_gen_sari_i32(temp, cpu_gpr_d[r2], 16); gen_msubs32_q(cpu_gpr_d[r4], cpu_gpr_d[r3], cpu_gpr_d[r1], temp, n, 16); break; case OPC2_32_RRR1_MSUBS_Q_64_U: CHECK_REG_PAIR(r4); CHECK_REG_PAIR(r3); - tcg_gen_sari_tl(temp, cpu_gpr_d[r2], 16); - gen_msubs64_q(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3], - cpu_gpr_d[r3+1], cpu_gpr_d[r1], temp, + tcg_gen_sari_i32(temp, cpu_gpr_d[r2], 16); + gen_msubs64_q(cpu_gpr_d[r4], cpu_gpr_d[r4 + 1], cpu_gpr_d[r3], + cpu_gpr_d[r3 + 1], cpu_gpr_d[r1], temp, n); break; case OPC2_32_RRR1_MSUBS_Q_32_LL: - tcg_gen_ext16s_tl(temp, cpu_gpr_d[r1]); - tcg_gen_ext16s_tl(temp2, cpu_gpr_d[r2]); + tcg_gen_ext16s_i32(temp, cpu_gpr_d[r1]); + tcg_gen_ext16s_i32(temp2, cpu_gpr_d[r2]); gen_m16subs32_q(cpu_gpr_d[r4], cpu_gpr_d[r3], temp, temp2, n); break; case OPC2_32_RRR1_MSUBS_Q_64_LL: CHECK_REG_PAIR(r4); CHECK_REG_PAIR(r3); - tcg_gen_ext16s_tl(temp, cpu_gpr_d[r1]); - tcg_gen_ext16s_tl(temp2, cpu_gpr_d[r2]); - gen_m16subs64_q(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3], - cpu_gpr_d[r3+1], temp, temp2, n); + tcg_gen_ext16s_i32(temp, cpu_gpr_d[r1]); + tcg_gen_ext16s_i32(temp2, cpu_gpr_d[r2]); + gen_m16subs64_q(cpu_gpr_d[r4], cpu_gpr_d[r4 + 1], cpu_gpr_d[r3], + cpu_gpr_d[r3 + 1], temp, temp2, n); break; case OPC2_32_RRR1_MSUBS_Q_32_UU: - tcg_gen_sari_tl(temp, cpu_gpr_d[r1], 16); - tcg_gen_sari_tl(temp2, cpu_gpr_d[r2], 16); + tcg_gen_sari_i32(temp, cpu_gpr_d[r1], 16); + tcg_gen_sari_i32(temp2, cpu_gpr_d[r2], 16); gen_m16subs32_q(cpu_gpr_d[r4], cpu_gpr_d[r3], temp, temp2, n); break; case OPC2_32_RRR1_MSUBS_Q_64_UU: CHECK_REG_PAIR(r4); CHECK_REG_PAIR(r3); - tcg_gen_sari_tl(temp, cpu_gpr_d[r1], 16); - tcg_gen_sari_tl(temp2, cpu_gpr_d[r2], 16); - gen_m16subs64_q(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3], - cpu_gpr_d[r3+1], temp, temp2, n); + tcg_gen_sari_i32(temp, cpu_gpr_d[r1], 16); + tcg_gen_sari_i32(temp2, cpu_gpr_d[r2], 16); + gen_m16subs64_q(cpu_gpr_d[r4], cpu_gpr_d[r4 + 1], cpu_gpr_d[r3], + cpu_gpr_d[r3 + 1], temp, temp2, n); break; case OPC2_32_RRR1_MSUBR_H_64_UL: CHECK_REG_PAIR(r3); - gen_msubr64_h(cpu_gpr_d[r4], cpu_gpr_d[r3], cpu_gpr_d[r3+1], + gen_msubr64_h(cpu_gpr_d[r4], cpu_gpr_d[r3], cpu_gpr_d[r3 + 1], cpu_gpr_d[r1], cpu_gpr_d[r2], n, 2); break; case OPC2_32_RRR1_MSUBRS_H_64_UL: CHECK_REG_PAIR(r3); - gen_msubr64s_h(cpu_gpr_d[r4], cpu_gpr_d[r3], cpu_gpr_d[r3+1], + gen_msubr64s_h(cpu_gpr_d[r4], cpu_gpr_d[r3], cpu_gpr_d[r3 + 1], cpu_gpr_d[r1], cpu_gpr_d[r2], n, 2); break; case OPC2_32_RRR1_MSUBR_Q_32_LL: - tcg_gen_ext16s_tl(temp, cpu_gpr_d[r1]); - tcg_gen_ext16s_tl(temp2, cpu_gpr_d[r2]); + tcg_gen_ext16s_i32(temp, cpu_gpr_d[r1]); + tcg_gen_ext16s_i32(temp2, cpu_gpr_d[r2]); gen_msubr_q(cpu_gpr_d[r4], cpu_gpr_d[r3], temp, temp2, n); break; case OPC2_32_RRR1_MSUBR_Q_32_UU: 
- tcg_gen_sari_tl(temp, cpu_gpr_d[r1], 16); - tcg_gen_sari_tl(temp2, cpu_gpr_d[r2], 16); + tcg_gen_sari_i32(temp, cpu_gpr_d[r1], 16); + tcg_gen_sari_i32(temp2, cpu_gpr_d[r2], 16); gen_msubr_q(cpu_gpr_d[r4], cpu_gpr_d[r3], temp, temp2, n); break; case OPC2_32_RRR1_MSUBRS_Q_32_LL: - tcg_gen_ext16s_tl(temp, cpu_gpr_d[r1]); - tcg_gen_ext16s_tl(temp2, cpu_gpr_d[r2]); + tcg_gen_ext16s_i32(temp, cpu_gpr_d[r1]); + tcg_gen_ext16s_i32(temp2, cpu_gpr_d[r2]); gen_msubrs_q(cpu_gpr_d[r4], cpu_gpr_d[r3], temp, temp2, n); break; case OPC2_32_RRR1_MSUBRS_Q_32_UU: - tcg_gen_sari_tl(temp, cpu_gpr_d[r1], 16); - tcg_gen_sari_tl(temp2, cpu_gpr_d[r2], 16); + tcg_gen_sari_i32(temp, cpu_gpr_d[r1], 16); + tcg_gen_sari_i32(temp2, cpu_gpr_d[r2], 16); gen_msubrs_q(cpu_gpr_d[r4], cpu_gpr_d[r3], temp, temp2, n); break; default: @@ -7634,109 +7672,109 @@ static void decode_rrr1_msubad_h(DisasContext *ctx) case OPC2_32_RRR1_MSUBAD_H_32_LL: CHECK_REG_PAIR(r4); CHECK_REG_PAIR(r3); - gen_msubad_h(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3], - cpu_gpr_d[r3+1], cpu_gpr_d[r1], cpu_gpr_d[r2], n, MODE_LL); + gen_msubad_h(cpu_gpr_d[r4], cpu_gpr_d[r4 + 1], cpu_gpr_d[r3], + cpu_gpr_d[r3 + 1], cpu_gpr_d[r1], cpu_gpr_d[r2], n, MODE_LL); break; case OPC2_32_RRR1_MSUBAD_H_32_LU: CHECK_REG_PAIR(r4); CHECK_REG_PAIR(r3); - gen_msubad_h(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3], - cpu_gpr_d[r3+1], cpu_gpr_d[r1], cpu_gpr_d[r2], n, MODE_LU); + gen_msubad_h(cpu_gpr_d[r4], cpu_gpr_d[r4 + 1], cpu_gpr_d[r3], + cpu_gpr_d[r3 + 1], cpu_gpr_d[r1], cpu_gpr_d[r2], n, MODE_LU); break; case OPC2_32_RRR1_MSUBAD_H_32_UL: CHECK_REG_PAIR(r4); CHECK_REG_PAIR(r3); - gen_msubad_h(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3], - cpu_gpr_d[r3+1], cpu_gpr_d[r1], cpu_gpr_d[r2], n, MODE_UL); + gen_msubad_h(cpu_gpr_d[r4], cpu_gpr_d[r4 + 1], cpu_gpr_d[r3], + cpu_gpr_d[r3 + 1], cpu_gpr_d[r1], cpu_gpr_d[r2], n, MODE_UL); break; case OPC2_32_RRR1_MSUBAD_H_32_UU: CHECK_REG_PAIR(r4); CHECK_REG_PAIR(r3); - gen_msubad_h(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3], - cpu_gpr_d[r3+1], cpu_gpr_d[r1], cpu_gpr_d[r2], n, MODE_UU); + gen_msubad_h(cpu_gpr_d[r4], cpu_gpr_d[r4 + 1], cpu_gpr_d[r3], + cpu_gpr_d[r3 + 1], cpu_gpr_d[r1], cpu_gpr_d[r2], n, MODE_UU); break; case OPC2_32_RRR1_MSUBADS_H_32_LL: CHECK_REG_PAIR(r4); CHECK_REG_PAIR(r3); - gen_msubads_h(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3], - cpu_gpr_d[r3+1], cpu_gpr_d[r1], cpu_gpr_d[r2], + gen_msubads_h(cpu_gpr_d[r4], cpu_gpr_d[r4 + 1], cpu_gpr_d[r3], + cpu_gpr_d[r3 + 1], cpu_gpr_d[r1], cpu_gpr_d[r2], n, MODE_LL); break; case OPC2_32_RRR1_MSUBADS_H_32_LU: CHECK_REG_PAIR(r4); CHECK_REG_PAIR(r3); - gen_msubads_h(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3], - cpu_gpr_d[r3+1], cpu_gpr_d[r1], cpu_gpr_d[r2], + gen_msubads_h(cpu_gpr_d[r4], cpu_gpr_d[r4 + 1], cpu_gpr_d[r3], + cpu_gpr_d[r3 + 1], cpu_gpr_d[r1], cpu_gpr_d[r2], n, MODE_LU); break; case OPC2_32_RRR1_MSUBADS_H_32_UL: CHECK_REG_PAIR(r4); CHECK_REG_PAIR(r3); - gen_msubads_h(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3], - cpu_gpr_d[r3+1], cpu_gpr_d[r1], cpu_gpr_d[r2], + gen_msubads_h(cpu_gpr_d[r4], cpu_gpr_d[r4 + 1], cpu_gpr_d[r3], + cpu_gpr_d[r3 + 1], cpu_gpr_d[r1], cpu_gpr_d[r2], n, MODE_UL); break; case OPC2_32_RRR1_MSUBADS_H_32_UU: CHECK_REG_PAIR(r4); CHECK_REG_PAIR(r3); - gen_msubads_h(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3], - cpu_gpr_d[r3+1], cpu_gpr_d[r1], cpu_gpr_d[r2], + gen_msubads_h(cpu_gpr_d[r4], cpu_gpr_d[r4 + 1], cpu_gpr_d[r3], + cpu_gpr_d[r3 + 1], cpu_gpr_d[r1], cpu_gpr_d[r2], n, MODE_UU); break; case OPC2_32_RRR1_MSUBADM_H_64_LL: 
CHECK_REG_PAIR(r4); CHECK_REG_PAIR(r3); - gen_msubadm_h(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3], - cpu_gpr_d[r3+1], cpu_gpr_d[r1], cpu_gpr_d[r2], + gen_msubadm_h(cpu_gpr_d[r4], cpu_gpr_d[r4 + 1], cpu_gpr_d[r3], + cpu_gpr_d[r3 + 1], cpu_gpr_d[r1], cpu_gpr_d[r2], n, MODE_LL); break; case OPC2_32_RRR1_MSUBADM_H_64_LU: CHECK_REG_PAIR(r4); CHECK_REG_PAIR(r3); - gen_msubadm_h(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3], - cpu_gpr_d[r3+1], cpu_gpr_d[r1], cpu_gpr_d[r2], + gen_msubadm_h(cpu_gpr_d[r4], cpu_gpr_d[r4 + 1], cpu_gpr_d[r3], + cpu_gpr_d[r3 + 1], cpu_gpr_d[r1], cpu_gpr_d[r2], n, MODE_LU); break; case OPC2_32_RRR1_MSUBADM_H_64_UL: CHECK_REG_PAIR(r4); CHECK_REG_PAIR(r3); - gen_msubadm_h(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3], - cpu_gpr_d[r3+1], cpu_gpr_d[r1], cpu_gpr_d[r2], + gen_msubadm_h(cpu_gpr_d[r4], cpu_gpr_d[r4 + 1], cpu_gpr_d[r3], + cpu_gpr_d[r3 + 1], cpu_gpr_d[r1], cpu_gpr_d[r2], n, MODE_UL); break; case OPC2_32_RRR1_MSUBADM_H_64_UU: CHECK_REG_PAIR(r4); CHECK_REG_PAIR(r3); - gen_msubadm_h(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3], - cpu_gpr_d[r3+1], cpu_gpr_d[r1], cpu_gpr_d[r2], + gen_msubadm_h(cpu_gpr_d[r4], cpu_gpr_d[r4 + 1], cpu_gpr_d[r3], + cpu_gpr_d[r3 + 1], cpu_gpr_d[r1], cpu_gpr_d[r2], n, MODE_UU); break; case OPC2_32_RRR1_MSUBADMS_H_64_LL: CHECK_REG_PAIR(r4); CHECK_REG_PAIR(r3); - gen_msubadms_h(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3], - cpu_gpr_d[r3+1], cpu_gpr_d[r1], cpu_gpr_d[r2], + gen_msubadms_h(cpu_gpr_d[r4], cpu_gpr_d[r4 + 1], cpu_gpr_d[r3], + cpu_gpr_d[r3 + 1], cpu_gpr_d[r1], cpu_gpr_d[r2], n, MODE_LL); break; case OPC2_32_RRR1_MSUBADMS_H_64_LU: CHECK_REG_PAIR(r4); CHECK_REG_PAIR(r3); - gen_msubadms_h(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3], - cpu_gpr_d[r3+1], cpu_gpr_d[r1], cpu_gpr_d[r2], + gen_msubadms_h(cpu_gpr_d[r4], cpu_gpr_d[r4 + 1], cpu_gpr_d[r3], + cpu_gpr_d[r3 + 1], cpu_gpr_d[r1], cpu_gpr_d[r2], n, MODE_LU); break; case OPC2_32_RRR1_MSUBADMS_H_64_UL: CHECK_REG_PAIR(r4); CHECK_REG_PAIR(r3); - gen_msubadms_h(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3], - cpu_gpr_d[r3+1], cpu_gpr_d[r1], cpu_gpr_d[r2], + gen_msubadms_h(cpu_gpr_d[r4], cpu_gpr_d[r4 + 1], cpu_gpr_d[r3], + cpu_gpr_d[r3 + 1], cpu_gpr_d[r1], cpu_gpr_d[r2], n, MODE_UL); break; case OPC2_32_RRR1_MSUBADMS_H_64_UU: CHECK_REG_PAIR(r4); CHECK_REG_PAIR(r3); - gen_msubadms_h(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3], - cpu_gpr_d[r3+1], cpu_gpr_d[r1], cpu_gpr_d[r2], + gen_msubadms_h(cpu_gpr_d[r4], cpu_gpr_d[r4 + 1], cpu_gpr_d[r3], + cpu_gpr_d[r3 + 1], cpu_gpr_d[r1], cpu_gpr_d[r2], n, MODE_UU); break; case OPC2_32_RRR1_MSUBADR_H_16_LL: @@ -7781,7 +7819,7 @@ static void decode_rrrr_extract_insert(DisasContext *ctx) { uint32_t op2; int r1, r2, r3, r4; - TCGv tmp_width, tmp_pos; + TCGv_i32 tmp_width, tmp_pos; r1 = MASK_OP_RRRR_S1(ctx->opcode); r2 = MASK_OP_RRRR_S2(ctx->opcode); @@ -7789,48 +7827,48 @@ static void decode_rrrr_extract_insert(DisasContext *ctx) r4 = MASK_OP_RRRR_D(ctx->opcode); op2 = MASK_OP_RRRR_OP2(ctx->opcode); - tmp_pos = tcg_temp_new(); - tmp_width = tcg_temp_new(); + tmp_pos = tcg_temp_new_i32(); + tmp_width = tcg_temp_new_i32(); switch (op2) { case OPC2_32_RRRR_DEXTR: - tcg_gen_andi_tl(tmp_pos, cpu_gpr_d[r3], 0x1f); + tcg_gen_andi_i32(tmp_pos, cpu_gpr_d[r3], 0x1f); if (r1 == r2) { - tcg_gen_rotl_tl(cpu_gpr_d[r4], cpu_gpr_d[r1], tmp_pos); + tcg_gen_rotl_i32(cpu_gpr_d[r4], cpu_gpr_d[r1], tmp_pos); } else { - TCGv msw = tcg_temp_new(); - TCGv zero = tcg_constant_tl(0); - tcg_gen_shl_tl(tmp_width, cpu_gpr_d[r1], tmp_pos); - tcg_gen_subfi_tl(msw, 32, tmp_pos); - 
tcg_gen_shr_tl(msw, cpu_gpr_d[r2], msw); + TCGv_i32 msw = tcg_temp_new_i32(); + TCGv_i32 zero = tcg_constant_i32(0); + tcg_gen_shl_i32(tmp_width, cpu_gpr_d[r1], tmp_pos); + tcg_gen_subfi_i32(msw, 32, tmp_pos); + tcg_gen_shr_i32(msw, cpu_gpr_d[r2], msw); /* * if pos == 0, then we do cpu_gpr_d[r2] << 32, which is undefined * behaviour. So check that case here and set the low bits to zero * which effectivly returns cpu_gpr_d[r1] */ - tcg_gen_movcond_tl(TCG_COND_EQ, msw, tmp_pos, zero, zero, msw); - tcg_gen_or_tl(cpu_gpr_d[r4], tmp_width, msw); + tcg_gen_movcond_i32(TCG_COND_EQ, msw, tmp_pos, zero, zero, msw); + tcg_gen_or_i32(cpu_gpr_d[r4], tmp_width, msw); } break; case OPC2_32_RRRR_EXTR: case OPC2_32_RRRR_EXTR_U: CHECK_REG_PAIR(r3); - tcg_gen_andi_tl(tmp_width, cpu_gpr_d[r3+1], 0x1f); - tcg_gen_andi_tl(tmp_pos, cpu_gpr_d[r3], 0x1f); - tcg_gen_add_tl(tmp_pos, tmp_pos, tmp_width); - tcg_gen_subfi_tl(tmp_pos, 32, tmp_pos); - tcg_gen_shl_tl(cpu_gpr_d[r4], cpu_gpr_d[r1], tmp_pos); - tcg_gen_subfi_tl(tmp_width, 32, tmp_width); + tcg_gen_andi_i32(tmp_width, cpu_gpr_d[r3 + 1], 0x1f); + tcg_gen_andi_i32(tmp_pos, cpu_gpr_d[r3], 0x1f); + tcg_gen_add_i32(tmp_pos, tmp_pos, tmp_width); + tcg_gen_subfi_i32(tmp_pos, 32, tmp_pos); + tcg_gen_shl_i32(cpu_gpr_d[r4], cpu_gpr_d[r1], tmp_pos); + tcg_gen_subfi_i32(tmp_width, 32, tmp_width); if (op2 == OPC2_32_RRRR_EXTR) { - tcg_gen_sar_tl(cpu_gpr_d[r4], cpu_gpr_d[r4], tmp_width); + tcg_gen_sar_i32(cpu_gpr_d[r4], cpu_gpr_d[r4], tmp_width); } else { - tcg_gen_shr_tl(cpu_gpr_d[r4], cpu_gpr_d[r4], tmp_width); + tcg_gen_shr_i32(cpu_gpr_d[r4], cpu_gpr_d[r4], tmp_width); } break; case OPC2_32_RRRR_INSERT: CHECK_REG_PAIR(r3); - tcg_gen_andi_tl(tmp_width, cpu_gpr_d[r3+1], 0x1f); - tcg_gen_andi_tl(tmp_pos, cpu_gpr_d[r3], 0x1f); + tcg_gen_andi_i32(tmp_width, cpu_gpr_d[r3 + 1], 0x1f); + tcg_gen_andi_i32(tmp_pos, cpu_gpr_d[r3], 0x1f); gen_insert(cpu_gpr_d[r4], cpu_gpr_d[r1], cpu_gpr_d[r2], tmp_width, tmp_pos); break; @@ -7846,7 +7884,7 @@ static void decode_rrrw_extract_insert(DisasContext *ctx) int r1, r2, r3, r4; int32_t width; - TCGv temp, temp2; + TCGv_i32 temp, temp2; op2 = MASK_OP_RRRW_OP2(ctx->opcode); r1 = MASK_OP_RRRW_S1(ctx->opcode); @@ -7855,39 +7893,39 @@ static void decode_rrrw_extract_insert(DisasContext *ctx) r4 = MASK_OP_RRRW_D(ctx->opcode); width = MASK_OP_RRRW_WIDTH(ctx->opcode); - temp = tcg_temp_new(); + temp = tcg_temp_new_i32(); switch (op2) { case OPC2_32_RRRW_EXTR: - tcg_gen_andi_tl(temp, cpu_gpr_d[r3], 0x1f); - tcg_gen_addi_tl(temp, temp, width); - tcg_gen_subfi_tl(temp, 32, temp); - tcg_gen_shl_tl(cpu_gpr_d[r4], cpu_gpr_d[r1], temp); - tcg_gen_sari_tl(cpu_gpr_d[r4], cpu_gpr_d[r4], 32 - width); + tcg_gen_andi_i32(temp, cpu_gpr_d[r3], 0x1f); + tcg_gen_addi_i32(temp, temp, width); + tcg_gen_subfi_i32(temp, 32, temp); + tcg_gen_shl_i32(cpu_gpr_d[r4], cpu_gpr_d[r1], temp); + tcg_gen_sari_i32(cpu_gpr_d[r4], cpu_gpr_d[r4], 32 - width); break; case OPC2_32_RRRW_EXTR_U: if (width == 0) { - tcg_gen_movi_tl(cpu_gpr_d[r4], 0); + tcg_gen_movi_i32(cpu_gpr_d[r4], 0); } else { - tcg_gen_andi_tl(temp, cpu_gpr_d[r3], 0x1f); - tcg_gen_shr_tl(cpu_gpr_d[r4], cpu_gpr_d[r1], temp); - tcg_gen_andi_tl(cpu_gpr_d[r4], cpu_gpr_d[r4], ~0u >> (32-width)); + tcg_gen_andi_i32(temp, cpu_gpr_d[r3], 0x1f); + tcg_gen_shr_i32(cpu_gpr_d[r4], cpu_gpr_d[r1], temp); + tcg_gen_andi_i32(cpu_gpr_d[r4], cpu_gpr_d[r4], ~0u >> (32 - width)); } break; case OPC2_32_RRRW_IMASK: - temp2 = tcg_temp_new(); + temp2 = tcg_temp_new_i32(); CHECK_REG_PAIR(r4); - tcg_gen_andi_tl(temp, cpu_gpr_d[r3], 0x1f); - 
tcg_gen_movi_tl(temp2, (1 << width) - 1); - tcg_gen_shl_tl(temp2, temp2, temp); - tcg_gen_shl_tl(cpu_gpr_d[r4], cpu_gpr_d[r2], temp); - tcg_gen_mov_tl(cpu_gpr_d[r4+1], temp2); + tcg_gen_andi_i32(temp, cpu_gpr_d[r3], 0x1f); + tcg_gen_movi_i32(temp2, (1 << width) - 1); + tcg_gen_shl_i32(temp2, temp2, temp); + tcg_gen_shl_i32(cpu_gpr_d[r4], cpu_gpr_d[r2], temp); + tcg_gen_mov_i32(cpu_gpr_d[r4 + 1], temp2); break; case OPC2_32_RRRW_INSERT: - temp2 = tcg_temp_new(); + temp2 = tcg_temp_new_i32(); - tcg_gen_movi_tl(temp, width); - tcg_gen_andi_tl(temp2, cpu_gpr_d[r3], 0x1f); + tcg_gen_movi_i32(temp, width); + tcg_gen_andi_i32(temp2, cpu_gpr_d[r3], 0x1f); gen_insert(cpu_gpr_d[r4], cpu_gpr_d[r1], cpu_gpr_d[r2], temp, temp2); break; default: @@ -7901,7 +7939,7 @@ static void decode_sys_interrupts(DisasContext *ctx) uint32_t op2; uint32_t r1; TCGLabel *l1; - TCGv tmp; + TCGv_i32 tmp; op2 = MASK_OP_SYS_OP2(ctx->opcode); r1 = MASK_OP_SYS_S1D(ctx->opcode); @@ -7912,7 +7950,7 @@ static void decode_sys_interrupts(DisasContext *ctx) break; case OPC2_32_SYS_DISABLE: if (ctx->priv == TRICORE_PRIV_SM || ctx->priv == TRICORE_PRIV_UM1) { - tcg_gen_andi_tl(cpu_ICR, cpu_ICR, ~ctx->icr_ie_mask); + tcg_gen_andi_i32(cpu_ICR, cpu_ICR, ~ctx->icr_ie_mask); } else { generate_trap(ctx, TRAPC_PROT, TIN1_PRIV); } @@ -7920,9 +7958,9 @@ static void decode_sys_interrupts(DisasContext *ctx) case OPC2_32_SYS_DISABLE_D: if (has_feature(ctx, TRICORE_FEATURE_16)) { if (ctx->priv == TRICORE_PRIV_SM || ctx->priv == TRICORE_PRIV_UM1) { - tcg_gen_extract_tl(cpu_gpr_d[r1], cpu_ICR, + tcg_gen_extract_i32(cpu_gpr_d[r1], cpu_ICR, ctx->icr_ie_offset, 1); - tcg_gen_andi_tl(cpu_ICR, cpu_ICR, ~ctx->icr_ie_mask); + tcg_gen_andi_i32(cpu_ICR, cpu_ICR, ~ctx->icr_ie_mask); } else { generate_trap(ctx, TRAPC_PROT, TIN1_PRIV); } @@ -7933,7 +7971,7 @@ static void decode_sys_interrupts(DisasContext *ctx) break; case OPC2_32_SYS_ENABLE: if (ctx->priv == TRICORE_PRIV_SM || ctx->priv == TRICORE_PRIV_UM1) { - tcg_gen_ori_tl(cpu_ICR, cpu_ICR, ctx->icr_ie_mask); + tcg_gen_ori_i32(cpu_ICR, cpu_ICR, ctx->icr_ie_mask); ctx->base.is_jmp = DISAS_EXIT_UPDATE; } else { generate_trap(ctx, TRAPC_PROT, TIN1_PRIV); @@ -7955,12 +7993,12 @@ static void decode_sys_interrupts(DisasContext *ctx) break; case OPC2_32_SYS_RFM: if (ctx->priv == TRICORE_PRIV_SM) { - tmp = tcg_temp_new(); + tmp = tcg_temp_new_i32(); l1 = gen_new_label(); - tcg_gen_ld32u_tl(tmp, tcg_env, offsetof(CPUTriCoreState, DBGSR)); - tcg_gen_andi_tl(tmp, tmp, MASK_DBGSR_DE); - tcg_gen_brcondi_tl(TCG_COND_NE, tmp, 1, l1); + tcg_gen_ld_i32(tmp, tcg_env, offsetof(CPUTriCoreState, DBGSR)); + tcg_gen_andi_i32(tmp, tmp, MASK_DBGSR_DE); + tcg_gen_brcondi_i32(TCG_COND_NE, tmp, 1, l1); gen_helper_rfm(tcg_env); gen_set_label(l1); ctx->base.is_jmp = DISAS_EXIT; @@ -7977,7 +8015,7 @@ static void decode_sys_interrupts(DisasContext *ctx) case OPC2_32_SYS_RESTORE: if (has_feature(ctx, TRICORE_FEATURE_16)) { if (ctx->priv == TRICORE_PRIV_SM || ctx->priv == TRICORE_PRIV_UM1) { - tcg_gen_deposit_tl(cpu_ICR, cpu_ICR, cpu_gpr_d[r1], + tcg_gen_deposit_i32(cpu_ICR, cpu_ICR, cpu_gpr_d[r1], ctx->icr_ie_offset, 1); ctx->base.is_jmp = DISAS_EXIT_UPDATE; } else { @@ -7989,13 +8027,13 @@ static void decode_sys_interrupts(DisasContext *ctx) break; case OPC2_32_SYS_TRAPSV: l1 = gen_new_label(); - tcg_gen_brcondi_tl(TCG_COND_GE, cpu_PSW_SV, 0, l1); + tcg_gen_brcondi_i32(TCG_COND_GE, cpu_PSW_SV, 0, l1); generate_trap(ctx, TRAPC_ASSERT, TIN5_SOVF); gen_set_label(l1); break; case OPC2_32_SYS_TRAPV: l1 = gen_new_label(); - 
tcg_gen_brcondi_tl(TCG_COND_GE, cpu_PSW_V, 0, l1); + tcg_gen_brcondi_i32(TCG_COND_GE, cpu_PSW_V, 0, l1); generate_trap(ctx, TRAPC_ASSERT, TIN5_OVF); gen_set_label(l1); break; @@ -8011,7 +8049,7 @@ static void decode_32Bit_opc(DisasContext *ctx) int32_t address, const16; int8_t b, const4; int32_t bpos; - TCGv temp, temp2, temp3; + TCGv_i32 temp, temp2, temp3; op1 = MASK_OP_MAJOR(ctx->opcode); @@ -8044,18 +8082,18 @@ static void decode_32Bit_opc(DisasContext *ctx) address = MASK_OP_ABS_OFF18(ctx->opcode); r1 = MASK_OP_ABS_S1D(ctx->opcode); temp = tcg_constant_i32(EA_ABS_FORMAT(address)); - temp2 = tcg_temp_new(); + temp2 = tcg_temp_new_i32(); - tcg_gen_shri_tl(temp2, cpu_gpr_d[r1], 16); - tcg_gen_qemu_st_tl(temp2, temp, ctx->mem_idx, MO_LEUW); + tcg_gen_shri_i32(temp2, cpu_gpr_d[r1], 16); + tcg_gen_qemu_st_i32(temp2, temp, ctx->mem_idx, MO_LEUW); break; case OPC1_32_ABS_LD_Q: address = MASK_OP_ABS_OFF18(ctx->opcode); r1 = MASK_OP_ABS_S1D(ctx->opcode); temp = tcg_constant_i32(EA_ABS_FORMAT(address)); - tcg_gen_qemu_ld_tl(cpu_gpr_d[r1], temp, ctx->mem_idx, MO_LEUW); - tcg_gen_shli_tl(cpu_gpr_d[r1], cpu_gpr_d[r1], 16); + tcg_gen_qemu_ld_i32(cpu_gpr_d[r1], temp, ctx->mem_idx, MO_LEUW); + tcg_gen_shli_i32(cpu_gpr_d[r1], cpu_gpr_d[r1], 16); break; case OPCM_32_ABS_LEA_LHA: address = MASK_OP_ABS_OFF18(ctx->opcode); @@ -8064,13 +8102,13 @@ static void decode_32Bit_opc(DisasContext *ctx) if (has_feature(ctx, TRICORE_FEATURE_162)) { op2 = MASK_OP_ABS_OP2(ctx->opcode); if (op2 == OPC2_32_ABS_LHA) { - tcg_gen_movi_tl(cpu_gpr_a[r1], address << 14); + tcg_gen_movi_i32(cpu_gpr_a[r1], address << 14); break; } /* otherwise translate regular LEA */ } - tcg_gen_movi_tl(cpu_gpr_a[r1], EA_ABS_FORMAT(address)); + tcg_gen_movi_i32(cpu_gpr_a[r1], EA_ABS_FORMAT(address)); break; /* ABSB-format */ case OPC1_32_ABSB_ST_T: @@ -8079,12 +8117,12 @@ static void decode_32Bit_opc(DisasContext *ctx) bpos = MASK_OP_ABSB_BPOS(ctx->opcode); temp = tcg_constant_i32(EA_ABS_FORMAT(address)); - temp2 = tcg_temp_new(); + temp2 = tcg_temp_new_i32(); - tcg_gen_qemu_ld_tl(temp2, temp, ctx->mem_idx, MO_UB); - tcg_gen_andi_tl(temp2, temp2, ~(0x1u << bpos)); - tcg_gen_ori_tl(temp2, temp2, (b << bpos)); - tcg_gen_qemu_st_tl(temp2, temp, ctx->mem_idx, MO_UB); + tcg_gen_qemu_ld_i32(temp2, temp, ctx->mem_idx, MO_UB); + tcg_gen_andi_i32(temp2, temp2, ~(0x1u << bpos)); + tcg_gen_ori_i32(temp2, temp2, (b << bpos)); + tcg_gen_qemu_st_i32(temp2, temp, ctx->mem_idx, MO_UB); break; /* B-format */ case OPC1_32_B_CALL: @@ -8206,13 +8244,13 @@ static void decode_32Bit_opc(DisasContext *ctx) r3 = MASK_OP_RCRR_D(ctx->opcode); const16 = MASK_OP_RCRR_CONST4(ctx->opcode); temp = tcg_constant_i32(const16); - temp2 = tcg_temp_new(); /* width*/ - temp3 = tcg_temp_new(); /* pos */ + temp2 = tcg_temp_new_i32(); /* width*/ + temp3 = tcg_temp_new_i32(); /* pos */ CHECK_REG_PAIR(r2); - tcg_gen_andi_tl(temp2, cpu_gpr_d[r2 + 1], 0x1f); - tcg_gen_andi_tl(temp3, cpu_gpr_d[r2], 0x1f); + tcg_gen_andi_i32(temp2, cpu_gpr_d[r2 + 1], 0x1f); + tcg_gen_andi_i32(temp3, cpu_gpr_d[r2], 0x1f); gen_insert(cpu_gpr_d[r3], cpu_gpr_d[r1], temp, temp2, temp3); break; @@ -8280,7 +8318,7 @@ static void decode_32Bit_opc(DisasContext *ctx) r3 = MASK_OP_RRPW_D(ctx->opcode); const16 = MASK_OP_RRPW_POS(ctx->opcode); - tcg_gen_extract2_tl(cpu_gpr_d[r3], cpu_gpr_d[r2], cpu_gpr_d[r1], + tcg_gen_extract2_i32(cpu_gpr_d[r3], cpu_gpr_d[r2], cpu_gpr_d[r1], 32 - const16); break; /* RRR Format */ @@ -8329,10 +8367,10 @@ static void decode_32Bit_opc(DisasContext *ctx) decode_sys_interrupts(ctx); break; 
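The DEXTR handling earlier in this translate.c diff forces the second operand to zero when pos == 0, because a 32-bit shift by 32 is undefined. A standalone reference model of that extraction (illustrative only, not part of the patch; dextr_ref is a made-up name) shows the same special case:

#include <assert.h>
#include <stdint.h>

/*
 * Reference model of DEXTR: take 32 bits out of the 64-bit concatenation
 * {r1:r2}, starting `pos` bits below the MSB.  For pos == 0 the second
 * term would need a shift by 32, undefined for a 32-bit operand, so it is
 * handled explicitly -- the same reason the generated TCG ops use movcond
 * to force that term to zero.
 */
static uint32_t dextr_ref(uint32_t r1, uint32_t r2, unsigned pos)
{
    pos &= 0x1f;
    if (pos == 0) {
        return r1;
    }
    return (r1 << pos) | (r2 >> (32 - pos));
}

int main(void)
{
    /* With r1 == r2 the operation degenerates into a rotate left,
       matching the r1 == r2 fast path in the decoder. */
    assert(dextr_ref(0x12345678, 0x12345678, 4) == 0x23456781);
    assert(dextr_ref(0xaabbccdd, 0x11223344, 8) == 0xbbccdd11);
    assert(dextr_ref(0xaabbccdd, 0x11223344, 0) == 0xaabbccdd);
    return 0;
}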
case OPC1_32_SYS_RSTV: - tcg_gen_movi_tl(cpu_PSW_V, 0); - tcg_gen_mov_tl(cpu_PSW_SV, cpu_PSW_V); - tcg_gen_mov_tl(cpu_PSW_AV, cpu_PSW_V); - tcg_gen_mov_tl(cpu_PSW_SAV, cpu_PSW_V); + tcg_gen_movi_i32(cpu_PSW_V, 0); + tcg_gen_mov_i32(cpu_PSW_SV, cpu_PSW_V); + tcg_gen_mov_i32(cpu_PSW_AV, cpu_PSW_V); + tcg_gen_mov_i32(cpu_PSW_SAV, cpu_PSW_V); break; default: generate_trap(ctx, TRAPC_INSN_ERR, TIN2_IOPC); @@ -8375,7 +8413,7 @@ static void tricore_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu) tcg_gen_insn_start(ctx->base.pc_next); } -static bool insn_crosses_page(CPUTriCoreState *env, DisasContext *ctx) +static bool insn_crosses_page(DisasContext *ctx, CPUTriCoreState *env) { /* * Return true if the insn at ctx->base.pc_next might cross a page boundary. @@ -8413,12 +8451,12 @@ static void tricore_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu) ctx->base.pc_next = ctx->pc_succ_insn; if (ctx->base.is_jmp == DISAS_NEXT) { - target_ulong page_start; + vaddr page_start; page_start = ctx->base.pc_first & TARGET_PAGE_MASK; if (ctx->base.pc_next - page_start >= TARGET_PAGE_SIZE || (ctx->base.pc_next - page_start >= TARGET_PAGE_SIZE - 3 - && insn_crosses_page(env, ctx))) { + && insn_crosses_page(ctx, env))) { ctx->base.is_jmp = DISAS_TOO_MANY; } } @@ -8479,14 +8517,14 @@ void cpu_state_reset(CPUTriCoreState *env) static void tricore_tcg_init_csfr(void) { - cpu_PCXI = tcg_global_mem_new(tcg_env, - offsetof(CPUTriCoreState, PCXI), "PCXI"); - cpu_PSW = tcg_global_mem_new(tcg_env, - offsetof(CPUTriCoreState, PSW), "PSW"); - cpu_PC = tcg_global_mem_new(tcg_env, - offsetof(CPUTriCoreState, PC), "PC"); - cpu_ICR = tcg_global_mem_new(tcg_env, - offsetof(CPUTriCoreState, ICR), "ICR"); + cpu_PCXI = tcg_global_mem_new_i32(tcg_env, + offsetof(CPUTriCoreState, PCXI), "PCXI"); + cpu_PSW = tcg_global_mem_new_i32(tcg_env, + offsetof(CPUTriCoreState, PSW), "PSW"); + cpu_PC = tcg_global_mem_new_i32(tcg_env, + offsetof(CPUTriCoreState, PC), "PC"); + cpu_ICR = tcg_global_mem_new_i32(tcg_env, + offsetof(CPUTriCoreState, ICR), "ICR"); } void tricore_tcg_init(void) @@ -8495,30 +8533,32 @@ void tricore_tcg_init(void) /* reg init */ for (i = 0 ; i < 16 ; i++) { - cpu_gpr_a[i] = tcg_global_mem_new(tcg_env, - offsetof(CPUTriCoreState, gpr_a[i]), - regnames_a[i]); + cpu_gpr_a[i] = tcg_global_mem_new_i32(tcg_env, + offsetof(CPUTriCoreState, + gpr_a[i]), + regnames_a[i]); } for (i = 0 ; i < 16 ; i++) { - cpu_gpr_d[i] = tcg_global_mem_new(tcg_env, - offsetof(CPUTriCoreState, gpr_d[i]), - regnames_d[i]); + cpu_gpr_d[i] = tcg_global_mem_new_i32(tcg_env, + offsetof(CPUTriCoreState, + gpr_d[i]), + regnames_d[i]); } tricore_tcg_init_csfr(); /* init PSW flag cache */ - cpu_PSW_C = tcg_global_mem_new(tcg_env, - offsetof(CPUTriCoreState, PSW_USB_C), - "PSW_C"); - cpu_PSW_V = tcg_global_mem_new(tcg_env, - offsetof(CPUTriCoreState, PSW_USB_V), - "PSW_V"); - cpu_PSW_SV = tcg_global_mem_new(tcg_env, - offsetof(CPUTriCoreState, PSW_USB_SV), - "PSW_SV"); - cpu_PSW_AV = tcg_global_mem_new(tcg_env, - offsetof(CPUTriCoreState, PSW_USB_AV), - "PSW_AV"); - cpu_PSW_SAV = tcg_global_mem_new(tcg_env, - offsetof(CPUTriCoreState, PSW_USB_SAV), - "PSW_SAV"); + cpu_PSW_C = tcg_global_mem_new_i32(tcg_env, + offsetof(CPUTriCoreState, PSW_USB_C), + "PSW_C"); + cpu_PSW_V = tcg_global_mem_new_i32(tcg_env, + offsetof(CPUTriCoreState, PSW_USB_V), + "PSW_V"); + cpu_PSW_SV = tcg_global_mem_new_i32(tcg_env, + offsetof(CPUTriCoreState, PSW_USB_SV), + "PSW_SV"); + cpu_PSW_AV = tcg_global_mem_new_i32(tcg_env, + offsetof(CPUTriCoreState, PSW_USB_AV), + 
"PSW_AV"); + cpu_PSW_SAV = tcg_global_mem_new_i32(tcg_env, + offsetof(CPUTriCoreState, PSW_USB_SAV), + "PSW_SAV"); } diff --git a/target/xtensa/core-dc232b/gdb-config.c.inc b/target/xtensa/core-dc232b/gdb-config.c.inc index d871686..8c88cae 100644 --- a/target/xtensa/core-dc232b/gdb-config.c.inc +++ b/target/xtensa/core-dc232b/gdb-config.c.inc @@ -15,9 +15,8 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, - Boston, MA 02110-1301, USA. */ + along with this program; if not, see + <https://www.gnu.org/licenses/>. */ XTREG(0, 0, 32, 4, 4, 0x0020, 0x0006, -2, 9, 0x0100, pc, 0, 0, 0, 0, 0, 0) diff --git a/target/xtensa/core-dc232b/xtensa-modules.c.inc b/target/xtensa/core-dc232b/xtensa-modules.c.inc index 164df3b..bb9ebd2 100644 --- a/target/xtensa/core-dc232b/xtensa-modules.c.inc +++ b/target/xtensa/core-dc232b/xtensa-modules.c.inc @@ -14,9 +14,8 @@ General Public License for more details. You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA - 02110-1301, USA. */ + along with this program; if not, see + <https://www.gnu.org/licenses/>. */ #include "qemu/osdep.h" #include "xtensa-isa.h" diff --git a/target/xtensa/core-fsf/xtensa-modules.c.inc b/target/xtensa/core-fsf/xtensa-modules.c.inc index c32683f..531f5e2 100644 --- a/target/xtensa/core-fsf/xtensa-modules.c.inc +++ b/target/xtensa/core-fsf/xtensa-modules.c.inc @@ -14,9 +14,8 @@ General Public License for more details. You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA - 02110-1301, USA. */ + along with this program; if not, see + <https://www.gnu.org/licenses/>. */ #include "qemu/osdep.h" #include "xtensa-isa.h" diff --git a/target/xtensa/cpu.c b/target/xtensa/cpu.c index 91b71b6c..1eeed44 100644 --- a/target/xtensa/cpu.c +++ b/target/xtensa/cpu.c @@ -59,13 +59,13 @@ static TCGTBCPUState xtensa_get_tb_cpu_state(CPUState *cs) { CPUXtensaState *env = cpu_env(cs); uint32_t flags = 0; - target_ulong cs_base = 0; + uint64_t cs_base = 0; flags |= xtensa_get_ring(env); if (env->sregs[PS] & PS_EXCM) { flags |= XTENSA_TBFLAG_EXCM; } else if (xtensa_option_enabled(env->config, XTENSA_OPTION_LOOP)) { - target_ulong lend_dist = + uint64_t lend_dist = env->sregs[LEND] - (env->pc & -(1u << TARGET_PAGE_BITS)); /* @@ -83,7 +83,7 @@ static TCGTBCPUState xtensa_get_tb_cpu_state(CPUState *cs) * for the TB that contains this instruction. 
*/ if (lend_dist < (1u << TARGET_PAGE_BITS) + env->config->max_insn_size) { - target_ulong lbeg_off = env->sregs[LEND] - env->sregs[LBEG]; + uint64_t lbeg_off = env->sregs[LEND] - env->sregs[LBEG]; cs_base = lend_dist; if (lbeg_off < 256) { @@ -318,6 +318,7 @@ static const TCGCPUOps xtensa_tcg_ops = { #ifndef CONFIG_USER_ONLY .tlb_fill = xtensa_cpu_tlb_fill, + .pointer_wrap = cpu_pointer_wrap_uint32, .cpu_exec_interrupt = xtensa_cpu_exec_interrupt, .cpu_exec_halt = xtensa_cpu_has_work, .cpu_exec_reset = cpu_reset, diff --git a/target/xtensa/translate.c b/target/xtensa/translate.c index 34ae2f4..bb8d2ed 100644 --- a/target/xtensa/translate.c +++ b/target/xtensa/translate.c @@ -1166,7 +1166,7 @@ static void xtensa_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu) { DisasContext *dc = container_of(dcbase, DisasContext, base); CPUXtensaState *env = cpu_env(cpu); - target_ulong page_start; + vaddr page_start; /* These two conditions only apply to the first insn in the TB, but this is the first TranslateOps hook that allows exiting. */ diff --git a/target/xtensa/xtensa-semi.c b/target/xtensa/xtensa-semi.c index 636f421..431c263 100644 --- a/target/xtensa/xtensa-semi.c +++ b/target/xtensa/xtensa-semi.c @@ -32,6 +32,7 @@ #include "exec/target_page.h" #include "semihosting/semihost.h" #include "semihosting/uaccess.h" +#include "system/memory.h" #include "qapi/error.h" #include "qemu/log.h" @@ -192,7 +193,9 @@ void xtensa_sim_open_console(Chardev *chr) void HELPER(simcall)(CPUXtensaState *env) { + const MemTxAttrs attrs = MEMTXATTRS_UNSPECIFIED; CPUState *cs = env_cpu(env); + AddressSpace *as = cs->as; uint32_t *regs = env->regs; switch (regs[2]) { @@ -215,7 +218,7 @@ void HELPER(simcall)(CPUXtensaState *env) TARGET_PAGE_SIZE - (vaddr & (TARGET_PAGE_SIZE - 1)); uint32_t io_sz = page_left < len ? page_left : len; hwaddr sz = io_sz; - void *buf = cpu_physical_memory_map(paddr, &sz, !is_write); + void *buf = address_space_map(as, paddr, &sz, !is_write, attrs); uint32_t io_done; bool error = false; @@ -261,7 +264,7 @@ void HELPER(simcall)(CPUXtensaState *env) error = true; io_done = 0; } - cpu_physical_memory_unmap(buf, sz, !is_write, io_done); + address_space_unmap(as, buf, sz, !is_write, io_done); } else { error = true; regs[3] = TARGET_EINVAL; @@ -408,11 +411,11 @@ void HELPER(simcall)(CPUXtensaState *env) while (sz) { hwaddr len = sz; - void *buf = cpu_physical_memory_map(base, &len, 1); + void *buf = address_space_map(as, base, &len, true, attrs); if (buf && len) { memset(buf, regs[4], len); - cpu_physical_memory_unmap(buf, len, 1, len); + address_space_unmap(as, buf, len, true, len); } else { len = 1; } |
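The xtensa-semi.c hunks above replace cpu_physical_memory_map()/unmap() with the AddressSpace-based API, passing the CPU's address space and explicit MemTxAttrs. A minimal sketch of that map/write/unmap idiom, mirroring the memset loop in the simcall handler (fill_guest_phys is a hypothetical wrapper name, not code from the patch):

#include "qemu/osdep.h"
#include "system/memory.h"

/*
 * Illustrative sketch: fill `size` bytes of guest-physical memory at `base`
 * with `pattern`, mapping as large a region as possible on each pass.
 */
static void fill_guest_phys(AddressSpace *as, hwaddr base,
                            uint8_t pattern, hwaddr size)
{
    while (size) {
        hwaddr len = size;
        void *buf = address_space_map(as, base, &len, true,
                                      MEMTXATTRS_UNSPECIFIED);

        if (buf && len) {
            memset(buf, pattern, len);
            /* access_len == len: everything mapped was actually written */
            address_space_unmap(as, buf, len, true, len);
        } else {
            /* nothing mappable here: skip one byte rather than spin */
            len = 1;
        }
        base += len;
        size -= len;
    }
}

address_space_map() may hand back a bounce buffer when the target region is not directly addressable, which is why the matching address_space_unmap() call takes the actual access length rather than assuming the whole mapping was used.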